def get_video_data(self):
        """
        Return video feature data according to the config parameters.

        Loads previously saved features from disk when available (unless
        ``save_features`` forces recomputation); otherwise builds them from
        the raw train/test split: min-max scaling followed by either the
        FDHH algorithm (``self.fdhh``) or PCA. Non-FDHH features are
        additionally split into per-video sequences before returning.
        WARNING: returning raw video features can use a lot of RAM.

        Returns
        -------
        list : [X_train, X_test]
        """
        feature_str = 'fdhh' if self.fdhh else 'pca'
        # 'test' mode evaluates on the test split, otherwise on the dev split.
        split_name = 'train_test' if self.options.mode == 'test' else 'train_dev'
        feature_path = (f'{self.feature_folder}_FD',
                        f'{split_name}_{feature_str}.pic')
        feature_file = f'{feature_path[0]}/{feature_path[1]}'

        # Return saved features if they exist (and no fresh save is requested):
        if not self.options.save_features and os.path.exists(feature_file):
            X_train, X_test = load_from_file(feature_file)
        else:
            X_train, X_test = self.get_train_test()
            X_train, X_test = scale(X_train,
                                    X_test,
                                    scale_type='minmax',
                                    axis=0,
                                    use_pandas=True,
                                    verbose=self.options.verbose)
            if self.fdhh:
                if self.options.verbose:
                    print('Performing FDHH over train and test set...')
                # FDHH is applied per video: rows are grouped by index level 0.
                X_train = X_train.groupby(level=0).apply(self.FDHH)
                X_test = X_test.groupby(level=0).apply(self.FDHH)
                if self.options.verbose:
                    print(
                        f'Sparsity in Train fdhh = {np.sum(X_train.values == 0) / X_train.size}'
                    )
                    print(
                        f'Sparsity in Test fdhh = {np.sum(X_test.values == 0) / X_test.size}'
                    )
            else:
                X_train, X_test = self.video_pca(X_train, X_test)

        if self.options.save_features:
            save_to_file(feature_path[0], feature_path[1], (X_train, X_test))
            # Avoid re-saving identical features on subsequent calls this run.
            self.options.save_features = False

        if not self.fdhh:
            X_train = self.split_videos(X_train)
            X_test = self.split_videos(X_test)

        return [X_train, X_test]
Exemple #2
0
def main():
    """Colorize a sketch with a reference style image.

    Expects two command-line arguments: the sketch filename (under
    ./data/test) and the style filename (under ./data/styles). Extracts a
    colorgram from the style image, runs the model on the sketch, and
    saves a side-by-side strip (sketch | style | result) to ``out_root``.

    Raises
    ------
    RuntimeError
        If fewer than two command-line arguments are supplied.
    """
    if len(sys.argv) < 3:
        raise RuntimeError(
            'Command Line Argument Must be (sketch file, style file)')

    style_f = './data/styles/%s' % sys.argv[2]
    test_f = './data/test/%s' % sys.argv[1]

    # Output name: "<sketch-stem><style-stem>.png" (strips 3-char extensions).
    filename = sys.argv[1][:-4] + sys.argv[2][:-4] + '.png'

    style = Image.open(style_f).convert('RGB')
    style = transforms.Resize((512, 512))(style)
    style_pil = style

    test = Image.open(test_f).convert('RGB')
    test_pil = transforms.Resize((512, 512))(test)

    transform = transforms.Compose(
        [transforms.Resize((512, 512)),
         transforms.ToTensor()])

    test = transform(test)
    test = scale(test)  # scale into the model's expected input range
    test = test.unsqueeze(0).to(device)

    to_pil = transforms.ToPILImage()

    try:
        # Extract the top-k dominant colors from each cropped style region
        # to build the colorgram dict expected by make_colorgram_tensor.
        images = list(crop_region(style))
        result = {}
        for i, img in enumerate(images, 1):
            colors = cgm.extract(img, topk + 1)
            result[str(i)] = {
                '%d' % i: get_rgb(colors[i])
                for i in range(1, topk + 1)
            }

        color_tensor = make_colorgram_tensor(result)
        color_tensor = color_tensor.unsqueeze(0).to(device)

        fakeB, _ = model(test, color_tensor)
        fakeB = fakeB.squeeze(0)
        fakeB = re_scale(fakeB.detach().cpu())
        fakeB = to_pil(fakeB)

        # Compose sketch | style | result side by side for comparison.
        result_image = Image.new('RGB', (512 * 3, 512))
        result_image.paste(test_pil, (512 * 0, 0, 512 * 1, 512))
        result_image.paste(style_pil, (512 * 1, 0, 512 * 2, 512))
        result_image.paste(fakeB, (512 * 2, 0, 512 * 3, 512))
        save_image(result_image, os.path.join(out_root, filename))

    except IndexError:
        # Not enough colors could be extracted for the colorgram.
        # fix: sys.exit instead of the site-provided exit() builtin.
        sys.exit(1)
Exemple #3
0
def predict():
    """Handle a car-price prediction request from the web form."""
    # Collect the submitted car details in form-field order.
    car_details = list(request.form.values())

    # Encode categorical fields, then scale to the model's input space.
    encoded_details = preprocess.encode(car_details)
    model_input = preprocess.scale(encoded_details)

    # Predict and round to the nearest whole naira.
    predicted = model.predict(model_input)
    car_price = round(predicted[0])

    return render_template('index.html', prediction_text=f'The price of the {car_details[0].capitalize()} {car_details[1]} {car_details[3]} model grade {car_details[5]} {car_details[4]}, {car_details[6]} car with mileage value of {car_details[2]}km should be {car_price:,} naira')
def actbycolor(stylelist, testfile):
    """Colorize a sketch using an explicit palette of colors.

    Parameters
    ----------
    stylelist : sequence
        Flat sequence of colors; cycled to fill the 4x4 colorgram grid.
    testfile : str
        Path of the sketch image to colorize.

    Side effects: writes 'media/result.jpg' (the colorized image) and
    'media/compareresult.jpg' (sketch | style | result comparison strip).
    """
    print(stylelist)
    print(stylelist[0])
    # fix: removed a copy-pasted `len(sys.argv) < 3` guard — this function
    # receives its inputs as parameters and never reads the command line.

    # Build the 4x4 colorgram dict expected by make_colorgram_tensor,
    # cycling through stylelist when it has fewer than 16 entries.
    result = {}
    j = 0
    for i in range(1, 5):
        temp = {}
        for k in range(1, 5):
            if j >= len(stylelist):
                j = 0  # wrap around to the start of the palette
            temp[str(k)] = stylelist[j]
            j = j + 1
        result[str(i)] = temp
    print(result)
    test_f = testfile

    # filename = sys.argv[1][:-4] + sys.argv[2][:-4] + '.png'
    test = Image.open(test_f).convert('RGB')
    test_pil = transforms.Resize((256, 256))(test)

    # Fixed style image, used only for the side-by-side comparison strip.
    style = Image.open('media/immm.jpg').convert('RGB')
    style = transforms.Resize((256, 256))(style)
    print(style)
    style_pil = style

    transform = transforms.Compose(
        [transforms.Resize((256, 256)),
         transforms.ToTensor()])

    test = transform(test)
    test = scale(test)  # scale into the model's expected input range
    test = test.unsqueeze(0).to(device)

    to_pil = transforms.ToPILImage()
    nowtime = time.time()
    try:
        nowtime2 = time.time()
        print(result)
        print(nowtime2 - nowtime)
        color_tensor = make_colorgram_tensor(result)
        color_tensor = color_tensor.unsqueeze(0).to(device)

        fakeB, _ = model(test, color_tensor)
        fakeB = fakeB.squeeze(0)
        fakeB = re_scale(fakeB.detach().cpu())
        fakeB = to_pil(fakeB)
        fakeB.save(os.path.expanduser('media/result.jpg'))

        # Compose sketch | style | result side by side for comparison.
        result_image = Image.new('RGB', (256 * 3, 256))
        result_image.paste(test_pil, (256 * 0, 0, 256 * 1, 256))
        result_image.paste(style_pil, (256 * 1, 0, 256 * 2, 256))
        result_image.paste(fakeB, (256 * 2, 0, 256 * 3, 256))
        result_image.save(os.path.expanduser('media/compareresult.jpg'))
        # save_image(result_image, os.path.join(out_root, filename))

    except IndexError:
        # fix: sys.exit instead of the site-provided exit() builtin.
        sys.exit(1)
    nowtime2 = time.time()
    print(nowtime2 - nowtime)


# if __name__ == "__main__":
#     main()
Exemple #5
0
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from augment import augment
from keras.layers import LSTM, Input, RepeatVector
from keras.models import Model
from preprocess import scale
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics import silhouette_score
from statsmodels.tsa.stattools import coint

# First party modules
import n2d

# Real stock-price data used for clustering.
# NOTE(review): `scale` here appears to take a CSV path and return the
# scaled series — confirm against the preprocess module.
test_x = scale("Data/stock_close.csv")

# Fake (augmented) data for training: 100 augmented copies of the real set.
train_x = augment(test_x, 100)
# Transpose so rows match the autoencoder's expected input orientation.
train_x = train_x.T

# x_test = np.asarray(test_x.values)
# x_test = x_test.reshape(476, 1225, 1)
#
# train_x = train_x.reshape(47600, 1225, 1)
#
# x.shape[0]
# x.shape[1]

# not used, an experiment
Exemple #6
0
import embedding
import model
import preprocess
from sklearn.model_selection import KFold

# Input paths: augmented feature CSV, pretrained GloVe vectors, questions CSV.
input_dataset = './Augmented_Feat.csv'
embedmodel = embedding.train_word2vec('./glove.6B.300d.txt')
question = './questions.csv'


# Clean the dataset and remove (demote) question text from the answers.
df = preprocess.cleaning_dataset(input_dataset)
df = preprocess.question_demoting(df, question)

X, y = preprocess.scale(df)

# Hold out 20% of the data as the final test set.
X_train, X_test, y_train, y_test = preprocess.split(X, y, 0.2)

# 5-fold cross-validation over the training split; one model, tokenizer
# and accuracy score is kept per fold.
split = 5
index = 0
train_model = [None] * split
tokenizer = [None] * split
acc = [None] * split
kfold = KFold(n_splits=split, shuffle=True, random_state=101)
for train, test in kfold.split(X_train, y_train):
    train_model[index], tokenizer[index] = model.train(X_train.iloc[train], y_train[train], embedmodel)
    test_results = model.predict(X_train.iloc[test], train_model[index], tokenizer[index])
    test_results, y_true = model.processresult(test_results, y_train[test])
    acc[index], _ = model.evaluate(test_results, y_true)
    index += 1

index = 0
Exemple #7
0
    def __getitem__(self, index):
        """
        Niko Dataset Get Item.

        Each image file holds the original and its sketch side by side;
        this splits them, pads/crops both to ``self.size``, applies the
        optional transform, and scales pixel values into [-1, 1].

        @param index: index into ``self.image_files``
        Returns:
            if self.color_histogram
            tuple: (imageA == original, imageB == sketch, colors)
            else:
            tuple: (imageA == original, imageB == sketch)

            if self.resize
            resized image will be appended end of the above tuple
        """
        filename = self.image_files[index]
        # File id = basename without its 3-character extension.
        file_id = filename.split('/')[-1][:-4]

        if self.color_histogram:
            # build colorgram tensor; JSON color info is cached per file id
            # so each file's colorgram is only read from disk once.
            color_info = self.color_cache.get(file_id, None)
            if color_info is None:
                with open(
                        os.path.join('./data/colorgram',
                                     '%s.json' % file_id).replace('\\', '/'),
                        'r') as json_file:
                    # load color info dictionary from json file
                    color_info = json.loads(json_file.read())
                    self.color_cache[file_id] = color_info
            colors = make_colorgram_tensor(color_info)

        # Left half = original (A), right half = sketch (B).
        image = Image.open(filename)
        image_width, image_height = image.size
        imageA = image.crop((0, 0, image_width // 2, image_height))
        imageB = image.crop((image_width // 2, 0, image_width, image_height))

        # default transforms, pad if needed and center crop self.size
        width_pad = self.size - image_width // 2
        if width_pad < 0:
            # image already wide enough: do not pad
            width_pad = 0

        height_pad = self.size - image_height
        if height_pad < 0:
            height_pad = 0

        # padding as white; the +1 on two sides absorbs odd pad amounts so
        # total padding is never one pixel short of the target size
        padding = transforms.Pad((width_pad // 2, height_pad // 2 + 1,
                                  width_pad // 2 + 1, height_pad // 2),
                                 (255, 255, 255))

        # use center crop to trim any overshoot from the padding above
        crop = transforms.CenterCrop(self.size)

        imageA = padding(imageA)
        imageA = crop(imageA)

        imageB = padding(imageB)
        imageB = crop(imageB)

        if self.transform is not None:
            imageA = self.transform(imageA)
            imageB = self.transform(imageB)

        # scale image into range [-1, 1]
        imageA = scale(imageA)
        imageB = scale(imageB)
        if not self.color_histogram:
            return imageA, imageB
        else:
            return imageA, imageB, colors
Exemple #8
0
import preprocess
from sklearn.model_selection import KFold


def avg(rms, mae):
    """Return the mean of the RMS and MAE scores."""
    total = rms + mae
    return total / 2


input_dataset = './Augmented_Feat.csv'
embedmodel = embedding.train_word2vec('./glove.6B.300d.txt')
question = './questions.csv'

df = preprocess.cleaning_dataset(input_dataset)
df = preprocess.question_demoting(df, question)

X, y, scaler_y = preprocess.scale(df)

X_train, X_test, y_train, y_test = preprocess.split(X, y, 0.2)

split = 5
index = 0
train_model = [None] * split
tokenizer = [None] * split
rms = [None] * split
mae = [None] * split
kfold = KFold(n_splits=split, shuffle=True, random_state=101)
for train, test in kfold.split(X_train, y_train):
    train_model[index], tokenizer[index] = model.train(X_train.iloc[train],
                                                       y_train[train],
                                                       embedmodel)
    test_results = model.predict(X_train.iloc[test], train_model[index],
Exemple #9
0
def training_loop(dataloader_X,
                  dataloader_Y,
                  test_dataloader_X,
                  test_dataloader_Y,
                  G_XtoY,
                  G_YtoX,
                  D_X,
                  D_Y,
                  g_optimizer,
                  d_x_optimizer,
                  d_y_optimizer,
                  n_epochs=1000):
    """Train a CycleGAN: two discriminators and two generators with
    adversarial (MSE) and cycle-consistency losses.

    Parameters
    ----------
    dataloader_X, dataloader_Y : training loaders for domains X and Y.
    test_dataloader_X, test_dataloader_Y : loaders used only to grab fixed
        sample batches for periodic visual inspection.
    G_XtoY, G_YtoX : generator networks (X->Y and Y->X).
    D_X, D_Y : discriminator networks for each domain.
    g_optimizer : optimizer over both generators' parameters.
    d_x_optimizer, d_y_optimizer : per-discriminator optimizers.
    n_epochs : number of iterations; each one consumes a single batch
        from each domain.

    Returns
    -------
    (G_XtoY, G_YtoX, D_X, D_Y, d_x_optimizer, d_y_optimizer, g_optimizer,
     losses) where losses is a list of (d_x_loss, d_y_loss, g_total_loss)
    tuples recorded every ``print_every`` iterations.
    """

    print_every = 10

    # keep track of losses over time
    losses = []

    test_iter_X = iter(test_dataloader_X)
    test_iter_Y = iter(test_dataloader_Y)

    # Get some fixed data from domains X and Y for sampling. These are images
    # that are held constant throughout training, to help inspect the model's
    # performance.
    # fix: use the builtin next() — Python 3 iterators have no .next() method.
    fixed_X = next(test_iter_X)[0]
    fixed_Y = next(test_iter_Y)[0]
    fixed_X = scale(fixed_X)  # scale to a range -1 to 1
    fixed_Y = scale(fixed_Y)

    # batches per epoch
    iter_X = iter(dataloader_X)
    iter_Y = iter(dataloader_Y)
    batches_per_epoch = min(len(iter_X), len(iter_Y))

    # fix: hoisted out of the loop — the device never changes mid-training.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    for epoch in range(1, n_epochs + 1):

        # Reset iterators once per pass over the shorter dataset
        if epoch % batches_per_epoch == 0:
            iter_X = iter(dataloader_X)
            iter_Y = iter(dataloader_Y)

        images_X, _ = next(iter_X)
        images_X = scale(images_X)  # scale to a range -1 to 1

        images_Y, _ = next(iter_Y)
        images_Y = scale(images_Y)

        # move images to GPU if available (otherwise stay on CPU)
        images_X = images_X.to(device)
        images_Y = images_Y.to(device)

        # ============================================
        #            TRAIN THE DISCRIMINATORS
        # ============================================

        ##   First: D_X, real and fake loss components   ##
        d_x_optimizer.zero_grad()

        # 1. Compute the discriminator losses on real images
        D_out = D_X(images_X)
        D_x_real_err = real_mse_loss(D_out)

        # 2. Generate fake images that look like domain X based on real images in domain Y
        G_out = G_YtoX(images_Y)

        # 3. Compute the fake loss for D_X
        D_out = D_X(G_out)
        D_x_fake_err = fake_mse_loss(D_out)

        # 4. Compute the total loss and perform backprop
        d_x_loss = D_x_real_err + D_x_fake_err
        d_x_loss.backward()
        d_x_optimizer.step()

        ##   Second: D_Y, real and fake loss components   ##
        d_y_optimizer.zero_grad()

        # 1. Compute the discriminator losses on real images
        D_out = D_Y(images_Y)
        D_y_real_err = real_mse_loss(D_out)

        # 2. Generate fake images that look like domain Y based on real images in domain X
        G_out = G_XtoY(images_X)

        # 3. Compute the fake loss for D_Y
        D_out = D_Y(G_out)
        D_y_fake_err = fake_mse_loss(D_out)

        # 4. Compute the total loss and perform backprop
        d_y_loss = D_y_real_err + D_y_fake_err
        d_y_loss.backward()
        d_y_optimizer.step()

        # =========================================
        #            TRAIN THE GENERATORS
        # =========================================

        ##    First: generate fake X images and reconstructed Y images    ##
        g_optimizer.zero_grad()
        # 1. Generate fake images that look like domain X based on real images in domain Y
        G_out = G_YtoX(images_Y)

        # 2. Compute the generator loss based on domain X
        D_out = D_X(G_out)
        g_YtoX_loss = real_mse_loss(D_out)

        # 3. Create a reconstructed y
        reconstructed_y = G_XtoY(G_out)

        # 4. Compute the cycle consistency loss (the reconstruction loss)
        reconstructed_y_loss = cycle_consistency_loss(images_Y,
                                                      reconstructed_y,
                                                      lambda_weight=10)

        ##    Second: generate fake Y images and reconstructed X images    ##

        # 1. Generate fake images that look like domain Y based on real images in domain X
        G_out = G_XtoY(images_X)

        # 2. Compute the generator loss based on domain Y
        D_out = D_Y(G_out)
        g_XtoY_loss = real_mse_loss(D_out)

        # 3. Create a reconstructed x
        reconstructed_x = G_YtoX(G_out)

        # 4. Compute the cycle consistency loss (the reconstruction loss)
        reconstructed_x_loss = cycle_consistency_loss(images_X,
                                                      reconstructed_x,
                                                      lambda_weight=10)

        # 5. Add up all generator and reconstructed losses and perform backprop
        g_total_loss = g_YtoX_loss + g_XtoY_loss + reconstructed_y_loss + reconstructed_x_loss
        g_total_loss.backward()
        g_optimizer.step()

        # Print the log info
        if epoch % print_every == 0:
            # append real and fake discriminator losses and the generator loss
            losses.append(
                (d_x_loss.item(), d_y_loss.item(), g_total_loss.item()))
            print(
                'Epoch [{:5d}/{:5d}] | d_X_loss: {:6.4f} | d_Y_loss: {:6.4f} | g_total_loss: {:6.4f}'
                .format(epoch, n_epochs, d_x_loss.item(), d_y_loss.item(),
                        g_total_loss.item()))

        sample_every = 100
        # Save the generated samples
        if epoch % sample_every == 0:
            G_YtoX.eval()  # set generators to eval mode for sample generation
            G_XtoY.eval()
            save_samples(epoch,
                         fixed_Y,
                         fixed_X,
                         G_YtoX,
                         G_XtoY,
                         batch_size=16)
            G_YtoX.train()
            G_XtoY.train()

        checkpoint_every = 1000
        # Save the model parameters
        if epoch % checkpoint_every == 0:
            checkpoint(epoch, G_XtoY, G_YtoX, D_X, D_Y)

    return G_XtoY, G_YtoX, D_X, D_Y, d_x_optimizer, d_y_optimizer, g_optimizer, losses
def test_CNN(data, CNN):
    """
        Trains a CNN for a given training set of points.
    """
    X_test = data["test"]["X"]
    y_test = data["test"]["y"]
    evaluated = CNN.evaluate(X_test, y_test)
    results = {}
    for n,metric in enumerate(CNN.metrics_names):
        results[metric] = evaluated[n]
    return results

if __name__ == "__main__":
    data = preprocess.read_data()
    data = preprocess.one_hot(data)
    data = preprocess.scale(data)
    data = preprocess.reshape_4D(data)

    msg = "Requires cmdline arg 'load' or 'save'"
    if len(sys.argv) == 2:
        if sys.argv[1].lower() == "load":
            CNN = load_model(config.CNN_save_name)
        elif sys.argv[1].lower() == "save":
            CNN = create_CNN(data)
            CNN = train_CNN(data, CNN)
            CNN.save(config.CNN_save_name)
        else:
            raise KeyError(msg)
    else:
        raise KeyError(msg)
Exemple #11
0
dt_performance = []
rf_mdls = []
rf_performance = []
snn_mdls = []
snn_performance = []

# Perform 10-fold cross validation on all regressors to get R, R2, MSE, and MAE
for train_idx, test_idx in kcv.split(Xtrain):
    X, Y = shuffle(Xtrain, Ytrain)

    # Create cross validation indices and data
    Xtrain2, Xtest2 = X.iloc[train_idx, :], X.iloc[test_idx, :]
    Ytrain2, Ytest2 = Y.iloc[train_idx, :], Y.iloc[test_idx, :]

    # Scale the data using min-max scaling
    Xtrain2, xmax, xmin = preprocess.scale(Xtrain2)
    Xtest2 = (Xtest2 - xmin) / (xmax - xmin)
    Ytrain2, ymax, ymin = preprocess.scale(Ytrain2)
    Ytest2 = (Ytest2 - ymin) / (ymax - ymin)

    # 1. Ordinary least squares
    ols = linear_model.LinearRegression()
    ols.fit(Xtrain2, Ytrain2)
    ols_pred = pd.DataFrame(ols.predict(Xtest2),
                            index=Ytest2.index,
                            columns=Ytest2.columns)
    ols_scores = evaluateMLmodel.regression_eval_metrics(ols_pred, Ytest2)
    ols_mdls.append(ols)
    ols_performance.append(ols_scores.mean(axis=0))

    # 2. Ridge Regression
# TODO: Create dimension using components and specs if needed
# specs = pd.read_csv('data/specs.csv')
# components = pd.read_csv('data/components.csv')
# from os import listdir, path
# comp_files = [f for f in listdir('data') if 'comp_' in f]
# for f in comp_files:
#     c = pd.read_csv(path.join('data',f))
#     print c.columns
#     components = pd.merge(components, c, how='left')

# train2['quantity'] = np.log(train2.quantity)
# test2['quantity'] = np.log(test2.quantity)

print "Scale dimensions..."
scale_dimensions = ['annual_usage', 'quantity', 'diameter', 'bend_radius', 'wall', 'length', 'num_bends', 'num_boss', 'num_bracket']
train2[scale_dimensions] = preprocess.scale(train2[scale_dimensions])
test2[scale_dimensions] = preprocess.scale(test2[scale_dimensions])


X = train2
# X = X.drop(['tube_assembly_id', 'quote_date', 'cost', 'supplier', 'material_id', 'end_a', 'end_x'], axis=1)
X = X.drop(['tube_assembly_id', 'quote_date', 'cost', 'supplier'], axis=1)

X_test = test2
# X_test = X_test.drop(['tube_assembly_id', 'quote_date','id', 'supplier', 'material_id', 'end_a', 'end_x'], axis=1)
X_test = X_test.drop(['tube_assembly_id', 'quote_date','id', 'supplier'], axis=1)


y = train2['cost']

m = X.shape[0]
Exemple #13
0
    def preprocess(self, feature_type, X_train, X_test):
        """
        Scale and reduce dimensionality of input features.

        Scaler name(s) and axes are read from the config section for
        ``feature_type``. A '+'-separated scaler entry (two names) enables
        a Box-Cox transform before the main scaler. After optional PCA the
        data is min-max scaled again, and video (non-FDHH) features are
        reshaped to (samples, seq_length, features) for the LSTM.

        Parameters
        ----------
        feature_type : str
            Config section key (e.g. 'video') used to look up scaler and
            PCA options.
        X_train, X_test : np.array (n, n_features)
            Arrays of input features where each row is a single feature set.

        Returns
        -------
        X_train, X_test : np.array (n, n_in)
            Scaled and reduced features.
        """

        # Extract scaler information from config:
        # e.g. 'boxcox+minmax' splits into two entries; a second entry
        # means Box-Cox is applied before the main scaler.
        scalers = self.config[feature_type][f'{feature_type}_scaler'].split(
            '+')
        scaler_axis = self.config[feature_type][
            f'{feature_type}_scale_axis'].split('+')
        if len(scalers) > 1:
            scaler_idx = 1
            use_boxcox = True
        else:
            scaler_idx = 0
            use_boxcox = False
        scaler = scalers[scaler_idx]

        # An empty axis entry means "scale over the whole array" (None).
        for i in range(len(scaler_axis)):
            if scaler_axis[i] == '':
                scaler_axis[i] = None
            else:
                scaler_axis[i] = int(scaler_axis[i])

        # Scale data (only minmax/standard are recognised scaler names):
        if scaler == 'minmax' or scaler == 'standard':
            X_train, X_test = scale(X_train,
                                    X_test,
                                    scale_type=scaler,
                                    axis=scaler_axis[scaler_idx],
                                    use_boxcox=use_boxcox,
                                    boxcox_axis=scaler_axis[0])
        elif self.options.verbose:
            print(
                'No scaler has been used before PCA. If this behaviour is unintentional check configurations.'
            )

        # Perform PCA; a missing component count in the parameters simply
        # skips this step (KeyError path).
        try:
            pca_pars = self.pars['PCA'][f'{feature_type}_components']
            X_train, X_test, pca = pca_transform(X_train,
                                                 X_test,
                                                 pca_components=pca_pars)
        except KeyError:
            if self.options.verbose:
                print(
                    'No pca performed during preprocessing. If this behaviour is unintentional check parameters.'
                )
        # Final min-max pass so all output features share a common range.
        X_train, X_test = scale(X_train, X_test, scale_type='minmax', axis=0)

        # Reshape for LSTM: (samples, seq_length, features).
        if feature_type == 'video' and not self.video.fdhh:
            X_train = X_train.reshape(-1, self.seq_length, X_train.shape[-1])
            X_test = X_test.reshape(-1, self.seq_length, X_test.shape[-1])
            if self.options.verbose:
                print(f"Training input shape for the LSTM is {X_train.shape}")
        return X_train, X_test
Exemple #14
0
# Display a sample of training images from both domains.
from visualize_data import visualize_data

images = visualize_data(dataloader_X, dataloader_Y)

# Pre-processing the images.
from preprocess import scale

# Inspect the current pixel-value range of a sample image.
img = images[0]

print('Min: ', img.min())
print('Max: ', img.max())

# Inspect the range after scaling.
scaled_img = scale(img)

print('Scaled min: ', scaled_img.min())
print('Scaled max: ', scaled_img.max())

# Define the CycleGAN model: two generators and two discriminators.
from model import create_model, print_models

G_XtoY, G_YtoX, D_X, D_Y = create_model(g_conv_dim=64,
                                        d_conv_dim=64,
                                        n_res_blocks=6)

# print all of the models
print_models(G_XtoY, G_YtoX, D_X, D_Y)

# computing the discriminator and generator losses