def get_video_data(self):
    """Return video feature data according to the config file.

    Loads cached features from disk when available; otherwise builds them
    from the raw train/test split, min-max scales them, and then either
    applies the FDHH algorithm or PCA. Performs FDHH if required, otherwise
    returns processed raw video (WARNING: potential RAM overflow).

    Returns
    -------
    [X_train, X_test]
        Feature matrices for the training set and the test/dev set.
    """
    feature_str = 'fdhh' if self.fdhh else 'pca'
    # Cache file name depends on whether we evaluate on the test or dev split.
    if self.options.mode == 'test':
        feature_path = (f'{self.feature_folder}_FD',
                        f'train_test_{feature_str}.pic')
    else:
        feature_path = (f'{self.feature_folder}_FD',
                        f'train_dev_{feature_str}.pic')

    cache_file = f'{feature_path[0]}/{feature_path[1]}'
    # Return saved features if they exist (and we are not regenerating them):
    if not self.options.save_features and os.path.exists(cache_file):
        X_train, X_test = load_from_file(cache_file)
    else:
        X_train, X_test = self.get_train_test()
        X_train, X_test = scale(X_train, X_test, scale_type='minmax',
                                axis=0, use_pandas=True,
                                verbose=self.options.verbose)
        if self.fdhh:
            if self.options.verbose:
                print('Performing FDHH over train and test set...')
            # FDHH is applied per video (index level 0 groups frames by video).
            X_train = X_train.groupby(level=0).apply(self.FDHH)
            X_test = X_test.groupby(level=0).apply(self.FDHH)
            if self.options.verbose:
                print(
                    f'Sparsity in Train fdhh = {np.sum(X_train.values == 0) / X_train.size}'
                )
                print(
                    f'Sparsity in Test fdhh = {np.sum(X_test.values == 0) / X_test.size}'
                )
        else:
            X_train, X_test = self.video_pca(X_train, X_test)

    if self.options.save_features:
        save_to_file(feature_path[0], feature_path[1], (X_train, X_test))
        # Clear the flag so subsequent calls reuse the cache instead of
        # recomputing and re-saving.
        self.options.save_features = False

    if not self.fdhh:
        # Without FDHH each video is split back into per-sequence samples.
        X_train = self.split_videos(X_train)
        X_test = self.split_videos(X_test)
    return [X_train, X_test]
def main():
    """Colorize a sketch using a reference style image (CLI entry point).

    Usage: script <sketch file> <style file>. Reads the sketch from
    ./data/test and the style from ./data/styles, extracts a per-region
    color palette from the style image, runs the colorization model, and
    writes a 3-panel comparison image (sketch | style | result).
    """
    if len(sys.argv) < 3:
        raise RuntimeError(
            'Command Line Argument Must be (sketch file, style file)')
    style_f = './data/styles/%s' % sys.argv[2]
    test_f = './data/test/%s' % sys.argv[1]
    # Output name combines both input names with their extensions stripped.
    filename = sys.argv[1][:-4] + sys.argv[2][:-4] + '.png'
    style = Image.open(style_f).convert('RGB')
    style = transforms.Resize((512, 512))(style)
    style_pil = style  # keep the PIL version for the comparison panel
    test = Image.open(test_f).convert('RGB')
    test_pil = transforms.Resize((512, 512))(test)
    transform = transforms.Compose(
        [transforms.Resize((512, 512)), transforms.ToTensor()])
    test = transform(test)
    test = scale(test)  # presumably maps into [-1, 1] -- see scale()'s definition
    test = test.unsqueeze(0).to(device)  # add a batch dimension
    to_pil = transforms.ToPILImage()
    try:
        # Extract the top-k colors of each region cropped from the style image.
        images = list(crop_region(style))
        result = {}
        for i, img in enumerate(images, 1):
            colors = cgm.extract(img, topk + 1)
            # NOTE: the comprehension's `i` shadows the loop's `i`; the outer
            # `i` is only used for the region key (comprehensions have their
            # own scope in Python 3).
            result[str(i)] = {
                '%d' % i: get_rgb(colors[i])
                for i in range(1, topk + 1)
            }
        color_tensor = make_colorgram_tensor(result)
        color_tensor = color_tensor.unsqueeze(0).to(device)
        fakeB, _ = model(test, color_tensor)
        fakeB = fakeB.squeeze(0)
        fakeB = re_scale(fakeB.detach().cpu())  # back to displayable range
        fakeB = to_pil(fakeB)
        # Paste sketch | style | result side by side.
        result_image = Image.new('RGB', (512 * 3, 512))
        result_image.paste(test_pil, (512 * 0, 0, 512 * 1, 512))
        result_image.paste(style_pil, (512 * 1, 0, 512 * 2, 512))
        result_image.paste(fakeB, (512 * 2, 0, 512 * 3, 512))
        save_image(result_image, os.path.join(out_root, filename))
    except IndexError:
        # Palette/region extraction came up short; exit with failure status.
        exit(1)
def predict():
    """Predict a car's price from the submitted form and render the result page."""
    # Raw form values, in the order the form fields were submitted.
    car_details = list(request.form.values())
    # Encode the categorical fields, then scale into the model's input space.
    model_input = preprocess.scale(preprocess.encode(car_details))
    # Single-row prediction, rounded to the nearest naira.
    car_price = round(model.predict(model_input)[0])
    return render_template('index.html',
                           prediction_text=f'The price of the {car_details[0].capitalize()} {car_details[1]} {car_details[3]} model grade {car_details[5]} {car_details[4]}, {car_details[6]} car with mileage value of {car_details[2]}km should be {car_price:,} naira')
def actbycolor(stylelist, testfile):
    """Colorize the sketch in ``testfile`` using the colors in ``stylelist``.

    Builds a 4x4 colorgram dictionary by cycling through ``stylelist``,
    runs the colorization model, and writes ``media/result.jpg`` plus a
    side-by-side comparison (sketch | style | result) to
    ``media/compareresult.jpg``.

    Parameters
    ----------
    stylelist : sequence
        Color values (as accepted by ``make_colorgram_tensor``); cycled to
        fill all 16 slots of the colorgram.
    testfile : str
        Path of the sketch image to colorize.
    """
    print(stylelist)
    print(stylelist[0])
    # FIX: removed the leftover `len(sys.argv) < 3` check copied from a CLI
    # main(). This function receives its inputs as parameters and must not
    # raise based on how the hosting process happened to be launched.

    # Build the 4x4 colorgram dict, cycling through stylelist as needed.
    result = {}
    j = 0
    for i in range(1, 5):
        temp = {}
        for k in range(1, 5):
            if j >= len(stylelist):
                j = 0  # wrap around when the style list is exhausted
            temp[str(k)] = stylelist[j]
            j = j + 1
        result[str(i)] = temp
    print(result)

    test_f = testfile
    # filename = sys.argv[1][:-4] + sys.argv[2][:-4] + '.png'
    test = Image.open(test_f).convert('RGB')
    test_pil = transforms.Resize((256, 256))(test)
    style = Image.open('media/immm.jpg').convert('RGB')
    style = transforms.Resize((256, 256))(style)
    print(style)
    style_pil = style
    transform = transforms.Compose(
        [transforms.Resize((256, 256)), transforms.ToTensor()])
    test = transform(test)
    test = scale(test)  # presumably maps into [-1, 1] -- see scale()'s definition
    test = test.unsqueeze(0).to(device)  # add a batch dimension
    to_pil = transforms.ToPILImage()
    nowtime = time.time()
    try:
        nowtime2 = time.time()
        print(result)
        print(nowtime2 - nowtime)
        color_tensor = make_colorgram_tensor(result)
        color_tensor = color_tensor.unsqueeze(0).to(device)
        fakeB, _ = model(test, color_tensor)
        fakeB = fakeB.squeeze(0)
        fakeB = re_scale(fakeB.detach().cpu())  # back to displayable range
        fakeB = to_pil(fakeB)
        fakeB.save(os.path.expanduser('media/result.jpg'))
        # Paste sketch | style | result side by side.
        result_image = Image.new('RGB', (256 * 3, 256))
        result_image.paste(test_pil, (256 * 0, 0, 256 * 1, 256))
        result_image.paste(style_pil, (256 * 1, 0, 256 * 2, 256))
        result_image.paste(fakeB, (256 * 2, 0, 256 * 3, 256))
        result_image.save(os.path.expanduser('media/compareresult.jpg'))
        # save_image(result_image, os.path.join(out_root, filename))
    except IndexError:
        exit(1)
    nowtime2 = time.time()
    print(nowtime2 - nowtime)


# if __name__ == "__main__":
#     main()
"""Prepare stock time-series data: real series for clustering, augmented series for training."""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from augment import augment
from keras.layers import LSTM, Input, RepeatVector
from keras.models import Model
from preprocess import scale
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics import silhouette_score
from statsmodels.tsa.stattools import coint

# First party modules
import n2d

# real data for clustering
test_x = scale("Data/stock_close.csv")
# fake data for training: 100 augmented variants of the real data
train_x = augment(test_x, 100)
# transpose so rows/columns match the autoencoder's expected orientation
train_x = train_x.T

# x_test = np.asarray(test_x.values)
# x_test = x_test.reshape(476, 1225, 1)
#
# train_x = train_x.reshape(47600, 1225, 1)
#
# x.shape[0]
# x.shape[1]
# not used, an experiment
import embedding
import model
import preprocess
from sklearn.model_selection import KFold

# Inputs: augmented feature CSV, pretrained GloVe vectors, question list.
input_dataset = './Augmented_Feat.csv'
embedmodel = embedding.train_word2vec('./glove.6B.300d.txt')
question = './questions.csv'

# Clean the dataset and remove question words from the answers.
df = preprocess.cleaning_dataset(input_dataset)
df = preprocess.question_demoting(df, question)
X, y = preprocess.scale(df)
# Hold out 20% of the data as the final test set.
X_train, X_test, y_train, y_test = preprocess.split(X, y, 0.2)

# 5-fold cross validation over the training set; one model/tokenizer/accuracy
# slot per fold.
split = 5
index = 0
train_model = [None] * split
tokenizer = [None] * split
acc = [None] * split
kfold = KFold(n_splits=split, shuffle=True, random_state=101)
for train, test in kfold.split(X_train, y_train):
    train_model[index], tokenizer[index] = model.train(X_train.iloc[train],
                                                       y_train[train],
                                                       embedmodel)
    test_results = model.predict(X_train.iloc[test], train_model[index],
                                 tokenizer[index])
    test_results, y_true = model.processresult(test_results, y_train[test])
    acc[index], _ = model.evaluate(test_results, y_true)
    index += 1
index = 0
def __getitem__(self, index):
    """
    Niko Dataset Get Item

    @param index: index into self.image_files

    Returns:
        if self.color_histogram
            tuple: (imageA == original, imageB == sketch, colors)
        else
            tuple: (imageA == original, imageB == sketch)
        if self.resize
            resized image will be appended end of the above tuple
    """
    filename = self.image_files[index]
    # File id = basename without its 4-character extension (e.g. '.png').
    file_id = filename.split('/')[-1][:-4]
    if self.color_histogram:
        # build colorgram tensor; parsed JSON is cached per file id so each
        # color description is read from disk only once
        color_info = self.color_cache.get(file_id, None)
        if color_info is None:
            with open(
                    os.path.join('./data/colorgram',
                                 '%s.json' % file_id).replace('\\', '/'),
                    'r') as json_file:
                # load color info dictionary from json file
                color_info = json.loads(json_file.read())
            self.color_cache[file_id] = color_info
        colors = make_colorgram_tensor(color_info)
    # The stored image is a side-by-side pair: left half is A (original),
    # right half is B (sketch).
    image = Image.open(filename)
    image_width, image_height = image.size
    imageA = image.crop((0, 0, image_width // 2, image_height))
    imageB = image.crop((image_width // 2, 0, image_width, image_height))
    # default transforms, pad if needed and center crop to self.size
    width_pad = self.size - image_width // 2
    if width_pad < 0:
        # do not pad
        width_pad = 0
    height_pad = self.size - image_height
    if height_pad < 0:
        height_pad = 0
    # padding as white; the +1 on two sides absorbs odd pad amounts
    padding = transforms.Pad((width_pad // 2, height_pad // 2 + 1,
                              width_pad // 2 + 1, height_pad // 2),
                             (255, 255, 255))
    # use center crop
    crop = transforms.CenterCrop(self.size)
    imageA = padding(imageA)
    imageA = crop(imageA)
    imageB = padding(imageB)
    imageB = crop(imageB)
    if self.transform is not None:
        imageA = self.transform(imageA)
        imageB = self.transform(imageB)
    # scale image into range [-1, 1]
    imageA = scale(imageA)
    imageB = scale(imageB)
    if not self.color_histogram:
        return imageA, imageB
    else:
        return imageA, imageB, colors
import preprocess
from sklearn.model_selection import KFold


def avg(rms, mae):
    """Return the mean of the RMS and MAE error values."""
    return (rms + mae) / 2


# Inputs: augmented feature CSV, pretrained GloVe vectors, question list.
input_dataset = './Augmented_Feat.csv'
embedmodel = embedding.train_word2vec('./glove.6B.300d.txt')
question = './questions.csv'

# Clean the dataset and remove question words from the answers.
df = preprocess.cleaning_dataset(input_dataset)
df = preprocess.question_demoting(df, question)
# scale() here also returns the fitted y-scaler (regression labels).
X, y, scaler_y = preprocess.scale(df)
# Hold out 20% of the data as the final test set.
X_train, X_test, y_train, y_test = preprocess.split(X, y, 0.2)

# 5-fold cross validation over the training set; one model/tokenizer/score
# slot per fold.
split = 5
index = 0
train_model = [None] * split
tokenizer = [None] * split
rms = [None] * split
mae = [None] * split
kfold = KFold(n_splits=split, shuffle=True, random_state=101)
for train, test in kfold.split(X_train, y_train):
    train_model[index], tokenizer[index] = model.train(X_train.iloc[train],
                                                       y_train[train],
                                                       embedmodel)
    test_results = model.predict(X_train.iloc[test], train_model[index],
def training_loop(dataloader_X, dataloader_Y, test_dataloader_X,
                  test_dataloader_Y, G_XtoY, G_YtoX, D_X, D_Y, g_optimizer,
                  d_x_optimizer, d_y_optimizer, n_epochs=1000):
    """Train a CycleGAN: two generators (G_XtoY, G_YtoX) and two discriminators.

    Each iteration trains D_X and D_Y on real/fake batches, then trains both
    generators with adversarial + cycle-consistency losses. Periodically logs
    losses, saves sample translations of fixed images, and checkpoints.

    Returns
    -------
    tuple
        (G_XtoY, G_YtoX, D_X, D_Y, d_x_optimizer, d_y_optimizer,
         g_optimizer, losses) where ``losses`` is a list of
        (d_x_loss, d_y_loss, g_total_loss) tuples.
    """
    print_every = 10

    # keep track of losses over time
    losses = []

    test_iter_X = iter(test_dataloader_X)
    test_iter_Y = iter(test_dataloader_Y)

    # Get some fixed data from domains X and Y for sampling. These are images
    # held constant throughout training, to help inspect the model's progress.
    # FIX: use the builtin next() -- Python 3 iterators (including modern
    # torch DataLoader iterators) have no .next() method.
    fixed_X = next(test_iter_X)[0]
    fixed_Y = next(test_iter_Y)[0]
    fixed_X = scale(fixed_X)  # scale to a range -1 to 1
    fixed_Y = scale(fixed_Y)

    # batches per epoch
    iter_X = iter(dataloader_X)
    iter_Y = iter(dataloader_Y)
    batches_per_epoch = min(len(iter_X), len(iter_Y))

    # Hoisted out of the loop: device availability does not change mid-run.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    for epoch in range(1, n_epochs + 1):

        # Reset iterators once they have been exhausted
        if epoch % batches_per_epoch == 0:
            iter_X = iter(dataloader_X)
            iter_Y = iter(dataloader_Y)

        images_X, _ = next(iter_X)
        images_X = scale(images_X)  # scale to a range -1 to 1

        images_Y, _ = next(iter_Y)
        images_Y = scale(images_Y)

        # move images to GPU if available (otherwise stay on CPU)
        images_X = images_X.to(device)
        images_Y = images_Y.to(device)

        # ============================================
        #            TRAIN THE DISCRIMINATORS
        # ============================================

        ##   First: D_X, real and fake loss components   ##
        d_x_optimizer.zero_grad()

        # 1. Compute the discriminator losses on real images
        D_out = D_X(images_X)
        D_x_real_err = real_mse_loss(D_out)

        # 2. Generate fake images that look like domain X based on real images in domain Y
        G_out = G_YtoX(images_Y)

        # 3. Compute the fake loss for D_X
        D_out = D_X(G_out)
        D_x_fake_err = fake_mse_loss(D_out)

        # 4. Compute the total loss and perform backprop
        d_x_loss = D_x_real_err + D_x_fake_err
        d_x_loss.backward()
        d_x_optimizer.step()

        ##   Second: D_Y, real and fake loss components   ##
        d_y_optimizer.zero_grad()

        # 1. Compute the discriminator losses on real images
        D_out = D_Y(images_Y)
        D_y_real_err = real_mse_loss(D_out)

        # 2. Generate fake images that look like domain Y based on real images in domain X
        G_out = G_XtoY(images_X)

        # 3. Compute the fake loss for D_Y
        D_out = D_Y(G_out)
        D_y_fake_err = fake_mse_loss(D_out)

        # 4. Compute the total loss and perform backprop
        d_y_loss = D_y_real_err + D_y_fake_err
        d_y_loss.backward()
        d_y_optimizer.step()

        # =========================================
        #            TRAIN THE GENERATORS
        # =========================================

        ##    First: generate fake X images and reconstructed Y images    ##
        g_optimizer.zero_grad()

        # 1. Generate fake images that look like domain X based on real images in domain Y
        G_out = G_YtoX(images_Y)

        # 2. Compute the generator loss based on domain X
        D_out = D_X(G_out)
        g_YtoX_loss = real_mse_loss(D_out)

        # 3. Create a reconstructed y
        reconstructed_y = G_XtoY(G_out)
        # 4. Compute the cycle consistency loss (the reconstruction loss)
        reconstructed_y_loss = cycle_consistency_loss(images_Y,
                                                      reconstructed_y,
                                                      lambda_weight=10)

        ##    Second: generate fake Y images and reconstructed X images    ##

        # 1. Generate fake images that look like domain Y based on real images in domain X
        G_out = G_XtoY(images_X)

        # 2. Compute the generator loss based on domain Y
        D_out = D_Y(G_out)
        g_XtoY_loss = real_mse_loss(D_out)

        # 3. Create a reconstructed x
        reconstructed_x = G_YtoX(G_out)
        # 4. Compute the cycle consistency loss (the reconstruction loss)
        reconstructed_x_loss = cycle_consistency_loss(images_X,
                                                      reconstructed_x,
                                                      lambda_weight=10)

        # 5. Add up all generator and reconstructed losses and perform backprop
        g_total_loss = g_YtoX_loss + g_XtoY_loss + reconstructed_y_loss + reconstructed_x_loss
        g_total_loss.backward()
        g_optimizer.step()

        # Print the log info
        if epoch % print_every == 0:
            # append real and fake discriminator losses and the generator loss
            losses.append(
                (d_x_loss.item(), d_y_loss.item(), g_total_loss.item()))
            print(
                'Epoch [{:5d}/{:5d}] | d_X_loss: {:6.4f} | d_Y_loss: {:6.4f} | g_total_loss: {:6.4f}'
                .format(epoch, n_epochs, d_x_loss.item(), d_y_loss.item(),
                        g_total_loss.item()))

        sample_every = 100
        # Save the generated samples
        if epoch % sample_every == 0:
            G_YtoX.eval()  # set generators to eval mode for sample generation
            G_XtoY.eval()
            save_samples(epoch, fixed_Y, fixed_X, G_YtoX, G_XtoY,
                         batch_size=16)
            G_YtoX.train()
            G_XtoY.train()

        checkpoint_every = 1000
        # Save the model parameters
        if epoch % checkpoint_every == 0:
            checkpoint(epoch, G_XtoY, G_YtoX, D_X, D_Y)

    return G_XtoY, G_YtoX, D_X, D_Y, d_x_optimizer, d_y_optimizer, g_optimizer, losses
def test_CNN(data, CNN): """ Trains a CNN for a given training set of points. """ X_test = data["test"]["X"] y_test = data["test"]["y"] evaluated = CNN.evaluate(X_test, y_test) results = {} for n,metric in enumerate(CNN.metrics_names): results[metric] = evaluated[n] return results if __name__ == "__main__": data = preprocess.read_data() data = preprocess.one_hot(data) data = preprocess.scale(data) data = preprocess.reshape_4D(data) msg = "Requires cmdline arg 'load' or 'save'" if len(sys.argv) == 2: if sys.argv[1].lower() == "load": CNN = load_model(config.CNN_save_name) elif sys.argv[1].lower() == "save": CNN = create_CNN(data) CNN = train_CNN(data, CNN) CNN.save(config.CNN_save_name) else: raise KeyError(msg) else: raise KeyError(msg)
# Containers for the fitted models and their per-fold CV scores.
dt_performance = []
rf_mdls = []
rf_performance = []
snn_mdls = []
snn_performance = []

# Perform 10-fold cross validation on all regressors to get R, R2, MSE, and MAE
for train_idx, test_idx in kcv.split(Xtrain):
    # NOTE(review): the data is re-shuffled INSIDE the fold loop, so the
    # indices produced by kcv.split(Xtrain) refer to a different ordering on
    # every iteration -- the folds are not disjoint partitions of a single
    # ordering. Confirm this is intended; shuffling once before the loop
    # would give standard k-fold behaviour.
    X, Y = shuffle(Xtrain, Ytrain)

    # Create cross validation indices and data
    Xtrain2, Xtest2 = X.iloc[train_idx, :], X.iloc[test_idx, :]
    Ytrain2, Ytest2 = Y.iloc[train_idx, :], Y.iloc[test_idx, :]

    # Scale the data using min-max scaling; the test fold reuses the
    # min/max fitted on the training fold (no refit on test data).
    Xtrain2, xmax, xmin = preprocess.scale(Xtrain2)
    Xtest2 = (Xtest2 - xmin) / (xmax - xmin)
    Ytrain2, ymax, ymin = preprocess.scale(Ytrain2)
    Ytest2 = (Ytest2 - ymin) / (ymax - ymin)

    # 1. Ordinary least squares
    ols = linear_model.LinearRegression()
    ols.fit(Xtrain2, Ytrain2)
    ols_pred = pd.DataFrame(ols.predict(Xtest2),
                            index=Ytest2.index,
                            columns=Ytest2.columns)
    ols_scores = evaluateMLmodel.regression_eval_metrics(ols_pred, Ytest2)
    ols_mdls.append(ols)
    # Mean of the metric rows for this fold.
    ols_performance.append(ols_scores.mean(axis=0))

    # 2. Ridge Regression
# TODO: Create dimension using components and specs if needed # specs = pd.read_csv('data/specs.csv') # components = pd.read_csv('data/components.csv') # from os import listdir, path # comp_files = [f for f in listdir('data') if 'comp_' in f] # for f in comp_files: # c = pd.read_csv(path.join('data',f)) # print c.columns # components = pd.merge(components, c, how='left') # train2['quantity'] = np.log(train2.quantity) # test2['quantity'] = np.log(test2.quantity) print "Scale dimensions..." scale_dimensions = ['annual_usage', 'quantity', 'diameter', 'bend_radius', 'wall', 'length', 'num_bends', 'num_boss', 'num_bracket'] train2[scale_dimensions] = preprocess.scale(train2[scale_dimensions]) test2[scale_dimensions] = preprocess.scale(test2[scale_dimensions]) X = train2 # X = X.drop(['tube_assembly_id', 'quote_date', 'cost', 'supplier', 'material_id', 'end_a', 'end_x'], axis=1) X = X.drop(['tube_assembly_id', 'quote_date', 'cost', 'supplier'], axis=1) X_test = test2 # X_test = X_test.drop(['tube_assembly_id', 'quote_date','id', 'supplier', 'material_id', 'end_a', 'end_x'], axis=1) X_test = X_test.drop(['tube_assembly_id', 'quote_date','id', 'supplier'], axis=1) y = train2['cost'] m = X.shape[0]
def preprocess(self, feature_type, X_train, X_test):
    """
    Scale and reduce dimensionality of input features.

    Parameters:
    -----------
    X_train, X_test : np.array (n, n_features)
        Arrays of input features where each row should be a single feature set

    Returns
    -------
    X_train, X_test : np.array (n, n_in)
        Scaled and reduced features
    """
    # Scaler configuration: '+'-joined entries, e.g. 'boxcox+minmax'.
    cfg = self.config[feature_type]
    scalers = cfg[f'{feature_type}_scaler'].split('+')
    raw_axes = cfg[f'{feature_type}_scale_axis'].split('+')

    # An empty axis entry means "no axis" (None); otherwise it is an int.
    scaler_axis = [None if axis == '' else int(axis) for axis in raw_axes]

    # Two entries means box-cox runs first and the second entry is the scaler.
    use_boxcox = len(scalers) > 1
    scaler_idx = 1 if use_boxcox else 0
    scaler = scalers[scaler_idx]

    # Scale data:
    if scaler in ('minmax', 'standard'):
        X_train, X_test = scale(X_train, X_test,
                                scale_type=scaler,
                                axis=scaler_axis[scaler_idx],
                                use_boxcox=use_boxcox,
                                boxcox_axis=scaler_axis[0])
    elif self.options.verbose:
        print(
            'No scaler has been used before PCA. If this behaviour is unintentional check configurations.'
        )

    # Perform PCA; a missing parameter entry means "skip PCA" and fall back
    # to a plain min-max scale.
    try:
        pca_pars = self.pars['PCA'][f'{feature_type}_components']
        X_train, X_test, pca = pca_transform(X_train, X_test,
                                             pca_components=pca_pars)
    except KeyError:
        if self.options.verbose:
            print(
                'No pca performed during preprocessing. If this behaviour is unintentional check parameters.'
            )
        X_train, X_test = scale(X_train, X_test, scale_type='minmax', axis=0)

    # Reshape video features for the LSTM: (samples, timesteps, features).
    if feature_type == 'video' and not self.video.fdhh:
        X_train = X_train.reshape(-1, self.seq_length, X_train.shape[-1])
        X_test = X_test.reshape(-1, self.seq_length, X_test.shape[-1])
        if self.options.verbose:
            print(f"Training input shape for the LSTM is {X_train.shape}")
    return X_train, X_test
# display sample of training images from visualize_data import visualize_data images = visualize_data(dataloader_X, dataloader_Y) # pre-processing the images from preprocess import scale # current range img = images[0] print('Min: ', img.min()) print('Max: ', img.max()) # scaled range scaled_img = scale(img) print('Scaled min: ', scaled_img.min()) print('Scaled max: ', scaled_img.max()) # define the CycleGAN model from model import create_model, print_models G_XtoY, G_YtoX, D_X, D_Y = create_model(g_conv_dim=64, d_conv_dim=64, n_res_blocks=6) # print all of the models print_models(G_XtoY, G_YtoX, D_X, D_Y) # computing the discriminator and generator losses