def __init__(self, args):
    self.args = args
    data = torchvision.datasets.FakeData(
        size=args.size,
        image_size=(1, 32, 32),
        num_classes=args.num_classes,
        transform=torchvision.transforms.ToTensor(),
    )
    # Split off a validation subset; the remainder is used for training.
    self.validation_data, self.train_data = utils.random_split(
        data, args.validation_percent)
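Note that `utils.random_split` here takes a fraction, unlike PyTorch's built-in `random_split`, which takes absolute lengths. A minimal sketch of such a helper, assuming `validation_percent` is a float in [0, 1] and wrapping `torch.utils.data.random_split` (the name `percent_random_split` is hypothetical):

import torch
from torch.utils.data import random_split

def percent_random_split(dataset, validation_percent):
    # Hypothetical helper: convert a fraction into the two absolute
    # lengths that torch.utils.data.random_split expects.
    val_count = int(len(dataset) * validation_percent)
    train_count = len(dataset) - val_count
    validation_data, train_data = random_split(
        dataset, [val_count, train_count])
    return validation_data, train_data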
def generate_sets(self):
    dataset_classes = glob.glob(self.path + "/*")
    for folder in dataset_classes:
        # Class name is the last path component (handle both / and \ separators).
        if "/" in folder:
            class_name = folder.split("/")[-1]
        else:
            class_name = folder.split("\\")[-1]
        self.classes.append(class_name)
        class_files = glob.glob(folder + "/*.JPEG")
        # Hold out a third of each class for testing; the split size
        # must be an integer count, so use floor division.
        test_size = len(class_files) // 3
        train, test = utils.random_split(class_files, test_size)
        self.train_set.append(train)
        self.test_set.append(test)
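This snippet relies on a project-local `utils.random_split(items, test_size)` that splits a list into (train, test) given an absolute test count. A minimal sketch under that assumed contract:

import random

def random_split(items, test_size):
    # Hypothetical implementation: shuffle a copy, then slice off the
    # first `test_size` items as the test set; the rest is training data.
    shuffled = list(items)
    random.shuffle(shuffled)
    return shuffled[test_size:], shuffled[:test_size]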
# Merge the per-frame CNN towers, run the fused features through an
# LSTM, and classify with a sigmoid head. Updated to the Keras 2 API:
# merge(mode="concat") -> concatenate, Model(input=, output=) -> Model(inputs=, outputs=).
merge_nets = concatenate(convnets)
reshape = Reshape((128, 7))(merge_nets)
# The LSTM input shape is fixed by the Reshape above, so the Keras 1
# input_dim/input_length arguments are no longer needed.
lstm = LSTM(128, return_sequences=False, activation="tanh")(reshape)
# dropout1 = Dropout(0.5)(lstm)
dense1 = Dense(512, activation="relu")(lstm)
# dropout2 = Dropout(0.5)(dense1)
prediction = Dense(1, activation="sigmoid")(dense1)
return Model(inputs=inputs, outputs=prediction)


# Load data
from utils import random_split

X_train, X_test, y_train, y_test = random_split("images/", 32, 7)
_, frames, channels, width, height = np.shape(X_train)

# Reshape to match CNN shapes: one array per frame, each of shape
# (samples, channels, width, height).
X_train = list(X_train.reshape(frames, -1, channels, width, height))
X_test = list(X_test.reshape(frames, -1, channels, width, height))
image_shape = (channels, width, height)

# Create model
model = functional_model(image_shape, frames)
model.compile(loss='binary_crossentropy', metrics=['accuracy'],
              optimizer="adam")  # or SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
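The model above assumes `inputs` (one Input per frame) and `convnets` (the flattened per-frame CNN features) were built earlier inside `functional_model`. A minimal sketch of that missing piece, with layer and filter sizes chosen purely for illustration:

from keras.layers import Input, Conv2D, MaxPooling2D, Flatten

def build_frame_towers(image_shape, frames):
    # Hypothetical: one small CNN tower per frame, no weight sharing.
    inputs, convnets = [], []
    for _ in range(frames):
        inp = Input(shape=image_shape)
        x = Conv2D(32, (3, 3), activation="relu")(inp)
        x = MaxPooling2D((2, 2))(x)
        x = Flatten()(x)
        inputs.append(inp)
        convnets.append(x)
    return inputs, convnets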
def experiment_clfs(df, target_col, unused_cols, numeric_cols, clfs,
                    model_lst, grid, test_length=6, is_temporal=True,
                    draw=True, table=True):
    '''
    Experiment with different parameters for classifiers. Loop through
    each model and evaluate it accordingly.

    Inputs:
        df: dataframe (joint table)
        target_col: (numpy array) target variable
        unused_cols: (numpy array) unused variables in df
        numeric_cols: (numpy array) numerical variables in df
        clfs: (dictionary) classifiers from create_clfs_params() function
        model_lst: (list of strings) model names to use
        grid: (dictionary) grid from create_clfs_params() function
        test_length: (positive int) testing window (unit=month)
        is_temporal: (bool) True to split data with temporal validation;
            False to use a random split
        draw: (bool) True to plot a PR curve for each model configuration
        table: (bool) True to write evaluation results to disk

    Outputs:
        PR curves
        classifiers_eval.csv: csv file that stores evaluation results
    '''
    output_cols = ('model', 'parameters', 'train_time', 'test_time',
                   'accuracy', 'F1_score', 'auc',
                   'p@1', 'p@2', 'p@5', 'p@10', 'p@20', 'p@30', 'p@50',
                   'r@1', 'r@2', 'r@5', 'r@10', 'r@20', 'r@30', 'r@50')
    output_df = pd.DataFrame(columns=output_cols)

    if is_temporal:
        x_train, x_test, y_train, y_test = utils.split_data(
            df, target_col, unused_cols, test_length)
        # Fall back to a random split if the temporal split fails.
        if x_train is None and x_test is None and y_train is None and y_test is None:
            print("Temporal split failed. Switching to a random split "
                  "with test size = 30%.")
            x_train, x_test, y_train, y_test = utils.random_split(
                df, target_col, unused_cols)
    else:
        x_train, x_test, y_train, y_test = utils.random_split(
            df, target_col, unused_cols)

    # Discretize numeric columns.
    x_train, x_test = preprocess.discretize(x_train, x_test, numeric_cols)

    clf_lst = [clfs[x] for x in model_lst]
    for i, clf in enumerate(clf_lst):
        print(model_lst[i])
        params = grid[model_lst[i]]
        for p in ParameterGrid(params):
            try:
                model = clf.set_params(**p)
                start_train = time.time()
                model.fit(x_train, y_train)
                train_time = time.time() - start_train

                start_test = time.time()
                y_pred = model.predict(x_test)
                test_time = time.time() - start_test

                y_pred_probs = model.predict_proba(x_test)[:, 1]
                scores = evaluate(y_pred, y_pred_probs, y_test)
                index = len(output_df)
                output_df.loc[index] = [
                    model_lst[i], p, train_time, test_time,
                    scores['accuracy'], scores['F1_score'], scores['auc'],
                    scores['p@1'], scores['p@2'], scores['p@5'],
                    scores['p@10'], scores['p@20'], scores['p@30'],
                    scores['p@50'], scores['r@1'], scores['r@2'],
                    scores['r@5'], scores['r@10'], scores['r@20'],
                    scores['r@30'], scores['r@50']
                ]
                if draw:
                    model_name = model_lst[i] + str(index)
                    plot_pr_curve(y_test, y_pred_probs, model_name, p)
            except Exception as e:
                print(e)
        print("1 classifier completed.")

    if table:
        output_df.to_csv('eval_results/classifiers_eval.csv')
    return output_df
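A sketch of how this experiment loop might be invoked; `create_clfs_params()` is the helper named in the docstring, and the column and model names below are placeholders, not values from the original code:

clfs, grid = create_clfs_params()
results = experiment_clfs(df, target_col, unused_cols=['id'],
                          numeric_cols=['age', 'income'],
                          clfs=clfs, model_lst=['LR', 'RF'], grid=grid,
                          test_length=6, is_temporal=True,
                          draw=False, table=True)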
def load_numpy_states(train_idx=None, test_idx=None):
    X = load_numpy()['states']
    # Generate a random index split if none was supplied.
    if train_idx is None:
        train_idx, test_idx = random_split(np.arange(X.shape[0]))
    return X[train_idx], X[test_idx]
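Here `random_split` operates on an index array rather than a dataset. A minimal sketch of such an index splitter, assuming a default 80/20 train/test split:

import numpy as np

def random_split(indices, train_fraction=0.8):
    # Hypothetical: permute the indices and cut at the train fraction.
    shuffled = np.random.permutation(indices)
    cut = int(len(shuffled) * train_fraction)
    return shuffled[:cut], shuffled[cut:]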
def test_randsplit():
    l = range(40)
    sample_size = 5
    # Split the sequence into a random sample of the requested size
    # and the remainder.
    remainder, sample = utils.random_split(l, sample_size)
    print("remainder = {0}".format(remainder))
    print("sample = {0}".format(sample))
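If `utils.random_split` partitions its input as this test expects, the sizes can be asserted rather than printed. A stronger variant of the check, under that assumed contract:

def test_randsplit_sizes():
    l = list(range(40))
    remainder, sample = utils.random_split(l, 5)
    # Assumed contract: the sample has the requested size and the
    # remainder holds everything else; together they partition the input.
    assert len(sample) == 5
    assert len(remainder) == 35
    assert sorted(list(remainder) + list(sample)) == l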
sfsnet_model.load_state_dict(
    torch.load("/home/hd8t/xiangyu.yin/results/metadata/checkpoints/Skip_First.pkl"))

# Collect image paths and derive names from the filenames.
face = []
name = []
for img in glob.glob(Celeb_path + "*.png"):
    n_suffix = img.split('/')[-1]
    face.append(img)
    name.append(n_suffix.split('.')[0])

# Hold out 2% of the data for validation.
datasize = len(face)
validation_count = int(2 * datasize / 100)
train_count = datasize - validation_count

transform = transforms.Compose([
    transforms.Resize(Size_for_Image),
    transforms.ToTensor()
])
full_dataset = CelebDataset(face, name, transform)
train_dataset, val_dataset = random_split(full_dataset,
                                          [train_count, validation_count])
celeb_dl = DataLoader(train_dataset, batch_size=1, shuffle=True)

wandb.init(tensorboard=True)
for bix, data in enumerate(celeb_dl):
    fa, na = data
    na = na[0]
    predicted_normal, predicted_albedo, predicted_sh, predicted_shading, out_recon = sfsnet_model(fa)
    print('Processing face {}'.format(bix))
    if bix % 10 == 0:
        print(predicted_albedo)
        print(predicted_shading)
    out_celeb = out_dir + 'Celeb' + str(bix)
    if not os.path.exists(out_celeb):
        os.makedirs(out_celeb)
    out_celeb += "/"
    wandb_log_images(wandb, predicted_normal, None, suffix + "Predicted Normal", \
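The split above is PyTorch's `torch.utils.data.random_split`, which takes absolute subset lengths. To make the validation split reproducible across runs, recent PyTorch versions accept an explicit generator:

from torch.utils.data import random_split

train_dataset, val_dataset = random_split(
    full_dataset, [train_count, validation_count],
    generator=torch.Generator().manual_seed(42))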