def retrain(params):
    # Load training data: all trials except the held-out outer fold.
    train_trial_list = [x for x in TRIAL_LIST if x != params['outer_f']]
    sb_n = params['sb_n']
    train_loader = pre.load_data_cnn(
        DATA_PATH, sb_n, train_trial_list, params['batch_size'])

    # Build the model and optimizer from the supplied hyperparameters.
    model = utils.Model()
    model.to(DEVICE)
    optimizer = getattr(torch.optim, params['optimizer'])(
        model.parameters(), lr=params['lr'])

    eng = utils.EngineTrain(model, optimizer, device=DEVICE)

    loss_params = pre.update_loss_params(params)
    loss_params['device'] = DEVICE
    print(loss_params)

    # Retrain on the full training set until the training loss drops below
    # the best loss found during cross-validation.
    best_loss = params['best_loss']
    for epoch in range(1, EPOCHS + 1):
        if 'annealing_step' in loss_params:
            loss_params['epoch_num'] = epoch
        train_loss = eng.re_train(train_loader, loss_params)
        print(f"epoch:{epoch}, train_loss:{train_loss}",
              f"best_loss_from_cv:{best_loss}")
        if train_loss < best_loss:
            break

    torch.save(model.state_dict(), params['saved_model'])
    return
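# Hedged usage sketch (not from the original source): retrain above expects the
# best hyperparameters and the best cross-validation loss, which the companion
# test() script reads from an Optuna study. The saved-model path and the exact
# keys inside best.params are assumptions for illustration.
def retrain_from_study(sb_n, outer_f):
    storage = f"sqlite:///study/ecnn{EDL_USED}/sb{sb_n}/t{outer_f}.db"
    best = optuna.load_study(study_name="STUDY", storage=storage).best_trial
    params = dict(best.params)  # e.g. optimizer, lr, batch_size, ... (assumed)
    params.update({
        'sb_n': sb_n,
        'outer_f': outer_f,
        'best_loss': best.value,
        'saved_model': f'models/ecnn{EDL_USED}/sb{sb_n}_t{outer_f}.pt',  # hypothetical path
    })
    retrain(params)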
def run_training(fold, params, save_model=False):
    df = pd.read_csv('../data/train_features.csv')
    df = df.drop(['cp_type', 'cp_time', 'cp_dose'], axis=1)

    targets_df = pd.read_csv('../data/train_target_folds.csv')

    features = df.drop('sig_id', axis=1).columns
    target_columns = targets_df.drop(['sig_id', 'kfold'], axis=1).columns

    df = df.merge(targets_df, on='sig_id', how='left')

    # Split into training and validation folds.
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[features].to_numpy()
    ytrain = train_df[target_columns].to_numpy()
    xvalid = valid_df[features].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=1024, num_workers=8, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1024, num_workers=8)

    model = utils.Model(n_features=xtrain.shape[1],
                        n_targets=ytrain.shape[1],
                        n_layers=params['num_layers'],
                        hidden_size=params['hidden_size'],
                        dropout=params['dropout'])
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=params['learning_rate'])
    eng = utils.Engine(model, optimizer, device=DEVICE)

    # Train with early stopping on the validation loss.
    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f'{fold}, {epoch}, {train_loss}, {valid_loss}')
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break

    return best_loss
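# Hedged usage sketch (not part of the original code): run_training above
# returns the best validation loss for one fold, so it can be wrapped in an
# Optuna objective that averages the loss across folds. The search ranges and
# the five-fold assumption are illustrative, not taken from the source.
def objective(trial):
    params = {
        'num_layers': trial.suggest_int('num_layers', 1, 5),
        'hidden_size': trial.suggest_int('hidden_size', 64, 1024),
        'dropout': trial.suggest_float('dropout', 0.1, 0.6),
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True),
    }
    fold_losses = [run_training(f, params, save_model=False) for f in range(5)]
    return sum(fold_losses) / len(fold_losses)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=50)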
def get_model_chi(mol, param_path, run_path, model_name):
    """Consolidate the actual chi-getting process."""
    # print('Param path: {}\nModel Name: {}'.format(param_path, model_name))
    model = utils.Model(mol, param_path, run_path, model_name)
    model.make_fits()
    model.obs_sample()
    model.chiSq(mol)
    model.delete()
    return model.raw_chi
def train_data(dataset_dir, fraction):
    unclassified = {}
    model = defaultdict(Counter)
    topics = Counter()
    word_counter = Counter()
    doc_topic_counter = Counter()

    for topic in os.listdir(dataset_dir):
        if topic.startswith('.'):
            continue
        topic_dir = dataset_dir + "/" + topic
        for cur_file in os.listdir(topic_dir):
            file_path = topic_dir + "/" + cur_file
            words = utils.get_file_content(file_path)
            # Flip a coin to decide whether the program may see the
            # classification of this document.
            if read_classification(fraction):
                topics[topic] += len(words)
                doc_topic_counter[topic] += 1
                for word in words:
                    model[topic][word] += 1
                    word_counter[word] += 1
            else:
                # Register the topic keys without adding counts, and keep the
                # document aside as unclassified (renaming duplicates).
                topics[topic] += 0
                doc_topic_counter[topic] += 0
                if cur_file not in unclassified:
                    unclassified[cur_file] = (words, 'None')
                else:
                    new_name = cur_file + random_name()
                    while new_name in unclassified:
                        print("More duplicates found!")
                        new_name = cur_file + random_name()
                    unclassified[new_name] = (words, 'None')

    # Create our model.
    model_obj = utils.Model(model, topics, word_counter, doc_topic_counter)

    # Iteratively classify the unclassified documents until the iteration
    # limit is reached or the model stops changing.
    for i in range(12):
        print("Iteration %d" % i)
        model_obj, count_changed = train_unclassified_documents(
            unclassified, model_obj)
        if count_changed == 0:
            break

    # Return the object for serialization.
    return model_obj
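# Hedged sketch (not from the original source): train_data above relies on two
# helpers, read_classification and random_name, that are not shown. Plausible
# minimal implementations, assuming `fraction` is the probability that a
# document's label is visible:
import random
import string

def read_classification(fraction):
    # Return True with probability `fraction` (the "coin flip" above).
    return random.random() < fraction

def random_name(length=8):
    # Random suffix used to disambiguate duplicate file names.
    return ''.join(random.choices(string.ascii_lowercase, k=length))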
def test(params):
    # load_data
    device = torch.device('cpu')
    test_trial = params['outer_f']
    sb_n = params['sb_n']

    # Load testing data
    inputs, targets = pre.load_data_test_cnn(DATA_PATH, sb_n, test_trial)

    # Load the trained model
    model = utils.Model()
    model.load_state_dict(
        torch.load(params['saved_model'], map_location=device))
    model.eval()

    # Get results
    outputs = model(inputs.to(device)).detach()

    # Load the testing engine
    eng = utils.EngineTest(outputs, targets)
    common_keys_for_update_results = ['sb_n', 'edl_used', 'outer_f']
    dict_for_update_acc = \
        {key: params[key] for key in common_keys_for_update_results}
    dict_for_update_R = copy.deepcopy(dict_for_update_acc)
    eng.update_result_acc(dict_for_update_acc)

    # Get the optimal activation function
    if EDL_USED == 0:
        dict_for_update_R['acti_fun'] = 'softmax'
    else:
        # Read it from the hyperparameter study
        core_path = f'study/ecnn{EDL_USED}/sb{sb_n}'
        study_path = "sqlite:///" + core_path + f"/t{test_trial}.db"
        loaded_study = optuna.load_study(
            study_name="STUDY", storage=study_path)
        temp_best_trial = loaded_study.best_trial
        dict_for_update_R['acti_fun'] = temp_best_trial.params['evi_fun']
    print(dict_for_update_R)
    eng.update_result_R(dict_for_update_R)
    return
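# Hedged usage sketch (not part of the original source): a driver that runs
# test above over every outer fold of one subject. The subject number and the
# saved-model naming scheme are assumptions, not taken from the source.
def run_all_test_folds(sb_n=1):
    for outer_f in TRIAL_LIST:
        params = {
            'sb_n': sb_n,
            'outer_f': outer_f,
            'edl_used': EDL_USED,
            # hypothetical checkpoint path
            'saved_model': f'models/ecnn{EDL_USED}/sb{sb_n}_t{outer_f}.pt',
        }
        test(params)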
def run_training(fold, params, save_model=False):
    df = pd.read_csv("../Data/lish-moa/train_features.csv")
    df = df.drop(["cp_type", "cp_time", "cp_dose"], axis=1)

    targets_df = pd.read_csv(
        "/home/self-made-lol/Desktop/Mechanism_of_Actions/Data/lish-moa/train_tragets_fold.csv"
    )

    features_columns = df.drop("sig_id", axis=1).columns
    target_columns = targets_df.drop(["sig_id", "kfold"], axis=1).columns

    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[features_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()
    xvalid = valid_df[features_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=1024, num_workers=8, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1024, num_workers=8)

    model = utils.Model(
        nfeatures=xtrain.shape[1],
        ntargets=ytrain.shape[1],
        nlayers=params["num_layers"],
        hidden_size=params["hidden_size"],
        dropout=params["dropout"],
    )
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=params["learning_rate"])
    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f"model_{fold}.bin")
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break

    return best_loss
def run_training(fold, params, save_model):
    # load_data
    '''
    temp_trial_list = [
        x for x in TRIAL_LIST if x not in params['test_trial_list']]
    '''
    o_f = params['outer_f']  # outer fold number
    temp_trial_list = [x for x in TRIAL_LIST if x != o_f]
    valid_trial_list = [temp_trial_list.pop(fold)]
    train_trial_list = temp_trial_list
    sb_n = params['sb_n']

    train_loader = pre.load_data_cnn(
        DATA_PATH, sb_n, train_trial_list, params['batch_size'])
    valid_loader = pre.load_data_cnn(
        DATA_PATH, sb_n, valid_trial_list, params['batch_size'])
    trainloaders = {
        "train": train_loader,
        "val": valid_loader,
    }

    # Load model
    model = utils.Model()
    model.to(DEVICE)
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.01, amsgrad=True)
    optimizer = getattr(torch.optim, params['optimizer'])(
        model.parameters(), lr=params['lr'])

    eng = utils.EngineTrain(model, optimizer, device=DEVICE)

    loss_params = pre.update_loss_params(params)
    loss_params['device'] = DEVICE

    if save_model:
        prefix_path = f'model_innerloop/ecnn{EDL_USED}/'
        if not os.path.exists(prefix_path):
            os.makedirs(prefix_path)
        filename = f"sb{sb_n}_o{o_f}_i{fold}.pt"
        model_name = os.path.join(prefix_path, filename)

    # Train with early stopping on the validation loss.
    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(1, EPOCHS + 1):
        if 'annealing_step' in loss_params:
            loss_params['epoch_num'] = epoch
        train_losses = eng.train(trainloaders, loss_params)
        train_loss = train_losses['train']
        valid_loss = train_losses['val']
        print(f"fold:{fold}, "
              f"epoch:{epoch}, "
              f"train_loss: {train_loss}, "
              f"valid_loss: {valid_loss}. ")
        if valid_loss < best_loss:
            best_loss = valid_loss
            early_stopping_counter = 0
            if save_model:
                torch.save(
                    {
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer': params['optimizer'],
                        'optimizer_state_dict': optimizer.state_dict(),
                        'train_loss': train_loss,
                        'valid_loss': valid_loss
                    }, model_name)
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break

    return best_loss
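# Hedged sketch (not from the original source): because run_training above saves
# a full checkpoint dict rather than a bare state_dict, restoring the model
# needs the 'model_state_dict' key. The path mirrors the naming scheme used in
# the function; everything else is an assumption.
def load_inner_checkpoint(sb_n, o_f, fold):
    path = f'model_innerloop/ecnn{EDL_USED}/sb{sb_n}_o{o_f}_i{fold}.pt'
    checkpoint = torch.load(path, map_location='cpu')
    model = utils.Model()
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model, checkpoint['valid_loss']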
def run_training(fold, save_model=False):
    df = pd.read_csv("./input/train_features.csv")
    df = df.drop(["cp_time", "cp_dose", "cp_type"], axis=1)

    targets_df = pd.read_csv("./input/train_targets_folds.csv")

    feature_columns = df.drop("sig_id", axis=1).columns
    # Drop the fold column as well so it is not treated as a target.
    target_columns = targets_df.drop(["sig_id", "kfold"], axis=1).columns

    df = df.merge(targets_df, on="sig_id", how="left")
    # print(df)

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()
    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MOADataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MOADataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=1024, num_workers=8, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1024, num_workers=8)

    model = utils.Model(
        nfeatures=xtrain.shape[1],
        ntargets=ytrain.shape[1],
        nlayers=2,
        hidden_size=128,
        dropout=0.3,
    )
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f"model_{fold}.bin")
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break
def run_training(fold, save_model=False):
    df = pd.read_csv(
        "/home/hasan/Data Set/Drug Classification/train_features.csv")
    df = df.drop(['cp_type', 'cp_time', 'cp_dose'], axis=1)

    targets_df = pd.read_csv("/home/hasan/spyder_code/train_targets_folds.csv")

    feature_columns = df.drop('sig_id', axis=1).columns
    target_columns = targets_df.drop(['sig_id', 'kfold'], axis=1).columns

    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()
    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=1024, num_workers=8, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1024, num_workers=8)

    model = utils.Model(nfeatures=xtrain.shape[1],
                        ntargets=ytrain.shape[1],
                        nlayers=2,
                        hidden_size=128,
                        dropout=0.3)
    # model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    eng = utils.Engine(model, optimizer, DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break
hps = parser.parse_args()
hps.n_labels = 10
hps.final_search = True if hps.final_search in ['True', 'true', '1'] else False
settings.init(hps)
sess = tf.InteractiveSession()

### load dataset and create the model
import utils
data1, x_test, y_test, y_test_0 = utils.load_dataset(hps)
conv_l, dense_l = utils.get_weights_conv(data1, hps)
settings.init(hps)
settings.init_layers(conv_l, dense_l)
model = utils.Model(hps)

if hps.p == 'linf':
    import FAB_linf
elif hps.p == 'l2':
    import FAB_l2
elif hps.p == 'l1':
    import FAB_l1

### run the attack in batches of size hps.bs for the first hps.im images of the test set
if hps.dataset in ['cifar10']:
    y_test_0 = y_test_0[0]

t1 = time.time()
adv = np.zeros(x_test[:hps.im].shape)
res = np.zeros([hps.im])
sp = 0
while sp < hps.im:
    if hps.p == 'linf':
        res[sp:sp + hps.bs], adv[sp:sp + hps.bs] = FAB_linf.FABattack_linf(
            model, x_test[sp:sp + hps.bs], y_test_0[sp:sp + hps.bs], sess,
def run_training():
    if torch.cuda.is_available():
        DEVICE = 'cuda'
    else:
        DEVICE = 'cpu'

    df_train = pd.read_csv(PATH + 'train_features.csv')
    targets = pd.read_csv(PATH + 'train_targets_scored.csv')

    utils.get_dummies(df_train, ['cp_type', 'cp_dose', 'cp_time'])
    sig_ids = df_train['sig_id']
    df_train.drop('sig_id', axis=1, inplace=True)
    targets.drop('sig_id', axis=1, inplace=True)

    # TODO: use unscored data for training as well
    X_train, X_val, y_train, y_val = train_test_split(df_train.values,
                                                      targets.values,
                                                      test_size=0.3,
                                                      random_state=42)

    train_dataset = utils.ModelDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=4)
    val_dataset = utils.ModelDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=1)

    model = utils.Model(X_train.shape[1], y_train.shape[1], num_layers,
                        hidden_size)
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=30,
                                                gamma=0.1)
    engine = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping = 10
    early_stopping_counter = 0
    # TODO: use Optuna for trials
    for epoch in range(EPOCHS):
        train_loss = engine.train(train_loader)
        val_loss = engine.validate(val_loader)
        # StepLR decays the learning rate on a fixed epoch schedule;
        # it does not take the validation loss as an argument.
        scheduler.step()
        print(f'Epoch {epoch}, train_loss {train_loss}, val_loss {val_loss}')
        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), '/models')
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping:
            break

    print(f'best loss {best_loss}')
    return best_loss
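# Hedged sketch (not from the original source): reloading the weights saved by
# run_training above for inference. The '/models' path mirrors the torch.save
# call in the function; the constructor arguments reuse the same module-level
# num_layers and hidden_size and are otherwise assumptions.
def load_trained_model(n_features, n_targets):
    model = utils.Model(n_features, n_targets, num_layers, hidden_size)
    model.load_state_dict(torch.load('/models', map_location='cpu'))
    model.eval()
    return model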