def __init__(self, freq_bins, classes_num, emb_layers, hidden_units, drop_rate):
    super(DecisionLevelSingleAttention, self).__init__()

    self.emb = EmbeddingLayers(freq_bins=freq_bins,
                               emb_layers=emb_layers,
                               hidden_units=hidden_units,
                               drop_rate=drop_rate)

    self.attention = Attention(n_in=hidden_units,
                               n_out=classes_num)

    self.param_count = count_parameters(self)
    print(self.param_count)
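# count_parameters is called above but not defined in this snippet. Below is a
# minimal sketch of a typical implementation for a PyTorch nn.Module, assuming
# "parameters" means trainable ones (requires_grad=True); the helper actually
# used in this codebase may differ.
import torch.nn as nn


def count_parameters(model: nn.Module) -> int:
    """Return the number of trainable scalar parameters in a module."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)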
## 1stGCN (head of the ChebNet call reconstructed to match the variant used in testChebNet below)
model_test = ChebNet(block_dura, filters, Nlabels,
                     gcn_layer=num_layers, dropout=0.25,
                     gcn_flag=True)
# model_test = ChebNet(block_dura, filters, Nlabels, K=5,
#                      gcn_layer=num_layers, dropout=0.25)
model_test = model_test.to(device)

# print('type of adj ', type(adj_mat))
# adj_mat = sparse_mx_to_torch_sparse_tensor(adj_mat)
# adj_mat = torch.FloatTensor(adj_mat)
# print(adj_mat, '\n', type(adj_mat), np.array(adj_mat).shape)
# print('adj_mat of stack ', adj_mat, np.array(adj_mat).shape)
adj_mat = torch.stack(adj_mat)
# adj_mat = torch.stack([torch.Tensor(adj_mat[ii]) for ii in range(len(adj_mat))])
adj_mat = adj_mat.to(device)

loss_func = nn.CrossEntropyLoss()
num_epochs = 10

print(model_test)
print("{} parameters to be trained in the model\n".format(
    count_parameters(model_test)))
optimizer = optim.Adam(model_test.parameters(), lr=0.001, weight_decay=5e-4)

model_fit_evaluate(model_test, adj_mat[1], device, train_loader, test_loader,
                   optimizer, loss_func, num_epochs)

## ChebNet
model_test = ChebNet(block_dura, filters, Nlabels, K=5,
                     gcn_layer=num_layers, dropout=0.25)
model_test = model_test.to(device)

print(model_test)
print("{} parameters to be trained in the model\n".format(
    count_parameters(model_test)))
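# The torch.stack call above assumes adj_mat is a Python list (or sequence) of
# equally sized square adjacency tensors, one per graph. A tiny synthetic
# illustration (shapes are hypothetical):
import torch

example_adj = [torch.eye(4) for _ in range(3)]   # three 4x4 adjacency matrices
stacked = torch.stack(example_adj)               # shape: (3, 4, 4)
one_graph = stacked[1]                           # analogous to adj_mat[1] passed above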
def build_model(features, labels, training, train_batches):
    # Generate a summary node for the images
    tf.summary.image('images', features, max_outputs=6)

    model = MyModel(FLAGS.net, weight_decay=FLAGS.weight_decay)
    logits, end_points = model(features, training)

    predictions = {
        'classes': tf.argmax(logits, axis=1),
        'top_3': tf.nn.top_k(logits, k=3)[1]
    }

    onehot_labels = tf.one_hot(labels, depth=_NUM_CLASSES)
    # Per-example class weights during training, a constant 1.0 at eval time.
    # (keepdims is left False so loss_weights has shape [batch] and multiplies
    # cross_entropy elementwise instead of broadcasting to [batch, batch].)
    loss_weights = tf.cond(
        training,
        lambda: tf.reduce_sum(onehot_labels * get_class_weights(), axis=1),
        lambda: tf.constant(1.0, dtype=tf.float32))

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    loss = tf.reduce_mean(loss_weights * cross_entropy)
    if 'AuxLogits' in end_points:
        aux_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=end_points['AuxLogits'])
        loss += 0.4 * tf.reduce_mean(loss_weights * aux_cross_entropy)

    # Create a summary for the loss for logging purposes.
    tf.summary.scalar('optimization/loss', loss)

    global_step = tf.train.get_or_create_global_step()
    if FLAGS.lr_decay_rule == 'exp':
        learning_rate = tf.train.exponential_decay(
            learning_rate=FLAGS.learning_rate,
            global_step=global_step,
            decay_steps=train_batches * 10,
            decay_rate=0.5,
            staircase=True)
    elif FLAGS.lr_decay_rule == 'step':
        learning_rate = learning_rate_with_decay(
            batches_per_epoch=train_batches,
            boundary_epochs=[8, 20, 30, 40],
            decay_rates=[1, 0.1, 0.01, 0.001, 1e-4])(global_step)
    elif FLAGS.lr_decay_rule == 'kar':
        current_epoch = tf.ceil(global_step / train_batches)
        learning_rate = tf.cond(
            current_epoch <= 2,
            lambda: FLAGS.learning_rate,
            lambda: FLAGS.learning_rate * tf.pow(
                0.8, tf.to_float(current_epoch - 2)))
    else:
        learning_rate = FLAGS.learning_rate
    tf.summary.scalar('optimization/learning_rate', learning_rate)

    if FLAGS.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           epsilon=1.0)
    elif FLAGS.optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    else:
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=0.9)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    ################## Update all params #####################
    variables_to_train = _get_variables_to_train(FLAGS.trainable_scopes)
    count_parameters(variables_to_train)
    gradients = tf.gradients(loss, variables_to_train)
    grad_updates = optimizer.apply_gradients(zip(gradients, variables_to_train),
                                             global_step=global_step,
                                             name='train_op')
    train_op = tf.group(grad_updates, update_ops)

    ################## Evaluation ###########################
    top_1_acc, update_top_1 = tf.metrics.accuracy(labels,
                                                  predictions['classes'],
                                                  name='metrics')
    top_3_acc, update_top_3 = tf.metrics.mean(tf.nn.in_top_k(
        predictions=logits, targets=labels, k=3), name='metrics')

    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                     scope="metrics")
    metrics_init = tf.variables_initializer(var_list=running_vars)
    metrics_update = tf.group([update_top_1, update_top_3])

    top_1_error = 1.0 - top_1_acc
    top_3_error = 1.0 - top_3_acc
    metrics = {
        'init': metrics_init,
        'update': metrics_update,
        'top_1_error': top_1_error,
        'top_3_error': top_3_error
    }
    tf.summary.scalar('metrics/top_1_error', top_1_error)
    tf.summary.scalar('metrics/top_3_error', top_3_error)

    return train_op, loss, predictions, metrics
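# In build_model above, count_parameters receives a list of tf.Variable objects
# (variables_to_train), not a model object. A minimal sketch of such a helper,
# assuming the goal is simply to log the total number of scalar weights in the
# trainable variables, is shown below; the project's actual implementation is
# not shown in this snippet and may differ (e.g. it may also print per-variable
# shapes).
import numpy as np


def count_parameters(variables):
    """Print and return the total number of scalars across a list of tf.Variable."""
    total = int(sum(np.prod(v.get_shape().as_list()) for v in variables))
    print('Trainable parameters: {}'.format(total))
    return total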
def train(config, data_provider, p):
    # skip training if test mode
    if not config.do_train:
        log_to_file('Skip train', 'Not enabled training')
        return

    device = config.device
    log_to_file('Device', device)
    # log pytorch version
    log_to_file('PyTorch version', torch.__version__)
    # prepare model
    log_to_file('based on base_model #', p)
    for i in range(config.model_count):
        log_to_file('begin training model #', i)
        model = Model(config)
        weight_initial(model, config)
        model.to(device)
        # state_dict = torch.load(config.model_base_path(p))
        # model = Model(config)
        # model.load_state_dict(state_dict)
        # model.to(device)

        # log param count
        log_to_file('Trainable params count', count_parameters(model))
        print(model.parameters())
        # exit()

        # OPTIMIZER
        optimizer = optim.SGD(model.parameters(), lr=config.start_lr)
        log_to_file("Optimizer", "SGD")

        # callbacks
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   threshold=config.loss_delta,
                                                   patience=4,
                                                   cooldown=4,
                                                   verbose=True,
                                                   min_lr=config.min_lr,
                                                   factor=0.2)
        model_check_callback = ModelCheckPointCallBack(
            model,
            config.model_save_path(p * config.model_count + i),
            period=1,
            delta=config.loss_delta,
        )
        early_stop_callback = EarlyStopCallBack(patience=25,
                                                delta=config.loss_delta)

        # some vars
        epoch_loss = 0
        validation_loss = 0
        data_provider.new_epoch()  # reset data provider
        # output the validation dataset
        # val_data_path = os.path.join(config.working_dir, 'val_data_{}.csv'.format(p * config.model_count + i))
        # val_df = pd.DataFrame(data_provider.validation_samples[i], columns=["hla_a", "peptide", "ic50"])
        # val_df.to_csv(val_data_path, sep=',', header=True, index=True)
        steps = data_provider.train_steps()

        log_to_file('Start training1', datetime.datetime.now())
        for epoch in range(config.epochs):
            epoch_start_time = datetime.datetime.now()
            # train batches
            print(steps)
            model.train(True)
            for _ in range(steps):
                data = data_provider.batch_train(i)
                print("***")
                loss = batch_train(model, device, data, config)
                print("loss:", loss)
                # exit()
                loss.backward()
                # clip grads
                nn.utils.clip_grad_value_(model.parameters(), config.grad_clip)
                # update params
                optimizer.step()
                # record loss
                epoch_loss += loss.item()
                # reset grad
                optimizer.zero_grad()

            # time compute
            time_delta = datetime.datetime.now() - epoch_start_time

            # validation on epoch end
            model.eval()
            # print(data_provider.val_steps())
            # print(data_provider.batch_index_val)
            # validation_call
            val_sample = []
            val_pred = []
            for _ in range(data_provider.val_steps()):
                data = data_provider.batch_val(i)
                t_loss, t_pred, t_samples = batch_validation(
                    model, device, data, config)
                val_sample.append(t_samples)
                val_pred.append(t_pred)
                validation_loss += t_loss

            # log
            log_to_file(
                "Training process",
                "[base_model{0:1d}]-[model{1:1d}]-[Epoch {2:04d}] - time: {3:4d} s, train_loss: {4:0.5f}, val_loss: {5:0.5f}"
                .format(p, i, epoch, time_delta.seconds, epoch_loss / steps,
                        validation_loss / data_provider.val_steps()))
            # callbacks
            model_check_callback.check(
                epoch, validation_loss / data_provider.val_steps())
            if early_stop_callback.check(
                    epoch, validation_loss / data_provider.val_steps()):
                break
            # LR schedule
            scheduler.step(loss.item())
            # reset loss
            epoch_loss = 0
            validation_loss = 0
            # reset data provider
            data_provider.new_epoch()

        # save last epoch model
        torch.save(
            model.state_dict(),
            os.path.join(
                config.working_dir,
                'last_epoch_model_{}.pytorch'.format(p * config.model_count + i)))

        # validation_call
        val_path = os.path.join(
            config.working_dir,
            'val_result_{}.csv'.format(p * config.model_count + i))
        val_temp_list = []
        for ii in range(len(val_sample)):
            for jj in range(len(val_sample[ii])):
                temp = {
                    "hla_a": val_sample[ii][jj][0],
                    "peptide": val_sample[ii][jj][1],
                    "ic50": val_sample[ii][jj][2],
                    "pred_ic50": val_pred[ii][jj]
                }
                val_temp_list.append(temp)
        val_df = pd.DataFrame(val_temp_list)
        val_df["up_ic50"] = 50000 ** val_df["ic50"]
        val_df["up_pred_ic50"] = 50000 ** val_df["pred_ic50"]
        val_df["binding"] = val_df["up_ic50"].apply(lambda x: 1 if x < 500 else 0)
        val_df["pred_binding"] = val_df["up_pred_ic50"].apply(
            lambda x: 1 if x < 500 else 0)
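# The post-processing above assumes the stored ic50 values are on a log scale
# with base 50000 (raw value = 50000 ** stored value; presumably nanomolar
# IC50, though the unit is an assumption here). Under that transform, the
# binder threshold of 500 on the raw scale corresponds to a stored value of
# about log(500) / log(50000) ~= 0.574, as this quick check confirms:
import math

threshold_normalized = math.log(500) / math.log(50000)
print(round(threshold_normalized, 3))              # ~0.574
print(round(50000 ** threshold_normalized, 1))     # ~500.0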
def testChebNet(ERP_Matrix, adj_mat, subTrial):
    acc = []
    loss = []
    task_contrasts = {"HC": "Healthy_Person",
                      "PD": "Parkinson_Person"}
    params = {'batch_size': 2,
              'shuffle': True,
              'num_workers': 1}
    target_name = list(task_contrasts.values())
    Nlabels = len(target_name)

    Region_Num = np.array(ERP_Matrix)[0].shape[-1]
    print(Region_Num)
    block_dura = 64
    test_size = 0.2
    randomseed = 1234
    test_sub_num = len(ERP_Matrix)
    print('test_sub_num ', test_sub_num)

    rs = np.random.RandomState(randomseed)
    train_sid, test_sid = train_test_split(range(test_sub_num),
                                           test_size=test_size,
                                           random_state=rs,
                                           shuffle=True)
    print('training on %d subjects, validating on %d subjects' %
          (len(train_sid), len(test_sid)))

    #### train set
    fmri_data_train = [ERP_Matrix[i] for i in train_sid]
    label_data_train = pd.DataFrame(np.array([subTrial[i] for i in train_sid]))
    # print(type(label_data_train), '\n', label_data_train)
    ERP_train_dataset = ERP_matrix_datasets(fmri_data_train, label_data_train,
                                            target_name, block_dura=700,
                                            isTrain='train')
    train_loader = DataLoader(ERP_train_dataset,
                              collate_fn=ERP_samples_collate_fn, **params)

    #### test set
    fmri_data_test = [ERP_Matrix[i] for i in test_sid]
    label_data_test = pd.DataFrame(np.array([subTrial[i] for i in test_sid]))
    # print(type(label_data_test), '\n', label_data_test)
    ERP_test_dataset = ERP_matrix_datasets(fmri_data_test, label_data_test,
                                           target_name, block_dura=700,
                                           isTrain='test')
    test_loader = DataLoader(ERP_test_dataset,
                             collate_fn=ERP_samples_collate_fn, **params)

    ## ChebNet
    from model import ChebNet
    from model import count_parameters, model_fit_evaluate

    filters = 32
    num_layers = 2
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    device = torch.device("cpu")

    ## 1stGCN
    model_test = ChebNet(block_dura, filters, Nlabels,
                         gcn_layer=num_layers, dropout=0.25, gcn_flag=True)
    model_test = model_test.to(device)

    adj_mat = torch.stack(adj_mat)
    adj_mat = adj_mat.to(device)

    loss_func = nn.CrossEntropyLoss()
    num_epochs = 15

    print(model_test)
    print("{} parameters to be trained in the model\n".format(
        count_parameters(model_test)))
    optimizer = optim.Adam(model_test.parameters(), lr=0.001, weight_decay=5e-4)

    model_fit_evaluate(model_test, adj_mat[0], device, train_loader, test_loader,
                       optimizer, loss_func, num_epochs)

    ## ChebNet
    model_test = ChebNet(block_dura, filters, Nlabels, K=5,
                         gcn_layer=num_layers, dropout=0.25)
    model_test = model_test.to(device)

    print(model_test)
    print("{} parameters to be trained in the model\n".format(
        count_parameters(model_test)))
    optimizer = optim.Adam(model_test.parameters(), lr=0.001, weight_decay=5e-4)

    model_fit_evaluate(model_test, adj_mat[0], device, train_loader, test_loader,
                       optimizer, loss_func, num_epochs)

    return acc, loss
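# Hypothetical usage sketch for testChebNet. The shapes and label encoding
# below are assumptions for illustration only (per-subject ERP arrays of
# time x regions, one regions x regions adjacency per subject); the real data
# loading pipeline is not shown in this snippet.
import numpy as np
import torch

n_subjects, n_timepoints, n_regions = 10, 700, 64
ERP_Matrix = [np.random.randn(n_timepoints, n_regions) for _ in range(n_subjects)]
adj_mat = [torch.rand(n_regions, n_regions) for _ in range(n_subjects)]
subTrial = ["Healthy_Person" if i % 2 == 0 else "Parkinson_Person"
            for i in range(n_subjects)]  # label encoding is a guess

# acc, loss = testChebNet(ERP_Matrix, adj_mat, subTrial)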
def train(rundir, model_name, epochs, learning_rate, augment, weight_decay,
          patience, factor, use_gpu):
    train_loader, valid_loader, test_loader = load_data(augment, use_gpu)

    writer = SummaryWriter(rundir)

    model = NetFactory.createNet(model_name)
    print("total parameters: ", count_parameters(model))
    print("trainable parameters: ", count_trainable_parameters(model))

    if use_gpu:
        model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), learning_rate,
                                 weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=patience,
                                                           factor=factor,
                                                           threshold=1e-4)

    best_val_loss = float('inf')
    start_time = datetime.now()

    for epoch in range(epochs):
        change = datetime.now() - start_time
        print('starting epoch {}. time passed: {}'.format(epoch + 1, str(change)))

        train_loss, train_auc, _, _ = run_model(writer, epoch, model,
                                                train_loader, train=True,
                                                optimizer=optimizer)
        print(f'train loss: {train_loss:0.4f}')
        print(f'train AUC: {train_auc:0.4f}')

        val_loss, val_auc, _, _ = run_model(writer, epoch, model, valid_loader)
        print(f'valid loss: {val_loss:0.4f}')
        print(f'valid AUC: {val_auc:0.4f}')

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            file_name = f'{model_name}_val{val_loss:0.4f}_train{train_loss:0.4f}_epoch{epoch+1}'
            save_path = Path(rundir) / 'models' / file_name
            torch.save(model.state_dict(), save_path)
            save_path2 = Path(rundir) / 'models' / 'bestmodel'
            torch.save(model.state_dict(), save_path2)

        lr = get_learning_rate(optimizer)
        if len(lr) > 0:
            writer.add_scalar('data/learning_rate', lr[0], epoch)
        writer.add_scalar('data/train_loss', train_loss, epoch)
        writer.add_scalar('data/train_auc', train_auc, epoch)
        writer.add_scalar('data/val_loss', val_loss, epoch)
        writer.add_scalar('data/val_auc', val_auc, epoch)

    writer.export_scalars_to_json(rundir + "/loss_auc.json")
    writer.close()
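# get_learning_rate is used above but not defined in this snippet. A minimal
# sketch of the usual PyTorch idiom, which reads the current learning rate of
# every parameter group from the optimizer, is shown here; the project's actual
# helper may differ.
def get_learning_rate(optimizer):
    """Return the current learning rate of each parameter group."""
    return [param_group['lr'] for param_group in optimizer.param_groups]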