import time

import networkx as nx
import numpy as np
import torch
import torch.nn.functional as F
from dgl import DGLGraph, transform

# GAT, EarlyStopping and accuracy come from the surrounding project modules.


def train(args):
    # load and preprocess dataset
    #data = load_data(args)
    #data = CoraFull()
    #data = Coauthor('cs')
    #FIRST, CHECK DATASET
    path = './dataset/' + str(args.dataset) + '/'
    '''
    edges = np.loadtxt(path + 'edges.txt')
    edges = edges.astype(int)
    features = np.loadtxt(path + 'features.txt')
    train_mask = np.loadtxt(path + 'train_mask.txt')
    train_mask = train_mask.astype(int)
    labels = np.loadtxt(path + 'labels.txt')
    labels = labels.astype(int)
    '''
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    n_classes = max(labels) - min(labels) + 1
    assert train_mask.shape[0] == num_nodes

    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)
    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()

    u = edges[:, 0]
    v = edges[:, 1]
    # initialize a DGL graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)

    # add self loop
    if isinstance(g, nx.classes.digraph.DiGraph):
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    elif isinstance(g, DGLGraph):
        g = transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    record_time = 0
    avg_run_time = 0
    Used_memory = 0

    for epoch in range(args.num_epochs):
        #print('epoch = ', epoch)
        #print('mem0 = {}'.format(mem0))
        torch.cuda.synchronize()
        tf = time.time()

        model.train()
        if epoch >= 3:
            t0 = time.time()

        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        now_mem = torch.cuda.max_memory_allocated(0)
        print('now_mem : ', now_mem)
        Used_memory = max(now_mem, Used_memory)

        tf1 = time.time()
        optimizer.zero_grad()
        torch.cuda.synchronize()
        t1 = time.time()
        loss.backward()
        torch.cuda.synchronize()
        optimizer.step()
        t2 = time.time()
        run_time_this_epoch = t2 - tf

        if epoch >= 3:
            dur.append(time.time() - t0)
            record_time += 1
            avg_run_time += run_time_this_epoch

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        # log for each step
        print('Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
              .format(epoch, run_time_this_epoch, train_acc,
                      (now_mem * 1.0 / (1024**2))))
        '''
        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                     n_edges / np.mean(dur) / 1000))
        '''

    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))

    # OUTPUT we need
    avg_run_time = avg_run_time * 1. / record_time
    Used_memory /= (1024**3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, avg_run_time))
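# All of the training scripts in this file depend on some EarlyStopping
# helper that is never defined in the excerpts. As a reference point, here is
# a minimal sketch of the patience-based variant the DGL-style scripts assume
# (step() returns True once the monitored score has stopped improving for
# `patience` epochs, checkpointing the best weights to 'es_checkpoint.pt').
# The exact class in each repo may differ; treat this as an assumption.
import torch


class EarlyStopping:
    def __init__(self, patience=10):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def step(self, score, model):
        if self.best_score is None or score > self.best_score:
            # new best score: checkpoint and reset the patience counter
            self.best_score = score
            torch.save(model.state_dict(), 'es_checkpoint.pt')
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop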
def train(self, setting):
    train_data, train_loader = self._get_data(split='train')
    vali_data, vali_loader = self._get_data(split='valid', scaler=train_data.scaler)
    test_data, test_loader = self._get_data(split='test', scaler=train_data.scaler)

    path = './checkpoints/' + setting
    if not os.path.exists(path):
        os.makedirs(path)

    time_now = time.time()
    train_steps = len(train_loader)
    early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

    model_optim = self._select_optimizer()
    criterion = self._select_criterion()

    train_loss_epochs = []
    valid_loss_epochs = []
    test_loss_epochs = []
    for epoch in range(self.args.train_epochs):
        self.current_epoch = epoch
        iter_count = 0
        train_loss = []

        self.model.train()
        for i, batch in enumerate(train_loader):
            iter_count += 1
            if 'UCR' in self.args.data_path:
                batch_x, batch_y = batch
                batch_x_mark, batch_y_mark = None, None
            else:
                batch_x, batch_y, batch_x_mark, batch_y_mark = batch
                batch_x_mark = batch_x_mark.double().to(self.device)
                batch_y_mark = batch_y_mark.double().to(self.device)

            model_optim.zero_grad()
            batch_x = batch_x.double().to(self.device)
            batch_y = batch_y.double().to(self.device)

            if self.args.model == 'HLInformer':
                dec_inp = torch.zeros_like(
                    batch_y[:, -self.args.pred_len:, :]).double()
                #dec_inp = torch.cat([batch_x[:, -self.args.label_len:, :], dec_inp], dim=1).double().to(self.device)
                dec_inp_mark = batch_y_mark[:, :, 1:]
                #torch.cat([batch_x_mark[:, -self.args.label_len:, :], batch_y_mark[:,:,1:]], dim=1).double().to(self.device)
                outputs = self.model(batch_x, batch_x_mark, dec_inp, dec_inp_mark)
                batch_y = batch_y[:, -self.args.pred_len:, :].to(self.device)
            else:
                outputs = self.model(batch_x, batch_x_mark)
                if self.args.model[:2] == 'HL' and len(self.args.group_factors) > 0 \
                        and 'UCR' not in self.args.data_path:
                    batch_y = torch.cat(
                        (batch_x[:, (-max(self.args.group_factors) + 1):, 0:1],
                         batch_y), dim=1)

            if self.args.classification:
                batch_y = batch_y.long()

            loss = criterion(outputs, batch_y.squeeze())
            train_loss.append(loss.item())

            if (i + 1) % 100 == 0:
                print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(
                    i + 1, epoch + 1, loss.item()))
                speed = (time.time() - time_now) / iter_count
                left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                iter_count = 0
                time_now = time.time()

            loss.backward()
            model_optim.step()
            if self.args.plot_gradients:
                self.plot_grad_flow()

        train_loss = np.average(train_loss)
        vali_loss = self.validate(vali_loader, criterion, train_data)
        test_loss = self.validate(test_loader, criterion, train_data)
        train_loss_epochs.append(train_loss)
        valid_loss_epochs.append(vali_loss)
        test_loss_epochs.append(test_loss)

        print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}"
              .format(epoch + 1, train_steps, train_loss, vali_loss, test_loss))

        early_stopping(vali_loss, self.model, path)
        if early_stopping.early_stop:
            print("Early stopping")
            np.save(path + '/train_history.npy', np.array(train_loss_epochs))
            np.save(path + '/valid_history.npy', np.array(valid_loss_epochs))
            np.save(path + '/test_history.npy', np.array(test_loss_epochs))
            break

    best_model_path = path + '/' + 'checkpoint.pth'
    self.model.load_state_dict(torch.load(best_model_path))
    return self.model
class NARM:
    def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x,
                 te_y, num_items, init_way, logger):
        self.sess = sess
        self.configs = configs
        self.tr_x = tr_x
        self.tr_y = tr_y
        self.val_x = val_x
        self.val_y = val_y
        self.te_x = te_x
        self.te_y = te_y
        self.num_items = num_items
        self.logger = logger
        self.rnn_hidden_size = configs.rnn_hidden_size
        self.batch_size = configs.batch_size
        self.num_layers = configs.num_layers

        # Initialize the optimizer
        self.optimizer_type = configs.optimizer_type
        self.weight_decay = configs.weight_decay
        self.momentum = configs.momentum
        self.lr = configs.lr
        self.eps = configs.eps
        self.clip_grad = configs.clip_grad
        self.clip_grad_threshold = configs.clip_grad_threshold
        self.lr_decay_step = configs.lr_decay_step
        self.lr_decay = configs.lr_decay
        self.lr_decay_rate = configs.lr_decay_rate
        self.drop_prob_ho = configs.drop_prob_ho
        self.drop_prob_input = configs.drop_prob_input
        self.drop_prob_recurrent = configs.drop_prob_recurrent

        # etc
        self.k = k
        self.time_sort = configs.time_sort
        self.loss_type = configs.loss_type
        self.n_epochs = configs.n_epochs
        self.is_shuffle = configs.is_shuffle
        self.embedding_size = configs.embedding_size
        self.num_topics = configs.num_topics
        self.early_stop = EarlyStopping(configs.max_patience)

        # batch_iterator
        self.tr_sess_idx = np.arange(len(self.tr_y))
        self.val_sess_idx = np.arange(len(self.val_y))
        self.te_sess_idx = np.arange(len(self.te_y))

        # record best epoch
        self.max_val_recall = [0 for _ in range(len(self.k))]
        self.max_te_recall = [0 for _ in range(len(self.k))]
        self.best_epoch = 0

        tr_lengths = [len(s) for s in self.tr_x]
        val_lengths = [len(s) for s in self.val_x]
        te_lengths = [len(s) for s in self.te_x]
        tr_maxlen = np.max(tr_lengths)
        val_maxlen = np.max(val_lengths)
        te_maxlen = np.max(te_lengths)
        self.maxlen = np.max([tr_maxlen, val_maxlen, te_maxlen])
        self.maxlen = None  # overridden: batches are padded dynamically

        self.embed_init, self.weight_init, self.bias_init, \
            self.gate_bias_init, self.kern_init = init_way

    def run(self):
        self.prepare_model()
        tf.global_variables_initializer().run()
        print("End of model prepare")
        for epoch in range(self.n_epochs):
            start_time = time.time()
            tr_pred_loss = self.train_model()
            val_pred_loss, val_recall_list, val_mrr_list = self.pred_evaluation(mode="valid")
            te_pred_loss, te_recall_list, te_mrr_list = self.pred_evaluation(mode="test")
            self.best_epoch, best_check = write_log(
                self.logger, epoch, tr_pred_loss, val_pred_loss, te_pred_loss,
                self.k, val_recall_list, val_mrr_list, te_recall_list,
                te_mrr_list, self.max_val_recall, self.max_te_recall,
                self.best_epoch, start_time)
            if self.early_stop.validate(val_recall_list[3]):
                self.logger.info("Training process is stopped early")
                break

    def prepare_model(self):
        self.rnn_x = tf.placeholder(tf.int32, [None, None], name='input')
        self.rnn_y = tf.placeholder(tf.int64, [None, self.num_items], name='output')
        self.mask = tf.placeholder(tf.float32, [None, None], name='mask')
        self.keep_prob_input = tf.placeholder(tf.float32, name='keep_prob_input')
        self.keep_prob_ho = tf.placeholder(tf.float32, name='keep_prob_ho')
        self.batch_var_length = tf.placeholder(tf.int32, name="variable_length")

        Wemb = tf.get_variable('Wemb', [self.num_items, self.embedding_size],
                               initializer=self.embed_init)
        W_encoder = tf.get_variable('W_encoder',
                                    [self.rnn_hidden_size, self.rnn_hidden_size],
                                    initializer=self.weight_init)
        W_decoder = tf.get_variable('W_decoder',
                                    [self.rnn_hidden_size, self.rnn_hidden_size],
                                    initializer=self.weight_init)
        Bi_vector = tf.get_variable('Bi_vector', [1, self.rnn_hidden_size],
                                    initializer=self.weight_init)

        if self.loss_type == 'EMB':
            bili = tf.get_variable('bili',
                                   [self.embedding_size, 2 * self.rnn_hidden_size],
                                   initializer=self.weight_init)
        elif self.loss_type == "Trilinear":
            ws = tf.get_variable('ws', [self.embedding_size, self.embedding_size],
                                 initializer=self.weight_init)
            bs = tf.get_variable('bs', [self.embedding_size],
                                 initializer=self.bias_init)
            wt = tf.get_variable('wt', [self.embedding_size, self.embedding_size],
                                 initializer=self.weight_init)
            bt = tf.get_variable('bt', [self.embedding_size],
                                 initializer=self.bias_init)
        elif self.loss_type == "TOP1":
            W_top1 = tf.get_variable('W_top1',
                                     [2 * self.rnn_hidden_size, self.num_items],
                                     initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)
        elif self.loss_type == "TOP1_variant":
            bili = tf.get_variable('bili',
                                   [self.embedding_size, 2 * self.rnn_hidden_size],
                                   initializer=self.weight_init)
            W_top1 = tf.get_variable('W_top1',
                                     [2 * self.rnn_hidden_size, self.num_items],
                                     initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)

        emb = tf.nn.embedding_lookup(Wemb, self.rnn_x)
        emb = tf.nn.dropout(emb, self.keep_prob_input)

        custom_cell = tf.contrib.rnn.GRUCell(num_units=self.rnn_hidden_size)
        outputs, states = tf.nn.dynamic_rnn(custom_cell, emb,
                                            sequence_length=self.batch_var_length,
                                            dtype=tf.float32)
        self.outputs = outputs
        self.last_hidden = states  # 512 x 100

        outputs = tf.transpose(outputs, perm=[1, 0, 2])  # 19x512x100
        squares = tf.map_fn(
            lambda x: compute_alpha(x, self.last_hidden, W_encoder, W_decoder,
                                    Bi_vector), outputs)  # 19x512
        weight = tf.nn.softmax(tf.transpose(squares) + 100000000. * (self.mask - 1),
                               axis=1)  # batch_size * max_len
        attention_proj = tf.reduce_sum(outputs * tf.transpose(weight)[:, :, None],
                                       axis=0)  # num_items x 2*100

        if self.loss_type == 'EMB':
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "Trilinear":
            hs = tf.nn.tanh(tf.matmul(attention_proj, ws) + bs)  # batch * hidden
            ht = tf.nn.tanh(tf.matmul(states, wt) + bt)  # batch * hidden
            pred = tf.nn.sigmoid(
                tf.matmul(tf.multiply(ht, hs), tf.transpose(Wemb)))  # batch * n_item
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "TOP1":
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            pred = tf.matmul(proj, W_top1) + b_top1
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)
        elif self.loss_type == "TOP1_variant":
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)

        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)

    def train_model(self):
        if self.configs.is_shuffle:
            self.tr_sess_idx = np.random.permutation(self.tr_sess_idx)
        batch_loss_list = []
        num_batch = math.ceil(np.float32(len(self.tr_sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(self.tr_sess_idx))
            temp_batch_x = self.tr_x[self.tr_sess_idx[start_itr:end_itr]]
            temp_batch_y = self.tr_y[self.tr_sess_idx[start_itr:end_itr]]
            batch_x, batch_y, mask, labels, lengths = convert_batch_data(
                temp_batch_x, temp_batch_y, self.num_items, maxlen=None)
            temp_keep_prob_ho = 1.0 - self.drop_prob_ho
            temp_keep_prob_input = 1.0 - self.drop_prob_input
            feed_dict = {
                self.rnn_x: batch_x,
                self.rnn_y: batch_y,
                self.mask: mask,
                self.keep_prob_input: temp_keep_prob_input,
                self.keep_prob_ho: temp_keep_prob_ho,
                self.batch_var_length: lengths
            }
            _, pred_loss_, preds2 = self.sess.run(
                [self.optimizer, self.cost, self.pred], feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)
        return np.mean(batch_loss_list)

    def pred_evaluation(self, mode):
        if mode == "valid":
            sess_idx = self.val_sess_idx
            df_x = self.val_x
            df_y = self.val_y
        elif mode == "test":
            sess_idx = self.te_sess_idx
            df_x = self.te_x
            df_y = self.te_y

        batch_loss_list = []
        recalls = []
        mrrs = []
        evaluation_point_count = []
        for itr in range(len(self.k)):
            recalls.append(0)
            mrrs.append(0)
            evaluation_point_count.append(0)

        num_batch = math.ceil(np.float32(len(sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1), len(sess_idx))
            temp_batch_x = df_x[sess_idx[start_itr:end_itr]]
            temp_batch_y = df_y[sess_idx[start_itr:end_itr]]
            batch_x, batch_y, mask, labels, lengths = convert_batch_data(
                temp_batch_x, temp_batch_y, self.num_items, maxlen=None)
            feed_dict = {
                self.rnn_x: batch_x,
                self.rnn_y: batch_y,
                self.mask: mask,
                self.keep_prob_input: 1.0,
                self.keep_prob_ho: 1.0,
                self.batch_var_length: lengths
            }
            preds, pred_loss_ = self.sess.run([self.pred, self.cost],
                                              feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)
            recalls, mrrs, evaluation_point_count = evaluation(
                labels, preds, recalls, mrrs, evaluation_point_count, self.k)

        recall_list = []
        mrr_list = []
        for itr in range(len(self.k)):
            recall = np.asarray(recalls[itr], dtype=np.float32) / evaluation_point_count[itr]
            mrr = np.asarray(mrrs[itr], dtype=np.float32) / evaluation_point_count[itr]
            if self.max_val_recall[itr] < recall and mode == "valid":
                self.max_val_recall[itr] = recall
            if self.max_te_recall[itr] < recall and mode == "test":
                self.max_te_recall[itr] = recall
            recall_list.append(recall)
            mrr_list.append(mrr)

        return np.mean(batch_loss_list), recall_list, mrr_list
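# A hypothetical sketch of the convert_batch_data() helper used by NARM (and
# by STAMP at the end of this file): it pads variable-length sessions, builds
# the attention mask and per-session lengths, and one-hot encodes the targets
# over the item vocabulary. The name comes from the source, but this body is
# inferred from the feed_dict and may differ from the real helper.
import numpy as np


def convert_batch_data(batch_x, batch_y, num_items, maxlen=None):
    lengths = [len(seq) for seq in batch_x]
    pad_to = maxlen if maxlen is not None else max(lengths)
    n = len(batch_x)
    x = np.zeros((n, pad_to), dtype=np.int32)        # padded item-id sequences
    mask = np.zeros((n, pad_to), dtype=np.float32)   # 1.0 where a real item sits
    y = np.zeros((n, num_items), dtype=np.int64)     # one-hot target items
    for i, (seq, target) in enumerate(zip(batch_x, batch_y)):
        x[i, :len(seq)] = seq
        mask[i, :len(seq)] = 1.0
        y[i, target] = 1
    return x, y, mask, batch_y, lengths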
def train(name, run, folds_csv):
    wandb.init(project='dfdc',
               config=config_defaults,
               name=f'{name},val_fold:{VAL_FOLD},run{run}')
    config = wandb.config

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    model = timm.create_model('xception', pretrained=True, num_classes=1)
    model.to(device)
    # model = DataParallel(model).to(device)
    wandb.watch(model)

    if config.optimizer == 'radam':
        optimizer = torch_optimizer.RAdam(model.parameters(),
                                          lr=config.learning_rate,
                                          weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=config.schedule_patience,
        threshold=0.001,
        mode="min",
        factor=config.schedule_factor)
    criterion = nn.BCEWithLogitsLoss()
    es = EarlyStopping(patience=10, mode='min')

    data_train = CelebDF_Dataset(data_root=DATA_ROOT,
                                 mode='train',
                                 folds_csv=folds_csv,
                                 val_fold=VAL_FOLD,
                                 test_fold=TEST_FOLD,
                                 cutout_fill=config.cutout_fill,
                                 hardcore=False,
                                 random_erase=True,
                                 oversample_real=True,
                                 transforms=create_train_transforms(size=224))
    data_train.reset(config.rand_seed)
    train_data_loader = DataLoader(data_train,
                                   batch_size=config.train_batch_size,
                                   num_workers=8,
                                   shuffle=True,
                                   drop_last=True)

    data_val = CelebDF_Dataset(data_root=DATA_ROOT,
                               mode='val',
                               folds_csv=folds_csv,
                               val_fold=VAL_FOLD,
                               test_fold=TEST_FOLD,
                               hardcore=False,
                               oversample_real=False,
                               transforms=create_val_transforms(size=224))
    data_val.reset(config.rand_seed)
    val_data_loader = DataLoader(data_val,
                                 batch_size=config.valid_batch_size,
                                 num_workers=8,
                                 shuffle=False,
                                 drop_last=True)

    data_test = CelebDF_Dataset(data_root=DATA_ROOT,
                                mode='test',
                                folds_csv=folds_csv,
                                val_fold=VAL_FOLD,
                                test_fold=TEST_FOLD,
                                hardcore=False,
                                oversample_real=False,
                                transforms=create_val_transforms(size=224))
    data_test.reset(config.rand_seed)
    test_data_loader = DataLoader(data_test,
                                  batch_size=config.valid_batch_size,
                                  num_workers=8,
                                  shuffle=False,
                                  drop_last=True)

    train_history = []
    val_history = []
    test_history = []
    for epoch in range(config.epochs):
        print(f"Epoch = {epoch}/{config.epochs-1}")
        print("------------------")

        train_metrics = train_epoch(model, train_data_loader, optimizer,
                                    criterion, epoch)
        valid_metrics = valid_epoch(model, val_data_loader, criterion, epoch)
        scheduler.step(valid_metrics['valid_loss'])

        print(f"TRAIN_AUC = {train_metrics['train_auc']}, TRAIN_LOSS = {train_metrics['train_loss']}")
        print(f"VALID_AUC = {valid_metrics['valid_auc']}, VALID_LOSS = {valid_metrics['valid_loss']}")

        train_history.append(train_metrics)
        val_history.append(valid_metrics)

        es(valid_metrics['valid_loss'],
           model,
           model_path=os.path.join(OUTPUT_DIR, f"{name}_fold_{VAL_FOLD}_run_{run}.h5"))
        if es.early_stop:
            print("Early stopping")
            break

    # reload the best checkpoint (assumes OUTPUT_DIR points at 'weights')
    model.load_state_dict(torch.load(f'weights/{name}_fold_{VAL_FOLD}_run_{run}.h5'))

    neptune.init('sowmen/dfdc')
    neptune.create_experiment(name=f'{name},val_fold:{VAL_FOLD},run{run}')

    test_history = test(model, test_data_loader, criterion)

    try:
        pkl.dump(train_history, open(f"train_history{name}{run}.pkl", "wb"))
        pkl.dump(val_history, open(f"val_history{name}{run}.pkl", "wb"))
        pkl.dump(test_history, open(f"test_history{name}{run}.pkl", "wb"))
    except Exception:
        print("Error pickling")

    wandb.save(f'weights/{name}_fold_{VAL_FOLD}_run_{run}.h5')
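# The DFDC script above (and the ALBERT script near the end of this file)
# calls a different EarlyStopping variant: a callable taking
# (score, model, model_path) with a mode switch ('min' for losses, 'max' for
# accuracy). A minimal sketch of that interface, modeled on the common
# wtfml-style helper; an assumption, not copied from this repo.
import torch


class EarlyStopping:
    def __init__(self, patience=7, mode="max", delta=0.0001):
        self.patience = patience
        self.mode = mode
        self.delta = delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, epoch_score, model, model_path):
        # flip the sign for 'min' mode so that larger is always better
        score = -epoch_score if self.mode == "min" else epoch_score
        if self.best_score is None or score > self.best_score + self.delta:
            self.best_score = score
            torch.save(model.state_dict(), model_path)  # checkpoint best model
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True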
    model = resuneta.model
    print('ResUnet-a compiled!')
else:
    model = unet((rows, cols, channels))
    #model.compile(optimizer=adam, loss=loss, metrics=['accuracy'])
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# print model information
model.summary()
filepath = './models/'

# define early stopping callback
earlystop = EarlyStopping(monitor='val_loss',
                          min_delta=0.0001,
                          patience=10,
                          verbose=1,
                          mode='min')
checkpoint = ModelCheckpoint(filepath + 'unet_exp_' + str(exp) + '.h5',
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')
callbacks_list = [earlystop, checkpoint]

# train the model
start_training = time.time()
model_info = model.fit(patches_tr_aug,
                       patches_tr_ref_aug_h,
                       batch_size=batch_size,
                       epochs=10,
def private_dataset_train(args):
    device = 'cuda' if args.gpu else 'cpu'
    # Model initialization: load the FEMNIST dataset
    train_dataset, test_dataset = get_private_dataset_balanced(args)
    user_groups = FEMNIST_iid(train_dataset, args.user_number)
    models = {"2_layer_CNN": CNN_2layer_fc_model,  # maps model-type name to constructor
              "3_layer_CNN": CNN_3layer_fc_model}
    modelsindex = ["2_layer_CNN", "3_layer_CNN"]
    if args.new_private_training:
        model_list, model_type_list = get_model_list(args.initialurl, modelsindex, models)
        #model_list, model_type_list = get_model_list('Src/EmptyModel', modelsindex, models)
    else:
        model_list, model_type_list = get_model_list(args.privateurl, modelsindex, models)
        #model_list, model_type_list = get_model_list('Src/EmptyModelFemnist', modelsindex, models)

    private_model_private_dataset_train_losses = []
    private_model_private_dataset_validation_losses = []
    for n, model in enumerate(model_list):
        print('train Local Model {} on Private Dataset'.format(n))
        model.to(device)
        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.5)
        elif args.optimizer == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                         weight_decay=1e-4)
        trainloader = DataLoader(DatasetSplit(train_dataset, list(user_groups[n])),
                                 batch_size=32, shuffle=True)
        testloader = DataLoader(test_dataset, batch_size=128, shuffle=True)
        criterion = nn.NLLLoss().to(device)
        train_epoch_losses = []
        validation_epoch_losses = []
        print('Begin Private Training')
        earlyStopping = EarlyStopping(
            patience=5, verbose=True,
            path='Src/EmptyModelFemnist/LocalModel{}Type{}.pkl'.format(n, model_type_list[n]))
        for epoch in range(args.privateepoch):
            model.train()
            train_batch_losses = []
            for batch_idx, (images, labels) in enumerate(trainloader):
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                if batch_idx % 5 == 0:
                    print('Local Model {} Type {} Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        n, model_type_list[n], epoch + 1, batch_idx * len(images),
                        len(trainloader.dataset),
                        100. * batch_idx / len(trainloader), loss.item()))
                train_batch_losses.append(loss.item())
            loss_avg = sum(train_batch_losses) / len(train_batch_losses)
            train_epoch_losses.append(loss_avg)

            model.eval()
            val_batch_losses = []
            for batch_idx, (images, labels) in enumerate(testloader):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                if batch_idx % 5 == 0:
                    print('Local Model {} Type {} Val Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        n, model_type_list[n], epoch + 1, batch_idx * len(images),
                        len(testloader.dataset),
                        100. * batch_idx / len(testloader), loss.item()))
                val_batch_losses.append(loss.item())
            loss_avg = sum(val_batch_losses) / len(val_batch_losses)
            validation_epoch_losses.append(loss_avg)
            earlyStopping(loss_avg, model)
            if earlyStopping.early_stop:
                print("Early stopping")
                break
        # torch.save(model.state_dict(), 'Src/PrivateModel/LocalModel{}Type{}.pkl'.format(n, model_type_list[n]))
        private_model_private_dataset_train_losses.append(train_epoch_losses)
        private_model_private_dataset_validation_losses.append(validation_epoch_losses)

    plt.figure()
    for i, val in enumerate(private_model_private_dataset_train_losses):
        print(val)
        plt.plot(range(len(val)), val, label='model :' + str(i))
    plt.legend(loc='best')
    plt.title('private_model_private_dataset_train_demo_losses')
    plt.xlabel('epochs')
    plt.ylabel('Train loss')
    x_major_locator = MultipleLocator(1)  # set the x-axis tick interval to 1
    ax = plt.gca()  # handle to the current axes
    ax.xaxis.set_major_locator(x_major_locator)  # major x ticks at multiples of 1
    plt.xlim(0, args.privateepoch)
    plt.savefig('Src/Figure/private_model_private_dataset_train_demo_losses.png')
    plt.show()

    plt.figure()
    for i, val in enumerate(private_model_private_dataset_validation_losses):
        print(val)
        plt.plot(range(len(val)), val, label='model :' + str(i))
    plt.legend(loc='best')
    plt.title('private_model_private_dataset_validation_demo_losses')
    plt.xlabel('epochs')
    plt.ylabel('Validation loss')
    ax = plt.gca()
    ax.xaxis.set_major_locator(MultipleLocator(1))  # major x ticks at multiples of 1
    plt.xlim(0, args.privateepoch)
    plt.savefig('Src/Figure/private_model_private_dataset_validation_demo_losses.png')
    plt.show()
    print('End Private Training')
def train(self):
    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    task = self.args.task
    tb_writer = SummaryWriter(log_dir='./runs/' + task + "/" + current_time +
                              self.args.prefix,
                              comment=self.args.prefix)
    vocabs, lexical_mapping = self._build_model()
    train_data = DataLoader(self.args, vocabs, lexical_mapping,
                            self.args.train_data, self.args.batch_size,
                            for_train=True)
    dev_data = DataLoader(self.args, vocabs, lexical_mapping,
                          self.args.dev_data, self.args.batch_size,
                          for_train=False)
    test_data = DataLoader(self.args, vocabs, lexical_mapping,
                           self.args.test_data, self.args.batch_size,
                           for_train='Eval')
    train_data.set_unk_rate(self.args.unk_rate)

    # WRITE PARAMETERS
    with open('./' + 'param' + '.txt', 'w') as f:
        for name, param in self.model.named_parameters():
            f.writelines('name:' + name + "\n")
            f.writelines(str(param))
            f.writelines('size:' + str(param.size()) + '\n')

    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in self.model.named_parameters()
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.
    }, {
        'params': [p for n, p in self.model.named_parameters()
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    gradient_accumulation_steps = 1
    t_total = len(train_data) // gradient_accumulation_steps * self.args.epochs
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=self.args.lr,
                      eps=self.args.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=self.args.warmup_steps,
                                     t_total=t_total)
    self.model.zero_grad()
    set_seed(42, self.args.gpus)
    batches_acm, loss_acm = 0, 0

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Task: %s", self.args.task)
    logger.info("  Num examples = %d", len(train_data))
    logger.info("  Num Epochs = %d", self.args.epochs)
    logger.info("  Total optimization steps = %d", t_total)
    logger.info("  Running Language Model = %s", self.args.lm_model)
    logger.info("  Running Model = %s", self.args.encoder_type)

    best_acc = 0
    best_model_wts = copy.deepcopy(self.model.state_dict())
    total_steps = 0
    train_iterator = trange(int(self.args.epochs), desc="Epoch")
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

    for _ in train_iterator:
        epoch_iterator = tqdm(train_data, desc="Iteration")
        running_loss = 0.0
        running_corrects = 0
        batch_count = self.args.batch_multiplier
        # Turn on the train mode
        for step, batch in enumerate(epoch_iterator):
            self.model.train()
            batch = move_to_cuda(batch, self.device)
            logits, labels, ans_ids = self.model(batch, train=True)
            logits_for_pred = logits.clone().detach()
            loss = self.criterion(logits, labels)
            loss_value = loss.item()

            pred_values, pred_indices = torch.max(logits_for_pred, 1)
            labels = labels.tolist()
            pred = pred_indices.tolist()
            corrects = [i for i, j in zip(labels, pred) if i == j]

            # Statistics
            running_loss += loss.item()
            running_corrects += len(corrects)

            if batch_count == 0:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                total_steps += 1
                optimizer.zero_grad()
                self.model.zero_grad()
                batch_count = self.args.batch_multiplier

            loss_acm += loss_value
            loss.backward()
            batch_count -= 1

            if (batches_acm % (self.args.batch_multiplier * self.args.batch_size) == 0) \
                    and (batches_acm != 0) and (step != 0):
                logger.info('Train Epoch %d, Batch %d, loss %.3f, Accuracy %.3f',
                            _, batches_acm, loss_acm / batches_acm,
                            running_corrects / (self.args.batch_size * step))
                tb_writer.add_scalar('Training_loss', loss_acm / batches_acm,
                                     batches_acm)
                tb_writer.add_scalar('Training_Accuracy',
                                     running_corrects / (self.args.batch_size * step),
                                     batches_acm)
                torch.cuda.empty_cache()
            batches_acm += 1

        epoch_loss = running_loss / batches_acm
        epoch_acc = running_corrects / len(train_data)
        print('{} Loss: {:.4f} Acc: {:.4f}'.format(_, epoch_loss, epoch_acc))
        tb_writer.add_scalar('Training_Epoch_loss', epoch_loss, _)
        tb_writer.add_scalar('Training_Epoch_Accuracy', epoch_acc, _)

        # Evaluate on Development Set
        eval_epoch_acc, eval_epoch_loss = self._run_evaluate(dev_data, _,
                                                             write_answer=False)
        print('Overall_Dev Acc: {:.4f}'.format(eval_epoch_acc))
        tb_writer.add_scalar('Dev_Epoch_Accuracy', eval_epoch_acc, _)

        ##################################
        # Evaluate on Test Set
        test_epoch_acc, test_epoch_loss = self._run_evaluate(test_data, _,
                                                             write_answer=True)
        print('Overall_Test Acc: {:.4f}'.format(test_epoch_acc))
        tb_writer.add_scalar('Test_Epoch_Accuracy', test_epoch_acc, _)

        # Save only best accuracy model on dev set
        if eval_epoch_acc > best_acc:
            best_acc = eval_epoch_acc
            best_model_wts = copy.deepcopy(self.model.state_dict())

        # early_stopping checks whether the monitored score has stopped
        # improving and, if it has not, checkpoints the current model
        early_stopping(epoch_acc, self.model)
        if early_stopping.early_stop:
            print("Early stopping")
            break
        self.model.train()

    logger.info('Best val Acc: {:4f}'.format(best_acc))
    torch.save({'args': self.save_args, 'model': best_model_wts},
               '%s/epoch%d_batch%d_model_best_%s' %
               (self.args.ckpt, self.args.epochs, batches_acm, self.args.prefix))
def train(config):
    loss_full = []
    exhaustion_count = 0
    step = 0
    config.step = step
    writer = None
    start_time = time.time()

    if config.log_tensorboard:
        writer = SummaryWriter(
            log_dir=
            f"{config.train_dir}/runs/{config.model_name}/{config.turn}-{datetime.datetime.now().replace(microsecond=0).isoformat()}{'-' + os.environ['REMARK'] if 'REMARK' in os.environ else ''}"
        )

    model = get_model(config)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    early_stopping = EarlyStopping()
    early_stopping_loss = EarlyStopping(patience=200)
    model, _ = restore_checkpoint(config, model, optimizer, early_stopping)
    step = config.step
    dataset = get_train_dataset(config)

    for epoch in range(config.epochs):
        print(f'epoch {epoch}/{config.epochs}')
        dataloader = iter(
            DataLoader(dataset,
                       batch_size=config.batch_size,
                       shuffle=True,
                       num_workers=config.num_workers,
                       drop_last=True,
                       pin_memory=config.pin_memory))
        total_batches = int(len(dataset) / config.batch_size)
        with tqdm(total=total_batches,
                  desc=f'Training epoch {epoch}/{config.epochs}') as pbar:
            for i in range(1, total_batches + 1):
                model.train()
                try:
                    minibatch = next(dataloader)
                except StopIteration:
                    exhaustion_count += 1
                    tqdm.write(
                        f"Training data exhausted for {exhaustion_count} times after {i} batches, reuse the dataset."
                    )
                    dataloader = iter(
                        DataLoader(dataset,
                                   batch_size=config.batch_size,
                                   shuffle=True,
                                   num_workers=config.num_workers,
                                   drop_last=True,
                                   pin_memory=config.pin_memory))
                    minibatch = next(dataloader)

                step += 1
                if config.model_name == 'LSTUR':
                    y_pred = model(minibatch["user"],
                                   minibatch["clicked_news_length"],
                                   minibatch["candidate_news"],
                                   minibatch["clicked_news"])
                elif config.model_name == 'HiFiArk':
                    y_pred, regularizer_loss = model(minibatch["candidate_news"],
                                                     minibatch["clicked_news"])
                elif config.model_name == 'TANR':
                    y_pred, topic_classification_loss = model(
                        minibatch["candidate_news"], minibatch["clicked_news"])
                elif config.model_name.startswith('DM'):
                    y_pred = model(minibatch)
                else:
                    y_pred = model(minibatch["candidate_news"],
                                   minibatch["clicked_news"])

                # the clicked candidate sits at index 0, so the NCE loss is
                # the negative log-softmax probability of column 0
                loss = torch.stack([x[0] for x in -F.log_softmax(y_pred, dim=1)]).mean()
                if config.model_name == 'HiFiArk':
                    if i % config.num_iters_show_loss == 0 and config.log_tensorboard:
                        writer.add_scalar('Train/BaseLoss', loss.item(), step)
                        writer.add_scalar('Train/RegularizerLoss',
                                          regularizer_loss.item(), step)
                        writer.add_scalar('Train/RegularizerBaseRatio',
                                          regularizer_loss.item() / loss.item(),
                                          step)
                    loss += config.regularizer_loss_weight * regularizer_loss
                elif config.model_name == 'TANR':
                    if i % config.num_iters_show_loss == 0 and config.log_tensorboard:
                        writer.add_scalar('Train/BaseLoss', loss.item(), step)
                        writer.add_scalar('Train/TopicClassificationLoss',
                                          topic_classification_loss.item(), step)
                        writer.add_scalar(
                            'Train/TopicBaseRatio',
                            topic_classification_loss.item() / loss.item(), step)
                    loss += config.topic_classification_loss_weight * topic_classification_loss

                loss_full.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                loss_f = np.mean(loss_full)

                if i % config.num_iters_show_loss == 0 and config.log_tensorboard:
                    writer.add_scalar('Train/Loss', loss.item(), step)

                if i % config.num_batches_show_loss == 0:
                    tqdm.write(
                        f"Time {time_since(start_time)}, batches {i}, current loss {loss.item():.4f}, average loss: {loss_f:.4f}"
                    )

                stopping_loss, _ = early_stopping_loss(loss_f)
                if stopping_loss:
                    tqdm.write('Early stop due to no improvement on loss.')
                    eval_and_save_checkpoint(config, model, optimizer,
                                             early_stopping, writer, loss_f,
                                             step, start_time, i)
                    break

                if i % config.num_batches_validate == 0 or i == total_batches:
                    should_break = eval_and_save_checkpoint(
                        config, model, optimizer, early_stopping, writer,
                        loss_f, step, start_time, i)
                    if should_break:
                        break

                pbar.update(1)
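# The news-recommendation loop above unpacks the result of the early-stopping
# call: `stopping_loss, _ = early_stopping_loss(loss_f)`. A sketch of that
# tuple-returning variant, yielding (should_stop, is_best) for a
# lower-is-better metric; inferred from the call site, so an assumption.
import numpy as np


class EarlyStopping:
    def __init__(self, patience=5):
        self.patience = patience
        self.counter = 0
        self.best_loss = np.inf

    def __call__(self, val_loss):
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.counter = 0
            return False, True   # keep training; new best value
        self.counter += 1
        return self.counter >= self.patience, False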
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, \
        train_mask, val_mask, test_mask = load_imdb_raw()

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features_m, features_a, features_d = features
    features_a = torch.zeros(features_a.shape[0], 10)
    features_d = torch.zeros(features_d.shape[0], 10)
    features_m = features_m.to(args['device'])
    features_a = features_a.to(args['device'])
    features_d = features_d.to(args['device'])
    features = {'movie': features_m, 'actor': features_a, 'director': features_d}
    in_size = {'actor': features_a.shape[1],
               'movie': features_m.shape[1],
               'director': features_d.shape[1]}

    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    model = HMSG(meta_paths=[['ma', 'am'], ['md', 'dm'], ['am'], ['dm']],
                 in_size=in_size,
                 hidden_size=args['hidden_units'],
                 out_size=num_classes,
                 aggre_type='attention',
                 num_heads=args['num_heads'],
                 dropout=args['dropout']).to(args['device'])
    g = g.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        z, logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask],
                                                          labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1, z = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1, z = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    emd_imdb, label_imdb = z[test_mask], labels[test_mask]
    np.savetxt('./out/emd_imdb.txt', emd_imdb.cpu())
    np.savetxt('./out/label_imdb.txt', np.array(label_imdb.cpu(), dtype=np.int32))
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
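# Sketch of the score() helper the HMSG script above assumes: accuracy plus
# micro/macro F1 computed with scikit-learn. Inferred from the three return
# values; an assumption, not the repo's exact code.
from sklearn.metrics import f1_score


def score(logits, labels):
    pred = logits.argmax(dim=1).cpu().numpy()
    y = labels.cpu().numpy()
    accuracy = (pred == y).mean()
    micro_f1 = f1_score(y, pred, average='micro')
    macro_f1 = f1_score(y, pred, average='macro')
    return accuracy, micro_f1, macro_f1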
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='./data/', type=str)
    parser.add_argument('--output_dir', default='output/', type=str)
    parser.add_argument('--data_name', default='Beauty', type=str)
    parser.add_argument('--do_eval', action='store_true')
    parser.add_argument('--ckp', default=10, type=int, help="pretrain epochs 10, 20, 30...")

    # model args
    parser.add_argument("--model_name", default='Finetune_sample', type=str)
    parser.add_argument("--hidden_size", type=int, default=64, help="hidden size of transformer model")
    parser.add_argument("--num_hidden_layers", type=int, default=2, help="number of layers")
    parser.add_argument('--num_attention_heads', default=2, type=int)
    parser.add_argument('--hidden_act', default="gelu", type=str)  # gelu relu
    parser.add_argument("--attention_probs_dropout_prob", type=float, default=0.5, help="attention dropout p")
    parser.add_argument("--hidden_dropout_prob", type=float, default=0.5, help="hidden dropout p")
    parser.add_argument("--initializer_range", type=float, default=0.02)
    parser.add_argument('--max_seq_length', default=50, type=int)

    # train args
    parser.add_argument("--lr", type=float, default=0.001, help="learning rate of adam")
    parser.add_argument("--batch_size", type=int, default=256, help="number of batch_size")
    parser.add_argument("--epochs", type=int, default=200, help="number of epochs")
    parser.add_argument("--no_cuda", action="store_true")
    parser.add_argument("--log_freq", type=int, default=1, help="per epoch print res")
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument("--weight_decay", type=float, default=0.0, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam second beta value")
    parser.add_argument("--gpu_id", type=str, default="0", help="gpu_id")

    args = parser.parse_args()

    set_seed(args.seed)
    check_path(args.output_dir)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    args.cuda_condition = torch.cuda.is_available() and not args.no_cuda

    args.data_file = args.data_dir + args.data_name + '.txt'
    args.sample_file = args.data_dir + args.data_name + '_sample.txt'
    item2attribute_file = args.data_dir + args.data_name + '_item2attributes.json'

    user_seq, max_item, sample_seq = get_user_seqs_and_sample(args.data_file,
                                                              args.sample_file)
    item2attribute, attribute_size = get_item2attribute_json(item2attribute_file)

    args.item_size = max_item + 2
    args.mask_id = max_item + 1
    args.attribute_size = attribute_size + 1

    # save model args
    args_str = f'{args.model_name}-{args.data_name}-{args.ckp}'
    args.log_file = os.path.join(args.output_dir, args_str + '.txt')
    print(str(args))
    with open(args.log_file, 'a') as f:
        f.write(str(args) + '\n')

    args.item2attribute = item2attribute

    # save model
    checkpoint = args_str + '.pt'
    args.checkpoint_path = os.path.join(args.output_dir, checkpoint)

    train_dataset = SASRecDataset(args, user_seq, data_type='train')
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler,
                                  batch_size=args.batch_size)

    eval_dataset = SASRecDataset(args, user_seq, test_neg_items=sample_seq,
                                 data_type='valid')
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,
                                 batch_size=args.batch_size)

    test_dataset = SASRecDataset(args, user_seq, test_neg_items=sample_seq,
                                 data_type='test')
    test_sampler = SequentialSampler(test_dataset)
    test_dataloader = DataLoader(test_dataset, sampler=test_sampler,
                                 batch_size=args.batch_size)

    model = S3RecModel(args=args)
    trainer = FinetuneTrainer(model, train_dataloader, eval_dataloader,
                              test_dataloader, args)

    if args.do_eval:
        trainer.load(args.checkpoint_path)
        print(f'Load model from {args.checkpoint_path} for test!')
        scores, result_info = trainer.test(0, full_sort=False)
    else:
        pretrained_path = os.path.join(args.output_dir,
                                       f'{args.data_name}-epochs-{args.ckp}.pt')
        try:
            trainer.load(pretrained_path)
            print(f'Load Checkpoint From {pretrained_path}!')
        except FileNotFoundError:
            print(f'{pretrained_path} Not Found! The Model is same as SASRec')

        early_stopping = EarlyStopping(args.checkpoint_path, patience=10, verbose=True)
        for epoch in range(args.epochs):
            trainer.train(epoch)
            scores, _ = trainer.valid(epoch, full_sort=False)
            # evaluate on MRR
            early_stopping(np.array(scores[-1:]), trainer.model)
            if early_stopping.early_stop:
                print("Early stopping")
                break

        print('---------------Sample 99 results-------------------')
        # load the best model
        trainer.model.load_state_dict(torch.load(args.checkpoint_path))
        scores, result_info = trainer.test(0, full_sort=False)

    print(args_str)
    print(result_info)
    with open(args.log_file, 'a') as f:
        f.write(args_str + '\n')
        f.write(result_info + '\n')
# if config.use_pre_embedding:
#     pre_embed = load_pre_embedding('../pre_embedding/sgns.baidubaike.bigram-char',
#                                    word2id, config.embed_size)
# note: pre_embed must be defined earlier, since the load above is commented out
model = Intent_Model(len(word2id), config.embed_size, config.hidden_size, 13,
                     pre_embed, config.dropout, use_gpu)
if os.path.exists(config.model_path + '/ed_model/model.ckpt'):
    model.load_state_dict(torch.load(config.model_path + '/ed_model/model.ckpt'))
    print('load model state dict successful!')
if use_gpu:
    model = model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)  #, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss()
best_dev_acc = -1.0
earlystopping = EarlyStopping(config.delta, config.earlystop)
stop_flag = False
t_loss = 0.
b_loss = 0.
time1 = time.time()
for epoch in range(config.ed_epochs):
    np.random.shuffle(train_data)
    for i, data in enumerate(train_data):
        text_ids = data['text_ids']
        # mention_positions = data['mention_position']
        label = data['label']
        label = Variable(torch.LongTensor([label]))
        text_ids = Variable(torch.LongTensor(text_ids))
        if use_gpu:
def main(args):
    # load and preprocess dataset
    g, features, labels, n_classes, train_mask, val_mask, test_mask, \
        lp_dict, ind_features, ind_labels = load_reg_data(args)
    num_feats = features.shape[1]
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #use cuda: %d
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (args.gpu, n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        ind_features = ind_features.cuda()
        labels = labels.cuda()
        ind_labels = ind_labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual, args.bias)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.MSELoss()  # assumed: the original excerpt uses loss_fcn without defining it

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        pred = model(features)
        loss = loss_fcn(pred[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_r2 = compute_r2(pred[train_mask], labels[train_mask])

        if args.fastmode:
            val_r2 = compute_r2(pred[val_mask], labels[val_mask])
        else:
            val_r2 = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_r2, model):
                    break

        if epoch > 3:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainR2 {:.4f} |"
                  " Val R2 {:.4f} | ETputs(KTEPS) {:.2f}".format(
                      epoch, np.mean(dur), loss.item(), train_r2, val_r2,
                      n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    evaluate_test(model, features, labels, test_mask, lp_dict, meta="2012")
    evaluate_test(model, ind_features, ind_labels, test_mask, lp_dict, meta="2016")
def train_pred_labels(model, train, val, auxiliary_weight=1.,
                      mini_batch_size=100, lr=3e-4, nb_epochs=100,
                      patience=20, **kwargs):
    """
    Train the PyTorch model on the training set.

    Parameters
    ----------
    model : PyTorch NN object
        PyTorch neural network model
    train : TensorDataset
        Dataset containing inputs, targets, classes for training (train_inner)
    val : TensorDataset
        Dataset containing inputs, targets, classes for validation
    auxiliary_weight : float
        Weight of auxiliary loss
    mini_batch_size : int
        The size of the batch processing size
    lr : float
        Learning rate for the model training
    nb_epochs : int
        The number of epochs used to train the model
    patience : int
        Number of epochs without val improvement for early stopping (None to disable)

    Returns
    -------
    (NN object, train loss history, val accuracy history)
    """
    train_losses = []
    val_accs = []

    # Defining the optimizer for GD
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Defining the criteria to calculate losses
    criterion = nn.BCEWithLogitsLoss()  # for Binary Classification
    criterion_digit = nn.CrossEntropyLoss()  # for MultiClass Classification
    # Defining the early stopping criterion
    early_stopping = EarlyStopping(patience)

    # Defining DataLoaders for better mini-batches handling.
    # Shuffling makes batches differ between epochs and results in more robust training.
    train_loader = DataLoader(train, mini_batch_size, shuffle=True)

    # Learning loop
    for e in range(nb_epochs):
        # Train the input dataset by dividing it into mini_batch_size small datasets
        for train_input, train_target, train_class in train_loader:
            output, output_first_digit, output_second_digit = model(train_input)
            loss_comparison = criterion(output, train_target)
            loss_digits = criterion_digit(output_first_digit, train_class[:, 0]) + \
                criterion_digit(output_second_digit, train_class[:, 1])
            loss = loss_comparison + auxiliary_weight * loss_digits
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_losses.append(loss.item())
        val_accs.append(compute_accuracy(model, val, mini_batch_size))

        # If the validation accuracy has not improved enough in the last
        # patience epochs, then stop training
        if early_stopping(val_accs[-1]):
            break

    return model, train_losses, val_accs
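# Hypothetical usage of train_pred_labels(), assuming TensorDatasets of
# (input, target, class) triples as described in the docstring. PairModel,
# train_set and val_set are illustrative names, not from the source.
model = PairModel()
model, train_losses, val_accs = train_pred_labels(model, train_set, val_set,
                                                  auxiliary_weight=0.5,
                                                  mini_batch_size=100,
                                                  lr=3e-4,
                                                  nb_epochs=50,
                                                  patience=20)
print(f"best validation accuracy: {max(val_accs):.4f}")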
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ==================== #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
    else:
        device = 'cpu'

    # retrieve labels of ground truth
    labels = graph.ndata['label'].to(device)

    # Extract node features
    feat = graph.ndata['feature'].to(device)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # Reinforcement learning module only for positive training nodes
    rl_idx = th.nonzero(train_mask.to(device) & labels.bool(), as_tuple=False).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(in_dim=feat.shape[-1],
                    num_classes=num_classes,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    activation=th.tanh,
                    step_size=args.step_size,
                    edges=graph.canonical_etypes)
    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # Training and validation using a full graph
        model.train()
        logits_gnn, logits_sim = model(graph, feat)

        # compute loss
        tr_loss = loss_fn(logits_gnn[train_idx], labels[train_idx]) + \
            args.sim_weight * loss_fn(logits_sim[train_idx], labels[train_idx])
        tr_recall = recall_score(labels[train_idx].cpu(),
                                 logits_gnn.data[train_idx].argmax(dim=1).cpu())
        tr_auc = roc_auc_score(labels[train_idx].cpu(),
                               logits_gnn.data[train_idx][:, 1].cpu())

        # validation
        val_loss = loss_fn(logits_gnn[val_idx], labels[val_idx]) + \
            args.sim_weight * loss_fn(logits_sim[val_idx], labels[val_idx])
        val_recall = recall_score(labels[val_idx].cpu(),
                                  logits_gnn.data[val_idx].argmax(dim=1).cpu())
        val_auc = roc_auc_score(labels[val_idx].cpu(),
                                logits_gnn.data[val_idx][:, 1].cpu())

        # backward
        optimizer.zero_grad()
        tr_loss.backward()
        optimizer.step()

        # Print out performance
        print("Epoch {}, Train: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f} | "
              "Val: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
                  epoch, tr_recall, tr_auc, tr_loss.item(), val_recall,
                  val_auc, val_loss.item()))

        # Adjust p value with reinforcement learning module
        model.RLModule(graph, epoch, rl_idx)

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test after all epochs
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load('es_checkpoint.pt'))

    # forward
    logits_gnn, logits_sim = model.forward(graph, feat)

    # compute loss
    test_loss = loss_fn(logits_gnn[test_idx], labels[test_idx]) + \
        args.sim_weight * loss_fn(logits_sim[test_idx], labels[test_idx])
    test_recall = recall_score(labels[test_idx].cpu(),
                               logits_gnn[test_idx].argmax(dim=1).cpu())
    test_auc = roc_auc_score(labels[test_idx].cpu(),
                             logits_gnn.data[test_idx][:, 1].cpu())

    print("Test Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
        test_recall, test_auc, test_loss.item()))
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ==================== #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
        args.num_workers = 0
    else:
        device = 'cpu'

    # retrieve labels of ground truth
    labels = graph.ndata['label'].to(device)

    # Extract node features
    feat = graph.ndata['feature'].to(device)
    layers_feat = feat.expand(args.num_layers, -1, -1)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # Reinforcement learning module only for positive training nodes
    rl_idx = th.nonzero(train_mask.to(device) & labels.bool(), as_tuple=False).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(in_dim=feat.shape[-1],
                    num_classes=num_classes,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    activation=th.tanh,
                    step_size=args.step_size,
                    edges=graph.canonical_etypes)
    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # calculate the distance of each edge and sample based on the distance
        dists = []
        p = []
        for i in range(args.num_layers):
            dist = {}
            graph.ndata['nd'] = th.tanh(model.layers[i].MLP(layers_feat[i]))
            for etype in graph.canonical_etypes:
                graph.apply_edges(_l1_dist, etype=etype)
                dist[etype] = graph.edges[etype].data.pop('ed').detach().cpu()
            dists.append(dist)
            p.append(model.layers[i].p)
            graph.ndata.pop('nd')
        sampler = CARESampler(p, dists, args.num_layers)

        # train
        model.train()
        tr_loss = 0
        tr_recall = 0
        tr_auc = 0
        tr_blk = 0
        train_dataloader = dgl.dataloading.DataLoader(graph,
                                                      train_idx,
                                                      sampler,
                                                      batch_size=args.batch_size,
                                                      shuffle=True,
                                                      drop_last=False,
                                                      num_workers=args.num_workers)
        for input_nodes, output_nodes, blocks in train_dataloader:
            blocks = [b.to(device) for b in blocks]
            train_feature = blocks[0].srcdata['feature']
            train_label = blocks[-1].dstdata['label']
            logits_gnn, logits_sim = model(blocks, train_feature)

            # compute loss
            blk_loss = loss_fn(logits_gnn, train_label) + \
                args.sim_weight * loss_fn(logits_sim, train_label)
            tr_loss += blk_loss.item()
            tr_recall += recall_score(train_label.cpu(),
                                      logits_gnn.argmax(dim=1).detach().cpu())
            tr_auc += roc_auc_score(train_label.cpu(),
                                    softmax(logits_gnn, dim=1)[:, 1].detach().cpu())
            tr_blk += 1

            # backward
            optimizer.zero_grad()
            blk_loss.backward()
            optimizer.step()

        # Reinforcement learning module
        model.RLModule(graph, epoch, rl_idx, dists)

        # validation
        model.eval()
        val_dataloader = dgl.dataloading.DataLoader(graph,
                                                    val_idx,
                                                    sampler,
                                                    batch_size=args.batch_size,
                                                    shuffle=True,
                                                    drop_last=False,
                                                    num_workers=args.num_workers)
        val_recall, val_auc, val_loss = evaluate(model, loss_fn,
                                                 val_dataloader, device)

        # Print out performance
        print("In epoch {}, Train Recall: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
              "Valid Recall: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".format(
                  epoch, tr_recall / tr_blk, tr_auc / tr_blk, tr_loss / tr_blk,
                  val_recall, val_auc, val_loss))

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test with mini batch after all epochs
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load('es_checkpoint.pt'))

    test_dataloader = dgl.dataloading.DataLoader(graph,
                                                 test_idx,
                                                 sampler,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 drop_last=False,
                                                 num_workers=args.num_workers)
    test_recall, test_auc, test_loss = evaluate(model, loss_fn,
                                                test_dataloader, device)
    print("Test Recall: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(
        test_recall, test_auc, test_loss))
def run():
    df = pd.read_csv(config.TRAINING_FILE).fillna("none")

    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.1, random_state=42, stratify=df.category.values)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.LivedoorDataset(article=df_train.article.values,
                                            targets=df_train.category.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.LivedoorDataset(article=df_valid.article.values,
                                            targets=df_valid.category.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AlbertBaseJapanese()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.001,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.valid_fn(valid_data_loader, model, device)
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"epoch = {epoch}, accuracy = {accuracy}")
        es(accuracy, model, config.MODEL_PATH)
        if es.early_stop:
            print("EarlyStopping.")
            break
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)

    with tf.device(device):
        features = tf.convert_to_tensor(data.features, dtype=tf.float32)
        labels = tf.convert_to_tensor(data.labels, dtype=tf.int64)
        train_mask = tf.convert_to_tensor(data.train_mask, dtype=tf.bool)
        val_mask = tf.convert_to_tensor(data.val_mask, dtype=tf.bool)
        test_mask = tf.convert_to_tensor(data.test_mask, dtype=tf.bool)
        num_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------'
          #Edges %d
          #Classes %d
          #Train samples %d
          #Val samples %d
          #Test samples %d""" %
              (n_edges, n_classes,
               train_mask.numpy().sum(),
               val_mask.numpy().sum(),
               test_mask.numpy().sum()))

        g = data.graph
        # add self loop
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
        n_edges = g.number_of_edges()

        # create model
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
        model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                    heads, tf.nn.elu, args.in_drop, args.attn_drop,
                    args.negative_slope, args.residual)
        print(model)
        if args.early_stop:
            stopper = EarlyStopping(patience=100)
        # loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
        #     from_logits=False)
        loss_fcn = tf.nn.sparse_softmax_cross_entropy_with_logits

        # use optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr, epsilon=1e-8)

        # initialize graph
        dur = []
        for epoch in range(args.epochs):
            if epoch >= 3:
                t0 = time.time()
            # forward
            with tf.GradientTape() as tape:
                tape.watch(model.trainable_weights)
                logits = model(features, training=True)
                loss_value = tf.reduce_mean(
                    loss_fcn(labels=labels[train_mask], logits=logits[train_mask]))
                # Manual weight decay: TensorFlow implements weight decay in
                # the Adam(W) optimizer differently from PyTorch, which gave
                # worse results; adding an L2 term on the weights to the loss
                # works around this.
                for weight in model.trainable_weights:
                    loss_value = loss_value + args.weight_decay * tf.nn.l2_loss(weight)

            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            train_acc = accuracy(logits[train_mask], labels[train_mask])

            if args.fastmode:
                val_acc = accuracy(logits[val_mask], labels[val_mask])
            else:
                val_acc = evaluate(model, features, labels, val_mask)
                if args.early_stop:
                    if stopper.step(val_acc, model):
                        break

            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
                  " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                      epoch, np.mean(dur), loss_value.numpy().item(), train_acc,
                      val_acc, n_edges / np.mean(dur) / 1000))

        print()
        if args.early_stop:
            model.load_weights('es_checkpoint.pb')
        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
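# Minimal sketches of the accuracy() and evaluate() helpers the TF2 GAT
# script above assumes (they are not shown in the excerpt); the logic is the
# standard argmax-match accuracy, but the repo's own versions may differ.
import tensorflow as tf


def accuracy(logits, labels):
    preds = tf.argmax(logits, axis=1)
    return tf.reduce_mean(tf.cast(tf.equal(preds, labels), tf.float32)).numpy()


def evaluate(model, features, labels, mask):
    logits = model(features, training=False)
    return accuracy(logits[mask], labels[mask])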
model = HeteroRGCN(G, args.hidden_dim, args.out_dim)
if os.path.isfile(os.path.join(args.model_dir, 'model.pt')) and args.load_pretrained:
    model.load_state_dict(torch.load(os.path.join(args.model_dir, 'model.pt')))
if args.cuda and torch.cuda.is_available():  # is_available is a function; without () it is always truthy
    model = model.cuda()
opt = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wc)
best_embeddings = None
best_loss = 1e20
early_stopping = EarlyStopping(patience=args.patience, verbose=True, datadir=args.model_dir)
for epoch in range(args.epochs):
    total_loss = 0
    for batch_edges, batch_weights, neg in tqdm(dataset, desc='Training'):
        pos0 = batch_edges[:, 0]
        pos1 = batch_edges[:, 1]
        logits = model(G)
        all_embeddings = torch.cat([logits[ntype] for ntype in G.ntypes])
        embedings0 = F.normalize(all_embeddings[pos0], dim=1)
        embedings1 = F.normalize(all_embeddings[pos1], dim=1)
        neg_embedings = F.normalize(all_embeddings[neg], dim=1)
        if args.weighted_loss:
            batch_weights = torch.FloatTensor(batch_weights)
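        # The original snippet is truncated above, before the loss is computed.
        # What follows is a hypothetical continuation, not the repository's
        # code: it assumes one negative sample per positive edge (so the score
        # tensors align) and scores linked/unlinked pairs with a dot product
        # plus binary cross-entropy, weighting the positive half when
        # args.weighted_loss is set.
        pos_score = (embedings0 * embedings1).sum(dim=1)
        neg_score = (embedings0 * neg_embedings).sum(dim=1)
        scores = torch.cat([pos_score, neg_score])
        targets = torch.cat([torch.ones_like(pos_score),
                             torch.zeros_like(neg_score)])
        weight = None
        if args.weighted_loss:
            # scale only the positive pairs by their per-edge weights
            weight = torch.cat([batch_weights.to(scores.device),
                                torch.ones_like(neg_score)])
        loss = F.binary_cross_entropy_with_logits(scores, targets, weight=weight)
        opt.zero_grad()
        loss.backward()
        opt.step()
        total_loss += loss.item()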
class STAMP: def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x, te_y, num_items, init_way, logger): self.sess = sess self.configs = configs self.tr_x = tr_x self.tr_y = tr_y self.val_x = val_x self.val_y = val_y self.te_x = te_x self.te_y = te_y #self.num_items = 37484 #num_items self.num_items = num_items # num_items self.logger = logger self.rnn_hidden_size = configs.rnn_hidden_size self.batch_size = configs.batch_size self.num_layers = configs.num_layers # Initialize the optimizer self.optimizer_type = configs.optimizer_type self.weight_decay = configs.weight_decay self.momentum = configs.momentum self.lr = configs.lr self.eps = configs.eps self.clip_grad = configs.clip_grad self.clip_grad_threshold = configs.clip_grad_threshold self.lr_decay_step = configs.lr_decay_step self.lr_decay = configs.lr_decay self.lr_decay_rate = configs.lr_decay_rate self.drop_prob_ho = configs.drop_prob_ho self.drop_prob_input = configs.drop_prob_input self.drop_prob_recurrent = configs.drop_prob_recurrent # etc self.k = k self.time_sort = configs.time_sort self.loss_type = configs.loss_type self.n_epochs = configs.n_epochs self.is_shuffle = configs.is_shuffle self.embedding_size = configs.embedding_size self.num_topics = configs.num_topics self.early_stop = EarlyStopping(configs.max_patience) # batch_iterator self.tr_sess_idx = np.arange(len(self.tr_y)) self.val_sess_idx = np.arange(len(self.val_y)) self.te_sess_idx = np.arange(len(self.te_y)) # record best epoch self.max_val_recall = [0 for _ in range(len(self.k))] self.max_te_recall = [0 for _ in range(len(self.k))] self.best_epoch = 0 tr_lengths = [len(s) for s in self.tr_x] val_lengths = [len(s) for s in self.val_x] te_lengths = [len(s) for s in self.te_x] tr_maxlen = np.max(tr_lengths) val_maxlen = np.max(val_lengths) te_maxlen = np.max(te_lengths) self.maxlen = np.max([tr_maxlen, val_maxlen, te_maxlen]) self.maxlen = None self.embed_init, self.weight_init, self.bias_init, self.gate_bias_init, self.kern_init = init_way def run(self): self.prepare_model() tf.global_variables_initializer().run() print("End of model prepare") for epoch in range(self.n_epochs): start_time = time.time() tr_pred_loss = self.train_model() val_pred_loss, val_recall_list, val_mrr_list = self.pred_evaluation( mode="valid") te_pred_loss, te_recall_list, te_mrr_list = self.pred_evaluation( mode="test") self.best_epoch, best_check = write_log( self.logger, epoch, tr_pred_loss, val_pred_loss, te_pred_loss, self.k, val_recall_list, val_mrr_list, te_recall_list, te_mrr_list, self.max_val_recall, self.max_te_recall, self.best_epoch, start_time) if self.early_stop.validate(val_recall_list[3]): self.logger.info("Training process is stopped early") break def prepare_model(self): self.rnn_x1 = tf.placeholder(tf.int32, [None, self.maxlen], name='input1') self.rnn_x2 = tf.placeholder(tf.int32, [None, 1], name='input2') self.rnn_y = tf.placeholder(tf.int64, [None, self.num_items], name='output') self.mask_x1 = tf.placeholder(tf.float32, [None, self.maxlen], name='mask_x1') # batch_size * maxlen self.mask_x2 = tf.placeholder(tf.float32, [None, 1], name='mask_x2') self.keep_prob_input = tf.placeholder(tf.float32, name='keep_prob_input') self.keep_prob_ho = tf.placeholder(tf.float32, name='keep_prob_ho') self.batch_var_length = tf.placeholder(tf.float32, name="variable_length") Wemb = tf.get_variable('Wemb', [self.num_items, self.embedding_size], initializer=self.embed_init) w0 = tf.get_variable('w0', [self.embedding_size, 1], initializer=self.weight_init) w1 = 
tf.get_variable('w1', [self.embedding_size, self.embedding_size], initializer=self.weight_init) w2 = tf.get_variable('w2', [self.embedding_size, self.embedding_size], initializer=self.weight_init) w3 = tf.get_variable('w3', [self.embedding_size, self.embedding_size], initializer=self.weight_init) ba = tf.get_variable('ba', [self.embedding_size], initializer=self.bias_init) if self.loss_type == 'EMB': bili = tf.get_variable( 'bili', [self.embedding_size, 2 * self.rnn_hidden_size], initializer=self.weight_init) elif self.loss_type == "Trilinear": ws = tf.get_variable('ws', [self.embedding_size, self.embedding_size], initializer=self.weight_init) bs = tf.get_variable('bs', [self.embedding_size], initializer=self.bias_init) wt = tf.get_variable('wt', [self.embedding_size, self.embedding_size], initializer=self.weight_init) bt = tf.get_variable('bt', [self.embedding_size], initializer=self.bias_init) elif self.loss_type == "TOP1": W_top1 = tf.get_variable( 'W_top1', [2 * self.rnn_hidden_size, self.num_items], initializer=self.weight_init) b_top1 = tf.get_variable('b_top1', [1, self.num_items], initializer=self.bias_init) elif self.loss_type == "TOP1_variant": bili = tf.get_variable( 'bili', [self.embedding_size, 2 * self.rnn_hidden_size], initializer=self.weight_init) W_top1 = tf.get_variable( 'W_top1', [2 * self.rnn_hidden_size, self.num_items], initializer=self.weight_init) b_top1 = tf.get_variable('b_top1', [1, self.num_items], initializer=self.bias_init) emb_x1 = tf.nn.embedding_lookup( Wemb, self.rnn_x1) # xi (batch_size * maxlen * num_hidden) emb_x2 = tf.squeeze(tf.nn.embedding_lookup(Wemb, self.rnn_x2), axis=1) # xt (batch_size * num_hidden) tiled_mask = tf.tile(tf.expand_dims(self.mask_x1, 2), [1, 1, self.rnn_hidden_size ]) # xt (batch_size * maxlen * num_hidden) ms = tf.reduce_sum(tf.multiply(emb_x1, tiled_mask), axis=1) # batch_size * num_hidden tiled_var_length = tf.tile( tf.reshape(self.batch_var_length, [-1, 1]), [1, self.rnn_hidden_size]) # (batch_size * num_hidden) ms = tf.reshape(tf.div(ms, tiled_var_length), [-1, self.rnn_hidden_size]) # batch_size * num_hidden outputs1 = tf.transpose(emb_x1, perm=[1, 0, 2]) # maxlen * batch_size * num_hidden unnormalized_alpha = tf.map_fn( lambda x: compute_alpha_STAMP(x, emb_x2, ms, w0, w1, w2, w3, ba), outputs1) # maxlen * batch_size unnormalized_alpha = tf.multiply(tf.transpose(unnormalized_alpha), self.mask_x1) # batch_size * maxlen self.unnormalized_alpha = unnormalized_alpha alpha = unnormalized_alpha # batch_size * maxlen #alpha = tf.nn.softmax(unnormalized_alpha + 100000000. 
* (self.mask_x1 - 1), dim=1) # batch_size * max_len self.alpha = alpha tiled_alpha = tf.tile( tf.expand_dims(alpha, axis=2), [1, 1, self.rnn_hidden_size]) # batch_size * maxlen * hidden_size self.tiled_alpha = tiled_alpha ma = tf.reduce_sum(tf.multiply(emb_x1, tiled_alpha), axis=1) # batch * hidden hs = tf.nn.tanh(tf.matmul(ma, ws) + bs) # batch * hidden ht = tf.nn.tanh(tf.matmul(emb_x2, wt) + bt) # batch * hidden if self.loss_type == 'EMB': proj = tf.concat([hs, ht], 1) proj = tf.nn.dropout(proj, self.keep_prob_ho) ytem = tf.matmul(Wemb, bili) pred = tf.matmul(proj, tf.transpose(ytem)) self.pred = tf.nn.softmax(pred) self.cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=self.rnn_y)) elif self.loss_type == "Trilinear": pred = tf.nn.sigmoid( tf.matmul(tf.multiply(ht, hs), tf.transpose(Wemb))) # batch * n_item self.pred = tf.nn.softmax(pred) self.cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=self.rnn_y)) elif self.loss_type == "TOP1": proj = tf.concat([hs, ht], 1) proj = tf.nn.dropout(proj, self.keep_prob_ho) pred = tf.matmul(proj, W_top1) + b_top1 self.pred = tf.nn.tanh(pred) self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type) elif self.loss_type == "TOP1_variant": pred = tf.nn.sigmoid( tf.matmul(tf.multiply(ht, hs), tf.transpose(Wemb))) # batch * n_item self.pred = tf.nn.tanh(pred) self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type) self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost) def train_model(self): if self.configs.is_shuffle: self.tr_sess_idx = np.random.permutation(self.tr_sess_idx) batch_loss_list = [] num_batch = math.ceil( np.float32(len(self.tr_sess_idx)) / self.batch_size) for batch_itr in range(int(num_batch)): start_itr = self.batch_size * batch_itr end_itr = np.minimum(self.batch_size * (batch_itr + 1), len(self.tr_sess_idx)) temp_batch_x = self.tr_x[self.tr_sess_idx[start_itr:end_itr]] # temp_batch_y = self.tr_y[self.tr_sess_idx[start_itr:end_itr]] # batch_x1, batch_x2, batch_y, mask_x1, mask_x2, labels, lengths = convert_batch_data_stamp( temp_batch_x, temp_batch_y, self.num_items, maxlen=self.maxlen) temp_keep_prob_ho = 1.0 - self.drop_prob_ho temp_keep_prob_input = 1.0 - self.drop_prob_input feed_dict = { self.rnn_x1: batch_x1, self.rnn_x2: batch_x2, self.rnn_y: batch_y, self.mask_x1: mask_x1, self.mask_x2: mask_x2, self.keep_prob_input: temp_keep_prob_input, self.keep_prob_ho: temp_keep_prob_ho, self.batch_var_length: lengths } _, pred_loss_, preds2 = self.sess.run( [self.optimizer, self.cost, self.pred], feed_dict=feed_dict) batch_loss_list.append(pred_loss_) return np.mean(batch_loss_list) def pred_evaluation(self, mode): if mode == "valid": sess_idx = self.val_sess_idx df_x = self.val_x df_y = self.val_y elif mode == "test": sess_idx = self.te_sess_idx df_x = self.te_x df_y = self.te_y batch_loss_list = [] recalls = [] mrrs = [] evaluation_point_count = [] for itr in range(len(self.k)): recalls.append(0) mrrs.append(0) evaluation_point_count.append(0) num_batch = math.ceil(np.float32(len(sess_idx)) / self.batch_size) for batch_itr in range(int(num_batch)): start_itr = self.batch_size * batch_itr end_itr = np.minimum(self.batch_size * (batch_itr + 1), len(sess_idx)) temp_batch_x = df_x[sess_idx[start_itr:end_itr]] temp_batch_y = df_y[sess_idx[start_itr:end_itr]] batch_x1, batch_x2, batch_y, mask_x1, mask_x2, labels, lengths \ = convert_batch_data_stamp(temp_batch_x,temp_batch_y,self.num_items,maxlen=self.maxlen) feed_dict = { self.rnn_x1: batch_x1, 
self.rnn_x2: batch_x2, self.rnn_y: batch_y, self.mask_x1: mask_x1, self.mask_x2: mask_x2, self.keep_prob_input: 1.0, self.keep_prob_ho: 1.0, self.batch_var_length: lengths } preds, pred_loss_ = self.sess.run([self.pred, self.cost], feed_dict=feed_dict) batch_loss_list.append(pred_loss_) recalls, mrrs, evaluation_point_count = evaluation( labels, preds, recalls, mrrs, evaluation_point_count, self.k) recall_list = [] mrr_list = [] for itr in range(len(self.k)): recall = np.asarray(recalls[itr], dtype=np.float32) / evaluation_point_count[itr] mrr = np.asarray(mrrs[itr], dtype=np.float32) / evaluation_point_count[itr] if self.max_val_recall[itr] < recall and mode == "valid": self.max_val_recall[itr] = recall if self.max_te_recall[itr] < recall and mode == "test": self.max_te_recall[itr] = recall recall_list.append(recall) mrr_list.append(mrr) return np.mean(batch_loss_list), recall_list, mrr_list
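# Hypothetical driver for the STAMP class above; the actual entry point is not
# included in this snippet. It assumes `configs`, the train/valid/test splits,
# `num_items`, and `logger` are built elsewhere, and that `init_way` is the
# 5-tuple of initializers the constructor unpacks. `k` needs at least four
# cutoffs, since run() early-stops on val_recall_list[3].
with tf.Session() as sess:
    model = STAMP(sess, k=[1, 5, 10, 20], configs=configs,
                  tr_x=tr_x, tr_y=tr_y, val_x=val_x, val_y=val_y,
                  te_x=te_x, te_y=te_y, num_items=num_items,
                  init_way=(tf.random_normal_initializer(stddev=0.05),) * 5,
                  logger=logger)
    model.run()  # builds the graph, initializes variables, then trains/evaluates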
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:  # a negative id means CPU; gpu 0 is a valid CUDA device
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def train(train_dataset, dev_dataset):
    train_dataloader = DataLoader(train_dataset, batch_size=args.train_batch_size,
                                  shuffle=True, num_workers=2)
    global best_dev
    nonlocal global_step
    n_sample = len(train_dataloader)
    early_stopping = EarlyStopping(args.patience, logger=logger)
    # Loss functions
    adversarial_loss = torch.nn.BCELoss().to(device)
    classified_loss = torch.nn.CrossEntropyLoss().to(device)
    # Optimizers
    optimizer_G = torch.optim.Adam(G.parameters(), lr=args.G_lr)  # optimizer for generator
    optimizer_D = torch.optim.Adam(D.parameters(), lr=args.D_lr)  # optimizer for discriminator
    optimizer_E = AdamW(E.parameters(), args.bert_lr)

    G_total_train_loss = []
    D_total_fake_loss = []
    D_total_real_loss = []
    FM_total_train_loss = []
    D_total_class_loss = []
    valid_detection_loss = []
    valid_oos_ind_precision = []
    valid_oos_ind_recall = []
    valid_oos_ind_f_score = []

    all_features = []
    result = dict()

    for i in range(args.n_epoch):
        # Initialize model state
        G.train()
        D.train()
        E.train()

        G_train_loss = 0
        D_fake_loss = 0
        D_real_loss = 0
        FM_train_loss = 0
        D_class_loss = 0

        for sample in tqdm.tqdm(train_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, y = sample
            batch = len(token)

            ood_sample = (y == 0.0)
            # weight = torch.ones(len(ood_sample)).to(device) - ood_sample * args.beta
            # real_loss_func = torch.nn.BCELoss(weight=weight).to(device)

            # the labels used to train the generator and the discriminator
            valid_label = FloatTensor(batch, 1).fill_(1.0).detach()
            fake_label = FloatTensor(batch, 1).fill_(0.0).detach()

            optimizer_E.zero_grad()
            sequence_output, pooled_output = E(token, mask, type_ids)
            real_feature = pooled_output

            # train D on real
            optimizer_D.zero_grad()
            real_f_vector, discriminator_output, classification_output = D(
                real_feature, return_feature=True)
            discriminator_output = discriminator_output.squeeze()
            real_loss = adversarial_loss(discriminator_output, (y != 0.0).float())
            # real_loss = real_loss_func(discriminator_output, (y != 0.0).float())
            if n_class > 2:  # more than two classes: train the classifier in addition to the discriminator
                class_loss = classified_loss(classification_output, y.long())
                real_loss += class_loss
                D_class_loss += class_loss.detach()
            real_loss.backward()

            if args.do_vis:
                all_features.append(real_f_vector.detach())

            # G removed #
            # # train D on fake
            # if args.model == 'lstm_gan' or args.model == 'cnn_gan':
            #     z = FloatTensor(np.random.normal(0, 1, (batch, 32, args.G_z_dim))).to(device)
            # else:
            #     z = FloatTensor(np.random.normal(0, 1, (batch, args.G_z_dim))).to(device)
            # fake_feature = G(z).detach()
            # fake_discriminator_output = D.detect_only(fake_feature)
            # # fake_loss = args.beta * adversarial_loss(fake_discriminator_output, fake_label)
            # fake_loss = adversarial_loss(fake_discriminator_output, fake_label)
            # fake_loss.backward()

            optimizer_D.step()
            if args.fine_tune:
                optimizer_E.step()

            # G removed #
            # # train G
            # optimizer_G.zero_grad()
            # if args.model == 'lstm_gan' or args.model == 'cnn_gan':
            #     z = FloatTensor(np.random.normal(0, 1, (batch, 32, args.G_z_dim))).to(device)
            # else:
            #     z = FloatTensor(np.random.normal(0, 1, (batch, args.G_z_dim))).to(device)
            # fake_f_vector, D_decision = D.detect_only(G(z), return_feature=True)
            # gd_loss = adversarial_loss(D_decision, valid_label)
            # fm_loss = torch.abs(torch.mean(real_f_vector.detach(), 0) - torch.mean(fake_f_vector, 0)).mean()
            # g_loss = gd_loss + 0 * fm_loss
            # g_loss.backward()
            # optimizer_G.step()

            global_step += 1

            # D_fake_loss += fake_loss.detach()
            D_real_loss += real_loss.detach()
            # G_train_loss += g_loss.detach() + fm_loss.detach()
            # FM_train_loss += fm_loss.detach()
        logger.info('[Epoch {}] Train: D_fake_loss: {}'.format(i, D_fake_loss / n_sample))
        logger.info('[Epoch {}] Train: D_real_loss: {}'.format(i, D_real_loss / n_sample))
        logger.info('[Epoch {}] Train: D_class_loss: {}'.format(i, D_class_loss / n_sample))
        logger.info('[Epoch {}] Train: G_train_loss: {}'.format(i, G_train_loss / n_sample))
        logger.info('[Epoch {}] Train: FM_train_loss: {}'.format(i, FM_train_loss / n_sample))
        logger.info('---------------------------------------------------------------------------')

        D_total_fake_loss.append(D_fake_loss / n_sample)
        D_total_real_loss.append(D_real_loss / n_sample)
        D_total_class_loss.append(D_class_loss / n_sample)
        G_total_train_loss.append(G_train_loss / n_sample)
        FM_total_train_loss.append(FM_train_loss / n_sample)

        if dev_dataset:
            logger.info('#################### eval result at step {} ####################'
                        .format(global_step))
            eval_result = eval(dev_dataset)
            valid_detection_loss.append(eval_result['detection_loss'])
            valid_oos_ind_precision.append(eval_result['oos_ind_precision'])
            valid_oos_ind_recall.append(eval_result['oos_ind_recall'])
            valid_oos_ind_f_score.append(eval_result['oos_ind_f_score'])

            # early_stopping returns:
            #  1 -> improvement, save the model
            #  0 -> no improvement, nothing to save
            # -1 -> no improvement and patience exceeded, stop early
            signal = early_stopping(-eval_result['eer'])
            if signal == -1:
                break
            elif signal == 0:
                pass
            elif signal == 1:
                save_gan_model(D, G, config['gan_save_path'])
                if args.fine_tune:
                    save_model(E, path=config['bert_save_path'], model_name='bert')

            logger.info(eval_result)
            logger.info('valid_eer: {}'.format(eval_result['eer']))
            logger.info('valid_oos_ind_precision: {}'.format(eval_result['oos_ind_precision']))
            logger.info('valid_oos_ind_recall: {}'.format(eval_result['oos_ind_recall']))
            logger.info('valid_oos_ind_f_score: {}'.format(eval_result['oos_ind_f_score']))
            logger.info('valid_fpr95: {}'.format(
                ErrorRateAt95Recall(eval_result['all_binary_y'], eval_result['y_score'])))

    if args.patience >= args.n_epoch:
        save_gan_model(D, G, config['gan_save_path'])
        if args.fine_tune:
            save_model(E, path=config['bert_save_path'], model_name='bert')

    freeze_data['D_total_fake_loss'] = D_total_fake_loss
    freeze_data['D_total_real_loss'] = D_total_real_loss
    freeze_data['D_total_class_loss'] = D_total_class_loss
    freeze_data['G_total_train_loss'] = G_total_train_loss
    freeze_data['FM_total_train_loss'] = FM_total_train_loss
    freeze_data['valid_real_loss'] = valid_detection_loss
    freeze_data['valid_oos_ind_precision'] = valid_oos_ind_precision
    freeze_data['valid_oos_ind_recall'] = valid_oos_ind_recall
    freeze_data['valid_oos_ind_f_score'] = valid_oos_ind_f_score

    best_dev = -early_stopping.best_score

    if args.do_vis:
        all_features = torch.cat(all_features, 0).cpu().numpy()
        result['all_features'] = all_features
    return result
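# ErrorRateAt95Recall is logged above but not defined in this snippet. A
# sketch of the usual definition of that metric -- the false-positive rate at
# the smallest score threshold whose recall on the positive class reaches
# 95% -- assuming binary labels and higher scores for positives; the
# repository's own implementation may differ:
import numpy as np


def ErrorRateAt95Recall(labels, scores):
    labels = np.asarray(labels).astype(bool)
    scores = np.asarray(scores)
    order = np.argsort(-scores)               # sort by descending score
    labels = labels[order]
    tp = np.cumsum(labels)                    # true positives at each cut
    fp = np.cumsum(~labels)                   # false positives at each cut
    recall = tp / max(labels.sum(), 1)
    cutoff = int(np.searchsorted(recall, 0.95))
    cutoff = min(cutoff, len(labels) - 1)     # guard: recall may never reach 95%
    return fp[cutoff] / max((~labels).sum(), 1)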
def main():
    best_prec1 = 0
    test = True
    log = True
    save_best = True
    sample_length = 0.5
    # together I want about 5000 seconds from each subject
    num_samples = int(round(5000 / sample_length))  # np.int/np.round aliases are deprecated
    batch_size = 100
    num_epochs = 200
    dropout = 0.4
    task = 'subject_prediction'
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    torch.backends.cudnn.benchmark = True

    root_path = pathlib.Path.cwd()
    matrix = root_path.joinpath('data', f'cleaned_{sample_length}sec_{num_samples}.npy')
    training_dataset = LFPData(data_file=matrix, split='train', standardize=True)
    training_loader = DataLoader(training_dataset, shuffle=True, batch_size=batch_size,
                                 pin_memory=True, num_workers=1)
    validation_set = LFPData(data_file=matrix, split='valid', standardize=True)
    validation_loader = DataLoader(validation_set, shuffle=False, batch_size=batch_size,
                                   pin_memory=True, num_workers=1)

    # input_shape = (2, int(422 * sample_length))  # this is a hack to figure out shape of fc layer
    # net = conv1d_nn.Net(input_shape=input_shape, dropout=dropout)
    net = conv1d_nn.FCN(in_channels=2, num_classes=9)
    net.apply(init_weights)
    net.cuda()
    criterion = nn.CrossEntropyLoss()
    criterion.cuda()
    # optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(net.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-8)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5,
                                                     threshold=1e-2)
    stop_criterion = EarlyStopping()

    title = f'FCN2_cleaned_{sample_length}sec_{num_samples}'
    if log:
        log_dir = root_path.joinpath('logs', title)
        if not log_dir.exists():
            log_dir.mkdir()
        training_log = log_dir.joinpath('log')
        if not training_log.exists():
            open(str(training_log), 'w').close()
        result_writer = ResultsWriter(str(training_log), overwrite=True)
        mlog = MeterLogger(server='localhost', port=8097, nclass=9, title=title, env=title)

    for epoch in range(1, num_epochs + 1):
        mlog.timer.reset()
        train_epoch(training_loader, net, criterion, optimizer, mlog)
        if log:
            result_writer.update(title, {'Train': mlog.peek_meter()})
        mlog.print_meter(mode="Train", iepoch=epoch)
        mlog.reset_meter(mode="Train", iepoch=epoch)
        validation_loss = val_epoch(validation_loader, net, criterion, mlog)
        prec1 = mlog.meter['accuracy'].value()[0]
        if save_best:
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            if is_best:
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(root_path.joinpath('checkpoints', title),
                                {'epoch': epoch + 1,
                                 'state_dict': net.state_dict(),
                                 'best_prec1': best_prec1,
                                 'optimizer': optimizer.state_dict()},
                                is_best)
        if log:
            result_writer.update(title, {'Validation': mlog.peek_meter()})
        mlog.print_meter(mode="Test", iepoch=epoch)
        mlog.reset_meter(mode="Test", iepoch=epoch)
        stop_criterion.eval_loss(validation_loss)
        if stop_criterion.get_nsteps() >= 30:
            print('Early stopping')
            break
        print(optimizer.param_groups[0]['lr'])
        scheduler.step(validation_loss)

    print('Training finished', best_prec1)

    if test:
        test_set = LFPData(data_file=matrix, split='test', standardize=True)
        test_loader = DataLoader(test_set, shuffle=False, batch_size=batch_size,
                                 pin_memory=True, num_workers=1)
        test_loss, test_acc = test_epoch(test_loader, net, criterion, mlog)
        result_writer.update(title, {'Test': {'loss': test_loss, 'accuracy': test_acc}})
        print(test_loss, test_acc)

    # save pngs of visdom plot into log path
    plot_visdom(mlog, log_dir)
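# init_weights is applied to the network above but not defined in this
# snippet. A typical implementation (an assumption, not the file's actual
# helper) applies Kaiming initialization to conv/linear layers:
import torch.nn as nn


def init_weights(m):
    if isinstance(m, (nn.Conv1d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)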
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    args_academic = read_args()
    data = dataprocess_han.input_data_han(args_academic)
    # g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    # val_mask, test_mask = load_data(args['dataset'])
    features = torch.tensor(data.a_text_embed, dtype=torch.float32)
    labels = torch.tensor(data.a_class)

    APA_g = dgl.graph(data.APA_matrix, ntype='author', etype='coauthor')
    APVPA_g = dgl.graph(data.APVPA_matrix, ntype='author', etype='attendance')
    APPA_g = dgl.graph(data.APPA_matrix, ntype='author', etype='reference')
    # g = [APA_g, APPA_g]
    g = [APA_g, APVPA_g, APPA_g]
    num_classes = 4

    features = features.to(args['device'])
    labels = labels.to(args['device'])

    # if args['hetero']:
    #     from model_hetero import HAN
    #     model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
    #                 in_size=features.shape[1],
    #                 hidden_size=args['hidden_units'],
    #                 out_size=num_classes,
    #                 num_heads=args['num_heads'],
    #                 dropout=args['dropout']).to(args['device'])
    # else:
    model = HAN(num_meta_paths=len(g),
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    model.load_state_dict(torch.load("./model_para.pt"))

    for epoch in range(args['num_epochs']):
        X = [[i] for i in range(args_academic.A_n)]
        train_X, test_X, _, _ = train_test_split(X, X, test_size=0.8)
        # train_X, test_X, _, _ = train_test_split(train_X, train_X, test_size=0.2) #
        train_mask = get_binary_mask(args_academic.A_n, train_X)
        test_mask = get_binary_mask(args_academic.A_n, test_X)
        # train_mask = torch.tensor(data.train_mask)
        # test_mask = torch.tensor(data.test_mask)
        val_mask = test_mask

        train_mask = train_mask.to(args['device'])
        val_mask = val_mask.to(args['device'])
        test_mask = test_mask.to(args['device'])

        model.train()
        logits, _ = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask],
                                                          labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    model.eval()
    _, embedding = model(g, features)
    embed_file = open("./node_embedding.txt", "w")
    for k in range(embedding.shape[0]):
        embed_file.write('a' + str(k) + " ")
        for l in range(embedding.shape[1] - 1):
            embed_file.write(str(embedding[k][l].item()) + " ")
        embed_file.write(str(embedding[k][-1].item()) + "\n")
    embed_file.close()

    # test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    # print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
    #     test_loss.item(), test_micro_f1, test_macro_f1))

    torch.save(model.state_dict(), "./model_para.pt")
import numpy as np import utils from utils import EarlyStopping from tqdm import tqdm from torch import nn, optim from config import config from torchsummary import summary from torch.optim import SGD, Adam import torch.nn.functional as F from torch.utils.data import Dataset, DataLoader from timeit import default_timer as timer from sklearn.metrics import f1_score, accuracy_score from thop import profile from pthflops import count_ops early_stopping = EarlyStopping(patience=20, verbose=True) #patience=7, verbose=False early_stopping_f1 = EarlyStopping(patience=20, verbose=True) #patience=7, verbose=False class SARDataset(Dataset): def __init__(self, images_df, datapath, labelpath, winsize=36, mode="2D"): self.images_df = images_df.copy() #csv data = np.load(datapath) self.data = data.transpose([2, 0, 1]).astype('float32') self.label = np.load(labelpath) self.mode = mode self.winsize = winsize def __len__(self): return len(self.images_df)
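    # The class is cut off above. A hypothetical __getitem__ for completeness,
    # assuming each CSV row stores the centre pixel of a winsize x winsize
    # patch ('row' and 'col' are guessed column names); the real
    # implementation may crop and reshape differently.
    def __getitem__(self, idx):
        r = int(self.images_df.iloc[idx]['row'])
        c = int(self.images_df.iloc[idx]['col'])
        half = self.winsize // 2
        patch = self.data[:, r - half:r + half, c - half:c + half]  # C x win x win
        target = int(self.label[r, c])
        if self.mode != "2D":
            patch = patch.reshape(patch.shape[0], -1)  # flatten spatial dims for 1-D models
        return patch, target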
elif args.freeze_bert and args.use_adversary: raise Exception( 'No purpose in using an adversary if BERT layers are frozen') else: param_optimizer = list(model.named_parameters( )) + list(predictor.named_parameters()) + list(discriminator.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [ {'params': [p for n, p in param_optimizer if not any( nd in n for nd in no_decay)], 'weight_decay': 0.01}, {'params': [p for n, p in param_optimizer if any( nd in n for nd in no_decay)], 'weight_decay': 0.0} ] es = EarlyStopping(patience=args.es_patience) optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate, correct_bias=False) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=args.num_warmup_steps, num_training_steps=num_train_optimization_steps) for epoch in range(1, num_train_epochs+1): # training if not args.freeze_bert: model.train() else: model.eval() predictor.train() if args.use_adversary: discriminator.train()
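# Illustration only (not this repository's code): when args.freeze_bert is
# set, the snippet above keeps BERT in eval mode, which presumes the encoder's
# parameters were also excluded from the optimizer. A common way to freeze the
# encoder explicitly, so that only predictor/discriminator weights update:
def freeze_encoder(model):
    for param in model.parameters():
        param.requires_grad = False  # exclude encoder weights from backprop
    model.eval()  # keep dropout and normalization statistics fixed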
def main(args): # load and preprocess dataset if args.dataset == 'cora': data = CoraGraphDataset() elif args.dataset == 'citeseer': data = CiteseerGraphDataset() elif args.dataset == 'pubmed': data = PubmedGraphDataset() else: raise ValueError('Unknown dataset: {}'.format(args.dataset)) g = data[0] if args.gpu < 0: cuda = False else: cuda = True g = g.int().to(args.gpu) features = g.ndata['feat'] labels = g.ndata['label'] train_mask = g.ndata['train_mask'] val_mask = g.ndata['val_mask'] test_mask = g.ndata['test_mask'] num_feats = features.shape[1] n_classes = data.num_labels n_edges = g.number_of_edges() print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % (n_edges, n_classes, train_mask.int().sum().item(), val_mask.int().sum().item(), test_mask.int().sum().item())) # add self loop g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) n_edges = g.number_of_edges() # create model heads = ([args.num_heads] * (args.num_layers - 1)) + [args.num_out_heads] model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes, heads, F.elu, args.in_drop, args.attn_drop, args.negative_slope, args.residual) print(model) if args.early_stop: stopper = EarlyStopping(patience=100) if cuda: model.cuda() loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) # initialize graph dur = [] for epoch in range(args.epochs): model.train() if epoch >= 3: if cuda: torch.cuda.synchronize() t0 = time.time() # forward logits = model(features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() if epoch >= 3: if cuda: torch.cuda.synchronize() dur.append(time.time() - t0) train_acc = accuracy(logits[train_mask], labels[train_mask]) if args.fastmode: val_acc = accuracy(logits[val_mask], labels[val_mask]) else: val_acc = evaluate(model, features, labels, val_mask) if args.early_stop: if stopper.step(val_acc, model): break print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |" " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.item(), train_acc, val_acc, n_edges / np.mean(dur) / 1000)) print() if args.early_stop: model.load_state_dict(torch.load('es_checkpoint.pt')) acc = evaluate(model, features, labels, test_mask) print("Test Accuracy {:.4f}".format(acc))
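# The GAT mains above all call accuracy() and evaluate() helpers defined
# elsewhere in the example files; they typically amount to the following
# (a sketch consistent with how they are called, not a verbatim copy):
import torch


def accuracy(logits, labels):
    _, indices = torch.max(logits, dim=1)
    correct = torch.sum(indices == labels)
    return correct.item() * 1.0 / len(labels)


def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        return accuracy(logits, labels)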
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()  # convert the masks to boolean type
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        # build the model for the heterogeneous graph
        from model_hetero import HAN
        model = HAN(
            # edges built earlier (pa, ap) compose the meta-path PAP
            meta_paths=[['pa', 'ap'], ['pf', 'fp']],
            in_size=features.shape[1],
            hidden_size=args['hidden_units'],
            out_size=num_classes,
            num_heads=args['num_heads'],
            dropout=args['dropout']).to(args['device'])
        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask],
                                                          labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
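# The HAN scripts above assume an EarlyStopping whose signature differs from
# the accuracy-only helper sketched earlier: stopper.step(val_loss, val_acc,
# model) plus a load_checkpoint(model) method. A minimal sketch of such a
# dual-criterion stopper, checkpointing when both loss and accuracy are at
# least as good as the best seen; the actual class may differ:
import torch


class EarlyStopping:
    def __init__(self, patience=10, filename='early_stop.pth'):
        self.patience = patience
        self.filename = filename
        self.best_loss = None
        self.best_acc = None
        self.counter = 0
        self.early_stop = False

    def step(self, loss, acc, model):
        if self.best_loss is None:
            self.best_loss, self.best_acc = loss, acc
            self.save_checkpoint(model)
        elif loss > self.best_loss and acc < self.best_acc:
            self.counter += 1  # worse on both criteria
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            if loss <= self.best_loss and acc >= self.best_acc:
                self.save_checkpoint(model)  # improved on both criteria
            self.best_loss = min(loss, self.best_loss)
            self.best_acc = max(acc, self.best_acc)
            self.counter = 0
        return self.early_stop

    def save_checkpoint(self, model):
        torch.save(model.state_dict(), self.filename)

    def load_checkpoint(self, model):
        model.load_state_dict(torch.load(self.filename))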
def main(args): # load and preprocess dataset if args.dataset == 'reddit': data = RedditDataset() elif args.dataset in ['photo', "computer"]: data = MsDataset(args) else: data = load_data(args) features = torch.FloatTensor(data.features) labels = torch.LongTensor(data.labels) train_mask = torch.ByteTensor(data.train_mask) val_mask = torch.ByteTensor(data.val_mask) test_mask = torch.ByteTensor(data.test_mask) num_feats = features.shape[1] n_classes = data.num_labels n_edges = data.graph.number_of_edges() current_time = time.strftime('%d_%H:%M:%S', localtime()) writer = SummaryWriter(log_dir='runs/' + current_time + '_' + args.sess, flush_secs=30) print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(), val_mask.sum().item(), test_mask.sum().item())) if args.gpu < 0: cuda = False else: cuda = True torch.cuda.set_device(args.gpu) features = features.cuda() labels = labels.cuda() train_mask = train_mask.bool().cuda() val_mask = val_mask.bool().cuda() test_mask = test_mask.bool().cuda() g = data.graph # add self loop if args.dataset != 'reddit': g.remove_edges_from(nx.selfloop_edges(g)) g = DGLGraph(g) g.add_edges(g.nodes(), g.nodes()) n_edges = g.number_of_edges() print('edge number %d'%(n_edges)) # create model heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads] model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes, heads, F.elu, args.idrop, args.adrop, args.alpha, args.bias, args.residual, args.l0) print(model) if args.early_stop: stopper = EarlyStopping(patience=150) if cuda: model.cuda() loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) dur = [] time_used = 0 for epoch in range(args.epochs): model.train() if epoch >= 3: t0 = time.time() # forward logits = model(features) loss = loss_fcn(logits[train_mask], labels[train_mask]) loss_l0 = args.loss_l0*( model.gat_layers[0].loss) optimizer.zero_grad() (loss + loss_l0).backward() optimizer.step() if epoch >= 3: dur.append(time.time() - t0) train_acc = accuracy(logits[train_mask], labels[train_mask]) writer.add_scalar('edge_num/0', model.gat_layers[0].num, epoch) if args.fastmode: val_acc, loss = accuracy(logits[val_mask], labels[val_mask], loss_fcn) else: val_acc,_ = evaluate(model, features, labels, val_mask, loss_fcn) if args.early_stop: if stopper.step(val_acc, model): break print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |" " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(), train_acc, val_acc, n_edges / np.mean(dur) / 1000)) writer.add_scalar('loss', loss.item(), epoch) writer.add_scalar('f1/train_f1_mic', train_acc, epoch) writer.add_scalar('f1/test_f1_mic', val_acc, epoch) writer.add_scalar('time/time', time_used, epoch) writer.close() if args.early_stop: model.load_state_dict(torch.load('es_checkpoint.pt')) acc, _ = evaluate(model,features, labels, test_mask, loss_fcn) print("Test Accuracy {:.4f}".format(acc))
    return validation_loss, validation_acc_1


# Part 5. 'main' function
if __name__ == '__main__':
    logger.info("Begin evaluating on validation set before training")
    validate_function(val_loader)

    logger.info("training status: ")
    early_stopping = EarlyStopping(patience=basic_configs['early_stopping_patience'], delta=0)
    max_val_acc = 0  # best validation accuracy seen so far (was previously used before assignment)
    max_val_acc_epoch = 0
    for epoch in range(basic_configs['num_epochs']):
        logger.info("Begin training epoch {}".format(epoch + 1))
        validation_acc = train_function(epoch)
        if validation_acc.avg > max_val_acc:
            max_val_acc = validation_acc.avg
            max_val_acc_epoch = epoch + 1
        early_stopping(validation_acc.avg)
        logger.info("Early stopping counter: {}".format(early_stopping.counter))
        logger.info("Early stopping best_score: {}".format(early_stopping.best_score))
        logger.info("Early stopping early_stop: {}".format(early_stopping.early_stop))
        if early_stopping.early_stop:  # actually stop once patience is exhausted
            break