Example #1
def train(args):

    # load and preprocess dataset
    #data = load_data(args)
    #data = CoraFull()
    #data = Coauthor('cs')
    #FIRST, CHECK DATASET
    path = './dataset/' + str(args.dataset) + '/'
    '''
    edges = np.loadtxt(path + 'edges.txt')
    edges = edges.astype(int)

    features = np.loadtxt(path + 'features.txt')

    train_mask = np.loadtxt(path + 'train_mask.txt')
    train_mask = train_mask.astype(int)

    labels = np.loadtxt(path + 'labels.txt')
    labels = labels.astype(int)
    '''
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    n_classes = max(labels) - min(labels) + 1

    assert train_mask.shape[0] == num_nodes

    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)

    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)

    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()

    u = edges[:, 0]
    v = edges[:, 1]

    #initialize a DGL graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)

    # add self loop
    if isinstance(g, nx.classes.digraph.DiGraph):
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    elif isinstance(g, DGLGraph):
        g = transform.add_self_loop(g)

    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    record_time = 0
    avg_run_time = 0
    Used_memory = 0

    for epoch in range(args.num_epochs):
        #print('epoch = ', epoch)
        #print('mem0 = {}'.format(mem0))
        torch.cuda.synchronize()
        tf = time.time()
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        now_mem = torch.cuda.max_memory_allocated(0)
        print('now_mem : ', now_mem)
        Used_memory = max(now_mem, Used_memory)
        tf1 = time.time()

        optimizer.zero_grad()
        torch.cuda.synchronize()
        t1 = time.time()
        loss.backward()
        torch.cuda.synchronize()
        optimizer.step()
        t2 = time.time()
        run_time_this_epoch = t2 - tf

        if epoch >= 3:
            dur.append(time.time() - t0)
            record_time += 1

            avg_run_time += run_time_this_epoch

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        #log for each step
        print(
            'Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
            .format(epoch, run_time_this_epoch, train_acc,
                    (now_mem * 1.0 / (1024**2))))
        '''
        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):   
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), train_acc,
                     val_acc, n_edges / np.mean(dur) / 1000))
        
        '''

    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))

    #OUTPUT we need
    avg_run_time = avg_run_time * 1. / record_time
    Used_memory /= (1024**3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, avg_run_time))
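
Example #1 calls an EarlyStopping helper whose step(val_acc, model) returns True once validation accuracy stops improving, and it reloads 'es_checkpoint.pt' afterwards. That helper is not shown above; the sketch below is a minimal stand-in consistent with that interface (the default patience, the file name, and the improvement test are assumptions, not the project's actual code).

import torch

class EarlyStopping:
    """Minimal sketch of the stopper assumed by Example #1 (not the original code).

    step() checkpoints the best weights to 'es_checkpoint.pt' and returns True
    once the monitored accuracy has not improved for `patience` calls.
    """

    def __init__(self, patience=100):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def step(self, acc, model):
        if self.best_score is None or acc > self.best_score:
            # Improvement: reset the counter and save the current weights.
            self.best_score = acc
            self.counter = 0
            torch.save(model.state_dict(), 'es_checkpoint.pt')
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop
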
Example #2
    def train(self, setting):
        train_data, train_loader = self._get_data(split='train')
        vali_data, vali_loader = self._get_data(split='valid',
                                                scaler=train_data.scaler)
        test_data, test_loader = self._get_data(split='test',
                                                scaler=train_data.scaler)

        path = './checkpoints/' + setting
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience,
                                       verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        train_loss_epochs = []
        valid_loss_epochs = []
        test_loss_epochs = []

        for epoch in range(self.args.train_epochs):
            self.current_epoch = epoch
            iter_count = 0
            train_loss = []

            self.model.train()
            for i, batch in enumerate(train_loader):
                iter_count += 1

                if 'UCR' in self.args.data_path:
                    batch_x, batch_y = batch
                    batch_x_mark, batch_y_mark = None, None
                else:
                    batch_x, batch_y, batch_x_mark, batch_y_mark = batch
                    batch_x_mark = batch_x_mark.double().to(self.device)
                    batch_y_mark = batch_y_mark.double().to(self.device)

                model_optim.zero_grad()

                batch_x = batch_x.double().to(self.device)
                batch_y = batch_y.double().to(self.device)

                if self.args.model == 'HLInformer':
                    dec_inp = torch.zeros_like(
                        batch_y[:, -self.args.pred_len:, :]).double()
                    #dec_inp = torch.cat([batch_x[:, -self.args.label_len:, :], dec_inp], dim=1).double().to(self.device)
                    dec_inp_mark = batch_y_mark[:, :,
                                                1:]  #torch.cat([batch_x_mark[:, -self.args.label_len:, :], batch_y_mark[:,:,1:]], dim=1).double().to(self.device)
                    outputs = self.model(batch_x, batch_x_mark, dec_inp,
                                         dec_inp_mark)
                    batch_y = batch_y[:,
                                      -self.args.pred_len:, :].to(self.device)
                else:
                    outputs = self.model(batch_x, batch_x_mark)

                if self.args.model[:2] == 'HL' and len(
                        self.args.group_factors
                ) > 0 and 'UCR' not in self.args.data_path:
                    batch_y = torch.cat(
                        (batch_x[:, (-max(self.args.group_factors) + 1):,
                                 0:1], batch_y),
                        dim=1)

                if self.args.classification:
                    batch_y = batch_y.long()  # avoid re-wrapping an existing tensor with torch.tensor()
                loss = criterion(outputs, batch_y.squeeze())
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(
                        i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * (
                        (self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(
                        speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                model_optim.step()
                if self.args.plot_gradients:
                    self.plot_grad_flow()

            train_loss = np.average(train_loss)
            vali_loss = self.validate(vali_loader, criterion, train_data)
            test_loss = self.validate(test_loader, criterion, train_data)

            train_loss_epochs.append(train_loss)
            valid_loss_epochs.append(vali_loss)
            test_loss_epochs.append(test_loss)

            print(
                "Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}"
                .format(epoch + 1, train_steps, train_loss, vali_loss,
                        test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                train_loss_epochs = np.array(train_loss_epochs)
                valid_loss_epochs = np.array(valid_loss_epochs)
                test_loss_epochs = np.array(test_loss_epochs)

                np.save(path + '/train_history.npy', train_loss_epochs)
                np.save(path + '/valid_history.npy', valid_loss_epochs)
                np.save(path + '/test_history.npy', test_loss_epochs)

                break

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model
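
Example #2 uses a different EarlyStopping convention: the object is called with the validation loss, the model, and a checkpoint directory, and it exposes an early_stop flag; after training, path + '/checkpoint.pth' is reloaded. Below is a minimal, hypothetical sketch that matches that calling convention (defaults and messages are assumptions, not the project's actual code).

import os
import torch

class EarlyStopping:
    """Minimal sketch of the callable, loss-based stopper assumed by Example #2."""

    def __init__(self, patience=7, verbose=False, delta=0.0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss, model, path):
        if self.best_loss is None or val_loss < self.best_loss - self.delta:
            # Improvement: reset the counter and checkpoint the current weights.
            self.best_loss = val_loss
            self.counter = 0
            torch.save(model.state_dict(), os.path.join(path, 'checkpoint.pth'))
            if self.verbose:
                print(f'Validation loss improved to {val_loss:.6f}; checkpoint saved.')
        else:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
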
Example #3
class NARM:
    def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x, te_y,
                 num_items, init_way, logger):
        self.sess = sess
        self.configs = configs
        self.tr_x = tr_x
        self.tr_y = tr_y
        self.val_x = val_x
        self.val_y = val_y
        self.te_x = te_x
        self.te_y = te_y
        self.num_items = num_items
        self.logger = logger

        self.rnn_hidden_size = configs.rnn_hidden_size
        self.batch_size = configs.batch_size
        self.num_layers = configs.num_layers

        # Initialize the optimizer
        self.optimizer_type = configs.optimizer_type
        self.weight_decay = configs.weight_decay
        self.momentum = configs.momentum
        self.lr = configs.lr
        self.eps = configs.eps

        self.clip_grad = configs.clip_grad
        self.clip_grad_threshold = configs.clip_grad_threshold
        self.lr_decay_step = configs.lr_decay_step
        self.lr_decay = configs.lr_decay
        self.lr_decay_rate = configs.lr_decay_rate
        self.drop_prob_ho = configs.drop_prob_ho
        self.drop_prob_input = configs.drop_prob_input
        self.drop_prob_recurrent = configs.drop_prob_recurrent

        # etc
        self.k = k
        self.time_sort = configs.time_sort
        self.loss_type = configs.loss_type
        self.n_epochs = configs.n_epochs
        self.is_shuffle = configs.is_shuffle
        self.embedding_size = configs.embedding_size
        self.num_topics = configs.num_topics
        self.early_stop = EarlyStopping(configs.max_patience)

        # batch_iterator
        self.tr_sess_idx = np.arange(len(self.tr_y))
        self.val_sess_idx = np.arange(len(self.val_y))
        self.te_sess_idx = np.arange(len(self.te_y))

        # record best epoch
        self.max_val_recall = [0 for _ in range(len(self.k))]
        self.max_te_recall = [0 for _ in range(len(self.k))]
        self.best_epoch = 0

        tr_lengths = [len(s) for s in self.tr_x]
        val_lengths = [len(s) for s in self.val_x]
        te_lengths = [len(s) for s in self.te_x]
        tr_maxlen = np.max(tr_lengths)
        val_maxlen = np.max(val_lengths)
        te_maxlen = np.max(te_lengths)
        self.maxlen = np.max([tr_maxlen, val_maxlen, te_maxlen])
        self.maxlen = None
        self.embed_init, self.weight_init, self.bias_init, self.gate_bias_init, self.kern_init = init_way

    def run(self):
        self.prepare_model()
        tf.global_variables_initializer().run()
        print("End of model prepare")
        for epoch in range(self.n_epochs):
            start_time = time.time()
            tr_pred_loss = self.train_model()
            val_pred_loss, val_recall_list, val_mrr_list = self.pred_evaluation(
                mode="valid")
            te_pred_loss, te_recall_list, te_mrr_list = self.pred_evaluation(
                mode="test")

            self.best_epoch, best_check = write_log(
                self.logger, epoch, tr_pred_loss, val_pred_loss, te_pred_loss,
                self.k, val_recall_list, val_mrr_list, te_recall_list,
                te_mrr_list, self.max_val_recall, self.max_te_recall,
                self.best_epoch, start_time)
            if self.early_stop.validate(val_recall_list[3]):
                self.logger.info("Training process is stopped early")
                break

    def prepare_model(self):
        self.rnn_x = tf.placeholder(tf.int32, [None, None], name='input')
        self.rnn_y = tf.placeholder(tf.int64, [None, self.num_items],
                                    name='output')
        self.mask = tf.placeholder(tf.float32, [None, None], name='mask')
        self.keep_prob_input = tf.placeholder(tf.float32,
                                              name='keep_prob_input')
        self.keep_prob_ho = tf.placeholder(tf.float32, name='keep_prob_ho')
        self.batch_var_length = tf.placeholder(tf.int32,
                                               name="variable_length")

        Wemb = tf.get_variable('Wemb', [self.num_items, self.embedding_size],
                               initializer=self.embed_init)
        W_encoder = tf.get_variable(
            'W_encoder', [self.rnn_hidden_size, self.rnn_hidden_size],
            initializer=self.weight_init)
        W_decoder = tf.get_variable(
            'W_decoder', [self.rnn_hidden_size, self.rnn_hidden_size],
            initializer=self.weight_init)
        Bi_vector = tf.get_variable('Bi_vector', [1, self.rnn_hidden_size],
                                    initializer=self.weight_init)
        if self.loss_type == 'EMB':
            bili = tf.get_variable(
                'bili', [self.embedding_size, 2 * self.rnn_hidden_size],
                initializer=self.weight_init)
        elif self.loss_type == "Trilinear":
            ws = tf.get_variable('ws',
                                 [self.embedding_size, self.embedding_size],
                                 initializer=self.weight_init)
            bs = tf.get_variable('bs', [self.embedding_size],
                                 initializer=self.bias_init)
            wt = tf.get_variable('wt',
                                 [self.embedding_size, self.embedding_size],
                                 initializer=self.weight_init)
            bt = tf.get_variable('bt', [self.embedding_size],
                                 initializer=self.bias_init)
        elif self.loss_type == "TOP1":
            W_top1 = tf.get_variable(
                'W_top1', [2 * self.rnn_hidden_size, self.num_items],
                initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)
        elif self.loss_type == "TOP1_variant":
            bili = tf.get_variable(
                'bili', [self.embedding_size, 2 * self.rnn_hidden_size],
                initializer=self.weight_init)
            W_top1 = tf.get_variable(
                'W_top1', [2 * self.rnn_hidden_size, self.num_items],
                initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)

        emb = tf.nn.embedding_lookup(Wemb, self.rnn_x)
        emb = tf.nn.dropout(emb, self.keep_prob_input)

        custom_cell = tf.contrib.rnn.GRUCell(num_units=self.rnn_hidden_size)
        outputs, states = tf.nn.dynamic_rnn(
            custom_cell,
            emb,
            sequence_length=self.batch_var_length,
            dtype=tf.float32)

        self.outputs = outputs
        self.last_hidden = states  # 512 x 100
        outputs = tf.transpose(outputs, perm=[1, 0, 2])  # 19x512x100

        squares = tf.map_fn(lambda x: compute_alpha(
            x, self.last_hidden, W_encoder, W_decoder, Bi_vector),
                            outputs)  # 19x512
        weight = tf.nn.softmax(tf.transpose(squares) + 100000000. *
                               (self.mask - 1),
                               axis=1)  # batch_size * max_len
        attention_proj = tf.reduce_sum(outputs *
                                       tf.transpose(weight)[:, :, None],
                                       axis=0)

        # num_items x 2*100
        if self.loss_type == 'EMB':
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "Trilinear":
            hs = tf.nn.tanh(tf.matmul(attention_proj, ws) +
                            bs)  # batch * hidden
            ht = tf.nn.tanh(tf.matmul(states, wt) + bt)  # batch * hidden
            pred = tf.nn.sigmoid(
                tf.matmul(tf.multiply(ht, hs),
                          tf.transpose(Wemb)))  # batch * n_item
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "TOP1":
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            pred = tf.matmul(proj, W_top1) + b_top1
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)
        elif self.loss_type == "TOP1_variant":
            proj = tf.concat([attention_proj, states], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)

        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)

    def train_model(self):
        if self.configs.is_shuffle:
            self.tr_sess_idx = np.random.permutation(self.tr_sess_idx)
        batch_loss_list = []
        num_batch = math.ceil(
            np.float32(len(self.tr_sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(self.tr_sess_idx))
            temp_batch_x = self.tr_x[self.tr_sess_idx[start_itr:end_itr]]
            temp_batch_y = self.tr_y[self.tr_sess_idx[start_itr:end_itr]]
            batch_x, batch_y, mask, labels, lengths = convert_batch_data(
                temp_batch_x, temp_batch_y, self.num_items, maxlen=None)
            temp_keep_prob_ho = 1.0 - self.drop_prob_ho
            temp_keep_prob_input = 1.0 - self.drop_prob_input
            feed_dict = {
                self.rnn_x: batch_x,
                self.rnn_y: batch_y,
                self.mask: mask,
                self.keep_prob_input: temp_keep_prob_input,
                self.keep_prob_ho: temp_keep_prob_ho,
                self.batch_var_length: lengths
            }
            _, pred_loss_, preds2 = self.sess.run(
                [self.optimizer, self.cost, self.pred], feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)

        return np.mean(batch_loss_list)

    def pred_evaluation(self, mode):
        if mode == "valid":
            sess_idx = self.val_sess_idx
            df_x = self.val_x
            df_y = self.val_y
        elif mode == "test":
            sess_idx = self.te_sess_idx
            df_x = self.te_x
            df_y = self.te_y

        batch_loss_list = []
        recalls = []
        mrrs = []
        evaluation_point_count = []
        for itr in range(len(self.k)):
            recalls.append(0)
            mrrs.append(0)
            evaluation_point_count.append(0)
        num_batch = math.ceil(np.float32(len(sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(sess_idx))
            temp_batch_x = df_x[sess_idx[start_itr:end_itr]]
            temp_batch_y = df_y[sess_idx[start_itr:end_itr]]
            batch_x, batch_y, mask, labels, lengths = convert_batch_data(
                temp_batch_x, temp_batch_y, self.num_items, maxlen=None)
            feed_dict = {
                self.rnn_x: batch_x,
                self.rnn_y: batch_y,
                self.mask: mask,
                self.keep_prob_input: 1.0,
                self.keep_prob_ho: 1.0,
                self.batch_var_length: lengths
            }
            preds, pred_loss_ = self.sess.run([self.pred, self.cost],
                                              feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)

            recalls, mrrs, evaluation_point_count = evaluation(
                labels, preds, recalls, mrrs, evaluation_point_count, self.k)

        recall_list = []
        mrr_list = []
        for itr in range(len(self.k)):
            recall = np.asarray(recalls[itr],
                                dtype=np.float32) / evaluation_point_count[itr]
            mrr = np.asarray(mrrs[itr],
                             dtype=np.float32) / evaluation_point_count[itr]
            if self.max_val_recall[itr] < recall and mode == "valid":
                self.max_val_recall[itr] = recall
            if self.max_te_recall[itr] < recall and mode == "test":
                self.max_te_recall[itr] = recall
            recall_list.append(recall)
            mrr_list.append(mrr)

        return np.mean(batch_loss_list), recall_list, mrr_list
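
Example #3 runs on TensorFlow 1.x, but its stopper is framework-agnostic: EarlyStopping(configs.max_patience) with a validate(metric) method that returns True once the monitored recall stops improving. A plain-Python sketch consistent with that usage follows; everything except the validate() interface is an assumption, not the repository's actual code.

class EarlyStopping:
    """Minimal sketch of the metric-based stopper assumed by Example #3."""

    def __init__(self, max_patience):
        self.max_patience = max_patience
        self.patience = 0        # consecutive evaluations without improvement
        self.best_metric = None

    def validate(self, metric):
        # Returns True ("stop training") after `max_patience` consecutive
        # evaluations with no improvement in the monitored metric.
        if self.best_metric is None or metric > self.best_metric:
            self.best_metric = metric
            self.patience = 0
        else:
            self.patience += 1
        return self.patience > self.max_patience
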
Example #4
def train(name, run, folds_csv):
    
    wandb.init(project='dfdc', 
               config=config_defaults,
               name=f'{name},val_fold:{VAL_FOLD},run{run}')
    config = wandb.config
    
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    
    model = timm.create_model('xception', pretrained=True, num_classes=1)
    model.to(device)
    # model = DataParallel(model).to(device)
    wandb.watch(model)
    
    if config.optimizer == 'radam' :
        optimizer = torch_optimizer.RAdam(model.parameters(), 
                                          lr=config.learning_rate,
                                          weight_decay = config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), 
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
        
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=config.schedule_patience,
        threshold=0.001,
        mode="min",
        factor = config.schedule_factor
    )
    criterion = nn.BCEWithLogitsLoss()
    es = EarlyStopping(patience = 10, mode='min')
    
    data_train = CelebDF_Dataset(data_root=DATA_ROOT,
                                mode='train',
                                folds_csv=folds_csv,
                                val_fold=VAL_FOLD,
                                test_fold=TEST_FOLD,
                                cutout_fill=config.cutout_fill,
                                hardcore=False,
                                random_erase=True,
                                oversample_real=True,
                                transforms=create_train_transforms(size=224))
    data_train.reset(config.rand_seed)
    train_data_loader = DataLoader( data_train, 
                                    batch_size=config.train_batch_size, 
                                    num_workers=8, 
                                    shuffle=True, 
                                    drop_last=True)

    data_val = CelebDF_Dataset(data_root=DATA_ROOT,
                                mode='val',
                                folds_csv=folds_csv,
                                val_fold=VAL_FOLD,
                                test_fold=TEST_FOLD,
                                hardcore=False,
                                oversample_real=False,
                                transforms=create_val_transforms(size=224))
    data_val.reset(config.rand_seed)

    val_data_loader = DataLoader(data_val, 
                                 batch_size=config.valid_batch_size, 
                                 num_workers=8, 
                                 shuffle=False, 
                                 drop_last=True)
    
    data_test = CelebDF_Dataset(data_root=DATA_ROOT,
                            mode='test',
                            folds_csv=folds_csv,
                            val_fold=VAL_FOLD,
                            test_fold=TEST_FOLD,
                            hardcore=False,
                            oversample_real=False,
                            transforms=create_val_transforms(size=224))
    data_test.reset(config.rand_seed)

    test_data_loader = DataLoader(data_test, 
                                 batch_size=config.valid_batch_size, 
                                 num_workers=8, 
                                 shuffle=False, 
                                 drop_last=True)

    train_history = []
    val_history = []
    test_history = []
    
    for epoch in range(config.epochs):
        print(f"Epoch = {epoch}/{config.epochs-1}")
        print("------------------")
        
        train_metrics = train_epoch(model, train_data_loader, optimizer, criterion, epoch)
        valid_metrics = valid_epoch(model, val_data_loader, criterion, epoch)
        scheduler.step(valid_metrics['valid_loss'])

        print(f"TRAIN_AUC = {train_metrics['train_auc']}, TRAIN_LOSS = {train_metrics['train_loss']}")
        print(f"VALID_AUC = {valid_metrics['valid_auc']}, VALID_LOSS = {valid_metrics['valid_loss']}")
        
        train_history.append(train_metrics)
        val_history.append(valid_metrics)

        es(valid_metrics['valid_loss'], model, model_path=os.path.join(OUTPUT_DIR,f"{name}_fold_{VAL_FOLD}_run_{run}.h5"))
        if es.early_stop:
            print("Early stopping")
            break
    
    model.load_state_dict(torch.load(f'weights/{name}_fold_{VAL_FOLD}_run_{run}.h5'))

    neptune.init('sowmen/dfdc')
    neptune.create_experiment(name=f'{name},val_fold:{VAL_FOLD},run{run}')

    test_history = test(model, test_data_loader, criterion)

    try:
        pkl.dump( train_history, open( f"train_history{name}{run}.pkl", "wb" ) )
        pkl.dump( val_history, open( f"val_history{name}{run}.pkl", "wb" ) )
        pkl.dump( test_history, open( f"test_history{name}{run}.pkl", "wb" ) )
    except Exception as e:
        print(f"Error pickling: {e}")

    wandb.save(f'weights/{name}_fold_{VAL_FOLD}_run_{run}.h5')
Example #5
    model = resuneta.model
    print('ResUnet-a compiled!')
else:
    model = unet((rows, cols, channels))
    #model.compile(optimizer=adam, loss=loss, metrics=['accuracy'])
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # print model information
    model.summary()

filepath = './models/'
# define early stopping callback
earlystop = EarlyStopping(monitor='val_loss',
                          min_delta=0.0001,
                          patience=10,
                          verbose=1,
                          mode='min')
checkpoint = ModelCheckpoint(filepath + 'unet_exp_' + str(exp) + '.h5',
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')
callbacks_list = [earlystop, checkpoint]

# train the model
start_training = time.time()
model_info = model.fit(patches_tr_aug,
                       patches_tr_ref_aug_h,
                       batch_size=batch_size,
                       epochs=10,
Example #6
def private_dataset_train(args):
    device = 'cuda' if args.gpu else 'cpu'
    # model initialization section
    # get the FEMNIST dataset
    train_dataset,test_dataset = get_private_dataset_balanced(args)
    user_groups = FEMNIST_iid(train_dataset, args.user_number)

    models = {"2_layer_CNN": CNN_2layer_fc_model,  # dict of model constructors keyed by type name
              "3_layer_CNN": CNN_3layer_fc_model}
    modelsindex = ["2_layer_CNN","3_layer_CNN"]

    if args.new_private_training:
        model_list,model_type_list = get_model_list(args.initialurl,modelsindex,models)
        #model_list,model_type_list = get_model_list('Src/EmptyModel',modelsindex,models)
    else:
        model_list,model_type_list = get_model_list(args.privateurl,modelsindex,models)
        #model_list,model_type_list = get_model_list('Src/EmptyModelFemnist',modelsindex,models)


    private_model_private_dataset_train_losses = []
    private_model_private_dataset_validation_losses = []
    for n, model in enumerate(model_list):
        print('train Local Model {} on Private Dataset'.format(n))
        model.to(device)
        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=0.5)
        elif args.optimizer == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=1e-4)
        trainloader = DataLoader(DatasetSplit(train_dataset,list(user_groups[n])),batch_size=32,shuffle=True)
        testloader = DataLoader(test_dataset,batch_size=128, shuffle=True)
        criterion = nn.NLLLoss().to(device)
        train_epoch_losses = []
        validation_epoch_losses = []
        print('Begin Private Training')
        earlyStopping = EarlyStopping(patience=5,verbose=True,path='Src/EmptyModelFemnist/LocalModel{}Type{}.pkl'.format(n,model_type_list[n],args.privateepoch))
        for epoch in range(args.privateepoch):
            model.train()
            train_batch_losses = []
            for batch_idx, (images, labels) in enumerate(trainloader):
                images,labels = images.to(device),labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs,labels)
                loss.backward()
                optimizer.step()
                if batch_idx % 5 ==0:
                    print('Local Model {} Type {} Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        n,model_type_list[n],epoch + 1, batch_idx * len(images), len(trainloader.dataset),
                        100. * batch_idx / len(trainloader), loss.item()))
                train_batch_losses.append(loss.item())
            loss_avg = sum(train_batch_losses)/len(train_batch_losses)
            train_epoch_losses.append(loss_avg)

            model.eval()
            val_batch_losses = []
            for batch_idx, (images, labels) in enumerate(testloader):
                images,labels = images.to(device),labels.to(device)
                outputs = model(images)
                loss = criterion(outputs,labels)
                if batch_idx % 5 ==0:
                    print('Local Model {} Type {} Val Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        n,model_type_list[n],epoch + 1, batch_idx * len(images), len(testloader.dataset),
                        100. * batch_idx / len(testloader), loss.item()))
                val_batch_losses.append(loss.item())
            loss_avg = sum(val_batch_losses)/len(val_batch_losses)
            validation_epoch_losses.append(loss_avg)
            earlyStopping(loss_avg, model)
            if earlyStopping.early_stop:
                print("Early stopping")
                break

        # torch.save(model.state_dict(),'Src/PrivateModel/LocalModel{}Type{}.pkl'.format(n,model_type_list[n],args.privateepoch))
        private_model_private_dataset_train_losses.append(train_epoch_losses)
        private_model_private_dataset_validation_losses.append(validation_epoch_losses)


    plt.figure()
    for i,val in enumerate(private_model_private_dataset_train_losses):
        print(val)
        plt.plot(range(len(val)),val,label='model :'+str(i))
    plt.legend(loc='best')
    plt.title('private_model_private_dataset_train_demo_losses')
    plt.xlabel('epochs')
    plt.ylabel('Train loss')
    x_major_locator = MultipleLocator(1)  # set the x-axis tick interval to 1
    ax = plt.gca()  # get the current axes instance
    ax.xaxis.set_major_locator(x_major_locator)  # put major x ticks on multiples of 1
    plt.xlim(0, args.privateepoch)
    plt.savefig('Src/Figure/private_model_private_dataset_train_demo_losses.png')
    plt.show()

    plt.figure()
    for i, val in enumerate(private_model_private_dataset_validation_losses):
        print(val)
        plt.plot(range(len(val)), val, label='model :' + str(i))
    plt.legend(loc='best')
    plt.title('private_model_private_dataset_validation_demo_losses')
    plt.xlabel('epochs')
    plt.ylabel('Validation loss')
    x_major_locator = MultipleLocator(1)  # set the x-axis tick interval to 1
    ax = plt.gca()  # get the current axes instance
    ax.xaxis.set_major_locator(x_major_locator)  # put major x ticks on multiples of 1
    plt.xlim(0, args.privateepoch)
    plt.savefig('Src/Figure/private_model_private_dataset_validation_demo_losses.png')
    plt.show()

    print('End Private Training')
Example #7
    def train(self):

        from datetime import datetime
        current_time = datetime.now().strftime('%b%d_%H-%M-%S')
        task = self.args.task
        tb_writer = SummaryWriter(log_dir='./runs/' + task + "/" +
                                  current_time + self.args.prefix,
                                  comment=self.args.prefix)

        vocabs, lexical_mapping = self._build_model()

        train_data = DataLoader(self.args,
                                vocabs,
                                lexical_mapping,
                                self.args.train_data,
                                self.args.batch_size,
                                for_train=True)
        dev_data = DataLoader(self.args,
                              vocabs,
                              lexical_mapping,
                              self.args.dev_data,
                              self.args.batch_size,
                              for_train=False)
        test_data = DataLoader(self.args,
                               vocabs,
                               lexical_mapping,
                               self.args.test_data,
                               self.args.batch_size,
                               for_train='Eval')

        train_data.set_unk_rate(self.args.unk_rate)

        # WRITE PARAMETERS
        with open('./param.txt', 'w') as f:
            for name, param in self.model.named_parameters():
                f.write('name:' + name + "\n")
                f.write(str(param))
                f.write('size:' + str(param.size()) + '\n')

        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in self.model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.
        }, {
            'params': [
                p for n, p in self.model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0
        }]
        gradient_accumulation_steps = 1
        t_total = len(
            train_data) // gradient_accumulation_steps * self.args.epochs

        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=self.args.lr,
                          eps=self.args.adam_epsilon)
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=self.args.warmup_steps,
                                         t_total=t_total)

        self.model.zero_grad()

        set_seed(42, self.args.gpus)

        batches_acm, loss_acm = 0, 0

        # Train!
        logger.info("***** Running training *****")
        logger.info("  Task: %s", self.args.task)
        logger.info("  Num examples = %d", len(train_data))
        logger.info("  Num Epochs = %d", self.args.epochs)
        logger.info("  Total optimization steps = %d", t_total)
        logger.info("  Running Language Model = %s", self.args.lm_model)
        logger.info("  Running Model = %s", self.args.encoder_type)

        best_acc = 0
        best_model_wts = copy.deepcopy(self.model.state_dict())
        total_steps = 0

        train_iterator = trange(int(self.args.epochs), desc="Epoch")

        # initialize the early_stopping object
        early_stopping = EarlyStopping(patience=self.args.patience,
                                       verbose=True)

        for _ in train_iterator:
            epoch_iterator = tqdm(train_data, desc="Iteration")

            running_loss = 0.0
            running_corrects = 0

            batch_count = self.args.batch_multiplier

            # Turn on the train mode
            for step, batch in enumerate(epoch_iterator):

                self.model.train()
                batch = move_to_cuda(batch, self.device)

                logits, labels, ans_ids = self.model(batch, train=True)
                logits_for_pred = logits.clone().detach()
                loss = self.criterion(logits, labels)
                loss_value = loss.item()

                pred_values, pred_indices = torch.max(logits_for_pred, 1)
                labels = labels.tolist()
                pred = pred_indices.tolist()
                corrects = [i for i, j in zip(labels, pred) if i == j]

                # Statistics
                running_loss += loss.item()
                running_corrects += len(corrects)

                if batch_count == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   1.0)

                    optimizer.step()
                    scheduler.step()

                    total_steps += 1
                    optimizer.zero_grad()
                    self.model.zero_grad()

                    batch_count = self.args.batch_multiplier

                loss_acm += loss_value

                loss.backward()
                batch_count -= 1

                if (batches_acm %
                    (self.args.batch_multiplier * self.args.batch_size)
                        == 0) & (batches_acm != 0) & (step != 0):
                    logger.info(
                        'Train Epoch %d, Batch %d, loss %.3f, Accuracy %.3f',
                        _, batches_acm, loss_acm / batches_acm,
                        running_corrects / (self.args.batch_size * step))
                    tb_writer.add_scalar('Training_loss',
                                         loss_acm / batches_acm, batches_acm)
                    tb_writer.add_scalar(
                        'Training_Accuracy',
                        running_corrects / (self.args.batch_size * step),
                        batches_acm)
                    torch.cuda.empty_cache()
                batches_acm += 1

            epoch_loss = running_loss / batches_acm
            epoch_acc = running_corrects / len(train_data)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(_, epoch_loss,
                                                       epoch_acc))

            tb_writer.add_scalar('Training_Epoch_loss', epoch_loss, _)
            tb_writer.add_scalar('Training_Epoch_Accuracy', epoch_acc, _)

            # Evaluate on Development Set
            eval_epoch_acc, eval_epoch_loss = self._run_evaluate(
                dev_data, _, write_answer=False)

            print('Overall_Dev Acc: {:.4f}'.format(eval_epoch_acc))

            tb_writer.add_scalar('Dev_Epoch_Accuracy', eval_epoch_acc, _)

            ##################################

            # Evaluate on Test Set
            test_epoch_acc, test_epoch_loss = self._run_evaluate(
                test_data, _, write_answer=True)

            print('Overall_Test Acc: {:.4f}'.format(test_epoch_acc))
            tb_writer.add_scalar('Test_Epoch_Accuracy', test_epoch_acc, _)

            # Save only best accuracy model on dev set
            if eval_epoch_acc > best_acc:
                best_acc = eval_epoch_acc
                best_model_wts = copy.deepcopy(self.model.state_dict())

            # early_stopping needs the validation loss to check if it has decreased,
            # and if it has, it will make a checkpoint of the current model
            early_stopping(epoch_acc, self.model)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            self.model.train()

        logger.info('Best val Acc: {:4f}'.format(best_acc))

        torch.save(
            {
                'args': self.save_args,
                'model': best_model_wts
            }, '%s/epoch%d_batch%d_model_best_%s' %
            (self.args.ckpt, self.args.epochs, batches_acm, self.args.prefix))
Example #8
def train(config):
    loss_full = []
    exhaustion_count = 0
    step = 0
    config.step = step
    writer = None
    start_time = time.time()

    if config.log_tensorboard:
        writer = SummaryWriter(
            log_dir=
            f"{config.train_dir}/runs/{config.model_name}/{config.turn}-{datetime.datetime.now().replace(microsecond=0).isoformat()}{'-' + os.environ['REMARK'] if 'REMARK' in os.environ else ''}"
        )

    model = get_model(config)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    early_stopping = EarlyStopping()
    early_stopping_loss = EarlyStopping(patience=200)
    model, _ = restore_checkpoint(config, model, optimizer, early_stopping)
    step = config.step
    dataset = get_train_dataset(config)

    for epoch in range(config.epochs):
        print(f'epoch {epoch}/{config.epochs}')
        dataloader = iter(
            DataLoader(dataset,
                       batch_size=config.batch_size,
                       shuffle=True,
                       num_workers=config.num_workers,
                       drop_last=True,
                       pin_memory=config.pin_memory))

        total_batches = int(len(dataset) / config.batch_size)
        with tqdm(total=total_batches,
                  desc=f'Training epoch {epoch}/{config.epochs}') as pbar:
            for i in range(1, total_batches + 1):
                model.train()
                try:
                    minibatch = next(dataloader)
                except StopIteration:
                    exhaustion_count += 1
                    tqdm.write(
                        f"Training data exhausted for {exhaustion_count} times after {i} batches, reuse the dataset."
                    )
                    dataloader = iter(
                        DataLoader(dataset,
                                   batch_size=config.batch_size,
                                   shuffle=True,
                                   num_workers=config.num_workers,
                                   drop_last=True,
                                   pin_memory=config.pin_memory))
                    minibatch = next(dataloader)

                step += 1
                if config.model_name == 'LSTUR':
                    y_pred = model(minibatch["user"],
                                   minibatch["clicked_news_length"],
                                   minibatch["candidate_news"],
                                   minibatch["clicked_news"])
                elif config.model_name == 'HiFiArk':
                    y_pred, regularizer_loss = model(
                        minibatch["candidate_news"], minibatch["clicked_news"])
                elif config.model_name == 'TANR':
                    y_pred, topic_classification_loss = model(
                        minibatch["candidate_news"], minibatch["clicked_news"])
                elif config.model_name.startswith('DM'):
                    y_pred = model(minibatch)
                else:
                    y_pred = model(minibatch["candidate_news"],
                                   minibatch["clicked_news"])

                loss = torch.stack(
                    [x[0] for x in -F.log_softmax(y_pred, dim=1)]).mean()
                if config.model_name == 'HiFiArk':
                    if i % config.num_iters_show_loss == 0:
                        if config.log_tensorboard:
                            writer.add_scalar('Train/BaseLoss', loss.item(),
                                              step)
                            writer.add_scalar('Train/RegularizerLoss',
                                              regularizer_loss.item(), step)
                            writer.add_scalar(
                                'Train/RegularizerBaseRatio',
                                regularizer_loss.item() / loss.item(), step)
                    loss += config.regularizer_loss_weight * regularizer_loss
                elif config.model_name == 'TANR':
                    if i % config.num_iters_show_loss == 0:
                        if config.log_tensorboard:
                            writer.add_scalar('Train/BaseLoss', loss.item(),
                                              step)
                            writer.add_scalar('Train/TopicClassificationLoss',
                                              topic_classification_loss.item(),
                                              step)
                            writer.add_scalar(
                                'Train/TopicBaseRatio',
                                topic_classification_loss.item() / loss.item(),
                                step)
                    loss += config.topic_classification_loss_weight * topic_classification_loss
                loss_full.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                loss_f = np.mean(loss_full)

                if i % config.num_iters_show_loss == 0:
                    if config.log_tensorboard:
                        writer.add_scalar('Train/Loss', loss.item(), step)

                if i % config.num_batches_show_loss == 0:
                    tqdm.write(
                        f"Time {time_since(start_time)}, batches {i}, current loss {loss.item():.4f}, average loss: {loss_f:.4f}"
                    )
                    stopping_loss, _ = early_stopping_loss(loss_f)
                    if stopping_loss:
                        tqdm.write('Early stop due to no improvement on loss.')
                        eval_and_save_checkpoint(config, model, optimizer,
                                                 early_stopping, writer,
                                                 loss_f, step, start_time, i)
                        break

                if i % config.num_batches_validate == 0 or i == total_batches:
                    should_break = eval_and_save_checkpoint(
                        config, model, optimizer, early_stopping, writer,
                        loss_f, step, start_time, i)
                    if should_break:
                        break

                pbar.update(1)
Example #9
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_imdb_raw()

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features_m, features_a, features_d = features

    features_a = torch.zeros(features_a.shape[0], 10)
    features_d = torch.zeros(features_d.shape[0], 10)

    features_m = features_m.to(args['device'])
    features_a = features_a.to(args['device'])
    features_d = features_d.to(args['device'])

    features = {'movie': features_m, 'actor': features_a, 'director':features_d}
    
    in_size = {'actor': features_a.shape[1], 'movie': features_m.shape[1], 'director': features_d.shape[1]}

    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    model = HMSG(meta_paths = [['ma','am'], ['md', 'dm'], ['am'], ['dm']],
                in_size = in_size,
                hidden_size = args['hidden_units'],
                out_size = num_classes,
                aggre_type = 'attention',
                num_heads = args['num_heads'],
                dropout = args['dropout']).to(args['device'])
    g = g.to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        z, logits = model(g, features)

        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1, z = evaluate(model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
             'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
           epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1, z = evaluate(model, g, features, labels, test_mask, loss_fcn)

    emd_imdb, label_imdb = z[test_mask], labels[test_mask]
    np.savetxt('./out/emd_imdb.txt',emd_imdb.cpu())
    np.savetxt('./out/label_imdb.txt', np.array(label_imdb.cpu(), dtype=np.int32))

    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
Example #10
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--data_dir', default='./data/', type=str)
    parser.add_argument('--output_dir', default='output/', type=str)
    parser.add_argument('--data_name', default='Beauty', type=str)
    parser.add_argument('--do_eval', action='store_true')
    parser.add_argument('--ckp', default=10, type=int, help="pretrain epochs 10, 20, 30...")

    # model args
    parser.add_argument("--model_name", default='Finetune_sample', type=str)
    parser.add_argument("--hidden_size", type=int, default=64, help="hidden size of transformer model")
    parser.add_argument("--num_hidden_layers", type=int, default=2, help="number of layers")
    parser.add_argument('--num_attention_heads', default=2, type=int)
    parser.add_argument('--hidden_act', default="gelu", type=str) # gelu relu
    parser.add_argument("--attention_probs_dropout_prob", type=float, default=0.5, help="attention dropout p")
    parser.add_argument("--hidden_dropout_prob", type=float, default=0.5, help="hidden dropout p")
    parser.add_argument("--initializer_range", type=float, default=0.02)
    parser.add_argument('--max_seq_length', default=50, type=int)

    # train args
    parser.add_argument("--lr", type=float, default=0.001, help="learning rate of adam")
    parser.add_argument("--batch_size", type=int, default=256, help="number of batch_size")
    parser.add_argument("--epochs", type=int, default=200, help="number of epochs")
    parser.add_argument("--no_cuda", action="store_true")
    parser.add_argument("--log_freq", type=int, default=1, help="per epoch print res")
    parser.add_argument("--seed", default=42, type=int)

    parser.add_argument("--weight_decay", type=float, default=0.0, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam second beta value")
    parser.add_argument("--gpu_id", type=str, default="0", help="gpu_id")

    args = parser.parse_args()

    set_seed(args.seed)
    check_path(args.output_dir)


    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    args.cuda_condition = torch.cuda.is_available() and not args.no_cuda

    args.data_file = args.data_dir + args.data_name + '.txt'
    args.sample_file = args.data_dir + args.data_name + '_sample.txt'
    item2attribute_file = args.data_dir + args.data_name + '_item2attributes.json'

    user_seq, max_item, sample_seq = \
        get_user_seqs_and_sample(args.data_file, args.sample_file)

    item2attribute, attribute_size = get_item2attribute_json(item2attribute_file)

    args.item_size = max_item + 2
    args.mask_id = max_item + 1
    args.attribute_size = attribute_size + 1

    # save model args
    args_str = f'{args.model_name}-{args.data_name}-{args.ckp}'
    args.log_file = os.path.join(args.output_dir, args_str + '.txt')
    print(str(args))
    with open(args.log_file, 'a') as f:
        f.write(str(args) + '\n')

    args.item2attribute = item2attribute

    # save model
    checkpoint = args_str + '.pt'
    args.checkpoint_path = os.path.join(args.output_dir, checkpoint)

    train_dataset = SASRecDataset(args, user_seq, data_type='train')
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.batch_size)

    eval_dataset = SASRecDataset(args, user_seq, test_neg_items=sample_seq, data_type='valid')
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.batch_size)

    test_dataset = SASRecDataset(args, user_seq, test_neg_items=sample_seq, data_type='test')
    test_sampler = SequentialSampler(test_dataset)
    test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=args.batch_size)


    model = S3RecModel(args=args)

    trainer = FinetuneTrainer(model, train_dataloader, eval_dataloader,
                              test_dataloader, args)


    if args.do_eval:
        trainer.load(args.checkpoint_path)
        print(f'Load model from {args.checkpoint_path} for test!')
        scores, result_info = trainer.test(0, full_sort=False)

    else:
        pretrained_path = os.path.join(args.output_dir, f'{args.data_name}-epochs-{args.ckp}.pt')
        try:
            trainer.load(pretrained_path)
            print(f'Load Checkpoint From {pretrained_path}!')

        except FileNotFoundError:
            print(f'{pretrained_path} Not Found! The Model is same as SASRec')

        early_stopping = EarlyStopping(args.checkpoint_path, patience=10, verbose=True)
        for epoch in range(args.epochs):
            trainer.train(epoch)
            scores, _ = trainer.valid(epoch, full_sort=False)
            # evaluate on MRR
            early_stopping(np.array(scores[-1:]), trainer.model)
            if early_stopping.early_stop:
                print("Early stopping")
                break

        print('---------------Sample 99 results-------------------')
        # load the best model
        trainer.model.load_state_dict(torch.load(args.checkpoint_path))
        scores, result_info = trainer.test(0, full_sort=False)

    print(args_str)
    print(result_info)
    with open(args.log_file, 'a') as f:
        f.write(args_str + '\n')
        f.write(result_info + '\n')
Example #11
# if config.use_pre_embedding:
#     pre_embed = load_pre_embedding('../pre_embedding/sgns.baidubaike.bigram-char', word2id, config.embed_size)

model = Intent_Model(len(word2id), config.embed_size, config.hidden_size, 13, pre_embed, config.dropout, use_gpu)

if os.path.exists(config.model_path+'/ed_model/model.ckpt'):
    model.load_state_dict(torch.load(config.model_path+'/ed_model/model.ckpt'))
    print('load model state dict successful!')

if use_gpu:
    model = model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)#, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss()
best_dev_acc = -1.0
earlystopping = EarlyStopping(config.delta, config.earlystop)
stop_flag = False
t_loss = 0.
b_loss = 0.
time1 = time.time()
for epoch in range(config.ed_epochs):
    np.random.shuffle(train_data)
    for i, data in enumerate(train_data):
        text_ids = data['text_ids']
        # mention_positions = data['mention_position']
        label = data['label']
        label = Variable(torch.LongTensor([label]))
        text_ids = Variable(torch.LongTensor(text_ids))


        if use_gpu:
            # (the original example is truncated here; moving the batch to the
            # GPU is the assumed intent of this branch)
            text_ids = text_ids.cuda()
            label = label.cuda()
Exemple #12
0
def main(args):
    # load and preprocess dataset
    g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels = load_reg_data(args)
    num_feats = features.shape[1]
    n_edges = g.number_of_edges()

    print("""----Data statistics------'
      #use cuda: %d
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (args.gpu, n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        ind_features = ind_features.cuda()
        labels = labels.cuda()
        ind_labels = ind_labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual,
                args.bias)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # loss function (assumed: mean-squared error, since this example trains a
    # regression model but never defines loss_fcn)
    loss_fcn = torch.nn.MSELoss()

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        pred = model(features)
        loss = loss_fcn(pred[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_r2 = compute_r2(pred[train_mask], labels[train_mask])

        if args.fastmode:
            val_r2 = compute_r2(pred[val_mask], labels[val_mask])
        else:
            val_r2 = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_r2, model):
                    break

        if epoch > 3:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainR2 {:.4f} |"
              " Val R2 {:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), train_r2,
                     val_r2, n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    evaluate_test(model, features, labels, test_mask, lp_dict, meta="2012")
    evaluate_test(model, ind_features, ind_labels, test_mask, lp_dict, meta="2016")
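The EarlyStopping used by this example (and by several of the GAT examples below) is likewise external. A minimal sketch consistent with its usage here -- EarlyStopping(patience=100), stopper.step(score, model) returning True once patience is exhausted, with the best weights kept in 'es_checkpoint.pt' -- might look as follows; the exact behaviour is an assumption based on those calls.

import torch


class EarlyStopping:
    def __init__(self, patience=100):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def step(self, score, model):
        # Returns True once `score` has not improved for `patience` epochs.
        if self.best_score is None or score > self.best_score:
            self.best_score = score
            self.save_checkpoint(model)
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop

    def save_checkpoint(self, model):
        # Keep the weights that achieved the best validation score so far.
        torch.save(model.state_dict(), 'es_checkpoint.pt')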
Exemple #13
0
def train_pred_labels(model, train, val, auxiliary_weight=1., mini_batch_size=100,
                          lr=3e-4, nb_epochs=100, patience=20, **kwargs):
    """
        Train the PyTorch model on the training set.

        Parameters
        ----------
        model : PyTorch NN object
            PyTorch neural network model
        train : TensorDataset
            Dataset containing inputs, targets, classes for training (train_inner)
        val : TensorDataset
            Dataset containing inputs, targets, classes for validation
        auxiliary_weight: float
            Weight of auxiliary loss
        mini_batch_size : int
            The size of the batch processing size
        lr : float
            Learning rate for the model training
        nb_epochs : int
            The number of epochs used to train the model
        patience : int
            number of epochs without val improvement for early stopping (None to disable)

        Returns
        -------

        (NN object, train loss history, val accuracy history)
    """
    train_losses = []
    val_accs = []

    # Defining the optimizer for GD
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Defining the criteria to calculate losses
    criterion = nn.BCEWithLogitsLoss()         # for Binary Classification
    criterion_digit = nn.CrossEntropyLoss()    # for MultiClass Classification

    # Defining the early stopping criterion
    early_stopping = EarlyStopping(patience)

    # Defining DataLoaders for better mini-batches handling
    # Shuffling makes batches differ between epochs and results in more robust training
    train_loader = DataLoader(train, mini_batch_size, shuffle=True)

    # Learning loop
    for e in range(nb_epochs):
        # Train the input dataset by dividing it into mini_batch_size small datasets
        for train_input, train_target, train_class in train_loader:
            output, output_first_digit, output_second_digit = model(train_input)
            loss_comparison = criterion(output, train_target)
            loss_digits = criterion_digit(output_first_digit, train_class[:, 0]) + \
                          criterion_digit(output_second_digit, train_class[:, 1])
            loss = loss_comparison + auxiliary_weight * loss_digits

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())
        val_accs.append(compute_accuracy(model, val, mini_batch_size))

        # If the validation accuracy has not improved enough in the last patience epochs
        # then stop training
        if early_stopping(val_accs[-1]):
            break

    return model, train_losses, val_accs
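In this example EarlyStopping(patience) is called with the latest validation accuracy and returns True when training should stop (patience=None disables it). A possible sketch, assumed rather than taken from the original code:

class EarlyStopping:
    def __init__(self, patience=None):
        self.patience = patience
        self.best = None
        self.counter = 0

    def __call__(self, metric):
        # True once `metric` has not improved for `patience` consecutive calls.
        if self.patience is None:
            return False
        if self.best is None or metric > self.best:
            self.best = metric
            self.counter = 0
        else:
            self.counter += 1
        return self.counter >= self.patience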
Exemple #14
0
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
    else:
        device = 'cpu'

    # retrieve labels of ground truth
    labels = graph.ndata['label'].to(device)

    # Extract node features
    feat = graph.ndata['feature'].to(device)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # Reinforcement learning module only for positive training nodes
    rl_idx = th.nonzero(train_mask.to(device) & labels.bool(),
                        as_tuple=False).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(in_dim=feat.shape[-1],
                    num_classes=num_classes,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    activation=th.tanh,
                    step_size=args.step_size,
                    edges=graph.canonical_etypes)

    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # Training and validation using a full graph
        model.train()
        logits_gnn, logits_sim = model(graph, feat)

        # compute loss
        tr_loss = loss_fn(logits_gnn[train_idx], labels[train_idx]) + \
                  args.sim_weight * loss_fn(logits_sim[train_idx], labels[train_idx])

        tr_recall = recall_score(
            labels[train_idx].cpu(),
            logits_gnn.data[train_idx].argmax(dim=1).cpu())
        tr_auc = roc_auc_score(labels[train_idx].cpu(),
                               logits_gnn.data[train_idx][:, 1].cpu())

        # validation
        val_loss = loss_fn(logits_gnn[val_idx], labels[val_idx]) + \
                   args.sim_weight * loss_fn(logits_sim[val_idx], labels[val_idx])
        val_recall = recall_score(labels[val_idx].cpu(),
                                  logits_gnn.data[val_idx].argmax(dim=1).cpu())
        val_auc = roc_auc_score(labels[val_idx].cpu(),
                                logits_gnn.data[val_idx][:, 1].cpu())

        # backward
        optimizer.zero_grad()
        tr_loss.backward()
        optimizer.step()

        # Print out performance
        print(
            "Epoch {}, Train: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f} | Val: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}"
            .format(epoch, tr_recall, tr_auc, tr_loss.item(), val_recall,
                    val_auc, val_loss.item()))

        # Adjust p value with reinforcement learning module
        model.RLModule(graph, epoch, rl_idx)

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test after all epoch
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load('es_checkpoint.pt'))

    # forward
    logits_gnn, logits_sim = model.forward(graph, feat)

    # compute loss
    test_loss = loss_fn(logits_gnn[test_idx], labels[test_idx]) + \
                args.sim_weight * loss_fn(logits_sim[test_idx], labels[test_idx])
    test_recall = recall_score(labels[test_idx].cpu(),
                               logits_gnn[test_idx].argmax(dim=1).cpu())
    test_auc = roc_auc_score(labels[test_idx].cpu(),
                             logits_gnn.data[test_idx][:, 1].cpu())

    print("Test Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
        test_recall, test_auc, test_loss.item()))
Exemple #15
0
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
        args.num_workers = 0
    else:
        device = 'cpu'

    # retrieve labels of ground truth
    labels = graph.ndata['label'].to(device)

    # Extract node features
    feat = graph.ndata['feature'].to(device)
    layers_feat = feat.expand(args.num_layers, -1, -1)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # Reinforcement learning module only for positive training nodes
    rl_idx = th.nonzero(train_mask.to(device) & labels.bool(),
                        as_tuple=False).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(in_dim=feat.shape[-1],
                    num_classes=num_classes,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    activation=th.tanh,
                    step_size=args.step_size,
                    edges=graph.canonical_etypes)

    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # calculate the distance of each edges and sample based on the distance
        dists = []
        p = []
        for i in range(args.num_layers):
            dist = {}
            graph.ndata['nd'] = th.tanh(model.layers[i].MLP(layers_feat[i]))
            for etype in graph.canonical_etypes:
                graph.apply_edges(_l1_dist, etype=etype)
                dist[etype] = graph.edges[etype].data.pop('ed').detach().cpu()
            dists.append(dist)
            p.append(model.layers[i].p)
        graph.ndata.pop('nd')
        sampler = CARESampler(p, dists, args.num_layers)

        # train
        model.train()
        tr_loss = 0
        tr_recall = 0
        tr_auc = 0
        tr_blk = 0
        train_dataloader = dgl.dataloading.DataLoader(
            graph,
            train_idx,
            sampler,
            batch_size=args.batch_size,
            shuffle=True,
            drop_last=False,
            num_workers=args.num_workers)

        for input_nodes, output_nodes, blocks in train_dataloader:
            blocks = [b.to(device) for b in blocks]
            train_feature = blocks[0].srcdata['feature']
            train_label = blocks[-1].dstdata['label']
            logits_gnn, logits_sim = model(blocks, train_feature)

            # compute loss
            blk_loss = loss_fn(logits_gnn,
                               train_label) + args.sim_weight * loss_fn(
                                   logits_sim, train_label)
            tr_loss += blk_loss.item()
            tr_recall += recall_score(train_label.cpu(),
                                      logits_gnn.argmax(dim=1).detach().cpu())
            tr_auc += roc_auc_score(
                train_label.cpu(),
                softmax(logits_gnn, dim=1)[:, 1].detach().cpu())
            tr_blk += 1

            # backward
            optimizer.zero_grad()
            blk_loss.backward()
            optimizer.step()

        # Reinforcement learning module
        model.RLModule(graph, epoch, rl_idx, dists)

        # validation
        model.eval()
        val_dataloader = dgl.dataloading.DataLoader(
            graph,
            val_idx,
            sampler,
            batch_size=args.batch_size,
            shuffle=True,
            drop_last=False,
            num_workers=args.num_workers)

        val_recall, val_auc, val_loss = evaluate(model, loss_fn,
                                                 val_dataloader, device)

        # Print out performance
        print(
            "In epoch {}, Train Recall: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
            "Valid Recall: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".
            format(epoch, tr_recall / tr_blk, tr_auc / tr_blk,
                   tr_loss / tr_blk, val_recall, val_auc, val_loss))

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test with mini batch after all epoch
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load('es_checkpoint.pt'))
    test_dataloader = dgl.dataloading.DataLoader(graph,
                                                 test_idx,
                                                 sampler,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 drop_last=False,
                                                 num_workers=args.num_workers)

    test_recall, test_auc, test_loss = evaluate(model, loss_fn,
                                                test_dataloader, device)

    print("Test Recall: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(
        test_recall, test_auc, test_loss))
Exemple #16
0
def run():
    df = pd.read_csv(config.TRAINING_FILE).fillna("none")

    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.1, random_state=42, stratify=df.category.values)

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = dataset.LivedoorDataset(article=df_train.article.values,
                                            targets=df_train.category.values)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)

    valid_dataset = dataset.LivedoorDataset(article=df_valid.article.values,
                                            targets=df_valid.category.values)

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AlbertBaseJapanese()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.001,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay":
            0.0,
        },
    ]

    num_train_steps = int(
        len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(config.EPOCHS):
        engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        outputs, targets = engine.valid_fn(valid_data_loader, model, device)
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"epoch = {epoch}, accuracy = {accuracy}")
        es(accuracy, model, config.MODEL_PATH)
        if es.early_stop:
            print("EarlyStopping.")
            break
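The EarlyStopping(patience=5, mode="max") used above is invoked with the metric, the model and a save path, and sets early_stop once patience runs out. A hedged sketch of that interface (the mode handling and saving logic are assumptions):

import torch


class EarlyStopping:
    def __init__(self, patience=5, mode="max", delta=0.0):
        self.patience = patience
        self.mode = mode
        self.delta = delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, metric, model, model_path):
        # For mode="min" a lower metric is better, so flip its sign.
        score = metric if self.mode == "max" else -metric
        if self.best_score is None or score > self.best_score + self.delta:
            self.best_score = score
            torch.save(model.state_dict(), model_path)
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True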
Exemple #17
0
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)

    with tf.device(device):

        features = tf.convert_to_tensor(data.features, dtype=tf.float32)
        labels = tf.convert_to_tensor(data.labels, dtype=tf.int64)
        train_mask = tf.convert_to_tensor(data.train_mask, dtype=tf.bool)
        val_mask = tf.convert_to_tensor(data.val_mask, dtype=tf.bool)
        test_mask = tf.convert_to_tensor(data.test_mask, dtype=tf.bool)
        num_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------'
        #Edges %d
        #Classes %d 
        #Train samples %d
        #Val samples %d
        #Test samples %d""" %
              (n_edges, n_classes, train_mask.numpy().sum(),
               val_mask.numpy().sum(), test_mask.numpy().sum()))

        g = data.graph
        # add self loop
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
        n_edges = g.number_of_edges()
        # create model
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
        model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                    heads, tf.nn.elu, args.in_drop, args.attn_drop,
                    args.negative_slope, args.residual)
        print(model)
        if args.early_stop:
            stopper = EarlyStopping(patience=100)

        # loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
        #     from_logits=False)
        loss_fcn = tf.nn.sparse_softmax_cross_entropy_with_logits

        # use optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr,
                                             epsilon=1e-8)

        # initialize graph
        dur = []
        for epoch in range(args.epochs):
            if epoch >= 3:
                t0 = time.time()
            # forward
            with tf.GradientTape() as tape:
                tape.watch(model.trainable_weights)
                logits = model(features, training=True)
                loss_value = tf.reduce_mean(
                    loss_fcn(labels=labels[train_mask],
                             logits=logits[train_mask]))
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + \
                        args.weight_decay*tf.nn.l2_loss(weight)

                grads = tape.gradient(loss_value, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            train_acc = accuracy(logits[train_mask], labels[train_mask])

            if args.fastmode:
                val_acc = accuracy(logits[val_mask], labels[val_mask])
            else:
                val_acc = evaluate(model, features, labels, val_mask)
                if args.early_stop:
                    if stopper.step(val_acc, model):
                        break

            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
                " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                    epoch, np.mean(dur),
                    loss_value.numpy().item(), train_acc, val_acc,
                    n_edges / np.mean(dur) / 1000))

        print()
        if args.early_stop:
            model.load_weights('es_checkpoint.pb')
        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
Exemple #18
0
model = HeteroRGCN(G, args.hidden_dim, args.out_dim)

if os.path.isfile(os.path.join(args.model_dir,
                               'model.pt')) and args.load_pretrained:
    model.load_state_dict(torch.load(os.path.join(args.model_dir, 'model.pt')))

if args.cuda and torch.cuda.is_available():
    model = model.cuda()

opt = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wc)

best_embeddings = None
best_loss = 1e20
early_stopping = EarlyStopping(patience=args.patience,
                               verbose=True,
                               datadir=args.model_dir)
for epoch in range(args.epochs):
    total_loss = 0
    for batch_edges, batch_weights, neg in tqdm(dataset, desc='Training'):
        pos0 = batch_edges[:, 0]
        pos1 = batch_edges[:, 1]

        logits = model(G)
        all_embeddings = torch.cat([logits[ntype] for ntype in G.ntypes])

        embedings0 = F.normalize(all_embeddings[pos0], dim=1)
        embedings1 = F.normalize(all_embeddings[pos1], dim=1)
        neg_embedings = F.normalize(all_embeddings[neg], dim=1)
        if args.weighted_loss:
            batch_weights = torch.FloatTensor(batch_weights)
Exemple #19
0
class STAMP:
    def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x, te_y,
                 num_items, init_way, logger):
        self.sess = sess
        self.configs = configs
        self.tr_x = tr_x
        self.tr_y = tr_y
        self.val_x = val_x
        self.val_y = val_y
        self.te_x = te_x
        self.te_y = te_y
        #self.num_items = 37484 #num_items
        self.num_items = num_items  # num_items
        self.logger = logger

        self.rnn_hidden_size = configs.rnn_hidden_size
        self.batch_size = configs.batch_size
        self.num_layers = configs.num_layers

        # Initialize the optimizer
        self.optimizer_type = configs.optimizer_type
        self.weight_decay = configs.weight_decay
        self.momentum = configs.momentum
        self.lr = configs.lr
        self.eps = configs.eps

        self.clip_grad = configs.clip_grad
        self.clip_grad_threshold = configs.clip_grad_threshold
        self.lr_decay_step = configs.lr_decay_step
        self.lr_decay = configs.lr_decay
        self.lr_decay_rate = configs.lr_decay_rate
        self.drop_prob_ho = configs.drop_prob_ho
        self.drop_prob_input = configs.drop_prob_input
        self.drop_prob_recurrent = configs.drop_prob_recurrent

        # etc
        self.k = k
        self.time_sort = configs.time_sort
        self.loss_type = configs.loss_type
        self.n_epochs = configs.n_epochs
        self.is_shuffle = configs.is_shuffle
        self.embedding_size = configs.embedding_size
        self.num_topics = configs.num_topics
        self.early_stop = EarlyStopping(configs.max_patience)

        # batch_iterator
        self.tr_sess_idx = np.arange(len(self.tr_y))
        self.val_sess_idx = np.arange(len(self.val_y))
        self.te_sess_idx = np.arange(len(self.te_y))

        # record best epoch
        self.max_val_recall = [0 for _ in range(len(self.k))]
        self.max_te_recall = [0 for _ in range(len(self.k))]
        self.best_epoch = 0

        tr_lengths = [len(s) for s in self.tr_x]
        val_lengths = [len(s) for s in self.val_x]
        te_lengths = [len(s) for s in self.te_x]
        tr_maxlen = np.max(tr_lengths)
        val_maxlen = np.max(val_lengths)
        te_maxlen = np.max(te_lengths)
        self.maxlen = np.max([tr_maxlen, val_maxlen, te_maxlen])
        self.maxlen = None
        self.embed_init, self.weight_init, self.bias_init, self.gate_bias_init, self.kern_init = init_way

    def run(self):
        self.prepare_model()
        tf.global_variables_initializer().run()
        print("End of model prepare")
        for epoch in range(self.n_epochs):
            start_time = time.time()
            tr_pred_loss = self.train_model()
            val_pred_loss, val_recall_list, val_mrr_list = self.pred_evaluation(
                mode="valid")
            te_pred_loss, te_recall_list, te_mrr_list = self.pred_evaluation(
                mode="test")

            self.best_epoch, best_check = write_log(
                self.logger, epoch, tr_pred_loss, val_pred_loss, te_pred_loss,
                self.k, val_recall_list, val_mrr_list, te_recall_list,
                te_mrr_list, self.max_val_recall, self.max_te_recall,
                self.best_epoch, start_time)
            if self.early_stop.validate(val_recall_list[3]):
                self.logger.info("Training process is stopped early")
                break

    def prepare_model(self):
        self.rnn_x1 = tf.placeholder(tf.int32, [None, self.maxlen],
                                     name='input1')
        self.rnn_x2 = tf.placeholder(tf.int32, [None, 1], name='input2')
        self.rnn_y = tf.placeholder(tf.int64, [None, self.num_items],
                                    name='output')
        self.mask_x1 = tf.placeholder(tf.float32, [None, self.maxlen],
                                      name='mask_x1')  # batch_size * maxlen
        self.mask_x2 = tf.placeholder(tf.float32, [None, 1], name='mask_x2')
        self.keep_prob_input = tf.placeholder(tf.float32,
                                              name='keep_prob_input')
        self.keep_prob_ho = tf.placeholder(tf.float32, name='keep_prob_ho')
        self.batch_var_length = tf.placeholder(tf.float32,
                                               name="variable_length")

        Wemb = tf.get_variable('Wemb', [self.num_items, self.embedding_size],
                               initializer=self.embed_init)
        w0 = tf.get_variable('w0', [self.embedding_size, 1],
                             initializer=self.weight_init)
        w1 = tf.get_variable('w1', [self.embedding_size, self.embedding_size],
                             initializer=self.weight_init)
        w2 = tf.get_variable('w2', [self.embedding_size, self.embedding_size],
                             initializer=self.weight_init)
        w3 = tf.get_variable('w3', [self.embedding_size, self.embedding_size],
                             initializer=self.weight_init)
        ba = tf.get_variable('ba', [self.embedding_size],
                             initializer=self.bias_init)

        if self.loss_type == 'EMB':
            bili = tf.get_variable(
                'bili', [self.embedding_size, 2 * self.rnn_hidden_size],
                initializer=self.weight_init)
        elif self.loss_type == "Trilinear":
            ws = tf.get_variable('ws',
                                 [self.embedding_size, self.embedding_size],
                                 initializer=self.weight_init)
            bs = tf.get_variable('bs', [self.embedding_size],
                                 initializer=self.bias_init)
            wt = tf.get_variable('wt',
                                 [self.embedding_size, self.embedding_size],
                                 initializer=self.weight_init)
            bt = tf.get_variable('bt', [self.embedding_size],
                                 initializer=self.bias_init)
        elif self.loss_type == "TOP1":
            W_top1 = tf.get_variable(
                'W_top1', [2 * self.rnn_hidden_size, self.num_items],
                initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)
        elif self.loss_type == "TOP1_variant":
            bili = tf.get_variable(
                'bili', [self.embedding_size, 2 * self.rnn_hidden_size],
                initializer=self.weight_init)
            W_top1 = tf.get_variable(
                'W_top1', [2 * self.rnn_hidden_size, self.num_items],
                initializer=self.weight_init)
            b_top1 = tf.get_variable('b_top1', [1, self.num_items],
                                     initializer=self.bias_init)

        emb_x1 = tf.nn.embedding_lookup(
            Wemb, self.rnn_x1)  # xi (batch_size * maxlen * num_hidden)
        emb_x2 = tf.squeeze(tf.nn.embedding_lookup(Wemb, self.rnn_x2),
                            axis=1)  # xt (batch_size * num_hidden)
        tiled_mask = tf.tile(tf.expand_dims(self.mask_x1, 2),
                             [1, 1, self.rnn_hidden_size
                              ])  # xt (batch_size * maxlen * num_hidden)
        ms = tf.reduce_sum(tf.multiply(emb_x1, tiled_mask),
                           axis=1)  # batch_size * num_hidden
        tiled_var_length = tf.tile(
            tf.reshape(self.batch_var_length, [-1, 1]),
            [1, self.rnn_hidden_size])  # (batch_size * num_hidden)
        ms = tf.reshape(tf.div(ms, tiled_var_length),
                        [-1, self.rnn_hidden_size])  # batch_size * num_hidden

        outputs1 = tf.transpose(emb_x1,
                                perm=[1, 0,
                                      2])  # maxlen * batch_size * num_hidden
        unnormalized_alpha = tf.map_fn(
            lambda x: compute_alpha_STAMP(x, emb_x2, ms, w0, w1, w2, w3, ba),
            outputs1)  # maxlen * batch_size
        unnormalized_alpha = tf.multiply(tf.transpose(unnormalized_alpha),
                                         self.mask_x1)  # batch_size * maxlen
        self.unnormalized_alpha = unnormalized_alpha
        alpha = unnormalized_alpha  # batch_size * maxlen
        #alpha = tf.nn.softmax(unnormalized_alpha + 100000000. * (self.mask_x1 - 1), dim=1)  # batch_size * max_len
        self.alpha = alpha
        tiled_alpha = tf.tile(
            tf.expand_dims(alpha, axis=2),
            [1, 1, self.rnn_hidden_size])  # batch_size * maxlen * hidden_size
        self.tiled_alpha = tiled_alpha
        ma = tf.reduce_sum(tf.multiply(emb_x1, tiled_alpha),
                           axis=1)  # batch * hidden
        # note: ws, bs, wt, bt are created only in the "Trilinear" branch above,
        # so the other loss types would fail here as this snippet is written
        hs = tf.nn.tanh(tf.matmul(ma, ws) + bs)  # batch * hidden
        ht = tf.nn.tanh(tf.matmul(emb_x2, wt) + bt)  # batch * hidden

        if self.loss_type == 'EMB':
            proj = tf.concat([hs, ht], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            ytem = tf.matmul(Wemb, bili)
            pred = tf.matmul(proj, tf.transpose(ytem))
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "Trilinear":
            pred = tf.nn.sigmoid(
                tf.matmul(tf.multiply(ht, hs),
                          tf.transpose(Wemb)))  # batch * n_item
            self.pred = tf.nn.softmax(pred)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                           labels=self.rnn_y))
        elif self.loss_type == "TOP1":
            proj = tf.concat([hs, ht], 1)
            proj = tf.nn.dropout(proj, self.keep_prob_ho)
            pred = tf.matmul(proj, W_top1) + b_top1
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)
        elif self.loss_type == "TOP1_variant":
            pred = tf.nn.sigmoid(
                tf.matmul(tf.multiply(ht, hs),
                          tf.transpose(Wemb)))  # batch * n_item
            self.pred = tf.nn.tanh(pred)
            self.cost = loss_fn(self.rnn_y, self.pred, self.loss_type)

        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)

    def train_model(self):
        if self.configs.is_shuffle:
            self.tr_sess_idx = np.random.permutation(self.tr_sess_idx)
        batch_loss_list = []
        num_batch = math.ceil(
            np.float32(len(self.tr_sess_idx)) / self.batch_size)
        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(self.tr_sess_idx))
            temp_batch_x = self.tr_x[self.tr_sess_idx[start_itr:end_itr]]  #
            temp_batch_y = self.tr_y[self.tr_sess_idx[start_itr:end_itr]]  #
            batch_x1, batch_x2, batch_y, mask_x1, mask_x2, labels, lengths = convert_batch_data_stamp(
                temp_batch_x, temp_batch_y, self.num_items, maxlen=self.maxlen)

            temp_keep_prob_ho = 1.0 - self.drop_prob_ho
            temp_keep_prob_input = 1.0 - self.drop_prob_input
            feed_dict = {
                self.rnn_x1: batch_x1,
                self.rnn_x2: batch_x2,
                self.rnn_y: batch_y,
                self.mask_x1: mask_x1,
                self.mask_x2: mask_x2,
                self.keep_prob_input: temp_keep_prob_input,
                self.keep_prob_ho: temp_keep_prob_ho,
                self.batch_var_length: lengths
            }
            _, pred_loss_, preds2 = self.sess.run(
                [self.optimizer, self.cost, self.pred], feed_dict=feed_dict)
            batch_loss_list.append(pred_loss_)

        return np.mean(batch_loss_list)

    def pred_evaluation(self, mode):
        if mode == "valid":
            sess_idx = self.val_sess_idx
            df_x = self.val_x
            df_y = self.val_y
        elif mode == "test":
            sess_idx = self.te_sess_idx
            df_x = self.te_x
            df_y = self.te_y

        batch_loss_list = []
        recalls = []
        mrrs = []
        evaluation_point_count = []
        for itr in range(len(self.k)):
            recalls.append(0)
            mrrs.append(0)
            evaluation_point_count.append(0)
        num_batch = math.ceil(np.float32(len(sess_idx)) / self.batch_size)

        for batch_itr in range(int(num_batch)):
            start_itr = self.batch_size * batch_itr
            end_itr = np.minimum(self.batch_size * (batch_itr + 1),
                                 len(sess_idx))
            temp_batch_x = df_x[sess_idx[start_itr:end_itr]]
            temp_batch_y = df_y[sess_idx[start_itr:end_itr]]
            batch_x1, batch_x2, batch_y, mask_x1, mask_x2, labels, lengths \
                = convert_batch_data_stamp(temp_batch_x,temp_batch_y,self.num_items,maxlen=self.maxlen)

            feed_dict = {
                self.rnn_x1: batch_x1,
                self.rnn_x2: batch_x2,
                self.rnn_y: batch_y,
                self.mask_x1: mask_x1,
                self.mask_x2: mask_x2,
                self.keep_prob_input: 1.0,
                self.keep_prob_ho: 1.0,
                self.batch_var_length: lengths
            }
            preds, pred_loss_ = self.sess.run([self.pred, self.cost],
                                              feed_dict=feed_dict)

            batch_loss_list.append(pred_loss_)

            recalls, mrrs, evaluation_point_count = evaluation(
                labels, preds, recalls, mrrs, evaluation_point_count, self.k)

        recall_list = []
        mrr_list = []
        for itr in range(len(self.k)):
            recall = np.asarray(recalls[itr],
                                dtype=np.float32) / evaluation_point_count[itr]
            mrr = np.asarray(mrrs[itr],
                             dtype=np.float32) / evaluation_point_count[itr]
            if self.max_val_recall[itr] < recall and mode == "valid":
                self.max_val_recall[itr] = recall
            if self.max_te_recall[itr] < recall and mode == "test":
                self.max_te_recall[itr] = recall
            recall_list.append(recall)
            mrr_list.append(mrr)

        return np.mean(batch_loss_list), recall_list, mrr_list
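Here EarlyStopping(configs.max_patience) exposes a validate method that returns True once the monitored recall has stopped improving. A minimal sketch under that assumption:

class EarlyStopping:
    def __init__(self, max_patience):
        self.max_patience = max_patience
        self.best = None
        self.num_bad_epochs = 0

    def validate(self, metric):
        # True once `metric` has not improved for `max_patience` evaluations.
        if self.best is None or metric > self.best:
            self.best = metric
            self.num_bad_epochs = 0
        else:
            self.num_bad_epochs += 1
        return self.num_bad_epochs >= self.max_patience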
Exemple #20
0
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Exemple #21
0
    def train(train_dataset, dev_dataset):
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=args.train_batch_size,
                                      shuffle=True,
                                      num_workers=2)

        global best_dev
        nonlocal global_step
        n_sample = len(train_dataloader)
        early_stopping = EarlyStopping(args.patience, logger=logger)
        # Loss function
        adversarial_loss = torch.nn.BCELoss().to(device)
        classified_loss = torch.nn.CrossEntropyLoss().to(device)

        # Optimizers
        optimizer_G = torch.optim.Adam(G.parameters(),
                                       lr=args.G_lr)  # optimizer for generator
        optimizer_D = torch.optim.Adam(
            D.parameters(), lr=args.D_lr)  # optimizer for discriminator
        optimizer_E = AdamW(E.parameters(), args.bert_lr)

        G_total_train_loss = []
        D_total_fake_loss = []
        D_total_real_loss = []
        FM_total_train_loss = []
        D_total_class_loss = []
        valid_detection_loss = []
        valid_oos_ind_precision = []
        valid_oos_ind_recall = []
        valid_oos_ind_f_score = []

        all_features = []
        result = dict()

        for i in range(args.n_epoch):

            # Initialize model state
            G.train()
            D.train()
            E.train()

            G_train_loss = 0
            D_fake_loss = 0
            D_real_loss = 0
            FM_train_loss = 0
            D_class_loss = 0

            for sample in tqdm.tqdm(train_dataloader):
                sample = (i.to(device) for i in sample)
                token, mask, type_ids, y = sample
                batch = len(token)

                ood_sample = (y == 0.0)
                # weight = torch.ones(len(ood_sample)).to(device) - ood_sample * args.beta
                # real_loss_func = torch.nn.BCELoss(weight=weight).to(device)

                # the label used to train generator and discriminator.
                valid_label = FloatTensor(batch, 1).fill_(1.0).detach()
                fake_label = FloatTensor(batch, 1).fill_(0.0).detach()

                optimizer_E.zero_grad()
                sequence_output, pooled_output = E(token, mask, type_ids)
                real_feature = pooled_output

                # train D on real
                optimizer_D.zero_grad()
                real_f_vector, discriminator_output, classification_output = D(
                    real_feature, return_feature=True)
                discriminator_output = discriminator_output.squeeze()
                real_loss = adversarial_loss(discriminator_output,
                                             (y != 0.0).float())
                # real_loss = real_loss_func(discriminator_output, (y != 0.0).float())
                if n_class > 2:  # more than 2 classes: train the classifier in addition to the discriminator
                    class_loss = classified_loss(classification_output,
                                                 y.long())
                    real_loss += class_loss
                    D_class_loss += class_loss.detach()
                real_loss.backward()

                if args.do_vis:
                    all_features.append(real_f_vector.detach())

                # generator (G) branch removed
                # # # train D on fake
                # if args.model == 'lstm_gan' or args.model == 'cnn_gan':
                #     z = FloatTensor(np.random.normal(0, 1, (batch, 32, args.G_z_dim))).to(device)
                # else:
                #     z = FloatTensor(np.random.normal(0, 1, (batch, args.G_z_dim))).to(device)
                # fake_feature = G(z).detach()
                # fake_discriminator_output = D.detect_only(fake_feature)
                # # fake_loss = args.beta * adversarial_loss(fake_discriminator_output, fake_label)
                # fake_loss = adversarial_loss(fake_discriminator_output, fake_label)
                # fake_loss.backward()
                optimizer_D.step()

                if args.fine_tune:
                    optimizer_E.step()

                # generator (G) branch removed
                # # train G
                # optimizer_G.zero_grad()
                # if args.model == 'lstm_gan' or args.model == 'cnn_gan':
                #     z = FloatTensor(np.random.normal(0, 1, (batch, 32, args.G_z_dim))).to(device)
                # else:
                #     z = FloatTensor(np.random.normal(0, 1, (batch, args.G_z_dim))).to(device)
                # fake_f_vector, D_decision = D.detect_only(G(z), return_feature=True)
                # gd_loss = adversarial_loss(D_decision, valid_label)
                # fm_loss = torch.abs(torch.mean(real_f_vector.detach(), 0) - torch.mean(fake_f_vector, 0)).mean()
                # g_loss = gd_loss + 0 * fm_loss
                # g_loss.backward()
                # optimizer_G.step()

                global_step += 1

                # D_fake_loss += fake_loss.detach()
                D_real_loss += real_loss.detach()
                # G_train_loss += g_loss.detach() + fm_loss.detach()
                # FM_train_loss += fm_loss.detach()

            logger.info('[Epoch {}] Train: D_fake_loss: {}'.format(
                i, D_fake_loss / n_sample))
            logger.info('[Epoch {}] Train: D_real_loss: {}'.format(
                i, D_real_loss / n_sample))
            logger.info('[Epoch {}] Train: D_class_loss: {}'.format(
                i, D_class_loss / n_sample))
            logger.info('[Epoch {}] Train: G_train_loss: {}'.format(
                i, G_train_loss / n_sample))
            logger.info('[Epoch {}] Train: FM_train_loss: {}'.format(
                i, FM_train_loss / n_sample))
            logger.info(
                '---------------------------------------------------------------------------'
            )

            D_total_fake_loss.append(D_fake_loss / n_sample)
            D_total_real_loss.append(D_real_loss / n_sample)
            D_total_class_loss.append(D_class_loss / n_sample)
            G_total_train_loss.append(G_train_loss / n_sample)
            FM_total_train_loss.append(FM_train_loss / n_sample)

            if dev_dataset:
                logger.info(
                    '#################### eval result at step {} ####################'
                    .format(global_step))
                eval_result = eval(dev_dataset)

                valid_detection_loss.append(eval_result['detection_loss'])
                valid_oos_ind_precision.append(
                    eval_result['oos_ind_precision'])
                valid_oos_ind_recall.append(eval_result['oos_ind_recall'])
                valid_oos_ind_f_score.append(eval_result['oos_ind_f_score'])

                # 1  means the model should be saved
                # 0  means the model does not need to be saved
                # -1 means patience has been exceeded and training should stop early
                signal = early_stopping(-eval_result['eer'])
                if signal == -1:
                    break
                elif signal == 0:
                    pass
                elif signal == 1:
                    save_gan_model(D, G, config['gan_save_path'])
                    if args.fine_tune:
                        save_model(E,
                                   path=config['bert_save_path'],
                                   model_name='bert')

                logger.info(eval_result)
                logger.info('valid_eer: {}'.format(eval_result['eer']))
                logger.info('valid_oos_ind_precision: {}'.format(
                    eval_result['oos_ind_precision']))
                logger.info('valid_oos_ind_recall: {}'.format(
                    eval_result['oos_ind_recall']))
                logger.info('valid_oos_ind_f_score: {}'.format(
                    eval_result['oos_ind_f_score']))
                logger.info('valid_fpr95: {}'.format(
                    ErrorRateAt95Recall(eval_result['all_binary_y'],
                                        eval_result['y_score'])))

        if args.patience >= args.n_epoch:
            save_gan_model(D, G, config['gan_save_path'])
            if args.fine_tune:
                save_model(E, path=config['bert_save_path'], model_name='bert')

        freeze_data['D_total_fake_loss'] = D_total_fake_loss
        freeze_data['D_total_real_loss'] = D_total_real_loss
        freeze_data['D_total_class_loss'] = D_total_class_loss
        freeze_data['G_total_train_loss'] = G_total_train_loss
        freeze_data['FM_total_train_loss'] = FM_total_train_loss
        freeze_data['valid_real_loss'] = valid_detection_loss
        freeze_data['valid_oos_ind_precision'] = valid_oos_ind_precision
        freeze_data['valid_oos_ind_recall'] = valid_oos_ind_recall
        freeze_data['valid_oos_ind_f_score'] = valid_oos_ind_f_score

        best_dev = -early_stopping.best_score

        if args.do_vis:
            all_features = torch.cat(all_features, 0).cpu().numpy()
            result['all_features'] = all_features
        return result
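The EarlyStopping(args.patience, logger=logger) used in this example returns a signal rather than a boolean: 1 when the score improved (save the model), 0 when it did not, and -1 when patience is exhausted; it also keeps a best_score attribute. A sketch matching those conventions (the internals are assumptions):

class EarlyStopping:
    def __init__(self, patience, logger=None):
        self.patience = patience
        self.logger = logger
        self.best_score = None
        self.counter = 0

    def __call__(self, score):
        # 1: improved, save the model; 0: no improvement; -1: stop training.
        if self.best_score is None or score > self.best_score:
            self.best_score = score
            self.counter = 0
            return 1
        self.counter += 1
        if self.logger is not None:
            self.logger.info(f'EarlyStopping counter: {self.counter} / {self.patience}')
        if self.counter >= self.patience:
            return -1
        return 0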
Exemple #22
0
def main():
    best_prec1 = 0
    test = True
    log = True
    save_best = True
    sample_length = 0.5
    num_samples = int(np.round(
        5000 /
        sample_length))  # together I want about 5000 seconds from each subject
    batch_size = 100
    num_epochs = 200
    dropout = 0.4
    task = 'subject_prediction'

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    torch.backends.cudnn.benchmark = True

    root_path = pathlib.Path.cwd()
    matrix = root_path.joinpath(
        'data', f'cleaned_{sample_length}sec_{num_samples}.npy')

    training_dataset = LFPData(data_file=matrix,
                               split='train',
                               standardize=True)
    training_loader = DataLoader(training_dataset,
                                 shuffle=True,
                                 batch_size=batch_size,
                                 pin_memory=True,
                                 num_workers=1)

    validation_set = LFPData(data_file=matrix, split='valid', standardize=True)
    validation_loader = DataLoader(validation_set,
                                   shuffle=False,
                                   batch_size=batch_size,
                                   pin_memory=True,
                                   num_workers=1)
    # input_shape = (2, np.int(422 * sample_length))  # this is a hack to figure out shape of fc layer
    # net = conv1d_nn.Net(input_shape=input_shape, dropout=dropout)
    net = conv1d_nn.FCN(in_channels=2, num_classes=9)
    net.apply(init_weights)
    net.cuda()

    criterion = nn.CrossEntropyLoss()
    criterion.cuda()
    # optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(net.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-8)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     patience=5,
                                                     threshold=1e-2)
    stop_criterion = EarlyStopping()

    title = f'FCN2_cleaned_{sample_length}sec_{num_samples}'
    if log:
        log_dir = root_path.joinpath('logs', title)
        if not log_dir.exists():
            log_dir.mkdir()
        training_log = log_dir.joinpath('log')
        if not training_log.exists():
            open(str(training_log), 'w').close()
        result_writer = ResultsWriter(str(training_log), overwrite=True)

    mlog = MeterLogger(server='localhost',
                       port=8097,
                       nclass=9,
                       title=title,
                       env=title)

    for epoch in range(1, num_epochs + 1):
        mlog.timer.reset()

        train_epoch(training_loader, net, criterion, optimizer, mlog)

        if log:
            result_writer.update(title, {'Train': mlog.peek_meter()})
        mlog.print_meter(mode="Train", iepoch=epoch)
        mlog.reset_meter(mode="Train", iepoch=epoch)
        validation_loss = val_epoch(validation_loader, net, criterion, mlog)

        prec1 = mlog.meter['accuracy'].value()[0]

        if save_best:
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            if is_best:
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    root_path.joinpath('checkpoints', title), {
                        'epoch': epoch + 1,
                        'state_dict': net.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best)

        if log:
            result_writer.update(title, {'Validation': mlog.peek_meter()})
        mlog.print_meter(mode="Test", iepoch=epoch)
        mlog.reset_meter(mode="Test", iepoch=epoch)

        stop_criterion.eval_loss(validation_loss)
        if stop_criterion.get_nsteps() >= 30:
            print('Early stopping')
            break
        print(optimizer.param_groups[0]['lr'])
        scheduler.step(validation_loss)

    print('Training finished', best_prec1)

    if test:
        test_set = LFPData(data_file=matrix, split='test', standardize=True)
        test_loader = DataLoader(test_set,
                                 shuffle=False,
                                 batch_size=batch_size,
                                 pin_memory=True,
                                 num_workers=1)
        test_loss, test_acc = test_epoch(test_loader, net, criterion, mlog)

        result_writer.update(
            title, {'Test': {
                'loss': test_loss,
                'accuracy': test_acc
            }})

        print(test_loss, test_acc)

    # save pngs of visdom plot into log path
    plot_visdom(mlog, log_dir)
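Here EarlyStopping() tracks the validation loss through eval_loss() and reports, via get_nsteps(), how many epochs have passed without improvement; the loop above stops once that count reaches 30. A minimal sketch of such an interface (assumed, not taken from the original repository):

class EarlyStopping:
    def __init__(self):
        self.best_loss = None
        self.nsteps = 0

    def eval_loss(self, loss):
        # Reset the counter when the loss improves, otherwise increment it.
        if self.best_loss is None or loss < self.best_loss:
            self.best_loss = loss
            self.nsteps = 0
        else:
            self.nsteps += 1

    def get_nsteps(self):
        return self.nsteps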
Exemple #23
0
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    args_academic = read_args()
    data = dataprocess_han.input_data_han(args_academic)
    #g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    #val_mask, test_mask = load_data(args['dataset'])
    features = torch.tensor(data.a_text_embed, dtype=torch.float32)
    labels = torch.tensor(data.a_class)

    APA_g = dgl.graph(data.APA_matrix, ntype='author', etype='coauthor')
    APVPA_g = dgl.graph(data.APVPA_matrix, ntype='author', etype='attendance')
    APPA_g = dgl.graph(data.APPA_matrix, ntype='author', etype='reference')

    #g = [APA_g, APPA_g]
    g = [APA_g, APVPA_g, APPA_g]

    num_classes = 4
    features = features.to(args['device'])
    labels = labels.to(args['device'])

    #if args['hetero']:
    #from model_hetero import HAN
    #model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
    #in_size=features.shape[1],
    #hidden_size=args['hidden_units'],
    #out_size=num_classes,
    #num_heads=args['num_heads'],
    #dropout=args['dropout']).to(args['device'])
    #else:
    model = HAN(num_meta_paths=len(g),
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    model.load_state_dict(torch.load("./model_para.pt"))

    for epoch in range(args['num_epochs']):

        X = [[i] for i in range(args_academic.A_n)]
        train_X, test_X, _, _ = train_test_split(X, X, test_size=0.8)  # keep 20% of authors
        train_X, test_X, _, _ = train_test_split(train_X,
                                                 train_X,
                                                 test_size=0.2)  # re-split that 20% into train/test

        train_mask = get_binary_mask(args_academic.A_n, train_X)
        test_mask = get_binary_mask(args_academic.A_n, test_X)

        #train_mask = torch.tensor(data.train_mask)
        #test_mask = torch.tensor(data.test_mask)
        val_mask = test_mask
        train_mask = train_mask.to(args['device'])
        val_mask = val_mask.to(args['device'])
        test_mask = test_mask.to(args['device'])
        model.train()
        logits, _ = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
            'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.
            format(epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                   val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    model.eval()
    _, embedding = model(g, features)
    embed_file = open("./node_embedding.txt", "w")
    for k in range(embedding.shape[0]):
        embed_file.write('a' + str(k) + " ")
        for l in range(embedding.shape[1] - 1):
            embed_file.write(str(embedding[k][l].item()) + " ")
        embed_file.write(str(embedding[k][-1].item()) + "\n")
    embed_file.close()
    #test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    #print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
    #test_loss.item(), test_micro_f1, test_macro_f1))
    torch.save(model.state_dict(), "./model_para.pt")
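
get_binary_mask() above is imported from the example's utilities and not shown here; a plausible minimal version, assuming it only turns a list of node indices into a boolean mask of length total_size:

import torch

def get_binary_mask(total_size, indices):
    # `indices` arrives as a list of [i] singletons from train_test_split above
    mask = torch.zeros(total_size, dtype=torch.bool)
    mask[torch.tensor(indices).flatten()] = True
    return mask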
Exemple #24
0
import numpy as np
import utils
from utils import EarlyStopping
from tqdm import tqdm
from torch import nn, optim
from config import config
from torchsummary import summary
from torch.optim import SGD, Adam
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from timeit import default_timer as timer
from sklearn.metrics import f1_score, accuracy_score
from thop import profile
from pthflops import count_ops

early_stopping = EarlyStopping(patience=20,
                               verbose=True)  #patience=7, verbose=False
early_stopping_f1 = EarlyStopping(patience=20,
                                  verbose=True)  #patience=7, verbose=False


class SARDataset(Dataset):
    def __init__(self, images_df, datapath, labelpath, winsize=36, mode="2D"):
        self.images_df = images_df.copy()  #csv
        data = np.load(datapath)
        self.data = data.transpose([2, 0, 1]).astype('float32')
        self.label = np.load(labelpath)
        self.mode = mode
        self.winsize = winsize

    def __len__(self):
        return len(self.images_df)
    elif args.freeze_bert and args.use_adversary:
        raise Exception(
            'No purpose in using an adversary if BERT layers are frozen')
    else:
        param_optimizer = list(model.named_parameters(
        )) + list(predictor.named_parameters()) + list(discriminator.named_parameters())

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(
            nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(
            nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    es = EarlyStopping(patience=args.es_patience)

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=learning_rate, correct_bias=False)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=args.num_warmup_steps, num_training_steps=num_train_optimization_steps)

    for epoch in range(1, num_train_epochs+1):
        # training
        if not args.freeze_bert:
            model.train()
        else:
            model.eval()
        predictor.train()
        if args.use_adversary:
            discriminator.train()
Exemple #26
0
def main(args):
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * (args.num_layers - 1)) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            if cuda:
                torch.cuda.synchronize()
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            if cuda:
                torch.cuda.synchronize()
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Exemple #27
0
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()  # convert the masks to boolean type
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:  # build neighbor nodes on the heterogeneous graph
        from model_hetero import HAN
        model = HAN(
            meta_paths=[['pa', 'ap'],
                        ['pf', 'fp']],  # previously built edges pa, ap combined into the meta-path PAP
            in_size=features.shape[1],
            hidden_size=args['hidden_units'],
            out_size=num_classes,
            num_heads=args['num_heads'],
            dropout=args['dropout']).to(args['device'])
        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(
            logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print(
            'Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
            'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.
            format(epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                   val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.
          format(test_loss.item(), test_micro_f1, test_macro_f1))
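
score() comes from the HAN example's utility module and is not reproduced above; a likely minimal version, assuming it returns accuracy plus micro and macro F1 computed with scikit-learn:

import torch
from sklearn.metrics import accuracy_score, f1_score

def score(logits, labels):
    # predicted class = argmax over the logits
    _, indices = torch.max(logits, dim=1)
    prediction = indices.long().cpu().numpy()
    labels = labels.cpu().numpy()
    accuracy = accuracy_score(labels, prediction)
    micro_f1 = f1_score(labels, prediction, average='micro')
    macro_f1 = f1_score(labels, prediction, average='macro')
    return accuracy, micro_f1, macro_f1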
Exemple #28
0
def main(args):
    # load and preprocess dataset
    if args.dataset == 'reddit':
        data = RedditDataset()
    elif args.dataset in ['photo', "computer"]:
        data = MsDataset(args)
    else:
        data = load_data(args)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    current_time = time.strftime('%d_%H:%M:%S', localtime())
    writer = SummaryWriter(log_dir='runs/' + current_time + '_' + args.sess, flush_secs=30)

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.bool().cuda()
        val_mask = val_mask.bool().cuda()
        test_mask = test_mask.bool().cuda()


    g = data.graph
    # add self loop
    if args.dataset != 'reddit':
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    print('edge number %d'%(n_edges))
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]

    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.idrop,
                args.adrop,
                args.alpha,
                args.bias,
                args.residual, args.l0)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=150)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    dur = []
    time_used = 0

    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()

        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        loss_l0 = args.loss_l0 * model.gat_layers[0].loss
        optimizer.zero_grad()
        (loss + loss_l0).backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        writer.add_scalar('edge_num/0', model.gat_layers[0].num, epoch)

        if args.fastmode:
            val_acc, loss = accuracy(logits[val_mask], labels[val_mask], loss_fcn)
        else:
            val_acc,_ = evaluate(model, features, labels, val_mask, loss_fcn)
            if args.early_stop:
                if stopper.step(val_acc, model):   
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(), train_acc,
                     val_acc, n_edges / np.mean(dur) / 1000))
        writer.add_scalar('loss', loss.item(), epoch)
        writer.add_scalar('f1/train_f1_mic', train_acc, epoch)
        writer.add_scalar('f1/test_f1_mic', val_acc, epoch)
        writer.add_scalar('time/time', time_used, epoch)

    writer.close()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc, _ = evaluate(model,features, labels, test_mask, loss_fcn)
    print("Test Accuracy {:.4f}".format(acc))
Exemple #29
0
    def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x, te_y,
                 num_items, init_way, logger):
        self.sess = sess
        self.configs = configs
        self.tr_x = tr_x
        self.tr_y = tr_y
        self.val_x = val_x
        self.val_y = val_y
        self.te_x = te_x
        self.te_y = te_y
        self.num_items = num_items
        self.logger = logger

        self.rnn_hidden_size = configs.rnn_hidden_size
        self.batch_size = configs.batch_size
        self.num_layers = configs.num_layers

        # Initialize the optimizer
        self.optimizer_type = configs.optimizer_type
        self.weight_decay = configs.weight_decay
        self.momentum = configs.momentum
        self.lr = configs.lr
        self.eps = configs.eps

        self.clip_grad = configs.clip_grad
        self.clip_grad_threshold = configs.clip_grad_threshold
        self.lr_decay_step = configs.lr_decay_step
        self.lr_decay = configs.lr_decay
        self.lr_decay_rate = configs.lr_decay_rate
        self.drop_prob_ho = configs.drop_prob_ho
        self.drop_prob_input = configs.drop_prob_input
        self.drop_prob_recurrent = configs.drop_prob_recurrent

        # etc
        self.k = k
        self.time_sort = configs.time_sort
        self.loss_type = configs.loss_type
        self.n_epochs = configs.n_epochs
        self.is_shuffle = configs.is_shuffle
        self.embedding_size = configs.embedding_size
        self.num_topics = configs.num_topics
        self.early_stop = EarlyStopping(configs.max_patience)

        # batch_iterator
        self.tr_sess_idx = np.arange(len(self.tr_y))
        self.val_sess_idx = np.arange(len(self.val_y))
        self.te_sess_idx = np.arange(len(self.te_y))

        # record best epoch
        self.max_val_recall = [0 for _ in range(len(self.k))]
        self.max_te_recall = [0 for _ in range(len(self.k))]
        self.best_epoch = 0

        tr_lengths = [len(s) for s in self.tr_x]
        val_lengths = [len(s) for s in self.val_x]
        te_lengths = [len(s) for s in self.te_x]
        tr_maxlen = np.max(tr_lengths)
        val_maxlen = np.max(val_lengths)
        te_maxlen = np.max(te_lengths)
        self.maxlen = np.max([tr_maxlen, val_maxlen, te_maxlen])
        self.maxlen = None
        self.embed_init, self.weight_init, self.bias_init, self.gate_bias_init, self.kern_init = init_way
    return validation_loss, validation_acc_1




# Part 5. 'main' function
if __name__ == '__main__':

    logger.info("Begin evaluating on validation set before training")
    validate_function(val_loader)
    
    logger.info("training status: ")

    
    early_stopping = EarlyStopping(patience=basic_configs['early_stopping_patience'], delta=0)

    for epoch in range(basic_configs['num_epochs']):
        logger.info("Begin training epoch {}".format(epoch + 1))
        validation_acc = train_function(epoch)

        if validation_acc.avg > max_val_acc:
            max_val_acc = validation_acc.avg
            max_val_acc_epoch = epoch + 1

        
        early_stopping(validation_acc.avg)
        logger.info("Early stopping counter: {}".format(early_stopping.counter))
        logger.info("Early stopping best_score: {}".format(early_stopping.best_score))
        logger.info("Early stopping early_stop: {}".format(early_stopping.early_stop))