Example No. 1
def main():
    logger = Logger(args.logdir)
    params = dict()
    params['batch_size'] = args.batch_size
    params['data_dir'] = args.path_to_train_data
    params['major'] = 'users'
    params['itemIdInd'] = 1
    params['userIdInd'] = 0
    print("Loading training data")
    data_layer = input_layer.UserItemRecDataProvider(params=params)
    print("Data loaded")
    print("Total items found: {}".format(len(data_layer.data.keys())))
    print("Vector dim: {}".format(data_layer.vector_dim))

    print("Loading eval data")
    eval_params = copy.deepcopy(params)
    # must set eval batch size to 1 to make sure no examples are missed
    eval_params['data_dir'] = args.path_to_eval_data
    eval_data_layer = input_layer.UserItemRecDataProvider(
        params=eval_params,
        user_id_map=data_layer.userIdMap,  # the mappings are provided
        item_id_map=data_layer.itemIdMap)
    eval_data_layer.src_data = data_layer.data
    rencoder = model.AutoEncoder(
        layer_sizes=[data_layer.vector_dim] +
        [int(l) for l in args.hidden_layers.split(',')],
        nl_type=args.non_linearity_type,
        is_constrained=args.constrained,
        dp_drop_prob=args.drop_prob,
        last_layer_activations=not args.skip_last_layer_nl)

    model_checkpoint = args.logdir + "/model"
    path_to_model = Path(model_checkpoint)
    if path_to_model.is_file():
        print("Loading model from: {}".format(model_checkpoint))
        rencoder.load_state_dict(torch.load(model_checkpoint))

    print('######################################################')
    print('######################################################')
    print('############# AutoEncoder Model: #####################')
    print(rencoder)
    print('######################################################')
    print('######################################################')

    gpu_ids = [int(g) for g in args.gpu_ids.split(',')]
    print('Using GPUs: {}'.format(gpu_ids))
    if len(gpu_ids) > 1:
        rencoder = nn.DataParallel(rencoder, device_ids=gpu_ids)
    rencoder = rencoder.cuda()

    if args.optimizer == "adam":
        optimizer = optim.Adam(rencoder.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    elif args.optimizer == "adagrad":
        optimizer = optim.Adagrad(rencoder.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)
    elif args.optimizer == "momentum":
        optimizer = optim.SGD(rencoder.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=args.weight_decay)
        scheduler = MultiStepLR(optimizer,
                                milestones=[24, 36, 48, 66, 72],
                                gamma=0.5)
    elif args.optimizer == "rmsprop":
        optimizer = optim.RMSprop(rencoder.parameters(),
                                  lr=args.lr,
                                  momentum=0.9,
                                  weight_decay=args.weight_decay)
    else:
        raise ValueError('Unknown optimizer kind')

    t_loss = 0.0
    t_loss_denom = 0.0
    global_step = 0

    if args.noise_prob > 0.0:
        dp = nn.Dropout(p=args.noise_prob)

    for epoch in range(args.num_epochs):
        print('Doing epoch {} of {}'.format(epoch, args.num_epochs))
        e_start_time = time.time()
        rencoder.train()
        total_epoch_loss = 0.0
        denom = 0.0
        if args.optimizer == "momentum":
            scheduler.step()
        for i, mb in enumerate(data_layer.iterate_one_epoch()):
            inputs = Variable(mb.cuda().to_dense())
            optimizer.zero_grad()
            outputs = rencoder(inputs)
            loss, num_ratings = model.MSEloss(outputs, inputs)
            loss = loss / num_ratings
            loss.backward()
            optimizer.step()
            global_step += 1
            t_loss += loss.data[0]
            t_loss_denom += 1

            if i % args.summary_frequency == 0:
                print('[%d, %5d] RMSE: %.7f' %
                      (epoch, i, sqrt(t_loss / t_loss_denom)))
                logger.scalar_summary("Training_RMSE",
                                      sqrt(t_loss / t_loss_denom), global_step)
                t_loss = 0
                t_loss_denom = 0.0
                log_var_and_grad_summaries(logger, rencoder.encode_w,
                                           global_step, "Encode_W")
                log_var_and_grad_summaries(logger, rencoder.encode_b,
                                           global_step, "Encode_b")
                if not rencoder.is_constrained:
                    log_var_and_grad_summaries(logger, rencoder.decode_w,
                                               global_step, "Decode_W")
                log_var_and_grad_summaries(logger, rencoder.decode_b,
                                           global_step, "Decode_b")

            total_epoch_loss += loss.data[0]
            denom += 1

            #if args.aug_step > 0 and i % args.aug_step == 0 and i > 0:
            if args.aug_step > 0:
                # Magic data augmentation trick happens here
                for t in range(args.aug_step):
                    inputs = Variable(outputs.data)
                    if args.noise_prob > 0.0:
                        inputs = dp(inputs)
                    optimizer.zero_grad()
                    outputs = rencoder(inputs)
                    loss, num_ratings = model.MSEloss(outputs, inputs)
                    loss = loss / num_ratings
                    loss.backward()
                    optimizer.step()

        e_end_time = time.time()
        print(
            'Total epoch {} finished in {} seconds with TRAINING RMSE loss: {}'
            .format(epoch, e_end_time - e_start_time,
                    sqrt(total_epoch_loss / denom)))
        logger.scalar_summary("Training_RMSE_per_epoch",
                              sqrt(total_epoch_loss / denom), epoch)
        logger.scalar_summary("Epoch_time", e_end_time - e_start_time, epoch)
        if epoch % 3 == 0 or epoch == args.num_epochs - 1:
            eval_loss = do_eval(rencoder, eval_data_layer)
            print('Epoch {} EVALUATION LOSS: {}'.format(epoch, eval_loss))
            logger.scalar_summary("EVALUATION_RMSE", eval_loss, epoch)
            print("Saving model to {}".format(model_checkpoint + ".epoch_" +
                                              str(epoch)))
            torch.save(rencoder.state_dict(),
                       model_checkpoint + ".epoch_" + str(epoch))

    print("Saving model to {}".format(model_checkpoint + ".last"))
    torch.save(rencoder.state_dict(), model_checkpoint + ".last")
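The loop above uses the pre-0.4 PyTorch idioms Variable and loss.data[0]. On current PyTorch the same inner step can be written without Variable, reading the scalar loss with .item(). A minimal sketch, reusing rencoder, optimizer, data_layer and model.MSEloss from the example above:

for i, mb in enumerate(data_layer.iterate_one_epoch()):
    inputs = mb.cuda().to_dense()   # plain tensors, no Variable wrapper
    optimizer.zero_grad()
    outputs = rencoder(inputs)
    loss, num_ratings = model.MSEloss(outputs, inputs)
    loss = loss / num_ratings
    loss.backward()
    optimizer.step()
    t_loss += loss.item()           # .item() replaces loss.data[0]
    t_loss_denom += 1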
Example No. 2
 def test_adagrad_sparse(self):
     self._test_rosenbrock_sparse(
         lambda params: optim.Adagrad(params, lr=1e-1))
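Adagrad is one of the built-in optimizers that accepts sparse gradients, which is what the sparse Rosenbrock test above exercises. A minimal sketch of the same idea outside the test harness, using a sparse nn.Embedding (all names here are illustrative):

import torch
from torch import nn, optim

# A sparse embedding produces sparse gradients on backward, which Adagrad accepts.
emb = nn.Embedding(1000, 16, sparse=True)
opt = optim.Adagrad(emb.parameters(), lr=1e-1)

ids = torch.tensor([3, 17, 256])
loss = emb(ids).pow(2).sum()
loss.backward()                 # emb.weight.grad is a sparse tensor
opt.step()
opt.zero_grad()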
Example No. 3
                    torch.matmul(self.Wo, embedding_tensor) + self.bo)
                u_value = torch.tanh(
                    torch.matmul(self.Wu, embedding_tensor) + self.bu)
                cell = i_gate * u_value
                hidden = o_gate * torch.tanh(cell)
                logits = (torch.matmul(self.Why, hidden) + self.by).view(
                    1, output_size)
                target = Var(torch.LongTensor([int(scores[index])]))
                loss = F.nll_loss(F.log_softmax(logits, dim=1), target)
                return (loss, hidden, cell)

        return rec(0)[0]


net = TreeNet()
opt = optim.Adagrad(net.parameters(), lr=learning_rate)

epocNum = 6
loopStart = time.time()
loss_save = []
for epoc in range(epocNum):
    total_loss = 0
    for n in range(tree_data_size):
        opt.zero_grad()
        loss = net.forward(scores[n], words[n], lchs[n], rchs[n])
        total_loss += loss.data[0]
        loss.backward()
        opt.step()
    loss_save.append(total_loss / tree_data_size)
    print("epoc {}, average_loss {}".format(epoc, total_loss / tree_data_size))
Example No. 4
 def configure_optimizers__adagrad(self):
     optimizer = optim.Adagrad(self.parameters(), lr=self.learning_rate)
     return optimizer
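The hook above comes from a PyTorch Lightning module: Lightning calls configure_optimizers once and drives the optimizer itself. A minimal, hypothetical LightningModule showing where that hook sits (the module and its single layer are illustrative, only the configure_optimizers body mirrors the snippet above):

import pytorch_lightning as pl
import torch
from torch import nn, optim
import torch.nn.functional as F

class LitRegressor(pl.LightningModule):
    def __init__(self, learning_rate=1e-2):
        super().__init__()
        self.learning_rate = learning_rate
        self.layer = nn.Linear(16, 1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return F.mse_loss(self.layer(x), y)

    def configure_optimizers(self):
        return optim.Adagrad(self.parameters(), lr=self.learning_rate)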
Example No. 5
 def adagrad_constructor(params):
     adagrad = optim.Adagrad(params, lr=1e-1)
     return StochasticWeightAveraging(adagrad,
                                      swa_start=1000,
                                      swa_freq=1,
                                      swa_lr=1e-2)
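The constructor above wraps a base Adagrad optimizer for stochastic weight averaging. Assuming StochasticWeightAveraging behaves like torchcontrib.optim.SWA (same swa_start/swa_freq/swa_lr arguments), a typical training sketch with placeholder model and data would be:

import torch
from torch import nn, optim
from torchcontrib.optim import SWA   # assumed stand-in for StochasticWeightAveraging

model = nn.Linear(10, 1)
base = optim.Adagrad(model.parameters(), lr=1e-1)
opt = SWA(base, swa_start=1000, swa_freq=1, swa_lr=1e-2)

for step in range(2000):
    x, y = torch.randn(32, 10), torch.randn(32, 1)
    opt.zero_grad()
    nn.functional.mse_loss(model(x), y).backward()
    opt.step()            # after swa_start, a running average of the weights is kept
opt.swap_swa_sgd()        # copy the averaged weights back into the model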
Example No. 6
                    nn.ReLU(),
                    nn.BatchNorm1d(h_dim),
                    nn.Dropout(),
                    nn.Linear(h_dim, input_dim),
                    nn.ReLU(),
                    nn.BatchNorm1d(input_dim))      
            def forward(self, x):
                output = self.EnE(x)
                Xhat = self.DeE(output)
                return Xhat, output           

        torch.cuda.manual_seed_all(42)

        AutoencoderE = AE()

        solverE = optim.Adagrad(AutoencoderE.parameters(), lr=lrE)
        rec_criterion = torch.nn.MSELoss()

        for it in range(epoch):

            epoch_cost4 = 0
            num_minibatches = int(n_sampE / mb_size) 

            for i, (dataE, dataM, dataC, target) in enumerate(trainLoader):
                
                AutoencoderE.train()
                Dat_train = torch.cat((dataE, dataM, dataC), 1)
                Dat_hat, ZX = AutoencoderE(Dat_train)

                loss = rec_criterion(Dat_hat, Dat_train)     
Example No. 7
    def step(itr, step_size):
        if itr%(2*step_size) < step_size:
            return (itr%(2*step_size)) / step_size
        return (2*step_size-(itr%(2*step_size)))/step_size
        
    return lr_lambda


# In[ ]:


loss_function = nn.CrossEntropyLoss()

clr = cyclical_lr(500, 1e-5, 1e-2)

optimizer = optim.Adagrad(parameters_to_update, lr=1.0)

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
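With LambdaLR the base learning rate of 1.0 is scaled by clr(step) each time the scheduler advances, so the triangular cycle from cyclical_lr only takes effect if scheduler.step() is called once per batch. A sketch of that inner loop; model, loader and device stand in for whatever train_model below receives:

for inputs, labels in loader:
    inputs, labels = inputs.to(device), labels.to(device)
    optimizer.zero_grad()
    loss = loss_function(model(inputs), labels)
    loss.backward()
    optimizer.step()
    scheduler.step()   # advance the cycle; current lr is optimizer.param_groups[0]['lr']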


# In[ ]:


def train_model(model, data_loaders, loss_function, optimizer, num_epochs, device):
    
    start = time.time()
    val_accuracies = list()
    max_val_accuracy = 0.0

    optimal_model_parameters = copy.deepcopy(model.state_dict())
Example No. 8
def main(args):
    episodes = split_data(args.data)

    #episodes = episodes[:len(episodes)//30] # for debug
    valid_rate = 0.15
    episodes = np.array(episodes, dtype=object)
    valid_num = int(valid_rate * len(episodes))
    valid_episodes = episodes[:valid_num]
    episodes = episodes[valid_num:]

    vocab2index, index2vocab, embedding_weight, embedding_dim = build_vocab(
        episodes, args.embedding, 100, train_oov=False)
    episodes_text2index(episodes, vocab2index)
    episodes_text2index(valid_episodes, vocab2index)

    batch_size = args.batch_size
    #batch_list = get_batch_list(episodes, batch_size)
    #valid_batch_list = get_batch_list(valid_episodes, batch_size)

    save_round = 1
    date = datetime.datetime.now().strftime("%d-%H-%M")
    save_path = 'model/model_{}'.format(date)
    print('save_path = {}'.format(save_path))
    if not os.path.exists(save_path):
        os.makedirs(save_path, exist_ok=True)
    with open(os.path.join(save_path, 'vocab.pickle'), 'wb') as f:
        pickle.dump({
            'vocab2index': vocab2index,
            'index2vocab': index2vocab
        }, f)
    log_file = codecs.open(os.path.join(save_path, 'log'), 'w')
    embedding_weight = torch.Tensor(embedding_weight)

    #oracle = generator.Generator(GEN_EMBEDDING_DIM, GEN_HIDDEN_DIM, VOCAB_SIZE, MAX_SEQ_LEN, gpu=CUDA)
    #oracle.load_state_dict(torch.load(oracle_state_dict_path))
    #oracle_samples = torch.load(oracle_samples_path).type(torch.LongTensor)
    # a new oracle can be generated by passing oracle_init=True in the generator constructor
    # samples for the new oracle can be generated using helpers.batchwise_sample()

    gen = generator.Generator(embedding_dim,
                              GEN_HIDDEN_DIM,
                              len(vocab2index),
                              MAX_SEQ_LEN,
                              embedding_weight,
                              gpu=CUDA)
    dis = discriminator.Discriminator(embedding_dim,
                                      DIS_HIDDEN_DIM,
                                      len(vocab2index),
                                      MAX_SEQ_LEN,
                                      embedding_weight,
                                      gpu=CUDA)

    if CUDA:
        #oracle = oracle.cuda()
        gen = gen.cuda()
        dis = dis.cuda()
        #oracle_samples = oracle_samples.cuda()
    #for parameters in gen.parameters():
    #    print(parameters)

    # GENERATOR MLE TRAINING
    print('Starting Generator MLE Training...')
    gen_optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      gen.parameters()),
                               lr=1e-2)
    train_generator_MLE(gen, gen_optimizer, episodes, valid_episodes,
                        batch_size, MLE_TRAIN_EPOCHS)

    # torch.save(gen.state_dict(), pretrained_gen_path)
    # gen.load_state_dict(torch.load(pretrained_gen_path))

    # PRETRAIN DISCRIMINATOR
    print('\nStarting Discriminator Training...')
    dis_optimizer = optim.Adagrad(
        filter(lambda p: p.requires_grad, dis.parameters()))
    train_discriminator(dis, dis_optimizer, episodes, valid_episodes, gen,
                        batch_size, 1, 1)

    # torch.save(dis.state_dict(), pretrained_dis_path)
    # dis.load_state_dict(torch.load(pretrained_dis_path))

    # ADVERSARIAL TRAINING
    #print('\nStarting Adversarial Training...')
    #oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
    #start_letter=START_LETTER, gpu=CUDA)
    #print('\nInitial Oracle Sample Loss : %.4f' % oracle_loss)

    for epoch in range(ADV_TRAIN_EPOCHS):
        print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        # TRAIN GENERATOR
        print('\nAdversarial Training Generator : ', end='')
        sys.stdout.flush()
        train_generator_PG(gen, gen_optimizer, dis, batch_size, episodes, 1,
                           20)

        # TRAIN DISCRIMINATOR
        print('\nAdversarial Training Discriminator : ')
        train_discriminator(dis, dis_optimizer, episodes, valid_episodes, gen,
                            batch_size, 1, 1)
Example No. 9
def main(args):
    """This major function controls finding data, splitting train and validation data, building datasets,
    building dataloaders, building a model, loading a model, training a model, testing a model, and writing
    a submission"""
    best_acc = 0

    # Specify the GPUs to use
    print("Finding GPUs...")
    gpus = list(range(torch.cuda.device_count()))
    print('--- GPUS: {} ---'.format(str(gpus)))

    if "train" in args.modes.lower():
        # List the trainval folders
        print("Load trainval data...")
        trainval_folder_names = [
            x for x in os.listdir(args.trainval_data_path)
            if os.path.isdir(os.path.join(args.trainval_data_path, x))
        ]
        more_train_img_names = [
            x for x in os.listdir(
                os.path.join(args.more_train_data_path, 'JPEGImages'))
        ]

        # Figure out how many folders to use for training and validation
        num_train_folders = int(
            len(trainval_folder_names) * args.trainval_split_percentage)
        num_more_train_imgs = len(more_train_img_names)
        num_val_folders = len(trainval_folder_names) - num_train_folders
        print("Building dataset split...")
        print("--- Number of train folders: {} ---".format(num_train_folders))
        print("--- Number of additional train images: {} ---".format(
            num_more_train_imgs))
        print("--- Number of val folders: {} ---".format(num_val_folders))

        # Choose the training and validation folders
        random.shuffle(
            trainval_folder_names)  # TODO if loading a model, be careful
        train_folder_names = trainval_folder_names[:num_train_folders]
        val_folder_names = trainval_folder_names[num_train_folders:]

        # Make dataloaders
        print("Making train and val dataloaders...")
        train_loader = make_dataloader(train_folder_names,
                                       args.trainval_data_path,
                                       args.batch_size, args.task, args.modes)
        more_train_loader = make_dataloader(more_train_img_names,
                                            args.more_train_data_path,
                                            args.batch_size,
                                            args.task,
                                            args.modes,
                                            xml=True)
        val_loader = make_dataloader(val_folder_names, args.trainval_data_path,
                                     args.batch_size, args.task, args.modes)

    # Build and load the model
    model = build_model(args, gpus)
    model = load_model(args, model, args.load_epoch)

    # Declare the optimizer, learning rate scheduler, and training loops. Note that models are saved to the current directory.

    print("Creating optimizer and scheduler...")
    if args.task == 4:
        if args.optimizer_string == 'RMSprop':
            optimizer = optim.RMSprop(model.parameters(),
                                      lr=args.lr,
                                      weight_decay=args.weight_decay)
        elif args.optimizer_string == 'Adam':
            optimizer = optim.Adam(model.parameters(),
                                   lr=args.lr,
                                   weight_decay=args.weight_decay)
        elif args.optimizer_string == 'SGD':
            optimizer = optim.SGD(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)
        elif args.optimizer_string == 'Adagrad':
            optimizer = optim.Adagrad(model.parameters(),
                                      lr=args.lr,
                                      weight_decay=args.weight_decay)
        elif args.optimizer_string == 'Adadelta':
            optimizer = optim.Adadelta(model.parameters(),
                                       lr=args.lr,
                                       weight_decay=args.weight_decay)
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         factor=0.3,
                                                         patience=10,
                                                         verbose=True)
    else:
        optimizer = optim.Adam(params=model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay,
                               amsgrad=True)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         factor=0.5,
                                                         patience=5,
                                                         verbose=True)

    # This trainer class does all the work
    print("Instantiating runner...")
    if args.task == 2:
        runner = Runner(model, optimizer, sum_mse, args.task, args.save_dir)
    else:
        runner = Runner(model, optimizer, sum_cross_entropy, args.task,
                        args.save_dir)
    best_acc = 0

    if "train" in args.modes.lower():
        print("Begin training... {}, lr:{} + wd:{} + opt:{} + bs:{} ".format(
            str(args.model), str(args.lr), str(args.weight_decay),
            str(args.optimizer_string), str(args.batch_size)))
        best_acc = runner.loop(args.num_epoch, train_loader, more_train_loader,
                               val_loader, scheduler, args.batch_size)

    args.save_path = save_path = args.save_dir.split(
        '/')[-1] + '-' + args.model + '-' + str(best_acc) + '-' + str(
            args.lr) + '-' + str(args.weight_decay) + '-' + str(
                args.optimizer_string) + '-' + str(args.batch_size)

    if "test" in args.modes.lower():
        print("Load test data...")
        # Get test folder names
        test_folder_names = [
            x for x in os.listdir(args.test_data_path)
            if os.path.isdir(os.path.join(args.test_data_path, x))
        ]

        # Switch to eval mode
        model = build_model(args, gpus)
        model = load_model(args, model, 9999)
        model.eval()

        # Make test dataloader
        print("Making test dataloaders...")
        test_loader = make_dataloader(test_folder_names, args.test_data_path,
                                      args.batch_size, args.task, 'test')

        # Run the dataloader through the neural network
        print("Conducting a test...")
        _, _, outputs, logits = runner.test(test_loader, args.batch_size)

        # Write the submission to CSV
        print("Writing a submission to \"csvs/{}.csv\"...".format(save_path))

        if args.task == 2:
            with open('csvs/' + save_path + '.csv', 'w') as sub:
                sub.write('guid/image/axis,value\n')
                for name, val in outputs:
                    # Build path
                    mod_name = name.split('/')[5] + '/' + name.split(
                        '/')[6].split('_')[0]
                    x = val[0]
                    y = val[1]
                    z = val[2]

                    # Print and write row
                    sub.write(mod_name + '/x,' + str(x) + '\n')
                    sub.write(mod_name + '/y,' + str(y) + '\n')
                    sub.write(mod_name + '/z,' + str(z) + '\n')
            np.save('logits/' + save_path + '.npy',
                    np.array([l for p, l in logits]))

        else:
            print(
                "writing a submission to \"csvs/{}.csv\"...".format(save_path))
            with open('csvs/' + save_path + '.csv', 'w') as sub:
                sub.write('guid/image,label\n')
                for name, val in outputs:
                    # Build path
                    mod_name = name.split('/')[4] + '/' + name.split(
                        '/')[5].split('_')[0]
                    mod_val = int(list_mapping[int(np.argmax(val))])

                    # Print and write row
                    sub.write(mod_name + ',' + str(mod_val) + '\n')
            np.save('logits/' + save_path + '.npy',
                    np.array([l for p, l in logits]))

        # TODO average multiple logits results
        # This function loads these logits but they should be reshaped with .reshape(-1, 23)
        # test_logits = np.load('logits/'+save_path+'.npy')
        #print("0s: {}".format(str(np.count_nonzero(test_logits == 0.0))))
        #print("1s: {}".format(str(np.count_nonzero(test_logits == 1.0))))
        #print("2s: {}".format(str(np.count_nonzero(test_logits == 2.0))))
        print('Done!')
Example No. 10
def train(data):
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
    # loss_function = nn.NLLLoss()
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, True, data.sentence_classification)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            # print("loss:",loss.item())
            sample_loss += loss.item()
            total_loss += loss.item()
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        # continue
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            # model_name = data.model_dir +'.'+ str(idx) + ".model"
            model_name = data.model_dir + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
Example No. 11
model = resnet.ResNet18().cuda()

if args.method == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
elif args.method == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
elif args.method == 'hadam':
    import Hadam
    optimizer = Hadam.Hadam(model.parameters(),
                            lr=args.lr,
                            fraction=args.fraction,
                            eta=args.eta,
                            gamma=args.gamma,
                            bias_correction=args.bias_correction)
elif args.method == 'adagrad':
    optimizer = optim.Adagrad(model.parameters(), lr=args.lr)
else:
    raise ValueError('Unknown optimization method: {}'.format(args.method))

model.train()

for epoch in range(start_epoch + 1, args.Nepoch + 1):
    for batch_idx, (data, target) in enumerate(train_loader):
        # Get Samples
        data, target = Variable(data).cuda(), Variable(target).cuda()
        # Init
        optimizer.zero_grad()

        # Predict
        y_pred = model(data)

        # Calculate loss
        loss = F.cross_entropy(y_pred, target)
Example No. 12
def prepare(args):
    global trainloader
    global testloader
    global net
    global criterion
    global optimizer

    # Data
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

    #classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    # Model
    print('==> Building model..')
    if args['model'] == 'vgg':
        net = VGG('VGG19')
    if args['model'] == 'resnet18':
        net = ResNet18()
    if args['model'] == 'googlenet':
        net = GoogLeNet()
    if args['model'] == 'densenet121':
        net = DenseNet121()
    if args['model'] == 'mobilenet':
        net = MobileNet()
    if args['model'] == 'dpn92':
        net = DPN92()
    if args['model'] == 'shufflenetg2':
        net = ShuffleNetG2()
    if args['model'] == 'senet18':
        net = SENet18()

    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    criterion = nn.CrossEntropyLoss()
    #optimizer = optim.SGD(net.parameters(), lr=args['lr'], momentum=0.9, weight_decay=5e-4)

    if args['optimizer'] == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=args['lr'], momentum=0.9, weight_decay=5e-4)
    if args['optimizer'] == 'Adadelta':
        optimizer = optim.Adadelta(net.parameters(), lr=args['lr'])
    if args['optimizer'] == 'Adagrad':
        optimizer = optim.Adagrad(net.parameters(), lr=args['lr'])
    if args['optimizer'] == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=args['lr'])
    if args['optimizer'] == 'Adamax':
        optimizer = optim.Adamax(net.parameters(), lr=args['lr'])
Example No. 13
    def init_optimizers(optimizer: str, model_named_parameters: Generator,
                        learning_rate: float, adam_epsilon: float,
                        weight_decay):
        """

        @param optimizer: parameter to choose the optimizer
        @param model_named_parameters: model parameters
        @param learning_rate: learning rate
        @param adam_epsilon: adam epsilon value
        @param weight_decay: weight decay
        @return: return optimizer
        """
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model_named_parameters
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                weight_decay,
            },
            {
                "params": [
                    p for n, p in model_named_parameters
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.0
            },
        ]
        if optimizer.lower() == "adamax":
            optimizer = optim.Adamax(optimizer_grouped_parameters,
                                     lr=learning_rate,
                                     eps=adam_epsilon)
        elif optimizer.lower() == "adamw":
            optimizer = AdamW(optimizer_grouped_parameters,
                              lr=learning_rate,
                              eps=adam_epsilon)
        elif optimizer.lower() == "adam":
            optimizer = optim.Adam(optimizer_grouped_parameters,
                                   lr=learning_rate,
                                   eps=adam_epsilon)

        elif optimizer.lower() == "radam":
            optimizer = RAdam(optimizer_grouped_parameters,
                              lr=learning_rate,
                              eps=adam_epsilon)

        elif optimizer.lower() == "adadelta":
            optimizer = optim.Adadelta(optimizer_grouped_parameters,
                                       lr=learning_rate,
                                       eps=adam_epsilon)

        elif optimizer.lower() == "adagrad":
            optimizer = optim.Adagrad(optimizer_grouped_parameters,
                                      lr=learning_rate,
                                      eps=adam_epsilon)
        else:
            optimizer = optim.SGD(optimizer_grouped_parameters,
                                  lr=learning_rate)
        return optimizer
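The helper above splits the parameters into a decayed group and a no-decay group (biases and LayerNorm weights), then builds the requested optimizer. A hypothetical call, assuming an ordinary nn.Module named model:

optimizer = init_optimizers(
    optimizer="adagrad",
    model_named_parameters=model.named_parameters(),  # generator of (name, parameter) pairs
    learning_rate=1e-3,
    adam_epsilon=1e-8,
    weight_decay=0.01,
)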
Example No. 14
def train(args,data,model):
    logger.info("Training modules...")
    model.show_model_summary(logger)
    print("Training Parameters:%s",args)

    if args.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,weight_decay=args.l2)
    elif args.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)
    else:
        print("Optimizer illegal: %s"%(args.optimizer))
        exit(1)
    best_dev = 0

    ## start training
    for idx in range(args.iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        #print("Epoch: %s/%s" %(idx,modules.iteration))
        if args.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, args.lr_decay, args.lr)
        instance_count = 0
        sample_loss = 0
        total_loss = 0
        sample_whole_token = 0
        sample_H2B_high_right_token = 0
        sample_H2B_bot_right_token = 0
        sample_H2B_all_right_token = 0

        sample_B2H_high_right_token = 0
        sample_B2H_bot_right_token = 0
        sample_B2H_all_right_token = 0
        random.shuffle(data.train_Ids)

        model.train()
        model.zero_grad()
        batch_size = args.batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num//batch_size+1

        for batch_id in range(total_batch):

            start = batch_id*batch_size
            end = (batch_id+1)*batch_size
            if end >train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_hlabel,batch_llabel, mask  =\
                batchify_sequence_labeling_with_label(instance, args.gpu,args.max_sent_length,True)
            instance_count += 1

            if args.model == 'DUAL':
                H2BH_loss, H2BB_loss, B2HB_loss, B2HH_loss, H2BH_tag_seqs, H2BB_tag_seqs, B2HB_tag_seqs, B2HH_tag_seqs = model.calculate_loss(
                    batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_hlabel, batch_llabel,mask)

                H2B_whole, H2B_high_right, H2B_bot_right, H2B_all_right = predict_check(H2BH_tag_seqs, H2BB_tag_seqs,batch_hlabel, batch_llabel,mask)
                sample_whole_token += H2B_whole

                sample_H2B_high_right_token += H2B_high_right
                sample_H2B_bot_right_token += H2B_bot_right
                sample_H2B_all_right_token += H2B_all_right

                _, B2H_high_right, B2H_bot_right, B2H_all_right = predict_check(B2HH_tag_seqs, B2HB_tag_seqs,batch_hlabel, batch_llabel,mask)
                sample_B2H_high_right_token += B2H_high_right
                sample_B2H_bot_right_token += B2H_bot_right
                sample_B2H_all_right_token += B2H_all_right

                loss = args.H2BH*H2BH_loss + args.H2BB*H2BB_loss + args.B2HB*B2HB_loss + args.B2HH*B2HH_loss
            elif args.model == 'H2B':
                H2BH_loss, H2BB_loss, H2BH_tag_seqs, H2BB_tag_seqs = model.calculate_loss(batch_word, batch_wordlen,batch_char, batch_charlen,
                                                                                          batch_charrecover,batch_hlabel, batch_llabel,mask)
                H2B_whole, H2B_high_right, H2B_bot_right, H2B_all_right = predict_check(H2BH_tag_seqs, H2BB_tag_seqs,
                                                                                        batch_hlabel, batch_llabel,
                                                                                        mask)
                sample_whole_token += H2B_whole
                sample_H2B_high_right_token += H2B_high_right
                sample_H2B_bot_right_token += H2B_bot_right
                sample_H2B_all_right_token += H2B_all_right

                loss = args.H2BH * H2BH_loss + args.H2BB * H2BB_loss
            elif args.model == 'B2H':
                B2HB_loss, B2HH_loss, B2HB_tag_seqs, B2HH_tag_seqs = model.calculate_loss(batch_word, batch_wordlen,batch_char, batch_charlen,
                                                                                          batch_charrecover,batch_hlabel, batch_llabel,mask)

                B2H_whole, B2H_high_right, B2H_bot_right, B2H_all_right = predict_check(B2HH_tag_seqs, B2HB_tag_seqs,batch_hlabel, batch_llabel,mask)
                sample_whole_token += B2H_whole
                sample_B2H_high_right_token += B2H_high_right
                sample_B2H_bot_right_token += B2H_bot_right
                sample_B2H_all_right_token += B2H_all_right
                loss = args.B2HB * B2HB_loss + args.B2HH * B2HH_loss

            sample_loss += loss.item()
            total_loss += loss.item()
            #if end%(10*args.batch_size) == 0:
            if end % (10*args.batch_size) == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss: %.4f;Token Num:%s ||| H2B Hacc:%.4f;Bacc: %.4f;"
                      "ALLacc:%.4f|||||B2H Hacc:%.4f;Bacc:%.4f;ALLacc:%.4f"
                      % (end, temp_cost, sample_loss, sample_whole_token,
                         (sample_H2B_high_right_token + 0.)/ sample_whole_token,(sample_H2B_bot_right_token + 0.)/ sample_whole_token,
                         (sample_H2B_all_right_token + 0.)/ sample_whole_token,(sample_B2H_high_right_token + 0.)/ sample_whole_token,
                         (sample_B2H_bot_right_token + 0.)/ sample_whole_token,(sample_B2H_all_right_token + 0.) / sample_whole_token))

                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0

                sample_whole_token = 0
                sample_H2B_high_right_token = 0
                sample_H2B_bot_right_token = 0
                sample_H2B_all_right_token = 0

                sample_B2H_high_right_token = 0
                sample_B2H_bot_right_token = 0
                sample_B2H_all_right_token = 0

            loss.backward()
            if args.clip:
                torch.nn.utils.clip_grad_norm_(model.parameters(),args.clip)
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f;Token Num:%s ||| H2B Hacc:%.4f;Bacc: %.4f;"
              "ALLacc:%.4f|||||B2H Hacc:%.4f;Bacc:%.4f;ALLacc:%.4f"
              % (end, temp_cost, sample_loss, sample_whole_token,
                 (sample_H2B_high_right_token + 0.) / sample_whole_token,(sample_H2B_bot_right_token + 0.) / sample_whole_token,
                 (sample_H2B_all_right_token + 0.) / sample_whole_token,(sample_B2H_high_right_token + 0.) / sample_whole_token,
                 (sample_B2H_bot_right_token + 0.) / sample_whole_token,(sample_B2H_all_right_token + 0.) / sample_whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        logger.info("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"%(idx, epoch_cost, train_num/epoch_cost, total_loss))
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        #print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)

        # continue
        if args.model == 'DUAL':
            H2B_evals,B2H_evals, H2B_results,B2H_results= evaluate(data, model,logger, "dev",best_dev=best_dev)
            current_score = B2H_evals[2]
        elif args.model == 'H2B':
            H2B_evals, _,_,_ = evaluate(data, model,logger, "dev",best_dev=best_dev)
            current_score = H2B_evals[2]
        elif args.model == 'B2H':
            B2H_evals, _,_,_ = evaluate(data, model,logger, "dev",best_dev=best_dev)
            current_score = B2H_evals[2]

        if current_score > best_dev:
            print("New f score %f > previous %f ,Save current best modules in file:%s" % (current_score,best_dev,args.load_model_name))
            torch.save(model.state_dict(), args.load_model_name)
            best_dev = current_score
        gc.collect()
Example No. 15
def train(data,
          name,
          save_dset,
          save_model_dir,
          seg=True,
          ignore=False,
          cove_flag=False):
    print('---Training model---')
    data.show_data_summary()
    save_data_name = save_dset
    save_data_setting(data, save_data_name)
    model = NER(data, cove_flag)
    if data.gpu:
        model = model.cuda()

    if data.optim.lower() == 'adam':
        optimizer = optim.Adam(model.parameters())
    elif data.optim.lower() == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters())
    elif data.optim.lower() == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif data.optim.lower() == 'adagrad':
        optimizer = optim.Adagrad(model.parameters())
    elif data.optim.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=data.lr,
                              momentum=data.momentum)
    else:
        optimizer = None
        print('Error optimizer selection, please check config.optim.')
        exit(1)

    best_dev = -1
    epoch = data.iteration
    vis = visdom.Visdom()
    losses = []
    all_F = [[0., 0., 0.]]
    dict_F = {}
    label_F = []
    for idx in range(epoch):
        epoch_start = time.time()
        tmp_start = epoch_start
        print('Epoch: %s/%s' % (idx, epoch))
        if data.optim.lower() == 'sgd':
            optimizer = lr_decay(optimizer, idx, data.lr_decay, data.lr)
        instance_count = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_ids)
        model.train()
        batch_size = data.batch_size
        train_num = len(data.train_ids)
        total_batch = train_num // batch_size
        for batch_id in range(total_batch):
            model.zero_grad()
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            # if end > train_num:
            #     break
            #     #end = train_num
            instance = data.train_ids[start:end]
            # if not instance:
            #     continue
            batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.gpu)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.data[0]
            total_loss += loss.data[0]

            if end % 500 == 0:
                tmp_time = time.time()
                tmp_cost = tmp_time - tmp_start
                tmp_start = tmp_time
                print(
                    '\tInstance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f'
                    % (end, tmp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.0) / whole_token))
                sys.stdout.flush()
                losses.append(sample_loss / 500.0)
                Lwin = 'Loss of ' + name
                vis.line(np.array(losses),
                         X=np.array([i for i in range(len(losses))]),
                         win=Lwin,
                         opts={
                             'title': Lwin,
                             'legend': ['loss']
                         })
                sample_loss = 0
            loss.backward()
            if data.clip:
                torch.nn.utils.clip_grad_norm(model.parameters(), 10.0)
            optimizer.step()
        # tmp_time = time.time()
        # tmp_cost = tmp_time - tmp_start
        # print('\tInstance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f'
        #       % (end, tmp_cost, sample_loss, right_token, whole_token, (right_token+0.0) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            'Epoch: %s training finished. Time: %.2fs, speed: %.2ft/s, total_loss: %s'
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        speed, acc, p, r, f_dev, dict_dev = evaluate(data,
                                                     model,
                                                     'dev',
                                                     ignore=ignore)

        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f_dev
            print(
                'Dev: time: %.2fs, speed: %.2ft/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f'
                % (dev_cost, speed, acc, p, r, f_dev))
        else:
            current_score = acc
            print('Dev: time: %.2fs, speed: %.2ft/s; acc: %.4f' %
                  (dev_cost, speed, acc))
        if current_score > best_dev:
            if seg:
                print('Exceed previous best f score: ', best_dev)
            else:
                print('Exceed previous best acc score: ', best_dev)
            model_name = save_model_dir + '/' + name
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
            with open(
                    save_model_dir + '/' + name + '_eval_' + str(idx) + '.txt',
                    'w') as f:
                if seg:
                    f.write('acc: %.4f, p: %.4f, r: %.4f, f: %.4f' %
                            (acc, p, r, best_dev))
                    f.write('acc: %.4f, p: %.4f' % (acc, p))
                else:
                    f.write('acc: %.4f' % acc)

        speed, acc, p, r, f_test, dict_test = evaluate(data,
                                                       model,
                                                       'test',
                                                       ignore=ignore)
        test_finish = time.time()
        test_cost = test_finish - epoch_finish

        if seg:
            print(
                'Test: time: %.2fs, speed: %.2ft/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f'
                % (test_cost, speed, acc, p, r, f_test))
        else:
            print('Test: time: %.2fs, speed: %.2ft/s; acc: %.4f' %
                  (test_cost, speed, acc))

        speed, acc, p, r, f_train, dict_train = evaluate(data,
                                                         model,
                                                         'train',
                                                         ignore=ignore)

        all_F.append([f_train * 100.0, f_dev * 100.0, f_test * 100.0])
        Fwin = 'F1-score of ' + name + ' {train, dev, test}'
        vis.line(np.array(all_F),
                 X=np.array([i for i in range(len(all_F))]),
                 win=Fwin,
                 opts={
                     'title': Fwin,
                     'legend': ['train', 'dev', 'test']
                 })

        if dict_train:
            for key, value in dict_train.items():
                if key not in label_F:
                    dict_F[key] = [[0., 0., 0.]]
                    label_F.append(key)
                dict_F[key].append([
                    dict_train[key] * 100.0, dict_dev[key] * 100.0,
                    dict_test[key] * 100.0
                ])
                Fwin = 'F1-score of ' + name + '_' + key + ' {train, dev, test}'
                vis.line(np.array(dict_F[key]),
                         X=np.array([i for i in range(len(dict_F[key]))]),
                         win=Fwin,
                         opts={
                             'title': Fwin,
                             'legend': ['train', 'dev', 'test']
                         })
        gc.collect()
Example No. 16
def main(args):
    np.random.seed(args.seed)
    th.manual_seed(args.seed)
    th.cuda.manual_seed(args.seed)

    cuda = args.gpu >= 0
    device = th.device('cuda:{}'.format(
        args.gpu)) if cuda else th.device('cpu')
    if cuda:
        th.cuda.set_device(args.gpu)

    trainset = data.SST()
    train_loader = DataLoader(dataset=trainset,
                              batch_size=args.batch_size,
                              collate_fn=batcher(device),
                              shuffle=True,
                              num_workers=0)

    model = TreeLSTM(trainset.num_vocabs,
                     args.x_size,
                     args.h_size,
                     trainset.num_classes,
                     args.dropout,
                     cell_type='childsum' if args.child_sum else 'nary',
                     pretrained_emb=trainset.pretrained_emb).to(device)
    print(model)
    params_ex_emb = [
        x for x in list(model.parameters())
        if x.requires_grad and x.size(0) != trainset.num_vocabs
    ]
    params_emb = list(model.embedding.parameters())

    optimizer = optim.Adagrad([{
        'params': params_ex_emb,
        'lr': args.lr,
        'weight_decay': args.weight_decay
    }, {
        'params': params_emb,
        'lr': 0.1 * args.lr
    }])
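Each dict passed to optim.Adagrad above becomes its own parameter group, so the embedding parameters train at one tenth of the base learning rate while the remaining parameters also get weight decay. A quick sketch for confirming the per-group settings of the optimizer built above:

    # Group 0: non-embedding parameters; group 1: embedding parameters at 0.1 * lr.
    for i, group in enumerate(optimizer.param_groups):
        print(i, group['lr'], group['weight_decay'], len(group['params']))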

    for epoch in range(args.epochs):
        model.train()
        count = 0
        t_epoch = time.time()
        for step, batch in enumerate(train_loader):
            g = batch.graph
            n = g.number_of_nodes()
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)

            logits = model(batch, h, c)
            logp = F.log_softmax(logits, 1)
            loss = F.nll_loss(logp, batch.label, reduction='elementwise_mean')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            count += 1

            if cuda:
                th.cuda.synchronize()

        t_epoch_end = time.time()

        print('Epoch {:05d} batch {} training time {:.4f}s'.format(
            epoch, count, t_epoch_end - t_epoch))
Example No. 17
def test_optimizer(data):
    print('---Test Optimizers---')
    model_SGD = NER(data)
    model_Adam = NER(data)
    model_RMSprop = NER(data)
    model_Adadelta = NER(data)
    model_Adagrad = NER(data)

    if data.gpu:
        model_SGD = model_SGD.cuda()
        model_Adam = model_Adam.cuda()
        model_RMSprop = model_RMSprop.cuda()
        model_Adadelta = model_Adadelta.cuda()
        model_Adagrad = model_Adagrad.cuda()

    optimizer_SGD = optim.SGD(model_SGD.parameters(),
                              lr=data.lr,
                              momentum=data.momentum)
    optimizer_Adam = optim.Adam(model_Adam.parameters())
    optimizer_RMSprop = optim.RMSprop(model_RMSprop.parameters())
    optimizer_Adadelta = optim.Adadelta(model_Adadelta.parameters())
    optimizer_Adagrad = optim.Adagrad(model_Adagrad.parameters())

    epoch = data.iteration
    vis = visdom.Visdom()
    losses = []
    train_F = [[0., 0., 0., 0., 0.]]
    dev_F = [[0., 0., 0., 0., 0.]]
    test_F = [[0., 0., 0., 0., 0.]]
    for idx in range(epoch):
        epoch_start = time.time()
        print('Epoch: %s/%s' % (idx, epoch))

        optimizer_SGD = lr_decay(optimizer_SGD, idx, data.lr_decay, data.lr)
        instance_count = 0
        sample_loss_SGD = 0
        sample_loss_Adam = 0
        sample_loss_RMSprop = 0
        sample_loss_Adadelta = 0
        sample_loss_Adagrad = 0
        random.shuffle(data.train_ids)

        model_SGD.train()
        model_Adam.train()
        model_RMSprop.train()
        model_Adadelta.train()
        model_Adagrad.train()
        model_SGD.zero_grad()
        model_Adam.zero_grad()
        model_RMSprop.zero_grad()
        model_Adadelta.zero_grad()
        model_Adagrad.zero_grad()

        batch_size = data.batch_size
        train_num = len(data.train_ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_ids[start:end]
            if not instance:
                continue
            batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.gpu)
            instance_count += 1
            loss_SGD, tag_seq_SGD = model_SGD.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            loss_Adam, tag_seq_Adam = model_Adam.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            loss_RMSprop, tag_seq_RMSprop = model_RMSprop.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            loss_Adadelta, tag_seq_Adadelta = model_Adadelta.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            loss_Adagrad, tag_seq_Adagrad = model_Adagrad.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)

            sample_loss_SGD += loss_SGD.item()
            sample_loss_Adam += loss_Adam.item()
            sample_loss_RMSprop += loss_RMSprop.item()
            sample_loss_Adadelta += loss_Adadelta.item()
            sample_loss_Adagrad += loss_Adagrad.item()

            if end % 500 == 0:
                sys.stdout.flush()
                losses.append([
                    sample_loss_SGD / 50.0, sample_loss_Adam / 50.0,
                    sample_loss_RMSprop / 50.0, sample_loss_Adadelta / 50.0,
                    sample_loss_Adagrad / 50.0
                ])
                Lwin = 'Loss of Optimizers'
                vis.line(np.array(losses),
                         X=np.array([i for i in range(len(losses))]),
                         win=Lwin,
                         opts={
                             'title':
                             Lwin,
                             'legend':
                             ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
                         })
                sample_loss_SGD = 0
                sample_loss_Adam = 0
                sample_loss_RMSprop = 0
                sample_loss_Adadelta = 0
                sample_loss_Adagrad = 0
            loss_SGD.backward()
            loss_Adam.backward()
            loss_RMSprop.backward()
            loss_Adadelta.backward()
            loss_Adagrad.backward()
            # if data.clip:
            #     torch.nn.utils.clip_grad_norm(model.parameters(), 10.0)
            optimizer_SGD.step()
            optimizer_Adam.step()
            optimizer_RMSprop.step()
            optimizer_Adadelta.step()
            optimizer_Adagrad.step()
            model_SGD.zero_grad()
            model_Adam.zero_grad()
            model_RMSprop.zero_grad()
            model_Adadelta.zero_grad()
            model_Adagrad.zero_grad()

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print('Epoch: %s training finished. Time: %.2fs, speed: %.2ft/s' %
              (idx, epoch_cost, train_num / epoch_cost))

        speed, acc, p, r, f_train_SGD, _ = evaluate(data, model_SGD, 'train')
        speed, acc, p, r, f_train_Adam, _ = evaluate(data, model_Adam, 'train')
        speed, acc, p, r, f_train_RMSprop, _ = evaluate(
            data, model_RMSprop, 'train')
        speed, acc, p, r, f_train_Adadelta, _ = evaluate(
            data, model_Adadelta, 'train')
        speed, acc, p, r, f_train_Adagrad, _ = evaluate(
            data, model_Adagrad, 'train')

        train_F.append([
            f_train_SGD * 100, f_train_Adam * 100, f_train_RMSprop * 100,
            f_train_Adadelta * 100, f_train_Adagrad * 100
        ])
        train_Fwin = 'F1-score of Optimizers{train}'
        vis.line(np.array(train_F),
                 X=np.array([i for i in range(len(train_F))]),
                 win=train_Fwin,
                 opts={
                     'title': train_Fwin,
                     'legend':
                     ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
                 })

        speed, acc, p, r, f_dev_SGD, _ = evaluate(data, model_SGD, 'dev')
        speed, acc, p, r, f_dev_Adam, _ = evaluate(data, model_Adam, 'dev')
        speed, acc, p, r, f_dev_RMSprop, _ = evaluate(data, model_RMSprop,
                                                      'dev')
        speed, acc, p, r, f_dev_Adadelta, _ = evaluate(data, model_Adadelta,
                                                       'dev')
        speed, acc, p, r, f_dev_Adagrad, _ = evaluate(data, model_Adagrad,
                                                      'dev')

        dev_F.append([
            f_dev_SGD * 100, f_dev_Adam * 100, f_dev_RMSprop * 100,
            f_dev_Adadelta * 100, f_dev_Adagrad * 100
        ])
        dev_Fwin = 'F1-score of Optimizers{dev}'
        vis.line(np.array(dev_F),
                 X=np.array([i for i in range(len(dev_F))]),
                 win=dev_Fwin,
                 opts={
                     'title': dev_Fwin,
                     'legend':
                     ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
                 })

        speed, acc, p, r, f_test_SGD, _ = evaluate(data, model_SGD, 'test')
        speed, acc, p, r, f_test_Adam, _ = evaluate(data, model_Adam, 'test')
        speed, acc, p, r, f_test_RMSprop, _ = evaluate(data, model_RMSprop,
                                                       'test')
        speed, acc, p, r, f_test_Adadelta, _ = evaluate(
            data, model_Adadelta, 'test')
        speed, acc, p, r, f_test_Adagrad, _ = evaluate(data, model_Adagrad,
                                                       'test')

        test_F.append([
            f_test_SGD * 100, f_test_Adam * 100, f_test_RMSprop * 100,
            f_test_Adadelta * 100, f_test_Adagrad * 100
        ])
        test_Fwin = 'F1-score of Optimizers{test}'
        vis.line(np.array(test_F),
                 X=np.array([i for i in range(len(test_F))]),
                 win=test_Fwin,
                 opts={
                     'title': test_Fwin,
                     'legend':
                     ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
                 })
        gc.collect()
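
The lr_decay helper called at the start of each epoch is not shown in this snippet; NCRF++-style trainers commonly implement it as an inverse-time decay applied to every parameter group, and a sketch under that assumption looks like this:

def lr_decay(optimizer, epoch, decay_rate, init_lr):
    # inverse-time decay: lr_t = init_lr / (1 + decay_rate * epoch)
    lr = init_lr / (1 + decay_rate * epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer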
Ejemplo n.º 18
0
    margin = 4
    use_rank_weight = True
    lr = 0.2
    user_negs_n = 5000
    n_negative = 10
    topk = 5
    train1_pd, test1_pd, test2_pd, test3_pd, test4_pd, most_popular_items, n_users, n_items = movielens(
        'datasets/ml/ratings.csv')
    n_users = int(n_users)
    n_items = int(n_items)
    network = model_R.KVMRN(dim=dim,
                            n_users=n_users,
                            n_items=n_items,
                            memory_size=memory_size)
    network = network.cuda()
    optimizer = optim.Adagrad(network.parameters(), lr=lr)

    # valid_users = valid_pd['user'].sample(1000).values

    test_pds = [test1_pd, test2_pd, test3_pd, test4_pd]
    # test_pds = [test1_pd]
    train_pd = train1_pd
    previous_test_pd = train1_pd

    for test_part, test_pd in enumerate(test_pds):
        train_users = train_pd['user'].values
        train_items = train_pd['item'].values
        all_users_in_train = set(list(train_users))
        all_items_in_train = set(list(train_items))
        user_to_train_set = dict()
        user_to_test_set = dict()
Ejemplo n.º 19
0
    def __init__(self,
                 input_size,
                 bert_input_size,
                 inference_type="zeroshot",
                 num_topics=10,
                 model_type='prodLDA',
                 hidden_sizes=(100, 100),
                 activation='softplus',
                 dropout=0.2,
                 learn_priors=True,
                 batch_size=64,
                 lr=2e-3,
                 momentum=0.99,
                 solver='adam',
                 num_epochs=100,
                 num_samples=10,
                 reduce_on_plateau=False,
                 topic_prior_mean=0.0,
                 topic_prior_variance=None,
                 num_data_loader_workers=0):
        """
        :param input_size: int, dimension of input
        :param bert_input_size: int, dimension of input that comes from BERT embeddings
        :param inference_type: string, you can choose between the contextual model and the combined model
        :param num_topics: int, number of topic components, (default 10)
        :param model_type: string, 'prodLDA' or 'LDA' (default 'prodLDA')
        :param hidden_sizes: tuple, length = n_layers, (default (100, 100))
        :param activation: string, 'softplus', 'relu', 'sigmoid', 'swish', 'tanh', 'leakyrelu', 'rrelu', 'elu',
         'selu' (default 'softplus')
        :param dropout: float, dropout to use (default 0.2)
        :param learn_priors: bool, make priors a learnable parameter (default True)
        :param batch_size: int, size of batch to use for training (default 64)
        :param lr: float, learning rate to use for training (default 2e-3)
        :param momentum: float, momentum to use for training (default 0.99)
        :param solver: string, optimizer 'adam' or 'sgd' (default 'adam')
        :param num_samples: int, number of times theta needs to be sampled
        :param num_epochs: int, number of epochs to train for, (default 100)
        :param reduce_on_plateau: bool, reduce learning rate by 10x on plateau of 10 epochs (default False)
        :param topic_prior_mean: float, mean of the prior over topics (default 0.0)
        :param topic_prior_variance: float or None, variance of the prior over topics (default None)
        :param num_data_loader_workers: int, number of data loader workers (default 0); keep it at 0 if you are using Windows
        """

        assert isinstance(input_size, int) and input_size > 0, \
            "input_size must be type int > 0."
        assert isinstance(num_topics, int) and num_topics > 0, \
            "num_topics must be type int > 0."
        assert model_type in ['LDA', 'prodLDA'], \
            "model must be 'LDA' or 'prodLDA'."
        assert isinstance(hidden_sizes, tuple), \
            "hidden_sizes must be type tuple."
        assert activation in ['softplus', 'relu', 'sigmoid', 'swish', 'tanh', 'leakyrelu',
                              'rrelu', 'elu', 'selu'], \
            "activation must be 'softplus', 'relu', 'sigmoid', 'swish', 'leakyrelu'," \
            " 'rrelu', 'elu', 'selu' or 'tanh'."
        assert dropout >= 0, "dropout must be >= 0."
        # assert isinstance(learn_priors, bool), "learn_priors must be boolean."
        assert isinstance(batch_size, int) and batch_size > 0, \
            "batch_size must be int > 0."
        assert lr > 0, "lr must be > 0."
        assert isinstance(momentum, float) and momentum > 0 and momentum <= 1, \
            "momentum must be 0 < float <= 1."
        assert solver in ['adagrad', 'adam', 'sgd', 'adadelta', 'rmsprop'], \
            "solver must be 'adam', 'adadelta', 'sgd', 'rmsprop' or 'adagrad'"
        assert isinstance(reduce_on_plateau, bool), \
            "reduce_on_plateau must be type bool."
        assert isinstance(topic_prior_mean, float), \
            "topic_prior_mean must be type float"
        # and topic_prior_variance >= 0, \
        # assert isinstance(topic_prior_variance, float), \
        #    "topic prior_variance must be type float"

        self.input_size = input_size
        self.num_topics = num_topics
        self.model_type = model_type
        self.hidden_sizes = hidden_sizes
        self.activation = activation
        self.dropout = dropout
        self.learn_priors = learn_priors
        self.batch_size = batch_size
        self.lr = lr
        self.num_samples = num_samples
        self.bert_size = bert_input_size
        self.momentum = momentum
        self.solver = solver
        self.num_epochs = num_epochs
        self.reduce_on_plateau = reduce_on_plateau
        self.num_data_loader_workers = num_data_loader_workers
        self.topic_prior_mean = topic_prior_mean
        self.topic_prior_variance = topic_prior_variance
        # init inference avitm network
        self.model = DecoderNetwork(input_size, self.bert_size, inference_type,
                                    num_topics, model_type, hidden_sizes,
                                    activation, dropout, self.learn_priors,
                                    self.topic_prior_mean,
                                    self.topic_prior_variance)
        self.early_stopping = EarlyStopping(patience=5, verbose=False)
        # init optimizer
        if self.solver == 'adam':
            self.optimizer = optim.Adam(self.model.parameters(),
                                        lr=lr,
                                        betas=(self.momentum, 0.99))
        elif self.solver == 'sgd':
            self.optimizer = optim.SGD(self.model.parameters(),
                                       lr=lr,
                                       momentum=self.momentum)
        elif self.solver == 'adagrad':
            self.optimizer = optim.Adagrad(self.model.parameters(), lr=lr)
        elif self.solver == 'adadelta':
            self.optimizer = optim.Adadelta(self.model.parameters(), lr=lr)
        elif self.solver == 'rmsprop':
            self.optimizer = optim.RMSprop(self.model.parameters(),
                                           lr=lr,
                                           momentum=self.momentum)
        # init lr scheduler
        if self.reduce_on_plateau:
            self.scheduler = ReduceLROnPlateau(self.optimizer, patience=10)

        # performance attributes
        self.best_loss_train = float('inf')

        # training attributes
        self.model_dir = None
        self.train_data = None
        self.nn_epoch = None

        # learned topics
        self.best_components = None

        # Use cuda if available
        if torch.cuda.is_available():
            self.USE_CUDA = True
        else:
            self.USE_CUDA = False
        if self.USE_CUDA:
            self.model = self.model.cuda()
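
The ReduceLROnPlateau scheduler created above is not stepped in this fragment; unlike epoch-count schedulers it must be fed a monitored metric each time. A minimal sketch of the usual call pattern, with a toy model and a stand-in validation loss (names chosen here for illustration):

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = nn.Linear(10, 2)
optimizer = optim.Adagrad(model.parameters(), lr=2e-3)
scheduler = ReduceLROnPlateau(optimizer, patience=10)

for epoch in range(30):
    val_loss = 1.0                    # stand-in for a real validation loss
    scheduler.step(val_loss)          # lr shrinks once the metric stops improving for `patience` epochs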
Ejemplo n.º 20
0
def main():
    global args
    args = parse_args()
    # global logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "[%(asctime)s] %(levelname)s:%(name)s:%(message)s")
    # file logger
    fh = logging.FileHandler(os.path.join(args.save, args.expname) + '.log',
                             mode='w')
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    # console logger
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    # argument validation
    args.cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        exit()
    logger.debug(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    sick_vocab_file = os.path.join(args.data, 'sick.vocab')
    if not os.path.isfile(sick_vocab_file):
        token_files_b = [
            os.path.join(split, 'b.toks')
            for split in [train_dir, dev_dir, test_dir]
        ]
        token_files_a = [
            os.path.join(split, 'a.toks')
            for split in [train_dir, dev_dir, test_dir]
        ]
        token_files = token_files_a + token_files_b
        sick_vocab_file = os.path.join(args.data, 'sick.vocab')
        utils.build_vocab(token_files, sick_vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=sick_vocab_file,
                  data=[
                      Constants.PAD_WORD, Constants.UNK_WORD,
                      Constants.BOS_WORD, Constants.EOS_WORD
                  ])
    logger.debug('==> SICK vocabulary size : %d ' % vocab.size())

    # load SICK dataset splits
    train_file = os.path.join(args.data, 'sick_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SICKDataset(train_dir, vocab, args.num_classes)
        torch.save(train_dataset, train_file)
    logger.debug('==> Size of train data   : %d ' % len(train_dataset))
    dev_file = os.path.join(args.data, 'sick_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SICKDataset(dev_dir, vocab, args.num_classes)
        torch.save(dev_dataset, dev_file)
    logger.debug('==> Size of dev data     : %d ' % len(dev_dataset))
    test_file = os.path.join(args.data, 'sick_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SICKDataset(test_dir, vocab, args.num_classes)
        torch.save(test_dataset, test_file)
    logger.debug('==> Size of test data    : %d ' % len(test_dataset))

    # initialize model, criterion/loss_function, optimizer
    model = ABCNN(vocab.size(), args.input_dim, args.mem_dim, args.hidden_dim,
                  args.num_classes, args.sparse, args.freeze_embed)
    criterion = nn.KLDivLoss()

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sick_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file, map_location='cpu')  # changed: load onto CPU
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = utils.load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        logger.debug('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.zeros(vocab.size(),
                          glove_emb.size(1),
                          dtype=torch.float,
                          device=device)
        emb.normal_(0, 0.05)
        # zero out the embeddings for padding and other special words if they are absent in vocab
        for idx, item in enumerate([
                Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
                Constants.EOS_WORD
        ]):
            emb[idx].zero_()
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(
                    word)]
        torch.save(emb, emb_file)
    # plug these into embedding matrix inside model
    model.emb.weight.data.copy_(emb)

    model.to(device), criterion.to(device)
    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                         model.parameters()),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              lr=args.lr,
                              weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer, device)

    best = -float('inf')
    for epoch in range(args.epochs):
        train_loss = trainer.train(train_dataset)
        train_loss, train_pred = trainer.test(train_dataset)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        test_loss, test_pred = trainer.test(test_dataset)

        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.mse(train_pred, train_dataset.labels)
        logger.info(
            '==> Epoch {}, Train \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.mse(dev_pred, dev_dataset.labels)
        logger.info(
            '==> Epoch {}, Dev \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.mse(test_pred, test_dataset.labels)
        logger.info(
            '==> Epoch {}, Test \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, test_loss, test_pearson, test_mse))

        if best < test_pearson:
            best = test_pearson
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'pearson': test_pearson,
                'mse': test_mse,
                'args': args,
                'epoch': epoch
            }
            logger.debug(
                '==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint,
                       '%s.pt' % os.path.join(args.save, args.expname))
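
A note on the criterion above: nn.KLDivLoss expects log-probabilities as its input and a probability distribution as its target, so trainers that pair it with a softmax output (as the Trainer here presumably does) pass the scores through log_softmax first. A minimal sketch:

import torch
import torch.nn as nn
import torch.nn.functional as F

kl = nn.KLDivLoss(reduction='batchmean')
logits = torch.randn(4, 5)                       # raw model scores
target = F.softmax(torch.randn(4, 5), dim=1)     # a probability distribution per example
loss = kl(F.log_softmax(logits, dim=1), target)  # input must be log-probabilities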
Ejemplo n.º 21
0
    def configure_optimizers(self):
        # Adagrad creates state tensors immediately, model is not yet on GPU.
        return optim.Adagrad(self.parameters())
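
The comment above points at a real pitfall: torch.optim.Adagrad allocates its per-parameter state (the running sum of squared gradients) at construction time, on whatever device the parameters are on at that moment. Building the optimizer before .cuda()/.to(device) therefore leaves CPU state tensors paired with GPU parameters. A sketch of the safe ordering in plain PyTorch:

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2)
if torch.cuda.is_available():
    model = model.cuda()                       # move parameters to the GPU first...
optimizer = optim.Adagrad(model.parameters())  # ...then let Adagrad allocate its state on the same device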
Ejemplo n.º 22
0
def main():
    import argparse

    fmt_class = argparse.ArgumentDefaultsHelpFormatter
    parser = argparse.ArgumentParser(formatter_class=fmt_class)
    group = parser.add_argument_group('Data')
    group.add_argument('dataset',
                       choices=sorted(DATASETS.keys()),
                       help='dataset to be used')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=0, help='random seed')

    group = parser.add_argument_group('Semantic Loss')
    group.add_argument('-a',
                       '--alpha',
                       type=float,
                       default=1.0,
                       help='trade-off between losses')

    group = parser.add_argument_group('Neural Net')
    group.add_argument('--n-epochs',
                       type=int,
                       default=100,
                       help='number of epochs to train')
    group.add_argument('--batch-size',
                       type=int,
                       default=64,
                       help='batch size for training and evaluation')
    group.add_argument('--lr', type=float, default=1.0, help='learning rate')
    group.add_argument('--gamma',
                       type=float,
                       default=0.7,
                       help='Learning rate step gamma')
    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Load the data
    dataset = DATASETS[args.dataset]()
    indices = list(range(dataset.data.shape[0]))
    tr, ts = train_test_split(indices, test_size=0.2)
    tr_loader, ts_loader = nndt.dataset_to_loaders(dataset,
                                                   tr,
                                                   ts,
                                                   device,
                                                   batch_size=args.batch_size)

    # Build the semantic loss
    sl = nndt.DecisionTreeLoss(dataset).fit(dataset.data[tr],
                                            dataset.target[tr])
    sl.sync()

    # Build the neural net
    n_inputs = dataset.data.shape[1]
    net = nndt.FeedForwardNetwork(dataset, n_inputs).to(device)

    # Evaluate the NN+DT combo
    optimizer = optim.Adagrad(net.parameters(), lr=args.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.n_epochs + 1):
        nndt.train(net, device, tr_loader, optimizer, sl, args.alpha)
        label_loss, distillation_loss, n_correct = nndt.test(
            net, device, ts_loader, sl)
        print(
            f'{epoch} : ll={label_loss:5.3f} dl={distillation_loss:5.3f} acc={n_correct}'
        )
        scheduler.step()
Ejemplo n.º 23
0
def train():
    print("start")
    cmd = argparse.ArgumentParser(sys.argv[0], conflict_handler='resolve')
    cmd.add_argument('--seed', default=1, type=int, help='The random seed.')
    cmd.add_argument('--gpu',
                     default=-1,
                     type=int,
                     help='Use id of gpu, -1 if cpu.')
    cmd.add_argument('--cont',
                     default=1,
                     type=int,
                     help='if 1, continue by loading the previously saved model'
                     )  # save the model here so it can be reloaded later

    cmd.add_argument('--train_path',
                     required=True,
                     help='The path to the training file.')
    cmd.add_argument('--valid_path', help='The path to the development file.')
    cmd.add_argument('--test_path', help='The path to the testing file.')

    cmd.add_argument('--config_path',
                     required=True,
                     help='the path to the config file.')
    cmd.add_argument("--word_embedding", help="The path to word vectors.")

    cmd.add_argument(
        '--optimizer',
        default='sgd',
        choices=['sgd', 'adam', 'adagrad'],
        help='the type of optimizer: valid options=[sgd, adam, adagrad]')
    cmd.add_argument("--lr",
                     type=float,
                     default=0.01,
                     help='the learning rate.')
    cmd.add_argument("--lr_decay",
                     type=float,
                     default=0,
                     help='the learning rate decay.')

    cmd.add_argument("--model", required=True, help="path to save model")

    cmd.add_argument("--batch_size",
                     "--batch",
                     type=int,
                     default=128,
                     help='the batch size.')
    cmd.add_argument("--max_epoch",
                     type=int,
                     default=100,
                     help='the maximum number of iteration.')

    cmd.add_argument("--clip_grad",
                     type=float,
                     default=5,
                     help='the tense of clipped grad.')

    cmd.add_argument('--max_sent_len',
                     type=int,
                     default=20,
                     help='maximum sentence length.')

    cmd.add_argument('--min_count',
                     type=int,
                     default=5,
                     help='minimum word count.')

    cmd.add_argument('--max_vocab_size',
                     type=int,
                     default=150000,
                     help='maximum vocabulary size.')

    cmd.add_argument('--save_classify_layer',
                     default=True,
                     action='store_true',
                     help="whether to save the classify layer")

    cmd.add_argument('--valid_size',
                     type=int,
                     default=0,
                     help="size of validation dataset when there's no valid.")
    cmd.add_argument('--eval_steps',
                     required=False,
                     type=int,
                     help='report every xx batches.')
    print("argment 들어감")
    opt = cmd.parse_args(sys.argv[2:])

    with open(opt.config_path, 'r') as fin:
        config = json.load(fin)

    # Dump configurations
    print(opt)
    print(config)

    # set seed.
    torch.manual_seed(opt.seed)
    random.seed(opt.seed)
    if opt.gpu >= 0:
        torch.cuda.set_device(opt.gpu)
        if opt.seed > 0:
            torch.cuda.manual_seed(opt.seed)
    print("Gpu 셋팅 끝")
    use_cuda = opt.gpu >= 0 and torch.cuda.is_available()

    token_embedder_name = config['token_embedder']['name'].lower()
    token_embedder_max_chars = config['token_embedder'].get(
        'max_characters_per_token', None)
    '''
  if token_embedder_name == 'cnn':
    train_data = read_corpus_yield(opt.train_path, token_embedder_max_chars, opt.max_sent_len)
  elif token_embedder_name == 'lstm':
    train_data = read_corpus(opt.train_path, opt.max_sent_len)
  else:
    raise ValueError('Unknown token embedder name: {}'.format(token_embedder_name))
  '''
    #logging.info('training instance: {}, training tokens: {}.'.format(len(train_data),
    #                                                                 sum([len(s) - 1 for s in train_data])))
    print("read corpus 끝")

    if opt.valid_path is not None:
        if token_embedder_name == 'cnn':
            valid_data = read_corpus(opt.valid_path, token_embedder_max_chars,
                                     opt.max_sent_len)
        elif token_embedder_name == 'lstm':
            valid_data = read_corpus(opt.valid_path, opt.max_sent_len)
        else:
            raise ValueError(
                'Unknown token embedder name: {}'.format(token_embedder_name))
        logging.info('valid instance: {}, valid tokens: {}.'.format(
            len(valid_data), sum([len(s) - 1 for s in valid_data])))
    elif opt.valid_size > 0:
        train_data, valid_data = divide(train_data, opt.valid_size)
        logging.info(
            'training instance: {}, training tokens after division: {}.'.
            format(len(train_data), sum([len(s) - 1 for s in train_data])))
        logging.info('valid instance: {}, valid tokens: {}.'.format(
            len(valid_data), sum([len(s) - 1 for s in valid_data])))
    else:
        valid_data = None

    if opt.test_path is not None:
        if token_embedder_name == 'cnn':
            test_data = read_corpus(opt.test_path, token_embedder_max_chars,
                                    opt.max_sent_len)
        elif token_embedder_name == 'lstm':
            test_data = read_corpus(opt.test_path, opt.max_sent_len)
        else:
            raise ValueError(
                'Unknown token embedder name: {}'.format(token_embedder_name))
        logging.info('testing instance: {}, testing tokens: {}.'.format(
            len(test_data), sum([len(s) - 1 for s in test_data])))
    else:
        test_data = None

    print("word임베딩 시작")
    if opt.word_embedding is not None:
        print("dhfdhkfdhkdhdfkdhgkssghksghgsk")
        embs = load_embedding(opt.word_embedding)
        word_lexicon = {word: i for i, word in enumerate(embs[0])}
    else:
        embs = None
        word_lexicon = {}
    '''
  # Maintain the vocabulary. vocabulary is used in either WordEmbeddingInput or softmax classification
  vocab = get_truncated_vocab(train_data, opt.min_count)
  # Ensure index of '<oov>' is 0
  for special_word in ['<oov>', '<bos>', '<eos>',  '<pad>']:
    if special_word not in word_lexicon:
      word_lexicon[special_word] = len(word_lexicon)

  for word, _ in vocab:
    if word not in word_lexicon:
      word_lexicon[word] = len(word_lexicon)
  
  # Word Embedding
  if config['token_embedder']['word_dim'] > 0:
    word_emb_layer = EmbeddingLayer(config['token_embedder']['word_dim'], word_lexicon, fix_emb=False, embs=embs)
    logging.info('Word embedding size: {0}'.format(len(word_emb_layer.word2id)))
  else:
    word_emb_layer = None
    logging.info('Vocabulary size: {0}'.format(len(word_lexicon)))
  print("word임베딩 끝 캐릭터 시작")
  # Character Lexicon
  if config['token_embedder']['char_dim'] > 0:
    char_lexicon = {}
    for sentence in train_data:
      for word in sentence:
        for ch in word:
          if ch not in char_lexicon:
            char_lexicon[ch] = len(char_lexicon)

    for special_char in ['<bos>', '<eos>', '<oov>', '<pad>', '<bow>', '<eow>']:
      if special_char not in char_lexicon:
        char_lexicon[special_char] = len(char_lexicon)

    char_emb_layer = EmbeddingLayer(config['token_embedder']['char_dim'], char_lexicon, fix_emb=False)
    logging.info('Char embedding size: {0}'.format(len(char_emb_layer.word2id)))
  else:
    char_lexicon = None
    char_emb_layer = None
  '''
    ''' Changed this part to simply load the dictionaries from files instead. '''
    if config['token_embedder']['char_dim'] > 0:
        char_lexicon = {}
        with codecs.open(os.path.join(opt.model, 'char.dic'),
                         'r',
                         encoding='utf-8') as fpi:
            for line in fpi:
                tokens = line.strip().split('\t')
                if len(tokens) == 1:
                    tokens.insert(0, '\u3000')
                token, i = tokens
                char_lexicon[token] = int(i)
        char_emb_layer = EmbeddingLayer(config['token_embedder']['char_dim'],
                                        char_lexicon,
                                        fix_emb=False,
                                        embs=None)
        logging.info('char embedding size: ' +
                     str(len(char_emb_layer.word2id)))
    else:
        char_lexicon = None
        char_emb_layer = None

    # For models trained with a word-level encoder, load the word dictionary.
    if config['token_embedder']['word_dim'] > 0:
        word_lexicon = {}
        with codecs.open(os.path.join(opt.model, 'word.dic'),
                         'r',
                         encoding='utf-8') as fpi:
            for line in fpi:
                tokens = line.strip().split('\t')
                if len(tokens) == 1:
                    tokens.insert(0, '\u3000')
                token, i = tokens
                word_lexicon[token] = int(i)
        word_emb_layer = EmbeddingLayer(config['token_embedder']['word_dim'],
                                        word_lexicon,
                                        fix_emb=False,
                                        embs=None)
        logging.info('word embedding size: ' +
                     str(len(word_emb_layer.word2id)))
    else:
        word_lexicon = None
        word_emb_layer = None

    print("캐릭터 임베딩 끝 배치 시작")
    #train = create_batches(train_data, opt.batch_size, word_lexicon, char_lexicon, config, use_cuda=use_cuda)

    if opt.eval_steps is None:
        #opt.eval_steps = len(train[0])
        opt.eval_steps = 4096  #len(train_data)/opt.batch_size
    logging.info('Evaluate every {0} batches.'.format(opt.eval_steps))

    if valid_data is not None:
        valid = create_batches(valid_data,
                               opt.batch_size,
                               word_lexicon,
                               char_lexicon,
                               config,
                               sort=False,
                               shuffle=False,
                               use_cuda=use_cuda)
    else:
        valid = None

    if test_data is not None:
        test = create_batches(test_data,
                              opt.batch_size,
                              word_lexicon,
                              char_lexicon,
                              config,
                              sort=False,
                              shuffle=False,
                              use_cuda=use_cuda)
    else:
        test = None

    label_to_ix = word_lexicon
    logging.info('vocab size: {0}'.format(len(label_to_ix)))

    nclasses = len(label_to_ix)
    print("모델 만들자고 친구")
    model = Model(config, word_emb_layer, char_emb_layer, nclasses, use_cuda)
    logging.info(str(model))
    if use_cuda:
        model = model.cuda()

    if opt.cont == 1:
        print("모델 로드 했다!!")
        model.load_model(opt.model)
    print("옵티마이저 설정")
    need_grad = lambda x: x.requires_grad
    if opt.optimizer.lower() == 'adam':
        optimizer = optim.Adam(filter(need_grad, model.parameters()),
                               lr=opt.lr)
    elif opt.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(filter(need_grad, model.parameters()), lr=opt.lr)
    elif opt.optimizer.lower() == 'adagrad':
        optimizer = optim.Adagrad(filter(need_grad, model.parameters()),
                                  lr=opt.lr)
    else:
        raise ValueError('Unknown optimizer {}'.format(opt.optimizer.lower()))

    print("디렉토리 만들자")
    try:
        os.makedirs(opt.model)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise
    print("딕셔너리 만들자구 친구")
    print(opt.cont)
    print(opt.model)
    """
  if config['token_embedder']['char_dim'] > 0:
    with codecs.open(os.path.join(opt.model, 'char.dic'), 'w', encoding='utf-8') as fpo:
      for ch, i in char_emb_layer.word2id.items():
        print('{0}\t{1}'.format(ch, i), file=fpo)

  with codecs.open(os.path.join(opt.model, 'word.dic'), 'w', encoding='utf-8') as fpo:
    for w, i in word_lexicon.items():
      print('{0}\t{1}'.format(w, i), file=fpo)
  """
    json.dump(
        vars(opt),
        codecs.open(os.path.join(opt.model, 'config.json'),
                    'w',
                    encoding='utf-8'))

    best_train = 1e+8
    best_valid = 1e+8
    test_result = 1e+8
    print("드디어 학습시작 시작")
    for epoch in range(opt.max_epoch):

        train_data = read_corpus_yield(opt.train_path,
                                       token_embedder_max_chars,
                                       opt.max_sent_len)
        train = create_batches(train_data,
                               opt.batch_size,
                               word_lexicon,
                               char_lexicon,
                               config,
                               use_cuda=use_cuda)
        best_train, best_valid, test_result = train_model(
            epoch, opt, model, optimizer, train, valid, test, best_train,
            best_valid, test_result)

        if opt.lr_decay > 0:
            optimizer.param_groups[0]['lr'] *= opt.lr_decay

    if valid_data is None:
        logging.info("best train ppl: {:.6f}.".format(best_train))
    elif test_data is None:
        logging.info("best train ppl: {:.6f}, best valid ppl: {:.6f}.".format(
            best_train, best_valid))
    else:
        logging.info(
            "best train ppl: {:.6f}, best valid ppl: {:.6f}, test ppl: {:.6f}."
            .format(best_train, best_valid, test_result))
Ejemplo n.º 24
0
def trainEpochs(decoder,
                n_epochs,
                print_every=1000,
                plot_every=100,
                learning_rate=0.01,
                total_batch=100,
                batch_size=1,
                penalty=(1, 0.5),
                gamma=0.1):
    start = time.time()

    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0

    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    #criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()

    scheduler = optim.lr_scheduler.StepLR(decoder_optimizer,
                                          step_size=2,
                                          gamma=gamma)
    iter = 0
    for epoch in range(1, n_epochs + 1):
        start, end = 0, batch_size

        if epoch > 5:
            # note: this re-creates the Adagrad optimizer (and resets its accumulated
            # state) at the start of every epoch after the fifth, and the StepLR
            # scheduler above keeps pointing at the original SGD optimizer
            decoder_optimizer = optim.Adagrad(decoder.parameters(),
                                              lr=learning_rate)
        #verbose = (iter % print_every == 0)
        while end <= total_batch:
            iter += 1
            target_tensor = torch.from_numpy(np.array(
                train_Y[start:end][:])).to(device).float()
            input_tensor = torch.from_numpy(np.array(
                train_X[start:end][:])).to(device).float()
            #target_tensor = torch.from_numpy(np.array(train_Y[num])).to(device).float()
            #input_tensor = Variable(input_tensor, requires_grad=True)
            #print(input_tensor.shape, target_tensor.shape, decoder)
            #print(decoder_optimizer, criterion)
            loss, decoder_output = train(input_tensor, target_tensor, decoder,
                                         decoder_optimizer, criterion)
            print_loss_total += loss
            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                #print(decoder_output.view(-1).detach().cpu().numpy())
                #print(target_tensor)
                #print(decoder_optimizer)
                print(
                    "loss%i/%i:" % (iter, n_epochs *
                                    (total_batch // batch_size)),
                    print_loss_avg)
                print_loss_total = 0

                #training_progress = validation(decoder, train_X, train_Y)
                training_progress = (decoder_output.view(
                    batch_size, -1).cpu().detach().numpy(), train_Y[start:end])
                f = open(
                    '/home/yixing/Fischer/DeepPerformance/Bi-LSTM-CNN_batch_progress.pkl',
                    "wb")
                pickle.dump(training_progress, f)
                f.close()
                torch.save(
                    decoder.state_dict(),
                    '/home/yixing/Fischer/DeepPerformance/Bi-LSTM-CNN_batch_7L1.pt'
                )

            start += batch_size
            end += batch_size
        scheduler.step()
Ejemplo n.º 25
0
for param in net.cnn.parameters():
    param.requires_grad = True

# fc_params = list(map(id, net.cnn.fc.parameters()))
# base_params = list(filter(lambda p: id(p) not in fc_params, net.cnn.parameters()))
# optimizer = optim.Adagrad([{'params': base_params},
#                            {'params': net.cnn.fc.parameters(), 'lr': 0.005}
#                            ], lr=0.0005, weight_decay=0.005)
# start_epoch = 0
# optimizer = optim.Adam(net.cnn.fc.parameters(), weight_decay=0.0005)
# optimizer = torch.optim.SGD([
#     {'params': base_params},
#     {'params': net.cnn.fc.parameters(), 'lr': 1}
# ], lr=1e-4, momentum=0.9, weight_decay=0.0005)
from zeroshot.cub_test import zsl_test, gzsl_test
import copy

optimizer = optim.Adagrad(net.cnn.parameters(), lr=0.001, weight_decay=0.005)
for epoch in range(start_epoch, 500):
    train(epoch, net, optimizer)
    test(epoch, net)
    if epoch > 10:
        net1 = copy.deepcopy(net)
        zsl_test(epoch, net1, optimizer)
        del net1
        # net2 = copy.deepcopy(net)
        # gzsl_test(epoch, net2, optimizer)
        # del net2
log.close()
Ejemplo n.º 26
0
        class Classifier(nn.Module):
            def __init__(self):
                super(Classifier, self).__init__()
                self.FC = torch.nn.Sequential(
                    nn.Linear(Z_in, 1),
                    nn.Dropout(rate),
                    nn.Sigmoid())
            def forward(self, x):
                return self.FC(x)

        torch.cuda.manual_seed_all(42)

        AutoencoderE = AEE()

        solverE = optim.Adagrad(AutoencoderE.parameters(), lr=lrE)

        Clas = Classifier()
        SolverClass = optim.Adagrad(Clas.parameters(), lr=lrCL, weight_decay = wd)
        C_loss = torch.nn.BCELoss()

        for it in range(epoch):

            epoch_cost4 = 0
            epoch_cost3 = []
            num_minibatches = int(n_sampE / mb_size) 

            for i, (dataE, target) in enumerate(trainLoader):
                flag = 0
                AutoencoderE.train()
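
A side note on the classifier above: it applies Sigmoid inside the module and trains against nn.BCELoss. An equivalent but numerically more stable arrangement keeps the head as raw logits and uses nn.BCEWithLogitsLoss, which folds the sigmoid into the loss; a minimal sketch with illustrative sizes:

import torch
import torch.nn as nn
import torch.optim as optim

clf = nn.Linear(32, 1)                          # emits raw logits, no Sigmoid layer
criterion = nn.BCEWithLogitsLoss()              # sigmoid + binary cross-entropy in one stable op
solver = optim.Adagrad(clf.parameters(), lr=0.01, weight_decay=1e-4)

x = torch.randn(8, 32)
y = torch.randint(0, 2, (8, 1)).float()
solver.zero_grad()
loss = criterion(clf(x), y)
loss.backward()
solver.step()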
Ejemplo n.º 27
0
def test_learning_v3():
    embedding_size = 10
    batch_size = 16

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops}
                        | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    torch.manual_seed(0)

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities,
                                     embedding_size * 2,
                                     sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates,
                                        embedding_size * 2,
                                        sparse=True)

    fact_rel = torch.from_numpy(
        np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.from_numpy(
        np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.from_numpy(
        np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings,
                     predicate_embeddings=predicate_embeddings,
                     kernel=kernel,
                     facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    N3_reg = N3()

    params = [
        p for p in hoppy.parameters()
        if not torch.equal(p, entity_embeddings.weight)
        and not torch.equal(p, predicate_embeddings.weight)
    ]

    loss_function = nn.CrossEntropyLoss(reduction='mean')

    p_emb = predicate_embeddings(
        torch.from_numpy(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(
        torch.from_numpy(np.array([predicate_to_index['q']])))
    # r_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['r']])))

    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = []
    for i in range(128):
        hops_data += hops

    batches = make_batches(len(hops_data), batch_size)

    c, d = 0.0, 0.0

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        xs_np = np.array([entity_to_index[s] for s in s_lst])
        xp_np = np.array([predicate_to_index[p] for p in p_lst])
        xo_np = np.array([entity_to_index[o] for o in o_lst])

        xs = torch.from_numpy(xs_np)
        xp = torch.from_numpy(xp_np)
        xo = torch.from_numpy(xo_np)

        xs_emb = entity_embeddings(xs)
        xp_emb = predicate_embeddings(xp)
        xo_emb = entity_embeddings(xo)

        sp_scores, po_scores = hoppy.forward(xp_emb, xs_emb, xo_emb)

        loss = loss_function(sp_scores, xo) + loss_function(po_scores, xs)

        factors = [hoppy.factor(e) for e in [xp_emb, xs_emb, xo_emb]]
        loss += 0.1 * N3_reg(factors)

        tmp = hoppy.hops(xp_emb)
        hop_1_emb = tmp[0]
        hop_2_emb = tmp[1]

        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    assert c > 0.95
    assert d > 0.95
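
The entity and predicate embeddings above are created with sparse=True, so their gradients come back as sparse tensors; note, though, that only the reformulator parameters (embeddings excluded) are passed to Adagrad here. When sparse embeddings are trained directly, Adagrad is one of the few built-in optimizers that accepts sparse gradients; a minimal sketch of that combination:

import torch
import torch.nn as nn
import torch.optim as optim

emb = nn.Embedding(100, 16, sparse=True)        # lookups produce sparse gradients
opt = optim.Adagrad(emb.parameters(), lr=0.1)   # Adagrad supports sparse gradients

ids = torch.tensor([1, 5, 7])
loss = emb(ids).sum()
loss.backward()                                 # emb.weight.grad is a sparse tensor
opt.step()
opt.zero_grad()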
Ejemplo n.º 28
0
File: train.py Project: zqxyz73/dgl
def main(args):
    np.random.seed(args.seed)
    th.manual_seed(args.seed)
    th.cuda.manual_seed(args.seed)

    best_epoch = -1
    best_dev_acc = 0

    cuda = args.gpu >= 0
    device = th.device('cuda:{}'.format(
        args.gpu)) if cuda else th.device('cpu')
    if cuda:
        th.cuda.set_device(args.gpu)

    trainset = SST()
    train_loader = DataLoader(dataset=trainset,
                              batch_size=args.batch_size,
                              collate_fn=batcher(device),
                              shuffle=True,
                              num_workers=0)
    devset = SST(mode='dev')
    dev_loader = DataLoader(dataset=devset,
                            batch_size=100,
                            collate_fn=batcher(device),
                            shuffle=False,
                            num_workers=0)

    testset = SST(mode='test')
    test_loader = DataLoader(dataset=testset,
                             batch_size=100,
                             collate_fn=batcher(device),
                             shuffle=False,
                             num_workers=0)

    model = TreeLSTM(trainset.num_vocabs,
                     args.x_size,
                     args.h_size,
                     trainset.num_classes,
                     args.dropout,
                     cell_type='childsum' if args.child_sum else 'nary',
                     pretrained_emb=trainset.pretrained_emb).to(device)
    print(model)
    params_ex_emb = [
        x for x in list(model.parameters())
        if x.requires_grad and x.size(0) != trainset.num_vocabs
    ]
    params_emb = list(model.embedding.parameters())

    for p in params_ex_emb:
        if p.dim() > 1:
            INIT.xavier_uniform_(p)

    optimizer = optim.Adagrad([{
        'params': params_ex_emb,
        'lr': args.lr,
        'weight_decay': args.weight_decay
    }, {
        'params': params_emb,
        'lr': 0.1 * args.lr
    }])

    dur = []
    for epoch in range(args.epochs):
        t_epoch = time.time()
        model.train()
        for step, batch in enumerate(train_loader):
            g = batch.graph
            n = g.number_of_nodes()
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)
            if step >= 3:
                t0 = time.time()  # tik

            logits = model(batch, h, c)
            logp = F.log_softmax(logits, 1)
            loss = F.nll_loss(logp, batch.label, reduction='sum')

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step >= 3:
                dur.append(time.time() - t0)  # tok

            if step > 0 and step % args.log_every == 0:
                pred = th.argmax(logits, 1)
                acc = th.sum(th.eq(batch.label, pred))
                root_ids = [
                    i for i in range(batch.graph.number_of_nodes())
                    if batch.graph.out_degree(i) == 0
                ]
                root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] ==
                                  pred.cpu().data.numpy()[root_ids])

                print(
                    "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | Root Acc {:.4f} | Time(s) {:.4f}"
                    .format(epoch, step, loss.item(),
                            1.0 * acc.item() / len(batch.label),
                            1.0 * root_acc / len(root_ids), np.mean(dur)))
        print('Epoch {:05d} training time {:.4f}s'.format(
            epoch,
            time.time() - t_epoch))

        # eval on dev set
        accs = []
        root_accs = []
        model.eval()
        for step, batch in enumerate(dev_loader):
            g = batch.graph
            n = g.number_of_nodes()
            with th.no_grad():
                h = th.zeros((n, args.h_size)).to(device)
                c = th.zeros((n, args.h_size)).to(device)
                logits = model(batch, h, c)

            pred = th.argmax(logits, 1)
            acc = th.sum(th.eq(batch.label, pred)).item()
            accs.append([acc, len(batch.label)])
            root_ids = [
                i for i in range(batch.graph.number_of_nodes())
                if batch.graph.out_degree(i) == 0
            ]
            root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] ==
                              pred.cpu().data.numpy()[root_ids])
            root_accs.append([root_acc, len(root_ids)])

        dev_acc = 1.0 * np.sum([x[0]
                                for x in accs]) / np.sum([x[1] for x in accs])
        dev_root_acc = 1.0 * np.sum([x[0] for x in root_accs]) / np.sum(
            [x[1] for x in root_accs])
        print("Epoch {:05d} | Dev Acc {:.4f} | Root Acc {:.4f}".format(
            epoch, dev_acc, dev_root_acc))

        if dev_root_acc > best_dev_acc:
            best_dev_acc = dev_root_acc
            best_epoch = epoch
            th.save(model.state_dict(), 'best_{}.pkl'.format(args.seed))
        else:
            if best_epoch <= epoch - 10:
                break

        # lr decay
        for param_group in optimizer.param_groups:
            param_group['lr'] = max(1e-5, param_group['lr'] * 0.99)  # decay lr by 1% per epoch, floored at 1e-5
            print(param_group['lr'])

    # test
    model.load_state_dict(th.load('best_{}.pkl'.format(args.seed)))
    accs = []
    root_accs = []
    model.eval()
    for step, batch in enumerate(test_loader):
        g = batch.graph
        n = g.number_of_nodes()
        with th.no_grad():
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)
            logits = model(batch, h, c)

        pred = th.argmax(logits, 1)
        acc = th.sum(th.eq(batch.label, pred)).item()
        accs.append([acc, len(batch.label)])
        root_ids = [
            i for i in range(batch.graph.number_of_nodes())
            if batch.graph.out_degree(i) == 0
        ]
        root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] ==
                          pred.cpu().data.numpy()[root_ids])
        root_accs.append([root_acc, len(root_ids)])

    test_acc = 1.0 * np.sum([x[0]
                             for x in accs]) / np.sum([x[1] for x in accs])
    test_root_acc = 1.0 * np.sum([x[0] for x in root_accs]) / np.sum(
        [x[1] for x in root_accs])
    print(
        '------------------------------------------------------------------------------------'
    )
    print("Epoch {:05d} | Test Acc {:.4f} | Root Acc {:.4f}".format(
        best_epoch, test_acc, test_root_acc))
Ejemplo n.º 29
0
class FirstNet(nn.Module):
    def __init__(self, size):
        super(FirstNet, self).__init__()
        self.size = size
        self.fc0 = nn.Linear(size, 20)
        self.fc1 = nn.Linear(20, 3)

    def forward(self, x):
        x = x.view(-1, self.size)
        x = F.relu(self.fc0(x))
        x = self.fc1(x)
        #x = F.relu(self.fc2(x))
        return F.log_softmax(x, dim=1)


model = FirstNet(X.shape[1])
print(model)
optimizer = optim.Adagrad(model.parameters(), lr=0.3)


def train(epoch, model):
    model.train()
    t_loss = 0
    correct = 0
    for batch_idx, (data, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        labels = labels.long()
        loss = F.nll_loss(output, labels)
        loss.backward()
        optimizer.step()
        pred = output.data.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
Ejemplo n.º 30
0
epoch1 = 6
# optimizer = optim.Adagrad(optim_params, lr=0.001, weight_decay=0.005)
optimizer = optim.Adam(optim_params, weight_decay=0.005)
if start_epoch < epoch1:
    for epoch in range(start_epoch, epoch1):
        train(epoch, net, optimizer)
        test(epoch, net)
    start_epoch = epoch1

fc_params = list(map(id, net.fc2.parameters()))
base_params = list(filter(lambda p: id(p) not in fc_params, net.parameters()))

for param in base_params:
    param.requires_grad = True

optimizer = optim.Adagrad(base_params, lr=0.001, weight_decay=0.005)

from zeroshot.awa2_test import zsl_test
import copy

for epoch in range(start_epoch, 100):
    train(epoch, net, optimizer)
    test(epoch, net)
    if epoch > 6:
        net1 = copy.deepcopy(net)
        zsl_test(epoch, net1, optimizer)
        del net1
        # net2 = copy.deepcopy(net)
        # gzsl_test(epoch, net2, optimizer)
        # del net2
log.close()