Example #1
def main():
    args = parse_arguments()
    # argument setting
    print("=== Argument Setting ===")
    print("src: " + args.src)
    print("tgt: " + args.tgt)
    print("seed: " + str(args.seed))
    print("train_seed: " + str(args.train_seed))
    print("model_type: " + str(args.model))
    print("max_seq_length: " + str(args.max_seq_length))
    print("batch_size: " + str(args.batch_size))
    print("pre_epochs: " + str(args.pre_epochs))
    print("num_epochs: " + str(args.num_epochs))
    print("AD weight: " + str(args.alpha))
    print("KD weight: " + str(args.beta))
    print("temperature: " + str(args.temperature))
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    print("=== Processing datasets ===")
    if args.src in ['blog', 'airline', 'imdb']:
        src_x, src_y = CSV2Array(
            os.path.join('data', args.src, args.src + '.csv'))
    else:
        src_x, src_y = XML2Array(
            os.path.join('data', args.src, 'negative.review'),
            os.path.join('data', args.src, 'positive.review'))

    src_x, src_test_x, src_y, src_test_y = train_test_split(
        src_x, src_y, test_size=0.2, stratify=src_y, random_state=args.seed)

    if args.tgt in ['blog', 'airline', 'imdb']:
        tgt_x, tgt_y = CSV2Array(
            os.path.join('data', args.tgt, args.tgt + '.csv'))
    else:
        tgt_x, tgt_y = XML2Array(
            os.path.join('data', args.tgt, 'negative.review'),
            os.path.join('data', args.tgt, 'positive.review'))

    tgt_train_x, tgt_test_x, tgt_train_y, tgt_test_y = train_test_split(
        tgt_x, tgt_y, test_size=0.2, stratify=tgt_y, random_state=args.seed)

    if args.model in ['roberta', 'distilroberta']:
        src_features = roberta_convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = roberta_convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = roberta_convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
        tgt_train_features = roberta_convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)
    else:
        src_features = convert_examples_to_features(src_x, src_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        src_test_features = convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = convert_examples_to_features(tgt_x, tgt_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        tgt_train_features = convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)

    # load dataset

    src_data_loader = get_data_loader(src_features, args.batch_size)
    src_data_eval_loader = get_data_loader(src_test_features, args.batch_size)
    tgt_data_train_loader = get_data_loader(tgt_train_features,
                                            args.batch_size)
    tgt_data_all_loader = get_data_loader(tgt_features, args.batch_size)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()

    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore=param.src_encoder_path)
        src_classifier = init_model(args,
                                    src_classifier,
                                    restore=param.src_classifier_path)
        tgt_encoder = init_model(args,
                                 tgt_encoder,
                                 restore=param.tgt_encoder_path)
        discriminator = init_model(args,
                                   discriminator,
                                   restore=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(args, src_encoder,
                                               src_classifier, src_data_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(src_encoder, src_classifier, src_data_loader)
    evaluate(src_encoder, src_classifier, src_data_eval_loader)
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)

    for params in src_encoder.parameters():
        params.requires_grad = False

    for params in src_classifier.parameters():
        params.requires_grad = False

    # train target encoder by GAN
    print("=== Training encoder for target domain ===")
    if args.adapt:
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adapt(args, src_encoder, tgt_encoder, discriminator,
                            src_classifier, src_data_loader,
                            tgt_data_train_loader, tgt_data_all_loader)

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)
    print(">>> domain adaption <<<")
    evaluate(tgt_encoder, src_classifier, tgt_data_all_loader)
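
Note that CSV2Array, XML2Array, convert_examples_to_features, get_data_loader, pretrain, adapt, and evaluate are helpers from this project's own modules and are not shown above. As a rough, hypothetical sketch of the simplest of them, get_data_loader could look like the following, assuming each feature object carries input_ids, input_mask, and label_id fields (those names are an assumption, not taken from this snippet):

import torch
from torch.utils.data import DataLoader, TensorDataset

def get_data_loader(features, batch_size):
    # Stack per-example feature fields into long tensors.
    input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)
    dataset = TensorDataset(input_ids, input_mask, label_ids)
    # Shuffle so each epoch draws batches in a fresh order.
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)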
Example #2
def main(args, f):
    # args = parse_arguments()
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        encoder = BertEncoder()
        src_encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        encoder = DistilBertEncoder()
        src_encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        encoder = RobertaEncoder()
        src_encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        encoder = DistilRobertaEncoder()
        src_encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()

    # domain discriminator
    discriminator = AdversarialNetworkCdan(param.input_dim * param.num_labels,
                                           param.hidden_dim)

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder)
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        encoder = init_model(args,
                             encoder,
                             restore_path=param.src_encoder_path)
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.tgt_encoder_path)
        classifier = init_model(args,
                                classifier,
                                restore_path=param.src_classifier_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        encoder = init_model(args, encoder)
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)

    discriminator = init_model(args, discriminator)

    # train source model
    print("=== Pretrain encoder for source domain ===")
    if args.pretrain:
        encoder, classifier = pretrain(args, encoder, classifier, src_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, encoder, classifier, src_loader)
    src_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: no-adapt acc on tgt data: {src_acc}\n')
    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_pretrain'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_pretrain'), x, y)

    # adapt
    print("=== Adapt encoder for target domain ===")
    src_encoder.load_state_dict(encoder.state_dict())
    if args.src_free:
        # note: reusing the same encoder vs. copying it into src_encoder yields different baseline results
        s_res_features = src_gmm(args, encoder, src_loader)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_src_free(args, encoder, src_encoder,
                                                  discriminator, classifier,
                                                  src_loader, tgt_train_loader,
                                                  tgt_all_loader)
    elif args.data_free:
        s_res_features = src_gmm(args, encoder, src_loader)
        t_res_features = tgt_gmm(encoder, tgt_all_loader, 1)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        tgt_train_loader = t_numpy_dataloader(t_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_data_free(args, encoder,
                                                   discriminator, classifier,
                                                   src_loader,
                                                   tgt_train_loader,
                                                   tgt_all_loader)
    else:
        encoder, classifier = cdan_adapt(args, encoder, discriminator,
                                         classifier, src_loader,
                                         tgt_train_loader, tgt_all_loader)
    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_adapt_cdan'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_adapt_cdan'), x, y)

    # argument setting
    print(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}"
    )

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}\n\n"
    )
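
AdversarialNetworkCdan takes param.input_dim * param.num_labels as its input size, which is the hallmark of CDAN's multilinear conditioning: the domain discriminator sees the flattened outer product of the encoder feature and the classifier's softmax prediction rather than the feature alone. A minimal sketch of that map (function and argument names are illustrative, not from this project):

import torch

def multilinear_map(feature, softmax_pred):
    # feature: [B, D] encoder output; softmax_pred: [B, C] class probabilities.
    # Per-example outer product -> [B, C, D], flattened to [B, C * D],
    # matching the discriminator's input_dim * num_labels input size.
    outer = torch.bmm(softmax_pred.unsqueeze(2), feature.unsqueeze(1))
    return outer.view(feature.size(0), -1)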
Example #3
def main(args, f):
    set_seed(args.train_seed)
    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader, tgt_te = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        # encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        # encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        # encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        # encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()
    discriminator = Discriminator()

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        # encoder = nn.DataParallel(encoder)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.src_encoder_path)
        classifier = init_model(args,
                                classifier,
                                restore_path=param.src_classifier_path)
        # encoder = init_model(args, encoder, restore_path=param.tgt_encoder_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)

    # encoder = init_model(args, encoder)
    discriminator = init_model(args, discriminator)

    # train source model
    if args.pretrain:
        print("=== Training classifier for source domain ===")
        src_encoder, classifier = pretrain(args, src_encoder, classifier,
                                           src_loader)

        # save pretrained model
        # save_model(args, src_encoder, param.src_encoder_path)
        # save_model(args, classifier, param.src_classifier_path)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, classifier, src_loader)
    src_acc = evaluate(args, src_encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: no-adapt acc on tgt data: {src_acc}\n')

    # adapt
    print("=== Adapt tgt encoder ===")
    # encoder.load_state_dict(src_encoder.state_dict())
    # if args.src_free:
    # s_res_features = src_gmm(args, src_encoder, src_loader)
    # src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
    # encoder = aad_adapt_src_free(args, src_encoder, encoder, discriminator,
    #                                  classifier, src_loader, tgt_train_loader, tgt_all_loader)
    # else:
    encoder = src_encoder  # fall back to the source encoder if adaptation is skipped
    if args.adapt:
        encoder, classifier = shot_adapt(args, src_encoder, classifier,
                                         tgt_train_loader, tgt_all_loader,
                                         tgt_te)

    # save_model(args, encoder, param.tgt_encoder_path)

    # argument setting
    # print("=== Argument Setting ===")
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; src: {args.src}; tgt: {args.tgt}; "
        f'src_free: {args.src_free}; dp: {args.dp}')

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt}; dp: {args.dp}\n\n")
Example #4
  def model_fn(features, labels, mode, params):
    # features name and shape
    _info('*** Features ***')
    for name in sorted(features.keys()):
      tf.logging.info(' name = {}, shape = {}'.format(name, features[name].shape))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # get data
    input_x = features['input_x']
    input_mask = features['input_mask']
    if is_training:
      input_y = features['input_y']
      seq_length = features['seq_length']
    else:
      input_y = None
      seq_length = None

    # build encoder
    model = BertEncoder(
      config=cg.BertEncoderConfig,
      is_training=is_training,
      input_ids=input_x,
      input_mask=input_mask)
    embedding_table = model.get_embedding_table()
    encoder_output = tf.reduce_sum(model.get_sequence_output(), axis=1)

    # build decoder
    decoder_model = Decoder(
      config=cg.DecoderConfig,
      is_training=is_training,
      encoder_state=encoder_output,
      embedding_table=embedding_table,
      decoder_intput_data=input_y,
      seq_length_decoder_input_data=seq_length)
    logits, sample_id, ppl_seq, ppl = decoder_model.get_decoder_output()

    if mode == tf.estimator.ModeKeys.PREDICT:
      predictions = {'sample_id': sample_id, 'ppls': ppl_seq}
      output_spec = tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
      if mode == tf.estimator.ModeKeys.TRAIN:
        max_time = ft.get_shape_list(labels, expected_rank=2)[1]
        target_weights = tf.sequence_mask(seq_length, max_time, dtype=logits.dtype)
        batch_size = tf.cast(ft.get_shape_list(labels, expected_rank=2)[0], tf.float32)

        loss = tf.reduce_sum(
          tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits) * target_weights) / batch_size

        learning_rate = tf.train.polynomial_decay(cg.learning_rate,
                                          tf.train.get_or_create_global_step(),
                                          cg.train_steps / 100,
                                          end_learning_rate=1e-4,
                                          power=1.0,
                                          cycle=False)

        lr = tf.maximum(tf.constant(cg.lr_limit), learning_rate)
        optimizer = tf.train.AdamOptimizer(lr, name='optimizer')
        tvars = tf.trainable_variables()
        gradients = tf.gradients(loss, tvars, colocate_gradients_with_ops=cg.colocate_gradients_with_ops)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars), global_step=tf.train.get_global_step())


        # LoggingTensorHook prints loss/ppl/lr every `print_info_interval`
        # steps, whereas the EstimatorSpec's default logging only reports at
        # each checkpoint save.
        logging_hook = tf.train.LoggingTensorHook({'loss': loss, 'ppl': ppl, 'lr': lr}, every_n_iter=cg.print_info_interval)

        output_spec = tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks=[logging_hook])
      elif mode == tf.estimator.ModeKeys.EVAL:
        # TODO
        raise NotImplementedError
    
    return output_spec
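
For context, a model_fn like this is handed to a TF1 Estimator. A hypothetical wiring is shown below; model_dir, train_input_fn, and the cg fields used here are placeholders, not taken from this snippet:

run_config = tf.estimator.RunConfig(save_checkpoints_steps=cg.save_checkpoints_steps)
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=cg.model_dir,
                                   config=run_config)
estimator.train(input_fn=train_input_fn, max_steps=cg.train_steps)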
Example #5
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    train_transform = transforms.Compose([
        transforms.RandomCrop(args.image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # val_transform = transforms.Compose([
    #     transforms.Resize(args.image_size, interpolation=Image.LANCZOS),
    #     transforms.ToTensor(),
    #     transforms.Normalize((0.485, 0.456, 0.406),
    #                          (0.229, 0.224, 0.225))])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    # Build data loader
    train_data_loader = get_loader(args.train_image_dir, args.train_vqa_path,
                                   args.ix_to_ans_file, args.train_description_file,
                                   vocab, train_transform, args.batch_size,
                                   shuffle=True, num_workers=args.num_workers)
    #val_data_loader = get_loader(args.val_image_dir, args.val_vqa_path, args.ix_to_ans_file, vocab, val_transform, args.batch_size, shuffle=False, num_workers=args.num_workers)

    image_encoder = ImageEncoder(args.img_feature_size)
    question_emb_size = 1024
    # description_emb_size = 512
    no_ans = 1000
    question_encoder = BertEncoder(question_emb_size)
    # ques_description_encoder = BertEncoder(description_emb_size)
    # vqa_decoder = VQA_Model(args.img_feature_size, question_emb_size, description_emb_size, no_ans)
    vqa_decoder = VQA_Model(args.img_feature_size, question_emb_size, no_ans)
    
    

    pretrained_epoch = 0
    if args.pretrained_epoch > 0:
        pretrained_epoch = args.pretrained_epoch
        image_encoder.load_state_dict(torch.load('./models/image_encoder-' + str(pretrained_epoch) + '.pkl'))
        question_encoder.load_state_dict(torch.load('./models/question_encoder-' + str(pretrained_epoch) + '.pkl'))
        # ques_description_encoder.load_state_dict(torch.load('./models/ques_description_encoder-' + str(pretrained_epoch) + '.pkl'))
        vqa_decoder.load_state_dict(torch.load('./models/vqa_decoder-' + str(pretrained_epoch) + '.pkl'))

    if torch.cuda.is_available():
        image_encoder.cuda()
        question_encoder.cuda()
        # ques_description_encoder.cuda()
        vqa_decoder.cuda()
        print("Cuda is enabled...")

    criterion = nn.CrossEntropyLoss()
    # params = image_encoder.get_params() + question_encoder.get_params() + ques_description_encoder.get_params() + vqa_decoder.get_params()
    params = list(image_encoder.parameters()) + list(question_encoder.parameters()) + list(vqa_decoder.parameters())
    #print("params: ", params)
    optimizer = torch.optim.Adam(params, lr=args.learning_rate, weight_decay=args.weight_decay)
    total_train_step = len(train_data_loader)

    min_avg_loss = float("inf")
    overfit_warn = 0

    for epoch in range(args.num_epochs):
        if epoch < pretrained_epoch:
            continue

        image_encoder.train()
        question_encoder.train()
        #ques_description_encoder.train()
        vqa_decoder.train()
        avg_loss = 0.0
        avg_acc = 0.0
        for bi, (question_arr, image_vqa, target_answer, answer_str) in enumerate(train_data_loader):
            loss = 0
            image_encoder.zero_grad()
            question_encoder.zero_grad()
            #ques_description_encoder.zero_grad()
            vqa_decoder.zero_grad()
            
            images = to_var(torch.stack(image_vqa))    
            question_arr = to_var(torch.stack(question_arr))
            #ques_desc_arr = to_var(torch.stack(ques_desc_arr))
            target_answer = to_var(torch.tensor(target_answer))

            image_emb = image_encoder(images)
            question_emb = question_encoder(question_arr)
            #ques_desc_emb = ques_description_encoder(ques_desc_arr)
            #output = vqa_decoder(image_emb, question_emb, ques_desc_emb)
            output = vqa_decoder(image_emb, question_emb)
            
            loss = criterion(output, target_answer)

            _, prediction = torch.max(output,1)
            no_correct_prediction = prediction.eq(target_answer).sum().item()
            accuracy = no_correct_prediction * 100 / args.batch_size

            ####
            target_answer_no = target_answer.tolist()
            prediction_no = prediction.tolist()
            ####
            loss_num = loss.item()
            avg_loss += loss.item()
            avg_acc += no_correct_prediction
            #loss /= (args.batch_size)
            loss.backward()
            optimizer.step()

            # Print log info
            if bi % args.log_step == 0:
                print('Epoch [%d/%d], Train Step [%d/%d], Loss: %.4f, Acc: %.4f'
                      %(epoch + 1, args.num_epochs, bi, total_train_step, loss.item(), accuracy))
            
        avg_loss /= (args.batch_size * total_train_step)
        avg_acc /= (args.batch_size * total_train_step)
        print('Epoch [%d/%d], Average Train Loss: %.4f, Average Train acc: %.4f' %(epoch + 1, args.num_epochs, avg_loss, avg_acc))

        # Save the models
        
        torch.save(image_encoder.state_dict(), os.path.join(args.model_path, 'image_encoder-%d.pkl' %(epoch+1)))
        torch.save(question_encoder.state_dict(), os.path.join(args.model_path, 'question_encoder-%d.pkl' %(epoch+1)))
        #torch.save(ques_description_encoder.state_dict(), os.path.join(args.model_path, 'ques_description_encoder-%d.pkl' %(epoch+1)))
        torch.save(vqa_decoder.state_dict(), os.path.join(args.model_path, 'vqa_decoder-%d.pkl' %(epoch+1)))

        overfit_warn = overfit_warn + 1 if (min_avg_loss < avg_loss) else 0
        min_avg_loss = min(min_avg_loss, avg_loss)
        loss_file_name = "result/result_" + str(epoch) + ".txt"
        with open(loss_file_name, 'w') as test_fd:
            test_fd.write('Epoch: ' + str(epoch) + ' avg_loss: ' + str(avg_loss) + " avg_acc: " + str(avg_acc) + "\n")

        if overfit_warn >= 5:
            print("terminated as overfitted")
            break
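
to_var is a small project helper; a plausible sketch is given below, assuming it just moves tensors to the GPU when one is available (Variable is a no-op on PyTorch >= 0.4 but matches this code's vintage):

import torch
from torch.autograd import Variable

def to_var(x):
    # Move to GPU when available, then wrap for autograd.
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)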
Example #6
File: main.py  Project: xiyanghu/bert-DANN
def main():
    args = parse_arguments()
    # argument setting
    print("=== Argument Setting ===")
    print("src: " + args.src)
    print("tgt: " + args.tgt)
    print("alpha: " + str(args.alpha))
    print("seed: " + str(args.seed))
    print("train_seed: " + str(args.train_seed))
    print("model_type: " + str(args.model))
    print("max_seq_length: " + str(args.max_seq_length))
    print("batch_size: " + str(args.batch_size))
    print("num_epochs: " + str(args.num_epochs))
    set_seed(args.train_seed)

    if args.model == 'roberta':
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    print("=== Processing datasets ===")
    if args.src == 'blog':
        src_x, src_y = CSV2Array(os.path.join('data', args.src, 'blog.csv'))

    elif args.src == 'airline':
        src_x, src_y = CSV2Array(os.path.join('data', args.src, 'airline.csv'))

    else:
        src_x, src_y = XML2Array(
            os.path.join('data', args.src, 'negative.review'),
            os.path.join('data', args.src, 'positive.review'))

    src_x, src_test_x, src_y, src_test_y = train_test_split(
        src_x, src_y, test_size=0.2, stratify=src_y, random_state=args.seed)

    if args.tgt == 'blog':
        tgt_x, tgt_y = CSV2Array(os.path.join('data', args.tgt, 'blog.csv'))

    elif args.tgt == 'airline':
        tgt_x, tgt_y = CSV2Array(os.path.join('data', args.tgt, 'airline.csv'))
    else:
        tgt_x, tgt_y = XML2Array(
            os.path.join('data', args.tgt, 'negative.review'),
            os.path.join('data', args.tgt, 'positive.review'))

    tgt_train_x, _, tgt_train_y, _ = train_test_split(tgt_x,
                                                      tgt_y,
                                                      test_size=0.2,
                                                      stratify=tgt_y,
                                                      random_state=args.seed)

    if args.model == 'roberta':
        src_features = roberta_convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = roberta_convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = roberta_convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)
        tgt_all_features = roberta_convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
    else:
        src_features = convert_examples_to_features(src_x, src_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        src_test_features = convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = convert_examples_to_features(tgt_train_x, tgt_train_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        tgt_all_features = convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)

    # load dataset

    src_data_loader = get_data_loader(src_features, args.batch_size)
    src_data_loader_eval = get_data_loader(src_test_features, args.batch_size)
    tgt_data_loader = get_data_loader(tgt_features, args.batch_size)
    tgt_data_loader_all = get_data_loader(tgt_all_features, args.batch_size)

    # load models
    if args.model == 'bert':
        encoder = BertEncoder()
        cls_classifier = BertClassifier()
        dom_classifier = DomainClassifier()
    elif args.model == 'distilbert':
        encoder = DistilBertEncoder()
        cls_classifier = BertClassifier()
        dom_classifier = DomainClassifier()
    else:
        encoder = RobertaEncoder()
        cls_classifier = RobertaClassifier()
        dom_classifier = RobertaDomainClassifier()

    if args.load:
        encoder = init_model(encoder, restore=param.encoder_path)
        cls_classifier = init_model(cls_classifier,
                                    restore=param.cls_classifier_path)
        dom_classifier = init_model(dom_classifier,
                                    restore=param.dom_classifier_path)
    else:
        encoder = init_model(encoder)
        cls_classifier = init_model(cls_classifier)
        dom_classifier = init_model(dom_classifier)

    print("=== Start Training ===")
    if args.train:
        encoder, cls_classifier, dom_classifier = train(
            args, encoder, cls_classifier, dom_classifier, src_data_loader,
            src_data_loader_eval, tgt_data_loader, tgt_data_loader_all)

    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> after training <<<")
    evaluate(encoder, cls_classifier, tgt_data_loader_all)
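
The train routine is not shown, but the project name (bert-DANN) and the alpha argument point to DANN, where the domain classifier sits behind a gradient reversal layer: identity on the forward pass, gradient multiplied by -alpha on the backward pass, so the shared encoder learns domain-invariant features. A standard sketch of such a layer:

import torch

class GradReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Flip and scale the gradient flowing back into the encoder.
        return grad_output.neg() * ctx.alpha, None

# usage inside the training step (illustrative):
# dom_logits = dom_classifier(GradReverse.apply(features, args.alpha))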
Example #7
def main(args, f):
    # args = parse_arguments()
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()  # output dim is 2 instead of 1

    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.src_encoder_path)
        src_classifier = init_model(args,
                                    src_classifier,
                                    restore_path=param.src_classifier_path)
        tgt_encoder = init_model(args,
                                 tgt_encoder,
                                 restore_path=param.tgt_encoder_path)
        discriminator = init_model(args,
                                   discriminator,
                                   restore_path=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        src_classifier = nn.DataParallel(src_classifier)
        tgt_encoder = nn.DataParallel(tgt_encoder)
        discriminator = nn.DataParallel(discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(args, src_encoder,
                                               src_classifier, src_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, src_classifier, src_loader)
    src_acc = evaluate(args, src_encoder, src_classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: no-adapt acc on tgt data: {src_acc}\n')

    for params in src_encoder.parameters():
        params.requires_grad = False

    # train target encoder by ADDA
    print("=== Training encoder for target domain ===")
    if args.adapt:
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adda_adapt(args, src_encoder, tgt_encoder, discriminator,
                                 src_loader, tgt_train_loader)

    # argument setting
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; src: {args.src}; tgt: {args.tgt}"
    )

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, tgt_encoder, src_classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt};\n\n")
Example #8
  def model_fn(features, labels, mode, params):
    # features name and shape
    for name in sorted(features.keys()):
      tf.logging.info(' name = {}, shape = {}'.format(name, features[name].shape))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # get data
    input_data = features['input_data']
    input_mask = features['input_mask']
    if mode == tf.estimator.ModeKeys.TRAIN:
      sentiment_labels = features['sentiment_labels']
      sentiment_mask_indices = features['sentiment_mask_indices']
      true_length_from_data = features['true_length']

    # build model
    model = BertEncoder(
      config=cg.BertEncoderConfig,
      is_training=is_training,
      input_ids=input_data,
      input_mask=input_mask)
    
    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    # [cls] output -> [b, h]
    cls_output = model.get_cls_output()
    # sequence_output -> [b, s, h]; [CLS] is dropped so the mask indices line up without shifting
    sequence_output = model.get_sequence_output()[:, 1:, :]

    # project the hidden size to the num_classes
    with tf.variable_scope('final_output'):
      # [b, num_classes]
      output_logits = tf.layers.dense(
        cls_output,
        cg.BertEncoderConfig.num_classes,
        name='final_output',
        kernel_initializer=ft.create_initializer(initializer_range=cg.BertEncoderConfig.initializer_range))

    if mode == tf.estimator.ModeKeys.PREDICT:
      output_softmax = tf.nn.softmax(output_logits, axis=-1)
      output_result = tf.argmax(output_softmax, axis=-1)
      predictions = {'predict': output_result}
      output_spec = tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
      if mode == tf.estimator.ModeKeys.TRAIN:
        # masked_output -> [b * x, h]
        masked_output = gather_indexs(sequence_output, sentiment_mask_indices)
        
        # get output for word polarity prediction
        with tf.variable_scope('sentiment_project'):
          # [b * x, 2]
          output_sentiment = tf.layers.dense(
            masked_output,
            2,
            name='final_output',
            kernel_initializer=ft.create_initializer(initializer_range=cg.BertEncoderConfig.initializer_range))
        # output_sentiment_probs = tf.nn.softmax(output_sentiment, axis=-1)

        batch_size = tf.cast(ft.get_shape_list(labels, expected_rank=1)[0], dtype=tf.float32)
        # cross-entropy loss
        cls_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=labels,
          logits=output_logits)) / batch_size

        # token-level sentiment loss
        true_sequence = get_true_sequence(true_length_from_data)
        # Regression variant (MSE), kept for reference:
        # mse_loss = calculate_mse_loss(
        #   output_sentiment, sentiment_labels, true_sequence)

        # Classification variant: masked cross-entropy averaged over real
        # tokens. The variable keeps the historical name `mse_loss`.
        true_label_flatten = tf.reshape(sentiment_labels, [-1])
        mse_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=true_label_flatten,
          logits=output_sentiment) * true_sequence) / tf.reduce_sum(true_sequence)

        loss = cls_loss + mse_loss
        # loss = cls_loss

        learning_rate = tf.train.polynomial_decay(cg.learning_rate,
                                  tf.train.get_or_create_global_step(),
                                  cg.train_steps,
                                  end_learning_rate=cg.lr_limit,
                                  power=1.0,
                                  cycle=False)

        lr = tf.maximum(tf.constant(cg.lr_limit), learning_rate)
        optimizer = tf.train.AdamOptimizer(lr, name='optimizer')
        tvars = tf.trainable_variables()
        gradients = tf.gradients(loss, tvars, colocate_gradients_with_ops=cg.colocate_gradients_with_ops)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars), global_step=tf.train.get_global_step())

        current_steps = tf.train.get_or_create_global_step()
        logging_hook = tf.train.LoggingTensorHook(
          {'step' : current_steps, 'loss' : loss, 'cls_loss' : cls_loss, 'mse_loss': mse_loss, 'lr' : lr}, 
          every_n_iter=cg.print_info_interval)

        output_spec = tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks=[logging_hook])
      elif mode == tf.estimator.ModeKeys.EVAL:
        # TODO
        raise NotImplementedError
    
    return output_spec
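
gather_indexs is a project helper; it presumably mirrors BERT's masked-LM gather, flattening the sequence output and picking out the rows named by the per-example positions. A sketch under that assumption (TF1-style, names illustrative):

import tensorflow as tf

def gather_indexs(sequence_output, positions):
    # sequence_output: [b, s, h]; positions: [b, x] token indices into s.
    positions = tf.cast(positions, tf.int32)
    shape = tf.shape(sequence_output)
    batch_size, seq_length, hidden_size = shape[0], shape[1], shape[2]
    # Offset each example's positions into the flattened [b * s, h] view.
    flat_offsets = tf.reshape(tf.range(batch_size) * seq_length, [-1, 1])
    flat_positions = tf.reshape(positions + flat_offsets, [-1])
    flat_sequence = tf.reshape(sequence_output, [-1, hidden_size])
    return tf.gather(flat_sequence, flat_positions)  # [b * x, h]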