Example no. 1
def main():
    ###############################################################################
    # Load data
    ###############################################################################

    dictionary = data.Dictionary()
    train_corpus = data.Corpus(dictionary)
    dev_corpus = data.Corpus(dictionary)
    test_corpus = data.Corpus(dictionary)

    task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
    for task in task_names:
        skip_first_line = (task == 'sick')
        train_corpus.parse(task,
                           args.data,
                           'train.txt',
                           args.tokenize,
                           num_examples=args.max_example,
                           skip_first_line=skip_first_line)
        if task == 'multinli':
            dev_corpus.parse(task, args.data, 'dev_matched.txt', args.tokenize)
            dev_corpus.parse(task, args.data, 'dev_mismatched.txt',
                             args.tokenize)
            test_corpus.parse(task,
                              args.data,
                              'test_matched.txt',
                              args.tokenize,
                              is_test_corpus=False)
            test_corpus.parse(task,
                              args.data,
                              'test_mismatched.txt',
                              args.tokenize,
                              is_test_corpus=False)
        else:
            dev_corpus.parse(task,
                             args.data,
                             'dev.txt',
                             args.tokenize,
                             skip_first_line=skip_first_line)
            test_corpus.parse(task,
                              args.data,
                              'test.txt',
                              args.tokenize,
                              is_test_corpus=False,
                              skip_first_line=skip_first_line)

    print('train set size = ', len(train_corpus.data))
    print('development set size = ', len(dev_corpus.data))
    print('test set size = ', len(test_corpus.data))
    print('vocabulary size = ', len(dictionary))

    # save the dictionary object to use during testing
    helper.save_object(dictionary,
                       args.save_path + args.task + '_dictionary.pkl')

    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file,
                                                   dictionary.word2idx)
    print('number of OOV words = ', len(dictionary) - len(embeddings_index))
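
    # A hedged sketch (an assumption, not the project's actual helper) of what
    # helper.load_word_embeddings above may do: read GloVe-style text vectors
    # and keep only the words already present in dictionary.word2idx.
    #
    # def load_word_embeddings(directory, filename, word2idx):
    #     embeddings_index = {}
    #     with open(os.path.join(directory, filename), encoding='utf-8') as f:
    #         for line in f:
    #             word, *values = line.rstrip().split(' ')
    #             if word in word2idx:
    #                 embeddings_index[word] = numpy.asarray(values, dtype='float32')
    #     return embeddings_index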

    # ###############################################################################
    # # Build the model
    # ###############################################################################

    model = SentenceClassifier(dictionary, embeddings_index, args)
    optim_fn, optim_params = helper.get_optimizer(args.optimizer)
    optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                         **optim_params)
    best_acc = 0

    if args.cuda:
        model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # ###############################################################################
    # # Train the model
    # ###############################################################################

    train = Train(model, optimizer, dictionary, embeddings_index, args,
                  best_acc)
    bestmodel = train.train_epochs(train_corpus, dev_corpus, args.start_epoch,
                                   args.epochs)
    test_batches = helper.batchify(test_corpus.data, args.batch_size)
    if 'multinli' in task_names:
        print(
            'Skipping evaluating best model. Evaluate using the test script.')
    else:
        test_accuracy, test_f1 = evaluate(bestmodel, test_batches, dictionary)
        print('accuracy: %.2f%%' % test_accuracy)
        print('f1: %.2f%%' % test_f1)
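
The resume block above expects a checkpoint with the keys 'epoch', 'best_acc', 'state_dict' and 'optimizer'. For reference, a minimal sketch of the matching save step (the helper name and call site are assumptions, not taken from the repository):

import torch

def save_checkpoint(model, optimizer, epoch, best_acc, path):
    # Persist exactly what the resume branch above reads back.
    torch.save({
        'epoch': epoch,
        'best_acc': best_acc,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, path)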
Example no. 2
size_p = (args.size_p, args.size_p) # cropped local patch size
sub_batch_size = args.sub_batch_size # batch size for train local patches
###################################
print("creating models......")

path_g = os.path.join(model_path, args.path_g)
path_g2l = os.path.join(model_path, args.path_g2l)
path_l2g = os.path.join(model_path, args.path_l2g)
model, global_fixed = create_model_load_weights(n_class, mode, evaluation, path_g=path_g, path_g2l=path_g2l, path_l2g=path_l2g)

###################################
num_epochs = args.num_epochs
learning_rate = args.lr
lamb_fmreg = args.lamb_fmreg

optimizer = get_optimizer(model, mode, learning_rate=learning_rate)

scheduler = LR_Scheduler('poly', learning_rate, num_epochs, len(dataloader_train))
##################################

criterion1 = FocalLoss(gamma=3)
criterion2 = nn.CrossEntropyLoss()
criterion3 = lovasz_softmax
criterion = lambda x, y: criterion1(x, y)
# criterion = lambda x, y: 0.5*criterion1(x, y) + 0.5*criterion3(x, y)
mse = nn.MSELoss()

if not evaluation:
    
    writer = SummaryWriter(log_dir=os.path.join(log_path, task_name))
    f_log = open(os.path.join(log_path, task_name + ".log"), 'w')
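
LR_Scheduler('poly', ...) above is the repository's own helper; for reference, a hedged sketch of the standard polynomial decay it presumably implements (the power value 0.9 is an assumption):

def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    # "poly" decay: the learning rate shrinks from base_lr towards 0 over max_iter steps.
    return base_lr * (1 - cur_iter / max_iter) ** power

# e.g. a per-iteration update inside the training loop:
# for param_group in optimizer.param_groups:
#     param_group['lr'] = poly_lr(learning_rate, it, num_epochs * len(dataloader_train))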
Example no. 3
            batch_size, kwargs)
    nr_classes = 200

    # Load the polars and update the trainy labels.
    classpolars = torch.from_numpy(np.load(args.hpnfile)).float()
    args.output_dims = int(args.hpnfile.split("/")[-1].split("-")[1][:-1])

    # Load the model.
    if args.network == "resnet32":
        model = resnet.ResNet(32, args.output_dims, 1, classpolars)
    elif args.network == "densenet121":
        model = densenet.DenseNet121(args.output_dims, classpolars)
    model = model.to(device)

    # Load the optimizer.
    optimizer = helper.get_optimizer(args.optimizer, model.parameters(),
                                     args.learning_rate, args.momentum,
                                     args.decay)

    # Initialize the loss functions.
    f_loss = nn.CosineSimilarity(eps=1e-9).cuda()

    # Main loop.
    testscores = []
    learning_rate = args.learning_rate
    for i in range(args.epochs):
        print("---")
        # Learning rate decay.
        if i in [args.drop1, args.drop2]:
            learning_rate *= 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
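
The snippet ends inside the training loop. A hedged sketch of how a cosine-similarity loss against the per-class prototypes (the classpolars tensor loaded above) is typically applied; the prototype lookup is an assumption, not code from this file:

import torch.nn as nn

f_loss = nn.CosineSimilarity(eps=1e-9)

def prototype_loss(outputs, targets, classpolars):
    # Pull each output vector towards the prototype of its target class.
    target_polars = classpolars[targets]  # shape: (batch_size, output_dims)
    return (1.0 - f_loss(outputs, target_polars)).mean()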
Example no. 4
print('development set size = ', len(dev_corpus.data))
print('test set size = ', len(test_corpus.data))
print('vocabulary size = ', len(dictionary))

# save the dictionary object to use during testing
helper.save_object(dictionary, args.save_path + 'dictionary.p')

embeddings_index = helper.load_word_embeddings(args.word_vectors_directory, args.word_vectors_file, dictionary.word2idx)
print('number of OOV words = ', len(dictionary) - len(embeddings_index))

# ###############################################################################
# # Build the model
# ###############################################################################

model = SentenceClassifier(dictionary, embeddings_index, args)
optim_fn, optim_params = helper.get_optimizer(args.optimizer)
optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()), **optim_params)
best_acc = 0

# for training on multiple GPUs. use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
if 'CUDA_VISIBLE_DEVICES' in os.environ:
    cuda_visible_devices = [int(x) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')]
    if len(cuda_visible_devices) > 1:
        model = torch.nn.DataParallel(model, device_ids=cuda_visible_devices)
if args.cuda:
    model = model.cuda()

if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
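
The snippet cuts off right after torch.load; judging from the identical resume blocks in the other examples, it presumably continues by restoring the epoch counter, best accuracy, model and optimizer state (a sketch, not verified against this file):

        args.start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume,
                                                            checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))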
Example no. 5
embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                               args.word_vectors_file,
                                               dictionary.word2idx)
print('number of OOV words = ', len(dictionary) - len(embeddings_index))

# ###############################################################################
# # Build the model
# ###############################################################################

model = LSTM(dictionary, embeddings_index, args)
selector = Selector(dictionary, embeddings_index, args)

print(selector)
print(model)
optim_fn_selector, optim_params_selector = helper.get_optimizer(args.optimizer)
optimizer_selector = optim_fn_selector(
    filter(lambda p: p.requires_grad, selector.parameters()),
    **optim_params_selector)
optim_fn, optim_params = helper.get_optimizer(args.optimizer)
optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                     **optim_params)

best_acc = 0
param_dict_selector = helper.count_parameters(selector)
param_dict = helper.count_parameters(model)
print(
    'number of trainable parameters = ',
    numpy.sum(list(param_dict_selector.values())),
    numpy.sum(list(param_dict.values())),
    numpy.sum(list(param_dict.values())) +
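
helper.count_parameters above apparently returns a dict whose values sum to the number of trainable parameters; a minimal sketch of such a helper (its exact behaviour is an assumption):

import numpy

def count_parameters(model):
    # Map each trainable parameter name to its number of elements.
    param_dict = {}
    for name, param in model.named_parameters():
        if param.requires_grad:
            param_dict[name] = numpy.prod(param.size())
    return param_dict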
Example no. 6
    model = model.to(device)

    # To CUDA.
    if args.multigpu == 1:
        model = torch.nn.DataParallel(model.cuda())
    else:
        model = model.to(device)

    # Network parameters.
    optimname = args.optimizer
    lr = args.learning_rate
    momentum = args.momentum
    decay = args.decay
    params = model.parameters()
    # Set the optimizer.
    optimizer = helper.get_optimizer(optimname, params, lr, momentum, decay)

    # Initialize the loss functions.
    f_loss = nn.CosineSimilarity(eps=1e-9).cuda()

    resdir = args.resdir + "omniart/"
    args.do_norm = 1
    testscores = []

    # Iterative optimization.
    for i in range(args.epochs):
        print("---")
        # Update learning rate.
        if i in [args.drop1, args.drop2]:
            lr = lr * 0.1
            for param_group in optimizer.param_groups:
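
The snippet stops inside the learning-rate decay loop, which in Example no. 3 continues by assigning the decayed rate to param_group['lr']. For reference, a hedged sketch of an optimizer factory matching the helper.get_optimizer(optimname, params, lr, momentum, decay) call used above (the supported names are assumptions):

import torch.optim as optim

def get_optimizer(name, params, lr, momentum, decay):
    # Dispatch on the optimizer name passed on the command line.
    if name == "sgd":
        return optim.SGD(params, lr=lr, momentum=momentum, weight_decay=decay)
    if name == "adam":
        return optim.Adam(params, lr=lr, weight_decay=decay)
    raise ValueError("unknown optimizer: {}".format(name))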
Example no. 7
def main():
    # if output directory doesn't exist, create it
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # set the random seed manually for reproducibility.
    numpy.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
        else:
            torch.cuda.manual_seed(args.seed)

    print('\ncommand-line params : {0}\n'.format(sys.argv[1:]))
    print('{0}\n'.format(args))

    ###############################################################################
    # Load data
    ###############################################################################

    dictionary = data.Dictionary()
    tasks = []
    train_dict, dev_dict = {}, {}

    if 'quora' in args.task:
        print('**Task name : Quora**')
        # load quora dataset
        quora_train = data.Corpus(args.data, dictionary)
        quora_train.parse('quora/train.txt', 'quora', args.tokenize,
                          args.max_example)
        print('Found {} pairs of train sentences.'.format(len(
            quora_train.data)))

        quora_dev = data.Corpus(args.data, dictionary)
        quora_dev.parse('quora/dev.txt', 'quora', args.tokenize)
        print('Found {} pairs of dev sentences.'.format(len(quora_dev.data)))

        quora_test = data.Corpus(args.data, dictionary)
        quora_test.parse('quora/test.txt', 'quora', args.tokenize)
        print('Found {} pairs of test sentences.'.format(len(quora_test.data)))

        tasks.append(('quora', 2))
        train_dict['quora'] = quora_train
        dev_dict['quora'] = quora_dev

    if 'snli' in args.task:
        print('**Task name : SNLI**')
        # load snli dataset
        snli_train = data.Corpus(args.data, dictionary)
        snli_train.parse('snli/train.txt', 'snli', args.tokenize,
                         args.max_example)
        print('Found {} pairs of train sentences.'.format(len(
            snli_train.data)))

        snli_dev = data.Corpus(args.data, dictionary)
        snli_dev.parse('snli/dev.txt', 'snli', args.tokenize)
        print('Found {} pairs of dev sentences.'.format(len(snli_dev.data)))

        snli_test = data.Corpus(args.data, dictionary)
        snli_test.parse('snli/test.txt', 'snli', args.tokenize)
        print('Found {} pairs of test sentences.'.format(len(snli_test.data)))

        tasks.append(('snli', 3))
        train_dict['snli'] = snli_train
        dev_dict['snli'] = snli_dev

    if 'multinli' in args.task:
        print('**Task name : Multi-NLI**')
        # load multinli dataset
        multinli_train = data.Corpus(args.data, dictionary)
        multinli_train.parse('multinli/train.txt', 'multinli', args.tokenize,
                             args.max_example)
        print('Found {} pairs of train sentences.'.format(
            len(multinli_train.data)))

        multinli_dev = data.Corpus(args.data, dictionary)
        multinli_dev.parse('multinli/dev_matched.txt', 'multinli',
                           args.tokenize)
        multinli_dev.parse('multinli/dev_mismatched.txt', 'multinli',
                           args.tokenize)
        print('Found {} pairs of dev sentences.'.format(len(
            multinli_dev.data)))

        multinli_test = data.Corpus(args.data, dictionary)
        multinli_test.parse('multinli/test_matched.txt', 'multinli',
                            args.tokenize)
        multinli_test.parse('multinli/test_mismatched.txt', 'multinli',
                            args.tokenize)
        print('Found {} pairs of test sentences.'.format(
            len(multinli_test.data)))

        tasks.append(('multinli', 3))
        train_dict['multinli'] = multinli_train
        dev_dict['multinli'] = multinli_dev

    if 'allnli' in args.task:
        print('**Task name : AllNLI**')
        # load allnli dataset
        allnli_train = data.Corpus(args.data, dictionary)
        allnli_train.parse('snli/train.txt', 'snli', args.tokenize,
                           args.max_example)
        allnli_train.parse('multinli/train.txt', 'multinli', args.tokenize,
                           args.max_example)
        print('Found {} pairs of train sentences.'.format(
            len(allnli_train.data)))

        allnli_dev = data.Corpus(args.data, dictionary)
        allnli_dev.parse('snli/dev.txt', 'snli', args.tokenize)
        allnli_dev.parse('multinli/dev_matched.txt', 'multinli', args.tokenize)
        allnli_dev.parse('multinli/dev_mismatched.txt', 'multinli',
                         args.tokenize)
        print('Found {} pairs of dev sentences.'.format(len(allnli_dev.data)))

        allnli_test = data.Corpus(args.data, dictionary)
        allnli_test.parse('snli/test.txt', 'snli', args.tokenize)
        allnli_test.parse('multinli/test_matched.txt', 'multinli',
                          args.tokenize)
        allnli_test.parse('multinli/test_mismatched.txt', 'multinli',
                          args.tokenize)
        print('Found {} pairs of test sentences.'.format(len(
            allnli_test.data)))

        tasks.append(('allnli', 3))
        train_dict['allnli'] = allnli_train
        dev_dict['allnli'] = allnli_dev

    print('\nvocabulary size = ', len(dictionary))

    # save the dictionary object to use during testing
    helper.save_object(dictionary, args.save_path + 'dictionary.p')

    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file,
                                                   dictionary.word2idx)
    print('number of OOV words = ', len(dictionary) - len(embeddings_index))

    # ###############################################################################
    # # Build the model
    # ###############################################################################

    if not tasks:
        return

    model = MultitaskDomainAdapter(dictionary, embeddings_index, args, tasks)
    print(model)

    optim_fn, optim_params = helper.get_optimizer(args.optimizer)
    optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                         **optim_params)
    best_accuracy = 0

    # for training on multiple GPUs. use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        cuda_visible_devices = [
            int(x) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
        ]
        if len(cuda_visible_devices) > 1:
            model = torch.nn.DataParallel(model,
                                          device_ids=cuda_visible_devices)
    if args.cuda:
        model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_accuracy = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict']['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # ###############################################################################
    # # Train the model
    # ###############################################################################

    train = Train(model, optimizer, dictionary, embeddings_index, args,
                  best_accuracy)
    train.set_train_dev_corpus(train_dict, dev_dict)
    train.train_epochs(args.start_epoch, args.epochs)
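
When CUDA_VISIBLE_DEVICES lists more than one GPU, the model above is wrapped in torch.nn.DataParallel, whose state_dict keys carry a 'module.' prefix. A hedged sketch of loading a checkpoint that was saved from the bare model (purely illustrative, not part of the repository):

import torch

def load_into_possibly_parallel_model(model, state_dict):
    # Add the 'module.' prefix expected by a DataParallel-wrapped model
    # when the checkpoint was written from the unwrapped model.
    if isinstance(model, torch.nn.DataParallel):
        state_dict = {k if k.startswith('module.') else 'module.' + k: v
                      for k, v in state_dict.items()}
    model.load_state_dict(state_dict)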
Example no. 8
    helper.log(
        logger,
        '[train] Shape of data placeholder {0}'.format(data.get_shape()))
    helper.log(
        logger,
        '[train] Shape of label placeholder {0}'.format(label.get_shape()))

# Create model
with tf.name_scope('model'):
    model = model.Model(logger)

# Get train opt
with tf.name_scope('train'):
    train_logit = model.logit(data, True, config.dropout)
    train_cost = helper.get_loss(train_logit, label)
    train_opt = helper.get_optimizer(config.learning_rate,
                                     config.optimizer).minimize(train_cost)

    train_pred = tf.argmax(tf.nn.softmax(train_logit),
                           axis=1,
                           name='train_pred')
    train_equal = tf.equal(train_pred, label)
    train_acc = tf.reduce_mean(tf.cast(train_equal, tf.float32))

    train_summary_list = []
    train_summary_list.append(tf.summary.scalar('train_cost', train_cost))
    train_summary_list.append(tf.summary.scalar('train_acc', train_acc))
    train_summary_merge = tf.summary.merge(train_summary_list)

# get eval opt
with tf.name_scope('eval'):
    tf.get_variable_scope().reuse_variables()
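    # A hedged guess at how this eval scope continues, mirroring the train
    # scope above with dropout disabled (names and the dropout argument are
    # assumptions):
    # eval_logit = model.logit(data, False, 1.0)
    # eval_pred = tf.argmax(tf.nn.softmax(eval_logit), axis=1, name='eval_pred')
    # eval_acc = tf.reduce_mean(tf.cast(tf.equal(eval_pred, label), tf.float32))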
 print("creating models......")
 
 path_g   = os.path.join(model_path, args.path_g)
 path_g2l = os.path.join(model_path, args.path_g2l)
 path_l2g = os.path.join(model_path, args.path_l2g)
 
 model, global_fixed = create_model_load_weights(n_class, mode, evaluation, path_g=path_g, path_g2l=path_g2l, path_l2g=path_l2g)
 
 model_ddp = DDP(model, device_ids=[local_rank], output_device=local_rank)
 
 ###################################
 num_epochs    = args.num_epochs
 learning_rate = args.lr
 lamb_fmreg    = args.lamb_fmreg
 
 optimizer = get_optimizer(model_ddp, mode, parallel=True, learning_rate=learning_rate)
 
 scheduler = LR_Scheduler('poly', learning_rate, num_epochs, len(dataloader_train))
 ##################################
 
 criterion1 = FocalLoss(gamma=3)
 criterion2 = nn.CrossEntropyLoss()
 criterion3 = lovasz_softmax
 criterion  = lambda x,y: criterion1(x, y)
 # criterion = lambda x,y: 0.5*criterion1(x, y) + 0.5*criterion3(x, y)
 mse = nn.MSELoss()
 
 if not evaluation:
     writer = SummaryWriter(logdir=log_path + task_name)
     f_log = open(log_path + task_name + ".log", 'w')
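
The DDP variant above assumes a process group and a local_rank have already been set up; a minimal sketch of that standard torch.distributed boilerplate (not taken from the repository):

import argparse
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", type=int, default=0)
local_rank = parser.parse_args().local_rank

# One process per GPU: bind this process to its device and join the group.
dist.init_process_group(backend="nccl", init_method="env://")
torch.cuda.set_device(local_rank)
device = torch.device("cuda", local_rank)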