def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, type="evaluation")
    log = util.get_logger(args.save_dir, args.name)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
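    # Scale the batch size by the number of available GPUs (a data-parallel
    # wrapper is assumed to split each batch across devices)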
    args.batch_size *= max(1, len(args.gpu_ids))


    # Get your model
    log.info('Building model...')
    model, step = get_model(log, args)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')

    dev_dataset = util.load_dataset(args.test_file, args.PPI_dir, args.PPI_gene_feature_dir,
                                    args.PPI_gene_query_dict_dir, args.max_nodes, train=False)

    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=util.collate_fn)

    # Evaluate
    log.info('Evaluating...')

    # Get loss criterion
    cri = FocalLoss(alpha=torch.tensor([args.alpha, 1]).to(device), gamma=args.gamma)

    loss_meter = util.AverageMeter()
    ground_truth = dev_loader.dataset.y_list.to(device)
    predict_list = torch.zeros([len(dev_loader.dataset), 2], dtype=torch.float)
    predict_list = predict_list.to(device)
    sample_index = 0
    with torch.no_grad(), \
         tqdm(total=len(dev_loader.dataset)) as progress_bar:
        for batch_a, batch_bio_a, batch_A, batch_b, batch_bio_b, batch_B, batch_y in dev_loader:
            # Setup for forward
            batch_a = batch_a.to(device)
            batch_bio_a = batch_bio_a.to(device)
            batch_A = batch_A.to(device)
            batch_bio_b = batch_bio_b.to(device)
            batch_b = batch_b.to(device)
            batch_B = batch_B.to(device)
            batch_y = batch_y.to(device)
            batch_y = batch_y.long()
            batch_size = batch_bio_a.size(0)
            # Forward
            output = model(batch_a, batch_bio_a, batch_A, batch_b, batch_bio_b, batch_B)
            loss = cri(output, batch_y)
            loss_val = loss.item()
            loss_meter.update(loss_val, batch_size)
            predict_list[sample_index:sample_index + batch_size] = output
            sample_index = sample_index + batch_size

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=loss_meter.avg)

    results = util.metrics_compute(predict_list, ground_truth)

    log.info("Evaluation result of model:")
    log.info(f"Loss in test dataset is {loss_meter.avg}")
    log.info(f"Accuracy:{results['Accuracy']}, AUC:{results['AUC']}, Recall:{results['Recall']},Precision:{results['Precision']},Specificity:{results['Specificity']}")
    log.info(f"TP:{results['TP']},FN:{results['FN']}")
    log.info(f"FP:{results['FP']},TN:{results['TN']}")
    log.info("plot prediction curve...")
    ROC_AUC(results["fpr"],results["tpr"],results["AUC"],os.path.join(args.save_dir,"ROC_curve.pdf"))
    log.info("Save evaluation result...")
    np.savez(os.path.join(args.save_dir,"results.npz"),predict=np.array(predict_list.cpu().tolist()),result=results)
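# For reference, a minimal sketch of the focal-loss criterion used above. This is
# an assumed implementation (the repo's own FocalLoss is not shown here): it takes
# raw two-class logits, per-class weights `alpha`, and focusing parameter `gamma`.
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha, gamma=2.0):
        super().__init__()
        self.alpha = alpha  # per-class weights, shape [num_classes]
        self.gamma = gamma  # focusing parameter; gamma=0 recovers cross-entropy

    def forward(self, logits, targets):
        log_p = F.log_softmax(logits, dim=-1)
        # Weighted cross-entropy per sample, no reduction yet
        ce = F.nll_loss(log_p, targets, weight=self.alpha, reduction='none')
        # Probability assigned to the true class
        p_t = log_p.gather(1, targets.unsqueeze(1)).squeeze(1).exp()
        # Down-weight easy (high-confidence) examples
        return ((1.0 - p_t) ** self.gamma * ce).mean()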
Example #2
def main(args):

    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    seed = 42
    torch.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Get model

    model = resnet.resnet50()
    model = nn.DataParallel(model, gpu_ids)
    #log.info(f'Loading checkpoint from {args.load_path}...')
    #model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Loading dataset...')
    input_data_file = '/home/mahbub/research/flat-resnet/data/dev_images.pt'  # TODO: take from args.input_data_file
    dataset = ImageDataset(input_data_file)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=None)

    #class_label_file = '/home/mahbub/research/flat-resnet/imagenet_classes.txt'
    # Read the categories
    #with open(class_label_file, "r") as f:
    #    categories = [s.strip() for s in f.readlines()]

    # Evaluate
    log.info('Running inference...')

    # Buffer for all logits; 1000 is the number of ImageNet classes
    # (the resnet50 output size). TODO: remove hard-coding.
    output = torch.zeros(len(dataset), 1000)
    out_idx = 0
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for images in data_loader:
            # Setup for forward
            images = images.to(device)

            batch_size = images.shape[0]

            # Forward
            output[out_idx:out_idx + batch_size] = model(images)
            out_idx += batch_size

            #print("output shape is {}".format(output.shape))
            #print("Output is: {}".format(output))

            #probabilities = torch.nn.functional.softmax(output, dim=1)
            #print("probabilities shape is {}".format(probabilities.shape))
            #print ("probabilities sum = {}".format(probabilities.sum(axis=1)))

            # Show top categories per image
            #K = 5
            #top_prob, top_catid = torch.topk(probabilities, K)

            #print("top catid shape is {}".format(top_catid.shape))

            #for i in range(top_prob.shape[0]):
            #    for k in range(K):
            #        print(categories[top_catid[i,k]], top_prob[i,k].item())

            # Log info
            progress_bar.update(batch_size)

    # Write output to a file
    torch.save(output, "resnet50_output")
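# Follow-up sketch (not part of the original script): loading the saved logits and
# reporting top-5 categories, along the lines of the commented-out code above.
# Assumes imagenet_classes.txt holds one category name per line.
import torch

output = torch.load("resnet50_output")
with open("imagenet_classes.txt") as f:
    categories = [line.strip() for line in f]
probabilities = torch.nn.functional.softmax(output, dim=1)
top_prob, top_catid = torch.topk(probabilities, 5)
for i in range(min(3, top_prob.shape[0])):  # show a few images
    for k in range(5):
        print(categories[top_catid[i, k]], top_prob[i, k].item())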
Example #3
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  char_vectors=char_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)
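    # EMA keeps a shadow copy of the weights that is swapped in at evaluation
    # time (see the sketch after this example)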

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(),
                               args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)

                # Character-level indices for the char-embedding inputs
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)

                optimizer.zero_grad()

                # Forward (model takes both word- and char-level indices)
                log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
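# For reference, a minimal sketch of the exponential-moving-average helper used in
# the example above (util.EMA). This is an assumed implementation, not the repo's
# own code: shadow weights are updated after each step and swapped in for eval.
class EMA:
    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {name: param.data.clone()
                       for name, param in model.named_parameters()
                       if param.requires_grad}
        self.original = {}

    def __call__(self, model, num_updates):
        # Use a smaller decay early in training (bias correction)
        decay = min(self.decay, (1.0 + num_updates) / (10.0 + num_updates))
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = \
                    (1.0 - decay) * param.data + decay * self.shadow[name]

    def assign(self, model):
        # Swap the averaged weights in for evaluation
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.original[name] = param.data.clone()
                param.data = self.shadow[name]

    def resume(self, model):
        # Restore the raw training weights
        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data = self.original[name]
        self.original = {}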
Example #4
def main(args):
    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    if args.model == 'bidaf':
        model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size)
    elif args.model == 'bidafextra':
        model = BiDAFExtra(word_vectors=word_vectors, args=args)
    elif args.model == 'fusionnet':
        model = FusionNet(word_vectors=word_vectors, args=args)

    model = nn.DataParallel(model, gpu_ids)
    log.info(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')
    record_file = vars(args)[f'{args.split}_record_file']
    dataset = SQuAD(record_file, args)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)
    # print("*"*80)
    # print(len(dataset.question_idxs))

    # for question_idx in dataset.question_idxs:
    #    print(question_idx)
    #    print("*" * 80)

    # print(self.question_idxs[question_idx])
    # self.question_idxs[idx]
    # print("data_loader: ",data_loader)
    # Evaluate
    log.info(f'Evaluating on {args.split} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}  # Predictions for submission
    eval_file = vars(args)[f'{args.split}_eval_file']
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)

    # Collect statistics on question types
    count_questions_type = defaultdict(int)

    audit_trail_from_question_type = defaultdict(list)
    list_of_interrogative_pronouns = [
        "what", "whose", "why", "which", "where", "when", "how", "who", "whom"
    ]

    for index in range(1, len(gold_dict) + 1):  # gold_dict is keyed '1'..'N'
        # Lower-case the question to simplify the analysis, at the cost of losing
        # the capitalization cue for the interrogative pronoun's position.
        question_lower_case = gold_dict[str(index)]['question'].lower()

        list_question_lower_case_with_punctuation = \
            question_lower_case.replace("'", " ").split()

        # Strip punctuation from each token
        question_lower_case = []
        for item in list_question_lower_case_with_punctuation:
            question_lower_case.append(
                item.translate({ord(i): ""
                                for i in ",.<>!@£$%^&*()_-+=?"}))

        # First word of the question
        first_word_question_lower_case = question_lower_case[0]

        # Second word of the question
        second_word_question_lower_case = question_lower_case[1]

        # First and second words combined
        combined_first_and_second_words = first_word_question_lower_case + " " + second_word_question_lower_case

        # Analyzing the sentence
        if first_word_question_lower_case in list_of_interrogative_pronouns:
            count_questions_type[first_word_question_lower_case] += 1
            audit_trail_from_question_type[
                first_word_question_lower_case].append(str(index))
        # Compound question starting with "in"
        elif first_word_question_lower_case == "in":
            if second_word_question_lower_case in list_of_interrogative_pronouns and second_word_question_lower_case != "whose":
                count_questions_type[combined_first_and_second_words] += 1
                audit_trail_from_question_type[
                    combined_first_and_second_words].append(str(index))
            else:
                pronoun = find_first_interrogative_pronoun(
                    list_of_interrogative_pronouns, question_lower_case)
                count_questions_type[pronoun] += 1
                audit_trail_from_question_type[pronoun].append(str(index))

        # Compound question starting with "by"
        elif first_word_question_lower_case == "by":
            if second_word_question_lower_case in list_of_interrogative_pronouns \
                    and second_word_question_lower_case !="whom"\
                    and second_word_question_lower_case !="which"\
                    and second_word_question_lower_case !="when"\
                    and second_word_question_lower_case !="how":
                count_questions_type[combined_first_and_second_words] += 1
                audit_trail_from_question_type[
                    combined_first_and_second_words].append(str(index))
            else:
                pronoun = find_first_interrogative_pronoun(
                    list_of_interrogative_pronouns, question_lower_case)
                count_questions_type[pronoun] += 1
                audit_trail_from_question_type[pronoun].append(str(index))

        else:
            pronoun = find_first_interrogative_pronoun(
                list_of_interrogative_pronouns, question_lower_case)
            count_questions_type[pronoun] += 1
            audit_trail_from_question_type[pronoun].append(str(index))

    # Question-type distribution, sorted by frequency
    reverse_dict_by_value = OrderedDict(
        sorted(count_questions_type.items(), key=lambda x: x[1]))
    total_questions = sum(count_questions_type.values())

    with torch.no_grad(), \
         tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, cw_pos, cw_ner, cw_freq, cqw_extra, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            if args.model == 'bidaf':
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
            else:
                log_p1, log_p2 = model(cw_idxs, qw_idxs, cw_pos, cw_ner,
                                       cw_freq, cqw_extra)

            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
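            # util.discretize picks the most probable valid (start, end) span
            # from p1 and p2 (see the sketch after this example)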
            starts, ends = util.discretize(p1, p2, args.max_ans_len,
                                           args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            if args.split != 'test':
                # No labels for the test set, so NLL would be invalid
                progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    # Log results (except for test set, since it does not come with labels)
    if args.split != 'test':
        results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)

        results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                        ('EM', results['EM'])]
        if args.use_squad_v2:
            results_list.append(('AvNA', results['AvNA']))
        results = OrderedDict(results_list)

        # Compute the F1 score for each type of question

        # List of question types observed above
        types_of_questions = list(audit_trail_from_question_type.keys())

        gold_dict_per_type_of_questions = defaultdict(list)
        pred_dict_per_type_of_questions = {}

        gold_dict_per_type_of_questions_start = {}
        pred_dict_per_type_of_questions_start = {}

        gold_dict_per_type_of_questions_middle = {}
        pred_dict_per_type_of_questions_middle = {}

        gold_dict_per_type_of_questions_end = {}
        pred_dict_per_type_of_questions_end = {}

        for type_of_questions in types_of_questions:

            # Build gold/pred dictionaries restricted to this question type
            gold_dict_per_type_of_questions[type_of_questions] = {
                key: value
                for key, value in gold_dict.items()
                if key in audit_trail_from_question_type[type_of_questions]
                and key in pred_dict.keys()
            }
            pred_dict_per_type_of_questions[type_of_questions] = {
                key: value
                for key, value in pred_dict.items()
                if key in audit_trail_from_question_type[type_of_questions]
                and key in pred_dict.keys()
            }

            gold_dict_per_type_of_questions_start[type_of_questions] = {
                key: value
                for key, value in gold_dict.items()
                if key in audit_trail_from_question_type[type_of_questions]
                and key in pred_dict.keys()
            }
            pred_dict_per_type_of_questions_start[type_of_questions] = {
                key: value
                for key, value in pred_dict.items()
                if key in audit_trail_from_question_type[type_of_questions]
                and key in pred_dict.keys()
            }

            gold_dict_per_type_of_questions_middle[type_of_questions] = {
                key: value
                for key, value in gold_dict.items()
                if key in audit_trail_from_question_type[type_of_questions]
                and key in pred_dict.keys()
            }
            pred_dict_per_type_of_questions_middle[type_of_questions] = {
                key: value
                for key, value in pred_dict.items()
                if key in audit_trail_from_question_type[type_of_questions]
                and key in pred_dict.keys()
            }

            gold_dict_per_type_of_questions_end[type_of_questions] = {
                key: value
                for key, value in gold_dict.items()
                if key in audit_trail_from_question_type[type_of_questions]
                and key in pred_dict.keys()
            }
            pred_dict_per_type_of_questions_end[type_of_questions] = {
                key: value
                for key, value in pred_dict.items()
                if key in audit_trail_from_question_type[type_of_questions]
                and key in pred_dict.keys()
            }

            for key, value in gold_dict.items():
                #if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys():
                if key in audit_trail_from_question_type[
                        type_of_questions] and type_of_questions != "" and key in pred_dict_per_type_of_questions[
                            type_of_questions]:
                    """
                    print("type_of_questions: ",type_of_questions)
                    print("key: ", key)
                    print("question: ", value["question"])
                    sub_index = value["question"].lower().find(type_of_questions)
                    print("sub_index: ",sub_index)
                    test_fc = value["question"].lower().find(type_of_questions)
                    print("present type of the var: ",type(test_fc))
                    #print("question: ", value["question"][str(key)])
                    print("length of the question: ", len(value["question"]))
                    print('Position of the interrogative pronoun in the question:', )
                    """
                    # The interrogative pronoun appears at the start of the question
                    if value["question"].lower().find(
                            type_of_questions) == 1 or value["question"].lower(
                            ).find(type_of_questions) == 0:
                        if type_of_questions != "":
                            try:
                                del gold_dict_per_type_of_questions_middle[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del pred_dict_per_type_of_questions_middle[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del gold_dict_per_type_of_questions_end[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del pred_dict_per_type_of_questions_end[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                    # The pronoun appears near the end of the question
                    elif value["question"].lower(
                    ).find(type_of_questions) >= len(
                            value["question"]) - len(type_of_questions) - 5:

                        if type_of_questions != "":
                            try:
                                del gold_dict_per_type_of_questions_middle[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del pred_dict_per_type_of_questions_middle[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del gold_dict_per_type_of_questions_start[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del pred_dict_per_type_of_questions_start[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                        #print("type_of_questions: ",type_of_questions)
                        #sub_index = value["question"].lower().find(type_of_questions)
                        #print("sub_index: ", sub_index)
                        #print("len(value['question']) - len(type_of_questions) - 2: ", len(value["question"])-len(type_of_questions)-2)
                        #start_string = len(value["question"])-len(type_of_questions)-6
                        #end_string = len(value["question"])-1
                        #print("extract at the end: ", value["question"][start_string:end_string])
                    else:
                        #print("MIDDLE")
                        if type_of_questions != "":
                            try:
                                del gold_dict_per_type_of_questions_start[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del pred_dict_per_type_of_questions_start[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del gold_dict_per_type_of_questions_end[
                                    type_of_questions][key]
                            except KeyError:
                                pass

                            try:
                                del pred_dict_per_type_of_questions_end[
                                    type_of_questions][key]
                            except KeyError:
                                pass
            """
            if  type_of_questions != "":
                gold_dict_per_type_of_questions_start[type_of_questions] = {key: value for key, value in gold_dict.items() if (key in audit_trail_from_question_type[type_of_questions] \
                                                                        and (value["question"].lower().find(type_of_questions) <= 1) \
                                                                        and key in pred_dict_per_type_of_questions[type_of_questions]) }
            """
            """
                for key in gold_dict_per_type_of_questions_start[type_of_questions].keys():
                    print("key:: ", key )
                    print("type(key):: ", type(key) )
                            print("pred_dict[,key,] : ", pred_dict[key])
                print("@@@@@@@@@@@@@@@@@@@@@@@@")
                
                pred_dict_per_type_of_questions_start[type_of_questions] = {key: pred_dict[key] for key in gold_dict_per_type_of_questions_start[type_of_questions].keys()}

                #pred_dict_per_type_of_questions_start[type_of_questions] = {key: value for key, value in pred_dict.items() if key in list(gold_dict_per_type_of_questions_start[type_of_questions].keys()) }

                # Create two dictionnaries for each type of sentence based at the end of the sentence
                gold_dict_per_type_of_questions_end[type_of_questions] = {key: value for key, value in gold_dict.items() if key in audit_trail_from_question_type[type_of_questions] \
                                                                        and value["question"].lower().find(type_of_questions) >= len(value["question"])-len(type_of_questions)-2 \
                                                                        and key in pred_dict_per_type_of_questions[type_of_questions]}


                pred_dict_per_type_of_questions_end[type_of_questions] = {key: pred_dict[key] for key in list(gold_dict_per_type_of_questions_end[type_of_questions].keys())}

                #print("*"*80)
                # Create two dictionnaries for each type of sentence based at the middle of the sentencecount_questions_type
                gold_dict_per_type_of_questions_middle[type_of_questions] = {key: value for key, value in gold_dict.items() if key not in list(gold_dict_per_type_of_questions_start[type_of_questions].keys()) \
                                                                            and key not in list(gold_dict_per_type_of_questions_end[type_of_questions].keys())}

                pred_dict_per_type_of_questions_middle[type_of_questions] = {key: pred_dict[key] for key in list(gold_dict_per_type_of_questions_end[type_of_questions].keys())}
            else:
                gold_dict_per_type_of_questions_start[""] = gold_dict_per_type_of_questions[""]
                pred_dict_per_type_of_questions_start[""] = pred_dict_per_type_of_questions[""]
                gold_dict_per_type_of_questions_end[""] = gold_dict_per_type_of_questions[""]
                pred_dict_per_type_of_questions_end[""] = pred_dict_per_type_of_questions[""]
                gold_dict_per_type_of_questions_middle[""] = gold_dict_per_type_of_questions[""]
                pred_dict_per_type_of_questions_middle[""] = pred_dict_per_type_of_questions[""]
        """

        positions_in_question = ["beginning", "middle", "end"]


        list_beginning = [
            util.eval_dicts(
                gold_dict_per_type_of_questions_start[type_of_questions],
                pred_dict_per_type_of_questions_start[type_of_questions],
                args.use_squad_v2)['F1']
            for type_of_questions in types_of_questions
        ]
        list_middle = [
            util.eval_dicts(
                gold_dict_per_type_of_questions_middle[type_of_questions],
                pred_dict_per_type_of_questions_middle[type_of_questions],
                args.use_squad_v2)['F1']
            for type_of_questions in types_of_questions
        ]
        list_end = [
            util.eval_dicts(
                gold_dict_per_type_of_questions_end[type_of_questions],
                pred_dict_per_type_of_questions_end[type_of_questions],
                args.use_squad_v2)['F1']
            for type_of_questions in types_of_questions
        ]


        F1 = np.array([list_beginning, list_middle, list_end])

        m, n = F1.shape

        # Record (row, col) cells whose F1 is missing or zero so the heatmap
        # annotation can skip them
        value_to_ignore = []
        for i in range(m):
            for j in range(n):
                if F1[i, j] == "NA" or F1[i, j] == 0:
                    value_to_ignore.append((i, j))
        print("value to ignore: ", value_to_ignore)

        data_label = copy.deepcopy(F1)

        for row in data_label:
            for column_idx in range(len(row)):
                if row[column_idx] == "NA":
                    row[column_idx] = ""

        # Print questions without an interrogative pronoun, needed for the
        # second part of the analysis (assumes the eval file stores the gold
        # answers under 'answers')
        for key, value in gold_dict.items():
            if key in audit_trail_from_question_type[
                    ""] and key in pred_dict.keys():
                print("question: ", value['question'])
                print("gold answers: ", value.get('answers'))
                print("prediction: ", pred_dict[key])
                print()

        fig, ax = plt.subplots()

        types_of_questions[types_of_questions.index(
            "")] = "Implicit question without interrogative pronoun"

        im, cbar = heatmap(F1, positions_in_question, types_of_questions, ax=ax, \
                            cmap="YlGn", cbarlabel="F1 scores")

        texts = annotate_heatmap(im,
                                 data=data_label,
                                 valfmt="{x:.1f}",
                                 ignore=value_to_ignore)

        fig.tight_layout()
        plt.show()

        # Log to console
        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
        log.info(f'{args.split.title()} {results_str}')

        # Log to TensorBoard
        tbx = SummaryWriter(args.save_dir)
        util.visualize(tbx,
                       pred_dict=pred_dict,
                       eval_path=eval_file,
                       step=0,
                       split=args.split,
                       num_visuals=args.num_visuals)

    # Write submission file
    sub_path = join(args.save_dir, args.split + '_' + args.sub_file)
    log.info(f'Writing submission file to {sub_path}...')
    with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
        csv_writer = csv.writer(csv_fh, delimiter=',')
        csv_writer.writerow(['Id', 'Predicted'])
        for uuid in sorted(sub_dict):
            csv_writer.writerow([uuid, sub_dict[uuid]])
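# For reference, a sketch of the span-selection step used above (util.discretize):
# choose (start, end) maximizing p_start[i] * p_end[j] subject to i <= j < i + max_len.
# Assumed, simplified implementation (the SQuAD 2.0 no-answer case is omitted).
import torch

def discretize(p_start, p_end, max_len=15):
    length = p_start.size(1)
    # Joint probability of every candidate (start, end) pair
    p_joint = p_start.unsqueeze(2) * p_end.unsqueeze(1)  # (batch, length, length)
    # Mask out spans with end < start or length >= max_len
    ones = torch.ones(length, length, device=p_joint.device)
    mask = torch.triu(ones) - torch.triu(ones, diagonal=max_len)
    p_joint = p_joint * mask
    # Locate the global maximum for each example
    max_over_end, _ = torch.max(p_joint, dim=2)
    max_over_start, _ = torch.max(p_joint, dim=1)
    start_idxs = torch.argmax(max_over_end, dim=-1)
    end_idxs = torch.argmax(max_over_start, dim=-1)
    return start_idxs, end_idxs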
Example #5
def main(args):

    # Set up logging and devices
    name = "train_exp2"
    args.save_dir = util.get_save_dir(args.logging_dir, name, training=True)
    log = get_logger(args.save_dir, name)
    tbx = SummaryWriter(args.save_dir)
    device, gpu_ids = util.get_available_devices()
    log.info(f"Args: {dumps(vars(args), indent=4, sort_keys=True)}")
    args.batch_size *= max(1, len(gpu_ids))

    # Set random seed
    log.info(f"Using random seed {args.random_seed}...")
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)

    # Get embeddings
    log.info(f"Loading embeddings from {args.word_emb_file}...")
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info("Building model...")
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, gpu_ids)
    if args.load_path:
        log.info(f"Loading checkpoint from {args.load_path}...")
        model, step = util.load_model(model, args.load_path, gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.learning_rate,
                               weight_decay=args.learning_rate_decay)
    # Reduce LR when the monitored loss plateaus
    scheduler = sched.ReduceLROnPlateau(optimizer=optimizer,
                                        mode="min", factor=0.1,
                                        patience=2, verbose=True, cooldown=0,
                                        min_lr=0.0005)


    for epoch in range(args.num_epochs):
        log.info(f"Starting epoch {epoch}...")
        for i in range(args.num_train_chunks):
            # Get data loader for this training chunk
            train_rec_file = f"{args.train_record_file_exp2}_{i}.npz"
            log.info(f'Building dataset from {train_rec_file} ...')
            train_dataset = SQuAD(train_rec_file, args.exp2_train_topic_contexts, use_v2=True)
            train_loader = data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.num_workers,
                                           collate_fn=collate_fn)

            # Train
            log.info('Training...')
            steps_till_eval = args.eval_steps
            with torch.enable_grad(), tqdm(total=len(train_loader.dataset)) as progress_bar:
                for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                    # Setup for forward
                    cw_idxs = cw_idxs.to(device)
                    qw_idxs = qw_idxs.to(device)
                    batch_size = qw_idxs.size(0)
                    optimizer.zero_grad()

                    # Forward
                    log_p1, log_p2 = model(cw_idxs, qw_idxs)
                    y1, y2 = y1.to(device), y2.to(device)
                    loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                    loss_val = loss.item()

                    # Backward
                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                    optimizer.step()
                    # ReduceLROnPlateau expects the monitored metric, not a
                    # step count; step it on the training loss here
                    scheduler.step(loss_val)
                    ema(model, step // batch_size)

                    # Log info
                    step += batch_size
                    progress_bar.update(batch_size)
                    progress_bar.set_postfix(epoch=epoch,
                                             NLL=loss_val)
                    tbx.add_scalar('train/NLL', loss_val, step)
                    tbx.add_scalar('train/LR',
                                   optimizer.param_groups[0]['lr'],
                                   step)

                    steps_till_eval -= batch_size
                    if steps_till_eval <= 0:
                        steps_till_eval = args.eval_steps

                        # Evaluate and save checkpoint
                        log.info(f"Evaluating at step {step}...")
                        ema.assign(model)

                        # Accumulate predictions/results across all dev chunks
                        all_pred_dicts = {}
                        all_results = OrderedDict()
                        for dev_chunk_idx in range(args.num_dev_chunks):
                            # Get data loader for this dev chunk
                            dev_rec_file = f"{args.dev_record_file_exp2}_{dev_chunk_idx}.npz"
                            log.info(f'Building evaluation dataset from {dev_rec_file} ...')
                            dev_dataset = SQuAD(dev_rec_file, 
                                                args.exp2_dev_topic_contexts, 
                                                use_v2=True)
                            dev_loader = data.DataLoader(dev_dataset,
                                                           batch_size=args.batch_size,
                                                           shuffle=True,
                                                           num_workers=args.num_workers,
                                                           collate_fn=collate_fn)
                            results, pred_dict = evaluate(model, dev_loader, device,
                                                          args.dev_eval_file,
                                                          args.max_ans_len,
                                                          use_squad_v2=True)
                            all_results.update(results)
                            all_pred_dicts.update(pred_dict)

                            del dev_dataset
                            del dev_loader
                            del results
                            del pred_dict
                            torch.cuda.empty_cache()

                        saver.save(step, model, all_results[args.metric_name], device)
                        ema.resume(model)

                        # Log to console
                        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in all_results.items())
                        log.info(f"Dev {results_str}")

                        # Log to TensorBoard
                        log.info('Visualizing in TensorBoard...')
                        for k, v in all_results.items():
                            tbx.add_scalar(f"dev/{k}", v, step)
                        util.visualize(tbx,
                                       pred_dict=all_pred_dicts,
                                       eval_path=args.dev_eval_file,
                                       step=step,
                                       split='dev',
                                       num_visuals=args.num_visuals)
                    torch.cuda.empty_cache()
            del train_dataset
            del train_loader
            torch.cuda.empty_cache()
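# Usage sketch (assumption, not from the original repo): ReduceLROnPlateau is
# normally stepped on a monitored validation metric once per evaluation, rather
# than on an iteration counter.
import torch
import torch.optim as optim
from torch.optim import lr_scheduler as sched

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = optim.Adadelta(params, lr=0.5)
scheduler = sched.ReduceLROnPlateau(optimizer, mode="min", factor=0.1,
                                    patience=2, cooldown=0, min_lr=0.0005)
for epoch, val_loss in enumerate([1.0, 0.9, 0.9, 0.9, 0.9, 0.9]):
    scheduler.step(val_loss)  # step on the metric, not the step count
    print(epoch, optimizer.param_groups[0]["lr"])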
Example #6
def main():
    save_dir = util.get_save_dir('save', 'TimeCNN', training=False)
    log = util.get_logger(save_dir, 'TimeCNN')
    device, gpu_ids = util.get_available_devices()
    tbx = SummaryWriter(save_dir)

    # Checkpoint to evaluate
    path = 'save/train/TimeCNN-wd0.01-epoch100-01/best.pth.tar'

    # Build model
    log.info("Building model")
    #model = Baseline(8 * 96 * 64)
    model = TimeCNN()
    #model = Resnet()
    #model = VGGLinear()
    model = nn.DataParallel(model, gpu_ids)
    model = util.load_model(model, path, gpu_ids, return_step=False)
    model = model.to(device)
    model = model.double()
    model.eval()

    log.info("Building Dataset")
    test_dataset = Shots("videos/test.h5py", "labels/test.npy")
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=False,
                                  num_workers=4,
                                  collate_fn=collate_fn)
    num_correct = 0
    num_samples = 0
    missed_1, missed_0 = 0, 0
    num_1_predicted = 0
    num_0_predicted = 0
    with torch.no_grad():
        for frames, y in test_loader:
            frames = frames.to(device)
            y = y.to(device)
            scores = model(frames)
            loss = F.cross_entropy(scores, y)  # computed for inspection; not accumulated
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()

            # This accumulates how many 1's and 0's were misclassified
            for i in range(y.shape[0]):
                if y[i] == 1 and preds[i] == 0:
                    missed_1 += 1
                elif y[i] == 0 and preds[i] == 1:
                    missed_0 += 1
            num_samples += preds.shape[0]
            num_1_predicted += (preds == 1).sum()
            num_0_predicted += (preds == 0).sum()
    acc = float(num_correct) / num_samples

    log.info("Path: {}".format(path))
    log.info("Accuracy on test set is {}".format(acc))
    log.info("Missed 1's: {}, Missed 0's: {}".format(missed_1, missed_0))
    log.info("Number 1's predicted: {}".format(num_1_predicted))
    log.info("Number 0's predicted: {}".format(num_0_predicted))

    log.info('-----------------')

    log.info("Accuracy on test set is {} and path was {}".format(acc, path))
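    # Follow-up sketch (not in the original): class-1 precision/recall derived
    # from the counts above. missed_0 = 0's predicted as 1 (false positives for
    # class 1); missed_1 = 1's predicted as 0 (false negatives).
    true_positives_1 = int(num_1_predicted) - missed_0
    precision_1 = true_positives_1 / max(1, int(num_1_predicted))
    recall_1 = true_positives_1 / max(1, true_positives_1 + missed_1)
    log.info("Class-1 precision: {:.3f}, recall: {:.3f}".format(precision_1, recall_1))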
Example #7
def main(course_dir,
         text_embedding_size,
         audio_embedding_size,
         image_embedding_size,
         hidden_size,
         drop_prob,
         max_text_length,
         out_heatmaps_dir,
         args,
         batch_size=3,
         num_epochs=100):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Create Dataset objects
    text_dataset = TextDataset(course_dir, max_text_length)
    audio_dataset = AudioDataset(course_dir)
    target_dataset = TargetDataset(course_dir)
    # Preprocess the image in prescribed format
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.RandomResizedCrop(256),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    image_dataset = ImageDataset(course_dir, transform)

    assert len(text_dataset) == len(audio_dataset) == len(image_dataset) \
        == len(target_dataset), "Unequal dataset lengths"

    # Creating data indices for training and validation splits:
    train_indices, val_indices = gen_train_val_indices(text_dataset)

    # Creating PT data samplers and loaders. Note: SequentialSampler(data_source)
    # iterates positions 0..len(data_source)-1, so passing an index list samples
    # the first len(indices) items rather than the chosen indices; a Subset or
    # SubsetRandomSampler may be what was intended here.
    train_sampler = torch.utils.data.SequentialSampler(train_indices)
    val_sampler = torch.utils.data.SequentialSampler(val_indices)

    # Get sentence embeddings
    train_text_loader = torch.utils.data.DataLoader(text_dataset,
                                                    batch_size=batch_size,
                                                    shuffle=False,
                                                    num_workers=2,
                                                    collate_fn=collator,
                                                    sampler=train_sampler)
    val_text_loader = torch.utils.data.DataLoader(text_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=2,
                                                  collate_fn=collator,
                                                  sampler=val_sampler)

    # Get Audio embeddings
    train_audio_loader = torch.utils.data.DataLoader(audio_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=2,
                                                     collate_fn=collator,
                                                     sampler=train_sampler)
    val_audio_loader = torch.utils.data.DataLoader(audio_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False,
                                                   num_workers=2,
                                                   collate_fn=collator,
                                                   sampler=val_sampler)

    # Get images
    train_image_loader = torch.utils.data.DataLoader(image_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=2,
                                                     collate_fn=collator,
                                                     sampler=train_sampler)
    val_image_loader = torch.utils.data.DataLoader(image_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False,
                                                   num_workers=2,
                                                   collate_fn=collator,
                                                   sampler=val_sampler)

    # Load Target text
    train_target_loader = torch.utils.data.DataLoader(
        target_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=2,
        collate_fn=target_collator,
        sampler=train_sampler)
    val_target_loader = torch.utils.data.DataLoader(target_dataset,
                                                    batch_size=batch_size,
                                                    shuffle=False,
                                                    num_workers=2,
                                                    collate_fn=target_collator,
                                                    sampler=val_sampler)

    # print("lens - train_text_loader {}, val_text_loader {}".format(len(train_text_loader), len(val_text_loader)))
    # print("lens - train_audio_loader {}, val_audio_loader {}".format(len(train_audio_loader), len(val_audio_loader)))
    # print("lens - train_image_loader {}, val_image_loader {}".format(len(train_image_loader), len(val_image_loader)))
    # print("lens - train_target_loader {}, val_target_loader {}".format(len(train_target_loader), len(val_target_loader)))

    # Create model
    model = MMBiDAF(hidden_size, text_embedding_size, audio_embedding_size,
                    image_embedding_size, device, drop_prob, max_text_length)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)  # For exponential moving average

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)  # Need to change the metric name

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(),
                               args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Let's do this!
    loss = 0
    eps = 1e-8
    log.info("Training...")
    steps_till_eval = args.eval_steps
    epoch = step // len(text_dataset)

    while epoch != args.num_epochs:
        epoch += 1
        log.info("Starting epoch {epoch}...")
        count_item = 0
        loss_epoch = 0
        with torch.enable_grad(), tqdm(
                total=len(train_text_loader.dataset)) as progress_bar:
            for (batch_text, original_text_lengths), (
                    batch_audio, original_audio_lengths), (
                        batch_images,
                        original_img_lengths), (batch_target_indices,
                                                batch_source_paths,
                                                batch_target_paths,
                                                original_target_len) in zip(
                                                    train_text_loader,
                                                    train_audio_loader,
                                                    train_image_loader,
                                                    train_target_loader):
                loss = 0
                max_dec_len = torch.max(
                    original_target_len
                )  # TODO check error : max decoder timesteps for each batch

                # Transfer tensors to GPU
                batch_text = batch_text.to(device)
                log.info("Loaded batch text")
                batch_audio = batch_audio.to(device)
                log.info("Loaded batch audio")
                batch_images = batch_images.to(device)
                log.info("Loaded batch image")
                batch_target_indices = batch_target_indices.to(device)
                log.info("Loaded batch targets")

                # Setup for forward
                batch_size = batch_text.size(0)
                optimizer.zero_grad()

                log.info("Starting forward pass")
                # Forward
                batch_out_distributions, loss = model(
                    batch_text, original_text_lengths, batch_audio,
                    original_audio_lengths, batch_images, original_img_lengths,
                    batch_target_indices, original_target_len, max_dec_len)
                loss_val = loss.item()  # numerical value of loss
                loss_epoch = loss_epoch + loss_val
                count_item += 1  # batches seen this epoch, for the epoch-average below

                log.info("Starting backward")

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(
                    model.parameters(),
                    args.max_grad_norm)  # To tackle exploding gradients
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    # TODO
                    # scores, results = evaluate(model, dev_loader, device,
                    #                               args.dev_eval_file,
                    #                               args.max_ans_len,
                    #                               args.use_squad_v2)
                    saver.save(step, model, device)
                    ema.resume(model)

                # Generate summary
                print('Generated summary for epoch {}: '.format(epoch))
                summaries = get_generated_summaries(batch_out_distributions,
                                                    original_text_lengths,
                                                    batch_source_paths)
                print(summaries)

                # Evaluation
                # rouge = Rouge()
                # rouge_scores = rouge.get_scores(batch_source_paths, batch_target_paths, avg=True)
                # print('Rouge score at iteration {} is {}: '.format(epoch, rouge_scores))

                # Generate Output Heatmaps
                # sns.set()
                # for idx in range(len(out_distributions)):
                #     out_distributions[idx] = out_distributions[idx].squeeze(0).detach().numpy()      # Converting each timestep distribution to numpy array
                # out_distributions = np.asarray(out_distributions)   # Converting the timestep list to array
                # ax = sns.heatmap(out_distributions)
                # fig = ax.get_figure()
                # fig.savefig(out_heatmaps_dir + str(epoch) + '.png')
            print("Epoch loss is : {}".format(loss_epoch / count_item))
Example #8
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vec = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    if args.name == 'baseline':
        model = BiDAF(word_vectors=word_vectors,
                      hidden_size=args.hidden_size,
                      drop_prob=args.drop_prob)
    elif args.name == 'charembeddings':
        model = BiDAFChar(word_vectors=word_vectors,
                          char_vec=char_vec,
                          word_len=16,
                          hidden_size=args.hidden_size,
                          drop_prob=args.drop_prob)
    elif args.name == 'charembeddings2':
        model = BiDAFChar2(word_vectors=word_vectors,
                           char_vec=char_vec,
                           word_len=16,
                           hidden_size=args.hidden_size,
                           drop_prob=args.drop_prob)
    elif args.name == 'qanet':
        model = QANet(word_vectors=word_vectors,
                      char_vec=char_vec,
                      word_len=16,
                      emb_size=args.hidden_size,
                      drop_prob=args.drop_prob,
                      enc_size=args.enc_size,
                      n_head=args.n_head,
                      LN_train=args.ln_train,
                      DP_residual=args.dp_res,
                      mask_pos=args.mask_pos,
                      two_pos=args.two_pos,
                      total_prob=args.total_drop,
                      final_prob=args.final_prob)
    elif args.name == 'qanet2':
        model = QANet2(word_vectors=word_vectors,
                       char_vec=char_vec,
                       word_len=16,
                       emb_size=args.hidden_size,
                       drop_prob=args.drop_prob,
                       enc_size=args.enc_size,
                       n_head=args.n_head,
                       LN_train=args.ln_train,
                       DP_residual=args.dp_res,
                       mask_pos=args.mask_pos,
                       two_pos=args.two_pos,
                       rel=args.rel_att,
                       total_prob=args.total_drop,
                       final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    elif args.name == 'qanet3':
        model = QANet3(word_vectors=word_vectors,
                       char_vec=char_vec,
                       word_len=16,
                       emb_size=args.hidden_size,
                       drop_prob=args.drop_prob,
                       enc_size=args.enc_size,
                       n_head=args.n_head,
                       LN_train=args.ln_train,
                       DP_residual=args.dp_res,
                       mask_pos=args.mask_pos,
                       two_pos=args.two_pos,
                       rel=args.rel_att,
                       total_prob=args.total_drop,
                       final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    elif args.name == 'qanet4':
        model = QANet4(word_vectors=word_vectors,
                       char_vec=char_vec,
                       word_len=16,
                       emb_size=args.hidden_size,
                       drop_prob=args.drop_prob,
                       enc_size=args.enc_size,
                       n_head=args.n_head,
                       LN_train=args.ln_train,
                       DP_residual=args.dp_res,
                       mask_pos=args.mask_pos,
                       two_pos=args.two_pos,
                       rel=args.rel_att,
                       total_prob=args.total_drop,
                       final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    else:
        raise ValueError('Wrong model name')

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler

    if args.name == 'qanet':
        optimizer = optim.Adam(model.parameters(),
                               args.lr,
                               betas=(0.8, 0.999),
                               weight_decay=3 * 1e-7,
                               eps=1e-7)
        scheduler = warmup(optimizer, 1, 2000)
    elif args.opt == 'adam':
        if args.grad_cent:
            optimizer = AdamWGC(model.parameters(),
                                args.lr,
                                betas=(0.9, 0.999),
                                weight_decay=3 * 1e-7,
                                eps=1e-7,
                                use_gc=True)
        else:
            optimizer = AdamW(model.parameters(),
                              args.lr,
                              betas=(0.8, 0.999),
                              weight_decay=3 * 1e-7,
                              eps=1e-7)
        scheduler = warmup(optimizer, 1, 2000)
    elif args.opt == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   args.lr,
                                   weight_decay=3 * 1e-7)
        scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    elif args.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              args.lr,
                              weight_decay=3 * 1e-7)
        scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    else:
        raise ValueError(f'Unknown optimizer: {args.opt}')

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    i = 0
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()
                i += 1
                loss /= args.acc_step

                # Backward
                loss.backward()
                if i % args.acc_step == 0:
                    nn.utils.clip_grad_norm_(model.parameters(),
                                             args.max_grad_norm)
                    optimizer.step()
                    scheduler.step(i // (args.acc_step))
                    ema(model, i // (args.acc_step))
                    optimizer.zero_grad()
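                # Gradients are accumulated over `acc_step` batches before an
                # optimizer step, so the effective batch size is
                # batch_size * acc_step; dividing the loss by acc_step above
                # keeps the accumulated gradient on the same scale as a
                # single large-batch average.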

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0 and i % args.acc_step == 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
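# NOTE: the `warmup` factory used above is not defined in this snippet. A
# minimal sketch of a compatible helper (an assumption, not the original
# code): ramp the LR linearly from ~0 to its base value over `num_warmup`
# scheduler steps, then hold it constant.
from torch.optim import lr_scheduler as sched  # same alias as in the examples


def warmup(optimizer, last_step, num_warmup):
    # `last_step` mirrors the positional argument in the calls above
    # (warmup(optimizer, 1, 2000)); its interpretation here is an assumption.
    return sched.LambdaLR(optimizer,
                          lambda s: min(1.0, (s + 1) / num_warmup))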
Example #9
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    if args.model_name == 'sketchy':
        model = SketchyReader(word_vectors=word_vectors,
                              char_vectors=char_vectors,
                              hidden_size=args.hidden_size,
                              char_embed_drop_prob=args.char_embed_drop_prob,
                              num_heads=args.num_heads,
                              drop_prob=args.drop_prob)  # SKETCHY
    elif args.model_name == 'intensive':

        model = IntensiveReader(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                num_heads=args.num_heads,
                                char_embed_drop_prob=args.char_embed_drop_prob,
                                hidden_size=args.hidden_size,
                                drop_prob=args.drop_prob)  # INTENSIVE
    elif args.model_name == 'retro':

        model = RetroQANet(word_vectors=word_vectors,
                           char_vectors=char_vectors,
                           hidden_size=args.hidden_size,
                           num_heads=args.num_heads,
                           char_embed_drop_prob=args.char_embed_drop_prob,
                           intensive_path=args.load_path_i,
                           sketchy_path=args.load_path_s,
                           gpu_ids=args.gpu_ids,
                           drop_prob=args.drop_prob)  # Outer
    else:
        raise ValueError(
            'invalid --model_name: sketchy, intensive, or retro required')

    model = nn.DataParallel(model, args.gpu_ids)

    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # setup losses
    bceLoss = nn.BCELoss()

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)
    if args.optim == "adam":
        optimizer = optim.Adam(
            model.parameters(), 0.001, betas=(0.8, 0.999), eps=1e-7, weight_decay=3e-7)

    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)

    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        counter = 0
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                counter += 1
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                y1, y2 = y1.to(device), y2.to(device)
                if args.model_name == 'sketchy':
                    yi = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    # Binary answerability target (start index 0 marks
                    # no-answer in this setup), built on the same device as
                    # `yi` so BCELoss does not mix CPU and CUDA tensors.
                    loss = bceLoss(yi, torch.where(
                        y1 == 0, 0, 1).float().to(device))
                elif args.model_name == 'intensive':
                    yi, log_p1, log_p2 = model(
                        cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    # if counter % 100 == 0:
                    #print(torch.max(log_p1.exp(), dim=1)[0])
                    # print(torch.max(log_p2.exp(), dim=1)[0])
                    #weights = torch.ones(log_p1.shape[1])
                    #weights[0] = 2/(log_p1.shape[1])
                    #nll_loss = nn.NLLLoss(weight=weights.to(device='cuda:0'))
                    # gt_0 = torch.zeros(yi.shape[0]).to(device)
                    # gt_1 = torch.ones(yi.shape[0]).to(device)
                    loss = args.alpha_1 * bceLoss(yi, torch.where(
                        y1 == 0, 0, 1).float().to(device)) + args.alpha_2 * (
                            F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2))
                    #loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                elif args.model_name == 'retro':
                    log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                else:
                    raise ValueError(
                        'invalid --model_name: sketchy, intensive, or retro required')

                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(
                    model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch,
                                         NLL=loss_val)
                tbx.add_scalar('train/' + args.model_name, loss_val, step)
                tbx.add_scalar('train/LR',
                               optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2,
                                                  model_name=args.model_name,
                                                  a1=args.alpha_1,
                                                  a2=args.alpha_2)
                    saver.save(
                        step, model, results[args.metric_name], device, model_name=args.model_name)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(
                        f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
Example #10
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(),
                               args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SegmentSQuAD(args.train_record_file, args.use_squad_v2)
    #train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()
                y1, y2 = y1.to(device), y2.to(device)

                # Forward
                loss = 0
                for i in range(batch_size):
                    # Log-probabilities are <= 0, so initializing the running
                    # max to 0 would never be beaten; start at -inf instead.
                    max_p_sum = float('-inf')
                    max_p_sum_idx = 0
                    for j in range(cw_idxs.size(1)):
                        # Deal with the case when all the words in the window are padded words
                        if cw_idxs[i, j].sum().item() == 0:
                            continue
                        log_p1_j, log_p2_j = model(cw_idxs[i, j].unsqueeze(0),
                                                   qw_idxs[i].unsqueeze(0))
                        max_log_p1_j = torch.max(log_p1_j.detach())
                        max_log_p2_j = torch.max(log_p2_j.detach())
                        max_p_sum_idx = j if (max_log_p1_j + max_log_p2_j
                                              ) > max_p_sum else max_p_sum_idx
                        max_p_sum = max_log_p1_j + max_log_p2_j if (
                            max_log_p1_j +
                            max_log_p2_j) > max_p_sum else max_p_sum
                    log_p1_max, log_p2_max = model(
                        cw_idxs[i, max_p_sum_idx].unsqueeze(0),
                        qw_idxs[i].unsqueeze(0))

                    # Adjust label to the window case
                    if max_p_sum_idx * train_dataset.stride + torch.argmax(
                            log_p1_max).item() == y1[i].item():
                        loss += F.nll_loss(
                            log_p1_max,
                            torch.argmax(log_p1_max).unsqueeze(0))
                    else:
                        loss += F.nll_loss(
                            log_p1_max,
                            torch.argmin(log_p1_max).unsqueeze(0))

                    if max_p_sum_idx * train_dataset.stride + torch.argmax(
                            log_p2_max).item() == y2[i].item():
                        loss += F.nll_loss(
                            log_p2_max,
                            torch.argmax(log_p2_max).unsqueeze(0))
                    else:
                        loss += F.nll_loss(
                            log_p2_max,
                            torch.argmin(log_p2_max).unsqueeze(0))
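                    # A window's local argmax maps to the global token index
                    # max_p_sum_idx * stride + argmax; the branches above
                    # reward the prediction when that global index matches
                    # the gold label and push probability away from it
                    # otherwise.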

                loss_val = loss.item()

                # # Forward
                # log_p1, log_p2 = model(cw_idxs, qw_idxs)
                # loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                # loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
Example #11
def main():

    set_random_seed()

    # Arguments
    opt = args.get_setup_args()

    #cuda = True if torch.cuda.is_available() else False
    device, gpu_ids = util.get_available_devices()

    num_classes = opt.num_classes
    noise_dim = opt.latent_dim + opt.num_classes

    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.normal_(m.weight.data, 0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0)

    train_images_path = os.path.join(opt.data_path, "train")
    val_images_path = os.path.join(opt.data_path, "val")
    output_model_path = os.path.join(opt.output_path, opt.version)
    output_train_images_path = os.path.join(opt.output_path, opt.version,
                                            "train")
    output_sample_images_path = os.path.join(opt.output_path, opt.version,
                                             "sample")
    output_nn_images_path = os.path.join(opt.output_path, opt.version, "nn")
    output_const_images_path = os.path.join(opt.output_path, opt.version,
                                            "constant_sample")

    os.makedirs(output_train_images_path, exist_ok=True)
    os.makedirs(output_sample_images_path, exist_ok=True)
    os.makedirs(output_nn_images_path, exist_ok=True)
    os.makedirs(output_const_images_path, exist_ok=True)

    train_set = datasets.ImageFolder(root=train_images_path,
                                     transform=transforms.Compose([
                                         transforms.Resize(
                                             (opt.img_size, opt.img_size)),
                                         transforms.ToTensor(),
                                         transforms.Normalize((0.5, 0.5, 0.5),
                                                              (0.5, 0.5, 0.5))
                                     ]))

    dataloader = torch.utils.data.DataLoader(train_set,
                                             batch_size=opt.batch_size,
                                             shuffle=True,
                                             num_workers=opt.num_workers)

    dataloader_nn = torch.utils.data.DataLoader(train_set,
                                                batch_size=1,
                                                num_workers=opt.num_workers)

    gen = fcgan.Generator(noise_dim).to(device)
    disc = fcgan.Discriminator(num_classes).to(device)

    gen.apply(weights_init)
    disc.apply(weights_init)

    optimG = optim.Adam(gen.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
    optimD = optim.Adam(disc.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
    #optimD = optim.SGD(disc.parameters(), lr=opt.lr_sgd)

    adversarial_loss = torch.nn.BCELoss()
    auxiliary_loss = torch.nn.CrossEntropyLoss()

    real_label_val = 1
    #real_label_smooth_val = 0.9
    real_label_low = 0.75
    real_label_high = 1.0
    fake_label_val = 0
    c_fake_label = opt.num_classes

    # Probability of adding label noise during discriminator training
    label_noise_prob = 0.05

    # Keep track of losses, accuracy, FID
    G_losses = []
    D_losses = []
    D_acc = []
    FIDs = []
    val_epochs = []

    # Define a fixed noise vector for consistent samples
    z_const = torch.randn(
        (num_classes * opt.num_sample_images, opt.latent_dim)).to(device)

    def print_labels():
        for class_name in train_set.classes:
            print("{} -> {}".format(class_name,
                                    train_set.class_to_idx[class_name]))

    def eval_fid(gen_images_path, eval_images_path):
        print("Calculating FID...")
        fid = fid_score.calculate_fid_given_paths(
            (gen_images_path, eval_images_path), opt.batch_size, device)
        return fid

    def validate(keep_images=True):
        # Put G in eval mode
        gen.eval()

        val_set = datasets.ImageFolder(root=val_images_path,
                                       transform=transforms.Compose([
                                           transforms.Resize(
                                               (opt.img_size, opt.img_size)),
                                           transforms.ToTensor()
                                       ]))

        val_loader = torch.utils.data.DataLoader(val_set,
                                                 batch_size=opt.batch_size,
                                                 shuffle=True,
                                                 num_workers=opt.num_workers)

        output_images_path = os.path.join(opt.output_path, opt.version, "val")
        os.makedirs(output_images_path, exist_ok=True)

        output_source_images_path = val_images_path + "_" + str(opt.img_size)
        source_images_available = True
        if (not os.path.exists(output_source_images_path)):
            os.makedirs(output_source_images_path)
            source_images_available = False

        images_done = 0
        for _, data in enumerate(val_loader, 0):
            images, labels = data
            batch_size = images.size(0)
            noise = torch.randn((batch_size, opt.latent_dim)).to(device)
            labels = torch.randint(0, num_classes, (batch_size, )).to(device)
            labels_onehot = F.one_hot(labels, num_classes)

            noise = torch.cat((noise, labels_onehot.to(dtype=torch.float)), 1)
            gen_images = gen(noise)
            for i in range(images_done, images_done + batch_size):
                vutils.save_image(gen_images[i - images_done, :, :, :],
                                  "{}/{}.jpg".format(output_images_path, i),
                                  normalize=True)
                if (not source_images_available):
                    vutils.save_image(images[i - images_done, :, :, :],
                                      "{}/{}.jpg".format(
                                          output_source_images_path, i),
                                      normalize=True)
            images_done += batch_size

        # Put G back in train mode
        gen.train()

        fid = eval_fid(output_images_path, output_source_images_path)
        if (not keep_images):
            print("Deleting images generated for validation...")
            rmtree(output_images_path)
        return fid

    def get_dist(img1, img2):
        return torch.dist(img1, img2, p=1)

    def get_nn(images, class_label):
        nearest = [None] * len(images)  # avoid shadowing the torch.nn import
        dist = [np.inf] * len(images)
        for e, data in enumerate(dataloader_nn, 0):
            img, label = data
            if label != class_label:
                continue
            img = img.to(device)
            for i in range(len(images)):
                d = get_dist(images[i], img)
                if d < dist[i]:
                    dist[i] = d
                    nearest[i] = img
        r = torch.stack(nearest, dim=0).squeeze().to(device)
        #print(r.shape)
        return r

    def get_nearest_neighbour(sample_images, num_images):
        all_nn = []
        for i in range(num_classes):
            nearest_n = get_nn(
                sample_images[i * num_images:(i + 1) * num_images], i)
            class_nn = torch.stack([
                sample_images[i * num_images:(i + 1) * num_images], nearest_n
            ],
                                   dim=0).squeeze().view(
                                       -1, 3, opt.img_size,
                                       opt.img_size).to(device)
            all_nn.append(class_nn)
        #r = torch.stack(nn, dim=0).squeeze().view(-1, 3, opt.img_size, opt.img_size).to(device)
        #print(r.shape)
        return all_nn

    def get_onehot_labels(num_images):
        labels = torch.zeros(num_images, 1).to(device)
        for i in range(num_classes - 1):
            temp = torch.ones(num_images, 1).to(device) + i
            labels = torch.cat([labels, temp], 0)

        labels_onehot = torch.zeros(num_images * num_classes,
                                    num_classes).to(device)
        labels_onehot.scatter_(1, labels.to(torch.long), 1)
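        # Equivalent to F.one_hot(labels.squeeze(1).to(torch.long),
        # num_classes).float(), as used in validate() above.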

        return labels_onehot

    def sample_images(num_images, batches_done, isLast):
        # Sample noise - declared once at the top to maintain consistency of samples
        z = torch.randn((num_classes * num_images, opt.latent_dim)).to(device)
        '''
        labels = torch.zeros((num_classes * num_images,), dtype=torch.long).to(device)

        for i in range(num_classes):
            for j in range(num_images):
                labels[i*num_images + j] = i
        
        labels_onehot = F.one_hot(labels, num_classes)        
        '''

        labels_onehot = get_onehot_labels(num_images)
        z = torch.cat((z, labels_onehot.to(dtype=torch.float)), 1)
        sample_imgs = gen(z)
        z_const_cat = torch.cat((z_const, labels_onehot.to(dtype=torch.float)),
                                1)
        const_sample_imgs = gen(z_const_cat)
        vutils.save_image(sample_imgs.data,
                          "{}/{}.png".format(output_sample_images_path,
                                             batches_done),
                          nrow=num_images,
                          padding=2,
                          normalize=True)
        vutils.save_image(const_sample_imgs.data,
                          "{}/{}.png".format(output_const_images_path,
                                             batches_done),
                          nrow=num_images,
                          padding=2,
                          normalize=True)

        if isLast:
            print(
                "Estimating nearest neighbors for the last samples, this takes a few minutes..."
            )
            nearest_neighbour_imgs_list = get_nearest_neighbour(
                sample_imgs, num_images)
            for label, nn_imgs in enumerate(nearest_neighbour_imgs_list):
                vutils.save_image(nn_imgs.data,
                                  "{}/{}_{}.png".format(
                                      output_nn_images_path, batches_done,
                                      label),
                                  nrow=num_images,
                                  padding=2,
                                  normalize=True)
            nearest_neighbour_imgs_list = get_nearest_neighbour(
                const_sample_imgs, num_images)
            for label, nn_imgs in enumerate(nearest_neighbour_imgs_list):
                vutils.save_image(nn_imgs.data,
                                  "{}/const_{}_{}.png".format(
                                      output_nn_images_path, batches_done,
                                      label),
                                  nrow=num_images,
                                  padding=2,
                                  normalize=True)
            print("Saved nearest neighbors.")

    def save_loss_plot(path):
        plt.figure(figsize=(10, 5))
        plt.title("Generator and Discriminator Loss During Training")
        plt.plot(G_losses, label="G")
        plt.plot(D_losses, label="D")
        plt.xlabel("iterations")
        plt.ylabel("Loss")
        plt.legend()
        plt.savefig(path)
        plt.close()

    def save_acc_plot(path):
        plt.figure(figsize=(10, 5))
        plt.title("Discriminator Accuracy")
        plt.plot(D_acc)
        plt.xlabel("iterations")
        plt.ylabel("accuracy")
        plt.savefig(path)
        plt.close()

    def save_fid_plot(FIDs, epochs, path):
        #N = len(FIDs)
        plt.figure(figsize=(10, 5))
        plt.title("FID on Validation Set")
        plt.plot(epochs, FIDs)
        plt.xlabel("epochs")
        plt.ylabel("FID")
        #plt.xticks([i * 49 for i in range(1, N+1)])
        plt.savefig(path)
        plt.close()

    def expectation_loss(real_feature, fake_feature):
        norm = torch.norm(real_feature - fake_feature)
        total = torch.abs(norm).sum()
        return norm / total
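    # NOTE: `expectation_loss` is only referenced in the commented-out term
    # of the generator loss below. As written, `total` equals `norm` (both
    # are the same scalar), so the function returns 1 whenever the features
    # differ at all.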

    print("Label to class mapping:")
    print_labels()

    for epoch in range(1, opt.num_epochs + 1):
        for i, data in enumerate(dataloader, 0):

            images, class_labels = data
            images = images.to(device)
            class_labels = class_labels.to(device)

            batch_size = images.size(0)

            #real_label_smooth = torch.full((batch_size,), real_label_smooth_val, device=device)
            real_label_smooth = (
                real_label_low - real_label_high) * torch.rand(
                    (batch_size, ), device=device) + real_label_high
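            # Uniformly samples smoothed real labels in
            # [real_label_low, real_label_high]: (low - high) * U[0, 1) + high.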
            real_label = torch.full((batch_size, ),
                                    real_label_val,
                                    device=device)
            fake_label = torch.full((batch_size, ),
                                    fake_label_val,
                                    device=device)

            ############################
            # Train Discriminator
            ###########################

            ## Train with all-real batch

            optimD.zero_grad()

            real_pred, real_aux = disc(images)

            mask = torch.rand(
                (batch_size, ), device=device) <= label_noise_prob
            mask = mask.type(torch.float)
            noisy_label = torch.mul(1 - mask, real_label_smooth) + torch.mul(
                mask, fake_label)

            d_real_loss = (adversarial_loss(real_pred, noisy_label) +
                           auxiliary_loss(real_aux, class_labels)) / 2

            # Train with fake batch
            noise = torch.randn((batch_size, opt.latent_dim)).to(device)
            gen_class_labels = torch.randint(0, num_classes,
                                             (batch_size, )).to(device)
            gen_class_labels_onehot = F.one_hot(gen_class_labels, num_classes)

            noise = torch.cat(
                (noise, gen_class_labels_onehot.to(dtype=torch.float)), 1)
            gen_images = gen(noise)
            fake_pred, fake_aux = disc(gen_images.detach())

            mask = torch.rand(
                (batch_size, ), device=device) <= label_noise_prob
            mask = mask.type(torch.float)
            noisy_label = torch.mul(1 - mask, fake_label) + torch.mul(
                mask, real_label_smooth)

            c_fake = c_fake_label * torch.ones_like(gen_class_labels).to(
                device)
            d_fake_loss = (adversarial_loss(fake_pred, noisy_label) +
                           auxiliary_loss(fake_aux, c_fake)) / 2

            # Total discriminator loss
            d_loss = (d_real_loss + d_fake_loss) / 2

            # Calculate discriminator accuracy
            pred = np.concatenate(
                [real_aux.data.cpu().numpy(),
                 fake_aux.data.cpu().numpy()],
                axis=0)
            gt = np.concatenate([
                class_labels.data.cpu().numpy(),
                gen_class_labels.data.cpu().numpy()
            ],
                                axis=0)
            d_acc = np.mean(np.argmax(pred, axis=1) == gt)

            d_loss.backward()
            optimD.step()

            ############################
            # Train Generator
            ###########################

            optimG.zero_grad()

            validity, aux_scores = disc(gen_images)
            g_loss = 0.5 * (adversarial_loss(validity, real_label) +
                            auxiliary_loss(aux_scores, gen_class_labels)
                            )  # + expectation_loss(gen_features, r_f1)

            g_loss.backward()
            optimG.step()

            # Save losses and accuracy for plotting
            G_losses.append(g_loss.item())
            D_losses.append(d_loss.item())
            D_acc.append(d_acc)

            # Output training stats
            if i % opt.print_every == 0:
                print(
                    "[Epoch %d/%d] [Batch %d/%d] [D loss: %.4f, acc:  %d%%] [G loss: %.4f]"
                    % (epoch, opt.num_epochs, i, len(dataloader),
                       d_loss.item(), 100 * d_acc, g_loss.item()))

            batches_done = epoch * len(dataloader) + i

            # Generate and save sample images
            isLast = ((epoch == opt.num_epochs)  # epochs run 1..num_epochs inclusive
                      and (i == len(dataloader) - 1))
            if (batches_done % opt.sample_interval == 0) or isLast:
                # Put G in eval mode
                gen.eval()

                with torch.no_grad():
                    sample_images(opt.num_sample_images, batches_done, isLast)
                vutils.save_image(gen_images.data[:36],
                                  "{}/{}.png".format(output_train_images_path,
                                                     batches_done),
                                  nrow=6,
                                  padding=2,
                                  normalize=True)

                # Put G back in train mode
                gen.train()

        # Save model checkpoint
        if (epoch != opt.num_epochs and epoch % opt.checkpoint_epochs == 0):
            print("Checkpoint at epoch {}".format(epoch))

            print("Saving G & D loss plot...")
            save_loss_plot(
                os.path.join(opt.output_path, opt.version,
                             "loss_plot_{}.png".format(epoch)))
            print("Saving D accuracy plot...")
            save_acc_plot(
                os.path.join(opt.output_path, opt.version,
                             "accuracy_plot_{}.png".format(epoch)))

            print("Validating model...")
            with torch.no_grad():
                fid = validate(keep_images=False)
            print("Validation FID: {}".format(fid))
            with open(os.path.join(opt.output_path, opt.version, "FIDs.txt"),
                      "a") as f:
                f.write("Epoch: {}, FID: {}\n".format(epoch, fid))
            FIDs.append(fid)
            val_epochs.append(epoch)
            print("Saving FID plot...")
            save_fid_plot(
                FIDs, val_epochs,
                os.path.join(opt.output_path, opt.version,
                             "fid_plot_{}.png".format(epoch)))

            print("Saving model checkpoint...")
            torch.save(
                {
                    'epoch': epoch,
                    'g_state_dict': gen.state_dict(),
                    'd_state_dict': disc.state_dict(),
                    'g_optimizer_state_dict': optimG.state_dict(),
                    'd_optimizer_state_dict': optimD.state_dict(),
                    'g_loss': g_loss.item(),
                    'd_loss': d_loss.item(),
                    'd_accuracy': d_acc,
                    'val_fid': fid
                },
                os.path.join(output_model_path,
                             "model_checkpoint_{}.tar".format(epoch)))

    print("Saving final G & D loss plot...")
    save_loss_plot(os.path.join(opt.output_path, opt.version, "loss_plot.png"))
    print("Done!")

    print("Saving final D accuracy plot...")
    save_acc_plot(
        os.path.join(opt.output_path, opt.version, "accuracy_plot.png"))
    print("Done!")

    print("Validating final model...")
    gen.eval()
    with torch.no_grad():
        fid = validate()
    print("Final Validation FID: {}".format(fid))
    with open(os.path.join(opt.output_path, opt.version, "FIDs.txt"),
              "a") as f:
        f.write("Epoch: {}, FID: {}\n".format(epoch, fid))
    FIDs.append(fid)
    val_epochs.append(epoch)
    print("Saving final FID plot...")
    save_fid_plot(FIDs, val_epochs,
                  os.path.join(opt.output_path, opt.version, "fid_plot"))
    print("Done!")

    print("Saving final model...")
    torch.save(
        {
            'epoch': epoch,
            'g_state_dict': gen.state_dict(),
            'd_state_dict': disc.state_dict(),
            'g_optimizer_state_dict': optimG.state_dict(),
            'd_optimizer_state_dict': optimD.state_dict(),
            'g_loss': g_loss.item(),
            'd_loss': d_loss.item(),
            'd_accuracy': d_acc,
            'val_fid': fid
        }, os.path.join(output_model_path, "model.tar"))
    print("Done!")
Example #12
def train_QaNet(args):
    device, args.gpu_ids = util.get_available_devices()
    word_mat = util.torch_from_json(args.word_emb_file)
    char_mat = util.torch_from_json(args.char_emb_file)

    with open(args.dev_eval_file, 'r') as fh:
        dev_eval_file = json_load(fh)

    print("Building model...")

    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_dataset = data.DataLoader(train_dataset,
                                    batch_size=args.batch_size,
                                    shuffle=True,
                                    num_workers=args.num_workers,
                                    collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_dataset = data.DataLoader(dev_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    lr = args.lr
    base_lr = 1
    lr_warm_up_num = args.lr_warm_up_num

    model = QaNet(word_mat, char_mat, args.connector_dim, args.glove_dim,
                  args.char_dim, args.drop_prob, args.dropout_char,
                  args.num_heads).to(device)
    ema = util.EMA(model, args.ema_decay)

    parameters = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.Adam(lr=base_lr,
                           betas=(0.9, 0.999),
                           eps=1e-7,
                           weight_decay=5e-8,
                           params=parameters)
    cr = lr / math.log2(lr_warm_up_num)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda ee: cr * math.log2(ee + 1)
        if ee < lr_warm_up_num else lr)
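    # LambdaLR multiplies the optimizer's base LR (set to base_lr = 1 above)
    # by the lambda's return value, so the LR grows like cr * log2(step + 1)
    # during warm-up and is then held constant at `lr`.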

    best_f1 = 0
    best_em = 0
    patience = 0
    for epoch_num in range(args.num_epochs):

        train(model, optimizer, scheduler, train_dataset, dev_dataset,
              dev_eval_file, epoch_num, ema, device)

        ema.assign(model)
        metrics = test(model, dev_dataset, dev_eval_file,
                       (epoch_num + 1) * len(train_dataset))
        dev_f1 = metrics["f1"]
        dev_em = metrics["exact_match"]
        if dev_f1 < best_f1 and dev_em < best_em:
            patience += 1
            if patience > args.early_stop:
                break
        else:
            patience = 0
            best_f1 = max(best_f1, dev_f1)
            best_em = max(best_em, dev_em)

        fn = os.path.join(args.save_dir, "model.pt")
        torch.save(model, fn)
        ema.resume(model)
Example #13
def main(args):
    args.save_dir = util.get_save_dir(args.save_dir,
                                      "exp1_training",
                                      training=False)
    log = get_logger(args.logging_dir, "exp1_training")
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size)
    model = nn.DataParallel(model, gpu_ids)

    log.info(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')
    dataset = SQuAD(args.test_record_file, True)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.datasplit} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}  # Predictions for submission
    with open(args.test_eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len, True)

            # Log info
            progress_bar.update(batch_size)

            # Not using the unlabeled test set:
            # if args.split != 'test':
            #     # No labels for the test set, so NLL would be invalid
            #     progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(), True)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    # Log results (this split is assumed to come with labels)
    results = util.eval_dicts(gold_dict, pred_dict, True)
    results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                    ('EM', results['EM'])]
    results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)
    # Log to console
    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
    log.info(f'{args.datasplit} {results_str}')
    # Log to TensorBoard
    tbx = SummaryWriter(args.save_dir)
    util.visualize(tbx,
                   pred_dict=pred_dict,
                   eval_path=args.test_eval_file,
                   step=0,
                   split=args.datasplit,
                   num_visuals=args.num_visuals)
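
# util.discretize appears throughout these examples; a rough sketch of the
# contract it seems to satisfy (an assumption, not the actual implementation):
# pick the (start, end) pair maximizing p_start[i] * p_end[j] subject to
# i <= j < i + max_len.
import torch

def discretize_sketch(p_start, p_end, max_len=15):
    # p_start, p_end: (batch_size, seq_len) probability tensors.
    joint = p_start.unsqueeze(2) * p_end.unsqueeze(1)    # (batch, L, L)
    joint = torch.triu(joint)                            # keep start <= end
    joint = joint - torch.triu(joint, diagonal=max_len)  # keep end - start < max_len
    best = joint.flatten(start_dim=1).argmax(dim=1)
    seq_len = joint.size(1)
    starts = torch.div(best, seq_len, rounding_mode='floor')
    ends = best % seq_len
    return starts, ends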
Example #14
def create_training_function(args, experiment_save_dir, k_fold_splits=None):
    device, args.gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(args.gpu_ids))
    word_vectors, char_vectors = train.load_embeddings(args)
    training_dataset = util.SQuAD(
        util.preprocessed_path(args.train_record_file, args.data_dir,
                               args.dataset), args.use_squad_v2)
    eval_dataset = util.SQuAD(
        util.preprocessed_path(args.dev_record_file, args.data_dir,
                               args.dataset), args.use_squad_v2)
    train_gold_dict = util.load_eval_file(args, args.train_eval_file)
    eval_gold_dict = util.load_eval_file(args, args.dev_eval_file)

    k_fold_splits = args.k_fold
    min_nll_decrease = args.min_nll_decrease

    def process_sample(sample, model, gold_dict=None):
        cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids = sample
        batch_size = cw_idxs.size(0)
        log_p1, log_p2 = model(cw_idxs.to(device), cc_idxs.to(device),
                               qw_idxs.to(device), qc_idxs.to(device))
        y1, y2 = y1.to(device), y2.to(device)
        nll_loss_1 = F.nll_loss(log_p1, y1)
        nll_loss_2 = F.nll_loss(log_p2, y2)
        loss = nll_loss_1 + nll_loss_2
        preds = None
        if gold_dict:
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len,
                                           args.use_squad_v2)
            preds, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                           starts.tolist(), ends.tolist(),
                                           args.use_squad_v2)

        return loss, batch_size, preds

    def run_experiment(tbx, train_loader, train_size, eval_loader, eval_size,
                       gold_dict, config):
        from models import init_training
        max_grad_norm = args.max_grad_norm
        model, optimizer, scheduler, ema, step = init_training(
            args, word_vectors, char_vectors, device, config)

        prev_epoch_avg_nll = None
        for epoch in range(step, args.num_epochs):
            model.train()
            epoch_avg_nll = util.AverageMeter()
            with torch.enable_grad(), tqdm(total=train_size) as progress_bar:
                for sample in train_loader:
                    loss, batch_size, _ = process_sample(sample, model, None)
                    nll = loss.item()
                    epoch_avg_nll.update(nll)
                    tbx.add_scalar('train/NLL', loss.item(), step)
                    current_lr = optimizer.param_groups[0]['lr']
                    tbx.add_scalar('train/LR', current_lr, step)
                    optimizer.zero_grad()
                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
                    optimizer.step()
                    scheduler.step()

                    ema(model, step // batch_size)
                    progress_bar.update(batch_size)
                    progress_bar.set_postfix(epoch=epoch,
                                             STEP=util.millify(step),
                                             LR=current_lr,
                                             NLL=nll)
                    step += batch_size

            model.eval()
            ema.assign(model)
            results, pred_dict = evaluate(model, eval_loader, eval_size,
                                          gold_dict)
            ema.resume(model)

            tbx.add_scalar('eval/NLL', results['NLL'], step)
            if 'AvNA' in results:
                tbx.add_scalar('eval/AvNA', results['AvNA'], step)
            tbx.add_scalar('eval/F1', results['F1'], step)
            tbx.add_scalar('eval/EM', results['EM'], step)

            dev_eval_file = util.preprocessed_path(args.dev_eval_file,
                                                   args.data_dir, args.dataset)
            util.visualize(tbx,
                           pred_dict=pred_dict,
                           eval_dict=gold_dict,
                           step=step,
                           split='eval',
                           num_visuals=args.num_visuals)

            if ((min_nll_decrease is not None)
                    and (prev_epoch_avg_nll is not None) and
                (epoch_avg_nll.avg > prev_epoch_avg_nll - min_nll_decrease)):
                print(
                    f"Avg NLL {epoch_avg_nll.avg:.2f} > {prev_epoch_avg_nll:.2f} - {(min_nll_decrease):.2f}. Break"
                )
                break
            prev_epoch_avg_nll = epoch_avg_nll.avg

        return model, step

    def evaluate(model, eval_loader, eval_size, gold_dict):
        pred_dict = {}
        with torch.no_grad(), tqdm(total=eval_size) as progress_bar:
            nll_meter = util.AverageMeter()
            for sample in eval_loader:
                loss, batch_size, preds = process_sample(
                    sample, model, gold_dict)
                nll_meter.update(loss.item(), batch_size)
                pred_dict.update(preds)
                progress_bar.update(batch_size)
                progress_bar.set_postfix(NLL=nll_meter.avg)

            results = {
                **util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2),
                **{
                    'NLL': nll_meter.avg
                }
            }
        return results, pred_dict

    def kfold_training_function(experiment, config):
        avg_meter = util.MultiAverageMeter(['F1', 'EM', 'AvNA', 'NLL'])
        gold_dict = train_gold_dict
        for fold_index, train_loader, train_size, test_loader, test_size in kfold_generator(
                args, k_fold_splits, training_dataset):
            save_dir = os.path.join(experiment_save_dir,
                                    *GridSearch.experiment_path(experiment),
                                    f"fold={fold_index + 1}")
            tbx = SummaryWriter(save_dir)

            model, steps = run_experiment(tbx, train_loader, train_size,
                                          test_loader, test_size, gold_dict,
                                          config)
            results, _ = evaluate(model, test_loader, test_size, gold_dict)
            avg_meter.update(results, steps)

        return {**experiment, **avg_meter.avg}

    def training_function(experiment, config):
        import torch.utils.data as data
        train_loader = data.DataLoader(training_dataset,
                                       shuffle=True,
                                       batch_size=args.batch_size,
                                       num_workers=args.num_workers,
                                       collate_fn=None)
        eval_loader = data.DataLoader(eval_dataset,
                                      shuffle=False,
                                      batch_size=args.batch_size,
                                      num_workers=args.num_workers,
                                      collate_fn=None)
        save_dir = os.path.join(experiment_save_dir,
                                *GridSearch.experiment_path(experiment))
        tbx = SummaryWriter(save_dir)

        train_size = len(training_dataset)
        eval_size = len(eval_dataset)
        model, steps = run_experiment(tbx, train_loader, train_size,
                                      eval_loader, eval_size, eval_gold_dict,
                                      config)
        results, _ = evaluate(model, eval_loader, eval_size, eval_gold_dict)

        return {**experiment, **results}

    return kfold_training_function if k_fold_splits is not None else training_function
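
# kfold_generator is not shown above; a hypothetical sketch of a helper with
# the same signature and yield shape (fold_index, train_loader, train_size,
# test_loader, test_size) -- an assumption, not the repo's actual code:
import torch.utils.data as data

def kfold_generator_sketch(args, k, dataset):
    n = len(dataset)
    sizes = [n // k + (1 if i < n % k else 0) for i in range(k)]
    folds = data.random_split(dataset, sizes)
    for fold_index in range(k):
        # Hold out one fold for testing, train on the rest.
        train_set = data.ConcatDataset(
            [f for i, f in enumerate(folds) if i != fold_index])
        test_set = folds[fold_index]
        train_loader = data.DataLoader(train_set, batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=args.num_workers)
        test_loader = data.DataLoader(test_set, batch_size=args.batch_size,
                                      shuffle=False,
                                      num_workers=args.num_workers)
        yield (fold_index, train_loader, len(train_set),
               test_loader, len(test_set))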
Example #15
def main(args):
    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)
    # Get model
    log.info('Building model...')
    nbr_model = 0
    if (args.load_path_baseline):
        model_baseline = Baseline(word_vectors=word_vectors, hidden_size=100)
        model_baseline = nn.DataParallel(model_baseline, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_baseline}...')
        model_baseline = util.load_model(model_baseline,
                                         args.load_path_baseline,
                                         gpu_ids,
                                         return_step=False)
        model_baseline = model_baseline.to(device)
        model_baseline.eval()
        nll_meter_baseline = util.AverageMeter()
        nbr_model += 1
        save_prob_baseline_start = []
        save_prob_baseline_end = []

    if (args.load_path_bidaf):
        model_bidaf = BiDAF(word_vectors=word_vectors,
                            char_vectors=char_vectors,
                            char_emb_dim=args.char_emb_dim,
                            hidden_size=args.hidden_size)
        model_bidaf = nn.DataParallel(model_bidaf, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_bidaf}...')
        model_bidaf = util.load_model(model_bidaf,
                                      args.load_path_bidaf,
                                      gpu_ids,
                                      return_step=False)
        model_bidaf = model_bidaf.to(device)
        model_bidaf.eval()
        nll_meter_bidaf = util.AverageMeter()
        nbr_model += 1
        save_prob_bidaf_start = []
        save_prob_bidaf_end = []

    if (args.load_path_bidaf_fusion):
        model_bidaf_fu = BiDAF_fus(word_vectors=word_vectors,
                                   char_vectors=char_vectors,
                                   char_emb_dim=args.char_emb_dim,
                                   hidden_size=args.hidden_size)
        model_bidaf_fu = nn.DataParallel(model_bidaf_fu, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_bidaf_fusion}...')
        model_bidaf_fu = util.load_model(model_bidaf_fu,
                                         args.load_path_bidaf_fusion,
                                         gpu_ids,
                                         return_step=False)
        model_bidaf_fu = model_bidaf_fu.to(device)
        model_bidaf_fu.eval()
        nll_meter_bidaf_fu = util.AverageMeter()
        nbr_model += 1
        save_prob_bidaf_fu_start = []
        save_prob_bidaf_fu_end = []

    if (args.load_path_qanet):
        model_qanet = QANet(word_vectors=word_vectors,
                            char_vectors=char_vectors,
                            char_emb_dim=args.char_emb_dim,
                            hidden_size=args.hidden_size,
                            n_heads=args.n_heads,
                            n_conv_emb_enc=args.n_conv_emb,
                            n_conv_mod_enc=args.n_conv_mod,
                            n_emb_enc_blocks=args.n_emb_blocks,
                            n_mod_enc_blocks=args.n_mod_blocks,
                            divisor_dim_kqv=args.divisor_dim_kqv)

        model_qanet = nn.DataParallel(model_qanet, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_qanet}...')
        model_qanet = util.load_model(model_qanet,
                                      args.load_path_qanet,
                                      gpu_ids,
                                      return_step=False)
        model_qanet = model_qanet.to(device)
        model_qanet.eval()
        nll_meter_qanet = util.AverageMeter()
        nbr_model += 1
        save_prob_qanet_start = []
        save_prob_qanet_end = []

    if (args.load_path_qanet_old):
        model_qanet_old = QANet_old(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    device=device,
                                    char_emb_dim=args.char_emb_dim,
                                    hidden_size=args.hidden_size,
                                    n_heads=args.n_heads,
                                    n_conv_emb_enc=args.n_conv_emb,
                                    n_conv_mod_enc=args.n_conv_mod,
                                    n_emb_enc_blocks=args.n_emb_blocks,
                                    n_mod_enc_blocks=args.n_mod_blocks)

        model_qanet_old = nn.DataParallel(model_qanet_old, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_qanet_old}...')
        model_qanet_old = util.load_model(model_qanet_old,
                                          args.load_path_qanet_old,
                                          gpu_ids,
                                          return_step=False)
        model_qanet_old = model_qanet_old.to(device)
        model_qanet_old.eval()
        nll_meter_qanet_old = util.AverageMeter()
        nbr_model += 1
        save_prob_qanet_old_start = []
        save_prob_qanet_old_end = []

    if (args.load_path_qanet_inde):
        model_qanet_inde = QANet_independant_encoder(
            word_vectors=word_vectors,
            char_vectors=char_vectors,
            char_emb_dim=args.char_emb_dim,
            hidden_size=args.hidden_size,
            n_heads=args.n_heads,
            n_conv_emb_enc=args.n_conv_emb,
            n_conv_mod_enc=args.n_conv_mod,
            n_emb_enc_blocks=args.n_emb_blocks,
            n_mod_enc_blocks=args.n_mod_blocks,
            divisor_dim_kqv=args.divisor_dim_kqv)

        model_qanet_inde = nn.DataParallel(model_qanet_inde, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_qanet_inde}...')
        model_qanet_inde = util.load_model(model_qanet_inde,
                                           args.load_path_qanet_inde,
                                           gpu_ids,
                                           return_step=False)
        model_qanet_inde = model_qanet_inde.to(device)
        model_qanet_inde.eval()
        nll_meter_qanet_inde = util.AverageMeter()
        nbr_model += 1
        save_prob_qanet_inde_start = []
        save_prob_qanet_inde_end = []

    if (args.load_path_qanet_s_e):
        model_qanet_s_e = QANet_S_E(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    char_emb_dim=args.char_emb_dim,
                                    hidden_size=args.hidden_size,
                                    n_heads=args.n_heads,
                                    n_conv_emb_enc=args.n_conv_emb,
                                    n_conv_mod_enc=args.n_conv_mod,
                                    n_emb_enc_blocks=args.n_emb_blocks,
                                    n_mod_enc_blocks=args.n_mod_blocks,
                                    divisor_dim_kqv=args.divisor_dim_kqv)

        model_qanet_s_e = nn.DataParallel(model_qanet_s_e, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_qanet_s_e}...')
        model_qanet_s_e = util.load_model(model_qanet_s_e,
                                          args.load_path_qanet_s_e,
                                          gpu_ids,
                                          return_step=False)
        model_qanet_s_e = model_qanet_s_e.to(device)
        model_qanet_s_e.eval()
        nll_meter_qanet_s_e = util.AverageMeter()
        nbr_model += 1
        save_prob_qanet_s_e_start = []
        save_prob_qanet_s_e_end = []

    # Get data loader
    log.info('Building dataset...')
    record_file = vars(args)[f'{args.split}_record_file']
    dataset = SQuAD(record_file, args.use_squad_v2)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.split} split...')
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}  # Predictions for submission
    eval_file = vars(args)[f'{args.split}_eval_file']
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            cc_idxs = cc_idxs.to(device)
            qc_idxs = qc_idxs.to(device)
            batch_size = cw_idxs.size(0)

            y1, y2 = y1.to(device), y2.to(device)
            l_p1, l_p2 = [], []
            # Forward
            if (args.load_path_baseline):
                log_p1_baseline, log_p2_baseline = model_baseline(
                    cw_idxs, qw_idxs)
                loss_baseline = F.nll_loss(log_p1_baseline, y1) + F.nll_loss(
                    log_p2_baseline, y2)
                nll_meter_baseline.update(loss_baseline.item(), batch_size)
                l_p1 += [log_p1_baseline.exp()]
                l_p2 += [log_p2_baseline.exp()]
                if (args.save_probabilities):
                    save_prob_baseline_start += [
                        log_p1_baseline.exp().detach().cpu().numpy()
                    ]
                    save_prob_baseline_end += [
                        log_p2_baseline.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_qanet):
                log_p1_qanet, log_p2_qanet = model_qanet(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_qanet = F.nll_loss(log_p1_qanet, y1) + F.nll_loss(
                    log_p2_qanet, y2)
                nll_meter_qanet.update(loss_qanet.item(), batch_size)
                # Get F1 and EM scores
                l_p1 += [log_p1_qanet.exp()]
                l_p2 += [log_p2_qanet.exp()]
                if (args.save_probabilities):
                    save_prob_qanet_start += [
                        log_p1_qanet.exp().detach().cpu().numpy()
                    ]
                    save_prob_qanet_end += [
                        log_p2_qanet.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_qanet_old):
                log_p1_qanet_old, log_p2_qanet_old = model_qanet_old(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_qanet_old = F.nll_loss(log_p1_qanet_old, y1) + F.nll_loss(
                    log_p2_qanet_old, y2)
                nll_meter_qanet_old.update(loss_qanet_old.item(), batch_size)
                # Get F1 and EM scores
                l_p1 += [log_p1_qanet_old.exp()]
                l_p2 += [log_p2_qanet_old.exp()]
                if (args.save_probabilities):
                    save_prob_qanet_old_start += [
                        log_p1_qanet_old.exp().detach().cpu().numpy()
                    ]
                    save_prob_qanet_old_end += [
                        log_p2_qanet_old.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_qanet_inde):
                log_p1_qanet_inde, log_p2_qanet_inde = model_qanet_inde(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_qanet_inde = F.nll_loss(
                    log_p1_qanet_inde, y1) + F.nll_loss(log_p2_qanet_inde, y2)
                nll_meter_qanet_inde.update(loss_qanet_inde.item(), batch_size)
                # Get F1 and EM scores
                l_p1 += [log_p1_qanet_inde.exp()]
                l_p2 += [log_p2_qanet_inde.exp()]
                if (args.save_probabilities):
                    save_prob_qanet_inde_start += [
                        log_p1_qanet_inde.exp().detach().cpu().numpy()
                    ]
                    save_prob_qanet_inde_end += [
                        log_p2_qanet_inde.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_qanet_s_e):
                log_p1_qanet_s_e, log_p2_qanet_s_e = model_qanet_s_e(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_qanet_s_e = F.nll_loss(log_p1_qanet_s_e, y1) + F.nll_loss(
                    log_p2_qanet_s_e, y2)
                nll_meter_qanet_s_e.update(loss_qanet_s_e.item(), batch_size)
                # Get F1 and EM scores
                l_p1 += [log_p1_qanet_s_e.exp()]
                l_p2 += [log_p2_qanet_s_e.exp()]
                if (args.save_probabilities):
                    save_prob_qanet_s_e_start += [
                        log_p1_qanet_s_e.exp().detach().cpu().numpy()
                    ]
                    save_prob_qanet_s_e_end += [
                        log_p2_qanet_s_e.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_bidaf):
                log_p1_bidaf, log_p2_bidaf = model_bidaf(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_bidaf = F.nll_loss(log_p1_bidaf, y1) + F.nll_loss(
                    log_p2_bidaf, y2)
                nll_meter_bidaf.update(loss_bidaf.item(), batch_size)
                l_p1 += [log_p1_bidaf.exp()]
                l_p2 += [log_p2_bidaf.exp()]
                if (args.save_probabilities):
                    save_prob_bidaf_start += [
                        log_p1_bidaf.exp().detach().cpu().numpy()
                    ]
                    save_prob_bidaf_end += [
                        log_p2_bidaf.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_bidaf_fusion):
                log_p1_bidaf_fu, log_p2_bidaf_fu = model_bidaf_fu(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_bidaf_fu = F.nll_loss(log_p1_bidaf_fu, y1) + F.nll_loss(
                    log_p2_bidaf_fu, y2)
                nll_meter_bidaf_fu.update(loss_bidaf_fu.item(), batch_size)
                l_p1 += [log_p1_bidaf_fu.exp()]
                l_p2 += [log_p2_bidaf_fu.exp()]
                if (args.save_probabilities):
                    save_prob_bidaf_fu_start += [
                        log_p1_bidaf_fu.exp().detach().cpu().numpy()
                    ]
                    save_prob_bidaf_fu_end += [
                        log_p2_bidaf_fu.exp().detach().cpu().numpy()
                    ]

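            # Ensemble: average the start/end probability distributions of
            # every loaded model before picking a span.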
            p1, p2 = l_p1[0], l_p2[0]
            for i in range(1, nbr_model):
                p1 += l_p1[i]
                p2 += l_p2[i]
            p1 /= nbr_model
            p2 /= nbr_model

            starts, ends = util.discretize(p1, p2, args.max_ans_len,
                                           args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            if args.split != 'test':
                # No labels for the test set, so NLL would be invalid
                if (args.load_path_qanet):
                    progress_bar.set_postfix(NLL=nll_meter_qanet.avg)
                elif (args.load_path_bidaf):
                    progress_bar.set_postfix(NLL=nll_meter_bidaf.avg)
                elif (args.load_path_bidaf_fusion):
                    progress_bar.set_postfix(NLL=nll_meter_bidaf_fu.avg)
                elif (args.load_path_qanet_old):
                    progress_bar.set_postfix(NLL=nll_meter_qanet_old.avg)
                elif (args.load_path_qanet_inde):
                    progress_bar.set_postfix(NLL=nll_meter_qanet_inde.avg)
                elif (args.load_path_qanet_s_e):
                    progress_bar.set_postfix(NLL=nll_meter_qanet_s_e.avg)
                else:
                    progress_bar.set_postfix(NLL=nll_meter_baseline.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    if (args.save_probabilities):
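        # Note: every branch below writes to the same probs_start/probs_end
        # files, so when several models are loaded the last dump wins.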
        if (args.load_path_baseline):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_baseline_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_baseline_end, fp)

        if (args.load_path_bidaf):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_bidaf_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_bidaf_end, fp)

        if (args.load_path_bidaf_fusion):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_bidaf_fu_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_bidaf_fu_end, fp)

        if (args.load_path_qanet):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_end, fp)

        if (args.load_path_qanet_old):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_old_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_old_end, fp)

        if (args.load_path_qanet_inde):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_inde_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_inde_end, fp)

        if (args.load_path_qanet_s_e):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_s_e_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_s_e_end, fp)

    # Log results (except for test set, since it does not come with labels)
    if args.split != 'test':
        results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)
        if (args.load_path_qanet):
            meter_avg = nll_meter_qanet.avg
        elif (args.load_path_bidaf):
            meter_avg = nll_meter_bidaf.avg
        elif (args.load_path_bidaf_fusion):
            meter_avg = nll_meter_bidaf_fu.avg
        elif (args.load_path_qanet_inde):
            meter_avg = nll_meter_qanet_inde.avg
        elif (args.load_path_qanet_s_e):
            meter_avg = nll_meter_qanet_s_e.avg
        elif (args.load_path_qanet_old):
            meter_avg = nll_meter_qanet_old.avg
        else:
            meter_avg = nll_meter_baseline.avg
        results_list = [('NLL', meter_avg), ('F1', results['F1']),
                        ('EM', results['EM'])]
        if args.use_squad_v2:
            results_list.append(('AvNA', results['AvNA']))
        results = OrderedDict(results_list)

        # Log to console
        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
        log.info(f'{args.split.title()} {results_str}')

        # Log to TensorBoard
        tbx = SummaryWriter(args.save_dir)
        util.visualize(tbx,
                       pred_dict=pred_dict,
                       eval_path=eval_file,
                       step=0,
                       split=args.split,
                       num_visuals=args.num_visuals)

    # Write submission file
    sub_path = join(args.save_dir, args.split + '_' + args.sub_file)
    log.info(f'Writing submission file to {sub_path}...')
    with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
        csv_writer = csv.writer(csv_fh, delimiter=',')
        csv_writer.writerow(['Id', 'Predicted'])
        for uuid in sorted(sub_dict):
            csv_writer.writerow([uuid, sub_dict[uuid]])
Example #16
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    # Args:  word_vectors: word vector tensor of dimension [vocab_size * wemb_dim]
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get Model
    log.info('Building Model...')
    model = QANet(word_vectors,
                  char_vectors,
                  args.para_limit,
                  args.ques_limit,
                  args.f_model,
                  num_head=args.num_head,
                  train_cemb=(not args.pretrained_char))
    model = nn.DataParallel(model, args.gpu_ids)
    
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(
        params=parameters,
        lr=args.lr,
        betas=(args.beta1, args.beta2),
        eps=1e-8,
        weight_decay=3e-7)
    cr = 1.0 / math.log(args.lr_warm_up_num)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda ee: cr * math.log(ee + 1)
        if ee < args.lr_warm_up_num else 1)
    loss_f = torch.nn.CrossEntropyLoss()
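    # NOTE: the model outputs below are named log_p1/log_p2; if they are
    # already log-probabilities, F.nll_loss would be the matching criterion
    # (CrossEntropyLoss applies log_softmax internally).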

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)

                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = torch.mean(loss_f(log_p1, y1) + loss_f(log_p2, y2))
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch,
                                         NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR',
                               optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
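
# The evaluate() helper used above is not shown; a minimal sketch of its
# likely shape, modeled on the evaluation loops elsewhere in these examples
# (the exact signature and the util/json_load helpers are assumptions):
def evaluate_sketch(model, data_loader, device, eval_file, max_len, use_squad_v2):
    nll_meter = util.AverageMeter()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    model.eval()
    with torch.no_grad():
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            cw_idxs, qw_idxs = cw_idxs.to(device), qw_idxs.to(device)
            cc_idxs, qc_idxs = cc_idxs.to(device), qc_idxs.to(device)
            log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), cw_idxs.size(0))
            starts, ends = util.discretize(log_p1.exp(), log_p2.exp(),
                                           max_len, use_squad_v2)
            idx2pred, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                              starts.tolist(), ends.tolist(),
                                              use_squad_v2)
            pred_dict.update(idx2pred)
    model.train()
    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results['NLL'] = nll_meter.avg
    return results, pred_dict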
Example #17
def train(args):
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    if args.gpu_ids == 'cpu':
        device, args.gpu_ids = torch.device('cpu'), []
    else:
        device, args.gpu_ids = util.get_available_devices()
    log.info('training on device {} with gpu_id {}'.format(str(device), str(args.gpu_ids)))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    log.info('Building model...')
    if args.task == 'tag':
        model = SummarizerLinear()
#        model = SummarizerLinearAttended(128, 256)
#        model = SummarizerRNN(128, 256)
    else:
        model = SummarizerAbstractive(128, 256, device)
    if len(args.gpu_ids) > 0:
        model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()

    ## get a saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    optimizer = optim.Adam(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)

    log.info('Building dataset...')
    data_path = PROCESSED_DATA_SUPER_TINY if args.split == 'super_tiny' else PROCESSED_DATA
    with open(data_path, 'rb') as f:
        all_data = pickle.load(f)
    if 'tiny' in args.split:
        train_split = all_data['tiny']
        dev_split = all_data['tiny']
    else:
        train_split = all_data['train']
        dev_split = all_data['dev']
    train_dataset = SummarizationDataset(
            train_split['X'], train_split['y'], train_split['gold'])
    dev_dataset = SummarizationDataset(
            dev_split['X'], dev_split['y'], dev_split['gold'])
    collate_fn = tag_collate_fn if args.task == 'tag' else decode_collate_fn
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=collate_fn)
    dev_loader = data.DataLoader(dev_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=False,
                                   collate_fn=collate_fn)
    ## Train!
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        batch_num = 0
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for X, y, _ in train_loader:
                batch_size = X.size(0)
                batch_num += 1
                X = X.to(device)
                y = y.float().to(device) # (batch_size, max_len) for tag, (batch_size, 110) for decode
                optimizer.zero_grad()
                if args.task == 'tag':
                    logits = model(X) # (batch_size, max_len)
                    mask = (X != PAD_VALUE).float() # 1 for real data, 0 for pad, size of (batch_size, max_len)
                    bce = F.binary_cross_entropy_with_logits(logits, y, reduction='none')
                    # Average over real (non-pad) positions only, not the full padded grid
                    loss = (bce * mask).sum() / mask.sum()
                    loss_val = loss.item()
                else:
                    logits = model(X, y[:, :-1].long()) # (batch_size, 109, max_len)
                    # cross_entropy expects Long class indices as targets
                    loss = sum(F.cross_entropy(logits[i], y[i, 1:].long(),
                                               ignore_index=-1, reduction='mean')
                               for i in range(batch_size)) / batch_size
                    loss_val = loss.item()
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                # scheduler.step(step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch,
                                         Loss=loss_val)
                tbx.add_scalar('train/Loss', loss_val, step)
                tbx.add_scalar('train/LR',
                               optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    results, pred_dict = evaluate(args, model, dev_loader, device)
                    if results is None:
                        log.info('Model predicted no selection for the whole batch; skipping save')
                        continue
                    saver.save(step, model, results[args.metric_name], device)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
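
# tag_collate_fn / decode_collate_fn are not shown; a hypothetical sketch of a
# tag-style collate function that pads token sequences and per-token labels to
# the batch maximum (pad_value is assumed to match the PAD_VALUE constant used
# above):
import torch

def tag_collate_fn_sketch(batch, pad_value=0):
    xs, ys, golds = zip(*batch)
    max_len = max(x.size(0) for x in xs)
    X = torch.full((len(xs), max_len), pad_value, dtype=torch.long)
    y = torch.zeros(len(xs), max_len)
    for i, (x, t) in enumerate(zip(xs, ys)):
        X[i, :x.size(0)] = x   # copy real tokens, leave the rest as pad
        y[i, :t.size(0)] = t   # labels aligned with tokens
    return X, y, list(golds)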
Example #18
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as sched
import torch.utils.data as data
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor

if __name__ == '__main__':

    args = get_test_args()

    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, subdir='test')
    log = util.get_logger(args.save_dir, args.name)
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')

    # Load the checkpoint if given as parameter
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model = util.load_model(args.load_path)

    else:
        # Get model
        log.info('Building model...')
        model = util.get_model_class(args.model)(args)

    # Load the reward model
Example #19
def main(data, flags):
    # Set up logging and devices
    log_dir = data.logging_dir
    log = util.get_logger(log_dir, "toy")
    tbx = SummaryWriter(data.logging_dir)
    device, data.gpu_ids = util.get_available_devices()
    log.info('Config: {}'.format(dumps(vars(data), indent=4, sort_keys=True)))
    data.batch_size *= max(1, len(data.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(data.random_seed))
    random.seed(data.random_seed)
    np.random.seed(data.random_seed)
    torch.manual_seed(data.random_seed)
    torch.cuda.manual_seed_all(data.random_seed)

    if flags[1] == "toy":
        word_emb_file = data.toy_word_emb_file
        training_data = data.toy_record_file_exp3
        test_data = data.dev_record_file_exp3
        eval_file = data.toy_eval_exp3
    elif flags[1] == "train":
        word_emb_file = data.word_emb_file
        training_data = data.train_record_file_exp3
        test_data = data.dev_record_file_exp3
        eval_file = data.train_eval_exp3
    elif flags[1] == "dev":
        word_emb_file = data.word_emb_file
        training_data = data.dev_record_file_exp3
        test_data = data.toy_record_file_exp3
        eval_file = data.dev_eval_exp3
    else:
        raise ValueError('Unknown run flag: {}'.format(flags[1]))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=data.hidden_size,
                  drop_prob=data.drop_prob)
    model = nn.DataParallel(model, data.gpu_ids)
    if data.load_path:
        log.info('Loading checkpoint from {}...'.format(data.load_path))
        model, step = util.load_model(model, data.load_path, data.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, data.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(data.logging_dir,
                                 max_checkpoints=10,
                                 metric_name=data.metric_name,
                                 maximize_metric=data.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(),
                               data.learning_rate,
                               weight_decay=data.learning_weight_decay)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    # np.load(data.toy_record_file_exp3)
    train_dataset = SQuAD3(training_data, use_v2=True)
    train_loader = torchdata.DataLoader(train_dataset,
                                        batch_size=data.batch_size,
                                        shuffle=True,
                                        num_workers=data.num_workers,
                                        collate_fn=collate_fn)

    test_dataset = SQuAD3(test_data, use_v2=True)
    test_loader = torchdata.DataLoader(test_dataset,
                                       batch_size=data.batch_size,
                                       shuffle=False,
                                       num_workers=data.num_workers,
                                       collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = data.eval_steps
    epoch = step // len(train_dataset)
    while epoch != data.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log.info("cw_idxs length: {}".format(str(len(cw_idxs))))
                log.info("qw_idxs length: {}".format(str(len(qw_idxs))))
                log.info("cw_idxs size: {}".format(str(
                    sys.getsizeof(cw_idxs))))
                log.info("qw_idxs size: {}".format(str(
                    sys.getsizeof(qw_idxs))))
                log.info("cw_idxs shape: {}".format(str(cw_idxs.shape)))
                log.info("qw_idxs shape: {}".format(str(qw_idxs.shape)))

                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         data.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('toy/NLL', loss_val, step)
                tbx.add_scalar('toy/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = data.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model,
                                                  test_loader,
                                                  device,
                                                  eval_path=eval_file,
                                                  max_len=sys.maxsize,
                                                  use_squad_v2=True)
                    saver.save(step, model, results[data.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=data.num_visuals)
Example #20
def main(args):
    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    #log = util.get_logger(args.save_dir, args.name)
    #log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    print('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    print('Building model...')
    model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size)
    model = nn.DataParallel(model, gpu_ids)
    print(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    print('Building dataset...')
    #record_file = vars(args)[f'{args.split}_record_file']
    dataset = SQuAD("./data/my_test.npz", args.use_squad_v2)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    print(f'Evaluating on {args.split} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}  # Predictions for submission
    #eval_file = vars(args)[f'{args.split}_eval_file']
    with open("./data/my_test_eval.json", 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            print("viewing the dataset")
            print(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)
            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len,
                                           args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            #if args.split != 'test':
            # No labels for the test set, so NLL would be invalid
            #progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    print("my evaluation ....")

    for el in pred_dict:
        print(el, pred_dict[el])

    for el in sub_dict:
        print(el, sub_dict[el])
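
# util.convert_tokens maps predicted token spans back to answer strings; a
# hypothetical sketch of that mapping (the eval-dict field names are
# assumptions based on how it is used above):
def convert_tokens_sketch(eval_dict, qa_ids, starts, ends, use_squad_v2):
    idx2pred, uuid2pred = {}, {}
    for qid, start, end in zip(qa_ids, starts, ends):
        entry = eval_dict[str(qid)]
        context, spans, uuid = entry['context'], entry['spans'], entry['uuid']
        if use_squad_v2 and start == 0 and end == 0:
            answer = ''  # assumption: the (0, 0) span encodes "no answer"
        else:
            # spans[k] holds the character offsets of token k in the context.
            answer = context[spans[start][0]:spans[end][1]]
        idx2pred[str(qid)] = answer
        uuid2pred[uuid] = answer
    return idx2pred, uuid2pred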
Example #21
def main(args):
    if args.large:
        args.train_record_file += '_large'
        args.dev_eval_file += '_large'
        model_name = "albert-xlarge-v2"
    else:
        model_name = "albert-base-v2"
    if args.xxlarge:
        args.train_record_file += '_xxlarge'
        args.dev_eval_file += '_xxlarge'
        model_name = "albert-xxlarge-v2"
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get model
    log.info('Building model...')
    if args.bidaf:
        char_vectors = util.torch_from_json(args.char_emb_file)

    if args.model_name == 'albert_highway':
        model = models.albert_highway(model_name)
    elif args.model_name == 'albert_lstm_highway':
        model = models.LSTM_highway(model_name, hidden_size=args.hidden_size)
    elif args.model_name == 'albert_bidaf':
        model = models.BiDAF(char_vectors=char_vectors,
                             hidden_size=args.hidden_size,
                             drop_prob=args.drop_prob)
    elif args.model_name == 'albert_bidaf2':
        model = models.BiDAF2(model_name=model_name,
                              char_vectors=char_vectors,
                              hidden_size=args.hidden_size,
                              drop_prob=args.drop_prob)
    else:
        model = AlbertForQuestionAnswering.from_pretrained(args.model_name)

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2,
                          args.bidaf)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers)
    dev_dataset = SQuAD(args.dev_eval_file, args.use_squad_v2, args.bidaf)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)

    with open(args.dev_gold_file) as f:
        gold_dict = json.load(f)

    tokenizer = AlbertTokenizer.from_pretrained(model_name)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for batch in train_loader:
                batch = tuple(t.to(device) for t in batch)
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    'start_positions': batch[3],
                    'end_positions': batch[4],
                }
                if args.bidaf:
                    inputs['char_ids'] = batch[6]
                y1 = batch[3]
                y2 = batch[4]
                # Setup for forward
                batch_size = inputs["input_ids"].size(0)
                optimizer.zero_grad()

                # Forward
                # log_p1, log_p2 = model(**inputs)
                y1, y2 = y1.to(device), y2.to(device)
                outputs = model(**inputs)
                loss = outputs[0]
                loss = loss.mean()
                # loss_fct = nn.CrossEntropyLoss()
                # loss = loss_fct(log_p1, y1) + loss_fct(log_p2, y2)
                # loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(args, model, dev_dataset,
                                                  dev_loader, gold_dict,
                                                  tokenizer, device,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
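
# Note: util.EMA is not shown in these snippets. The sketch below is a
# minimal exponential-moving-average helper consistent with how it is called
# above (ema(model, step // batch_size) after each optimizer step,
# ema.assign(model) before eval, ema.resume(model) after) -- an assumption,
# not necessarily the exact util implementation.
class EMA:
    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {}    # moving-average copy of each parameter
        self.original = {}  # backup of live weights while evaluating
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data.clone()

    def __call__(self, model, num_updates):
        # Bias-correct the decay early in training
        decay = min(self.decay, (1.0 + num_updates) / (10.0 + num_updates))
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = \
                    (1.0 - decay) * param.data + decay * self.shadow[name]

    def assign(self, model):
        # Swap the averaged weights in for evaluation
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.original[name] = param.data.clone()
                param.data = self.shadow[name]

    def resume(self, model):
        # Restore the live training weights
        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data = self.original[name]
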
def test_model(questions,
               context,
               use_squad_v2=True,
               model_path="../save/training-02/best.pth.tar"):
    # Set up logging
    #args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    #log = util.get_logger(args.save_dir, args.name)
    #log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    #args = get_test_args()
    device, gpu_ids = util.get_available_devices()
    batch_size = 64 * max(1, len(gpu_ids))

    # Get embeddings
    #print('Loading embeddings...')
    word_vectors = util.torch_from_json('../data/word_emb.json')

    # Get model
    #print('Building model...')
    model = BiDAF(word_vectors=word_vectors, hidden_size=100)
    model = nn.DataParallel(model, gpu_ids)
    #model_path = "../save/training-02/best.pth.tar"
    #print(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, model_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    #print('Building dataset...')
    #record_file = vars(args)[f'{args.split}_record_file']
    # My code starts here.
    # This is a simple approach for dealing with the user data;
    # depending on how you build the interface you can change this code.
    # Also check the function "process_file" in the setup.py file.
    processed_questions = []
    for index, question in enumerate(questions):
        processed_question = {
            "question": question,
            "id": index,
            "answers": [{
                "answer_start": 0,
                "text": "never mind"
            }]
        }
        processed_questions.append(processed_question)
    source = {"paragraphs": [{"qas": processed_questions, "context": context}]}
    word_counter, char_counter = Counter(), Counter()
    with open("../data/word2idx.json", "r") as f1:
        word2idx_dict = json.load(f1)
    with open("../data/char2idx.json", "r") as f2:
        char2idx_dict = json.load(f2)
    my_test_examples, my_test_eval = process_file(source, "my_test",
                                                  word_counter, char_counter)
    npz = build_features(my_test_examples,
                         "my_test",
                         word2idx_dict,
                         char2idx_dict,
                         is_test=True)
    # My code ends here
    dataset = SQuAD(npz, use_squad_v2)
    data_loader = data.DataLoader(dataset,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=4,
                                  collate_fn=collate_fn)

    # Evaluate
    #print(f'Evaluating on {args.split} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}  # Predictions for submission
    #eval_file = vars(args)[f'{args.split}_eval_file']
    gold_dict = my_test_eval
    #print("gold_dict", gold_dict)
    #print("data_loader", data_loader)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)
            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, 15, use_squad_v2)
            print("starts ", starts, " ends ", ends)

            # Log info
            progress_bar.update(batch_size)
            #if args.split != 'test':
            # No labels for the test set, so NLL would be invalid
            #progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    #print("my evaluation ....")

    #for el in pred_dict:
    #print(el, pred_dict[el])

    #for el in sub_dict:
    #print(el, sub_dict[el])
    return pred_dict
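
# The call util.discretize(p1, p2, 15, use_squad_v2) above converts the
# start/end probability distributions into concrete span indices. Its
# implementation is not shown here; a minimal sketch of what it is assumed
# to do (maximize p1[start] * p2[end] subject to start <= end < start +
# max_len, with index 0 reserved for no-answer in SQuAD 2.0):
import torch

def discretize(p_start, p_end, max_len=15, no_answer=False):
    # Joint probability of every (start, end) pair: (batch, seq, seq)
    p_joint = torch.matmul(p_start.unsqueeze(2), p_end.unsqueeze(1))
    # Zero out pairs with end < start or end - start >= max_len
    seq_len = p_joint.size(1)
    band = torch.triu(torch.ones(seq_len, seq_len, device=p_joint.device))
    band = band - torch.triu(band, diagonal=max_len)
    p_joint = p_joint * band
    if no_answer:
        # (0, 0) encodes "no answer" in the SQuAD 2.0 preprocessing
        p_no_answer = p_joint[:, 0, 0].clone()
        p_joint[:, 0, :] = 0.
        p_joint[:, :, 0] = 0.
    # Argmax over the flattened (start, end) grid
    max_in_row, end_idxs = torch.max(p_joint, dim=2)
    max_prob, starts = torch.max(max_in_row, dim=1)
    ends = end_idxs.gather(1, starts.unsqueeze(1)).squeeze(1)
    if no_answer:
        starts = torch.where(p_no_answer > max_prob,
                             torch.zeros_like(starts), starts)
        ends = torch.where(p_no_answer > max_prob,
                           torch.zeros_like(ends), ends)
    return starts, ends
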
Example #23
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    # Comment out to only use 1 GPU on nv12
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings

    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = None
    max_context_len, max_question_len = args.para_limit, args.ques_limit
    if (args.model_type == "bidaf" or args.model_type == "bert-bidaf"):
        model = BiDAF(word_vectors=word_vectors,
                      hidden_size=args.hidden_size,
                      drop_prob=args.drop_prob)
    elif (args.model_type == "dcn" or args.model_type == "bert-dcn"):
        model = DCN(word_vectors=word_vectors,
                    hidden_size=args.hidden_size,
                    max_context_len=max_context_len,
                    max_question_len=max_question_len,
                    drop_prob=args.drop_prob)
    elif (args.model_type == "bert-basic"):
        model = BERT(word_vectors=word_vectors,
                     hidden_size=args.hidden_size,
                     drop_prob=args.drop_prob)

    if model is None:
        raise ValueError('Model is unassigned. Please ensure --model_type '
                         'chooses between {bidaf, bert-bidaf, dcn, bert-dcn, bert-basic}')

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(),
                               args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    count_skip = 0
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                batch_size = cw_idxs.size(0)
                count_skip += 1
                if args.skip_examples and count_skip % 5 != 0:  # train on 1 of every 5 batches
                    step += batch_size
                    progress_bar.update(batch_size)
                    steps_till_eval -= batch_size
                    continue
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                ## Additions for BERT ##
                max_context_len, max_question_len = args.para_limit, args.ques_limit

                if "bert" in args.model_type:
                    bert_train_embeddings = get_embeddings(
                        "train", ids, args.para_limit, args.ques_limit)
                else:
                    bert_train_embeddings = None

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs, bert_train_embeddings, \
                max_context_len, max_question_len, device)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2, args)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
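
# As in the other examples, the models here return log-probabilities over
# start and end positions, so the training loss is a sum of two NLL terms.
# A tiny self-contained shape check (all sizes made up):
import torch
import torch.nn.functional as F

batch_size, seq_len = 4, 50
log_p1 = F.log_softmax(torch.randn(batch_size, seq_len), dim=-1)  # start dist
log_p2 = F.log_softmax(torch.randn(batch_size, seq_len), dim=-1)  # end dist
y1 = torch.randint(0, seq_len, (batch_size,))  # gold start indices
y2 = torch.randint(0, seq_len, (batch_size,))  # gold end indices
loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
print(loss.item())  # scalar; each term is averaged over the batch
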
Example #24
def main(args):
    # Set up logging and devices (unchanged from train.py)
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)                  # train only, not in test
    device, args.gpu_ids = util.get_available_devices() # todo(small): should this be args (compare test_para)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))        # args.py: default size is 64

    # Set random seed (unchanged) - train only
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Prepare BiDAF model (must already be trained)
    log.info('Building BiDAF model (should be pretrained)')
    bidaf_model = BiDAF(word_vectors=word_vectors,          # todo: these word vectors shouldn't matter?
                          hidden_size=args.hidden_size)     # since they will be loaded in during load_model?
                          #drop_prob=args.drop_prob)        # no drop probability since we are not training
    bidaf_model = nn.DataParallel(bidaf_model, args.gpu_ids)

    if args.short_test:
        args.hidden_size = 5
    elif not args.load_path:
        log.info("Trying to trian paraphraser withou bidaf model. "
                 "First train BiDAF and then specify the load path. Exiting")
        exit(1)
    else:
        log.info(f'Loading checkpoint from {args.load_path}...')
        bidaf_model = util.load_model(bidaf_model, args.load_path, args.gpu_ids, return_step=False) # don't need step since we aren't training
        bidaf_model = bidaf_model.to(device)
        bidaf_model.eval()                  # we eval only (vs train)

    # todo: Set up the Paraphraser model
    paraphraser_model = Paraphraser(word_vectors=word_vectors,
                                    hidden_size=args.hidden_size,
                                    drop_prob=args.drop_prob)


    # Get data loader
    log.info('Building dataset...')
    # New for paraphrase: squad_paraphrase has extra fields
    train_dataset = SQuAD_paraphrase(args.train_record_file, args.use_squad_v2)    # train.npz (from setup.py, build_features())
    train_loader = data.DataLoader(train_dataset,                       # this dataloader used for all epoch iteration
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn_para)
    dev_dataset = SQuAD_paraphrase(args.dev_record_file, args.use_squad_v2)        # dev.npz (same as above)
    dev_loader = data.DataLoader(dev_dataset,                           # dev.npz used in evaluate() fcn
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn_para)

    # todo: this is just for looking at the paraphrases
    idx2word_dict = load(args.idx2word_file)

    # Get saver
    # saver = util.CheckpointSaver(args.save_dir,
    #                              max_checkpoints=args.max_checkpoints,
    #                              metric_name=args.metric_name,
    #                              maximize_metric=args.maximize_metric,
    #                              log=log)

    # Get optimizer and scheduler
    # ema = util.EMA(paraphraser_model, args.ema_decay)
    # optimizer = optim.Adadelta(paraphraser_model.parameters(), args.lr,
    #                            weight_decay=args.l2_wd)
    # scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    # Train
    step = 0
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)


    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, cphr_idxs, qphr_idxs, qphr_types, ids in train_loader:
                # Setup for forward
                # note that cc_idxs, qc_idxs are not used! (character indices)
                cw_idxs = cw_idxs.to(device)        # todo what does this actually do
                qw_idxs = qw_idxs.to(device)

                cphr_idxs = cphr_idxs.to(device)
                qphr_idxs = qphr_idxs.to(device)
                qphr_types = qphr_types.to(device)

                batch_size = cw_idxs.size(0)
                # if args.short_test:
                #     print(f'batch size: {batch_size}')
                #     for i, type in enumerate(cphr_idxs[0]):
                #         print(f'type: {i}')
                #         pp(type)
                #     for x in (qphr_idxs[0], qphr_types[0]):
                #         pp(x)
                #     return

                paraphrased = paraphraser_model(qphr_idxs, qphr_types, cphr_idxs)
                for idx, p in enumerate(paraphrased):   # enumerate over batch_size
                    non_zeros = p[p.nonzero()].squeeze()
                    #paraphrased[idx] = non_zeros
                    sentence_as_list = [idx2word_dict[str(w.item())] for w in non_zeros]
                    pp(" ".join(sentence_as_list))
                    #pp([idx2word_dict[w] for w in non_zeros])

                if args.short_test:
                    return

                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)      # // is floor division
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch,
                                         NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR',
                               optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,    # call eval with dev_loader
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
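
# The inner loop above strips padding (zeros) from each paraphrase and maps
# indices back to words via idx2word_dict, which is assumed to key on
# stringified indices. A minimal standalone version of that decoding:
import torch

def decode(indices, idx2word_dict):
    # Keep non-zero entries (zero is assumed to be the padding index)
    non_zeros = indices[indices.nonzero()].squeeze(-1)
    return " ".join(idx2word_dict[str(w.item())] for w in non_zeros)

idx2word_dict = {"1": "what", "2": "is", "3": "bidaf"}
print(decode(torch.tensor([1, 2, 3, 0, 0]), idx2word_dict))  # "what is bidaf"
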
Example #25
def main(args):
    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    '''
    model = QANet(word_vectors, args.hidden_size, args.char_embed_size, args.word_from_char_size,
                  args.dropout_main,
                  args.embed_encoder_num_convs, args.embed_encoder_conv_kernel_size,
                  args.embed_encoder_num_heads, args.embed_encoder_num_blocks,
                  args.model_encoder_num_convs, args.model_encoder_conv_kernel_size,
                  args.model_encoder_num_heads, args.model_encoder_num_blocks)
    '''
    char_vectors = util.torch_from_json(args.char_emb_file)
    model = BiDAF(word_vectors=word_vectors,
                  char_vectors=char_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, gpu_ids)
    log.info(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')
    record_file = vars(args)[f'{args.split}_record_file']
    dataset = SQuAD(record_file, args.use_squad_v2)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.split} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}  # Predictions for submission
    eval_file = vars(args)[f'{args.split}_eval_file']
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            cc_idxs = cc_idxs.to(device)
            qc_idxs = qc_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len,
                                           args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            if args.split != 'test':
                # No labels for the test set, so NLL would be invalid
                progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    # Log results (except for test set, since it does not come with labels)
    if args.split != 'test':
        results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)
        results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                        ('EM', results['EM'])]
        if args.use_squad_v2:
            results_list.append(('AvNA', results['AvNA']))
        results = OrderedDict(results_list)

        # Log to console
        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
        log.info(f'{args.split.title()} {results_str}')

        # Log to TensorBoard
        tbx = SummaryWriter(args.save_dir)
        util.visualize(tbx,
                       pred_dict=pred_dict,
                       eval_path=eval_file,
                       step=0,
                       split=args.split,
                       num_visuals=args.num_visuals)

    # Write submission file
    sub_path = join(args.save_dir, args.split + '_' + args.sub_file)
    log.info(f'Writing submission file to {sub_path}...')
    with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
        csv_writer = csv.writer(csv_fh, delimiter=',')
        csv_writer.writerow(['Id', 'Predicted'])
        for uuid in sorted(sub_dict):
            csv_writer.writerow([uuid, sub_dict[uuid]])
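
# util.eval_dicts (used above for the dev metrics) is not shown in these
# snippets; the F1 and EM it reports follow the standard SQuAD definitions,
# sketched below minus the usual punctuation/article normalization:
from collections import Counter

def exact_match(prediction, ground_truth):
    return float(prediction.strip().lower() == ground_truth.strip().lower())

def f1(prediction, ground_truth):
    pred_tokens = prediction.lower().split()
    gold_tokens = ground_truth.lower().split()
    common = Counter(pred_tokens) & Counter(gold_tokens)  # overlapping tokens
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)

print(f1("the eiffel tower", "eiffel tower"))  # 0.8
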
Example #26
def main():
    args = get_train_args()
    args.save_dir = util.get_save_dir(args.save_dir,
                                      args.bert_model,
                                      training=True)
    tbx = SummaryWriter(args.save_dir)
    device, gpu_ids = util.get_available_devices()

    # Set random seed
    logger.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty.".format(
                args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    logger.info("device: {} n_gpu: {}, 16-bits training: {}".format(
        device, len(gpu_ids), args.fp16))
    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    if args.load_path:
        output_model_file = os.path.join(args.load_path, WEIGHTS_NAME)
        output_config_file = os.path.join(args.load_path, CONFIG_NAME)
        config = BertConfig(output_config_file)
        model = BertForNQ(config)
        model.load_state_dict(torch.load(output_model_file), strict=False)
    else:
        model = BertForNQ.from_pretrained(
            args.bert_model,
            cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),
                                   'distributed_{}'.format(-1)))
    logger.info(model.config)

    if args.fp16:
        model.half()
    model.to(device)
    if len(gpu_ids) > 1:
        model = torch.nn.DataParallel(model)

    with open(args.train_file, "rb") as reader:
        train_features = pickle.load(reader)
    num_train_optimization_steps = int(
        len(train_features) / args.train_batch_size /
        args.gradient_accumulation_steps) * args.num_train_epochs

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    # hack to remove pooler, which is not used
    # and thus produces None grads that break apex
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    if args.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if args.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer,
                                       static_loss_scale=args.loss_scale)
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=num_train_optimization_steps)

    global_step = 0
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_features))
    logger.info("  Batch size = %d", args.train_batch_size)
    logger.info("  Num steps = %d", num_train_optimization_steps)

    all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                 dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                  dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                   dtype=torch.long)
    all_start_positions = torch.tensor(
        [f.start_position for f in train_features], dtype=torch.long)
    all_end_positions = torch.tensor([f.end_position for f in train_features],
                                     dtype=torch.long)
    all_ans_types = torch.tensor([f.ans_type for f in train_features],
                                 dtype=torch.long)
    train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                               all_start_positions, all_end_positions,
                               all_ans_types)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)
    model.train()
    for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
            if len(gpu_ids) == 1:
                batch = tuple(
                    t.to(device)
                    for t in batch)  # multi-gpu does scattering itself
            input_ids, input_mask, segment_ids, start_positions, end_positions, ans_types = batch
            loss = model(input_ids, segment_ids, input_mask, start_positions,
                         end_positions, ans_types)
            if len(gpu_ids) > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                optimizer.backward(loss)
            else:
                loss.backward()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    # modify learning rate with special warm up BERT uses
                    # if args.fp16 is False, BertAdam is used and handles this automatically
                    lr_this_step = args.learning_rate * warmup_linear(
                        global_step / num_train_optimization_steps,
                        args.warmup_proportion)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                optimizer.step()
                optimizer.zero_grad()
                tbx.add_scalar('train/NLL', loss.item(), global_step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               global_step)
                global_step += 1
                if global_step % 5000 == 0:
                    model_to_save = model.module if hasattr(
                        model,
                        'module') else model  # Only save the model itself
                    out_dir = os.path.join(args.output_dir,
                                           str(global_step // 5000))
                    if not os.path.exists(out_dir):
                        os.makedirs(out_dir)
                    output_model_file = os.path.join(out_dir, WEIGHTS_NAME)
                    torch.save(model_to_save.state_dict(), output_model_file)
                    output_config_file = os.path.join(out_dir, CONFIG_NAME)
                    with open(output_config_file, 'w') as f:
                        f.write(model_to_save.config.to_json_string())

    # Save a trained model and the associated configuration
    model_to_save = model.module if hasattr(
        model, 'module') else model  # Only save the model itself
    output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
    torch.save(model_to_save.state_dict(), output_model_file)
    output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
    with open(output_config_file, 'w') as f:
        f.write(model_to_save.config.to_json_string())
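
# warmup_linear (used in the fp16 branch above) is the schedule from
# pytorch-pretrained-bert: a linear ramp from 0 to the base LR over the
# warmup fraction, then linear decay to 0 over remaining progress x in [0, 1].
def warmup_linear(x, warmup=0.002):
    if x < warmup:
        return x / warmup  # ramp up
    return 1.0 - x         # linear decay

# Usage, as in the loop above:
# lr_this_step = args.learning_rate * warmup_linear(
#     global_step / num_train_optimization_steps, args.warmup_proportion)
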
Example #27
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(),
                               args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)  # // is floor division
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
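
# util.load_model (called in these examples both with and without
# return_step) is not included here; the sketch below is only an assumption
# consistent with both call sites, supposing checkpoints store 'model_state'
# and 'step' keys as a CheckpointSaver would typically write them.
import torch

def load_model(model, checkpoint_path, gpu_ids, return_step=True):
    device = f'cuda:{gpu_ids[0]}' if gpu_ids else 'cpu'
    ckpt = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(ckpt['model_state'])  # model is DataParallel-wrapped
    if return_step:
        return model, ckpt['step']
    return model
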
Example #28
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')

    model = BiDAF(vectors=(word_vectors, char_vectors),
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob,
                  p_sdd=args.p_sdd,
                  char_limit=args.char_limit,
                  use_transformer=args.use_transformer,
                  inter_size=args.inter_size,
                  heads=args.heads,
                  c2w_size=args.c2w_size,
                  enc_blocks=args.enc_blocks,
                  enc_convs=args.enc_convs,
                  mod_blocks=args.mod_blocks,
                  mod_convs=args.mod_convs,
                  use_GRU=args.use_GRU)

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path,
                                      args.gpu_ids)  # uses the saved step num
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    # optimizer = optim.Adadelta(model.parameters(), args.lr,
    #                            weight_decay=args.l2_wd)

    # The scheduler MULTIPLIES the base LR, it does NOT replace it
    optimizer = optim.Adam(model.parameters(),
                           1.,
                           betas=(.9, .98),
                           eps=1e-9,
                           weight_decay=args.l2_wd)

    scheduler = sched.LambdaLR(
        optimizer, lambda s: 0.001 * math.log(s + 1) / math.log(1000 - 1)
        if s < 1000 else 0.001)  # Chute (must use math.log, else TypeError)
    # scheduler = sched.LambdaLR(optimizer, lambda s: (args.hidden_size**(-.5)) *
    #                            min((s+1e-9)**(-.5), s*(4000**(-1.5)))
    #                            )  # From Vaswani et al., 2017

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:

                # Setup for forward
                optimizer.zero_grad()
                batch_size = cw_idxs.size(0)

                cc_idxs = cc_idxs.to(device)  # (batch, c_limit, char_limit)
                qc_idxs = qc_idxs.to(device)
                cw_idxs = cw_idxs.to(device)  # (batch, c_limit)
                qw_idxs = qw_idxs.to(device)

                c_idxs, q_idxs = (cw_idxs, cc_idxs), (qw_idxs, qc_idxs)

                # Forward
                log_p1, log_p2 = model(c_idxs, q_idxs)

                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(
                    step // batch_size
                )  # By default, schedules per epoch; pass in step # as "epoch"
                ema(model, step // batch_size)

                # Log info
                step += batch_size  # Number of examples. Step is usually the number of (mini)-batches
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
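
# For reference, with a base LR of 1.0 the LambdaLR multiplier above IS the
# effective learning rate: it rises logarithmically to 1e-3 over the first
# 1000 scheduler steps and stays constant afterwards. A quick check:
import math

def lr_lambda(s):
    return 0.001 * math.log(s + 1) / math.log(1000 - 1) if s < 1000 else 0.001

for s in (0, 10, 100, 999, 1000, 5000):
    print(s, round(lr_lambda(s), 6))
# 0 -> 0.0, 10 -> ~0.000347, 100 -> ~0.000668, 999 -> ~0.001, then constant
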
Example #29
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Load embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Build QA model
    log.info('Building model...')
    model = QA_Model(word_vectors=word_vectors,
                     hidden_size=args.hidden_size,
                     drop_prob=args.drop_prob,
                     attention_type=args.attention_type,
                     train_embeddings=args.train_embeddings)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        # Load QA model from file
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer
    optimizer = optim.Adadelta(model.parameters(),
                               args.lr,
                               weight_decay=args.l2_wd)
    #optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')

                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, type="train")
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get your model
    log.info('Building model...')
    model, step = get_model(log, args)
    model = model.to(device)
    model.train()

    # Exponential moving average
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           betas=[0.8, 0.999],
                           eps=1e-7,
                           weight_decay=args.l2_wd)

    scheduler = sched.LambdaLR(optimizer, lambda step: 1)

    # Get loss computer
    cri = FocalLoss(alpha=torch.tensor([args.alpha, 1]).to(device),
                    gamma=args.gamma)

    # Get data loader
    log.info('Building dataset...')

    dev_dataset = util.load_dataset(args.dev_file,
                                    args.PPI_dir,
                                    args.PPI_gene_feature_dir,
                                    args.PPI_gene_query_dict_dir,
                                    args.max_nodes,
                                    train=False)

    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=util.collate_fn)

    train_dataset = util.load_dataset(args.train_file, args.PPI_dir,
                                      args.PPI_gene_feature_dir,
                                      args.PPI_gene_query_dict_dir,
                                      args.max_nodes)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=util.collate_fn)
    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = 0
    while epoch != args.num_epochs:
        epoch += 1

        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for batch_a, batch_bio_a, batch_A, batch_b, batch_bio_b, batch_B, batch_y in train_loader:
                # Setup for forward
                batch_a = batch_a.to(device)
                batch_bio_a = batch_bio_a.to(device)
                batch_A = batch_A.to(device)
                batch_bio_b = batch_bio_b.to(device)
                batch_b = batch_b.to(device)
                batch_B = batch_B.to(device)
                batch_y = batch_y.to(device)
                batch_y = batch_y.long()
                batch_size = batch_bio_a.size(0)
                optimizer.zero_grad()
                # Forward
                output = model(batch_a, batch_bio_a, batch_A, batch_b,
                               batch_bio_b, batch_B)
                loss = cri(output, batch_y)
                #loss = F.nll_loss(output, batch_y)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step()
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/Loss', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results = evaluate(model, dev_loader, cri, device)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.5f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
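
# FocalLoss is not defined in these snippets. The sketch below is a standard
# two-class focal loss (Lin et al., 2017) consistent with the call
# cri = FocalLoss(alpha=torch.tensor([args.alpha, 1]).to(device),
# gamma=args.gamma) on (batch, 2) outputs and integer labels; it assumes the
# model emits raw logits.
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha, gamma=2.0):
        super().__init__()
        self.alpha = alpha  # per-class weights, shape (num_classes,)
        self.gamma = gamma

    def forward(self, logits, targets):
        log_p = F.log_softmax(logits, dim=-1)
        log_pt = log_p.gather(1, targets.unsqueeze(1)).squeeze(1)  # log p_t
        pt = log_pt.exp()
        alpha_t = self.alpha[targets]
        # FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t)
        return (-alpha_t * (1.0 - pt) ** self.gamma * log_pt).mean()

cri = FocalLoss(alpha=torch.tensor([0.25, 1.0]), gamma=2.0)
loss = cri(torch.randn(8, 2), torch.randint(0, 2, (8,)))
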