Example #1
def create_trainer():
    model = Baseline(bert_vocab_num=24000,
                     emb_dim=300,
                     hidden_dim=256,
                     output_dim=3).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    criterion = torch.nn.CrossEntropyLoss()
    # criterion = FocalLoss(num_classes=3)

    trainer = Trainer(model, optimizer, criterion, NUM_EPOCH, device)
    return trainer
Example #2
def main(config):
    setup_logging(os.getcwd())
    logger = logging.getLogger('test')

    use_gpu = config['n_gpu'] > 0 and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_gpu else 'cpu')

    datamanager = DataManger(config['data'], phase='test')
    
    model = Baseline(num_classes=datamanager.datasource.get_num_classes(
        'train'), is_training=False)
    model = model.eval()

    logger.info('Loading checkpoint: {} ...'.format(config['resume']))
    checkpoint = torch.load(config['resume'], map_location='cpu')

    model.load_state_dict(checkpoint['state_dict'])

    if config['extract']:
        logger.info('Extract feature from query set...')
        query_feature, query_label = feature_extractor(
            model, datamanager.get_dataloader('query'), device)

        logger.info('Extract feature from gallery set...')
        gallery_feature, gallery_label = feature_extractor(
            model, datamanager.get_dataloader('gallery'), device)

        gallery_embeddings = (gallery_feature, gallery_label)
        query_embeddings = (query_feature, query_label)

        os.makedirs(config['testing']['ouput_dir'], exist_ok=True)

        with open(os.path.join(config['testing']['ouput_dir'], 'gallery_embeddings.pt'), 'wb') as f:
            torch.save(gallery_embeddings, f)

        with open(os.path.join(config['testing']['ouput_dir'], 'query_embeddings.pt'), 'wb') as f:
            torch.save(query_embeddings, f)

    gallery_feature, gallery_label = torch.load(os.path.join(
        config['testing']['ouput_dir'], 'gallery_embeddings.pt'), map_location='cpu')
    query_feature, query_label = torch.load(os.path.join(
        config['testing']['ouput_dir'], 'query_embeddings.pt'), map_location='cpu')

    distance = compute_distance_matrix(query_feature, gallery_feature)

    top1 = top_k(distance, output=gallery_label, target=query_label, k=1)
    top5 = top_k(distance, output=gallery_label, target=query_label, k=5)
    top10 = top_k(distance, output=gallery_label, target=query_label, k=10)
    m_ap = mAP(distance, output=gallery_label, target=query_label, k='all')

    logger.info('Datasets: {}, without spatial-temporal: top1: {}, top5: {}, top10: {}, mAP: {}'.format(
        datamanager.datasource.get_name_dataset(), top1, top5, top10, m_ap))
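
The retrieval metrics above depend on the project's compute_distance_matrix, top_k and mAP helpers, which are not shown here. A minimal sketch of the first two, assuming L2 distances between feature vectors and integer label tensors (the names and signatures below are illustrative, not the project's API):

import torch

def compute_distance_matrix_sketch(query, gallery):
    # pairwise Euclidean distances between query (n, d) and gallery (m, d) features
    return torch.cdist(query, gallery, p=2)

def top_k_sketch(distance, gallery_labels, query_labels, k=1):
    # a query counts as correct if any of its k nearest gallery items shares its label
    _, indices = torch.topk(distance, k=k, dim=1, largest=False)
    match = gallery_labels[indices] == query_labels.unsqueeze(1)
    return match.any(dim=1).float().mean().item()
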
Example #3
def get_model(opts):
    """
    ----------------------------------------------------------------------------------------------------------------
    Test(id=Baseline.SGD.CosineAnnealingLR.CIFAR10.1000.512.01, loss=1.6240, mA=0.8682)
    ✡ Test(id=SEBaseline.SGD.CosineAnnealingLR.CIFAR10.1000.512.01, loss=1.6408, mA=0.8717) ✡
    ----------------------------------------------------------------------------------------------------------------
    ✡ Test(id=Baseline.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.6121, mA=0.8697) ✡
    Test(id=SEBaseline.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.6210, mA=0.8695)
    Test(id=AABaseline.SGD.OneCycleLR.CIFAR10.1000.2048.01, loss=1.6297, mA=0.8625)
    TODO Test(id=SASABaseline.AdamW.OneCycleLR.CIFAR10.300.128.001, loss=1.6930, mA=0.8438)
    ----------------------------------------------------------------------------------------------------------------
    Test(id=SimpleResNet56.SGD.CosineAnnealingWarmRestarts.CIFAR10.1000.256.01, loss=1.5356, mA=0.9205)
    ✡ Test(id=SimpleSEResNet56.SGD.CosineAnnealingWarmRestarts.CIFAR10.1000.256.01, loss=1.5866, mA=0.9238) ✡
    ----------------------------------------------------------------------------------------------------------------
    Test(id=SimpleResNet56.SGD.CosineAnnealingLR.CIFAR10.1000.256.01, loss=1.5238, mA=0.9273)
    ✡ Test(id=SimpleSEResNet56.SGD.CosineAnnealingLR.CIFAR10.1000.512.01, loss=1.5145, mA=0.9353) ✡
    Test(id=SimpleStdAAResNet56.SGD.CosineAnnealingLR.CIFAR10.1000.512.01, loss=1.5243, mA=0.9275)
    ----------------------------------------------------------------------------------------------------------------
    Test(id=SimpleResNet56.SGD.OneCycleLR.CIFAR10.1000.256.01, loss=1.5239, mA=0.9254)
    ✡ Test(id=SimpleSEResNet56.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.5160, mA=0.9356) ✡
    Test(id=SimpleOrigAAResNet56.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.5261, mA=0.9138)
    Test(id=SimpleStdAAResNet56.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.5381, mA=0.9265)
    Test(id=SimpleSASAResNet56.AdamW.OneCycleLR.CIFAR10.300.256.01, loss=0.8838, mA=0.8457)
    ----------------------------------------------------------------------------------------------------------------
    ✡ Test(id=SimpleResNet110.SGD.CosineAnnealingLR.CIFAR10.1000.1024.01, loss=1.5177, mA=0.9336) ✡
    Test(id=SimpleSEResNet110.SGD.CosineAnnealingLR.CIFAR10.1000.1024.01, loss=1.5231, mA=0.9319)
    ----------------------------------------------------------------------------------------------------------------
    Test(id=ResNet50.SGD.CosineAnnealingLR.CIFAR10.1000.128.01, loss=1.5777, mA=0.9244)
    ✡ Test(id=ResNet50.AdamW.OneCycleLR.CIFAR10.300.128.005, loss=1.5237, mA=0.9395) ✡
    Test(id=SEResNet50.SGD.CosineAnnealingLR.CIFAR10.1000.128.01, loss=1.5243, mA=0.9156)
    Test(id=AAResNet50.SGD.OneCycleLR.CIFAR10.1000.256.01, loss=1.5223, mA=0.9072)
    ----------------------------------------------------------------------------------------------------------------
    """
    return {
        'Baseline': lambda: Baseline(opts),
        'SEBaseline': lambda: SEBaseline(opts),
        'AABaseline': lambda: AABaseline(opts),
        'SASABaseline': lambda: SASABaseline(stem=False),
        'SASAStemBaseline': lambda: SASABaseline(stem=True),
        'SimpleChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=True, mode='none'),
        'ComplexChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=False, mode='none'),
        'SkipSimpleChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=True, mode='skip'),
        'SkipComplexChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=False, mode='skip'),
        'ScaleSimpleChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=True, mode='scale'),
        'ScaleComplexChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=False, mode='scale'),
        'SimpleResNet56': lambda: SimpleResNet(n=9),
        'SimpleResNet110': lambda: SimpleResNet(n=18),
        'SimpleSEResNet56': lambda: SimpleSEResNet(n=9),
        'SimpleSEResNet110': lambda: SimpleSEResNet(n=18),
        'SimpleOrigAAResNet56': lambda: SimpleAAResNet(n=9, original=True),
        'SimpleStdAAResNet56': lambda: SimpleAAResNet(n=9, original=False),
        'SimpleSASAResNet56': lambda: SimpleSASAResNet(n=9, stem=False),
        'SimpleStemSASAResNet56': lambda: SimpleSASAResNet(n=9, stem=True),
        'ResNet50': lambda: ResNet(sizes=[3, 4, 6, 3]),
        'SEResNet50': lambda: SEResNet(sizes=[3, 4, 6, 3]),
        'AAResNet50': lambda: AAResNet(sizes=[3, 4, 6, 3]),
        'ResNet101': lambda: ResNet(sizes=[3, 4, 23, 3]),
        'SEResNet101': lambda: SEResNet(sizes=[3, 4, 23, 3]),
    }[opts.model_name]()
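
A minimal usage sketch for the factory above, assuming opts is an argparse-style namespace and that model_name alone selects the constructor (the Baseline variants additionally read fields from opts):

from argparse import Namespace

opts = Namespace(model_name='SimpleResNet56')  # hypothetical options object
model = get_model(opts)
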
Example #4
def train_model(data_pack, num_epochs, learning_rate, num_words, dim_embedding, num_classes, model_name):
    train_X, train_y, valid_X, valid_y, test_X, test_y = data_pack

    if model_name == "Baseline-BoW":
        model = Bag_of_Words(num_words, num_classes)
    elif model_name == "Baseline-AvEmbedding":
        model = Baseline(num_words, dim_embedding, num_classes)
    elif model_name == "Shallow-CNN":
        n_filters = [40, 40]
        model = CNN(num_words, dim_embedding, num_classes, n_filters)
    elif model_name == "Deep-CNN":
        n_filters = [40, 48, 72, 48]
        model = CNN_Deep(num_words, dim_embedding, num_classes, n_filters)
    elif model_name == "Shallow-LSTM":
        memory_size = 100
        model = LSTM(num_words, dim_embedding, num_classes, memory_size)
    elif model_name == "Deep-LSTM":
        memory_size = 100
        model = LSTM_Deep(num_words, dim_embedding, num_classes, memory_size)
    elif model_name == "Shallow-CNN-CE":
        n_filters = [40, 40]
        model = CE_CNN(dim_embedding, num_classes, n_filters)
    elif model_name == "Deep-CNN-CE":
        n_filters = [40, 48, 72, 48]
        model = CE_CNN_Deep(dim_embedding, num_classes, n_filters)
    elif model_name == "Block-CNN-CE":
        n_filters = [64, 128, 256, 512]
        model = CE_CNN_Block(dim_embedding, num_classes, n_filters)
    elif model_name == "ResNet-CE":
        n_filters = [64, 128, 256, 512]
        model = CE_ResNet(dim_embedding, num_classes, n_filters)
    model.cuda()
    # n_filters = [15, 20, 40]
    # model = CNN_Deep(num_words, dim_embedding, num_classes, n_filters)

    max_train, max_val, max_test = 0, 0, 0
    min_train, min_val, min_test = 10, 10, 10
    model = torch.load(model_name + ".pt")
    model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    a = []
    batch_x_one = torch.FloatTensor(batch_size, test_X[0].shape[1], dim_embedding)

    t_acc, output_results = run_example_set(model, criterion, test_X, test_y, batch_x_one=batch_x_one)
    print(output_results)
    results = open("results/example_set_prediction.txt", "w")
    for e in output_results:
        results.write(str(e) + "\n")
    results.close()
    print(str(t_acc))
Example #5
def run(dataset_train, dataset_dev, dataset_test, model_type, word_embed_size,
        hidden_size, batch_size, use_cuda, n_epochs):

    if model_type == 'base':
        model = Baseline(vocab=dataset_train.vocab,
                         word_embed_size=word_embed_size,
                         hidden_size=hidden_size,
                         use_cuda=use_cuda,
                         inference=False)
    else:
        raise NotImplementedError
    if use_cuda:
        model = model.cuda()

    optim_params = model.parameters()
    optimizer = optim.Adam(optim_params, lr=10**-3)

    print('start training')
    for epoch in range(n_epochs):
        train_loss, tokens, preds, golds = train(dataset_train, model,
                                                 optimizer, batch_size, epoch,
                                                 Phase.TRAIN, use_cuda)

        dev_loss, tokens, preds, golds = train(dataset_dev, model, optimizer,
                                               batch_size, epoch, Phase.DEV,
                                               use_cuda)
        logger = '\t'.join([
            'epoch {}'.format(epoch + 1),
            'TRAIN Loss: {:.9f}'.format(train_loss),
            'DEV Loss: {:.9f}'.format(dev_loss)
        ])
        print('\r' + logger, end='')
    test_loss, tokens, preds, golds = train(dataset_test, model, optimizer,
                                            batch_size, epoch, Phase.TEST,
                                            use_cuda)
    print('====', 'TEST', '=====')
    print_scores(preds, golds)
    output_results(tokens, preds, golds)
Example #6
def embedding_baseline_test(args):
    chosen_params = dict(params)

    results = []

    for rand_emb in [True, False]:
        chosen_params['rand_emb'] = rand_emb
        train_dataset, valid_dataset, test_dataset = data.load_dataset(
            args.train_batch_size,
            args.test_batch_size,
            min_freq=chosen_params['min_freq'])
        embedding = data.generate_embedding_matrix(
            train_dataset.dataset.text_vocab,
            rand=chosen_params['rand_emb'],
            freeze=chosen_params['freeze'])

        result = {}
        for m in ['baseline', 'rnn']:
            if m == 'rnn':
                model = RNN(embedding, chosen_params)
            else:
                model = Baseline(embedding)

            criterion = nn.BCEWithLogitsLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

            for epoch in range(args.epochs):
                print(f'******* epoch: {epoch} *******')
                train(model, train_dataset, optimizer, criterion, args)
                evaluate(model, valid_dataset, criterion, 'Validation')

            acc, f1 = evaluate(model, test_dataset, criterion, 'Test')
            result[m + '_acc_rand_emb' + str(rand_emb)] = acc
            result[m + '_f1_rand_emb' + str(rand_emb)] = f1
        results.append(result)

    with open(os.path.join(SAVE_DIR, 'embedding_baseline.txt'), 'a') as f:
        for res in results:
            print(res, file=f)
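
data.generate_embedding_matrix is project code that is not shown; a rough sketch of what such a helper could do, assuming the vocabulary exposes pretrained vectors aligned with its indices (the vectors attribute and the 300-dim fallback are assumptions, not the project's API):

import torch
import torch.nn as nn

def generate_embedding_matrix_sketch(text_vocab, rand=True, freeze=False):
    # fall back to random weights when requested or when no pretrained vectors exist
    if rand or getattr(text_vocab, 'vectors', None) is None:
        weights = torch.randn(len(text_vocab), 300)
    else:
        weights = text_vocab.vectors
    return nn.Embedding.from_pretrained(weights, freeze=freeze)
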
Example #7
def main(args):
    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)
    # Get model
    log.info('Building model...')
    nbr_model = 0
    if (args.load_path_baseline):
        model_baseline = Baseline(word_vectors=word_vectors, hidden_size=100)
        model_baseline = nn.DataParallel(model_baseline, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_baseline}...')
        model_baseline = util.load_model(model_baseline,
                                         args.load_path_baseline,
                                         gpu_ids,
                                         return_step=False)
        model_baseline = model_baseline.to(device)
        model_baseline.eval()
        nll_meter_baseline = util.AverageMeter()
        nbr_model += 1
        save_prob_baseline_start = []
        save_prob_baseline_end = []

    if (args.load_path_bidaf):
        model_bidaf = BiDAF(word_vectors=word_vectors,
                            char_vectors=char_vectors,
                            char_emb_dim=args.char_emb_dim,
                            hidden_size=args.hidden_size)
        model_bidaf = nn.DataParallel(model_bidaf, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_bidaf}...')
        model_bidaf = util.load_model(model_bidaf,
                                      args.load_path_bidaf,
                                      gpu_ids,
                                      return_step=False)
        model_bidaf = model_bidaf.to(device)
        model_bidaf.eval()
        nll_meter_bidaf = util.AverageMeter()
        nbr_model += 1
        save_prob_bidaf_start = []
        save_prob_bidaf_end = []

    if (args.load_path_bidaf_fusion):
        model_bidaf_fu = BiDAF_fus(word_vectors=word_vectors,
                                   char_vectors=char_vectors,
                                   char_emb_dim=args.char_emb_dim,
                                   hidden_size=args.hidden_size)
        model_bidaf_fu = nn.DataParallel(model_bidaf_fu, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_bidaf_fusion}...')
        model_bidaf_fu = util.load_model(model_bidaf_fu,
                                         args.load_path_bidaf_fusion,
                                         gpu_ids,
                                         return_step=False)
        model_bidaf_fu = model_bidaf_fu.to(device)
        model_bidaf_fu.eval()
        nll_meter_bidaf_fu = util.AverageMeter()
        nbr_model += 1
        save_prob_bidaf_fu_start = []
        save_prob_bidaf_fu_end = []

    if (args.load_path_qanet):
        model_qanet = QANet(word_vectors=word_vectors,
                            char_vectors=char_vectors,
                            char_emb_dim=args.char_emb_dim,
                            hidden_size=args.hidden_size,
                            n_heads=args.n_heads,
                            n_conv_emb_enc=args.n_conv_emb,
                            n_conv_mod_enc=args.n_conv_mod,
                            n_emb_enc_blocks=args.n_emb_blocks,
                            n_mod_enc_blocks=args.n_mod_blocks,
                            divisor_dim_kqv=args.divisor_dim_kqv)

        model_qanet = nn.DataParallel(model_qanet, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_qanet}...')
        model_qanet = util.load_model(model_qanet,
                                      args.load_path_qanet,
                                      gpu_ids,
                                      return_step=False)
        model_qanet = model_qanet.to(device)
        model_qanet.eval()
        nll_meter_qanet = util.AverageMeter()
        nbr_model += 1
        save_prob_qanet_start = []
        save_prob_qanet_end = []

    if (args.load_path_qanet_old):
        model_qanet_old = QANet_old(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    device=device,
                                    char_emb_dim=args.char_emb_dim,
                                    hidden_size=args.hidden_size,
                                    n_heads=args.n_heads,
                                    n_conv_emb_enc=args.n_conv_emb,
                                    n_conv_mod_enc=args.n_conv_mod,
                                    n_emb_enc_blocks=args.n_emb_blocks,
                                    n_mod_enc_blocks=args.n_mod_blocks)

        model_qanet_old = nn.DataParallel(model_qanet_old, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_qanet_old}...')
        model_qanet_old = util.load_model(model_qanet_old,
                                          args.load_path_qanet_old,
                                          gpu_ids,
                                          return_step=False)
        model_qanet_old = model_qanet_old.to(device)
        model_qanet_old.eval()
        nll_meter_qanet_old = util.AverageMeter()
        nbr_model += 1
        save_prob_qanet_old_start = []
        save_prob_qanet_old_end = []

    if (args.load_path_qanet_inde):
        model_qanet_inde = QANet_independant_encoder(
            word_vectors=word_vectors,
            char_vectors=char_vectors,
            char_emb_dim=args.char_emb_dim,
            hidden_size=args.hidden_size,
            n_heads=args.n_heads,
            n_conv_emb_enc=args.n_conv_emb,
            n_conv_mod_enc=args.n_conv_mod,
            n_emb_enc_blocks=args.n_emb_blocks,
            n_mod_enc_blocks=args.n_mod_blocks,
            divisor_dim_kqv=args.divisor_dim_kqv)

        model_qanet_inde = nn.DataParallel(model_qanet_inde, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_qanet_inde}...')
        model_qanet_inde = util.load_model(model_qanet_inde,
                                           args.load_path_qanet_inde,
                                           gpu_ids,
                                           return_step=False)
        model_qanet_inde = model_qanet_inde.to(device)
        model_qanet_inde.eval()
        nll_meter_qanet_inde = util.AverageMeter()
        nbr_model += 1
        save_prob_qanet_inde_start = []
        save_prob_qanet_inde_end = []

    if (args.load_path_qanet_s_e):
        model_qanet_s_e = QANet_S_E(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    char_emb_dim=args.char_emb_dim,
                                    hidden_size=args.hidden_size,
                                    n_heads=args.n_heads,
                                    n_conv_emb_enc=args.n_conv_emb,
                                    n_conv_mod_enc=args.n_conv_mod,
                                    n_emb_enc_blocks=args.n_emb_blocks,
                                    n_mod_enc_blocks=args.n_mod_blocks,
                                    divisor_dim_kqv=args.divisor_dim_kqv)

        model_qanet_s_e = nn.DataParallel(model_qanet_s_e, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path_qanet_s_e}...')
        model_qanet_s_e = util.load_model(model_qanet_s_e,
                                          args.load_path_qanet_s_e,
                                          gpu_ids,
                                          return_step=False)
        model_qanet_s_e = model_qanet_s_e.to(device)
        model_qanet_s_e.eval()
        nll_meter_qanet_s_e = util.AverageMeter()
        nbr_model += 1
        save_prob_qanet_s_e_start = []
        save_prob_qanet_s_e_end = []

    # Get data loader
    log.info('Building dataset...')
    record_file = vars(args)[f'{args.split}_record_file']
    dataset = SQuAD(record_file, args.use_squad_v2)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.split} split...')
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}  # Predictions for submission
    eval_file = vars(args)[f'{args.split}_eval_file']
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            cc_idxs = cc_idxs.to(device)
            qc_idxs = qc_idxs.to(device)
            batch_size = cw_idxs.size(0)

            y1, y2 = y1.to(device), y2.to(device)
            l_p1, l_p2 = [], []
            # Forward
            if (args.load_path_baseline):
                log_p1_baseline, log_p2_baseline = model_baseline(
                    cw_idxs, cc_idxs)
                loss_baseline = F.nll_loss(log_p1_baseline, y1) + F.nll_loss(
                    log_p2_baseline, y2)
                nll_meter_baseline.update(loss_baseline.item(), batch_size)
                l_p1 += [log_p1_baseline.exp()]
                l_p2 += [log_p2_baseline.exp()]
                if (args.save_probabilities):
                    save_prob_baseline_start += [
                        log_p1_baseline.exp().detach().cpu().numpy()
                    ]
                    save_prob_baseline_end += [
                        log_p2_baseline.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_qanet):
                log_p1_qanet, log_p2_qanet = model_qanet(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_qanet = F.nll_loss(log_p1_qanet, y1) + F.nll_loss(
                    log_p2_qanet, y2)
                nll_meter_qanet.update(loss_qanet.item(), batch_size)
                # Get F1 and EM scores
                l_p1 += [log_p1_qanet.exp()]
                l_p2 += [log_p2_qanet.exp()]
                if (args.save_probabilities):
                    save_prob_qanet_start += [
                        log_p1_qanet.exp().detach().cpu().numpy()
                    ]
                    save_prob_qanet_end += [
                        log_p2_qanet.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_qanet_old):
                log_p1_qanet_old, log_p2_qanet_old = model_qanet_old(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_qanet_old = F.nll_loss(log_p1_qanet_old, y1) + F.nll_loss(
                    log_p2_qanet_old, y2)
                nll_meter_qanet_old.update(loss_qanet_old.item(), batch_size)
                # Get F1 and EM scores
                l_p1 += [log_p1_qanet_old.exp()]
                l_p2 += [log_p2_qanet_old.exp()]
                if (args.save_probabilities):
                    save_prob_qanet_old_start += [
                        log_p1_qanet_old.exp().detach().cpu().numpy()
                    ]
                    save_prob_qanet_old_end += [
                        log_p2_qanet_old.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_qanet_inde):
                log_p1_qanet_inde, log_p2_qanet_inde = model_qanet_inde(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_qanet_inde = F.nll_loss(
                    log_p1_qanet_inde, y1) + F.nll_loss(log_p2_qanet_inde, y2)
                nll_meter_qanet_inde.update(loss_qanet_inde.item(), batch_size)
                # Get F1 and EM scores
                l_p1 += [log_p1_qanet_inde.exp()]
                l_p2 += [log_p2_qanet_inde.exp()]
                if (args.save_probabilities):
                    save_prob_qanet_inde_start += [
                        log_p1_qanet_inde.exp().detach().cpu().numpy()
                    ]
                    save_prob_qanet_inde_end += [
                        log_p2_qanet_inde.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_qanet_s_e):
                log_p1_qanet_s_e, log_p2_qanet_s_e = model_qanet_s_e(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_qanet_s_e = F.nll_loss(log_p1_qanet_s_e, y1) + F.nll_loss(
                    log_p2_qanet_s_e, y2)
                nll_meter_qanet_s_e.update(loss_qanet_s_e.item(), batch_size)
                # Get F1 and EM scores
                l_p1 += [log_p1_qanet_s_e.exp()]
                l_p2 += [log_p2_qanet_s_e.exp()]
                if (args.save_probabilities):
                    save_prob_qanet_s_e_start += [
                        log_p1_qanet_s_e.exp().detach().cpu().numpy()
                    ]
                    save_prob_qanet_s_e_end += [
                        log_p2_qanet_s_e.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_bidaf):
                log_p1_bidaf, log_p2_bidaf = model_bidaf(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_bidaf = F.nll_loss(log_p1_bidaf, y1) + F.nll_loss(
                    log_p2_bidaf, y2)
                nll_meter_bidaf.update(loss_bidaf.item(), batch_size)
                l_p1 += [log_p1_bidaf.exp()]
                l_p2 += [log_p2_bidaf.exp()]
                if (args.save_probabilities):
                    save_prob_bidaf_start += [
                        log_p1_bidaf.exp().detach().cpu().numpy()
                    ]
                    save_prob_bidaf_end += [
                        log_p2_bidaf.exp().detach().cpu().numpy()
                    ]

            if (args.load_path_bidaf_fusion):
                log_p1_bidaf_fu, log_p2_bidaf_fu = model_bidaf_fu(
                    cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                loss_bidaf_fu = F.nll_loss(log_p1_bidaf_fu, y1) + F.nll_loss(
                    log_p2_bidaf_fu, y2)
                nll_meter_bidaf_fu.update(loss_bidaf_fu.item(), batch_size)
                l_p1 += [log_p1_bidaf_fu.exp()]
                l_p2 += [log_p2_bidaf_fu.exp()]
                if (args.save_probabilities):
                    save_prob_bidaf_fu_start += [
                        log_p1_bidaf_fu.exp().detach().cpu().numpy()
                    ]
                    save_prob_bidaf_fu_end += [
                        log_p2_bidaf_fu.exp().detach().cpu().numpy()
                    ]

            p1, p2 = l_p1[0], l_p2[0]
            for i in range(1, nbr_model):
                p1 += l_p1[i]
                p2 += l_p2[i]
            p1 /= nbr_model
            p2 /= nbr_model

            starts, ends = util.discretize(p1, p2, args.max_ans_len,
                                           args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            if args.split != 'test':
                # No labels for the test set, so NLL would be invalid
                if (args.load_path_qanet):
                    progress_bar.set_postfix(NLL=nll_meter_qanet.avg)
                elif (args.load_path_bidaf):
                    progress_bar.set_postfix(NLL=nll_meter_bidaf.avg)
                elif (args.load_path_bidaf_fusion):
                    progress_bar.set_postfix(NLL=nll_meter_bidaf_fu.avg)
                elif (args.load_path_qanet_old):
                    progress_bar.set_postfix(NLL=nll_meter_qanet_old.avg)
                elif (args.load_path_qanet_inde):
                    progress_bar.set_postfix(NLL=nll_meter_qanet_inde.avg)
                elif (args.load_path_qanet_s_e):
                    progress_bar.set_postfix(NLL=nll_meter_qanet_s_e.avg)
                else:
                    progress_bar.set_postfix(NLL=nll_meter_baseline.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    if (args.save_probabilities):
        if (args.load_path_baseline):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_baseline_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_baseline_end, fp)

        if (args.load_path_bidaf):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_bidaf_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_bidaf_end, fp)

        if (args.load_path_bidaf_fusion):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_bidaf_fu_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_bidaf_fu_end, fp)

        if (args.load_path_qanet):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_end, fp)

        if (args.load_path_qanet_old):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_old_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_old_end, fp)

        if (args.load_path_qanet_inde):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_inde_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_inde_end, fp)

        if (args.load_path_qanet_s_e):
            with open(args.save_dir + "/probs_start", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_s_e_start, fp)
            with open(args.save_dir + "/probs_end", "wb") as fp:  #Pickling
                pickle.dump(save_prob_qanet_s_e_end, fp)

    # Log results (except for test set, since it does not come with labels)
    if args.split != 'test':
        results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)
        if (args.load_path_qanet):
            meter_avg = nll_meter_qanet.avg
        elif (args.load_path_bidaf):
            meter_avg = nll_meter_bidaf.avg
        elif (args.load_path_bidaf_fusion):
            meter_avg = nll_meter_bidaf_fu.avg
        elif (args.load_path_qanet_inde):
            meter_avg = nll_meter_qanet_inde.avg
        elif (args.load_path_qanet_s_e):
            meter_avg = nll_meter_qanet_s_e.avg
        elif (args.load_path_qanet_old):
            meter_avg = nll_meter_qanet_old.avg
        else:
            meter_avg = nll_meter_baseline.avg
        results_list = [('NLL', meter_avg), ('F1', results['F1']),
                        ('EM', results['EM'])]
        if args.use_squad_v2:
            results_list.append(('AvNA', results['AvNA']))
        results = OrderedDict(results_list)

        # Log to console
        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
        log.info(f'{args.split.title()} {results_str}')

        # Log to TensorBoard
        tbx = SummaryWriter(args.save_dir)
        util.visualize(tbx,
                       pred_dict=pred_dict,
                       eval_path=eval_file,
                       step=0,
                       split=args.split,
                       num_visuals=args.num_visuals)

    # Write submission file
    sub_path = join(args.save_dir, args.split + '_' + args.sub_file)
    log.info(f'Writing submission file to {sub_path}...')
    with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
        csv_writer = csv.writer(csv_fh, delimiter=',')
        csv_writer.writerow(['Id', 'Predicted'])
        for uuid in sorted(sub_dict):
            csv_writer.writerow([uuid, sub_dict[uuid]])
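
The ensemble above simply averages each loaded model's start/end probabilities before discretizing the answer span; the same idea in isolation (a sketch, independent of the script's util helpers):

import torch

def average_probabilities(prob_list):
    # prob_list holds one (batch, seq_len) probability tensor per loaded model
    return torch.stack(prob_list, dim=0).mean(dim=0)
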
Example #8
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=0,
                                               shuffle=True)
    return train_loader


if __name__ == "__main__":
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Assuming that we are on a CUDA machine, this should print a CUDA device:
    print(device)

    epochs = 100
    best_val_loss = 999999
    net = Baseline()
    net.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)
    for epoch in range(epochs):
        with tqdm(total=len(load_dataset(train_path))) as epoch_pbar:
            epoch_pbar.set_description(f'Epoch {epoch}')
            running_loss = 0.0
            running_val_loss = 0.0
            for i, data in enumerate(load_dataset(train_path)):
                # get the inputs; data is a list of [inputs, labels]
                inputs = data[0].to(device)
                labels = data[1].to(device)
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item()
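
The snippet is cut off after the loss is accumulated; a typical continuation of such a loop would backpropagate and update the parameters (a sketch of the standard pattern, not the original file's remainder):

                optimizer.zero_grad()   # clear gradients from the previous step
                loss.backward()         # backpropagate the batch loss
                optimizer.step()        # update the network parameters
                epoch_pbar.update(1)
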
Example #9
if __name__ == '__main__':
    parser = Flags()
    parser.set_arguments()
    FG = parser.parse_args()

    rf = str(FG.cur_fold)

    vis = Visdom(port=FG.vis_port, env=FG.model + '_fold' + rf)
    report = parser.report(end='<br>')
    vis.text(report, win='report f{}'.format(FG.cur_fold))

    torch.cuda.set_device(FG.devices[0])
    device = torch.device(FG.devices[0])

    net = Baseline(FG.ckpt_dir, len(FG.labels))
    # net = Baseline3D(FG.ckpt_dir, len(FG.labels))

    if len(FG.devices) > 1:
        net = torch.nn.DataParallel(net, device_ids=FG.devices)
        print(net.module)
    else:
        print(net)

    optimizer = Adam(net.parameters(), lr=FG.lr, weight_decay=FG.l2_decay)
    scheduler = ExponentialLR(optimizer, gamma=FG.lr_gamma)

    trainloader, testloader = get_dataloader(k=FG.fold,
                                             cur_fold=FG.cur_fold,
                                             modality=FG.modality,
                                             axis=FG.axis,
Example #10
def main(args):
    MaxEpochs = args.epochs
    lr = args.lr
    batchsize = args.batch_size
    curr_model = args.model

    # 3.2 Processing of the data
    TEXT = data.Field(sequential=True, include_lengths=True, tokenize='spacy')
    LABEL = data.Field(sequential=False, use_vocab=False)

    train, val, test = data.TabularDataset.splits(
        path='/Users/jiyun/PycharmProjects/mie324/assign4',
        train='train.tsv',
        validation='validation.tsv',
        test='test.tsv',
        format='tsv',
        skip_header=True,
        fields=[('text', TEXT), ('label', LABEL)])

    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        datasets=(train, val, test),
        sort_key=lambda x: len(x.text),
        sort_within_batch=True,
        repeat=False,
        batch_sizes=(batchsize, batchsize, batchsize),
        device=-1)
    # train_iter, val_iter, test_iter = data.Iterator.splits(datasets=(train, val, test),
    #                                                              sort_key=lambda x: len(x.text), sort_within_batch=True,
    #                                                              repeat=False,
    #                                                              batch_sizes=(batchsize, batchsize, batchsize),
    #                                                              device=-1)

    TEXT.build_vocab(train)
    vocab = TEXT.vocab
    vocab.load_vectors(torchtext.vocab.GloVe(name='6B', dim=100))

    # 5 Training and Evaluation

    loss_fnc = torch.nn.BCELoss()

    base_model = Baseline(100, vocab)
    rnn_model = RNN(100, vocab, 100)
    cnn_model = CNN(100, vocab, 50, [2, 4])

    if curr_model == 'baseline':
        model = base_model
    elif curr_model == 'rnn':
        model = rnn_model
    elif curr_model == 'cnn':
        model = cnn_model

    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(MaxEpochs):
        accum_loss = 0
        tot_corr = 0

        for i, batch in enumerate(train_iter):
            label = batch.label
            feats, lengths = batch.text

            optimizer.zero_grad()

            predicts = model(feats, lengths)

            batch_loss = loss_fnc(input=predicts.squeeze(),
                                  target=label.float())
            accum_loss += batch_loss.item()  # accumulate a float, not the graph-holding tensor
            batch_loss.backward()
            optimizer.step()

            corr = (predicts > 0.5).squeeze().long() == label
            tot_corr += int(corr.sum())

        train_acc = float(tot_corr) / (batchsize * 100)
        train_loss = accum_loss / (batchsize * 100)
        valid_acc, valid_loss = evaluate(model, val_iter)
        print(
            "Epoch: {} | Train acc: {} | Train loss: {} | Valid acc: {} | Valid loss: {}"
            .format(epoch, train_acc, train_loss, valid_acc, valid_loss))

    print('Finished Training')
    torch.save(model, "model_%s.pt" % (curr_model))
    test_model = torch.load("model_%s.pt" % (curr_model))
    test_acc, test_loss = evaluate(test_model, test_iter)
    # test_acc, test_loss = evaluate(model, test_iter)
    print('Test acc: {} | Test loss: {}'.format(test_acc, test_loss))
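
evaluate is referenced above but not defined in this excerpt; a rough sketch consistent with how it is called (returning accuracy, then average loss over an iterator) and with the batch layout of the training loop (an illustration, not the assignment's actual helper):

import torch

def evaluate_sketch(model, data_iter):
    loss_fnc = torch.nn.BCELoss()
    total_corr, total_loss, n_batches, n_examples = 0, 0.0, 0, 0
    with torch.no_grad():
        for batch in data_iter:
            label = batch.label
            feats, lengths = batch.text
            predicts = model(feats, lengths)
            total_loss += loss_fnc(input=predicts.squeeze(), target=label.float()).item()
            total_corr += int(((predicts > 0.5).squeeze().long() == label).sum())
            n_batches += 1
            n_examples += label.size(0)
    return total_corr / n_examples, total_loss / n_batches
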
Example #11
File: run.py  Project: mihail911/CioEntails
            goldLabels.append(label)
            s1 = " ".join(leaves(t1))
            s2 = " ".join(leaves(t2))
            modelPredict = model.predict(s1, s2)
            predictions.append(modelPredict)
            count += 1

    accuracy = accuracy_score(predictions, goldLabels)
    print("Accuracy on SICK %s set: %f" % (dataSet, accuracy))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="arguments for CioEntails system")
    parser.add_argument("--model",
                        type=str,
                        default="baseline",
                        help="Name of model to use for system")
    args = parser.parse_args()

    if args.model == "baseline":
        model = Baseline("cosineSimilarity", ["keyword_overlap"])
    elif args.model == "keyword":
        model = Keyword("cosineSimilarity", ["keyword_overlap"])
    elif args.model == "NB":
        model = NaiveBayes("cosineSimilarity", ["keyword_overlap"])

    start = time.time()
    evaluateModel(model, args.model, sick_dev_reader)
    print("Evaluation done in %f seconds" % (time.time() - start))
Example #12
def run_fold(parser, vis):
    devices = parser.args.devices
    parser.args.ckpt_dir = os.path.join('checkpoint', parser.args.model,
                                        'f' + str(parser.args.cur_fold))
    FG = parser.load()
    FG.devices = devices
    print(FG)

    torch.cuda.set_device(FG.devices[0])
    device = torch.device(FG.devices[0])

    net = Baseline(FG.ckpt_dir, len(FG.labels))

    performances = net.load(epoch=None, is_best=True)
    net = net.to(device)

    trainloader, testloader = get_dataloader(k=FG.fold,
                                             cur_fold=FG.cur_fold,
                                             modality=FG.modality,
                                             axis=FG.axis,
                                             labels=FG.labels,
                                             batch_size=FG.batch_size)

    evaluator = create_supervised_evaluator(
        net,
        device=device,
        non_blocking=True,
        prepare_batch=process_ninecrop_batch,
        metrics={
            'sensitivity': Recall(False, mean_over_ninecrop),
            'precision': Precision(False, mean_over_ninecrop),
            'specificity': Specificity(False, mean_over_ninecrop)
        })

    class Tracker(object):
        def __init__(self):
            self.data = []

    outputs = Tracker()
    targets = Tracker()

    @evaluator.on(Events.ITERATION_COMPLETED)
    def transform_ninecrop_output(engine):
        output, target = engine.state.output

        if output.size(0) != target.size(0):
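            # the evaluator saw several crops per sample (nine-crop TTA); fold the crop
            # dimension back in and average the per-crop predictions for each sample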
            n = target.size(0)
            npatches = output.size(0) // n
            output = output.view(n, npatches, *output.shape[1:])
            output = torch.mean(output, dim=1)
        outputs.data += [output]
        targets.data += [target]

    evaluator.run(testloader)
    string = 'Fold {}'.format(FG.cur_fold) + '<br>'
    string += 'Epoch {}'.format(performances.pop('epoch')) + '<br>'
    for k in sorted(performances.keys()):
        string += k + ': ' + '{:.4f}'.format(performances[k])
        string += '<br>'

    string += 'pre : ' + str(evaluator.state.metrics['precision']) + '<br>'
    string += 'sen : ' + str(evaluator.state.metrics['sensitivity']) + '<br>'
    string += 'spe : ' + str(evaluator.state.metrics['specificity']) + '<br>'

    vis.text(string, win=FG.model + '_result_fold{}'.format(FG.cur_fold))

    del net
    return outputs.data, targets.data
Example #13
class Trainer(BaseTrainer):
    def __init__(self, config):
        super(Trainer, self).__init__(config)
        self.datamanager = DataManger(config["data"])

        # model
        self.model = Baseline(
            num_classes=self.datamanager.datasource.get_num_classes("train")
        )

        # summary model
        summary(
            self.model,
            input_size=(3, 256, 128),
            batch_size=config["data"]["batch_size"],
            device="cpu",
        )

        # losses
        cfg_losses = config["losses"]
        self.criterion = Softmax_Triplet_loss(
            num_class=self.datamanager.datasource.get_num_classes("train"),
            margin=cfg_losses["margin"],
            epsilon=cfg_losses["epsilon"],
            use_gpu=self.use_gpu,
        )

        self.center_loss = CenterLoss(
            num_classes=self.datamanager.datasource.get_num_classes("train"),
            feature_dim=2048,
            use_gpu=self.use_gpu,
        )

        # optimizer
        cfg_optimizer = config["optimizer"]
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=cfg_optimizer["lr"],
            weight_decay=cfg_optimizer["weight_decay"],
        )

        self.optimizer_centerloss = torch.optim.SGD(
            self.center_loss.parameters(), lr=0.5
        )

        # learning rate scheduler
        cfg_lr_scheduler = config["lr_scheduler"]
        self.lr_scheduler = WarmupMultiStepLR(
            self.optimizer,
            milestones=cfg_lr_scheduler["steps"],
            gamma=cfg_lr_scheduler["gamma"],
            warmup_factor=cfg_lr_scheduler["factor"],
            warmup_iters=cfg_lr_scheduler["iters"],
            warmup_method=cfg_lr_scheduler["method"],
        )

        # track metric
        self.train_metrics = MetricTracker("loss", "accuracy")
        self.valid_metrics = MetricTracker("loss", "accuracy")

        # save best accuracy for function _save_checkpoint
        self.best_accuracy = None

        # send model to device
        self.model.to(self.device)

        self.scaler = GradScaler()

        # resume model from last checkpoint
        if config["resume"] != "":
            self._resume_checkpoint(config["resume"])

    def train(self):
        for epoch in range(self.start_epoch, self.epochs + 1):
            result = self._train_epoch(epoch)

            if self.lr_scheduler is not None:
                self.lr_scheduler.step()

            result = self._valid_epoch(epoch)

            # add scalars to tensorboard
            self.writer.add_scalars(
                "Loss",
                {
                    "Train": self.train_metrics.avg("loss"),
                    "Val": self.valid_metrics.avg("loss"),
                },
                global_step=epoch,
            )
            self.writer.add_scalars(
                "Accuracy",
                {
                    "Train": self.train_metrics.avg("accuracy"),
                    "Val": self.valid_metrics.avg("accuracy"),
                },
                global_step=epoch,
            )

            # logging result to console
            log = {"epoch": epoch}
            log.update(result)
            for key, value in log.items():
                self.logger.info("    {:15s}: {}".format(str(key), value))

            # save model
            if (
                self.best_accuracy is None
                or self.best_accuracy < self.valid_metrics.avg("accuracy")
            ):
                self.best_accuracy = self.valid_metrics.avg("accuracy")
                self._save_checkpoint(epoch, save_best=True)
            else:
                self._save_checkpoint(epoch, save_best=False)

            # save logs
            self._save_logs(epoch)

    def _train_epoch(self, epoch):
        """Training step"""
        self.model.train()
        self.train_metrics.reset()
        with tqdm(total=len(self.datamanager.get_dataloader("train"))) as epoch_pbar:
            epoch_pbar.set_description(f"Epoch {epoch}")
            for batch_idx, (data, labels, _) in enumerate(
                self.datamanager.get_dataloader("train")
            ):
                # push data to device
                data, labels = data.to(self.device), labels.to(self.device)

                # zero gradient
                self.optimizer.zero_grad()
                self.optimizer_centerloss.zero_grad()

                with autocast():
                    # forward batch
                    score, feat = self.model(data)

                    # calculate loss and accuracy
                    loss = (
                        self.criterion(score, feat, labels)
                        + self.center_loss(feat, labels) * self.config["losses"]["beta"]
                    )
                    _, preds = torch.max(score.data, dim=1)

                # backward parameters
                # loss.backward()
                self.scaler.scale(loss).backward()

                # backward parameters for center_loss
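                # center_loss was weighted by beta inside the total loss, so rescale its
                # gradients by 1/beta before optimizer_centerloss applies the update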
                for param in self.center_loss.parameters():
                    param.grad.data *= 1.0 / self.config["losses"]["beta"]

                # optimize
                # self.optimizer.step()
                self.scaler.step(self.optimizer)
                self.optimizer_centerloss.step()

                self.scaler.update()

                # update loss and accuracy in MetricTracker
                self.train_metrics.update("loss", loss.item())
                self.train_metrics.update(
                    "accuracy",
                    torch.sum(preds == labels.data).double().item() / data.size(0),
                )

                # update process bar
                epoch_pbar.set_postfix(
                    {
                        "train_loss": self.train_metrics.avg("loss"),
                        "train_acc": self.train_metrics.avg("accuracy"),
                    }
                )
                epoch_pbar.update(1)
        return self.train_metrics.result()

    def _valid_epoch(self, epoch):
        """Validation step"""
        self.model.eval()
        self.valid_metrics.reset()
        with torch.no_grad():
            with tqdm(total=len(self.datamanager.get_dataloader("val"))) as epoch_pbar:
                epoch_pbar.set_description(f"Epoch {epoch}")
                for batch_idx, (data, labels, _) in enumerate(
                    self.datamanager.get_dataloader("val")
                ):
                    # push data to device
                    data, labels = data.to(self.device), labels.to(self.device)

                    with autocast():
                        # forward batch
                        score, feat = self.model(data)

                        # calculate loss and accuracy
                        loss = (
                            self.criterion(score, feat, labels)
                            + self.center_loss(feat, labels)
                            * self.config["losses"]["beta"]
                        )
                        _, preds = torch.max(score.data, dim=1)

                    # update loss and accuracy in MetricTracker
                    self.valid_metrics.update("loss", loss.item())
                    self.valid_metrics.update(
                        "accuracy",
                        torch.sum(preds == labels.data).double().item() / data.size(0),
                    )

                    # update process bar
                    epoch_pbar.set_postfix(
                        {
                            "val_loss": self.valid_metrics.avg("loss"),
                            "val_acc": self.valid_metrics.avg("accuracy"),
                        }
                    )
                    epoch_pbar.update(1)
        return self.valid_metrics.result()

    def _save_checkpoint(self, epoch, save_best=True):
        """save model to file"""
        state = {
            "epoch": epoch,
            "state_dict": self.model.state_dict(),
            "center_loss": self.center_loss.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "optimizer_centerloss": self.optimizer_centerloss.state_dict(),
            "lr_scheduler": self.lr_scheduler.state_dict(),
            "best_accuracy": self.best_accuracy,
        }
        filename = os.path.join(self.checkpoint_dir, "model_last.pth")
        self.logger.info("Saving last model: model_last.pth ...")
        torch.save(state, filename)
        if save_best:
            filename = os.path.join(self.checkpoint_dir, "model_best.pth")
            self.logger.info("Saving current best: model_best.pth ...")
            torch.save(state, filename)

    def _resume_checkpoint(self, resume_path):
        """Load model from checkpoint"""
        if not os.path.exists(resume_path):
            raise FileNotFoundError("Resume path does not exist!")
        self.logger.info("Loading checkpoint: {} ...".format(resume_path))
        checkpoint = torch.load(resume_path, map_location=self.map_location)
        self.start_epoch = checkpoint["epoch"] + 1
        self.model.load_state_dict(checkpoint["state_dict"])
        self.center_loss.load_state_dict(checkpoint["center_loss"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        self.optimizer_centerloss.load_state_dict(checkpoint["optimizer_centerloss"])
        self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        self.best_accuracy = checkpoint["best_accuracy"]
        self.logger.info(
            "Checkpoint loaded. Resume training from epoch {}".format(self.start_epoch)
        )

    def _save_logs(self, epoch):
        """Save logs from google colab to google drive"""
        if os.path.isdir(self.logs_dir_saved):
            shutil.rmtree(self.logs_dir_saved)
        destination = shutil.copytree(self.logs_dir, self.logs_dir_saved)
Example #14
def train_model(data_pack, num_epochs, learning_rate, num_words, dim_embedding,
                num_classes, model_name):
    train_X, train_y, valid_X, valid_y, test_X, test_y = data_pack

    if model_name == "Baseline-BoW":
        model = Bag_of_Words(num_words, num_classes)
    elif model_name == "Baseline-AvEmbedding":
        model = Baseline(num_words, dim_embedding, num_classes)
    elif model_name == "Shallow-CNN":
        n_filters = [40, 40]
        model = CNN(num_words, dim_embedding, num_classes, n_filters)
    elif model_name == "Deep-CNN":
        n_filters = [40, 48, 72, 48]
        model = CNN_Deep(num_words, dim_embedding, num_classes, n_filters)
    elif model_name == "Shallow-LSTM":
        memory_size = 100
        model = LSTM(num_words, dim_embedding, num_classes, memory_size)
    elif model_name == "Deep-LSTM":
        memory_size = 100
        model = LSTM_Deep(num_words, dim_embedding, num_classes, memory_size)
    elif model_name == "Shallow-CNN-CE":
        n_filters = [40, 40]
        model = CE_CNN(dim_embedding, num_classes, n_filters)
    elif model_name == "Deep-CNN-CE":
        n_filters = [40, 48, 72, 48]
        model = CE_CNN_Deep(dim_embedding, num_classes, n_filters)
    elif model_name == "Block-CNN-CE":
        n_filters = [64, 128, 256, 512]
        model = CE_CNN_Block(dim_embedding, num_classes, n_filters)
    elif model_name == "ResNet-CE":
        n_filters = [64, 128, 256, 512]
        model = CE_ResNet(dim_embedding, num_classes, n_filters)
    model.cuda()
    model = torch.load(model_name + ".pt")
    # n_filters = [15, 20, 40]
    # model = CNN_Deep(num_words, dim_embedding, num_classes, n_filters)

    max_train, max_val, max_test = 0, 0, 0
    min_train, min_val, min_test = 10, 10, 10

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    reduce_size = 0.2
    a = []
    batch_x_one = torch.FloatTensor(batch_size, train_X[0].shape[1],
                                    dim_embedding)
    tt_best = 0
    epoch = 0
    while epoch < num_epochs:
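        # note: the unconditional break below disables training entirely,
        # so only the evaluation after this loop actually runs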
        break
        i = 0
        s1 = np.random.choice(range(len(train_X)),
                              int(reduce_size * len(train_X)),
                              replace=False)
        t1 = time.time()
        while i < len(s1):
            # process one randomly sampled batch
            optimizer.zero_grad()
            batch_x = train_X[s1[i]]
            batch_y = train_y[s1[i]]
            # print(s1[i])
            # print(len(train_X))
            # print(batch_x.shape)

            batch_x = torch.Tensor(batch_x).type('torch.LongTensor')
            if word_path[0:4] == "char":
                batch_x = torch.unsqueeze(batch_x, 2)
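                # scatter the character indices into a dense one-hot style tensor below
                # (the value 2 marks the active slot for each character position)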

                batch_x_one.zero_()
                batch_x_one.scatter_(2, batch_x, 2)
                batch_x = batch_x_one
            batch_x = batch_x.to("cuda")
            output = model(batch_x)
            batch_y = torch.Tensor(batch_y).type('torch.LongTensor')
            batch_y = batch_y.to("cuda")
            loss = criterion(output, batch_y)
            loss.backward()
            i += 1
        t2 = time.time()
        print(t2 - t1)
        model.eval()

    tt_loss, tt_acc = run_testing(model,
                                  criterion,
                                  test_X,
                                  test_y,
                                  batch_x_one=batch_x_one)

    results = open("results/" + model_name + ".txt", "w")
    for e in [min_train, min_test, min_val, max_train, max_val, max_test]:
        results.write(str(e) + ", ")
    results.close()
Example #15
File: main.py  Project: prismformore/expAT
    def __init__(self):
        self.log_dir = settings.log_dir
        self.model_dir = settings.model_dir
        ensure_dir(settings.log_dir)
        ensure_dir(settings.model_dir)
        logger.info('set log dir as %s' % settings.log_dir)
        logger.info('set model dir as %s' % settings.model_dir)

        ##################################### Import models ###########################
        self.feature_generator = Baseline(
            last_stride=1, model_path=settings.pretrained_model_path)

        self.feature_embedder_rgb = FeatureEmbedder(2048)
        self.feature_embedder_ir = FeatureEmbedder(2048)
        self.id_classifier = IdClassifier()

        if torch.cuda.is_available():
            self.feature_generator.cuda()
            self.feature_embedder_rgb.cuda()
            self.feature_embedder_ir.cuda()
            self.id_classifier.cuda()

        self.feature_generator = nn.DataParallel(self.feature_generator,
                                                 device_ids=range(
                                                     settings.num_gpu))

        self.feature_embedder_rgb = nn.DataParallel(self.feature_embedder_rgb,
                                                    device_ids=range(
                                                        settings.num_gpu))
        self.feature_embedder_ir = nn.DataParallel(self.feature_embedder_ir,
                                                   device_ids=range(
                                                       settings.num_gpu))
        self.id_classifier = nn.DataParallel(self.id_classifier,
                                             device_ids=range(
                                                 settings.num_gpu))

        ############################# Get Losses & Optimizers #########################
        self.criterion_at = expATLoss()
        self.criterion_identity = CrossEntropyLabelSmoothLoss(
            settings.num_classes, epsilon=0.1)  #torch.nn.CrossEntropyLoss()

        opt_models = [
            self.feature_generator, self.feature_embedder_rgb,
            self.feature_embedder_ir, self.id_classifier
        ]

        def make_optimizer(opt_models):
            train_params = []

            for opt_model in opt_models:
                for key, value in opt_model.named_parameters():
                    if not value.requires_grad:
                        continue
                    lr = settings.BASE_LR
                    weight_decay = settings.WEIGHT_DECAY
                    if "bias" in key:
                        lr = settings.BASE_LR * settings.BIAS_LR_FACTOR
                        weight_decay = settings.WEIGHT_DECAY_BIAS
                    train_params += [{
                        "params": [value],
                        "lr": lr,
                        "weight_decay": weight_decay
                    }]

            optimizer = torch.optim.Adam(train_params)
            return optimizer

        self.optimizer_G = make_optimizer(opt_models)

        self.epoch_count = 0
        self.step = 0
        self.save_steps = settings.save_steps
        self.num_workers = settings.num_workers
        self.writers = {}
        self.dataloaders = {}

        self.sche_G = solver.WarmupMultiStepLR(self.optimizer_G,
                                               milestones=settings.iter_sche,
                                               gamma=0.1)  # default setting
Example #16
0
    def __init__(self, config):
        super(Trainer, self).__init__(config)
        self.datamanager = DataManger(config["data"])

        # model
        self.model = Baseline(
            num_classes=self.datamanager.datasource.get_num_classes("train")
        )

        # summary model
        summary(
            self.model,
            input_size=(3, 256, 128),
            batch_size=config["data"]["batch_size"],
            device="cpu",
        )

        # losses
        cfg_losses = config["losses"]
        self.criterion = Softmax_Triplet_loss(
            num_class=self.datamanager.datasource.get_num_classes("train"),
            margin=cfg_losses["margin"],
            epsilon=cfg_losses["epsilon"],
            use_gpu=self.use_gpu,
        )

        self.center_loss = CenterLoss(
            num_classes=self.datamanager.datasource.get_num_classes("train"),
            feature_dim=2048,
            use_gpu=self.use_gpu,
        )

        # optimizer
        cfg_optimizer = config["optimizer"]
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=cfg_optimizer["lr"],
            weight_decay=cfg_optimizer["weight_decay"],
        )

        self.optimizer_centerloss = torch.optim.SGD(
            self.center_loss.parameters(), lr=0.5
        )

        # learning rate scheduler
        cfg_lr_scheduler = config["lr_scheduler"]
        self.lr_scheduler = WarmupMultiStepLR(
            self.optimizer,
            milestones=cfg_lr_scheduler["steps"],
            gamma=cfg_lr_scheduler["gamma"],
            warmup_factor=cfg_lr_scheduler["factor"],
            warmup_iters=cfg_lr_scheduler["iters"],
            warmup_method=cfg_lr_scheduler["method"],
        )

        # track metric
        self.train_metrics = MetricTracker("loss", "accuracy")
        self.valid_metrics = MetricTracker("loss", "accuracy")

        # save best accuracy for function _save_checkpoint
        self.best_accuracy = None

        # send model to device
        self.model.to(self.device)

        self.scaler = GradScaler()

        # resume model from last checkpoint
        if config["resume"] != "":
            self._resume_checkpoint(config["resume"])
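
A sketch of how a single training step could combine the criterion, the center loss, and the two optimizers set up above, using the GradScaler for mixed precision. The loss weighting, the 1/weight rescaling of the center-loss gradients, and the assumption that the model returns (logits, features) in training mode are not taken from this snippet:

import torch

def train_step(trainer, images, labels, center_loss_weight=0.0005):
    """Hypothetical AMP training step for the Trainer defined above."""
    trainer.optimizer.zero_grad()
    trainer.optimizer_centerloss.zero_grad()

    with torch.cuda.amp.autocast():
        scores, features = trainer.model(images)   # assumed (logits, embeddings) output
        loss = (trainer.criterion(scores, features, labels)
                + center_loss_weight * trainer.center_loss(features, labels))

    trainer.scaler.scale(loss).backward()
    trainer.scaler.step(trainer.optimizer)

    # Undo the loss weighting so the class centers are updated at full learning rate.
    for param in trainer.center_loss.parameters():
        param.grad.data *= 1.0 / center_loss_weight
    trainer.scaler.step(trainer.optimizer_centerloss)
    trainer.scaler.update()
    return loss.item()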
Example #17
0
File: main.py Project: prismformore/expAT
class Session:
    def __init__(self):
        self.log_dir = settings.log_dir
        self.model_dir = settings.model_dir
        ensure_dir(settings.log_dir)
        ensure_dir(settings.model_dir)
        logger.info('set log dir as %s' % settings.log_dir)
        logger.info('set model dir as %s' % settings.model_dir)

        ##################################### Import models ###########################
        self.feature_generator = Baseline(
            last_stride=1, model_path=settings.pretrained_model_path)

        self.feature_embedder_rgb = FeatureEmbedder(2048)
        self.feature_embedder_ir = FeatureEmbedder(2048)
        self.id_classifier = IdClassifier()

        if torch.cuda.is_available():
            self.feature_generator.cuda()
            self.feature_embedder_rgb.cuda()
            self.feature_embedder_ir.cuda()
            self.id_classifier.cuda()

        self.feature_generator = nn.DataParallel(self.feature_generator,
                                                 device_ids=range(
                                                     settings.num_gpu))

        self.feature_embedder_rgb = nn.DataParallel(self.feature_embedder_rgb,
                                                    device_ids=range(
                                                        settings.num_gpu))
        self.feature_embedder_ir = nn.DataParallel(self.feature_embedder_ir,
                                                   device_ids=range(
                                                       settings.num_gpu))
        self.id_classifier = nn.DataParallel(self.id_classifier,
                                             device_ids=range(
                                                 settings.num_gpu))

        ############################# Get Losses & Optimizers #########################
        self.criterion_at = expATLoss()
        self.criterion_identity = CrossEntropyLabelSmoothLoss(
            settings.num_classes, epsilon=0.1)  # alternative: torch.nn.CrossEntropyLoss()

        opt_models = [
            self.feature_generator, self.feature_embedder_rgb,
            self.feature_embedder_ir, self.id_classifier
        ]

        def make_optimizer(opt_models):
            train_params = []

            for opt_model in opt_models:
                for key, value in opt_model.named_parameters():
                    if not value.requires_grad:
                        continue
                    lr = settings.BASE_LR
                    weight_decay = settings.WEIGHT_DECAY
                    if "bias" in key:
                        lr = settings.BASE_LR * settings.BIAS_LR_FACTOR
                        weight_decay = settings.WEIGHT_DECAY_BIAS
                    train_params += [{
                        "params": [value],
                        "lr": lr,
                        "weight_decay": weight_decay
                    }]

            optimizer = torch.optim.Adam(train_params)
            return optimizer

        self.optimizer_G = make_optimizer(opt_models)

        self.epoch_count = 0
        self.step = 0
        self.save_steps = settings.save_steps
        self.num_workers = settings.num_workers
        self.writers = {}
        self.dataloaders = {}

        self.sche_G = solver.WarmupMultiStepLR(self.optimizer_G,
                                               milestones=settings.iter_sche,
                                               gamma=0.1)  # default setting

    def tensorboard(self, name):
        self.writers[name] = SummaryWriter(
            os.path.join(self.log_dir, name + '.events'))
        return self.writers[name]

    def write(self, name, out):
        for k, v in out.items():
            self.writers[name].add_scalar(name + '/' + k, v, self.step)

        out['G_lr'] = self.optimizer_G.param_groups[0]['lr']
        out['step'] = self.step
        out['epoch_count'] = self.epoch_count
        outputs = ["{}:{:.4g}".format(k, v) for k, v in out.items()]
        logger.info(name + '--' + ' '.join(outputs))

    def save_checkpoints(self, name):
        ckp_path = os.path.join(self.model_dir, name)
        obj = {
            'feature_generator': self.feature_generator.state_dict(),
            'feature_embedder_rgb': self.feature_embedder_rgb.state_dict(),
            'feature_embedder_ir': self.feature_embedder_ir.state_dict(),
            'id_classifier': self.id_classifier.state_dict(),
            'clock': self.step,
            'epoch_count': self.epoch_count,
            'opt_G': self.optimizer_G.state_dict(),
        }
        torch.save(obj, ckp_path)

    def load_checkpoints(self, name):
        ckp_path = os.path.join(self.model_dir, name)
        try:
            obj = torch.load(ckp_path)
            print('load checkpoint: %s' % ckp_path)
        except FileNotFoundError:
            return
        self.feature_generator.load_state_dict(obj['feature_generator'])
        self.feature_embedder_rgb.load_state_dict(obj['feature_embedder_rgb'])
        self.feature_embedder_ir.load_state_dict(obj['feature_embedder_ir'])
        self.id_classifier.load_state_dict(obj['id_classifier'])
        self.optimizer_G.load_state_dict(obj['opt_G'])
        self.step = obj['clock']
        self.epoch_count = obj['epoch_count']
        self.sche_G.last_epoch = self.step

    def load_checkpoints_delf_init(self, name):
        ckp_path = os.path.join(self.model_dir, name)
        obj = torch.load(ckp_path)
        self.backbone.load_state_dict(obj['backbone'])

    def cal_fea(self, x, domain_mode):
        feat = self.feature_generator(x)
        if domain_mode == 'rgb':
            return self.feature_embedder_rgb(feat)
        elif domain_mode == 'ir':
            return self.feature_embedder_ir(feat)

    def inf_batch(self, batch):
        alpha = settings.alpha
        beta = settings.beta

        anchor_rgb, positive_rgb, negative_rgb, anchor_ir, positive_ir, \
        negative_ir, anchor_label, modality_rgb, modality_ir = batch

        if torch.cuda.is_available():
            anchor_rgb = anchor_rgb.cuda()
            positive_rgb = positive_rgb.cuda()
            negative_rgb = negative_rgb.cuda()
            anchor_ir = anchor_ir.cuda()
            positive_ir = positive_ir.cuda()
            negative_ir = negative_ir.cuda()
            anchor_label = anchor_label.cuda()

        anchor_rgb_features = self.cal_fea(anchor_rgb, 'rgb')
        positive_rgb_features = self.cal_fea(positive_rgb, 'rgb')
        negative_rgb_features = self.cal_fea(negative_rgb, 'rgb')

        anchor_ir_features = self.cal_fea(anchor_ir, 'ir')
        positive_ir_features = self.cal_fea(positive_ir, 'ir')
        negative_ir_features = self.cal_fea(negative_ir, 'ir')

        at_loss_rgb = self.criterion_at.forward(anchor_rgb_features,
                                                positive_ir_features,
                                                negative_ir_features)

        at_loss_ir = self.criterion_at.forward(anchor_ir_features,
                                               positive_rgb_features,
                                               negative_rgb_features)

        at_loss = at_loss_rgb + at_loss_ir

        predicted_id_rgb = self.id_classifier(anchor_rgb_features)
        predicted_id_ir = self.id_classifier(anchor_ir_features)

        identity_loss = self.criterion_identity(predicted_id_rgb, anchor_label) + \
                        self.criterion_identity(predicted_id_ir, anchor_label)

        loss_G = alpha * at_loss + beta * identity_loss

        self.optimizer_G.zero_grad()
        loss_G.backward()
        self.optimizer_G.step()

        self.write('train_stats', {
            'loss_G': loss_G,
            'at_loss': at_loss,
            'identity_loss': identity_loss
        })
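
A sketch of how a training entry point might drive this Session; the dataloader, the number of epochs, and the checkpoint name are assumptions, and only the Session methods come from the class above:

def run_train(train_loader, num_epochs=100):
    """Hypothetical driver loop for the Session class above."""
    sess = Session()
    sess.tensorboard('train_stats')
    sess.load_checkpoints('latest.pth')        # silently skipped if no checkpoint exists

    while sess.epoch_count < num_epochs:
        for batch in train_loader:
            sess.sche_G.step()                 # warmup / multi-step LR schedule per iteration
            sess.inf_batch(batch)              # forward, losses, backward, optimizer step
            sess.step += 1
            if sess.step % sess.save_steps == 0:
                sess.save_checkpoints('latest.pth')
        sess.epoch_count += 1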
Example #18
0
valid_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           num_workers=4,
                                           collate_fn=collate_fn,
                                           sampler=valid_sampler)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1,
                                          num_workers=4,
                                          shuffle=False)

config = {
    "epochs": 100,
    "device": get_device(),
    "sampling": True,
    "temperature": 1.0,
    "max_sentence_length": 18
}

embedding_dim = 256
hidden_dim = 512
vocab_size = len(vocab)
model = Baseline(embedding_dim, hidden_dim, vocab_size, vanilla=False)

criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=5e-4)

model.cuda()
train(model, optimizer, criterion, train_loader, valid_loader, vocab, config)
test(model, criterion, test_loader, vocab, config)
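
valid_loader above reads from train_dataset through a sampler; a minimal sketch of how the train/valid samplers could be derived from the one dataset (the 90/10 split ratio is an assumption):

import numpy as np
from torch.utils.data import SubsetRandomSampler

# Hypothetical split of train_dataset indices into train / validation subsets.
indices = np.random.permutation(len(train_dataset))
split = int(0.9 * len(indices))

train_sampler = SubsetRandomSampler(indices[:split].tolist())
valid_sampler = SubsetRandomSampler(indices[split:].tolist())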
Example #19
0
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)
    # Get model
    log.info('Building model...')

    if (args.model == 'baseline'):
        model = Baseline(word_vectors=word_vectors,
                         hidden_size=args.hidden_size,
                         drop_prob=args.drop_prob)
        optimizer = optim.Adadelta(model.parameters(),
                                   args.lr,
                                   weight_decay=args.l2_wd)

    elif (args.model == 'bidaf'):
        model = BiDAF(word_vectors=word_vectors,
                      char_vectors=char_vectors,
                      char_emb_dim=args.char_emb_dim,
                      hidden_size=args.hidden_size,
                      drop_prob=args.drop_prob)
        optimizer = optim.Adadelta(model.parameters(),
                                   args.lr,
                                   weight_decay=args.l2_wd)

    elif (args.model == 'qanet'):
        model = QANet(word_vectors=word_vectors,
                      char_vectors=char_vectors,
                      char_emb_dim=args.char_emb_dim,
                      hidden_size=args.hidden_size,
                      n_conv_emb_enc=args.n_conv_emb,
                      n_conv_mod_enc=args.n_conv_mod,
                      drop_prob_word=0.1,
                      drop_prob_char=0.05,
                      kernel_size_emb_enc_block=7,
                      kernel_size_mod_enc_block=7,
                      n_heads=args.n_heads)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(args.beta_1, args.beta_2),
                               eps=args.epsilon,
                               weight_decay=args.l2_wd)

    elif (args.model == 'qanet_out'):
        model = QANet(word_vectors=word_vectors,
                      char_vectors=char_vectors,
                      char_emb_dim=args.char_emb_dim,
                      hidden_size=args.hidden_size,
                      n_conv_emb_enc=args.n_conv_emb,
                      n_conv_mod_enc=args.n_conv_mod,
                      drop_prob_word=0.1,
                      drop_prob_char=0.05,
                      kernel_size_emb_enc_block=7,
                      kernel_size_mod_enc_block=7,
                      n_heads=args.n_heads)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(args.beta_1, args.beta_2),
                               eps=args.epsilon,
                               weight_decay=args.l2_wd)

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get scheduler (the optimizer was created alongside the model above)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
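
The loop above maintains an exponential moving average of the weights through util.EMA and swaps it in for evaluation; a minimal sketch of such a helper (an assumed implementation, not the project's util module):

class EMA:
    """Exponential moving average of model parameters (assumed minimal version)."""

    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {name: p.data.clone()
                       for name, p in model.named_parameters() if p.requires_grad}
        self.backup = {}

    def __call__(self, model, step):
        # Warm up the decay so early steps rely more on the raw weights.
        decay = min(self.decay, (1 + step) / (10 + step))
        for name, p in model.named_parameters():
            if p.requires_grad:
                self.shadow[name] = decay * self.shadow[name] + (1 - decay) * p.data

    def assign(self, model):
        # Swap the EMA weights in for evaluation, keeping the originals.
        for name, p in model.named_parameters():
            if p.requires_grad:
                self.backup[name] = p.data
                p.data = self.shadow[name]

    def resume(self, model):
        # Restore the original training weights.
        for name, p in model.named_parameters():
            if p.requires_grad:
                p.data = self.backup[name]
        self.backup = {}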
Example #20
0
import numpy as np

if __name__ == '__main__':
    parser = Flags()
    parser.set_arguments()
    FG = parser.parse_args()
    c_code, axis, z_dim = FG.c_code, FG.axis, FG.z_dim
    device = torch.device(FG.devices[0])
    torch.cuda.set_device(FG.devices[0])

    nets = []
    for i in range(FG.fold):
        parser.configure('cur_fold', i)
        parser.configure('ckpt_dir')
        FG = parser.load()
        net = Baseline(FG.ckpt_dir, len(FG.labels))
        net.to(device)
        net.load(epoch=None, optimizer=None, is_best=True)
        net.eval()
        nets += [net]

    #G = Generator(FG)
    G = torch.nn.DataParallel(Generator(z_dim, c_code, axis))

    # state_dict = torch.load(os.path.join('BiGAN-info-c4-f', 'G.pth'), 'cpu')
    state_dict = torch.load(os.path.join('157-G8', 'G.pth'), 'cpu')
    G.load_state_dict(state_dict)
    G.to(device)
    G.eval()

    if axis == 1:
Example #21
0
def main(_):
    # Load MNIST data
    mnist = load_mnist()
    pre_training = FLAGS.pre_train

    # Define the deep learning model
    if FLAGS.model == 'Base':
        pre_training = False
        kernlen = int(FLAGS.frame_size / 2)
        net = Baseline(directory=FLAGS.dir,
                       optimizer=FLAGS.optimizer,
                       learning_rate=FLAGS.learning_rate,
                       layer_sizes=FLAGS.arch,
                       num_features=FLAGS.num_features,
                       num_filters=FLAGS.num_filters,
                       frame_size=FLAGS.frame_size)
    elif FLAGS.model == 'Cat':
        kernlen = int(FLAGS.frame_size / 2)
        net = Cat_Net(layer_sizes=FLAGS.arch,
                      optimizer=FLAGS.optimizer,
                      num_filters=FLAGS.num_filters,
                      num_features=FLAGS.num_features,
                      num_samples=FLAGS.num_samples,
                      frame_size=FLAGS.frame_size,
                      num_cat=FLAGS.num_cat,
                      learning_rate=FLAGS.learning_rate,
                      feedback_distance=FLAGS.feedback_distance,
                      directory=FLAGS.dir)
    elif FLAGS.model == 'Gumbel':
        kernlen = int(FLAGS.frame_size / 2)
        net = Gumbel_Net(layer_sizes=FLAGS.arch,
                         optimizer=FLAGS.optimizer,
                         num_filters=FLAGS.num_filters,
                         num_features=FLAGS.num_features,
                         frame_size=FLAGS.frame_size,
                         num_cat=FLAGS.num_cat,
                         learning_rate=FLAGS.learning_rate,
                         feedback_distance=FLAGS.feedback_distance,
                         directory=FLAGS.dir,
                         second_conv=FLAGS.second_conv,
                         initial_tau=FLAGS.initial_tau,
                         tau_decay=FLAGS.tau_decay,
                         reg=FLAGS.reg)
    elif FLAGS.model == 'RawG':
        pre_training = False
        kernlen = 60
        net = Raw_Gumbel_Net(layer_sizes=FLAGS.arch,
                             optimizer=FLAGS.optimizer,
                             num_filters=FLAGS.num_filters,
                             num_features=FLAGS.frame_size**2,
                             frame_size=FLAGS.frame_size,
                             num_cat=FLAGS.num_cat,
                             learning_rate=FLAGS.learning_rate,
                             feedback_distance=FLAGS.feedback_distance,
                             directory=FLAGS.dir,
                             second_conv=FLAGS.second_conv,
                             initial_tau=FLAGS.initial_tau,
                             meta=None)
    elif FLAGS.model == 'RL':
        kernlen = int(FLAGS.frame_size / 2)
        net = Bernoulli_Net(layer_sizes=FLAGS.arch,
                            optimizer=FLAGS.optimizer,
                            num_filters=FLAGS.num_filters,
                            num_features=FLAGS.num_features,
                            num_samples=FLAGS.num_samples,
                            frame_size=FLAGS.frame_size,
                            learning_rate=FLAGS.learning_rate,
                            feedback_distance=FLAGS.feedback_distance,
                            directory=FLAGS.dir,
                            second_conv=FLAGS.second_conv)
    elif FLAGS.model == 'RawB':
        pre_training = True
        kernlen = 60
        net = Raw_Bernoulli_Net(layer_sizes=FLAGS.arch,
                                optimizer=FLAGS.optimizer,
                                num_filters=FLAGS.num_filters,
                                num_features=FLAGS.frame_size**2,
                                num_samples=FLAGS.num_samples,
                                frame_size=FLAGS.frame_size,
                                learning_rate=FLAGS.learning_rate,
                                feedback_distance=FLAGS.feedback_distance,
                                directory=FLAGS.dir,
                                second_conv=FLAGS.second_conv)

    X_train, train_coords = convertCluttered(
        mnist.train.images,
        finalImgSize=FLAGS.frame_size,
        number_patches=FLAGS.number_patches)
    y_train = mnist.train.labels

    train_coords = np.array(
        [gkern(coord[0], coord[1], kernlen=kernlen) for coord in train_coords])

    X_test, test_coords = convertCluttered(mnist.test.images,
                                           finalImgSize=FLAGS.frame_size,
                                           number_patches=FLAGS.number_patches)
    # test_coords = np.array([gkern(coord[0], coord[1], kernlen=20) for coord in test_coords])
    y_test = mnist.test.labels

    batch_size = FLAGS.batch_size
    if pre_training:
        print("Pre-training")
        for epoch in tqdm(range(FLAGS.epochs)):
            _x, _y = input_fn(X_test, y_test, batch_size=batch_size)
            net.evaluate(_x, _y, pre_trainining=True)
            X_train, train_coords = convertCluttered(
                mnist.train.images,
                finalImgSize=FLAGS.frame_size,
                number_patches=FLAGS.number_patches)
            y_train = mnist.train.labels
            # print(net.confusion_matrix(_x, _y))
            net.save()
            X_train, y_train, train_coords = shuffle_in_unison(
                X_train, y_train, train_coords)
            for i in range(0, len(X_train), batch_size):
                _x, _y = input_fn(X_train[i:i + batch_size],
                                  y_train[i:i + batch_size],
                                  batch_size=batch_size)
                net.pre_train(_x, _y, dropout=0.8)

    print("Training")
    for epoch in tqdm(range(FLAGS.epochs)):
        X_train, y_train, train_coords = shuffle_in_unison(
            X_train, y_train, train_coords)
        _x, _y = input_fn(X_test, y_test, batch_size=batch_size)
        net.evaluate(_x, _y)
        X_train, train_coords = convertCluttered(
            mnist.train.images,
            finalImgSize=FLAGS.frame_size,
            number_patches=FLAGS.number_patches)
        y_train = mnist.train.labels
        # print(net.confusion_matrix(_x, _y))
        net.save()
        for i in range(0, len(X_train), batch_size):
            _x, _y = X_train[i:i + batch_size], y_train[i:i + batch_size]
            net.train(_x, _y, dropout=FLAGS.dropout)

    if FLAGS.model == 'RL' or FLAGS.model == 'Gumbel' or FLAGS.model == 'Cat' or FLAGS.model == 'RawB' or FLAGS.model == 'RawG':
        print("Feedback Training")
        for epoch in tqdm(range(FLAGS.epochs)):
            _x, _y = input_fn(X_test, y_test, batch_size=batch_size)
            net.evaluate(_x, _y)
            X_train, train_coords = convertCluttered(
                mnist.train.images,
                finalImgSize=FLAGS.frame_size,
                number_patches=FLAGS.number_patches)
            y_train = mnist.train.labels
            train_coords = np.array([
                gkern(coord[0], coord[1], kernlen=kernlen)
                for coord in train_coords
            ])
            # print(net.confusion_matrix(_x, _y))
            net.save()
            X_train, y_train, train_coords = shuffle_in_unison(
                X_train, y_train, train_coords)
            for i in range(0, len(X_train), batch_size):
                _x, _y, _train_coords = input_fn(X_train,
                                                 y_train,
                                                 train_coords,
                                                 batch_size=batch_size)
                net.feedback_train(_x,
                                   _y,
                                   _train_coords,
                                   dropout=FLAGS.dropout)
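
The script above turns patch coordinates into target maps with gkern; a plausible sketch of such a helper, assuming it produces a kernlen x kernlen Gaussian bump centred on the digit location (the sigma and normalisation are guesses):

import numpy as np

def gkern(x, y, kernlen=30, sigma=3.0):
    """Assumed helper: kernlen x kernlen attention map with a Gaussian peak at (x, y)."""
    grid = np.arange(kernlen)
    xx, yy = np.meshgrid(grid, grid)
    g = np.exp(-((xx - x) ** 2 + (yy - y) ** 2) / (2.0 * sigma ** 2))
    return g / g.max()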
Example #22
0
File: sdutils.py Project: jkittley/visen
def getDayBaseline(meter, channel, day, data_type):
    day = day.date()
    try:
        baseline = Baseline.objects.get(date=day, 
                                        sensor=meter,
                                        channel=channel)
        created = False
    except Baseline.DoesNotExist:
        baseline = Baseline(date=day, 
                            sensor=meter,
                            channel=channel,
                            value=0.0)
        created = True
    
    logger.debug('getDayBaseline')
    #powerFactor = 60 * 60.0 / channel.reading_frequency
    
    valid = False
    if not created:
        lastModifiedDay = baseline.last_modified.date()
        if day == date.today():
            if (datetime.now() - baseline.last_modified) < timedelta(hours=1):
                # TODO: check me!
                valid = True
        else: # day is not today
            if lastModifiedDay > day: 
                valid = True
    
    logger.debug('valid: ' + str(valid))
    
    if valid:
        return baseline.value
    else:
        # filter all energy data from the specific reading meter and specific period (1 day)
        filter_energy_objects = SensorReading.objects.filter(
                             sensor=meter, channel=channel).filter(
                             timestamp__gte=day).filter(
                             timestamp__lt=(day+timedelta(days=1)) )

        logger.debug('filter_energy_objects.count(): ' + 
                     str(filter_energy_objects.count()))
        
        if filter_energy_objects.count() > 0:
            energy = [x.value for x in filter_energy_objects]
            
            # hard-coded subset size for moving average calculation
            window_size = ALWAYS_ON_WINDOW_SIZE
            
            # moving average over a rectangular window; the baseline is its minimum
            mav = moving_average(energy, window_size)

            try:
                min_baseline = min( mav )
            except ValueError:
                min_baseline = 0 
        else:
            min_baseline = 0
        
        baseline.value = min_baseline
        try:
            baseline.save()
        except IntegrityError:
            b2 = Baseline.objects.get(date=day, 
                                            sensor=meter,
                                            channel=channel)
            b2.value = min_baseline
            b2.save()
        
        return min_baseline
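
A plausible implementation of the moving_average helper used above, assuming a rectangular window and numpy's 'valid' convolution:

import numpy as np

def moving_average(values, window_size):
    """Assumed helper: simple moving average over a rectangular window."""
    window = np.ones(int(window_size)) / float(window_size)
    return np.convolve(values, window, mode='valid')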
Example #23
0
File: train.py Project: yhung119/PointNet3
def main():
    saver = utils.Saver(opt)

    # randomize seed
    opt.manualSeed = random.randint(1, 10000)  # fix seed
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    torch.cuda.manual_seed_all(opt.manualSeed)

    # load data
    root = "data/modelnet40_ply_hdf5_2048/"  # alternative: "data/modelnet40_normal_resampled"
    use_cuda = torch.cuda.is_available()

    transforms_list = []
    random_permute = utils.Random_permute(opt.num_points, delta=opt.distance)
    # load transformations
    if opt.random_input:
        print("random_input")
        transforms_list.append(random_permute)

    # Load dataset / data loader
    train_dataset = data.ModelNetDataset(
        root,
        train=True,
        sort=opt.sort,
        transform=transforms.Compose(transforms_list),
        distance=opt.distance,
        normal=opt.normal)
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batchSize,
                              shuffle=True,
                              num_workers=opt.workers)

    test_dataset = data.ModelNetDataset(root,
                                        train=False,
                                        sort=opt.sort,
                                        distance=opt.distance,
                                        normal=opt.normal)
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batchSize,
                             shuffle=False,
                             num_workers=opt.workers)

    # define model
    ndim = 6 if opt.distance or opt.normal else 3
    if opt.model == 'lstm':
        model = Baseline(input_dim=ndim, maxout=opt.elem_max)
    elif opt.model == 'lstm_mlp':
        model = LSTM_mlp(input_dim=ndim,
                         maxout=opt.elem_max,
                         mlp=[64, 128, 256, 512],
                         fc=[512, 256, 40])
    elif opt.model == 'test':
        model = Test(input_dim=ndim, maxout=opt.elem_max)

    # load specified pre-trained model
    if opt.path != '':
        model.load_state_dict(torch.load(opt.path))

    # define optimizer and loss function
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.learning_rate,
                           weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()

    # transfer model and criterion to cuda if available
    if use_cuda:
        model = model.cuda()  # or nn.DataParallel(model).cuda() for multi-GPU training
        criterion = criterion.cuda()

    best_model_wts = model.state_dict()

    early_stopping = utils.Early_stopping(opt.early_stopping, patience=15)

    saver.log_parameters(model.parameters())

    for epoch in range(opt.nepoch):
        adjust_learning_rate(optimizer, epoch, saver)

        train(model, optimizer, criterion, saver, train_loader, epoch)

        test_loss = test(model, criterion, saver, test_loader, epoch)

        early_stopping.update(test_loss)
        if early_stopping.stop():
            break

    saver.save_result()
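
A sketch of what the utils.Early_stopping helper used above might look like: stop once the monitored test loss has not improved for `patience` consecutive epochs (this is an assumed implementation):

class Early_stopping:
    """Assumed minimal early-stopping helper; `enabled` mirrors opt.early_stopping."""

    def __init__(self, enabled, patience=15):
        self.enabled = enabled
        self.patience = patience
        self.best = float('inf')
        self.bad_epochs = 0

    def update(self, loss):
        if loss < self.best:
            self.best = loss
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1

    def stop(self):
        return self.enabled and self.bad_epochs >= self.patience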
Example #24
0
File: main.py Project: BadGuy-wang/HSI_BPN
#%%
data, label = load_data(data_path, label_path, 'indian_pines')
#%%
get_value_data(data, label)
#%%
DATA = pd.read_csv('datasets/Indian_pines.csv', header=None).values
data_D = DATA[:, :-1]
data_L = DATA[:, -1]
data_train, data_test, label_train, label_test = train_test_split(
    data_D, data_L, test_size=0.8)
#%%
train_set = GetLoader(data_train, label_train)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_set = GetLoader(data_test, label_test)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
#%%
data_p, label_p = next(iter(train_loader))
# print(data_p[:-1])
#%%
net = Baseline(INPUT_CHANNELS, CLASSES, dropout=False)
optimizer = optim.Adam(net.parameters(), lr=0.0001)
weight = torch.ones(CLASSES)
weight[torch.LongTensor([0])] = 0.
w = weight.to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=w)
#%%
train_loss, val_accuracy = train(net, optimizer, criterion, train_loader,
                                 val_loader, EPOCH, DEVICE)

plot_curve(train_loss)
plot_curve(val_accuracy)
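
A minimal sketch of the GetLoader dataset wrapper used above, assuming it simply pairs each spectral vector with its label as tensors:

import torch
from torch.utils.data import Dataset

class GetLoader(Dataset):
    """Assumed minimal Dataset wrapper pairing each sample with its label."""

    def __init__(self, data, labels):
        self.data = torch.as_tensor(data, dtype=torch.float32)
        self.labels = torch.as_tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]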
Example #25
0
y = ['close']
wp = data_processor.WindowGenerator(input_width,
                                    label_width,
                                    shift,
                                    train_df=train,
                                    val_df=validate,
                                    test_df=test,
                                    label_columns=y)
print("wp:")
print(wp)
print("============")
for example_inputs, example_labels in wp.train.take(1):
    print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
    print(f'Labels shape (batch, time, features): {example_labels.shape}')

bl = Baseline()

dense = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1)
])

conv_model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=32, kernel_size=(3, ), activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1),
])

mr = ModelRunner()
val_performance = {}
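
The no-argument Baseline() created above follows the pattern from the TensorFlow time-series tutorial, where the baseline simply predicts that the next value equals the current one; a sketch under that assumption:

import tensorflow as tf

class Baseline(tf.keras.Model):
    """Assumed no-change baseline: predict that the next value equals the current value."""

    def __init__(self, label_index=None):
        super().__init__()
        self.label_index = label_index

    def call(self, inputs):
        if self.label_index is None:
            return inputs
        result = inputs[:, :, self.label_index]
        return result[:, :, tf.newaxis]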