Example #1
def evaluateAll(encoder, decoder, checkpoint_dir, n_iters):
    dataloader = get_dataloader(
        SummarizationDataset("data/finished/test.txt", "data/word2idx.json"))

    encoder.load_state_dict(
        torch.load(os.path.join(checkpoint_dir, 'encoder_4.pth')))
    decoder.load_state_dict(
        torch.load(os.path.join(checkpoint_dir, 'decoder_4.pth')))
    # load_model(encoder, model_dir=checkpoint_dir, appendix='Encoder', iter="l")
    # load_model(decoder, model_dir=checkpoint_dir, appendix='Decoder', iter="l")

    data_iter = iter(dataloader)

    for i in range(1, n_iters):
        try:
            batch = next(data_iter)
        except StopIteration:
            # Restart the iterator once the dataloader is exhausted.
            data_iter = iter(dataloader)
            batch = next(data_iter)

        input_tensor = batch[0][0].to(device)

        output_words, _ = evaluate(encoder, decoder, input_tensor)
        output_sentence = ' '.join(output_words)
        # NOTE: `fname` and `system_dir` are assumed to be defined at module
        # level; `system_dir` must end with a path separator.
        outf = fname + '.' + str(i) + '.txt'
        with open(system_dir + outf, 'w') as fmod:
            fmod.write(output_sentence)
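
Examples #1 and #2 lean on a get_dataloader helper that the snippets do not define. A minimal sketch of what it might look like for this summarization project, assuming a standard PyTorch DataLoader (batch_size=1 matches the batch[0][0] indexing above; the real helper may add a padding collate_fn):

from torch.utils.data import DataLoader

def get_dataloader(dataset, batch_size=1, shuffle=False):
    # Hypothetical helper: wrap the Dataset in a plain DataLoader.
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

Note that Examples #3, #5, and #6 call a different get_dataloader(cfg, split, ...) from another codebase.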
Example #2
def trainIters(encoder,
               decoder,
               n_iters,
               checkpoint_dir,
               print_every=1000,
               plot_every=100,
               learning_rate=0.005,
               save_every=1000):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    dataloader = get_dataloader(
        SummarizationDataset("data/finished/train.txt", "data/word2idx.json"))

    criterion = nn.NLLLoss()
    start_iter = load_model(encoder,
                            model_dir=checkpoint_dir,
                            appendix='Encoder',
                            iter="l")
    start_iter_ = load_model(decoder,
                             model_dir=checkpoint_dir,
                             appendix='Decoder',
                             iter="l")
    assert start_iter == start_iter_

    data_iter = iter(dataloader)

    if start_iter < n_iters:

        for i in range(start_iter, n_iters):
            try:
                batch = next(data_iter)
            except StopIteration:
                # Restart the iterator once the dataloader is exhausted.
                data_iter = iter(dataloader)
                batch = next(data_iter)

            input_tensor = batch[0][0].to(device)
            target_tensor = batch[1][0].to(device)

            loss = train(input_tensor, target_tensor, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total += loss
            plot_loss_total += loss

            if i % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('(%d %d%%) %.4f' %
                      (i, i / n_iters * 100, print_loss_avg))

            if i % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

            # Save checkpoint
            # torch.save(encoder.state_dict(), os.path.join(checkpoint_dir, "encoder_{}.pth".format(iter)))
            # torch.save(decoder.state_dict(), os.path.join(checkpoint_dir, "decoder_{}.pth".format(iter)))
            if (i + 1) % save_every == 0:
                save_model(encoder,
                           model_dir=checkpoint_dir,
                           appendix="Encoder",
                           iter=i + 1,
                           save_num=3,
                           save_step=save_every)
                save_model(decoder,
                           model_dir=checkpoint_dir,
                           appendix="Decoder",
                           iter=i + 1,
                           save_num=3,
                           save_step=save_every)
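
The train() call in the loop above is external to the snippet. A sketch of a single seq2seq training step in the style of the standard PyTorch encoder-decoder tutorial, which is consistent with the SGD optimizers and NLLLoss used here; SOS_token, initHidden(), and always-on teacher forcing are assumptions:

import torch

SOS_token = 0  # assumed start-of-sequence index; match your word2idx.json

def train(input_tensor, target_tensor, encoder, decoder,
          encoder_optimizer, decoder_optimizer, criterion):
    # Run one optimization step over a single (input, target) pair.
    encoder_hidden = encoder.initHidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    target_length = target_tensor.size(0)
    loss = 0

    # Encode the source sequence token by token.
    for ei in range(input_tensor.size(0)):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

    decoder_input = torch.tensor([[SOS_token]], device=input_tensor.device)
    decoder_hidden = encoder_hidden

    # Decode with teacher forcing: feed the ground-truth token at each step.
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])
        decoder_input = target_tensor[di]

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_length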
Example #3
def main(args):
    def log_string(msg):
        logger.info(msg)
        print(msg)

    cfg = get_config(args)
    '''LOG'''
    log_dir = pjoin(cfg['experiment_dir'], 'log')
    ensure_dirs(log_dir)

    logger = logging.getLogger("TrainModel")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler('%s/log.txt' % (log_dir))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    log_string('PARAMETER ...')
    log_string(cfg)
    '''DATA'''
    train_dataloader = get_dataloader(cfg, 'train', shuffle=True)
    test_dataloader = get_dataloader(cfg, 'test')

    if args.use_val is not None:
        val_dataloader = get_dataloader(cfg, args.use_val)
    else:
        val_dataloader = None
    '''TRAINER'''
    trainer = Trainer(cfg, logger)
    start_epoch = trainer.resume()

    def test_all():
        '''testing'''
        test_loss = {}
        for i, data in enumerate(test_dataloader):
            pred_dict, loss_dict = trainer.test(data)
            loss_dict['cnt'] = 1
            add_dict(test_loss, loss_dict)

        cnt = test_loss.pop('cnt')
        log_loss_summary(test_loss, cnt,
                         lambda x, y: log_string('Test {} is {}'.format(x, y)))

        if val_dataloader is not None:
            val_loss = {}
            for i, data in enumerate(val_dataloader):
                pred_dict, loss_dict = trainer.test(data)
                loss_dict['cnt'] = 1
                add_dict(val_loss, loss_dict)

            cnt = val_loss.pop('cnt')
            log_loss_summary(
                val_loss, cnt, lambda x, y: log_string('{} {} is {}'.format(
                    args.use_val, x, y)))

    for epoch in range(start_epoch, cfg['total_epoch']):
        trainer.step_epoch()
        train_loss = {}
        '''training'''
        for i, data in enumerate(train_dataloader):
            loss_dict = trainer.update(data)
            loss_dict['cnt'] = 1
            add_dict(train_loss, loss_dict)

        cnt = train_loss.pop('cnt')
        log_loss_summary(
            train_loss, cnt,
            lambda x, y: log_string('Train {} is {}'.format(x, y)))

        if (epoch + 1) % cfg['freq']['save'] == 0:
            trainer.save()

        test_all()
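
add_dict and log_loss_summary appear throughout Examples #3, #5, and #6 without definitions. A plausible sketch consistent with how they are called, where 'cnt' accumulates the batch count and each loss term is averaged before logging (hypothetical, not necessarily the project's actual helpers):

def add_dict(total, new):
    # Sum each term of `new` into the running totals in `total`.
    for key, value in new.items():
        total[key] = total.get(key, 0) + value

def log_loss_summary(loss_dict, cnt, log_fn):
    # Report the per-batch average of every accumulated loss term.
    for key, value in loss_dict.items():
        log_fn(key, value / cnt)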
Example #4
        # (Fragment: body of the per-step loop inside tracking(); the
        # clustering that produces learnedLabels happens upstream.)
        # evaluate the performance
        matchings = maxmatching.getMaxMatching(list(learnedLabels), trueLabels)  # dict
        acc = computeAccuracy(learnedLabels, trueLabels, matchings)
        print('Step {} : Accuracy = {}'.format(step, acc))
        cumulativeAccuracy += acc
        # ####################################################
        #  vis
        # ####################################################
        if verbose != '':
            img = cv2.imread('data/{}'.format(file_path))
            plt.imshow(img[:, :, [2, 1, 0]], alpha=0.5)
            clusterData_2d = np.array(clusterData).reshape(-1, 2)
            colors_list = list(seaborn.xkcd_rgb.keys())
            if verbose == 'trueLabel':
                colors = [seaborn.xkcd_rgb[colors_list[int(k) % 200 * 3]] for k in trueLabels]
            else:
                colors = [seaborn.xkcd_rgb[colors_list[int(k) % 200 * 3]] for k in learnedLabels]
            plt.scatter((1 + clusterData_2d[:, 1]) * img.shape[1] / 2.0,
                        (1 + clusterData_2d[:, 0]) * img.shape[0] / 2.0,
                        marker='x',
                        label='true label' if verbose == 'trueLabel' else 'learned label',
                        linewidths=2, color=colors)
            plt.pause(3)
            plt.cla()
        # ####################################################
    print('Average Accuracy: {}%'.format(cumulativeAccuracy * 1.0 / len(dataloader)))
    print('Done!')

if __name__ == '__main__':
    args = parser()
    dataloader = get_dataloader(args.dataset)
    tracking(dataloader, verbose=args.verbose)
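
computeAccuracy in Example #4 is likewise external. A minimal sketch, assuming getMaxMatching returns a dict that maps each learned cluster label to its best-matching true label (hypothetical helper, shown only to make the evaluation step concrete):

def computeAccuracy(learnedLabels, trueLabels, matchings):
    # Remap each learned label through the matching, then measure agreement.
    remapped = [matchings.get(k, -1) for k in learnedLabels]
    correct = sum(int(r == t) for r, t in zip(remapped, trueLabels))
    return correct / float(len(trueLabels))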
Example #5
def main(args):
    def log_string(msg):
        logger.info(msg)
        print(msg)

    cfg = get_config(args, save=False)

    '''LOG'''
    log_dir = pjoin(cfg['experiment_dir'], 'log')
    ensure_dirs(log_dir)

    logger = logging.getLogger("TestModel")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler('%s/log_test.txt' % (log_dir))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    log_string('PARAMETER ...')
    log_string(cfg)

    '''TRAINER'''
    trainer = Trainer(cfg, logger)
    trainer.resume()

    '''testing'''
    save = cfg['save']
    no_eval = cfg['no_eval']

    dataset_name = args.mode_name

    test_dataloader = get_dataloader(cfg, dataset_name)
    test_loss = {'cnt': 0}

    zero_time = time.time()
    time_dict = {'data_proc': 0.0, 'network': 0.0}
    total_frames = 0

    for i, data in tqdm(enumerate(test_dataloader), total=len(test_dataloader), smoothing=0.9):
        num_frames = len(data)
        total_frames += num_frames
        print(f'Trajectory {i}, {num_frames:8} frames****************************')

        start_time = time.time()
        elapse = start_time - zero_time
        time_dict['data_proc'] += elapse
        print(f'Data Preprocessing: {elapse:8.2f}s {num_frames / elapse:8.2f}FPS')

        pred_dict, loss_dict = trainer.test(data, save=save, no_eval=no_eval)

        elapse = time.time() - start_time
        time_dict['network'] += elapse
        print(f'Network Forwarding: {elapse:8.2f}s {num_frames / elapse:8.2f}FPS')

        loss_dict['cnt'] = 1
        add_dict(test_loss, loss_dict)

        zero_time = time.time()

    print(f'Overall, {total_frames:8} frames****************************')
    print(f'Data Preprocessing: {time_dict["data_proc"]:8.2f}s {total_frames / time_dict["data_proc"]:8.2f}FPS')
    print(f'Network Forwarding: {time_dict["network"]:8.2f}s {total_frames / time_dict["network"]:8.2f}FPS')
    if cfg['batch_size'] > 1:
        print(f'PLEASE SET batch_size = 1 TO TEST THE SPEED. CURRENT BATCH_SIZE: {cfg["batch_size"]}')

    cnt = test_loss.pop('cnt')
    log_loss_summary(test_loss, cnt, lambda x, y: log_string('Test {} is {}'.format(x, y)))
    if save and not no_eval:
        trainer.model.save_per_diff()
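
main() above expects a parsed args namespace. A hypothetical entry point using argparse; only mode_name is actually read by this main, and the --config flag is a guess at what get_config() consumes:

import argparse

def parse_args():
    # Flag names inferred from the attributes read in the function above.
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, required=True,
                        help='assumed input to get_config()')
    parser.add_argument('--mode_name', type=str, default='test')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())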
Example #6
def main(args):
    def log_string(msg):
        logger.info(msg)
        print(msg)

    cfg = get_config(args)

    '''LOG'''
    log_dir = pjoin(cfg['experiment_dir'], 'log')
    ensure_dirs(log_dir)

    logger = logging.getLogger("TrainModel")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler('%s/log_finetune.txt' % (log_dir))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    log_string('PARAMETER ...')
    log_string(cfg)

    '''DATA'''
    test_dataloader = get_dataloader(cfg, args.use_val, downsampling=args.downsample)

    train_real_dataloader = get_dataloader(cfg, 'real_train', shuffle=True)
    syn_train_len = len(train_real_dataloader) * args.syn_n

    train_syn_dataloader = get_dataloader(cfg, 'train', shuffle=True)
    syn_train_cycle = iter(train_syn_dataloader)
    # How many epochs one pass over the synthetic loader lasts before the
    # iterator must be re-created.
    num_div = len(train_syn_dataloader) // syn_train_len

    '''TRAINER'''
    trainer = Trainer(cfg, logger)
    start_epoch = trainer.resume()

    def test_all():
        '''testing'''
        test_loss = {}
        for i, data in enumerate(test_dataloader):
            pred_dict, loss_dict = trainer.test(data)
            loss_dict['cnt'] = 1
            add_dict(test_loss, loss_dict)

        cnt = test_loss.pop('cnt')
        log_loss_summary(test_loss, cnt, lambda x, y: log_string('real_test {} is {}'.format(x, y)))

    test_all()

    for epoch in range(start_epoch, cfg['total_epoch']):
        trainer.step_epoch()

        '''training'''
        if not args.real_only:
            train_loss = {}
            for i in range(syn_train_len):
                try:
                    data = next(syn_train_cycle)
                except StopIteration:
                    # Restart the synthetic iterator if it runs out early.
                    syn_train_cycle = iter(train_syn_dataloader)
                    data = next(syn_train_cycle)
                loss_dict = trainer.update(data)
                loss_dict['cnt'] = 1
                add_dict(train_loss, loss_dict)

            cnt = train_loss.pop('cnt')
            log_loss_summary(train_loss, cnt, lambda x, y: log_string('Syn_Train {} is {}'.format(x, y)))

        train_loss = {}
        for i, data in enumerate(train_real_dataloader):
            loss_dict = trainer.update(data)
            loss_dict['cnt'] = 1
            add_dict(train_loss, loss_dict)

        cnt = train_loss.pop('cnt')
        log_loss_summary(train_loss, cnt, lambda x, y: log_string('Real_Train {} is {}'.format(x, y)))

        if (epoch + 1) % cfg['freq']['save'] == 0:
            trainer.save()

        test_all()
        # Guard against num_div == 0 (synthetic set smaller than one epoch's
        # worth of batches), which would otherwise raise ZeroDivisionError.
        if num_div > 0 and (epoch + 1) % num_div == 0:
            syn_train_cycle = iter(train_syn_dataloader)
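
The restart-on-StopIteration pattern used in Examples #1, #2, and #6 can be factored into a small generator so the try/except around next() disappears. A sketch of that alternative:

def cycle_loader(dataloader):
    # Endlessly re-iterate a DataLoader; unlike itertools.cycle, this
    # re-runs the loader each pass, so shuffle=True reshuffles every epoch.
    while True:
        for batch in dataloader:
            yield batch

With syn_train_cycle = cycle_loader(train_syn_dataloader), next(syn_train_cycle) never raises StopIteration, and the periodic re-creation via num_div becomes unnecessary.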