Beispiel #1
0
def run_seq(config, evaluator):
    """Build and evaluate 21 models, tagged ``'0'`` through ``'20'``.

    For each tag a fresh ``Model`` is constructed from ``config``, the
    evaluator's dataset and the tag, moved to the evaluator's device, and
    handed to ``evaluator.eval``.  The tag is printed before each run.

    NOTE(review): the tag presumably selects which saved per-epoch checkpoint
    ``Model`` loads -- confirm against ``Model``'s constructor.

    Args:
        config: Configuration object forwarded to ``Model``.
        evaluator: Object providing ``dataset``, ``device`` and ``eval``.
    """
    for tag in map(str, range(21)):
        print(tag)
        candidate = Model(config, evaluator.dataset, tag)
        candidate.to(evaluator.device)
        evaluator.eval(candidate)
def main(argv):
    """Train ``Model`` on the training split, checkpointing and evaluating per epoch.

    The function parses the command line, loads the configuration, reads the
    raw tab-separated dataset, fits the TF-IDF name vectorizer and builds the
    training loader.  If the current-iteration file exists in the output
    directory, the (epoch, iteration) counters stored there are restored.

    After every epoch the model is saved (as ``'latest'`` and under the epoch
    number), the resume file is advanced, and the model is evaluated on the
    validation split.  When the evaluation value has been higher than the
    reference value for three consecutive epochs, ``model.update_learning_rate``
    is called.

    Args:
        argv: Argument list handed to ``parser.parse_args``.
    """
    opts, _ = parser.parse_args(argv)
    config = ConfigParser(opts.config)

    if torch.cuda.is_available():
        # ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24; the
        # builtin ``int`` is the documented replacement.
        gpu_ids = np.array(config.general.gpu_ids.split(' ')).astype(int)
        # Only the first configured GPU id is used.
        device = torch.device('cuda:{0}'.format(gpu_ids[0]))
    else:
        device = torch.device('cpu')

    raw_df = pd.read_csv(config.dataset.raw_path, sep="\t")
    name_vectorizer = train_tf_idf(MIN_NAME_DF, 'name', raw_df)

    train_loader, dataset = init_dataset(config, DBType.Train, name_vectorizer, raw_df)

    current_iteration_path = os.path.join(config.general.output_path, config.general.current_iteration_file_name)

    # NOTE(review): resuming restores the counters only; ``train_loader`` is
    # still iterated from its beginning.
    if os.path.isfile(current_iteration_path):
        start_epoch, epoch_iteration = np.loadtxt(current_iteration_path, delimiter=',', dtype=int)
        print('resuming from epoch %d at iteration %d' % (start_epoch, epoch_iteration))
    else:
        start_epoch, epoch_iteration = 0, 0

    model = Model(config, dataset)
    model.train()

    dataset_size = len(dataset)
    logger = Logger(config)
    # Global progress counter, measured in samples (it grows by batch_size).
    current_step = start_epoch * dataset_size + epoch_iteration

    steps_counter = 0     # batches processed since this process started
    accumulated_loss = 0  # sum of batch losses in the current epoch
    freq_loss = 0         # sum of batch losses since the last log line
    freq_batches = 0      # number of batches summed into freq_loss

    evaluator = Evaluator(DBType.Validation, config, name_vectorizer, raw_df)
    # Drop the local reference to the raw frame; it is not used below.
    raw_df = None

    model.to(device)
    freq_start_time = time.time()

    # Reference evaluation value; the comparison below treats higher as worse.
    last_eval = 99999999
    tmp_count = 0  # consecutive epochs whose evaluation exceeded last_eval

    for epoch in range(start_epoch, config.train.num_epochs):
        epoch_start_time = time.time()

        # Only the first (possibly resumed) epoch keeps the restored offset.
        if epoch != start_epoch:
            epoch_iteration = 0

        epoch_batches = 0

        for data in train_loader:
            if steps_counter % 500 == 0:
                print('{} / {}'.format(epoch_iteration, dataset_size))

            current_step += config.train.batch_size
            epoch_iteration += config.train.batch_size

            name = data['name'].to(device)
            cid = data['cid'].to(device)
            c_name = data['c_name'].to(device)
            b_name = data['b_name'].to(device)
            price = data['price'].to(device).unsqueeze(1)
            shipping = data['shipping'].to(device)
            desc = data['desc'].to(device)
            desc_len = data['desc_len'].to(device)

            # The model's forward pass returns the loss; it is reduced to a
            # scalar with a mean before back-propagation.
            loss = model(name, cid, c_name, b_name, shipping, desc, desc_len, price)
            loss = torch.mean(loss)

            model.optimizer.zero_grad()
            loss.backward()

            if config.general.clip_grads:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)

            model.optimizer.step()

            # Read the scalar once instead of calling .item() repeatedly.
            loss_value = loss.item()
            accumulated_loss += loss_value
            freq_loss += loss_value
            freq_batches += 1
            epoch_batches += 1

            if (steps_counter % config.general.print_logs_freq == 0) and steps_counter != 0:
                # Divide by the number of losses actually summed: the first
                # window holds one batch more than print_logs_freq.
                freq_loss = freq_loss / freq_batches

                print('freq_loss {}. time {}'.format(freq_loss, time.time() - freq_start_time))

                losses_dict = {'loss': loss_value, 'freq_loss': freq_loss}
                logger.dump_current_errors(losses_dict, current_step)

                freq_loss = 0
                freq_batches = 0
                freq_start_time = time.time()

            if (steps_counter % config.general.save_checkpoint_freq == 0) and steps_counter != 0:
                print('========== saving model (epoch %d, total_steps %d) =========' % (epoch, current_step))
                model.save('latest')
                np.savetxt(current_iteration_path, (epoch, epoch_iteration), delimiter=',', fmt='%d')

            steps_counter += 1

        print('end of epoch %d / %d \t time taken: %d sec' %
              (epoch, config.train.num_epochs, time.time() - epoch_start_time))

        # Mean loss over the batches processed in this epoch; max() keeps an
        # empty loader from raising instead of reporting a loss of 0.
        accumulated_loss = accumulated_loss / max(epoch_batches, 1)

        print('accumulated loss {}'.format(accumulated_loss))

        losses_dict = {'accumulated_loss': accumulated_loss}
        logger.dump_current_errors(losses_dict, current_step)

        accumulated_loss = 0
        model.save('latest')
        model.save(str(epoch))

        # A restart after this point begins at the next epoch, iteration 0.
        np.savetxt(current_iteration_path, (epoch + 1, 0), delimiter=',', fmt='%d')

        current_eval = evaluator.eval(model, max_iterations=config.train.max_eval_iterations)

        if current_eval > last_eval:
            tmp_count += 1

            # Third consecutive epoch above the reference: update the
            # learning rate and take the current value as the new reference.
            if tmp_count == 3:
                model.update_learning_rate()
                tmp_count = 0
                last_eval = current_eval
        else:
            tmp_count = 0
            last_eval = current_eval
Beispiel #3
0
def run_once(config, evaluator):
    """Build a single model on the evaluator's dataset and evaluate it.

    Args:
        config: Configuration object forwarded to ``Model``.
        evaluator: Object providing ``dataset``, ``device`` and ``eval``.
    """
    net = Model(config, evaluator.dataset)
    net.to(evaluator.device)
    evaluator.eval(net)
Beispiel #4
0
def main():
    """Train or benchmark every model named in ``MODEL_LIST``.

    The random seed is fixed, the dataset is loaded through ``cr.Dataset`` and
    split into training and test sets (``test_split=.20``), and both are
    wrapped in data loaders with a batch size of 32.  Each entry of
    ``MODEL_LIST`` is then built with the constructor arguments for its name
    and handled according to the module-level ``MODE``:

    * ``'train'``: call ``train`` and save the returned model's state dict,
      after moving it to the CPU, under ``{ROOT}/best_models``.
    * ``'fps'``: time 100 forward passes on random inputs and append the
      measured throughput to ``{ROOT}/out/speed.txt``.

    Raises:
        ValueError: If ``MODE`` is neither ``'train'`` nor ``'fps'``.
    """
    seed = 0
    fix_seed(seed)

    data = cr.Dataset(
        data_paths=cfg.data_paths,
        exp_id=cfg.exp_id,
        img_shape=cfg.img_shape,
        img_crop_size=cfg.img_crop_size,
        max_trace=cfg.max_trace_len,
    )

    x_train, x_test, y_train, y_test = data.split_training_test_data(
        test_split=.20, seed=10, for_deep=True)

    trainsets = NNDataset(x_train, y_train, DEVICE)
    testsets = NNDataset(x_test, y_test, DEVICE)
    train_loader = torch.utils.data.DataLoader(trainsets, batch_size=32)
    test_loader = torch.utils.data.DataLoader(testsets, batch_size=32)

    for model_name in MODEL_LIST:
        print(f'\n======== {model_name} ========\n')
        if model_name == 'LSTM':
            model = Model(t_stage='LSTM',
                          device=DEVICE,
                          t_hidden_dim=500,
                          t_output_dim=500,
                          use_cnn_for_trace=False)
        elif model_name == 'CNN_LSTM':
            model = Model(t_stage='LSTM',
                          device=DEVICE,
                          t_hidden_dim=500,
                          t_output_dim=500)
        elif model_name == 'OnlyCNN':
            model = Model(s_stage='CNN', device=DEVICE, block_num=3)
        else:
            # Any other name is passed through as the s_stage identifier.
            model = Model(s_stage=model_name,
                          t_stage='LSTM',
                          device=DEVICE,
                          pretrained=PRETRAINED,
                          block_num=3,
                          t_hidden_dim=500,
                          t_output_dim=500)
        if MODE == 'train':
            # The score returned by train() is not used here.
            _score, model = train(model,
                                  model_name,
                                  train_loader,
                                  test_loader,
                                  DEVICE,
                                  log_path=f'{ROOT}/out/{model_name}.txt')
            model = model.to('cpu')
            torch.save(model.state_dict(),
                       f'{ROOT}/best_models/{model_name}.pth')
        elif MODE == 'fps':
            model.eval()
            inputs = (torch.rand(1, 1, 500).to(DEVICE),
                      torch.rand(1, 1, 80, 80).to(DEVICE))
            num_runs = 100
            bench_device = torch.device(DEVICE)
            on_cuda = bench_device.type == 'cuda'
            # Inference only: disable autograd so graph bookkeeping does not
            # skew the timing or retain activations.
            with torch.no_grad():
                # CUDA kernels are launched asynchronously; synchronise so the
                # timer brackets the actual computation.
                if on_cuda:
                    torch.cuda.synchronize(bench_device)
                t0 = time.perf_counter()
                for _ in range(num_runs):
                    model(inputs)
                if on_cuda:
                    torch.cuda.synchronize(bench_device)
                # Stop the clock before touching the file system.
                elapsed = time.perf_counter() - t0
            with open(f'{ROOT}/out/speed.txt', 'a') as f:
                f.write(f'{model_name}: {num_runs / elapsed:.04f} fps\n')
        else:
            raise ValueError(
                f"unsupported MODE {MODE!r}; expected 'train' or 'fps'")