Example #1
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
from tqdm import tqdm

# Baseline, create_env, generate_episode, optimize_model and the
# LEARN_RATE / WEIGHT_DECAY / SAVE_EPI constants are assumed to be
# defined elsewhere in the surrounding project.


def train():
    # Prepare gym
    env = create_env()
    h, w, c = env.observation_space.shape

    # Prepare models
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_dir, fn = "./policy_grad", '{}.pth'
    model = Baseline(h, w).to(device)
    model.train()
    optimizer = optim.RMSprop(model.parameters(),
                              lr=LEARN_RATE,
                              weight_decay=WEIGHT_DECAY)

    # Train
    steps_done = 0
    num_episodes = 2000
    episode_rewards = []

    for i_episode in tqdm(range(num_episodes)):
        # Complete 1 episode
        print("Episode {}".format(i_episode + 1))
        i_rewards, i_states, i_actions, steps_done = generate_episode(
            env, model, device, steps_done, episode_rewards)

        # Update model
        optimize_model(device, model, optimizer, i_rewards, i_actions,
                       i_states)

        # Save the model every SAVE_EPI episodes
        if (i_episode + 1) % SAVE_EPI == 0:
            path = os.path.join(model_dir, fn.format(episode_rewards[-1]))
            torch.save(model.state_dict(), path)

    print('Complete')
    np.save('./rewards_policy_grad.npy', episode_rewards)

    env.close()
    plt.ioff()
    plt.show()
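
The snippet above calls generate_episode and optimize_model without showing their bodies. Purely as a hedged sketch of what a discounted-return, REINFORCE-style optimize_model compatible with that call could look like, the following assumes GAMMA as a discount constant and assumes that Baseline returns action logits for a batch of stacked state tensors; neither assumption comes from the original code.

import torch
import torch.nn.functional as F

GAMMA = 0.99  # assumed discount factor; not part of the original snippet


def optimize_model(device, model, optimizer, rewards, actions, states):
    # Discounted returns G_t, accumulated backwards over the episode.
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + GAMMA * g
        returns.insert(0, g)
    returns = torch.tensor(returns, dtype=torch.float32, device=device)
    # Normalize returns for variance reduction.
    returns = (returns - returns.mean()) / (returns.std() + 1e-8)

    states = torch.stack(states).to(device)             # (T, C, H, W), assumed
    actions = torch.as_tensor(actions, device=device)   # (T,) int64 action ids

    # REINFORCE objective: minimize -sum_t log pi(a_t | s_t) * G_t.
    log_probs = F.log_softmax(model(states), dim=-1)
    chosen = log_probs.gather(1, actions.unsqueeze(1)).squeeze(1)
    loss = -(chosen * returns).sum()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()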
Example #2
        print(model_name)

        model = EfficientNet.from_name(model_name)

        # model = EfficientNet.from_pretrained(model_name, num_classes=350)
        # summary(model, input_size=(3, 224, 224))
    else:
        model = Baseline(args.hidden_size, args.output_size)
    optimizer = optim.Adam(model.parameters(), args.learning_rate)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                        patience=1,
                                                        verbose=True)
    criterion = nn.CrossEntropyLoss()  # multi-class classification task

    model = model.to(device)
    model.train()

    # DONOTCHANGE: They are reserved for nsml
    bind_model(model)
    # load a previously saved checkpoint from nsml
    nsml.load(checkpoint='15', session='team_62/airush1/40')
    nsml.save('stillgoing')

    if args.pause:
        nsml.paused(scope=locals())
    if args.mode == "train":
        # Warning: Do not load data before this line
        dataloader = train_dataloader(args.input_size, args.batch_size,
                                      args.num_workers)
        for epoch_idx in range(1, args.epochs + 1):
            total_loss = 0
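
As an illustrative sketch only (not the continuation of the original script), the objects prepared above, model, criterion, optimizer, lr_scheduler and dataloader, are typically wired together per epoch as follows; the (images, labels) batch layout returned by train_dataloader is an assumption.

for epoch_idx in range(1, args.epochs + 1):
    total_loss = 0.0
    for images, labels in dataloader:  # batch layout is assumed
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    mean_loss = total_loss / len(dataloader)
    # ReduceLROnPlateau expects the metric it should monitor.
    lr_scheduler.step(mean_loss)
    nsml.save(str(epoch_idx))  # checkpoint each epoch under nsml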
Example #3
import time
from collections import OrderedDict
from pprint import pprint

import numpy as np
import torch
from torch.utils.data import DataLoader

# FileInterface, Baseline, Loss, load_squad, load_glove, SquadProcessor,
# SquadSampler and bind_model are assumed to come from the surrounding
# project.


def train(args):
    start_time = time.time()
    device = torch.device('cuda' if args.cuda else 'cpu')

    pprint(args.__dict__)
    interface = FileInterface(**args.__dict__)
    piqa_model = Baseline(**args.__dict__).to(device)

    loss_model = Loss().to(device)
    optimizer = torch.optim.Adam(p for p in piqa_model.parameters()
                                 if p.requires_grad)

    batch_size = args.batch_size
    char_vocab_size = args.char_vocab_size
    glove_vocab_size = args.glove_vocab_size
    word_vocab_size = args.word_vocab_size
    glove_size = args.glove_size
    elmo = args.elmo
    draft = args.draft

    def preprocess(interface_):
        # get data
        print('Loading train and dev data')
        train_examples = load_squad(interface_.train_path, draft=draft)
        dev_examples = load_squad(interface_.test_path, draft=draft)

        # GloVe words are needed to construct the processor; the embedding
        # matrix initializes the model later
        print('Loading GloVe')
        glove_words, glove_emb_mat = load_glove(
            glove_size,
            vocab_size=args.glove_vocab_size - 2,
            glove_dir=interface_.glove_dir,
            draft=draft)

        print('Constructing processor')
        processor = SquadProcessor(char_vocab_size,
                                   glove_vocab_size,
                                   word_vocab_size,
                                   elmo=elmo)
        processor.construct(train_examples, glove_words)

        # data loader
        print('Preprocessing datasets')
        train_dataset = tuple(
            processor.preprocess(example) for example in train_examples)
        dev_dataset = tuple(
            processor.preprocess(example) for example in dev_examples)

        print('Creating data loaders')
        train_sampler = SquadSampler(train_dataset,
                                     max_context_size=256,
                                     max_question_size=32,
                                     bucket=True,
                                     shuffle=True)
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  collate_fn=processor.collate,
                                  sampler=train_sampler)

        dev_sampler = SquadSampler(dev_dataset, bucket=True)
        dev_loader = DataLoader(dev_dataset,
                                batch_size=batch_size,
                                collate_fn=processor.collate,
                                sampler=dev_sampler)

        if args.preload:
            train_loader = tuple(train_loader)
            dev_loader = tuple(dev_loader)

        out = {
            'glove_emb_mat': glove_emb_mat,
            'processor': processor,
            'train_dataset': train_dataset,
            'dev_dataset': dev_dataset,
            'train_loader': train_loader,
            'dev_loader': dev_loader
        }

        return out

    out = (interface.cache(preprocess, interface_=interface)
           if args.cache else preprocess(interface))
    glove_emb_mat = out['glove_emb_mat']
    processor = out['processor']
    train_dataset = out['train_dataset']
    dev_dataset = out['dev_dataset']
    train_loader = out['train_loader']
    dev_loader = out['dev_loader']

    print("Initializing model weights")
    piqa_model.load_glove(torch.tensor(glove_emb_mat))

    bind_model(interface, processor, piqa_model, optimizer=optimizer)

    step = 0
    best_report = None

    print('Training')
    piqa_model.train()
    for epoch_idx in range(args.epochs):
        for i, train_batch in enumerate(train_loader):
            train_batch = {
                key: val.to(device)
                for key, val in train_batch.items()
            }
            model_output = piqa_model(step=step, **train_batch)
            train_results = processor.postprocess_batch(
                train_dataset, train_batch, model_output)
            train_loss = loss_model(step=step, **model_output, **train_batch)
            train_f1 = float(
                np.mean([result['f1'] for result in train_results]))
            train_em = float(
                np.mean([result['em'] for result in train_results]))

            # optimize
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            step += 1

            # report & eval & save
            if step % args.report_period == 1:
                report = OrderedDict(step=step,
                                     train_loss=train_loss.item(),
                                     train_f1=train_f1,
                                     train_em=train_em,
                                     time=time.time() - start_time)
                interface.report(**report)
                print(', '.join('%s=%.5r' % (s, r) for s, r in report.items()))

            if step % args.eval_save_period == 1:
                with torch.no_grad():
                    piqa_model.eval()
                    loss_model.eval()
                    pred = {}
                    dev_losses, dev_results = [], []
                    for dev_batch, _ in zip(dev_loader,
                                            range(args.eval_steps)):
                        dev_batch = {
                            key: val.to(device)
                            for key, val in dev_batch.items()
                        }
                        model_output = piqa_model(**dev_batch)
                        results = processor.postprocess_batch(
                            dev_dataset, dev_batch, model_output)

                        dev_loss = loss_model(step=step,
                                              **dev_batch,
                                              **model_output)

                        for result in results:
                            pred[result['id']] = result['pred']
                        dev_results.extend(results)
                        dev_losses.append(dev_loss.item())

                    dev_loss = float(np.mean(dev_losses))
                    dev_f1 = float(
                        np.mean([result['f1'] for result in dev_results]))
                    dev_em = float(
                        np.mean([result['em'] for result in dev_results]))

                    report = OrderedDict(step=step,
                                         dev_loss=dev_loss,
                                         dev_f1=dev_f1,
                                         dev_em=dev_em,
                                         time=time.time() - start_time)
                    summary = False
                    if (best_report is None
                            or report['dev_f1'] > best_report['dev_f1']):
                        best_report = report
                        summary = True
                        interface.save(iteration=step)
                        interface.pred(pred)
                    interface.report(summary=summary, **report)
                    print(
                        ', '.join('%s=%.5r' % (s, r)
                                  for s, r in report.items()),
                        '(dev_f1_best=%.5r @%d)' %
                        (best_report['dev_f1'], best_report['step']))
                    piqa_model.train()
                    loss_model.train()

            if step == args.train_steps:
                break
        if step == args.train_steps:
            break
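
For reference, the sketch below lists only the argparse fields that this train() reads directly; the defaults are illustrative placeholders, and FileInterface and Baseline, which receive **args.__dict__, will generally need additional project-specific arguments (for example train_path and glove_dir, which the code accesses through the interface) that are not listed here.

import argparse

parser = argparse.ArgumentParser()
# flags read directly by train(); defaults below are placeholders only
parser.add_argument('--cuda', action='store_true')
parser.add_argument('--draft', action='store_true')
parser.add_argument('--elmo', action='store_true')
parser.add_argument('--preload', action='store_true')
parser.add_argument('--cache', action='store_true')
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--char_vocab_size', type=int, default=100)
parser.add_argument('--glove_vocab_size', type=int, default=100002)
parser.add_argument('--word_vocab_size', type=int, default=100002)
parser.add_argument('--glove_size', type=int, default=200)
parser.add_argument('--epochs', type=int, default=20)
parser.add_argument('--train_steps', type=int, default=0)
parser.add_argument('--eval_steps', type=int, default=500)
parser.add_argument('--report_period', type=int, default=100)
parser.add_argument('--eval_save_period', type=int, default=500)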