Example #1
def load_all_model(root_dir, device=0):
    model_corpus = []
    for i in range(17):
        config_file = os.path.join(root_dir, str(i), "config.json")
        with open(config_file, 'r') as fin:
            config = json.load(fin)
        args = argparse.Namespace(**config)
        item = []
        for j in range(args.model_num):
            if args.model_type == 'lstm':
                model = models.LSTMModel(args)
            elif args.model_type == 'conv':
                model = models.ConvModel(args)
            elif args.model_type == 'char':
                model = models.CharCNNModel(args)
            elif args.model_type == 'base':
                model = models.BaseModel(args)
            else:
                raise NotImplementedError
            model_path = os.path.join(
                args.checkpoint_path, str(i),
                "%s_%s" % (args.model_type, args.type_suffix),
                "model_%d.pth" % j)
            if not os.path.isfile(model_path):
                print("No model to test")
                exit(1)
            model.load_state_dict(torch.load(model_path))
            model = model.cuda(device)
            model.eval()
            item.append(model)
        model_corpus.append(item)
    return model_corpus
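
A brief usage sketch for the loader above, assuming the project's models package and a hypothetical checkpoint directory:

# Hypothetical usage of load_all_model; the path is a placeholder.
ensembles = load_all_model("/path/to/checkpoints", device=0)
print(len(ensembles), "classes,", len(ensembles[0]), "models per class")
# Each entry is an eval-mode model on GPU 0, ready for torch.no_grad() inference.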
Example #2
def get_model(model_name, **kwargs):
    if model_name == 'lstm':
        return models.LSTMModel(**kwargs)
    if model_name == 'cnn':
        return models.CNNModel(**kwargs)
    if model_name == "split_cnn":
        return models.SplitCNN(**kwargs)
    raise Exception('Invalid Model Type: {}'.format(model_name))
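
The same factory can also be written as a dictionary lookup, which keeps the name-to-class mapping in one place; a minimal sketch assuming the same models module:

# Equivalent dict-based factory for the three model types above.
MODEL_REGISTRY = {
    'lstm': models.LSTMModel,
    'cnn': models.CNNModel,
    'split_cnn': models.SplitCNN,
}

def get_model(model_name, **kwargs):
    if model_name not in MODEL_REGISTRY:
        raise Exception('Invalid Model Type: {}'.format(model_name))
    return MODEL_REGISTRY[model_name](**kwargs)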
Example #3
def get_lstm_classifier():
    global y_train_df
    # Convert to a format suitable for RNNs
    to_tensor = FunctionTransformer(torch.from_numpy)
    # Type cast to float tensor (our classifier doesn't seem to work with the
    # default double tensor)
    to_float = FunctionTransformer(lambda t: t.type(dtype=torch.FloatTensor))

    _, vector_size = glove_vectorize.get_instance_dims()
    lstm_model = models.LSTMModel(vector_size=vector_size)
    return Pipeline([('to_tensor', to_tensor), ('to_float', to_float),
                     ('lstm', lstm_model.get_sklearn_compatible_estimator())])
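
A usage sketch for the pipeline above, assuming X_train is a NumPy array of GloVe-vectorized instances and y_train the matching labels (both hypothetical):

# Hypothetical training call; X_train, y_train and X_test are placeholders.
clf = get_lstm_classifier()
clf.fit(X_train, y_train)        # arrays flow through to_tensor -> to_float -> lstm
predictions = clf.predict(X_test)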
Example #4
File: eval_model.py Project: yixiangD/bglp
elif config['model'] == 'mlp':
    model = models.MLPModel(input_shape=(config['history_length'], ),
                            nb_output_units=1,
                            nb_hidden_units=config['nb_hidden_units'],
                            nb_layers=config['nb_layers'])
elif config['model'] == 'gru':
    model = models.GRUModel(input_shape=(config['history_length'], 1),
                            nb_output_units=1,
                            nb_hidden_units=config['nb_hidden_units'],
                            nb_layers=config['nb_layers'],
                            dropout=config['dropout'],
                            recurrent_dropout=config['recurrent_dropout'])
elif config['model'] == 'lstm':
    model = models.LSTMModel(input_shape=(config['history_length'], 1),
                             nb_output_units=1,
                             nb_hidden_units=config['nb_hidden_units'],
                             nb_layers=config['nb_layers'],
                             dropout=config['dropout'],
                             recurrent_dropout=config['recurrent_dropout'])
elif config['model'] == 'lstm_attention':
    model = models.LSTMAttentionModel(
        input_shape=(config['history_length'], 1),
        nb_output_units=1,
        nb_hidden_units=config['nb_hidden_units'],
        dropout=config['dropout'],
        recurrent_dropout=config['recurrent_dropout'],
        nb_attention_units=config['nb_attention_units'])
elif config["model"] == "seq2seq":
    model = models.Seq2seqModel(input_shape=(config['history_length'], 1),
                                kernel_size=4,
                                n_block=4,
                                nb_hidden_units=64,
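
The dispatch above is driven by a config dictionary; a minimal sketch of the keys read by the 'lstm' branch, with hypothetical values:

# Hypothetical config for the 'lstm' branch; the values are placeholders.
config = {
    'model': 'lstm',
    'history_length': 12,
    'nb_hidden_units': 64,
    'nb_layers': 2,
    'dropout': 0.1,
    'recurrent_dropout': 0.1,
}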
Example #5
def train(args, model_id, tb):
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    train_data = MedicalEasyEnsembleDataloader(args.train_data, args.class_id,
                                               args.batch_size, True,
                                               args.num_workers)
    val_data = MedicalEasyEnsembleDataloader(args.val_data, args.class_id,
                                             args.batch_size, False,
                                             args.num_workers)
    if os.path.exists(args.w2v_file):
        embedding = utils.load_embedding(args.w2v_file,
                                         vocab_size=args.vocab_size,
                                         embedding_size=args.embedding_size)
    else:
        embedding = None
    if args.model_type == 'lstm':
        model = models.LSTMModel(args, embedding)
    elif args.model_type == 'conv':
        model = models.ConvModel(args, embedding)
    elif args.model_type == 'char':
        model = models.CharCNNModel(args, embedding)
    elif args.model_type == 'base':
        model = models.BaseModel(args, embedding)
    else:
        raise NotImplementedError
    if os.path.isfile(
            os.path.join(args.checkpoint_path, str(args.class_id),
                         "%s_%s" % (args.model_type, args.type_suffix),
                         "model_%d.pth" % model_id)):
        print("Load %d class %s type %dth model from previous step" %
              (args.class_id, args.model_type, model_id))
        model.load_state_dict(
            torch.load(
                os.path.join(args.checkpoint_path, str(args.class_id),
                             "%s_%s" % (args.model_type, args.type_suffix),
                             "model_%d.pth" % model_id)))
    iteration = 0
    model = model.cuda(args.device)
    model.train()
    optimizer = utils.build_optimizer(args, model)
    loss_func = MultiBceLoss()
    cur_worse = 1000
    bad_times = 0
    for epoch in range(args.epochs):
        if epoch >= args.start_epoch:
            factor = (epoch - args.start_epoch) // args.decay_every
            decay_factor = args.decay_rate**factor
            current_lr = args.lr * decay_factor
            utils.set_lr(optimizer, current_lr)
        # if epoch != 0 and epoch % args.sample_every == 0:
        #     train_data.re_sample()
        for i, data in enumerate(train_data):
            tmp = [
                _.cuda(args.device) if isinstance(_, torch.Tensor) else _
                for _ in data
            ]
            report_ids, sentence_ids, sentence_lengths, output_vec = tmp
            optimizer.zero_grad()
            loss = loss_func(model(sentence_ids, sentence_lengths), output_vec)
            loss.backward()
            train_loss = loss.item()
            optimizer.step()
            iteration += 1
            if iteration % args.print_every == 0:
                print("iter %d epoch %d loss: %.3f" %
                      (iteration, epoch, train_loss))

            if iteration % args.save_every == 0:
                torch.save(
                    model.state_dict(),
                    os.path.join(args.checkpoint_path, str(args.class_id),
                                 "%s_%s" % (args.model_type, args.type_suffix),
                                 "model_%d.pth" % model_id))
                with open(os.path.join(args.checkpoint_path,
                                       str(args.class_id), "config.json"),
                          'w',
                          encoding='utf-8') as config_f:
                    json.dump(vars(args), config_f, indent=2)
                with open(os.path.join(
                        args.checkpoint_path, str(args.class_id),
                        "%s_%s" % (args.model_type, args.type_suffix),
                        "config.json"),
                          'w',
                          encoding='utf-8') as config_f:
                    json.dump(vars(args), config_f, indent=2)
            if iteration % args.val_every == 0:
                val_loss = eval_model(model, loss_func, val_data, epoch)
                tb.add_scalar("model_%d val_loss" % model_id, val_loss,
                              iteration)
                if val_loss > cur_worse:
                    print("Bad Time Appear")
                    cur_worse = val_loss
                    bad_times += 1
                else:
                    cur_worse = val_loss
                    bad_times = 0
                if bad_times > args.patient:
                    print('Early Stop !!!!')
                    return
            if iteration % args.loss_log_every == 0:
                tb.add_scalar("model_%d train_loss" % model_id, loss.item(),
                              iteration)

    print("The train finished")
Example #6
def main(argv):
    parser = argparse.ArgumentParser(
        description='WikiText-2 language modeling')
    parser.add_argument('--batch-size',
                        type=int,
                        default=70,
                        metavar='N',
                        help='input batch size for training (default: 70)'),
    parser.add_argument('--eval-batch-size',
                        type=int,
                        default=50,
                        metavar='N',
                        help='input batch size for evaluation (default: 50)'),
    parser.add_argument('--save-directory',
                        type=str,
                        default='output/wikitext-2',
                        help='output directory')
    parser.add_argument('--model-save-directory',
                        type=str,
                        default='models/',
                        help='output directory')
    parser.add_argument('--epochs',
                        type=int,
                        default=5,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--base-seq-len',
                        type=int,
                        default=70,
                        metavar='N',
                        help='base sequence length'),
    parser.add_argument('--min-seq-len',
                        type=int,
                        default=50,
                        metavar='N',
                        help='minimum sequence length'),
    parser.add_argument('--seq-prob',
                        type=float,
                        default=0.99,
                        metavar='N',
                        help='probability of the sequence length being divided by 2'),
    parser.add_argument('--seq-std',
                        type=int,
                        default=1,
                        metavar='N',
                        help='sequence length std'),
    parser.add_argument('--hidden-dim',
                        type=int,
                        default=1150,
                        metavar='N',
                        help='Hidden dim')
    parser.add_argument('--embedding-dim',
                        type=int,
                        default=400,
                        metavar='N',
                        help='Embedding dim')
    parser.add_argument('--lr',
                        type=int,
                        default=20,
                        metavar='N',
                        help='learning rate'),
    parser.add_argument('--weight-decay',
                        type=float,
                        default=2e-6,
                        metavar='N',
                        help='weight decay'),
    parser.add_argument('--tag',
                        type=str,
                        default='lr-1e-2-base.pt',
                        metavar='N',
                        help='checkpoint file name'),
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')

    args = parser.parse_args(argv)
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # load dataset
    train_data, val_data, vocabulary = (np.load('./dataset/wiki.train.npy'),
                                        np.load('./dataset/wiki.valid.npy'),
                                        np.load('./dataset/vocab.npy'))

    word_count = len(vocabulary)

    model = models.LSTMModel(word_count, args)
    loss_fn = models.CrossEntropyLoss3D()

    checkpoint_path = os.path.join(args.model_save_directory, args.tag)

    if not os.path.exists(checkpoint_path):
        model = models.LSTMModel(word_count, args)
    else:
        print("Using pre-trained model")
        print("*" * 90)
        model = models.LSTMModel(word_count, args)
        checkpoint_path = os.path.join(args.model_save_directory, args.tag)
        model.load_state_dict(torch.load(checkpoint_path))

    if args.cuda:
        model = model.cuda()
        loss_fn = loss_fn.cuda()

    generated = utils.generate(model,
                               sequence_length=10,
                               batch_size=2,
                               stochastic=True,
                               args=args).data.cpu().numpy()
    utils.print_generated(utils.to_text(preds=generated,
                                        vocabulary=vocabulary))
    print('Model: ', model)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    logging = dict()
    logging['loss'] = []
    logging['train_acc'] = []
    logging['val_loss'] = []

    model.train()

    for epoch in range(args.epochs):

        epoch_time = time.time()
        np.random.shuffle(train_data)
        train_data_ = utils.batchify(
            utils.to_tensor(np.concatenate(train_data)), args.batch_size)
        val_data_ = utils.batchify(utils.to_tensor(np.concatenate(val_data)),
                                   args.eval_batch_size)
        train_data_loader = utils.custom_data_loader(train_data_, args)
        val_data_loader = utils.custom_data_loader(val_data_,
                                                   args,
                                                   evaluation=True)
        # number of words
        train_size = train_data_.size(0) * train_data_.size(1)
        val_size = val_data_.size(0) * val_data_.size(1)

        n_batchs = len(train_data_)
        n_batchs_val = len(val_data_)
        correct = 0
        epoch_loss = 0
        batch_index = 0
        seq_len = 0
        counter = 0
        while (batch_index < n_batchs - 1):

            optimizer.zero_grad()

            X, y, seq_len = next(train_data_loader)

            out = model(X)
            loss = loss_fn(out, y)
            loss.backward()
            # scale lr with respect to the size of the seq_len
            utils.adjust_learning_rate(optimizer, args, seq_len)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)

            # Manual SGD-style update using the raw gradients (applied in
            # addition to optimizer.step() below).
            for p in model.parameters():
                p.data.add_(p.grad.data, alpha=-args.lr)

            optimizer.step()
            utils.adjust_learning_rate(optimizer, args, args.base_seq_len)

            epoch_loss += loss.data.sum()
            batch_index += seq_len
            if counter % 30 == 0 and counter != 0:
                print('|batch {:3d}|train loss {:5.2f}|'.format(
                    counter, epoch_loss / counter))

            counter += 1

        train_loss = epoch_loss / counter
        val_loss = validate(model, val_data_loader, loss_fn, n_batchs_val)

        logging['loss'].append(train_loss)
        logging['val_loss'].append(val_loss)
        utils.save_model(model, checkpoint_path)

        print('=' * 83)
        print('|epoch {:3d}|time: {:5.2f}s|valid loss {:5.2f}|'
              'train loss {:8.2f}'.format(epoch + 1,
                                          (time.time() - epoch_time), val_loss,
                                          train_loss))