Example #1
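    # Trainer constructor: hyperparameters may arrive as a plain dict or a DotDict,
    # and 'model_type' selects between a Transformer and an LSTM language model.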
    def __init__(self,
                 hparams=DotDict({
                     'model_type': 'transformer',
                     'ninp': 128,
                     'nhead': 2,
                     'nhid': 512,
                     'nlayers': 2,
                     'tie_layers': True,
                     'tie_encoder_decoder': True,
                     'dropout': 0.1,
                 })):
        super(LanguageModelTrainer, self).__init__()

        self.hparams = hparams if isinstance(hparams, DotDict) \
                        else DotDict(hparams)

        from utils import get_default_tokenizer
        self.vocab_size = get_default_tokenizer()._tokenizer.get_vocab_size()

        self.model_type = hparams.get('model_type', 'transformer')
        assert self.model_type in ['transformer', 'lstm']

        if self.model_type == 'transformer':
            self.model = TransformerModel(ntoken=self.vocab_size, **hparams)
        else:
            self.model = LSTMModel(ntoken=self.vocab_size, **hparams)

        self.batch_size = hparams.get('batch_size', 64)
        self.bptt = hparams.get('bptt', 128)
Example #2
def main():
    batch_size = 128
    epochs = 100
    maxlen = 300
    model_path = 'models/lstm_model.h5'
    num_words = 40000
    num_label = 2

    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv')

    x = preprocess_dataset(x)
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)

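    # Build the vocabulary on the training split only, then convert both splits
    # to padded integer sequences of length `maxlen`.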
    vocab = build_vocabulary(x_train, num_words)
    x_train = vocab.texts_to_sequences(x_train)
    x_test = vocab.texts_to_sequences(x_test)
    x_train = pad_sequences(x_train, maxlen=maxlen, truncating='post')
    x_test = pad_sequences(x_test, maxlen=maxlen, truncating='post')

    model = LSTMModel(num_words, num_label, embeddings=None).build()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    callbacks = [
        EarlyStopping(patience=3),
        ModelCheckpoint(model_path, save_best_only=True)
    ]

    model.fit(x=x_train,
              y=y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              callbacks=callbacks,
              shuffle=True)

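    # Reload the best checkpoint written by ModelCheckpoint and evaluate on the test set.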
    model = load_model(model_path)
    api = InferenceAPI(model, vocab, preprocess_dataset)
    y_pred = api.predict_from_sequences(x_test)

    print('precision: {:.4f}'.format(
        precision_score(y_test, y_pred, average='binary')))
    print('recall   : {:.4f}'.format(
        recall_score(y_test, y_pred, average='binary')))
    print('f1       : {:.4f}'.format(f1_score(y_test, y_pred, average='binary')))
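Example #3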
## Did it finish?
last_output = " ".join(ss_output.stdout.decode("utf-8").split('\n')[-2:])
if "Saving model in tsv format" not in last_output:
    ## won't save a file if it doesn't finish.
    print('Starspace did not complete. PANIC! \nReverting to default initialization.')
    args.init_embed = 0 ## change the parameter to not use Starspace embeddings later.
'''

# Init models, opt and criterion
if args.model == 'FastText':
    print("Using FastText model")
    model = FastText(len(vocab), args.embeddim, len(label_map), args.cuda)
else:
    print("Using LSTM model")
    model = LSTMModel(len(vocab), args.embeddim, args.hiddendim, label_map,
                      args.batchsize, args.cuda)
crit = nn.BCEWithLogitsLoss()
if args.cuda:
    print("Using cuda")
    torch.cuda.set_device(args.gpu)  # torch.cuda.device() only returns a context manager; set_device actually selects the GPU
    model.cuda()
    crit.cuda()
else:
    print("Using CPU only")
params = list(model.parameters())
opti = torch.optim.Adam(params, lr=args.lr)
print(model)


def evaluate(model, loader, crit, cuda, bs, num_labels, model_type):
    data_size = len(loader)
Example #4
    def create_model(self) -> torch.nn.Module:
        return LSTMModel(self.calculate_input_size(),
                         self.calculate_output_size(),
                         self.helper.opt.state_size, self.helper.opt.n_layers)
Example #5

#Main Loop

while True:
    
    min_test_loss = 1.e6
    
    loss = 0.0
    train_loss_seq = []
    test_loss_seq = []

    if model_type == 'Transformer':
        model = TransformerModel(config)
    elif model_type == 'LSTM':
        model = LSTMModel(config)
    if cuda:
        model = model.cuda()

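    # Fresh Adam optimizer (with weight decay from the config) and MSE loss for this run.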
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config['train']['learning_rate'],
                                 weight_decay=config['train']['weight_decay'])
    criterion = torch.nn.MSELoss()
    
    optimizer.zero_grad()
        
    for it in range(n_iter):
        model.train()
        country = random.choice(train_countries)

        inp, target = get_data_tensor(data, country, measure_mode, output_mode=output_mode, cuda=cuda)
Example #6
def main():
    # Set hyperparameters.
    batch_size = 128
    epochs = 100
    maxlen = 300
    # model_path = 'models/rnn_model.h5'
    # model_path = 'models/lstm_model.h5'
    # model_path = 'models/CNN_model.h5'
    model_path = 'models/lstm_iniemb_model.h5'
    num_words = 4000
    num_label = 2

    # Load the dataset.
    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv')

    # Preprocess the dataset.
    x = preprocess_dataset(x)
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)
    vocab = build_vocabulary(x_train, num_words)
    x_train = vocab.texts_to_sequences(x_train)
    x_test = vocab.texts_to_sequences(x_test)
    x_train = pad_sequences(x_train, maxlen=maxlen, truncating='post')
    x_test = pad_sequences(x_test, maxlen=maxlen, truncating='post')

    # Pre-trained word embeddings.
    wv = load_fasttext('data/cc.ja.300.vec')
    wv = filter_embeddings(wv, vocab.word_index, num_words)

    # Build the model.
    # model = RNNModel(num_words, num_label, embeddings=None).build()
    model = LSTMModel(num_words, num_label, embeddings=wv).build()
    # model = CNNModel(num_words, num_label, embeddings=None).build()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    # Prepare callbacks.
    callbacks = [
        EarlyStopping(patience=3),
        ModelCheckpoint(model_path, save_best_only=True)
    ]

    # Train the model.
    model.fit(x=x_train,
              y=y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              callbacks=callbacks,
              shuffle=True)

    # Inference.
    model = load_model(model_path)
    api = InferenceAPI(model, vocab, preprocess_dataset)
    y_pred = api.predict_from_sequences(x_test)
    print('precision: {:.4f}'.format(
        precision_score(y_test, y_pred, average='binary')))
    print('recall: {:.4f}'.format(
        recall_score(y_test, y_pred, average='binary')))
    print('f1: {:.4f}'.format(f1_score(y_test, y_pred, average='binary')))
Example #7
def launch(model_params, checkpoint_path, device='cuda'):
    print('model_params:\t', model_params)

    max_length = model_params['bptt']

    tokenizer = get_default_tokenizer()

    eos_token = tokenizer.token_to_id('[SEP]')
    eod_token = tokenizer.token_to_id('[DOC_SEP]')
    vocab_size = tokenizer._tokenizer.get_vocab_size()

    assert eos_token is not None, 'Invalid tokenizer files - EOS token cannot be null'

    # Model

    from models import TransformerModel, LSTMModel

    model_type = model_params.get('model_type', 'transformer')
    assert model_type in ['transformer', 'lstm']

    if model_type == 'transformer':
        model = TransformerModel(ntoken=vocab_size, **model_params)
    else:
        model = LSTMModel(ntoken=vocab_size, **model_params)

    model = model.to(device)

    if checkpoint_path and path.exists(checkpoint_path):
        print(f'Loading checkpoint from {checkpoint_path}')
        checkpoint_state = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint_state)

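    # Beam-search decoding loop, roughly following the (older) HuggingFace generate()
    # recipe: `num_beams` hypotheses per input, optional sampling with temperature and
    # top-k/top-p filtering, plus the CTRL-style repetition penalty.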
    @torch.no_grad()
    def _generate(input_ids=None,
                  max_length=max_length,
                  do_sample=True,
                  num_beams=5,
                  temperature=1.3,
                  top_k=50,
                  top_p=1.0,
                  repetition_penalty=1.2,
                  eos_token_ids=[eos_token, eod_token],
                  length_penalty=1.0,
                  num_return_sequences=1,
                  vocab_size=vocab_size):
        pad_token_id = 0
        model.eval()

        batch_size = 1
        cur_len = input_ids.shape[1]

        # Expand input to num beams
        input_ids = input_ids.unsqueeze(1).expand(batch_size, num_beams,
                                                  cur_len)
        input_ids = input_ids.contiguous().view(batch_size * num_beams,
                                                cur_len)

        # generated hypotheses
        generated_hyps = [
            BeamHypotheses(num_beams,
                           max_length,
                           length_penalty,
                           early_stopping=False) for _ in range(batch_size)
        ]

        # scores for each sentence in the beam
        beam_scores = torch.zeros((batch_size, num_beams),
                                  dtype=torch.float,
                                  device=input_ids.device)
        beam_scores[:, 1:] = -1e9
        beam_scores = beam_scores.view(-1)  # shape (batch_size * num_beams,)

        # cache compute states
        past = None

        # done sentences
        done = [False for _ in range(batch_size)]

        while cur_len < max_length:

            outputs = model(input_ids.t())
            outputs = outputs.permute(1, 0, 2)
            # print(input_ids)
            # print(torch.argmax(outputs))

            scores = outputs[:, -1, :]

            # repetition penalty (from CTRL paper https://arxiv.org/abs/1909.05858)
            if repetition_penalty != 1.0:
                for i in range(batch_size * num_beams):
                    for previous_token in set(input_ids[i].tolist()):
                        # if score < 0 then the repetition penalty has to be multiplied to reduce the previous token probability
                        if scores[i, previous_token] < 0:
                            scores[i, previous_token] *= repetition_penalty
                        else:
                            scores[i, previous_token] /= repetition_penalty

            if do_sample:
                # Temperature (higher temperature => more likely to sample low probability tokens)
                if temperature != 1.0:
                    scores = scores / temperature
                # Top-p/top-k filtering
                # min_value = torch.min(scores, dim=-1)[]
                scores = top_k_top_p_filtering(
                    scores, top_k=top_k, top_p=top_p, min_tokens_to_keep=2
                )  # (batch_size * num_beams, vocab_size)
                # Sample 2 next words for each beam (so we have some spare tokens and match output of greedy beam search)

                try:
                    next_words = torch.multinomial(
                        torch.softmax(scores, dim=-1),
                        num_samples=2,
                        replacement=True)  # (batch_size * num_beams, 2)
                except RuntimeError:
                    # multinomial fails if filtering left no tokens with non-zero probability
                    print((torch.softmax(scores, dim=-1) > 0).sum())
                    raise ValueError('no tokens left to sample after top-k/top-p filtering')
                # Compute next scores
                _scores = F.log_softmax(
                    scores, dim=-1)  # (batch_size * num_beams, vocab_size)
                _scores = torch.gather(
                    _scores, -1, next_words)  # (batch_size * num_beams, 2)
                next_scores = _scores + beam_scores[:, None].expand_as(
                    _scores)  # (batch_size * num_beams, 2)
                # Match shape of greedy beam search
                next_words = next_words.view(
                    batch_size, 2 * num_beams)  # (batch_size, 2 * num_beams)
                next_scores = next_scores.view(
                    batch_size, 2 * num_beams)  # (batch_size, 2 * num_beams)
            else:
                # do greedy beam search
                scores = F.log_softmax(
                    scores, dim=-1)  # (batch_size * num_beams, vocab_size)
                assert scores.size() == (batch_size * num_beams, vocab_size)
                # Add the log prob of the new beams to the log prob of the beginning of the sequence (sum of logs == log of the product)
                _scores = scores + beam_scores[:, None].expand_as(
                    scores)  # (batch_size * num_beams, vocab_size)
                # re-organize to group the beam together (we are keeping top hypothesis across beams)
                _scores = _scores.view(
                    batch_size, num_beams *
                    vocab_size)  # (batch_size, num_beams * vocab_size)
                next_scores, next_words = torch.topk(_scores,
                                                     2 * num_beams,
                                                     dim=1,
                                                     largest=True,
                                                     sorted=True)

            assert next_scores.size() == next_words.size() == (batch_size,
                                                               2 * num_beams)

            # next batch beam content
            # list of (batch_size * num_beams) tuple(next hypothesis score, next word, current position in the batch)
            next_batch_beam = []

            # for each sentence
            for batch_ex in range(batch_size):

                # if we are done with this sentence
                done[batch_ex] = done[batch_ex] or generated_hyps[
                    batch_ex].is_done(next_scores[batch_ex].max().item())
                if done[batch_ex]:
                    next_batch_beam.extend([(0, pad_token_id, 0)] *
                                           num_beams)  # pad the batch
                    continue

                # next sentence beam content
                next_sent_beam = []

                # next words for this sentence
                for idx, score in zip(next_words[batch_ex],
                                      next_scores[batch_ex]):

                    # get beam and word IDs
                    beam_id = idx // vocab_size
                    word_id = idx % vocab_size

                    # end of sentence, or next word
                    if word_id.item(
                    ) in eos_token_ids or cur_len + 1 == max_length:
                        generated_hyps[batch_ex].add(
                            input_ids[batch_ex * num_beams +
                                      beam_id, :cur_len].clone(), score.item())
                    else:
                        next_sent_beam.append(
                            (score, word_id, batch_ex * num_beams + beam_id))

                    # the beam for next step is full
                    if len(next_sent_beam) == num_beams:
                        break

                # update next beam content
                assert len(next_sent_beam
                           ) == 0 if cur_len + 1 == max_length else num_beams
                if len(next_sent_beam) == 0:
                    next_sent_beam = [(0, pad_token_id, 0)
                                      ] * num_beams  # pad the batch
                next_batch_beam.extend(next_sent_beam)
                assert len(next_batch_beam) == num_beams * (batch_ex + 1)

            # sanity check / prepare next batch
            assert len(next_batch_beam) == batch_size * num_beams
            beam_scores = beam_scores.new([x[0] for x in next_batch_beam])
            beam_words = input_ids.new([x[1] for x in next_batch_beam])
            beam_idx = input_ids.new([x[2] for x in next_batch_beam])

            # re-order batch
            input_ids = input_ids[beam_idx, :]
            input_ids = torch.cat([input_ids, beam_words.unsqueeze(1)], dim=-1)

            # re-order internal states
            if past:
                reordered_past = []
                for layer_past in past:
                    # get the correct batch idx from layer past batch dim
                    # batch dim of `past` and `mems` is at 2nd position
                    reordered_layer_past = [
                        layer_past[:, i].unsqueeze(1).clone().detach()
                        for i in beam_idx
                    ]
                    reordered_layer_past = torch.cat(reordered_layer_past,
                                                     dim=1)
                    # check that shape matches
                    assert reordered_layer_past.shape == layer_past.shape
                    reordered_past.append(reordered_layer_past)
                past = tuple(reordered_past)

            # update current length
            cur_len = cur_len + 1

            # stop when we are done with each sentence
            if all(done):
                break

        # visualize hypotheses
        # print([len(x) for x in generated_hyps], cur_len)
        # globals().update( locals() );
        # !import code; code.interact(local=vars())
        # for ii in range(batch_size):
        #     for ss, ww in sorted(generated_hyps[ii].hyp, key=lambda x: x[0], reverse=True):
        #         print("%.3f " % ss + " ".join(self.dico[x] for x in ww.tolist()))
        #     print("")

        # select the best hypotheses
        tgt_len = input_ids.new(batch_size)
        best = []

        for i, hypotheses in enumerate(generated_hyps):
            if len(hypotheses.hyp) == 0:
                continue

            best_hyp = max(hypotheses.hyp, key=lambda x: x[0])[1]
            tgt_len[i] = len(best_hyp) + 1  # +1 for the <EOS> symbol
            best.append(best_hyp)

        # generate target batch
        decoded = input_ids.new(batch_size,
                                tgt_len.max().item()).fill_(pad_token_id)
        for i, hypo in enumerate(best):
            decoded[i, :tgt_len[i] - 1] = hypo
            decoded[i, tgt_len[i] - 1] = eos_token_ids[0]

        return decoded

    model_input = LEADING_TEXT

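    # Simple chat REPL: each prompt is appended to the running context and a reply is
    # generated, until the user types 'exit'.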
    while True:
        user_prompt = input(' >>> ')

        if user_prompt == 'exit':
            exit()

        else:
            num_return_sequences = 1

            model_input += ' [P0] ' + user_prompt + ' [SEP] [P1] '

            input_ids = tokenizer.encode(model_input).ids
            input_ids = torch.LongTensor(input_ids).unsqueeze(0)
            input_ids = input_ids.to(device)

            output = _generate(input_ids=input_ids,
                               max_length=min(max_length,
                                              input_ids.size(1) + 40))

            if num_return_sequences != 1:
                output = output.view(batch_size, num_return_sequences, -1)

            response = tokenizer.decode(output[0].cpu().tolist(),
                                        skip_special_tokens=False)

            eod_token = '[DOC_SEP]'

            if eod_token in response:
                response = response[response.index(eod_token):]

            start_token = '[P1]'
            sep_token = '[SEP]'

            if start_token in response:
                start_idx = response.index(start_token) + len(start_token) + 1
                response = response[start_idx:]

            if sep_token in response:
                sep_idx = response.index(sep_token)
                response = response[:sep_idx]

            model_input += response + f' {sep_token} '

            print('Bot: ' + response)
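Example #8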
def main():
    try:
        os.mkdir(args.snapshot_directory)
    except FileExistsError:
        pass

    images = []
    files = os.listdir(args.dataset_path)
    files.sort()
    for filename in files:
        image = np.load(os.path.join(args.dataset_path, filename))
        image = image / 255
        images.append(image)

    images = np.vstack(images)
    images = images.transpose((0, 3, 1, 2)).astype(np.float32)
    train_dev_split = 0.9
    num_images = images.shape[0]
    num_train_images = int(num_images * train_dev_split)
    num_dev_images = num_images - num_train_images
    images_train = images[:num_train_images]
    images_dev = images[num_train_images:]  # the remaining 10%; slicing from num_dev_images would overlap the training split

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_directory)
    hyperparams.print()

    if hyperparams.use_gru:
        model = GRUModel(hyperparams,
                         snapshot_directory=args.snapshot_directory)
    else:
        model = LSTMModel(hyperparams,
                          snapshot_directory=args.snapshot_directory)
    if using_gpu:
        model.to_gpu()

    dataset = draw.data.Dataset(images_dev)
    iterator = draw.data.Iterator(dataset, batch_size=1)

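    # Figure layout: one row per generation step; the top-left panel shows the input,
    # the middle column the reconstruction after each step, and the right column the
    # samples drawn from the prior after each step.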
    cols = hyperparams.generator_generation_steps
    figure = plt.figure(figsize=(8, 4 * cols))
    axis_1 = figure.add_subplot(cols, 3, 1)
    axis_1.set_title("Data")

    axis_rec_array = []
    for n in range(cols):
        axis_rec_array.append(figure.add_subplot(cols, 3, n * 3 + 2))

    axis_rec_array[0].set_title("Reconstruction")

    axis_gen_array = []
    for n in range(cols):
        axis_gen_array.append(figure.add_subplot(cols, 3, n * 3 + 3))

    axis_gen_array[0].set_title("Generation")

    for batch_index, data_indices in enumerate(iterator):

        with chainer.using_config("train", False), chainer.using_config(
                "enable_backprop", False):
            x = dataset[data_indices]
            x = to_gpu(x)
            axis_1.imshow(make_uint8(x[0]))

            r_t_array, x_param = model.sample_image_at_each_step_from_posterior(
                x,
                zero_variance=args.zero_variance,
                step_limit=args.step_limit)
            for r_t, axis in zip(r_t_array, axis_rec_array[:-1]):
                r_t = to_cpu(r_t)
                axis.imshow(make_uint8(r_t[0]))

            mu_x, ln_var_x = x_param
            mu_x = to_cpu(mu_x.data)
            axis_rec_array[-1].imshow(make_uint8(mu_x[0]))

            r_t_array, x_param = model.sample_image_at_each_step_from_prior(
                batch_size=1, xp=xp)
            for r_t, axis in zip(r_t_array, axis_gen_array[:-1]):
                r_t = to_cpu(r_t)
                axis.imshow(make_uint8(r_t[0]))

            mu_x, ln_var_x = x_param
            mu_x = to_cpu(mu_x.data)
            axis_gen_array[-1].imshow(make_uint8(mu_x[0]))

            plt.pause(0.01)
Example #9
def train_and_evaluate(dataset,
                       loss,
                       noise,
                       run=0,
                       num_batch=32,
                       asymmetric=0):

    val_split = 0.1

    if dataset == 'mnist':
        kerasModel = MNISTModel(num_batch=num_batch)
        kerasModel.optimizer = Adagrad()
    elif dataset == 'cifar10_deep':
        kerasModel = CIFAR10Model(num_batch=num_batch, type='deep')
    elif dataset[8:-1] == 'resnet':
        kerasModel = CIFAR10Model(num_batch=num_batch, type=dataset[8:])
    elif dataset == 'cifar100':
        kerasModel = CIFAR100Model(num_batch=num_batch)
    elif dataset == 'imdb':
        kerasModel = IMDBModel(num_batch=num_batch)
        kerasModel.optimizer = Adagrad()
    elif dataset == 'lstm':
        kerasModel = LSTMModel(num_batch=num_batch)
        kerasModel.optimizer = Adagrad(lr=0.001)
    else:
        raise ValueError('No dataset given.')

    # an important data-dependent configuration
    if dataset == 'cifar100':
        filter_outlier = False
    else:
        filter_outlier = True

    # the data, shuffled and split between train and test sets
    print('Loading %s ...' % dataset)
    X_train, X_test, y_train, y_test = kerasModel.get_data()
    print('Done.')

    # apply label noise
    if asymmetric == 0:
        y_train, P = noisify_with_P(y_train,
                                    kerasModel.classes,
                                    noise,
                                    random_state=run)
    elif asymmetric == 1:
        if dataset == 'mnist':
            y_train, P = noisify_mnist_asymmetric(y_train,
                                                  noise,
                                                  random_state=run)
        elif dataset == 'cifar100':
            y_train, P = noisify_cifar100_asymmetric(y_train,
                                                     noise,
                                                     random_state=run)
        elif dataset[:7] == 'cifar10':
            y_train, P = noisify_cifar10_asymmetric(y_train,
                                                    noise,
                                                    random_state=run)
        else:  # binary classes
            y_train, P = noisify_binary_asymmetric(y_train,
                                                   noise,
                                                   random_state=run)

    print('T: \n', P)

    # convert class vectors to binary class matrices
    Y_train = to_categorical(y_train, kerasModel.classes)
    Y_test = to_categorical(y_test, kerasModel.classes)

    # keep track of the best model
    model_file = build_file_name('tmp_model/', dataset, loss, noise,
                                 asymmetric, run)

    # this is the case when we post-train changing the loss
    if loss == 'est_backward':

        vanilla_file = build_file_name('tmp_model/', dataset, 'crossentropy',
                                       noise, asymmetric, run)

        if not os.path.isfile(vanilla_file):
            raise ValueError('Need to train with crossentropy first !')

        # first compile the vanilla_crossentropy model with the saved weights
        kerasModel.build_model('crossentropy', P=None)
        kerasModel.load_model(vanilla_file)

        # estimate P
        est = NoiseEstimator(classifier=kerasModel,
                             alpha=0.0,
                             filter_outlier=filter_outlier)

        # use all X_train
        P_est = est.fit(X_train).predict()
        print('Condition number:', np.linalg.cond(P_est))
        print('T estimated: \n', P_est)

        # compile the model with the new estimated loss
        kerasModel.build_model('backward', P=P_est)

    elif loss == 'est_forward':
        vanilla_file = build_file_name('tmp_model/', dataset, 'crossentropy',
                                       noise, asymmetric, run)

        if not os.path.isfile(vanilla_file):
            raise ValueError('Need to train with crossentropy first !')

        # first compile the vanilla_crossentropy model with the saved weights
        kerasModel.build_model('crossentropy', P=None)
        kerasModel.load_model(vanilla_file)

        # estimate P
        est = NoiseEstimator(classifier=kerasModel,
                             alpha=0.0,
                             filter_outlier=filter_outlier)
        # use all X_train
        P_est = est.fit(X_train).predict()
        print('T estimated:', P_est)

        # compile the model with the new estimated loss
        kerasModel.build_model('forward', P=P_est)

    else:
        # compile the model
        kerasModel.build_model(loss, P)

    # fit the model
    history = kerasModel.fit_model(model_file,
                                   X_train,
                                   Y_train,
                                   validation_split=val_split)

    history_file = build_file_name('history/', dataset, loss, noise,
                                   asymmetric, run)

    # write the training history to disk
    with open(history_file, 'wb') as f:
        pickle.dump(history, f)
        print('History dumped at ' + str(history_file))

    # test
    score = kerasModel.evaluate_model(X_test, Y_test)

    # clean models, unless it is vanilla_crossentropy --to be used by P_est
    if loss != 'crossentropy':
        os.remove(model_file)

    return score
Example #10
    def create_model(self) -> torch.nn.Module:
        return LSTMModel(self.train_set.in_channels(), self.train_set.out_channels(),
                         self.helper.opt.state_size, self.helper.opt.n_layers)
Example #11
def main():
    try:
        os.mkdir(args.snapshot_directory)
    except FileExistsError:
        pass

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    cuda.get_device(device).use()
    xp = cp

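    # Each MPI rank loads its own contiguous shard of the dataset files (the sorted
    # file list is rotated by rank * shard size and the first shard is kept).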
    images = []
    files = os.listdir(args.dataset_path)
    files.sort()
    subset_size = int(math.ceil(len(files) / comm.size))
    files = deque(files)
    files.rotate(-subset_size * comm.rank)
    files = list(files)[:subset_size]
    for filename in files:
        image = np.load(os.path.join(args.dataset_path, filename))
        image = image / 256
        images.append(image)

    print(comm.rank, files)

    images = np.vstack(images)
    images = images.transpose((0, 3, 1, 2)).astype(np.float32)
    train_dev_split = 0.9
    num_images = images.shape[0]
    num_train_images = int(num_images * train_dev_split)
    num_dev_images = num_images - num_train_images
    images_train = images[:num_train_images]

    # To avoid OpenMPI bug
    # multiprocessing.set_start_method("forkserver")
    # p = multiprocessing.Process(target=print, args=("", ))
    # p.start()
    # p.join()

    hyperparams = HyperParameters()
    hyperparams.chz_channels = args.chz_channels
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_share_upsampler = args.generator_share_upsampler
    hyperparams.generator_downsampler_channels = args.generator_downsampler_channels
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.inference_downsampler_channels = args.inference_downsampler_channels
    hyperparams.batch_normalization_enabled = args.enable_batch_normalization
    hyperparams.use_gru = args.use_gru
    hyperparams.no_backprop_diff_xr = args.no_backprop_diff_xr

    if comm.rank == 0:
        hyperparams.save(args.snapshot_directory)
        hyperparams.print()

    if args.use_gru:
        model = GRUModel(hyperparams,
                         snapshot_directory=args.snapshot_directory)
    else:
        model = LSTMModel(hyperparams,
                          snapshot_directory=args.snapshot_directory)
    model.to_gpu()

    optimizer = AdamOptimizer(model.parameters,
                              lr_i=args.initial_lr,
                              lr_f=args.final_lr,
                              beta_1=args.adam_beta1,
                              communicator=comm)
    if comm.rank == 0:
        optimizer.print()

    num_pixels = images.shape[1] * images.shape[2] * images.shape[3]

    dataset = draw.data.Dataset(images_train)
    iterator = draw.data.Iterator(dataset, batch_size=args.batch_size)

    num_updates = 0

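    # Training loop: the objective is loss_beta * Gaussian NLL + KL over all latent
    # steps + loss_alpha * per-step squared reconstruction error.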
    for iteration in range(args.training_steps):
        mean_kld = 0
        mean_nll = 0
        mean_mse = 0
        start_time = time.time()

        for batch_index, data_indices in enumerate(iterator):
            x = dataset[data_indices]
            x += np.random.uniform(0, 1 / 256, size=x.shape)
            x = to_gpu(x)

            z_t_param_array, x_param, r_t_array = model.sample_z_and_x_params_from_posterior(
                x)

            loss_kld = 0
            for params in z_t_param_array:
                mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
                kld = draw.nn.functions.gaussian_kl_divergence(
                    mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
                loss_kld += cf.sum(kld)

            loss_sse = 0
            for r_t in r_t_array:
                loss_sse += cf.sum(cf.squared_error(r_t, x))

            mu_x, ln_var_x = x_param

            loss_nll = cf.gaussian_nll(x, mu_x, ln_var_x)

            loss_nll /= args.batch_size
            loss_kld /= args.batch_size
            loss_sse /= args.batch_size
            loss = args.loss_beta * loss_nll + loss_kld + args.loss_alpha * loss_sse

            model.cleargrads()
            loss.backward(loss_scale=optimizer.loss_scale())
            optimizer.update(num_updates, loss_value=float(loss.array))

            num_updates += 1
            mean_kld += float(loss_kld.data)
            mean_nll += float(loss_nll.data)
            mean_mse += float(loss_sse.data) / num_pixels / (
                hyperparams.generator_generation_steps - 1)

            printr(
                "Iteration {}: Batch {} / {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e}"
                .format(
                    iteration + 1, batch_index + 1, len(iterator),
                    float(loss_nll.data) / num_pixels + math.log(256.0),
                    float(loss_sse.data) / num_pixels /
                    (hyperparams.generator_generation_steps - 1),
                    float(loss_kld.data), optimizer.learning_rate))

            if comm.rank == 0 and batch_index > 0 and batch_index % 100 == 0:
                model.serialize(args.snapshot_directory)

        if comm.rank == 0:
            model.serialize(args.snapshot_directory)

        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            print(
                "\r\033[2KIteration {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e} - elapsed_time: {:.3f} min"
                .format(
                    iteration + 1,
                    mean_nll / len(iterator) / num_pixels + math.log(256.0),
                    mean_mse / len(iterator), mean_kld / len(iterator),
                    optimizer.learning_rate, elapsed_time / 60))
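Example #12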
def main(args):
    print(args)
    startime = time.time()
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

    # Set hyper-parameters.
    batch_size = 128
    epochs = 100
    maxlen = 300
    model_path = 'models/model_{}.h5'
    num_words = 40000
    num_label = 2

    # Data loading.
    print(return_time(startime), "1. Loading data ...")
    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv')

    # pre-processing.
    print(return_time(startime), "2. Preprocessing dataset ...")
    x = preprocess_dataset(x)
    x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                        test_size=0.2,
                                                        random_state=42)
    vocab = build_vocabulary(x_train, num_words)
    x_train = vocab.texts_to_sequences(x_train)
    x_test = vocab.texts_to_sequences(x_test)
    x_train = pad_sequences(x_train, maxlen=maxlen, truncating='post')
    x_test = pad_sequences(x_test, maxlen=maxlen, truncating='post')

    # Preparing word embedding.
    if args.loadwv:
        print(return_time(startime), "3. Loading word embedding ...")
        wv_path = 'data/wv_{0}_{1}.npy'.format(maxlen, num_words)
        if os.path.exists(wv_path):
            wv = np.load(wv_path)
            print(return_time(startime), "Loaded word embedding successfully!")
        else:
            print(return_time(startime), "Word embedding file doesn't exist")
            exit()

    else:
        print(return_time(startime), "3. Preparing word embedding ...")
        wv = load_fasttext('data/cc.ja.300.vec.gz')
        wv = filter_embeddings(wv, vocab.word_index, num_words)
        # Saving word embedding.
        if args.savewv:
            wv_path = 'data/wv_{0}_{1}.npy'.format(maxlen, num_words)
            np.save(wv_path, wv)
            print(return_time(startime), "Saved word embedding successfully!", wv_path)

    # Build models.
    models = [
        RNNModel(num_words, num_label, embeddings=None).build(),
        LSTMModel(num_words, num_label, embeddings=None).build(),
        CNNModel(num_words, num_label, embeddings=None).build(),
        RNNModel(num_words, num_label, embeddings=wv).build(),
        LSTMModel(num_words, num_label, embeddings=wv).build(),
        CNNModel(num_words, num_label, embeddings=wv).build(),
        CNNModel(num_words, num_label, embeddings=wv, trainable=False).build()
    ]

    model_names = [
        "RNN-None",
        "LSTM-None",
        "CNN-None",
        "RNN-wv",
        "LSTM-wv",
        "CNN-wv",
        "CNN-wv-notrain"
    ]

    print(return_time(startime), "4. Start training ...")
    for i, model in enumerate(models):
        print("***********************************")
        print(return_time(startime), "Model:", model_names[i])

        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['acc'])

        # Preparing callbacks.
        callbacks = [
            EarlyStopping(patience=3),
            ModelCheckpoint(model_path.format(model_names[i]), save_best_only=True)
        ]

        # Train the model.
        model.fit(x=x_train,
                  y=y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_split=0.2,
                  callbacks=callbacks,
                  shuffle=True)

        # Inference.
        model = load_model(model_path.format(model_names[i]))
        api = InferenceAPI(model, vocab, preprocess_dataset)
        y_pred = api.predict_from_sequences(x_test)
        print('precision: {:.4f}'.format(precision_score(y_test, y_pred, average='binary')))
        print('recall   : {:.4f}'.format(recall_score(y_test, y_pred, average='binary')))
        print('f1       : {:.4f}'.format(f1_score(y_test, y_pred, average='binary')))