Ejemplo n.º 1
0
 def __init__(self,
              model_container,
              generator,
              num_samples_stats,
              num_display_sentences,
              output_dir=None):
     """Keep the evaluation inputs, build a decoder and prepare the output folder.

     Args:
         model_container: stored on the instance as ``model_container``.
         generator: stored on the instance as ``generator``.
         num_samples_stats: stored on the instance as ``num_samples_stats``.
         num_display_sentences: stored on the instance as
             ``num_display_sentences``.
         output_dir: optional directory path; it is created when it is given
             and does not exist yet.
     """
     # Plain bookkeeping: every argument is kept under the same name.
     self.generator = generator
     self.model_container = model_container
     self.num_display_sentences = num_display_sentences
     self.num_samples_stats = num_samples_stats
     self.output_dir = output_dir

     # NOTE(review): ``beam_width`` is not a parameter of this method;
     # presumably it is a module-level name -- confirm.
     self.decoder = Decoder(beam_width=beam_width)

     print("output directory is ", output_dir)

     # Nothing to create when no directory was requested.
     if output_dir is None:
         return
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
Ejemplo n.º 2
0
 def __init__(self,
              env,
              other_players,
              n_seats,
              stacks=2500,
              encoding='norm',
              encoder=None,
              decoder=None,
              visualize=False,
              debug=False):
     """Store the environment settings and register the players.

     Args:
         env: stored on the instance as ``env``.
         other_players: sized collection that must hold exactly
             ``n_seats - 1`` entries (checked with an assertion).
         n_seats: number of seats; forwarded to ``_add_players`` and to the
             default ``Encoder``.
         stacks: forwarded to ``_add_players``.
         encoding: passed as ``ranking_encoding`` to the default ``Encoder``;
             unused when ``encoder`` is supplied.
         encoder: encoder to use; a new ``Encoder`` is built when ``None``.
         decoder: decoder to use; a new ``Decoder`` is built when ``None``.
         visualize: stored as ``_visualize``.
         debug: stored as ``_debug``.
     """
     assert len(other_players) == n_seats - 1

     self.env = env
     self.other_players = other_players
     self.n_seats = n_seats
     self._debug = debug
     self._visualize = visualize

     # Fall back to freshly built helpers only when none were supplied.
     if encoder is None:
         encoder = Encoder(n_seats, ranking_encoding=encoding)
     self._encoder = encoder

     if decoder is None:
         decoder = Decoder()
     self._decoder = decoder

     self._add_players(n_seats, stacks)
Ejemplo n.º 3
0
        from decoders import gridrnn_Decoder as Decoder
    elif args.dec_type == 'hidden':
        from decoders import Hidden_Decoder as Decoder
    else:
        from decoders import gridrnn_Decoder as Decoder

    # Choose the discriminator implementation: 'dcgan' selects the DCGAN
    # variant, any other value of args.d_type falls through to the hidden one.
    if args.d_type == 'dcgan':
        from discriminators import DCGAN_discriminator as Discriminator
    #elif args.d_type == 'hidden':
    else:
        from discriminators import Hidden_discriminator as Discriminator
    #else:
    #from discriminators import DCGAN_discriminator as Discriminator

    # Build the three networks from the same `args` object.
    generator = Generator(args)
    decoder = Decoder(args)
    discriminator = Discriminator(args)

    if cuda:
        # Call .cuda() on each network and on the two loss objects.
        # NOTE(review): BCELoss / MSELoss are defined outside this excerpt;
        # presumably they are loss instances -- confirm.
        generator.cuda()
        discriminator.cuda()
        decoder.cuda()
        BCELoss.cuda()
        MSELoss.cuda()
    else:
        # Without CUDA the models are only printed.
        print('models', generator, discriminator, decoder)

    # Initialize weights
    generator.apply(weights_init_normal)
    discriminator.apply(weights_init_normal)
    decoder.apply(weights_init_normal)
Ejemplo n.º 4
0
                        shuffle=False,
                        collate_fn=dataset.collate_fn)

# iterations per epoch (rounded up, so a trailing partial batch is counted)
args.iter_per_epoch = math.ceil(
    dataset.num_data_points[args.split] / args.batch_size)
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
# ----------------------------------------------------------------------------

# the encoder is built and restored first because the decoder is constructed
# from it
encoder = Encoder(model_args)
encoder.load_state_dict(components['encoder'])

decoder = Decoder(model_args, encoder)
decoder.load_state_dict(components['decoder'])
print("Loaded model from {}".format(args.load_path))

# a negative gpuid leaves both modules where they are
if args.gpuid >= 0:
    encoder, decoder = encoder.cuda(), decoder.cuda()

# ----------------------------------------------------------------------------
# evaluation
# ----------------------------------------------------------------------------

print("Evaluation start time: {}".format(
    datetime.datetime.utcnow().strftime('%d-%b-%Y-%H:%M:%S')))
encoder.eval()
Ejemplo n.º 5
0
def train(run_name, speaker, start_epoch, stop_epoch, img_c, img_w, img_h,
          frames_n, absolute_max_string_len, minibatch_size):
    """Build the data generator, model and callbacks, then run training.

    Datasets, results and logs are looked up under
    ``CURRENT_PATH/<speaker>/{datasets,results,logs}``; per-run artefacts go
    into a ``run_name`` sub-path of the results and logs directories.

    Args:
        run_name: name of this run, used in output and log paths.
        speaker: sub-directory of ``CURRENT_PATH`` holding the speaker data.
        start_epoch: first epoch to run; when greater than zero the weights
            file of epoch ``start_epoch - 1`` is loaded before training.
        stop_epoch: passed to ``fit_generator`` as ``epochs``.
        img_c, img_w, img_h, frames_n, absolute_max_string_len: forwarded
            unchanged to both ``BasicGenerator`` and ``LipNet``.
        minibatch_size: batch size of the generator; also used as the number
            of sentences handed to ``Visualize``.
    """
    dataset_dir = os.path.join(CURRENT_PATH, speaker, 'datasets')
    results_dir = os.path.join(CURRENT_PATH, speaker, 'results')
    logs_dir = os.path.join(CURRENT_PATH, speaker, 'logs')

    # Settings shared verbatim by the generator and the network.
    shared_dims = dict(img_c=img_c,
                       img_w=img_w,
                       img_h=img_h,
                       frames_n=frames_n,
                       absolute_max_string_len=absolute_max_string_len)

    curriculum = Curriculum(curriculum_rules)
    lip_gen = BasicGenerator(dataset_path=dataset_dir,
                             minibatch_size=minibatch_size,
                             curriculum=curriculum,
                             start_epoch=start_epoch,
                             **shared_dims).build()

    lipnet = LipNet(output_size=lip_gen.get_output_size(), **shared_dims)
    lipnet.summary()

    optimizer = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # the loss calc occurs elsewhere, so the compiled "loss" simply passes
    # the prediction through
    lipnet.model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                         optimizer=optimizer)

    # resume from the weights written at the end of the previous epoch
    if start_epoch > 0:
        previous_weights = os.path.join(
            results_dir, run_name, 'weights%02d.h5' % (start_epoch - 1))
        lipnet.model.load_weights(previous_weights)

    spell = Spell(path=PREDICT_DICTIONARY)
    decoder = Decoder(greedy=PREDICT_GREEDY,
                      beam_width=PREDICT_BEAM_WIDTH,
                      postprocessors=[labels_to_text, spell.sentence])

    run_results_dir = os.path.join(results_dir, run_name)

    # callbacks are created in the same order as before; each ``next_val()``
    # call is issued exactly where the original issued it
    statistics = Statistics(lipnet,
                            lip_gen.next_val(),
                            decoder,
                            256,
                            output_dir=run_results_dir)
    visualize = Visualize(run_results_dir,
                          lipnet,
                          lip_gen.next_val(),
                          decoder,
                          num_display_sentences=minibatch_size)
    tensorboard = TensorBoard(log_dir=os.path.join(logs_dir, run_name))
    csv_log_path = os.path.join(logs_dir,
                                "{}-{}.csv".format('training', run_name))
    csv_logger = CSVLogger(csv_log_path, separator=',', append=True)
    checkpoint = ModelCheckpoint(os.path.join(run_results_dir,
                                              "weights{epoch:02d}.h5"),
                                 monitor='val_loss',
                                 save_weights_only=True,
                                 mode='auto',
                                 period=1)

    callbacks = [
        checkpoint,
        statistics,
        visualize,
        lip_gen,
        tensorboard,
        csv_logger,
    ]

    lipnet.model.fit_generator(generator=lip_gen.next_train(),
                               steps_per_epoch=lip_gen.default_training_steps,
                               epochs=stop_epoch,
                               validation_data=lip_gen.next_val(),
                               validation_steps=2,
                               callbacks=callbacks,
                               initial_epoch=start_epoch,
                               verbose=1,
                               max_q_size=5,
                               workers=2,
                               pickle_safe=True)
        'num_data_points', 'vocab_size', 'max_ques_count', 'max_ques_len',
        'max_ans_len'
}:
    setattr(model_args, key, getattr(dataset, key))

# iterations per epoch (rounded up, so a trailing partial batch is counted)
args.iter_per_epoch = math.ceil(
    dataset.num_data_points['train'] / args.batch_size)
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
# ----------------------------------------------------------------------------

encoder = Encoder(model_args)
decoder = Decoder(model_args, encoder)
criterion = nn.CrossEntropyLoss()

# one Adam instance receives the parameters of both modules
optimizer = optim.Adam(
    [*encoder.parameters(), *decoder.parameters()],
    lr=args.lr,
)
# StepLR configured with step_size=1 and gamma=args.lr_decay_rate
scheduler = lr_scheduler.StepLR(
    optimizer,
    step_size=1,
    gamma=args.lr_decay_rate,
)

# an empty load_path means "start from scratch"
if args.load_path != '':
    encoder.load_state_dict(components['encoder'])
    decoder.load_state_dict(components['decoder'])
    optimizer.load_state_dict(components['optimizer'])
    # cuda enabled optimizer, see:
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
Ejemplo n.º 7
0
def beam_search(
    decoder: Decoder,
    size: int,
    bos_index: int,
    eos_index: int,
    pad_index: int,
    encoder_output: Tensor,
    encoder_hidden: Tensor,
    src_mask: Tensor,
    max_output_length: int,
    alpha: float,
    embed: Embeddings,
    n_best: int = 1,
) -> (np.array, np.array):
    """
    Beam search with size k.
    Inspired by OpenNMT-py, adapted for Transformer.

    In each decoding step, find the k most likely partial hypotheses.

    :param decoder: decoder that is unrolled one step at a time; its
        `output_size` attribute is used as the vocabulary size
    :param size: size of the beam, must be > 0
    :param bos_index: index every hypothesis is started with
    :param eos_index: index that marks a hypothesis as finished
    :param pad_index: value used to pad the stacked output
    :param encoder_output: passed to the decoder (tiled `size` times on dim 0)
    :param encoder_hidden: used to initialise the hidden state of recurrent
        decoders and passed to the decoder at every step
    :param src_mask: source mask; its first dimension is the batch size
    :param max_output_length: maximum number of decoding steps; all beams are
        forced to finish at the last step
    :param alpha: `alpha` factor for length penalty; the penalty is only
        applied if `alpha > -1`
    :param embed: callable that embeds the decoder input indices
    :param n_best: return this many hypotheses, <= beam (currently only 1)
    :return:
        - stacked_output: output hypotheses (2d array of indices),
        - stacked_attention_scores: always `None` in this implementation
    """
    assert size > 0, "Beam size must be >0."
    assert n_best <= size, "Can only return {} best hypotheses.".format(size)

    # init
    transformer = isinstance(decoder, TransformerDecoder)
    batch_size = src_mask.size(0)
    att_vectors = None  # not used for Transformer

    # Recurrent models only: initialize RNN hidden state
    # pylint: disable=protected-access
    if not transformer:
        hidden = decoder._init_hidden(encoder_hidden)
    else:
        hidden = None

    # tile encoder states and decoder initial states beam_size times
    if hidden is not None:
        hidden = tile(hidden, size,
                      dim=1)  # layers x batch*k x dec_hidden_size

    encoder_output = tile(encoder_output.contiguous(), size,
                          dim=0)  # batch*k x src_len x enc_hidden_size
    src_mask = tile(src_mask, size, dim=0)  # batch*k x 1 x src_len

    # Transformer only: create target mask
    if transformer:
        trg_mask = src_mask.new_ones([1, 1, 1])  # transformer only
    else:
        trg_mask = None

    # numbering elements in the batch
    batch_offset = torch.arange(batch_size,
                                dtype=torch.long,
                                device=encoder_output.device)

    # numbering elements in the extended batch, i.e. beam size copies of each
    # batch element
    beam_offset = torch.arange(0,
                               batch_size * size,
                               step=size,
                               dtype=torch.long,
                               device=encoder_output.device)

    # keeps track of the top beam size hypotheses to expand for each element
    # in the batch to be further decoded (that are still "alive")
    alive_seq = torch.full(
        [batch_size * size, 1],
        bos_index,
        dtype=torch.long,
        device=encoder_output.device,
    )

    # Give full probability to the first beam on the first step.
    topk_log_probs = torch.zeros(batch_size,
                                 size,
                                 device=encoder_output.device)
    topk_log_probs[:, 1:] = float("-inf")

    # Structure that holds finished hypotheses.
    hypotheses = [[] for _ in range(batch_size)]

    results = {
        "predictions": [[] for _ in range(batch_size)],
        "scores": [[] for _ in range(batch_size)],
        "gold_score": [0] * batch_size,
    }

    for step in range(max_output_length):

        # This decides which part of the predicted sentence we feed to the
        # decoder to make the next prediction.
        # For Transformer, we feed the complete predicted sentence so far.
        # For Recurrent models, only feed the previous target word prediction
        if transformer:  # Transformer
            decoder_input = alive_seq  # complete prediction so far
        else:  # Recurrent
            decoder_input = alive_seq[:, -1].view(-1, 1)  # only the last word

        # expand current hypotheses
        # decode one single step
        # logits: logits for final softmax
        # pylint: disable=unused-variable
        trg_embed = embed(decoder_input)
        logits, hidden, att_scores, att_vectors = decoder(
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden,
            src_mask=src_mask,
            trg_embed=trg_embed,
            hidden=hidden,
            prev_att_vector=att_vectors,
            unroll_steps=1,
            trg_mask=trg_mask,  # subsequent mask for Transformer only
        )

        # For the Transformer we made predictions for all time steps up to
        # this point, so we only want to know about the last time step.
        if transformer:
            logits = logits[:, -1]  # keep only the last time step
            hidden = None  # we don't need to keep it for transformer

        # batch*k x trg_vocab
        log_probs = F.log_softmax(logits, dim=-1).squeeze(1)

        # multiply probs by the beam probability (=add logprobs)
        log_probs += topk_log_probs.view(-1).unsqueeze(1)
        curr_scores = log_probs.clone()

        # compute length penalty
        if alpha > -1:
            length_penalty = ((5.0 + (step + 1)) / 6.0)**alpha
            curr_scores /= length_penalty

        # flatten log_probs into a list of possibilities
        curr_scores = curr_scores.reshape(-1, size * decoder.output_size)

        # pick currently best top k hypotheses (flattened order)
        topk_scores, topk_ids = curr_scores.topk(size, dim=-1)

        if alpha > -1:
            # recover original log probs
            topk_log_probs = topk_scores * length_penalty
        else:
            topk_log_probs = topk_scores.clone()

        # reconstruct beam origin and true word ids from flattened order.
        # The beam index must stay an integer tensor because it is used for
        # index_select below; a plain `div` performs true division in current
        # PyTorch and would return floats, so floor division is requested
        # explicitly.
        topk_beam_index = torch.div(topk_ids,
                                    decoder.output_size,
                                    rounding_mode="floor")
        topk_ids = topk_ids.fmod(decoder.output_size)

        # map beam_index to batch_index in the flat representation
        batch_index = topk_beam_index + beam_offset[:topk_beam_index.
                                                    size(0)].unsqueeze(1)
        select_indices = batch_index.view(-1)

        # append latest prediction
        alive_seq = torch.cat(
            [alive_seq.index_select(0, select_indices),
             topk_ids.view(-1, 1)], -1)  # batch_size*k x hyp_len

        is_finished = topk_ids.eq(eos_index)
        if step + 1 == max_output_length:
            # out of steps: treat every remaining beam as finished
            is_finished.fill_(True)
        # end condition is whether the top beam is finished
        end_condition = is_finished[:, 0].eq(True)

        # save finished hypotheses
        if is_finished.any():
            predictions = alive_seq.view(-1, size, alive_seq.size(-1))
            for i in range(is_finished.size(0)):
                b = batch_offset[i]
                if end_condition[i]:
                    is_finished[i].fill_(True)
                finished_hyp = is_finished[i].nonzero().view(-1)
                # store finished hypotheses for this batch
                for j in finished_hyp:
                    # Check if the prediction has more than one EOS.
                    # If it has more than one EOS, it means that the
                    # prediction should have already been added to the
                    # hypotheses, so you don't have to add them again.
                    if (predictions[i, j, 1:]
                            == eos_index).nonzero().numel() < 2:
                        hypotheses[b].append((
                            topk_scores[i, j],
                            predictions[i, j, 1:],
                        )  # ignore start_token
                                             )
                # if the batch reached the end, save the n_best hypotheses
                if end_condition[i]:
                    best_hyp = sorted(hypotheses[b],
                                      key=lambda x: x[0],
                                      reverse=True)
                    for n, (score, pred) in enumerate(best_hyp):
                        if n >= n_best:
                            break
                        results["scores"][b].append(score)
                        results["predictions"][b].append(pred)
            non_finished = end_condition.eq(False).nonzero().view(-1)
            # if all sentences are translated, no need to go further
            # pylint: disable=len-as-condition
            if len(non_finished) == 0:
                break
            # remove finished batches for the next step
            topk_log_probs = topk_log_probs.index_select(0, non_finished)
            batch_index = batch_index.index_select(0, non_finished)
            batch_offset = batch_offset.index_select(0, non_finished)
            alive_seq = predictions.index_select(0, non_finished).view(
                -1, alive_seq.size(-1))

        # reorder indices, outputs and masks
        select_indices = batch_index.view(-1)
        encoder_output = encoder_output.index_select(0, select_indices)
        src_mask = src_mask.index_select(0, select_indices)

        if hidden is not None and not transformer:
            if isinstance(hidden, tuple):
                # for LSTMs, states are tuples of tensors
                h, c = hidden
                h = h.index_select(1, select_indices)
                c = c.index_select(1, select_indices)
                hidden = (h, c)
            else:
                # for GRUs, states are single tensors
                hidden = hidden.index_select(1, select_indices)

        if att_vectors is not None:
            att_vectors = att_vectors.index_select(0, select_indices)

    def pad_and_stack_hyps(hyps, pad_value):
        """Stack 1d index arrays into one 2d int array padded with pad_value."""
        longest = max(h.shape[0] for h in hyps)
        filled = np.full((len(hyps), longest), pad_value, dtype=int)
        for j, h in enumerate(hyps):
            filled[j, :h.shape[0]] = h
        return filled

    # from results to stacked outputs
    assert n_best == 1
    # only works for n_best=1 for now
    final_outputs = pad_and_stack_hyps(
        [r[0].cpu().numpy() for r in results["predictions"]],
        pad_value=pad_index)

    return final_outputs, None
Ejemplo n.º 8
0
# copy the dataset-derived attributes onto the model arguments
for key in ('num_data_points', 'vocab_size', 'max_ques_count',
            'max_ques_len', 'max_ans_len'):
    setattr(model_args, key, getattr(dataset, key))

# iterations per epoch (rounded down: a trailing partial batch is not counted)
args.iter_per_epoch = math.floor(
    dataset.num_data_points['train'] / args.batch_size)
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
# ----------------------------------------------------------------------------
encoder = Encoder(model_args)
decoder = Decoder(model_args, encoder)
criterion = nn.CrossEntropyLoss()

# one Adam instance receives the parameters of both modules
optimizer = optim.Adam(
    [*encoder.parameters(), *decoder.parameters()],
    lr=args.lr,
    weight_decay=args.weight_decay,
)
encoder.word_embed.init_embedding('data/glove/glove6b_init_300d_1.0.npy')

# training starts at epoch 0 unless a checkpoint says otherwise
start_epoch = 0
if args.load_path != '':
    components = torch.load(args.load_path)
    # each lookup falls back to the whole loaded object when its key is absent
    # NOTE(review): 'epoch' has no such fallback and must be present -- confirm
    # that every checkpoint loaded here carries it.
    encoder.load_state_dict(components.get('encoder', components))
    decoder.load_state_dict(components.get('decoder', components))
    optimizer.load_state_dict(components.get('optimizer', components))
    start_epoch = components['epoch']
    print("Loaded model from {}".format(args.load_path))
print("Decoder: {}".format(args.decoder))
Ejemplo n.º 9
0
# evaluation data is read in order, without shuffling
dataset = VisDialDataset(args, [args.split])
dataloader = DataLoader(
    dataset,
    batch_size=args.batch_size,
    shuffle=False,
    collate_fn=dataset.collate_fn,
)

# iterations per epoch (rounded down: a trailing partial batch is not counted)
args.iter_per_epoch = math.floor(
    dataset.num_data_points[args.split] / args.batch_size)
print("{} iter per epoch.".format(args.iter_per_epoch))

# ----------------------------------------------------------------------------
# setup the model
# ----------------------------------------------------------------------------

encoder = Encoder(model_args)
decoder = Decoder(model_args, encoder)

# both modules are wrapped in DataParallel and moved with .cuda() right away
# NOTE(review): this .cuda() is unconditional, so the gpuid check further
# down only repeats it -- confirm whether CPU evaluation is meant to work.
encoder = nn.DataParallel(encoder).cuda()
decoder = nn.DataParallel(decoder).cuda()

# each lookup falls back to the whole `components` object when its key is
# absent
encoder.load_state_dict(components.get('encoder', components))
decoder.load_state_dict(components.get('decoder', components))
print("Loaded model from {}".format(args.load_path))

if args.gpuid >= 0:
    encoder, decoder = encoder.cuda(), decoder.cuda()

# ----------------------------------------------------------------------------
# evaluation
# ----------------------------------------------------------------------------
Ejemplo n.º 10
0
class Metrics(keras.callbacks.Callback):
    """Keras callback that reports CER / WER / BLEU figures after each epoch.

    After every epoch the callback draws batches from ``generator``, runs
    ``model_container.predict`` on them, decodes the predictions and prints
    the averaged metrics.  When ``output_dir`` is set, the metrics are also
    appended to ``stats.csv`` and a per-epoch ``eNN.csv`` with truth/decoded
    pairs is written there.  Without ``output_dir`` nothing is written.
    """

    def __init__(self,
                 model_container,
                 generator,
                 num_samples_stats,
                 num_display_sentences,
                 output_dir=None):
        """Store the evaluation inputs and create ``output_dir`` if needed.

        Args:
            model_container: object whose ``predict`` method is called on
                input batches.
            generator: iterator; ``next(generator)[0]`` must be a mapping
                with ``'the_input'``, ``'input_length'`` and ``'source_str'``.
            num_samples_stats: number of samples used for the statistics.
            num_display_sentences: number of sentences written to the
                per-epoch CSV file.
            output_dir: optional directory for the CSV files; created when
                missing.  ``None`` disables all file output.
        """
        super().__init__()
        self.model_container = model_container
        self.output_dir = output_dir
        self.generator = generator
        self.num_samples_stats = num_samples_stats
        self.num_display_sentences = num_display_sentences
        # NOTE(review): ``beam_width`` is not a parameter of this method;
        # presumably it is a module-level name -- confirm.
        self.decoder = Decoder(beam_width=beam_width)
        print("output directory is ", output_dir)
        if output_dir is not None and not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

    def get_statistics(self, num):
        """Decode ``num`` samples from the generator and average the metrics.

        Returns:
            dict with ``'samples'`` (``num``) and ``'cer'``, ``'wer'``,
            ``'bleu'``, each a ``(mean, normalised mean)`` tuple.
        """
        num_left = num
        data = []

        while num_left > 0:
            output_batch = next(self.generator)[0]
            # never take more samples from a batch than are still needed
            num_proc = min(output_batch['the_input'].shape[0], num_left)
            y_pred = self.model_container.predict(
                output_batch['the_input'][0:num_proc])
            input_length = output_batch['input_length'][0:num_proc]
            decoded_res = self.decoder.decode(y_pred, input_length)

            # every entry is a (decoded, truth) pair
            data.extend((decoded_res[j], output_batch['source_str'][j])
                        for j in range(num_proc))

            num_left -= num_proc

        mean_cer, mean_cer_norm = self.get_mean_character_error_rate(data)
        mean_wer, mean_wer_norm = self.get_mean_word_error_rate(data)
        mean_bleu, mean_bleu_norm = self.get_mean_bleu_score(data)

        return {
            'samples': num,
            'cer': (mean_cer, mean_cer_norm),
            'wer': (mean_wer, mean_wer_norm),
            'bleu': (mean_bleu, mean_bleu_norm)
        }

    def get_mean_tuples(self, data, individual_length, func):
        """Average ``func(a, b)`` over all ``(a, b)`` pairs in ``data``.

        Returns:
            ``(mean, mean of value / individual_length)``; ``(0.0, 0.0)``
            when ``data`` is empty, instead of dividing by zero.
        """
        length = len(data)
        if length == 0:
            return (0.0, 0.0)

        total = 0.0
        total_norm = 0.0
        for first, second in data:
            val = float(func(first, second))
            total += val
            total_norm += val / individual_length
        return (total / length, total_norm / length)

    def get_mean_character_error_rate(self, data):
        """Mean edit distance, normalised by the mean truth length in characters."""
        mean_individual_length = np.mean([len(pair[1]) for pair in data])
        return self.get_mean_tuples(data, mean_individual_length,
                                    editdistance.eval)

    def get_mean_word_error_rate(self, data):
        """Mean ``wer_sentence`` value, normalised by the mean truth length in words."""
        mean_individual_length = np.mean(
            [len(pair[1].split()) for pair in data])
        return self.get_mean_tuples(data, mean_individual_length, wer_sentence)

    def get_mean_bleu_score(self, data):
        """Mean ``sentence_bleu`` value over all pairs (normalised by 1.0)."""
        # NOTE(review): pairs are (decoded, truth), so the decoded string is
        # what gets wrapped as the reference list here -- confirm this
        # argument order is intended.
        wrapped_data = [([decoded], truth) for decoded, truth in data]
        return self.get_mean_tuples(wrapped_data, 1.0,
                                    bleu_score.sentence_bleu)

    def on_train_begin(self, logs=None):
        """Start a fresh ``stats.csv`` with a header row (if file output is on)."""
        if self.output_dir is None:
            return
        # newline='' is what the csv module expects for files it writes to
        with open(os.path.join(self.output_dir, 'stats.csv'), 'w',
                  newline='') as csvfile:
            csvw = csv.writer(csvfile)
            csvw.writerow([
                "Epoch", "Samples", "Mean CER", "Mean CER (Norm)", "Mean WER",
                "Mean WER (Norm)", "Mean BLEU", "Mean BLEU (Norm)"
            ])

    def on_epoch_end(self, epoch, logs=None):
        """Print the epoch statistics and, if enabled, write them to disk."""
        stats = self.get_statistics(self.num_samples_stats)

        print(
            '\n\n[Epoch %d] Out of %d samples: [CER: %.3f - %.3f] [WER: %.3f - %.3f] [BLEU: %.3f - %.3f]\n'
            % (epoch + 1, stats['samples'], stats['cer'][0], stats['cer'][1],
               stats['wer'][0], stats['wer'][1], stats['bleu'][0],
               stats['bleu'][1]))

        # without an output directory there is nowhere to write the CSV files
        if self.output_dir is None:
            return

        with open(os.path.join(self.output_dir, 'stats.csv'), 'a',
                  newline='') as csvfile:
            csvw = csv.writer(csvfile)
            csvw.writerow([
                epoch, stats['samples'], "{0:.5f}".format(stats['cer'][0]),
                "{0:.5f}".format(stats['cer'][1]),
                "{0:.5f}".format(stats['wer'][0]),
                "{0:.5f}".format(stats['wer'][1]),
                "{0:.5f}".format(stats['bleu'][0]),
                "{0:.5f}".format(stats['bleu'][1])
            ])

        output_batch = next(self.generator)[0]

        y_pred = self.model_container.predict(
            output_batch['the_input'][0:self.num_display_sentences])
        input_length = output_batch['input_length'][0:self.
                                                    num_display_sentences]
        res = self.decoder.decode(y_pred, input_length)

        # a batch may hold fewer items than num_display_sentences
        num_rows = min(self.num_display_sentences, len(res))

        with open(os.path.join(self.output_dir, 'e%02d.csv' % (epoch)), 'w',
                  newline='') as csvfile:
            csvw = csv.writer(csvfile)
            csvw.writerow(["Truth", "Decoded"])
            for i in range(num_rows):
                csvw.writerow([output_batch['source_str'][i], res[i]])