Ejemplo n.º 1
0
def main():
    """Build one batch for manual inspection and stop in the debugger.

    If ``args.data_path`` is not the empty string, a ``Batcher`` is created
    over that path and a single batch is pulled from it. Otherwise one JSON
    document is read from ``args.json_path``; its ``'body'`` and ``'title'``
    fields are normalized with ``neologdn``, run through
    ``MeCab('-Owakati')`` and wrapped in a one-example ``B.Batch``.

    This is a debugging harness: every path ends in ``pdb.set_trace()`` and
    nothing is returned.
    """
    import pdb  # imported once here instead of before every breakpoint

    args = get_args()
    vocab = Vocab(args.vocab_path, args.vocab_size)  # create a vocabulary
    hps = get_hps()
    if args.data_path != "":
        batcher = Batcher(args.data_path, vocab, hps, args.single_pass)
        pdb.set_trace()
        x = batcher.next_batch()  # kept in a local so it can be inspected from pdb
        pdb.set_trace()
        pass  # presumably here so the debugger has a line to stop on in this frame
    else:
        # JSON text is UTF-8; do not rely on the platform default encoding,
        # which breaks on non-ASCII article text under non-UTF-8 locales.
        with open(args.json_path, encoding="utf-8") as f:
            art = json.load(f)
        article = neologdn.normalize(art['body'])
        abstract = neologdn.normalize(art['title'])
        m = MeCab('-Owakati')
        parsed_article = m.parse(article)
        # The article is passed on as the parsed string, the abstract as a list of tokens.
        abs_words = m.parse(abstract).split()
        ex = B.Example(parsed_article, abs_words, vocab, hps)
        b = B.Batch([ex], hps, vocab)  # kept in a local so it can be inspected from pdb
        pdb.set_trace()
        pass  # presumably here so the debugger has a line to stop on in this frame
Ejemplo n.º 2
0
def build_batch(pair_list, vocab, hp):
    """Wrap (article, abstract) pairs into a single ``batcher.Batch``.

    Args:
        pair_list: iterable of 2-item ``(article, abstract)`` entries; each
            entry becomes one ``batcher.Example``.
        vocab: vocabulary object forwarded to every example and to the batch.
        hp: hyper-parameter object forwarded to every example and to the batch.

    Returns:
        The ``batcher.Batch`` built from all examples, in input order.
    """
    examples = []
    for article, abstract in pair_list:
        example = batcher.Example(
            article=article,
            abstract_sentences=abstract,
            vocab=vocab,
            hps=hp,
        )
        examples.append(example)
    return batcher.Batch(example_list=examples, hps=hp, vocab=vocab)
Ejemplo n.º 3
0
def json_batch(fname, hps, vocab):
    """Build a one-example batch from a JSON article file.

    Args:
        fname: path of a JSON file holding an object with ``'body'`` (article
            text) and ``'title'`` (used as the abstract) keys.
        hps: hyper-parameter object forwarded to ``B.Example`` and ``B.Batch``.
        vocab: vocabulary object forwarded to ``B.Example`` and ``B.Batch``.

    Returns:
        A ``B.Batch`` containing exactly one ``B.Example``.

    Raises:
        KeyError: if the JSON object lacks ``'body'`` or ``'title'``.
    """
    # JSON text is UTF-8; do not rely on the platform default encoding,
    # which breaks on non-ASCII article text under non-UTF-8 locales.
    with open(fname, encoding="utf-8") as f:
        art = json.load(f)
    article = neologdn.normalize(art['body'])
    abstract = neologdn.normalize(art['title'])
    m = MeCab('-Owakati')
    parsed_article = m.parse(article)
    # The article is passed on as the parsed string, the abstract as a list of tokens.
    abs_words = m.parse(abstract).split()
    ex = B.Example(parsed_article, abs_words, vocab, hps)
    return B.Batch([ex], hps, vocab)
Ejemplo n.º 4
0
def build_batch(query, vocab, hp):
    """Build a batch in which every slot holds the same query example.

    Args:
        query: article text for the single ``batcher.Example``; the abstract
            is the empty string.
        vocab: vocabulary object forwarded to the example and to the batch.
        hp: hyper-parameter object; ``hp.batch_size`` sets how many times the
            example is repeated.

    Returns:
        A ``batcher.Batch`` whose example list is the one example repeated
        ``hp.batch_size`` times.
    """
    example = batcher.Example(article=query, abstract_sentences='', vocab=vocab, hps=hp)
    # Every slot refers to the same Example object, not to copies of it.
    repeated = [example] * hp.batch_size
    return batcher.Batch(example_list=repeated, hps=hp, vocab=vocab)
Ejemplo n.º 5
0
def get_separate_enc_states(model, sess, enc_sentences, vocab, hps):
    """Encode each sentence on its own and return one representation per sentence.

    Each sentence is turned into its own ``batcher.Example``, the examples are
    grouped into chunks of ``hps.batch_size``, and every chunk is run through
    ``model.run_encoder``. For each batch entry, ``get_fw_bw_rep`` is called on
    that entry's encoder states with the span ``0 .. enc_lens[i] - 1``.

    Args:
        model: object exposing ``run_encoder(sess, batch)``; the first element
            of its return value is iterated once per batch entry.
        sess: session handle forwarded to ``model.run_encoder``.
        enc_sentences: sized sequence of sentences, each a sequence of string
            tokens (they are joined with single spaces).
        vocab: vocabulary object forwarded to ``batcher.Example`` / ``batcher.Batch``.
        hps: hyper-parameter object; ``hps.batch_size`` sets the chunk size.

    Returns:
        A list with one ``get_fw_bw_rep`` result per input sentence, in input
        order. An empty input yields an empty list.
    """
    if len(enc_sentences) == 0:
        # Nothing to encode; the padding step below needs at least one chunk/example.
        return []

    examples = []
    for enc_sent in enc_sentences:
        sent_str = ' '.join(enc_sent)
        doc_indices = [0] * len(enc_sent)  # just filler, shouldn't do anything
        examples.append(batcher.Example(sent_str, [], [[]], doc_indices, None, vocab, hps))

    chunks = util.chunks(examples, hps.batch_size)
    # If the last chunk is not filled, artificially fill it by repeating the final example.
    missing = hps.batch_size - len(chunks[-1])
    if missing != 0:
        chunks[-1].extend([examples[-1]] * missing)

    reps = []
    for chunk in chunks:
        batch = batcher.Batch(chunk, hps, vocab)
        batch_enc_states, _ = model.run_encoder(sess, batch)
        for batch_idx, enc_states in enumerate(batch_enc_states):
            start_idx = 0
            end_idx = batch.enc_lens[batch_idx] - 1
            reps.append(get_fw_bw_rep(enc_states, start_idx, end_idx))

    # Drop the representations produced by the filler examples.
    return reps[:len(enc_sentences)]