Example #1
def test2():
    hiddenDim = 3
    classes = 2

    vocab = Vocab()
    vocab.construct(["i", "am", "a", "an", "student"])
    tree = tr.Tree("(0 (0 I) (1 (1 am) (0 (0 a) (0 student))))")

    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(W=np.matrix(
        [[1.0, -1.0, 0.9, -0.6, 0.2, 0.0], [-0.3, 1.2, 0.0, 0.4, -0.4, 0.0],
         [-0.8, 0.1, 1.1, 0.0, -2.0, 0.0]],
        dtype=np.float32),
                            b=np.matrix([[-0.4], [0.0], [0.0]],
                                        dtype=np.float32),
                            Ws=np.matrix([[0.0, 0.0, -0.5], [1.0, -0.9, 0.0]],
                                         dtype=np.float32),
                            bs=np.matrix([[-0.4], [0.0]], dtype=np.float32),
                            L=np.matrix([[0.4, -0.3, -0.1], [0.6, -0.3, 0.4],
                                         [0.04, -0.08, 1.25], [0.2, 0.5, 0.6],
                                         [0.1, 0.7, 0.6], [0.1, 0.3, 0.0]],
                                        dtype=np.float32))

    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)

    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
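The Vocab class used throughout this listing is not shown. A minimal sketch of the interface these snippets appear to rely on (construct/encode/decode, word_to_index/index_to_word, len) is given below; the real class likely also tracks word frequencies, and the <unk> handling here is an assumption.

# Hedged sketch of the assumed Vocab interface; not the project's actual class.
class Vocab(object):
    def __init__(self):
        self.word_to_index = {}
        self.index_to_word = {}
        self.unknown = '<unk>'
        self.add_word(self.unknown)

    def add_word(self, word):
        if word not in self.word_to_index:
            index = len(self.word_to_index)
            self.word_to_index[word] = index
            self.index_to_word[index] = word

    def construct(self, words):
        for word in words:
            self.add_word(word)

    def encode(self, word):
        # Unknown words fall back to the <unk> index.
        return self.word_to_index.get(word, self.word_to_index[self.unknown])

    def decode(self, index):
        return self.index_to_word[index]

    def __len__(self):
        return len(self.word_to_index)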
Example #2
    def __init__(self,
                 embed_size,
                 hidden_size,
                 vocabList,
                 device,
                 dropout_rate=0.2):
        super(RNN, self).__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.vocab = Vocab(vocabList)
        self.model_embeddings = loadWordEmbedding(self.vocab)
        self.device = device

        self.lstm = nn.LSTM(embed_size, hidden_size)
        self.wy_projection = nn.Linear(hidden_size, hidden_size,
                                       bias=False)  #  called Wy in the paper
        self.wh_projection = nn.Linear(hidden_size, hidden_size,
                                       bias=False)  #  called Wh in the paper
        self.w_projection = nn.Linear(hidden_size, 1,
                                      bias=False)  #  called w in the paper
        self.wp_projection = nn.Linear(hidden_size, hidden_size,
                                       bias=False)  #  called Wp in the paper
        self.wx_projection = nn.Linear(hidden_size, hidden_size,
                                       bias=False)  #  called Wx in the paper

        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, 3)
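loadWordEmbedding is not defined anywhere in this listing. A plausible sketch, assuming it wraps pretrained vectors (a word-to-vector dict here, which is an assumption) into an nn.Embedding keyed by the vocab's word_to_index:

# Hedged sketch of loadWordEmbedding; the pretrained dict and the 300-dim
# default are assumptions, not this project's actual helper.
import numpy as np
import torch
import torch.nn as nn

def loadWordEmbedding(vocab, pretrained=None, embed_size=300):
    # Rows default to small random values; known words are overwritten
    # with their pretrained vectors.
    weights = np.random.uniform(-0.1, 0.1, (len(vocab), embed_size)).astype('float32')
    if pretrained is not None:
        for word, idx in vocab.word_to_index.items():
            if word in pretrained:
                weights[idx] = pretrained[word]
    return nn.Embedding.from_pretrained(torch.from_numpy(weights), freeze=False)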
Example #3
def test3():
    hiddenDim = 3
    classes = 2

    vocab = Vocab()
    vocab.construct(["i", "hate", "cat", "fur"])
    tree = tr.Tree("(0 (0 I) (1 (1 hate) (1 (1 cat) (0 (0 cat) (0 fur)))))")

    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(W=np.matrix(
        [[1.0, -1.0, 0.5, -0.6, 0.2, -0.8], [-0.3, 1.2, 2.0, 0.4, -0.4, 0.2],
         [-0.8, 0.9, 1.1, 1.0, -2.0, 0.1]],
        dtype=np.float32),
                            b=np.matrix([[-0.4], [0.3], [0.4]],
                                        dtype=np.float32),
                            Ws=np.matrix([[0.2, 0.1, -0.5], [1.2, -0.9, 0.3]],
                                         dtype=np.float32),
                            bs=np.matrix([[-0.4], [0.5]], dtype=np.float32),
                            L=np.matrix([[0.4, -0.3, -0.1], [0.6, -0.3, 0.4],
                                         [0.04, -0.08, 1.25], [0.2, 0.7, 0.6],
                                         [0.2, 0.7, 0.6]],
                                        dtype=np.float32))

    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)

    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
Example #4
    def load_data(self):
        """Loads train/dev/test data and builds vocabulary."""
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(
            300, 70, 100)

        # build vocab from training data
        self.vocab = Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

        self.w2v_vocab, w2v_embd, embedding_dict = self.load_w2v()
        self.embedding_dim = len(w2v_embd[0])
        self.w2v_vocab_size = len(self.w2v_vocab)

        self.vocab_size = len(self.vocab)
        embeddings_tmp = []
        for i in range(self.vocab_size):
            item = self.vocab.decode(i)
            if item in self.w2v_vocab:
                embeddings_tmp.append(embedding_dict[item])
                # print("Found word {}".format(item))
            else:
                # print("Couldn't find {}.".format(item))
                rand_num = np.random.uniform(low=-0.2,
                                             high=0.2,
                                             size=self.embedding_dim)
                embeddings_tmp.append(rand_num)

        self.embed = np.asarray(embeddings_tmp)
Example #5
def main():
    config = get_config(mode='test')

    vocab = Vocab()
    vocab.load(config.word2id_path, config.id2word_path)
    print(f'Vocabulary size: {vocab.vocab_size}')
    config.vocab_size = vocab.vocab_size

    if config.users:
        test_users = load_pickle(config.convs_users_path)
        config.user_size = max([x for xx in test_users for x in xx]) + 1
        print(f'User size: {config.user_size}')
    else:
        test_users = None

    data_loader = get_loader(convs=load_pickle(config.convs_path),
                             convs_length=load_pickle(config.conversations_length_path),
                             utterances_length=load_pickle(config.utterances_length_path),
                             vocab=vocab, batch_size=config.batch_size, shuffle=False, convs_users=test_users)

    model_solver = getattr(solvers, "Solver{}".format(config.model))
    test_solver = model_solver(config, None, data_loader, vocab=vocab, is_train=False)

    test_solver.build()
    test_solver.export_samples()
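Vocab.load is not shown in this listing; a minimal sketch, assuming word2id_path and id2word_path point to pickled dicts and that vocab_size is derived from them:

# Hedged sketch of the load method used above; the pickle format is an assumption.
import pickle

class Vocab(object):
    def load(self, word2id_path, id2word_path):
        with open(word2id_path, 'rb') as f:
            self.word2id = pickle.load(f)
        with open(id2word_path, 'rb') as f:
            self.id2word = pickle.load(f)
        self.vocab_size = len(self.word2id)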
Example #6
 def load_own_data(self,
                   filename,
                   filename2,
                   filename3,
                   debug=False,
                   encoding='utf-8'):
     """Loads starter word-vectors and train/dev/test data."""
     self.vocab = Vocab()
     self.vocab.construct(get_datafile(filename))
     # self.vocab.construct(get_ptb_dataset('train'))
     self.encoded_train = np.array([
         self.vocab.encode(word)
         for word in get_datafile(filename, encoding=encoding)
     ],
                                   dtype=np.int32)
     self.encoded_valid = np.array([
         self.vocab.encode(word)
         for word in get_datafile(filename2, encoding=encoding)
     ],
                                   dtype=np.int32)
     self.encoded_test = np.array([
         self.vocab.encode(word)
         for word in get_datafile(filename3, encoding=encoding)
     ],
                                  dtype=np.int32)
     if debug:
         num_debug = 1024
         self.encoded_train = self.encoded_train[:num_debug]
         self.encoded_valid = self.encoded_valid[:num_debug]
         self.encoded_test = self.encoded_test[:num_debug]
Example #7
def train(config):

    vocab = Vocab(config)
    train_data = vocab.get_train_dev_test()
    train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
    train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
    train_data = train1 + train2
    train_dataset = BuildDataSet(train_data)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    train_load = DataLoader(dataset=train_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            collate_fn=collate_fn,
                            sampler=train_sampler)

    for model_name in config.model_name:
        if config.local_rank in [0, -1]:
            msg = 'model_name:{},train_nums:{},train_iter:{},batch_size:{}'
            print(
                msg.format(model_name, len(train_data), len(train_load),
                           config.batch_size))

        train_process(config, train_load, train_sampler, model_name)
        torch.distributed.barrier()
Example #8
    def load_data(self, debug=False):
        """Loads starter word-vectors and train/dev/test data. """
        self.vocab = Vocab()
        self.vocab.construct(
            get_dataset(self.config.merged_data, self.config.ingredients_data))
        self.encoded_train = np.array([
            self.vocab.encode(word) for word in get_dataset(
                self.config.encoded_train, self.config.ingredients_data)
        ],
                                      dtype=np.int32)
        self.encoded_valid = np.array([
            self.vocab.encode(word) for word in get_dataset(
                self.config.encoded_valid, self.config.ingredients_data)
        ],
                                      dtype=np.int32)
        self.encoded_test = np.array([
            self.vocab.encode(word) for word in get_dataset(
                self.config.encoded_test, self.config.ingredients_data)
        ],
                                     dtype=np.int32)

        if debug:
            num_debug = 1024 * 3
            self.encoded_train = self.encoded_train[:num_debug]
            self.encoded_valid = self.encoded_valid[:num_debug]
            self.encoded_test = self.encoded_test[:num_debug]
Example #9
def test1():
    hiddenDim = 3
    classes = 2

    vocab = Vocab()
    vocab.construct(["i", "love", "apple", "juice"])
    tree = tr.Tree(
        "(1 (0 I) (1 (1 love) (1 (1 love) (0 (0 apple) (0 juice)))))")

    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(W=np.matrix(
        [[1.0, 2.0, 0.0, -0.4, 0.2, -0.8], [-0.5, 1.0, 2.0, 0.0, -0.4, 0.2],
         [-0.6, 0.9, 1.1, 1.0, -2.0, 0.0]],
        dtype=np.float32),
                            b=np.matrix([[-0.4], [0.5], [0.2]],
                                        dtype=np.float32),
                            Ws=np.matrix([[0.0, 0.1, -0.2], [1.4, -0.7, 0.1]],
                                         dtype=np.float32),
                            bs=np.matrix([[-0.1], [0.4]], dtype=np.float32),
                            L=np.matrix([[0.4, -0.3, -0.1], [0.1, 0.1, 0.2],
                                         [0.04, -0.9, 1.2], [0.2, 0.5, 0.6],
                                         [0.2, 0.5, 0.6]],
                                        dtype=np.float32))

    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)

    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
Example #10
    def __init__(self,
                 input_dim,
                 hid_dim,
                 n_layers,
                 n_heads,
                 pf_dim,
                 dropout,
                 device,
                 vocabList,
                 max_length=100):
        super().__init__()

        self.device = device
        self.vocab = Vocab(vocabList)
        self.embed_size = input_dim
        self.n_heads = n_heads

        # self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        # self.pos_embedding = nn.Embedding(max_length, hid_dim)
        self.tok_embedding = loadWordEmbedding(self.vocab)
        self.pos_embedding = loadPosEmbedding(max_length, hid_dim)

        self.layers = nn.ModuleList([
            EncoderLayer(hid_dim, n_heads, pf_dim, dropout, device)
            for _ in range(n_layers)
        ])

        self.dropout = nn.Dropout(dropout)

        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)

        self.drop = nn.Dropout(dropout)
        self.classify = nn.Linear(hid_dim, 5)
Example #11
def main(_):
    if not os.path.exists(FLAGS.output_dir):
        print('Creating directory: %s' % FLAGS.output_dir)
        os.mkdir(FLAGS.output_dir)

    desc_counter = Counter()
    attr_counter = Counter()
    partial_counts = defaultdict(Counter)

    print('Processing data...')
    n = len(FLAGS.inputs)
    for i, fname in enumerate(FLAGS.inputs):
        print('File %i of %i: %s' % (i, n, fname))
        with open(fname, 'r') as f:
            data = json.load(f)
        for product in data:
            desc = product['clean_text'].split() + \
                product['clean_title'].split()
            desc_counter.update(desc)
            for attr, value in product['specs'].items():
                attr_counter.update((attr,))
                partial_counts[attr].update((value,))

    # Filter values
    partial_counts = {attr: {val: cnt for val, cnt in counts.items()
                             if cnt >= FLAGS.min_value}
                      for attr, counts in partial_counts.items()}

    # Remove singular attributes
    singular = {x for x, y in partial_counts.items() if len(y) <= 1}
    attr_counter = Counter({x: y for x, y in attr_counter.items() if x not in singular})
    partial_counts = {x: y for x, y in partial_counts.items() if x not in singular}

    # Filter attrs
    if FLAGS.max_attr is not None:
        attr_counter = {x: y for x, y in attr_counter.most_common(FLAGS.max_attr)}

    # Filter desc
    desc_counter = Counter({x: y for x, y in desc_counter.items() if y >= FLAGS.min_desc})
    desc_vocab = Vocab.build_from_counter(desc_counter)
    attr_vocab = Vocab.build_from_counter(attr_counter)
    value_set = ValueSet.build_from_partial_counts(partial_counts)

    print('Writing to disk...')
    desc_fname = os.path.join(FLAGS.output_dir, 'desc.txt')
    with open(desc_fname, 'w') as f:
        desc_vocab.write(f)
    attr_fname = os.path.join(FLAGS.output_dir, 'attr.txt')
    with open(attr_fname, 'w') as f:
        attr_vocab.write(f)
    value_fname = os.path.join(FLAGS.output_dir, 'value.txt')
    with open(value_fname, 'w') as f:
        value_set.write(f)
    stats_fname = os.path.join(FLAGS.output_dir, 'stats.txt')
    with open(stats_fname, 'w') as f:
        f.write('num_attrs: %i\n' % len(attr_vocab))
        f.write('num_vals: %i\n' % len(value_set))
        f.write('num_words: %i\n' % len(desc_vocab))

    print('Done')
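Vocab.build_from_counter and Vocab.write are not shown; the sketch below is only a guess at their behavior (frequency-sorted ids and one word per line are assumptions):

# Hedged sketch of the counter-based builder used above.
class Vocab(object):
    def __init__(self, words):
        self.id2word = list(words)
        self.word2id = {w: i for i, w in enumerate(self.id2word)}

    @classmethod
    def build_from_counter(cls, counter):
        # Most frequent words first; ties broken alphabetically.
        words = [w for w, _ in sorted(counter.items(), key=lambda kv: (-kv[1], kv[0]))]
        return cls(words)

    def write(self, f):
        for word in self.id2word:
            f.write(word + '\n')

    def __len__(self):
        return len(self.id2word)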
Example #12
def prep_to_entityduet_format():
    train_file = os.path.join(args.data_dir, 'train.prep.pairwise')
    dev_file = os.path.join(args.data_dir, 'test.prep.pointwise')
    test_file = os.path.join(args.data_dir, 'test.prep.pointwise')
    vocab_file = os.path.join(args.data_dir, 'vocab')
    emb_file = os.path.join(args.data_dir, 'w2v')
    train_file_out = os.path.join(args.out_dir, 'train_pair.pkl')
    dev_file_out = os.path.join(args.out_dir, 'dev.pkl')
    test_file_out = os.path.join(args.out_dir, 'test.pkl')
    vocab_file_out = os.path.join(args.out_dir, 'vocab.txt')
    emb_file_out = os.path.join(args.out_dir, 'embed.txt')

    def id_map_fn(ids):
        return [id + 1 for id in ids]

    def label_map_fn(label):
        if label > 0:
            return 1
        return 0

    # save train, dev, test data
    for in_file, out_file in [(train_file, train_file_out),
                              (dev_file, dev_file_out),
                              (test_file, test_file_out)]:
        transformed_data = []
        print('transforming {} ...'.format(in_file))
        if in_file.endswith('pointwise'):
            mode = 1
            func = int
        elif in_file.endswith('pairwise'):
            mode = 2
            func = float
        for sample in prep_file_iterator(in_file,
                                         method='sample',
                                         func=func,
                                         parse=True):
            if mode == 1:
                transformed_data.append(
                    (id_map_fn(sample.query), id_map_fn(sample.doc),
                     label_map_fn(sample.label), sample.qid))
            elif mode == 2:
                transformed_data.append(
                    (id_map_fn(sample.query), id_map_fn(sample.doc1),
                     id_map_fn(sample.doc2)))
        print('saving to {}'.format(out_file))
        with open(out_file, 'wb') as fout:
            pickle.dump(transformed_data, fout, protocol=2)
    # save vocab
    print('saving to {}'.format(vocab_file_out))
    vocab = Vocab(filepath=vocab_file, file_format=args.format)
    words = ['<PAD>'] + vocab.get_word_list()
    with open(vocab_file_out, 'w') as fout:
        fout.write('\n'.join(words) + '\n')
    # save emb
    print('saving to {}'.format(emb_file_out))
    wv = WordVector(filepath=emb_file, first_line=args.first_line)
    vector = np.concatenate([np.zeros_like(wv.vectors[:1]), wv.vectors],
                            axis=0)
    vector.dump(emb_file_out)
Example #13
    def load_data(self):
        """Loads train/dev/test data and builds vocabulary."""
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200)

        # build vocab from training data
        self.vocab = Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
Example #14
 def load_vocab(self, debug):
     self.vocab = Vocab()
     if debug:
         self.vocab.construct(get_words_dataset('dev'))
     else:
         self.vocab.construct(get_words_dataset('train'))
     self.vocab.build_embedding_matrix(self.config.word_embed_size)
     self.embedding_matrix = self.vocab.embedding_matrix
Example #15
 def load_data(self):
     self.vocab = Vocab()
     self.vocab.construct(get_dataset('train'))
     self.encoded_train = np.array(
         [self.vocab.encode(word) for word in get_dataset('train')],
         dtype=np.int32)
     self.encoded_valid = np.array(
         [self.vocab.encode(word) for word in get_dataset('valid')],
         dtype=np.int32)
Example #16
 def __init__(self):
     self.batch_size = 32
     self.embed_size = 300
     self.label_size = 3
     self.max_epochs = 30
     self.lr = 0.01
     self.use_pretrained_embeddings = True
     # Fix the embeddings parameters during training
     self.fix_embeddings = False
     self.vocab = Vocab()
Example #17
 def load_data(self, debug=False):
     self.vocab = Vocab()
     self.vocab.construct(get_ptb_dataset('train'))
     self.encoded_train = np.array([self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32)
     self.encoded_valid = np.array([self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32)
     self.encoded_test = np.array([self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32)
     if debug:
         num_debug = 1024
         self.encoded_train = self.encoded_train[:num_debug]
         self.encoded_valid = self.encoded_valid[:num_debug]
         self.encoded_test = self.encoded_test[:num_debug]
Example #18
File: prep.py Project: jzbjyb/rri
def word_vector_transform():
    print('loading word vector ...')
    wv = WordVector(filepath=args.word_vector_path, first_line=True)
    vocab = Vocab(filepath=os.path.join(args.data_dir, 'vocab'),
                  file_format='ir')
    print('transforming ...')
    wv.transform(vocab.get_word_list(),
                 oov_filepath=os.path.join(args.data_dir, 'oov.txt'),
                 oov_at_end=True)
    print('saving ...')
    wv.save_to_file(os.path.join(args.data_dir, 'w2v'))
Example #19
class WhoseLineModel(object):
    def __init__(self, config):
        self.config = config
        self.load_data(debug=False)
        self.add_common_model_vars()

    def load_data(self, debug=False):
        self.wordvecs = gensim.models.Word2Vec.load_word2vec_format(
            self.config.wordvecpath, binary=False)
        self.vocab = Vocab()
        self.vocab.construct(self.wordvecs.index2word)
        self.embedding_matrix = np.vstack([
            self.wordvecs[self.vocab.index_to_word[i]]
            for i in range(len(self.vocab))
        ])
        # next line is "unk" surgery cf. https://groups.google.com/forum/#!searchin/globalvectors/unknown/globalvectors/9w8ZADXJclA/X6f0FgxUnMgJ
        self.embedding_matrix[0, :] = np.mean(self.embedding_matrix, axis=0)

        chapter_split = load_chapter_split(self.config.datasplitpath)
        self.speakers = Speakers()
        for line in open(self.config.datapath):
            ch, speaker, line = line.split("\t")
            if chapter_split[ch] == 0:
                self.speakers.add_speaker(speaker)
        self.speakers.prune(self.config.speaker_count - 1)  # -1 for OTHER

        self.train_data = []
        self.dev_data = []
        self.test_data = []
        oldch = None
        for ln in open(self.config.datapath):
            ch, speaker, line = ln.split("\t")
            encoded_line = (np.array(
                [self.vocab.encode(word) for word in line.split()],
                dtype=np.int32), self.speakers.encode(speaker))
            if chapter_split[ch] == 0:
                dataset = self.train_data
            elif chapter_split[ch] == 1:
                dataset = self.dev_data
            else:
                dataset = self.test_data
            if self.config.batch_size == "chapter":
                if ch == oldch:
                    dataset[-1].append(encoded_line)
                else:
                    dataset.append([encoded_line])
            else:
                dataset.append(encoded_line)
            oldch = ch

    def add_common_model_vars(self):
        with tf.variable_scope("word_vectors"):
            self.tf_embedding_matrix = tf.constant(self.embedding_matrix,
                                                   name="embedding")
Example #20
def k_fold(config):

    vocab = Vocab(config)
    # vocab.add_words()
    # vocab.build_bert_vocab()
    train, test = vocab.get_train_dev_test()

    test_data = [(x[0] + ' ' + x[1], x[2]) for x in test]
    test_dataset = BuildDataSet(test_data)
    test_load = DataLoader(dataset=test_dataset,
                           batch_size=config.batch_size,
                           shuffle=False,
                           collate_fn=collate_fn)

    # random_state has no effect (and newer scikit-learn rejects it) when shuffle=False
    kf = KFold(n_splits=config.kfold, shuffle=False)

    for k, (train_index, dev_index) in enumerate(kf.split(train)):
        #         pdb.set_trace()
        train_data, valid_data = train[train_index], train[dev_index]
        train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
        train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
        train_data = train1 + train2
        valid_data = [(x[0] + ' ' + x[1], x[2]) for x in valid_data]

        train_dataset = BuildDataSet(train_data)

        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
        train_load = DataLoader(dataset=train_dataset,
                                batch_size=config.batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                sampler=train_sampler)

        valid_dataset = BuildDataSet(valid_data)
        valid_sampler = torch.utils.data.distributed.DistributedSampler(
            valid_dataset)
        valid_load = DataLoader(dataset=valid_dataset,
                                batch_size=config.batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                sampler=valid_sampler)

        if config.local_rank in [0, -1]:
            msg = '{} fold,train_nums:{},train_iter:{},dev_nums:{},dev_iter:{},batch_size:{},test_nums:{},test_iter:{}'
            print(
                msg.format(k + 1, len(train_data), len(train_load),
                           len(valid_data), len(valid_load), config.batch_size,
                           len(test_data), len(test_load)))

        train_process(config, train_load, valid_load, test_load, k,
                      train_sampler)
        torch.distributed.barrier()
Example #21
def word_vector_transform():
    print('loading word vector ...')
    wv = WordVector(filepath=args.word_vector_path, first_line=args.first_line)
    vocab = Vocab(filepath=os.path.join(args.data_dir, 'vocab'),
                  file_format=args.format)
    print('transforming ...')
    wv.transform(
        vocab.get_word_list(),
        oov_filepath=os.path.join(args.data_dir, 'oov.txt'),
        oov_at_end=False)  # don't use oov_at_end because it is problematic
    print('saving ...')
    wv.save_to_file(os.path.join(args.data_dir, 'w2v'))
Example #22
    def __init__(self, embed_size, hidden_size, vocabList, device):
        super(RNN, self).__init__()

        self.vocab = Vocab(vocabList)
        self.device = device
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.model_embeddings = loadWordEmbedding(self.vocab)

        self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True)
        self.classify = nn.Linear(2*hidden_size, 5)
        self.drop = nn.Dropout(0.8)
        self.activate = nn.ReLU()
Example #23
 def load_data(self, LOAD_DATA=False):
     """Loads train/dev/test data and builds vocabulary."""
     if LOAD_DATA:
         # only initialize the Vocab class because of the embedding matrix
         self.vocab = Vocab()
     else:
         self.train_data, self.dev_data, self.test_data = tr.simplified_data(
             600, 40)
         #self.train_data, self.dev_data , self.test_data = tr.simplified_data(2000, 500)
         # build vocab from training data
         self.vocab = Vocab()
         train_sents = [t.get_words() for t in self.train_data]
         self.vocab.construct(
             list(itertools.chain.from_iterable(train_sents)))
Example #24
class WhoseLineModel(object):

    def __init__(self, config):
        self.config = config
        self.load_data(debug=False)
        self.add_common_model_vars()
        
    def load_data(self, debug=False):
        self.wordvecs = gensim.models.Word2Vec.load_word2vec_format(self.config.wordvecpath, binary=False)
        self.vocab = Vocab()
        self.vocab.construct(self.wordvecs.index2word)
        self.embedding_matrix = np.vstack([self.wordvecs[self.vocab.index_to_word[i]] for i in range(len(self.vocab))])
        # next line is "unk" surgery cf. https://groups.google.com/forum/#!searchin/globalvectors/unknown/globalvectors/9w8ZADXJclA/X6f0FgxUnMgJ
        self.embedding_matrix[0,:] = np.mean(self.embedding_matrix, axis=0)

        chapter_split = load_chapter_split(self.config.datasplitpath)
        self.speakers = Speakers()
        for line in open(self.config.datapath):
            ch, speaker, line = line.split("\t")
            if chapter_split[ch] == 0:
                self.speakers.add_speaker(speaker)
        self.speakers.prune(self.config.speaker_count-1)  # -1 for OTHER

        self.train_data = []
        self.dev_data = []
        self.test_data = []
        oldch = None
        for ln in open(self.config.datapath):
            ch, speaker, line = ln.split("\t")
            encoded_line = (np.array([self.vocab.encode(word) for word in line.split()], dtype=np.int32),
                            self.speakers.encode(speaker))
            if chapter_split[ch] == 0:
                dataset = self.train_data
            elif chapter_split[ch] == 1:
                dataset = self.dev_data
            else:
                dataset = self.test_data
            if self.config.batch_size == "chapter":
                if ch == oldch:
                    dataset[-1].append(encoded_line)
                else:
                    dataset.append([encoded_line])
            else:
                dataset.append(encoded_line)
            oldch = ch
    
    def add_common_model_vars(self):
        with tf.variable_scope("word_vectors"):
            self.tf_embedding_matrix = tf.constant(self.embedding_matrix, name="embedding")
Example #25
    def __init__(self, hparams: HyperParams, data_split: str, sep_line="\n"):
        assert Splits.check_split(data_split)
        self.hparams = hparams
        self.root = Path(self.hparams.root)
        self.data_split = data_split
        self.sep_line = sep_line

        self.path_data = self.download()
        self.lines = self.preprocess_data()
        self.vocab = Vocab(list(self.sep_line.join(self.lines)))
        self.text = self.train_val_test_split()
        self.tensor = self.get_sequences()
        if self.hparams.verbose:
            self.show_samples()
            print(dict(vocab_size=len(self.vocab)))
Example #26
    def load_data(self):
        """Loads train/dev/test data and builds vocabulary."""
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(
            700, 100, 200)

        # build vocab from training data
        self.vocab = Vocab()
        # train_sents = [t.get_words() for t in self.train_data]
        # self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
        all_sents = [t.get_words() for t in self.train_data] + [
            t.get_words() for t in self.dev_data
        ] + [t.get_words() for t in self.test_data]
        self.vocab.construct(list(itertools.chain.from_iterable(all_sents)))
        for k in self.vocab.word_to_index.keys():
            print('\t {} : {}'.format(k, self.vocab.word_to_index[k]))
Example #27
 def preprocess(self):
     self.log.info('Getting Vocabulary...')
     if os.path.exists(os.path.join(self.config.data_path, 'vocab.pkl')):
         with open(os.path.join(self.config.data_path, 'vocab.pkl'),
                   'rb') as fr:
             vocab = pickle.load(fr)
     else:
         if not self.config.debug:
             with open(os.path.join(self.config.data_path, 'vocab.pkl'),
                       'wb') as fw:
                 vocab = Vocab(self.config)
                 pickle.dump(vocab, fw)
         else:
             vocab = Vocab(self.config)
     return vocab
Example #28
class RNN(nn.Module):

    def __init__(self, embed_size, hidden_size, vocabList, device):
        super(RNN, self).__init__()

        self.vocab = Vocab(vocabList)
        self.device = device
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.model_embeddings = loadWordEmbedding(self.vocab)

        self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True)
        self.classify = nn.Linear(2*hidden_size, 5)
        self.drop = nn.Dropout(0.8)
        self.activate = nn.ReLU()

    def forward(self, data):
        x = self.vocab.to_input_tensor(data, self.device, -1)
        x = self.model_embeddings(x)

        x = x.permute(1, 0, 2)   # (seq_len, batch, input_size)
        # rnn, lstm
        x, (_, _) = self.lstm(x) # x: (seq_len, batch, num_direction*hidden_size)
        x = self.activate(x)
        x = x.permute(1, 2, 0) # x: (batch, num_direction*hidden_size, seq_len)

        # max pooling
        x = torch.max(x, dim=2)[0]
        # dropout prevent overfitting
        x = self.drop(x)
        # fulling connection
        x = self.classify(x)
        return F.log_softmax(x, dim=1)
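A hedged usage sketch for the classifier above; Vocab and loadWordEmbedding are this project's own helpers and must be importable, and the sample vocabulary and sizes below are made up for illustration:

import torch

vocabList = ['this', 'movie', 'was', 'great', 'terrible', 'film']
model = RNN(embed_size=300, hidden_size=128, vocabList=vocabList,
            device=torch.device('cpu'))
batch = [['this', 'movie', 'was', 'great'], ['terrible', 'film']]
log_probs = model(batch)        # (batch, 5) log-probabilities
pred = log_probs.argmax(dim=1)  # predicted class per sentence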
Example #29
    def load_data(self):
        pair_fname  = '../lastfm_train_mappings.txt'
        lyrics_path = '../data/lyrics/train/'
    
        # X_train is a list of all examples. Each example is a two-element list,
        # and each element is a list of words in the lyrics.
        # word_counts is a dictionary that maps each word to its count.
        if self.config.debug:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, '../glove.6B.50d.txt', threshold_down=0, threshold_up=float('inf'), npos=100, nneg=100)
        else:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold_down=100, threshold_up=4000, npos=10000, nneg=10000)

        self.labels_train = np.zeros((len(X_train),self.config.n_class))
        self.labels_train[range(len(X_train)),l_train] = 1
        
        x = collections.Counter(l_train)
        for k in x.keys():
            print('class: {} {}'.format(k, x[k]))
        print('')

        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())
        self.wv = self.vocab.get_wv('../glove.6B.50d.txt')

        with open('word_hist.csv', 'w') as f:
            for w in self.word_counts.keys():
                f.write(w+','+str(self.word_counts[w])+'\n')
            
        self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. 
        self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
        for i in range(len(X_train)):
            self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]       
            self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]       
        self.sequence_len1 = np.array(seq_len1)
        self.sequence_len2 = np.array(seq_len2)
Example #30
def build_vocab(args):
    f = open(args.embed)
    embed_dim = int(next(f).split()[1])

    word2id = {}
    id2word = {}

    word2id[PAD_TOKEN] = PAD_IDX
    word2id[UNK_TOKEN] = UNK_IDX
    id2word[PAD_IDX] = PAD_TOKEN
    id2word[UNK_IDX] = UNK_TOKEN

    embed_list = []
    # fill PAD and UNK vector
    embed_list.append([0 for _ in range(embed_dim)])
    embed_list.append([0 for _ in range(embed_dim)])

    # build Vocab

    for line in f:
        tokens = line.split()
        word = tokens[0]
        vector = [float(num) for num in tokens[1:]]

        embed_list.append(vector)
        word2id[word] = len(word2id)
        id2word[len(id2word)] = word

    embed = torch.FloatTensor(embed_list)
    vocab = Vocab(embed, word2id, id2word)
    torch.save(vocab, args.vocab)
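A hedged usage sketch for the saved vocab, assuming the Vocab(embed, word2id, id2word) object exposes those three attributes and that PAD/UNK sit at indices 0 and 1 as in the construction order above:

import torch
import torch.nn as nn

vocab = torch.load('vocab.pt')  # illustrative path; the real one is args.vocab
embedding = nn.Embedding.from_pretrained(vocab.embed, freeze=False, padding_idx=0)
ids = torch.tensor([[vocab.word2id.get('hello', 1)]])  # fall back to the UNK index
print(embedding(ids).shape)  # (1, 1, embed_dim)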
Example #31
 def read_edge(self, filename: Path):
     if "txt" in filename.suffix:
         read_func = read_txt
     elif "csv" in filename.suffix:
         read_func = read_csv
     else:
         read_func = read_txt
     node_list = list()
     for row in read_func(filename):
         node_list.append(row[0])
         node_list.append(row[1])
     self._vocab = Vocab(collections.Counter(node_list))
     edge_array = []
     for row in read_func(filename):
         n1 = self._vocab.stoi[row[0]]
         n2 = self._vocab.stoi[row[1]]
         t = int(row[2])
         edge_array.append([n1, n2, t])
         edge_array.append([n2, n1, t])
     edge_array = np.asarray(edge_array, dtype=np.int32)
     self._adj = coo_matrix((np.ones(len(edge_array)), (edge_array[:, 0], edge_array[:, 1])), shape=(len(self._vocab), len(self._vocab)))
     self._adj_t = coo_matrix((edge_array[:, 2], (edge_array[:, 0], edge_array[:, 1])), shape=(len(self._vocab), len(self._vocab)))
     self._adj_csr = self._adj.tocsr() + sp.eye(len(self._vocab))
     self._adj = self._adj_csr.tocoo()
     self._adj_t_csr = self._adj_t.tocsr()
Example #32
    def load_data(self):
        """Loads train/dev/test data and builds vocabulary."""
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200)

        # build vocab from training data
        self.vocab = Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
Example #33
def main():
    with open(data_file, 'rb') as f:
        data = pickle.load(f)
    vocab = Vocab(data)
    sg_loader = create_skipgram_dataset(chorales=data['train'],
                                        vocab=vocab,
                                        batch_size=32)
    sg_model, sg_losses = train_skipgram(vocab, sg_loader)
Example #34
def prep_data(trees, X_vocab=None, y_vocab=None):
    update_vocab = False
    if X_vocab is None:
        X_vocab, y_vocab = Vocab(), Vocab()
        update_vocab = True
    X, y = [], []
    for tree in tqdm(trees):
        if len(tree.tokens) < 2: continue
        #TODO accumulate features without iterating over all states
        try:
            for state, decision in tree.iter_oracle_states():
                feats = state.extract_features()
                if update_vocab:
                    X_vocab.add_words(feats)
                    y_vocab.add_word(decision)
                X.append([X_vocab.encode(f) for f in feats])
                y.append(y_vocab.encode(decision))
        except:
            pass
    return X, y, X_vocab, y_vocab
Example #35
 def load_data(self, debug=False):
   """Loads starter word-vectors and train/dev/test data."""
   self.vocab = Vocab()
   self.vocab.construct(get_ptb_dataset('train'))
   self.encoded_train = np.array(
       [self.vocab.encode(word) for word in get_ptb_dataset('train')],
       dtype=np.int32)
   self.encoded_valid = np.array(
       [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
       dtype=np.int32)
   self.encoded_test = np.array(
       [self.vocab.encode(word) for word in get_ptb_dataset('test')],
       dtype=np.int32)
   if debug:
     num_debug = 1024
     self.encoded_train = self.encoded_train[:num_debug]
     self.encoded_valid = self.encoded_valid[:num_debug]
     self.encoded_test = self.encoded_test[:num_debug]
Example #36
    def load_data(self):
        pair_fname  = '../lastfm_train_mappings.txt'
        lyrics_path = '../lyrics/data/lyrics/train/'
    
        # X_train is a list of all examples. Each example is a two-element list,
        # and each element is a list of words in the lyrics.
        # word_counts is a dictionary that maps each word to its count.
        X_train, l_train, self.word_counts, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold=100, n_class=self.config.n_class)
        self.labels_train = np.zeros((len(X_train),self.config.n_class))
        self.labels_train[range(len(X_train)),l_train] = 1
    
        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())

        self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. 
        self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
        for i in range(len(X_train)):
            self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]       
            self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]       
Example #37
class Model():


    def __init__(self, config):
        self.config = config
        self.load_data(debug=False)
        self.build_model()


    def load_vocab(self,debug):
        self.vocab = Vocab()
        if debug:
            self.vocab.construct(get_words_dataset('dev'))
        else:
            self.vocab.construct(get_words_dataset('train'))
        self.vocab.build_embedding_matrix(self.config.word_embed_size)
        self.embedding_matrix = self.vocab.embedding_matrix


    def load_data(self, debug=False):
        """
            Loads starter word-vectors and train/dev/test data.
        """
        self.load_vocab(debug)
        config = self.config

        if debug:
            # Load the training set
            train_data = list(get_sentences_dataset(self.vocab,
                config.sent_len, 'dev', 'post'))
            ( self.sent1_train, self.sent2_train, self.len1_train,
                self.len2_train, self.y_train ) = zip(*train_data)
            self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train)
            self.len1_train, self.len2_train = ( np.array(self.len1_train),
                np.array(self.len2_train) )
            self.y_train = np.array(self.y_train)
            print('# training examples: %d' %len(self.y_train))

            # Load the validation set
            dev_data = list(get_sentences_dataset(self.vocab, config.sent_len,
                'test', 'post'))
            ( self.sent1_dev, self.sent2_dev, self.len1_dev,
                self.len2_dev, self.y_dev ) = zip(*dev_data)
            self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev)
            self.len1_dev, self.len2_dev = ( np.array(self.len1_dev),
                np.array(self.len2_dev) )
            self.y_dev = np.array(self.y_dev)
            print('# dev examples: %d' %len(self.y_dev))

            # Load the test set
            test_data = list(get_sentences_dataset(self.vocab, config.sent_len,
                'test', 'post'))
            ( self.sent1_test, self.sent2_test, self.len1_test,
                self.len2_test, self.y_test ) = zip(*test_data)
            self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test)
            self.len1_test, self.len2_test = ( np.array(self.len1_test),
                np.array(self.len2_test) )
            self.y_test = np.array(self.y_test)
            print('# test examples: %d' %len(self.y_test))
        else:
            # Load the training set
            train_data = list(get_sentences_dataset(self.vocab,
                config.sent_len, 'train', 'post'))
            ( self.sent1_train, self.sent2_train, self.len1_train,
                self.len2_train, self.y_train ) = zip(*train_data)
            self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train)
            self.len1_train, self.len2_train = ( np.array(self.len1_train),
                np.array(self.len2_train) )
            self.y_train = np.array(self.y_train)
            print('# training examples: %d' %len(self.y_train))

            # Load the validation set
            dev_data = list(get_sentences_dataset(self.vocab, config.sent_len,
                'dev', 'post'))
            ( self.sent1_dev, self.sent2_dev, self.len1_dev,
                self.len2_dev, self.y_dev ) = zip(*dev_data)
            self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev)
            self.len1_dev, self.len2_dev = ( np.array(self.len1_dev),
                np.array(self.len2_dev) )
            self.y_dev = np.array(self.y_dev)
            print('# dev examples: %d' %len(self.y_dev))

            # Load the test set
            test_data = list(get_sentences_dataset(self.vocab, config.sent_len,
                'test', 'post'))
            ( self.sent1_test, self.sent2_test, self.len1_test,
                self.len2_test, self.y_test ) = zip(*test_data)
            self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test)
            self.len1_test, self.len2_test = ( np.array(self.len1_test),
                np.array(self.len2_test) )
            self.y_test = np.array(self.y_test)
            print('# test examples: %d' %len(self.y_test))

            print('min len: ', np.min(self.len2_train))


    def build_model(self):
        config = self.config
        k = config.sentence_embed_size
        L = config.sent_len

        # input tensors
        self.sent1_ph = tf.placeholder(tf.int32, shape=[None, L],
                                       name='sent1')
        self.sent2_ph = tf.placeholder(tf.int32, shape=[None, L],
                                       name='sent2')
        self.len1_ph = tf.placeholder(tf.int32, shape=[None], name='len1')
        self.len2_ph = tf.placeholder(tf.int32, shape=[None], name='len2')
        self.labels_ph = tf.placeholder(tf.float32,
                                        shape=[None, config.label_size],
                                        name='label')
        self.kp_ph = tf.placeholder(tf.float32, name='kp')
        kp = self.kp_ph

        # set embedding matrix to pretrained embedding
        init_embeds = tf.constant(self.embedding_matrix, dtype='float32')
        word_embeddings = tf.get_variable(
                dtype='float32',
                name='word_embeddings',
                initializer=init_embeds,
                trainable=False) # no fine-tuning of word embeddings

        x1 = tf.nn.embedding_lookup(word_embeddings, self.sent1_ph)
        x2 = tf.nn.embedding_lookup(word_embeddings, self.sent2_ph)
        x1, x2 = tf.nn.dropout(x1, kp), tf.nn.dropout(x2, kp)

        def lstmn(x, length, scope):
            with tf.variable_scope(scope):
                W_h = tf.get_variable(name='W_h', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                W_hs = tf.get_variable(name='W_hs', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                W_x = tf.get_variable(name='W_x', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                b_M = tf.get_variable(name='b_M', initializer=tf.zeros([L, k]))
                w = tf.get_variable(name='w', shape=[k, 1],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                b_a = tf.get_variable(name='b_a', initializer=tf.zeros([L]))

                W_rnn_h_i = tf.get_variable(name='W_rnn_h_i', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                W_rnn_x_i = tf.get_variable(name='W_rnn_x_i', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                b_rnn_i = tf.get_variable(name='b_rnn_i', initializer=tf.zeros([k]))

                W_rnn_h_f = tf.get_variable(name='W_rnn_h_f', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                W_rnn_x_f = tf.get_variable(name='W_rnn_x_f', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                b_rnn_f = tf.get_variable(name='b_rnn_f', initializer=tf.zeros([k]))

                W_rnn_h_o = tf.get_variable(name='W_rnn_h_o', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                W_rnn_x_o = tf.get_variable(name='W_rnn_x_o', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                b_rnn_o = tf.get_variable(name='b_rnn_o', initializer=tf.zeros([k]))

                W_rnn_h_c = tf.get_variable(name='W_rnn_h_c', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                W_rnn_x_c = tf.get_variable(name='W_rnn_x_c', shape=[k, k],
                        regularizer=tf.contrib.layers.l2_regularizer(config.l2))
                b_rnn_c = tf.get_variable(name='b_rnn_c', initializer=tf.zeros([k]))

                c0 = tf.zeros([tf.shape(length)[0], k])
                h0 = tf.zeros([tf.shape(length)[0], k])
                hst_1 = tf.zeros([tf.shape(length)[0], k])
                Cl, Hl = [c0], [h0]
                for t in range(L):
                    Ct_1 = tf.stack(Cl, axis=1)
                    Ht_1 = tf.stack(Hl, axis=1)
                    H_mod = tf.reshape(Ht_1, [-1, k])

                    xt = x[:,t,:]
                    Xt = tf.reshape(tf.tile(xt, [1, t+1]), [-1, t+1, k])
                    Xt_mod = tf.reshape(Xt, [-1, k])

                    Hst_1 = tf.reshape(tf.tile(hst_1, [1, t+1]), [-1, t+1, k])
                    Hst_1_mod = tf.reshape(Hst_1, [-1, k])

                    Mt = tf.nn.tanh( tf.reshape(tf.matmul(H_mod, W_h),
                                         [-1, t+1, k]) +
                                     tf.reshape(tf.matmul(Xt_mod, W_x),
                                         [-1, t+1, k]) +
                                     tf.reshape(tf.matmul(Hst_1_mod, W_hs),
                                         [-1, t+1, k])  + b_M[:t+1])
                    Mt_w = tf.matmul(tf.reshape(Mt, [-1, k]), w)
                    alphat = tf.nn.softmax(tf.reshape(Mt_w, [-1, 1, t+1]) + b_a[:t+1])
                    cst = tf.reshape(tf.matmul(alphat, Ct_1), [-1, k])
                    hst = tf.reshape(tf.matmul(alphat, Ht_1), [-1, k])
                    hst_1 = hst

                    it = tf.sigmoid(tf.matmul(hst, W_rnn_h_i) +
                                    tf.matmul(xt, W_rnn_x_i) +
                                    b_rnn_i)
                    ft = tf.sigmoid(tf.matmul(hst, W_rnn_h_f) +
                                    tf.matmul(xt, W_rnn_x_f) +
                                    b_rnn_f)
                    ot = tf.sigmoid(tf.matmul(hst, W_rnn_h_o) +
                                    tf.matmul(xt, W_rnn_x_o) +
                                    b_rnn_o)
                    cht = tf.nn.tanh(tf.matmul(hst, W_rnn_h_c) +
                                     tf.matmul(xt, W_rnn_x_c) +
                                     b_rnn_c)

                    ct = ft*cst + it*cht
                    ht = ot*tf.nn.tanh(ct)

                    Cl.append(ct)
                    Hl.append(ht)
            return ( tf.transpose(tf.stack(Hl), [1, 0, 2]),
                     tf.transpose(tf.stack(Cl), [1, 0, 2]) )

        H1, _ = lstmn(x1, self.len1_ph, 'lstmn1')
        H2, _ = lstmn(x2, self.len2_ph, 'lstmn2')

        def get_last_relevant_output(out, seq_len):
            rng = tf.range(0, tf.shape(seq_len)[0])
            indx = tf.stack([rng, seq_len - 1], 1)
            last = tf.gather_nd(out, indx)
            return last

        h1 = get_last_relevant_output(H1, self.len1_ph)
        h2 = get_last_relevant_output(H2, self.len2_ph)

        h_s = tf.concat([h1, h2], 1)

        y = h_s

        # MLP classifier on top
        hidden_sizes = config.hidden_sizes
        for layer, size in enumerate(hidden_sizes):
            if layer > 0:
                previous_size = hidden_sizes[layer-1]
            else:
                previous_size = 2*k
            W = tf.get_variable(name='W{}'.format(layer),
                    shape=[previous_size, size],
                    initializer=tf.contrib.layers.xavier_initializer(),
                    regularizer=tf.contrib.layers.l2_regularizer(config.l2))
            b = tf.get_variable(name='b{}'.format(layer),
                    initializer=tf.zeros([size]))
            y = tf.nn.relu(tf.matmul(y, W) + b)

        W_softmax = tf.get_variable(name='W_softmax',
                shape=[hidden_sizes[-1], config.label_size],
                initializer=tf.contrib.layers.xavier_initializer(),
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        b_softmax = tf.get_variable(name='b_softmax',
                initializer=tf.zeros([config.label_size]))

        logits = tf.matmul(y, W_softmax) + b_softmax
        cross_entropy_loss = tf.reduce_mean(
                tf.losses.softmax_cross_entropy(self.labels_ph, logits)
                )
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss #+ tf.add_n(reg_losses)

        self.train_op = ( tf.train.AdamOptimizer(learning_rate=config.lr)
                .minimize(self.loss) )

        self.probs = tf.nn.softmax(logits)
        self.predictions = tf.argmax(self.probs, 1)
        correct_prediction = tf.equal(
            tf.argmax(self.labels_ph, 1), self.predictions)
        self.correct_predictions = tf.reduce_sum(tf.cast(correct_prediction, 'int32'))


    def create_feed_dict(self, sent1_batch, sent2_batch, len1_batch,
            len2_batch, label_batch, keep_prob):
        feed_dict = {
            self.sent1_ph: sent1_batch,
            self.sent2_ph: sent2_batch,
            self.len1_ph: len1_batch,
            self.len2_ph: len2_batch,
            self.labels_ph: label_batch,
            self.kp_ph: keep_prob
        }
        return feed_dict


    def run_epoch(self, session, sent1_data, sent2_data, len1_data, len2_data, input_labels,
            verbose=100):
        orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y = ( sent1_data,
                sent2_data, len1_data, len2_data, input_labels )
        kp = self.config.kp
        total_loss = []
        total_correct_examples = 0
        total_processed_examples = 0
        total_steps = int( orig_sent1.shape[0] / self.config.batch_size)
        for step, (sent1, sent2, len1, len2, y) in enumerate(
            data_iterator(orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y,
                    batch_size=self.config.batch_size, label_size=self.config.label_size)):
            feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp)
            loss, total_correct, _ = session.run(
                [self.loss, self.correct_predictions, self.train_op],
                feed_dict=feed)
            total_processed_examples += len(y)
            total_correct_examples += total_correct
            total_loss.append(loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : loss = {}'.format(
                    step, total_steps, np.mean(total_loss)))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
            sys.stdout.flush()
        return np.mean(total_loss), total_correct_examples / float(total_processed_examples), total_loss


    def predict(self, session, sent1_data, sent2_data, len1_data, len2_data, y=None):
        """Make predictions from the provided model."""
        # If y is given, the loss is also calculated
        # We deactivate dropout by setting it to 1
        kp = 1.0
        losses = []
        results = []
        if np.any(y):
            data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, y, batch_size=self.config.batch_size,
                                 label_size=self.config.label_size, shuffle=False)
        else:
            data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, batch_size=self.config.batch_size,
                                 label_size=self.config.label_size, shuffle=False)
        for step, (sent1, sent2, len1, len2, y) in enumerate(data):
            feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp)
            if np.any(y):
                loss, preds = session.run(
                    [self.loss, self.predictions], feed_dict=feed)
                losses.append(loss)
            else:
                preds = session.run(self.predictions, feed_dict=feed)
            results.extend(preds)
        return np.mean(losses), np.array(results)
Example #38
class RNN_Model():

    def load_data(self):
        """Loads train/dev/test data and builds vocabulary."""
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200)

        # build vocab from training data
        self.vocab = Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    def inference(self, tree, predict_only_root=False):
        """For a given tree build the RNN models computation graph up to where it
            may be used for inference.
        Args:
            tree: a Tree object on which to build the computation graph for the RNN
        Returns:
            softmax_linear: Output tensor with the computed logits.
        """
        node_tensors = self.add_model(tree.root)
        if predict_only_root:
            node_tensors = node_tensors[tree.root]
        else:
            node_tensors = [tensor for node, tensor in node_tensors.iteritems() if node.label!=2]
            node_tensors = tf.concat(0, node_tensors)
        return self.add_projections(node_tensors)

    def add_model_vars(self):
        '''
        Your model contains the following parameters:
            embedding:  tensor(vocab_size, embed_size)
            W1:         tensor(2* embed_size, embed_size)
            b1:         tensor(1, embed_size)
            U:          tensor(embed_size, output_size)
            bs:         tensor(1, output_size)
        Hint: Add the tensorflow variables to the graph here and *reuse* them while building
                the computation graphs for composition and projection for each tree
        Hint: Use a variable_scope "Composition" for the composition layer, and
              "Projection") for the linear transformations preceding the softmax.
        '''
        embed_size = self.config.embed_size
        vocab_size = len(self.vocab)
        output_size = self.config.label_size
        with tf.variable_scope('Composition'):
            ### YOUR CODE HERE
            embedding = tf.get_variable("embedding", shape=(vocab_size, embed_size))
            W1 = tf.get_variable("W1", shape=(2 * embed_size, embed_size))
            b1 = tf.get_variable("b1", shape=(1, embed_size))
            ### END YOUR CODE
        with tf.variable_scope('Projection'):
            ### YOUR CODE HERE
            U = tf.get_variable("U", shape=(embed_size, output_size))
            bs = tf.get_variable("bs", shape=(1, output_size))
            ### END YOUR CODE

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr)
        # dummy_total is a simple sum to ensure that the variables for the AdamOptimizer
        # are created at initialization time, before the variables are restored later.
        # It should never actually get executed.
        dummy_total = tf.constant(0.0)
        for v in tf.trainable_variables():
            dummy_total += tf.reduce_sum(v)
        self.dummy_minimizer = self.optimizer.minimize(dummy_total)
        # we then initialize variables, and because of the self.dummy_minimizer,
        # all of the necessary variable/slot pairs get added and included in the
        # saver variables

    def add_model(self, node):
        """Recursively build the model to compute the phrase embeddings in the tree

        Hint: Refer to tree.py and vocab.py before you start. Refer to
              the model's vocab with self.vocab
        Hint: Reuse the "Composition" variable_scope here
        --Hint: Store a node's vector representation in node.tensor so it can be
              used by its parent--
        Hint: If node is a leaf node, its vector representation is just that of the
              word vector (see tf.gather()).
        Args:
            node: a Node object
        Returns:
            node_tensors: Dict: key = Node, value = tensor(1, embed_size)
        """
        with tf.variable_scope('Composition', reuse=True):
            ### YOUR CODE HERE
            embedding = tf.get_variable("embedding")
            W1 = tf.get_variable("W1")
            b1 = tf.get_variable("b1")
            ### END YOUR CODE


        # THOUGHT: Batch together all leaf nodes and all non-leaf nodes

        node_tensors = OrderedDict()
        curr_node_tensor = None
        if node.isLeaf:
            ### YOUR CODE HERE
            curr_node_tensor = tf.gather(embedding, [self.vocab.encode(node.word)], name="leaf_lookup")
            ### END YOUR CODE
        else:
            node_tensors.update(self.add_model(node.left))
            node_tensors.update(self.add_model(node.right))
            ### YOUR CODE HERE
            left = node_tensors[node.left]
            right = node_tensors[node.right]
            concat = tf.concat(1, [left, right])
            composition = tf.matmul(concat, W1) + b1
            # TODO save on number of zero tensors...
            curr_node_tensor = tf.maximum(composition, tf.zeros_like(composition))
            ### END YOUR CODE
        node_tensors[node] = curr_node_tensor
        return node_tensors

    def add_projections(self, node_tensors):
        """Add projections to the composition vectors to compute the raw sentiment scores

        Hint: Reuse the "Projection" variable_scope here
        Args:
            node_tensors: tensor(?, embed_size)
        Returns:
            output: tensor(?, label_size)
        """
        logits = None
        ### YOUR CODE HERE
        with tf.variable_scope('Projection', reuse=True):
            U = tf.get_variable("U")
            bs = tf.get_variable("bs")

        # NOTE: tf.add supports Broadcast
        logits = tf.matmul(node_tensors, U) + bs
        ### END YOUR CODE
        return logits

    def loss(self, logits, labels):
        """Adds loss ops to the computational graph.

        Hint: Use sparse_softmax_cross_entropy_with_logits
        Hint: Remember to add l2_loss (see tf.nn.l2_loss)
        Args:
            logits: tensor(num_nodes, output_size)
            labels: python list, len = num_nodes
        Returns:
            loss: tensor 0-D
        """
        loss = None
        # YOUR CODE HERE
        labels = tf.convert_to_tensor(labels, dtype=tf.int64)
        softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)

        l2 = self.config.l2
        with tf.variable_scope('Composition', reuse=True):
            W1 = tf.get_variable("W1")
        with tf.variable_scope('Projection', reuse=True):
            U = tf.get_variable("U")
        l2_loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U)
        l2_loss *= l2

        loss = tf.reduce_sum(softmax_loss) + l2_loss
        # END YOUR CODE
        return loss

    def training(self, loss):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable variables.
        The Op returned by this function is what must be passed to the
        `sess.run()` call to cause the model to train. See

        https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

        for more information.

        Hint: Use tf.train.GradientDescentOptimizer for this model.
                Calling optimizer.minimize() will return a train_op object.

        Args:
            loss: tensor 0-D
        Returns:
            train_op: tensorflow op for training.
        """
        train_op = None
        # YOUR CODE HERE
        train_op = self.optimizer.minimize(loss)
        # END YOUR CODE
        return train_op

    def predictions(self, y):
        """Returns predictions from sparse scores

        Args:
            y: tensor(?, label_size)
        Returns:
            predictions: tensor(?,1)
        """
        predictions = None
        # YOUR CODE HERE
        # pick max of softmax predictions in each batch
        predictions = tf.argmax(tf.nn.softmax(tf.cast(y, tf.float64)), dimension=1)
        # END YOUR CODE
        return predictions

    def __init__(self, config):
        self.config = config
        self.load_data()

    def predict(self, trees, weights_path, get_loss = False):
        """Make predictions from the provided model."""
        results = []
        losses = []
        for i in xrange(int(math.ceil(len(trees)/float(RESET_AFTER)))):
            with tf.Graph().as_default(), tf.Session() as sess:
                self.add_model_vars()
                saver = tf.train.Saver()
                saver.restore(sess, weights_path)
                for tree in trees[i*RESET_AFTER: (i+1)*RESET_AFTER]:
                    logits = self.inference(tree, True)
                    predictions = self.predictions(logits)
                    root_prediction = sess.run(predictions)[0]
                    if get_loss:
                        root_label = tree.root.label
                        loss = sess.run(self.loss(logits, [root_label]))
                        losses.append(loss)
                    results.append(root_prediction)
        return results, losses

    def run_epoch(self, new_model = False, verbose=True):
        step = 0
        loss_history = []
        while step < len(self.train_data):
            with tf.Graph().as_default(), tf.Session() as sess:
                self.add_model_vars()
                if new_model:
                    init = tf.initialize_all_variables()
                    sess.run(init)
                    new_model = False
                else:
                    saver = tf.train.Saver()
                    #saver.restore(sess, './weights/%s.temp'%self.config.model_name)
                    saver.restore(sess, './weights_l2/%s.temp'%self.config.model_name)
                for _ in xrange(RESET_AFTER):
                    if step>=len(self.train_data):
                        break
                    tree = self.train_data[step]
                    logits = self.inference(tree)
                    labels = [l for l in tree.labels if l!=2]
                    loss = self.loss(logits, labels)
                    train_op = self.training(loss)
                    loss, _ = sess.run([loss, train_op])
                    loss_history.append(loss)
                    if verbose:
                        sys.stdout.write('\r{} / {} :    loss = {}'.format(
                            step, len(self.train_data), np.mean(loss_history)))
                        sys.stdout.flush()
                    step+=1
                saver = tf.train.Saver()
                if not os.path.exists("./weights_l2"):
                    os.makedirs("./weights_l2")
                saver.save(sess, './weights_l2/%s.temp'%self.config.model_name, write_meta_graph=False)
        train_preds, _ = self.predict(self.train_data, './weights_l2/%s.temp'%self.config.model_name)
        val_preds, val_losses = self.predict(self.dev_data, './weights_l2/%s.temp'%self.config.model_name, get_loss=True)
        train_labels = [t.root.label for t in self.train_data]
        val_labels = [t.root.label for t in self.dev_data]
        train_acc = np.equal(train_preds, train_labels).mean()
        val_acc = np.equal(val_preds, val_labels).mean()

        print
        print 'Training acc (only root node): {}'.format(train_acc)
        print 'Validation acc (only root node): {}'.format(val_acc)
        print self.make_conf(train_labels, train_preds)
        print self.make_conf(val_labels, val_preds)
        return train_acc, val_acc, loss_history, np.mean(val_losses)

    def train(self, verbose=True):
        complete_loss_history = []
        train_acc_history = []
        val_acc_history = []
        prev_epoch_loss = float('inf')
        best_val_loss = float('inf')
        best_val_epoch = 0
        stopped = -1
        for epoch in xrange(self.config.max_epochs):
            print 'epoch %d'%epoch
            if epoch==0:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True)
            else:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch()
            complete_loss_history.extend(loss_history)
            train_acc_history.append(train_acc)
            val_acc_history.append(val_acc)

            #lr annealing
            epoch_loss = np.mean(loss_history)
            if epoch_loss>prev_epoch_loss*self.config.anneal_threshold:
                self.config.lr/=self.config.anneal_by
                print 'annealed lr to %f'%self.config.lr
            prev_epoch_loss = epoch_loss

            #save if model has improved on val
            if val_loss < best_val_loss:
                 shutil.copyfile('./weights_l2/%s.temp'%self.config.model_name, './weights_l2/%s'%self.config.model_name)
                 best_val_loss = val_loss
                 best_val_epoch = epoch

            # if model has not improved for a while stop
            if epoch - best_val_epoch > self.config.early_stopping:
                stopped = epoch
                #break
        if verbose:
                sys.stdout.write('\r')
                sys.stdout.flush()

        print '\n\nstopped at %d\n'%stopped
        writeToResults('%s,%s,%s,%s,%s'%(self.config.model_name,stopped,complete_loss_history[-1],train_acc_history[-1],val_acc_history[-1]))
        return {
            'loss_history': complete_loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
            }

    def make_conf(self, labels, predictions):
        confmat = np.zeros([2, 2])
        for l,p in itertools.izip(labels, predictions):
            confmat[l, p] += 1
        return confmat
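
# Illustrative sketch (not part of the original example): the composition step that
# RNN_Model.add_model builds above, written in plain numpy on hypothetical toy values.
# A parent phrase vector is ReLU([left; right] . W1 + b1); the root vector is then
# projected to label scores with U and bs, mirroring add_projections.
import numpy as np

def _composition_demo(embed_size=4, label_size=2, seed=0):
    rng = np.random.RandomState(seed)
    W1, b1 = rng.randn(2 * embed_size, embed_size), np.zeros((1, embed_size))
    U, bs = rng.randn(embed_size, label_size), np.zeros((1, label_size))
    left = rng.randn(1, embed_size)                # e.g. the word vector for "not"
    right = rng.randn(1, embed_size)               # e.g. the word vector for "good"
    concat = np.hstack([left, right])              # (1, 2*embed_size), cf. tf.concat(1, ...)
    parent = np.maximum(concat.dot(W1) + b1, 0.0)  # ReLU composition, (1, embed_size)
    logits = parent.dot(U) + bs                    # raw label scores, (1, label_size)
    return logits
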
class RNNLM_Model(LanguageModel):

  def load_data(self, debug=False):
    """Loads starter word-vectors and train/dev/test data."""
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    self.encoded_test = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('test')],
        dtype=np.int32)
    if debug:
      num_debug = 1024
      self.encoded_train = self.encoded_train[:num_debug]
      self.encoded_valid = self.encoded_valid[:num_debug]
      self.encoded_test = self.encoded_test[:num_debug]

  def add_placeholders(self):
    """Generate placeholder variables to represent the input tensors

    These placeholders are used as inputs by the rest of the model building
    code and will be fed data during training.  Note that when "None" is in a
    placeholder's shape, it's flexible

    Adds following nodes to the computational graph.
    (When None is in a placeholder's shape, it's flexible)

    input_placeholder: Input placeholder tensor of shape
                       (None, num_steps), type tf.int32
    labels_placeholder: Labels placeholder tensor of shape
                        (None, num_steps), type tf.int32
    dropout_placeholder: Dropout value placeholder (scalar),
                         type tf.float32

    Add these placeholders to self as the instance variables
  
      self.input_placeholder
      self.labels_placeholder
      self.dropout_placeholder

    (Don't change the variable names)
    """
    ### YOUR CODE HERE
    self.input_placeholder = tf.placeholder(tf.int32, shape=[None, self.config.num_steps], name='Input')
    self.labels_placeholder = tf.placeholder(tf.int32, shape=[None, self.config.num_steps], name='Target')
    self.dropout_placeholder = tf.placeholder(tf.float32, name='Dropout')
    ### END YOUR CODE
  
  def add_embedding(self):
    """Add embedding layer.

    Hint: This layer should use the input_placeholder to index into the
          embedding.
    Hint: You might find tf.nn.embedding_lookup useful.
    Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs
    Hint: Check the last slide from the TensorFlow lecture.
    Hint: Here are the dimensions of the variables you will need to create:

      L: (len(self.vocab), embed_size)

    Returns:
      inputs: List of length num_steps, each of whose elements should be
              a tensor of shape (batch_size, embed_size).
    """
    # The embedding lookup is currently only implemented for the CPU
    with tf.device('/cpu:0'):
      ### YOUR CODE HERE
      embeddings = tf.get_variable('Embedding', [len(self.vocab), self.config.embed_size], trainable=True)
      inputs = tf.nn.embedding_lookup(embeddings, self.input_placeholder)
      inputs = [tf.squeeze(x, [1]) for x in tf.split(1, self.config.num_steps, inputs)]
      ### END YOUR CODE
      return inputs

  def add_projection(self, rnn_outputs):
    """Adds a projection layer.

    The projection layer transforms the hidden representation to a distribution
    over the vocabulary.

    Hint: Here are the dimensions of the variables you will need to
          create 
          
          U:   (hidden_size, len(vocab))
          b_2: (len(vocab),)

    Args:
      rnn_outputs: List of length num_steps, each of whose elements should be
                   a tensor of shape (batch_size, embed_size).
    Returns:
      outputs: List of length num_steps, each a tensor of shape
               (batch_size, len(vocab))
    """
    ### YOUR CODE HERE
    with tf.name_scope('Projection'):
      U = tf.get_variable('U', [self.config.hidden_size, len(self.vocab)])
      b2 = tf.get_variable('b2', [len(self.vocab)])
      # return raw logits; the softmax is applied by the loss op and by self.predictions
      outputs = [tf.matmul(o, U) + b2 for o in rnn_outputs]
    ### END YOUR CODE
    return outputs

  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    cross_entropy = sequence_loss([output], [tf.reshape(self.labels_placeholder,[-1])], all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    ### END YOUR CODE
    return loss

  def add_training_op(self, loss):
    """Sets up the training Ops.

    Creates an optimizer and applies the gradients to all trainable variables.
    The Op returned by this function is what must be passed to the
    `sess.run()` call to cause the model to train. See 

    https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

    for more information.

    Hint: Use tf.train.AdamOptimizer for this model.
          Calling optimizer.minimize() will return a train_op object.

    Args:
      loss: Loss tensor, from cross_entropy_loss.
    Returns:
      train_op: The Op for training.
    """
    ### YOUR CODE HERE
    optimizer = tf.train.AdamOptimizer(self.config.lr)
    train_op = optimizer.minimize(loss)
    ### END YOUR CODE
    return train_op
  
  def __init__(self, config):
    self.config = config
    self.load_data(debug=False)
    self.add_placeholders()
    self.inputs = self.add_embedding()
    self.rnn_outputs = self.add_model(self.inputs)
    self.outputs = self.add_projection(self.rnn_outputs)
  
    # We want to check how well we correctly predict the next word
    # We cast o to float64 as there are numerical issues at hand
    # (i.e. sum(output of softmax) = 1.00000298179 and not 1)
    self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs]
    # Reshape the output into len(vocab) sized chunks - the -1 says as many as
    # needed to evenly divide
    output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
    self.calculate_loss = self.add_loss_op(output)
    self.train_step = self.add_training_op(self.calculate_loss)


  def add_model(self, inputs):
    """Creates the RNN LM model.

    In the space provided below, you need to implement the equations for the
    RNN LM model. Note that you may NOT use built in rnn_cell functions from
    tensorflow.

    Hint: Use a zeros tensor of shape (batch_size, hidden_size) as
          initial state for the RNN. Add this to self as instance variable

          self.initial_state
  
          (Don't change variable name)
    Hint: Add the last RNN output to self as instance variable

          self.final_state

          (Don't change variable name)
    Hint: Make sure to apply dropout to the inputs and the outputs.
    Hint: Use a variable scope (e.g. "RNN") to define RNN variables.
    Hint: Perform an explicit for-loop over inputs. You can use
          scope.reuse_variables() to ensure that the weights used at each
          iteration (each time-step) are the same. (Make sure you don't call
          this for iteration 0 though or nothing will be initialized!)
    Hint: Here are the dimensions of the various variables you will need to
          create:
      
          H: (hidden_size, hidden_size) 
          I: (embed_size, hidden_size)
          b_1: (hidden_size,)

    Args:
      inputs: List of length num_steps, each of whose elements should be
              a tensor of shape (batch_size, embed_size).
    Returns:
      outputs: List of length num_steps, each of whose elements should be
               a tensor of shape (batch_size, hidden_size)
    """
    ### YOUR CODE HERE
    with tf.variable_scope('InputDropout'):
      inputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in inputs]
    with tf.variable_scope('RNN') as scope:
      self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size])
      state = self.initial_state
      rnn_outputs = []
      for tstep, current_input in enumerate(inputs):
        if tstep > 0:
          scope.reuse_variables()
        H = tf.get_variable('H', [self.config.hidden_size, self.config.hidden_size])
        I = tf.get_variable('I', [self.config.embed_size, self.config.hidden_size])
        b1 = tf.get_variable('b1', [self.config.hidden_size])
        state = tf.nn.sigmoid(tf.matmul(state, H) + tf.matmul(current_input, I) + b1)
        rnn_outputs.append(state)
    self.final_state = rnn_outputs[-1]
    with tf.variable_scope('RNNDropout'):
      rnn_outputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in rnn_outputs]
    ### END YOUR CODE
    return rnn_outputs


  def run_epoch(self, session, data, train_op=None, verbose=10):
    config = self.config
    dp = config.dropout
    if not train_op:
      train_op = tf.no_op()
      dp = 1.0
    total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
    total_loss = []
    state = self.initial_state.eval()
    for step, (x, y) in enumerate(
      ptb_iterator(data, config.batch_size, config.num_steps)):
      # We need to pass in the initial state and retrieve the final state to give
      # the RNN proper history
      feed = {self.input_placeholder: x,
              self.labels_placeholder: y,
              self.initial_state: state,
              self.dropout_placeholder: dp}
      loss, state, _ = session.run(
          [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
      total_loss.append(loss)
      if verbose and step % verbose == 0:
          sys.stdout.write('\r{} / {} : pp = {}'.format(
              step, total_steps, np.exp(np.mean(total_loss))))
          sys.stdout.flush()
    if verbose:
      sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
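
# Illustrative sketch (not part of the original example): a single step of the recurrence
# that RNNLM_Model.add_model unrolls above, h_t = sigmoid(h_{t-1} H + x_t I + b_1),
# written in plain numpy with hypothetical sizes so the shapes are easy to follow.
import numpy as np

def _rnn_step_demo(batch_size=3, embed_size=5, hidden_size=4, seed=0):
    rng = np.random.RandomState(seed)
    H = rng.randn(hidden_size, hidden_size)
    I = rng.randn(embed_size, hidden_size)
    b1 = np.zeros(hidden_size)
    h_prev = np.zeros((batch_size, hidden_size))   # initial state, as in self.initial_state
    x_t = rng.randn(batch_size, embed_size)        # one time step of embedded input
    h_t = 1.0 / (1.0 + np.exp(-(h_prev.dot(H) + x_t.dot(I) + b1)))
    return h_t                                     # (batch_size, hidden_size)
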
Example #40
0
class RNN_Model():

    def load_data(self):
        """Loads train/dev/test data and builds vocabulary."""
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200)

        # build vocab from training data
        self.vocab = Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    def inference(self, tree, predict_only_root=False):
        """For a given tree build the RNN models computation graph up to where it
            may be used for inference.
        Args:
            tree: a Tree object on which to build the computation graph for the RNN
        Returns:
            softmax_linear: Output tensor with the computed logits.
        """
        node_tensors = self.add_model(tree.root)
        if predict_only_root:
            node_tensors = node_tensors[tree.root]
        else:
            node_tensors = [tensor for node, tensor in node_tensors.iteritems() if node.label!=2]
            node_tensors = tf.concat(0, node_tensors)
        return self.add_projections(node_tensors)

    def add_model_vars(self):
        '''
        Your model contains the following parameters:
            embedding:  tensor(vocab_size, embed_size)
            W1:         tensor(2* embed_size, embed_size)
            b1:         tensor(1, embed_size)
            U:          tensor(embed_size, output_size)
            bs:         tensor(1, output_size)
        Hint: Add the tensorflow variables to the graph here and *reuse* them while building
                the computation graphs for composition and projection for each tree
        Hint: Use a variable_scope "Composition" for the composition layer, and
              "Projection" for the linear transformations preceding the softmax.
        '''
        with tf.variable_scope('Composition'):
            ### YOUR CODE HERE
            embed_size = self.config.embed_size
            #epsilon = 0.4
            #initializer = tf.random_uniform_initializer(-epsilon, epsilon)
            initializer = None
            embedding = tf.get_variable('embedding', [len(self.vocab), self.config.embed_size], initializer=initializer)
            W1 = tf.get_variable("W1", [2 * embed_size, embed_size], initializer=initializer)
            b1 = tf.get_variable("b1", [1, embed_size], initializer=initializer)
            ### END YOUR CODE
        with tf.variable_scope('Projection'):
            ### YOUR CODE HERE
            U = tf.get_variable("U", [embed_size, self.config.label_size], initializer=initializer)
            bs = tf.get_variable("bs", [1, self.config.label_size], initializer=initializer)
            ### END YOUR CODE

    def add_model(self, node):
        """Recursively build the model to compute the phrase embeddings in the tree

        Hint: Refer to tree.py and vocab.py before you start. Refer to
              the model's vocab with self.vocab
        Hint: Reuse the "Composition" variable_scope here
        Hint: Store a node's vector representation in node.tensor so it can be
              used by its parent
        Hint: If node is a leaf node, its vector representation is just that of the
              word vector (see tf.gather()).
        Args:
            node: a Node object
        Returns:
            node_tensors: Dict: key = Node, value = tensor(1, embed_size)
        """
        with tf.variable_scope('Composition', reuse=True):
            ### YOUR CODE HERE
            embedding = tf.get_variable("embedding")
            W1 = tf.get_variable("W1")
            b1 = tf.get_variable("b1")
            ### END YOUR CODE


        node_tensors = OrderedDict()
        curr_node_tensor = None
        if node.isLeaf:
            ### YOUR CODE HERE
            curr_node_tensor = tf.gather(embedding, [self.vocab.encode(node.word)])
            ### END YOUR CODE
        else:
            node_tensors.update(self.add_model(node.left))
            node_tensors.update(self.add_model(node.right))
            ### YOUR CODE HERE
            node_input = tf.concat(1, [node_tensors[node.left], node_tensors[node.right]])
            curr_node_tensor = tf.matmul(node_input, W1) + b1
            curr_node_tensor = tf.nn.relu(curr_node_tensor)
            ### END YOUR CODE
        node_tensors[node] = curr_node_tensor
        return node_tensors

    def add_projections(self, node_tensors):
        """Add projections to the composition vectors to compute the raw sentiment scores

        Hint: Reuse the "Projection" variable_scope here
        Args:
            node_tensors: tensor(?, embed_size)
        Returns:
            output: tensor(?, label_size)
        """
        logits = None
        ### YOUR CODE HERE
        with tf.variable_scope("Projection", reuse=True):
            U = tf.get_variable("U")
            bs = tf.get_variable("bs")
        multi = tf.matmul(node_tensors, U)
        logits = multi + bs
        ### END YOUR CODE
        return logits

    def loss(self, logits, labels):
        """Adds loss ops to the computational graph.

        Hint: Use sparse_softmax_cross_entropy_with_logits
        Hint: Remember to add l2_loss (see tf.nn.l2_loss)
        Args:
            logits: tensor(num_nodes, output_size)
            labels: python list, len = num_nodes
        Returns:
            loss: tensor 0-D
        """
        loss = None
        # YOUR CODE HERE
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
        cost = tf.reduce_sum(cost)
        with tf.variable_scope("Composition", reuse=True):
            W1 = tf.get_variable("W1")
        with tf.variable_scope("Projection", reuse=True):
            U = tf.get_variable("U")
        regularization = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U)
        loss = cost + self.config.l2 * regularization
        #loss = cost + self.config.l2 * tf.nn.l2_loss(W1)
        # END YOUR CODE
        return loss

    def training(self, loss):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable variables.
        The Op returned by this function is what must be passed to the
        `sess.run()` call to cause the model to train. See

        https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

        for more information.

        Hint: Use tf.train.GradientDescentOptimizer for this model.
                Calling optimizer.minimize() will return a train_op object.

        Args:
            loss: tensor 0-D
        Returns:
            train_op: tensorflow op for training.
        """
        train_op = None
        # YOUR CODE HERE
        optim = tf.train.GradientDescentOptimizer(self.config.lr)
        #optim = tf.train.AdamOptimizer(0.003)
        train_op = optim.minimize(loss)
        # END YOUR CODE
        return train_op

    def predictions(self, y):
        """Returns predictions from sparse scores

        Args:
            y: tensor(?, label_size)
        Returns:
            predictions: tensor(?,1)
        """
        predictions = None
        # YOUR CODE HERE
        yhat = tf.nn.softmax(y)
        predictions = tf.argmax(yhat, 1)
        #predictions = tf.Print(predictions,[yhat, predictions], summarize=30)
        # END YOUR CODE
        return predictions

    def __init__(self, config):
        self.config = config
        self.load_data()

    def predict(self, trees, weights_path, get_loss = False):
        """Make predictions from the provided model."""
        results = []
        losses = []
        for i in xrange(int(math.ceil(len(trees)/float(RESET_AFTER)))):
            with tf.Graph().as_default(), tf.Session() as sess:
                self.add_model_vars()
                saver = tf.train.Saver()
                saver.restore(sess, weights_path)
                for tree in trees[i*RESET_AFTER: (i+1)*RESET_AFTER]:
                    logits = self.inference(tree, True)
                    predictions = self.predictions(logits)
                    root_prediction = sess.run(predictions)[0]
                    if get_loss:
                        root_label = tree.root.label
                        loss = sess.run(self.loss(logits, [root_label]))
                        losses.append(loss)
                    results.append(root_prediction)
        return results, losses

    def run_epoch(self, new_model = False, verbose=True):
        step = 0
        loss_history = []
        while step < len(self.train_data):
            with tf.Graph().as_default(), tf.Session() as sess:
                self.add_model_vars()
                if new_model:
                    init = tf.initialize_all_variables()
                    sess.run(init)
                    new_model = False
                else:
                    saver = tf.train.Saver()
                    saver.restore(sess, './weights/%s.temp'%self.config.model_name)
                for _ in xrange(RESET_AFTER):
                    if step>=len(self.train_data):
                        break
                    tree = self.train_data[step]
                    logits = self.inference(tree)
                    labels = [l for l in tree.labels if l!=2]
                    loss = self.loss(logits, labels)
                    train_op = self.training(loss)
                    loss, _ = sess.run([loss, train_op])
                    loss_history.append(loss)
                    if verbose:
                        sys.stdout.write('\r{} / {} :    loss = {}'.format(
                            step, len(self.train_data), np.mean(loss_history)))
                        sys.stdout.flush()
                    step+=1
                saver = tf.train.Saver()
                if not os.path.exists("./weights"):
                    os.makedirs("./weights")
                saver.save(sess, './weights/%s.temp'%self.config.model_name)
        train_preds, _ = self.predict(self.train_data, './weights/%s.temp'%self.config.model_name)
        val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp'%self.config.model_name, get_loss=True)
        train_labels = [t.root.label for t in self.train_data]
        val_labels = [t.root.label for t in self.dev_data]
        train_acc = np.equal(train_preds, train_labels).mean()
        val_acc = np.equal(val_preds, val_labels).mean()

        print
        print 'Training acc (only root node): {}'.format(train_acc)
        print 'Validation acc (only root node): {}'.format(val_acc)
        print self.make_conf(train_labels, train_preds)
        print self.make_conf(val_labels, val_preds)
        return train_acc, val_acc, loss_history, np.mean(val_losses)

    def train(self, verbose=True):
        complete_loss_history = []
        train_acc_history = []
        val_acc_history = []
        prev_epoch_loss = float('inf')
        #best_val_loss = float('inf')
        best_val_acc = 0
        best_val_epoch = 0
        stopped = -1
        for epoch in xrange(self.config.max_epochs):
            print 'epoch %d'%epoch
            if epoch==0:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True)
            else:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch()
            complete_loss_history.extend(loss_history)
            train_acc_history.append(train_acc)
            val_acc_history.append(val_acc)

            #lr annealing
            epoch_loss = np.mean(loss_history)
            if epoch_loss>prev_epoch_loss*self.config.anneal_threshold:
                self.config.lr/=self.config.anneal_by
                print 'annealed lr to %f'%self.config.lr
            prev_epoch_loss = epoch_loss

            #save if model has improved on val
            print 'validation loss %f' % val_loss
            #if val_loss < best_val_loss:
            if val_acc > best_val_acc:
                 shutil.copyfile('./weights/%s.temp'%self.config.model_name, './weights/%s'%self.config.model_name)
                 #best_val_loss = val_loss
                 best_val_acc = val_acc
                 best_val_epoch = epoch

            # if model has not improved for a while stop
            if epoch - best_val_epoch > self.config.early_stopping:
                stopped = epoch
                #break
        if verbose:
                sys.stdout.write('\r')
                sys.stdout.flush()

        print '\n\nstopped at %d\n'%stopped
        return {
            'loss_history': complete_loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
            }

    def make_conf(self, labels, predictions):
        confmat = np.zeros([2, 2])
        for l,p in itertools.izip(labels, predictions):
            confmat[l, p] += 1
        return confmat
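
# Illustrative sketch (not part of the original example): the learning-rate annealing rule
# used in RNN_Model.train above. With hypothetical anneal_threshold=0.99 and anneal_by=1.5,
# the lr is cut whenever an epoch's mean loss fails to beat 99% of the previous epoch's.
def _anneal_demo(lr=0.01, anneal_threshold=0.99, anneal_by=1.5):
    prev_epoch_loss = float('inf')
    for epoch_loss in [1.20, 0.90, 0.895, 0.70]:   # hypothetical mean losses per epoch
        if epoch_loss > prev_epoch_loss * anneal_threshold:
            lr /= anneal_by                        # e.g. third epoch: 0.895 > 0.90 * 0.99 = 0.891
            print 'annealed lr to %f' % lr
        prev_epoch_loss = epoch_loss
    return lr
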
Example #41
0
class Model_RNN(LanguageModel):
    
    def load_data(self):
        pair_fname  = '../lastfm_train_mappings.txt'
        lyrics_path = '../lyrics/data/lyrics/train/'
    
        # X_train is a list of all examples. Each example is a 2-element list, and each element is a list of words in the lyrics.
        # word_counts is a dictionary that maps each word to its count.
        X_train, l_train, self.word_counts, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold=100, n_class=self.config.n_class)
        self.labels_train = np.zeros((len(X_train),self.config.n_class))
        self.labels_train[range(len(X_train)),l_train] = 1
    
        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())

        self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. 
        self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
        for i in range(len(X_train)):
            self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]       
            self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]       


    def add_placeholders(self):
        self.X1            = tf.placeholder(tf.int32,   shape=(None, self.config.max_steps), name='X1')
        self.X2            = tf.placeholder(tf.int32,   shape=(None, self.config.max_steps), name='X2')
        self.labels        = tf.placeholder(tf.float32,   shape=(None, self.config.n_class), name='labels')
        #self.initial_state = tf.placeholder(tf.float32, shape=(None, self.config.hidden_size), name='initial_state')
        self.seq_len1      = tf.placeholder(tf.int32,   shape=(None),                        name='seq_len1') # for variable length sequences
        self.seq_len2      = tf.placeholder(tf.int32,   shape=(None),                        name='seq_len2') # for variable length sequences

    def add_embedding(self):
        L = tf.get_variable('L', shape=(len(self.word_counts.keys()), self.config.embed_size), dtype=tf.float32) 
        inputs1 = tf.nn.embedding_lookup(L, self.X1) # self.X1 is batch_size x self.config.max_steps 
        inputs2 = tf.nn.embedding_lookup(L, self.X2) # input2 is batch_size x self.config.max_steps x self.config.embed_size
        inputs1 = tf.split(1, self.config.max_steps, inputs1) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size
        inputs1 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs1]
        inputs2 = tf.split(1, self.config.max_steps, inputs2) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size
        inputs2 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs2]
        print inputs1[0].get_shape()
        return inputs1, inputs2

    def add_model(self, inputs1, inputs2, seq_len1, seq_len2):
        #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32)
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        rnn_outputs  = []
        rnn_outputs1 = []
        rnn_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state
        with tf.variable_scope('rnn'):
            Whh = tf.get_variable('Whh', shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32)
            Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size),  dtype=tf.float32)
            b1  = tf.get_variable('bhx', shape=(self.config.hidden_size,),                        dtype=tf.float32)
            print Wxh.get_shape()
            print inputs1[0].get_shape()
            print inputs2[0].get_shape()
            for i in range(self.config.max_steps):
                h_curr2 = tf.matmul(h_curr2,Whh) 
                h_curr2 += tf.matmul(inputs2[i],Wxh)
                h_curr2 += b1
                h_curr2 = tf.sigmoid(h_curr2)

                h_curr1 = tf.sigmoid(tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1)
                rnn_outputs1.append(h_curr1)
                rnn_outputs2.append(h_curr2)
        
        rnn_states = [tf.concat(1, [rnn_outputs1[i], rnn_outputs2[i]]) for i in range(self.config.max_steps)]
        return rnn_states

    def add_projection(self, rnn_states):
        # rnn_states is a list of length max_steps; each element is a tensor of shape (batch_size, 2*hidden_size).
        Whc = tf.get_variable('Whc', shape=(2*self.config.hidden_size,self.config.n_class))
        bhc = tf.get_variable('bhc', shape=(self.config.n_class,))
        projections = tf.matmul(rnn_states[-1],Whc) + bhc # in case we stop short sequences, the rnn_state in further time_steps should be unchanged
        return projections

    def add_loss_op(self, y):
        loss = tf.nn.softmax_cross_entropy_with_logits(y, self.labels)
        loss = tf.reduce_sum(loss)
        return loss
      
    def add_training_op(self, loss):
        #train_op = tf.train.AdamOptimizer(learning_rate=self.config.lr).minimize(loss)
        train_op = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr).minimize(loss)
        return train_op

    def __init__(self, config):
        self.config = config
        self.load_data()
        self.add_placeholders()

        print self.X1.get_shape()
        self.inputs1, self.inputs2 = self.add_embedding()
        self.rnn_states            = self.add_model(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2)
        self.projections           = self.add_projection(self.rnn_states)
        self.loss                  = self.add_loss_op(self.projections)
        self.train_step            = self.add_training_op(self.loss)
        self.predictions           = tf.argmax(tf.nn.softmax(self.projections),1)
        self.correct_predictions   = tf.equal(self.predictions,tf.argmax(self.labels,1))
        self.correct_predictions   = tf.reduce_sum(tf.cast(self.correct_predictions, 'int32'))
        

    def run_epoch(self, session, X1, X2, labels, train_op, verbose=10): # X and y are 2D np arrays
        config = self.config
        #state = tf.zeros([self.config.batch_size, self.config.hidden_size])
        state = self.initial_state.eval()
        data_len = np.shape(X1)[0]
        index = np.arange(data_len)
        np.random.shuffle(index)
        n_batches  = data_len // self.config.batch_size
        
        loss = 0.0
        total_loss = []
        for batch_num in range(n_batches):
            x1_batch = X1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :]
            x2_batch = X2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :]
            seq_len_batch1 = [1 for i in range(self.config.batch_size)]
            seq_len_batch2 = [1 for i in range(self.config.batch_size)]
            labels_batch = labels[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]]
            print 'batch shapes:', x1_batch.shape, x2_batch.shape
            feed_dict = {self.X1: x1_batch,
                         self.X2: x2_batch,
                         self.labels: labels_batch,
                         self.seq_len1: seq_len_batch1, 
                         self.seq_len2: seq_len_batch2} 
                         #self.initial_state: state}
            
            loss, total_correct, _ = session.run([self.loss, self.correct_predictions, train_op], feed_dict=feed_dict)
            total_loss.append(loss)
            
            if verbose and (batch_num+1)%verbose==0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(batch_num+1, n_batches, np.exp(np.mean(total_loss))))
                sys.stdout.flush()
            if verbose:
                sys.stdout.write('\r')

        return np.exp(np.mean(total_loss))
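
# Illustrative sketch (not part of the original example): Model_RNN above encodes the two
# lyric sequences with a shared RNN, concatenates their final hidden states, and projects
# the (batch, 2*hidden) result to n_class scores. Plain numpy, hypothetical sizes.
import numpy as np

def _pair_projection_demo(batch_size=2, hidden_size=3, n_class=2, seed=0):
    rng = np.random.RandomState(seed)
    h1_final = rng.randn(batch_size, hidden_size)  # last state of the first sequence
    h2_final = rng.randn(batch_size, hidden_size)  # last state of the second sequence
    state = np.hstack([h1_final, h2_final])        # (batch, 2*hidden), cf. tf.concat(1, ...)
    Whc = rng.randn(2 * hidden_size, n_class)
    bhc = np.zeros(n_class)
    return state.dot(Whc) + bhc                    # (batch, n_class) logits
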
Example #42
0
    """
    Forward function accepts input data and returns a Variable of output data
    """
    self.node_list = []
    root_node = self.walk_tree(x.root)
    all_nodes = torch.cat(self.node_list)
    #now I need to project out
    return all_nodes

def main():
  print("do nothing")


if __name__ == '__main__':
  train_data, dev_data, test_data = tr.simplified_data(train_size, 100, 200)
  vocab = Vocab()
  train_sents = [t.get_words() for t in train_data]
  vocab.construct(list(itertools.chain.from_iterable(train_sents)))
  model   = RNN_Model(vocab, embed_size=50)
  main()

  lr = 0.01
  loss_history = []
  optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0.0)
  # params (iterable): iterable of parameters to optimize or dicts defining
  #     parameter groups
  # lr (float): learning rate
  # momentum (float, optional): momentum factor (default: 0)
  # weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
  #torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0, weight_decay=0)
  # print(model.fcl._parameters['weight'])
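
# Illustrative sketch (not part of the original example): the standard update loop the SGD
# optimizer above would be driven by - zero the gradients, run the model, backprop the loss,
# and step. A tiny nn.Linear stands in for RNN_Model here; all sizes are hypothetical.
import torch
import torch.nn as nn
import torch.nn.functional as F

def _sgd_step_demo():
    model = nn.Linear(4, 2)                        # stand-in for the tree RNN
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    x = torch.randn(8, 4)                          # a hypothetical batch of inputs
    target = torch.randint(0, 2, (8,))             # hypothetical class labels
    optimizer.zero_grad()
    loss = F.cross_entropy(model(x), target)
    loss.backward()
    optimizer.step()                               # applies the SGD + momentum update
    return loss.item()
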
Example #43
0
class Model_RNN(LanguageModel):
    def load_data(self):
        pair_fname  = '../lastfm_train_mappings.txt'
        lyrics_path = '../data/lyrics/train/'
    
        # X_train is a list of all examples. Each example is a 2-element list, and each element is a list of words in the lyrics.
        # word_counts is a dictionary that maps each word to its count.
        if self.config.debug:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, '../glove.6B.50d.txt', threshold_down=0, threshold_up=float('inf'), npos=100, nneg=100)
        else:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold_down=100, threshold_up=4000, npos=10000, nneg=10000)

        self.labels_train = np.zeros((len(X_train),self.config.n_class))
        self.labels_train[range(len(X_train)),l_train] = 1
        
        x = collections.Counter(l_train)
        for k in x.keys():
            print 'class:', k, x[k]
        print ''

        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())
        self.wv = self.vocab.get_wv('../glove.6B.50d.txt')

        with open('word_hist.csv', 'w') as f:
            for w in self.word_counts.keys():
                f.write(w+','+str(self.word_counts[w])+'\n')
            
        self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. 
        self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
        for i in range(len(X_train)):
            self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]       
            self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]       
        self.sequence_len1 = np.array(seq_len1)
        self.sequence_len2 = np.array(seq_len2)

    def add_placeholders(self):
        self.X1            = tf.placeholder(tf.int32,   shape=(None, self.config.max_steps), name='X1')
        self.X2            = tf.placeholder(tf.int32,   shape=(None, self.config.max_steps), name='X2')
        self.labels        = tf.placeholder(tf.float32, shape=(None, self.config.n_class), name='labels')
        #self.initial_state = tf.placeholder(tf.float32, shape=(None, self.config.hidden_size), name='initial_state')
        self.seq_len1      = tf.placeholder(tf.int32,   shape=(None),                        name='seq_len1') # for variable length sequences
        self.seq_len2      = tf.placeholder(tf.int32,   shape=(None),                        name='seq_len2') # for variable length sequences

    def add_embedding(self):
        #L = tf.get_variable('L', shape=(len(self.vocab), self.config.embed_size), dtype=tf.float32) 
        L = tf.Variable(tf.convert_to_tensor(self.wv, dtype=tf.float32), name='L')
        #L = tf.constant(tf.convert_to_tensor(self.wvi), dtype=tf.float32, name='L')
        inputs1 = tf.nn.embedding_lookup(L, self.X1) # self.X1 is batch_size x self.config.max_steps 
        inputs2 = tf.nn.embedding_lookup(L, self.X2) # input2 is batch_size x self.config.max_steps x self.config.embed_size
        inputs1 = tf.split(1, self.config.max_steps, inputs1) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size
        inputs1 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs1]
        inputs2 = tf.split(1, self.config.max_steps, inputs2) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size
        inputs2 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs2]
        return inputs1, inputs2

    def add_model_rnn(self, inputs1, inputs2, seq_len1, seq_len2):
        #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32)
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        rnn_outputs  = []
        rnn_outputs1 = []
        rnn_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state

        with tf.variable_scope('rnn'):
            Whh = tf.get_variable('Whh', shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32)
            Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size),  dtype=tf.float32)
            b1  = tf.get_variable('bhx', shape=(self.config.hidden_size,),                          dtype=tf.float32)

            for i in range(self.config.max_steps):
                if self.config.batch_size==1:
                    if i==seq_len1[0]:
                        break
                h_curr1 = tf.sigmoid(tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1)
                rnn_outputs1.append(h_curr1)

            for i in range(self.config.max_steps):
                if self.config.batch_size==1:
                    if i==seq_len2[0]:
                        break
                h_curr2 = tf.sigmoid(tf.matmul(h_curr2,Whh) + tf.matmul(inputs2[i],Wxh) + b1)
                rnn_outputs2.append(h_curr2)

        #lstm_states = [tf.concat(1, [rnn_outputs1[i], rnn_outputs2[i]]) for i in range(self.config.max_steps)]
        rnn_final_states = tf.concat(1, [rnn_outputs1[-1], rnn_outputs2[-1]])
        return rnn_final_states

    def add_model_lstm(self, inputs1, inputs2, seq_len1, seq_len2):
        #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32)
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        lstm_outputs1 = []
        lstm_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state
        cell1   = self.initial_state
        cell2   = self.initial_state

        with tf.variable_scope('lstm'):
            Whc = tf.get_variable('Whh', shape=(self.config.hidden_size,4*self.config.hidden_size), dtype=tf.float32, initializer=tf.random_normal_initializer())
            Wxc = tf.get_variable('Wxh', shape=(self.config.embed_size,4*self.config.hidden_size),  dtype=tf.float32, initializer=tf.random_normal_initializer())
            b1  = tf.get_variable('bhx', shape=(self.config.hidden_size,),                          dtype=tf.float32, initializer=tf.random_normal_initializer())

            for i in range(self.config.max_steps):
                if self.config.batch_size==1:
                    if i==seq_len1[0]:
                        break
                ifog1 = tf.matmul(h_curr1,Whc) + tf.matmul(inputs1[i],Wxc)
                i1, f1, o1, g1 = tf.split(1, 4, ifog1)
                i1 = tf.sigmoid(i1)
                f1 = tf.sigmoid(f1)
                o1 = tf.sigmoid(o1)
                g1 = tf.tanh(g1)

                cell1   = f1*cell1 + i1*g1
                h_curr1 = o1*tf.tanh(cell1)
                lstm_outputs1.append(h_curr1)

            for i in range(self.config.max_steps):
                if self.config.batch_size==1:
                    if i==seq_len2[0]:
                        break
                ifog2 = tf.matmul(h_curr2,Whc) + tf.matmul(inputs2[i],Wxc)
                i2, f2, o2, g2 = tf.split(1, 4, ifog2)
                i2 = tf.sigmoid(i2)
                f2 = tf.sigmoid(f2)
                o2 = tf.sigmoid(o2)
                g2 = tf.tanh(g2)

                cell2   = f2*cell2 + i2*g2
                h_curr2 = o2*tf.tanh(cell2)
                lstm_outputs2.append(h_curr2)

        lstm_final_states = tf.concat(1, [lstm_outputs1[-1], lstm_outputs2[-1]])
        return lstm_final_states

    def add_final_projections(self, rnn_final_states):
        # rnn_final_states has shape (batch_size, 2*hidden_size): the concatenated final hidden states of the two sequences.
        Whu = tf.get_variable('Whu', shape=(2*self.config.hidden_size,self.config.n_class), initializer=tf.random_normal_initializer())
        bhu = tf.get_variable('bhu', shape=(self.config.n_class,), initializer=tf.random_normal_initializer())
        final_projections = tf.matmul(rnn_final_states,Whu) + bhu # in case we stop short sequences, the rnn_state in further time_steps should be unchanged
        return final_projections

    def add_loss_op(self, y):
        loss = tf.nn.softmax_cross_entropy_with_logits(y, self.labels)
        loss = tf.reduce_mean(loss)
        return loss
      
    def add_training_op(self, loss):
        #train_op = tf.train.AdamOptimizer(learning_rate=self.config.lr).minimize(loss)
        train_op = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr).minimize(loss)
        return train_op

    def __init__(self, config):
        self.config = config
        self.load_data()
        self.add_placeholders()

        self.inputs1, self.inputs2 = self.add_embedding()
        if self.config.model=='rnn':
            self.final_hidden_states = self.add_model_rnn(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2)
        elif self.config.model=='lstm':
            self.final_hidden_states = self.add_model_lstm(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2)
        self.final_projections     = self.add_final_projections(self.final_hidden_states)
        self.loss                  = self.add_loss_op(self.final_projections)
        self.train_step            = self.add_training_op(self.loss)
        self.predictions           = tf.argmax(tf.nn.softmax(self.final_projections),1)
        self.correct_predictions   = tf.equal(self.predictions,tf.argmax(self.labels,1))
        self.correct_predictions   = tf.reduce_sum(tf.cast(self.correct_predictions, 'int32'))

    def run_epoch(self, session, X1, X2, labels, sequence_len1, sequence_len2, train_op, verbose=10): # X and y are 2D np arrays
        config = self.config
        #state = tf.zeros([self.config.batch_size, self.config.hidden_size])
        state = self.initial_state.eval()
        data_len = np.shape(X1)[0]
        index = np.arange(data_len)
        np.random.shuffle(index)
        n_batches  = data_len // self.config.batch_size
        
        loss = 0.0
        total_loss = []
        total_correct = 0
        all_preds = -np.ones((data_len,))
        for batch_num in range(n_batches):
            x1_batch = X1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :]
            x2_batch = X2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :]
            labels_batch = labels[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :]
            seq_len_batch1 = sequence_len1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]]
            seq_len_batch2 = sequence_len2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]]
            feed_dict = {self.X1: x1_batch,
                         self.X2: x2_batch,
                         self.labels: labels_batch,
                         self.seq_len1: seq_len_batch1, 
                         self.seq_len2: seq_len_batch2} 
                         #self.initial_state: state}
            
            loss, preds, correct, final_projections, _ = session.run([self.loss, self.predictions, self.correct_predictions, self.final_projections, train_op], feed_dict=feed_dict)
            #print str(batch_num)+'/'+str(n_batches)+' : '+str(final_projections[0][0])+'  '+str(final_projections[0][1])
            total_loss.append(loss)
            total_correct += correct
            all_preds[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] = preds

            if verbose and (batch_num+1)%verbose==0:
                sys.stdout.write('\r{} / {} : loss = {:.4f} : train_acc = {:.2f}%'.format(batch_num+1, n_batches, np.mean(total_loss), 100.0*total_correct/((batch_num+1)*self.config.batch_size)))
                sys.stdout.flush()
            if verbose:
                sys.stdout.write('\r')
            
        return np.mean(total_loss), all_preds
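
# Illustrative sketch (not part of the original example): one step of the fused LSTM update
# used in Model_RNN.add_model_lstm above. All four gates come from a single
# (hidden -> 4*hidden) matmul that is then split, as with tf.split(1, 4, ifog).
# Plain numpy, hypothetical sizes.
import numpy as np

def _lstm_step_demo(batch_size=2, embed_size=5, hidden_size=3, seed=0):
    rng = np.random.RandomState(seed)
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    Whc = rng.randn(hidden_size, 4 * hidden_size)
    Wxc = rng.randn(embed_size, 4 * hidden_size)
    h = np.zeros((batch_size, hidden_size))
    cell = np.zeros((batch_size, hidden_size))
    x_t = rng.randn(batch_size, embed_size)
    ifog = h.dot(Whc) + x_t.dot(Wxc)               # (batch, 4*hidden)
    i, f, o, g = np.split(ifog, 4, axis=1)         # input, forget, output, candidate gates
    i, f, o, g = sigmoid(i), sigmoid(f), sigmoid(o), np.tanh(g)
    cell = f * cell + i * g                        # new cell state
    h = o * np.tanh(cell)                          # new hidden state
    return h, cell
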
class Model():


    def __init__(self, config):
        self.config = config
        self.load_data()
        self.build_model()


    def load_vocab(self,debug):
        self.vocab = Vocab()
        if debug:
            self.vocab.construct(get_words_dataset('dev'))
        else:
            self.vocab.construct(get_words_dataset('train'))
        self.vocab.build_embedding_matrix(self.config.word_embed_size)
        self.embedding_matrix = self.vocab.embedding_matrix


    def load_data(self, debug=False):
        """
            Loads starter word-vectors and train/dev/test data.
        """
        self.load_vocab(debug)
        config = self.config

        if debug:
            # Load the training set
            train_data = list(get_sentences_dataset(self.vocab,
                config.sent_len, 'dev', 'post'))
            ( self.sent1_train, self.sent2_train, self.len1_train,
                self.len2_train, self.y_train ) = zip(*train_data)
            self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train)
            self.len1_train, self.len2_train = ( np.array(self.len1_train),
                np.array(self.len2_train) )
            self.y_train = np.array(self.y_train)
            print('# training examples: %d' %len(self.y_train))

            # Load the validation set
            dev_data = list(get_sentences_dataset(self.vocab, config.sent_len,
                'test', 'post'))
            ( self.sent1_dev, self.sent2_dev, self.len1_dev,
                self.len2_dev, self.y_dev ) = zip(*dev_data)
            self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev)
            self.len1_dev, self.len2_dev = ( np.array(self.len1_dev),
                np.array(self.len2_dev) )
            self.y_dev = np.array(self.y_dev)
            print('# dev examples: %d' %len(self.y_dev))

            # Load the test set
            test_data = list(get_sentences_dataset(self.vocab, config.sent_len,
                'test', 'post'))
            ( self.sent1_test, self.sent2_test, self.len1_test,
                self.len2_test, self.y_test ) = zip(*test_data)
            self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test)
            self.len1_test, self.len2_test = ( np.array(self.len1_test),
                np.array(self.len2_test) )
            self.y_test = np.array(self.y_test)
            print('# test examples: %d' %len(self.y_test))
        else:
            # Load the training set
            train_data = list(get_sentences_dataset(self.vocab,
                config.sent_len, 'train', 'post'))
            ( self.sent1_train, self.sent2_train, self.len1_train,
                self.len2_train, self.y_train ) = zip(*train_data)
            self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train)
            self.len1_train, self.len2_train = ( np.array(self.len1_train),
                np.array(self.len2_train) )
            self.y_train = np.array(self.y_train)
            print('# training examples: %d' %len(self.y_train))

            # Load the validation set
            dev_data = list(get_sentences_dataset(self.vocab, config.sent_len,
                'dev', 'post'))
            ( self.sent1_dev, self.sent2_dev, self.len1_dev,
                self.len2_dev, self.y_dev ) = zip(*dev_data)
            self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev)
            self.len1_dev, self.len2_dev = ( np.array(self.len1_dev),
                np.array(self.len2_dev) )
            self.y_dev = np.array(self.y_dev)
            print('# dev examples: %d' %len(self.y_dev))

            # Load the test set
            test_data = list(get_sentences_dataset(self.vocab, config.sent_len,
                'test', 'post'))
            ( self.sent1_test, self.sent2_test, self.len1_test,
                self.len2_test, self.y_test ) = zip(*test_data)
            self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test)
            self.len1_test, self.len2_test = ( np.array(self.len1_test),
                np.array(self.len2_test) )
            self.y_test = np.array(self.y_test)
            print('# test examples: %d' %len(self.y_test))

            print('min len: ', np.min(self.len2_train))


    def build_model(self):
        config = self.config
        k = config.sentence_embed_size
        L = config.sent_len

        # input tensors
        self.sent1_ph = tf.placeholder(tf.int32, shape=[None, L],
                                       name='sent1')
        self.sent2_ph = tf.placeholder(tf.int32, shape=[None, L],
                                       name='sent2')
        self.len1_ph = tf.placeholder(tf.int32, shape=[None], name='len1')
        self.len2_ph = tf.placeholder(tf.int32, shape=[None], name='len2')
        self.labels_ph = tf.placeholder(tf.float32,
                                        shape=[None, config.label_size],
                                        name='label')
        self.kp_ph = tf.placeholder(tf.float32, name='kp')
        kp = self.kp_ph

        # set embedding matrix to pretrained embedding
        init_embeds = tf.constant(self.embedding_matrix, dtype='float32')
        word_embeddings = tf.get_variable(
                dtype='float32',
                name='word_embeddings',
                initializer=init_embeds,
                trainable=False) # no fine-tuning of word embeddings

        # x1 and x2 have shape (?, L, k)
        x1 = tf.nn.embedding_lookup(word_embeddings, self.sent1_ph)
        x2 = tf.nn.embedding_lookup(word_embeddings, self.sent2_ph)
        x1, x2 = tf.nn.dropout(x1, kp), tf.nn.dropout(x2, kp)

        # encode premise sentence with 1st LSTM
        with tf.variable_scope('rnn1'):
            cell1 = tf.contrib.rnn.LSTMCell(num_units=k,
                    state_is_tuple=True)
            cell1 = tf.contrib.rnn.DropoutWrapper(cell1, input_keep_prob=kp,
                    output_keep_prob=kp)
            out1, fstate1 = tf.nn.dynamic_rnn(
                cell=cell1,
                inputs=x1,
                sequence_length=self.len1_ph,
                dtype=tf.float32)

        # encode hypothesis with 2nd LSTM
        # using final state of 1st LSTM as initial state
        with tf.variable_scope('rnn2'):
            cell2 = tf.contrib.rnn.LSTMCell(num_units=k,
                    state_is_tuple=True)
            cell2 = tf.contrib.rnn.DropoutWrapper(cell2, input_keep_prob=kp,
                    output_keep_prob=kp)
            out2, fstate2 = tf.nn.dynamic_rnn(
                cell=cell2,
                inputs=x2,
                sequence_length=self.len2_ph,
                initial_state=fstate1,
                dtype=tf.float32)

        Y = out1
        Y_mod = tf.reshape(Y, [-1, k])

        W_y = tf.get_variable(name='W_y', shape=[k, k],
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        W_h = tf.get_variable(name='W_h', shape=[k, k],
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        b_M = tf.get_variable(name='b_M', initializer=tf.zeros([L, k]))
        W_r = tf.get_variable(name='W_r', shape=[k, k],
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        W_t = tf.get_variable(name='W_t', shape=[k, k],
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        b_r = tf.get_variable(name='b_r', initializer=tf.zeros([k]))
        w = tf.get_variable(name='w', shape=[k, 1],
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        b_a = tf.get_variable(name='b_a', initializer=tf.zeros([L]))

        rt_1 = tf.zeros([tf.shape(self.len1_ph)[0], k])
        attention = []
        r_outputs = []
        for t in range(L):
            ht = out2[:,t,:]

            Ht = tf.reshape(tf.tile(ht, [1, L]), [-1, L, k])
            Ht_mod = tf.reshape(Ht, [-1, k])
            Rt_1 = tf.reshape(tf.tile(rt_1, [1, L]), [-1, L, k])
            Rt_1_mod = tf.reshape(Rt_1, [-1, k])
            Mt = tf.nn.tanh( tf.reshape(tf.matmul(Y_mod, W_y),
                                 [-1, L, k]) +
                             tf.reshape(tf.matmul(Ht_mod, W_h),
                                 [-1, L, k]) +
                             tf.reshape(tf.matmul(Rt_1_mod, W_r),
                                 [-1, L, k]) )
            Mt_w = tf.matmul(tf.reshape(Mt, [-1, k]), w)
            alphat = tf.nn.softmax(tf.reshape(Mt_w, [-1, 1, L]) )
            alphat_Y = tf.reshape(tf.matmul(alphat, Y), [-1, k])
            rt = alphat_Y + tf.nn.tanh(tf.matmul(rt_1, W_t) )
            rt_1 = rt
            attention.append(alphat)
            r_outputs.append(rt)
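
        # Note (hedged): the loop above appears to implement word-by-word attention in the
        # style of Rocktaschel et al. (2015), "Reasoning about Entailment with Neural Attention":
        #     M_t     = tanh(Y W_y + (h_t W_h + r_{t-1} W_r) repeated over the L premise positions)
        #     alpha_t = softmax(M_t w)
        #     r_t     = alpha_t Y + tanh(r_{t-1} W_t)
        # The bias variables b_M, b_r and b_a declared above are never applied in this loop.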

        r_outputs = tf.stack(r_outputs)
        self.attention = tf.stack(attention)
        r_outputs = tf.transpose(r_outputs, [1, 0, 2])

        def get_last_relevant_output(out, seq_len):
            rng = tf.range(0, tf.shape(seq_len)[0])
            indx = tf.stack([rng, seq_len - 1], 1)
            last = tf.gather_nd(out, indx)
            return last
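
        # Example: for out of shape (batch, L, k) and seq_len = [3, 5],
        # indx = [[0, 2], [1, 4]], so gather_nd returns out[0, 2, :] and out[1, 4, :],
        # i.e. the output at each sequence's last valid timestep.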

        rN = get_last_relevant_output(r_outputs, self.len2_ph)
        hN = get_last_relevant_output(out2, self.len2_ph)

        W_p = tf.get_variable(name='W_p', shape=[k, k],
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        W_x = tf.get_variable(name='W_x', shape=[k, k],
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        b_hs = tf.get_variable(name='b_hs', initializer=tf.zeros([k]))

        # sentence pair representation
        h_s = tf.nn.tanh(tf.matmul(rN, W_p) + tf.matmul(hN, W_x) )

        y = h_s

        # MLP classifier on top
        hidden_sizes = config.hidden_sizes
        for layer, size in enumerate(hidden_sizes):
            if layer > 0:
                previous_size = hidden_sizes[layer-1]
            else:
                previous_size = k
            W = tf.get_variable(name='W{}'.format(layer),
                    shape=[previous_size, size],
                    initializer=tf.contrib.layers.xavier_initializer(),
                    regularizer=tf.contrib.layers.l2_regularizer(config.l2))
            b = tf.get_variable(name='b{}'.format(layer),
                    initializer=tf.zeros([size]))
            y = tf.nn.relu(tf.matmul(y, W) + b)
            y = tf.nn.dropout(y, kp)

        W_softmax = tf.get_variable(name='W_softmax',
                shape=[hidden_sizes[-1], config.label_size],
                initializer=tf.contrib.layers.xavier_initializer(),
                regularizer=tf.contrib.layers.l2_regularizer(config.l2))
        b_softmax = tf.get_variable(name='b_softmax',
                initializer=tf.zeros([config.label_size]))

        logits = tf.matmul(y, W_softmax) + b_softmax
        cross_entropy_loss = tf.reduce_mean(
                tf.losses.softmax_cross_entropy(self.labels_ph, logits)
                )
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss #+ tf.add_n(reg_losses)

        optimizer = tf.train.AdamOptimizer(learning_rate=config.lr)
        gradients, variables = zip(*optimizer.compute_gradients(self.loss))
        gradients, _ = tf.clip_by_global_norm(gradients, config.max_grad_norm)
        self.train_op = optimizer.apply_gradients(zip(gradients, variables))

        self.probs = tf.nn.softmax(logits)
        self.predictions = tf.argmax(self.probs, 1)
        correct_prediction = tf.equal(
            tf.argmax(self.labels_ph, 1), self.predictions)
        self.correct_predictions = tf.reduce_sum(tf.cast(correct_prediction, 'int32'))


    def create_feed_dict(self, sent1_batch, sent2_batch, len1_batch,
            len2_batch, label_batch, keep_prob):
        feed_dict = {
            self.sent1_ph: sent1_batch,
            self.sent2_ph: sent2_batch,
            self.len1_ph: len1_batch,
            self.len2_ph: len2_batch,
            self.labels_ph: label_batch,
            self.kp_ph: keep_prob
        }
        return feed_dict


    def run_epoch(self, session, sent1_data, sent2_data, len1_data, len2_data, input_labels,
            verbose=100):
        orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y = ( sent1_data,
                sent2_data, len1_data, len2_data, input_labels )
        kp = self.config.kp
        total_loss = []
        total_correct_examples = 0
        total_processed_examples = 0
        total_steps = int( orig_sent1.shape[0] / self.config.batch_size)
        for step, (sent1, sent2, len1, len2, y) in enumerate(
            data_iterator(orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y,
                    batch_size=self.config.batch_size, label_size=self.config.label_size)):
            feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp)
            loss, total_correct, _ = session.run(
                [self.loss, self.correct_predictions, self.train_op],
                feed_dict=feed)
            total_processed_examples += len(y)
            total_correct_examples += total_correct
            total_loss.append(loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : loss = {}'.format(
                    step, total_steps, np.mean(total_loss)))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
            sys.stdout.flush()
        return np.mean(total_loss), total_correct_examples / float(total_processed_examples), total_loss


    def predict(self, session, sent1_data, sent2_data, len1_data, len2_data, y=None):
        """Make predictions from the provided model."""
        # If y is given, the loss is also calculated
        # We deactivate dropout by setting it to 1
        kp = 1.0
        losses = []
        results = []
        if np.any(y):
            data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, y, batch_size=self.config.batch_size,
                                 label_size=self.config.label_size, shuffle=False)
        else:
            data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, batch_size=self.config.batch_size,
                                 label_size=self.config.label_size, shuffle=False)
        for step, (sent1, sent2, len1, len2, y) in enumerate(data):
            feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp)
            if np.any(y):
                loss, preds = session.run(
                    [self.loss, self.predictions], feed_dict=feed)
                losses.append(loss)
            else:
                preds = session.run(self.predictions, feed_dict=feed)
            results.extend(preds)
        return np.mean(losses), np.array(results)


    def get_attention(self, session, sent1, sent2):
        kp = 1.0
        sent1 = utils.encode_sentence(self.vocab, sent1)
        print(sent1)
        sent2 = utils.encode_sentence(self.vocab, sent2)
        print(sent2)
        # record the true (pre-padding) lengths, clipped to the padded length
        len1 = np.array([min(len(sent1), self.config.sent_len)])
        len2 = np.array([min(len(sent2), self.config.sent_len)])
        sent1 = utils.pad_sentence(self.vocab, sent1, self.config.sent_len,
                'post')
        sent2 = utils.pad_sentence(self.vocab, sent2, self.config.sent_len,
                'post')
        sent1_arr = np.array(sent1).reshape((1,-1))
        sent2_arr = np.array(sent2).reshape((1,-1))
        y = np.array([0,1,0]).reshape((1,-1))
        feed = self.create_feed_dict(sent1_arr, sent2_arr, len1, len2, y, kp)
        preds, alphas = session.run([self.predictions, self.attention], feed_dict=feed)
        return preds, alphas
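
A hedged sketch of how this Model might be driven end to end, assuming the module-level tensorflow import of the original file; the Config field `max_epochs` and the checkpoint path are assumptions, not code from the project:

def train_nli_model(config):
    # Hypothetical driver; Model builds its graph and loads data in __init__.
    model = Model(config)
    saver = tf.train.Saver()
    best_dev_loss = float('inf')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(config.max_epochs):  # max_epochs is an assumed Config field
            train_loss, train_acc, _ = model.run_epoch(
                sess, model.sent1_train, model.sent2_train,
                model.len1_train, model.len2_train, model.y_train)
            dev_loss, _ = model.predict(
                sess, model.sent1_dev, model.sent2_dev,
                model.len1_dev, model.len2_dev, y=model.y_dev)
            print('epoch {}: train loss {:.4f}, train acc {:.4f}, dev loss {:.4f}'.format(
                epoch, train_loss, train_acc, dev_loss))
            if dev_loss < best_dev_loss:
                best_dev_loss = dev_loss
                saver.save(sess, './weights/nli_best')  # path is an assumption
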
import sys
import os
from utils import Vocab
import numpy as np
import pickle


if __name__ == "__main__":

    #Create a set of all words
    all_words = set()
    vocab = Vocab()
    count_files = 0
    for name in ['test', 'train', 'val']:
        filename = name + '_tokens.txt'
        f = open(filename, 'r')
        for line in f:
            sp_line = line.strip().split()
            for token in sp_line:
                all_words.add(token)
                vocab.add_word(token)
        f.close()

    glove_dir = '/media/sf_kickstarter/CS224D/Project/glove.840B.300d'
    glove_f = open(os.path.join(glove_dir, 'glove.840B.300d.txt'), 'r')
    embedding_matrix = np.zeros((len(vocab.word_to_index),300))


    count = 0
    for line in glove_f:
        line_sp = line.strip().split()
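        # --- Hedged sketch: the listing truncates the original loop here. ---
        # A typical continuation (an assumption, not the original code) parses each
        # "word v1 ... v300" line and fills the matching row of embedding_matrix.
        word = line_sp[0]
        if word in vocab.word_to_index and len(line_sp) == 301:
            embedding_matrix[vocab.word_to_index[word]] = np.array(
                line_sp[1:], dtype=np.float32)
            count += 1
    glove_f.close()

    # Persist the matrix for later use; the file name is an assumption.
    with open('embedding_matrix.pkl', 'wb') as out_f:
        pickle.dump(embedding_matrix, out_f)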
Example #46
0
class RNN_Model():
    def __init__(self, config):
        self.config = config
        self.load_data()
        self.merged_summaries = None
        self.summary_writer = None
        self.is_a_leaf   = tf.placeholder(tf.bool, [None], name="is_a_leaf")
        self.left_child  = tf.placeholder(tf.int32, [None], name="lchild")
        self.right_child = tf.placeholder(tf.int32, [None], name="rchild")
        self.word_index  = tf.placeholder(tf.int32, [None], name="word_index")
        self.labelholder = tf.placeholder(tf.int32, [None], name="labels_holder")
        self.add_model_vars()
        self.tensor_array = tf.TensorArray(tf.float32,
                              size=0,
                              dynamic_size=True,
                              clear_after_read=False,
                              infer_shape=False)
        #tensor array stores the vectors (embedded or composed)
        self.tensor_array_op = None
        self.prediction   = None
        self.logits       = None
        self.root_logits  = None
        self.root_predict = None

        self.root_loss = None
        self.full_loss = None

        self.training_op = None
        #tensor_array_op is the operation on the TensorArray

    # private functions used to construct the graph.
    def _embed_word(self, word_index):
        with tf.variable_scope("Composition", reuse=True) as scope:
            print(scope.name)
            embedding = tf.get_variable("embedding")
            print(embedding.name)
        return tf.expand_dims(tf.gather(embedding, word_index), 0)

    # private functions used to construct the graph.
    def _combine_children(self, left_index, right_index):
        left_tensor  = self.tensor_array.read(left_index)
        right_tensor = self.tensor_array.read(right_index)
        with tf.variable_scope('Composition', reuse=True):
            W1 = tf.get_variable('W1')
            b1 = tf.get_variable('b1')
        return tf.nn.relu(tf.matmul(tf.concat(1, [left_tensor, right_tensor]), W1) + b1)


    # i is the index into the node data stored in the placeholders.
    # The output types match the input types, so this body can be used in tf.while_loop.
    # The first iteration writes the left-most leaf to the TensorArray (and increments i),
    # the next iteration writes the next left-most leaf, and so on,
    # until all the leaves are stored in the correct order;
    # after that, internal nodes are composed from their children and appended.
    def _loop_over_tree(self, tensor_array, i):
        is_leaf     = tf.gather(self.is_a_leaf, i)
        word_idx    = tf.gather(self.word_index, i)
        left_child  = tf.gather(self.left_child, i)
        right_child = tf.gather(self.right_child, i)
        node_tensor = tf.cond(is_leaf, lambda : self._embed_word(word_idx),
                                       lambda : self._combine_children(left_child, right_child))
        tensor_array = tensor_array.write(i, node_tensor)
        i = tf.add(i,1)

        return tensor_array, i

    def construct_tensor_array(self):
        loop_condition = lambda tensor_array, i: \
                         tf.less(i, tf.squeeze(tf.shape(self.is_a_leaf)))
        #iterate over all leaves + composition
        tensor_array_op = tf.while_loop(cond=loop_condition,
                                        body=self._loop_over_tree,
                                        loop_vars=[self.tensor_array, 0],
                                        parallel_iterations=1)[0]
        return tensor_array_op

    def inference_op(self, predict_only_root=False):
        if predict_only_root:
            return self.root_logits_op()
        return self.logits_op()

    def load_data(self):
        """Loads train/dev/test data and builds vocabulary."""
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200)

        # build vocab from training data
        self.vocab = Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    def add_model_vars(self):
        '''
        Your model contains the following parameters:
            embedding:  tensor(vocab_size, embed_size)
            W1:         tensor(2* embed_size, embed_size)
            b1:         tensor(1, embed_size)
            U:          tensor(embed_size, output_size)
            bs:         tensor(1, output_size)
        Hint: Add the tensorflow variables to the graph here and *reuse* them while building
                the computation graphs for composition and projection for each tree
        Hint: Use a variable_scope "Composition" for the composition layer, and
              "Projection") for the linear transformations preceding the softmax.
        '''
        with tf.variable_scope('Composition') as scope:
            ### YOUR CODE HERE
            # initializer=tf.random_normal_initializer(0, 3)
            print(scope.name)
            embedding = tf.get_variable("embedding",
                                        [self.vocab.total_words, self.config.embed_size])
            print(embedding.name)
            W1 = tf.get_variable("W1", [2 * self.config.embed_size, self.config.embed_size])
            b1 = tf.get_variable("b1", [1, self.config.embed_size])
            l2_loss = tf.nn.l2_loss(W1)
            tf.add_to_collection(name="l2_loss", value=l2_loss)
            variable_summaries(embedding, embedding.name)
            variable_summaries(W1, W1.name)
            variable_summaries(b1, b1.name)
        ### END YOUR CODE
        with tf.variable_scope('Projection'):
            ### YOUR CODE HERE
            U = tf.get_variable("U", [self.config.embed_size, self.config.label_size])
            bs = tf.get_variable("bs", [1, self.config.label_size])
            variable_summaries(U, U.name)
            variable_summaries(bs, bs.name)
            l2_loss = tf.nn.l2_loss(U)
            tf.add_to_collection(name="l2_loss", value=l2_loss)
        ### END YOUR CODE

    def add_model(self):
        """Recursively build the model to compute the phrase embeddings in the tree

        Hint: Refer to tree.py and vocab.py before you start. Refer to
              the model's vocab with self.vocab
        Hint: Reuse the "Composition" variable_scope here
        Hint: Store a node's vector representation in node.tensor so it can be
              used by its parent
        Hint: If node is a leaf node, its vector representation is just that of the
              word vector (see tf.gather()).
        Args:
            node: a Node object
        Returns:
            node_tensors: Dict: key = Node, value = tensor(1, embed_size)
        """
        if self.tensor_array_op is None:
            self.tensor_array_op = self.construct_tensor_array()
        return self.tensor_array_op

    def add_projections_op(self, node_tensors):
        """Add projections to the composition vectors to compute the raw sentiment scores

        Hint: Reuse the "Projection" variable_scope here
        Args:
            node_tensors: tensor(?, embed_size)
        Returns:
            output: tensor(?, label_size)
        """
        logits = None
        ### YOUR CODE HERE
        with tf.variable_scope("Projection", reuse=True):
            U = tf.get_variable("U")
            bs = tf.get_variable("bs")
        logits = tf.matmul(node_tensors, U) + bs
        ### END YOUR CODE
        return logits

    def logits_op(self):
        #this is an operation on the updated tensor_array
        if self.logits is None:
            self.logits = self.add_projections_op(self.tensor_array_op.concat())
        return self.logits

    def root_logits_op(self):
        #construct once
        if self.root_logits is None:
            self.root_logits = self.add_projections_op(self.tensor_array_op.read(self.tensor_array_op.size() -1))
        return self.root_logits

    def root_prediction_op(self):
        if self.root_predict is None:
            self.root_predict =  tf.squeeze(tf.argmax(self.root_logits_op(), 1))
        return self.root_predict

    def full_loss_op(self, logits, labels):
        """Adds loss ops to the computational graph.

        Hint: Use sparse_softmax_cross_entropy_with_logits
        Hint: Remember to add l2_loss (see tf.nn.l2_loss)
        Args:
            logits: tensor(num_nodes, output_size)
            labels: python list, len = num_nodes
        Returns:
            loss: tensor 0-D
        """
        if self.full_loss is None:
            loss = None
            # YOUR CODE HERE
            l2_loss = self.config.l2 * tf.add_n(tf.get_collection("l2_loss"))
            idx = tf.where(tf.less(self.labelholder,2))
            logits = tf.gather(logits, idx)
            labels = tf.gather(labels, idx)
            objective_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
            loss = objective_loss + l2_loss
            tf.summary.scalar("loss_l2", l2_loss)
            tf.summary.scalar("loss_objective", tf.reduce_sum(objective_loss))
            tf.summary.scalar("loss_total", loss)
            self.full_loss = loss
        # END YOUR CODE
        return self.full_loss

    def loss_op(self, logits, labels):
        """Adds loss ops to the computational graph.

        Hint: Use sparse_softmax_cross_entropy_with_logits
        Hint: Remember to add l2_loss (see tf.nn.l2_loss)
        Args:
            logits: tensor(num_nodes, output_size)
            labels: python list, len = num_nodes
        Returns:
            loss: tensor 0-D
        """
        if self.root_loss is None:
            #construct once guard
            loss = None
            # YOUR CODE HERE
            l2_loss = self.config.l2 * tf.add_n(tf.get_collection("l2_loss"))
            objective_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
            loss = objective_loss + l2_loss
            tf.summary.scalar("root_loss_l2", l2_loss)
            tf.summary.scalar("root_loss_objective", tf.reduce_sum(objective_loss))
            tf.summary.scalar("root_loss_total", loss)
            self.root_loss = loss
        # END YOUR CODE
        return self.root_loss


    def training(self, loss):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable variables.
        The Op returned by this function is what must be passed to the
        `sess.run()` call to cause the model to train. See

        https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

        for more information.

        Hint: Use tf.train.GradientDescentOptimizer for this model.
                Calling optimizer.minimize() will return a train_op object.

        Args:
            loss: tensor 0-D
        Returns:
            train_op: tensorflow op for training.
        """
        if self.training_op is None:
        # YOUR CODE HERE
            # Note: the hint suggests GradientDescentOptimizer, but Adam is used here instead.
            optimizer = tf.train.AdamOptimizer(self.config.lr)
            self.training_op = optimizer.minimize(loss)
        # END YOUR CODE
        return self.training_op

    def predictions(self, y):
        """Returns predictions from sparse scores

        Args:
            y: tensor(?, label_size)
        Returns:
            predictions: tensor(?,1)
        """
        if self.prediction is None:
        # YOUR CODE HERE
            self.prediction = tf.argmax(y, dimension=1)
        # END YOUR CODE
        return self.prediction

    def build_feed_dict(self, in_node):
        nodes_list = []
        tr.leftTraverse(in_node, lambda node, args: args.append(node), nodes_list)
        node_to_index = OrderedDict()
        for idx, i in enumerate(nodes_list):
            node_to_index[i] = idx

        feed_dict = {
          self.is_a_leaf   : [ n.isLeaf for n in nodes_list ],
          self.left_child  : [ node_to_index[n.left] if not n.isLeaf else -1 for n in nodes_list ],
          self.right_child : [ node_to_index[n.right] if not n.isLeaf else -1 for n in nodes_list ],
          self.word_index  : [ self.vocab.encode(n.word) if n.word else -1 for n in nodes_list ],
          self.labelholder : [ n.label for n in nodes_list ]
        }
        return feed_dict

    def predict(self, trees, weights_path, get_loss = False):
        """Make predictions from the provided model."""


        results = []
        losses = []

        logits = self.root_logits_op()
        #evaluation is based upon the root node
        root_loss = self.loss_op(logits=logits, labels=self.labelholder[-1:])
        root_prediction_op = self.root_prediction_op()
        with tf.Session() as sess:
            saver = tf.train.Saver()
            saver.restore(sess, weights_path)
            for t in  trees:
                feed_dict = self.build_feed_dict(t.root)
                if get_loss:
                    root_prediction, loss = sess.run([root_prediction_op, root_loss], feed_dict=feed_dict)
                    losses.append(loss)
                    results.append(root_prediction)
                else:
                    root_prediction = sess.run(root_prediction_op, feed_dict=feed_dict)
                    results.append(root_prediction)
        return results, losses

    # TODO: rework this (op creation should be made independent of op usage)
    def run_epoch(self, new_model = False, verbose=True, epoch=0):
        loss_history = []
        random.shuffle(self.train_data)
        
        with tf.Session() as sess:
            if new_model:
                add_model_op = self.add_model()
                logits = self.logits_op()
                loss = self.full_loss_op(logits=logits, labels=self.labelholder)
                train_op = self.training(loss)
                init = tf.global_variables_initializer()
                sess.run(init)
            else:
                saver = tf.train.Saver()
                saver.restore(sess, './weights/%s.temp'%self.config.model_name)
                logits = self.logits_op()
                loss = self.full_loss_op(logits=logits, labels=self.labelholder)
                train_op = self.training(loss)

            for step, tree in enumerate(self.train_data):
                feed_dict = self.build_feed_dict(tree.root)
                loss_value, _ = sess.run([loss, train_op], feed_dict=feed_dict)
                loss_history.append(loss_value)
                if verbose:
                    sys.stdout.write('\r{} / {} :    loss = {}'.format(
                            step+1, len(self.train_data), np.mean(loss_history)))
                    sys.stdout.flush()
            saver = tf.train.Saver()
            if not os.path.exists("./weights"):
                os.makedirs("./weights")

            #print('./weights/%s.temp'%self.config.model_name)
            saver.save(sess, './weights/%s.temp'%self.config.model_name)
        train_preds, _ = self.predict(self.train_data, './weights/%s.temp'%self.config.model_name)
        val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp'%self.config.model_name, get_loss=True)
        train_labels = [t.root.label for t in self.train_data]
        val_labels = [t.root.label for t in self.dev_data]
        train_acc = np.equal(train_preds, train_labels).mean()
        val_acc = np.equal(val_preds, val_labels).mean()
        print()
        print('Training acc (only root node): {}'.format(train_acc))
        print('Validation acc (only root node): {}'.format(val_acc))
        print(self.make_conf(train_labels, train_preds))
        print(self.make_conf(val_labels, val_preds))
        return train_acc, val_acc, loss_history, np.mean(val_losses)

    def train(self, verbose=True):
        complete_loss_history = []
        train_acc_history = []
        val_acc_history = []
        prev_epoch_loss = float('inf')
        best_val_loss = float('inf')
        best_val_epoch = 0
        stopped = -1
        for epoch in range(self.config.max_epochs):
            print('epoch %d'%epoch)
            if epoch==0:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True, epoch=epoch)
            else:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch(epoch=epoch)
            complete_loss_history.extend(loss_history)
            train_acc_history.append(train_acc)
            val_acc_history.append(val_acc)

            #lr annealing
            epoch_loss = np.mean(loss_history)
            if epoch_loss>prev_epoch_loss*self.config.anneal_threshold:
                self.config.lr/=self.config.anneal_by
                print('annealed lr to %f'%self.config.lr)
            prev_epoch_loss = epoch_loss

            #save if model has improved on val
            if val_loss < best_val_loss:
                 shutil.copyfile('./weights/%s.temp'%self.config.model_name, './weights/%s'%self.config.model_name)
                 best_val_loss = val_loss
                 best_val_epoch = epoch

            # if model has not improved for a while, stop early
            if epoch - best_val_epoch > self.config.early_stopping:
                stopped = epoch
                #break
        if verbose:
            sys.stdout.write('\r')
            sys.stdout.flush()

        print('\n\nstopped at %d\n'%stopped)
        return {
            'loss_history': complete_loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
            }

    def make_conf(self, labels, predictions):
        confmat = np.zeros([2, 2])
        for l,p in zip(labels, predictions):
            confmat[l, p] += 1
        return confmat
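
build_feed_dict and _loop_over_tree above flatten each tree into parallel arrays in which, by construction of the traversal, every child index precedes its parent, so a single pass can compose node vectors bottom-up. A small NumPy sketch of the same idea outside TensorFlow; the ReLU composition mirrors _combine_children, and the array and parameter names are illustrative:

import numpy as np

def compose_bottom_up(is_a_leaf, left_child, right_child, word_index,
                      embedding, W1, b1):
    """Return one vector per node; assumes children always appear before their parents."""
    node_vecs = [None] * len(is_a_leaf)
    for i in range(len(is_a_leaf)):
        if is_a_leaf[i]:
            node_vecs[i] = embedding[word_index[i]]
        else:
            concat = np.concatenate([node_vecs[left_child[i]],
                                     node_vecs[right_child[i]]])
            node_vecs[i] = np.maximum(0.0, concat.dot(W1) + b1)  # ReLU composition
    return np.stack(node_vecs)

# The root vector is the last row, which is what root_logits_op reads from the TensorArray.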
Example #47
0
class RNNLM_Model(LanguageModel):

  def load_data(self, debug=False):
    """Loads starter word-vectors and train/dev/test data."""
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    self.encoded_test = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('test')],
        dtype=np.int32)
    if debug:
      num_debug = 1024
      self.encoded_train = self.encoded_train[:num_debug]
      self.encoded_valid = self.encoded_valid[:num_debug]
      self.encoded_test = self.encoded_test[:num_debug]

  def add_placeholders(self):
    """Generate placeholder variables to represent the input tensors

    These placeholders are used as inputs by the rest of the model building
    code and will be fed data during training.  Note that when "None" is in a
    placeholder's shape, it's flexible

    Adds following nodes to the computational graph.
    (When None is in a placeholder's shape, it's flexible)

    input_placeholder: Input placeholder tensor of shape
                       (None, num_steps), type tf.int32
    labels_placeholder: Labels placeholder tensor of shape
                        (None, num_steps), type tf.float32
    dropout_placeholder: Dropout value placeholder (scalar),
                         type tf.float32

    Add these placeholders to self as the instance variables
  
      self.input_placeholder
      self.labels_placeholder
      self.dropout_placeholder

    (Don't change the variable names)
    """
    ### YOUR CODE HERE
    self.input_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps))
    self.labels_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps))
    self.dropout_placeholder = tf.placeholder(tf.float32, shape=None)
    ### END YOUR CODE
  
  def add_embedding(self):
    """Add embedding layer.

    Hint: This layer should use the input_placeholder to index into the
          embedding.
    Hint: You might find tf.nn.embedding_lookup useful.
    Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs
    Hint: Check the last slide from the TensorFlow lecture.
    Hint: Here are the dimensions of the variables you will need to create:

      L: (len(self.vocab), embed_size)

    Returns:
      inputs: List of length num_steps, each of whose elements should be
              a tensor of shape (batch_size, embed_size).
    """
    # The embedding lookup is currently only implemented for the CPU
    with tf.device('/cpu:0'):
      ### YOUR CODE HERE
      L = tf.Variable(tf.random_uniform([len(self.vocab), self.config.embed_size], -1.0, 1.0), name="L")
      # Shape of input_placeholder : (batch_size, num_steps)
      # Shape of embed : (num_steps, batch_size, embed_size)
      embed = tf.nn.embedding_lookup(L, tf.transpose(self.input_placeholder, perm=[1,0]))
      inputs = [tf.squeeze(ts, [0]) for ts in tf.split(0, self.config.num_steps, embed)]
      ### END YOUR CODE
      return inputs

  def add_projection(self, rnn_outputs):
    """Adds a projection layer.

    The projection layer transforms the hidden representation to a distribution
    over the vocabulary.

    Hint: Here are the dimensions of the variables you will need to
          create 
          
          U:   (hidden_size, len(vocab))
          b_2: (len(vocab),)

    Args:
      rnn_outputs: List of length num_steps, each of whose elements should be
                   a tensor of shape (batch_size, hidden_size(LIBIN edited)).
    Returns:
      outputs: List of length num_steps, each a tensor of shape
               (batch_size, len(vocab))
    """
    ### YOUR CODE HERE
    with tf.variable_scope("projection", initializer = xavier_weight_init(), reuse=None):
        U = tf.get_variable("U", shape=(self.config.hidden_size, len(self.vocab)))
        b2 = tf.get_variable("b2", shape=(len(self.vocab), ))
    
    outputs = [tf.matmul(ts, U) + b2 for ts in rnn_outputs]
    ### END YOUR CODE
    return outputs

  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 
          Check https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py

    Args:
      output: A tensor of shape (None, self.vocab)  (LIBIN : not used)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # output shape  : [num_steps * (batch_size, len(self.vocab))]
    # targets shape : [num_steps * (batch_size, )]
    # weights shape : [num_steps * (batch_size, )]
    targets = [tf.squeeze(ts,[1]) for ts in tf.split(1, self.config.num_steps, self.labels_placeholder)]
    weights = [tf.ones((self.config.batch_size, )) for step in xrange(self.config.num_steps)]
    loss = sequence_loss(output, targets, weights)
    ### END YOUR CODE
    return loss

  def add_training_op(self, loss):
    """Sets up the training Ops.

    Creates an optimizer and applies the gradients to all trainable variables.
    The Op returned by this function is what must be passed to the
    `sess.run()` call to cause the model to train. See 

    https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

    for more information.

    Hint: Use tf.train.AdamOptimizer for this model.
          Calling optimizer.minimize() will return a train_op object.

    Args:
      loss: Loss tensor, from cross_entropy_loss.
    Returns:
      train_op: The Op for training.
    """
    ### YOUR CODE HERE
    optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False)
    train_op = optimizer.minimize(loss)
    ### END YOUR CODE
    return train_op
  
  def __init__(self, config):
    self.config = config
    self.load_data(debug=False)
    self.add_placeholders()
    self.inputs = self.add_embedding()
    self.rnn_outputs = self.add_model(self.inputs)
    self.outputs = self.add_projection(self.rnn_outputs)
    
    #print self.outputs
    #print tf.concat(1, self.outputs)
  
    # We want to check how well we correctly predict the next word
    # We cast o to float64 as there are numerical issues at hand
    # (i.e. sum(output of softmax) = 1.00000298179 and not 1)
    self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs]
    # Reshape the output into len(vocab) sized chunks - the -1 says as many as
    # needed to evenly divide
    # Libin : output not used
    output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
    # output is a single long sequence tensor concatenated
    # orderly by all short squences in current batch.
    # Each element in output is a tensor of size self.vocab which gives the probability
    # distribution of current word
    
    #print output
    #raw_input()
    
    self.calculate_loss = self.add_loss_op(self.outputs)
    self.train_step = self.add_training_op(self.calculate_loss)


  def add_model(self, inputs):
    """Creates the RNN LM model.

    In the space provided below, you need to implement the equations for the
    RNN LM model. Note that you may NOT use built in rnn_cell functions from
    tensorflow.

    Hint: Use a zeros tensor of shape (batch_size, hidden_size) as
          initial state for the RNN. Add this to self as instance variable

          self.initial_state
  
          (Don't change variable name)
    Hint: Add the last RNN output to self as instance variable

          self.final_state

          (Don't change variable name)
    Hint: Make sure to apply dropout to the inputs and the outputs.
    Hint: Use a variable scope (e.g. "RNN") to define RNN variables.
    Hint: Perform an explicit for-loop over inputs. You can use
          scope.reuse_variables() to ensure that the weights used at each
          iteration (each time-step) are the same. (Make sure you don't call
          this for iteration 0 though or nothing will be initialized!)
    Hint: Here are the dimensions of the various variables you will need to
          create:
      
          H: (hidden_size, hidden_size) 
          I: (embed_size, hidden_size)
          b_1: (hidden_size,)

    Args:
      inputs: List of length num_steps, each of whose elements should be
              a tensor of shape (batch_size, embed_size).
    Returns:
      outputs: List of length num_steps, each of whose elements should be
               a tensor of shape (batch_size, hidden_size)
    """
    ### YOUR CODE HERE
    rnn_outputs = []
    
    self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size])
    
    with tf.variable_scope("RNN", initializer=xavier_weight_init(), reuse=None):
        H = tf.get_variable("H", shape=(self.config.hidden_size, self.config.hidden_size))
        I = tf.get_variable("I", shape=(self.config.embed_size, self.config.hidden_size))
        b1 = tf.get_variable("b1", shape=(self.config.hidden_size, ))
    
    prev_h = self.initial_state
    
    for step_input in inputs:
        step_input = tf.nn.dropout(step_input, self.dropout_placeholder)
        prev_h = tf.sigmoid(tf.matmul(prev_h, H) + tf.matmul(step_input, I) + b1)
        #prev_h = tf.nn.dropout(prev_h, self.dropout_placeholder)
        rnn_outputs.append(prev_h)

    self.final_state = prev_h
    ### END YOUR CODE
    return rnn_outputs


  def run_epoch(self, session, data, train_op=None, verbose=10):
    config = self.config
    dp = config.dropout
    if not train_op:
      train_op = tf.no_op()
      dp = 1
    total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
    total_loss = []
    state = self.initial_state.eval()
    for step, (x, y) in enumerate(
      ptb_iterator(data, config.batch_size, config.num_steps)):
      # We need to pass in the initial state and retrieve the final state to give
      # the RNN proper history
      feed = {self.input_placeholder: x,
              self.labels_placeholder: y,
              self.initial_state: state,
              self.dropout_placeholder: dp}
      loss, state, _ = session.run(
          [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
      total_loss.append(loss)
      if verbose and step % verbose == 0:
          # The derivation of pp can be checked in question Q3-(a)
          sys.stdout.write('\r{} / {} : pp = {}'.format(
              step, total_steps, np.exp(np.mean(total_loss))))
          sys.stdout.flush()
    if verbose:
      sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
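
run_epoch above reports the exponential of the mean per-step cross-entropy, i.e. perplexity. A tiny standalone check of that relationship; the loss values are illustrative only:

import numpy as np

batch_losses = [5.2, 4.9, 4.7]           # mean cross-entropy (nats per word) of a few batches
perplexity = np.exp(np.mean(batch_losses))
print('pp = {:.1f}'.format(perplexity))  # ~138.8; lower perplexity means a better model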
Example #48
0
File: RNN.py Project: zbxzc35/RNN-2
class RNNLM_Model(LanguageModel):

  def load_data(self, debug=False):
    """Loads starter word-vectors and train/dev/test data."""
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    #self.encoded_test = np.array(
        #[self.vocab.encode(word) for word in get_ptb_dataset('test')],
        #dtype=np.int32)
    if debug:
      num_debug = 1024
      self.encoded_train = self.encoded_train[:num_debug]  # truncate the training data for debugging
      self.encoded_valid = self.encoded_valid[:num_debug]
      #self.encoded_test = self.encoded_test[:num_debug]  # encoded_test is commented out above

  def add_placeholders(self):
    
    self.input_placeholder = tf.placeholder(tf.int32, (None, self.config.num_steps))
    self.labels_placeholder = tf.placeholder(tf.int32, (None, self.config.num_steps))  # integer targets for sequence_loss
    self.dropout_placeholder = tf.placeholder(tf.float32)

  
  def add_embedding(self):  # map word indices (one-hot) to embedding vectors
    
    inputs = []
    with tf.device('/cpu:0'):
      L = tf.get_variable("Embedding", (len(self.vocab), self.config.embed_size))
      tensors = tf.nn.embedding_lookup(L, self.input_placeholder)
      split_tensors = tf.split(1, self.config.num_steps, tensors)
      for tensor in split_tensors:
        inputs.append(tf.squeeze(tensor, [1]))
      return inputs  # a list of num_steps tensors, each of shape (batch_size, embed_size)

  def add_projection(self, rnn_outputs):  # project hidden states to vocabulary scores
    
    with tf.variable_scope("projection"):
      U=tf.get_variable("U",shape=(self.config.hidden_size,len(self.vocab)))
      b_2=tf.get_variable("b_2",shape=(len(self.vocab),))
    outputs = [tf.matmul(x, U) + b_2 for x in rnn_outputs]  # raw logits; softmax is applied in the loss/predictions
    

    return outputs

  def add_loss_op(self, output):  # compute the sequence cross-entropy loss
    
    loss = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], [tf.ones([self.config.batch_size * self.config.num_steps])])

    return loss

  def add_training_op(self, loss):  # minimize the loss with Adam

    optimizer = tf.train.AdamOptimizer(self.config.lr)
    train_op = optimizer.minimize(loss)
    return train_op
  
  def __init__(self, config):
    self.config = config
    self.load_data(debug=False)
    self.add_placeholders()
    self.inputs = self.add_embedding()
    self.rnn_outputs = self.add_model(self.inputs)
    self.outputs = self.add_projection(self.rnn_outputs)
    self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs]
    output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
    self.calculate_loss = self.add_loss_op(output)
    self.train_step = self.add_training_op(self.calculate_loss)


  def add_model(self, inputs):
    
    hidden_size=self.config.hidden_size
    embed_size=self.config.embed_size
    batch_size=self.config.batch_size
    with tf.variable_scope("RNN"):
      H=tf.get_variable("H",shape=(hidden_size,hidden_size))
      I=tf.get_variable("I",shape=(embed_size,hidden_size))
      b_1=tf.get_variable("b_1",shape=(hidden_size,))
    self.initial_state=tf.zeros([batch_size,hidden_size])
    pre_h=self.initial_state
    rnn_outputs=[]
    for step in inputs:
      step=tf.nn.dropout(step,self.dropout_placeholder)
      pre_h=tf.sigmoid(tf.matmul(pre_h,H)+tf.matmul(step,I)+b_1)
      rnn_outputs.append(tf.nn.dropout(pre_h,self.dropout_placeholder))
    self.final_state=pre_h
    return rnn_outputs


  def run_epoch(self, session, data, train_op=None, verbose=10):
    config = self.config
    dp = config.dropout
    if not train_op:
      train_op = tf.no_op()
      dp = 1
    total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))  # total number of batches per epoch
    total_loss = []
    state = self.initial_state.eval()
    for step, (x, y) in enumerate(
      ptb_iterator(data, config.batch_size, config.num_steps)):
      feed = {self.input_placeholder: x,
              self.labels_placeholder: y,
              self.initial_state: state,
              self.dropout_placeholder: dp}
      loss, state, _ = session.run(
          [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
      total_loss.append(loss)
      if verbose and step % verbose == 0:
          sys.stdout.write('\r{} / {} : pp = {}'.format(
              step, total_steps, np.exp(np.mean(total_loss))))
          sys.stdout.flush()
    if verbose:
      sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
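
Finally, a hedged sketch of how an RNNLM_Model like the two above is typically driven; the Config field `max_epochs` and the driver function name are assumptions rather than code from either project:

import tensorflow as tf

def train_language_model(config):
    # Hypothetical driver; RNNLM_Model builds its graph and loads PTB data in __init__.
    with tf.Graph().as_default():
        model = RNNLM_Model(config)
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())
            for epoch in range(config.max_epochs):  # max_epochs is an assumed Config field
                train_pp = model.run_epoch(session, model.encoded_train,
                                           train_op=model.train_step)
                valid_pp = model.run_epoch(session, model.encoded_valid)
                print('epoch {}: train pp {:.2f}, valid pp {:.2f}'.format(
                    epoch, train_pp, valid_pp))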