Example #1
    def __init__(self, config, model):
        self.num_layers = 1
        self.input_dim = config.embedding_dim
        self.model = model
        self.use_char_rnn = config.use_char_rnn

        self.char_rnn = CharRNN(config, model) if self.use_char_rnn else None
        input_size = self.input_dim if not self.char_rnn else self.input_dim + config.charlstm_hidden_dim
        self.bilstm = dy.BiRNNBuilder(1, input_size, config.hidden_dim,
                                      self.model, dy.LSTMBuilder)
        print("Input to word-level BiLSTM size: %d" % (input_size))
        print("BiLSTM hidden size: %d" % (config.hidden_dim))
        # self.bilstm.set_dropout(config.dropout_bilstm)
        self.num_labels = len(config.label2idx)
        self.label2idx = config.label2idx
        self.labels = config.idx2labels
        # print(config.hidden_dim)

        # self.tanh_w = self.model.add_parameters((config.tanh_hidden_dim, config.hidden_dim))
        # self.tanh_bias = self.model.add_parameters((config.tanh_hidden_dim,))

        self.linear_w = self.model.add_parameters(
            (self.num_labels, config.hidden_dim))
        self.linear_bias = self.model.add_parameters((self.num_labels, ))

        self.transition = self.model.add_lookup_parameters(
            (self.num_labels, self.num_labels))
        vocab_size = len(config.word2idx)
        self.word2idx = config.word2idx
        print("Word Embedding size: %d x %d" % (vocab_size, self.input_dim))
        self.word_embedding = self.model.add_lookup_parameters(
            (vocab_size, self.input_dim), init=config.word_embedding)

        self.dropout = config.dropout
Example #2
def main(config):
    words, word_id_map, poems_id_vector, id_word_map = process_poems(config.file_name, start_token='S', end_token='E')
    generate_batches = generate_batch(config.batch_size, poems_id_vector, word_id_map)
    with tf.Session() as sess:
        model = CharRNN(sess, config.epoch_size, config.num_layers, config.batch_size, config.learning_rate,
                    len(words)+1, config.rnn_size, generate_batches, config.checkpoint_dir, False)
        model.train()
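A hypothetical driver for main() above: the SimpleNamespace stands in for the project's real config object, its fields mirror the attributes main() reads, and every value here is a placeholder assumption.

from types import SimpleNamespace

if __name__ == "__main__":
    # All values below are placeholder assumptions.
    config = SimpleNamespace(
        file_name="poems.txt",          # training corpus (assumed path)
        batch_size=64,
        epoch_size=50,
        num_layers=2,
        rnn_size=128,
        learning_rate=0.01,
        checkpoint_dir="./checkpoints",
    )
    main(config)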
Example #3
    def __init__(self, config, model, mask):
        self.num_layers = 1
        self.input_dim = config.embedding_dim
        self.model = model
        self.use_char_rnn = config.use_char_rnn

        self.char_rnn = CharRNN(config, model) if self.use_char_rnn else None
        input_size = self.input_dim if not self.char_rnn else self.input_dim + config.charlstm_hidden_dim
        self.bilstm = dy.BiRNNBuilder(1, input_size, config.hidden_dim,
                                      self.model, dy.LSTMBuilder)
        print("Input to word-level BiLSTM size: %d" % (input_size))
        print("BiLSTM hidden size: %d" % (config.hidden_dim))
        # self.bilstm.set_dropout(config.dropout_bilstm)
        self.num_labels = len(config.label2idx)
        self.label2idx = config.label2idx
        self.labels = config.idx2labels
        # print(config.hidden_dim)

        self.linear_w = self.model.add_parameters(
            (self.num_labels, config.hidden_dim))
        self.linear_bias = self.model.add_parameters((self.num_labels, ))

        trans_np = np.random.rand(self.num_labels, self.num_labels)
        trans_np[self.label2idx[START], :] = -1e10
        trans_np[:, self.label2idx[STOP]] = -1e10
        self.init_iobes_constraint(trans_np)

        # print(trans_np)

        self.transition = self.model.add_lookup_parameters(
            (self.num_labels, self.num_labels), init=trans_np)
        vocab_size = len(config.word2idx)
        self.word2idx = config.word2idx
        print("Word Embedding size: %d x %d" % (vocab_size, self.input_dim))
        self.word_embedding = self.model.add_lookup_parameters(
            (vocab_size, self.input_dim), init=config.word_embedding)

        # self.mask_tensor = [ self.model.add_lookup_parameters((vocab_size, self.input_dim), init=config.word_embedding) for inst_mask in mask ]
        self.mask = mask
        # for inst_mask in mask:
        #     print(inst_mask)
        self.dropout = config.dropout
Example #4
def train(args):
    # load data
    text, vocab_size, mapping = load_data('transcription_train.txt')
    test = load_test_file('transcription_test.txt')
    # Dump few states to use in generation
    dump([vocab_size, args.hidden_size, args.embedding_dim, args.n_layers],
         open('state_vars.pkl', 'wb'))

    my_net = CharRNN(hidden_size=args.hidden_size,
                     embedding_dim=args.embedding_dim,
                     output_size=vocab_size,
                     n_layers=args.n_layers)  # Create the network,
    loss_fn = torch.nn.CrossEntropyLoss()  # loss function / optimizer
    optim = torch.optim.Adam(my_net.parameters(), lr=args.learning_rate)

    if torch.cuda.is_available():
        # Move the network and the optimizer to the GPU
        my_net = my_net.cuda()
        loss_fn = loss_fn.cuda()

    loss_avg = 0
    prev_ppl = 1000000
    for epoch in range(1, args.n_epochs + 1):
        start_time = timer()
        loss = train_batch(my_net, optim, loss_fn, args,
                           *train_set(args, text, len(text), mapping))
        loss_avg += loss
        if epoch % args.print_every == 0:
            val_loss = evaluate(my_net, loss_fn, mapping, test)
            ppl = math.exp(val_loss)
            print(
                "Epoch {} : Training Loss: {:.5f}, Test ppl: {:.5f}, Time elapsed {:.2f} mins"
                .format(epoch, loss, ppl, (timer() - start_time) / 60))
            if ppl < prev_ppl:
                prev_ppl = ppl
                torch.save(my_net.state_dict(), 'bestModel.t7')
                print("Perplexity reduced, saving model !!")
    return my_net
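A sketch of a possible command-line entry point for train() above: the flags cover only the attributes visible in this snippet (helpers such as train_set and train_batch may read further attributes not shown here), and the default values are assumptions.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Only the attributes train() visibly reads; defaults are assumptions.
    parser.add_argument("--hidden_size", type=int, default=256)
    parser.add_argument("--embedding_dim", type=int, default=64)
    parser.add_argument("--n_layers", type=int, default=2)
    parser.add_argument("--learning_rate", type=float, default=1e-3)
    parser.add_argument("--n_epochs", type=int, default=100)
    parser.add_argument("--print_every", type=int, default=10)
    train(parser.parse_args())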
Example #5
print("\ntrain/dev/test size: {:d}/{:d}/{:d}\n".format(len(train_y),
                                                       len(dev_y),
                                                       len(test_y)))

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():

        # Instantiate our model
        rnn = CharRNN(vocabulary_size,
                      FLAGS.sentence_length,
                      FLAGS.batch_size,
                      2,
                      embedding_size=FLAGS.embedding_dim,
                      hidden_dim=FLAGS.hidden_dim,
                      num_layers=FLAGS.num_layers,
                      loss=FLAGS.loss_type)

        # Generate input batches (using tensorflow)
        with tf.variable_scope("input"):
            placeholder_x = tf.placeholder(tf.int32, train_x.shape)
            placeholder_y = tf.placeholder(tf.float32, train_y.shape)
            train_x_var = tf.Variable(placeholder_x,
                                      trainable=False,
                                      collections=[])
            train_y_var = tf.Variable(placeholder_y,
                                      trainable=False,
                                      collections=[])
            x_slice, y_slice = tf.train.slice_input_producer(
                [train_x_var, train_y_var], num_epochs=FLAGS.num_epochs)
Example #6
import theano
import theano.tensor as T
import sys
import random
###############################
#
#  Prepare the data
#
###############################

# f = open("../data/reuters21578/reut2-002.sgm")
f = open("../data/tinyshakespeare/input.txt")
text = f.read()
f.close()

rnn = CharRNN()

seq_len = 150


def train(eta, iters):
    for it in xrange(iters):
        i = random.randint(0, len(text) / seq_len)
        j = i * seq_len

        X = text[j:(j + seq_len)]
        Y = text[(j + 1):(j + 1 + seq_len)]

        print "iteration: %s, cost: %s" % (
            str(it), str(rnn.train(one_hot(X), one_hot(Y), eta, 1.0)))
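A hypothetical invocation of the train() helper above; the learning rate and iteration count are arbitrary placeholders.

train(eta=0.002, iters=10000)  # placeholder hyperparameters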
Example #7
        _, hidden = model(primer[:, p], hidden)
    input = primer[:, -1]
    predicted = in_text
    # generate a fixed number of characters, to generate indefinite string replace this with while loop
    for _ in range(n_chars):
        # predict character
        y, hidden = model(input, hidden)
        _, yhat = y.max(dim=1)
        yhat = yhat.data.cpu()[0]
        char = reverse_mapping[yhat]
        predicted += char
        input = get_encoded_sequence(char, mapping)
    return predicted


vocab_size, hidden_size, embedding_dim, n_layers = load(open('state_vars.pkl', 'rb'))
# load the model
model = CharRNN(hidden_size=hidden_size, embedding_dim=embedding_dim,
                output_size=vocab_size, n_layers=n_layers)
if torch.cuda.is_available():
    model = model.cuda()
model.load_state_dict(torch.load('model.t7'))
# load the mapping
mapping = load(open('mapping.pkl', 'rb'))
reverse_mapping = load(open('reverse_mapping.pkl', 'rb'))

# Generate few sentences
print(generate_seq(model, mapping, reverse_mapping, 'టాలు కూడా వ', 3000))
print(generate_seq(model, mapping, reverse_mapping, 'కదా అంత మంచ', 3000))
print(generate_seq(model, mapping, reverse_mapping, 'ును అందుకని', 3000))
Example #8
class Partial_Perceptron:
    def __init__(self, config, model):
        self.num_layers = 1
        self.input_dim = config.embedding_dim
        self.model = model
        self.use_char_rnn = config.use_char_rnn

        self.char_rnn = CharRNN(config, model) if self.use_char_rnn else None
        input_size = self.input_dim if not self.char_rnn else self.input_dim + config.charlstm_hidden_dim
        self.bilstm = dy.BiRNNBuilder(1, input_size, config.hidden_dim,
                                      self.model, dy.LSTMBuilder)
        print("Input to word-level BiLSTM size: %d" % (input_size))
        print("BiLSTM hidden size: %d" % (config.hidden_dim))
        # self.bilstm.set_dropout(config.dropout_bilstm)
        self.num_labels = len(config.label2idx)
        self.label2idx = config.label2idx
        self.labels = config.idx2labels
        # print(config.hidden_dim)
        self.o_id = self.label2idx["O"]
        self.linear_w = self.model.add_parameters(
            (self.num_labels, config.hidden_dim))
        self.linear_bias = self.model.add_parameters((self.num_labels, ))

        trans_np = np.random.rand(self.num_labels, self.num_labels)
        trans_np[self.label2idx[START], :] = -1e10
        trans_np[:, self.label2idx[STOP]] = -1e10
        self.init_iobes_constraint(trans_np)

        # print(trans_np)

        self.transition = self.model.add_lookup_parameters(
            (self.num_labels, self.num_labels), init=trans_np)
        vocab_size = len(config.word2idx)
        self.word2idx = config.word2idx
        print("Word Embedding size: %d x %d" % (vocab_size, self.input_dim))
        self.word_embedding = self.model.add_lookup_parameters(
            (vocab_size, self.input_dim), init=config.word_embedding)

        self.dropout = config.dropout

    def init_iobes_constraint(self, trans_np):
        for l1 in range(self.num_labels):
            ##previous label
            if l1 == self.label2idx[START] or l1 == self.label2idx[STOP]:
                continue
            for l2 in range(self.num_labels):
                ##next label
                if l2 == self.label2idx[START] or l2 == self.label2idx[STOP]:
                    continue
                if not check_bies_constraint(self.labels[l1], self.labels[l2]):
                    trans_np[l2, l1] = -1e10

    def build_graph_with_char(self, x, all_chars, is_train):

        if is_train:
            embeddings = []
            for w, chars in zip(x, all_chars):
                word_emb = self.word_embedding[w]
                f, b = self.char_rnn.forward_char(chars)
                concat = dy.concatenate([word_emb, f, b])
                embeddings.append(dy.dropout(concat, self.dropout))

        else:
            embeddings = []
            for w, chars in zip(x, all_chars):
                word_emb = self.word_embedding[w]
                f, b = self.char_rnn.forward_char(chars)
                concat = dy.concatenate([word_emb, f, b])
                embeddings.append(concat)
        lstm_out = self.bilstm.transduce(embeddings)
        features = [
            dy.affine_transform([self.linear_bias, self.linear_w, rep])
            for rep in lstm_out
        ]
        return features

    # computing the negative log-likelihood
    def build_graph(self, x, is_train):
        # dy.renew_cg()
        if is_train:
            embeddings = [
                dy.dropout(self.word_embedding[w], self.dropout) for w in x
            ]
        else:
            embeddings = [self.word_embedding[w] for w in x]
        lstm_out = self.bilstm.transduce(embeddings)
        features = [
            dy.affine_transform([self.linear_bias, self.linear_w, rep])
            for rep in lstm_out
        ]
        return features

    def forward_unlabeled(self, features, output):
        init_alphas = [-1e10] * self.num_labels
        init_alphas[self.label2idx[START]] = 0

        for_expr = dy.inputVector(init_alphas)
        for pos, obs in enumerate(features):
            alphas_t = []
            if output[pos] != self.o_id:
                for next_tag in range(self.num_labels):
                    obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                                   self.num_labels)
                    next_tag_expr = for_expr + self.transition[
                        next_tag] + obs_broadcast if pos == 0 or output[
                            pos - 1] != self.o_id else for_expr + obs_broadcast
                    alphas_t.append(max_score(next_tag_expr))
                for_expr = dy.concatenate(alphas_t)
            # for_expr = dy.max_dim(alphas_t)
            # dy.emax()
        terminal_expr = for_expr + self.transition[
            self.label2idx[STOP]] if output[-1] != self.o_id else for_expr
        alpha = max_score(terminal_expr)
        return alpha

    # def forward_labeled(self, id, features, output):
    #     init_alphas = [-1e10] * self.num_labels
    #     init_alphas[self.label2idx[START]] = 0
    #
    #     for_expr = dy.inputVector(init_alphas)
    #     for pos, obs in enumerate(features):
    #         alphas_t = []
    #         if output[pos] == self.o_id:
    #             for next_tag in range(self.num_labels):
    #                 next_tag_expr = for_expr
    #                 alphas_t.append(max_score(next_tag_expr))
    #         else:
    #             for next_tag in range(self.num_labels):
    #                 if next_tag != output[pos]:
    #                     next_tag_expr = for_expr + dy.inputVector([-1e10] * self.num_labels)
    #                 else:
    #                     obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] * self.num_labels)
    #                     next_tag_expr = for_expr + self.transition[next_tag] + obs_broadcast
    #                 alphas_t.append(max_score(next_tag_expr))
    #         for_expr = dy.concatenate(alphas_t)
    #         # for_expr = dy.max_dim(alphas_t)
    #         # dy.emax()
    #     terminal_expr = for_expr + self.transition[self.label2idx[STOP]]
    #     alpha = max_score(terminal_expr)
    #     return alpha

    # Labeled network score
    def forward_labeled(self, id, features, tags, is_prediction):
        score = dy.scalarInput(0)
        # tags = [self.label2idx[w] for w in tags]
        tags = [self.label2idx[START]] + tags
        is_prediction = [False] + is_prediction
        for i, obs in enumerate(features):
            # if tags[i+1] != self.o_id:
            if not is_prediction[i + 1]:
                score = score + dy.pick(self.transition[tags[
                    i + 1]], tags[i]) + dy.pick(obs, tags[
                        i + 1]) if not is_prediction[i] else score + dy.pick(
                            obs, tags[i + 1])
        if not is_prediction[-1]:
            labeled_score = score + dy.pick(
                self.transition[self.label2idx[STOP]], tags[-1])
        else:
            labeled_score = score
        return labeled_score

    def negative_log_bak(self, id, x, y, x_chars=None):
        features = self.build_graph(
            x, True) if not self.use_char_rnn else self.build_graph_with_char(
                x, x_chars, True)
        # features = self.build_graph(x, True)
        unlabed_score = self.forward_unlabeled(features, y)
        labeled_score = self.forward_labeled(id, features, y)
        return unlabed_score - labeled_score

    def negative_log(self, id, x, y, x_chars=None):
        features = self.build_graph(
            x, True) if not self.use_char_rnn else self.build_graph_with_char(
                x, x_chars, True)
        # features = self.build_graph(x, True)
        # unlabed_score = self.forward_unlabeled(features, y)
        is_prediction = [tag == self.o_id for tag in y]
        best_path, _ = self.viterbi_decoding(features)
        unlabeled_score = self.forward_labeled(id, features, best_path,
                                               is_prediction)
        labeled_score = self.forward_labeled(id, features, y, is_prediction)
        return unlabeled_score - labeled_score

    def viterbi_decoding(self, features):
        backpointers = []
        init_vvars = [-1e10] * self.num_labels
        init_vvars[
            self.label2idx[START]] = 0  # <Start> has all the probability
        for_expr = dy.inputVector(init_vvars)
        trans_exprs = [self.transition[idx] for idx in range(self.num_labels)]
        for obs in features:
            bptrs_t = []
            vvars_t = []
            for next_tag in range(self.num_labels):
                next_tag_expr = for_expr + trans_exprs[next_tag]
                next_tag_arr = next_tag_expr.npvalue()
                best_tag_id = np.argmax(next_tag_arr)
                bptrs_t.append(best_tag_id)
                vvars_t.append(dy.pick(next_tag_expr, best_tag_id))
            for_expr = dy.concatenate(vvars_t) + obs

            backpointers.append(bptrs_t)
        # Perform final transition to terminal
        terminal_expr = for_expr + trans_exprs[self.label2idx[STOP]]
        terminal_arr = terminal_expr.npvalue()
        best_tag_id = np.argmax(terminal_arr)
        path_score = dy.pick(terminal_expr, best_tag_id)
        # Reverse over the backpointers to get the best path
        best_path = [best_tag_id
                     ]  # Start with the tag that was best for terminal
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        start = best_path.pop()  # Remove the start symbol
        best_path.reverse()
        assert start == self.label2idx[START]
        # Return best path and best path's score
        return best_path, path_score

    def decode(self, x, x_chars=None):
        features = self.build_graph(
            x, False) if not self.use_char_rnn else self.build_graph_with_char(
                x, x_chars, False)
        # features = self.build_graph(x, False)
        best_path, path_score = self.viterbi_decoding(features)
        best_path = [self.labels[x] for x in best_path]
        # print(best_path)
        # print('path_score:', path_score.value())
        return best_path
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--char-dict', help='character list file location')
    parser.add_argument('--fwd-in', help='forward model file location')
    parser.add_argument('--bwd-in', help='backward model file location')
    parser.add_argument('--layers', type=int, default=NUM_LAYERS)
    parser.add_argument('--hid-dim', type=int, default=HIDDEN_DIM)
    parser.add_argument('--emb-dim', type=int, default=EMB_DIM)

    parser.add_argument('--anns', help='annotations')
    parser.add_argument('--contexts', help='contexts file location')
    parser.add_argument('--neg-samps', help='negative samples file location')
    parser.add_argument('--not-nltk',
                        action='store_true',
                        help='input was not tokenized using nltk')

    parser.add_argument('--output', help='output file location')

    args = parser.parse_args()

    with open(args.char_dict) as chars_f:
        char_dict = {c: i for i, c in enumerate(chars_f.read())}
    num_chars = len(char_dict)
    print(f'loaded {num_chars} characters')

    # load backup bases
    bases = {}
    with open(args.anns) as in_f:
        in_f.readline()  # header
        for l in in_f:
            # Annotation, Form, Bases, Semantic Affixes, Triv Affixes, Additional Segment Count, PAXOBS, Blend Type
            _, fullform, basess, _, _, _, _, _ = l.strip().split('\t')
            bases[fullform] = basess.split(' ')

    fmodel = CharRNN(num_chars,
                     args.emb_dim,
                     args.hid_dim,
                     n_layers=args.layers)
    fmodel.load_state_dict(torch.load(args.fwd_in))
    fmodel.eval()

    bmodel = CharRNN(num_chars,
                     args.emb_dim,
                     args.hid_dim,
                     n_layers=args.layers)
    bmodel.load_state_dict(torch.load(args.bwd_in))
    bmodel.eval()

    loss_crit = torch.nn.CrossEntropyLoss()

    sent_contexts_df = pd.read_csv(args.contexts, sep=SENT_CONTEXT_DELIM)
    neg_samps = pd.read_csv(args.neg_samps,
                            sep=NEGSAMP_DELIM).to_dict(orient='records')

    results = defaultdict(list)
    resultlog = []
    exceptions = 0
    for ix, row in tqdm(sent_contexts_df.iterrows()):
        neo = row['neologism']
        if neo not in bases:
            continue

        f_tru = bases[neo][0]
        b_tru = bases[neo][-1]
        instkey = (neo, f_tru, b_tru)

        negs = [x for x in neg_samps if x['FORM'] == neo]
        if len(negs) == 0:
            instres = (10000, 10000, 0, 0, 'CHECK', 'CHECK')
            results[instkey].append(instres)
            continue

        sent = '\\n' + row['sentence_context'] + '\\n'
        if not args.not_nltk:
            sent = nltk_clean(sent)
        tnes = rev(sent)

        # find the location for each bases's start
        start_loc = sent.find(MASK_TEXT)
        end_loc = tnes.find(rev(MASK_TEXT))

        sent_chars = [enc_c(c, char_dict) for c in sent[:start_loc]]
        tnes_chars = [enc_c(c, char_dict) for c in tnes[:end_loc]]

        # run each model on the input
        fwd_outs, f_hids = fmodel(torch.tensor(sent_chars).view(1, -1))
        f_out_last = fwd_outs[:, -1, :].view(
            1, 1, num_chars)  # needed for predicting first candidate char
        f_hid_last = f_hids[:, -1, :].view(args.layers, 1, args.hid_dim)

        bwd_outs, b_hids = bmodel(torch.tensor(tnes_chars).view(1, -1))
        b_out_last = bwd_outs[:, -1, :].view(1, 1, num_chars)
        b_hid_last = b_hids[:, -1, :].view(args.layers, 1, args.hid_dim)

        # evaluate loss on each candidate
        fcand_losses = {}
        bcand_losses = {}

        ftrg = [enc_c(c, char_dict) for c in f_tru]
        fcand_losses[f_tru] = conditioned_loss(ftrg, fmodel, loss_crit,
                                               f_hid_last, f_out_last)
        btrg = [enc_c(c, char_dict) for c in rev(b_tru)]
        bcand_losses[b_tru] = conditioned_loss(btrg, bmodel, loss_crit,
                                               b_hid_last, b_out_last)

        for n in negs:
            w = n['NEGATIVE']
            if n['PLACE'] == "PRE":
                if w not in fcand_losses:  # should always be the case
                    trg = [enc_c(c, char_dict) for c in w]
                    fcand_losses[w] = conditioned_loss(trg, fmodel, loss_crit,
                                                       f_hid_last, f_out_last)
            elif n['PLACE'] == "SUF":
                if w not in bcand_losses:  # should always be the case
                    trg = [enc_c(c, char_dict) for c in rev(w)]
                    bcand_losses[w] = conditioned_loss(trg, bmodel, loss_crit,
                                                       b_hid_last, b_out_last)
            else:
                raise Exception(f'unknown location value: {n["PLACE"]}')

        # complete from bases
        if f_tru not in fcand_losses:
            fcand_losses[f_tru] = 0.0
        if b_tru not in bcand_losses:
            bcand_losses[b_tru] = 0.0

        # rank
        ftnll = fcand_losses[f_tru]
        btnll = bcand_losses[b_tru]
        fnlls = sorted(fcand_losses.values())
        bnlls = sorted(bcand_losses.values())
        frank = fnlls.index(ftnll) + 1
        brank = bnlls.index(btnll) + 1
        instres = (frank, brank, fnlls[0], bnlls[0], len(fnlls), len(bnlls))
        instlog = (f'{ftnll:.3f}', f'{btnll:.3f}', str(frank), str(brank),
                   f'{fnlls[0]:.3f}', f'{bnlls[0]:.3f}', str(len(fnlls)),
                   str(len(bnlls)))
        results[instkey].append(instres)
        resultlog.append(instkey + instlog)

    for b, bs in bases.items():
        k = (b, bs[0], bs[-1])
        if k not in results:
            instres = (10000, 10000, 0, 0, 'CHECK', 'CHECK')
            results[k].append(instres)

    with open(args.output + '.log', 'w') as outf:
        outf.write(
            'form\tpref\tsuf\tpref nll\tsuf nll\tpref rank\tsuf rank\tpref min\tsuf min\t#prefs\t#sufs\n'
        )
        for res in resultlog:
            outf.write('\t'.join(res) + '\n')

    with open(args.output, 'w') as outf:
        outf.write(
            'Form\tPref\tSuf\tNULL\tBoth rank\tPref rank\tSuf rank\tpref max\tsuf max\t#prefs\t#sufs\n'
        )
        for k, resl in sorted(results.items()):
            mean_frank = np.average([r[0] for r in resl])
            mean_brank = np.average([r[1] for r in resl])
            both_rank = mean_frank * mean_brank
            mean_fmax = np.average([r[2] for r in resl])
            mean_bmax = np.average([r[3] for r in resl])
            assert len(set([r[4] for r in resl
                            ])) == 1, f'uneven prefix candidates in {k}'
            assert len(set([r[5] for r in resl
                            ])) == 1, f'uneven suffix candidates in {k}'
            outf.write(
                '\t'.join(k) +
                f'\t\t{both_rank:.1f}\t{mean_frank:.1f}\t{mean_brank:.1f}\t{mean_fmax:.3f}\t{mean_bmax:.3f}\t{resl[0][-2]}\t{resl[0][-1]}\n'
            )

    print(
        f'finished with {exceptions} unfound true values. reporting {len(resultlog)} results from {len(results)} blends.'
    )
Example #10
    
    
    SYMBOL_TABLE = os.path.join('../saved_model', 'vocab.sym')
    if args.type and os.path.exists(SYMBOL_TABLE):
         all_characters = list(set(open(SYMBOL_TABLE).read()))
    else:
         file = open(args.filename).read()
         print('Loaded file', args.filename)
         print('File length', len(file)/80, 'lines')
         all_characters = list(set(file))    
         with open(SYMBOL_TABLE, 'w') as vocab:
              print("".join(all_characters), file=vocab)
         
    n_characters = len(all_characters)
        
    decoder = CharRNN(n_characters, args.hidden_size,
                      n_characters, n_layers=args.n_layers)

    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    if args.type:
         # Enter typing mode
         print ('Typing Mode...')

         decoder = torch.load('../saved_model/linux.pt')         
         from typing import build_getch


         with build_getch() as getch:
              try:
                   getchar = getch()
Example #11
print("\ntrain/dev/test size: {:d}/{:d}/{:d}\n".format(len(train_y), len(dev_y), len(test_y)))


with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():

        # Instantiate our model
        rnn = CharRNN(
            vocabulary_size,
            FLAGS.sentence_length,
            FLAGS.batch_size,
            2,
            embedding_size=FLAGS.embedding_dim,
            hidden_dim=FLAGS.hidden_dim,
            num_layers=FLAGS.num_layers,
            loss=FLAGS.loss_type)

        # Generate input batches (using tensorflow)
        with tf.variable_scope("input"):
            placeholder_x = tf.placeholder(tf.int32, train_x.shape)
            placeholder_y = tf.placeholder(tf.float32, train_y.shape)
            train_x_var = tf.Variable(placeholder_x, trainable=False, collections=[])
            train_y_var = tf.Variable(placeholder_y, trainable=False, collections=[])
            x_slice, y_slice = tf.train.slice_input_producer([train_x_var, train_y_var], num_epochs=FLAGS.num_epochs)
            x_batch, y_batch = tf.train.batch([x_slice, y_slice], batch_size=FLAGS.batch_size)

        # Define Training procedure
Example #12
class BiLSTM_CRF:
    def __init__(self, config, model):
        self.num_layers = 1
        self.input_dim = config.embedding_dim
        self.model = model
        self.use_char_rnn = config.use_char_rnn

        self.char_rnn = CharRNN(config, model) if self.use_char_rnn else None
        input_size = self.input_dim if not self.char_rnn else self.input_dim + config.charlstm_hidden_dim
        self.bilstm = dy.BiRNNBuilder(1, input_size, config.hidden_dim,
                                      self.model, dy.LSTMBuilder)
        print("Input to word-level BiLSTM size: %d" % (input_size))
        print("BiLSTM hidden size: %d" % (config.hidden_dim))
        # self.bilstm.set_dropout(config.dropout_bilstm)
        self.num_labels = len(config.label2idx)
        self.label2idx = config.label2idx
        self.labels = config.idx2labels
        # print(config.hidden_dim)

        # self.tanh_w = self.model.add_parameters((config.tanh_hidden_dim, config.hidden_dim))
        # self.tanh_bias = self.model.add_parameters((config.tanh_hidden_dim,))

        self.linear_w = self.model.add_parameters(
            (self.num_labels, config.hidden_dim))
        self.linear_bias = self.model.add_parameters((self.num_labels, ))

        self.transition = self.model.add_lookup_parameters(
            (self.num_labels, self.num_labels))
        vocab_size = len(config.word2idx)
        self.word2idx = config.word2idx
        print("Word Embedding size: %d x %d" % (vocab_size, self.input_dim))
        self.word_embedding = self.model.add_lookup_parameters(
            (vocab_size, self.input_dim), init=config.word_embedding)

        self.dropout = config.dropout

    def save_shared_parameters(self):
        print("Saving the encoder parameter")
        # self.word_embedding.save("models/word_embedding.m")
        dy.save("basename", [
            self.char_rnn.char_emb, self.char_rnn.fw_lstm,
            self.char_rnn.bw_lstm, self.word_embedding, self.bilstm
        ])

    def build_graph_with_char(self, x, all_chars, is_train):

        if is_train:
            embeddings = []
            for w, chars in zip(x, all_chars):
                word_emb = self.word_embedding[w]
                f, b = self.char_rnn.forward_char(chars)
                concat = dy.concatenate([word_emb, f, b])
                embeddings.append(dy.dropout(concat, self.dropout))

        else:
            embeddings = []
            for w, chars in zip(x, all_chars):
                word_emb = self.word_embedding[w]
                f, b = self.char_rnn.forward_char(chars)
                concat = dy.concatenate([word_emb, f, b])
                embeddings.append(concat)
        lstm_out = self.bilstm.transduce(embeddings)
        # tanh_feats = [dy.tanh(dy.affine_transform([self.tanh_bias, self.tanh_w, rep])) for rep in lstm_out]
        features = [
            dy.affine_transform([self.linear_bias, self.linear_w, rep])
            for rep in lstm_out
        ]
        return features

    # computing the negative log-likelihood
    def build_graph(self, x, is_train):
        # dy.renew_cg()
        if is_train:
            embeddings = [
                dy.dropout(self.word_embedding[w], self.dropout) for w in x
            ]
        else:
            embeddings = [self.word_embedding[w] for w in x]
        lstm_out = self.bilstm.transduce(embeddings)
        features = [
            dy.affine_transform([self.linear_bias, self.linear_w, rep])
            for rep in lstm_out
        ]
        return features

    def forward_unlabeled(self, features):
        init_alphas = [-1e10] * self.num_labels
        init_alphas[self.label2idx[START]] = 0

        for_expr = dy.inputVector(init_alphas)
        for obs in features:
            alphas_t = []
            for next_tag in range(self.num_labels):
                obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                               self.num_labels)
                next_tag_expr = for_expr + self.transition[
                    next_tag] + obs_broadcast
                alphas_t.append(log_sum_exp(next_tag_expr, self.num_labels))
            for_expr = dy.concatenate(alphas_t)
        terminal_expr = for_expr + self.transition[self.label2idx[STOP]]
        alpha = log_sum_exp(terminal_expr, self.num_labels)
        return alpha

    # Labeled network score
    def forward_labeled(self, features, tags):
        score = dy.scalarInput(0)
        tags = [self.label2idx[w] for w in tags]
        tags = [self.label2idx[START]] + tags
        for i, obs in enumerate(features):
            score = score + dy.pick(self.transition[tags[i + 1]],
                                    tags[i]) + dy.pick(obs, tags[i + 1])
        labeled_score = score + dy.pick(self.transition[self.label2idx[STOP]],
                                        tags[-1])

        return labeled_score

    def negative_log(self, x, y, x_chars=None):
        features = self.build_graph(
            x, True) if not self.use_char_rnn else self.build_graph_with_char(
                x, x_chars, True)
        # features = self.build_graph(x, True)
        unlabed_score = self.forward_unlabeled(features)
        labeled_score = self.forward_labeled(features, y)
        return unlabed_score - labeled_score

    def viterbi_decoding(self, features):
        backpointers = []
        init_vvars = [-1e10] * self.num_labels
        init_vvars[
            self.label2idx[START]] = 0  # <Start> has all the probability
        for_expr = dy.inputVector(init_vvars)
        trans_exprs = [self.transition[idx] for idx in range(self.num_labels)]
        for obs in features:
            bptrs_t = []
            vvars_t = []
            for next_tag in range(self.num_labels):
                next_tag_expr = for_expr + trans_exprs[next_tag]
                next_tag_arr = next_tag_expr.npvalue()
                best_tag_id = np.argmax(next_tag_arr)
                bptrs_t.append(best_tag_id)
                vvars_t.append(dy.pick(next_tag_expr, best_tag_id))
            for_expr = dy.concatenate(vvars_t) + obs

            backpointers.append(bptrs_t)
        # Perform final transition to terminal
        terminal_expr = for_expr + trans_exprs[self.label2idx[STOP]]
        terminal_arr = terminal_expr.npvalue()
        best_tag_id = np.argmax(terminal_arr)
        path_score = dy.pick(terminal_expr, best_tag_id)
        # Reverse over the backpointers to get the best path
        best_path = [best_tag_id
                     ]  # Start with the tag that was best for terminal
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        start = best_path.pop()  # Remove the start symbol
        best_path.reverse()
        assert start == self.label2idx[START]
        # Return best path and best path's score
        return best_path, path_score

    def decode(self, x, x_chars=None):
        features = self.build_graph(
            x, False) if not self.use_char_rnn else self.build_graph_with_char(
                x, x_chars, False)
        # features = self.build_graph(x, False)
        best_path, path_score = self.viterbi_decoding(features)
        best_path = [self.labels[x] for x in best_path]
        # print(best_path)
        # print('path_score:', path_score.value())
        return best_path
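A minimal training-loop sketch for the BiLSTM_CRF class above: the toy config, vocabulary, and data are assumptions, START/STOP refer to the tag constants the class already relies on, and a random matrix stands in for pretrained word vectors.

import dynet as dy
import numpy as np
from types import SimpleNamespace

# Toy inventories; values are assumptions for illustration only.
labels = ["O", "B-PER", "I-PER", START, STOP]
word2idx = {"<unk>": 0, "john": 1, "smith": 2}
config = SimpleNamespace(
    embedding_dim=50, hidden_dim=100, charlstm_hidden_dim=25,
    use_char_rnn=False, dropout=0.5,
    label2idx={l: i for i, l in enumerate(labels)}, idx2labels=labels,
    word2idx=word2idx,
    word_embedding=np.random.rand(len(word2idx), 50),  # stand-in for pretrained vectors
)

model = dy.ParameterCollection()
trainer = dy.SimpleSGDTrainer(model)
crf = BiLSTM_CRF(config, model)

train_data = [([1, 2], ["B-PER", "I-PER"])]   # toy (word ids, gold labels) pair
for epoch in range(5):
    for x, y in train_data:
        dy.renew_cg()                         # fresh computation graph per sentence
        loss = crf.negative_log(x, y)
        loss.value()                          # forward pass
        loss.backward()
        trainer.update()

print(crf.decode([1, 2]))                     # Viterbi-decoded label sequence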
Example #13
from prepare_hamlet_text import text_ints, chars
from char_rnn_utiles import reshape_data
from char_rnn import CharRNN

batch_size = 64
num_steps = 100 
train_x, train_y = reshape_data(text_ints, 
                                batch_size, 
                                num_steps)

rnn = CharRNN(num_classes=len(chars), batch_size=batch_size)
rnn.train(train_x, train_y, 
          num_epochs=100,
          ckpt_dir='./model-100/')
Example #14
class Soft_BiLSTM_CRF:
    def __init__(self, config, model):
        self.num_layers = 1
        self.input_dim = config.embedding_dim
        self.model = model
        self.use_char_rnn = config.use_char_rnn

        self.char_rnn = CharRNN(config, model) if self.use_char_rnn else None
        input_size = self.input_dim if not self.char_rnn else self.input_dim + config.charlstm_hidden_dim
        self.bilstm = dy.BiRNNBuilder(1, input_size, config.hidden_dim,
                                      self.model, dy.LSTMBuilder)
        print("Input to word-level BiLSTM size: %d" % (input_size))
        print("BiLSTM hidden size: %d" % (config.hidden_dim))
        # self.bilstm.set_dropout(config.dropout_bilstm)
        self.num_labels = len(config.label2idx)
        self.label2idx = config.label2idx
        self.labels = config.idx2labels
        # print(config.hidden_dim)

        self.linear_w = self.model.add_parameters(
            (self.num_labels, config.hidden_dim))
        self.linear_bias = self.model.add_parameters((self.num_labels, ))

        trans_np = np.random.rand(self.num_labels, self.num_labels)

        trans_np[self.label2idx[START], :] = -1e10
        trans_np[:, self.label2idx[STOP]] = -1e10
        self.init_iobes_constraint(trans_np)

        self.transition = self.model.add_lookup_parameters(
            (self.num_labels, self.num_labels), init=trans_np)
        vocab_size = len(config.word2idx)
        self.word2idx = config.word2idx
        print("Word Embedding size: %d x %d" % (vocab_size, self.input_dim))
        self.word_embedding = self.model.add_lookup_parameters(
            (vocab_size, self.input_dim), init=config.word_embedding)

        self.dropout = config.dropout

    def init_iobes_constraint(self, trans_np):
        for l1 in range(self.num_labels):
            ##previous label
            if l1 == self.label2idx[START] or l1 == self.label2idx[STOP]:
                continue
            for l2 in range(self.num_labels):
                ##next label
                if l2 == self.label2idx[START] or l2 == self.label2idx[STOP]:
                    continue
                if not check_bies_constraint(self.labels[l1], self.labels[l2]):
                    trans_np[l2, l1] = -1e10

    def build_graph_with_char(self, x, all_chars, is_train):

        if is_train:
            embeddings = []
            for w, chars in zip(x, all_chars):
                word_emb = self.word_embedding[w]
                f, b = self.char_rnn.forward_char(chars)
                concat = dy.concatenate([word_emb, f, b])
                embeddings.append(dy.dropout(concat, self.dropout))

        else:
            embeddings = []
            for w, chars in zip(x, all_chars):
                word_emb = self.word_embedding[w]
                f, b = self.char_rnn.forward_char(chars)
                concat = dy.concatenate([word_emb, f, b])
                embeddings.append(concat)
        lstm_out = self.bilstm.transduce(embeddings)
        features = [
            dy.affine_transform([self.linear_bias, self.linear_w, rep])
            for rep in lstm_out
        ]
        return features

    # computing the negative log-likelihood
    def build_graph(self, x, is_train):
        # dy.renew_cg()
        if is_train:
            embeddings = [
                dy.dropout(self.word_embedding[w], self.dropout) for w in x
            ]
        else:
            embeddings = [self.word_embedding[w] for w in x]
        lstm_out = self.bilstm.transduce(embeddings)
        features = [
            dy.affine_transform([self.linear_bias, self.linear_w, rep])
            for rep in lstm_out
        ]
        return features

    def forward_unlabeled(self, features):
        init_alphas = [-1e10] * self.num_labels
        init_alphas[self.label2idx[START]] = 0

        for_expr = dy.inputVector(init_alphas)
        for obs in features:
            alphas_t = []
            for next_tag in range(self.num_labels):
                obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                               self.num_labels)
                next_tag_expr = for_expr + self.transition[
                    next_tag] + obs_broadcast
                alphas_t.append(log_sum_exp(next_tag_expr, self.num_labels))
            for_expr = dy.concatenate(alphas_t)
        terminal_expr = for_expr + self.transition[self.label2idx[STOP]]
        alpha = log_sum_exp(terminal_expr, self.num_labels)
        return alpha

    # Labeled network score
    def forward_labeled(self, id, features, marginals):
        init_alphas = [-1e10] * self.num_labels
        init_alphas[self.label2idx[START]] = 0
        for_expr = dy.inputVector(init_alphas)
        # print(id)
        # print(len(features))
        # print(self.mask_tensor[id].dim())
        marginal = dy.inputTensor(marginals)
        for pos, obs in enumerate(features):

            alphas_t = []
            for next_tag in range(self.num_labels):
                obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                               self.num_labels)
                next_tag_expr = for_expr + self.transition[
                    next_tag] + obs_broadcast
                score = log_sum_exp(next_tag_expr, self.num_labels)
                alphas_t.append(score)
                # print(self.transition[next_tag].value())
                # print(" pos is %d,  tag is %s, label score is %.2f "% ( pos, self.labels[next_tag],score.value()) )
            for_expr = dy.concatenate(alphas_t) + marginal[pos]
        terminal_expr = for_expr + self.transition[self.label2idx[STOP]]
        alpha = log_sum_exp(terminal_expr, self.num_labels)
        return alpha

    def negative_log(self, id, x, y, x_chars=None, marginals=None):
        features = self.build_graph(
            x, True) if not self.use_char_rnn else self.build_graph_with_char(
                x, x_chars, True)
        # features = self.build_graph(x, True)
        unlabed_score = self.forward_unlabeled(features)
        labeled_score = self.forward_labeled(id, features, marginals)
        return unlabed_score - labeled_score

    def viterbi_decoding(self, features):
        backpointers = []
        init_vvars = [-1e10] * self.num_labels
        init_vvars[
            self.label2idx[START]] = 0  # <Start> has all the probability
        for_expr = dy.inputVector(init_vvars)
        trans_exprs = [self.transition[idx] for idx in range(self.num_labels)]
        for obs in features:
            bptrs_t = []
            vvars_t = []
            for next_tag in range(self.num_labels):
                next_tag_expr = for_expr + trans_exprs[next_tag]
                next_tag_arr = next_tag_expr.npvalue()
                best_tag_id = np.argmax(next_tag_arr)
                bptrs_t.append(best_tag_id)
                vvars_t.append(dy.pick(next_tag_expr, best_tag_id))
            for_expr = dy.concatenate(vvars_t) + obs

            backpointers.append(bptrs_t)
        # Perform final transition to terminal
        terminal_expr = for_expr + trans_exprs[self.label2idx[STOP]]
        terminal_arr = terminal_expr.npvalue()
        best_tag_id = np.argmax(terminal_arr)
        path_score = dy.pick(terminal_expr, best_tag_id)
        # Reverse over the backpointers to get the best path
        best_path = [best_tag_id
                     ]  # Start with the tag that was best for terminal
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        start = best_path.pop()  # Remove the start symbol
        best_path.reverse()
        assert start == self.label2idx[START]
        # Return best path and best path's score
        return best_path, path_score

    def constrained_viterbi_decoding(self, features, tags, is_prediction):
        backpointers = []
        init_vvars = [-1e10] * self.num_labels
        init_vvars[
            self.label2idx[START]] = 0  # <Start> has all the probability
        for_expr = dy.inputVector(init_vvars)
        trans_exprs = [self.transition[idx] for idx in range(self.num_labels)]
        for pos, obs in enumerate(features):
            bptrs_t = []
            vvars_t = []
            if not is_prediction[pos]:
                mask = dy.inputVector([-1e10] * self.num_labels)
                for next_tag in range(self.num_labels):
                    next_tag_expr = for_expr + trans_exprs[
                        next_tag] if next_tag == tags[pos] else for_expr + mask
                    next_tag_arr = next_tag_expr.npvalue()
                    best_tag_id = np.argmax(next_tag_arr)
                    bptrs_t.append(best_tag_id)
                    vvars_t.append(dy.pick(next_tag_expr, best_tag_id))
            else:
                for next_tag in range(self.num_labels):
                    next_tag_expr = for_expr + trans_exprs[next_tag]
                    next_tag_arr = next_tag_expr.npvalue()
                    best_tag_id = np.argmax(next_tag_arr)
                    bptrs_t.append(best_tag_id)
                    vvars_t.append(dy.pick(next_tag_expr, best_tag_id))
            for_expr = dy.concatenate(vvars_t) + obs

            backpointers.append(bptrs_t)
        # Perform final transition to terminal
        terminal_expr = for_expr + trans_exprs[self.label2idx[STOP]]
        terminal_arr = terminal_expr.npvalue()
        best_tag_id = np.argmax(terminal_arr)
        path_score = dy.pick(terminal_expr, best_tag_id)
        # Reverse over the backpointers to get the best path
        best_path = [best_tag_id
                     ]  # Start with the tag that was best for terminal
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        start = best_path.pop()  # Remove the start symbol
        best_path.reverse()
        assert start == self.label2idx[START]
        # Return best path and best path's score
        return best_path, path_score

    def decode(self,
               x,
               x_chars=None,
               is_constrained=False,
               y=None,
               is_prediction=None):
        features = self.build_graph(
            x, False) if not self.use_char_rnn else self.build_graph_with_char(
                x, x_chars, False)
        # features = self.build_graph(x, False)
        best_path, path_score = self.viterbi_decoding(features) if not is_constrained else \
            self.constrained_viterbi_decoding(features, y, is_prediction)
        if not is_constrained:
            best_path = [self.labels[x] for x in best_path]
        # print(best_path)
        # print('path_score:', path_score.value())
        return best_path

    def max_marginal_decode(self, x, x_chars=None, y=None, is_prediction=None):
        features = self.build_graph(
            x, False) if not self.use_char_rnn else self.build_graph_with_char(
                x, x_chars, False)
        init_alphas = [-1e10] * self.num_labels
        init_alphas[self.label2idx[START]] = 0
        for_expr = dy.inputVector(init_alphas)
        all_alphas = []
        # print(y)
        for pos, obs in enumerate(features):
            alphas_t = []
            for next_tag in range(self.num_labels):
                obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                               self.num_labels)
                next_tag_expr = for_expr + self.transition[
                    next_tag] + obs_broadcast
                if (not is_prediction[pos]) and next_tag != y[pos]:
                    mask = dy.inputVector([-1e10] * self.num_labels)
                    next_tag_expr = next_tag_expr + mask
                alphas_t.append(log_sum_exp(next_tag_expr, self.num_labels))
            for_expr = dy.concatenate(alphas_t)
            all_alphas.append(for_expr)
        terminal_expr = for_expr + self.transition[self.label2idx[STOP]]
        final_alpha = log_sum_exp(terminal_expr, self.num_labels)
        final_alpha.forward()

        ##backward
        # print(self.transition[self.label2idx[STOP]].value())
        previous_trans = dy.transpose(dy.transpose(self.transition))
        # print(previous_trans.value()[:,self.label2idx[STOP]])
        init_betas = [-1e10] * self.num_labels
        init_betas[self.label2idx[STOP]] = 0
        back_expr = dy.inputVector(init_betas)
        all_betas = []
        for rev_pos, obs in enumerate(features[::-1]):
            betas_t = []
            for previous_tag in range(self.num_labels):
                obs_broadcast = dy.concatenate([dy.pick(obs, previous_tag)] *
                                               self.num_labels)
                prev_tag_expr = back_expr + previous_trans[
                    previous_tag] + obs_broadcast
                if (not is_prediction[-rev_pos -
                                      1]) and previous_tag != y[-rev_pos - 1]:
                    mask = dy.inputVector([-1e10] * self.num_labels)
                    prev_tag_expr = prev_tag_expr + mask
                score = log_sum_exp(prev_tag_expr, self.num_labels)
                betas_t.append(score)
            back_expr = dy.concatenate(betas_t)
            all_betas.append(back_expr)
        start_expr = back_expr + previous_trans[self.label2idx[START]]
        final_beta = log_sum_exp(start_expr, self.num_labels)
        final_beta.forward()
        all_betas_rev = all_betas[::-1]
        marginals = []
        # print(final_alpha.value())
        # print(final_beta.value())
        k = 0
        for f, b in zip(all_alphas, all_betas_rev):
            marginal = f + b - final_alpha - features[k]
            x = marginal.value()
            marginals.append(x)
            # print("log")
            # print(x)
            # print("prob")
            k += 1
            # print(math.fsum([ math.exp(w)  for w in x]))

        return marginals