def __init__(self, options, data_train):
    """Load a pre-trained span model and the pickled derivations/grammar
    needed for HRG parsing.

    :param options: run-time options; span-model options loaded from disk are
        merged underneath them (run-time values win, see AttrDict/chain below).
    :param data_train: accepted for interface symmetry; not used here.
    """
    self.model = dn.Model()
    # model_load_helper returns ((saved_options, statistics), savable_container).
    (span_model_options, statistics), self.container = nn.model_load_helper(
        options.span_model_format, options.span_model_prefix, self.model)
    logger.info(statistics)
    self.statistics = statistics
    self.options = options
    # chain(saved, runtime): later duplicate keys overwrite earlier ones, so
    # runtime options take precedence over the options stored with the model.
    self.options.__dict__ = AttrDict(
        chain(span_model_options.__dict__.items(), options.__dict__.items()))
    logger.info(pformat(self.options.__dict__))
    # Pass the learning rate only when one was explicitly given.
    self.optimizer = nn.trainers[options.optimizer](
        *((self.model, options.learning_rate
           ) if options.learning_rate is not None else (self.model, )))
    # encoding="latin1" lets Python 3 read pickles produced under Python 2.
    with open(options.derivations, "rb") as f:
        self.derivations = pickle.load(f, encoding="latin1")
    self.hrg_statistics = HRGStatistics.from_derivations(self.derivations)
    self.span_ebd_network, self.span_eval_network, self.label_eval_network = self.container.components
    self.scorer_network = self.scorers[options.scorer](self.container,
                                                       self.hrg_statistics,
                                                       self.options)
    with open(options.grammar, "rb") as f:
        self.grammar = pickle.load(
            f, encoding="latin1")  # type: Mapping[str, Mapping[CFGRule, int]]
    # cfg_lhs -> Counter of purely-lexical right-hand sides.
    self.terminal_mapping = defaultdict(
        Counter)  # type: Mapping[str, typing.Counter]
    for (cfg_lhs, cfg_rhs_list), value in self.grammar.items():
        if all(isinstance(i, HLexicon) for i in cfg_rhs_list):
            self.terminal_mapping[cfg_lhs] += value
    # (lexicon, main-node count) -> Counter, keyed by the "name#count" suffix
    # encoded in each cfg_lhs.
    self.lexicon_mapping = defaultdict(
        Counter)  # type: Mapping[Tuple[HLexicon, int], typing.Counter]
    for (cfg_lhs, cfg_rhs_list), value in self.grammar.items():
        rule_name, main_node_count = cfg_lhs.rsplit("#", 1)
        main_node_count = int(main_node_count)
        if all(isinstance(i, HLexicon) for i in cfg_rhs_list):
            lexicon = cfg_rhs_list[0]
            self.lexicon_mapping[lexicon, main_node_count] += value
    self.lemmatizer = WordNetLemmatizer()
    if options.unlexicalized_rules is not None:
        with open(options.unlexicalized_rules, "rb") as f:
            self.unlexicalized_rules = pickle.load(f)
def __init__(
        self,
        bigrams_size,
        unigrams_size,
        bigrams_dims,
        unigrams_dims,
        lstm_units,
        hidden_units,
        label_size,
        span_nums,
        droprate=0,
):
    """Build a span classifier: bigram+unigram embeddings feeding a two-layer
    biLSTM, then one hidden layer and a label softmax.

    NOTE(review): DyNet registers parameters in creation order; do not reorder
    the add_*_parameters / LSTM constructions below or previously saved models
    will no longer load correctly.
    """
    self.bigrams_size = bigrams_size
    self.bigrams_dims = bigrams_dims
    self.unigrams_dims = unigrams_dims
    self.unigrams_size = unigrams_size
    self.lstm_units = lstm_units
    self.hidden_units = hidden_units
    self.span_nums = span_nums
    self.droprate = droprate
    self.label_size = label_size
    self.model = dynet.Model()
    self.trainer = dynet.AdadeltaTrainer(self.model, eps=1e-7, rho=0.99)
    # Fixed seed for reproducible initialization-dependent behavior.
    random.seed(1)
    self.activation = dynet.rectify
    self.bigram_embed = self.model.add_lookup_parameters(
        (self.bigrams_size, self.bigrams_dims), )
    self.unigram_embed = self.model.add_lookup_parameters(
        (self.unigrams_size, self.unigrams_dims), )
    # First biLSTM layer consumes concatenated bigram+unigram embeddings.
    self.fwd_lstm1 = LSTM(self.bigrams_dims + self.unigrams_dims,
                          self.lstm_units, self.model)
    self.back_lstm1 = LSTM(self.bigrams_dims + self.unigrams_dims,
                           self.lstm_units, self.model)
    # Second layer consumes the concatenated fwd+back outputs of layer 1.
    self.fwd_lstm2 = LSTM(2 * self.lstm_units, self.lstm_units, self.model)
    self.back_lstm2 = LSTM(2 * self.lstm_units, self.lstm_units, self.model)
    # Hidden layer over span_nums spans of bidirectional LSTM features.
    self.p_hidden_W = self.model.add_parameters(
        (self.hidden_units, 2 * self.span_nums * self.lstm_units),
        dynet.UniformInitializer(0.01))
    self.p_hidden_b = self.model.add_parameters((self.hidden_units, ),
                                                dynet.ConstInitializer(0))
    # Output weights deliberately start at zero (ConstInitializer(0)).
    self.p_output_W = self.model.add_parameters(
        (self.label_size, self.hidden_units), dynet.ConstInitializer(0))
    self.p_output_b = self.model.add_parameters((self.label_size, ),
                                                dynet.ConstInitializer(0))
def __init__(self, vocab, properties):
    """Build a feed-forward transition-based parser network with word, POS
    and dependency-label embeddings and two hidden layers.
    """
    self.properties = properties
    self.vocab = vocab
    # first initialize a computation graph container (or model).
    self.model = dynet.Model()
    # assign the algorithm for backpropagation updates.
    self.updater = dynet.AdamTrainer(self.model)
    # create embeddings for words and tag features.
    self.word_embedding = self.model.add_lookup_parameters(
        (vocab.num_words(), properties.word_embed_dim))
    self.tag_embedding = self.model.add_lookup_parameters(
        (vocab.num_tags(), properties.pos_embed_dim))
    self.dep_embedding = self.model.add_lookup_parameters(
        (vocab.num_dep(), properties.dep_embed_dim))
    # assign transfer function
    self.transfer = dynet.rectify  # can be dynet.logistic or dynet.tanh as well.
    # Input layer concatenates 20 word, 20 POS, and 12 dependency-label
    # embeddings (presumably the standard Chen & Manning feature set —
    # TODO confirm against the feature-extraction code).
    self.input_dim = 20 * properties.word_embed_dim + 20 * properties.pos_embed_dim + 12 * properties.dep_embed_dim
    # define the first hidden layer.
    self.hidden_layer1 = self.model.add_parameters(
        (properties.hidden_dim, self.input_dim))
    # define the first hidden layer bias term and initialize it as constant 0.2.
    self.hidden_layer_bias1 = self.model.add_parameters(
        properties.hidden_dim, init=dynet.ConstInitializer(0.2))
    # define the second hidden layer.
    self.hidden_layer2 = self.model.add_parameters(
        (properties.hidden_dim, properties.hidden_dim))
    # define the second hidden layer bias term and initialize it as constant 0.2.
    self.hidden_layer_bias2 = self.model.add_parameters(
        properties.hidden_dim, init=dynet.ConstInitializer(0.2))
    # define the output weight.
    self.output_layer = self.model.add_parameters(
        (vocab.num_actions(), properties.hidden_dim))
    # define the bias vector and initialize it as zero.
    self.output_bias = self.model.add_parameters(
        vocab.num_actions(), init=dynet.ConstInitializer(0))
def __init__(self, model=None, meta=None, wvm=None):
    """Build (or reload) a biLSTM tagger with attention over word states.

    :param model: path prefix of a saved model; loads '<model>.meta' and
        populates weights from '<model>.dy'.
    :param meta: configuration object used when training from scratch.
    :param wvm: optional pretrained word-vector model (gensim-style, exposes
        .syn0); its dimensionality overrides meta.w_dim_e.
    """
    self.model = dy.Model()
    self.meta = pickle.load(open('%s.meta' % model, 'rb')) if model else meta
    self.trainer = self.meta.trainer(self.model)
    # pretrained embeddings
    if wvm:
        self.wvm = wvm
        self.meta.w_dim_e = wvm.syn0.shape[1]
    # MLP on top of biLSTM outputs 100 -> 32 -> ntags
    self.w1 = self.model.add_parameters(
        (self.meta.n_hidden, self.meta.lstm_word_dim * 2))
    self.w2 = self.model.add_parameters(
        (self.meta.n_tags, self.meta.n_hidden))
    self.b1 = self.model.add_parameters(self.meta.n_hidden)
    self.b2 = self.model.add_parameters(self.meta.n_tags)
    # Attention parameters over biLSTM states.
    self.aw = self.model.add_parameters(
        (self.meta.lstm_word_dim, self.meta.lstm_word_dim * 2))
    self.ab = self.model.add_parameters(self.meta.lstm_word_dim)
    self.av = self.model.add_parameters((1, self.meta.lstm_word_dim))
    # word-level LSTMs
    # NOTE(review): the `* 0` factor disables the char-LSTM contribution to
    # the word-LSTM input while keeping the original expression visible.
    self.fwdRNN = dy.LSTMBuilder(
        1, self.meta.w_dim_e + self.meta.lstm_char_dim * 0,
        self.meta.lstm_word_dim, self.model)
    self.bwdRNN = dy.LSTMBuilder(
        1, self.meta.w_dim_e + self.meta.lstm_char_dim * 0,
        self.meta.lstm_word_dim, self.model)
    # Second biLSTM layer over concatenated first-layer outputs.
    self.fwdRNN2 = dy.LSTMBuilder(1, self.meta.lstm_word_dim * 2,
                                  self.meta.lstm_word_dim, self.model)
    self.bwdRNN2 = dy.LSTMBuilder(1, self.meta.lstm_word_dim * 2,
                                  self.meta.lstm_word_dim, self.model)
    # char-level LSTMs
    self.cfwdRNN = dy.LSTMBuilder(1, self.meta.c_dim,
                                  self.meta.lstm_char_dim, self.model)
    self.cbwdRNN = dy.LSTMBuilder(1, self.meta.c_dim,
                                  self.meta.lstm_char_dim, self.model)
    # unk for unknown word embeddings
    self.unk = np.zeros(self.meta.w_dim_e)
    # define char lookup table
    self.CHAR_LOOKUP = self.model.add_lookup_parameters(
        (self.meta.n_chars, self.meta.c_dim))
    # load pretrained dynet model (must happen after all parameters above
    # have been created, in the same order as when the model was saved)
    if model:
        self.model.populate('%s.dy' % model)
def __init__(self):
    """Initialize a tiny feed-forward n-gram language model (N=3).

    NOTE(review): M, W_mh, b_hh, W_hs and b_s are created but never bound to
    self; the parameters stay registered inside self.model, but these Python
    handles are lost when __init__ returns — confirm they are re-obtained
    elsewhere before use.
    """
    self.feats_and_values = {}
    # wids assigns a fresh consecutive id on first lookup of a new word.
    self.wids = defaultdict(lambda: len(self.wids))
    self.unigrams = {}
    self.model = dy.Model()
    self.EMB_SIZE = 1
    self.HID_SIZE = 1
    self.N = 3  # n-gram order: predict from the previous N-1 words.
    # NOTE(review): len(self.wids) is 0 at this point, so these shapes are
    # degenerate unless the vocabulary is populated before this runs.
    M = self.model.add_lookup_parameters((len(self.wids), self.EMB_SIZE))
    W_mh = self.model.add_parameters(
        (self.HID_SIZE, self.EMB_SIZE * (self.N - 1)))
    b_hh = self.model.add_parameters((self.HID_SIZE))
    W_hs = self.model.add_parameters((len(self.wids), self.HID_SIZE))
    b_s = self.model.add_parameters((len(self.wids)))
def get_trained_embeds(data):
    """Train the tagger and relation extractor over *data*, one sentence at
    a time, pairing every person entity with every location entity.

    :param data: iterable of sentence objects exposing .ent (entities) and
        .doc (raw text).
    """
    sentence_dict = {}
    model = dy.Model()
    re_model = dy.Model()
    tagger = BI_LSTM(model, LAYERS, EMBED_DIM, HIDDEN_DIM, len(vocabulary))
    relation_extractor = RE_MLP(re_model, EMBED_DIM, HIDDEN_DIM)
    for sentence in data:
        # Relations need at least two entities.
        if len(sentence.ent) < 2:
            continue
        good = bad = 0.0
        sentence_idxs = [vocabulary[word] for word in sentence.doc.split()]
        encodings = tagger.train_sentence(sentence, sentence_idxs)
        location_entities, person_entities = get_relations_from_sentence(
            sentence)
        # BUG FIX: itertools.product takes each iterable as a separate
        # argument. The original passed one list of two lists, which yields
        # two 1-tuples (each wrapping a whole entity list) instead of the
        # (person, location) pairs the loop body expects.
        cartesian_product = itertools.product(person_entities,
                                              location_entities)
        for relation in cartesian_product:
            # NOTE(review): sentence_num is a free name here (presumably a
            # module-level counter) — verify it is maintained by the caller.
            is_relation = (relation in relation_dict[sentence_num])
            pred_relation = relation_extractor.train_relation(
                relation, is_relation)
            good += 1 if is_relation == pred_relation else 0
            bad += 1 if is_relation != pred_relation else 0
def __init__(self, config, encodings, embeddings, runtime=False):
    """Build a character-level encoder/decoder with COPY/INC/TOK/EOS
    extended output symbols.

    :param runtime: when True, sub-networks are built for inference
        (no dropout/orthonormal init inside CharacterNetwork).
    """
    self.config = config
    self.word_embeddings = embeddings
    self.encodings = encodings
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model,
                                  alpha=2e-3,
                                  beta_1=0.9,
                                  beta_2=0.9)
    # Imported lazily to avoid a module-level circular import.
    from character_embeddings import CharacterNetwork
    self.encoder = CharacterNetwork(self.config.character_embeddings_size,
                                    encodings,
                                    self.config.encoder_size,
                                    self.config.encoder_layers,
                                    self.config.character_embeddings_size,
                                    self.model,
                                    runtime=runtime)
    # Decoder consumes the bidirectional encoder state (2x encoder_size).
    self.decoder = dy.VanillaLSTMBuilder(self.config.decoder_layers,
                                         self.config.encoder_size * 2,
                                         self.config.decoder_size,
                                         self.model)
    # Learned start-of-sequence input for the decoder.
    self.decoder_start_lookup = self.model.add_lookup_parameters(
        (1, self.config.encoder_size * 2))
    # NOTE(review): attention parameters (att_w1/att_w2/att_v) were
    # commented out in the original source.
    self.softmax_w = self.model.add_parameters(
        (len(self.encodings.char2int) + 4, self.config.decoder_size)
    )  # all known characters except digits with COPY, INC, TOK and EOS
    self.softmax_b = self.model.add_parameters(
        (len(self.encodings.char2int) + 4))
    self.softmax_comp_w = self.model.add_parameters(
        (2, self.config.character_embeddings_size))
    self.softmax_comp_b = self.model.add_parameters((2))
    # Special output symbols appended after the character vocabulary.
    self.label2int = {}
    ofs = len(self.encodings.char2int)
    self.label2int['<EOS>'] = ofs
    self.label2int['<TOK>'] = ofs + 1
    self.label2int['<COPY>'] = ofs + 2
    self.label2int['<INC>'] = ofs + 3
    self.losses = []
def __init__(self, options, data_train):
    """Build a supertagger: sentence embeddings, a per-tag classifier and a
    Viterbi decoder, all sharing one parameter container.

    :param options: training options (optimizer name, learning_rate, ...).
    :param data_train: training sentences used to collect corpus statistics.
    """
    self.model = dn.Model()
    # Pass the learning rate only when one was explicitly given; otherwise
    # let the trainer use its own default.
    self.optimizer = nn.trainers[options.optimizer](
        *((self.model, options.learning_rate)
          if options.learning_rate is not None else (self.model,)))
    self.options = options
    self.statistics = Statistics.from_sentences(data_train)
    logger.info(str(self.statistics))
    self.container = nn.Container(self.model)
    self.sent_embeddings = SentenceEmbeddings(self.container,
                                              self.statistics, options)
    self.tag_classification = POSTagClassification(
        self.container, self.statistics.supertags, options)
    self.tag_dict = self.statistics.supertags
    self.viterbi_decoder = ViterbiDecoder(self.container,
                                          self.statistics.supertags,
                                          self.options)
def main():
    """Entry point: build the GAN pair and the pretrained RNN, then train."""
    generator = Generator().cuda()
    discriminator = Discriminator().cuda()
    # Load the saved DyNet language model used alongside the GAN.
    dynet_model = dy.Model()
    rnn = network.Network(W2I, I2W, dynet_model)
    dynet_model.populate("model.m")
    train(
        generator,
        discriminator,
        rnn,
        num_epochs=80000000,
        dis_updates=1,
        gen_updates=1,
    )
def init(self, config):
    """Build the encoder-decoder network from a configuration dict.

    :param config: dict with keys LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
        STATE_SIZE, DROPOUT and BEAM_SIZE.
    """
    dy.renew_cg()
    self.INPUT_VOCAB_SIZE = len(self.vocab['input'])
    self.OUTPUT_VOCAB_SIZE = len(self.vocab['output'])
    self.LSTM_NUM_OF_LAYERS = config['LSTM_NUM_OF_LAYERS']
    self.EMBEDDINGS_SIZE = config['EMBEDDINGS_SIZE']
    self.STATE_SIZE = config['STATE_SIZE']
    self.DROPOUT = config['DROPOUT']
    self.BEAM = config['BEAM_SIZE']
    self.model = dy.Model()
    # ENCODERS: two bidirectional LSTM pairs (pre- and post-context —
    # presumably two separate input segments; confirm against the caller).
    self.encpre_fwd_lstm = dy.LSTMBuilder(self.LSTM_NUM_OF_LAYERS,
                                          self.EMBEDDINGS_SIZE,
                                          self.STATE_SIZE, self.model)
    self.encpre_bwd_lstm = dy.LSTMBuilder(self.LSTM_NUM_OF_LAYERS,
                                          self.EMBEDDINGS_SIZE,
                                          self.STATE_SIZE, self.model)
    self.encpre_fwd_lstm.set_dropout(self.DROPOUT)
    self.encpre_bwd_lstm.set_dropout(self.DROPOUT)
    self.encpos_fwd_lstm = dy.LSTMBuilder(self.LSTM_NUM_OF_LAYERS,
                                          self.EMBEDDINGS_SIZE,
                                          self.STATE_SIZE, self.model)
    self.encpos_bwd_lstm = dy.LSTMBuilder(self.LSTM_NUM_OF_LAYERS,
                                          self.EMBEDDINGS_SIZE,
                                          self.STATE_SIZE, self.model)
    self.encpos_fwd_lstm.set_dropout(self.DROPOUT)
    self.encpos_bwd_lstm.set_dropout(self.DROPOUT)
    # DECODER: input is both encoders' bi-states plus two embeddings.
    self.dec_lstm = dy.LSTMBuilder(
        self.LSTM_NUM_OF_LAYERS,
        (self.STATE_SIZE * 4) + (self.EMBEDDINGS_SIZE * 2),
        self.STATE_SIZE, self.model)
    self.dec_lstm.set_dropout(self.DROPOUT)
    # EMBEDDINGS
    self.input_lookup = self.model.add_lookup_parameters(
        (self.INPUT_VOCAB_SIZE, self.EMBEDDINGS_SIZE))
    self.output_lookup = self.model.add_lookup_parameters(
        (self.OUTPUT_VOCAB_SIZE, self.EMBEDDINGS_SIZE))
    # SOFTMAX projection from decoder state to output vocabulary.
    self.decoder_w = self.model.add_parameters(
        (self.OUTPUT_VOCAB_SIZE, self.STATE_SIZE))
    self.decoder_b = self.model.add_parameters((self.OUTPUT_VOCAB_SIZE))
def __init__(self,
             character_embeddings_size,
             encodings,
             rnn_size=100,
             rnn_layers=1,
             embeddings_size=100,
             model=None,
             runtime=False):
    """Character-level biLSTM encoder producing fixed-size embeddings via a
    linear layer over the last state plus attention over all states.

    :param model: existing DyNet model to register parameters in; a fresh
        model is created when None.
    :param runtime: plain LSTM builders for inference; orthonormal
        initialization variants for training.
    """
    if model is None:
        self.model = dy.Model()
    else:
        self.model = model
    self.encodings = encodings
    self.character_embeddings_size = character_embeddings_size
    self.embeddings_size = embeddings_size
    self.num_characters = len(encodings.char2int)
    self.character_lookup = self.model.add_lookup_parameters(
        (self.num_characters, character_embeddings_size))
    self.rnn_fw = []
    self.rnn_bw = []
    self.rnn_layers = rnn_layers
    self.rnn_size = rnn_size
    # +3 extra input features beyond the character embedding (presumably
    # casing/flag features — confirm against the forward pass).
    input_size = character_embeddings_size + 3
    for _ in range(rnn_layers):
        if runtime:
            self.rnn_fw.append(
                dy.VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
            self.rnn_bw.append(
                dy.VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
        else:
            from generic_networks.utils import orthonormal_VanillaLSTMBuilder
            self.rnn_fw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, rnn_size,
                                               self.model))
            self.rnn_bw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, rnn_size,
                                               self.model))
        # Stacked layers consume the previous layer's bi-directional output.
        input_size = rnn_size * 2
    self.linearW = self.model.add_parameters(
        (embeddings_size,
         rnn_size * 4))  # last state and attention over the other states
    self.linearB = self.model.add_parameters((embeddings_size))
    self.att_w1 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_w2 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_v = self.model.add_parameters((1, rnn_size))
def run(self, model_name):
    """Vectorize the issue corpus, build the train/test split, train a
    Tree-LSTM categorical classifier and return its evaluation results.
    """
    opts = self.kwargs
    dump_name = os.environ.get('MODEL_DUMP_PATH') + self.get_file_name(
        extension='')

    # Encode the issues.
    issues = get_vectorized_issue(opts["corpus"],
                                  opts["collection"],
                                  opts["glove_size"],
                                  attention_vector=opts["attention_vector"],
                                  categorical=True,
                                  column=opts["column"])

    # Build the training sets from the encoded issues.
    attention_raw = issues.attention_vector_raw_data
    filtered = data.get_dataset_other_categorical(attention_raw,
                                                  opts["num_samples"],
                                                  opts["balanced"],
                                                  opts["num_cat"],
                                                  opts["column"])
    data_train, data_test, input_vocab = data.get_dataset_tree_categorical(
        filtered,
        max_sentence_length=opts["max_input"],
        column=opts["column"],
        train_size=opts["train_porcent"])
    embeddings = data.create_embeddings_tree(issues, input_vocab, dump_name)

    classifier = TreeLstmCategorical(
        dy.Model(),
        data_train,
        data_test,
        embeddings,
        dump_name,
        update_embeddings=opts["update_embeddings"],
        hidden_dim=opts["hidden_size"],
        attention_size=opts["attention_size"],
        batch_size=opts["batch_size"],
        learning_rate=opts["learning_rate"],
        patience=opts["patience"],
        attention=opts["attention"],
        corpus=opts["corpus"],
        num_cat=opts["num_cat"],
    )
    classifier.fit()
    self.results = classifier.evaluate()
    return self.results
def new_code2nl_model(args):
    """Assemble a code-to-natural-language translation model.

    Builds a source-code encoder, an attention module and an NL decoder over
    a shared DyNet model.

    :param args: configuration with vocabulary sizes, embedding dims, RNN
        layer counts/state dims and dropout rates.
    :return: (model, translator) — the parameter collection and the wired
        Translator.
    """
    model = dy.Model()

    # Source side: token embeddings feeding the encoder RNN.
    src_code_lookup = LookupEmbedder(model, args.code_vocab_size,
                                     args.code_embed_dim)
    encoder = Encoder(model, src_code_lookup, args.code_rnn_layers,
                      args.code_rnn_state_dim, args.rnn_dropout)

    # Target side: NL embeddings feeding the decoder RNN.
    trg_embedder = LookupEmbedder(model, args.nl_vocab_size,
                                  args.nl_embed_dim)
    decoder = Decoder(model, trg_embedder, args.nl_rnn_layers,
                      encoder.state_dim, args.nl_rnn_state_dim,
                      args.rnn_dropout, args.dropout)

    attender = Attender(model, encoder.state_dim, decoder.state_dim,
                        args.attention_dim)
    return model, Translator(encoder, attender, decoder)
def __init__(self, model=None, meta=None):
    """Build (or reload) a bilingual (English/Hindi) biLSTM tagger.

    NOTE(review): Python 2 code — uses dict.iteritems(); also references the
    free names ewvm/hwvm (pretrained word-vector models), presumably module
    globals.
    """
    self.model = dy.Model()
    if model:
        self.meta = pickle.load(open('%s.meta' % model, 'rb'))
    else:
        self.meta = meta
    self.EWORDS_LOOKUP = self.model.add_lookup_parameters(
        (self.meta.n_words_eng, self.meta.w_dim_eng))
    self.HWORDS_LOOKUP = self.model.add_lookup_parameters(
        (self.meta.n_words_hin, self.meta.w_dim_hin))
    # Only seed lookup rows with pretrained vectors when training fresh;
    # a loaded model restores them from disk below.
    if not model:
        for word, V in ewvm.vocab.iteritems():
            self.EWORDS_LOOKUP.init_row(V.index + self.meta.add_words,
                                        ewvm.syn0[V.index])
        for word, V in hwvm.vocab.iteritems():
            self.HWORDS_LOOKUP.init_row(V.index + self.meta.add_words,
                                        hwvm.syn0[V.index])
    self.ECHARS_LOOKUP = self.model.add_lookup_parameters(
        (self.meta.n_chars_eng, self.meta.c_dim))
    self.HCHARS_LOOKUP = self.model.add_lookup_parameters(
        (self.meta.n_chars_hin, self.meta.c_dim))
    # MLP on top of biLSTM outputs 100 -> 32 -> ntags
    self.W1 = self.model.add_parameters(
        (self.meta.n_hidden, self.meta.lstm_word_dim * 2))
    self.W2 = self.model.add_parameters(
        (self.meta.n_tags, self.meta.n_hidden))
    self.B1 = self.model.add_parameters(self.meta.n_hidden)
    self.B2 = self.model.add_parameters(self.meta.n_tags)
    # word-level LSTMs (input = word embedding + bi-char-LSTM output)
    self.fwdRNN = dy.LSTMBuilder(
        1, self.meta.w_dim_eng + self.meta.lstm_char_dim * 2,
        self.meta.lstm_word_dim, self.model)
    self.bwdRNN = dy.LSTMBuilder(
        1, self.meta.w_dim_eng + self.meta.lstm_char_dim * 2,
        self.meta.lstm_word_dim, self.model)
    # char-level LSTMs, one bi-pair per language
    self.ecfwdRNN = dy.LSTMBuilder(1, self.meta.c_dim,
                                   self.meta.lstm_char_dim, self.model)
    self.ecbwdRNN = dy.LSTMBuilder(1, self.meta.c_dim,
                                   self.meta.lstm_char_dim, self.model)
    self.hcfwdRNN = dy.LSTMBuilder(1, self.meta.c_dim,
                                   self.meta.lstm_char_dim, self.model)
    self.hcbwdRNN = dy.LSTMBuilder(1, self.meta.c_dim,
                                   self.meta.lstm_char_dim, self.model)
    # Restore saved weights; parameter creation order above must match the
    # order used when the model was saved.
    if model:
        self.model.populate('%s.dy' % model)
def old_style_save_and_load():
    """Round-trip test for the old (pre-2.x) DyNet whole-model save/load API.

    Creates a model, saves it, rebuilds an identically-shaped second model,
    verifies the parameters differ, loads the saved file into the second
    model, and verifies the parameters now match.

    NOTE(review): Model.save/Model.load with implicit parameter enumeration
    is the deprecated pre-DyNet-2.0 API (hence the function name).
    """
    # create a model and add parameters.
    m = dy.Model()
    a = m.add_parameters((100, 100))
    b = m.add_lookup_parameters((20, 2))
    t1 = Transfer(5, 6, dy.softmax, m)
    t2 = Transfer(7, 8, dy.softmax, m)
    tt = MultiTransfer([10, 10, 10, 10], dy.tanh, m)
    c = m.add_parameters((100))
    lb = dy.LSTMBuilder(1, 2, 3, m)
    lb2 = dy.LSTMBuilder(2, 4, 4, m)
    # save
    m.save("test1")
    # create new model (same parameters):
    m2 = dy.Model()
    a2 = m2.add_parameters((100, 100))
    b2 = m2.add_lookup_parameters((20, 2))
    t12 = Transfer(5, 6, dy.softmax, m2)
    t22 = Transfer(7, 8, dy.softmax, m2)
    tt2 = MultiTransfer([10, 10, 10, 10], dy.tanh, m2)
    c2 = m2.add_parameters((100))
    # BUG FIX: the original rebound `lb2` here, clobbering the handle to
    # m's second LSTM builder; m2's builders now get distinct names.
    lb_2 = dy.LSTMBuilder(1, 2, 3, m2)
    lb2_2 = dy.LSTMBuilder(2, 4, 4, m2)
    # parameters should be different
    for p1, p2 in [(a, a2), (b, b2), (c, c2), (t1.W, t12.W),
                   (tt.transfers[0].W, tt2.transfers[0].W)]:
        assert (not numpy.array_equal(p1.as_array(), p2.as_array()))
    m2.load("test1")
    # parameters should be same
    for p1, p2 in [(a, a2), (b, b2), (c, c2), (t1.W, t12.W),
                   (tt.transfers[0].W, tt2.transfers[0].W)]:
        assert (numpy.array_equal(p1.as_array(), p2.as_array()))
    os.remove("test1")
def __init__(self, args, src_vocab, tgt_vocab, src_vocab_id2word,
             tgt_vocab_id2word):
    """Build an attentional encoder-decoder NMT model (bi-LSTM encoder,
    input-feeding-style decoder over concatenated context).
    """
    model = self.model = dy.Model()
    self.args = args
    self.src_vocab = src_vocab
    self.tgt_vocab = tgt_vocab
    self.src_vocab_id2word = src_vocab_id2word
    self.tgt_vocab_id2word = tgt_vocab_id2word
    self.src_lookup = self.model.add_lookup_parameters(
        (args.src_vocab_size, args.embed_size))
    self.tgt_lookup = self.model.add_lookup_parameters(
        (args.tgt_vocab_size, args.embed_size))
    self.enc_forward_builder = dy.LSTMBuilder(1, args.embed_size,
                                              args.hidden_size, model)
    self.enc_backward_builder = dy.LSTMBuilder(1, args.embed_size,
                                               args.hidden_size, model)
    # Decoder input = target embedding + bidirectional encoder context.
    self.dec_builder = dy.LSTMBuilder(
        1, args.embed_size + args.hidden_size * 2, args.hidden_size, model)
    # set recurrent dropout
    if args.dropout > 0.:
        self.enc_forward_builder.set_dropout(args.dropout)
        self.enc_backward_builder.set_dropout(args.dropout)
        self.dec_builder.set_dropout(args.dropout)
    # target word embedding
    self.W_y = model.add_parameters((args.tgt_vocab_size, args.embed_size))
    self.b_y = model.add_parameters((args.tgt_vocab_size))
    self.b_y.zero()
    # transformation of decoder hidden states and context vectors before reading out target words
    self.W_h = model.add_parameters(
        (args.embed_size, args.hidden_size + args.hidden_size * 2))
    self.b_h = model.add_parameters((args.embed_size))
    self.b_h.zero()
    # transformation of context vectors at t_0 in decoding
    self.W_s = model.add_parameters(
        (args.hidden_size, args.hidden_size * 2))
    self.b_s = model.add_parameters((args.hidden_size))
    self.b_s.zero()
    # Bilinear-style attention: W1_att_f scores encoder states,
    # W1_att_e scores the decoder state, W2_att combines them.
    self.W1_att_f = model.add_parameters(
        (args.attention_size, args.hidden_size * 2))
    self.W1_att_e = model.add_parameters(
        (args.attention_size, args.hidden_size))
    self.W2_att = model.add_parameters((1, args.attention_size))
def __init__(self, lemmatizer_config, encodings, embeddings, runtime=False):
    """Build a character-level lemmatizer: a CharacterNetwork encoder plus
    tag (UPOS/XPOS/ATTRS) embeddings driving an attentional LSTM decoder
    over characters, with a separate casing classifier.
    """
    self.config = lemmatizer_config
    self.encodings = encodings
    # Bug in encodings - this will be removed after UD Shared Task:
    # flags encodings where the space character is not mapped to id 1.
    self.has_bug = False
    if self.encodings.char2int[' '] != 1:
        self.has_bug = True
    self.embeddings = embeddings
    self.losses = []
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9)
    self.character_network = CharacterNetwork(self.config.tag_embeddings_size, encodings,
                                              rnn_size=self.config.char_rnn_size,
                                              rnn_layers=self.config.char_rnn_layers,
                                              embeddings_size=self.config.char_embeddings,
                                              model=self.model, runtime=runtime)
    # One tag-embedding table per morphological annotation layer.
    self.upos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.upos2int), self.config.tag_embeddings_size))
    self.xpos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
    self.attrs_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
    self.char_lookup = self.model.add_lookup_parameters((len(self.encodings.char2int), self.config.char_embeddings))
    # Orthonormally-initialized LSTM for training, plain builder at runtime.
    if runtime:
        self.rnn = dy.LSTMBuilder(self.config.rnn_layers,
                                  self.config.char_rnn_size * 2 + self.config.char_embeddings,
                                  self.config.rnn_size, self.model)
    else:
        from utils import orthonormal_VanillaLSTMBuilder
        self.rnn = orthonormal_VanillaLSTMBuilder(self.config.rnn_layers,
                                                  self.config.char_rnn_size * 2 + self.config.char_embeddings,
                                                  self.config.rnn_size, self.model)
    # Additive attention over the character encoder states.
    self.att_w1 = self.model.add_parameters((200, self.config.char_rnn_size * 2))
    self.att_w2 = self.model.add_parameters((200, self.config.rnn_size + self.config.tag_embeddings_size))
    self.att_v = self.model.add_parameters((1, 200))
    # Learned start-of-sequence decoder input.
    self.start_lookup = self.model.add_lookup_parameters(
        (1, self.config.char_rnn_size * 2 + self.config.char_embeddings))
    # +1 output class beyond the character set (presumably EOS — confirm
    # against the decoding loop).
    self.softmax_w = self.model.add_parameters((len(self.encodings.char2int) + 1, self.config.rnn_size))
    self.softmax_b = self.model.add_parameters((len(self.encodings.char2int) + 1))
    # Binary casing decision (e.g. lower/upper) per produced character.
    self.softmax_casing_w = self.model.add_parameters((2, self.config.rnn_size))
    self.softmax_casing_b = self.model.add_parameters((2))
def main():
    # NOTE(review): Python 2 source — print statements, and zip() returning a
    # list that is sorted in place. Do not run under Python 3 without porting.
    #
    # Trains an attentional NMT model: reads parallel train/dev/test files
    # from argv[1..5], sorts training pairs by descending source length
    # (for batching), and logs per-epoch loss and sample translations.
    training_log = open('training-'+str(datetime.now())+'.log','w')
    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)
    training_src = read_file(sys.argv[1])
    word_freq_src = build_dict(training_src)
    training_tgt = read_file(sys.argv[2])
    word_freq_tgt = build_dict(training_tgt)
    # Replace rare words according to corpus frequency dictionaries.
    training_src = sentence_clean(training_src,word_freq_src)
    training_tgt = sentence_clean(training_tgt,word_freq_tgt)
    dev_src = sentence_clean(read_file(sys.argv[3]),word_freq_src)
    dev_tgt = sentence_clean(read_file(sys.argv[4]),word_freq_tgt)
    test_src = sentence_clean(read_file(sys.argv[5]),word_freq_src)
    attention = Attention(model, list(training_src), list(training_tgt))
    train_data = zip(training_src, training_tgt)
    # Sort by descending source length so each batch has similar lengths.
    train_data.sort(key=lambda x: -len(x[0]))
    train_src = [sent[0] for sent in train_data]
    train_tgt = [sent[1] for sent in train_data]
    start = time.time()
    for epoch in range(150):
        epoch_loss = 0
        train_zip = zip(train_src, train_tgt)
        i = 0
        # Iterate over the training pairs in BATCH_SIZE slices.
        while i < len(train_zip):
            esum,num_words = attention.step_batch(train_zip[i:i+attention.BATCH_SIZE])
            i += attention.BATCH_SIZE
            epoch_loss += esum.scalar_value()
            esum.backward()
            trainer.update()
        print 'Epoch:',epoch
        training_log.write("Epoch %d: loss=%f \n" % (epoch, epoch_loss))
        training_log.flush()
        trainer.update_epoch(1.0)
        # Every 5 epochs, log greedy ("ori") and beam-search translations of
        # the first training sentence as a qualitative check.
        if epoch % 5 == 0:
            ori_sentence = attention.translate_sentence_ori(training_src[0])
            training_log.write('ori:'+ori_sentence+'\n')
            training_log.write('new:'+attention.translate_sentence_beam(training_src[0])+'\n')
def __init__(self, vocab, size_embed, size_lstm, size_hidden,
             timex_event_label_input, size_timex_event_label_embed,
             size_edge_label=len(EDGE_LABEL_LIST)):
    """Build a biLSTM edge-label classifier over word + timex/event-label
    embeddings.

    When vocab == 0 the network is left unbuilt (all components None),
    presumably so weights can be attached later (e.g. on load) — confirm
    against the loading code.

    NOTE(review): the size_edge_label default is evaluated once at function
    definition time; later changes to EDGE_LABEL_LIST are not reflected.
    """
    self.model = dy.Model()
    self.size_edge_label = size_edge_label
    # Select the label vocabulary used for the auxiliary label embeddings.
    if timex_event_label_input == 'none':
        self.label_vocab = {}
    elif timex_event_label_input == 'timex_event':
        self.label_vocab = LABEL_VOCAB_TIMEX_EVENT
    else:
        self.label_vocab = LABEL_VOCAB_FULL
    if vocab != 0:
        self.embeddings = self.model.add_lookup_parameters(
            (len(vocab), size_embed))
        self.timex_event_label_embeddings = \
            self.model.add_lookup_parameters(
                (len(self.label_vocab), size_timex_event_label_embed))
        # biLSTM over concatenated word + label embeddings.
        self.lstm_fwd = dy.LSTMBuilder(
            1, size_embed + size_timex_event_label_embed, size_lstm,
            self.model)
        self.lstm_bwd = dy.LSTMBuilder(
            1, size_embed + size_timex_event_label_embed, size_lstm,
            self.model)
        # Hidden layer over 12 LSTM-derived features plus 5 + 2 extra
        # hand-crafted features (see the feature-construction code).
        self.pW1 = self.model.add_parameters(
            (size_hidden, 12 * size_lstm + 5 + 2))
        self.pb1 = self.model.add_parameters(size_hidden)
        self.pW2 = self.model.add_parameters(
            (size_edge_label, size_hidden))
        self.pb2 = self.model.add_parameters(size_edge_label)
        self.attention_w = self.model.add_parameters((1, size_lstm * 2))
        self.vocab = vocab
        self.size_lstm = size_lstm
    else:
        self.embeddings, self.timex_event_label_embeddings, \
            self.pW1, self.pb1, self.pW2, self.pb2, \
            self.lstm_fwd, self.lstm_bwd, self.attention_w, self.vocab = \
            None, None, None, None, None, None, None, None, None, None
def load(cls,
         prefix,  # type: str
         new_options=None):
    """Load a saved parser from disk and re-wire its runtime components.

    :param prefix: model file name prefix
    :param new_options: optional options object whose attributes override
        the saved options.
    :rtype: MaxSubGraphParser
    """
    model = dn.Model()
    parser, savable = nn.model_load_helper(None, prefix, model)
    # BUG FIX: new_options defaults to None, but the original dereferenced
    # new_options.__dict__ unconditionally, so calling load(prefix) with the
    # default raised AttributeError. Only merge overrides when provided.
    if new_options is not None:
        parser.options.__dict__.update(new_options.__dict__)
    parser.model = model
    parser.container = savable
    parser.network = parser.container.components[0]
    parser.optimizer = nn.get_optimizer(model, parser.options)
    return parser
def __init__(self, rnn_num_of_layers, embeddings_size, state_size): self.model = dy.Model() # the embedding paramaters self.embeddings = self.model.add_lookup_parameters( (data.VOCAB_SIZE, embeddings_size)) # the rnn self.RNN = RNN_BUILDER(rnn_num_of_layers, embeddings_size, state_size, self.model) # project the rnn output to a vector of VOCAB_SIZE length self.output_w = self.model.add_parameters( (data.VOCAB_SIZE, state_size)) self.output_b = self.model.add_parameters((data.VOCAB_SIZE))
def __init__(self, rnn_model, use_char_rnn):
    """Reload a saved tagger using the old DyNet Model.load API.

    NOTE(review): Python 2 code — uses iterator .next(); the attributes file
    handle opened below is never closed (leaked).

    :param rnn_model: path of the saved model; '<rnn_model>-atts' holds the
        tab-separated attribute names describing the saved component order.
    """
    self.use_char_rnn = use_char_rnn
    self.model = dy.Model()
    # Old-style load returns the saved components in creation order.
    att_tuple = iter(self.model.load(rnn_model))
    self.attributes = open(rnn_model + "-atts", "r").read().split("\t")
    self.words_lookup = att_tuple.next()
    # Char-RNN components exist in the file only if saved with them.
    if (self.use_char_rnn):
        self.char_lookup = att_tuple.next()
        self.char_bi_lstm = att_tuple.next()
    self.word_bi_lstm = att_tuple.next()
    self.lstm_to_tags_params = get_next_att_batch(self.attributes, att_tuple)
    self.lstm_to_tags_bias = get_next_att_batch(self.attributes, att_tuple)
    self.mlp_out = get_next_att_batch(self.attributes, att_tuple)
    self.mlp_out_bias = get_next_att_batch(self.attributes, att_tuple)
def __init__(self):
    """Initialize a small one-hidden-layer model with fixed hyperparameters
    and an SGD trainer.
    """
    print("hello from Talha Yılmaz :)")
    # Hyperparameters (fixed).
    self.word_number = 15
    self.epoch_num = 4
    self.embedding_layer_size = 32
    self.hidden_layer_size = 32
    self.min_count = 2  # minimum corpus frequency for a word to be kept
    self.model = dy.Model()
    self.trainer = dy.SimpleSGDTrainer(self.model)
    # Hidden layer: embedding -> hidden projection plus bias.
    self.pW_hidden = self.model.add_parameters(
        (self.hidden_layer_size, self.embedding_layer_size))
    self.pB_hidden = self.model.add_parameters(self.hidden_layer_size)
def __init__(self, HIDDEN_SIZE, input_size, output_size):
    """One-hidden-layer classifier: input -> hidden -> output logits.

    :param HIDDEN_SIZE: hidden layer width (e.g. 100).
    :param input_size: input feature dimensionality (e.g. 128).
    :param output_size: number of output classes (e.g. len(lexicon)).
    """
    # Parameters of the model and training
    self.HIDDEN_SIZE = HIDDEN_SIZE  # 100
    self.input_size = input_size  # 128
    self.output_size = output_size  # len(lexicon)
    self.model = dy.Model()  # Define the model and SGD optimizer
    # Hidden layer weights/bias, then output layer weights/bias.
    self.w_xh_p = self.model.add_parameters(
        (self.HIDDEN_SIZE, self.input_size))
    self.b_h_p = self.model.add_parameters(self.HIDDEN_SIZE)
    self.W_hy_p = self.model.add_parameters(
        (self.output_size, self.HIDDEN_SIZE))
    self.b_y_p = self.model.add_parameters(self.output_size)
    # Reusable input vector node for the computation graph.
    self.x_val = dy.vecInput(self.input_size)
def build_model(self, nwords, nchars, ntags):
    """Create the biLSTM tagging network and return its Adam trainer.

    :param nwords: word vocabulary size.
    :param nchars: character vocabulary size (unused here — NOTE(review):
        presumably consumed by a char-level variant elsewhere).
    :param ntags: number of output tags.
    :return: the dynet trainer bound to self.model.
    """
    self.model = dy.Model()
    trainer = dy.AdamTrainer(self.model)
    EMB_SIZE = 64
    HID_SIZE = 64
    self.W_emb = self.model.add_lookup_parameters(
        (nwords, EMB_SIZE))  # Word embeddings
    self.fwdLSTM = dy.VanillaLSTMBuilder(1, EMB_SIZE, HID_SIZE,
                                         self.model)  # Forward RNN
    self.bwdLSTM = dy.VanillaLSTMBuilder(1, EMB_SIZE, HID_SIZE,
                                         self.model)  # Backward RNN
    self.W_sm = self.model.add_parameters(
        (ntags, 2 * HID_SIZE))  # Softmax weights
    self.b_sm = self.model.add_parameters((ntags))  # Softmax bias
    return trainer
def __init__(self):
    """Build a two-layer network over an embedded grid-world state.

    The world is embedded square-by-square, flattened, and fed through one
    halving linear layer (the second layer's parameters presumably live
    elsewhere — confirm against the forward pass).
    """
    self.model = dy.Model()
    # Embeds the five states at each square: empty, blocked, occupied by agent,
    # goal, and * (occupied by both agent and goal).
    self.emb_env_mat = self.model.add_lookup_parameters((5, BLOCK_EMB_SIZE))
    self.num_spots = env.WORLD_SIZE * env.WORLD_SIZE
    tot_size = BLOCK_EMB_SIZE * self.num_spots
    self.l1_weights = self.model.add_parameters(
        (tot_size, int(tot_size / 2)),
        initializer=dy.UniformInitializer(0.1))
    # BUG FIX: in the original, `initializer=...` sat OUTSIDE the call's
    # closing parenthesis, producing a chained assignment whose target is a
    # function call — a SyntaxError. The kwarg belongs inside the call,
    # matching the l1_weights line above.
    self.l1_biases = self.model.add_parameters(
        (int(tot_size / 2)),
        initializer=dy.UniformInitializer(0.1))
def __init__(self, Config):
    """Build a hierarchical (word -> utterance -> session) GRU dialog model
    with attention at the word and utterance levels.

    NOTE(review): uses cPickle — Python 2 (or a cPickle alias import).
    """
    self.Config = Config
    self.model = dy.Model()
    VOCAB_SIZE = Config.data.vocab_size
    EMBEDDINGS_SIZE = Config.model.embed_dim
    LSTM_NUM_OF_LAYERS = Config.model.num_layers
    STATE_SIZE = Config.model.num_units
    ATTENTION_SIZE = Config.model.attention_size
    # Load pretrained embeddings; prepend random rows for the 4 special
    # tokens plus OOV buckets so lookup ids line up.
    with open(
            os.path.join(Config.data.base_path, Config.data.processed_path,
                         'embed.pkl'), 'rb') as f:
        embed = np.asarray(cPickle.load(f))
    oov = np.random.random((4 + Config.data.oov_size, EMBEDDINGS_SIZE))
    self.input_lookup = self.model.lookup_parameters_from_numpy(
        np.concatenate((oov, embed)))
    # Word-level bidirectional GRU encoder.
    self.enc_fwd_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                      STATE_SIZE, self.model)
    self.enc_bwd_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                      STATE_SIZE, self.model)
    # Word-level additive attention.
    self.attention_word_w1 = self.model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE * 2), init='uniform')
    self.attention_word_w2 = self.model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE * LSTM_NUM_OF_LAYERS * 1),
        init='uniform')
    self.attention_word_w3 = self.model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE * LSTM_NUM_OF_LAYERS * 1),
        init='uniform')
    self.attention_word_v = self.model.add_parameters((1, ATTENTION_SIZE),
                                                      init='uniform')
    # Utterance-level GRU over word-encoder outputs, with its own attention.
    self.utt_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE * 2,
                                  STATE_SIZE, self.model)
    self.attention_utt_w1 = self.model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE), init='uniform')
    self.attention_utt_w2 = self.model.add_parameters(
        (ATTENTION_SIZE, STATE_SIZE * LSTM_NUM_OF_LAYERS * 1),
        init='uniform')
    self.attention_utt_v = self.model.add_parameters((1, ATTENTION_SIZE),
                                                     init='uniform')
    # Session-level GRU; decoder softmax over the vocabulary.
    self.sess_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS,
                                   STATE_SIZE + EMBEDDINGS_SIZE,
                                   STATE_SIZE, self.model)
    self.decoder_w = self.model.add_parameters((VOCAB_SIZE, STATE_SIZE),
                                               init='uniform')
    self.decoder_b = self.model.add_parameters((VOCAB_SIZE),
                                               init='uniform')
def __init__(self, LAYERS, INPUT_DIM, HIDDEN_DIM, ATTEN_SIZE, BATCH_SIZE,
             source, target, source_val, target_val, test, blind):
    """Load a pretrained attentional encoder-decoder and its data splits.

    Builds the source/target vocabularies from the training files,
    restores every builder and parameter from the checkpoint file
    "18.2164769676", and reads the validation / test / blind sets.
    """
    self.s_vocab, self.s_id_lookup, self.s_data = self.get_vocab(source)
    self.t_vocab, self.t_id_lookup, self.t_data = self.get_vocab(target)
    self.s_vocab_size = len(self.s_vocab)
    self.t_vocab_size = len(self.t_vocab)

    # All builders and parameters are restored from the checkpoint rather
    # than freshly allocated.  (Removed the dead commented-out allocation
    # code that duplicated this list.)
    self.model = dy.Model()
    self.params = {}
    (self.l2r_builder, self.r2l_builder, self.dec_builder,
     self.params["s_lookup"], self.params["t_lookup"],
     self.params["W_y"], self.params["b_y"],
     self.params["W1_att"], self.params["W2_att"],
     self.params["v_att"]) = self.model.load("18.2164769676")

    # NOTE(review): `dropout` is not a parameter of this method, so it
    # must resolve to a module-level global — confirm it is defined before
    # this class is constructed, otherwise this raises NameError.
    self.l2r_builder.set_dropout(dropout)
    self.r2l_builder.set_dropout(dropout)
    self.dec_builder.set_dropout(dropout)

    self.HIDDEN_DIM = HIDDEN_DIM
    self.BATCH_SIZE = BATCH_SIZE
    self.s_val_data = self.get_data(source_val, self.s_vocab)
    self.t_val_data = self.get_data(target_val, self.t_vocab)
    self.test_data = self.get_data(test, self.s_vocab)
    self.blind_data = self.get_data(blind, self.s_vocab)
    # Best (lowest) validation perplexity seen so far; starts at a
    # sentinel high value.
    self.max_perp = 1000000.0
def __init__(self, src_we, dst_we, input_encodings, output_encodings, config):
    """Allocate all parameters for the encoder-decoder model.

    Stacks bidirectional orthonormal LSTM encoders per
    ``config.encoder_layers``, a multi-layer LSTM decoder, an output
    softmax, an auxiliary word-embedding head, input projections,
    trainable (holistic) embedding tables and additive attention weights.
    """
    self.config = config
    self.losses = []
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    self.src_we = src_we
    self.dst_we = dst_we
    self.input_encodings = input_encodings
    self.output_encodings = output_encodings

    # Bidirectional encoder stack: each layer's output size is doubled
    # because the forward and backward states are concatenated.
    self.encoder_fw = []
    self.encoder_bw = []
    feat_size = config.input_size
    for hidden in self.config.encoder_layers:
        self.encoder_fw.append(
            orthonormal_VanillaLSTMBuilder(1, feat_size, hidden, self.model))
        self.encoder_bw.append(
            orthonormal_VanillaLSTMBuilder(1, feat_size, hidden, self.model))
        feat_size = hidden * 2

    # Decoder consumes the encoder output concatenated with an input
    # embedding.
    self.decoder = orthonormal_VanillaLSTMBuilder(
        config.decoder_layers, feat_size + self.config.input_size,
        config.decoder_size, self.model)
    feat_size = config.decoder_size

    # Output softmax over the target vocabulary plus one extra EOS slot.
    out_vocab = len(self.output_encodings.word2int) + 1
    self.output_softmax_w = self.model.add_parameters((out_vocab, feat_size))
    self.output_softmax_b = self.model.add_parameters((out_vocab))
    self.EOS = len(self.output_encodings.word2int)

    # Auxiliary word-embedding head on top of the decoder state, plus a
    # projection into the target embedding space.
    self.aux_layer_w = self.model.add_parameters(
        (self.config.aux_we_layer_size, self.config.decoder_size))
    self.aux_layer_b = self.model.add_parameters(
        (self.config.aux_we_layer_size))
    self.aux_layer_proj_w = self.model.add_parameters(
        (self.dst_we.word_embeddings_size, self.config.aux_we_layer_size))
    self.aux_layer_proj_b = self.model.add_parameters(
        (self.dst_we.word_embeddings_size))

    # Projection of pretrained source embeddings into the model's input
    # space.
    self.word_proj_w = self.model.add_parameters(
        (self.config.input_size, self.src_we.word_embeddings_size))
    self.word_proj_b = self.model.add_parameters((self.config.input_size))

    # Trainable (holistic) embedding tables for source/target words and
    # two special symbols.
    self.hol_we_src = self.model.add_lookup_parameters(
        (len(self.input_encodings.word2int), self.config.input_size))
    self.hol_we_dst = self.model.add_lookup_parameters(
        (len(self.output_encodings.word2int), self.config.input_size))
    self.special_we = self.model.add_lookup_parameters(
        (2, self.config.input_size))

    # Additive attention over the top encoder layer.
    enc_out = self.config.encoder_layers[-1] * 2
    self.att_w1 = self.model.add_parameters((enc_out, enc_out))
    self.att_w2 = self.model.add_parameters((enc_out, self.config.decoder_size))
    self.att_v = self.model.add_parameters((1, enc_out))
def build_model(vocab):
    """Allocate the encoder LSTM and MLP classifier parameters.

    Returns the dynet model along with the embedding table, the hidden
    layer weights/bias, the MLP output weights/bias and the LSTM builder.
    """
    model = dy.Model()
    embeddings_lookup = model.add_lookup_parameters((len(vocab), INPUT_DIM))
    # Hidden transformation applied to the encoder output.
    hidden_W = model.add_parameters((HIDDEN_DIM, HIDDEN_DIM))
    hidden_bias = model.add_parameters(HIDDEN_DIM)
    # Final projection onto the output classes.
    MLP_W = model.add_parameters((OUTPUT_DIM, HIDDEN_DIM))
    MLP_bias = model.add_parameters(OUTPUT_DIM)
    # Single-layer LSTM encoder over the input embeddings.
    encoder_lstm = dy.LSTMBuilder(1, INPUT_DIM, HIDDEN_DIM, model)
    return (model, embeddings_lookup, hidden_W, hidden_bias,
            MLP_W, MLP_bias, encoder_lstm)