Example #1
# define the encoder architecture
# (emb1, gru1 and gru2 are assumed to mirror the decoder layers defined below)
emb1 = Embedding(en_v_size, dim_proj, seed=seed)
gru1 = GRU(dim_proj, dim_proj, seed=seed)
gru2 = GRU(dim_proj, dim_proj, seed=seed)
gru3 = GRU(dim_proj, dim_proj, return_sequences=False, seed=seed)
encoder = [emb1, gru1, gru2, gru3]

# define the decoder architecture
emb2 = Embedding(pt_v_size, dim_proj, seed=seed)
gru4 = GRU(dim_proj, dim_proj, seed=seed)
gru5 = GRU(dim_proj, dim_proj, seed=seed)
gru6 = GRU(dim_proj, dim_proj, seed=seed)
decoder = [emb2, gru4, gru5, gru6]

# output layer projecting decoder states onto the target vocabulary
softmax = Softmax(dim_proj, pt_v_size)

# assemble the sequence-to-sequence model
seq = SequenceToSequence(encoder=encoder,
                         decoder=decoder,
                         output=softmax,
                         source_v_size=en_v_size,
                         target_v_size=pt_v_size)

# set optimizer
optimizer = Adadelta()

# set up the model
seq.setup(batch_size=batch_size, optimizer=optimizer)

time2 = time.time()
print('Initialization took %3.5f seconds.\n' % (time2 - time1))

model_file = '/home/gian/%s_%shid_prj%s_en%s_pt%s_%s_batch%s.hp5y' % \
             (gru1.__class__.__name__, len(encoder), dim_proj,
              en_v_size, pt_v_size, optimizer.__class__.__name__,
              batch_size)
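# (model_file encodes the layer type, depth and training hyper-parameters in its name;
#  presumably the trained weights are saved to this path, cf. load_weights in Example #2)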
Example #2
# define the encoder architecture
emb1 = Embedding(en_v_size, dim_proj, seed=seed)
lstm1 = LSTM(dim_proj, dim_proj, seed=seed)
lstm2 = LSTM(dim_proj, dim_proj, return_sequences=False, seed=seed)
encoder = [emb1, lstm1, lstm2]

# define the decoder architecture
emb2 = Embedding(pt_v_size, dim_proj, seed=seed)
lstm3 = LSTM(dim_proj, dim_proj, seed=seed)
lstm4 = LSTM(dim_proj, pt_v_size, seed=seed)
decoder = [emb2, lstm3, lstm4]

# assemble the sequence-to-sequence model
seq = SequenceToSequence(encoder=encoder,
                         decoder=decoder,
                         source_v_size=en_v_size,
                         target_v_size=pt_v_size,
                         auto_setup=False)  # set auto_setup to False to skip weight initialization
                         # (the weights are overwritten by load_weights below anyway)

# load source and target language dictionaries
sr_dict = load_dictionary('/home/gian/datasets/dict.sort.en', max_words=en_v_size)
tr_dict = load_dictionary('/home/gian/datasets/dict.sort.pt', max_words=pt_v_size)
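# (each dictionary presumably maps a token to its integer index, keeping only the
#  en_v_size / pt_v_size most frequent entries, as used by load_and_convert_corpora below)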

# load the corpora and convert their words to indexes (the corpora must already be tokenized)
sequences1 = load_and_convert_corpora('/home/gian/datasets/fapesp/fapesp-v2.tok.test-a.en', sr_dict)
sequences2 = load_and_convert_corpora('/home/gian/datasets/fapesp/fapesp-v2.tok.test-a.pt', tr_dict)

# prepare the data (add padding values to the end of each sequence so they have the same size)
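# a minimal sketch of the padding step, assuming index 0 is reserved for padding
# (the library may provide its own helper for this)
max_len = max(len(s) for s in sequences1 + sequences2)
sequences1 = [s + [0] * (max_len - len(s)) for s in sequences1]
sequences2 = [s + [0] * (max_len - len(s)) for s in sequences2]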

# load previously trained weights into the assembled model
seq.load_weights('/home/gian/seq_to_seq.hp5y')