def main(_):
    data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                       add_beg_token='&lt;/s&gt;',
                                       add_end_token='&lt;/s&gt;',
                                       add_unknwon_token='&lt;unk&gt;')

    # create config
    config = create_config(data)
    # create log dir
    logdir = 'trf_nce/' + create_name(config)
    # prepare the log dir
    wb.prepare_log_dir(logdir, 'trf.log')

    config.print()

    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')

    if config.net_config.load_embedding_path is not None:
        # get embedding vectors
        data.word2vec(config.net_config.load_embedding_path, config.net_config.embedding_dim, cnum=0)

    # create TRF
    m = trf.TRF(config, data, logdir=logdir, device='/gpu:0')

    sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'), global_step=m.global_step)
    sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    with sv.managed_session(config=session_config) as session:
        with session.as_default():
            # train model
            m.train(operation=Operation(m))
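
# A minimal launcher sketch for the script above; this entry point is an assumption,
# since the snippet omits it. The single positional argument of main(_) matches the
# tf.app.run() convention used by TensorFlow 1.x scripts.
if __name__ == '__main__':
    tf.app.run(main=main)
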
def main():
    data = reader.Data().load_raw_data(file_list=[train_text, dev_text, test_text],
                                       add_beg_token='&lt;/s&gt;',
                                       add_end_token='&lt;/s&gt;',
                                       add_unknwon_token='&lt;unk&gt;')

    config = get_config(data)
    logdir = wb.mklogdir(out_dir + '/' + str(config), is_recreate=True, force=True)
    config.print()

    # config.word_config.load_embedding_path = os.path.join(logdir, 'word_emb.txt')
    # if config.word_config.load_embedding_path is not None:
    #     # get embedding vectors
    #     data.create_data().word2vec(config.word_config.load_embedding_path,
    #                                 config.word_config.embedding_dim, cnum=0)

    data.write_vocab(os.path.join(logdir, 'vocab.chr'))
    data.write_data(data.datas[0], os.path.join(logdir, 'train.id'))
    data.write_data(data.datas[1], os.path.join(logdir, 'valid.id'))

    m = trf.TRF(config, data, logdir, device='/gpu:1')
    print('sampler' in m.__dict__)

    sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'))
    sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    with sv.managed_session(config=session_config) as session:
        with session.as_default():
            m.train(0.1, Ops(m))
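
# The corpus paths and output directory referenced by main() above (train_text,
# dev_text, test_text, out_dir) are module-level names not shown in the snippet.
# The values below are illustrative placeholders only -- assumed plain-text corpora,
# one sentence per line -- not the original configuration.
train_text = 'data/train.txt'
dev_text = 'data/valid.txt'
test_text = 'data/test.txt'
out_dir = 'trf_nce'

if __name__ == '__main__':
    main()
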
def main():
    with open('../data.info') as f:
        data_info = json.load(f)

    train_files = 100
    data = reader.LargeData().dynamicly_load_raw_data(
        sorted_vocab_file=None,
        train_list=data_info['train_all'][0:train_files],
        valid_file=data_info['valid'],
        test_file=data_info['test'],
        max_length=60,
        add_beg_token='&lt;s&gt;',
        add_end_token='&lt;/s&gt;',
        add_unknwon_token='&lt;unk&gt;',
        vocab_max_size=None,
        vocab_cutoff=3)

    config = get_config(data)
    logdir = wb.mklogdir('trf_t%d_nce/' % train_files + str(config), is_recreate=True)
    config.print()

    # config.word_config.load_embedding_path = os.path.join(logdir, 'word_emb.txt')
    # if config.word_config.load_embedding_path is not None:
    #     # get embedding vectors
    #     data.create_data().word2vec(config.word_config.load_embedding_path,
    #                                 config.word_config.embedding_dim, cnum=0)

    data.write_vocab(os.path.join(logdir, 'vocab.chr'))
    data.write_data(data.datas[0], os.path.join(logdir, 'train.id'))
    data.write_data(data.datas[1], os.path.join(logdir, 'valid.id'))

    m = trf.TRF(config, data, logdir, device=['/gpu:0', '/gpu:1'])
    print('sampler' in m.__dict__)

    sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'))
    sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    with sv.managed_session(config=session_config) as session:
        with session.as_default():
            m.train(0.1, Ops(m))
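
# main() above assumes '../data.info' is a JSON file mapping corpus names to file
# paths. The sketch below builds such a file; its structure is inferred from the
# keys the code accesses ('train_all', 'valid', 'test'), and the shard count and
# paths are illustrative placeholders, not the original data layout.
import json

data_info = {
    'train_all': ['corpus/train.%03d.txt' % i for i in range(200)],  # shard list; main() uses the first 100
    'valid': 'corpus/valid.txt',
    'test': 'corpus/test.txt',
}
with open('../data.info', 'w') as f:
    json.dump(data_info, f, indent=2)
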