def main(): if len(sys.argv) < 3: print 'Need input/output filenames as first/second arguments. Exit.' exit(-1) if os.path.exists(sys.argv[2]): print 'Output file exists. Exit.' exit(-1) print '\nChecking input file consistency...' loaders = dataLoader.get_data(sys.argv[1]) checks = checker.run_check_data(loaders, True, True, False) for data in loaders: typ = data.meas_type if 1 in data.flavs: print 'FLAV_C already present in input file for %s. Exit.' % typ exit(-1) if not any(0 in data.flavs for data in loaders): print 'FLAV_B not found in input file. Exit.' exit(-1) print '\nGenerating new csv content...' new_csv_data = list( itertools.chain.from_iterable(l for d in loaders for l in generate_flav_c(d))) with open(sys.argv[1]) as f: old_csv_data = f.readlines() with open(sys.argv[2], 'w') as f: f.writelines(old_csv_data) f.write('\n') f.writelines(new_csv_data) print 'Done.'
def gen(**kwargs):
    """Command-line entry point: generate a poem with a trained model.

    Keyword arguments override fields on the global ``opt`` config object
    (model_path, start_words, prefix_words, use_gpu, acrostic, ...).
    Prints the generated poem to stdout.
    """
    for key, val in kwargs.items():
        setattr(opt, key, val)

    # Load the vocabulary and the trained weights (always onto CPU first).
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    state_dict = t.load(opt.model_path, map_location=lambda s, l: s)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model.cuda()

    # Normalise the seed strings across the python2/python3 string models.
    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            # Undo a surrogateescape round-trip from a non-utf8 terminal.
            start_words = opt.start_words.encode(
                'ascii', 'surrogateescape').decode('utf8')
            prefix_words = (opt.prefix_words.encode(
                'ascii', 'surrogateescape').decode('utf8')
                if opt.prefix_words else None)
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = (opt.prefix_words.decode('utf8')
                        if opt.prefix_words else None)

    # Classical poems use full-width punctuation only; convert the
    # half-width marks a user may have typed.
    for half, full in ((',', u','), ('.', u'。'), ('?', u'?')):
        start_words = start_words.replace(half, full)

    # Acrostic poem vs. ordinary continuation.
    poem_fn = gen_acrostic if opt.acrostic else generate
    result = poem_fn(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def main(): if len(sys.argv) < 3: print 'Need input/output filenames as first/second arguments. Exit.' exit(-1) if os.path.exists(sys.argv[2]): print 'Output file exists. Exit.' exit(-1) print '\nChecking input file consistency...' loaders = dataLoader.get_data(sys.argv[1]) checks = checker.run_check_data(loaders, True, True, False) for res, data in itertools.izip(checks, loaders): typ = data.meas_type if not res: print 'Checks on input file failed for %s. Exit.' % typ exit(-1) if not 0 in data.flavs: print 'FLAV_B not found in input file for %s. Exit.' % typ exit(-1) if 1 in data.flavs: print 'FLAV_C already present in input file for %s. Exit.' % typ exit(-1) print '\nGenerating new csv content...' new_csv_data = list(itertools.chain.from_iterable( l for d in loaders for l in generate_flav_c(d) )) with open(sys.argv[1]) as f: old_csv_data = f.readlines() with open(sys.argv[2], 'w') as f: f.writelines(old_csv_data) f.write('\n') f.writelines(new_csv_data) print 'Done.'
def run_check(filename, op=True, sys=True, flavor=True):
    """Load measurement data from *filename* and run consistency checks.

    The ``op``/``sys``/``flavor`` flags are forwarded unchanged to
    ``run_check_data``.  NOTE(review): the ``sys`` parameter shadows the
    ``sys`` module inside this function; it is kept as-is because callers
    may pass it by keyword.
    """
    return run_check_data(dataLoader.get_data(filename), op, sys, flavor)
def train(**kwargs):
    """Train the PoetryModel on the poem corpus.

    Keyword arguments override fields on the global ``opt`` config object
    (epoch, batch_size, lr, model_path, use_gpu, plot_every, env,
    model_prefix, ...).  Loss is tracked with an AverageValueMeter,
    plotted to Visdom every ``opt.plot_every`` batches, and a checkpoint
    is saved after every epoch.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    vis = Visualizer(env=opt.env)

    # Load the corpus and wrap it in a shuffled DataLoader.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Model definition (embedding 128, hidden 256), optimizer and loss.
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        # Resume from a previous checkpoint if one is configured.
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm(enumerate(dataloader)):

            # Training step: reshape the batch to (seq_len, batch).
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            # Input and target are offset by one token (next-token prediction).
            input_, target = Variable(data_[:-1, :]), Variable(
                data_[1:, :])
            # target = target.cuda()
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            # NOTE: loss.data[0] is legacy (pre-0.4) PyTorch tensor indexing.
            loss_meter.add(loss.data[0])

            # Visualization every opt.plot_every batches.
            if (ii + 1) % opt.plot_every == 0:

                # if os.path.exists(opt.debug_file):
                # ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])

                # Original text of (up to) 16 poems from the current batch.
                poetrys = [[ix2word[_word] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('<br>'.join(
                    [''.join(poetry) for poetry in poetrys]), win='origin_poem')

                gen_poetries = []
                # Generate 8 poems, each seeded with one of these characters.
                for word in list('春江花月夜凉如水'):
                    gen_poetry = ''.join(
                        generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join(
                    [''.join(poetry) for poetry in gen_poetries]),
                    win='gen_poem')

        t.save(model.state_dict(), '%s_%s.path' % (opt.model_prefix, epoch))