def demo_resort():
    """Re-rank previously generated outputs and write them to a new JSON file.

    Reads prompts from data/test_text.txt and the matching generation results
    from result/test_text.json, splits each prompt's outputs by style tag
    ('(文)' vs '(大白狗)'), re-ranks each group with the module-level
    resort() helper, re-attaches the tags, and dumps the combined records to
    result/test_text-resort.json.

    Side effects: reads two files, writes one; no return value.
    """
    path_data = 'data/test_text.txt'
    path_source = 'result/test_text.json'
    path_target = 'result/test_text-resort.json'
    from Config import config_predict
    import json
    config = config_predict()
    with open(path_data, 'r') as f:
        Data = f.read().strip().split('\n')
    with open(path_source, 'r') as f:
        S = json.load(f)
    D = []
    # enumerate instead of range(len(...)); S[i] is still indexed directly so
    # a result file shorter than the prompt file fails loudly, as before.
    for i, prefix in enumerate(Data):
        outputs = S[i]['result']
        # Strip the trailing style tag by length: '(文)' is 3 chars,
        # '(大白狗)' is 5.  NOTE(review): the membership test uses `in`, so
        # this assumes the tag only ever appears as a suffix — confirm.
        s1 = [s[:-3] for s in outputs if '(文)' in s]
        s2 = [s[:-5] for s in outputs if '(大白狗)' in s]
        # Only call resort() on non-empty groups (truthiness over len()==0).
        r1 = resort(prefix, s1, config) if s1 else []
        r2 = resort(prefix, s2, config) if s2 else []
        R = [r + '(文)' for r in r1]
        R += [r + '(大白狗)' for r in r2]
        D.append({'input': prefix, 'outputs': R})
    with open(path_target, 'w') as f:
        json.dump(D, f, ensure_ascii=False, indent=4)
def main(path_source, path_config, idx0, idx1, gpu, nsamples):
    """Run batched prediction over the slice [idx0:idx1] of a JSON source.

    Loads the model once via getModel(), then feeds the selected records to
    fun() in fixed-size batches of 100, printing a progress banner (with
    absolute indices) before each batch.
    """
    model, tokenizer, config, device = getModel(path_config=path_config, gpu=gpu)
    from Config import config_predict
    configpredict = config_predict()
    with open(path_source, 'r') as f:
        data = json.load(f)
    S0 = data[idx0:idx1]
    batch_size = 100
    total = len(S0)
    # Walk the slice in steps of batch_size; `end` may overshoot `total`,
    # which both the banner (via min) and list slicing tolerate.
    for start in range(0, total, batch_size):
        end = start + batch_size
        print('##########################%d-%d################'
              % (idx0 + start, idx0 + min(end, total)))
        batch = S0[start:end]
        fun(batch, path_config, gpu, nsamples,
            model, tokenizer, config, device, configpredict)
# Flask prediction-service bootstrap: parse the port from argv, load the
# predict configuration, then (in the loop below) prepare one model bundle
# per configured checkpoint.
# NOTE(review): `Flask` is used but its import is not visible in this chunk —
# presumably imported earlier in the file; confirm.
import gpt_gen_thread
import sys
import time
import logging
import torch
from Config import config_predict
from datetime import datetime
import GPUtil
#from gevent.pywsgi import WSGIServer  # "this is the key piece" (translated from Chinese)
app = Flask(__name__)
app.logger.setLevel(logging.INFO)
port = 5000
style = 0  # 0 = "大白狗" style, 1 = "散文" (prose) style (translated from Chinese)
# Optional CLI override of the listen port.
if len(sys.argv)>1:
    port = int(sys.argv[1])
ConfigPredict = config_predict()
# Unpack the generation settings used by the handlers further below.
batchGenerating=ConfigPredict.batchGenerating
path_configs = ConfigPredict.model_configs
num0 = ConfigPredict.predict_nums
tags = ConfigPredict.tags
rmHFW = ConfigPredict.rmHFW
maxNext = ConfigPredict.maxNext_JLX
path_next = ConfigPredict.path_JLX_next
path_simi = ConfigPredict.path_JLX_simi
# Per-config accumulators: models, tokenizers, configs, devices, GPU handles.
model,tokenizer,config,device,GPUs = [],[],[],[],[]
ModelIndex = []
for ii in range(len(path_configs)):
    M0,T0,C0,D0 = [],[],[],[]
    gpus = ConfigPredict.gpus[ii].split(',')
    # Extract the model key from the config filename, e.g.
    # '.../config_XXX.json' -> 'XXX' (the -5 presumably strips '.json' — confirm).
    idx = path_configs[ii].index('config_')+len('config_')
    key = path_configs[ii][idx:-5]
    # NOTE(review): the loop body continues beyond this chunk boundary.
num=num0[ii], removeHighFreqWords=rmHFW[ii], batchGenerating=batchGenerating) r0 = [rr + tags[ii] for rr in r0] result.extend(r0) d = {'input': data, 'outputs': result, 'num': len(result)} D.append(d) with open(path_target, 'w') as f: json.dump(D, f, ensure_ascii=False, indent=4) t1 = time.time() print('predict time is {} for parameter topk={}'.format(t1 - t0, topk)) if __name__ == '__main__': mode, path_config, data, path_target = sys.argv[1:5] if len(sys.argv) > 6: topk = int(sys.argv[5]) temp = float(sys.argv[6]) else: topk = 8 temp = 1.0 ConfigPredict = config_predict(path_config) batchGenerating = ConfigPredict.batchGenerating path_configs = ConfigPredict.model_configs num0 = ConfigPredict.predict_nums tags = ConfigPredict.tags rmHFW = ConfigPredict.rmHFW maxNext = ConfigPredict.maxNext_JLX path_next = ConfigPredict.path_JLX_next path_simi = ConfigPredict.path_JLX_simi main(data, mode, path_config, path_target, topk, temp)
# Batch-generation driver script (threaded variant): read prompts from
# path_source, build the predict configuration (optionally overridden via
# argv), and unpack the generation settings used by code beyond this chunk.
import gpt_gen
import gpt_gen_thread
import sys
from datetime import datetime
import time
import logging
from Config import config_predict

# argv: path_source path_target [model_configs doPredict gpus]
path_source = sys.argv[1]
path_target = sys.argv[2]
if len(sys.argv) > 3:
    # Comma-separated overrides: config paths, per-model on/off flags, GPU ids.
    path_config = sys.argv[3].split(',')
    doPredict = [int(t) for t in sys.argv[4].split(',')]
    gpus = sys.argv[5].split(',')
    ConfigPredict = config_predict(model_config=path_config, doPredict=doPredict, gpus=gpus)
    print('use input configs:%s' % '\n'.join(path_config))
else:
    print('use default configs')
    ConfigPredict = config_predict()
# One prompt per line.
with open(path_source, 'r') as f:
    Data = f.read().strip().split('\n')
# Unpack generation settings (consumed by code past this chunk).
batchGenerating = ConfigPredict.batchGenerating
path_configs = ConfigPredict.model_configs
num0 = ConfigPredict.predict_nums
tags = ConfigPredict.tags
rmHFW = ConfigPredict.rmHFW
maxNext = ConfigPredict.maxNext_JLX
path_next = ConfigPredict.path_JLX_next
# Batch-generation driver script (variant of the sibling script above: adds
# torch/numpy imports, no GPU-list argv override): read prompts from
# path_source, build the predict configuration, and unpack the generation
# settings used by code beyond this chunk.
import torch
import numpy as np
import gpt_gen
import gpt_gen_thread
import sys
from datetime import datetime
import time
import logging
from Config import config_predict

# argv: path_source path_target [model_configs doPredict]
path_source = sys.argv[1]
path_target = sys.argv[2]
if len(sys.argv) > 3:
    # Comma-separated overrides: config paths and per-model on/off flags.
    path_config = sys.argv[3].split(',')
    doPredict = [int(t) for t in sys.argv[4].split(',')]
    ConfigPredict = config_predict(model_config=path_config, doPredict=doPredict)
    print('use input configs:%s' % '\n'.join(path_config))
else:
    print('use default configs')
    ConfigPredict = config_predict()
# One prompt per line.
with open(path_source, 'r') as f:
    Data = f.read().strip().split('\n')
# Unpack generation settings (consumed by code past this chunk).
batchGenerating = ConfigPredict.batchGenerating
path_configs = ConfigPredict.model_configs
num0 = ConfigPredict.predict_nums
tags = ConfigPredict.tags
rmHFW = ConfigPredict.rmHFW
maxNext = ConfigPredict.maxNext_JLX
path_next = ConfigPredict.path_JLX_next
path_simi = ConfigPredict.path_JLX_simi