Beispiel #1
0
def fun(S, path_config, gpu, nsamples):
    """Generate continuations for every sentence prefix of every poem in S.

    Each poem is split into sentences; for every clean sentence,
    ``nsamples`` continuations are requested and the results are appended
    to ``data/new.txt`` as tab-separated ``prefix\\tcontinuation`` lines.

    Args:
        S: sized iterable of poem strings.
        path_config: model config path, forwarded to ``getModel``.
        gpu: GPU id, forwarded to ``getModel`` and ``generating``.
        nsamples: number of samples requested per prefix.

    NOTE(review): relies on module-level helpers ``getModel``,
    ``peomSplit`` and ``generating`` defined elsewhere in the project.
    """
    model, tokenizer, config, device = getModel(path_config=path_config,
                                                gpu=gpu)
    path_target = 'data/new.txt'
    N = 0  # total generated continuations so far
    n = 0  # total prefixes seen so far
    for i, poem in enumerate(S):
        if i % 10 == 0:
            print(
                "proceed {} poem (total {}), get {} prefix and generate {} poems"
                .format(i, len(S), n, N))
        sents = peomSplit(poem)
        for s in sents:
            n += 1
            # '□' marks a corrupted/unreadable glyph; skip such prefixes.
            if '□' in s:
                continue
            r = generating(s, model, config, tokenizer, device, nsamples, gpu)
            N += len(r)
            if not r:
                continue
            # Keep the prefix plus only the newly generated suffix.
            r = [s + '\t' + rr[len(s):] for rr in r]
            # 'with' closes the file; the original's trailing f.close()
            # (a no-op after the with block) has been removed.
            with open(path_target, 'a+') as f:
                f.write('\n'.join(r) + '\n')
Beispiel #2
0
def main(path_data, mode, path_config, path_target, topk, temp):
    """Batch-generate outputs for every line of ``path_data``, dumping JSON.

    Args:
        path_data: input text file, one prompt per line.
        mode: generation mode; ``"1"`` selects poem generation.
        path_config: model config path forwarded to ``gpt_gen.getModel``.
        path_target: output JSON file; rewritten after every prompt so
            partial results survive an interruption (checkpointing).
        topk: sampling top-k, stored into the model config.
        temp: sampling temperature, stored into the model config.

    NOTE(review): depends on module-level names defined elsewhere in the
    project (``app``, ``quick``, ``num0``, ``rmHFW``, ``tags``,
    ``batchGenerating``, ``ConfigPredict``).
    """
    ii = int(mode)
    model, tokenizer, config, device = gpt_gen.getModel(
        path_config=path_config)
    config['topk'] = topk
    config['temperature'] = temp
    with open(path_data, 'r') as f:
        s = f.read().strip().split('\n')
    D = []
    t0 = time.time()
    for data in s:
        # The original wrapped this body in `for _ in range(1)`, a
        # one-iteration loop with no effect; it has been removed.
        if ii == 1:
            r0 = gpt_gen.generating_poem(app,
                                         data,
                                         model,
                                         config,
                                         tokenizer,
                                         device,
                                         quick,
                                         num0[ii],
                                         batchGenerating=batchGenerating)
        else:
            r0 = gpt_gen.generating(app,
                                    data,
                                    model,
                                    config,
                                    tokenizer,
                                    device,
                                    ConfigPredict,
                                    quick=quick,
                                    num=num0[ii],
                                    removeHighFreqWords=rmHFW[ii],
                                    batchGenerating=batchGenerating)
        result = [rr + tags[ii] for rr in r0]
        D.append({'input': data, 'outputs': result, 'num': len(result)})
        # Checkpoint: rewrite the full result list after each prompt.
        with open(path_target, 'w') as f:
            json.dump(D, f, ensure_ascii=False, indent=4)
    t1 = time.time()
    print('predict time is {} for parameter topk={}'.format(t1 - t0, topk))
Beispiel #3
0
# Pull per-model settings from the (project-defined) ConfigPredict object.
path_configs = ConfigPredict.model_configs
num0 = ConfigPredict.predict_nums
tags = ConfigPredict.tags
rmHFW = ConfigPredict.rmHFW
maxNext = ConfigPredict.maxNext_JLX
path_next = ConfigPredict.path_JLX_next
path_simi = ConfigPredict.path_JLX_simi
# One sub-list per config; each sub-list holds one entry per GPU replica.
model, tokenizer, config, device, GPUs = [], [], [], [], []
ModelIndex = []
for ii in range(len(path_configs)):
    M0, T0, C0, D0 = [], [], [], []
    gpus = ConfigPredict.gpus[ii].split(',')
    # Extract the model key from a path like '.../config_<key>.json'.
    idx = path_configs[ii].index('config_') + len('config_')
    key = path_configs[ii][idx:-5]
    for gpu in gpus:
        m0, t0, c0, d0 = gpt_gen.getModel(path_config=path_configs[ii], gpu=gpu)
        # Per-model sampling overrides.
        c0['repetition_penalty'] = ConfigPredict.repetition_penalty[ii]
        c0['temperature'] = ConfigPredict.temperature[ii]
        c0['length'] = ConfigPredict.length[ii]
        M0.append(m0)
        T0.append(t0)
        C0.append(c0)
        D0.append(d0)
    model.append(M0)
    tokenizer.append(T0)
    config.append(C0)
    device.append(D0)
    ModelIndex.append(list(range(len(gpus))))
    GPUs.append(gpus)
# Use context managers so the JSON file handles are closed promptly
# (the original leaked handles from bare open() calls inside json.load).
with open(path_simi, 'r', encoding='utf-8') as f:
    D_simi = json.load(f)
with open(path_next, 'r', encoding='utf-8') as f:
    D_next = json.load(f)
Beispiel #4
0
# Per-config prediction parameters, one entry per config object.
# NOTE(review): here ConfigPredict appears to be a *list* of config objects,
# unlike other snippets in this file where it is a single object — confirm.
num0 = [c.predict_nums for c in ConfigPredict]
tags = [c.tags for c in ConfigPredict]
rmHFW = [c.rmHFW for c in ConfigPredict]
# Disabled similarity/next-word table loading (kept verbatim, inactive).
'''
maxNext = ConfigPredict.maxNext_JLX
path_next = ConfigPredict.path_JLX_next
path_simi = ConfigPredict.path_JLX_simi
D_simi = json.load(open(path_simi,'r',encoding='utf-8'))
D_next = json.load(open(path_next,'r',encoding='utf-8'))
D_simi = {k:json.loads(D_simi[k]) for k in D_simi}
D_next = {k:json.loads(D_next[k]) for k in D_next}
'''
# Empty placeholders so downstream code can still reference these names.
D_simi,D_next,maxNext=[],[],[]
# One model/tokenizer/config/device per entry in path_configs.
# NOTE(review): path_configs is defined outside this snippet — verify it is
# the same length as ConfigPredict, since both are indexed by ii below.
model,tokenizer,config,device = [], [], [], []
for ii in range(len(path_configs)):
    m0,t0,c0,d0 = gpt_gen.getModel(path_config=path_configs[ii],gpu=ConfigPredict[ii].gpus)
    # Per-model sampling overrides taken from the matching config object.
    c0['repetition_penalty'] = ConfigPredict[ii].repetition_penalty
    c0['temperature'] = ConfigPredict[ii].temperature
    c0['length'] = ConfigPredict[ii].length
    model.append(m0)
    tokenizer.append(t0)
    config.append(c0)
    device.append(d0)
@app.route('/api/gen', methods=['POST'])
def test2():
    r = request.json
    #print(type(r))
    #print(request.json)
    #r = '{"input": "们"}'
    #r = json.loads(r)
    data = r["input"]
Beispiel #5
0
# Service port; the Flask app object is created outside this snippet.
port = 7000
from Config_gou import config_predict

gpus = '0,1,2,3'
style = 'gou'
ConfigPredict = config_predict(gpus=gpus)
batchGenerating = ConfigPredict.batchGenerating
path_configs = ConfigPredict.model_configs
num0 = ConfigPredict.predict_nums
tags = ConfigPredict.tags
rmHFW = ConfigPredict.rmHFW
# Rebind gpus from the comma-separated config string to a list of ids.
gpus = ConfigPredict.gpus.split(',')
# Load one model replica per GPU so requests can be spread across devices.
# NOTE(review): only (model, gpu) pairs are kept; the module-level
# tokenizer/config/device names retain the values from the *last* GPU's
# getModel call — confirm downstream handlers expect exactly that.
Model = []
for gpu in gpus:
    model, tokenizer, config, device = gpt_gen.getModel(
        path_config=path_configs, gpu=gpu)
    Model.append((model, gpu))


@app.route('/', methods=['POST'])
def test1():
    r = request.json
    data = r["input"]
    quick = False
    if "quick" in r:
        print("quick pattern")
        if r["quick"] == "True":
            quick = True
    try:
        random.shuffle(Model)
        model, gpu = Model[0][0], Model[0][1]