Beispiel #1
0
def gendescr_pathast_2(model, data, comstok, comlen, batchsize, config, w=1):
    """Decode comments step by step, always skipping the top-scoring token.

    ``data`` maps an id to a 4-tuple ``(tdat, sdat, com, wsmlpath)``. For each
    position ``1 .. comlen - 1`` the model is queried with the current
    partial comments; for every sample the highest score is zeroed ``w``
    times in a row and the best remaining index is written into the comment.
    With the default ``w=1`` this selects the second-best prediction.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, sdat, com, wsmlpath)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: mapping; ``config['use_sdats']`` decides whether ``sdats``
            is part of the model input.
        w: how many times the current best score is knocked out per step.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    tdats, sdats, coms, wsmlpaths = (
        np.array(column) for column in zip(*data.values()))

    for pos in range(1, comlen):
        if config['use_sdats']:
            inputs = [tdats, sdats, coms, wsmlpaths]
        else:
            inputs = [tdats, coms, wsmlpaths]
        results = model.predict(inputs, batch_size=batchsize)

        for row, scores in enumerate(results):
            for _ in range(w):
                # Remove the current winner, then take the best of the rest.
                scores[np.argmax(scores)] = 0
                coms[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #2
0
def gendescr_pathast_threed(model, data, comstok, comlen, batchsize, config,
                            strat, bw):
    """Generate comments greedily, or delegate to beam search.

    When ``strat == 'beam'`` the call is forwarded unchanged to
    ``gendescr_pathast_beam`` with beam width ``bw``. Otherwise each comment
    position ``1 .. comlen - 1`` is filled with the argmax of the model
    output for that sample.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, sdat, com, wsmlpath)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: mapping; ``config['use_sdats']`` decides whether ``sdats``
            is part of the model input.
        strat: decoding strategy; only ``'beam'`` is treated specially.
        bw: beam width, used only for the beam strategy.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    if strat == 'beam':
        return gendescr_pathast_beam(model, data, comstok, comlen, batchsize,
                                     config, bw)

    tdats, sdats, coms, wsmlpaths = (
        np.array(column) for column in zip(*data.values()))

    for pos in range(1, comlen):
        if config['use_sdats']:
            inputs = [tdats, sdats, coms, wsmlpaths]
        else:
            inputs = [tdats, coms, wsmlpaths]
        results = model.predict(inputs, batch_size=batchsize)
        for row, scores in enumerate(results):
            coms[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #3
0
def predictorLime(codeList):
    """Classify each code string by whether ``key`` appears in its prediction.

    Every string in ``codeList`` is split on single spaces, its first 100
    tokens are mapped through ``datstok.w2i`` (unknown tokens become 0), and
    a comment is decoded greedily in batches of ``batchSize``. Relies on the
    module-level names ``model``, ``datstok``, ``comstok``, ``comlen``,
    ``st``, ``batchSize`` and ``key``.

    Args:
        codeList: sequence of space-separated code strings.

    Returns:
        ``np.ndarray`` with one row per input: ``[0, 1]`` when ``key`` is one
        of the space-separated tokens of the decoded sentence, otherwise
        ``[1, 0]``.
    """
    total = len(codeList)
    print(total)
    inpts = np.zeros((total, 100))
    coms = np.zeros((total, comlen))
    coms[:, 0] = st

    for i, c in enumerate(codeList):
        for j, w in enumerate(c.split(' ')):
            if j >= 100:
                break
            if w not in datstok.w2i.keys():
                inpts[i][j] = 0
            else:
                inpts[i][j] = datstok.w2i[w]

    # Walk the inputs in half-open windows [l, r). The previous version
    # clamped r to len(codeList) - 1, which silently skipped the last sample
    # and could hand an empty batch to model.predict.
    for l in range(0, total, batchSize):
        r = min(l + batchSize, total)
        tmpInpts = inpts[l:r]
        # Basic slicing yields a view, so writes into coms below are visible
        # through tmpComs on the next decoding step.
        tmpComs = coms[l:r]
        for i in range(1, comlen):
            results = model.predict([tmpInpts, tmpComs], batch_size=r - l)
            for c, s in enumerate(results):
                coms[l + c][i] = np.argmax(s)

    retList = []
    for c in coms:
        if key in seq2sent(c, comstok).split(' '):
            retList.append([0, 1])
        else:
            retList.append([1, 0])
    return np.array(retList)
Beispiel #4
0
def gendescr_4inp(model,
                  data,
                  comstok,
                  comlen,
                  batchsize,
                  config,
                  strat='greedy'):
    """Greedily decode comments with a four-input model.

    ``data`` maps an id to ``(tdat, sdat, com, sml)``. Positions
    ``1 .. comlen - 1`` of every comment are filled with the argmax of the
    model output. ``config`` and ``strat`` are accepted but not used here.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, sdat, com, sml)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: unused.
        strat: unused; only greedy decoding is implemented.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    tdats, sdats, coms, smls = (
        np.array(column) for column in zip(*data.values()))

    for pos in range(1, comlen):
        results = model.predict([tdats, sdats, coms, smls],
                                batch_size=batchsize)
        for row, scores in enumerate(results):
            coms[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #5
0
def gendescr_pathast(model, data, comstok, comlen, batchsize, config, refcoms, count, strat='greedy'):
    """Greedily decode comments after seeding position 1 from ``refcoms``.

    Column 1 of every comment is copied from column 1 of the reference
    comments, then positions ``2 .. comlen - 1`` are filled with the argmax
    of the model output. ``count`` and ``strat`` are accepted but not used.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, sdat, com, wsmlpath)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: mapping; ``config['use_sdats']`` decides whether ``sdats``
            is part of the model input.
        refcoms: mapping whose values are reference comment sequences; all
            of them are used, so their count must match ``data``.
        count: unused.
        strat: unused; only greedy decoding is implemented.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    tdats, sdats, coms, wsmlpaths = (
        np.array(column) for column in zip(*data.values()))

    print('refcoms len:', len(refcoms))
    print('coms len:', coms.shape)

    # The whole of refcoms is used; it is not sliced per batch.
    reference = np.array(list(refcoms.values()))
    coms[:, 1] = reference[:, 1]

    for pos in range(2, comlen):
        if config['use_sdats']:
            inputs = [tdats, sdats, coms, wsmlpaths]
        else:
            inputs = [tdats, coms, wsmlpaths]
        results = model.predict(inputs, batch_size=batchsize)
        for row, scores in enumerate(results):
            coms[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #6
0
def gendescr_2inp(model, data, comstok, comlen, batchsize, config, strat,
                  beamwidth, outfile, stopword):
    """Greedily decode up to ``stopword`` and dump ``activation_1`` values.

    Positions ``1 .. stopword - 1`` of every comment are filled with the
    argmax of the model output. The activations of the layer named
    ``'activation_1'`` are then fetched via ``get_activations`` and written
    through ``display_activations`` to a text file whose name is built from
    the first id, the stem of ``outfile`` and ``stopword``. ``comlen``,
    ``config``, ``strat`` and ``beamwidth`` are accepted but not used.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, com)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: unused.
        batchsize: batch size forwarded to ``model.predict``.
        config: unused.
        strat: unused; only greedy decoding is implemented.
        beamwidth: unused.
        outfile: file name; the part before the first ``'.'`` goes into the
            activation file name.
        stopword: exclusive upper bound of the decoded positions.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    # Kept under its own name so the result loop below cannot shadow it.
    fids = [*data.keys()]

    tdats, coms = list(zip(*data.values()))
    tdats = np.array(tdats)
    coms = np.array(coms)

    for i in range(1, stopword):
        results = model.predict([tdats, coms], batch_size=batchsize)

        for c, s in enumerate(results):
            coms[c][i] = np.argmax(s)

    act1 = get_activations(model, [tdats, coms], layer_name='activation_1')
    act1_softmax_val_path = '/nfs/projects/attn-to-fc/data/outdir/viz/{}-{}-act1-stopword-{}.txt'.format(
        fids[0],
        outfile.split('.')[0], stopword)
    # The context manager closes the file even if display_activations raises.
    with open(act1_softmax_val_path, 'w') as act1_prob_file:
        for j in act1:
            display_activations(j, 'tdats_activation', fids, act1_prob_file,
                                outfile, stopword)

    final_data = {}
    for fid, com in zip(data.keys(), coms):
        final_data[fid] = seq2sent(com, comstok)

    return final_data
Beispiel #7
0
def gendescr_astattendfcpluspc(model, modelpc, data, data1, comstok, comlen, batchsize, strat='greedy'):
    """Greedily decode comments from the averaged output of two models.

    At every position ``1 .. comlen - 1`` both models are queried; their
    per-sample outputs are averaged element-wise and the argmax of the
    average is written into the comment. ``strat`` is accepted but not used.

    Args:
        model: first model, fed ``[tdats, sdats, coms, smls]``.
        modelpc: second model, fed ``[tdats, pdats, smls, coms]``.
        data: mapping from id to ``(tdat, pdat, sml, com)``.
        data1: mapping from id to a 4-tuple whose second element supplies
            ``sdats``; the other three elements are ignored.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to both ``predict`` calls.
        strat: unused; only greedy decoding is implemented.

    Returns:
        dict mapping each id of ``data`` to ``seq2sent(com, comstok)``.
    """
    tdats, pdats, smls, coms = (
        np.array(column) for column in zip(*data.values()))
    _, sdat_column, _, _ = zip(*data1.values())
    sdats = np.array(sdat_column)

    for pos in range(1, comlen):
        results = model.predict([tdats, sdats, coms, smls],
                                batch_size=batchsize)
        pcresults = modelpc.predict([tdats, pdats, smls, coms],
                                    batch_size=batchsize)

        for row, (scores, pc_scores) in enumerate(zip(results, pcresults)):
            averaged = np.mean([scores, pc_scores], axis=0)
            coms[row][pos] = np.argmax(averaged)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #8
0
def gendescr_code2seqplusfc(model, modelpc, data, comstok, comlen, batchsize, strat='greedy'):
    """Greedily decode comments from the averaged output of two models.

    At every position ``1 .. comlen - 1`` both models are queried; their
    per-sample outputs are averaged element-wise and the argmax of the
    average is written into the comment. ``strat`` is accepted but not used.

    Args:
        model: first model, fed ``[tdats, coms, pathast]``.
        modelpc: second model, fed ``[tdats, sdats, coms, pathast]``.
        data: mapping from id to ``(tdat, sdat, com, pathast)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to both ``predict`` calls.
        strat: unused; only greedy decoding is implemented.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    tdats, sdats, coms, pathast = (
        np.array(column) for column in zip(*data.values()))

    for pos in range(1, comlen):
        results = model.predict([tdats, coms, pathast],
                                batch_size=batchsize)
        pcresults = modelpc.predict([tdats, sdats, coms, pathast],
                                    batch_size=batchsize)

        for row, (scores, pc_scores) in enumerate(zip(results, pcresults)):
            # Ensemble by averaging the two outputs, not by picking the
            # more confident model.
            averaged = np.mean([scores, pc_scores], axis=0)
            coms[row][pos] = np.argmax(averaged)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #9
0
def gendescr_pathast_beam(model, data, comstok, comlen, batchsize, config, w):
    """Generate comments with a beam search of width ``w``.

    Position 1 is seeded with the ``w`` highest-scoring tokens of a single
    model call. For every later position each of the ``w`` beams is expanded
    into its ``w`` best continuations (``w * w`` candidates per sample), and
    only then are the ``w`` candidates with the lowest accumulated
    ``-log(score)`` kept as the new beams. The beam with the lowest cost
    (index 0) is returned.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, sdat, com, wsmlpath)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: mapping; ``config['use_sdats']`` decides whether ``sdats``
            is part of the model input.
        w: beam width.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)`` for the best beam.
    """
    tdats, sdats, coms, wsmlpaths = zip(*data.values())
    tdats = np.array(tdats)
    coms = np.array(coms)
    sdats = np.array(sdats)
    wsmlpaths = np.array(wsmlpaths)
    bms = coms.shape[0]
    # beamcoms[j] is the j-th beam's copy of all comments.
    beamcoms = np.tile(coms, [w, 1])
    beamcoms = np.reshape(beamcoms, [w, bms, comlen])
    # beamprobs[j][c]: accumulated -log(score) of beam j for sample c.
    beamprobs = np.zeros((w, bms))
    # Candidate sequences and their costs for one decoding step.
    psmat = np.zeros((bms, w * w, comlen))
    prmat = np.zeros((bms, w * w))

    def _predict(cur_coms):
        # One forward pass with the given partial comments.
        if config['use_sdats']:
            return model.predict([tdats, sdats, cur_coms, wsmlpaths],
                                 batch_size=batchsize)
        return model.predict([tdats, cur_coms, wsmlpaths],
                             batch_size=batchsize)

    results = _predict(coms)
    for c, s in enumerate(results):
        for j in range(w):
            ps = np.argmax(s)
            pr = -np.log(np.max(s))
            # Zero the winner so the next pass picks the runner-up.
            s[ps] = 0
            beamprobs[j][c] = pr
            beamcoms[j][c][1] = ps

    for i in range(2, comlen):
        # Expansion: fill all w * w candidate slots before pruning.
        for j in range(w):
            results = _predict(beamcoms[j])
            for c, s in enumerate(results):
                for k in range(w):
                    ps = np.argmax(s)
                    pr = -np.log(np.max(s))
                    s[ps] = 0
                    prmat[c][(j * w) + k] = beamprobs[j][c] + pr
                    psmat[c][(j * w) + k] = beamcoms[j][c]
                    psmat[c][(j * w) + k][i] = ps

        # Pruning: must run after every beam has been expanded. It used to
        # sit inside the loop above (reusing ``j``), which for w > 1 pruned
        # over stale candidates and overwrote beams not yet expanded.
        for c, s in enumerate(prmat):
            for b in range(w):
                ps = np.argmin(s)
                pr = np.min(s)
                # Mark as taken so the next pick is the next-cheapest.
                s[ps] = np.inf
                beamprobs[b][c] = pr
                beamcoms[b][c] = psmat[c][ps]

    coms = beamcoms[0]
    final_data = {}
    for fid, com in zip(data.keys(), coms):
        final_data[fid] = seq2sent(com, comstok)

    return final_data
Beispiel #10
0
def gendescr_2inp(model, data, comstok, comlen, batchsize, config, refcoms, count, strat='greedy'):
    """Greedily decode comments after seeding position 1 from ``refcoms``.

    The reference comments for batch number ``count`` (the slice
    ``count * batchsize : (count + 1) * batchsize`` of ``refcoms`` values)
    provide column 1 of every comment. Positions ``2 .. comlen - 1`` are then
    filled with the argmax of the model output. ``config`` and ``strat`` are
    accepted but not used.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, com)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size; also the stride used to slice ``refcoms``.
        config: unused.
        refcoms: mapping whose values are reference comment sequences.
        count: zero-based index of the batch within ``refcoms``.
        strat: unused; only greedy decoding is implemented.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    tdats, coms = (np.array(column) for column in zip(*data.values()))

    first = count * batchsize
    last = (count + 1) * batchsize
    batch_refs = np.array(list(refcoms.values())[first:last])
    coms[:, 1] = batch_refs[:, 1]

    for pos in range(2, comlen):
        results = model.predict([tdats, coms], batch_size=batchsize)
        for row, scores in enumerate(results):
            coms[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #11
0
def gendescr_projcon(model, data, comstok, comlen, batchsize, config, strat,
                     bw):
    """Greedily decode comments with a ``[tdats, pdats, coms]`` model.

    Positions ``1 .. comlen - 1`` of every comment are filled with the
    argmax of the model output. ``config``, ``strat`` and ``bw`` are accepted
    but not used.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, pdat, com)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: unused.
        strat: unused.
        bw: unused.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    tdats, pdats, coms = (np.array(column) for column in zip(*data.values()))

    for pos in range(1, comlen):
        results = model.predict([tdats, pdats, coms], batch_size=batchsize)
        for row, scores in enumerate(results):
            coms[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #12
0
def gendescr_3inp(model, data, comstok, comlen, batchsize, config, strat, bw):
    """Generate comments greedily, or delegate to beam search.

    When ``strat == 'beam'`` the call is forwarded unchanged to
    ``gendescr_3inp_beam`` with beam width ``bw``. Otherwise positions
    ``1 .. comlen - 1`` of every comment are filled with the argmax of the
    model output.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(tdat, com, sml)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: only passed on to the beam implementation.
        strat: decoding strategy; only ``'beam'`` is treated specially.
        bw: beam width, used only for the beam strategy.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    if strat == 'beam':
        return gendescr_3inp_beam(model, data, comstok, comlen, batchsize,
                                  config, bw)

    tdats, coms, smls = (np.array(column) for column in zip(*data.values()))

    for pos in range(1, comlen):
        results = model.predict([tdats, coms, smls], batch_size=batchsize)
        for row, scores in enumerate(results):
            coms[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }
Beispiel #13
0
def gendescr_3inp(model, data, anstok, anslen, batchsize, config, strat,
                  beamwidth, outfile, stopword):
    """Greedily decode answers up to ``stopword`` and dump two activations.

    Positions ``1 .. stopword - 1`` of every answer are filled with the
    argmax of the model output. The activations of the layers named
    ``'activation_1'`` and ``'activation_2'`` are then fetched via
    ``get_activations`` and written through ``display_activations`` to two
    text files whose names are built from the first id, the stem of
    ``outfile`` and ``stopword``. ``anslen``, ``config``, ``strat`` and
    ``beamwidth`` are accepted but not used.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(context, ans, ques)``.
        anstok: tokenizer handed to ``seq2sent``.
        anslen: unused.
        batchsize: batch size forwarded to ``model.predict``.
        config: unused.
        strat: unused; only greedy decoding is implemented.
        beamwidth: unused.
        outfile: file name; the part before the first ``'.'`` goes into the
            activation file names.
        stopword: exclusive upper bound of the decoded positions.

    Returns:
        dict mapping each id to ``seq2sent(an, anstok)``.
    """
    # Kept under its own name so the result loop below cannot shadow it.
    fids = [*data.keys()]

    context, ans, ques = list(zip(*data.values()))
    context = np.array(context)
    ans = np.array(ans)
    ques = np.array(ques)

    for i in range(1, stopword):
        results = model.predict([context, ans, ques], batch_size=batchsize)
        for c, s in enumerate(results):
            ans[c][i] = np.argmax(s)

    act1 = get_activations(model, [context, ans, ques],
                           layer_name='activation_1')
    act2 = get_activations(model, [context, ans, ques],
                           layer_name='activation_2')
    act1_softmax_val_path = '../qadatasetKstudy/outdir/viz/{}-{}-act1-stopword-{}.txt'.format(
        fids[0],
        outfile.split('.')[0], stopword)
    act2_softmax_val_path = '../qadatasetKstudy/outdir/viz/{}-{}-act2-stopword-{}.txt'.format(
        fids[0],
        outfile.split('.')[0], stopword)
    # Both files are closed even if display_activations raises.
    with open(act1_softmax_val_path, 'w') as act1_prob_file, \
            open(act2_softmax_val_path, 'w') as act2_prob_file:
        for j in act1:
            display_activations(j, 'context_activation', fids,
                                act1_prob_file, outfile, stopword)
        for j in act2:
            display_activations(j, 'context2_activation', fids,
                                act2_prob_file, outfile, stopword)

    final_data = {}
    for fid, an in zip(data.keys(), ans):
        final_data[fid] = seq2sent(an, anstok)

    # The result used to be built but never returned.
    return final_data
Beispiel #14
0
def translate(code, sbt=False, sml=None):
    """Greedily decode a comment for one code string.

    ``code`` is split on single spaces; its first 100 tokens are mapped
    through ``datstok.w2i`` (unknown tokens become 0). Relies on the
    module-level names ``model``, ``datstok``, ``comstok``, ``comlen`` and
    ``st``.

    Args:
        code: space-separated code string.
        sbt: when truthy, ``sml`` is passed to the model as a third input.
        sml: extra model input, only used when ``sbt`` is truthy.

    Returns:
        list of the space-separated tokens of ``seq2sent`` applied to the
        decoded sequence.
    """
    inpt = [np.zeros(100)]
    # Only the first 100 tokens fit into the input vector.
    for idx, token in enumerate(code.split(' ')[:100]):
        known = token in datstok.w2i.keys()
        inpt[0][idx] = datstok.w2i[token] if known else 0

    sequence = np.zeros(comlen)
    sequence[0] = st
    coms = [sequence]

    for pos in range(1, comlen):
        inputs = [inpt, coms, sml] if sbt else [inpt, coms]
        results = model.predict(inputs, batch_size=1)
        for row, scores in enumerate(results):
            coms[row][pos] = np.argmax(scores)

    return seq2sent(coms[0], comstok).split(' ')
Beispiel #15
0
def predictor(codeList):
    """Flag each code string by whether ``key`` appears in its prediction.

    Every string in ``codeList`` is split on single spaces, its first 100
    tokens are mapped through ``datstok.w2i`` (unknown tokens become 0), and
    a comment is decoded greedily for all inputs in one batch. Relies on the
    module-level names ``model``, ``datstok``, ``comstok``, ``comlen``,
    ``st`` and ``key``.

    Args:
        codeList: sequence of space-separated code strings.

    Returns:
        ``np.ndarray`` with one entry per input: ``1`` when ``key`` is one of
        the space-separated tokens of the decoded sentence, otherwise ``0``.
    """
    retList = []
    inpts = np.zeros((len(codeList), 100))
    coms = np.zeros((len(codeList), comlen))
    coms[:, 0] = st

    for i, c in enumerate(codeList):
        for j, w in enumerate(c.split(' ')):
            # The input row holds 100 tokens; without this guard a longer
            # snippet raised IndexError.
            if j >= 100:
                break
            if w not in datstok.w2i.keys():
                inpts[i][j] = 0
            else:
                inpts[i][j] = datstok.w2i[w]
    for i in range(1, comlen):
        results = model.predict([inpts, coms], batch_size=len(codeList))
        for c, s in enumerate(results):
            coms[c][i] = np.argmax(s)

    for c in coms:
        if key in seq2sent(c, comstok).split(' '):
            retList.append(1)
        else:
            retList.append(0)
    return np.array(retList)
Beispiel #16
0
    def translate(self, code, sbt, sml):
        """Greedily decode a comment for one code string.

        ``code`` is split on single spaces; its first 100 tokens are mapped
        through ``self.datstok.w2i`` (unknown tokens become 0). Uses
        ``self.model``, ``self.st`` and ``self.comstok``; the sequence length
        comes from the module-level ``comlen``.

        Args:
            code: space-separated code string.
            sbt: when truthy, ``[sml]`` is passed to the model as a third
                input.
            sml: extra model input, only used when ``sbt`` is truthy.

        Returns:
            list of the space-separated tokens of ``seq2sent`` applied to
            the decoded sequence.
        """
        inp = [np.zeros(100)]
        # Only the first 100 tokens fit into the input vector.
        for idx, token in enumerate(code.split(' ')[:100]):
            known = token in self.datstok.w2i.keys()
            inp[0][idx] = self.datstok.w2i[token] if known else 0

        sequence = np.zeros(comlen)
        sequence[0] = self.st
        coms = [sequence]

        for pos in range(1, comlen):
            inputs = [inp, coms, [sml]] if sbt else [inp, coms]
            results = self.model.predict(inputs, batch_size=1)
            for row, scores in enumerate(results):
                coms[row][pos] = np.argmax(scores)

        return seq2sent(coms[0], self.comstok).split(' ')
Beispiel #17
0
def gendescr_3inp(model,
                  data,
                  anstok,
                  anslen,
                  batchsize,
                  config,
                  strat='greedy'):
    """Greedily decode answers with a ``[context, ans, ques]`` model.

    Positions ``1 .. anslen - 1`` of every answer are filled with the argmax
    of the model output. ``config`` and ``strat`` are accepted but not used.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to ``(context, ans, ques)``.
        anstok: tokenizer handed to ``seq2sent``.
        anslen: length of an answer sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: unused.
        strat: unused; only greedy decoding is implemented.

    Returns:
        dict mapping each id to ``seq2sent(an, anstok)``.
    """
    context, ans, ques = (np.array(column) for column in zip(*data.values()))

    for pos in range(1, anslen):
        results = model.predict([context, ans, ques], batch_size=batchsize)
        for row, scores in enumerate(results):
            ans[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(an, anstok)
        for fid, an in zip(data.keys(), ans)
    }
Beispiel #18
0
def gendescr_graphastpc(model, data, comstok, comlen, batchsize, config, strat,
                        bw):
    """Generate comments greedily, or delegate to beam search.

    When ``strat == 'beam'`` the call is forwarded unchanged to
    ``gendescr_graphast_beam`` with beam width ``bw``. Otherwise positions
    ``1 .. comlen - 1`` of every comment are filled with the argmax of the
    model output.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping from id to
            ``(tdat, com, wsmlnodes, wsmledges, pdat)``.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: length of a comment sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: only passed on to the beam implementation.
        strat: decoding strategy; only ``'beam'`` is treated specially.
        bw: beam width, used only for the beam strategy.

    Returns:
        dict mapping each id to ``seq2sent(com, comstok)``.
    """
    if strat == 'beam':
        return gendescr_graphast_beam(model, data, comstok, comlen, batchsize,
                                      config, bw)

    tdats, coms, wsmlnodes, wsmledges, pdats = (
        np.array(column) for column in zip(*data.values()))

    for pos in range(1, comlen):
        results = model.predict([tdats, coms, wsmlnodes, wsmledges, pdats],
                                batch_size=batchsize)
        for row, scores in enumerate(results):
            coms[row][pos] = np.argmax(scores)

    return {
        fid: seq2sent(com, comstok)
        for fid, com in zip(data.keys(), coms)
    }