def gendescr_pathast_2(model, data, comstok, comlen, batchsize, config, w=1):
    """Decode sequences by always picking the (w+1)-th highest scoring token.

    At every position the ``w`` highest entries of each prediction row are
    zeroed out and the argmax of what remains is written into the sequence.
    With the default ``w=1`` this selects the second-best token.

    Args:
        model: object exposing ``predict(inputs, batch_size=...)``.
        data: mapping of id -> (tdat, sdat, com, wsmlpath) tuples.
        comstok: tokenizer handed to ``seq2sent``.
        comlen: number of positions in each output sequence.
        batchsize: batch size forwarded to ``model.predict``.
        config: mapping; ``config['use_sdats']`` selects whether ``sdats``
            is fed to the model.
        w: how many top-scoring tokens to discard before choosing. A value
            of zero or less discards nothing (plain greedy choice).

    Returns:
        dict mapping each id in ``data`` to ``seq2sent(sequence, comstok)``.
    """
    tdats, sdats, coms, wsmlpaths = zip(*data.values())
    tdats = np.array(tdats)
    coms = np.array(coms)
    sdats = np.array(sdats)
    wsmlpaths = np.array(wsmlpaths)

    for i in range(1, comlen):
        if config['use_sdats']:
            inputs = [tdats, sdats, coms, wsmlpaths]
        else:
            inputs = [tdats, coms, wsmlpaths]
        results = model.predict(inputs, batch_size=batchsize)

        for c, s in enumerate(results):
            # Knock out the w best candidates (in place), then take the best
            # of the remainder. range(w) is empty for w <= 0, so the choice
            # below is always defined instead of hitting an unbound name.
            for _ in range(w):
                s[np.argmax(s)] = 0
            coms[c][i] = np.argmax(s)

    return {fid: seq2sent(com, comstok) for fid, com in zip(data.keys(), coms)}
def gendescr_pathast_threed(model, data, comstok, comlen, batchsize, config, strat, bw):
    """Generate one sequence per entry of ``data``.

    When ``strat == 'beam'`` the work is delegated to
    ``gendescr_pathast_beam`` with beam width ``bw``. Otherwise each position
    from 1 to ``comlen - 1`` is filled with the argmax of the model's
    prediction for that row.

    ``data`` maps ids to (tdat, sdat, com, wsmlpath) tuples;
    ``config['use_sdats']`` decides whether ``sdats`` is part of the model
    input. Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    if strat == 'beam':
        return gendescr_pathast_beam(model, data, comstok, comlen, batchsize, config, bw)

    tdats, sdats, coms, wsmlpaths = zip(*data.values())
    tdats = np.array(tdats)
    coms = np.array(coms)
    sdats = np.array(sdats)
    wsmlpaths = np.array(wsmlpaths)

    for step in range(1, comlen):
        if config['use_sdats']:
            model_inputs = [tdats, sdats, coms, wsmlpaths]
        else:
            model_inputs = [tdats, coms, wsmlpaths]
        predictions = model.predict(model_inputs, batch_size=batchsize)

        # Greedy choice: write the best token for this step into every row.
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    return {fid: seq2sent(seq, comstok) for fid, seq in zip(data.keys(), coms)}
def predictorLime(codeList):
    """Classify code snippets by whether the generated text contains ``key``.

    Relies on module-level names defined outside this function: ``comlen``,
    ``st``, ``datstok``, ``batchSize``, ``model``, ``key``, ``comstok`` and
    ``seq2sent``.

    Each snippet is split on single spaces, truncated to 100 tokens and
    mapped through ``datstok.w2i`` (unknown tokens become 0). Sequences are
    then decoded greedily in batches of ``batchSize``.

    Args:
        codeList: sequence of space-separated code strings.

    Returns:
        ``np.ndarray`` with one row per snippet: ``[0, 1]`` when ``key`` is
        among the space-separated words of the decoded sequence, else
        ``[1, 0]``.
    """
    print(len(codeList))
    total = len(codeList)

    inpts = np.zeros((total, 100))
    coms = np.zeros((total, comlen))
    coms[:, 0] = st

    w2i = datstok.w2i
    for i, code in enumerate(codeList):
        for j, word in enumerate(code.split(' ')[:100]):
            inpts[i][j] = w2i.get(word, 0)

    # Walk the inputs in consecutive windows. The last window is clamped to
    # the array length so the final snippet is decoded too and no empty
    # batch is ever sent to the model.
    for l in range(0, total, batchSize):
        r = min(l + batchSize, total)
        tmpInpts = inpts[l:r]
        tmpComs = coms[l:r]  # basic slice: writes below land in ``coms``
        for i in range(1, comlen):
            results = model.predict([tmpInpts, tmpComs], batch_size=r - l)
            for c, s in enumerate(results):
                tmpComs[c][i] = np.argmax(s)

    retList = []
    for c in coms:
        if key in seq2sent(c, comstok).split(' '):
            retList.append([0, 1])
        else:
            retList.append([1, 0])
    return np.array(retList)
def gendescr_4inp(model, data, comstok, comlen, batchsize, config, strat='greedy'):
    """Greedily decode sequences with a four-input model.

    ``data`` maps ids to (tdat, sdat, com, sml) tuples. Positions
    1 .. ``comlen - 1`` of every ``com`` are filled with the argmax of
    ``model.predict([tdats, sdats, coms, smls])``. ``config`` and ``strat``
    are accepted but not consulted; only greedy search is implemented.

    Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    tdats, sdats, coms, smls = zip(*data.values())
    tdats = np.array(tdats)
    sdats = np.array(sdats)
    coms = np.array(coms)
    smls = np.array(smls)

    for step in range(1, comlen):
        predictions = model.predict([tdats, sdats, coms, smls], batch_size=batchsize)
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    decoded = {}
    for fid, seq in zip(data.keys(), coms):
        decoded[fid] = seq2sent(seq, comstok)
    return decoded
def gendescr_pathast(model, data, comstok, comlen, batchsize, config, refcoms, count, strat='greedy'):
    """Greedy decoding with position 1 taken from reference sequences.

    ``data`` maps ids to (tdat, sdat, com, wsmlpath) tuples. Column 1 of the
    output sequences is copied from column 1 of ``refcoms.values()`` (all of
    them, so ``refcoms`` must line up with ``data``); decoding then starts at
    position 2. ``config['use_sdats']`` selects whether ``sdats`` is fed to
    the model. ``count`` and ``strat`` are accepted but not used here.

    Prints the size of ``refcoms`` and the shape of the sequence array.
    Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    tdats, sdats, coms, wsmlpaths = zip(*data.values())
    tdats = np.array(tdats)
    coms = np.array(coms)
    sdats = np.array(sdats)
    wsmlpaths = np.array(wsmlpaths)

    print('refcoms len:', len(refcoms))
    print('coms len:', coms.shape)

    # Seed the first generated position from the references.
    reference = np.array(list(refcoms.values()))
    coms[:, 1] = reference[:, 1]

    for step in range(2, comlen):
        if config['use_sdats']:
            model_inputs = [tdats, sdats, coms, wsmlpaths]
        else:
            model_inputs = [tdats, coms, wsmlpaths]
        predictions = model.predict(model_inputs, batch_size=batchsize)
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    return {fid: seq2sent(seq, comstok) for fid, seq in zip(data.keys(), coms)}
def gendescr_2inp(model, data, comstok, comlen, batchsize, config, strat, beamwidth, outfile, stopword):
    """Greedily decode up to ``stopword`` and dump 'activation_1' outputs.

    ``data`` maps ids to (tdat, com) pairs. Positions 1 .. ``stopword - 1``
    of every ``com`` are filled greedily. Afterwards the activations of the
    layer named ``'activation_1'`` are fetched with ``get_activations`` and
    each one is passed to ``display_activations`` together with a text file
    opened for writing under ``/nfs/projects/attn-to-fc/data/outdir/viz/``.

    ``comlen``, ``config``, ``strat`` and ``beamwidth`` are accepted but not
    used; only greedy search is implemented.

    Returns:
        dict of id -> ``seq2sent(sequence, comstok)``.
    """
    fids = [*data.keys()]
    tdats, coms = list(zip(*data.values()))
    tdats = np.array(tdats)
    coms = np.array(coms)

    for i in range(1, stopword):
        results = model.predict([tdats, coms], batch_size=batchsize)
        for c, s in enumerate(results):
            coms[c][i] = np.argmax(s)

    act1 = get_activations(model, [tdats, coms], layer_name='activation_1')
    act1_softmax_val_path = '/nfs/projects/attn-to-fc/data/outdir/viz/{}-{}-act1-stopword-{}.txt'.format(
        fids[0], outfile.split('.')[0], stopword)

    # The context manager guarantees the file is closed even if
    # display_activations raises part-way through.
    with open(act1_softmax_val_path, 'w') as act1_prob_file:
        for j in act1:
            display_activations(j, 'tdats_activation', fids, act1_prob_file, outfile, stopword)

    final_data = {}
    for fid, com in zip(data.keys(), coms):
        final_data[fid] = seq2sent(com, comstok)
    return final_data
def gendescr_astattendfcpluspc(model, modelpc, data, data1, comstok, comlen, batchsize, strat='greedy'):
    """Greedy decoding with an equal-weight ensemble of two models.

    ``data`` maps ids to (tdat, pdat, sml, com) tuples; ``data1`` supplies
    ``sdats`` as the second element of its 4-tuples. At each position the
    per-row outputs of ``model`` and ``modelpc`` are averaged element-wise
    and the argmax of the average is written into the sequence. ``strat`` is
    accepted but not used.

    Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    tdats, pdats, smls, coms = list(zip(*data.values()))
    _, sdats, _, _ = list(zip(*data1.values()))

    tdats = np.array(tdats)
    coms = np.array(coms)
    smls = np.array(smls)
    sdats = np.array(sdats)
    pdats = np.array(pdats)

    for step in range(1, comlen):
        main_out = model.predict([tdats, sdats, coms, smls], batch_size=batchsize)
        pc_out = modelpc.predict([tdats, pdats, smls, coms], batch_size=batchsize)

        for row, (main_scores, pc_scores) in enumerate(zip(main_out, pc_out)):
            averaged = np.mean([main_scores, pc_scores], axis=0)
            coms[row][step] = np.argmax(averaged)

    decoded = {}
    for fid, seq in zip(data.keys(), coms):
        decoded[fid] = seq2sent(seq, comstok)
    return decoded
def gendescr_code2seqplusfc(model, modelpc, data, comstok, comlen, batchsize, strat='greedy'):
    """Greedy decoding with an equal-weight ensemble of two models.

    ``data`` maps ids to (tdat, sdat, com, pathast) tuples. ``model`` is fed
    ``[tdats, coms, pathast]`` and ``modelpc`` is fed
    ``[tdats, sdats, coms, pathast]``; for every row the two outputs are
    averaged element-wise and the argmax of the average becomes the next
    token. ``strat`` is accepted but not used.

    Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    tdats, sdats, coms, pathast = zip(*data.values())
    tdats = np.array(tdats)
    coms = np.array(coms)
    sdats = np.array(sdats)
    pathast = np.array(pathast)

    for step in range(1, comlen):
        main_out = model.predict([tdats, coms, pathast], batch_size=batchsize)
        pc_out = modelpc.predict([tdats, sdats, coms, pathast], batch_size=batchsize)

        for row, (main_scores, pc_scores) in enumerate(zip(main_out, pc_out)):
            averaged = np.mean([main_scores, pc_scores], axis=0)
            coms[row][step] = np.argmax(averaged)

    return {fid: seq2sent(seq, comstok) for fid, seq in zip(data.keys(), coms)}
def gendescr_pathast_beam(model, data, comstok, comlen, batchsize, config, w):
    """Beam-search decoding with beam width ``w``.

    ``data`` maps ids to (tdat, sdat, com, wsmlpath) tuples;
    ``config['use_sdats']`` selects whether ``sdats`` is fed to the model.

    Each beam keeps a running cost (sum of ``-log`` of the chosen scores).
    Position 1 is seeded with the ``w`` best tokens of the first prediction.
    For every later position each beam proposes its ``w`` best extensions,
    giving ``w * w`` candidates per row, of which the ``w`` cheapest survive.
    The sequences held by beam 0 are returned.

    Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    tdats, sdats, coms, wsmlpaths = zip(*data.values())
    tdats = np.array(tdats)
    coms = np.array(coms)
    sdats = np.array(sdats)
    wsmlpaths = np.array(wsmlpaths)

    bms = coms.shape[0]
    beamcoms = np.reshape(np.tile(coms, [w, 1]), [w, bms, comlen])
    beamprobs = np.zeros((w, bms))
    psmat = np.zeros((bms, w * w, comlen))  # candidate sequences
    prmat = np.zeros((bms, w * w))          # candidate costs

    def run_model(partial):
        # Single place that decides which inputs the model receives.
        if config['use_sdats']:
            return model.predict([tdats, sdats, partial, wsmlpaths], batch_size=batchsize)
        return model.predict([tdats, partial, wsmlpaths], batch_size=batchsize)

    # Seed: beam j gets the j-th best first token of every row.
    for row, scores in enumerate(run_model(coms)):
        for beam in range(w):
            tok = np.argmax(scores)
            cost = -np.log(np.max(scores))
            scores[tok] = 0  # remove so the next beam sees the runner-up
            beamprobs[beam][row] = cost
            beamcoms[beam][row][1] = tok

    for pos in range(2, comlen):
        # Expand every beam into w candidates per row.
        for beam in range(w):
            for row, scores in enumerate(run_model(beamcoms[beam])):
                for k in range(w):
                    tok = np.argmax(scores)
                    cost = -np.log(np.max(scores))
                    scores[tok] = 0
                    slot = (beam * w) + k
                    prmat[row][slot] = beamprobs[beam][row] + cost
                    psmat[row][slot] = beamcoms[beam][row]
                    psmat[row][slot][pos] = tok

        # Keep the w cheapest candidates per row, cheapest first.
        for row, costs in enumerate(prmat):
            for beam in range(w):
                best = np.argmin(costs)
                best_cost = np.min(costs)
                costs[best] = np.inf  # exclude from the next pick
                beamprobs[beam][row] = best_cost
                beamcoms[beam][row] = psmat[row][best]

    coms = beamcoms[0]
    decoded = {}
    for fid, seq in zip(data.keys(), coms):
        decoded[fid] = seq2sent(seq, comstok)
    return decoded
def gendescr_2inp(model, data, comstok, comlen, batchsize, config, refcoms, count, strat='greedy'):
    """Greedy decoding with position 1 taken from a slice of ``refcoms``.

    ``data`` maps ids to (tdat, com) pairs. Column 1 of the sequences is
    copied from column 1 of the ``count``-th window of ``batchsize``
    reference sequences in ``refcoms.values()``; decoding then starts at
    position 2. ``config`` and ``strat`` are accepted but not used.

    Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    tdats, coms = list(zip(*data.values()))
    tdats = np.array(tdats)
    coms = np.array(coms)

    # Window of references belonging to this batch.
    window_start = count * batchsize
    window_end = (count + 1) * batchsize
    reference = np.array(list(refcoms.values())[window_start:window_end])
    coms[:, 1] = reference[:, 1]

    for step in range(2, comlen):
        predictions = model.predict([tdats, coms], batch_size=batchsize)
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    return {fid: seq2sent(seq, comstok) for fid, seq in zip(data.keys(), coms)}
def gendescr_projcon(model, data, comstok, comlen, batchsize, config, strat, bw):
    """Greedily decode sequences with a ``[tdats, pdats, coms]`` model.

    ``data`` maps ids to (tdat, pdat, com) tuples. ``config``, ``strat`` and
    ``bw`` are accepted but not used. Returns a dict of
    id -> ``seq2sent(sequence, comstok)``.
    """
    tdats, pdats, coms = zip(*data.values())
    tdats = np.array(tdats)
    coms = np.array(coms)
    pdats = np.array(pdats)

    for step in range(1, comlen):
        predictions = model.predict([tdats, pdats, coms], batch_size=batchsize)
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    decoded = {}
    for fid, seq in zip(data.keys(), coms):
        decoded[fid] = seq2sent(seq, comstok)
    return decoded
def gendescr_3inp(model, data, comstok, comlen, batchsize, config, strat, bw):
    """Decode sequences with a ``[tdats, coms, smls]`` model.

    With ``strat == 'beam'`` the call is forwarded to ``gendescr_3inp_beam``
    using beam width ``bw``; any other value performs greedy decoding over
    positions 1 .. ``comlen - 1``. ``data`` maps ids to (tdat, com, sml)
    tuples. Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    if strat == 'beam':
        return gendescr_3inp_beam(model, data, comstok, comlen, batchsize, config, bw)

    tdats, coms, smls = list(zip(*data.values()))
    tdats = np.array(tdats)
    coms = np.array(coms)
    smls = np.array(smls)

    for step in range(1, comlen):
        predictions = model.predict([tdats, coms, smls], batch_size=batchsize)
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    return {fid: seq2sent(seq, comstok) for fid, seq in zip(data.keys(), coms)}
def gendescr_3inp(model, data, anstok, anslen, batchsize, config, strat, beamwidth, outfile, stopword):
    """Greedily decode up to ``stopword`` and dump two activation layers.

    ``data`` maps ids to (context, ans, ques) tuples. Positions
    1 .. ``stopword - 1`` of every ``ans`` are filled greedily. Afterwards
    the activations of the layers named ``'activation_1'`` and
    ``'activation_2'`` are fetched with ``get_activations`` and each one is
    passed to ``display_activations`` together with a text file opened for
    writing under ``../qadatasetKstudy/outdir/viz/``.

    ``anslen``, ``config``, ``strat`` and ``beamwidth`` are accepted but not
    used; only greedy search is implemented.

    Returns:
        dict of id -> ``seq2sent(sequence, anstok)``, matching the other
        ``gendescr_*`` functions.
    """
    fids = [*data.keys()]
    context, ans, ques = list(zip(*data.values()))
    context = np.array(context)
    ans = np.array(ans)
    ques = np.array(ques)

    for i in range(1, stopword):
        results = model.predict([context, ans, ques], batch_size=batchsize)
        for c, s in enumerate(results):
            ans[c][i] = np.argmax(s)

    act1 = get_activations(model, [context, ans, ques], layer_name='activation_1')
    act2 = get_activations(model, [context, ans, ques], layer_name='activation_2')

    act1_softmax_val_path = '../qadatasetKstudy/outdir/viz/{}-{}-act1-stopword-{}.txt'.format(
        fids[0], outfile.split('.')[0], stopword)
    act2_softmax_val_path = '../qadatasetKstudy/outdir/viz/{}-{}-act2-stopword-{}.txt'.format(
        fids[0], outfile.split('.')[0], stopword)

    # Both files are closed on exit, including when display_activations
    # raises part-way through.
    with open(act1_softmax_val_path, 'w') as act1_prob_file, \
            open(act2_softmax_val_path, 'w') as act2_prob_file:
        for j in act1:
            display_activations(j, 'context_activation', fids, act1_prob_file, outfile, stopword)
        for j in act2:
            display_activations(j, 'context2_activation', fids, act2_prob_file, outfile, stopword)

    final_data = {}
    for fid, an in zip(data.keys(), ans):
        final_data[fid] = seq2sent(an, anstok)
    # Previously the result was built but never handed back to the caller.
    return final_data
def translate(code, sbt=False, sml=None):
    """Greedily decode a single code string into a list of words.

    Uses module-level ``datstok``, ``comlen``, ``st``, ``model``,
    ``comstok`` and ``seq2sent``. ``code`` is split on single spaces, cut to
    100 tokens and mapped through ``datstok.w2i`` (unknown tokens become 0).
    When ``sbt`` is true, ``sml`` is passed to the model as a third input.

    Returns ``seq2sent(sequence, comstok)`` split on single spaces.
    """
    inpt = [np.zeros(100)]
    for pos, word in enumerate(code.split(' ')[:100]):
        inpt[0][pos] = datstok.w2i[word] if word in datstok.w2i.keys() else 0

    sequence = np.zeros(comlen)
    sequence[0] = st
    coms = [sequence]

    for step in range(1, comlen):
        if sbt:
            predictions = model.predict([inpt, coms, sml], batch_size=1)
        else:
            predictions = model.predict([inpt, coms], batch_size=1)
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    sentence = seq2sent(coms[0], comstok)
    return sentence.split(' ')
def predictor(codeList):
    """Flag code snippets whose generated text contains ``key``.

    Relies on module-level names defined outside this function: ``comlen``,
    ``st``, ``datstok``, ``model``, ``key``, ``comstok`` and ``seq2sent``.

    Each snippet is split on single spaces and mapped through
    ``datstok.w2i`` (unknown tokens become 0). Only the first 100 tokens are
    kept, matching the fixed input width; longer snippets previously raised
    ``IndexError``. All snippets are decoded greedily as one batch.

    Args:
        codeList: sequence of space-separated code strings.

    Returns:
        ``np.ndarray`` with one entry per snippet: 1 when ``key`` is among
        the space-separated words of the decoded sequence, else 0.
    """
    inpts = np.zeros((len(codeList), 100))
    coms = np.zeros((len(codeList), comlen))
    coms[:, 0] = st

    w2i = datstok.w2i
    for i, code in enumerate(codeList):
        # Truncate to the input width so long snippets cannot overflow it.
        for j, word in enumerate(code.split(' ')[:100]):
            inpts[i][j] = w2i.get(word, 0)

    for i in range(1, comlen):
        results = model.predict([inpts, coms], batch_size=len(codeList))
        for c, s in enumerate(results):
            coms[c][i] = np.argmax(s)

    retList = [
        1 if key in seq2sent(c, comstok).split(' ') else 0
        for c in coms
    ]
    return np.array(retList)
def translate(self, code, sbt, sml):
    """Greedily decode a single code string into a list of words.

    Reads ``self.datstok``, ``self.st``, ``self.model`` and
    ``self.comstok``; the sequence length comes from the module-level
    ``comlen`` (NOTE(review): not ``self.comlen`` - confirm this is
    intended). ``code`` is split on single spaces, cut to 100 tokens and
    mapped through ``self.datstok.w2i`` (unknown tokens become 0). When
    ``sbt`` is true, ``[sml]`` is passed to the model as a third input.

    Returns ``seq2sent(sequence, self.comstok)`` split on single spaces.
    """
    inp = [np.zeros(100)]
    vocab = self.datstok.w2i
    for pos, word in enumerate(code.split(' ')[:100]):
        inp[0][pos] = vocab[word] if word in vocab.keys() else 0

    sequence = np.zeros(comlen)
    sequence[0] = self.st
    coms = [sequence]

    for step in range(1, comlen):
        if sbt:
            predictions = self.model.predict([inp, coms, [sml]], batch_size=1)
        else:
            predictions = self.model.predict([inp, coms], batch_size=1)
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    sentence = seq2sent(coms[0], self.comstok)
    return sentence.split(' ')
def gendescr_3inp(model, data, anstok, anslen, batchsize, config, strat='greedy'):
    """Greedily decode sequences with a ``[context, ans, ques]`` model.

    ``data`` maps ids to (context, ans, ques) tuples. Positions
    1 .. ``anslen - 1`` of every ``ans`` are filled with the argmax of the
    model's prediction for that row. ``config`` and ``strat`` are accepted
    but not used; only greedy search is implemented.

    Returns a dict of id -> ``seq2sent(sequence, anstok)``.
    """
    context, ans, ques = list(zip(*data.values()))
    context = np.array(context)
    ans = np.array(ans)
    ques = np.array(ques)

    for step in range(1, anslen):
        predictions = model.predict([context, ans, ques], batch_size=batchsize)
        for row, scores in enumerate(predictions):
            ans[row][step] = np.argmax(scores)

    return {fid: seq2sent(seq, anstok) for fid, seq in zip(data.keys(), ans)}
def gendescr_graphastpc(model, data, comstok, comlen, batchsize, config, strat, bw):
    """Decode sequences with a five-input model.

    With ``strat == 'beam'`` the call is forwarded to
    ``gendescr_graphast_beam`` using beam width ``bw``; any other value
    performs greedy decoding over positions 1 .. ``comlen - 1``. ``data``
    maps ids to (tdat, com, wsmlnodes, wsmledges, pdat) tuples, fed to the
    model in that order.

    Returns a dict of id -> ``seq2sent(sequence, comstok)``.
    """
    if strat == 'beam':
        return gendescr_graphast_beam(model, data, comstok, comlen, batchsize, config, bw)

    tdats, coms, wsmlnodes, wsmledges, pdats = zip(*data.values())
    tdats = np.array(tdats)
    coms = np.array(coms)
    pdats = np.array(pdats)
    wsmlnodes = np.array(wsmlnodes)
    wsmledges = np.array(wsmledges)

    for step in range(1, comlen):
        predictions = model.predict(
            [tdats, coms, wsmlnodes, wsmledges, pdats], batch_size=batchsize)
        for row, scores in enumerate(predictions):
            coms[row][step] = np.argmax(scores)

    decoded = {}
    for fid, seq in zip(data.keys(), coms):
        decoded[fid] = seq2sent(seq, comstok)
    return decoded