def rae_trainning_normal(wd_extra, w1, w2, neta, regu=None, wpresent=None):
    """One training pass over wd_extra using plain gradient updates.

    Accumulates the gradients dw1 (keyed by wpresent) and dw2 returned by
    rae_operation, then calls weight_update once on the totals. Returns the
    updated weights plus reconstruction-error statistics.
    """
    err = 0.0
    kerr = 0.0
    samp_err = []
    branch_samp_err = []
    branchs = 0
    t_dw1 = {}
    t_dw2 = 0.0
    t_epnum1 = {}
    for i in wpresent:
        t_epnum1[i] = 0
        t_dw1[i] = 0.0
    t_epnum2 = 0.0
    for iword in wd_extra:
        try:
            vect = [preprocess(i) for i in iword['Word_vects']
                    ] + [None for i in range(len(iword['h_vect']))]
            vect_, dw1, dw2, epnum1, epnum2 = rae_operation(
                vect1=vect,
                w1=w1,
                w2=w2,
                w_size=iword['w_size'],
                h_vect=iword['h_vect'],
                Word_ids=iword['Word_ids'],
                wp=iword['wp'],
                h_index=iword['h_index'],
                dep_order=iword['dep_order'],
                hh_index=iword['hh_index'],
                p=iword['p'],
                regu=regu,
                wpresent=wpresent)
            # Reconstruction error over the original word vectors.
            terr = 0.0
            bc = 0
            for iv in range(iword['w_size']):
                terr += np.sum(np.power((vect[iv] - vect_[iv]), 2))
                bc += 1
            branchs += bc
            err += terr
            if bc:
                samp_err.append(terr)
                branch_samp_err.append(terr / bc)
            t_dw2 += dw2
            t_epnum2 += epnum2
        except KeyError:
            kerr += 1
            continue
        # Accumulate the per-key gradients from this sample.
        for wi in wpresent:
            t_dw1[wi] += dw1[wi]
            t_epnum1[wi] += epnum1[wi]
    w1, w2 = weight_update(w1=w1,
                           w2=w2,
                           dw1=t_dw1,
                           dw2=t_dw2,
                           neta=neta,
                           regu=regu,
                           epnum1=t_epnum1,
                           epnum2=t_epnum2,
                           wpresent=wpresent)
    return w1, w2, None, None, kerr, err, samp_err, branch_samp_err, branchs
def rae_trainning_normal(wd_extra, neta=None, regu=0.9, w=None, wpresent=None):
    """One training pass over wd_extra using a single weight dictionary.

    Accumulates the gradients (keyed by wpresent) returned by rae_operation
    and applies adam_weight_update on the collected grads. Returns the
    updated weights plus reconstruction-error statistics.
    """
    err = 0.0
    kerr = 0.0
    samp_err = []
    branch_samp_err = []
    branchs = 0
    t_dw = {}
    t_epnum = {}
    grads = {}
    for i in wpresent:
        t_dw[i] = 0.0
        t_epnum[i] = 0
    for iword in wd_extra:
        try:
            # vect = [i for i in iword['Word_vects']] + [None for i in range(len(iword['h_vect']))]
            vect = [preprocess(i) for i in iword['Word_vects']
                    ] + [None for i in range(len(iword['h_vect']))]
            vect_, dw, epnum, grads = rae_operation(
                vect=vect,
                w=w,
                w_size=iword['w_size'],
                h_vect=iword['h_vect'],
                Word_ids=iword['Word_ids'],
                wp=iword['wp'],
                h_index=iword['h_index'],
                dep_order=iword['dep_order'],
                hh_index=iword['hh_index'],
                p=iword['p'],
                neta=neta,
                regu=regu,
                wpresent=wpresent)
            # Reconstruction error over the original word vectors.
            terr = 0.0
            bc = 0
            for iv in range(iword['w_size']):
                terr += np.sum(np.power((vect[iv] - vect_[iv]), 2))
                bc += 1
            branchs += bc
            err += terr
            if bc:
                samp_err.append(terr)
                branch_samp_err.append(terr / bc)
        except RuntimeError:
            kerr += 1
            continue
        # Accumulate the per-key gradients from this sample.
        for wi in wpresent:
            t_dw[wi] += dw[wi]
            t_epnum[wi] += epnum[wi]
    # weight_update(w, t_dw, neta=neta, regu=regu, epnum=t_epnum, wpresent=wpresent)
    adam_weight_update(w, grads, neta=neta, regu=regu, epnum=t_epnum,
                       wpresent=wpresent)
    return w, t_dw, t_epnum, kerr, err, samp_err, branch_samp_err, branchs
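
# A minimal sketch of how the Adam-based training pass above might be driven
# over several epochs. build_wd_extra() and the hyper-parameter values are
# assumptions for illustration only; neither is defined in this module, so
# the sketch is left commented out.
#
# w = pickle.load(open(Global.wfname, 'rb'))   # initial weight dictionary
# wd_extra = build_wd_extra(corpus)            # per-sentence dicts with
#                                              # 'Word_vects', 'h_vect', 'wp', ...
# wpresent = list(w.keys())
# for epoch in range(10):
#     (w, t_dw, t_epnum, kerr, err,
#      samp_err, branch_samp_err, branchs) = rae_trainning_normal(
#          wd_extra, neta=0.001, regu=0.9, w=w, wpresent=wpresent)
#     print('epoch %d: err %.4f over %d branches (%d samples skipped)'
#           % (epoch, err, branchs, kerr))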
def get_vect_by_wd_dep(flag, words_data, mtype='normal'):
    if flag == 't':
        Word_ids = get_words_id(words_data)
        Word_vects = []
        for i in sorted(Word_ids):
            Word_vects.append(
                preprocess(
                    get_word_vect(words_data[i]['word'].lower(),
                                  Global.v_size)))
        w_size = len(Word_ids)
        p = get_parents(words_data)
        d = get_dep(words_data)
        dep_order, d1 = pdep_2_deporder_dep(p, d)
        h_index, h_vect, wp, _ = dep_2_hid_var(p, dep_order, d1, Word_ids)
        # Word_vects = get_words_vect(words_data, Word_ids, Global.v_size)
        vect = Word_vects + [None for i in range(len(h_vect))]
        del Word_vects
        w = pickle.load(open(Global.wfname, 'rb'))
        if mtype == 'normal':
            RAE_adam_herical.rae_encoding(vect=vect,
                                          w=w,
                                          w_size=w_size,
                                          h_vect=h_vect,
                                          wp=wp)
        elif mtype == 'deep':
            RAE_adam_herical_deep1.rae_encoding(vect=vect,
                                                w=w,
                                                w_size=w_size,
                                                h_vect=h_vect,
                                                wp=wp)
        chunks = {}
        chunks_vect = {}
        for i in range(len(Word_ids)):
            chunks[i] = words_data[Word_ids[i]]['word']
            chunks_vect[i] = vect[i]
        rev_h_index = {v: k for k, v in h_index.items()}
        count = i + 1
        for i in h_vect:
            if len(i) > 1:
                chunks_vect[count] = vect[h_index[Word_ids[min(i)]]]
                chunks[count] = ' '.join([
                    words_data[rev_h_index[j]]['word']
                    if j >= len(Word_ids) else words_data[Word_ids[j]]['word']
                    for j in i
                ])
                count += 1
        return chunks, chunks_vect
def get_chk_vect_by_wd(flag, words_data, mtype='normal'):
    if flag == 't':
        Word_ids = get_words_id(words_data)
        Word_vects = []
        for i in sorted(Word_ids):
            Word_vects.append(
                preprocess(
                    get_word_vect(words_data[i]['word'].lower(),
                                  Global.v_size)))
        w_size = len(Word_ids)
        p = get_parents(words_data)
        d = get_dep(words_data)
        dep_order, d1 = pdep_2_deporder_dep(p, d)
        h_index, h_vect, wp, _ = dep_2_hid_var(p, dep_order, d1, Word_ids)
        # Word_vects = get_words_vect(words_data, Word_ids, Global.v_size)
        vect = Word_vects + [None for i in range(len(h_vect))]
        del Word_vects
        w = pickle.load(open(Global.wfname, 'rb'))
        if mtype == 'normal':
            RAE_adam_herical.rae_encoding(vect=vect,
                                          w=w,
                                          w_size=w_size,
                                          h_vect=h_vect,
                                          wp=wp)
        elif mtype == 'deep':
            RAE_adam_herical_deep1.rae_encoding(vect=vect,
                                                w=w,
                                                w_size=w_size,
                                                h_vect=h_vect,
                                                wp=wp)
        chks = get_chunks(words_data)
        chks_main = get_chunk_main(chks, dep_order)
        chunks = {}
        chunks_vect = {}
        for c in range(len(chks)):
            chunks[c] = ' '.join([words_data[i]['word'] for i in chks[c]])
        del words_data
        for c in range(len(chks_main)):
            ind = h_index[chks_main[c]]
            chunks_vect[c] = vect[ind]
        return chunks, chunks_vect
def get_chk_vect(flag, line, mtype='normal'):
    if flag == 't':
        line = line_processing(line)
        # print line
        words_data = extract_feature_using_senna(line)
        p = get_parents(words_data)
        d = get_dep(words_data)
        Word_ids = get_words_id(words_data)
        for i in Word_ids:
            words_data[i]['vect'] = preprocess(
                get_word_vect(words_data[i]['word'].lower(), Global.v_size))
        w_size = len(Word_ids)
        dep_order, d1 = pdep_2_deporder_dep(p, d)
        h_index, h_vect, wp, _ = dep_2_hid_var(p, dep_order, d1, Word_ids)
        Word_vects = get_words_vect(words_data, Word_ids, Global.v_size)
        vect = Word_vects + [None for i in h_vect]
        del Word_vects
        w = pickle.load(open(Global.wfname, 'rb'))
        if mtype == 'normal':
            RAE_adam_herical.rae_encoding(vect=vect,
                                          w=w,
                                          w_size=w_size,
                                          h_vect=h_vect,
                                          wp=wp)
        elif mtype == 'deep':
            RAE_adam_herical_deep1.rae_encoding(vect=vect,
                                                w=w,
                                                w_size=w_size,
                                                h_vect=h_vect,
                                                wp=wp)
        chks = get_chunks_by_dep(Word_ids, h_index, h_vect)
        chunks = {}
        chunks_vect = {}
        count = 0
        order = get_order(d1, w_size)
        for m in order:
            chunks[count] = ' '.join([words_data[i]['word'] for i in chks[m]])
            chunks_vect[count] = vect[h_index[m]]
            count += 1
        return chunks, chunks_vect
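
# Example use of get_chk_vect on a raw sentence. This is a hedged sketch: it
# assumes the SENNA feature extractor, Global.wfname and the word-vector
# lookup used above are configured, so it is left commented out rather than
# run at import time.
#
# chunks, chunks_vect = get_chk_vect('t', 'The cat sat on the mat', mtype='normal')
# for k in sorted(chunks):
#     print(k, chunks[k], chunks_vect[k].shape)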
            'Word_vects': Word_vects,
            'wp': wp,
            "dep_order": dep_order,
            "hh_index": hh_index,
            'Word_ids': Word_ids,
            'p': p
        })
    # Build perturbed copies of every ndarray weight for the gradient check.
    for i in w:
        if type(w[i]) == np.ndarray:
            w1[i] = w[i].copy() + e
            w2[i] = w[i].copy() - e
    epnum = {}
    for i in w:
        epnum[i] = 0
    for iword in wd_extra:
        vect1 = [preprocess(i) for i in iword['Word_vects']
                 ] + [None for i in range(len(iword['h_vect']))]
        vect2 = [preprocess(i) for i in iword['Word_vects']
                 ] + [None for i in range(len(iword['h_vect']))]
        o1 = {}
        o2 = {}
        vect_1 = {}
        vect_2 = {}
        rae_encoding(vect=vect1,
                     w=w1,
                     w_size=iword['w_size'],
                     h_vect=iword['h_vect'],
                     wp=iword['wp'])
        vect_1 = rae_decoding(vect=vect1,
                              o=o1,
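
# The fragment above perturbs every ndarray weight by +e (w1) and -e (w2) and
# re-runs encoding/decoding with both copies, i.e. a finite-difference check
# of the analytic RAE gradients. The standalone sketch below illustrates the
# same central-difference idea on a generic loss; the function name and its
# usage are assumptions for illustration and are not part of the original
# module.
import numpy as np


def numerical_gradient(loss_fn, w, e=1e-5):
    """Central-difference estimate of d loss_fn / d w for a float ndarray w."""
    grad = np.zeros_like(w, dtype=float)
    it = np.nditer(w, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = w[idx]
        w[idx] = orig + e          # loss at w + e for this coordinate
        loss_plus = loss_fn(w)
        w[idx] = orig - e          # loss at w - e for this coordinate
        loss_minus = loss_fn(w)
        w[idx] = orig              # restore the weight
        grad[idx] = (loss_plus - loss_minus) / (2.0 * e)
        it.iternext()
    return grad

# Example check against an analytic gradient:
# w0 = np.random.randn(3, 3)
# num = numerical_gradient(lambda w: np.sum(w ** 2), w0.copy())
# assert np.allclose(num, 2 * w0, atol=1e-4)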