def rae_trainning_normal(wd_extra, w1, w2, neta, regu=None, wpresent=None):
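    """Run one batch of plain (non-Adam) RAE training.

    For every preprocessed sample in `wd_extra` this calls `rae_operation`
    to encode and reconstruct the word vectors along the dependency
    structure, accumulates the squared reconstruction error of each leaf
    vector, and sums the returned gradients (`dw1`, `dw2`) and update
    counts (`epnum1`, `epnum2`) over the whole batch. Samples that raise
    KeyError are skipped and counted in `kerr`. A single `weight_update`
    is applied at the end with the accumulated gradients, and the updated
    weights plus the error statistics are returned.
    """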
    err = 0.0
    kerr = 0.0
    samp_err = []
    branch_samp_err = []
    branchs = 0
    t_dw1 = {}
    t_dw2 = 0.0
    t_epnum1 = {}
    for i in wpresent:
        t_epnum1[i] = 0
        t_dw1[i] = 0.0
    t_epnum2 = 0.0
    for iword in wd_extra:
        try:
            vect = ([preprocess(i) for i in iword['Word_vects']]
                    + [None] * len(iword['h_vect']))
            vect_, dw1, dw2, epnum1, epnum2 = rae_operation(
                vect1=vect,
                w1=w1,
                w2=w2,
                w_size=iword['w_size'],
                h_vect=iword['h_vect'],
                Word_ids=iword['Word_ids'],
                wp=iword['wp'],
                h_index=iword['h_index'],
                dep_order=iword['dep_order'],
                hh_index=iword['hh_index'],
                p=iword['p'],
                regu=regu,
                wpresent=wpresent)
            terr = 0.0
            bc = 0
            for iv in range(iword['w_size']):
                terr += np.sum(np.power((vect[iv] - vect_[iv]), 2))
                bc += 1
            branchs += bc
            err += terr
            if bc:
                samp_err.append(terr)
                branch_samp_err.append(terr / bc)
            t_dw2 += dw2
            t_epnum2 += epnum2
        except KeyError:
            kerr += 1
            continue
        for wi in wpresent:
            t_dw1[wi] += dw1[wi]
            t_epnum1[wi] += epnum1[wi]
    w1, w2 = weight_update(w1=w1,
                           w2=w2,
                           dw1=t_dw1,
                           dw2=t_dw2,
                           neta=neta,
                           regu=regu,
                           epnum1=t_epnum1,
                           epnum2=t_epnum2,
                           wpresent=wpresent)
    return w1, w2, None, None, kerr, err, samp_err, branch_samp_err, branchs
def rae_trainning_normal(wd_extra, neta=None, regu=0.9, w=None, wpresent=None):
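    """Adam variant of the RAE batch training loop above.

    Same overall structure as the previous function, but with a single
    weight dictionary `w` and `adam_weight_update` in place of the plain
    `weight_update` (which is left commented out below). Samples that raise
    RuntimeError are skipped and counted in `kerr`. Note that `grads` is
    reassigned on every successful sample, so the Adam update is driven by
    the gradients returned by the last call to `rae_operation`, while the
    summed `t_dw` is only returned to the caller.
    """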
    err = 0.0
    kerr = 0.0
    samp_err = []
    branch_samp_err = []
    branchs = 0
    t_dw = {}
    t_epnum = {}
    grads = {}
    for i in wpresent:
        t_dw[i] = 0.0
        t_epnum[i] = 0
    for iword in wd_extra:
        try:
            # vect = [i for i in iword['Word_vects']] + [None for i in range(len(iword['h_vect']))]
            vect = ([preprocess(i) for i in iword['Word_vects']]
                    + [None] * len(iword['h_vect']))
            vect_, dw, epnum, grads = rae_operation(
                vect=vect,
                w=w,
                w_size=iword['w_size'],
                h_vect=iword['h_vect'],
                Word_ids=iword['Word_ids'],
                wp=iword['wp'],
                h_index=iword['h_index'],
                dep_order=iword['dep_order'],
                hh_index=iword['hh_index'],
                p=iword['p'],
                neta=neta,
                regu=regu,
                wpresent=wpresent)
            terr = 0.0
            bc = 0
            for iv in range(iword['w_size']):
                terr += np.sum(np.power((vect[iv] - vect_[iv]), 2))
                bc += 1
            branchs += bc
            err += terr
            if bc:
                samp_err.append(terr)
                branch_samp_err.append(terr / bc)
        except RuntimeError:
            kerr += 1
            continue
        for wi in wpresent:
            t_dw[wi] += dw[wi]
            t_epnum[wi] += epnum[wi]
    # weight_update(w, t_dw, neta=neta, regu=regu, epnum=t_epnum, wpresent=wpresent)
    adam_weight_update(w,
                       grads,
                       neta=neta,
                       regu=regu,
                       epnum=t_epnum,
                       wpresent=wpresent)
    return w, t_dw, t_epnum, kerr, err, samp_err, branch_samp_err, branchs
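A minimal driver for the Adam variant might look like the sketch below. It is an assumption, not code from the repository: `wd_extra`, `w`, and `wpresent` are expected to be prepared elsewhere, and the epoch count and learning rate are illustrative values only.
# Hedged usage sketch: wd_extra, w and wpresent come from the surrounding
# repository; EPOCHS and LEARNING_RATE are illustrative, not repo settings.
EPOCHS = 10
LEARNING_RATE = 0.01
for epoch in range(EPOCHS):
    (w, t_dw, t_epnum, kerr, err,
     samp_err, branch_samp_err, branchs) = rae_trainning_normal(
        wd_extra, neta=LEARNING_RATE, regu=0.9, w=w, wpresent=wpresent)
    mean_branch_err = err / branchs if branchs else 0.0
    print('epoch %d: err=%.4f per-branch=%.4f skipped=%d'
          % (epoch, err, mean_branch_err, kerr))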
Example #3
def get_vect_by_wd_dep(flag, words_data, mtype='normal'):
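    """Encode a parsed sentence and return chunk texts with their vectors.

    When `flag == 't'` this builds a preprocessed vector for every word in
    `words_data`, derives the dependency order and hidden-node layout, loads
    the weight dictionary pickled at `Global.wfname`, and runs the normal or
    deep `rae_encoding` depending on `mtype`. It returns two dicts keyed by
    chunk index: `chunks` holds the chunk text (single words first, then the
    multi-word chunks spanned by hidden nodes with more than one child) and
    `chunks_vect` the corresponding encoded vectors.
    """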
    if flag == 't':
        Word_ids = get_words_id(words_data)
        Word_vects = []
        for i in sorted(Word_ids):
            Word_vects.append(
                preprocess(
                    get_word_vect(words_data[i]['word'].lower(),
                                  Global.v_size)))
        w_size = len(Word_ids)
        p = get_parents(words_data)
        d = get_dep(words_data)
        dep_order, d1 = pdep_2_deporder_dep(p, d)
        h_index, h_vect, wp, _ = dep_2_hid_var(p, dep_order, d1, Word_ids)
        # Word_vects = get_words_vect(words_data, Word_ids, Global.v_size)
        vect = Word_vects + [None for i in range(len(h_vect))]
        del Word_vects
        with open(Global.wfname, 'rb') as wf:
            w = pickle.load(wf)
        if mtype == 'normal':
            RAE_adam_herical.rae_encoding(vect=vect,
                                          w=w,
                                          w_size=w_size,
                                          h_vect=h_vect,
                                          wp=wp)
        elif mtype == 'deep':
            RAE_adam_herical_deep1.rae_encoding(vect=vect,
                                                w=w,
                                                w_size=w_size,
                                                h_vect=h_vect,
                                                wp=wp)
        chunks = {}
        chunks_vect = {}
        for i in range(len(Word_ids)):
            chunks[i] = words_data[Word_ids[i]]['word']
            chunks_vect[i] = vect[i]
        rev_h_index = {v: k for k, v in h_index.items()}
        count = len(Word_ids)  # same as i + 1 here: next free chunk index
        for i in h_vect:
            if len(i) > 1:
                chunks_vect[count] = vect[h_index[Word_ids[min(i)]]]
                chunks[count] = ' '.join([
                    words_data[rev_h_index[j]]['word']
                    if j >= len(Word_ids) else words_data[Word_ids[j]]['word']
                    for j in i
                ])
                count += 1
        return chunks, chunks_vect
Example #4
def get_chk_vect_by_wd(flag, words_data, mtype='normal'):
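    """Encode a parsed sentence and return one vector per chunk.

    The preparation and `rae_encoding` steps mirror `get_vect_by_wd_dep`,
    but the chunking comes from `get_chunks` / `get_chunk_main`: each
    chunk's text joins the words it covers, and its vector is the encoded
    hidden vector indexed by the chunk's main (head) word.
    """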
    if flag == 't':
        Word_ids = get_words_id(words_data)
        Word_vects = []
        for i in sorted(Word_ids):
            Word_vects.append(
                preprocess(
                    get_word_vect(words_data[i]['word'].lower(),
                                  Global.v_size)))
        w_size = len(Word_ids)
        p = get_parents(words_data)
        d = get_dep(words_data)
        dep_order, d1 = pdep_2_deporder_dep(p, d)
        h_index, h_vect, wp, _ = dep_2_hid_var(p, dep_order, d1, Word_ids)
        # Word_vects = get_words_vect(words_data, Word_ids, Global.v_size)
        vect = Word_vects + [None for i in range(len(h_vect))]
        del Word_vects
        with open(Global.wfname, 'rb') as wf:
            w = pickle.load(wf)
        if mtype == 'normal':
            RAE_adam_herical.rae_encoding(vect=vect,
                                          w=w,
                                          w_size=w_size,
                                          h_vect=h_vect,
                                          wp=wp)
        elif mtype == 'deep':
            RAE_adam_herical_deep1.rae_encoding(vect=vect,
                                                w=w,
                                                w_size=w_size,
                                                h_vect=h_vect,
                                                wp=wp)
        chks = get_chunks(words_data)
        chks_main = get_chunk_main(chks, dep_order)
        chunks = {}
        chunks_vect = {}
        for c in range(len(chks)):
            chunks[c] = ' '.join([words_data[i]['word'] for i in chks[c]])
        del words_data
        for c in range(len(chks_main)):
            ind = h_index[chks_main[c]]
            chunks_vect[c] = vect[ind]
        return chunks, chunks_vect
Example #5
def get_chk_vect(flag, line, mtype='normal'):
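    """Encode a raw sentence string and return its dependency-based chunks.

    When `flag == 't'` the line is passed through `line_processing`, parsed
    with `extract_feature_using_senna`, and converted into word vectors and
    dependency / hidden-node structures. The weight dictionary pickled at
    `Global.wfname` is loaded, the normal or deep `rae_encoding` is run,
    and `get_chunks_by_dep` groups the words into chunks. The returned
    dicts map a sequential index (ordered by `get_order`) to the chunk
    text and to its encoded hidden vector.
    """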
    if flag == 't':
        line = line_processing(line)
        # print line
        words_data = extract_feature_using_senna(line)
        p = get_parents(words_data)
        d = get_dep(words_data)
        Word_ids = get_words_id(words_data)
        for i in Word_ids:
            words_data[i]['vect'] = preprocess(
                get_word_vect(words_data[i]['word'].lower(), Global.v_size))
        w_size = len(Word_ids)
        dep_order, d1 = pdep_2_deporder_dep(p, d)
        h_index, h_vect, wp, _ = dep_2_hid_var(p, dep_order, d1, Word_ids)
        Word_vects = get_words_vect(words_data, Word_ids, Global.v_size)
        vect = Word_vects + [None for i in h_vect]
        del Word_vects
        with open(Global.wfname, 'rb') as wf:
            w = pickle.load(wf)
        if mtype == 'normal':
            RAE_adam_herical.rae_encoding(vect=vect,
                                          w=w,
                                          w_size=w_size,
                                          h_vect=h_vect,
                                          wp=wp)
        elif mtype == 'deep':
            RAE_adam_herical_deep1.rae_encoding(vect=vect,
                                                w=w,
                                                w_size=w_size,
                                                h_vect=h_vect,
                                                wp=wp)
        chks = get_chunks_by_dep(Word_ids, h_index, h_vect)
        chunks = {}
        chunks_vect = {}
        count = 0
        order = get_order(d1, w_size)
        for m in order:
            chunks[count] = ' '.join([words_data[i]['word'] for i in chks[m]])
            chunks_vect[count] = vect[h_index[m]]
            count += 1
        return chunks, chunks_vect
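As a rough illustration (an assumption, not repository code), `get_chk_vect` could be driven on a raw sentence as follows, provided the SENNA feature extractor and the pickled weights at `Global.wfname` are available; the example sentence is arbitrary.
# Hedged usage sketch: assumes SENNA and the weight file at Global.wfname
# are in place; the input sentence is arbitrary.
chunks, chunks_vect = get_chk_vect('t', 'the cat sat on the mat', mtype='normal')
for k in sorted(chunks):
    print('chunk %d: %s' % (k, chunks[k]))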
Example #6
            'Word_vects': Word_vects,
            'wp': wp,
            "dep_order": dep_order,
            "hh_index": hh_index,
            'Word_ids': Word_ids,
            'p': p
        })
    for i in w:
        if isinstance(w[i], np.ndarray):
            w1[i] = w[i].copy() + e
            w2[i] = w[i].copy() - e
    epnum = {}
    for i in w:
        epnum[i] = 0
    for iword in wd_extra:
        vect1 = ([preprocess(i) for i in iword['Word_vects']]
                 + [None] * len(iword['h_vect']))
        vect2 = ([preprocess(i) for i in iword['Word_vects']]
                 + [None] * len(iword['h_vect']))
        o1 = {}
        o2 = {}
        vect_1 = {}
        vect_2 = {}

        rae_encoding(vect=vect1,
                     w=w1,
                     w_size=iword['w_size'],
                     h_vect=iword['h_vect'],
                     wp=iword['wp'])
        vect_1 = rae_decoding(vect=vect1,
                              o=o1,