Beispiel #1
0
def writeout_cluster_to_word_map(mapping, output_f_v, output_f_rep, replace_ids=True, one_hot=False):
    """
    Write a cluster->words mapping to two parallel text files: one word per
    line in *output_f_v*, and that word's cluster representation on the
    matching line of *output_f_rep*.

    :param mapping: dict mapping cluster ids to iterables of words
    :param output_f_v: path of the output vocabulary file (one word per line)
    :param output_f_rep: path of the output representation file
    :param replace_ids: write fresh 1-based integer cluster ids; takes
        precedence over *one_hot*
    :param one_hot: write a one-hot vector over the clusters instead of an id

    .. note:: if both flags are False, both output files are left empty
        (original behavior, preserved).
    """
    with open(output_f_v, "w") as out_word, open(output_f_rep, "w") as out_rep:
        if replace_ids:
            # re-number clusters 1..len(mapping) in dict iteration order
            for new_c_id, w_set in enumerate(mapping.values(), start=1):
                for w in w_set:
                    out_word.write(w + "\n")
                    out_rep.write("{}\n".format(new_c_id))
        elif one_hot:
            for c, w_set in enumerate(mapping.values()):
                for w in w_set:
                    out_word.write(w + "\n")
                    # BUG FIX: the original bound this vector to the name
                    # `one_hot`, shadowing the boolean parameter inside the
                    # loop; use a distinct local name instead.
                    vec = np.zeros(len(mapping), 'int')
                    vec[c] = 1
                    out_rep.write("{}\n".format(nparr_to_str(vec)))
Beispiel #2
0
def posttype_txt_plain(posttype_f, vocab_f, output_f_v, output_f_rep, threedim=False, vocab_r=None):
    """
    Create two txt files: a vocabulary with one word per line, and the
    corresponding representation vectors, one per line.

    :param threedim: npy posttypes file contains 3-dimensions, ie extra dim. for syn. fun.
    :param vocab_r: relation vocabulary; must be given exactly when
        *threedim* is set
    """
    # validate the threedim/vocab_r pairing before doing any work
    if threedim and not vocab_r:
        sys.exit("Missing rel. vocabulary.")
    if vocab_r and not threedim:
        sys.exit("Use rel. vocabulary?")

    _, _, posttypes = plain_posttype_txt(posttype_f, vocab_f, threedim, vocab_r)
    with open(output_f_v, "w") as out_v, open(output_f_rep, "w") as out_r:
        for word, vector in posttypes:
            # entries whose representation contains NaN are dropped entirely
            if np.isnan(np.sum(vector)):
                continue
            out_v.write("{}\n".format(word))
            out_r.write("{}\n".format(nparr_to_str(vector)))
Beispiel #3
0
def writeout_cluster_to_word_map(mapping,
                                 output_f_v,
                                 output_f_rep,
                                 replace_ids=True,
                                 one_hot=False):
    """
    Write a cluster->words mapping to two parallel text files: one word per
    line in *output_f_v*, and that word's cluster representation on the
    matching line of *output_f_rep*.

    :param mapping: dict mapping cluster ids to iterables of words
    :param output_f_v: path of the output vocabulary file (one word per line)
    :param output_f_rep: path of the output representation file
    :param replace_ids: write fresh 1-based integer cluster ids; takes
        precedence over *one_hot*
    :param one_hot: write a one-hot vector over the clusters instead of an id

    .. note:: if both flags are False, both output files are left empty
        (original behavior, preserved).
    """
    with open(output_f_v, "w") as out_word, open(output_f_rep, "w") as out_rep:
        if replace_ids:
            # re-number clusters 1..len(mapping) in dict iteration order
            for new_c_id, w_set in enumerate(mapping.values(), start=1):
                for w in w_set:
                    out_word.write(w + "\n")
                    out_rep.write("{}\n".format(new_c_id))
        elif one_hot:
            for c, w_set in enumerate(mapping.values()):
                for w in w_set:
                    out_word.write(w + "\n")
                    # BUG FIX: the original bound this vector to the name
                    # `one_hot`, shadowing the boolean parameter inside the
                    # loop; use a distinct local name instead.
                    vec = np.zeros(len(mapping), 'int')
                    vec[c] = 1
                    out_rep.write("{}\n".format(nparr_to_str(vec)))
Beispiel #4
0
def posttype_txt_plain(posttype_f,
                       vocab_f,
                       output_f_v,
                       output_f_rep,
                       threedim=False,
                       vocab_r=None):
    """
    Create two txt files: a vocabulary with one word per line, and the
    corresponding representation vectors, one per line.

    :param threedim: npy posttypes file contains 3-dimensions, ie extra dim. for syn. fun.
    :param vocab_r: relation vocabulary; must be given exactly when
        *threedim* is set
    """
    # validate the threedim/vocab_r pairing before doing any work
    if threedim and not vocab_r:
        sys.exit("Missing rel. vocabulary.")
    if vocab_r and not threedim:
        sys.exit("Use rel. vocabulary?")

    _, _, posttypes = plain_posttype_txt(posttype_f, vocab_f, threedim,
                                         vocab_r)
    with open(output_f_v, "w") as out_v, open(output_f_rep, "w") as out_r:
        for word, vector in posttypes:
            # entries whose representation contains NaN are dropped entirely
            if np.isnan(np.sum(vector)):
                continue
            out_v.write("{}\n".format(word))
            out_r.write("{}\n".format(nparr_to_str(vector)))
Beispiel #5
0
    # obtain model parameters
    # NOTE(review): `path`, `args` and `infile` come from the enclosing
    # scope, which lies outside this chunk — verify against the full file.
    n_states, n_obs, _, _, _, omit_class_cond, omit_emis_cond = read_params_from_path(path)
    lemmas = args.use_lemmas
    eval_spec_rel = args.synfunc
    lr = False

    # load model
    # Four parameter arrays are read from <path>: ip/tp/fp/ep.npy —
    # presumably initial, transition, syn.-function and emission
    # parameters; TODO confirm against HMTM/HMRTM constructors.
    params_fixed = (np.load("{}ip.npy".format(path)),
                    np.load("{}tp.npy".format(path)),
                    np.load("{}fp.npy".format(path)),
                    np.load("{}ep.npy".format(path)))


    # prepare sents for decoding
    sents = ConllCorpus(infile, howbig=1000000, lemmas=lemmas, eval_spec_rels=eval_spec_rel, dirname=path, lr=lr)
    sents.prepare_trees()

    # use the relation-aware model (HMRTM) when a syn.-function vocabulary
    # is requested, otherwise the plain tree model (HMTM)
    h = HMRTM(n_states, n_obs, R=len(sents.r_dict), params=params_fixed, writeout=False, dirname=path,
              omit_class_cond=omit_class_cond, omit_emis_cond=omit_emis_cond) if eval_spec_rel else \
        HMTM(n_states, n_obs, params=params_fixed, writeout=False, dirname=path)

    with open(args.outfile, "w") as out:
        for tree in sents.train:
            # obtain posteriors for all nodes
            node_to_rep = h.posterior_decode(tree, cont=True)
            # get words
            for node in tree.get_nonroots():
                # one output line per non-root node:
                # "<word> <posterior representation>"; trees are separated
                # by a blank line
                out.write(
                    "{} {}\n".format(sents.x_dict.get_label_name(node.name), nparr_to_str(node_to_rep[node.index])))
            out.write("\n")
Beispiel #6
0
def write_fig_data(reps, ws, outfile):
    """Write one "<word> <stringified array>" line per (word, array) pair to *outfile*."""
    with open(outfile, "w") as out:
        out.writelines(
            "{} {}\n".format(word, nparr_to_str(arr))
            for word, arr in zip(ws, reps))
Beispiel #7
0
 def format(word, nparray):
     """Return "<word> <stringified array>" terminated by a newline."""
     # NOTE: shadows the builtin `format`; name kept for caller compatibility.
     return "%s %s\n" % (word, nparr_to_str(nparray))
Beispiel #8
0
 def format(word, nparray):
     """Return "<word> <stringified array>" terminated by a newline."""
     # NOTE: shadows the builtin `format`; name kept for caller compatibility.
     return "%s %s\n" % (word, nparr_to_str(nparray))
def write_fig_data(reps, ws, outfile):
    """Write one "<word> <stringified array>" line per (word, array) pair to *outfile*."""
    with open(outfile, "w") as out:
        out.writelines(
            "{} {}\n".format(word, nparr_to_str(arr))
            for word, arr in zip(ws, reps))