def get_tree(n_inst):
    """Yield one normalized dependency tree per classification instance.

    Loads the parsed trees from ``file_parsed`` and yields
    ``self.normalize_tree(...)`` for each of the ``n_inst`` instances, in
    order.

    :param n_inst: expected number of classification instances; must equal
        the number of parses loaded.
    :raises ValueError: if the number of parses differs from ``n_inst``.
    """
    trees = ConllCorpus(file_parsed, howbig=1000000, lemmas=False,
                        eval_spec_rels=self.eval_spec_rel,
                        dirname=self.dirname, lr=self.lr)
    trees.prepare_trees()
    # extend instances with trees.
    # NOTE(review): this was a bare `assert`, which is stripped under
    # `python -O`; raise explicitly so the size check always runs.
    if len(trees.train) != n_inst:
        raise ValueError(
            "Number of parses not equal to number of classification instances.")
    # Every instance has a parse here, so the running parse counter is
    # simply the loop index (the original kept a redundant `c_append`).
    for i in range(n_inst):
        yield self.normalize_tree(trees.train[i], trees.x_dict, i)
def get_tree(n_inst):
    """Yield a normalized tree per instance, or ``None`` when no parse exists.

    Not every instance has a corresponding tree due to errors in parsing;
    the CoNLL file index records which 1-based instance ids were parsed.
    """
    trees = ConllCorpus(file_parsed, howbig=1000000, lemmas=True,
                        eval_spec_rels=self.eval_spec_rel,
                        dirname=self.dirname, lr=self.lr)
    trees.prepare_trees()
    # not every instance has a corresponding tree due to errors in parsing
    conll_idx = ConllFilesIndex(files_parsed_path)
    conll_idx.create_ids_set()
    # extend instances with trees
    n_parsed = 0  # position of the next unconsumed parse in trees.train
    for inst_id in range(1, n_inst + 1):
        if inst_id in conll_idx.fileids:
            # we have a parse:
            yield self.normalize_tree(trees.train[n_parsed], trees.x_dict,
                                      n_parsed)
            n_parsed += 1
        else:
            # we don't have a parse:
            yield None
def get_tree(n_inst):
    """Generate one normalized tree per classification instance.

    Asserts that the parsed corpus contains exactly ``n_inst`` trees, then
    yields ``self.normalize_tree(...)`` for each parse in order.
    """
    corpus = ConllCorpus(file_parsed, howbig=1000000, lemmas=False,
                         eval_spec_rels=self.eval_spec_rel,
                         dirname=self.dirname, lr=self.lr)
    corpus.prepare_trees()
    # extend instances with trees
    assert len(corpus.train) == n_inst, "Number of parses not equal to number of classification instances."
    # One parse per instance, so enumerate the parses directly.
    for idx, parse in enumerate(corpus.train):
        yield self.normalize_tree(parse, corpus.x_dict, idx)
def get_tree(n_inst):
    """Yield raw parse trees, or ``None`` for instances without a parse.

    Also exposes the tree vocabulary on the instance as
    ``self.tree_vocab`` as a side effect.
    """
    trees = ConllCorpus(file_parsed, howbig=1000000, lemmas=True,
                        eval_spec_rels=self.eval_spec_rel,
                        dirname=self.dirname, lr=self.lr)
    trees.prepare_trees()
    self.tree_vocab = trees.x_dict
    # not every instance has a corresponding tree due to errors in parsing
    conll_idx = ConllFilesIndex(files_parsed_path)
    conll_idx.create_ids_set()
    # extend instances with trees
    consumed = 0  # how many parses have been handed out so far
    for i in range(n_inst):
        # parse ids in the index are 1-based
        if (i + 1) in conll_idx.fileids:
            # we have a parse:
            tree = trees.train[consumed]
            consumed += 1
        else:
            # we don't have a parse:
            tree = None
        yield tree
# obtain model parameters
n_states, n_obs, _, _, _, omit_class_cond, omit_emis_cond = read_params_from_path(path)
lemmas = args.use_lemmas
eval_spec_rel = args.synfunc
lr = False

# load model parameter tables (initial, transition, final, emission)
params_fixed = (
    np.load("{}ip.npy".format(path)),
    np.load("{}tp.npy".format(path)),
    np.load("{}fp.npy".format(path)),
    np.load("{}ep.npy".format(path)),
)

# prepare sents for decoding
sents = ConllCorpus(infile, howbig=1000000, lemmas=lemmas,
                    eval_spec_rels=eval_spec_rel, dirname=path, lr=lr)
sents.prepare_trees()

# relation-aware model when syntactic-function relations are in use,
# plain tree model otherwise
if eval_spec_rel:
    model = HMRTM(n_states, n_obs, R=len(sents.r_dict), params=params_fixed,
                  writeout=False, dirname=path,
                  omit_class_cond=omit_class_cond,
                  omit_emis_cond=omit_emis_cond)
else:
    model = HMTM(n_states, n_obs, params=params_fixed, writeout=False,
                 dirname=path)

with open(args.outfile, "w") as out:
    for tree in sents.train:
        # obtain posteriors for all nodes
        posteriors = model.posterior_decode(tree, cont=True)
        # get words: one output line per non-root node
        for node in tree.get_nonroots():
            out.write("{} {}\n".format(
                sents.x_dict.get_label_name(node.name),
                nparr_to_str(posteriors[node.index])))
        out.write("\n")