def collect_vg_postags(self, infile):
    """Collect POS-tag statistics for main verbs and auxiliaries in ``infile``.

    Every dependency graph read from ``infile`` is wrapped in a
    ``VGtransformer`` whose ``add_vg_pos_information`` is handed the same
    two dictionaries, so whatever it records accumulates across the whole
    file.  Both dictionaries are then passed through ``dict_count_to_freq``
    (presumably turning raw counts into relative frequencies -- confirm
    against that helper).

    Returns a ``(main_verbs_pos, aux_pos)`` tuple of the converted
    dictionaries.
    """
    aux_counts, main_verb_counts = {}, {}
    for graph in self._file_handler.file_to_dg_list(infile):
        # The two dicts are shared between graphs on purpose: the
        # transformer updates them in place.
        VGtransformer(graph, dep_style=self._dep_style).add_vg_pos_information(
            aux_counts, main_verb_counts
        )
    main_verb_freqs = dict_count_to_freq(main_verb_counts)
    aux_freqs = dict_count_to_freq(aux_counts)
    return main_verb_freqs, aux_freqs
def count_aux(self, infile):
    """Return the number of auxiliaries, tokens and sentences in ``infile``.

    Each dependency graph read from ``infile`` contributes ``len(graph)``
    to the token total (measured before the graph is transformed) and the
    ``tot_aux`` value of a ``VGtransformer`` after ``transform()`` has run
    to the auxiliary total.  The sentence count is the number of graphs.

    Returns a ``(n_aux, n_tokens, n_sentences)`` tuple.
    """
    graphs = self._file_handler.file_to_dg_list(infile)
    aux_total = 0
    token_total = 0
    for graph in graphs:
        # Take the length first, before transform() gets a chance to
        # touch the graph.
        token_total += len(graph)
        vg = VGtransformer(graph, dep_style=self._dep_style)
        vg.transform()
        aux_total += vg.tot_aux
    return aux_total, token_total, len(graphs)
def transform(self, infile, outfile, transformation):
    """Apply ``transformation`` to every graph in ``infile``; write ``outfile``.

    The dependency graphs are read through ``self._file_handler``, processed
    one by one, and the same graph objects are handed back to the file
    handler for writing (the operations presumably modify the graphs in
    place -- confirm against ``VGtransformer`` and the graph class).

    Parameters:
        infile: passed to ``self._file_handler.file_to_dg_list``.
        outfile: passed to ``self._file_handler.dep_graphs_to_file``.
        transformation: name of the operation to run on each graph:
            ``"transform"``   -> ``VGtransformer.transform()``
            ``"detransform"`` -> ``VGtransformer.detransform()``
            ``"disambig"``    -> ``VGtransformer.disambiguate_vg_postags()``
            ``"ambig"``       -> ``dg.make_verbs_ambiguous(pos_style=...)``
            ``"to_conllx"``   -> ``dg.to_conllx()``

    Raises:
        Exception: with the message ``"Invalid transformation"`` when
            ``self._transformer`` is not ``"vg"`` or ``transformation`` is
            not one of the names above.  Both checks run per graph, so
            nothing is raised (and an empty graph list is written) when
            ``infile`` yields no graphs.
    """
    dgs_in = self._file_handler.file_to_dg_list(infile)
    dgs_out = []
    for dg in dgs_in:
        if self._transformer == "vg":
            transformer = VGtransformer(
                dg, dep_style=self._dep_style, pos_style=self._pos_style
            )
        else:
            # Call form of raise: the Python 2 statement form
            # ``raise Exception, "..."`` is a SyntaxError on Python 3.
            raise Exception("Invalid transformation")

        if transformation == "transform":
            transformer.transform()
        elif transformation == "detransform":
            transformer.detransform()
        elif transformation == "disambig":
            transformer.disambiguate_vg_postags()
        elif transformation == "ambig":
            # These two operations live on the graph itself rather than
            # on the transformer.
            dg.make_verbs_ambiguous(pos_style=self._pos_style)
        elif transformation == "to_conllx":
            dg.to_conllx()
        else:
            raise Exception("Invalid transformation")
        dgs_out.append(dg)
    self._file_handler.dep_graphs_to_file(outfile, dgs_out)