def compute(self, ngram_db, table: str, field: str, save_every=-1):
    """Run every statistics stage on ``ngram_db`` in a fixed order.

    The stages are, in order: SkipgramCalc, LexPredCalc, gen_skipos,
    SynContextCalc, SynPredCalc, WordLprCalc, MaxPredCalc (always on
    ``'lex_context_counts'`` / ``'lpr'``) and NgramLprCalc (built with
    ``self.agg_fun``). One manual progress-bar tick is emitted after each
    completed stage.

    Display is switched off with ``set_display(False)`` for the duration
    of the run and switched back on afterwards. The progress bar is closed
    and display re-enabled even when a stage raises; the exception itself
    still propagates to the caller.

    Parameters
    ----------
    ngram_db
        Database object handed to every stage.
    table: str
        Table name forwarded to the stages that accept one.
    field: str
        Field name forwarded to the stages that accept one.
    save_every: int
        Forwarded unchanged to every calculator's ``compute`` except
        NgramLprCalc's; ``gen_skipos`` does not receive it either.
    """
    # Each stage is a thunk so that every calculator is still constructed
    # immediately before it runs, exactly as in the sequential version.
    stages = (
        lambda: SkipgramCalc().compute(ngram_db, table, field, save_every),
        lambda: LexPredCalc().compute(ngram_db, table, field, save_every),
        lambda: gen_skipos(ngram_db, False),
        lambda: SynContextCalc().compute(ngram_db, table, field, save_every),
        lambda: SynPredCalc().compute(ngram_db, save_every),
        lambda: WordLprCalc().compute(ngram_db, save_every),
        lambda: MaxPredCalc().compute(
            ngram_db, 'lex_context_counts', 'lpr', save_every
        ),
        lambda: NgramLprCalc(self.agg_fun).compute(ngram_db, table),
    )

    set_display(False)
    try:
        # The bar total follows the stage list so the two cannot drift apart.
        pbar = messages.manual_pbar(total=len(stages))
        try:
            messages.new_display()
            for run_stage in stages:
                run_stage()
                pbar.update()
        finally:
            messages.close_manual_pbar(pbar)
    finally:
        # Always restore display, otherwise a failing stage would leave
        # it disabled for whatever runs next.
        set_display(True)
def __init__(self, fileroot: str, feats=DEFAULT_FEATS, sep='\t', new=True):
    """Set up the counter and write its info file.

    ``feats`` is first checked with ``NgramCounter._feats_integrity``.
    Duplicates are then removed and the result stored, together with
    ``sep``, both on the instance and in ``fileroot + '.info.json'``.
    The parent initialiser is called with ``fileroot + '.db'``.

    Parameters
    ----------
    fileroot: str
        Path prefix; ``'.db'`` and ``'.info.json'`` are appended to it.
    feats
        Iterable of feature names (must be hashable). Duplicates are
        dropped; the order of first appearance is kept.
    sep: str
        Separator stored on the instance and in the info file.
    new: bool
        Forwarded to the parent initialiser.
    """
    NgramCounter._feats_integrity(feats)
    # De-duplicate while keeping the caller's order. list(set(...)) gave an
    # order that, for string features, changes between interpreter runs
    # (hash randomisation), so self.feats and the saved info file were not
    # reproducible for identical input.
    self.feats = list(dict.fromkeys(feats))
    self.sep = sep
    db_file = fileroot + '.db'
    self.info_file = fileroot + '.info.json'
    info = {'feats': self.feats, 'sep': self.sep}
    # NOTE(review): the info file is written on every construction, also
    # when new=False - confirm that overwriting an existing file is intended.
    utils.save_json(info, self.info_file)
    super().__init__(db_file, new=new)
    messages.new_display()
def gen_skipos(ngram_db, display=True):
    """Build the ``skipos_counts`` table from ``skipgram_counts``.

    Connects to ``ngram_db``, calls its ``aggregate_by`` with the target
    table, the source table and the lists ``['freq']``, ``['length']`` and
    ``['skipos']``, refreshes the stored info for the target table with
    ``upd_info``, and disconnects. The role of each list argument is
    defined by ``aggregate_by`` itself.

    Parameters
    ----------
    ngram_db
        Database object providing ``connect``, ``aggregate_by``,
        ``upd_info`` and ``disconnect``.
    display: bool
        When true, ``messages.new_display()`` is called before the
        status message is printed.
    """
    if display:
        messages.new_display()
    messages.msg('Generating skipos statistics...')

    target_table = 'skipos_counts'
    source_table = 'skipgram_counts'

    ngram_db.connect()
    ngram_db.aggregate_by(
        target_table, source_table, ['freq'], ['length'], ['skipos']
    )
    ngram_db.upd_info(target_table)
    ngram_db.disconnect()

    messages.done()
def __init__(self, ngram_db, in_tb: str, in_fld: 'list', save_every=-1):
    """Prepare input and output databases for a calculation.

    Stores the input database, creates the output database with
    ``self.copy_db(ngram_db)``, connects both, and registers the input
    query on the input database with ``set_query(in_tb, in_fld)``.

    Parameters
    ----------
    ngram_db
        Input database; its ``n_max`` is copied onto the instance.
    in_tb: str
        Input table name, passed to ``set_query`` and kept as
        ``self.in_table``.
    in_fld: list
        Input field(s), passed to ``set_query``.
    save_every: int
        Stored as ``self.sv_every``.
    """
    # `new_display` is not a parameter: it is looked up in an enclosing
    # scope - presumably a module-level flag; confirm where it is defined.
    if new_display:
        messages.new_display()

    self.sv_every = save_every
    self.ngram_db = ngram_db
    self.output_db = self.copy_db(ngram_db)

    # Open the input connection first, then the output one.
    self.ngram_db.connect()
    self.output_db.connect()

    self.n_max = ngram_db.n_max
    self.ngram_db.set_query(in_tb, in_fld)
    self.in_table = in_tb
    self.new = False
def from_NgramCounter(cls, ngram_counter, fileroot: str):
    """Create a new NgramDb from an NgramCounter.

    The counter's database file (``ngram_counter.db``) is copied to
    ``fileroot + '.db'`` and a new instance is built on ``fileroot``.
    ``_gen_info()`` is called while the instance is connected, and
    ``_load_info()`` after it has been disconnected.

    Parameters
    ----------
    ngram_counter: NgramCounter
        The NgramCounter from which to create the NgramDb.
    fileroot: str
        Path root of the new NgramDb; the database copy is written to
        ``fileroot + '.db'``.

    Returns
    -------
    NgramDb
    """
    messages.new_display()

    target_path = fileroot + '.db'
    copyfile(ngram_counter.db, target_path)

    db = cls(fileroot)
    db.connect()
    db._gen_info()
    db.disconnect()
    db._load_info()
    return db