예제 #1
0
 def compute(self, ngram_db, table: str, field: str, save_every=-1):
     """Run every calculator of the pipeline, in order, over ``ngram_db``.

     ``set_display(False)`` is called before the steps and
     ``set_display(True)`` after them (presumably to silence the
     per-step display while the manual progress bar is shown -- confirm
     against ``set_display``).  Both the progress bar and the display
     flag are now restored even when a step raises.

     Parameters
     ----------
     ngram_db
         Database object handed to every step.
     table: str
         Table name forwarded to the steps that take one.
     field: str
         Field name forwarded to the steps that take one.
     save_every
         Forwarded unchanged to the calculators that accept it.

     """
     # Each step is deferred in a lambda so that a calculator is only
     # constructed once the previous step has finished, exactly as in a
     # plain sequential run.
     steps = (
         lambda: SkipgramCalc().compute(ngram_db, table, field, save_every),
         lambda: LexPredCalc().compute(ngram_db, table, field, save_every),
         lambda: gen_skipos(ngram_db, False),
         lambda: SynContextCalc().compute(ngram_db, table, field,
                                          save_every),
         lambda: SynPredCalc().compute(ngram_db, save_every),
         lambda: WordLprCalc().compute(ngram_db, save_every),
         lambda: MaxPredCalc().compute(ngram_db, 'lex_context_counts',
                                       'lpr', save_every),
         lambda: NgramLprCalc(self.agg_fun).compute(ngram_db, table),
     )
     set_display(False)
     try:
         # The bar total follows the step list instead of a literal 8.
         pbar = messages.manual_pbar(total=len(steps))
         try:
             messages.new_display()
             for run_step in steps:
                 run_step()
                 pbar.update()
         finally:
             messages.close_manual_pbar(pbar)
     finally:
         # NOTE(review): this always re-enables display, as the original
         # did, rather than restoring whatever state was active before.
         set_display(True)
예제 #2
0
 def __init__(self, fileroot: str, feats=DEFAULT_FEATS, sep='\t', new=True):
     """Set up the counter and write its info file.

     Parameters
     ----------
     fileroot: str
         Common path prefix; the database is ``fileroot + '.db'`` and the
         info file is ``fileroot + '.info.json'``.
     feats
         Iterable of features; validated with
         ``NgramCounter._feats_integrity`` and stored without duplicates.
     sep
         Separator stored on the instance and in the info file.
     new
         Forwarded to the parent constructor.

     """
     NgramCounter._feats_integrity(feats)
     # dict.fromkeys removes duplicates while keeping first-seen order, so
     # the feature order written to the info file is the same on every
     # run.  list(set(...)) yields a hash-dependent order that can change
     # between processes (assumes feats are strings -- confirm).
     self.feats = list(dict.fromkeys(feats))
     self.sep = sep
     db_file = fileroot + '.db'
     self.info_file = fileroot + '.info.json'
     info = {'feats': self.feats, 'sep': self.sep}
     utils.save_json(info, self.info_file)
     super().__init__(db_file, new=new)
     messages.new_display()
예제 #3
0
def gen_skipos(ngram_db, display=True):
    """Build the ``skipos_counts`` table from ``skipgram_counts``.

    Parameters
    ----------
    ngram_db
        Database object; it is connected for the duration of the call and
        disconnected afterwards, including when the aggregation fails.
    display: bool
        When true, ``messages.new_display()`` is called before the
        progress message.

    """
    if display:
        messages.new_display()
    messages.msg('Generating skipos statistics...')
    ngram_db.connect()
    try:
        # NOTE(review): argument roles (value / grouping fields) are
        # defined by ``aggregate_by``; confirm against its signature.
        ngram_db.aggregate_by('skipos_counts', 'skipgram_counts', ['freq'],
                              ['length'], ['skipos'])
        ngram_db.upd_info('skipos_counts')
    finally:
        # Do not leave the caller's database connected if a step raises.
        ngram_db.disconnect()
    messages.done()
예제 #4
0
 def __init__(self, ngram_db, in_tb: str, in_fld: 'list', save_every=-1):
     """Prepare the input and output databases for a calculation.

     Parameters
     ----------
     ngram_db
         Input database; a copy made with ``self.copy_db`` becomes the
         output database.
     in_tb: str
         Input table passed to ``ngram_db.set_query`` and kept as
         ``self.in_table``.
     in_fld: list
         Input fields passed to ``ngram_db.set_query``.
     save_every
         Stored as ``self.sv_every``.

     """
     # ``new_display`` is not a parameter; it is resolved from the
     # enclosing scope (presumably a module-level flag -- confirm).
     if new_display:
         messages.new_display()

     self.sv_every = save_every
     self.ngram_db = ngram_db
     self.output_db = self.copy_db(ngram_db)

     # Open the input database first, then the output copy.
     for database in (self.ngram_db, self.output_db):
         database.connect()

     self.n_max = ngram_db.n_max
     self.ngram_db.set_query(in_tb, in_fld)
     self.in_table = in_tb
     self.new = False
예제 #5
0
    def from_NgramCounter(cls, ngram_counter, fileroot: str):
        """Create a new NgramDb from an NgramCounter.

        The counter's database file is copied to ``fileroot + '.db'``; the
        new object then generates its info while connected and loads it
        after disconnecting.

        Parameters
        ----------
        ngram_counter: NgramCounter
            The NgramCounter from which to create the NgramDb; its ``db``
            attribute is used as the source of the copy.
        fileroot: str
            Path prefix of the new NgramDb (``'.db'`` is appended for the
            database file).

        Returns
        -------
        NgramDb

        """
        messages.new_display()
        copyfile(ngram_counter.db, fileroot + '.db')
        new_db = cls(fileroot)
        new_db.connect()
        try:
            new_db._gen_info()
        finally:
            # Disconnect even if info generation fails, so the freshly
            # copied database is not left connected.
            new_db.disconnect()
        new_db._load_info()
        return new_db