Exemple #1
0
    def syntax_filtration(self, source_path, target_path, fail_path,
                          n_threads):
        """Run syntax filtration over the source database in fixed-size batches.

        Initialises the target and fail databases, counts the ``Content`` rows
        of the source database, then reads those rows batch by batch and hands
        each batch to a ``SyntaxFiltration`` processor.

        Args:
            source_path: Path of the database whose ``Content`` rows are read.
            target_path: Path of the database that is initialised here and
                passed on to ``SyntaxFiltration``.
            fail_path: Path of the database that is initialised here and
                passed on to ``SyntaxFiltration``.
            n_threads: Passed through unchanged to ``SyntaxFiltration``.
        """
        source_db_op = DBOperation(source_path)
        target_db_op = DBOperation(target_path)
        fail_db_op = DBOperation(fail_path)
        try:
            target_db_op.init_db()
            fail_db_op.init_db()

            # The full result set is fetched only to learn the row count and
            # is dropped right away so it does not stay resident while the
            # batches are processed.
            contents = source_db_op.query_all(['Content'])
            caseNum = len(contents)
            contents.clear()

            batch_size = 1000
            batch_num = math.ceil(caseNum / batch_size)

            for x in range(batch_num):
                print("\n*****SynFlt_Batch" + str(x) + " start")
                # NOTE(review): assumes query_part takes an inclusive, 1-based
                # [start, end] row range, as the non-final batches always
                # implied -- confirm against DBOperation.query_part.
                # Every batch, including the last one, starts one past the end
                # of the previous batch; the last batch used to start at
                # x * batch_size and so re-read the previous batch's final row.
                start = x * batch_size + 1
                end = min((x + 1) * batch_size, caseNum)
                contents = source_db_op.query_part(['Content'], start, end)
                processor = SyntaxFiltration(contents, target_path, fail_path,
                                             n_threads)
                processor.execute()
        finally:
            # Release the database handles once all batches are done (or on
            # failure), using the same finalize() call the other DBOperation
            # users rely on.
            for db_op in (source_db_op, target_db_op, fail_db_op):
                db_op.finalize()
Exemple #2
0
def db2list(db_path: str) -> typing.List[str]:
    """Read the ``Content`` column from a single database file.

    Args:
        db_path: Path of the database file, handed to ``DBOperation``.

    Returns:
        The first field of every row returned by the query, with leading and
        trailing whitespace stripped.
    """
    source_db_op = DBOperation(db_path)
    try:
        rows = source_db_op.query_all(['Content'])
    finally:
        # Finalize the handle even when the query raises, so a failed read
        # does not leave it open.
        source_db_op.finalize()
    return [row[0].strip() for row in rows]
Exemple #3
0
 def readFunctions(self):
     """Return every ``Content`` row of the corpus database.

     The database path is read from ``self.hparams.corpus_db``.

     Returns:
         The rows exactly as returned by ``DBOperation.query_all``.
     """
     target_db = DBOperation(self.hparams.corpus_db)
     try:
         return target_db.query_all(['Content'])
     finally:
         # Finalize the handle once the rows are fetched (or the query
         # failed) instead of leaving it open.
         target_db.finalize()