Example No. 1
0
class BulkLoadChecker(Checker):
    """Check bulk load operations in a dependent thread.

    Each task creates a collection (or re-checks a previously failed one),
    bulk-loads ``self.files`` into it, and records the collection name in
    ``self.failed_tasks`` when the load does not complete in time.
    """

    def __init__(self, collection_name=None, files=None):
        # FIX: ``files`` previously used a mutable default argument ([]),
        # which is shared across all instances; use a None sentinel instead.
        if collection_name is None:
            collection_name = cf.gen_unique_str("BulkLoadChecker_")
        super().__init__(collection_name=collection_name)
        self.utility_wrap = ApiUtilityWrapper()
        self.schema = cf.gen_default_collection_schema()
        self.files = files if files is not None else []
        self.row_based = True               # whether the import files are row-based
        self.recheck_failed_task = False    # when True, re-check collections from failed_tasks
        self.failed_tasks = []              # names of collections whose bulk load failed
        self.c_name = None                  # collection currently being checked

    def update(self, files=None, schema=None, row_based=None):
        """Update bulk-load parameters; only non-None arguments are applied."""
        if files is not None:
            self.files = files
        if schema is not None:
            self.schema = schema
        if row_based is not None:
            self.row_based = row_based

    @trace()
    def bulk_load(self):
        """Start a bulk load of ``self.files`` into ``self.c_name`` and wait
        up to 30s for the created tasks to complete.

        Returns:
            tuple: (task_ids, completed) — the task ids and whether all
            tasks finished within the timeout.
        """
        task_ids, result = self.utility_wrap.bulk_load(collection_name=self.c_name,
                                                       row_based=self.row_based,
                                                       files=self.files)
        completed, result = self.utility_wrap.wait_for_bulk_load_tasks_completed(task_ids=task_ids, timeout=30)
        return task_ids, completed

    @exception_handler()
    def run_task(self):
        """Run one bulk-load check; on failure, remember the collection
        name so it can be re-checked later.

        Returns:
            tuple: (task_ids, completed) from :meth:`bulk_load`.
        """
        if self.recheck_failed_task and self.failed_tasks:
            # re-check a collection whose previous bulk load failed
            self.c_name = self.failed_tasks.pop(0)
            log.debug(f"check failed task: {self.c_name}")
        else:
            self.c_name = cf.gen_unique_str("BulkLoadChecker_")
        self.c_wrap.init_collection(name=self.c_name, schema=self.schema)
        # import data
        task_ids, completed = self.bulk_load()
        if not completed:
            self.failed_tasks.append(self.c_name)
        return task_ids, completed

    def keep_running(self):
        """Run tasks in a loop until the checker is told to stop."""
        while self._keep_running:
            self.run_task()
            sleep(constants.WAIT_PER_OP / 10)
Example No. 2
0
class BulkLoadChecker(Checker):
    """Check bulk load operations in a dependent thread.

    Each iteration creates a fresh collection (or re-checks a previously
    failed one), bulk-loads ``self.files`` into it, and accumulates
    success/failure counters and timing statistics from the base Checker.
    """

    def __init__(self, flush=False):
        super().__init__()
        self.utility_wrap = ApiUtilityWrapper()
        self.schema = cf.gen_default_collection_schema()
        self.flush = flush                 # when True, flush before and after each bulk load
        self.files = ["bulk_load_data_source.json"]
        self.row_based = True              # whether the import files are row-based
        self.recheck_failed_task = False   # when True, re-check collections from failed_tasks
        self.failed_tasks = []             # names of collections whose bulk load failed

    def update(self, files=None, schema=None, row_based=None):
        """Update bulk-load parameters; only non-None arguments are applied."""
        if files is not None:
            self.files = files
        if schema is not None:
            self.schema = schema
        if row_based is not None:
            self.row_based = row_based

    def keep_running(self):
        """Loop forever, performing one bulk-load check per iteration."""
        while True:
            if self.recheck_failed_task and self.failed_tasks:
                # re-check a collection whose previous bulk load failed
                c_name = self.failed_tasks.pop(0)
                log.info(f"check failed task: {c_name}")
            else:
                c_name = cf.gen_unique_str("BulkLoadChecker_")
            self.c_wrap.init_collection(name=c_name, schema=self.schema)
            if self.flush:
                t0 = time.time()
                # reading num_entities triggers a flush (see log message);
                # the returned count itself is not used
                pre_entities_num = self.c_wrap.num_entities
                tt = time.time() - t0
                log.info(f"flush before bulk load, cost time: {tt:.4f}")
            # import data
            t0 = time.time()
            task_ids, res_1 = self.utility_wrap.bulk_load(
                collection_name=c_name,
                row_based=self.row_based,
                files=self.files)
            log.info(f"bulk load task ids:{task_ids}")
            completed, res_2 = self.utility_wrap.wait_for_bulk_load_tasks_completed(
                task_ids=task_ids, timeout=30)
            tt = time.time() - t0
            # added_num = sum(res_2[task_id].row_count for task_id in task_ids)
            if completed:
                self.rsp_times.append(tt)
                # incremental running average over successful operations
                self.average_time = (tt + self.average_time * self._succ) / (
                    self._succ + 1)
                self._succ += 1
                # FIX: format spec was ":4f" (minimum width 4) — corrected to
                # ":.4f" (4 decimal places) to match the tt format above.
                log.info(
                    f"bulk load success for collection {c_name}, time: {tt:.4f}, average_time: {self.average_time:.4f}"
                )
                if self.flush:
                    t0 = time.time()
                    cur_entities_num = self.c_wrap.num_entities
                    tt = time.time() - t0
                    log.info(f"flush after bulk load, cost time: {tt:.4f}")
            else:
                self._fail += 1
                # if the task failed, store the failed collection name for further checking after chaos
                self.failed_tasks.append(c_name)
                # FIX: same ":4f" -> ":.4f" format-spec correction as above.
                log.info(
                    f"bulk load failed for collection {c_name} time: {tt:.4f}, average_time: {self.average_time:.4f}"
                )
                sleep(constants.WAIT_PER_OP / 10)