Example #1
import logging
from multiprocessing import Pool, cpu_count

logger = logging.getLogger(__name__)

def multiprocess_run(func, deal_list, work_num: int = 0):
    """Run func over every item of deal_list in a process pool."""
    # Default to one worker per CPU core, capped at 80 processes.
    if not work_num:
        work_num = cpu_count()
    work_num = min(work_num, 80)
    logger.info("generate {} worker pool for {}".format(work_num, func))
    pool = Pool(work_num)
    res = pool.map(func, deal_list)  # blocks until every item is processed
    pool.close()
    pool.join()  # reap the worker processes before returning
    return res
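A minimal usage sketch, assuming the function above is importable; square is a hypothetical worker, defined at module level so it can be pickled:

def square(x):
    return x * x

if __name__ == "__main__":
    # guard required on spawn-based platforms; results keep input order
    print(multiprocess_run(square, [1, 2, 3, 4]))  # [1, 4, 9, 16]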
Example #2
 def query(self, hql):
     """Execute hql and return field metadata plus every fetched row."""
     cursor = self.get_connection().cursor()
     logger.info("execute hql {}".format(hql))
     cursor.execute(hql)
     all_data = cursor.fetchall()
     fields, fields_list = self.get_fields(cursor.description)
     cursor.close()
     return fields, fields_list, all_data
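A hedged call-site sketch; HiveClient is a hypothetical name for the class these methods live on, and fields_list is assumed to hold the column names:

client = HiveClient(hive_config)  # hypothetical constructor
fields, fields_list, rows = client.query("SELECT id, name FROM users LIMIT 10")
for row in rows:
    print(dict(zip(fields_list, row)))  # pair column names with values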
Example #3
 def get_connection(self) -> HiveServer2Connection:
     """Lazily open and cache a Kerberos-authenticated HiveServer2 connection."""
     if self._connection is None:
         logger.info("begin connect hive addr {} and database is {}".format(
             self._hive_config.host, self._hive_config.database))
         kinit()  # refresh the Kerberos ticket cache before connecting
         self._connection = connect(
             host=self._hive_config.host,
             port=10000,  # default HiveServer2 port
             auth_mechanism='GSSAPI',
             kerberos_service_name=self._hive_config.username,
             database=self._hive_config.database)
     return self._connection
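The connect signature above (auth_mechanism, kerberos_service_name) matches the impyla client; assuming that library, the imports behind this method would be:

from impala.dbapi import connect                      # impyla's DB-API entry point
from impala.hiveserver2 import HiveServer2Connection  # type used in the annotation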
Example #4
from multiprocessing import Process, cpu_count

def multiprocess_exe(func, deal_list, work_num: int = 0):
    """Split deal_list into work_num chunks and run func on each in its own process."""
    if not work_num:
        work_num = cpu_count()
    process_list = []
    # chunk() is assumed to split deal_list into work_num parts (see the sketch below)
    deal_list = chunk(deal_list, work_num)
    logger.info("generate {} worker pool for {}".format(work_num, func))
    for i in range(work_num):
        process_list.append(Process(target=func, args=(deal_list[i],)))
    for process in process_list:
        process.start()
    for process in process_list:
        process.join()  # block until every worker has finished
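chunk() is not shown in the excerpt; a hypothetical stand-in that splits a list into exactly n sublists round-robin, so deal_list[i] is always a valid index:

def chunk(items, n):
    # n sublists; some may be empty when len(items) < n
    return [items[i::n] for i in range(n)]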
Example #5
import os

def download_ceph_image(date, bucket, face_ids, img_dir):
    """Download each face image from the Ceph bucket unless it already exists locally."""
    for face_id in face_ids:
        img_path = "{}/{}.jpg".format(img_dir, face_id)
        if os.path.exists(img_path):
            continue  # skip images that were already downloaded
        try:
            # boto-style API: fetch the key, then read its contents as bytes
            res = bucket.get_key(face_id)
            img = res.get_contents_as_string()
            with open(img_path, "wb") as f:
                f.write(img)
        except Exception as e:
            logger.warning("face_id {}: {}".format(face_id, e))
Example #6
import concurrent.futures
from multiprocessing import cpu_count

def multithread_run(func, deal_list, work_num: int = 0, max_execute_time=10):
    """Run func(*params) for every params tuple in deal_list using a thread pool."""
    if not work_num:
        work_num = cpu_count()
    work_num = min(work_num, 200)
    logger.info("generate {} thread worker pool for {}".format(work_num, func))
    res = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=work_num) as executor:
        thread_tasks = {executor.submit(func, *params): params for params in deal_list}
        for task in concurrent.futures.as_completed(thread_tasks):
            try:
                # futures yielded by as_completed are already done, so the
                # timeout is effectively a safety net and returns immediately
                data = task.result(timeout=max_execute_time)
                res.append(data)
            except Exception as exc:
                logger.error('generated an exception: {}'.format(exc))
    return res
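A usage sketch: deal_list holds argument tuples because each one is unpacked into func(*params); add is a hypothetical worker, and results arrive in completion order, not input order:

def add(a, b):
    return a + b

pairs = [(1, 2), (3, 4), (5, 6)]
print(multithread_run(add, pairs, work_num=4))  # e.g. [3, 7, 11]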
Example #7
 def query_and_write(self, hql, local_file, chunk_size=100000):
     """Query the results in chunks and write them to a file."""
     cursor = self.get_connection().cursor()
     logger.info("execute hql {}".format(hql))
     with open(local_file, "w+") as f:
         cursor.execute(hql)
         # strip the table prefix from column names such as "t.col"
         columns = [
             column[0].split('.')[-1] for column in cursor.description
         ]
         while True:
             chunk_data = cursor.fetchmany(chunk_size)
             if not chunk_data:
                 break
             for data in chunk_data:
                 f.write(json.dumps(dict(zip(columns, data))) + "\n")
     cursor.close()
     return local_file
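Each row is written as one JSON object per line (JSON Lines); a sketch of reading the export back, with a placeholder path:

import json

with open("/tmp/export.jsonl") as f:
    rows = [json.loads(line) for line in f]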
Example #8
 def wait(self):
     """Block until the async job submitted by deal() completes."""
     logger.info("wait process finish")
     if self.res:
         self.res.get()  # blocks and re-raises any worker exception
     if self.pool:
         self.pool.close()
Example #9
 def deal(self):
     """Submit self.params to the pool without blocking; wait() collects it."""
     logger.info("generate {} worker pool for {}".format(self.work_num, self.func))
     self.res = self.pool.starmap_async(self.func, self.params)
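Taken together, deal() and wait() form a fire-then-wait wrapper over starmap_async; a minimal self-contained sketch of the same pattern, where AsyncRunner and mul are hypothetical names:

from multiprocessing import Pool

class AsyncRunner:
    def __init__(self, func, params, work_num=4):
        self.func, self.params, self.work_num = func, params, work_num
        self.pool = Pool(work_num)
        self.res = None

    def deal(self):
        # non-blocking submit; each params tuple is unpacked into func(a, b, ...)
        self.res = self.pool.starmap_async(self.func, self.params)

    def wait(self):
        if self.res:
            self.res.get()  # blocks; re-raises any worker exception
        if self.pool:
            self.pool.close()
            self.pool.join()

def mul(a, b):
    return a * b

if __name__ == "__main__":
    runner = AsyncRunner(mul, [(2, 3), (4, 5)])
    runner.deal()
    runner.wait()
    print(runner.res.get())  # [6, 20]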