Example 1
    async def run(self):
        db_session = get_db_session(self.region, self.provider, self.data_schema)
        kafka_producer = connect_kafka_producer(findy_config['kafka'])

        if not hasattr(self, 'entities'):
            # Let init_entities populate the entity list on the first run.
            self.entities: List = None
            await self.init_entities(db_session)

        if self.entities:
            http_session = get_async_http_session()
            throttler = asyncio.Semaphore(self.share_para[0])

            (taskid, desc) = self.share_para[1]
            data = {"task": taskid, "total": len(self.entities), "desc": desc, "leave": True, "update": 0}
            publish_message(kafka_producer, progress_topic, bytes(progress_key, encoding='utf-8'), bytes(json.dumps(data), encoding='utf-8'))

            tasks = [self.process_loop(entity, http_session, db_session, kafka_producer, throttler) for entity in self.entities]

            # Await each task as it completes.
            for task in asyncio.as_completed(tasks):
                await task

            await self.on_finish()

            await http_session.close()
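Every example here reports progress through the same two helpers, connect_kafka_producer and publish_message, whose definitions are not part of these snippets. A minimal sketch of what they could look like, assuming they are thin wrappers around kafka-python (only the call signatures are taken from the examples; the bodies are illustrative):

from kafka import KafkaProducer


def connect_kafka_producer(address):
    # Sketch only: kafka-python accepts a "host:port" string here.
    return KafkaProducer(bootstrap_servers=address)


def publish_message(producer, topic, key: bytes, value: bytes):
    # send() enqueues asynchronously; flush() blocks until delivery.
    producer.send(topic, key=key, value=value)
    producer.flush()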
Example 2
    async def on_finish(self):
        desc = DividendFinancing.__name__ + ": update relevant table"
        db_session = get_db_session(self.region, self.provider, DividendFinancing)
        kafka_producer = connect_kafka_producer(findy_config['kafka'])

        for entity in self.entities:
            code_security = {entity.code: entity}

            need_fill_items, column_names = DividendFinancing.query_data(
                region=self.region,
                provider=self.provider,
                db_session=db_session,
                codes=list(code_security.keys()),
                filters=[
                    DividendFinancing.ipo_raising_fund.is_(None),
                    DividendFinancing.ipo_issues != 0])

            if need_fill_items:
                for need_fill_item in need_fill_items:
                    need_fill_item.ipo_raising_fund = entity.raising_fund

            data = {"task": 'div', "total": len(self.entities), "desc": desc, "leave": True, "update": 1}
            publish_message(kafka_producer, progress_topic, bytes(progress_key, encoding='utf-8'), bytes(json.dumps(data), encoding='utf-8'))

        try:
            db_session.commit()
        except Exception as e:
            self.logger.error(f'{self.__class__.__name__}, error: {e}')
            db_session.rollback()

        await super().on_finish()
Example 3
    async def on_finish(self):
        last_year = str(now_pd_timestamp(self.region).year)
        codes = [item.code for item in self.entities]

        db_session = get_db_session(self.region, self.provider,
                                    DividendFinancing)

        need_filleds, column_names = DividendFinancing.query_data(
            region=self.region,
            provider=self.provider,
            db_session=db_session,
            codes=codes,
            end_timestamp=last_year,
            filters=[DividendFinancing.rights_raising_fund.is_(None)])

        if need_filleds:
            desc = self.data_schema.__name__ + ": update relevant table"

            db_session_1 = get_db_session(self.region, self.provider,
                                          self.data_schema)
            kafka_producer = connect_kafka_producer(findy_config['kafka'])

            for item in need_filleds:
                result, column_names = self.data_schema.query_data(
                    region=self.region,
                    provider=self.provider,
                    db_session=db_session_1,
                    entity_id=item.entity_id,
                    start_timestamp=item.timestamp,
                    end_timestamp=f"{item.timestamp.year}-12-31",
                    func=func.sum(self.data_schema.rights_raising_fund))

                if isinstance(result, (int, float)):
                    item.rights_raising_fund = result

                data = {
                    "task": 'rig',
                    "total": len(need_filleds),
                    "desc": desc,
                    "leave": True,
                    "update": 1
                }
                publish_message(kafka_producer, progress_topic,
                                bytes(progress_key, encoding='utf-8'),
                                bytes(json.dumps(data), encoding='utf-8'))

            try:
                db_session.commit()
            except Exception as e:
                self.logger.error(f'{self.__class__.__name__}, error: {e}')
                db_session.rollback()

        await super().on_finish()
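Example 3 pushes the aggregation into the database by passing func=func.sum(...) to query_data. query_data itself is project-specific, but the underlying pattern is plain SQLAlchemy; a hypothetical equivalent, assuming a declarative schema with entity_id, timestamp and rights_raising_fund columns as the examples suggest:

from sqlalchemy import func


def sum_rights_raising_fund(db_session, schema, item):
    # Sum schema.rights_raising_fund from item.timestamp to year end,
    # mirroring the func=func.sum(...) call in Example 3. scalar()
    # returns None when no rows match, which is why the example guards
    # with isinstance(result, (int, float)) before assigning.
    return db_session.query(func.sum(schema.rights_raising_fund)).filter(
        schema.entity_id == item.entity_id,
        schema.timestamp >= item.timestamp,
        schema.timestamp <= f"{item.timestamp.year}-12-31",
    ).scalar()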
Example 4
def fetching(region: Region):
    pbar = ProgressBarProcess()
    pbar.start()

    print("waiting for kafka connection.....")
    time.sleep(5)

    asyncio.run(fetch_data(region))

    kafka_producer = connect_kafka_producer(findy_config['kafka'])
    data = {"command": "@end"}
    publish_message(kafka_producer, progress_topic, bytes(progress_key, encoding='utf-8'), bytes(json.dumps(data), encoding='utf-8'))

    pbar.join()
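ProgressBarProcess is the consumer side of the progress topic: fetching() starts it before any work begins, and the {"command": "@end"} message tells it to stop. Its implementation is not shown in these examples; a hypothetical sketch of such a consumer loop, assuming kafka-python and the message shapes used above:

import json
from multiprocessing import Process

from kafka import KafkaConsumer


class ProgressBarProcess(Process):
    # Hypothetical sketch; the real class is not part of these snippets.
    def run(self):
        consumer = KafkaConsumer(progress_topic,
                                 bootstrap_servers=findy_config['kafka'])
        for message in consumer:
            data = json.loads(message.value)
            if data.get("command") == "@end":
                break  # sent by fetching() once all work is done
            # ...update the progress bars from the "task", "total",
            # "desc" and "update" fields...
        consumer.close()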
Example 5
async def fetch_data(region: Region):
    print("")
    print("*" * 80)
    print(f"*    Start Fetching {region.value.upper()} Stock information...      {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("*" * 80)

    if region == Region.CHN:
        task_set = task_set_chn
    elif region == Region.US:
        task_set = task_set_us
    else:
        task_set = []

    print("")
    print("parallel fetching processing...")
    print("")

    kafka_producer = connect_kafka_producer(findy_config['kafka'])

    data = {"task": "main", "total": len(task_set), "desc": "Total Jobs", "position": 0, "leave": True, "update": 0}
    publish_message(kafka_producer,
                    progress_topic,
                    bytes(progress_key, encoding='utf-8'),
                    bytes(json.dumps(data), encoding='utf-8'))

    cache = get_cache('cache')
    calls_list = []

    for index, item in enumerate(task_set):
        if not valid(region, item[Para.FunName.value].__name__, item[Para.Cache.value], cache):
            # Serial and parallel tasks get the same (position, description) tuple.
            item[Para.Desc.value] = (index + 2, item[Para.Desc.value])
            calls_list.append((region, item))

    multi = True
    if multi:
        pool_tasks = []
        tasks = len(calls_list)
        # max(1, ...) guards against an empty calls_list (division by zero).
        cpus = max(1, min(tasks, os.cpu_count()))
        childconcurrency = max(1, round(tasks / cpus))

        current_os = platform.system().lower()
        if current_os != "windows":
            import uvloop
            loop_initializer = uvloop.new_event_loop
        else:
            loop_initializer = None

        async with amp.Pool(cpus, childconcurrency=childconcurrency, loop_initializer=loop_initializer) as pool:
            for call in calls_list:
                if call[1][Para.Mode.value] == RunMode.Serial:
                    result = await loop_task_set(call)

                    publish_message(kafka_producer,
                                    progress_topic,
                                    bytes(progress_key, encoding='utf-8'),
                                    bytes(json.dumps({"command": "@task-finish", "task": result[Para.Desc.value][0]}), encoding='utf-8'))

                    data['update'] = 1
                    publish_message(kafka_producer,
                                    progress_topic,
                                    bytes(progress_key, encoding='utf-8'),
                                    bytes(json.dumps(data), encoding='utf-8'))

                else:
                    pool_tasks.append(pool.apply(loop_task_set, args=[call]))

            for task in asyncio.as_completed(pool_tasks):
                result = await task

                publish_message(kafka_producer,
                                progress_topic,
                                bytes(progress_key, encoding='utf-8'),
                                bytes(json.dumps({"command": "@task-finish", "task": result[Para.Desc.value][0]}), encoding='utf-8'))

                data['update'] = 1
                publish_message(kafka_producer,
                                progress_topic,
                                bytes(progress_key, encoding='utf-8'),
                                bytes(json.dumps(data), encoding='utf-8'))

    else:
        for call in calls_list:
            result = await loop_task_set(call)

            publish_message(kafka_producer,
                            progress_topic,
                            bytes(progress_key, encoding='utf-8'),
                            bytes(json.dumps({"command": "@task-finish", "task": result[Para.Desc.value][0]}), encoding='utf-8'))

            data['update'] = 1
            publish_message(kafka_producer,
                            progress_topic,
                            bytes(progress_key, encoding='utf-8'),
                            bytes(json.dumps(data), encoding='utf-8'))
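The amp.Pool in Example 5 comes from aiomultiprocess: each worker process runs its own event loop, childconcurrency caps how many coroutines a worker runs at once, and pool.apply returns an awaitable, which is why the results can be drained with asyncio.as_completed. A standalone sketch of the same pattern, with fetch() standing in for loop_task_set:

import asyncio

import aiomultiprocess as amp


async def fetch(n: int) -> int:
    await asyncio.sleep(0.1)  # stand-in for I/O-bound work
    return n * n


async def main():
    # Two worker processes, each running up to four coroutines at a time.
    async with amp.Pool(2, childconcurrency=4) as pool:
        tasks = [pool.apply(fetch, args=[n]) for n in range(8)]
        for task in asyncio.as_completed(tasks):
            print(await task)


if __name__ == "__main__":
    asyncio.run(main())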