예제 #1
0
    def load_data(self):
        """Query the configured data schema and publish the result.

        Steps, in order:

        1. Obtain a session from ``get_db_session`` for this region,
           provider and data schema.
        2. When an entity schema is set but ``self.entity_ids`` is empty,
           look the entities up with ``get_entities`` (by exchanges/codes)
           and store their ``entity_id`` values in ``self.entity_ids``.
        3. Run ``self.data_schema.query_data`` and store the rows in
           ``self.data_df``.
        4. Pass ``self.data_df`` to ``on_data_loaded`` of every listener in
           ``self.data_listeners``.

        Returns ``None``. If the entity lookup in step 2 yields nothing the
        method returns right away: ``self.data_df`` is not assigned, the
        "finished" log line is not written and no listener is called.
        """
        self.logger.info('load_data start')
        start_time = time.time()

        session = get_db_session(self.region, self.provider, self.data_schema)

        # Convert exchanges/codes into standard entity ids.
        if self.entity_schema and not self.entity_ids:
            found, _ = get_entities(
                region=self.region,
                provider=self.provider,
                db_session=session,
                entity_schema=self.entity_schema,
                exchanges=self.exchanges,
                codes=self.codes,
                columns=[self.entity_schema.entity_id],
            )

            if len(found) == 0:
                return
            self.entity_ids = [item.entity_id for item in found]

        data, column_names = self.data_schema.query_data(
            region=self.region,
            provider=self.provider,
            db_session=session,
            entity_ids=self.entity_ids,
            columns=self.columns,
            start_timestamp=self.start_timestamp,
            end_timestamp=self.end_timestamp,
            filters=self.filters,
            order=self.order,
            limit=self.limit,
            level=self.level,
            index=[self.category_field, self.time_field],
            time_field=self.time_field,
        )

        # With no explicit column selection, frame each row's ``__dict__``;
        # otherwise hand the rows to pandas exactly as returned.
        if data and not self.columns:
            records = [row.__dict__ for row in data]
        else:
            records = data
        self.data_df = pd.DataFrame(records, columns=column_names)

        cost_time = time.time() - start_time
        self.logger.info(f'load_data finished, cost_time:{cost_time}')

        for subscriber in self.data_listeners:
            subscriber.on_data_loaded(self.data_df)
예제 #2
0
 async def init_entities(self, db_session):
     """Initialise ``self.entities`` unless a forced update is requested.

     When ``self.force_update`` is truthy nothing happens and
     ``self.entities`` is left untouched. Otherwise ``get_entities`` is
     queried for ``EntityType.StockDetail`` rows matching ``self.codes``
     and the filter ``StockDetail.profile.is_(None)``.

     Args:
         db_session: session object forwarded to ``get_entities``.
     """
     if self.force_update:
         return

     # Init the entity list; the returned column names are not needed here.
     pending, _ = get_entities(
         region=self.region,
         provider=self.provider,
         db_session=db_session,
         entity_type=EntityType.StockDetail,
         codes=self.codes,
         filters=[StockDetail.profile.is_(None)],
     )
     self.entities = pending
예제 #3
0
 async def init_entities(self, db_session):
     """Initialise ``self.entities`` from ``get_entities``.

     The lookup is narrowed by this object's region, provider, entity
     schema, entity type, exchanges, entity ids and codes.

     Args:
         db_session: session object forwarded to ``get_entities``.
     """
     # Init the entity list; the returned column names are not needed here.
     loaded, _ = get_entities(
         region=self.region,
         provider=self.provider,
         db_session=db_session,
         entity_schema=self.entity_schema,
         entity_type=self.entity_type,
         exchanges=self.exchanges,
         entity_ids=self.entity_ids,
         codes=self.codes,
     )
     self.entities = loaded
예제 #4
0
def load_company_info(tickers=None):
    """Return StockDetail entities for Region.US / Provider.Yahoo as a DataFrame.

    Args:
        tickers: forwarded to ``get_entities`` as ``codes``; defaults to
            ``None``.

    Returns:
        A ``pandas.DataFrame`` built from each entity's ``__dict__``,
        limited to the column names reported by ``get_entities`` and
        carrying a freshly reset index.
    """
    schema = get_entity_schema_by_type(EntityType.StockDetail)
    session = get_db_session(Region.US, Provider.Yahoo, schema)

    rows, column_names = get_entities(
        region=Region.US,
        provider=Provider.Yahoo,
        entity_schema=schema,
        db_session=session,
        codes=tickers,
    )

    records = [row.__dict__ for row in rows]
    frame = pd.DataFrame(records, columns=column_names)
    return frame.reset_index(drop=True)
 async def init_entities(self, db_session):
     """Initialise ``self.entities`` with ``EntityType.Index`` entities.

     The lookup is narrowed by this object's region, provider, exchanges,
     codes and entity ids, plus a filter on ``Index.category``.

     Args:
         db_session: session object forwarded to ``get_entities``.
     """
     # Only fetch the concept and industry categories.
     wanted_categories = [
         BlockCategory.industry.value,
         BlockCategory.concept.value,
     ]
     category_filter = Index.category.in_(wanted_categories)

     loaded, _ = get_entities(
         region=self.region,
         provider=self.provider,
         db_session=db_session,
         entity_type=EntityType.Index,
         exchanges=self.exchanges,
         codes=self.codes,
         entity_ids=self.entity_ids,
         filters=[category_filter],
     )
     self.entities = loaded