def main(fd):
    """Schedule one ``parse`` task per result page and wait for all of them.

    The page count is read from the ``'pages'`` entry returned by
    ``get_page_num`` for the first URL built by ``get_suspended_url``. Pages
    are numbered from 1 to that count inclusive.

    :param fd: forwarded unchanged to ``get_suspended_url``.
    :return: None
    """
    engine = get_db_engine_for_pandas()
    total_page = int(get_page_num(get_suspended_url(fd=fd))['pages'])
    if total_page < 1:
        # asyncio.wait() raises ValueError when given an empty collection,
        # so there is nothing to schedule and nothing to wait for.
        return
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(parse(get_suspended_url(fd=fd, page=p), engine))
        for p in range(1, total_page + 1)
    ]
    # Blocks until every page task has finished.
    loop.run_until_complete(asyncio.wait(tasks))
def run():
    """Work out how many pages exist and run a ``ConceptDown`` spider on them.

    A single request (``psize=1, page=2``) is made only to read
    ``'TotalCount'`` from the JSON response; the page numbers are then derived
    with a fixed page size of 100. Any exception is printed and swallowed.

    :return: None
    """
    try:
        per_page = 100
        count_url = get_concept_url(psize=1, page=2)
        total_count = json.loads(requests_get(count_url))['TotalCount']
        last_page = math.ceil(total_count / per_page)
        pages = tuple(range(1, last_page + 1))

        db_engine = get_db_engine_for_pandas()
        ConceptDown(pages, per_page, db_engine).run()
    except Exception as err:
        print(err)
Exemple #3
0
def main(year, month):
    """Schedule one ``parse`` task per result page and wait for all of them.

    The report date is obtained from ``time_last_day_of_month`` and passed to
    ``get_cashflow_url``. The page count is read from ``['result']['pages']``
    of what ``get_page_num`` returns for the first URL. Pages are numbered
    from 1 to that count inclusive.

    :param year: forwarded to ``time_last_day_of_month``.
    :param month: forwarded to ``time_last_day_of_month``.
    :return: None
    """
    engine = get_db_engine_for_pandas()
    report_date = time_last_day_of_month(year=year, month=month)
    first_page = get_page_num(get_cashflow_url(report_date=report_date))
    total_page = int(first_page['result']['pages'])
    if total_page < 1:
        # asyncio.wait() raises ValueError when given an empty collection,
        # so there is nothing to schedule and nothing to wait for.
        return
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(
            parse(get_cashflow_url(report_date=report_date, page=p), engine))
        for p in range(1, total_page + 1)
    ]
    # Blocks until every page task has finished.
    loop.run_until_complete(asyncio.wait(tasks))
def run(date, pagesize=100):
    """Work out how many pages exist and run an ``HSGTDetailDown`` spider.

    A probe request with a page size of 1 is used to read ``'pages'`` from the
    JSON response; that value is divided by ``pagesize`` (rounded up) to get
    the list of page numbers. Any exception is passed to ``except_handle``.

    :param date: forwarded to ``get_hsgt_detail_url`` and ``HSGTDetailDown``.
    :param pagesize: number of records per page used for the download.
    :return: None
    """
    try:
        probe_url = get_hsgt_detail_url(psize=1, page=1, date=date)
        total = json.loads(requests_get(probe_url))['pages']

        page_count = math.ceil(total / pagesize)
        pages = tuple(range(1, page_count + 1))
        db_engine = get_db_engine_for_pandas()
        HSGTDetailDown(pages, pagesize, date, db_engine).run()
    except Exception as err:
        except_handle(err)
def main(page_size=100):
    """Schedule one ``parse`` task per result page and wait for all of them.

    The page count is ``['data']['total']`` (from ``get_page_num`` on the
    first URL) divided by ``page_size`` and rounded up. Pages are numbered
    from 1 to that count inclusive.

    :param page_size: records per page, passed to
        ``get_trade_date_detail_url`` as ``psize``.
    :return: None
    """
    engine = get_db_engine_for_pandas()
    first_page = get_page_num(get_trade_date_detail_url(psize=page_size))
    total_page = math.ceil(first_page['data']['total'] / page_size)
    if total_page < 1:
        # asyncio.wait() raises ValueError when given an empty collection,
        # so there is nothing to schedule and nothing to wait for.
        return
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(
            parse(get_trade_date_detail_url(psize=page_size, page=p), engine))
        for p in range(1, total_page + 1)
    ]
    # Blocks until every page task has finished.
    loop.run_until_complete(asyncio.wait(tasks))
Exemple #6
0
    def scheduler(self):
        '''
        Scheduling routine.

        For every page number in ``self._page_list`` this downloads one page
        of notices, turns its ``'data'`` entries into a DataFrame and appends
        it to the ``s_notices`` table. An exception raised while handling a
        page is passed to ``except_handle`` and the loop continues with the
        next page.

        :return: None
        '''
        engine = get_db_engine_for_pandas()
        column = self.columns()
        if not len(self._page_list):
            return

        for page_num in self._page_list:
            url = get_notice_url(page=page_num,
                                 notice_type=self._type,
                                 date=self._time)

            try:
                raw = requests_get(url)
                # Drop the literal 'var  = ' text and any trailing ';' so the
                # remainder can be parsed as JSON.
                payload = raw.replace('var  = ', '').rstrip(';')
                records = json.loads(payload)['data']

                if len(records):
                    rows = [
                        [
                            item['codes'][0]['stock_code'],
                            item['codes'][0]['short_name'],
                            # First 10 characters with the dashes removed.
                            item['notice_date'][:10].replace('-', ''),
                            item['columns'][0]['column_code'],
                            item['columns'][0]['column_name'],
                            item['art_code'],
                            item['title'],
                        ]
                        for item in records
                    ]

                    frame = pd.DataFrame(rows, columns=column)
                    frame.set_index('notice_date', inplace=True)
                    frame.to_sql(name='s_notices',
                                 con=engine,
                                 if_exists='append')
                print('第{}下载完成'.format(page_num))

            except Exception as err:
                except_handle(err)
def main(begin_date, end_date):
    """Schedule one ``parse`` task per result page and wait for all of them.

    The page count is read from the ``'pages'`` entry returned by
    ``get_page_num`` for the first URL built by ``get_unlocked_url``. Pages
    are numbered from 1 to that count inclusive.

    :param begin_date: forwarded unchanged to ``get_unlocked_url``.
    :param end_date: forwarded unchanged to ``get_unlocked_url``.
    :return: None
    """
    engine = get_db_engine_for_pandas()
    first_url = get_unlocked_url(begin_date=begin_date, end_date=end_date)
    total_page = int(get_page_num(first_url)['pages'])
    if total_page < 1:
        # asyncio.wait() raises ValueError when given an empty collection,
        # so there is nothing to schedule and nothing to wait for.
        return
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(
            parse(
                get_unlocked_url(
                    begin_date=begin_date,
                    end_date=end_date,
                    page=p),
                engine))
        for p in range(1, total_page + 1)
    ]
    # Blocks until every page task has finished.
    loop.run_until_complete(asyncio.wait(tasks))