def main(fd):
    """Download every page of suspended-stock data concurrently and store it.

    :param fd: date-like filter forwarded to ``get_suspended_url``
    """
    engine = get_db_engine_for_pandas()
    total_page = int(get_page_num(get_suspended_url(fd=fd))['pages'])
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(parse(get_suspended_url(fd=fd, page=p), engine))
        for p in range(1, total_page + 1)
    ]
    # asyncio.wait() raises ValueError on an empty collection; skip when the
    # endpoint reports zero pages.
    if tasks:
        loop.run_until_complete(asyncio.wait(tasks))
def run():
    """Download all concept-board pages and persist them via ``ConceptDown``."""
    try:
        # The psize=1 probe request is only used to read the total record
        # count from 'TotalCount'.
        url = get_concept_url(psize=1, page=2)
        tc = json.loads(requests_get(url))['TotalCount']
        p_num = 100
        page_data = tuple(range(1, math.ceil(tc / p_num) + 1))
        engine = get_db_engine_for_pandas()
        spider = ConceptDown(page_data, p_num, engine)
        spider.run()
    except Exception as e:
        # Route errors through the shared handler, consistent with the other
        # downloaders in this module, instead of a bare print.
        except_handle(e)
def main(year, month):
    """Download every page of the cash-flow report for the given month.

    :param year: report year
    :param month: report month; the report date used is the last day of
        that month
    """
    engine = get_db_engine_for_pandas()
    report_date = time_last_day_of_month(year=year, month=month)
    total_page = int(
        get_page_num(
            get_cashflow_url(report_date=report_date))['result']['pages'])
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(
            parse(get_cashflow_url(report_date=report_date, page=p), engine))
        for p in range(1, total_page + 1)
    ]
    # asyncio.wait() raises ValueError on an empty collection; guard the
    # zero-page case.
    if tasks:
        loop.run_until_complete(asyncio.wait(tasks))
def run(date, pagesize=100):
    """Download HSGT detail data for *date*, split into pages of *pagesize*."""
    try:
        # With a page size of 1 the reported 'pages' value presumably equals
        # the total record count — TODO confirm against the endpoint.
        probe_url = get_hsgt_detail_url(psize=1, page=1, date=date)
        total_records = json.loads(requests_get(probe_url))['pages']
        last_page = math.ceil(total_records / pagesize)
        pages = tuple(range(1, last_page + 1))
        downloader = HSGTDetailDown(pages, pagesize, date,
                                    get_db_engine_for_pandas())
        downloader.run()
    except Exception as err:
        except_handle(err)
def main(page_size=100):
    """Download every page of trade-date detail data concurrently.

    :param page_size: records requested per page (default 100)
    """
    engine = get_db_engine_for_pandas()
    total_page = math.ceil(
        get_page_num(
            get_trade_date_detail_url(psize=page_size))['data']['total'] /
        page_size)
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(
            parse(get_trade_date_detail_url(psize=page_size, page=p), engine))
        for p in range(1, total_page + 1)
    ]
    # asyncio.wait() raises ValueError on an empty collection; when the
    # endpoint reports zero records there is nothing to schedule.
    if tasks:
        loop.run_until_complete(asyncio.wait(tasks))
def scheduler(self):
    """Download every page in ``self._page_list`` and persist the notices.

    Each page's response is unwrapped from its ``var = ...;`` envelope,
    parsed as JSON, flattened into rows and appended to the ``s_notices``
    table. Per-page errors are delegated to ``except_handle`` so one bad
    page does not abort the rest.

    :return: None
    """
    engine = get_db_engine_for_pandas()
    column = self.columns()
    if len(self._page_list):
        for page_num in self._page_list:
            url = get_notice_url(page=page_num, notice_type=self._type,
                                 date=self._time)
            try:
                notice = requests_get(url)
                # Response is wrapped like "var = {...};" — strip the prefix
                # and trailing semicolon to get raw JSON.
                notice = notice.replace('var = ', '')
                notices = notice.rstrip(';')
                data_all = json.loads(notices)
                if len(data_all['data']):
                    # Flatten each notice record into one row matching
                    # self.columns().
                    insert_values = [[
                        row['codes'][0]['stock_code'],
                        row['codes'][0]['short_name'],
                        # 'YYYY-MM-DD...' -> 'YYYYMMDD'
                        row['notice_date'][:10].replace('-', ''),
                        row['columns'][0]['column_code'],
                        row['columns'][0]['column_name'],
                        row['art_code'],
                        row['title'],
                    ] for row in data_all['data']]
                    df = pd.DataFrame(insert_values, columns=column)
                    df.set_index('notice_date', inplace=True)
                    df.to_sql(name='s_notices', con=engine,
                              if_exists='append')
                    print('第{}下载完成'.format(page_num))
            except Exception as e:
                except_handle(e)
def main(begin_date, end_date):
    """Download every page of share-unlock data in [begin_date, end_date].

    :param begin_date: start of the query window (format per
        ``get_unlocked_url``)
    :param end_date: end of the query window
    """
    engine = get_db_engine_for_pandas()
    total_page = int(
        get_page_num(
            get_unlocked_url(
                begin_date=begin_date, end_date=end_date))['pages'])
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(
            parse(
                get_unlocked_url(
                    begin_date=begin_date, end_date=end_date, page=p),
                engine))
        for p in range(1, total_page + 1)
    ]
    # asyncio.wait() raises ValueError on an empty collection; guard the
    # zero-page case.
    if tasks:
        loop.run_until_complete(asyncio.wait(tasks))