def get_news_item(start_date, end_date):
    """Crawl wallstreetcn live-news items from newest to oldest and save them as CSV.

    Pages backwards through the ``content/lives`` API using its cursor until the
    cursor falls before ``start_date``, appending ``time,content`` rows to
    ``../files/wallstreetcn_<start_date>_<end_date>.csv`` after each page.

    :param start_date: inclusive lower bound, formatted ``YYYYMMDD`` (e.g. ``'20200101'``)
    :param end_date:   inclusive upper bound, formatted ``YYYYMMDD``
    """
    # Newest boundary: end of end_date (23:59:59) converted to the API's long cursor.
    reverse_year = int(end_date[0:4])
    reverse_month = int(end_date[4:6])
    reverse_day = int(end_date[6:8])
    reverse_pattern = (reverse_year, reverse_month, reverse_day, 23, 59, 59, 99, 99, 99)
    reverse_cursor = CommonUtil.convert_date_to_long(reverse_pattern)
    logger.info("reverseCursor is %s" % reverse_cursor)
    # Oldest boundary: start of start_date (00:00:00); crawling stops once the
    # cursor moves past this value.
    finished_year = int(start_date[0:4])
    finished_month = int(start_date[4:6])
    finished_day = int(start_date[6:8])
    finished_pattern = (finished_year, finished_month, finished_day, 0, 0, 0, 0, 0, 0)
    finished_cursor = CommonUtil.convert_date_to_long(finished_pattern)
    logger.info("finishedCursor is %s" % finished_cursor)
    # Base URL to crawl (channel list fixed to the gold/forex/oil live feeds).
    url_pattern = 'https://api-prod.wallstreetcn.com/apiv1/content/lives?' \
                  'channel=weex-channel,gold-channel,gold-forex-channel,' \
                  'forex-channel,goldc-channel,oil-channel&client=pc'
    news_limit = 100
    cursor = reverse_cursor
    # Page counter, reported at the end.
    page_num = 0
    output_path = '../files/wallstreetcn_%s_%s.csv' % (start_date, end_date)
    # Walk backwards in time until the cursor crosses the finished boundary.
    while int(cursor) > int(finished_cursor):
        page_num += 1
        # FIX: the original appended the limit with no parameter key ("&100");
        # the value must be sent as the "limit" query parameter.
        url = url_pattern + "&cursor=" + str(cursor) + "&limit=" + str(news_limit)
        logger.info(url)
        page = requests.Session().get(url)
        page.encoding = 'utf-8'
        if page.status_code == 200:
            data_all = json.loads(page.text)
            res_data = data_all['data']
            data_items = res_data['items']
            cursor = res_data['next_cursor']
            # Collect rows in a list and join once (avoids quadratic string concat).
            rows = []
            for item in data_items:
                display_time = item['display_time']
                # Flatten the content onto a single CSV line.
                context = item['content_text'].strip().replace('\n', '')
                context = context.replace('\r', '')
                # Renamed from `time` to avoid shadowing the stdlib module name.
                news_time = CommonUtil.convert_long_to_date(display_time)
                rows.append(news_time + "," + context + "\n")
            # Append this page's rows to the output file.
            CommonUtil.save_to_file(output_path, ''.join(rows))
            # Exit the loop when there is no next page.
            if cursor == '':
                break
        else:
            # FIX: originally a non-200 response left `cursor` unchanged, so the
            # while-loop spun forever re-requesting the same URL. Log (matching
            # get_market_data's style) and stop.
            logger.warning("Response Code is %s, Please Check!" % page.status_code)
            break
    logger.info("Finished With %s Pages Crawled." % page_num)
def get_market_data(i_type, i_count, s_data_type):
    """Fetch k-line (candle) market data from wallstreetcn and save it as CSV.

    Requests ``i_count`` candles of period ``i_type`` for product ``s_data_type``
    and writes a header row plus one row per candle (first column converted from
    a long timestamp to a date string) to ``MARKET_DATA_PATH/<s_data_type>.csv``.
    Logs a warning and writes nothing when the response is not HTTP 200.

    :param i_type:      candle period code passed to the API
    :param i_count:     number of candles to request
    :param s_data_type: product code, also used as the output file name
    """
    # URL to crawl, assembled from a single template.
    url_pattern = (
        'https://forexdata.wallstreetcn.com/kline?prod_code={0}'
        '&candle_period={1}'
        '&fields=time_stamp,open_px,close_px,high_px,low_px,ma5,ma10,ma20,ma60,upper,mid,lower,diff,dea,'
        'macd,k,d,j,rsi6,rsi12,rsi24&data_count={2}'
    ).format(s_data_type, i_type, i_count)
    logger.info(url_pattern)
    page = requests.Session().get(url_pattern)
    page.encoding = 'utf-8'
    if page.status_code != 200:
        logger.warning("Response Code is %s, Please Check!" % page.status_code)
        return
    candle_data = json.loads(page.text)['data']['candle']
    lines = []
    # Header row: field names, each followed by a comma (trailing comma kept,
    # matching the data rows below).
    lines.append(''.join(field + ',' for field in candle_data['fields']))
    # Data rows: first column is a long timestamp converted to a date string,
    # remaining columns are emitted verbatim; every cell ends with a comma.
    data_list = candle_data[s_data_type]
    for row in data_list:
        cells = []
        for col_idx, value in enumerate(row):
            if col_idx == 0:
                cells.append(CommonUtil.convert_long_to_date(value) + ',')
            else:
                cells.append(str(value) + ',')
        lines.append(''.join(cells))
    logger.info("Finished With %s Items Crawled." % (len(data_list)))
    file_content = '\n'.join(lines) + '\n'
    CommonUtil.save_to_file(MARKET_DATA_PATH + '/%s.csv' % s_data_type, file_content)