def scrapy_crawl_abchina():
    """Run the ABC China (Agricultural Bank of China) spider.

    Runs with no Scrapy setting overrides and no spider kwargs.
    To enable per-run spider logging, add 'LOG_FILE'/'LOG_LEVEL'
    entries to ``settings``.
    """
    logger.info('scrapy_crawl_abchina()')
    # FIX: the original computed a date string on every call that was only
    # used by commented-out LOG_FILE settings — dead code removed.
    settings = {}
    run_spider(AbchinaSpider, settings, None)
def scrapy_crawl_weibo_stars_info():
    """Run the Weibo stars info spider.

    Runs with no Scrapy setting overrides and no spider kwargs.
    To enable per-run spider logging, add 'LOG_FILE'/'LOG_LEVEL'
    entries to ``settings``.
    """
    logger.info('scrapy_crawl_weibo_stars_info()')
    # FIX: the original computed a date string on every call that was only
    # used by commented-out LOG_FILE settings — dead code removed.
    settings = {}
    run_spider(WeiboStarsSpider, settings, None)
def scrapy_crawl_gyrx():
    """Crawl GYRX data for fund 485111, for yesterday in Beijing time."""
    logger.info('scrapy_crawl_gyrx()')
    # Convert "now" from UTC to Beijing time (UTC+8), then take yesterday.
    now_beijing = datetime.utcnow().replace(tzinfo=timezone.utc).astimezone(
        timezone(timedelta(hours=8)))
    yesterday = (now_beijing.date() - timedelta(days=1)).strftime('%Y%m%d')
    run_spider(GyrxSpider, {}, {
        'startTime': yesterday,
        'endTime': yesterday,
        'fundId': '485111',
    })
def scrapy_crawl_yhfund():
    """Crawl YH fund data for fund 000286, for yesterday in Beijing time.

    Runs with no Scrapy setting overrides. To enable per-run spider
    logging, add 'LOG_FILE'/'LOG_LEVEL' entries to ``settings``.
    """
    logger.info('scrapy_crawl_yhfund()')
    # FIX: the original computed an extra date string on every call that was
    # only used by commented-out LOG_FILE settings — dead code removed.
    settings = {}
    # Convert "now" from UTC to Beijing time (UTC+8), then take yesterday.
    beijing_now = datetime.utcnow().replace(tzinfo=timezone.utc).astimezone(
        timezone(timedelta(hours=8)))
    yesterday = (beijing_now.date() - timedelta(days=1)).strftime('%Y-%m-%d')
    kwargs = {
        'start_date': yesterday,
        'end_date': yesterday,
        'fund_code': '000286',
    }
    run_spider(YhfundSpider, settings, kwargs)
def scrapy_crawl_cmfchina():
    """Crawl CMF China data for two funds, for yesterday in Beijing time."""
    logger.info('scrapy_crawl_cmfchina()')
    # Convert "now" from UTC to Beijing time (UTC+8), then take yesterday.
    now_beijing = datetime.utcnow().replace(tzinfo=timezone.utc).astimezone(
        timezone(timedelta(hours=8)))
    yesterday = (now_beijing.date() - timedelta(days=1)).strftime('%Y%m%d')
    # Crawl CMF "An Xin" income fund (217011), then the dual-bond
    # enhanced LOF (161716), with identical date windows.
    for fund_id in ('217011', '161716'):
        run_spider(CmfchinaSpider, {}, {
            'startTime': yesterday,
            'endTime': yesterday,
            'fundId': fund_id,
        })
def bank_response_to_info():
    """Convert unprocessed bank crawl responses into ``BankInfo`` rows.

    Finds every ``BankCrawlResult`` with an id greater than the highest
    ``bank_crawl_result_id`` already stored in ``BankInfo`` (defaulting to 1
    when the table is empty), extracts the RMB-account-gold sell price and
    update time from the JSON response, and persists one ``BankInfo`` row per
    result. Results missing the target product are logged and skipped.
    """
    logger.info('bank_response_to_info()')
    session = db.DBSession()
    try:
        # Resume after the last crawl-result id that was already converted.
        last_bank_crawl_result_id = 1
        last_bank_info = session.query(db.BankInfo).order_by(
            db.BankInfo.bank_crawl_result_id.desc()).first()
        if last_bank_info is not None:
            last_bank_crawl_result_id = last_bank_info.bank_crawl_result_id
        pending_results = session.query(db.BankCrawlResult)\
            .filter(db.BankCrawlResult.id > last_bank_crawl_result_id).all()
        for crawl_result in pending_results:
            response_json = json.loads(crawl_result.response)
            rmb_gold_customer_sell = None
            rmb_gold_update_beijing_time = None
            # 'Table1' lists products; pick the RMB account gold entry.
            for table1_item in response_json['Table1']:
                if table1_item['ProdName'] == '人民币账户黄金':
                    rmb_gold_customer_sell = table1_item['CustomerSell']
                    rmb_gold_update_beijing_time = table1_item['UpdateTime']
                    break
            if rmb_gold_customer_sell is None or rmb_gold_update_beijing_time is None:
                # BUG FIX: the original concatenated this str with the row id
                # (likely an int primary key), which raises TypeError on the
                # error path; use lazy %-formatting instead.
                logger.error(
                    'Can not find 人民币账户黄金 from bank_crawl_result.response'
                    ' where bank_crawl_result.id = %s', crawl_result.id)
                continue
            new_bank_info = db.BankInfo()
            new_bank_info.bank_crawl_result_id = crawl_result.id
            new_bank_info.bank_name = crawl_result.bank_name
            new_bank_info.rmb_gold_customer_sell = decimal.Decimal(
                rmb_gold_customer_sell)
            # NOTE(review): timestamp assumed to be Beijing local time per the
            # column name — confirm against the upstream API.
            new_bank_info.rmb_gold_update_beijing_time = datetime.strptime(
                rmb_gold_update_beijing_time, '%Y-%m-%d %H:%M:%S')
            session.add(new_bank_info)
        session.commit()
    finally:
        # FIX: release the session even when a query or commit raises
        # (the original leaked it on any exception).
        session.close()
    return
def add(x, y):
    """Return the sum of *x* and *y*, logging the call first."""
    logger.info('add(%s, %s)', x, y)
    return x + y
def collect():
    """Run a full garbage collection and log the unreachable-object count."""
    logger.info('collect()')
    unreachable_count = gc.collect()
    # FIX: use lazy %-formatting instead of eager string concatenation so the
    # message is only built if the log level is enabled (rendered text is
    # identical to the original).
    logger.info('gc.collect()=%s', unreachable_count)