def _download_sheet(stock_code, sheet_type, term_type, sheet_part):
    detail = 'download finance_sheet(sheet_type[{}], term_type[{}], sheet_part[{}]) failed.'.format(
        sheet_type, term_type, sheet_part)
    try:
        resp = requests.get(
            _download_url(stock_code, sheet_type, term_type, sheet_part),
            stream=True)
        if resp is not None and resp.ok:
            # The source serves GBK-encoded CSV; normalize line endings
            # before re-encoding as UTF-8.
            lines = [
                line.strip() for line in resp.content.decode('gbk').replace(
                    '\r\n', '\n').strip().split('\n')
            ]
            # Drop empty lines and strip trailing commas left by the source.
            lines = [
                line if line[-1] != ',' else line[0:-1]
                for line in lines if line != ''
            ]
            lines.append('')  # ensure the file ends with a newline
            with codecs.open(
                    finance_sheet_file_path(stock_code, sheet_type, term_type,
                                            sheet_part), 'w', 'utf-8') as f:
                f.write('\n'.join(lines))
        else:
            raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED,
                                        detail)
    except error.ServerException as e:
        logger.error(detail)
        raise e
    except Exception as e:
        raise error.ServerException(
            error.SERVER_ERR_DOWNLOAD_FAILED,
            '{} {}'.format(detail, error.exception_string(e)))
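# A minimal usage sketch for _download_sheet. The concrete values
# ('balance', 'annual', '') are hypothetical placeholders; the valid keys
# come from the sheet_types registry used by finance_sheet_file_path,
# which is not shown in this section.
def _example_download_one_sheet(stock_code):
    try:
        _download_sheet(stock_code, 'balance', 'annual', '')  # assumed keys
    except error.ServerException as e:
        # Download failures surface as SERVER_ERR_DOWNLOAD_FAILED; callers
        # can retry or report upstream.
        logger.warning('retry later: {}'.format(e.err_msg))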
def create_directory(directory_path):
    # Idempotent: an existing directory is left untouched.
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)
    elif os.path.isdir(directory_path):
        return
    else:
        raise error.ServerException(
            error.SERVER_ERR_INTERNAL,
            'path[{}] exists and is not directory'.format(directory_path))
def clean_directory(directory_path):
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)
    elif os.path.isdir(directory_path):
        # Remove every entry under the directory but keep the directory itself.
        for path in os.listdir(directory_path):
            remove_path(os.path.join(directory_path, path))
    else:
        raise error.ServerException(
            error.SERVER_ERR_INTERNAL,
            'path[{}] is not directory'.format(directory_path))
def remove_path(path):
    # Missing paths are a no-op; files and directory trees are both handled.
    if not os.path.exists(path):
        return
    if os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.isfile(path):
        os.remove(path)
    else:
        raise error.ServerException(
            error.SERVER_ERR_INTERNAL,
            'unsupported type for remove path[{}]'.format(path))
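# A minimal sketch showing how the three helpers above compose: create a
# working directory, wipe it between runs, and remove it entirely when done.
# The '/tmp/finance_cache' path is a hypothetical example.
def _example_directory_lifecycle():
    work_dir = '/tmp/finance_cache'
    create_directory(work_dir)   # no-op if the directory already exists
    clean_directory(work_dir)    # empties the directory but keeps it
    remove_path(work_dir)        # removes the whole tree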
def download_stock_basics():
    utils.clean_directory(BASIC_DIR)
    try:
        df = ts.get_stock_basics()
        utils.df2csv(df, _stock_basics_file_path)
    except Exception as e:
        logger.error('download_stock_basics failed. {}'.format(
            error.exception_string(e)))
        # Leave no partial data behind on failure.
        utils.clean_directory(BASIC_DIR)
        raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED,
                                    error.exception_string(e))
def download_forecast(year, quarter):
    try:
        df = ts.forecast_data(year, quarter)
        df.set_index('code', inplace=True)
        utils.df2csv(df, _forecast_file_path(year, quarter))
    except Exception as e:
        logger.error(
            'download_forecast(year[{}], quarter[{}]) failed. {}'.format(
                year, quarter, error.exception_string(e)))
        raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED,
                                    error.exception_string(e))
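# A minimal sketch that backfills forecasts over a range of years. The
# year range and the skip-on-failure policy are illustrative assumptions;
# tushare's forecast_data expects quarter in 1..4.
def _example_backfill_forecasts(start_year, end_year):
    for year in range(start_year, end_year + 1):
        for quarter in (1, 2, 3, 4):
            try:
                download_forecast(year, quarter)
            except error.ServerException:
                # Skip quarters the upstream source cannot serve.
                continue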
def download_categories():
    utils.clean_directory(CATEGORY_DIR)
    try:
        # _all_categories maps a category method name to a 1-tuple holding
        # the name of the tushare function that fetches it.
        for category_method, (func_name,) in _all_categories.items():
            try:
                df = getattr(ts, func_name)()
                df.set_index('code', inplace=True)
                utils.df2csv(df, _category_file_path(category_method))
            except Exception as e:
                logger.error('download_categories[{}] failed. {}'.format(
                    category_method, error.exception_string(e)))
                raise e
    except Exception as e:
        # Remove partially written category files before propagating.
        utils.clean_directory(CATEGORY_DIR)
        raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED,
                                    error.exception_string(e))
def _get_latest_report_html(report_type, page_no, start_time=None,
                            end_time=None):
    # Map internal report types to cninfo notice-type codes.
    to_notice_type = {1: '010305', 2: '010303', 3: '010307', 4: '010301'}
    cur_dt = utils.current_datetime()
    today = '%d-%02d-%02d' % (cur_dt.year, cur_dt.month, cur_dt.day)
    url = 'http://www.cninfo.com.cn/search/search.jsp'
    data = {
        'orderby': 'date11',
        'marketType': '',
        'stockCode': '',
        'keyword': '',
        'noticeType': to_notice_type[report_type],
        'pageNo': page_no,
        'startTime': today if start_time is None else start_time,
        'endTime': today if end_time is None else end_time,
    }
    resp = requests.post(url, data)
    if resp is not None and resp.ok:
        return resp.content.decode('gbk')
    else:
        raise error.ServerException(
            error.SERVER_ERR_DOWNLOAD_FAILED,
            'download html from {} failed'.format(url))
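# A minimal paging sketch over _get_latest_report_html. The stopping
# condition (an empty page) is an assumption; the real parser for cninfo's
# result markup is not shown in this section.
def _example_iter_report_pages(report_type, max_pages=10):
    for page_no in range(1, max_pages + 1):
        html = _get_latest_report_html(report_type, page_no)
        if not html.strip():  # assumed end-of-results condition
            break
        yield html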
def crawl():
    # Only one crawl may run at a time; the flag is flipped under the lock.
    with _crawl_lock:
        if CrawlSummary().crawling:
            raise error.ServerException(error.SERVER_ERR_OP_CONCURRENT)
        else:
            CrawlSummary().crawling = True
    if CrawlSummary().full_time is None:
        CrawlSummary().full_start()
    if CrawlSummary().full_complete:
        # Once the full crawl has finished, run incremental crawls.
        if CrawlSummary().increment_time is None:
            CrawlSummary().increment_start()
        crawl_increment()
    else:
        crawl_full()
def finance_sheet_file_path(stock_code, sheet_type, term_type, sheet_part):
    # Validate parameters against the sheet_types registry.
    if sheet_type not in sheet_types or \
            term_type not in sheet_types[sheet_type]['terms'] or \
            sheet_part not in sheet_types[sheet_type]['parts']:
        raise error.ServerException(
            error.SERVER_ERR_INTERNAL,
            'wrong param: stock_code[{}], sheet_type[{}], term_type[{}], sheet_part[{}]'
            .format(stock_code, sheet_type, term_type, sheet_part))
    sheet_directory = finance_sheet_directory(stock_code)
    if sheet_part == '':
        return os.path.join(
            sheet_directory,
            '{}_{}_{}.csv'.format(sheet_type, term_type, stock_code))
    else:
        return os.path.join(
            sheet_directory,
            '{}_{}_{}_{}.csv'.format(sheet_type, sheet_part, term_type,
                                     stock_code))
def raise_exception(request, e, content=None):
    # Local imports: jsonrpc is only needed on the RPC path.
    from jsonrpc.exceptions import ServerError
    import traceback
    # Wrap unexpected exceptions in a ServerException with a traceback.
    if isinstance(e, error.ServerException):
        raise_e = e
    else:
        raise_e = error.ServerException(
            error.SERVER_ERR_INTERNAL,
            '{}\n{}'.format(e, traceback.format_exc()))
    err_msg = 'download error code: {}, msg: {}'.format(
        raise_e.err_code, raise_e.err_msg)
    # Log the error with its originating context, if given.
    if content is not None:
        logger.error('{} failed. {}'.format(content, err_msg))
    # Inside a JSON-RPC request, surface a ServerError; otherwise re-raise
    # the ServerException for in-process callers.
    if request is not None:
        raise ServerError(err_msg)
    else:
        raise raise_e
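# A minimal sketch of how raise_exception is typically called from a
# JSON-RPC handler. The handler name and the download_stock_basics call
# are illustrative.
def _example_rpc_handler(request):
    try:
        download_stock_basics()
    except Exception as e:
        # With a request, this raises a jsonrpc ServerError; with
        # request=None it re-raises a ServerException instead.
        raise_exception(request, e, content='download_stock_basics')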
def crawler_object(code):
    try:
        return CrawlerModel.objects.get(code=code)
    except ObjectDoesNotExist as e:
        raise error.ServerException(error.SERVER_ERR_OBJECT_NOT_EXIST,
                                    error.exception_string(e))