def QA_SU_save_financial_files():
    """Store financial data locally.

    Calls ``download_financialzip()`` and then walks every entry of
    ``download_path``. Entries whose name starts with ``gpcw`` are parsed
    with ``parse_filelist`` and inserted into ``DATABASE.financial``;
    all other entries are reported and skipped.

    The report date of a file is the integer formed by the last eight
    characters of its name before the first ``.``. A file is imported only
    while fewer than 100 documents with that ``report_date`` are stored;
    otherwise it is reported as already being in the database.
    """
    download_financialzip()
    coll = DATABASE.financial
    # The unique compound index is what makes re-inserting an existing
    # (code, report_date) pair fail instead of creating a duplicate.
    coll.create_index(
        [("code", ASCENDING), ("report_date", ASCENDING)],
        unique=True
    )

    for item in os.listdir(download_path):
        if not item.startswith('gpcw'):
            print(
                "file ",
                item,
                " is not start with gpcw , seems not a financial file , ignore!"
            )
            continue

        date = int(item.split('.')[0][-8:])
        print('QUANTAXIS NOW SAVING {}'.format(date))

        # count_documents replaces the removed Cursor.count(); query once
        # and reuse the result for both the check and the printout.
        stored = coll.count_documents({'report_date': date})
        if stored >= 100:
            print('ALL READY IN DATABASE')
            continue

        print(stored)
        data = QA_util_to_json_from_pandas(
            parse_filelist([item]).reset_index().drop_duplicates(
                subset=['code', 'report_date']
            ).sort_index()
        )
        try:
            coll.insert_many(data, ordered=False)
        except MemoryError:
            # Fall back to an ordered insert, as the original code did.
            coll.insert_many(data, ordered=True)
        except pymongo.bulk.BulkWriteError:
            # Deliberately ignored; presumably duplicate-key rejections
            # from the unique index for rows that are stored already.
            pass
        except Exception as e:
            # Keep the import best-effort, but do not hide the failure.
            print('QUANTAXIS FAILED TO SAVE {}: {!r}'.format(date, e))

    print('SUCCESSFULLY SAVE/UPDATE FINANCIAL DATA')
def QA_SU_save_financial_files():
    """Store financial data locally.

    Calls ``download_financialzip()`` and then walks every entry of
    ``download_path``. Entries whose name starts with ``gpcw`` are parsed
    with ``parse_filelist`` and inserted into ``DATABASE.financial``;
    all other entries are reported and skipped.

    The report date of a file is the integer formed by the last eight
    characters of its name before the first ``.``. A file is imported only
    while fewer than 3600 documents with that ``report_date`` are stored;
    otherwise it is reported as already being in the database.
    """
    download_financialzip()
    coll = DATABASE.financial
    # The unique compound index is what makes re-inserting an existing
    # (code, report_date) pair fail instead of creating a duplicate.
    coll.create_index(
        [("code", ASCENDING), ("report_date", ASCENDING)],
        unique=True
    )

    for item in os.listdir(download_path):
        if not item.startswith('gpcw'):
            print(
                "file ",
                item,
                " is not start with gpcw , seems not a financial file , ignore!"
            )
            continue

        date = int(item.split('.')[0][-8:])
        print('QUANTAXIS NOW SAVING {}'.format(date))

        # count_documents replaces the removed Cursor.count(); query once
        # and reuse the result for both the check and the printout.
        stored = coll.count_documents({'report_date': date})
        if stored >= 3600:
            print('ALL READY IN DATABASE')
            continue

        print(stored)
        data = QA_util_to_json_from_pandas(
            parse_filelist([item]).reset_index().drop_duplicates(
                subset=['code', 'report_date']
            ).sort_index()
        )
        try:
            coll.insert_many(data, ordered=False)
        except MemoryError:
            # Fall back to an ordered insert, as the original code did.
            coll.insert_many(data, ordered=True)
        except pymongo.bulk.BulkWriteError:
            # Deliberately ignored; presumably duplicate-key rejections
            # from the unique index for rows that are stored already.
            pass
        except Exception as e:
            # Keep the import best-effort, but do not hide the failure.
            print('QUANTAXIS FAILED TO SAVE {}: {!r}'.format(date, e))

    print('SUCCESSFULLY SAVE/UPDATE FINANCIAL DATA')
def QA_SU_save_financial_files(fromtdx=False):
    """Store financial data locally.

    Downloads the archives with ``download_financialzip_fromtdx()`` when
    ``fromtdx`` is truthy and with ``download_financialzip()`` otherwise,
    then walks every entry of ``download_path``. Entries whose name starts
    with ``gpcw`` are parsed with ``parse_filelist`` and upserted into
    ``DATABASE.financial`` keyed by ``code`` and ``report_date``; all other
    entries are reported and skipped.

    The report date of a file is the integer formed by the last eight
    characters of its name before the first ``.``.

    Args:
        fromtdx: Selects which of the two download helpers is called.
    """
    if fromtdx:
        download_financialzip_fromtdx()
    else:
        download_financialzip()

    coll = DATABASE.financial
    coll.create_index(
        [("code", ASCENDING), ("report_date", ASCENDING)],
        unique=True
    )

    for item in os.listdir(download_path):
        if not item.startswith('gpcw'):
            print(
                "file ",
                item,
                " is not start with gpcw , seems not a financial file , ignore!"
            )
            continue

        date = int(item.split('.')[0][-8:])
        print('QUANTAXIS NOW SAVING {}'.format(date))
        # count_documents replaces the removed Cursor.count().
        print(
            '在数据库中的条数 {}'.format(
                coll.count_documents({'report_date': date})
            )
        )

        try:
            data = QA_util_to_json_from_pandas(
                parse_filelist([item]).reset_index().drop_duplicates(
                    subset=['code', 'report_date']
                ).sort_index()
            )
            print('即将更新的条数 {}'.format(len(data)))
            try:
                # Upsert row by row so existing documents are refreshed
                # and missing ones are created.
                for d in data:
                    coll.update_one(
                        {
                            'code': d['code'],
                            'report_date': d['report_date']
                        },
                        {'$set': d},
                        upsert=True
                    )
            except MemoryError:
                # Fall back to an ordered insert, as the original code did.
                coll.insert_many(data, ordered=True)
            except pymongo.bulk.BulkWriteError:
                # Deliberately ignored, as in the original code.
                pass
            except Exception as e:
                # Keep the import best-effort, but do not hide the failure.
                print('QUANTAXIS FAILED TO SAVE {}: {!r}'.format(date, e))
        except Exception as e:
            # Any failure while parsing (or in the fallback insert) lands
            # here; show the cause next to the original message.
            print('似乎没有数据', repr(e))

    print('SUCCESSFULLY SAVE/UPDATE FINANCIAL DATA')