def TestA():
    """Merge the 2019-12-21 signal rows with per-stock statistics and export them.

    Loads the 'stock_signal' rows whose '操作' field equals 1, left-joins the
    dividend-year statistics, backtest digest, quarter-speed statistics, stock
    list columns and last prices for the same codes, adds a '总市值(亿)'
    column (price * '总股本(亿)'), writes the result to an Excel file and
    returns the merged DataFrame.
    """
    signals = util.LoadData('stock_signal', '2019-12-21',
                            condition={'操作': 1}, sort=[('百分比', -1)])
    dividendYears = util.LoadData('stock_statistcs', 'dvYears',
                                  condition={}, sort=[('百分比', -1)])
    mergeData = signals.join(dividendYears, how='left', rsuffix='right')

    # The index of the merged frame carries the stock codes.
    codes = list(mergeData.index)

    digest = util.LoadData2('stock_backtest', 'all_dv1_digest', codes)
    quarterSpeed = util.LoadData2('stock_statistcs', 'quarterSpeed', codes)
    stockInfo = util.LoadData2('stock', 'stock_list', codes)
    lastPrice = util.LastPriceNone(codes)
    stockInfo = stockInfo[['所属行业', '地区', '总股本(亿)']]

    # Each extra frame gets its own suffix so clashing column names stay apart.
    extras = (
        (digest, 'right2'),
        (quarterSpeed, 'right3'),
        (stockInfo, 'right4'),
        (lastPrice, 'right5'),
    )
    for frame, suffix in extras:
        mergeData = mergeData.join(frame, how='left', rsuffix=suffix)

    mergeData['总市值(亿)'] = mergeData['price'] * mergeData['总股本(亿)']
    mergeData.to_excel("c:/workspace/tmp/1224.xlsx")
    return mergeData
def Run(codes):
    """Compute, per stock, the share of quarters whose 'sjltz' value is below -10.

    For every entry in ``codes`` (a mapping with an '_id' key) the collection
    'yjbg2-<_id>' is loaded and scanned in ascending '_id' order. Quarters with
    a NaN 'sjltz' value are ignored. Counts are kept twice: over all quarters
    and over quarters from 2010 onwards. One summary record per stock is
    collected and the whole table is saved to
    'stock_statistics2' / 'dangerousQuarterRatio'.

    A failure while processing one stock is reported through
    ``util.PrintException`` and does not stop the remaining stocks.
    """
    summaries = []
    for one in codes:
        try:
            total = hits = 0
            total2010 = hits2010 = 0
            first = last = None
            first2010 = last2010 = None

            df = util.LoadData('stock', 'yjbg2-' + one['_id'],
                               condition={}, sort=[('_id', 1)])
            for quarter, row in df.iterrows():
                quarterDate = datetime.strptime(quarter, '%Y-%m-%d')
                value = util.String2Number(row['sjltz'])
                if np.isnan(value):
                    continue

                from2010 = quarterDate.year >= 2010
                isHit = value < -10

                if first is None:
                    first = quarterDate
                last = quarterDate
                total += 1
                if isHit:
                    hits += 1

                if from2010:
                    if first2010 is None:
                        first2010 = quarterDate
                    last2010 = quarterDate
                    total2010 += 1
                    if isHit:
                        hits2010 += 1

            # Ratios default to 0 when no valid quarter was seen.
            percent = hits / total if total > 0 else 0
            percent2010 = hits2010 / total2010 if total2010 > 0 else 0

            summaries.append({
                '_id': one['_id'],
                'begin': first,
                'end': last,
                'base': total,
                'hit': hits,
                'percent': percent,
                'begin2010': first2010,
                'end2010': last2010,
                'base2010': total2010,
                'hit2010': hits2010,
                'percent2010': percent2010
            })
        except Exception as e:
            util.PrintException(e)

    dfOut = pd.DataFrame(summaries)
    util.SaveMongoDB_DF(dfOut, 'stock_statistics2', 'dangerousQuarterRatio')
def mirs(args):
    """Load the stored index data for ``args.dir`` and run the requested queries.

    ``args.dir`` is normalised to end with "/", the data is loaded with
    ``util.LoadData`` and handed to a ``retriever.Retriever``. The topology
    listing is printed when ``args.topo`` is set, and the selection is printed
    when ``args.queryTokens`` is not an empty list.
    """
    parentFolder = args.dir
    if parentFolder[-1] != "/":
        parentFolder = parentFolder + "/"

    primInfo, aInfo, rmInfo = util.LoadData(parentFolder, False)
    myRetriever = retriever.Retriever(
        primInfo,
        args.sortAlg,
        args.amountToShow,
        args.verbose,
        aInfo,
        rmInfo,
    )

    wantsTopo = args.topo is not None
    if wantsTopo:
        myRetriever.PrintTopo(args.topo, args.regex, args.regexneg)

    if args.queryTokens != []:
        myRetriever.PrintSelect(args.queryTokens)
def __init__(self, parentFolder, fixedEncodeFilename, verbose, showToken):
    """Initialise the base class and cache the main index's file information.

    Args:
        parentFolder: Folder passed to the base class and to ``util.LoadData``.
        fixedEncodeFilename: Forwarded unchanged to the base class.
        verbose: Forwarded unchanged to the base class.
        showToken: Forwarded unchanged to the base class.

    Raises:
        SystemExit: With status 1 when ``util.LoadData`` returns ``None``,
            i.e. no main index could be loaded.
    """
    # Local import: sys.exit is always available, unlike the site-provided
    # ``exit`` helper, which is meant for the interactive interpreter.
    import sys

    super().__init__(parentFolder, fixedEncodeFilename, verbose, showToken)

    princ = util.LoadData(parentFolder, True)
    if princ is None:
        print("Indexamento principal nao encontrado. Abortando...")
        sys.exit(1)

    self.mainFiles = princ[util.LISTFILES]
    encs = princ[util.ENCODINGS]
    # One "modificado" entry per main file, looked up by position.
    # NOTE(review): presumably a modification stamp per file — confirm.
    self.mainFilesModified = [
        encs[i]["modificado"] for i in range(len(self.mainFiles))
    ]
def LastSignal2File(collectionName, path):
    """Export the rows of a backtest collection that have 'priceFromDate' to Excel.

    Rows are loaded from the 'stock_backtest' database, restricted to
    documents where 'priceFromDate' exists, sorted ascending by 'priceBuy',
    and a fixed set of columns is written to ``path``.
    """
    # Mongo shell equivalent of the query below:
    # db.getCollection('all_dv3').find({'priceFromDate': {"$exists": true}},
    #     {'_id': 1, 'name': 1, 'priceBuy': 1, 'priceSell': 1}).sort({'priceBuy': 1})
    hasPriceFromDate = {'priceFromDate': {"$exists": True}}
    df = util.LoadData('stock_backtest', collectionName,
                       condition=hasPriceFromDate, sort=[('priceBuy', 1)])

    exportColumns = [
        '_id',
        'name',
        'priceBuy',
        'priceFrom',
        'priceSell',
        'priceFromDate',
        'priceWhere',
    ]
    selected = df.loc[:, exportColumns]
    selected.to_excel(path)
def Show(codes):
    """Print the stored 'dangerousQuarterRatio' rows for the given stocks.

    Args:
        codes: Iterable of mappings with '_id' and 'name' keys. For each one
            the matching row is loaded from
            'stock_statistics2' / 'dangerousQuarterRatio' and tagged with the
            stock's name.

    A stock whose data cannot be loaded is reported with ``print`` and
    skipped. When nothing could be loaded an empty DataFrame is printed
    instead of failing on an empty list.
    """
    # Local import so the function does not rely on a module-level alias.
    import pandas as pd

    frames = []
    for one in codes:
        try:
            df = util.LoadData('stock_statistics2', 'dangerousQuarterRatio',
                               condition={'_id': one['_id']})
            df['name'] = one['name']
            frames.append(df)
        except Exception as e:
            print(e)

    # pd.concat replaces the repeated DataFrame.append calls (removed in
    # pandas 2.0); it needs at least one frame, hence the empty fallback.
    dfAll = pd.concat(frames) if frames else pd.DataFrame()
    print(dfAll)
def CalcHoldTime(stockList, collectionName, toDBName, beginDate, endDate):
    """Count, per month end, how many stocks were held and which ones.

    Args:
        stockList: Iterable of mappings with an '_id' key; only these stocks
            are loaded from the backtest collection.
        collectionName: Collection in the 'stock_backtest' database. Each row
            must provide 'name' and 'holdStockDateVec' (a sequence of
            (start, end) date pairs).
        toDBName: Target name passed to ``util.SaveMongoDBDict``.
        beginDate: Start of the month-end range that is reported.
        endDate: End of the month-end range that is reported.

    The result maps every month end between ``beginDate`` and ``endDate`` to
    ``{'number': <count>, 'stockList': [<names>]}`` and is saved under
    'stock_hold' / ``toDBName``.
    """
    utcTZ = timezone('UTC')

    # One bucket per month end. Built directly instead of going through a
    # throwaway DataFrame filled with random numbers.
    # NOTE(review): freq='M' is deprecated in recent pandas in favour of 'ME';
    # left unchanged to stay compatible with the pandas version in use.
    monthEnds = pd.date_range(start=beginDate, end=endDate, freq='M')
    result = {date: {'number': 0, 'stockList': set()} for date in monthEnds}

    codes = [one['_id'] for one in stockList]
    condition = {'_id': {'$in': codes}}
    df2 = util.LoadData('stock_backtest', collectionName, condition)

    for _, row in df2.iterrows():
        name = row['name']
        for datePair in row['holdStockDateVec']:
            # Distinct local names: the original reused ``endDate`` here and
            # silently overwrote the function parameter.
            holdStart = datetime(datePair[0].year, datePair[0].month,
                                 datePair[0].day, tzinfo=utcTZ)
            holdEnd = datetime(datePair[1].year, datePair[1].month,
                               datePair[1].day, tzinfo=utcTZ)
            print(holdStart)
            heldMonths = pd.date_range(start=holdStart, end=holdEnd, freq='M')
            print(heldMonths)
            # NOTE(review): these timestamps are UTC-aware; they only match
            # the buckets if beginDate/endDate produce equally aware keys —
            # confirm against the callers.
            for oneDate in heldMonths:
                if oneDate in result:
                    result[oneDate]['number'] += 1
                    result[oneDate]['stockList'].add(name)

    # Sets are converted to lists before the result is stored.
    for bucket in result.values():
        bucket['stockList'] = list(bucket['stockList'])

    util.SaveMongoDBDict(result, 'stock_hold', toDBName)
def Run(codes):
    """Build a per-quarter trend score for every stock code and store it.

    Computed over all financial reports, and additionally with 2010 as the
    starting year. For each code the collection 'yjbg-<code>' is scanned in
    ascending '_id' order; quarters with a NaN 'sjltz' value are skipped.
    Every remaining quarter gets a trend of -1, -0.5 or 1 and the running
    sums are saved per code into the 'stock_statistics' database.

    A failure for one code is printed and does not stop the other codes.
    """
    for one in codes:
        try:
            records = []
            previousValue = 0
            runningTrend = 0
            runningTrend2010 = 0

            df = util.LoadData('stock', 'yjbg-' + one,
                               condition={}, sort=[('_id', 1)])
            for quarter, row in df.iterrows():
                quarterDate = datetime.strptime(quarter, '%Y-%m-%d')
                value = util.String2Number(row['sjltz'])
                if np.isnan(value):
                    continue

                # This condition is not too strict: even a company whose
                # business has completely stopped growing should still grow
                # in nominal terms once inflation is taken into account.
                if value < 0:
                    trend = -1
                elif value - previousValue < 0:
                    trend = -0.5
                else:
                    trend = 1
                previousValue = value

                runningTrend = runningTrend + trend
                record = {
                    '_id': quarterDate,
                    'nowPMT': trend,
                    'continuityPMT': runningTrend
                }
                if quarterDate.year >= 2010:
                    runningTrend2010 = runningTrend2010 + trend
                    record['continuityPMTFrom2010'] = runningTrend2010
                records.append(record)

            dfOut = pd.DataFrame(records)
            util.SaveMongoDB_DF(dfOut, 'stock_statistics', one)
        except Exception as e:
            print(e)
def TestB():
    """Merge the 'dv1' holdings with per-stock statistics, filter and export.

    Loads 'stock_hold' / 'dv1' rows with 'diff' below 0.1, left-joins the
    dividend-year statistics, backtest results, quarter-speed statistics,
    stock list columns and last prices, computes 'markValue'
    (price * '总股本(亿)'), keeps rows with positive 'first', 'second' and
    'third' and a 'markValue' of at least 50, reduces the frame to a fixed
    column set, writes it to Excel and returns it.
    """
    holdings = util.LoadData('stock_hold', 'dv1',
                             condition={'diff': {'$lt': 0.1}},
                             sort=[('diff', 1)])
    dividendYears = util.LoadData('stock_statistcs', 'dvYears',
                                  condition={}, sort=[('百分比', -1)])
    mergeData = holdings.join(dividendYears, how='left', rsuffix='right')

    # The index of the merged frame carries the stock codes.
    codes = list(mergeData.index)

    backtest = util.LoadData2('stock_backtest', 'all_dv1', codes)
    quarterSpeed = util.LoadData2('stock_statistcs', 'quarterSpeed', codes)
    stockInfo = util.LoadData2('stock', 'stock_list', codes)
    lastPrice = util.LastPriceNone(codes)
    stockInfo = stockInfo[['所属行业', '地区', '总股本(亿)']]

    # Each extra frame gets its own suffix so clashing column names stay apart.
    extras = (
        (backtest, 'right2'),
        (quarterSpeed, 'right3'),
        (stockInfo, 'right4'),
        (lastPrice, 'right5'),
    )
    for frame, suffix in extras:
        mergeData = mergeData.join(frame, how='left', rsuffix=suffix)

    mergeData['markValue'] = mergeData['price'] * mergeData['总股本(亿)']
    mergeData = mergeData.query('first > 0 and second > 0 and third > 0')
    mergeData = mergeData.query('markValue >= 50')

    reportColumns = [
        'name',
        'diff',
        'percent',
        '统计年数',
        '分红年数',
        'maxValue:value',
        '所属行业',
        '地区',
        'markValue',
    ]
    mergeData = mergeData[reportColumns]
    mergeData.to_excel("c:/workspace/tmp/1222-3.xlsx")
    return mergeData
def Show(codes):
    """Print the newest 'stock_statistics' row of every given stock.

    Args:
        codes: Iterable of mappings with '_id' and 'name' keys. For each one
            the collection named after its '_id' is read with a descending
            '_id' sort and a limit of 1; the row is tagged with the stock's
            id and name, its original index is kept in a 'date' column and
            '_id' becomes the new index.

    A stock whose data cannot be loaded is reported with ``print`` and
    skipped. When nothing could be loaded an empty DataFrame is printed
    instead of failing on an empty list.
    """
    # Local import so the function does not rely on a module-level alias.
    import pandas as pd

    frames = []
    for one in codes:
        try:
            df = util.LoadData('stock_statistics', one['_id'],
                               condition={}, sort=[('_id', -1)], limit=1)
            df['_id'] = one['_id']
            df['name'] = one['name']
            df['date'] = df.index
            df.set_index('_id', inplace=True)
            frames.append(df)
        except Exception as e:
            print(e)

    # pd.concat replaces the repeated DataFrame.append calls (removed in
    # pandas 2.0); it needs at least one frame, hence the empty fallback.
    dfAll = pd.concat(frames) if frames else pd.DataFrame()
    print(dfAll)
def TimeWarpAnnotations(data_root_path, sample_rate=1):
    """DTW-align every task's annotations, write them as CSV and plot them.

    Args:
        data_root_path: Root folder passed to ``util.LoadData``; the aligned
            annotations are written below ``<data_root_path>/<task>/``.
        sample_rate: Sample rate used for loading, for the output folder
            name and to scale the maximum warp distance (3 seconds).

    For each task one CSV per annotation column is written, containing the
    first column of the raw annotations next to the aligned values, and a
    figure with one subplot per annotation column is shown.
    """
    max_warp_seconds = 3
    hz_suffix = str(sample_rate) + 'hz'
    data_dict = util.LoadData(data_root_path, sample_rate)

    for task in data_dict.keys():
        # Apply DTW
        anno_df = data_dict[task]['raw']['annotations']
        ot_df = data_dict[task]['raw']['objective_truth']
        dtw_anno_df = agree.DTWReference(
            anno_df.iloc[:, 1:], ot_df.iloc[:, 1],
            max_warp_distance=max_warp_seconds * sample_rate)

        # Output DTW annotations
        dtw_output_task_path = os.path.join(
            data_root_path, task, 'annotations_' + hz_suffix + '_dtw_aligned')
        if not os.path.isdir(dtw_output_task_path):
            os.makedirs(dtw_output_task_path)
        for anno_col in dtw_anno_df.columns:
            output_task_file = os.path.join(
                dtw_output_task_path, task + '_' + anno_col + '.csv')
            dtw_single_anno_df = pd.concat(
                (anno_df.iloc[:, 0], dtw_anno_df[anno_col]), axis=1)
            dtw_single_anno_df.to_csv(output_task_file, index=False, header=True)

        # Plot
        num_annos = len(anno_df.columns) - 1  # first column is skipped
        if num_annos < 1:
            # Nothing to draw; also avoids dividing by a zero row count.
            continue
        subplot_rows = int(math.floor(math.sqrt(num_annos)))
        subplot_cols = int(math.ceil(float(num_annos) / subplot_rows))
        # squeeze=False keeps ``axs`` two-dimensional even for a single row
        # or column, so the [row, col] indexing below always works.
        fig, axs = plt.subplots(subplot_rows, subplot_cols, squeeze=False)
        for anno_idx in range(num_annos):
            anno_col = anno_df.columns[anno_idx + 1]  # Skip time col
            ax = axs[anno_idx // subplot_cols, anno_idx % subplot_cols]
            ax.plot(ot_df.iloc[:, 0], ot_df.iloc[:, 1], 'm-')
            ax.plot(ot_df.iloc[:, 0], dtw_anno_df.iloc[:, anno_idx], 'k-')
            ax.plot(ot_df.iloc[:, 0], anno_df[anno_col].values, 'b--')
            ax.title.set_text(anno_col)
        fig.suptitle(task + ' DTW')
        plt.show()
    return
def mirs(args):
    """Load the stored index data for ``args.dir`` and run the requested queries.

    ``args.dir`` is normalised to end with "/", the data is loaded with
    ``util.LoadData`` and combined by ``BuildInfo``. The topology listing is
    printed when ``args.topo`` is set, and the selection is printed when
    ``args.queryTokens`` is not an empty list.
    """
    parentFolder = args.dir
    if parentFolder[-1] != "/":
        parentFolder = parentFolder + "/"

    primInfo, aInfo, rmInfo = util.LoadData(parentFolder, False)
    indMap, aOffset = BuildInfo(primInfo, aInfo, rmInfo)

    wantsTopo = args.topo is not None
    if wantsTopo:
        PrintTopo(
            primInfo[util.INDEX],
            aInfo[util.INDEX],
            indMap,
            aOffset,
            args.topo,
            args.regex,
            args.regexneg,
        )

    if args.queryTokens != []:
        PrintSelect(
            args.queryTokens,
            primInfo[util.INDEX],
            primInfo[util.LISTFILES],
            aInfo[util.INDEX],
            aInfo[util.LISTFILES],
            indMap,
            aOffset,
        )
# stock.CloseAccount() # stock.Store2DB() #沪深300统计############################################## # df = util.QueryHS300All() # out = [] # for code, row in df.iterrows(): # out.append({'code': code, 'name': row['股票名称']}) # # strategy.dv1.CompareAll('hs300_dv1', out) # 作图 df = util.LoadData('stock_result', 'dv_jusths300_w') df[['total', 'capital']].plot() plt.show() #全部股票################################################ # df = util.QueryAll() # out = [] # client = MongoClient() # db = client['stock_backtest'] # collection = db['all_dv1'] # cursor = collection.find() # already = set() # for c in cursor: # already.add(c['_id']) # # for code, row in df.iterrows(): # # if code in already:
def AgreementMeasureExamples(data_root_path,
                             output_path,
                             show_plots=True,
                             sample_rate=1):
    """Print and plot examples of how common agreement measures behave.

    Args:
        data_root_path: Root folder passed to ``util.LoadData``.
        output_path: Folder for the generated TikZ file; created if missing.
        show_plots: When true, ``plt.show()`` is called at the end.
        sample_rate: Sample rate passed to ``util.LoadData``.

    Pearson and CCC are computed between TaskA's objective truth and a
    cubic-polynomial distortion of it; Spearman and Kendall are computed on
    two hand-made step signals, which are also plotted and saved as
    'agreement_examples_spearmankendall.tex'. Finally two simulated
    annotations are plotted.
    """

    def report(title, self_value, distort_value):
        # Prints one measure's self/distorted pair in the shared layout.
        print(title)
        print(' Self correlation: %f' % (self_value))
        print(' Distorted correlation: %f' % (distort_value))

    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    data_dict = util.LoadData(data_root_path, sample_rate)

    print('Computing examples of drawbacks to existing measures...')

    # Pearson
    shifted = data_dict['TaskA']['aligned_shifted']
    anno = shifted['annotations'].loc[:, 'ann7'].values
    ot = shifted['objective_truth'].iloc[:, 1].values

    # Fit a cubic mapping from the annotation to the truth, then apply that
    # mapping to the truth itself to obtain a distorted copy.
    poly_feats = PolynomialFeatures(degree=3)
    poly_feats_fit = poly_feats.fit_transform(anno.reshape(-1, 1))
    poly_reg = LinearRegression()
    poly_reg.fit(poly_feats_fit, ot)
    ot_warped = poly_reg.predict(poly_feats.fit_transform(ot.reshape(-1, 1)))

    pearson_self = pearsonr(ot, ot)[0]
    pearson_distort = pearsonr(ot, ot_warped)[0]
    report('Pearson example, ann7:', pearson_self, pearson_distort)

    # CCC
    ccc_self = agree._CCCHelper(ot, ot)
    ccc_distort = agree._CCCHelper(ot, ot_warped)
    report('CCC example, ann7:', ccc_self, ccc_distort)

    # Spearman and Kendall
    step_anno = 10 * [4] + 3 * [0] + 8 * [4.1]
    step_anno_distort = 10 * [4] + 3 * [0] + 8 * [3.9]
    spearman_self = spearmanr(step_anno, step_anno)[0]
    spearman_distort = spearmanr(step_anno, step_anno_distort)[0]
    kendall_self = kendalltau(step_anno, step_anno)[0]
    kendall_distort = kendalltau(step_anno, step_anno_distort)[0]
    report('Spearman example:', spearman_self, spearman_distort)
    report('Kendall example:', kendall_self, kendall_distort)

    fig, axs = plt.subplots(1, 2)
    panels = (
        (axs[0], step_anno, 'b-', 'Example Annotation'),
        (axs[1], step_anno_distort, 'r-', 'Similar Annotation'),
    )
    for panel, signal, line_style, title in panels:
        panel.plot(range(len(signal)), signal, line_style)
        panel.axes.set_ylim(-1, 5)
        panel.title.set_text(title)

    tex_file = os.path.join(output_path,
                            'agreement_examples_spearmankendall.tex')
    tikzplotlib.save(tex_file,
                     figure=fig,
                     axis_width='\\figureWidth',
                     textsize=12)

    print('Simulating annotations and computing agreement measures....')
    sim_fig, sim_axs = plt.subplots(1, 1)
    sim_anno = np.array([
        8, 8, 8, 7, 5.2, 3, 2.5, 2, 1.8, 1.7, 1.6, 1.5, 1.7, 1.9, 2.2, 3, 4,
        5.5, 6.8, 7.9, 9.5, 10.5, 11, 11.2, 11.3, 11.3, 11.3, 11.3, 11.2, 11,
        10.7, 10.4, 10, 9, 7.5, 7.3, 7.2, 7.2, 7.2, 7.2
    ])
    # Second signal: same shape, scaled from index 16 onwards and shifted up.
    sim_anno2 = sim_anno.copy()
    sim_anno2[16:] *= 1.5
    sim_anno2 += 2

    sim_axs.plot(range(len(sim_anno)), sim_anno, 'r-')
    sim_axs.plot(range(len(sim_anno2)), sim_anno2, 'b--')
    sim_axs.title.set_text('Simulated annotations')

    if show_plots:
        plt.show()
    return