def min60(time):
    """Roll today's 5-minute bars of the last hour into one 60-minute bar.

    Nothing happens unless ``time`` (an HHMM integer) is one of 1030, 1130,
    1400 or 1500. For those values, today's ``real_kline_min5`` rows with
    ``time - 1h < row.time <= time`` are grouped by ``stock_code`` and the
    aggregated rows are inserted into ``real_kline_min60``.

    :param time: bar close time as an HHMM integer, e.g. ``1030``.
    :return: None
    """
    if time not in (1030, 1130, 1400, 1500):
        return

    # HHMM -> minutes since midnight, step back one hour, then back to HHMM.
    minutes = time // 100 * 60 + time % 100 - 60
    window_start = minutes // 60 * 100 + minutes % 60
    print(window_start)

    condition = {
        'time': {'$gt': window_start, '$lte': time},
        'date': Calendar.today()
    }
    cursor = BaseModel('real_kline_min5').query(sql=condition)
    if not cursor.count():
        return

    bars = pd.DataFrame(list(cursor)).sort_values(by=['time'], ascending=True)
    # Rows are time-ordered, so 'first'/'last' give the hour's open/close.
    hourly = bars.groupby(by=['stock_code'], as_index=False).agg({
        'volume': 'sum',
        'amount': 'sum',
        'open': 'first',
        'close': 'last',
        'high': 'max',
        'low': 'min'
    })
    hourly['time'] = time
    hourly['date'] = Calendar.today()
    BaseModel('real_kline_min60').insert_batch(
        hourly.to_dict(orient='records'))
    print('min60 ok')
# Example: min60(time=1030)
def fun(stocks):
    """Rebuild the ``outstanding`` collection from freshly fetched records.

    Every code in ``stocks`` is fetched through ``Outstanding.get_data``.
    A code whose fetch raises is reported and skipped (best effort), so one
    bad code does not abort the refresh. The collection is only replaced
    when at least one record was fetched.

    :param stocks: iterable of stock codes.
    :return: None
    """
    fetcher = Outstanding()
    result = []
    for sc in stocks:
        try:
            data = fetcher.get_data(stock=sc)
        except Exception as e:
            # Report the cause instead of silently dropping it.
            print('fail', sc, e)
            continue
        # Only claim success once the fetch has actually returned.
        print('success', sc)
        result.extend(data)

    if not result:
        # Nothing was fetched: keep the existing data instead of wiping it.
        return

    table = BaseModel('outstanding')
    table.remove({})
    table.insert_batch(result)
def get_data(self, start_date, end_date):
    """Load the feature matrix and class labels for a date range.

    Reads ``features_index_day`` rows for the codes in ``model.li`` whose
    ``date`` lies in ``[start_date, end_date]``, keeps the ``model.names``
    columns plus ``model.label``, and drops rows containing +inf or NaN.

    :param start_date: inclusive lower bound of ``date``.
    :param end_date: inclusive upper bound of ``date``.
    :return: ``(X, y)``. ``X`` holds the ``model.names`` columns; ``y`` has a
        single ``sort`` column produced by ``self.classify``. Two empty
        DataFrames are returned when the query matches nothing.
    """
    print(start_date)
    curror = BaseModel('features_index_day').query(
        sql=dict(stock_code={'$in': model.li},
                 date={
                     '$gte': start_date,
                     '$lte': end_date
                 }))
    if not curror.count():
        return pd.DataFrame([]), pd.DataFrame([])

    data = pd.DataFrame(list(curror))
    data = data.loc[:, model.names + [model.label]]
    # np.inf / np.nan exist in every NumPy release; the np.Infinity / np.NaN
    # aliases were removed in NumPy 2.0.
    data = data.replace(to_replace=np.inf, value=np.nan).dropna()

    y = data.loc[:, [model.label]]
    # Positionally assign the row labels in descending-label order.
    y['sort'] = y.sort_values(by=[model.label], ascending=False).index
    y = y.drop([model.label], axis=1)
    # Use item access: attribute access to a column called 'sort' clashes
    # with DataFrame.sort on old pandas versions.
    y['sort'] = y['sort'].map(lambda x: self.classify(x))

    X = data.drop([model.label], axis=1)
    return X, y
def draw_DMI(self, data, pixel, no, count, table_name, code, profits, date):
    """Rasterise the PDI / MDI / ADX curves into a boolean image and store it.

    Does nothing unless ``data`` has exactly ``no`` rows. Column ``i`` of the
    ``pixel`` x ``pixel`` matrix gets one ``True`` cell per indicator at row
    ``pixel - round(value)``.

    NOTE(review): a rounded value of 0 maps to row ``pixel`` (out of range)
    and values above ``pixel`` wrap around; presumably the caller scales the
    indicators into 1..pixel beforehand -- confirm.

    :param data: DataFrame with ``PDI``, ``MDI`` and ``ADX`` columns.
    :param pixel: side length of the square output matrix.
    :param no: required number of rows / columns to draw.
    :param count: unused here; kept for signature parity with other drawers.
    :param table_name: target collection name.
    :param code: stock code stored with the image.
    :param profits: profit value stored with the image.
    :param date: date stored with the image.
    :return: None
    """
    if len(data) != no:
        return

    # Round half up on temporaries so the caller's frame is not modified
    # (the previous in-place "+= 0.5" drifted on repeated calls).
    pdi, mdi, adx = (
        (data[column] + 0.5).astype('int').tolist()
        for column in ('PDI', 'MDI', 'ADX'))

    # Builtin bool: the np.bool alias is missing in NumPy 1.24-1.26.
    matrix = np.zeros((pixel, pixel), dtype=bool)
    for i in range(no):
        for value in (pdi[i], mdi[i], adx[i]):
            matrix[pixel - value, i] = True

    BaseModel(table_name).insert_batch({
        'stock_code': code,
        'profit': profits,
        'date': date,
        'value': matrix.tolist()
    })
def get_data_for_selection(self, start_date, end_date):
    """Load features and a binary label for feature-importance ranking.

    Reads ``novel_Feature`` rows for the codes in ``model.li`` whose ``date``
    lies in ``[start_date, end_date]`` and drops rows with +inf or NaN.

    NOTE(review): the frame is first restricted to ``model.names`` and then
    ``model.label``, ``'date'`` and ``'_id'`` are selected from it, so
    ``model.names`` presumably contains those columns -- confirm.

    :param start_date: inclusive lower bound of ``date``.
    :param end_date: inclusive upper bound of ``date``.
    :return: ``(X, y)`` DataFrames; ``y`` has a single 0/1 ``sort`` column.
        ``([], [])`` is returned when the query matches nothing.
    """
    curror = BaseModel('novel_Feature').query(
        sql=dict(stock_code={'$in': model.li},
                 date={
                     '$gte': start_date,
                     '$lte': end_date
                 }))
    if not curror.count():
        return [], []

    data = pd.DataFrame(list(curror))
    data = data.loc[:, model.names]
    # np.inf / np.nan exist in every NumPy release; the np.Infinity / np.NaN
    # aliases were removed in NumPy 2.0.
    data = data.replace(to_replace=np.inf, value=np.nan).dropna()

    y = data.loc[:, [model.label]]
    # Positionally assign the row labels in descending-label order.
    y['sort'] = y.sort_values(by=[model.label], ascending=False).index
    y = y.drop([model.label], axis=1)
    half = len(y) / 2
    # Item access avoids the clash between a 'sort' column and
    # DataFrame.sort on old pandas versions.
    y['sort'] = y['sort'].map(lambda x: 1 if x > half else 0)

    X = data.drop([model.label, 'date', '_id'], axis=1)
    return X, y
def draw_boll_Line(self, data, pixel, no, count, table_name, code, profits,
                   date):
    """Rasterise high/low bars plus Bollinger bands into a matrix and store it.

    For each of the first ``no`` rows, column ``i`` of a ``pixel`` x ``pixel``
    float matrix is filled between the high and low rows with ``1`` when
    ``open < close`` and ``-1`` otherwise. The ``top`` band cell is then set
    to ``1`` and the ``bottom`` band cell to ``-1``.

    :param data: DataFrame with ``high``, ``low``, ``top``, ``close``,
        ``open`` and ``bottom`` columns (assumed pre-scaled to pixel rows).
    :param pixel: side length of the square output matrix.
    :param no: number of rows / columns to draw.
    :param count: unused here; kept for signature parity with other drawers.
    :param table_name: target collection name.
    :param code: stock code stored with the image.
    :param profits: profit value stored with the image.
    :param date: date stored with the image.
    :return: None
    """

    def rounded(column):
        # Round half up on a temporary; the caller's frame stays untouched.
        return (data[column] + 0.5).astype('int').tolist()

    highs = rounded('high')
    lows = rounded('low')
    tops = rounded('top')
    closes = rounded('close')
    opens = rounded('open')  # renamed: 'open' would shadow the builtin
    bottoms = rounded('bottom')

    matrix = np.zeros((pixel, pixel))
    num = pixel - 1  # last valid row; price p is drawn at row num - p
    for i in range(no):
        fill = 1 if opens[i] < closes[i] else -1
        matrix[(num - highs[i]):(num - lows[i]) + 1, i] = fill
        matrix[(num - tops[i]), i] = 1
        matrix[(num - bottoms[i]), i] = -1

    BaseModel(table_name).insert_batch({
        'stock_code': code,
        'profit': profits,
        'date': date,
        'value': matrix.tolist()
    })
def draw_America_Line(self, data, pixel, no, count, table_name, code, profits,
                      date):
    """Rasterise OHLC bars ("American line") into a boolean image and store it.

    Each of the first ``no`` rows occupies a slot ``count`` pixels wide. The
    high-low range is drawn as a vertical line in the slot's centre column,
    the open as one pixel to its left and the close as one pixel to its
    right.

    :param data: DataFrame with ``high``, ``low``, ``close`` and ``open``
        columns (assumed pre-scaled to pixel rows).
    :param pixel: side length of the square output matrix.
    :param no: number of bars to draw.
    :param count: width in pixels of one bar slot.
    :param table_name: target collection name.
    :param code: stock code stored with the image.
    :param profits: profit value stored with the image.
    :param date: date stored with the image.
    :return: None
    """

    def rounded(column):
        # Round half up on a temporary; the caller's frame stays untouched.
        return (data[column] + 0.5).astype('int').tolist()

    highs = rounded('high')
    lows = rounded('low')
    closes = rounded('close')
    opens = rounded('open')  # renamed: 'open' would shadow the builtin

    # Builtin bool: the np.bool alias is missing in NumPy 1.24-1.26.
    matrix = np.zeros((pixel, pixel), dtype=bool)
    num = pixel - 1  # last valid row; price p is drawn at row num - p
    for i in range(no):
        # Centre column of slot i.
        position = ((i * count + count // 3) +
                    ((i + 1) * count - count // 3)) // 2
        matrix[(num - opens[i]), position - 1] = True  # open tick
        matrix[(num - closes[i]), position + 1] = True  # close tick
        matrix[(num - highs[i]):(num - lows[i]) + 1, position] = True

    BaseModel(table_name).insert_batch({
        'stock_code': code,
        'profit': profits,
        'date': date,
        'value': matrix.tolist()
    })
def MODEL_TABLE(location=None, dbname=None, tablename=None):
    """
    Map a well-known (shared) table name to its model object.

    :param location: passed through to the model constructor.
    :param dbname: passed through to the model constructor.
    :param tablename: logical table name to look up.
    :return: the model instance registered for ``tablename``.
    :raises MongoIOError: when ``tablename`` is not a known table.
    """
    # Factories are lambdas so that a model class is only looked up when its
    # table is actually requested.
    factories = {
        'signals_his': lambda: SellList('signals_his', location, dbname),
        'signals': lambda: signal('signals', location, dbname),
        'orders': lambda: Orders('orders', location, dbname),
        'orders_simulated':
            lambda: BaseModel('orders_simulated', location, dbname),
        'orders_his': lambda: OrdersHis('orders_his', location, dbname),
        'trademenu': lambda: TradeMenu('trademenu', location, dbname),
        'financeindex':
            lambda: ModelFinanceIndex('financeindex', location, dbname),
        'risk_and_position':
            lambda: Risk_and_Position('risk_and_position', location, dbname),
        'risk': lambda: Risk('risk', location, dbname),
        'position': lambda: Position('position', location, dbname),
        'naughtiers': lambda: Naughtiers('naughtiers', location, dbname),
        'inflexion': lambda: Inflexion('inflexion', location, dbname),
        'buy_point': lambda: Buy_Point('buy_point', location, dbname),
        'asset': lambda: Asset('asset', location, dbname),
        'rmds_his': lambda: RmdsHis('rmds_his', location, dbname),
        # The logical name 'RRADS' is backed by the 'announce' collection.
        'RRADS': lambda: rrads('announce', location, dbname),
        'accounts': lambda: BaseModel('accounts', location, dbname),
        'clients': lambda: BaseModel('clients', location, dbname),
    }
    build = factories.get(tablename)
    if build is None:
        info = 'not find this "%s" in model list' % tablename
        raise MongoIOError(info)
    return build()
def similarity(target_date=None):
    """Print the past date whose index features are closest to a target date.

    Loads ``index_feature_temp``, standardises the selected feature columns
    and computes the Euclidean distance between the target date's row and
    every earlier row. The closest date(s) are printed.

    :param target_date: date to compare against; defaults to 2017-07-17,
        the value that used to be hard-coded.
    :raises ValueError: when no usable row exists for ``target_date``.
    :return: None
    """
    if target_date is None:
        target_date = dt.datetime(2017, 7, 17)

    index_features = pd.DataFrame(
        list(BaseModel('index_feature_temp').query({})))
    print(index_features.columns.values)
    wanted = [
        "ma5_angle", "ma10_angle", "ma20_angle", "close_open", "high_low",
        'high_close', 'Close_dt_ma5', 'Close_dt_ma10', "Close_dt_ma20"
    ]
    x_columns = [x for x in index_features.columns if x in wanted]

    index_features = index_features.loc[:, x_columns + ['date']]
    # np.inf / np.nan exist in every NumPy release; the np.Infinity / np.NaN
    # aliases were removed in NumPy 2.0.
    index_features = index_features.replace(to_replace=np.inf,
                                            value=np.nan).dropna()
    index_features = index_features.sort_values(by=['date'], ascending=False)
    index_features = index_features.reset_index(drop=True)

    # Standardise each feature column to zero mean / unit variance.
    from sklearn import preprocessing
    scaled = preprocessing.scale(np.array(index_features.loc[:, x_columns]))
    scaled = pd.DataFrame(scaled, columns=x_columns)
    # Index was reset above, so dates align positionally with scaled rows.
    scaled['date'] = index_features['date']
    index_features = scaled

    target = index_features[index_features.date == target_date]
    if target.empty:
        # Fail with a clear message instead of a NumPy broadcasting error.
        raise ValueError('no feature row found for target date %s' %
                         target_date)
    other = index_features[index_features.date < target_date]
    date_list = other.date.tolist()
    print('date len', len(date_list))

    target_arr = np.array(target.drop(['date'], axis=1))
    other_arr = np.array(other.drop(['date'], axis=1))
    print('other_arr len', len(other_arr))

    # Euclidean distance from the target row to every earlier row.
    result = target_arr - other_arr
    print('result len', len(result))
    result = np.sqrt(np.sum(result**2, axis=1))

    data = pd.DataFrame({'result': result, 'date': date_list})
    print('target', target_date)
    print('result', data[data.result == data.result.min()])
def SELF_TABLE(location=None, dbname=None, tablename=None):
    """
    Return a generic model for a user-defined table.

    :param location: passed through to ``BaseModel``.
    :param dbname: passed through to ``BaseModel``.
    :param tablename: name of the user-defined table.
    :return: ``BaseModel(tablename, location, dbname)``
    """
    user_table = BaseModel(tablename, location, dbname)
    return user_table
def deal_data(code):
    """Compute indicator features for one index on 5-minute bars and store them.

    Reads ``index_min5`` bars for ``code`` between 2016-01-01 and 2018-09-18,
    applies the indicator functions in a fixed order, drops rows with missing
    values, keeps only the 09:55 bar of each day and inserts the result into
    ``features_index_min5``. Nothing is done when no bars are returned.

    :param code: index code to process.
    :return: None
    """
    data = KlineData.read_data(code=code,
                               start_date=dt.datetime(2016, 1, 1),
                               end_date=dt.datetime(2018, 9, 18),
                               kline='index_min5',
                               timemerge=True)
    if not len(data):
        return

    # (indicator function, extra positional arguments), applied in order.
    pipeline = (
        (n_KDJ, (9, 3)),
        (n_MA, ()),
        (n_MACD, (12, 26, 9)),
        (n_RSI, (6, 12, 24)),
        (n_DMI, (14, 6)),
        (n_BRAR, (26,)),
        (n_CR, (26, 10, 20, 40)),
        (n_VR, (26,)),
        (n_WR, (10, 6)),
        (n_CCI, (14,)),
        (n_BOLL, (20,)),
        (n_PSY, (12,)),
    )
    for indicator, params in pipeline:
        data = indicator(data, *params)

    data.dropna(inplace=True)
    data = data[data.time == 955]
    records = data.to_dict(orient='records')
    BaseModel('features_index_min5').insert_batch(records)
def exe():
    """Fetch real-time quotes for the codes in ``000300cons.txt`` and store them.

    The file is read as one code per line. Codes are requested from
    ``RealData.get_stocks_data`` in batches of 300 and each non-empty result
    is inserted into ``real_buy_sell`` without its ``time`` and ``other``
    columns.

    :return: None
    """
    with open('000300cons.txt') as f:
        codes = f.read().split('\n')

    m = 300  # batch size per request
    for start in range(0, len(codes), m):
        # Slice from `start`: the previous `data[0:m]` re-sent the first
        # batch on every iteration and never reached the later codes.
        batch = codes[start:start + m]
        x = RealData.get_stocks_data(batch)
        if len(x):
            # An empty result has no 'time'/'other' columns to drop.
            x = x.drop(['time', 'other'], axis=1)
            BaseModel('real_buy_sell').insert_batch(
                x.to_dict(orient='records'))
def log_length(self, length):
    """Record the current length for this user and return the previous one.

    Looks up ``email_log`` entries for ``self.email``. When one exists, its
    stored ``length`` is returned and the user's entry is replaced with the
    new value. Otherwise a first entry is inserted and ``length - 1`` is
    returned.

    :param length: the length to store for ``self.email``.
    :return: the previously stored length, or ``length - 1`` on first use.
    """
    obj = BaseModel('email_log')
    curror = obj.query({'user': self.email})
    if curror.count():
        lg = pd.DataFrame(list(curror)).loc[0].length
        # Remove only this user's entries; `remove({})` wiped every user's
        # log and reset them all to the first-use path.
        obj.remove({'user': self.email})
        obj.insert_batch(
            dict(date=dt.datetime.now(), user=self.email, length=length))
        return lg

    obj.insert_batch(
        dict(date=dt.datetime.now(), user=self.email, length=length))
    return length - 1
def draw(self, data, pixel, no, count, table_name, code, profits, date, h, w):
    """Rasterise candlesticks into a boolean image and store it.

    Does nothing unless ``data`` has exactly ``no`` rows. Each bar occupies a
    slot ``count`` columns wide in a ``(w, h)`` matrix. Bars with
    ``open >= close`` are drawn as a filled body with a wick; bars with
    ``open < close`` as a hollow body (outline only) with wicks above and
    below.

    :param data: DataFrame with ``high``, ``low``, ``close`` and ``open``
        columns (assumed pre-scaled to pixel rows).
    :param pixel: unused here; kept for signature parity with other drawers.
    :param no: required number of rows / bars.
    :param count: width in columns of one bar slot.
    :param table_name: target collection name.
    :param code: stock code stored with the image.
    :param profits: profit value stored with the image.
    :param date: date stored with the image.
    :param h: number of matrix columns.
    :param w: number of matrix rows.
    :return: None
    """
    if len(data) != no:
        return

    def rounded(column):
        # Round half up on a temporary; the caller's frame stays untouched.
        return (data[column] + 0.5).astype('int').tolist()

    highs = rounded('high')
    lows = rounded('low')
    closes = rounded('close')
    opens = rounded('open')  # renamed: 'open' would shadow the builtin

    # Builtin bool: the np.bool alias is missing in NumPy 1.24-1.26.
    matrix = np.zeros((w, h), dtype=bool)
    num = w - 1  # last valid row; price p is drawn at row num - p
    third = count // 3
    for mark in range(no):
        left = mark * count
        right = (mark + 1) * count
        o, c, hi, lo = opens[mark], closes[mark], highs[mark], lows[mark]
        if o >= c:
            # Falling (or flat) bar: filled body plus full-range wick.
            matrix[(num - o):(num - c) + 1, left + 1:right - 1] = True
            matrix[(num - hi):(num - lo) + 1,
                   left + third + 1:right - third - 1] = True
        else:
            # Rising bar: hollow body -- top and bottom edges ...
            matrix[(num - c), left + 1:right - 1] = True
            matrix[(num - o), left + 1:right - 1] = True
            # ... lower and upper wicks ...
            matrix[(num - o):(num - lo) + 1,
                   left + third + 1:right - third - 1] = True
            matrix[(num - hi):(num - c) + 1,
                   left + third + 1:right - third - 1] = True
            # ... and the two vertical sides.
            matrix[(num - c):(num - o) + 1, left + third] = True
            matrix[(num - c):(num - o) + 1, right - third - 1] = True

    BaseModel(table_name).insert_batch({
        'stock_code': code,
        'profit': profits,
        'date': date,
        'value': matrix.tolist()
    })
def model_select(self, start_date, end_date):
    """Rank features by importance with an extra-trees forest and plot them.

    For every calendar date in ``[start_date, end_date]`` the selection data
    is loaded via ``get_data_for_selection``; days without data are skipped.
    An ``ExtraTreesClassifier`` is fitted on the combined data, the ranking
    is printed and a bar chart of the importances is shown.

    :param start_date: inclusive lower bound of the calendar range.
    :param end_date: inclusive upper bound of the calendar range.
    :raises ValueError: when no day in the range has any data.
    :return: None
    """
    curror = BaseModel('calendar').query(sql=dict(
        date={
            '$gte': start_date,
            '$lte': end_date
        }))
    date_list = list(curror)
    n = 0  # window length in calendar days (0 = day by day)
    x, y = list(), list()
    for first, last in zip(date_list, date_list[n:]):
        print(first['date'])
        temp1, temp2 = self.get_data_for_selection(first['date'],
                                                   last['date'])
        if len(temp1) == 0:
            # Empty days come back as plain lists, which pd.concat rejects.
            continue
        x.append(temp1)
        y.append(temp2)
    if not x:
        raise ValueError('no selection data between %s and %s' %
                         (start_date, end_date))

    X = pd.concat(x)
    # Take the names from the real columns so that importance indices and
    # names cannot drift apart.
    feature_names = X.columns.tolist()
    X = np.array(X)
    # Flatten the single label column to the 1-D shape the classifier expects.
    y = np.array(pd.concat(y)).ravel()

    # Build a forest and compute the feature importances
    forest = ExtraTreesClassifier(criterion='entropy',
                                  n_estimators=250,
                                  random_state=0)
    forest.fit(X, y)
    importances = forest.feature_importances_
    std = np.std(
        [tree.feature_importances_ for tree in forest.estimators_], axis=0)
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    result = list()
    for f in range(X.shape[1]):
        print("%d. feature %d %s (%f)" %
              (f + 1, indices[f], feature_names[indices[f]],
               importances[indices[f]]))
        result.append(feature_names[indices[f]])
    print(result)

    # Plot the feature importances of the forest
    plt.figure()
    plt.title("Feature importances")
    plt.bar(range(X.shape[1]),
            importances[indices],
            color="r",
            yerr=std[indices],
            align="center")
    plt.xticks(range(X.shape[1]), indices)
    plt.xlim([-1, X.shape[1]])
    plt.show()
def exe():
    """Fetch real-time quotes for the codes in ``gz2000cons.txt`` and store them.

    The file is split on newlines into codes, which are requested from
    ``RealData.get_stocks_data`` 300 at a time. Each non-empty result is
    inserted into ``real_buy_sell_gz2000`` without its ``time`` and ``other``
    columns.

    :return: None
    """
    with open('gz2000cons.txt') as f:
        codes = [line for line in f.read().split('\n')]

    m = 300  # batch size per request
    for start in range(0, len(codes), m):
        batch = codes[start:m + start]
        x = RealData.get_stocks_data(batch)
        if not len(x):
            continue
        x = x.drop(['time', 'other'], axis=1)
        records = x.to_dict(orient='records')
        BaseModel('real_buy_sell_gz2000').insert_batch(records)
def gt():
    """Build training arrays from the calendar days 2018-05-24 .. 2018-06-02.

    Each calendar date in the range is passed to ``model().get_data`` as a
    single-day window; the per-day feature and label frames are concatenated
    and returned as NumPy arrays.

    :return: ``(X, Y)`` as ``numpy.ndarray`` objects.
    """
    first_day = dt.datetime(2018, 5, 24)
    last_day = dt.datetime(2018, 6, 2)
    calendar_rows = list(
        BaseModel('calendar').query(sql=dict(date={
            '$gte': first_day,
            '$lte': last_day
        })))
    loader = model()

    feature_frames = []
    label_frames = []
    for row in calendar_rows:
        day = row['date']
        day_features, day_labels = loader.get_data(day, day)
        feature_frames.append(day_features)
        label_frames.append(day_labels)

    X_train = pd.concat(feature_frames)
    Y_train = pd.concat(label_frames)
    return np.array(X_train), np.array(Y_train)
def get_result(sc, kline, start, end, table_name):
    """Cut 64-bar moving-average samples, walking backwards day by day.

    Starting from the most recent day whose last bar is the 15:00 close, the
    64 bars ending at that day's 09:55 bar are min-max normalised over the
    price and MA columns and stored with that bar's ``profit``. Days whose
    closing ``profit_self`` is at least 9.7% in absolute value (limit moves)
    produce no sample. The walk stops when fewer than 64 bars remain or the
    last remaining bar is not a 15:00 bar.

    :param sc: stock code.
    :param kline: kline type passed to ``KlineData.read_data``.
    :param start: start date of the data to read.
    :param end: end date of the data to read.
    :param table_name: target collection name.
    :return: None
    """
    data = KlineData.read_data(code=sc,
                               start_date=start,
                               end_date=end,
                               kline=kline,
                               timemerge=True)
    data = cal_ma(data)
    data = data.dropna()
    while len(data) >= 64:
        last_one = data.iloc[-1]
        if last_one.time != 1500:
            break

        date = last_one.date
        if abs(last_one.profit_self) >= 0.097:
            # Limit-move day: skip it. The day's rows must be dropped here,
            # otherwise the same bar is seen again and the loop never ends.
            data = data[data.date < dt.datetime(date.year, date.month,
                                                date.day)]
            continue

        data = data[data.date <= dt.datetime(date.year, date.month, date.day,
                                             9, 55)]
        if len(data) >= 64:
            profit = data.iloc[-1].profit
            date = data.iloc[-1].date
            idata = data.tail(64)
            # Move on to the previous day for the next iteration.
            data = data[
                data.date < dt.datetime(date.year, date.month, date.day)]
            idata = idata.loc[:, [
                'close', 'open', 'high', 'low', 'ma5', 'ma10', 'ma20', 'ma60',
                'ma120', 'ma200'
            ]]
            idata = np.array(idata)
            amin, amax = idata.min(), idata.max()  # global min and max
            idata = (idata - amin) / (amax - amin)
            BaseModel(table_name).insert_batch({
                'stock_code': sc,
                'profit': profit,
                'date': date,
                'value': idata.tolist()
            })
def report(self, clf, start_date, end_date):
    """Print a classification report and accuracy score for ``clf``.

    Test data is assembled by calling ``get_data`` for each calendar date in
    ``[start_date, end_date]`` and concatenating the results.

    :param clf: fitted classifier exposing ``predict`` and ``score``.
    :param start_date: inclusive lower bound of the calendar range.
    :param end_date: inclusive upper bound of the calendar range.
    :return: None
    """
    calendar_rows = list(
        BaseModel('calendar').query(sql=dict(
            date={
                '$gte': start_date,
                '$lte': end_date
            })))
    n = 0  # window length in calendar days (0 = day by day)
    feature_frames, label_frames = [], []
    for first, last in zip(calendar_rows, calendar_rows[n:]):
        print(first['date'])
        day_features, day_labels = self.get_data(first['date'], last['date'])
        feature_frames.append(day_features)
        label_frames.append(day_labels)

    X_test = pd.concat(feature_frames)
    y_test = pd.concat(label_frames)
    y_pred = clf.predict(X_test)  # predict
    print(classification_report(y_test, y_pred))
    print(clf.score(X_test, y_test))
def get_result(sc, kline, start, end, table_name):
    """Cut 64-bar MACD samples, walking backwards day by day.

    Starting from the most recent day whose last bar is the 15:00 close, the
    64 bars ending at that day's 09:55 bar are taken, their ``macd``, ``dif``
    and ``dea`` values are scaled into pixel rows of a 64-high image and the
    sample is stored with that bar's ``profit``. Days whose closing
    ``profit_self`` is at least 9.7% in absolute value (limit moves) produce
    no sample. The walk stops when fewer than 64 bars remain or the last
    remaining bar is not a 15:00 bar.

    :param sc: stock code.
    :param kline: kline type passed to ``KlineData.read_data``.
    :param start: start date of the data to read.
    :param end: end date of the data to read.
    :param table_name: target collection name.
    :return: None
    """
    data = KlineData.read_data(code=sc,
                               start_date=start,
                               end_date=end,
                               kline=kline,
                               timemerge=True)
    data = cal_macd(data)
    data = data.dropna()
    while len(data) >= 64:
        last_one = data.iloc[-1]
        if last_one.time != 1500:
            break

        date = last_one.date
        if abs(last_one.profit_self) >= 0.097:
            # Limit-move day: skip it. The day's rows must be dropped here,
            # otherwise the same bar is seen again and the loop never ends.
            data = data[data.date < dt.datetime(date.year, date.month,
                                                date.day)]
            continue

        data = data[data.date <= dt.datetime(date.year, date.month, date.day,
                                             9, 55)]
        if len(data) >= 64:
            profit = data.iloc[-1].profit
            date = data.iloc[-1].date
            idata = data.tail(64)
            idata = idata.loc[:, ['macd', 'dif', 'dea']]
            # Move on to the previous day for the next iteration.
            data = data[
                data.date < dt.datetime(date.year, date.month, date.day)]
            idata = np.array(idata)
            amin, amax = idata.min(), idata.max()  # global min and max
            # Signed divisor built from the larger magnitude (named `scale`
            # so the builtin `max` is not shadowed).
            scale = -amax if abs(amax) > abs(amin) else amin
            idata = idata * (64 / 2 - 1) / scale
            idata = idata + 64 / 2
            BaseModel(table_name).insert_batch({
                'stock_code': sc,
                'profit': profit,
                'date': date,
                'value': idata.tolist()
            })
def fun(codes):
    """Rebuild the ``features_index_min5`` rows for every code in ``codes``.

    For each code the existing rows are removed, ``deal_data`` recomputes and
    inserts them, and a progress line is printed.

    :param codes: iterable of index codes.
    :return: None
    """

    def rebuild(code):
        # Delete first so a re-run does not leave duplicate rows behind.
        BaseModel('features_index_min5').remove(stock_code=code)
        deal_data(code)

    for code in codes:
        rebuild(code)
        print(code,'over')
] for i in names: data[i] = (data[i] - data[i].min()) / (data[i].max() - data[i].min()) return data if __name__ == '__main__': # clf = DecisionTreeClassifier() # clf = LogisticRegression() table = 'features_index_day' clf = RandomForestClassifier(n_estimators=200, n_jobs=2) label = 'change_r_next2' curror = BaseModel(table).query( sql={ 'date': { '$gte': dt.datetime(2018, 1, 5), '$lte': dt.datetime(2018, 7, 25) } }) print(curror.count()) if curror.count(): data = pd.DataFrame(list(curror)) data = data.replace(to_replace=np.Infinity, value=np.NaN).dropna() name = data.columns.values.tolist() names = list() # data=data.sample(frac=0.2)
def fun(idata):
    """Fetch real-time quotes for one batch of codes and store them.

    The result of ``RealData.get_stocks_data`` is inserted into
    ``real_buy_sell_gz2000`` without its ``time`` and ``other`` columns.
    An empty result is ignored.

    :param idata: list of stock codes to request.
    :return: None
    """
    x = RealData.get_stocks_data(idata)
    if not len(x):
        # An empty result has no 'time'/'other' columns; dropping them would
        # raise, and there is nothing to insert anyway.
        return
    x = x.drop(['time', 'other'], axis=1)
    records = x.to_dict(orient='records')
    BaseModel('real_buy_sell_gz2000').insert_batch(records)
def deal_data(code):
    """Compute normalised daily features for one stock and store them.

    Steps, in order:

    1. Read ``kline_day`` bars for ``code`` (2015-01-01 .. 2018-09-17).
    2. Apply the technical-indicator functions.
    3. Min-max normalise every derived, non-boolean indicator column. A
       column whose minimum is 0 has only its non-zero values normalised.
    4. Attach the quarterly fundamentals snapshot from ``jq_fund_data`` that
       is in force on each bar's date.
    5. Compute ``turnover_ratio`` from raw volume, then min-max normalise the
       raw price/volume columns.
    6. Reverse the row order, add ``count`` (days since the launch date, 0 if
       unknown), keep rows from 2016-01-01 on and insert them into
       ``features_kline_day``.

    Nothing is done when no bars are returned.

    :param code: stock code to process.
    :raises IndexError: when a quarterly snapshot is missing in
        ``jq_fund_data`` for ``code``.
    :return: None
    """
    data = KlineData.read_data(code=code,
                               start_date=dt.datetime(2015, 1, 1),
                               end_date=dt.datetime(2018, 9, 17),
                               kline='kline_day',
                               timemerge=True)
    name = data.columns.values.tolist()  # raw columns, never normalised in 3.
    if not len(data):
        return

    data = data.drop(['_id', 'classtype', 'market'], axis=1)
    # (indicator function, extra positional arguments), applied in order.
    pipeline = (
        (n_KDJ, (9, 3)),
        (n_MA, ()),
        (n_MACD, (12, 26, 9)),
        (n_RSI, (6, 12, 24)),
        (n_DMI, (14, 6)),
        (n_BRAR, (26,)),
        (n_CR, (26, 10, 20, 40)),
        (n_VR, (26,)),
        (n_WR, (10, 6)),
        (n_CCI, (14,)),
        (n_BOLL, (20,)),
        (n_PSY, (12,)),
    )
    for indicator, params in pipeline:
        data = indicator(data, *params)

    columns = data.columns.values.tolist()
    # np.inf / np.nan exist in every NumPy release; the np.Infinity / np.NaN
    # aliases were removed in NumPy 2.0.
    data = data.replace(to_replace=np.inf, value=np.nan)

    fund_data = pd.DataFrame(
        list(BaseModel('jq_fund_data').query({'code': code})))
    jq_name = [
        'capitalization', 'circulating_cap', 'circulating_market_cap',
        'market_cap', 'pb_ratio', 'pcf_ratio', 'pe_ratio', 'pe_ratio_lyr',
        'ps_ratio', 'turnover_ratio'
    ]
    for i in jq_name:
        data[i] = 0

    # 3. Normalise derived indicator columns.
    for i in columns:
        if i in name or 'change_r_next' in i:
            continue
        v = data[i].iloc[0]
        if type(v).__name__ == 'bool_':
            continue
        if data[i].min() == 0:
            # Keep zeros as they are and rescale only the non-zero values.
            nonzero = data[i] != 0
            values = data.loc[nonzero, i]
            data.loc[nonzero, i] = (values - values.min()) / (values.max() -
                                                              values.min())
        else:
            data[i] = (data[i] - data[i].min()) / (data[i].max() -
                                                   data[i].min())

    # 4. Each snapshot applies from its own day up to (excluding) the next
    # snapshot day; the last one is open-ended. This replaces eleven
    # copy-pasted assignments and yields the same final values.
    snapshot_days = [
        '2015-12-31', '2016-03-31', '2016-06-30', '2016-09-30', '2016-12-30',
        '2017-03-31', '2017-06-30', '2017-09-29', '2017-12-29', '2018-03-30',
        '2018-06-29'
    ]
    starts = [dt.datetime.strptime(day, '%Y-%m-%d') for day in snapshot_days]
    for idx, day in enumerate(snapshot_days):
        in_force = data.date >= starts[idx]
        if idx + 1 < len(starts):
            in_force = in_force & (data.date < starts[idx + 1])
        data.loc[in_force, jq_name] = \
            fund_data[fund_data.day == day].loc[:, jq_name].iloc[0].tolist()

    # 5. Turnover must use the raw volume, so compute it before normalising.
    name = ['open', 'close', 'high', 'low', 'amount', 'volume']
    data['turnover_ratio'] = data.volume / (data.circulating_cap * 10000)
    for i in name:
        data[i] = (data[i] - data[i].min()) / (data[i].max() - data[i].min())

    # 6. Reverse, add days-since-launch, trim and store.
    data = data[::-1]
    data = data.reset_index(drop=True)
    curror = BaseModel('LaunchDate').query(sql={'stock_code': code})
    data['count'] = 0
    if curror.count():
        launchdate = list(curror)[0]['date']
        data['count'] = data.date.map(lambda x: (x - launchdate).days)
    data = data[data.date >= dt.datetime(2016, 1, 1)]
    BaseModel('features_kline_day').insert_batch(
        data.to_dict(orient='records'))
def fun(codes):
    """Rebuild the ``features_kline_day`` rows for every code in ``codes``.

    For each code the existing rows are removed, ``deal_data`` recomputes and
    inserts them, and a progress line is printed.

    :param codes: iterable of stock codes.
    :return: None
    """

    def rebuild(code):
        # Delete first so a re-run does not leave duplicate rows behind.
        BaseModel('features_kline_day').remove(stock_code=code)
        deal_data(code)

    for code in codes:
        rebuild(code)
        print(code, 'over')
def fun(data):
    """Delete the ``kline_min5`` rows for each date found in ``data``.

    :param data: iterable of mappings that each provide a ``'date'`` key.
    :return: None
    """
    for entry in data:
        day = entry['date']
        BaseModel('kline_min5').remove(date=day)
        # Read the key again for the progress line, as a mapping lookup.
        print(entry['date'], 'over')
import os

# Download quarterly valuation fundamentals and store them in `jq_fund_data`.
#
# The account credentials are read from the environment so that they are not
# kept in source control. Set JQDATA_USER and JQDATA_PASSWORD before running.
_jq_user = os.environ.get('JQDATA_USER')
_jq_password = os.environ.get('JQDATA_PASSWORD')
if not _jq_user or not _jq_password:
    raise RuntimeError(
        'JQDATA_USER and JQDATA_PASSWORD must be set to authenticate')
auth(_jq_user, _jq_password)

# Quarters available for download; only the ones listed in the loop below
# are actually fetched on this run.
name = [
    '2016q1', '2016q2', '2016q3', '2016q4', '2017q1', '2017q2', '2017q3',
    '2017q4', '2018q1', '2018q2'
]
for i in ['2015q4']:
    df = get_fundamentals(query(
        valuation.code,
        valuation.day,
        valuation.capitalization,
        valuation.circulating_cap,
        valuation.market_cap,
        valuation.circulating_market_cap,
        valuation.turnover_ratio,
        valuation.pe_ratio,
        valuation.pe_ratio_lyr,
        valuation.pb_ratio,
        valuation.ps_ratio,
        valuation.pcf_ratio,
    ),
                          statDate=i)
    # Keep only the first six characters of the code.
    df['code'] = df.code.map(lambda x: x[0:6])
    df['date'] = i
    BaseModel('jq_fund_data').insert_batch(df.to_dict(orient='records'))
# Build a classification task using 3 informative features # X, y = make_classification(n_samples=1000, # n_features=10, # n_informative=3, # n_redundant=0, # n_repeated=0, # n_classes=2, # random_state=0, # shuffle=False) li=[603993, 601989, 601988, 601881, 601878, 601857, 601818, 601800, 601766, 601688, 601668, 601628, 601601, 601398, 601390, 601360, 601336, 601328, 601318, 601288, 601229, 601211, 601186, 601169, 601166, 601088, 601006, 600999, 600958, 600887, 600703, 600690, 600606, 600585, 600547] curror = BaseModel('novel_Feature').query( sql=dict(stock_code={'$in':li}, date={'$gte': dt.datetime(2016, 1, 1), '$lte': dt.datetime(2018, 9, 15)})) data = pd.DataFrame(list(curror)) columns=['ADX', 'ADX1', 'ADX_50_DOWN', 'ADX_50_UP', 'ADX_z', 'AR', 'AR_50_DOWN', 'AR_50_UP', 'AR_z', 'BOLL', 'BR', 'BR_AR_DOWN', 'BR_AR_UP', 'BR_z', 'CCI', 'CCI_-100_UP', 'CCI_100_DOWN', 'CCI_z', 'CR', 'CR_40_DOWN', 'CR_40_UP', 'CR_MA1_DOWN', 'CR_MA1_UP', 'CR_MA2_DOWN', 'CR_MA2_UP', 'CR_MA3_DOWN', 'CR_MA3_UP', 'CR_z', 'Close_dt_ma10', 'Close_dt_ma20', 'Close_dt_ma30', 'Close_dt_ma5', 'Close_dt_ma60', 'D', 'DMM', 'DMM1', 'DMP', 'DMP1', 'D_z', 'HD', 'J', 'J_down_cross_100', 'J_less_20', 'J_over_80', 'J_up_cross_0', 'J_z', 'K', 'LB', 'LB_z', 'LD', 'MA1', 'MA1_z', 'MA2', 'MA2_z', 'MA3', 'MA3_z', 'MDI', 'MDI_z', 'MTR', 'MTR_1', 'MTR_2', 'MTR_D1', 'MTR_D2', 'MTR_D3', 'New_H_10', 'New_H_10_10', 'New_H_10_20', 'New_H_10_5', 'New_H_20', 'New_H_20_10', 'New_H_20_20', 'New_H_20_5', 'New_H_30', 'New_H_30_10', 'New_H_30_20', 'New_H_30_5', 'New_H_5', 'New_H_5_10', 'New_H_5_20', 'New_H_5_5', 'New_H_60', 'New_H_60_10', 'New_H_60_20', 'New_H_60_5', 'New_Highest_10', 'New_Highest_20', 'New_Highest_30', 'New_Highest_5', 'New_Highest_60', 'New_Hl_10', 'New_Hl_20', 'New_Hl_30', 'New_Hl_5', 'New_Hl_60', 'New_L_10', 'New_L_10_10', 'New_L_10_20', 'New_L_10_5', 'New_L_20', 'New_L_20_10', 'New_L_20_20', 'New_L_20_5', 'New_L_30', 'New_L_30_10', 'New_L_30_20', 'New_L_30_5', 'New_L_5', 'New_L_5_10', 'New_L_5_20', 
'New_L_5_5', 'New_L_60', 'New_L_60_10', 'New_L_60_20',
# feature select=========================================================================== print(__doc__) from time import time import numpy as np from scipy import ndimage from matplotlib import pyplot as plt from sklearn import manifold, datasets # digits = datasets.load_digits(n_class=10) # X = digits.data # y = digits.target data = BaseModel('a_ma_index_min5_gru_128').query( sql={ 'date': { '$lte': dt.datetime(2018, 8, 9, 9, 55), '$gte': dt.datetime(2018, 1, 1, 9, 55) } }) data = pd.DataFrame(list(data)) data2 = data.groupby(by=['type']).agg({'profit': 'mean'}) data2 = data2.sort_values(by=['profit'], ascending=False) X = np.array(data.encode.tolist()) y = np.array(data.type) # X,y=gt() # y=np.array(300) n_samples, n_features = X.shape np.random.seed(0) def nudge_images(X, y):
graph.write_png("tree.png") # 生成png文件 if __name__ == '__main__': pass # feature select=========================================================================== # start_date = dt.datetime(2018, 1, 1) # end_date = dt.datetime(2018, 1, 23) # model().model_select(start_date, end_date) # feature select=========================================================================== train_model_name = 'train_model_16.m' start_date = dt.datetime(2017, 5, 24) end_date = dt.datetime(2017, 5, 24) curror = BaseModel('calendar').query(sql=dict(date={ '$gte': start_date, '$lte': end_date })) date_list = list(curror) obj = model() n = 0 # clf = SGDClassifier() clf = DecisionTreeClassifier() #min_samples_leaf=20 X = list() Y = list() for i in range(len(date_list) - n): # print(date_list[i]['date']) temp1, temp2 = obj.get_data(date_list[i]['date'], date_list[i]['date']) X.append(temp1) Y.append(temp2) X_train = pd.concat(X) Y_train = pd.concat(Y)