Esempio n. 1
0
    def get_data(self, start_date, end_date):
        """Load the feature matrix and bucketed labels for a date range.

        Queries the ``features_index_day`` collection for documents whose
        ``stock_code`` is in ``model.li`` and whose ``date`` lies between
        ``start_date`` and ``end_date`` (both inclusive), keeps the
        ``model.names`` columns plus ``model.label`` and drops every row
        that contains ``+inf`` or a missing value.

        :param start_date: inclusive lower bound for the ``date`` field.
        :param end_date: inclusive upper bound for the ``date`` field.
        :return: ``(X, y)``. ``X`` is the frame without the label column;
            ``y`` has a single ``sort`` column whose values went through
            ``self.classify``. Two empty DataFrames are returned when the
            query matches nothing.
        """
        print(start_date)
        curror = BaseModel('features_index_day').query(
            sql=dict(stock_code={'$in': model.li},
                     date={
                         '$gte': start_date,
                         '$lte': end_date
                     }))
        # Materialise the cursor once instead of count() followed by list():
        # one round trip, and no dependency on the cursor offering count().
        records = list(curror)
        if not records:
            return pd.DataFrame([]), pd.DataFrame([])

        data = pd.DataFrame(records)
        data = data.loc[:, model.names + [model.label]]
        # np.inf / np.nan are the canonical spellings; the np.Infinity and
        # np.NaN aliases no longer exist in NumPy 2.
        data = data.replace(to_replace=np.inf, value=np.nan).dropna()

        # Copy so the column assignments below never write into ``data``.
        y = data.loc[:, [model.label]].copy()
        # NOTE(review): an Index is assigned positionally, so row k receives
        # the original index label of the k-th largest label value rather
        # than its own rank -- confirm this is the intended target.
        y['sort'] = y.sort_values(by=[model.label], ascending=False).index
        y = y.drop([model.label], axis=1)
        y['sort'] = y['sort'].map(self.classify)
        X = data.drop([model.label], axis=1)
        return X, y
Esempio n. 2
0
    def get_data_for_selection(self, start_date, end_date):
        """Load features and a binary target for feature selection.

        Queries the ``novel_Feature`` collection for documents whose
        ``stock_code`` is in ``model.li`` and whose ``date`` lies between
        ``start_date`` and ``end_date`` (both inclusive), keeps the
        ``model.names`` columns together with ``model.label`` and drops rows
        containing ``+inf`` or missing values.

        :param start_date: inclusive lower bound for the ``date`` field.
        :param end_date: inclusive upper bound for the ``date`` field.
        :return: ``(X, y)``. ``X`` is the frame without the label, ``date``
            and ``_id`` columns; ``y`` has a single 0/1 ``sort`` column.
            Two empty DataFrames are returned when nothing matches, so the
            results can be fed straight into ``pd.concat``.
        """
        curror = BaseModel('novel_Feature').query(
            sql=dict(stock_code={'$in': model.li},
                     date={
                         '$gte': start_date,
                         '$lte': end_date
                     }))
        # Materialise once rather than count() followed by list().
        records = list(curror)
        if not records:
            return pd.DataFrame([]), pd.DataFrame([])

        data = pd.DataFrame(records)
        # The label must survive the column selection, otherwise building
        # ``y`` below fails with a KeyError.
        wanted = list(model.names)
        if model.label not in wanted:
            wanted.append(model.label)
        data = data.loc[:, wanted]
        data = data.replace(to_replace=np.inf, value=np.nan).dropna()

        # Copy so the column assignments below never write into ``data``.
        y = data.loc[:, [model.label]].copy()
        # NOTE(review): an Index is assigned positionally, so row k receives
        # the original index label of the k-th largest label value rather
        # than its own rank -- confirm this is the intended target.
        y['sort'] = y.sort_values(by=[model.label], ascending=False).index
        y = y.drop([model.label], axis=1)
        half = len(y) / 2
        y['sort'] = y['sort'].map(lambda x: 1 if x > half else 0)
        # 'date' and '_id' are only present when model.names lists them.
        X = data.drop([model.label, 'date', '_id'], axis=1, errors='ignore')
        return X, y
Esempio n. 3
0
def min60(time):
    """Aggregate today's 5-minute bars of the past hour into 60-minute bars.

    Nothing happens unless ``time`` is one of 1030, 1130, 1400 or 1500.
    Otherwise the ``real_kline_min5`` rows of ``Calendar.today()`` whose
    ``time`` lies in ``(time - 1h, time]`` are grouped by ``stock_code`` and
    written to ``real_kline_min60``.

    :param time: bar close time as an ``HHMM`` integer.
    """
    if time not in (1030, 1130, 1400, 1500):
        return

    # HHMM -> minutes since midnight, step back one hour, back to HHMM.
    minutes = time // 100 * 60 + time % 100 - 60
    window_start = minutes // 60 * 100 + minutes % 60
    print(window_start)

    query = {
        'time': {'$gt': window_start, '$lte': time},
        'date': Calendar.today(),
    }
    curror = BaseModel('real_kline_min5').query(sql=query)
    if not curror.count():
        return

    # Sort chronologically so 'first' / 'last' pick the open and the close.
    bars = pd.DataFrame(list(curror)).sort_values(by=['time'], ascending=True)
    rules = {
        'volume': 'sum',
        'amount': 'sum',
        'open': 'first',
        'close': 'last',
        'high': 'max',
        'low': 'min',
    }
    hourly = bars.groupby(by=['stock_code'], as_index=False).agg(rules)
    hourly['time'] = time
    hourly['date'] = Calendar.today()

    BaseModel('real_kline_min60').insert_batch(
        hourly.to_dict(orient='records'))
    print('min60 ok')


# min60(time=1030)
Esempio n. 4
0
def fun(stocks):
    """Rebuild the ``outstanding`` collection from ``Outstanding.get_data``.

    Fetches the records of every stock in ``stocks``; a stock whose fetch
    raises is reported and skipped. Afterwards the ``outstanding``
    collection is emptied and refilled with everything that was collected.

    :param stocks: iterable of stock codes passed to ``get_data(stock=...)``.
    """
    obj = Outstanding()
    result = list()
    for sc in stocks:
        try:
            data = obj.get_data(stock=sc)
        except Exception as e:
            # Best effort: one failing stock must not abort the whole run,
            # but the reason is reported instead of being dropped.
            print('fail', sc, e)
        else:
            # Report success only after the fetch actually succeeded.
            result.extend(data)
            print('success', sc)
    BaseModel('outstanding').remove({})
    BaseModel('outstanding').insert_batch(result)
Esempio n. 5
0
    def draw_boll_Line(self, data, pixel, no, count, table_name, code, profits,
                       date):
        """Rasterise bars with Bollinger bounds and store the matrix.

        Builds a ``pixel`` x ``pixel`` float matrix in which column ``i``
        describes bar ``i``: the rows from ``high`` down to ``low`` are set
        to 1 when ``open < close`` and to -1 otherwise, the ``top`` row is
        set to 1 and the ``bottom`` row to -1. Row 0 is the highest value
        (row index is ``pixel - 1 - value``). The matrix is inserted into
        ``table_name`` together with ``code``, ``profits`` and ``date``.

        :param data: frame with ``high``, ``low``, ``top``, ``close``,
            ``open`` and ``bottom`` columns, presumably already scaled to
            ``[0, pixel - 1]`` -- confirm against the caller.
        :param pixel: side length of the square matrix.
        :param no: number of bars (columns) to draw.
        :param count: unused; kept for signature compatibility.
        :param table_name: collection that receives the document.
        :param code: stored as ``stock_code``.
        :param profits: stored as ``profit``.
        :param date: stored as ``date``.
        """
        def to_pixels(column):
            # Adding 0.5 before truncating rounds non-negative values to
            # the nearest integer. Work on a temporary so the caller's
            # frame is left untouched.
            return (data[column] + 0.5).astype('int').tolist()

        high = to_pixels('high')
        low = to_pixels('low')
        top = to_pixels('top')
        close = to_pixels('close')
        open_ = to_pixels('open')
        bottom = to_pixels('bottom')

        matrix = np.zeros((pixel, pixel))
        num = pixel - 1
        for i in range(no):
            fill = 1 if open_[i] < close[i] else -1
            matrix[(num - high[i]):(num - low[i]) + 1, i] = fill
            # The band markers are written last and override the bar fill.
            matrix[(num - top[i]), i] = 1
            matrix[(num - bottom[i]), i] = -1

        BaseModel(table_name).insert_batch({
            'stock_code': code,
            'profit': profits,
            'date': date,
            'value': matrix.tolist()
        })
Esempio n. 6
0
    def draw_DMI(self, data, pixel, no, count, table_name, code, profits,
                 date):
        """Rasterise the PDI / MDI / ADX lines and store the matrix.

        Builds a ``pixel`` x ``pixel`` boolean matrix in which column ``i``
        has the rows ``pixel - PDI[i]``, ``pixel - MDI[i]`` and
        ``pixel - ADX[i]`` set, then inserts it into ``table_name`` together
        with ``code``, ``profits`` and ``date``. Nothing is drawn or stored
        when ``data`` does not hold exactly ``no`` rows.

        :param data: frame with ``PDI``, ``MDI`` and ``ADX`` columns.
        :param pixel: side length of the square matrix.
        :param no: required number of rows in ``data`` / columns drawn.
        :param count: unused; kept for signature compatibility.
        :param table_name: collection that receives the document.
        :param code: stored as ``stock_code``.
        :param profits: stored as ``profit``.
        :param date: stored as ``date``.
        """
        if len(data) != no:
            return

        def to_pixels(column):
            # Adding 0.5 before truncating rounds non-negative values to
            # the nearest integer. Work on a temporary so the caller's
            # frame is left untouched.
            return (data[column] + 0.5).astype('int').tolist()

        PDI = to_pixels('PDI')
        MDI = to_pixels('MDI')
        ADX = to_pixels('ADX')

        # The builtin ``bool`` replaces the removed ``np.bool`` alias.
        matrix = np.zeros((pixel, pixel), dtype=bool)
        # NOTE(review): the offset is ``pixel`` here, not ``pixel - 1`` as
        # in the other draw_* methods, so a value of 0 indexes one row past
        # the end -- confirm the indicator values are always >= 1.
        num = pixel
        for i in range(no):
            matrix[(num - PDI[i]), i] = True
            matrix[(num - MDI[i]), i] = True
            matrix[(num - ADX[i]), i] = True

        BaseModel(table_name).insert_batch({
            'stock_code': code,
            'profit': profits,
            'date': date,
            'value': matrix.tolist()
        })
Esempio n. 7
0
 def draw_America_Line(self, data, pixel, no, count, table_name, code,
                       profits, date):
     """Rasterise bars as OHLC ("American line") glyphs and store them.

     Builds a ``pixel`` x ``pixel`` boolean matrix. Bar ``i`` occupies a
     slot of ``count`` columns: the slot's centre column is filled from
     ``high`` down to ``low``, the column left of it carries a tick at the
     ``open`` row and the column right of it a tick at the ``close`` row.
     Row 0 is the highest value (row index is ``pixel - 1 - value``). The
     matrix is inserted into ``table_name`` together with ``code``,
     ``profits`` and ``date``.

     :param data: frame with ``high``, ``low``, ``close`` and ``open``
         columns, presumably already scaled to ``[0, pixel - 1]`` --
         confirm against the caller.
     :param pixel: side length of the square matrix.
     :param no: number of bars to draw.
     :param count: width in columns of the slot reserved for each bar.
     :param table_name: collection that receives the document.
     :param code: stored as ``stock_code``.
     :param profits: stored as ``profit``.
     :param date: stored as ``date``.
     """
     def to_pixels(column):
         # Adding 0.5 before truncating rounds non-negative values to the
         # nearest integer. Work on a temporary so the caller's frame is
         # left untouched.
         return (data[column] + 0.5).astype('int').tolist()

     high = to_pixels('high')
     low = to_pixels('low')
     close = to_pixels('close')
     open_ = to_pixels('open')

     # The builtin ``bool`` replaces the removed ``np.bool`` alias.
     matrix = np.zeros((pixel, pixel), dtype=bool)
     num = pixel - 1
     for mark in range(no):
         # Centre column of the slot belonging to bar ``mark``.
         position = ((mark * count + count // 3) +
                     ((mark + 1) * count - count // 3)) // 2
         matrix[(num - open_[mark]), position - 1] = True  # open tick
         matrix[(num - close[mark]), position + 1] = True  # close tick
         matrix[(num - high[mark]):(num - low[mark]) + 1, position] = True

     BaseModel(table_name).insert_batch({
         'stock_code': code,
         'profit': profits,
         'date': date,
         'value': matrix.tolist()
     })
Esempio n. 8
0
def MODEL_TABLE(location=None, dbname=None, tablename=None):
    """Return the model object registered for a shared data source.

    Maps a well-known table name onto the model class that wraps it and
    instantiates that class with ``(table, location, dbname)``.

    :param location: forwarded unchanged to the model constructor.
    :param dbname: forwarded unchanged to the model constructor.
    :param tablename: name of the shared table to open.
    :return: the model instance for ``tablename``.
    :raises MongoIOError: if ``tablename`` is not one of the known tables.
    """
    if tablename == 'signals_his':
        return SellList('signals_his', location, dbname)
    if tablename == 'signals':
        return signal('signals', location, dbname)
    if tablename == 'orders':
        return Orders('orders', location, dbname)
    if tablename == 'orders_simulated':
        return BaseModel('orders_simulated', location, dbname)
    if tablename == 'orders_his':
        return OrdersHis('orders_his', location, dbname)
    if tablename == 'trademenu':
        return TradeMenu('trademenu', location, dbname)
    if tablename == 'financeindex':
        return ModelFinanceIndex('financeindex', location, dbname)
    if tablename == 'risk_and_position':
        return Risk_and_Position('risk_and_position', location, dbname)
    if tablename == 'risk':
        return Risk('risk', location, dbname)
    if tablename == 'position':
        return Position('position', location, dbname)
    if tablename == 'naughtiers':
        return Naughtiers('naughtiers', location, dbname)
    if tablename == 'inflexion':
        return Inflexion('inflexion', location, dbname)
    if tablename == 'buy_point':
        return Buy_Point('buy_point', location, dbname)
    if tablename == 'asset':
        return Asset('asset', location, dbname)
    if tablename == 'rmds_his':
        return RmdsHis('rmds_his', location, dbname)
    if tablename == 'RRADS':
        # This key is the only one that opens a differently named table.
        return rrads('announce', location, dbname)
    if tablename == 'accounts':
        return BaseModel('accounts', location, dbname)
    if tablename == 'clients':
        return BaseModel('clients', location, dbname)

    message = 'not find this "%s" in model list' % tablename
    raise MongoIOError(message)
Esempio n. 9
0
 def log_length(self, length):
     """Store the new length for this user and return the previous one.

     Looks up the ``email_log`` entry whose ``user`` equals ``self.email``.
     If one exists, its ``length`` is returned and the entry is replaced by
     a fresh one holding ``length``. Otherwise a first entry is created and
     ``length - 1`` is returned.

     :param length: value to record for ``self.email``.
     :return: the previously recorded length, or ``length - 1`` when this
         user had no entry yet.
     """
     obj = BaseModel('email_log')
     own = {'user': self.email}
     records = list(obj.query(own))
     entry = dict(date=dt.datetime.now(), user=self.email, length=length)
     if records:
         previous = records[0]['length']
         # Replace only this user's entry; an empty filter would wipe the
         # log entries of every other user as well.
         obj.remove(own)
         obj.insert_batch(entry)
         return previous
     obj.insert_batch(entry)
     return length - 1
Esempio n. 10
0
def similarity(target_date=None):
    """Print the past date whose index features are closest to a target date.

    Loads every document of ``index_feature_temp``, keeps the known feature
    columns that are present plus ``date``, drops rows with ``+inf`` or
    missing values, standardises the features with
    ``sklearn.preprocessing.scale`` and computes the Euclidean distance
    between the row of ``target_date`` and every row with an earlier date.
    The row(s) with the smallest distance are printed.

    :param target_date: date to compare against; defaults to
        ``dt.datetime(2017, 7, 17)``, the date the function originally
        hard-coded.
    :raises ValueError: if no row exists for ``target_date``.
    """
    from sklearn import preprocessing

    if target_date is None:
        target_date = dt.datetime(2017, 7, 17)

    index_features = pd.DataFrame(
        list(BaseModel('index_feature_temp').query({})))
    print(index_features.columns.values)
    wanted = [
        "ma5_angle", "ma10_angle", "ma20_angle", "close_open", "high_low",
        'high_close', 'Close_dt_ma5', 'Close_dt_ma10', "Close_dt_ma20"
    ]
    x_columns = [x for x in index_features.columns if x in wanted]
    index_features = index_features.loc[:, x_columns + ['date']]

    # np.inf / np.nan are the canonical spellings; the np.Infinity and
    # np.NaN aliases no longer exist in NumPy 2.
    index_features = index_features.replace(to_replace=np.inf, value=np.nan)
    index_features = index_features.dropna()
    index_features = index_features.sort_values(by=['date'], ascending=False)
    # A fresh 0..n-1 index lets the dates line up with the scaled rows.
    index_features = index_features.reset_index(drop=True)

    # Standardise the feature columns.
    scaled = preprocessing.scale(np.array(index_features.loc[:, x_columns]))
    scaled = pd.DataFrame(scaled, columns=x_columns)
    scaled['date'] = index_features['date']
    index_features = scaled

    target = index_features[index_features.date == target_date]
    other = index_features[index_features.date < target_date]
    if target.empty:
        raise ValueError('no feature row found for %s' % target_date)
    date_list = other.date.tolist()
    print('date len', len(date_list))

    target_arr = np.array(target.drop(['date'], axis=1))
    other_arr = np.array(other.drop(['date'], axis=1))
    print('other_arr len', len(other_arr))

    # Euclidean distance from the target row to every earlier row.
    result = target_arr - other_arr
    print('result len', len(result))
    result = np.sqrt(np.sum(result**2, axis=1))

    data = pd.DataFrame({'result': result, 'date': date_list})
    print('target', target_date)
    print('result', data[data.result == data.result.min()])
Esempio n. 11
0
def SELF_TABLE(location=None, dbname=None, tablename=None):
    """Open a user-defined table through the generic ``BaseModel``.

    :param location: forwarded unchanged to ``BaseModel``.
    :param dbname: forwarded unchanged to ``BaseModel``.
    :param tablename: name of the table to open.
    :return: ``BaseModel(tablename, location, dbname)``.
    """
    table = BaseModel(tablename, location, dbname)
    return table
Esempio n. 12
0
def deal_data(code):
    """Compute indicator features on 5-minute index bars and store them.

    Reads the ``index_min5`` bars of ``code`` between 2016-01-01 and
    2018-09-18, runs them through the indicator functions in a fixed
    order, drops rows with missing values, keeps only the rows whose
    ``time`` is 955 and inserts them into ``features_index_min5``.
    Nothing is done when no bars are returned.

    :param code: code passed to ``KlineData.read_data``.
    """
    data = KlineData.read_data(code=code,
                               start_date=dt.datetime(2016, 1, 1),
                               end_date=dt.datetime(2018, 9, 18),
                               kline='index_min5',
                               timemerge=True)
    if not len(data):
        return

    # Each step receives the frame returned by the previous one.
    pipeline = (
        (n_KDJ, (9, 3)),
        (n_MA, ()),
        (n_MACD, (12, 26, 9)),
        (n_RSI, (6, 12, 24)),
        (n_DMI, (14, 6)),
        (n_BRAR, (26,)),
        (n_CR, (26, 10, 20, 40)),
        (n_VR, (26,)),
        (n_WR, (10, 6)),
        (n_CCI, (14,)),
        (n_BOLL, (20,)),
        (n_PSY, (12,)),
    )
    for indicator, params in pipeline:
        data = indicator(data, *params)

    data.dropna(inplace=True)
    at_955 = data[data.time == 955]

    records = at_955.to_dict(orient='records')
    BaseModel('features_index_min5').insert_batch(records)
Esempio n. 13
0
def exe():
    """Fetch real-time data for the codes in ``000300cons.txt`` and store it.

    Reads one code per line, splits the codes into batches of 300, fetches
    each batch with ``RealData.get_stocks_data``, drops the ``time`` and
    ``other`` columns and inserts the rows into ``real_buy_sell``.
    """
    with open('000300cons.txt') as f:
        codes = f.read().split('\n')

    m = 300
    # Slice from the running offset; slicing from 0 every time would fetch
    # the first batch over and over and never reach the remaining codes.
    batches = [codes[i:i + m] for i in range(0, len(codes), m)]
    for batch in batches:
        x = RealData.get_stocks_data(batch)
        # Skip empty results instead of failing on the column drop.
        if len(x):
            x = x.drop(['time', 'other'], axis=1)
            BaseModel('real_buy_sell').insert_batch(
                x.to_dict(orient='records'))
Esempio n. 14
0
    def draw(self, data, pixel, no, count, table_name, code, profits, date, h,
             w):
        """Rasterise bars as candlesticks and store the boolean matrix.

        Builds a ``w`` x ``h`` boolean matrix (``w`` rows, ``h`` columns) in
        which bar ``i`` occupies a slot of ``count`` columns. A bar with
        ``open >= close`` is drawn as a filled body between the open and
        close rows plus a narrower wick from high to low. Any other bar is
        drawn as the two body edges (open and close rows), the wick
        segments from high to close and from open to low, and the two
        vertical body sides. Row 0 is the highest value (row index is
        ``w - 1 - value``). The matrix is inserted into ``table_name``
        together with ``code``, ``profits`` and ``date``. Nothing is drawn
        or stored when ``data`` does not hold exactly ``no`` rows.

        :param data: frame with ``high``, ``low``, ``close`` and ``open``
            columns, presumably already scaled to ``[0, w - 1]`` -- confirm
            against the caller.
        :param pixel: unused; kept for signature compatibility.
        :param no: required number of rows in ``data`` / bars drawn.
        :param count: width in columns of the slot reserved for each bar.
        :param table_name: collection that receives the document.
        :param code: stored as ``stock_code``.
        :param profits: stored as ``profit``.
        :param date: stored as ``date``.
        :param h: number of matrix columns.
        :param w: number of matrix rows.
        """
        if len(data) != no:
            return

        def to_pixels(column):
            # Adding 0.5 before truncating rounds non-negative values to
            # the nearest integer. Work on a temporary so the caller's
            # frame is left untouched.
            return (data[column] + 0.5).astype('int').tolist()

        high = to_pixels('high')
        low = to_pixels('low')
        close = to_pixels('close')
        open_ = to_pixels('open')

        # The builtin ``bool`` replaces the removed ``np.bool`` alias.
        matrix = np.zeros((w, h), dtype=bool)
        num = w - 1

        for mark in range(no):
            # Column ranges inside this bar's slot.
            body = slice(mark * count + 1, (mark + 1) * count - 1)
            wick = slice(mark * count + count // 3 + 1,
                         (mark + 1) * count - count // 3 - 1)
            open_row = num - open_[mark]
            close_row = num - close[mark]
            high_row = num - high[mark]
            low_row = num - low[mark]

            if open_[mark] >= close[mark]:
                # Filled body plus a wick spanning high..low.
                matrix[open_row:close_row + 1, body] = True
                matrix[high_row:low_row + 1, wick] = True
            else:
                # Top and bottom edges of the body.
                matrix[close_row, body] = True
                matrix[open_row, body] = True
                # Wick segments.
                matrix[open_row:low_row + 1, wick] = True
                matrix[high_row:close_row + 1, wick] = True
                # Left and right sides of the body.
                matrix[close_row:open_row + 1,
                       mark * count + count // 3] = True
                matrix[close_row:open_row + 1,
                       (mark + 1) * count - count // 3 - 1] = True

        BaseModel(table_name).insert_batch({
            'stock_code': code,
            'profit': profits,
            'date': date,
            'value': matrix.tolist()
        })
Esempio n. 15
0
    def model_select(self, start_date, end_date):
        """Rank the features with an extra-trees forest and plot the result.

        Collects ``get_data_for_selection`` output for every ``calendar``
        entry between ``start_date`` and ``end_date`` (both inclusive),
        fits an ``ExtraTreesClassifier``, prints the features ordered by
        decreasing importance and shows a bar chart of the importances
        with their standard deviation across the trees.

        :param start_date: inclusive lower bound for the calendar dates.
        :param end_date: inclusive upper bound for the calendar dates.
        """
        date_filter = dict(date={'$gte': start_date, '$lte': end_date})
        date_list = list(BaseModel('calendar').query(sql=date_filter))

        n = 0  # window length in calendar entries; 0 means one day at a time
        feature_frames, label_frames = [], []
        for first, last in zip(date_list, date_list[n:]):
            print(first['date'])
            features, labels = self.get_data_for_selection(
                first['date'], last['date'])
            feature_frames.append(features)
            label_frames.append(labels)

        X = np.array(pd.concat(feature_frames))
        y = np.array(pd.concat(label_frames))

        # Fit the forest and derive the importance ranking.
        forest = ExtraTreesClassifier(criterion='entropy',
                                      n_estimators=250,
                                      random_state=0)
        forest.fit(X, y)
        importances = forest.feature_importances_
        per_tree = [tree.feature_importances_ for tree in forest.estimators_]
        std = np.std(per_tree, axis=0)
        indices = np.argsort(importances)[::-1]

        # Print the features from most to least important.
        print("Feature ranking:")
        n_features = X.shape[1]
        result = []
        for rank in range(n_features):
            column = indices[rank]
            print("%d. feature %d  %s (%f)" %
                  (rank + 1, column, model.names[column],
                   importances[column]))
            result.append(model.names[column])
        print(result)

        # Bar chart of the importances with per-tree deviation as error bars.
        positions = range(n_features)
        plt.figure()
        plt.title("Feature importances")
        plt.bar(positions,
                importances[indices],
                color="r",
                yerr=std[indices],
                align="center")
        plt.xticks(positions, indices)
        plt.xlim([-1, n_features])
        plt.show()
Esempio n. 16
0
def exe():
    """Fetch real-time data for the codes in ``gz2000cons.txt`` and store it.

    Reads one code per line, splits the codes into batches of 300, fetches
    each batch with ``RealData.get_stocks_data`` and, for every non-empty
    result, drops the ``time`` and ``other`` columns and inserts the rows
    into ``real_buy_sell_gz2000``.
    """
    with open('gz2000cons.txt') as f:
        codes = f.read().split('\n')

    m = 300
    for offset in range(0, len(codes), m):
        batch = codes[offset:offset + m]
        x = RealData.get_stocks_data(batch)
        if not len(x):
            continue
        trimmed = x.drop(['time', 'other'], axis=1)
        records = trimmed.to_dict(orient='records')
        BaseModel('real_buy_sell_gz2000').insert_batch(records)
Esempio n. 17
0
def get_result(sc, kline, start, end, table_name):
    """Store min-max normalised 64-bar price/MA windows, one per trading day.

    Reads the bars of ``sc``, adds moving averages with ``cal_ma`` and then
    walks backwards one day at a time. For each day the data is cut at
    09:55, the last 64 rows of the price and moving-average columns are
    scaled to ``[0, 1]`` with the window's global minimum and maximum, and
    the result is inserted into ``table_name`` together with the ``profit``
    and ``date`` of the last row. Days whose final bar has
    ``abs(profit_self) >= 0.097`` are skipped. The walk stops as soon as
    fewer than 64 rows remain or the last remaining bar is not the 1500 bar.

    :param sc: stock code; passed to ``KlineData.read_data`` and stored as
        ``stock_code``.
    :param kline: bar type passed to ``KlineData.read_data``.
    :param start: start date passed to ``KlineData.read_data``.
    :param end: end date passed to ``KlineData.read_data``.
    :param table_name: collection that receives the documents.
    """
    data = KlineData.read_data(code=sc,
                               start_date=start,
                               end_date=end,
                               kline=kline,
                               timemerge=True)
    data = cal_ma(data)
    data = data.dropna()
    while len(data) >= 64:
        last_one = data.iloc[-1]
        if last_one.time != 1500:
            break
        elif abs(last_one.profit_self) >= 0.097:
            # Presumably a limit-up / limit-down day. Drop the whole day
            # before continuing; without shrinking ``data`` the same row
            # would be inspected forever.
            date = last_one.date
            data = data[
                data.date < dt.datetime(date.year, date.month, date.day)]
            continue
        else:
            date = last_one.date
            # Keep everything up to 09:55 of the current day.
            data = data[data.date <= dt.datetime(date.year, date.month,
                                                 date.day, 9, 55)]
            if len(data) >= 64:
                profit = data.iloc[-1].profit
                date = data.iloc[-1].date
                idata = data.tail(64)

                # Move on to the previous day for the next iteration.
                data = data[
                    data.date < dt.datetime(date.year, date.month, date.day)]

                idata = idata.loc[:, [
                    'close', 'open', 'high', 'low', 'ma5', 'ma10', 'ma20',
                    'ma60', 'ma120', 'ma200'
                ]]
                idata = np.array(idata)
                # Global min / max over the whole window.
                # NOTE(review): a constant window gives amax == amin and
                # divides by zero -- decide how such windows should be
                # handled.
                amin, amax = idata.min(), idata.max()
                idata = (idata - amin) / (amax - amin)
                BaseModel(table_name).insert_batch({
                    'stock_code': sc,
                    'profit': profit,
                    'date': date,
                    'value': idata.tolist()
                })
Esempio n. 18
0
def gt():
    """Build the training arrays for 2018-05-24 .. 2018-06-02.

    Fetches ``model().get_data`` for every ``calendar`` entry in that
    (inclusive) range, one day at a time, and concatenates the results.

    :return: ``(X, Y)`` as NumPy arrays.
    """
    window = dict(date={
        '$gte': dt.datetime(2018, 5, 24),
        '$lte': dt.datetime(2018, 6, 2)
    })
    date_list = list(BaseModel('calendar').query(sql=window))
    obj = model()
    n = 0

    feature_frames, label_frames = [], []
    for entry in date_list[:len(date_list) - n]:
        features, labels = obj.get_data(entry['date'], entry['date'])
        feature_frames.append(features)
        label_frames.append(labels)

    X_train = pd.concat(feature_frames)
    Y_train = pd.concat(label_frames)
    return np.array(X_train), np.array(Y_train)
Esempio n. 19
0
    def report(self, clf, start_date, end_date):
        """Print a classification report and the score of ``clf``.

        Collects ``get_data`` output for every ``calendar`` entry between
        ``start_date`` and ``end_date`` (both inclusive), predicts with
        ``clf`` and prints ``classification_report`` followed by
        ``clf.score``.

        :param clf: fitted estimator offering ``predict`` and ``score``.
        :param start_date: inclusive lower bound for the calendar dates.
        :param end_date: inclusive upper bound for the calendar dates.
        """
        date_filter = dict(date={'$gte': start_date, '$lte': end_date})
        date_list = list(BaseModel('calendar').query(sql=date_filter))

        n = 0  # window length in calendar entries; 0 means one day at a time
        feature_frames, label_frames = [], []
        for first, last in zip(date_list, date_list[n:]):
            print(first['date'])
            features, labels = self.get_data(first['date'], last['date'])
            feature_frames.append(features)
            label_frames.append(labels)

        X_test = pd.concat(feature_frames)
        y_test = pd.concat(label_frames)
        y_pred = clf.predict(X_test)  # predict
        print(classification_report(y_test, y_pred))
        print(clf.score(X_test, y_test))
Esempio n. 20
0
def get_result(sc, kline, start, end, table_name):
    """Store scaled 64-bar MACD windows, one per trading day.

    Reads the bars of ``sc``, adds MACD columns with ``cal_macd`` and then
    walks backwards one day at a time. For each day the data is cut at
    09:55, the last 64 rows of ``macd`` / ``dif`` / ``dea`` are divided by
    the negated largest absolute value of the window, multiplied by 31 and
    shifted by 32, and the result is inserted into ``table_name`` together
    with the ``profit`` and ``date`` of the last row. Days whose final bar
    has ``abs(profit_self) >= 0.097`` are skipped. The walk stops as soon
    as fewer than 64 rows remain or the last remaining bar is not the 1500
    bar.

    :param sc: stock code; passed to ``KlineData.read_data`` and stored as
        ``stock_code``.
    :param kline: bar type passed to ``KlineData.read_data``.
    :param start: start date passed to ``KlineData.read_data``.
    :param end: end date passed to ``KlineData.read_data``.
    :param table_name: collection that receives the documents.
    """
    data = KlineData.read_data(code=sc,
                               start_date=start,
                               end_date=end,
                               kline=kline,
                               timemerge=True)
    data = cal_macd(data)
    data = data.dropna()
    while len(data) >= 64:
        last_one = data.iloc[-1]
        if last_one.time != 1500:
            break
        elif abs(last_one.profit_self) >= 0.097:
            # Presumably a limit-up / limit-down day. Drop the whole day
            # before continuing; without shrinking ``data`` the same row
            # would be inspected forever.
            date = last_one.date
            data = data[
                data.date < dt.datetime(date.year, date.month, date.day)]
            continue
        else:
            date = last_one.date
            # Keep everything up to 09:55 of the current day.
            data = data[data.date <= dt.datetime(date.year, date.month,
                                                 date.day, 9, 55)]
            if len(data) >= 64:
                profit = data.iloc[-1].profit
                date = data.iloc[-1].date
                idata = data.tail(64)
                idata = idata.loc[:, ['macd', 'dif', 'dea']]

                # Move on to the previous day for the next iteration.
                data = data[
                    data.date < dt.datetime(date.year, date.month, date.day)]
                idata = np.array(idata)
                # Global min / max over the whole window.
                amin, amax = idata.min(), idata.max()
                # Divisor with the largest magnitude, negated when it comes
                # from the maximum, so larger values end up at smaller
                # numbers after the shift below.
                # NOTE(review): an all-zero window divides by zero here --
                # decide how such windows should be handled.
                scale = -amax if abs(amax) > abs(amin) else amin
                idata = idata * (64 / 2 - 1) / scale
                idata = idata + 64 / 2
                BaseModel(table_name).insert_batch({
                    'stock_code': sc,
                    'profit': profit,
                    'date': date,
                    'value': idata.tolist()
                })
Esempio n. 21
0
        graph.write_png("tree.png")  # 生成png文件


if __name__ == '__main__':
    # Script entry point: assembles the training frames for one calendar
    # window. The visible part stops after the concatenation; fitting and
    # saving the classifier are not shown here.
    pass
    # feature select===========================================================================
    # start_date = dt.datetime(2018, 1, 1)
    # end_date = dt.datetime(2018, 1, 23)
    # model().model_select(start_date, end_date)
    # feature select===========================================================================

    train_model_name = 'train_model_16.m'
    # Start and end are the same day, so at most that day's calendar entry
    # is returned.
    start_date = dt.datetime(2017, 5, 24)
    end_date = dt.datetime(2017, 5, 24)
    curror = BaseModel('calendar').query(sql=dict(date={
        '$gte': start_date,
        '$lte': end_date
    }))
    date_list = list(curror)
    obj = model()
    n = 0
    # clf = SGDClassifier()
    clf = DecisionTreeClassifier()  # alternative tried: min_samples_leaf=20
    X = list()
    Y = list()
    # One get_data call per calendar entry, using that entry's date as both
    # the start and the end of the range.
    for i in range(len(date_list) - n):
        # print(date_list[i]['date'])
        temp1, temp2 = obj.get_data(date_list[i]['date'], date_list[i]['date'])
        X.append(temp1)
        Y.append(temp2)
    # pd.concat raises ValueError when the lists are empty, i.e. when the
    # calendar query returned nothing.
    X_train = pd.concat(X)
    Y_train = pd.concat(Y)
    ]
    for i in names:
        data[i] = (data[i] - data[i].min()) / (data[i].max() - data[i].min())
    return data


if __name__ == '__main__':
    # Script entry point: loads the ``features_index_day`` documents dated
    # 2018-01-05 .. 2018-07-25 (both inclusive) and cleans them. The
    # visible part stops before any training takes place.
    # clf = DecisionTreeClassifier()
    # clf = LogisticRegression()
    table = 'features_index_day'
    clf = RandomForestClassifier(n_estimators=200, n_jobs=2)
    label = 'change_r_next2'
    curror = BaseModel(table).query(
        sql={
            'date': {
                '$gte': dt.datetime(2018, 1, 5),
                '$lte': dt.datetime(2018, 7, 25)
            }
        })

    print(curror.count())
    if curror.count():

        data = pd.DataFrame(list(curror))

        # np.inf / np.nan are the canonical spellings; the np.Infinity and
        # np.NaN aliases no longer exist in NumPy 2.
        data = data.replace(to_replace=np.inf, value=np.nan).dropna()
        name = data.columns.values.tolist()
        names = list()

        # data=data.sample(frac=0.2)
Esempio n. 23
0
def fun(idata):
    """Fetch real-time data for one batch of codes and store it.

    Calls ``RealData.get_stocks_data`` and, when the result is non-empty,
    drops the ``time`` and ``other`` columns and inserts the rows into
    ``real_buy_sell_gz2000``.

    :param idata: batch of stock codes passed to ``get_stocks_data``.
    """
    x = RealData.get_stocks_data(idata)
    # Skip empty results instead of failing on the column drop, matching
    # the sequential ``exe`` loop that writes to the same collection.
    if len(x):
        x = x.drop(['time', 'other'], axis=1)
        BaseModel('real_buy_sell_gz2000').insert_batch(
            x.to_dict(orient='records'))
Esempio n. 24
0



# Build a classification task using 3 informative features
# X, y = make_classification(n_samples=1000,
#                            n_features=10,
#                            n_informative=3,
#                            n_redundant=0,
#                            n_repeated=0,
#                            n_classes=2,
#                            random_state=0,
#                            shuffle=False)

# Stock codes used to filter the query below.
li=[603993, 601989, 601988, 601881, 601878, 601857, 601818, 601800, 601766, 601688, 601668, 601628, 601601, 601398, 601390, 601360, 601336, 601328, 601318, 601288, 601229, 601211, 601186, 601169, 601166, 601088, 601006, 600999, 600958, 600887, 600703, 600690, 600606, 600585, 600547]
# Fetch the ``novel_Feature`` documents of those codes dated 2016-01-01 ..
# 2018-09-15 (both bounds inclusive).
curror = BaseModel('novel_Feature').query(
    sql=dict(stock_code={'$in':li}, date={'$gte': dt.datetime(2016, 1, 1), '$lte': dt.datetime(2018, 9, 15)}))

# Materialise the whole result set as a DataFrame (one row per document).
data = pd.DataFrame(list(curror))
columns=['ADX', 'ADX1', 'ADX_50_DOWN', 'ADX_50_UP', 'ADX_z', 'AR', 'AR_50_DOWN', 'AR_50_UP', 'AR_z', 'BOLL', 'BR',
        'BR_AR_DOWN', 'BR_AR_UP', 'BR_z', 'CCI', 'CCI_-100_UP', 'CCI_100_DOWN', 'CCI_z', 'CR', 'CR_40_DOWN',
        'CR_40_UP', 'CR_MA1_DOWN', 'CR_MA1_UP', 'CR_MA2_DOWN', 'CR_MA2_UP', 'CR_MA3_DOWN', 'CR_MA3_UP', 'CR_z',
        'Close_dt_ma10', 'Close_dt_ma20', 'Close_dt_ma30', 'Close_dt_ma5', 'Close_dt_ma60', 'D', 'DMM', 'DMM1',
        'DMP', 'DMP1', 'D_z', 'HD', 'J', 'J_down_cross_100', 'J_less_20', 'J_over_80', 'J_up_cross_0', 'J_z', 'K',
        'LB', 'LB_z', 'LD', 'MA1', 'MA1_z', 'MA2', 'MA2_z', 'MA3', 'MA3_z', 'MDI', 'MDI_z', 'MTR', 'MTR_1', 'MTR_2',
        'MTR_D1', 'MTR_D2', 'MTR_D3', 'New_H_10', 'New_H_10_10', 'New_H_10_20', 'New_H_10_5', 'New_H_20',
        'New_H_20_10', 'New_H_20_20', 'New_H_20_5', 'New_H_30', 'New_H_30_10', 'New_H_30_20', 'New_H_30_5',
        'New_H_5', 'New_H_5_10', 'New_H_5_20', 'New_H_5_5', 'New_H_60', 'New_H_60_10', 'New_H_60_20', 'New_H_60_5',
        'New_Highest_10', 'New_Highest_20', 'New_Highest_30', 'New_Highest_5', 'New_Highest_60', 'New_Hl_10',
        'New_Hl_20', 'New_Hl_30', 'New_Hl_5', 'New_Hl_60', 'New_L_10', 'New_L_10_10', 'New_L_10_20', 'New_L_10_5',
        'New_L_20', 'New_L_20_10', 'New_L_20_20', 'New_L_20_5', 'New_L_30', 'New_L_30_10', 'New_L_30_20',
        'New_L_30_5', 'New_L_5', 'New_L_5_10', 'New_L_5_20', 'New_L_5_5', 'New_L_60', 'New_L_60_10', 'New_L_60_20',
Esempio n. 25
0
def deal_data(code):
    data = KlineData.read_data(code=code,
                               start_date=dt.datetime(2015, 1, 1),
                               end_date=dt.datetime(2018, 9, 17),
                               kline='kline_day',
                               timemerge=True)

    name = data.columns.values.tolist()
    if len(data):
        data = data.drop(['_id', 'classtype', 'market'], axis=1)
        data = n_KDJ(data, 9, 3)
        # t1 = time.clock()
        # print(t1-t)
        # t = time.clock()
        data = n_MA(data)
        # t1 = time.clock()
        # print(t1 - t1)
        # t = time.clock()
        data = n_MACD(data, 12, 26, 9)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_RSI(data, 6, 12, 24)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_DMI(data, 14, 6)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_BRAR(data, 26)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_CR(data, 26, 10, 20, 40)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_VR(data, 26)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_WR(data, 10, 6)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_CCI(data, 14)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_BOLL(data, 20)
        # t1 = time.clock()
        # print(t1 - t)
        # t = time.clock()
        data = n_PSY(data, 12)
        columns = data.columns.values.tolist()
        data = data.replace(to_replace=np.Infinity, value=np.NaN)

        fund_data = pd.DataFrame(
            list(BaseModel('jq_fund_data').query({'code': code})))
        jq_name = [
            'capitalization', 'circulating_cap', 'circulating_market_cap',
            'market_cap', 'pb_ratio', 'pcf_ratio', 'pe_ratio', 'pe_ratio_lyr',
            'ps_ratio', 'turnover_ratio'
        ]
        for i in jq_name:
            data[i] = 0
        # data['circulating_market_cap'] = 0

        for i in columns:
            if i not in name and 'change_r_next' not in i:
                v = data[i].iloc[0]
                if type(v).__name__ != 'bool_':
                    # print(i, v, type(v))
                    if data[i].min() == 0:
                        # pass
                        # print(i, data[i].loc[0])
                        data.loc[data[i] != 0,
                                 i] = (data.loc[data[i] != 0, i] -
                                       data.loc[data[i] != 0, i].min()) / (
                                           data.loc[data[i] != 0, i].max() -
                                           data.loc[data[i] != 0, i].min())
                        # print(i,data[i].loc[0])
                    else:
                        # print(i, v, type(v))
                        data[i] = (data[i] - data[i].min()) / (data[i].max() -
                                                               data[i].min())

        # data['up'] = data['up'] / (data['up'] + data['down'])
        # data['down'] = data['down'] / (data['up'] + data['down'])
        data.loc[
            (data.date >= dt.datetime(2015, 12, 31)) & (data.date < dt.datetime(2016, 3, 31)), jq_name] = \
            fund_data[fund_data.day == '2015-12-31'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date >= dt.datetime(2016, 3, 31)) & (data.date <= dt.datetime(2016, 6, 30)), jq_name] = \
            fund_data[fund_data.day == '2016-03-31'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date < dt.datetime(2016, 9, 30)) & (data.date >= dt.datetime(2016, 6, 30)), jq_name] = \
            fund_data[fund_data.day == '2016-06-30'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date >= dt.datetime(2016, 9, 30)) & (data.date < dt.datetime(2016, 12, 30)), jq_name] = \
            fund_data[fund_data.day == '2016-09-30'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date < dt.datetime(2017, 3, 31)) & (data.date >= dt.datetime(2016, 12, 30)), jq_name] = \
            fund_data[fund_data.day == '2016-12-30'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date >= dt.datetime(2017, 3, 31)) & (data.date < dt.datetime(2017, 6, 30)), jq_name] = \
            fund_data[fund_data.day == '2017-03-31'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date >= dt.datetime(2017, 6, 30)) & (data.date < dt.datetime(2017, 9, 29)), jq_name] = \
            fund_data[fund_data.day == '2017-06-30'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date >= dt.datetime(2017, 9, 29)) & (data.date < dt.datetime(2017, 12, 29)), jq_name] = \
            fund_data[fund_data.day == '2017-09-29'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date >= dt.datetime(2017, 12, 29)) & (data.date < dt.datetime(2018, 3, 30)), jq_name] = \
            fund_data[fund_data.day == '2017-12-29'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date >= dt.datetime(2018, 3, 30)) & (data.date < dt.datetime(2018, 6, 29)), jq_name] = \
            fund_data[fund_data.day == '2018-03-30'].loc[:,jq_name].iloc[0].tolist()
        data.loc[
            (data.date >= dt.datetime(2018, 6, 29)), jq_name] = \
            fund_data[fund_data.day == '2018-06-29'].loc[:,jq_name].iloc[0].tolist()
        name = ['open', 'close', 'high', 'low', 'amount', 'volume']
        data['turnover_ratio'] = data.volume / (data.circulating_cap * 10000)
        for i in name:
            data[i] = (data[i] - data[i].min()) / (data[i].max() -
                                                   data[i].min())
        data = data[::-1]

        data = data.reset_index(drop=True)
        # data['count'] = data.index.tolist()
        curror = BaseModel('LaunchDate').query(sql={'stock_code': code})
        data['count'] = 0
        if curror.count():
            launchdate = list(curror)[0]['date']
            data['count'] = data.date.map(lambda x: (x - launchdate).days)

        data = data[data.date >= dt.datetime(2016, 1, 1)]
        BaseModel('features_kline_day').insert_batch(
            data.to_dict(orient='records'))
Esempio n. 26
0
def fun(codes):
    """Rebuild the 'features_kline_day' entries for every code in *codes*.

    For each stock code, the entries matching ``stock_code`` are removed
    from ``BaseModel('features_kline_day')`` first, then ``deal_data`` is
    called for that code (presumably regenerating the features -- confirm
    against ``deal_data``), and a progress line is printed.

    :param codes: iterable of stock codes to process, in order.
    """
    for stock_code in codes:
        # Clear the old rows before recomputing so the code is not duplicated.
        BaseModel('features_kline_day').remove(stock_code=stock_code)
        deal_data(stock_code)
        print(stock_code, 'over')
Esempio n. 27
0
def fun(codes):
    """Rebuild the 'features_index_min5' entries for every code in *codes*.

    For each stock code, the entries matching ``stock_code`` are removed
    from ``BaseModel('features_index_min5')`` first, then ``deal_data`` is
    called for that code (presumably regenerating the features -- confirm
    against ``deal_data``), and a progress line is printed.

    :param codes: iterable of stock codes to process, in order.
    """
    for stock_code in codes:
        # Clear the old rows before recomputing so the code is not duplicated.
        BaseModel('features_index_min5').remove(stock_code=stock_code)
        deal_data(stock_code)
        print(stock_code, 'over')
Esempio n. 28
0
# Authenticate before issuing any query (presumably against the JoinQuant
# data service -- confirm against the imported `auth`).
# NOTE(review): the account and password are hard-coded in source; consider
# loading them from configuration or the environment instead.
auth('18623166973', 'zjf950613')

# Earlier manual check, kept for reference:
# q = query(valuation).filter(valuation.code == '000001.XSHE')
# df = get_fundamentals(q, '2015-10-15')
# # print the total market cap
# print(df['market_cap'][0])

# Quarter labels 2016q1..2018q2. The loop below does not iterate over this
# list; it only processes the single quarter '2015q4'.
name = [
    '2016q1', '2016q2', '2016q3', '2016q4',
    '2017q1', '2017q2', '2017q3', '2017q4',
    '2018q1', '2018q2',
]

for i in ('2015q4',):
    # Select the valuation columns to download for this quarter.
    valuation_query = query(
        valuation.code,
        valuation.day,
        valuation.capitalization,
        valuation.circulating_cap,
        valuation.market_cap,
        valuation.circulating_market_cap,
        valuation.turnover_ratio,
        valuation.pe_ratio,
        valuation.pe_ratio_lyr,
        valuation.pb_ratio,
        valuation.ps_ratio,
        valuation.pcf_ratio,
    )
    df = get_fundamentals(valuation_query, statDate=i)

    # Keep only the first six characters of each code (presumably dropping
    # an exchange suffix such as '.XSHE' -- confirm against the data).
    df['code'] = df.code.map(lambda full_code: full_code[:6])
    # Tag every row with the quarter label it was fetched for.
    df['date'] = i

    # Hand the rows, one dict per row, to the 'jq_fund_data' model.
    records = df.to_dict(orient='records')
    BaseModel('jq_fund_data').insert_batch(records)
Esempio n. 29
0
def fun(data):
    """Remove the 'kline_min5' entries for the date of every item in *data*.

    For each item, ``BaseModel('kline_min5').remove`` is called with the
    item's ``'date'`` value, and a progress line is printed afterwards.

    :param data: iterable of mappings, each providing a ``'date'`` key.
    """
    for record in data:
        record_date = record['date']
        BaseModel('kline_min5').remove(date=record_date)
        print(record_date, 'over')
Esempio n. 30
0
    # feature select===========================================================================
    print(__doc__)
    from time import time

    import numpy as np
    from scipy import ndimage
    from matplotlib import pyplot as plt
    from sklearn import manifold, datasets
    # digits = datasets.load_digits(n_class=10)
    # X = digits.data
    # y = digits.target

    data = BaseModel('a_ma_index_min5_gru_128').query(
        sql={
            'date': {
                '$lte': dt.datetime(2018, 8, 9, 9, 55),
                '$gte': dt.datetime(2018, 1, 1, 9, 55)
            }
        })
    data = pd.DataFrame(list(data))
    data2 = data.groupby(by=['type']).agg({'profit': 'mean'})
    data2 = data2.sort_values(by=['profit'], ascending=False)

    X = np.array(data.encode.tolist())
    y = np.array(data.type)
    # X,y=gt()
    # y=np.array(300)
    n_samples, n_features = X.shape
    np.random.seed(0)

    def nudge_images(X, y):