def ts_rank_m(x, period):
    """Return the moving-window rank of ``x`` computed along axis 0.

    Thin wrapper around ``bn.move_rank`` called with ``min_count=1``.

    x: array-like input, forwarded unchanged to ``bn.move_rank``.
    period: window length, forwarded as the ``window`` argument.

    Returns whatever ``bn.move_rank`` returns for these arguments.
    """
    return bn.move_rank(x, period, min_count=1, axis=0)
def getStatsTS(X, Y, quantile=10, window=500, minCnt=250, horizon=30):
    """Compute the per-row IC and long-short return of a factor.

    X: input factor, 2-D (original note: shape should be 40320*1082).
    Y: price matrix with the same shape as X.
    quantile: number of buckets; rows are split at 1/quantile and
        1 - 1/quantile of the normalised time-series rank of X.
    window: window length of the time-series rank applied to X.
    minCnt: ``min_count`` forwarded to ``bn.move_rank``.
    horizon: forward-return horizon in rows (default 30, the value that
        used to be hard-coded).

    The forward return is standardised by the horizon:
        Return_i = (Price_{t+i} - Price_t) / Price_t / i

    Returns ``(IC, LS)``, two 1-D arrays of length ``len(X)``:
        IC: correlation between the normalised rank of X and the
            cross-sectional rank of the forward return, row by row.
        LS: mean forward return of the top bucket minus the bottom bucket.

    Raises ValueError if ``horizon`` < 1 or if X and Y differ in shape.
    """
    print('Now Calculating IC and IR matrix, start counting...')
    t0 = time.time()
    X = np.asarray(X)
    Y = np.asarray(Y)

    # Fail fast, before any expensive work, and say what is wrong.
    if horizon < 1:
        raise ValueError(
            "horizon must be a positive integer, got {!r}".format(horizon))
    if X.shape != Y.shape:
        raise ValueError(
            "X and Y must have the same shape, got {} and {}".format(
                X.shape, Y.shape))

    # Forward return over `horizon` rows, computed on whole slices instead
    # of an element-by-element Python loop (same arithmetic, same order).
    # NOTE(review): the last `horizon` rows keep a return of 0 rather than
    # NaN, exactly as before - confirm this is intended.
    fwd_ret = np.zeros(Y.shape)
    n_valid = max(len(Y) - horizon, 0)
    base = Y[:n_valid]
    fwd_ret[:n_valid] = (Y[horizon:] - base) / base / horizon
    Y = fwd_ret

    N = len(X)
    bottom = 1.0 / quantile
    top = 1 - bottom

    # ts rank
    X = bn.move_rank(X, window=window, min_count=minCnt, axis=0)
    print(np.isnan(X).sum())
    # norm to [0, 1]
    X = 0.5 * (X + 1)

    # get common data: keep only cells where both X and Y are available
    valid = ~np.isnan(X) & ~np.isnan(Y)
    X = np.where(valid, X, np.nan)
    Y = np.where(valid, Y, np.nan)

    # cross-rank Y, scaled by the largest rank of each row
    Y_rk = bn.nanrankdata(Y, axis=1)
    Y_rk /= bn.nanmax(Y_rk, axis=1)[:, np.newaxis]

    # ls: top bucket minus bottom bucket, per row
    long_leg = np.nanmean(np.where(X > top, Y, np.nan), axis=1)
    short_leg = np.nanmean(np.where(X < bottom, Y, np.nan), axis=1)
    LS = long_leg - short_leg

    # X and Y_rk are NaN in exactly the same cells (see `valid`), so one
    # mask per row selects matching observations for the correlation.
    IC = np.zeros((N, ))
    for ii in range(N):
        row_ok = valid[ii]
        IC[ii] = np.corrcoef(X[ii][row_ok], Y_rk[ii][row_ok])[0, 1]

    t1 = time.time()
    print("total time used for IC and LS matrix calculation is:", (t1 - t0))
    return IC, LS
def init(self):
    """Read the configuration from ``self.param`` and build the dispatch table.

    Required keys of ``self.param``: 'window', 'col', 'type'.
    Optional keys: 'translation', 'scale' (stored as None when absent).

    ``self.move_window_mapping`` maps a statistic name to a callable taking
    ``(c, s, t, w)``: c is the data, w the window, and s / t are
    presumably a scale and a translation (cf. ``scale_cols`` /
    ``translation_cols``) - verify against the caller.
    NOTE(review): 'std' and 'min'/'max' are only consistent with an affine
    transform of the data when s is positive - confirm that s > 0 holds.
    """
    cfg = self.param
    self.windows = cfg['window']
    self.cols = cfg['col']
    self.types = cfg['type']
    self.translation_cols = cfg.get('translation')
    self.scale_cols = cfg.get('scale')

    # Location statistics: scaled and shifted.
    def _mean(c, s, t, w):
        return bn.move_mean(c, w) * s + t

    def _min(c, s, t, w):
        return bn.move_min(c, w) * s + t

    def _max(c, s, t, w):
        return bn.move_max(c, w) * s + t

    # Dispersion statistics: scaled only, the shift cancels out.
    def _std(c, s, t, w):
        return bn.move_std(c, w) * s

    def _var(c, s, t, w):
        return bn.move_var(c, w) * s * s

    # Rank ignores both s and t.
    def _rank(c, s, t, w):
        return bn.move_rank(c, w)

    # The shift is added once per window element.
    def _sum(c, s, t, w):
        return bn.move_sum(c, w) * s + t * w

    # Indicators from F; start_indices is read from self.base at call time.
    def _ema(c, s, t, w):
        smoothed = F.ema(c, 2.0 / (w + 1),
                         start_indices=self.base.start_indices)
        return smoothed * s + t

    def _rsi(c, s, t, w):
        return F.rsi(c, w, start_indices=self.base.start_indices)

    def _psy(c, s, t, w):
        return F.psy(c, w, start_indices=self.base.start_indices)

    def _bias(c, s, t, w):
        return F.bias(c, w, start_indices=self.base.start_indices)

    self.move_window_mapping = {
        "mean": _mean,
        "std": _std,
        "var": _var,
        "min": _min,
        "max": _max,
        "rank": _rank,
        "sum": _sum,
        "ema": _ema,
        "rsi": _rsi,
        "psy": _psy,
        "bias": _bias,
    }
def Ts_rank(A, n):
    """Return the moving-window rank of ``A`` along axis 0, shifted by +2.

    A: input data; positions where ``A`` is NaN are NaN in the result.
    n: window length. When ``n`` < 2 the input ``A`` itself is returned
       unchanged (a rank over fewer than two rows is not computed).

    The rank comes from ``bk.move_rank`` with ``min_count=1``; 2 is added
    so that the value 0 does not appear in the output.
    """
    if n < 2:
        # Rank window must be at least 2; hand the input back as is.
        return A

    ranked = bk.move_rank(A, n, axis=0, min_count=1)
    missing = np.isnan(A)
    ranked[missing] = np.nan
    # Shift the ranks: zeros are not wanted in the values.
    return ranked + 2
def ts_rank(data, period):
    """Return the moving-window rank of ``data`` along axis 0, rescaled.

    data: input forwarded to ``bk.move_rank``.
    period: window length.

    The rank from ``bk.move_rank`` (``min_count=1``) is mapped through
    ``(rank + 1) / 2``; for ranks in [-1, 1], as bottleneck documents for
    ``move_rank``, that lands in [0, 1].
    """
    ranked = bk.move_rank(data, window=period, min_count=1, axis=0)
    shifted = ranked + 1
    return shifted / 2
def main():
    """Report which columns show a rising moving-window rank.

    Command line:
        hdrs                               print the header names and exit 0
        <windowPeriod> <column> [...]      analyse the named columns

    For each column the moving rank (``bn.move_rank``) is averaged over
    three windows of ``windowPeriod`` values: the first usable window, a
    window centred on the middle of the series, and the last window. A
    column is "rising" when first < middle < last and first and middle are
    positive.

    Exit status: 1 if at least one column is rising, 2 on missing
    arguments, otherwise the function returns normally.
    """
    # Check the arguments before reading any data; status 2 keeps this
    # distinct from 1, which means "rates rising".
    if len(sys.argv) < 2:
        sys.stderr.write(
            'usage: hdrs | <windowPeriod> <column> [<column> ...]\n')
        sys.exit(2)

    lines = getLines()
    hdr = getHdr(lines)  # array

    if sys.argv[1] == 'hdrs':
        print('\n'.join(hdr))
        sys.exit(0)

    colRefs = getColRefs(hdr)
    windowPeriod = int(sys.argv[1])
    workingColumns = list(sys.argv[2:])
    validateWorkingColumns(hdr, workingColumns)

    # this is a dict of lists, where each list is all values for the column
    colValues = getDataSet(colRefs, workingColumns, lines)

    ratesRising = False
    for colName in workingColumns:
        colNum = colRefs[colName]
        ma = bn.move_rank(colValues[colNum], window=windowPeriod)

        # skip the first windowPeriod of values, as they are 'nan'
        avgFirstPeriod = average(ma[windowPeriod:windowPeriod * 2])

        # Centre the middle window on the series. The previous start,
        # len(ma) - windowPeriod / 2, selected the last half-window, so
        # the "middle" average was just a slice of the final period.
        midStart = (len(ma) - windowPeriod) // 2
        avgMiddlePeriod = average(ma[midStart:midStart + windowPeriod])

        avgLastPeriod = average(ma[len(ma) - windowPeriod:])

        if (avgMiddlePeriod > avgFirstPeriod > 0) and (avgLastPeriod > avgMiddlePeriod > 0):
            ratesRising = True
            print('{} is rising'.format(colName))
            print(' avgFirstPeriod: {:0.6f}'.format(avgFirstPeriod))
            print('avgMiddlePeriod: {:0.6f}'.format(avgMiddlePeriod))
            print(' avgLastPeriod: {:0.6f}'.format(avgLastPeriod))
            print(' %increased: {:5.0f}'.format(
                ((avgLastPeriod / avgFirstPeriod) - 1) * 100))

    if ratesRising:
        sys.exit(1)
def time_move_rank(self, dtype, shape, order, axis, window):
    """Benchmark body: run ``bn.move_rank`` on ``self.arr`` along ``axis``.

    ``dtype``, ``shape`` and ``order`` are not used here; presumably they
    are benchmark parameters consumed when ``self.arr`` is built - verify
    against the setup code. The result is discarded.
    """
    bn.move_rank(self.arr, window=window, axis=axis)
def time_move_rank(self, dtype, shape, window):
    """Benchmark body: run ``bn.move_rank`` on ``self.arr`` with the default axis.

    ``dtype`` and ``shape`` are not used here; presumably they are
    benchmark parameters consumed when ``self.arr`` is built - verify
    against the setup code. The result is discarded.
    """
    bn.move_rank(self.arr, window=window)