def create_ud_cont_2cls(stock, meta):
    """Prepend two "consecutive run" columns to ``stock``.

    The ``ud`` feature is reduced to one class index per row with
    ``argmax``.  For every row the number of immediately preceding rows
    carrying the same class index is counted; the count goes into the first
    new column when the class index is 0 and into the second when it is
    1 or 2.

    Args:
        stock: 2-D array of per-row stock data.
        meta: Column names for ``stock``; forwarded to the feature extractor.

    Returns:
        ``(stock, meta_ud)`` where ``stock`` has the two run-length columns
        prepended and ``meta_ud`` is ``['contdown', 'contup'] + meta``.
    """

    def check_cont_status(stock_ud, index):
        """Return ``[run_of_class_0, run_of_class_1_or_2]`` ending just before ``index``."""
        streak = [0, 0]
        current = stock_ud[index]
        if current == 0:
            slot = 0
        elif current == 1 or current == 2:
            slot = 1
        else:
            # Any other class index is never counted.
            return streak
        # Walk backwards until the class index changes or the data runs out.
        pos = index - 1
        while pos >= 0 and stock_ud[pos] == current:
            streak[slot] += 1
            pos -= 1
        return streak

    extractor = f_extr.feature_extractor(meta, stock)
    ud_scores, _ = extractor.ud()
    ud_class = np.argmax(ud_scores, axis=1)

    streaks = np.vstack(
        [check_cont_status(ud_class, row) for row in range(len(stock))]
    )
    stock = np.concatenate((streaks, stock), axis=1)

    meta_ud = ['contdown', 'contup'] + meta
    return stock, meta_ud
def create_ud_info(stock, meta, backDay=1):
    """Prepend lagged "same class as today" indicator columns to ``stock``.

    The ``ud`` feature is reduced to one class index per row with
    ``argmax``.  For each row and each lag ``j`` in ``0 .. backDay - 1`` a
    group of three columns is produced; within the group, column ``c`` is set
    to 1 when both the current row and the row ``j + 1`` positions earlier
    have class index ``c`` (``c`` in 0, 1, 2).  The first ``backDay`` rows
    have no complete history and are dropped from the result.

    Args:
        stock: 2-D array of per-row stock data.
        meta: Column names for ``stock``; forwarded to the feature extractor.
        backDay: Number of lags to encode.

    Returns:
        ``(stock, meta_ud)`` with ``3 * backDay`` indicator columns prepended
        and their names (``downLag_<j>``, ``fairLag_<j>``, ``upLag_<j>``)
        prepended to ``meta``.
    """
    extractor = f_extr.feature_extractor(meta, stock)
    ud_scores, _ = extractor.ud()
    ud_class = np.argmax(ud_scores, axis=1)

    n_rows = len(stock)
    lag_flags = np.zeros((n_rows, backDay * 3))
    for row in range(backDay, n_rows):
        for lag in range(backDay):
            today = ud_class[row]
            # Only classes 0, 1 and 2 have an indicator column.
            if today == ud_class[row - lag - 1] and today in (0, 1, 2):
                lag_flags[row][lag * 3 + today] = 1

    # Discard the leading rows that lack a full lag window.
    lag_flags = lag_flags[backDay:]
    stock = stock[backDay:]
    stock = np.concatenate((lag_flags, stock), axis=1)

    prefixes = ['downLag_', 'fairLag_', 'upLag_']
    lag_names = []
    for lag in range(backDay):
        for prefix in prefixes:
            lag_names.append(prefix + str(lag))
    meta_ud = lag_names + meta
    return stock, meta_ud
def get_data_from_dow_friday(raw, stocks, meta, predict_day, feature_list=None, isShift=True):
    """Build Friday-anchored weekly features and labels.

    Rows of the cleaned ``stocks`` array whose date falls on a Friday
    (pandas ``dayofweek == 4``) are used as anchors.  Five row masks are
    built: mask 0 selects the anchor rows, mask ``j`` selects the rows ``j``
    positions before each anchor.  Features are extracted separately for
    each mask.

    Args:
        raw: Object whose ``columns`` hold the dates of the rows.  The last
            ``len(stocks)`` dates are used after cleaning
            (assumes the cleaned rows are the trailing ones -- TODO confirm).
        stocks: 2-D array of per-row stock data; the last three columns are
            reduced with ``argmax`` to form the label.
        meta: Column names for ``stocks``; forwarded to the feature extractor.
        predict_day: Index (0-4) of the mask whose rows provide the label.
        feature_list: Names of feature-extractor methods to call.
            Defaults to ``['ratio']``.
        isShift: When true, the last feature row and the first label row are
            dropped so each feature row is paired with the following
            anchor's label.

    Returns:
        ``(features, label)`` where ``features[key][feature_name]`` is the
        extracted feature array for one mask and ``label`` is a 1-D array of
        class indices.

    Raises:
        IndexError: If fewer than five rows remain after cleaning.
    """
    if feature_list is None:
        feature_list = ['ratio']

    stocks = clean_stock(stocks, meta, feature_list)

    df = pd.DataFrame({'date': raw.columns})
    df['date'] = pd.to_datetime(df['date'])
    df['dow'] = df['date'].dt.dayofweek
    dow_array = np.array(df['dow'][-len(stocks):])

    anchor_mask = np.equal(dow_array, 4)
    n_rows = len(anchor_mask)
    if n_rows < 5:
        # The original element-wise clearing loop failed the same way.
        raise IndexError('at least 5 rows are required to build the masks')
    # The first five rows can never be anchors: up to four earlier rows
    # are needed for the shifted masks.
    anchor_mask[:5] = False

    # The builtin ``bool`` is used as dtype: the ``np.bool`` alias was
    # removed in NumPy 1.24.
    dow_array_mask = [anchor_mask]
    for shift in range(1, 5):
        shifted = np.zeros(n_rows, dtype=bool)
        shifted[:n_rows - shift] = anchor_mask[shift:]
        dow_array_mask.append(shifted)

    # NOTE(review): the keys are kept for compatibility, but key 'mon' holds
    # the anchor (Friday) rows, 'tue' the rows one position earlier, and so
    # on -- the names look inherited from a Monday-anchored variant.
    dow = {0: 'mon', 1: 'tue', 2: 'wed', 3: 'thu', 4: 'fri'}
    features = {}
    for d in range(5):
        features[dow[d]] = {}
        shifted_stock = stocks[dow_array_mask[d]]
        if isShift:
            shifted_stock = shifted_stock[:-1]
        fe = fe_extr.feature_extractor(meta, shifted_stock)
        for feature_name in feature_list:
            features[dow[d]][feature_name], _ = getattr(fe, feature_name)()

    label_rows = stocks[dow_array_mask[predict_day]]
    if isShift:
        label_rows = label_rows[1:]
    label = np.argmax(label_rows[:, -3:], axis=-1)
    return features, label
def get_data_from_normal_weekly_train(stocks, meta, consider_lagday, feature_list=None, isShift=True):
    """Build weekly training samples by walking ``stocks`` backwards in 5-row windows.

    Starting from the end of ``stocks``, each iteration takes one 5-row
    label window and one 5-row data window.  With ``isShift`` the data
    window is the 5 rows immediately before the label window; otherwise both
    windows are the same rows.  Position 5 is the last row of a window and
    position 1 the first.

    Args:
        stocks: 2-D array of per-row stock data; the last three columns are
            reduced with ``argmax`` to form the per-row label.
        meta: Column names for ``stocks``; forwarded to the feature extractor.
        consider_lagday: Number of window positions (counted back from
            position 5) whose features are concatenated into the output.
        feature_list: Names of feature-extractor methods to call.
            Defaults to ``['ratio']``.
        isShift: Whether the data window precedes the label window.

    Returns:
        ``(data, weekly_label, label)``: the concatenated feature matrix, a
        per-window binary label (1 when the sum of ``ut.map_ud`` over the
        window's five labels is >= 0, else 0), and the dict of per-position
        label lists keyed 1-5.

    Raises:
        ValueError: From ``np.stack`` when no complete window can be formed.
    """
    if feature_list is None:
        feature_list = ['ratio']

    idx = len(stocks)
    label = {position: [] for position in range(1, 6)}
    data = {position: [] for position in range(1, 6)}

    # How far the data window sits before the label window.
    lookback = 5 if isShift else 0
    # The second condition stops before the data window would need rows
    # with a negative index; previously those silently wrapped around to
    # the end of ``stocks`` and contaminated the oldest sample.
    while idx > 5 and idx - lookback >= 5:
        for i in range(1, 6):
            label[6 - i].append(np.argmax(stocks[idx - i, -3:], axis=-1))
        data_end = idx - lookback
        for i in range(1, 6):
            data[6 - i].append(stocks[data_end - i])
        idx -= 5

    features = {}
    for d in data:
        features[d] = {}
        window_rows = np.stack(data[d], axis=0)
        fe = fe_extr.feature_extractor(meta, window_rows)
        for feature_name in feature_list:
            features[d][feature_name], _ = getattr(fe, feature_name)()

    feature_concat = []
    for i in range(5, 5 - consider_lagday, -1):
        for k in features[i]:
            feature_concat.append(features[i][k])
    data = np.concatenate(feature_concat, axis=1)

    weekly_label = np.sum(
        [[ut.map_ud(_label) for _label in label[i]] for i in range(1, 6)],
        axis=0,
    )
    # Binarize in the array's own dtype: 1 for a non-negative weekly sum.
    weekly_label = (weekly_label >= 0).astype(weekly_label.dtype)
    return data, weekly_label, label
def create_velocity(stock, meta):
    """Prepend row-to-row differences of the ``ratio`` feature to ``stock``.

    The difference between each row of the ``ratio`` feature and the row
    before it is computed; the first row of ``stock`` has no predecessor and
    is dropped.

    Args:
        stock: 2-D array of per-row stock data.
        meta: Column names for ``stock``; forwarded to the feature extractor.

    Returns:
        ``(stock, meta_v)`` with the difference columns prepended to the
        trimmed ``stock`` and five velocity names prepended to ``meta``.
    """
    extractor = f_extr.feature_extractor(meta, stock)
    ratio, _ = extractor.ratio()

    later_rows = ratio[1:]
    earlier_rows = ratio[:-1]
    velocity = later_rows - earlier_rows

    trimmed = stock[1:]
    stock = np.concatenate((velocity, trimmed), axis=1)

    # NOTE(review): exactly five names are listed, so this presumably
    # expects ``ratio`` to have five columns -- confirm.  The first name
    # lacks the 'v' the others carry; kept as-is because callers may look
    # the columns up by these exact strings.
    velocity_names = [
        'velocity_1',
        'velocity_v2',
        'velocity_v3',
        'velocity_v4',
        'velocity_v5',
    ]
    meta_v = velocity_names + meta
    return stock, meta_v
def get_data_from_normal(stocks, meta, predict_day, feature_list=None, isShift=True):
    """Build lagged feature sets and labels from consecutive rows.

    After cleaning, every row except the first five is a "current" row.
    Five row masks are built: mask 0 selects the current rows and mask ``d``
    selects the rows ``d`` positions before them.  Features are extracted
    separately for each mask; labels come from the current rows.

    Args:
        stocks: 2-D array of per-row stock data; the last three columns are
            reduced with ``argmax`` to form the label.
        meta: Column names for ``stocks``; forwarded to the feature extractor.
        predict_day: With ``isShift``, the number of rows between a feature
            row and the row that supplies its label.
        feature_list: Names of feature-extractor methods to call.
            Defaults to ``['ratio']``.
        isShift: When true, the last ``predict_day`` feature rows and the
            first ``predict_day`` label rows are dropped so features and
            labels are offset by ``predict_day`` rows.

    Returns:
        ``(features, label)`` where ``features[d][feature_name]`` is the
        feature array for lag ``d`` (0-4) and ``label`` is a 1-D array of
        class indices.

    Raises:
        IndexError: If fewer than five rows remain after cleaning.
    """
    if feature_list is None:
        feature_list = ['ratio']

    stocks = clean_stock(stocks, meta, feature_list)

    n_rows = len(stocks)
    if n_rows < 5:
        # The original element-wise clearing loop failed the same way.
        raise IndexError('at least 5 rows are required to build the masks')

    # The builtin ``bool`` is used as dtype: the ``np.bool`` alias was
    # removed in NumPy 1.24.
    current_mask = np.ones(n_rows, dtype=bool)
    # The first five rows lack a full four-row history.
    current_mask[:5] = False

    mask = [current_mask]
    for shift in range(1, 5):
        shifted = np.zeros(n_rows, dtype=bool)
        shifted[:n_rows - shift] = current_mask[shift:]
        mask.append(shifted)

    # ``[:-0]`` would select nothing, so a zero offset must mean "keep all".
    feature_end = -predict_day if predict_day else None

    features = {}
    for d in range(5):
        features[d] = {}
        shifted_stock = stocks[mask[d]]
        if isShift:
            shifted_stock = shifted_stock[:feature_end]
        fe = fe_extr.feature_extractor(meta, shifted_stock)
        for feature_name in feature_list:
            features[d][feature_name], _ = getattr(fe, feature_name)()

    label_rows = stocks[mask[0]]
    if isShift:
        label_rows = label_rows[predict_day:]
    label = np.argmax(label_rows[:, -3:], axis=-1)
    return features, label
def get_data_from_normal_v2_train(stocks, meta, predict_day, consider_lagday, feature_list=None, isShift=True):
    """Build training samples by walking ``stocks`` backwards in 5-row windows.

    Starting from the end of ``stocks``, each iteration takes one 5-row
    label window and one 5-row data window.  With ``isShift`` the data
    window is the 5 rows immediately before the label window; otherwise both
    windows are the same rows.  Position 5 is the last row of a window and
    position 1 the first.

    Args:
        stocks: 2-D array of per-row stock data; the last three columns are
            reduced with ``argmax`` to form the per-row label.
        meta: Column names for ``stocks``; forwarded to the feature extractor.
        predict_day: Window position (1-5) whose labels are returned.
        consider_lagday: Number of window positions (counted back from
            position 5) whose features are concatenated into the output.
        feature_list: Names of feature-extractor methods to call.
            Defaults to ``['ratio']``.
        isShift: Whether the data window precedes the label window.

    Returns:
        ``(data, label)``: the concatenated feature matrix and the list of
        class indices for window position ``predict_day``.

    Raises:
        ValueError: From ``np.stack`` when no complete window can be formed.
    """
    if feature_list is None:
        feature_list = ['ratio']

    idx = len(stocks)
    label = {position: [] for position in range(1, 6)}
    data = {position: [] for position in range(1, 6)}

    # How far the data window sits before the label window.
    lookback = 5 if isShift else 0
    # The second condition stops before the data window would need rows
    # with a negative index; previously those silently wrapped around to
    # the end of ``stocks`` and contaminated the oldest sample.
    while idx > 5 and idx - lookback >= 5:
        for i in range(1, 6):
            label[6 - i].append(np.argmax(stocks[idx - i, -3:], axis=-1))
        data_end = idx - lookback
        for i in range(1, 6):
            data[6 - i].append(stocks[data_end - i])
        idx -= 5

    features = {}
    for d in data:
        features[d] = {}
        window_rows = np.stack(data[d], axis=0)
        fe = fe_extr.feature_extractor(meta, window_rows)
        for feature_name in feature_list:
            features[d][feature_name], _ = getattr(fe, feature_name)()

    feature_concat = []
    for i in range(5, 5 - consider_lagday, -1):
        for k in features[i]:
            feature_concat.append(features[i][k])
    data = np.concatenate(feature_concat, axis=1)

    label = label[predict_day]
    return data, label
def clean_stock(single_stock, meta, feature_list):
    """Drop every row that has a NaN in any column used by the requested features.

    The column indices are gathered from the second return value of the
    ``ratio`` extractor method (always included) and of every method named
    in ``feature_list``.

    Args:
        single_stock: 2-D array of per-row stock data.
        meta: Column names for ``single_stock``; forwarded to the feature
            extractor.
        feature_list: Names of feature-extractor methods whose columns must
            be NaN-free.

    Returns:
        A new array holding only the rows without NaN in the gathered
        columns.  It stays 2-D even when every row is dropped.
    """
    fe = fe_extr.feature_extractor(meta, single_stock)

    feature_mask = []
    for name in ['ratio'] + list(feature_list):
        _, tmp_mask = getattr(fe, name)()
        feature_mask.extend(tmp_mask)

    if len(single_stock) == 0:
        return np.array(single_stock)

    # De-duplicate once instead of once per row.
    columns = list(set(feature_mask))
    has_nan = np.isnan(single_stock[:, columns]).any(axis=1)
    # Boolean indexing copies, so the caller's array is left untouched.
    return single_stock[~has_nan]
tmp_mask[i] = False dow_array_mask.append(tmp_mask) return dow_array_mask dow_array_mask = get_mask(dow_array_mask_mon) dow = {0: 'mon', 1: 'tue', 2: 'wed', 3: 'thu', 4: 'fri'} features = {} isShift = True for d in range(5): features[dow[d]] = {} shifted_stock = stocks[dow_array_mask[d]] if isShift == True: shifted_stock = shifted_stock[:-1] fe = f_extr.feature_extractor(meta_v, shifted_stock) for feature_name in feature_list: features[dow[d]][feature_name], _ = getattr(fe, feature_name)() feature_concat = [] dow = {0: 'mon', 1: 'tue', 2: 'wed', 3: 'thu', 4: 'fri'} for i in range(5): for k in features[dow[i]]: feature_concat.append(features[dow[i]][k]) train = np.concatenate(feature_concat, axis=1) label = np.argmax(stocks[dow_array_mask[predict_day]][1:, -3:], axis=-1) #***********Test**************** period_test = ('20180414', '20180610')