def alpha34(df):
    """
    Alpha#34

    Formula:
        rank(((1 - rank((stddev(returns, 2) / stddev(returns, 5))))
              + (1 - rank(delta(close, 1)))))

    Reads ``df.returns`` and ``df.close``; all operators come from ``u``.
    """
    # Ratio of the 2-period to the 5-period standard deviation of returns.
    stddev_ratio = u.stddev(df.returns, 2) / u.stddev(df.returns, 5)
    volatility_leg = 1 - u.rank(stddev_ratio)
    # One-period change in close, ranked and inverted.
    price_leg = 1 - u.rank(u.delta(df.close, 1))
    return u.rank(volatility_leg + price_leg)
def alpha38(df):
    """
    Alpha#38

    Formula:
        ((-1 * rank(Ts_Rank(close, 10))) * rank((close / open)))

    Reads ``df.close`` and ``df.open``.
    """
    close_ts_rank = u.rank(u.ts_rank(df.close, 10))
    close_over_open = u.rank(df.close / df.open)
    return (-1 * close_ts_rank) * close_over_open
def alpha50(df):
    """
    Alpha#50

    Formula:
        (-1 * ts_max(rank(correlation(rank(volume), rank(vwap), 5)), 5))

    Reads ``df.volume`` and ``df.vwap``.
    """
    ranked_corr = u.rank(u.corr(u.rank(df.volume), u.rank(df.vwap), 5))
    rolling_peak = u.ts_max(ranked_corr, 5)
    return -1 * rolling_peak
def alpha5(df):
    """
    Alpha#5

    Formula:
        (rank((open - (sum(vwap, 10) / 10)))
         * (-1 * abs(rank((close - vwap)))))

    Reads ``df.open``, ``df.close`` and ``df.vwap``.
    """
    vwap_avg = u.ts_sum(df.vwap, 10) / 10
    open_vs_avg = u.rank(df.open - vwap_avg)
    close_vs_vwap = abs(u.rank(df.close - df.vwap))
    return open_vs_avg * (-1 * close_vs_vwap)
def alpha37(df):
    """
    Alpha#37

    Formula:
        (rank(correlation(delay((open - close), 1), close, 200))
         + rank((open - close)))

    Reads ``df.open`` and ``df.close``.
    """
    lagged_spread = u.delay(df.open - df.close, 1)
    corr_rank = u.rank(u.corr(lagged_spread, df.close, 200))
    spread_rank = u.rank(df.open - df.close)
    return corr_rank + spread_rank
def alpha2(df):
    """
    Alpha#2

    Formula:
        (-1 * correlation(rank(delta(log(volume), 2)),
                          rank(((close - open) / open)), 6))

    Reads ``df.volume``, ``df.close`` and ``df.open``.
    """
    log_volume_change = u.rank(u.delta(np.log(df.volume), 2))
    open_to_close = u.rank((df.close - df.open) / df.open)
    return -1 * u.corr(log_volume_change, open_to_close, 6)
def alpha27(df):
    """
    Alpha#27

    Formula:
        ((0.5 < rank((sum(correlation(rank(volume), rank(vwap), 6), 2) / 2.0)))
         ? (-1 * 1) : 1)

    Returns a ``pd.Series`` indexed like ``df`` holding -1 where the
    condition is true and 1 everywhere else.
    """
    rank_corr = u.corr(u.rank(df.volume), u.rank(df.vwap), 6)
    ranked_avg = u.rank(u.ts_sum(rank_corr, 2) / 2.0)
    signal = np.where(0.5 < ranked_avg, -1, 1)
    return pd.Series(signal, index=df.index)
def alpha55(df):
    """
    Alpha#55

    Formula:
        (-1 * correlation(rank(((close - ts_min(low, 12))
                                / (ts_max(high, 12) - ts_min(low, 12)))),
                          rank(volume), 6))

    Reads ``df.close``, ``df.low``, ``df.high`` and ``df.volume``.
    """
    close_above_low = df.close - u.ts_min(df.low, 12)
    high_low_span = u.ts_max(df.high, 12) - u.ts_min(df.low, 12)
    position_rank = u.rank(close_above_low / high_low_span)
    return -1 * u.corr(position_rank, u.rank(df.volume), 6)
def alpha61(df):
    """
    Alpha#61

    Formula:
        (rank((vwap - ts_min(vwap, 16.1219)))
         < rank(correlation(vwap, adv180, 17.9282)))

    The fractional lookbacks are rounded to integers (16 and 18).
    Returns the element-wise comparison result.
    """
    vwap_above_min = u.rank(df.vwap - u.ts_min(df.vwap, 16))
    vwap_adv_corr = u.rank(u.corr(df.vwap, u.adv(df, 180), 18))
    return vwap_above_min < vwap_adv_corr
def alpha75(df):
    """
    Alpha#75

    Formula:
        (rank(correlation(vwap, volume, 4.24304))
         < rank(correlation(rank(low), rank(adv50), 12.4413)))

    The fractional lookbacks are used as the integers 4 and 12.
    Returns the element-wise comparison result.
    """
    vwap_volume_corr = u.rank(u.corr(df.vwap, df.volume, 4))
    low_adv_corr = u.rank(u.corr(u.rank(df.low), u.rank(u.adv(df, 50)), 12))
    return vwap_volume_corr < low_adv_corr
def alpha39(df):
    """
    Alpha#39

    Formula (as implemented below, with ``u.adv(df, 20)`` written as adv20):
        ((-1 * rank((delta(close, 7)
                     * (1 - rank(decay_linear((volume / adv20), 9))))))
         * (1 + rank(sum(returns, 250))))

    Reads ``df.close``, ``df.volume`` and ``df.returns``.
    """
    close_change = u.delta(df.close, 7)
    volume_weight = 1 - u.rank(u.decay_linear(df.volume / u.adv(df, 20), 9))
    short_leg = -1 * u.rank(close_change * volume_weight)
    long_leg = 1 + u.rank(u.ts_sum(df.returns, 250))
    return short_leg * long_leg
def alpha20(df):
    """
    Alpha#20

    Formula:
        (((-1 * rank((open - delay(high, 1))))
          * rank((open - delay(close, 1))))
         * rank((open - delay(low, 1))))

    Reads ``df.open``, ``df.high``, ``df.close`` and ``df.low``.
    """
    open_vs_prev_high = -1 * u.rank(df.open - u.delay(df.high, 1))
    open_vs_prev_close = u.rank(df.open - u.delay(df.close, 1))
    open_vs_prev_low = u.rank(df.open - u.delay(df.low, 1))
    return open_vs_prev_high * open_vs_prev_close * open_vs_prev_low
def alpha17(df):
    """
    Alpha#17

    Formula:
        (((-1 * rank(ts_rank(close, 10)))
          * rank(delta(delta(close, 1), 1)))
         * rank(ts_rank((volume / adv20), 5)))

    Reads ``df.close`` and ``df.volume``; adv20 is ``u.adv(df, 20)``.
    """
    close_leg = -1 * u.rank(u.ts_rank(df.close, 10))
    # Change of the one-period change in close.
    second_diff_leg = u.rank(u.delta(u.delta(df.close, 1), 1))
    volume_leg = u.rank(u.ts_rank(df.volume / u.adv(df, 20), 5))
    return (close_leg * second_diff_leg) * volume_leg
def alpha11(df):
    """
    Alpha#11

    Formula:
        ((rank(ts_max((vwap - close), 3)) + rank(ts_min((vwap - close), 3)))
         * rank(delta(volume, 3)))

    Reads ``df.vwap``, ``df.close`` and ``df.volume``.
    """
    max_spread_rank = u.rank(u.ts_max((df.vwap - df.close), 3))
    min_spread_rank = u.rank(u.ts_min((df.vwap - df.close), 3))
    volume_change_rank = u.rank(u.delta(df.volume, 3))
    # The two spread ranks are summed first and the sum is then scaled by the
    # volume-change rank; the parentheses are required because `*` binds
    # tighter than `+`.
    return (max_spread_rank + min_spread_rank) * volume_change_rank
def get_sloc_hour_latlon_sub_rank(result):
    """
    Apply the ``rank`` helper twice, keyed on
    ``['geohashed_start_loc', 'hour']``: first for ``'eloc_sloc_lat_sub'``
    (``rank_name='sloc_hour_lat_sub_rank'``), then for
    ``'eloc_sloc_lon_sub'`` (``rank_name='sloc_hour_lon_sub_rank'``),
    both with ``ascending=False``. Returns the result of the second call.
    """
    columns = (
        ('eloc_sloc_lat_sub', 'sloc_hour_lat_sub_rank'),
        ('eloc_sloc_lon_sub', 'sloc_hour_lon_sub_rank'),
    )
    for value_col, output_name in columns:
        # A fresh key list is built for every call.
        result = rank(result, ['geohashed_start_loc', 'hour'], value_col,
                      rank_name=output_name, ascending=False)
    return result
def alpha65(df):
    """
    Alpha#65

    Formula:
        ((rank(correlation(((open * 0.00817205) + (vwap * (1 - 0.00817205))),
                           sum(adv60, 8.6911), 6.40374))
          < rank((open - ts_min(open, 13.635)))) * -1)

    The fractional lookbacks are used as the integers 9, 6 and 14.
    """
    blended_price = (df.open * 0.00817205) + (df.vwap * (1 - 0.00817205))
    open_above_min = u.rank(df.open - u.ts_min(df.open, 14))
    adv_total = u.ts_sum(u.adv(df, 60), 9)
    corr_rank = u.rank(u.corr(blended_price, adv_total, 6))
    return (corr_rank < open_above_min) * -1
def alpha78(df):
    """
    Alpha#78

    Formula:
        (rank(correlation(sum(((low * 0.352233) + (vwap * (1 - 0.352233))),
                              19.7428),
                          sum(adv40, 19.7428), 6.83313))
         ^ rank(correlation(rank(vwap), rank(volume), 5.77492)))

    The fractional lookbacks are used as the integers 20, 7 and 6.
    """
    blended_total = u.ts_sum(
        (df.low * 0.352233) + (df.vwap * (1 - 0.352233)), 20)
    exponent = u.rank(u.corr(u.rank(df.vwap), u.rank(df.volume), 6))
    base = u.rank(u.corr(blended_total, u.ts_sum(u.adv(df, 40), 20), 7))
    return base ** exponent
def alpha45(df):
    """
    Alpha#45

    Formula:
        (-1 * ((rank((sum(delay(close, 5), 20) / 20))
                * correlation(close, volume, 2))
               * rank(correlation(sum(close, 5), sum(close, 20), 2))))

    Reads ``df.close`` and ``df.volume``.
    """
    lagged_avg_rank = u.rank(u.ts_sum(u.delay(df.close, 5), 20) / 20)
    close_volume_corr = u.corr(df.close, df.volume, 2)
    sums_corr_rank = u.rank(
        u.corr(u.ts_sum(df.close, 5), u.ts_sum(df.close, 20), 2))
    return -1 * ((lagged_avg_rank * close_volume_corr) * sums_corr_rank)
def alpha47(df):
    """
    Alpha#47

    Formula:
        ((((rank((1 / close)) * volume) / adv20)
          * ((high * rank((high - close))) / (sum(high, 5) / 5)))
         - rank((vwap - delay(vwap, 5))))

    Reads ``df.close``, ``df.volume``, ``df.high`` and ``df.vwap``.
    """
    volume_leg = (u.rank(1 / df.close) * df.volume) / u.adv(df, 20)
    high_leg = (df.high * u.rank(df.high - df.close)) / (
        u.ts_sum(df.high, 5) / 5)
    vwap_change_rank = u.rank(df.vwap - u.delay(df.vwap, 5))
    return (volume_leg * high_leg) - vwap_change_rank
def alpha95(df):
    """
    Alpha#95

    Formula:
        (rank((open - ts_min(open, 12.4105)))
         < Ts_Rank((rank(correlation(sum(((high + low) / 2), 19.1351),
                                     sum(adv40, 19.1351), 12.8742))^5),
                   11.7584))

    The fractional lookbacks are used as the integers 12, 19, 13 and 12.
    """
    open_above_min = u.rank(df.open - u.ts_min(df.open, 12))
    midpoint_adv_corr = u.corr(
        u.ts_sum((df.high + df.low) / 2, 19),
        u.ts_sum(u.adv(df, 40), 19),
        13)
    powered_rank = u.rank(midpoint_adv_corr) ** 5
    return open_above_min < u.ts_rank(powered_rank, 12)
def get_user_eloc_hour_latlon_sub_rank(result):
    """
    Apply the ``rank`` helper twice, keyed on
    ``['userid', 'geohashed_end_loc', 'hour']``: first for
    ``'eloc_sloc_lat_sub'`` (``rank_name='user_eloc_hour_lat_sub_rank'``),
    then for ``'eloc_sloc_lon_sub'``
    (``rank_name='user_eloc_hour_lon_sub_rank'``), both with
    ``ascending=False``. Returns the result of the second call.
    """
    columns = (
        ('eloc_sloc_lat_sub', 'user_eloc_hour_lat_sub_rank'),
        ('eloc_sloc_lon_sub', 'user_eloc_hour_lon_sub_rank'),
    )
    for value_col, output_name in columns:
        # A fresh key list is built for every call.
        result = rank(result, ['userid', 'geohashed_end_loc', 'hour'],
                      value_col, rank_name=output_name, ascending=False)
    return result
def alpha68(df):
    """
    Alpha#68

    Formula:
        ((Ts_Rank(correlation(rank(high), rank(adv15), 8.91644), 13.9333)
          < rank(delta(((close * 0.518371) + (low * (1 - 0.518371))),
                       1.06157))) * -1)

    The fractional lookbacks are used as the integers 9, 14 and 1.
    Reads ``df.high``, ``df.close`` and ``df.low``; adv15 is
    ``u.adv(df, 15)``.
    """
    corr_ts_rank = u.ts_rank(
        u.corr(u.rank(df.high), u.rank(u.adv(df, 15)), 9), 14)
    blend_delta_rank = u.rank(
        u.delta(((df.close * 0.518371) + (df.low * (1 - 0.518371))), 1))
    # The alpha is the negated comparison of the two legs, not the
    # right-hand leg on its own.
    return ((corr_ts_rank < blend_delta_rank) * -1)
def alpha60(df):
    """
    Alpha#60

    Formula:
        (0 - (1 * ((2 * scale(rank(((((close - low) - (high - close))
                                     / (high - low)) * volume))))
                   - scale(rank(ts_argmax(close, 10))))))

    Reads ``df.close``, ``df.low``, ``df.high`` and ``df.volume``.
    """
    range_position = ((df.close - df.low) - (df.high - df.close)) / (
        df.high - df.low)
    volume_leg = u.scale(u.rank(range_position * df.volume))
    argmax_leg = u.scale(u.rank(u.ts_argmax(df.close, 10)))
    return 0 - (1 * ((2 * volume_leg) - argmax_leg))
def alpha74(df):
    """
    Alpha#74

    Formula:
        ((rank(correlation(close, sum(adv30, 37.4843), 15.1365))
          < rank(correlation(rank(((high * 0.0261661)
                                   + (vwap * (1 - 0.0261661)))),
                             rank(volume), 11.4791))) * -1)

    The fractional lookbacks are used as the integers 37, 15 and 11.
    """
    close_adv_corr = u.rank(
        u.corr(df.close, u.ts_sum(u.adv(df, 30), 37), 15))
    blended_rank = u.rank(
        (df.high * 0.0261661) + (df.vwap * (1 - 0.0261661)))
    blend_volume_corr = u.rank(u.corr(blended_rank, u.rank(df.volume), 11))
    return (close_adv_corr < blend_volume_corr) * -1
def alpha99(df):
    """
    Alpha#99

    Formula:
        ((rank(correlation(sum(((high + low) / 2), 19.8975),
                           sum(adv60, 19.8975), 8.8136))
          < rank(correlation(low, volume, 6.28259))) * -1)

    The fractional lookbacks are used as the integers 20, 9 and 6.
    Returns the comparison multiplied by -1, i.e. -1 where the first rank
    is below the second and 0 elsewhere.
    """
    midpoint_adv_corr = u.rank(
        u.corr(u.ts_sum(((df.high + df.low) / 2), 20),
               u.ts_sum(u.adv(df, 60), 20), 9))
    low_volume_corr = u.rank(u.corr(df.low, df.volume, 6))
    # The formula yields a -1/0 indicator of the comparison; it must not
    # return the (negated) rank values themselves.
    return ((midpoint_adv_corr < low_volume_corr) * -1)
def alpha83(df):
    """
    Alpha#83

    Formula:
        ((rank(delay(((high - low) / (sum(close, 5) / 5)), 2))
          * rank(rank(volume)))
         / (((high - low) / (sum(close, 5) / 5)) / (vwap - close)))

    Reads ``df.high``, ``df.low``, ``df.close``, ``df.volume`` and
    ``df.vwap``.
    """
    lagged_range_ratio = u.delay(
        (df.high - df.low) / (u.ts_sum(df.close, 5) / 5), 2)
    numerator = u.rank(lagged_range_ratio) * u.rank(u.rank(df.volume))
    current_range_ratio = (df.high - df.low) / (u.ts_sum(df.close, 5) / 5)
    denominator = current_range_ratio / (df.vwap - df.close)
    return numerator / denominator
def get_user_latlon_sub_rank(result):
    """
    Apply the ``rank`` helper twice, keyed on ``'userid'``: first for
    ``'eloc_sloc_lat_sub'`` (``rank_name='user_lat_sub_rank'``), then for
    ``'eloc_sloc_lon_sub'`` (``rank_name='user_lon_sub_rank'``), both with
    ``ascending=False``. Returns the result of the second call.
    """
    columns = (
        ('eloc_sloc_lat_sub', 'user_lat_sub_rank'),
        ('eloc_sloc_lon_sub', 'user_lon_sub_rank'),
    )
    for value_col, output_name in columns:
        result = rank(result, 'userid', value_col,
                      rank_name=output_name, ascending=False)
    return result
def get_eloc_latlon_sub_rank(result):
    """
    Apply the ``rank`` helper twice, keyed on ``'geohashed_end_loc'``:
    first for ``'eloc_sloc_lat_sub'`` (``rank_name='eloc_lat_sub_rank'``),
    then for ``'eloc_sloc_lon_sub'`` (``rank_name='eloc_lon_sub_rank'``),
    both with ``ascending=False``. Returns the result of the second call.
    """
    columns = (
        ('eloc_sloc_lat_sub', 'eloc_lat_sub_rank'),
        ('eloc_sloc_lon_sub', 'eloc_lon_sub_rank'),
    )
    for value_col, output_name in columns:
        result = rank(result, 'geohashed_end_loc', value_col,
                      rank_name=output_name, ascending=False)
    return result
def alpha64(df):
    """
    Alpha#64

    Formula:
        ((rank(correlation(sum(((open * 0.178404) + (low * (1 - 0.178404))),
                               12.7054),
                           sum(adv120, 12.7054), 16.6208))
          < rank(delta(((((high + low) / 2) * 0.178404)
                        + (vwap * (1 - 0.178404))), 3.69741))) * -1)

    The fractional lookbacks are used as the integers 13, 17 and 4.
    """
    blended_total = u.ts_sum(
        ((df.open * 0.178404) + (df.low * (1 - 0.178404))), 13)
    # Both sums share the 12.7054 lookback in the formula, so the adv120
    # sum uses the same 13-period window as the blended-price sum.
    adv_total = u.ts_sum(u.adv(df, 120), 13)
    corr_rank = u.rank(u.corr(blended_total, adv_total, 17))
    delta_rank = u.rank(
        u.delta(((((df.high + df.low) / 2) * 0.178404) +
                 (df.vwap * (1 - 0.178404))), 4))
    return ((corr_rank < delta_rank) * -1)
def alpha98(df):
    """
    Alpha#98

    Formula:
        (rank(decay_linear(correlation(vwap, sum(adv5, 26.4719), 4.58418),
                           7.18088))
         - rank(decay_linear(Ts_Rank(Ts_ArgMin(correlation(rank(open),
                                                           rank(adv15),
                                                           20.8187),
                                               8.62571),
                                     6.95668),
                             8.07206)))

    Every fractional lookback is used as an integer:
    26, 5, 7, 21, 9, 7 and 8.
    """
    argmin_ts_rank = u.ts_rank(
        u.ts_argmin(u.corr(u.rank(df.open), u.rank(u.adv(df, 15)), 21), 9),
        7)
    # 8.07206 is passed as the integer 8, matching how every other
    # lookback in this function is handled.
    right_leg = u.rank(u.decay_linear(argmin_ts_rank, 8))
    left_leg = u.rank(
        u.decay_linear(u.corr(df.vwap, u.ts_sum(u.adv(df, 5), 26), 5), 7))
    return (left_leg - right_leg)
def testRank(self):
    """
    Check ``utils.rank`` on two score lists that contain tied values,
    for ``ties`` set to "random", "first" and "last".

    With ``ties="random"`` the result only has to be one of the listed
    orderings; with "first"/"last" an exact ranking is expected.
    """
    # Five scores with one tied pair (the two 2.3 values).
    five_scores = [2.1, 2.9, 2.3, 2.3, 5.5]
    ascending_options = [[0, 3, 1, 2, 4], [0, 3, 2, 1, 4]]
    descending_options = [[4, 1, 3, 2, 0], [4, 1, 2, 3, 0]]
    self.assertIn(utils.rank(five_scores, ties="random"),
                  ascending_options)
    self.assertIn(utils.rank(five_scores, reverse=True, ties="random"),
                  descending_options)
    self.assertEqual(utils.rank(five_scores, reverse=True, ties="first"),
                     [4, 1, 2, 3, 0])
    self.assertEqual(utils.rank(five_scores, reverse=True, ties="last"),
                     [4, 1, 3, 2, 0])

    # Six scores with two tied pairs (2.3 twice and 2.9 twice).
    six_scores = [2.1, 2.9, 2.3, 2.3, 5.5, 2.9]
    ascending_options = [
        [0, 4, 2, 1, 5, 3],
        [0, 3, 2, 1, 5, 4],
        [0, 4, 1, 2, 5, 3],
        [0, 3, 1, 2, 5, 4],
    ]
    descending_options = [
        [5, 1, 3, 4, 0, 2],
        [5, 2, 3, 4, 0, 1],
        [5, 1, 4, 3, 0, 2],
        [5, 2, 4, 3, 0, 1],
    ]
    self.assertIn(utils.rank(six_scores, ties="random"),
                  ascending_options)
    self.assertIn(utils.rank(six_scores, reverse=True, ties="random"),
                  descending_options)
    self.assertEqual(utils.rank(six_scores, reverse=True, ties="first"),
                     [5, 1, 3, 4, 0, 2])
    self.assertEqual(utils.rank(six_scores, reverse=True, ties="last"),
                     [5, 2, 4, 3, 0, 1])
def init_ranking(self, query):
    """
    Prepare a ranking for ``query``.

    Sets ``self.qid``, ``self.docids`` (document ids ordered by descending
    rank value) and ``self.probs`` (one normalised weight per rank
    position, computed as ``max_rank / position ** self.ranker_type``
    divided by the total).
    """
    self.qid = query.get_qid()
    feature_vectors = query.get_feature_vectors()
    scores = self.ranking_model.score(feature_vectors, self.w.transpose())

    # Rank the scores and pair every document id with its rank value.
    ranks = rank(scores, ties=self.ties, reverse=False)
    pairs = [(ranks[pos], docid)
             for pos, docid in enumerate(query.__docids__)]
    # Highest rank value first.
    pairs = sorted(pairs, reverse=True)
    self.docids = [docid for (_, docid) in pairs]

    # The scores are ranked a second time; the sorted values, shifted to
    # start at 1.0, serve as rank positions for the probabilities.
    resorted = sorted(rank(scores, ties=self.ties, reverse=False))
    ranks = np.asarray([value + 1.0 for value in resorted])

    # Weight every position by max_rank / position ** ranker_type.
    max_rank = len(ranks)
    weights = max_rank / pow(ranks, self.ranker_type)
    self.probs = weights / sum(weights)
def init_ranking(self, query):
    """
    Prepare a ranking for ``query``.

    Sets ``self.qid`` and ``self.docids``; the document ids are ordered by
    descending rank value, with ties handled by ``rank`` according to
    ``self.ties``.
    """
    self.qid = query.get_qid()
    feature_vectors = query.get_feature_vectors()
    scores = self.ranking_model.score(feature_vectors, self.w.transpose())
    ranks = rank(scores, reverse=False, ties=self.ties)

    # Pair each document id with its rank value, then put the highest
    # rank value first.
    pairs = sorted(
        [(ranks[pos], docid)
         for pos, docid in enumerate(query.get_docids())],
        reverse=True)
    self.docids = [docid for (_, docid) in pairs]
def degree_rank(G, C, q, arguments):
    """
    Finds the communities of G by the degree-rank method.

    Calls ``degree_rank_inner`` repeatedly, starting from the value ``q``,
    until the gain ``new_q - q`` of a pass is at most ``arguments.tsh``.
    Returns the value produced by the last pass.
    """
    consider = rank(G.k)
    not_seen = set(xrange(G.n))
    while True:
        # Only the first element of the returned pair is used here.
        new_q, _ = degree_rank_inner(G, C, consider, not_seen, q, arguments)
        # Start every pass with the full set of indices again.
        not_seen = set(xrange(G.n))
        if new_q - q <= arguments.tsh:
            return new_q
        q = new_q
def get_user_sloc_distance_rank(result):
    """
    Call the ``rank`` helper on ``result`` for ``'distance'`` keyed on
    ``['userid', 'geohashed_start_loc']`` with
    ``rank_name='user_sloc_distance_rank'`` and ``ascending=False``,
    and return what it returns.
    """
    group_keys = ['userid', 'geohashed_start_loc']
    return rank(result, group_keys, 'distance',
                rank_name='user_sloc_distance_rank', ascending=False)
def get_sloc_eloc_hour_rank(result):
    """
    Call the ``rank`` helper on ``result`` for ``'hour'`` keyed on
    ``['geohashed_start_loc', 'geohashed_end_loc']`` with
    ``rank_name='sloc_eloc_hour_rank'`` and ``ascending=False``,
    and return what it returns.
    """
    group_keys = ['geohashed_start_loc', 'geohashed_end_loc']
    return rank(result, group_keys, 'hour',
                rank_name='sloc_eloc_hour_rank', ascending=False)
def get_eloc_hour_rank(result):
    """
    Call the ``rank`` helper on ``result`` for ``'hour'`` keyed on
    ``'geohashed_end_loc'`` with ``rank_name='eloc_hour_rank'`` and
    ``ascending=False``, and return what it returns.
    """
    return rank(result, 'geohashed_end_loc', 'hour',
                rank_name='eloc_hour_rank', ascending=False)
def get_sloc_hour_distance_rank(result):
    """
    Call the ``rank`` helper on ``result`` for ``'distance'`` keyed on
    ``['geohashed_start_loc', 'hour']`` with
    ``rank_name='sloc_hour_distance_rank'`` and ``ascending=False``,
    and return what it returns.
    """
    group_keys = ['geohashed_start_loc', 'hour']
    return rank(result, group_keys, 'distance',
                rank_name='sloc_hour_distance_rank', ascending=False)
def get_eloc_distance_rank(result):
    """
    Call the ``rank`` helper on ``result`` for ``'distance'`` keyed on
    ``'geohashed_end_loc'`` with ``rank_name='eloc_distance_rank'`` and
    ``ascending=False``, and return what it returns.
    """
    return rank(result, 'geohashed_end_loc', 'distance',
                rank_name='eloc_distance_rank', ascending=False)
def get_user_eloc_hour_rank(result):
    """
    Call the ``rank`` helper on ``result`` for ``'hour'`` keyed on
    ``['userid', 'geohashed_end_loc']`` with
    ``rank_name='user_eloc_hour_rank'`` and ``ascending=False``,
    and return what it returns.
    """
    group_keys = ['userid', 'geohashed_end_loc']
    return rank(result, group_keys, 'hour',
                rank_name='user_eloc_hour_rank', ascending=False)
def get_user_hour_rank(result):
    """
    Call the ``rank`` helper on ``result`` for ``'hour'`` keyed on
    ``'userid'`` with ``rank_name='user_hour_rank'`` and
    ``ascending=False``, and return what it returns.
    """
    return rank(result, 'userid', 'hour',
                rank_name='user_hour_rank', ascending=False)