class MyAI(threading.Thread):
    """Thread that accumulates a per-code weight from big-deal summary tables.

    The scoring helpers ``analyzeKuKd`` and ``analyzeBigDealPer`` and the
    ``code`` attribute are used by ``processOneCode`` but are not defined in
    the part of the class visible here.
    """

    def __init__(self, day_index, volume_type):
        """Initialise tuning constants, inputs, result maps and the redis handle.

        day_index   -- day number embedded in the ``summary_<day>_amount_<n>``
                       table names read by ``processOneCode``.
        volume_type -- stored as-is (original note: small / mid / big).
        """
        threading.Thread.__init__(self)
        self.k = 1.6
        self.big_deal_threshold = 0.15
        self.turnoverratio = 4
        self.prev_ku = 0
        self.prev_kd = 0
        self.day_index = day_index
        self.volume_type = volume_type  # small mid big
        # add turnoverratio from index
        self.weight = {}
        self.symbol = {}
        self.redis = RedisOperator("localhost", 6379, 0)

    def processOneCode(self):
        """Score ``self.code`` across the five amount levels of the current day.

        Tables ``summary_<day>_amount_4`` down to ``..._amount_0`` are read in
        that order; every entry that exists is JSON-decoded and passed to
        ``analyzeKuKd`` with a level argument running from 5 down to 1, and the
        results are summed into ``self.weight[self.code]``.  Afterwards the
        level-0 entry is fetched once more and, when present, handed to
        ``analyzeBigDealPer``.

        Any exception is caught and reported on stdout (best-effort behaviour
        of the original is kept).
        """
        try:
            self.weight[self.code] = 0
            for level in range(5, 0, -1):
                table = "summary_" + str(self.day_index) + "_amount_" + str(level - 1)
                data = self.redis.hget(table, self.code)
                if data is None:
                    # no summary stored for this code at this level
                    continue
                data = json.loads(data)
                self.weight[self.code] += self.analyzeKuKd(data, level)

            table = "summary_" + str(self.day_index) + "_amount_0"
            s = self.redis.hget(table, self.code)
            # Guard on the value that is about to be parsed.  The original
            # tested the loop's leftover ``data`` variable instead, which
            # skipped the analysis when the last loop lookup had missed and
            # let json.loads(None) raise when this lookup missed.
            if s is not None:
                data = json.loads(s)
                self.analyzeBigDealPer(data)
        except Exception as e:
            print("processOneCode error %s \n" % (str(e)))
class Trainning(object):
    """Rank the 'sh' codes of one volume class by change percent and export them."""

    def __init__(self, day_index, volume_type):
        """Remember the inputs, connect to redis and derive the index hash name."""
        self.day_index, self.volume_type = day_index, volume_type
        # get redis connection
        self.redis = RedisOperator('localhost', 6379, 0)
        # sh small rank: code <-> changepercent
        self.sh_small = {}
        self.index_dict = "index_%s_dict" % (str(self.day_index),)

    def getTrainningCode(self):
        """Collect change percents for every known code and sort them descending.

        Codes come from ``code_vol_map['sh'][self.volume_type]``; a code with
        no entry in the index hash is skipped.  The sorted ``(code, pct)``
        pairs are stored in ``self.ranked``.
        """
        for code in code_vol_map['sh'][self.volume_type].keys():
            raw = self.redis.hget(self.index_dict, code)
            if raw is not None:
                record = json.loads(raw)
                self.sh_small[code] = float(record['changepercent'])
        by_percent = operator.itemgetter(1)
        self.ranked = sorted(self.sh_small.items(), key=by_percent, reverse=True)

    def writeToRedis(self):
        """Append the raw index entry of every ranked code to ``trainning_<type>``.

        Entries are pushed in ranking order; ``getTrainningCode`` must have
        populated ``self.ranked`` beforehand.
        """
        target = "trainning_" + self.volume_type
        for pair in self.ranked:
            raw = self.redis.hget(self.index_dict, pair[0])
            self.redis.rpush(target, raw)
def __init__(self, day_index, volume_type):
    """Store the inputs, open a redis handle and derive the index hash name.

    day_index   -- kept on the instance and embedded in ``index_dict``
                   as ``index_<day_index>_dict``.
    volume_type -- kept on the instance unchanged.
    """
    self.day_index, self.volume_type = day_index, volume_type
    # get redis connection
    self.redis = RedisOperator('localhost', 6379, 0)
    # sh small rank: code <-> changepercent
    self.sh_small = {}
    self.index_dict = "index_%s_dict" % (str(self.day_index),)
def __init__(self, day_index, volume_type):
    """Initialise the thread base, tuning constants, inputs and redis handle.

    day_index   -- stored on the instance unchanged.
    volume_type -- stored on the instance unchanged (original note:
                   small / mid / big).
    """
    threading.Thread.__init__(self)

    # tuning constants
    self.k = 1.6
    self.big_deal_threshold = 0.15
    self.turnoverratio = 4  # add turnoverratio from index
    self.prev_ku = self.prev_kd = 0

    # caller-supplied inputs
    self.day_index, self.volume_type = day_index, volume_type

    # result containers (two distinct dicts)
    self.weight, self.symbol = {}, {}

    self.redis = RedisOperator("localhost", 6379, 0)
def __init__(self, day_index):
    """Initialise the thread state and start the thread immediately.

    day_index -- stored on the instance; the index hash consulted for
                 prediction is the one of the following day
                 (``index_<day_index + 1>_dict``).
    """
    threading.Thread.__init__(self)

    # tuning constants
    self.k = 1.6
    self.turnoverratio = 4  # add turnoverratio from index
    self.prev_ku = self.prev_kd = 0

    self.day_index = day_index

    # result containers (two distinct dicts)
    self.weight, self.symbol = {}, {}

    self.redis = RedisOperator("localhost", 6379, 0)

    # day index used to predict stocks trending
    self.index_dict = "index_%s_dict" % (self.day_index + 1,)

    # the thread is launched from its own constructor, so this stays last
    self.start()
class MyAI(threading.Thread):
    """Thread that weighs stock codes using next-day index data and summaries.

    ``judgeData`` and the ``code`` attribute are used by ``processOneCode``
    but are not defined in the part of the class visible here.
    """

    def __init__(self, day_index):
        """Initialise state, connect to redis and start the thread."""
        threading.Thread.__init__(self)
        self.k = 1.6
        self.turnoverratio = 4  # add turnoverratio from index
        self.prev_ku = self.prev_kd = 0
        self.day_index = day_index
        self.weight, self.symbol = {}, {}
        self.redis = RedisOperator("localhost", 6379, 0)
        # day index used to predict stocks trending
        self.index_dict = "index_%s_dict" % (self.day_index + 1,)
        self.start()

    def prepareInfo(self):
        """Load change percent, turnover ratio and volume for every indexed code.

        Each field of the index hash is JSON-decoded and the three values are
        stored in ``self.symbol[code]`` as float, float and int respectively.
        """
        for code in self.redis.hkeys(self.index_dict):
            # register the code before parsing, as the original did
            entry = self.symbol[code] = {}
            record = json.loads(self.redis.hget(self.index_dict, code))
            entry['changepercent'] = float(record['changepercent'])
            entry['turnoverratio'] = float(record['turnoverratio'])
            entry['volume'] = int(record['volume'])

    # real time turnoverratio should be get from summary['stockvol']
    def RealTimePrepareInfo(self):
        """Placeholder; does nothing."""
        pass

    def processOneCode(self):
        """Sum ``judgeData`` results for ``self.code`` over the five amount levels.

        Tables ``summary_<day>_amount_4`` down to ``..._amount_0`` are read in
        that order with a level argument running from 5 down to 1; missing
        entries are skipped.  Any exception is caught and printed.
        """
        try:
            self.weight[self.code] = 0
            for level in range(5, 0, -1):
                table = "summary_" + str(self.day_index) + "_amount_" + str(level - 1)
                raw = self.redis.hget(table, self.code)
                if raw is not None:
                    self.weight[self.code] += self.judgeData(json.loads(raw), level)
        except Exception as e:
            print("processOneCode error %s \n" % (str(e)))
def __init__(self, data_type, api_type, level, date_string, day_index, db=0):
    """Store the collection parameters, open the data sources and start.

    data_type   -- used for params (original note: bill or index).
    api_type    -- stored unchanged.
    level       -- stored unchanged.
    date_string -- stored and also passed to ``RTDA``.
    day_index   -- stored unchanged.
    db          -- redis database number handed to ``RedisOperator``.

    After the attributes are set the table index is written via
    ``addTableIndex`` and the thread is started from the constructor.
    """
    threading.Thread.__init__(self)

    self.data_type, self.api_type = data_type, api_type
    self.level = level
    self.date_string, self.day_index = date_string, day_index
    self.db = db

    self.rtda = RTDA(date_string)
    self.redis = RedisOperator("localhost", 6379, self.db)

    # add table index to redis
    self.addTableIndex()

    self.start()
def __init__(self, day_index, vol_type):
    """Derive the redis table names and feature keys for one training day.

    day_index -- training day; the label table is the index hash of
                 ``day_index + 1``, the test summary table is that of
                 ``day_index + 1`` and the validation index hash is that of
                 ``day_index + 2``.
    vol_type  -- stored unchanged (original note: small, mid, big).
    """
    self.day_index, self.vol_type = day_index, vol_type
    self.big_deal_type = 0  # 0 - 4
    self.factors = ['ku']

    # factors are all from summary table
    next_day = self.day_index + 1
    self.summary_table = "summary_%s_amount_%s" % (self.day_index, self.big_deal_type)
    self.index_table = "index_%s_dict" % (next_day,)
    self.test_summary_table = "summary_%s_amount_%s" % (next_day, self.big_deal_type)
    self.validate_index_table = "index_%s_dict" % (self.day_index + 2,)

    self.x_keys = ['kuvolume', 'kdvolume', 'totalvolpct', 'changevolpct']
    self.y_keys = ['score']
    # changevolpct is derived from both table
    self.keys = ['kuvolume', 'kdvolume', 'totalvolpct']
    # structure: kuvolume, kdvolume, totalvolpct, changevolpct, score
    self.data_source = []

    self.redis = RedisOperator('localhost', 6379, 0)
class DataCollection(threading.Thread):
    '''
    Worker thread that pulls data through RTDA and stores it in redis.

    judge: by default, amount. Currently, volume is not supported
    data_type: bill, index
    api_type: bill_list, bill_list_summary, stocks_index
    level: 0 - 4.
    '''

    def __init__(self, data_type, api_type, level, date_string, day_index, db=0):
        """Store the parameters, open RTDA and redis, write the table index, start."""
        threading.Thread.__init__(self)
        self.data_type, self.api_type = data_type, api_type  # data_type: params, bill or index
        self.level = level
        self.date_string, self.day_index = date_string, day_index
        self.db = db
        self.rtda = RTDA(date_string)
        self.redis = RedisOperator("localhost", 6379, self.db)
        # add table index to redis
        self.addTableIndex()
        self.start()

    def addTableIndex(self):
        """Record day index, sort type and level in redis and pick the data table.

        The base name comes from ``redis_conf['name'][api_type]``.  Index data
        goes to ``<base>_<day>``; everything else to
        ``<base>_<day>_amount_<level>``.
        """
        base = redis_conf['name'][self.api_type]
        for field, value in (('day_index', self.day_index),
                             ('sort_type', 'amount'),
                             ('level', self.level)):
            self.redis.hset(base, field, value)

        per_day = base + "_" + str(self.day_index)
        if self.data_type == "index":
            self.data_table = per_day
        else:
            self.data_table = per_day + '_amount_' + str(self.level)

    def processOneCode(self, code):
        """Point RTDA at ``code`` and run the fetch that matches ``api_type``.

        Unknown api types set the code and otherwise do nothing.
        """
        self.code = code
        self.rtda.setCode(code)

        api = self.api_type
        if api == 'bill_list':
            self.rtda.setParams(self.data_type, amount=BIG_DEAL['amount'][self.level])
            self.getBillList()
            return
        if api == 'bill_list_summary':
            self.rtda.setParams(self.data_type, amount=BIG_DEAL['amount'][self.level])
            self.getBillListSummary()
            return
        if api == 'stocks_index':
            self.rtda.setParams(self.data_type, num=80)
            self.getStocksIndex()

    def getStocksIndex(self):
        """Fetch the index page by page (pages numbered from 1) and append each row.

        The page count is ``index_count / 80``; pages for which RTDA returns
        None are skipped.
        """
        for page_no in range(index_count / 80):
            self.rtda.setParams('index', page=page_no + 1)
            rows = self.rtda.getStocksIndex()
            if rows is None:
                continue
            for pos in range(len(rows)):
                self.redis.rpushJson(self.data_table, rows[pos])

    def getBillListSummary(self):
        """Store the first element of the summary response under the current code."""
        summary = self.rtda.getBillListSummary()[0]  # return one element array
        self.redis.hsetJson(self.data_table, self.code, summary)
        print("Write to redis for %s" % (self.data_table))

    def getBillList(self):
        """Placeholder; does nothing."""
        pass

    def run(self):
        """Thread body.

        For index data a single fixed code ("sh000001") is processed.
        Otherwise codes are taken without blocking from ``test_queue`` (when
        ``for_debug`` is set) or ``code_queue`` until the queue is empty;
        errors on one code are printed and the loop carries on.
        """
        if self.data_type == 'index':
            self.processOneCode("sh000001")
            return

        while True:
            try:
                source = test_queue if for_debug else code_queue
                code = source.get(False)
                print("I got code %s \n" % (code))
                self.processOneCode(code)
            except Queue.Empty:
                print("All works of DataCollection have been done \n")
                break
            except Exception as e:
                print("DataCollection Error : %s \n" % (str(e)))
data = data.replace('\'trade\'', '"trade"') data = data.replace('\'pricechange\'', '"pricechange"') #side affect data = data.replace('\'per\'', '"per"') data = data.replace('\'changepercent\'', '"changepercent"') data = data.replace('\'buy\'', '"buy"') data = data.replace('\'sell\'', '"sell"') data = data.replace('\'settlement\'', '"settlement"') data = data.replace('\'open\'', '"open"') data = data.replace('\'high\'', '"high"') data = data.replace('\'low\'', '"low"') data = data.replace('\'volume\'', '"volume"') data = data.replace('\'amount\'', '"amount"') data = data.replace('\'ticktime\'', '"ticktime"') data = data.replace('\'pb\'', '"pb"') data = data.replace('\'mktcap\'', '"mktcap"') data = data.replace('\'nmc\'', '"nmc"') data = data.replace('\'turnoverratio\'', '"turnoverratio"') return data r = RedisOperator("localhost", 6379, 0) data = r.lindex('index_1', 3) print data data = data.replace(', u\'', ', \'') data = data.replace('{u', '{') data = handleResponseStocksIndex(data) print data print json.loads(data)
class Prediction(object):
    """Fit and evaluate simple models on big-deal summary features.

    Features come from a summary table of one day, labels from the change
    percent stored in an index hash of a later day.
    """

    def __init__(self, day_index, vol_type):
        """Derive the redis table names and feature keys for ``day_index``."""
        self.day_index, self.vol_type = day_index, vol_type  # vol_type: small, mid, big
        self.big_deal_type = 0  # 0 - 4
        self.factors = ['ku']
        # factors are all from summary table
        next_day = self.day_index + 1
        self.summary_table = "summary_%s_amount_%s" % (self.day_index, self.big_deal_type)
        self.index_table = "index_%s_dict" % (next_day,)
        self.test_summary_table = "summary_%s_amount_%s" % (next_day, self.big_deal_type)
        self.validate_index_table = "index_%s_dict" % (self.day_index + 2,)
        self.x_keys = ['kuvolume', 'kdvolume', 'totalvolpct', 'changevolpct']
        self.y_keys = ['score']
        # changevolpct is derived from both table
        self.keys = ['kuvolume', 'kdvolume', 'totalvolpct']
        # structure: kuvolume, kdvolume, totalvolpct, changevolpct, score
        self.data_source = []
        self.redis = RedisOperator('localhost', 6379, 0)

    def LDARegression(self):
        """Fit a label encoder and an LDA classifier on the training day."""
        x, y = self.generateTrainningData(self.summary_table, self.index_table, 'LDA')
        self.label_encoder = LabelEncoder().fit(y.values.ravel())
        encoded = self.label_encoder.transform(y.values.ravel())
        self.ldaReg = LinearDiscriminantAnalysis()
        self.ldaReg.fit(x, encoded)

    def testLDARegression(self):
        """Predict the test day with the fitted LDA model, print and plot both series."""
        x, real_y = self.generateTrainningData(self.test_summary_table,
                                               self.validate_index_table, 'LDA')
        real_y = self.label_encoder.transform(real_y.values.ravel())
        predict_y = self.ldaReg.predict(x)
        print(predict_y)
        print(real_y)
        plt.figure()
        for series, fmt in ((predict_y, 'b-o'), (real_y, 'r-*')):
            plt.plot(range(len(series)), series, fmt)
        plt.show()

    def linearRegression(self):
        """Fit a linear regression on the training day and print its parameters."""
        x, y = self.generateTrainningData(self.summary_table, self.index_table)
        self.linreg = LinearRegression()
        fitted = self.linreg.fit(x, y)
        print(fitted)
        print(self.linreg.intercept_)
        named_coefs = zip(self.x_keys, self.linreg.coef_[0])
        print(named_coefs)

    def testLinearPrecision(self):
        """Evaluate the linear model on the test day: RMSE, a hit ratio and a plot.

        A sample counts as a miss only when the prediction is positive while
        the real score is negative; everything else counts as a hit.
        """
        x, real_y = self.generateTrainningData(self.test_summary_table,
                                               self.validate_index_table)
        # fetched as in the original; only referenced by a disabled plot line
        _, previous_y = self.generateTrainningData(self.summary_table, self.index_table)
        predict_y = self.linreg.predict(x)

        # RMSE
        squared = 0
        for i, guess in enumerate(predict_y):
            squared += (guess - real_y.values[i]) ** 2
        rmse = np.sqrt(squared / len(predict_y))
        print("RMSE: %s" % (rmse))

        hits = 0
        misses = 0
        for i, guess in enumerate(predict_y):
            if float(guess) > 0 and float(real_y.values[i][0]) < 0:
                misses += 1
            else:
                hits += 1
        print("Predict precison is %s" % (hits / float(hits + misses)))

        # plot lines
        plt.figure()
        plt.grid(True)
        plt.plot(range(len(predict_y)), predict_y, 'b-o', label="predict")
        plt.plot(range(len(real_y)), real_y, 'r-*', label="real")
        plt.show()

    def sigmod(self, raw):
        """Map the first column of ``raw`` to 0/1 labels through a logistic function.

        A row becomes 1 when ``1 / (1 + e**-value)`` is strictly above 0.5,
        otherwise 0.
        """
        labels = []
        for i in range(len(raw)):
            squashed = 1 / (1 + np.e ** (-raw.values[i][0]))
            labels.append(1 if squashed > 0.5 else 0)
        return labels

    def logisticRegression(self):
        """Fit a logistic regression on binarised training scores."""
        train_x, raw_train_y = self.generateTrainningData(self.summary_table,
                                                          self.index_table)
        self.logisticReg = LogisticRegression()
        self.logisticReg.fit(train_x, self.sigmod(raw_train_y))

    def testLogisticRegression(self):
        """Predict the test day with the logistic model and print its accuracy."""
        test_x, raw_real_y = self.generateTrainningData(self.test_summary_table,
                                                        self.validate_index_table)
        real_y = self.sigmod(raw_real_y)
        predict_y = self.logisticReg.predict(test_x)
        print(type(predict_y))
        print(np.mean(predict_y == real_y))

        hits = 0
        misses = 0
        for i in range(len(predict_y)):
            if int(predict_y[i] == real_y[i]):
                hits += 1
            else:
                misses += 1
        print("Predict precision is %s " % (hits / float(hits + misses)))

    def generateTrainningData(self, summary_table, index_table, change_type='normal'):
        """Build the feature frame ``x`` and the label frame ``y``.

        For every code of ``code_vol_map['sh'][vol_type]`` present in both
        redis hashes, the summary fields listed in ``self.keys`` are copied,
        ``changevolpct`` is computed as ``stockvol / mapped volume * 100`` and
        the change percent is turned into a score ('normal') or a category
        ('LDA'); any other ``change_type`` leaves the score at 0.

        Returns ``(df[x_keys], df[y_keys])``.
        """
        records = []
        volumes = code_vol_map['sh'][self.vol_type]
        for code in volumes.keys():
            summary_raw = self.redis.hget(summary_table, code)
            index_raw = self.redis.hget(index_table, code)
            if summary_raw is None or index_raw is None:
                continue

            summary = json.loads(summary_raw)
            row = {}
            for field in self.keys:
                row[field] = summary[field]
            # cal change vol percent
            row['changevolpct'] = float(summary['stockvol']) / volumes[code] * 100

            # trans cp to score
            cp = float(json.loads(index_raw)['changepercent'])
            if change_type == 'normal':
                row['score'] = self.transChange2Score(cp)
            elif change_type == 'LDA':
                row['score'] = self.transChange2Catalog(cp)
            else:
                row['score'] = 0
            records.append(row)

        # trainning data x
        frame = pd.DataFrame.from_records(records)
        return frame[self.x_keys], frame[self.y_keys]

    def transChange2Score(self, cp):
        """Bucket a change percent into a score between -10.0 and 10.0.

        Lower bounds are inclusive for the buckets from 1.0 upwards and
        exclusive from 0 downwards, so a change of exactly 0 scores -2.0.
        """
        for floor, score in ((10.0, 10.0), (7.0, 9.0), (5.0, 7.0),
                             (2.0, 5.0), (1.0, 2.0)):
            if cp >= floor:
                return score
        for floor, score in ((0, 0), (-2.0, -2.0), (-5.0, -5.0)):
            if cp > floor:
                return score
        return -10.0

    def transChange2Catalog(self, cp):
        """Bucket a change percent into category 5 (> 5.0), 3 (> 0) or 0."""
        if cp > 5.0:
            return 5
        return 3 if cp > 0 else 0
# -*- coding: utf8 -*-
import os
import json
from utils import *
from RedisOperator import RedisOperator

# module-wide redis handle shared by the helpers below
r = RedisOperator("localhost", 6379, 0)


def changeIndexToDict(day_index):
    """Copy the list ``index_<day_index>`` into the hash ``index_<day_index>_dict``.

    Every list element is a JSON string; its 'symbol' value becomes the hash
    field and the untouched JSON string becomes the hash value.
    """
    list_table = "index_" + str(day_index)
    hash_table = list_table + "_dict"
    for i in range(r.llen(list_table)):
        s = r.lindex(list_table, i)
        data = json.loads(s)
        r.hset(hash_table, data['symbol'], s)


def prepareInfo():
    """Read change percent, turnover ratio and volume for every code of day 2.

    The hash name 'index_2_dict' is fixed.  Returns a dict mapping each code
    to ``{'changepercent', 'turnoverratio', 'volume'}`` with the values as
    stored in the JSON (no type conversion).  The original built this mapping
    and then dropped it; it is returned now so callers can use the result.
    """
    symbol = {}
    hkeys = r.hkeys('index_2_dict')
    print(type(hkeys))
    for k in hkeys:
        symbol[k] = {}
        data = json.loads(r.hget('index_2_dict', k))
        for field in ('changepercent', 'turnoverratio', 'volume'):
            symbol[k][field] = data[field]
    return symbol