def divide_archived_news(traingingStart, trainingEnd, estimationStart, estimationEnd):
    archivedNewsPath = common.get_configuration("training", "GROUP_STOCK_NEWS")
    archivedNews = json.load(open(archivedNewsPath), encoding='ISO-8859-1')
    trainingPhaseNews = {}
    testPhaseNews = {}
    timelineBegin = time.strptime(traingingStart, "%Y-%m-%d")
    timelineEnd = time.strptime(trainingEnd, "%Y-%m-%d")
    eTimeLineBegin = time.strptime(estimationStart, "%Y-%m-%d")
    eTimeLineEnd = time.strptime(estimationEnd, "%Y-%m-%d")
    for stock in archivedNews:
        if stock not in trainingPhaseNews:
            trainingPhaseNews[stock] = {}
        if stock not in testPhaseNews:
            testPhaseNews[stock] = {}
        for articleId in archivedNews[stock]:
            newsDate = time.strptime(articleId[0:8], "%Y%m%d")
            if timelineBegin <= newsDate <= timelineEnd:
                trainingPhaseNews[stock][articleId] = archivedNews[stock][articleId]
            elif eTimeLineBegin <= newsDate <= eTimeLineEnd:
                testPhaseNews[stock][articleId] = archivedNews[stock][articleId]

    # Write training data and test data to file
    trainingFilePath = common.get_configuration("training", "TRAINING_NEWS_FILE")
    with open(trainingFilePath, "w") as output:
        output.write(json.dumps(trainingPhaseNews))
    testingFilePath = common.get_configuration("training", "TESTING_NEWS_FILE")
    with open(testingFilePath, "w") as output:
        output.write(json.dumps(testPhaseNews))

def divide_archived_news(endDate):
    archivedNewsPath = common.get_configuration("model", "GROUP_STOCK_NEWS")
    archivedNews = json.load(open(archivedNewsPath), encoding='ISO-8859-1')
    trainingPhaseNews = {}
    testPhaseNews = {}
    timeLine = time.strptime(endDate, "%Y-%m-%d")
    for stock in archivedNews:
        if stock not in trainingPhaseNews:
            trainingPhaseNews[stock] = {}
        if stock not in testPhaseNews:
            testPhaseNews[stock] = {}
        for articleId in archivedNews[stock]:
            newsDate = time.strptime(articleId[0:8], "%Y%m%d")
            if newsDate < timeLine:
                trainingPhaseNews[stock][articleId] = archivedNews[stock][articleId]
            else:
                testPhaseNews[stock][articleId] = archivedNews[stock][articleId]

    # Write training data and test data to file
    trainingFilePath = common.get_configuration("model", "TRAINING_NEWS_FILE")
    with open(trainingFilePath, "w") as output:
        output.write(json.dumps(trainingPhaseNews))
    testingFilePath = common.get_configuration("model", "TESTING_NEWS_FILE")
    with open(testingFilePath, "w") as output:
        output.write(json.dumps(testPhaseNews))

def create_conf(warning_threshold, news_back):
    termConFile = common.get_configuration("model", "TERM_CONTRIBUTION_PATH")
    clustConFile = common.get_configuration("model", "CLUSTER_CONTRIBUTION_PATH")
    clustProFile = common.get_configuration("model", "CLUSTER_PROBABILITY_PATH")
    keyWordsFile = common.get_configuration("training", "VOCABULARY_FILE")
    trendFile = common.get_configuration("model", "TREND_RANGE_FILE")

    conf = {}
    conf["1"] = {}
    conf["1"]["termContribution"] = json.load(open(termConFile))
    conf["1"]["clusterProbability"] = json.load(open(clustProFile))
    conf["1"]["clusterContribution"] = json.load(open(clustConFile))
    conf["1"]["location"] = {"BVPSBVPS": "Panama", "MERVAL": "Argentina", "IBOV": "Brazil",
                             "CHILE65": "Chile", "COLCAP": "Colombia", "CRSMBCT": "Costa Rica",
                             "MEXBOL": "Mexico", "IGBVL": "Peru", "IBVC": "Venezuela"}
    conf["1"]["stocks"] = ["MERVAL", "IBOV", "CHILE65", "COLCAP", "CRSMBCT",
                           "MEXBOL", "BVPSBVPS", "IGBVL", "IBVC"]
    conf["1"]["kyewordList"] = json.load(open(keyWordsFile))
    conf["1"]["warning_threshold"] = warning_threshold
    conf["1"]["version"] = "1"
    conf["1"]["news_back"] = news_back
    with open("./model_test.conf", "w") as o_q:
        o_q.write(json.dumps(conf))

    conf_trend = {}
    conf_trend["1"] = json.load(open(trendFile))
    with open("./trendRange.json", "w") as o_q:
        o_q.write(json.dumps(conf_trend))

def get_company_list():
    comDir = common.get_configuration("model", "COMPANY_MEMBER")
    sfile = os.listdir(comDir)
    companyList = {}
    for fi in sfile:
        with open(comDir + "/" + fi) as comFile:
            lines = comFile.readlines()
            stockIndex = lines[1].replace("\r", "").replace("\n", "").split(",")[1].replace(" Index", "")
            if stockIndex not in companyList:
                companyList[stockIndex] = []
            for line in lines[2:]:
                infos = line.replace("\r", "").replace("\n", "").split(",")
                companyName = infos[2]
                tmps = companyName.split(" ")
                companyName = " ".join(tmps[:len(tmps) - 1 if len(tmps) > 1 else len(tmps)])
                if companyName not in companyList[stockIndex]:
                    companyList[stockIndex].append(companyName)
            companyList[stockIndex].append(stockIndex)

    desFile = common.get_configuration("model", "COMPANY_LIST")
    with open(desFile, "w") as output:
        jsStr = json.dumps(companyList)
        output.write(jsStr)

def compute_term_contribution():
    # Read the vocabulary file
    vocabularyFilePath = common.get_configuration("model", "VOCABULARY_FILE")
    vocaLines = open(vocabularyFilePath).readlines()
    vocaList = [w.replace("\n", "") for w in vocaLines]
    stemmer = nltk.stem.snowball.SnowballStemmer('english')
    print "StartTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

    finalWordContribution = {}
    # Iterate over each stock index
    trainingFile = group_news_by_cluster()
    for index in trainingFile:
        stockNews = trainingFile[index]
        wordContribution = {}
        for cluster in stockNews:
            # Count the words in each cluster
            articles = cluster["articles"]
            # Initialize the word frequencies
            wordFreq = {}
            for term in vocaList:
                wordFreq[term] = 0
            for article in articles:
                content = article["content"]
                tokens = nltk.word_tokenize(content)
                words = [w.lower() for w in tokens
                         if w not in [",", ".", ")", "]", "(", "[", "*", ";", "...", ":", "&", '"']
                         and not w.isdigit()]
                words = [w for w in words
                         if w.encode("utf8") not in nltk.corpus.stopwords.words('english')]
                stemmedWords = [stemmer.stem(w) for w in words]
                fdist = nltk.FreqDist(stemmedWords)
                for term in wordFreq:
                    if term in fdist:
                        wordFreq[term] = wordFreq[term] + fdist[term]
            # Compute each word's contribution (Laplace-smoothed relative frequency);
            # float() avoids Python 2 integer division, which would always yield 0
            count = sum(wordFreq.values())
            contributions = {}
            for term in wordFreq:
                contribution = (wordFreq[term] + 1) / float(count + len(wordFreq))
                contributions[term] = "%0.4f" % contribution
                # print "term:%s, contribution:%f" % (term, contribution)
            # Add the contributions to each cluster
            wordContribution[cluster["cluster"]] = contributions
        finalWordContribution[index] = wordContribution
    print "EndTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

    # Write the term contribution to file
    termContributionFile = common.get_configuration("model", "TERM_CONTRIBUTION_PATH")
    jsString = json.dumps(finalWordContribution)
    with open(termContributionFile, "w") as output:
        output.write(jsString)

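# The contribution value computed above is a Laplace-smoothed relative frequency: each term's
# count is incremented by one and divided by the total count plus the vocabulary size, so unseen
# terms still receive a small non-zero probability. A minimal sketch of that arithmetic on toy
# counts (the words and numbers here are made up purely for illustration):
wordFreq = {"crisis": 3, "growth": 1, "default": 0}
count = sum(wordFreq.values())    # 4 observed tokens
vocabSize = len(wordFreq)         # 3 vocabulary terms
for term in wordFreq:
    contribution = (wordFreq[term] + 1) / float(count + vocabSize)
    print term, "%0.4f" % contribution
# crisis -> 4/7 = 0.5714, growth -> 2/7 = 0.2857, default -> 1/7 = 0.1429
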
def compute_stock_index_probability(self, predictiveDate, clusterType, stockIndex):
    try:
        # Get the clusters list
        stockIndexFile = open(common.get_configuration("model", 'CLUSTER_PROBABILITY_PATH'))
        clusterProbability = json.load(stockIndexFile)
        clusterJson = {}
        clusterContributionJson = {}
        clusterJson = clusterProbability[stockIndex]

        # Get the contribution of each cluster
        clusterContributionFile = open(common.get_configuration("model", 'CLUSTER_CONTRIBUTION_PATH'))
        clusterContributionJson = json.load(clusterContributionFile)

        clusterTypesHistory, stockDerived = self.get_stock_index_cluster(predictiveDate, stockIndex)
        stockIndexProbability = 0
        for key in clusterContributionJson[stockIndex].keys():
            if key == str(clusterType):
                # Search the cluster contribution matrix to get the contribution probability
                stockIndexProbability = stockIndexProbability \
                    + math.log(float(clusterContributionJson[stockIndex][key][int(clusterTypesHistory[0]) - 1][2])) \
                    + math.log(float(clusterContributionJson[stockIndex][key][int(clusterTypesHistory[1]) - 1][1])) \
                    + math.log(float(clusterContributionJson[stockIndex][key][int(clusterTypesHistory[2]) - 1][0])) \
                    + math.log(float(clusterJson[str(clusterType)]))
        return stockIndexProbability, stockDerived
    except Exception as e:
        print traceback.format_exc()
        print "Error in computing stock index probability: %s" % e.args

def compute_term_contribution():
    # Read the vocabulary file
    vocabularyFilePath = common.get_configuration("training", "VOCABULARY_FILE")
    vocaLines = open(vocabularyFilePath).readlines()
    vocaList = [w.replace("\n", "") for w in vocaLines]
    stemmer = nltk.stem.snowball.SnowballStemmer('english')
    print "StartTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

    finalWordContribution = {}
    # Iterate over each stock index
    trainingFile = group_news_by_cluster()
    print "Finish Group news by cluster"
    for index in trainingFile:
        stockNews = trainingFile[index]
        wordContribution = {}
        for cluster in stockNews:
            # Count the words in each cluster
            print "Start Cluster ", cluster["cluster"], "For Stock ", index, "at ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
            articles = cluster["articles"]
            # Initialize the word frequencies
            wordFreq = {}
            for term in vocaList:
                wordFreq[term] = 0
            for article in articles:
                content = article["content"]
                tokens = nltk.word_tokenize(content)
                words = [w.lower() for w in tokens
                         if w not in [",", ".", ")", "]", "(", "[", "*", ";", "...", ":", "&", '"']
                         and not w.isdigit()]
                words = [w for w in words
                         if w.encode("utf8") not in nltk.corpus.stopwords.words('english')]
                stemmedWords = [stemmer.stem(w) for w in words]
                fdist = nltk.FreqDist(stemmedWords)
                for term in wordFreq:
                    if term in fdist:
                        wordFreq[term] = wordFreq[term] + fdist[term]
            # Compute each word's contribution (Laplace-smoothed relative frequency);
            # float() avoids Python 2 integer division, which would always yield 0
            count = sum(wordFreq.values())
            contributions = {}
            for term in wordFreq:
                contribution = (wordFreq[term] + 1) / float(count + len(wordFreq))
                contributions[term] = "%0.4f" % contribution
                # print "term:%s, contribution:%f" % (term, contribution)
            # Add the contributions to each cluster
            wordContribution[cluster["cluster"]] = contributions
            print "Finish Cluster ", cluster["cluster"], "For Stock ", index, "at ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
        finalWordContribution[index] = wordContribution
    print "EndTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

    # Write the term contribution to file
    termContributionFile = common.get_configuration("model", "TERM_CONTRIBUTION_PATH")
    jsString = json.dumps(finalWordContribution)
    with open(termContributionFile, "w") as output:
        output.write(jsString)

def clusterSet(traingingStart, traningEndDate, clu_num):
    con = common.getDBConnection()
    cur = con.cursor()
    finalClusterRecord = []
    stockList = ["MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS", "COLCAP", "CRSMBCT", "IBOV", "IGBVL", "IBVC"]
    finalOrderCluster = {}
    for stock in stockList:
        sql = "select embers_id,post_date,current_value,previous_close_value,one_day_change,change_percent,name from t_enriched_bloomberg_prices where name=? and post_date<=? and post_date>=? order by post_date asc"
        cur.execute(sql, (stock, traningEndDate, traingingStart))
        rows = cur.fetchall()
        changes = [row[5] for row in rows]
        fdist = nltk.FreqDist(changes)
        clusterS = [(0, x) for x in fdist.keys()]
        c1 = KMeansClustering(clusterS)
        cluster = c1.getclusters(clu_num)
        # Sample data of the clusters produced by the KMeans algorithm (abridged), e.g.
        # cluster = [[(0, 0.0862), (0, 0.088), ...], [(0, -0.0001), (0, 0.0), ...], ...]
        namedCluster = {}
        i = 0
        orderCluster = {}
        for clu in cluster:
            i = i + 1
            namedCluster[i] = clu
            orderCluster[i] = [min(clu)[1], max(clu)[1]]
        # The number of rows to be committed for each interval
        committedInterval = 0
        for row in rows:
            for nc in namedCluster:
                if (0, row[5]) in namedCluster[nc]:
                    newRow = list(row)
                    newRow.append(nc)
                    # Update the trend type in the database
                    UpdateEnrichedData(con, committedInterval, newRow)
                    finalClusterRecord.append(newRow)
        con.commit()
        finalOrderCluster[stock] = orderCluster
        print stock, " Done"

    # Write the type range into a file
    trendRangeFile = common.get_configuration("model", "TREND_RANGE_FILE")
    dataStr = json.dumps(finalOrderCluster)
    with open(trendRangeFile, "w") as output:
        output.write(dataStr)

    # Write the training data into a file
    trendSetRecordFile = common.get_configuration("training", "TRAINING_TREND_RECORDS")
    dataStr = json.dumps(finalClusterRecord)
    with open(trendSetRecordFile, "w") as output:
        output.write(dataStr)
    if con:
        con.close()

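# The trend types built above come from running k-means over the one-day change percentages.
# Because the clustering operates on point tuples, each scalar change is wrapped as (0, x),
# so the clustering effectively happens in one dimension and each resulting group becomes a
# [min, max] trend range. A minimal, self-contained sketch, assuming the KMeansClustering class
# used above comes from the python-cluster package (an assumption; the original imports are not shown):
from cluster import KMeansClustering

changes = [0.012, 0.015, 0.011, -0.020, -0.018, -0.022, 0.000, 0.001]
points = [(0, x) for x in changes]
cl = KMeansClustering(points)
groups = cl.getclusters(3)
for i, grp in enumerate(groups, 1):
    span = [min(grp)[1], max(grp)[1]]
    print "trend type", i, "range:", span
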
def create_vocabulary(feature_num=150):
    # Read the negative finance dictionary
    negativeFilePath = common.get_configuration("training", "NEGATIVE_DIC")
    negKeywords = json.load(open(negativeFilePath))

    # Read the positive finance dictionary
    positiveFilePath = common.get_configuration("training", "POSITIVE_DIC")
    posiKeyWords = json.load(open(positiveFilePath))

    # Read the archived news to count the top words
    BBNewsPath = common.get_configuration("training", "TRAINING_NEWS_FILE")
    keyWords = []
    for w in negKeywords:
        keyWords.append(w)
    for w in posiKeyWords:
        keyWords.append(w)
    print "Over Here"

    wordFreq = {}
    flatCount = 0
    for line in open(BBNewsPath, "r"):
        news = json.loads(line)
        flatCount = flatCount + 1
        fdist = news["content"]
        for word in keyWords:
            if word in fdist:
                if word in wordFreq:
                    wordFreq[word] = wordFreq[word] + fdist[word]
                else:
                    wordFreq[word] = fdist[word]

    # sorted_obj2 = wordFreq.iteritems()
    sorted_obj2 = sorted(wordFreq.items(), key=lambda x: x[1], reverse=True)
    print sorted_obj2

    # Write the vocabulary list to file
    vocabularyFile = common.get_configuration("training", "VOCABULARY_FILE")
    output = open(vocabularyFile, "w")
    result_word_list = []
    i = 1
    for word in sorted_obj2:
        if i > feature_num:
            break
        else:
            result_word_list.append(word[0])
            i = i + 1
    output.write(json.dumps(result_word_list))
    output.flush()
    output.close()

def group_news_by_cluster():
    # Load the training news file
    trainingNewsFile = common.get_configuration("training", "TRAINING_NEWS_FILE")
    articles = json.load(open(trainingNewsFile))
    finalStockClusterNews = {}
    # Iteratively read the news
    for index in articles:
        indexNews = articles[index]
        dayNews = {}
        # Group the news by date
        for articleId in indexNews:
            day = articleId[0:8]
            if day not in dayNews:
                dayNews[day] = []
            dayNews[day].append(indexNews[articleId])
        # Read the day-cluster file to group the dates
        clusterDays = {}
        trendFilePath = common.get_configuration("training", "TRAINING_TREND_RECORDS")
        trendFile = open(trendFilePath)
        trendJson = json.load(trendFile)
        for trend in trendJson:
            if index == trend[6]:
                cluster = trend[7]
                structDate = time.strptime(trend[1], "%Y-%m-%d")
                dtDay = datetime(structDate[0], structDate[1], structDate[2])
                for i in range(1, 4):
                    day = dtDay - timedelta(days=i)
                    dayStr = day.strftime("%Y%m%d")
                    if cluster not in clusterDays:
                        clusterDays[cluster] = []
                    if dayStr not in clusterDays[cluster]:
                        clusterDays[cluster].append(dayStr)
        clusterNews = []
        for cluster in clusterDays:
            cNews = {}
            cNews["cluster"] = cluster
            docs = []
            for day in clusterDays[cluster]:
                if day in dayNews:
                    for doc in dayNews[day]:
                        docs.append(doc)
            cNews["articles"] = docs
            clusterNews.append(cNews)
        finalStockClusterNews[index] = clusterNews

    with open("D:/groupByCluster.json", "w") as ot:
        ot.write(json.dumps(finalStockClusterNews))
    return finalStockClusterNews

def group_news_by_cluster():
    # Load the training news file
    trainingNewsFile = common.get_configuration("model", "TRAINING_NEWS_FILE")
    articles = json.load(open(trainingNewsFile))
    finalStockClusterNews = {}
    # Iteratively read the news
    for index in articles:
        indexNews = articles[index]
        dayNews = {}
        # Group the news by date
        for articleId in indexNews:
            day = articleId[0:8]
            if day not in dayNews:
                dayNews[day] = []
            dayNews[day].append(indexNews[articleId])
        # Read the day-cluster file to group the dates
        clusterDays = {}
        trendFilePath = common.get_configuration("model", "TRAINING_TREND_RECORDS")
        trendFile = open(trendFilePath)
        trendJson = json.load(trendFile)
        for trend in trendJson:
            if index == trend[6]:
                cluster = trend[7]
                structDate = time.strptime(trend[2], "%Y-%m-%d")
                dtDay = datetime(structDate[0], structDate[1], structDate[2])
                for i in range(1, 4):
                    day = dtDay - timedelta(days=i)
                    dayStr = day.strftime("%Y%m%d")
                    if cluster not in clusterDays:
                        clusterDays[cluster] = []
                    if dayStr not in clusterDays[cluster]:
                        clusterDays[cluster].append(dayStr)
        clusterNews = []
        for cluster in clusterDays:
            cNews = {}
            cNews["cluster"] = cluster
            docs = []
            for day in clusterDays[cluster]:
                if day in dayNews:
                    for doc in dayNews[day]:
                        docs.append(doc)
            cNews["articles"] = docs
            clusterNews.append(cNews)
        finalStockClusterNews[index] = clusterNews
    return finalStockClusterNews

def divide_archived_news(traingingStart, trainingEnd, estimationStart, estimationEnd):
    archivedNewsPath = common.get_configuration("training", "GROUP_STOCK_NEWS")
    timelineBegin = time.strptime(traingingStart, "%Y-%m-%d")
    timelineEnd = time.strptime(trainingEnd, "%Y-%m-%d")

    # Write the training data to file
    trainingFilePath = common.get_configuration("training", "TRAINING_NEWS_FILE")
    with open(trainingFilePath, "w") as output:
        for line in open(archivedNewsPath, "r"):
            news = json.loads(line)
            post_date = time.strptime(news["postDate"], "%Y-%m-%d")
            if timelineBegin <= post_date <= timelineEnd:
                output.write(json.dumps(news))
                output.write("\n")

def group_daily_articles():
    stockArticles = {}
    archiveDir = common.get_configuration("model", "ARCHIVE_NEWS_DIR")
    dailyFileNames = os.listdir(archiveDir)
    matchRule = create_match_rule()
    pattern = re.compile(matchRule, re.I)

    # Construct the company-to-stock mapping
    comListFile = common.get_configuration("model", "COMPANY_LIST")
    comList = json.load(open(comListFile))
    comStock = {}
    for stock in comList:
        for company in comList[stock]:
            comStock[company.strip()] = stock

    i = 0
    print "StartTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
    for dailyFile in dailyFileNames:
        dailyNews = json.load(open(archiveDir + "/" + dailyFile), encoding='ISO-8859-1')
        for news in dailyNews:
            content = news["content"]
            matchedList = pattern.findall(content)
            matchedGroup = []
            if matchedList:
                i = i + 1
                for item in matchedList:
                    matchedGroup.append(item)
                matchedGroup = {}.fromkeys(matchedGroup).keys()
                # Group the news under each matched stock
                for item in matchedGroup:
                    item = item.strip()
                    if item in comStock:
                        stockIndex = comStock[item]
                        if stockIndex not in stockArticles:
                            stockArticles[stockIndex] = {}
                        articleId = news["articleId"]
                        stockArticles[stockIndex][articleId] = news
    print i

    # Write the grouped articles to file
    groupedFile = common.get_configuration("model", "GROUP_STOCK_NEWS")
    with open(groupedFile, "w") as output:
        output.write(json.dumps(stockArticles))
    print "EndTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

def group_daily_articles(rule_name):
    stockArticles = {}
    archiveDir = common.get_configuration("training", "ARCHIVE_NEWS_DIR")
    dailyFileNames = os.listdir(archiveDir)
    matchRule = create_match_rule(rule_name)
    pattern = re.compile(matchRule, re.I)
    print matchRule

    # Construct the company-to-stock mapping
    comListFile = common.get_configuration("training", rule_name)
    comList = json.load(open(comListFile))
    comStock = {}
    for stock in comList:
        for company in comList[stock]:
            comStock[company.strip().lower()] = stock

    i = 0
    print "StartTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
    for dailyFile in dailyFileNames:
        dailyNews = json.load(open(archiveDir + "/" + dailyFile), encoding='ISO-8859-1')
        for news in dailyNews:
            content = news["content"].lower()
            # print content
            matchedList = pattern.findall(content)
            matchedGroup = []
            if matchedList:
                i = i + 1
                for item in matchedList:
                    matchedGroup.append(item)
                matchedGroup = {}.fromkeys(matchedGroup).keys()
                # Group the news under each matched stock
                for item in matchedGroup:
                    item = item.strip()
                    if item in comStock:
                        stockIndex = comStock[item]
                        if stockIndex not in stockArticles:
                            stockArticles[stockIndex] = {}
                        articleId = news["articleId"]
                        stockArticles[stockIndex][articleId] = news
    print i

    # Write the grouped articles to file
    groupedFile = common.get_configuration("training", "GROUP_STOCK_NEWS")
    with open(groupedFile, "w") as output:
        output.write(json.dumps(stockArticles))
    print "EndTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

def compute_term_contribution(days_back):
    # Read the vocabulary file
    vocabularyFilePath = common.get_configuration("training", "VOCABULARY_FILE")
    vocaList = json.load(open(vocabularyFilePath, "r"))
    print "StartTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

    finalWordContribution = {}
    # Iterate over each stock index
    trainingFile = group_news_by_cluster(days_back)
    print "Finish Group news by cluster"
    for index in trainingFile:
        stockNews = trainingFile[index]
        wordContribution = {}
        for cluster in stockNews:
            # Count the words in each cluster
            print "Start Cluster ", cluster["cluster"], "For Stock ", index, "at ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
            articles = cluster["articles"]
            # Initialize the word frequencies
            wordFreq = {}
            for term in vocaList:
                wordFreq[term] = 0
            for article in articles:
                fdist = article["content"]
                for term in wordFreq:
                    if term in fdist:
                        wordFreq[term] = wordFreq[term] + fdist[term]
            # Compute each word's contribution
            count = sum(wordFreq.values())
            contributions = {}
            for term in wordFreq:
                contribution = round(1.0 * (wordFreq[term] + 1) / (count + len(wordFreq)), 4)
                contributions[term] = contribution
            # Add the contributions to each cluster
            wordContribution[cluster["cluster"]] = contributions
            print "Finish Cluster ", cluster["cluster"], "For Stock ", index, "at ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
        finalWordContribution[index] = wordContribution
    print "EndTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

    # Write the term contribution to file
    termContributionFile = common.get_configuration("model", "TERM_CONTRIBUTION_PATH")
    jsString = json.dumps(finalWordContribution)
    with open(termContributionFile, "w") as output:
        output.write(jsString)

def import_news_to_database():
    try:
        historyNews = open(common.get_configuration("model", 'GROUP_STOCK_NEWS'))
        historyNewsJson = json.load(historyNews)
        for stockIndex in historyNewsJson:
            for article in historyNewsJson[stockIndex].values():
                news = {}
                news["title"] = article["title"]
                news["author"] = article["author"]
                postTime = article["postTime"].split(".")[0]
                postTime = datetime.strptime(postTime, "%Y-%m-%d %H:%M:%S")
                news["post_time"] = postTime
                news["post_date"] = postTime.date()
                news["content"] = article["content"]
                news["stock_index"] = stockIndex
                news["source"] = "Bloomberg News"
                news["update_time"] = article["queryTime"]
                news["newsUrl"] = article["newsUrl"]
                embersId = hashlib.sha1(article["content"]).hexdigest()
                news["embers_id"] = embersId
                ifExisted = bns.check_article_existed(news)
                if not ifExisted:
                    bns.insert_news(news)
                    # Insert into the mission process
                    bns.insert_news_mission(news)
        bns.close_db_connection()
    except lite.Error, e:
        print "Error: %s" % e.args[0]

def import_news_to_database():
    try:
        global con
        init()
        historyNews = open(common.get_configuration("training", 'GROUP_STOCK_NEWS'))
        historyNewsJson = json.load(historyNews)
        i = 0
        for stockIndex in historyNewsJson:
            for article in historyNewsJson[stockIndex].values():
                news = {}
                news["title"] = article["title"]
                news["author"] = article["author"]
                postTime = article["postTime"].split(".")[0]
                postTime = datetime.strptime(postTime, "%Y-%m-%d %H:%M:%S")
                news["postTime"] = postTime
                news["postDate"] = postTime.date()
                news["content"] = article["content"]
                news["stockIndex"] = stockIndex
                news["source"] = "Bloomberg News"
                news["updateTime"] = article["queryTime"]
                news["url"] = article["newsUrl"]
                embersId = hashlib.sha1(article["content"]).hexdigest()
                news["embersId"] = embersId
                ifExisted = check_article_existed(news)
                if not ifExisted:
                    insert_news(news)
                    # Insert into the mission process
                    insert_news_mission(news)
                i = i + 1
                if i % 1000 == 0:
                    con.commit()
        con.commit()
    except lite.Error, e:
        print "Error: %s" % e.args[0]

def get_trend_type(rawIndexData):
    """
    Compute the current day's trend type by comparing the change percent to each trend range
    and choosing the nearest trend as the current day's type.
    """
    # Load the trend type range file
    rangeFilePath = common.get_configuration("model", "TREND_RANGE_FILE")
    tFile = open(rangeFilePath)
    trendsJson = json.load(tFile)

    # Get the range table of the indicated stock
    stockIndex = rawIndexData["stockIndex"]
    tJson = trendsJson[stockIndex]
    print tJson

    # Compute the change percent
    lastPrice = float(rawIndexData["currentValue"])
    preLastPrice = float(rawIndexData["previousCloseValue"])
    changePercent = round((lastPrice - preLastPrice) / preLastPrice, 4)

    distance = 10000
    trendType = None
    for type in tJson:
        tmpDistance = min(abs(changePercent - tJson[type][0]), abs(changePercent - tJson[type][1]))
        if tmpDistance < distance:
            distance = tmpDistance
            trendType = type
    return trendType

def compute_stock_news_probability(self, predictiveDate, clusterType, stockIndex):
    try:
        termContributionFile = open(common.get_configuration("model", 'TERM_CONTRIBUTION_PATH'))
        termContributionJson = json.load(termContributionFile)
        terms, newsDerived = self.get_stock_news_data(predictiveDate, stockIndex)
        termContributionProbability = 0
        if stockIndex in termContributionJson:
            for termClusterType in termContributionJson[stockIndex].keys():
                if termClusterType == str(clusterType):
                    stermlist = termContributionJson[stockIndex][termClusterType]
                    # print stermlist
                    for word, count in terms.iteritems():
                        if word in stermlist:
                            # print word
                            # Accumulate the weighted log contribution of each matched term
                            termContributionProbability += count * math.log(
                                float(termContributionJson[stockIndex][termClusterType][word]))
                            del stermlist[word]
        return termContributionProbability, newsDerived
    except IOError:
        print "Can't open the file:stock_raw_data.json."
    except Exception as e:
        print traceback.format_exc()
        print "Error in computing stock news probability: %s" % e.message
        return None

def clusterSet(traingingStart, traningEndDate):
    con = common.getDBConnection()
    cur = con.cursor()
    finalClusterRecord = []
    stockList = ["MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS", "COLCAP", "CRSMBCT", "IBOV", "IGBVL", "IBVC"]
    for stock in stockList:
        sql = "select embers_id,post_date,current_value,previous_close_value,round(current_value-previous_close_value,4),round((current_value-previous_close_value)/previous_close_value,4),name from t_bloomberg_prices where name=? and post_date<=? and post_date>=? order by post_date asc"
        cur.execute(sql, (stock, traningEndDate, traingingStart))
        rows = cur.fetchall()
        # The number of rows to be committed for each interval
        committedInterval = 0
        for row in rows:
            newRow = list(row)
            # Insert the pre-enriched stock index data into the database
            InitiateEnrichedData(con, committedInterval, newRow)
            finalClusterRecord.append(newRow)
        con.commit()

    # Write the training data into a file
    trendSetRecordFile = common.get_configuration("training", "TRAINING_TREND_RECORDS")
    dataStr = json.dumps(finalClusterRecord)
    with open(trendSetRecordFile, "w") as output:
        output.write(dataStr)
    if con:
        con.close()

def main():
    vocabularyFile = common.get_configuration("training", "VOCABULARY_FILE")
    key_list = []
    with open(vocabularyFile, "r") as rf:
        lines = rf.readlines()
        for line in lines:
            line = line.strip()
            key_list.append(line)
    print key_list

def enumberate_stock_index(self):
    try:
        clustersFile = open(common.get_configuration("model", 'CLUSTER_PROBABILITY_PATH'))
        clusterJson = json.load(clustersFile)
        stockIndexList = []
        for stockIndex in clusterJson.keys():
            stockIndexList.append(stockIndex)
        return stockIndexList
    except Exception as e:
        log.info(traceback.format_exc())
        log.info("Error: %s" % e.args[0])

def enumberate_stock_index(self):
    try:
        clustersFile = open(common.get_configuration("model", 'CLUSTER_PROBABILITY_PATH'))
        clusterJson = json.load(clustersFile)
        stockIndexList = []
        for stockIndex in clusterJson.keys():
            stockIndexList.append(stockIndex)
        return stockIndexList
    except Exception as e:
        print traceback.format_exc()
        print "Error: %s" % e.args[0]

def compute_stock_index_probability(self, predictiveDate, clusterType, stockIndex):
    try:
        # Get the clusters list
        stockIndexFile = open(common.get_configuration("model", 'CLUSTER_PROBABILITY_PATH'))
        clusterProbability = json.load(stockIndexFile)
        clusterJson = {}
        clusterContributionJson = {}
        clusterJson = clusterProbability[stockIndex]

        # Get the contribution of each cluster
        clusterContributionFile = open(common.get_configuration("model", 'CLUSTER_CONTRIBUTION_PATH'))
        clusterContributionJson = json.load(clusterContributionFile)

        clusterTypesHistory, stockDerived = self.get_stock_index_cluster(predictiveDate, stockIndex)
        stockIndexProbability = 0
        for key in clusterContributionJson[stockIndex].keys():
            if key == str(clusterType):
                # Search the cluster contribution matrix to get the contribution probability
                stockIndexProbability = stockIndexProbability \
                    + math.log(float(clusterContributionJson[stockIndex][key][int(clusterTypesHistory[0]) - 1][2])) \
                    + math.log(float(clusterContributionJson[stockIndex][key][int(clusterTypesHistory[1]) - 1][1])) \
                    + math.log(float(clusterContributionJson[stockIndex][key][int(clusterTypesHistory[2]) - 1][0])) \
                    + math.log(float(clusterJson[str(clusterType)]))
        return stockIndexProbability, stockDerived
    except Exception as e:
        log.info(traceback.format_exc())
        log.info("Error in computing stock index probability: %s" % e.args)

def create_match_rule(rule_name):
    comListFile = common.get_configuration("training", rule_name)
    comList = json.load(open(comListFile))
    rule = "("
    for stock in comList:
        for company in comList[stock]:
            # Escape literal dots so they are matched literally in the regex
            company = company.replace(".", "\\.")
            # If the company name contains only one word, add a blank before and after
            # the name to avoid partial (substring) matches
            if company.find(" ") < 0:
                eachRule = " " + company + " " + "|"
            else:
                eachRule = company + "|"
            rule += eachRule
    rule = rule[0:len(rule) - 1] + ")"
    return rule.lower()

def create_match_rule():
    comListFile = common.get_configuration("model", "COMPANY_LIST")
    comList = json.load(open(comListFile))
    rule = "("
    for stock in comList:
        for company in comList[stock]:
            # Escape literal dots so they are matched literally in the regex
            company = company.replace(".", "\\.")
            # If the company name contains only one word, add a blank before and after
            # the name to avoid partial (substring) matches
            if company.find(" ") < 0:
                eachRule = " " + company + " " + "|"
            else:
                eachRule = company + "|"
            rule += eachRule
    rule = rule[0:len(rule) - 1] + ")"
    return rule

def import_history():
    hisFile = common.get_configuration("training", "HISTORICAL_STOCK_JSON")
    raw_price_list = []
    with open(hisFile, 'r') as raw_file:
        lines = raw_file.readlines()
        for line in lines:
            raw_data = json.loads(line.replace("\n", "").replace("\r", ""))
            raw_price_list.append(raw_data)

    conn = common.getDBConnection()
    # Process the records one by one
    for raw_data in raw_price_list:
        process(conn, raw_data)
    if conn:
        conn.commit()

def enumberate_clusters(self, stockIndex):
    try:
        clusterFile = open(common.get_configuration("model", 'CLUSTER_PROBABILITY_PATH'))
        clusterJson = json.load(clusterFile)
        clustersList = []
        clusterProbability = {}
        for clusterKey in clusterJson.keys():
            if clusterKey == stockIndex:
                clusterProbability = clusterJson[clusterKey]
                break
        for clusterKey in clusterProbability.keys():
            clustersList.append(clusterKey)
        return clustersList
    except Exception as e:
        log.info(traceback.format_exc())
        log.info("Error: %s" % e.args)

def enumberate_clusters(self, stockIndex):
    try:
        clusterFile = open(common.get_configuration("model", 'CLUSTER_PROBABILITY_PATH'))
        clusterJson = json.load(clusterFile)
        clustersList = []
        clusterProbability = {}
        for clusterKey in clusterJson.keys():
            if clusterKey == stockIndex:
                clusterProbability = clusterJson[clusterKey]
                break
        for clusterKey in clusterProbability.keys():
            clustersList.append(clusterKey)
        return clustersList
    except Exception as e:
        print traceback.format_exc()
        print "Error: %s" % e.args

def execute(date, cfgPath):
    init(cfgPath)
    enricheDa = ed.Enriched_Data(cfgPath)
    obj = enricheDa.enrich_all_stock(date)
    warningList = []
    for item in obj:
        warning = warningCheck(item)
        if warning is not None:
            warningList.append(warning)

    # Push the warnings to the ZMQ queue
    port = common.get_configuration("info", "ZMQ_PORT")
    with queue.open(port, 'w', capture=True) as outq:
        for warning in warningList:
            outq.write(json.dumps(warning, encoding='utf8'))
    return warningList

def get_trend_type(rawIndexData):
    """
    Compute the current day's trend type by comparing the change percent to each trend range
    and choosing the nearest trend as the current day's type.
    """
    # Load the trend type range file
    rangeFilePath = common.get_configuration("model", "TREND_RANGE_FILE")
    tFile = open(rangeFilePath)
    trendsJson = json.load(tFile)
    tFile.close()

    # Get the range table of the indicated stock
    stockIndex = rawIndexData["name"]
    tJson = trendsJson[stockIndex]

    # Compute the change percent
    lastPrice = float(rawIndexData["currentValue"])
    preLastPrice = float(rawIndexData["previousCloseValue"])
    changePercent = round((lastPrice - preLastPrice) / preLastPrice, 4)

    distance = 10000
    trendType = None
    for changeType in tJson:
        tmpDistance = min(abs(changePercent - tJson[changeType][0]), abs(changePercent - tJson[changeType][1]))
        if tmpDistance < distance:
            distance = tmpDistance
            trendType = changeType

    # Adjust the range of the chosen trend type according to the current change percent
    bottom = tJson[trendType][0]
    top = tJson[trendType][1]
    if changePercent > top:
        top = changePercent
    if changePercent < bottom:
        bottom = changePercent
    trendsJson[stockIndex][trendType][0] = bottom
    trendsJson[stockIndex][trendType][1] = top
    with open(rangeFilePath, "w") as rangeFile:
        rangeFile.write(json.dumps(trendsJson))
    return trendType

def dailySigmaTrends(stockIndex, cluster, m30, m90, std30, std90, curValue):
    # Compute the bottom and upper lines for a daily sigma event
    s4Bottom = m30 - 4 * std30
    s4Upper = m30 + 4 * std30
    s3Bottom = m90 - 3 * std90
    s3Upper = m90 + 3 * std90
    bottom = s4Bottom
    upper = s4Upper
    if s4Bottom >= s3Bottom:
        bottom = s3Bottom
    if s3Upper <= s4Upper:
        upper = s3Upper

    # Get the span of the input cluster
    """
    One point needs to be changed later: currently the two types of extreme are merged
    into one trend type 7, and type 7 needs to be divided into types 7 and 11.
    """
    trendRangePath = common.get_configuration("model", "TREND_RANGE_FILE")
    clusterDis = json.load(open(trendRangePath))
    # Get the span of the input trend type
    cBottom = 0.0
    cUpper = 0.0
    clusters = clusterDis[stockIndex]
    for clu in clusters:
        if clu == cluster:
            cBottom = clusters[clu][0] * curValue
            cUpper = clusters[clu][1] * curValue

    # If nothing happens, the event type will be "0000"
    eventType = "0000"
    if cBottom <= bottom:
        eventType = "0412"
    if cUpper >= upper:
        eventType = "0411"
    # If the predictive trend is an extreme value (type 1 or 6) and the previous day was not an
    # extreme sigma day, then predict that the next day will be an extreme day.
    # if eventType != "0000":
    #     print "eventType:%s cBottom: %0.4f, bottom:%0.4f, cUpper:%0.4f, upper:%0.4f" % (eventType, cBottom, bottom, cUpper, upper)
    return eventType, cBottom, cUpper

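# Illustrative call of dailySigmaTrends with hypothetical numbers (not taken from the project's
# data); it assumes the TREND_RANGE_FILE referenced above exists and contains an entry for
# "MERVAL" with a cluster key "3". A "0411"/"0412" event fires when the predicted cluster span
# crosses the sigma bands built from the 30-day and 90-day moving statistics.
eventType, cBottom, cUpper = dailySigmaTrends(
    "MERVAL",    # stock index
    "3",         # predicted trend cluster
    5200.0,      # 30-day moving average (hypothetical)
    5150.0,      # 90-day moving average (hypothetical)
    40.0,        # 30-day standard deviation (hypothetical)
    55.0,        # 90-day standard deviation (hypothetical)
    5300.0)      # current index value (hypothetical)
print eventType, cBottom, cUpper
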
def get_trend_type(stockIndex, changePercent):
    """
    Compute the current day's trend type by comparing the change percent to each trend range
    and choosing the nearest trend as the current day's type.
    """
    # Load the trend type range file
    rangeFilePath = common.get_configuration("model", "TREND_RANGE_FILE")
    tFile = open(rangeFilePath)
    trendsJson = json.load(tFile)
    tJson = trendsJson[stockIndex]

    distance = 10000
    trendType = None
    for type in tJson:
        tmpDistance = min(abs(changePercent - tJson[type][0]), abs(changePercent - tJson[type][1]))
        if tmpDistance < distance:
            distance = tmpDistance
            trendType = type
    return trendType

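# The trend-range file consumed above maps each stock index to {trend type: [bottom, top]}
# spans (as written out by clusterSet). A minimal sketch of the nearest-range lookup on a
# hypothetical two-type range table, independent of the config file:
toyRanges = {"1": [-0.02, -0.005], "2": [0.004, 0.018]}
changePercent = 0.011
distance = 10000
trendType = None
for t in toyRanges:
    tmpDistance = min(abs(changePercent - toyRanges[t][0]), abs(changePercent - toyRanges[t][1]))
    if tmpDistance < distance:
        distance = tmpDistance
        trendType = t
print trendType   # "2": 0.011 is closer to the [0.004, 0.018] span than to [-0.02, -0.005]
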
def get_stock_news_data(self, predictiveDate, stockIndex):
    con = None
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        # Get the past 3 days' news before the predictive day
        predictiveDate = datetime.strptime(predictiveDate, "%Y-%m-%d")
        startDay = (predictiveDate - timedelta(days=3)).strftime("%Y-%m-%d")
        endDay = (predictiveDate - timedelta(days=1)).strftime("%Y-%m-%d")
        sqlquery = "select content,embers_id from t_daily_enrichednews where post_date>=? and post_date<=? and stock_index=?"
        cur.execute(sqlquery, ([startDay, endDay, stockIndex]))
        articleRecords = cur.fetchall()

        # Initialize the word list
        vocabularyFile = open(common.get_configuration("model", 'VOCABULARY_FILE'))
        wordLines = vocabularyFile.readlines()
        termList = {}
        for line in wordLines:
            line = line.replace("\n", "").replace("\r", "")
            termList[line] = 0

        newsDerived = []
        # Merge all the terms of each record
        for record in articleRecords:
            jsonRecord = json.loads(record[0])
            newsDerived.append(record[1])
            for curWord in jsonRecord:
                if curWord in termList:
                    termList[curWord] = termList[curWord] + jsonRecord[curWord]
        return termList, newsDerived
    except sqlite.Error, e:
        print traceback.format_exc()
        print "Error: %s" % e.args[0]

def import_historical_stock():
    # Get the historical stock dir
    stockFileDir = common.get_configuration("training", "HISTORICAL_STOCK")
    fileNames = os.listdir(stockFileDir)
    con = common.getDBConnection()
    cur = con.cursor()

    # Clear the table
    clearSql = "delete from t_daily_stockindices"
    cur.execute(clearSql)
    con.commit()

    sql = "insert into t_daily_stockindices (sub_sequence,stock_index,date,last_price,one_day_change) values (?,?,?,?,?)"
    for filename in fileNames:
        fpath = stockFileDir + "/" + filename
        stock = filename.split(".")[0]
        subSequence = 0
        with open(fpath, "r") as stockFile:
            lines = stockFile.readlines()[2:]
            for line in lines:
                line = line.replace("\r", "").replace("\n", "")
                date = line.split(",")[0]
                lastPrice = line.split(",")[1]
                previousLastPrice = line.split(",")[2]
                if lastPrice == "#N/A N/A" or previousLastPrice == "#N/A N/A":
                    continue
                lastPrice = float(lastPrice)
                previousLastPrice = float(previousLastPrice)
                date = datetime.strptime(date, "%m/%d/%Y").strftime("%Y-%m-%d")
                oneDayChange = round(lastPrice - previousLastPrice, 4)
                subSequence = subSequence + 1
                cur.execute(sql, (subSequence, stock, date, lastPrice, oneDayChange,))
                if subSequence % 300 == 0:
                    con.commit()
    con.commit()

def get_trend_type(stockIndex, changePercent):
    """
    Compute the current day's trend type by comparing the change percent to each trend range
    and choosing the nearest trend as the current day's type.
    """
    # Load the trend type range file
    rangeFilePath = common.get_configuration("model", "TREND_RANGE_FILE")
    tFile = open(rangeFilePath)
    trendsJson = json.load(tFile)
    tFile.close()
    tJson = trendsJson[stockIndex]

    distance = 10000
    trendType = None
    for changeType in tJson:
        tmpDistance = min(abs(changePercent - tJson[changeType][0]), abs(changePercent - tJson[changeType][1]))
        if tmpDistance < distance:
            distance = tmpDistance
            trendType = changeType

    # Adjust the range of the chosen trend type according to the current change percent
    bottom = tJson[trendType][0]
    top = tJson[trendType][1]
    if changePercent > top:
        top = changePercent
    if changePercent < bottom:
        bottom = changePercent
    trendsJson[stockIndex][trendType][0] = bottom
    trendsJson[stockIndex][trendType][1] = top
    with open(rangeFilePath, "w") as rangeFile:
        rangeFile.write(json.dumps(trendsJson))
    return trendType

def get_stock_news_data(self, predictiveDate, stockIndex):
    con = None
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        "Get the past 3 days' news before the predictive day"
        predictiveDate = datetime.strptime(predictiveDate, "%Y-%m-%d")
        startDay = (predictiveDate - timedelta(days=3)).strftime("%Y-%m-%d")
        endDay = (predictiveDate - timedelta(days=1)).strftime("%Y-%m-%d")
        sqlquery = ("select content,embers_id from t_daily_enrichednews "
                    "where post_date>=? and post_date<=? and stock_index=?")
        cur.execute(sqlquery, (startDay, endDay, stockIndex))
        articleRecords = cur.fetchall()
        "Initiate the word list"
        vocabularyFile = open(common.get_configuration("training", 'VOCABULARY_FILE'))
        wordLines = vocabularyFile.readlines()
        vocabularyFile.close()
        termList = {}
        for line in wordLines:
            line = line.replace("\n", "").replace("\r", "")
            termList[line] = 0
        newsDerived = []
        "Merge the term counts of each record"
        for record in articleRecords:
            jsonRecord = json.loads(record[0])
            newsDerived.append(record[1])
            for curWord in jsonRecord:
                if curWord in termList:
                    termList[curWord] = termList[curWord] + jsonRecord[curWord]
        return termList, newsDerived
    except sqlite.Error, e:
        log.info(traceback.format_exc())
        log.info("Error: %s" % e.args[0])
def compute_stock_news_probability(self, predictiveDate, clusterType, stockIndex):
    try:
        termContributionFile = open(
            common.get_configuration("model", 'TERM_CONTRIBUTION_PATH'))
        termContributionJson = json.load(termContributionFile)
        termContributionFile.close()
        terms, newsDerived = self.get_stock_news_data(predictiveDate, stockIndex)
        termContributionProbability = 0
        if stockIndex in termContributionJson:
            for termClusterType in termContributionJson[stockIndex].keys():
                if termClusterType == str(clusterType):
                    stermlist = termContributionJson[stockIndex][termClusterType]
                    for word, count in terms.iteritems():
                        if word in stermlist:
                            # Accumulate each matched term's log contribution,
                            # weighted by its count in the recent news
                            termContributionProbability += count * math.log(
                                float(stermlist[word]))
                            del stermlist[word]
        return termContributionProbability, newsDerived
    except IOError:
        log.info("Can't open the file: stock_raw_data.json.")
    except Exception as e:
        log.info(traceback.format_exc())
        log.info("Error in computing stock news probability: %s" % e.message)
    return None
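
# Worked sketch of the contribution score above, assuming the term-contribution file
# stores p(term | cluster)-style values per stock index and cluster. The counts and
# probabilities below are invented for illustration.
def _news_probability_example():
    terms = {"crisis": 3, "debt": 1, "growth": 0}          # counts from recent news
    clusterContribution = {"crisis": 0.02, "debt": 0.05}   # hypothetical p(term|cluster)
    score = 0.0
    for word, count in terms.items():
        if word in clusterContribution:
            score += count * math.log(clusterContribution[word])
    return score  # 3*log(0.02) + 1*log(0.05) ~= -14.73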
def create_vocabulary():
    "Read the negative finance dictionary"
    negativeFilePath = common.get_configuration("model", "NEGATIVE_DIC")
    negativeDoc = open(negativeFilePath).readlines()
    stemmer = nltk.stem.snowball.SnowballStemmer('english')
    negativeWords = []
    for l in negativeDoc:
        negativeWords.append(stemmer.stem(l.replace("\n", "")))
    fdist = nltk.FreqDist(negativeWords)
    negKeywords = []
    for k in fdist:
        negKeywords.append(k)
    "Read the positive finance dictionary"
    positiveFilePath = common.get_configuration("model", "POSITIVE_DIC")
    positiveDoc = open(positiveFilePath).readlines()
    positiveWords = []
    for line in positiveDoc:
        positiveWords.append(stemmer.stem(line.replace("\n", "")))
    fdist = nltk.FreqDist(positiveWords)
    posiKeyWords = []
    for posWord in fdist:
        posiKeyWords.append(posWord)
    "Read the archived news and count the dictionary words"
    BBNewsPath = common.get_configuration("model", "TRAINING_NEWS_FILE")
    news = open(BBNewsPath)
    jsonNews = json.load(news)
    news.close()
    # Remove all the duplicated articles
    newsWarehouse = {}
    for stockIndex in jsonNews:
        for articleId in jsonNews[stockIndex]:
            newsWarehouse[articleId] = jsonNews[stockIndex][articleId]
    keyWords = []
    for w in negKeywords:
        keyWords.append(w)
    for w in posiKeyWords:
        keyWords.append(w)
    wordFreq = {}
    flatCount = 0
    for news in newsWarehouse:
        flatCount = flatCount + 1
        doc = newsWarehouse[news]
        tokens = nltk.word_tokenize(doc["content"])
        words = [w.lower() for w in tokens
                 if w not in [",", ".", ")", "]", "(", "[", "*", ";", "...", ":", "&", '"']
                 and not w.isdigit()]
        words = [w for w in words
                 if w.encode("utf8") not in nltk.corpus.stopwords.words('english')]
        stemmedWords = [stemmer.stem(w) for w in words]
        fdist = nltk.FreqDist(stemmedWords)
        for word in keyWords:
            if word in fdist:
                if word in wordFreq:
                    wordFreq[word] = wordFreq[word] + fdist[word]
                else:
                    wordFreq[word] = fdist[word]
    sorted_obj2 = sorted(wordFreq.items(), key=lambda x: x[1], reverse=True)
    "Write the 150 most frequent dictionary words to the vocabulary file"
    vocabularyFile = common.get_configuration("model", "VOCABULARY_FILE")
    output = open(vocabularyFile, "w")
    i = 1
    for word in sorted_obj2:
        if i > 150:
            break
        output.write(word[0])
        output.write("\n")
        i = i + 1
    output.close()
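
# Illustrative sketch of the per-article text pipeline above on a single sentence.
# Requires the NLTK 'punkt' and 'stopwords' data to be installed; the tokens shown
# in the comment are approximate.
def _tokenize_example():
    stemmer = nltk.stem.snowball.SnowballStemmer('english')
    text = "Markets fell sharply as debt concerns deepened."
    tokens = nltk.word_tokenize(text)
    words = [w.lower() for w in tokens if w.isalpha()]
    words = [w for w in words if w not in nltk.corpus.stopwords.words('english')]
    # e.g. ['market', 'fell', 'sharpli', 'debt', 'concern', 'deepen']
    return [stemmer.stem(w) for w in words]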
def clusterSet(traningEndDate):
    con = common.getDBConnection()
    cur = con.cursor()
    finalClusterRecord = []
    stockList = ["MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS",
                 "COLCAP", "CRSMBCT", "IBOV", "IGBVL"]
    finalOrderCluster = {}
    for stock in stockList:
        sql = ("select embers_id,sub_sequence,date,last_price,one_day_change,"
               "round(one_day_change/(last_price-one_day_change),4),stock_index "
               "from t_daily_stockindex where stock_index=? and date<=?")
        cur.execute(sql, (stock, traningEndDate))
        rows = cur.fetchall()
        changes = [row[5] for row in rows]
        fdist = nltk.FreqDist(changes)
        # Cluster the distinct one-day change percentages with 1-D k-means
        clusterS = [(0, x) for x in fdist.keys()]
        print "StartTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
        c1 = KMeansClustering(clusterS)
        print "MiddleTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
        cluster = c1.getclusters(20)
        # (a hard-coded copy of a previous clustering result can be substituted here
        #  to skip the slow k-means step while debugging)
        print "EndTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
        namedCluster = {}
        i = 0
        orderCluster = {}
        for clu in cluster:
            i = i + 1
            namedCluster[i] = clu
            orderCluster[i] = [min(clu)[1], max(clu)[1]]
        # Report any overlapping cluster ranges
        for m in orderCluster:
            min1 = orderCluster[m][0]
            max1 = orderCluster[m][1]
            for n in orderCluster:
                min2 = orderCluster[n][0]
                max2 = orderCluster[n][1]
                if (min1 > min2 and min1 < max2) or (max1 > min2 and max1 < max2):
                    print m, " intersect with ", n, " values: ", min1, max1, min2, max2
        # Label every daily record with the cluster its change percent belongs to
        clusterR = []
        for row in rows:
            for nc in namedCluster:
                if (0, row[5]) in namedCluster[nc]:
                    newRow = list(row)
                    newRow.append(nc)
                    clusterR.append(newRow)
                    finalClusterRecord.append(newRow)
        # Insert the labelled records into the database
        insertSql = ("insert into t_daily_enrichedIndex (embers_id,derived_from,"
                     "sub_sequence,stock_index,date,last_price,one_day_change,"
                     "change_percent,trend_type) values (?,?,?,?,?,?,?,?,?)")
        m = 0
        for j in clusterR:
            contentStr = json.dumps(j)
            embersId = hashlib.sha1(contentStr).hexdigest()
            derivedFrom = "[" + str(j[0]) + "]"
            subsequenceId = j[1]
            postDate = j[2]
            lastPrice = j[3]
            oneDayChange = j[4]
            changePercent = j[5]
            stockIndex = j[6]
            trendType = j[7]
            cur.execute(insertSql, (embersId, derivedFrom, subsequenceId, stockIndex,
                                    postDate, lastPrice, oneDayChange, changePercent,
                                    trendType))
            m = m + 1
            if m % 1000 == 0:
                con.commit()
        con.commit()
        finalOrderCluster[stock] = orderCluster
    "Write the trend type ranges into a file"
    trendRangeFile = common.get_configuration("model", "TREND_RANGE_FILE")
    dataStr = json.dumps(finalOrderCluster)
    with open(trendRangeFile, "w") as output:
        output.write(dataStr)
    "Write the training trend records into a file"
    trendSetRecordFile = common.get_configuration("model", "TRAINING_TREND_RECORDS")
    dataStr = json.dumps(finalClusterRecord)
    with open(trendSetRecordFile, "w") as output:
        output.write(dataStr)
    if con:
        con.close()
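
# Sketch of the structure clusterSet writes to TREND_RANGE_FILE and that get_trend_type
# reads back: for each stock index, a map from cluster id (serialised as a string by
# json.dumps) to the [min, max] change percent observed in that cluster. All numbers
# below are hypothetical.
_trend_range_example = {
    "IBOV": {
        "1": [-0.0131, -0.0070],
        "2": [-0.0069, -0.0010],
        "3": [0.0001, 0.0050],
        # ... up to 20 clusters per stock index
    }
}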
def execute(date):
    obj = enricheDa.enrich_all_stock(date)
    warningList = []
    for item in obj:
        warning = warningCheck(item)
        if warning is not None:
            warningList.append(warning)
    return warningList


if __name__ == "__main__":
    if len(sys.argv) == 3:
        startDay = sys.argv[1]
        endDay = sys.argv[2]
        startD = datetime.strptime(startDay, "%Y-%m-%d")
        endD = datetime.strptime(endDay, "%Y-%m-%d")
        resultFile = common.get_configuration("model", "TESTING_RESULT_FILE")
        warningResult = open(resultFile, "w")
        while startD <= endD:
            predictiveDay = datetime.strftime(startD, "%Y-%m-%d")
            warningList = execute(predictiveDay)
            if warningList is not None:
                for warning in warningList:
                    warningResult.write(json.dumps(warning))
                    warningResult.write("\n")
            startD = startD + timedelta(days=1)
        warningResult.close()
    elif len(sys.argv) == 2:
        "The input date format should be 'yyyy-mm-dd'"
        predictiveDay = sys.argv[1]
        warningList = execute(predictiveDay)
    elif len(sys.argv) == 1:
        pass
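
# Example invocations of the driver block above (the script name is hypothetical):
#   python model.py 2013-01-01 2013-03-31   # back-test a date range; warnings go to TESTING_RESULT_FILE
#   python model.py 2013-04-01              # produce warnings for a single predictive day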
def compute_trend_contribution():
    # Read the trend segments file produced by clusterSet
    trendFileName = common.get_configuration("model", "TRAINING_TREND_RECORDS")
    trendFile = open(trendFileName)
    jsonTrend = json.load(trendFile)
    trendFile.close()
    # Group the trend records by stock index
    stockGroupTrend = {}
    for trend in jsonTrend:
        stockIndex = trend[6]
        if stockIndex not in stockGroupTrend:
            stockGroupTrend[stockIndex] = []
        stockGroupTrend[stockIndex].append(trend)
    for item in stockGroupTrend:
        stockGroupTrend[item].sort()
        stockGroupTrend[item] = [w[7] for w in stockGroupTrend[item]]
    finalClusterMatrix = {}
    finalClusterProbability = {}
    for item in stockGroupTrend:
        trendsSerial = stockGroupTrend[item]
        # Prior probability of each trend type for this stock index
        clusterDist = nltk.FreqDist(trendsSerial)
        clusterProbability = {}
        for cl in clusterDist:
            clusterProbability[cl] = "%0.4f" % (
                float(clusterDist[cl]) / sum(clusterDist.values()))
        finalClusterProbability[item] = clusterProbability
        clusterMatrix = {}
        for cluster in range(1, 21):
            # Count how often each trend type occurred 1, 2 and 3 positions before
            # an occurrence of this cluster
            matrix = [[0 for col in range(3)] for row in range(20)]
            for i in range(0, len(trendsSerial)):
                if cluster == trendsSerial[i]:
                    if i - 1 >= 0:
                        t1 = trendsSerial[i - 1]
                        matrix[t1 - 1][0] = matrix[t1 - 1][0] + 1
                    if i - 2 >= 0:
                        t2 = trendsSerial[i - 2]
                        matrix[t2 - 1][1] = matrix[t2 - 1][1] + 1
                    if i - 3 >= 0:
                        t3 = trendsSerial[i - 3]
                        matrix[t3 - 1][2] = matrix[t3 - 1][2] + 1
            # Convert the counts into a Laplace-smoothed contribution matrix
            contributionMatrix = [[0 for col in range(3)] for row in range(20)]
            sumCol = [0, 0, 0]
            for col in range(3):
                for row in range(20):
                    sumCol[col] = sumCol[col] + matrix[row][col]
            for col in range(3):
                for row in range(20):
                    contributionMatrix[row][col] = "%0.4f" % (
                        float(matrix[row][col] + 1) / (sumCol[col] + 20))
            clusterMatrix[cluster] = contributionMatrix
        finalClusterMatrix[item] = clusterMatrix
    "Write the cluster contribution to file"
    clusterContributionFile = common.get_configuration("model", "CLUSTER_CONTRIBUTION_PATH")
    with open(clusterContributionFile, "w") as output:
        jsString = json.dumps(finalClusterMatrix)
        output.write(jsString)
    "Write the cluster probability to file"
    clusterProbabilityFile = common.get_configuration("model", "CLUSTER_PROBABILITY_PATH")
    with open(clusterProbabilityFile, "w") as output2:
        jsString = json.dumps(finalClusterProbability)
        output2.write(jsString)
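
# Worked example of the Laplace smoothing used above: with 20 possible trend types,
# each count gets +1 and each column total gets +20, so trend types that were never
# seen as predecessors still receive a small non-zero contribution. The counts below
# are invented.
def _laplace_smoothing_example():
    columnCounts = [5, 0, 14, 1] + [0] * 16   # occurrences of each of 20 trend types
    total = sum(columnCounts)                 # 20
    smoothed = [float(c + 1) / (total + 20) for c in columnCounts]
    return smoothed[:4]  # [0.15, 0.025, 0.375, 0.05]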