def main():
    # Initialize arguments
    argparser = args.get_parser()
    argparser.add_argument('--local_port', help='Local port to connect to java server', required=True)
    arg = argparser.parse_args()
    localPort = int(arg.local_port)

    # Initialize log
    logs.init(arg)
    global log

    # Initialize the queue with arguments and connect to the specified feed
    log.info("Opening and connecting to queue %s", arg.sub)
    queue.init(arg)
    reader = queue.open(arg.sub, 'sub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)

    # Initialize the writer to publish to a queue
    log.info("Publishing to queue %s", arg.pub)
    writer = queue.open(arg.pub, 'pub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)

    count = 0
    # Connect to Java server
    while True:
        for feedmsg in reader:
            try:
                # Retry until a connection to the local Java server is established
                while True:
                    try:
                        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                        sock.connect(("localhost", localPort))
                        break
                    except socket.error:
                        log.info("Unable to connect to local server")
                log.debug("Connected to java server on port %d" % localPort)
                socketLines = sock.makefile()

                # Clean the message to fix irregularities
                feedmsg = message.clean(feedmsg)
                log.debug("Read message %d. Sending to java" % count)

                # Write message to socket stream
                sock.sendall(json.dumps(feedmsg))
                sock.sendall('\n')

                # Receive result from socket stream
                result = socketLines.readline()
                writer.write(json.dumps(result))
                count += 1
                sock.close()
            except KeyboardInterrupt:
                sys.exit(1)
        else:
            log.info("Server was disconnected.")
def main():
    '''
    Reads tweets from the queue, geo-annotates them and publishes the
    annotated messages to a new queue.
    '''
    ap = args.get_parser()
    ap.add_argument('--cat', action="store_true",
                    help='Read input from standard in and write to standard out.')
    arg = ap.parse_args()
    logs.init(arg)
    geo_mena = GeoMena()
    geo_lac = Geo(geo_region=GEO_REGION.lac)
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = sys.stdout
            for entry in ins:
                entry = entry.decode(encoding='utf-8')
                try:
                    tweet = json.loads(entry.strip())
                    geo_annotate(tweet, geo_mena, geo_lac)
                    if tweet is not None:
                        outs.write(json.dumps(tweet, ensure_ascii=False).encode("utf-8"))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    log.exception('Failed to process message "%s".', entry)
        else:
            queue.init(arg)
            with queue.open(arg.sub, 'r') as inq:
                with queue.open(arg.pub, 'w', capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = geo_annotate(tweet, geo_mena, geo_lac)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".', tweet)
        return 0
    except Exception as e:
        log.exception("Unknown error in main function: {}".format(str(e)))
        return 1
def main():
    ap = args.get_parser()
    ap.add_argument('--test', action="store_true",
                    help="Test flag; if present, run as a test case")
    arg = ap.parse_args()
    assert arg.sub, 'Need a queue to subscribe to'
    assert arg.pub, 'Need a queue to publish to'
    logs.init(arg)
    queue.init(arg)
    test_flag = arg.test

    conn = boto.connect_sdb()
    with queue.open(arg.sub, 'r') as inq:
        for m in inq:
            try:
                durationProcess(conn, m, arg.pub, test_flag)
            except KeyboardInterrupt:
                log.info('GOT SIGINT, exiting!')
                break
            except EmbersException as e:
                log.exception(e.value)
            except:
                log.exception("Unexpected exception in process")
def main():
    # Initialize arguments
    argparser = args.get_parser()
    argparser.add_argument('--json_file', help='JSON file to publish', required=True)
    arg = argparser.parse_args()

    queue.init(arg)
    writer = queue.open(arg.pub, 'pub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)

    try:
        msg_reader = codecs.open(arg.json_file, encoding='utf-8', mode='r')
        message = msg_reader.readline()
        while message:
            writer.write(json.loads(message))
            message = msg_reader.readline()
        msg_reader.close()
    except KeyboardInterrupt:
        pass
    return 0
def main():
    args = parse_args()
    predict_date = args.predict_day
    conf_f = args.conf_f
    cur_list = args.currency_list
    key_id = args.key_id
    secret = args.secret
    zmq_port = args.zmq_port

    conn = boto.connect_sdb(key_id, secret)
    all_config = json.load(open(conf_f))
    # Get the latest version of CONFIG
    latest_version = max([int(k) for k in all_config.keys()])
    CONFIG = all_config[str(latest_version)]
    if cur_list is None:
        cur_list = CONFIG["currency_list"]

    with queue.open(zmq_port, 'w', capture=False) as outq:
        for currency in cur_list:
            prediction = predict(conn, currency, predict_date, CONFIG)
            if prediction and prediction["eventType"] != "0000":
                # Push message to ZMQ
                outq.write(prediction)
def test():
    queue.init()
    port = 'tcp://*:30115'
    with queue.open(port, 'w', capture=True) as outq:
        msgObj = {'embersId': 'f0c030a20e28a12134d9ad0e98fd0861fae7438b',
                  'confidence': 0.13429584033181682,
                  'strength': '4',
                  'derivedFrom': [u'5df18f77723885a12fa6943421c819c90c6a2a02',
                                  u'be031c4dcf3eb9bba2d86870683897dfc4ec4051',
                                  u'3c6571a4d89b17ed01f1345c80cf2802a8a02b7b'],
                  'shiftDate': '2011-08-08',
                  'shiftType': 'Trend',
                  'location': u'Colombia',
                  'date': '2012-10-03',
                  'model': 'Finance Stock Model',
                  'valueSpectrum': 'changePercent',
                  'confidenceIsProbability': True,
                  'population': 'COLCAP'}
        outq.write(msgObj)
        print "Success"
    pathName = os.path.dirname(sys.argv[0])
    print pathName
def main():
    '''
    Reads tweets from the queue, annotates them with country-level geography
    and publishes the annotated messages to a new queue.
    '''
    ap = args.get_parser()
    ap.add_argument('--cat', action="store_true",
                    help='Read input from standard in and write to standard out.')
    ap.add_argument('--region', metavar='REGION', type=str, default=None,
                    help='Specify region to filter by')
    arg = ap.parse_args()
    logs.init(arg)
    filter_region = arg.region
    geoc = GeoCountry()
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = codecs.getwriter('utf-8')(sys.stdout)
            for entry in ins:
                entry = entry.decode(encoding='utf-8')
                try:
                    tweet = json.loads(entry.strip())
                    tweet = annotate(tweet, geoc, filter_region)
                    if tweet is not None:
                        outs.write(json.dumps(tweet, ensure_ascii=False))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    log.exception('Failed to process message "%s".', entry)
        else:
            queue.init(arg)
            iqueue.init(arg)
            qname = "{}-geoCountry-{}".format(os.environ["CLUSTERNAME"], filter_region)
            with iqueue.open(arg.sub, 'r', qname=qname) as inq:
                with queue.open(arg.pub, 'w') as outq:  # , capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = annotate(tweet, geoc, filter_region)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".', tweet)
        return 0
    except Exception as e:
        log.exception("Unknown error in main function-{0!s}.".format(e))
        return 1
def process(t_domain, port, raw_data):
    try:
        # Check if the current data is already in the database; if not, insert it, otherwise skip
        ifExisted = check_if_existed(t_domain, raw_data)
        if not ifExisted:
            embers_id = raw_data["embersId"]
            ty = raw_data["type"]
            name = raw_data["name"]
            last_price = float(raw_data["currentValue"].replace(",", ""))
            pre_last_price = float(raw_data["previousCloseValue"].replace(",", ""))
            one_day_change = round(last_price - pre_last_price, 4)
            #source = raw_data["feed"]
            post_date = raw_data["date"][0:10]
            raw_data['postDate'] = post_date

            # Initialize the enriched data
            enrichedData = {}
            # Calculate zscore 30 and zscore 90
            zscore30 = getZscore(t_domain, post_date, name, one_day_change, 30)
            zscore90 = getZscore(t_domain, post_date, name, one_day_change, 90)
            if ty == "stock":
                trend_type = get_trend_type(raw_data)
            else:
                trend_type = "0"
            derived_from = {"derivedIds": [embers_id]}

            enrichedData["derivedFrom"] = derived_from
            enrichedData["type"] = ty
            enrichedData["name"] = name
            enrichedData["postDate"] = post_date
            enrichedData["currentValue"] = last_price
            enrichedData["previousCloseValue"] = pre_last_price
            enrichedData["oneDayChange"] = one_day_change
            enrichedData["changePercent"] = round((last_price - pre_last_price) / pre_last_price, 4)
            enrichedData["trendType"] = trend_type
            enrichedData["zscore30"] = zscore30
            enrichedData["zscore90"] = zscore90
            enrichedData["operateTime"] = datetime.utcnow().isoformat()
            enrichedDataEmID = hashlib.sha1(json.dumps(enrichedData)).hexdigest()
            enrichedData["embersId"] = enrichedDataEmID

            insert_enriched_data(t_domain, enrichedData)

            # Push data to ZMQ
            with queue.open(port, 'w', capture=False) as outq:
                outq.write(enrichedData)
    except:
        log.exception("Exception captured: %s %s" % (sys.exc_info()[0], str(raw_data)))
def main():
    ap = args.get_parser()
    ap.add_argument('--replay', action="store_true",
                    help="Replay flag; if present, run as a test case")
    # If the rule file is not given as an argument, it is loaded from sys.stdin
    ap.add_argument('--rulefile', type=str, help="The rule file for duration analysis model")
    arg = ap.parse_args()
    if not arg.replay:
        assert arg.sub, 'Need a queue to subscribe to'
        assert arg.pub, 'Need a queue to publish to'
    logs.init(arg)
    queue.init(arg)
    test_flag = arg.replay

    if arg.rulefile:
        rule = eval(open(arg.rulefile).read())
    else:
        # Load the rules from sys.stdin
        rule = eval(sys.stdin.read())

    conn = boto.connect_sdb()
    if not arg.replay:
        with queue.open(arg.sub, 'r') as inq:
            for m in inq:
                try:
                    replayIO = StringIO.StringIO()
                    durationProcess(rule, conn, m, arg.pub, test_flag, replayIO)
                except KeyboardInterrupt:
                    log.info('GOT SIGINT, exiting!')
                    break
                except EmbersException as e:
                    log.exception(e.value)
                except:
                    log.exception("Unexpected exception in process")
    else:
        # Replay mode takes an enriched file as input
        enrich_messages = sys.stdin.readlines()
        for m in enrich_messages:
            m = json.loads(m.strip())
            try:
                replayIO = StringIO.StringIO()
                durationProcess(rule, conn, m, arg.pub, test_flag, replayIO)
            except KeyboardInterrupt:
                log.info('GOT SIGINT, exiting!')
                break
            except EmbersException as e:
                log.exception(e.value)
            except:
                log.exception("Unexpected exception in process")
def process(conn, trend_file, port, raw_data):
    # Check if the current data is already in the database; if not, insert it, otherwise skip
    ifExisted = check_if_existed(conn, raw_data)
    if not ifExisted:
        sql = ("insert into t_bloomberg_prices (embers_id,type,name,current_value,"
               "previous_close_value,update_time,query_time,post_date,source) "
               "values (?,?,?,?,?,?,?,?,?)")
        embers_id = raw_data["embersId"]
        ty = raw_data["type"]
        name = raw_data["name"]
        tmpUT = raw_data["updateTime"].split(" ")[0]
        update_time = raw_data["updateTime"]
        last_price = float(raw_data["currentValue"])
        pre_last_price = float(raw_data["previousCloseValue"])
        one_day_change = round(last_price - pre_last_price, 4)
        query_time = raw_data["queryTime"]
        source = raw_data["feed"]
        post_date = tmpUT.split("/")[2] + "-" + tmpUT.split("/")[0] + "-" + tmpUT.split("/")[1]

        cur = conn.cursor()
        cur.execute(sql, (embers_id, ty, name, last_price, pre_last_price,
                          update_time, query_time, post_date, source))

        # Initialize the enriched data
        enrichedData = {}
        # Calculate zscore 30 and zscore 90
        zscore30 = getZscore(conn, post_date, name, one_day_change, 30)
        zscore90 = getZscore(conn, post_date, name, one_day_change, 90)
        trend_type = get_trend_type(trend_file, raw_data)
        derived_from = "[" + embers_id + "]"

        enrichedData["derivedFrom"] = derived_from
        enrichedData["type"] = ty
        enrichedData["name"] = name
        enrichedData["postDate"] = post_date
        enrichedData["currentValue"] = last_price
        enrichedData["previousCloseValue"] = pre_last_price
        enrichedData["oneDayChange"] = one_day_change
        enrichedData["changePercent"] = round((last_price - pre_last_price) / pre_last_price, 4)
        enrichedData["trendType"] = trend_type
        enrichedData["zscore30"] = zscore30
        enrichedData["zscore90"] = zscore90
        enrichedData["operateTime"] = datetime.now().isoformat()
        enrichedDataEmID = hashlib.sha1(json.dumps(enrichedData)).hexdigest()
        enrichedData["embersId"] = enrichedDataEmID

        insert_enriched_data(conn, enrichedData)
        conn.commit()

        # Push data to ZMQ
        with queue.open(port, 'w', capture=False) as outq:
            outq.write(enrichedData)
def attach_to_queue(index_name, queue_name, type_name=None, limit=None):
    """
    Attaches to the queue_name provided and inserts the messages into Elasticsearch.

    :param index_name: Elasticsearch index to insert the messages into.
    :param queue_name: Name of the queue to attach to.
    :param type_name: Optional Elasticsearch document type.
    :param limit: Optional limit passed through to batch_messages.
    :return: The result of push() when no limit is given.
    """
    queue.init()
    log.debug('Attempting to attach to the queue %s' % queue_name)
    with queue.open(name=queue_name, mode='r') as message_queue:
        if limit:
            batch_messages(iterable_obj=message_queue, es_index_name=index_name,
                           es_type=type_name, limit=limit)
        else:
            return push(iterable_obj=message_queue, es_index_name=index_name,
                        es_type=type_name)
def main():
    ap = args.get_parser()
    ap.add_argument('--f', type=str, help='the news file')
    arg = ap.parse_args()
    assert arg.f, 'Need a file to ingest'
    assert arg.pub, 'Need a queue to publish'
    logs.init(arg)
    queue.init(arg)
    with queue.open(arg.pub, 'w') as q_w, open(arg.f, 'r') as f_r:
        for line in f_r:
            news = json.loads(line)
            q_w.write(news)
def execute(date, cfgPath):
    init(cfgPath)
    enricheDa = ed.Enriched_Data(cfgPath)
    obj = enricheDa.enrich_all_stock(date)
    warningList = []
    for item in obj:
        warning = warningCheck(item)
        if warning is not None:
            warningList.append(warning)

    # Push warnings to ZMQ
    port = common.get_configuration("info", "ZMQ_PORT")
    with queue.open(port, 'w', capture=True) as outq:
        for warning in warningList:
            outq.write(json.dumps(warning, encoding='utf8'))
    return warningList
def process(port, keyId, secret, operateDate):
    # Get DB connection
    conn = boto.connect_sdb(keyId, secret)
    domain = conn.get_domain("bloomberg_news")
    sql = "select * from {} where updateDate = '{}'".format(domain.name, operateDate)
    results = domain.select(sql)
    enrichedNewsList = []
    for result in results:
        enrichedNews = process_news(result)
        if enrichedNews:
            enrichedNewsList.append(enrichedNews)

    enrichedDomain = conn.get_domain("enriched_news")
    # Write the enriched news to SimpleDB and push it into ZMQ
    with queue.open(port, 'w', capture=True) as outq:
        for enrichedNews in enrichedNewsList:
            outq.write(enrichedNews)
            enrichedDomain.put_attributes(enrichedNews["embersId"], enrichedNews)
def main():
    ap = args.get_parser()
    ap.add_argument('--out', help="the output file of warnings")
    arg = ap.parse_args()
    assert arg.sub, 'Need a queue to subscribe!'
    assert arg.out, 'Need a file to store warnings!'
    logs.init(arg)
    queue.init(arg)
    out_file = arg.out
    with queue.open(arg.sub, 'r') as q_r:
        for m in q_r:
            with open(out_file, "a") as out_w:
                if not check_ifexist(m):
                    out_w.write(json.dumps(m) + "\n")
                else:
                    print "Duplicated Warnings"
def main():
    svm_twitter = SVM_Twitter(0.1, 0.1, 'rbf')
    ap = args.get_parser()
    ap.add_argument("--pca_num", default=8, type=int)
    ap.add_argument("--net", type=str)
    ap.add_argument("--k", type=int)
    ap.add_argument("--inf", type=str, help="input folder")
    ap.add_argument("--o_surr", type=str, help="output surrogate file")
    arg = ap.parse_args()

    folder = {"t": "content", "c": "comprehend", "u": "user2user", "e": "entity"}

    assert arg.pub, "Please input a queue to publish surrogate"
    queue.init(arg)
    send_queue = queue.open(arg.pub, "w")
    surr_w = open(arg.o_surr, "w")
    for country in COUNTRY:
        train_file = os.path.join(arg.inf, "%s_train_%d" % (country.replace(" ", ""), arg.k))
        test_file = os.path.join(arg.inf, "%s_test_%d" % (country.replace(" ", ""), arg.k))

        svm_twitter.load_data(train_file, test_file)
        svm_twitter.normalize()
        #svm_twitter.normalize()
        #svm_twitter.pca(arg.pca_num)
        svm_twitter.fit()
        svm_twitter.predict()

        for day in svm_twitter.novel_days:
            surrogate = {"country": country, "date": day.strftime("%Y-%m-%d")}
            send_queue.write(surrogate)
            surr_w.write(json.dumps(surrogate) + "\n")
        print "prediction result: %s " % country
        print [day.strftime("%Y-%m-%d") for day in svm_twitter.novel_days]
        surr_w.flush()
    surr_w.close()
    send_queue.close()
def main():
    ap = args.get_parser()
    default_day = datetime.strftime(datetime.now(), "%Y-%m-%d")
    ap.add_argument("--d", type=str, default=default_day,
                    help="The day to ingest, Format: yyyy-mm-dd")
    ap.add_argument("--domain", default="bloomberg_prices",
                    help="The simpleDB table to store raw data")
    arg = ap.parse_args()
    assert arg.pub, "Need a queue to publish"
    logs.init(arg)
    queue.init(arg)

    with queue.open(arg.pub, "w") as out_q:
        for stock in STOCK_CON:
            if stock == "COLCAP":
                scrape_f = scrape_colcap_url
            if stock == "CHILE65":
                scrape_f = scrape_chile65_url
            msg = ingest_price(arg, stock, scrape_f)
            if msg is not None:
                out_q.write(msg)
                store(arg, msg)
def main():
    ap = args.get_parser()
    ap.add_argument('--dir')
    arg = ap.parse_args()
    assert arg.pub, "Enter a queue to pub"
    file_folder = arg.dir
    files = os.listdir(file_folder)
    w_queue = queue.open(arg.pub, "w", capture=True)
    for f in files:
        full_f = os.path.join(file_folder, f)
        with open(full_f) as af:
            for d_ana in af:
                temp = d_ana.strip().split("|")
                message = {"country": temp[1], "date": temp[0],
                           "z_value": temp[2], "diff_mag": temp[3]}
                w_queue.write(message)
    w_queue.close()
def main():
    ap = args.get_parser()
    ap.add_argument('-c', '--conf', metavar='CONF', type=str, nargs='?',
                    default=os.path.join(os.path.dirname(__file__), 'bloomberg_news_ingest.conf'),
                    help='The location of the configuration file.')
    arg = ap.parse_args()
    assert arg.pub, "--pub required. Need a queue to publish on"
    logs.init(arg)
    conf = get_conf(arg.conf)
    seen_it = shelve.open("bloomberg_news_seen_it.db")
    try:
        with queue.open(arg.pub, 'w', capture=True) as outq:
            for (index, companies) in conf.items():
                for company in companies:
                    articles = get_stock_news(index, company, seen_it)
                    for a in articles:
                        outq.write(a)
    except KeyboardInterrupt:
        log.info('GOT SIGINT, exiting')
def main():
    ap = args.get_parser()
    ap.add_argument('--level', type=str, default="0.6", help='The threshold')
    ap.add_argument('--svm', action='store_true')
    ap.add_argument('--zmq', action='store_true')
    ap.add_argument('--surr', type=str, help="surrogate file")
    ap.add_argument('--warn', type=str, help="warning file")
    arg = ap.parse_args()
    logs.init(arg)
    queue.init(arg)
    assert arg.pub, "Please input a queue to publish warning"
    if arg.zmq:
        assert arg.sub, "Please input a queue to sub surrogate message"

    conn = boto.connect_sdb()
    t_domain = get_domain(conn, "s_holiday")

    if arg.zmq:
        with queue.open(arg.sub, 'r') as inq:
            for m in inq:
                try:
                    if arg.svm:
                        svm_warning(t_domain, m, arg.pub)
                    else:
                        warning_center(t_domain, m, arg.pub, float(arg.level))
                except KeyboardInterrupt:
                    log.info('GOT SIGINT, exiting!')
                    break
                except:
                    log.exception("Exception in Process: %s" % sys.exc_info()[0])
    else:
        with open(arg.warn, "w") as w, open(arg.surr) as r:
            if arg.svm:
                for m in r:
                    m = json.loads(m)
                    warning = svm_warning(t_domain, m, arg.pub)
                    w.write(json.dumps(warning) + "\n")
def process(port, conn, blg_news_file):
    # Get all the news
    newsList = []
    with open(blg_news_file, "r") as news_file:
        lines = news_file.readlines()
        for line in lines:
            line = line.replace("\r", "").replace("\n", "")
            news = json.loads(line)
            newsList.append(news)

    enrichedNewsList = []
    for news in newsList:
        if_succ = insert_news(conn, news)
        if if_succ:
            enrichedNews = process_news(news)
            if enrichedNews:
                enrichedNewsList.append(enrichedNews)

    # Write the enriched news to SqliteDB and push it into ZMQ
    with queue.open(port, 'w', capture=True) as outq:
        for enrichedNews in enrichedNewsList:
            outq.write(enrichedNews)
            insert_enriched_news(conn, enrichedNews)
def main():
    ap = args.get_parser()
    ap.add_argument('--r_file', type=str, help="The rule file")
    ap.add_argument('--o', type=str, help="The output file")
    arg = ap.parse_args()
    assert arg.r_file, 'Need a rule file'
    assert arg.sub, 'Need a queue to subscribe'
    assert arg.o, 'Need a file to output'
    logs.init(arg)
    queue.init(arg)

    u_pattern = re.compile(r"http://(www\.){0,1}[^/]*/[a-z0-9/.\-]*(econ)[a-z0-9\.\-]*", flags=re.I)
    c_rule = create_label_rule(arg.r_file)
    g_rule = create_gold_lable(arg.r_file)
    c_pattern = re.compile(c_rule, flags=re.I)

    with queue.open(arg.sub, 'r') as q_r, codecs.open(arg.o, 'a') as f_a:
        for news in q_r:
            f_news = process(news, u_pattern, c_pattern, g_rule)
            if f_news is not None:
                f_a.write(json.dumps(f_news) + "\n")
                print f_news['date'], f_news['title'], "|", f_news['o_country'], "|", f_news["p_country"]
def send(self, pub_zmq): with queue.open(pub_zmq, "w", capture=True) as q_w: q_w.write(self.warning) time.sleep(1)
def process_single_stock(conn, predict_date, stock_index, regeFlag=False):
    try:
        # Check if the predictive day is a trading day; if so continue, otherwise just return None
        if_trading_day = check_if_tradingday(conn, predict_date, stock_index)
        if if_trading_day is False:
            return None

        predictiveResults = {}
        finalRatio = {}
        clusterProbability = {}
        predictiveProbability = 0
        stockDerived = []
        newsDerived = []

        # Iteratively compute the probability of each cluster for the stock
        cluster_pro_list = CONFIG["clusterProbability"][stock_index]
        term_list, newsDerived = get_term_list(conn, predict_date, stock_index)
        his_cluster_list, stockDerived = get_past_cluster_list(conn, predict_date, stock_index)
        for cluster_type in cluster_pro_list:
            # Compute the contribution of the 3 past days' trend
            stockIndexProbability = compute_stock_index_probability(conn, predict_date, cluster_type,
                                                                    stock_index, his_cluster_list)
            # Compute the contribution of the 3 past days' news
            newsProbability = compute_stock_news_probability(conn, predict_date, cluster_type,
                                                             stock_index, term_list)
            # Combine the two contributions
            predictiveProbability = math.exp(stockIndexProbability + newsProbability)
            predictiveResults[cluster_type] = predictiveProbability

        sumProbability = sum(predictiveResults.itervalues())
        # Get the maximum probability among the predictive values
        for item_key, item_value in predictiveResults.iteritems():
            finalRatio[item_key] = item_value / sumProbability
        sorted_ratio = sorted(finalRatio.iteritems(), key=operator.itemgetter(1), reverse=True)
        clusterProbability[stock_index] = {}
        clusterProbability[stock_index][predict_date] = sorted_ratio[0]

        # Construct the surrogate data
        surrogateData = {}

        # Merge news-derived and stock-derived ids
        derivedFrom = {"derivedIds": []}
        for item in stockDerived:
            derivedFrom["derivedIds"].append(item)
        for item in newsDerived:
            derivedFrom["derivedIds"].append(item)

        model = 'Bayesian - Time serial Model'
        location = CONFIG["location"][stock_index]
        population = stock_index
        confidence = round(sorted_ratio[0][1], 2)
        confidenceIsProbability = True
        shiftType = "Trend"
        valueSpectrum = "changePercent"
        strength = sorted_ratio[0][0]
        shiftDate = predict_date

        surrogateData["derivedFrom"] = derivedFrom
        surrogateData["model"] = model
        surrogateData["location"] = location
        surrogateData["population"] = population
        surrogateData["confidence"] = confidence
        surrogateData["confidenceIsProbability"] = confidenceIsProbability
        surrogateData["shiftType"] = shiftType
        surrogateData["valueSpectrum"] = valueSpectrum
        surrogateData["strength"] = strength
        surrogateData["shiftDate"] = shiftDate
        surrogateData["version"] = __version__
        comments = {}
        comments["configVersion"] = CONFIG["version"]
        comments["model"] = "Bayesian Model"
        surrogateData["comments"] = json.dumps(comments)
        surrogateData["description"] = "Predict the change type of the future day"
        surrogateData["date"] = datetime.utcnow().isoformat()

        # Generate Embers Id
        jsonStr = json.dumps(surrogateData)
        embersId = hashlib.sha1(json.dumps(jsonStr)).hexdigest()
        surrogateData["embersId"] = embersId

        # If the action is not regenerating past warnings, store the surrogate and push it downstream
        if not regeFlag:
            # Push surrogate data into ZMQ
            with queue.open(SURROGATE_PORT, 'w', capture=False) as outq:
                outq.write(surrogateData)
            # Insert the surrogate data into SimpleDB
            insert_surrogatedata(conn, surrogateData)
        return surrogateData
    except Exception as e:
        log.exception("process_single_stock Error: %s" % e.message)
        return None
    rs = t_domain.select(sql)
    return rs


if __name__ == "__main__":
    ap = args.get_parser()
    ap.add_argument('--s_date', type=str, help="the start date of the query")
    ap.add_argument('--e_date', type=str, help='the end date of the query')
    ap.add_argument('--f', action='store_true', help='load enriched message from file')
    ap.add_argument('--sdb', action='store_true', help='load enriched message from simpledb')
    ap.add_argument('--file', type=str, help="the file location")
    arg = ap.parse_args()
    assert arg.pub, 'Need a queue to publish'
    logs.init(arg)
    queue.init(arg)

    if arg.sdb:
        conn = boto.connect_sdb()
        t_domain = conn.get_domain('t_enriched_bloomberg_prices')
        rs = get_enriched_prices(t_domain, arg.s_date, arg.e_date)
    if arg.f:
        with open(arg.file, "r") as r:
            rs = [eval(line.strip()) for line in r.readlines()]

    with queue.open(arg.pub, 'w') as q_w, open("surrogate.txt", "w") as s_w:
        for r in rs:
            print r
            q_w.write(r)
            s_w.write(json.dumps(r) + "\n")
def enrich_single_stock(predict_date, stock_index):
    try:
        # Check if the predictive day is a trading day; if so continue, otherwise just return None
        if_trading_day = check_if_tradingday(predict_date, stock_index)
        if if_trading_day is False:
            return None

        predictiveResults = {}
        finalRatio = {}
        clusterProbability = {}
        predictiveProbability = 0
        stockDerived = []
        newsDerived = []

        # Iteratively compute the probability of each cluster for the stock
        cluster_pro_list = CONFIG["clusterProbability"][stock_index]
        for cluster_type in cluster_pro_list:
            # Compute the contribution of the 3 past days' trend
            stockIndexProbability, stockDerived = compute_stock_index_probability(predict_date, cluster_type, stock_index)
            # Compute the contribution of the 3 past days' news
            newsProbability, newsDerived = compute_stock_news_probability(predict_date, cluster_type, stock_index)
            # Combine the two contributions
            predictiveProbability = math.exp(stockIndexProbability + newsProbability) * float(1e90)
            predictiveResults[cluster_type] = predictiveProbability

        sumProbability = sum(predictiveResults.itervalues())
        # Get the maximum probability among the predictive values
        for item_key, item_value in predictiveResults.iteritems():
            finalRatio[item_key] = item_value / sumProbability
        sorted_ratio = sorted(finalRatio.iteritems(), key=operator.itemgetter(1), reverse=True)
        clusterProbability[stock_index] = {}
        clusterProbability[stock_index][predict_date] = sorted_ratio[0]

        # Construct the surrogate data
        surrogateData = {}
        date = time.strftime('%Y-%m-%d', time.localtime(time.time()))

        # Merge news-derived and stock-derived ids
        derivedFrom = []
        for item in stockDerived:
            derivedFrom.append(item)
        for item in newsDerived:
            derivedFrom.append(item)

        model = 'Bayesian - Time serial Model'
        location = CONFIG["location"][stock_index]
        population = stock_index
        confidence = sorted_ratio[0][1]
        confidenceIsProbability = True
        shiftType = "Trend"
        valueSpectrum = "changePercent"
        strength = sorted_ratio[0][0]
        shiftDate = predict_date

        surrogateData["date"] = date
        surrogateData["derivedFrom"] = derivedFrom
        surrogateData["model"] = model
        surrogateData["location"] = location
        surrogateData["population"] = population
        surrogateData["confidence"] = confidence
        surrogateData["confidenceIsProbability"] = confidenceIsProbability
        surrogateData["shiftType"] = shiftType
        surrogateData["valueSpectrum"] = valueSpectrum
        surrogateData["strength"] = strength
        surrogateData["shiftDate"] = shiftDate

        # Generate Embers Id
        jsonStr = json.dumps(surrogateData)
        embersId = hashlib.sha1(json.dumps(jsonStr)).hexdigest()
        surrogateData["embersId"] = embersId

        # Insert the surrogate data into SimpleDB
        domain_name = "finance_surrogatedata"
        domain = get_domain(domain_name)
        domain.put_attributes(embersId, surrogateData)

        # Push surrogate data into ZMQ
        with queue.open(PORT, 'w', capture=True) as outq:
            outq.write(surrogateData)
        return surrogateData
    except Exception as e:
        log.info("Error: %s" % e.args)
def send(self, pub_zmq):
    with queue.open(pub_zmq, "w") as q_w:
        q_w.write(self.warning)
def warning_check(warningDomain, surObj, regeFlag=False, replayIO=None):
    # Example surObj:
    # {'embersId': 'f0c030a20e28a12134d9ad0e98fd0861fae7438b', 'confidence': 0.13429584033181682,
    #  'strength': '4', 'derivedFrom': [u'5df18f77723885a12fa6943421c819c90c6a2a02',
    #  u'be031c4dcf3eb9bba2d86870683897dfc4ec4051', u'3c6571a4d89b17ed01f1345c80cf2802a8a02b7b'],
    #  'shiftDate': '2011-08-08', 'shiftType': 'Trend', 'location': u'Colombia', 'date': '2012-10-03',
    #  'model': 'Finance Stock Model', 'valueSpectrum': 'changePercent',
    #  'confidenceIsProbability': True, 'population': 'COLCAP'}
    stock_index = surObj["population"]
    trend_type = surObj["strength"]
    date = surObj["shiftDate"]
    replayIO.write("Check whether the surrogate data trigger the warning.\n")
    try:
        pClusster = trend_type
        table_name = "t_enriched_bloomberg_prices"
        sql = "select currentValue from {} where name='{}' and postDate < '{}' order by postDate desc".format(table_name, stock_index, date)
        current_val = 0.0
        rs = warningDomain.select(sql, max_items=1)
        for r in rs:
            current_val = float(r['currentValue'])

        replayIO.write("Retrieve past 30 days' price daily change.\n")
        querySql = "select oneDayChange from {} where name='{}' and postDate <'{}' order by postDate desc".format(table_name, stock_index, date)
        rs = warningDomain.select(querySql, max_items=30)
        moving30 = []
        for r in rs:
            moving30.append(float(r['oneDayChange']))
        replayIO.write("\t %s\n" % json.dumps(moving30))

        replayIO.write("Retrieve past 90 days' price daily change.\n")
        querySql = "select oneDayChange from {} where name='{}' and postDate <'{}' order by postDate desc".format(table_name, stock_index, date)
        rs = warningDomain.select(querySql, max_items=90)
        moving90 = []
        for r in rs:
            moving90.append(float(r['oneDayChange']))
        replayIO.write("\t %s\n" % json.dumps(moving90))

        m30 = sum(moving30) / len(moving30)
        m90 = sum(moving90) / len(moving90)
        std30 = calculator.calSD(moving30)
        std90 = calculator.calSD(moving90)
        eventType, cButtom, cUpper = \
            dailySigmaTrends(stock_index, str(pClusster), m30, m90, std30, std90, current_val)

        dailyRecord = {}
        dailyRecord["date"] = date
        dailyRecord["cBottom"] = cButtom
        dailyRecord["cUpper"] = cUpper
        dailyRecord["currentValue"] = current_val

        # Construct the warning message
        warningMessage = {}
        derivedFrom = {"derivedIds": [surObj["embersId"]]}
        model = surObj["model"]
        event = eventType
        confidence = surObj["confidence"]
        confidenceIsProbability = surObj["confidenceIsProbability"]
        eventDate = surObj["shiftDate"]
        population = surObj["population"]
        location = surObj["location"]
        comments = surObj["comments"]
        comObj = json.loads(comments)

        warningMessage["derivedFrom"] = derivedFrom
        warningMessage["model"] = model
        warningMessage["eventType"] = event
        warningMessage["confidence"] = confidence
        warningMessage["confidenceIsProbability"] = confidenceIsProbability
        warningMessage["eventDate"] = eventDate
        warningMessage["population"] = population
        warningMessage["location"] = location
        warningMessage["version"] = __version__
        operateTime = datetime.utcnow().isoformat()
        warningMessage["date"] = operateTime
        comObj["trendVersion"] = CONFIG["trendRange"]["version"]
        warningMessage["comments"] = json.dumps(comObj)
        warningMessage["description"] = "Use Bayesian to predict stock sigma events"
        embersId = hashlib.sha1(json.dumps(warningMessage)).hexdigest()
        warningMessage["embersId"] = embersId
        replayIO.write("Warning Message: \n\t%s\n" % json.dumps(warningMessage))

        if eventType != "0000":
            # Push warning message to ZMQ
            with queue.open(WARNING_PORT, 'w', capture=True) as outq:
                sleep(1)
                outq.write(warningMessage)
            replayIO.write("Publish Warningmessage to ZMQ!\n")

        if not regeFlag:
            insert_warningmessage(warningDomain, warningMessage)

        if eventType != "0000":
            return warningMessage
        else:
            return None
    except lite.Error, e:
        log.exception("Error: %s" % e.args[0])
def process_single_stock(surrogateDomain, predict_date, stock_index, regeFlag=False, replayIO=None):
    try:
        replayIO.write("Check predict date '%s' whether weekend or holiday\n" % predict_date)
        # Check if the predictive day is a trading day; if so continue, otherwise just return None
        if_trading_day = check_if_tradingday(surrogateDomain, predict_date, stock_index)
        if if_trading_day is False:
            return None
        replayIO.write("\t'%s' is trading day for index '%s'\n" % (predict_date, stock_index))

        predictiveResults = {}
        finalRatio = {}
        clusterProbability = {}
        predictiveProbability = 0
        stockDerived = []
        newsDerived = []

        # Iteratively compute the probability of each cluster for the stock
        cluster_pro_list = CONFIG["clusterProbability"][stock_index]
        replayIO.write("Iteratively compute the probability of each cluster for the stock %s\n" % stock_index)
        replayIO.write("retrieve past 3 days' news:\n ")
        term_list, newsDerived = get_term_list(surrogateDomain, predict_date, stock_index)
        replayIO.write("\tkeywords list: %s\n" % json.dumps(term_list))
        replayIO.write("\tparent news embersIDs: %s\n" % newsDerived)
        replayIO.write("retrieve past 3 days' clusters.\n")
        his_cluster_list, stockDerived = get_past_cluster_list(surrogateDomain, predict_date, stock_index)
        replayIO.write("\tpast 3 days' clusters: %s \n" % json.dumps(his_cluster_list))

        for cluster_type in cluster_pro_list:
            # Compute the contribution of the 3 past days' trend
            stockIndexProbability = compute_stock_index_probability(predict_date, cluster_type,
                                                                    stock_index, his_cluster_list)
            # Compute the contribution of the 3 past days' news
            newsProbability = compute_stock_news_probability(predict_date, cluster_type,
                                                             stock_index, term_list)
            # Combine the two contributions
            predictiveProbability = stockIndexProbability + newsProbability
            predictiveResults[cluster_type] = predictiveProbability

        replayIO.write("Compute the probability for each cluster.\n")
        # Normalize the probability
        max_val = max(predictiveResults.values())
        min_val = min(predictiveResults.values())
        for k in predictiveResults:
            predictiveResults[k] = 1.0 * (predictiveResults[k] - min_val) / (max_val - min_val)
        sumProbability = sum(predictiveResults.itervalues())
        # Get the maximum probability among the predictive values
        for item_key, item_value in predictiveResults.iteritems():
            finalRatio[item_key] = item_value / sumProbability
        sorted_ratio = sorted(finalRatio.iteritems(), key=operator.itemgetter(1), reverse=True)
        clusterProbability[stock_index] = {}
        clusterProbability[stock_index][predict_date] = sorted_ratio[0]
        replayIO.write("\tprobability for each cluster:[%s]\n" % json.dumps(sorted_ratio))

        # Construct the surrogate data
        surrogateData = {}

        # Merge news-derived and stock-derived ids
        derivedFrom = {"derivedIds": []}
        for item in stockDerived:
            derivedFrom["derivedIds"].append(item)
        for item in newsDerived:
            derivedFrom["derivedIds"].append(item)

        model = 'Bayesian - Time serial Model'
        location = CONFIG["location"][stock_index]
        population = stock_index
        #confidence = round(sorted_ratio[0][1], 2)
        confidence = 0.5
        confidenceIsProbability = True
        shiftType = "Trend"
        valueSpectrum = "changePercent"
        strength = sorted_ratio[0][0]
        shiftDate = predict_date

        surrogateData["derivedFrom"] = derivedFrom
        surrogateData["model"] = model
        surrogateData["location"] = location
        surrogateData["population"] = population
        surrogateData["confidence"] = confidence
        surrogateData["confidenceIsProbability"] = confidenceIsProbability
        surrogateData["shiftType"] = shiftType
        surrogateData["valueSpectrum"] = valueSpectrum
        surrogateData["strength"] = strength
        surrogateData["shiftDate"] = shiftDate
        surrogateData["version"] = __version__
        comments = {}
        comments["configVersion"] = CONFIG["version"]
        comments["model"] = "Bayesian Model"
        surrogateData["comments"] = json.dumps(comments)
        surrogateData["description"] = "Predict the change type of the future day"
        surrogateData["date"] = datetime.utcnow().isoformat()

        # Generate Embers Id
        jsonStr = json.dumps(surrogateData)
        embersId = hashlib.sha1(json.dumps(jsonStr)).hexdigest()
        surrogateData["embersId"] = embersId
        replayIO.write("Surrogate message: \n\t%s\n" % json.dumps(surrogateData))

        # If the action is not regenerating past warnings, store the surrogate and push it downstream
        if not regeFlag:
            # Push surrogate data into ZMQ
            with queue.open(SURROGATE_PORT, 'w', capture=False) as outq:
                sleep(1)
                outq.write(surrogateData)
            # Insert the surrogate data into SimpleDB
            insert_surrogatedata(surrogateDomain, surrogateData)
        return surrogateData
    except Exception as e:
        log.exception("process_single_stock Error: %s" % e.message)
        return None
def enrich_single_stock(self, predictiveDate, stockIndex):
    try:
        # Check if the predictive day is a trading day; if so continue, otherwise just return None
        ifTradingDay = self.check_if_tradingday(predictiveDate, stockIndex)
        if ifTradingDay is False:
            return None

        predictiveResults = {}
        finalRatio = {}
        clusterProbability = {}
        predictiveProbability = 0
        stockDerived = []
        newsDerived = []

        # Iteratively compute the probability of each cluster for the stock
        for clusterType in self.enumberate_clusters(stockIndex):
            # Compute the contribution of the 3 past days' trend
            stockIndexProbability, stockDerived = self.compute_stock_index_probability(predictiveDate, clusterType, stockIndex)
            # Compute the contribution of the 3 past days' news
            newsProbability, newsDerived = self.compute_stock_news_probability(predictiveDate, clusterType, stockIndex)
            # Combine the two contributions
            predictiveProbability = math.exp(stockIndexProbability + newsProbability) * float(1e90)
            predictiveResults[clusterType] = predictiveProbability

        sumProbability = sum(predictiveResults.itervalues())
        # Get the maximum probability among the predictive values
        for item_key, item_value in predictiveResults.iteritems():
            finalRatio[item_key] = item_value / sumProbability
        sorted_ratio = sorted(finalRatio.iteritems(), key=operator.itemgetter(1), reverse=True)
        clusterProbability[stockIndex] = {}
        clusterProbability[stockIndex][predictiveDate] = sorted_ratio[0]
        # return clusterProbability

        # Construct the surrogate data
        surrogateData = {}
        date = time.strftime('%Y-%m-%d', time.localtime(time.time()))

        # Merge news-derived and stock-derived ids
        derivedFrom = []
        for item in stockDerived:
            derivedFrom.append(item)
        for item in newsDerived:
            derivedFrom.append(item)

        model = 'Bayesian - Time serial Model'
        location = common.getLocationByStockIndex(stockIndex)
        population = stockIndex
        confidence = sorted_ratio[0][1]
        confidenceIsProbability = True
        shiftType = "Trend"
        valueSpectrum = "changePercent"
        strength = sorted_ratio[0][0]
        shiftDate = predictiveDate

        surrogateData["date"] = date
        surrogateData["derivedFrom"] = derivedFrom
        surrogateData["model"] = model
        surrogateData["location"] = location
        surrogateData["population"] = population
        surrogateData["confidence"] = confidence
        surrogateData["confidenceIsProbability"] = confidenceIsProbability
        surrogateData["shiftType"] = shiftType
        surrogateData["valueSpectrum"] = valueSpectrum
        surrogateData["strength"] = strength
        surrogateData["shiftDate"] = shiftDate

        # Generate Embers Id
        jsonStr = json.dumps(surrogateData)
        embersId = hashlib.sha1(json.dumps(jsonStr)).hexdigest()
        surrogateData["embersId"] = embersId

        self.insert_surrogatedata(surrogateData)

        # Push surrogate data into ZMQ
        port = common.get_configuration("info", "ZMQ_PORT")
        with queue.open(port, 'w', capture=True) as outq:
            outq.write(json.dumps(surrogateData, encoding='utf8'))
        return surrogateData
    except Exception as e:
        log.info("Error: %s" % e.args)
        log.info(traceback.format_exc())