def main():
    ap = args.get_parser()
    ap.add_argument('--test', action="store_true",
                    help="Test flag; if present, the run is treated as a test case")
    arg = ap.parse_args()
    assert arg.sub, 'Need a queue to subscribe to'
    assert arg.pub, 'Need a queue to publish to'

    logs.init(arg)
    queue.init(arg)
    test_flag = arg.test

    conn = boto.connect_sdb()
    with queue.open(arg.sub, 'r') as inq:
        for m in inq:
            try:
                durationProcess(conn, m, arg.pub, test_flag)
            except KeyboardInterrupt:
                log.info('GOT SIGINT, exiting!')
                break
            except EmbersException as e:
                log.exception(e.value)
            except:
                log.exception("Unexpected exception in process")
def main():
    # Initialize arguments
    argparser = args.get_parser()
    argparser.add_argument('--json_file', help='JSON file to publish', required=True)
    arg = argparser.parse_args()

    queue.init(arg)
    writer = queue.open(arg.pub, 'pub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)

    try:
        msg_reader = codecs.open(arg.json_file, encoding='utf-8', mode='r')
        message = msg_reader.readline()
        while message:
            writer.write(json.loads(message))
            message = msg_reader.readline()
        msg_reader.close()
    except KeyboardInterrupt:
        pass
    return 0
def main():
    # Initialize arguments
    argparser = args.get_parser()
    argparser.add_argument('--local_port', help='Local port to connect to java server', required=True)
    arg = argparser.parse_args()
    localPort = int(arg.local_port)

    # Initialize log
    logs.init(arg)
    global log

    # Initialize the queue with arguments and connect to the specified feed
    log.info("Opening and connecting to queue %s", arg.sub)
    queue.init(arg)
    reader = queue.open(arg.sub, 'sub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)

    # Initialize the writer to publish to a queue
    log.info("Publishing to queue %s", arg.pub)
    writer = queue.open(arg.pub, 'pub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)

    count = 0
    # Connect to Java server
    while True:
        for feedmsg in reader:
            try:
                while True:
                    try:
                        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                        sock.connect(("localhost", localPort))
                        break
                    except:
                        log.info("Unable to connect to local server")
                log.debug("Connected to java server on port %d" % localPort)
                socketLines = sock.makefile()

                # Clean the message to fix irregularities
                feedmsg = message.clean(feedmsg)
                log.debug("Read message %d. Sending to java" % count)

                # Write message to socket stream
                sock.sendall(json.dumps(feedmsg))
                sock.sendall('\n')

                # Receive result from socket stream
                result = socketLines.readline()
                writer.write(json.dumps(result))
                count += 1
                sock.close()
            except KeyboardInterrupt:
                sys.exit(1)
        else:
            log.info("Server was disconnected.")
def main():
    # Initiate parameters
    global TREND_RANGE

    arg = parse_args()
    conn = boto.connect_sdb()
    operate_date = arg.operate_date
    start_date = arg.start_date
    end_date = arg.end_date
    port = arg.pub
    assert port, "Need a queue to publish to"

    logs.init(arg)
    queue.init(arg)

    t_domain = get_domain(conn, 't_enriched_bloomberg_prices')

    # Load the trend changeType range file
    #trend_file = args.trend_file
    trendObject = json.load(sys.stdin)

    # Get the latest version of the trend range
    trend_versionNum = max([int(v) for v in trendObject.keys()])
    # To avoid mutating the initial values, serialize the JSON object to a string
    # and load it back to create a new object
    TREND_RANGE = json.loads(json.dumps(trendObject[str(trend_versionNum)]))

    # If a date range is given, process every day in that range;
    # otherwise process only operate_date
    if start_date is None:
        # Get the raw price list
        raw_price_list = []
        rs = get_raw_data(conn, operate_date)
        for r in rs:
            raw_price_list.append(r)
        for raw_data in raw_price_list:
            process(t_domain, port, raw_data)
    else:
        t_format = "%Y-%m-%d"
        s_date = datetime.strptime(start_date, t_format)
        e_date = datetime.strptime(end_date, t_format)
        while s_date <= e_date:
            raw_price_list = []
            rs = get_raw_data(conn, datetime.strftime(s_date, t_format))
            for r in rs:
                raw_price_list.append(r)
            for raw_data in raw_price_list:
                process(t_domain, port, raw_data)
            s_date = s_date + timedelta(days=1)

    # Sleep 5 s to wait for SimpleDB to commit
    time.sleep(5)

    # Write back the trend file
    new_version_num = trend_versionNum + 1
    trendObject[str(new_version_num)] = TREND_RANGE
    json.dump(trendObject, sys.stdout)
def test():
    queue.init()
    port = 'tcp://*:30115'
    with queue.open(port, 'w', capture=True) as outq:
        msgObj = {'embersId': 'f0c030a20e28a12134d9ad0e98fd0861fae7438b',
                  'confidence': 0.13429584033181682,
                  'strength': '4',
                  'derivedFrom': [u'5df18f77723885a12fa6943421c819c90c6a2a02',
                                  u'be031c4dcf3eb9bba2d86870683897dfc4ec4051',
                                  u'3c6571a4d89b17ed01f1345c80cf2802a8a02b7b'],
                  'shiftDate': '2011-08-08',
                  'shiftType': 'Trend',
                  'location': u'Colombia',
                  'date': '2012-10-03',
                  'model': 'Finance Stock Model',
                  'valueSpectrum': 'changePercent',
                  'confidenceIsProbability': True,
                  'population': 'COLCAP'}
        outq.write(msgObj)
        print "Success"
        pathName = os.path.dirname(sys.argv[0])
        print pathName
def main():
    '''
    Reads from the queue, retrieves the content from the source website
    and publishes the content to a new queue.
    '''
    ap = args.get_parser()
    ap.add_argument('--cat', action="store_true",
                    help='Read input from standard in and write to standard out.')
    arg = ap.parse_args()
    logs.init(arg)
    geo_mena = GeoMena()
    geo_lac = Geo(geo_region=GEO_REGION.lac)
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = sys.stdout
            for entry in ins:
                entry = entry.decode(encoding='utf-8')
                try:
                    tweet = json.loads(entry.strip())
                    geo_annotate(tweet, geo_mena, geo_lac)
                    if tweet is not None:
                        outs.write(json.dumps(tweet, ensure_ascii=False).encode("utf-8"))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    log.exception('Failed to process message "%s".', entry)
        else:
            queue.init(arg)
            with queue.open(arg.sub, 'r') as inq:
                with queue.open(arg.pub, 'w', capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = geo_annotate(tweet, geo_mena, geo_lac)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".', tweet)
        return 0
    except Exception as e:
        log.exception("Unknown error in main function: {}".format(str(e)))
        return 1
def main():
    '''
    Reads from the queue, retrieves the content from the source website
    and publishes the content to a new queue.
    '''
    ap = args.get_parser()
    ap.add_argument('--cat', action="store_true",
                    help='Read input from standard in and write to standard out.')
    ap.add_argument('--region', metavar='REGION', type=str, default=None,
                    help='Specify region to filter by')
    arg = ap.parse_args()
    logs.init(arg)
    filter_region = arg.region
    geoc = GeoCountry()
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = codecs.getwriter('utf-8')(sys.stdout)
            for entry in ins:
                entry = entry.decode(encoding='utf-8')
                try:
                    tweet = json.loads(entry.strip())
                    tweet = annotate(tweet, geoc, filter_region)
                    if tweet is not None:
                        outs.write(json.dumps(tweet, ensure_ascii=False))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    log.exception('Failed to process message "%s".', entry)
        else:
            queue.init(arg)
            iqueue.init(arg)
            qname = "{}-geoCountry-{}".format(os.environ["CLUSTERNAME"], filter_region)
            with iqueue.open(arg.sub, 'r', qname=qname) as inq:
                with queue.open(arg.pub, 'w') as outq:  # , capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = annotate(tweet, geoc, filter_region)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".', tweet)
        return 0
    except Exception as e:
        log.exception("Unknown error in main function: {0!s}.".format(e))
        return 1
def main():
    ap = args.get_parser()
    ap.add_argument('--replay', action="store_true",
                    help="Test flag; if present, the run is treated as a test case")
    # If the rule file is not given as an argument, it is loaded from sys.stdin
    ap.add_argument('--rulefile', type=str,
                    help="The rule file for the duration analysis model")
    arg = ap.parse_args()

    if not arg.replay:
        assert arg.sub, 'Need a queue to subscribe to'
        assert arg.pub, 'Need a queue to publish to'

    logs.init(arg)
    queue.init(arg)
    test_flag = arg.replay

    if arg.rulefile:
        rule = eval(open(arg.rulefile).read())
    else:
        # Load the rules from sys.stdin
        rule = eval(sys.stdin.read())

    conn = boto.connect_sdb()
    if not arg.replay:
        with queue.open(arg.sub, 'r') as inq:
            for m in inq:
                try:
                    replayIO = StringIO.StringIO()
                    durationProcess(rule, conn, m, arg.pub, test_flag, replayIO)
                except KeyboardInterrupt:
                    log.info('GOT SIGINT, exiting!')
                    break
                except EmbersException as e:
                    log.exception(e.value)
                except:
                    log.exception("Unexpected exception in process")
    else:
        # The replay model takes an enriched file as input
        enrich_messages = sys.stdin.readlines()
        for m in enrich_messages:
            m = json.loads(m.strip())
            try:
                replayIO = StringIO.StringIO()
                durationProcess(rule, conn, m, arg.pub, test_flag, replayIO)
            except KeyboardInterrupt:
                log.info('GOT SIGINT, exiting!')
                break
            except EmbersException as e:
                log.exception(e.value)
            except:
                log.exception("Unexpected exception in process")
def attach_to_queue(index_name, queue_name, type_name=None, limit=None):
    """
    Attaches to the queue_name provided and inserts the messages into Elasticsearch
    :param index_name: Elasticsearch index to write to
    :param queue_name: name of the queue to read messages from
    :param type_name: Elasticsearch document type to index the messages as
    :param limit: if set, messages are indexed in batches of at most this size
    :return:
    """
    queue.init()
    log.debug('Attempting to attach to the queue %s' % queue_name)
    with queue.open(name=queue_name, mode='r') as message_queue:
        if limit:
            batch_messages(iterable_obj=message_queue, es_index_name=index_name,
                           es_type=type_name, limit=limit)
        else:
            return push(iterable_obj=message_queue, es_index_name=index_name,
                        es_type=type_name)
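# A minimal usage sketch for attach_to_queue; the index and queue names below are
# hypothetical placeholders, not taken from this codebase:
#
#   attach_to_queue(index_name='embers-demo', queue_name='demo-warnings', type_name='demo-warnings')
#
# With limit set, messages are indexed in batches via batch_messages rather than
# streamed through push:
#
#   attach_to_queue(index_name='embers-demo', queue_name='demo-warnings', type_name='demo-warnings', limit=500)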
def main():
    ap = args.get_parser()
    ap.add_argument('--f', type=str, help='the news file to ingest')
    arg = ap.parse_args()
    assert arg.f, 'Need a file to ingest'
    assert arg.pub, 'Need a queue to publish to'
    logs.init(arg)
    queue.init(arg)

    with queue.open(arg.pub, 'w') as q_w, open(arg.f, 'r') as f_r:
        for line in f_r:
            news = json.loads(line)
            q_w.write(news)
def main():
    ap = args.get_parser()
    ap.add_argument('--out', help="the output file for warnings")
    arg = ap.parse_args()
    assert arg.sub, 'Need a queue to subscribe to!'
    assert arg.out, 'Need a file to store warnings!'
    logs.init(arg)
    queue.init(arg)

    out_file = arg.out
    with queue.open(arg.sub, 'r') as q_r:
        for m in q_r:
            with open(out_file, "a") as out_w:
                if not check_ifexist(m):
                    out_w.write(json.dumps(m) + "\n")
                else:
                    print "Duplicated Warnings"
def main():
    svm_twitter = SVM_Twitter(0.1, 0.1, 'rbf')
    ap = args.get_parser()
    ap.add_argument("--pca_num", default=8, type=int)
    ap.add_argument("--net", type=str)
    ap.add_argument("--k", type=int)
    ap.add_argument("--inf", type=str, help="input folder")
    ap.add_argument("--o_surr", type=str, help="output surrogate file")
    arg = ap.parse_args()

    folder = {"t": "content", "c": "comprehend", "u": "user2user", "e": "entity"}

    assert arg.pub, "Please input a queue to publish surrogate"
    queue.init(arg)
    send_queue = queue.open(arg.pub, "w")
    surr_w = open(arg.o_surr, "w")

    for country in COUNTRY:
        train_file = os.path.join(arg.inf, "%s_train_%d" % (country.replace(" ", ""), arg.k))
        test_file = os.path.join(arg.inf, "%s_test_%d" % (country.replace(" ", ""), arg.k))
        svm_twitter.load_data(train_file, test_file)
        svm_twitter.normalize()
        #svm_twitter.pca(arg.pca_num)
        svm_twitter.fit()
        svm_twitter.predict()
        for day in svm_twitter.novel_days:
            surrogate = {"country": country, "date": day.strftime("%Y-%m-%d")}
            send_queue.write(surrogate)
            surr_w.write(json.dumps(surrogate) + "\n")
        print "prediction result: %s " % country
        print [day.strftime("%Y-%m-%d") for day in svm_twitter.novel_days]
        surr_w.flush()

    surr_w.close()
    send_queue.close()
def main():
    ap = args.get_parser()
    default_day = datetime.strftime(datetime.now(), "%Y-%m-%d")
    ap.add_argument("--d", type=str, default=default_day,
                    help="The day to ingest, format: yyyy-mm-dd")
    ap.add_argument("--domain", default="bloomberg_prices",
                    help="The SimpleDB domain used to store raw data")
    arg = ap.parse_args()
    assert arg.pub, "Need a queue to publish to"
    logs.init(arg)
    queue.init(arg)

    with queue.open(arg.pub, "w") as out_q:
        for stock in STOCK_CON:
            if stock == "COLCAP":
                scrape_f = scrape_colcap_url
            if stock == "CHILE65":
                scrape_f = scrape_chile65_url
            msg = ingest_price(arg, stock, scrape_f)
            if msg is not None:
                out_q.write(msg)
                store(arg, msg)
def main():
    ap = args.get_parser()
    ap.add_argument('--level', type=str, default="0.6", help='The threshold')
    ap.add_argument('--svm', action='store_true')
    ap.add_argument('--zmq', action='store_true')
    ap.add_argument('--surr', type=str, help="surrogate file")
    ap.add_argument('--warn', type=str, help="warning file")
    arg = ap.parse_args()
    logs.init(arg)
    queue.init(arg)

    assert arg.pub, "Please input a queue to publish warnings to"
    if arg.zmq:
        assert arg.sub, "Please input a queue to subscribe to for surrogate messages"

    conn = boto.connect_sdb()
    t_domain = get_domain(conn, "s_holiday")

    if arg.zmq:
        with queue.open(arg.sub, 'r') as inq:
            for m in inq:
                try:
                    if arg.svm:
                        svm_warning(t_domain, m, arg.pub)
                    else:
                        warning_center(t_domain, m, arg.pub, float(arg.level))
                except KeyboardInterrupt:
                    log.info('GOT SIGINT, exiting!')
                    break
                except:
                    log.exception("Exception in process: %s" % sys.exc_info()[0])
    else:
        with open(arg.warn, "w") as w, open(arg.surr) as r:
            if arg.svm:
                for m in r:
                    m = json.loads(m)
                    warning = svm_warning(t_domain, m, arg.pub)
                    w.write(json.dumps(warning) + "\n")
def main():
    ap = args.get_parser()
    ap.add_argument('--r_file', type=str, help="The rule file")
    ap.add_argument('--o', type=str, help="The output file")
    arg = ap.parse_args()
    assert arg.r_file, 'Need a rule file'
    assert arg.sub, 'Need a queue to subscribe'
    assert arg.o, 'Need a file to output'
    logs.init(arg)
    queue.init(arg)

    u_pattern = re.compile(r"http://(www\.){0,1}[^/]*/[a-z0-9/.\-]*(econ)[a-z0-9\.\-]*", flags=re.I)
    c_rule = create_label_rule(arg.r_file)
    g_rule = create_gold_lable(arg.r_file)
    c_pattern = re.compile(c_rule, flags=re.I)

    with queue.open(arg.sub, 'r') as q_r, codecs.open(arg.o, 'a') as f_a:
        for news in q_r:
            f_news = process(news, u_pattern, c_pattern, g_rule)
            if f_news is not None:
                f_a.write(json.dumps(f_news) + "\n")
                print f_news['date'], f_news['title'], "|", f_news['o_country'], "|", f_news["p_country"]
    rs = t_domain.select(sql)
    return rs


if __name__ == "__main__":
    ap = args.get_parser()
    ap.add_argument('--s_date', type=str, help="the start date of the query")
    ap.add_argument('--e_date', type=str, help='the end date of the query')
    ap.add_argument('--f', action='store_true', help='load enriched messages from file')
    ap.add_argument('--sdb', action='store_true', help='load enriched messages from SimpleDB')
    ap.add_argument('--file', type=str, help="the file location")
    arg = ap.parse_args()
    assert arg.pub, 'Need a queue to publish'
    logs.init(arg)
    queue.init(arg)

    if arg.sdb:
        conn = boto.connect_sdb()
        t_domain = conn.get_domain('t_enriched_bloomberg_prices')
        rs = get_enriched_prices(t_domain, arg.s_date, arg.e_date)

    if arg.f:
        with open(arg.file, "r") as r:
            rs = [eval(line.strip()) for line in r.readlines()]

    with queue.open(arg.pub, 'w') as q_w, open("surrogate.txt", "w") as s_w:
        for r in rs:
            print r
            q_w.write(r)
            s_w.write(json.dumps(r) + "\n")
def main():
    global CONFIG, VOCABULARY_FILE, WARNING_PORT, SURROGATE_PORT, __version__, KEY_ID, SECRET, T_EASTERN, T_UTC

    # Initiate the time zone settings
    T_UTC = pytz.utc
    T_EASTERN = pytz.timezone("US/Eastern")

    # Get the input args
    args = parse_args()
    rege_date = args.rege_date
    KEY_ID = args.key_id
    SECRET = args.secret

    logs.init(args)
    queue.init(args)

    # Replace the DB connection with SimpleDB
    # conn = lite.connect(db_file)
    conn = boto.connect_sdb(KEY_ID, SECRET)

    # If rege_date is set, regenerate that past day's prediction; otherwise run a normal prediction
    if not rege_date:
        # Normal prediction
        predict_date = args.predict_date
        model_cfg = args.model_cfg
        WARNING_PORT = args.warning_port
        SURROGATE_PORT = args.surrogate_port
        stock_list = None
        if args.stock_list:
            stock_list = args.stock_list

        # Get the latest version of the config object
        f = open(model_cfg, "r")
        configObj = json.load(f)
        f.close()
        con_versionNum = max([int(v) for v in configObj.keys()])
        CONFIG = configObj[str(con_versionNum)]

        # Get the latest version of the trend range object
        clusterTrends = json.load(sys.stdin)
        trend_versionNum = max([int(v) for v in clusterTrends.keys()])
        CONFIG["trendRange"] = {"version": str(trend_versionNum),
                                "range": clusterTrends[str(trend_versionNum)]}

        if not stock_list:
            stock_list = CONFIG["stocks"]

        # Retrain the model configuration if the current day is Saturday
        weekDay = datetime.strptime(predict_date, "%Y-%m-%d").weekday()
        if weekDay == 5:
            finalClusterProbability, finalClusterMatrix = re_training(conn, predict_date, stock_list)
            new_config = json.loads(json.dumps(CONFIG))
            new_config["clusterProbability"] = finalClusterProbability
            new_config["clusterContribution"] = finalClusterMatrix
            # Write back to the configuration file
            new_version_num = con_versionNum + 1
            new_config["version"] = new_version_num
            configObj[str(new_version_num)] = new_config
            with open(model_cfg, "w") as out_q:
                out_q.write(json.dumps(configObj))

        # Process the stocks one by one
        for stock in stock_list:
            surrogate = process_single_stock(conn, predict_date, stock)
            if surrogate:
                warning = warning_check(conn, surrogate)
    else:
        # Regenerate the old prediction
        model_cfg = args.model_cfg
        stock_list = None
        if args.stock_list:
            stock_list = args.stock_list

        # Get the version of the config object used for the indicated prediction
        versionObj = get_predicion_version(conn, rege_date)
        configVersionNum = versionObj["configVersion"]
        trendVersionNum = versionObj["trendVersion"]
        configObj = json.load(open(model_cfg))
        if configVersionNum in configObj:
            CONFIG = configObj[configVersionNum]
        else:
            CONFIG = configObj["1"]

        # Get the latest version of the trend range object
        clusterTrends = json.load(sys.stdin)
        CONFIG["trendRange"] = {"version": str(trendVersionNum),
                                "range": clusterTrends[trendVersionNum]}

        if not stock_list:
            stock_list = CONFIG["stocks"]

        # Process the stocks one by one
        for stock in stock_list:
            surrogate = process_single_stock(conn, rege_date, stock, True)
            if surrogate:
                warning = warning_check(conn, surrogate, True)
        return warning

    if conn:
        conn.close()
def main():
    global CONFIG, VOCABULARY_FILE, WARNING_PORT, SURROGATE_PORT, __version__, KEY_ID, SECRET, T_EASTERN, T_UTC

    # Initiate the time zone settings
    T_UTC = pytz.utc
    T_EASTERN = pytz.timezone("US/Eastern")

    # Get the input args
    arg = parse_args()
    rege_date = arg.rege_date
    KEY_ID = arg.aws_key
    SECRET = arg.aws_secret

    logs.init(arg)
    queue.init(arg)

    conn = boto.connect_sdb(KEY_ID, SECRET)
    # Initiate the SimpleDB domains
    surrogateDomain = get_domain(conn, arg.surrogate_domain)
    warningDomain = get_domain(conn, arg.warning_domain)
    WARNING_PORT = arg.warning_port
    SURROGATE_PORT = arg.surrogate_port

    # If rege_date is set, regenerate that past day's prediction; otherwise run a normal prediction
    if not rege_date:
        # Normal prediction
        predict_date = arg.predict_date
        model_cfg = arg.model_cfg
        stock_list = None
        if arg.stock_list:
            stock_list = arg.stock_list

        # Get the latest version of the config object
        f = open(model_cfg, "r")
        configObj = json.load(f)
        f.close()
        con_versionNum = max([int(v) for v in configObj.keys()])
        CONFIG = configObj[str(con_versionNum)]

        # Get the latest version of the trend range object
        clusterTrends = json.load(sys.stdin)
        trend_versionNum = max([int(v) for v in clusterTrends.keys()])
        CONFIG["trendRange"] = {"version": str(trend_versionNum),
                                "range": clusterTrends[str(trend_versionNum)]}

        if not stock_list:
            stock_list = CONFIG["stocks"]

        # Retrain the model configuration if the current day is Saturday
        #weekDay = datetime.strptime(predict_date, "%Y-%m-%d").weekday()
        #if weekDay == 5:
        #    finalClusterProbability, finalClusterMatrix = re_training(surrogateDomain, predict_date, stock_list)
        #    new_config = json.loads(json.dumps(CONFIG))
        #    new_config["clusterProbability"] = finalClusterProbability
        #    new_config["clusterContribution"] = finalClusterMatrix
        #    # Write back to the configuration file
        #    new_version_num = con_versionNum + 1
        #    new_config["version"] = new_version_num
        #    configObj[str(new_version_num)] = new_config
        #    with open(model_cfg, "w") as out_q:
        #        out_q.write(json.dumps(configObj))

        # Process the stocks one by one
        for stock in stock_list:
            surrogate = process_single_stock(surrogateDomain, predict_date, stock)
            if surrogate:
                warning = warning_check(warningDomain, surrogate)
    else:
        # Regenerate the old prediction
        model_cfg = arg.model_cfg
        stock_list = None
        if arg.stock_list:
            stock_list = arg.stock_list

        # Get the version of the config object used for the indicated prediction
        versionObj = get_predicion_version(warningDomain, rege_date)
        configVersionNum = versionObj["configVersion"]
        trendVersionNum = versionObj["trendVersion"]
        configObj = json.load(open(model_cfg))
        if configVersionNum in configObj:
            CONFIG = configObj[configVersionNum]
        else:
            CONFIG = configObj["1"]

        # Get the trend range object and fall back to the latest available version
        clusterTrends = json.load(sys.stdin)
        tmpVersion = int(trendVersionNum)
        while tmpVersion >= 1:
            if str(tmpVersion) in clusterTrends:
                trendVersionNum = str(tmpVersion)
                break
            else:
                tmpVersion -= 1
        CONFIG["trendRange"] = {"version": str(trendVersionNum),
                                "range": clusterTrends[trendVersionNum]}

        if not stock_list:
            stock_list = CONFIG["stocks"]

        # Process the stocks one by one
        for stock in stock_list:
            replayIO = StringIO.StringIO()
            surrogate = process_single_stock(surrogateDomain, rege_date, stock, True, replayIO)
            if surrogate:
                warning = warning_check(warningDomain, surrogate, True, replayIO)
            replayInfo = replayIO.getvalue()
            weid = getwarningeid(surrogateDomain, rege_date, stock)
            with open("./demo/%s.txt" % weid, "w") as win:
                win.write(replayInfo)

    if conn:
        conn.close()
def main(): """ Utility to cache messages from a queue into Elasticsearch -q | --queue : Read from <queue> and write the messages to Elasticsearch. Settings are read from embers.conf --log_file : Path to write the log file to --log_level : Logging level """ from etool import args global log arg_parser = args.get_parser() arg_parser.add_argument('-q', '--queue', help='Queue name to index into Elasticsearch') arg_parser.add_argument('-s', '--s3fromq', action='store_true', help='ingest from S3 prefix derived from queue name') arg_parser.add_argument('-p', '--prefix', help='Ingest from prefix') #arg_parser.add_argument('-t', '--typename', default='noqueue', help='Type for prefix ingest') arg_parser.add_argument('-t', '--typename', help='Type for prefix ingest') arg_parser.add_argument('-l', '--tmpcopy', default='/home/embers/data/tmpcopy',help='Name of local copy of S3 file (same for all S3 files)') arg_parser.add_argument('-c', '--chunk', type=int, default=100,help='Chunk size for S3 ingest') arg_parser.add_argument('-i', '--clustername', help='Clustername to determine index name') arg_parser.add_argument('-w', '--withbase', action="store_true", help="Add basename to prefix when looking for type.") arg_parser.add_argument('--startdate', help='start date in format like 2015-01-02') arg_parser.add_argument('--enddate', help='end date in format like 2015-01-02') arg = arg_parser.parse_args() #assert (arg.queue or (arg.prefix and arg.typename)), 'Either --queue (with optional --s3fromq/--typename) or --prefix with --typename must be provided' assert (arg.queue or arg.prefix ), 'Either --queue (with optional --s3fromq/--typename) or --prefix must be provided' log = logs.getLogger(log_name=arg.log_file) logs.init(arg, l=arg.log_level, logfile=arg.log_file) index_name = general.get_index_name(arg.clustername) queue.init() if arg.prefix or (arg.queue and arg.s3fromq): if arg.prefix: prefix = arg.prefix # get queue name or its substitute for S3 objects from prefix if arg.typename: type_name = arg.typename else: type_name = queue.conf.get_prefixpair(prefix=prefix,includeS3=True,withBasename=arg.withbase) if not type_name: log.error("Could not get type from prefix %s" % prefix) return 1 log.warning("type_name=%s from prefix=%s" % (type_name, prefix)) else: type_name = arg.queue prefix, include = queue.conf.get_prefix_for_queue(type_name, withBasename=False) if not prefix: log.error("Could not get S3 prefix for queue %s" % type_name) return 1 if not general.get_es_connection().indices.exists_type(index=index_name, doc_type=type_name): # Create mapping if the queue has not been stored in Elasticsearch yet index_setup.add_type(index_name=index_name, type_name=type_name) conn_s3 = boto.connect_s3(aws_access_key_id=arg.aws_key, aws_secret_access_key=arg.aws_secret) bucket = conn_s3.get_bucket(arg.bucket) # connect to S3, get bucket ptr for arg.bucket attach_to_s3(index_name, s3prefix=prefix, bucket=bucket, type_name=type_name, tmpcopy=arg.tmpcopy, chunk_size=arg.chunk, startdate=arg.startdate, enddate=arg.enddate) else: if arg.typename: type_name=arg.typename else: type_name=arg.queue if not general.get_es_connection().indices.exists_type(index=index_name, doc_type=type_name): # Create mapping if the queue has not been stored in Elasticsearch yet index_setup.add_type(index_name=index_name, type_name=type_name) attach_to_queue(index_name=index_name, queue_name=arg.queue, type_name=type_name)
def main(): """ Utility to cache messages from a queue into Elasticsearch -q | --queue : Read from <queue> and write the messages to Elasticsearch. Settings are read from embers.conf --log_file : Path to write the log file to --log_level : Logging level """ from etool import args global log arg_parser = args.get_parser() arg_parser.add_argument('-q', '--queue', help='Queue name to index into Elasticsearch') arg_parser.add_argument( '-s', '--s3fromq', action='store_true', help='ingest from S3 prefix derived from queue name') arg_parser.add_argument('-p', '--prefix', help='Ingest from prefix') #arg_parser.add_argument('-t', '--typename', default='noqueue', help='Type for prefix ingest') arg_parser.add_argument('-t', '--typename', help='Type for prefix ingest') arg_parser.add_argument( '-l', '--tmpcopy', default='/home/embers/data/tmpcopy', help='Name of local copy of S3 file (same for all S3 files)') arg_parser.add_argument('-c', '--chunk', type=int, default=100, help='Chunk size for S3 ingest') arg_parser.add_argument('-i', '--clustername', help='Clustername to determine index name') arg_parser.add_argument( '-w', '--withbase', action="store_true", help="Add basename to prefix when looking for type.") arg_parser.add_argument('--startdate', help='start date in format like 2015-01-02') arg_parser.add_argument('--enddate', help='end date in format like 2015-01-02') arg = arg_parser.parse_args() #assert (arg.queue or (arg.prefix and arg.typename)), 'Either --queue (with optional --s3fromq/--typename) or --prefix with --typename must be provided' assert ( arg.queue or arg.prefix ), 'Either --queue (with optional --s3fromq/--typename) or --prefix must be provided' log = logs.getLogger(log_name=arg.log_file) logs.init(arg, l=arg.log_level, logfile=arg.log_file) index_name = general.get_index_name(arg.clustername) queue.init() if arg.prefix or (arg.queue and arg.s3fromq): if arg.prefix: prefix = arg.prefix # get queue name or its substitute for S3 objects from prefix if arg.typename: type_name = arg.typename else: type_name = queue.conf.get_prefixpair( prefix=prefix, includeS3=True, withBasename=arg.withbase) if not type_name: log.error("Could not get type from prefix %s" % prefix) return 1 log.warning("type_name=%s from prefix=%s" % (type_name, prefix)) else: type_name = arg.queue prefix, include = queue.conf.get_prefix_for_queue( type_name, withBasename=False) if not prefix: log.error("Could not get S3 prefix for queue %s" % type_name) return 1 if not general.get_es_connection().indices.exists_type( index=index_name, doc_type=type_name): # Create mapping if the queue has not been stored in Elasticsearch yet index_setup.add_type(index_name=index_name, type_name=type_name) conn_s3 = boto.connect_s3(aws_access_key_id=arg.aws_key, aws_secret_access_key=arg.aws_secret) bucket = conn_s3.get_bucket( arg.bucket) # connect to S3, get bucket ptr for arg.bucket attach_to_s3(index_name, s3prefix=prefix, bucket=bucket, type_name=type_name, tmpcopy=arg.tmpcopy, chunk_size=arg.chunk, startdate=arg.startdate, enddate=arg.enddate) else: if arg.typename: type_name = arg.typename else: type_name = arg.queue if not general.get_es_connection().indices.exists_type( index=index_name, doc_type=type_name): # Create mapping if the queue has not been stored in Elasticsearch yet index_setup.add_type(index_name=index_name, type_name=type_name) attach_to_queue(index_name=index_name, queue_name=arg.queue, type_name=type_name)