Example #1
def main():
	# Initialize arguments
	argparser = args.get_parser()
	argparser.add_argument('--local_port', help='Local port to connect to java server', required=True)
	arg = argparser.parse_args()
		
	localPort = int(arg.local_port)

	# Initialize log
	logs.init(arg)
	global log
	
	# Initialize the queue with arguments and connect to the specified feed
	log.info("Opening and connecting to queue %s", arg.sub)
	queue.init(arg)
	reader = queue.open(arg.sub, 'sub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)
	
	# Initialize the writer to publish to a queue
	log.info("Publishing to queue %s", arg.pub)
	writer = queue.open(arg.pub, 'pub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)
	

	count = 0
	# Connect to Java server
	while True:
		for feedmsg in reader:
			try:
				while True:
					try:
						sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
						sock.connect(("localhost", localPort))
						break
					except:
						log.info("Unable to connect to local server")

				log.debug("Connected to java server on port %d" % localPort)

				socketLines = sock.makefile()

				# Clean the message to fix irregularities
				feedmsg = message.clean(feedmsg)

				log.debug("Read message %d. Sending to java" % count)
				# Write message to socket stream
				sock.sendall(json.dumps(feedmsg))
				sock.sendall('\n')

				# Receive result from socket stream
				result = socketLines.readline()
				writer.write(json.dumps(result))
				count += 1

				sock.close()
			except KeyboardInterrupt:
				sys.exit(1)
			else:
				log.info("Server was disconnected.")
Example #2
def main():
    '''
    Reads tweets from the queue, geo-annotates them, and publishes the
    annotated tweets to a new queue.
    '''
    ap = args.get_parser()
    ap.add_argument(
        '--cat',
        action="store_true",
        help='Read input from standard in and write to standard out.')
    arg = ap.parse_args()
    logs.init(arg)
    geo_mena = GeoMena()
    geo_lac = Geo(geo_region=GEO_REGION.lac)
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = sys.stdout
            for entry in ins:
                entry = entry.decode(encoding='utf-8')
                try:
                    tweet = json.loads(entry.strip())
                    tweet = geo_annotate(tweet, geo_mena, geo_lac)
                    if tweet is not None:
                        outs.write(
                            json.dumps(tweet,
                                       ensure_ascii=False).encode("utf-8"))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    log.exception('Failed to process message "%s".', (entry, ))

        else:
            queue.init(arg)
            with queue.open(arg.sub, 'r') as inq:
                with queue.open(arg.pub, 'w', capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = geo_annotate(tweet, geo_mena, geo_lac)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".',
                                          (tweet, ))

        return 0

    except Exception as e:
        log.exception("Unknown error in main function-{}".format(str(e)))
        return 1
Example #3
def main():
    '''
    Reads tweets from the queue, geo-annotates them, and publishes the
    annotated tweets to a new queue.
    '''
    ap = args.get_parser()
    ap.add_argument('--cat', action="store_true",
                    help='Read input from standard in and write to standard out.')
    arg = ap.parse_args()
    logs.init(arg)
    geo_mena = GeoMena()
    geo_lac = Geo(geo_region=GEO_REGION.lac)
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = sys.stdout
            for entry in ins:
                entry = entry.decode(encoding='utf-8')
                try:
                    tweet = json.loads(entry.strip())
                    tweet = geo_annotate(tweet, geo_mena, geo_lac)
                    if tweet is not None:
                        outs.write(json.dumps(tweet, ensure_ascii=False).encode("utf-8"))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    log.exception('Failed to process message "%s".', (entry,))

        else:
            queue.init(arg)
            with queue.open(arg.sub, 'r') as inq:
                with queue.open(arg.pub, 'w', capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = geo_annotate(tweet, geo_mena, geo_lac)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".', (tweet,))

        return 0

    except Exception as e:
        log.exception("Unknown error in main function-{}".format(str(e)))
        return 1
Example #4
def main():
    ap = args.get_parser()
    ap.add_argument('--test', action="store_true", help="Test flag; if present, run as a test case")
    arg = ap.parse_args()

    assert arg.sub, 'Need a queue to subscribe to'
    assert arg.pub, 'Need a queue to publish to'

    logs.init(arg)
    queue.init(arg)
    test_flag = arg.test

    conn = boto.connect_sdb()

    with queue.open(arg.sub, 'r') as inq:
        for m in inq:
            try:
                durationProcess(conn, m, arg.pub, test_flag)
            except KeyboardInterrupt:
                log.info('GOT SIGINT, exiting!')
                break
            except EmbersException as e:
                log.exception(e.value)
            except:
                log.exception("Unexpected exception in process")
Example #5
def main():
    # Initialize arguments
    argparser = args.get_parser()
    argparser.add_argument('--json_file',
                           help='JSON file to publish',
                           required=True)
    arg = argparser.parse_args()

    queue.init(arg)
    writer = queue.open(arg.pub,
                        'pub',
                        ssh_key=arg.ssh_key,
                        ssh_conn=arg.tunnel)

    try:
        msg_reader = codecs.open(arg.json_file, encoding='utf-8', mode='r')
        message = msg_reader.readline()
        while message:
            writer.write(json.loads(message))
            message = msg_reader.readline()

        msg_reader.close()
    except KeyboardInterrupt:
        pass

    return 0
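Example #5 (and its twin, Example #21 below) drives the read loop with explicit readline() calls. Iterating the file object directly is equivalent and slightly shorter; a sketch under the same assumptions (one JSON message per line in arg.json_file, writer being the queue opened above):

msg_reader = codecs.open(arg.json_file, encoding='utf-8', mode='r')
try:
    for message in msg_reader:  # file objects yield one line per iteration
        writer.write(json.loads(message))
finally:
    msg_reader.close()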
Example #6
def main():
	args = parse_args()

	predict_date = args.predict_day
	conf_f = args.conf_f
	cur_list = args.currency_list
	key_id = args.key_id
	secret = args.secret
	zmq_port = args.zmq_port
	
	conn = boto.connect_sdb(key_id,secret)

	all_config = json.load(open(conf_f))
	"Get the latest version of CONFIG "	
	latest_version = max([int(k) for k in all_config.keys()])
	CONFIG = all_config[str(latest_version)]
	if cur_list is None:
		cur_list = CONFIG["currency_list"]
	
	with queue.open(zmq_port, 'w', capture=False) as outq:
		for currency in cur_list:
			prediction = predict(conn,currency,predict_date,CONFIG)
			if prediction and prediction["eventType"]!="0000":
				"push message to ZMQ"
				outq.write(prediction)
Example #7
def test():
    queue.init()
    port = 'tcp://*:30115'
    with queue.open(port,'w',capture=True) as outq:
        msgObj = {'embersId': 'f0c030a20e28a12134d9ad0e98fd0861fae7438b', 'confidence': 0.13429584033181682, 'strength': '4', 'derivedFrom': [u'5df18f77723885a12fa6943421c819c90c6a2a02', u'be031c4dcf3eb9bba2d86870683897dfc4ec4051', u'3c6571a4d89b17ed01f1345c80cf2802a8a02b7b'], 'shiftDate': '2011-08-08', 'shiftType': 'Trend', 'location': u'Colombia', 'date': '2012-10-03', 'model': 'Finance Stock Model', 'valueSpectrum': 'changePercent', 'confidenceIsProbability': True, 'population': 'COLCAP'}
        outq.write(msgObj)
    
    print "Success"
    pathName = os.path.dirname(sys.argv[0])
    print pathName
Example #8
def main():
    '''
    Reads tweets from the queue, annotates them with country-level
    geography, and publishes the annotated tweets to a new queue.
    '''
    ap = args.get_parser()
    ap.add_argument('--cat', action="store_true",
                    help='Read input from standard in and write to standard out.')
    ap.add_argument('--region', metavar='REGION', type=str, default=None,
                    help='Specify region to filter by')
    arg = ap.parse_args()
    logs.init(arg)
    filter_region = arg.region
    geoc = GeoCountry()
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = codecs.getwriter('utf-8')(sys.stdout)
            for entry in ins:
                entry = entry.decode(encoding='utf-8')
                try:
                    tweet = json.loads(entry.strip())
                    tweet = annotate(tweet, geoc, filter_region)
                    if tweet is not None:
                        outs.write(json.dumps(tweet, ensure_ascii=False))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    log.exception('Failed to process message "%s".', entry)

        else:
            queue.init(arg)
            iqueue.init(arg)
            qname = "{}-geoCountry-{}".format(os.environ["CLUSTERNAME"], filter_region)
            with iqueue.open(arg.sub, 'r', qname=qname) as inq:
                with queue.open(arg.pub, 'w') as outq:  # , capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = annotate(tweet, geoc, filter_region)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".', tweet)

        return 0

    except Exception as e:
        log.exception("Unknown error in main function-{0!s}.".format(e))
        return 1
Example #9
def process(t_domain, port, raw_data):
    try:
        "Check if current data already in database, if not exist then insert otherwise skip"
        ifExisted = check_if_existed(t_domain, raw_data)
        if not ifExisted:
            embers_id = raw_data["embersId"]
            ty = raw_data["type"]
            name = raw_data["name"]
            last_price = float(raw_data["currentValue"].replace(",", ""))
            pre_last_price = float(raw_data["previousCloseValue"].replace(",", ""))
            one_day_change = round(last_price - pre_last_price, 4)
            #source = raw_data["feed"]
            post_date = raw_data["date"][0:10]
            raw_data['postDate'] = post_date

            "Initiate the enriched Data"
            enrichedData = {}

            "calculate zscore 30 and zscore 90"
            zscore30 = getZscore(t_domain, post_date, name, one_day_change, 30)
            zscore90 = getZscore(t_domain, post_date, name, one_day_change, 90)

            if ty == "stock":
                trend_type = get_trend_type(raw_data)
            else:
                trend_type = "0"
            derived_from = {"derivedIds": [embers_id]}

            enrichedData["derivedFrom"] = derived_from
            enrichedData["type"] = ty
            enrichedData["name"] = name
            enrichedData["postDate"] = post_date
            enrichedData["currentValue"] = last_price
            enrichedData["previousCloseValue"] = pre_last_price
            enrichedData["oneDayChange"] = one_day_change
            enrichedData["changePercent"] = round((last_price - pre_last_price) / pre_last_price, 4)
            enrichedData["trendType"] = trend_type
            enrichedData["zscore30"] = zscore30
            enrichedData["zscore90"] = zscore90
            enrichedData["operateTime"] = datetime.utcnow().isoformat()
            enrichedDataEmID = hashlib.sha1(json.dumps(enrichedData)).hexdigest()
            enrichedData["embersId"] = enrichedDataEmID

            insert_enriched_data(t_domain, enrichedData)

            #push data to ZMQ
            with queue.open(port, 'w', capture=False) as outq:
                outq.write(enrichedData)
    except:
        log.exception("Exception captured: %s %s" % (sys.exc_info()[0], str(raw_data)))
Example #10
def main():
    ap = args.get_parser()
    ap.add_argument('--replay', action="store_true", help="Replay flag; if present, run as a test case")
    # If the rule file is not given as an argument, the rules are read from sys.stdin
    ap.add_argument('--rulefile', type=str, help="The rule file for the duration analysis model")
    arg = ap.parse_args()

    if not arg.replay:
        assert arg.sub, 'Need a queue to subscribe to'
    assert arg.pub, 'Need a queue to publish to'

    logs.init(arg)
    queue.init(arg)
    test_flag = arg.replay
    if arg.rulefile:
        rule = eval(open(arg.rulefile).read())
    else:
        #load the rules from sys.stdin
        rule = eval(sys.stdin.read())

    conn = boto.connect_sdb()

    if not arg.replay:
        with queue.open(arg.sub, 'r') as inq:
            for m in inq:
                try:
                    replayIO = StringIO.StringIO()
                    durationProcess(rule, conn, m, arg.pub, test_flag, replayIO)
                except KeyboardInterrupt:
                    log.info('GOT SIGINT, exiting!')
                    break
                except EmbersException as e:
                    log.exception(e.value)
                except:
                    log.exception("Unexpected exception in process")
    else:
        #replay model take enriched file as input
        enrich_messages = sys.stdin.readlines()
        for m in enrich_messages:
            m = json.loads(m.strip())
            try:
                replayIO = StringIO.StringIO()
                durationProcess(rule, conn, m, arg.pub, test_flag, replayIO)
            except KeyboardInterrupt:
                log.info('GOT SIGINT, exiting!')
                break
            except EmbersException as e:
                log.exception(e.value)
            except:
                log.exception("Unexpected exception in process")
Example #11
def process(conn,trend_file,port,raw_data):
    "Check if current data already in database, if not exist then insert otherwise skip"
    ifExisted = check_if_existed(conn,raw_data)
    if not ifExisted:
        sql = "insert into t_bloomberg_prices (embers_id,type,name,current_value,previous_close_value,update_time,query_time,post_date,source) values (?,?,?,?,?,?,?,?,?) "
        embers_id = raw_data["embersId"]
        ty = raw_data["type"]
        name = raw_data["name"]
        tmpUT =  raw_data["updateTime"].split(" ")[0]
        update_time = raw_data["updateTime"]
        last_price = float(raw_data["currentValue"])
        pre_last_price = float(raw_data["previousCloseValue"])
        one_day_change = round(last_price - pre_last_price,4)
        query_time = raw_data["queryTime"]
        source = raw_data["feed"]
        post_date = tmpUT.split("/")[2] + "-" +  tmpUT.split("/")[0] + "-" + tmpUT.split("/")[1]
        
        cur = conn.cursor()
        cur.execute(sql,(embers_id,ty,name,last_price,pre_last_price,update_time,query_time,post_date,source))
        
        "Initiate the enriched Data"
        enrichedData = {}
        
        "calculate zscore 30 and zscore 90"
        zscore30 = getZscore(conn,post_date,name,one_day_change,30)
        zscore90 = getZscore(conn,post_date,name,one_day_change,90)
        
        trend_type = get_trend_type(trend_file,raw_data)
        derived_from = "[" + embers_id + "]"
        enrichedData["derivedFrom"] = derived_from
        enrichedData["type"] = ty
        enrichedData["name"] = name
        enrichedData["postDate"] = post_date
        enrichedData["currentValue"] = last_price
        enrichedData["previousCloseValue"] = pre_last_price
        enrichedData["oneDayChange"] = one_day_change
        enrichedData["changePercent"] = round((last_price - pre_last_price)/pre_last_price,4)
        enrichedData["trendType"] = trend_type
        enrichedData["zscore30"] = zscore30
        enrichedData["zscore90"] = zscore90
        enrichedData["operateTime"] = datetime.now().isoformat()
        enrichedDataEmID = hashlib.sha1(json.dumps(enrichedData)).hexdigest()
        enrichedData["embersId"] = enrichedDataEmID
       
        insert_enriched_data(conn,enrichedData)
        
        conn.commit()
        #push data to ZMQ
        with queue.open(port, 'w', capture=False) as outq:
            outq.write(enrichedData)
Example #12
def attach_to_queue(index_name, queue_name, type_name=None, limit=None):
    """
    Attaches to the queue_name provided and inserts the messages into Elasticsearch
    :param index_name:
    :param queue_name:
    :param limit:
    :return:
    """
    queue.init()
    log.debug('Attempting to attach to the queue %s' % queue_name)
    with queue.open(name=queue_name, mode='r') as message_queue:
        if limit:
            batch_messages(iterable_obj=message_queue, es_index_name=index_name, es_type=type_name, limit=limit)
        else:
            return push(iterable_obj=message_queue, es_index_name=index_name, es_type=type_name)
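A hypothetical call to attach_to_queue; the index name, queue address, and type below are chosen purely for illustration and are not taken from the original code:

attach_to_queue(index_name='embers-messages',
                queue_name='tcp://localhost:30115',
                type_name='message',
                limit=1000)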
Example #13
def main():
    ap = args.get_parser()
    ap.add_argument('--f', type=str, help='the news file')

    arg = ap.parse_args()

    assert arg.f, 'Need a file to ingest'
    assert arg.pub, 'Need a queue to publish'

    logs.init(arg)
    queue.init(arg)

    with queue.open(arg.pub, 'w') as q_w, open(arg.f, 'r') as f_r:
        for line in f_r:
            news = json.loads(line)
            q_w.write(news)
Example #14
def execute(date,cfgPath):
    init(cfgPath)
    enricheDa = ed.Enriched_Data(cfgPath)
    obj = enricheDa.enrich_all_stock(date)
    warningList = []
    for item in obj:
        warning = warningCheck(item)
        if warning is not None:
            warningList.append(warning) 
    
    #push warning to ZMQ
    port = common.get_configuration("info", "ZMQ_PORT")
    with queue.open(port, 'w', capture=True) as outq:
        for warning in warningList:
            outq.write(json.dumps(warning, encoding='utf8'))    
                
    return warningList   
Example #15
def process(port,keyId,secret,operateDate):
    #get DB connection
    conn = boto.connect_sdb(keyId,secret)
    domain = conn.get_domain("bloomberg_news")
    sql = "select * from {} where updateDate = '{}'".format(operateDate)
    results = domain.select(sql)
    enrichedNewsList = []
    for result in results:
        enrichedNews = process_news(result)
        if enrichedNews:
            enrichedNewsList.append(enrichedNews)
    
    enrichedDomain = conn.get_domain("enriched_news")
    
    #Write the enriched news to SimpleDB and push it into ZMQ
    with queue.open(port, 'w', capture=True) as outq:
        for enrichedNews in enrichedNewsList:
            outq.write(enrichedNews)
            enrichedDomain.put_attributes(enrichedNews["embersId"], enrichedNews)
Example #16
def main():
    ap = args.get_parser()
    ap.add_argument('--out', help="the output file of warnings")
    arg = ap.parse_args()

    assert arg.sub, 'Need a queue to subscribe!'
    assert arg.out, 'Need a file to store warnings!'

    logs.init(arg)
    queue.init(arg)
    out_file = arg.out

    with queue.open(arg.sub, 'r') as q_r:
        for m in q_r:
            with open(out_file, "a") as out_w:
                if not check_ifexist(m):
                    out_w.write(json.dumps(m) + "\n")
                else:
                    print "Duplicated Warnings"
Example #17
def main():
    ap = args.get_parser()
    ap.add_argument('--out', help="the output file of warnings")
    arg = ap.parse_args()

    assert arg.sub, 'Need a queue to subscribe!'
    assert arg.out, 'Need a file to store warnings!'

    logs.init(arg)
    queue.init(arg)
    out_file = arg.out

    with queue.open(arg.sub, 'r') as q_r:
        for m in q_r:
            with open(out_file, "a") as out_w:
                if not check_ifexist(m):
                    out_w.write(json.dumps(m) + "\n")
                else:
                    print "Duplicated Warnings"
Example #18
def main():
    svm_twitter = SVM_Twitter(0.1, 0.1, 'rbf')
    ap = args.get_parser()
    ap.add_argument("--pca_num", default=8, type=int)
    ap.add_argument("--net", type=str)
    ap.add_argument("--k", type=int)
    ap.add_argument("--inf", type=str, help="input folder")
    ap.add_argument("--o_surr", type=str, help="output surrogate file")
    arg = ap.parse_args()
    folder = {
        "t": "content",
        "c": "comprehend",
        "u": "user2user",
        "e": "entity"
    }

    assert arg.pub, "Please input a queue to publish surrogate"
    queue.init(arg)
    send_queue = queue.open(arg.pub, "w")
    surr_w = open(arg.o_surr, "w")
    for country in COUNTRY:
        train_file = os.path.join(
            arg.inf, "%s_train_%d" % (country.replace(" ", ""), arg.k))
        test_file = os.path.join(
            arg.inf, "%s_test_%d" % (country.replace(" ", ""), arg.k))
        svm_twitter.load_data(train_file, test_file)
        svm_twitter.normalize()
        #svm_twitter.normalize()
        #svm_twitter.pca(arg.pca_num)
        svm_twitter.fit()
        svm_twitter.predict()

        for day in svm_twitter.novel_days:
            surrogate = {"country": country, "date": day.strftime("%Y-%m-%d")}
            send_queue.write(surrogate)
            surr_w.write(json.dumps(surrogate) + "\n")

        print "prediction result: %s " % country
        print [day.strftime("%Y-%m-%d") for day in svm_twitter.novel_days]
    surr_w.flush()
    surr_w.close()
    send_queue.close()
Example #19
def attach_to_queue(index_name, queue_name, type_name=None, limit=None):
    """
    Attaches to the queue_name provided and inserts the messages into Elasticsearch
    :param index_name:
    :param queue_name:
    :param limit:
    :return:
    """
    queue.init()
    log.debug('Attempting to attach to the queue %s' % queue_name)
    with queue.open(name=queue_name, mode='r') as message_queue:
        if limit:
            batch_messages(iterable_obj=message_queue,
                           es_index_name=index_name,
                           es_type=type_name,
                           limit=limit)
        else:
            return push(iterable_obj=message_queue,
                        es_index_name=index_name,
                        es_type=type_name)
Example #20
def main():
    ap = args.get_parser()
    default_day = datetime.strftime(datetime.now(), "%Y-%m-%d")
    ap.add_argument("--d", type=str, default=default_day, help="The day to ingest, Format: dd/mm/yyyy")
    ap.add_argument("--domain", default="bloomberg_prices", help="The simpleDB table to store raw data")
    arg = ap.parse_args()

    assert arg.pub, "Need a queue to publish"
    logs.init(arg)
    queue.init(arg)

    with queue.open(arg.pub, "w") as out_q:
        for stock in STOCK_CON:
            if stock == "COLCAP":
                scrape_f = scrape_colcap_url
            if stock == "CHILE65":
                scrape_f = scrape_chile65_url
            msg = ingest_price(arg, stock, scrape_f)
            if msg is not None:
                out_q.write(msg)
                store(arg, msg)
Example #21
def main():
	# Initialize arguments
	argparser = args.get_parser()
	argparser.add_argument('--json_file', help='JSON file to publish', required=True)
	arg = argparser.parse_args()
	
	queue.init(arg)
	writer = queue.open(arg.pub, 'pub', ssh_key=arg.ssh_key, ssh_conn=arg.tunnel)
	
	try:
		msg_reader = codecs.open(arg.json_file, encoding='utf-8', mode='r')
		message = msg_reader.readline()
		while message:
			writer.write(json.loads(message))
			message = msg_reader.readline()
		
		msg_reader.close()
	except KeyboardInterrupt:
		pass
	
	return 0
Example #22
def main():
    ap = args.get_parser()
    ap.add_argument('--dir')
    arg = ap.parse_args()

    assert arg.pub, "Enter a queue to pub"

    file_folder = arg.dir
    files = os.listdir(file_folder)
    w_queue = queue.open(arg.pub, "w", capture=True)

    for f in files:
        full_f = os.path.join(file_folder, f)
        with open(full_f) as af:
            for d_ana in af:
                temp = d_ana.strip().split("|")
                message = {"country": temp[1],
                           "date": temp[0],
                           "z_value": temp[2],
                           "diff_mag": temp[3]}
                w_queue.write(message)
    w_queue.close()
Example #23
def main():
    ap = args.get_parser()
    ap.add_argument('-c', '--conf', metavar='CONF', type=str, nargs='?', 
                    default=os.path.join(os.path.dirname(__file__), 'bloomberg_news_ingest.conf'),
                    help='The location of the configuration file.')
    arg = ap.parse_args()
    assert arg.pub, "--pub required. Need a queue to publish on"

    logs.init(arg)
    conf = get_conf(arg.conf)
    seen_it = shelve.open("bloomberg_news_seen_it.db")
    
    try:
        with queue.open(arg.pub, 'w', capture=True) as outq:
            for (index, companies) in conf.items():
                for company in companies:
                    articles = get_stock_news(index, company, seen_it)
                    for a in articles:
                        outq.write(a)

    except KeyboardInterrupt:
        log.info('GOT SIGINT, exiting')
Example #24
def main():
    ap = args.get_parser()
    ap.add_argument('--level', type=str, default="0.6",
                    help='The threshold')
    ap.add_argument('--svm', action='store_true')
    ap.add_argument('--zmq', action='store_true')
    ap.add_argument('--surr', type=str, help="surrogate file")
    ap.add_argument('--warn', type=str, help="warning file")
    arg = ap.parse_args()

    logs.init(arg)
    queue.init(arg)
    assert arg.pub, "Please input a queue to publish warning"
    if arg.zmq:
        assert arg.sub, "Please input a queue to sub surrogate message"
    conn = boto.connect_sdb()
    t_domain = get_domain(conn, "s_holiday")

    if arg.zmq:
        with queue.open(arg.sub, 'r') as inq:
            for m in inq:
                try:
                    if arg.svm:
                        svm_warning(t_domain, m, arg.pub)
                    else:
                        warning_center(t_domain, m, arg.pub, float(arg.level))
                except KeyboardInterrupt:
                    log.info('GOT SIGINT, exiting!')
                    break
                except:
                    log.exception("Exception in Process:%s" % sys.exc_info()[0])
    else:
        with open(arg.warn, "w") as w, open(arg.surr) as r:
            if arg.svm:
                for m in r:
                    m = json.loads(m)
                    warning = svm_warning(t_domain, m, arg.pub)
                    w.write(json.dumps(warning) + "\n")
Example #25
def main():
    svm_twitter = SVM_Twitter(0.1, 0.1, 'rbf')
    ap = args.get_parser()
    ap.add_argument("--pca_num", default=8, type=int)
    ap.add_argument("--net", type=str)
    ap.add_argument("--k", type=int)
    ap.add_argument("--inf", type=str, help="input folder")
    ap.add_argument("--o_surr", type=str, help="output surrogate file")
    arg = ap.parse_args()
    folder = {"t": "content", "c": "comprehend", "u": "user2user",
              "e": "entity"}

    assert arg.pub, "Please input a queue to publish surrogate"
    queue.init(arg)
    send_queue = queue.open(arg.pub, "w")
    surr_w = open(arg.o_surr, "w")
    for country in COUNTRY:
        train_file = os.path.join(arg.inf,
                                  "%s_train_%d" % (country.replace(" ", ""), arg.k))
        test_file = os.path.join(arg.inf,
                                 "%s_test_%d" % (country.replace(" ", ""), arg.k))
        svm_twitter.load_data(train_file, test_file)
        svm_twitter.normalize()
        #svm_twitter.normalize()
        #svm_twitter.pca(arg.pca_num)
        svm_twitter.fit()
        svm_twitter.predict()

        for day in svm_twitter.novel_days:
            surrogate = {"country": country, "date": day.strftime("%Y-%m-%d")}
            send_queue.write(surrogate)
            surr_w.write(json.dumps(surrogate)+ "\n")

        print "prediction result: %s " % country
        print [day.strftime("%Y-%m-%d") for day in svm_twitter.novel_days]
    surr_w.flush()
    surr_w.close()
    send_queue.close()
Example #26
def main():
    ap = args.get_parser()
    ap.add_argument('--level', type=str, default="0.6", help='The threshold')
    ap.add_argument('--svm', action='store_true')
    ap.add_argument('--zmq', action='store_true')
    ap.add_argument('--surr', type=str, help="surrogate file")
    ap.add_argument('--warn', type=str, help="warning file")
    arg = ap.parse_args()

    logs.init(arg)
    queue.init(arg)
    assert arg.pub, "Please input a queue to publish warning"
    if arg.zmq:
        assert arg.sub, "Please input a queue to sub surrogate message"
    conn = boto.connect_sdb()
    t_domain = get_domain(conn, "s_holiday")

    if arg.zmq:
        with queue.open(arg.sub, 'r') as inq:
            for m in inq:
                try:
                    if arg.svm:
                        svm_warning(t_domain, m, arg.pub)
                    else:
                        warning_center(t_domain, m, arg.pub, float(arg.level))
                except KeyboardInterrupt:
                    log.info('GOT SIGINT, exiting!')
                    break
                except:
                    log.exception("Exception in Process:%s" %
                                  sys.exc_info()[0])
    else:
        with open(arg.warn, "w") as w, open(arg.surr) as r:
            if arg.svm:
                for m in r:
                    m = json.loads(m)
                    warning = svm_warning(t_domain, m, arg.pub)
                    w.write(json.dumps(warning) + "\n")
Example #27
def process(port,conn,blg_news_file):
    "Get all the news"
    newsList = []
    with open(blg_news_file,"r") as news_file:
        lines = news_file.readlines()
        for line in lines:
            line = line.replace("\r","").replace("\n","")
            news = json.loads(line)
            newsList.append(news)
            
    enrichedNewsList = []
    for news in newsList:
        if_succ = insert_news(conn, news)
        if if_succ:
            enrichedNews = process_news(news)
            if enrichedNews:
                enrichedNewsList.append(enrichedNews)
    
    #Write the enriched news to the SQLite DB and push it into ZMQ
    with queue.open(port, 'w', capture=True) as outq:
        for enrichedNews in enrichedNewsList:
            outq.write(enrichedNews)
            insert_enriched_news(conn,enrichedNews)
Example #28
def main():
    ap = args.get_parser()
    ap.add_argument('--r_file', type=str, help="The rule file")
    ap.add_argument('--o', type=str, help="The output file")
    arg = ap.parse_args()

    assert arg.r_file, 'Need a rule file'
    assert arg.sub, 'Need a queue to subscribe'
    assert arg.o, 'Need a file to output'

    logs.init(arg)
    queue.init(arg)

    u_pattern = re.compile(r"http://(www\.){0,1}[^/]*/[a-z0-9/.\-]*(econ)[a-z0-9\.\-]*", flags=re.I)
    c_rule = create_label_rule(arg.r_file)
    g_rule = create_gold_lable(arg.r_file)
    c_pattern = re.compile(c_rule, flags=re.I)

    with queue.open(arg.sub, 'r') as q_r, codecs.open(arg.o, 'a') as f_a:
        for news in q_r:
            f_news = process(news, u_pattern, c_pattern, g_rule)
            if f_news is not None:
                f_a.write(json.dumps(f_news) + "\n")
                print f_news['date'], f_news['title'], "|", f_news['o_country'], "|", f_news["p_country"]
Example #29
 def send(self, pub_zmq):
     with queue.open(pub_zmq, "w", capture=True) as q_w:
         q_w.write(self.warning)
         time.sleep(1)
Example #30
def process_single_stock(conn,predict_date,stock_index,regeFlag=False):
    try:
        "Check if the predictive Day is trading day, if so continue, otherwise just return None"
        if_trading_day = check_if_tradingday(conn,predict_date,stock_index) 
        if if_trading_day is False:
            return None
        
        predictiveResults = {}
        finalRatio = {}
        clusterProbability = {}
        predictiveProbability = 0
        stockDerived = []
        newsDerived = []
        
        "Iteratively compute the probabilty of each cluster for the stock "
        cluster_pro_list = CONFIG["clusterProbability"][stock_index]
        
        term_list,newsDerived = get_term_list(conn, predict_date, stock_index)
        his_cluster_list,stockDerived = get_past_cluster_list(conn,predict_date,stock_index)
        
        for cluster_type in cluster_pro_list:
            "compute the contribution of 3 past day's trend "
            stockIndexProbability = compute_stock_index_probability(conn,predict_date, cluster_type , stock_index, his_cluster_list )
            "compute the contribution of 3 past day's news"
            newsProbability = compute_stock_news_probability(conn,predict_date, cluster_type , stock_index,term_list )
            "combine two contribution together"
            predictiveProbability = math.exp( stockIndexProbability + newsProbability )
            predictiveResults[cluster_type] = predictiveProbability
        
        sumProbability = sum( predictiveResults.itervalues() ) 
        
        "Get the maximum probability between the predictive values"
        for item_key, item_value in predictiveResults.iteritems():
            finalRatio[item_key] = item_value / sumProbability
        sorted_ratio = sorted( finalRatio.iteritems(), key = operator.itemgetter( 1 ), reverse = True )
        clusterProbability[stock_index] = {}
        clusterProbability[stock_index][predict_date] = sorted_ratio[0]
        
        "Construct the Surrogate data"
        surrogateData = {}
        "Merge News Derived and Stock Derived"
        derivedFrom = {"derivedIds":[]}
        for item in stockDerived:
            derivedFrom["derivedIds"].append(item)
        for item in newsDerived:
            derivedFrom["derivedIds"].append(item)
        "construct surrogate data"    
        model = 'Bayesian - Time serial Model'
        location = CONFIG["location"][stock_index]
        population = stock_index
        confidence = round(sorted_ratio[0][1],2)
        confidenceIsProbability = True
        shiftType = "Trend"
        valueSpectrum = "changePercent"
        strength = sorted_ratio[0][0]
        shiftDate = predict_date
        
        surrogateData["derivedFrom"] = derivedFrom
        surrogateData["model"] = model
        surrogateData["location"] = location
        surrogateData["population"] = population
        surrogateData["confidence"] = confidence
        surrogateData["confidenceIsProbability"] = confidenceIsProbability
        surrogateData["shiftType"] = shiftType
        surrogateData["valueSpectrum"] = valueSpectrum
        surrogateData["strength"] = strength
        surrogateData["shiftDate"] = shiftDate
        surrogateData["version"] = __version__
        comments = {}
        comments["configVersion"] = CONFIG["version"]
        comments["model"] = "Bayesian Model"
        surrogateData["comments"] = json.dumps(comments)
        surrogateData["description"] = "Predict the change type of the future day"
        surrogateData["date"] = datetime.utcnow().isoformat()
        
        "Generate Embers Id"
        jsonStr = json.dumps(surrogateData)
        embersId = hashlib.sha1(json.dumps(jsonStr)).hexdigest()
        surrogateData["embersId"] = embersId
        
        "if the action is not for regenerating past warning, then store the surrogate and warning"
        if not regeFlag:
            #push surrogate data into ZMQ
            with queue.open(SURROGATE_PORT, 'w', capture=False) as outq:
                outq.write(surrogateData)
            
            "Insert the surrogatedata to Simple DB: "
            insert_surrogatedata(conn, surrogateData)
        
        return surrogateData
    except Exception as e:
        log.exception( "process_single_stock Error: %s" % e.message)
        return None
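Several of these examples derive an embersId by SHA-1 hashing a JSON serialization of the message (Examples #9 and #34 hash the dict's JSON form directly; #30 and #32 serialize twice). A minimal sketch of the direct form; the sample values below are illustrative, and note that json.dumps without sort_keys=True depends on key insertion order, so the ID is content-derived but not canonical:

import hashlib
import json

def embers_id(message):
    # SHA-1 hex digest of the message's JSON form, used as its identifier.
    return hashlib.sha1(json.dumps(message)).hexdigest()

surrogate = {"model": "Bayesian - Time serial Model", "population": "COLCAP"}
surrogate["embersId"] = embers_id(surrogate)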
Example #31
    rs = t_domain.select(sql)
    return rs

if __name__ == "__main__":
    ap = args.get_parser()
    ap.add_argument('--s_date', type=str, help="the start date of the query")
    ap.add_argument('--e_date', type=str, help='the end date of the query')
    ap.add_argument('--f', action='store_true', help='load enriched message from file')
    ap.add_argument('--sdb', action='store_true', help='load enriched message from simpledb')
    ap.add_argument('--file', type=str, help="the file location")
    arg = ap.parse_args()

    assert arg.pub, 'Need a queue to publish'

    logs.init(arg)
    queue.init(arg)
    if arg.sdb:
        conn = boto.connect_sdb()
        t_domain = conn.get_domain('t_enriched_bloomberg_prices')
        rs = get_enriched_prices(t_domain, arg.s_date, arg.e_date)

    if arg.f:
        with open(arg.file, "r") as r:
            rs = [eval(line.strip()) for line in r.readlines()]

    with queue.open(arg.pub, 'w') as q_w, open("surrogate.txt", "w") as s_w:
        for r in rs:
            print r
            q_w.write(r)
            s_w.write(json.dumps(r) + "\n")
Example #32
def enrich_single_stock( predict_date , stock_index ):
    try:
        "Check if the predictive Day is trading day, if so continue, otherwise just return None"
        if_trading_day = check_if_tradingday(predict_date,stock_index) 
        if if_trading_day is False:
            return None
        
        predictiveResults = {}
        finalRatio = {}
        clusterProbability = {}
        predictiveProbability = 0
        stockDerived = []
        newsDerived = []
        
        "Iteratively compute the probabilty of each cluster for the stock "
        cluster_pro_list = CONFIG["clusterProbability"][stock_index]
        for cluster_type in cluster_pro_list:
            "compute the contribution of 3 past day's trend "
            stockIndexProbability,stockDerived = compute_stock_index_probability(predict_date, cluster_type , stock_index )
            "compute the contribution of 3 past day's news"
            newsProbability,newsDerived = compute_stock_news_probability(predict_date, cluster_type , stock_index )
            "combine two contribution together"
            predictiveProbability = math.exp( stockIndexProbability + newsProbability ) * float( 1e90 )
            predictiveResults[cluster_type] = predictiveProbability
        
        sumProbability = sum( predictiveResults.itervalues() ) 
        
        "Get the maximum probability between the predictive values"
        for item_key, item_value in predictiveResults.iteritems():
            finalRatio[item_key] = item_value / sumProbability
        sorted_ratio = sorted( finalRatio.iteritems(), key = operator.itemgetter( 1 ), reverse = True )
        clusterProbability[stock_index] = {}
        clusterProbability[stock_index][predict_date] = sorted_ratio[0]
        
        "Construct the Surrogate data"
        surrogateData = {}
        date = time.strftime('%Y-%m-%d',time.localtime(time.time()))
        "Merge News Derived and Stock Derived"
        derivedFrom = []
        for item in stockDerived:
            derivedFrom.append(item)
        for item in newsDerived:
            derivedFrom.append(item)
        "construct surrogate data"    
        model = 'Bayesian - Time serial Model'
        location = CONFIG["location"][stock_index]
        population = stock_index
        confidence = sorted_ratio[0][1]
        confidenceIsProbability = True
        shiftType = "Trend"
        valueSpectrum = "changePercent"
        strength = sorted_ratio[0][0]
        shiftDate = predict_date
        
        surrogateData["date"] = date
        surrogateData["derivedFrom"] = derivedFrom
        surrogateData["model"] = model
        surrogateData["location"] = location
        surrogateData["population"] = population
        surrogateData["confidence"] = confidence
        surrogateData["confidenceIsProbability"] = confidenceIsProbability
        surrogateData["shiftType"] = shiftType
        surrogateData["valueSpectrum"] = valueSpectrum
        surrogateData["strength"] = strength
        surrogateData["shiftDate"] = shiftDate
        
        "Generate Embers Id"
        jsonStr = json.dumps(surrogateData)
        embersId = hashlib.sha1(json.dumps(jsonStr)).hexdigest()
        surrogateData["embersId"] = embersId
        
        "Insert the surrogatedata to simple DB"
        domain_name = "finance_surrogatedata"
        domain = get_domain(domain_name)
        domain.put_attributes(embersId,surrogateData)
        
        #push surrogate data into ZMQ
        with queue.open(PORT, 'w', capture=True) as outq:
            outq.write(surrogateData)
        
        return surrogateData
    except Exception as e:
        log.info( "Error: %s" % e.args)
Example #33
 def send(self, pub_zmq):
     with queue.open(pub_zmq, "w") as q_w:
         q_w.write(self.warning)
Example #34
def warning_check(warningDomain, surObj, regeFlag=False, replayIO=None):
#   surObj = {'embersId': 'f0c030a20e28a12134d9ad0e98fd0861fae7438b', 'confidence': 0.13429584033181682, 'strength': '4', 'derivedFrom': [u'5df18f77723885a12fa6943421c819c90c6a2a02', u'be031c4dcf3eb9bba2d86870683897dfc4ec4051', u'3c6571a4d89b17ed01f1345c80cf2802a8a02b7b'], 'shiftDate': '2011-08-08', 'shiftType': 'Trend', 'location': u'Colombia', 'date': '2012-10-03', 'model': 'Finance Stock Model', 'valueSpectrum': 'changePercent', 'confidenceIsProbability': True, 'population': 'COLCAP'}
    stock_index = surObj["population"]
    trend_type = surObj["strength"]
    date = surObj["shiftDate"]
    replayIO.write("Check whether the surrogate data trigger the warning.\n")
    try:
        pClusster = trend_type
        table_name = "t_enriched_bloomberg_prices"

        sql = "select currentValue from {} where name='{}' and postDate < '{}' order by postDate desc".format(table_name, stock_index, date)
        current_val = 0.0
        rs = warningDomain.select(sql, max_items=1)
        for r in rs:
            current_val = float(r['currentValue'])
        replayIO.write("Retrive past 30 day's price daily change.\n")
        querySql = "select oneDayChange from {} where name='{}' and postDate <'{}' order by postDate desc".format(table_name, stock_index, date)
        rs = warningDomain.select(querySql, max_items=30)
        moving30 = []
        for r in rs:
            moving30.append(float(r['oneDayChange']))

        replayIO.write("\t %s\n" % json.dumps(moving30))

        replayIO.write("Retrive past 90 day's price daily change.\n")
        querySql = "select oneDayChange from {} where name='{}' and postDate <'{}' order by postDate desc".format(table_name, stock_index, date)
        rs = warningDomain.select(querySql, max_items=90)
        moving90 = []
        for r in rs:
            moving90.append(float(r['oneDayChange']))

        replayIO.write("\t %s\n" % json.dumps(moving90))

        m30 = sum(moving30) / len(moving30)
        m90 = sum(moving90) / len(moving90)
        std30 = calculator.calSD(moving30)
        std90 = calculator.calSD(moving90)

        eventType, cButtom, cUpper = \
            dailySigmaTrends(stock_index, str(pClusster), m30, m90, std30, std90, current_val)

        dailyRecord = {}
        dailyRecord["date"] = date
        dailyRecord["cBottom"] = cButtom
        dailyRecord["cUpper"] = cUpper
        dailyRecord["currentValue"] = current_val

        "Construct the warning message"
        warningMessage = {}
        derivedFrom = {"derivedIds": [surObj["embersId"]]}
        model = surObj["model"]
        event = eventType
        confidence = surObj["confidence"]
        confidenceIsProbability = surObj["confidenceIsProbability"]
        eventDate = surObj["shiftDate"]
        population = surObj["population"]
        location = surObj["location"]
        comments = surObj["comments"]
        comObj = json.loads(comments)

        warningMessage["derivedFrom"] = derivedFrom
        warningMessage["model"] = model
        warningMessage["eventType"] = event
        warningMessage["confidence"] = confidence
        warningMessage["confidenceIsProbability"] = confidenceIsProbability
        warningMessage["eventDate"] = eventDate
        warningMessage["population"] = population
        warningMessage["location"] = location
        warningMessage["version"] = __version__
        operateTime = datetime.utcnow().isoformat()
        warningMessage["date"] = operateTime
        comObj["trendVersion"] = CONFIG["trendRange"]["version"]
        warningMessage["comments"] = json.dumps(comObj)
        warningMessage["description"] = "Use Bayesian to predict stock sigma events"

        embersId = hashlib.sha1(json.dumps(warningMessage)).hexdigest()
        warningMessage["embersId"] = embersId

        replayIO.write("Warning Message: \n\t%s\n" % json.dumps(warningMessage))

        if eventType != "0000":
            "push warningmessage to ZMQ"
            with queue.open(WARNING_PORT, 'w', capture=True) as outq:
                sleep(1)
                outq.write(warningMessage)
            replayIO.write("Publish Warningmessage to ZMQ!\n")
        if not regeFlag:
            insert_warningmessage(warningDomain, warningMessage)
        if eventType != "0000":
            return warningMessage
        else:
            return None

    except lite.Error as e:
        log.exception("Error: %s" % e.args[0])
Example #35
def process_single_stock(surrogateDomain, predict_date, stock_index, regeFlag=False, replayIO=None):
    try:

        replayIO.write("Check predict date '%s' whether weekend or holiday\n" % predict_date)
        "Check if the predictive Day is trading day, if so continue, otherwise just return None"
        if_trading_day = check_if_tradingday(surrogateDomain, predict_date, stock_index)
        if if_trading_day is False:
            return None

        replayIO.write("\t'%s' is trading day for index '%s'\n" % (predict_date, stock_index))

        predictiveResults = {}
        finalRatio = {}
        clusterProbability = {}
        predictiveProbability = 0
        stockDerived = []
        newsDerived = []

        "Iteratively compute the probabilty of each cluster for the stock "
        cluster_pro_list = CONFIG["clusterProbability"][stock_index]
        replayIO.write("Iteratively compute the probabilty of each cluster for the stock %s\n" % stock_index)

        replayIO.write("retrieve past 3 day's news:\n ")

        term_list, newsDerived = get_term_list(surrogateDomain, predict_date, stock_index)
        replayIO.write("\tkeywords list: %s\n" % json.dumps(term_list))
        replayIO.write("\tparent news embersIDs: %s\n" % newsDerived)

        replayIO.write("retrieve past 3 day's cluster.\n")
        his_cluster_list, stockDerived = get_past_cluster_list(surrogateDomain, predict_date, stock_index)
        replayIO.write("\tpast 3 days' clusters: %s \n" %  json.dumps(his_cluster_list))

        for cluster_type in cluster_pro_list:
            "compute the contribution of 3 past day's trend "
            stockIndexProbability = compute_stock_index_probability(predict_date, cluster_type, stock_index, his_cluster_list )
            "compute the contribution of 3 past day's news"
            newsProbability = compute_stock_news_probability(predict_date, cluster_type, stock_index, term_list )
            "combine two contribution together"
            predictiveProbability = stockIndexProbability + newsProbability
            predictiveResults[cluster_type] = predictiveProbability

        replayIO.write("Compute the propability for each cluster.\n")

        #normalize the probability
        max_val = max(predictiveResults.values())
        min_val = min(predictiveResults.values())
        for k in predictiveResults:
            predictiveResults[k] = 1.0 * (predictiveResults[k] - min_val) / (max_val - min_val)

        sumProbability = sum(predictiveResults.itervalues())

        "Get the maximum probability between the predictive values"
        for item_key, item_value in predictiveResults.iteritems():
            finalRatio[item_key] = item_value / sumProbability
        sorted_ratio = sorted(finalRatio.iteritems(), key=operator.itemgetter(1), reverse=True)
        clusterProbability[stock_index] = {}
        clusterProbability[stock_index][predict_date] = sorted_ratio[0]

        replayIO.write("\tprobability for each cluster:[%s]\n" % json.dumps(sorted_ratio))

        "Construct the Surrogate data"
        surrogateData = {}
        "Merge News Derived and Stock Derived"
        derivedFrom = {"derivedIds": []}
        for item in stockDerived:
            derivedFrom["derivedIds"].append(item)
        for item in newsDerived:
            derivedFrom["derivedIds"].append(item)
        "construct surrogate data"
        model = 'Bayesian - Time serial Model'
        location = CONFIG["location"][stock_index]
        population = stock_index
        #confidence = round(sorted_ratio[0][1], 2)
        confidence = 0.5
        confidenceIsProbability = True
        shiftType = "Trend"
        valueSpectrum = "changePercent"
        strength = sorted_ratio[0][0]
        shiftDate = predict_date

        surrogateData["derivedFrom"] = derivedFrom
        surrogateData["model"] = model
        surrogateData["location"] = location
        surrogateData["population"] = population
        surrogateData["confidence"] = confidence
        surrogateData["confidenceIsProbability"] = confidenceIsProbability
        surrogateData["shiftType"] = shiftType
        surrogateData["valueSpectrum"] = valueSpectrum
        surrogateData["strength"] = strength
        surrogateData["shiftDate"] = shiftDate
        surrogateData["version"] = __version__
        comments = {}
        comments["configVersion"] = CONFIG["version"]
        comments["model"] = "Bayesian Model"
        surrogateData["comments"] = json.dumps(comments)
        surrogateData["description"] = "Predict the change type of the future day"
        surrogateData["date"] = datetime.utcnow().isoformat()

        "Generate Embers Id"
        jsonStr = json.dumps(surrogateData)
        embersId = hashlib.sha1(json.dumps(jsonStr)).hexdigest()
        surrogateData["embersId"] = embersId

        replayIO.write("Surrogate message: \n\t%s\n" % json.dumps(surrogateData))

        "if the action is not for regenerating past warning, then store the surrogate and warning"
        if not regeFlag:
            #push surrogate data into ZMQ
            with queue.open(SURROGATE_PORT, 'w', capture=False) as outq:
                sleep(1)
                outq.write(surrogateData)

            "Insert the surrogatedata to Simple DB: "
            insert_surrogatedata(surrogateDomain, surrogateData)

        return surrogateData
    except Exception as e:
        log.exception("process_single_stock Error: %s" % e.message)
        return None
Example #36
 def send(self, pub_zmq):
     with queue.open(pub_zmq, "w", capture=True) as q_w:
         q_w.write(self.warning)
         time.sleep(1)
Example #37
    def enrich_single_stock( self, predictiveDate , stockIndex ):
        try:
            "Check if the predictive Day is trading day, if so continue, otherwise just return None"
            ifTradingDay = self.check_if_tradingday(predictiveDate,stockIndex) 
            if ifTradingDay is False:
                return None
            
            predictiveResults = {}
            finalRatio = {}
            clusterProbability = {}
            predictiveProbability = 0
            stockDerived = []
            newsDerived = []
            "Iteratively compute the probabilty of each cluster for the stock "
            for clusterType in self.enumberate_clusters( stockIndex ):
                "compute the contribution of 3 past day's trend "
                stockIndexProbability,stockDerived = self.compute_stock_index_probability( predictiveDate, clusterType , stockIndex )
                "compute the contribution of 3 past day's news"
                newsProbability,newsDerived = self.compute_stock_news_probability( predictiveDate, clusterType , stockIndex )
                "combine two contribution together"
                predictiveProbability = math.exp( stockIndexProbability + newsProbability ) * float( 1e90 )
                predictiveResults[clusterType] = predictiveProbability
            
            sumProbability = sum( predictiveResults.itervalues() ) 
            
            "Get the maximum probability between the predictive values"
            for item_key, item_value in predictiveResults.iteritems():
                finalRatio[item_key] = item_value / sumProbability
            sorted_ratio = sorted( finalRatio.iteritems(), key = operator.itemgetter( 1 ), reverse = True )
            clusterProbability[stockIndex] = {}
            clusterProbability[stockIndex][predictiveDate] = sorted_ratio[0]
#            return clusterProbability
            
            "Construct the Surrogate data"
            surrogateData = {}
            date = time.strftime('%Y-%m-%d',time.localtime(time.time()))
            "Merge News Derived and Stock Derived"
            derivedFrom = []
            for item in stockDerived:
                derivedFrom.append(item)
            for item in newsDerived:
                derivedFrom.append(item)
            model = 'Bayesian - Time serial Model'
            location = common.getLocationByStockIndex(stockIndex)
            population = stockIndex
            confidence = sorted_ratio[0][1]
            confidenceIsProbability = True
            shiftType = "Trend"
            valueSpectrum = "changePercent"
            strength = sorted_ratio[0][0]
            shiftDate = predictiveDate
            
            surrogateData["date"] = date
            surrogateData["derivedFrom"] = derivedFrom
            surrogateData["model"] = model
            surrogateData["location"] = location
            surrogateData["population"] = population
            surrogateData["confidence"] = confidence
            surrogateData["confidenceIsProbability"] = confidenceIsProbability
            surrogateData["shiftType"] = shiftType
            surrogateData["valueSpectrum"] = valueSpectrum
            surrogateData["strength"] = strength
            surrogateData["shiftDate"] = shiftDate
            
            "Generate Embers Id"
            jsonStr = json.dumps(surrogateData)
            embersId = hashlib.sha1(json.dumps(jsonStr)).hexdigest()
            surrogateData["embersId"] = embersId
            
            self.insert_surrogatedata(surrogateData)
            
            #push surrogate data into ZMQ
            port = common.get_configuration("info", "ZMQ_PORT")
            with queue.open(port, 'w', capture=True) as outq:
                outq.write(json.dumps(surrogateData, encoding='utf8'))
            
            return surrogateData
        except Exception as e:
            log.info( "Error: %s" % e.args)
            log.info( traceback.format_exc())
Example #38
def main():
    '''
    Reads tweets from the queue, annotates them with country-level
    geography, and publishes the annotated tweets to a new queue.
    '''
    ap = args.get_parser()
    ap.add_argument(
        '--cat',
        action="store_true",
        help='Read input from standard in and write to standard out.')
    ap.add_argument('--region',
                    metavar='REGION',
                    type=str,
                    default=None,
                    help='Specify region to filter by')
    arg = ap.parse_args()
    logs.init(arg)
    filter_region = arg.region
    geoc = GeoCountry()
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = codecs.getwriter('utf-8')(sys.stdout)
            for entry in ins:
                entry = entry.decode(encoding='utf-8')
                try:
                    tweet = json.loads(entry.strip())
                    tweet = annotate(tweet, geoc, filter_region)
                    if tweet is not None:
                        outs.write(json.dumps(tweet, ensure_ascii=False))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    log.exception('Failed to process message "%s".', entry)

        else:
            queue.init(arg)
            iqueue.init(arg)
            qname = "{}-geoCountry-{}".format(os.environ["CLUSTERNAME"],
                                              filter_region)
            with iqueue.open(arg.sub, 'r', qname=qname) as inq:
                with queue.open(arg.pub,
                                'w') as outq:  # , capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = annotate(tweet, geoc, filter_region)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".',
                                          tweet)

        return 0

    except Exception as e:
        log.exception("Unknown error in main function-{0!s}.".format(e))
        return 1