def main():
    '''
    Entry point: pass every incoming message through geo_annotate.

    With --cat, messages are read one JSON document per line from standard
    input and the serialized result is written to standard output.
    Otherwise messages are read from the queue named by arg.sub and the
    value returned by geo_annotate is written to the queue named by arg.pub.

    Returns 0 when processing finishes and 1 when an unexpected exception
    escapes the processing loop.
    '''
    parser = args.get_parser()
    parser.add_argument(
        '--cat',
        action="store_true",
        help='Read input from standard in and write to standard out.')
    options = parser.parse_args()
    logs.init(options)

    mena_geocoder = GeoMena()
    lac_geocoder = Geo(geo_region=GEO_REGION.lac)

    try:
        if not options.cat:
            # Queue mode: forward whatever geo_annotate returns.
            queue.init(options)
            with queue.open(options.sub, 'r') as source, \
                    queue.open(options.pub, 'w', capture=True) as sink:
                for message in source:
                    try:
                        annotated = geo_annotate(
                            message, mena_geocoder, lac_geocoder)
                        if annotated is not None:
                            sink.write(annotated)
                    except KeyboardInterrupt:
                        log.info("Got SIGINT, exiting.")
                        break
                    except Exception:
                        # A single bad message must not stop the stream.
                        log.exception(
                            'Failed to process message "%s".', (message, ))
        else:
            # Pipe mode: stdin -> stdout, one JSON document per line.
            log.debug('Reading from stdin and writing to stdout.')
            stream_in = sys.stdin
            stream_out = sys.stdout
            for raw_line in stream_in:
                # Decoding happens outside the per-message handler, so a
                # decode failure is reported by the outer handler below.
                text = raw_line.decode(encoding='utf-8')
                try:
                    tweet = json.loads(text.strip())
                    # The return value is not used here; the parsed object
                    # itself is what gets serialized.
                    geo_annotate(tweet, mena_geocoder, lac_geocoder)
                    if tweet is None:
                        continue
                    stream_out.write(
                        json.dumps(tweet, ensure_ascii=False).encode("utf-8"))
                    stream_out.write('\n')
                    stream_out.flush()
                except Exception:
                    log.exception(
                        'Failed to process message "%s".', (text, ))
        return 0
    except Exception as err:
        log.exception(
            "Unknown error in main function-{}".format(str(err)))
        return 1
def parse(self, gsrObj, geo=False):
    '''
    Load GSR records from gsrObj and return the result of self._dfmap.

    gsrObj may be an open file object or a path string (one JSON document
    per non-blank line), or a list of already-decoded records. Every record
    is passed through self._formatcheck(record, geo). When geo is truthy,
    a new Geo() is stored on self.embersgeo before any record is checked.

    Raises NotImplementedError for any other type of gsrObj.
    '''
    if geo:
        self.embersgeo = Geo()

    def _check_lines(lines):
        # Blank lines are skipped; each remaining line is decoded as JSON.
        return [self._formatcheck(json.loads(raw), geo)
                for raw in lines if raw.strip()]

    if isinstance(gsrObj, file):
        records = _check_lines(gsrObj)
    elif isinstance(gsrObj, basestring):
        # A string is treated as a path to a file of JSON lines.
        with open(gsrObj) as handle:
            records = _check_lines(handle)
    elif isinstance(gsrObj, list):
        records = [self._formatcheck(item, geo) for item in gsrObj]
    else:
        raise NotImplementedError

    return self._dfmap(records)
__version__ = "0.0.1" import json import gzip from geoutils.dbManager import ESWrapper from geocode_twitter import TweetGeocoder from embers.geocode import Geo, decode from embers.geocode_mena import GeoMena as MENAGEO DB = ESWrapper('geonames', 'places') GEO = TweetGeocoder(DB) ptrue, pfalse = 0, 0 error = open("error_colombia.txt", "w") mGeo = MENAGEO() eGeo = Geo() eGeo = mGeo def embersgeo(doc): msg = json.loads(doc) try: lt, ln, places, texts, enr = eGeo._normalize_payload(msg) true_geo = eGeo._geo_normalize(lt, ln, None, {}, None, eGeo.priority_policy) true_geo = { "city": decode(true_geo[0]), "country": decode(true_geo[1]), "admin1": decode(true_geo[2]) }