コード例 #1
0
def main():
    '''
    Geo-annotate tweets read from standard input or from a queue.

    With ``--cat`` every line of stdin is decoded as UTF-8, parsed as JSON
    and passed to ``geo_annotate``; each non-None result is written to
    stdout as one UTF-8 encoded JSON document per line. Without ``--cat``
    tweets are read from the ``arg.sub`` queue and each non-None result is
    written to the ``arg.pub`` queue.

    A failure on a single message is logged and processing continues with
    the next message.

    Returns 0 on normal completion and 1 if an unexpected error escapes the
    processing loops.
    '''
    ap = args.get_parser()
    ap.add_argument(
        '--cat',
        action="store_true",
        help='Read input from standard in and write to standard out.')
    arg = ap.parse_args()
    logs.init(arg)
    geo_mena = GeoMena()
    geo_lac = Geo(geo_region=GEO_REGION.lac)
    try:
        if arg.cat:
            log.debug('Reading from stdin and writing to stdout.')
            ins = sys.stdin
            outs = sys.stdout
            for entry in ins:
                try:
                    # Decode inside the per-line handler so that one badly
                    # encoded line is logged and skipped instead of aborting
                    # the whole stream.
                    entry = entry.decode(encoding='utf-8')
                    tweet = json.loads(entry.strip())
                    # Use the value returned by geo_annotate, exactly as the
                    # queue branch below does, so both modes emit the same
                    # content and skip the same messages.
                    content = geo_annotate(tweet, geo_mena, geo_lac)
                    if content is not None:
                        outs.write(
                            json.dumps(content,
                                       ensure_ascii=False).encode("utf-8"))
                        outs.write('\n')
                        outs.flush()
                except Exception:
                    # Pass the message itself as the format argument; wrapping
                    # it in a tuple made the log show the tuple's repr.
                    log.exception('Failed to process message "%s".', entry)

        else:
            queue.init(arg)
            with queue.open(arg.sub, 'r') as inq:
                with queue.open(arg.pub, 'w', capture=True) as outq:
                    for tweet in inq:
                        try:
                            content = geo_annotate(tweet, geo_mena, geo_lac)
                            if content is not None:
                                outq.write(content)
                        except KeyboardInterrupt:
                            log.info("Got SIGINT, exiting.")
                            break
                        except Exception:
                            log.exception('Failed to process message "%s".',
                                          tweet)

        return 0

    except Exception as e:
        # Let the logging machinery format the exception so that a message
        # that cannot be converted with str() does not raise a second error
        # inside this handler.
        log.exception("Unknown error in main function-%s", e)
        return 1
コード例 #2
0
    def parse(self, gsrObj, geo=False):
        """Load GSR records and return them mapped through ``self._dfmap``.

        ``gsrObj`` may be an open file object, a path string, or a list.
        For a file object or a path, every non-blank line is decoded with
        ``json.loads``; for a list, the items are used as they are. Each
        record is passed through ``self._formatcheck`` together with ``geo``.

        When ``geo`` is true a new ``Geo`` instance is stored on
        ``self.embersgeo`` before any record is read.

        Raises NotImplementedError for any other type of ``gsrObj``.
        """
        if geo:
            self.embersgeo = Geo()

        records = []
        if isinstance(gsrObj, file):
            for raw in gsrObj:
                if raw.strip():
                    records.append(self._formatcheck(json.loads(raw), geo))
        elif isinstance(gsrObj, basestring):
            with open(gsrObj) as handle:
                for raw in handle:
                    if raw.strip():
                        records.append(
                            self._formatcheck(json.loads(raw), geo))
        elif isinstance(gsrObj, list):
            for item in gsrObj:
                records.append(self._formatcheck(item, geo))
        else:
            raise NotImplementedError

        return self._dfmap(records)
コード例 #3
0
ファイル: evaluate.py プロジェクト: sathappanspm/geocoding
__version__ = "0.0.1"

import json
import gzip
from geoutils.dbManager import ESWrapper
from geocode_twitter import TweetGeocoder
from embers.geocode import Geo, decode
from embers.geocode_mena import GeoMena as MENAGEO

# Module-level setup, executed once at import time.
# NOTE(review): ESWrapper is presumably an Elasticsearch wrapper and the two
# arguments its index and document type names -- confirm against
# geoutils.dbManager.
DB = ESWrapper('geonames', 'places')
# Tweet geocoder constructed on top of DB.
GEO = TweetGeocoder(DB)
# NOTE(review): presumably running counts of correct / incorrect results that
# are updated elsewhere in the file -- confirm.
ptrue, pfalse = 0, 0

# Opened for writing at import time; no close() is visible in this part of
# the file.
error = open("error_colombia.txt", "w")
mGeo = MENAGEO()
eGeo = Geo()

# eGeo is rebound to the MENA geocoder here, so the Geo() instance created
# just above is discarded and embersgeo() operates on mGeo.
eGeo = mGeo


def embersgeo(doc):
    msg = json.loads(doc)
    try:
        lt, ln, places, texts, enr = eGeo._normalize_payload(msg)
        true_geo = eGeo._geo_normalize(lt, ln, None, {}, None,
                                       eGeo.priority_policy)
        true_geo = {
            "city": decode(true_geo[0]),
            "country": decode(true_geo[1]),
            "admin1": decode(true_geo[2])
        }