Example n. 1
0
def get_econtext_api(access_key,
                     access_secret,
                     baseurl="https://api.econtext.com/v2",
                     *args,
                     **kwargs):
    """Build and return an authenticated eContext API client.

    :param access_key: API key (username) used to authenticate
    :param access_secret: API secret (password) used to authenticate
    :param baseurl: root URL of the eContext API
    :return: a :class:`Client` bound to *baseurl*

    NOTE: extra ``*args``/``**kwargs`` are accepted for call-site
    flexibility but are not forwarded to the client.
    """
    log.info("connecting to econtext API")
    api_client = Client(access_key, access_secret, baseurl=baseurl)
    return api_client
Example n. 2
0
def main():
    """CLI entry point: fetch one category by id and print its summary."""
    import argparse
    arg_parser = argparse.ArgumentParser()
    # Register the CLI flags in one data-driven pass.
    for flag, spec in (
            ("-u", dict(dest="username", required=True, action="store",
                        default=None, help="API username")),
            ("-p", dict(dest="password", required=True, action="store",
                        default=None, help="API password")),
            ("-c", dict(dest="category", required=True, action="store",
                        default=None,
                        help="ID of the category to retrieve")),
            ("-v", dict(dest="verbose", action="store_true", default=False,
                        help="Be verbose")),
    ):
        arg_parser.add_argument(flag, **spec)
    options = arg_parser.parse_args()

    def get_log(log_level=logging.DEBUG):
        # Attach a stream handler and set both handler and logger levels.
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            "%(process)s - %(asctime)s - %(levelname)s :: %(message)s",
            "%Y-%m-%d %H:%M:%S")
        handler.setFormatter(formatter)
        log.addHandler(handler)
        handler.setLevel(log_level)
        log.setLevel(log_level)

    if options.verbose:
        get_log(logging.DEBUG)

    api = Client(options.username, options.password)
    category = Category(api, options.category)
    category.get_category()
    category.print_summary()
    return True
def main():
    """Bulk-classify social posts with a pool of worker threads.

    Reads batches of posts from a resumable input file, sends each batch
    to the classify/social endpoint, and writes one response per batch
    to the output file (or stdout). Returns True on completion.
    """
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i",
                        "--in",
                        dest="infile",
                        default=None,
                        help="Input file",
                        metavar="PATH")
    parser.add_argument("-o",
                        "--out",
                        dest="outfile",
                        default="stdout",
                        help="Output file",
                        metavar="PATH")
    parser.add_argument("-u",
                        dest="username",
                        required=True,
                        action="store",
                        default=None,
                        help="API username")
    parser.add_argument("-p",
                        dest="password",
                        required=True,
                        action="store",
                        default=None,
                        help="API password")
    parser.add_argument("-w",
                        dest="workers",
                        action="store",
                        default=1,
                        help="How many worker processes to use")
    parser.add_argument("-c",
                        dest="chunk_size",
                        action="store",
                        type=int,
                        default=500,
                        help="Number of POSTs per call")
    parser.add_argument("-m",
                        "--meta",
                        dest="meta",
                        default=None,
                        help="Meta data to be included with each call",
                        metavar="JSON")
    parser.add_argument("-v",
                        dest="config_verbose",
                        action="count",
                        default=0,
                        help="Be more or less verbose")
    parser.add_argument("-b",
                        "--base-url",
                        dest="base_url",
                        default="https://api.econtext.com/v2",
                        help="Use a different base-url",
                        metavar="URL")
    options = parser.parse_args()
    get_log(options.config_verbose)

    start = time.time()
    log.info("Running classification using {} worker processes".format(
        options.workers))

    infile = ropen(options.infile,
                   batch_size=options.chunk_size)  # resumable input file
    if options.outfile == 'stdout':
        outfile = sys.stdout
    else:
        outfile = sopen(options.outfile, 'w')  # threadsafe output file

    stream_meta = None
    if options.meta:
        # Close the meta file promptly instead of leaking the handle.
        with open(options.meta) as meta_file:
            stream_meta = json.load(meta_file)
        log.debug("stream_meta: %s", json.dumps(stream_meta))

    client = Client(options.username,
                    options.password,
                    baseurl=options.base_url)

    def worker():
        # Pull batches until the input is exhausted. The original bare
        # `except:` swallowed every error (even SystemExit) silently;
        # keep the stop-on-error behavior but log what happened.
        while True:
            try:
                item = infile.readlines_batch()
                if not item:
                    break
                start_time = time.time()
                s = Social(client, item)
                s.data['stream_meta'] = stream_meta
                s.data['source_language'] = 'auto'
                # NOTE(review): `f` is not defined in this file -- confirm
                # it is supplied elsewhere (e.g. the classify callable).
                response = f(s)
                outfile.write("{}\n".format(response))
                log.debug("classify/social with %s posts took %0.3f seconds",
                          len(s.classify_data), (time.time() - start_time))
            except Exception:
                log.exception("worker stopping after unexpected error")
                break

    threads = []
    for i in range(int(options.workers)):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)

    try:
        for t in threads:
            t.join()
    except KeyboardInterrupt:
        pass
    finally:
        infile.close()
        outfile.close()

    elapsed = time.time() - start
    log.info("Total time: {}".format(elapsed))
    return True
Example n. 4
0
def main():
    """CLI entry point: run a category search and print result counts."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--category",
                        dest="category",
                        required=True,
                        action="store",
                        default=None,
                        help="Search category id")
    # NOTE(review): action="store" means any supplied value -- even the
    # string "false" -- is a non-empty string, so bool(options.branch)
    # below is always True unless the flag is omitted entirely and the
    # default is used. Confirm this is the intended semantics.
    parser.add_argument("--branch",
                        dest="branch",
                        required=False,
                        action="store",
                        default=True,
                        help="Include the whole branch in the search")
    parser.add_argument("-u",
                        dest="username",
                        required=True,
                        action="store",
                        default=None,
                        help="API username")
    parser.add_argument("-p",
                        dest="password",
                        required=True,
                        action="store",
                        default=None,
                        help="API password")
    parser.add_argument("-f",
                        "--filter",
                        dest="filter",
                        default=None,
                        help="Add a filter",
                        metavar="JSON")
    parser.add_argument("-v",
                        dest="verbose",
                        action="store_true",
                        default=False,
                        help="Be verbose")
    options = parser.parse_args()

    def get_log(log_level=logging.DEBUG):
        # Attach a stream handler and raise both handler and logger
        # to the requested level.
        h = logging.StreamHandler()
        h.setFormatter(
            logging.Formatter(
                "%(process)s - %(asctime)s - %(levelname)s :: %(message)s",
                "%Y-%m-%d %H:%M:%S"))
        log.addHandler(h)
        h.setLevel(log_level)
        log.setLevel(log_level)

    if options.verbose:
        get_log(logging.DEBUG)

    # NOTE(review): `filters` is parsed from -f/--filter but never applied
    # to the search object -- looks like a missing set-filters call; verify.
    filters = []
    if options.filter:
        filters.append(json.loads(options.filter))

    client = Client(options.username, options.password)
    search = CategorySearch(client, limit=100)
    search.set_categories({options.category: bool(options.branch)})
    # The returned page itself is unused; presumably retrieve_page()
    # populates count/pages/pagesize on `search` -- confirm.
    page = search.retrieve_page()
    print("Total Results: {:>10}".format(search.count))
    print("        Pages: {:>10}".format(search.pages))
    print("    Page size: {:>10}".format(search.pagesize))

    return True
def main():
    """CLI entry point: run a keyword search and print per-page counts."""
    import argparse
    cli = argparse.ArgumentParser()
    cli.add_argument("--phrase", dest="phrase", required=True,
                     action="store", default=None, help="Search phrase")
    cli.add_argument("-u", dest="username", required=True, action="store",
                     default=None, help="API username")
    cli.add_argument("-p", dest="password", required=True, action="store",
                     default=None, help="API password")
    cli.add_argument("-f", "--filter", dest="filter", default=None,
                     help="Add a filter", metavar="JSON")
    cli.add_argument("-v", dest="verbose", action="store_true",
                     default=False, help="Be verbose")
    options = cli.parse_args()

    def get_log(log_level=logging.DEBUG):
        # Wire up console logging at the requested verbosity.
        stream_handler = logging.StreamHandler()
        fmt = logging.Formatter(
            "%(process)s - %(asctime)s - %(levelname)s :: %(message)s",
            "%Y-%m-%d %H:%M:%S")
        stream_handler.setFormatter(fmt)
        log.addHandler(stream_handler)
        stream_handler.setLevel(log_level)
        log.setLevel(log_level)

    if options.verbose:
        get_log(logging.DEBUG)

    filters = []
    if options.filter:
        filters.append(json.loads(options.filter))

    client = Client(options.username, options.password)
    search = KeywordSearch(client, pagesize=27, limit=100)
    search.set_term(options.phrase)
    page = search.retrieve_page()
    print("Total Results: {:>10}".format(search.count))
    print("        Pages: {:>10}".format(search.pages))
    print("    Page size: {:>10}".format(search.pagesize))

    all_pages = list(search.yield_pages())
    for result_page in all_pages:
        page_keywords = result_page.get_keywords()
        print("Page {}: {} keywords".format(result_page.page,
                                            len(page_keywords)))

    return True
Example n. 6
0
def main():
    """Classify a single input document read from a file or stdin.

    How the input is interpreted depends on -t/--type: a whole document
    (html/text), a single URL, or up to 1000 keywords/social posts,
    one per line. Returns True on completion.
    """
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-t",
                        "--type",
                        dest="type",
                        action="store",
                        default="html",
                        # Fixed: the old help text omitted the url and
                        # keywords types that the code below supports.
                        help="What type of classification "
                             "[html|text|url|keywords|social]")
    parser.add_argument("-i",
                        "--in",
                        dest="infile",
                        action="store",
                        default=None,
                        help="Input file",
                        metavar="PATH")
    parser.add_argument("-u",
                        dest="username",
                        required=True,
                        action="store",
                        default=None,
                        help="API username")
    parser.add_argument("-p",
                        dest="password",
                        required=True,
                        action="store",
                        default=None,
                        help="API password")
    parser.add_argument("-v",
                        dest="config_verbose",
                        action="count",
                        default=0,
                        help="Be more or less verbose")
    options = parser.parse_args()
    get_log(options.config_verbose)

    if options.infile is None:
        infile = sys.stdin
    else:
        infile = open(options.infile, 'r')

    client = Client(options.username, options.password)

    try:
        if options.type == "text":
            classify = Text(client, infile.read())
        #URL -- Classify a single URL
        elif options.type == "url":
            classify = Url(client, infile.read().strip())
        #KEYWORD -- Classify up to 1000 keywords (1 per line)
        elif options.type == "keywords":
            classify = Keywords(client,
                                [kwd.strip()
                                 for kwd in infile.readlines()][:1000])
        #HTML -- Classify a single HTML document
        elif options.type == "html":
            classify = Html(client, infile.read())
        #SOCIAL -- Classify up to 1000 social posts (1 per line)
        elif options.type == "social":
            classify = Social(client,
                              [social.strip()
                               for social in infile.readlines()][:1000])
        else:
            raise NotImplementedError(
                "{} classification not yet implemented".format(options.type))
    finally:
        # Fixed: the opened input file was never closed (leaked even on
        # the NotImplementedError path). Never close stdin itself.
        if infile is not sys.stdin:
            infile.close()

    response = classify.classify()
    classify.print_summary()

    return True
def main():
    """Bulk-classify URLs with queued worker threads and a printer thread.

    Feeds one Url job per input line through a bounded work queue to the
    workers; their responses flow through a result queue to a single
    output thread. Returns True on completion.
    """
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i",
                        "--in",
                        dest="infile",
                        default="stdin",
                        help="Input file",
                        metavar="PATH")
    parser.add_argument("-o",
                        "--out",
                        dest="outfile",
                        default="stdout",
                        help="Output file",
                        metavar="PATH")
    parser.add_argument("-u",
                        dest="username",
                        required=True,
                        action="store",
                        default=None,
                        help="API username")
    parser.add_argument("-p",
                        dest="password",
                        required=True,
                        action="store",
                        default=None,
                        help="API password")
    parser.add_argument("-w",
                        dest="workers",
                        action="store",
                        default=1,
                        help="How many worker processes to use")
    parser.add_argument("-c",
                        dest="nocache",
                        default=False,
                        action="store_true",
                        help="Ignore cached URLs")
    parser.add_argument("-m",
                        "--meta",
                        dest="meta",
                        default=None,
                        help="Meta data to be included with each call",
                        metavar="JSON")
    parser.add_argument("-v",
                        dest="config_verbose",
                        action="count",
                        default=0,
                        help="Be more or less verbose")
    options = parser.parse_args()
    get_log(options.config_verbose)

    start = time.time()
    log.info("Running classification using {} worker processes".format(
        options.workers))

    if options.infile == 'stdin':
        infile = sys.stdin
    else:
        infile = ropen(options.infile)
    if options.outfile == 'stdout':
        outfile = sys.stdout
    else:
        outfile = open(options.outfile, 'w')

    stream_meta = None
    if options.meta:
        # Close the meta file promptly instead of leaking the handle.
        with open(options.meta) as meta_file:
            stream_meta = json.load(meta_file)
        log.debug("stream_meta: %s", json.dumps(stream_meta))

    # Bound the work queue to the worker count so the feeder cannot run
    # unboundedly ahead of the consumers.
    q = queue.Queue(int(options.workers))
    r = queue.Queue()  # result queue
    client = Client(options.username, options.password)

    def worker():
        # None is the shutdown sentinel.
        while True:
            item = q.get()
            if item is None:
                break
            # NOTE(review): `f` is not defined in this file -- confirm it
            # is supplied elsewhere (e.g. the classify callable).
            response = f(item)
            r.put(response)
            q.task_done()

    def output_worker():
        # Single writer thread: serializes all output-file writes.
        while True:
            item = r.get()
            if item is None:
                break
            outfile.write(item)
            outfile.write("\n")
            r.task_done()

    threads = []
    for i in range(int(options.workers)):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)
    printer = threading.Thread(target=output_worker)
    printer.start()

    while True:
        try:
            x = next(infile).rstrip()
        except StopIteration:
            break
        except Exception:
            # Was a bare `except:` that hid real read errors. Keep the
            # graceful break (so the queues still drain and the threads
            # shut down), but log the failure first.
            log.exception("stopping input loop after read error")
            break

        u = Url(client, x)
        u.data['stream_meta'] = stream_meta
        u.data['source_language'] = 'auto'

        if options.nocache:
            u.data['cache_skip'] = True

        q.put(u)

    # Shutdown order matters: wait for all work to be processed, then
    # stop the workers, then drain the result queue, then stop the printer.
    q.join()
    for i in range(int(options.workers)):
        q.put(None)

    r.join()
    r.put(None)
    for t in threads:
        t.join()
    printer.join()

    # Fixed: a file-backed outfile was never closed, risking loss of
    # buffered output. Never close stdout itself.
    if outfile is not sys.stdout:
        outfile.close()

    elapsed = time.time() - start
    log.info("Total time: {}".format(elapsed))
    return True
Example n. 8
0
def main():
    """CLI entry point: fetch API usage for an optional date range."""
    import argparse
    cli = argparse.ArgumentParser()
    cli.add_argument("-u", dest="username", required=True, action="store",
                     default=None, help="API username")
    cli.add_argument("-p", dest="password", required=True, action="store",
                     default=None, help="API password")
    cli.add_argument("-s", dest="start", required=False, action="store",
                     default=None, help="Start Date (yyyy-mm-dd)")
    cli.add_argument("-e", dest="end", required=False, action="store",
                     default=None, help="End Date (yyyy-mm-dd)")
    cli.add_argument("-v", dest="verbose", action="store_true",
                     default=False, help="Be verbose")
    options = cli.parse_args()

    def get_log(log_level=logging.DEBUG):
        # Wire up console logging at the requested verbosity.
        stream_handler = logging.StreamHandler()
        fmt = logging.Formatter(
            "%(process)s - %(asctime)s - %(levelname)s :: %(message)s",
            "%Y-%m-%d %H:%M:%S")
        stream_handler.setFormatter(fmt)
        log.addHandler(stream_handler)
        stream_handler.setLevel(log_level)
        log.setLevel(log_level)

    if options.verbose:
        get_log(logging.DEBUG)

    def parse_day(raw, label):
        # Parse an optional yyyy-mm-dd string; log only when present.
        if raw is None:
            return None
        parsed = datetime.strptime(raw, '%Y-%m-%d')
        log.debug("Using {} of {}".format(label, parsed.date()))
        return parsed

    start_date = parse_day(options.start, "start_date")
    end_date = parse_day(options.end, "end_date")

    client = Client(options.username, options.password)
    usage = Usage(client).set_start_date(start_date).set_end_date(end_date)
    usage.get_usage()
    usage.print_summary()

    return True