def get_econtext_api(access_key, access_secret, baseurl="https://api.econtext.com/v2", *args, **kwargs):
    """Create and return an authenticated eContext API client.

    :param access_key: API username / access key
    :param access_secret: API password / access secret
    :param baseurl: API base URL (defaults to the v2 production endpoint)
    :param kwargs: extra keyword options forwarded to the Client constructor
    :return: a configured ``Client`` instance
    """
    log.info("connecting to econtext API")
    # BUG FIX: the original accepted *args/**kwargs but silently discarded
    # them; forward keyword options so callers can actually configure Client.
    return Client(access_key, access_secret, baseurl=baseurl, **kwargs)
def main():
    """CLI entry point: look up a single category by id and print a summary."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-u", dest="username", required=True, action="store", default=None, help="API username")
    arg_parser.add_argument("-p", dest="password", required=True, action="store", default=None, help="API password")
    arg_parser.add_argument("-c", dest="category", required=True, action="store", default=None, help="ID of the category to retrieve")
    arg_parser.add_argument("-v", dest="verbose", action="store_true", default=False, help="Be verbose")
    opts = arg_parser.parse_args()

    def _setup_logging(log_level=logging.DEBUG):
        # Attach a console handler so debug output is visible when -v is set.
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter(
                "%(process)s - %(asctime)s - %(levelname)s :: %(message)s",
                "%Y-%m-%d %H:%M:%S"))
        log.addHandler(handler)
        handler.setLevel(log_level)
        log.setLevel(log_level)

    if opts.verbose:
        _setup_logging(logging.DEBUG)

    api_client = Client(opts.username, opts.password)
    category = Category(api_client, opts.category)
    category.get_category()
    category.print_summary()
    return True
def main():
    """CLI entry point: classify social posts from a resumable input file in
    batches, fanned out across worker threads, writing one response per batch
    to an output file (or stdout).
    """
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i", "--in", dest="infile", default=None, help="Input file", metavar="PATH")
    parser.add_argument("-o", "--out", dest="outfile", default="stdout", help="Output file", metavar="PATH")
    parser.add_argument("-u", dest="username", required=True, action="store", default=None, help="API username")
    parser.add_argument("-p", dest="password", required=True, action="store", default=None, help="API password")
    parser.add_argument("-w", dest="workers", action="store", default=1, help="How many worker processes to use")
    parser.add_argument("-c", dest="chunk_size", action="store", type=int, default=500, help="Number of POSTs per call")
    parser.add_argument("-m", "--meta", dest="meta", default=None, help="Meta data to be included with each call", metavar="JSON")
    parser.add_argument("-v", dest="config_verbose", action="count", default=0, help="Be more or less verbose")
    parser.add_argument("-b", "--base-url", dest="base_url", default="https://api.econtext.com/v2", help="Use a different base-url", metavar="URL")
    options = parser.parse_args()
    get_log(options.config_verbose)
    start = time.time()
    log.info("Running classification using {} worker processes".format(
        options.workers))

    infile = ropen(options.infile, batch_size=options.chunk_size)  # resumable input file
    if options.outfile == 'stdout':
        outfile = sys.stdout
    else:
        outfile = sopen(options.outfile, 'w')  # threadsafe output file

    stream_meta = None
    if options.meta:
        # BUG FIX: close the meta file after reading instead of leaking it.
        with open(options.meta) as meta_file:
            stream_meta = json.load(meta_file)
        log.debug("stream_meta: %s", json.dumps(stream_meta))

    client = Client(options.username, options.password, baseurl=options.base_url)

    def worker():
        # Pull batches from the shared (resumable) input file until exhausted.
        while True:
            try:
                item = infile.readlines_batch()
                if not item:
                    break
                start_time = time.time()
                s = Social(client, item)
                s.data['stream_meta'] = stream_meta
                s.data['source_language'] = 'auto'
                response = f(s)
                outfile.write("{}\n".format(response))
                log.debug("classify/social with %s posts took %0.3f seconds",
                          len(s.classify_data), (time.time() - start_time))
            except Exception:
                # BUG FIX: the original bare `except: break` silently hid every
                # error (including programming errors); log before stopping.
                log.exception("worker error -- stopping this worker")
                break

    threads = []
    for i in range(int(options.workers)):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)
    try:
        for t in threads:
            t.join()
    except KeyboardInterrupt:
        pass
    finally:
        infile.close()
        outfile.close()
    elapsed = time.time() - start
    log.info("Total time: {}".format(elapsed))
    return True
def main():
    """CLI entry point: run a category search and print result-count stats."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--category", dest="category", required=True, action="store", default=None, help="Search category id")
    parser.add_argument("--branch", dest="branch", required=False, action="store", default=True, help="Include the whole branch in the search")
    parser.add_argument("-u", dest="username", required=True, action="store", default=None, help="API username")
    parser.add_argument("-p", dest="password", required=True, action="store", default=None, help="API password")
    parser.add_argument("-f", "--filter", dest="filter", default=None, help="Add a filter", metavar="JSON")
    parser.add_argument("-v", dest="verbose", action="store_true", default=False, help="Be verbose")
    options = parser.parse_args()

    def get_log(log_level=logging.DEBUG):
        # Attach a console handler so debug output is visible when -v is set.
        h = logging.StreamHandler()
        h.setFormatter(
            logging.Formatter(
                "%(process)s - %(asctime)s - %(levelname)s :: %(message)s",
                "%Y-%m-%d %H:%M:%S"))
        log.addHandler(h)
        h.setLevel(log_level)
        log.setLevel(log_level)

    if options.verbose:
        get_log(logging.DEBUG)

    filters = []
    if options.filter:
        filters.append(json.loads(options.filter))
    # NOTE(review): `filters` is parsed but never passed to the search --
    # presumably CategorySearch should receive it; confirm against its API.

    # BUG FIX: argparse stores --branch as a *string*, and bool() of any
    # non-empty string (including "false" and "0") is True, so the flag could
    # never disable branch inclusion.  Interpret common false-y spellings;
    # the default (True) still behaves exactly as before.
    if isinstance(options.branch, str):
        include_branch = options.branch.strip().lower() not in ("false", "0", "no", "off", "")
    else:
        include_branch = bool(options.branch)

    client = Client(options.username, options.password)
    search = CategorySearch(client, limit=100)
    search.set_categories({options.category: include_branch})
    page = search.retrieve_page()
    print("Total Results: {:>10}".format(search.count))
    print(" Pages: {:>10}".format(search.pages))
    print(" Page size: {:>10}".format(search.pagesize))
    return True
def main():
    """CLI entry point: run a keyword search for a phrase and print stats plus
    a per-page keyword count."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--phrase", dest="phrase", required=True, action="store", default=None, help="Search phrase")
    parser.add_argument("-u", dest="username", required=True, action="store", default=None, help="API username")
    parser.add_argument("-p", dest="password", required=True, action="store", default=None, help="API password")
    parser.add_argument("-f", "--filter", dest="filter", default=None, help="Add a filter", metavar="JSON")
    parser.add_argument("-v", dest="verbose", action="store_true", default=False, help="Be verbose")
    options = parser.parse_args()

    def get_log(log_level=logging.DEBUG):
        # Attach a console handler so debug output is visible when -v is set.
        h = logging.StreamHandler()
        h.setFormatter(
            logging.Formatter(
                "%(process)s - %(asctime)s - %(levelname)s :: %(message)s",
                "%Y-%m-%d %H:%M:%S"))
        log.addHandler(h)
        h.setLevel(log_level)
        log.setLevel(log_level)

    if options.verbose:
        get_log(logging.DEBUG)

    filters = []
    if options.filter:
        filters.append(json.loads(options.filter))
    # NOTE(review): `filters` is parsed but never passed to the search --
    # presumably KeywordSearch should receive it; confirm against its API.

    client = Client(options.username, options.password)
    search = KeywordSearch(client, pagesize=27, limit=100)
    search.set_term(options.phrase)
    page = search.retrieve_page()
    print("Total Results: {:>10}".format(search.count))
    print(" Pages: {:>10}".format(search.pages))
    print(" Page size: {:>10}".format(search.pagesize))
    # PERF FIX: iterate the page generator directly instead of materializing
    # every page into a throwaway list first.
    for result_page in search.yield_pages():
        keywords = result_page.get_keywords()
        print("Page {}: {} keywords".format(result_page.page, len(keywords)))
    return True
def main():
    """CLI entry point: classify one input document read from a file or stdin
    (html, text, url, keywords, or social) and print a response summary.

    :raises NotImplementedError: for an unrecognized -t/--type value
    """
    parser = argparse.ArgumentParser(description=usage)
    # BUG FIX: the help string listed only html|text|social although url and
    # keywords are also implemented below.
    parser.add_argument("-t", "--type", dest="type", action="store", default="html", help="What type of classification [html|text|url|keywords|social]")
    parser.add_argument("-i", "--in", dest="infile", action="store", default=None, help="Input file", metavar="PATH")
    parser.add_argument("-u", dest="username", required=True, action="store", default=None, help="API username")
    parser.add_argument("-p", dest="password", required=True, action="store", default=None, help="API password")
    parser.add_argument("-v", dest="config_verbose", action="count", default=0, help="Be more or less verbose")
    options = parser.parse_args()
    get_log(options.config_verbose)

    if options.infile is None:
        infile = sys.stdin
    else:
        infile = open(options.infile, 'r')
    try:
        client = Client(options.username, options.password)
        # TEXT -- Classify a single text document
        if options.type == "text":
            classify = Text(client, infile.read())
        # URL -- Classify a single URL
        elif options.type == "url":
            classify = Url(client, infile.read().strip())
        # KEYWORD -- Classify up to 1000 keywords (1 per line)
        elif options.type == "keywords":
            classify = Keywords(client, [kwd.strip() for kwd in infile.readlines()][:1000])
        # HTML -- Classify a single HTML document
        elif options.type == "html":
            classify = Html(client, infile.read())
        # SOCIAL -- Classify up to 1000 social posts (1 per line)
        elif options.type == "social":
            classify = Social(client, [social.strip() for social in infile.readlines()][:1000])
        else:
            raise NotImplementedError(
                "{} classification not yet implemented".format(options.type))
    finally:
        # BUG FIX: close the input file when we opened one (never close stdin).
        if infile is not sys.stdin:
            infile.close()

    response = classify.classify()
    classify.print_summary()
    return True
def main():
    """CLI entry point: classify URLs read one-per-line from a file or stdin,
    fanned out over a pool of worker threads, with a single printer thread
    serializing one response line per input URL to the output stream.
    """
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i", "--in", dest="infile", default="stdin", help="Input file", metavar="PATH")
    parser.add_argument("-o", "--out", dest="outfile", default="stdout", help="Output file", metavar="PATH")
    parser.add_argument("-u", dest="username", required=True, action="store", default=None, help="API username")
    parser.add_argument("-p", dest="password", required=True, action="store", default=None, help="API password")
    parser.add_argument("-w", dest="workers", action="store", default=1, help="How many worker processes to use")
    parser.add_argument("-c", dest="nocache", default=False, action="store_true", help="Ignore cached URLs")
    parser.add_argument("-m", "--meta", dest="meta", default=None, help="Meta data to be included with each call", metavar="JSON")
    parser.add_argument("-v", dest="config_verbose", action="count", default=0, help="Be more or less verbose")
    options = parser.parse_args()
    get_log(options.config_verbose)
    start = time.time()
    log.info("Running classification using {} worker processes".format(
        options.workers))

    if options.infile == 'stdin':
        infile = sys.stdin
    else:
        infile = ropen(options.infile)
    if options.outfile == 'stdout':
        outfile = sys.stdout
    else:
        outfile = open(options.outfile, 'w')

    stream_meta = None
    if options.meta:
        # BUG FIX: close the meta file after reading instead of leaking it.
        with open(options.meta) as meta_file:
            stream_meta = json.load(meta_file)
        log.debug("stream_meta: %s", json.dumps(stream_meta))

    # Bounded work queue applies back-pressure on the feed loop.
    q = queue.Queue(int(options.workers))
    r = queue.Queue()  # result queue
    client = Client(options.username, options.password)

    def worker():
        # Classify queued Url objects until the None shutdown sentinel arrives.
        while True:
            item = q.get()
            if item is None:
                break
            response = f(item)
            r.put(response)
            q.task_done()

    def output_worker():
        # Single writer thread keeps output lines from interleaving.
        while True:
            item = r.get()
            if item is None:
                break
            outfile.write(item)
            outfile.write("\n")
            r.task_done()

    threads = []
    for i in range(int(options.workers)):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)
    printer = threading.Thread(target=output_worker)
    printer.start()

    while True:
        # BUG FIX: the original bare `except: break` hid every error in the
        # feed loop (bad input file, broken Url construction, ...); only
        # end-of-input should end it.
        try:
            x = next(infile).rstrip()
        except StopIteration:
            break
        u = Url(client, x)
        u.data['stream_meta'] = stream_meta
        u.data['source_language'] = 'auto'
        if options.nocache:
            u.data['cache_skip'] = True
        q.put(u)

    # Drain the work queue, then shut the pool and the printer down in order.
    q.join()
    for i in range(int(options.workers)):
        q.put(None)
    r.join()
    r.put(None)
    for t in threads:
        t.join()
    printer.join()
    # BUG FIX: close the files we opened (never close stdin/stdout).
    if infile is not sys.stdin:
        infile.close()
    if outfile is not sys.stdout:
        outfile.close()
    elapsed = time.time() - start
    log.info("Total time: {}".format(elapsed))
    return True
def main():
    """CLI entry point: fetch API usage between optional start and end dates
    and print a summary."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-u", dest="username", required=True, action="store", default=None, help="API username")
    arg_parser.add_argument("-p", dest="password", required=True, action="store", default=None, help="API password")
    arg_parser.add_argument("-s", dest="start", required=False, action="store", default=None, help="Start Date (yyyy-mm-dd)")
    arg_parser.add_argument("-e", dest="end", required=False, action="store", default=None, help="End Date (yyyy-mm-dd)")
    arg_parser.add_argument("-v", dest="verbose", action="store_true", default=False, help="Be verbose")
    opts = arg_parser.parse_args()

    def _setup_logging(log_level=logging.DEBUG):
        # Attach a console handler so debug output is visible when -v is set.
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter(
                "%(process)s - %(asctime)s - %(levelname)s :: %(message)s",
                "%Y-%m-%d %H:%M:%S"))
        log.addHandler(handler)
        handler.setLevel(log_level)
        log.setLevel(log_level)

    if opts.verbose:
        _setup_logging(logging.DEBUG)

    start_date = None
    if opts.start is not None:
        start_date = datetime.strptime(opts.start, '%Y-%m-%d')
        log.debug("Using start_date of {}".format(start_date.date()))

    end_date = None
    if opts.end is not None:
        end_date = datetime.strptime(opts.end, '%Y-%m-%d')
        log.debug("Using end_date of {}".format(end_date.date()))

    api_client = Client(opts.username, opts.password)
    usage = Usage(api_client).set_start_date(start_date).set_end_date(end_date)
    usage.get_usage()
    usage.print_summary()
    return True