def main():
    """Parse every paragraph listed in a JSON-lines file and print the results.

    The input path is read from ``sys.argv[1]``. Each non-blank line is
    decoded with ``json.loads`` and iterated (presumably a JSON array of
    paragraph texts -- confirm against the producer of the file). Every
    element is passed to ``exsto.parse_graf`` and each item that call
    yields is printed via ``exsto.pretty_print``.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
    """
    path = sys.argv[1]

    with open(path, 'r') as f:
        # Iterate the file object directly so lines are streamed instead of
        # being loaded into memory all at once by readlines().
        for line in f:
            if not line.strip():
                # A blank line is not a JSON document; skip it rather than
                # letting json.loads fail on it.
                continue

            for graf_text in json.loads(line):
                for sent in exsto.parse_graf(graf_text):
                    # Parenthesised single-argument print behaves the same
                    # as the Python 2 print statement.
                    print(exsto.pretty_print(sent))
def main():
    """Run the quote filter over a directory or a JSON-lines file.

    The path is read from ``sys.argv[1]``.

    * If it names a directory, it is handed to ``exsto.test_filter``.
    * Otherwise it is opened as a text file; each non-blank line is decoded
      as JSON, its ``"text"`` value is passed through
      ``exsto.filter_quotes``, and the result is printed via
      ``exsto.pretty_print``.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a decoded record has no ``"text"`` key.
    """
    path = sys.argv[1]

    if os.path.isdir(path):
        exsto.test_filter(path)
        return

    with open(path, 'r') as f:
        # Stream the file line by line instead of materialising every line
        # up front with readlines().
        for line in f:
            if not line.strip():
                # A blank line is not a JSON document; skip it rather than
                # letting json.loads fail on it.
                continue

            meta = json.loads(line)
            # Parenthesised single-argument print behaves the same as the
            # Python 2 print statement.
            print(exsto.pretty_print(exsto.filter_quotes(meta["text"])))
def main():
    """Apply the quote filter to a directory or to a JSON-lines file.

    ``sys.argv[1]`` supplies the path. A directory is delegated to
    ``exsto.test_filter``. Any other path is read as text: every non-blank
    line is parsed as JSON, the record's ``"text"`` value is run through
    ``exsto.filter_quotes``, and the outcome is printed with
    ``exsto.pretty_print``.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a decoded record has no ``"text"`` key.
    """
    path = sys.argv[1]

    if os.path.isdir(path):
        exsto.test_filter(path)
    else:
        with open(path, 'r') as f:
            # The file object is its own line iterator; using it directly
            # avoids holding the whole file in memory as readlines() does.
            for line in f:
                if not line.strip():
                    # Blank lines carry no JSON record; json.loads would
                    # raise on them, so skip them.
                    continue

                meta = json.loads(line)
                filtered = exsto.filter_quotes(meta["text"])
                # Parenthesised single-argument print behaves the same as
                # the Python 2 print statement.
                print(exsto.pretty_print(filtered))
def main():
    """Parse the quote-filtered paragraphs of each record in a JSON-lines file.

    The input path is read from ``sys.argv[1]``. For every non-blank line
    the JSON record's ``"text"`` value is passed to ``exsto.filter_quotes``;
    each paragraph it yields is parsed with
    ``exsto.parse_graf(meta["id"], graf_text)`` and every resulting item is
    printed via ``exsto.pretty_print``.

    An ``IndexError`` raised while parsing or printing a paragraph abandons
    that paragraph only; it is reported on stdout when the module-level
    ``DEBUG`` flag is truthy and is otherwise ignored.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a decoded record lacks ``"text"`` or ``"id"``.
    """
    path = sys.argv[1]

    with open(path, 'r') as f:
        # Stream the file instead of loading every line with readlines().
        for line in f:
            if not line.strip():
                # A blank line is not a JSON document; skip it rather than
                # letting json.loads fail on it.
                continue

            meta = json.loads(line)

            for graf_text in exsto.filter_quotes(meta["text"]):
                # The whole loop stays inside the try: if parse_graf is lazy
                # the IndexError would surface during iteration
                # (NOTE(review): confirm whether parse_graf is a generator).
                try:
                    for sent in exsto.parse_graf(meta["id"], graf_text):
                        print(exsto.pretty_print(sent))
                except IndexError as e:
                    # NOTE(review): both diagnostics are treated as
                    # DEBUG-only output -- confirm that the offending
                    # paragraph should not be echoed unconditionally.
                    if DEBUG:
                        print("IndexError: " + str(e))
                        print(graf_text)
def main():
    """Parse and print each quote-filtered paragraph from a JSON-lines file.

    ``sys.argv[1]`` names the input file. Each non-blank line is decoded as
    a JSON record; ``exsto.filter_quotes`` is applied to its ``"text"``
    value, and every paragraph produced is parsed by
    ``exsto.parse_graf(meta["id"], graf_text)``. Each parsed item is printed
    using ``exsto.pretty_print``.

    If an ``IndexError`` escapes while a paragraph is being parsed or
    printed, the rest of that paragraph is skipped. The error and the
    paragraph text are printed only when the module-level ``DEBUG`` flag is
    truthy.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a decoded record lacks ``"text"`` or ``"id"``.
    """
    path = sys.argv[1]

    with open(path, 'r') as f:
        # Iterating the file object yields lines on demand, unlike
        # readlines(), which builds the full list first.
        for line in f:
            if not line.strip():
                # Blank lines carry no JSON record; json.loads would raise
                # on them, so skip them.
                continue

            meta = json.loads(line)
            doc_id = meta["id"]

            for graf_text in exsto.filter_quotes(meta["text"]):
                # Keep iteration inside the try so an IndexError raised
                # lazily by parse_graf is still caught
                # (NOTE(review): confirm whether parse_graf is a generator).
                try:
                    for sent in exsto.parse_graf(doc_id, graf_text):
                        print(exsto.pretty_print(sent))
                except IndexError as e:
                    # NOTE(review): the paragraph echo is assumed to be
                    # DEBUG-only alongside the error message -- confirm.
                    if DEBUG:
                        print("IndexError: " + str(e))
                        print(graf_text)
def main():
    """Parse each record's paragraphs, threading a running base between them.

    The input path is read from ``sys.argv[1]``. For every non-blank line
    the JSON record's ``"text"`` value is passed to ``exsto.filter_quotes``.
    Each paragraph is parsed with
    ``exsto.parse_graf(meta["id"], graf_text, base)``, which returns a pair
    ``(grafs, new_base)``; the returned base is fed into the call for the
    next paragraph of the same record. ``base`` restarts at 0 for every
    record. Each element of ``grafs`` is printed via ``exsto.pretty_print``.

    When the module-level ``DEBUG`` flag is truthy, the raw paragraph text
    is printed before it is parsed.

    Raises:
        IndexError: if no path was supplied on the command line.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a decoded record lacks ``"text"`` or ``"id"``.
    """
    path = sys.argv[1]

    with open(path, 'r') as f:
        # Stream the file instead of loading every line with readlines().
        for line in f:
            if not line.strip():
                # A blank line is not a JSON document; skip it rather than
                # letting json.loads fail on it.
                continue

            meta = json.loads(line)
            base = 0  # reset for each record

            for graf_text in exsto.filter_quotes(meta["text"]):
                if DEBUG:
                    print(graf_text)

                # Rebind base directly from the returned pair so the next
                # paragraph continues from where this one ended.
                grafs, base = exsto.parse_graf(meta["id"], graf_text, base)

                for graf in grafs:
                    print(exsto.pretty_print(graf))
def main():
    """Follow a chain of pages and write one pretty-printed record per page.

    Settings come from the ``[scraper]`` section of ``defaults.cfg``:
    ``iterations`` (maximum number of pages to fetch), ``nap_time`` (value
    passed to ``time.sleep`` after each page), ``base_url`` and
    ``start_url`` (concatenated to form the first URL).

    Output goes to the file named by ``sys.argv[1]``, which is opened for
    writing. For each page, ``exsto.scrape_url`` fetches the URL,
    ``exsto.parse_email`` turns the result into a record, and the record is
    written via ``exsto.pretty_print`` followed by a newline. The record's
    ``"next_url"`` value becomes the next URL; the loop ends when that URL
    is empty or the iteration limit is reached.
    """
    settings = ConfigParser.ConfigParser()
    settings.read("defaults.cfg")

    max_pages = settings.getint("scraper", "iterations")
    pause = settings.getint("scraper", "nap_time")
    base_url = settings.get("scraper", "base_url")
    url = base_url + settings.get("scraper", "start_url")

    with open(sys.argv[1], 'w') as out:
        fetched = 0

        # Stop at the page limit or as soon as there is no URL to follow.
        while fetched < max_pages and len(url) >= 1:
            page = exsto.scrape_url(url)
            record = exsto.parse_email(page, base_url)

            out.write(exsto.pretty_print(record))
            out.write('\n')

            url = record["next_url"]
            time.sleep(pause)
            fetched += 1