def run(options):
    """Parse every section of one US Code title and write each one as JSON.

    Args:
        options: mapping of task options. ``title`` selects the title to
            parse (required); ``year`` selects the edition and defaults
            to 2011.

    Returns:
        None. A message is logged and the function returns early when no
        title is supplied or when the title's file is not on disk.
    """
    title_number = options.get('title')
    year = options.get('year', 2011)  # default to 2011 for now

    if not title_number:
        utils.log("Supply a 'title' argument to parse a title.")
        return

    filename = utils.title_filename(title_number, year)
    if not os.path.exists(filename):
        utils.log("This title has not been downloaded.")
        # Stop here: without the file there is nothing to parse.
        return

    title = uscode.title_for(filename)

    count = 0
    for section in title.sections():
        section_number = section.enum()
        print("[%s USC %s] Parsing..." % (title_number, section_number))

        # this could probably be black boxed a little further?
        parser = uscode.GPOLocatorParser(section.body_lines())
        output = parser.parse().json()

        utils.write(
            json.dumps(output, sort_keys=True, indent=2),
            uscode_output(year, title_number, section_number)
        )
        count += 1

    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("\nParsed %s sections of title %s." % (count, title_number))
def run(options):
    """Try to parse every section of one 2011 title and log a tally.

    Args:
        options: mapping with a ``"title"`` entry that ``int()`` accepts.

    Each section yielded by ``File(fp).sections()`` is converted with
    ``as_tree()``; failures are logged and counted rather than raised.
    After the tally is logged, a ``pdb`` breakpoint is entered.

    The value logged as "Percent success" is the fraction of attempted
    sections that parsed (between 0 and 1), or 0 when no section was
    attempted.
    """
    filename = title_filename(int(options["title"]), '2011')

    succeeded = 0
    failed = 0

    # The whole body runs inside the ``with`` so the file is still open
    # while the debugger session below is active and is closed afterwards.
    with open(filename) as fp:
        gpo_file = File(fp)

        for section in gpo_file.sections():
            logger.info('Trying to parse %r' % section)
            try:
                # ``tree`` is not used further here; it stays bound so it
                # can be inspected from the breakpoint below.
                tree = section.as_tree()
            except Exception as e:
                logger.warning(' .. parse failed: %r' % e)
                failed += 1
            else:
                succeeded += 1

        logger.info('Number that succeeded: %d' % succeeded)
        logger.info('Number that failed: %d' % failed)

        # Success ratio is successes over attempts; guard the empty case
        # instead of dividing by zero.
        attempted = succeeded + failed
        ratio = float(succeeded) / attempted if attempted else 0.0
        logger.info('Percent success: %f' % ratio)

        # NOTE(review): debugging breakpoint kept from the original so the
        # interactive behaviour is unchanged; remove once no longer needed.
        import pdb
        pdb.set_trace()
def run(options):
    """Try to parse every section of one 2011 title and log a tally.

    Args:
        options: mapping with a ``"title"`` entry that ``int()`` accepts.

    Each section yielded by ``File(fp).sections()`` is converted with
    ``as_tree()``; failures are logged and counted rather than raised.
    After the tally is logged, a ``pdb`` breakpoint is entered.

    The value logged as "Percent success" is the fraction of attempted
    sections that parsed (between 0 and 1), or 0 when no section was
    attempted.
    """
    filename = title_filename(int(options["title"]), '2011')

    succeeded = 0
    failed = 0

    # The whole body runs inside the ``with`` so the file is still open
    # while the debugger session below is active and is closed afterwards.
    with open(filename) as fp:
        gpo_file = File(fp)

        for section in gpo_file.sections():
            logger.info('Trying to parse %r' % section)
            try:
                # ``tree`` is not used further here; it stays bound so it
                # can be inspected from the breakpoint below.
                tree = section.as_tree()
            except Exception as e:
                logger.warning(' .. parse failed: %r' % e)
                failed += 1
            else:
                succeeded += 1

        logger.info('Number that succeeded: %d' % succeeded)
        logger.info('Number that failed: %d' % failed)

        # Success ratio is successes over attempts; guard the empty case
        # instead of dividing by zero.
        attempted = succeeded + failed
        ratio = float(succeeded) / attempted if attempted else 0.0
        logger.info('Percent success: %f' % ratio)

        # NOTE(review): debugging breakpoint kept from the original so the
        # interactive behaviour is unchanged; remove once no longer needed.
        import pdb
        pdb.set_trace()
def run(options):
    """Parse every section of one US Code title and write each one as JSON.

    Args:
        options: mapping of task options. ``title`` selects the title to
            parse (required); ``year`` selects the edition and defaults
            to 2011.

    Returns:
        None. A message is logged and the function returns early when no
        title is supplied or when the title's file is not on disk.
    """
    title_number = options.get('title')
    year = options.get('year', 2011)  # default to 2011 for now

    if not title_number:
        utils.log("Supply a 'title' argument to parse a title.")
        return

    filename = utils.title_filename(title_number, year)
    if not os.path.exists(filename):
        utils.log("This title has not been downloaded.")
        # Stop here: without the file there is nothing to parse.
        return

    title = uscode.title_for(filename)

    count = 0
    for section in title.sections():
        section_number = section.enum()
        # Single-argument print() emits the same text on Python 2 and 3.
        print("[%s USC %s] Parsing..." % (title_number, section_number))

        # this could probably be black boxed a little further?
        parser = uscode.GPOLocatorParser(section.body_lines())
        output = parser.parse().json()

        utils.write(
            json.dumps(output, sort_keys=True, indent=2),
            uscode_output(year, title_number, section_number)
        )
        count += 1

    print("\nParsed %s sections of title %s." % (count, title_number))
def run(options):
    """Rebuild the uscode-git repository, one commit per title per edition.

    The directory ``/home/thom/code/uscode-git`` is deleted and recreated
    as a fresh git repository. Editions are then processed oldest first;
    for each edition, titles 1 through 50 are parsed, every section that
    parses is written to ``<title>/<section>.txt``, and the result is
    committed.

    Args:
        options: task options mapping. Accepted for interface
            compatibility; no entry is read.

    A title whose file cannot be opened, or on which the parser fails, is
    logged and skipped (no files written, no commit) instead of reusing
    the previous title's file or parser object.
    """
    # (year, edition description) pairs, newest first.
    editions = [
        ('2011', '2006 Edition and Supplement V (2011)'),
        ('2010', '2006 Edition and Supplement IV (2010)'),
        ('2009', '2006 Edition and Supplement III (2009)'),
        ('2008', '2006 Edition and Supplement II (2008)'),
        ('2007', '2006 Edition and Supplement I (2007)'),
        ('2006', '2006 Edition (2006)'),
        ('2005', '2000 Edition and Supplement V (2005)'),
        ('2004', '2000 Edition and Supplement IV (2004)'),
        ('2003', '2000 Edition and Supplement III (2003)'),
        ('2002', '2000 Edition and Supplement II (2002)'),
        ('2001', '2000 Edition and Supplement I (2001)'),
        ('2000', '2000 Edition (2000)'),
        ('1999', '1994 Edition and Supplement V (1999)'),
        ('1998', '1994 Edition and Supplement IV (1998)'),
        ('1997', '1994 Edition and Supplement III (1997)'),
        ('1996', '1994 Edition and Supplement II (1996)'),
        ('1995', '1994 Edition and Supplement I (1995)'),
        ('1994', '1994 Edition (1994)'),
    ]

    path = '/home/thom/code/uscode-git'

    try:
        logger.info('Removing tree: %r' % path)
        shutil.rmtree(path)
    except OSError:
        # Best effort: the tree may simply not exist yet.
        pass

    # Create the repo again.
    logger.info('Creating the git repo.')
    subprocess.check_call('cd /home/thom/code && mkdir -p ' + path, shell=True)
    subprocess.check_call('touch %s/README' % path, shell=True)
    subprocess.check_call('cd %s && git init && git add .' % path, shell=True)
    # The remote previously contained the e-mail-obfuscation artefact
    # "[email protected]" in place of the SSH user and host.
    subprocess.check_call(
        'cd %s && git remote add origin '
        'git@github.com:unitedstates/uscode-git.git' % path,
        shell=True)

    succeeded = 0
    failed = 0
    failed_objs = []

    for year, edition_msg in reversed(editions):
        for title in range(1, 51):
            logger.info('Writing files for %s ...' % year)
            filename = utils.title_filename(title, year)
            title_path = join(path, str(title))

            try:
                os.mkdir(title_path)
            except OSError:
                # Directory is already there from an earlier edition.
                pass
            else:
                logger.info('created dir %s' % title_path)

            try:
                fp = open(filename)
            except IOError as e:
                logger.warning('No such file: %r: %r' % (filename, e))
                # Nothing to parse for this title; do not fall through
                # with a missing or stale file object.
                continue

            with fp:
                try:
                    gpo_file = File(fp)
                except Exception as e:
                    logger.critical('The parser failed on %r' % filename)
                    continue

                for section in gpo_file.sections():
                    try:
                        # Building the message forces the section's repr;
                        # a section whose repr raises is recorded and
                        # skipped. The message itself is not logged.
                        msg = 'Trying to parse %r' % section
                    except Exception as e:
                        logger.warning('Something terrible happend.')
                        failed_objs.append((section, e))
                        continue

                    try:
                        tree = section.as_tree()
                    except Exception as e:
                        logger.critical('Parse failed! %r' % e)
                        failed += 1
                        failed_objs.append((section, e))
                    else:
                        succeeded += 1
                        section_path = join(
                            title_path, '%s.txt' % str(section.enum()))
                        with open(section_path, 'w') as f:
                            tree.filedump(f)

            commit_msg = 'Title %d: %s' % (title, edition_msg)
            cmd = 'cd %r && git add . && git commit -am"%s"' % (path, commit_msg)
            logger.info('Running %r' % repr(cmd))
            try:
                subprocess.check_output(cmd, shell=True)
            except subprocess.CalledProcessError as e:
                # A failed commit is reported and the run continues.
                print(e)

    logger.info('Sections parsed: %d; sections failed: %d' % (succeeded, failed))
def run(options):
    """Build a git history of one section across all editions.

    The directory ``/home/thom/code/11USC101`` is deleted and recreated as
    a fresh git repository. Editions are processed oldest first; for each
    one, the section group at index ``offset`` of the given title is
    parsed, dumped with ``fson.dump`` to ``<path>/title<N>`` and committed
    with the edition description as the commit message.

    Args:
        options: mapping with an ``"argv"`` sequence; ``argv[0]`` is the
            title number and ``argv[1]`` the section offset, both
            convertible by ``int()``.

    Raises:
        KeyError, IndexError, ValueError: if the arguments are missing or
            not integers. They are validated before anything is deleted.
    """
    argv = options["argv"]
    # Parse the arguments once, up front, so bad input fails before the
    # existing repository directory is wiped.
    title = int(argv[0])
    offset = int(argv[1])

    # (year, edition description) pairs, newest first.
    editions = [
        ('2011', '2006 Edition and Supplement V (2011)'),
        ('2010', '2006 Edition and Supplement IV (2010)'),
        ('2009', '2006 Edition and Supplement III (2009)'),
        ('2008', '2006 Edition and Supplement II (2008)'),
        ('2007', '2006 Edition and Supplement I (2007)'),
        ('2006', '2006 Edition (2006)'),
        ('2005', '2000 Edition and Supplement V (2005)'),
        ('2004', '2000 Edition and Supplement IV (2004)'),
        ('2003', '2000 Edition and Supplement III (2003)'),
        ('2002', '2000 Edition and Supplement II (2002)'),
        ('2001', '2000 Edition and Supplement I (2001)'),
        ('2000', '2000 Edition (2000)'),
        ('1999', '1994 Edition and Supplement V (1999)'),
        ('1998', '1994 Edition and Supplement IV (1998)'),
        ('1997', '1994 Edition and Supplement III (1997)'),
        ('1996', '1994 Edition and Supplement II (1996)'),
        ('1995', '1994 Edition and Supplement I (1995)'),
        ('1994', '1994 Edition (1994)'),
    ]

    path = '/home/thom/code/11USC101'

    try:
        shutil.rmtree(path)
    except OSError:
        # Best effort: the tree may simply not exist yet.
        pass

    subprocess.check_call('cd /home/thom/code && mkdir 11USC101', shell=True)
    subprocess.check_call('touch %s/README' % path, shell=True)
    subprocess.check_call('cd %s && git init && git add .' % path, shell=True)

    for year, msg in reversed(editions):
        print('Committing %s ...' % year)

        filename = utils.title_filename(title, year)

        # Parsing stays inside the ``with`` in case any helper reads the
        # file lazily; the handle is closed before the commit.
        with open(filename) as fp:
            lines = getlines(fp)
            groups = group(lines)
            section = groups[offset].instance
            parser = GPOLocatorParser(section.body_lines())
            js = parser.parse().json()

        # NOTE(review): ``fson`` is not defined in the visible code;
        # confirm it is the intended module.
        fson.dump(js, join(path, 'title%d' % title))

        cmd = 'cd %r && git add . && git commit -am"%s"' % (path, msg)
        print('Running %s' % repr(cmd))
        try:
            subprocess.check_output(cmd, shell=True)
        except subprocess.CalledProcessError as e:
            # A failed commit is reported and the run continues.
            print(e)
def run(options):
    """Build a git history of one section across all editions.

    The directory ``/home/thom/code/11USC101`` is deleted and recreated as
    a fresh git repository. Editions are processed oldest first; for each
    one, the section group at index ``offset`` of the given title is
    parsed, dumped with ``fson.dump`` to ``<path>/title<N>`` and committed
    with the edition description as the commit message.

    Args:
        options: mapping with an ``"argv"`` sequence; ``argv[0]`` is the
            title number and ``argv[1]`` the section offset, both
            convertible by ``int()``.

    Raises:
        KeyError, IndexError, ValueError: if the arguments are missing or
            not integers. They are validated before anything is deleted.
    """
    argv = options["argv"]
    # Parse the arguments once, up front, so bad input fails before the
    # existing repository directory is wiped.
    title = int(argv[0])
    offset = int(argv[1])

    # (year, edition description) pairs, newest first.
    editions = [
        ('2011', '2006 Edition and Supplement V (2011)'),
        ('2010', '2006 Edition and Supplement IV (2010)'),
        ('2009', '2006 Edition and Supplement III (2009)'),
        ('2008', '2006 Edition and Supplement II (2008)'),
        ('2007', '2006 Edition and Supplement I (2007)'),
        ('2006', '2006 Edition (2006)'),
        ('2005', '2000 Edition and Supplement V (2005)'),
        ('2004', '2000 Edition and Supplement IV (2004)'),
        ('2003', '2000 Edition and Supplement III (2003)'),
        ('2002', '2000 Edition and Supplement II (2002)'),
        ('2001', '2000 Edition and Supplement I (2001)'),
        ('2000', '2000 Edition (2000)'),
        ('1999', '1994 Edition and Supplement V (1999)'),
        ('1998', '1994 Edition and Supplement IV (1998)'),
        ('1997', '1994 Edition and Supplement III (1997)'),
        ('1996', '1994 Edition and Supplement II (1996)'),
        ('1995', '1994 Edition and Supplement I (1995)'),
        ('1994', '1994 Edition (1994)'),
    ]

    path = '/home/thom/code/11USC101'

    try:
        shutil.rmtree(path)
    except OSError:
        # Best effort: the tree may simply not exist yet.
        pass

    subprocess.check_call('cd /home/thom/code && mkdir 11USC101', shell=True)
    subprocess.check_call('touch %s/README' % path, shell=True)
    subprocess.check_call('cd %s && git init && git add .' % path, shell=True)

    for year, msg in reversed(editions):
        # Single-argument print() emits the same text on Python 2 and 3.
        print('Committing %s ...' % year)

        filename = utils.title_filename(title, year)

        # Parsing stays inside the ``with`` in case any helper reads the
        # file lazily; the handle is closed before the commit.
        with open(filename) as fp:
            lines = getlines(fp)
            groups = group(lines)
            section = groups[offset].instance
            parser = GPOLocatorParser(section.body_lines())
            js = parser.parse().json()

        # NOTE(review): ``fson`` is not defined in the visible code;
        # confirm it is the intended module.
        fson.dump(js, join(path, 'title%d' % title))

        cmd = 'cd %r && git add . && git commit -am"%s"' % (path, msg)
        print('Running %s' % repr(cmd))
        try:
            subprocess.check_output(cmd, shell=True)
        except subprocess.CalledProcessError as e:
            # A failed commit is reported and the run continues.
            print(e)
def run(options):
    """Rebuild the uscode-git repository, one commit per title per edition.

    The directory ``/home/thom/code/uscode-git`` is deleted and recreated
    as a fresh git repository. Editions are then processed oldest first;
    for each edition, titles 1 through 50 are parsed, every section that
    parses is written to ``<title>/<section>.txt``, and the result is
    committed.

    Args:
        options: task options mapping. Accepted for interface
            compatibility; no entry is read.

    A title whose file cannot be opened, or on which the parser fails, is
    logged and skipped (no files written, no commit) instead of reusing
    the previous title's file or parser object.
    """
    # (year, edition description) pairs, newest first.
    editions = [
        ('2011', '2006 Edition and Supplement V (2011)'),
        ('2010', '2006 Edition and Supplement IV (2010)'),
        ('2009', '2006 Edition and Supplement III (2009)'),
        ('2008', '2006 Edition and Supplement II (2008)'),
        ('2007', '2006 Edition and Supplement I (2007)'),
        ('2006', '2006 Edition (2006)'),
        ('2005', '2000 Edition and Supplement V (2005)'),
        ('2004', '2000 Edition and Supplement IV (2004)'),
        ('2003', '2000 Edition and Supplement III (2003)'),
        ('2002', '2000 Edition and Supplement II (2002)'),
        ('2001', '2000 Edition and Supplement I (2001)'),
        ('2000', '2000 Edition (2000)'),
        ('1999', '1994 Edition and Supplement V (1999)'),
        ('1998', '1994 Edition and Supplement IV (1998)'),
        ('1997', '1994 Edition and Supplement III (1997)'),
        ('1996', '1994 Edition and Supplement II (1996)'),
        ('1995', '1994 Edition and Supplement I (1995)'),
        ('1994', '1994 Edition (1994)'),
    ]

    path = '/home/thom/code/uscode-git'

    try:
        logger.info('Removing tree: %r' % path)
        shutil.rmtree(path)
    except OSError:
        # Best effort: the tree may simply not exist yet.
        pass

    # Create the repo again.
    logger.info('Creating the git repo.')
    subprocess.check_call('cd /home/thom/code && mkdir -p ' + path, shell=True)
    subprocess.check_call('touch %s/README' % path, shell=True)
    subprocess.check_call('cd %s && git init && git add .' % path, shell=True)
    # The remote previously contained the e-mail-obfuscation artefact
    # "[email protected]" in place of the SSH user and host.
    subprocess.check_call(
        'cd %s && git remote add origin '
        'git@github.com:unitedstates/uscode-git.git' % path,
        shell=True)

    succeeded = 0
    failed = 0
    failed_objs = []

    for year, edition_msg in reversed(editions):
        for title in range(1, 51):
            logger.info('Writing files for %s ...' % year)
            filename = utils.title_filename(title, year)
            title_path = join(path, str(title))

            try:
                os.mkdir(title_path)
            except OSError:
                # Directory is already there from an earlier edition.
                pass
            else:
                logger.info('created dir %s' % title_path)

            try:
                fp = open(filename)
            except IOError as e:
                logger.warning('No such file: %r: %r' % (filename, e))
                # Nothing to parse for this title; do not fall through
                # with a missing or stale file object.
                continue

            with fp:
                try:
                    gpo_file = File(fp)
                except Exception as e:
                    logger.critical('The parser failed on %r' % filename)
                    continue

                for section in gpo_file.sections():
                    try:
                        # Building the message forces the section's repr;
                        # a section whose repr raises is recorded and
                        # skipped. The message itself is not logged.
                        msg = 'Trying to parse %r' % section
                    except Exception as e:
                        logger.warning('Something terrible happend.')
                        failed_objs.append((section, e))
                        continue

                    try:
                        tree = section.as_tree()
                    except Exception as e:
                        logger.critical('Parse failed! %r' % e)
                        failed += 1
                        failed_objs.append((section, e))
                    else:
                        succeeded += 1
                        section_path = join(
                            title_path, '%s.txt' % str(section.enum()))
                        with open(section_path, 'w') as f:
                            tree.filedump(f)

            commit_msg = 'Title %d: %s' % (title, edition_msg)
            cmd = 'cd %r && git add . && git commit -am"%s"' % (path, commit_msg)
            logger.info('Running %r' % repr(cmd))
            try:
                subprocess.check_output(cmd, shell=True)
            except subprocess.CalledProcessError as e:
                # A failed commit is reported and the run continues.
                # print(e) emits the same text on Python 2 and Python 3.
                print(e)

    logger.info('Sections parsed: %d; sections failed: %d' % (succeeded, failed))