Ejemplo n.º 1
0
def run(options):
    """Parse every section of one title and write each one out as JSON.

    Args:
        options: dict-like object. Reads ``'title'`` (required) and
            ``'year'`` (defaults to 2011).

    Returns:
        None. Logs a message and returns early when no title is supplied or
        when the title's file does not exist on disk.
    """
    title_number = options.get('title')
    # section = options.get('section', None)
    year = options.get('year', 2011)  # default to 2011 for now

    if not title_number:
        utils.log("Supply a 'title' argument to parse a title.")
        return

    filename = utils.title_filename(title_number, year)
    if not os.path.exists(filename):
        utils.log("This title has not been downloaded.")
        # Nothing to parse: stop here instead of handing a missing file on.
        return

    title = uscode.title_for(filename)

    count = 0
    for section in title.sections():
        section_number = section.enum()
        print("[%s USC %s] Parsing..." % (title_number, section_number))

        # this could probably be black boxed a little further?
        parsed = uscode.GPOLocatorParser(section.body_lines()).parse()

        # The second argument is whatever uscode_output() returns for this
        # section -- presumably the output location; confirm against utils.
        utils.write(json.dumps(parsed.json(), sort_keys=True, indent=2),
                    uscode_output(year, title_number, section_number))

        count += 1

    # Single-argument call form prints identically on Python 2 and 3.
    print("\nParsed %s sections of title %s." % (count, title_number))
Ejemplo n.º 2
0
def run(options):
    """Try to parse every section of one title and log success statistics.

    Args:
        options: mapping whose ``"title"`` entry is convertible to ``int``.
            The file opened is ``title_filename(<title>, '2011')``.

    Each section yielded by ``File(fp).sections()`` is converted with
    ``as_tree()``; failures are logged and counted rather than raised. The
    function ends by dropping into the debugger.
    """
    filename = title_filename(int(options["title"]), '2011')

    succeeded = 0
    failed = 0
    # The handle stays open through the breakpoint below so that objects
    # built on top of it can still be inspected, and is closed afterwards.
    with open(filename) as fp:
        gpo_file = File(fp)
        for section in gpo_file.sections():
            logger.info('Trying to parse %r', section)
            try:
                # The result is only kept so the last tree is visible in pdb.
                tree = section.as_tree()
            except Exception as e:
                logger.warning('  .. parse failed: %r', e)
                failed += 1
            else:
                succeeded += 1

        logger.info('Number that succeeded: %d', succeeded)
        logger.info('Number that failed: %d', failed)

        # Share of all attempted sections that parsed, as a percentage.
        # Guarded so a title with no sections does not divide by zero.
        total = succeeded + failed
        percent = 100.0 * succeeded / total if total else 0.0
        logger.info('Percent success: %f', percent)

        # NOTE(review): interactive breakpoint left in on purpose as a
        # development aid -- remove before running unattended.
        import pdb
        pdb.set_trace()
Ejemplo n.º 3
0
def run(options):
    """Try to parse every section of one title and log success statistics.

    Args:
        options: mapping whose ``"title"`` entry is convertible to ``int``.
            The file opened is ``title_filename(<title>, '2011')``.

    Each section yielded by ``File(fp).sections()`` is converted with
    ``as_tree()``; failures are logged and counted rather than raised. The
    function ends by dropping into the debugger.
    """
    filename = title_filename(int(options["title"]), '2011')

    succeeded = 0
    failed = 0
    # The handle stays open through the breakpoint below so that objects
    # built on top of it can still be inspected, and is closed afterwards.
    with open(filename) as fp:
        gpo_file = File(fp)
        for section in gpo_file.sections():
            logger.info('Trying to parse %r', section)
            try:
                # The result is only kept so the last tree is visible in pdb.
                tree = section.as_tree()
            except Exception as e:
                logger.warning('  .. parse failed: %r', e)
                failed += 1
            else:
                succeeded += 1

        logger.info('Number that succeeded: %d', succeeded)
        logger.info('Number that failed: %d', failed)

        # Share of all attempted sections that parsed, as a percentage.
        # Guarded so a title with no sections does not divide by zero.
        total = succeeded + failed
        percent = 100.0 * succeeded / total if total else 0.0
        logger.info('Percent success: %f', percent)

        # NOTE(review): interactive breakpoint left in on purpose as a
        # development aid -- remove before running unattended.
        import pdb
        pdb.set_trace()
Ejemplo n.º 4
0
def run(options):
    """Parse every section of one title and write each one out as JSON.

    Args:
        options: dict-like object. Reads ``'title'`` (required) and
            ``'year'`` (defaults to 2011).

    Returns:
        None. Logs a message and returns early when no title is supplied or
        when the title's file does not exist on disk.
    """
    title_number = options.get('title')
    # section = options.get('section', None)
    year = options.get('year', 2011)  # default to 2011 for now

    if not title_number:
        utils.log("Supply a 'title' argument to parse a title.")
        return

    filename = utils.title_filename(title_number, year)
    if not os.path.exists(filename):
        utils.log("This title has not been downloaded.")
        # Nothing to parse: stop here instead of handing a missing file on.
        return

    title = uscode.title_for(filename)

    count = 0
    for section in title.sections():
        section_number = section.enum()
        # Single-argument call form prints identically on Python 2 and 3.
        print("[%s USC %s] Parsing..." % (title_number, section_number))

        # this could probably be black boxed a little further?
        parsed = uscode.GPOLocatorParser(section.body_lines()).parse()

        # The second argument is whatever uscode_output() returns for this
        # section -- presumably the output location; confirm against utils.
        utils.write(
            json.dumps(parsed.json(), sort_keys=True, indent=2),
            uscode_output(year, title_number, section_number)
        )

        count += 1

    print("\nParsed %s sections of title %s." % (count, title_number))
Ejemplo n.º 5
0
def run(options):
    """Rebuild the uscode-git repository with one commit per title and edition.

    The repository directory is deleted and re-initialised. Then, for every
    (year, edition label) pair from oldest to newest and every title number
    from 1 to 50, each section of the title's file is converted with
    ``section.as_tree()`` and dumped to ``<repo>/<title>/<section enum>.txt``,
    after which everything is committed with a message naming the title and
    the edition.

    A title whose file cannot be opened or loaded is logged and skipped
    (no commit is attempted for it). A section that fails to parse is logged,
    recorded, and skipped, so no stale tree is ever written to its file.

    Args:
        options: accepted for interface compatibility; not read here.
    """
    args = [
        ('2011', '2006 Edition and Supplement V (2011)'),
        ('2010', '2006 Edition and Supplement IV (2010)'),
        ('2009', '2006 Edition and Supplement III (2009)'),
        ('2008', '2006 Edition and Supplement II (2008)'),
        ('2007', '2006 Edition and Supplement I (2007)'),
        ('2006', '2006 Edition (2006)'),
        ('2005', '2000 Edition and Supplement V (2005)'),
        ('2004', '2000 Edition and Supplement IV (2004)'),
        ('2003', '2000 Edition and Supplement III (2003)'),
        ('2002', '2000 Edition and Supplement II (2002)'),
        ('2001', '2000 Edition and Supplement I (2001)'),
        ('2000', '2000 Edition (2000)'),
        ('1999', '1994 Edition and Supplement V (1999)'),
        ('1998', '1994 Edition and Supplement IV (1998)'),
        ('1997', '1994 Edition and Supplement III (1997)'),
        ('1996', '1994 Edition and Supplement II (1996)'),
        ('1995', '1994 Edition and Supplement I (1995)'),
        ('1994', '1994 Edition (1994)'),
    ]

    path = '/home/thom/code/uscode-git'
    try:
        logger.info('Removing tree: %r', path)
        shutil.rmtree(path)
    except OSError:
        # Best effort: typically the tree just does not exist yet.
        pass

    # Create the repo again.
    logger.info('Creating the git repo.')
    subprocess.check_call('cd /home/thom/code && mkdir -p ' + path, shell=True)
    subprocess.check_call('touch %s/README' % path, shell=True)
    subprocess.check_call('cd %s && git init && git add .' % path, shell=True)
    subprocess.check_call(
        'cd %s && git remote add origin '
        'git@github.com:unitedstates/uscode-git.git' % path, shell=True)

    succeeded = 0
    failed = 0
    failed_objs = []
    # reversed(): the list is newest-first, commits must go oldest-first.
    for year, edition in reversed(args):

        for title in range(1, 51):

            logger.info('Writing files for %s ...', year)
            filename = utils.title_filename(title, year)

            title_path = join(path, str(title))

            try:
                os.mkdir(title_path)
            except OSError:
                # Typically the directory already exists from an earlier
                # edition; either way keep going.
                pass
            else:
                logger.info('created dir %s', title_path)

            try:
                fp = open(filename)
            except IOError as e:
                logger.warning('No such file: %r: %r', filename, e)
                # Without a file there is nothing to parse or commit.
                continue

            with fp:
                try:
                    gpo_file = File(fp)
                except Exception:
                    logger.critical('The parser failed on %r', filename)
                    continue

                for section in gpo_file.sections():
                    try:
                        # Formatting calls the section's repr, which is
                        # guarded because it may itself raise.
                        msg = 'Trying to parse %r' % section
                        logger.debug(msg)
                    except Exception as e:
                        logger.warning('Something terrible happend.')
                        failed_objs.append((section, e))
                        continue

                    try:
                        tree = section.as_tree()
                    except Exception as e:
                        logger.critical('Parse failed! %r', e)
                        failed += 1
                        failed_objs.append((section, e))
                        # Never dump a tree left over from another section.
                        continue
                    succeeded += 1

                    section_path = join(
                        title_path, '%s.txt' % str(section.enum()))
                    with open(section_path, 'w') as f:
                        tree.filedump(f)

            commit_msg = 'Title %d: %s' % (title, edition)
            cmd = 'cd %r && git add . && git commit -am"%s"' % (path, commit_msg)
            logger.info('Running %r', repr(cmd))
            try:
                subprocess.check_output(cmd, shell=True)
            except subprocess.CalledProcessError as e:
                # A failed commit is reported but does not abort the run.
                print(e)

    logger.info('Sections parsed: %d succeeded, %d failed.', succeeded, failed)
Ejemplo n.º 6
0
def run(options):
    """Commit one section of one title, edition by edition, into a fresh repo.

    The repository at ``/home/thom/code/11USC101`` is deleted and
    re-initialised. Then, for every (year, edition label) pair from oldest to
    newest, the selected group of the title's file is parsed, its JSON is
    written with ``fson.dump`` to ``<repo>/title<N>``, and the result is
    committed using the edition label as the commit message.

    Args:
        options: mapping with an ``"argv"`` sequence; ``argv[0]`` is the title
            number and ``argv[1]`` the index into ``group(lines)``, both
            convertible to ``int``.
    """
    argv = options["argv"]
    # Identical for every edition, so parse them once -- and before the
    # existing repository is deleted, so bad arguments destroy nothing.
    title = int(argv[0])
    offset = int(argv[1])

    args = [
        ('2011', '2006 Edition and Supplement V (2011)'),
        ('2010', '2006 Edition and Supplement IV (2010)'),
        ('2009', '2006 Edition and Supplement III (2009)'),
        ('2008', '2006 Edition and Supplement II (2008)'),
        ('2007', '2006 Edition and Supplement I (2007)'),
        ('2006', '2006 Edition (2006)'),
        ('2005', '2000 Edition and Supplement V (2005)'),
        ('2004', '2000 Edition and Supplement IV (2004)'),
        ('2003', '2000 Edition and Supplement III (2003)'),
        ('2002', '2000 Edition and Supplement II (2002)'),
        ('2001', '2000 Edition and Supplement I (2001)'),
        ('2000', '2000 Edition (2000)'),
        ('1999', '1994 Edition and Supplement V (1999)'),
        ('1998', '1994 Edition and Supplement IV (1998)'),
        ('1997', '1994 Edition and Supplement III (1997)'),
        ('1996', '1994 Edition and Supplement II (1996)'),
        ('1995', '1994 Edition and Supplement I (1995)'),
        ('1994', '1994 Edition (1994)'),
    ]

    path = '/home/thom/code/11USC101'
    try:
        shutil.rmtree(path)
    except OSError:
        # Best effort: typically the tree just does not exist yet.
        pass
    subprocess.check_call('cd /home/thom/code && mkdir 11USC101', shell=True)
    subprocess.check_call('touch %s/README' % path, shell=True)
    subprocess.check_call('cd %s && git init && git add .' % path, shell=True)

    # reversed(): the list is newest-first, commits must go oldest-first.
    for year, msg in reversed(args):
        print('Committing %s ...' % year)
        filename = utils.title_filename(title, year)

        # Everything that may still read from the file stays inside the
        # ``with`` so the handle is closed once the JSON has been produced.
        with open(filename) as fp:
            lines = getlines(fp)
            gg = group(lines)

            ss = gg[offset].instance
            bb = ss.body_lines()
            xx = GPOLocatorParser(bb)
            qq = xx.parse()
            js = qq.json()

        fson.dump(js, join(path, 'title%d' % title))
        cmd = 'cd %r && git add . && git commit -am"%s"' % (path, msg)
        print('Running %r' % cmd)
        try:
            subprocess.check_output(cmd, shell=True)
        except subprocess.CalledProcessError as e:
            # A failed commit is reported but does not abort the run.
            print(e)
Ejemplo n.º 7
0
def run(options):
    """Commit one section of one title, edition by edition, into a fresh repo.

    The repository at ``/home/thom/code/11USC101`` is deleted and
    re-initialised. Then, for every (year, edition label) pair from oldest to
    newest, the selected group of the title's file is parsed, its JSON is
    written with ``fson.dump`` to ``<repo>/title<N>``, and the result is
    committed using the edition label as the commit message.

    Args:
        options: mapping with an ``"argv"`` sequence; ``argv[0]`` is the title
            number and ``argv[1]`` the index into ``group(lines)``, both
            convertible to ``int``.
    """
    argv = options["argv"]
    # Identical for every edition, so parse them once -- and before the
    # existing repository is deleted, so bad arguments destroy nothing.
    title = int(argv[0])
    offset = int(argv[1])

    args = [
        ('2011', '2006 Edition and Supplement V (2011)'),
        ('2010', '2006 Edition and Supplement IV (2010)'),
        ('2009', '2006 Edition and Supplement III (2009)'),
        ('2008', '2006 Edition and Supplement II (2008)'),
        ('2007', '2006 Edition and Supplement I (2007)'),
        ('2006', '2006 Edition (2006)'),
        ('2005', '2000 Edition and Supplement V (2005)'),
        ('2004', '2000 Edition and Supplement IV (2004)'),
        ('2003', '2000 Edition and Supplement III (2003)'),
        ('2002', '2000 Edition and Supplement II (2002)'),
        ('2001', '2000 Edition and Supplement I (2001)'),
        ('2000', '2000 Edition (2000)'),
        ('1999', '1994 Edition and Supplement V (1999)'),
        ('1998', '1994 Edition and Supplement IV (1998)'),
        ('1997', '1994 Edition and Supplement III (1997)'),
        ('1996', '1994 Edition and Supplement II (1996)'),
        ('1995', '1994 Edition and Supplement I (1995)'),
        ('1994', '1994 Edition (1994)'),
    ]

    path = '/home/thom/code/11USC101'
    try:
        shutil.rmtree(path)
    except OSError:
        # Best effort: typically the tree just does not exist yet.
        pass
    subprocess.check_call('cd /home/thom/code && mkdir 11USC101', shell=True)
    subprocess.check_call('touch %s/README' % path, shell=True)
    subprocess.check_call('cd %s && git init && git add .' % path, shell=True)

    # reversed(): the list is newest-first, commits must go oldest-first.
    for year, msg in reversed(args):
        # Single-argument call form prints identically on Python 2 and 3.
        print('Committing %s ...' % year)
        filename = utils.title_filename(title, year)

        # Everything that may still read from the file stays inside the
        # ``with`` so the handle is closed once the JSON has been produced.
        with open(filename) as fp:
            lines = getlines(fp)
            gg = group(lines)

            ss = gg[offset].instance
            bb = ss.body_lines()
            xx = GPOLocatorParser(bb)
            qq = xx.parse()
            js = qq.json()

        fson.dump(js, join(path, 'title%d' % title))
        cmd = 'cd %r && git add . && git commit -am"%s"' % (path, msg)
        print('Running %r' % cmd)
        try:
            subprocess.check_output(cmd, shell=True)
        except subprocess.CalledProcessError as e:
            # A failed commit is reported but does not abort the run.
            print(e)
Ejemplo n.º 8
0
def run(options):
    """Rebuild the uscode-git repository with one commit per title and edition.

    The repository directory is deleted and re-initialised. Then, for every
    (year, edition label) pair from oldest to newest and every title number
    from 1 to 50, each section of the title's file is converted with
    ``section.as_tree()`` and dumped to ``<repo>/<title>/<section enum>.txt``,
    after which everything is committed with a message naming the title and
    the edition.

    A title whose file cannot be opened or loaded is logged and skipped
    (no commit is attempted for it). A section that fails to parse is logged,
    recorded, and skipped, so no stale tree is ever written to its file.

    Args:
        options: accepted for interface compatibility; not read here.
    """
    args = [
        ('2011', '2006 Edition and Supplement V (2011)'),
        ('2010', '2006 Edition and Supplement IV (2010)'),
        ('2009', '2006 Edition and Supplement III (2009)'),
        ('2008', '2006 Edition and Supplement II (2008)'),
        ('2007', '2006 Edition and Supplement I (2007)'),
        ('2006', '2006 Edition (2006)'),
        ('2005', '2000 Edition and Supplement V (2005)'),
        ('2004', '2000 Edition and Supplement IV (2004)'),
        ('2003', '2000 Edition and Supplement III (2003)'),
        ('2002', '2000 Edition and Supplement II (2002)'),
        ('2001', '2000 Edition and Supplement I (2001)'),
        ('2000', '2000 Edition (2000)'),
        ('1999', '1994 Edition and Supplement V (1999)'),
        ('1998', '1994 Edition and Supplement IV (1998)'),
        ('1997', '1994 Edition and Supplement III (1997)'),
        ('1996', '1994 Edition and Supplement II (1996)'),
        ('1995', '1994 Edition and Supplement I (1995)'),
        ('1994', '1994 Edition (1994)'),
    ]

    path = '/home/thom/code/uscode-git'
    try:
        logger.info('Removing tree: %r', path)
        shutil.rmtree(path)
    except OSError:
        # Best effort: typically the tree just does not exist yet.
        pass

    # Create the repo again.
    logger.info('Creating the git repo.')
    subprocess.check_call('cd /home/thom/code && mkdir -p ' + path, shell=True)
    subprocess.check_call('touch %s/README' % path, shell=True)
    subprocess.check_call('cd %s && git init && git add .' % path, shell=True)
    subprocess.check_call(
        'cd %s && git remote add origin '
        'git@github.com:unitedstates/uscode-git.git' % path, shell=True)

    succeeded = 0
    failed = 0
    failed_objs = []
    # reversed(): the list is newest-first, commits must go oldest-first.
    for year, edition in reversed(args):

        for title in range(1, 51):

            logger.info('Writing files for %s ...', year)
            filename = utils.title_filename(title, year)

            title_path = join(path, str(title))

            try:
                os.mkdir(title_path)
            except OSError:
                # Typically the directory already exists from an earlier
                # edition; either way keep going.
                pass
            else:
                logger.info('created dir %s', title_path)

            try:
                fp = open(filename)
            except IOError as e:
                logger.warning('No such file: %r: %r', filename, e)
                # Without a file there is nothing to parse or commit.
                continue

            with fp:
                try:
                    gpo_file = File(fp)
                except Exception:
                    logger.critical('The parser failed on %r', filename)
                    continue

                for section in gpo_file.sections():
                    try:
                        # Formatting calls the section's repr, which is
                        # guarded because it may itself raise.
                        msg = 'Trying to parse %r' % section
                        logger.debug(msg)
                    except Exception as e:
                        logger.warning('Something terrible happend.')
                        failed_objs.append((section, e))
                        continue

                    try:
                        tree = section.as_tree()
                    except Exception as e:
                        logger.critical('Parse failed! %r', e)
                        failed += 1
                        failed_objs.append((section, e))
                        # Never dump a tree left over from another section.
                        continue
                    succeeded += 1

                    section_path = join(
                        title_path, '%s.txt' % str(section.enum()))
                    with open(section_path, 'w') as f:
                        tree.filedump(f)

            commit_msg = 'Title %d: %s' % (title, edition)
            cmd = 'cd %r && git add . && git commit -am"%s"' % (path, commit_msg)
            logger.info('Running %r', repr(cmd))
            try:
                subprocess.check_output(cmd, shell=True)
            except subprocess.CalledProcessError as e:
                # A failed commit is reported but does not abort the run.
                # Single-argument call form works on Python 2 and 3.
                print(e)

    logger.info('Sections parsed: %d succeeded, %d failed.', succeeded, failed)