def extract_year_month(input):
    """
    Convert a (month name, year) pair into a (year, month number) tuple.

    Something like ("January", 2011) becomes (2011, 1).  The month name is
    lower-cased before it is handed to month2num, and the year is coerced
    with int().  When month2num yields a falsy result, None is returned
    instead of a tuple.
    """
    month_number = month2num(input[0].lower())
    year_number = int(input[1])

    # Guard clause: no usable month means no result at all.
    if not month_number:
        return None
    return year_number, month_number
Exemple #2
0
def extract_year_month(input):
    """
    Map a pair such as ("January", 2011) onto the tuple (2011, 1).

    input[0] is lower-cased and passed to month2num; input[1] is converted
    with int().  The result is (year, month) when month2num returns a truthy
    value and None otherwise.
    """
    name_lowered = input[0].lower()
    month_value = month2num(name_lowered)
    year_value = int(input[1])

    return (year_value, month_value) if month_value else None
def process_summons(filename):
    """
    Read the first sheet of one Excel workbook and write it to stdout as a
    single tab-separated line.

    The line starts with the geography (row 1, column 0), the year (last
    four characters of row 3, column 2) and the month (row 2, column 0 run
    through month2num), followed by one value per entry of DATA_COLUMNS.

    Sheet rows 4 through 38 each contribute two values, keyed by
    columnize(label + ' mtd') and columnize(label + ' ytd').
    """
    sheet = xlrd.open_workbook(filename).sheet_by_index(0)
    geog = sheet.row(1)[0].value
    month = month2num(sheet.row(2)[0].value)
    year = int(sheet.row(3)[2].value[-4:])

    def count_at(above, current, idx):
        # ctype 0 is xlrd's empty cell: fall back to the last line of the
        # cell directly above.  ctype 2 is a numeric cell; anything else is
        # treated as text whose first line holds the number.
        cell = current[idx]
        if cell.ctype == 0:
            return int(above[idx].value.split(u'\n')[-1])
        if cell.ctype == 2:
            return int(cell.value)
        return int(cell.value.split(u'\n')[0])

    data = {}
    for rnum in xrange(4, 39):
        above, current = sheet.row(rnum - 1), sheet.row(rnum)

        # Same empty-cell fallback for the label column.
        label_cell = current[0]
        if label_cell.ctype != 0:
            label = label_cell.value.split(u'\n')[0]
        else:
            label = above[0].value.split(u'\n')[-1]

        mtd = count_at(above, current, 1)
        ytd = count_at(above, current, 2)

        data[columnize(label + u' mtd')] = mtd
        data[columnize(label + u' ytd')] = ytd

    out = sys.stdout
    out.write(u'\t'.join(unicode(v) for v in (geog, year, month)))
    for c in DATA_COLUMNS:
        out.write(u'\t' + unicode(data[c]))
    out.write(u'\n')
def process_summons(filename):
    """
    Turn the first sheet of a single Excel workbook into one tab-separated
    line written to stdout.

    Output fields, in order: geography (row 1, column 0), year (last four
    characters of row 3, column 2), month (month2num of row 2, column 0),
    then data[c] for every c in DATA_COLUMNS.

    data is filled from sheet rows 4..38: column 0 supplies a label, columns
    1 and 2 supply the integer values stored under columnize(label + ' mtd')
    and columnize(label + ' ytd') respectively.
    """
    first_sheet = xlrd.open_workbook(filename).sheet_by_index(0)
    geog = first_sheet.row(1)[0].value
    month = month2num(first_sheet.row(2)[0].value)
    year = int(first_sheet.row(3)[2].value[-4:])

    data = {}
    for rnum in xrange(4, 39):
        prior = first_sheet.row(rnum - 1)
        this = first_sheet.row(rnum)

        # An empty cell (xlrd ctype 0) borrows the last line of the cell in
        # the previous row; otherwise the first line of the cell is used.
        name = (prior[0].value.split(u'\n')[-1] if this[0].ctype == 0
                else this[0].value.split(u'\n')[0])

        counts = []
        for idx in (1, 2):
            cell = this[idx]
            if cell.ctype == 0:
                raw = prior[idx].value.split(u'\n')[-1]
            elif cell.ctype == 2:
                # Numeric cell (xlrd ctype 2): the value converts directly.
                raw = cell.value
            else:
                raw = cell.value.split(u'\n')[0]
            counts.append(int(raw))
        mtd_val, ytd_val = counts

        mtd_key = columnize(name + u' mtd')
        ytd_key = columnize(name + u' ytd')
        data.update({mtd_key: mtd_val})
        data.update({ytd_key: ytd_val})

    leading = [unicode(geog), unicode(year), unicode(month)]
    sys.stdout.write(u'\t'.join(leading))
    for c in DATA_COLUMNS:
        sys.stdout.write(u'\t' + unicode(data[c]))
    sys.stdout.write(u'\n')
        if all([c != 200 for c in codes.values()]):
            sys.stderr.write(u"No more archives! {codes} at {year}/{month} "
                             u"\n".format(year=year, month=month, codes=codes))
            break

    # Also download current acc reports in case NYPD forgot to add them to the
    # zip archive
    sys.stderr.write(u"Downloading current acc reports...\n")

    resp = requests.get(CURRENT_EXCEL_ROOT + u'cityacc.xlsx')

    sh = xlrd.open_workbook(file_contents=resp.content).sheet_by_index(0)
    _, cur_month_name, cur_year = sh.row(1)[0].value.split()

    month = month2num(cur_month_name)
    year = int(cur_year)

    path = os.path.join(archive_path, str(year), u'{:0>2}'.format(month))
    try:
        os.makedirs(path)
    except OSError:
        pass

    for boro in ('city', 'bk', 'bx', 'mn', 'qn', 'si'):
        pdf_resp = requests.get(u'{0}{1}{2}'.format(CURRENT_PDF_ROOT, boro, 'acc.pdf'))
        filename = pdf_resp.url.split('/')[-1]
        open(os.path.join(path, filename), 'w').write(pdf_resp.content)
        sys.stderr.write(u"Wrote {0} to {1}\n".format(filename, path))

        excel_resp = requests.get(u'{0}{1}{2}'.format(CURRENT_EXCEL_ROOT, boro, 'acc.xlsx'))
Exemple #6
0
        if all([c != 200 for c in codes.values()]):
            sys.stderr.write(u"No more archives! {codes} at {year}/{month} "
                             u"\n".format(year=year, month=month, codes=codes))
            break

    # Also download current acc reports in case NYPD forgot to add them to the
    # zip archive
    sys.stderr.write(u"Downloading current acc reports...\n")

    resp = requests.get(CURRENT_EXCEL_ROOT + u'cityacc.xlsx')

    sh = xlrd.open_workbook(file_contents=resp.content).sheet_by_index(0)
    _, cur_month_name, cur_year = sh.row(1)[0].value.split()

    month = month2num(cur_month_name)
    year = int(cur_year)

    path = os.path.join(archive_path, str(year), u'{:0>2}'.format(month))
    try:
        os.makedirs(path)
    except OSError:
        pass

    for boro in ('city', 'bk', 'bx', 'mn', 'qn', 'si'):
        pdf_resp = requests.get(u'{0}{1}{2}'.format(CURRENT_PDF_ROOT, boro,
                                                    'acc.pdf'))
        filename = pdf_resp.url.split('/')[-1]
        open(os.path.join(path, filename), 'w').write(pdf_resp.content)
        sys.stderr.write(u"Wrote {0} to {1}\n".format(filename, path))