Esempio n. 1
0
    def parse_day(self, response):
        """Dump the archived-article list found in ``response`` to a CSV file.

        The article date parsed from ``response`` is used twice: formatted as
        ``%Y/%m/%d`` it forms the output path below ``result/ex1_result/``,
        and formatted as ``%d/%m/%Y`` it is handed to
        ``build_article_tuple`` together with each list item.

        Always returns ``None``.
        """
        article_date = parse_article_date(response)

        # The path form of the date is computed before the display form,
        # matching the order in which the helper is expected to be called.
        path_date = date_formatter(article_date, '%Y/%m/%d')
        filename = "result/ex1_result/" + path_date + ".csv"
        display_date = date_formatter(article_date, '%d/%m/%Y')

        # One row per <li> under the element with id "archivedArticles".
        rows = [
            self.build_article_tuple(item, display_date)
            for item in response.xpath('//*[@id="archivedArticles"]/ul/li')
        ]

        csv_writer(('date', 'Headline', 'url'), rows, filename)
def dedupe_licenses(licenses_by_country):
    """Append unseen license names to the mapping CSV, then reload it.

    Args:
        licenses_by_country: Re-iterable collection of 3-item records (it is
            walked twice).  The first two items of each record become a
            key/value pair in ``data/license_details.yaml``; the last item
            is an iterable of ``(name, count)`` pairs.

    Returns:
        dict: Built from the rows of ``data/license_mappings.csv`` as read
        back after the interactive pause.
    """
    data_dir = Path(__file__).parent / 'data'
    csv_path = str(data_dir / 'license_mappings.csv')
    yaml_path = str(data_dir / 'license_details.yaml')

    # Only names that occur with a positive count are candidates.
    seen_names = {name
                  for *_, counts in licenses_by_country
                  for name, count in counts
                  if count > 0}

    existing_keys = {key for key, _ in read_csv(csv_path)}

    # Case-insensitive order; the name itself breaks ties so that names
    # differing only in case do not depend on set iteration order.
    new_rows = [(name, name.lower())
                for name in sorted(seen_names, key=lambda s: (s.lower(), s))
                if name not in existing_keys]

    # newline='' is what the csv module asks for on files it writes to;
    # without it rows end in '\r\r\n' on Windows.
    with open(csv_path, 'a', newline='') as file:
        csv_writer(file).writerows(new_rows)

    # allow_unicode=True emits raw non-ASCII characters, so the stream must
    # be able to encode them whatever the platform's default encoding is.
    with open(yaml_path, 'w', encoding='utf-8') as file:
        yaml.safe_dump(
            {country: details for country, details, _ in licenses_by_country},
            file, allow_unicode=True, default_flow_style=False)

    # Pause before reloading the CSV (presumably so it can be edited by
    # hand first).  input() returns once Enter is pressed.
    input('Press any key to continue')
    return dict(read_csv(csv_path))
Esempio n. 3
0
def dedupe_licenses(licenses_by_country):
    """Record newly seen license names in the mapping CSV and return it.

    Args:
        licenses_by_country: Collection of 3-item records that can be
            iterated more than once.  Each record's first two items are
            written to ``data/license_details.yaml`` as a key/value pair;
            its last item is an iterable of ``(name, count)`` pairs.

    Returns:
        dict: The contents of ``data/license_mappings.csv``, re-read after
        the interactive pause.
    """
    data_dir = Path(__file__).parent / 'data'
    csv_path = str(data_dir / 'license_mappings.csv')
    yaml_path = str(data_dir / 'license_details.yaml')

    # Collect every name that has a positive count in any record.
    seen_names = {name
                  for *_, counts in licenses_by_country
                  for name, count in counts
                  if count > 0}

    existing_keys = {key for key, _ in read_csv(csv_path)}

    # Sort case-insensitively, falling back to the exact name so the order
    # of names that differ only in case is the same on every run.
    new_rows = [(name, name.lower())
                for name in sorted(seen_names, key=lambda s: (s.lower(), s))
                if name not in existing_keys]

    # Files handed to the csv module should be opened with newline='';
    # otherwise Windows output gets an extra '\r' per row.
    with open(csv_path, 'a', newline='') as file:
        csv_writer(file).writerows(new_rows)

    # With allow_unicode=True non-ASCII text is written unescaped, so pin
    # the encoding instead of relying on the locale default.
    with open(yaml_path, 'w', encoding='utf-8') as file:
        yaml.safe_dump(
            {country: details for country, details, _ in licenses_by_country},
            file,
            allow_unicode=True,
            default_flow_style=False)

    # Pause before reloading the CSV (presumably to allow manual edits).
    # input() only returns after Enter is pressed.
    input('Press any key to continue')
    return dict(read_csv(csv_path))
Esempio n. 4
0
    def search(self, **kwargs):
        """Run the project search for each requested year and save a CSV.

        ``kwargs`` is forwarded to the search-key builder.  Two entries get
        defaults that are stored back into ``kwargs``: ``year`` (the current
        local year as a string) and ``grantCode`` (218).  A string ``year``
        is expanded with ``parse_year``; the result is iterated, one search
        per item.

        Every record returned for a year is tagged in place with that year
        and with the ``all_grant_code`` entry for the grant code.  All
        records are then written to ``./output/out_<unix-time>.csv``.
        """
        years = kwargs.setdefault('year', str(time.localtime().tm_year))
        if isinstance(years, str):
            years = parse_year(years)

        grant_code = kwargs.setdefault('grantCode', 218)  # General Program

        data = []
        for y in years:
            print('请求页面信息 grantCode: %s , year: %s' % (grant_code, y))
            checkcode = self.get_validate_code()

            # The search key is built from kwargs with this iteration's year.
            kwargs['year'] = y
            main_search_key = self.__get_search_key(**kwargs)

            form = {"resultDate": main_search_key, "checkcode": checkcode}
            self.session.post(
                'https://isisn.nsfc.gov.cn/egrantindex/funcindex/prjsearch-list',
                data=form)

            # Tag each record as it is collected; the grant-code lookup stays
            # per record so it is only performed when there is a record.
            for record in self.__search_loop(main_search_key):
                record['year'] = y
                record['grantCode'] = all_grant_code[grant_code]
                data.append(record)

        out_name = 'out_%s.csv' % (int(time.time()))
        out_file = os.path.join('.', 'output', out_name)
        header = [
            'prjNo',
            'subjectCode',
            'ctitle',
            'psnName',
            'orgName',
            'totalAmt',
            'startEndDate',
            'year',
            'grantCode',
        ]
        utils.csv_writer(out_file, header, data)
        print('搜索完成, 请检查 %s' % (out_file, ))
Esempio n. 5
0
            continue

        try:
            zccd.append({
                'zip': z,
                'state_fips': st,
                'state_abbr': FIPS_TO_STATE[st],
                'cd': str(int(cd))  # string conversion to drop leading zero
            })
        except Exception, e:
            msg = 'unable to convert CD for %s: %s' % (z, stcd)
            log.error(msg)
            continue
    return zccd


if __name__ == "__main__":
    # Load the state FIPS table and derive its inverse (value -> key).
    FIPS_TO_STATE = load_fips('raw/state_fips.txt')
    STATE_TO_FIPS = dict(
        (state, fips) for fips, state in FIPS_TO_STATE.iteritems())

    # Load the HUD crosswalk file.
    zccd_hud = load_hud_crosswalk('raw/hud_crosswalk.xlsx')

    # Order the rows by state FIPS, then ZIP, then congressional district.
    zccd_sorted = list(zccd_hud)
    zccd_sorted.sort(
        key=lambda row: (row['state_fips'], row['zip'], row['cd']))

    # Write the sorted rows, passing the column names as the last argument.
    output_columns = ['state_fips', 'state_abbr', 'zip', 'cd']
    utils.csv_writer('zccd_hud.csv', zccd_sorted, output_columns)