# NOTE(review): this chunk begins with an unconditional assignment; any guard
# that may precede it is not visible here -- confirm against the full file.
progress = Progress(id="progress", value=0)

# The data files are plain YAML, so safe_load is used: it refuses to build
# arbitrary Python objects and does not need an explicit Loader argument.
with open('government.github.com/_data/governments.yml') as infile:
    _data = yaml.safe_load(infile)
data = reshape_data(_data)
# Lower-cased entity names of every government organization.
organizations_government = {
    organization['entity'].lower() for organization in data
}

with open('government.github.com/_data/civic_hackers.yml') as infile:
    _data_civic = yaml.safe_load(infile)
data_civic = reshape_data(_data_civic)
# Lower-cased entity names of every civic organization.
organizations_civic = {
    organization['entity'].lower() for organization in data_civic
}

# Walk the government list starting at the stored cursor so an interrupted
# run picks up where it stopped.
for i in xrange(progress.value, len(data)):
    entry = data[i]
    logging.info("%s %s %s", i, entry['entity'], entry['grouping'])
    try:
        o_data = upsert_organization(
            entry['entity'], entry['grouping'], "government")
        r_data = upsert_repositories(o_data)
        upsert_contributors(o_data, r_data)
        upsert_members(o_data)
        # Persist the cursor only after the whole organization succeeded.
        progress.value = i + 1
        session.add(progress)
        session.commit()
    except UnknownObjectException:
        # Best-effort: skip this organization, but leave a trace in the log
        # instead of dropping it silently. The cursor is not advanced here.
        logging.warning(
            "Skipping %s: UnknownObjectException", entry['entity'])

# Reset the cursor once the government pass is finished.
# NOTE(review): the source was collapsed onto one line; this statement is
# assumed to sit after the loop rather than inside it -- confirm.
progress.value = 0
# Scraper script body: log to scraper.log, create the GitHub client, load the
# organization lists, and upsert every government organization, resuming
# from the cursor stored in the Progress row.
logging.basicConfig(filename="scraper.log", level=logging.INFO)
G = Github(ACCESS_TOKEN)

# Reuse the stored Progress row; start from index 0 when the query yields
# nothing.
progress = session.query(Progress).first()
if not progress:
    progress = Progress(id="progress", value=0)

# The data files are plain YAML, so safe_load is used: it refuses to build
# arbitrary Python objects and does not need an explicit Loader argument.
with open('government.github.com/_data/governments.yml') as infile:
    _data = yaml.safe_load(infile)
data = reshape_data(_data)
# Lower-cased entity names of every government organization.
organizations_government = {
    organization['entity'].lower() for organization in data
}

with open('government.github.com/_data/civic_hackers.yml') as infile:
    _data_civic = yaml.safe_load(infile)
data_civic = reshape_data(_data_civic)
# Lower-cased entity names of every civic organization.
organizations_civic = {
    organization['entity'].lower() for organization in data_civic
}

# Walk the government list starting at the stored cursor so an interrupted
# run picks up where it stopped.
for i in xrange(progress.value, len(data)):
    entry = data[i]
    logging.info("%s %s %s", i, entry['entity'], entry['grouping'])
    try:
        o_data = upsert_organization(
            entry['entity'], entry['grouping'], "government")
        r_data = upsert_repositories(o_data)
        upsert_contributors(o_data, r_data)
        upsert_members(o_data)
        # Persist the cursor only after the whole organization succeeded.
        progress.value = i + 1
        session.add(progress)
        session.commit()
    except UnknownObjectException:
        # Best-effort: skip this organization, but leave a trace in the log
        # instead of dropping it silently. The cursor is not advanced here.
        logging.warning(
            "Skipping %s: UnknownObjectException", entry['entity'])

# Reset the cursor once the government pass is finished.
# NOTE(review): the source was collapsed onto one line; this statement is
# assumed to sit after the loop rather than inside it -- confirm.
progress.value = 0