def main():
    """Scrape the 2005-2008 archive pages and dump the results as JSON.

    For every year in 2005..2008 the page at
    ``developer + '/open-source/gsoc/<year>/'`` is fetched with ``getPage``
    and passed to ``get_info``. The two values it returns are stored per
    year and written out through ``dumper`` once all years are processed.
    """
    orgs_by_year = {}
    projects_by_year = {}

    for season in range(2005, 2009):
        page_path = '/open-source/gsoc/{yr}/'.format(yr=season)
        parsed = getPage(developer + page_path)
        # get_info's result is unpacked as (orgs, projects) for this year.
        orgs_by_year[season], projects_by_year[season] = get_info(parsed)

    dumper(orgs_by_year, "orgs_2005-2008.json")
    dumper(projects_by_year, "projects_2005-2008.json")
def main():
    """Scrape the 2016 and 2017 archive and dump projects and organizations.

    Walks pages 1-11 of ``/archive/<year>/projects/`` for each year, collects
    the details of the project links found on every page, then writes the
    accumulated projects and the result of ``orgs_info()`` via ``dumper``.
    """
    projects = []
    for year in range(2016, 2018):
        for page in range(1, 12):
            url = URL + '/archive/{yr}/projects/?page={page}'.format(
                yr=year, page=page)
            soup = getPage(url)
            # The first and last entries returned by getList are dropped
            # (presumably non-project links -- TODO confirm against getList).
            project_links = getList(soup)[1:-1]
            projects.extend(getDetails(project_links))

    # Fix: the projects dump used to be written without the ".json" suffix
    # that the organizations dump below carries.
    dumper(projects, 'projects_2016-2017.json')
    dumper(orgs_info(), 'organizations_2016-2017.json')
def main():
    """Collect the 2009-2015 organization/project data and dump it as JSON.

    ``All_orgs()`` yields two values; the first is processed by
    ``org_info_below_13`` and the second by ``org_info_above_14``. Each of
    those returns a pair, and the four results are written through
    ``dumper`` in a fixed order.
    """
    early_orgs, late_orgs = All_orgs()
    orgs_to_2013, projects_to_2013 = org_info_below_13(early_orgs)
    orgs_from_2014, projects_from_2014 = org_info_above_14(late_orgs)

    # (payload, file name) pairs, written in this order.
    outputs = (
        (orgs_to_2013, "Organization_2009-2013.json"),
        (projects_to_2013, "projects_2009-2013.json"),
        (orgs_from_2014, "Organization_2014-2015.json"),
        (projects_from_2014, "projects_2014-2015.json"),
    )
    for payload, filename in outputs:
        dumper(payload, filename)
def main():
    """Scrape the 2005-2008 archive pages and dump the results as JSON.

    For every year in 2005..2008 the coroutine ``get_page(url)`` is run to
    completion on the asyncio event loop, its result is passed to
    ``get_info``, and the two values returned are stored per year. Both
    per-year dicts are handed to ``dumper`` at the end.
    """
    # The event loop does not change between iterations, so fetch it once
    # instead of on every pass through the loop.
    loop = asyncio.get_event_loop()

    orgs_data = {}
    projects_data = {}
    for year in range(2005, 2009):
        url = developer + '/open-source/gsoc/{yr}/'.format(yr=year)
        soup = loop.run_until_complete(get_page(url))
        orgs, projects = get_info(soup)
        orgs_data[year] = orgs
        projects_data[year] = projects

    # NOTE(review): both calls pass the same file name. Unless dumper
    # derives distinct destinations from the data, the second dump would
    # overwrite the first -- confirm against dumper's implementation.
    dumper(orgs_data, "2005-2008.json")
    dumper(projects_data, "2005-2008.json")
def main():
    """Collect the 2009-2015 organization/project data and dump it as JSON.

    Three coroutines are run to completion, one after another, on the
    asyncio event loop: ``All_orgs()``, then ``org_info_below_13`` on its
    first result and ``org_info_above_14`` on its second. The four resulting
    values are passed to ``dumper`` in a fixed order.
    """
    run = asyncio.get_event_loop().run_until_complete

    early_orgs, late_orgs = run(All_orgs())
    orgs_to_2013, projects_to_2013 = run(org_info_below_13(early_orgs))
    orgs_from_2014, projects_from_2014 = run(org_info_above_14(late_orgs))

    # NOTE(review): each file name is used for two consecutive dumps. Unless
    # dumper derives distinct destinations from the data, the later dump
    # would overwrite the earlier one -- confirm against dumper.
    outputs = (
        (orgs_to_2013, "2009-2013.json"),
        (projects_to_2013, "2009-2013.json"),
        (orgs_from_2014, "2014-2015.json"),
        (projects_from_2014, "2014-2015.json"),
    )
    for payload, filename in outputs:
        dumper(payload, filename)
def main():
    """Scrape one program year and dump its organizations and projects.

    The year is taken from the first command-line argument. When it is
    missing, a usage message is printed and the process exits with status 1.

    Otherwise three coroutines are run to completion in sequence on the
    asyncio event loop: ``orgs_links(year)``, ``orgs_information`` on its
    result (unpacked as organizations and project links), and
    ``project_details`` on the project links. The organizations and the
    projects are then written through ``dumper``.

    Raises:
        SystemExit: with code 1 when no year argument is supplied.
    """
    if len(sys.argv) < 2:
        print("\nUSAGE: python summerofcode-scraper.py <year>")
        # sys.exit rather than the interactive `exit` helper, which is only
        # installed by the site module and closes stdin before exiting.
        sys.exit(1)

    year = sys.argv[1]
    print("Scraping data for:", year)

    loop = asyncio.get_event_loop()
    all_orgs = loop.run_until_complete(orgs_links(year))
    organizations, project_links = loop.run_until_complete(
        orgs_information(all_orgs))
    projects = loop.run_until_complete(project_details(project_links))

    dumper(organizations, 'orgs_' + year + '.json')
    # Fix: the projects file name was hard-coded to '2018.json', so every
    # other year was written under the wrong name. Year 2018 still produces
    # '2018.json'.
    dumper(projects, year + '.json')