def scan_source_data(source):
    """Scan the nonprofit data sources and store the data of interest in the database.

    Every organization parsed from the sources is upserted through a
    ``DBClient``. Organizations whose ``cy_credit_score`` is not ``None`` are
    additionally collected per ``organization_type`` as
    ``(cy_credit_score, electronic_id)`` tuples, and the resulting mapping is
    handed to ``set_score_percentile`` once all sources have been read.

    Args:
        source: Mapping whose items are ``(url, prefix)`` pairs; each pair is
            passed to ``jsonparser.parse_json_index`` together with ``LIMIT``.
    """
    db = DBClient()

    # organization_type -> heap of (cy_credit_score, electronic_id) tuples.
    scores_by_type = {}

    for url, prefix in source.items():
        for org in jsonparser.parse_json_index(url, prefix, LIMIT):
            db.upsert(org)

            # Organizations without a valid score are stored but not ranked.
            score = org['cy_credit_score']
            if score is None:
                continue

            # The first occurrence of a type creates its (empty) heap; pushing
            # through heappush keeps each list in heap order.
            heap = scores_by_type.setdefault(org['organization_type'], [])
            heappush(heap, (score, org['electronic_id']))

    # NOTE(review): presumably derives a percentile per organization from the
    # grouped scores -- confirm against set_score_percentile's definition.
    set_score_percentile(scores_by_type)