コード例 #1
0
def scan_source_data(source):
    """Scan the nonprofit data sources, store each organization, and rank scores.

    For every ``(url, prefix)`` pair in ``source``, each organization yielded by
    ``jsonparser.parse_json_index(url, prefix, LIMIT)`` is upserted into the
    database through a ``DBClient``. Organizations whose ``cy_credit_score`` is
    not ``None`` are additionally grouped by ``organization_type``; each group
    is a heap of ``(cy_credit_score, electronic_id)`` tuples. The grouped
    mapping is finally passed to ``set_score_percentile``.

    Args:
        source: Mapping whose items are ``(url, prefix)`` pairs, forwarded
            as-is to ``jsonparser.parse_json_index``.
    """
    client = DBClient()
    # organization_type -> heap of (cy_credit_score, electronic_id) tuples.
    scores_by_type = {}

    for index_url, index_prefix in source.items():
        for record in jsonparser.parse_json_index(index_url, index_prefix, LIMIT):
            # Every organization is persisted, whether or not it has a score.
            client.upsert(record)

            score = record['cy_credit_score']
            if score is None:
                # Organizations without a score are left out of the ranking.
                continue

            # setdefault creates the heap the first time a type is seen.
            type_heap = scores_by_type.setdefault(record['organization_type'], [])
            # heappush keeps the list a valid heap keyed on (score, id).
            heappush(type_heap, (score, record['electronic_id']))

    set_score_percentile(scores_by_type)