# NOTE(review): this copy of publisher_dicts() is not valid Python as it stands:
# the subscript on the `for` line is never closed, every dictionary key below
# is missing its value, and the dict literal is never terminated.
# Presumably it is meant to yield one summary dict per publisher, like the
# complete publisher_dicts() variants elsewhere in this file -- TODO confirm
# against version control before relying on it.
def publisher_dicts():
    for publisher, activities in data.current_stats['inverted_publisher'][
        publisher_stats = data.get_publisher_stats(publisher)
        yield {
            'Publisher Name':
            'Publisher Registry Id':
            publisher_stats['activity_files'] +
            'Activity Files':
            'Organisation Files':
            'Total File Size':
            'Reporting Org on Registry':
            'Reporting Orgs in Data (count)':
            'Reporting Orgs in Data':
            'Hierarchies (count)':
def has_future_transactions(publisher):
    """Report whether a publisher has transactions dated in the future.

    Args:
        publisher: key accepted by ``get_publisher_stats`` and used to index
            ``blacklist_publisher``.

    Returns:
        int: one of
            2 if the publisher's current stats contain a transaction date
              later than today;
            -1 if the publisher is absent from ``blacklist_publisher``
              (i.e. it has not been checked);
            1 if any non-empty blacklist entry is dated on or after the first
              day of the current month one year ago;
            0 otherwise.
    """
    # Local import so this function does not depend on a module-level import.
    import datetime

    today = datetime.date.today()
    publisher_stats = get_publisher_stats(publisher)
    if 'transaction_dates' in publisher_stats:
        for _transaction_type, transaction_counts in publisher_stats['transaction_dates'].items():
            for transaction_date_string, _count in transaction_counts.items():
                transaction_date = parse_iso_date(transaction_date_string)
                if transaction_date and transaction_date > today:
                    return 2

    if publisher not in blacklist_publisher:
        return -1

    mindate = datetime.date(today.year - 1, today.month, 1)
    history = blacklist_publisher[publisher]['activities_with_future_transactions']
    for date, activity_blacklist in history.items():
        parsed_date = parse_iso_date(date)
        # The loop above already treats a falsy parse result as "no date";
        # do the same here instead of comparing it against a date.
        if parsed_date and parsed_date >= mindate and activity_blacklist:
            return 1
    return 0
def table():
    """Generate data for the publisher coverage table.

    Yields:
        dict: one row per entry of ``publishers_ordered_by_title`` with the
        keys ``publisher``, ``publisher_title``, ``iati_spend`` (2014 spend,
        in millions of USD), ``spend_ratio`` and ``coverage_adjustment``.
    """
    # Transaction type codes whose 2014 USD totals are summed as spend.
    spend_transaction_types = ('3', 'D', '4', 'E')

    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:
        publisher_stats = get_publisher_stats(publisher)

        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title

        # Compute IATI spend
        iati_spend_total = 0
        transactions_usd = publisher_stats['sum_transactions_by_type_by_year_usd']
        for transaction_type in spend_transaction_types:
            usd_by_year = transactions_usd.get(transaction_type, {}).get('USD', {})
            if '2014' in usd_by_year:
                iati_spend_total += usd_by_year['2014']

        # Convert to millions USD. Convert to float *before* dividing so the
        # result is never truncated by integer division.
        row['iati_spend'] = float(iati_spend_total) / 1000000

        # Set spend ratio score
        # This is manually set at 100% for now. The IATI technical team is
        # still working on compiling reference spend data from disparate
        # sources, in order to assess the spend ratio.
        row['spend_ratio'] = 100

        # Compute coverage score
        if row['spend_ratio'] >= 80:
            row['coverage_adjustment'] = 100
        elif row['spend_ratio'] >= 60:
            row['coverage_adjustment'] = 80
        elif row['spend_ratio'] >= 40:
            row['coverage_adjustment'] = 60
        else:
            row['coverage_adjustment'] = 40

        yield row
def generate_row(publisher):
    """Generate comprehensiveness table data for a given publisher.

    Args:
        publisher: key accepted by ``get_publisher_stats`` and
            ``publisher_name``.

    Returns:
        dict: ``publisher`` and ``publisher_title``; for every slug whose
        denominator is non-zero, ``<slug>`` and ``<slug>_valid`` as rounded
        integer percentages; and for each page ``<page>_average`` and
        ``<page>_average_valid`` as rounded weighted averages.
    """
    publisher_stats = get_publisher_stats(publisher)

    # Set an initial dictionary, which will later be populated further
    row = {}
    row['publisher'] = publisher
    row['publisher_title'] = publisher_name[publisher]

    # Calculate percentages for publisher data populated with any data
    for slug in column_slugs['core'] + column_slugs['financials'] + column_slugs['valueadded']:
        # Set the stats base for calculating the numerator. This is based on
        # the hierarchy set in the lookup.
        base_kind = column_base_lookup[slug]
        if base_kind == 'bottom_hierarchy':
            publisher_base = publisher_stats.get('bottom_hierarchy', {})
        elif base_kind == 'hierarchy_with_most_budgets':
            publisher_base = publisher_stats['by_hierarchy'].get(
                get_hierarchy_with_most_budgets(publisher_stats), {})
        elif base_kind == 'first_hierarchy_with_commitments':
            first_hierarchy = get_first_hierarchy_with_commitments(publisher_stats)
            if first_hierarchy:
                publisher_base = publisher_stats['by_hierarchy'].get(first_hierarchy, {})
            else:
                publisher_base = publisher_stats.get('bottom_hierarchy', {})
        else:
            # Most common case will be column_base_lookup[slug] == 'all':
            publisher_base = publisher_stats

        numerator_all = publisher_base.get('comprehensiveness', {}).get(slug, 0)
        numerator_valid = publisher_base.get('comprehensiveness_with_validation', {}).get(slug, 0)

        slug_denominator = denominator(slug, publisher_base)
        if slug_denominator != 0:
            # Populate the row with the %age
            row[slug] = int(round(float(numerator_all) / slug_denominator * 100))
            row[slug + '_valid'] = int(round(float(numerator_valid) / slug_denominator * 100))

    # Calculate the average for each grouping, and the overall 'summary' average.
    # Note that the summary must be last, so that it can use the average
    # calculations from the other groupings.
    for page in ['core', 'financials', 'valueadded', 'summary']:
        page_columns = columns[page]
        weight_total = float(sum(x[2] for x in page_columns))
        row[page + '_average'] = int(round(
            sum((row.get(x[0]) or 0) * x[2] for x in page_columns) / weight_total
        ))
        row[page + '_average_valid'] = int(round(
            sum((row.get(x[0] + '_valid') or 0) * x[2] for x in page_columns) / weight_total
        ))

    return row
def table():
    """Generate the comprehensiveness table.

    Yields:
        dict: one row per entry of ``publishers_ordered_by_title`` holding
        ``publisher``, ``publisher_title``, a truncated integer percentage
        per slug (``<slug>`` and ``<slug>_valid``, omitted when the
        denominator is 0) and ``<page>_average`` / ``<page>_average_valid``
        for each page.
    """
    # (stats key, suffix appended to the row key)
    stat_sources = (
        ("comprehensiveness", ""),
        ("comprehensiveness_with_validation", "_valid"),
    )

    # Loop over the data for each publisher
    for publisher_title, publisher in publishers_ordered_by_title:
        publisher_stats = get_publisher_stats(publisher)

        # Set an initial dictionary, which will later be populated further
        row = {}
        row["publisher"] = publisher
        row["publisher_title"] = publisher_title

        for stats_key, suffix in stat_sources:
            # Non-financial data is measured across the whole publisher
            for k, v in publisher_stats[stats_key].items():
                if k not in column_slugs["financials"]:
                    total = denominator(k, publisher_stats)
                    if total != 0:
                        # Populate the row with the %age
                        row[k + suffix] = int(float(v) / total * 100)

            # Ensure that only lowest hierarchy is used for financial calculations
            bottom_hierarchy = publisher_stats["bottom_hierarchy"]
            if stats_key in bottom_hierarchy:
                for k, v in bottom_hierarchy[stats_key].items():
                    if k in column_slugs["financials"]:
                        total = denominator(k, bottom_hierarchy)
                        if total != 0:
                            row[k + suffix] = int(float(v) / total * 100)

        # Calculate the average for each grouping, and the overall 'summary' average.
        # Note that the summary must be last, so that it can use the average
        # calculations from the other groupings.
        for page in ["core", "financials", "valueadded", "summary"]:
            page_columns = columns[page]
            weight_total = sum(x[2] for x in page_columns)
            row[page + "_average"] = (
                sum((row.get(x[0]) or 0) * x[2] for x in page_columns) / weight_total
            )
            row[page + "_average_valid"] = (
                sum((row.get(x[0] + "_valid") or 0) * x[2] for x in page_columns) / weight_total
            )

        # Generate a row object
        yield row
def publisher_dicts():
    """Yield a summary dict for each publisher that has a known name.

    Iterates the per-publisher activity counts in
    ``data.current_stats['inverted_publisher']['activities']`` and skips any
    publisher that is not a key of ``publisher_name``.

    Yields:
        dict: ``Publisher Name``, ``Publisher Registry Id``, ``Activities``,
        ``Hierarchies (count)`` and ``Hierarchies`` (';'-joined).
    """
    for publisher, activities in data.current_stats['inverted_publisher']['activities'].items():
        if publisher not in publisher_name:
            continue
        publisher_stats = data.get_publisher_stats(publisher)
        yield {
            'Publisher Name': publisher_name[publisher],
            'Publisher Registry Id': publisher,
            'Activities': activities,
            'Hierarchies (count)': len(publisher_stats['hierarchies']),
            'Hierarchies': ';'.join(publisher_stats['hierarchies']),
        }
def table():
    """Generate data for the publisher forward-looking table.

    Yields:
        dict: one row per entry of ``publishers_ordered_by_title`` with
        ``publisher``, ``publisher_title``, ``flag`` (True when budgets are
        reported at more than one hierarchy) and ``year_columns``, a list of
        three dicts keyed by year: current activities, activities with
        budgets, and the rounded percentage of the former that have budgets
        (``'-'`` when there are no current activities).
    """
    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:
        publisher_stats = get_publisher_stats(publisher)

        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title
        row['year_columns'] = [{}, {}, {}]
        current_column, budgets_column, percentage_column = row['year_columns']

        # Work with hierarchies
        by_hierarchy = publisher_stats['by_hierarchy']
        hierarchies_with_nonzero_budgets = [
            h for h, stats in by_hierarchy.items()
            if not all(x == 0 for x in stats['forwardlooking_activities_with_budgets'].values())
        ]
        # Flag if budgets on current activities are reported at more than one hierarchy
        row['flag'] = len(hierarchies_with_nonzero_budgets) > 1

        if len(hierarchies_with_nonzero_budgets) == 1:
            # Budgets are reported at exactly one hierarchy: use that one
            stats_source = by_hierarchy[hierarchies_with_nonzero_budgets[0]]
        else:
            # Budgets at more than one hierarchy (or none): use all hierarchies
            stats_source = publisher_stats

        # Loop over each year to generate the statistics for the table
        for year in years:
            bottom_hierarchy = publisher_stats['bottom_hierarchy']
            if ('forwardlooking_activities_current' in bottom_hierarchy
                    and 'forwardlooking_activities_with_budgets' in bottom_hierarchy):
                current_column[year] = stats_source['forwardlooking_activities_current'].get(year) or 0
                budgets_column[year] = stats_source['forwardlooking_activities_with_budgets'].get(year) or 0
                if not int(current_column[year]):
                    percentage_column[year] = '-'
                else:
                    percentage_column[year] = int(round(
                        float(budgets_column[year]) / float(current_column[year]) * 100
                    ))
            else:
                # Either statistic is missing from the bottom hierarchy.
                # This should only occur if a publisher has 0 activities.
                current_column[year] = '0'
                budgets_column[year] = '0'
                percentage_column[year] = '-'

        yield row
def table():
    """Generate coverage table data for every publisher.

    Publishers listed in ``secondary_publishers`` are skipped.

    Yields:
        The value returned by ``generate_row(publisher)`` for each remaining
        entry of ``publishers_ordered_by_title``, in order.
    """
    for _publisher_title, publisher in publishers_ordered_by_title:
        # Skip if all activities from this publisher are secondary reported
        if publisher in secondary_publishers:
            continue

        yield generate_row(publisher)
def table():
    """Generate coverage table data for every publisher.

    Publishers listed in ``secondary_publishers`` are skipped.

    Yields:
        The value returned by ``generate_row(publisher)`` for each remaining
        entry of ``publishers_ordered_by_title``, in order.
    """
    for _publisher_title, publisher in publishers_ordered_by_title:
        # Skip if all activities from this publisher are secondary reported
        if publisher in secondary_publishers:
            continue

        yield generate_row(publisher)
def table():
    """Generate data for the humanitarian table.

    Yields:
        dict: one row per entry of ``publishers_ordered_by_title`` with
        ``publisher``, ``publisher_title``, ``publisher_type``,
        ``num_activities``, ``publishing_humanitarian`` (100 or 0), the
        percentages ``humanitarian_attrib``, ``appeal_emergency`` and
        ``clusters``, and ``average`` (mean of the four scores).
    """
    # (row key, key inside the 'humanitarian' stats) for each percentage column
    percentage_columns = (
        # Activities defined using the @humanitarian attribute
        ('humanitarian_attrib', 'is_humanitarian_by_attrib'),
        # Activities using <humanitarian-scope> to define an appeal or emergency
        ('appeal_emergency', 'contains_humanitarian_scope'),
        # Activities that use clusters
        ('clusters', 'uses_humanitarian_clusters_vocab'),
    )

    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:
        publisher_stats = get_publisher_stats(publisher)
        humanitarian_stats = publisher_stats.get('humanitarian', {})

        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title
        row['publisher_type'] = common.get_publisher_type(publisher)['name']

        # Get data from IATI-Stats output
        row['num_activities'] = humanitarian_stats.get('is_humanitarian', '0')
        has_humanitarian_activities = int(row['num_activities']) > 0
        row['publishing_humanitarian'] = 100 if has_humanitarian_activities else 0

        for row_key, stats_key in percentage_columns:
            if has_humanitarian_activities:
                # Default to the number 0 (not the string '0'), which cannot
                # be divided, when a statistic is missing.
                row[row_key] = (
                    humanitarian_stats.get(stats_key, 0) / float(row['num_activities'])
                ) * 100
            else:
                row[row_key] = 0

        # Calculate the mean average
        row['average'] = (row['publishing_humanitarian'] +
                          row['humanitarian_attrib'] +
                          row['appeal_emergency'] + row['clusters']) / float(4)

        yield row
def publisher_dicts():
    """Yield a summary dict for every publisher with activity statistics.

    Iterates ``data.current_stats['inverted_publisher']['activities']``,
    which maps each publisher to its activity count.

    Yields:
        dict: publisher name and registry id, activity and organisation
        counts, file counts and total size, the reporting org recorded on the
        registry, and the reporting orgs and hierarchies found in the data
        (each as a count and a ';'-joined string).
    """
    for publisher, activities in data.current_stats['inverted_publisher']['activities'].items():
        publisher_stats = data.get_publisher_stats(publisher)
        yield {
            'Publisher Name': publisher_name[publisher],
            'Publisher Registry Id': publisher,
            'Activities': activities,
            'Organisations': publisher_stats['organisations'],
            'Files': publisher_stats['activity_files']+publisher_stats['organisation_files'],
            'Activity Files': publisher_stats['activity_files'],
            'Organisation Files': publisher_stats['organisation_files'],
            'Total File Size': publisher_stats['file_size'],
            'Reporting Org on Registry': data.ckan_publishers[publisher]['result']['publisher_iati_id'],
            'Reporting Orgs in Data (count)': len(publisher_stats['reporting_orgs']),
            'Reporting Orgs in Data': ';'.join(publisher_stats['reporting_orgs']),
            'Hierarchies (count)': len(publisher_stats['hierarchies']),
            'Hierarchies': ';'.join(publisher_stats['hierarchies']),
        }
def publisher_dicts():
    """Yield a summary dict for every publisher with activity statistics.

    Iterates ``data.current_stats["inverted_publisher"]["activities"]``,
    which maps each publisher to its activity count.

    Yields:
        dict: publisher name and registry id, activity and organisation
        counts, file counts and total size, the reporting org recorded on the
        registry, the number of data tickets, and the reporting orgs and
        hierarchies found in the data (each as a count and a ";"-joined
        string).
    """
    for publisher, activities in data.current_stats["inverted_publisher"]["activities"].items():
        publisher_stats = data.get_publisher_stats(publisher)
        yield {
            "Publisher Name": publisher_name[publisher],
            "Publisher Registry Id": publisher,
            "Activities": activities,
            "Organisations": publisher_stats["organisations"],
            "Files": publisher_stats["activity_files"] + publisher_stats["organisation_files"],
            "Activity Files": publisher_stats["activity_files"],
            "Organisation Files": publisher_stats["organisation_files"],
            "Total File Size": publisher_stats["file_size"],
            "Reporting Org on Registry": data.ckan_publishers[publisher]["result"]["publisher_iati_id"],
            "Reporting Orgs in Data (count)": len(publisher_stats["reporting_orgs"]),
            "Reporting Orgs in Data": ";".join(publisher_stats["reporting_orgs"]),
            "Data Tickets": len(data.data_tickets[publisher]),
            "Hierarchies (count)": len(publisher_stats["hierarchies"]),
            "Hierarchies": ";".join(publisher_stats["hierarchies"]),
        }
def table():
    """Generate data for the humanitarian table.

    Yields:
        dict: one row per entry of ``publishers_ordered_by_title`` with
        ``publisher``, ``publisher_title``, ``publisher_type``,
        ``num_activities``, ``publishing_humanitarian`` (100 or 0), the
        percentages ``humanitarian_attrib``, ``appeal_emergency`` and
        ``clusters``, and ``average`` (mean of the four scores).
    """
    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:
        publisher_stats = get_publisher_stats(publisher)
        humanitarian_stats = publisher_stats.get('humanitarian', {})

        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title
        row['publisher_type'] = common.get_publisher_type(publisher)['name']

        # Get data from IATI-Stats output
        row['num_activities'] = humanitarian_stats.get('is_humanitarian', '0')
        has_humanitarian_activities = int(row['num_activities']) > 0
        row['publishing_humanitarian'] = 100 if has_humanitarian_activities else 0

        def share_of_humanitarian(stats_key):
            """Return the statistic as a percentage of humanitarian activities."""
            if not has_humanitarian_activities:
                return 0
            # Default to the number 0 (not the string '0'), which cannot be
            # divided, when the statistic is missing.
            return (humanitarian_stats.get(stats_key, 0) / float(row['num_activities'])) * 100

        # Percentage of humanitarian activities defined using the @humanitarian attribute
        row['humanitarian_attrib'] = share_of_humanitarian('is_humanitarian_by_attrib')

        # Percentage that use <humanitarian-scope> to define an appeal or emergency
        row['appeal_emergency'] = share_of_humanitarian('contains_humanitarian_scope')

        # Percentage that use clusters
        row['clusters'] = share_of_humanitarian('uses_humanitarian_clusters_vocab')

        # Calculate the mean average
        row['average'] = (
            row['publishing_humanitarian']
            + row['humanitarian_attrib']
            + row['appeal_emergency']
            + row['clusters']
        ) / float(4)

        yield row
def table():
    """Yield one comprehensiveness row per publisher.

    Each row holds ``publisher``, ``publisher_title``, a truncated integer
    percentage for every slug with a non-zero denominator (``<slug>`` from
    the raw counts, ``<slug>_valid`` from the validated counts) and, for each
    page, ``<page>_average`` and ``<page>_average_valid``.

    Non-financial slugs are measured against the publisher as a whole;
    financial slugs are measured against the bottom hierarchy only.
    """
    financial_slugs = column_slugs['financials']
    # (key in the stats, suffix added to the row key)
    stat_sources = (
        ('comprehensiveness', ''),
        ('comprehensiveness_with_validation', '_valid'),
    )

    for publisher_title, publisher in publishers_ordered_by_title:
        publisher_stats = get_publisher_stats(publisher)
        # Data structure that gets passed to the table
        row = {'publisher': publisher, 'publisher_title': publisher_title}

        for stats_key, suffix in stat_sources:
            # Non-financials
            for slug, count in publisher_stats[stats_key].items():
                if slug in financial_slugs:
                    continue
                if denominator(slug, publisher_stats) == 0:
                    continue
                row[slug + suffix] = int(
                    float(count) / denominator(slug, publisher_stats) * 100
                )

            # Financials: everything that is low in the hierarchy-attribute
            # of an activity element
            if stats_key not in publisher_stats['bottom_hierarchy']:
                continue
            for slug, count in publisher_stats['bottom_hierarchy'][stats_key].items():
                if slug not in financial_slugs:
                    continue
                if denominator(slug, publisher_stats['bottom_hierarchy']) == 0:
                    continue
                row[slug + suffix] = int(
                    float(count) / denominator(slug, publisher_stats['bottom_hierarchy']) * 100
                )

        # 'summary' must stay last so it can use the averages computed for
        # the other pages.
        for page in ['core', 'financials', 'valueadded', 'summary']:
            weighted_any = sum((row.get(col[0]) or 0) * col[2] for col in columns[page])
            row[page + '_average'] = weighted_any / sum(col[2] for col in columns[page])
            weighted_valid = sum((row.get(col[0] + '_valid') or 0) * col[2] for col in columns[page])
            row[page + '_average_valid'] = weighted_valid / sum(col[2] for col in columns[page])

        yield row
def has_future_transactions(publisher):
    """Report whether a publisher has transactions dated in the future.

    Args:
        publisher: key accepted by ``get_publisher_stats`` and used to index
            ``blacklist_publisher``.

    Returns:
        int: one of
            2 if the publisher's current stats contain a transaction date
              later than today;
            -1 if the publisher is absent from ``blacklist_publisher``
              (i.e. it has not been checked);
            1 if any non-empty blacklist entry is dated on or after the first
              day of the current month one year ago;
            0 otherwise.
    """
    # Local import so this function does not depend on a module-level import.
    import datetime

    today = datetime.date.today()
    publisher_stats = get_publisher_stats(publisher)
    if 'transaction_dates' in publisher_stats:
        for _transaction_type, transaction_counts in publisher_stats['transaction_dates'].items():
            for transaction_date_string, _count in transaction_counts.items():
                transaction_date = parse_iso_date(transaction_date_string)
                if transaction_date and transaction_date > today:
                    return 2

    if publisher not in blacklist_publisher:
        return -1

    mindate = datetime.date(today.year - 1, today.month, 1)
    for date, activity_blacklist in blacklist_publisher[publisher]['activities_with_future_transactions'].items():
        parsed_date = parse_iso_date(date)
        # The loop above already treats a falsy parse result as "no date";
        # do the same here instead of comparing it against a date.
        if parsed_date and parsed_date >= mindate and activity_blacklist:
            return 1
    return 0
def table():
    """Generate data for the forward-looking table.

    Yields:
        dict: one row per entry of ``publishers_ordered_by_title`` with
        ``publisher``, ``publisher_title``, ``flag`` (True when budgets are
        reported at more than one hierarchy) and ``year_columns``, a list of
        three dicts keyed by year: current activities, activities with
        budgets, and the rounded percentage of the former that have budgets
        (``'-'`` when there are no current activities).
    """
    for publisher_title, publisher in publishers_ordered_by_title:
        publisher_stats = get_publisher_stats(publisher)
        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title
        row['year_columns'] = [{}, {}, {}]

        by_hierarchy = publisher_stats['by_hierarchy']
        hierarchies_with_nonzero_budgets = [
            h for h, stats in by_hierarchy.items()
            if not all(x == 0 for x in stats['forwardlooking_activities_with_budgets'].values())
        ]
        # Flag if budgets on current activities are reported at more than one hierarchy
        row['flag'] = len(hierarchies_with_nonzero_budgets) > 1

        for year in years:
            if 'forwardlooking_activities_current' in publisher_stats['bottom_hierarchy'] and 'forwardlooking_activities_with_budgets' in publisher_stats['bottom_hierarchy']:
                if len(hierarchies_with_nonzero_budgets) != 1:
                    # If budgets are at more than one hierarchy (or no hierarchies), just use activities at all hierarchies
                    stats_source = publisher_stats
                else:
                    # Else, use the hierarchy they are reported at
                    stats_source = by_hierarchy[hierarchies_with_nonzero_budgets[0]]
                row['year_columns'][0][year] = stats_source['forwardlooking_activities_current'].get(year) or 0
                row['year_columns'][1][year] = stats_source['forwardlooking_activities_with_budgets'].get(year) or 0

                if not int(row['year_columns'][0][year]):
                    row['year_columns'][2][year] = '-'
                else:
                    row['year_columns'][2][year] = int(round(
                        float(row['year_columns'][1][year]) / float(row['year_columns'][0][year]) * 100
                    ))
            else:
                # Should only occur if a publisher has 0 activities
                row['year_columns'][0][year] = '0'
                row['year_columns'][1][year] = '0'
                row['year_columns'][2][year] = '-'
        yield row
def _iati_spend_usd_millions(transactions_usd, year, transaction_types):
    """Sum the USD totals for ``year`` over ``transaction_types``, in millions (2 d.p.)."""
    total = 0
    for transaction_type in transaction_types:
        usd_by_year = transactions_usd.get(transaction_type, {}).get('USD', {})
        if year in usd_by_year:
            total += usd_by_year[year]
    # Convert to float before dividing so the result is never truncated by
    # integer division.
    return round(float(total) / 1000000, 2)


def _usd_millions_or_dash(value):
    """Return ``value`` in millions (2 d.p.), or '-' when it is not a number."""
    return round(float(value) / 1000000, 2) if is_number(value) else '-'


def _spend_ratio(iati_spend, reference_spend):
    """Return ``iati_spend / reference_spend``, or 0 unless the reference is a positive number."""
    # Check is_number first: the reference may be the placeholder '-', and
    # comparing a string with 0 raises TypeError on Python 3.
    if is_number(reference_spend) and reference_spend > 0:
        return iati_spend / reference_spend
    return 0


def generate_row(publisher):
    """Generate coverage table data for a given publisher.

    Args:
        publisher: key accepted by ``get_publisher_stats`` and
            ``publisher_name``.

    Returns:
        dict: ``publisher``, ``publisher_title``, four flag/sort fields
        initialised to 0, ``iati_spend_2014/2015/2016`` (millions of USD),
        ``reference_spend_2014``, ``reference_spend_2015`` and
        ``official_forecast_2015`` (millions of USD, or '-' when not
        numeric), and ``spend_ratio`` (the largest candidate ratio as a
        rounded integer percentage).
    """
    # Store the data for this publisher as new variables
    publisher_stats = get_publisher_stats(publisher)
    transactions_usd = publisher_stats['sum_transactions_by_type_by_year_usd']

    # Create a dict for publisher data, and populate it with basic data
    row = {}
    row['publisher'] = publisher
    row['publisher_title'] = publisher_name[publisher]
    row['no_data_flag_red'] = 0
    row['no_data_flag_amber'] = 0
    row['spend_data_error_reported_flag'] = 0
    row['sort_order'] = 0

    # Compute IATI spend per year
    if publisher in dfi_publishers:
        # If this publisher is a DFI, then their spend total should be based
        # on their commitment transactions only.
        transaction_types = ('2', 'C')
    else:
        # This is a non-DFI publisher
        transaction_types = ('3', 'D', '4', 'E')
    for year in ('2014', '2015', '2016'):
        row['iati_spend_' + year] = _iati_spend_usd_millions(
            transactions_usd, year, transaction_types)

    # Get reference data from the stats files. Set as empty strings if the
    # IATI-Stats code did not find them in the reference data sheet.
    data_2014 = publisher_stats['reference_spend_data_usd'].get(
        '2014', {
            'ref_spend': '',
            'not_in_sheet': True
        })
    data_2015 = publisher_stats['reference_spend_data_usd'].get(
        '2015', {
            'ref_spend': '',
            'official_forecast': '',
            'not_in_sheet': True
        })

    # Compute reference data as $USDm
    row['reference_spend_2014'] = _usd_millions_or_dash(data_2014['ref_spend'])
    row['reference_spend_2015'] = _usd_millions_or_dash(data_2015['ref_spend'])
    row['official_forecast_2015'] = _usd_millions_or_dash(data_2015['official_forecast'])

    # Compute spend ratio score
    # Compile a list of ratios for spend & reference data paired by year
    spend_ratio_candidates = [
        _spend_ratio(row['iati_spend_2014'], row['reference_spend_2014']),
        _spend_ratio(row['iati_spend_2015'], row['reference_spend_2015']),
        _spend_ratio(row['iati_spend_2015'], row['official_forecast_2015']),
    ]

    # If there are no annual pairs, add the value of non-matching-year spend / reference data
    if ((row['iati_spend_2014'] == 0 or row['reference_spend_2014'] == '-') and
            (row['iati_spend_2015'] == 0 or row['reference_spend_2015'] == '-') and
            (row['iati_spend_2015'] == 0 or row['official_forecast_2015'] == '-')):
        spend_ratio_candidates.append(
            _spend_ratio(row['iati_spend_2015'], row['reference_spend_2014']))
        spend_ratio_candidates.append(
            _spend_ratio(row['iati_spend_2016'], row['reference_spend_2014']))
        spend_ratio_candidates.append(
            _spend_ratio(row['iati_spend_2016'], row['reference_spend_2015']))

    # Get the maximum value and convert to a percentage
    row['spend_ratio'] = int(round(max(spend_ratio_candidates) * 100))

    return row
def generate_row(publisher):
    """Generate comprehensiveness table data for a given publisher.

    Args:
        publisher: Key passed to ``get_publisher_stats`` and used to index
            ``publisher_name``.

    Returns:
        dict: A row containing

        * ``publisher`` and ``publisher_title``;
        * for every slug in the 'core', 'financials' and 'valueadded' groups
          whose denominator is non-zero, ``<slug>`` and ``<slug>_valid`` as
          integer percentages (slugs with a zero denominator are left out);
        * ``flag`` (set while processing the 'budget' slug): True when the
          'budget_not_provided' count is greater than zero;
        * ``<page>_average`` and ``<page>_average_valid`` for each of
          'core', 'financials', 'valueadded' and 'summary'.
    """
    publisher_stats = get_publisher_stats(publisher)

    # Set an initial dictionary, which will later be populated further
    row = {
        'publisher': publisher,
        'publisher_title': publisher_name[publisher],
    }

    # Calculate percentages for publisher data populated with any data
    slugs = (column_slugs['core'] + column_slugs['financials'] +
             column_slugs['valueadded'])
    for slug in slugs:

        # Set the stats base for calculating the numerator. This is based on
        # the hierarchy set in the lookup
        base_name = column_base_lookup[slug]
        if base_name == 'bottom_hierarchy':
            publisher_base = publisher_stats.get('bottom_hierarchy', {})

        elif base_name == 'hierarchy_with_most_budgets':
            publisher_base = publisher_stats['by_hierarchy'].get(
                get_hierarchy_with_most_budgets(publisher_stats), {})

        elif base_name == 'first_hierarchy_with_commitments':
            first_hierarchy = get_first_hierarchy_with_commitments(
                publisher_stats)
            if first_hierarchy:
                publisher_base = publisher_stats['by_hierarchy'].get(
                    first_hierarchy, {})
            else:
                # No hierarchy was found: fall back to the bottom hierarchy
                publisher_base = publisher_stats.get('bottom_hierarchy', {})

        else:
            # Most common case will be column_base_lookup[slug] == 'all':
            publisher_base = publisher_stats

        counts_all = publisher_base.get('comprehensiveness', {})
        counts_valid = publisher_base.get(
            'comprehensiveness_with_validation', {})

        numerator_all = counts_all.get(slug, 0)
        numerator_valid = counts_valid.get(slug, 0)

        if slug == 'budget':
            # 'budget_not_provided' counts are added to the budget numerators,
            # and the row is flagged when any were found
            budget_not_provided_all = counts_all.get('budget_not_provided', 0)
            row['flag'] = budget_not_provided_all > 0
            numerator_all += budget_not_provided_all
            numerator_valid += counts_valid.get('budget_not_provided', 0)

        slug_denominator = denominator(slug, publisher_base)
        if slug_denominator != 0:
            # Populate the row with the %age
            row[slug] = int(
                round(float(numerator_all) / slug_denominator * 100))
            row[slug + '_valid'] = int(
                round(float(numerator_valid) / slug_denominator * 100))

    # Loop for averages
    # Calculate the average for each grouping, and the overall 'summary' average
    for page in ['core', 'financials', 'valueadded', 'summary']:
        # Note that the summary must be last, so that it can use the average
        # calculations from the other groupings.
        # Each entry x of columns[page] is read as x[0] = row key and
        # x[2] = weight; missing or falsy row values count as 0.
        total_weight = float(sum(x[2] for x in columns[page]))
        row[page + '_average'] = int(
            round(
                sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) /
                total_weight))
        row[page + '_average_valid'] = int(
            round(
                sum((row.get(x[0] + '_valid') or 0) * x[2]
                    for x in columns[page]) / total_weight))

    return row
def table():
    """Generate one summary row per publisher.

    Each row combines a timeliness percentage, a forward-looking average, the
    comprehensiveness summary average and the coverage adjustment into an
    overall score and a coverage-adjusted score.

    Yields:
        dict: With keys ``publisher``, ``publisher_title``,
        ``publisher_type``, ``timeliness``, ``forwardlooking``,
        ``comprehensive``, ``score``, ``coverage_adjustment`` and
        ``score_coverage_adjusted``.
    """
    # Store timeliness data in variable
    timeliness_frequency_data = timeliness.publisher_frequency_dict()
    timeliness_timelag_data = timeliness.publisher_timelag_dict()

    # Store generator objects for the data that we are receiving
    forwardlooking_data = forwardlooking.table()
    comprehensiveness_data = comprehensiveness.table()
    coverage_data = coverage.table()

    # Assessment label -> score. Any label not listed here (for example
    # 'Less than Annual' / 'More than one year') scores 0.
    frequency_scores = {
        'Monthly': 4,
        'Quarterly': 3,
        'Six-Monthly': 2,
        'Annual': 1,
    }
    timelag_scores = {
        'One month': 4,
        'A quarter': 3,
        'Six months': 2,
        'One year': 1,
    }

    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:

        # Store the data for this publisher as a new variable
        # (not used further in this function)
        publisher_stats = get_publisher_stats(publisher)

        # Create a dict for publisher data, and populate it with basic data
        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title
        row['publisher_type'] = common.get_publisher_type(publisher)

        # Compute timeliness statistic: element [3] of each timeliness entry
        # holds the assessment label
        frequency_score = frequency_scores.get(
            timeliness_frequency_data[publisher][3], 0)
        timelag_score = timelag_scores.get(
            timeliness_timelag_data[publisher][3], 0)

        # Compute the percentage (the maximum combined score is 8)
        row['timeliness'] = int(
            (float(frequency_score + timelag_score) / 8) * 100)

        # Compute forward looking statistic
        # Get the forward looking data for this publisher.
        # NOTE(review): each generator is advanced once per publisher, which
        # presumes it yields rows in the same order as
        # publishers_ordered_by_title - confirm.
        publisher_forwardlooking_data = next(forwardlooking_data)

        # Convert the data for this publisher's 'Percentage of current
        # activities with budgets' fields into integers
        year_percentages = publisher_forwardlooking_data['year_columns'][2]
        numbers = [int(x) for x in year_percentages.values() if is_number(x)]
        # Compute and store the mean; non-numeric years still count towards
        # the number of years averaged over
        row['forwardlooking'] = sum(numbers) / len(year_percentages)

        # Compute comprehensive statistic
        # Get the comprehensiveness data for this publisher
        publisher_comprehensiveness_data = next(comprehensiveness_data)

        # Set the comprehensive value to be the summary average for valid data
        row['comprehensive'] = convert_to_int(
            publisher_comprehensiveness_data['summary_average_valid'])

        # Compute score
        row['score'] = int(
            (row['timeliness'] + row['forwardlooking'] + row['comprehensive'])
            / 3)

        # Get coverage statistic
        # Get the coverage data for this publisher
        publisher_coverage_data = next(coverage_data)

        # Store the coverage data
        row['coverage_adjustment'] = int(
            publisher_coverage_data['coverage_adjustment'])

        # Compute coverage-adjusted score. The adjustment is a percentage, so
        # divide as a float: truncating coverage_adjustment / 100 to an int
        # first would turn every adjustment below 100 into 0.
        row['score_coverage_adjusted'] = int(
            row['score'] * row['coverage_adjustment'] / 100.0)

        # Hand the completed row to the caller
        yield row
def generate_row(publisher):
    """Generate coverage table data for a given publisher.

    Args:
        publisher: Key passed to ``get_publisher_stats`` and used to index
            ``publisher_name``; also tested for membership of
            ``dfi_publishers``.

    Returns:
        dict: A row containing

        * ``publisher``, ``publisher_title``;
        * ``no_data_flag_red``, ``no_data_flag_amber``,
          ``spend_data_error_reported_flag`` and ``sort_order`` (all 0 here);
        * ``iati_spend_2014`` / ``_2015`` / ``_2016`` in millions of USD;
        * ``reference_spend_2014``, ``reference_spend_2015`` and
          ``official_forecast_2015`` in millions of USD, or ``'-'`` when the
          stored value does not satisfy ``is_number``;
        * ``spend_ratio``: the largest spend / reference ratio, as an integer
          percentage.
    """
    # Store the data for this publisher as new variables
    publisher_stats = get_publisher_stats(publisher)
    transactions_usd = publisher_stats['sum_transactions_by_type_by_year_usd']

    # Create a dict for publisher data, and populate it with basic data
    row = {
        'publisher': publisher,
        'publisher_title': publisher_name[publisher],
        'no_data_flag_red': 0,
        'no_data_flag_amber': 0,
        'spend_data_error_reported_flag': 0,
        'sort_order': 0,
    }

    def _to_usd_millions(value):
        # Convert to float *before* dividing so integer totals are not
        # truncated; values that are not numbers become the '-' placeholder
        return round(float(value) / 1000000, 2) if is_number(value) else '-'

    def _ratio(spend, reference):
        # is_number is tested first so the '-' placeholder is never compared
        # with 0 (that comparison raises TypeError on Python 3)
        if is_number(reference) and reference > 0:
            return spend / reference
        return 0

    # Compute IATI spend per year.
    # If this publisher is a DFI, then their spend total is based on their
    # commitment transactions only (codes '2' and 'C'). For any other
    # publisher the codes '3', 'D', '4' and 'E' are summed instead.
    if publisher in dfi_publishers:
        transaction_types = ('2', 'C')
    else:
        transaction_types = ('3', 'D', '4', 'E')

    for year in ('2014', '2015', '2016'):
        spend_total = sum(
            transactions_usd.get(code, {}).get('USD', {}).get(year, 0)
            for code in transaction_types)
        # Convert to millions USD
        row['iati_spend_' + year] = round(float(spend_total) / 1000000, 2)

    # Get reference data
    # Get data from stats files. Set as empty strings if the IATI-Stats code
    # did not find them in the reference data sheet
    reference_data = publisher_stats['reference_spend_data_usd']
    data_2014 = reference_data.get(
        '2014', {'ref_spend': '', 'not_in_sheet': True})
    data_2015 = reference_data.get(
        '2015',
        {'ref_spend': '', 'official_forecast': '', 'not_in_sheet': True})

    # Compute reference data as $USDm
    row['reference_spend_2014'] = _to_usd_millions(data_2014['ref_spend'])
    row['reference_spend_2015'] = _to_usd_millions(data_2015['ref_spend'])
    row['official_forecast_2015'] = _to_usd_millions(
        data_2015['official_forecast'])

    # Compute spend ratio score
    # Compile a list of ratios for spend & reference data paired by year
    spend_ratio_candidates = [
        _ratio(row['iati_spend_2014'], row['reference_spend_2014']),
        _ratio(row['iati_spend_2015'], row['reference_spend_2015']),
        _ratio(row['iati_spend_2015'], row['official_forecast_2015']),
    ]

    # If there are no annual pairs, add the value of non-matching-year
    # spend / reference data
    if ((row['iati_spend_2014'] == 0 or row['reference_spend_2014'] == '-') and
            (row['iati_spend_2015'] == 0 or row['reference_spend_2015'] == '-') and
            (row['iati_spend_2015'] == 0 or row['official_forecast_2015'] == '-')):
        spend_ratio_candidates.extend([
            _ratio(row['iati_spend_2015'], row['reference_spend_2014']),
            _ratio(row['iati_spend_2016'], row['reference_spend_2014']),
            _ratio(row['iati_spend_2016'], row['reference_spend_2015']),
        ])

    # Get the maximum value and convert to a percentage
    row['spend_ratio'] = int(round(max(spend_ratio_candidates) * 100))

    return row
def generate_row(publisher):
    """Generate coverage table data for a given publisher.

    Args:
        publisher: Key passed to ``get_publisher_stats`` and used to index
            ``publisher_name``; also tested for membership of
            ``dfi_publishers``.

    Returns:
        dict: A row containing

        * ``publisher``, ``publisher_title``;
        * ``iati_spend_2014`` / ``iati_spend_2015`` in millions of USD;
        * ``reference_spend_2014``, ``reference_spend_2015`` and
          ``official_forecast_2015`` in millions of USD, or ``'-'`` when the
          stored value does not satisfy ``is_number``;
        * ``spend_ratio``: the largest spend / reference ratio, as an integer
          percentage;
        * ``coverage_adjustment``: one of 20, 40, 60, 80 or 100;
        * ``no_data_flag_red``, ``no_data_flag_amber``,
          ``spend_data_error_reported_flag`` (0 or 1) and ``sort_order``
          (0 to 3), describing why a default adjustment was applied.
    """
    # Store the data for this publisher as new variables
    publisher_stats = get_publisher_stats(publisher)
    transactions_usd = publisher_stats['sum_transactions_by_type_by_year_usd']

    # Create a dict for publisher data, and populate it with basic data
    row = {
        'publisher': publisher,
        'publisher_title': publisher_name[publisher],
        'no_data_flag_red': 0,
        'no_data_flag_amber': 0,
        'spend_data_error_reported_flag': 0,
        'sort_order': 0,
    }

    def _to_usd_millions(value):
        # Convert to float *before* dividing so integer totals are not
        # truncated; values that are not numbers become the '-' placeholder
        return round(float(value) / 1000000, 2) if is_number(value) else '-'

    def _ratio(spend, reference):
        # is_number is tested first so the '-' placeholder is never compared
        # with 0 (that comparison raises TypeError on Python 3)
        if is_number(reference) and reference > 0:
            return spend / reference
        return 0

    # Compute IATI spend per year.
    # If this publisher is a DFI, then their spend total is based on their
    # commitment transactions only (codes '2' and 'C'). For any other
    # publisher the codes '3', 'D', '4' and 'E' are summed instead.
    if publisher in dfi_publishers:
        transaction_types = ('2', 'C')
    else:
        transaction_types = ('3', 'D', '4', 'E')

    for year in ('2014', '2015'):
        spend_total = sum(
            transactions_usd.get(code, {}).get('USD', {}).get(year, 0)
            for code in transaction_types)
        # Convert to millions USD
        row['iati_spend_' + year] = round(float(spend_total) / 1000000, 2)

    # Get reference data
    # Get data from stats files. Set as empty strings if the IATI-Stats code
    # did not find them in the reference data sheet
    reference_data = publisher_stats['reference_spend_data_usd']
    data_2014 = reference_data.get(
        '2014', {'ref_spend': '', 'not_in_sheet': True})
    data_2015 = reference_data.get(
        '2015',
        {'ref_spend': '', 'official_forecast': '', 'not_in_sheet': True})

    # Compute reference data as $USDm
    row['reference_spend_2014'] = _to_usd_millions(data_2014['ref_spend'])
    row['reference_spend_2015'] = _to_usd_millions(data_2015['ref_spend'])
    row['official_forecast_2015'] = _to_usd_millions(
        data_2015['official_forecast'])

    # Compute spend ratio score
    # Compile a list of ratios for spend & reference data paired by year
    spend_ratio_candidates = [
        _ratio(row['iati_spend_2014'], row['reference_spend_2014']),
        _ratio(row['iati_spend_2015'], row['reference_spend_2015']),
        _ratio(row['iati_spend_2015'], row['official_forecast_2015']),
    ]

    # If there are no annual pairs, add the value of 2015 spend / 2014
    # reference data
    if ((row['iati_spend_2014'] == 0 or row['reference_spend_2014'] == '-')
            and (row['iati_spend_2015'] == 0 or row['reference_spend_2015'] == '-')
            and (row['iati_spend_2015'] == 0 or row['official_forecast_2015'] == '-')):
        spend_ratio_candidates.append(
            _ratio(row['iati_spend_2015'], row['reference_spend_2014']))

    # Get the maximum value and convert to a percentage
    row['spend_ratio'] = int(round(max(spend_ratio_candidates) * 100))

    # Compute coverage score and raise to the top of its quintile
    # or set to default 20% where there is no data, or a data error is reported
    if reference_data.get('spend_data_error_reported', False):
        # For publishers where a data error is reported, set their score to 20%
        row['coverage_adjustment'] = 20
        row['spend_data_error_reported_flag'] = 1
        row['sort_order'] = 3

    elif all([row['reference_spend_2014'] == '-',
              row['reference_spend_2015'] == '-',
              row['official_forecast_2015'] == '-']):
        # For publishers where no reference data has been found, set their
        # score to 20%
        row['coverage_adjustment'] = 20

        if (data_2014.get('not_in_sheet', False)
                and data_2015.get('not_in_sheet', False)):
            # This is a new publisher, who was not known when reference data
            # was collected
            row['no_data_flag_amber'] = 1
            row['sort_order'] = 2
        else:
            # This is a known publisher, who appears in the reference data
            # sheet (albeit with no data)
            row['no_data_flag_red'] = 1
            row['sort_order'] = 1

    elif row['spend_ratio'] > 120 and not reference_data.get('DAC', False):
        # Suggestion that if spend ratio is over 100%, then generally
        # something is wrong with the data. A margin of 20% leeway is given;
        # beyond that the coverage adjustment is bumped down to 20% due to
        # data quality issues.
        # Note that this does not apply to DAC publishers
        row['coverage_adjustment'] = 20

    elif row['spend_ratio'] >= 80:
        row['coverage_adjustment'] = 100

    elif row['spend_ratio'] >= 60:
        row['coverage_adjustment'] = 80

    elif row['spend_ratio'] >= 40:
        row['coverage_adjustment'] = 60

    else:
        # Anything below 40% gets the lowest data-backed adjustment
        row['coverage_adjustment'] = 40

    return row
def generate_row(publisher):
    """Generate coverage table data for a given publisher.

    Args:
        publisher: Key passed to ``get_publisher_stats`` and used to index
            ``publisher_name``; also tested for membership of
            ``dfi_publishers``.

    Returns:
        dict: A row containing

        * ``publisher``, ``publisher_title``;
        * ``iati_spend_2014`` / ``_2015`` / ``_2016`` in millions of USD;
        * ``reference_spend_2014``, ``reference_spend_2015`` and
          ``official_forecast_2015`` in millions of USD, or ``'-'`` when the
          stored value does not satisfy ``is_number``;
        * ``spend_ratio``: the largest spend / reference ratio, as an integer
          percentage;
        * ``coverage_adjustment``: one of 20, 40, 60, 80 or 100;
        * ``no_data_flag_red``, ``no_data_flag_amber``,
          ``spend_data_error_reported_flag`` (0 or 1) and ``sort_order``
          (0 to 3), describing why a default adjustment was applied.
    """
    # Store the data for this publisher as new variables
    publisher_stats = get_publisher_stats(publisher)
    transactions_usd = publisher_stats['sum_transactions_by_type_by_year_usd']

    # Create a dict for publisher data, and populate it with basic data
    row = {
        'publisher': publisher,
        'publisher_title': publisher_name[publisher],
        'no_data_flag_red': 0,
        'no_data_flag_amber': 0,
        'spend_data_error_reported_flag': 0,
        'sort_order': 0,
    }

    def _to_usd_millions(value):
        # Convert to float *before* dividing so integer totals are not
        # truncated; values that are not numbers become the '-' placeholder
        return round(float(value) / 1000000, 2) if is_number(value) else '-'

    def _ratio(spend, reference):
        # is_number is tested first so the '-' placeholder is never compared
        # with 0 (that comparison raises TypeError on Python 3)
        if is_number(reference) and reference > 0:
            return spend / reference
        return 0

    # Compute IATI spend per year.
    # If this publisher is a DFI, then their spend total is based on their
    # commitment transactions only (codes '2' and 'C'). For any other
    # publisher the codes '3', 'D', '4' and 'E' are summed instead.
    if publisher in dfi_publishers:
        transaction_types = ('2', 'C')
    else:
        transaction_types = ('3', 'D', '4', 'E')

    for year in ('2014', '2015', '2016'):
        spend_total = sum(
            transactions_usd.get(code, {}).get('USD', {}).get(year, 0)
            for code in transaction_types)
        # Convert to millions USD
        row['iati_spend_' + year] = round(float(spend_total) / 1000000, 2)

    # Get reference data
    # Get data from stats files. Set as empty strings if the IATI-Stats code
    # did not find them in the reference data sheet
    reference_data = publisher_stats['reference_spend_data_usd']
    data_2014 = reference_data.get(
        '2014', {
            'ref_spend': '',
            'not_in_sheet': True
        })
    data_2015 = reference_data.get(
        '2015', {
            'ref_spend': '',
            'official_forecast': '',
            'not_in_sheet': True
        })

    # Compute reference data as $USDm
    row['reference_spend_2014'] = _to_usd_millions(data_2014['ref_spend'])
    row['reference_spend_2015'] = _to_usd_millions(data_2015['ref_spend'])
    row['official_forecast_2015'] = _to_usd_millions(
        data_2015['official_forecast'])

    # Compute spend ratio score
    # Compile a list of ratios for spend & reference data paired by year
    spend_ratio_candidates = [
        _ratio(row['iati_spend_2014'], row['reference_spend_2014']),
        _ratio(row['iati_spend_2015'], row['reference_spend_2015']),
        _ratio(row['iati_spend_2015'], row['official_forecast_2015']),
    ]

    # If there are no annual pairs, add the value of non-matching-year
    # spend / reference data
    if ((row['iati_spend_2014'] == 0 or row['reference_spend_2014'] == '-') and
            (row['iati_spend_2015'] == 0 or row['reference_spend_2015'] == '-') and
            (row['iati_spend_2015'] == 0 or row['official_forecast_2015'] == '-')):
        spend_ratio_candidates.extend([
            _ratio(row['iati_spend_2015'], row['reference_spend_2014']),
            _ratio(row['iati_spend_2016'], row['reference_spend_2014']),
            _ratio(row['iati_spend_2016'], row['reference_spend_2015']),
        ])

    # Get the maximum value and convert to a percentage
    row['spend_ratio'] = int(round(max(spend_ratio_candidates) * 100))

    # Compute coverage score and raise to the top of its quintile
    # or set to default 20% where there is no data, or a data error is reported
    if reference_data.get('spend_data_error_reported', False):
        # For publishers where a data error is reported, set their score to 20%
        row['coverage_adjustment'] = 20
        row['spend_data_error_reported_flag'] = 1
        row['sort_order'] = 3

    elif all([
            row['reference_spend_2014'] == '-',
            row['reference_spend_2015'] == '-',
            row['official_forecast_2015'] == '-'
    ]):
        # For publishers where no reference data has been found, set their
        # score to 20%
        row['coverage_adjustment'] = 20

        if data_2014.get('not_in_sheet', False) and data_2015.get(
                'not_in_sheet', False):
            # This is a new publisher, who was not known when reference data
            # was collected
            row['no_data_flag_amber'] = 1
            row['sort_order'] = 2
        else:
            # This is a known publisher, who appears in the reference data
            # sheet (albeit with no data)
            row['no_data_flag_red'] = 1
            row['sort_order'] = 1

    elif row['spend_ratio'] > 120 and not reference_data.get('DAC', False):
        # Suggestion that if spend ratio is over 100%, then generally
        # something is wrong with the data. A margin of 20% leeway is given;
        # beyond that the coverage adjustment is bumped down to 20% due to
        # data quality issues.
        # Note that this does not apply to DAC publishers
        row['coverage_adjustment'] = 20

    elif row['spend_ratio'] >= 80:
        row['coverage_adjustment'] = 100

    elif row['spend_ratio'] >= 60:
        row['coverage_adjustment'] = 80

    elif row['spend_ratio'] >= 40:
        row['coverage_adjustment'] = 60

    else:
        # Anything below 40% gets the lowest data-backed adjustment
        row['coverage_adjustment'] = 40

    return row
def generate_row(publisher):
    """Generate forward-looking table data for a given publisher.

    Args:
        publisher: Key passed to ``get_publisher_stats`` and used to index
            ``publisher_name``.

    Returns:
        dict: A row containing

        * ``publisher``, ``publisher_title``;
        * ``year_columns``: a list of three dicts keyed by each entry of
          ``years`` - [0] current activities, [1] current activities with
          budgets (plus 'budget not provided' activities when present),
          [2] the integer percentage [1] / [0], or ``'-'`` when [0] is zero;
        * ``budget_not_provided``: True when any hierarchy has a non-zero
          'forwardlooking_activities_with_budget_not_provided' count;
        * ``flag``: True when budgets are reported at more than one hierarchy.
    """
    # Store the data for this publisher as a new variable
    publisher_stats = get_publisher_stats(publisher)

    # Create a dict for publisher data, and populate it with basic data
    row = {}
    row['publisher'] = publisher
    row['publisher_title'] = publisher_name[publisher]
    row['year_columns'] = [{}, {}, {}]
    row['budget_not_provided'] = False

    # Work with hierarchies
    by_hierarchy = publisher_stats['by_hierarchy']
    hierarchies_with_nonzero_budgets = [
        h for h, stats in by_hierarchy.items()
        if not all(
            x == 0
            for x in stats['forwardlooking_activities_with_budgets'].values())
    ]

    # Flag if budgets on current activities are reported at more than one
    # hierarchy
    row['flag'] = len(hierarchies_with_nonzero_budgets) > 1

    hierarchies_with_budget_not_provided = [
        h for h, stats in by_hierarchy.items()
        if not all(
            x == 0 for x in
            stats['forwardlooking_activities_with_budget_not_provided'].values())
    ]

    bottom_hierarchy = publisher_stats['bottom_hierarchy']

    if len(hierarchies_with_nonzero_budgets) != 1:
        # If budgets are at more than one hierarchy (or no hierarchies), just
        # use activities at all hierarchies
        counts_source = publisher_stats
    else:
        # Else, use the hierarchy which they are reported at
        counts_source = by_hierarchy[hierarchies_with_nonzero_budgets[0]]

    # Loop over each of the three years (i.e. this year and the following two
    # years) to generate the statistics for the table
    for year in years:
        if len(hierarchies_with_budget_not_provided) > 0:
            row['budget_not_provided'] = True

        # Statistics are only available when
        # 'forwardlooking_activities_current' and either
        # 'forwardlooking_activities_with_budgets' or
        # 'forwardlooking_activities_with_budget_not_provided' are in the
        # bottom hierarchy
        has_forwardlooking_stats = (
            'forwardlooking_activities_current' in bottom_hierarchy and
            ('forwardlooking_activities_with_budgets' in bottom_hierarchy
             or 'forwardlooking_activities_with_budget_not_provided'
             in bottom_hierarchy))

        if has_forwardlooking_stats:
            current = counts_source[
                'forwardlooking_activities_current'].get(year) or 0
            with_budgets = counts_source[
                'forwardlooking_activities_with_budgets'].get(year) or 0
            if row['budget_not_provided']:
                with_budgets += counts_source[
                    'forwardlooking_activities_with_budget_not_provided'].get(
                        year) or 0

            row['year_columns'][0][year] = current
            row['year_columns'][1][year] = with_budgets

            if not int(current):
                # No current activities: a percentage cannot be computed
                row['year_columns'][2][year] = '-'
            else:
                row['year_columns'][2][year] = int(
                    round(float(with_budgets) / float(current) * 100))
        else:
            # Else if the required forward-looking keys are not in the bottom
            # hierarchy, set data zero.
            # This should only occur if a publisher has 0 activities
            row['year_columns'][0][year] = '0'
            row['year_columns'][1][year] = '0'
            row['year_columns'][2][year] = '-'

    return row
def table():
    """Generate the per-publisher summary statistics rows.

    For every publisher in ``publishers_ordered_by_title`` that is not
    listed in ``secondary_publishers``, combine the timeliness,
    forward-looking and comprehensiveness statistics into one row.

    Yields:
        dict: a row with the keys ``publisher``, ``publisher_title``,
        ``publisher_type``, ``timeliness``, ``forwardlooking``,
        ``comprehensive`` and ``score``.
    """
    # Points awarded per assessment label. Any label that is not listed
    # (e.g. 'Less than Annual', 'More than one year', or a publisher with
    # no assessment at all) scores 0.
    frequency_scores = {
        'Monthly': 4,
        'Quarterly': 3,
        'Six-Monthly': 2,
        'Annual': 1,
    }
    timelag_scores = {
        'One month': 4,
        'A quarter': 3,
        'Six months': 2,
        'One year': 1,
    }

    # Fetch the timeliness data once, outside the publisher loop
    timeliness_frequency_data = timeliness.publisher_frequency_dict()
    timeliness_timelag_data = timeliness.publisher_timelag_dict()

    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:

        # NOTE(review): the result is never read in this function; the call
        # is kept in case the lookup has side effects - confirm and remove.
        publisher_stats = get_publisher_stats(publisher)

        # Skip if all activities from this publisher are secondary reported
        if publisher in secondary_publishers:
            continue

        # Create a dict for publisher data, and populate it with basic data
        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title
        row['publisher_type'] = common.get_publisher_type(publisher)['name']

        # Compute timeliness statistic
        # The assessment label is the fourth element of the publisher's
        # entry; an empty tuple stands in for publishers that are not found.
        frequency_assessment_data = timeliness_frequency_data.get(
            publisher, ())
        frequency_assessment = (frequency_assessment_data[3]
                                if len(frequency_assessment_data) >= 4
                                else None)
        frequency_score = frequency_scores.get(frequency_assessment, 0)

        timelag_assessment_data = timeliness_timelag_data.get(publisher, ())
        timelag_assessment = (timelag_assessment_data[3]
                              if len(timelag_assessment_data) >= 4
                              else None)
        timelag_score = timelag_scores.get(timelag_assessment, 0)

        # Both scores are out of 4, so the combined maximum is 8
        row['timeliness'] = int(
            round((float(frequency_score + timelag_score) / 8) * 100))

        # Compute forward-looking statistic
        # Get the forward-looking data for this publisher
        publisher_forwardlooking_data = forwardlooking.generate_row(publisher)

        # Third year column: 'Percentage of current activities with budgets'
        year_percentages = publisher_forwardlooking_data['year_columns'][2]

        # Keep only the numeric entries (placeholders such as '-' are
        # dropped). values() works on both Python 2 and 3, unlike
        # itervalues().
        numbers = [int(x) for x in year_percentages.values() if is_number(x)]

        # Mean over all year columns, so non-numeric years count as 0.
        # Floor division keeps the integer result that Python 2's `/` gave.
        # NOTE(review): the denominator was reconstructed from a truncated
        # statement - confirm it should be the number of year columns
        # rather than len(numbers).
        if year_percentages:
            row['forwardlooking'] = sum(numbers) // len(year_percentages)
        else:
            # No year columns at all: avoid a ZeroDivisionError
            row['forwardlooking'] = 0

        # Compute comprehensive statistic
        # Get the comprehensiveness data for this publisher
        publisher_comprehensiveness_data = comprehensiveness.generate_row(
            publisher)

        # Set the comprehensive value to be the summary average for valid data
        # NOTE(review): the key name was reconstructed from a truncated
        # statement - confirm against comprehensiveness.generate_row().
        row['comprehensive'] = convert_to_int(
            publisher_comprehensiveness_data['summary_average_valid'])

        # Compute score: the rounded mean of the three statistics
        row['score'] = int(
            round(
                float(row['timeliness'] + row['forwardlooking'] +
                      row['comprehensive']) / 3))

        # Hand the finished row to the caller (this function is a generator)
        yield row