Example #1
0
 def __getitem__(self, key):
     """Return derived aggregate data for special keys.

     'failed_downloads': dict mapping row[0] -> row[1] for each row in
         ``failed_downloads``.
     'publisher_types': {datestring: {publisher type name: count}} where
         each matched publisher contributes 1 per datestring it reports.
     'activities_per_publisher_type': {datestring: {publisher type name:
         summed activity count}}.
     Any other key is delegated to the parent class lookup.
     """
     if key == 'failed_downloads':
         return dict((row[0], row[1]) for row in failed_downloads)
     elif key == 'publisher_types':
         out = defaultdict(lambda: defaultdict(int))
         for publisher, publisher_data in gitaggregate_publisher.iteritems():
             if publisher in data.ckan_publishers:
                 # The publisher type does not vary per datestring, so look
                 # it up once per publisher instead of inside the inner loop.
                 publisher_type = common.get_publisher_type(publisher)['name']
                 for datestring, count in publisher_data['activities'].iteritems():
                     out[datestring][publisher_type] += 1
             else:
                 print('Publisher not matched:', publisher)
         return out
     elif key == 'activities_per_publisher_type':
         out = defaultdict(lambda: defaultdict(int))
         for publisher, publisher_data in gitaggregate_publisher.iteritems():
             if publisher in data.ckan_publishers:
                 # Hoisted for the same reason as above.
                 publisher_type = common.get_publisher_type(publisher)['name']
                 for datestring, count in publisher_data['activities'].iteritems():
                     out[datestring][publisher_type] += count
             else:
                 print('Publisher not matched:', publisher)
         return out
     else:
         return super(AugmentedJSONDir, self).__getitem__(key)
Example #2
0
 def __getitem__(self, key):
     """Serve computed statistics for the special keys; defer otherwise.

     Recognised keys: 'failed_downloads', 'publisher_types' and
     'activities_per_publisher_type'. Anything else falls through to the
     superclass lookup.
     """
     if key == 'failed_downloads':
         return {row[0]: row[1] for row in failed_downloads}
     if key in ('publisher_types', 'activities_per_publisher_type'):
         # The two aggregates differ only in what is added per datestring:
         # a flat 1 for publisher_types, the activity count otherwise.
         per_date = defaultdict(lambda: defaultdict(int))
         for publisher, publisher_data in gitaggregate_publisher.iteritems():
             if publisher not in data.ckan_publishers:
                 print('Publisher not matched:', publisher)
                 continue
             organization_type = common.get_publisher_type(publisher)['name']
             for datestring, count in publisher_data['activities'].iteritems():
                 increment = 1 if key == 'publisher_types' else count
                 per_date[datestring][organization_type] += increment
         return per_date
     return super(AugmentedJSONDir, self).__getitem__(key)
Example #3
0
def table():
    """Generate data for the humanitarian table
    """

    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:
        # Fetch this publisher's statistics once per iteration
        publisher_stats = get_publisher_stats(publisher)
        humanitarian_stats = publisher_stats.get('humanitarian', {})

        # Basic publisher data
        row = {
            'publisher': publisher,
            'publisher_title': publisher_title,
            'publisher_type': common.get_publisher_type(publisher)['name'],
        }

        # Get data from IATI-Stats output
        row['num_activities'] = humanitarian_stats.get('is_humanitarian', '0')
        has_humanitarian = int(row['num_activities']) > 0
        row['publishing_humanitarian'] = 100 if has_humanitarian else 0

        def percentage(stat_key):
            # Share (0-100) of all humanitarian activities counted under
            # the given statistic key; 0 when there are no activities.
            if not has_humanitarian:
                return 0
            return (humanitarian_stats.get(stat_key, '0') /
                    float(row['num_activities'])) * 100

        # Percentage defined using the @humanitarian attribute
        row['humanitarian_attrib'] = percentage('is_humanitarian_by_attrib')
        # Percentage using <humanitarian-scope> to define an appeal or emergency
        row['appeal_emergency'] = percentage('contains_humanitarian_scope')
        # Percentage using clusters
        row['clusters'] = percentage('uses_humanitarian_clusters_vocab')

        # Calculate the mean average
        row['average'] = (row['publishing_humanitarian'] +
                          row['humanitarian_attrib'] +
                          row['appeal_emergency'] + row['clusters']) / float(4)

        # Return a generator object
        yield row
Example #4
0
def table():
    """Generate data for the humanitarian table.

    Yields one row dict per publisher containing the humanitarian activity
    count, three percentage statistics, and their mean average.
    """

    def _percentage(stats, key, num_activities):
        # Share (0-100) of num_activities counted under stats[key].
        # Stats values may be strings (note the '0' defaults used
        # throughout), so convert explicitly before dividing.
        if num_activities <= 0:
            return 0
        return float(stats.get(key, '0')) / num_activities * 100

    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:
        # Store the data for this publisher as a new variable
        publisher_stats = get_publisher_stats(publisher)
        # Every statistic below reads this sub-dictionary; look it up once.
        humanitarian_stats = publisher_stats.get('humanitarian', {})

        # Create a dict for publisher data, and populate it with basic data
        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title
        row['publisher_type'] = common.get_publisher_type(publisher)['name']

        # Get data from IATI-Stats output
        row['num_activities'] = humanitarian_stats.get('is_humanitarian', '0')
        num_activities = int(row['num_activities'])
        row['publishing_humanitarian'] = 100 if num_activities > 0 else 0

        # Calculate percentage of all humanitarian activities that are defined using the @humanitarian attribute
        row['humanitarian_attrib'] = _percentage(
            humanitarian_stats, 'is_humanitarian_by_attrib', num_activities)

        # Calculate percentage of all humanitarian activities that use the <humanitarian-scope> element to define an appeal or emergency
        row['appeal_emergency'] = _percentage(
            humanitarian_stats, 'contains_humanitarian_scope', num_activities)

        # Calculate percentage of all humanitarian activities that use clusters
        row['clusters'] = _percentage(
            humanitarian_stats, 'uses_humanitarian_clusters_vocab', num_activities)

        # Calculate the mean average
        row['average'] = (row['publishing_humanitarian'] + row['humanitarian_attrib'] + row['appeal_emergency'] + row['clusters']) / float(4)

        # Return a generator object
        yield row
def table():
    """Generate data for the publisher forward-looking table.

    Yields one row dict per publisher combining timeliness, forward-looking,
    comprehensiveness and coverage statistics into summary scores.

    NOTE(review): the forwardlooking/comprehensiveness/coverage generators
    are consumed one item per publisher — this assumes they iterate
    publishers in the same order as publishers_ordered_by_title; verify
    against those tables.
    """

    # Store timeliness data in variable
    timeliness_frequency_data = timeliness.publisher_frequency_dict()
    timeliness_timelag_data = timeliness.publisher_timelag_dict()

    # Store generator objects for the data that we are receiving
    forwardlooking_data = forwardlooking.table()
    comprehensiveness_data = comprehensiveness.table()
    coverage_data = coverage.table()

    # Assessment strings map onto 0-4 scores; anything unrecognised
    # ('Less than Annual', 'More than one year', ...) scores 0.
    frequency_scores = {'Monthly': 4, 'Quarterly': 3, 'Six-Monthly': 2, 'Annual': 1}
    timelag_scores = {'One month': 4, 'A quarter': 3, 'Six months': 2, 'One year': 1}

    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:

        # Store the data for this publisher as a new variable
        publisher_stats = get_publisher_stats(publisher)

        # Create a dict for publisher data, and populate it with basic data
        row = {}
        row['publisher'] = publisher
        row['publisher_title'] = publisher_title
        # ['name'] added for consistency with the other tables, which store
        # the publisher type's name rather than the whole type dict.
        row['publisher_type'] = common.get_publisher_type(publisher)['name']

        # Compute timeliness statistic.
        # Guard with .get(): a publisher may be missing from the timeliness
        # data, or its entry may be shorter than expected, which previously
        # raised KeyError/IndexError.
        frequency_assessment_data = timeliness_frequency_data.get(publisher, ())
        frequency_assessment = (frequency_assessment_data[3]
                                if len(frequency_assessment_data) > 3 else None)
        frequency_score = frequency_scores.get(frequency_assessment, 0)

        # Assign timelag score, with the same guards
        timelag_assessment_data = timeliness_timelag_data.get(publisher, ())
        timelag_assessment = (timelag_assessment_data[3]
                              if len(timelag_assessment_data) > 3 else None)
        timelag_score = timelag_scores.get(timelag_assessment, 0)

        # Compute the percentage
        row['timeliness'] = int((float(frequency_score + timelag_score) / 8) * 100)

        # Compute forward looking statistic.
        # next() builtin instead of generator.next() for Python 2.6+/3 compatibility.
        publisher_forwardlooking_data = next(forwardlooking_data)

        # Convert the data for this publishers 'Percentage of current activities with budgets' fields into integers
        numbers = [int(x)
                   for x in publisher_forwardlooking_data['year_columns'][2].itervalues()
                   if is_number(x)]

        # Compute and store the mean average for these fields
        # (the redundant int() re-conversion of already-int values removed)
        row['forwardlooking'] = sum(numbers) / len(publisher_forwardlooking_data['year_columns'][2])

        # Compute comprehensive statistic
        publisher_comprehensiveness_data = next(comprehensiveness_data)

        # Set the comprehensive value to be the summary average for valid data
        row['comprehensive'] = convert_to_int(publisher_comprehensiveness_data['summary_average_valid'])

        # Compute score
        row['score'] = int((row['timeliness'] + row['forwardlooking'] + row['comprehensive']) / 3)

        # Get coverage statistic
        publisher_coverage_data = next(coverage_data)

        # Store the coverage data
        row['coverage_adjustment'] = int(publisher_coverage_data['coverage_adjustment'])

        # Compute Coverage-adjusted score.
        # BUG FIX: the original computed int(coverage_adjustment / 100),
        # which under Python 2 integer division truncates any adjustment
        # below 100 to zero (and 100-199 to one), destroying the adjusted
        # score. Scale the score by the adjustment percentage instead.
        row['score_coverage_adjusted'] = int(row['score'] * row['coverage_adjustment'] / 100.0)

        # Return a generator object
        yield row
def table():
    """Generate data for the publisher forward-looking table
    """

    # Store timeliness data in variable
    frequency_by_publisher = timeliness.publisher_frequency_dict()
    timelag_by_publisher = timeliness.publisher_timelag_dict()

    # Assessment strings map onto 0-4 scores; unrecognised assessments
    # ('Less than Annual', 'More than one year', ...) score 0.
    FREQUENCY_POINTS = {'Monthly': 4, 'Quarterly': 3, 'Six-Monthly': 2, 'Annual': 1}
    TIMELAG_POINTS = {'One month': 4, 'A quarter': 3, 'Six months': 2, 'One year': 1}

    def _assessment(lookup, publisher):
        # Fourth element of the publisher's entry, or None when the
        # publisher is absent or its entry is too short.
        entry = lookup.get(publisher, ())
        return entry[3] if len(entry) >= 4 else None

    # Loop over each publisher
    for publisher_title, publisher in publishers_ordered_by_title:

        # Store the data for this publisher as a new variable
        publisher_stats = get_publisher_stats(publisher)

        # Skip if all activities from this publisher are secondary reported
        if publisher in secondary_publishers:
            continue

        # Basic publisher data
        row = {
            'publisher': publisher,
            'publisher_title': publisher_title,
            'publisher_type': common.get_publisher_type(publisher)['name'],
        }

        # Timeliness: combined frequency and timelag scores as a percentage
        frequency_score = FREQUENCY_POINTS.get(
            _assessment(frequency_by_publisher, publisher), 0)
        timelag_score = TIMELAG_POINTS.get(
            _assessment(timelag_by_publisher, publisher), 0)
        row['timeliness'] = int(round((float(frequency_score + timelag_score) / 8) * 100))

        # Forward-looking: mean of the integer 'Percentage of current
        # activities with budgets' fields for this publisher
        publisher_forwardlooking_data = forwardlooking.generate_row(publisher)
        year_column = publisher_forwardlooking_data['year_columns'][2]
        numbers = [int(x) for x in year_column.itervalues() if is_number(x)]
        row['forwardlooking'] = sum(int(round(y)) for y in numbers) / len(year_column)

        # Comprehensiveness: the summary average for valid data
        publisher_comprehensiveness_data = comprehensiveness.generate_row(publisher)
        row['comprehensive'] = convert_to_int(
            publisher_comprehensiveness_data['summary_average_valid'])

        # Overall score: rounded mean of the three statistics
        row['score'] = int(round(
            float(row['timeliness'] + row['forwardlooking'] + row['comprehensive']) / 3))

        # Return a generator object
        yield row