Example #1
0
    def parse_events(self):
        """Regroup the raw events in ``self.data`` by resource.

        Every event is read for its ``created_at`` timestamp (reduced to
        ``YYYY-MM-DD``), its JSON-encoded ``results_by_resource`` field
        (expected to decode to a mapping of resource -> record count), the
        country resolved by ``geonames_query`` from ``lat``/``lon``, and its
        ``query_terms``.  For each resource named in an event the method
        accumulates:

        * ``records``         -- running sum of the record counts
        * ``query_countries`` -- per-country tally (``times``)
        * ``query_dates``     -- per-day tally (``times``)
        * ``query_terms``     -- per-terms tally (``times`` and ``records``)

        The finished mapping is stored in ``self.resources``.

        Returns:
            Always 0.
        """

        logging.info("Formatting results")
        by_resource = {}

        for entry in self.data:

            # Reduce the ISO-like timestamp to its calendar day
            day = datetime.strptime(
                entry['created_at'], '%Y-%m-%dT%H:%M:%SZ'
            ).strftime('%Y-%m-%d')
            hits_by_resource = json.loads(entry['results_by_resource'])
            country = geonames_query(entry['lat'], entry['lon'])
            terms = entry['query_terms']

            for name in hits_by_resource:

                # First sighting of a resource creates its empty stats record
                stats = by_resource.setdefault(name, {
                    'records': 0,
                    'query_countries': {},
                    'query_dates': {},
                    'query_terms': {}
                })

                hits = hits_by_resource[name]
                stats['records'] += hits

                # Tally by country of origin
                per_country = stats['query_countries']
                if country in per_country:
                    per_country[country]['times'] += 1
                else:
                    per_country[country] = {
                        'query_country': country,
                        'times': 1
                    }

                # Tally by day
                per_day = stats['query_dates']
                if day in per_day:
                    per_day[day]['times'] += 1
                else:
                    per_day[day] = {
                        'query_date': day,
                        'times': 1
                    }

                # Tally by query terms, also tracking the records returned
                per_terms = stats['query_terms']
                if terms in per_terms:
                    seen = per_terms[terms]
                    seen['times'] += 1
                    seen['records'] += hits
                else:
                    per_terms[terms] = {
                        'query_terms': terms,
                        'times': 1,
                        'records': hits
                    }

        # Publish the aggregate on the instance
        self.resources = by_resource

        logging.info("Created %d resources" % len(self.resources))
        return 0
Example #2
0
    def parse_events(self):
        """Regroup the raw events in ``self.data`` by resource.

        Each event contributes its day of creation (``created_at`` cut down
        to ``YYYY-MM-DD``), the country that ``geonames_query`` resolves
        from ``lat``/``lon``, its ``query_terms``, and the per-resource
        counts decoded from the JSON string ``results_by_resource``
        (expected to be a mapping of resource -> record count).

        Per resource, the method keeps a running ``records`` total plus
        ``query_countries``, ``query_dates`` and ``query_terms`` tallies.
        The result is stored in ``self.resources``.  Both log messages are
        prefixed with the module ``__version__``.

        Returns:
            Always 0.
        """

        def tally(table, key, label):
            """Count one occurrence of ``key``; return True if it was new."""
            entry = table.get(key)
            if entry is None:
                table[key] = {label: key, 'times': 1}
                return True
            entry['times'] += 1
            return False

        logging.info(("Version: %s\n" % __version__) + "Formatting results")
        by_resource = {}

        for entry in self.data:

            # Parse the timestamp, then keep only year-month-day
            stamp = datetime.strptime(entry['created_at'], '%Y-%m-%dT%H:%M:%SZ')
            day = stamp.strftime('%Y-%m-%d')
            hits_by_resource = json.loads(entry['results_by_resource'])
            country = geonames_query(entry['lat'], entry['lon'])
            terms = entry['query_terms']

            for name in hits_by_resource:

                # Create the empty stats record the first time a resource appears
                if name not in by_resource:
                    by_resource[name] = {
                        'records': 0,
                        'query_countries': {},
                        'query_dates': {},
                        'query_terms': {}
                    }
                stats = by_resource[name]

                hits = hits_by_resource[name]
                stats['records'] += hits

                tally(stats['query_countries'], country, 'query_country')
                tally(stats['query_dates'], day, 'query_date')

                # Query terms additionally accumulate the records returned
                per_terms = stats['query_terms']
                if tally(per_terms, terms, 'query_terms'):
                    per_terms[terms]['records'] = hits
                else:
                    per_terms[terms]['records'] += hits

        # Publish the aggregate on the instance
        self.resources = by_resource

        logging.info(
            ("Version: %s\n" % __version__)
            + ("Created %d resources" % len(self.resources))
        )
        return 0
def _country_totals(latlon_counts):
    """Collapse a ``{coords: times}`` mapping into ``{country: times}``.

    ``coords[0]`` and ``coords[1]`` are handed to ``geonames_query`` as
    latitude and longitude.  A ``KeyError`` from the lookup, or an empty-list
    result, is reported under the country name "Unknown".
    """
    totals = {}
    for coords, times in latlon_counts.items():
        try:
            country = geonames_query(coords[0], coords[1])
        except KeyError:
            country = "Unknown"
        if country == []:
            country = "Unknown"

        # Several coordinates can resolve to the same country: merge them
        if country not in totals:
            totals[country] = times
        else:
            totals[country] += times
    return totals


def _fill_breakdowns(section, stats):
    """Append the country, date and query breakdowns of ``stats`` to ``section``.

    ``stats`` must provide the keys 'latlon', 'created' and 'query'; a missing
    key raises ``KeyError`` after the breakdowns that precede it have already
    been appended.  Each 'query' value is read as ``[times, records]``.
    """
    countries = _country_totals(stats['latlon'])
    # sorted() instead of keys().sort(): dict views have no .sort() on Python 3
    for country in sorted(countries):
        section['countries_list'].append(country)
        section['countries'].append({"country": country, "times": countries[country]})

    dates = stats['created']
    for date in sorted(dates):
        section['dates'].append({"date": date, "times": dates[date]})

    for query, values in stats['query'].items():
        section['queries'].append({"query": query, "times": values[0], "records": values[1]})


def build_model(pubs, pub, lapse, today):
    """Build the JSON model with data about the month for the resource.

    Args:
        pubs: mapping of resource key -> resource data.  ``pubs[pub]`` must
            contain 'url', 'inst' and 'col'; the download keys and the
            'searches' sub-mapping are optional.
        pub: key of the resource in ``pubs`` to report on.
        lapse: forwarded to ``get_time_lapse`` together with ``today``.
        today: reference date; also rendered as 'created_at' with the format
            '%Y/%m/%d' (presumably a date/datetime -- confirm with callers).

    Returns:
        The model dict.  'github_org' and 'github_repo' are left empty here.
        A ``KeyError`` while reading download or search data stops that
        section where it is: values set before the failure are kept, the
        remaining ones stay at their defaults.

    Raises:
        KeyError: if ``pub`` is not in ``pubs`` or lacks 'url'/'inst'/'col'.
    """

    model = {
        "url": "",  # IPT resource URL, to link with CartoDB resource_staging table
        "inst": "",  # Institution Code
        "col": "",  # Collection code
        "github_org": "",  # GitHub Organization
        "github_repo": "",  # GitHub Repository
        "report_month_string": "",  # String to add to the reports, something like "February, 2014"
        "report_month": "",  # Compact mode of report_month, something like "2014/02"
        "last_report_url": "",  # link to last existing report in GitHub, or empty if first time
        "created_at": "",  # Full date of creation, like "2014/03/17"
        "downloads": {  # Monthly values for downloads
            "downloads": 0,
            "downloads_period": 0,
            "records": 0,
            "records_period": 0,
            "records_unique": 0,
            "countries_list": [],
            "countries": [],
            "dates": [],
            "queries": []
        },
        "searches": {  # Monthly values for searches
            "searches": 0,
            "records": 0,
            "countries_list": [],
            "countries": [],
            "dates": [],
            "queries": []
        }
    }

    resource = pubs[pub]
    url = resource['url']
    inst = resource['inst']
    col = resource['col']
    model['url'] = url
    model['inst'] = inst
    model['col'] = col

    report_month_string, report_month = get_time_lapse(today=today, lapse=lapse)
    model['report_month_string'] = report_month_string
    model['report_month'] = report_month

    model['last_report_url'] = find_last_report(inst, col, today)

    model['created_at'] = format(today, '%Y/%m/%d')

    # DOWNLOADS
    try:  # Try adding download values
        downloads = model['downloads']
        downloads['downloads'] = len(resource['download_files'])
        downloads['downloads_period'] = resource['downloads_in_period']
        downloads['records'] = resource['records_downloaded']
        downloads['records_period'] = resource['tot_recs']
        downloads['records_unique'] = len(resource['unique_records'])
        _fill_breakdowns(downloads, resource)
    except KeyError:  # If fails, it means there have been no downloads in the period, so use default values
        pass

    # SEARCHES
    try:  # Try adding search values
        search_stats = resource['searches']
        searches = model['searches']
        searches['searches'] = search_stats['searches']
        searches['records'] = search_stats['records_searched']
        _fill_breakdowns(searches, search_stats)
    except KeyError:  # If fails, it means there have been no searches in the period, so use default values
        pass

    return model