def parse_events(self):
    """Group the events in ``self.data`` by resource and tally their usage.

    Each event is read through the keys ``created_at`` (parsed with the
    format ``%Y-%m-%dT%H:%M:%SZ``), ``results_by_resource`` (a JSON string;
    once decoded it is iterated and indexed by resource, and its values are
    added up as record counts), ``lat``, ``lon`` and ``query_terms``.

    The result is stored in ``self.resources``: a dict keyed by resource
    whose values contain

    * ``records``: running sum of the records reported for the resource,
    * ``query_countries``: one entry per country with a ``times`` counter,
    * ``query_dates``: one entry per day (``YYYY-MM-DD``) with a ``times``
      counter,
    * ``query_terms``: one entry per query-terms value with ``times`` and
      ``records`` counters.

    Returns:
        0, always.
    """
    logging.info("Formatting results")

    tallies = {}
    for evt in self.data:
        # Only the calendar day of the creation timestamp is kept
        stamp = datetime.strptime(evt['created_at'], '%Y-%m-%dT%H:%M:%SZ')
        day = stamp.strftime('%Y-%m-%d')
        per_resource = json.loads(evt['results_by_resource'])
        # geonames_query is defined elsewhere; its result is used as a dict key
        country = geonames_query(evt['lat'], evt['lon'])
        terms = evt['query_terms']

        for name in per_resource:
            # First sighting of this resource: start from an empty summary
            summary = tallies.get(name)
            if summary is None:
                summary = tallies[name] = {
                    'records': 0,
                    'query_countries': {},
                    'query_dates': {},
                    'query_terms': {}
                }

            hits = per_resource[name]
            summary['records'] += hits

            # Country the query came from
            by_country = summary['query_countries']
            if country in by_country:
                by_country[country]['times'] += 1
            else:
                by_country[country] = {
                    'query_country': country,
                    'times': 1
                }

            # Day the query was made
            by_day = summary['query_dates']
            if day in by_day:
                by_day[day]['times'] += 1
            else:
                by_day[day] = {
                    'query_date': day,
                    'times': 1
                }

            # Query terms, also tracking how many records they returned
            by_terms = summary['query_terms']
            if terms in by_terms:
                by_terms[terms]['times'] += 1
                by_terms[terms]['records'] += hits
            else:
                by_terms[terms] = {
                    'query_terms': terms,
                    'times': 1,
                    'records': hits
                }

    # Expose the aggregation on the instance
    self.resources = tallies

    logging.info("Created %d resources" % len(self.resources))
    return 0
def parse_events(self):
    """Redistribute the events in ``self.data`` into per-resource summaries.

    Both log messages emitted by this method are prefixed with a
    ``Version: <__version__>`` line.

    Each event is read through the keys ``created_at`` (parsed with the
    format ``%Y-%m-%dT%H:%M:%SZ`` and reduced to ``YYYY-MM-DD``),
    ``results_by_resource`` (a JSON string; once decoded it is iterated and
    indexed by resource, and its values are added up as record counts),
    ``lat``, ``lon`` and ``query_terms``.

    The aggregation is stored in ``self.resources`` as a dict keyed by
    resource; every value has the keys ``records`` (running sum),
    ``query_countries``, ``query_dates`` and ``query_terms`` (dicts of
    entries carrying a ``times`` counter, plus ``records`` for query terms).

    Returns:
        0, always.
    """
    logging.info(("Version: %s\n" % __version__) + "Formatting results")

    collected = {}
    for record in self.data:
        # Parse the timestamp, then keep just year-month-day
        parsed = datetime.strptime(record['created_at'], '%Y-%m-%dT%H:%M:%SZ')
        query_day = parsed.strftime('%Y-%m-%d')
        results = json.loads(record['results_by_resource'])
        # geonames_query is defined elsewhere; its result is used as a dict key
        query_country = geonames_query(record['lat'], record['lon'])
        query_terms = record['query_terms']

        for res in results:
            # Fetch the summary for this resource, creating it on first use
            try:
                entry = collected[res]
            except KeyError:
                entry = collected[res] = {
                    'records': 0,
                    'query_countries': {},
                    'query_dates': {},
                    'query_terms': {}
                }

            amount = results[res]
            entry['records'] += amount

            # One more query from this country
            countries = entry['query_countries']
            try:
                countries[query_country]['times'] += 1
            except KeyError:
                countries[query_country] = {
                    'query_country': query_country,
                    'times': 1
                }

            # One more query on this day
            days = entry['query_dates']
            try:
                days[query_day]['times'] += 1
            except KeyError:
                days[query_day] = {
                    'query_date': query_day,
                    'times': 1
                }

            # One more query with these terms, plus the records it returned
            terms = entry['query_terms']
            try:
                seen = terms[query_terms]
            except KeyError:
                terms[query_terms] = {
                    'query_terms': query_terms,
                    'times': 1,
                    'records': amount
                }
            else:
                seen['times'] += 1
                seen['records'] += amount

    # Keep the aggregation on the instance
    self.resources = collected

    logging.info(
        ("Version: %s\n" % __version__)
        + ("Created %d resources" % len(self.resources))
    )
    return 0
def build_model(pubs, pub, lapse, today):
    """Build the JSON model with the monthly report data for one resource.

    Args:
        pubs: Mapping of resource key -> usage data. The entry for ``pub``
            must provide ``url``, ``inst`` and ``col``. Download data
            (``download_files``, ``downloads_in_period``,
            ``records_downloaded``, ``tot_recs``, ``unique_records``,
            ``latlon``, ``created``, ``query``) and a ``searches`` sub-dict
            (``searches``, ``records_searched``, ``latlon``, ``created``,
            ``query``) are optional.
        pub: Key of the resource inside ``pubs``.
        lapse: Forwarded to ``get_time_lapse``.
        today: Creation date of the report. Forwarded to ``get_time_lapse``
            and ``find_last_report`` and rendered as ``%Y/%m/%d``.

    Returns:
        dict with the resource identification, report period and the
        ``downloads`` / ``searches`` sections. Countries and dates in each
        section are sorted in ascending order.

    Raises:
        KeyError: if ``pub`` is not in ``pubs`` or its entry lacks ``url``,
            ``inst`` or ``col``.

    Note:
        A ``KeyError`` raised while filling the ``downloads`` or
        ``searches`` section is treated as "no activity in the period":
        filling of that section stops there, values already written are
        kept and the remaining fields keep their defaults.
    """
    model = {
        "url": "",  # IPT resource URL, to link with CartoDB resource_staging table
        "inst": "",  # Institution Code
        "col": "",  # Collection code
        "github_org": "",  # GitHub Organization
        "github_repo": "",  # GitHub Repository
        "report_month_string": "",  # String to add to the reports, something like "February, 2014"
        "report_month": "",  # Compact mode of report_month, something like "2014/02"
        "last_report_url": "",  # Link to last existing report in GitHub, or empty if first time
        "created_at": "",  # Full date of creation, like "2014/03/17"
        "downloads": {  # Monthly values for downloads
            "downloads": 0,
            "downloads_period": 0,
            "records": 0,
            "records_period": 0,
            "records_unique": 0,
            "countries_list": [],
            "countries": [],
            "dates": [],
            "queries": []
        },
        "searches": {  # Monthly values for searches
            "searches": 0,
            "records": 0,
            "countries_list": [],
            "countries": [],
            "dates": [],
            "queries": []
        }
    }

    entry = pubs[pub]
    inst = entry['inst']
    col = entry['col']
    model['url'] = entry['url']
    model['inst'] = inst
    model['col'] = col

    report_month_string, report_month = get_time_lapse(today=today, lapse=lapse)
    model['report_month_string'] = report_month_string
    model['report_month'] = report_month

    model['last_report_url'] = find_last_report(inst, col, today)
    model['created_at'] = format(today, '%Y/%m/%d')

    # DOWNLOADS
    try:
        downloads = model['downloads']
        downloads['downloads'] = len(entry['download_files'])
        downloads['downloads_period'] = entry['downloads_in_period']
        downloads['records'] = entry['records_downloaded']
        downloads['records_period'] = entry['tot_recs']
        downloads['records_unique'] = len(entry['unique_records'])
        _fill_usage_section(downloads, entry)
    except KeyError:
        # No downloads in the period (or incomplete data): keep defaults
        pass

    # SEARCHES
    try:
        search_stats = entry['searches']
        searches = model['searches']
        searches['searches'] = search_stats['searches']
        searches['records'] = search_stats['records_searched']
        _fill_usage_section(searches, search_stats)
    except KeyError:
        # No searches in the period (or incomplete data): keep defaults
        pass

    return model


def _resolve_country(lat, lon):
    """Return the country for a coordinate pair, or "Unknown" if unresolved."""
    try:
        country = geonames_query(lat, lon)
    except KeyError:
        country = "Unknown"
    # NOTE(review): an empty list presumably means "no match" - confirm
    # against geonames_query, which is defined elsewhere.
    if country == []:
        country = "Unknown"
    return country


def _country_totals(latlon):
    """Sum the counts in ``latlon`` (keyed by (lat, lon) pairs) per country."""
    totals = {}
    for point in latlon:
        country = _resolve_country(point[0], point[1])
        if country not in totals:
            totals[country] = latlon[point]
        else:
            totals[country] += latlon[point]
    return totals


def _fill_usage_section(section, stats):
    """Append the country, date and query breakdowns of ``stats`` to ``section``.

    ``stats`` is read through its ``latlon``, ``created`` and ``query`` keys,
    in that order; a missing key raises ``KeyError`` after the preceding
    breakdowns have already been written to ``section``.
    """
    countries = _country_totals(stats['latlon'])
    # sorted() instead of keys().sort(): dict views have no sort() on Python 3
    for country in sorted(countries):
        section['countries_list'].append(country)
        section['countries'].append({"country": country, "times": countries[country]})

    created = stats['created']
    for day in sorted(created):
        section['dates'].append({"date": day, "times": created[day]})

    queries = stats['query']
    for query in queries:
        values = queries[query]
        # Each value is indexed as [times, records]
        section['queries'].append({"query": query, "times": values[0], "records": values[1]})