def get_statistics_from_last_update_for_all_sources(db_root):
    """Sum the cached metainfos of the most recent day across all sources.

    For every provider name returned by ``get_all_provider_names(db_root)``
    that has at least one stored day, the cached metainfos of that
    provider's latest day are fetched. Only the providers whose latest day
    equals the most recent day found across all providers contribute to the
    totals.

    Returns:
        A ``defaultdict(int)`` mapping each metainfo key to the sum of its
        values over the contributing providers, plus a ``provider_count``
        entry holding the number of contributing providers. When no
        provider has any stored day, only ``provider_count`` (0) is set.
    """
    # Fetch (latest day, metainfos for that day) for every source with data.
    last_update_by_source = {}
    for source_name in get_all_provider_names(db_root):
        p = Provider(db_root, source_name)
        all_days = p.get_all_days()
        if all_days:
            last_day = utils.get_latest_day(all_days)
            last_update_by_source[source_name] = (
                last_day,
                p.get_cached_metainfos_for_day(last_day),
            )

    overall_metainfo = defaultdict(int)
    provider_count = 0

    # Without any data there is no "latest day" to look for, so skip the
    # lookup entirely and report zero contributing providers.
    if last_update_by_source:
        # Not every source has data for the real last day; search for the
        # most recent day among the per-source latest days.
        last_days = {day for day, _ in last_update_by_source.values()}
        real_last_day = utils.get_latest_day(last_days)

        # Build the overall metainfos using only the sources which have
        # data for the real last day.
        for day, metainfos in last_update_by_source.values():
            if day == real_last_day:
                provider_count += 1
                for key, value in metainfos.items():
                    overall_metainfo[key] += value

    overall_metainfo["provider_count"] = provider_count
    return overall_metainfo
def get_first_and_last_date(db_root):
    """Return the first and last stored days of the first provider as dates.

    Only the first name returned by ``get_all_provider_names(db_root)`` is
    consulted. The first and last entries of that provider's day list are
    each converted with ``utils.make_date_from_string``.

    NOTE(review): this presumably relies on ``get_all_days()`` returning
    days in chronological order, and on the first provider being
    representative of the whole database -- confirm against callers.
    The indexing below requires at least one provider and at least one
    stored day for it.

    Returns:
        A ``(first_date, last_date)`` tuple.
    """
    reference_source = get_all_provider_names(db_root)[0]
    days = Provider(db_root, reference_source).get_all_days()
    earliest, latest = days[0], days[-1]
    return (
        utils.make_date_from_string(earliest),
        utils.make_date_from_string(latest),
    )
def get_summary_from_last_update_for_all_sources(db_root):
    """Collect the latest-day cached metainfos of every source with data.

    Providers without any stored day are skipped.

    Returns:
        A list of ``(source_name, last_day_as_date, metainfos)`` tuples, in
        the order the names are returned by ``get_all_provider_names``.
        ``last_day_as_date`` is the provider's latest day converted with
        ``utils.make_date_from_string``.
    """
    summaries = []
    for source_name in get_all_provider_names(db_root):
        provider = Provider(db_root, source_name)
        days = provider.get_all_days()
        if not days:
            # Nothing stored for this source yet.
            continue
        latest = utils.get_latest_day(days)
        metainfos = provider.get_cached_metainfos_for_day(latest)
        summaries.append(
            (source_name, utils.make_date_from_string(latest), metainfos)
        )
    return summaries