def calculate_funnel_data(date, period_type):
    """Compute all calculated metrics for one period and store them.

    Every row of the ``metrics`` table flagged as ``calculated`` is
    evaluated by combining two operand metric values with the row's
    operation. Results that are not ``None`` are written to the
    "funnel" table.

    Args:
        date: reference date identifying the period to calculate.
        period_type: period type code passed through to ``datecalc``.

    Returns:
        Always ``True``.
    """
    results = []

    # Period label and boundaries shared by every record produced below.
    period = datecalc.get_period(date, period_type)
    period_start_date = datecalc.period_start_date(date, period_type)
    period_end_date = datecalc.period_end_date(date, period_type)

    # The calculated metrics are defined in the database. When one
    # calculation depends on the result of another, it carries a higher
    # calcsortorder, so ascending order evaluates prerequisites first.
    calculated_metrics = scraperwiki.sql.select(
        "* FROM metrics WHERE calculated ORDER BY calcsortorder ASC")

    for metric in calculated_metrics:
        first_id = metric["operand1metricid"]
        first_date = offset_period(
            date, period_type, float(metric["operand1periodoffset"]))
        operand1 = get_metric_value(first_id, first_date)

        second_id = metric["operand2metricid"]
        second_date = offset_period(
            date, period_type, float(metric["operand2periodoffset"]))
        operand2 = get_metric_value(second_id, second_date)

        value = do_operation(operand1, operand2, metric["operation"])
        if value is None:
            # Nothing to record for this metric.
            continue

        results.append({
            "metricid": metric["metricid"],
            "period": period,
            "value": value,
            "period_type": period_type,
            "period_start_date": period_start_date.isoformat(),
            "period_end_date": period_end_date.isoformat()
        })

    # NOTE(review): results are stored once, after every metric has been
    # computed. If get_metric_value reads from the "funnel" table, a metric
    # that depends on another calculated metric will not see the value
    # produced earlier in this same call -- confirm this placement.
    S.StoreRecords(results, table = "funnel", verbose = False)

    return True
def collect_previous_ckan_data(test_data = False):
    """Sync historical CKAN data with the newly installed database.

    Downloads the historical CSV export, then turns every CSV row into
    three daily records (users, organizations, datasets). Rows whose date
    equals the start of its week additionally produce three weekly
    records. All records are stored in the "funnel" table.

    Args:
        test_data: when truthy, return the list of stored records
            instead of ``True``.

    Returns:
        The list of records when ``test_data`` is truthy; ``True`` on
        success otherwise; ``False`` when reading or storing raises.
    """
    #
    # TODO: This is a major failure point.
    # This collector relies on data collected
    # by a very old collector written in R
    # and hosted in ScraperWiki.
    #
    # NOTE(review): `dir` is not defined in this function; presumably a
    # module-level path variable that shadows the builtin -- confirm.
    data_dir = os.path.split(dir)[0]
    path = os.path.join(data_dir, "temp", "ckan_data.csv")
    u = "https://ds-ec2.scraperwiki.com/7c6jufm/bwbcvvxuynjbrx2/cgi-bin/csv/ckan_dataset_data.csv"
    r = requests.get(u, stream = True)
    if r.status_code == 200:
        with open(path, 'wb') as f:
            # Have the raw stream decode any content-encoding while the
            # body is copied to disk.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)
    # NOTE(review): on a non-200 response nothing is written, and whatever
    # file already exists at `path` (if any) is read below.

    # (metric id, CSV column holding its value), in output order.
    metric_columns = (
        ('ckan-number-of-users', "number_of_users"),
        ('ckan-number-of-orgs', "number_of_organizations"),
        ('ckan-number-of-datasets', "number_of_datasets"),
    )

    # Read file and store in database.
    try:
        print("%s Fetching CKAN historical data." % I.item('prompt_bullet'))
        with open(path) as csvfile:
            records = []
            for row in csv.DictReader(csvfile):
                daily = [
                    {
                        'metricid': metricid,
                        'period': row["date"],
                        'period_start_date': row["date"],
                        'period_end_date': row["date"],
                        'period_type': "d",
                        'value': row[column],
                    }
                    for metricid, column in metric_columns
                ]
                records.extend(daily)

                record_date = datetime.strptime(row["date"], "%Y-%m-%d")
                week_start = datecalc.period_start_date(
                    date = record_date, period_type = "w")
                if record_date == week_start:
                    record_week = datecalc.get_period(
                        date = record_date, period_type = "w")
                    #
                    # Adding weekly records to the record collection.
                    # Each weekly record is a copy: mutating the daily
                    # dicts in place would also rewrite the daily entries
                    # already in `records`, losing them and storing the
                    # weekly record twice.
                    #
                    records.extend(
                        dict(record, period=record_week, period_type="w")
                        for record in daily
                    )

        #
        # Store records in database.
        #
        print("%s Storing CKAN historical data (%s records)." % (I.item('prompt_bullet'), len(records)))
        S.StoreRecords(records, table = "funnel")
        if test_data:
            return records

    except Exception as e:
        # Best-effort collector: report the failure and signal it to the
        # caller instead of propagating.
        print(e)
        return False

    print("%s Successfully collected historic CKAN records." % I.item('prompt_success'))
    return True