Esempio n. 1
0
def offset_period(date, period_type, offset):
    """Return the period lying `offset` periods away from the one containing `date`.

    The date is shifted by `offset` days, weeks, months, quarters
    (3 months each) or years (12 months each) depending on `period_type`,
    and the shifted date is then converted with `datecalc.get_period`.

    Args:
        date: Starting date. It is added to a `datetime.timedelta` for
            day/week periods and passed to `datecalc.add_months` otherwise.
        period_type: Period type string; it is lower-cased and compared
            against the members of `datecalc.PeriodTypes`.
        offset: Number of periods to shift by. Used as-is for days and
            weeks, truncated with `int()` for months, quarters and years.

    Returns:
        The result of `datecalc.get_period` for the shifted date, or
        `None` when `period_type` matches no known period type.
    """
    # NOTE(review): `[0:]` keeps the whole string. If only the first
    # letter was meant to be compared this should be `[0]` -- confirm
    # against the values of datecalc.PeriodTypes before changing it.
    kind = period_type[0:].lower()
    known = datecalc.PeriodTypes

    if kind == known.Day:
        shifted = date + datetime.timedelta(days=offset)
    elif kind == known.Week:
        shifted = date + datetime.timedelta(weeks=offset)
    elif kind == known.Month:
        shifted = datecalc.add_months(date, int(offset))
    elif kind == known.Quarter:
        # A quarter is three calendar months.
        shifted = datecalc.add_months(date, int(offset) * 3)
    elif kind == known.Year:
        # A year is twelve calendar months.
        shifted = datecalc.add_months(date, int(offset) * 12)
    else:
        # Unknown period type: signal it to the caller with None.
        return None

    return datecalc.get_period(shifted, period_type)
Esempio n. 2
0
def calculate_funnel_data(date, period_type):
    """Compute the calculated metrics for the period containing `date` and store them.

    Every row of the `metrics` table flagged as calculated is evaluated,
    in ascending `calcsortorder`, by combining two operand metric values
    with the row's operation. Results that are not `None` are collected
    and written to the "funnel" table in a single `S.StoreRecords` call.

    Args:
        date: Date identifying the period to calculate.
        period_type: Period type passed through to the `datecalc` helpers
            and to `offset_period`.

    Returns:
        True, once the collected records have been handed to the store.
    """
    # Period identifier and boundaries shared by every record produced below.
    period = datecalc.get_period(date, period_type)
    period_start_date = datecalc.period_start_date(date, period_type)
    period_end_date = datecalc.period_end_date(date, period_type)

    def operand_value(metric, id_key, offset_key):
        # Fetch one operand: the value of the referenced metric in the
        # period shifted by the operand's configured period offset.
        metric_id = metric[id_key]
        shifted_period = offset_period(
            date, period_type, float(metric[offset_key]))
        return get_metric_value(metric_id, shifted_period)

    # The metric definitions live in the database. A calculation that
    # depends on another one carries a higher calcsortorder, hence the
    # ascending sort.
    # NOTE(review): results are only stored after the loop, so whether a
    # later calculation can see an earlier result from this same run
    # depends on where get_metric_value reads from -- confirm.
    calculated_metrics = scraperwiki.sql.select(
        "* FROM metrics WHERE calculated ORDER BY calcsortorder ASC")

    rows = []
    for metric in calculated_metrics:
        left = operand_value(
            metric, "operand1metricid", "operand1periodoffset")
        right = operand_value(
            metric, "operand2metricid", "operand2periodoffset")
        result = do_operation(left, right, metric["operation"])

        # Metrics whose operation produced no value are not stored.
        if result is None:
            continue

        rows.append({
            "metricid": metric["metricid"],
            "period": period,
            "value": result,
            "period_type": period_type,
            "period_start_date": period_start_date.isoformat(),
            "period_end_date": period_end_date.isoformat()
        })

    S.StoreRecords(rows, table = "funnel", verbose = False)
    return True
Esempio n. 3
0
def collect_previous_ckan_data(test_data = False):
    '''Sync historical CKAN data with the newly installed database.

    Downloads a CSV of historical CKAN figures, turns every row into three
    daily records (users, organizations, datasets) and stores them in the
    "funnel" table. For rows whose date is the start of a week, an
    additional set of three weekly records is stored as well.

    Args:
        test_data: When truthy, the list of stored records is returned
            instead of True.

    Returns:
        The list of records when `test_data` is truthy, True on success
        otherwise, and False when reading or storing the data raised.
    '''

    #
    #  TODO: This is a major failure point.
    #  This collector relies on data collected
    #  by a very old collector written in R
    #  and hosted in ScraperWiki.
    #
    # NOTE(review): `dir` is presumably a module-level path variable that
    # shadows the builtin -- confirm it is defined in this module.
    data_dir = os.path.split(dir)[0]
    path = os.path.join(data_dir, "temp", "ckan_data.csv")
    u = "https://ds-ec2.scraperwiki.com/7c6jufm/bwbcvvxuynjbrx2/cgi-bin/csv/ckan_dataset_data.csv"

    r = requests.get(u, stream = True)

    # NOTE(review): on a non-200 response nothing is written, so the code
    # below reads whatever file is already at `path` (or fails to open it).
    if r.status_code == 200:
        with open(path, 'wb') as f:
            # Let urllib3 undo any transfer encoding while streaming to disk.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)

    # (metric id, CSV column) pairs, in the order records are emitted.
    metric_columns = (
        ('ckan-number-of-users', "number_of_users"),
        ('ckan-number-of-orgs', "number_of_organizations"),
        ('ckan-number-of-datasets', "number_of_datasets"),
    )

    # Read file and store in database.
    try:

        print("%s Fetching CKAN historical data." % I.item('prompt_bullet'))

        with open(path) as csvfile:
            reader = csv.DictReader(csvfile)
            records = []
            for row in reader:
                daily = [
                    {
                        'metricid': metricid,
                        'period': row["date"],
                        'period_start_date': row["date"],
                        'period_end_date': row["date"],
                        'period_type': "d",
                        'value': row[column]
                    }
                    for metricid, column in metric_columns
                ]
                records.extend(daily)

                record_date = datetime.strptime(row["date"], "%Y-%m-%d")
                if record_date == datecalc.period_start_date(date = record_date, period_type = "w"):
                    record_week = datecalc.get_period(date = record_date, period_type = "w")

                    #
                    #  Adding weekly records to the record collection.
                    #  They must be copies: mutating the daily dicts in
                    #  place would also rewrite the daily entries already
                    #  appended above, losing them and duplicating the
                    #  weekly ones.
                    #
                    records.extend(
                        dict(record, period = record_week, period_type = "w")
                        for record in daily
                    )

            #
            #  Store records in database.
            #
            print("%s Storing CKAN historical data (%s records)." % (I.item('prompt_bullet'), len(records)))
            S.StoreRecords(records, table = "funnel")

            if test_data:
                return records

    except Exception as e:
        # Best-effort collector: report the problem and signal failure.
        print(e)
        return False

    print("%s Successfully collected historic CKAN records." % I.item('prompt_success'))
    return True