Esempio n. 1
0
def get_monthly_term_data(term, time_start, time_end, granularity):
    """ Collect month-by-month metric values for a term.

        A single document is looked up by term in
        db_metric_data_monthly.  Months are visited starting at
        time_start, stepping with date_util.get_next_month, for as
        long as the current month is strictly before time_end.  When no
        document exists for the term, each visited month yields 1.

        The 'granularity' argument is not used by this function.

        Returns a tuple (data, total, avg, max_val): the list of
        per-month values, their sum, total divided by the number of
        values (0 when no month was visited), and the largest value
        (never below 0).

    """
    # One lookup serves the whole range: values are read from the
    # document by str(year) and then str(month).
    monthly_doc = db_metric_data_monthly.find_one({"term": term})

    values = []
    month_cursor = time_start
    while month_cursor < time_end:
        if monthly_doc is None:
            values.append(1)
        else:
            year_key = str(month_cursor.year)
            month_key = str(month_cursor.month)
            values.append(monthly_doc[year_key][month_key])
        month_cursor = date_util.get_next_month(month_cursor)

    # Aggregate once the series is complete.  The leading 0 keeps the
    # maximum from dropping below zero and covers the empty series.
    series_total = sum(values)
    series_max = max([0] + values)
    series_avg = series_total / len(values) if values else 0

    return values, series_total, series_avg, series_max
Esempio n. 2
0
def write_file_counts_to_db():
    """ Insert one article-count document per month into db_raw_articles.

        Each inserted document has the form
        {"count": <count>, "published": <first day of the month>}.
        The first count is assigned to January 2000 and each following
        count to the next month, as returned by
        date_util.get_next_month.  Prints "... Done" when finished.

    """

    # The following array is pasted in from the results of running
    # 'cnn_file_count.sh'.  The counts are per month, starting on
    # Jan, 2000; each row below holds the twelve months of one year.
    counts = [
        1932, 1785, 2042, 1829, 2009, 1883, 1873, 2033, 1887, 2138, 1914, 1937,  # 2000
        2190, 1921, 1178, 1785, 2301, 2284, 2536, 2647, 2503, 2364, 2109, 2009,  # 2001
        2266, 1945, 1995, 1856, 1973, 1816, 1971, 1912, 1728, 1796, 1768, 1865,  # 2002
        1889, 1592, 1923, 2009, 2240, 2170, 2218, 2399, 2020, 1867, 1689, 1720,  # 2003
        1449, 1536, 760, 756, 762, 729, 757, 798, 758, 734, 745, 759,  # 2004
        715, 713, 801, 728, 789, 760, 723, 776, 760, 748, 732, 747,  # 2005
        709, 669, 716, 702, 764, 773, 801, 775, 611, 620, 612, 623,  # 2006
        635, 559, 615, 598, 628, 574, 606, 639, 591, 626, 586, 581,  # 2007
        614, 570, 616, 587, 620, 590, 612, 610, 614, 619, 600, 609,  # 2008
        620, 561, 615, 600, 601, 598, 605, 605, 618, 670, 651, 700,  # 2009
        686, 624, 695, 670, 677, 645, 668, 680, 665, 673, 662, 685,  # 2010
        692, 654, 796, 707, 740, 719, 735, 776, 745, 751, 735, 746,  # 2011
        760, 697, 752, 569, 0, 0, 0, 0, 0, 0, 0, 0,  # 2012
    ]

    # Plain decimal month literal: a leading-zero literal such as `01`
    # is a syntax error on Python 3.
    curr_month = datetime(2000, 1, 1)
    for count in counts:
        count_doc = {"count": count, "published": curr_month}
        # NOTE(review): if db_raw_articles is a PyMongo collection,
        # insert() no longer exists in PyMongo 4 -- confirm the driver
        # version before upgrading.
        db_raw_articles.insert(count_doc)
        curr_month = date_util.get_next_month(curr_month)

    # Parenthesised single-string form prints the same text on both
    # Python 2 and Python 3.
    print("... Done")
Esempio n. 3
0
def get_daily_term_data(term, time_start, time_end, granularity):
    """ Get 'daily' granularity metric data for the given term and
        time range

        Documents for the term are read from db_metric_data_daily,
        sorted by their "date" field.  Months are visited from
        time_start (stepping with date_util.get_next_month) while the
        current month is strictly before time_end.  At most one result
        document is consumed per visited month; its "daily" mapping is
        read by day-of-month string.  A month with no matching document
        contributes zeros.

        In the first month, days start at time_start.day; in the month
        of time_end, days stop at time_end.day.

        The 'granularity' argument is not used by this function.

        Returns a tuple (data, total, avg, max_val): the list of daily
        values, their sum, total divided by the number of values
        returned (0 when there are none), and the largest value (never
        below 0).

    """
    # Both bounds must live under ONE "date" key: repeating the key in
    # a dict literal keeps only the last value, which silently dropped
    # the lower bound.  The lower bound is floored to the start of
    # time_start's month so that the document for a partially-requested
    # first month is still returned.
    # NOTE(review): assumes time_start is a datetime.datetime and that
    # each document's "date" falls inside the month it describes --
    # confirm against the writer of this collection.
    range_floor = time_start.replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
    db_query = {
        "term": term,
        "date": {"$gte": range_floor, "$lte": time_end},
    }
    result_set = list(db_metric_data_daily.find(db_query).sort("date"))
    result_set_idx = 0

    data = []
    total = 0
    max_val = 0

    # Iterate over each month in the requested time range
    curr_month = time_start
    while curr_month < time_end:

        # Typically the end day is the number of days in the month.
        # However, if this is the final month in the requested time
        # range, the day may be earlier based upon the end date.
        days_in_month = date_util.get_days_in_month(
                curr_month.year, curr_month.month)
        if date_util.is_same_month(curr_month, time_end):
            days_in_month = time_end.day

        days = range(curr_month.day, days_in_month + 1)

        # Peek at the next unconsumed result.  If it belongs to the
        # current month, use its data; otherwise there is a gap in the
        # data, which is filled with zeros.
        result = None
        if result_set_idx < len(result_set):
            result = result_set[result_set_idx]

        if result and date_util.is_same_month(curr_month, result["date"]):
            # The bounds check above makes it safe to always advance.
            result_set_idx += 1
            daily = result["daily"]
            for day in days:
                val = daily[str(day)]
                data.append(val)
                total += val
                if val > max_val:
                    max_val = val
        else:
            data.extend([0] * len(days))

        curr_month = date_util.get_next_month(curr_month)

    # Average over the points actually returned.  Counting whole months
    # instead would overstate the denominator when the range starts
    # part-way through a month.
    avg = 0
    if data:
        avg = total / len(data)

    return data, total, avg, max_val