def get_monthly_term_data(term, time_start, time_end, granularity):
    """
    Collect 'month' granularity metric values for a term over a time range.

    A single document is looked up by term in db_metric_data_monthly and
    indexed as doc[str(year)][str(month)] for every month from time_start
    up to (but not including) time_end. When no document exists for the
    term, each month in the range is reported as 1. Missing year or month
    keys are not handled here; the lookup error propagates to the caller.

    Args:
        term: value matched against the "term" field of the document.
        time_start: first month to report; must expose .year and .month.
        time_end: exclusive upper bound of the range.
        granularity: accepted for signature parity; not used here.

    Returns:
        A tuple (data, total, avg, max_val): the per-month values, their
        sum, their mean (0 when the range is empty) and the largest value
        seen (never below 0, the starting maximum).
    """
    # One document carries every monthly value recorded for the term
    monthly_doc = db_metric_data_monthly.find_one({"term": term})

    values = []
    running_total = 0
    peak = 0

    # Walk the range one month at a time
    month = time_start
    while month < time_end:
        if monthly_doc is None:
            val = 1
        else:
            val = monthly_doc[str(month.year)][str(month.month)]

        values.append(val)
        running_total += val
        if val > peak:
            peak = val

        month = date_util.get_next_month(month)

    # Every visited month contributed exactly one value, so the number of
    # data points is simply the length of the value list
    mean = 0
    if len(values) > 0:
        mean = running_total / len(values)

    return values, running_total, mean, peak
def write_file_counts_to_db():
    """
    Insert one {"count", "published"} document per month into
    db_raw_articles, using the hard-coded article counts below.

    The counts are monthly and start at January 2000; the publication
    month is advanced with date_util.get_next_month() after each insert.
    Zero counts are inserted like any other value. Prints "... Done"
    when finished.
    """
    # The following array is pasted in from the results of running
    # 'cnn_file_count.sh'. Laid out twelve values (one year) per row.
    counts = [
        1932, 1785, 2042, 1829, 2009, 1883, 1873, 2033, 1887, 2138, 1914, 1937,  # 2000
        2190, 1921, 1178, 1785, 2301, 2284, 2536, 2647, 2503, 2364, 2109, 2009,  # 2001
        2266, 1945, 1995, 1856, 1973, 1816, 1971, 1912, 1728, 1796, 1768, 1865,  # 2002
        1889, 1592, 1923, 2009, 2240, 2170, 2218, 2399, 2020, 1867, 1689, 1720,  # 2003
        1449, 1536, 760, 756, 762, 729, 757, 798, 758, 734, 745, 759,  # 2004
        715, 713, 801, 728, 789, 760, 723, 776, 760, 748, 732, 747,  # 2005
        709, 669, 716, 702, 764, 773, 801, 775, 611, 620, 612, 623,  # 2006
        635, 559, 615, 598, 628, 574, 606, 639, 591, 626, 586, 581,  # 2007
        614, 570, 616, 587, 620, 590, 612, 610, 614, 619, 600, 609,  # 2008
        620, 561, 615, 600, 601, 598, 605, 605, 618, 670, 651, 700,  # 2009
        686, 624, 695, 670, 677, 645, 668, 680, 665, 673, 662, 685,  # 2010
        692, 654, 796, 707, 740, 719, 735, 776, 745, 751, 735, 746,  # 2011
        760, 697, 752, 569, 0, 0, 0, 0, 0, 0, 0, 0,  # 2012
    ]

    # The counts are per month, starting on Jan, 2000.
    # (Plain `1` rather than `01`: a leading-zero literal is a syntax
    # error on Python 3 and has the same value on Python 2.)
    curr_month = datetime(2000, 1, 1)
    for count in counts:
        count_doc = {"count": count, "published": curr_month}
        # NOTE(review): presumably a PyMongo collection -- insert() was
        # deprecated in PyMongo 3.0 and removed in 4.0; move to
        # insert_one() if the driver is upgraded.
        db_raw_articles.insert(count_doc)
        curr_month = date_util.get_next_month(curr_month)

    # Parenthesised form prints the same text on Python 2 and Python 3
    print("... Done")
def get_daily_term_data(term, time_start, time_end, granularity):
    """
    Get 'daily' granularity metric data for the given term and time range

    Documents are read from db_metric_data_daily; each one is expected to
    carry a "date" field and a "daily" mapping indexed by str(day of
    month). The merge below consumes at most one document per month, in
    date order; months without a matching document are filled with zeros.

    Args:
        term: value matched against the "term" field of the documents.
        time_start: first day to report (datetime; .year/.month/.day and
            .replace() are used).
        time_end: end of the range. The month loop runs while the current
            month is < time_end, and in the final month days are emitted
            up to and including time_end.day.
        granularity: accepted for signature parity; not used here.

    Returns:
        A tuple (data, total, avg, max_val): the per-day values, the sum
        of the values read from documents, the mean over all emitted days
        (0 when nothing was emitted) and the largest value seen (never
        below 0, the starting maximum).
        NOTE(review): the file uses Python 2 syntax elsewhere, where `/`
        on two ints truncates -- confirm whether a truncated average is
        intended.
    """
    data = []
    total = 0
    max_val = 0

    # Query the DB for all documents containing the given term and within
    # the given date range. Both bounds must sit in ONE "date"
    # sub-document: repeating the "date" key in a dict literal keeps only
    # the last entry, which silently dropped the $gte bound. Without it,
    # older documents sorted first, never matched the current month and
    # stalled the merge so every day came back as 0.
    #
    # The lower bound is snapped to the start of time_start's month so
    # that the document covering a mid-month time_start is still fetched
    # (presumably documents are dated within the month they describe --
    # TODO confirm against the writer).
    range_start = time_start.replace(
        day=1, hour=0, minute=0, second=0, microsecond=0)
    db_query = {
        "term": term,
        "date": {"$gte": range_start, "$lte": time_end},
    }
    result_set = list(db_metric_data_daily.find(db_query).sort("date"))
    result_set_idx = 0

    # Iterate over each month in the requested time range
    curr_month = time_start
    while curr_month < time_end:
        # Typically the end day is the number of days in the month.
        # However, if this is the final month in the requested time
        # range, the day may be earlier based upon the end date.
        days_in_month = date_util.get_days_in_month(
            curr_month.year, curr_month.month)
        if date_util.is_same_month(curr_month, time_end):
            days_in_month = time_end.day
        days = range(curr_month.day, days_in_month + 1)

        # Peek at the next unconsumed result. If it matches the current
        # month, use its data to build the response. Otherwise, we have
        # a gap in the data which is filled with zeros.
        result = None
        if result_set_idx < len(result_set):
            result = result_set[result_set_idx]

        if result and date_util.is_same_month(curr_month, result["date"]):
            # Consume the result; the bounds check above copes with the
            # index running past the end of the list.
            result_set_idx += 1
            daily = result["daily"]
            for day in days:
                val = daily[str(day)]
                data.append(val)
                total += val
                if val > max_val:
                    max_val = val
        else:
            data.extend([0] * len(days))

        curr_month = date_util.get_next_month(curr_month)

    # Calculate the final average over the days actually emitted. (The
    # previous counter added the full month length even when the first
    # month started after day 1, inflating the denominator.)
    avg = 0
    if data:
        avg = total / len(data)

    return data, total, avg, max_val