Code Example #1
File: ka_report.py Project: Khan/analytics
def report_memcache_statistics(stats, download_dt, graphite_host,
                               verbose=False, dry_run=False):
    """Store memcache statistics in mongo and maybe graphite.

    Arguments:
      stats: Dict returned by parsers.Memcache.statistics().
      download_dt: Datetime when /memcache was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    record = {'utc_datetime': download_dt,
              'hit_count': stats['hit_count'].value(),
              'miss_count': stats['miss_count'].value(),
              'hit_ratio': stats['hit_ratio'].value(),
              'item_count': stats['item_count'].value(),
              'total_cache_size_bytes': stats['total_cache_size'].value(),
              'oldest_item_age_seconds': stats['oldest_item_age'].value(),
              }
    if verbose:
        print record

    if not dry_run:
        graphite_util.maybe_send_to_graphite(graphite_host, 'memcache',
                                             [record])
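
For context, here is a minimal sketch of how report_memcache_statistics might be driven. The MockStat wrapper and the sample figures are illustrative stand-ins for the objects returned by parsers.Memcache.statistics(), which is not shown on this page.

import datetime


class MockStat(object):
    """Illustrative stand-in for a parsed stat exposing .value()."""
    def __init__(self, v):
        self._v = v

    def value(self):
        return self._v


# Hypothetical sample values; real ones come from parsers.Memcache.statistics().
stats = {
    'hit_count': MockStat(123456),
    'miss_count': MockStat(7890),
    'hit_ratio': MockStat(0.94),
    'item_count': MockStat(4200),
    'total_cache_size': MockStat(64 * 1024 * 1024),
    'oldest_item_age': MockStat(3600),
}

report_memcache_statistics(stats,
                           download_dt=datetime.datetime.utcnow(),
                           graphite_host='',   # ''/None skips the graphite send
                           verbose=True,       # print the record to stdout
                           dry_run=True)       # and do not store it anywhere
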
Code Example #2
def report_instance_summary(summary,
                            module,
                            download_dt,
                            graphite_host,
                            verbose=False,
                            dry_run=False):
    """Send instance summary to graphite.

    Arguments:
      summary: Dict returned by parsers.InstanceSummary.summary().
      module: the name of the GAE module that this summary has info for.
      download_dt: Datetime when /instance_summary was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    record = {
        'utc_datetime': download_dt,
        'num_instances': summary['total_instances'],
        'average_qps': summary['average_qps'],
        'average_latency_ms': summary['average_latency_ms'],
        'average_memory_mb': summary['average_memory_mb'],
    }
    if verbose:
        print record

    if not dry_run:
        graphite_util.maybe_send_to_graphite(graphite_host,
                                             'instances', [record],
                                             module=module)
Code Example #3
def report_memcache_statistics(stats,
                               download_dt,
                               graphite_host,
                               verbose=False,
                               dry_run=False):
    """Store memcache statistics in mongo and maybe graphite.

    Arguments:
      stats: Dict returned by parsers.Memcache.statistics().
      download_dt: Datetime when /memcache was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    record = {
        'utc_datetime': download_dt,
        'hit_count': stats['hit_count'].value(),
        'miss_count': stats['miss_count'].value(),
        'hit_ratio': stats['hit_ratio'].value(),
        'item_count': stats['item_count'].value(),
        'total_cache_size_bytes': stats['total_cache_size'].value(),
    }
    if 'oldest_item_age' in stats:
        record['oldest_item_age_seconds'] = stats['oldest_item_age'].value()

    if verbose:
        print record

    if not dry_run:
        graphite_util.maybe_send_to_graphite(graphite_host, 'memcache',
                                             [record])
Code Example #4
File: ka_report.py Project: arunpn/analytics
def report_instance_summary(summary,
                            download_dt,
                            graphite_host='',
                            verbose=False,
                            dry_run=False):
    """Store instance summary in mongo and maybe graphite.

    Arguments:
      summary: Dict returned by parsers.InstanceSummary.summary().
      download_dt: Datetime when /instance_summary was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    record = {
        'utc_datetime': download_dt,
        'num_instances': summary['total_instances'],
        'average_qps': summary['average_qps'],
        'average_latency_ms': summary['average_latency_ms'],
        'average_memory_mb': summary['average_memory_mb'],
    }
    if verbose:
        print record

    if not dry_run:
        # Do the graphite send first, since mongo modifies 'records' in place.
        graphite_util.maybe_send_to_graphite(graphite_host, 'instances',
                                             [record])
        _mongo_db()['gae_dashboard_instance_reports'].insert(record)
Code Example #5
File: ka_report.py Project: arunpn/analytics
def report_instance_summary(summary, download_dt, graphite_host='',
                            verbose=False, dry_run=False):
    """Store instance summary in mongo and maybe graphite.

    Arguments:
      summary: Dict returned by parsers.InstanceSummary.summary().
      download_dt: Datetime when /instance_summary was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    record = {'utc_datetime': download_dt,
              'num_instances': summary['total_instances'],
              'average_qps': summary['average_qps'],
              'average_latency_ms': summary['average_latency_ms'],
              'average_memory_mb': summary['average_memory_mb'],
              }
    if verbose:
        print record

    if not dry_run:
        # Do the graphite send first, since mongo modifies 'records' in place.
        graphite_util.maybe_send_to_graphite(graphite_host, 'instances',
                                             [record])
        _mongo_db()['gae_dashboard_instance_reports'].insert(record)
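
The "Do the graphite send first" comment in the two examples above deserves a note: with the legacy PyMongo API used here, insert() adds an '_id' ObjectId to the passed dict in place, so inserting into mongo before the graphite send would leak that driver-generated field into the graphite payload. A minimal demonstration of the mutation, assuming a local mongod is available (newer pymongo versions use insert_one(), which mutates the dict the same way):

import pymongo

db = pymongo.MongoClient()['scratch_db']   # assumes a local mongod for the demo
record = {'num_instances': 3, 'average_qps': 12.5}

db['gae_dashboard_instance_reports'].insert(record)   # legacy pymongo call
# The driver added a key to the dict we passed in:
print sorted(record.keys())   # ['_id', 'average_qps', 'num_instances']
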
Code Example #6
def main(csv_iter):
    """Parse App Engine usage report CSV and bring a mongo db collection
    up-to-date with it.

    csv_iter is any object that returns a line of the usage report CSV for
    each iteration. This includes the header line containing field names.
    """
    parser = argparse.ArgumentParser(description=__doc__.split('\n\n', 1)[0])
    parser.add_argument('--graphite_host',
                        default='carbon.hostedgraphite.com:2004',
                        help=('host:port to send stats to graphite '
                              '(using the pickle protocol). '
                              '[default: %(default)s]'))
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='print report on stdout')
    parser.add_argument('-n',
                        '--dry-run',
                        action='store_true',
                        default=False,
                        help='do not store report in the database')
    args = parser.parse_args()

    csvreader = csv.DictReader(csv_iter)

    start_date = _time_t_of_latest_record()
    if start_date is None:
        print 'No record of previous fetches; importing all records as new.'
        start_date = datetime.date(2000, 1, 1)
    else:
        start_date = datetime.date.fromtimestamp(start_date)
    start_date = start_date.strftime('%Y-%m-%d')

    print 'Importing usage reports starting from %s' % start_date

    records_to_add = []
    for (dt, key, value) in _reports_since_dt(csvreader, start_date):
        records_to_add.append({'utc_datetime': dt, _munge_key(key): value})

    if args.verbose:
        print records_to_add

    print 'Importing %s documents' % len(records_to_add)

    if args.dry_run:
        print 'Skipping import during dry-run.'
        records_to_add = []
    elif records_to_add:
        graphite_util.maybe_send_to_graphite(args.graphite_host, 'usage',
                                             records_to_add)

    if records_to_add:
        _write_time_t_of_latest_record(records_to_add)
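
graphite_util.maybe_send_to_graphite is an internal helper of this project and its source is not shown here. As a rough sketch only: the --graphite_host default points at port 2004, carbon's pickle listener, so a sender along these lines would be typical. The record handling and metric naming below are assumptions, not the project's actual implementation.

import calendar
import cPickle
import socket
import struct


def send_to_graphite_pickle(host_port, prefix, records):
    """Rough sketch of a carbon pickle-protocol sender.

    Does nothing when host_port is ''/None, mirroring the 'maybe_'
    behaviour implied by the call sites above.
    """
    if not host_port:
        return
    host, port = host_port.split(':')

    # Carbon's pickle listener expects a list of (path, (timestamp, value)).
    tuples = []
    for record in records:
        timestamp = calendar.timegm(record['utc_datetime'].timetuple())
        for key, value in record.items():
            if key != 'utc_datetime':
                tuples.append(('%s.%s' % (prefix, key), (timestamp, value)))

    payload = cPickle.dumps(tuples, protocol=2)
    header = struct.pack('!L', len(payload))   # 4-byte big-endian length prefix

    sock = socket.create_connection((host, int(port)))
    try:
        sock.sendall(header + payload)
    finally:
        sock.close()
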
Code Example #7
def main(csv_iter):
    """Parse App Engine usage report CSV and bring a mongo db collection
    up-to-date with it.

    csv_iter is any object that returns a line of the usage report CSV for
    each iteration. This includes the header line containing field names.
    """
    yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1)

    parser = argparse.ArgumentParser(description=__doc__.split('\n\n', 1)[0])
    parser.add_argument('start_date',
                        nargs='?',
                        default=yesterday.strftime('%Y-%m-%d'),
                        help=('Ignore data before this date (YYYY-MM-DD) '
                              '[default: %(default)s]'))
    parser.add_argument('--graphite_host',
                        default='carbon.hostedgraphite.com:2004',
                        help=('host:port to send stats to graphite '
                              '(using the pickle protocol). '
                              '[default: %(default)s]'))
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='print report on stdout')
    parser.add_argument('-n',
                        '--dry-run',
                        action='store_true',
                        default=False,
                        help='do not store report in the database')
    args = parser.parse_args()

    csvreader = csv.DictReader(csv_iter)

    print 'Importing usage reports starting from %s' % args.start_date

    records_to_add = []
    for (dt, key, value) in _reports_since_dt(csvreader, args.start_date):
        records_to_add.append({'utc_datetime': dt, _munge_key(key): value})

    if args.verbose:
        print records_to_add

    print 'Importing %s documents' % len(records_to_add)

    if args.dry_run:
        print >> sys.stderr, 'Skipping import during dry-run.'
    elif records_to_add:
        graphite_util.maybe_send_to_graphite(args.graphite_host, 'usage',
                                             records_to_add)
Code Example #8
def main(csv_iter):
    """Parse App Engine usage report CSV and bring a mongo db collection
    up-to-date with it.

    csv_iter is any object that returns a line of the usage report CSV for
    each iteration. This includes the header line containing field names.
    """
    parser = argparse.ArgumentParser(description=__doc__.split('\n\n', 1)[0])
    parser.add_argument('--graphite_host',
                        default='carbon.hostedgraphite.com:2004',
                        help=('host:port to send stats to graphite '
                              '(using the pickle protocol). '
                              '[default: %(default)s]'))
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='print report on stdout')
    parser.add_argument('-n', '--dry-run', action='store_true', default=False,
                        help='do not store report in the database')
    args = parser.parse_args()

    csvreader = csv.DictReader(csv_iter)

    start_date = _time_t_of_latest_record()
    if start_date is None:
        print 'No record of previous fetches; importing all records as new.'
        start_date = datetime.date(2000, 1, 1)
    else:
        start_date = datetime.date.fromtimestamp(start_date)
    start_date = start_date.strftime('%Y-%m-%d')

    print 'Importing usage reports starting from %s' % start_date

    records_to_add = []
    for (dt, key, value) in _reports_since_dt(csvreader, start_date):
        records_to_add.append({'utc_datetime': dt, _munge_key(key): value})

    if args.verbose:
        print records_to_add

    print 'Importing %s documents' % len(records_to_add)

    if args.dry_run:
        print 'Skipping import during dry-run.'
        records_to_add = []
    elif records_to_add:
        graphite_util.maybe_send_to_graphite(args.graphite_host, 'usage',
                                             records_to_add)

    if records_to_add:
        _write_time_t_of_latest_record(records_to_add)
Code Example #9
File: load_usage_reports.py Project: Khan/analytics
def main(csv_iter):
    """Parse App Engine usage report CSV and bring a mongo db collection
    up-to-date with it.

    csv_iter is any object that returns a line of the usage report CSV for
    each iteration. This includes the header line containing field names.
    """
    yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1)

    parser = argparse.ArgumentParser(description=__doc__.split('\n\n', 1)[0])
    parser.add_argument('start_date', nargs='?',
                        default=yesterday.strftime('%Y-%m-%d'),
                        help=('Ignore data before this date (YYYY-MM-DD) '
                              '[default: %(default)s]'))
    parser.add_argument('--graphite_host',
                        default='carbon.hostedgraphite.com:2004',
                        help=('host:port to send stats to graphite '
                              '(using the pickle protocol). '
                              '[default: %(default)s]'))
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='print report on stdout')
    parser.add_argument('-n', '--dry-run', action='store_true', default=False,
                        help='do not store report in the database')
    args = parser.parse_args()

    csvreader = csv.DictReader(csv_iter)

    print 'Importing usage reports starting from %s' % args.start_date

    records_to_add = []
    for (dt, key, value) in _reports_since_dt(csvreader, args.start_date):
        records_to_add.append({'utc_datetime': dt, _munge_key(key): value})

    if args.verbose:
        print records_to_add

    print 'Importing %s documents' % len(records_to_add)

    if args.dry_run:
        print >>sys.stderr, 'Skipping import during dry-run.'
    elif records_to_add:
        graphite_util.maybe_send_to_graphite(args.graphite_host, 'usage',
                                             records_to_add)
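
A minimal sketch of how main() might be invoked: any iterable of CSV lines, header included, satisfies the contract described in the docstring, so piping the downloaded report in on stdin is enough. The __main__ guard below is illustrative, not necessarily how the project wires it up.

import sys

if __name__ == '__main__':
    # The usage-report CSV, header line included, is read line by line,
    # while the remaining command-line flags are handled by argparse.
    main(sys.stdin)
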
Code Example #10
File: ka_report.py Project: Khan/analytics
def report_instance_summary(summary, download_dt, graphite_host,
                            verbose=False, dry_run=False):
    """Send instance summary to graphite.

    Arguments:
      summary: Dict returned by parsers.InstanceSummary.summary().
      download_dt: Datetime when /instance_summary was downloaded.
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    record = {'utc_datetime': download_dt,
              'num_instances': summary['total_instances'],
              'average_qps': summary['average_qps'],
              'average_latency_ms': summary['average_latency_ms'],
              'average_memory_mb': summary['average_memory_mb'],
              }
    if verbose:
        print record

    if not dry_run:
        graphite_util.maybe_send_to_graphite(graphite_host, 'instances',
                                             [record])
Code Example #11
File: dashboard_report.py Project: arunpn/analytics
def parse_and_commit_record(input_json, download_time_t, graphite_host='',
                            verbose=False, dry_run=False):
    """Parse and store dashboard chart data.

    Arguments:
      input_json: A JSON list of dicts containing the chart-url for
         one chart, along with an int describing which chart it is
         and other identifying data; see the help for <infile> in main(),
         or just look at how this json is constructed in fetch_stats.sh.
      download_time_t: When /dashboard was downloaded in seconds (UTC).
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    # Strip off the None sentinel we add to the end of the input json.
    input_json = [j for j in input_json if j is not None]
    if not input_json:
        return

    # Extract named time series data from the raw HTML.
    named_series = {}
    for chart_json in input_json:
        chart_label_index = chart_json['chart_num']
        chart_label = _label_to_field_map.keys()[chart_label_index]

        time_label_index = chart_json['time_window']
        (time_label, time_duration) = _time_windows[time_label_index]
        time_delta = datetime.timedelta(hours=time_duration)

        chart_url = chart_json['chart_url_data']['chart_url']
        chart_data = unpack_chart_data(chart_url, time_delta.total_seconds())
        for series_label, xy_pairs in chart_data:
            field_name = lookup_field_name(chart_label, series_label)
            named_series[field_name] = xy_pairs

    # Assume all elements of our input_json list have the same time window.
    assert all(input_json[i]['time_window'] == input_json[0]['time_window']
               for i in xrange(len(input_json)))
    chart_start_time_t = download_time_t - time_delta.total_seconds()

    # Determine the starting point for records we want to add.  This
    # script may be run by cron and fetches a minimum of 6 hours of
    # data, but chances are good that it runs more frequently.
    mongo_collection = _mongo_collection()
    time_t_of_latest_record = _time_t_of_latest_record(mongo_collection)
    if time_t_of_latest_record is None:
        print >>sys.stderr, ('No dashboard records found in mongo. '
                             'Importing all records as new.')
        time_t_of_latest_record = 0

    # Build time-keyed records from the named time series data and
    # decide which records will be stored.
    records = []
    for time_value, record in aggregate_series_by_time(named_series):
        record_time_t = chart_start_time_t + time_value
        if record_time_t > time_t_of_latest_record:
            record['utc_datetime'] = datetime.datetime.utcfromtimestamp(
                record_time_t)
            records.append(record)

    if verbose:
        print records

    print >>sys.stderr, 'Importing %d record%s' % (len(records),
                                                   's'[len(records) == 1:])
    if dry_run:
        print >>sys.stderr, 'Skipping import during dry-run.'
    elif records:
        # Do the graphite send first, since mongo modifies 'records' in place.
        graphite_util.maybe_send_to_graphite(graphite_host, 'summary', records)
        mongo_collection.insert(records)
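
The slightly cryptic 's'[len(records) == 1:] in the status line is a pluralization trick: the boolean comparison acts as the slice start, so the 's' is kept for zero or many records and dropped for exactly one. For example:

for n in (0, 1, 2):
    print 'Importing %d record%s' % (n, 's'[n == 1:])
# Importing 0 records
# Importing 1 record
# Importing 2 records
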
Code Example #12
def parse_and_commit_record(input_json,
                            start_time_t,
                            download_time_t,
                            graphite_host,
                            verbose=False,
                            dry_run=False):
    """Parse and store dashboard chart data.

    Arguments:
      input_json: A JSON list of dicts containing the chart-url for
         one chart, along with an int describing which chart it is
         and other identifying data; see the help for <infile> in main(),
         or just look at how this json is constructed in fetch_stats.sh.
      start_time_t: Ignore all datapoints before this time_t (given that
         the last datapoint is at time download_time_t).
      download_time_t: When /dashboard was downloaded in seconds (UTC).
      graphite_host: host:port of graphite server to send data to.
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    # Strip off the None sentinel we add to the end of the input json.
    input_json = [j for j in input_json if j is not None]
    if not input_json:
        return

    # Extract named time series data from the raw HTML.
    named_series = {}
    for chart_json in input_json:
        chart_label_index = chart_json['chart_num']
        chart_label = _label_to_field_map.keys()[chart_label_index]

        time_label_index = chart_json['time_window']
        (time_label, time_duration) = _time_windows[time_label_index]
        time_delta = datetime.timedelta(hours=time_duration)

        chart_url = chart_json['chart_url_data']['chart_url']
        chart_data = unpack_chart_data(chart_url, time_delta.total_seconds())
        for series_label, xy_pairs in chart_data:
            field_name = lookup_field_name(chart_label, series_label)
            named_series[field_name] = xy_pairs

    # Assume all elements of our input_json list have the same time window.
    assert all(input_json[i]['time_window'] == input_json[0]['time_window']
               for i in xrange(len(input_json)))
    chart_start_time_t = download_time_t - time_delta.total_seconds()

    # Build time-keyed records from the named time series data and
    # decide which records will be stored.
    records = []
    for time_value, record in aggregate_series_by_time(named_series):
        record_time_t = chart_start_time_t + time_value
        if record_time_t > start_time_t:
            record['utc_datetime'] = datetime.datetime.utcfromtimestamp(
                record_time_t)
            records.append(record)

    if verbose:
        print records

    print >> sys.stderr, 'Importing %d record%s' % (len(records),
                                                    's'[len(records) == 1:])
    if dry_run:
        print >> sys.stderr, 'Skipping import during dry-run.'
    elif records:
        graphite_util.maybe_send_to_graphite(graphite_host, 'summary', records)
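
One subtlety in these dashboard parsers: time_delta is bound inside the for loop but used after it when computing chart_start_time_t. Python keeps loop-body bindings alive after the loop ends, and the assert that every chart shares the same time_window is what makes relying on the last iteration's value safe. A tiny illustration of that scoping behaviour:

for hours in (6, 6, 6):
    window_seconds = hours * 3600
# Names bound in the loop body survive the loop itself:
print hours, window_seconds   # prints: 6 21600
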
Code Example #13
File: dashboard_report.py Project: Khan/analytics
def parse_and_commit_record(input_json, start_time_t, download_time_t,
                            graphite_host, verbose=False, dry_run=False):
    """Parse and store dashboard chart data.

    Arguments:
      input_json: A JSON list of dicts containing the chart-url for
         one chart, along with an int describing which chart it is
         and other identifying data; see the help for <infile> in main(),
         or just look at how this json is constructed in fetch_stats.sh.
      start_time_t: Ignore all datapoints before this time_t (given that
         the last datapoint is at time download_time_t).
      download_time_t: When /dashboard was downloaded in seconds (UTC).
      graphite_host: host:port of graphite server to send data to.
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    # Strip off the None sentinel we add to the end of the input json.
    input_json = [j for j in input_json if j is not None]
    if not input_json:
        return

    # Extract named time series data from the raw HTML.
    named_series = {}
    for chart_json in input_json:
        chart_label_index = chart_json['chart_num']
        chart_label = _label_to_field_map.keys()[chart_label_index]

        time_label_index = chart_json['time_window']
        (time_label, time_duration) = _time_windows[time_label_index]
        time_delta = datetime.timedelta(hours=time_duration)

        chart_url = chart_json['chart_url_data']['chart_url']
        chart_data = unpack_chart_data(chart_url, time_delta.total_seconds())
        for series_label, xy_pairs in chart_data:
            field_name = lookup_field_name(chart_label, series_label)
            named_series[field_name] = xy_pairs

    # Assume all elements of our input_json list have the same time window.
    assert all(input_json[i]['time_window'] == input_json[0]['time_window']
               for i in xrange(len(input_json)))
    chart_start_time_t = download_time_t - time_delta.total_seconds()

    # Build time-keyed records from the named time series data and
    # decide which records will be stored.
    records = []
    for time_value, record in aggregate_series_by_time(named_series):
        record_time_t = chart_start_time_t + time_value
        if record_time_t > start_time_t:
            record['utc_datetime'] = datetime.datetime.utcfromtimestamp(
                record_time_t)
            records.append(record)

    if verbose:
        print records

    print >>sys.stderr, 'Importing %d record%s' % (len(records),
                                                   's'[len(records) == 1:])
    if dry_run:
        print >>sys.stderr, 'Skipping import during dry-run.'
    elif records:
        graphite_util.maybe_send_to_graphite(graphite_host, 'summary', records)
Code Example #14
def parse_and_commit_record(input_json,
                            download_time_t,
                            graphite_host='',
                            verbose=False,
                            dry_run=False):
    """Parse and store dashboard chart data.

    Arguments:
      input_json: A JSON list of dicts containing the chart-url for
         one chart, along with an int describing which chart it is
         and other identifying data; see the help for <infile> in main(),
         or just look at how this json is constructed in fetch_stats.sh.
      download_time_t: When /dashboard was downloaded in seconds (UTC).
      graphite_host: host:port of graphite server to send data to, or ''/None
      verbose: If True, print report to stdout.
      dry_run: If True, do not store report in the database.
    """
    # Strip off the None sentinel we add to the end of the input json.
    input_json = [j for j in input_json if j is not None]
    if not input_json:
        return

    # Extract named time series data from the raw HTML.
    named_series = {}
    for chart_json in input_json:
        chart_label_index = chart_json['chart_num']
        chart_label = _label_to_field_map.keys()[chart_label_index]

        time_label_index = chart_json['time_window']
        (time_label, time_duration) = _time_windows[time_label_index]
        time_delta = datetime.timedelta(hours=time_duration)

        chart_url = chart_json['chart_url_data']['chart_url']
        chart_data = unpack_chart_data(chart_url, time_delta.total_seconds())
        for series_label, xy_pairs in chart_data:
            field_name = lookup_field_name(chart_label, series_label)
            named_series[field_name] = xy_pairs

    # Assume all elements of our input_json list have the same time window.
    assert all(input_json[i]['time_window'] == input_json[0]['time_window']
               for i in xrange(len(input_json)))
    chart_start_time_t = download_time_t - time_delta.total_seconds()

    # Determine the starting point for records we want to add.  This
    # script may be run by cron and fetches a minimum of 6 hours of
    # data, but chances are good that it runs more frequently.
    mongo_collection = _mongo_collection()
    time_t_of_latest_record = _time_t_of_latest_record(mongo_collection)
    if time_t_of_latest_record is None:
        print >> sys.stderr, ('No dashboard records found in mongo. '
                              'Importing all records as new.')
        time_t_of_latest_record = 0

    # Build time-keyed records from the named time series data and
    # decide which records will be stored.
    records = []
    for time_value, record in aggregate_series_by_time(named_series):
        record_time_t = chart_start_time_t + time_value
        if record_time_t > time_t_of_latest_record:
            record['utc_datetime'] = datetime.datetime.utcfromtimestamp(
                record_time_t)
            records.append(record)

    if verbose:
        print records

    print >> sys.stderr, 'Importing %d record%s' % (len(records),
                                                    's'[len(records) == 1:])
    if dry_run:
        print >> sys.stderr, 'Skipping import during dry-run.'
    elif records:
        # Do the graphite send first, since mongo modifies 'records' in place.
        graphite_util.maybe_send_to_graphite(graphite_host, 'summary', records)
        mongo_collection.insert(records)