コード例 #1
0
def fetch_entities(entity_type, is_ndb, start_date=None, end_date=None,
                   max_logs=None, index_name=None):
    """Download entities of one kind from the main Khan Academy server.

    Arguments:
        entity_type: The appengine "Kind" for the entity to download.
        is_ndb: Whether or not the entity is an NDB model.
        start_date: A datetime object for the inclusive start time of when
            entities should have been modified to be included in the result.
        end_date: A datetime object for the exclusive end time of when
            entities should have been modified to be included in the result.
        max_logs: The maximum number of entities to return in the result set.
            Note that appengine generally does not handle beyond 10000 (it is
            an open item to fix the server code to handle this with cursors).
        index_name: The entity property to be filtered on for the date range.
    Returns:
        The raw server results from the urlfetch.
    """
    # TODO(benkomalo): move common preprocessing of server results into this
    # method as most clients will probably want to do similar things with
    # errors or deserializing of sorts.

    candidate_params = [
        ('is_ndb', int(is_ndb)),
        ('dt_start', date_util.to_date_iso(start_date)),
        ('dt_end', date_util.to_date_iso(end_date)),
        ('max', max_logs),
        ('index', index_name),
    ]
    # Parameters whose value is falsy (None, 0, '') are left out of the
    # request entirely; note this also drops is_ndb when it is False.
    sent_params = [(key, value) for key, value in candidate_params if value]
    query_string = urllib.urlencode(sent_params)

    # TODO(david): Send request headers that we accept gzipped data?
    return oauth_util.fetch_url.fetch_url(
        '/api/v1/dev/protobuf/%s?%s' % (entity_type, query_string))
コード例 #2
0
ファイル: fetch_entities.py プロジェクト: mwahl/analytics
def fetch_entities(entity_type, start_date=None, end_date=None, max_logs=None):
    """Download entities of one kind from the main Khan Academy server.

    Arguments:
        entity_type: The appengine "Kind" for the entity to download.
        start_date: A datetime object for the inclusive start time of when
            entities should have been modified to be included in the result.
        end_date: A datetime object for the exclusive end time of when
            entities should have been modified to be included in the result.
        max_logs: The maximum number of entities to return in the result set.
            Note that appengine generally does not handle beyond 10000 (it is
            an open item to fix the server code to handle this with cursors).
    Returns:
        The raw server results from the urlfetch.
    """
    # TODO(benkomalo): move common preprocessing of server results into this
    # method as most clients will probably want to do similar things with
    # errors or deserializing of sorts.

    all_params = [
        ('dt_start', date_util.to_date_iso(start_date)),
        ('dt_end', date_util.to_date_iso(end_date)),
        ('max', max_logs),
    ]
    # Only parameters carrying a truthy value make it into the query string.
    kept_params = [pair for pair in all_params if pair[1]]
    query_string = urllib.urlencode(kept_params)

    # TODO(david): Send request headers that we accept gzipped data?
    path_and_query = '/api/v1/dev/protobuf/%s?%s' % (entity_type,
                                                     query_string)
    return oauth_util.fetch_url.fetch_url(path_and_query)
コード例 #3
0
ファイル: fetch_logs.py プロジェクト: bopopescu/analytics-1
def get_cmd_line_args():
    """Parse and validate the command-line options of the log fetcher.

    The default date range runs from yesterday at 00:00 (inclusive) to
    today at 00:00 (exclusive), both rendered by date_util.to_date_iso.

    Returns:
        The options object produced by the optparse parser.

    Exits the process (through parser.error or sys.exit) when both
    --appengine_version and --backend are given, or when any positional
    arguments are left over after option parsing.
    """
    midnight_today = datetime.datetime.combine(datetime.date.today(),
                                               datetime.time())
    midnight_yesterday = midnight_today - datetime.timedelta(days=1)

    # Each entry is (option flags, keyword arguments for add_option), listed
    # in the order the options should appear in --help.
    option_table = [
        (("-s", "--start_date"),
         dict(default=date_util.to_date_iso(midnight_yesterday),
              help=("Earliest inclusive date of logs to fetch, "
                    "in ISO 8601 format. "
                    "Defaults to yesterday at 00:00."))),
        (("-e", "--end_date"),
         dict(default=date_util.to_date_iso(midnight_today),
              help=("Latest exclusive date of logs to fetch, "
                    "in ISO 8601 format. "
                    "Defaults to today at 00:00."))),
        (("-i", "--interval"),
         dict(default=10,
              help=("Time interval to fetch at a time, in seconds. "
                    "Defaults to 10."))),
        (("-r", "--max_retries"),
         dict(default=8,
              help=("Maximum # of retries for request attempts "
                    "before failing. Defaults to 8."))),
        (("-v", "--appengine_version"),
         dict(default=None,
              help=("If set, the appengine-version (e.g. "
                    "0515-ae96fc55243b) to request the logs from. "
                    "If None, will fetch from all versions (but "
                    "only from one 'class' of versions, like frontend "
                    "vs. backends)."))),
        (("-b", "--backend"),
         dict(action="store_true",
              default=False,
              help=("If set will try and read the logs from all of "
                    "the currently deployed logical backends (those "
                    "that end with 'backend')."))),
    ]

    parser = optparse.OptionParser(
        usage="%prog [options]",
        description=("Fetches logs from khanacademy.org using its v1 API. "
                     "Outputs to stdout."))
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, extra_args = parser.parse_args()

    # A specific version and "all backends" cannot both be requested.
    both_selected = options.appengine_version and options.backend
    if both_selected:
        parser.error("options --appengine_version and --backend are "
                     "mutually exclusive")

    # Anything left over after option parsing is a usage mistake.
    if extra_args:
        sys.exit('Unknown arguments %s. See --help.' % extra_args)

    return options
コード例 #4
0
ファイル: fetch_entities.py プロジェクト: mwahl/analytics
def get_cmd_line_args():
    """Parse the command-line options of the entity backup script.

    The default date range runs from yesterday at 00:00 (inclusive) to
    today at 00:00 (exclusive), both rendered by date_util.to_date_iso.

    No option declares a type, so values supplied on the command line for
    --interval, --max_logs and --max_retries arrive as strings, while the
    defaults are ints; callers should convert before doing arithmetic.

    Returns:
        The options object produced by the optparse parser.  If
        --output_file was not given, options.output_file is set to
        "<type>.pickle".

    Exits with status 1 (after writing a message to stderr) when --type is
    missing.  Leftover positional arguments are ignored.
    """
    today_dt = dt.datetime.combine(dt.date.today(), dt.time())
    yesterday_dt = today_dt - dt.timedelta(days=1)

    parser = optparse.OptionParser(
        usage="%prog [options]",
        description=("Fetches problem logs from khanacademy.org using its "
                     "v1 API. Outputs in pickled entities."))
    parser.add_option(
        "-s", "--start_date",
        default=date_util.to_date_iso(yesterday_dt),
        help=("Earliest inclusive date of logs to fetch, in ISO 8601 "
              "format. Defaults to yesterday at 00:00."))
    parser.add_option(
        "-e", "--end_date",
        default=date_util.to_date_iso(today_dt),
        help=("Latest exclusive date of logs to fetch, in ISO 8601 "
              "format. Defaults to today at 00:00."))
    parser.add_option(
        "-i", "--interval",
        default=10,
        help="Time interval to fetch at a time, in seconds. Defaults to 10.")
    parser.add_option(
        "-l", "--max_logs",
        default=1000,
        help=("Maximum # of log entries to fetch per interval. "
              "Defaults to 1000."))
    parser.add_option(
        "-r", "--max_retries",
        default=8,
        help=("Maximum # of retries for request attempts before failing. "
              "Defaults to 8."))
    parser.add_option("-o", "--output_file",
                      help="Name of the file to output.")
    parser.add_option("-t", "--type", help="Entity type to back up")

    options, _ = parser.parse_args()

    if not options.type:
        # Use sys.stderr.write/sys.exit rather than the print statement and
        # the site-provided exit() helper: exit() is intended for the
        # interactive shell and is not guaranteed to exist in every run mode.
        sys.stderr.write('Please specify an entity type to back up\n')
        sys.exit(1)
    if not options.output_file:
        options.output_file = options.type + ".pickle"

    return options
コード例 #5
0
ファイル: fetch_logs.py プロジェクト: prantik/analytics
def get_cmd_line_args():
    """Parse the command-line options of the log fetcher.

    The default date range runs from yesterday at 00:00 (inclusive) to
    today at 00:00 (exclusive), both rendered by date_util.to_date_iso.

    Returns:
        The options object produced by the optparse parser.

    Exits the process through sys.exit when any positional arguments are
    left over after option parsing.
    """
    start_of_today = datetime.datetime.combine(datetime.date.today(),
                                               datetime.time())
    start_of_yesterday = start_of_today - datetime.timedelta(days=1)

    # Each entry is (option flags, keyword arguments for add_option), listed
    # in the order the options should appear in --help.
    option_table = [
        (("-s", "--start_date"),
         dict(default=date_util.to_date_iso(start_of_yesterday),
              help=("Earliest inclusive date of logs to fetch, "
                    "in ISO 8601 format. "
                    "Defaults to yesterday at 00:00."))),
        (("-e", "--end_date"),
         dict(default=date_util.to_date_iso(start_of_today),
              help=("Latest exclusive date of logs to fetch, "
                    "in ISO 8601 format. "
                    "Defaults to today at 00:00."))),
        (("-i", "--interval"),
         dict(default=10,
              help=("Time interval to fetch at a time, in seconds. "
                    "Defaults to 10."))),
        (("-r", "--max_retries"),
         dict(default=8,
              help=("Maximum # of retries for request attempts "
                    "before failing. Defaults to 8."))),
        (("-v", "--appengine_version"),
         dict(default=None,
              help=("If set, the appengine-version (e.g. "
                    "0515-ae96fc55243b) to request the logs from. "
                    "If None, will use the current active version."))),
        (("--file_for_alternate_appengine_versions",),
         dict(default=None,
              help=("If set, whenever fetching logs returns 0 "
                    "loglines, look through this file to find "
                    "alternate appengine_versions, and retry "
                    "fetching the logs against those versions "
                    "until one returns non-zero results.  The "
                    "file should have text like "
                    "appengine_versions=v1,v2,v3"))),
        # TODO: Figure out a better way to get the backend versions that
        # were active during the time period.
        (("-b", "--backend"),
         dict(action="store_true",
              default=False,
              help=("If set will try and read the logs from the "
                    "versions in the file designated by "
                    "--file_for_alternate_appengine_versions with "
                    "'mapreducebackends-' prefixed onto them. If "
                    "there is no file, then no versions will be "
                    "searched"))),
    ]

    parser = optparse.OptionParser(
        usage="%prog [options]",
        description=("Fetches logs from khanacademy.org using its v1 API. "
                     "Outputs to stdout."))
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, leftovers = parser.parse_args()
    # Anything left over after option parsing is a usage mistake.
    if leftovers:
        sys.exit('Unknown arguments %s. See --help.' % leftovers)

    return options
コード例 #6
0
ファイル: fetch_logs.py プロジェクト: kohlmeier/analytics
def get_cmd_line_args():
    """Build the option parser for the log fetcher and parse sys.argv.

    The default date range runs from yesterday at 00:00 (inclusive) to
    today at 00:00 (exclusive), both rendered by date_util.to_date_iso.

    Returns:
        The options object produced by the optparse parser.

    Exits the process (through parser.error or sys.exit) when both
    --appengine_version and --backend are given, or when any positional
    arguments are left over after option parsing.
    """
    day_start = datetime.datetime.combine(datetime.date.today(),
                                          datetime.time())
    previous_day_start = day_start - datetime.timedelta(days=1)

    parser = optparse.OptionParser(
        usage="%prog [options]",
        description=("Fetches logs from khanacademy.org using its v1 API. "
                     "Outputs to stdout."))

    # (flags, add_option keyword arguments) pairs, in --help display order.
    declared_options = (
        (("-s", "--start_date"), {
            "default": date_util.to_date_iso(previous_day_start),
            "help": ("Earliest inclusive date of logs to fetch, "
                     "in ISO 8601 format. "
                     "Defaults to yesterday at 00:00."),
        }),
        (("-e", "--end_date"), {
            "default": date_util.to_date_iso(day_start),
            "help": ("Latest exclusive date of logs to fetch, "
                     "in ISO 8601 format. "
                     "Defaults to today at 00:00."),
        }),
        (("-i", "--interval"), {
            "default": 10,
            "help": ("Time interval to fetch at a time, in seconds. "
                     "Defaults to 10."),
        }),
        (("-r", "--max_retries"), {
            "default": 8,
            "help": ("Maximum # of retries for request attempts "
                     "before failing. Defaults to 8."),
        }),
        (("-v", "--appengine_version"), {
            "default": None,
            "help": ("If set, the appengine-version (e.g. "
                     "0515-ae96fc55243b) to request the logs from. "
                     "If None, will fetch from all versions (but "
                     "only from one 'class' of versions, like frontend "
                     "vs. backends)."),
        }),
        (("-b", "--backend"), {
            "action": "store_true",
            "default": False,
            "help": ("If set will try and read the logs from all of "
                     "the currently deployed logical backends (those "
                     "that end with 'backend')."),
        }),
    )
    for flags, keywords in declared_options:
        parser.add_option(*flags, **keywords)

    options, extra_args = parser.parse_args()

    # A specific version and "all backends" cannot both be requested.
    if options.appengine_version and options.backend:
        parser.error("options --appengine_version and --backend are "
                     "mutually exclusive")

    # Anything left over after option parsing is a usage mistake.
    if extra_args:
        sys.exit('Unknown arguments %s. See --help.' % extra_args)

    return options
コード例 #7
0
def get_cmd_line_args():
    """Parse the command-line options of the log fetcher.

    The default date range runs from yesterday at 00:00 (inclusive) to
    today at 00:00 (exclusive), both rendered by date_util.to_date_iso.

    Returns:
        The options object produced by the optparse parser.

    Exits the process through sys.exit when any positional arguments are
    left over after option parsing.
    """
    this_midnight = datetime.datetime.combine(datetime.date.today(),
                                              datetime.time())
    last_midnight = this_midnight - datetime.timedelta(days=1)

    # (flags, add_option keyword arguments) pairs, in --help display order.
    declared_options = [
        (("-s", "--start_date"),
         dict(default=date_util.to_date_iso(last_midnight),
              help=("Earliest inclusive date of logs to fetch, "
                    "in ISO 8601 format. "
                    "Defaults to yesterday at 00:00."))),
        (("-e", "--end_date"),
         dict(default=date_util.to_date_iso(this_midnight),
              help=("Latest exclusive date of logs to fetch, "
                    "in ISO 8601 format. "
                    "Defaults to today at 00:00."))),
        (("-i", "--interval"),
         dict(default=10,
              help=("Time interval to fetch at a time, in seconds. "
                    "Defaults to 10."))),
        (("-r", "--max_retries"),
         dict(default=8,
              help=("Maximum # of retries for request attempts "
                    "before failing. Defaults to 8."))),
        (("-v", "--appengine_version"),
         dict(default=None,
              help=("If set, the appengine-version (e.g. "
                    "0515-ae96fc55243b) to request the logs from. "
                    "If None, will use the current active version."))),
        (("--file_for_alternate_appengine_versions",),
         dict(default=None,
              help=("If set, whenever fetching logs returns 0 "
                    "loglines, look through this file to find "
                    "alternate appengine_versions, and retry "
                    "fetching the logs against those versions "
                    "until one returns non-zero results.  The "
                    "file should have text like "
                    "appengine_versions=v1,v2,v3"))),
    ]

    parser = optparse.OptionParser(
        usage="%prog [options]",
        description=("Fetches logs from khanacademy.org using its v1 API. "
                     "Outputs to stdout."))
    for flags, keywords in declared_options:
        parser.add_option(*flags, **keywords)

    options, positional = parser.parse_args()
    # Only options are accepted; any positional argument aborts the run.
    if positional:
        sys.exit('This script takes no arguments!')

    return options
コード例 #8
0
ファイル: fetch_entities.py プロジェクト: mwahl/analytics
def get_cmd_line_args():
    """Parse the command-line options of the entity backup script.

    The default date range runs from yesterday at 00:00 (inclusive) to
    today at 00:00 (exclusive), both rendered by date_util.to_date_iso.

    None of the options declares a type, so --interval, --max_logs and
    --max_retries come back as strings when given on the command line and
    as ints when the defaults are used; convert before doing arithmetic.

    Returns:
        The options object produced by the optparse parser, with
        options.output_file defaulting to "<type>.pickle" when
        --output_file was not supplied.

    Exits with status 1 (after writing a message to stderr) when --type is
    missing.  Leftover positional arguments are ignored.
    """
    today_dt = dt.datetime.combine(dt.date.today(), dt.time())
    yesterday_dt = today_dt - dt.timedelta(days=1)

    parser = optparse.OptionParser(
        usage="%prog [options]",
        description=("Fetches problem logs from khanacademy.org using its "
                     "v1 API. Outputs in pickled entities."),
    )
    parser.add_option(
        "-s",
        "--start_date",
        default=date_util.to_date_iso(yesterday_dt),
        help=("Earliest inclusive date of logs to fetch, in ISO 8601 "
              "format. Defaults to yesterday at 00:00."),
    )
    parser.add_option(
        "-e",
        "--end_date",
        default=date_util.to_date_iso(today_dt),
        help=("Latest exclusive date of logs to fetch, in ISO 8601 "
              "format. Defaults to today at 00:00."),
    )
    parser.add_option(
        "-i",
        "--interval",
        default=10,
        help="Time interval to fetch at a time, in seconds. Defaults to 10.",
    )
    parser.add_option(
        "-l",
        "--max_logs",
        default=1000,
        help=("Maximum # of log entries to fetch per interval. "
              "Defaults to 1000."),
    )
    parser.add_option(
        "-r",
        "--max_retries",
        default=8,
        help=("Maximum # of retries for request attempts before failing. "
              "Defaults to 8."),
    )
    parser.add_option("-o", "--output_file",
                      help="Name of the file to output.")
    parser.add_option("-t", "--type", help="Entity type to back up")

    options, _ = parser.parse_args()

    if not options.type:
        # sys.stderr.write + sys.exit replace the print statement and the
        # site-provided exit() helper, which is meant for interactive use
        # and may be missing (e.g. when the site module is not loaded).
        sys.stderr.write("Please specify an entity type to back up\n")
        sys.exit(1)
    if not options.output_file:
        options.output_file = options.type + ".pickle"

    return options