Example #1
def logs_folder_files(args, task):
    uri = BROWSE_FOLDER_FORMAT.format(logfetch_base.base_uri(args), task)
    files_json = get_json_response(uri, args, {'path': '{0}/logs'.format(task)})
    if 'files' in files_json:
        files = files_json['files']
        return [f['name'] for f in files if logfetch_base.is_in_date_range(args, f['mtime'])]
    else:
        return [f['path'].rsplit('/')[-1] for f in files_json if logfetch_base.is_in_date_range(args, f['mtime'])]
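These snippets all delegate the actual range check to logfetch_base.is_in_date_range, which none of the examples define. As a point of reference only, here is a minimal sketch of such a check, assuming args.start and args.end are optional datetime bounds and the value passed in is a Unix timestamp in seconds (both are assumptions, not the real logfetch implementation):

import datetime

def is_in_date_range(args, timestamp):
    # Hypothetical sketch only: assumes args.start / args.end are optional
    # datetime.datetime bounds and timestamp is a Unix time in seconds.
    moment = datetime.datetime.utcfromtimestamp(timestamp)
    if getattr(args, 'start', None) is not None and moment < args.start:
        return False
    if getattr(args, 'end', None) is not None and moment > args.end:
        return False
    return True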
Example #2
def logs_folder_files(args, task):
    uri = BROWSE_FOLDER_FORMAT.format(logfetch_base.base_uri(args), task)
    files_json = logfetch_base.get_json_response(uri, args, {"path": "{0}/logs".format(task)}, True)
    if "files" in files_json:
        files = files_json["files"]
        return [f["name"] for f in files if logfetch_base.is_in_date_range(args, f["mtime"])]
    else:
        return [f["path"].rsplit("/")[-1] for f in files_json if logfetch_base.is_in_date_range(args, f["mtime"])]
Example #3
def log_file_in_date_range(args, log_file):
    if 'startTime' in log_file:
        if 'endTime' in log_file:
            return logfetch_base.date_range_overlaps(args, int(str(log_file['startTime'])[0:-3]), int(str(log_file['endTime'])[0:-3]))
        else:
            return logfetch_base.date_range_overlaps(args, int(str(log_file['startTime'])[0:-3]), int(str(log_file['lastModified'])[0:-3]))
    elif 'endTime' in log_file:
        return logfetch_base.is_in_date_range(args, int(str(log_file['endTime'])[0:-3]))
    else:
        return logfetch_base.is_in_date_range(args, int(str(log_file['lastModified'])[0:-3]))
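The int(str(...)[0:-3]) idiom used throughout log_file_in_date_range drops the last three digits of a millisecond Unix timestamp, truncating it to whole seconds; for non-negative values it is the same as integer division by 1000:

millis = 1466465587000                  # epoch milliseconds
seconds = int(str(millis)[0:-3])        # drop the last three digits
assert seconds == millis // 1000 == 1466465587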
Example #4
def logs_folder_files(args, task):
    uri = BROWSE_FOLDER_FORMAT.format(logfetch_base.base_uri(args), task)
    files_json = get_json_response(uri, {'path': '{0}/logs'.format(task)})
    if 'files' in files_json:
        files = files_json['files']
        return [
            f['name'] for f in files
            if logfetch_base.is_in_date_range(args, f['mtime'])
        ]
    else:
        return [
            f['path'].rsplit('/')[-1] for f in files_json
            if logfetch_base.is_in_date_range(args, f['mtime'])
        ]
Example #5
def in_date_range(args, filename):
    timestamps = re.findall(r"\d{13}", filename)
    if timestamps:
        return logfetch_base.is_in_date_range(args,
                                              int(str(timestamps[-1])[0:-3]))
    else:
        return True
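Note the difference from the -\d{13}- pattern in the next example: a bare \d{13} matches any 13-digit run, while the hyphen-delimited form only matches a stamp set off by separators. A quick demonstration on a made-up filename:

import re

name = 'service-1466465587000-1-host.out.gz'   # hypothetical filename
print(re.findall(r"\d{13}", name))             # ['1466465587000']
print(re.findall(r"-\d{13}-", name))           # ['-1466465587000-']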
Example #6
def in_date_range(args, filename):
    timestamps = re.findall(r"-\d{13}-", filename)
    if timestamps:
        return is_in_date_range(
            args, int(str(timestamps[-1]).replace("-", "")[0:-3]))
    else:
        return True
Example #7
def log_file_in_date_range(args, log_file):
    if 'startTime' in log_file:
        if 'endTime' in log_file:
            return logfetch_base.date_range_overlaps(
                args, int(str(log_file['startTime'])[0:-3]),
                int(str(log_file['endTime'])[0:-3]))
        else:
            return logfetch_base.date_range_overlaps(
                args, int(str(log_file['startTime'])[0:-3]),
                int(str(log_file['lastModified'])[0:-3]))
    elif 'endTime' in log_file:
        return logfetch_base.is_in_date_range(
            args, int(str(log_file['endTime'])[0:-3]))
    else:
        return logfetch_base.is_in_date_range(
            args, int(str(log_file['lastModified'])[0:-3]))
Example #8
def download_s3_logs(args):
    sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    logs = logs_for_all_requests(args)
    async_requests = []
    zipped_files = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, time_from_filename(filename)):
            if not already_downloaded(args.dest, filename):
                async_requests.append(
                    grequests.AsyncRequest('GET', log_file['getUrl'], callback=generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size, args.verbose))
                )
            else:
                if args.verbose:
                    sys.stderr.write(colored('Log already downloaded {0}'.format(filename), 'magenta') + '\n')
                all_logs.append('{0}/{1}'.format(args.dest, filename.replace('.gz', '.log')))
            zipped_files.append('{0}/{1}'.format(args.dest, filename))
        else:
            if args.verbose:
                sys.stderr.write(colored('Excluding {0}, not in date range'.format(filename), 'magenta') + '\n')
    if async_requests:
        sys.stderr.write(colored('Starting S3 Downloads with {0} parallel fetches'.format(args.num_parallel_fetches), 'cyan'))
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        sys.stderr.write(colored('No S3 logs to download', 'cyan'))
    sys.stderr.write(colored('\nUnpacking S3 logs\n', 'cyan'))
    all_logs = all_logs + logfetch_base.unpack_logs(args, zipped_files)
    sys.stderr.write(colored('All S3 logs up to date', 'cyan') + '\n')
    return all_logs
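already_downloaded is never shown in any example; a plausible minimal version, assuming it only needs to check the destination directory for the compressed file or its unpacked counterpart (an assumption, not the real helper):

import os

def already_downloaded(dest, filename):
    # Assumed behavior: the log counts as downloaded if either the
    # .gz archive or the unpacked .log is already in the dest folder.
    return (os.path.isfile('{0}/{1}'.format(dest, filename)) or
            os.path.isfile('{0}/{1}'.format(dest, filename.replace('.gz', '.log'))))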
Example #9
def download_s3_logs(args):
    if not args.silent:
        sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    callbacks.progress = 0
    logs = logs_for_all_requests(args)
    async_requests = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, int(str(log_file['lastModified'])[0:-3])):
            if not args.logtype or log_matches(args, filename):
                logfetch_base.log(colored('Including log {0}'.format(filename), 'blue') + '\n', args, True)
                if not already_downloaded(args.dest, filename):
                    async_requests.append(
                        grequests.AsyncRequest('GET', log_file['getUrl'], callback=callbacks.generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size, args.verbose, args.silent), headers=args.headers)
                    )
                else:
                    logfetch_base.log(colored('Log already downloaded {0}'.format(filename), 'blue') + '\n', args, True)
                all_logs.append('{0}/{1}'.format(args.dest, filename))
            else:
                logfetch_base.log(colored('Excluding {0} log does not match logtype argument {1}'.format(filename, args.logtype), 'magenta') + '\n', args, True)
        else:
            logfetch_base.log(colored('Excluding {0}, not in date range'.format(filename), 'magenta') + '\n', args, True)
    if async_requests:
        logfetch_base.log(colored('Starting {0} S3 Downloads with {1} parallel fetches\n'.format(len(async_requests), args.num_parallel_fetches), 'cyan'), args, False)
        callbacks.goal = len(async_requests)
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        logfetch_base.log(colored('No S3 logs to download\n', 'cyan'), args, False)
    logfetch_base.log(colored('All S3 logs up to date\n', 'cyan'), args, False)
    all_logs = modify_download_list(all_logs)
    return all_logs
Example #10
def valid_logfile(args, fileData):
    is_in_range = logfetch_base.is_in_date_range(args, fileData['mtime'])
    not_a_directory = not fileData['mode'].startswith('d')
    is_a_logfile = (fnmatch.fnmatch(fileData['name'], '*.log')
                    or fnmatch.fnmatch(fileData['name'], '*.out')
                    or fnmatch.fnmatch(fileData['name'], '*.err'))
    return is_in_range and not_a_directory and is_a_logfile
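For context, valid_logfile consumes the per-file dictionaries returned by the browse endpoint in Examples #1-#4; a made-up entry illustrates the assumed shape (an ls-style mode string, mtime presumed to be epoch seconds):

file_data = {
    'name': 'service.log',
    'mode': '-rw-r--r--',   # a leading 'd' would mark a directory
    'mtime': 1466465587,    # assumed to be epoch seconds
}
# valid_logfile(args, file_data) -> True when mtime falls in the requested range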
Example #11
def download_s3_logs(args):
    if not args.silent:
        sys.stderr.write(colored("Checking for S3 log files", "cyan") + "\n")
    callbacks.progress = 0
    logs = logs_for_all_requests(args)
    async_requests = []
    all_logs = []
    for log_file in logs:
        filename = log_file["key"].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, int(str(log_file["lastModified"])[0:-3])):
            if not args.logtype or log_matches(args, filename):
                logfetch_base.log(colored("Including log {0}".format(filename), "blue") + "\n", args, True)
                if not already_downloaded(args.dest, filename):
                    async_requests.append(
                        grequests.AsyncRequest(
                            "GET",
                            log_file["getUrl"],
                            callback=callbacks.generate_callback(
                                log_file["getUrl"], args.dest, filename, args.chunk_size, args.verbose, args.silent
                            ),
                            headers=args.headers,
                        )
                    )
                else:
                    logfetch_base.log(colored("Log already downloaded {0}".format(filename), "blue") + "\n", args, True)
                all_logs.append("{0}/{1}".format(args.dest, filename))
            else:
                logfetch_base.log(
                    colored(
                        "Excluding {0} log does not match logtype argument {1}".format(filename, args.logtype),
                        "magenta",
                    )
                    + "\n",
                    args,
                    True,
                )
        else:
            logfetch_base.log(
                colored("Excluding {0}, not in date range".format(filename), "magenta") + "\n", args, True
            )
    if async_requests:
        logfetch_base.log(
            colored(
                "Starting {0} S3 Downloads with {1} parallel fetches\n".format(
                    len(async_requests), args.num_parallel_fetches
                ),
                "cyan",
            ),
            args,
            False,
        )
        callbacks.goal = len(async_requests)
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        logfetch_base.log(colored("No S3 logs to download\n", "cyan"), args, False)
    logfetch_base.log(colored("All S3 logs up to date\n", "cyan"), args, False)
    return all_logs
Example #12
def valid_logfile(args, fileData):
    is_in_range = logfetch_base.is_in_date_range(args, fileData["mtime"])
    not_a_directory = not fileData["mode"].startswith("d")
    is_a_logfile = (
        fnmatch.fnmatch(fileData["name"], "*.log")
        or fnmatch.fnmatch(fileData["name"], "*.out")
        or fnmatch.fnmatch(fileData["name"], "*.err")
    )
    return is_in_range and not_a_directory and is_a_logfile
Example #13
def download_s3_logs(args):
    sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    logs = logs_for_all_requests(args)
    async_requests = []
    zipped_files = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(
                args, int(str(log_file['lastModified'])[0:-3])):
            if not args.logtype or log_matches(args, filename):
                if not already_downloaded(args.dest, filename):
                    async_requests.append(
                        grequests.AsyncRequest('GET',
                                               log_file['getUrl'],
                                               callback=generate_callback(
                                                   log_file['getUrl'],
                                                   args.dest, filename,
                                                   args.chunk_size,
                                                   args.verbose),
                                               headers=args.headers))
                else:
                    if args.verbose:
                        sys.stderr.write(
                            colored(
                                'Log already downloaded {0}'.format(filename),
                                'magenta') + '\n')
                    all_logs.append('{0}/{1}'.format(
                        args.dest, filename.replace('.gz', '.log')))
                zipped_files.append('{0}/{1}'.format(args.dest, filename))
            else:
                if args.verbose:
                    sys.stderr.write(
                        colored(
                            'Excluding {0} log does not match logtype argument {1}'
                            .format(filename, args.logtype), 'magenta') + '\n')
        else:
            if args.verbose:
                sys.stderr.write(
                    colored(
                        'Excluding {0}, not in date range'.format(filename),
                        'magenta') + '\n')
    if async_requests:
        sys.stderr.write(
            colored(
                'Starting S3 Downloads with {0} parallel fetches'.format(
                    args.num_parallel_fetches), 'cyan'))
        grequests.map(async_requests,
                      stream=True,
                      size=args.num_parallel_fetches)
    else:
        sys.stderr.write(colored('No S3 logs to download', 'cyan'))
    sys.stderr.write(colored('\nUnpacking S3 logs\n', 'cyan'))
    all_logs = all_logs + logfetch_base.unpack_logs(args, zipped_files)
    sys.stderr.write(colored('All S3 logs up to date', 'cyan') + '\n')
    return all_logs
Example #14
def download_s3_logs(args):
    sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    logs = logs_for_all_requests(args)
    async_requests = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, time_from_filename(filename)):
            if not already_downloaded(args.dest, filename):
                async_requests.append(
                    grequests.AsyncRequest('GET', log_file['getUrl'], callback=generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size))
                )
            all_logs.append('{0}/{1}'.format(args.dest, filename.replace('.gz', '.log')))
    if async_requests:
        sys.stderr.write(colored('Starting S3 Downloads', 'cyan'))
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    zipped_files = ['{0}/{1}'.format(args.dest, log_file['key'].rsplit("/", 1)[1]) for log_file in logs]
    sys.stderr.write(colored('Unpacking S3 logs\n', 'cyan'))
    logfetch_base.unpack_logs(zipped_files)
    sys.stderr.write(colored('All S3 logs up to date', 'cyan') + '\n')
    return all_logs
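time_from_filename (used here and in Example #8) is not shown either; a sketch consistent with the in_date_range examples, assuming the S3 key name embeds a 13-digit millisecond stamp (the fallback of 0 is an arbitrary choice for this sketch):

import re

def time_from_filename(filename):
    # Hypothetical: pull the last 13-digit millisecond stamp out of the
    # filename and truncate it to whole seconds, as in in_date_range.
    timestamps = re.findall(r"\d{13}", filename)
    return int(str(timestamps[-1])[0:-3]) if timestamps else 0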
Example #15
def is_valid_live_log(args, file_data):
    is_in_range = logfetch_base.is_in_date_range(args, file_data['mtime'])
    has_data = logfetch_base.logfile_has_data(file_data)
    return is_in_range and has_data and logfetch_base.is_valid_log(file_data)
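logfetch_base.logfile_has_data is not defined in any example; one plausible reading, assuming the browse entries carry a 'size' field in bytes (purely an assumption):

def logfile_has_data(file_data):
    # Assumed shape: browse-API entries include a 'size' field in bytes.
    return file_data.get('size', 0) > 0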
Example #16
def valid_logfile(args, fileData):
    is_in_range = logfetch_base.is_in_date_range(args, fileData['mtime'])
    not_a_directory = not fileData['mode'].startswith('d')
    is_a_logfile = fnmatch.fnmatch(fileData['name'], '*.log') or fnmatch.fnmatch(fileData['name'], '*.out') or fnmatch.fnmatch(fileData['name'], '*.err')
    return is_in_range and not_a_directory and is_a_logfile
Example #17
def in_date_range(args, filename):
    timestamps = re.findall(r"-\d{13}-", filename)
    if timestamps:
        return is_in_date_range(args, int(str(timestamps[-1]).replace("-", "")[0:-3]))
    else:
        return True
Example #18
def in_date_range(args, filename):
    timestamps = re.findall(r"\d{13}", filename)
    if timestamps:
        return logfetch_base.is_in_date_range(args, int(str(timestamps[-1])[0:-3]))
    else:
        return True