def logs_folder_files(args, task):
    uri = BROWSE_FOLDER_FORMAT.format(logfetch_base.base_uri(args), task)
    files_json = get_json_response(uri, args, {'path': '{0}/logs'.format(task)})
    if 'files' in files_json:
        files = files_json['files']
        return [f['name'] for f in files if logfetch_base.is_in_date_range(args, f['mtime'])]
    else:
        return [f['path'].rsplit('/')[-1] for f in files_json if logfetch_base.is_in_date_range(args, f['mtime'])]
def logs_folder_files(args, task):
    uri = BROWSE_FOLDER_FORMAT.format(logfetch_base.base_uri(args), task)
    files_json = logfetch_base.get_json_response(uri, args, {"path": "{0}/logs".format(task)}, True)
    if "files" in files_json:
        files = files_json["files"]
        return [f["name"] for f in files if logfetch_base.is_in_date_range(args, f["mtime"])]
    else:
        return [f["path"].rsplit("/")[-1] for f in files_json if logfetch_base.is_in_date_range(args, f["mtime"])]
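# Both logs_folder_files variants above call a JSON helper that is not part of
# this collection (the newer one reaches it through logfetch_base and passes an
# extra flag, presumably to tolerate missing folders). The sketch below is a
# hypothetical reconstruction built on the requests library; the skip404
# parameter name is an assumption, not the helper's real signature.
import requests

def get_json_response(uri, args, params=None, skip404=False):
    # Hypothetical: forward any auth headers carried on args and decode the body.
    response = requests.get(uri, params=params or {}, headers=getattr(args, 'headers', None))
    if skip404 and response.status_code == 404:
        return {}
    response.raise_for_status()
    return response.json()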
def log_file_in_date_range(args, log_file):
    if 'startTime' in log_file:
        if 'endTime' in log_file:
            return logfetch_base.date_range_overlaps(args, int(str(log_file['startTime'])[0:-3]), int(str(log_file['endTime'])[0:-3]))
        else:
            return logfetch_base.date_range_overlaps(args, int(str(log_file['startTime'])[0:-3]), int(str(log_file['lastModified'])[0:-3]))
    elif 'endTime' in log_file:
        return logfetch_base.is_in_date_range(args, int(str(log_file['endTime'])[0:-3]))
    else:
        return logfetch_base.is_in_date_range(args, int(str(log_file['lastModified'])[0:-3]))
def logs_folder_files(args, task):
    uri = BROWSE_FOLDER_FORMAT.format(logfetch_base.base_uri(args), task)
    files_json = get_json_response(uri, {'path': '{0}/logs'.format(task)})
    if 'files' in files_json:
        files = files_json['files']
        return [f['name'] for f in files if logfetch_base.is_in_date_range(args, f['mtime'])]
    else:
        return [f['path'].rsplit('/')[-1] for f in files_json if logfetch_base.is_in_date_range(args, f['mtime'])]
def in_date_range(args, filename):
    timestamps = re.findall(r"\d{13}", filename)
    if timestamps:
        return logfetch_base.is_in_date_range(args, int(str(timestamps[-1])[0:-3]))
    else:
        return True
def in_date_range(args, filename):
    timestamps = re.findall(r"-\d{13}-", filename)
    if timestamps:
        return is_in_date_range(args, int(str(timestamps[-1]).replace("-", "")[0:-3]))
    else:
        return True
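# Nearly every snippet here defers to logfetch_base.is_in_date_range and
# logfetch_base.date_range_overlaps, neither of which appears in this
# collection. The following is a minimal sketch of plausible implementations,
# assuming args.start and args.end are optional datetime bounds and that
# timestamps arrive as Unix seconds (callers above strip the last three digits
# off millisecond values before passing them in).
from datetime import datetime

def is_in_date_range(args, timestamp):
    # Hypothetical: a point in time qualifies when it falls between the
    # optional start and end bounds.
    when = datetime.utcfromtimestamp(timestamp)
    if args.start and when < args.start:
        return False
    if args.end and when > args.end:
        return False
    return True

def date_range_overlaps(args, start_ts, end_ts):
    # Hypothetical: a file spanning [start_ts, end_ts] qualifies when that
    # interval intersects the requested window at all.
    file_start = datetime.utcfromtimestamp(start_ts)
    file_end = datetime.utcfromtimestamp(end_ts)
    if args.start and file_end < args.start:
        return False
    if args.end and file_start > args.end:
        return False
    return True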
def download_s3_logs(args):
    sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    logs = logs_for_all_requests(args)
    async_requests = []
    zipped_files = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, time_from_filename(filename)):
            if not already_downloaded(args.dest, filename):
                async_requests.append(
                    grequests.AsyncRequest('GET', log_file['getUrl'],
                                           callback=generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size, args.verbose))
                )
            else:
                if args.verbose:
                    sys.stderr.write(colored('Log already downloaded {0}'.format(filename), 'magenta') + '\n')
            all_logs.append('{0}/{1}'.format(args.dest, filename.replace('.gz', '.log')))
            zipped_files.append('{0}/{1}'.format(args.dest, filename))
        else:
            if args.verbose:
                sys.stderr.write(colored('Excluding {0}, not in date range'.format(filename), 'magenta') + '\n')
    if async_requests:
        sys.stderr.write(colored('Starting S3 Downloads with {0} parallel fetches'.format(args.num_parallel_fetches), 'cyan'))
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        sys.stderr.write(colored('No S3 logs to download', 'cyan'))
    sys.stderr.write(colored('\nUnpacking S3 logs\n', 'cyan'))
    all_logs = all_logs + logfetch_base.unpack_logs(args, zipped_files)
    sys.stderr.write(colored('All S3 logs up to date', 'cyan') + '\n')
    return all_logs
def download_s3_logs(args):
    if not args.silent:
        sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    callbacks.progress = 0
    logs = logs_for_all_requests(args)
    async_requests = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, int(str(log_file['lastModified'])[0:-3])):
            if not args.logtype or log_matches(args, filename):
                logfetch_base.log(colored('Including log {0}'.format(filename), 'blue') + '\n', args, True)
                if not already_downloaded(args.dest, filename):
                    async_requests.append(
                        grequests.AsyncRequest('GET', log_file['getUrl'],
                                               callback=callbacks.generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size, args.verbose, args.silent),
                                               headers=args.headers)
                    )
                else:
                    logfetch_base.log(colored('Log already downloaded {0}'.format(filename), 'blue') + '\n', args, True)
                all_logs.append('{0}/{1}'.format(args.dest, filename))
            else:
                logfetch_base.log(colored('Excluding {0}, log does not match logtype argument {1}'.format(filename, args.logtype), 'magenta') + '\n', args, True)
        else:
            logfetch_base.log(colored('Excluding {0}, not in date range'.format(filename), 'magenta') + '\n', args, True)
    if async_requests:
        logfetch_base.log(colored('Starting {0} S3 Downloads with {1} parallel fetches\n'.format(len(async_requests), args.num_parallel_fetches), 'cyan'), args, False)
        callbacks.goal = len(async_requests)
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        logfetch_base.log(colored('No S3 logs to download\n', 'cyan'), args, False)
    logfetch_base.log(colored('All S3 logs up to date\n', 'cyan'), args, False)
    all_logs = modify_download_list(all_logs)
    return all_logs
def valid_logfile(args, fileData):
    is_in_range = logfetch_base.is_in_date_range(args, fileData['mtime'])
    not_a_directory = not fileData['mode'].startswith('d')
    is_a_logfile = fnmatch.fnmatch(fileData['name'], '*.log') or fnmatch.fnmatch(fileData['name'], '*.out') or fnmatch.fnmatch(fileData['name'], '*.err')
    return is_in_range and not_a_directory and is_a_logfile
def download_s3_logs(args):
    if not args.silent:
        sys.stderr.write(colored("Checking for S3 log files", "cyan") + "\n")
    callbacks.progress = 0
    logs = logs_for_all_requests(args)
    async_requests = []
    all_logs = []
    for log_file in logs:
        filename = log_file["key"].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, int(str(log_file["lastModified"])[0:-3])):
            if not args.logtype or log_matches(args, filename):
                logfetch_base.log(colored("Including log {0}".format(filename), "blue") + "\n", args, True)
                if not already_downloaded(args.dest, filename):
                    async_requests.append(
                        grequests.AsyncRequest(
                            "GET",
                            log_file["getUrl"],
                            callback=callbacks.generate_callback(
                                log_file["getUrl"], args.dest, filename, args.chunk_size, args.verbose, args.silent
                            ),
                            headers=args.headers,
                        )
                    )
                else:
                    logfetch_base.log(colored("Log already downloaded {0}".format(filename), "blue") + "\n", args, True)
                all_logs.append("{0}/{1}".format(args.dest, filename))
            else:
                logfetch_base.log(
                    colored(
                        "Excluding {0}, log does not match logtype argument {1}".format(filename, args.logtype),
                        "magenta",
                    )
                    + "\n",
                    args,
                    True,
                )
        else:
            logfetch_base.log(
                colored("Excluding {0}, not in date range".format(filename), "magenta") + "\n", args, True
            )
    if async_requests:
        logfetch_base.log(
            colored(
                "Starting {0} S3 Downloads with {1} parallel fetches\n".format(
                    len(async_requests), args.num_parallel_fetches
                ),
                "cyan",
            ),
            args,
            False,
        )
        callbacks.goal = len(async_requests)
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        logfetch_base.log(colored("No S3 logs to download\n", "cyan"), args, False)
    logfetch_base.log(colored("All S3 logs up to date\n", "cyan"), args, False)
    return all_logs
def valid_logfile(args, fileData):
    is_in_range = logfetch_base.is_in_date_range(args, fileData["mtime"])
    not_a_directory = not fileData["mode"].startswith("d")
    is_a_logfile = (
        fnmatch.fnmatch(fileData["name"], "*.log")
        or fnmatch.fnmatch(fileData["name"], "*.out")
        or fnmatch.fnmatch(fileData["name"], "*.err")
    )
    return is_in_range and not_a_directory and is_a_logfile
def download_s3_logs(args):
    sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    logs = logs_for_all_requests(args)
    async_requests = []
    zipped_files = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, int(str(log_file['lastModified'])[0:-3])):
            if not args.logtype or log_matches(args, filename):
                if not already_downloaded(args.dest, filename):
                    async_requests.append(
                        grequests.AsyncRequest('GET', log_file['getUrl'],
                                               callback=generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size, args.verbose),
                                               headers=args.headers)
                    )
                else:
                    if args.verbose:
                        sys.stderr.write(colored('Log already downloaded {0}'.format(filename), 'magenta') + '\n')
                all_logs.append('{0}/{1}'.format(args.dest, filename.replace('.gz', '.log')))
                zipped_files.append('{0}/{1}'.format(args.dest, filename))
            else:
                if args.verbose:
                    sys.stderr.write(colored('Excluding {0}, log does not match logtype argument {1}'.format(filename, args.logtype), 'magenta') + '\n')
        else:
            if args.verbose:
                sys.stderr.write(colored('Excluding {0}, not in date range'.format(filename), 'magenta') + '\n')
    if async_requests:
        sys.stderr.write(colored('Starting S3 Downloads with {0} parallel fetches'.format(args.num_parallel_fetches), 'cyan'))
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        sys.stderr.write(colored('No S3 logs to download', 'cyan'))
    sys.stderr.write(colored('\nUnpacking S3 logs\n', 'cyan'))
    all_logs = all_logs + logfetch_base.unpack_logs(args, zipped_files)
    sys.stderr.write(colored('All S3 logs up to date', 'cyan') + '\n')
    return all_logs
def download_s3_logs(args):
    sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    logs = logs_for_all_requests(args)
    async_requests = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, time_from_filename(filename)):
            if not already_downloaded(args.dest, filename):
                async_requests.append(
                    grequests.AsyncRequest('GET', log_file['getUrl'],
                                           callback=generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size))
                )
            all_logs.append('{0}/{1}'.format(args.dest, filename.replace('.gz', '.log')))
    if async_requests:
        sys.stderr.write(colored('Starting S3 Downloads', 'cyan'))
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    zipped_files = ['{0}/{1}'.format(args.dest, log_file['key'].rsplit("/", 1)[1]) for log_file in logs]
    sys.stderr.write(colored('Unpacking S3 logs\n', 'cyan'))
    logfetch_base.unpack_logs(zipped_files)
    sys.stderr.write(colored('All S3 logs up to date', 'cyan') + '\n')
    return all_logs
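# Every download_s3_logs variant above consults already_downloaded before
# queueing a fetch, but its body is not shown in this collection. The guess
# below mirrors how the callers name their output files, treating a log as
# present if either the gzipped archive or its unpacked .log counterpart
# already exists in dest.
import os

def already_downloaded(dest, filename):
    # Hypothetical: check both the archive and the unpacked form on disk.
    return (os.path.isfile('{0}/{1}'.format(dest, filename.replace('.gz', '.log'))) or
            os.path.isfile('{0}/{1}'.format(dest, filename)))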
def is_valid_live_log(args, file_data):
    is_in_range = logfetch_base.is_in_date_range(args, file_data['mtime'])
    has_data = logfetch_base.logfile_has_data(file_data)
    return is_in_range and has_data and logfetch_base.is_valid_log(file_data)
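# is_valid_live_log defers two checks to logfetch_base that are not shown.
# Plausible stand-ins follow, assuming the browse API reports a size field and
# that "valid" means the same extension whitelist valid_logfile uses above.
import fnmatch

def logfile_has_data(file_data):
    # Hypothetical: skip zero-byte files so empty logs are not fetched.
    return file_data.get('size', 0) > 0

def is_valid_log(file_data):
    # Hypothetical: accept the usual log extensions.
    return any(fnmatch.fnmatch(file_data['name'], pattern)
               for pattern in ('*.log', '*.out', '*.err'))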