def _WillProcess(self):
  """Make sure the cache file exists locally and return a handle to it.

  If self.cache_file is not on disk yet, it is fetched from
  self.canonical_url via cloud_storage.Copy.

  Returns:
    A URLFileHandle built from the canonical URL and a file:// URL of the
    cache file, or None if the copy raised cloud_storage.CloudStorageError.
  """
  already_cached = os.path.exists(self.cache_file)
  if not already_cached:
    try:
      cloud_storage.Copy(self.canonical_url, self.cache_file)
    except cloud_storage.CloudStorageError:
      # Nothing to hand back when the download fails.
      return None

  remote_url = self.canonical_url
  local_url = 'file://' + self.cache_file
  return URLFileHandle(remote_url, local_url)
def _ReadMapperGCSFile(url):
  """Download the mapper file at `url` into a fresh temporary file.

  Args:
    url: Source location handed to cloud_storage.Copy.

  Returns:
    The path of the temporary file holding the downloaded copy, or None if
    the copy raised cloud_storage.CloudStorageError (the temporary file is
    removed in that case). On success the file is left on disk for the
    caller to delete.
  """
  file_handle, file_name = tempfile.mkstemp()
  # mkstemp returns an open OS-level descriptor that must be closed by the
  # caller. Only the path is needed here, so close it immediately; closing it
  # only on the failure path would leak one descriptor per successful call.
  os.close(file_handle)
  try:
    cloud_storage.Copy(url, file_name)
  except cloud_storage.CloudStorageError as e:
    logging.info("Failed to copy: %s", e)
    os.unlink(file_name)
    return None
  return file_name
def _ReadTracesGCSFile(url):
  """Fetch the JSON document at `url` and return its decoded contents.

  The document is copied into a temporary file, parsed as JSON, and the
  temporary file is always closed and removed before returning.

  Args:
    url: Source location handed to cloud_storage.Copy.

  Returns:
    The decoded JSON value, or an empty list if the copy raised
    cloud_storage.CloudStorageError.
  """
  fd, local_path = tempfile.mkstemp()
  try:
    cloud_storage.Copy(url, local_path)
    with open(local_path, 'r') as listing:
      return json.load(listing)
  except cloud_storage.CloudStorageError as e:
    logging.info("Failed to copy: %s" % e)
    return []
  finally:
    # Runs on every exit path, including the returns above.
    os.close(fd)
    os.unlink(local_path)
def Main(argv):
  """Run a map function over a set of traces and upload the JSON results.

  Args:
    argv: Full argument vector. argv[0] is skipped; the remainder is parsed
        as the positionals map_file_url, map_function_name, input_url and
        output_url, plus an optional --jobs integer (default 1).

  Returns:
    0 if the run reported no failures, 255 otherwise. Invalid arguments are
    reported through parser.error, which exits the process.
  """
  parser = argparse.ArgumentParser(description=_DEFAULT_DESCRIPTION)
  parser.add_argument('map_file_url')
  parser.add_argument('map_function_name')
  parser.add_argument('input_url')
  parser.add_argument('output_url')
  parser.add_argument('--jobs', type=int, default=1)
  args = parser.parse_args(argv[1:])

  map_file = _ReadMapperGCSFile(args.map_file_url)
  if not map_file:
    parser.error('Map does not exist.')
  if not args.map_function_name:
    # parser.error exits, so remove the downloaded mapper first instead of
    # leaving it behind in the temp directory.
    os.unlink(map_file)
    parser.error('Must provide map function name.')

  temp_directory = tempfile.mkdtemp()
  output_fd, file_name = tempfile.mkstemp()
  # Wrap the descriptor mkstemp already opened rather than reopening the file
  # by name; discarding the descriptor would leak it.
  ofile = os.fdopen(output_fd, 'w')
  try:
    output_formatter = json_output_formatter.JSONOutputFormatter(ofile)
    map_function_module = function_handle.ModuleToLoad(
        filename=os.path.abspath(map_file))
    map_function_handle = function_handle.FunctionHandle(
        modules_to_load=[map_function_module],
        function_name=args.map_function_name)

    trace_handles = _DownloadTraceHandles(args.input_url, temp_directory)
    runner = map_runner.MapRunner(trace_handles, map_function_handle,
                                  jobs=args.jobs,
                                  output_formatters=[output_formatter])
    results = runner.Run()

    # Push any buffered output to disk so the upload sees the whole file.
    ofile.flush()
    # TODO: gsutil cp file_name gs://output
    cloud_storage.Copy(file_name, args.output_url)

    return 255 if results.had_failures else 0
  finally:
    ofile.close()
    # The local results file is only a staging copy for the upload.
    os.unlink(file_name)
    os.unlink(map_file)
    shutil.rmtree(temp_directory)