Example #1
0
 def _WillProcess(self):
     """Makes sure the cache file exists, then returns a handle to it.

     When self.cache_file is not on disk yet, it is populated by copying
     self.canonical_url from cloud storage.

     Returns:
       A URLFileHandle built from the canonical URL and a file:// URL for
       the cache file, or None if the copy raised CloudStorageError.
     """
     already_cached = os.path.exists(self.cache_file)
     if not already_cached:
         try:
             cloud_storage.Copy(self.canonical_url, self.cache_file)
         except cloud_storage.CloudStorageError:
             # The download failed; signal that with None instead of raising.
             return None
     remote_url = self.canonical_url
     local_url = 'file://' + self.cache_file
     return URLFileHandle(remote_url, local_url)
def _ReadMapperGCSFile(url):
  """Downloads the mapper file at |url| into a fresh temporary file.

  Args:
    url: Cloud storage location handed to cloud_storage.Copy.

  Returns:
    Path of the temporary file holding the downloaded copy, or None if the
    copy raised CloudStorageError (the temporary file is removed in that
    case). On success the file is left on disk for the caller to delete.
  """
  file_handle, file_name = tempfile.mkstemp()
  # Only the path is used below. mkstemp() hands back an open descriptor that
  # must be closed explicitly, otherwise it leaks on the success path.
  os.close(file_handle)
  try:
    cloud_storage.Copy(url, file_name)
  except cloud_storage.CloudStorageError as e:
    logging.info("Failed to copy: %s", e)
    os.unlink(file_name)
    return None
  return file_name
def _ReadTracesGCSFile(url):
  """Fetches the JSON document at |url| and returns its decoded contents.

  The document is copied into a scratch temporary file, which is always
  closed and removed before this function returns.

  Args:
    url: Cloud storage location handed to cloud_storage.Copy.

  Returns:
    The value decoded from the downloaded JSON, or an empty list if the copy
    raised CloudStorageError.
  """
  scratch_fd, scratch_path = tempfile.mkstemp()
  try:
    cloud_storage.Copy(url, scratch_path)
    with open(scratch_path, 'r') as scratch:
      return json.load(scratch)
  except cloud_storage.CloudStorageError as copy_error:
    logging.info("Failed to copy: %s" % copy_error)
    return []
  finally:
    # Runs on every exit path, including the returns above.
    os.close(scratch_fd)
    os.unlink(scratch_path)
def Main(argv):
  """Runs a map function over a set of traces and uploads the results.

  Downloads the mapper file, loads the named map function from it, runs it
  over the trace handles obtained from the input URL, writes the results as
  JSON to a temporary file and copies that file to the output URL.

  Args:
    argv: Full argument vector; argv[0] is skipped. Expects the positional
        arguments map_file_url, map_function_name, input_url and output_url,
        plus an optional --jobs count (default 1).

  Returns:
    0 if the run reported no failures, 255 otherwise. Invalid arguments are
    reported through parser.error(), which exits the process.
  """
  parser = argparse.ArgumentParser(description=_DEFAULT_DESCRIPTION)
  parser.add_argument('map_file_url')
  parser.add_argument('map_function_name')
  parser.add_argument('input_url')
  parser.add_argument('output_url')
  parser.add_argument('--jobs', type=int, default=1)

  args = parser.parse_args(argv[1:])

  map_file = _ReadMapperGCSFile(args.map_file_url)
  if not map_file:
    parser.error('Map does not exist.')

  if not args.map_function_name:
    # parser.error() does not return, so drop the downloaded mapper first.
    os.unlink(map_file)
    parser.error('Must provide map function name.')

  temp_directory = tempfile.mkdtemp()
  # Wrap the descriptor mkstemp() already opened instead of opening the path
  # a second time, so no descriptor is leaked.
  output_fd, file_name = tempfile.mkstemp()
  ofile = os.fdopen(output_fd, 'w')

  try:
    output_formatter = json_output_formatter.JSONOutputFormatter(ofile)
    map_function_module = function_handle.ModuleToLoad(
        filename=os.path.abspath(map_file))
    map_function_handle = function_handle.FunctionHandle(
        modules_to_load=[map_function_module],
        function_name=args.map_function_name)

    trace_handles = _DownloadTraceHandles(args.input_url, temp_directory)
    runner = map_runner.MapRunner(trace_handles, map_function_handle,
                                  jobs=args.jobs,
                                  output_formatters=[output_formatter])
    results = runner.Run()

    # Push any buffered output to disk before the file is read for upload.
    ofile.flush()
    cloud_storage.Copy(file_name, args.output_url)

    if results.had_failures:
      return 255
    return 0
  finally:
    ofile.close()
    # Remove every temporary artifact created for this run.
    os.unlink(file_name)
    os.unlink(map_file)
    shutil.rmtree(temp_directory)