Ejemplo n.º 1
0
  def run(self, job_name, input_files):
    """Shuffle `input_files` into one newly created blob file per input.

    Completes immediately with an empty list when none of the input files
    has a non-zero size. Otherwise creates len(input_files) blobstore
    output files, fills the `_output_files` output with them and passes
    inputs, outputs and callback details to files.shuffler.shuffle.

    Args:
      job_name: prefix for the output blob names and for the shuffle name.
      input_files: names of the files to shuffle.
    """
    has_data = any(files.stat(name).st_size > 0 for name in input_files)
    if not has_data:
      self.complete([])
      return

    shuffled_outputs = [
        files.blobstore.create(
            _blobinfo_uploaded_filename=(
                job_name + "-shuffle-output-" + str(index)))
        for index in range(len(input_files))
    ]
    self.fill(self.outputs._output_files, shuffled_outputs)

    # The shuffle name carries the current Unix time in whole seconds.
    shuffle_name = "%s-%s" % (job_name, int(time.time()))
    callback_options = {
        "url": self.get_callback_url(),
        "method": "GET",
        "queue": self.queue_name,
        "version": os.environ["CURRENT_VERSION_ID"],
    }
    files.shuffler.shuffle(
        shuffle_name, input_files, shuffled_outputs, callback_options)
Ejemplo n.º 2
0
    def run(self, job_name, input_files):
        """Shuffle `input_files` into one newly created blob file per input.

        Completes immediately with an empty list when none of the input
        files has a non-zero size. Otherwise creates len(input_files)
        blobstore output files, fills the `_output_files` output with them
        and starts the shuffle with a callback aimed at the current
        version (and module, when it is not "default").

        Args:
            job_name: prefix for the output blob names and the shuffle name.
            input_files: names of the files to shuffle.
        """
        if not any(files.stat(name).st_size > 0 for name in input_files):
            self.complete([])
            return

        shuffled_outputs = [
            files.blobstore.create(
                _blobinfo_uploaded_filename=(
                    job_name + "-shuffle-output-" + str(index)))
            for index in range(len(input_files))
        ]
        self.fill(self.outputs._output_files, shuffled_outputs)

        callback_version = modules.get_current_version_name()
        current_module = modules.get_current_module_name()
        if current_module != "default":
            # The trailing dot is part of the original format string.
            # NOTE(review): presumably required by the callback consumer.
            callback_version = "%s.%s." % (callback_version, current_module)

        shuffle_name = "%s-%s" % (job_name, int(time.time()))
        callback_options = {
            "url": self.get_callback_url(),
            "method": "GET",
            "queue": self.queue_name,
            "version": callback_version,
        }
        files.shuffler.shuffle(
            shuffle_name, input_files, shuffled_outputs, callback_options)
Ejemplo n.º 3
0
    def run(self, job_name, input_files):
        """Start a shuffle of `input_files`, or finish early if all are empty.

        When no input file has a non-zero size the pipeline completes with
        an empty list. Otherwise one blobstore output file is created per
        input file, the `_output_files` output is filled with those names
        and files.shuffler.shuffle is invoked with the callback details.

        Args:
            job_name: prefix for the output blob names and the shuffle name.
            input_files: names of the files to shuffle.
        """
        nonempty_found = False
        for candidate in input_files:
            nonempty_found = files.stat(candidate).st_size > 0
            if nonempty_found:
                break
        if not nonempty_found:
            self.complete([])
            return

        destinations = []
        for shard in range(len(input_files)):
            blob_name = job_name + "-shuffle-output-" + str(shard)
            destinations.append(
                files.blobstore.create(_blobinfo_uploaded_filename=blob_name))
        self.fill(self.outputs._output_files, destinations)

        # The shuffle name carries the current Unix time in whole seconds.
        shuffle_name = "%s-%s" % (job_name, int(time.time()))
        callback_options = {
            "url": self.get_callback_url(),
            "method": "GET",
            "queue": self.queue_name,
            "version": os.environ["CURRENT_VERSION_ID"],
        }
        files.shuffler.shuffle(
            shuffle_name, input_files, destinations, callback_options)
Ejemplo n.º 4
0
    def testShuffleNoFile(self):
        """A ShufflePipeline given no input files yields only empty outputs."""
        pipeline = shuffler.ShufflePipeline("testjob", [])
        pipeline.start()
        test_support.execute_until_empty(self.taskqueue)

        # Reload the pipeline by id before inspecting its default output.
        reloaded = shuffler.ShufflePipeline.from_id(pipeline.pipeline_id)
        for output_name in reloaded.outputs.default.value:
            output_size = files.stat(output_name).st_size
            self.assertEqual(0, output_size)
  def testShuffleNoFile(self):
    """A ShufflePipeline given no input files yields only empty outputs."""
    no_inputs = []
    pipeline = shuffler.ShufflePipeline("testjob", no_inputs)
    pipeline.start()
    test_support.execute_until_empty(self.taskqueue)

    # Reload the pipeline by id before inspecting its default output.
    reloaded = shuffler.ShufflePipeline.from_id(pipeline.pipeline_id)
    for output_name in reloaded.outputs.default.value:
      output_size = files.stat(output_name).st_size
      self.assertEqual(0, output_size)
  def testShuffleNoData(self):
    """Shuffling three copies of a never-written blob gives empty outputs."""
    blob_file = files.blobstore.create()
    files.finalize(blob_file)
    # Re-derive the file name from the blob key of the finalized file.
    blob_key = files.blobstore.get_blob_key(blob_file)
    finalized_name = files.blobstore.get_file_name(blob_key)

    pipeline = shuffler.ShufflePipeline("testjob", [finalized_name] * 3)
    pipeline.start()
    test_support.execute_until_empty(self.taskqueue)

    # Reload the pipeline by id before inspecting its default output.
    reloaded = shuffler.ShufflePipeline.from_id(pipeline.pipeline_id)
    for output_name in reloaded.outputs.default.value:
      output_size = files.stat(output_name).st_size
      self.assertEqual(0, output_size)
Ejemplo n.º 7
0
def file_insert(key, value):
    """Store `value` at BUCKET/key in Google Cloud Storage; cache small files.

    The object is created as public-read text/plain, written and finalized.
    If the stored size is below 100 * 1024 bytes and MEMCACHED_ENABLED is
    truthy, the value is also put into memcache under `key`.

    Args:
        key: object name below BUCKET; also used as the memcache key.
        value: content to write.
    """
    gcs_path = BUCKET + '/' + key
    writable_name = files.gs.create(
        gcs_path, mime_type='text/plain', acl='public-read')

    with files.open(writable_name, 'a') as handle:
        handle.write(value)

    # Finalizing makes the file readable in Google Cloud Storage.
    files.finalize(writable_name)

    small_file_limit = 100 * 1024
    stored_size = files.stat(gcs_path).st_size
    if stored_size < small_file_limit and MEMCACHED_ENABLED:
        memcache.set(key, value)
Ejemplo n.º 8
0
    def testShuffleNoData(self):
        """Shuffling three copies of a never-written blob gives empty outputs."""
        blob_file = files.blobstore.create()
        files.finalize(blob_file)
        # Re-derive the file name from the blob key of the finalized file.
        blob_key = files.blobstore.get_blob_key(blob_file)
        finalized_name = files.blobstore.get_file_name(blob_key)

        pipeline = shuffler.ShufflePipeline("testjob", [finalized_name] * 3)
        pipeline.start()
        test_support.execute_until_empty(self.taskqueue)

        # Reload the pipeline by id before inspecting its default output.
        reloaded = shuffler.ShufflePipeline.from_id(pipeline.pipeline_id)
        for output_name in reloaded.outputs.default.value:
            output_size = files.stat(output_name).st_size
            self.assertEqual(0, output_size)
Ejemplo n.º 9
0
 def get(self):
     """Render index.html with the listing, its total size and file count."""
     objects = listing()
     # One stat per listed object; count and total both come from this pass.
     sizes = [files.stat(BUCKET + '/' + name).st_size for name in objects]
     context = {
             'listing': objects,
             'total_size': sum(sizes),
             'num_files': len(sizes),
             }
     page = JINJA_ENVIRONMENT.get_template('index.html')
     self.response.write(page.render(context))
Ejemplo n.º 10
0
    def run(self, job_name, input_files):
        """Shuffle `input_files` into one newly created blob file per input.

        Creates len(input_files) blobstore output files, fills the
        `_output_files` output with them and starts the shuffle with a
        callback aimed at the current version (and module, when it is not
        "default").

        Args:
            job_name: prefix for the output blob names and the shuffle name.
            input_files: names of the files to shuffle.
        """
        # The big shuffler cannot handle empty input, so finish right away
        # when there is nothing to shuffle.
        if not any(files.stat(name).st_size > 0 for name in input_files):
            self.complete([])
            return

        shuffled_outputs = [
            files.blobstore.create(
                _blobinfo_uploaded_filename=(
                    job_name + "-shuffle-output-" + str(index)))
            for index in range(len(input_files))
        ]
        self.fill(self.outputs._output_files, shuffled_outputs)

        # Let shuffler callbacks reach specific modules and specific
        # non-default versions of those modules.
        callback_version = modules.get_current_version_name()
        current_module = modules.get_current_module_name()
        if current_module != "default":
            # NOTE(user): The trailing dot is required: old versions of the
            # shuffler library put "myversion.12345678" in this field and
            # expected the admin-shuffler app to strip the timestamp suffix.
            callback_version = "%s.%s." % (callback_version, current_module)

        shuffle_name = "%s-%s" % (job_name, int(time.time()))
        callback_options = {
            "url": self.get_callback_url(),
            # NOTE(user): Always GET, because of how the admin_shuffler app
            # adds the callback task with additional URL params.
            "method": "GET",
            "queue": self.queue_name,
            "version": callback_version,
        }
        files.shuffler.shuffle(
            shuffle_name, input_files, shuffled_outputs, callback_options)
  def run(self, job_name, input_files):
    """Shuffle `input_files` into one newly created blob file per input.

    Creates len(input_files) blobstore output files, fills the
    `_output_files` output with them and starts the shuffle with a callback
    aimed at the current version (and module, when it is not "default").

    Args:
      job_name: prefix for the output blob names and the shuffle name.
      input_files: names of the files to shuffle.
    """
    # The big shuffler cannot handle empty input, so finish right away when
    # there is nothing to shuffle.
    has_data = any(files.stat(name).st_size > 0 for name in input_files)
    if not has_data:
      self.complete([])
      return

    shuffled_outputs = []
    for index in range(len(input_files)):
      blob_name = job_name + "-shuffle-output-" + str(index)
      shuffled_outputs.append(
          files.blobstore.create(_blobinfo_uploaded_filename=blob_name))
    self.fill(self.outputs._output_files, shuffled_outputs)

    # Let shuffler callbacks reach specific modules and specific
    # non-default versions of those modules.
    callback_version = modules.get_current_version_name()
    current_module = modules.get_current_module_name()
    if current_module != "default":
      # NOTE(user): The trailing dot is required: old versions of the
      # shuffler library put "myversion.12345678" in this field and
      # expected the admin-shuffler app to strip the timestamp suffix.
      callback_version = "%s.%s." % (callback_version, current_module)

    shuffle_name = "%s-%s" % (job_name, int(time.time()))
    callback_options = {
        "url": self.get_callback_url(),
        # NOTE(user): Always GET, because of how the admin_shuffler app
        # adds the callback task with additional URL params.
        "method": "GET",
        "queue": self.queue_name,
        "version": callback_version,
    }
    files.shuffler.shuffle(
        shuffle_name, input_files, shuffled_outputs, callback_options)
Ejemplo n.º 12
0
  def run(self, job_name, input_files):
    """Start a shuffle of `input_files`, or finish early if all are empty.

    When no input file has a non-zero size the pipeline completes with an
    empty list. Otherwise one blobstore output file is created per input
    file, the `_output_files` output is filled with those names and the
    shuffle is started with a callback aimed at the current version (and
    module, when it is not "default").

    Args:
      job_name: prefix for the output blob names and the shuffle name.
      input_files: names of the files to shuffle.
    """
    nonempty_found = False
    for candidate in input_files:
      nonempty_found = files.stat(candidate).st_size > 0
      if nonempty_found:
        break
    if not nonempty_found:
      self.complete([])
      return

    destinations = [
        files.blobstore.create(
            _blobinfo_uploaded_filename=(
                job_name + "-shuffle-output-" + str(shard)))
        for shard in range(len(input_files))
    ]
    self.fill(self.outputs._output_files, destinations)

    callback_version = modules.get_current_version_name()
    current_module = modules.get_current_module_name()
    if current_module != "default":
      # The trailing dot is part of the original format string.
      # NOTE(review): presumably required by the callback consumer.
      callback_version = "%s.%s." % (callback_version, current_module)

    shuffle_name = "%s-%s" % (job_name, int(time.time()))
    callback_options = {
        "url": self.get_callback_url(),
        "method": "GET",
        "queue": self.queue_name,
        "version": callback_version,
    }
    files.shuffler.shuffle(
        shuffle_name, input_files, destinations, callback_options)