Exemple #1
0
 def Run(self):
   """Write ``str(self.itergraph_params)`` to the output file.

   The destination is the URI returned by ``self.GetOutput().GetUri()``,
   converted to a local path with ``mr.UriToNfsPath``.
   ``iwutil.EnsureParentPathExists`` is called on that path before the file
   is opened for writing (any existing content is truncated).

   Returns:
     None.
   """
   output_uri = self.GetOutput().GetUri()
   local_path = mr.UriToNfsPath(output_uri)
   iwutil.EnsureParentPathExists(local_path)
   # Use a context manager so the handle is closed even if the write raises.
   with open(local_path, 'w') as out:
     out.write(str(self.itergraph_params))
   return
Exemple #2
0
 def PreRunConfig(self):
   """Write ``str(self.query_scorer_params)`` to the config file.

   The destination is the URI returned by ``self.config_pert.GetUri()``,
   converted to a local path with ``mr.UriToNfsPath``.
   ``iwutil.EnsureParentPathExists`` is called on that path before the file
   is opened for writing (any existing content is truncated).

   Returns:
     None.
   """
   output_uri = self.config_pert.GetUri()
   local_path = mr.UriToNfsPath(output_uri)
   iwutil.EnsureParentPathExists(local_path)
   # Use a context manager so the handle is closed even if the write raises.
   with open(local_path, 'w') as out:
     out.write(str(self.query_scorer_params))
   return
Exemple #3
0
def GetChunkSizeForUri(uri):
    """Return the ChunkSize recorded in a directory's ``.dfs_attributes`` file.

    The URI is converted to a local path with ``mr.UriToNfsPath`` and the
    file ``<path>/.dfs_attributes`` is read. The first line of the form
    ``ChunkSize=<integer>`` supplies the result; lines without an ``=`` (such
    as ``#`` comment lines) and other keys are ignored.

    Args:
      uri: URI that must exist and be a directory.

    Returns:
      The chunk size as a ``long``.

    A CHECK fails if the URI does not exist, is not a directory, or the
    attributes file has no ``ChunkSize`` entry.
    """
    CHECK(py_pert.Exists(uri), 'expected uri to exist: %s' % uri)
    CHECK(
        py_pert.IsDirectory(uri),
        'Chunk size only defined for directories... See mapr docs for details')
    nfs_path = mr.UriToNfsPath(uri)
    dfs_attribute_path = '%s/.dfs_attributes' % (nfs_path)
    # Context manager guarantees the handle is released once the file is read.
    with open(dfs_attribute_path, 'r') as attribute_file:
        lines = attribute_file.readlines()
    # Look the key up by name instead of assuming it sits on the third line,
    # so a short or reordered file yields a clear CHECK failure or a correct
    # value rather than an IndexError.
    chunksize = None
    for line in lines:
        key, separator, value = line.partition('=')
        if separator and key.strip() == 'ChunkSize':
            chunksize = long(value)
            break
    CHECK(chunksize is not None,
          'no ChunkSize entry found in: %s' % dfs_attribute_path)
    return chunksize
Exemple #4
0
 def Run(self):
      """Run the driver from ``self.MakeDriver()`` and return its status.

      When ``self.output_chunk_size_bytes`` is not None, the output directory
      is prepared before the run: it is created via its NFS path if
      ``py_pert.Exists`` reports it missing, the chunk size is set on it, and
      the value is read back to confirm it. After the run,
      ``EnsureChunkSizeForUri`` is called on every shard URI of the output
      with the same chunk size.

      Returns:
        Whatever ``mr_driver.Run()`` returned.
      """
      # Parenthesised single-argument form prints the same text under the
      # Python 2 print statement and also parses under Python 3.
      print('about to run pipes flow: %s' % (self.pipes_binary))
      mr_driver = self.MakeDriver()
      chunk_size_requested = self.output_chunk_size_bytes is not None
      # set output directory property to create files with required chunk size
      if chunk_size_requested:
          if not py_pert.Exists(self.output_path):
              nfs_path = mr.UriToNfsPath(self.output_path)
              os.makedirs(nfs_path)
          SetChunkSizeForUri(self.output_path, self.output_chunk_size_bytes)
          # read the attribute back to confirm the directory now carries the
          # requested chunk size
          CHECK_EQ(
              GetChunkSizeForUri(self.output_path),
              self.output_chunk_size_bytes
          )
      status = mr_driver.Run()
      # ensure the created output has the requested chunk size
      if chunk_size_requested:
          for uri in py_pert.GetShardUris(self.output_path):
              EnsureChunkSizeForUri(uri, self.output_chunk_size_bytes)
      return status
Exemple #5
0
def SetChunkSizeForUri(uri, block_size):
    """Write a ``.dfs_attributes`` file setting the chunk size of a directory.

    The file ``<nfs path of uri>/.dfs_attributes`` is overwritten with a
    comment line, ``Compression=true`` and ``ChunkSize=<block_size>``. The
    value is then read back with ``GetChunkSizeForUri`` and checked against
    ``block_size``.

    Args:
      uri: URI that must exist and be a directory.
      block_size: chunk size in bytes; must be a multiple of 2**16 and at
        most 1024 * 2**20.

    Returns:
      True.

    A CHECK fails if ``block_size`` violates the constraints above, if the
    URI does not exist or is not a directory, or if the value read back
    differs from ``block_size``.
    """
    CHECK_EQ(block_size % (2**16), 0)  # must be a multiple of 2**16
    CHECK_LE(
        block_size, 1024 * (2**20),
        'Currently libmaprfs has a limitation that prevents chunk sizes greater than 1GB.'
    )
    CHECK(py_pert.Exists(uri), 'expected uri to exist: %s' % uri)
    CHECK(
        py_pert.IsDirectory(uri),
        'Chunk size only defined for directories... See mapr docs for details')
    nfs_path = mr.UriToNfsPath(uri)
    dfs_attribute_path = '%s/.dfs_attributes' % (nfs_path)
    # Context manager closes (and flushes) the file even if the write raises,
    # so the read-back below always sees the complete content.
    with open(dfs_attribute_path, 'w') as control_file:
        control_file.write(
            '# lines beginning with # are treated as comments\nCompression=true\nChunkSize=%d'
            % (block_size))
    new_block_size = GetChunkSizeForUri(uri)
    CHECK_EQ(new_block_size, block_size)
    return True