def Run(self):
    """Write the string form of ``self.itergraph_params`` to the output file.

    The URI returned by ``self.GetOutput().GetUri()`` is converted with
    ``mr.UriToNfsPath`` and ``iwutil.EnsureParentPathExists`` is called on the
    result before the file is opened in ``'w'`` mode, so any existing content
    is overwritten.

    Returns:
      None.
    """
    output_uri = self.GetOutput().GetUri()
    local_path = mr.UriToNfsPath(output_uri)
    iwutil.EnsureParentPathExists(local_path)
    # The context manager closes the handle even when the write raises.
    with open(local_path, 'w') as out:
        out.write(str(self.itergraph_params))
def PreRunConfig(self):
    """Write the string form of ``self.query_scorer_params`` to the config file.

    The URI returned by ``self.config_pert.GetUri()`` is converted with
    ``mr.UriToNfsPath`` and ``iwutil.EnsureParentPathExists`` is called on the
    result before the file is opened in ``'w'`` mode, so any existing content
    is overwritten.

    Returns:
      None.
    """
    output_uri = self.config_pert.GetUri()
    local_path = mr.UriToNfsPath(output_uri)
    iwutil.EnsureParentPathExists(local_path)
    # The context manager closes the handle even when the write raises.
    with open(local_path, 'w') as out:
        out.write(str(self.query_scorer_params))
def GetChunkSizeForUri(uri):
    """Return the chunk size recorded in a directory's ``.dfs_attributes`` file.

    Args:
      uri: URI of an existing directory; both conditions are enforced with
        CHECK before the attribute file is read.

    Returns:
      The value of the ``ChunkSize=<n>`` entry, converted with ``long``.

    Raises:
      IndexError: if the attribute file has fewer than three lines.
      Whatever CHECK / CHECK_EQ raise when the uri is missing, is not a
      directory, or the third line is not a ``ChunkSize`` entry.
    """
    CHECK(py_pert.Exists(uri), 'expected uri to exist: %s' % uri)
    CHECK(
        py_pert.IsDirectory(uri),
        'Chunk size only defined for directories... See mapr docs for details')
    nfs_path = mr.UriToNfsPath(uri)
    dfs_attribute_path = '%s/.dfs_attributes' % (nfs_path)
    # Close the handle deterministically instead of relying on garbage
    # collection of the anonymous file object.
    with open(dfs_attribute_path, 'r') as attribute_file:
        lines = attribute_file.readlines()
    # The parse is positional: the third line must be the ChunkSize entry.
    tokens = lines[2].split('=')
    CHECK_EQ(tokens[0], 'ChunkSize')
    chunksize = long(tokens[1])
    return chunksize
def Run(self):
    """Run the pipes job, optionally enforcing a chunk size on its output.

    When ``self.output_chunk_size_bytes`` is not None:
      * before the job, ``self.output_path`` is created if it does not exist,
        its chunk size is set, and the value is read back and checked;
      * after the job, every URI from ``py_pert.GetShardUris`` for the output
        path is passed to ``EnsureChunkSizeForUri``.

    Returns:
      The value returned by the driver's ``Run()``.
    """
    # Parenthesized so the line parses as either a print statement or a
    # print() call; the emitted text is the same.
    print('about to run pipes flow: %s' % (self.pipes_binary))
    mr_driver = self.MakeDriver()

    # Set the output directory property so files are created with the
    # required chunk size.
    if self.output_chunk_size_bytes is not None:
        if not py_pert.Exists(self.output_path):
            nfs_path = mr.UriToNfsPath(self.output_path)
            os.makedirs(nfs_path)
        SetChunkSizeForUri(self.output_path, self.output_chunk_size_bytes)
        # Read the attribute back to confirm the directory carries the
        # requested chunk size before any output is written.
        CHECK_EQ(
            GetChunkSizeForUri(self.output_path),
            self.output_chunk_size_bytes)

    status = mr_driver.Run()

    # Ensure the created output shards have the requested chunk size.
    if self.output_chunk_size_bytes is not None:
        for uri in py_pert.GetShardUris(self.output_path):
            EnsureChunkSizeForUri(uri, self.output_chunk_size_bytes)
    return status
def SetChunkSizeForUri(uri, block_size):
    """Set the chunk size of a directory by rewriting its ``.dfs_attributes``.

    The attribute file is overwritten with a comment line, ``Compression=true``
    and ``ChunkSize=<block_size>``. The value is then read back through
    ``GetChunkSizeForUri`` and checked against ``block_size``.

    Args:
      uri: URI of an existing directory (both enforced with CHECK).
      block_size: chunk size to set; must be a multiple of 2**16 and at most
        1024 * 2**20.

    Returns:
      True. Failures surface through the CHECK* calls, not the return value.
    """
    CHECK_EQ(block_size % (2**16), 0)  # must be a multiple of 2**16
    CHECK_LE(
        block_size, 1024 * (2**20),
        'Currently libmaprfs has a limitation that prevents chunk sizes greater than 1GB.'
    )
    CHECK(py_pert.Exists(uri), 'expected uri to exist: %s' % uri)
    CHECK(
        py_pert.IsDirectory(uri),
        'Chunk size only defined for directories... See mapr docs for details')
    nfs_path = mr.UriToNfsPath(uri)
    dfs_attribute_path = '%s/.dfs_attributes' % (nfs_path)
    # The context manager closes (and therefore flushes) the control file even
    # if the write raises, before the value is read back below.
    with open(dfs_attribute_path, 'w') as control_file:
        # Three-line layout with ChunkSize last; GetChunkSizeForUri reads the
        # third line of this file.
        control_file.write(
            '# lines beginning with # are treated as comments\nCompression=true\nChunkSize=%d'
            % (block_size))
    new_block_size = GetChunkSizeForUri(uri)
    CHECK_EQ(new_block_size, block_size)
    return True