# this method is used to get the size of the files
def createDs3Obj(fileName):
    """Build the DS3 file object describing one local file.

    The object is named "prefix/" + fileName, and that name is recorded in
    the module-level fileMap so the local path can be looked up again when
    the object is uploaded.

    :param fileName: path of the local file to describe
    :return: a ds3.FileObject carrying the prefixed name and the file size
    :raises OSError: if os.stat cannot read fileName
    """
    size = os.stat(fileName).st_size
    ds3ObjName = "prefix/" + fileName
    fileMap[ds3ObjName] = fileName
    return ds3.FileObject(ds3ObjName, size)


# get the sizes for each file
fileList = ds3.FileObjectList(list(map(createDs3Obj, fileList)))

# submit the put bulk request to DS3
bulkResult = client.put_bulk_job_spectra_s3(
    ds3.PutBulkJobSpectraS3Request(bucketName, fileList))

# the bulk request will split the files over several chunks if it needs to
# we need to iterate over the chunks, ask the server for space to send
# the chunks, then send all the objects returned in the chunk
for chunk in bulkResult.result['ObjectsList']:
    allocateChunk = client.allocate_job_chunk_spectra_s3(
        ds3.AllocateJobChunkSpectraS3Request(chunk['ChunkId']))
    for obj in allocateChunk.result['ObjectList']:
        # open the local file in a with-block so the handle is closed after
        # every upload instead of leaking one descriptor per object
        # (assumes put_object has finished reading the stream when it
        # returns -- TODO confirm against the SDK)
        with open(fileMap[obj['Name']], "rb") as objectDataStream:
            # NOTE(review): the original text is cut off right after the
            # offset argument; any further arguments to PutObjectRequest are
            # not visible here and must be confirmed against the full source
            client.put_object(
                ds3.PutObjectRequest(bucketName,
                                     obj['Name'],
                                     obj['Length'],
                                     objectDataStream,
                                     offset=int(obj['Offset'])))
# this method is used to map a file path to a Ds3PutObject
def fileNameToDs3PutObject(filePath, prefix=""):
    """Describe one resource file as a ds3.Ds3PutObject.

    :param filePath: file name, resolved through pathForResource to read
        its size
    :param prefix: text prepended to filePath to form the object name
    :return: a ds3.Ds3PutObject built from prefix + filePath and the size
        reported by os.stat
    :raises OSError: if os.stat cannot read the resolved path
    """
    localPath = pathForResource(filePath)
    byteCount = os.stat(localPath).st_size
    objectName = prefix + filePath
    return ds3.Ds3PutObject(objectName, byteCount)


# this method is used to get the os specific path for an object located in
# the resources folder
def pathForResource(resourceName):
    """Return the path of resourceName inside the "resources" directory.

    The directory is looked up next to this source file.

    :param resourceName: file name relative to the resources folder
    :return: the joined path as a string
    """
    return os.path.join(os.path.dirname(str(__file__)), "resources", resourceName)


# get the sizes for each file
fileList = [fileNameToDs3PutObject(filePath) for filePath in fileList]

# submit the put bulk request to DS3
bulkResult = client.put_bulk_job_spectra_s3(
    ds3.PutBulkJobSpectraS3Request(bucketName, fileList))

# the bulk request will split the files over several chunks if it needs to.
# we then need to ask what chunks we can send, and then send them making
# sure we don't resend the same chunks

# create a set of the chunk ids which will be used to track
# what chunks have not been sent
chunkIds = {chunk['ChunkId'] for chunk in bulkResult.result['ObjectsList']}

# while we still have chunks to send
while chunkIds:
    # get a list of the available chunks that we can send
    # NOTE(review): the visible text ends after this statement; the rest of
    # the loop body (presumably what sends chunks and shrinks chunkIds) is
    # not shown here -- confirm against the full source
    availableChunks = client.get_job_chunks_ready_for_client_processing_spectra_s3(
        ds3.GetJobChunksReadyForClientProcessingSpectraS3Request(
            bulkResult.result['JobId']))