def test_add_file(self, mock_create_dataset):
    """Test adding a new file to a labbook.

    Uploads a ~9 MB random file in chunks through the ``addDatasetFile``
    mutation, completes the upload transaction synchronously, and verifies
    the file landed in the dataset's current revision directory with a
    clean repo state and a commit message recording the upload.

    Args:
        mock_create_dataset: fixture tuple; index 0 is the config file
            path, index 3 is the GraphQL schema under test.
    """
    class DummyContext(object):
        # Minimal stand-in for the request context: exposes the uploaded
        # chunk under 'uploadChunk' the way the mutation expects.
        def __init__(self, file_handle):
            self.dataset_loader = None
            self.files = {'uploadChunk': file_handle}

    client = Client(mock_create_dataset[3], middleware=[DataloaderMiddleware()])

    # Create file to upload
    test_file = os.path.join(tempfile.gettempdir(), "myValidFile.dat")
    est_size = 9000000
    try:
        os.remove(test_file)
    except FileNotFoundError:
        # A leftover file from a previous run may not exist; that's fine.
        pass
    with open(test_file, 'wb') as tf:
        tf.write(os.urandom(est_size))
        new_file_size = os.path.getsize(tf.name)

    # Get upload params
    chunk_size = 4194000
    file_info = os.stat(test_file)
    file_size = int(file_info.st_size / 1000)
    total_chunks = int(math.ceil(file_info.st_size / chunk_size))

    ds = InventoryManager(mock_create_dataset[0]).load_dataset('default', 'default',
                                                               'dataset1')
    fsc = HostFilesystemCache(ds, 'default')
    target_file = os.path.join(fsc.current_revision_dir, "myValidFile.dat")

    txid = "000-unitest-transaction"

    with open(test_file, 'rb') as tf:
        # Check for file to exist (shouldn't yet)
        assert os.path.exists(target_file) is False

        for chunk_index in range(total_chunks):
            # Upload a chunk
            chunk = io.BytesIO()
            chunk.write(tf.read(chunk_size))
            chunk.seek(0)
            file = FileStorage(chunk)

            query = f"""
                        mutation addDatasetFile{{
                          addDatasetFile(input:{{owner:"default",
                                                  datasetName: "dataset1",
                                                  filePath: "myValidFile.dat",
                                                  transactionId: "{txid}",
                            chunkUploadParams:{{
                              uploadId: "fdsfdsfdsfdfs",
                              chunkSize: {chunk_size},
                              totalChunks: {total_chunks},
                              chunkIndex: {chunk_index},
                              fileSizeKb: {file_size},
                              filename: "{os.path.basename(test_file)}"
                            }}
                          }}) {{
                                newDatasetFileEdge {{
                                    node{{
                                      id
                                      key
                                      isDir
                                      size
                                    }}
                                }}
                          }}
                        }}
                        """
            r = client.execute(query, context_value=DummyContext(file))
            assert 'errors' not in r

    # So, these will only be populated once the last chunk is uploaded. Will be None otherwise.
    assert r['data']['addDatasetFile']['newDatasetFileEdge']['node']['isDir'] is False
    assert r['data']['addDatasetFile']['newDatasetFileEdge']['node']['key'] == 'myValidFile.dat'
    assert r['data']['addDatasetFile']['newDatasetFileEdge']['node']['size'] == f"{new_file_size}"

    # When done uploading, file should exist in the labbook
    assert os.path.exists(target_file)
    assert os.path.isfile(target_file)

    complete_query = f"""
    mutation completeQuery {{
      completeDatasetUploadTransaction(input: {{
        owner: "default",
        datasetName: "dataset1",
        transactionId: "{txid}"
      }}) {{
        success
      }}
    }}
    """
    r = client.execute(complete_query, context_value=DummyContext(file))
    assert 'errors' not in r

    # After completing the transaction the manifest should report a clean
    # working state (everything committed).
    m = Manifest(ds, 'default')
    status = m.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 0

    # The commit message should record exactly one uploaded file.
    assert 'Uploaded 1 new file(s)' in ds.git.log()[0]['message']
def test_add_file(self, mock_create_dataset):
    """Test adding a new file to a dataset.

    Same chunked upload as the synchronous variant, but
    ``completeDatasetUploadTransaction`` here schedules a background job;
    ``Dispatcher.dispatch_task`` is patched so the mocked config file can be
    injected into the job kwargs, then the job status is polled to completion.

    Args:
        mock_create_dataset: fixture tuple; index 0 is the config file
            path, index 3 is the GraphQL schema under test.
    """
    class DummyContext(object):
        # Minimal stand-in for the request context: exposes the uploaded
        # chunk under 'uploadChunk' the way the mutation expects.
        def __init__(self, file_handle):
            self.dataset_loader = None
            self.labbook_loader = None
            self.files = {'uploadChunk': file_handle}

    def dispatcher_mock(self, function_ref, kwargs, metadata):
        # Replacement for Dispatcher.dispatch_task (`self` is the Dispatcher
        # instance). Verifies the kwargs built by the mutation, injects the
        # mocked config file, then un-patches and dispatches for real.
        assert kwargs['logged_in_username'] == 'default'
        assert kwargs['logged_in_email'] == '*****@*****.**'
        assert kwargs['dataset_owner'] == 'default'
        assert kwargs['dataset_name'] == 'dataset1'

        # Inject mocked config file
        kwargs['config_file'] = mock_create_dataset[0]

        # Stop patching so job gets scheduled for real
        dispatcher_patch.stop()

        # Call same method as in mutation
        d = Dispatcher()
        kwargs['dispatcher'] = Dispatcher
        res = d.dispatch_task(
            gtmcore.dispatcher.dataset_jobs.complete_dataset_upload_transaction,
            kwargs=kwargs, metadata=metadata)

        return res

    client = Client(mock_create_dataset[3], middleware=[DataloaderMiddleware()])

    # Create file to upload
    test_file = os.path.join(tempfile.gettempdir(), "myValidFile.dat")
    est_size = 9000000
    try:
        os.remove(test_file)
    except FileNotFoundError:
        # A leftover file from a previous run may not exist; that's fine.
        pass
    with open(test_file, 'wb') as tf:
        tf.write(os.urandom(est_size))
        new_file_size = os.path.getsize(tf.name)

    # Get upload params
    chunk_size = 4194000
    file_info = os.stat(test_file)
    file_size = int(file_info.st_size / 1000)
    total_chunks = int(math.ceil(file_info.st_size / chunk_size))

    ds = InventoryManager(mock_create_dataset[0]).load_dataset('default', 'default',
                                                               'dataset1')
    fsc = HostFilesystemCache(ds, 'default')
    target_file = os.path.join(fsc.current_revision_dir, "myValidFile.dat")

    txid = "000-unitest-transaction"

    with open(test_file, 'rb') as tf:
        # Check for file to exist (shouldn't yet)
        assert os.path.exists(target_file) is False

        for chunk_index in range(total_chunks):
            # Upload a chunk
            chunk = io.BytesIO()
            chunk.write(tf.read(chunk_size))
            chunk.seek(0)
            file = FileStorage(chunk)

            query = f"""
                        mutation addDatasetFile{{
                          addDatasetFile(input:{{owner:"default",
                                                  datasetName: "dataset1",
                                                  filePath: "myValidFile.dat",
                                                  transactionId: "{txid}",
                            chunkUploadParams:{{
                              uploadId: "fdsfdsfdsfdfs",
                              chunkSize: {chunk_size},
                              totalChunks: {total_chunks},
                              chunkIndex: {chunk_index},
                              fileSize: "{file_size}",
                              filename: "{os.path.basename(test_file)}"
                            }}
                          }}) {{
                                newDatasetFileEdge {{
                                    node{{
                                      id
                                      key
                                      isDir
                                      size
                                    }}
                                }}
                          }}
                        }}
                        """
            r = client.execute(query, context_value=DummyContext(file))
            assert 'errors' not in r

    # So, these will only be populated once the last chunk is uploaded. Will be None otherwise.
    assert r['data']['addDatasetFile']['newDatasetFileEdge']['node']['isDir'] is False
    assert r['data']['addDatasetFile']['newDatasetFileEdge']['node']['key'] == 'myValidFile.dat'
    assert r['data']['addDatasetFile']['newDatasetFileEdge']['node']['size'] == f"{new_file_size}"

    # When done uploading, file should exist in the labbook
    assert os.path.exists(target_file)
    assert os.path.isfile(target_file)

    complete_query = f"""
    mutation completeQuery {{
      completeDatasetUploadTransaction(input: {{
        owner: "default",
        datasetName: "dataset1",
        transactionId: "{txid}"
      }}) {{
        backgroundJobKey
      }}
    }}
    """

    # Patch dispatch_task so you can inject the mocked config file
    dispatcher_patch = patch.object(Dispatcher, 'dispatch_task', dispatcher_mock)
    dispatcher_patch.start()

    r = client.execute(complete_query, context_value=DummyContext(None))
    assert 'errors' not in r

    job_query = f"""
                {{
                    jobStatus(jobId: "{r['data']['completeDatasetUploadTransaction']['backgroundJobKey']}")
                    {{
                        status
                        result
                        status
                        jobMetadata
                        failureMessage
                        startedAt
                        finishedAt
                    }}
                }}
                """

    # Poll the background job until it finishes, up to 20 * 0.25s = 5s.
    # BUGFIX: cnt was never incremented, so this loop could spin forever if
    # the job never reached 'finished', and the assert below was vacuous.
    cnt = 0
    while cnt < 20:
        job_result = client.execute(job_query, context_value=DummyContext(None))
        assert 'errors' not in job_result
        if job_result['data']['jobStatus']['status'] == 'finished':
            break
        time.sleep(.25)
        cnt += 1
    assert cnt < 20

    metadata = json.loads(job_result['data']['jobStatus']['jobMetadata'])
    assert metadata['percent_complete'] == 100
    assert metadata['feedback'] == 'Please wait while file contents are analyzed. 9 MB of 9 MB complete...'

    # Verify file was added and repo is clean
    m = Manifest(ds, 'default')
    status = m.status()
    assert len(status.created) == 0
    assert len(status.modified) == 0
    assert len(status.deleted) == 0

    assert 'Uploaded 1 new file(s)' in ds.git.log()[0]['message']