def test_twoinvalid_check_existing_file_status():
    '''
    Test to make sure both invalid statuses get passed back
    '''
    synids = ['syn1234', 'syn2345']
    names = ['first.txt', 'second.txt']
    md5s = ['3333', '44444']
    validation_statusdf = pd.DataFrame({
        'id': synids,
        'status': ['INVALID', 'INVALID'],
        'md5': md5s,
        'name': names
    })
    error_trackerdf = pd.DataFrame({
        'id': synids,
        'errors': ['Invalid file format', 'Invalid formatting issues']
    })
    # One entity per stored row, matching on name, id and md5
    entities = [
        synapseclient.Entity(name=name, id=synid, md5=md5)
        for name, synid, md5 in zip(names, synids, md5s)
    ]

    file_status = input_to_database.check_existing_file_status(
        mock_csv_query_result(validation_statusdf),
        mock_csv_query_result(error_trackerdf),
        entities)

    assert not file_status['to_validate']
    assert file_status['status_list'] == ['INVALID', 'INVALID']
    assert file_status['error_list'] == [
        'Invalid file format', 'Invalid formatting issues'
    ]
def test__find_by_obj_or_create__create():
    """Tests that the entity is stored and the stored entity returned"""
    to_create = synapseclient.Entity(name=str(uuid.uuid1()))
    stored = synapseclient.Entity(name=str(uuid.uuid1()))

    with patch.object(SYN, "store", return_value=stored) as mock_store:
        result = CREATE_CLS._find_by_obj_or_create(to_create)

    mock_store.assert_called_once_with(to_create, createOrUpdate=False)
    assert result == stored
def test_create_new_fileformat_table():
    """Tests that a new table is created from the existing table's columns,
    the database mapping is updated and the old table is moved with an
    ARCHIVED name"""
    fileformat = str(uuid.uuid1())
    new_table_name = str(uuid.uuid1())
    project_id = "syn234"
    archived_project_id = "syn23333"
    db_synid = "syn1111111"

    database_mappingdf = pd.DataFrame({
        'Database': [fileformat, "foo"],
        "Id": [db_synid, "bar"]
    })
    db_mapping_info = {'synid': 'syn666', 'df': database_mappingdf}
    # Existing table that gets archived
    table_ent = synapseclient.Entity(parentId="syn123",
                                     name="foo",
                                     primaryKey=['annot'],
                                     id='syn12345')
    # Replacement table returned by the mocked schema creation
    new_table_ent = synapseclient.Entity(parentId="syn123323",
                                         name="foofoo",
                                         id='syn23231')
    updated_mapping = Mock()
    moved_entity = Mock()

    with patch.object(process_functions, "get_dbmapping",
                      return_value=db_mapping_info) as mock_getdb,\
            patch.object(syn, "get",
                         return_value=table_ent) as mock_get,\
            patch.object(syn, "getTableColumns",
                         return_value=['foo', 'ddooo']) as mock_get_cols,\
            patch.object(process_functions, "_create_schema",
                         return_value=new_table_ent) as mock_create_schema,\
            patch.object(process_functions, "_update_database_mapping",
                         return_value=updated_mapping) as mock_update,\
            patch.object(process_functions, "_move_entity",
                         return_value=moved_entity) as mock_move,\
            patch.object(process_functions.time, "time", return_value=2):
        result = process_functions.create_new_fileformat_table(
            syn, fileformat, new_table_name, project_id, archived_project_id)

        mock_getdb.assert_called_once_with(syn, project_id)
        mock_get.assert_called_once_with(db_synid)
        mock_get_cols.assert_called_once_with(db_synid)
        mock_create_schema.assert_called_once_with(
            syn,
            table_name=new_table_name,
            columns=['foo', 'ddooo'],
            parentid=project_id,
            annotations=table_ent.annotations)
        mock_update.assert_called_once_with(
            syn, database_mappingdf, 'syn666', fileformat, new_table_ent.id)
        # time.time is patched to return 2, which shows up in the name
        mock_move.assert_called_once_with(
            syn, table_ent, archived_project_id, name="ARCHIVED 2-foo")
        assert result == {
            "newdb_ent": new_table_ent,
            "newdb_mappingdf": updated_mapping,
            "moved_ent": moved_entity
        }
def test__find_by_obj_or_create__wrongcode_raise():
    """Tests correct error is raised when not 409 code"""
    entity = synapseclient.Entity(name=str(uuid.uuid1()))
    # Mock SynapseHTTPError with 404 response
    mocked_404 = SynapseHTTPError("Not Found", response=Mock(status_code=404))
    with patch.object(SYN, "store",
                      side_effect=mocked_404) as patch_syn_store,\
            pytest.raises(SynapseHTTPError, match="Not Found"):
        CREATE_CLS._find_by_obj_or_create(entity)
    # Checked after the raises context exits: a statement placed after the
    # raising call inside the context would never be executed.
    patch_syn_store.assert_called_once_with(entity, createOrUpdate=False)
def test__find_by_obj_or_create__onlycreate_raise():
    """Tests only create flag raises error when entity exists"""
    entity = synapseclient.Entity(name=str(uuid.uuid1()))
    # Mock SynapseHTTPError with 409 response
    mocked_409 = SynapseHTTPError("foo", response=Mock(status_code=409))
    with patch.object(SYN, "store",
                      side_effect=mocked_409) as patch_syn_store,\
            pytest.raises(ValueError,
                          match="foo. To use existing entities, "
                                "set only_get to True."):
        CREATE_CLS._find_by_obj_or_create(entity)
    # Checked after the raises context exits: a statement placed after the
    # raising call inside the context would never be executed.
    patch_syn_store.assert_called_once_with(entity, createOrUpdate=False)
def test_invalid__get_status_and_error_list():
    '''
    Tests the correct status and error lists received
    when file is invalid.
    '''
    entity = synapseclient.Entity(id='syn1234',
                                  md5='44444',
                                  path='/path/to/foobar.txt',
                                  name='data_clinical_supp_SAGE.txt')
    entity.properties.versionNumber = '1'
    entity.properties.modifiedOn = "2019-06-21T18:59:18.456Z"
    entities = [entity]
    # This valid variable controls the validation status
    valid = False
    errors = 'invalid file content'

    input_status_list, invalid_errors_list = \
        input_to_database._get_status_and_error_list(
            valid, errors, entities)

    assert input_status_list == [{'entity': entity, 'status': 'INVALID'}]
    assert invalid_errors_list == [{'entity': entity, 'errors': errors}]
def test_dups_email_duplication_error():
    '''
    Test duplicated email sent
    '''
    duplicated_filesdf = pd.DataFrame({
        'id': ['syn1234'],
        'name': ['first.cbs']
    })
    entity = synapseclient.Entity(id='syn1234')
    entity.modifiedBy = '333'
    entity.createdBy = '333'
    # The mocked profile and the expected greeting share one user name.
    # NOTE(review): presumably email_duplication_error addresses the user by
    # the profile's userName - confirm against the implementation.
    username = "trial"
    error_email = (
        "Dear {},\n\n"
        "Your files ({}) are duplicated!  FILES SHOULD BE UPLOADED AS "
        "NEW VERSIONS AND THE ENTIRE DATASET SHOULD BE "
        "UPLOADED EVERYTIME".format(username, "first.cbs"))
    with patch.object(syn, "get", return_value=entity) as patch_syn_get,\
            patch.object(syn, "getUserProfile",
                         return_value={'userName': username}
                         ) as patch_syn_profile,\
            patch.object(syn, "sendMessage") as patch_send:
        input_to_database.email_duplication_error(syn, duplicated_filesdf)
        patch_syn_get.assert_called_once_with('syn1234')
        patch_syn_profile.assert_called_once_with('333')
        patch_send.assert_called_once_with(['333'],
                                           "GENIE Validation Error",
                                           error_email)
def test_store_assay_info_files():
    """Tests storing of assay information file"""
    assay_infodf = pd.DataFrame({'library_strategy': ['WXS'],
                                 'SEQ_ASSAY_ID': ['A']})
    clinicaldf = pd.DataFrame({'SEQ_ASSAY_ID': ['A']})
    release_dir = "./"
    path = os.path.join(release_dir, "assay_information_vTEST.txt")
    # The release directory is patched instead of assigned so the module
    # attribute is restored once the test finishes.
    with patch.object(database_to_staging, "GENIE_RELEASE_DIR",
                      release_dir),\
            patch.object(SYN, "create_snapshot_version",
                         return_value=2) as patch_create_version,\
            patch.object(process_functions, "get_syntabledf",
                         return_value=assay_infodf) as patch_table_query,\
            patch.object(database_to_staging, "store_file",
                         return_value=synapseclient.Entity()
                         ) as patch_storefile:
        wes_ids = database_to_staging.store_assay_info_files(
            SYN, GENIE_VERSION, FILEVIEW_SYNID, clinicaldf, CONSORTIUM_SYNID)
        patch_create_version.assert_called_once_with(FILEVIEW_SYNID,
                                                     comment=GENIE_VERSION)
        patch_table_query.assert_called_once_with(
            SYN,
            f"select * from {FILEVIEW_SYNID} where SEQ_ASSAY_ID in ('A')"
        )
        # The snapshot version (2) is appended to the provenance id
        patch_storefile.assert_called_once_with(
            SYN, path,
            parent=CONSORTIUM_SYNID,
            genieVersion=GENIE_VERSION,
            name="assay_information.txt",
            used=f"{FILEVIEW_SYNID}.2")
        assert wes_ids == ['A']
def test_wrong_permission_level():
    """ValueError is raised when an unknown permission level is given"""
    expected_message = r'permission_level must be one of these:.*'
    with pytest.raises(ValueError, match=expected_message):
        challengeutils.permissions._set_permissions(
            syn,
            synapseclient.Entity(),
            principalid="3",
            permission_level="foo")
def test_valid__get_status_and_error_list():
    '''
    Tests the correct status and error lists received
    when file is valid.
    '''
    entity = synapseclient.Entity(id='syn1234',
                                  md5='44444',
                                  path='/path/to/foobar.txt',
                                  name='data_clinical_supp_SAGE.txt')
    entity.properties.modifiedOn = "2019-06-21T18:59:18.456Z"
    entities = [entity]
    valid = True
    message = 'valid'

    input_status_list, invalid_errors_list = \
        input_to_database._get_status_and_error_list(
            valid, message, entities)

    assert input_status_list == [{'entity': entity, 'status': 'VALIDATED'}]
    # No errors are reported for a valid file
    assert not invalid_errors_list
def test_specifyloc_download_submission():
    '''
    Download submission json object with specified location
    '''
    submission_id = "12345"
    location = "."
    entity = synapseclient.Entity(versionNumber=4,
                                  concreteType='foo',
                                  id='syn123')
    # Raw submission as returned by the mocked getSubmission
    submission_dict = {
        'entity': entity,
        'evaluationId': 12345,
        'filePath': '/path/here'
    }
    expected_submission_dict = dict(
        docker_repository=None,
        docker_digest=None,
        entity_id=entity['id'],
        entity_version=entity.get('versionNumber'),
        entity_type=entity.get('concreteType'),
        evaluation_id=12345,
        file_path='/path/here')

    with mock.patch.object(syn, "getSubmission",
                           return_value=submission_dict) as mock_get_sub:
        sub_dict = challengeutils.utils.download_submission(
            syn, submission_id, download_location=location)

    mock_get_sub.assert_called_once_with(submission_id,
                                         downloadLocation=location)
    assert sub_dict == expected_submission_dict
def test_noname__move_entity():
    """Tests moving an entity without passing a new name"""
    new_parent = "syn1234"
    ent = synapseclient.Entity(name="foo", parentId="syn2222")

    with patch.object(syn, "store") as mock_store:
        process_functions._move_entity(syn, ent, new_parent)

    # Expected state of the stored entity: only the parent changes
    ent.parentId = new_parent
    mock_store.assert_called_once_with(ent)
def test_wrong_permission_level():
    """ValueError is raised when an unknown permission level is given"""
    expected_message = r'permission_level must be one of these:.*'
    with pytest.raises(ValueError, match=expected_message):
        permissions._set_permissions(
            SYN,
            synapseclient.Entity(),
            principalid="3",
            permission_level="foo")
def test_valid_validatefile():
    '''
    Tests the behavior of a file that gets validated and turns out
    to be valid
    '''
    filetype = "clinical"
    message = "Is valid"
    valid = True
    threads = 0
    validation_statusdf = pd.DataFrame()
    error_trackerdf = pd.DataFrame()

    entity = synapseclient.Entity(
        name="data_clinical_supp_SAGE.txt",
        id='syn1234',
        md5='44444',
        path='/path/to/data_clinical_supp_SAGE.txt')
    # This modifiedOn translates to: 1553428800000
    entity['modifiedOn'] = '2019-03-24T12:00:00.Z'
    entity.modifiedBy = '333'
    entity.createdBy = '444'
    entities = [entity]

    # Only a list is returned as opposed a list of lists when there are
    # invalid errors
    status_error_list_results = (
        [{'entity': entity, 'status': 'VALIDATED'}],
        [])
    expected_results = (
        [{
            'entity': entity,
            'status': 'VALIDATED',
            'fileType': filetype,
            'center': center
        }],
        [],
        [])
    # Nothing stored yet, so the file has to be validated
    existing_status = {
        'status_list': [],
        'error_list': [],
        'to_validate': True
    }

    with patch.object(GenieValidationHelper, "determine_filetype",
                      return_value=filetype) as mock_determine_filetype,\
            patch.object(input_to_database, "check_existing_file_status",
                         return_value=existing_status) as mock_check,\
            patch.object(GenieValidationHelper, "validate_single_file",
                         return_value=(valid, message)) as mock_validate,\
            patch.object(input_to_database, "_get_status_and_error_list",
                         return_value=status_error_list_results
                         ) as mock_get_staterror_list,\
            patch.object(input_to_database,
                         "_send_validation_error_email") as mock_send_email:
        validate_results = input_to_database.validatefile(
            syn, None, entities, validation_statusdf, error_trackerdf,
            center, threads, oncotree_link)

        assert expected_results == validate_results
        mock_validate.assert_called_once_with(oncotree_link=oncotree_link,
                                              nosymbol_check=False)
        mock_check.assert_called_once_with(validation_statusdf,
                                           error_trackerdf,
                                           entities)
        mock_determine_filetype.assert_called_once()
        mock_get_staterror_list.assert_called_once_with(
            valid, message, entities)
        mock_send_email.assert_not_called()
def test_store_gene_panel_files():
    """Tests that the gene panel file is queried, fetched, renamed into the
    release directory and stored"""
    data_gene_panel = pd.DataFrame({'mutations': ['PANEL1']})
    gene_paneldf = pd.DataFrame({'id': ['syn3333']})
    downloaded_panel = synapseclient.Entity(path="/foo/bar/PANEL1.txt",
                                            versionNumber=2)
    expected_query = (
        "select id from %s where cBioFileFormat = 'genePanel' "
        "and fileStage = 'staging' and "
        "name not in ('data_gene_panel_TEST.txt')") % FILEVIEW_SYNID

    with mock.patch.object(
            SYN, "tableQuery",
            return_value=Tablequerydf(gene_paneldf)) as mock_table_query,\
            mock.patch.object(
                database_to_staging, "store_file",
                return_value=synapseclient.Entity()) as mock_storefile,\
            mock.patch.object(
                SYN, "get",
                return_value=downloaded_panel) as mock_get,\
            mock.patch.object(os, "rename") as mock_rename:
        database_to_staging.store_gene_panel_files(
            SYN, FILEVIEW_SYNID, GENIE_VERSION, data_gene_panel,
            CONSORTIUM_SYNID, ["TEST"])

        release_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                    "PANEL1_vTEST.txt")
        mock_table_query.assert_called_once_with(expected_query)
        mock_storefile.assert_called_once_with(
            SYN,
            release_path,
            parent=CONSORTIUM_SYNID,
            genieVersion=GENIE_VERSION,
            name="PANEL1.txt",
            cBioFileFormat="genePanel",
            used='syn3333.2')
        mock_get.assert_called_once_with('syn3333')
        mock_rename.assert_called_once_with("/foo/bar/PANEL1.txt",
                                            release_path)
def test_name__move_entity():
    """Tests moving an entity while also passing a new name"""
    new_parent = "syn1234"
    new_name = "updated name"
    ent = synapseclient.Entity(name="foo", parentId="syn2222")

    with patch.object(syn, "store") as mock_store:
        process_functions._move_entity(syn, ent, new_parent, new_name)

    # Expected state of the stored entity: new parent and new name
    ent.parentId = new_parent
    ent.name = new_name
    mock_store.assert_called_once_with(ent)
def test__find_by_obj_or_create__get():
    """Tests the existing entity is fetched when store raises a 409"""
    concretetype = str(uuid.uuid1())
    requested = synapseclient.Entity(name=str(uuid.uuid1()),
                                     parentId=str(uuid.uuid1()),
                                     concreteType=concretetype)
    existing = synapseclient.Entity(name=str(uuid.uuid1()),
                                    id=str(uuid.uuid1()),
                                    parentId=str(uuid.uuid1()),
                                    concreteType=concretetype)
    conflict = SynapseHTTPError("foo", response=Mock(status_code=409))

    with patch.object(SYN, "store",
                      side_effect=conflict) as mock_store,\
            patch.object(GET_CLS, "_get_obj",
                         return_value=existing) as mock_get_obj:
        result = GET_CLS._find_by_obj_or_create(requested)

    assert result == existing
    mock_store.assert_called_once()
    mock_get_obj.assert_called_once_with(requested)
def test_invalid_check_existing_file_status():
    '''
    Test the stored status and error are returned for an invalid input
    '''
    invalid_entity = synapseclient.Entity(name='second.txt',
                                          id='syn2345',
                                          md5='44444')
    status_result = mock_csv_query_result(validation_statusdf)
    error_result = mock_csv_query_result(error_trackerdf)

    file_status = input_to_database.check_existing_file_status(
        status_result, error_result, [invalid_entity])

    assert not file_status['to_validate']
    assert file_status['status_list'] == ['INVALID']
    assert file_status['error_list'] == ['Invalid file format']
def test_valid_check_existing_file_status():
    '''
    Test the stored status is returned for an input that is already valid
    '''
    valid_entity = synapseclient.Entity(name='first.txt',
                                        id='syn1234',
                                        md5='3333')
    status_result = mock_csv_query_result(validation_statusdf)
    error_result = mock_csv_query_result(error_trackerdf)

    file_status = input_to_database.check_existing_file_status(
        status_result, error_result, [valid_entity])

    assert not file_status['to_validate']
    assert file_status['status_list'] == ['VALID']
    assert file_status['error_list'] == []
def test_diffnametovalidate_check_existing_file_status():
    '''
    A name that differs from the stored name means the file must be
    re-validated
    '''
    renamed_entity = synapseclient.Entity(name='second.txt',
                                          id='syn1234',
                                          md5='3333')
    status_result = mock_csv_query_result(validation_statusdf)
    error_result = mock_csv_query_result(emptydf)

    file_status = input_to_database.check_existing_file_status(
        status_result, error_result, [renamed_entity])

    assert file_status['to_validate']
    assert file_status['status_list'] == ['VALID']
    assert file_status['error_list'] == []
def test_nostorederrors_check_existing_file_status():
    '''
    An invalid file without any stored error must be re-validated
    '''
    invalid_entity = synapseclient.Entity(name='second.txt',
                                          id='syn2345',
                                          md5='44444')
    status_result = mock_csv_query_result(validation_statusdf)
    error_result = mock_csv_query_result(emptydf)

    file_status = input_to_database.check_existing_file_status(
        status_result, error_result, [invalid_entity])

    assert file_status['to_validate']
    assert file_status['status_list'] == ['INVALID']
    assert file_status['error_list'] == []
def test_diffmd5validate_check_existing_file_status():
    '''
    An md5 that differs from the stored md5 means the file must be
    re-validated
    '''
    changed_entity = synapseclient.Entity(name='first.txt',
                                          id='syn1234',
                                          md5='44444')
    changed_entity.properties.versionNumber = '1'
    status_result = mock_csv_query_result(validation_statusdf)
    error_result = mock_csv_query_result(emptydf)

    file_status = input_to_database.check_existing_file_status(
        status_result, error_result, [changed_entity])

    assert file_status['to_validate']
    assert file_status['status_list'] == ['VALID']
    assert file_status['error_list'] == []
def test_unvalidatedinput_check_existing_file_status():
    '''
    Test the values returned by input that hasn't been validated
    '''
    new_entity = synapseclient.Entity(id='syn1234')
    # Both the status and the error query results are empty
    status_result = mock_csv_query_result(emptydf)
    error_result = mock_csv_query_result(emptydf)

    file_status = input_to_database.check_existing_file_status(
        status_result, error_result, [new_entity])

    assert file_status['to_validate']
    assert file_status['status_list'] == []
    assert file_status['error_list'] == []
def test_create_and_archive_maf_database():
    '''
    Test the creation and archive of the maf database
    '''
    old_maf_synid = 'syn12345'
    table_ent = synapseclient.Entity(parentId="syn123",
                                     name="foo",
                                     primaryKey=['annot'],
                                     id=old_maf_synid)
    new_maf_ent = synapseclient.Entity(id="syn2222")
    database_synid_mappingdf = pd.DataFrame({
        'Database': ['vcf2maf', 'main'],
        'Id': [old_maf_synid, 'syn23455']
    })

    with patch.object(syn, "store",
                      return_value=new_maf_ent) as mock_store,\
            patch.object(syn, "setPermissions",
                         return_value=None) as mock_set_permissions,\
            patch.object(syn, "get",
                         return_value=table_ent) as mock_get,\
            patch.object(syn, "getTableColumns",
                         return_value=['foo', 'ddooo']) as mock_get_columns:
        database_mappingdf = \
            input_to_database.create_and_archive_maf_database(
                syn, database_synid_mappingdf)

        mapped_ids = database_mappingdf['Id']
        is_maf = database_mappingdf['Database'] == 'vcf2maf'
        is_main = database_mappingdf['Database'] == 'main'
        # The vcf2maf row points at the new table, the main row is untouched
        assert mapped_ids[is_maf].values[0] == new_maf_ent.id
        assert mapped_ids[is_main].values[0] == 'syn23455'
        mock_get_columns.assert_called_once_with('syn12345')
        mock_get.assert_called_once_with('syn12345')
        assert mock_store.call_count == 3
        mock_set_permissions.assert_called_once_with(
            new_maf_ent.id, 3326313, [])
from mock import patch import pytest import challengeutils import synapseclient syn = synapseclient.Synapse() SET_PERMS = {"set"} @pytest.fixture(params=[ # tuple with (input, expectedOutput) (synapseclient.Project(), None, "view", challengeutils.permissions.ENTITY_PERMS_MAPPINGS['view']), (synapseclient.Folder(parentId="syn123"), None, "download", challengeutils.permissions.ENTITY_PERMS_MAPPINGS['download']), (synapseclient.Entity(), None, "edit", challengeutils.permissions.ENTITY_PERMS_MAPPINGS['edit']), (synapseclient.Schema(parentId="syn123"), None, "edit_and_delete", challengeutils.permissions.ENTITY_PERMS_MAPPINGS['edit_and_delete']), (synapseclient.File(parentId="syn123"), None, "admin", challengeutils.permissions.ENTITY_PERMS_MAPPINGS['admin']), (synapseclient.Entity(), None, "remove", challengeutils.permissions.ENTITY_PERMS_MAPPINGS['remove']), (synapseclient.Evaluation(contentSource="syn123"), None, "view", challengeutils.permissions.EVALUATION_PERMS_MAPPINGS['view']), (synapseclient.Evaluation(contentSource="syn123"), None, "submit", challengeutils.permissions.EVALUATION_PERMS_MAPPINGS['submit']), (synapseclient.Evaluation(contentSource="syn123"), None, "score", challengeutils.permissions.EVALUATION_PERMS_MAPPINGS['score']), (synapseclient.Evaluation(contentSource="syn123"), None, "admin", challengeutils.permissions.EVALUATION_PERMS_MAPPINGS['admin']), (synapseclient.Evaluation(contentSource="syn123"), None, "remove",
from challengeutils import permissions SYN = create_autospec(synapseclient.Synapse) SET_PERMS = {"set"} @pytest.mark.parametrize( "entity,principalid,permission_level,mapped", [ # tuple with (input, expectedOutput) (synapseclient.Project(), None, "view", permissions.ENTITY_PERMS_MAPPINGS['view']), (synapseclient.Folder(parentId="syn123"), None, "download", permissions.ENTITY_PERMS_MAPPINGS['download']), (synapseclient.Entity(), None, "edit", permissions.ENTITY_PERMS_MAPPINGS['edit']), (synapseclient.Schema(parentId="syn123"), None, "edit_and_delete", permissions.ENTITY_PERMS_MAPPINGS['edit_and_delete']), (synapseclient.File(parentId="syn123"), None, "admin", permissions.ENTITY_PERMS_MAPPINGS['admin']), (synapseclient.Entity(), None, "remove", permissions.ENTITY_PERMS_MAPPINGS['remove']), (synapseclient.Evaluation(contentSource="syn123"), None, "view", permissions.EVALUATION_PERMS_MAPPINGS['view']), (synapseclient.Evaluation(contentSource="syn123"), None, "submit", permissions.EVALUATION_PERMS_MAPPINGS['submit']), (synapseclient.Evaluation(contentSource="syn123"), None, "score", permissions.EVALUATION_PERMS_MAPPINGS['score']), (synapseclient.Evaluation(contentSource="syn123"), None, "admin", permissions.EVALUATION_PERMS_MAPPINGS['admin']),
def test_invalid_validatefile():
    '''
    Tests the behavior of a file that gets validated and turns out
    to be invalid
    '''
    filetype = "clinical"
    message = "Is invalid"
    valid = False
    threads = 0
    validation_statusdf = pd.DataFrame()
    error_trackerdf = pd.DataFrame(columns=['id'], dtype=str)

    entity = synapseclient.Entity(
        name="data_clinical_supp_SAGE.txt",
        id='syn1234',
        md5='44444',
        path='/path/to/data_clinical_supp_SAGE.txt')
    # This modifiedOn translates to: 1553428800000
    entity['modifiedOn'] = '2019-03-24T12:00:00.Z'
    entity.modifiedBy = '333'
    entity.createdBy = '444'
    entities = [entity]

    status_error_list_results = (
        [{'entity': entity, 'status': 'INVALID'}],
        [{'entity': entity, 'errors': message}])
    expected_results = (
        [{
            'entity': entity,
            'status': 'INVALID',
            'fileType': filetype,
            'center': center
        }],
        [{
            'entity': entity,
            'errors': message,
            'fileType': filetype,
            'center': center
        }],
        # File names, error message and the users tied to the entity
        [(['data_clinical_supp_SAGE.txt'], 'Is invalid', ['333', '444'])])
    # Nothing stored yet, so the file has to be validated
    existing_status = {
        'status_list': [],
        'error_list': [],
        'to_validate': True
    }

    with patch.object(ValidationHelper, "determine_filetype",
                      return_value=filetype) as mock_determine_filetype,\
            patch.object(input_to_database, "check_existing_file_status",
                         return_value=existing_status) as mock_check,\
            patch.object(ValidationHelper, "validate_single_file",
                         return_value=(valid, message)) as mock_validate,\
            patch.object(input_to_database, "_get_status_and_error_list",
                         return_value=status_error_list_results
                         ) as mock_get_staterror_list:
        validate_results = input_to_database.validatefile(
            syn, None, entities, validation_statusdf, error_trackerdf,
            center, threads, validator_cls=ValidationHelper)

        assert expected_results == validate_results
        mock_validate.assert_called_once_with()
        mock_check.assert_called_once_with(validation_statusdf,
                                           error_trackerdf,
                                           entities)
        mock_determine_filetype.assert_called_once()
        mock_get_staterror_list.assert_called_once_with(
            valid, message, entities)
def test_validation(self):
    """Test validation steps"""
    process = "main"
    filetype = "clinical"
    modified_on = 1561143558000
    database_mapping_df = pd.DataFrame({
        'Database': ["clinical", 'validationStatus', 'errorTracker'],
        'Id': ['syn222', 'syn333', 'syn444']
    })
    entity = synapseclient.Entity(id='syn1234',
                                  md5='44444',
                                  path='/path/to/foobar.txt',
                                  name='data_clinical_supp_SAGE.txt')
    entities = [entity]

    # Values handed back by the mocked validatefile
    input_status_list = [[
        entity.id, entity.path, entity.md5, 'VALIDATED', entity.name,
        modified_on, filetype, center
    ]]
    invalid_errors_list = []
    messages = []
    validatefile_result = (input_status_list, invalid_errors_list, messages)

    new_tables = {
        'validation_statusdf': self.validation_statusdf,
        'error_trackingdf': self.errors_df,
        'duplicated_filesdf': self.empty_dup
    }
    validationstatus_mock = emptytable_mock()
    errortracking_mock = emptytable_mock()
    validator_cls = Mock()

    with patch.object(syn, "tableQuery",
                      side_effect=[validationstatus_mock,
                                   errortracking_mock]) as mock_query,\
            patch.object(input_to_database, "validatefile",
                         return_value=validatefile_result
                         ) as mock_validatefile,\
            patch.object(input_to_database, "build_validation_status_table",
                         return_value=self.validation_statusdf),\
            patch.object(input_to_database, "build_error_tracking_table",
                         return_value=self.errors_df),\
            patch.object(input_to_database, "_update_tables_content",
                         return_value=new_tables),\
            patch.object(input_to_database,
                         "update_status_and_error_tables"):
        valid_filedf = input_to_database.validation(
            syn, "syn123", center, process, entities,
            database_mapping_df, oncotree_link,
            format_registry={"test": validator_cls})

        # One query each for the validation status and error tables
        assert mock_query.call_count == 2
        mock_validatefile.assert_called_once_with(
            syn, "syn123", entity,
            validationstatus_mock,
            errortracking_mock,
            center='SAGE',
            threads=1,
            oncotree_link=oncotree_link,
            format_registry={"test": validator_cls})
        expected_filedf = self.validation_statusdf[
            ['id', 'path', 'fileType', 'name']]
        assert valid_filedf.equals(expected_filedf)
def test_already_validated_validatefile():
    '''
    Test that stored results are reused for already validated files
    '''
    filetype = "markdown"
    status = "INVALID"
    errors = "Invalid file"
    valid = False
    threads = 0
    validation_statusdf = pd.DataFrame()
    error_trackerdf = pd.DataFrame()

    entity = synapseclient.Entity(
        name="data_clinical_supp_SAGE.txt",
        id='syn1234',
        md5='44444',
        path='/path/to/data_clinical_supp_SAGE.txt')
    # This modifiedOn translates to: 1553428800000
    entity['modifiedOn'] = '2019-03-24T12:00:00.Z'
    entity.modifiedBy = '333'
    entity.createdBy = '444'
    entities = [entity]

    # Stored status says there is nothing left to validate
    check_file_status_dict = {
        'status_list': [status],
        'error_list': [errors],
        'to_validate': False
    }
    status_error_list_results = (
        [{'entity': entity, 'status': status}],
        [{'entity': entity, 'errors': errors}])
    expected_results = (
        [{
            'entity': entity,
            'status': status,
            'fileType': filetype,
            'center': center
        }],
        [{
            'entity': entity,
            'errors': errors,
            'fileType': filetype,
            'center': center
        }],
        [])

    with patch.object(GenieValidationHelper, "determine_filetype",
                      return_value=filetype) as mock_determine_filetype,\
            patch.object(input_to_database, "check_existing_file_status",
                         return_value=check_file_status_dict) as mock_check,\
            patch.object(GenieValidationHelper, "validate_single_file",
                         return_value=(valid, errors)) as mock_validate,\
            patch.object(input_to_database, "_get_status_and_error_list",
                         return_value=status_error_list_results
                         ) as mock_get_staterror_list,\
            patch.object(input_to_database,
                         "_send_validation_error_email") as mock_send_email:
        validate_results = input_to_database.validatefile(
            syn,
            None,
            entities,
            validation_statusdf,
            error_trackerdf,
            center,
            threads,
            oncotree_link,
        )

        assert expected_results == validate_results
        mock_validate.assert_not_called()
        mock_check.assert_called_once_with(validation_statusdf,
                                           error_trackerdf,
                                           entities)
        mock_determine_filetype.assert_called_once()
        mock_get_staterror_list.assert_not_called()
        mock_send_email.assert_not_called()
def test_validation():
    '''
    Test validation steps
    '''
    process = "main"
    filetype = "clinical"
    thread = 2
    testing = False
    modified_on = 1561143558000
    validation_statusdf = pd.DataFrame({
        'id': ['syn1234'],
        'status': ['VALIDATED'],
        'path': ["/path/to/file"],
        'fileType': ['clinical']
    })
    database_mapping_df = pd.DataFrame({
        'Database': ["clinical", 'validationStatus', 'errorTracker'],
        'Id': ['syn222', 'syn333', 'syn444']
    })
    entity = synapseclient.Entity(id='syn1234',
                                  md5='44444',
                                  path='/path/to/foobar.txt',
                                  name='data_clinical_supp_SAGE.txt')
    entities = [entity]

    # Values handed back by the mocked validatefile
    input_status_list = [[
        entity.id, entity.path, entity.md5, 'VALIDATED', entity.name,
        modified_on, filetype, center
    ]]
    invalid_errors_list = []
    messages = []
    validatefile_result = (input_status_list, invalid_errors_list, messages)

    validationstatus_mock = emptytable_mock()
    errortracking_mock = emptytable_mock()

    with patch.object(input_to_database, "get_center_input_files",
                      return_value=entities) as mock_get_center,\
            patch.object(syn, "tableQuery",
                         side_effect=[validationstatus_mock,
                                      errortracking_mock]
                         ) as mock_tablequery,\
            patch.object(input_to_database, "validatefile",
                         return_value=validatefile_result
                         ) as mock_validatefile,\
            patch.object(input_to_database,
                         "update_status_and_error_tables",
                         return_value=validation_statusdf
                         ) as mock_update_status:
        valid_filedf = input_to_database.validation(
            syn, center, process, center_mapping_df,
            database_mapping_df, thread, testing, oncotree_link,
            genie.config.PROCESS_FILES)

        mock_get_center.assert_called_once_with(syn, center_input_synid,
                                                center, process)
        # One query each for the validation status and error tables
        assert mock_tablequery.call_count == 2
        mock_validatefile.assert_called_once_with(
            syn, entity,
            validationstatus_mock,
            errortracking_mock,
            center='SAGE',
            threads=1,
            testing=False,
            oncotree_link=oncotree_link,
            format_registry=genie.config.PROCESS_FILES)
        mock_update_status.assert_called_once_with(syn,
                                                   input_status_list,
                                                   [],
                                                   validationstatus_mock,
                                                   errortracking_mock)
        expected_filedf = validation_statusdf[['id', 'path', 'fileType']]
        assert valid_filedf.equals(expected_filedf)