def test_twoinvalid_check_existing_file_status():
    '''
    Two files that were both recorded as INVALID: both statuses and both
    stored errors are expected back, and no validation is requested
    '''
    synids = ['syn1234', 'syn2345']
    md5s = ['3333', '44444']
    names = ['first.txt', 'second.txt']

    validation_statusdf = pd.DataFrame({
        'id': synids,
        'status': ['INVALID', 'INVALID'],
        'md5': md5s,
        'name': names
    })
    error_trackerdf = pd.DataFrame({
        'id': synids,
        'errors': ['Invalid file format', 'Invalid formatting issues']
    })
    # One entity per stored row, matching on name, id and md5
    entities = [
        synapseclient.Entity(name=name, id=synid, md5=md5)
        for name, synid, md5 in zip(names, synids, md5s)
    ]

    status_query = mock_csv_query_result(validation_statusdf)
    error_query = mock_csv_query_result(error_trackerdf)
    file_status = input_to_database.check_existing_file_status(
        status_query, error_query, entities)

    assert not file_status['to_validate']
    assert file_status['status_list'] == ['INVALID', 'INVALID']
    assert file_status['error_list'] == [
        'Invalid file format', 'Invalid formatting issues'
    ]
Exemple #2
0
def test__find_by_obj_or_create__create():
    """Entity is stored exactly once and the stored result is handed back"""
    to_create = synapseclient.Entity(name=str(uuid.uuid1()))
    stored = synapseclient.Entity(name=str(uuid.uuid1()))
    store_patch = patch.object(SYN, "store", return_value=stored)
    with store_patch as store_mock:
        result = CREATE_CLS._find_by_obj_or_create(to_create)
        store_mock.assert_called_once_with(to_create, createOrUpdate=False)
        assert result == stored
def test_create_new_fileformat_table():
    """Checks every collaborator call made while replacing a file format
    table, and the dictionary that is returned at the end"""
    fileformat = str(uuid.uuid1())
    old_table_synid = "syn1111111"
    mappingdf = pd.DataFrame({
        'Database': [fileformat, "foo"],
        "Id": [old_table_synid, "bar"]
    })
    mapping_info = {'synid': 'syn666', 'df': mappingdf}
    old_table = synapseclient.Entity(parentId="syn123",
                                     name="foo",
                                     primaryKey=['annot'],
                                     id='syn12345')
    project_id = "syn234"
    archived_project_id = "syn23333"
    new_table_name = str(uuid.uuid1())

    created_table = synapseclient.Entity(parentId="syn123323",
                                         name="foofoo",
                                         id='syn23231')
    updated_mapping = Mock()
    archived_table = Mock()
    # `time.time` is pinned to 2, which is the number that shows up in the
    # expected archived name below
    with patch.object(process_functions, "get_dbmapping",
                      return_value=mapping_info) as getdb_mock,\
         patch.object(syn, "get",
                      return_value=old_table) as get_mock,\
         patch.object(syn, "getTableColumns",
                      return_value=['foo', 'ddooo']) as columns_mock,\
         patch.object(process_functions, "_create_schema",
                      return_value=created_table) as schema_mock,\
         patch.object(process_functions,
                      "_update_database_mapping",
                      return_value=updated_mapping) as update_mock,\
         patch.object(process_functions, "_move_entity",
                      return_value=archived_table) as move_mock,\
         patch.object(process_functions.time, "time", return_value=2):
        result = process_functions.create_new_fileformat_table(
            syn, fileformat, new_table_name, project_id, archived_project_id)

        # Lookup of the existing table
        getdb_mock.assert_called_once_with(syn, project_id)
        get_mock.assert_called_once_with(old_table_synid)
        columns_mock.assert_called_once_with(old_table_synid)
        # Creation of the replacement table
        schema_mock.assert_called_once_with(
            syn,
            table_name=new_table_name,
            columns=['foo', 'ddooo'],
            parentid=project_id,
            annotations=old_table.annotations)
        # Mapping update and archiving of the old table
        update_mock.assert_called_once_with(syn, mappingdf, 'syn666',
                                            fileformat, created_table.id)
        move_mock.assert_called_once_with(syn,
                                          old_table,
                                          archived_project_id,
                                          name="ARCHIVED 2-foo")
        assert result == {
            "newdb_ent": created_table,
            "newdb_mappingdf": updated_mapping,
            "moved_ent": archived_table
        }
Exemple #4
0
def test__find_by_obj_or_create__wrongcode_raise():
    """Tests correct error is raised when not 409 code"""
    entity = synapseclient.Entity(name=str(uuid.uuid1()))
    # Mock SynapseHTTPError with 404 response
    mocked_404 = SynapseHTTPError("Not Found", response=Mock(status_code=404))
    with patch.object(SYN, "store",
                      side_effect=mocked_404) as patch_syn_store:
        with pytest.raises(SynapseHTTPError, match="Not Found"):
            CREATE_CLS._find_by_obj_or_create(entity)
        # Kept outside of `pytest.raises`: a statement placed after the
        # raising call inside that block is never executed.
        patch_syn_store.assert_called_once_with(entity, createOrUpdate=False)
Exemple #5
0
def test__find_by_obj_or_create__onlycreate_raise():
    """Tests only create flag raises error when entity exists"""
    entity = synapseclient.Entity(name=str(uuid.uuid1()))
    # Mock SynapseHTTPError with 409 response
    mocked_409 = SynapseHTTPError("foo", response=Mock(status_code=409))
    with patch.object(SYN, "store",
                      side_effect=mocked_409) as patch_syn_store:
        with pytest.raises(ValueError, match="foo. To use existing entities, "
                                             "set only_get to True."):
            CREATE_CLS._find_by_obj_or_create(entity)
        # Kept outside of `pytest.raises`: a statement placed after the
        # raising call inside that block is never executed.
        patch_syn_store.assert_called_once_with(entity, createOrUpdate=False)
def test_invalid__get_status_and_error_list():
    '''
    Tests the correct status and error lists received
    when file is invalid.
    '''
    modified_on_string = "2019-06-21T18:59:18.456Z"
    entity = synapseclient.Entity(id='syn1234',
                                  md5='44444',
                                  path='/path/to/foobar.txt',
                                  name='data_clinical_supp_SAGE.txt')
    entity.properties.versionNumber = '1'
    entity.properties.modifiedOn = modified_on_string

    entities = [entity]
    # This valid variable control the validation status
    valid = False
    errors = 'invalid file content'

    input_status_list, invalid_errors_list = \
        input_to_database._get_status_and_error_list(
            valid, errors, entities)
    # An invalid file yields one INVALID status record and one error record
    assert input_status_list == [{'entity': entity, 'status': 'INVALID'}]
    assert invalid_errors_list == [{'entity': entity, 'errors': errors}]
def test_dups_email_duplication_error():
    '''
    Test duplicated email sent
    '''
    duplicated_filesdf = pd.DataFrame({
        'id': ['syn1234'],
        'name': ['first.cbs']
    })
    entity = synapseclient.Entity(id='syn1234')
    entity.modifiedBy = '333'
    entity.createdBy = '333'
    # Single source for the user name: it is served by the mocked profile
    # and expected in the greeting of the email, so the two cannot drift.
    username = "trial"
    error_email = (
        "Dear {},\n\n"
        "Your files ({}) are duplicated!  FILES SHOULD BE UPLOADED AS "
        "NEW VERSIONS AND THE ENTIRE DATASET SHOULD BE "
        "UPLOADED EVERYTIME".format(username, "first.cbs"))
    with patch.object(syn, "get", return_value=entity) as patch_syn_get,\
         patch.object(syn, "getUserProfile",
                      return_value={'userName':
                                    username}) as patch_syn_profile,\
         patch.object(syn, "sendMessage") as patch_send:
        input_to_database.email_duplication_error(syn, duplicated_filesdf)
        patch_syn_get.assert_called_once_with('syn1234')
        # modifiedBy and createdBy are the same user, so a single profile
        # lookup and a single recipient are expected
        patch_syn_profile.assert_called_once_with('333')
        patch_send.assert_called_once_with(['333'], "GENIE Validation Error",
                                           error_email)
Exemple #8
0
def test_store_assay_info_files():
    """Tests storing of assay information file"""
    assay_infodf = pd.DataFrame({'library_strategy': ['WXS'],
                                 'SEQ_ASSAY_ID': ['A']})
    clinicaldf = pd.DataFrame({'SEQ_ASSAY_ID': ['A']})
    release_dir = "./"
    path = os.path.join(release_dir, "assay_information_vTEST.txt")
    # GENIE_RELEASE_DIR is patched rather than assigned so the module-level
    # value is restored when the test ends and does not leak into others.
    with patch.object(database_to_staging, "GENIE_RELEASE_DIR",
                      release_dir),\
         patch.object(SYN, "create_snapshot_version",
                      return_value=2) as patch_create_version,\
         patch.object(process_functions, "get_syntabledf",
                      return_value=assay_infodf) as patch_table_query,\
         patch.object(database_to_staging, "store_file",
                      return_value=synapseclient.Entity()) as patch_storefile:
        wes_ids = database_to_staging.store_assay_info_files(SYN,
                                                             GENIE_VERSION,
                                                             FILEVIEW_SYNID,
                                                             clinicaldf,
                                                             CONSORTIUM_SYNID)
        patch_create_version.assert_called_once_with(FILEVIEW_SYNID,
                                                     comment=GENIE_VERSION)
        patch_table_query.assert_called_once_with(
            SYN,
            f"select * from {FILEVIEW_SYNID} where SEQ_ASSAY_ID in ('A')"
        )
        # `used` carries the snapshot version returned by the mock (2)
        patch_storefile.assert_called_once_with(SYN, path,
                                                parent=CONSORTIUM_SYNID,
                                                genieVersion=GENIE_VERSION,
                                                name="assay_information.txt",
                                                used=f"{FILEVIEW_SYNID}.2")
        assert wes_ids == ['A']
Exemple #9
0
def test_wrong_permission_level():
    """ValueError is expected for a permission level that is not known"""
    expected_message = r'permission_level must be one of these:.*'
    with pytest.raises(ValueError, match=expected_message):
        challengeutils.permissions._set_permissions(
            syn,
            synapseclient.Entity(),
            principalid="3",
            permission_level="foo")
def test_valid__get_status_and_error_list():
    '''
    Tests the correct status and error lists received
    when file is valid.
    '''
    modified_on_string = "2019-06-21T18:59:18.456Z"

    entity = synapseclient.Entity(id='syn1234',
                                  md5='44444',
                                  path='/path/to/foobar.txt',
                                  name='data_clinical_supp_SAGE.txt')
    entity.properties.modifiedOn = modified_on_string

    entities = [entity]

    valid = True
    message = 'valid'

    input_status_list, invalid_errors_list = \
        input_to_database._get_status_and_error_list(
            valid, message, entities)
    # A valid file yields one VALIDATED status record and no error records
    assert input_status_list == [{'entity': entity, 'status': 'VALIDATED'}]
    assert not invalid_errors_list
Exemple #11
0
def test_specifyloc_download_submission():
    '''
    Submission fetched with an explicit download location is converted
    into the expected flat dictionary
    '''
    entity = synapseclient.Entity(versionNumber=4,
                                  concreteType='foo',
                                  id='syn123')
    submission = {
        'entity': entity,
        'evaluationId': 12345,
        'filePath': '/path/here'
    }
    # No docker information is part of the mocked submission
    expected = {
        'docker_repository': None,
        'docker_digest': None,
        'entity_id': entity['id'],
        'entity_version': entity.get('versionNumber'),
        'entity_type': entity.get('concreteType'),
        'evaluation_id': 12345,
        'file_path': '/path/here'
    }
    submission_patch = mock.patch.object(syn, "getSubmission",
                                         return_value=submission)
    with submission_patch as get_submission_mock:
        result = challengeutils.utils.download_submission(
            syn, "12345", download_location=".")
        get_submission_mock.assert_called_once_with("12345",
                                                    downloadLocation=".")
        assert result == expected
def test_noname__move_entity():
    """Tests not changing entity name"""
    ent = synapseclient.Entity(name="foo", parentId="syn2222")
    new_parent = "syn1234"
    with patch.object(syn, "store") as patch_syn_store:
        process_functions._move_entity(syn, ent, new_parent)
        patch_syn_store.assert_called_once()
        # Inspect what was actually stored. Updating `ent` in the test and
        # then comparing the stored object against it could never fail,
        # because both sides would be the very same object.
        (stored_ent,), store_kwargs = patch_syn_store.call_args
        assert not store_kwargs
        assert stored_ent.parentId == new_parent
        assert stored_ent.name == "foo"
def test_wrong_permission_level():
    """ValueError is expected when the permission level is not a known one"""
    expected_message = r'permission_level must be one of these:.*'
    with pytest.raises(ValueError, match=expected_message):
        permissions._set_permissions(
            SYN,
            synapseclient.Entity(),
            principalid="3",
            permission_level="foo")
def test_valid_validatefile():
    '''
    File that has to be validated and turns out valid: a single VALIDATED
    record enriched with fileType and center is expected, and the error
    email helper must not be called
    '''
    validation_statusdf = pd.DataFrame()
    error_trackerdf = pd.DataFrame()
    entity = synapseclient.Entity(name="data_clinical_supp_SAGE.txt",
                                  id='syn1234',
                                  md5='44444',
                                  path='/path/to/data_clinical_supp_SAGE.txt')
    # This modifiedOn translates to: 1553428800000
    entity['modifiedOn'] = '2019-03-24T12:00:00.Z'
    entity.modifiedBy = '333'
    entity.createdBy = '444'
    entities = [entity]
    threads = 0
    valid = True
    message = "Is valid"
    filetype = "clinical"

    # What the patched `_get_status_and_error_list` hands back: one status
    # record and an empty error list
    status_error_list_results = (
        [{'entity': entity, 'status': 'VALIDATED'}],
        [],
    )
    expected_results = (
        [{'entity': entity,
          'status': 'VALIDATED',
          'fileType': filetype,
          'center': center}],
        [],
        [],
    )
    # Stored state says nothing is known yet and validation is required
    existing_status = {'status_list': [],
                       'error_list': [],
                       'to_validate': True}
    with patch.object(GenieValidationHelper, "determine_filetype",
                      return_value=filetype) as filetype_mock,\
         patch.object(input_to_database, "check_existing_file_status",
                      return_value=existing_status) as check_mock,\
         patch.object(GenieValidationHelper, "validate_single_file",
                      return_value=(valid, message)) as validate_mock,\
         patch.object(input_to_database, "_get_status_and_error_list",
                      return_value=status_error_list_results) as staterror_mock,\
         patch.object(input_to_database,
                      "_send_validation_error_email") as email_mock:

        validate_results = input_to_database.validatefile(
            syn, None, entities, validation_statusdf, error_trackerdf, center,
            threads, oncotree_link)

        assert expected_results == validate_results
        validate_mock.assert_called_once_with(oncotree_link=oncotree_link,
                                              nosymbol_check=False)
        check_mock.assert_called_once_with(validation_statusdf,
                                           error_trackerdf, entities)
        filetype_mock.assert_called_once()
        staterror_mock.assert_called_once_with(valid, message, entities)
        email_mock.assert_not_called()
Exemple #15
0
def test_store_gene_panel_files():
    """Checks the query, download, rename and store calls made for one
    gene panel file"""
    data_gene_panel = pd.DataFrame({'mutations': ['PANEL1']})
    gene_paneldf = pd.DataFrame({'id': ['syn3333']})

    query_patch = mock.patch.object(
        SYN, "tableQuery", return_value=Tablequerydf(gene_paneldf))
    store_patch = mock.patch.object(
        database_to_staging, "store_file",
        return_value=synapseclient.Entity())
    get_patch = mock.patch.object(
        SYN, "get",
        return_value=synapseclient.Entity(path="/foo/bar/PANEL1.txt",
                                          versionNumber=2))
    rename_patch = mock.patch.object(os, "rename")

    with query_patch as query_mock,\
         store_patch as store_mock,\
         get_patch as get_mock,\
         rename_patch as rename_mock:

        database_to_staging.store_gene_panel_files(SYN,
                                                   FILEVIEW_SYNID,
                                                   GENIE_VERSION,
                                                   data_gene_panel,
                                                   CONSORTIUM_SYNID,
                                                   ["TEST"])

        query_mock.assert_called_once_with(
            "select id from %s where cBioFileFormat = 'genePanel' "
            "and fileStage = 'staging' and "
            "name not in ('data_gene_panel_TEST.txt')" % FILEVIEW_SYNID)

        # `used` combines the queried id with the mocked versionNumber
        store_mock.assert_called_once_with(
            SYN,
            os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                         "PANEL1_vTEST.txt"),
            parent=CONSORTIUM_SYNID,
            genieVersion=GENIE_VERSION,
            name="PANEL1.txt",
            cBioFileFormat="genePanel",
            used='syn3333.2')

        get_mock.assert_called_once_with('syn3333')
        # The downloaded file is expected to be renamed into the release
        # directory under its versioned name
        rename_mock.assert_called_once_with(
            "/foo/bar/PANEL1.txt",
            os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                         "PANEL1_vTEST.txt"))
def test_name__move_entity():
    """Tests entity name is updated"""
    ent = synapseclient.Entity(name="foo", parentId="syn2222")
    new_parent = "syn1234"
    new_name = "updated name"
    with patch.object(syn, "store") as patch_syn_store:
        process_functions._move_entity(syn, ent, new_parent, new_name)
        patch_syn_store.assert_called_once()
        # Inspect what was actually stored. Updating `ent` in the test and
        # then comparing the stored object against it could never fail,
        # because both sides would be the very same object.
        (stored_ent,), store_kwargs = patch_syn_store.call_args
        assert not store_kwargs
        assert stored_ent.parentId == new_parent
        assert stored_ent.name == new_name
Exemple #17
0
def test__find_by_obj_or_create__get():
    """When `store` fails with a 409, the entity is fetched through
    `_get_obj` instead"""
    concretetype = str(uuid.uuid1())
    requested = synapseclient.Entity(name=str(uuid.uuid1()),
                                     parentId=str(uuid.uuid1()),
                                     concreteType=concretetype)
    existing = synapseclient.Entity(name=str(uuid.uuid1()),
                                    id=str(uuid.uuid1()),
                                    parentId=str(uuid.uuid1()),
                                    concreteType=concretetype)
    conflict = SynapseHTTPError("foo", response=Mock(status_code=409))

    store_patch = patch.object(SYN, "store", side_effect=conflict)
    get_patch = patch.object(GET_CLS, "_get_obj", return_value=existing)
    with store_patch as store_mock, get_patch as get_mock:
        result = GET_CLS._find_by_obj_or_create(requested)
        assert result == existing
        store_mock.assert_called_once()
        get_mock.assert_called_once_with(requested)
def test_invalid_check_existing_file_status():
    '''
    Entity with a stored INVALID status: the status and its stored error
    are expected back and no validation is requested
    '''
    entities = [
        synapseclient.Entity(name='second.txt', id='syn2345', md5='44444')
    ]
    status_query = mock_csv_query_result(validation_statusdf)
    error_query = mock_csv_query_result(error_trackerdf)
    file_status = input_to_database.check_existing_file_status(
        status_query, error_query, entities)
    assert not file_status['to_validate']
    assert file_status['status_list'] == ['INVALID']
    assert file_status['error_list'] == ['Invalid file format']
def test_valid_check_existing_file_status():
    '''
    Entity with a stored VALID status: the status is expected back with
    no errors and no validation is requested
    '''
    entities = [
        synapseclient.Entity(name='first.txt', id='syn1234', md5='3333')
    ]
    status_query = mock_csv_query_result(validation_statusdf)
    error_query = mock_csv_query_result(error_trackerdf)
    file_status = input_to_database.check_existing_file_status(
        status_query, error_query, entities)
    assert not file_status['to_validate']
    assert file_status['status_list'] == ['VALID']
    assert file_status['error_list'] == []
def test_diffnametovalidate_check_existing_file_status():
    '''
    Entity whose name differs from the stored name: validation has to be
    requested again
    '''
    renamed = synapseclient.Entity(name='second.txt',
                                   id='syn1234',
                                   md5='3333')
    entities = [renamed]
    status_query = mock_csv_query_result(validation_statusdf)
    error_query = mock_csv_query_result(emptydf)
    file_status = input_to_database.check_existing_file_status(
        status_query, error_query, entities)
    assert file_status['to_validate']
    assert file_status['status_list'] == ['VALID']
    assert file_status['error_list'] == []
def test_nostorederrors_check_existing_file_status():
    '''
    Entity with a stored status but an empty error table: validation has
    to be requested again
    '''
    entities = [
        synapseclient.Entity(name='second.txt', id='syn2345', md5='44444')
    ]
    status_query = mock_csv_query_result(validation_statusdf)
    # Nothing is stored in the error tracker for this entity
    error_query = mock_csv_query_result(emptydf)
    file_status = input_to_database.check_existing_file_status(
        status_query, error_query, entities)
    assert file_status['to_validate']
    assert file_status['status_list'] == ['INVALID']
    assert file_status['error_list'] == []
def test_diffmd5validate_check_existing_file_status():
    '''
    Entity whose md5 differs from the stored md5: validation has to be
    requested again
    '''
    changed = synapseclient.Entity(name='first.txt',
                                   id='syn1234',
                                   md5='44444')
    changed.properties.versionNumber = '1'
    entities = [changed]
    status_query = mock_csv_query_result(validation_statusdf)
    error_query = mock_csv_query_result(emptydf)
    file_status = input_to_database.check_existing_file_status(
        status_query, error_query, entities)
    assert file_status['to_validate']
    assert file_status['status_list'] == ['VALID']
    assert file_status['error_list'] == []
def test_unvalidatedinput_check_existing_file_status():
    '''
    Entity with nothing stored for it: validation is requested and both
    returned lists are empty
    '''
    entities = [synapseclient.Entity(id='syn1234')]

    # Both the status and the error query are built from the empty frame
    status_query = mock_csv_query_result(emptydf)
    error_query = mock_csv_query_result(emptydf)
    file_status = input_to_database.check_existing_file_status(
        status_query, error_query, entities)
    assert file_status['to_validate']
    assert file_status['status_list'] == []
    assert file_status['error_list'] == []
def test_create_and_archive_maf_database():
    '''
    The vcf2maf entry of the mapping must point at the newly stored
    entity while the other entries stay as they were
    '''
    old_maf_ent = synapseclient.Entity(parentId="syn123",
                                       name="foo",
                                       primaryKey=['annot'],
                                       id='syn12345')
    new_maf_ent = synapseclient.Entity(id="syn2222")
    database_synid_mappingdf = pd.DataFrame({
        'Database': ['vcf2maf', 'main'],
        'Id': ['syn12345', 'syn23455']
    })

    with patch.object(syn, "store",
                      return_value=new_maf_ent) as store_mock,\
         patch.object(syn, "setPermissions",
                      return_value=None) as permissions_mock,\
         patch.object(syn, "get",
                      return_value=old_maf_ent) as get_mock,\
         patch.object(syn, "getTableColumns",
                      return_value=['foo', 'ddooo']) as columns_mock:

        database_mappingdf = input_to_database.create_and_archive_maf_database(
            syn, database_synid_mappingdf)

        is_vcf2maf = database_mappingdf['Database'] == 'vcf2maf'
        vcf2maf_ids = database_mappingdf['Id'][is_vcf2maf]
        assert vcf2maf_ids.values[0] == new_maf_ent.id

        is_main = database_mappingdf['Database'] == 'main'
        main_ids = database_mappingdf['Id'][is_main]
        assert main_ids.values[0] == 'syn23455'

        columns_mock.assert_called_once_with('syn12345')
        get_mock.assert_called_once_with('syn12345')
        assert store_mock.call_count == 3
        # An empty access list is set for principal 3326313 on the new table
        permissions_mock.assert_called_once_with(
            new_maf_ent.id, 3326313, [])
Exemple #25
0
from mock import patch
import pytest
import challengeutils
import synapseclient
# Synapse client object referenced as `syn` by the tests; it is only
# constructed here, no login call appears on these lines.
syn = synapseclient.Synapse()
# NOTE(review): presumably a stand-in value returned by a mocked
# permission-setting call; its use is not visible in this excerpt - confirm.
SET_PERMS = {"set"}


@pytest.fixture(params=[
    # tuple with (input, expectedOutput)
    (synapseclient.Project(), None, "view",
     challengeutils.permissions.ENTITY_PERMS_MAPPINGS['view']),
    (synapseclient.Folder(parentId="syn123"), None, "download",
     challengeutils.permissions.ENTITY_PERMS_MAPPINGS['download']),
    (synapseclient.Entity(), None, "edit",
     challengeutils.permissions.ENTITY_PERMS_MAPPINGS['edit']),
    (synapseclient.Schema(parentId="syn123"), None, "edit_and_delete",
     challengeutils.permissions.ENTITY_PERMS_MAPPINGS['edit_and_delete']),
    (synapseclient.File(parentId="syn123"), None, "admin",
     challengeutils.permissions.ENTITY_PERMS_MAPPINGS['admin']),
    (synapseclient.Entity(), None, "remove",
     challengeutils.permissions.ENTITY_PERMS_MAPPINGS['remove']),
    (synapseclient.Evaluation(contentSource="syn123"), None, "view",
     challengeutils.permissions.EVALUATION_PERMS_MAPPINGS['view']),
    (synapseclient.Evaluation(contentSource="syn123"), None, "submit",
     challengeutils.permissions.EVALUATION_PERMS_MAPPINGS['submit']),
    (synapseclient.Evaluation(contentSource="syn123"), None, "score",
     challengeutils.permissions.EVALUATION_PERMS_MAPPINGS['score']),
    (synapseclient.Evaluation(contentSource="syn123"), None, "admin",
     challengeutils.permissions.EVALUATION_PERMS_MAPPINGS['admin']),
    (synapseclient.Evaluation(contentSource="syn123"), None, "remove",
from challengeutils import permissions

# Mock built with `create_autospec` from the `synapseclient.Synapse` class,
# referenced as `SYN` by the tests in place of a real client object.
SYN = create_autospec(synapseclient.Synapse)
# NOTE(review): presumably a stand-in value returned by a mocked
# permission-setting call; its use is not visible in this excerpt - confirm.
SET_PERMS = {"set"}


@pytest.mark.parametrize(
    "entity,principalid,permission_level,mapped",
    [
        # tuple with (input, expectedOutput)
        (synapseclient.Project(), None, "view",
         permissions.ENTITY_PERMS_MAPPINGS['view']),
        (synapseclient.Folder(parentId="syn123"), None, "download",
         permissions.ENTITY_PERMS_MAPPINGS['download']),
        (synapseclient.Entity(), None, "edit",
         permissions.ENTITY_PERMS_MAPPINGS['edit']),
        (synapseclient.Schema(parentId="syn123"), None, "edit_and_delete",
         permissions.ENTITY_PERMS_MAPPINGS['edit_and_delete']),
        (synapseclient.File(parentId="syn123"), None, "admin",
         permissions.ENTITY_PERMS_MAPPINGS['admin']),
        (synapseclient.Entity(), None, "remove",
         permissions.ENTITY_PERMS_MAPPINGS['remove']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "view",
         permissions.EVALUATION_PERMS_MAPPINGS['view']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "submit",
         permissions.EVALUATION_PERMS_MAPPINGS['submit']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "score",
         permissions.EVALUATION_PERMS_MAPPINGS['score']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "admin",
         permissions.EVALUATION_PERMS_MAPPINGS['admin']),
def test_invalid_validatefile():
    '''
    File that has to be validated and turns out invalid: an INVALID status
    record, an error record and one message tuple are expected back
    '''
    validation_statusdf = pd.DataFrame()
    error_trackerdf = pd.DataFrame(columns=['id'], dtype=str)
    entity = synapseclient.Entity(name="data_clinical_supp_SAGE.txt",
                                  id='syn1234',
                                  md5='44444',
                                  path='/path/to/data_clinical_supp_SAGE.txt')
    # This modifiedOn translates to: 1553428800000
    entity['modifiedOn'] = '2019-03-24T12:00:00.Z'
    entity.modifiedBy = '333'
    entity.createdBy = '444'
    entities = [entity]
    threads = 0
    valid = False
    message = "Is invalid"
    filetype = "clinical"

    # What the patched `_get_status_and_error_list` hands back
    status_error_list_results = (
        [{'entity': entity, 'status': 'INVALID'}],
        [{'entity': entity, 'errors': message}],
    )
    # Both records are expected to come back enriched with fileType and
    # center; the last element holds file names, message and user ids
    expected_results = (
        [{'entity': entity,
          'status': 'INVALID',
          'fileType': filetype,
          'center': center}],
        [{'entity': entity,
          'errors': message,
          'fileType': filetype,
          'center': center}],
        [(['data_clinical_supp_SAGE.txt'], 'Is invalid', ['333', '444'])],
    )
    existing_status = {'status_list': [],
                       'error_list': [],
                       'to_validate': True}

    with patch.object(ValidationHelper, "determine_filetype",
                      return_value=filetype) as filetype_mock,\
         patch.object(input_to_database, "check_existing_file_status",
                      return_value=existing_status) as check_mock,\
         patch.object(ValidationHelper, "validate_single_file",
                      return_value=(valid, message)) as validate_mock,\
         patch.object(input_to_database, "_get_status_and_error_list",
                      return_value=status_error_list_results) as staterror_mock:

        validate_results = input_to_database.validatefile(
            syn,
            None,
            entities,
            validation_statusdf,
            error_trackerdf,
            center,
            threads,
            validator_cls=ValidationHelper)

        assert expected_results == validate_results
        validate_mock.assert_called_once_with()
        check_mock.assert_called_once_with(validation_statusdf,
                                           error_trackerdf, entities)
        filetype_mock.assert_called_once()
        staterror_mock.assert_called_once_with(valid, message, entities)
    def test_validation(self):
        """Runs `validation` with the table queries, `validatefile` and the
        table-building helpers all patched out"""
        modified_on = 1561143558000
        process = "main"
        mapping_df = pd.DataFrame({
            'Database': ["clinical", 'validationStatus', 'errorTracker'],
            'Id': ['syn222', 'syn333', 'syn444']
        })
        entity = synapseclient.Entity(id='syn1234',
                                      md5='44444',
                                      path='/path/to/foobar.txt',
                                      name='data_clinical_supp_SAGE.txt')
        entities = [entity]
        filetype = "clinical"
        # Result handed back by the patched `validatefile`: one status row,
        # no errors and no messages
        status_rows = [[
            entity.id, entity.path, entity.md5, 'VALIDATED', entity.name,
            modified_on, filetype, center
        ]]
        validatefile_result = (status_rows, [], [])
        updated_tables = {
            'validation_statusdf': self.validation_statusdf,
            'error_trackingdf': self.errors_df,
            'duplicated_filesdf': self.empty_dup
        }
        # Returned by the two `tableQuery` calls, in this order
        status_query = emptytable_mock()
        error_query = emptytable_mock()
        validator_cls = Mock()
        with patch.object(syn, "tableQuery",
                          side_effect=[status_query,
                                       error_query]) as query_mock,\
             patch.object(input_to_database, "validatefile",
                          return_value=validatefile_result) as validatefile_mock,\
             patch.object(input_to_database, "build_validation_status_table",
                          return_value=self.validation_statusdf),\
             patch.object(input_to_database, "build_error_tracking_table",
                          return_value=self.errors_df),\
             patch.object(input_to_database, "_update_tables_content",
                          return_value=updated_tables),\
             patch.object(input_to_database, "update_status_and_error_tables"):

            valid_filedf = input_to_database.validation(
                syn,
                "syn123",
                center,
                process,
                entities,
                mapping_df,
                oncotree_link,
                format_registry={"test": validator_cls})

            assert query_mock.call_count == 2
            validatefile_mock.assert_called_once_with(
                syn,
                "syn123",
                entity,
                status_query,
                error_query,
                center='SAGE',
                threads=1,
                oncotree_link=oncotree_link,
                format_registry={"test": validator_cls})

            expected_columns = ['id', 'path', 'fileType', 'name']
            assert valid_filedf.equals(
                self.validation_statusdf[expected_columns])
def test_already_validated_validatefile():
    """
    A file that already has a stored status is not validated again.

    ``check_existing_file_status`` is patched to report ``to_validate`` as
    False together with one stored status and one stored error. The test
    then checks that ``validatefile`` returns those values annotated with
    the file type and center, and that ``validate_single_file``,
    ``_get_status_and_error_list`` and ``_send_validation_error_email``
    are never called.
    """
    stored_status = "INVALID"
    stored_error = "Invalid file"
    detected_type = "markdown"
    thread_count = 0
    is_valid = False

    status_table = pd.DataFrame()
    error_table = pd.DataFrame()

    ent = synapseclient.Entity(name="data_clinical_supp_SAGE.txt",
                               id='syn1234',
                               md5='44444',
                               path='/path/to/data_clinical_supp_SAGE.txt')
    # This modifiedOn translates to: 1553428800000
    ent['modifiedOn'] = '2019-03-24T12:00:00.Z'
    ent.modifiedBy = '333'
    ent.createdBy = '444'
    ent_list = [ent]

    # What the patched check_existing_file_status hands back.
    existing_state = {
        'status_list': [stored_status],
        'error_list': [stored_error],
        'to_validate': False
    }

    status_record = {'entity': ent, 'status': stored_status}
    error_record = {'entity': ent, 'errors': stored_error}
    # Return value wired into the patched _get_status_and_error_list.
    staterror_return = ([status_record], [error_record])

    # The expected output carries the same records plus file type and center.
    annotations = {'fileType': detected_type, 'center': center}
    expected = (
        [dict(status_record, **annotations)],
        [dict(error_record, **annotations)],
        [],
    )

    with patch.object(GenieValidationHelper, "determine_filetype",
                      return_value=detected_type) as filetype_mock,\
         patch.object(input_to_database, "check_existing_file_status",
                      return_value=existing_state) as check_mock,\
         patch.object(GenieValidationHelper, "validate_single_file",
                      return_value=(is_valid, stored_error)) as validate_mock,\
         patch.object(input_to_database, "_get_status_and_error_list",
                      return_value=staterror_return) as staterror_mock,\
         patch.object(input_to_database,
                      "_send_validation_error_email") as email_mock:

        actual = input_to_database.validatefile(
            syn, None, ent_list, status_table, error_table, center,
            thread_count, oncotree_link)

        assert expected == actual
        validate_mock.assert_not_called()
        check_mock.assert_called_once_with(status_table, error_table,
                                           ent_list)
        filetype_mock.assert_called_once()
        staterror_mock.assert_not_called()
        email_mock.assert_not_called()
def test_validation():
    """
    Check how ``validation`` drives its collaborators.

    ``get_center_input_files``, ``syn.tableQuery``, ``validatefile`` and
    ``update_status_and_error_tables`` are all patched. The test verifies
    the arguments each of them receives and that the returned frame equals
    the ``id``/``path``/``fileType`` columns of the frame produced by the
    patched ``update_status_and_error_tables``.
    """
    status_frame = pd.DataFrame({
        'id': ['syn1234'],
        'status': ['VALIDATED'],
        'path': ["/path/to/file"],
        'fileType': ['clinical']
    })

    num_threads = 2
    is_testing = False
    modified_ts = 1561143558000
    process_name = "main"
    file_format = "clinical"

    db_mapping_frame = pd.DataFrame({
        'Database': ["clinical", 'validationStatus', 'errorTracker'],
        'Id': ['syn222', 'syn333', 'syn444']
    })

    ent = synapseclient.Entity(id='syn1234',
                               md5='44444',
                               path='/path/to/foobar.txt',
                               name='data_clinical_supp_SAGE.txt')
    ent_list = [ent]

    # Single status row returned by the patched validatefile.
    status_rows = [[
        ent.id, ent.path, ent.md5, 'VALIDATED', ent.name,
        modified_ts, file_format, center
    ]]
    error_rows = []
    message_rows = []

    status_query_result = emptytable_mock()
    error_query_result = emptytable_mock()

    with patch.object(input_to_database, "get_center_input_files",
                      return_value=ent_list) as center_files_mock,\
         patch.object(syn, "tableQuery",
                      side_effect=[status_query_result,
                                   error_query_result]) as query_mock,\
         patch.object(input_to_database, "validatefile",
                      return_value=(status_rows,
                                    error_rows,
                                    message_rows)) as validatefile_mock,\
         patch.object(input_to_database, "update_status_and_error_tables",
                      return_value=status_frame) as update_mock:

        result_frame = input_to_database.validation(
            syn, center, process_name, center_mapping_df, db_mapping_frame,
            num_threads, is_testing, oncotree_link,
            genie.config.PROCESS_FILES)

        center_files_mock.assert_called_once_with(syn, center_input_synid,
                                                  center, process_name)

        # One query for the validation status table, one for error tracking.
        assert query_mock.call_count == 2

        # NOTE(review): the call above passes 2 threads while this
        # expectation uses threads=1 -- confirm this is intended.
        validatefile_mock.assert_called_once_with(
            syn,
            ent,
            status_query_result,
            error_query_result,
            center='SAGE',
            threads=1,
            testing=False,
            oncotree_link=oncotree_link,
            format_registry=genie.config.PROCESS_FILES)

        update_mock.assert_called_once_with(syn, status_rows, [],
                                            status_query_result,
                                            error_query_result)

        expected_frame = status_frame[['id', 'path', 'fileType']]
        assert result_frame.equals(expected_frame)