def test_missing_column():
    """Reading subject data with the 'missing_column' config must raise a
    DataException that names the absent 'tumor_type' column.
    """
    config_path = ('./test_data/input_data/config/'
                   'invalid_sources_config/missing_column')
    sources_reader = SourcesReader(
        input_dir='./test_data/input_data/CLINICAL',
        config_dir=config_path)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'Column \'tumor_type\' not found'
    assert expected_fragment in str(raised.value)
def test_duplicate_identifier():
    """Reading subject data with the 'duplicate_identifier' config must
    raise a DataException reporting a duplicate identifier.
    """
    config_path = ('./test_data/input_data/config/'
                   'invalid_sources_config/duplicate_identifier')
    sources_reader = SourcesReader(
        input_dir='./test_data/input_data/CLINICAL',
        config_dir=config_path)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'Duplicate identifier'
    assert expected_fragment in str(raised.value)
def test_biomaterial_cannot_be_derived_from_self():
    """Reading subject data with the 'biomaterial_derived_from_self' config
    must raise a DataException flagging Biomaterial BM6 as invalid.
    """
    config_path = ('./test_data/input_data/config/'
                   'invalid_sources_config/biomaterial_derived_from_self')
    sources_reader = SourcesReader(
        input_dir='./test_data/input_data/CLINICAL',
        config_dir=config_path)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'Invalid data for Biomaterial with id BM6'
    assert expected_fragment in str(raised.value)
def test_invalid_number():
    """Reading subject data with the 'invalid_number' config must raise a
    DataException flagging Biosource BS6 as invalid.
    """
    config_path = ('./test_data/input_data/config/'
                   'invalid_sources_config/invalid_number')
    sources_reader = SourcesReader(
        input_dir='./test_data/input_data/CLINICAL',
        config_dir=config_path)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'Invalid data for Biosource with id BS6'
    assert expected_fragment in str(raised.value)
def test_non_existing_file():
    """Reading subject data with the 'non_existing_file' config must raise
    a ReaderException reporting that a file was not found.
    """
    config_path = ('./test_data/input_data/config/'
                   'invalid_sources_config/non_existing_file')
    sources_reader = SourcesReader(
        input_dir='./test_data/input_data/CLINICAL',
        config_dir=config_path)

    with pytest.raises(ReaderException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'File not found'
    assert expected_fragment in str(raised.value)
def test_derived_values_in_sources():
    """Reading subject data with the 'derived_biomaterial_values' config
    must raise a DataException stating that derived value fields are not
    allowed in source files.
    """
    config_path = ('./test_data/input_data/config/'
                   'invalid_sources_config/derived_biomaterial_values')
    sources_reader = SourcesReader(
        input_dir='./test_data/input_data/CLINICAL',
        config_dir=config_path)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'Derived value fields not allowed in source files'
    assert expected_fragment in str(raised.value)
def test_invalid_date():
    """Reading subject data with the 'invalid_date' config must raise a
    DataException that pinpoints the unparsable biomaterial_date of BM15.
    """
    config_path = ('./test_data/input_data/config/'
                   'invalid_sources_config/invalid_date')
    sources_reader = SourcesReader(
        input_dir='./test_data/input_data/CLINICAL',
        config_dir=config_path)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'Error parsing biomaterial_date from source biomaterial_with_invalid_date.tsv:biomaterial_date with id BM15'
    assert expected_fragment in str(raised.value)
def test_wrong_file_format():
    """Reading subject data with the 'wrong_file_format' config must raise
    a DataException about the missing 'biosource_id' identifier column.
    """
    config_path = ('./test_data/input_data/config/'
                   'invalid_sources_config/wrong_file_format')
    sources_reader = SourcesReader(
        input_dir='./test_data/input_data/CLINICAL',
        config_dir=config_path)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    # The columns in the biosource.csv file are not correctly parsed,
    # which shows up as a missing identifier column.
    expected_fragment = 'Identifier column \'biosource_id\' not found'
    assert expected_fragment in str(raised.value)
def test_biomaterial_biosource_mismatch():
    """
    Biomaterial BM2 is derived from BM1, but originates from a different
    biosource. Reading the subject data is expected to raise a
    DataException.
    """
    fixture_root = './test_data/input_data/bios_biom_mismatch_data'
    sources_reader = SourcesReader(
        input_dir=fixture_root + '/source_data',
        config_dir=fixture_root + '/config')

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    # NOTE(review): the empty string is a substring of every string, so this
    # assertion can never fail; only the exception type is really checked.
    # Replace '' with the expected message fragment once it is confirmed.
    expected_fragment = ''
    assert expected_fragment in str(raised.value)
def test_diagnosis_biosource_patient_mismatch():
    """
    Individual contains P1 and P2.
    Diagnosis contains D1 linked to P2.
    Biosource contains BS1 linked to P1 and D1.
    Reading the subject data is expected to raise a DataException.
    """
    fixture_root = './test_data/input_data/ind_bios_diag_mismatch_data'
    sources_reader = SourcesReader(
        input_dir=fixture_root + '/source_data',
        config_dir=fixture_root + '/config')

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    # NOTE(review): the empty string is a substring of every string, so this
    # assertion can never fail; only the exception type is really checked.
    # Replace '' with the expected message fragment once it is confirmed.
    expected_fragment = ''
    assert expected_fragment in str(raised.value)
def sources2csr(input_dir, output_dir, config_dir):
    """Read source data and write the subject and study registries.

    Subject data is read with a SourcesReader built from ``input_dir`` and
    ``config_dir``, passed through ``add_derived_values`` and written with
    a SubjectRegistryWriter; study data is then read from the same reader
    and written with a StudyRegistryWriter. Both writers are constructed
    with ``output_dir``.

    Any ``Exception`` raised along the way is logged at error level and
    the process exits with status 1.
    """
    logger.info('sources2csr')
    try:
        sources_reader = SourcesReader(input_dir=input_dir,
                                       config_dir=config_dir)

        # Subjects first: read, enrich with derived values, then persist.
        subjects = sources_reader.read_subject_data()
        add_derived_values(subjects)
        SubjectRegistryWriter(output_dir).write(subjects)

        # Studies are only read after the subject registry is written.
        studies = sources_reader.read_study_data()
        StudyRegistryWriter(output_dir).write(studies)
    except Exception as error:
        # Top-level boundary: report the failure and exit non-zero.
        logger.error(error)
        sys.exit(1)