Exemple #1
0
def test_missing_column():
    """Reading subject data with the ``missing_column`` config must raise
    a DataException that names the missing ``tumor_type`` column."""
    clinical_dir = './test_data/input_data/CLINICAL'
    broken_config = './test_data/input_data/config/invalid_sources_config/missing_column'
    sources_reader = SourcesReader(input_dir=clinical_dir, config_dir=broken_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    message = str(raised.value)
    assert 'Column \'tumor_type\' not found' in message
Exemple #2
0
def test_duplicate_identifier():
    """Reading subject data with the ``duplicate_identifier`` config must
    raise a DataException reporting a duplicate identifier."""
    clinical_dir = './test_data/input_data/CLINICAL'
    broken_config = './test_data/input_data/config/invalid_sources_config/duplicate_identifier'
    sources_reader = SourcesReader(input_dir=clinical_dir, config_dir=broken_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    message = str(raised.value)
    assert 'Duplicate identifier' in message
Exemple #3
0
def test_biomaterial_cannot_be_derived_from_self():
    """Reading subject data with the ``biomaterial_derived_from_self``
    config must raise a DataException flagging Biomaterial BM6 as invalid."""
    clinical_dir = './test_data/input_data/CLINICAL'
    broken_config = './test_data/input_data/config/invalid_sources_config/biomaterial_derived_from_self'
    sources_reader = SourcesReader(input_dir=clinical_dir, config_dir=broken_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    message = str(raised.value)
    assert 'Invalid data for Biomaterial with id BM6' in message
Exemple #4
0
def test_invalid_number():
    """Reading subject data with the ``invalid_number`` config must raise
    a DataException flagging Biosource BS6 as invalid."""
    clinical_dir = './test_data/input_data/CLINICAL'
    broken_config = './test_data/input_data/config/invalid_sources_config/invalid_number'
    sources_reader = SourcesReader(input_dir=clinical_dir, config_dir=broken_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    message = str(raised.value)
    assert 'Invalid data for Biosource with id BS6' in message
Exemple #5
0
def test_non_existing_file():
    """Reading subject data with the ``non_existing_file`` config must
    raise a ReaderException (not a DataException) saying the file was
    not found."""
    clinical_dir = './test_data/input_data/CLINICAL'
    broken_config = './test_data/input_data/config/invalid_sources_config/non_existing_file'
    sources_reader = SourcesReader(input_dir=clinical_dir, config_dir=broken_config)

    with pytest.raises(ReaderException) as raised:
        sources_reader.read_subject_data()

    message = str(raised.value)
    assert 'File not found' in message
Exemple #6
0
def test_derived_values_in_sources():
    """Reading subject data with the ``derived_biomaterial_values`` config
    must raise a DataException stating that derived value fields are not
    allowed in source files."""
    clinical_dir = './test_data/input_data/CLINICAL'
    broken_config = './test_data/input_data/config/invalid_sources_config/derived_biomaterial_values'
    sources_reader = SourcesReader(input_dir=clinical_dir, config_dir=broken_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'Derived value fields not allowed in source files'
    assert expected_fragment in str(raised.value)
Exemple #7
0
def test_invalid_date():
    """Reading subject data with the ``invalid_date`` config must raise a
    DataException that pinpoints the unparsable ``biomaterial_date`` of
    BM15 and the source file it came from."""
    clinical_dir = './test_data/input_data/CLINICAL'
    broken_config = './test_data/input_data/config/invalid_sources_config/invalid_date'
    sources_reader = SourcesReader(input_dir=clinical_dir, config_dir=broken_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    expected_fragment = 'Error parsing biomaterial_date from source biomaterial_with_invalid_date.tsv:biomaterial_date with id BM15'
    assert expected_fragment in str(raised.value)
Exemple #8
0
def test_wrong_file_format():
    """Reading subject data with the ``wrong_file_format`` config must
    raise a DataException about the missing ``biosource_id`` identifier
    column."""
    clinical_dir = './test_data/input_data/CLINICAL'
    broken_config = './test_data/input_data/config/invalid_sources_config/wrong_file_format'
    sources_reader = SourcesReader(input_dir=clinical_dir, config_dir=broken_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    # The columns of biosource.csv are not parsed correctly, so the reader
    # ends up reporting that the identifier column is missing.
    message = str(raised.value)
    assert 'Identifier column \'biosource_id\' not found' in message
Exemple #9
0
def test_biomaterial_biosource_mismatch():
    """
    Biomaterial BM2 is derived from BM1,
    but originates from a different biosource.

    Reading the subject data is expected to raise a DataException.
    """
    fixture_sources = './test_data/input_data/bios_biom_mismatch_data/source_data'
    fixture_config = './test_data/input_data/bios_biom_mismatch_data/config'
    sources_reader = SourcesReader(input_dir=fixture_sources, config_dir=fixture_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    # NOTE(review): the empty string is a substring of every string, so this
    # assertion can never fail; only the exception type is really checked.
    # TODO: assert on the actual error message once it is confirmed.
    message = str(raised.value)
    assert '' in message
Exemple #10
0
def test_diagnosis_biosource_patient_mismatch():
    """
    Individual contains P1 and P2.
    Diagnosis contains D1 linked to P2.
    Biosource contains BS1 linked to P1 and D1.

    Reading the subject data is expected to raise a DataException.
    """
    fixture_sources = './test_data/input_data/ind_bios_diag_mismatch_data/source_data'
    fixture_config = './test_data/input_data/ind_bios_diag_mismatch_data/config'
    sources_reader = SourcesReader(input_dir=fixture_sources, config_dir=fixture_config)

    with pytest.raises(DataException) as raised:
        sources_reader.read_subject_data()

    # NOTE(review): the empty string is a substring of every string, so this
    # assertion can never fail; only the exception type is really checked.
    # TODO: assert on the actual error message once it is confirmed.
    message = str(raised.value)
    assert '' in message
def sources2csr(input_dir, output_dir, config_dir):
    """Read source data and write the subject and study registries.

    Builds a SourcesReader from ``input_dir`` and ``config_dir``, reads the
    subject data, passes it through ``add_derived_values`` and writes it
    with a SubjectRegistryWriter targeting ``output_dir``; then reads the
    study data and writes it with a StudyRegistryWriter targeting the same
    ``output_dir``.

    Any exception raised along the way is logged as an error and the
    process exits with status 1.
    """
    logger.info('sources2csr')
    try:
        source_reader = SourcesReader(input_dir=input_dir, config_dir=config_dir)

        # Subject registry: read, enrich with derived values, then write.
        subjects = source_reader.read_subject_data()
        add_derived_values(subjects)
        SubjectRegistryWriter(output_dir).write(subjects)

        # Study registry: read and write as-is.
        studies = source_reader.read_study_data()
        StudyRegistryWriter(output_dir).write(studies)
    except Exception as error:
        # Top-level boundary: report the failure and stop with a non-zero status.
        logger.error(error)
        sys.exit(1)