def test_next():
    """Check the first row returned by ``next()`` and the type of every later row.

    The first row of the sample TSV must be a dict equal to the expected
    mapping below; every remaining row yielded by iteration must be a dict.
    """
    tsv_reader = TSVReader('data/example_samples.tsv', 'data/T2D_xls2xml_v1.conf', 'Sample')
    expected_first_row = {
        'Novel Attributes': None,
        'Ethnicity Description': None,
        'Description': 'Male normal',
        'Cell Type': 'Blood',
        'Maternal_id': 'SAM111113',
        'Center_name': 'WTGC cambridge',
        'Gender': 'male',
        'Subject_ID': 'SAM111111',
        'Paternal_id': 'SAM111115',
        'T2D': 0,
        'Hispanic or Latino; of Spanish origin': None,
        'Cohort ID': 'CO1111',
        'Year of Birth': '1986',
        'Age': '31',
        'Analysis_alias': 'AN001',
        'Sample_ID': 'SAM111111',
        'Geno_ID': None,
        'Year of first visit': None,
        'Case_Control': 'Control',
        'Ethnicity': 'EUWH',
        'Phenotype': 'MeSH:D006262',
    }

    row = tsv_reader.next()
    assert isinstance(row, dict)
    # Compare with == rather than cmp(): cmp() does not exist in Python 3,
    # and dict equality expresses the same check on Python 2.
    assert row == expected_first_row

    # The rows after the first are only type-checked.
    for row in tsv_reader:
        assert isinstance(row, dict)
def test_get_current_headers():
    """Check that get_current_headers() returns a list holding exactly the expected column names."""
    expected_headers = {
        'Sample_ID',
        'Subject_ID',
        'Geno_ID',
        'Phenotype',
        'Gender',
        'Analysis_alias',
        'Cohort ID',
        'Ethnicity',
        'Ethnicity Description',
        'T2D',
        'Case_Control',
        'Description',
        'Center_name',
        'Hispanic or Latino; of Spanish origin',
        'Age',
        'Year of Birth',
        'Year of first visit',
        'Cell Type',
        'Maternal_id',
        'Paternal_id',
        'Novel Attributes',
    }
    reader = TSVReader('data/example_samples.tsv', 'data/T2D_xls2xml_v1.conf', 'Sample')
    found = reader.get_current_headers()
    assert isinstance(found, list)
    # Order is not checked, only membership.
    assert set(found) == expected_headers
def test_validate_file():
    """Check that utils.validate_file() accepts the sample data from both a TSV and an XLS reader."""
    conf_path = 'data/T2D_xls2xml_v1.conf'
    schema_path = 'data/T2D_xls2xml_v1.schema'

    # TSV input: the conf key is given at construction time.
    from_tsv = TSVReader('data/example_samples.tsv', conf_path, 'Sample')
    assert utils.validate_file(from_tsv, schema_path)

    # XLS input: the conf key is selected after construction.
    from_xls = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx', conf_path)
    from_xls.set_current_conf_key('Sample')
    assert utils.validate_file(from_xls, schema_path)
def test_multiple_tsvs_to_xml():
    """Check XML generation from several TSV readers, plus the failure path.

    The generated document is compared with ``data/example_analysis.xml``
    minus that file's first line. Readers built with unknown conf keys are
    expected to make the conversion raise.
    """
    conf_path = 'data/T2D_xls2xml_v1.conf'
    schema_path = 'data/T2D_xls2xml_v1.schema'
    xslt_path = 'data/T2D_xls2xml_v2.xslt'

    analysis_reader = TSVReader('data/example_analysis.tsv', conf_path, 'Analysis')
    files_reader = TSVReader('data/example_files.tsv', conf_path, 'File')
    keyed_readers = [('Analysis', analysis_reader), ('File', files_reader)]
    generated = utils.multiple_objects_to_xml(keyed_readers, schema_path, xslt_path)

    with open('data/example_analysis.xml', 'r') as reference_file:
        # Consume the first line of the reference file; it must be non-empty
        # and is excluded from the comparison below.
        assert reference_file.readline()
        serialized = etree.tostring(generated, pretty_print=True)
        assert serialized == reference_file.read()

    # Reader construction stays inside the raises block, as in the original,
    # so an exception from either construction or conversion satisfies it.
    with pytest.raises(Exception):
        bad_readers = [
            (conf_key, TSVReader('data/example_analysis.tsv', conf_path, conf_key))
            for conf_key in ('Exception', 'Expected')
        ]
        utils.multiple_objects_to_xml(bad_readers, schema_path, xslt_path)
def test_get_valid_conf_keys():
    """Check get_valid_conf_keys() for a matching and a non-matching conf key."""
    samples_path = 'data/example_samples.tsv'
    conf_path = 'data/T2D_xls2xml_v1.conf'

    # The sample file read with the 'Sample' key reports that single key.
    matching = TSVReader(samples_path, conf_path, 'Sample')
    assert set(matching.get_valid_conf_keys()) == {'Sample'}

    # The same file read with the 'Analysis' key reports no valid keys.
    mismatched = TSVReader(samples_path, conf_path, 'Analysis')
    assert mismatched.get_valid_conf_keys() == []
def test_valid_data():
    """Check that MetadataValidator accepts the first row from XLS and TSV sources."""
    conf_path = 'data/T2D_xls2xml_v1.conf'
    validator = MetadataValidator('data/T2D_xls2xml_v1.schema')

    # NOTE(review): other tests in this file select the sheet through
    # set_current_conf_key(); confirm active_worksheet is still honoured.
    xls_source = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx', conf_path)
    xls_source.active_worksheet = 'Sample'
    first_sample = xls_source.next()
    assert validator.validate_data(first_sample, 'Sample')

    xls_source.active_worksheet = 'Analysis'
    first_analysis = xls_source.next()
    assert validator.validate_data(first_analysis, 'Analysis')

    tsv_source = TSVReader('data/example_samples.tsv', conf_path, 'Sample')
    first_tsv_sample = tsv_source.next()
    assert validator.validate_data(first_tsv_sample, 'Sample')
def test_extract_rows():
    """Check utils.extract_rows() against TSV and XLS readers.

    For each source the call must succeed, fill ``rows`` with six entries,
    and those entries must equal, pairwise, the rows yielded by a fresh
    reader over the same file. A final call with an unknown key must
    report failure.
    """
    validation_schema = 'data/T2D_xls2xml_v1.schema'

    # --- TSV source ---
    rows = []
    tsv_reader = TSVReader('data/example_samples.tsv', 'data/T2D_xls2xml_v1.conf', 'Sample')
    assert utils.extract_rows(tsv_reader, 'Sample', validation_schema, rows)
    assert isinstance(rows, list)
    assert 6 == len(rows)
    # A fresh reader is built so the comparison iterates from the first row.
    tsv_reader = TSVReader('data/example_samples.tsv', 'data/T2D_xls2xml_v1.conf', 'Sample')
    for a, b in zip(rows, tsv_reader):
        # == instead of cmp(): cmp() does not exist in Python 3.
        assert a == b

    # --- XLS source ---
    rows = []
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx', 'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(xls_reader, 'Sample', validation_schema, rows)
    assert isinstance(rows, list)
    assert 6 == len(rows)
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx', 'data/T2D_xls2xml_v1.conf')
    xls_reader.set_current_conf_key('Sample')
    for a, b in zip(rows, xls_reader):
        assert a == b

    # --- Unknown key: extraction must return a falsy result ---
    rows = []
    assert not utils.extract_rows(xls_reader, 'FalseExpected', validation_schema, rows)
def test_is_valid():
    """Check that the sample TSV read with the 'Sample' conf key is reported valid."""
    reader = TSVReader(
        'data/example_samples.tsv',
        'data/T2D_xls2xml_v1.conf',
        'Sample',
    )
    validity = reader.is_valid()
    assert validity
def test_is_not_valid():
    """Check that the sample TSV read with the 'Analysis' conf key is reported invalid."""
    reader = TSVReader(
        'data/example_samples.tsv',
        'data/T2D_xls2xml_v1.conf',
        'Analysis',
    )
    validity = reader.is_valid()
    assert not validity
def test_set_current_conf_key():
    """Check that set_current_conf_key() leaves a TSVReader's state untouched.

    Validity and the list of valid conf keys are asserted before and after
    the call, for a reader that starts valid and one that starts invalid.
    """
    samples_path = 'data/example_samples.tsv'
    conf_path = 'data/T2D_xls2xml_v1.conf'

    # set_current_conf_key() should do nothing on a TSV reader.
    valid_reader = TSVReader(samples_path, conf_path, 'Sample')
    assert valid_reader.is_valid()
    assert set(valid_reader.get_valid_conf_keys()) == {'Sample'}
    valid_reader.set_current_conf_key('Analysis')
    assert valid_reader.is_valid()
    assert set(valid_reader.get_valid_conf_keys()) == {'Sample'}

    # An invalid reader must stay invalid even when given the key that
    # would have matched the file.
    invalid_reader = TSVReader(samples_path, conf_path, 'Analysis')
    assert not invalid_reader.is_valid()
    assert invalid_reader.get_valid_conf_keys() == []
    invalid_reader.set_current_conf_key('Sample')
    assert not invalid_reader.is_valid()
    assert invalid_reader.get_valid_conf_keys() == []