def test_get_current_headers():
    """Check the headers reported for the 'Sample' and 'Analysis' conf keys.

    For each key, ``get_current_headers()`` must return a list whose members
    (ignoring order) equal the expected header names. After selecting the
    'ExceptionExpected' key, asking for the headers must raise.
    """
    expected_sample_headers = {
        u'Sample_ID', u'Subject_ID', u'Geno_ID', u'Phenotype', u'Gender',
        u'Analysis_alias', u'Cohort ID', u'Ethnicity',
        u'Ethnicity Description', u'T2D', u'Case_Control', u'Description',
        u'Center_name', u'Hispanic or Latino; of Spanish origin', u'Age',
        u'Year of Birth', u'Year of first visit', u'Cell Type',
        u'Maternal_id', u'Paternal_id', u'Novel Attributes',
    }
    expected_analysis_headers = {
        u'Analysis_name', u'Analysis_alias', u'Title', u'Description',
        u'Project_name', u'Experiment_type', u'Platform',
        u'Standard_refname or Sequence_accession', u'Imputation',
        u'Sequence_accession_label', u'External_link', u'Software',
        u'Pipeline Description', u'Run Accession(s)', u'Center_name',
        u'Analysis_date',
    }
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')

    reader.set_current_conf_key('Sample')
    sample_headers = reader.get_current_headers()
    assert isinstance(sample_headers, list)
    assert set(sample_headers) == expected_sample_headers

    reader.set_current_conf_key('Analysis')
    analysis_headers = reader.get_current_headers()
    assert isinstance(analysis_headers, list)
    assert set(analysis_headers) == expected_analysis_headers

    # Selecting the key happens outside the raises-block: only the header
    # lookup itself is expected to fail.
    reader.set_current_conf_key('ExceptionExpected')
    with pytest.raises(Exception):
        reader.get_current_headers()
def test_validate_file():
    """Check that ``utils.validate_file`` accepts the example TSV and XLS inputs.

    Both readers are validated against the same schema file; the XLS reader
    is pointed at the 'Sample' conf key first.
    """
    schema_path = 'data/T2D_xls2xml_v1.schema'
    conf_path = 'data/T2D_xls2xml_v1.conf'

    from_tsv = TSVReader('data/example_samples.tsv', conf_path, 'Sample')
    assert utils.validate_file(from_tsv, schema_path)

    from_xls = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                         conf_path)
    from_xls.set_current_conf_key('Sample')
    assert utils.validate_file(from_xls, schema_path)
def test_validate_xls():
    """Check the 12_03_18 example workbook against the top-level conf/schema.

    The reader must report itself valid, and ``utils.validate_file`` must
    accept it with the '../T2D_xlsx.schema' schema.
    """
    workbook_path = 'data/example_AMP_T2D_Submission_form_V2_DB_12_03_18.xlsx'
    schema_path = '../T2D_xlsx.schema'

    reader = XLSReader(workbook_path, '../T2D_xlsx.conf')
    assert reader.is_valid()
    assert utils.validate_file(reader, schema_path)
def test_next_row():
    """Check the first row ``XLSReader.next()`` yields for the 'Sample' key.

    The row must be a dict equal to the expected mapping below, which
    includes a 'row_num' entry of 2 alongside the worksheet columns.
    """
    expected_row = {
        'Hispanic or Latino; of Spanish origin': None,
        'Phenotype': 'MeSH:D006262',
        'row_num': 2,
        'Description': 'Male normal',
        'Center_name': 'WTGC cambridge',
        'Case_Control': 'Control',
        'T2D': 0,
        'Analysis_alias': 'AN001',
        'Geno_ID': None,
        'Year of first visit': None,
        'Cell Type': 'Blood',
        'Maternal_id': 'SAM111113',
        'Gender': 'male',
        'Subject_ID': 'SAM111111',
        'Paternal_id': 'SAM111115',
        'Cohort ID': 'CO1111',
        'Novel Attributes': None,
        'Ethnicity Description': None,
        'Year of Birth': 1986,
        'Sample_ID': 'SAM111111',
        'Age': 31,
        'Ethnicity': 'EUWH',
    }

    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                           'data/T2D_xls2xml_v1.conf')
    xls_reader.set_current_conf_key('Sample')
    row = xls_reader.next()

    assert isinstance(row, dict)
    # Plain equality instead of ``0 == cmp(...)``: pytest can then report the
    # differing keys on failure, and neither ``cmp`` nor long literals
    # (``0L``) exist on Python 3. Ints compare equal to longs on Python 2,
    # so the expected values are unchanged.
    assert row == expected_row
def test_write_empty_xml():
    """Check that transforming the 'File' rows gives an XML tree with no root.

    Rows are extracted for the 'File' key, converted to XML and run through
    the v1 XSLT; the result is then saved to 'data/out_empty.xml'.
    """
    extracted = []
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(reader, 'File', 'data/T2D_xls2xml_v1.schema',
                              extracted)

    source_xml = utils.rows_to_xml(extracted, 'File')
    result_xml = utils.transform_xml(source_xml, 'data/T2D_xls2xml_v1.xslt')

    # to make sure the transformed_xml is empty
    assert result_xml.getroot() is None

    with open('data/out_empty.xml', 'w') as out_file:
        utils.save_xml(result_xml, out_file)
def test_transform_xml():
    """Check the XSLT-transformed 'Sample' rows against the reference XML file.

    The first line of 'data/example_samples.xml' must be the XML declaration;
    the remainder must equal the pretty-printed transformation output.
    """
    extracted = []
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(reader, 'Sample', 'data/T2D_xls2xml_v1.schema',
                              extracted)

    source_xml = utils.rows_to_xml(extracted, 'Sample')
    result_xml = utils.transform_xml(source_xml, 'data/T2D_xls2xml_v1.xslt')

    with open('data/example_samples.xml') as reference:
        declaration = reference.readline()
        assert declaration == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        serialized = etree.tostring(result_xml, pretty_print=True)
        assert serialized == reference.read()
def test_xls2xml_study():
    """Check the 'Project' conversion against 'data/example_study.xml'.

    The first line of the reference file only has to be non-empty; the rest
    must equal the pretty-printed output of
    ``utils.multiple_objects_to_xml``.
    """
    reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2_DB_12_03_18.xlsx',
        '../T2D_xlsx.conf')
    keyed_readers = [('Project', reader)]

    result_xml = utils.multiple_objects_to_xml(
        keyed_readers, '../T2D_xlsx.schema', '../T2D_xls2xml.xslt')

    with open('data/example_study.xml', 'r') as reference:
        # Consume the first line before comparing the rest of the file.
        assert reference.readline()
        serialized = etree.tostring(result_xml, pretty_print=True)
        assert serialized == reference.read()
def test_xls2xml_sample():
    """Check the 'Sample' + 'Cohort' conversion via ``multiple_objects_to_xml``.

    The same reader is paired with each conf key; the output must match
    'data/example_sample.xml' after its first line.

    NOTE(review): a second function named ``test_xls2xml_sample`` is defined
    later in this file. That later definition rebinds the name, so this body
    is not the one pytest collects -- one of the two should be renamed.
    """
    reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2_DB_12_03_18.xlsx',
        '../T2D_xlsx.conf')

    keyed_readers = []
    for conf_key in ['Sample', 'Cohort']:
        keyed_readers.append((conf_key, reader))

    result_xml = utils.multiple_objects_to_xml(
        keyed_readers, '../T2D_xlsx.schema', '../T2D_xls2xml.xslt')

    with open('data/example_sample.xml', 'r') as reference:
        # Consume the first line before comparing the rest of the file.
        assert reference.readline()
        serialized = etree.tostring(result_xml, pretty_print=True)
        assert serialized == reference.read()
def test_save_xml():
    """Check that ``utils.save_xml`` writes exactly 'data/example_samples.xml'.

    The 'Sample' rows are extracted, converted and transformed with the v1
    XSLT, saved into an in-memory stream, and the stream contents are
    compared with the whole reference file.
    """
    rows = []
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                           'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(xls_reader, 'Sample',
                              'data/T2D_xls2xml_v1.schema', rows)
    input_xml = utils.rows_to_xml(rows, 'Sample')
    transformed_xml = utils.transform_xml(input_xml,
                                          'data/T2D_xls2xml_v1.xslt')

    io_stream = StringIO()
    try:
        utils.save_xml(transformed_xml, io_stream)
        io_stream.seek(0)
        # Open the reference file in a context manager so its handle is
        # closed deterministically instead of being left to the GC.
        with open('data/example_samples.xml') as expected_file:
            assert io_stream.read() == expected_file.read()
    finally:
        # Close the stream even when the comparison above fails.
        io_stream.close()
def test_multiple_sheets_to_xml():
    """Check ``utils.multiple_sheets_to_xml`` for 'Analysis' and 'File' sheets.

    The output must match 'data/example_analysis.xml' after its first line.
    Calling the function again with the sheet names 'Exception' and
    'Expected' must raise.
    """
    schema_path = 'data/T2D_xls2xml_v1.schema'
    xslt_path = 'data/T2D_xls2xml_v2.xslt'
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')

    sheet_names = str('Analysis,File').split(',')
    result_xml = utils.multiple_sheets_to_xml(reader, sheet_names,
                                              schema_path, xslt_path)
    with open('data/example_analysis.xml', 'r') as reference:
        # Consume the first line before comparing the rest of the file.
        assert reference.readline()
        serialized = etree.tostring(result_xml, pretty_print=True)
        assert serialized == reference.read()

    with pytest.raises(Exception):
        bad_sheet_names = str('Exception,Expected').split(',')
        utils.multiple_sheets_to_xml(reader, bad_sheet_names,
                                     schema_path, xslt_path)
def test_set_current_conf_key():
    """Check that ``set_current_conf_key`` updates ``active_worksheet``.

    A fresh reader has ``active_worksheet`` set to None; after each call the
    attribute must equal the key that was passed in.
    """
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    assert reader.active_worksheet is None

    reader.set_current_conf_key('Sample')
    assert reader.active_worksheet == 'Sample'

    reader.set_current_conf_key('Analysis')
    assert reader.active_worksheet == 'Analysis'
def test_xls2xml_sample():
    """Check the 'Sample' + 'Cohort' conversion via ``multiple_sheets_to_xml``.

    The output must match 'data/example_sample.xml' after its first line.

    NOTE(review): an earlier function in this file is also named
    ``test_xls2xml_sample``; this definition rebinds the name, so the earlier
    body is not collected by pytest -- one of the two should be renamed.
    """
    reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2_DB_12_03_18.xlsx',
        '../T2D_xlsx.conf')
    sheet_names = str('Sample,Cohort').split(',')

    result_xml = utils.multiple_sheets_to_xml(
        reader, sheet_names, '../T2D_xlsx.schema', '../T2D_xls2xml.xslt')

    with open('data/example_sample.xml', 'r') as reference:
        # Consume the first line before comparing the rest of the file.
        assert reference.readline()
        serialized = etree.tostring(result_xml, pretty_print=True)
        assert serialized == reference.read()
def test_rows_to_xml():
    """Check the element tree ``utils.rows_to_xml`` builds for 'Sample' rows.

    The root must be a 'SampleSet' element with one 'Sample' child per row.
    For every row, the child's tag/text mapping must differ from the raw
    header/value mapping, and must equal the mapping obtained by converting
    headers with ``utils.header_to_xml_tag`` and rendering None as ''.
    """
    extracted = []
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(reader, 'Sample', 'data/T2D_xls2xml_v1.schema',
                              extracted)

    sample_set = utils.rows_to_xml(extracted, 'Sample')
    assert isinstance(sample_set, etree._Element)
    assert sample_set.tag == 'SampleSet'
    assert len(extracted) == len(sample_set)

    for row, sample in zip(extracted, sample_set):
        assert sample.tag == 'Sample'

        raw_mapping = {header: str(value) for header, value in row.items()}
        element_mapping = {field.tag: field.text for field in sample}
        assert raw_mapping != element_mapping

        tagged_mapping = {
            utils.header_to_xml_tag(header): str('' if value is None else value)
            for header, value in row.items()
        }
        assert tagged_mapping == element_mapping
def test_valid_data():
    """Check that ``MetadataValidator.validate_data`` accepts example rows.

    The first row of the 'Sample' and 'Analysis' worksheets of the XLS file,
    and the first row of the example TSV file, must each validate against
    the v1 schema.
    """
    conf_path = 'data/T2D_xls2xml_v1.conf'
    validator = MetadataValidator('data/T2D_xls2xml_v1.schema')

    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                           conf_path)
    # The worksheet is selected by assigning the attribute directly here.
    xls_reader.active_worksheet = 'Sample'
    sample_row = xls_reader.next()
    assert validator.validate_data(sample_row, 'Sample')

    xls_reader.active_worksheet = 'Analysis'
    analysis_row = xls_reader.next()
    assert validator.validate_data(analysis_row, 'Analysis')

    tsv_reader = TSVReader('data/example_samples.tsv', conf_path, 'Sample')
    tsv_row = tsv_reader.next()
    assert validator.validate_data(tsv_row, 'Sample')
def test_extract_rows():
    """Check ``utils.extract_rows`` for TSV and XLS inputs.

    For each input, extraction of the 'Sample' key must succeed and fill the
    list with 6 rows that equal, pairwise, the rows yielded by iterating a
    freshly created reader. Extracting the 'FalseExpected' key must return a
    falsy value.
    """
    validation_schema = 'data/T2D_xls2xml_v1.schema'

    rows = []
    tsv_reader = TSVReader('data/example_samples.tsv',
                           'data/T2D_xls2xml_v1.conf', 'Sample')
    assert utils.extract_rows(tsv_reader, 'Sample', validation_schema, rows)
    assert isinstance(rows, list)
    assert 6 == len(rows)
    # A fresh reader is created to iterate the file again for the comparison.
    tsv_reader = TSVReader('data/example_samples.tsv',
                           'data/T2D_xls2xml_v1.conf', 'Sample')
    for a, b in zip(rows, tsv_reader):
        # ``==`` instead of ``0 == cmp(a, b)``: same verdict for dict rows,
        # readable pytest diffs on failure, and ``cmp`` is gone in Python 3.
        assert a == b

    rows = []
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                           'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(xls_reader, 'Sample', validation_schema, rows)
    assert isinstance(rows, list)
    assert 6 == len(rows)
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                           'data/T2D_xls2xml_v1.conf')
    xls_reader.set_current_conf_key('Sample')
    for a, b in zip(rows, xls_reader):
        assert a == b

    rows = []
    assert not utils.extract_rows(xls_reader, 'FalseExpected',
                                  validation_schema, rows)
def test_valid_worksheets():
    """Check that ``valid_worksheets()`` lists 'Sample', 'Analysis' and 'File'.

    The result must be a list; order is not checked.
    """
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    sheet_names = reader.valid_worksheets()

    assert isinstance(sheet_names, list)
    assert set(sheet_names) == {'Sample', 'Analysis', 'File'}