Example #1
0
def test_get_current_headers():
    """Check the header list returned for each configuration key.

    For the 'Sample' and 'Analysis' keys the reader must return a list whose
    members match the expected header names (order is not checked). After
    selecting the 'ExceptionExpected' key, asking for headers must raise.
    """
    expected_by_key = {
        'Sample': {
            u'Sample_ID', u'Subject_ID', u'Geno_ID', u'Phenotype', u'Gender',
            u'Analysis_alias', u'Cohort ID', u'Ethnicity',
            u'Ethnicity Description', u'T2D', u'Case_Control', u'Description',
            u'Center_name', u'Hispanic or Latino; of Spanish origin', u'Age',
            u'Year of Birth', u'Year of first visit', u'Cell Type',
            u'Maternal_id', u'Paternal_id', u'Novel Attributes',
        },
        'Analysis': {
            u'Analysis_name', u'Analysis_alias', u'Title', u'Description',
            u'Project_name', u'Experiment_type', u'Platform',
            u'Standard_refname or Sequence_accession', u'Imputation',
            u'Sequence_accession_label', u'External_link', u'Software',
            u'Pipeline Description', u'Run Accession(s)', u'Center_name',
            u'Analysis_date',
        },
    }
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')

    # Same order as the keys were originally exercised: Sample, then Analysis.
    for conf_key in ('Sample', 'Analysis'):
        reader.set_current_conf_key(conf_key)
        found = reader.get_current_headers()
        assert isinstance(found, list)
        assert set(found) == expected_by_key[conf_key]

    reader.set_current_conf_key('ExceptionExpected')
    with pytest.raises(Exception):
        reader.get_current_headers()
def test_validate_file():
    """Validate 'Sample' data from a TSV reader and an XLS reader.

    Both readers use the same configuration file and are checked against the
    same schema through ``utils.validate_file``.
    """
    schema_path = 'data/T2D_xls2xml_v1.schema'
    conf_path = 'data/T2D_xls2xml_v1.conf'

    from_tsv = TSVReader('data/example_samples.tsv', conf_path, 'Sample')
    assert utils.validate_file(from_tsv, schema_path)

    from_xls = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                         conf_path)
    from_xls.set_current_conf_key('Sample')
    assert utils.validate_file(from_xls, schema_path)
Example #3
0
def test_validate_xls():
    """Check that the example workbook is valid and passes schema validation."""
    schema_path = '../T2D_xlsx.schema'
    workbook_path = 'data/example_AMP_T2D_Submission_form_V2_DB_12_03_18.xlsx'
    reader = XLSReader(workbook_path, '../T2D_xlsx.conf')

    assert reader.is_valid()
    assert utils.validate_file(reader, schema_path)
Example #4
0
def test_next_row():
    """Check the dict returned by the first ``next()`` call on 'Sample'.

    The row must be a dict equal to the expected mapping of header name to
    cell value, including the ``row_num`` entry.
    """
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                           'data/T2D_xls2xml_v1.conf')

    xls_reader.set_current_conf_key('Sample')
    row = xls_reader.next()
    assert isinstance(row, dict)
    # Plain ``==`` and plain int literals replace ``cmp()`` and the ``L``
    # suffix: both are Python 2-only, and ints compare equal to longs there,
    # so the check is unchanged on Python 2 and now also parses on Python 3.
    assert row == {
        'Hispanic or Latino; of Spanish origin': None,
        'Phenotype': 'MeSH:D006262',
        'row_num': 2,
        'Description': 'Male normal',
        'Center_name': 'WTGC cambridge',
        'Case_Control': 'Control',
        'T2D': 0,
        'Analysis_alias': 'AN001',
        'Geno_ID': None,
        'Year of first visit': None,
        'Cell Type': 'Blood',
        'Maternal_id': 'SAM111113',
        'Gender': 'male',
        'Subject_ID': 'SAM111111',
        'Paternal_id': 'SAM111115',
        'Cohort ID': 'CO1111',
        'Novel Attributes': None,
        'Ethnicity Description': None,
        'Year of Birth': 1986,
        'Sample_ID': 'SAM111111',
        'Age': 31,
        'Ethnicity': 'EUWH',
    }
def test_write_empty_xml():
    """Transform the 'File' rows and save the (empty) result to disk.

    The transformed tree is expected to have no root element; it is then
    written to ``data/out_empty.xml`` via ``utils.save_xml``.
    """
    extracted = []
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(reader, 'File', 'data/T2D_xls2xml_v1.schema',
                              extracted)

    source_tree = utils.rows_to_xml(extracted, 'File')
    result_tree = utils.transform_xml(source_tree,
                                      'data/T2D_xls2xml_v1.xslt')
    # A missing root is how this test recognises an empty transform result.
    assert result_tree.getroot() is None

    with open('data/out_empty.xml', 'w') as out_file:
        utils.save_xml(result_tree, out_file)
def test_transform_xml():
    """Compare the transformed 'Sample' XML with the stored example file.

    The example file's first line must be the XML declaration; the rest of
    the file must equal the pretty-printed transform output.
    """
    extracted = []
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(reader, 'Sample', 'data/T2D_xls2xml_v1.schema',
                              extracted)

    source_tree = utils.rows_to_xml(extracted, 'Sample')
    result_tree = utils.transform_xml(source_tree,
                                      'data/T2D_xls2xml_v1.xslt')

    with open('data/example_samples.xml') as expected_file:
        declaration = expected_file.readline()
        assert declaration == '<?xml version="1.0" encoding="UTF-8"?>\n'
        rendered = etree.tostring(result_tree, pretty_print=True)
        assert rendered == expected_file.read()
def test_xls2xml_study():
    """Convert the 'Project' object to XML and compare with the study example.

    The first line of the example file only has to be non-empty; the
    remainder must equal the pretty-printed output.
    """
    reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2_DB_12_03_18.xlsx',
        '../T2D_xlsx.conf')
    keyed_readers = [('Project', reader)]
    result_tree = utils.multiple_objects_to_xml(
        keyed_readers, '../T2D_xlsx.schema', '../T2D_xls2xml.xslt')

    with open('data/example_study.xml', 'r') as expected_file:
        assert expected_file.readline()
        rendered = etree.tostring(result_tree, pretty_print=True)
        assert rendered == expected_file.read()
def test_xls2xml_sample():
    """Convert 'Sample' and 'Cohort' objects to XML and compare with the example.

    The same reader instance is paired with each key before being handed to
    ``utils.multiple_objects_to_xml``.
    """
    reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2_DB_12_03_18.xlsx',
        '../T2D_xlsx.conf')

    keyed_readers = []
    for conf_key in ('Sample', 'Cohort'):
        keyed_readers.append((conf_key, reader))

    result_tree = utils.multiple_objects_to_xml(
        keyed_readers, '../T2D_xlsx.schema', '../T2D_xls2xml.xslt')

    with open('data/example_sample.xml', 'r') as expected_file:
        assert expected_file.readline()
        rendered = etree.tostring(result_tree, pretty_print=True)
        assert rendered == expected_file.read()
def test_save_xml():
    """Check that ``utils.save_xml`` writes the expected document to a stream.

    The transformed 'Sample' XML is saved into an in-memory stream and the
    stream's full content is compared with ``data/example_samples.xml``.
    """
    rows = []
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx', 'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(xls_reader, 'Sample', 'data/T2D_xls2xml_v1.schema', rows)
    input_xml = utils.rows_to_xml(rows, 'Sample')
    transformed_xml = utils.transform_xml(input_xml, 'data/T2D_xls2xml_v1.xslt')

    # Read the reference file inside a ``with`` so its handle is closed
    # deterministically instead of being left to the garbage collector.
    with open('data/example_samples.xml') as expected_file:
        expected = expected_file.read()

    io_stream = StringIO()
    try:
        utils.save_xml(transformed_xml, io_stream)
        io_stream.seek(0)
        assert io_stream.read() == expected
    finally:
        # Close the stream even when the save or the assertion fails.
        io_stream.close()
def test_multiple_sheets_to_xml():
    """Convert the 'Analysis' and 'File' sheets and compare with the example.

    A second call with the sheet names 'Exception' and 'Expected' must raise.
    """
    schema_path = 'data/T2D_xls2xml_v1.schema'
    xslt_path = 'data/T2D_xls2xml_v2.xslt'
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')

    sheet_names = str('Analysis,File').split(',')
    result_tree = utils.multiple_sheets_to_xml(reader, sheet_names,
                                               schema_path, xslt_path)
    with open('data/example_analysis.xml', 'r') as expected_file:
        assert expected_file.readline()
        rendered = etree.tostring(result_tree, pretty_print=True)
        assert rendered == expected_file.read()

    with pytest.raises(Exception):
        utils.multiple_sheets_to_xml(reader,
                                     str('Exception,Expected').split(','),
                                     schema_path, xslt_path)
Example #11
0
def test_set_current_conf_key():
    """Check that ``set_current_conf_key`` updates ``active_worksheet``.

    A freshly built reader has no active worksheet; after each call the
    attribute must equal the key that was just set.
    """
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    assert reader.active_worksheet is None

    for conf_key in ('Sample', 'Analysis'):
        reader.set_current_conf_key(conf_key)
        assert reader.active_worksheet == conf_key
Example #12
0
def test_xls2xml_sample():
    """Convert the 'Sample' and 'Cohort' sheets and compare with the example.

    The first line of the example file only has to be non-empty; the
    remainder must equal the pretty-printed output.
    """
    workbook_path = 'data/example_AMP_T2D_Submission_form_V2_DB_12_03_18.xlsx'
    reader = XLSReader(workbook_path, '../T2D_xlsx.conf')

    sheet_names = str('Sample,Cohort').split(',')
    result_tree = utils.multiple_sheets_to_xml(
        reader, sheet_names, '../T2D_xlsx.schema', '../T2D_xls2xml.xslt')

    with open('data/example_sample.xml', 'r') as expected_file:
        assert expected_file.readline()
        rendered = etree.tostring(result_tree, pretty_print=True)
        assert rendered == expected_file.read()
def test_rows_to_xml():
    """Check the element tree that ``utils.rows_to_xml`` builds from 'Sample' rows.

    The root must be a 'SampleSet' element with one 'Sample' child per row.
    For every row, the child's tag/text pairs must differ from the raw
    header/str(value) mapping and must equal the mapping obtained by passing
    each header through ``utils.header_to_xml_tag`` and turning ``None``
    values into empty strings.
    """
    extracted = []
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(reader, 'Sample', 'data/T2D_xls2xml_v1.schema',
                              extracted)

    root = utils.rows_to_xml(extracted, 'Sample')
    assert isinstance(root, etree._Element)
    assert root.tag == 'SampleSet'
    assert len(extracted) == len(root)

    for record, element in zip(extracted, root):
        assert element.tag == 'Sample'

        # Raw headers with plain str() values are expected NOT to match.
        raw_mapping = {}
        for header in record:
            raw_mapping[header] = str(record.get(header, ''))
        assert raw_mapping != dict((sub.tag, sub.text) for sub in element)

        # Converted headers with None rendered as '' are expected to match.
        tagged_mapping = {}
        for header in record:
            value = record.get(header, '')
            text = str('' if value is None else value)
            tagged_mapping[utils.header_to_xml_tag(header)] = text
        assert tagged_mapping == dict((sub.tag, sub.text) for sub in element)
def test_valid_data():
    """Validate the first row from each source with ``MetadataValidator``.

    The XLS reader is pointed at 'Sample' and then 'Analysis' by assigning
    ``active_worksheet`` directly; the TSV reader is built for 'Sample'.
    """
    conf_path = 'data/T2D_xls2xml_v1.conf'
    checker = MetadataValidator('data/T2D_xls2xml_v1.schema')

    xls_source = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                           conf_path)
    for sheet in ('Sample', 'Analysis'):
        xls_source.active_worksheet = sheet
        first_row = xls_source.next()
        assert checker.validate_data(first_row, sheet)

    tsv_source = TSVReader('data/example_samples.tsv', conf_path, 'Sample')
    first_row = tsv_source.next()
    assert checker.validate_data(first_row, 'Sample')
def test_extract_rows():
    """Check ``utils.extract_rows`` for TSV and XLS readers.

    For each reader type the 'Sample' extraction must succeed, fill the
    supplied list with 6 rows, and each extracted row must equal the row
    produced by iterating a fresh reader. Extracting with the key
    'FalseExpected' must return a falsy result.
    """
    validation_schema = 'data/T2D_xls2xml_v1.schema'

    rows = []
    tsv_reader = TSVReader('data/example_samples.tsv', 'data/T2D_xls2xml_v1.conf', 'Sample')
    assert utils.extract_rows(tsv_reader, 'Sample', validation_schema, rows)
    assert isinstance(rows, list)
    assert 6 == len(rows)
    tsv_reader = TSVReader('data/example_samples.tsv', 'data/T2D_xls2xml_v1.conf', 'Sample')
    # ``==`` replaces ``0 == cmp(a, b)``: cmp() does not exist on Python 3,
    # and equality is the property being checked here.
    for a, b in zip(rows, tsv_reader):
        assert a == b

    rows = []
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx', 'data/T2D_xls2xml_v1.conf')
    assert utils.extract_rows(xls_reader, 'Sample', validation_schema, rows)
    assert isinstance(rows, list)
    assert 6 == len(rows)
    xls_reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx', 'data/T2D_xls2xml_v1.conf')
    xls_reader.set_current_conf_key('Sample')
    for a, b in zip(rows, xls_reader):
        assert a == b

    rows = []
    assert not utils.extract_rows(xls_reader, 'FalseExpected', validation_schema, rows)
Example #16
0
def test_valid_worksheets():
    """Check that the reader reports 'Sample', 'Analysis' and 'File' as valid.

    The result must be a list; order is not checked.
    """
    reader = XLSReader('data/example_AMP_T2D_Submission_form_V2.xlsx',
                       'data/T2D_xls2xml_v1.conf')
    found = reader.valid_worksheets()
    assert isinstance(found, list)
    assert set(found) == {'Sample', 'Analysis', 'File'}