def test_validate_file():
    """Check that ``utils.validate_file`` accepts the example TSV and XLS inputs."""
    schema_path = 'data/T2D_xls2xml_v1.schema'
    conf_path = 'data/T2D_xls2xml_v1.conf'

    # TSV input: the configuration key is passed to the constructor.
    samples_tsv = TSVReader('data/example_samples.tsv', conf_path, 'Sample')
    assert utils.validate_file(samples_tsv, schema_path)

    # XLS input: the configuration key is selected after construction.
    workbook = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2.xlsx',
        conf_path,
    )
    workbook.set_current_conf_key('Sample')
    assert utils.validate_file(workbook, schema_path)
def test_next_row():
    """Check the first row returned by ``XLSReader.next()`` for the 'Sample' key.

    The row must be a ``dict`` equal to the expected mapping of column header
    to cell value (plus the ``row_num`` entry).
    """
    xls_reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2.xlsx',
        'data/T2D_xls2xml_v1.conf',
    )
    xls_reader.set_current_conf_key('Sample')
    row = xls_reader.next()
    assert isinstance(row, dict)
    # Compare with == rather than ``0 == cmp(...)``: pytest can then report
    # which keys differ on failure, and ``cmp`` does not exist on Python 3.
    # The numeric values are written without the ``L`` suffix; on Python 2 an
    # int compares equal to the same-valued long, so the check is unchanged.
    assert row == {
        'Hispanic or Latino; of Spanish origin': None,
        'Phenotype': 'MeSH:D006262',
        'row_num': 2,
        'Description': 'Male normal',
        'Center_name': 'WTGC cambridge',
        'Case_Control': 'Control',
        'T2D': 0,
        'Analysis_alias': 'AN001',
        'Geno_ID': None,
        'Year of first visit': None,
        'Cell Type': 'Blood',
        'Maternal_id': 'SAM111113',
        'Gender': 'male',
        'Subject_ID': 'SAM111111',
        'Paternal_id': 'SAM111115',
        'Cohort ID': 'CO1111',
        'Novel Attributes': None,
        'Ethnicity Description': None,
        'Year of Birth': 1986,
        'Sample_ID': 'SAM111111',
        'Age': 31,
        'Ethnicity': 'EUWH',
    }
def test_set_current_conf_key():
    """Check that ``active_worksheet`` reflects the key given to ``set_current_conf_key``."""
    reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2.xlsx',
        'data/T2D_xls2xml_v1.conf',
    )
    # Before any key has been set there is no active worksheet.
    assert reader.active_worksheet is None

    # Each call must switch the active worksheet to the requested key.
    for conf_key in ('Sample', 'Analysis'):
        reader.set_current_conf_key(conf_key)
        assert reader.active_worksheet == conf_key
def test_get_current_headers():
    """Check ``get_current_headers`` for two valid keys and one failing key.

    For 'Sample' and 'Analysis' the result must be a list whose elements,
    taken as a set, match the expected headers. For 'ExceptionExpected' the
    call must raise.
    """
    expected_headers = [
        ('Sample', {
            u'Sample_ID', u'Subject_ID', u'Geno_ID', u'Phenotype', u'Gender',
            u'Analysis_alias', u'Cohort ID', u'Ethnicity',
            u'Ethnicity Description', u'T2D', u'Case_Control', u'Description',
            u'Center_name', u'Hispanic or Latino; of Spanish origin', u'Age',
            u'Year of Birth', u'Year of first visit', u'Cell Type',
            u'Maternal_id', u'Paternal_id', u'Novel Attributes',
        }),
        ('Analysis', {
            u'Analysis_name', u'Analysis_alias', u'Title', u'Description',
            u'Project_name', u'Experiment_type', u'Platform',
            u'Standard_refname or Sequence_accession', u'Imputation',
            u'Sequence_accession_label', u'External_link', u'Software',
            u'Pipeline Description', u'Run Accession(s)', u'Center_name',
            u'Analysis_date',
        }),
    ]

    reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2.xlsx',
        'data/T2D_xls2xml_v1.conf',
    )
    for conf_key, wanted in expected_headers:
        reader.set_current_conf_key(conf_key)
        found = reader.get_current_headers()
        assert isinstance(found, list)
        # Order is not checked, only the set of header names.
        assert set(found) == wanted

    # Setting the key happens outside the raises-block: only the header
    # lookup itself is expected to fail.
    reader.set_current_conf_key('ExceptionExpected')
    with pytest.raises(Exception):
        reader.get_current_headers()
def test_extract_rows():
    """Check ``utils.extract_rows`` against direct iteration of the readers.

    For both the TSV and the XLS example, ``extract_rows`` must return a
    truthy value and fill ``rows`` with 6 entries that equal, pairwise, the
    rows yielded by a freshly constructed reader. For the key
    'FalseExpected' it must return a falsy value.
    """
    validation_schema = 'data/T2D_xls2xml_v1.schema'

    # --- TSV ---
    rows = []
    tsv_reader = TSVReader('data/example_samples.tsv', 'data/T2D_xls2xml_v1.conf', 'Sample')
    assert utils.extract_rows(tsv_reader, 'Sample', validation_schema, rows)
    assert isinstance(rows, list)
    assert 6 == len(rows)
    # A second reader is built to iterate the same file independently.
    tsv_reader = TSVReader('data/example_samples.tsv', 'data/T2D_xls2xml_v1.conf', 'Sample')
    # NOTE: zip stops at the shorter input, so only paired rows are compared.
    for a, b in zip(rows, tsv_reader):
        # == instead of ``0 == cmp(a, b)``: gives a readable diff on failure
        # and does not depend on the Python 2-only ``cmp`` builtin.
        assert a == b

    # --- XLS ---
    rows = []
    xls_reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2.xlsx',
        'data/T2D_xls2xml_v1.conf',
    )
    assert utils.extract_rows(xls_reader, 'Sample', validation_schema, rows)
    assert isinstance(rows, list)
    assert 6 == len(rows)
    xls_reader = XLSReader(
        'data/example_AMP_T2D_Submission_form_V2.xlsx',
        'data/T2D_xls2xml_v1.conf',
    )
    xls_reader.set_current_conf_key('Sample')
    for a, b in zip(rows, xls_reader):
        assert a == b

    # --- failure case: the same reader instance is reused with another key ---
    rows = []
    assert not utils.extract_rows(xls_reader, 'FalseExpected', validation_schema, rows)