def __init__(self, metadata_file):
    """Set up the spreadsheet reader for ``metadata_file``.

    The field configuration is always ``etc/eva_project_conf.yaml`` located
    under ``ROOT_DIR``.

    :param metadata_file: spreadsheet handed unchanged to ``XlsxReader`` and
        also stored on the instance as ``metadata_file``.
    """
    yaml_path = os.path.join(ROOT_DIR, 'etc', 'eva_project_conf.yaml')
    reader = XlsxReader(metadata_file, yaml_path)
    self.reader = reader
    self.metadata_file = metadata_file
def setUp(self):
    """Instantiate ``XlsxReader`` from the two paths held on ``self``.

    The reader is built from ``self.metadata_file`` and
    ``self.eva_xls_reader_conf`` and stored as ``self.xls_reader``.
    """
    spreadsheet = self.metadata_file
    field_conf = self.eva_xls_reader_conf
    self.xls_reader = XlsxReader(spreadsheet, field_conf)
class EvaXlsxReader(AppLogger):
    """Convenience accessors over the worksheets of a metadata spreadsheet.

    Every worksheet is read through a single ``XlsxReader`` configured with
    ``etc/eva_project_conf.yaml`` (resolved under ``ROOT_DIR``). Worksheet
    contents are exposed as cached properties, so each sheet is read at most
    once per instance.
    """

    def __init__(self, metadata_file):
        """Create the underlying reader.

        :param metadata_file: spreadsheet passed unchanged to ``XlsxReader``;
            also kept on the instance for use in error messages.
        """
        conf = os.path.join(ROOT_DIR, 'etc', 'eva_project_conf.yaml')
        self.reader = XlsxReader(metadata_file, conf)
        self.metadata_file = metadata_file

    def _get_all_rows(self, active_sheet):
        """Collect every row the reader yields for ``active_sheet``.

        :param active_sheet: worksheet name assigned to
            ``self.reader.active_worksheet`` before reading.
        :return: list of the truthy values returned by successive
            ``self.reader.next()`` calls, in order. The list is empty when
            the first call raises ``StopIteration`` or returns a falsy value.
        """
        self.reader.active_worksheet = active_sheet
        rows = []
        try:
            row = self.reader.next()
            # A falsy return value ends the scan just like StopIteration does;
            # it is deliberately NOT stored, so callers can rely on every
            # element supporting ``.get``.
            while row:
                rows.append(row)
                row = self.reader.next()
        except StopIteration:
            pass
        return rows

    @staticmethod
    def _group_by_analysis_alias(rows):
        """Group ``rows`` into a ``defaultdict(list)`` keyed by 'Analysis Alias'."""
        grouped = defaultdict(list)
        for row in rows:
            grouped[row.get('Analysis Alias')].append(row)
        return grouped

    @cached_property
    def project(self):
        """First row of the 'Project' worksheet.

        :return: the value of ``self.reader.next()``, or ``None`` when the
            reader raises ``StopIteration``; in that case an error naming
            ``self.metadata_file`` is reported through ``self.error``.
        """
        self.reader.active_worksheet = 'Project'
        try:
            return self.reader.next()
        except StopIteration:
            self.error('No project was found in the spreadsheet %s', self.metadata_file)
            return None

    @cached_property
    def submitters(self):
        """All rows of the 'Submitter Details' worksheet."""
        return self._get_all_rows('Submitter Details')

    @cached_property
    def analysis(self):
        """All rows of the 'Analysis' worksheet."""
        return self._get_all_rows('Analysis')

    @cached_property
    def samples(self):
        """All rows of the 'Sample' worksheet."""
        return self._get_all_rows('Sample')

    @cached_property
    def files(self):
        """All rows of the 'Files' worksheet."""
        return self._get_all_rows('Files')

    @cached_property
    def project_title(self):
        """'Project Title' of the project row, or ``None`` when there is no project row."""
        if self.project:
            return self.project.get('Project Title')
        return None

    @property
    def analysis_titles(self):
        """'Analysis Title' of every analysis row, in row order (missing values are ``None``)."""
        return [a.get('Analysis Title') for a in self.analysis]

    @property
    def references(self):
        """Distinct truthy 'Reference' values across the analysis rows.

        The list is built from a set, so its order is not defined.
        """
        return list({a.get('Reference') for a in self.analysis if a.get('Reference')})

    @property
    def samples_per_analysis(self):
        """Sample rows grouped by their 'Analysis Alias' (a ``defaultdict(list)``)."""
        return self._group_by_analysis_alias(self.samples)

    @property
    def files_per_analysis(self):
        """File rows grouped by their 'Analysis Alias' (a ``defaultdict(list)``)."""
        return self._group_by_analysis_alias(self.files)
class TestXlsxReader(TestCase):
    """Checks ``XlsxReader`` against the files in the ``resources`` directory next to this module."""

    metadata_file = os.path.join(
        os.path.dirname(__file__), 'resources', 'metadata.xlsx'
    )
    eva_xls_reader_conf = os.path.join(
        os.path.dirname(__file__), 'resources', 'test_metadata_fields.yaml'
    )

    def setUp(self):
        """Build the reader used by each test from the two class-level paths."""
        self.xls_reader = XlsxReader(self.metadata_file, self.eva_xls_reader_conf)

    def test_valid_worksheets(self):
        """``valid_worksheets()`` returns a list naming exactly the three expected sheets."""
        expected_names = {'Project', 'Sample', 'Analysis'}
        found = self.xls_reader.valid_worksheets()
        assert isinstance(found, list)
        assert set(found) == expected_names

    def test_get_valid_conf_keys(self):
        """Same check as ``test_valid_worksheets``.

        NOTE(review): despite its name this test calls ``valid_worksheets()``,
        making it identical to ``test_valid_worksheets`` -- confirm whether a
        different reader method was meant to be exercised here.
        """
        expected_names = {'Project', 'Sample', 'Analysis'}
        found = self.xls_reader.valid_worksheets()
        assert isinstance(found, list)
        assert set(found) == expected_names

    def test_next_row(self):
        """``next()`` returns the expected dict for the 'Sample' sheet, then for the 'Project' sheet."""
        expected_sample = {
            'Analysis Alias': 'GAE',
            'Sample Accession': None,
            'Sample ID': None,
            'Sample Name': 'S1',
            'Sampleset Accession': None,
            'Title': 'Sample 1',
            'row_num': 4,
        }
        expected_project = {
            'Project Title': 'Greatest project ever',
            'Project Alias': 'GPE',
            'Publication(s)': None,
            'Parent Project(s)': None,
            'Child Project(s)': None,
            'row_num': 2,
        }

        self.xls_reader.active_worksheet = 'Sample'
        sample_row = self.xls_reader.next()
        assert isinstance(sample_row, dict)
        assert sample_row == expected_sample

        # Switching the active worksheet and reading again must yield the
        # project row, not a continuation of the sample sheet.
        self.xls_reader.active_worksheet = 'Project'
        project_row = self.xls_reader.next()
        assert isinstance(project_row, dict)
        assert project_row == expected_project

    def test_get_rows(self):
        """``get_rows()`` on the 'Sample' sheet returns a list of 100 rows with the expected first row."""
        expected_first = {
            'Analysis Alias': 'GAE',
            'Sample Accession': None,
            'Sample ID': None,
            'Sample Name': 'S1',
            'Sampleset Accession': None,
            'Title': 'Sample 1',
            'row_num': 4,
        }
        self.xls_reader.active_worksheet = 'Sample'
        all_rows = self.xls_reader.get_rows()
        assert isinstance(all_rows, list)
        assert len(all_rows) == 100
        assert all_rows[0] == expected_first