def produce(self, unknown_args=None):
    """Load the sample files listed in the parsed namespace, if any.

    When ``namespace.files`` is non-empty, each file is read with
    ``SampleCollection.from_file`` and the per-file collections are summed
    into a single collection stored as ``namespace.sample_collection``.

    Args:
        unknown_args: Accepted but not used by this method.

    Returns:
        ``self.namespace`` (modified in place when files were given).
    """
    namespace = self.namespace
    collection_name = namespace.name or self.name

    # Nothing to load: hand the namespace back untouched.
    if not namespace.files:
        return namespace

    # A callable header is resolved once per file and stored back.
    if callable(namespace.header):
        namespace.header = [namespace.header(f) for f in namespace.files]

    parts = []
    for index, handle in enumerate(namespace.files):
        header = namespace.header[index]
        # An int header is forwarded as `header_line`; any other value
        # is forwarded as `prefix` instead.
        header_is_line = isinstance(header, int)
        columns = (
            namespace.columns[index].get_iterator
            if namespace.columns
            else None
        )
        chosen_samples = namespace.samples[index] if namespace.samples else None

        part = SampleCollection.from_file(
            f'Sample collection, part {index} of {collection_name}',
            handle,
            columns_selector=columns,
            samples=chosen_samples,
            reverse_selection=getattr(namespace, 'reverse', False),
            delimiter=namespace.delimiter,
            header_line=header if header_is_line else None,
            use_header=header_is_line,
            prefix=None if header_is_line else header,
            description_column=namespace.description_column,
        )
        parts.append(part)

    # Merge all parts, starting from an empty collection with the final name.
    namespace.sample_collection = sum(parts, SampleCollection(collection_name))
    return namespace
def minimal_data():
    """Build a tiny two-gene data set with one case and one control sample.

    Returns:
        A tuple ``(tp53, map2k1, case, control)``: the two genes and the two
        single-sample collections that use them.
    """
    tp53, map2k1 = Gene('TP53'), Gene('MAP2K1')

    case_sample = Sample('1', {tp53: 2, map2k1: 1})
    case = SampleCollection('case', [case_sample])

    control_sample = Sample('1', {tp53: 1, map2k1: 1})
    control = SampleCollection('control', [control_sample])

    return tp53, map2k1, case, control
def test_ttest():
    """Check that ttest() yields a pd.Series with a key for every gene."""
    tumour_samples = [
        Sample.from_names('Tumour_1', {'BAD': 1.2345, 'FUCA2': 6.5432}),
        Sample.from_names('Tumour_2', {'BAD': 2.3456, 'FUCA2': 7.6543}),
    ]
    normal_samples = [
        Sample.from_names('Normal_1', {'BAD': 6.3456, 'FUCA2': 11.6543}),
        Sample.from_names('Normal_2', {'BAD': 7.1111, 'FUCA2': 9.9711}),
    ]

    experiment = Experiment(
        case=SampleCollection('Tumour', tumour_samples),
        control=SampleCollection('Normal', normal_samples),
    )

    result = ttest(experiment)

    assert isinstance(result, pd.Series)
    # Every gene present in the merged experiment must appear in the result.
    assert all(
        gene in list(result.keys())
        for gene in experiment.get_all().genes
    )
def test_from_csv():
    """Check CSV loading and the warning for a non-comma delimiter."""
    # Default delimiter: all four samples should be read.
    with temp_text_file(csv_contents) as csv_file:
        loaded = SampleCollection.from_csv_file('all_samples.csv', csv_file)
        assert len(loaded.samples) == 4

    # Tab delimiter on a file named *.csv should trigger a UserWarning.
    delimiter_warning = (
        'You are using not comma delimiter for what looks like csv file.'
    )
    with temp_text_file(csv_contents) as csv_file, \
            warns(UserWarning, match=delimiter_warning):
        SampleCollection.from_csv_file(
            'all_samples.csv', csv_file, delimiter='\t'
        )
def test_init():
    """Check that a SampleCollection keeps its name and holds Sample objects."""
    first = Sample('Tumour_1', {Gene('BAD'): 1.2345, Gene('FUCA2'): 6.5432})
    second = Sample('Tumour_2', {Gene('BAD'): 2.3456, Gene('FUCA2'): 7.6543})

    collection = SampleCollection('Tumour', [first, second])

    assert collection.name == 'Tumour'
    assert all(isinstance(member, Sample) for member in collection.samples)
def test_get_all():
    """Check that Experiment.get_all() merges case and control samples."""
    tumour_samples = [
        Sample.from_names('Tumour_1', {'BAD': 1.2345, 'FUCA2': 6.5432}),
        Sample.from_names('Tumour_2', {'BAD': 2.3456, 'FUCA2': 7.6543}),
    ]
    normal_samples = [Sample.from_names('Normal_1', {'BAD': 3.4567})]

    tumour = SampleCollection('Tumour', tumour_samples)
    normal = SampleCollection('Normal', normal_samples)

    merged = Experiment(case=tumour, control=normal).get_all()

    assert isinstance(merged, SampleCollection)
    # Labels of both input collections must be present in the merged one.
    expected_labels = tumour.labels + normal.labels
    assert all(label in merged.labels for label in expected_labels)
def test_init():
    """Check that Experiment stores the given case and control collections."""
    tumour_samples = [
        Sample.from_names('Tumour_1', {'BAD': 1.2345, 'FUCA2': 6.5432}),
        Sample.from_names('Tumour_2', {'BAD': 2.3456, 'FUCA2': 7.6543}),
    ]
    normal_samples = [Sample.from_names('Normal_1', {'BAD': 3.4567})]

    tumour = SampleCollection('Tumour', tumour_samples)
    normal = SampleCollection('Normal', normal_samples)

    experiment = Experiment(case=tumour, control=normal)

    # Type checks first, then equality with the objects passed in.
    assert isinstance(experiment.case, SampleCollection)
    assert isinstance(experiment.control, SampleCollection)
    assert experiment.case == tumour
    assert experiment.control == normal
def test_from_gct():
    """Check GCT loading and the warnings it is expected to emit.

    First part: loading ``gct_contents`` must warn that the sample count (4)
    differs from the declared 3, and still yield the four labelled samples.
    Second part: the same content with its first line replaced by ``#1.1``
    must warn about an unsupported GCT version.
    """
    with temp_text_file(gct_contents) as gct_file:
        # Raw string: `\(` and `\)` are regex escapes for literal parentheses.
        # In a non-raw literal they are invalid string escapes and trigger a
        # DeprecationWarning/SyntaxWarning on modern Python.
        expected_warning = r'Samples count \(4\) does not match with the 3 declared in all_samples.gct file.'

        with warns(UserWarning, match=expected_warning):
            collection = SampleCollection.from_gct_file(
                'all_samples.gct', gct_file
            )

        assert len(collection.samples) == 4
        assert collection.labels == ['NORM-1', 'GBM-1', 'GBM-2', 'OV-1']

    # Replace the version definition (first line) with an older one.
    lines = gct_contents.split('\n')
    lines[0] = '#1.1'
    old_content = '\n'.join(lines)

    with temp_text_file(old_content) as old_gct_file:
        with warns(UserWarning, match='Unsupported version of GCT file'):
            SampleCollection.from_gct_file('Outdated file', old_gct_file)
def shuffle_and_divide(merged_collection, midpoint):
    """Randomly split the samples of a collection into two new collections.

    The sample container is copied before shuffling, so
    ``merged_collection.samples`` itself is not reordered.

    Args:
        merged_collection: Object exposing a ``samples`` sequence.
        midpoint: Index at which the shuffled samples are cut.

    Returns:
        A 2-tuple of ``SampleCollection`` objects, both named
        ``'Random collection'``: the samples before ``midpoint`` and the
        samples from ``midpoint`` onwards.
    """
    pool = copy(merged_collection.samples)
    shuffle(pool)

    head = SampleCollection('Random collection', pool[:midpoint])
    tail = SampleCollection('Random collection', pool[midpoint:])
    return head, tail