def test_merge_mapping_correct_num_records(self):
    """merge_mapping_file: num recs returned is correct with varied input

    For each combination of input mapping files, checks the number of
    lines returned and that every returned line splits into the same
    number of tab-separated fields.
    """
    # (input mapping files, expected number of output lines); the line
    # count is always 1 greater than the number of samples
    cases = [
        ([self.m1], 3),
        ([self.m2], 2),
        ([self.m3], 4),
        ([self.m1, self.m2], 4),
        ([self.m1, self.m3], 6),
        ([self.m2, self.m3], 5),
        ([self.m1, self.m2, self.m3], 7),
        ([self.m3, self.m3, self.m3, self.m3], 4),
    ]
    for mapping_files, expected_line_count in cases:
        merged = merge_mapping_files(mapping_files)
        self.assertEqual(len(merged), expected_line_count)
        # a single distinct field count means all lines are equally wide
        field_counts = set(len(row.split('\t')) for row in merged)
        self.assertEqual(len(field_counts), 1)
def write_full_mapping_file(study, study_input_dir, zip_fname, files_to_remove,
                            output_dir):
    """Generate a merged mapping file for a study.

    Writes the per-run mapping files via write_db_mapping_files, merges
    them with merge_mapping_files (missing values filled with 'no_data')
    and writes the result to
    ``<output_dir>/study_<study>_mapping_file.txt``.

    Parameters:
        study: study identifier; interpolated into file names via str().
        study_input_dir: directory containing the per-run mapping files
            named ``study_<study>_run_<prefix>_mapping.txt``.
        zip_fname: currently unused; kept so existing callers keep working.
        files_to_remove: list that is extended in place with the merged
            mapping file path and every per-run mapping file path.
        output_dir: directory the merged mapping file is written to.

    Returns:
        The same files_to_remove list that was passed in.
    """
    # write per-run mapping files and then return the prefixes
    run_prefixes = write_db_mapping_files(study, True, study_input_dir, False)

    # define the full mapping file and register it for later cleanup
    mapping_fname = 'study_%s_mapping_file.txt' % (str(study))
    final_mapping_fp = join(output_dir, mapping_fname)
    files_to_remove.append(final_mapping_fp)

    mapping_files_to_merge = []
    try:
        # collect an open handle for each per-run mapping file
        for i in run_prefixes:
            mapping_fp = join(study_input_dir,
                              'study_%s_run_%s_mapping.txt'
                              % (str(study), str(i)))
            # register for cleanup before opening, so the path is recorded
            # even if open() fails
            files_to_remove.append(mapping_fp)
            # NOTE(review): mode 'U' is kept for backward compatibility;
            # it is rejected by Python >= 3.11 - confirm target version.
            mapping_files_to_merge.append(open(mapping_fp, 'U'))

        # merge the mapping files and write the full mapping file
        mapping_data = merge_mapping_files(mapping_files_to_merge,
                                           no_data_value='no_data')
        write_mapping_file(mapping_data, final_mapping_fp)
    finally:
        # close handles only after the merged data has been written, and
        # also when opening, merging or writing raised
        for mapping_file in mapping_files_to_merge:
            mapping_file.close()

    return files_to_remove
def test_merge_mapping_file_good_duplicates(self):
    """merge_mapping_file: same sample ids merged correctly when they have
    mergeable data

    Merges m1 with m1_dup_good and checks the number of returned lines
    and the width of those lines.
    """
    merged = merge_mapping_files([self.m1, self.m1_dup_good])

    # three lines are expected in the merged output
    expected_line_count = 3
    self.assertEqual(len(merged), expected_line_count)

    # every line splits into the same number of tab-separated fields
    distinct_widths = set(len(row.split('\t')) for row in merged)
    self.assertEqual(len(distinct_widths), 1)

    # the first line has 7 fields
    first_line_fields = merged[0].split('\t')
    self.assertEqual(len(first_line_fields), 7)
def main():
    """Merge the mapping files named on the command line into one file.

    Reads opts.mapping_fps, opts.no_data_value and opts.output_fp from the
    parsed command line, merges the input files with merge_mapping_files
    and writes the result with write_mapping_file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    output_fp = opts.output_fp
    no_data_value = opts.no_data_value

    mapping_files = []
    try:
        # open inside the try block so that handles opened before a failing
        # open() are still closed
        for fp in opts.mapping_fps:
            # NOTE(review): mode 'U' is kept for backward compatibility;
            # it is rejected by Python >= 3.11 - confirm target version.
            mapping_files.append(open(fp, 'U'))
        mapping_data = merge_mapping_files(mapping_files,
                                           no_data_value=no_data_value)
        write_mapping_file(mapping_data, output_fp)
    finally:
        # release the input handles once the output has been written
        for mapping_file in mapping_files:
            mapping_file.close()
def main():
    """Merge the mapping files named on the command line into one file.

    Reads opts.mapping_fps, opts.no_data_value and opts.output_fp from the
    parsed command line, merges the input files with merge_mapping_files
    and writes the result with write_mapping_file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    output_fp = opts.output_fp
    no_data_value = opts.no_data_value

    mapping_files = []
    try:
        # open inside the try block so that handles opened before a failing
        # open() are still closed
        for fp in opts.mapping_fps:
            # NOTE(review): mode 'U' is kept for backward compatibility;
            # it is rejected by Python >= 3.11 - confirm target version.
            mapping_files.append(open(fp, 'U'))
        mapping_data = merge_mapping_files(mapping_files,
                                           no_data_value=no_data_value)
        write_mapping_file(mapping_data, output_fp)
    finally:
        # release the input handles once the output has been written
        for mapping_file in mapping_files:
            mapping_file.close()
def test_merge_mapping_file_good_duplicates(self):
    """merge_mapping_file: same sample ids merged correctly when they have
    mergeable data

    Merges m1 with m1_dup_good and checks the number of returned lines
    and the width of those lines.
    """
    merged = merge_mapping_files([self.m1, self.m1_dup_good])

    # three lines are expected in the merged output
    expected_line_count = 3
    self.assertEqual(len(merged), expected_line_count)

    # every line splits into the same number of tab-separated fields
    distinct_widths = set(len(row.split('\t')) for row in merged)
    self.assertEqual(len(distinct_widths), 1)

    # the first line has 7 fields
    first_line_fields = merged[0].split('\t')
    self.assertEqual(len(first_line_fields), 7)
def test_merge_mapping_file(self):
    """merge_mapping_file: functions with default parameters

    Merges m1, m2 and m3 and compares the result with m1_m2_m3_exp.
    Line order and the order of fields within a line are ignored; only
    the first and last header fields are checked positionally.
    """
    actual = merge_mapping_files([self.m1, self.m2, self.m3])
    expected = self.m1_m2_m3_exp

    # the header must start with the sample id / barcode columns and end
    # with the description column
    self.assertTrue(actual[0].startswith('#SampleID\tBarcodeSequence'))
    self.assertTrue(actual[0].endswith('Description'))

    # zip() stops at the shorter sequence, so without this check missing
    # or extra merged lines would go unnoticed
    self.assertEqual(len(actual), len(expected))

    # sorted() works on copies, leaving the expected fixture untouched
    for a, e in zip(sorted(actual), sorted(expected)):
        a_fields = sorted(a.split('\t'))
        e_fields = sorted(e.split('\t'))
        self.assertEqual(a_fields, e_fields)
def test_merge_mapping_file(self):
    """merge_mapping_file: functions with default parameters

    Merges m1, m2 and m3 and compares the result with m1_m2_m3_exp.
    Line order and the order of fields within a line are ignored; only
    the first and last header fields are checked positionally.
    """
    actual = merge_mapping_files([self.m1, self.m2, self.m3])
    expected = self.m1_m2_m3_exp

    # the header must start with the sample id / barcode columns and end
    # with the description column
    self.assertTrue(actual[0].startswith('#SampleID\tBarcodeSequence'))
    self.assertTrue(actual[0].endswith('Description'))

    # zip() stops at the shorter sequence, so without this check missing
    # or extra merged lines would go unnoticed
    self.assertEqual(len(actual), len(expected))

    # sorted() works on copies, leaving the expected fixture untouched
    for a, e in zip(sorted(actual), sorted(expected)):
        a_fields = sorted(a.split('\t'))
        e_fields = sorted(e.split('\t'))
        self.assertEqual(a_fields, e_fields)
def test_merge_mapping_correct_num_records(self):
    """merge_mapping_file: num recs returned is correct with varied input

    For each combination of input mapping files, checks the number of
    lines returned and that every returned line splits into the same
    number of tab-separated fields.
    """
    # (input mapping files, expected number of output lines); the line
    # count is always 1 greater than the number of samples
    cases = [
        ([self.m1], 3),
        ([self.m2], 2),
        ([self.m3], 4),
        ([self.m1, self.m2], 4),
        ([self.m1, self.m3], 6),
        ([self.m2, self.m3], 5),
        ([self.m1, self.m2, self.m3], 7),
        ([self.m3, self.m3, self.m3, self.m3], 4),
    ]
    for mapping_files, expected_line_count in cases:
        merged = merge_mapping_files(mapping_files)
        self.assertEqual(len(merged), expected_line_count)
        # a single distinct field count means all lines are equally wide
        field_counts = set(len(row.split('\t')) for row in merged)
        self.assertEqual(len(field_counts), 1)
def test_merge_mapping_correct_num_records(self):
    """merge_mapping_file: num recs returned is correct with varied input

    For each combination of input mapping files, checks the number of
    lines returned by merge_mapping_files.
    """
    # (input mapping files, expected length); the length is always the
    # number of samples plus 1 (for the header line)
    cases = (
        ([self.m1], 3),
        ([self.m2], 2),
        ([self.m3], 4),
        ([self.m1, self.m2], 4),
        ([self.m1, self.m3], 6),
        ([self.m2, self.m3], 5),
        ([self.m1, self.m2, self.m3], 7),
        ([self.m3, self.m3, self.m3, self.m3], 13),
    )
    for mapping_files, expected_length in cases:
        merged = merge_mapping_files(mapping_files)
        self.assertEqual(len(merged), expected_length)
def test_merge_mapping_correct_num_records(self):
    """merge_mapping_file: num recs returned is correct with varied input

    For each combination of input mapping files, checks the number of
    lines returned by merge_mapping_files.
    """
    # (input mapping files, expected length); the length is always the
    # number of samples plus 1 (for the header line)
    cases = (
        ([self.m1], 3),
        ([self.m2], 2),
        ([self.m3], 4),
        ([self.m1, self.m2], 4),
        ([self.m1, self.m3], 6),
        ([self.m2, self.m3], 5),
        ([self.m1, self.m2, self.m3], 7),
        ([self.m3, self.m3, self.m3, self.m3], 13),
    )
    for mapping_files, expected_length in cases:
        merged = merge_mapping_files(mapping_files)
        self.assertEqual(len(merged), expected_length)
def test_merge_mapping_file_dups(self):
    """merge_mapping_file: duplicate sample ids stay duplicate

    Passes m1 twice (with m2 in between) and checks the number of lines
    in the merged result.
    """
    inputs = [self.m1, self.m2, self.m1]
    merged = merge_mapping_files(inputs)

    # length is the number of samples plus the header line
    expected_length = 6
    self.assertEqual(len(merged), expected_length)
def test_merge_mapping_file_dups(self):
    """merge_mapping_file: duplicate sample ids stay duplicate

    Passes m1 twice (with m2 in between) and checks the number of lines
    in the merged result.
    """
    inputs = [self.m1, self.m2, self.m1]
    merged = merge_mapping_files(inputs)

    # length is the number of samples plus the header line
    expected_length = 6
    self.assertEqual(len(merged), expected_length)