def test_merge_mapping_correct_num_records(self):
     """merge_mapping_files: line and field counts correct with varied input

     The merged result should hold one more line than there are samples,
     and all of its lines should split into the same number of
     tab-separated fields.
     """
     # (mapping files to merge, expected number of lines in the result)
     cases = [
         ([self.m1], 3),
         ([self.m2], 2),
         ([self.m3], 4),
         ([self.m1, self.m2], 4),
         ([self.m1, self.m3], 6),
         ([self.m2, self.m3], 5),
         ([self.m1, self.m2, self.m3], 7),
         # merging m3 with itself gives the same count as m3 alone
         ([self.m3, self.m3, self.m3, self.m3], 4),
     ]
     for mapping_files, expected_num_lines in cases:
         merged = merge_mapping_files(mapping_files)
         self.assertEqual(len(merged), expected_num_lines)
         # a single distinct field count means every line has equal width
         field_counts = set(len(line.split('\t')) for line in merged)
         self.assertEqual(len(field_counts), 1)
def write_full_mapping_file(study, study_input_dir,zip_fname,files_to_remove,
                            output_dir):
    """ Generate a merged mapping file for a study

    The per-run mapping files are written by write_db_mapping_files, opened
    from study_input_dir, merged with merge_mapping_files (called with
    no_data_value='no_data') and written to
    <output_dir>/study_<study>_mapping_file.txt.

    study: study identifier; it is interpolated into the file names
    study_input_dir: directory the per-run mapping files are read from
    zip_fname: currently unused, because the archiving step is disabled;
     kept so that existing callers keep working
    files_to_remove: list to which the merged and per-run mapping file
     paths are appended (modified in place)
    output_dir: directory the merged mapping file is written to

    Returns files_to_remove, i.e. the same list object that was passed in.
    """

    # write per_run mapping files and then return the prefixes
    run_prefixes=write_db_mapping_files(study,True,study_input_dir,False)

    # define the full mapping file and register it for later removal
    mapping_fname='study_%s_mapping_file.txt' % (str(study))
    final_mapping_fp=join(output_dir,mapping_fname)
    files_to_remove.append(final_mapping_fp)

    # Every handle opened below is closed in the finally clause, including
    # when a later open(), the merge or the write raises.
    mapping_files_to_merge=[]
    try:
        # iterate over run_prefixes and open the files that need to be merged
        for i in run_prefixes:
            mapping_fp=join(study_input_dir,'study_%s_run_%s_mapping.txt' % \
                                            (str(study),str(i)))
            # add to list of files to remove
            files_to_remove.append(mapping_fp)
            # NOTE(review): the 'U' (universal newlines) mode is deprecated
            # in Python 3 and was removed in 3.11 - revisit when porting.
            mapping_files_to_merge.append(open(mapping_fp,'U'))

        # merge the mapping files and return tabular data
        mapping_data = merge_mapping_files(mapping_files_to_merge,
                                           no_data_value='no_data')
        # write the full mapping file
        write_mapping_file(mapping_data,final_mapping_fp)
    finally:
        for mapping_file in mapping_files_to_merge:
            mapping_file.close()

    # The merged file is not archived here: the tar step that would have
    # used zip_fname is disabled.

    return files_to_remove
 def test_merge_mapping_file_good_duplicates(self):
     """merge_mapping_files: shared sample ids with mergeable data are combined
     """
     merged = merge_mapping_files([self.m1,self.m1_dup_good])
     # 3 lines in total
     self.assertEqual(len(merged),3)
     # every line splits into the same number of tab-separated fields
     field_counts = set(len(line.split('\t')) for line in merged)
     self.assertEqual(len(field_counts),1)
     # the first line has 7 fields
     first_line_fields = merged[0].split('\t')
     self.assertEqual(len(first_line_fields),7)
Exemple #4
0
def main():
    """Merge the mapping files given on the command line and write the result

    Reads mapping_fps, no_data_value and output_fp from the parsed options,
    merges the opened mapping files with merge_mapping_files and writes the
    merged data to output_fp with write_mapping_file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    no_data_value = opts.no_data_value

    # Every handle opened below is closed in the finally clause, including
    # when a later open(), the merge or the write raises.
    mapping_files = []
    try:
        for fp in opts.mapping_fps:
            mapping_files.append(open(fp,'U'))

        mapping_data = merge_mapping_files(mapping_files,
                                           no_data_value=no_data_value)
        write_mapping_file(mapping_data,output_fp)
    finally:
        for mapping_file in mapping_files:
            mapping_file.close()
def main():
    """Merge the mapping files given on the command line and write the result

    Reads mapping_fps, no_data_value and output_fp from the parsed options,
    merges the opened mapping files with merge_mapping_files and writes the
    merged data to output_fp with write_mapping_file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    no_data_value = opts.no_data_value

    # Collect the handles in a list so that each one is closed afterwards,
    # whether the merge and write succeed or an exception is raised.
    mapping_files = []
    try:
        for fp in opts.mapping_fps:
            mapping_files.append(open(fp, 'U'))

        mapping_data = merge_mapping_files(mapping_files,
                                           no_data_value=no_data_value)
        write_mapping_file(mapping_data, output_fp)
    finally:
        for mapping_file in mapping_files:
            mapping_file.close()
Exemple #6
0
 def test_merge_mapping_file_good_duplicates(self):
     """merge_mapping_files: repeated sample ids with mergeable data collapse
     """
     inputs = [self.m1, self.m1_dup_good]
     merged = merge_mapping_files(inputs)
     # 3 lines in total
     self.assertEqual(len(merged), 3)
     # all lines have the same number of tab-separated fields
     widths = [len(line.split('\t')) for line in merged]
     self.assertEqual(len(set(widths)), 1)
     # the first line has 7 fields
     self.assertEqual(len(merged[0].split('\t')), 7)
    def test_merge_mapping_file(self):
        """merge_mapping_files: works with its default parameters """
        merged = merge_mapping_files([self.m1, self.m2, self.m3])
        expected = self.m1_m2_m3_exp

        # the first line must open with the SampleID and BarcodeSequence
        # columns and close with the Description column
        first_line = merged[0]
        self.assertTrue(first_line.startswith('#SampleID\tBarcodeSequence'))
        self.assertTrue(first_line.endswith('Description'))

        # sort both sides in place so the line order does not matter
        merged.sort()
        expected.sort()

        # compare line by line, ignoring the order of fields within a line
        # NOTE(review): zip stops at the shorter list, so a difference in
        # the number of lines is not detected here.
        for merged_line, expected_line in zip(merged, expected):
            self.assertEqual(sorted(merged_line.split('\t')),
                             sorted(expected_line.split('\t')))
 def test_merge_mapping_file(self):
     """merge_mapping_files: behaves as expected with default parameters """
     observed = merge_mapping_files([self.m1,self.m2,self.m3])
     expected = self.m1_m2_m3_exp

     # first line: begins with SampleID/BarcodeSequence, ends with Description
     header = observed[0]
     self.assertTrue(header.startswith('#SampleID\tBarcodeSequence'))
     self.assertTrue(header.endswith('Description'))

     # line order is not significant, so sort both lists in place
     observed.sort()
     expected.sort()

     # field order within a line is not significant either
     # NOTE(review): zip truncates to the shorter list, so a mismatch in the
     # number of lines goes unnoticed here.
     for obs_line,exp_line in zip(observed,expected):
         obs_fields = sorted(obs_line.split('\t'))
         exp_fields = sorted(exp_line.split('\t'))
         self.assertEqual(obs_fields,exp_fields)
Exemple #9
0
    def test_merge_mapping_correct_num_records(self):
        """merge_mapping_files: record and field counts hold for varied input

        The number of lines should be one more than the number of samples,
        and every line should have the same number of tab-separated fields.
        """
        def check(mapping_files, expected_num_lines):
            # merge, then verify the line count and that all lines share
            # a single field count
            merged = merge_mapping_files(mapping_files)
            self.assertEqual(len(merged), expected_num_lines)
            widths = [len(line.split('\t')) for line in merged]
            self.assertEqual(len(set(widths)), 1)

        # single mapping files
        check([self.m1], 3)
        check([self.m2], 2)
        check([self.m3], 4)

        # pairs of mapping files
        check([self.m1, self.m2], 4)
        check([self.m1, self.m3], 6)
        check([self.m2, self.m3], 5)

        # all three mapping files
        check([self.m1, self.m2, self.m3], 7)

        # the same mapping file four times: same count as m3 on its own
        check([self.m3, self.m3, self.m3, self.m3], 4)
    def test_merge_mapping_correct_num_records(self):
        """merge_mapping_files: number of lines is right for varied input

        The result should hold one header line plus one line per sample.
        """
        # (mapping files to merge, expected number of lines in the result)
        cases = [
            ([self.m1], 3),
            ([self.m2], 2),
            ([self.m3], 4),
            ([self.m1, self.m2], 4),
            ([self.m1, self.m3], 6),
            ([self.m2, self.m3], 5),
            ([self.m1, self.m2, self.m3], 7),
            # four copies of m3: 13 lines are expected
            ([self.m3, self.m3, self.m3, self.m3], 13),
        ]
        for mapping_files, expected_num_lines in cases:
            merged = merge_mapping_files(mapping_files)
            self.assertEqual(len(merged), expected_num_lines)
 def test_merge_mapping_correct_num_records(self):
     """merge_mapping_files: returns the right number of lines for varied input

     The expected length is the number of samples plus 1 for the header line.
     """
     inputs = [
         [self.m1],
         [self.m2],
         [self.m3],
         [self.m1,self.m2],
         [self.m1,self.m3],
         [self.m2,self.m3],
         [self.m1,self.m2,self.m3],
         [self.m3,self.m3,self.m3,self.m3],
     ]
     # expected lengths, in the same order as the inputs above
     expected_lengths = [3,2,4,4,6,5,7,13]

     for mapping_files,expected_length in zip(inputs,expected_lengths):
         merged = merge_mapping_files(mapping_files)
         self.assertEqual(len(merged),expected_length)
 def test_merge_mapping_file_dups(self):
     """merge_mapping_files: repeated sample ids are kept as separate entries """
     inputs = [self.m1, self.m2, self.m1]
     merged = merge_mapping_files(inputs)
     # one header line plus one line per sample
     expected_num_lines = 6
     self.assertEqual(len(merged), expected_num_lines)
 def test_merge_mapping_file_dups(self):
     """merge_mapping_files: duplicated sample ids remain duplicated """
     # m1 is deliberately passed twice
     merged = merge_mapping_files([self.m1,self.m2,self.m1])
     # header line plus the sample lines
     num_lines = len(merged)
     self.assertEqual(num_lines,6)