Esempio n. 1
0
 def test_append_column(self):
     """Append a column to a TabFile and check the header and first line
     """
     tab = TabFile('test', self.fp, first_line_is_header=True)
     # Header has four items before the append
     self.assertEqual(len(tab.header()), 4)
     tab.appendColumn('new')
     # The appended name is the fifth (last) header item
     header = tab.header()
     self.assertEqual(len(header), 5)
     self.assertEqual(header[4], 'new')
     # The new cell on the first data line is the empty string
     first_line = tab[0]
     self.assertEqual(first_line['new'], '')
Esempio n. 2
0
 def test_compute_midpoint(self):
     """Compute the midpoint of the start and end columns

     Adds a 'midpoint' column with computeColumn, then checks the
     column count, the header and the value stored on each line.
     """
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),['chr','start','end','data'])
     # Compute midpoint of start and end
     tabfile.computeColumn('midpoint',lambda line: (line['end'] + line['start'])/2.0)
     self.assertEqual(tabfile.nColumns(),5)
     self.assertEqual(tabfile.header(),['chr','start','end','data','midpoint'])
     results = [117.5,728.5,3456]
     # Check the row count explicitly: a loop driven by len(tabfile)
     # would pass without checking anything if no lines were loaded
     self.assertEqual(len(tabfile),len(results))
     for i,expected in enumerate(results):
         self.assertEqual(tabfile[i]['midpoint'],expected)
Esempio n. 3
0
 def test_compute_and_overwrite_existing_column_integer_index(self):
     """Compute new values for an existing column referenced using integer index

     Overwrites column 3 ('data') with end - start and checks that the
     header is unchanged and each line holds the computed value.
     """
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),['chr','start','end','data'])
     # Compute new values for data column
     tabfile.computeColumn(3,lambda line: line['end'] - line['start'])
     # Overwriting an existing column must not add a new one
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),['chr','start','end','data'])
     results = [233,323,4444]
     # Check the row count explicitly: a loop driven by len(tabfile)
     # would pass without checking anything if no lines were loaded
     self.assertEqual(len(tabfile),len(results))
     for i,expected in enumerate(results):
         self.assertEqual(tabfile[i]['data'],expected)
Esempio n. 4
0
 def test_reorder_columns(self):
     """Reorder the columns of a TabFile and check header and line contents
     """
     original = TabFile('test', self.fp, first_line_is_header=True)
     # Starting layout: four columns in file order
     self.assertEqual(original.nColumns(), 4)
     self.assertEqual(original.header(), ['chr', 'start', 'end', 'data'])
     # Move the 'data' column so it follows 'chr'
     new_columns = ['chr', 'data', 'start', 'end']
     reordered = original.reorderColumns(new_columns)
     self.assertEqual(reordered.nColumns(), 4)
     self.assertEqual(reordered.header(), new_columns)
     # Each line should be written out in the new column order
     expected_lines = [
         "chr1\t4.6\t1\t234",
         "chr1\t5.7\t567\t890",
         "chr2\t6.8\t1234\t5678",
     ]
     for idx, expected in enumerate(expected_lines):
         self.assertEqual(str(reordered[idx]), expected)
Esempio n. 5
0
 def test_set_column_to_constant_value(self):
     """Use transformColumn to fill a column with one constant value
     """
     tab = TabFile('test', self.fp, first_line_is_header=True)
     base_header = ['chr', 'start', 'end', 'data']
     # Starting layout: four columns in file order
     self.assertEqual(tab.nColumns(), 4)
     self.assertEqual(tab.header(), base_header)
     # Append an (initially unset) strand column
     tab.appendColumn('strand')
     self.assertEqual(tab.nColumns(), 5)
     self.assertEqual(tab.header(), base_header + ['strand'])
     # Replace every strand value with '+', ignoring the old value
     tab.transformColumn('strand', lambda old_value: '+')
     for row in tab:
         self.assertEqual(row['strand'], '+')
Esempio n. 6
0
 def test_load_data_with_header(self):
     """Load a TabFile taking the column names from the first line
     """
     tab = TabFile('test', self.fp, first_line_is_header=True)
     # The header line is not counted as data
     self.assertEqual(len(tab), 3, "Input has 3 lines of data")
     expected_header = ['chr', 'start', 'end', 'data']
     self.assertEqual(tab.header(), expected_header, "Wrong header")
     # First data line is rendered tab-delimited
     first_line = str(tab[0])
     self.assertEqual(first_line, "chr1\t1\t234\t4.6",
                      "Incorrect string representation")
     # Items can be looked up by column name
     self.assertEqual(tab[2]['chr'], 'chr2', "Incorrect data")
     self.assertEqual(tab.nColumns(), 4)
Esempio n. 7
0
 def test_unexpected_uncommented_header(self):
     """Read a tab file whose uncommented header line is not declared as a header
     """
     tab = TabFile('test', self.fp)
     # The header line is counted as an ordinary data line
     self.assertEqual(len(tab), 4, "Input has 4 lines of data")
     self.assertEqual(tab.header(), [], "Wrong header")
     header_as_data = str(tab[0])
     self.assertEqual(header_as_data, "chr\tstart\tend\tdata",
                      "Incorrect string representation")
     # With no header set, looking up by column name raises KeyError
     last_line = tab[3]
     self.assertRaises(KeyError, last_line.__getitem__, 'chr')
     self.assertEqual(tab.nColumns(), 4)
Esempio n. 8
0
 def test_expected_uncommented_header(self):
     """Read a tab file whose uncommented first line is declared as the header
     """
     loaded = TabFile('test', self.fp, first_line_is_header=True)
     n_lines = len(loaded)
     self.assertEqual(n_lines, 3, "Input has 3 lines of data")
     # Column names come from the first line of the input
     header = loaded.header()
     self.assertEqual(header, ['chr', 'start', 'end', 'data'], "Wrong header")
     self.assertEqual(str(loaded[0]), "chr1\t1\t234\t4.6",
                      "Incorrect string representation")
     # Lookup by column name on the last data line
     last_line = loaded[2]
     self.assertEqual(last_line['chr'], 'chr2', "Incorrect data")
     self.assertEqual(loaded.nColumns(), 4)
Esempio n. 9
0
 def test_reorder_columns_empty_cells(self):
     """Reorder columns when some lines have an empty first cell
     """
     original = TabFile('test', self.fp, first_line_is_header=True)
     # Starting layout: four columns in file order
     self.assertEqual(original.nColumns(), 4)
     self.assertEqual(original.header(), ['chr', 'start', 'end', 'data'])
     # Blank the 'chr' cell on the first and last lines
     for idx in (0, 2):
         original[idx]['chr'] = ''
     # Move the 'data' column so it follows 'chr'
     new_columns = ['chr', 'data', 'start', 'end']
     reordered = original.reorderColumns(new_columns)
     self.assertEqual(reordered.nColumns(), 4)
     self.assertEqual(reordered.header(), new_columns)
     # The empty leading cells must survive as a leading delimiter
     expected_lines = [
         "\t4.6\t1\t234",
         "chr1\t5.7\t567\t890",
         "\t6.8\t1234\t5678",
     ]
     for idx, expected in enumerate(expected_lines):
         self.assertEqual(str(reordered[idx]), expected)
Esempio n. 10
0
 def test_load_data(self):
     """Load a comma-delimited TabFile that has no header line
     """
     tab = TabFile('test', self.fp, delimiter=',')
     self.assertEqual(len(tab), 3, "Input has 3 lines of data")
     # No header was requested, so none should be reported
     self.assertEqual(tab.header(), [], "Header should be empty")
     # Lines are rendered with the delimiter given at construction
     first_line = str(tab[0])
     self.assertEqual(first_line, "chr1,1,234,4.6",
                      "Incorrect string representation")
     # Without column names, items are addressed by integer position
     last_line = tab[2]
     self.assertEqual(last_line[0], 'chr2', "Incorrect data")
     self.assertEqual(tab.nColumns(), 4)
     self.assertEqual(tab.filename(), 'test')
Esempio n. 11
0
 def test_load_data_setting_explicit_header(self):
     """Load a TabFile while supplying the column names explicitly
     """
     names = ('CHROM', 'START', 'STOP', 'VALUES')
     tab = TabFile('test', self.fp, first_line_is_header=True,
                   column_names=names)
     self.assertEqual(len(tab), 3, "Input has 3 lines of data")
     # The supplied names replace those on the file's first line
     self.assertEqual(tab.header(), list(names), "Wrong header")
     first_line = str(tab[0])
     self.assertEqual(first_line, "chr1\t1\t234\t4.6",
                      "Incorrect string representation")
     # Lookup uses the explicit name, not the name from the file
     self.assertEqual(tab[2]['CHROM'], 'chr2', "Incorrect data")
     self.assertEqual(tab.nColumns(), 4)
Esempio n. 12
0
 def test_expected_uncommented_header(self):
     """Read a tab file where the uncommented first line is used as the header
     """
     tab = TabFile('test', self.fp, first_line_is_header=True)
     expected_header = ['chr', 'start', 'end', 'data']
     expected_first_line = "chr1\t1\t234\t4.6"
     # The header line is excluded from the data line count
     self.assertEqual(len(tab), 3, "Input has 3 lines of data")
     self.assertEqual(tab.header(), expected_header, "Wrong header")
     self.assertEqual(str(tab[0]), expected_first_line,
                      "Incorrect string representation")
     # Column names from the header can be used as keys
     chrom = tab[2]['chr']
     self.assertEqual(chrom, 'chr2', "Incorrect data")
     self.assertEqual(tab.nColumns(), 4)
Esempio n. 13
0
 def test_apply_operation_to_column(self):
     """Divide values in a column by 10

     Applies transformColumn to the 'data' column and checks the
     transformed value on each line.
     """
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),['chr','start','end','data'])
     # Divide data column by 10 (float divisor so the division can
     # never be truncated to an integer)
     tabfile.transformColumn('data',lambda x: x/10.0)
     results = [0.46,0.57,0.68]
     # Check the row count explicitly: a loop driven by len(tabfile)
     # would pass without checking anything if no lines were loaded
     self.assertEqual(len(tabfile),len(results))
     for i,expected in enumerate(results):
         # Floating point division is inexact (e.g. 4.6/10 gives
         # 0.45999999999999996, not 0.46) so an exact comparison
         # fails; compare approximately instead
         self.assertAlmostEqual(tabfile[i]['data'],expected)
Esempio n. 14
0
 def test_apply_operation_to_column(self):
     """Divide values in a column by 10

     Applies transformColumn to the 'data' column and checks the
     transformed value on each line.
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
     # Divide data column by 10 (float divisor so the division can
     # never be truncated to an integer)
     tabfile.transformColumn('data', lambda x: x / 10.0)
     results = [0.46, 0.57, 0.68]
     # Check the row count explicitly: a loop driven by len(tabfile)
     # would pass without checking anything if no lines were loaded
     self.assertEqual(len(tabfile), len(results))
     for i, expected in enumerate(results):
         # Floating point division is inexact (e.g. 4.6/10 gives
         # 0.45999999999999996, not 0.46) so an exact comparison
         # fails; compare approximately instead
         self.assertAlmostEqual(tabfile[i]['data'], expected)
Esempio n. 15
0
 def test_load_data_with_header(self):
     """Load a comma-delimited TabFile taking column names from the first line
     """
     tab = TabFile('test', self.fp, first_line_is_header=True, delimiter=',')
     # The header line is not counted as data
     n_lines = len(tab)
     self.assertEqual(n_lines, 3, "Input has 3 lines of data")
     header = tab.header()
     self.assertEqual(header, ['chr', 'start', 'end', 'data'], "Wrong header")
     # Lines are rendered with the comma delimiter
     first_line = str(tab[0])
     self.assertEqual(first_line, "chr1,1,234,4.6",
                      "Incorrect string representation")
     # Items can be looked up by column name
     last_line = tab[2]
     self.assertEqual(last_line['chr'], 'chr2', "Incorrect data")
     self.assertEqual(tab.nColumns(), 4)
Esempio n. 16
0
 def test_load_data_setting_explicit_header(self):
     """Load a TabFile with explicitly supplied column names
     """
     explicit_names = ('CHROM', 'START', 'STOP', 'VALUES')
     tab = TabFile('test', self.fp, first_line_is_header=True,
                   column_names=explicit_names)
     self.assertEqual(len(tab), 3, "Input has 3 lines of data")
     # The header reports the supplied names as a list
     header = tab.header()
     self.assertEqual(header, ['CHROM', 'START', 'STOP', 'VALUES'],
                      "Wrong header")
     self.assertEqual(str(tab[0]), "chr1\t1\t234\t4.6",
                      "Incorrect string representation")
     # Lookup uses the explicit name
     last_line = tab[2]
     self.assertEqual(last_line['CHROM'], 'chr2', "Incorrect data")
     self.assertEqual(tab.nColumns(), 4)
Esempio n. 17
0
 def test_apply_operation_to_column(self):
     """Divide values in a column by 10

     Applies transformColumn to the 'data' column and checks the
     transformed value on each line to two decimal places.
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
     # Divide data column by 10
     tabfile.transformColumn('data', lambda x: x / 10.0)
     results = [0.46, 0.57, 0.68]
     # Check the row count explicitly: a loop driven by len(tabfile)
     # would pass without checking anything if no lines were loaded
     self.assertEqual(len(tabfile), len(results))
     for i, expected in enumerate(results):
         # Compare to two decimal places to avoid the test failing
         # because of rounding errors (e.g.
         # 0.45999999999999996 != 0.46)
         self.assertAlmostEqual(tabfile[i]['data'], expected, places=2)