def test_transpose_tab_file(self):
    """Transposing a TabFile swaps its line and column counts

    The fixture stream ``self.fp`` is loaded without a header line.
    """
    original = TabFile('test', self.fp, first_line_is_header=False)
    transposed = original.transpose()
    # Number of lines in one must equal number of columns in the other
    self.assertEqual(len(original), transposed.nColumns())
    self.assertEqual(len(transposed), original.nColumns())
def save(self, filen=None):
    """Write the metadata out as a tab-delimited key-value file

    Each stored item becomes one line holding its file key and its
    value. ``None`` is written as '.', ``True`` as 'Y' and ``False``
    as 'N'; any other value is passed through unchanged.

    NOTE(review): earlier documentation for this method states that
    an existing file is overwritten and that the data can be read
    back with the 'load' method; neither is visible from this method
    alone - confirm against the rest of the class.

    Arguments:
      filen: name of the tab-delimited file to write; if None then
        the file name already held by the object is used. A supplied
        name replaces the stored one before writing.

    """
    # Tokens for the values that need converting; matching is done by
    # identity so that e.g. 1 and 0 are not treated as True and False
    placeholders = ((None, '.'), (True, 'Y'), (False, 'N'))
    output = TabFile.TabFile()
    for key in self.__key_order:
        stored = self[key]
        for special, token in placeholders:
            if stored is special:
                stored = token
                break
        # Lines are keyed by the equivalent file key for this item
        output.append(data=(self.__attributes[key], stored))
    # An explicitly supplied file name supersedes the stored one
    if filen is not None:
        self.__filen = filen
    output.write(self.__filen)
def test_write_data_to_file(self):
    """Write data to a named file and check the file contents

    The TabFile is loaded from the fixture stream ``self.fp`` and
    written to 'test.tsv' under ``self.working_dir``; the text read
    back must equal ``self.data``.
    """
    tabfile = TabFile(fp=self.fp)
    out_file = os.path.join(self.working_dir, "test.tsv")
    tabfile.write(filen=out_file)
    # Read back inside a context manager so the handle is always
    # closed, even when the assertion below fails
    with io.open(out_file, 'rt') as written:
        contents = written.read()
    self.assertEqual(contents, self.data)
def test_preserve_trailing_spaces_on_lines(self):
    """Trailing spaces in the last column survive loading

    Checks column index 4 of the first three lines loaded from the
    fixture stream ``self.fp``.
    """
    loaded = TabFile('test', self.fp)
    expected_comments = (
        "A comment",
        "Comment with a trailing space ",
        ".",
    )
    for row, expected in enumerate(expected_comments):
        self.assertEqual(loaded[row][4], expected)
def test_remove_commented_lines_by_default(self):
    """TabFile: check commented lines are removed by default

    Loads five lines of input, of which the first is a '#'-prefixed
    header and two more start with '#'. Only the two uncommented
    data lines are expected to remain, addressable via the header
    names, and the string form of the TabFile must contain just
    those two lines.
    """
    # One record per line: the line breaks are spelled out as '\n'
    # so the record structure cannot be lost to whitespace changes
    content = (
        u"#chr\tstart\tend\tdata\n"
        u"chr1\t1\t234\t1.2\n"
        u"#chr1\t567\t890\t5.7\n"
        u"#chr2\t1234\t5678\t6.8\n"
        u"chr2\t2345\t6789\t12.1\n"
    )
    # Expected string form: no trailing newline after the last line
    final = (
        u"chr1\t1\t234\t1.2\n"
        u"chr2\t2345\t6789\t12.1"
    )
    fp = io.StringIO(content)
    tabfile = TabFile(fp=fp, first_line_is_header=True)
    self.assertEqual(len(tabfile), 2)
    self.assertEqual(tabfile[0]['chr'], "chr1")
    self.assertEqual(tabfile[0]['start'], 1)
    self.assertEqual(tabfile[0]['end'], 234)
    self.assertEqual(tabfile[0]['data'], 1.2)
    self.assertEqual(tabfile[1]['chr'], "chr2")
    self.assertEqual(tabfile[1]['start'], 2345)
    self.assertEqual(tabfile[1]['end'], 6789)
    self.assertEqual(tabfile[1]['data'], 12.1)
    self.assertEqual(str(tabfile), final)
def test_write_data_to_file_include_header(self):
    """Write data plus header to a named file and check the contents

    The TabFile is loaded from the fixture stream ``self.fp`` with
    the first line taken as the header, then written to 'test.tsv'
    under ``self.working_dir`` with ``include_header=True``; the text
    read back must equal ``self.header + self.data``.
    """
    tabfile = TabFile(fp=self.fp, first_line_is_header=True)
    out_file = os.path.join(self.working_dir, "test.tsv")
    tabfile.write(filen=out_file, include_header=True)
    # Read back inside a context manager so the handle is always
    # closed, even when the assertion below fails
    with io.open(out_file, 'rt') as written:
        contents = written.read()
    self.assertEqual(contents, self.header + self.data)
def test_write_data_include_header(self):
    """Write data plus header to a file-like object

    The output captured in an in-memory text buffer must equal
    ``self.header + self.data``.
    """
    source = TabFile(fp=self.fp, first_line_is_header=True)
    buffer = io.StringIO()
    source.write(fp=buffer, include_header=True)
    expected = self.header + self.data
    self.assertEqual(buffer.getvalue(), expected)
    buffer.close()
def test_reverse_sort_on_column(self):
    """Sort lines on the numerical 'data' column, largest first"""
    loaded = TabFile('test', self.fp, first_line_is_header=True)
    loaded.sort(lambda row: row['data'], reverse=True)
    # Values expected in the 'data' column after the reverse sort
    descending = [6.8, 5.7, 3.4]
    for position in range(len(loaded)):
        self.assertEqual(loaded[position]['data'], descending[position])
def test_add_tab_data_to_new_tabfile(self):
    """Append a tab-delimited string to a new, empty TabFile

    The TabFile must then hold exactly one line whose string form is
    the string that was appended.
    """
    tab_line = 'chr1\t10000\t20000\t+'
    empty = TabFile()
    empty.append(tabdata=tab_line)
    self.assertEqual(len(empty), 1, "TabFile should now have one line")
    first = empty[0]
    self.assertEqual(str(first), tab_line)
def test_convert_values_to_type_read_from_file(self):
    """Convert input values to appropriate types when reading from file

    For every line loaded from the fixture stream ``self.fp`` the
    first three columns must be a string, an integer and a float
    respectively.
    """
    # 'long' only exists on Python 2 and raises NameError on Python 3;
    # numbers.Integral matches int on Python 3 and both int and long
    # on Python 2
    import numbers
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    for line in tabfile:
        self.assertTrue(isinstance(line[0], str))
        self.assertTrue(isinstance(line[1], numbers.Integral))
        self.assertTrue(isinstance(line[2], float))
def test_write_data(self):
    """Write data to a file-like object

    The output captured in an in-memory buffer must equal
    ``self.data``.
    """
    # cStringIO was removed in Python 3: keep using it where it
    # exists and fall back to io.StringIO otherwise
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO
    tabfile = TabFile('test', self.fp)
    fp = StringIO()
    tabfile.write(fp=fp)
    self.assertEqual(fp.getvalue(), self.data)
    fp.close()
def test_append_column(self):
    """Append a new column to a TabFile

    The header must grow from four to five items, with the new name
    last, and the new column must read as an empty string on the
    first line.
    """
    loaded = TabFile('test', self.fp, first_line_is_header=True)
    self.assertEqual(len(loaded.header()), 4)
    loaded.appendColumn('new')
    # Header is re-read after the append rather than cached
    self.assertEqual(len(loaded.header()), 5)
    self.assertEqual(loaded.header()[4], 'new')
    first = loaded[0]
    self.assertEqual(first['new'], '')
def test_add_data_to_new_tabfile(self):
    """Append a list of items to a new, empty TabFile

    The TabFile must then hold exactly one line whose items equal
    the list that was appended, position by position.
    """
    items = ['chr1', 10000, 20000, '+']
    empty = TabFile()
    empty.append(data=items)
    self.assertEqual(len(empty), 1, "TabFile should now have one line")
    for position, expected in enumerate(items):
        self.assertEqual(empty[0][position], expected)
def test_convert_values_to_type_append_tabdata(self):
    """Convert input values to appropriate types when appending tabdata

    After appending a tab-delimited string, the first three columns
    of every line must be a string, an integer and a float
    respectively.
    """
    # 'long' only exists on Python 2 and raises NameError on Python 3;
    # numbers.Integral matches int on Python 3 and both int and long
    # on Python 2
    import numbers
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    tabfile.append(tabdata="chr3\t5678\t7.9")
    for line in tabfile:
        self.assertTrue(isinstance(line[0], str))
        self.assertTrue(isinstance(line[1], numbers.Integral))
        self.assertTrue(isinstance(line[2], float))
def test_convert_values_to_type_append_list(self):
    """Convert input values to appropriate types when appending a list

    Two lists are appended, one holding the numbers as strings and
    one holding them as numbers. Afterwards the first three columns
    of every line must be a string, an integer and a float
    respectively.
    """
    # 'long' only exists on Python 2 and raises NameError on Python 3;
    # numbers.Integral matches int on Python 3 and both int and long
    # on Python 2
    import numbers
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    tabfile.append(data=["chr3", "5678", "7.9"])
    tabfile.append(data=["chr3", 5678, 7.9])
    for line in tabfile:
        self.assertTrue(isinstance(line[0], str))
        self.assertTrue(isinstance(line[1], numbers.Integral))
        self.assertTrue(isinstance(line[2], float))
def test_convert_values_to_str_read_from_file(self):
    """With convert=False every value read from file stays a string"""
    unconverted = TabFile(
        'test',
        self.fp,
        first_line_is_header=True,
        convert=False,
    )
    for row in unconverted:
        for item in row:
            is_string = isinstance(item, str)
            self.assertTrue(is_string)
def test_append_line_as_data(self):
    """Append a line supplied as a list to a comma-delimited TabFile

    The string form of the last line must be the list items joined
    with commas.
    """
    comma_file = TabFile('test', self.fp, first_line_is_header=True,
                         delimiter=',')
    items = ['chr3', '10', '9', '8']
    comma_file.append(data=items)
    expected = ','.join(str(item) for item in items)
    self.assertEqual(str(comma_file[-1]), expected)
def test_unexpected_uncommented_header(self):
    """Read a tab file whose uncommented header was not announced

    Loaded without ``first_line_is_header``, the header line counts
    as data: four lines are expected, the header list is empty, and
    looking a line up by column name raises KeyError.
    """
    loaded = TabFile('test', self.fp)
    self.assertEqual(len(loaded), 4, "Input has 4 lines of data")
    self.assertEqual(loaded.header(), [], "Wrong header")
    first_line = str(loaded[0])
    self.assertEqual(first_line, "chr\tstart\tend\tdata",
                     "Incorrect string representation")
    # No header names are defined, so name-based lookup must fail
    lookup = loaded[3].__getitem__
    self.assertRaises(KeyError, lookup, 'chr')
    self.assertEqual(loaded.nColumns(), 4)
def test_append_line(self):
    """Append a delimited string to a comma-delimited TabFile

    The string form of the last line must equal the appended string.
    """
    comma_file = TabFile('test', self.fp, first_line_is_header=True,
                         delimiter=',')
    new_line = 'chr3,10,9,8'
    comma_file.append(tabdata=new_line)
    last = comma_file[-1]
    self.assertEqual(str(last), new_line)
def test_insert_line_with_tab_data(self):
    """Insert a line given as a tab-delimited string

    The TabFile must grow from three to four lines and the returned
    line must have the inserted string as its string form.
    """
    tab_line = 'chr1\t10000\t20000\t+'
    loaded = TabFile('test', self.fp)
    self.assertEqual(len(loaded), 3)
    inserted = loaded.insert(2, tabdata=tab_line)
    self.assertEqual(len(loaded), 4)
    # The returned line must reproduce the input string
    matches = str(inserted) == tab_line
    self.assertTrue(matches)
def test_insert_empty_line(self):
    """Insert a blank line into a TabFile

    The TabFile must grow from three to four lines and every item
    of the returned line must have an empty string form.
    """
    loaded = TabFile('test', self.fp)
    self.assertEqual(len(loaded), 3)
    blank = loaded.insert(2)
    self.assertEqual(len(loaded), 4)
    # Every item of the new line must be blank
    for position in range(len(blank)):
        item_text = str(blank[position])
        self.assertTrue(item_text == '')
def test_append_tab_data_line(self):
    """Append an existing TabDataLine object to a TabFile

    The TabFile must grow from three to four lines and ``append``
    must hand back the very same object that was passed in.
    """
    loaded = TabFile('test', self.fp)
    self.assertEqual(len(loaded), 3)
    new_line = TabDataLine('chr1\t10000\t20000\t+')
    returned = loaded.append(tabdataline=new_line)
    self.assertEqual(len(loaded), 4)
    # Identity, not just equality: no copy should have been made
    self.assertTrue(returned is new_line)
def test_append_empty_line(self):
    """Append a blank line to a TabFile

    The TabFile must grow from three to four lines and every item
    of the returned line must have an empty string form.
    """
    loaded = TabFile('test', self.fp)
    self.assertEqual(len(loaded), 3)
    blank = loaded.append()
    self.assertEqual(len(loaded), 4)
    # Every item of the new line must be blank
    for position in range(len(blank)):
        item_text = str(blank[position])
        self.assertTrue(item_text == '')
def test_insert_tab_data_line(self):
    """Insert an existing TabDataLine object into a TabFile

    The TabFile must grow from three to four lines and ``insert``
    must hand back the very same object that was passed in.
    """
    loaded = TabFile('test', self.fp)
    self.assertEqual(len(loaded), 3)
    new_line = TabDataLine('chr1\t10000\t20000\t+')
    returned = loaded.insert(2, tabdataline=new_line)
    self.assertEqual(len(loaded), 4)
    # Identity, not just equality: no copy should have been made
    self.assertTrue(returned is new_line)
def test_load_data_with_header(self):
    """Create and load a TabFile using the first line as header

    Three data lines and four named columns are expected, and lines
    must be addressable by column name.
    """
    loaded = TabFile('test', self.fp, first_line_is_header=True)
    expected_header = ['chr', 'start', 'end', 'data']
    self.assertEqual(len(loaded), 3, "Input has 3 lines of data")
    self.assertEqual(loaded.header(), expected_header, "Wrong header")
    first_line = str(loaded[0])
    self.assertEqual(first_line, "chr1\t1\t234\t4.6",
                     "Incorrect string representation")
    self.assertEqual(loaded[2]['chr'], 'chr2', "Incorrect data")
    self.assertEqual(loaded.nColumns(), 4)
def test_convert_values_to_str_append_tabdata(self):
    """With convert=False appended tabdata values stay strings

    After appending a tab-delimited string, every value on every
    line must be a string.
    """
    unconverted = TabFile(
        'test',
        self.fp,
        first_line_is_header=True,
        convert=False,
    )
    unconverted.append(tabdata="chr3\t5678\t7.9")
    for row in unconverted:
        for item in row:
            is_string = isinstance(item, str)
            self.assertTrue(is_string)
def test_load_data(self):
    """Create and load a new comma-delimited TabFile instance

    Three lines and four columns are expected, with no header, and
    the file name given at construction must be reported back.
    """
    loaded = TabFile('test', self.fp, delimiter=',')
    self.assertEqual(len(loaded), 3, "Input has 3 lines of data")
    self.assertEqual(loaded.header(), [], "Header should be empty")
    first_line = str(loaded[0])
    self.assertEqual(first_line, "chr1,1,234,4.6",
                     "Incorrect string representation")
    self.assertEqual(loaded[2][0], 'chr2', "Incorrect data")
    self.assertEqual(loaded.nColumns(), 4)
    self.assertEqual(loaded.filename(), 'test')
def test_insert_line_with_data(self):
    """Insert a line given as a list of items

    The TabFile must grow from three to four lines and the returned
    line must equal the inserted list, position by position.
    """
    items = ['chr1', 678, 901, 6.1]
    loaded = TabFile('test', self.fp)
    self.assertEqual(len(loaded), 3)
    inserted = loaded.insert(2, data=items)
    self.assertEqual(len(loaded), 4)
    # The returned line must hold the input items in order
    for position, expected in enumerate(items):
        self.assertTrue(inserted[position] == expected)
def test_expected_uncommented_header(self):
    """Read a tab file whose uncommented header was announced

    Loaded with ``first_line_is_header=True``, three data lines and
    four named columns are expected, and lines must be addressable
    by column name.
    """
    loaded = TabFile('test', self.fp, first_line_is_header=True)
    expected_header = ['chr', 'start', 'end', 'data']
    self.assertEqual(len(loaded), 3, "Input has 3 lines of data")
    self.assertEqual(loaded.header(), expected_header, "Wrong header")
    first_line = str(loaded[0])
    self.assertEqual(first_line, "chr1\t1\t234\t4.6",
                     "Incorrect string representation")
    self.assertEqual(loaded[2]['chr'], 'chr2', "Incorrect data")
    self.assertEqual(loaded.nColumns(), 4)
def test_apply_operation_to_column(self):
    """Divide values in a column by 10

    Applies ``transformColumn`` to the 'data' column and checks each
    line against the expected quotient.
    """
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    # Check number of columns and header items
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
    # Divide data column by 10
    tabfile.transformColumn('data', lambda x: x / 10)
    results = [0.46, 0.57, 0.68]
    for i in range(len(tabfile)):
        # Quotients are floats, so compare with a tolerance: in binary
        # floating point 4.6 / 10 is 0.45999999999999996, not 0.46
        self.assertAlmostEqual(tabfile[i]['data'], results[i])