def generate_output_from_results(results, output, alphabet_info):
    """Write successfully mapped reads to a mapped-read file.

    This version outputs to the V8 'chunk' file format (the file actually
    contains mapped reads, not chunks).

    Each item of `results` is unpacked as a `(read, message)` pair. When
    `read` is None the item counts as a failure and `message` is tallied;
    otherwise the read is written to the output file. Failure tallies are
    written to stderr once all results have been consumed.

    param: results       : an iterable of (read dictionary or None, message)
                           pairs
    param: output        : output filename
    param: alphabet_info : taiyaki.alphabet.AlphabetInfo instance
    """
    progress = helpers.Progress()
    failure_counts = defaultdict(int)

    with mapped_signal_files.HDF5Writer(output, alphabet_info) as writer:
        for read_result, message in results:
            if read_result is not None:
                progress.step()
                writer.write_read(read_result)
                continue
            # Failed read: only count it here, so that stderr is not
            # flooded while the reads are being processed.
            failure_counts[message] += 1
    sys.stderr.write('\n')

    # One summary line per distinct failure message.
    summary_template = ('* {} reads failed to produce remapping results '
                        'due to: {}\n')
    for reason, n_failed in failure_counts.items():
        sys.stderr.write(summary_template.format(n_failed, reason))
def test_check_HDF5_mapped_read_file(self):
    """A read that does not conform must be reported by both checks.

    The test data is corrupted by replacing its 'Reference' entry with a
    plain string. The test then asserts that neither the Read object's
    check() nor the check() of the file written from it returns "pass".
    """
    print("Creating flawed Read object from test data")
    flawed_dict = construct_mapped_read()
    # Deliberately the wrong type for this field.
    flawed_dict['Reference'] = "I'm not a numpy array!"
    flawed_read = mapped_signal_files.Read(flawed_dict)

    print("Checking contents")
    read_report = flawed_read.check()
    print("Check result on read object: should fail")
    print(read_report)
    self.assertNotEqual(read_report, "pass")

    print("Writing to file")
    alphabet_info = alphabet.AlphabetInfo(DEFAULT_ALPHABET, DEFAULT_ALPHABET)
    with mapped_signal_files.HDF5Writer(
            self.testfilepath, alphabet_info) as writer:
        writer.write_read(flawed_read)

    print("Current dir = ", os.getcwd())
    print("File written to ", self.testfilepath)

    print("\nOpening file for reading")
    with mapped_signal_files.HDF5Reader(self.testfilepath) as reader:
        read_ids = reader.get_read_ids()
        first_id = read_ids[0]
        print("Read ids=", first_id)
        print("Version number = ", reader.version)
        self.assertEqual(first_id, flawed_dict['read_id'])

        # The flawed read was written as-is, so the file-level check is
        # expected to fail as well.
        file_report = reader.check()
        print("Test report (should fail):", file_report)
        self.assertNotEqual(file_report, "pass")
def test_HDF5_mapped_read_file(self):
    """Test that we can save a mapped read file, open it again and use
    some methods to get data from it.

    Steps: build a Read from the test data and check it, write it with
    HDF5Writer, reopen the file with HDF5Reader, then verify the read id,
    the file check report, the length of an extracted chunk and the
    recovered 'Ref_to_signal' mapping.

    A diagnostic plot of the mapping can also be produced, but it is
    switched off by default; set `make_diagnostic_plot` below to True to
    save it to self.plotfilepath.
    """
    print("Creating Read object from test data")
    read_dict = construct_mapped_read()
    read_object = mapped_signal_files.Read(read_dict)
    print("Checking contents")
    check_text = read_object.check()
    print("Check result on read object:")
    print(check_text)
    self.assertEqual(check_text, "pass")

    print("Writing to file")
    alphabet_info = alphabet.AlphabetInfo(DEFAULT_ALPHABET, DEFAULT_ALPHABET)
    with mapped_signal_files.HDF5Writer(self.testfilepath, alphabet_info) as f:
        f.write_read(read_object)

    print("Current dir = ", os.getcwd())
    print("File written to ", self.testfilepath)

    # Diagnostic plotting is disabled by default.
    make_diagnostic_plot = False

    print("\nOpening file for reading")
    with mapped_signal_files.HDF5Reader(self.testfilepath) as f:
        ids = f.get_read_ids()
        print("Read ids=", ids[0])
        print("Version number = ", f.version)
        self.assertEqual(ids[0], read_dict['read_id'])

        file_test_report = f.check()
        print("Test report:", file_test_report)
        self.assertEqual(file_test_report, "pass")

        read_list = f.get_multiple_reads("all")

        # NOTE(review): everything that touches the recovered read is kept
        # inside the reader context in case the read data is backed by the
        # open file - confirm whether that is actually required.
        recovered_read = read_list[0]
        reflen = len(recovered_read['Reference'])
        siglen = len(recovered_read['Dacs'])

        # Get a chunk - note that chunkstart is relative to the start of
        # the mapped region, not relative to the start of the signal
        chunklen, chunkstart = 5, 3
        chunkdict = recovered_read.get_chunk_with_sample_length(
            chunklen, chunkstart)

        # Check that the extracted chunk is the right length
        self.assertEqual(len(chunkdict['current']), chunklen)

        # Check that the mapping data agrees with what we put in.
        # array_equal also compares shapes, so a length mismatch is a
        # plain assertion failure rather than a broadcast or an error.
        self.assertTrue(np.array_equal(recovered_read['Ref_to_signal'],
                                       read_dict['Ref_to_signal']))

        # Plot a picture showing ref_to_sig from the read object,
        # and the result of searches to find the inverse
        if make_diagnostic_plot:
            plt.figure()
            plt.xlabel('Signal coord')
            plt.ylabel('Ref coord')
            ix = np.array([0, -1])
            plt.scatter(chunkdict['current'][ix], chunkdict['sequence'][ix],
                        s=50, label='chunk limits', marker='s',
                        color='black')
            plt.scatter(recovered_read['Ref_to_signal'],
                        np.arange(reflen + 1),
                        label='reftosig (source data)',
                        color='none', edgecolor='blue', s=60)
            siglocs = np.arange(siglen, dtype=np.int32)
            sigtoref_fromsearch = recovered_read.get_reference_locations(
                siglocs)
            plt.scatter(siglocs, sigtoref_fromsearch, label='from search',
                        color='red', marker='x', s=50)
            plt.legend()
            plt.grid()
            plt.savefig(self.plotfilepath)
            print("Saved plot to", self.plotfilepath)