コード例 #1
0
def generate_output_from_results(results, output, alphabet_info):
    """
    Write mapped reads to a mapped-read file and summarise the failures.

    This version outputs to the V8 'chunk' file format (actually containing
    mapped reads, not chunks).

    param: results       : an iterable of (read, message) pairs; when the
                           read is None the message is counted as the reason
                           for the failure, otherwise the read is written
    param: output        : output filename
    param: alphabet_info : taiyaki.alphabet.AlphabetInfo instance
    """
    progress = helpers.Progress()
    failure_counts = defaultdict(int)
    with mapped_signal_files.HDF5Writer(output, alphabet_info) as writer:
        for read, message in results:
            if read is None:
                # Only tally failures here; they are reported after the loop
                failure_counts[message] += 1
                continue
            progress.step()
            writer.write_read(read)
    sys.stderr.write('\n')

    # One summary line per distinct failure reason, emitted at the very end
    # so that stderr is not flooded while reads are being processed
    template = '* {} reads failed to produce remapping results due to: {}\n'
    for reason, count in failure_counts.items():
        sys.stderr.write(template.format(count, reason))
コード例 #2
0
    def test_check_HDF5_mapped_read_file(self):
        """Verify that a non-conforming read is flagged by the checks.

        A read whose 'Reference' entry is a plain string is checked, written
        to file and read back; neither the read-level check nor the
        file-level check may return "pass".
        """
        print("Creating flawed Read object from test data")
        flawed_fields = construct_mapped_read()
        # Deliberately the wrong type: a string instead of a numpy array
        flawed_fields['Reference'] = "I'm not a numpy array!"
        flawed_read = mapped_signal_files.Read(flawed_fields)

        print("Checking contents")
        read_report = flawed_read.check()
        print("Check result on read object: should fail")
        print(read_report)
        self.assertNotEqual(read_report, "pass")

        print("Writing to file")
        alphabet_info = alphabet.AlphabetInfo(DEFAULT_ALPHABET, DEFAULT_ALPHABET)
        with mapped_signal_files.HDF5Writer(self.testfilepath, alphabet_info) as writer:
            writer.write_read(flawed_read)

        print("Current dir = ", os.getcwd())
        print("File written to ", self.testfilepath)

        print("\nOpening file for reading")
        with mapped_signal_files.HDF5Reader(self.testfilepath) as reader:
            read_ids = reader.get_read_ids()
            print("Read ids=", read_ids[0])
            print("Version number = ", reader.version)
            # The stored id must match the one in the source dictionary
            self.assertEqual(read_ids[0], flawed_fields['read_id'])

            file_report = reader.check()
            print("Test report (should fail):", file_report)
            self.assertNotEqual(file_report, "pass")
コード例 #3
0
    def test_HDF5_mapped_read_file(self):
        """Round-trip a mapped read through an HDF5 file and query it.

        A read built from the test data is checked, written to file and read
        back. The test then extracts a chunk from the recovered read and
        compares the recovered mapping with the source data. A diagnostic
        plot can be produced at the end, but is switched off in the code.
        """
        print("Creating Read object from test data")
        source_fields = construct_mapped_read()
        source_read = mapped_signal_files.Read(source_fields)

        print("Checking contents")
        read_report = source_read.check()
        print("Check result on read object:")
        print(read_report)
        self.assertEqual(read_report, "pass")

        print("Writing to file")
        alphabet_info = alphabet.AlphabetInfo(DEFAULT_ALPHABET, DEFAULT_ALPHABET)
        with mapped_signal_files.HDF5Writer(self.testfilepath, alphabet_info) as writer:
            writer.write_read(source_read)

        print("Current dir = ", os.getcwd())
        print("File written to ", self.testfilepath)

        print("\nOpening file for reading")
        with mapped_signal_files.HDF5Reader(self.testfilepath) as reader:
            read_ids = reader.get_read_ids()
            print("Read ids=", read_ids[0])
            print("Version number = ", reader.version)
            self.assertEqual(read_ids[0], source_fields['read_id'])

            file_report = reader.check()
            print("Test report:", file_report)
            self.assertEqual(file_report, "pass")

            all_reads = reader.get_multiple_reads("all")

        recovered = all_reads[0]
        reflen = len(recovered['Reference'])
        siglen = len(recovered['Dacs'])

        # Get a chunk - note that chunkstart is relative to the start of the
        # mapped region, not relative to the start of the signal
        chunklen = 5
        chunkstart = 3
        chunkdict = recovered.get_chunk_with_sample_length(chunklen, chunkstart)

        # The extracted chunk must have exactly the requested length
        self.assertEqual(len(chunkdict['current']), chunklen)

        # The mapping read back from file must agree with what we put in
        mapping_matches = np.all(
            recovered['Ref_to_signal'] == source_fields['Ref_to_signal'])
        self.assertTrue(mapping_matches)

        # Plot a picture showing ref_to_sig from the read object,
        # and the result of searches to find the inverse.
        # Disabled: the flag below is hard-coded to False.
        make_diagnostic_plot = False
        if make_diagnostic_plot:
            plt.figure()
            plt.xlabel('Signal coord')
            plt.ylabel('Ref coord')
            # First and last element of the chunk
            ends = np.array([0, -1])
            plt.scatter(chunkdict['current'][ends], chunkdict['sequence'][ends],
                        s=50, label='chunk limits', marker='s', color='black')
            plt.scatter(recovered['Ref_to_signal'], np.arange(reflen + 1),
                        label='reftosig (source data)',
                        color='none', edgecolor='blue', s=60)
            siglocs = np.arange(siglen, dtype=np.int32)
            sigtoref_fromsearch = recovered.get_reference_locations(siglocs)
            plt.scatter(siglocs, sigtoref_fromsearch, label='from search',
                        color='red', marker='x', s=50)
            plt.legend()
            plt.grid()
            plt.savefig(self.plotfilepath)
            print("Saved plot to", self.plotfilepath)