Ejemplo n.º 1
0
    def set_fastq(self,
                  destination_root,
                  data,
                  section=__default_section__,
                  overwrite=False):
        """Validate a fastq record and store it in this file.

        The destination is built by joining ``destination_root`` with
        ``self.__default_basecall_fastq__`` formatted with ``section``.

        :param destination_root: root location the fastq destination is joined onto
        :param data: fastq record; checked with check_fastq_line before anything is written
        :param section: name of basecall analysis section, used to format the destination
        :param overwrite: when true, an existing destination is deleted before writing
        :raises Exception: if the destination already exists and ``overwrite`` is false
        """
        check_fastq_line(data)

        # resolve where the record goes
        fastq_path = self._join_path(
            destination_root, self.__default_basecall_fastq__.format(section))

        # refuse to clobber an existing entry unless explicitly allowed
        already_present = fastq_path in self
        if already_present and not overwrite:
            raise Exception("Destination {} already exists in {}".format(
                fastq_path, self.filename))
        if already_present:
            self.delete(fastq_path, ignore=True)

        # write the record
        self._add_string_dataset(data, fastq_path)
Ejemplo n.º 2
0
    def set_fastq(self, path, data, section='template'):
        """Validate a fastq record and store it under ``path``.

        The target group is ``self.__base_analysis__`` joined with ``path`` and
        ``BaseCalled_{section}``; that location is passed through
        ``self.check_path(..., latest=True)`` and the record is written to the
        ``Fastq`` entry below the returned path.

        :param path: analysis path, joined below ``self.__base_analysis__``
        :param data: fastq record; checked with check_fastq_line before writing
        :param section: name of basecall analysis section, default 'template'
        """
        check_fastq_line(data)
        requested_group = self._join_path(
            self.__base_analysis__, path, "BaseCalled_{}".format(section))
        resolved_group = self.check_path(requested_group, latest=True)
        fastq_location = self._join_path(resolved_group, 'Fastq')
        self._add_string_dataset(data, fastq_location)
Ejemplo n.º 3
0
def get_resegment_accuracy(fast5handle, section="template"):
    """Compare the original basecalled sequence with the resegmented one.

    :param fast5handle: Fast5 object with re-segmented read
    :param section: section passed to ``get_fastq`` for both analyses
    :return: result of ``pairwise_alignment_accuracy`` called with the original
        sequence, the resegmented sequence and ``soft_clip=True``
    """
    assert isinstance(fast5handle, Fast5), "fast5handle needs to be a Fast5 instance"

    # fetch both fastq records; the Basecall_1D one is decoded from bytes and
    # has its final character dropped
    reseg_record = fast5handle.get_fastq(analysis="ReSegmentBasecall", section=section)
    raw_original = fast5handle.get_fastq(analysis="Basecall_1D", section=section)
    orig_record = bytes.decode(raw_original)[:-1]

    # both records must pass the fastq check before we rely on newline splitting
    check_fastq_line(reseg_record)
    check_fastq_line(orig_record)

    # the sequence is the second newline-separated field of each record
    reseg_sequence = reseg_record.split('\n')[1]
    orig_sequence = orig_record.split('\n')[1]
    return pairwise_alignment_accuracy(orig_sequence, reseg_sequence, soft_clip=True)
Ejemplo n.º 4
0
 def test_check_fastq_line(self):
     """check_fastq_line accepts a well-formed record and rejects malformed ones.

     Each malformed record gets its own ``assertRaises`` block. Placing all
     calls inside a single ``assertRaises`` would only exercise the first
     one, because the first raised AssertionError leaves the ``with`` block
     and the remaining calls never run.
     """
     malformed_records = (
         "asdf\nasdf\n+\nasdf",      # header line lacks the leading '@'
         "@asdfasdf\n+\nasdf",       # header and sequence fused: three lines
         "@asdf\nasdf+\nasdf",       # sequence and '+' fused: three lines
         "@asdf\nasdf\n+\nasdf\n",   # trailing newline adds a fifth field
         "@asdf\nasdf\n+asdf",       # '+' and quality fused: three lines
         "@asdf\nasdsf\n+\nasdf",    # sequence longer than quality
         "@asdf\nasdf\n+\nassdf",    # quality longer than sequence
     )
     with captured_output() as (_, _):
         fastq_record = "@asdf\nasdf\n+\nasdf"
         self.assertTrue(check_fastq_line(fastq_record))
         for bad_record in malformed_records:
             # subTest reports which record failed and keeps checking the rest
             with self.subTest(record=bad_record):
                 with self.assertRaises(AssertionError):
                     check_fastq_line(bad_record)