def estimate_params(fast5, binary_path="./estimateNanoporeParams",
                    template_lookup_table="../models/testModelR9p4_acegt_template.model",
                    complement_lookup_table="../models/testModelR9_complement_pop2.model",
                    twoD=False, verbose=False):
    temp_folder = FolderHandler()
    temp_folder.open_folder("npParamEstimation")

    read_name = fast5.split("/")[-1][:-6]  # get the name without the '.fast5'

    npRead_path = temp_folder.add_file_path(read_name + ".npRead")
    npRead_fasta = temp_folder.add_file_path(read_name + ".seq.fasta")

    if twoD:
        success, version, complement = get_npRead_2dseq_and_models(fast5=fast5,
                                                                   npRead_path=npRead_path,
                                                                   twod_read_path=npRead_fasta)
        # print(version, complement)
    else:
        success, version, complement = prepareOneD(fast5=fast5, npRead_path=npRead_path, oneD_read_path=npRead_fasta)
        # print(version, complement)

    if success is False:
        return False

    command = "{bin} -T {tLuT} -C {cLuT} -q {npRead}" \
              "".format(bin=binary_path, tLuT=template_lookup_table, cLuT=complement_lookup_table, npRead=npRead_path)

    if verbose:
        print("running command {command}".format(command=command), file=sys.stderr)

    # os.system(command)
    result = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
    params = result.split()
    param_dict = dict(list(zip([bytes.decode(x) for x in params[::2]], [float(x) for x in params[1::2]])))
    # print(type(param_dict["scale"]))
    # clean up temp folder
    temp_folder.remove_file(npRead_path)
    temp_folder.remove_file(npRead_fasta)
    temp_folder.remove_folder()
    return param_dict
def main(args):
    args = parse_args()

    # make directory to put temporary files
    temp_folder = FolderHandler()
    temp_dir_path = temp_folder.open_folder(args.out + "npParamEstimation")

    fast5s = [x for x in os.listdir(args.files_dir) if x.endswith(".fast5")]

    if len(fast5s) > args.nb_files:
        shuffle(fast5s)
        fast5s = fast5s[:args.nb_files]
    for fast5 in fast5s:
        print(fast5)
        # estimate_params(fast5=args.files_dir + fast5, working_folder=temp_folder, bwa_index=bwa_ref_index,
        #                forward_reference_path=plus_strand_sequence, backward_reference_path=minus_strand_sequence,
        #                threshold=args.threshold)
        try:
            params = estimate_params(fast5=args.files_dir + fast5, twoD=True)
            print(params)
        except Exception as e:
            print(e)
    temp_folder.remove_folder()
    return True
class SignalAlignment(object):
    def __init__(
            self,
            in_fast5,
            destination,
            stateMachineType,
            in_templateHmm,
            in_complementHmm,
            in_templateHdp,
            in_complementHdp,
            threshold,
            diagonal_expansion,
            constraint_trim,
            degenerate,
            forward_reference,
            backward_reference=None,
            # one of these needs to be set
            alignment_file=None,
            bwa_reference=None,
            # reasonable defaults
            twoD_chemistry=False,
            target_regions=None,
            output_format="full",
            embed=False,
            event_table=False,
            check_for_temp_file_existance=True,
            track_memory_usage=False,
            get_expectations=False,
            path_to_bin=''):
        self.in_fast5 = in_fast5  # fast5 file to align
        self.destination = destination  # place where the alignments go, should already exist
        self.stateMachineType = stateMachineType  # flag for signalMachine
        self.bwa_reference = bwa_reference  # path to reference sequence to generate guide alignment
        self.threshold = threshold  # min posterior probability to keep
        self.diagonal_expansion = diagonal_expansion  # alignment algorithm param
        self.constraint_trim = constraint_trim  # alignment algorithm param
        self.output_format = output_format  # smaller output files
        self.degenerate = degenerate  # set of nucleotides for degenerate characters
        self.twoD_chemistry = twoD_chemistry  # flag for 2D sequencing runs
        self.temp_folder = FolderHandler(
        )  # object for holding temporary files (non-toil)
        self.read_name = self.in_fast5.split(
            "/")[-1][:-6]  # get the name without the '.fast5'
        self.target_regions = target_regions
        self.output_formats = {"full": 0, "variantCaller": 1, "assignments": 2}
        self.embed = embed  # embed the output into the fast5 file
        self.event_table = event_table  # specify which event table to use to generate alignments
        self.backward_reference = backward_reference  # fasta path to backward reference if modified bases are used
        self.forward_reference = forward_reference  # fasta path to forward reference
        self.alignment_file = alignment_file  # guide aligments will be gotten from here if set
        self.check_for_temp_file_existance = check_for_temp_file_existance  # don't recreate if files exist
        self.track_memory_usage = track_memory_usage  # has the 'time' program append mem usage stats to output
        self.max_memory_usage_kb = None
        self.read_label = None
        self.get_expectations = get_expectations  # option to gather expectations of transitions and emissions
        self.path_to_signalMachine = os.path.join(
            path_to_bin, "signalMachine")  # path to signalMachine

        assert os.path.exists(
            self.path_to_signalMachine), "Path to signalMachine does not exist"
        assert self.bwa_reference is not None or self.alignment_file is not None, \
            "either 'bwa_reference' or 'alignment_file' argument is needed to generate cigar strings"

        if (in_templateHmm is not None) and os.path.isfile(in_templateHmm):
            self.in_templateHmm = in_templateHmm
        else:
            self.in_templateHmm = None
        if (in_complementHmm is not None) and os.path.isfile(in_complementHmm):
            self.in_complementHmm = in_complementHmm
        else:
            self.in_complementHmm = None

        # similarly for HDPs
        if (in_templateHdp is not None) and os.path.isfile(in_templateHdp):
            self.in_templateHdp = in_templateHdp
        else:
            self.in_templateHdp = None
        if (in_complementHdp is not None) and os.path.isfile(in_complementHdp):
            self.in_complementHdp = in_complementHdp
        else:
            self.in_complementHdp = None
        assert os.path.exists(self.destination), \
            "Destination path does not exist: {}".format(self.destination)

    def run(self):
        print("[SignalAlignment.run] INFO: Starting on {read}".format(
            read=self.in_fast5))
        if self.get_expectations:
            assert self.in_templateHmm is not None, "Need template HMM files for model training"
            if self.twoD_chemistry:
                assert self.in_complementHmm is not None, "Need compement HMM files for model training"
        if not os.path.isfile(self.in_fast5):
            print("[SignalAlignment.run] ERROR: Did not find .fast5 at{file}".
                  format(file=self.in_fast5))
            return False

        # prep
        self.openTempFolder("tempFiles_%s" % self.read_name)
        if self.twoD_chemistry:
            npRead = NanoporeRead2D(fast_five_file=self.in_fast5,
                                    event_table=self.event_table,
                                    initialize=True)
        else:
            npRead = NanoporeRead(fast_five_file=self.in_fast5,
                                  event_table=self.event_table,
                                  initialize=True)
        #todo need to validate / generate events and nucleotide read

        # read label
        read_label = npRead.read_label  # use this to identify the read throughout
        self.read_label = read_label

        # nanopore read (event table, etc)
        npRead_ = self.addTempFilePath("temp_%s.npRead" % self.read_name)
        if not (self.check_for_temp_file_existance
                and os.path.isfile(npRead_)):
            # TODO is this totally f****d for RNA because of 3'-5' mapping?
            fH = open(npRead_, "w")
            ok = npRead.Write(out_file=fH, initialize=True)
            fH.close()
            if not ok:
                self.failStop(
                    "[SignalAlignment.run] File: %s did not pass initial checks"
                    % self.read_name, npRead)
                return False

        # nucleotide read
        read_fasta_ = self.addTempFilePath("temp_seq_%s.fa" % read_label)
        ok = self.write_nucleotide_read(npRead, read_fasta_)
        if not ok:
            print(
                "[SignalAlignment.run] Failed to write nucleotide read.  Continuing execution."
            )

        # alignment info
        cigar_file_ = self.addTempFilePath("temp_cigar_%s.txt" % read_label)
        temp_samfile_ = self.addTempFilePath("temp_sam_file_%s.sam" %
                                             read_label)
        strand = None
        reference_name = None
        if not (self.check_for_temp_file_existance
                and os.path.isfile(cigar_file_)):

            # need guide alignment to generate cigar file
            guide_alignment = None

            # get from alignment file
            if self.alignment_file is not None:
                guide_alignment = getGuideAlignmentFromAlignmentFile(
                    self.alignment_file, read_name=read_label)
                if guide_alignment is None:
                    print(
                        "[SignalAlignment.run] read {} not found in {}".format(
                            read_label, self.alignment_file))

            # get from bwa
            if guide_alignment is None and self.bwa_reference is not None:
                guide_alignment = generateGuideAlignment(
                    reference_fasta=self.bwa_reference,
                    query=read_fasta_,
                    temp_sam_path=temp_samfile_,
                    target_regions=self.target_regions)
                if guide_alignment is None:
                    print(
                        "[SignalAlignment.run] read {} could not be aligned with BWA"
                        .format(read_label))

            # could not map
            if guide_alignment is None:
                self.failStop(
                    "[SignalAlignment.run] ERROR getting guide alignment",
                    npRead)
                return False

            # ensure valid
            if not guide_alignment.validate():
                self.failStop(
                    "[SignalAlignment.run] ERROR invalid guide alignment",
                    npRead)
                return False
            strand = guide_alignment.strand
            reference_name = guide_alignment.reference_name

            # write cigar to file
            cig_handle = open(cigar_file_, "w")
            cig_handle.write(guide_alignment.cigar + "\n")
            cig_handle.close()

        # otherwise, get strand from file
        else:
            strand, reference_name = getInfoFromCigarFile(cigar_file_)

        # add an indicator for the model being used
        if self.stateMachineType == "threeState":
            model_label = ".sm"
            stateMachineType_flag = ""
        elif self.stateMachineType == "threeStateHdp":
            model_label = ".sm3Hdp"
            stateMachineType_flag = "--sm3Hdp "
            if self.twoD_chemistry:
                assert (self.in_templateHdp
                        is not None) and (self.in_complementHdp
                                          is not None), "Need to provide HDPs"
            else:
                assert self.in_templateHdp is not None, "Need to provide Template HDP"
        else:  # make invalid stateMachine control?
            model_label = ".sm"
            stateMachineType_flag = ""

        # next section makes the output file name with the format: /directory/for/files/file.model.orientation.tsv
        # forward strand
        if strand == "+":
            if self.output_format == "full":
                posteriors_file_path = os.path.join(
                    self.destination,
                    read_label + model_label + ".forward.tsv")
            elif self.output_format == "variantCaller":
                posteriors_file_path = os.path.join(
                    self.destination, read_label + model_label + ".tsv")
            else:
                posteriors_file_path = os.path.join(
                    self.destination,
                    read_label + model_label + ".assignments.tsv")

        # backward strand
        elif strand == "-":
            if self.output_format == "full":
                posteriors_file_path = os.path.join(
                    self.destination,
                    read_label + model_label + ".backward.tsv")
            elif self.output_format == "variantCaller":
                posteriors_file_path = os.path.join(
                    self.destination, read_label + model_label + ".tsv")
            else:
                posteriors_file_path = os.path.join(
                    self.destination,
                    read_label + model_label + ".assignments.tsv")

        # sanity check
        else:
            self.failStop(
                "[SignalAlignment.run] ERROR Unexpected strand {}".format(
                    strand), npRead)
            return False

        # flags

        # input (match) models
        if self.in_templateHmm is None:
            self.in_templateHmm = defaultModelFromVersion(
                strand="template", version=npRead.version)
        if self.twoD_chemistry and self.in_complementHmm is None:
            pop1_complement = npRead.complement_model_id == "complement_median68pA_pop1.model"
            self.in_complementHmm = defaultModelFromVersion(
                strand="complement",
                version=npRead.version,
                pop1_complement=pop1_complement)

        assert self.in_templateHmm is not None
        if self.twoD_chemistry:
            if self.in_complementHmm is None:
                self.failStop(
                    "[SignalAlignment.run] ERROR Need to have complement HMM for 2D analysis",
                    npRead)
                return False

        template_model_flag = "-T {} ".format(self.in_templateHmm)
        if self.twoD_chemistry:
            complement_model_flag = "-C {} ".format(self.in_complementHmm)
        else:
            complement_model_flag = ""

        print(
            "[SignalAlignment.run] NOTICE: template model {t} complement model {c}"
            "".format(t=self.in_templateHmm, c=self.in_complementHmm))

        # reference sequences
        assert os.path.isfile(self.forward_reference)
        forward_ref_flag = "-f {f_ref} ".format(f_ref=self.forward_reference)
        if self.backward_reference:
            assert os.path.isfile(self.backward_reference)
            backward_ref_flag = "-b {b_ref} ".format(
                b_ref=self.backward_reference)
        else:
            backward_ref_flag = ""

        # input HDPs
        if (self.in_templateHdp is not None) or (self.in_complementHdp
                                                 is not None):
            hdp_flags = "-v {tHdp_loc} ".format(tHdp_loc=self.in_templateHdp)
            if self.twoD_chemistry and self.in_complementHdp is not None:
                hdp_flags += "-w {cHdp_loc} ".format(
                    cHdp_loc=self.in_complementHdp)
        else:
            hdp_flags = ""

        # threshold
        if self.threshold is not None:
            threshold_flag = "-D {threshold} ".format(threshold=self.threshold)
        else:
            threshold_flag = ""

        # diagonal expansion
        if self.diagonal_expansion is not None:
            diag_expansion_flag = "-x {expansion} ".format(
                expansion=self.diagonal_expansion)
        else:
            diag_expansion_flag = ""

        # constraint trim
        if self.constraint_trim is not None:
            trim_flag = "-m {trim} ".format(trim=self.constraint_trim)
        else:
            trim_flag = ""

        # output format
        if self.output_format not in list(self.output_formats.keys()):
            self.failStop(
                "[SignalAlignment.run] ERROR illegal output format selected %s"
                % self.output_format)
            return False
        out_fmt = "-s {fmt} ".format(
            fmt=self.output_formats[self.output_format])

        # degenerate nucleotide information
        if self.degenerate is not None:
            degenerate_flag = "-o {} ".format(self.degenerate)
        else:
            degenerate_flag = ""

        # twoD flag
        if self.twoD_chemistry:
            twoD_flag = "--twoD"
        else:
            twoD_flag = ""

        # commands
        if self.get_expectations:
            template_expectations_file_path = os.path.join(
                self.destination, read_label + ".template.expectations.tsv")
            complement_expectations_file_path = os.path.join(
                self.destination, read_label + ".complement.expectations.tsv")
            command = \
                "{vA} {td} {degen}{sparse}{model} -q {npRead} " \
                "{t_model}{c_model}{thresh}{expansion}{trim} {hdp}-L {readLabel} -p {cigarFile} " \
                "-t {templateExpectations} -c {complementExpectations} -n {seq_name} {f_ref_fa} {b_ref_fa}" \
                    .format(vA=self.path_to_signalMachine, model=stateMachineType_flag,
                            cigarFile=cigar_file_,
                            npRead=npRead_, readLabel=read_label, td=twoD_flag,
                            templateExpectations=template_expectations_file_path, hdp=hdp_flags,
                            complementExpectations=complement_expectations_file_path, t_model=template_model_flag,
                            c_model=complement_model_flag, thresh=threshold_flag, expansion=diag_expansion_flag,
                            trim=trim_flag, degen=degenerate_flag, sparse=out_fmt, seq_name=reference_name,
                            f_ref_fa=forward_ref_flag, b_ref_fa=backward_ref_flag)
        else:
            command = \
                "{vA} {td} {degen}{sparse}{model} -q {npRead} " \
                "{t_model}{c_model}{thresh}{expansion}{trim} -p {cigarFile} " \
                "-u {posteriors} {hdp}-L {readLabel} -n {seq_name} {f_ref_fa} {b_ref_fa}" \
                    .format(vA=self.path_to_signalMachine, model=stateMachineType_flag, sparse=out_fmt,
                            cigarFile=cigar_file_,
                            readLabel=read_label, npRead=npRead_, td=twoD_flag,
                            t_model=template_model_flag, c_model=complement_model_flag,
                            posteriors=posteriors_file_path, thresh=threshold_flag, expansion=diag_expansion_flag,
                            trim=trim_flag, hdp=hdp_flags, degen=degenerate_flag, seq_name=reference_name,
                            f_ref_fa=forward_ref_flag, b_ref_fa=backward_ref_flag)

        # run
        print("[SignalAlignment.run] running command: ", command, end="\n")
        try:
            command = command.split()

            if self.track_memory_usage:
                mem_command = ['/usr/bin/time', '-f', '\\nDEBUG_MAX_MEM:%M\\n']
                print(
                    "[SignalAlignment.run] Prepending command to track mem usage: {}"
                    .format(mem_command))
                mem_command.extend(command)
                command = mem_command

            output = subprocess.check_output(command, stderr=subprocess.STDOUT)
            output = str(output).split("\\n")
            for line in output:
                print("[SignalAlignment.run]    {}: {}".format(
                    read_label, line))
                if line.startswith("DEBUG_MAX_MEM"):
                    self.max_memory_usage_kb = int(line.split(":")[1])

        except Exception as e:
            print(
                "[SignalAlignment.run] exception ({}) running signalAlign: {}".
                format(type(e), e))
            raise e

        # save to fast5 file (if appropriate)
        if self.embed:
            print("[SignalAlignment.run] embedding into Fast5 ")

            data = self.read_in_signal_align_tsv(posteriors_file_path,
                                                 file_type=self.output_format)
            npRead = NanoporeRead(fast_five_file=self.in_fast5,
                                  twoD=self.twoD_chemistry,
                                  event_table=self.event_table)
            npRead.Initialize(None)
            signal_align_path = npRead.get_latest_basecall_edition(
                "/Analyses/SignalAlign_00{}", new=False)
            assert signal_align_path, "There is no path in Fast5 file: {}".format(
                "/Analyses/SignalAlign_00{}")
            output_path = npRead._join_path(signal_align_path,
                                            self.output_format)
            npRead.write_data(data, output_path)

            # Todo add attributes to signalalign output
            if self.output_format == "full":
                print(
                    "[SignalAlignment.run] writing maximum expected alignment "
                )
                alignment = mea_alignment_from_signal_align(None, events=data)
                mae_path = npRead._join_path(signal_align_path,
                                             "MEA_alignment_labels")
                events = npRead.get_template_events()
                if events:
                    if strand == "-":
                        minus = True
                    else:
                        minus = False
                    labels = match_events_with_signalalign(
                        sa_events=alignment,
                        event_detections=np.asanyarray(npRead.template_events),
                        minus=minus,
                        rna=npRead.is_read_rna())
                    npRead.write_data(labels, mae_path)
                    sam_string = str()
                    if os.path.isfile(temp_samfile_):
                        with open(temp_samfile_, 'r') as test:
                            for line in test:
                                sam_string += line
                    sam_path = npRead._join_path(signal_align_path, "sam")
                    # print(sam_string)
                    npRead.write_data(data=sam_string,
                                      location=sam_path,
                                      compression=None)

        # self.temp_folder.remove_folder()
        return True

    def write_nucleotide_read(self, nanopore_read, file_path):
        try:
            with open(file_path, "w") as read_file:
                # get appropriate read
                if self.twoD_chemistry:
                    # check for table to make 'assembled' 2D alignment table fasta with
                    if not nanopore_read.has2D_alignment_table:
                        nanopore_read.close()
                        return False
                    nucleotide_read = nanopore_read.alignment_table_sequence
                else:
                    nucleotide_read = nanopore_read.template_read

                # write read
                fastaWrite(fileHandleOrFile=read_file,
                           name=nanopore_read.read_label,
                           seq=nucleotide_read)

            return True
        except Exception as e:
            print('[SignalAlignment.write_nucleotide_read] {} exception: {}'.
                  format(type(e), str(e)),
                  file=sys.stderr)
            return False

    def openTempFolder(self, temp_dir):
        self.temp_folder.open_folder(os.path.join(self.destination, temp_dir))

    def addTempFilePath(self, path_to_add):
        return self.temp_folder.add_file_path(path_to_add)

    def failStop(self, message, nanopore_read=None):
        self.temp_folder.remove_folder()
        if nanopore_read is not None:
            nanopore_read.close()
        print(message)

    def read_in_signal_align_tsv(self, tsv_path, file_type):
        """Read in tsv file"""
        assert file_type in ("full", "assignments", "variantCaller")
        with open(tsv_path, 'r') as tsvin:
            if file_type == "full":
                dtype = [('contig', 'S10'), ('reference_index', int),
                         ('reference_kmer', 'S5'), ('read_file', 'S57'),
                         ('strand', 'S1'), ('event_index', int),
                         ('event_mean', float), ('event_noise', float),
                         ('event_duration', float), ('aligned_kmer', 'S5'),
                         ('scaled_mean_current', float),
                         ('scaled_noise', float),
                         ('posterior_probability', float),
                         ('descaled_event_mean', float),
                         ('ont_model_mean', float), ('path_kmer', 'S5')]
            elif file_type == "assignments":
                dtype = [('k-mer', 'S10'), ('read_file', 'S57'),
                         ('descaled_event_mean', float),
                         ('posterior_probability', float)]

            else:
                dtype = [('event_index', int), ('reference_position', int),
                         ('base', 'S6'), ('posterior_probability', float),
                         ('strand', 'S1'), ('forward_mapped', int),
                         ('read_file', 'S57')]

            event_table = np.loadtxt(tsvin, dtype=dtype)

            def remove_field_name(a, name):
                names = list(a.dtype.names)
                if name in names:
                    names.remove(name)
                b = a[names]
                return b

            event_table = remove_field_name(event_table, "read_file")

        return event_table
Beispiel #4
0
class SignalAlignment(object):
    def __init__(self,
                 in_fast5,
                 reference_map,
                 destination,
                 stateMachineType,
                 bwa_index,
                 in_templateHmm,
                 in_complementHmm,
                 in_templateHdp,
                 in_complementHdp,
                 threshold,
                 diagonal_expansion,
                 constraint_trim,
                 degenerate,
                 twoD_chemistry,
                 target_regions=None,
                 output_format="full"):
        self.in_fast5 = in_fast5  # fast5 file to align
        self.reference_map = reference_map  # map with paths to reference sequences
        self.destination = destination  # place where the alignments go, should already exist
        self.stateMachineType = stateMachineType  # flag for signalMachine
        self.bwa_index = bwa_index  # index of reference sequence
        self.threshold = threshold  # min posterior probability to keep
        self.diagonal_expansion = diagonal_expansion  # alignment algorithm param
        self.constraint_trim = constraint_trim  # alignment algorithm param
        self.output_format = output_format  # smaller output files
        self.degenerate = degenerate  # set of nucleotides for degenerate characters
        self.twoD_chemistry = twoD_chemistry  # flag for 2D sequencing runs
        self.temp_folder = FolderHandler(
        )  # object for holding temporary files (non-toil)
        self.read_name = self.in_fast5.split(
            "/")[-1][:-6]  # get the name without the '.fast5'
        self.target_regions = target_regions
        self.output_formats = {"full": 0, "variantCaller": 1, "assignments": 2}

        if (in_templateHmm is not None) and os.path.isfile(in_templateHmm):
            self.in_templateHmm = in_templateHmm
        else:
            self.in_templateHmm = None
        if (in_complementHmm is not None) and os.path.isfile(in_complementHmm):
            self.in_complementHmm = in_complementHmm
        else:
            self.in_complementHmm = None

        # similarly for HDPs
        if (in_templateHdp is not None) and os.path.isfile(in_templateHdp):
            self.in_templateHdp = in_templateHdp
        else:
            self.in_templateHdp = None
        if (in_complementHdp is not None) and os.path.isfile(in_complementHdp):
            self.in_complementHdp = in_complementHdp
        else:
            self.in_complementHdp = None

    def run(self, get_expectations=False):
        print("[SignalAlignment.run]INFO: Starting on {read}".format(
            read=self.in_fast5),
              file=sys.stderr)
        if get_expectations:
            assert self.in_templateHmm is not None and self.in_complementHmm is not None,\
                "Need HMM files for model training"
        # file checks
        if os.path.isfile(self.in_fast5) is False:
            print("[SignalAlignment.run]ERROR: Did not find .fast5 at{file}".
                  format(file=self.in_fast5))
            return False

        self.openTempFolder("tempFiles_%s" % self.read_name)
        npRead_ = self.addTempFilePath("temp_%s.npRead" % self.read_name)
        npRead = NanoporeRead(fast_five_file=self.in_fast5,
                              twoD=self.twoD_chemistry)
        fH = open(npRead_, "w")
        ok = npRead.Write(parent_job=None, out_file=fH, initialize=True)
        fH.close()
        if not ok:
            self.failStop(
                "[SignalAlignment.run]File: %s did not pass initial checks" %
                self.read_name, npRead)
            return False

        read_label = npRead.read_label  # use this to identify the read throughout
        read_fasta_ = self.addTempFilePath("temp_seq_%s.fa" % read_label)
        temp_samfile_ = self.addTempFilePath("temp_sam_file_%s.sam" %
                                             read_label)
        cigar_file_ = self.addTempFilePath("temp_cigar_%s.txt" % read_label)
        if self.twoD_chemistry:
            ok, version, pop1_complement = self.prepare_twod(
                nanopore_read=npRead, twod_read_path=read_fasta_)
        else:
            ok, version, _ = self.prepare_oned(nanopore_read=npRead,
                                               oned_read_path=read_fasta_)
            pop1_complement = None

        # add an indicator for the model being used
        if self.stateMachineType == "threeState":
            model_label = ".sm"
            stateMachineType_flag = ""
        elif self.stateMachineType == "threeStateHdp":
            model_label = ".sm3Hdp"
            stateMachineType_flag = "--sm3Hdp "
            if self.twoD_chemistry:
                assert (self.in_templateHdp
                        is not None) and (self.in_complementHdp
                                          is not None), "Need to provide HDPs"
            else:
                assert self.in_templateHdp is not None, "Need to provide Template HDP"
        else:  # make invalid stateMachine control?
            model_label = ".sm"
            stateMachineType_flag = ""

        guide_alignment = generateGuideAlignment(
            bwa_index=self.bwa_index,
            query=read_fasta_,
            temp_sam_path=temp_samfile_,
            target_regions=self.target_regions)
        ok = guide_alignment.validate(self.reference_map.keys())
        if not ok:
            self.failStop("[SignalAlignment.run]ERROR getting guide alignment",
                          npRead)
            return False

        cig_handle = open(cigar_file_, "w")
        cig_handle.write(guide_alignment.cigar + "\n")
        cig_handle.close()

        # next section makes the output file name with the format: /directory/for/files/file.model.orientation.tsv
        posteriors_file_path = ''
        # forward strand
        if guide_alignment.strand == "+":
            if self.output_format == "full":
                posteriors_file_path = self.destination + read_label + model_label + ".forward.tsv"
            elif self.output_format == "variantCaller":
                posteriors_file_path = self.destination + read_label + model_label + ".tsv"
            else:
                posteriors_file_path = self.destination + read_label + model_label + ".assignments"

        # backward strand
        if guide_alignment.strand == "-":
            if self.output_format == "full":
                posteriors_file_path = self.destination + read_label + model_label + ".backward.tsv"
            elif self.output_format == "variantCaller":
                posteriors_file_path = self.destination + read_label + model_label + ".tsv"
            else:
                posteriors_file_path = self.destination + read_label + model_label + ".assignments"

        # Alignment/Expectations routine
        path_to_signalAlign = "./signalMachine"

        # flags

        # input (match) models
        if self.in_templateHmm is None:
            self.in_templateHmm = defaultModelFromVersion(strand="template",
                                                          version=version)
        if self.twoD_chemistry:
            if self.in_complementHmm is None:
                self.in_complementHmm = defaultModelFromVersion(
                    strand="complement",
                    version=version,
                    pop1_complement=pop1_complement)

        assert self.in_templateHmm is not None
        if self.twoD_chemistry:
            if self.in_complementHmm is None:
                self.failStop(
                    "[SignalAlignment.run]ERROR Need to have complement HMM for 2D analysis",
                    npRead)
                return False

        template_model_flag = "-T {} ".format(self.in_templateHmm)
        if self.twoD_chemistry:
            complement_model_flag = "-C {} ".format(self.in_complementHmm)
        else:
            complement_model_flag = ""

        print(
            "[SignalALignment.run]NOTICE: template model {t} complement model {c}"
            "".format(t=self.in_templateHmm, c=self.in_complementHmm),
            file=sys.stderr)

        # reference sequences
        assert self.reference_map[
            guide_alignment.reference_name]["forward"] is not None
        assert self.reference_map[
            guide_alignment.reference_name]["backward"] is not None
        forward_reference = self.reference_map[
            guide_alignment.reference_name]["forward"]
        backward_reference = self.reference_map[
            guide_alignment.reference_name]["backward"]
        assert os.path.isfile(forward_reference)
        assert os.path.isfile(backward_reference)
        forward_ref_flag = "-f {f_ref} ".format(f_ref=forward_reference)
        backward_ref_flag = "-b {b_ref} ".format(b_ref=backward_reference)

        # input HDPs
        if (self.in_templateHdp is not None) or (self.in_complementHdp
                                                 is not None):
            hdp_flags = "-v {tHdp_loc} ".format(tHdp_loc=self.in_templateHdp)
            if self.twoD_chemistry and self.in_complementHdp is not None:
                hdp_flags += "-w {cHdp_loc} ".format(
                    cHdp_loc=self.in_complementHdp)
        else:
            hdp_flags = ""

        # threshold
        if self.threshold is not None:
            threshold_flag = "-D {threshold} ".format(threshold=self.threshold)
        else:
            threshold_flag = ""

        # diagonal expansion
        if self.diagonal_expansion is not None:
            diag_expansion_flag = "-x {expansion} ".format(
                expansion=self.diagonal_expansion)
        else:
            diag_expansion_flag = ""

        # constraint trim
        if self.constraint_trim is not None:
            trim_flag = "-m {trim} ".format(trim=self.constraint_trim)
        else:
            trim_flag = ""

        # output format
        if self.output_format not in self.output_formats.keys():
            self.failStop(
                "[SignalAlignment.run]ERROR illegal outpur format selected %s"
                % self.output_format)
            return False
        out_fmt = "-s {fmt} ".format(
            fmt=self.output_formats[self.output_format])

        # degenerate nucleotide information
        if self.degenerate is not None:
            degenerate_flag = "-o {} ".format(self.degenerate)
        else:
            degenerate_flag = ""

        if self.twoD_chemistry:
            twoD_flag = "--twoD"
        else:
            twoD_flag = ""
        # commands
        if get_expectations:
            template_expectations_file_path = self.destination + read_label + ".template.expectations"
            complement_expectations_file_path = self.destination + read_label + ".complement.expectations"

            command = \
                "{vA} {td} {degen}{sparse}{model}{f_ref}{b_ref} -q {npRead} " \
                "{t_model}{c_model}{thresh}{expansion}{trim} {hdp}-L {readLabel} -p {cigarFile} " \
                "-t {templateExpectations} -c {complementExpectations}"\
                .format(vA=path_to_signalAlign, model=stateMachineType_flag,
                        f_ref=forward_ref_flag, b_ref=backward_ref_flag, cigarFile=cigar_file_,
                        npRead=npRead_, readLabel=read_label, td=twoD_flag,
                        templateExpectations=template_expectations_file_path, hdp=hdp_flags,
                        complementExpectations=complement_expectations_file_path, t_model=template_model_flag,
                        c_model=complement_model_flag, thresh=threshold_flag, expansion=diag_expansion_flag,
                        trim=trim_flag, degen=degenerate_flag, sparse=out_fmt)
        else:
            print("read_label", read_label)
            command = \
                "{vA} {td} {degen}{sparse}{model}{f_ref}{b_ref} -q {npRead} " \
                "{t_model}{c_model}{thresh}{expansion}{trim} -p {cigarFile} " \
                "-u {posteriors} {hdp}-L {readLabel}"\
                .format(vA=path_to_signalAlign, model=stateMachineType_flag, sparse=out_fmt,
                        f_ref=forward_ref_flag, b_ref=backward_ref_flag, cigarFile=cigar_file_,
                        readLabel=read_label, npRead=npRead_, td=twoD_flag,
                        t_model=template_model_flag, c_model=complement_model_flag,
                        posteriors=posteriors_file_path, thresh=threshold_flag, expansion=diag_expansion_flag,
                        trim=trim_flag, hdp=hdp_flags, degen=degenerate_flag)

        # run
        print("signalAlign - running command: ",
              command,
              end="\n",
              file=sys.stderr)
        os.system(command)
        self.temp_folder.remove_folder()
        return True

    def prepare_oned(self, nanopore_read, oned_read_path):
        try:
            read_file = open(oned_read_path, "w")
            fastaWrite(fileHandleOrFile=read_file,
                       name=nanopore_read.read_label,
                       seq=nanopore_read.template_read)
            version = nanopore_read.version
            read_file.close()
            nanopore_read.close()
            return True, version, False
        except Exception:
            return False, None, False

    def prepare_twod(self, nanopore_read, twod_read_path):
        # check for table to make 'assembled' 2D alignment table fasta with
        if nanopore_read.has2D_alignment_table is False:
            nanopore_read.close()
            return False, None, False
        fasta_handle = open(twod_read_path, "w")
        fastaWrite(fileHandleOrFile=fasta_handle,
                   name=nanopore_read.read_label,
                   seq=nanopore_read.alignment_table_sequence)
        if nanopore_read.complement_model_id == "complement_median68pA_pop1.model":
            pop1_complement = True
        else:
            pop1_complement = False
        version = nanopore_read.version
        fasta_handle.close()
        nanopore_read.close()
        return True, version, pop1_complement

    def openTempFolder(self, temp_dir):
        self.temp_folder.open_folder("%s%s" % (self.destination, temp_dir))

    def addTempFilePath(self, path_to_add):
        return self.temp_folder.add_file_path(path_to_add)

    def failStop(self, message, nanopore_read=None):
        self.temp_folder.remove_folder()
        if nanopore_read is not None:
            nanopore_read.close()
        print(message, file=sys.stderr)
Beispiel #5
0
class SignalAlignment(object):
    def __init__(self,
                 in_fast5,
                 destination,
                 stateMachineType,
                 bwa_index,
                 in_templateHmm,
                 in_complementHmm,
                 in_templateHdp,
                 in_complementHdp,
                 threshold,
                 diagonal_expansion,
                 constraint_trim,
                 degenerate,
                 twoD_chemistry,
                 forward_reference,
                 backward_reference=None,
                 target_regions=None,
                 output_format="full",
                 embed=False,
                 event_table=False):
        self.in_fast5 = in_fast5  # fast5 file to align
        self.destination = destination  # place where the alignments go, should already exist
        self.stateMachineType = stateMachineType  # flag for signalMachine
        self.bwa_index = bwa_index  # index of reference sequence
        self.threshold = threshold  # min posterior probability to keep
        self.diagonal_expansion = diagonal_expansion  # alignment algorithm param
        self.constraint_trim = constraint_trim  # alignment algorithm param
        self.output_format = output_format  # smaller output files
        self.degenerate = degenerate  # set of nucleotides for degenerate characters
        self.twoD_chemistry = twoD_chemistry  # flag for 2D sequencing runs
        self.temp_folder = FolderHandler(
        )  # object for holding temporary files (non-toil)
        self.read_name = self.in_fast5.split(
            "/")[-1][:-6]  # get the name without the '.fast5'
        self.target_regions = target_regions
        self.output_formats = {"full": 0, "variantCaller": 1, "assignments": 2}
        self.embed = embed  # embed the output into the fast5 file
        self.event_table = event_table  # specify which event table to use to generate alignments
        self.backward_reference = backward_reference  # fasta path to backward reference if modified bases are used
        self.forward_reference = forward_reference  # fasta path to forward reference

        if (in_templateHmm is not None) and os.path.isfile(in_templateHmm):
            self.in_templateHmm = in_templateHmm
        else:
            self.in_templateHmm = None
        if (in_complementHmm is not None) and os.path.isfile(in_complementHmm):
            self.in_complementHmm = in_complementHmm
        else:
            self.in_complementHmm = None

        # similarly for HDPs
        if (in_templateHdp is not None) and os.path.isfile(in_templateHdp):
            self.in_templateHdp = in_templateHdp
        else:
            self.in_templateHdp = None
        if (in_complementHdp is not None) and os.path.isfile(in_complementHdp):
            self.in_complementHdp = in_complementHdp
        else:
            self.in_complementHdp = None

    def run(self, get_expectations=False):
        print("[SignalAlignment.run]INFO: Starting on {read}".format(
            read=self.in_fast5),
              file=sys.stderr)
        if get_expectations:
            assert self.in_templateHmm is not None and self.in_complementHmm is not None, \
                "Need HMM files for model training"
        # file checks
        if os.path.isfile(self.in_fast5) is False:
            print("[SignalAlignment.run]ERROR: Did not find .fast5 at{file}".
                  format(file=self.in_fast5))
            return False

        self.openTempFolder("tempFiles_%s" % self.read_name)
        npRead_ = self.addTempFilePath("temp_%s.npRead" % self.read_name)
        # TODO is this totally f****d for RNA because of 3'-5' mapping?
        npRead = NanoporeRead(fast_five_file=self.in_fast5,
                              twoD=self.twoD_chemistry,
                              event_table=self.event_table)
        fH = open(npRead_, "w")
        ok = npRead.Write(parent_job=None, out_file=fH, initialize=True)
        fH.close()
        if not ok:
            self.failStop(
                "[SignalAlignment.run]File: %s did not pass initial checks" %
                self.read_name, npRead)
            return False

        read_label = npRead.read_label  # use this to identify the read throughout
        read_fasta_ = self.addTempFilePath("temp_seq_%s.fa" % read_label)
        temp_samfile_ = self.addTempFilePath("temp_sam_file_%s.sam" %
                                             read_label)
        cigar_file_ = self.addTempFilePath("temp_cigar_%s.txt" % read_label)
        if self.twoD_chemistry:
            ok, version, pop1_complement = self.prepare_twod(
                nanopore_read=npRead, twod_read_path=read_fasta_)
        else:
            ok, version, _ = self.prepare_oned(nanopore_read=npRead,
                                               oned_read_path=read_fasta_)
            pop1_complement = None
        # add an indicator for the model being used
        if self.stateMachineType == "threeState":
            model_label = ".sm"
            stateMachineType_flag = ""
        elif self.stateMachineType == "threeStateHdp":
            model_label = ".sm3Hdp"
            stateMachineType_flag = "--sm3Hdp "
            if self.twoD_chemistry:
                assert (self.in_templateHdp
                        is not None) and (self.in_complementHdp
                                          is not None), "Need to provide HDPs"
            else:
                assert self.in_templateHdp is not None, "Need to provide Template HDP"
        else:  # make invalid stateMachine control?
            model_label = ".sm"
            stateMachineType_flag = ""

        guide_alignment = generateGuideAlignment(
            bwa_index=self.bwa_index,
            query=read_fasta_,
            temp_sam_path=temp_samfile_,
            target_regions=self.target_regions)
        # ok = guide_alignment.validate(list(self.reference_map.keys()))
        ok = guide_alignment.validate()

        if not ok:
            self.failStop("[SignalAlignment.run]ERROR getting guide alignment",
                          npRead)
            return False

        cig_handle = open(cigar_file_, "w")
        cig_handle.write(guide_alignment.cigar + "\n")
        cig_handle.close()

        # next section makes the output file name with the format: /directory/for/files/file.model.orientation.tsv
        posteriors_file_path = ''
        # forward strand
        if guide_alignment.strand == "+":
            if self.output_format == "full":
                posteriors_file_path = self.destination + read_label + model_label + ".forward.tsv"
            elif self.output_format == "variantCaller":
                posteriors_file_path = self.destination + read_label + model_label + ".tsv"
            else:
                posteriors_file_path = self.destination + read_label + model_label + ".assignments"

        # backward strand
        if guide_alignment.strand == "-":
            if self.output_format == "full":
                posteriors_file_path = self.destination + read_label + model_label + ".backward.tsv"
            elif self.output_format == "variantCaller":
                posteriors_file_path = self.destination + read_label + model_label + ".tsv"
            else:
                posteriors_file_path = self.destination + read_label + model_label + ".assignments"

        # Alignment/Expectations routine
        path_to_signalAlign = "./signalMachine"

        # flags

        # input (match) models
        if self.in_templateHmm is None:
            self.in_templateHmm = defaultModelFromVersion(strand="template",
                                                          version=version)
        if self.twoD_chemistry:
            if self.in_complementHmm is None:
                self.in_complementHmm = defaultModelFromVersion(
                    strand="complement",
                    version=version,
                    pop1_complement=pop1_complement)

        assert self.in_templateHmm is not None
        if self.twoD_chemistry:
            if self.in_complementHmm is None:
                self.failStop(
                    "[SignalAlignment.run]ERROR Need to have complement HMM for 2D analysis",
                    npRead)
                return False

        template_model_flag = "-T {} ".format(self.in_templateHmm)
        if self.twoD_chemistry:
            complement_model_flag = "-C {} ".format(self.in_complementHmm)
        else:
            complement_model_flag = ""

        print(
            "[SignalALignment.run]NOTICE: template model {t} complement model {c}"
            "".format(t=self.in_templateHmm, c=self.in_complementHmm),
            file=sys.stderr)

        # reference sequences
        assert os.path.isfile(self.forward_reference)
        forward_ref_flag = "-f {f_ref} ".format(f_ref=self.forward_reference)
        if self.backward_reference:
            assert os.path.isfile(self.backward_reference)
            backward_ref_flag = "-b {b_ref} ".format(
                b_ref=self.backward_reference)
        else:
            backward_ref_flag = ""

        # input HDPs
        if (self.in_templateHdp is not None) or (self.in_complementHdp
                                                 is not None):
            hdp_flags = "-v {tHdp_loc} ".format(tHdp_loc=self.in_templateHdp)
            if self.twoD_chemistry and self.in_complementHdp is not None:
                hdp_flags += "-w {cHdp_loc} ".format(
                    cHdp_loc=self.in_complementHdp)
        else:
            hdp_flags = ""

        # threshold
        if self.threshold is not None:
            threshold_flag = "-D {threshold} ".format(threshold=self.threshold)
        else:
            threshold_flag = ""

        # diagonal expansion
        if self.diagonal_expansion is not None:
            diag_expansion_flag = "-x {expansion} ".format(
                expansion=self.diagonal_expansion)
        else:
            diag_expansion_flag = ""

        # constraint trim
        if self.constraint_trim is not None:
            trim_flag = "-m {trim} ".format(trim=self.constraint_trim)
        else:
            trim_flag = ""

        # output format
        if self.output_format not in list(self.output_formats.keys()):
            self.failStop(
                "[SignalAlignment.run]ERROR illegal output format selected %s"
                % self.output_format)
            return False
        out_fmt = "-s {fmt} ".format(
            fmt=self.output_formats[self.output_format])

        # degenerate nucleotide information
        if self.degenerate is not None:
            degenerate_flag = "-o {} ".format(self.degenerate)
        else:
            degenerate_flag = ""

        if self.twoD_chemistry:
            twoD_flag = "--twoD"
        else:
            twoD_flag = ""
        # commands
        if get_expectations:
            template_expectations_file_path = self.destination + read_label + ".template.expectations"
            complement_expectations_file_path = self.destination + read_label + ".complement.expectations"

            command = \
                "{vA} {td} {degen}{sparse}{model} -q {npRead} " \
                "{t_model}{c_model}{thresh}{expansion}{trim} {hdp}-L {readLabel} -p {cigarFile} " \
                "-t {templateExpectations} -c {complementExpectations} -n {seq_name} {f_ref_fa} {b_ref_fa}" \
                    .format(vA=path_to_signalAlign, model=stateMachineType_flag,
                            cigarFile=cigar_file_,
                            npRead=npRead_, readLabel=read_label, td=twoD_flag,
                            templateExpectations=template_expectations_file_path, hdp=hdp_flags,
                            complementExpectations=complement_expectations_file_path, t_model=template_model_flag,
                            c_model=complement_model_flag, thresh=threshold_flag, expansion=diag_expansion_flag,
                            trim=trim_flag, degen=degenerate_flag, sparse=out_fmt, seq_name=guide_alignment.reference_name,
                            f_ref_fa=forward_ref_flag, b_ref_fa=backward_ref_flag)
        else:
            command = \
                "{vA} {td} {degen}{sparse}{model} -q {npRead} " \
                "{t_model}{c_model}{thresh}{expansion}{trim} -p {cigarFile} " \
                "-u {posteriors} {hdp}-L {readLabel} -n {seq_name} {f_ref_fa} {b_ref_fa}" \
                    .format(vA=path_to_signalAlign, model=stateMachineType_flag, sparse=out_fmt,
                            cigarFile=cigar_file_,
                            readLabel=read_label, npRead=npRead_, td=twoD_flag,
                            t_model=template_model_flag, c_model=complement_model_flag,
                            posteriors=posteriors_file_path, thresh=threshold_flag, expansion=diag_expansion_flag,
                            trim=trim_flag, hdp=hdp_flags, degen=degenerate_flag, seq_name=guide_alignment.reference_name,
                            f_ref_fa=forward_ref_flag, b_ref_fa=backward_ref_flag)

        # run
        print("signalAlign - running command: ",
              command,
              end="\n",
              file=sys.stderr)
        os.system(command)
        if self.embed:
            print("signalAlign - embedding into Fast5 ", file=sys.stderr)

            data = self.read_in_signal_align_tsv(posteriors_file_path,
                                                 file_type=self.output_format)
            npRead = NanoporeRead(fast_five_file=self.in_fast5,
                                  twoD=self.twoD_chemistry,
                                  event_table=self.event_table)
            npRead.Initialize(None)
            signal_align_path = npRead.get_latest_basecall_edition(
                "/Analyses/SignalAlign_00{}", new=False)
            assert signal_align_path, "There is no path in Fast5 file: {}".format(
                "/Analyses/SignalAlign_00{}")
            output_path = npRead._join_path(signal_align_path,
                                            self.output_format)
            npRead.write_data(data, output_path)

            # Todo add attributes to signalalign output
            if self.output_format == "full":
                print("signalAlign - writing maximum expected alignment ",
                      file=sys.stderr)
                alignment = mea_alignment_from_signal_align(None, events=data)
                mae_path = npRead._join_path(signal_align_path,
                                             "MEA_alignment_labels")
                events = npRead.get_template_events()
                if events:
                    if guide_alignment.strand == "-":
                        minus = True
                    else:
                        minus = False
                    labels = match_events_with_signalalign(
                        sa_events=alignment,
                        event_detections=np.asanyarray(npRead.template_events),
                        minus=minus,
                        rna=npRead.is_read_rna())
                    npRead.write_data(labels, mae_path)
                    sam_string = str()
                    with open(temp_samfile_, 'r') as test:
                        for line in test:
                            sam_string += line
                    sam_path = npRead._join_path(signal_align_path, "sam")
                    # print(sam_string)
                    npRead.write_data(data=sam_string,
                                      location=sam_path,
                                      compression=None)

        # self.temp_folder.remove_folder()
        return True

    def prepare_oned(self, nanopore_read, oned_read_path):
        try:
            read_file = open(oned_read_path, "w")
            fastaWrite(fileHandleOrFile=read_file,
                       name=nanopore_read.read_label,
                       seq=nanopore_read.template_read)

            version = nanopore_read.version
            read_file.close()

            return True, version, False
        except Exception as e:
            return False, None, False

    def prepare_twod(self, nanopore_read, twod_read_path):
        # check for table to make 'assembled' 2D alignment table fasta with
        if nanopore_read.has2D_alignment_table is False:
            nanopore_read.close()
            return False, None, False
        fasta_handle = open(twod_read_path, "w")
        fastaWrite(fileHandleOrFile=fasta_handle,
                   name=nanopore_read.read_label,
                   seq=nanopore_read.alignment_table_sequence)
        if nanopore_read.complement_model_id == "complement_median68pA_pop1.model":
            pop1_complement = True
        else:
            pop1_complement = False
        version = nanopore_read.version
        fasta_handle.close()
        nanopore_read.close()
        return True, version, pop1_complement

    def openTempFolder(self, temp_dir):
        self.temp_folder.open_folder("%s%s" % (self.destination, temp_dir))

    def addTempFilePath(self, path_to_add):
        return self.temp_folder.add_file_path(path_to_add)

    def failStop(self, message, nanopore_read=None):
        self.temp_folder.remove_folder()
        if nanopore_read is not None:
            nanopore_read.close()
        print(message, file=sys.stderr)

    def read_in_signal_align_tsv(self, tsv_path, file_type):
        """Read in tsv file"""
        assert file_type in ("full", "assignments", "variantCaller")
        with open(tsv_path, 'r') as tsvin:
            if file_type == "full":
                dtype = [('contig', 'S10'), ('reference_index', int),
                         ('reference_kmer', 'S5'), ('read_file', 'S57'),
                         ('strand', 'S1'), ('event_index', int),
                         ('event_mean', float), ('event_noise', float),
                         ('event_duration', float), ('aligned_kmer', 'S5'),
                         ('scaled_mean_current', float),
                         ('scaled_noise', float),
                         ('posterior_probability', float),
                         ('descaled_event_mean', float),
                         ('ont_model_mean', float), ('path_kmer', 'S5')]
            elif file_type == "assignments":
                dtype = [('k-mer', 'S10'), ('read_file', 'S57'),
                         ('descaled_event_mean', float),
                         ('posterior_probability', float)]

            else:
                dtype = [('event_index', int), ('reference_position', int),
                         ('base', 'S6'), ('posterior_probability', float),
                         ('strand', 'S1'), ('forward_mapped', int),
                         ('read_file', 'S57')]

            event_table = np.loadtxt(tsvin, dtype=dtype)

            def remove_field_name(a, name):
                names = list(a.dtype.names)
                if name in names:
                    names.remove(name)
                b = a[names]
                return b

            event_table = remove_field_name(event_table, "read_file")

        return event_table