def makeNanoporeRead(f5_path):
    """Serialise one fast5 read, gzip it and deliver it to the read store.

    ``job``, ``workdir`` and ``readstore_dir`` are not parameters; they are
    resolved from the surrounding scope at call time.

    Args:
        f5_path: path of the fast5 file handed to ``NanoporeRead``.

    Returns:
        ``(read_label, ledger_line)`` on success, where ``ledger_line`` is
        ``readstore_dir`` joined with the gzip file name plus a newline.
        ``None`` if the read fails to initialise, fails to write, or the
        delivery raises ``RuntimeError``.
    """
    # here we load the NanoporeRead and write it to a file
    nanopore_read = NanoporeRead(fast_five_file=f5_path, twoD=False)  # make this a config arg
    if not nanopore_read.Initialize(job):
        return None

    read_label = nanopore_read.read_label
    temp_path = job.fileStore.getLocalTempFile()
    # `with` guarantees the handle is closed even when Write raises.
    with open(temp_path, "w") as plain_out:
        ok = nanopore_read.Write(job, plain_out, initialize=False)
    if not ok:
        return None

    # then we gzip it and deliver it to the readstore and return the ledger line
    gz_file = LocalFile(workdir=workdir, filename="%s.np.gz" % read_label)
    with open(temp_path, "rb") as plain_in, \
            gzip.open(gz_file.fullpathGetter(), "wb") as gz_out:
        shutil.copyfileobj(plain_in, gz_out)

    try:
        deliverOutput(job, gz_file, readstore_dir)
    except RuntimeError:
        job.fileStore.logToMaster(
            "[makeNanoporeReadsJobFunction]Read %s failed to upload" % read_label)
        return None
    return (read_label, "%s%s\n" % (readstore_dir, gz_file.filenameGetter()))
def run(self, get_expectations=False):
    """Prepare one fast5 read and run ``./signalMachine`` on it via the shell.

    The method writes the nanopore read, its nucleotide sequence and the
    guide-alignment cigar into temp files, assembles the command line from
    the instance's settings, executes it with ``os.system`` and removes the
    temp folder.

    Args:
        get_expectations: when true, build the command that writes template
            and complement expectation files instead of the posteriors file.

    Returns:
        ``False`` if the fast5 file is missing, the read fails its initial
        checks, the guide alignment does not validate, a 2D read has no
        complement HMM, or the output format is unknown. ``True`` once the
        command has been executed (its exit status does not affect the
        return value; a non-zero status is only reported on stderr).

    Raises:
        AssertionError: if required HMM/HDP/reference inputs are missing.
    """
    print("[SignalAlignment.run]INFO: Starting on {read}".format(
        read=self.in_fast5), file=sys.stderr)
    if get_expectations:
        # The complement model is only used for 2D reads, so only demand it
        # there; 1D training needs the template model alone.
        assert self.in_templateHmm is not None, "Need HMM files for model training"
        if self.twoD_chemistry:
            assert self.in_complementHmm is not None, "Need HMM files for model training"

    # file checks
    if not os.path.isfile(self.in_fast5):
        print("[SignalAlignment.run]ERROR: Did not find .fast5 at{file}".format(
            file=self.in_fast5))
        return False

    self.openTempFolder("tempFiles_%s" % self.read_name)
    npRead_ = self.addTempFilePath("temp_%s.npRead" % self.read_name)
    npRead = NanoporeRead(fast_five_file=self.in_fast5, twoD=self.twoD_chemistry)
    # `with` closes the handle even if Write raises.
    with open(npRead_, "w") as np_handle:
        ok = npRead.Write(parent_job=None, out_file=np_handle, initialize=True)
    if not ok:
        self.failStop(
            "[SignalAlignment.run]File: %s did not pass initial checks" % self.read_name,
            npRead)
        return False

    read_label = npRead.read_label  # use this to identify the read throughout
    read_fasta_ = self.addTempFilePath("temp_seq_%s.fa" % read_label)
    temp_samfile_ = self.addTempFilePath("temp_sam_file_%s.sam" % read_label)
    cigar_file_ = self.addTempFilePath("temp_cigar_%s.txt" % read_label)

    if self.twoD_chemistry:
        ok, version, pop1_complement = self.prepare_twod(
            nanopore_read=npRead, twod_read_path=read_fasta_)
    else:
        ok, version, _ = self.prepare_oned(nanopore_read=npRead,
                                           oned_read_path=read_fasta_)
        pop1_complement = None
    if not ok:
        # Previously this status was silently overwritten; surface it but
        # keep going so the control flow stays as before.
        print("[SignalAlignment.run]WARNING: preparing the nucleotide read for "
              "{} reported failure; continuing".format(read_label), file=sys.stderr)

    # add an indicator for the model being used
    if self.stateMachineType == "threeStateHdp":
        model_label = ".sm3Hdp"
        stateMachineType_flag = "--sm3Hdp "
        if self.twoD_chemistry:
            assert (self.in_templateHdp is not None) and (self.in_complementHdp is not None), \
                "Need to provide HDPs"
        else:
            assert self.in_templateHdp is not None, "Need to provide Template HDP"
    else:
        # "threeState" and any unrecognised value share the plain model.
        model_label = ".sm"
        stateMachineType_flag = ""

    guide_alignment = generateGuideAlignment(
        bwa_index=self.bwa_index, query=read_fasta_,
        temp_sam_path=temp_samfile_, target_regions=self.target_regions)
    if not guide_alignment.validate(self.reference_map.keys()):
        self.failStop("[SignalAlignment.run]ERROR getting guide alignment", npRead)
        return False

    with open(cigar_file_, "w") as cig_handle:
        cig_handle.write(guide_alignment.cigar + "\n")

    # output file name has the format: /directory/for/files/file.model.orientation.tsv
    strand = guide_alignment.strand
    if strand in ("+", "-"):
        if self.output_format == "full":
            suffix = ".forward.tsv" if strand == "+" else ".backward.tsv"
        elif self.output_format == "variantCaller":
            suffix = ".tsv"
        else:
            suffix = ".assignments"
        posteriors_file_path = self.destination + read_label + model_label + suffix
    else:
        posteriors_file_path = ''

    # Alignment/Expectations routine
    path_to_signalAlign = "./signalMachine"

    # input (match) models
    if self.in_templateHmm is None:
        self.in_templateHmm = defaultModelFromVersion(strand="template", version=version)
    if self.twoD_chemistry and self.in_complementHmm is None:
        self.in_complementHmm = defaultModelFromVersion(
            strand="complement", version=version, pop1_complement=pop1_complement)

    assert self.in_templateHmm is not None
    if self.twoD_chemistry and self.in_complementHmm is None:
        self.failStop(
            "[SignalAlignment.run]ERROR Need to have complement HMM for 2D analysis",
            npRead)
        return False

    template_model_flag = "-T {} ".format(self.in_templateHmm)
    if self.twoD_chemistry:
        complement_model_flag = "-C {} ".format(self.in_complementHmm)
    else:
        complement_model_flag = ""

    print("[SignalALignment.run]NOTICE: template model {t} complement model {c}"
          "".format(t=self.in_templateHmm, c=self.in_complementHmm), file=sys.stderr)

    # reference sequences
    references = self.reference_map[guide_alignment.reference_name]
    forward_reference = references["forward"]
    backward_reference = references["backward"]
    assert forward_reference is not None
    assert backward_reference is not None
    assert os.path.isfile(forward_reference)
    assert os.path.isfile(backward_reference)
    forward_ref_flag = "-f {f_ref} ".format(f_ref=forward_reference)
    backward_ref_flag = "-b {b_ref} ".format(b_ref=backward_reference)

    # input HDPs: emit each flag only for an HDP that is actually set, so a
    # missing template HDP no longer becomes the literal "-v None".
    hdp_flags = ""
    if self.in_templateHdp is not None:
        hdp_flags += "-v {tHdp_loc} ".format(tHdp_loc=self.in_templateHdp)
    if self.twoD_chemistry and self.in_complementHdp is not None:
        hdp_flags += "-w {cHdp_loc} ".format(cHdp_loc=self.in_complementHdp)

    # threshold
    if self.threshold is not None:
        threshold_flag = "-D {threshold} ".format(threshold=self.threshold)
    else:
        threshold_flag = ""

    # diagonal expansion
    if self.diagonal_expansion is not None:
        diag_expansion_flag = "-x {expansion} ".format(expansion=self.diagonal_expansion)
    else:
        diag_expansion_flag = ""

    # constraint trim
    if self.constraint_trim is not None:
        trim_flag = "-m {trim} ".format(trim=self.constraint_trim)
    else:
        trim_flag = ""

    # output format
    if self.output_format not in self.output_formats:
        self.failStop(
            "[SignalAlignment.run]ERROR illegal outpur format selected %s" % self.output_format)
        return False
    out_fmt = "-s {fmt} ".format(fmt=self.output_formats[self.output_format])

    # degenerate nucleotide information
    if self.degenerate is not None:
        degenerate_flag = "-o {} ".format(self.degenerate)
    else:
        degenerate_flag = ""

    twoD_flag = "--twoD" if self.twoD_chemistry else ""

    # commands
    # NOTE: paths are interpolated unquoted into a shell string, so paths
    # containing whitespace or shell metacharacters will break the command.
    if get_expectations:
        template_expectations_file_path = self.destination + read_label + ".template.expectations"
        complement_expectations_file_path = self.destination + read_label + ".complement.expectations"
        command = (
            "{vA} {td} {degen}{sparse}{model}{f_ref}{b_ref} -q {npRead} "
            "{t_model}{c_model}{thresh}{expansion}{trim} {hdp}-L {readLabel} -p {cigarFile} "
            "-t {templateExpectations} -c {complementExpectations}"
        ).format(vA=path_to_signalAlign, model=stateMachineType_flag,
                 f_ref=forward_ref_flag, b_ref=backward_ref_flag,
                 cigarFile=cigar_file_, npRead=npRead_, readLabel=read_label,
                 td=twoD_flag,
                 templateExpectations=template_expectations_file_path,
                 hdp=hdp_flags,
                 complementExpectations=complement_expectations_file_path,
                 t_model=template_model_flag, c_model=complement_model_flag,
                 thresh=threshold_flag, expansion=diag_expansion_flag,
                 trim=trim_flag, degen=degenerate_flag, sparse=out_fmt)
    else:
        print("read_label", read_label)
        command = (
            "{vA} {td} {degen}{sparse}{model}{f_ref}{b_ref} -q {npRead} "
            "{t_model}{c_model}{thresh}{expansion}{trim} -p {cigarFile} "
            "-u {posteriors} {hdp}-L {readLabel}"
        ).format(vA=path_to_signalAlign, model=stateMachineType_flag,
                 sparse=out_fmt, f_ref=forward_ref_flag, b_ref=backward_ref_flag,
                 cigarFile=cigar_file_, readLabel=read_label, npRead=npRead_,
                 td=twoD_flag, t_model=template_model_flag,
                 c_model=complement_model_flag, posteriors=posteriors_file_path,
                 thresh=threshold_flag, expansion=diag_expansion_flag,
                 trim=trim_flag, hdp=hdp_flags, degen=degenerate_flag)

    # run
    print("signalAlign - running command: ", command, end="\n", file=sys.stderr)
    exit_status = os.system(command)
    if exit_status != 0:
        # Report the failure; the return value is left unchanged for callers.
        print("[SignalAlignment.run]WARNING: command exited with status {}".format(
            exit_status), file=sys.stderr)
    self.temp_folder.remove_folder()
    return True
def run(self):
    """Prepare one fast5 read, run signalMachine on it and optionally embed the result.

    The method writes the nanopore read, nucleotide sequence and guide
    alignment cigar into temp files (re-using existing ones when
    ``self.check_for_temp_file_existance`` is set), assembles the command
    line from the instance's settings, runs it with
    ``subprocess.check_output`` and echoes its output. When ``self.embed``
    is set, the resulting table is written back into the fast5 file.

    Returns:
        ``False`` if the fast5 file is missing, the read fails its initial
        checks, no valid guide alignment is available, the strand is not
        ``"+"``/``"-"``, a 2D read has no complement HMM, or the output
        format is unknown; ``True`` otherwise.

    Raises:
        AssertionError: if required HMM/HDP/reference inputs are missing.
        Exception: whatever running the command raises is printed and
            re-raised (e.g. ``subprocess.CalledProcessError``).
    """
    print("[SignalAlignment.run] INFO: Starting on {read}".format(read=self.in_fast5))
    if self.get_expectations:
        assert self.in_templateHmm is not None, "Need template HMM files for model training"
        if self.twoD_chemistry:
            assert self.in_complementHmm is not None, "Need compement HMM files for model training"
    if not os.path.isfile(self.in_fast5):
        print("[SignalAlignment.run] ERROR: Did not find .fast5 at{file}".format(
            file=self.in_fast5))
        return False

    # prep
    self.openTempFolder("tempFiles_%s" % self.read_name)
    read_class = NanoporeRead2D if self.twoD_chemistry else NanoporeRead
    npRead = read_class(fast_five_file=self.in_fast5, event_table=self.event_table,
                        initialize=True)
    # todo need to validate / generate events and nucleotide read

    # read label: use this to identify the read throughout
    read_label = npRead.read_label
    self.read_label = read_label

    # nanopore read (event table, etc)
    npRead_ = self.addTempFilePath("temp_%s.npRead" % self.read_name)
    if not (self.check_for_temp_file_existance and os.path.isfile(npRead_)):
        # TODO: check whether this is broken for RNA because of 3'-5' mapping
        # `with` closes the handle even if Write raises.
        with open(npRead_, "w") as np_handle:
            ok = npRead.Write(out_file=np_handle, initialize=True)
        if not ok:
            self.failStop(
                "[SignalAlignment.run] File: %s did not pass initial checks" % self.read_name,
                npRead)
            return False

    # nucleotide read
    read_fasta_ = self.addTempFilePath("temp_seq_%s.fa" % read_label)
    if not self.write_nucleotide_read(npRead, read_fasta_):
        print("[SignalAlignment.run] Failed to write nucleotide read. Continuing execution.")

    # alignment info
    cigar_file_ = self.addTempFilePath("temp_cigar_%s.txt" % read_label)
    temp_samfile_ = self.addTempFilePath("temp_sam_file_%s.sam" % read_label)
    strand = None
    reference_name = None
    if not (self.check_for_temp_file_existance and os.path.isfile(cigar_file_)):
        # need guide alignment to generate cigar file
        guide_alignment = None

        # get from alignment file
        if self.alignment_file is not None:
            guide_alignment = getGuideAlignmentFromAlignmentFile(
                self.alignment_file, read_name=read_label)
            if guide_alignment is None:
                print("[SignalAlignment.run] read {} not found in {}".format(
                    read_label, self.alignment_file))

        # get from bwa
        if guide_alignment is None and self.bwa_reference is not None:
            guide_alignment = generateGuideAlignment(
                reference_fasta=self.bwa_reference, query=read_fasta_,
                temp_sam_path=temp_samfile_, target_regions=self.target_regions)
            if guide_alignment is None:
                print("[SignalAlignment.run] read {} could not be aligned with BWA"
                      .format(read_label))

        # could not map
        if guide_alignment is None:
            self.failStop("[SignalAlignment.run] ERROR getting guide alignment", npRead)
            return False

        # ensure valid
        if not guide_alignment.validate():
            self.failStop("[SignalAlignment.run] ERROR invalid guide alignment", npRead)
            return False

        strand = guide_alignment.strand
        reference_name = guide_alignment.reference_name

        # write cigar to file
        with open(cigar_file_, "w") as cig_handle:
            cig_handle.write(guide_alignment.cigar + "\n")
    else:
        # otherwise, get strand from the existing cigar file
        strand, reference_name = getInfoFromCigarFile(cigar_file_)

    # add an indicator for the model being used
    if self.stateMachineType == "threeStateHdp":
        model_label = ".sm3Hdp"
        stateMachineType_flag = "--sm3Hdp "
        if self.twoD_chemistry:
            assert (self.in_templateHdp is not None) and (self.in_complementHdp is not None), \
                "Need to provide HDPs"
        else:
            assert self.in_templateHdp is not None, "Need to provide Template HDP"
    else:
        # "threeState" and any unrecognised value share the plain model.
        model_label = ".sm"
        stateMachineType_flag = ""

    # output file name has the format: /directory/for/files/file.model.orientation.tsv
    if strand not in ("+", "-"):
        # sanity check
        self.failStop(
            "[SignalAlignment.run] ERROR Unexpected strand {}".format(strand), npRead)
        return False
    if self.output_format == "full":
        suffix = ".forward.tsv" if strand == "+" else ".backward.tsv"
    elif self.output_format == "variantCaller":
        suffix = ".tsv"
    else:
        suffix = ".assignments.tsv"
    posteriors_file_path = os.path.join(self.destination,
                                        read_label + model_label + suffix)

    # input (match) models
    if self.in_templateHmm is None:
        self.in_templateHmm = defaultModelFromVersion(
            strand="template", version=npRead.version)
    if self.twoD_chemistry and self.in_complementHmm is None:
        pop1_complement = npRead.complement_model_id == "complement_median68pA_pop1.model"
        self.in_complementHmm = defaultModelFromVersion(
            strand="complement", version=npRead.version,
            pop1_complement=pop1_complement)

    assert self.in_templateHmm is not None
    if self.twoD_chemistry and self.in_complementHmm is None:
        self.failStop(
            "[SignalAlignment.run] ERROR Need to have complement HMM for 2D analysis",
            npRead)
        return False

    template_model_flag = "-T {} ".format(self.in_templateHmm)
    if self.twoD_chemistry:
        complement_model_flag = "-C {} ".format(self.in_complementHmm)
    else:
        complement_model_flag = ""

    print("[SignalAlignment.run] NOTICE: template model {t} complement model {c}"
          "".format(t=self.in_templateHmm, c=self.in_complementHmm))

    # reference sequences
    assert os.path.isfile(self.forward_reference)
    forward_ref_flag = "-f {f_ref} ".format(f_ref=self.forward_reference)
    if self.backward_reference:
        assert os.path.isfile(self.backward_reference)
        backward_ref_flag = "-b {b_ref} ".format(b_ref=self.backward_reference)
    else:
        backward_ref_flag = ""

    # input HDPs: emit each flag only for an HDP that is actually set, so a
    # missing template HDP no longer becomes the literal "-v None".
    hdp_flags = ""
    if self.in_templateHdp is not None:
        hdp_flags += "-v {tHdp_loc} ".format(tHdp_loc=self.in_templateHdp)
    if self.twoD_chemistry and self.in_complementHdp is not None:
        hdp_flags += "-w {cHdp_loc} ".format(cHdp_loc=self.in_complementHdp)

    # threshold
    if self.threshold is not None:
        threshold_flag = "-D {threshold} ".format(threshold=self.threshold)
    else:
        threshold_flag = ""

    # diagonal expansion
    if self.diagonal_expansion is not None:
        diag_expansion_flag = "-x {expansion} ".format(expansion=self.diagonal_expansion)
    else:
        diag_expansion_flag = ""

    # constraint trim
    if self.constraint_trim is not None:
        trim_flag = "-m {trim} ".format(trim=self.constraint_trim)
    else:
        trim_flag = ""

    # output format
    if self.output_format not in self.output_formats:
        self.failStop(
            "[SignalAlignment.run] ERROR illegal output format selected %s" % self.output_format)
        return False
    out_fmt = "-s {fmt} ".format(fmt=self.output_formats[self.output_format])

    # degenerate nucleotide information
    if self.degenerate is not None:
        degenerate_flag = "-o {} ".format(self.degenerate)
    else:
        degenerate_flag = ""

    # twoD flag
    twoD_flag = "--twoD" if self.twoD_chemistry else ""

    # commands
    if self.get_expectations:
        template_expectations_file_path = os.path.join(
            self.destination, read_label + ".template.expectations.tsv")
        complement_expectations_file_path = os.path.join(
            self.destination, read_label + ".complement.expectations.tsv")
        command = (
            "{vA} {td} {degen}{sparse}{model} -q {npRead} "
            "{t_model}{c_model}{thresh}{expansion}{trim} {hdp}-L {readLabel} -p {cigarFile} "
            "-t {templateExpectations} -c {complementExpectations} -n {seq_name} {f_ref_fa} {b_ref_fa}"
        ).format(vA=self.path_to_signalMachine, model=stateMachineType_flag,
                 cigarFile=cigar_file_, npRead=npRead_, readLabel=read_label,
                 td=twoD_flag,
                 templateExpectations=template_expectations_file_path,
                 hdp=hdp_flags,
                 complementExpectations=complement_expectations_file_path,
                 t_model=template_model_flag, c_model=complement_model_flag,
                 thresh=threshold_flag, expansion=diag_expansion_flag,
                 trim=trim_flag, degen=degenerate_flag, sparse=out_fmt,
                 seq_name=reference_name, f_ref_fa=forward_ref_flag,
                 b_ref_fa=backward_ref_flag)
    else:
        command = (
            "{vA} {td} {degen}{sparse}{model} -q {npRead} "
            "{t_model}{c_model}{thresh}{expansion}{trim} -p {cigarFile} "
            "-u {posteriors} {hdp}-L {readLabel} -n {seq_name} {f_ref_fa} {b_ref_fa}"
        ).format(vA=self.path_to_signalMachine, model=stateMachineType_flag,
                 sparse=out_fmt, cigarFile=cigar_file_, readLabel=read_label,
                 npRead=npRead_, td=twoD_flag, t_model=template_model_flag,
                 c_model=complement_model_flag, posteriors=posteriors_file_path,
                 thresh=threshold_flag, expansion=diag_expansion_flag,
                 trim=trim_flag, hdp=hdp_flags, degen=degenerate_flag,
                 seq_name=reference_name, f_ref_fa=forward_ref_flag,
                 b_ref_fa=backward_ref_flag)

    # run
    print("[SignalAlignment.run] running command: ", command, end="\n")
    try:
        argv = command.split()
        if self.track_memory_usage:
            mem_command = ['/usr/bin/time', '-f', '\\nDEBUG_MAX_MEM:%M\\n']
            print("[SignalAlignment.run] Prepending command to track mem usage: {}"
                  .format(mem_command))
            argv = mem_command + argv
        raw_output = subprocess.check_output(argv, stderr=subprocess.STDOUT)
        # check_output returns bytes; decode it instead of splitting the
        # repr() of the bytes object, which leaked "b'" and escape sequences
        # into the echoed lines.
        if isinstance(raw_output, bytes):
            raw_output = raw_output.decode("utf-8", errors="replace")
        for line in raw_output.split("\n"):
            print("[SignalAlignment.run] {}: {}".format(read_label, line))
            if line.startswith("DEBUG_MAX_MEM"):
                self.max_memory_usage_kb = int(line.split(":")[1])
    except Exception as e:
        print("[SignalAlignment.run] exception ({}) running signalAlign: {}".format(
            type(e), e))
        raise

    # save to fast5 file (if appropriate)
    if self.embed:
        print("[SignalAlignment.run] embedding into Fast5 ")
        data = self.read_in_signal_align_tsv(posteriors_file_path,
                                             file_type=self.output_format)
        npRead = NanoporeRead(fast_five_file=self.in_fast5, twoD=self.twoD_chemistry,
                              event_table=self.event_table)
        npRead.Initialize(None)
        signal_align_path = npRead.get_latest_basecall_edition(
            "/Analyses/SignalAlign_00{}", new=False)
        assert signal_align_path, "There is no path in Fast5 file: {}".format(
            "/Analyses/SignalAlign_00{}")
        output_path = npRead._join_path(signal_align_path, self.output_format)
        npRead.write_data(data, output_path)

        # Todo add attributes to signalalign output
        # NOTE(review): the nesting of this section was reconstructed from a
        # flattened source; confirm the sam write belongs under "full".
        if self.output_format == "full":
            print("[SignalAlignment.run] writing maximum expected alignment ")
            alignment = mea_alignment_from_signal_align(None, events=data)
            mae_path = npRead._join_path(signal_align_path, "MEA_alignment_labels")
            events = npRead.get_template_events()
            if events:
                labels = match_events_with_signalalign(
                    sa_events=alignment,
                    event_detections=np.asanyarray(npRead.template_events),
                    minus=(strand == "-"),
                    rna=npRead.is_read_rna())
                npRead.write_data(labels, mae_path)

            # The sam file is absent when the cigar came from a cached temp
            # file, in which case an empty string is stored.
            sam_string = str()
            if os.path.isfile(temp_samfile_):
                with open(temp_samfile_, 'r') as sam_handle:
                    sam_string = sam_handle.read()
            sam_path = npRead._join_path(signal_align_path, "sam")
            npRead.write_data(data=sam_string, location=sam_path, compression=None)

    # Temp folder removal is disabled here:
    # self.temp_folder.remove_folder()
    return True
def run(self, get_expectations=False):
    """Prepare one fast5 read, run ``./signalMachine`` on it and optionally embed the result.

    The method writes the nanopore read, its nucleotide sequence and the
    guide-alignment cigar into temp files, assembles the command line from
    the instance's settings and executes it with ``os.system``. When
    ``self.embed`` is set, the resulting table is written back into the
    fast5 file.

    Args:
        get_expectations: when true, build the command that writes template
            and complement expectation files instead of the posteriors file.

    Returns:
        ``False`` if the fast5 file is missing, the read fails its initial
        checks, the guide alignment does not validate, a 2D read has no
        complement HMM, or the output format is unknown. ``True`` otherwise
        (the command's exit status does not affect the return value; a
        non-zero status is only reported on stderr).

    Raises:
        AssertionError: if required HMM/HDP/reference inputs are missing.
    """
    print("[SignalAlignment.run]INFO: Starting on {read}".format(
        read=self.in_fast5), file=sys.stderr)
    if get_expectations:
        # The complement model is only used for 2D reads, so only demand it
        # there; 1D training needs the template model alone.
        assert self.in_templateHmm is not None, "Need HMM files for model training"
        if self.twoD_chemistry:
            assert self.in_complementHmm is not None, "Need HMM files for model training"

    # file checks
    if not os.path.isfile(self.in_fast5):
        print("[SignalAlignment.run]ERROR: Did not find .fast5 at{file}".format(
            file=self.in_fast5))
        return False

    self.openTempFolder("tempFiles_%s" % self.read_name)
    npRead_ = self.addTempFilePath("temp_%s.npRead" % self.read_name)
    # TODO: check whether this is broken for RNA because of 3'-5' mapping
    npRead = NanoporeRead(fast_five_file=self.in_fast5, twoD=self.twoD_chemistry,
                          event_table=self.event_table)
    # `with` closes the handle even if Write raises.
    with open(npRead_, "w") as np_handle:
        ok = npRead.Write(parent_job=None, out_file=np_handle, initialize=True)
    if not ok:
        self.failStop(
            "[SignalAlignment.run]File: %s did not pass initial checks" % self.read_name,
            npRead)
        return False

    read_label = npRead.read_label  # use this to identify the read throughout
    read_fasta_ = self.addTempFilePath("temp_seq_%s.fa" % read_label)
    temp_samfile_ = self.addTempFilePath("temp_sam_file_%s.sam" % read_label)
    cigar_file_ = self.addTempFilePath("temp_cigar_%s.txt" % read_label)

    if self.twoD_chemistry:
        ok, version, pop1_complement = self.prepare_twod(
            nanopore_read=npRead, twod_read_path=read_fasta_)
    else:
        ok, version, _ = self.prepare_oned(nanopore_read=npRead,
                                           oned_read_path=read_fasta_)
        pop1_complement = None
    if not ok:
        # Previously this status was silently overwritten; surface it but
        # keep going so the control flow stays as before.
        print("[SignalAlignment.run]WARNING: preparing the nucleotide read for "
              "{} reported failure; continuing".format(read_label), file=sys.stderr)

    # add an indicator for the model being used
    if self.stateMachineType == "threeStateHdp":
        model_label = ".sm3Hdp"
        stateMachineType_flag = "--sm3Hdp "
        if self.twoD_chemistry:
            assert (self.in_templateHdp is not None) and (self.in_complementHdp is not None), \
                "Need to provide HDPs"
        else:
            assert self.in_templateHdp is not None, "Need to provide Template HDP"
    else:
        # "threeState" and any unrecognised value share the plain model.
        model_label = ".sm"
        stateMachineType_flag = ""

    guide_alignment = generateGuideAlignment(
        bwa_index=self.bwa_index, query=read_fasta_,
        temp_sam_path=temp_samfile_, target_regions=self.target_regions)
    if not guide_alignment.validate():
        self.failStop("[SignalAlignment.run]ERROR getting guide alignment", npRead)
        return False

    with open(cigar_file_, "w") as cig_handle:
        cig_handle.write(guide_alignment.cigar + "\n")

    # output file name has the format: /directory/for/files/file.model.orientation.tsv
    strand = guide_alignment.strand
    if strand in ("+", "-"):
        if self.output_format == "full":
            suffix = ".forward.tsv" if strand == "+" else ".backward.tsv"
        elif self.output_format == "variantCaller":
            suffix = ".tsv"
        else:
            suffix = ".assignments"
        posteriors_file_path = self.destination + read_label + model_label + suffix
    else:
        posteriors_file_path = ''

    # Alignment/Expectations routine
    path_to_signalAlign = "./signalMachine"

    # input (match) models
    if self.in_templateHmm is None:
        self.in_templateHmm = defaultModelFromVersion(strand="template", version=version)
    if self.twoD_chemistry and self.in_complementHmm is None:
        self.in_complementHmm = defaultModelFromVersion(
            strand="complement", version=version, pop1_complement=pop1_complement)

    assert self.in_templateHmm is not None
    if self.twoD_chemistry and self.in_complementHmm is None:
        self.failStop(
            "[SignalAlignment.run]ERROR Need to have complement HMM for 2D analysis",
            npRead)
        return False

    template_model_flag = "-T {} ".format(self.in_templateHmm)
    if self.twoD_chemistry:
        complement_model_flag = "-C {} ".format(self.in_complementHmm)
    else:
        complement_model_flag = ""

    print("[SignalALignment.run]NOTICE: template model {t} complement model {c}"
          "".format(t=self.in_templateHmm, c=self.in_complementHmm), file=sys.stderr)

    # reference sequences
    assert os.path.isfile(self.forward_reference)
    forward_ref_flag = "-f {f_ref} ".format(f_ref=self.forward_reference)
    if self.backward_reference:
        assert os.path.isfile(self.backward_reference)
        backward_ref_flag = "-b {b_ref} ".format(b_ref=self.backward_reference)
    else:
        backward_ref_flag = ""

    # input HDPs: emit each flag only for an HDP that is actually set, so a
    # missing template HDP no longer becomes the literal "-v None".
    hdp_flags = ""
    if self.in_templateHdp is not None:
        hdp_flags += "-v {tHdp_loc} ".format(tHdp_loc=self.in_templateHdp)
    if self.twoD_chemistry and self.in_complementHdp is not None:
        hdp_flags += "-w {cHdp_loc} ".format(cHdp_loc=self.in_complementHdp)

    # threshold
    if self.threshold is not None:
        threshold_flag = "-D {threshold} ".format(threshold=self.threshold)
    else:
        threshold_flag = ""

    # diagonal expansion
    if self.diagonal_expansion is not None:
        diag_expansion_flag = "-x {expansion} ".format(expansion=self.diagonal_expansion)
    else:
        diag_expansion_flag = ""

    # constraint trim
    if self.constraint_trim is not None:
        trim_flag = "-m {trim} ".format(trim=self.constraint_trim)
    else:
        trim_flag = ""

    # output format
    if self.output_format not in self.output_formats:
        self.failStop(
            "[SignalAlignment.run]ERROR illegal output format selected %s" % self.output_format)
        return False
    out_fmt = "-s {fmt} ".format(fmt=self.output_formats[self.output_format])

    # degenerate nucleotide information
    if self.degenerate is not None:
        degenerate_flag = "-o {} ".format(self.degenerate)
    else:
        degenerate_flag = ""

    twoD_flag = "--twoD" if self.twoD_chemistry else ""

    # commands
    # NOTE: paths are interpolated unquoted into a shell string, so paths
    # containing whitespace or shell metacharacters will break the command.
    if get_expectations:
        template_expectations_file_path = self.destination + read_label + ".template.expectations"
        complement_expectations_file_path = self.destination + read_label + ".complement.expectations"
        command = (
            "{vA} {td} {degen}{sparse}{model} -q {npRead} "
            "{t_model}{c_model}{thresh}{expansion}{trim} {hdp}-L {readLabel} -p {cigarFile} "
            "-t {templateExpectations} -c {complementExpectations} -n {seq_name} {f_ref_fa} {b_ref_fa}"
        ).format(vA=path_to_signalAlign, model=stateMachineType_flag,
                 cigarFile=cigar_file_, npRead=npRead_, readLabel=read_label,
                 td=twoD_flag,
                 templateExpectations=template_expectations_file_path,
                 hdp=hdp_flags,
                 complementExpectations=complement_expectations_file_path,
                 t_model=template_model_flag, c_model=complement_model_flag,
                 thresh=threshold_flag, expansion=diag_expansion_flag,
                 trim=trim_flag, degen=degenerate_flag, sparse=out_fmt,
                 seq_name=guide_alignment.reference_name,
                 f_ref_fa=forward_ref_flag, b_ref_fa=backward_ref_flag)
    else:
        command = (
            "{vA} {td} {degen}{sparse}{model} -q {npRead} "
            "{t_model}{c_model}{thresh}{expansion}{trim} -p {cigarFile} "
            "-u {posteriors} {hdp}-L {readLabel} -n {seq_name} {f_ref_fa} {b_ref_fa}"
        ).format(vA=path_to_signalAlign, model=stateMachineType_flag,
                 sparse=out_fmt, cigarFile=cigar_file_, readLabel=read_label,
                 npRead=npRead_, td=twoD_flag, t_model=template_model_flag,
                 c_model=complement_model_flag, posteriors=posteriors_file_path,
                 thresh=threshold_flag, expansion=diag_expansion_flag,
                 trim=trim_flag, hdp=hdp_flags, degen=degenerate_flag,
                 seq_name=guide_alignment.reference_name,
                 f_ref_fa=forward_ref_flag, b_ref_fa=backward_ref_flag)

    # run
    print("signalAlign - running command: ", command, end="\n", file=sys.stderr)
    exit_status = os.system(command)
    if exit_status != 0:
        # Report the failure; the return value is left unchanged for callers.
        print("[SignalAlignment.run]WARNING: command exited with status {}".format(
            exit_status), file=sys.stderr)

    if self.embed:
        print("signalAlign - embedding into Fast5 ", file=sys.stderr)
        data = self.read_in_signal_align_tsv(posteriors_file_path,
                                             file_type=self.output_format)
        npRead = NanoporeRead(fast_five_file=self.in_fast5, twoD=self.twoD_chemistry,
                              event_table=self.event_table)
        npRead.Initialize(None)
        signal_align_path = npRead.get_latest_basecall_edition(
            "/Analyses/SignalAlign_00{}", new=False)
        assert signal_align_path, "There is no path in Fast5 file: {}".format(
            "/Analyses/SignalAlign_00{}")
        output_path = npRead._join_path(signal_align_path, self.output_format)
        npRead.write_data(data, output_path)

        # Todo add attributes to signalalign output
        # NOTE(review): the nesting of this section was reconstructed from a
        # flattened source; confirm the sam write belongs under "full".
        if self.output_format == "full":
            print("signalAlign - writing maximum expected alignment ", file=sys.stderr)
            alignment = mea_alignment_from_signal_align(None, events=data)
            mae_path = npRead._join_path(signal_align_path, "MEA_alignment_labels")
            events = npRead.get_template_events()
            if events:
                labels = match_events_with_signalalign(
                    sa_events=alignment,
                    event_detections=np.asanyarray(npRead.template_events),
                    minus=(guide_alignment.strand == "-"),
                    rna=npRead.is_read_rna())
                npRead.write_data(labels, mae_path)

            with open(temp_samfile_, 'r') as sam_handle:
                sam_string = sam_handle.read()
            sam_path = npRead._join_path(signal_align_path, "sam")
            npRead.write_data(data=sam_string, location=sam_path, compression=None)

    # Temp folder removal is disabled here:
    # self.temp_folder.remove_folder()
    return True