def pre_begin_file(self):
    """Reset per-file state and start the item table with its title row.

    ``self.vars`` is cleared, and ``self.items`` becomes a new
    ``align.Align`` (one title row) whose header and body share the same
    column alignment spec.
    """
    column_spec = 'aaaccl'
    title_row = ['# MN', 'HR', 'DOM', 'MON', 'DOW', 'COMMAND_____']

    self.vars = []
    table = align.Align(titles=1)
    self.items = table
    table.add(title_row)
    # Titles and data rows use the same alignment spec.
    table.set_title_alignment(column_spec)
    table.set_alignment(column_spec)
def pre_begin_file(self):
    """Reset per-file state and create the settings and actions tables.

    ``self.vars`` is cleared.  ``self.settings`` and ``self.actions`` each
    become a new ``align.Align`` with one title row; in both tables the
    title row and the data rows use the same alignment spec.
    """
    self.vars = []

    # NAME / VALUE table.
    settings_spec = 'll'
    settings_table = align.Align(titles=1)
    self.settings = settings_table
    settings_table.add(['# NAME', 'VALUE'])
    settings_table.set_title_alignment(settings_spec)
    settings_table.set_alignment(settings_spec)

    # ID / ACTION table.
    actions_spec = 'rl'
    actions_table = align.Align(titles=1)
    self.actions = actions_table
    actions_table.add(['# ID', 'ACTION'])
    actions_table.set_title_alignment(actions_spec)
    actions_table.set_alignment(actions_spec)
# report(final=False): emit the collected rows as aligned text.
#
# The visible statements sort self.lines in place, add every entry to a
# fresh align.Align(), and then print each row yielded by get_items() as
# its parts joined by single spaces via self.println().
#
# NOTE(review): the original indentation of this snippet has been lost, so
# it is not clear how many of these statements are guarded by
# `if not final:` -- confirm against the upstream file before restructuring.
def report(self, final=False): if not final: self.lines.sort() a = align.Align() for parts in self.lines: a.add(parts) for _, parts in a.get_items(): self.println(' '.join(parts)) return
def find_faces(self, image):
    """Return one ``Face`` per aligned detection found in ``image``.

    ``align.Align(image)`` supplies the aligned crops and their bounding
    boxes.  An empty list is returned when either of them is falsy.
    """
    aligned_list, bboxes = align.Align(image)
    detected = []
    if not (aligned_list and bboxes):
        return detected

    for box, crop in zip(bboxes, aligned_list):
        found = Face()
        found.container_image = image
        # Get the bbox: copy its first four values into an int32 array.
        found.bounding_box = np.zeros(4, dtype=np.int32)
        for k in range(4):
            found.bounding_box[k] = box[k]
        # Get the aligned face: the crop is stored exactly as returned.
        found.image = crop
        detected.append(found)
    return detected
def main():
    """Main body of the script.

    Runs three stages in order, all configured by the literals below:

    1. For every entry of ``remove_targets``, recode the sequence with
       ``RemoveCodonMotif`` wherever one of the listed 3-nucleotide motifs
       occurs in any frame, then write the result to ``outdir``.
    2. For every entry of ``addctg_targets``, replace synonymous codons by
       ``CTG`` wherever ``CTG`` occurs at that codon position in at least
       ``count_threshold`` comparison sequences, then write the result.
    3. For every entry of ``spliced_segments``, merge the two previously
       written ``-lowCTG`` gene files into one spliced segment, print a
       MUSCLE alignment of the three sequences and write the merged one.

    The translated protein sequence is asserted to be unchanged after
    every recoding step.

    Raises:
        IOError: if a motif in ``remove_targets`` is not 3 nucleotides long.
        AssertionError: if an input file does not hold exactly one
            sequence, a sequence length is not a multiple of 3, or a
            recoding changed the protein sequence.

    The statements are written so that they are valid on both Python 2 and
    Python 3 (single-argument ``print(...)``, ``//`` for codon arithmetic,
    ``dict.items()``).
    """
    # input / output variables
    musclepath = '/Users/jbloom/muscle3.8/'
    outdir = 'redesigned_sequences'  # output directory
    remove_targets = {  # targets for removing motifs
        # name : (out name, sequence file, comparison file, codonmotifs)
        'PR8-NP': ('PR8-NP-lowCTG', 'sequences/PR8-NP-cDNA.fasta',
                   'comparison_sequence_sets/NP_aligned.fasta',
                   ['GTG', 'ATG', 'CTG'])
    }
    addctg_targets = {  # targets for adding CTG motifs in frame 1
        # name : (out name, sequence file, comparison file)
        'PR8-NP-lowCTG': ('PR8-NP-highCTG',
                          '%s/PR8-NP-lowCTG.fasta' % outdir,
                          'comparison_sequence_sets/NP_aligned.fasta'),
    }
    # any new codons introduced must be found naturally in at least this
    # many sequences
    count_threshold = 100
    spliced_segments = [
        # (spliced name, first gene, its start, its end, second gene,
        #  its start, first exon end, second exon start, its end)
        # with all numbering 1, 2, 3 from beginning of spliced gene
        ('PR8-M', 'PR8-M1', 1, 756, 'PR8-M2', 1, 27, 716, 979),
        ('PR8-NS', 'PR8-NS1', 1, 693, 'PR8-NS2', 1, 30, 503, 909),
    ]
    # gives range of non-overlapping reading frame portion
    nonoverlap_range = {}
    for tup in spliced_segments:
        nonoverlap_range[tup[1]] = (tup[6], tup[7])
        s2length = tup[8] - tup[7] + tup[6] - tup[5]
        nonoverlap_range[tup[4]] = (max(tup[6], tup[3] - tup[7]), s2length)

    # begin removing motifs
    for (name, (outname, seqfile, comparisonfile,
                codonmotifs)) in remove_targets.items():
        print("\nRedesigning %s to create %s." % (name, outname))
        originalseq = fasta.Read(seqfile)
        assert len(originalseq) == 1
        originalseq = originalseq[0]
        (_head, seq) = originalseq
        seq = seq.upper()
        assert len(seq) % 3 == 0
        ncodons = len(seq) // 3
        comparison_seqs = fasta.Read(comparisonfile)
        print("Will use %d comparison sequences for the redesign." %
              len(comparison_seqs))
        for codonmotif in codonmotifs:
            if len(codonmotif) != 3:
                raise IOError(
                    "Method currently only works for 3-nucleotide motifs.")
            for icodon in range(1, ncodons + 1):
                for iframe in [3, 2, 1]:
                    if icodon == ncodons and iframe in [2, 3]:
                        continue  # don't look past the end of the last codon
                    index = 3 * (icodon - 1) + iframe - 1
                    if seq[index:index + 3] == codonmotif:
                        newseq = RemoveCodonMotif(seq, codonmotif, icodon,
                                                  iframe, comparison_seqs,
                                                  count_threshold)
                        assert fasta.Translate([
                            ('newseq', newseq)
                        ])[0][1] == fasta.Translate([('seq', seq)])[0][
                            1], "icodon = %d, iframe = %d, seq[index : index + 3] = %s, newseq[index : index + 3] = %s, seq[3 * (icodon - 1) : 3 * (icodon + 1)] = %s, newseq[3 * (icodon - 1) : 3 * (icodon + 1)] = %s" % (
                                icodon, iframe, seq[index:index + 3],
                                newseq[index:index + 3],
                                seq[3 * (icodon - 1):3 * (icodon + 1)],
                                newseq[3 * (icodon - 1):3 * (icodon + 1)])
                        seq = newseq
        # if the gene has overlapping reading frames, only use recoded
        # portions of non-overlapping
        if name in nonoverlap_range:
            print(
                "Adjusting redesign of %s to account for the overlapping reading frames."
                % name)
            (start_no, end_no) = nonoverlap_range[name]
            (startcodon_no, endcodon_no) = (start_no // 3 + 1,
                                            end_no // 3 - 1)
            seq = "%s%s%s" % (originalseq[1][:3 * startcodon_no],
                              seq[3 * startcodon_no:3 * endcodon_no],
                              originalseq[1][3 * endcodon_no:])
        # make sure the protein sequence is unchanged
        assert fasta.Translate([originalseq])[0][1] == fasta.Translate([
            ('seq', seq)
        ])[0][1]
        header = "%s: %s redesigned to eliminate possible alternative start motifs. The protein sequence is unchanged, and codons are only used if they occur in %d of %d natural sequences." % (
            outname, name, count_threshold, len(comparison_seqs))
        for codonmotif in codonmotifs:
            for iframe in [1, 2, 3]:
                s = "%s in frame %d reduced from %d to %d." % (
                    codonmotif, iframe,
                    CountMotifsInFrame(originalseq[1], codonmotif, iframe),
                    CountMotifsInFrame(seq, codonmotif, iframe))
                print(s)
                header = "%s %s" % (header, s)
        outfile = '%s/%s.fasta' % (outdir, outname)
        print("Writing %s to %s." % (outname, outfile))
        fasta.Write([(header, seq)], outfile)

    # now add CTG motifs
    targetcodon = 'CTG'
    targetaa = fasta.Translate([('codon', targetcodon)])[0][1]
    for (name, (outname, seqfile,
                comparisonfile)) in addctg_targets.items():
        print("\nRedesigning %s to create %s." % (name, outname))
        originalseq = fasta.Read(seqfile)
        assert len(originalseq) == 1
        originalseq = originalseq[0]
        (_head, seq) = originalseq
        seq = seq.upper()
        assert len(seq) % 3 == 0
        ncodons = len(seq) // 3
        comparison_seqs = fasta.Read(comparisonfile)
        print("Will use %d comparison sequences for the redesign." %
              len(comparison_seqs))
        for icodon in range(ncodons):
            codon = seq[icodon * 3:icodon * 3 + 3]
            aa = fasta.Translate([('codon', codon)])[0][1]
            if aa == targetaa and codon != targetcodon:
                # a replacement possibility
                ctgcounts = CountCodonOccurrences(
                    comparison_seqs, icodon + 1, [targetcodon])[targetcodon]
                if ctgcounts >= count_threshold:  # replacement possible
                    newseq = "%s%s%s" % (seq[:icodon * 3], targetcodon,
                                         seq[icodon * 3 + 3:])
                    # make sure the protein sequence is unchanged
                    assert fasta.Translate([
                        ('newseq', newseq)
                    ])[0][1] == fasta.Translate([('seq', seq)])[0][1]
                    seq = newseq
        # make sure the protein sequence is unchanged
        assert fasta.Translate([originalseq])[0][1] == fasta.Translate([
            ('seq', seq)
        ])[0][1]
        # Count the target codon explicitly.  This used to pass the stale
        # loop variable `codonmotif` left over from the motif-removal
        # section, which is only correct by accident and is undefined when
        # remove_targets is empty.
        s = "%s in frame 1 increased from %d to %d." % (
            targetcodon, CountMotifsInFrame(originalseq[1], targetcodon, 1),
            CountMotifsInFrame(seq, targetcodon, 1))
        print(s)
        header = "%s: %s redesigned to add %s motifs in frame 1. The protein sequence is unchanged, and codons are only used if they occur in %d of %d natural sequences." % (
            outname, name, targetcodon, count_threshold,
            len(comparison_seqs))
        header = "%s %s" % (header, s)
        outfile = '%s/%s.fasta' % (outdir, outname)
        print("Writing %s to %s." % (outname, outfile))
        fasta.Write([(header, seq)], outfile)

    # write full alternatively spliced segment coding regions
    for (name, g1, g1_start, g1_end, g2, g2e1_start, g2e1_end, g2e2_start,
         g2e2_end) in spliced_segments:
        outname = "%s-lowCTG" % name
        (g1name, g2name) = ('%s-lowCTG' % g1, '%s-lowCTG' % g2)
        g1_file = '%s/%s.fasta' % (outdir, g1name)
        g2_file = '%s/%s.fasta' % (outdir, g2name)
        outfile = "%s/%s.fasta" % (outdir, outname)
        print("Creating %s from %s and %s." % (outname, g1name, g2name))
        (g1_head, g1_seq) = fasta.Read(g1_file)[0]
        (g2_head, g2_seq) = fasta.Read(g2_file)[0]
        # First gene up to the start of the second exon, followed by the
        # second gene after its first exon.
        merged = "%s%s" % (g1_seq[:g2e2_start - 1], g2_seq[g2e1_end:])
        assert g1_seq in merged
        a = align.Align([(outname, merged), (g1name, g1_seq),
                         (g2name, g2_seq)], musclepath, 'MUSCLE')
        a = align.AddDots(a)
        print("Here is the alignment:\n>%s\n%s\n>%s\n%s\n>%s\n%s" %
              (a[0][0], a[0][1], a[1][0], a[1][1], a[2][0], a[2][1]))
        print("Writing %s to %s" % (outname, outfile))
        header = "%s, made by merging the following: %s; %s" % (
            outname, g1_head, g2_head)
        fasta.Write([(header, merged)], outfile)
def calibrate(
        pack='D21/eightpack',
        I_tof_dir='Si-I_tof',
        peak_fractional_width=0.02,
        dvalues=[
            1.10860231, 1.24596143, 1.357755, 1.63751414, 1.92015553,
            3.13560085
        ],
        dmin=2.5,
        dmax=3.5,
        maxchisq=5.,
        min_counts=800,
        T0_of_E=SEQ_T0_of_E,
        l2table_nxs='./L2table.nxs',
        geometrical_constraints=None,
        align=True,
):
    """Compute DIFC values and a mask for one detector pack; optionally align it.

    Args:
        pack: ``"<packname>/<packtype>"``; must contain exactly one ``/``.
        I_tof_dir: directory holding ``detIDs.npy`` and
            ``pack-<packname>.yaml``; the ``difc-``/``mask-`` ``.npy``
            outputs are written here as well.
        peak_fractional_width: forwarded to ``get_difc_from_Itof.Fitter``.
        dvalues, dmin, dmax, maxchisq, min_counts, T0_of_E: forwarded
            unchanged to ``get_difc_from_Itof.GetPackDifc``.  ``dvalues``
            has a list default; this function never mutates it.
        l2table_nxs: path of the L2 table, used for the DIFC fit and
            (when aligning) by the alignment object.
        geometrical_constraints: optional mapping merged into
            ``alignment.options``, ex. ``dict(Xposition=(-0.005, 0.005))``.
        align: when true, run the alignment step and write
            ``new-<packname>.xml`` in the current directory.

    Returns:
        ``(difc, mask)`` as returned by the ``GetPackDifc`` call.
    """
    # packtype is unused, but the two-way unpack still rejects a malformed
    # `pack` string.
    packname, _packtype = pack.split('/')
    fitter = get_difc_from_Itof.Fitter(
        peak_fractional_width=peak_fractional_width,
        bg_type='linear',
        t0_range=(0, 0.01))
    L2 = load_L2_from_nxs(l2table_nxs)
    detIDs = np.load(os.path.join(I_tof_dir, 'detIDs.npy'))
    detID_list = list(detIDs)

    import yaml
    # safe_load replaces a bare yaml.load(stream): that form can construct
    # arbitrary Python objects and is rejected outright by PyYAML >= 6
    # (Loader is mandatory).  The `with` block also closes the file.
    # NOTE(review): assumes the pack file uses only plain YAML types.
    with open(os.path.join(I_tof_dir, 'pack-%s.yaml' % packname)) as stream:
        packinfo = yaml.safe_load(stream)
    firstpixelID = packinfo['pixelIDs']['first']
    firstpixel_index = detID_list.index(firstpixelID)
    # presumably 1024 is the number of pixels in a pack -- TODO confirm
    L2_pack = L2[firstpixel_index:firstpixel_index + 1024]

    gpd = get_difc_from_Itof.GetPackDifc(
        pack=packname,
        dvalues=dvalues,
        dmin=dmin,
        dmax=dmax,
        I_tof_dir=I_tof_dir,
        fitter=fitter,
        maxchisq=maxchisq,
        min_counts=min_counts,
        T0_of_E=T0_of_E,
        L2=L2_pack,
    )
    difc, mask, signature_d = gpd()
    np.save(os.path.join(I_tof_dir, 'difc-%s.npy' % packname), difc)
    np.save(os.path.join(I_tof_dir, 'mask-%s.npy' % packname), mask)

    if align:
        # Import under another name so the module does not rebind the
        # `align` flag parameter.
        import align as align_module
        alignment = align_module.Align(I_tof_dir)
        if geometrical_constraints:
            alignment.options.update(geometrical_constraints)
        alignment.load_L2_from_nxs(l2table_nxs)
        # Close (and thereby flush) the XML output once alignment is done.
        with open('new-%s.xml' % packname, 'wt') as ofile:
            alignment.align(difc, mask, packname, ofile=ofile)
    return difc, mask
def __init__(self):
    """Initialise the base class and start the item table with its title row."""
    super(PrettyPrint, self).__init__()
    title_row = ['Type', 'Name', 'Major', 'Minor']
    table = align.Align(lj=True, titles=1)
    self.items = table
    table.add(title_row)
def pre_begin_file(self, name=None):
    """Start a fresh ``align.Align`` table (one title row) in ``self.items``.

    ``name`` is accepted but not used here.
    """
    fresh_table = align.Align(titles=1)
    self.items = fresh_table
def pre_open_file(self):
    """Replace ``self.items`` with a new ``align.Align`` that has one title row."""
    fresh_table = align.Align(titles=1)
    self.items = fresh_table
def _prepare(self):
    """Clear the timestamp and start a new entries table with one title row."""
    self.timestamp = None
    fresh_table = align.Align(titles=1)
    self.entries = fresh_table
'data/genes/caePb2.augustusGene.txt') genome_jap1 = genomereader.GenomeReader( 'data/genome/caeJap1/chrUn.fa.gz', 'data/genes/caeJap1.augustusGene.txt') genome_rem3 = genomereader.GenomeReader( 'data/genome/caeRem3/chrUn.fa.gz', 'data/genes/caeRem3.augustusGene.txt') # all genomes have the same genes #all_gene_names = [g["name"] for g in genome_pb2.genes] all_gene_names = genome_pb2.genes.keys() a = align.Align("pam250", 5) # e.g. blosum62, pam250, pam30, rao ... or PAM250.txt (directly from file) # sum of scores of global alignments of nucleotides over all genes with # linear gap penalty 5 rem_jap_nucleotid_seq_pam = 0 pb_jap_nucleotid_seq_pam = 0 rem_pb_nucleotid_seq_pam = 0 # sum of scores of alignments of amino acid sequences over all genes with # pam250 scoring matrix # and linear gap penalty 5 rem_jap_aminoacid_seq_pam = 0 pb_jap_aminoacid_seq_pam = 0 rem_pb_aminoacid_seq_pam = 0
# Pairwise alignment scoring across three genomes (pb2, jap1, rem3).
#
# Builds GenomeReader objects for caeJap1 and caeRem3 (genome_pb2 is created
# before this point), takes the gene names from genome_pb2.genes, and
# creates two aligners: amino_a = Align('blosum62', 1) and
# dna_a = Align('dnafull.txt', 1).  For every gene name, the joined exons of
# each genome pair are globally aligned with dna_a and element [0] of the
# result is stored in the matching alignment_scores_* dict.  The amino acid
# sequences a1/a2/a3 are fetched last.
#
# NOTE(review): element [0] of global_alignment() is presumably the score,
# and the second Align argument presumably the gap penalty -- confirm in
# align.py.  The amino_alignment_scores_* dicts are not filled in the part
# of the loop visible here.
genome_jap1 = genomereader.GenomeReader( GENOMES_PATH + 'caeJap1/chrUn.fa', GENES_PATH + 'caeJap1.augustusGene.txt') genome_rem3 = genomereader.GenomeReader( GENOMES_PATH + 'caeRem3/chrUn.fa', GENES_PATH + 'caeRem3.augustusGene.txt') gene_names = [g['name'] for g in genome_pb2.genes] alignment_scores_pb_jap = {} alignment_scores_pb_rem = {} alignment_scores_jap_rem = {} amino_alignment_scores_pb_jap = {} amino_alignment_scores_pb_rem = {} amino_alignment_scores_jap_rem = {} amino_a = align.Align('blosum62', 1) dna_a = align.Align('dnafull.txt', 1) for name in gene_names: n1 = genome_pb2.join_exons(name) n2 = genome_jap1.join_exons(name) n3 = genome_rem3.join_exons(name) alignment_scores_pb_jap[name] = dna_a.global_alignment(n1, n2)[0] alignment_scores_pb_rem[name] = dna_a.global_alignment(n1, n3)[0] alignment_scores_jap_rem[name] = dna_a.global_alignment(n2, n3)[0] a1 = genome_pb2.get_amino_acid_sequence(name) a2 = genome_jap1.get_amino_acid_sequence(name) a3 = genome_rem3.get_amino_acid_sequence(name)
def main(self):
    '''
    purpose: run the shortstack steps in sequence

    Order of operations:
        - parse_input.py: parse the input files
        - encoder.py: match basecalls with sequences from the encoding file
        - assemble: build mutated reference sequences when a mutation VCF
          is given (supervised mode); otherwise continue unsupervised
        - align.py: run the first round of FTM
        - write the QC table to self.qc_out_file as tab-separated text
    '''
    # ---- Parse input --------------------------------------------------
    input_parser = parse_input.Parse_files(
        self.input_s6, self.output_dir, self.target_fa, self.mutation_vcf,
        self.encoding_file, self.qc_threshold, self.num_cores)
    (s6_df, qc_df, mutation_df,
     encoding_df, fasta_df) = input_parser.main_parser()

    # ---- Encode S6 ----------------------------------------------------
    log.info("Reads encoded using file:\n {}".format(self.encoding_file))
    basecall_encoder = encoder.Encode_files(s6_df, encoding_df)
    # One frame of targets found per molecule, one of parity-check results.
    encoded_df, parity_df = basecall_encoder.map_basecalls(
        encoding_df=encoding_df, s6_df=s6_df, dropna=False)

    # Append the parity information (from encoder.py) to the QC table
    # (from parse_input.py), then index the result by feature.
    qc_df = pd.concat([qc_df, parity_df], axis=0)
    qc_df.set_index("FeatureID", inplace=True, drop=True)

    # ---- Assemble mutations (supervised mode only) --------------------
    if self.mutation_vcf == "none":
        # No mutations provided: unsupervised mode, empty mutant_fasta.
        mut_message = "No mutations provided. Entering unsupervised mode."
        print(mut_message)
        log.info(mut_message)
        mutant_fasta = ""
    else:
        log.info("Mutations assembled from:\n {}".format(
            self.mutation_vcf))
        mutation_assembler = assemble.AssembleMutations(
            fasta_df, mutation_df, self.run_info_file, s6_df)
        # Mutated reference sequences built from the VCF.
        mutant_fasta = mutation_assembler.main()

    # ---- FTM ------------------------------------------------------------
    align_message = "Running FTM for perfect matches.\n"
    print(align_message)
    log.info(align_message)
    ftm_aligner = align.Align(
        fasta_df, encoded_df, mutant_fasta, self.detection_mode,
        self.deltaz_threshold, self.kmer_length, self.output_dir,
        self.diversity_threshold, self.qc_out_file, self.run_info_file)
    # First round of FTM.
    ftm_df, nonmatches = ftm_aligner.main()

    # ---- Reporting ------------------------------------------------------
    qc_df.to_csv(self.qc_out_file, header=True, index=True, sep="\t")
def pre_begin_file(self, name=None):
    """Run the base-class hook, then start a new ``lj=True`` entries table."""
    super(PrettyPrint, self).pre_begin_file(name)
    fresh_table = align.Align(lj=True)
    self.entries = fresh_table
def pre_begin_file(self):
    """Replace ``self.items`` with a new default-constructed ``align.Align``."""
    fresh_table = align.Align()
    self.items = fresh_table
sample code for loading numbers. """ #globs['username']=username if parent == None: app = wx.PySimpleApp( redirect=False ) # Create app object if not called from within another object. dlg = NumbersDialog(parent, data) dlg.Destroy() if parent == None: app.MainLoop() return if __name__ == "__main__": d = [[ 'ajjjjjjjaa', 'bbb', 'ccc', 'aaa', 'bbb', 'ccc', 'aaa', 'bbb', 'ccc', 'aaa', 'bbb', 'ccc' ], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [ True, False, False, True, False, False, True, False, False, True, False, False ]] data = align.Align() data.set_trial_colum(colum_name='scenario4') data.make_z_from_scenario() data.solve() data.disp_solve() numbers(data)