Beispiel #1
0
	def rvscomp(self):
		"""Replace ``self.seq`` in place with the result of ``rvs_comp_str`` on it."""
		reverse_complement = rvs_comp_str(self.seq)
		self.seq = reverse_complement
Beispiel #2
0
#!/usr/bin/python

import sys, string
from AshworthUtil import rvs_comp_str

# Take the sequence from the command-line arguments (all arguments are
# concatenated without a separator) or, when none are given, from stdin.
# ``sequence`` is used instead of ``input`` so the builtin is not shadowed, and
# ``''.join`` replaces the deprecated ``string.join`` function.
if len(sys.argv) > 1:
    sequence = ''.join(sys.argv[1:])
else:
    sequence = sys.stdin.read()

# Trailing newlines are stripped before the sequence is handed to
# rvs_comp_str.  A single-argument ``print(...)`` behaves the same under the
# Python 2 print statement and the Python 3 print function.
print(rvs_comp_str(sequence.rstrip('\n')))
    def makeprobes(self, fastafiles):
        """Create probes for every sequence in ``fastafiles``; append them to ``self.probes``.

        The sequences are loaded with ``FastaSeqs`` and its summary is printed.
        When ``self.regions`` is empty, one region spanning the whole sequence
        is created for each sequence and strand; otherwise every missing
        ``(name, strand)`` key receives an empty ``RegionSet`` so the lookups
        further down cannot fail.

        On the "+" strand, regions are visited in reverse sorted order and the
        cursor walks towards lower coordinates (probes start at the 3' end of
        the region).  On the "-" strand, regions are visited in sorted order,
        the cursor walks towards higher coordinates, and the stored probe
        sequence is passed through ``rvs_comp_str``.  Windows matching
        ``opt.mask`` are skipped.  ``self.probespergene`` caps the number of
        probes per region set and ``self.probesperregion`` per region; a value
        <= 0 disables the respective cap.

        Args:
            fastafiles: passed unchanged to ``FastaSeqs.loadseqs``.
        """
        f = FastaSeqs()
        f.loadseqs(fastafiles)
        print(f.summarize())

        strands = ("+", "-")
        if not self.regions:
            # no regions supplied: create one region spanning each whole sequence
            for seq in f.seqs.values():
                seq_length = len(seq)
                for d in strands:
                    whole = RegionSet("noid")
                    whole.add(Region(seq.name, d, 1, seq_length))
                    self.regions[(seq.name, d)] = {"noid": whole}
        else:
            # prevent lookup errors for sequences/strands without any region
            for seq in f.seqs.values():
                for d in strands:
                    if (seq.name, d) not in self.regions:
                        self.regions[(seq.name, d)] = {"noid": RegionSet()}

        probelength = self.probelength
        # distance between the start positions of two consecutive probes
        step = probelength + self.gap

        for seq in f.seqs.values():
            seqlen = len(seq)
            sys.stderr.write("%s\n" % seq.name)
            # probe-name prefix: first 8 characters of the name, underscores removed
            seqabbv = seq.name[:8].replace("_", "")

            # The probes are created starting from the 3' end of the region.
            # This makes it easy to create 3'-biased probe sets of 'n' probes.
            # forward strand
            for regionset in self.regions[(seq.name, "+")].values():
                # start with last region (exon), create probes from 3' end until nprobes reached
                nprobes_id = 0
                for region in reversed(sorted(regionset.regions)):
                    nprobes_region = 0
                    cursor = region.end - probelength + self.posoffset
                    # note: strings 0-indexed (i), genome sequences 1-indexed (i+1)
                    while cursor >= region.start:
                        # NOTE(review): '>= seqlen' also rejects a probe that ends
                        # exactly on the last base of the sequence - confirm intended.
                        if cursor < 0 or cursor + probelength >= seqlen:
                            break
                        if self.probespergene > 0 and nprobes_id >= self.probespergene:
                            break
                        if self.probesperregion > 0 and nprobes_region >= self.probesperregion:
                            break
                        probeseq = seq.seq[cursor : cursor + probelength]
                        if not re.search(opt.mask, probeseq):  # skip probes containing masked sequence
                            probename = "%s_%i_+" % (seqabbv, cursor + 1)
                            p = Probe(start=cursor + 1, strand="+", name=probename, seq=probeseq, parent=seq.name)
                            self.probes.append(p)
                            nprobes_id += 1
                            nprobes_region += 1
                        cursor -= step

            # reverse strand
            for regionset in self.regions[(seq.name, "-")].values():
                nprobes_id = 0
                for region in sorted(regionset.regions):
                    nprobes_region = 0
                    cursor = region.start + self.negoffset
                    while cursor < region.end - probelength:
                        if cursor < 0 or cursor + probelength >= seqlen:
                            break
                        if self.probespergene > 0 and nprobes_id >= self.probespergene:
                            break
                        if self.probesperregion > 0 and nprobes_region >= self.probesperregion:
                            break
                        probeseq = seq.seq[cursor : cursor + probelength]
                        if not re.search(opt.mask, probeseq):  # skip probes containing masked sequence
                            probename = "%s_%i_-" % (seqabbv, cursor + 1)
                            p = Probe(
                                start=cursor + 1,
                                strand="-",
                                name=probename,
                                seq=rvs_comp_str(probeseq),
                                parent=seq.name,
                            )
                            self.probes.append(p)
                            nprobes_id += 1
                            nprobes_region += 1
                        cursor += step
	def makeprobes(self,fastafiles):
		"""Create probes for every sequence in ``fastafiles``; append them to ``self.probes``.

		The sequences are loaded with ``FastaSeqs`` and its summary is printed.
		When ``self.regions`` is empty, one region spanning the whole sequence
		is created for each sequence and strand; otherwise every missing
		``(name, strand)`` key receives an empty ``RegionSet`` so the lookups
		further down cannot fail.

		On the '+' strand, regions are visited in reverse sorted order and the
		cursor walks towards lower coordinates (probes start at the 3' end of
		the region).  On the '-' strand, regions are visited in sorted order,
		the cursor walks towards higher coordinates, and the stored probe
		sequence is passed through ``rvs_comp_str``.  Windows matching
		``opt.mask`` are skipped.  ``self.probespergene`` caps the number of
		probes per region set and ``self.probesperregion`` per region; a value
		<= 0 disables the respective cap.

		Args:
			fastafiles: passed unchanged to ``FastaSeqs.loadseqs``.
		"""
		f = FastaSeqs()
		f.loadseqs(fastafiles)
		print(f.summarize())

		strands = ('+','-')
		if not self.regions:
			# no regions supplied: create one region spanning each whole sequence
			for seq in f.seqs.values():
				seq_length = len(seq)
				for d in strands:
					whole = RegionSet('noid')
					whole.add( Region(seq.name,d,1,seq_length) )
					self.regions[ (seq.name,d) ] = {'noid':whole}
		else:
			# prevent lookup errors for sequences/strands without any region
			for seq in f.seqs.values():
				for d in strands:
					if (seq.name,d) not in self.regions:
						self.regions[ (seq.name,d) ] = {'noid':RegionSet()}

		probelength = self.probelength
		# distance between the start positions of two consecutive probes
		step = probelength + self.gap

		for seq in f.seqs.values():
			seqlen = len(seq)
			sys.stderr.write('%s\n' %seq.name)
			# probe-name prefix: first 8 characters of the name, underscores removed
			seqabbv = seq.name[:8].replace('_','')

			# The probes are created starting from the 3' end of the region.
			# This makes it easy to create 3'-biased probe sets of 'n' probes.
			# forward strand
			for regionset in self.regions[(seq.name,'+')].values():
				# start with last region (exon), create probes from 3' end until nprobes reached
				nprobes_id = 0
				for region in reversed(sorted(regionset.regions)):
					nprobes_region = 0
					cursor = region.end - probelength + self.posoffset
					# note: strings 0-indexed (i), genome sequences 1-indexed (i+1)
					while cursor >= region.start:
						# NOTE(review): '>= seqlen' also rejects a probe that ends
						# exactly on the last base of the sequence - confirm intended.
						if cursor < 0 or cursor + probelength >= seqlen:
							break
						if self.probespergene > 0 and nprobes_id >= self.probespergene:
							break
						if self.probesperregion > 0 and nprobes_region >= self.probesperregion:
							break
						probeseq = seq.seq[cursor:cursor+probelength]
						if not re.search(opt.mask,probeseq): # skip probes containing masked sequence
							probename = '%s_%i_+' %(seqabbv,cursor+1)
							p = Probe( start=cursor+1, strand='+', name=probename, seq=probeseq, parent=seq.name )
							self.probes.append(p)
							nprobes_id += 1
							nprobes_region += 1
						cursor -= step

			# reverse strand
			for regionset in self.regions[(seq.name,'-')].values():
				nprobes_id = 0
				for region in sorted(regionset.regions):
					nprobes_region = 0
					cursor = region.start + self.negoffset
					while cursor < region.end - probelength:
						if cursor < 0 or cursor + probelength >= seqlen:
							break
						if self.probespergene > 0 and nprobes_id >= self.probespergene:
							break
						if self.probesperregion > 0 and nprobes_region >= self.probesperregion:
							break
						probeseq = seq.seq[cursor:cursor+probelength]
						if not re.search(opt.mask,probeseq): # skip probes containing masked sequence
							probename = '%s_%i_-' %(seqabbv,cursor+1)
							p = Probe( start=cursor+1, strand='-', name=probename, seq=rvs_comp_str(probeseq), parent=seq.name )
							self.probes.append(p)
							nprobes_id += 1
							nprobes_region += 1
						cursor += step
 def countsites(self, seq, length):
     """Count every window of ``length`` characters in ``seq`` on both strands.

     For each start position the window itself and the result of
     ``rvs_comp_str`` on it are each incremented by one in ``self.sites``.

     Args:
         seq: sliceable sequence (string) to scan.
         length: window size.
     """
     # + 1 so that the final window, which starts at len(seq) - length, is
     # counted too; the previous bound stopped one position short of it.
     for i in range(len(seq) - length + 1):
         subseq = seq[i:i + length]
         self.sites[subseq] += 1
         self.sites[rvs_comp_str(subseq)] += 1
	def countsites(self,seq,length):
		"""Count every window of ``length`` characters in ``seq`` on both strands.

		For each start position the window itself and the result of
		``rvs_comp_str`` on it are each incremented by one in ``self.sites``.

		Args:
			seq: sliceable sequence (string) to scan.
			length: window size.
		"""
		# + 1 so that the final window, which starts at len(seq) - length, is
		# counted too; the previous bound stopped one position short of it.
		for i in range(len(seq)-length+1):
			subseq = seq[i:i+length]
			self.sites[subseq] += 1
			self.sites[rvs_comp_str(subseq)] += 1
        # First time this parent is seen: create its Gene record, store `seq`
        # under 'seq' and start an empty list of CDS segments.
        if not parent in genes:
            genes[parent] = Gene()
            genes[parent]['seq'] = seq
            genes[parent]['cds'] = []
        # Record this segment as a (start, end, strand) tuple under its parent.
        genes[parent]['cds'].append((start, end, strand))

# The second command-line argument names the FASTA file holding the source
# sequences that the CDS coordinates refer to.
seqfile = sys.argv[2]
src = FastaSeqs()
src.loadseqs([seqfile])

outp = []

for gene_id, gene in genes.items():
    seq_name = gene['seq']
    if seq_name not in src.seqs:
        msg('%s not found in source!' % seq_name)
        # Exit with a non-zero status so callers can tell the run failed.
        sys.exit(1)
    if 'cds' in gene:
        segments = []
        on_minus = False
        # Walk the CDS segments in ascending start order.
        for start, end, strand in sorted(gene['cds'], key=lambda cds: cds[0]):
            # Coordinates are 1-based inclusive; convert to a 0-based slice.
            piece = src.seqs[seq_name].seq[(start - 1):end]
            if strand == '-':
                piece = rvs_comp_str(piece)
                on_minus = True
            segments.append(piece)
        # If any segment was on the '-' strand, the reverse-complemented
        # pieces are joined in descending coordinate order.
        if on_minus:
            segments.reverse()
        outp.append('>%s_%s\n%s' % (gene_id, 'cds', ''.join(segments)))

print('\n'.join(outp))