コード例 #1
0
def main():
    """Run the central-dogma demo on two hard-coded DNA sequences.

    For each sequence this prints the complement, the reverse complement,
    the RNA transcript and the protein translation, followed by a
    position-by-position comparison of the two sequences with a tally of
    the mismatching positions.
    """
    print("\t Welcome to the Central Dogma of Biology Demo")
    print("\t BioPython version", Bio.__version__)

    # The two DNA sequences under study (same length).
    seq_a = Seq("AGTACAGTA")
    seq_b = Seq("AGTAGAGAA")

    print("\t The sequences:")
    print("\t myDNASeqA :", seq_a)
    print("\t myDNASeqB :", seq_b)

    # Complement of each sequence.
    print("\n\t The comp of seq A is:", seq_a.complement())
    print("\t The comp of seq B is:", seq_b.complement())

    # Reverse complement of each sequence.
    print("\n\t The REV comp of seq A is:", seq_a.reverse_complement())
    print("\t The REV comp of seq B is:", seq_b.reverse_complement())

    # Transcription: DNA to RNA.
    print("\n\t The RNA of seq A is:", seq_a.transcribe())
    print("\t The RNA of seq B is:", seq_b.transcribe())

    # Translation: the DNA sequence is translated directly to protein.
    print("\n\t The protein of seq A is:", seq_a.translate())
    print("\t The protein of seq B is:", seq_b.translate())

    # Character-by-character comparison.
    print("\n\t Compare the sequences char by char")
    print("\n\t seqA seqB")
    mismatches = 0
    for base_a, base_b in zip(seq_a, seq_b):
        if base_a != base_b:
            mismatches += 1
            marker = "!="  # flag the positions that differ
        else:
            marker = "  "
        print(f"\t {base_a} {marker} {base_b}")
    print(f"\n\t Total mismatches: {mismatches}")
コード例 #2
0
def main():
    """Print a fixed DNA sequence followed by its complement.

    ``complement()`` returns a new sequence and leaves the original
    untouched, so its result has to be printed explicitly; printing
    ``my_seq`` a second time would only repeat the input.
    """
    try:
        my_seq = Seq("AGTACACTGGT")
        print(my_seq)
        print(my_seq.complement())
    except Exception as exc:
        # Best-effort demo: report the failure instead of hiding it.
        print(f"Could not complement the sequence: {exc}")
コード例 #3
0
def complementSeq():
    """Print a sample DNA sequence and three derived forms of it.

    The derived forms are the complement, the reverse complement, and the
    reverse complement applied twice.
    """
    dna = Seq("GGATCGAAATCGC", IUPAC.unambiguous_dna)
    # Each entry is (label, zero-argument callable) so that every value is
    # computed right before it is printed.
    report = (
        ('My_Seq = ', lambda: dna),
        ('My_Seq complement = ', lambda: dna.complement()),
        ('My_Seq reverse complement = ', lambda: dna.reverse_complement()),
        ('My_Seq reverse reverse complement = ',
         lambda: dna.reverse_complement().reverse_complement()),
    )
    for label, compute in report:
        print(label, compute())
コード例 #4
0
def extract_single(seq, strand_info, seq_start, seq_end, bases):
    """Return the flanks and body of a 1-based, inclusive interval of ``seq``.

    Args:
        seq: Nucleotide string to slice.
        strand_info: ``"-"`` selects the minus-strand branch; any other
            value is handled as the plus strand.
        seq_start: 1-based start of the interval.
        seq_end: 1-based, inclusive end of the interval.
        bases: Number of flanking bases requested on each side.

    Returns:
        Tuple ``(upstream_seq, exon_sequence, downstream_seq)``.  On the
        minus strand every piece is complemented and reversed and the two
        flanks swap roles.  The exon is upper-cased.  A flank is shorter
        than ``bases`` when the interval lies closer than ``bases`` to an
        end of ``seq``.
    """
    # A negative slice start would silently wrap round to the end of the
    # sequence, so the left-hand flank is clamped at position 0.
    left_start = max(0, (seq_start - 1) - bases)
    left_end = seq_start - 1
    right_start = seq_end
    right_end = seq_end + bases

    if strand_info == "-":
        seq_comp = Seq(seq, NucleotideAlphabet()).complement()
        # Complement followed by string reversal gives the reverse complement.
        exon_sequence = str(seq_comp[seq_start - 1:seq_end])[::-1].upper()
        upstream_seq = str(seq_comp[right_start:right_end])[::-1]
        downstream_seq = str(seq_comp[left_start:left_end])[::-1]
    else:
        exon_sequence = seq[seq_start - 1:seq_end].upper()
        upstream_seq = seq[left_start:left_end]
        downstream_seq = seq[right_start:right_end]

    return upstream_seq, exon_sequence, downstream_seq
コード例 #5
0
def find_palindromes_variable(seq):
    """Collect palindromic substrings of ``seq`` for every even length.

    A substring counts as a palindrome when its reverse equals its
    complement.  Lengths are tried from 2 upwards in steps of 2 (only even
    lengths can satisfy that condition).  The search stops at the first
    length that yields no palindrome, because a longer palindrome always
    contains a shorter one at its centre.

    Args:
        seq: DNA sequence as a string.

    Returns:
        Dict mapping each examined length to a dict of
        ``{start index: substring}``.  The length at which the search
        stopped is present with an empty dict.
    """
    palindromeDict = {}
    seq_len = len(seq)
    for size in range(2, seq_len + 1, 2):
        hits = {}
        for i in range(seq_len - size + 1):
            sub_seq = seq[i:i + size]
            # str() replaces Seq.tostring(), which current Biopython no
            # longer provides.
            if sub_seq[::-1] == str(Seq(sub_seq, generic_dna).complement()):
                hits[i] = sub_seq
        palindromeDict[size] = hits
        if not hits:
            # No palindrome of this length, so none of any greater length.
            return palindromeDict
    return palindromeDict
コード例 #6
0
ファイル: a01_process.py プロジェクト: shl198/Pipeline
def AA_sequence(refDNA_dic,cds_df,gene,seq_type='AA'):
    """Build nucleotide and protein sequences for every accession of a gene.

    Args:
        refDNA_dic: Mapping from chromosome name to a record whose ``.seq``
            can be indexed by position.
        cds_df: Data frame with at least ``geneid`` and ``access`` columns.
        gene: Gene id used to select rows of ``cds_df``.
        seq_type: ``'AA'`` returns protein sequences; any other value
            returns the nucleotide sequences.

    Returns:
        Tuple ``(sequences, accessions)`` with accessions sorted and the
        sequences in matching order.
    """
    # 1. rows of the requested gene and its distinct accessions
    gene_df = cds_df[cds_df['geneid'].values==gene]
    prs = sorted(set(gene_df['access'].tolist()))
    obj = trpr(gene_df)

    pr_seqs = []
    tr_seqs = []
    # 2. one nucleotide / protein sequence per accession
    for pr in prs:
        chrom = obj.get_chrom(pr,id_type='access')
        pos = obj.get_trpr_pos(pr)
        ref_seq = refDNA_dic[chrom].seq
        # positions are 1-based
        nt_seq = Seq(''.join([ref_seq[p-1] for p in pos]),generic_dna)
        # Descending positions: presumably a minus-strand feature, so the
        # already reversed bases are complemented -- TODO confirm.
        if pos[0]>pos[1]:
            nt_seq = nt_seq.complement()
        protein = str(nt_seq.translate())
        tr_seqs.append(str(nt_seq))
        pr_seqs.append(protein)

    chosen = pr_seqs if seq_type=='AA' else tr_seqs
    return chosen,prs
def fasta_iter(fasta_name):
    """Yield ``(header, extract, contig_no)`` for each FASTA record.

    ``fasta_name`` is iterated line by line; lines are grouped by whether
    they start with ">", so header groups and sequence groups alternate.

    NOTE(review): this is Python 2 code (``print`` statements, ``.next()``).
    NOTE(review): ``start``, ``end``, ``reverse``, ``complement`` and ``s``
    are not defined in this function; they are presumably module-level
    names (``s`` looks like an alias of the Python 2 ``string`` module) --
    confirm against the rest of the file.
    """
    contig_no = 0
    fh = fasta_name
    # Alternating groups: header line(s), then the sequence lines of a record.
    faiter = (x[1] for x in groupby(fh, lambda line: line[0] == ">"))
    for header in faiter:
        contig_no += 1
        # Drop the leading ">" and surrounding whitespace from the header.
        header = header.next()[1:].strip()
        # The next group holds the sequence lines; join them into one string.
        seq = "".join(s.strip() for s in faiter.next())
        extract = seq[int(start):int(end)]
        # Optional reversal, selected by a yes/no flag.
        if s.lower(reverse) == 'no':
            pass
        elif s.lower(reverse) == 'yes':
            extract = extract[::-1]
        else:
            print ' Please use only "Yes" or "No" when specifying if the sequence is in reverse orientation.'
            sys.exit()
        # Optional complement, selected by a yes/no flag.
        if s.lower(complement) == 'no':
            pass
        elif s.lower(complement) == 'yes':
            dna = Seq(extract, generic_dna)
            extract = str(dna.complement())
        else:
            print 'Please use only "Yes" or "No" when specifying if the sequence is on the complement strand.'
            sys.exit()

        print extract
        yield header, extract, contig_no
コード例 #8
0
ファイル: api.py プロジェクト: derjogi/Gibthon
def get_seq_meta(g, request):
    """Return the sequence metadata of ``g`` as a JSON response.

    The payload holds the sequence length (``len``), one entry per feature
    (``feats``: start, end, strand, type and qualifiers) and a complement
    lookup table (``alpha``) for the ambiguous DNA alphabet in both lower
    and upper case.
    """
    # Features and their qualifiers.
    feats = []
    for f in g.features.all():
        quals = [{'name': q.name, 'data': q.data} for q in f.qualifiers.all()]
        # 'f' -> +1, 'r' -> -1, anything else -> None
        if f.direction == 'f':
            strand = 1
        elif f.direction == 'r':
            strand = -1
        else:
            strand = None
        feats.append({
            'start': f.start,
            'end': f.end,
            'strand': strand,
            'type': f.type,
            'qualifiers': quals,
        })

    # Assume ambiguous DNA: pair every letter with its complement.
    let = Seq(IUPAC.IUPACAmbiguousDNA.letters, IUPAC.IUPACAmbiguousDNA())
    rlet = let.complement()
    alpha = {}
    for base, partner in zip(let, rlet):
        alpha[base.lower()] = partner.lower()
        alpha[base.upper()] = partner.upper()

    return JsonResponse({
        'len': len(g.sequence),
        'feats': feats,
        'alpha': alpha,
    })
コード例 #9
0
def make_complementary_strand(strand_in):
    """Return the complement of ``strand_in`` as a plain string."""
    return str(Seq(strand_in).complement())
コード例 #10
0
def extract_single(seq, strand_info, seq_start, seq_end, bases):
    """Slice an interval of ``seq`` together with its two flanking regions.

    Coordinates are 1-based and inclusive.

    Args:
        seq: Nucleotide string to slice.
        strand_info: ``'-'`` for the minus-strand branch, anything else for
            the plus strand.
        seq_start: First position of the interval.
        seq_end: Last position of the interval.
        bases: Requested flank length on each side.

    Returns:
        ``(upstream_seq, exon_sequence, downstream_seq)``.  For ``'-'`` the
        pieces are complemented and reversed and the flanks swap sides.
        The exon is upper-cased.  Flanks are truncated at the ends of
        ``seq``.
    """
    # Without the clamp a negative start index would count from the end of
    # the sequence and return the wrong bases (or nothing at all).
    low_from = max(0, (seq_start - 1) - bases)
    low_to = seq_start - 1
    high_from = seq_end
    high_to = seq_end + bases

    if strand_info == '-':
        dna = Seq(seq, NucleotideAlphabet())
        seq_comp = dna.complement()
        # Reversing the complemented slice yields its reverse complement.
        exon_sequence = str(seq_comp[seq_start - 1:seq_end])[::-1].upper()
        upstream_seq = str(seq_comp[high_from:high_to])[::-1]
        downstream_seq = str(seq_comp[low_from:low_to])[::-1]
    else:
        exon_sequence = seq[seq_start - 1:seq_end].upper()
        upstream_seq = seq[low_from:low_to]
        downstream_seq = seq[high_from:high_to]

    return upstream_seq, exon_sequence, downstream_seq
コード例 #11
0
ファイル: lab7.py プロジェクト: avontd2868/bioinformatics
def find_palindromes_variable(seq):
    """Find palindromic substrings of ``seq`` for increasing even lengths.

    A substring is a palindrome here when reading it backwards gives its
    complement.  Only even lengths can satisfy that, so lengths 2, 4, 6, ...
    are tried in turn.  As soon as one length produces no palindrome the
    search ends: a longer palindrome would contain a shorter one in its
    middle.

    Args:
        seq: DNA sequence as a string.

    Returns:
        Dict of ``{length: {start index: substring}}``.  The last examined
        length maps to an empty dict when the search ended early.
    """
    palindromeDict = {}
    seq_len = len(seq)
    for size in range(2, seq_len + 1, 2):
        found_here = {}
        for i in range(seq_len - size + 1):
            sub_seq = seq[i:i + size]
            biopy_seq = Seq(sub_seq, generic_dna)
            # str() is the supported conversion; Seq.tostring() is gone
            # from current Biopython releases.
            if sub_seq[::-1] == str(biopy_seq.complement()):
                found_here[i] = sub_seq
        palindromeDict[size] = found_here
        if not found_here:
            # return if no palindromes found
            return palindromeDict
    return palindromeDict
コード例 #12
0
ファイル: 5top_mrna.py プロジェクト: jbkerry/NGS
def get_top_genes(f_name):
    """Print genes whose sequence starts with a 5'TOP or 5'TOP-like motif.

    Each non-blank line of ``f_name`` must hold a name and a sequence
    separated by whitespace.  The sequence is upper-cased, and complemented
    when the name ends with ``-``.  The last two characters of the name are
    dropped to obtain the gene label.

    * 5'TOP: the sequence starts with 4-14 pyrimidines (T/C).
    * 5'TOP-like: at most three arbitrary bases, one purine (A/G), then
      5-14 pyrimidines.

    Every gene is reported at most once per category.  Blank lines are
    skipped.

    Args:
        f_name: Path of the input text file.
    """
    whitespace = re.compile(r'\s+')
    p_5TOP = re.compile(r'^[TC]{4,14}')
    p_5TOP_like = re.compile(r'^[AGTC]{0,3}[AG]')
    p_5TOP_2 = re.compile(r'^[TC]{5,14}')
    # Sets give O(1) "already reported" checks.
    top_seen = set()
    top_like_seen = set()
    with open(f_name) as f:
        for x in f:
            record = x.strip()
            if not record:
                # A blank line cannot be unpacked into (name, seq).
                continue
            name, seq = whitespace.split(record)
            seq = seq.upper()
            if name.endswith('-'):
                seq = str(Seq(seq).complement())
            gene = name[:-2]
            if p_5TOP.match(seq):
                if gene not in top_seen:
                    top_seen.add(gene)
                    print('{}\t5\'TOP mRNA\t{}'.format(gene, seq))
                continue
            m = p_5TOP_like.match(seq)
            if not m:
                continue
            # The pyrimidine tract must follow directly after the purine.
            if p_5TOP_2.match(seq[m.end():]) and gene not in top_like_seen:
                top_like_seen.add(gene)
                print('{}\t5\'TOP-like mRNA\t{}'.format(gene, seq))
コード例 #13
0
def AA_sequence(refDNA_dic, cds_df, gene, seq_type='AA'):
    """Return per-accession sequences of ``gene`` plus the accession list.

    Args:
        refDNA_dic: Mapping from chromosome name to a record exposing
            ``.seq``.
        cds_df: Data frame with ``geneid`` and ``access`` columns.
        gene: Gene id whose rows are selected.
        seq_type: ``'AA'`` for protein sequences, anything else for the
            nucleotide sequences.

    Returns:
        ``(sequences, accessions)``; accessions are sorted and sequences
        follow the same order.
    """
    # 1. all accessions that belong to the gene
    gene_df = cds_df[cds_df['geneid'].values == gene]
    prs = sorted(set(gene_df['access'].tolist()))
    obj = trpr(gene_df)

    pr_seqs, tr_seqs = [], []
    # 2. assemble and translate the sequence of each accession
    for pr in prs:
        chrom = obj.get_chrom(pr, id_type='access')
        pos = obj.get_trpr_pos(pr)
        ref_seq = refDNA_dic[chrom].seq
        bases = [ref_seq[p - 1] for p in pos]  # positions are 1-based
        nt_seq = Seq(''.join(bases), generic_dna)
        # Descending positions presumably mark the minus strand; the bases
        # are already in reverse order, so only a complement is applied --
        # TODO confirm.
        if pos[0] > pos[1]:
            nt_seq = nt_seq.complement()
        protein = str(nt_seq.translate())
        tr_seqs.append(str(nt_seq))
        pr_seqs.append(protein)

    if seq_type != 'AA':
        return tr_seqs, prs
    return pr_seqs, prs
コード例 #14
0
    def mapGenoToPheno(self, genotype):
        """Describe the phenotype for every trait in ``possibleTraitsList``.

        For each trait the genotype stored under ``trait.rsid`` is recorded
        in ``self.traits`` and looked up in ``trait.alleles``, trying in
        order: the genotype as given, the genotype with its allele order
        flipped, its complement, and the flipped complement.

        Args:
            genotype: Mapping from rsid to genotype string.

        Returns:
            List with one human-readable message per trait.
        """
        results = []
        for trait in self.possibleTraitsList:
            # ``in`` works on both Python 2 and 3; dict.has_key is Python 2 only.
            if trait.rsid not in genotype:
                results.append( "genotype not found for " + trait.rsid )
                continue

            call = genotype[trait.rsid]
            self.traits[trait.rsid] = call
            if call in trait.alleles:
                results.append( trait.rsid + " - " + call + " - " + trait.alleles[call] )
            # if not try flipping the order of the alleles
            elif call[::-1] in trait.alleles:
                results.append( trait.rsid + " - " + call[::-1] + " (flipped) - " + trait.alleles[call[::-1]] )
            else:
                # Try the complementary strand.  Together with the flipped
                # lookup below this also covers the reverse complement.
                rev = str(Seq(call, generic_dna).complement())
                if rev in trait.alleles:
                    results.append( trait.rsid + " - " + rev + " (rev comp) -" + trait.alleles[rev] )
                # if not try flipping the order of the alleles
                elif rev[::-1] in trait.alleles:
                    results.append( trait.rsid + " - " + rev[::-1] + " (flipped) - " + trait.alleles[rev[::-1]] )
                else:
                    results.append( "genotype " + call + " and rev comp " + rev + " not found in traits for " + trait.rsid )
        return results
コード例 #15
0
def clipboard_content_manager(inst):
    """Replace the clipboard text with a transformed copy of its sequence.

    The transformation is selected by the caption of the pressed button
    (``inst.text``): 'Reverse', 'Complement', 'Reversed\\ncomplement' or
    'Translate'.  Any other caption leaves the clipboard untouched.

    Input:
            1. Instance of Button.
    """
    # Read the clipboard and strip digits and whitespace from the sequence.
    seq = Seq(re.sub(r'[\d\s]*', '', clip.paste()))

    # Caption -> transformation applied to the sequence.
    transforms = {
        'Reverse': lambda s: s[::-1],
        'Complement': lambda s: s.complement(),
        'Reversed\ncomplement': lambda s: s.reverse_complement(),
        'Translate': lambda s: s.translate(),
    }
    try:
        transform = transforms.get(inst.text)
        if transform is not None:
            # The modified sequence goes back to the clipboard.
            clip.copy(str(transform(seq)))
    # Various errors are possible; they are reported, not raised.
    except Exception as exc:
        print(exc)
コード例 #16
0
def genSplintSeq(filename, *, splint_len=20):
    """Build a splint from the oligo ends stored in a JSON file.

    The JSON file maps arbitrary keys to dicts holding ``extension`` and
    ``sequence``.  For a ``3_prime`` entry the last ``splint_len // 2``
    bases are prepended to the joined sequence; for a ``5_prime`` entry the
    first ``splint_len // 2`` bases are appended.  The joined sequence, its
    complement and its reverse complement are printed.

    Args:
        filename: Path of the JSON file.
        splint_len: Total splint length; half is taken from each oligo.

    Returns:
        The reverse complement of the joined sequence.
    """
    with open(filename, 'r') as handle:
        oligos = json.load(handle)

    half = splint_len // 2
    joined = ''
    for entry in oligos.values():
        kind = entry['extension']
        if kind == '3_prime':
            joined = entry['sequence'][-half:] + joined
        elif kind == '5_prime':
            joined += entry['sequence'][:half]

    splint = Seq(joined)
    print("Sequence:",
          splint,
          "\nCompliment:",
          splint.complement(),
          "\nReverse Compliment:",
          splint.reverse_complement(),
          end='\n\n')

    return splint.reverse_complement()
コード例 #17
0
ファイル: motif_finder.py プロジェクト: darencard/SimpleTools
def motif_finder(reference, ref_ids, query_seq, output):
    """Report every position where a motif or its complement occurs.

    Every window of ``len(query_seq)`` bases of ``reference`` is compared
    with the query (reported as strand ``+``) and with the complement of
    the query (reported as strand ``-``).  Each hit is written to
    ``output`` and printed as ``<ref_ids>\\t<1-based position>\\t<strand>``.

    Args:
        reference: Sequence to scan; must support slicing and ``len``.
        ref_ids: Identifier written in the first column.
        query_seq: Motif as a string of unambiguous DNA letters.
        output: Object with a ``write`` method that receives the hit lines.
    """
    DNA = reference
    query = Seq(query_seq, IUPAC.unambiguous_dna)
    query_reg = str(query)
    query_comp = str(query.complement())  # searched for the minus strand
    width = len(query)

    # "+ 1" so that the final window, ending on the last base, is tested too.
    for i in range(len(DNA) - width + 1):
        testseq = str(DNA[i:i + width])
        if testseq == query_reg:
            strand = "+"
        elif testseq == query_comp:
            strand = "-"
        else:
            continue
        # Tab-separated columns: sequence id, 1-based position, strand.
        line = str(ref_ids) + "\t" + str(i + 1) + "\t" + strand + "\n"
        output.write(line)
        # Parenthesised single argument prints the same on Python 2 and 3.
        print(line)
コード例 #18
0
    def complement_sequences(self):
        """Store the complement of every loaded sequence as a result.

        Names and contents are refreshed through
        ``get_seq_names_and_contents`` first; each complemented sequence is
        then handed to ``add_result`` under its sequence name.
        """
        self.get_seq_names_and_contents()

        for label, content in zip(self.seq_names, self.seq_contents):
            complemented = Seq(content, IUPAC.unambiguous_dna).complement()
            add_result(self, label, str(complemented))
コード例 #19
0
def translate(seq):
    """Translate ``seq`` to protein, falling back to its complement.

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        The translated sequence.  When translating the sequence raises
        ``ValueError`` the complement is translated instead; when that
        fails as well the list ``['None']`` is returned.
    """
    seq = Seq(seq)
    try:
        return seq.translate()
    except ValueError:
        try:
            # Translate the complement itself.  Passing it to translate()
            # as an argument would use it as the codon table.
            return seq.complement().translate()
        except ValueError:
            return ['None']
コード例 #20
0
    def selfDimerizeTest(primer):
        """Compare a primer's reverse with its complement.

        ``primer`` may be a ``Seq`` or anything ``Seq`` accepts.  The
        reversed primer and the complemented primer are handed to
        ``PLA_Seq.calcPhaseMatch`` and its result is returned.
        """
        as_seq = primer if isinstance(primer, Seq) else Seq(primer, generic_dna)
        return PLA_Seq.calcPhaseMatch(as_seq[::-1], as_seq.complement())
コード例 #21
0
    def manage_dna(data):
        """Derive the standard DNA/RNA/protein views of ``data.sequence``.

        Builds a ``Processed_dna_rna`` record holding the coding DNA, its
        complement and reverse complement, the mRNA, the transcript of the
        complement, and the protein (full and up to the first stop codon)
        translated with ``data.translation_table``.  The record is passed
        to ``Sequencer.extract_sequence_data`` and that result is returned.
        """
        sequence = Seq(data.sequence, IUPAC.unambiguous_dna)
        table = data.translation_table

        # Keys follow the keyword arguments of Processed_dna_rna.
        fields = {
            'creation_date': data.creation_date.strftime("%d/%m/%Y, %H:%M:%S"),
            'translation_table': table,
            'coding_dna': str(sequence),
            'dna_c': str(sequence.complement()),
            'dna_rc': str(sequence.reverse_complement()),
            'rna_m': str(sequence.transcribe()),
            'rna_m_c': str(sequence.complement().transcribe()),
            'protein': str(sequence.translate(table=table)),
            'protein_to_stop': str(sequence.translate(table=table, to_stop=True)),
        }
        treated_data = Processed_dna_rna(**fields)

        return Sequencer.extract_sequence_data(treated_data)
コード例 #22
0
ファイル: api.py プロジェクト: derjogi/Gibthon
def get_alpha(g, request):
    """Return the complement table of the ambiguous DNA alphabet as JSON.

    Every letter is mapped to its complement in both lower and upper case.
    """
    # Assume ambiguous DNA.
    let = Seq(IUPAC.IUPACAmbiguousDNA.letters, IUPAC.IUPACAmbiguousDNA())
    data = {}
    for base, partner in zip(let, let.complement()):
        data[base.lower()] = partner.lower()
        data[base.upper()] = partner.upper()
    return JsonResponse(data)
コード例 #23
0
    def in_silico_pcr(Primers_Tm_GC, fasta_seq):
        """Simulate the amplification product of each primer pair.

        For every entry the left primer is ``data[0][0]`` and the right
        primer is ``data[0][1]``.  The product runs from the first
        occurrence of the left primer to the end of the first occurrence of
        the reversed, complemented right primer.

        Returns:
            List of ``[data, '<length> bp', drawing]`` where ``drawing`` is
            the product, a row of ``|`` of the same length, and the
            complement of the product, joined by newlines.
        """
        product_list = []

        for data in Primers_Tm_GC:
            left = data[0][0]
            right = data[0][1]

            # NOTE(review): find() returns -1 when a primer is absent; that
            # value is used as an ordinary index here -- confirm whether
            # such pairs should be skipped.
            start = fasta_seq.find(left)
            site_right = str(Seq(''.join(reversed(right))).complement())
            end = fasta_seq.find(site_right) + len(right)

            distance = end - start
            product = fasta_seq[start:end]
            complement_product = str(Seq(product).complement())

            # One bar per base of the product (none for a negative length).
            bars = '|' * distance
            amplified = product + '\n' + bars + '\n' + complement_product
            product_list.append([data, str(distance) + ' bp', amplified])

        return product_list
コード例 #24
0
ファイル: classes.py プロジェクト: gmoyerbrailean/PyRec
    def comp(self):
        '''Return a new Primer holding the complement of this primer.

        The complement is computed with the Biopython Seq object; strand
        and location are carried over unchanged.'''
        from Bio.Seq import Seq
        from Bio.Alphabet import IUPAC
        complemented = Seq(self.seq, IUPAC.unambiguous_dna).complement()
        return Primer(str(complemented), self.strand, self.location)
コード例 #25
0
ファイル: lab7.py プロジェクト: avontd2868/bioinformatics
def find_palindromes(seq,size):
    """Return every palindromic substring of ``seq`` with length ``size``.

    A substring is a palindrome here when its reverse equals its
    complement.

    Args:
        seq: DNA sequence as a string.
        size: Length of the substrings to test.

    Returns:
        Dict mapping the start index of each palindrome to the substring.
        Empty when ``size`` exceeds the sequence length.
    """
    palindromeDict={}
    for i in range(len(seq)-size+1):
        sub_seq=seq[i:i+size]
        # str() replaces Seq.tostring(), which current Biopython no longer
        # provides.
        if sub_seq[::-1] == str(Seq(sub_seq, generic_dna).complement()):
            palindromeDict[i]=sub_seq
    return palindromeDict
コード例 #26
0
def find_palindromes(seq, size):
    """Locate the substrings of length ``size`` whose reverse is their complement.

    Args:
        seq: DNA sequence as a string.
        size: Window length.

    Returns:
        Dict of ``{start index: substring}`` for every matching window; an
        empty dict when the sequence is shorter than ``size``.
    """
    palindromeDict = {}
    seq_len = len(seq)
    for i in range(seq_len - size + 1):
        sub_seq = seq[i:i + size]
        biopy_seq = Seq(sub_seq, generic_dna)
        # Seq.tostring() is gone from current Biopython; str() is the
        # supported conversion.
        if sub_seq[::-1] == str(biopy_seq.complement()):
            palindromeDict[i] = sub_seq
    return palindromeDict
コード例 #27
0
def chkSelfDimerization(all_seq):
    """Keep the entries whose primers show no long self-complementary run.

    For the forward and the reverse primer of each entry the reversed
    primer is compared with the complemented primer; the longest common
    block and all matching blocks are printed.  An entry is kept only when
    neither longest block is longer than 3.

    Args:
        all_seq: Sequence of entries shaped
            ``(<ignored>, forward primer, reverse primer, *rest)``.

    Returns:
        List with the entries of ``all_seq`` that passed the filter.
    """
    filtered_seq = list()
    for (index, (_, forwP, revP, *MTs)) in enumerate(all_seq):

        forwP,revP = Seq(forwP,generic_dna), Seq(revP,generic_dna)
        forwP_Rev = forwP[::-1]
        forwP_Com = forwP.complement()

        print(index)
        # str() replaces Seq.tostring(), which current Biopython no longer
        # provides.
        print(f"Forward Primer: {str(forwP)}")
        print(f"Reverse Primer: {str(revP)}")

        # One matcher serves both queries for the forward primer.
        forw_matcher = SequenceMatcher(a=forwP_Rev,b=forwP_Com)
        match_ForwP = forw_matcher.find_longest_match(0,len(forwP_Rev),0,len(forwP_Com))
        match_forwP_block = forw_matcher.get_matching_blocks()

        print(match_ForwP)
        print(match_forwP_block)
        print(forwP_Rev[match_ForwP.a:match_ForwP.a+match_ForwP.size], \
              forwP_Com[match_ForwP.b:match_ForwP.b+match_ForwP.size],sep='\n')
        print("Forw_Rev: ",forwP_Rev, "Complement: ",forwP.complement(),sep='\n',end='\n\n')

        revP_Rev = revP[::-1]
        revP_Com = revP.complement()
        rev_matcher = SequenceMatcher(a=revP_Rev, b=revP_Com)
        match_RevP = rev_matcher.find_longest_match(0, len(revP_Rev), 0, len(revP_Com))
        match_RevP_block = rev_matcher.get_matching_blocks()
        print("Reverse Primer")
        print(match_RevP)
        print(match_RevP_block)
        print(revP_Rev[match_RevP.a:match_RevP.a + match_RevP.size], \
              revP_Com[match_RevP.b:match_RevP.b + match_RevP.size], sep='\n')
        print("RevP_Rev: ", revP_Rev, "Complement: ", revP.complement(), sep='\n', end='\n\n')

        # A common block longer than 3 on either primer rejects the entry.
        if match_ForwP.size > 3 or match_RevP.size > 3:
            continue
        print(f'    Adding index: {index}',end='\n\n')
        filtered_seq.append(all_seq[index])

    print(filtered_seq)
    print(len(filtered_seq),end='\n\n')
    return filtered_seq
コード例 #28
0
ファイル: phenotype.py プロジェクト: pyTh0n39/strangerVisions
 def mapGenoToPheno(self, genotype):
     """Describe the phenotype for every trait in ``possibleTraitsList``.

     For each trait the genotype stored under ``trait.rsid`` is recorded in
     ``self.traits`` and looked up in ``trait.alleles``, trying in order:
     the genotype as given, the genotype with its allele order flipped, its
     complement, and the flipped complement.

     Args:
         genotype: Mapping from rsid to genotype string.

     Returns:
         List with one dict per trait.  A hit carries ``rsid``,
         ``genotype``, ``description``, ``flipped`` and ``revComp``; a miss
         carries ``rsid`` and a ``description`` starting with "NOT FOUND".
     """
     results = []
     for trait in self.possibleTraitsList:
         # ``in`` works on both Python 2 and 3; dict.has_key is Python 2 only.
         if trait.rsid not in genotype:
             results.append(dict(rsid=trait.rsid, description="NOT FOUND"))
             continue

         call = genotype[trait.rsid]
         self.traits[trait.rsid] = call
         if call in trait.alleles:
             results.append(
                 dict(rsid=trait.rsid,
                      genotype=call,
                      description=trait.alleles[call],
                      flipped=False,
                      revComp=False))
         # if not try flipping the order of the alleles
         elif call[::-1] in trait.alleles:
             results.append(
                 dict(rsid=trait.rsid,
                      genotype=call[::-1],
                      description=trait.alleles[call[::-1]],
                      flipped=True,
                      revComp=False))
         else:
             # Try the complementary strand.  Together with the flipped
             # lookup below this also covers the reverse complement.
             rev = str(Seq(call, generic_dna).complement())
             if rev in trait.alleles:
                 results.append(
                     dict(rsid=trait.rsid,
                          genotype=rev,
                          description=trait.alleles[rev],
                          flipped=False,
                          revComp=True))
             # if not try flipping the order of the alleles
             elif rev[::-1] in trait.alleles:
                 results.append(
                     dict(rsid=trait.rsid,
                          genotype=rev[::-1],
                          description=trait.alleles[rev[::-1]],
                          flipped=True,
                          revComp=True))
             else:
                 results.append(
                     dict(rsid=trait.rsid,
                          description="NOT FOUND w/ genotype " +
                          call + " and rev comp " +
                          rev))
     return results
コード例 #29
0
def excelWithGenomicPositions(inputFile, outputFile, columnWithcDNAPos, parentDict, chromosome, strand=1):
    """Rewrite a comma-separated export with genomic coordinates.

    The column whose header contains ``columnWithcDNAPos`` is located in
    the header row.  In every later row the cDNA position in that column is
    converted through ``get_key_from_value`` and ``cDNA_to_genomic`` and
    written as both start and end, the preceding column receives
    ``chromosome``, and for a negative ``strand`` the two columns after the
    inserted end column are complemented.  Columns ``startCol-1 ..
    startCol+3`` and ``startCol+6 .. startCol+8`` of each row are written
    tab-separated to ``outputFile``.

    Args:
        inputFile: Path of the comma-separated input file.
        outputFile: Path of the file to write.
        columnWithcDNAPos: Text identifying the header of the cDNA column.
        parentDict: Lookup passed to ``get_key_from_value``.
        chromosome: Value written into the chromosome column.
        strand: Negative values trigger complementing of the two columns.
    """
    startCol = 0

    # Plain 'r' instead of 'rU': Python 3.11+ rejects the 'U' flag and text
    # mode already translates newlines.  The with-block closes both files
    # even when a row cannot be processed.
    with open(inputFile, 'r') as fin, open(outputFile, 'w') as fout:
        for line in fin:
            text_tokens = line.split(',')
            # Header detection: rename the cDNA column and its neighbours.
            for i in range(0, len(text_tokens)):
                if columnWithcDNAPos in text_tokens[i]:
                    startCol = i
                    text_tokens[startCol] = "start"
                    text_tokens.insert(startCol+1, "end")
                    text_tokens[startCol-1] = "chr"

            # Data row: only once the column is known and this is not the header.
            if startCol !=0 and text_tokens[startCol] != "start":
                cDNApos = int(text_tokens[startCol])
                results = get_key_from_value(parentDict, cDNApos)
                newValue = cDNA_to_genomic(results, strand)
                text_tokens[startCol] = newValue
                text_tokens.insert(startCol+1, newValue)
                text_tokens[startCol-1] = chromosome
                if strand < 0:
                    # Complement the two columns that follow the end column.
                    for offset in (2, 3):
                        allele = Seq(text_tokens[startCol+offset])
                        text_tokens[startCol+offset] = str(allele.complement())

            # The list repr is flattened into a tab-separated line.
            text_tokens = str(text_tokens[startCol-1:startCol+4]) + "," + str(text_tokens[startCol+6:startCol+9])
            newLine = text_tokens.replace('[','').replace(']', '').replace("'", "").replace(',', '\t')
            newLine = newLine + ' \n'
            fout.write(newLine)


#excelWithGenomicPositions("LOVD_BRCA1_12.2.13.csv", "LOVD_BRCA1_12.2.13B.vcf", "BIC DNA change", brca1Dict, 17, -1)
#excelWithGenomicPositions("LOVD_BRCA2_12.10.13.csv", "LOVD_BRCA2_12.10.13B.vcf", "BIC DNA change", brca2_dictIARC, 13)
コード例 #30
0
def extract_fragment(args):
    """Print one record, or a slice of it, from a FASTA file.

    Args:
        args: Mapping with the keys
            ``fastafile`` (path), ``seqid`` (first word of the wanted
            header), ``reverse`` / ``complement`` (truthy to transform the
            fragment), ``countGaps`` (falsy to drop ``-`` characters before
            counting positions), and optionally ``startpos`` / ``endpos``
            (0-based slice bounds; a missing, ``False`` or ``0`` ``endpos``
            means "to the end of the record").

    The header is printed first.  Without a transformation the fragment is
    printed line by line as it is read; otherwise it is collected, reversed
    and/or complemented, and printed in one piece.
    """
    seqid = args['seqid']
    pos = 0
    found = False
    startpos = args.get('startpos', 0)
    endpos = args.get('endpos', False)
    # No usable end position means the record is read to its end.  Comparing
    # ``pos > endpos`` with endpos == False would stop after the first line.
    bounded = not endpos == False
    transform = args['reverse'] or args['complement']
    seq = ""
    with open(args['fastafile']) as fo:
        for line in fo:
            line = line.rstrip()
            if line.startswith('>'):
                fid = line[1:].split()[0]
                if fid == seqid:
                    found = True
                    out = '>' + fid
                    # ``in`` works on Python 2 and 3; has_key is Python 2 only.
                    if 'startpos' in args or 'endpos' in args:
                        out += " %s:%s" % (startpos, endpos)
                    if args['reverse']: out += " reverse"
                    if args['complement']: out += " complement"
                    print(out)
                elif found:
                    # Next record reached: the wanted one is complete.
                    break
                else:
                    continue
            elif found:
                if not args['countGaps']: line = line.replace('-', '')
                if bounded and pos > endpos: break
                if pos < startpos and pos + len(line) < startpos:
                    # Line lies entirely before the requested start.
                    pos += len(line)
                    continue
                if pos < startpos and pos + len(line) >= startpos:
                    out = line[startpos - pos:]
                elif pos >= startpos:
                    out = line
                if bounded and endpos < pos + len(line):
                    out = out[:endpos - pos]
                if not transform:
                    print(out)
                else:
                    seq += out
                pos += len(line)
    if transform:
        seq = Seq(seq, IUPAC.unambiguous_dna)
        if args['reverse'] and args['complement']:
            seq = seq.reverse_complement()
        elif args['complement']:
            seq = seq.complement()
        elif args['reverse']:
            seq = seq[::-1]
        seq = str(seq)
        print(seq)
コード例 #31
0
def match(line1, line2, width):
    """Pair up entries of two lines that share a related fragment.

    Both lines are indexed with ``slyce(line, width)``.  For every key of
    the first index, four candidate keys are looked up in the second index,
    in this order: the key itself, its complement, its reverse, and the
    complement of its reverse.  For each candidate present, every
    combination of an entry stored under the key in the first index and an
    entry stored under the candidate in the second index is recorded.

    Returns:
        Two lists of equal length; element ``n`` of the first list is
        paired with element ``n`` of the second.
    """
    from Bio.Seq import Seq
    l1 = []
    l2 = []
    d1 = slyce(line1, width)
    d2 = slyce(line2, width)

    for fragment in d1:
        seq = Seq(fragment)
        candidates = (
            fragment,                        # identical
            str(seq.complement()),           # complementary
            fragment[::-1],                  # reversed
            str(seq[::-1].complement()),     # reverse complementary
        )
        for key in candidates:
            if key not in d2:
                continue
            for left in d1[fragment]:
                for right in d2[key]:
                    l1.append(left)
                    l2.append(right)

    return l1, l2
コード例 #32
0
def transcriptionSeq():
    """Print a sample coding DNA, its complement, its mRNA and the
    back-transcribed DNA."""
    coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG",
                     IUPAC.unambiguous_dna)
    complement_dna = coding_dna.complement()
    print('coding_dna = ', coding_dna)
    print('complement_dna = ', complement_dna)

    # Transcription is applied to the coding strand itself.
    messenger_rna = coding_dna.transcribe()
    print('messenger_rna = ', messenger_rna)

    # Back-transcription turns the mRNA into DNA again.
    back = messenger_rna.back_transcribe()
    print('back = ', back)
    '''
コード例 #33
0
    def OrthogonalityTest(seq_1, seq_2, limit=3):
        """Compare the reverse of one sequence with the complement of another.

        Both arguments must be ``str`` (they are then converted to ``Seq``)
        or both must already be ``Seq`` objects.

        Returns:
            The result of ``PLA_Seq.calcPhaseMatch`` for the reversed
            ``seq_1``, the complemented ``seq_2`` and ``limit``.

        Raises:
            AssertionError: for any other combination of argument types.
        """
        both_text = isinstance(seq_1, str) and isinstance(seq_2, str)
        if both_text:
            seq_1, seq_2 = Seq(seq_1, generic_dna), Seq(seq_2, generic_dna)
        elif not isinstance(seq_1, Seq) or not isinstance(seq_2, Seq):
            raise AssertionError(
                'Seq 1 and Seq 2 must both be either a str or Seq object.')

        return PLA_Seq.calcPhaseMatch(seq_1[::-1], seq_2.complement(), limit)
コード例 #34
0
ファイル: classes.py プロジェクト: gmoyerbrailean/PyRec
    def comp(self):
        '''Return a new Oligo holding the complement of this oligo.

        The complement is computed with the Biopython Seq object.  The
        strand label becomes 'Template' when this oligo is on the coding
        strand and 'Coding' otherwise; location and enzyme are carried
        over unchanged.'''
        from Bio.Seq import Seq
        from Bio.Alphabet import IUPAC
        complemented = Seq(self.seq, IUPAC.unambiguous_dna).complement()
        on_coding = self.strand in ['coding', 'Coding']
        strand = 'Template' if on_coding else 'Coding'
        return Oligo(str(complemented), strand, self.loc, self.enz)
コード例 #35
0
def make_reads(df_source, max_length, update_product_id, coverage, pos_coverage, neg_coverage, class_column):
    """Sample fixed-length reads from every sequence in ``df_source``.

    For each source row, ``n * 4 * cov`` start positions are drawn at random
    (with replacement), where ``n = int(len(sequence) / max_length)``.  A
    window of ``max_length`` characters is cut at each start and emitted in
    one of four orientations, cycling by draw number: forward, reverse,
    complement, reverse complement.  All other columns are copied from the
    source row.

    Args:
        df_source: DataFrame with a ``sequence`` column; ``product_id`` is
            read when ``update_product_id`` is true and ``class_column`` when
            a per-class coverage is consulted.
        max_length: Length of each generated read.
        update_product_id: When true, each read's ``product_id`` becomes
            ``"<source product_id>_<draw number>"``.
        coverage: See the multiplier note below.
        pos_coverage: Multiplier for rows whose ``class_column`` equals 1.
        neg_coverage: Multiplier for rows whose ``class_column`` equals 0.
        class_column: Name of the column holding the class label.

    Returns:
        A new DataFrame of reads with the source columns, after
        ``reset_index()`` (so the source row index appears as a column).

    Multiplier note: ``cov`` starts at 1 and is only replaced when
    ``coverage == 0`` -- first by ``coverage`` itself (0), then by the
    non-zero ``pos_coverage`` / ``neg_coverage`` matching the row's class.
    NOTE(review): a non-zero ``coverage`` is therefore ignored; this looks
    inverted but is kept as-is -- confirm the intent with the callers.
    """
    df_reads = pd.DataFrame().reindex_like(df_source)
    df_reads.drop(df_reads.index, inplace=True)
    reads = []
    # Plain loop instead of DataFrame.apply: the original author found apply
    # did not reduce properly on pandas 0.24.
    count = 0
    for _, row in df_source.iterrows():
        seq = row['sequence']
        if update_product_id:
            product_id = str(row['product_id'])
        n = int(len(seq) / max_length)

        cov = 1
        if coverage == 0:
            cov = coverage
            if pos_coverage != 0 and row[class_column] == 1:
                cov = pos_coverage
            elif neg_coverage != 0 and row[class_column] == 0:
                cov = neg_coverage

        # 4 = forward, reverse, complement, reverse complement
        n_reads = n * 4 * cov
        if n_reads <= 0:
            continue

        # +1 so the last full-length window (start == len - max_length) can
        # be drawn; without it a sequence of exactly max_length characters
        # had an empty population and random.choices raised IndexError.
        starts = random.choices(range(len(seq) - max_length + 1), k=n_reads)
        for i, start in enumerate(starts):
            read = str(seq[start:start + max_length])
            orientation = i % 4
            if orientation == 1:
                read = read[::-1]
            elif orientation == 2:
                read = str(Seq(read).complement())
            elif orientation == 3:
                read = str(Seq(read).reverse_complement())

            # Copy the row: mutating the shared Series made every read of a
            # source row identical to the last one generated.
            read_row = row.copy()
            if update_product_id:
                read_row['product_id'] = product_id + '_' + str(i)
            read_row['sequence'] = read
            reads.append(read_row)

            count += 1
            if count % 1000000 == 0:
                print(f"{count} reads generated")
    print(f"assembling data frame from {count} reads")
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    new_rows = pd.DataFrame(reads, columns=df_source.columns)
    return pd.concat([df_reads, new_rows]).reset_index()
コード例 #36
0
    def fetch_seq(self, chromosome, start, end, reverse=True, complement=True):
        """Return the ``start``..``end`` stretch of one entry of ``self.hg``.

        The entry ``self.hg[chromosome - 1]`` is sliced with
        ``[start - 1:end]`` and the ``.seq`` attribute of that slice is used.

        Args:
            chromosome: 1-based index into ``self.hg``.
            start, end: Integer coordinates (``start`` is shifted down by one
                before slicing).
            reverse, complement: Only the exact objects ``True`` / ``False``
                select a transformation; any other combination of values
                returns the stretch unchanged.

        Returns:
            The reversed stretch, a ``Seq`` complement, a ``Seq`` reverse
            complement, or the untouched ``.seq`` value.

        Raises:
            ValueError: If ``start`` or ``end`` is not an ``int``.
        """
        if not (isinstance(start, int) and isinstance(end, int)):
            raise ValueError("Start and End coordinates must be integers.")

        stretch = self.hg[chromosome - 1][start - 1:end].seq
        want_reverse = reverse is True
        want_complement = complement is True

        if want_reverse and want_complement:
            return Seq(stretch).reverse_complement()
        if want_reverse and complement is False:
            return stretch[::-1]
        if want_complement and reverse is False:
            return Seq(stretch).complement()
        return stretch
コード例 #37
0
def translate(sequence):
    """Describe every reading frame that starts at an ``AUG`` in ``sequence``.

    ``sequence`` is wrapped in a ``Seq`` (ambiguous DNA alphabet), then
    complemented and transcribed.  For each non-overlapping ``AUG`` found in
    the transcribed text, ``make_codon(mrna_text, offset)`` is called and its
    dict is extended with the ``DNA``, ``cDNA`` and ``mRNA`` strings.

    Returns:
        A list of dicts, one per ``AUG``.  When there is none, a single
        placeholder dict with zero / empty ``STT``, ``STP``, ``LEN``,
        ``codon`` and ``protain`` entries plus the three sequence strings.
    """
    dna = Seq(sequence, IUPAC.ambiguous_dna)
    complement_strand = dna.complement()
    mrna_text = str(dna.transcribe())

    def with_sequences(entry):
        # Attach the three sequence strings to a result entry.
        entry.update({'DNA': str(dna), 'cDNA': str(complement_strand), 'mRNA': mrna_text})
        return entry

    offsets = [hit.start() for hit in re.finditer(r"AUG", mrna_text)]
    if not offsets:
        return [with_sequences(
            {'STT': 0, 'STP': 0, 'LEN': 0, 'codon': '', 'protain': ''})]

    results = []
    for offset in offsets:
        results.append(with_sequences(make_codon(mrna_text, offset)))
    return results
コード例 #38
0
def orfs(data):
	"""Print a ``>name`` header and a translation for every ORF row.

	``data[1]`` is iterated as rows of ``(name, start, stop)``; the sequence
	is read from ``data[0][2]``.  Both coordinates are lowered by one before
	slicing.  When the stop lies before the start, the slice is taken
	backwards (step -1), complemented through a ``Seq`` (``generic_dna``) and
	then translated; otherwise the forward slice is translated directly.
	Translation is done by ``translatedna``.
	"""
	# NOTE(review): with stop = row[2]-1 used as an exclusive slice end, the
	# base at the stop coordinate is left out -- confirm that is intended.
	for row in data[1]:
		first = row[1]-1
		last = row[2]-1
		print('>'+row[0])
		if int(last) >= int(first):
			# Forward strand: translate the slice as-is.
			print(translatedna(data[0][2][first:last]))
			continue
		# Reverse strand: walk backwards, then complement each base.
		backwards = data[0][2][first:last:-1]
		strand = Seq(backwards,generic_dna).complement()
		print(translatedna(str(strand)))
コード例 #39
0
def extract_fragment(args):
    """Print one record, or a coordinate range of it, from a FASTA file.

    ``args`` is a dict with these keys:
        fastafile: Path of the FASTA file to read.
        seqid: Record id to extract (first whitespace-separated token of the
            header line, without the leading ``>``).
        startpos: Optional 0-based offset of the first character to keep
            (default 0).
        endpos: Optional offset at which to stop (exclusive).  When absent
            the record is printed to its end.
        reverse, complement: When either is true the fragment is collected
            and printed as one line after being reversed, complemented or
            reverse-complemented through a ``Seq``; otherwise each input line
            is printed as it is read.
        countGaps: When false, ``-`` characters are removed from each line
            before positions are counted.

    The header printed is ``>id``, followed by `` start:end`` when either
    coordinate key is present and by `` reverse`` / `` complement`` as
    requested.  Returns ``None``.

    Fixes over the previous version:
      * Without ``endpos`` the loop compared ``pos > False`` and stopped
        after the first sequence line; the bound is now only checked when an
        end position was given.
      * The end trim was applied to the already start-trimmed string, so a
        range starting and ending in the same line came out too long; both
        bounds are now applied to the line in one slice.
      * The file is closed even when an error occurs.
    """
    seqid = args['seqid']
    startpos = args.get('startpos', 0)
    endpos = args.get('endpos', False)
    # ``False`` is the "no end position" marker; like the original trimming
    # test, an explicit 0 is treated the same way.
    has_end = not (endpos == False)  # noqa: E712
    transform = args['reverse'] or args['complement']

    pos = 0  # number of sequence characters consumed so far
    found = False
    pieces = []
    with open(args['fastafile']) as fo:
        for line in fo:
            line = line.rstrip()
            if line.startswith('>'):
                fid = line[1:].split()[0]
                if fid == seqid:
                    found = True
                    header = '>' + fid
                    if 'startpos' in args or 'endpos' in args:
                        header += " %s:%s" % (startpos, endpos)
                    if args['reverse']:
                        header += " reverse"
                    if args['complement']:
                        header += " complement"
                    print(header)
                elif found:
                    # Next record reached: the wanted one is complete.
                    break
                continue
            if not found:
                continue

            if not args['countGaps']:
                line = line.replace('-', '')
            line_end = pos + len(line)
            if has_end and pos > endpos:
                break
            if line_end < startpos:
                # Whole line lies before the requested start.
                pos = line_end
                continue

            lo = max(startpos - pos, 0)
            hi = endpos - pos if has_end and endpos < line_end else len(line)
            chunk = line[lo:hi]
            if transform:
                pieces.append(chunk)
            else:
                print(chunk)
            pos = line_end

    if transform:
        seq = Seq("".join(pieces), IUPAC.unambiguous_dna)
        if args['reverse'] and args['complement']:
            seq = seq.reverse_complement()
        elif args['complement']:
            seq = seq.complement()
        elif args['reverse']:
            seq = seq[::-1]
        print(str(seq))
コード例 #40
0
ファイル: dnds.py プロジェクト: moritzbuck/Pyscratches
def get_seq(g, cutof=0.95, rare_filter = 0.05):
    """Count synonymous / non-synonymous variant alleles inside gene ``g``.

    Reads the module-level ``pos_dict`` (gene -> start, end, contig, sense)
    and ``nucl_compo`` (contig -> per-position dicts with ``base``,
    ``coverage`` and the allele frequencies ``fA``/``fT``/``fG``/``fC``).
    For ``sense == "-"`` the positions are reversed and the bases
    complemented.  Each alternative allele whose frequency exceeds
    ``rare_filter`` at a position whose top frequency is below ``cutof`` is
    substituted into its codon and classified with the standard codon table.

    Args:
        g: Key into ``pos_dict``.
        cutof: Positions whose highest allele frequency is below this value
            are treated as variable.
        rare_filter: Minimum frequency for an alternative allele to count.

    Returns:
        dict with ``rate`` (mean of the per-position top frequencies),
        ``syn``, ``non``, ``stop`` (True if any substitution yields a stop
        codon), ``snp_freq`` ((syn + non) / gene length) and ``len``.

    Compared with the previous version, ``dict.has_key`` and ``/`` used as
    an index were replaced by ``in`` and ``divmod`` so the function also
    runs on Python 3; results on Python 2 are unchanged.
    """
    vec = ['fA','fT','fG','fC']
    comp= {'A':'T', 'T':'A', 'C':'G', 'G':'C', 'N':'N'}
    # NOTE(review): this adds the stop codons to the table object owned by
    # ``standard_dna_table`` itself, so the change is visible to every other
    # user of that table.  Kept as-is; consider working on a copy.
    codon_table = standard_dna_table.forward_table
    for stop_codon in standard_dna_table.stop_codons:
        codon_table[stop_codon] = "*"

    start = pos_dict[g]['start']-1
    end = pos_dict[g]['end']
    contig = pos_dict[g]['contig']
    sense = pos_dict[g]['sense']
    dat = nucl_compo[contig][start:end]
    if sense == "-":
         dat = dat[::-1]
    str_seq = "".join([v['base'] for v in dat])
    max_freq = [ max([v['fA'],v['fT'],v['fG'],v['fC']]) for v in dat if v['base'] != 'X']
    rate = mean(max_freq)
    seq = Seq(str_seq)
    if sense == "-":
        seq = seq.complement()

    # NOTE(review): ``max_freq`` skips 'X' positions but its index is used to
    # address ``dat``; the two only line up when no 'X' is present -- confirm.
    variants = []
    for i, top_freq in enumerate(max_freq):
        if top_freq < cutof and dat[i]['coverage'] != 0:
            ref = dat[i]['base']
            alts = [v.split("f")[1] for v in vec
                    if ref != 'X' and dat[i][v] > rare_filter and ref not in v]
            variants.append((i, ref, alts))
    if sense == "-":
        variants = [(i, comp[ref], [comp[alt] for alt in alts])
                    for i, ref, alts in variants]
    codons = re.findall('...',str(seq))

    syn = 0
    non = 0
    stop = False

    for i, _ref, alts in variants:
        # Codon number and position within the codon (integer arithmetic).
        codon_index, off = divmod(i, 3)
        codon = codons[codon_index]
        if codon in codon_table:
            aa = codon_table[codon]
            for alt in alts:
                letters = list(codon)
                letters[off] = alt
                effect = codon_table["".join(letters)]
                if effect == aa:
                    syn += 1
                else:
                    non += 1
                    if effect == "*":
                        stop = True

    return {"rate": rate,"syn":syn, "non":non , "stop": stop, "snp_freq" : float(syn+non)/float(end-start), "len" : end-start  }
コード例 #41
0
def Orthogonality_Test(seq_1, seq_2, limit=4):
    """Check whether two sequences are orthogonal under a phase-match limit.

    ``seq_1`` is reversed and ``seq_2`` complemented; the two are then
    compared with ``difflib.SequenceMatcher``.  The longest common block
    fixes an alignment offset ("step"); the sizes of all matching blocks on
    that same offset are summed to give the phase-match size.  Progress
    information is printed along the way.

    Args:
        seq_1, seq_2: ``Seq`` objects, or anything ``Seq(..., generic_dna)``
            accepts (e.g. plain strings).
        limit: Largest phase-match size still considered orthogonal.

    Returns:
        ``False`` when the phase-match size exceeds ``limit``, else ``True``.

    Fix over the previous version: the search bound was
    ``max(len(seq_2), len(seq_2))`` and was used for both sequences, so
    ``seq_1`` was ignored.  A shorter ``seq_1`` was indexed past its end
    and a longer one was only partly searched.  Each sequence is now
    searched over its own full length.
    """
    if not isinstance(seq_1, Seq):
        seq_1 = Seq(seq_1, generic_dna)

    if not isinstance(seq_2, Seq):
        seq_2 = Seq(seq_2, generic_dna)

    print(f"Checking orthogonality between:\n{seq_1} and \n{seq_2}")

    seq_1_Rev = seq_1[::-1]
    seq_2_Com = seq_2.complement()

    matcher = SequenceMatcher(a=seq_1_Rev, b=seq_2_Com)
    longest = matcher.find_longest_match(
        0, len(seq_1_Rev), 0, len(seq_2_Com))
    all_match = matcher.get_matching_blocks()

    print(longest)
    # Offset between the two sequences at which the longest block aligns.
    step = longest.b - longest.a
    print("step: ", step)
    print(all_match)

    # Only blocks on the same offset are in phase with the longest block.
    phase_matches = [block for block in all_match if block.b - block.a == step]
    phase_match_size = sum(block.size for block in phase_matches)
    print("phase match: ", phase_match_size)

    return not phase_match_size > limit
コード例 #42
0
def Oligo(target_dna):
    '''
    Summarise an oligo sequence.

    Returns a dict with these entries:
      GC_contents            - GC(dna) formatted as "<value with 2 decimals> %"
      Tm_value               - result of MeltingTemp.Tm_Wallace
      Complement_seq         - complement, as a string
      Reverse_complement_seq - reverse complement, as a string
      Length_of_oligo        - length, as a string
    '''
    dna = Seq(target_dna)  # work on a Biopython Seq
    # Values are computed in the listed order, top to bottom.
    return {
        'GC_contents': '{:.2f} %'.format(GC(dna)),
        'Tm_value': MeltingTemp.Tm_Wallace(dna),
        'Complement_seq': str(dna.complement()),
        'Reverse_complement_seq': str(dna.reverse_complement()),
        'Length_of_oligo': str(len(dna)),
    }
コード例 #43
0
 def mapGenoToPheno(self, genotype):
     #for each possible trait see if we have a genotype
     #if we do add it with its value to the traits dict
     for trait in self.possibleTraitsList:
         if genotype.has_key(trait.rsid):
             self.traits[trait.rsid] = genotype[trait.rsid]
             if trait.alleles.has_key(genotype[trait.rsid]):
                 print trait.rsid, " - ", genotype[trait.rsid], " - ", trait.alleles[genotype[trait.rsid]]
             else:
                 #try reverse complement
                 #print genotype[trait.rsid]
                 my_dna = Seq(genotype[trait.rsid], generic_dna)
                 rev = str(my_dna.complement())
                 #print rev
                 if trait.alleles.has_key(rev):
                     print trait.rsid, " - ", rev, " (rev comp) -", trait.alleles[rev]
                 else:
                     print "genotype " , genotype[trait.rsid], "and rev comp " , rev, " not found in traits for " , trait.rsid   
                     
         else:
             print trait.rsid, " - unavailable"
コード例 #44
0
def simple():
    """Print the complement, then the reverse complement, of a fixed sequence."""
    demo = Seq("AGTACACTGGT")
    for strand in (demo.complement(), demo.reverse_complement()):
        print(strand)
コード例 #45
0
ファイル: sequtils.py プロジェクト: graik/rotmic
def dna2complement( seq ):
    """Return the complement of a DNA sequence as a plain string.

    ``seq`` is converted with ``str()`` first, so strings and ``Seq``
    objects are both accepted.  The result is not reversed.

    ``str()`` replaces the former ``.tostring()`` call: ``Seq.tostring()``
    is deprecated/removed in newer Biopython releases and returned the same
    text.
    """
    return str( Seq( str(seq) ).complement() )
コード例 #46
0
ファイル: bio_02.py プロジェクト: MoisesTedeschi/python
'''
Create a DNA sequence and print its complement and its
reverse complement.
'''
from Bio.Seq import Seq

seq = Seq("ACCCCTATGTGACCACTG")

# Complement: each base swapped for its partner, order unchanged.
complementar = seq.complement()
print("Imprimindo sequência complementar: ", complementar)

# Reverse complement: the complement read in the opposite direction.
reverso_complementar = seq.reverse_complement()
print("Imprimindo o reverso complementar:", reverso_complementar)
コード例 #47
0
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import re

sequence = Seq("ATGtcccacta", IUPAC.unambiguous_dna)
print(sequence)

# 4: complementary strand
complementary = sequence.complement()
print(complementary)
# [::-1] leaves start and stop at their defaults and steps by -1, i.e. it
# walks the whole sequence backwards
print(complementary[::-1])
# which gives the same as
print(sequence.reverse_complement())

simple_seq = Seq("TCTGTGCTAAAGTGTAACTCGTAGGCACTATCTAC")
simple_seq_r = SeqRecord(
    simple_seq,
    id="AC834343",
    name="seqX",
    description="H**o erectus, chr25",
)
print(simple_seq_r)

record = SeqIO.read("/Users/Kozel/Documents/UJ/Biotechnologia molekuarna/3 semestr/Bioinformatyka/Kody/hemoglobin.txt","fasta")
for field in (record.id, record.name, record.description, record.annotations):
    print(field)
#print(record.seq)

## regular expressions
# re.match(pattern, string)  - looks only at the start of the string
# re.search(pattern, string) - scans the whole string
print(re.search("C.T","ATCATGGC"))
コード例 #48
0
ファイル: models.py プロジェクト: derjogi/Gibthon
class SeqLine:
	"""One numbered line of sequence together with its complement.

	Attributes set by the constructor:
	  number   - the value passed as ``_number``
	  seq      - ``Seq`` built from ``_seq`` (IUPAC unambiguous DNA alphabet)
	  rseq     - ``seq.complement()`` (complemented, not reversed)
	  features - a new empty list
	"""

	def __init__(self, _number, _seq):
		forward = Seq(_seq, IUPAC.IUPACUnambiguousDNA())
		self.number = _number
		self.seq = forward
		self.rseq = forward.complement()
		self.features = list()
コード例 #49
0
# The "-1" below appears to be the echoed result of a statement that precedes
# this fragment -- NOTE(review): confirm against the full file.
#-1
# Count occurrences of "GG" in my_dna (my_dna is defined outside this fragment).
my_dna.count("GG")
#note that count is non-overlapping
# e.g. seven A's contain only three non-overlapping "AA" pairs
"AAAAAAA".count("AA")


"""
BioPython has several built-in functions for biological applications:
complement, reverse complement, translation, back translation
"""

# The commented lines show how my_dna was presumably created -- TODO confirm.
#from Bio.Seq import Seq
#from Bio.Alphabet import generic_dna
#my_dna = Seq("AGTACACTGGT", generic_dna)
print my_dna
# Each call below returns a new Seq; the result is discarded here, and the
# comment under each call records the value an interactive session showed.
my_dna.complement()
#Seq('TCATGTGACCA', DNAAlphabet())
my_dna.reverse_complement()
#Seq('ACCAGTGTACT', DNAAlphabet())
my_dna.transcribe()










コード例 #50
0
def double_digest(sequence, id, r_renz, r_cut_pos, c_renz, c_cut_pos, low, up, output):
	"""Report pairs of rare-cutter and common-cutter sites in ``sequence``.

	The sequence is scanned one base at a time for the rare enzyme motif and
	for the complement of that motif.  For every hit, a window of positions
	derived from ``low`` and ``up`` is scanned for the common enzyme motif
	(or its complement), and each pair found is written to ``output`` and
	printed.

	Args:
		sequence: The contig; it is sliced and passed to ``len``.
		id: Text placed in the first column of every reported line.
		r_renz, c_renz: Recognition motifs of the rare and common enzyme.
		r_cut_pos, c_cut_pos: Cut offsets within each motif (passed to int()).
		low, up: Window limits relative to the rare cut (passed to int()).
		output: Object with a ``write`` method that receives each line.

	Output lines are tab separated and newline terminated:
		motif hit:       id, rare cut, common cut, "+"
		complement hit:  id, common cut, rare cut, "-"

	Note that the opposite strand is searched using the complement of each
	motif, not the reverse complement.
	"""
	# Read contig
	DNA = sequence
	
	# Handles for various restriction enzymes, cut placements, and their complements for the rare-cutting enzyme
	rare_renz = Seq(r_renz, IUPAC.unambiguous_dna)
	rare_compmotif = str(rare_renz.complement()) # must also search for complement
	rare_motif = str(rare_renz)
	rare_cut_motif = int(r_cut_pos)
	# cut offset counted from the other end of the motif
	rare_cut_compmotif = int(len(rare_motif))-int(r_cut_pos)
	
	# Handles for various restriction enzymes, cut placements, and their complements for the common-cutting enzyme
	common_renz = Seq(c_renz, IUPAC.unambiguous_dna)
	common_compmotif = str(common_renz.complement()) # must also search for complement
	common_motif = str(common_renz)
	common_cut_motif = int(c_cut_pos)
	common_cut_compmotif = int(len(common_motif))-int(c_cut_pos)
	
	# For bp in range from 0 to length of sequence - RE motif length, iterating by 1bp
	# NOTE(review): the range end is exclusive, so the window starting at
	# len(DNA)-len(rare_motif) (the last full window) is never tested -- confirm.
	for i in range(0, len(DNA)-len(rare_motif), 1):
		# Rare test sequence is i + length of rare RE motif
		rare_testseq = str(DNA[i:i+len(rare_motif)])
		rare_pos = i+1
		if rare_testseq == rare_motif:
		# if rare enzyme test sequence equals the rare restriction enzyme motif, report position as position in loop + cut location in enzyme
			rare_digest = rare_pos + rare_cut_motif
			# whenever there is a rare enzyme cut, scan a window of basepairs upstream (based on lower/upper limits designed) for a common enzyme cut
			# NOTE(review): the upper bound subtracts len(rare_motif) although the
			# windows tested are len(common_motif) long -- confirm which is meant.
			for j in range(rare_digest+int(low), (rare_digest+int(up))-len(rare_motif),1):
				common_testseq = str(DNA[j:j+len(common_motif)])
				common_pos = j+1
				# if common enzyme test sequence equals the common restriction enzyme motif, report position
				if common_testseq == common_motif:
					common_digest = common_pos + common_cut_motif
					# only report cuts that fall inside the contig
					if common_digest < len(DNA):
						rare_j_line = id+"\t"+str(rare_digest)+"\t"+str(common_digest)+"\t+\n"
						output.write(rare_j_line)
						print rare_j_line
			# whenever there is a rare enzyme cut, scan a window of basepairs downstream (based on lower/upper limits designed) for a common enzyme cut
#			for k in range(rare_digest-int(up), (rare_digest-int(low))-len(rare_motif),1):
#				common_testseq = str(DNA[k:k+len(common_motif)])
#				common_pos = k+1
#				if common_testseq == common_motif:
#					common_digest = common_pos + common_cut_motif
#					if common_digest > 0:
#						rare_k_line = id+"\t"+str(rare_digest)+"\t"+str(common_digest)+"\n"
#						output.write(rare_k_line)
#						print rare_k_line
		elif rare_testseq == rare_compmotif:
		# must do the same as above but with complement sequences (for opposite strand)
			rare_digest = rare_pos + rare_cut_compmotif
			# whenever there is a rare enzyme cut, scan a window of basepairs upstream (based on lower/upper limits designed) for a common enzyme cut. This actually ends up being downstream on the strand we care about.
#			for j in range(rare_digest+int(low), (rare_digest+int(up))-len(rare_motif),1):
#				common_testseq = str(DNA[j:j+len(common_compmotif)])
#				common_pos = j+1
#				if common_testseq == common_compmotif:
#					common_digest = common_pos + common_cut_compmotif
#					if common_digest < len(DNA):
#						rare_j_line = id+"\t"+str(rare_digest)+"\t"+str(common_digest)+"\n"
#						output.write(rare_j_line)
#						print rare_j_line
			# whenever there is a rare enzyme cut, scan a window of basepairs downstream (based on lower/upper limits designed) for a common enzyme cut. This actually ends up being upstream on the strand we care about.
			# NOTE(review): k can be negative when the rare cut lies closer than
			# ``up`` to the contig start; if DNA follows normal Python slicing a
			# negative k is counted from the end of the contig -- confirm.
			for k in range(rare_digest-int(up), (rare_digest-int(low))-len(rare_compmotif),1):
				common_testseq = str(DNA[k:k+len(common_compmotif)])
				common_pos = k+1
				if common_testseq == common_compmotif:
					common_digest = common_pos + common_cut_compmotif
					if common_digest > 0:
						# common cut is written first for complement-strand pairs
						rare_k_line = id+"\t"+str(common_digest)+"\t"+str(rare_digest)+"\t-\n"
						output.write(rare_k_line)
						print rare_k_line
コード例 #51
0
from Bio.Seq import Seq        # for Seq
from Bio.Alphabet import IUPAC # for alphabet

## sequence with generic alphabet ##
my_seq = Seq("AGTACACTGGT")
print "sequence = ", my_seq
print "alphabet = ", my_seq.alphabet
print

## DNA sequence ##
dna = Seq("ATGACACTGTAGGAA", IUPAC.unambiguous_dna)
print "sequence = ", dna
print "alphabet type = ", dna.alphabet
print "DNA nucleotides = ", dna.alphabet.letters
print

# print DNA complement
print "complement = ", dna.complement()
print

# trascribe from DNA (sense strand) to RNA
rna =dna.transcribe()
print "rna = ", rna
print

# translate from RNA to protein
protein1 = rna.translate()  # dna.translate() also works
print "protein = ", protein1

protein = rna.translate(to_stop = True)
print "protein = ", protein
コード例 #52
0
ファイル: 03.code.py プロジェクト: guochangjiang/Python.learn
#!/usr/bin/env python3
# --*-- utf-8 --*--

from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.Alphabet import generic_dna

my_seq = Seq("AGTACACTGGT", IUPAC.unambiguous_dna)
# the sequence, its alphabet, its complement and its reverse complement
for shown in (my_seq, my_seq.alphabet, my_seq.complement(),
              my_seq.reverse_complement()):
    print(shown)

# the same letters under two other DNA alphabets
my_seq2 = Seq("AGTACACTGGT", IUPAC.ambiguous_dna)
print(my_seq2.alphabet)
my_seq3 = Seq("AGTACACTGGT", IUPAC.extended_dna)
print(my_seq3.alphabet)

# ... and declared as a protein
my_prot = Seq("AGTACACTGGT", IUPAC.protein)
print(my_prot)
print(my_prot.alphabet)

## a Seq object behaves much like a string
for index, letter in enumerate(my_seq):
    print("%i %s" % (index, letter))

print(len(my_seq))
first_letter, last_letter = my_seq[0], my_seq[-1]
print(first_letter)
print(last_letter)
コード例 #53
0
ファイル: seq.py プロジェクト: vipints/tutorials_2014
from Bio.Seq import Seq 

# working with sequences 
my_seq = Seq("AGTACACTGGT")

print(my_seq)

# each derived sequence is appended to its label before printing
print("complement: " + my_seq.complement())
print("reverse complement: " + my_seq.reverse_complement())

my_rna = my_seq.transcribe()
print("transcribe: " + my_rna)
print("my_seq[2:4]: " + my_seq[2:4])

# back-transcribe the RNA
my_dna = my_rna.back_transcribe()
コード例 #54
0
# convert to string
# NOTE(review): tostring() is deprecated in later Biopython releases in favour
# of str(my_seq) -- confirm against the installed version.
my_seq.tostring()

# concatenate sequences
seq1 + seq2  # ONLY if alphabets are compatible

# otherwise, convert both sequences to the generic alphabet first
from Bio.Alphabet import generic_alphabet
seq1.alphabet = generic_alphabet
seq2.alphabet = generic_alphabet
seq1 + seq2


# sequence complement (only if alphabet allows complement)
my_seq.complement()

# reverse complement (only if alphabet allows complement)
my_seq.reverse_complement()


# transcribe RNA  (DNA -> mRNA)
#The actual biological transcription process works from the template strand, doing a reverse complement
#(TCAG → CUGA) to give the mRNA. However, in Biopython and bioinformatics in general, we typically
#work directly with the coding strand because this means we can get the mRNA sequence just by switching
#T → U.

from Bio.Seq import transcribe
# just replaces T with U on the coding strand (5' -> 3')
# (coding_dna is expected to be defined outside this fragment)
messenger_rna = transcribe(coding_dna)  
コード例 #55
0
ファイル: intro.py プロジェクト: jimmcgaw/bioinformatic
from Bio.Seq import Seq

#create a sequence object
my_seq = Seq('CATGTAGACTAG')

# print some details about it
base_count = len(my_seq)
print('seq %s is %i bases long' % (my_seq, base_count))

reverse_complement = my_seq.reverse_complement()
print('reverse complement is %s' % reverse_complement)

protein = my_seq.translate()
print('protein translation is %s' % protein)

complement = my_seq.complement()
print('complement is %s' % complement)
コード例 #56
0

"""
Worked examples from the official Biopython documentation, kept for reference.
"""

# Creating and printing a sequence
print("\n###############\n1. 简单序列处理\n---------------")
from Bio.Seq import Seq
my_seq = Seq("AGTACACTGGT")  # build the Seq
print("my_seq:", my_seq)  # plain output
raw_form = repr(my_seq)  # repr output
print(raw_form)
print("alphabet of my_seq:", my_seq.alphabet)  # sequence type

# Complements
forward_complement = my_seq.complement()
print("正向互补:", forward_complement)
backward_complement = my_seq.reverse_complement()
print("反向互补:", backward_complement)

# Loading sequences from a file
print("\n###############\n2. FASTA 解析示例\n---------------")
from Bio import SeqIO
fasta_records = SeqIO.parse("ls_orchid.fasta", "fasta")  # parse the FASTA file
for seq_record in fasta_records:
    print("序列名称:", seq_record.id)
    print("序列原始输出:", repr(seq_record.seq))
    print("序列长度:", len(seq_record))
    # only the first record is shown
    break
# A FASTA file does not specify an alphabet, so the fairly generic
# SingleLetterAlphabet() is used by default

print("\n###############\n3. GenBank 解析示例\n---------------")
genbank_records = SeqIO.parse("ls_orchid.gbk", "genbank")  # parse the GenBank file
for seq_record in genbank_records:
    print("序列名称:", seq_record.id)
コード例 #57
0
ファイル: nuc_info.py プロジェクト: aersoares81/compbio-utils
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna, generic_rna

# Read one sequence per line from standard input and print a short report
# for each.  Blank lines and lines starting with '#' are skipped.
for line in sys.stdin:
    line = line.rstrip()
    if not line or line.startswith("#"):
        continue

    line = line.upper()

    counts = Counter(line)
    # The line was upper-cased above, so only 'U' and 'T' can occur; a
    # Counter returns 0 for letters that are absent.
    n_u = counts['U']
    n_t = counts['T']
    # More U than T: treat the input as RNA, otherwise as DNA.
    if n_u > n_t:
        seq = Seq(line, generic_rna)
        seqtype = "RNA"
    else:
        seq = Seq(line, generic_dna)
        seqtype = "DNA"

    print("Parsed input as: %s" % seq)
    print("Seq. type: %s" % seqtype)
    print("Length: %d" % len(seq))
    # Scale the fraction to a percentage: the value is printed with a '%'
    # sign, but was previously shown as a fraction (e.g. "0.25%" for 25%).
    total = float(len(seq))
    composition = ' '.join(
        ["%c:%.2f%% " % (nuc, 100.0 * cnt / total)
         for (nuc, cnt) in counts.items()])
    print("Composition: %s" % composition)
    print("Complement: %s" % seq.complement())
    print("Reverse complement: %s" % seq.reverse_complement())
    print("Translated: %s" % seq.translate())
    # See http://biopython.org/wiki/Seq
コード例 #58
0
ファイル: biopy.py プロジェクト: geparada/my_src
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
my_dna = Seq("GTAG,GCTG,ATAC", generic_dna)
# print the sequence, then its complement, then its reverse complement
print(my_dna)
complemented = my_dna.complement()
print(complemented)
reverse_complemented = my_dna.reverse_complement()
print(reverse_complemented)
コード例 #59
0
ファイル: 318_test1.py プロジェクト: cgregg/codonmassager
from Bio.Alphabet import generic_rna
from Bio.Alphabet import generic_protein

my_dna = Seq("ATGGGGAGAAGGCCGTAG", generic_dna)
#print my_dna

#a = my_dna + 'aaa'
#print a

# positions of two sub-sequences
for motif in ('AGG', 'AGA'):
    print(my_dna.find(motif))
print(my_dna)
adenine_count = my_dna.count('A')
print(adenine_count)
print(len(my_dna))

# complement and transcription of the same DNA
your_dna = my_dna.complement()
print(your_dna)
my_rna = my_dna.transcribe()
print(my_rna)

# table = 1 is the default standard genetic code, http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG1
# to_stop=True tells translate to stop at the first stop codon
my_protr = my_rna.translate(table=1, to_stop=True)
print(my_protr)
# translating the DNA directly
my_protd = my_dna.translate(to_stop=True)
print(my_protd)

#playing with complete CDS'
#yaaX = Seq("GTGAAAAAGATGCAATCTATCGTACTCGCACTTTCCCTGGTTCTGGTCGCTCCCATGGCA" + \
#            "GCACAGGCTGCGGAAATTACGTTAGTCCCGTCAGTAAAATTACAGATAGGCGATCGTGAT" + \
#            "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT" + \