def CDS_markup(Search_Query):
    '''Return an entry's DNA sequence with its CDS regions wrapped in < and >.

    The entry is fetched with dbapi.dict_entries. Every "start..end" pair
    found in the entry's 'cds' field is read as a pair of 1-based positions
    (end assumed inclusive, as in GenBank feature locations) and the
    matching stretch of the 'dna_seq' field is enclosed in "<" ... ">".

    param Search_Query: search term forwarded unchanged to dbapi.dict_entries
    return: the marked-up sequence as a string, or None (after printing a
            hint) when the lookup yields nothing usable
    '''
    # NOTE(review): CDS_markup is defined a second time later in this file;
    # Python keeps whichever definition is executed last.
    entry = dbapi.dict_entries(Search_Query)
    if not entry:
        print(
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
        # Stop here: subscripting a failed lookup below would raise.
        return None

    # The dots are escaped so that only a literal ".." separates the numbers;
    # an unescaped ".." would match any two characters.
    span_pattern = re.compile(r'(\d+)\.\.(\d+)')
    spans = [(int(found.group(1)), int(found.group(2)))
             for found in span_pattern.finditer(entry['cds'])]

    # Insert the markers into a list of characters. Each insertion moves all
    # later characters one place to the right, which `shift` compensates for.
    # Spans are assumed to be listed in ascending, non-overlapping order.
    bases = list(entry['dna_seq'])
    shift = 0
    for start, end in spans:
        # 1-based start -> 0-based index of the first base of the region.
        bases.insert((start - 1) + shift, "<")
        shift += 1
        # Index `end` (0-based) is the slot just after the last base of the
        # region, so the closing marker lands after that base.
        bases.insert(end + shift, ">")
        shift += 1

    return "".join(bases)
def codon_freq(Search_Query):
    '''Count how often each three-character codon occurs in an entry's DNA.

    The entry is fetched with dbapi.dict_entries and its 'dna_seq' field is
    cut into consecutive, non-overlapping triplets starting at the first
    character. The values are raw occurrence counts, not proportions.

    param Search_Query: search term (a string) forwarded to dbapi.dict_entries
    return: dictionary mapping each codon to its number of occurrences, or
            None (after printing a hint) when the lookup yields nothing usable
    '''
    # NOTE(review): codon_freq is defined a second time later in this file;
    # Python keeps whichever definition is executed last.
    entry = dbapi.dict_entries(Search_Query)
    if not entry:
        print(
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
        # Stop here: subscripting a failed lookup below would raise.
        return None

    # \w\w\w only matches three adjacent word characters, so a trailing
    # remainder of one or two characters is left out of the counts.
    triplets = re.findall(r'\w\w\w', entry['dna_seq'])

    counts = {}
    for triplet in triplets:
        counts[triplet] = counts.get(triplet, 0) + 1
    return counts
def search_RE(Search_Query, RE_name):
    '''Look for the sites of one restriction enzyme in an entry's DNA sequence.

    param Search_Query: search term forwarded to dbapi.dict_entries
    param RE_name: enzyme name as a string; 'EcoRI', 'BamHI' and 'BsuRI'
                   are the names handled here
    return: one of
            - the "Please enter valid search value ..." message (a string)
              when the database lookup compares equal to False
            - a 2-tuple (label string, result of the enzyme's search call)
              when the search result is not an empty list
            - None, after printing a message, when the search result is an
              empty list or RE_name is not one of the handled names
    '''
    # Per-enzyme texts: (printed when nothing is found, label for a hit).
    texts = {
        'EcoRI': ('There are no EcoRI restriction sites in this DNA sequence',
                  'The location sites for EcoRI are'),
        'BamHI': ('There are no BamHI restriction sites in this DNA sequence',
                  'The location sites for BamHI are'),
        'BsuRI': ('There are no BsuRI restriction sites in this DNA sequence',
                  'The location sites for BsuRI are'),
    }

    alphabet = IUPACAmbiguousDNA()
    record = dbapi.dict_entries(Search_Query)
    if record == False:
        return (
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
    sequence = Seq(record['dna_seq'], alphabet)

    if RE_name not in ('EcoRI', 'BamHI', 'BsuRI'):
        print('search_RE function error: invalid RE name')
        return None

    nothing_found, hit_label = texts[RE_name]
    # The enzyme object is the attribute of Restriction with the same name.
    sites = getattr(Restriction, RE_name).search(sequence)
    if sites == []:
        print(nothing_found)
        return None
    return hit_label, sites
def search_db(Search_Query):
    '''Look up a search term in the database and hand back the raw result.

    param Search_Query: term forwarded unchanged to dbapi.dict_entries
                        (presumably an accession, gene_id, location or
                        protein_id - confirm against dbapi)
    return: whatever dbapi.dict_entries returns for that term
    '''
    return dbapi.dict_entries(Search_Query)
def codon_freq(Search_Query):
    '''Count how often each three-character codon occurs in an entry's DNA.

    The entry is fetched with dbapi.dict_entries and its 'dna_seq' field is
    cut into consecutive, non-overlapping triplets starting at the first
    character. The values are raw occurrence counts, not proportions.

    param Search_Query: search term (a string) forwarded to dbapi.dict_entries
    return: dictionary mapping each codon to its number of occurrences, or
            None (after printing a hint) when the lookup yields nothing usable
    '''
    # NOTE(review): an earlier definition of codon_freq exists in this file;
    # this later one is the definition that takes effect.
    entry = dbapi.dict_entries(Search_Query)
    if not entry:
        print(
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
        # Stop here: subscripting a failed lookup below would raise.
        return None

    # \w\w\w only matches three adjacent word characters, so a trailing
    # remainder of one or two characters is left out of the counts.
    triplets = re.findall(r'\w\w\w', entry['dna_seq'])

    counts = {}
    for triplet in triplets:
        counts[triplet] = counts.get(triplet, 0) + 1
    return counts
def search_RE(Search_Query, RE_name):
    '''Look for the sites of one restriction enzyme in an entry's DNA sequence.

    param Search_Query: search term forwarded to dbapi.dict_entries
    param RE_name: enzyme name as a string; 'EcoRI', 'BamHI' and 'BsuRI'
                   are the names handled here
    return: one of
            - the "Please enter valid search value ..." message (a string)
              when the database lookup compares equal to False
            - a 2-tuple (label string, result of the enzyme's search call)
              when the search result is not an empty list
            - None, after printing a message, when the search result is an
              empty list or RE_name is not one of the handled names
    '''
    # Per-enzyme texts: (printed when nothing is found, label for a hit).
    texts = {
        'EcoRI': ('There are no EcoRI restriction sites in this DNA sequence',
                  'The location sites for EcoRI are'),
        'BamHI': ('There are no BamHI restriction sites in this DNA sequence',
                  'The location sites for BamHI are'),
        'BsuRI': ('There are no BsuRI restriction sites in this DNA sequence',
                  'The location sites for BsuRI are'),
    }

    alphabet = IUPACAmbiguousDNA()
    record = dbapi.dict_entries(Search_Query)
    if record == False:
        return (
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
    sequence = Seq(record['dna_seq'], alphabet)

    if RE_name not in ('EcoRI', 'BamHI', 'BsuRI'):
        print('search_RE function error: invalid RE name')
        return None

    nothing_found, hit_label = texts[RE_name]
    # The enzyme object is the attribute of Restriction with the same name.
    sites = getattr(Restriction, RE_name).search(sequence)
    if sites == []:
        print(nothing_found)
        return None
    return hit_label, sites
def CDS_markup(Search_Query):
    '''Return an entry's DNA sequence with its CDS regions wrapped in < and >.

    The entry is fetched with dbapi.dict_entries. Every "start..end" pair
    found in the entry's 'cds' field is read as a pair of 1-based positions
    (end assumed inclusive, as in GenBank feature locations) and the
    matching stretch of the 'dna_seq' field is enclosed in "<" ... ">".

    param Search_Query: search term (a string) forwarded unchanged to
                        dbapi.dict_entries
    return: the marked-up sequence as a string, or None (after printing a
            hint) when the lookup yields nothing usable
    '''
    # NOTE(review): an earlier definition of CDS_markup exists in this file;
    # this later one is the definition that takes effect.
    entry = dbapi.dict_entries(Search_Query)
    if not entry:
        print(
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
        # Stop here: subscripting a failed lookup below would raise.
        return None

    # The dots are escaped so that only a literal ".." separates the numbers;
    # an unescaped ".." would match any two characters.
    span_pattern = re.compile(r'(\d+)\.\.(\d+)')
    spans = [(int(found.group(1)), int(found.group(2)))
             for found in span_pattern.finditer(entry['cds'])]

    # Insert the markers into a list of characters. Each insertion moves all
    # later characters one place to the right, which `shift` compensates for.
    # Spans are assumed to be listed in ascending, non-overlapping order.
    bases = list(entry['dna_seq'])
    shift = 0
    for start, end in spans:
        # 1-based start -> 0-based index of the first base of the region.
        bases.insert((start - 1) + shift, "<")
        shift += 1
        # Index `end` (0-based) is the slot just after the last base of the
        # region, so the closing marker lands after that base.
        bases.insert(end + shift, ">")
        shift += 1

    return "".join(bases)
def search_db(Search_Query):
    '''Fetch the raw database result for a search term.

    param Search_Query: term forwarded unchanged to dbapi.dict_entries
    return: the value produced by dbapi.dict_entries for that term
    '''
    lookup_result = dbapi.dict_entries(Search_Query)
    return lookup_result