Esempio n. 1
0
def CDS_markup(Search_Query):
    '''Return the DNA sequence of a database entry with its CDS regions marked up.

    The entry is fetched with dbapi.dict_entries. Every "start..end" pair
    found in the entry's 'cds' string gets a "<" and a ">" inserted into
    the entry's 'dna_seq' string.

    param Search_Query: search value (accession, location, gene_id or
                        protein_id) handed unchanged to dbapi.dict_entries
    return: the marked up sequence as a string, or None (after printing a
            hint) when the lookup gives no entry
    '''
    #Take the search term and search database for corresponding information
    Database_Result = dbapi.dict_entries(Search_Query)
    if not Database_Result:
        # Stop here: indexing a failed lookup further down raised TypeError.
        print(
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
        return None

    # Dots are escaped so that only a literal ".." separates start from end.
    span_pattern = re.compile(r'(\d+)\.\.(\d+)')
    start_end_list = [(int(match.group(1)), int(match.group(2)))
                      for match in span_pattern.finditer(Database_Result['cds'])]

    #Mark up the sequence with < > to locate start and end of CDS regions
    DNA_list = list(Database_Result['dna_seq'])
    # Number of markers inserted so far; each one shifts later indices right.
    # Presumably the pairs arrive in ascending, non-overlapping order,
    # otherwise this running offset no longer lines up - verify against the DB.
    offset = 0

    for start, end in start_end_list:
        DNA_list.insert((start - 1) + offset, "<")
        offset += 1
        # NOTE(review): this index puts ">" in front of the base at position
        # `end`. If positions are 1-based inclusive the marker probably
        # belongs one step later - confirm before changing.
        DNA_list.insert((end - 1) + offset, ">")
        offset += 1

    # join() builds the result in one pass instead of repeated "+=".
    return "".join(DNA_list)
Esempio n. 2
0
def codon_freq(Search_Query):
    '''Return a dictionary of codon usage counts for a database entry.

    The entry is fetched with dbapi.dict_entries and its 'dna_seq' string is
    cut into consecutive, non-overlapping groups of three word characters,
    starting at the first character of the sequence. One or two leftover
    characters at the end are ignored.

    param Search_Query: user search query (accession, location, gene_id or
                        protein_id). The search format must be in a string
    return: dictionary mapping each codon to the number of times it occurs,
            or None (after printing a hint) when the lookup gives no entry
    '''
    # Local import keeps this block self-contained.
    from collections import Counter

    Database_Result = dbapi.dict_entries(Search_Query)
    if not Database_Result:
        # Stop here: indexing a failed lookup further down raised TypeError.
        print(
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
        return None
    DNA_seq = Database_Result['dna_seq']

    #create a list of DNA seq split into groups of 3 non-overlapping nucleotides
    codons = re.findall(r'\w\w\w', DNA_seq)

    # Counter does the tallying; dict() hands callers a plain dictionary.
    return dict(Counter(codons))
Esempio n. 3
0
def search_RE(Search_Query, RE_name):
    '''Search the DNA sequence of a database entry for restriction sites.

    param Search_Query: search value (accession, location, gene_id or
                        protein_id) handed unchanged to dbapi.dict_entries
    param RE_name: name of the restriction enzyme as a string, one of
                   'EcoRI', 'BamHI' or 'BsuRI'
    return: - the tuple ('The location sites for <RE_name> are', sites) when
              the enzyme's search() call finds something; sites is whatever
              that call returns (presumably a list of positions)
            - a help message string when the lookup gives no entry
            - None, after printing a message, when no site is found or
              RE_name is not one of the three supported names
    '''
    amb = IUPACAmbiguousDNA()
    Database_Result = dbapi.dict_entries(Search_Query)
    if not Database_Result:
        return (
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
    DNASeq = Seq(Database_Result['dna_seq'], amb)

    # One lookup replaces three branches that differed only in the name.
    if RE_name not in ('EcoRI', 'BamHI', 'BsuRI'):
        print('search_RE function error: invalid RE name')
        return None

    sites = getattr(Restriction, RE_name).search(DNASeq)
    if not sites:
        print('There are no {} restriction sites in this DNA sequence'.format(
            RE_name))
        return None
    return 'The location sites for {} are'.format(RE_name), sites
Esempio n. 4
0
def search_db(Search_Query):
    '''Look up a query value in the database and return the raw result.

    param Search_Query: the value to search for, either accession, gene_id,
                        location or protein_id
    return: the result of dbapi.dict_entries for the query, passed through
            unmodified (a dictionary of all raw DB information on a match)
    '''
    return dbapi.dict_entries(Search_Query)
Esempio n. 5
0
def codon_freq(Search_Query):
    '''Count how often each codon occurs in the DNA sequence of an entry.

    The 'dna_seq' string of the entry returned by dbapi.dict_entries is
    split, from its first character on, into non-overlapping groups of
    three word characters. A trailing group shorter than three is dropped.

    param Search_Query: search value (accession, location, gene_id or
                        protein_id) handed unchanged to dbapi.dict_entries
    return: dictionary of codons and their counts, or None (after printing
            a hint) when the lookup gives no entry
    '''
    # Local import keeps this block self-contained.
    from collections import Counter

    Database_Result = dbapi.dict_entries(Search_Query)
    if not Database_Result:
        # Return early: the code below cannot index a failed lookup.
        print(
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
        return None

    #create a list of DNA seq split into groups of 3 non-overlapping nucleotides
    triplet = re.compile(r'\w\w\w')
    codon = triplet.findall(Database_Result['dna_seq'])

    #Create a dictionary of codons and populate with the frequency of each codon
    return dict(Counter(codon))
Esempio n. 6
0
def search_RE(Search_Query, RE_name):
    '''takes a restriction enzyme name and will search the gene DNA sequence
    and return the location of the restriction enzyme.

    param Search_Query: search value (accession, location, gene_id or
                    protein_id) handed unchanged to dbapi.dict_entries
    param RE_name: the name of the restriction enzyme you want to search
                    either EcoRI, BamHI or BsuRI. The search format must be in
                    a string.
    returns: a tuple of a message string and the result of the enzyme's
             search() call (presumably a list of site locations) when sites
             are found; a help message string when the database lookup gives
             no entry; None, after printing a message, when there are no
             sites or RE_name is not supported '''

    supported = ('EcoRI', 'BamHI', 'BsuRI')

    amb = IUPACAmbiguousDNA()
    Database_Result = dbapi.dict_entries(Search_Query)
    if not Database_Result:
        return (
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
    DNA_seq = Database_Result['dna_seq']
    DNASeq = Seq(DNA_seq, amb)

    if RE_name in supported:
        # The enzyme object is fetched by name, so the three formerly
        # copy-pasted branches share this single code path.
        enzyme = getattr(Restriction, RE_name)
        found = enzyme.search(DNASeq)
        if found:
            return 'The location sites for {} are'.format(RE_name), found
        print('There are no {} restriction sites in this DNA sequence'.format(
            RE_name))
    else:
        print('search_RE function error: invalid RE name')
    return None
Esempio n. 7
0
def CDS_markup(Search_Query):
    '''This function will take a DNA sequence, identify CDS sites and produce a marked up sequence
    that identifies one or more CDS sites in the DNA sequence.

    The sequence ('dna_seq') and the CDS locations ('cds') both come from
    the entry that dbapi.dict_entries returns for the query.

    param: user search query. The search format must be in a string
    return: marked up sequence in string format, or None (after printing a
            hint) when the lookup gives no entry
    '''
    #Take the search term and search database for corresponding information
    Database_Result = dbapi.dict_entries(Search_Query)
    if not Database_Result:
        # Return early: the code below cannot index a failed lookup.
        print(
            'Please enter valid search value for either accession, location, gene_id or protein_id'
        )
        return None

    # Escaped dots: only a literal ".." may sit between the two numbers.
    p = re.compile(r'(\d+)\.\.(\d+)')
    CDS = Database_Result['cds']
    start_end_list = [(int(m.group(1)), int(m.group(2)))
                      for m in p.finditer(CDS)]
    DNA_seq = Database_Result['dna_seq']

    #Mark up the sequence with < > to locate start and end of CDS regions
    DNA_list = list(DNA_seq)
    # Counts inserted markers, since each insertion shifts later indices by
    # one. Presumably the pairs are in ascending, non-overlapping order -
    # verify against the DB.
    offset = 0

    for start, end in start_end_list:
        DNA_list.insert((start - 1) + offset, "<")
        offset += 1
        # NOTE(review): ">" ends up in front of the base at position `end`.
        # With 1-based inclusive positions it probably belongs one step
        # later - confirm before changing.
        DNA_list.insert((end - 1) + offset, ">")
        offset += 1

    # Single join instead of character-by-character concatenation.
    return "".join(DNA_list)
Esempio n. 8
0
def search_db(Search_Query):
    '''Search the database for Search_Query and return the raw outcome.

    param Search_Query: value handed unchanged to dbapi.dict_entries
    return: the result of dbapi.dict_entries, unmodified
    '''
    return dbapi.dict_entries(Search_Query)