Ejemplo n.º 1
0
def dat_parser(sequence,
               fields=("name", "accession", "description", "pattern")):
    """Find the PROSITE patterns from prosite.dat that occur in a sequence.

    Every record of ``prosite_files/prosite.dat`` is read, its PROSITE
    pattern is translated to ``re`` syntax and searched in ``sequence``.

    Args:
        sequence: String in which the patterns are searched.
        fields: Not used by the function; kept (now as an immutable tuple)
            so existing calls remain valid.

    Returns:
        A list with one ``[name, pdoc, description, regex]`` list per
        matching record, ``regex`` being the translated pattern.
    """
    # Single-pass translation table, so the result cannot depend on the
    # order in which the individual replacements are applied.
    prosite_to_regex = str.maketrans({
        '-': '',
        '{': '[^',  # {X} = [^X]
        '}': ']',
        '(': '{',  # (from, to) = {from, to}
        ')': '}',
        'X': '.',  # x, X = any (.)
        'x': '.',
        '<': '^',  # < = N-terminal
        '>': '$',  # > = C-terminal
    })

    hits = []
    with open("prosite_files/prosite.dat", "r") as handle:
        for record in Prosite.parse(handle):
            pattern = record.pattern.strip('.').translate(prosite_to_regex)
            if pattern != "" and re.search(pattern, sequence):
                # NOTE(review): the second column is record.pdoc although
                # `fields` names it "accession" - confirm which is intended.
                hits.append(
                    [record.name, record.pdoc, record.description, pattern])
    return hits
Ejemplo n.º 2
0
def parseo_dat(input_file):
    """Dump every record of a PROSITE .dat file as tab-separated text.

    One line per record is written, holding the name, accession,
    description and pattern, to ``DATA/data_base_prosite//db``.

    Args:
        input_file: Path of the PROSITE .dat file.

    Returns:
        The path of the written file.

    Raises:
        SystemExit: If ``input_file`` does not exist (after printing an
            error message).
    """
    if not os.path.isfile(input_file):
        print('ERROR:No existe el archivo indicado')
        # Non-zero status so callers and shells can detect the failure.
        sys.exit(1)

    path = "DATA/data_base_prosite/"
    # makedirs also creates the parent "DATA" directory when it is missing.
    os.makedirs(path, exist_ok=True)

    out = path + "/db"
    with open(input_file, "r") as handle, open(out, "w") as output:
        for record in Prosite.parse(handle):
            output.write("\t".join((record.name, record.accession,
                                    record.description, record.pattern))
                         + "\n")

    return out
Ejemplo n.º 3
0
def prositeToJSON(prositeDb, fp=sys.stdout):
    """
    This is a parser for the prosite database, to turn the relevant entries of
    the prosite database into JSON. Currently it only makes two entries,
    the accession and the pattern. The pattern is converted into a regex.
    One JSON object is printed per line, and only for records whose converted
    pattern is non-empty.

    The prosite database is available at:
    ftp://ftp.expasy.org/databases/prosite/prosite.dat
    An explanation about the fields and structure of the database is available
    at: http://prosite.expasy.org/prosuser.html

    @param prositeDb: The C{str} filename of the prosite database.
    @param fp: A file pointer.
    @raises AssertionError: if any accession string in the database does not
        start with "PS" or if the database contains a duplicate accession
        string.
    """
    seen = set()
    # The context manager closes the database file even if a record fails.
    with open(prositeDb) as handle:
        for record in Prosite.parse(handle):
            accession = record.accession
            assert accession not in seen
            assert accession.startswith('PS')
            seen.add(accession)
            # The last character of the stored pattern is dropped before
            # conversion (presumably the terminating period).
            pattern = patternToRegex(record.pattern[:-1])
            if pattern:
                print(dumps(
                    {
                        # The "PS" prefix checked above is not stored.
                        'accession': accession[2:],
                        'pattern': pattern,
                    }, separators=(',', ':')), file=fp)
Ejemplo n.º 4
0
def create_dic_dominios():
    """Build a dictionary of regex-translated PROSITE patterns.

    Reads ``prosite.dat`` and translates every non-empty pattern into a
    form usable by the ``re`` module.

    Returns:
        A dict mapping each translated pattern to the accession of its
        record.
    """
    dic_dominios = {}

    with open("prosite.dat", "r") as handle:
        for record in Prosite.parse(handle):
            # Drop the period that terminates a PROSITE pattern; left in
            # place it becomes a regex "any character" and would demand one
            # extra residue after every motif.
            pattern = record.pattern.rstrip('.')
            if pattern == "":
                continue

            pattern = pattern.replace('{', '[^')
            pattern = pattern.replace('}', ']')
            pattern = pattern.replace('(', '{')
            pattern = pattern.replace(')', '}')
            pattern = pattern.replace('-', '')
            pattern = pattern.replace('x', '.')
            pattern = pattern.replace('>', '$')
            pattern = pattern.replace('<', '^')

            dic_dominios[pattern] = record.accession

    return dic_dominios
Ejemplo n.º 5
0
def prositeGetPloop():
    """Print the pattern of the PS00017 record found in prosite.dat."""
    # Opened once and closed by the context manager.
    with open("prosite.dat") as handle:
        for record in Prosite.parse(handle):
            if record.accession == "PS00017":
                print(record.pattern)
Ejemplo n.º 6
0
def c_patterns():
    """Map each PROSITE accession to ``[regex, name, description]``.

    Reads ``prosite.dat``; records with an empty pattern are skipped. The
    PROSITE pattern of every kept record is converted to ``re`` syntax.

    Returns:
        A dict ``accession -> [regex pattern, name, description]``.
    """
    # Single-pass PROSITE -> regex translation. Exclusions ({..}) and the
    # terminal anchors (<, >) are converted as well, otherwise they would
    # be taken literally by the regex engine.
    to_regex = str.maketrans({
        "-": "",
        "x": ".",
        "(": "{",
        ")": "}",
        "{": "[^",
        "}": "]",
        "<": "^",
        ">": "$",
    })

    pattern_dict = dict()
    with open("prosite.dat", "r") as handle:
        for record in Prosite.parse(handle):
            if record.pattern != "":
                pattern_dict[record.accession] = [
                    # The terminating period is removed before translating
                    # so it does not turn into a regex wildcard.
                    record.pattern.rstrip(".").translate(to_regex),
                    record.name,
                    record.description,
                ]

    return pattern_dict
Ejemplo n.º 7
0
def search_pattern(dic, input, output):
    """Search every pattern of ``dic`` in the proteins of a FASTA file.

    For each protein a header line is written to ``output``; for each
    pattern found in it, the name, accession, description and original
    PROSITE pattern of the matching ``prosite.dat`` record are written,
    followed by every match and its start/end positions.

    Args:
        dic: Mapping ``regex pattern -> PROSITE accession``.
        input: Path of the FASTA file with the protein sequences.
        output: Path of the text report to write.

    Returns:
        ``output``, unchanged.
    """
    # accession -> records of prosite.dat; built once, on the first hit,
    # instead of re-parsing the whole database for every hit.
    prosite_index = None

    with open(input, "r") as input_handle, \
            open(output, "w") as output_handle:

        for protein in SeqIO.parse(input_handle, "fasta"):
            prot = str(protein.seq)
            print("\n>" + protein.id + "\n", file=output_handle)

            for pattern, accession in dic.items():
                if not re.search(pattern, prot):
                    continue

                if prosite_index is None:
                    prosite_index = {}
                    with open("prosite.dat", "r") as handle:
                        for entry in Prosite.parse(handle):
                            prosite_index.setdefault(
                                entry.accession, []).append(entry)

                # An accession missing from prosite.dat yields no output.
                for entry in prosite_index.get(accession, ()):
                    print("\tDominio: " + entry.name + " | " + accession,
                          file=output_handle)
                    print("\tDescripción: " + entry.description,
                          file=output_handle)
                    print("\tPatrón: " + entry.pattern,
                          file=output_handle)

                    for m in re.finditer(pattern, prot):
                        print("\t- " + m.group() + " - Posición: " +
                              str(m.start()) + " - " + str(m.end()),
                              file=output_handle)

                    print("", file=output_handle)

    return output
def RecupererPattern(ident):
    """Return the pattern of the PROSITE entry ``ident`` fetched from ExPASy.

    Args:
        ident: Identifier passed to ``ExPASy.get_prosite_raw``.

    Returns:
        The pattern string when the record type is ``'PATTERN'``. ``None``
        otherwise; for a ``'MATRIX'`` record a notice is printed first.
    """
    # NOTE(review): a second function with this same name is defined right
    # after this one in the file and replaces it when the module is loaded.
    handle = ExPASy.get_prosite_raw(ident)
    try:
        record = Prosite.read(handle)
    finally:
        # Release the handle even if parsing fails.
        handle.close()

    if record.type == 'PATTERN':
        return record.pattern
    if record.type == 'MATRIX':
        print("L'identifiant que vous avez entré correspont à un profile !")
    return None
def RecupererPattern(ident):
    """Print the pattern of a PROSITE entry and its Python translation.

    The entry ``ident`` is fetched with ``ExPASy.get_prosite_raw``. For a
    ``'PATTERN'`` record the pattern and the result of
    ``traduireSequence(pattern)`` are printed; for a ``'MATRIX'`` record a
    notice is printed instead. Nothing is returned.

    Args:
        ident: Identifier passed to ``ExPASy.get_prosite_raw``.
    """
    handle = ExPASy.get_prosite_raw(ident)
    try:
        record = Prosite.read(handle)
    finally:
        # Release the handle even if parsing fails.
        handle.close()

    if record.type == 'PATTERN':
        print("La forme régulière est: ", record.pattern)
        print("La traduction en Python est: ",
              traduireSequence(record.pattern))
    elif record.type == 'MATRIX':
        print("L'identifiant que vous avez entré correspont à un profile !")
Ejemplo n.º 10
0
def dat_parser(prosite_dat):
    """Map the translated pattern of every PROSITE record to its accession.

    Args:
        prosite_dat: Path of the PROSITE .dat file.

    Returns:
        A dict ``pattern_translation(record.pattern) -> record.accession``.
        Records whose translated patterns are equal share one key; the
        last one read wins.
    """
    print('\n' + ('Parsing prosite file...').center(80))
    PatternDict = {}
    # The context manager closes the database file once it has been read.
    with open(prosite_dat, "r") as handle:
        for record in Prosite.parse(handle):
            PatternDict[pattern_translation(record.pattern)] = record.accession

    return PatternDict
Ejemplo n.º 11
0
def donneesProt():
    """Save selected fields of every prosite.dat record to a text file.

    For each record, the accession, name, pattern and pdoc are written to
    ``prosite_entries.dat`` as one comma-separated line.
    """
    # Both files are closed by the context manager, also on error (the
    # input file used to stay open).
    with open("prosite.dat") as handle, \
            open("prosite_entries.dat", "w") as save_file:
        for record in Prosite.parse(handle):
            save_file.write(','.join((record.accession, record.name,
                                      record.pattern, record.pdoc))
                            + '\r\n')
Ejemplo n.º 12
0
def prosite_to_pandas(input, temp_out):
    """Load accession, type, name and description of PROSITE records.

    The fields are first appended to ``temp_out`` as a tab-separated table
    (header ``ACC  TYPE  NAME  DESCRIPTION``), which is then read back with
    pandas.

    Args:
        input: Path of the PROSITE .dat file.
        temp_out: Path of the intermediate tab-separated file.

    Returns:
        The ``DataFrame`` produced by ``pd.read_csv(temp_out, sep='\\t')``.
    """
    # One handle for the whole table, closed (and therefore flushed) before
    # pandas reads the file back.
    # NOTE(review): append mode is kept from the original, so rows from a
    # previous run of the same temp_out are read back too - confirm intent.
    with open(temp_out, "a") as table:
        print('ACC\tTYPE\tNAME\tDESCRIPTION', file=table)
        with open(input) as handle:
            for prosite_record in Prosite.parse(handle):
                print('\t'.join((prosite_record.accession,
                                 prosite_record.type,
                                 prosite_record.name,
                                 prosite_record.description)),
                      file=table)
    pandatab = pd.read_csv(temp_out, sep='\t')
    return pandatab
Ejemplo n.º 13
0
def finder(folder_name):
    """Compare the PROSITE domains with the proteins of each alignment.

    Every file found in ``./Results/<folder_name>/Domains/tmp`` is treated
    as a FASTA file. A ``<file without "_tmp">_domains`` report is appended
    to in ``./Results/<folder_name>/Domains/``: a header line, then for each
    domain found in a protein its name, accession, description, pattern,
    the protein id and the span of every match. Each processed tmp file is
    deleted, and finally the tmp directory itself.

    Args:
        folder_name: Name of the results sub-folder to process.
    """
    domains_dir = "./Results/" + folder_name + "/Domains/"
    tmp_dir = domains_dir + "tmp"

    # Single-pass PROSITE -> regex translation.
    to_regex = str.maketrans({
        ".": "", "x": ".", "{": "[^", "}": "]", "(": "{", ")": "}",
        "<": "^", ">": "$", "-": "",
    })
    # (record, compiled regex) pairs; the database is parsed only once, when
    # the first protein is seen, instead of once per protein.
    prosite_domains = None

    for file_name in os.listdir(tmp_dir):
        tmp_path = tmp_dir + "/" + file_name
        result_path = (domains_dir + file_name.replace("_tmp", "")
                       + "_domains")

        with open(result_path, 'a') as f:
            # Header of the results file
            f.write("Domain Name\tDomain Accession\tDomain Description\t"
                    "Domain Pattern\t Protein ID\tPosition\n")

            with open(tmp_path, 'r') as input_handle:
                # Iterate over each protein
                for protein in SeqIO.parse(input_handle, "fasta"):
                    seq_re = str(protein.seq)

                    if prosite_domains is None:
                        prosite_domains = []
                        with open("./Data/Domain_DB/prosite.dat",
                                  "r") as handle:
                            for domain in Prosite.parse(handle):
                                pattern_re = domain.pattern.translate(
                                    to_regex)
                                if pattern_re != "":
                                    prosite_domains.append(
                                        (domain, re.compile(pattern_re)))

                    # Iterate over each domain
                    for domain, regex in prosite_domains:
                        spans = [m.span() for m in regex.finditer(seq_re)]
                        if not spans:
                            continue
                        f.write("\n" + domain.name + "\t" + domain.accession
                                + "\t" + domain.description + "\t"
                                + domain.pattern + "\t" + protein.id + "\t")
                        f.write("".join(str(span) for span in spans))

        os.remove(tmp_path)

    # No trailing f.close(): every file is closed by its `with` block, and
    # the name would not even exist when the tmp directory is empty.
    os.rmdir(tmp_dir)
Ejemplo n.º 14
0
def get_documentation(accession):
    """Download the documentation of a PROSITE entry and save it to disk.

    The PROSITE record for ``accession`` is fetched, then the documentation
    entry named by its ``pdoc`` field is downloaded and written to
    ``my_prodoc_record.html`` in the current directory.

    Args:
        accession: PROSITE accession (signature_ac), e.g. "PS00001".

    Returns:
        None; the result is the written file.
    """
    handle_1 = ExPASy.get_prosite_raw(accession)
    try:
        records = Prosite.read(handle_1)
    finally:
        handle_1.close()

    handle_2 = ExPASy.get_prodoc_entry(records.pdoc)
    try:
        html = handle_2.read()
    finally:
        handle_2.close()

    with open("my_prodoc_record.html", "w") as out_handle:
        out_handle.write(html)
Ejemplo n.º 15
0
def parse_dat(prositedat, output_file):
    """Write name, accession, description and pattern of PROSITE records.

    ``output_file`` is created first and receives a header line followed by
    one tab-separated line per record of ``prositedat``.

    Args:
        prositedat: Path of the PROSITE .dat file.
        output_file: Path of the tab-separated file to write.

    Returns:
        The (already closed) output file object on success. ``None`` when
        anything fails; an error message is printed in that case.
    """
    try:
        with open(output_file, 'w') as dominios:
            dominios.write('name\taccession\tdescription\tpattern\n')
            # The input is closed by its own context manager, also on error.
            with open(prositedat, 'r') as handle:
                for record in Prosite.parse(handle):
                    dominios.write(record.name + '\t' + record.accession +
                                   '\t' + record.description + '\t' +
                                   record.pattern + '\n')
        return dominios
    except Exception:
        # Best-effort behaviour is kept, but KeyboardInterrupt/SystemExit
        # are no longer swallowed as they were by the bare `except:`.
        print('No se ha podido leer el archivo: ' + prositedat +
              '. Abortando módulo...')
Ejemplo n.º 16
0
 def Prosite_Domain(self):
     """Scan the stored sequence with ScanProsite and print the hits.

     The sequence held in ``self.__seq_input`` is submitted through
     ``ScanProsite.scan``. For every hit, the PROSITE record named by its
     ``signature_ac`` is fetched and its name printed. A message is printed
     when there are no hits, and another one when any step fails.
     """
     from Bio import ExPASy
     from Bio.ExPASy import Prosite, ScanProsite
     try:
         handle = ScanProsite.scan(seq=self.__seq_input)
         result = ScanProsite.read(handle)
         if len(result) == 0:
             print('Não foram encontradas correspondências.')
         else:
             for hit in result:
                 r = ExPASy.get_prosite_raw(hit['signature_ac'])
                 try:
                     html = Prosite.read(r)
                 finally:
                     # Close the handle even if the record cannot be read.
                     r.close()
                 print('Foi encontrado um dominio %s.' % (html.name))
     except Exception:
         # NOTE(review): every failure (network errors included) is
         # reported as "not a protein sequence", as in the original.
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         print('A sequência fornecida não é uma sequência proteica.')
Ejemplo n.º 17
0
def Prosite_parser():
    """Search the PROSITE patterns in every sequence of the BLAST hit files.

    Each file in ``Results/Blast_Hits/`` is read as alternating lines of
    header (``>name``) and sequence. For every sequence a
    ``Results/Protein_Domains/<name>_domains`` file is written, listing the
    name, accession, description and pattern of each PROSITE record whose
    pattern is found in the sequence.
    """
    # Single-pass PROSITE -> regex translation.
    to_regex = str.maketrans({
        "-": "",
        "{": "[^",
        "}": "]",
        "(": "{",
        ")": "}",
        "X": ".",
        "x": ".",
        "<": "^",
        ">": "$",
    })

    # The records are materialised once: Prosite.parse is consumed by the
    # first pass, so re-iterating it for later sequences found nothing.
    domain_regexes = []
    with open("Prosite_DB/prosite.dat", "r") as prosite_handle:
        for record in Prosite.parse(prosite_handle):
            # [:-1] drops the final character of the stored pattern.
            pattern = record.pattern[:-1].translate(to_regex)
            # Records without a pattern are skipped; an empty regex would
            # match every sequence.
            if pattern:
                domain_regexes.append((record, re.compile(pattern)))

    os.makedirs("Results/Protein_Domains", exist_ok=True)

    for file in os.listdir("Results/Blast_Hits/"):
        with open('Results/Blast_Hits/' + file, 'r') as file_handle:
            cds_seqs = file_handle.read().splitlines()

        # Pair each header line with the sequence line that follows it; a
        # trailing header without a sequence is ignored.
        for header, sequence in zip(cds_seqs[0::2], cds_seqs[1::2]):
            seq_name = header[1:]
            print("Parsing domains of " + seq_name)
            result_name = "Results/Protein_Domains/" + seq_name + "_domains"
            with open(result_name, "w") as protein:
                for record, regex in domain_regexes:
                    if regex.search(sequence):
                        protein.write(
                            "Name: %s\nAccesion: %s\nDescription: %s\n"
                            "Pattern: %s\n\n"
                            % (record.name, record.accession,
                               record.description, record.pattern))
            print("Domains parsed\n")
Ejemplo n.º 18
0
def domainsearch(filename):
    """Search the PROSITE patterns in the proteins of a FASTA file.

    The patterns of ``../prosite.dat`` are translated to regular
    expressions and searched in every sequence of ``filename``.

    Args:
        filename: FASTA file containing the protein sequences.

    Returns:
        A list with one entry per protein, in file order. Each entry is a
        list of matches ``[protein id, matched text, start, end, PROSITE
        pattern]``; it is empty when nothing matched.
    """
    with open(filename, "r") as handle:
        fasta = list(SeqIO.parse(handle, "fasta"))

    # Single-pass PROSITE -> regex translation.
    to_regex = str.maketrans({
        ".": "", "x": ".", "X": ".", "-": "", "{": "[^", "}": "]",
        "<": "^", ">": "$", "(": "{", ")": "}",
    })

    # (compiled regex, original PROSITE pattern) for records with a pattern.
    patterns = []
    with open("../prosite.dat", "r") as handle:
        for record in Prosite.parse(handle):
            if record.pattern:
                patterns.append(
                    (re.compile(record.pattern.translate(to_regex)),
                     record.pattern))

    results = []
    for protein in fasta:
        sequence = str(protein.seq)
        results.append([
            [protein.id, m.group(), m.start(), m.end(), prosite_pattern]
            for regex, prosite_pattern in patterns
            for m in regex.finditer(sequence)
        ])

    return results
Ejemplo n.º 19
0
def output_results(prosite_dat, ResultDict, Results_Dir):
    """Write a report of the PROSITE domains found in each protein.

    The report is written to ``Results_Dir + 'prosite_result.txt'``. For
    every protein a title line is written, followed by the name, accession,
    description and pattern of each of its domains as found in
    ``prosite_dat``.

    Args:
        prosite_dat: Path of the PROSITE .dat file.
        ResultDict: Mapping ``protein id -> iterable of accessions``.
        Results_Dir: Directory prefix of the report; it is concatenated
            as-is, so it must end with a path separator.
    """
    # accession -> records; the database is parsed once, when the first
    # domain is looked up, instead of once per domain.
    records_by_accession = None

    # The context manager guarantees the report is flushed and closed.
    with open(Results_Dir + 'prosite_result.txt', "w") as output:
        for protein, domains in ResultDict.items():
            output.write('Protein ' + protein +
                         ' has the following domains:\n\n')
            for dominio in domains:
                if records_by_accession is None:
                    records_by_accession = {}
                    with open(prosite_dat, "r") as handle:
                        for record in Prosite.parse(handle):
                            records_by_accession.setdefault(
                                record.accession, []).append(record)

                for record in records_by_accession.get(dominio, ()):
                    output.write("\tDomain name: " + record.name + '\n')
                    output.write("\tDomain accession: " + record.accession +
                                 '\n')
                    output.write("\tDomain description: " +
                                 record.description + '\n')
                    output.write("\tPattern found: " + record.pattern +
                                 '\n\n')

    return
Ejemplo n.º 20
0
def Parsear_prosite(folder_result):
    """Export the main fields of every prosite.dat record to a TSV file.

    ``prosite_parser.tsv`` receives a header line and then one line per
    record with its name, accession, description and pattern, separated by
    tabs. ``folder_result`` is accepted but not used.
    """
    header = "\t".join(("Name", "Accesion", "Desctiption", "Pattern")) + "\n"

    with open("prosite.dat", "r") as data_base:
        with open("prosite_parser.tsv", "w") as result_file:
            result_file.write(header)

            for entry in Prosite.parse(data_base):
                columns = (entry.name, entry.accession, entry.description,
                           entry.pattern)
                result_file.write("\t".join(columns) + "\n")
Ejemplo n.º 21
0
def Parsing_prosite(handle="prosite.dat"):
    """Export name, accession, description and pattern of PROSITE records.

    The table is written to ``prosite_parsed.tsv`` with a header line and
    one tab-separated line per record.

    Args:
        handle: Path of the PROSITE .dat file, or an already open file
            object (anything with a ``read`` attribute). The argument used
            to be ignored in favour of a hard-coded "prosite.dat".
    """
    # Only close what this function opened itself.
    opened_here = not hasattr(handle, "read")
    source = open(handle, "r") if opened_here else handle
    try:
        with open("prosite_parsed.tsv", "w") as parsed_out:
            parsed_out.write("NAME\tACCESSION\tDESCRIPTION\tPATTERN\n")

            for record in Prosite.parse(source):
                parsed_out.write("%s\t%s\t%s\t%s\n" % (
                    record.name,
                    record.accession,
                    record.description,
                    record.pattern))
    finally:
        if opened_here:
            source.close()
Ejemplo n.º 22
0
def parsear():
    """Parse prosite.dat and extract the main fields of every record.

    The name, accession, description and pattern of each record are
    written, tab-separated, one record per line.
    """
    inpfile = 'prosite.dat'
    path1 = "results/prosite"
    # Creates "results" as well when it is missing, and does nothing when
    # the directory already exists.
    os.makedirs(path1, exist_ok=True)

    # NOTE(review): there is no separator between path1 and "database", so
    # the file is "results/prositedatabase", next to (not inside) the
    # directory created above. Kept as-is because other code may read that
    # path - confirm the intended location.
    with open(path1 + "database", "w") as out, \
            open(inpfile, "r") as handle:
        for record in Prosite.parse(handle):
            out.write(record.name + "\t")
            out.write(record.accession + "\t")
            out.write(record.description + "\t")
            out.write(record.pattern + "\n")
Ejemplo n.º 23
0
def encuentra(filtrado):
    """Collect the PROSITE data needed to build the prosite report.

    Reads ``prosite.dat`` and builds four parallel lists (regex patterns,
    names, accessions and descriptions), then hands them to ``busca``.

    Args:
        filtrado: Passed through unchanged as the first argument of
            ``busca``.

    Returns:
        Whatever ``busca(filtrado, patterns, names, accessions,
        descriptions)`` returns.
    """
    # Single-pass PROSITE -> regex translation. Every pattern is fully
    # converted; previously the converted text was stored only when a "-"
    # was met, which lost conversions triggered after the last "-".
    to_regex = str.maketrans({
        "x": ".", "X": ".", "{": "[^", "}": "]", "(": "{", ")": "}",
        "-": "", "<": "^", ">": "$",
    })

    # patterns to search for with re
    patrones = []
    # names of the domains in prosite
    nombres_prosite = []
    # descriptions
    descripcion = []
    # accessions
    accesion = []

    with open("prosite.dat", "r") as handle:
        for record in Prosite.parse(handle):
            # [:-1] drops the final character of the stored pattern.
            patrones.append(record.pattern[:-1].translate(to_regex))
            nombres_prosite.append(record.name)
            accesion.append(record.accession)
            descripcion.append(record.description)

    return busca(filtrado, patrones, nombres_prosite, accesion, descripcion)
Ejemplo n.º 24
0
def domaininfo(keydomains):
    """Extend the PROSITE matches with information about their domains.

    For every record of ``../prosite.dat`` whose pattern equals the last
    element of a match, the accession, name and description are appended
    to that match, followed by the text of each ``../prosite.doc`` entry
    whose references contain the value at index 5 of the match.

    Args:
        keydomains: A list of lists of matches; the matches are modified
            in place.

    Returns:
        The same ``keydomains`` object.
    """
    # (str(prosite_refs), text) of every documentation entry; loaded on the
    # first match instead of re-parsing prosite.doc for each one.
    prodoc_entries = None

    with open("../prosite.dat", "r") as dat_handle:
        for record in Prosite.parse(dat_handle):
            for protein_matches in keydomains:
                for match in protein_matches:
                    if record.pattern != match[-1]:
                        continue

                    match.append(record.accession)
                    match.append(record.name)
                    match.append(record.description)

                    if prodoc_entries is None:
                        with open("../prosite.doc") as doc_handle:
                            prodoc_entries = [
                                (str(info.prosite_refs), info.text)
                                for info in Prodoc.parse(doc_handle)
                            ]

                    # Substring test against the textual form of the
                    # references, as in the original lookup.
                    reference = str(match[5])
                    for refs, text in prodoc_entries:
                        if reference in refs:
                            match.append(text)

    return keydomains
Ejemplo n.º 25
0
def findDomains(multifasta, output=''):
    """Search PROSITE domains in the proteins of a multi-FASTA file.

    A report is written to ``results/prosite/dominios_<output>.txt``: for
    each header line the protein title, and for each sequence line the
    pattern, name, accession and description of every PROSITE record whose
    translated pattern is found in that line. The report ends with the
    documentation text of the prosite.doc entries whose first reference is
    one of the accessions found.

    Args:
        multifasta: File with all the proteins to search for domains.
        output: Name of the query, used in the report file name.
    """
    # (record, regex) pairs for records with a non-empty translated
    # pattern; built at the first sequence line instead of re-parsing
    # prosite.dat for every line.
    prosite_patterns = None
    # Accessions found in the multi-FASTA (needed to look up prosite.doc).
    accession = set()

    with open(multifasta, 'r') as file:
        os.makedirs('results/prosite', exist_ok=True)
        # one report per query
        output_file = str('results/prosite/dominios_' + output + '.txt')

        with open(output_file, 'w') as result:
            for line in file:
                if line.startswith('>'):
                    result.write('*************************************************************************************************************'+'\n')
                    # title: name of the protein
                    result.write(line.replace('>', '') + '\n')
                    continue

                if prosite_patterns is None:
                    prosite_patterns = []
                    with open('prosite.dat', 'r') as handle:
                        for record in Prosite.parse(handle):
                            patron = repl(record.pattern)
                            if len(patron) != 0:
                                prosite_patterns.append((record, patron))

                for record, patron in prosite_patterns:
                    if re.search(patron, line):
                        result.write('Patron: ' + record.pattern + '\nName: ' + record.name + '\nAccession: ' + record.accession + '\nDescription: ' + record.description + '\n\n')
                        accession.add(record.accession)

            result.write('\n\n\n\nINFORMACION DE LOS DOMINIOS\n\n')
            with open('prosite.doc', 'r') as handle:
                for record in Prodoc.parse(handle):
                    if len(record.prosite_refs) != 0 and record.prosite_refs[0][0] in accession:
                        result.write(record.text + '\n\nAccession prodoc: ' + record.accession + '\nAccession prosite: ' + record.prosite_refs[0][0] + '\n')
                        result.write('**************************************************************************************\n\n\n')
def dataparse(Contador):
    """Search PROSITE domains in every BLAST hit sequence.

    Reads ``blast_hits<Contador>.fasta`` and writes to
    ``domain_prosite<Contador>``. For each sequence its name is written,
    followed by the name, accession, description and translated pattern of
    every PROSITE record whose pattern is found in the sequence.

    Args:
        Contador: Value inserted into the input and output file names.
    """
    print("\nEsta parte puede tardar un tiempo, por favor espere")

    # (record, regex) pairs with a non-empty regex; prosite.dat is parsed
    # once, at the first sequence, and its handle is closed afterwards.
    prosite_patterns = None

    with open("blast_hits{}.fasta".format(Contador), "r") as input_handle, \
            open("domain_prosite{}".format(Contador), "w") as output_handle:
        for record in SeqIO.parse(input_handle, "fasta"):
            sequence = str(record.seq)
            output_handle.write("\n" + record.name + "\n" + "\n")

            if prosite_patterns is None:
                prosite_patterns = []
                with open("prosite.dat", "r") as handle:
                    for entry in Prosite.parse(handle):
                        regex = correct_pattern(str(entry.pattern))
                        if regex != "":
                            prosite_patterns.append((entry, regex))

            for entry, regex in prosite_patterns:
                if re.search(regex, sequence):
                    output_handle.write("name:" + entry.name + "\n")
                    output_handle.write("accession:" + entry.accession +
                                        "\n")
                    output_handle.write("description:" + entry.description +
                                        "\n")
                    output_handle.write("pattern:" + regex + "\n" + "\n")
Ejemplo n.º 27
0
 def test_prosite_raw(self):
     """Fetch PS00001 through ExPASy and check its accession and name."""
     handle = ExPASy.get_prosite_raw("PS00001")
     try:
         record = Prosite.read(handle)
     finally:
         # Close the handle even when reading the record fails.
         handle.close()
     self.assertEqual(record.accession, "PS00001")
     self.assertEqual(record.name, "ASN_GLYCOSYLATION")
Ejemplo n.º 28
0
 def test_prosite_raw(self):
     """Fetch PS00001 through ExPASy and check its accession and name."""
     handle = ExPASy.get_prosite_raw('PS00001')
     try:
         record = Prosite.read(handle)
     finally:
         # Close the handle even when reading the record fails.
         handle.close()
     self.assertEqual(record.accession, 'PS00001')
     self.assertEqual(record.name, 'ASN_GLYCOSYLATION')
Ejemplo n.º 29
0
 def test_prosite_raw(self):
     """Fetch PS00001 through ExPASy and check its accession and name."""
     accession = "PS00001"
     with ExPASy.get_prosite_raw(accession) as stream:
         entry = Prosite.read(stream)
     self.assertEqual(entry.accession, accession)
     self.assertEqual(entry.name, "ASN_GLYCOSYLATION")
Ejemplo n.º 30
0
def patfinder(fasta, output):
    """Interactively search PROSITE patterns in the sequences of a FASTA file.

    For every sequence of ``fasta`` the patterns of ``prosite.dat`` are
    converted to regular expressions and searched. The hits are printed and
    also written to ``output``, together with the position and text of each
    match. After each sequence the user is asked to press ENTER and, when
    domains were found, may ask for the ``prosite.doc`` text of those
    domains.

    Args:
        fasta: Name of the FASTA file with the sequences to search (str).
        output: Name of the file where the results are stored (str).
    """

    # Short start-up animation: the same line is rewritten four times.
    for x in range(0, 4):
        print(">> Starting Prosite pattern search " + "." * x, end="\r")
        sleep(0.2)
    print()

    # Count number of proteins to analyze.
    nseqs = 0
    with open(fasta, "r") as file:
        for line in file:
            if line[0] == ">":
                nseqs += 1

    print("\n> %d proteins to analyze." % nseqs)
    print(
        "\nThe results will be shown sequence by sequence. Each query sequence (name ended by _QUERYSEQ) will be followed by its blastp hits."
    )
    print(
        "On the results folder, you'll find also a txt file with all the results, including also the exact sequence of the domains on the proteins and their position."
    )
    print(
        "You'll be given also the option to see further information of the found domains of your choice."
    )
    input("\nPRESS ENTER TO CONTINUE\n")

    # Parallel lists: initial[i] (PROSITE syntax) is replaced by final[i]
    # (regex syntax), in this order.
    initial = [".", "-", "<", ">", "x", "X", "{", "}", "(", ")"]
    final = ["", "", "^", "$", ".", ".", "[^", "]", "{", "}"]

    out_file = open(output, "w")

    # 1-based index of the sequence being processed.
    j = 0
    with open(fasta, "r") as seqs_handle:
        # Parse prosite.dat
        for seq_record in SeqIO.parse(seqs_handle, "fasta"):
            # `call` is defined outside this block (presumably
            # subprocess.call); 'clear' clears the terminal.
            call('clear')
            j += 1
            seq, seq_id = seq_record.seq, seq_record.id
            print("\n---------------------------------------")
            print(">> (%d of %d) Prosite domains on sequence %s" %
                  (j, nseqs, seq_id))
            print("-----------------------------------------")
            out_file.write("\n\n>> Prosite domains on sequence %s" % seq_id)
            # prosite.dat is read again for every sequence.
            with open("prosite.dat", "r") as handle:
                pat_records = Prosite.parse(handle)
                # total: number of records with at least one hit;
                # results: their accessions.
                total = 0
                results = []
                for record in pat_records:

                    # Some patterns are empty. If not, convert them to regular expresions.
                    if record.pattern != "":
                        pattern = record.pattern
                        for i in range(0, len(initial)):
                            pattern = pattern.replace(initial[i], final[i])

                        # Search domains.
                        matches = re.finditer(pattern, str(seq))
                        hit = False
                        domains, pos = [], []
                        for m in matches:
                            domains.append(m.group())
                            pos.append(m.start())
                            hit = True

                        # Show found domains.
                        if hit == True:
                            total += 1
                            print("\n> Found %d hits for domain %s." %
                                  (len(domains), record.name))
                            out_file.write(
                                "\n> Found %d hits for domain %s:\n" %
                                (len(domains), record.name))
                            out_file.write("Pos\tHit sequence\n")
                            out_file.write("---\t------------\n")
                            for i in range(0, len(domains)):
                                out_file.write("%s\t%s\n" %
                                               (pos[i], domains[i]))

                            results.append(record.accession)
                            print("Domain accesion id: %s" % record.accession)
                            print("Description: %s" % record.description)
                            print("Pattern: %s" % record.pattern)
                            out_file.write("Domain accesion id: %s\n" %
                                           record.accession)
                            out_file.write("Description: %s\n" %
                                           record.description)
                            out_file.write("Pattern: %s\n" % record.pattern)

                # Per-sequence summary, then wait for the user.
                if total == 0:
                    print("No domains found for this protein.")
                    out_file.write("No domains found for this protein.")
                    print("\n----------------------------")
                    input("PRESS ENTER TO CONTINUE\n")
                else:
                    print("\nTotal: %d different domains found.\n" % total)
                    out_file.write(
                        "\n\nTotal: %d different domains found.\n\n" % total)
                    print(
                        "\n---------------------------------------------------------"
                    )
                    print(
                        "If you want further information of these domains, press Y."
                    )
                    print("Press ENTER or any other key to continue.")
                    selection = input("> ")
                    if selection.upper() == "Y":
                        # Parse prosite.doc for further information.
                        with open("prosite.doc", "r") as doc_handle:
                            doc_records = Prodoc.parse(doc_handle)
                            for doc_record in doc_records:
                                for x in results:
                                    # Substring test against the textual
                                    # form of the entry's references.
                                    if x in str(doc_record.prosite_refs):
                                        print("> %s domain." % x)
                                        print(doc_record.text)
                            print("\n----------------------------")
                            input("PRESS ENTER TO CONTINUE\n")

    out_file.close()
Ejemplo n.º 31
0
from Bio.ExPASy import Prosite
from urllib.request import urlopen
# Walk through the records of a local prosite.dat with next(), a for loop
# and a while loop. The `with` block closes the file when the demo is done.
with open("/home/koreanraichu/prosite.dat") as handle:
    records = Prosite.parse(handle)

    # Fetch a single record.
    record = next(records)
    print(record.accession)

    # Fetch the next five records with a for loop.
    for i in range(0, 5):
        record = next(records)
        print(record.accession)

    # The same again, this time with a while loop.
    i = 0
    while i < 5:
        record = next(records)
        print(record.accession)
        i += 1

    # When the three parts above run together they read the 1st record,
    # then the 2nd-6th and then the 7th-11th, so try each one separately.

    # Count the records that are left in the iterator.
    n = 0
    for record in records:
        n += 1
    print(n)