예제 #1
0
def doc_parser(accession):
    """Look up a PRODOC entry by accession number and return its text.

    Scans ``prosite_files/prosite.doc`` record by record and returns the
    ``text`` of the first record whose accession equals ``accession``.
    Returns ``None`` when no record matches.
    """
    with open("prosite_files/prosite.doc") as doc_file:
        matching_texts = (
            entry.text
            for entry in Prodoc.parse(doc_file)
            if accession == entry.accession
        )
        # The generator is lazy, so parsing stops at the first match.
        return next(matching_texts, None)
예제 #2
0
def DocParser(accession_list):
    """Collect the PRODOC text of every entry whose accession was detected.

    Args:
        accession_list: container of accession strings; each record's
            accession is tested with ``in`` against it.

    Returns:
        A list with the ``text`` of every matching record of
        ``prosite.txt``, in file order. If parsing fails part-way,
        ``False`` is printed and the texts gathered so far are returned.
    """
    record_text_list = []
    # The context manager closes the file even when parsing fails.
    with open("prosite.txt") as handle:
        try:
            # Walk the PRODOC entries and keep those already found.
            for record in Prodoc.parse(handle):
                if record.accession in accession_list:
                    record_text_list.append(record.text)
        except Exception:
            # Best effort: report the failure and return the partial result.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print(False)
    return record_text_list
예제 #3
0
def doc_parser(prosite_doc, key, ResultDict, Results_Dir):
    """Write the PRODOC text of every domain found for ``key`` to a report.

    The report is ``Results_Dir + 'Extended_Domain_Info_<key>.txt'``. After a
    header line, the text of each PRODOC record that references one of the
    ids in ``ResultDict[key]`` is written, grouped by id in iteration order.

    Args:
        prosite_doc: path of the PRODOC file to parse.
        key: name used in the report file name and header.
        ResultDict: mapping; ``ResultDict[key]`` is iterated to obtain the
            PROSITE ids to look up.
        Results_Dir: prefix for the report path. It is concatenated as-is,
            so it must already end with a path separator.

    Returns:
        None.
    """
    report_path = Results_Dir + 'Extended_Domain_Info_%s.txt' % (key)
    # ``with`` guarantees the report is flushed and closed on every path.
    with open(report_path, 'w') as Ext_Info:
        Ext_Info.write('\n' + (' Extended information about the domains in %s ' %
                               (key)).center(78, '#') + '\n\n')
        doc_records = None
        for Prosite_Id in ResultDict[key]:
            if doc_records is None:
                # Parse the PRODOC file once, and only if there is an id to
                # look up, instead of reopening it for every id.
                with open(prosite_doc) as handle:
                    doc_records = list(Prodoc.parse(handle))
            for record in doc_records:
                try:
                    for reference in record.prosite_refs:
                        if Prosite_Id == reference[0]:
                            Ext_Info.write(Prosite_Id.center(80))
                            Ext_Info.write('\n\n')
                            Ext_Info.write(record.text)
                except (IndexError, TypeError):
                    # Skip records with malformed cross-references. Real I/O
                    # errors are no longer silently swallowed.
                    continue
예제 #4
0
def domaininfo(keydomains):
    """Extend each PROSITE match in ``keydomains`` with database details.

    Args:
        keydomains: list of lists of matches. Each match is itself a list
            whose last element is compared with the PROSITE pattern.

    Returns:
        The same ``keydomains`` object, modified in place. For every record
        of ``../prosite.dat`` whose pattern equals a match's last element,
        the record's accession, name and description are appended to that
        match, followed by the text of every ``../prosite.doc`` entry whose
        cross-references contain the value at index 5 of the match.

    NOTE(review): index 5 is the freshly appended accession only when the
    match held exactly five items beforehand -- confirm against the caller.
    """
    # Lazily loaded list of (str(prosite_refs), text). Parsed at most once,
    # and only if some pattern actually matches.
    doc_entries = None

    with open("../prosite.dat", "r") as dat_handle:
        for record in Prosite.parse(dat_handle):
            for group in keydomains:
                for match in group:
                    if record.pattern != match[-1]:
                        continue
                    match.append(record.accession)
                    match.append(record.name)
                    match.append(record.description)

                    if doc_entries is None:
                        with open("../prosite.doc") as doc_handle:
                            doc_entries = [
                                (str(info.prosite_refs), info.text)
                                for info in Prodoc.parse(doc_handle)
                            ]

                    # Substring test against the textual form of the
                    # cross-reference list, as in the original code.
                    wanted = str(match[5])
                    for refs_repr, text in doc_entries:
                        if wanted in refs_repr:
                            match.append(text)

    return keydomains
예제 #5
0
def _find_domains_load_patterns():
    """Parse prosite.dat once; return (record, compiled regex) pairs.

    Records whose translated pattern is empty are skipped.
    """
    loaded = []
    with open('prosite.dat', 'r') as handle:
        for record in Prosite.parse(handle):
            patron = repl(record.pattern)  # pattern translated by repl()
            if len(patron) != 0:
                loaded.append((record, re.compile(patron)))
    return loaded


def findDomains(multifasta, output=''):
    """Search PROSITE patterns in every protein of a multi-FASTA file.

    Writes ``results/prosite/dominios_<output>.txt``: for each header line a
    separator plus the protein title, for each sequence line the PROSITE
    records whose pattern matches it, and finally the PRODOC text of every
    documentation entry whose first cross-reference is one of the matched
    accessions.

    Args:
        multifasta: path of the file with all the proteins to search.
        output: query name, used in the result file name.

    Returns:
        None.

    NOTE(review): each line of the file is searched on its own, so a
    pattern spanning two lines of a wrapped sequence is not detected.
    """
    output_file = 'results/prosite/dominios_' + output + '.txt'  # one file per query
    accession = set()  # accession numbers of every domain found (deduplicated)
    patterns = None    # loaded lazily, on the first sequence line

    with open(multifasta, 'r') as fasta_handle:
        # makedirs also creates a missing 'results' parent directory.
        os.makedirs('results/prosite', exist_ok=True)
        with open(output_file, 'w') as result:
            for line in fasta_handle:
                if line.startswith('>'):
                    result.write('*************************************************************************************************************'+'\n')
                    result.write(line.replace('>', '') + '\n')  # title: protein name
                    continue

                if patterns is None:
                    patterns = _find_domains_load_patterns()
                for record, regex in patterns:
                    if regex.search(line):
                        result.write('Patron: ' + record.pattern + '\nName: ' + record.name + '\nAccession: ' + record.accession + '\nDescription: ' + record.description + '\n\n')
                        # Kept to look the domain up in PRODOC afterwards.
                        accession.add(record.accession)

            result.write('\n\n\n\nINFORMACION DE LOS DOMINIOS\n\n')
            with open('prosite.doc', 'r') as handle:
                for record in Prodoc.parse(handle):
                    refs = record.prosite_refs
                    # Keep entries whose first PROSITE reference was matched.
                    if len(refs) != 0 and refs[0][0] in accession:
                        result.write(record.text + '\n\nAccession prodoc: ' + record.accession + '\nAccession prosite: ' + refs[0][0] + '\n')
                        result.write('**************************************************************************************\n\n\n')
예제 #6
0
 def test_prodoc_raw(self):
     # Fetch the raw PRODOC entry from ExPASy and check that it parses to
     # the accession that was requested.
     handle = ExPASy.get_prosite_raw("PDOC00001")
     try:
         record = Prodoc.read(handle)
     finally:
         # Release the connection even when parsing raises.
         handle.close()
     self.assertEqual(record.accession, "PDOC00001")
def parse_doc(prositedoc, id):
    """Return the text of the PRODOC record whose accession equals ``id``.

    ``prositedoc`` is the path of the PRODOC file, read as cp1252. The first
    matching record wins; ``None`` is returned when nothing matches.
    """
    with open(prositedoc, 'r', encoding='cp1252') as doc_stream:
        texts = (
            entry.text
            for entry in Prodoc.parse(doc_stream)
            if entry.accession == id
        )
        # Lazy generator: reading stops as soon as a match is found.
        return next(texts, None)
예제 #8
0
 def test_prodoc_raw(self):
     # Fetch the raw PRODOC entry from ExPASy and check that it parses to
     # the accession that was requested.
     handle = ExPASy.get_prosite_raw('PDOC00001')
     try:
         record = Prodoc.read(handle)
     finally:
         # Release the connection even when parsing raises.
         handle.close()
     self.assertEqual(record.accession, 'PDOC00001')
예제 #9
0
 def test_prodoc_raw(self):
     # Download the raw PRODOC entry, parse it while the stream is open,
     # then verify the parsed accession.
     with ExPASy.get_prosite_raw("PDOC00001") as raw_stream:
         parsed = Prodoc.read(raw_stream)
     self.assertEqual(parsed.accession, "PDOC00001")
예제 #10
0
# script para parsear la base de datos prosite presentes en los archivos
# prosite.doc y prosite.dat utilizando el modulo Biopython

from Bio.ExPASy import Prosite, Prodoc

# con este script podeis parsear el archivo .dat
# handle = open("prosite.dat","r")
# records = Prosite.parse(handle)
# for record in records:
# 	print("name:"+record.name)
# 	print("accession:"+record.accession)
# 	print("description:"+record.description)
# 	print("pattern:"+record.pattern)

# This part of the script parses the .doc file.
with open("prosite.doc") as handle:
    records = Prodoc.parse(handle)
    # `count` was printed without ever being defined (NameError on the first
    # record); it is now the running number of the record being shown.
    for count, record in enumerate(records, start=1):
        print(record.accession)
        #print(record.prosite_refs)
        print(record.text)
        print(count)
        #print(record.references)
예제 #11
0
from Bio import ExPASy
from Bio.ExPASy import Prodoc

# The most basic way to fetch an entry: only ExPASy is needed and the raw
# text is printed as-is. Each remote handle is closed by its `with` block.
with ExPASy.get_prosite_raw("PS00001") as handle:
    text = handle.read()
print(text)

# NOTE: the snippet below does not work, because `Prosite` cannot be imported
# from the top-level `Bio` package ("ImportError: cannot import name
# 'Prosite' from 'Bio'"):
#     from Bio import Prosite
#     handle = ExPASy.get_prosite_raw("PS51036")
#     record = Prosite.read(handle)
#     print(record)

with ExPASy.get_prosite_raw("PDOC00001") as handle:
    record = Prodoc.read(handle)
print(record)

# Entries can also be downloaded in HTML format.
with ExPASy.get_prosite_entry("PS51036") as handle:
    html = handle.read()
with open("myprositerecord.html", "w") as out_handle:
    out_handle.write(html)

# Same download, for the PRODOC documentation entry.
with ExPASy.get_prodoc_entry("PDOC51036") as handle:
    html = handle.read()
with open("myprositerecord2.html", "w") as out_handle:
    out_handle.write(html)
예제 #12
0
def prodoc(lista_hits, prosite_doc):
    """Interactively print the PRODOC text of domains chosen by the user.

    Repeatedly shows ``lista_hits``, asks for an accession and, if the answer
    is contained in ``lista_hits``, prints the text of every record of
    ``prosite_doc`` whose accession equals the answer. The loop ends when the
    user declines to consult another domain.

    Args:
        lista_hits: container with the accessions the user may pick from
            (described by the original author as the hits obtained from
            parsing prosite.dat).
        prosite_doc: path of the PRODOC file (prosite.doc / prosite.txt).

    Returns:
        None. Nothing is written to disk; all output goes to the screen.

    NOTE(review): the answer is compared with the PRODOC accession of each
    record, not with its PROSITE cross-references -- confirm that
    ``lista_hits`` really holds PRODOC accessions.
    """
    print()
    print()

    while True:
        print(lista_hits)
        dominio = input("De la tabla anterior imprimida por pantalla, escriba el accession del dominio que quiera consultar:   ")

        if dominio not in lista_hits:
            print()
            print("--------------------------------------------------------------------------------------")
            print("Respuesta no válida, por favor, introduzca algun accession de los mostrados en la tabla anterior, es decir, alguno de estos:")
            print(lista_hits)
            print("--------------------------------------------------------------------------------------")
            continue

        # Parse the .doc file looking for the selected hit. The context
        # manager closes it even if parsing fails.
        with open(prosite_doc, "r") as doc:
            for record in Prodoc.parse(doc):
                if record.accession != dominio:
                    continue
                print("-----------------------------------------------------------------------------")
                print()
                # The banner used to start with a stray '"' character
                # (four-quote typo); whitespace is otherwise unchanged.
                print(
                    "\n"
                    "                ###########################################################         \n"
                    "                #### Información extra de la base de datos prosite.doc ####\n"
                    "                ########################################################### \n"
                    "                "
                )
                print()
                print("DOMINIO:  " + record.accession)
                print()
                print(record.text)
                print("################################################################")

        # Ask whether to consult another accession.
        mas_doc = input("¿ Quiere consultar informacion sobre algún dominio más ?  (Si/No):   ")
        if mas_doc in ["NO", "No", "no", "n", "N"]:
            print(
                "\n"
                "----------------------------------------------------------------------------------------------------\n"
                "        \n"
                "        ##############################################################\n"
                "        #### Muchas gracias por haber confiado en nuestro software. ##\n"
                "        ##############################################################\n"
                "        \n"
                "        \n"
                "                  "
            )
            break
예제 #13
0
# PROSITE-syntax tokens and their regular-expression replacements. They are
# applied sequentially, in this exact order.
_PATFINDER_REPLACEMENTS = (
    (".", ""), ("-", ""), ("<", "^"), (">", "$"), ("x", "."),
    ("X", "."), ("{", "[^"), ("}", "]"), ("(", "{"), (")", "}"),
)


def _patfinder_regex(pattern):
    """Translate a PROSITE pattern string into regular-expression syntax."""
    for old, new in _PATFINDER_REPLACEMENTS:
        pattern = pattern.replace(old, new)
    return pattern


def _patfinder_load_patterns():
    """Parse prosite.dat once; return (record, compiled regex) pairs.

    Records with an empty pattern are skipped.
    """
    with open("prosite.dat", "r") as handle:
        return [
            (record, re.compile(_patfinder_regex(record.pattern)))
            for record in Prosite.parse(handle)
            if record.pattern != ""
        ]


def patfinder(fasta, output):
    """Search PROSITE patterns on every sequence of a FASTA file.

    For each sequence the matching domains are shown on screen and written,
    together with the matched fragments and their start positions, to
    ``output``. After each sequence the user may ask for the PRODOC text of
    the domains found. The function is interactive: it clears the screen
    and waits for keyboard input.

    Args:
        fasta: name of the FASTA file with the sequences to search (str).
        output: name of the file where the results are stored (str).

    Returns:
        None.
    """
    for x in range(0, 4):
        print(">> Starting Prosite pattern search " + "." * x, end="\r")
        sleep(0.2)
    print()

    # Count number of proteins to analyze.
    with open(fasta, "r") as file:
        nseqs = sum(1 for line in file if line[0] == ">")

    print("\n> %d proteins to analyze." % nseqs)
    print(
        "\nThe results will be shown sequence by sequence. Each query sequence (name ended by _QUERYSEQ) will be followed by its blastp hits."
    )
    print(
        "On the results folder, you'll find also a txt file with all the results, including also the exact sequence of the domains on the proteins and their position."
    )
    print(
        "You'll be given also the option to see further information of the found domains of your choice."
    )
    input("\nPRESS ENTER TO CONTINUE\n")

    # prosite.dat is parsed and its patterns compiled once, on the first
    # sequence, instead of once per sequence.
    patterns = None

    # Both files are closed by the context manager, also on errors.
    with open(output, "w") as out_file, open(fasta, "r") as seqs_handle:
        for j, seq_record in enumerate(SeqIO.parse(seqs_handle, "fasta"),
                                       start=1):
            call('clear')
            seq, seq_id = str(seq_record.seq), seq_record.id
            print("\n---------------------------------------")
            print(">> (%d of %d) Prosite domains on sequence %s" %
                  (j, nseqs, seq_id))
            print("-----------------------------------------")
            out_file.write("\n\n>> Prosite domains on sequence %s" % seq_id)

            if patterns is None:
                patterns = _patfinder_load_patterns()

            total = 0
            results = []
            for record, regex in patterns:
                # Search domains.
                domains, pos = [], []
                for m in regex.finditer(seq):
                    domains.append(m.group())
                    pos.append(m.start())
                if not domains:
                    continue

                # Show found domains.
                total += 1
                print("\n> Found %d hits for domain %s." %
                      (len(domains), record.name))
                out_file.write("\n> Found %d hits for domain %s:\n" %
                               (len(domains), record.name))
                out_file.write("Pos\tHit sequence\n")
                out_file.write("---\t------------\n")
                for start, fragment in zip(pos, domains):
                    out_file.write("%s\t%s\n" % (start, fragment))

                results.append(record.accession)
                print("Domain accesion id: %s" % record.accession)
                print("Description: %s" % record.description)
                print("Pattern: %s" % record.pattern)
                out_file.write("Domain accesion id: %s\n" % record.accession)
                out_file.write("Description: %s\n" % record.description)
                out_file.write("Pattern: %s\n" % record.pattern)

            if total == 0:
                print("No domains found for this protein.")
                out_file.write("No domains found for this protein.")
                print("\n----------------------------")
                input("PRESS ENTER TO CONTINUE\n")
                continue

            print("\nTotal: %d different domains found.\n" % total)
            out_file.write(
                "\n\nTotal: %d different domains found.\n\n" % total)
            print(
                "\n---------------------------------------------------------"
            )
            print(
                "If you want further information of these domains, press Y."
            )
            print("Press ENTER or any other key to continue.")
            selection = input("> ")
            if selection.upper() == "Y":
                # Parse prosite.doc for further information.
                with open("prosite.doc", "r") as doc_handle:
                    for doc_record in Prodoc.parse(doc_handle):
                        # Substring test on the textual form of the
                        # cross-reference list.
                        refs_repr = str(doc_record.prosite_refs)
                        for x in results:
                            if x in refs_repr:
                                print("> %s domain." % x)
                                print(doc_record.text)
                print("\n----------------------------")
                input("PRESS ENTER TO CONTINUE\n")
def _escribe_archivo_patron(escribir, patron, lista, nombres, accesion,
                            descripcion, referencias):
    """Write name, accession, pattern, description and PRODOC text of one pattern."""
    escribir.write("El nombre es: " + nombres[patron] + "\n")
    escribir.write("El accession es: " + accesion[patron] + "\n")
    escribir.write("El patron es: " + lista[patron] + "\n")
    escribir.write("La descripción es: " + descripcion[patron] + "\n")
    for texto, refs in referencias.items():
        for ref in refs:
            # Every field of the cross-reference is compared, so a field
            # other than the accession can also trigger the output.
            for campo in ref:
                if campo == accesion[patron]:
                    escribir.write("Información adicional: \n" + texto)
                    # Separator between results.
                    escribir.write(
                        "---------------------------------------------------------------------------------------------------------------------------------\n"
                    )


def escribe_archivo(multifasta, lista, dicc, nombres, accesion, descripcion):
    """Store in "Archivo_prosite" the PROSITE information of the filtered sequences.

    Args:
        multifasta: path of the multi-FASTA file filtered by BLAST.
        lista: list with the patterns; empty strings are skipped.
        dicc: dictionary whose keys are compared with the line numbers of
            ``multifasta`` and whose values are iterated as positions in
            ``lista``.
        nombres: list of PROSITE names, indexed like ``lista``.
        accesion: list of PROSITE accessions, indexed like ``lista``.
        descripcion: list of descriptions, indexed like ``lista``.

    Returns:
        None.
    """
    with open(multifasta, "r") as lee:
        # Dictionary {text of the .doc entry: references of the .doc entry}.
        with open("prosite.doc", encoding="utf8", errors="ignore") as handle:
            referencias = {
                record.text: record.prosite_refs
                for record in Prodoc.parse(handle)
            }

        with open("Archivo_prosite", mode="w+") as escribir:  # output file
            for numero_linea, linea in enumerate(lee):
                for clave in dicc.keys():
                    if numero_linea != clave:
                        # NOTE(review): leaving at the first non-matching
                        # key means only the first key of ``dicc`` is ever
                        # compared with a line -- confirm this is intended.
                        break
                    escribir.write(linea)

                    # NOTE(review): the positions of ALL entries of ``dicc``
                    # are written here, not only those of ``clave``.
                    for posiciones in dicc.values():
                        for posicion in posiciones:
                            for patron in range(len(lista)):
                                # Several patterns are empty (""); only
                                # patterns with content are reported.
                                if patron != posicion or lista[patron] == "":
                                    continue
                                _escribe_archivo_patron(
                                    escribir, patron, lista, nombres,
                                    accesion, descripcion, referencias)