Exemple #1
0
def download_proteins(proteins, data_dir, fileroot="uniprot"):
    """
    Given a list of proteins, download their sequences to a FASTA file.

    The sequences are retrieved *before* the output file is opened, so a
    failed retrieval neither leaves an empty file behind nor truncates a
    FASTA file that already exists at the target path.

    Parameters
    ----------
    proteins : Tuple[str]
        Uniprot accessions to retrieve

    data_dir : str
        Where to download the FASTA file.

    fileroot : str
        How should the file be named? ``".fasta"`` is appended to it.

    Returns
    -------
    str
        The FASTA file created.
    """
    uniprot = bioservices.UniProt()
    records = uniprot.retrieve(proteins, frmt="fasta")

    # NOTE(review): retrieve() appears to hand back a bare string for a single
    # accession and a list of strings otherwise -- confirm against the
    # installed bioservices version. Both shapes are accepted here.
    if isinstance(records, str):
        fasta_text = records
    else:
        fasta_text = "".join(records)

    outfile = os.path.join(data_dir, fileroot + ".fasta")
    with open(outfile, "w") as fasta_out:
        fasta_out.write(fasta_text)

    return outfile
Exemple #2
0
def resolveAnnotationHelper(annotation):
    """
    Resolve an annotation URI to a human-readable name.

    The bioservices clients and the result cache are created on the first
    call and stored as attributes of ``resolveAnnotation`` (an object defined
    outside this function): ``db`` (the cache), ``ch``, ``uni``, ``k``, ``qg``
    and ``t``. Successful lookups are written to ``resolveAnnotation.db`` so
    later calls with the same URI return without querying a service.

    Parameters
    ----------
    annotation : str
        Annotation URI. The substring it contains selects the service that is
        queried: ``obo.go`` or ``/go/GO`` (QuickGO), ``kegg`` (KEGG),
        ``uniprot`` (UniProt), ``chebi`` (ChEBI) or ``taxonomy`` (Taxon).
        URIs containing ``cco``, ``pirsf``, ``pubchem`` or ``omim`` are
        recognised but not resolved.

    Returns
    -------
    Tuple[str, str]
        The annotation exactly as passed in, and the resolved name. The name
        is ``""`` when the resource is unsupported, when nothing is found, or
        when the lookup raises ``IOError`` or ``KeyError``.
    """
    if not hasattr(resolveAnnotation, "db"):
        resolveAnnotation.db = {}
        resolveAnnotation.ch = bioservices.ChEBI(verbose=False)
        resolveAnnotation.uni = bioservices.UniProt(verbose=False)
        resolveAnnotation.k = bioservices.kegg.KEGG(verbose=False)
        resolveAnnotation.qg = bioservices.QuickGO(verbose=False)
        resolveAnnotation.t = bioservices.Taxon()
        # Pre-seeded entries: these URIs are answered from the cache and
        # never sent to UniProt. Note that P07006 maps to the P06842 URI.
        resolveAnnotation.db[
            "http://identifiers.org/uniprot/P62988"
        ] = "http://identifiers.org/uniprot/P62988"
        resolveAnnotation.db[
            "http://identifiers.org/uniprot/P06842"
        ] = "http://identifiers.org/uniprot/P06842"
        resolveAnnotation.db[
            "http://identifiers.org/uniprot/P07006"
        ] = "http://identifiers.org/uniprot/P06842"

    if annotation in resolveAnnotation.db:
        return annotation, resolveAnnotation.db[annotation]

    # Decode the URL-encoded colon and keep the last path segment. The split
    # must operate on the decoded string; splitting ``annotation`` again would
    # throw the decoding away.
    tAnnotation = annotation.replace("%3A", ":")
    tAnnotation = tAnnotation.split("/")[-1]

    try:
        if "obo.go" in annotation or "/go/GO" in annotation:
            res = resolveAnnotation.qg.Term(tAnnotation)
            finalArray = []
            # An int result is treated as "no usable payload" and skipped.
            if not isinstance(res, int):
                res = bioservices.Service("name").easyXML(res)
                for x in res.findAll("name"):
                    try:
                        tagString = str(goGrammar.parseString(str(x))[0])
                    except pyp.ParseBaseException:
                        continue
                    if tagString != "Systematic synonym":
                        finalArray.append(tagString)
            # The first accepted name wins; an empty result is cached too.
            resolveAnnotation.db[annotation] = finalArray[0] if finalArray else ""
            finalAnnotation = resolveAnnotation.db[annotation]

        elif "kegg" in annotation:
            data = resolveAnnotation.k.get(tAnnotation)
            dict_data = resolveAnnotation.k.parse(data)
            if isinstance(dict_data, int):
                resolveAnnotation.db[annotation] = ""
            else:
                resolveAnnotation.db[annotation] = dict_data["name"]
            finalAnnotation = resolveAnnotation.db[annotation]

        elif "uniprot" in annotation:
            # NOTE(review): this branch and the chebi one use the raw
            # (undecoded) last path segment, as the original code did.
            identifier = annotation.split("/")[-1]
            result = resolveAnnotation.uni.quick_search(identifier)
            if identifier in result:
                # Keep only the text before the first "(".
                resolveAnnotation.db[annotation] = result[identifier][
                    "Protein names"
                ].split("(")[0]
                finalAnnotation = resolveAnnotation.db[annotation]
            else:
                # Not found: return "" directly instead of reading a cache
                # entry that was never written. Misses are not cached.
                finalAnnotation = ""

        elif "chebi" in annotation:
            identifier = annotation.split("/")[-1]
            entry = resolveAnnotation.ch.getLiteEntity(identifier)
            finalAnnotation = ""
            # If several entities come back, the last one is kept.
            for element in entry:
                resolveAnnotation.db[annotation] = str(element["chebiAsciiName"])
                finalAnnotation = resolveAnnotation.db[annotation]

        elif (
            "cco" in annotation
            or "pirsf" in annotation
            or "pubchem" in annotation
            or "omim" in annotation
        ):
            # Recognised resources for which no lookup is performed.
            finalAnnotation = ""

        elif "taxonomy" in annotation:
            result = resolveAnnotation.t.search_by_taxon(tAnnotation)
            resolveAnnotation.db[annotation] = result["Scientific Name"]
            finalAnnotation = resolveAnnotation.db[annotation]

        else:
            return annotation, ""
    except (IOError, KeyError):
        # Best effort: a failed lookup yields an empty name, not an error.
        return annotation, ""
    return annotation, finalAnnotation
Exemple #3
0
def resolveAnnotationHelper(annotation):
    '''
    Resolve an annotation URI to a human-readable name.

    On the first call the bioservices clients and a result cache are created
    and attached to ``resolveAnnotation`` (an object defined outside this
    function) as ``db``, ``ch``, ``uni``, ``k``, ``qg`` and ``t``. Successful
    lookups are stored in ``resolveAnnotation.db`` and reused by later calls.

    Parameters
    ----------
    annotation : str
        Annotation URI. Its content selects the service that is queried:
        ``obo.go`` or ``/go/GO`` (QuickGO), ``kegg`` (KEGG), ``uniprot``
        (UniProt), ``chebi`` (ChEBI) or ``taxonomy`` (Taxon). URIs containing
        ``cco``, ``pirsf``, ``pubchem`` or ``omim`` are recognised but not
        resolved.

    Returns
    -------
    Tuple[str, str]
        The annotation exactly as passed in, and the resolved name. The name
        is ``''`` when the resource is unsupported, when nothing is found, or
        when the lookup raises ``IOError`` or ``KeyError``.
    '''
    if not hasattr(resolveAnnotation, 'db'):
        resolveAnnotation.db = {}
        resolveAnnotation.ch = bioservices.ChEBI(verbose=False)
        resolveAnnotation.uni = bioservices.UniProt(verbose=False)
        resolveAnnotation.k = bioservices.kegg.KEGG(verbose=False)
        resolveAnnotation.qg = bioservices.QuickGO(verbose=False)
        resolveAnnotation.t = bioservices.Taxon()
        # Pre-seeded cache entries; these URIs never reach UniProt. Note that
        # P07006 is mapped to the P06842 URI.
        resolveAnnotation.db[
            'http://identifiers.org/uniprot/P62988'] = 'http://identifiers.org/uniprot/P62988'
        resolveAnnotation.db[
            'http://identifiers.org/uniprot/P06842'] = 'http://identifiers.org/uniprot/P06842'
        resolveAnnotation.db[
            'http://identifiers.org/uniprot/P07006'] = 'http://identifiers.org/uniprot/P06842'

    if annotation in resolveAnnotation.db:
        return annotation, resolveAnnotation.db[annotation]

    # Decode the URL-encoded colon first, then take the last path segment of
    # the *decoded* string (splitting ``annotation`` again would discard the
    # decoding step).
    tAnnotation = annotation.replace('%3A', ':')
    tAnnotation = tAnnotation.split('/')[-1]

    try:
        if 'obo.go' in annotation or '/go/GO' in annotation:
            res = resolveAnnotation.qg.Term(tAnnotation)
            finalArray = []
            # An int result is treated as "no usable payload" and skipped.
            if not isinstance(res, int):
                res = bioservices.Service('name').easyXML(res)
                for x in res.findAll('name'):
                    try:
                        tagString = str(goGrammar.parseString(str(x))[0])
                    except pyp.ParseBaseException:
                        continue
                    if tagString != 'Systematic synonym':
                        finalArray.append(tagString)
            # First accepted name wins; an empty outcome is cached as well.
            resolveAnnotation.db[annotation] = finalArray[0] if finalArray else ''
            finalAnnotation = resolveAnnotation.db[annotation]

        elif 'kegg' in annotation:
            data = resolveAnnotation.k.get(tAnnotation)
            dict_data = resolveAnnotation.k.parse(data)
            if isinstance(dict_data, int):
                resolveAnnotation.db[annotation] = ''
            else:
                resolveAnnotation.db[annotation] = dict_data['name']
            finalAnnotation = resolveAnnotation.db[annotation]

        elif 'uniprot' in annotation:
            # NOTE(review): this branch and the chebi one use the raw
            # (undecoded) last path segment, as the original code did.
            identifier = annotation.split('/')[-1]
            result = resolveAnnotation.uni.quick_search(identifier)
            if identifier in result:
                # Keep only the text before the first '('.
                resolveAnnotation.db[annotation] = result[identifier][
                    'Protein names'].split('(')[0]
                finalAnnotation = resolveAnnotation.db[annotation]
            else:
                # Not found: answer '' directly rather than reading a cache
                # entry that was never written. Misses are not cached.
                finalAnnotation = ''

        elif 'chebi' in annotation:
            identifier = annotation.split('/')[-1]
            entry = resolveAnnotation.ch.getLiteEntity(identifier)
            finalAnnotation = ''
            # When several entities are returned, the last one is kept.
            for element in entry:
                resolveAnnotation.db[annotation] = str(
                    element['chebiAsciiName'])
                finalAnnotation = resolveAnnotation.db[annotation]

        elif ('cco' in annotation or 'pirsf' in annotation
              or 'pubchem' in annotation or 'omim' in annotation):
            # Recognised resources for which no lookup is performed.
            finalAnnotation = ''

        elif 'taxonomy' in annotation:
            result = resolveAnnotation.t.search_by_taxon(tAnnotation)
            resolveAnnotation.db[annotation] = result['Scientific Name']
            finalAnnotation = resolveAnnotation.db[annotation]

        else:
            return annotation, ''
    except (IOError, KeyError):
        # Best effort: a failed lookup yields an empty name, not an error.
        return annotation, ''
    return annotation, finalAnnotation