Ejemplo n.º 1
0
Archivo: seq.py Proyecto: kkaris/pypath
        def loader(ncbi_tax_id=9606):
            """
            Retrieves Pfam regions for all UniProt IDs of one organism.

            :param int ncbi_tax_id:
                NCBI Taxonomy ID passed to ``uniprot_input.all_uniprots``
                as ``organism``.

            :return:
                Whatever ``dataio.get_pfam_regions`` returns when called
                with ``dicts='uniprot'`` and ``keepfile=True``.
            """

            # Full list of UniProt IDs for the organism.
            proteome_ids = uniprot_input.all_uniprots(organism=ncbi_tax_id)

            pfam_regions = dataio.get_pfam_regions(
                uniprots=proteome_ids,
                dicts='uniprot',
                keepfile=True,
            )

            return pfam_regions
Ejemplo n.º 2
0
def get_uniprot_sec(organism=9606):
    """
    Downloads and processes the mapping between secondary and
    primary UniProt IDs.

    Yields pairs of secondary and primary UniProt IDs. Each pair is a
    two-element list of strings, obtained by splitting one line of the
    downloaded file on whitespace. Lines that do not split into exactly
    two fields are skipped.

    :param int organism:
        NCBI Taxonomy ID of the organism. Only pairs whose second element
        is in the proteome of this organism are yielded. If ``None``,
        no proteome filtering is applied.
    """

    # The first 30 lines of the download are never parsed.
    # NOTE(review): presumably this is the descriptive header of the
    # UniProt secondary accession file -- confirm against the file format.
    n_skipped_lines = 30

    # Always bind ``proteome`` so the filter below never depends on a
    # name that exists only on one branch.
    proteome = None

    if organism is not None:
        proteome = set(uniprot_input.all_uniprots(organism=organism))

    url = urls.urls['uniprot_sec']['url']
    c = curl.Curl(url, silent=False, large=True)

    for lineno, raw_line in enumerate(c.result):

        if lineno < n_skipped_lines:
            continue

        fields = raw_line.decode('utf-8').split()

        if len(fields) != 2:
            continue

        if proteome is None or fields[1] in proteome:
            yield fields
Ejemplo n.º 3
0
    def read_mapping_uniprot_list(self,
                                  param,
                                  uniprots=None,
                                  ncbi_tax_id=None):
        """
        Builds ID translation dictionaries from the tab separated output
        of ``self._read_mapping_uniprot_list`` and stores them in
        ``self.mapping``.

        ``self.mapping["to"]`` maps each value of the second column to the
        list of first column values seen with it. If ``param.bi`` is true,
        ``self.mapping["from"]`` holds the reverse direction. Both
        dictionaries are passed to ``self.cleanDict`` after being stored.

        :param param:
            Settings object; this method reads its ``ncbi_tax_id``,
            ``swissprot``, ``targetNameType``, ``target_ac_name``,
            ``ac_name`` and ``bi`` attributes.
        :param uniprots:
            UniProt IDs to query. If ``None``, they are obtained from
            ``uniprot_input.all_uniprots``.
        :param ncbi_tax_id:
            Organism used when ``uniprots`` has to be looked up. Falls
            back to ``param.ncbi_tax_id`` if ``None``.
        """

        if ncbi_tax_id is None:
            ncbi_tax_id = param.ncbi_tax_id

        if uniprots is None:
            uniprots = uniprot_input.all_uniprots(
                ncbi_tax_id,
                swissprot=param.swissprot,
            )

        if param.targetNameType == 'uniprot':
            # The UniProt IDs can be used for the query directly.
            ac_list = uniprots
        else:
            # First translate the UniProt IDs to the target ID type and
            # query with the translated identifiers instead.
            target_handle = self._read_mapping_uniprot_list(
                'ACC',
                param.target_ac_name,
                uniprots,
            )

            # The first line is discarded (treated as a header).
            target_handle.readline()

            ac_list = [
                row.decode('ascii').split('\t')[1].strip()
                for row in target_handle
            ]

        data_handle = self._read_mapping_uniprot_list(
            param.target_ac_name,
            param.ac_name,
            ac_list,
        )

        # The first line is discarded (treated as a header).
        data_handle.readline()

        forward = {}
        backward = {}

        for row in data_handle:

            fields = row.decode('ascii').strip().split('\t')
            first_col, second_col = fields[0], fields[1]

            forward.setdefault(second_col, []).append(first_col)

            if param.bi:
                backward.setdefault(first_col, []).append(second_col)

        self.mapping["to"] = forward
        self.cleanDict(self.mapping["to"])

        if param.bi:
            self.mapping["from"] = backward
            self.cleanDict(self.mapping["from"])
Ejemplo n.º 4
0
def get_uniprot_sec(organism=9606):
    """
    Downloads the mapping between secondary and primary UniProt IDs.

    The download (and the proteome lookup, if any) happens when the
    function is called; parsing of the individual lines is deferred to
    the returned ``filter`` object.

    :param int organism:
        NCBI Taxonomy ID of the organism. Only pairs whose second element
        is in the proteome of this organism are kept. If ``None``,
        no proteome filtering is applied.

    :return:
        The result of ``filter`` over the parsed lines. Each item is a
        two-element list of strings produced by splitting a line on
        whitespace; lines with any other number of fields are dropped.
    """

    # The first 30 lines of the download are never parsed.
    # NOTE(review): presumably this is the descriptive header of the
    # UniProt secondary accession file -- confirm against the file format.
    n_skipped_lines = 30

    # Always bind ``proteome`` so the predicate below never depends on a
    # name that exists only on one branch.
    proteome = None

    if organism is not None:
        proteome = set(uniprot_input.all_uniprots(organism=organism))

    url = urls.urls['uniprot_sec']['url']
    c = curl.Curl(url, silent=False, large=True)
    data = c.result

    def _is_wanted(fields):
        # Keep well-formed pairs, restricted to the proteome if one is set.
        return (
            len(fields) == 2 and
            (proteome is None or fields[1] in proteome)
        )

    parsed = (
        raw_line.decode('utf-8').split()
        for lineno, raw_line in enumerate(data)
        if lineno >= n_skipped_lines
    )

    return filter(_is_wanted, parsed)
Ejemplo n.º 5
0
    def load_proteome(self, taxon, swissprot_only=True):
        """
        Loads the proteome of one organism unless it is already loaded.

        The set of IDs returned by ``uniprot_input.all_uniprots`` is stored
        in ``self._proteomes`` under the key ``(taxon, swissprot_only)``,
        and every ID in it is recorded in ``self._taxonomy`` with that key
        as value. Nothing happens if the key is already present.

        After a load with ``swissprot_only=False``, the
        ``swissprot_only=True`` variant is loaded as well; if that load
        takes place, it overwrites the ``self._taxonomy`` entries of the
        IDs it contains.

        :param taxon:
            Organism identifier, passed as first positional argument to
            ``uniprot_input.all_uniprots``.
        :param bool swissprot_only:
            Passed as second positional argument to
            ``uniprot_input.all_uniprots``.
        """

        cache_key = (taxon, swissprot_only)

        # Already loaded: nothing to do.
        if cache_key in self._proteomes:
            return

        self._proteomes[cache_key] = set(
            uniprot_input.all_uniprots(*cache_key)
        )

        for uniprot_id in self._proteomes[cache_key]:
            self._taxonomy[uniprot_id] = cache_key

        if swissprot_only:
            return

        self.load_proteome(taxon, True)