Example #1
File: go.py Project: kkaris/pypath
def annotate(graph, organism=9606, aspects=('C', 'F', 'P')):
    """
    Adds Gene Ontology annotations to the nodes of a graph.
    
    :param igraph.Graph graph:
        Any ``igraph.Graph`` object with UniProt IDs
        in its ``name`` vertex attribute.
    """

    aspects = aspects if isinstance(aspects, (list, tuple)) else (aspects,)

    graph.vs['go'] = [{
        'C': set(),
        'F': set(),
        'P': set()
    } for _ in range(graph.vcount())]

    terms, annot = dataio.go_annotations_goa(organism=organism)

    prg = progress.Progress(graph.vcount(), 'Loading GO annotations', 9)

    for v in graph.vs:

        prg.step()

        for asp in aspects:

            if v['name'] in annot[asp]:

                v['go'][asp] = annot[asp][v['name']]

    prg.terminate()
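
A minimal usage sketch (the toy graph is an assumption; any igraph.Graph carrying UniProt ACs in its `name` vertex attribute works, and the call downloads GOA annotations):

import igraph

# hypothetical two-vertex graph named by UniProt ACs (EGFR, TP53)
g = igraph.Graph([(0, 1)])
g.vs['name'] = ['P00533', 'P04637']

annotate(g)  # defaults: organism 9606, aspects C, F and P

# each vertex now carries a dict of GO term sets keyed by aspect
print(g.vs[0]['go']['F'])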
Example #2
 def tissues_x_proteins(self, normalized=True, tissues=None):
     '''
     Downloads the expression of all proteins for all tissues.
     The result is a dict of dicts holding the expression values
     of each protein, grouped by sample.
     '''
     self.get_tissues()
     tissues_selected = {
         t['TISSUE_ID'] for t in self.tissues
         if tissues is None or t['TISSUE_ID'] in tissues
     } - self.tissues_loaded
     prg = progress.Progress(
         len(tissues_selected),
         'Downloading expression data',
         1,
         percent=False)
     for tis in tissues_selected:
         prg.step()
         sys.stdout.write('Querying tissue %s\n' % tis)
         sys.stdout.flush()
         self.get_proteins(tis)
         if not hasattr(self.result, 'read'):
             sys.stdout.write('\tFailed: %s\n' % tis)
             sys.stdout.flush()
         else:
             self.tissues_loaded.add(tis)
             self.get_expression(normalized)
             if tis not in self.samples:
                 self.samples[tis] = []
             self.samples[tis] = uniqList(self.samples[tis] + list(
                 self.current_samples))
             self.current_samples = set([])
     prg.terminate()
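
A hedged usage sketch; the class name, the constructor arguments and the tissue identifiers are all assumptions (the method belongs to a ProteomicsDB-style client requiring API credentials):

# hypothetical client and credentials
pdb = ProteomicsDB(username='me', password='secret')

# restrict the download to two tissue IDs instead of all tissues
pdb.tissues_x_proteins(normalized=True,
                       tissues={'BTO:0000142', 'BTO:0000759'})

# expression values are now grouped by tissue and sample
print(list(pdb.samples.keys()))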
Example #3
 def load_collection(self,
                     collname,
                     id_type='entrez',
                     map_ids=True,
                     cachedir='cache'):
     if os.path.exists(os.path.join(cachedir, 'gsea-%s.pickle' % collname)):
         self.load([collname])
         return None
     url = self.collections[collname]['urls'][id_type]
     data = dataio.curl(url,
                        req_headers=self.session,
                        silent=False,
                        cache=False,
                        write_cache=True)
     data = data.split('\n')
     names = []
     prg = progress.Progress(len(data), 'Loading gene sets', 1)
     for line in (l.split('\t') for l in data if len(l) > 0):
         prg.step()
         setname = line[0].strip()
         self.write_set(line[2:], setname, id_type, map_ids)
         self.get_desc(setname)
         names.append(setname)
     prg.terminate()
     self.groups[collname] = set(names)
     self.save([collname], cachedir=cachedir)
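
A usage sketch, assuming an instance of the containing GSEA helper with a collection key such as 'h' registered in self.collections (class name, constructor and key are assumptions):

g = GSEA()  # hypothetical: constructor details omitted

g.load_collection('h', id_type='entrez')

# the names of the loaded gene sets are recorded per collection
print(len(g.groups['h']))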
Example #4
 def smiles2chembl(self, smiles):
     self.result = {}
     prg = progress.Progress(total=len(smiles),
                              name='Translating SMILES',
                             interval=1)
     for sml in smiles:
         url = self.chembl_url.format(sml)
         c = curl.Curl(url, large=False)
         result = c.result
         self.result[sml] = []
         if result is not None:
             try:
                 data = json.loads(result)
                 for d in data['compounds']:
                     this_smile = d['smiles']
                     this_chembl = d['chemblId']
                     # if this_smile == sml:
                     self.result[sml].append(this_chembl)
             except ValueError:
                  soup = bs4.BeautifulSoup(result, 'html.parser')
                  compounds = soup.find_all('compound')
                  if compounds:
                     for compound in compounds:
                         this_smile = compound.find('smiles').text
                         this_chembl = compound.find('chemblid').text
                         # if this_smile == sml:
                         self.result[sml].append(this_chembl)
         prg.step()
     prg.terminate()
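
A usage sketch (the instance construction is an assumption; the SMILES below is aspirin):

u = Unichem()  # hypothetical instance exposing `chembl_url`

u.smiles2chembl(['CC(=O)OC1=CC=CC=C1C(=O)O'])

# each SMILES maps to a list of ChEMBL IDs, empty if nothing matched
print(u.result)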
Example #5
 def translate(self, source, target, lst):
     if source == 'inchikey':
         self.inchikey2anything(target, lst)
         return None
     if source == 'smiles':
         self.smiles2chembl(lst)
         return None
     self.result = {}
      source = str(source) if isinstance(source, int) else self.name_dict[source]
      target = str(target) if isinstance(target, int) else self.name_dict[target]
     prg = progress.Progress(
         total=len(lst),
         name='Translating compound identifiers',
         interval=1)
     for comp in lst:
         url = '/'.join([self.url_stem, comp, source, target])
          c = curl.Curl(url, large=False)
         result = c.result
         self.result[comp] = []
         if result is not None:
             data = json.loads(result)
             for d in data:
                 self.result[comp].append(d['src_compound_id'])
         prg.step()
     prg.terminate()
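
The dispatching above routes InChIKeys and SMILES to dedicated methods, while every other identifier type goes through the src_compound_id endpoint. A sketch (the Unichem class name and the 'chembl'/'drugbank' keys of name_dict are assumptions based on UniChem's source labels):

u = Unichem()  # hypothetical instance holding url_stem and name_dict

# source and target may be UniChem numeric source IDs or registered names
u.translate('chembl', 'drugbank', ['CHEMBL25'])
print(u.result['CHEMBL25'])

# InChIKey input is dispatched to inchikey2anything()
u.translate('inchikey', 'chembl', ['BSYNRYMUTXBXSQ-UHFFFAOYSA-N'])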
Example #6
    def load_uniprot_mappings(self, ac_types=None, bi=False, ncbi_tax_id=None):

        ncbi_tax_id = self.get_tax_id(ncbi_tax_id)
        tables = self.tables[ncbi_tax_id]
        ac_types = list(
            ac_types if ac_types is not None else self.name_types.keys()
        )
        # creating empty MappingTable objects:
        for ac_typ in ac_types:
            tables[(ac_typ, 'uniprot')] = MappingTable(ac_typ,
                                                       'uniprot',
                                                       'protein',
                                                       ac_typ,
                                                       None,
                                                       ncbi_tax_id,
                                                       None,
                                                       log=self.ownlog)
        # attempting to load them from Pickle
        i = 0
        # iterate over a copy: items are removed below on cache hits
        for ac_typ in list(ac_types):
            md5ac = common.md5((ac_typ, 'uniprot', bi, ncbi_tax_id))
            cachefile = os.path.join('cache', md5ac)
            if self.cache and os.path.isfile(cachefile):
                tables[(ac_typ, 'uniprot')].mapping = \
                    pickle.load(open(cachefile, 'rb'))
                ac_types.remove(ac_typ)
                tables[(ac_typ, 'uniprot')].mid = md5ac
        # loading the remaining from the big UniProt mapping file:
        if len(ac_types) > 0:
            url = urls.urls['uniprot_idmap_ftp']['url']
            c = curl.Curl(url, silent=False, large=True)

            prg = progress.Progress(c.size, "Processing ID conversion list",
                                    99)
            for l in c.result:
                prg.step(len(l))
                l = l.decode('ascii').strip().split('\t')
                for ac_typ in ac_types:
                    if len(l) > 2 and self.name_types[ac_typ] == l[1]:
                        other = l[2].split('.')[0]
                        if other not in tables[(ac_typ,
                                                'uniprot')].mapping['to']:
                            tables[(ac_typ,
                                    'uniprot')].mapping['to'][other] = []
                        tables[(ac_typ, 'uniprot')].mapping['to'][other].\
                            append(l[0].split('-')[0])
                        if bi:
                            uniprot = l[0].split('-')[0]
                            if uniprot not in tables[(ac_typ, 'uniprot')].\
                                    mapping['from']:
                                tables[(ac_typ, 'uniprot')].\
                                    mapping['from'][uniprot] = []
                            tables[(ac_typ, 'uniprot')].mapping['from'][uniprot].\
                                append(other)
            prg.terminate()
            if self.cache:
                for ac_typ in ac_types:
                    md5ac = common.md5((ac_typ, 'uniprot', bi, ncbi_tax_id))
                    cachefile = os.path.join('cache', md5ac)
                    pickle.dump(tables[(ac_typ, 'uniprot')].mapping,
                                open(cachefile, 'wb'))
Example #7
 def compounds_targets_mechanism(self,
                                 id_list,
                                 id_type='uniprot',
                                 domains=False,
                                 pred_bind_d=False,
                                 activities=False,
                                 pchembl=False,
                                 one_query=False,
                                 client_side=False):
     if id_type == 'uniprot':
         compound_lookup = True
         id_list = self.get_chembl_uniprots(id_list)
     self.result = []
     id_list = id_list if type(id_list) is list else [id_list]
     if one_query:
         query_thread = threading.Thread(
             target=self.compound_target_mechanism,
             args=[id_list],
             kwargs={
                 'id_type': id_type,
                 'domains': domains,
                 'pred_bind_d': pred_bind_d,
                 'activities': activities,
                 'pchembl': pchembl
             })
         query_thread.daemon = True
         query_thread.start()
         sys.stdout.write('\n')
         sys.stdout.flush()
          while query_thread.is_alive():
             self.mysql.print_status()
             time.sleep(1)
         self.mysql_ready()
         if client_side:
             self.result = list(self.result)
     else:
         prg = progress.Progress(total=len(id_list),
                                 name='Sending queries',
                                 interval=5)
         qids = []
         for identifier in id_list:
             prg.step()
             qids.append(
                 self.compound_target_mechanism(identifier,
                                                id_type=id_type,
                                                domains=domains,
                                                pred_bind_d=pred_bind_d,
                                                activities=activities,
                                                pchembl=pchembl,
                                                wait=False))
         prg.terminate()
         self.mysql_ready(qids)
         for qid in qids:
             self.result += list(self.mysql.get_result(qid))
Example #8
def load_go(graph, aspect=['C', 'F', 'P']):
    '''
    @graph : igraph.Graph
    Any igraph.Graph object with UniProt IDs in its `name` vertex attribute.
    '''
    aspect = aspect if isinstance(aspect, list) else [aspect]
    graph.vs['go'] = [{'C': [], 'F': [], 'P': []} for _ in graph.vs]
    go = dataio.get_go_goa()
    prg = progress.Progress(graph.vcount(), 'Loading GO annotations', 9)
    for v in graph.vs:
        prg.step()
        for asp in aspect:
            if v['name'] in go[asp]:
                v['go'][asp] = go[asp][v['name']]
    prg.terminate()
Example #9
 def inchikey2anything(self, target, lst):
     self.result = {}
     target = str(target) if type(target) is int else self.name_dict[target]
     prg = progress.Progress(
         total=len(lst), name='Translating InChi-Keys', interval=1)
     for inchik in lst:
         url = self.inchi_stem % inchik
          c = curl.Curl(url, large=False)
          result = c.result
          # ensure the key exists even if the query failed
          self.result[inchik] = []
         if result is not None:
             data = json.loads(result)
             self.result[inchik] = [
                 d['src_compound_id'] for d in data if d['src_id'] == target
             ]
         prg.step()
     prg.terminate()
Example #10
 def connectivity_search(self,
                         id_list,
                         id_type,
                          parameters=(1, 0, 0, 0, 0, 1, 0)):
      '''
      `parameters` is a list of the parameters A-H described at
      https://www.ebi.ac.uk/unichem/info/widesearchInfo
      (default: [1, 0, 0, 0, 0, 1, 0]; H is appended below).
      '''
      # copy instead of appending in place, so the shared default is
      # never mutated; the H parameter must be 1 to process the result
      parameters = [str(i) for i in parameters] + ['1']
     self.result = {}
     if id_type == 'inchikey':
         id_type = ''
         method = 'key_search'
     elif id_type == 'smiles':
         self.result = None
         return None
     else:
         id_type = str(
             id_type) if type(id_type) is int else self.name_dict[id_type]
         id_type = '%s/' % id_type
         method = 'cpd_search'
     prg = progress.Progress(total=len(id_list),
                             name='Connectivity search',
                             interval=1)
     for i in id_list:
         prg.step()
         url = self.cpd_search.format(method, i, id_type,
                                      '/'.join(parameters))
         c = curl.Curl(url, large=False)
         result = c.result
         self.result[i] = []
         if result is not None:
             data = json.loads(result)
             for k, v in iteritems(data):
                 for j in range(1, len(v)):
                     self.result[i].append(v[j][0])
         self.result[i] = list(set(self.result[i]))
     prg.terminate()
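
With the copy fix above, repeated calls no longer grow a shared default list. A usage sketch (instance and source key as in the earlier assumptions):

u = Unichem()  # hypothetical instance, as above

# search parameters A-G; H is appended by the method itself
u.connectivity_search(['CHEMBL25'], 'chembl',
                      parameters=[1, 0, 0, 0, 0, 1, 0])
print(u.result['CHEMBL25'])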
Example #11
 def read_mapping_mysql(self, param):
     if param.mysql is None:
         self.ownlog.msg(2, 'No MySQL parameters given.', 'ERROR')
         return {"o": {}, "i": {}}
     tax_filter = ("" if param.ncbi_tax_id is None else "AND %s = %u" %
                   (param.ncbi_tax_id, self.ncbi_tax_id))
     query = """
         SELECT %s AS one,%s AS two FROM %s 
         WHERE %s IS NOT NULL AND %s IS NOT NULL %s""" % (
         param.fieldOne, param.fieldTwo, param.tableName, param.fieldOne,
         param.fieldTwo, tax_filter)
     try:
         param.mysql.run_query(query)
     except _mysql.Error as e:
         self.ownlog.msg(2,
                         "MySQL error: %s\nFAILED QUERY: %s" % (e, query),
                         'ERROR')
         return {"o": {}, "i": {}}
     total = len(param.mysql.result) + 1
     prg = progress.Progress(total=total,
                             name="Processing data",
                             interval=42)
     mapping_o = {}
     mapping_i = {}
     for rr in param.mysql.result:
         if rr["one"] not in mapping_o:
             mapping_o[rr["one"]] = []
         if rr["two"] not in mapping_i:
             mapping_i[rr["two"]] = []
         mapping_o[rr["one"]].append(rr["two"])
         mapping_i[rr["two"]].append(rr["one"])
         prg.step()
     self.mapping["to"] = mapping_o
     self.cleanDict(self.mapping["to"])
     if param.bi:
         self.mapping["from"] = mapping_i
         self.cleanDict(self.mapping["from"])
     prg.terminate()
Example #12
 def compounds_targets(self,
                       id_list,
                       id_type='uniprot',
                       assay_types=['B', 'F'],
                       relationship_types=['D', 'H'],
                       compound_props=[],
                       domains=False,
                       pred_bind_d=False,
                       action_type=False,
                       activities=False,
                       pchembl=False,
                       one_query=False,
                       client_side=False):
      '''
      Queries each ID with a separate MySQL query rather than one
      batch query. Better performance is expected when the batch
      query would require a disk temporary table (disk_tmp_table).
      '''
     if id_type == 'uniprot':
         compound_lookup = True
         id_list = self.get_chembl_uniprots(id_list)
     self.result = []
     id_list = id_list if type(id_list) is list else [id_list]
     if one_query:
         query_thread = threading.Thread(target=self.compound_target,
                                         args=[id_list],
                                         kwargs={
                                             'id_type': id_type,
                                             'assay_types': assay_types,
                                             'relationship_types':
                                             relationship_types,
                                             'compound_props':
                                             compound_props,
                                             'domains': domains,
                                             'pred_bind_d': pred_bind_d,
                                             'action_type': action_type,
                                             'activities': activities,
                                             'pchembl': pchembl
                                         })
         query_thread.daemon = True
         query_thread.start()
         sys.stdout.write('\n')
         sys.stdout.flush()
          while query_thread.is_alive():
             self.mysql.print_status()
             time.sleep(1)
         self.mysql_ready()
         if client_side:
             self.result = list(self.result)
     else:
         prg = progress.Progress(total=len(id_list),
                                 name='Starting queries',
                                 interval=5)
         qids = []
         for identifier in id_list:
             prg.step()
             qids.append(
                 self.compound_target(identifier,
                                      id_type=id_type,
                                      assay_types=assay_types,
                                      relationship_types=relationship_types,
                                      compound_props=compound_props,
                                      domains=domains,
                                      pred_bind_d=pred_bind_d,
                                      action_type=action_type,
                                      activities=activities,
                                      pchembl=pchembl,
                                      wait=False))
         prg.terminate()
         self.mysql_ready(qids)
         for qid in qids:
             self.result.extend(list(self.mysql.get_result(qid)))
Example #13
 def progress_setup(self):
     if not self.silent and self.progress is None and not self.debug:
         self.progress = progress.Progress(name=self.title,
                                           interval=1,
                                           status='initializing curl')
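
Every example on this page follows the same three-call pattern: construct a Progress with a total and a name, call step() once per unit of work (optionally with an increment), and call terminate() at the end. A minimal standalone sketch (the pypath.progress import path is an assumption):

from pypath import progress  # import path is an assumption

items = list(range(1000))

prg = progress.Progress(len(items), 'Processing items', 1)

for item in items:
    # ... work on one item ...
    prg.step()

prg.terminate()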
Example #14
File: bel.py Project: kkaris/pypath
    def resource_to_relationships_graph(
            self,
            graph,
        ) -> None:
        """
        Convert a PyPath igraph object into list of BEL relationships.
        """
        
        self._log('Building bel graph from PyPath object (igraph graph).')
        
        edges = graph.es
        prg = progress.Progress(
            len(edges),
            'Building bel graph from PyPath object (igraph graph).',
            1,
        )
        for edge in edges:
            prg.step()
            directions = edge['dirs']

            for direction in (directions.straight, directions.reverse):
                
                if not directions.dirs[direction]:
                    # this direction does not exist
                    continue

                dir_sources = directions.get_dir(direction, sources = True)

                if self.only_sources and not dir_sources & self.only_sources:
                    # this direction not provided
                    # in the currently enabled set of sources
                    continue

                predicates = set()

                activation, inhibition = (
                    directions.get_sign(direction, sources=True)
                )

                if self._check_sign(activation):
                    predicates.add(pc.DIRECTLY_INCREASES)

                if self._check_sign(inhibition):
                    predicates.add(pc.DIRECTLY_DECREASES)

                if not predicates:
                    # use `regulates` if sign is unknown
                    predicates.add(pc.REGULATES)

                source = self._protein(direction[0])
                target = self._protein(direction[1])
                evid_cits = self._references(edge, direction)
                
                for (
                    predicate, (evid, cits)
                ) in itertools.product(predicates, evid_cits):
                    
                    for cit in cits:
                        
                        self.bel_graph.add_qualified_edge(
                            source,
                            target,
                            relation = predicate,
                            citation = cit,
                            evidence = 'OmniPath',
                        )
                        self.bel_graph.add_qualified_edge(
                            source,
                            target,
                            relation = predicate,
                            citation = cit,
                            evidence = evid,
                        )

            if not self._has_direction(directions):
                # add an undirected relationship
                # if no direction available

                evid_cits = self._references(edge, 'undirected')
                source = self._protein(directions.nodes[0])
                target = self._protein(directions.nodes[1])

                for evid, cits in evid_cits:
                    
                    for cit in cits:
                        
                        self.bel_graph.add_association(
                            source, target,
                            citation = cit,
                            evidence = 'OmniPath',
                        )
                        self.bel_graph.add_association(
                            source, target,
                            citation = cit,
                            evidence = evid,
                        )
        
        prg.terminate()
        self._log('Building bel graph from PyPath object finished.')
Example #15
    def make_df(
            self,
            unique_pairs = True,
            extra_node_attrs = None,
            extra_edge_attrs = None,
        ):
        """
        Creates a data frame from the network.

        By default UniProt IDs, Gene Symbols, source databases, literature
        references, directionality and sign information and interaction type
        are included.

        Args:
        -----
        :param bool unique_pairs:
            If `True`, each line corresponds to a unique pair of
            molecules; all directionality and sign information is
            covered in other columns. If `False`, the order of the
            `A` and `B` IDs corresponds to the direction, while the
            sign is covered in further columns.
        :param dict extra_node_attrs:
            Additional node attributes to be included in the exported
            table. Keys are column names used in the header, while
            values are names of vertex attributes. Values may also be
            methods, which will then be called on each vertex; these
            should return strings, otherwise their result will be
            converted to string. In the header, `_A` and `_B` suffixes
            are appended to the column names so the values can be
            assigned to the A and B side interaction partners.
        :param dict extra_edge_attrs:
            Additional edge attributes to be included in the exported
            table. Keys are column names used in the header, while
            values are names of edge attributes or callables accepting
            an edge as a single argument.
        """

        result = []

        self.pa.genesymbol_labels()

        self.extra_node_attrs = extra_node_attrs or self.extra_node_attrs
        self.extra_edge_attrs = extra_edge_attrs or self.extra_edge_attrs

        suffix_a = 'A' if unique_pairs else 'source'
        suffix_b = 'B' if unique_pairs else 'target'

        dtypes = (
            self.default_dtypes_uniquepairs
                if unique_pairs else
            self.default_dtypes_bydirs
        )
        header = copy.copy(
            self.default_header_uniquepairs
            if unique_pairs else
            self.default_header_bydirs
        )
        header += self.extra_edge_attrs.keys()
        header += [
            '%s_%s' % (x, suffix_a)
            for x in self.extra_node_attrs.keys()
        ]
        header += [
            '%s_%s' % (x, suffix_b)
            for x in self.extra_node_attrs.keys()
        ]

        prg = progress.Progress(
            total = self.graph.ecount(),
            name = 'Creating table',
            interval=31
        )

        for e in self.graph.es:

            # adding default fields
            lines = (
                self.process_edge_uniquepairs(e)
                    if unique_pairs else
                self.process_edge_bydirection(e)
            )

            result.extend(lines)

            prg.step()

        prg.terminate()

        self.df = pd.DataFrame(result, columns = header)
        self.df = self.df.astype(dtypes)
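
A usage sketch; the Export class name and its construction from an initialized PyPath instance `pa` are assumptions:

exp = Export(pa)  # hypothetical: `pa` is a loaded PyPath network object

exp.make_df(unique_pairs=False)

# one row per direction, with sign information in separate columns
print(exp.df.head())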
Example #16
    def read_mapping_file(self, param, ncbi_tax_id=None):

        ncbi_tax_id = self.get_tax_id(ncbi_tax_id)

        if param.__class__.__name__ != "FileMapping":
            self.ownlog.msg(2, "Invalid parameter for read_mapping_file()",
                            'ERROR')
            return {}

        if (not os.path.exists(param.input)
                and not hasattr(mapping_input, param.input)):

            return {}

        if hasattr(mapping_input, param.input):

            toCall = getattr(mapping_input, param.input)
            inputArgs = param.inputArgs if hasattr(param, 'inputArgs') else {}
            infile = list(toCall(**inputArgs))

            total = sum([sys.getsizeof(i) for i in infile])

        else:
            infile = codecs.open(param.input, encoding='utf-8', mode='r')
            total = os.path.getsize(param.input)

        prg = progress.Progress(total=total,
                                name="Reading from file",
                                interval=18)

        lnum = 0
        lsum = 0
        mapping_o = {}
        mapping_i = {}

        for line in infile:

            if len(line) == 0:
                continue

            if lnum == 0 and param.header != 0:
                lnum += 1
                continue

            if isinstance(line, list):
                prg.step(sys.getsizeof(line))

            else:
                line = line.decode('utf-8')
                prg.step(len(line))
                line = line.rstrip().split(param.separator)

            if len(line) > max(param.oneCol, param.twoCol):

                if line[param.oneCol] not in mapping_o:

                    mapping_o[line[param.oneCol]] = []
                mapping_o[line[param.oneCol]].append(line[param.twoCol])

                if param.bi:

                    if line[param.twoCol] not in mapping_i:

                        mapping_i[line[param.twoCol]] = []
                    mapping_i[line[param.twoCol]].append(line[param.oneCol])

            lnum += 1

        if hasattr(infile, 'close'):
            infile.close()

        self.mapping["to"] = mapping_o
        self.cleanDict(self.mapping["to"])

        if param.bi:
            self.mapping["from"] = mapping_i
            self.cleanDict(self.mapping["from"])

        prg.terminate()