Beispiel #1
0
def go_annotations(dburl=config.get("databases.go_url", None),
                   genus="Saccharomyces", species="cerevisiae",
                   include_ancestors=False):
    """Collect GO annotations for one species from a GO MySQL database.

    The database contents can be obtained from the geneontology website.

    Example url: "mysql://username:password@hostname:port/go"

    :param dburl: connection url handed to ``open_go``; defaults to the
                  ``databases.go_url`` configuration entry.
    :param genus: value the ``genus`` field has to equal.
    :param species: value the ``species`` field has to equal.
    :param include_ancestors: when true, annotations are additionally linked
                  through ``graph_path`` and only paths whose relationship
                  term is named "is_a" are kept; when false, each association
                  is linked directly to its own term.
    :returns: a copied result with the fields gene_symbol, gene_id, go_id,
              annotation, go_type and evidence, tagged "annotations".

    Rows are dropped when ``is_not`` is set or when the evidence code
    equals "ND".
    """
    db = open_go(dburl)

    # species -> gene_product -> association -> evidence, plus the dbxref
    # record of every gene product.
    joined = db.species |Match(_.id, _.species_id)| db.gene_product
    joined = joined |Match(_.gene_product.id, _.gene_product_id)| db.association
    joined = joined |Match(_.association.id, _.association_id)| db.evidence
    joined = joined |Match(_.gene_product.dbxref_id, _.id)| db.dbxref

    if not include_ancestors:
        # Direct link: the association's own term is the annotation.
        joined = joined |Match(_.association.term_id, _.id)| db.term//"annot"
    else:
        # Walk graph_path from the annotated term (term2) to term1 and use
        # term1 as the annotation, restricted to "is_a" relationships.
        joined = joined |Match(_.association.term_id, _.term2_id)| db.graph_path
        joined = joined |Match(_.term1_id, _.id)| db.term//"annot"
        joined = joined |Match(_.relationship_type_id, _.id)| db.term//"rel"
        joined = joined[_.rel.name == "is_a"]

    # Apply the row filters one at a time.
    joined = joined[(_.genus == genus) & (_.species == species)]
    # NOTE: this is a query expression, so "== False" must stay as written.
    joined = joined[_.is_not == False]
    joined = joined[_.evidence.code != "ND"]
    joined = joined.ReplaceMissing()

    selected = joined.Get(
        _.symbol / "gene_symbol",
        _.xref_key / "gene_id",
        _.annot.acc / "go_id",
        _.annot.name / "annotation",
        _.annot.term_type / "go_type",
        _.evidence.code / "evidence",
    )
    return (selected % "annotations").Copy()
Beispiel #2
0
def string_interaction_types(dburl=config.get('databases.string_url', None),
                             species="Saccharomyces cerevisiae",
                             external_names=False):
    """Return the interaction actions stored in a String database for a species.

    The database data can be obtained from String.

    example url: "postgres://*****:*****@hostname:port/string_dbname"

    :param dburl: connection url handed to ``string``; defaults to the
                  ``databases.string_url`` configuration entry.
    :param species: value the species ``official_name`` has to equal.
    :param external_names: when true, the two interactors are named by the
                  second '.'-separated part of ``protein_external_id``;
                  otherwise by ``preferred_name``.
    :returns: the interactor names ("left", "right") together with the
              ``mode``, ``action``, ``a_is_acting`` and ``score`` fields,
              tagged "interactions".

    The species filter is applied through the species of the "left" protein.

    Use ``string`` to access the whole database::

       #Get available species names:
       >>> string(dburl).items.species.official_name

    """
    db = string(dburl)

    # Attach both interaction partners and the species of the left partner.
    actions = db.network.actions
    actions = actions |Match(_.item_id_a, _.protein_id)| db.items.proteins//"left"
    actions = actions |Match(_.item_id_b, _.protein_id)| db.items.proteins//"right"
    actions = actions |Match(_.left.species_id, _.species_id)| db.items.species
    actions = actions[_.official_name == species]

    if not external_names:
        names = actions.Get(_.left.preferred_name/"left",
                            _.right.preferred_name/"right")
    else:
        external_ids = actions.Get(_.left.protein_external_id/"left",
                                   _.right.protein_external_id/"right")
        # Keep only the part after the first '.' of each external id.
        names = external_ids.Each(lambda x : x.split('.')[1], dtype="bytes")

    result = actions.Get(names, _.mode, _.action, _.a_is_acting, _.score)
    return result % "interactions"
Beispiel #3
0
def string_interactions(dburl=config.get('databases.string_url', None),
                        species="Saccharomyces cerevisiae",
                        subscores=False, external_names=False):
    """Return the protein-protein links of a species from a String database.

    The database data can be obtained from String.

    example url: "postgres://*****:*****@hostname:port/string_dbname"

    :param dburl: connection url handed to ``string``; defaults to the
                  ``databases.string_url`` configuration entry.
    :param species: value the species ``official_name`` has to equal.
    :param subscores: when true, the individual evidence-channel scores are
                  returned (under descriptive names) in front of
                  ``combined_score``; otherwise only ``combined_score``.
    :param external_names: when true, the two interactors are named by the
                  second '.'-separated part of ``protein_external_id``;
                  otherwise by ``preferred_name``.
    :returns: the interactor names ("left", "right") followed by the selected
              score fields, tagged "interactions".

    Use ``string`` to access the whole database::

       #Get available species names:
       >>> string(dburl).items.species.official_name

    """
    db = string(dburl)

    # Restrict the links to the requested species before attaching proteins.
    links = db.items.species |Match| db.network.protein_protein_links
    links = links[_.official_name == species]
    links = links |Match(_.protein_id_a, _.protein_id)| db.items.proteins//"left"
    links = links |Match(_.protein_id_b, _.protein_id)| db.items.proteins//"right"

    if not external_names:
        names = links.Get(_.left.preferred_name/"left",
                          _.right.preferred_name/"right")
    else:
        external_ids = links.Get(_.left.protein_external_id/"left",
                                 _.right.protein_external_id/"right")
        # Keep only the part after the first '.' of each external id.
        names = external_ids.Each(lambda x : x.split('.')[1], dtype="bytes")

    if subscores:
        score_columns = (
            _.equiv_nscore/"neighborhood_score",
            _.equiv_nscore_transferred/"neighborhood_score_transferred",
            _.equiv_fscore/"fusion_score",
            _.equiv_pscore/"phylo_cooccurence_score",
            _.equiv_hscore/"homology_score",
            _.array_score/"coexpression_score",
            _.array_score_transferred/"coexpression_score_transferred",
            _.experimental_score/"experimental_score",
            _.experimental_score_transferred/"experimental_score_transferred",
            _.database_score/"curated_score",
            _.database_score_transferred/"curated_score_transferred",
            _.textmining_score/"textmining_score",
            _.textmining_score_transferred/"textmining_score_transferred",
            _.combined_score,
        )
    else:
        score_columns = (_.combined_score,)

    return links.Get(names, *score_columns) % "interactions"
Beispiel #4
0
def go_info(dburl=config.get("databases.go_url", None),
            genus="Saccharomyces", species="cerevisiae",
            include_ancestors=False):
    """Collect GO term information for one species from a GO MySQL database.

    Database data can be obtained from the geneontology website.

    Example url: "mysql://username:password@hostname:port/go"

    :param dburl: connection url handed to ``open_go``; defaults to the
                  ``databases.go_url`` configuration entry.
    :param genus: value the ``genus`` field has to equal.
    :param species: value the ``species`` field has to equal.
    :param include_ancestors: when true, the per-term information is taken
                  from the complete ``graph_path`` table, limited to the
                  terms that received a gene count; when false, it is taken
                  from the species-restricted paths only.
    :returns: a copy of the per-term information (go_id, go_type, annotation,
              depth, ancestor, relationship, distance) matched with the
              number of genes per term (``ngenes``).
    """
    db = open_go(dburl)

    # Annotations of the requested species together with every graph path
    # that ends (term2) in the annotated term.
    paths = db.species
    paths = paths |Match(_.id, _.species_id)| db.gene_product
    paths = paths |Match(_.gene_product.id, _.gene_product_id)| db.association
    paths = paths |Match(_.association.term_id, _.term2_id)| db.graph_path
    paths = paths[(_.genus == genus) & (_.species == species)]

    # Resolve the term ids to term records to obtain the accession ids.
    paths = paths |Match(_.term2_id, _.id)| db.term//"child"
    paths = paths |Match(_.term1_id, _.id)| db.term//"parent"
    paths = paths |Match(_.relationship_type_id, _.id)| db.term//"rel"
    paths = paths.ReplaceMissing()

    # Keep only the fields needed further on.
    if include_ancestors:
        path_fields = (
            _.child.acc/"go_id",
            _.parent.acc/"parent_id",
            _.gene_product.id/"gene_id",
        )
    else:
        path_fields = (
            _.child.acc/"go_id",
            _.parent.acc/"parent_id",
            _.child.term_type/"go_type",
            _.child.name/"annotation",
            _.distance,
            _.rel.name/"relationship",
            _.gene_product.id/"gene_id",
        )
    paths = paths.Get(*path_fields).Copy()

    # Step A: number of distinct genes associated with each (parent) term.
    ngenes = paths.GroupBy(_.parent_id).Get(_.parent_id/"go_id",
                                            _.gene_id.Unique().Count()/"ngenes")

    # Pick the table the term information is derived from.
    if include_ancestors:
        all_paths = db.graph_path
        all_paths = all_paths |Match(_.term2_id, _.id)| db.term//"child"
        all_paths = all_paths |Match(_.term1_id, _.id)| db.term//"parent"
        all_paths = all_paths |Match(_.relationship_type_id, _.id)| db.term//"rel"
        all_paths = all_paths.Get(
            _.child.acc/"go_id",
            _.parent.acc/"parent_id",
            _.child.term_type/"go_type",
            _.child.name/"annotation",
            _.distance,
            _.rel.name/"relationship",
        ).Copy()
        term_source = all_paths[_.go_id |In| ngenes.go_id]
    else:
        term_source = paths

    # Step B1: group per term.
    goinfo = term_source.GroupBy(_.go_id, flat=(_.go_type, _.annotation))

    # Step B2: for each term, determine type, max depth to root, ancestors
    # (non-unique) and relationship to ancestors.
    goinfo = goinfo.Get(
        _.go_id, _.go_type, _.annotation,
        _[_.parent_id == "all"].distance.Max()/"depth",
        _.parent_id / "ancestor", _.relationship, _.distance,
    )

    # Step B3: select for each ancestor the minimum path length.
    goinfo = goinfo.GroupBy(_.ancestor)[..., _.distance.Argmin()].Copy()

    # Step C: combine with the gene counts, copy, return.
    combined = goinfo |Match| ngenes
    return combined.Copy()
Beispiel #5
0
def open_go(dburl=config.get("databases.go_url", None)):
    """Connect to the GO database at ``dburl`` and return the connection.

    ``dburl`` defaults to the ``databases.go_url`` configuration entry.
    """
    return Connect(dburl)
Beispiel #6
0
def string(dburl=config.get('databases.string_url', None)):
    """Connect to the String database at ``dburl`` and return the connection.

    ``dburl`` defaults to the ``databases.string_url`` configuration entry.
    """
    connection = Connect(dburl)
    return connection
Beispiel #7
0
    def run(cls, query, run_manager, portnumber=config.get('debug.cytoscape_port_number',9000)):
        """Send the graph of the CreateGraph pass to Cytoscape over XML-RPC.

        A fresh instance is built with ``cls()`` (the decorator is not visible
        here; presumably this is a classmethod), the graph is taken from
        ``run_manager.pass_results``, pruned, and then pushed node by node and
        edge by edge to the ``Cytoscape`` XML-RPC endpoint on localhost.

        :param query: not used by this method.
        :param run_manager: object whose ``pass_results`` mapping holds the
                            result of ``create_graph.CreateGraph``.
        :param portnumber: port of the local Cytoscape XML-RPC server;
                           defaults to the ``debug.cytoscape_port_number``
                           configuration entry (9000 when unset).
        :returns: None.
        """
        self = cls()
        self.rand = random.randint(0,10000000)
        self.graph = run_manager.pass_results[create_graph.CreateGraph]
        self.graph.pruneGraph()
        self.server = xmlrpclib.ServerProxy("http://localhost:" + str(portnumber)).Cytoscape
        self.network = self.server.createNetwork("network" + str(networkid()))

        self.unique_names = defaultdict(int)
        self.names = dict()

        self.node_name = dict()
        self.node_class = dict()
        self.node_type = dict()
        self.node_rep = dict()

        self.edge_from = []
        self.edge_to = []
        self.edge_type = []
        self.edge_attr = []

        # self.node / self.edgeKey are defined outside this block; presumably
        # they fill the name, node_* and edge_* containers initialised above.
        for node in self.graph.nodes:
            self.node(node)

        self.server.createNodes(self.network, self.names.values())
        for source,edges in self.graph.edge_source.iteritems():
            for edge in edges:
                assert edge.source is source, "Source in edge and index not equal"
                self.edgeKey(edge.__class__.__name__,edge)

        self.edgeids = self.server.createEdges(self.network,self.edge_from, self.edge_to, self.edge_type,[True] * len(self.edge_type),False)

        self.server.addNodeAttributes("name","STRING",self.node_name,False)
        self.server.addNodeAttributes("type","STRING",self.node_type,False)
        self.server.addNodeAttributes("class","STRING",self.node_class,False)
        self.server.addNodeAttributes("rep","STRING",self.node_rep,False)
        self.server.addEdgeAttributes("type","STRING",dict(zip(self.edgeids,self.edge_type)))
        self.server.addEdgeAttributes("attr","STRING",dict(zip(self.edgeids,self.edge_attr)))

        for attribute,attribute_dict in self.graph.node_attributes.iteritems():
            attribute_values = list(attribute_dict.values())
            if not attribute_values:
                # No value to infer the attribute type from (and nothing to
                # send); indexing the first value would raise IndexError.
                continue

            # The type of the first value decides the Cytoscape attribute
            # type and the converter applied to every value.
            first_value = attribute_values[0]
            if isinstance(first_value, float):
                xtype = "FLOATING"
                convert = float
            elif isinstance(first_value, int):
                xtype = "INTEGER"
                convert = int
            else:
                xtype = "STRING"
                convert = str

            attribute_name_dict = {}
            for node, node_name in self.names.iteritems():
                try:
                    r = attribute_dict.get(node,"")
                    if isinstance(r, representor.Representor):
                        r = str(r.__class__.__name__)
                    else:
                        r = convert(r)
                except Exception:
                    # Best effort: a value that cannot be converted (e.g. the
                    # "" placeholder for a numeric attribute) is left out.
                    # Unlike a bare except this lets KeyboardInterrupt and
                    # SystemExit propagate.
                    continue
                if isinstance(r,str) and len(r) > 500:
                    r = r[:500] + ' ...'
                attribute_name_dict[node_name] = r

            self.server.addNodeAttributes(attribute,xtype,attribute_name_dict,False)
            if attribute == 'links':
                import matplotlib.cm
                color_map = discrete_color_map(attribute_name_dict.values(), matplotlib.cm.gist_rainbow)
                self.server.createDiscreteMapper('default','links', 'Node Color','#444444',color_map)
            if attribute == "time":
                self.server.createContinuousMapper('default','time', 'Node Size',[0.0, max(attribute_values)],[20.0, 20.0, 100.0, 100.0])

        self.server.setNodeLabel(self.network, "name", "","default")
        self.server.setDiscreteNodeShapeMapper(self.network, 'default',
                'type', 'diamond', {'else':'ellipse', 'unaryop':'octagon', 'rep':'round_rect'}, True)
        self.server.setEdgeTargetArrowRule(self.network,"type","Arrow",["ParamListEdge","ParamChoiceListEdge"],["T","T"])
        self.server.setEdgeLineStyleRule(self.network,"type","SOLID",["ParamChoiceEdge","ParamChoiceListEdge",'SQLResultEdge'],["DOT","DOT","SINEWAVE"])
        self.server.performLayout(self.network, "hierarchical")