예제 #1
0
def get_GO_dag():
    """Return the GODag read from ``GO_PATH``, downloading the OBO file on failure.

    The first attempt loads the file directly.  If that raises any
    ``Exception``, ``download_go_basic_obo`` is called with the same path and
    the load is retried once; a failure of the retry propagates to the caller.
    """
    try:
        return GODag(obo_file=GO_PATH.as_posix())
    except Exception:
        # Deliberate best-effort fallback: fetch the file, then retry the load.
        download_go_basic_obo(obo=GO_PATH.as_posix())
        return GODag(obo_file=GO_PATH.as_posix())
예제 #2
0
def build_hierarcy():
    """Write high-scoring edges between leaf GO terms into a Neo4j database.

    Edges come from ``fetch_string_ppi_edges()`` as a mapping whose keys are
    two GO ids joined by ``"="`` and whose values are scores.  An edge is
    merged into the graph only when both ids are vertices of the
    ``GO:0005575`` hierarchy returned by ``extract_hier_all``, the score is
    greater than 100000 and both vertices are flagged ``isleaf``.  The number
    of merged edges is printed at the end.
    """
    print("fetching ppi")
    go_edges = fetch_string_ppi_edges()

    go2geneids, _ = fetch_go_hierarcy()

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)

    hier_path = os.path.join(constants.BASE_PROFILE, "output", "go_hierarcy.txt")
    # The context manager guarantees the report file is flushed and closed.
    with open(hier_path, "w+") as out:
        vertices, _ = extract_hier_all(gosubdag, out, 'GO:0005575', go2geneids)

    def add_edge(tx, src, dst, score):
        # Query parameters instead of string formatting: no quoting problems
        # and no Cypher injection through term names.
        # NOTE(review): score is now sent with its Python type; confirm that
        # scores are numeric so the stored property stays a number.
        tx.run("MERGE (n1:GO {term: $term1}) "
               "MERGE (n2:GO {term: $term2}) "
               "MERGE (n1)-[r:SCR {score: $score}]->(n2)",
               term1=src, term2=dst, score=score)

    # NOTE(review): connection URI and credentials are hard-coded; consider
    # moving them to configuration.
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Hh123456"))
    try:
        with driver.session() as session:
            count = 0
            for cur_edges, score in go_edges.items():
                terms = cur_edges.split("=")
                if terms[0] in vertices and terms[1] in vertices \
                        and score > 100000 \
                        and vertices[terms[0]]['isleaf'] \
                        and vertices[terms[1]]['isleaf']:
                    count += 1
                    session.write_transaction(add_edge, terms[0], terms[1], score)
            print("total edges: {}".format(count))
    finally:
        # Release the connection pool even if a transaction fails.
        driver.close()
예제 #3
0
def build_hierarcy():
    """Write the PPI-derived GO edges between leaf terms to a TSV file.

    Edges come from ``fetch_string_ppi_edges()`` as a mapping whose keys are
    two GO ids joined by ``"="`` and whose values are scores.  An edge is kept
    only when both ids are vertices of the ``GO:0005575`` hierarchy returned
    by ``extract_hier_all``, the score is greater than 1000 and both vertices
    are flagged ``isleaf``.  Kept edges are written as ``<edge>\\t<score>``
    lines to ``GO_edges_ppi_filtered.txt`` in ``constants.OUTPUT_GLOBAL_DIR``.
    """
    print("fetching ppi")
    go_edges = fetch_string_ppi_edges()

    go2geneids, _ = fetch_go_hierarcy()

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)

    hier_path = os.path.join(constants.BASE_PROFILE, "output", "go_hierarcy.txt")
    # The context manager guarantees the report file is flushed and closed.
    with open(hier_path, "w+") as out:
        vertices, _ = extract_hier_all(gosubdag, out, 'GO:0005575', go2geneids)

    lines = []
    for cur_edges, score in go_edges.items():
        terms = cur_edges.split("=")
        if terms[0] in vertices and terms[1] in vertices \
                and score > 1000 \
                and vertices[terms[0]]['isleaf'] \
                and vertices[terms[1]]['isleaf']:
            lines.append("{}\t{}\n".format(cur_edges, score))

    print("about to write filtered ppi go edges to file ({} lines)".format(
        len(lines)))
    filtered_path = os.path.join(constants.OUTPUT_GLOBAL_DIR,
                                 "GO_edges_ppi_filtered.txt")
    with open(filtered_path, "w+") as f:
        f.writelines(lines)
예제 #4
0
def build_hierarcy(go_folder,
                   roots=None,
                   ev_exclude=None):  #  0008150 0005575 0003674
    """Extract the GO hierarchy below each root term.

    Args:
        go_folder: forwarded to ``fetch_go_hierarcy``.
        roots: iterable of root GO ids; defaults to ``['GO:0008150']``.
        ev_exclude: forwarded to ``fetch_go_hierarcy``; defaults to an
            empty set.

    Returns:
        A 4-tuple ``(dict_result, go2geneids, geneids2go, entrez2ensembl)``
        where ``dict_result`` maps each root id to
        ``{"vertices": ..., "edges": ...}`` as returned by
        ``extract_hier_all`` and ``entrez2ensembl`` is the result of
        ``get_entrez2ensembl_dictionary()``.
    """
    # Fresh objects per call: mutable defaults in the signature would be
    # shared between calls (and with whatever the callees do to them).
    if roots is None:
        roots = ['GO:0008150']
    if ev_exclude is None:
        ev_exclude = set()

    go2geneids, geneids2go = fetch_go_hierarcy(go_folder, ev_exclude)

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    tic = timeit.default_timer()
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)
    toc = timeit.default_timer()
    # The timing covers the DAG load only; it is reported once per root.
    msg = "Elapsed HMS: {}\n\n".format(
        str(datetime.timedelta(seconds=(toc - tic))))

    dict_result = {}
    for cur_term in roots:
        vertices, edges = extract_hier_all(gosubdag, cur_term, go2geneids)
        sys.stdout.write(msg)
        dict_result[cur_term] = {"vertices": vertices, "edges": edges}
    return dict_result, go2geneids, geneids2go, get_entrez2ensembl_dictionary()
예제 #5
0
def gen_anno_small():
    """Generate a smaller annotation file using one tenth of each NAME2NUM count."""
    data_dir = '../goatools/tests/data/yangRWC/'
    godag = GODag(os.path.join(REPO, data_dir + 'fig2a.obo'))
    # Map each GO term name to its GO id.
    name2go = {term.name: term.item_id for term in godag.values()}
    file_id2gos = os.path.join(REPO, data_dir + 'fig2a_small.anno')
    name2num = {name: num/10 for name, num in NAME2NUM.items()}
    _get_id2gos(file_id2gos, godag, name2go, name2num)
    print(name2num)
예제 #6
0
def test_paths_to_top():
    """Compare the paths from GO:0000010 to the top with the expected ones, then print them."""
    go_dag = GODag(ROOT + "mini_obo.obo")
    want = [
        ['GO:0000001', 'GO:0000002', 'GO:0000005', 'GO:0000010'],
        ['GO:0000001', 'GO:0000003', 'GO:0000005', 'GO:0000010'],
        ['GO:0000001', 'GO:0000003', 'GO:0000006', 'GO:0000008', 'GO:0000010'],
    ]
    got = go_dag.paths_to_top("GO:0000010")
    chk_results(got, want)
    print_paths(got)
예제 #7
0
 def __init__(self, genes, resource_manager=None):
     """Store the genes, create an empty multigraph and load the GO resources.

     A falsy ``resource_manager`` (including the default ``None``) is
     replaced by a newly constructed ``ResourceManager``.
     """
     self.genes = genes
     self.graph = nx.MultiGraph()
     self.resource_manager = resource_manager or ResourceManager()
     # The OBO location comes from the resource manager.
     self.go_dag = GODag(self.resource_manager.get_go_obo())
     self.goa = self._load_goa_gaf()
예제 #8
0
def test_paths_to_top():
  """Compare the paths from GO:0000010 to the top with the expected ones, then print them."""
  # Path is relative to the directory the test is run from.
  go_dag = GODag("./data/mini_obo.obo")
  want = [
    ['GO:0000001', 'GO:0000002', 'GO:0000005', 'GO:0000010'],
    ['GO:0000001', 'GO:0000003', 'GO:0000005', 'GO:0000010'],
    ['GO:0000001', 'GO:0000003', 'GO:0000006', 'GO:0000008', 'GO:0000010'],
  ]
  got = go_dag.paths_to_top("GO:0000010")
  chk_results(got, want)
  prt_paths(got)
예제 #9
0
def get_highest_ic():
    """Return the contents of the JSON file at ``HIGHEST_IC_FILE_PATH``.

    When that file does not exist yet, the GO DAG is loaded (with its
    progress output sent to ``os.devnull``) and
    ``compute_highest_inc_parallel`` is called with the list of all GO ids
    before the file is read.
    """
    if not os.path.isfile(HIGHEST_IC_FILE_PATH):
        # Close the devnull handle once it is no longer needed instead of
        # leaking it.
        with open(os.devnull, 'w') as devnull:
            go_dag = GODag(GO_DAG_FILE_PATH, prt=devnull)
            compute_highest_inc_parallel(list(go_dag.keys()))

    # The context manager closes the file even if json.load raises.
    with open(HIGHEST_IC_FILE_PATH, 'r') as ic_file:
        return json.load(ic_file)
예제 #10
0
 def __init__(self, fin_obo):
     """Load the GO DAG at ``fin_obo`` (relative to the parent directory of this file).

     Attributes set:
         repo: parent directory of the directory containing this file.
         fin_obo: ``fin_obo`` joined onto ``repo``.
         dag: the loaded ``GODag``.
         go2obj: GO id -> term object, without obsolete terms.
         goids_all: list of the GO ids in ``go2obj``.
     """
     self.repo = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              "../")
     self.fin_obo = os.path.join(self.repo, fin_obo)
     self.dag = GODag(self.fin_obo)
     self.go2obj = {
         go: o
         for go, o in self.dag.items() if not o.is_obsolete
     }
     # A real list: on Python 3 dict.keys() is a view that can be neither
     # shuffled in place nor sliced.
     self.goids_all = list(self.go2obj)
예제 #11
0
def test_semantic_similarity():
    """Check the TermCounts gene counts of terms I, L, M and N in the fig1a data."""
    data_dir = os.path.join(REPO, 'tests/data/yangRWC')
    godag = GODag(os.path.join(REPO, 'tests/data/yangRWC/fig1a.obo'))
    # Map each GO term name to its GO id.
    name2go = {term.name: term.item_id for term in godag.values()}
    assoc = _get_id2gos(os.path.join(REPO, 'tests/data/yangRWC/fig1a.anno'), godag, name2go)
    tcntobj = TermCounts(godag, assoc)
    for name in ('I', 'L', 'M', 'N'):
        assert tcntobj.gocnts[name2go[name]] == 50
def test_paths_to_top():
    """Compare the paths from GO:0000010 to the top with the expected ones, then print them."""
    # Path is relative to the directory the test is run from.
    go_dag = GODag("./data/mini_obo.obo")
    want = [
        ['GO:0000001', 'GO:0000002', 'GO:0000005', 'GO:0000010'],
        ['GO:0000001', 'GO:0000003', 'GO:0000005', 'GO:0000010'],
        ['GO:0000001', 'GO:0000003', 'GO:0000006', 'GO:0000008', 'GO:0000010'],
    ]
    got = go_dag.paths_to_top("GO:0000010")
    chk_results(got, want)
    prt_paths(got)
예제 #13
0
def intialize_term_counts():
    """Write a JSON file mapping every GO id in the DAG to its TermCounts count.

    The DAG is read from ``go-basic.obo`` in ``DATA_DIR``, the associations
    from ``UNIPROT_ASSOCIATIONS_FILE_PATH``, and the result is dumped to
    ``JSON_INDEXED_FILE_PATH``.
    """
    go_dag = GODag(os.path.join(DATA_DIR, "go-basic.obo"))

    reader = IdToGosReader(UNIPROT_ASSOCIATIONS_FILE_PATH, godag=go_dag)
    term_counts = TermCounts(go_dag, reader.get_id2gos('all'))

    go_freq_dict = {
        term.id: term_counts.get_count(term.id)
        for term in go_dag.values()
    }
    # write frequency dict to JSON file
    with open(JSON_INDEXED_FILE_PATH, 'w') as json_file:
        json.dump(go_freq_dict, json_file)
예제 #14
0
    def init(self):
        """Load the ontology file and obtain the term graph.

        The graph is read from ``self.graph_file`` when that path exists;
        otherwise it is built with ``self._build_graph()`` and then written
        to ``self.graph_file``.
        """
        _log.debug("Cargando archivo de ontologias:" + self.obo_file)
        self.go_dag = GODag(self.obo_file)
        _log.debug("Se cargo el archivo:" + self.obo_file)

        if not os.path.exists(self.graph_file):
            # No cached graph yet: build it and cache it for the next run.
            self._build_graph()
            nx.write_gpickle(self.graph, self.graph_file)
        else:
            self.graph = nx.read_gpickle(self.graph_file)

        _log.debug("Se genero el grafo de terminos")
예제 #15
0
def main():
    """Print a GO term's lower-level hierarchy.

    Command-line arguments select the GO ids (all terms when none are
    given), the OBO file, the output file (stdout by default) and the
    formatting options forwarded to ``write_hier`` / ``write_hier_all``.
    """
    import argparse
    # The module docstring is the program description; ArgumentParser's first
    # positional parameter is ``prog``, so it must be passed by keyword.
    prs = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    prs.add_argument('go_ids', type=str, nargs='*',
                     help='GO Term, e.g. GO:0070458')
    prs.add_argument('--o', default=None, type=str,
                     help="Specifies the name of the output file")
    prs.add_argument('--no_indent', default=False,
                     help="Do not indent GO terms", action='store_true')
    prs.add_argument('--obo', default="go-basic.obo", type=str,
                     help="Location and name of the obo file")
    prs.add_argument('--dash_len', default=1, type=int,
                     help="Printed width of the dashes column")
    prs.add_argument('--max_depth', default=None, type=int,
                     help="max depth for printing relative to GO Term")
    prs.add_argument('--num_child', default=None, action='store_true',
                     help="Print count of total number of children for each GO")
    prs.add_argument('--short', default=False, action='store_true',
                     help="If a branch has already been printed, do not re-print. "
                          "Print '===' instead of dashes to note the point of compression")

    args = prs.parse_args()

    obo_dag = GODag(obo_file=args.obo)

    # Formatting options shared by both write calls.
    hier_kws = {
        'len_dash': args.dash_len if not args.no_indent else None,
        'max_depth': args.max_depth,
        'num_child': args.num_child,
        'short_prt': args.short,
    }

    file_out = sys.stdout if args.o is None else open(args.o, 'w')
    try:
        if args.go_ids:
            for go_id in args.go_ids:
                obo_dag.write_hier(go_id, file_out, **hier_kws)
        else:
            obo_dag.write_hier_all(file_out, **hier_kws)
    finally:
        # Close only a file opened here, never sys.stdout, and do so even
        # when writing fails.
        if args.o is not None:
            file_out.close()

    if args.o is not None:
        sys.stdout.write("  WROTE: {}\n".format(args.o))
예제 #16
0
def get_pathway_mapping(organism=9606, ontology='basic', exclude=None, force=False):
	"""Return a ``{GO id: term name}`` mapping for the accepted namespaces.

	Args:
		organism: not used by this function.
		ontology: selects ``goslim_generic.obo`` when it contains ``'slim'``,
			otherwise ``go-basic.obo``.
		exclude: passed to ``get_namespace_filter`` to build the predicate
			applied to each term's namespace.
		force: when truthy, an existing local OBO file is deleted first.

	Returns:
		dict mapping term ids to term names for terms whose namespace passes
		the filter.
	"""
	obo = 'goslim_generic.obo' if 'slim' in ontology else 'go-basic.obo'

	namespace_filter = get_namespace_filter(exclude)

	# Logical `and`, not bitwise `&`: `&` misbehaves for truthy non-bool
	# values (2 & True == 0) and raises for strings.
	if force and os.path.isfile(obo):
		os.remove(obo)

	# NOTE(review): download_go_basic_obo is also used when the slim file name
	# was selected; verify that this fetches the intended ontology.
	obo_fname = goatools.base.download_go_basic_obo(obo)

	obodag = GODag(obo_fname)

	return {term_id:term.name for term_id,term in obodag.items() if namespace_filter(term.namespace)}
예제 #17
0
def test_gosubdag_relationships(prt=sys.stdout):
    """Compare the GO ids in GoSubDags built with different relationship settings."""
    goids = {
        "GO:0032501",
        "GO:0044707",  # alt_id: GO:0032501  # BP  1011 L01 D01 B multicellular organismal process
        "GO:0050874",
        "GO:0007608",  # sensory perception of smell
        "GO:0050911",  # detection of chemical stimulus involved in sensory perception of smell
    }

    # Load the GO DAG twice: plain, and with the optional 'relationship' attribute
    fin_obo = os.path.join(REPO, "go-basic.obo")
    download_go_basic_obo(fin_obo, prt, loading_bar=None)
    go2obj_plain = GODag(fin_obo)
    go2obj_relat = GODag(fin_obo, optional_attrs=['relationship'])

    print("\nCreate GoSubDag with GO DAG containing no relationships.")
    tic = timeit.default_timer()
    subdag_plain = GoSubDag(goids, go2obj_plain, relationships=False, prt=prt)
    goids_plain = set(subdag_plain.go2obj)
    tic = _rpt_hms(tic, len(subdag_plain.go2obj))

    print("\nCreate GoSubDag while IGNORING relationships")
    subdag_false = GoSubDag(goids, go2obj_relat, relationships=False, prt=prt)
    goids_false = set(subdag_false.go2obj)
    tic = _rpt_hms(tic, len(subdag_false.go2obj))
    # Ignoring relationships must give the same ids as the plain DAG.
    assert goids_plain == goids_false

    print("\nCreate GoSubDag while loading only the 'part_of' relationship")
    subdag_part_of = GoSubDag(
        goids, go2obj_relat, relationships=['part_of'], prt=prt)
    goids_part_of = set(subdag_part_of.go2obj)
    tic = _rpt_hms(tic, len(subdag_part_of.go2obj))
    # 'part_of' must add ids without losing any of the plain ones.
    assert goids_plain.issubset(goids_part_of)
    assert len(goids_part_of) > len(goids_plain)

    print("\nCreate GoSubDag while loading all relationships")
    subdag_true = GoSubDag(goids, go2obj_relat, relationships=True, prt=prt)
    goids_true = set(subdag_true.go2obj)
    tic = _rpt_hms(tic, len(subdag_true.go2obj))
    # All relationships must cover at least the 'part_of' ids.
    assert goids_part_of.issubset(goids_true)
    assert len(goids_true) >= len(goids_part_of)
예제 #18
0
def test_all():
    """Load the configured GO DAG and associations, then print the mini DAG hierarchy."""
    full_obo = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    obo_dag = GODag(full_obo)

    gene2go = os.path.join(constants.GO_DIR, constants.GO_ASSOCIATION_FILE_NAME)
    assoc = read_ncbi_gene2go(gene2go, no_top=True)
    # obo_dag and assoc are loaded but not used by the rest of this test.

    here = os.path.dirname(os.path.abspath(__file__))
    mini_dag = GODag(os.path.join(here, "data/mini_obo.obo"))
    mini_subdag = GoSubDag(mini_dag.keys(), mini_dag)

    write_hier_all(mini_subdag, sys.stdout)
예제 #19
0
def test_semantic_i150():
    """Check that the semantic similarity of every GO term with itself is 1.0."""
    godag = GODag(os.path.join(REPO, 'tests/data/yangRWC/fig1a.obo'))
    # Deduplicate the term objects before comparing each one with itself.
    unique_terms = set(godag.values())
    for term in unique_terms:
        assert semantic_similarity(term.item_id, term.item_id, godag) == 1.0
예제 #20
0
def test_i148b_semsim_lin(do_plt=False):
    """Test for issue 148, Lin Similarity if a term has no annotations.

    Resnik and Lin similarities are computed for every unordered pair of GO
    ids (including each id paired with itself), printed, and the Lin values
    are checked with ``_chk_lin``.

    Args:
        do_plt: when true, ``_do_plt`` is called with the term counts and DAG.
    """
    fin_gaf = os.path.join(REPO, 'tests/data/yangRWC/fig2a_nonleaf0.gaf')
    godag = GODag(os.path.join(REPO, "tests/data/yangRWC/fig2a.obo"))
    annoobj = GafReader(fin_gaf, godag=godag)

    associations = annoobj.get_id2gos('CC')
    tcntobj = TermCounts(godag, associations)

    if do_plt:
        _do_plt(tcntobj, godag)

    goids = list(godag.keys())

    # Calculate Resnik values
    p2r = {
        frozenset([a, b]): resnik_sim(a, b, godag, tcntobj)
        for a, b in combo_w_rplc(goids, 2)
    }
    _prt_values('Resnik', goids, p2r)

    # Calculate Lin values
    p2l = {
        frozenset([a, b]): lin_sim(a, b, godag, tcntobj)
        for a, b in combo_w_rplc(goids, 2)
    }
    _prt_values('Lin', goids, p2l)
    _chk_lin(p2l)
예제 #21
0
    def __init__(self, dir, params):
        """Set up logging and load diseases, network, model weights and the GO study.

        Args:
            dir: passed to the parent initializer; the log file is written to
                ``self.dir``.
            params: passed to the parent initializer; ``model_path`` is read
                from it directly, the other settings through ``self.params``.
        """
        super().__init__(dir, params)

        log_path = os.path.join(self.dir, 'experiment.log')
        set_logger(log_path, level=logging.INFO, console=True)

        logging.info("Loading disease associations...")
        self.diseases_dict = load_diseases(
            self.params["associations_path"],
            self.params["disease_subset"],
            exclude_splits=['none'])

        logging.info("Loading network...")
        self.network = Network(self.params["ppi_network"])
        node_degrees = dict(self.network.nx.degree())
        self.degrees = np.array(list(node_degrees.values()))

        logging.info("Loading weights...")
        weights_path = os.path.join(params["model_path"], "models", "models.tar")
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load model files from a trusted source.
        with open(weights_path, "rb") as f:
            split_to_model = pickle.load(f)

        # Average the per-split weights, then scale by sqrt(degree).
        per_split = [model['ci_weight'][0, 0].numpy()
                     for model in split_to_model.values()]
        self.ci_weights = np.mean(per_split, axis=0)
        self.ci_weights_norm = self.ci_weights / np.sqrt(self.degrees)

        logging.info("Loading enrichment study...")
        geneid2go = read_ncbi_gene2go("data/go/gene2go.txt", taxids=[9606])
        obodag = GODag("data/go/go-basic.obo")
        self.go_study = GOEnrichmentStudy(
            self.network.get_names(),
            geneid2go,
            obodag,
            propagate_counts=True,
            alpha=0.05,
            methods=['fdr_bh'])
예제 #22
0
def test_all():
    """Run the four write-hierarchy checks on the mini DAG, printing to stdout."""
    # Path is relative to the directory the test is run from.
    mini_dag = GODag("./data/mini_obo.obo")
    stream = sys.stdout
    test_write_hier_all(mini_dag, stream)
    test_write_hier_norep(mini_dag, stream)
    test_write_hier_lim(mini_dag, stream)
    test_write_hier_mrk(mini_dag, stream)
예제 #23
0
    def __init__(self, go_obo_path='data/go.obo'):
        """Load the GO DAG and gene annotations and build the enrichment study.

        Args:
            go_obo_path: path of the OBO file to load.
        """
        self.obodag = GODag(go_obo_path)

        # Genes with GO annotations, restricted to the canonical ORF set.
        # Joining on a column-less selection of paper_orfs presumably filters
        # rows by index without adding columns.
        all_orfs_with_go = read_sgd_orfs()
        self.orfs_with_go = all_orfs_with_go.join(paper_orfs[[]], how='inner')

        # Map each gene name to its set of GO annotations.
        gene2gos = defaultdict(set)
        for _, row in self.orfs_with_go.iterrows():
            gene2gos[row['name']] = set(row.ontology.split(','))
        self.assoc = gene2gos
        self.methods = ['fdr_bh', 'bonferroni']

        # Sink for the study's log output; kept on self.
        self.devnull = open('/dev/null', 'w')

        # create GO enrichment object to run GO
        self.goeaobj = GOEnrichmentStudy(
            gene2gos.keys(),  # population: every gene that has annotations
            gene2gos,  # geneid/GO associations
            self.obodag,  # Ontologies
            propagate_counts=False,
            alpha=0.05,  # default significance cut-off
            methods=self.methods,
            log=self.devnull)
예제 #24
0
    def __GO_enrich__(self):
        """Build ``self.goeaobj``, a per-namespace GO enrichment study for human genes.

        ``go-basic.obo`` is downloaded when it is not in the working
        directory, the NCBI associations are downloaded and read for taxid
        9606, and the study population is the key set of ``GeneID2nt_hum``.
        """
        go_file = "go-basic.obo"
        if not os.path.exists(go_file):
            download_go_basic_obo()

        # Load gene ontologies
        obodag = GODag(go_file)

        # Read NCBI's gene2go annotations for human.
        fin_gene2go = download_ncbi_associations()
        objanno = Gene2GoReader(fin_gene2go, taxids=[9606])
        # One association per namespace (BP, MF, CC).
        ns2assoc = objanno.get_ns2assc()

        population = GeneID2nt_hum.keys()
        self.goeaobj = GOEnrichmentStudyNS(
            population,
            ns2assoc,
            obodag,
            propagate_counts=False,
            alpha=0.05,  # significance cut-off
            methods=['fdr_bh'])  # multiple-test correction method
예제 #25
0
def test_semantic_similarity():
    """Check the TermCounts gene counts of terms A-G in the fig2a data."""
    godag = GODag(os.path.join(REPO, '../goatools/tests/data/yangRWC/fig2a.obo'))
    file_id2gos = os.path.join(REPO, '../goatools/tests/data/yangRWC/fig2a.anno')
    # Map each GO term name to its GO id.
    name2go = {term.name: term.item_id for term in godag.values()}
    assoc = _get_id2gos(file_id2gos, godag, name2go, NAME2NUM)
    tcntobj = TermCounts(godag, assoc)
    # Expected count per term name, checked in this order.
    expected = (
        ('A', 100),
        ('B', 40),
        ('C', 50),
        ('D', 10),
        ('E', 10),
        ('F', 10),
        ('G', 30),
    )
    for name, count in expected:
        assert tcntobj.gocnts[name2go[name]] == count, tcntobj.gocnts
예제 #26
0
def show_go_dag_for_terms(terms, add_relationships=True):
    """Plot the GO sub-DAG of ``terms`` and return the image.

    Args:
        terms: GO ids as a list or a pandas Series (converted to a list).
        add_relationships: when true, the DAG is loaded with the
            ``relationship`` attribute and the sub-DAG uses relationships.

    Returns:
        ``Image('geneinfo_cache/plot.png')``, or ``None`` when ``terms`` is
        empty.
    """
    # isinstance also covers Series subclasses, which would otherwise reach
    # the emptiness check below and raise (the truth value of a Series is
    # ambiguous).
    if isinstance(terms, pd.Series):
        terms = terms.tolist()

    if not terms:
        return None

    # Silence everything the download and plotting steps print.
    with open(os.devnull, 'w') as null, redirect_stdout(null):

        # Both calls are made for their effect; the returned paths are not
        # used here.
        download_and_move_go_basic_obo(prt=null)
        download_ncbi_associations(prt=null)

        if add_relationships:
            optional_attrs = ['relationship', 'def']
        else:
            optional_attrs = ['def']
        # NOTE(review): presumably the file placed by
        # download_and_move_go_basic_obo; confirm the path matches.
        obodag = GODag("geneinfo_cache/go-basic.obo",
                       optional_attrs=optional_attrs,
                       prt=null)

        gosubdag = GoSubDag(terms, obodag, relationships=add_relationships)
        GoSubDagPlot(gosubdag).plt_dag('geneinfo_cache/plot.png')

    return Image('geneinfo_cache/plot.png')
예제 #27
0
def check_group_enrichment(tested_gene_file_name, total_gene_file_name):
    """Run a GO enrichment study of the tested genes against the total gene list.

    Missing OBO and association files are downloaded into
    ``constants.GO_DIR`` first.  Results with ``p_fdr_bh <= 0.05`` are
    collected into one row of newline-joined columns and passed to
    ``print_to_excel``.
    """
    total_gene_list = load_gene_list(total_gene_file_name)
    tested_gene = load_gene_list(tested_gene_file_name)

    obo_path = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    if not os.path.exists(obo_path):
        download(constants.GO_OBO_URL, constants.GO_DIR)

    obo_dag = GODag(obo_path)

    assoc_path = os.path.join(constants.GO_DIR, constants.GO_ASSOCIATION_FILE_NAME)
    if not os.path.exists(assoc_path):
        # Download the gzipped association file and unpack it next to it.
        download(constants.GO_ASSOCIATION_GENE2GEO_URL, constants.GO_DIR)
        gz_path = os.path.join(
            constants.GO_DIR,
            os.path.basename(constants.GO_ASSOCIATION_GENE2GEO_URL))
        with gzip.open(gz_path, 'rb') as f_in:
            with open(assoc_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

    assoc = read_ncbi_gene2go(assoc_path, no_top=True)

    population = [int(cur) for cur in ensembl2entrez_convertor(total_gene_list)]
    g = GOEnrichmentStudy(population, assoc, obo_dag,
                          methods=["bonferroni", "fdr_bh"])
    study = [int(cur) for cur in ensembl2entrez_convertor(tested_gene)]
    g_res = g.run_study(study)

    GO_results = [
        (cur.NS, cur.GO, cur.goterm.name, cur.p_uncorrected, cur.p_fdr_bh)
        for cur in g_res
        if cur.p_fdr_bh <= 0.05
    ]
    if GO_results:
        # Transpose the result tuples into one sequence per column.
        go_ns, go_terms, go_names, uncorrectd_pvals, FDRs = zip(*GO_results)
    else:
        go_ns, go_terms, go_names, uncorrectd_pvals, FDRs = [], [], [], [], []

    columns = (
        e2g_convertor(tested_gene),
        go_ns,
        go_terms,
        go_names,
        map(str, uncorrectd_pvals),
        map(str, FDRs),
    )
    output_rows = [tuple("\r\n".join(column) for column in columns)]
    print_to_excel(output_rows, tested_gene_file_name, total_gene_file_name)
예제 #28
0
    def __init__(self, uniprot_file_path, csv_file_path, save_dir, mode='EC'):
        """Store the configuration, load the GO DAG and try to load a cached class dict.

        Args:
            uniprot_file_path: stored as ``self.uniprot_file_path``.
            csv_file_path: stored as ``self.uniprot_csv``.
            save_dir: output directory; also where the cached class dict is
                looked up (``csv_by_EC/class2id_<mode>.p``).
            mode: label mode, used in the cached dict's file name.
        """
        self.depth = 4  # depth of labels to consider
        self.mode = mode
        self.GODag = GODag(
            '/net/data.isilon/igem/2017/data/gene_ontology/go.obo',
            optional_attrs=['relationship'])
        self.max_depth = 4  # max depth in the GO DAG to consider
        self.max_write = 1000000  # max number of labels written for one class
        self.write_count = 0  # how many have been written so far
        self.save_dir = save_dir
        self.uniprot_csv = csv_file_path
        self.uniprot_file_path = uniprot_file_path
        self.class_to_id_EC = _recursively_default_dict()
        self.class_to_id_GO = {}
        self.filter_minlength = True
        self.minlength = 175
        self.filter_AA = True
        self.train_dataset_csv_path = '/net/data.isilon/igem/2017/data/uniprot_with_EC/SAfetyNEt/'
        print(save_dir)

        # Load the cached class dict if it is there.
        cache_path = os.path.join(
            self.save_dir, 'csv_by_EC', 'class2id_{}.p'.format(self.mode))
        try:
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only load caches from a trusted location.
            with open(cache_path, "rb") as pickle_f:
                self.class_to_id_EC = pickle.load(pickle_f)
                # Freeze the default dict: missing keys now raise KeyError.
                self.class_to_id_EC.default_factory = None
                print('Loaded EC-class dict.')
        except OSError:
            print('Failed to load EC-class dict. Generating EC-class dict.')
예제 #29
0
def test_all():
    """Run every hierarchy report on the mini DAG and print the DAG load time."""
    here = os.path.dirname(os.path.abspath(__file__))
    dag_fin = os.path.join(here, "data/mini_obo.obo")

    # Time only the loading of the DAG and the construction of the sub-DAG.
    tic = timeit.default_timer()
    godag = GODag(dag_fin)
    gosubdag = GoSubDag(godag.keys(), godag)
    toc = timeit.default_timer()

    out = sys.stdout
    reports = (
        write_hier_all,
        write_hier_norep,
        write_hier_lim,
        write_hier_mrk_lst,
        write_hier_mrk_dct,
        write_hier_up,
    )
    for report in reports:
        report(gosubdag, out)

    elapsed = datetime.timedelta(seconds=(toc-tic))
    sys.stdout.write("Elapsed HMS: {}\n\n".format(str(elapsed)))
예제 #30
0
def plotGO(clusterIDs, clusters, outdir, base):
    """Plot the GO terms of every category of every listed cluster.

    For each cluster id and each category under
    ``clusters[id]['go']['terms']`` a PNG named
    ``<outdir>/<base>_<id>_<category>.png`` is produced with ``plot_gos``.
    When ``plot_gos`` raises ``KeyError`` for a GO id, that id is dropped
    from the list being plotted and the plot is retried.

    Args:
        clusterIDs: iterable of keys into ``clusters``.
        clusters: mapping of cluster id to a dict holding ``['go']['terms']``,
            itself a mapping of category to GO ids.
        outdir: output directory for the PNG files.
        base: file name prefix.
    """
    obodag = GODag("../../obo/go.obo")

    for cluster_id in clusterIDs:
        terms_by_category = clusters[cluster_id]['go']['terms']

        for category, category_ids in terms_by_category.items():
            # Work on a copy so the caller's data is not modified.
            ids = list(category_ids)
            out_png = "{}/{}_{}_{}.png".format(outdir, base, cluster_id, category)

            while True:
                try:
                    plot_gos(out_png, ids, obodag)
                    break
                except KeyError as e:
                    # str(KeyError) is the quoted key; strip the quotes.
                    missing = str(e).replace("'", '')
                    if missing not in ids:
                        # Not one of our ids: retrying cannot help.
                        raise
                    # Drop the id from the id list (the original called
                    # .remove on the category dict, which has no such method).
                    ids.remove(missing)
예제 #31
0
def get_goeaobj(methods=None):
    """Return a GOEnrichmentStudy built from the GO-slim test data under ``ROOT``.

    Args:
        methods: forwarded to ``GOEnrichmentStudy`` as ``methods``.
    """
    obo_dag = GODag(ROOT + "goslim_generic.obo")
    assoc = read_associations(ROOT + "slim_association", no_top=True)
    # One population id per line; the context manager closes the file.
    with open(ROOT + "small_population") as ifstrm:
        popul_ids = [line.rstrip() for line in ifstrm]
    goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods)
    return goeaobj
예제 #32
0
def get_goeaobj(methods=None):
    """Return a GOEnrichmentStudy built from ``go-basic.obo`` and the ``../data`` files.

    Args:
        methods: forwarded to ``GOEnrichmentStudy`` as ``methods``.
    """
    obo_dag = GODag("go-basic.obo")
    assoc = read_associations("../data/association", no_top=True)
    # One population id per line; the context manager closes the file.
    with open("../data/population") as ifstrm:
        popul_ids = [line.rstrip() for line in ifstrm]
    goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods)
    return goeaobj
예제 #33
0
class _Run(object):
    """Holds go-basic.obo loaded with and without relationships, plus GoSubDags for both."""

    obo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../go-basic.obo")

    def __init__(self):
        """Download and load the DAGs, then build and time the two full GoSubDags."""
        download_go_basic_obo(self.obo, sys.stdout, loading_bar=None)
        self.godag_r0 = GODag(self.obo)
        self.godag_r1 = GODag(self.obo, optional_attrs={'relationship'})
        # Unique GO ids of the plain DAG.
        self.goids = list({term.id for term in self.godag_r0.values()})
        # GoSubDag (plain)
        tic = timeit.default_timer()
        self.gosubdag_r0 = GoSubDag(self.goids, self.godag_r0, prt=None)
        msg_r0 = "GoSubDag r0 {N:4} GOs {S:3} srcs".format(
            N=len(self.gosubdag_r0.go2obj), S=len(self.gosubdag_r0.go_sources))
        prt_hms(tic, msg_r0)
        # GoSubDag with relationships (reported against the same start time)
        self.gosubdag_r1 = GoSubDag(self.goids, self.godag_r1, prt=None, relationships=True)
        msg_r1 = "GoSubDag r1 {N:4} GOs {S:3} srcs".format(
            N=len(self.gosubdag_r1.go2obj), S=len(self.gosubdag_r1.go_sources))
        prt_hms(tic, msg_r1)

    def prt_cnts(self, cnts):
        """Print the statistics string of every value collection in cnts."""
        summary = {key: self.str_stats(vals) for key, vals in cnts.items()}
        print(summary)

    @staticmethod
    def str_stats(vals):
        """Return "(min max) STD=n" for vals, with the deviation rounded to an int."""
        described = stats.describe(vals)
        lowest = described.minmax[0]
        highest = described.minmax[1]
        std = int(round(np.sqrt(described.variance)))
        return "({m} {M}) STD={STD:,}".format(m=lowest, M=highest, STD=std)

    def get_gosubdag_r0(self, goids):
        """Return a GoSubDag for goids built on the plain DAG, printing the build time."""
        tic = timeit.default_timer()
        gosubdag = GoSubDag(goids, self.godag_r0, relationships=None, prt=None)
        msg = "GoSubDag r0 {N:4} GOs {S:3} srcs".format(
            N=len(gosubdag.go2obj), S=len(gosubdag.go_sources))
        prt_hms(tic, msg)
        return gosubdag

    def get_gosubdag_r1(self, goids):
        """Return a GoSubDag for goids built with relationships, printing the build time."""
        tic = timeit.default_timer()
        gosubdag = GoSubDag(goids, self.godag_r1, relationships=True, prt=None)
        msg = "GoSubDag r1 {N:4} GOs {S:3} srcs".format(
            N=len(gosubdag.go2obj), S=len(gosubdag.go_sources))
        prt_hms(tic, msg)
        return gosubdag

    def get_goids_rand(self, qty):
        """Shuffle the stored GO ids in place and return the first qty of them."""
        shuffle(self.goids)
        chosen = self.goids[:qty]
        return chosen
예제 #34
0
def main():
    """Write "microarray.assoc": one line per gene with its biological-process GO terms.

    Reads the GO DAG via GODag() and keeps the IDs of terms whose namespace is
    "biological_process" and whose level is at least 1.  Then scans the
    tab-separated "gene_association.tair" file and, for rows whose first column
    is "TAIR" and whose GO ID is in that selection, records the GO ID under the
    first pipe-separated name found in column 11.

    Output format: a "#gene,go_terms" header followed by "<gene>,<go;go;...>"
    lines, sorted by gene with sorted GO IDs.
    """
    g = GODag()
    selection = set()
    for rec in g.values():
        if rec.namespace == "biological_process" and rec.level >= 1:
            selection.add(rec.id)

    data = collections.defaultdict(set)
    # Context managers guarantee both files are closed (and the output flushed).
    with open("gene_association.tair") as fp:
        for row in fp:
            if row.startswith("!"):
                continue
            atoms = row.split("\t")
            #['TAIR', 'locus:2185485', 'AT5G14850', '', 'GO:0000030', 'TAIR:Communication:501714663', 'ISS', 'NCBI_gi:1552169|NCBI_gi:7634741', 'F', 'AT5G14850', 'AT5G14850|T9L3.150|T9L3_150', 'protein', 'taxon:3702', '20021003', 'TIGR', '', 'TAIR:locus:2185485\n']
            if len(atoms) < 11:
                # Blank or truncated line: columns 5 and 11 are not both present.
                continue
            domain, name, go = atoms[0], atoms[10], atoms[4]
            name = name.split("|", 1)[0]
            if go in selection and domain == "TAIR":
                data[name].add(go)

    with open("microarray.assoc", "w") as fw:
        fw.write("#gene,go_terms\n")
        for key, val in sorted(data.items()):
            fw.write("%s,%s\n" % (key, ";".join(sorted(val))))
예제 #35
0
class Data(object):
    """Holds data used in test."""

    def __init__(self, fin_obo):
        """Load the GO DAG from fin_obo and index its non-obsolete terms.

        fin_obo: path of the obo file, relative to the directory one level
                 above the directory containing this file.
        """
        self.repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../")
        self.fin_obo = os.path.join(self.repo, fin_obo)
        self.dag = GODag(self.fin_obo)
        self.go2obj = {go:o for go, o in self.dag.items() if not o.is_obsolete}
        # A real list is required: get_goids shuffles and slices it, and a
        # Python 3 dict keys view supports neither operation.
        self.goids_all = list(self.go2obj)

    def get_goids(self, num):
        """Return a set of up to num GO IDs chosen at random.

        Shuffles self.goids_all in place, then takes its first num entries.
        """
        shuffle(self.goids_all)
        return set(self.goids_all[:num])
예제 #36
0
 def __init__(self):
     """Download the obo file, load it with and without relationships, build both GoSubDags.

     Reads self.obo for the obo file location.  After each GoSubDag is built,
     prt_hms is called with the same start time and a message giving the
     number of GO terms and GO sources.
     """
     download_go_basic_obo(self.obo, sys.stdout, loading_bar=None)
     self.godag_r0 = GODag(self.obo)
     self.godag_r1 = GODag(self.obo, optional_attrs={'relationship'})
     unique_ids = set(term.id for term in self.godag_r0.values())
     self.goids = list(unique_ids)

     start = timeit.default_timer()

     # GoSubDag without relationships
     self.gosubdag_r0 = GoSubDag(self.goids, self.godag_r0, prt=None)
     num_gos = len(self.gosubdag_r0.go2obj)
     num_srcs = len(self.gosubdag_r0.go_sources)
     prt_hms(start, "GoSubDag r0 {N:4} GOs {S:3} srcs".format(N=num_gos, S=num_srcs))

     # GoSubDag with relationships
     self.gosubdag_r1 = GoSubDag(self.goids, self.godag_r1, prt=None, relationships=True)
     num_gos = len(self.gosubdag_r1.go2obj)
     num_srcs = len(self.gosubdag_r1.go_sources)
     prt_hms(start, "GoSubDag r1 {N:4} GOs {S:3} srcs".format(N=num_gos, S=num_srcs))
예제 #37
0
                 help="Do not indent GO terms", action='store_true')
    p.add_argument('--obo', default="go-basic.obo", type=str,
                 help="Location and name of the obo file")
    p.add_argument('--dash_len', default=1, type=int,
                 help="Printed width of the dashes column")
    p.add_argument('--max_depth', default=None, type=int,
                 help="max depth for printing relative to GO Term")
    p.add_argument('--num_child', default=None, action='store_true',
                 help="Print count of total number of children for each GO")
    p.add_argument('--short', default=False, action='store_true',
                 help="If a branch has already been printed, do not re-print." 
                      "Print '===' instead of dashes to note the point of compression")

    args = p.parse_args()

    obo_dag = GODag(obo_file=args.obo)

    file_out = sys.stdout if args.o is None else open(args.o, 'w')
    lenprt = args.dash_len if not args.no_indent else None

    if args.go_ids:
      for go_id in args.go_ids:
        obo_dag.write_hier(
            go_id, 
            file_out, 
            len_dash=lenprt,
            max_depth=args.max_depth,
            num_child=args.num_child,
            short_prt=args.short)
    else:
      obo_dag.write_hier_all(
예제 #38
0
                 dest='draw_parents',
                 help="Do not draw parents of the query term")
    # Flag that turns off drawing of the query term's children (stored in opts.draw_children).
    p.add_option("--disable-draw-children",
                 action="store_false",
                 dest='draw_children',
                 help="Do not draw children of the query term")

    # Both parents and children are drawn unless a disable flag is given.
    p.set_defaults(draw_parents=True)
    p.set_defaults(draw_children=True)

    opts, args = p.parse_args()

    # The obo file is the optional first positional argument; without one,
    # "go-basic.obo" is used and its existence is not checked here.
    if not len(args):
        obo_file = "go-basic.obo"
    else:
        obo_file = args[0]
        assert os.path.exists(obo_file), "file %s not found!" % obo_file

    g = GODag(obo_file)

    # Optionally write the DAG via write_dag().
    if opts.desc:
        g.write_dag()

    # If a term was requested, look it up and draw its lineage with the chosen options.
    if opts.term is not None:
        rec = g.query_term(opts.term, verbose=True)
        g.draw_lineage([rec], engine=opts.engine,
                       gml=opts.gml,
                       draw_parents=opts.draw_parents,
                       draw_children=opts.draw_children)
예제 #39
0
class WrSubObo(object):
    """Read a large GO-DAG from an obo file. Write a subset GO-DAG into a small obo file."""

    def __init__(self, fin_obo=None, optional_attrs=None, load_obsolete=None):
        """Remember the input obo file and load it into a GODag when a file is given.

        fin_obo:        path of the source obo file, or None to load nothing.
        optional_attrs: passed to GODag; if it contains 'relationship',
                        relationships are followed when collecting GO IDs.
        load_obsolete:  passed to GODag unchanged.
        """
        self.fin_obo = fin_obo
        self.godag = GODag(fin_obo, optional_attrs, load_obsolete) if fin_obo is not None else None
        self.relationships = optional_attrs is not None and 'relationship' in optional_attrs

    def wrobo(self, fout_obo, goid_sources):
        """Write a subset obo file containing GO ID sources and their parents."""
        goids_all = self._get_goids_all(goid_sources)
        with open(fout_obo, 'w') as prt:
            self._prt_info(prt, goid_sources, goids_all)
            # prt_goterms expects (input obo path, GO IDs, output stream), in that order.
            self.prt_goterms(self.fin_obo, goids_all, prt)
        print("  WROTE {N} GO TERMS: {OBO}\n".format(N=len(goids_all), OBO=fout_obo))

    @staticmethod
    def prt_goterms(fin_obo, goids, prt, b_prt=True):
        """Copy the header, the [Term] stanzas whose ID is in goids, and all [Typedef] stanzas.

        fin_obo: path of the obo file to read.
        goids:   container of GO IDs (e.g. "GO:0000001") to keep.
        prt:     writable stream receiving the selected lines.
        b_prt:   whether lines appearing before the first stanza are copied.
        """
        # b_trm is True between a "[Term]" line and that stanza's "id: GO" line;
        # the "[Term]" header is held back until the ID is known to be wanted.
        b_trm = False
        with open(fin_obo) as ifstrm:
            for line in ifstrm:
                if not b_trm:
                    if line.startswith("[Term]"):
                        b_trm = True
                        b_prt = False
                    elif line.startswith("[Typedef]"):
                        b_prt = True
                elif line[:6] == 'id: GO':
                    b_trm = False
                    # "id: GO:0000001" -> characters 4..13 hold the 10-character GO ID.
                    b_prt = line[4:14] in goids
                    if b_prt:
                        prt.write("[Term]\n")
                if b_prt:
                    prt.write(line)

    @staticmethod
    def get_goids(fin_obo, name):
        """Get GO IDs whose name matches given name.

        A [Term] stanza matches when `name` is a substring of its "name" field.
        Returns a set of the matching stanzas' "id" values.
        """
        goids = set()

        def _collect(term):
            """Add the stanza's ID when it has one and its name contains `name`."""
            if 'id' in term and name in term.get('name', ''):
                goids.add(term['id'])

        goterm = None
        with open(fin_obo) as ifstrm:
            for line in ifstrm:
                if goterm is not None:
                    colon = line.find(':')
                    if colon != -1:
                        # "key: value" -> skip the colon and the space after it.
                        goterm[line[:colon]] = line[colon+2:].rstrip()
                    else:
                        # A line without a colon ends the current stanza.
                        _collect(goterm)
                        goterm = {} if line.startswith("[Term]") else None
                elif line[:6] == "[Term]":
                    goterm = {}
        # The last stanza may run to end-of-file without a terminating blank line.
        if goterm is not None:
            _collect(goterm)
        return goids

    def _get_goids_all(self, go_sources):
        """Given GO ID sources and optionally the relationship attribute, return all GO IDs."""
        go2obj_user = {}
        objrel = CurNHigher(self.relationships, self.godag)
        objrel.get_id2obj_cur_n_high(go2obj_user, go_sources)
        goids = set(go2obj_user)
        # Include alternate IDs so stanzas are matched regardless of which ID is used.
        for goterm in go2obj_user.values():
            if goterm.alt_ids:
                goids.update(goterm.alt_ids)
        return goids

    def _prt_info(self, prt, goid_sources, goids_all):
        """Print information describing how this obo subset was created."""
        prt.write("! Contains {N} GO IDs. Created using {M} GO sources:\n".format(
            N=len(goids_all), M=len(goid_sources)))
        for goid in goid_sources:
            prt.write("!    {GO}\n".format(GO=str(self.godag.get(goid, ""))))
        prt.write("\n")
예제 #40
0
 def __init__(self, fin_obo):
     """Load the GO DAG from fin_obo and index its non-obsolete terms.

     fin_obo: path of the obo file, relative to the directory one level
              above the directory containing this file.
     """
     self.repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../")
     self.fin_obo = os.path.join(self.repo, fin_obo)
     self.dag = GODag(self.fin_obo)
     self.go2obj = {go:o for go, o in self.dag.items() if not o.is_obsolete}
     # Store a real list: on Python 3, keys() is a view that cannot be
     # shuffled or sliced.
     self.goids_all = list(self.go2obj)
예제 #41
0
 def __init__(self, fin_obo=None, optional_attrs=None, load_obsolete=None):
     """Record the obo file name, load it into a GODag if given, and note relationship use.

     self.godag stays None when no obo file is supplied.  self.relationships
     is True only when optional_attrs is provided and contains 'relationship'.
     """
     self.fin_obo = fin_obo
     if fin_obo is None:
         self.godag = None
     else:
         self.godag = GODag(fin_obo, optional_attrs, load_obsolete)
     if optional_attrs is None:
         self.relationships = False
     else:
         self.relationships = 'relationship' in optional_attrs
예제 #42
0
    # Command-line interface: options plus one optional positional obo file.
    import optparse
    p = optparse.OptionParser("%prog [obo_file]")
    p.add_option("--description", dest="desc", 
            help="write term descriptions to stdout" \
                 " from the obo file specified in args", action="store_true")
    # NOTE(review): the two adjacent help literals below join without a space
    # ("...childrenof the query term").
    p.add_option("--term", dest="term", help="write the parents and children" \
            "of the query term", action="store", type="string", default=None)

    (options, args) = p.parse_args()

    # Only a user-supplied obo path is checked for existence.
    if not len(args):
        obo_file = None
    else:
        obo_file = args[0]
        assert os.path.exists(obo_file), "file %s not found!" % obo_file

    # Without a positional argument, GODag is constructed with no arguments.
    if obo_file is None:
        g = GODag()
    else:
        g = GODag(obo_file)

    # Optionally write the DAG via write_dag().
    if options.desc:
        g.write_dag()

    # If a term was requested, look it up and draw its lineage.
    if options.term is not None:
        rec = g.query_term(options.term, verbose=True)
        g.draw_lineage(rec, dpi=50, verbose=True)

예제 #43
0
if __name__ == '__main__':
    # Script entry point: load an obo file, optionally write the DAG and/or
    # draw the lineage of one query term.

    import optparse
    p = optparse.OptionParser("%prog [obo_file]")
    p.add_option("--description", dest="desc",
                 help="write term descriptions to stdout"
                 " from the obo file specified in args", action="store_true")
    # The leading space keeps the joined help text from reading "childrenof".
    p.add_option("--term", dest="term", help="write the parents and children"
                 " of the query term", action="store", type="string",
                 default=None)
    p.add_option("--gml", action="store_true",
                 help="Write GML output (for Cytoscape) [default: %default]")

    opts, args = p.parse_args()

    if not args:
        # No positional argument: fall back to the default obo file name.
        obo_file = "gene_ontology.1_2.obo"
    else:
        obo_file = args[0]
        # Explicit check instead of assert, which is stripped under "python -O".
        if not os.path.exists(obo_file):
            p.error("file %s not found!" % obo_file)

    g = GODag(obo_file)

    if opts.desc:
        g.write_dag()

    # If a term was requested, look it up and draw its lineage.
    if opts.term is not None:
        rec = g.query_term(opts.term, verbose=True)
        g.draw_lineage([rec], gml=opts.gml)