def get_all_manual_exptl_AE(con, tstat=0):
    """Fetch all public manual annotations that carry an annotation extension.

    con -- open Oracle DB connection.
    tstat -- when truthy, wrap the query so at most 100 rows are returned
             (quick test mode).
    Returns a tab object with multi-part annotation extensions unrolled
    to one per row via unroll_AE().
    """
    if tstat:
        # Oracle-style row cap for test runs.
        pre, post = "SELECT * FROM (", ") WHERE ROWNUM <= 100"
    else:
        pre, post = '', ''
    query = "%s SELECT a.ENTITY_ID as GP_ID, a.GO_ID, t.name as GO_NAME, e2g.code as EVIDENCE, " \
            "a.REF_DB_CODE AS REF_TYPE, a.REF_DB_ID AS REF_ACC, a.ANNOTATION_EXTENSION, a.SOURCE " \
            "from go.v_manual_annotations a " \
            "join go.terms t on (t.go_id = a.go_id) " \
            "join GO.EVIDENCE2ECO e2g ON (e2g.eco_id = a.eco_id) " \
            "where a.is_public = 'Y' " \
            "and a.annotation_extension is not null %s" % (pre, post)
    cursor = con.cursor()
    cursor.execute(query)
    # Header order fixes the column order used when the tab is printed.
    out = tab(headers=['SOURCE', 'GP_ID', 'GO_ID', 'GO_NAME',
                       'ANNOTATION_EXTENSION', 'EVIDENCE', 'REF_TYPE', 'REF_ACC'])
    for row in dict_cursor(cursor):
        if row['ANNOTATION_EXTENSION']:
            out.tab.extend(unroll_AE(row))
        else:
            # Defensive: the WHERE clause should make this branch unreachable.
            out.tab.append(row)
    out.validate()
    return out
# Example #2 (0)  -- example-site separator from scraping, commented out so it is not parsed as code
def get_all_manual_exptl_AE(con, tstat=0):
    """Find all public manual annotations that carry an annotation_extension.

    con -- open Oracle DB connection.
    tstat -- when truthy, wrap the query so at most 100 rows are returned
             (quick test mode).
    Returns a tab object with all annotation extensions unrolled to one
    per row via unroll_AE().
    """
    pre = ''
    post = ''
    if tstat:
        # Oracle-style row cap: wrap the query and filter on ROWNUM.
        pre = "SELECT * FROM ("
        post = ") WHERE ROWNUM <= 100"
    cursor = con.cursor()
    query = "%s SELECT a.ENTITY_ID as GP_ID, a.GO_ID, t.name as GO_NAME, e2g.code as EVIDENCE, " \
             "a.REF_DB_CODE AS REF_TYPE, a.REF_DB_ID AS REF_ACC, a.ANNOTATION_EXTENSION, a.SOURCE " \
                    "from go.v_manual_annotations a " \
                    "join go.terms t on (t.go_id = a.go_id) " \
                    "join GO.EVIDENCE2ECO e2g ON (e2g.eco_id = a.eco_id) " \
                    "where a.is_public = 'Y' " \
                    "and a.annotation_extension is not null %s" % (pre, post)
    cursor.execute(query)
    # Specify headers in order to set order of columns for printing.
    results_tab = tab(headers=[
        'SOURCE', 'GP_ID', 'GO_ID', 'GO_NAME', 'ANNOTATION_EXTENSION',
        'EVIDENCE', 'REF_TYPE', 'REF_ACC'
    ])
    for r in dict_cursor(cursor):
        if r['ANNOTATION_EXTENSION']:
            # Multi-part extensions become several rows.
            results_tab.tab.extend(unroll_AE(r))
        else:
            # Defensive: the WHERE clause should make this branch unreachable.
            results_tab.tab.append(r)
    results_tab.validate()
    return results_tab
def __main__():
    """Load the ontology named in argv[1], run every query in queries.tsv
    against it via qtest(), then shut the reasoner down."""
    ont = Brain()
    ont.learn(sys.argv[1])
    query_tab = tab("./", "queries.tsv")
    for row in query_tab.tab:
        Query(row, ont).qtest()
    ont.sleep()
	def __init__(self, manual_map, owl_map, RCV, go):
		"""Build a mapping-tables object.

		manual_map -- list of dicts containing the manual mapping table.
		owl_map -- dict (keyed by row) of dicts (keyed by column) holding
		the OWL mapping table.
		RCV -- the Roche term table as a dict of dicts.
		go -- Brain object containing the ontology used for mapping.
		"""
		self.manual_map = manual_map
		self.owl_map = owl_map
		self.go = go
		self.rcv = RCV
		self.obs_status = {} # A dictionary of manually mapped GO terms, with value = is obsolete True/False 
		# Populate obs_status from the manual map before any results are built.
		self.update_manual_map_obs_stat()
		self.combined_results = tab()
		self.combined_results.headers = ["RCV_ID", "RCV_NAME", "GO_ID", "GO_NAME", "STATUS"]
def plot_count(column, stats, path):
    """Count the distinct values of *column* in *stats* and write the counts
    to <column>_plot.tsv (spaces in the column name become underscores).

    column -- header name of the column to count.
    stats  -- tab-like object supporting extract_column().
    path   -- unused here; output goes to the working directory.
              NOTE(review): presumably output should go under *path* — confirm.
    Returns the tab object holding the counts.
    """
    nc = 'Number ' + column
    plot = tab(headers=['Number RCV', nc])
    counts = Counter(stats.extract_column(column))
    # .items() (not py2-only .iteritems()) keeps this portable.
    for value, n in counts.items():
        plot.tab.append({'Number RCV': value, nc: n})
    # Bug fix: the original wrote "plot.validate" (a no-op attribute access,
    # never called) and did so inside the loop; validate once, after all
    # rows have been added.
    plot.validate()
    out_plot = open(re.sub(' ', '_', column) + "_plot.tsv", "w")
    try:
        out_plot.write(plot.print_tab(sort_keys=[nc]))
    finally:
        # Ensure the handle is released even if print_tab/write raises.
        out_plot.close()
    return plot
    def __init__(self, manual_map, owl_map, RCV, go):
        """Build a mapping-tables object.

        manual_map -- list of dicts containing the manual mapping table.
        owl_map -- dict (rows) of dicts (columns) holding the OWL mapping table.
        RCV -- the Roche term table as a dict of dicts.
        go -- Brain object containing the ontology used for mapping.
        """
        self.manual_map = manual_map
        self.owl_map = owl_map
        self.go = go
        self.rcv = RCV
        self.obs_status = {
        }  # A dictionary of manually mapped GO terms, with value = is obsolete True/False
        # Populate obs_status from the manual map before any results are built.
        self.update_manual_map_obs_stat()
        self.combined_results = tab()
        self.combined_results.headers = [
            "RCV_ID", "RCV_NAME", "GO_ID", "GO_NAME", "STATUS"
        ]
def plot_count(column, stats, path):
    """Count the distinct values of *column* in *stats* and write the counts
    to <column>_plot.tsv (spaces in the column name become underscores).

    column -- header name of the column to count.
    stats  -- tab-like object supporting extract_column().
    path   -- unused here; output goes to the working directory.
              NOTE(review): presumably output should go under *path* — confirm.
    Returns the tab object holding the counts.
    """
    nc = 'Number ' + column
    plot = tab(headers=['Number RCV', nc])
    counts = Counter(stats.extract_column(column))
    # .items() (not py2-only .iteritems()) keeps this portable.
    for value, n in counts.items():
        plot.tab.append({'Number RCV': value, nc: n})
    # Bug fix: the original wrote "plot.validate" (a no-op attribute access,
    # never called) and did so inside the loop; validate once, after all
    # rows have been added.
    plot.validate()
    out_plot = open(re.sub(' ', '_', column) + "_plot.tsv", "w")
    try:
        out_plot.write(plot.print_tab(sort_keys=[nc]))
    finally:
        # Ensure the handle is released even if print_tab/write raises.
        out_plot.close()
    return plot
def gen_report_tab(od):
    """Generate a report tab of annotation-type mappings from *od*.

    od -- object exposing gen_annotation_report(), which yields dicts with
          keys annotation_class, annotation_text, op_label, op_id,
          class_label and class_id.
    Returns a tab with one row per report entry; an op_id of None is
    rendered as an empty string.
    """
    headers = ['a.annotation_type', 'a.text', 'op_label', 'op_id',
               'class_label', 'class_id']  # Doesn't seem right to set this here...
    report = tab(headers=headers)
    for d in od.gen_annotation_report():
        row = {
            'a.annotation_type': d['annotation_class'],
            'a.text': d['annotation_text'],
            'op_label': d['op_label'],
            # Idiom fix: compare to None with "is", not "==".
            'op_id': '' if d['op_id'] is None else d['op_id'],
            'class_label': d['class_label'],
            'class_id': d['class_id'],
        }
        report.tab.append(row)
    return report
# Example #9 (0)  -- example-site separator from scraping, commented out so it is not parsed as code
def get_compact_AE(con):
    """Find the distinct set of GO term + annotation extension pairs among
    public manual annotations that carry an annotation_extension.

    con -- open Oracle DB connection.
    Returns a tab with headers GO_ID, GO_NAME, ANNOTATION_EXTENSION, with
    multi-part extensions unrolled to one per row via unroll_AE().
    """
    cursor = con.cursor()
    # Fix: the original concatenation was missing spaces at fragment
    # boundaries (e.g. "...a.go_id)JOIN" and "'Y'and"), relying on the SQL
    # tokenizer to split on punctuation; add explicit spaces.
    cursor.execute("SELECT DISTINCT a.GO_ID, t.name as GO_NAME, a.ANNOTATION_EXTENSION "
                   "FROM go.v_manual_annotations a "
                   "JOIN go.terms t ON (t.go_id = a.go_id) "
                   "JOIN go.eco_terms et ON (a.ECO_ID = et.ECO_ID) "
                   "WHERE a.is_public = 'Y' "
                   "AND a.annotation_extension is not null")
    results_tab = tab(headers=['GO_ID', 'GO_NAME', 'ANNOTATION_EXTENSION'])
    for r in dict_cursor(cursor):
        if r['ANNOTATION_EXTENSION']:
            results_tab.tab.extend(unroll_AE(r))
        else:
            # Defensive: the WHERE clause should make this branch unreachable.
            results_tab.tab.append(r)
    results_tab.validate()
    return results_tab
def get_compact_AE(con):
    """Find the distinct set of GO term + annotation extension pairs among
    public manual annotations that carry an annotation_extension.

    con -- open Oracle DB connection.
    Returns a tab with headers GO_ID, GO_NAME, ANNOTATION_EXTENSION, with
    multi-part extensions unrolled to one per row via unroll_AE().
    """
    cursor = con.cursor()
    # Fix: the original concatenation was missing spaces at fragment
    # boundaries (e.g. "...a.go_id)JOIN" and "'Y'and"), relying on the SQL
    # tokenizer to split on punctuation; add explicit spaces.
    cursor.execute("SELECT DISTINCT a.GO_ID, t.name as GO_NAME, a.ANNOTATION_EXTENSION "
                   "FROM go.v_manual_annotations a "
                   "JOIN go.terms t ON (t.go_id = a.go_id) "
                   "JOIN go.eco_terms et ON (a.ECO_ID = et.ECO_ID) "
                   "WHERE a.is_public = 'Y' "
                   "AND a.annotation_extension is not null")
    results_tab = tab(headers=['GO_ID', 'GO_NAME', 'ANNOTATION_EXTENSION'])
    for r in dict_cursor(cursor):
        if r['ANNOTATION_EXTENSION']:
            results_tab.tab.extend(unroll_AE(r))
        else:
            # Defensive: the WHERE clause should make this branch unreachable.
            results_tab.tab.append(r)
    results_tab.validate()
    return results_tab
"""


sys.path.append("../mod/")
con = get_con(usr = sys.argv[1] , pwd = sys.argv[2])  # connection to LMB DB. Need to make ssh tunnel first.

ontologies = Brain() # Construct Brain object

# Now load up ontologies.  These are used to check validity for addition of new classes or 
# relations to DB. You can load as many as you need.
ontologies.learn("http://purl.obolibrary.org/obo/fbbt/fbbt-simple.owl") # Switch to specific release if necessary.

odbo = owlDbOnt(conn = con, ont = ontologies) # Object for interacting with the database + ontologies.
# Also detects anything that looks like a FlyBase feature and checks validity against public FlyBase.

annotation_table = tab(path = sys.argv[3], file_name=sys.argv[4])  # tsv file with headers: ind_name, class, rel, ind_source

# ind_source must already be in the DB

# NOTE(review): ID_range_start is assigned here but the loop below passes the
# literal 20000 to add_ind rather than this variable — confirm intent.
ID_range_start = 20000
for row in annotation_table.tab:
    print str(row)
    new_ind = odbo.add_ind(name = row['ind_name'], source = row['ind_source'], ID_range_start = 20000) # Returns FALSE and warns if addn fails
    if not new_ind:
        # Addition failed (likely already present): fall back to the existing
        # individual with this name and warn that we are assuming identity.
        new_ind = odbo.ind_NameId[row['ind_name']]
        warnings.warn("Assuming existing individual called %s (%s) is the correct one, and adding types accordingly." % 
                      (new_ind, row['ind_name']))
    print new_ind
    odbo.add_ind_type(ind = new_ind, OWLclass = row['class'], objectProperty = row['rel'])
    
con.close()

# Rather scrappy, Perlish procedural code for generating mappings. Annoyingly monolithic: Have to run all mappings or none.  

"""Reads owl_map and uses it to automatically populate RCV classes.  
Compares these to manual mappings. Prints a results summary and results tables.
Ontology to use must be specified as argv[1] when runnning this script."""

from mapping_tools import (map_obj, load_ont, mappingTabs)
from tsv2pdm import tab, rcd


# Load the ontology named on the command line into a Brain object.
go = load_ont(sys.argv[1])


manMap = tab('../mapping_tables/', 'manual_mapping.tsv')  # No key row.  Stored as list of dicts.
owlMap = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID') # dict of dicts.
RCV = rcd('../mapping_tables/', 'RocheCV_def.tsv', 'RCV_ID') # dict of dicts.

# Run the mapping comparison; results are collected on the mappingTabs object.
mapping_tabs = mappingTabs(manMap.tab, owlMap.rowColDict, RCV.rowColDict, go) 
# ...Hmmm - would give much more flexibility if passed objects rather than data structures.

# Write the (possibly updated) manual mapping table back out, sorted by RCV_ID.
manMap_updated = open('../mapping_tables/manual_mapping.tsv', "w")
manMap_updated.write(manMap.print_tab(sort_keys=('RCV_ID',)))
manMap_updated.close()

RCV_id_name = {} # Residual perlishness ?
for row in manMap.tab:
	RCV_id_name[row['RCV_ID']]=row['RCV_NAME']

report_path = '../mapping_tables/results/'
from tsv2pdm import tab
from json_tree_tools import blank_treeContent_node, write_json, add_leaf, load_json, roll_readable_tree, get_nodeId_name
import operator

# Assumes existing, valid domain tree files.  New tract/neuropil nodes could
# be interleaved (needs a list of subnodes) or simply appended at the end -
# there may not be suitable terms for neuropils yet.

# Load the existing domain tree structure and content, plus the tract table.
dts = load_json("../BrainName_domains/json/treeStructure.jso")
dtc = load_json("../BrainName_domains/json/treeContent.jso")
ttc_tab = tab("../BrainName_tracts/", "domain_data.tsv")

# First external ID of every content node that has any external IDs.
dlist = [d['extId'][0] for d in dtc if 'extId' in d and len(d['extId']) > 0]

# sort table on name field (ultimately) => alphabetic ordering of tree
ttc_tab.tab.sort(key=operator.itemgetter('name'))

# Next free nodeId = one past the highest nodeId already in use.
D_nodeIds = [int(n['nodeId']) for n in dtc]
i = max(D_nodeIds) + 1
from tsv2pdm import tab
from json_tree_tools import blank_treeContent_node, write_json, add_leaf, init_treeStructure
import operator
from __builtin__ import str  # Python 2: ensure the builtin str is in scope.

# Aim = single root: adult brain.  With an alphanumeric list of tracts underneath

tc_tab = tab("./", "domain_data.tsv")

# sort table on name field (ultimately) => alphabetic ordering of tree
tc_tab.tab.sort(key=operator.itemgetter('name'))

tc = []  # treeContent nodes, built below.
i = 1  # nodeId counter; '0' is reserved for the root node.

ts = init_treeStructure()

# Root node: adult brain (FBbt:00003624), nodeId '0'.
adult_brain_node = blank_treeContent_node(nodeId='0', name = 'adult brain', oboID = 'FBbt:00003624')
tc.append(adult_brain_node)

# One leaf per tract row that has an oboId, attached directly to the root.
for r in tc_tab.tab:
    if r['oboId']:
        n = blank_treeContent_node(domainId=r['domainID'], nodeId=str(i), name = r['name'], oboID = r['oboId'], 
                           color = r['domainColour'], centre = r['domainCentre'])
        tc.append(n)
        add_leaf(str(i), ts, '0')
        i += 1


write_json(json_var = tc, path = "json/treeContent.jso")
write_json(json_var = ts, path = "json/treeStructure.jso")
    deleted = ct.tab1_only()
    # Only in the update tab
    new = ct.tab2_only()
    for r in new.tab:
        warnings.warn("Processes %s" % r)
        if r['class_id']:
            od.add_akv_type(key = r['a.annotation_type'], value =r['a.text'] , OWLclass = r['class_id'], objectProperty =r['op_id'] )
    else:
        for r in deleted.tab:
            if not safe_mode:
                od.remove_akv_type(key = r['a.annotation_type'], value =r['a.text'] , OWLclass = r['class_id'], objectProperty =r['op_id'] )     
            else:
                warnings.warn("Row present in DB, now missing from mapping: %s. %s.  " \
                              "Safe mode set so not deleting" % (r['a.annotation_type'], r['a.text']))

c = get_con(sys.argv[1], sys.argv[2])
b = Brain()
b.learn(sys.argv[3]) # Path to ontology file with referenced terms (presumably fbbt_simple will suffice) 
od = owlDbOnt(conn = c, ont = b)
update_table = tab("../../../doc/", "annotation_map.tsv")
update_akv_from_tab(od, update_table) # Assumes update table has all mappings. If it lacks any, assumes these mappings are to be deleted!  This is potentially dangerous if mapping table is out of sync with DB.
outfile = open("../../../doc/annotation_map_report.tsv", "w")  
report_tab = gen_report_tab(od)
outfile.write(report_tab.print_tab(sort_keys = ('a.annotation_type', 'a.text')))
outfile.close()


c.commit()
c.close()
b.sleep()
import glob
from tsv2pdm import tab

# Every RCV results file, plus the template, gets a new is_obsolete column
# (default 0) and is rewritten in place.
results_files = glob.glob("*RCV_*.tsv")
results_files.append("results_template.tsv")

for fname in results_files:
    table = tab("", fname)
    table.append_column("is_obsolete", 0)
    with open(fname, "w") as out:
        out.write(table.print_tab())
from tsv2pdm import tab, rcd
from glob import glob1
import re
from numpy import average, median, sum, round
from collections import Counter

# General comment - this would be so much easier to do with a DB!

# TODO - hook, directly or indirectly, into the ticket system.  Could pull from owl_map.

# One results file per RCV class mapping run.
results = glob1("../mapping_tables/results/", "*_RCV_*.tsv")

stats = tab(key_column="RCV_ID",
            headers=[
                'RCV_ID', 'RCV_name', 'Auto sufficient', 'Manual only',
                'Auto only', 'Manual blacklist', 'Auto blacklist', 'pattern'
            ])  # Should really load as rcd to enforce key column uniqueness

owl_map = rcd(path="../mapping_tables/",
              file_name="owl_map.tsv",
              key_column='RCV_ID')

total_sufficient_maps = 0

# Lists for doing basic statistical analysis of results
# Sure this could be done more elegantly with list comps on tab, but still...

Auto_sufficient = []
Manual_only = []
Auto_only = []
# Number of classes where auto mapping is sufficient

from tsv2pdm import tab, rcd
from glob import glob1
import re
from numpy import average, median, sum, round
from collections import Counter

# General comment - this would be so much easier to do with a DB!
# TODO - hook, directly or indirectly, into the ticket system.  Could pull from owl_map.

# One results file per RCV class mapping run.
results = glob1("../mapping_tables/results/", "*_RCV_*.tsv")

# Should really load as rcd to enforce key column uniqueness
stat_headers = ['RCV_ID', 'RCV_name', 'Auto sufficient', 'Manual only',
                'Auto only', 'Manual blacklist', 'Auto blacklist', 'pattern']
stats = tab(key_column="RCV_ID", headers=stat_headers)

owl_map = rcd(path="../mapping_tables/", file_name="owl_map.tsv",
              key_column='RCV_ID')

total_sufficient_maps = 0

# Lists for doing basic statistical analysis of results.
# Sure this could be done more elegantly with list comps on tab, but still...
Auto_sufficient = []
Manual_only = []
Auto_only = []
Auto_blacklist = []
Manual_blacklist = []

# Need to work on balance between the generating script and the module
# TODO: Add code to generate full mapping table.  This can be derived from results tables + ticket info without a further reasoner run.
# Spec: Include combined manual & auto mappings that are not blacklists from results files for while a ticket exists with label: mapping_complete.

# Rather scrappy, Perlish procedural code for generating mappings. Annoyingly monolithic: Have to run all mappings or none.
"""Reads owl_map and uses it to automatically populate RCV classes.
Compares these to manual mappings, then prints a results summary and
results tables. The ontology to use must be given as argv[1] when
running this script."""

from mapping_tools import (map_obj, load_ont, mappingTabs)
from tsv2pdm import tab, rcd

# Ontology named on the command line, loaded into a Brain object.
go = load_ont(sys.argv[1])

# Mapping inputs: the manual map has no key row (list of dicts); the other
# two are row/column dicts keyed on RCV_ID.
manMap = tab('../mapping_tables/', 'manual_mapping.tsv')
owlMap = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID')
RCV = rcd('../mapping_tables/', 'RocheCV_def.tsv', 'RCV_ID')

# Run the mapping comparison.
# ...Hmmm - would give much more flexibility if passed objects rather than data structures.
mapping_tabs = mappingTabs(manMap.tab, owlMap.rowColDict, RCV.rowColDict, go)

# Write the manual mapping table back out, sorted by RCV_ID.
manMap_updated = open('../mapping_tables/manual_mapping.tsv', "w")
manMap_updated.write(manMap.print_tab(sort_keys=('RCV_ID', )))
manMap_updated.close()

# Residual perlishness ?
RCV_id_name = {row['RCV_ID']: row['RCV_NAME'] for row in manMap.tab}

report_path = '../mapping_tables/results/'