import re
from numpy import average, median, sum, round
from collections import Counter

# General comment - this would be so much easier to do with a DB!

    # TODO - hook, directly or indirectly, into the ticket system.  Could pull from owl_map.

# File names in the results directory that match the per-RCV results pattern.
# NOTE(review): glob1 is not imported in the visible part of this file -
# presumably glob.glob1; confirm it is in scope.
results = glob1("../mapping_tables/results/", "*_RCV_*.tsv")

# Summary table declared from a key column and a header list only
# (no file is loaded here).
# Should really load as rcd to enforce key column uniqueness.
stats = tab(
    headers=[
        'RCV_ID',
        'RCV_name',
        'Auto sufficient',
        'Manual only',
        'Auto only',
        'Manual blacklist',
        'Auto blacklist',
        'pattern',
    ],
    key_column="RCV_ID",
)

# owl_map.tsv, keyed on RCV_ID.
owl_map = rcd(
    key_column='RCV_ID',
    file_name="owl_map.tsv",
    path="../mapping_tables/",
)

# Counter initialised to zero (presumably incremented further down - confirm).
total_sufficient_maps = 0

# Lists for doing basic statistical analysis of results.
# Sure this could be done more elegantly with list comps on tab, but still...
# Each name is bound to its own, separate empty list.
(Auto_sufficient,
 Manual_only,
 Auto_only,
 Auto_blacklist,
 Manual_blacklist) = [], [], [], [], []

# For now the mapping table is simply owl_map.tsv keyed on RCV_ID.
mapping_table = rcd(
    key_column="RCV_ID",
    file_name="owl_map.tsv",
    path="../mapping_tables/",
)
#  TODO: Add code to generate full mapping table.  This can be derived from results tables + ticket info without a further reasoner run.
## Spec: Include combined manual & auto mappings that are not blacklists from results files for which a ticket exists with label: mapping_complete.

# Rather scrappy, Perlish procedural code for generating mappings. Annoyingly monolithic: Have to run all mappings or none.
"""Reads owl_map and uses it to automatically populate RCV classes.  
Compares these to manual mappings. Prints a results summary and results tables.
Ontology to use must be specified as argv[1] when running this script."""

from mapping_tools import (map_obj, load_ont, mappingTabs)
from tsv2pdm import tab, rcd

# The ontology location comes from the command line, so `sys` has to be in
# scope here; the visible import header of this script does not import it.
import sys

if len(sys.argv) < 2:
    # Stop with a usage message instead of a bare IndexError on sys.argv[1].
    sys.exit("Usage: %s <ontology>" % sys.argv[0])

# Ontology named by the first command-line argument.
go = load_ont(sys.argv[1])

manMap = tab('../mapping_tables/',
             'manual_mapping.tsv')  # No key row.  Stored as list of dicts.
owlMap = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID')  # dict of dicts.
RCV = rcd('../mapping_tables/', 'RocheCV_def.tsv', 'RCV_ID')  # dict of dicts.

# Built from the raw data structures of the three tables plus the ontology.
mapping_tabs = mappingTabs(manMap.tab, owlMap.rowColDict, RCV.rowColDict, go)
# ...Hmmm - would give much more flexibility if passed objects rather than data structures.

# Render the table BEFORE opening the file: opening in "w" mode truncates
# manual_mapping.tsv immediately, so an exception inside print_tab() would
# otherwise leave the manual mappings file empty.
manMap_text = manMap.print_tab(sort_keys=('RCV_ID', ))

# The context manager closes the handle even if the write itself fails.
with open('../mapping_tables/manual_mapping.tsv', "w") as manMap_updated:
    manMap_updated.write(manMap_text)

# RCV_ID -> RCV_NAME lookup taken from the manual mapping rows.
# When an RCV_ID occurs in several rows, the last row wins.
RCV_id_name = {entry['RCV_ID']: entry['RCV_NAME'] for entry in manMap.tab}

# Path of the results directory (with trailing slash).
report_path = '../mapping_tables/results/'
Esempio n. 3
0
#!/usr/bin/env python

import re
from github_tools import issueConn
from tsv2pdm import rcd, tab
import glob
import warnings
"""Generates a set of proforma tickets using owl_map.tsv 
for entries in which the pattern field does not begin with a '?'
and for which there is currently no ticket following the standard name pattern.
Writes ticket number and state back to owl_map;
If mapping complete, updates RCV master file to record mapping_complete = 1
"""

# Connection object used for the ticket look-ups and ticket creation below.
ic = issueConn('GO-ROCHE-COLLAB', 'Roche_CV_mapping', 'dosumis')

# owl_map.tsv keyed on RCV_ID (dict of dicts).
owlMap = rcd(path='../mapping_tables/',
             file_name='owl_map.tsv',
             key_column='RCV_ID')

# NOTE(review): this loads owl_map.tsv a second time under the name RCV.
# Possibly a copy-paste slip for the RCV master file - confirm which file
# is intended before relying on RCV.
RCV = rcd(path='../mapping_tables/',
          file_name='owl_map.tsv',
          key_column='RCV_ID')

# For every owl_map row, look up the review ticket named after the row and
# create one when none exists.
# NOTE(review): this loop is cut off in the visible source - the final
# `else:` has no body here.
for RCV_id, rd in owlMap.rowColDict.items():
    # Only rows whose 'Applied pattern' does not start with '?' are processed.
    # NOTE(review): "\?" is an invalid escape sequence in a non-raw string
    # literal; a raw string (r"\?.*") would avoid the warning Python emits.
    if not re.match("\?.*", rd['Applied pattern']):
        ticket_name = "Review %s %s" % (rd['RCV_NAME'], RCV_id)
        issue = ''
        # Existing tickets with this name carrying the 'Mapping_review' label.
        issues = ic.ticket_exists(ticket_name, ['Mapping_review'])
        if not issues:
            # No ticket yet: create the standard review ticket for this row.
            issue = ic.create_standard_review_ticket(RCV_id, rd['RCV_NAME'])
        elif len(issues) > 1:
            # Ambiguous match: warn and move on to the next row.
            warnings.warn(
                "Multiple tickets exist with the name '%s' and the label 'Mapping_review'."
                % ticket_name)
            continue
        else:
from collections import Counter

# General comment - this would be so much easier to do with a DB!

# TODO - hook, directly or indirectly, into the ticket system.  Could pull from owl_map.

# File names in the results directory that match the per-RCV results pattern.
# NOTE(review): glob1 is not imported in the visible part of this file -
# presumably glob.glob1; confirm it is in scope.
results = glob1("../mapping_tables/results/", "*_RCV_*.tsv")

# Summary table declared from a key column and a header list only
# (no file is loaded here).
# Should really load as rcd to enforce key column uniqueness.
stats = tab(
    headers=[
        'RCV_ID',
        'RCV_name',
        'Auto sufficient',
        'Manual only',
        'Auto only',
        'Manual blacklist',
        'Auto blacklist',
        'pattern',
    ],
    key_column="RCV_ID",
)

# owl_map.tsv, keyed on RCV_ID.
owl_map = rcd(
    key_column='RCV_ID',
    file_name="owl_map.tsv",
    path="../mapping_tables/",
)

# Counter initialised to zero (presumably incremented further down - confirm).
total_sufficient_maps = 0

# Lists for doing basic statistical analysis of results.
# Sure this could be done more elegantly with list comps on tab, but still...
# Each name is bound to its own, separate empty list.
(Auto_sufficient,
 Manual_only,
 Auto_only,
 Auto_blacklist,
 Manual_blacklist) = [], [], [], [], []

mapping_table = rcd(path="../mapping_tables/",
                    file_name="owl_map.tsv",
import re
from github_tools import issueConn
from tsv2pdm import rcd, tab
import glob
import warnings

"""Generates a set of proforma tickets using owl_map.tsv 
for entries in which the pattern field does not begin with a '?'
and for which there is currently no ticket following the standard name pattern.
Writes ticket number and state back to owl_map;
If mapping complete, updates RCV master file to record mapping_complete = 1
"""

# Connection object used for the ticket look-ups and ticket creation below.
ic = issueConn('GO-ROCHE-COLLAB', 'Roche_CV_mapping', 'dosumis')

# owl_map.tsv keyed on RCV_ID (dict of dicts).
owlMap = rcd(path='../mapping_tables/',
             file_name='owl_map.tsv',
             key_column='RCV_ID')

# NOTE(review): this loads owl_map.tsv a second time under the name RCV.
# Possibly a copy-paste slip for the RCV master file - confirm which file
# is intended before relying on RCV.
RCV = rcd(path='../mapping_tables/',
          file_name='owl_map.tsv',
          key_column='RCV_ID')

# For every owl_map row, look up the review ticket named after the row and
# create one when none exists.
# NOTE(review): this loop is cut off in the visible source - the final
# `if issue['state'] == 'closed':` has no body here.
for RCV_id, rd in owlMap.rowColDict.items():
    # Only rows whose 'Applied pattern' does not start with '?' are processed.
    # NOTE(review): "\?" is an invalid escape sequence in a non-raw string
    # literal; a raw string (r"\?.*") would avoid the warning Python emits.
    if not re.match("\?.*", rd['Applied pattern']):
        ticket_name = "Review %s %s" % (rd['RCV_NAME'], RCV_id)
        issue = ''
        # Existing tickets with this name carrying the 'Mapping_review' label.
        issues = ic.ticket_exists(ticket_name, ['Mapping_review'])
        if not issues:
            # No ticket yet: create the standard review ticket for this row.
            issue = ic.create_standard_review_ticket(RCV_id, rd['RCV_NAME'])
        elif len(issues) > 1:
            # Ambiguous match: warn and move on to the next row.
            warnings.warn("Multiple tickets exist with the name '%s' and the label 'Mapping_review'." % ticket_name)
            continue
        else:
            # Exactly one matching ticket: use it.
            issue = issues[0]
        if issue['state'] == 'closed':
# Rather scrappy, Perlish procedural code for generating mappings. Annoyingly monolithic: Have to run all mappings or none.  

"""Reads owl_map and uses it to automatically populate RCV classes.  
Compares these to manual mappings. Prints a results summary and results tables.
Ontology to use must be specified as argv[1] when running this script."""

from mapping_tools import (map_obj, load_ont, mappingTabs)
from tsv2pdm import tab, rcd


# The ontology location comes from the command line, so `sys` has to be in
# scope here; the visible import header of this script does not import it.
import sys

if len(sys.argv) < 2:
    # Stop with a usage message instead of a bare IndexError on sys.argv[1].
    sys.exit("Usage: %s <ontology>" % sys.argv[0])

# Ontology named by the first command-line argument.
go = load_ont(sys.argv[1])

manMap = tab('../mapping_tables/', 'manual_mapping.tsv')  # No key row.  Stored as list of dicts.
owlMap = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID')  # dict of dicts.
RCV = rcd('../mapping_tables/', 'RocheCV_def.tsv', 'RCV_ID')  # dict of dicts.

# Built from the raw data structures of the three tables plus the ontology.
mapping_tabs = mappingTabs(manMap.tab, owlMap.rowColDict, RCV.rowColDict, go)
# ...Hmmm - would give much more flexibility if passed objects rather than data structures.

# Render the table BEFORE opening the file: opening in "w" mode truncates
# manual_mapping.tsv immediately, so an exception inside print_tab() would
# otherwise leave the manual mappings file empty.
manMap_text = manMap.print_tab(sort_keys=('RCV_ID',))

# The context manager closes the handle even if the write itself fails.
with open('../mapping_tables/manual_mapping.tsv', "w") as manMap_updated:
    manMap_updated.write(manMap_text)

# RCV_ID -> RCV_NAME lookup taken from the manual mapping rows.
# When an RCV_ID occurs in several rows, the last row wins.
RCV_id_name = {entry['RCV_ID']: entry['RCV_NAME'] for entry in manMap.tab}

# Path of the results directory (with trailing slash).
report_path = '../mapping_tables/results/'