import re
from collections import Counter

# NOTE(review): numpy's `sum` and `round` shadow the Python builtins of the
# same name for the rest of this module - confirm that is intended.
from numpy import average, median, sum, round

# General comment - this would be so much easier to do with a DB!
# TODO - hook, directly or indirectly, into the ticket system.
#        Could pull from owl_map.

# Set-up for the statistics pass over the per-class results tables.
# NOTE(review): `glob1`, `tab` and `rcd` are not imported in this part of the
# file; presumably they come from `glob` and `tsv2pdm` - confirm.

# Results files matching the *_RCV_*.tsv naming pattern.
results = glob1("../mapping_tables/results/", "*_RCV_*.tsv")

# Summary table keyed on RCV_ID.
# Should really load as rcd to enforce key column uniqueness.
stats = tab(
    key_column="RCV_ID",
    headers=[
        'RCV_ID',
        'RCV_name',
        'Auto sufficient',
        'Manual only',
        'Auto only',
        'Manual blacklist',
        'Auto blacklist',
        'pattern',
    ],
)

owl_map = rcd(
    path="../mapping_tables/",
    file_name="owl_map.tsv",
    key_column='RCV_ID',
)

total_sufficient_maps = 0

# Lists for doing basic statistical analysis of results.
# Sure this could be done more elegantly with list comps on tab, but still...
Auto_sufficient = []
Manual_only = []
Auto_only = []
Auto_blacklist = []
Manual_blacklist = []

# NOTE(review): this loads the same owl_map.tsv a second time (see `owl_map`
# above). Kept as a separate object because later code may modify one of them
# independently - confirm whether both are needed.
mapping_table = rcd(
    path="../mapping_tables/",
    file_name="owl_map.tsv",
    key_column="RCV_ID",
)
# TODO: Add code to generate full mapping table. This can be derived from
# results tables + ticket info without a further reasoner run.
## Spec: Include combined manual & auto mappings that are not blacklists from
## results files for which a ticket exists with label: mapping_complete.

# Rather scrappy, Perlish procedural code for generating mappings.
# Annoyingly monolithic: Have to run all mappings or none.

"""Reads owl_map and uses it to automatically populate RCV classes.

Compares these to manual mappings. Prints a results summary and results
tables. Ontology to use must be specified as argv[1] when running this
script.
"""
# sys.argv is read below; re-importing is harmless if sys is already imported
# earlier in the file.
import sys

from mapping_tools import (map_obj, load_ont, mappingTabs)
from tsv2pdm import tab, rcd

# Ontology file path comes from the first command-line argument.
go = load_ont(sys.argv[1])

manMap = tab('../mapping_tables/', 'manual_mapping.tsv')  # No key row. Stored as list of dicts.
owlMap = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID')  # dict of dicts.
RCV = rcd('../mapping_tables/', 'RocheCV_def.tsv', 'RCV_ID')  # dict of dicts.

# ...Hmmm - would give much more flexibility if passed objects rather than
# data structures.
mapping_tabs = mappingTabs(manMap.tab, owlMap.rowColDict, RCV.rowColDict, go)

# Write the manual mapping table back to the file it was loaded from, sorted
# by RCV_ID. Must stay after the mappingTabs() call, as in the original
# ordering. `with` guarantees the handle is closed even if print_tab() or
# write() raises.
with open('../mapping_tables/manual_mapping.tsv', "w") as manMap_updated:
    manMap_updated.write(manMap.print_tab(sort_keys=('RCV_ID', )))

# Lookup of RCV_ID -> RCV_NAME built from the manual mapping rows.
# Residual perlishness ?
RCV_id_name = {}
for row in manMap.tab:
    RCV_id_name[row['RCV_ID']] = row['RCV_NAME']

report_path = '../mapping_tables/results/'
#!/usr/bin/env python import re from github_tools import issueConn from tsv2pdm import rcd, tab import glob import warnings """Generates a set of proforma tickets using owl_map.tsv for entries in which the pattern field does not begin with a '?' and for which there is not currently ticket following the standard name pattern. Writes ticket number and state back to owl_map; If mapping complete, updates RCV master file to record mapping_complete = 1 """ ic = issueConn('GO-ROCHE-COLLAB', 'Roche_CV_mapping', 'dosumis') owlMap = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID') # dict of dicts. RCV = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID') for RCV_id, rd in owlMap.rowColDict.items(): if not re.match("\?.*", rd['Applied pattern']): ticket_name = "Review %s %s" % (rd['RCV_NAME'], RCV_id) issue = '' issues = ic.ticket_exists(ticket_name, ['Mapping_review']) if not issues: issue = ic.create_standard_review_ticket(RCV_id, rd['RCV_NAME']) elif len(issues) > 1: warnings.warn( "Multiple tickets exist with the name '%s' and the label 'Mapping_review'." % ticket_name) continue else:
from collections import Counter # General comment - this would be so much easier to do with a DB! # TODO - hook, directly or indirectly, into the ticket system. Could pull from owl_map. results = glob1("../mapping_tables/results/", "*_RCV_*.tsv") stats = tab(key_column="RCV_ID", headers=[ 'RCV_ID', 'RCV_name', 'Auto sufficient', 'Manual only', 'Auto only', 'Manual blacklist', 'Auto blacklist', 'pattern' ]) # Should really load as rcd to enforce key column uniqueness owl_map = rcd(path="../mapping_tables/", file_name="owl_map.tsv", key_column='RCV_ID') total_sufficient_maps = 0 # Lists for doing basic statistical analysis of results # Sure this could be done more elegantly with list comps on tab, but still... Auto_sufficient = [] Manual_only = [] Auto_only = [] Auto_blacklist = [] Manual_blacklist = [] mapping_table = rcd(path="../mapping_tables/", file_name="owl_map.tsv",
import re from github_tools import issueConn from tsv2pdm import rcd, tab import glob import warnings """Generates a set of proforma tickets using owl_map.tsv for entries in which the pattern field does not begin with a '?' and for which there is not currently ticket following the standard name pattern. Writes ticket number and state back to owl_map; If mapping complete, updates RCV master file to record mapping_complete = 1 """ ic = issueConn('GO-ROCHE-COLLAB', 'Roche_CV_mapping', 'dosumis') owlMap = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID') # dict of dicts. RCV = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID') for RCV_id, rd in owlMap.rowColDict.items(): if not re.match("\?.*", rd['Applied pattern']): ticket_name = "Review %s %s" % (rd['RCV_NAME'], RCV_id) issue = '' issues = ic.ticket_exists(ticket_name, ['Mapping_review']) if not issues: issue = ic.create_standard_review_ticket(RCV_id, rd['RCV_NAME']) elif len(issues) > 1: warnings.warn("Multiple tickets exist with the name '%s' and the label 'Mapping_review'." % ticket_name) continue else: issue = issues[0] if issue['state'] == 'closed':
# Rather scrappy, Perlish procedural code for generating mappings.
# Annoyingly monolithic: Have to run all mappings or none.

"""Reads owl_map and uses it to automatically populate RCV classes.

Compares these to manual mappings. Prints a results summary and results
tables. Ontology to use must be specified as argv[1] when running this
script.
"""
# sys.argv is read below; re-importing is harmless if sys is already imported
# earlier in the file.
import sys

from mapping_tools import (map_obj, load_ont, mappingTabs)
from tsv2pdm import tab, rcd

# Ontology file path comes from the first command-line argument.
go = load_ont(sys.argv[1])

manMap = tab('../mapping_tables/', 'manual_mapping.tsv')  # No key row. Stored as list of dicts.
owlMap = rcd('../mapping_tables/', 'owl_map.tsv', 'RCV_ID')  # dict of dicts.
RCV = rcd('../mapping_tables/', 'RocheCV_def.tsv', 'RCV_ID')  # dict of dicts.

# ...Hmmm - would give much more flexibility if passed objects rather than
# data structures.
mapping_tabs = mappingTabs(manMap.tab, owlMap.rowColDict, RCV.rowColDict, go)

# Write the manual mapping table back to the file it was loaded from, sorted
# by RCV_ID. Must stay after the mappingTabs() call, as in the original
# ordering. `with` guarantees the handle is closed even if print_tab() or
# write() raises.
with open('../mapping_tables/manual_mapping.tsv', "w") as manMap_updated:
    manMap_updated.write(manMap.print_tab(sort_keys=('RCV_ID',)))

# Lookup of RCV_ID -> RCV_NAME built from the manual mapping rows.
# Residual perlishness ?
RCV_id_name = {}
for row in manMap.tab:
    RCV_id_name[row['RCV_ID']] = row['RCV_NAME']

report_path = '../mapping_tables/results/'