def __init__(self, comments_tablename, classification_version_sid, db_caller: DBCaller = None): self.db_caller = db_caller if self.db_caller is None: self.db_caller = DBCaller() self.comments_tablename = comments_tablename self.classification_version_sid = classification_version_sid
class PaintCurationStatusHelper: def __init__(self, curation_status_tablename, classification_version_sid, db_caller: DBCaller = None): self.db_caller = db_caller if self.db_caller is None: self.db_caller = DBCaller() self.curation_status_tablename = curation_status_tablename self.classification_version_sid = classification_version_sid def get_family_classification_id(self, family_id): query = """ select classification_id from panther_upl.classification where classification_version_sid = {classification_version_sid} and accession = '{family_id}'; """.format(classification_version_sid=self.classification_version_sid, family_id=family_id) results = self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True) if len(results[1:]) > 0: return results[1][0] else: raise Exception( "No classification found for '{}'".format(family_id)) def insert_curation_status(self, family_id, status_id): family_cls_id = self.get_family_classification_id(family_id) # Insert new curation_status query = """ INSERT INTO panther_upl.{curation_status_tablename} (curation_status_id, status_type_sid, classification_id, user_id, creation_date) VALUES (nextval('uids'), {status_id}, {family_cls_id}, 1113, now()) """.format(curation_status_tablename=self.curation_status_tablename, status_id=status_id, family_cls_id=family_cls_id) self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True)
order by c.accession, cc.confidence_code; """ query_14_1 = """ select c.accession, cc.confidence_code, count(distinct(pe.annotation_id)) from panther_upl.paint_annotation pa join panther_upl.node n on n.node_id = pa.node_id join panther_upl.classification c on c.accession = split_part(n.accession, ':', 1) join panther_upl.paint_evidence pe on pe.annotation_id = pa.annotation_id join panther_upl.confidence_code cc on cc.confidence_code_sid = pe.confidence_code_sid where c.classification_version_sid = 26 and n.classification_version_sid = 26 group by c.accession, cc.confidence_code order by c.accession, cc.confidence_code; """ caller = DBCaller() results_14_1 = caller.run_cmd_line_args(query_14_1, no_header_footer=True) results_13_1 = caller.run_cmd_line_args(query_13_1, no_header_footer=True) ev_codes = [] def parse_results(results): counts = {} for r in results: if r[0].startswith("PTHR"): # PTHR row pthr_id = r[0] if pthr_id not in counts: counts[pthr_id] = {} ev_code = r[1]
from pthr_db_caller.db_caller import DBCaller from util.publish_google_sheet import SheetPublishHandler, Sheet import datetime CALLER = DBCaller() query = """ select c.accession, cst.status, u.name, cs.creation_date from panther_upl.curation_status cs --select count(*) from curation_status cs join panther_upl.classification c on c.classification_id = cs.classification_id join panther_upl.users u on u.user_id = cs.user_id join panther_upl.curation_status_type cst on cst.status_type_sid = cs.status_type_sid where c.classification_version_sid = {classification_version_sid} order by cs.creation_date desc; """ start_date = "2019-06-01" # results = CALLER.run_cmd_line_args(query.format(start_date), no_header_footer=True) results = CALLER.run_cmd_line_args(query, no_header_footer=True) date_str = datetime.date.today().isoformat() sheet_title = "{}_curation_status".format(date_str) sheet = Sheet(title=sheet_title) headers = ["PTHR ID", "status", "name", "creation_date"] sheet.append_row(headers) for r in results[1:]: family = r[0] status = r[1] curator = r[2] status_creation_date = r[3]
class PthrCommentHelper: def __init__(self, comments_tablename, classification_version_sid, db_caller: DBCaller = None): self.db_caller = db_caller if self.db_caller is None: self.db_caller = DBCaller() self.comments_tablename = comments_tablename self.classification_version_sid = classification_version_sid def get_family_classification_id(self, family_id): query = """ select classification_id from panther_upl.classification where classification_version_sid = {classification_version_sid} and accession = '{family_id}'; """.format(classification_version_sid=self.classification_version_sid, family_id=family_id) results = self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True) if len(results[1:]) > 0: return results[1][0] else: raise Exception( "No classification found for '{}'".format(family_id)) def get_comments(self, family_id): # Return record query = """ select cm.* from panther_upl.{comments_tablename} cm join panther_upl.classification c on c.classification_id = cm.classification_id where c.classification_version_sid = {classification_version_sid} and c.accession = '{family_id}'; """.format(comments_tablename=self.comments_tablename, classification_version_sid=self.classification_version_sid, family_id=family_id) # print(query) results = self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True) return parse_results_to_comments(results[1:]) def update_comment(self, family_cls_id, comment_text): # Append to remark field query = """ update panther_upl.{comments_tablename} cm set remark = remark || '\n' || current_date || ': {comment_text}\n' where cm.classification_id = {family_cls_id}; """.format(comments_tablename=self.comments_tablename, comment_text=comment_text, family_cls_id=family_cls_id) self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True) def insert_comment(self, family_cls_id, comment_text): # Insert new comment query = """ insert into panther_upl.{comments_tablename} (comment_id, classification_id, protein_id, remark, created_by, creation_date, obsoleted_by, obsolescence_date, node_id) VALUES (nextval('uids'), {family_cls_id}, null, current_date || ': {comment_text}\n', 1113, now(), null, null, null); """.format(comments_tablename=self.comments_tablename, family_cls_id=family_cls_id, comment_text=comment_text) self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True) def update_or_insert_comment(self, family_id, comment_text): existing_comments = self.get_comments(family_id) if len(existing_comments) > 0: # Can update cls_id = existing_comments[0].classification_id self.update_comment(cls_id, comment_text) else: # Gonna have to get family classification_id cls_id = self.get_family_classification_id(family_id) self.insert_comment(cls_id, comment_text)
from pthr_db_caller.db_caller import DBCaller from util.publish_google_sheet import SheetPublishHandler, Sheet from os import path import csv import argparse import yaml import datetime parser = argparse.ArgumentParser() parser.add_argument('-a', '--a_yaml') parser.add_argument('-b', '--b_yaml') parser.add_argument('-r', '--reload_data', action="store_const", const=True) args = parser.parse_args() CALLER = DBCaller() A_DATA = None B_DATA = None ALL_FAMS = [] # Just to be safe query = """ select split_part(n.accession,':',1), count(*) from panther_upl.{} pa join panther_upl.node n on n.node_id = pa.node_id where pa.obsolescence_date is null group by split_part(n.accession,':',1); """ def get_results(table_name, cache_file, reload_data=None): # Cache results if path.isfile(cache_file) and not reload_data:
# Only use query variables for single query SQL files, otherwise managing these gets tricky due to the per-statement cleaning step. parser.add_argument( "-v", "--query_variables", type=str, required=False, help= "comma-delimited, ordered list of values to replace variables in SQL script.\ Only use query variables for single query SQL files, otherwise managing these gets tricky due to the per-statement cleaning step." ) parser.add_argument("-o", "--rows_outfile", help="Write result rows to specified filename.") parser.add_argument("-d", "--delimiter", help="column delimiter to display in query output.") parser.add_argument( "-n", "--no_header_footer", action='store_const', const=True, help="No header or footer will be included in query result output") args = parser.parse_args() caller = DBCaller() caller.run_cmd_line_args(args.query_filename, query_variables=args.query_variables, rows_outfile=args.rows_outfile, delimiter=args.delimiter, no_header_footer=args.no_header_footer)
import argparse from pthr_db_caller.db_caller import DBCaller parser = argparse.ArgumentParser() parser.add_argument('outfile') args = parser.parse_args() caller = DBCaller() query = "select go_annotation_release_date, panther_version from panther_upl.fullgo_version;" results = caller.run_cmd_line_args(query.rstrip(), no_header_footer=True) version_date = results[1][0] panther_version = results[1][1] with open(args.outfile, "w+") as out_f: out_f.write("GO\t{}\n".format(version_date)) out_f.write("PANTHER\tv.{}".format(panther_version))
def __init__(self, curation_status_tablename, classification_version_sid): self.db_caller = DBCaller() self.curation_status_tablename = curation_status_tablename self.classification_version_sid = classification_version_sid
def __init__(self, comments_tablename, classification_version_sid): self.db_caller = DBCaller() self.comments_tablename = comments_tablename self.classification_version_sid = classification_version_sid
args = parser.parse_args() # TABLE_SUFFIX = "_new" # TABLE_SUFFIX = "" TABLE_SUFFIX = args.table_suffix # LIB_DIR = "/home/pmd-02/pdt/pdthomas/panther/famlib/rel/PANTHER14.1" # HPC # LIB_DIR = "resources/tree_files" # local # LIB_DIR = args.tree_files_dir ignore_cache = False if args.config_file: config = DBCallerConfig(config_path=args.config_file) else: config = DBCallerConfig() CALLER = DBCaller(config=config) CLS_VER_SID = config.query_variables.get("classification_version_sid") COMMENT_HELPER = PthrCommentHelper( comments_tablename="comments{}".format(TABLE_SUFFIX), classification_version_sid=CLS_VER_SID) CURATION_STATUS_HELPER = PaintCurationStatusHelper( curation_status_tablename="curation_status{}".format(TABLE_SUFFIX), classification_version_sid=CLS_VER_SID) LIB_DIR = config.query_variables.get("tree_files_dir") QUERY = """ select distinct split_part(na.accession, ':', 1) as family, paa.annotation_id anc_annotation_id, na.public_id ancestor_ptn, na.accession ancestor_an, pa.annotation_id desc_annotation_id, n.public_id descendant_ptn, n.accession descendant_an, gc.accession, gc.classification_id, q.qualifier from panther_upl.paint_annotation{table_suffix} pa join panther_upl.paint_evidence{table_suffix} pe on pe.annotation_id = pa.annotation_id join panther_upl.node n on n.node_id = pa.node_id join (select child_node_acc, unnest(string_to_array(ancestor_node_acc, ',')) as ancestor from panther_upl.node_all_ancestors) naa on naa.child_node_acc = n.accession
from pthr_db_caller.db_caller import DBCaller from util.publish_google_sheet import SheetPublishHandler, Sheet import argparse import json import datetime parser = argparse.ArgumentParser() parser.add_argument('-b', '--before_date') parser.add_argument('-a', '--after_date') parser.add_argument('-p', '--publish_report', action='store_const', const=True) args = parser.parse_args() query_file = "scripts/sql/reports/created_ibds_by_curator.sql" caller = DBCaller() q_vars = {"before_date": args.before_date, "after_date": args.after_date} results = caller.run_cmd_line_args(query_file, query_variables=json.dumps(q_vars), no_header_footer=True) handler = SheetPublishHandler() date_str = datetime.date.today().isoformat() sheet_title = "{}-ibd_count_by_curator".format(date_str) sheet = Sheet(title=sheet_title) headers = [ "Name", f"IBDs created between {args.before_date} and {args.after_date}" ] sheet.append_row(headers) total_count = 0