Exemple #1
0
 def __init__(self,
              comments_tablename,
              classification_version_sid,
              db_caller: DBCaller = None):
     self.db_caller = db_caller
     if self.db_caller is None:
         self.db_caller = DBCaller()
     self.comments_tablename = comments_tablename
     self.classification_version_sid = classification_version_sid
class PaintCurationStatusHelper:
    def __init__(self,
                 curation_status_tablename,
                 classification_version_sid,
                 db_caller: DBCaller = None):
        self.db_caller = db_caller
        if self.db_caller is None:
            self.db_caller = DBCaller()
        self.curation_status_tablename = curation_status_tablename
        self.classification_version_sid = classification_version_sid

    def get_family_classification_id(self, family_id):
        query = """
        select classification_id from panther_upl.classification
        where classification_version_sid = {classification_version_sid}
        and accession = '{family_id}';
        """.format(classification_version_sid=self.classification_version_sid,
                   family_id=family_id)

        results = self.db_caller.run_cmd_line_args(query.rstrip(),
                                                   no_header_footer=True)
        if len(results[1:]) > 0:
            return results[1][0]
        else:
            raise Exception(
                "No classification found for '{}'".format(family_id))

    def insert_curation_status(self, family_id, status_id):
        family_cls_id = self.get_family_classification_id(family_id)
        # Insert new curation_status

        query = """
        INSERT INTO panther_upl.{curation_status_tablename}
        (curation_status_id, status_type_sid, classification_id, user_id, 
            creation_date)
        VALUES (nextval('uids'), {status_id}, {family_cls_id}, 1113, now())
        """.format(curation_status_tablename=self.curation_status_tablename,
                   status_id=status_id,
                   family_cls_id=family_cls_id)

        self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True)
order by c.accession, cc.confidence_code;
"""

query_14_1 = """
select c.accession, cc.confidence_code, count(distinct(pe.annotation_id)) from panther_upl.paint_annotation pa
join panther_upl.node n on n.node_id = pa.node_id
join panther_upl.classification c on c.accession = split_part(n.accession, ':', 1)
join panther_upl.paint_evidence pe on pe.annotation_id = pa.annotation_id
join panther_upl.confidence_code cc on cc.confidence_code_sid = pe.confidence_code_sid
where c.classification_version_sid = 26
and n.classification_version_sid = 26
group by c.accession, cc.confidence_code
order by c.accession, cc.confidence_code;
"""

caller = DBCaller()
results_14_1 = caller.run_cmd_line_args(query_14_1, no_header_footer=True)
results_13_1 = caller.run_cmd_line_args(query_13_1, no_header_footer=True)

ev_codes = []


def parse_results(results):
    counts = {}
    for r in results:
        if r[0].startswith("PTHR"):
            # PTHR row
            pthr_id = r[0]
            if pthr_id not in counts:
                counts[pthr_id] = {}
            ev_code = r[1]
from pthr_db_caller.db_caller import DBCaller
from util.publish_google_sheet import SheetPublishHandler, Sheet
import datetime

CALLER = DBCaller()

query = """
select c.accession, cst.status, u.name, cs.creation_date from panther_upl.curation_status cs
--select count(*) from curation_status cs
join panther_upl.classification c on c.classification_id = cs.classification_id
join panther_upl.users u on u.user_id = cs.user_id
join panther_upl.curation_status_type cst on cst.status_type_sid = cs.status_type_sid
where c.classification_version_sid = {classification_version_sid}
order by cs.creation_date desc;
"""

start_date = "2019-06-01"
# results = CALLER.run_cmd_line_args(query.format(start_date), no_header_footer=True)
results = CALLER.run_cmd_line_args(query, no_header_footer=True)

date_str = datetime.date.today().isoformat()
sheet_title = "{}_curation_status".format(date_str)
sheet = Sheet(title=sheet_title)
headers = ["PTHR ID", "status", "name", "creation_date"]
sheet.append_row(headers)

for r in results[1:]:
    family = r[0]
    status = r[1]
    curator = r[2]
    status_creation_date = r[3]
Exemple #5
0
class PthrCommentHelper:
    def __init__(self,
                 comments_tablename,
                 classification_version_sid,
                 db_caller: DBCaller = None):
        self.db_caller = db_caller
        if self.db_caller is None:
            self.db_caller = DBCaller()
        self.comments_tablename = comments_tablename
        self.classification_version_sid = classification_version_sid

    def get_family_classification_id(self, family_id):
        query = """
        select classification_id from panther_upl.classification
        where classification_version_sid = {classification_version_sid}
        and accession = '{family_id}';
        """.format(classification_version_sid=self.classification_version_sid,
                   family_id=family_id)

        results = self.db_caller.run_cmd_line_args(query.rstrip(),
                                                   no_header_footer=True)
        if len(results[1:]) > 0:
            return results[1][0]
        else:
            raise Exception(
                "No classification found for '{}'".format(family_id))

    def get_comments(self, family_id):
        # Return record
        query = """
        select cm.* from panther_upl.{comments_tablename} cm
        join panther_upl.classification c on c.classification_id = cm.classification_id
        where c.classification_version_sid = {classification_version_sid}
        and c.accession = '{family_id}';
        """.format(comments_tablename=self.comments_tablename,
                   classification_version_sid=self.classification_version_sid,
                   family_id=family_id)

        # print(query)
        results = self.db_caller.run_cmd_line_args(query.rstrip(),
                                                   no_header_footer=True)
        return parse_results_to_comments(results[1:])

    def update_comment(self, family_cls_id, comment_text):
        # Append to remark field
        query = """
        update panther_upl.{comments_tablename} cm
        set remark = remark || '\n' || current_date || ': {comment_text}\n'
        where cm.classification_id = {family_cls_id};
        """.format(comments_tablename=self.comments_tablename,
                   comment_text=comment_text,
                   family_cls_id=family_cls_id)

        self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True)

    def insert_comment(self, family_cls_id, comment_text):
        # Insert new comment
        query = """
        insert into panther_upl.{comments_tablename}
        (comment_id, classification_id, protein_id, remark, created_by, 
            creation_date, obsoleted_by, obsolescence_date, node_id)
        VALUES (nextval('uids'), {family_cls_id}, null, current_date || ': {comment_text}\n', 1113, now(), null, null, null);
        """.format(comments_tablename=self.comments_tablename,
                   family_cls_id=family_cls_id,
                   comment_text=comment_text)

        self.db_caller.run_cmd_line_args(query.rstrip(), no_header_footer=True)

    def update_or_insert_comment(self, family_id, comment_text):
        existing_comments = self.get_comments(family_id)
        if len(existing_comments) > 0:
            # Can update
            cls_id = existing_comments[0].classification_id
            self.update_comment(cls_id, comment_text)
        else:
            # Gonna have to get family classification_id
            cls_id = self.get_family_classification_id(family_id)
            self.insert_comment(cls_id, comment_text)
Exemple #6
0
from pthr_db_caller.db_caller import DBCaller
from util.publish_google_sheet import SheetPublishHandler, Sheet
from os import path
import csv
import argparse
import yaml
import datetime

parser = argparse.ArgumentParser()
parser.add_argument('-a', '--a_yaml')
parser.add_argument('-b', '--b_yaml')
parser.add_argument('-r', '--reload_data', action="store_const", const=True)
args = parser.parse_args()

CALLER = DBCaller()
A_DATA = None
B_DATA = None
ALL_FAMS = []  # Just to be safe

query = """
select split_part(n.accession,':',1), count(*) from panther_upl.{} pa
join panther_upl.node n on n.node_id = pa.node_id
where pa.obsolescence_date is null
group by split_part(n.accession,':',1);
"""


def get_results(table_name, cache_file, reload_data=None):
    
    # Cache results
    if path.isfile(cache_file) and not reload_data:
# Only use query variables for single query SQL files, otherwise managing these gets tricky due to the per-statement cleaning step.
parser.add_argument(
    "-v",
    "--query_variables",
    type=str,
    required=False,
    help=
    "comma-delimited, ordered list of values to replace variables in SQL script.\
                                                Only use query variables for single query SQL files, otherwise managing these gets tricky due to the per-statement cleaning step."
)
parser.add_argument("-o",
                    "--rows_outfile",
                    help="Write result rows to specified filename.")
parser.add_argument("-d",
                    "--delimiter",
                    help="column delimiter to display in query output.")
parser.add_argument(
    "-n",
    "--no_header_footer",
    action='store_const',
    const=True,
    help="No header or footer will be included in query result output")

args = parser.parse_args()

caller = DBCaller()
caller.run_cmd_line_args(args.query_filename,
                         query_variables=args.query_variables,
                         rows_outfile=args.rows_outfile,
                         delimiter=args.delimiter,
                         no_header_footer=args.no_header_footer)
Exemple #8
0
import argparse
from pthr_db_caller.db_caller import DBCaller

parser = argparse.ArgumentParser()
parser.add_argument('outfile')

args = parser.parse_args()

caller = DBCaller()

query = "select go_annotation_release_date, panther_version from panther_upl.fullgo_version;"

results = caller.run_cmd_line_args(query.rstrip(), no_header_footer=True)

version_date = results[1][0]
panther_version = results[1][1]

with open(args.outfile, "w+") as out_f:
    out_f.write("GO\t{}\n".format(version_date))
    out_f.write("PANTHER\tv.{}".format(panther_version))
Exemple #9
0
 def __init__(self, curation_status_tablename, classification_version_sid):
     self.db_caller = DBCaller()
     self.curation_status_tablename = curation_status_tablename
     self.classification_version_sid = classification_version_sid
Exemple #10
0
 def __init__(self, comments_tablename, classification_version_sid):
     self.db_caller = DBCaller()
     self.comments_tablename = comments_tablename
     self.classification_version_sid = classification_version_sid
Exemple #11
0
args = parser.parse_args()

# TABLE_SUFFIX = "_new"
# TABLE_SUFFIX = ""
TABLE_SUFFIX = args.table_suffix
# LIB_DIR = "/home/pmd-02/pdt/pdthomas/panther/famlib/rel/PANTHER14.1"  # HPC
# LIB_DIR = "resources/tree_files"  # local
# LIB_DIR = args.tree_files_dir
ignore_cache = False

if args.config_file:
    config = DBCallerConfig(config_path=args.config_file)
else:
    config = DBCallerConfig()
CALLER = DBCaller(config=config)
CLS_VER_SID = config.query_variables.get("classification_version_sid")
COMMENT_HELPER = PthrCommentHelper(
    comments_tablename="comments{}".format(TABLE_SUFFIX),
    classification_version_sid=CLS_VER_SID)
CURATION_STATUS_HELPER = PaintCurationStatusHelper(
    curation_status_tablename="curation_status{}".format(TABLE_SUFFIX),
    classification_version_sid=CLS_VER_SID)
LIB_DIR = config.query_variables.get("tree_files_dir")

QUERY = """
select distinct split_part(na.accession, ':', 1) as family, paa.annotation_id anc_annotation_id, na.public_id ancestor_ptn, na.accession ancestor_an, pa.annotation_id desc_annotation_id, n.public_id descendant_ptn, n.accession descendant_an, gc.accession, gc.classification_id, q.qualifier
from panther_upl.paint_annotation{table_suffix} pa
join panther_upl.paint_evidence{table_suffix} pe on pe.annotation_id = pa.annotation_id
join panther_upl.node n on n.node_id = pa.node_id
join (select child_node_acc, unnest(string_to_array(ancestor_node_acc, ',')) as ancestor from panther_upl.node_all_ancestors) naa on naa.child_node_acc = n.accession
from pthr_db_caller.db_caller import DBCaller
from util.publish_google_sheet import SheetPublishHandler, Sheet
import argparse
import json
import datetime

parser = argparse.ArgumentParser()
parser.add_argument('-b', '--before_date')
parser.add_argument('-a', '--after_date')
parser.add_argument('-p', '--publish_report', action='store_const', const=True)

args = parser.parse_args()

query_file = "scripts/sql/reports/created_ibds_by_curator.sql"

caller = DBCaller()
q_vars = {"before_date": args.before_date, "after_date": args.after_date}
results = caller.run_cmd_line_args(query_file,
                                   query_variables=json.dumps(q_vars),
                                   no_header_footer=True)

handler = SheetPublishHandler()
date_str = datetime.date.today().isoformat()
sheet_title = "{}-ibd_count_by_curator".format(date_str)
sheet = Sheet(title=sheet_title)

headers = [
    "Name", f"IBDs created between {args.before_date} and {args.after_date}"
]
sheet.append_row(headers)
total_count = 0