def _check_housekeeping(id): ''' For each study_id missing "Event" data, check it's Housekeeping information. ''' try: ip, port, sid, username, password = gwas_data_sources.get_db_properties('DEV3') dsn_tns = cx_Oracle.makedsn(ip, port, sid) connection = cx_Oracle.connect(username, password, dsn_tns) cursor = connection.cursor() cursor.prepare(""" SELECT P.ID, P.PUBMED_ID, S.ID, TO_CHAR(H.LAST_UPDATE_DATE, 'yyyy-mm-dd hh24:mi:ss'), TO_CHAR(H.STUDY_ADDED_DATE, 'yyyy-mm-dd hh24:mi:ss'), TO_CHAR(H.CATALOG_PUBLISH_DATE, 'yyyy-mm-dd hh24:mi:ss'), P.PUBLICATION_DATE FROM PUBLICATION P, STUDY S, HOUSEKEEPING H WHERE S.PUBLICATION_ID=P.ID and S.HOUSEKEEPING_ID=H.ID and S.ID = :id """) r = cursor.execute(None, {'id': id}) housekeeping_details = cursor.fetchall() cursor.close() connection.close() except cx_Oracle.DatabaseError, exception: print exception
def _execute_statements(study_id, sql_statement): ''' Insert a list of Insert statements into the EVENT and STUDY_EVENT tables. ''' # Prepare output file to Append STUDY_EVENT Insert statements to existing file output_file = open('missing_event_data.txt', 'a') csvout = csv.writer(output_file) try: ip, port, sid, username, password = gwas_data_sources.get_db_properties('DEV3') dsn_tns = cx_Oracle.makedsn(ip, port, sid) connection = cx_Oracle.connect(username, password, dsn_tns) cursor = connection.cursor() # https://stackoverflow.com/questions/35327135/retrieving-identity-of-most-recent-insert-in-oracle-db-12c new_id = cursor.var(cx_Oracle.NUMBER) sql_event = sql_statement + " returning id into :new_id" # Write data to file to review csvout.writerow(["StudyID: "+study_id]) csvout.writerow([sql_event]) # Get Event_ID of last row inserted cursor.execute(sql_event, {'new_id': new_id}) event_id = int(new_id.getvalue()) # Execute STUDY_EVENT Insert statement sql_study_event = "INSERT INTO STUDY_EVENT VALUES (" + str(study_id) + ", "+ str(event_id) +")" # Write data to file to review csvout.writerow([sql_study_event+"\n"]) cursor.execute(sql_study_event) # commit or rollback changes if args.mode == 'production': cursor.execute('COMMIT') else: cursor.execute('ROLLBACK') cursor.close() connection.close() except cx_Oracle.DatabaseError, exception: print exception
def get_all_studies_missing_event_data(event_type): ''' Get all Study Ids that are Published (Housekeeping.Is_published=1) but do not have any data in the StudyEvent and Event tables. ''' try: ip, port, sid, username, password = gwas_data_sources.get_db_properties('DEV3') dsn_tns = cx_Oracle.makedsn(ip, port, sid) connection = cx_Oracle.connect(username, password, dsn_tns) cursor = connection.cursor() cursor.prepare(""" SELECT P.ID, P.PUBMED_ID, S.ID AS STUDY_ID FROM PUBLICATION P, STUDY S, HOUSEKEEPING H WHERE P.ID=S.PUBLICATION_ID and S.HOUSEKEEPING_ID=H.ID and H.IS_PUBLISHED=1 MINUS SELECT P.ID, P.PUBMED_ID, S.ID AS STUDY_ID FROM PUBLICATION P, STUDY S, HOUSEKEEPING H, STUDY_EVENT SE, EVENT E WHERE S.PUBLICATION_ID=P.ID and S.HOUSEKEEPING_ID=H.ID and S.ID=SE.STUDY_ID and SE.EVENT_ID=E.ID and E.EVENT_TYPE = :event_type and H.IS_PUBLISHED=1 """) r = cursor.execute(None, {'event_type': event_type}) studies = cursor.fetchall() cursor.close() connection.close() except cx_Oracle.DatabaseError, exception: print exception
def get_studies_missing_first_publication_event(): ''' For studies that have both a 'STUDY_STATUS_CHANGE_PUBLISH_STUDY' Event and a Housekeeping.Catalog_Publish_Date, check if the Housekeeping.Catalog_Publish_Date is earlier than the 'STUDY_STATUS_CHANGE_PUBLISH_STUDY' Event and if so create a new Event for this "first" Publication event. ''' try: ip, port, sid, username, password = gwas_data_sources.get_db_properties('DEV3') dsn_tns = cx_Oracle.makedsn(ip, port, sid) connection = cx_Oracle.connect(username, password, dsn_tns) cursor = connection.cursor() # Get all StudyIds from Housekeeping where Status is Published sql_housekeeping_published_status = """ SELECT P.ID, P.PUBMED_ID, S.ID, H.CATALOG_PUBLISH_DATE FROM PUBLICATION P, STUDY S, HOUSEKEEPING H, CURATION_STATUS CS WHERE S.PUBLICATION_ID=P.ID and S.HOUSEKEEPING_ID=H.ID and H.CURATION_STATUS_ID=CS.ID and H.CURATION_STATUS_ID=6 and H.CATALOG_PUBLISH_DATE IS NOT NULL """ cursor.execute(sql_housekeeping_published_status) housekeeping_published_studies = cursor.fetchall() # For Testing missing_initial_publication_event = [] # Review Catalog_Publish_Date for each StudyId for hp_study in housekeeping_published_studies: hp_study_id = str(hp_study[2]) hp_publish_date = hp_study[3] # Query Study-StudyEvent-Event with this study_id cursor.prepare(""" SELECT P.ID, P.PUBMED_ID, S.ID, E.EVENT_DATE FROM PUBLICATION P, STUDY S, STUDY_EVENT SE, EVENT E WHERE S.PUBLICATION_ID=P.ID and S.ID=SE.STUDY_ID and SE.EVENT_ID=E.ID and E.EVENT_TYPE='STUDY_STATUS_CHANGE_PUBLISH_STUDY' and S.ID = :id and ROWNUM <= 1 ORDER BY E.EVENT_DATE DESC """) r = cursor.execute(None, {'id': hp_study_id}) initial_publication_event = cursor.fetchone() event_type = 'STUDY_STATUS_CHANGE_PUBLISH_STUDY' if initial_publication_event is not None and hp_publish_date < initial_publication_event[3]: insert_initial_published = "INSERT INTO EVENT VALUES (NULL, TO_DATE("+ \ "'"+str(hp_publish_date)+"'" +", 'yyyy-mm-dd hh24:mi:ss'), "+ \ "'"+event_type+"'" + ", 14978333, null)" _execute_statements(hp_study_id, insert_initial_published) elif initial_publication_event is None: missing_initial_publication_event.append(hp_study_id) else: pass # print "Studies missing InitialPubEvent Date: ", len(missing_initial_publication_event) cursor.close() connection.close() except cx_Oracle.DatabaseError, exception: print exception
def __init__(self, database): self.ip, self.port, self.sid, self.username, self.password = \ gwas_data_sources.get_db_properties(database)
def get_curation_review_data(pmid, ancestry_mode, curator): ''' Get data for Level 2 review. ''' # List of queries curation_level2_data_sql = """ SELECT S.ID, S.ACCESSION_ID, P.PUBMED_ID, A.FULLNAME, TO_CHAR(P.PUBLICATION_DATE, 'dd-mm-yyyy'), P.PUBLICATION, P.TITLE, TO_CHAR(H.STUDY_ADDED_DATE, 'dd-mm-yyyy'), CS.STATUS, TO_CHAR(H.CATALOG_PUBLISH_DATE, 'dd-mm-yyyy'), S.INITIAL_SAMPLE_SIZE, S.REPLICATE_SAMPLE_SIZE FROM STUDY S, HOUSEKEEPING H, PUBLICATION P, AUTHOR A, CURATION_STATUS CS WHERE S.HOUSEKEEPING_ID=H.ID and S.PUBLICATION_ID=P.ID and P.FIRST_AUTHOR_ID=A.ID and H.CURATION_STATUS_ID=CS.ID and P.PUBMED_ID= :pmid """ study_dup_tag_sql = """ SELECT N.TEXT_NOTE FROM STUDY S, NOTE N WHERE S.ID=N.STUDY_ID and N.NOTE_SUBJECT_ID=9 and S.ID= :study_id """ study_platform_sql = """ SELECT listagg(PLA.MANUFACTURER, ', ') WITHIN GROUP (ORDER BY PLA.MANUFACTURER), S.QUALIFIER, S.SNP_COUNT, S.IMPUTED FROM PLATFORM PLA, STUDY_PLATFORM SP, STUDY S WHERE PLA.ID=SP.PLATFORM_ID and SP.STUDY_ID=S.ID and S.ID= :study_id GROUP BY S.QUALIFIER, S.SNP_COUNT, S.IMPUTED """ study_genotyping_technology_sql = """ SELECT listagg(GT.GENOTYPING_TECHNOLOGY) WITHIN GROUP (ORDER BY GT.GENOTYPING_TECHNOLOGY) FROM STUDY S, STUDY_GENOTYPING_TECHNOLOGY SGT, GENOTYPING_TECHNOLOGY GT WHERE S.ID=SGT.STUDY_ID and SGT.GENOTYPING_TECHNOLOGY_ID=GT.ID and S.ID= :study_id """ study_reported_trait_sql = """ SELECT listagg(DT.TRAIT, ', ') WITHIN GROUP (ORDER BY DT.TRAIT) FROM STUDY S, STUDY_DISEASE_TRAIT SDT, DISEASE_TRAIT DT WHERE S.ID=SDT.STUDY_ID and SDT.DISEASE_TRAIT_ID=DT.ID and S.ID= :study_id """ study_mapped_trait_sql = """ SELECT listagg(ET.TRAIT, ', ') WITHIN GROUP (ORDER BY ET.TRAIT), ET.URI FROM STUDY S, STUDY_EFO_TRAIT SETR, EFO_TRAIT ET WHERE S.ID=SETR.STUDY_ID and SETR.EFO_TRAIT_ID=ET.ID and S.ID= :study_id GROUP BY ET.URI """ study_association_cnt_sql = """ SELECT COUNT(A.ID) FROM STUDY S, ASSOCIATION A WHERE S.ID=A.STUDY_ID and S.ID= :study_id """ # Queries for Expanded Ancestry Option expanded_ancestry_sql = """ SELECT A.ID, A.TYPE, A.NUMBER_OF_INDIVIDUALS, A.DESCRIPTION FROM STUDY S, ANCESTRY A WHERE A.STUDY_ID=S.ID and S.ID= :study_id """ ancestral_group_sql = """ SELECT listagg(AG.ANCESTRAL_GROUP, ', ') WITHIN GROUP (ORDER BY AG.ANCESTRAL_GROUP) FROM ANCESTRY A, ANCESTRY_ANCESTRAL_GROUP AAG, ANCESTRAL_GROUP AG WHERE A.ID=AAG.ANCESTRY_ID and AAG.ANCESTRAL_GROUP_ID=AG.ID and A.ID= :ancestry_id """ ancestry_country_of_origin_sql = """ SELECT listagg(C.COUNTRY_NAME, ', ') WITHIN GROUP (ORDER BY C.COUNTRY_NAME) FROM ANCESTRY A, ANCESTRY_COUNTRY_OF_ORIGIN ACOO, COUNTRY C WHERE A.ID=ACOO.ANCESTRY_ID and ACOO.COUNTRY_ID=C.ID and A.ID= :ancestry_id """ ancestry_country_of_recruitment = """ SELECT listagg(C.COUNTRY_NAME, ', ') WITHIN GROUP (ORDER BY C.COUNTRY_NAME) FROM ANCESTRY A, ANCESTRY_COUNTRY_RECRUITMENT ACOR, COUNTRY C WHERE A.ID=ACOR.ANCESTRY_ID and ACOR.COUNTRY_ID=C.ID and A.ID= :ancestry_id """ all_level2_data = [] if ancestry_mode == 'collapsed': level2_attr_list = ['STUDY_ID', 'DUP_TAG', 'REPORTED_TRAIT', 'STUDY_CREATION_DATE', 'CURATION_STATUS', 'STUDY_ACCCESSION',\ 'CATALOG_PUBLISH_DATE', 'PUBMED_ID', 'FIRST_AUTHOR', 'PUBLICATION_DATE', 'JOURNAL', 'LINK', 'TITLE', 'PLATFORM [SNPS PASSING QC]',\ 'ASSOCIATION_COUNT', 'MAPPED_TRAIT', 'MAPPED_TRAIT_URI', 'GENOTYPING_TECHNOLOGY', 'INITIAL_SAMPLE_DESCRIPTION', \ 'REPLICATION_SAMPLE_DESCRIPTION'] else: level2_attr_list = ['STUDY_ID', 'DUP_TAG', 'REPORTED_TRAIT', 'STUDY_CREATION_DATE', 'CURATION_STATUS', 'STUDY_ACCCESSION',\ 'CATALOG_PUBLISH_DATE', 'PUBMED_ID', 'FIRST_AUTHOR', 'PUBLICATION_DATE', 'JOURNAL', 'LINK', 'TITLE', 'PLATFORM [SNPS PASSING QC]',\ 'ASSOCIATION_COUNT', 'MAPPED_TRAIT', 'MAPPED_TRAIT_URI', 'GENOTYPING_TECHNOLOGY', 'INITIAL_SAMPLE_DESCRIPTION', \ 'REPLICATION_SAMPLE_DESCRIPTION', 'STAGE', 'NUMBER_OF_INDIVIDUALS', 'BROAD_ANCESTRAL_CATEGORY', 'COUNTRY_OF_ORIGIN',\ 'COUNTRY_OF_RECRUITMENT', 'ADDITONAL_ANCESTRY_DESCRIPTION'] # Get First Author name to include in output filename first_author_sql = """ SELECT REPLACE(A.FULLNAME_STANDARD, ' ', '') FROM PUBLICATION P, AUTHOR A WHERE P.FIRST_AUTHOR_ID=A.ID and P.PUBMED_ID= :pmid """ first_author = "" try: ip, port, sid, username, password = gwas_data_sources.get_db_properties( DATABASE_NAME) dsn_tns = cx_Oracle.makedsn(ip, port, sid) connection = cx_Oracle.connect(username, password, dsn_tns) with contextlib.closing(connection.cursor()) as cursor: cursor.prepare(first_author_sql) cursor.execute(None, {'pmid': pmid}) first_author_data = cursor.fetchone() first_author = first_author_data[0] connection.close() except cx_Oracle.DatabaseError, exception: print exception
connection.close() except cx_Oracle.DatabaseError, exception: print exception TIMESTAMP = get_timestamp() outfile = open( first_author + "_" + pmid + "-" + ancestry_mode + "_" + curator + "_" + TIMESTAMP + ".csv", "w") csvout = csv.writer(outfile) csvout.writerow(level2_attr_list) # Get data for curation review file try: ip, port, sid, username, password = gwas_data_sources.get_db_properties( DATABASE_NAME) dsn_tns = cx_Oracle.makedsn(ip, port, sid) connection = cx_Oracle.connect(username, password, dsn_tns) with contextlib.closing(connection.cursor()) as cursor: cursor.prepare(curation_level2_data_sql) cursor.execute(None, {'pmid': pmid}) curation_queue_data = cursor.fetchall() for data in curation_queue_data: data_results = {} data_results['STUDY_ID'] = data[0] # Account for studies that do not yet have an AccessionId