def bcpFiles():
    # Run the queued SQL deletions, then bcp the marker/reference/alias
    # files into the database.  No-op in DEBUG or bcp-off mode.
    if DEBUG or not bcpon:
        #print execSQL
        return

    markerFile.close()
    refFile.close()
    aliasFile.close()
    db.commit()

    bcp1 = bcpCommand % (markerTable, markerFileName)
    bcp2 = bcpCommand % (refTable, refFileName)
    bcp3 = bcpCommand % (aliasTable, aliasFileName)

    # execute the sql deletions
    for r in execSQL:
        db.sql(r, None)

    for bcpCmd in [bcp1, bcp2, bcp3]:
        diagFile.write('%s\n' % bcpCmd)
        os.system(bcpCmd)

    db.commit()
    return
def initialize():
    # Open the GAF input file and the annotation/error output files (paths
    # taken from environment variables), then load the gene-marker and
    # evidence-code lookups from the database.
    # Returns 0 on success, 1 if any file cannot be opened.
    global inFileName, inFile
    global annotFileName, annotFile
    global errorFileName, errorFile
    global jnumID
    global markerList
    global evidenceCodeList

    #
    # open files
    #
    inFileName = os.environ['INFILE_NAME_GAF']
    annotFileName = os.environ['INFILE_NAME']
    errorFileName = os.environ['INFILE_NAME_ERROR']
    jnumID = os.environ['JNUMBER']

    try:
        inFile = open(inFileName, 'r')
    except:
        print 'Cannot open input file: ' + inFileName
        return 1

    try:
        annotFile = open(annotFileName, 'w')
    except:
        print 'Cannot open annotation file for writing: ' + annotFileName
        return 1

    try:
        errorFile = open(errorFileName, 'w')
    except:
        print 'Cannot open error file for writing: ' + errorFileName
        return 1

    #
    # list of markers type 'gene' (preferred MGI IDs only)
    #
    results = db.sql('''select a.accID from MRK_Marker m, ACC_Accession a
        where m._Marker_Type_key = 1
        and m._Marker_key = a._Object_key
        and a._MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.preferred = 1
        ''', 'auto')
    for r in results:
        value = r['accID']
        markerList.append(value)

    #
    # list of evidence codes (abbreviation -> term key)
    #
    results = db.sql('select _Term_key, abbreviation from VOC_Term where _Vocab_key = 3', 'auto')
    for r in results:
        evidenceCodeList[r['abbreviation']] = r['_Term_key']

    return 0
def bcpFiles():
    # requires:
    #
    # effects:
    #	BCPs the data into the database
    #
    # returns:
    #	nothing
    #
    synFile.close()

    if not bcpon:
        print 'Skipping BCP. Mode: %s' % mode
        sys.stdout.flush()
        return

    print 'Executing BCP'
    sys.stdout.flush()

    db.commit()
    bcp1 = bcpCommand % ('MGI_Synonym', synFileName)
    diagFile.write('%s\n' % bcp1)
    os.system(bcp1)
    db.commit()

    # resync the MGI_Synonym key sequence with the bcp-loaded rows
    db.sql(''' select setval('mgi_synonym_seq', (select max(_Synonym_key) from MGI_Synonym)) ''', None)
    db.commit()
def getPrimaryKeys():
    '''
    # requires:
    #
    # effects:
    #	get/store next primary keys
    #
    # returns:
    #	nothing
    #
    '''
    global exptKey, accKey, mgiKey

    # experiment key comes straight from the database sequence
    exptKey = db.sql(''' select nextval('mld_expts_seq') as maxKey ''', 'auto')[0]['maxKey']

    # accession key: one past the current max, or 1000 for an empty table
    accResults = db.sql('''select max(_Accession_key) + 1 as maxKey from ACC_Accession''', 'auto')
    accKey = accResults[0]['maxKey']
    if accKey is None:
        accKey = 1000

    # next MGI ID numeric part for the configured prefix
    mgiResults = db.sql('''select maxNumericPart + 1 as maxKey from ACC_AccessionMax where prefixPart = '%s' ''' % (mgiPrefix), 'auto')
    mgiKey = mgiResults[0]['maxKey']
def doCounts():
    '''
    Get counts of sample records from db and write them to stdout
    '''
    sys.stdout.write(time.ctime() + '\n')
    sys.stdout.write("Hitting database %s %s as mgd_public\n" % \
        (args.host, args.db))
    sys.stdout.write(getRestrictedArticleText())

    # %s is filled in with the name of the tmp table to count
    selectCountSQL = 'select count(distinct _refs_key) as num from %s\n'

    db.sql(BUILD_OMIT_TABLE, 'auto')
    db.sql(BUILD_BASE_TABLE, 'auto')

    doCount(OMIT_TEXT, [selectCountSQL % "tmp_omit"])

    # one count per sample subset; buildFinalTmpTableSQL returns the temp
    # table name plus the SQL to build it
    tmpTableName, finalTmpTableSQL = buildFinalTmpTableSQL('discard_after')
    doCount("Discard after: %s - %s" % (LIT_TRIAGE_DATE, END_DATE),
        finalTmpTableSQL + [selectCountSQL % tmpTableName])

    tmpTableName, finalTmpTableSQL = buildFinalTmpTableSQL('keep_after')
    doCount("Keep after: %s - %s" % (LIT_TRIAGE_DATE, END_DATE),
        finalTmpTableSQL + [selectCountSQL % tmpTableName])

    tmpTableName, finalTmpTableSQL = buildFinalTmpTableSQL('keep_before')
    doCount("Keep before: %s - %s" % (START_DATE, LIT_TRIAGE_DATE),
        finalTmpTableSQL + [selectCountSQL % tmpTableName])

    tmpTableName, finalTmpTableSQL = buildFinalTmpTableSQL('keep_tumor')
    doCount("Tumor papers: %s - %s" % (TUMOR_START_DATE, START_DATE),
        finalTmpTableSQL + [selectCountSQL % tmpTableName])

    tmpTableName, finalTmpTableSQL = buildFinalTmpTableSQL('test_2020')
    doCount("Test set from 2020",
        finalTmpTableSQL + [selectCountSQL % tmpTableName])
def loadDictionaries():
    '''
    # requires:
    #
    # effects:
    #	loads global dictionaries/lists: chromosomeList for lookup
    #
    # returns:
    #	nothing
    '''
    global chromosomeList, assayDict, inputChrList

    # valid mouse chromosomes, in sequence order (skip 'UN')
    for row in db.sql('''select chromosome from MRK_Chromosome
        where _Organism_key = 1
        and chromosome not in ('UN')
        order by sequenceNum''', 'auto'):
        chromosomeList.append(row['chromosome'])

    # assay-type description -> key lookup
    for row in db.sql('select * from MLD_Assay_Types', 'auto'):
        assayDict[row['description']] = row['_Assay_Type_key']

    # create unique list of chromosomes from input file
    for line in inputFile.readlines():
        chromosome = str.split(line[:-1], '|')[1]
        if chromosome not in inputChrList:
            inputChrList.append(chromosome)
    inputFile.close()
def mmrrc():
    # Report MMRRC strains whose strain-record markers/alleles do not
    # exactly match those of their associated genotype records.
    mmrrcfp = reportlib.init(sys.argv[0], outputdir = os.environ['QCOUTPUTDIR'], fileExt = '.mmrrc.rpt')

    title = 'MMRRC Strains w/ Genotype Associations where the Markers/Alleles of the Strain record\n' + \
        'do not exactly match the Markers/Alleles of the Genotype record.'

    mmrrcfp.write(title + '\n\n')
    mmrrcfp.write('MMRRC#' + reportlib.TAB)
    mmrrcfp.write('Strain' + reportlib.TAB)
    mmrrcfp.write('Genotypes' + reportlib.TAB)
    mmrrcfp.write(reportlib.CRT)

    # MMNC Strains w/ Genotype Associations; exclude wild type alleles
    db.sql('''
        select distinct sa.accID, s.strain, g._Genotype_key, g._Strain_key, a._Marker_key, a._Allele_key
        into temporary table strains
        from PRB_Strain s, PRB_Strain_Genotype g, GXD_AlleleGenotype a, ALL_Allele aa, ACC_Accession sa
        where s.strain like '%/Mmnc'
        and s._Strain_key = g._Strain_key
        and g._Genotype_key = a._Genotype_key
        and a._Allele_key = aa._Allele_key
        and aa.isWildType = 0
        and s._Strain_key = sa._Object_key
        and sa._MGIType_key = 10
        and sa._LogicalDB_key = 38
        and sa.preferred = 1
        ''', None)
    db.sql('create index strains_idx2 on strains(_Strain_key)', None)

    printReport(mmrrcfp)
def processSlim():
    # Report DO slim terms that are descendants of another slim term, and
    # delete each such redundant member from the slim set (set 1048).
    # Returns 0.
    dosanityFileName = os.environ['DO_MGI_SLIM_SANITY_FILE']
    dosanityFile = open(dosanityFileName, 'w')

    DELETE_SLIM = 'delete from MGI_SetMember where _Set_key = 1048 and _SetMember_key = %s'
    SPACE = ' '

    dosanityFile.write('\n\nDO slim terms that are decendents of another DO slim term\n\n')
    dosanityFile.write('descendent_term' + 35*SPACE + 'another_slim_term\n')
    dosanityFile.write('---------------' + 35*SPACE + '-----------------\n\n')

    # find slim members reachable (via DAG closure) from another slim member
    results = db.sql('''
        select tt.term as descendent_term, ss.term as another_slim_term, t._SetMember_key
        from MGI_SetMember t, DAG_Closure dc, MGI_SetMember s, VOC_Term tt, VOC_Term ss
        where t._Set_key = 1048
        and t._Object_key = dc._DescendentObject_key
        and dc._AncestorObject_key = s._Object_key
        and s._Set_key = 1048
        and t._Object_key != s._Object_key
        and t._Object_key = tt._Term_key
        and s._Object_key = ss._Term_key
        ''', 'auto')

    for r in results:
        dosanityFile.write('%-50s %-50s\n' % (r['descendent_term'], r['another_slim_term']))
        deleteSQL = DELETE_SLIM % (r['_SetMember_key'])
        #dosanityFile.write(deleteSQL + '\n\n')
        db.sql(deleteSQL, None)

    dosanityFile.close()
    db.commit()
    return 0
def bcpFiles():
    # requires:
    #
    # effects:
    #	BCPs the data into the database
    #
    # returns:
    #	nothing
    #
    bcpdelim = "|"

    if DEBUG or not bcpon:
        return

    refFile.close()

    bcp1 = 'cat %s | bcp %s..%s in %s -c -t\"%s" -S%s -U%s' \
        % (passwordFileName, db.get_sqlDatabase(), \
        'MGI_Reference_Assoc', refFileName, bcpdelim, db.get_sqlServer(), db.get_sqlUser())

    diagFile.write('%s\n' % bcp1)
    os.system(bcp1)

    # update mgi_reference_assoc_seq auto-sequence
    # NOTE(review): this setval uses max(_Assoc_key) + 1 while other setval
    # calls in this codebase use max() alone; nextval() already returns
    # last_value + 1, so confirm the +1 is intended (it skips one key).
    db.sql(''' select setval('mgi_reference_assoc_seq', (select max(_Assoc_key) + 1 from MGI_Reference_Assoc)) ''', None)
def getQueryResults(i, baseQ, textQ):
    """
    Run SQL for basic fields and extracted text fields, & join them.
    Return list of records.
    Each record represents one article w/ its basic fields &
    its extracted text.
    """
    # basic reference fields
    started = time.time()
    refRcds = db.sql(baseQ.split(SQLSEPARATOR), 'auto')[-1]
    verbose( "Query %d: %d references retrieved\n" % (i, len(refRcds)))
    verbose( "SQL time: %8.3f seconds\n\n" % (time.time() - started))

    # extended text parts
    started = time.time()
    extTextRcds = db.sql(textQ.split(SQLSEPARATOR), 'auto')[-1]
    verbose( "Query %d: %d extracted text rcds retrieved\n" % \
        (i, len(extTextRcds)))
    verbose( "SQL time: %8.3f seconds\n\n" % (time.time() - started))

    # join basic fields and extracted text (in place on refRcds)
    started = time.time()
    verbose( "Joining ref info to extracted text:\n")
    ExtractedTextSet(extTextRcds).joinRefs2ExtText(refRcds, allowNoText=True)
    verbose( "%8.3f seconds\n\n" % (time.time() - started))

    return refRcds
def deleteAccession(aKey): print "Deleting _accession_key = %s" % aKey # delete from ACC_AccessionReference first db.sql('''delete from ACC_AccessionReference where _Accession_key = %s''' % aKey, None) db.sql('''delete from ACC_Accession where _Accession_key = %s''' % aKey, None)
def load ( self, markerType ):
#------------------------------------------------------------------
# INPUTS:
#     markerType : (markerType) or None
# OUTPUTS: none
# ASSUMES:
#  - That db.sql() can access needed environment variables in order
#    to determine a default server/database.
# SIDE EFFECTS: populates the class's primary, internal data members
#     from the default MGD instance.
# EXCEPTIONS:
# COMMENTS:
#------------------------------------------------------------------
    # for significant marker info: sequence ID, symbol, marker key for
    # mouse markers with a SEQDB accession
    qryS = 'select a.accID as sid, m.symbol, m._Marker_key '
    qryF = 'from ACC_Accession a, MRK_Marker m '
    qryW = 'where a._MGIType_key = 2 ' + \
        'and a._LogicalDB_key = %u ' % (SEQDB) + \
        'and a._Object_key = m._Marker_key ' + \
        'and m._Organism_key = 1'

    # optionally restrict to a single marker type by name
    if markerType:
        qryF = qryF + ', MRK_Types t '
        qryW = qryW + ' and m._Marker_Type_key = t._Marker_Type_key ' + \
            'and t.name = \'%s\' ' % (markerType)

    sql = qryS + qryF + qryW
    print sql
    # sidParser is the per-row handler that captures the results
    db.sql ( qryS + qryF + qryW, self.sidParser )
    return
def cdnas(): fp.write('cDNAs:' + 2*CRT) # # number of mouse cDNAs # db.sql('select _Source_key into temporary table mussource from PRB_Source where _Organism_key = 1', None) db.sql('create index mussource_idx on mussource(_Source_key)', None) db.sql('''select p._Probe_key into temporary table cdnas from mussource s, PRB_Probe p where s._Source_key = p._Source_key and p._SegmentType_key = 63468 ''', None) db.sql('create index cdnas_idx on cdnas(_Probe_key)', None) results = db.sql('select count(_Probe_key) as ccount from cdnas', 'auto') for r in results: fp.write('mouse cDNAs : ' + str(r['ccount']) + CRT) # # number of markers curated to mouse cDNAs # results = db.sql(''' select count(distinct(pm._Marker_key)) as mcount from cdnas c, PRB_Marker pm where c._Probe_key = pm._Probe_key ''', 'auto') for r in results: fp.write('Markers curated to cDNAs: ' + str(r['mcount']) + CRT)
def verifyMode():
    '''
    # requires:
    #
    # effects:
    #	Verifies the processing mode is valid. If it is not valid,
    #	the program is aborted.
    #	Sets globals based on processing mode.
    #	Deletes data based on processing mode.
    #
    # returns:
    #	nothing
    #
    '''
    global DEBUG

    if mode == 'preview':
        DEBUG = 1
    elif mode in ('load', 'incremental'):
        DEBUG = 0
        # a full load wipes the existing notes of this type first
        if mode == 'load':
            db.sql('delete from MGI_Note where _MGIType_key = %s and _NoteType_key = %s' % (objectTypeKey, noteTypeKey), None)
    else:
        exit(1, 'Invalid Processing Mode: %s\n' % (mode))
def miscellaneous():
    # Write the alias and synonym sections of the marker report.
    #
    # Fix: the original used Sybase/SQL Server syntax
    # ("cdate = convert(char(10), creation_date, 101)"), which is invalid
    # in PostgreSQL -- the dialect used throughout this codebase
    # (nextval/setval, temporary tables).  Replaced with
    # to_char(creation_date, 'MM/DD/YYYY') as cdate, producing the same
    # mm/dd/yyyy string that style 101 did.

    #
    # alias
    #
    fp.write('#\n# Alias\n#\n')
    results = db.sql('''
        select alias, to_char(creation_date, 'MM/DD/YYYY') as cdate
        from MRK_Alias_View
        where _Marker_key = %s
        order by alias
        ''' % (markerKey), 'auto')
    for r in results:
        fp.write(string.ljust(r['alias'], 30) + TAB)
        fp.write(string.ljust(r['cdate'], 15) + CRT)

    #
    # synonym
    #
    fp.write('#\n# Synonym\n#\n')
    results = db.sql('''
        select synonym, to_char(creation_date, 'MM/DD/YYYY') as cdate
        from MGI_Synonym
        where _MGIType_key = 2
        and _Object_key = %s
        ''' % (markerKey), 'auto')
    for r in results:
        fp.write(string.ljust(r['synonym'], 30) + TAB)
        fp.write(string.ljust(r['cdate'], 15) + CRT)
def loadFileSource(file, tempno):
    # Load a tab-delimited cluster file into temp table cluster_set<tempno>.
    # Returns 1 if the input file does not exist, 0 on success.
    if not os.path.exists(file):
        print("Input file does not exist: %s" % file)
        return 1

    inFile = open(file, 'r')

    # temp table holding (cid, cmid) cluster pairs
    db.sql("create temporary table cluster_set%d " % tempno +
        "(cid varchar(30), cmid varchar(30))", None)

    # insert template; filled in per input line below
    insert_stmt = "insert into cluster_set%d " % tempno + \
        "values ('%s', '%s')"

    for line in inFile.readlines():
        cid, cmid = line[:-1].split('\t')
        db.sql(insert_stmt % (cid, cmid), None)

    inFile.close()
    return 0
def processFile(): lineNum = 0 # For each line in the input file for line in inputFile.readlines(): lineNum = lineNum + 1 # Split the line into tokens tokens = string.split(line[:-1], '\t') try: probeID = tokens[0] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) probeKey = loadlib.verifyObject(probeID, mgiTypeKey, None, lineNum, errorFile) if probeKey == 0: continue if DEBUG: print deleteSQL % (probeKey) continue db.sql(deleteSQL % (probeKey), None)
def bcpFiles():
    # requires:
    #
    # effects:
    #	BCPs the data into the database
    #
    # returns:
    #	nothing
    #
    bcpdelim = "|"

    if DEBUG or not bcpon:
        return

    refFile.close()

    bcp1 = 'cat %s | bcp %s..%s in %s -c -t\"%s" -S%s -U%s' \
        % (passwordFileName, db.get_sqlDatabase(), \
        'MGI_Reference_Assoc', refFileName, bcpdelim, db.get_sqlServer(), db.get_sqlUser())

    diagFile.write('%s\n' % bcp1)
    os.system(bcp1)

    # update mgi_reference_assoc_seq auto-sequence
    # NOTE(review): max(_Assoc_key) + 1 skips one key, since nextval()
    # already returns last_value + 1; other setvals here use max() --
    # confirm which is intended.
    db.sql(''' select setval('mgi_reference_assoc_seq', (select max(_Assoc_key) + 1 from MGI_Reference_Assoc)) ''', None)
def processB6():
    # For each B6 strain-marker gene model, resolve its raw biotype to a
    # feature-type MCV ID and write one annotation-load line.
    # Returns 0; unresolvable biotypes are reported and skipped.
    print('Processing B6')

    db.sql('''-- get b6 strain/markers and their accids
        select a.accid as b6ID, a._Object_key as _StrainMarker_key
        into temporary table b6Ids
        from ACC_Accession a
        where a._MGIType_key = 44 --MRK_StrainMarker
        and a._LogicalDB_key = 212 --MGI B6
        and a.preferred = 1''', None)

    db.sql('''create index idx1 on b6Ids(b6ID)''', None)

    results = db.sql('''-- get the biotypes for the B6 gene models
        select b6.b6ID, sgm.rawBiotype as biotype
        from b6Ids b6, ACC_Accession a, SEQ_GeneModel sgm
        where b6.b6ID = a.accid
        and a._MGIType_key = 19
        and a._LogicalDB_key = 212 --MGI B6
        and a.preferred = 1
        and a._Object_key = sgm._Sequence_key''', 'auto')

    for r in results:
        b6ID = r['b6ID']
        biotype = r['biotype'] # actually the feature type
        if biotype not in featureTypeLookup:
            print('Cannot resolve B6: %s to Feature Type' % biotype)
            continue
        mcvID = featureTypeLookup[biotype]
        # tab-delimited annotation-load line
        fpB6AnnotFile.write('%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s' % (mcvID, TAB, b6ID, TAB, B6_JNUM, TAB, EVIDENCE, TAB, B6_JNUM, TAB, QUALIFIER, TAB, EDITOR, TAB, DATE, TAB, NOTES, TAB, B6_LDBNAME, CRT))
    return 0
def getDagKey ( dag,		# str. corresponds to DAG_DAG.name
    vocab = None	# str.vocab name or integer vocab key
    ):
    # Purpose: return the _DAG_key for the given 'dag' name
    # Returns: integer
    # Assumes: nothing
    # Effects: queries the database
    # Throws: 1. error if the given 'dag' is not in the database;
    #	2. propagates any exceptions raised by db.sql()
    # Notes: Since the DAG_DAG.name field does not require unique
    #	values, we may also need to know which 'vocab' it relates to.
    # Fix: both db.sql() calls were missing the 'auto' result indicator
    # that every other result-fetching query in this codebase passes, so
    # the len(result)/result[0] handling below could not see any rows.

    if vocab:
        # accept a vocab name and resolve it to its key
        if type(vocab) == str:
            vocab = getVocabKey (vocab)
        result = db.sql ('''select dd._DAG_key
            from DAG_DAG dd, VOC_VocabDAG vvd
            where dd._DAG_key = vvd._DAG_key
            and vvd._Vocab_key = %d
            and dd.name = \'%s\'''' % (vocab, dag), 'auto')
    else:
        result = db.sql ('''select _DAG_key
            from DAG_DAG
            where name = \'%s\'''' % dag, 'auto')

    # exactly one match is required; zero (or ambiguous) is an error
    if len(result) != 1:
        raise VocloadlibError(unknown_dag % dag)
    return result[0]['_DAG_key']
def updateDatabase(cmds):
    # Log in using MGD_* environment credentials and run the given update
    # statements in batches of 100 over one shared connection; bails out on
    # any database failure.
    dbServer = os.environ['MGD_DBSERVER']
    dbName = os.environ['MGD_DBNAME']
    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']
    # NOTE(review): string.strip() is the Python-2-only string-module form
    # (removed in Python 3) -- confirm the target interpreter.
    dbPassword = string.strip(open(dbPasswordFile, 'r').readline())

    db.set_sqlLogin(dbUser, dbPassword, dbServer, dbName)

    # process in batches of 100
    total = len(cmds)
    try:
        db.useOneConnection(1)
        while cmds:
            print('Current running time (secs): %s' % (time.time() - STARTTIME))
            db.sql(cmds[:100], 'auto')
            cmds = cmds[100:]
        db.useOneConnection(0)
    except:
        bailout('Failed during database updates')

    print('Processed %d updates to SEQ_Sequence._SequenceStatus_key' % total)
    print('Total running time (secs): %s' % (time.time() - STARTTIME))
    return
def processMGP():
    # For each MGP strain-marker gene model, resolve its raw biotype to a
    # feature-type MCV ID and write one annotation-load line.
    # Returns 0; unresolvable biotypes are reported and skipped.
    print('Processing MGP')

    db.sql('''-- get mgp strain/markers and their accids
        select a.accid as mgpID, a._Object_key as _StrainMarker_key
        into temporary table mgpIds
        from ACC_Accession a
        where a._MGIType_key = 44 --MRK_StrainMarker
        and a._LogicalDB_key = 209 --MGP
        and a.preferred = 1''', None)

    db.sql('''create index idx2 on mgpIds(mgpID)''', None)

    results = db.sql('''-- get the biotypes for the MGP gene models
        select mgp.mgpID, sgm.rawBiotype as biotype
        from mgpIds mgp, ACC_Accession a, SEQ_GeneModel sgm
        where mgp.mgpID = a.accid
        and a._MGIType_key = 19
        and a._LogicalDB_key = 209 --MGP
        and a.preferred = 1
        and a._Object_key = sgm._Sequence_key''', 'auto')

    for r in results:
        mgpID = r['mgpID']
        biotype = r['biotype'] # actually the feature type
        if biotype not in featureTypeLookup:
            print('Cannot resolve MGP: %s to Feature Type' % biotype)
            continue
        mcvID = featureTypeLookup[biotype]
        # tab-delimited annotation-load line
        fpMgpAnnotFile.write('%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s' % (mcvID, TAB, mgpID, TAB, MGP_JNUM, TAB, EVIDENCE, TAB, MGP_JNUM, TAB, QUALIFIER, TAB, EDITOR, TAB, DATE, TAB, NOTES, TAB, MGP_LDBNAME, CRT))
    return 0
def loadDictionaries():
    '''
    # requires:
    #
    # effects:
    #	loads global dictionaries/lists: chromosomeList for lookup
    #
    # returns:
    #	nothing
    '''
    global chromosomeList, assayDict, inputChrList

    # mouse chromosomes (excluding 'UN'), in sequence order
    chromoResults = db.sql('''select chromosome from MRK_Chromosome
        where _Organism_key = 1
        and chromosome not in ('UN')
        order by sequenceNum''', 'auto')
    chromosomeList.extend([r['chromosome'] for r in chromoResults])

    # assay-type description -> key lookup
    assayResults = db.sql('select * from MLD_Assay_Types', 'auto')
    assayDict.update((r['description'], r['_Assay_Type_key']) for r in assayResults)

    # create unique list of chromosomes from input file
    for line in inputFile.readlines():
        tokens = string.split(line[:-1], '|')
        if tokens[1] not in inputChrList:
            inputChrList.append(tokens[1])
    inputFile.close()
def make_datasets():
    # Render a static HTML page for each dataset (first 10) and store the
    # generated filename back on the dataset row.
    db.connect()

    # template environment is loop-invariant, build it once
    jenv = Environment(loader=FileSystemLoader("templates"))

    for dataset in db.sql("select * from dataset limit 10", as_dict=True):
        # set group
        group = db.sql(
            "select `group` from dataset_group where dataset=%s limit 1",
            dataset["name"])[0][0]
        dataset["group_info"] = db.sql("select * from `group` where name=%s",
            group, as_dict=True)[0]

        # other libs
        dataset["json"] = json
        dataset["public_path"] = "../"  # all paths relative to

        # make page
        html = jenv.get_template("dataset.html").render(dataset)
        fname = re.sub(r'\W+', '', dataset["name"].lower()).replace(" ", "_") + ".html"
        outpath = os.path.join("public", "datasets", fname)
        with open(outpath, "w") as htmlfile:
            htmlfile.write(html)

        db.conn.execute("update dataset set html_filename=%s where name=%s",
            (fname, dataset["name"]))
def getPrimaryKeys():
    '''
    # requires:
    #
    # effects:
    #	get/store next primary keys
    #
    # returns:
    #	nothing
    #
    '''
    global exptKey, accKey, mgiKey

    # next experiment key; 1000 when the table is empty
    rows = db.sql('''select max(_Expt_key) + 1 as maxKey from MLD_Expts''', 'auto')
    exptKey = 1000 if rows[0]['maxKey'] is None else rows[0]['maxKey']

    # next accession key; 1000 when the table is empty
    rows = db.sql('''select max(_Accession_key) + 1 as maxKey from ACC_Accession''', 'auto')
    accKey = 1000 if rows[0]['maxKey'] is None else rows[0]['maxKey']

    # next MGI ID numeric part for the configured prefix
    rows = db.sql('''select maxNumericPart + 1 as maxKey from ACC_AccessionMax where prefixPart = '%s' ''' % (mgiPrefix), 'auto')
    mgiKey = rows[0]['maxKey']
def executeBCP():
    '''
    # requires:
    #
    # effects:
    #	BCPs the data into the database
    #
    # returns:
    #	nothing
    #
    '''
    synFile.close()
    db.commit()

    # bcpin.csh <server> <database> <table> <datadir> <file> <delim> <eol> <schema>
    bcpCommand = os.environ['PG_DBUTILS'] + '/bin/bcpin.csh'
    bcp1 = '%s %s %s %s %s %s "|" "\\n" mgd' % \
        (bcpCommand, db.get_sqlServer(), db.get_sqlDatabase(), 'MGI_Synonym', datadir, 'MGI_Synonym.bcp')

    diagFile.write('%s\n' % bcp1)
    os.system(bcp1)

    # update mgi_synonym_seq auto-sequence
    db.sql(''' select setval('mgi_synonym_seq', (select max(_synonym_key) from MGI_Synonym)) ''', None)
    db.commit()
def init():
    # Purpose: Initialization of database connection and file descriptors,
    #	create database lookup dictionaries; create dictionary from
    #	input file
    # Returns: 1 if file descriptors cannot be initialized
    # Assumes: Nothing
    # Effects: opens a database connection
    # Throws: Nothing

    global egToMarkerDict, mgiToMarkerDict
    global fpInFile, fpClustererFile, fpLoadFile, fpQcRpt

    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    try:
        fpInFile = open(inFilePath, 'r')
    except:
        exit('Could not open file for reading %s\n' % inFilePath)
    try:
        fpLoadFile = open(loadFilePath, 'w')
    except:
        exit('Could not open file for writing %s\n' % loadFilePath)
    try:
        fpQcRpt = open(qcRptPath, 'w')
    except:
        exit('Could not open file for writing %s\n' % qcRptPath)

    # Create lookup of homology IDs to their marker keys
    # (logical DBs 47/64/172; only official markers, status key 1)
    results = db.sql( '''select a.accid, a._object_key as markerKey, m._organism_key
        from acc_accession a, mrk_marker m
        where a._mgitype_key = 2
        and a._logicalDB_key in (47, 64, 172)
        and a._object_key = m._marker_key
        and m._marker_status_key = 1''', 'auto')
    for r in results:
        #print('hMrkID: %s orgKey: %s hMrkKey: %s' % (r['accid'], int(r['_organism_key']), int(r['markerKey']) ))
        homologyLookup[r['accid']] = [ int(r['_organism_key']), int(r['markerKey']) ]

    # Create lookup of mouse MGI IDs to their marker keys
    results = db.sql( '''select a.accid, a._object_key as markerKey
        from acc_accession a, mrk_marker m
        where a._mgitype_key = 2
        and a._logicalDB_key = 1
        and a.prefixPart = 'MGI:'
        and a._object_key = m._marker_key
        and m._marker_status_key = 1''', 'auto')
    for r in results:
        mouseLookup[r['accid']] = r['markerKey']
    return
def qtl2():
    # Report QTL markers that have mapping data but no non-wild-type
    # allele, then the distinct references for those markers.
    # (query1/query2 supply the shared base SELECTs.)
    fp2.write('QTL markers that have mapping but no allele associated (yes/no)\n')
    fp2.write('mapping = yes/alleles = no\n')

    results = db.sql('''%s
        and not exists (select 1 from MRK_Notes n where m._Marker_key = n._Marker_key)
        and exists (select 1 from MLD_Expt_Marker mld where m._Marker_key = mld._Marker_key)
        and not exists (select 1 from ALL_Allele al where m._Marker_key = al._Marker_key and al.isWildType = 0)
        order by symbol
        ''' % (query1), 'auto')
    for r in results:
        fp2.write(r['mgiID'] + TAB)
        fp2.write(mgi_utils.prvalue(r['refID']) + TAB)
        fp2.write(r['symbol'] + TAB)
        fp2.write(r['name'] + TAB)
        fp2.write(CRT)
    fp2.write(CRT + '(%d rows affected)' % (len(results)) + CRT)

    fp2.write('\n\nQTL References with map records that have QTL associated w/o Alleles:\n\n')
    results = db.sql('''%s
        and not exists (select 1 from MRK_Notes n where m._Marker_key = n._Marker_key)
        and exists (select 1 from MLD_Expt_Marker mld where m._Marker_key = mld._Marker_key)
        and not exists (select 1 from ALL_Allele al where m._Marker_key = al._Marker_key and al.isWildType = 0)
        order by numericPart
        ''' % (query2), 'auto')
    for r in results:
        fp2.write(mgi_utils.prvalue(r['refID']) + CRT)
    fp2.write(CRT + '(%d rows affected)' % (len(results)) + CRT)
def createVocabulary (
    vocabName,	# string; name of vocabulary to be created
    refsKey,	# integer; reference for the vocabulary
    ldbKey	# integer; logical db for the vocabulary
    ):
    # Purpose: create a new vocabulary with the given 'vocabName'
    # Returns: integer _Vocab_key for the new vocabulary
    # Assumes: nobody is inserting records into VOC_Vocab while this
    #	function is running
    # Modifies: writes to the VOC_Vocab table in the database
    # Throws: propagates SystemExit if problems occur

    # find the current highest vocab key.
    # Fixes: (1) the aggregate is now aliased -- the original read
    # results[0][''] which is not a returned column name; (2) MAX() always
    # returns one row, so the old "if not results" empty-table test could
    # never fire; a NULL maxKey is the real empty-table signal.
    cmd1 = 'SELECT MAX(_Vocab_key) as maxKey FROM VOC_Vocab'
    results = db.sql (cmd1, 'auto')
    if not results or results[0]['maxKey'] is None:
        vocabKey = 1
    else:
        vocabKey = results[0]['maxKey'] + 1

    # add a new record for the new vocabulary.
    # Fix: "INSERT VOC_Vocab" is Sybase/SQL Server shorthand; standard SQL
    # (and PostgreSQL, used throughout this codebase) requires INSERT INTO.
    cmd2 = '''INSERT INTO VOC_Vocab (_Vocab_key, _Refs_key, _LogicalDB_key,
            isSimple, isPrivate, name)
        VALUES (%d, %d, %d, %d, %d, '%s')''' % (vocabKey, refsKey, ldbKey, 1, 0, vocabName)
    try:
        results = db.sql (cmd2, 'auto')
    except:
        bailout ('Cannot create new vocabulary "%s" as key %d' % \
            (vocabName, vocabKey))

    return vocabKey
def bcpFiles():
    # Log the pending deletion SQL, then (unless DEBUG / bcp disabled) run
    # the deletions and bcp the notes file into the database.
    for stmt in execSQL:
        diagFile.write(stmt + '\n')

    if DEBUG or not bcpon:
        return

    notesFile.close()
    db.commit()

    # execute the sql deletions
    for stmt in execSQL:
        db.sql(stmt, None)
    db.commit()

    bcpCmd = bcpCommand % (notesTable, notesFileName)
    diagFile.write('%s\n' % bcpCmd)
    os.system(bcpCmd)

    db.commit()
    return
def bcpFiles(
    recordsProcessed	# number of records processed (integer)
    ):
    # BCP the image, pane, and accession files into the database, then
    # advance the accession-max counter by recordsProcessed.
    # No-op in DEBUG / bcp-off mode.
    # NOTE(review): referenceKey is declared global but never assigned or
    # read in this function -- confirm whether it is still needed.
    global referenceKey

    if DEBUG or not bcpon:
        return

    outImageFile.close()
    outPaneFile.close()
    outAccFile.close()
    outCopyrightFile.close()
    outCaptionFile.close()

    db.commit()
    bcp1 = bcpCommand % (imageTable, iFileName)
    bcp2 = bcpCommand % (paneTable, pFileName)
    bcp3 = bcpCommand % (accTable, aFileName)

    for bcpCmd in [bcp1, bcp2, bcp3]:
        diagFile.write('%s\n' % bcpCmd)
        os.system(bcpCmd)

    # update the max Accession ID value
    db.sql('''select * from ACC_setMax (%d)''' % (recordsProcessed), None)
    db.commit()
    return
def updateMarkerType ():
    # Apply each queued marker-type update: resolve the MGI ID to its
    # marker key and run the UPDATE, committing after each one.
    for mgiID, typeTerm in markersToUpdateDict.items():
        mrkTypeKey = mkrTypeToKeyDict[typeTerm]
        mrkKey = db.sql(MARKER_KEY % mgiID, 'auto')[0]['_Marker_key']
        db.sql(UPDATE % (mrkTypeKey, updatedByKey, mrkKey), None)
        db.commit()
def updateMarkerType():
    # Walk the queued updates; each MGI ID is resolved to its marker key
    # and its marker type updated, with a commit per record.
    for mgiID in list(markersToUpdateDict):
        newTypeKey = mkrTypeToKeyDict[markersToUpdateDict[mgiID]]
        keyResults = db.sql(MARKER_KEY % mgiID, 'auto')
        markerKey = keyResults[0]['_Marker_key']
        db.sql(UPDATE % (newTypeKey, updatedByKey, markerKey), None)
        db.commit()
def updateAll(): """ Update all the annotation extension display notes """ # drop existing notes cmd = ''' delete from mgi_note where _notetype_key = %d ''' % DISPLAY_NOTE_TYPE_KEY db.sql(cmd, None) # get _note_key to use for inserts startingNoteKey = _queryMaxNoteKey() + 1 # begin batch processing batchSize = 10000 offset = 0 properties = _queryAnnotExtensions(limit=batchSize, offset=offset) providerLinkMap = _queryProviderLinkMap() noteFile = open(NOTE_BCP_FILE, 'w') chunkFile = open(NOTECHUNK_BCP_FILE, 'w') try: while properties: # setup the lookups for IDs to display values _createTempIDTable(properties) termIDMap = _queryTermIDMap() markerIDMap = _queryMarkerIDMap() # transform the properties to their display/links properties = transformProperties(properties, termIDMap, markerIDMap, providerLinkMap) # write BCP files _writeToBCPFile(properties, noteFile, chunkFile, startingNoteKey) # fetch new batch of properties startingNoteKey += batchSize offset += batchSize properties = _queryAnnotExtensions(limit=batchSize, offset=offset) finally: noteFile.close() chunkFile.close() # insert the new data db.bcp(NOTE_BCP_FILE, 'MGI_Note') db.bcp(NOTECHUNK_BCP_FILE, 'MGI_NoteChunk')
def writeAccBCP():
    '''
    # requires:
    #
    # effects:
    #	Creates appropriate BCP records
    #
    # returns:
    #	nothing
    #
    '''
    global accKey, userKey

    # records that require a reference: write an accession row plus an
    # accession-reference row
    results = db.sql('select _Object_key, _LogicalDB_key, accID, private, geneID ' + \
        'from WRK_EntrezGene_Bucket0 ' + \
        'where taxID = %s and refRequired = 1 ' % (taxId), 'auto')
    for r in results:
        # _Object_key of -1 means: resolve via the gene ID lookup
        if r['_Object_key'] == -1:
            objectKey = geneIDtoMarkerKey[r['geneID']]
        else:
            objectKey = r['_Object_key']

        prefixPart, numericPart = accessionlib.split_accnum(r['accID'])

        accFile.write( '%d|%s|%s|%s|%d|%d|%s|%d|1|%s|%s|%s|%s\n' % (accKey, r['accID'], mgi_utils.prvalue(prefixPart), mgi_utils.prvalue(numericPart), r['_LogicalDB_key'], objectKey, mgiTypeKey, r['private'], userKey, userKey, loaddate, loaddate))
        accrefFile.write( '%d|%s|%s|%s|%s|%s\n' % (accKey, referenceKey, userKey, userKey, loaddate, loaddate))
        accKey = accKey + 1

    # records that don't require a reference: accession row only
    results = db.sql('select _Object_key, _LogicalDB_key, accID, private, geneID ' + \
        'from WRK_EntrezGene_Bucket0 ' + \
        'where taxID = %s and refRequired = 0' % (taxId), 'auto')
    for r in results:
        if r['_Object_key'] == -1:
            objectKey = geneIDtoMarkerKey[r['geneID']]
        else:
            objectKey = r['_Object_key']

        prefixPart, numericPart = accessionlib.split_accnum(r['accID'])

        accFile.write( '%d|%s|%s|%s|%d|%d|%s|%d|1|%s|%s|%s|%s\n' % (accKey, r['accID'], mgi_utils.prvalue(prefixPart), mgi_utils.prvalue(numericPart), r['_LogicalDB_key'], objectKey, mgiTypeKey, r['private'], userKey, userKey, loaddate, loaddate))
        accKey = accKey + 1
def init():
    # Purpose: Initialization of database connection and file descriptors,
    #	and next available database keys
    # Returns: 1 if file descriptors cannot be initialized
    # Assumes: Nothing
    # Effects: opens a database connection
    # Throws: Nothing

    global fpInFile, fpClusterBCP, fpMemberBCP, fpAccessionBCP
    global nextClusterKey, nextMemberKey, nextAccessionKey

    # create file descriptors for input/output files
    try:
        fpInFile = open(inFile, 'r')
    except:
        exit(1, 'Could not open file %s\n' % inFile)
    try:
        fpClusterBCP = open(clusterBCP, 'w')
    except:
        exit(1, 'Could not open file %s\n' % clusterBCP)
    try:
        fpMemberBCP = open(memberBCP, 'w')
    except:
        exit(1, 'Could not open file %s\n' % memberBCP)
    try:
        fpAccessionBCP = open(accessionBCP, 'w')
    except:
        exit(1, 'Could not open file %s\n' % accessionBCP)

    # get next ACC_Accession, MRK_Cluster and MRK_ClusterMember key
    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # empty cluster/member tables fall back to key 1000
    results = db.sql('''select max(_Cluster_key) + 1 as nextKey from MRK_Cluster''', 'auto')
    if results[0]['nextKey'] is None:
        nextClusterKey = 1000
    else:
        nextClusterKey = results[0]['nextKey']

    results = db.sql('''select max(_ClusterMember_key) + 1 as nextKey from MRK_ClusterMember''', 'auto')
    if results[0]['nextKey'] is None:
        nextMemberKey = 1000
    else:
        nextMemberKey = results[0]['nextKey']

    # ACC_Accession is assumed non-empty, so no fallback here
    results = db.sql('''select max(_Accession_key) + 1 as nextKey from ACC_Accession''', 'auto')
    nextAccessionKey = results[0]['nextKey']

    return
def setPrimaryKeys():
    # Compute the next available PRB_Reference and PRB_Alias primary keys.
    #
    # Fix: "select maxKey = max(...)" is Sybase/SQL Server column aliasing
    # and is invalid in PostgreSQL, which this codebase uses
    # (nextval/setval elsewhere); rewritten with the standard "as maxKey"
    # form used by the other key queries in this file.
    global refKey, aliasKey

    results = db.sql('select max(_Reference_key) + 1 as maxKey from PRB_Reference', 'auto')
    refKey = results[0]['maxKey']

    results = db.sql('select max(_Alias_key) + 1 as maxKey from PRB_Alias', 'auto')
    aliasKey = results[0]['maxKey']
def processByAllele(objectKey):
    # Purpose: processes data for a specific Allele
    global deleteSQL

    # restrict both working queries to this allele
    alleleClause = " and aa._Allele_key = " + objectKey
    db.sql(querySQL1 + alleleClause, None)
    db.sql(querySQL2 + alleleClause, None)

    deleteSQL = deleteSQLAllele % (objectKey)
    process('sql')
def bcpFiles():
    # Purpose: BCPs the data into the database
    # Returns: 1 if error, else 0
    # Assumes: connection to the database
    # Effects: copies data into the db
    # Throws: Nothing

    if DEBUG == 'true':
        return 0

    closeFiles()

    bcpI = '%s %s %s' % (BCP_COMMAND, db.get_sqlServer(), db.get_sqlDatabase())
    bcpII = '"|" "\\n" mgd'

    bcp1 = '%s %s "/" %s %s' % (bcpI, alleleTable, alleleFileName, bcpII)
    bcp2 = '%s %s "/" %s %s' % (bcpI, mutationTable, mutationFileName, bcpII)
    bcp3 = '%s %s "/" %s %s' % (bcpI, refTable, refFileName, bcpII)
    bcp4 = '%s %s "/" %s %s' % (bcpI, accTable, accFileName, bcpII)
    bcp5 = '%s %s "/" %s %s' % (bcpI, noteTable, noteFileName, bcpII)
    bcp6 = '%s %s "/" %s %s' % (bcpI, annotTable, annotFileName, bcpII)

    db.commit()

    for bcpCmd in [bcp1, bcp2, bcp3, bcp4, bcp5, bcp6]:
        fpDiagFile.write('%s\n' % bcpCmd)
        os.system(bcpCmd)

    # resync the auto-sequences with the bcp-loaded max keys

    # update all_allele_mutation_seq auto-sequence
    db.sql(''' select setval('all_allele_mutation_seq', (select max(_Assoc_key) from ALL_Allele_Mutation)) ''', None)

    # update all_allele_seq auto-sequence
    db.sql(''' select setval('all_allele_seq', (select max(_Allele_key) from ALL_Allele)) ''', None)

    # update mgi_reference_assoc auto-sequence
    # NOTE(review): this one uses max(...) + 1 while its siblings use
    # max(...); nextval() already returns last_value + 1 -- confirm the
    # +1 is intended.
    db.sql(''' select setval('mgi_reference_assoc_seq', (select max(_Assoc_key) + 1 from MGI_Reference_Assoc)) ''', None)

    # update mgi_note_seq auto-sequence
    db.sql(''' select setval('mgi_note_seq', (select max(_Note_key) from MGI_Note)) ''', None)

    # update voc_annot_seq auto-sequence
    db.sql(''' select setval('voc_annot_seq', (select max(_Annot_key) from VOC_Annot)) ''', None)

    db.commit()
    return 0
def createExcluded(): excludeNote = 'The source of the material used to create this cDNA probe was different than that used to create the GenBank sequence record.' print 'excluded begin...%s' % (mgi_utils.date()) db.sql('''select _Probe_key INTO TEMPORARY TABLE excluded from PRB_Notes where note like 'The source of the material used to create this cDNA probe was different%' ''', None) db.sql('create index idx1 on excluded(_Probe_key)', None) print 'excluded end...%s' % (mgi_utils.date())
def processByAssay(objectKey):
    # Purpose: processes data for a specific Assay (docstring previously
    #          said "Allele" -- copy/paste error)
    # objectKey: string key of the Assay to process
    # Side effects: sets globals deleteSQL and isQuerySQL2

    global deleteSQL
    # fix: isQuerySQL2 was assigned without a 'global' declaration, so the
    # value never reached module level and the flag had no effect
    global isQuerySQL2

    db.sql(querySQL1 + " and e._Assay_key = " + objectKey, None)
    # querySQL2 does not apply in assay mode; 'and 0 = 1' makes it a no-op
    db.sql(querySQL2 + " and 0 = 1", None)

    deleteSQL = deleteSQLAssay % (objectKey)
    isQuerySQL2 = 0
    process('sql')
def processFile():
    '''
    # requires:
    #
    # effects:
    #	Reads input file
    #	Writes output file (good name/bad name translations)
    #	Writes invalid lines/names to the error file
    #
    # returns:
    #	nothing
    #
    '''

    # For each line in the input file
    for line in inputFile.readlines():

        # comment lines start with '!'
        if line[0] == '!':
            continue

        tokens = string.split(line[:-1], delim)

        # Library input puts the bad name in column 0; all other parse
        # types put it in column 1.  Good name is always column 2.
        try:
            if parseType == 'Library':
                badName = tokens[0]
                goodName = tokens[2]
            else:
                badName = tokens[1]
                goodName = tokens[2]
        except:
            errorFile.write('Invalid line: %s\n' % (line))
            continue

        # verify the good name exists in the database
        if parseType == 'Tissues':
            results = db.sql('select _Tissue_key from PRB_Tissue where tissue = "%s"' % (goodName), 'auto')
        elif parseType == 'Cell':
            results = db.sql('select term from VOC_Term where term = "%s"' % (goodName), 'auto')
        elif parseType == 'Library':
            results = db.sql('select _Source_key from PRB_Source where name = "%s"' % (goodName), 'auto')
        elif parseType == 'Strains':
            results = db.sql('select a.accID from PRB_Strain_Acc_View a, PRB_Strain s ' + \
                'where s.strain = "%s" ' % (goodName) + \
                'and s._Strain_key *= a._Object_key ' + \
                'and a._LogicalDB_key = 1 ' + \
                'and a.prefixPart = "MGI:" ' + \
                'and a.preferred = 1', 'auto')

        if len(results) > 0 and badName != goodName:
            # fix: was "parseType == 'strain'", which never matched the
            # 'Strains' parse type used everywhere else in this function,
            # so the MGI accID column was never written for strains
            if parseType == 'Strains':
                outputFile.write(mgi_utils.prvalue(results[0]['accID']) + delim + goodName + delim + badName + delim + createdBy + '\n')
            else:
                outputFile.write(delim + goodName + delim + badName + delim + createdBy + '\n')
        elif len(results) == 0:
            errorFile.write('Invalid good name: %s\n' % (goodName))
def setPrimaryKeys():
    # Determine the next available primary keys for the reference and
    # alias tables and publish them via module-level globals.

    global refKey, aliasKey

    rows = db.sql('select max(_Reference_key) + 1 as maxKey from PRB_Reference', 'auto')
    refKey = rows[0]['maxKey']

    rows = db.sql('select max(_Alias_key) + 1 as maxKey from PRB_Alias', 'auto')
    aliasKey = rows[0]['maxKey']
def verifyGenotype():
    # Purpose: verify there are no duplicate genotypes created by the
    #          configured user (same marker, allele pair, mutant cell
    #          lines and pair state on more than one genotype)
    # Returns: 0
    # Effects: writes a pass line, or one fail line per duplicate, to fpLogTest

    global testPassed

    # find allele-pair signatures occurring on more than one genotype.
    # fix: the login literal had no '%s' placeholder, so the
    # "% (createdby)" formatting raised TypeError
    query = '''
        select ap._Marker_key, ap._Allele_key_1, ap._Allele_key_2,
            ap._MutantCellLine_key_1, ap._MutantCellLine_key_2, ap._PairState_key
        into #allelepair
        from GXD_Genotype g, GXD_AllelePair ap, MGI_User u
        where g._Genotype_key = ap._Genotype_key
        and g._CreatedBy_key = u._User_key
        and u.login = '%s'
        group by _Marker_key, _Allele_key_1, _Allele_key_2,
            _MutantCellLine_key_1, _MutantCellLine_key_2, _PairState_key
        having count(*) > 1
        ''' % (createdby)
    #print query
    # fix: second argument was the string 'None' instead of the None object
    db.sql(query, None)

    # resolve the duplicate signatures to displayable symbols/ids
    query2 = '''
        select a.symbol as alleleSymbol, ma.accID as markerID, aa.accID as alleleID,
            mcl.cellLine as mutantID, t.term as alleleState
        from #allelepair ap, ALL_Allele a, ACC_Accession ma, ACC_Accession aa,
            ALL_CellLine mcl, VOC_Term t
        where ap._Allele_key_1 = a._Allele_key
        and ap._Marker_key = ma._Object_key
        and ma._MGIType_key = 2
        and ma._LogicalDB_key = 1
        and ap._Allele_key_1 = aa._Object_key
        and aa._MGIType_key = 11
        and aa._LogicalDB_key = 1
        and ap._MutantCellLine_key_1 = mcl._CellLine_key
        and ap._PairState_key = t._Term_key
        '''
    #print query2
    results = db.sql(query2, 'auto')

    if len(results) == 0:
        testPassed = 'pass'
        # NOTE(review): the pass line carries 12 fields and the fail line
        # 13 -- confirm which arity testDisplay actually expects
        fpLogTest.write(testDisplay % \
            (testPassed, 'duplicate genotypes', '', '', '', '', '', '', '', '', '', ''))
    else:
        # fix: this branch previously wrote testPassed without setting it
        testPassed = 'fail'
        for r in results:
            alleleSymbol = r['alleleSymbol']
            markerID = r['markerID']
            alleleID = r['alleleID']
            mutantID = r['mutantID']
            # fix: was assigned to 'alleleStatus' but written as
            # 'alleleState' below, raising NameError
            alleleState = r['alleleState']
            fpLogTest.write(testDisplay % \
                (testPassed, 'duplicate genotypes', '', '', \
                alleleSymbol, markerID, alleleID, mutantID, \
                alleleState, '', '', '', ''))

    return 0
def bcpFiles():
    '''
    # requires:
    #
    # effects:
    #	BCPs the data into the database
    #
    # returns:
    #	nothing
    #
    '''

    # flush all pending bcp records to disk
    for fp in (exptFile, exptMarkerFile, accFile, noteFile):
        fp.close()

    db.commit()

    bcpCommand = os.environ['PG_DBUTILS'] + '/bin/bcpin.csh'
    currentDir = os.getcwd()

    # build one bcp command per (table, bcp file) pair
    template = '%s %s %s %s %s %s "%s" "\\n" mgd'
    targets = [
        ('MLD_Expts', exptFileName),
        ('MLD_Expt_Marker', exptMarkerFileName),
        ('ACC_Accession', accFileName),
        ('MLD_Notes', noteFileName),
        ]
    commands = [template % (bcpCommand, db.get_sqlServer(), db.get_sqlDatabase(),
        table, currentDir, fileName, bcpdelim) for (table, fileName) in targets]

    # log every command first, then execute them in order
    for cmd in commands:
        diagFile.write('%s\n' % cmd)
    for cmd in commands:
        os.system(cmd)

    # update mld_expts_seq auto-sequence
    db.sql(''' select setval('mld_expts_seq', (select max(_Expt_key) from MLD_Expts)) ''', None)
    db.commit()

    # update mld_expt_marker_seq auto-sequence
    db.sql(''' select setval('mld_expt_marker_seq', (select max(_Assoc_key) from MLD_Expt_Marker)) ''', None)
    db.commit()
def deleteByUser(): # Purpose: delete records created by current load # Returns: nothing # Assumes: a connection has been made to the database # Effects: deletes records from a database # Throws: nothing print '%s' % mgi_utils.date() print 'Deleting records for this user' db.sql('''delete from %s where _CreatedBy_key = %s''' % (table, CREATEDBY_KEY), None) db.commit()
def init ():
    # Purpose: open files and load the lookups used during processing
    # Effects: populates globals markerToUniprotLookup (mgiID -> list of
    #          Association) and markerLookup (mgiID -> Marker)

    global markerToUniprotLookup
    global markerLookup

    openFiles()

    # load lookups

    # lookup of existing uniprot load associations
    results = db.sql('''select a1.accid as uniprotID, a1._LogicalDB_key, m.symbol, a2.accid as mgiID
        from ACC_Accession a1, MRK_Marker m, ACC_Accession a2
        where a1. _MGIType_key = 2
        and a1._LogicalDB_key in (13, 41)
        and a1._CreatedBy_key = 1442 /*uniprotload_assocload*/
        and a1._Object_key = m._Marker_key
        and m._Organism_key = 1
        and m._Marker_Status_key = 1
        and m._Marker_key = a2._Object_key
        and a2. _MGIType_key = 2
        and a2._LogicalDB_key = 1
        and a2.preferred = 1
        and a2.prefixPart = 'MGI:' ''', 'auto')

    for r in results:
        a = Association()
        uniprotID = string.lower(r['uniprotID'])
        a.uniprotID = uniprotID
        mgiID = string.lower(r['mgiID'])
        a.mgiID = mgiID
        # swiss-prot (13) or trembl (41)
        a.logicalDbKey = r['_LogicalDB_key']

        if not markerToUniprotLookup.has_key(mgiID):
            markerToUniprotLookup[mgiID] = []
        markerToUniprotLookup[mgiID].append(a)

    # load lookup of all marker MGI IDs
    results = db.sql('''select m.symbol, m._Organism_key, m._Marker_Status_key, a.accid as mgiID, a.preferred
        from ACC_Accession a, MRK_Marker m
        where a. _MGIType_key = 2
        and a._LogicalDB_key = 1
        and a.prefixPart = 'MGI:'
        and a._Object_key = m._Marker_key ''', 'auto')

    for r in results:
        m = Marker()
        m.markerID = string.lower(r['mgiID'])
        # fix: result column is '_Organism_key'; the previous lookup used
        # '_Organism_Key' (capital K) and raised KeyError
        m.organism = r['_Organism_key']
        m.markerStatus = r['_Marker_Status_key']
        m.markerPreferred = r['preferred']
        markerLookup[m.markerID] = m

    return
def processCommandLine():
    # Validate the command line (expects: server, database), point the db
    # module at that server/database, and confirm the database answers a
    # trivial query before proceeding.

    if len(sys.argv) != 3:
        bailout('Incorrect command-line; need two parameters.')

    server = sys.argv[1]
    database = sys.argv[2]

    db.set_sqlServer(server)
    db.set_sqlDatabase(database)
    db.useOneConnection(1)

    try:
        db.sql('select count(1) from MGI_dbInfo', 'auto')
    except:
        bailout('Cannot query database %s..%s' % (server, database))
    return
def runSQL(self, sql, label):
    """ Run an SQL stmt and return results.
        sql may be a single statement (string) or a list of statements.
    """
    started = time.time()
    verbose(label + '...')

    # a bare string may hold several statements joined by SQLSEPARATOR
    if type(sql) == type(''):
        stmts = sql.split(self.SQLSEPARATOR)
    else:
        stmts = sql
    results = db.sql(stmts, 'auto')

    verbose("SQL time: %8.3f seconds\n" % (time.time() - started))
    return results
def deleteByUser():
    # Purpose: delete records created by current load
    # Returns: nothing
    # Assumes: a connection has been made to the database
    # Effects: deletes records from a database
    # Throws: nothing

    print('%s' % mgi_utils.date())
    print('Deleting records for this user')

    deleteStmt = '''delete from %s where _CreatedBy_key = %s''' % (table, CREATEDBY_KEY)
    db.sql(deleteStmt, None)
    db.commit()
def processSusceptibility():
    # Purpose: scan the DO obo file for OMIM susceptibility relationships
    #          (RO:0003304) and attach each OMIM id to its DO term as an
    #          ACC_Accession row (logicalDB 15, MGIType 13)
    # Returns: 0

    # insert statement
    INSERT_ACCESSION = '''insert into ACC_Accession 
    values ((select max(_Accession_key) + 1 from ACC_Accession), '%s', '%s', %s, 15, %s, 13, 0, 0)
    '''

    doFileName = os.environ['OBO_FILE']
    doFile = open(doFileName, 'r')

    omimIdValue = 'id: OMIM:'
    relValue = 'relationship: RO:0003304'
    skipValue = 'OMIM:000000'
    foundOMIM = 0

    for line in doFile.readlines():

        # fix: lines from readlines() keep their trailing newline, so the
        # old test (line == '[Term]') never matched; foundOMIM was never
        # reset at term boundaries and a stale OMIM id from an earlier
        # term could be attached to a later term's relationship line
        if line.strip() == '[Term]':
            foundOMIM = 0
        elif line[:9] == omimIdValue:
            omimId = line[4:-1]
            if omimId == skipValue:
                continue
            foundOMIM = 1
        elif foundOMIM and line[:24] == relValue:
            tokens = line[25:-1].split(' ')
            doId = tokens[0]
            prefixPart, numericPart = accessionlib.split_accnum(omimId)
            objectKey = loadlib.verifyObject(doId, 13, None, None, None)
            addSQL = INSERT_ACCESSION % (omimId, prefixPart, numericPart, objectKey)
            db.sql(addSQL, None)
        else:
            continue

    doFile.close()
    db.commit()
    return 0
def writeAccBCP():
    '''
    # requires:
    #
    # effects:
    #	Creates appropriate BCP records
    #
    # returns:
    #	nothing
    #
    '''
    global accKey, userKey

    # records that require a reference
    results = db.sql('select _Object_key, _LogicalDB_key, accID, private, geneID ' + \
        'from WRK_EntrezGene_Bucket0 ' + \
        'where taxID = %s and refRequired = 1 ' % (taxId), 'auto')
    for r in results:
        _writeAccRow(r, 1)

    # records that don't require a reference
    results = db.sql('select _Object_key, _LogicalDB_key, accID, private, geneID ' + \
        'from WRK_EntrezGene_Bucket0 ' + \
        'where taxID = %s and refRequired = 0' % (taxId), 'auto')
    for r in results:
        _writeAccRow(r, 0)

def _writeAccRow(r, withReference):
    # Write one ACC_Accession bcp record for result row r; when
    # withReference is true, also write the matching accession-reference
    # record.  Increments the global accKey.
    # (extracted from writeAccBCP, which had this logic duplicated)
    global accKey

    # _Object_key of -1 means: resolve via the geneID lookup
    if r['_Object_key'] == -1:
        objectKey = geneIDtoMarkerKey[r['geneID']]
    else:
        objectKey = r['_Object_key']

    prefixPart, numericPart = accessionlib.split_accnum(r['accID'])
    accFile.write('%d|%s|%s|%s|%d|%d|%s|%d|1|%s|%s|%s|%s\n' \
        % (accKey, r['accID'], mgi_utils.prvalue(prefixPart), mgi_utils.prvalue(numericPart), \
        r['_LogicalDB_key'], objectKey, mgiTypeKey, r['private'], userKey, userKey, loaddate, loaddate))

    if withReference:
        accrefFile.write('%d|%s|%s|%s|%s|%s\n' \
            % (accKey, referenceKey, userKey, userKey, loaddate, loaddate))

    accKey = accKey + 1
def setPrimaryKeys():
    # Initialize every primary-key counter used by the allele load, either
    # from a database sequence (nextval) or from the current max key.

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, mutationKey, mutantKey, annotKey

    def nextKey(stmt):
        # run a single-row key query and return its maxKey column
        return db.sql(stmt, 'auto')[0]['maxKey']

    alleleKey = nextKey(''' select nextval('all_allele_seq') as maxKey ''')
    refAssocKey = nextKey(''' select nextval('mgi_reference_assoc_seq') as maxKey ''')
    accKey = nextKey('select max(_Accession_key) + 1 as maxKey from ACC_Accession')
    noteKey = nextKey(''' select nextval('mgi_note_seq') as maxKey ''')
    mgiKey = nextKey(''' select max(maxNumericPart) + 1 as maxKey from ACC_AccessionMax where prefixPart = '%s' ''' % (mgiPrefix))
    mutationKey = nextKey(''' select nextval('all_allele_mutation_seq') as maxKey ''')
    mutantKey = nextKey(''' select nextval('all_allele_cellline_seq') as maxKey ''')
    annotKey = nextKey(''' select nextval('voc_annot_seq') as maxKey ''')
def init():
    '''
    # requires:
    #
    # effects:
    #	1. Processes command line options
    #	2. Initializes local DBMS parameters
    #	3. Initializes global file descriptors
    #
    # returns:
    #
    '''
    global accFile, accrefFile, markerFile, diagFile
    global accKey, userKey, markerKey

    def openOrExit(fileName):
        # open fileName for writing; abort the load if it cannot be opened
        try:
            return open(fileName, 'w')
        except:
            exit(1, 'Could not open file %s\n' % fileName)

    diagFile = openOrExit(diagFileName)
    accFile = openOrExit(accFileName)
    accrefFile = openOrExit(accrefFileName)
    markerFile = openOrExit(markerFileName)

    #
    # Get next available primary key
    #
    results = db.sql('select max(_Accession_key) + 1 as maxKey from ACC_Accession', 'auto')
    accKey = results[0]['maxKey']

    results = db.sql(''' select nextval('mrk_marker_seq') as maxKey ''', 'auto')
    markerKey = results[0]['maxKey']

    userKey = loadlib.verifyUser(user, 0, None)
def enroll():
    # Flask endpoint: register a new user.
    # Expects a JSON POST body with username, email, password and bankName;
    # creates the user's storage directory and inserts a users row.
    # Returns a short status string.
    global username
    global user_directory
    if request.method == 'POST':
        data = request.get_json()
        username = data['username']
        email = data['email']
        password = data['password']
        bank = data['bankName']

        # Encrypt the password for this user (translated from Korean).
        # NOTE(review): AES-ECB produces deterministic ciphertexts and leaks
        # patterns; an authenticated mode (e.g. GCM) would be safer, but
        # changing it requires migrating stored passwords -- confirm.
        cipher = AES.new(
            os.getenv("PASSWORD_ECD").encode('utf-8'),
            AES.MODE_ECB)  # never use ECB in strong systems obviously
        encoded_password = base64.b64encode(
            cipher.encrypt(password.encode('utf-8').rjust(32)))

        # Encrypt the server-side path where this user's files are stored
        # (translated from Korean).
        encoded_path = base64.b64encode(
            cipher.encrypt(username.encode('utf-8').rjust(32))).decode('utf-8')
        user_path = removeSpecialChars(encoded_path)
        user_directory = "Users/" + user_path + "/"

        if not os.path.exists(user_directory):
            try:
                os.makedirs(user_directory)
                db.sql(
                    "INSERT INTO users (user_id, password, email, pathvoice, isvoice, bank) VALUES (%s, %s, %s, %s, %s, %s)",
                    (username, encoded_password, email, user_path, 1, bank))
                print("[ * ] Directory ", username, " Created ...")
                return "created user"
                pass
            except ValueError as error:
                print(error)
                return "fail create user"
        else:
            # print("[ * ] Directory ", username, " already exists ...")
            # print("[ * ] Overwriting existing directory ...")
            # shutil.rmtree(user_directory, ignore_errors=False, onerror=None)
            # os.makedirs(user_directory)
            # print("[ * ] Directory ", username, " Created ...")
            return "user already exists"
    else:
        return "fail"
def createBCPFile(): global accKey print 'Create the bcp file for the GENSAT associations' # # Find the marker key that the EntrezGene ID should be associated with. # Do not make an association for any EntrezGene IDs that are on the # discrepancy report. # cmds = [] cmds.append('select t.entrezgeneID, a._Object_key as markerKey ' + \ 'from ' + tempTable + ' t, ACC_Accession a ' + \ 'where lower(t.entrezgeneID) = lower(a.accID) and ' + \ 'a._MGIType_key = ' + str(markerMGITypeKey) + ' and ' + \ 'a._LogicalDB_key = ' + str(egLogicalDBKey) + ' ' + \ 'order by t.entrezgeneID') results = db.sql(cmds, 'auto') count = 0 # # Write the records to the bcp file. # for r in results[0]: entrezgeneID = r['entrezgeneID'] markerKey = r['markerKey'] # # Skip the EntrezGene ID if it was written to the discrepancy report. # if badIDs.has_key(entrezgeneID): continue # # Get the prefix and numeric parts of the EntrezGene ID and write # a record to the bcp file. # (prefixPart, numericPart) = accessionlib.split_accnum(entrezgeneID) fpAccBCPFile.write(str(accKey) + TAB + \ entrezgeneID + TAB + \ prefixPart + TAB + \ str(numericPart) + TAB + \ str(gensatLogicalDBKey) + TAB + \ str(markerKey) + TAB + \ str(markerMGITypeKey) + TAB + \ PRIVATE + TAB + PREFERRED + TAB + \ str(createdByKey) + TAB + \ str(createdByKey) + TAB + \ loadDate + TAB + \ loadDate + NL) count = count + 1 accKey = accKey + 1 print 'Number of GENSAT associations: ' + str(count) return
def doCount(
    label,
    q   # list of sql stmts. last one being 'select count as num'
    ):
    # Run the statements and print the final count, right-aligned, with
    # its label.
    rows = db.sql(q, 'auto')
    countValue = rows[-1][0]['num']
    sys.stdout.write("%7d\t%s\n" % (countValue, label))