def process():
    """Run every query from getQueries() and write the results to stdout.

    Connection settings come from the parsed command line (getArgs()); the
    login is the fixed mgd_public account.  A header row built from
    OutputColumns is written first, then each query's final result set is
    handed to writeResults().  Timing and progress messages go to stderr
    only when args.verbose is set.

    Returns: nothing
    """
    args = getArgs()

    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    if args.verbose:
        sys.stderr.write("Hitting database %s %s as mgd_public\n\n" %
                         (args.host, args.db))
    startTime = time.time()

    # header line
    sys.stdout.write(FIELDSEP.join(OutputColumns) + RECORDSEP)

    for i, q in enumerate(getQueries(args)):
        qStartTime = time.time()

        # A query may hold several SQL statements separated by SQLSEPARATOR.
        # str.split replaces the deprecated string.split() module function.
        results = db.sql(q.split(SQLSEPARATOR), 'auto')

        if args.verbose:
            sys.stderr.write("Query %d SQL time: %8.3f seconds\n\n" %
                             (i, time.time() - qStartTime))

        # db.sql returns a list of result lists; only the last one is output
        nResults = writeResults(results[-1])

        if args.verbose:
            sys.stderr.write("%d references processed\n\n" % (nResults))

    if args.verbose:
        sys.stderr.write("Total time: %8.3f seconds\n\n" %
                         (time.time() - startTime))
def main():
    """Configure the db connection, run the selected report, report timing.

    Reads the module-level `args`: host and db select the database, and
    option == 'counts' selects doCounts(); any other value runs doSamples().
    The elapsed wall-clock time is passed to verbose().
    """
    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    began = time.time()

    # choose the report routine first, then invoke it
    report = doCounts if args.option == 'counts' else doSamples
    report()

    elapsed = time.time() - began
    verbose("Total time: %8.3f seconds\n\n" % elapsed)
def main():
    """Configure the db connection, run counts or samples, report timing.

    Reads the module-level `args`: host and db select the database, queryKey
    is echoed through verbose(), and a true `counts` value runs
    doCounts(args); otherwise doSamples(args) runs.  The elapsed wall-clock
    time is passed to verbose().
    """
    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    verbose("Hitting database %s %s as mgd_public\n" % (args.host, args.db))
    verbose("Query option: %s\n" % args.queryKey)

    began = time.time()

    if not args.counts:
        doSamples(args)
    else:
        doCounts(args)

    elapsed = time.time() - began
    verbose("Total time: %8.3f seconds\n\n" % elapsed)
def process(year='2013'):
    # Purpose: Main routine of this script.  Fills the 'year' placeholder of
    #          the SQL template, runs the resulting command(s), and writes
    #          one result set to stdout as delimited text.
    # Params:  year - value substituted for 'year' in SQL.  Defaults to
    #          '2013', the value that was previously hard-coded.
    # Returns: nothing
    args = getArgs()
    notQuiet = not args["QUIET"]

    db.set_sqlServer(args["DBSERVER"])
    db.set_sqlDatabase(args["DBNAME"])
    db.set_sqlUser("MGD_PUBLIC")
    db.set_sqlPassword("mgdpub")

    query = SQL % {'year': year}
    # str.split replaces the deprecated string.split() module function
    queries = query.split(args["SQLSEPARATOR"])

    if notQuiet:
        sys.stderr.write("Running %d SQL command(s) on %s..%s\n" %
                         (len(queries), args["DBSERVER"], args["DBNAME"]))
        sys.stderr.flush()

    startTime = time.time()
    results = db.sql(queries, 'auto')
    endTime = time.time()

    if notQuiet:
        sys.stderr.write("Total SQL time: %8.3f seconds\n" %
                         (endTime - startTime))
        sys.stderr.flush()

    delim = args["DELIMITER"]
    # Only the third result set is reported.
    # NOTE(review): this index is tied to the statement order in SQL --
    # confirm if the template changes.
    result = results[2]

    # print column headers
    sys.stdout.write(delim.join(COLUMNS) + "\n")

    # print results, one line per row, fields separated by DELIMITER
    for r in result:
        sys.stdout.write(delim.join(cleanVal(r[col]) for col in COLUMNS)
                         + "\n")
def getPubmedIDs():
    """Run QUERY and write its final result set to args.outputFile.

    The output is tab-delimited with a header row and the columns pubmed,
    haspdf, year and journal.  Connection settings come from getArgs(); the
    login is the fixed mgd_public account.  With args.verbose set, timing
    and a progress indicator are written to stderr.

    Returns: nothing
    """
    args = getArgs()

    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    if args.verbose:
        sys.stderr.write("Hitting database %s %s as mgd_public\n\n" %
                         (args.host, args.db))

    # str.split replaces the deprecated string.split() module function
    queries = QUERY.split(SQLSEPARATOR)

    startTime = time.time()
    results = db.sql(queries, 'auto')
    endTime = time.time()

    if args.verbose:
        sys.stderr.write("Total SQL time: %8.3f seconds\n\n" %
                         (endTime - startTime))

    # The context manager guarantees the file is flushed and closed, even
    # if a row fails to format; previously the handle was never closed.
    with open(args.outputFile, 'w') as fp:
        fp.write('\t'.join(['pubmed', 'haspdf', 'year', 'journal']) + '\n')

        for i, r in enumerate(results[-1]):
            fp.write('\t'.join([
                str(r['pubmed']),
                str(r['haspdf']),
                str(r['year']),
                r['journal'],
            ]) + '\n')

            if args.verbose and i % 1000 == 0:
                # write progress indicator
                sys.stderr.write('%d..' % i)
""" datatest classes """ import logging import os import db ### Globals ### # Track test failures FAILURES = [] CACHELOADS = set([]) ### initialize database settings ### db.set_sqlUser('mgd_public') db.set_sqlPassword('mgdpub') db.set_sqlServer( os.environ['DATATEST_DBSERVER'] ) db.set_sqlDatabase( os.environ['DATATEST_DBNAME'] ) ### Classes ### class DataTestCase(object): """ datatest Test Case Exposes special assertion methods Tracks and reports failures """ def __init__(self):
args.db = 'prod' else: args.host = args.server + '.jax.org' args.db = args.database return args #----------------------------------- args = getArgs() db.set_sqlServer(args.host) db.set_sqlDatabase(args.db) db.set_sqlUser("mgd_public") db.set_sqlPassword("mgdpub") #----------------------------------- class BaseRefSearch(object): # { """ Is: base class for a reference (article) search from the database Has: all the necessary SQL for the search, the result set, Does: Encapsulates the common SQL for specific searches that return result sets of references and counts/stats for these result sets. """ #################### # SQL fragments used to build up queries #################### SQLSEPARATOR = '||'
def process():
    # Purpose: Main routine of this script.  Queries the pubmed-ID to
    #          dataset associations and prints one PDF file name per pubmed
    #          ID ("<pmid>_<classification letters>.pdf") to stdout.
    #          Pubmed IDs that map to more than one _refs_key are skipped
    #          and reported on stderr.
    # Returns: nothing
    global notQuiet
    #args = getArgs()   # NOT USED YET
    notQuiet = True     #not args[ "QUIET"]

    db.set_sqlServer("PROD1_MGI")
    #db.set_sqlServer("DEV_MGI")
    db.set_sqlDatabase("mgd")
    db.set_sqlUser("MGD_PUBLIC")
    db.set_sqlPassword("mgdpub")

    #-- For papers w/ pubmed IDs
    #--  get their MGD dataset associations for Alleles/Pheno, Expr, GO, Tumor
    query = """
        select a.accid pubmedid, b._refs_key, bd.abbreviation
        from acc_accession a
            inner join bib_refs b
                on (a._mgitype_key = 1 and a._object_key = b._refs_key
                    and a._logicaldb_key = 29)
            inner join bib_dataset_assoc bda
                on (b._refs_key = bda._refs_key and bda.isneverused=0)
            inner join bib_dataset bd
                on (bda._dataset_key = bd._dataset_key
                    and bd._dataset_key in (1002, 1004, 1005, 1007) )
        """
    results = runsql(query)
    if notQuiet:
        sys.stderr.write("Found %d pubmed to corpora associations\n" %
                         len(results))

    # build dict w/ keys = pubmedIDs, values = { _refs_key : 0 }
    # build dict w/ keys = pubmedIDs, values = { classification : 0 }
    # Pubmed IDs with multiple _refs_keys are not output, because we don't
    #  want to collapse multiple classifications from multiple references
    #  onto a single pubmed ID
    #  (there are some pubmedIDs for books/proceedings that are associated
    #  with multiple references, these individual references have
    #  classifications)
    pm2refs_key = {}
    pm2classes = {}
    for rcd in results:
        pmid = rcd["pubmedid"]
        refs_key = rcd["_refs_key"]
        classification = rcd["abbreviation"][0:1]   # 1st letter of abbrev

        # setdefault creates the inner dict the 1st time a pmid is seen;
        # it replaces the dict.has_key() test, which Python 3 removed
        pm2refs_key.setdefault(pmid, {})[refs_key] = 0
        pm2classes.setdefault(pmid, {})[classification] = 0

    numIDs = len(pm2refs_key)
    numMultRefs = 0

    # loop through pubmed IDs, and output those w/ their classifications
    for pmid in pm2refs_key:
        numRefs = len(pm2refs_key[pmid])
        if numRefs == 1:        # 1 _refs_key for this pmid
            cn = "%s_%s.pdf" % (str(pmid), "".join(pm2classes[pmid]))
            # parenthesized form prints the same under Python 2 and 3
            print(cn)
        else:                   # pmid w/ multiple _refs_keys
            numMultRefs += 1
            sys.stderr.write("Pubmed ID '%s' has %d _refs_key's, skipping\n"
                             % (str(pmid), numRefs))

    sys.stderr.write("Pubmed IDs: %d, \t skipped IDs: %d\n"
                     % (numIDs, numMultRefs))