def process():
    """Run each query from getQueries() and write its results to stdout.

    Connects the db module to the server/database named in the command-line
    arguments (as the mgd_public user), writes one header line built from
    OutputColumns, then for every query string returned by getQueries(args):
    splits it on SQLSEPARATOR into individual SQL commands, runs them with
    db.sql(), and passes the last result set to writeResults().

    When args.verbose is set, timing and progress messages go to stderr.

    Returns: nothing
    """
    args = getArgs()

    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    if args.verbose:
        sys.stderr.write("Hitting database %s %s as mgd_public\n\n"
                         % (args.host, args.db))
    startTime = time.time()

    # Column header line precedes the output of all queries.
    sys.stdout.write(FIELDSEP.join(OutputColumns) + RECORDSEP)

    for i, q in enumerate(getQueries(args)):
        qStartTime = time.time()

        # str.split() replaces the deprecated string.split() module function;
        # it behaves the same on Python 2 and still exists on Python 3.
        results = db.sql(q.split(SQLSEPARATOR), 'auto')

        if args.verbose:
            sys.stderr.write("Query %d SQL time: %8.3f seconds\n\n"
                             % (i, time.time() - qStartTime))

        # db.sql returns a list of result lists (one per SQL command);
        # only the last one is written out.
        nResults = writeResults(results[-1])

        if args.verbose:
            sys.stderr.write("%d references processed\n\n" % (nResults))

    if args.verbose:
        sys.stderr.write("Total time: %8.3f seconds\n\n"
                         % (time.time() - startTime))
Esempio n. 2
0
def main():
    """Configure the database connection, run the selected report, log time.

    Reads the module-level `args`: args.host / args.db select the server and
    database, and args.option chooses between doCounts() (when it equals
    'counts') and doSamples() (anything else).  The elapsed time is reported
    through verbose().
    """
    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")
    began = time.time()

    if args.option == 'counts':
        doCounts()
    else:
        doSamples()

    elapsed = time.time() - began
    verbose("Total time: %8.3f seconds\n\n" % elapsed)
Esempio n. 3
0
def main():
    """Configure the database connection, run counts or samples, log time.

    Reads the module-level `args`: args.host / args.db select the server and
    database, args.queryKey is echoed through verbose(), and args.counts
    chooses between doCounts(args) and doSamples(args).  The elapsed time is
    reported through verbose().
    """
    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    verbose("Hitting database %s %s as mgd_public\n" % (args.host, args.db))
    verbose("Query option:  %s\n" % args.queryKey)

    began = time.time()

    if args.counts:
        doCounts(args)
    else:
        doSamples(args)

    elapsed = time.time() - began
    verbose("Total time: %8.3f seconds\n\n" % elapsed)
Esempio n. 4
0
def process():
    """Main routine of this script.

    Fills the SQL template with year 2013, splits it into individual
    commands on args["SQLSEPARATOR"], runs them through db.sql(), and writes
    one result set to stdout as delimited text: a header line of COLUMNS
    followed by one line per row, each value passed through cleanVal().

    Unless args["QUIET"] is set, progress and timing messages go to stderr.

    Returns: nothing
    """
    args = getArgs()

    notQuiet = not args["QUIET"]

    db.set_sqlServer(args["DBSERVER"])
    db.set_sqlDatabase(args["DBNAME"])
    db.set_sqlUser("MGD_PUBLIC")
    db.set_sqlPassword("mgdpub")

    query = SQL % {'year': '2013'}
    # str.split() replaces the deprecated string.split() module function.
    queries = query.split(args["SQLSEPARATOR"])

    if notQuiet:
        sys.stderr.write("Running %d SQL command(s) on %s..%s\n"
                         % (len(queries), args["DBSERVER"], args["DBNAME"]))
        sys.stderr.flush()

    startTime = time.time()
    results = db.sql(queries, 'auto')
    endTime = time.time()
    if notQuiet:
        sys.stderr.write("Total SQL time: %8.3f seconds\n"
                         % (endTime - startTime))
        sys.stderr.flush()

    delim = args["DELIMITER"]

    # Only the third result set is reported.
    # NOTE(review): index 2 is tied to the number/order of commands in SQL;
    # confirm it still matches if SQL is edited.
    result = results[2]

    # print column headers
    sys.stdout.write(delim.join(COLUMNS) + "\n")

    # print results, one line per row (result), delimiter-separated
    for r in result:
        vals = [cleanVal(r[col]) for col in COLUMNS]
        sys.stdout.write(delim.join(vals) + "\n")
Esempio n. 5
0
def getPubmedIDs():
    """Run QUERY and write its final result set to args.outputFile.

    Connects the db module to args.host / args.db as mgd_public, splits
    QUERY into individual commands on SQLSEPARATOR, and runs them with
    db.sql().  The last result set is written as tab-delimited text with the
    columns pubmed, haspdf, year, journal (header line first).

    When args.verbose is set, connection/timing messages and a progress
    indicator (every 1000 rows) are written to stderr.

    Returns: nothing
    """
    args = getArgs()

    db.set_sqlServer(args.host)
    db.set_sqlDatabase(args.db)
    db.set_sqlUser("mgd_public")
    db.set_sqlPassword("mgdpub")

    if args.verbose:
        sys.stderr.write("Hitting database %s %s as mgd_public\n\n"
                         % (args.host, args.db))

    # str.split() replaces the deprecated string.split() module function.
    queries = QUERY.split(SQLSEPARATOR)

    startTime = time.time()
    results = db.sql(queries, 'auto')
    endTime = time.time()
    if args.verbose:
        sys.stderr.write("Total SQL time: %8.3f seconds\n\n"
                         % (endTime - startTime))

    header = ['pubmed', 'haspdf', 'year', 'journal']

    # The context manager guarantees the file is flushed and closed even if
    # a row fails to format; previously the handle was never closed.
    with open(args.outputFile, 'w') as fp:
        fp.write('\t'.join(header) + '\n')
        for i, r in enumerate(results[-1]):
            # journal is written as-is (not passed through str()), so it
            # must already be a string.
            fp.write('\t'.join([
                str(r['pubmed']),
                str(r['haspdf']),
                str(r['year']),
                r['journal'],
            ]) + '\n')
            if args.verbose and i % 1000 == 0:  # write progress indicator
                sys.stderr.write('%d..' % i)
Esempio n. 6
0
"""
datatest classes
"""
import logging
import os

import db

### Globals ###
# Module-wide list used to track test failures
FAILURES = []
CACHELOADS = set()

### initialize database settings ###
# Login is fixed; server and database names are taken from the environment
# (a missing variable raises KeyError when this module is loaded).
db.set_sqlUser('mgd_public')
db.set_sqlPassword('mgdpub')
db.set_sqlServer(os.environ['DATATEST_DBSERVER'])
db.set_sqlDatabase(os.environ['DATATEST_DBNAME'])

### Classes ###

class DataTestCase(object):
	"""
	datatest Test Case
	Exposes special assertion methods

	Tracks and reports failures
	"""

	def __init__(self):
Esempio n. 7
0
        args.db = 'prod'
    else:
        args.host = args.server + '.jax.org'
        args.db = args.database

    return args


#-----------------------------------

# Parse the command line when the module is loaded; `args` is a module-level
# global.
args = getArgs()

# Point the db module at the server/database chosen by the arguments,
# logging in as mgd_public.
db.set_sqlServer(args.host)
db.set_sqlDatabase(args.db)
db.set_sqlUser("mgd_public")
db.set_sqlPassword("mgdpub")

#-----------------------------------


class BaseRefSearch(object):  # {
    """
    Is:   base class for a reference (article) search from the database
    Has:  all the necessary SQL for the search, the result set, 
    Does: Encapsulates the common SQL for specific searches that return
            result sets of references and counts/stats for these result sets.
    """
    ####################
    # SQL fragments used to build up queries
    ####################
    SQLSEPARATOR = '||'
Esempio n. 8
0
def process():
    """Main routine of this script.

    Queries the database for papers that have pubmed IDs together with
    their dataset associations, then prints one line per pubmed ID of the
    form "<pubmedid>_<letters>.pdf" to stdout, where <letters> are the first
    letters of the associated dataset abbreviations.

    Pubmed IDs associated with more than one _refs_key are not printed; a
    message for each is written to stderr instead.  A final summary of
    total and skipped IDs is also written to stderr.

    Sets the module-level global notQuiet (currently always True).

    Returns: nothing
    """
    global notQuiet

    # Command-line arguments are not used yet: verbosity and the database
    # connection are hard-coded ("DEV_MGI" is the alternative server).
    notQuiet = True

    db.set_sqlServer("PROD1_MGI")
    db.set_sqlDatabase("mgd")
    db.set_sqlUser("MGD_PUBLIC")
    db.set_sqlPassword("mgdpub")

    #-- For papers w/ pubmed IDs
    #--  get their MGD dataset associations for Alleles/Pheno, Expr, GO, Tumor
    query = """
    select a.accid pubmedid, b._refs_key, bd.abbreviation
    from acc_accession a inner join bib_refs b on
	    (a._mgitype_key = 1 and a._object_key = b._refs_key
	    and a._logicaldb_key = 29)
	inner join bib_dataset_assoc bda on
	    (b._refs_key = bda._refs_key and bda.isneverused=0)
	inner join bib_dataset bd on
	    (bda._dataset_key = bd._dataset_key
	     and bd._dataset_key in (1002, 1004, 1005, 1007) )
    """

    results = runsql(query)
    if notQuiet:
        sys.stderr.write("Found %d pubmed to corpora associations\n"
                         % len(results))

    # Build two dicts keyed by pubmed ID:
    #   pm2refs_key: { _refs_key : 0 }       (distinct references)
    #   pm2classes:  { classification : 0 }  (distinct class letters)
    # A pubmed ID with multiple _refs_keys is skipped at output time because
    # we don't want to collapse classifications from multiple references
    # onto a single pubmed ID (some pubmed IDs for books/proceedings are
    # associated with multiple references, and these individual references
    # have classifications).
    pm2refs_key = {}
    pm2classes = {}
    for rcd in results:
        pmid = rcd["pubmedid"]
        refs_key = rcd["_refs_key"]
        classification = rcd["abbreviation"][0:1]   # 1st letter of abbrev

        # setdefault() replaces the has_key()/else branches: it creates the
        # inner dict the first time a pmid is seen and reuses it afterwards.
        pm2refs_key.setdefault(pmid, {})[refs_key] = 0
        pm2classes.setdefault(pmid, {})[classification] = 0

    numIDs = 0
    numMultRefs = 0
    # loop through pubmed IDs, and output those w/ their classifications
    for pmid in pm2refs_key:
        numIDs += 1
        refsKeys = pm2refs_key[pmid]
        if len(refsKeys) == 1:          # 1 _refs_key for this pmid
            # Letters appear in the dict's key order.
            cls = pm2classes[pmid].keys()
            cn = "%s_%s.pdf" % (str(pmid), "".join(cls))
            sys.stdout.write(cn + "\n")
        else:                           # pmid w/ multiple _refs_keys
            numMultRefs += 1
            sys.stderr.write("Pubmed ID '%s' has %d _refs_key's, skipping\n"
                             % (str(pmid), len(refsKeys)))

    sys.stderr.write("Pubmed IDs: %d, \t skipped IDs: %d\n"
                     % (numIDs, numMultRefs))