Example #1
0
def bcpFiles():
    """Load the marker, reference and alias BCP files into the database.

    Does nothing when DEBUG is set or bcpon is false.  Otherwise the three
    output files are closed, the queued statements in execSQL are executed,
    and each BCP command is logged to diagFile and run via os.system().
    """

    if DEBUG or not bcpon:
        return

    for handle in (markerFile, refFile, aliasFile):
        handle.close()

    db.commit()

    # the commands are formatted before the queued SQL runs, as before
    loadCommands = [
        bcpCommand % (markerTable, markerFileName),
        bcpCommand % (refTable, refFileName),
        bcpCommand % (aliasTable, aliasFileName),
    ]

    # execute the sql deletions
    for statement in execSQL:
        db.sql(statement, None)

    for command in loadCommands:
        diagFile.write('%s\n' % command)
        os.system(command)

    db.commit()
Example #2
0
def initialize():
    """Open the working files and load the marker / evidence-code lookups.

    File names and the J-number are read from the environment
    (INFILE_NAME_GAF, INFILE_NAME, INFILE_NAME_ERROR, JNUMBER).  markerList
    receives the accID of every row returned by the marker query, and
    evidenceCodeList maps abbreviation -> _Term_key for _Vocab_key 3.

    Returns:
        0 on success, 1 if any of the three files cannot be opened.
    """

    global inFileName, inFile
    global annotFileName, annotFile
    global errorFileName, errorFile
    global jnumID
    global markerList
    global evidenceCodeList

    #
    # open files
    #

    inFileName = os.environ['INFILE_NAME_GAF']
    annotFileName = os.environ['INFILE_NAME']
    errorFileName = os.environ['INFILE_NAME_ERROR']
    jnumID = os.environ['JNUMBER']

    # Only I/O failures mean "cannot open"; anything else (for example
    # KeyboardInterrupt) is allowed to propagate instead of being reported
    # as a file problem.
    try:
        inFile = open(inFileName, 'r')
    except IOError:
        print('Cannot open input file: ' + inFileName)
        return 1

    try:
        annotFile = open(annotFileName, 'w')
    except IOError:
        print('Cannot open annotation file for writing: ' + annotFileName)
        # do not leave the already-opened input file dangling
        inFile.close()
        return 1

    try:
        errorFile = open(errorFileName, 'w')
    except IOError:
        print('Cannot open error file for writing: ' + errorFileName)
        inFile.close()
        annotFile.close()
        return 1

    #
    # list of markers type 'gene'
    #
    results = db.sql('''select a.accID
              from MRK_Marker m, ACC_Accession a
              where m._Marker_Type_key = 1
	      and m._Marker_key = a._Object_key
	      and a._MGIType_key = 2
	      and a._LogicalDB_key = 1
	      and a.preferred = 1
	  ''', 'auto')

    for r in results:
        markerList.append(r['accID'])

    #
    # list of evidence codes
    #
    results = db.sql('select _Term_key, abbreviation from VOC_Term where _Vocab_key = 3', 'auto')
    for r in results:
        evidenceCodeList[r['abbreviation']] = r['_Term_key']

    return 0
Example #3
0
def bcpFiles():
    """Close the synonym file and, when enabled, load it into MGI_Synonym.

    When bcpon is false only a "skipping" message is printed.  Otherwise the
    BCP command for MGI_Synonym is logged to diagFile and run via
    os.system(), after which mgi_synonym_seq is set to the largest
    _Synonym_key in MGI_Synonym.
    """

    synFile.close()

    if not bcpon:
        print('Skipping BCP. Mode: %s' % mode)
        sys.stdout.flush()
        return

    print('Executing BCP')
    sys.stdout.flush()

    db.commit()
    loadCommand = bcpCommand % ('MGI_Synonym', synFileName)
    diagFile.write('%s\n' % loadCommand)
    os.system(loadCommand)
    db.commit()

    # bring the sequence in line with the rows that were just loaded
    resetSequence = ''' select setval('mgi_synonym_seq', (select max(_Synonym_key) from MGI_Synonym)) '''
    db.sql(resetSequence, None)
    db.commit()
Example #4
0
def getPrimaryKeys():
    """Fetch the next primary keys and store them in module globals.

    exptKey comes from the mld_expts_seq sequence, accKey is one past the
    largest _Accession_key (1000 when the query yields NULL), and mgiKey is
    one past maxNumericPart for the mgiPrefix row of ACC_AccessionMax.
    """

    global exptKey, accKey, mgiKey

    exptRows = db.sql(''' select nextval('mld_expts_seq') as maxKey ''', 'auto')
    exptKey = exptRows[0]['maxKey']

    accRows = db.sql(
        '''select max(_Accession_key) + 1  as maxKey
                from ACC_Accession''', 'auto')
    nextAccKey = accRows[0]['maxKey']
    # a NULL maximum is replaced by the fixed starting key
    accKey = 1000 if nextAccKey is None else nextAccKey

    mgiRows = db.sql(
        '''select maxNumericPart + 1 as maxKey
                from ACC_AccessionMax 
                where prefixPart = '%s' ''' % (mgiPrefix), 'auto')
    mgiKey = mgiRows[0]['maxKey']
Example #5
0
def doCounts():
    """Write the reference counts for each sample subset to stdout.

    Builds the omit and base temp tables, reports the omitted count, then
    for every subset builds its final temp table and hands the build SQL
    plus a count query to doCount() together with a label.
    """
    sys.stdout.write(time.ctime() + '\n')
    sys.stdout.write("Hitting database %s %s as mgd_public\n" %
                     (args.host, args.db))
    sys.stdout.write(getRestrictedArticleText())

    countTemplate = 'select count(distinct _refs_key) as num from %s\n'

    db.sql(BUILD_OMIT_TABLE, 'auto')
    db.sql(BUILD_BASE_TABLE, 'auto')

    doCount(OMIT_TEXT, [countTemplate % "tmp_omit"])

    # (subset name handed to buildFinalTmpTableSQL, label shown for it)
    subsets = [
        ('discard_after',
         "Discard after: %s - %s" % (LIT_TRIAGE_DATE, END_DATE)),
        ('keep_after',
         "Keep after: %s - %s" % (LIT_TRIAGE_DATE, END_DATE)),
        ('keep_before',
         "Keep before: %s - %s" % (START_DATE, LIT_TRIAGE_DATE)),
        ('keep_tumor',
         "Tumor papers: %s - %s" % (TUMOR_START_DATE, START_DATE)),
        ('test_2020',
         "Test set from 2020"),
    ]

    for subsetName, label in subsets:
        tableName, buildSQL = buildFinalTmpTableSQL(subsetName)
        doCount(label, buildSQL + [countTemplate % tableName])
Example #6
0
def bcpFiles():
    """Run the queued SQL and BCP the marker, reference and alias files.

    Returns immediately when DEBUG is set or bcpon is false.  Otherwise the
    output files are closed, execSQL is executed statement by statement,
    and every BCP command is written to diagFile before being run through
    os.system().
    """

    if DEBUG or not bcpon:
        return

    markerFile.close()
    refFile.close()
    aliasFile.close()

    db.commit()

    # (table, data file) pairs, in load order
    targets = (
        (markerTable, markerFileName),
        (refTable, refFileName),
        (aliasTable, aliasFileName),
    )
    commands = [bcpCommand % target for target in targets]

    # execute the sql deletions
    for statement in execSQL:
        db.sql(statement, None)

    for command in commands:
        diagFile.write('%s\n' % command)
        os.system(command)

    db.commit()
Example #7
0
def loadDictionaries():
    """Fill the global lookups used while processing the input file.

    chromosomeList receives the chromosomes returned by the MRK_Chromosome
    query, assayDict maps assay description -> _Assay_Type_key, and
    inputChrList collects each distinct value found in the second
    '|'-delimited field of the input file, which is closed afterwards.
    """

    global chromosomeList, assayDict, inputChrList

    chromosomeRows = db.sql(
        '''select chromosome from MRK_Chromosome 
                where _Organism_key = 1 
                and chromosome not in ('UN') 
                order by sequenceNum''', 'auto')
    for row in chromosomeRows:
        chromosomeList.append(row['chromosome'])

    assayRows = db.sql('select * from MLD_Assay_Types', 'auto')
    for row in assayRows:
        assayDict[row['description']] = row['_Assay_Type_key']

    # create unique list of chromosomes from input file
    for line in inputFile.readlines():
        fields = line[:-1].split('|')
        chromosome = fields[1]
        if chromosome in inputChrList:
            continue
        inputChrList.append(chromosome)
    inputFile.close()
Example #8
0
def mmrrc():
    """Start the MMRRC strain/genotype mismatch report.

    Opens the report through reportlib, writes the title and column
    headings, builds the indexed temporary table 'strains', and passes the
    open report to printReport().
    """

    report = reportlib.init(sys.argv[0], outputdir = os.environ['QCOUTPUTDIR'], fileExt = '.mmrrc.rpt')

    title = 'MMRRC Strains w/ Genotype Associations where the Markers/Alleles of the Strain record\n' + \
            'do not exactly match the Markers/Alleles of the Genotype record.'
    report.write(title + '\n\n')

    # column headings, each followed by a tab, then the line terminator
    for heading in ('MMRRC#', 'Strain', 'Genotypes'):
        report.write(heading + reportlib.TAB)
    report.write(reportlib.CRT)

    # strains named '%/Mmnc' with genotype associations; exclude wild type alleles
    db.sql('''
	    select distinct sa.accID, s.strain, g._Genotype_key, g._Strain_key, a._Marker_key, a._Allele_key 
	    into temporary table strains 
	    from PRB_Strain s, PRB_Strain_Genotype g, GXD_AlleleGenotype a, ALL_Allele aa, ACC_Accession sa 
	    where s.strain like '%/Mmnc'
	    and s._Strain_key = g._Strain_key 
	    and g._Genotype_key = a._Genotype_key 
	    and a._Allele_key = aa._Allele_key 
	    and aa.isWildType = 0 
	    and s._Strain_key = sa._Object_key 
	    and sa._MGIType_key = 10 
	    and sa._LogicalDB_key = 38 
	    and sa.preferred = 1 
	    ''', None)
    db.sql('create index strains_idx2 on strains(_Strain_key)', None)

    printReport(report)
Example #9
0
def processSlim():
    """Report and delete DO slim members that descend from another member.

    The report goes to the file named by DO_MGI_SLIM_SANITY_FILE.  Every
    reported row is also deleted from MGI_SetMember (_Set_key 1048) by its
    _SetMember_key, and the transaction is committed at the end.

    Returns:
        0 always.
    """

    report = open(os.environ['DO_MGI_SLIM_SANITY_FILE'], 'w')

    gap = 35 * ' '
    report.write('\n\nDO slim terms that are decendents of another DO slim term\n\n')
    report.write('descendent_term' + gap + 'another_slim_term\n')
    report.write('---------------' + gap + '-----------------\n\n')

    nestedTerms = db.sql('''
               select tt.term as descendent_term, ss.term as another_slim_term, t._SetMember_key
               from MGI_SetMember t, DAG_Closure dc, MGI_SetMember s, VOC_Term tt, VOC_Term ss
               where t._Set_key = 1048
               and t._Object_key = dc._DescendentObject_key
               and dc._AncestorObject_key = s._Object_key
               and s._Set_key = 1048
               and t._Object_key != s._Object_key
               and t._Object_key = tt._Term_key
               and s._Object_key = ss._Term_key
       ''', 'auto')

    removeMember = 'delete from MGI_SetMember where _Set_key = 1048 and _SetMember_key = %s'
    for row in nestedTerms:
        report.write('%-50s %-50s\n' % (row['descendent_term'], row['another_slim_term']))
        db.sql(removeMember % (row['_SetMember_key']), None)

    report.close()
    db.commit()
    return 0
Example #10
0
def bcpFiles():
    """BCP the reference association file into MGI_Reference_Assoc.

    Does nothing when DEBUG is set or bcpon is false.  Otherwise closes
    refFile, logs the bcp command line to diagFile, runs it via
    os.system(), and then sets mgi_reference_assoc_seq from
    max(_Assoc_key) + 1.
    """

    if DEBUG or not bcpon:
        return

    refFile.close()

    fieldDelimiter = "|"
    arguments = (passwordFileName, db.get_sqlDatabase(),
                 'MGI_Reference_Assoc', refFileName, fieldDelimiter,
                 db.get_sqlServer(), db.get_sqlUser())
    loadCommand = 'cat %s | bcp %s..%s in %s -c -t"%s" -S%s -U%s' % arguments

    diagFile.write('%s\n' % loadCommand)
    os.system(loadCommand)

    # update mgi_reference_assoc_seq auto-sequence
    resetSequence = ''' select setval('mgi_reference_assoc_seq', (select max(_Assoc_key) + 1 from MGI_Reference_Assoc)) '''
    db.sql(resetSequence, None)
Example #11
0
def getQueryResults(i, baseQ, textQ):
    """
    Run the reference query and the extracted-text query, then join them.

    i      - query number, used only in the progress messages
    baseQ  - SQL for the basic reference fields, split on SQLSEPARATOR
    textQ  - SQL for the extracted text, split on SQLSEPARATOR

    Returns the reference records (the last result set of baseQ) after
    ExtractedTextSet.joinRefs2ExtText() has been applied to them.
    """
    #### basic reference fields
    began = time.time()
    refRcds = db.sql(baseQ.split(SQLSEPARATOR), 'auto')[-1]

    verbose("Query %d:  %d references retrieved\n" % (i, len(refRcds)))
    verbose("SQL time: %8.3f seconds\n\n" % (time.time() - began))

    #### extracted text parts
    began = time.time()
    extTextRcds = db.sql(textQ.split(SQLSEPARATOR), 'auto')[-1]

    verbose("Query %d:  %d extracted text rcds retrieved\n" %
            (i, len(extTextRcds)))
    verbose("SQL time: %8.3f seconds\n\n" % (time.time() - began))

    #### join the two record sets
    began = time.time()
    verbose("Joining ref info to extracted text:\n")

    ExtractedTextSet(extTextRcds).joinRefs2ExtText(refRcds, allowNoText=True)

    verbose("%8.3f seconds\n\n" % (time.time() - began))

    return refRcds
Example #12
0
def deleteAccession(aKey):
    """Delete the accession with key aKey and its accession references."""
    print("Deleting _accession_key = %s" % aKey)

    # ACC_AccessionReference is cleared first, then ACC_Accession itself
    for table in ('ACC_AccessionReference', 'ACC_Accession'):
        statement = 'delete from %s\n\twhere _Accession_key = %s' % (table, aKey)
        db.sql(statement, None)
Example #13
0
    def load ( self, markerType ):
        """Query marker accession IDs and feed each row to self.sidParser.

        markerType : marker type name to restrict the query to, or None
                     (any false value) for no restriction.

        The query selects accID (as sid), symbol and _Marker_key for
        markers with _Organism_key 1 in logical DB SEQDB.  The assembled
        SQL is printed before it is run through db.sql().
        """

        selectClause = 'select a.accID as sid, m.symbol, m._Marker_key '
        fromClause = 'from ACC_Accession a, MRK_Marker m '
        conditions = [
            'where a._MGIType_key = 2 ',
            'and a._LogicalDB_key = %u ' % (SEQDB),
            'and a._Object_key = m._Marker_key ',
            'and m._Organism_key = 1',
        ]

        if markerType:
            # restrict by type name through MRK_Types
            fromClause += ', MRK_Types t '
            conditions.append(' and m._Marker_Type_key = t._Marker_Type_key ')
            conditions.append('and t.name = \'%s\' ' % (markerType))

        sql = selectClause + fromClause + ''.join(conditions)
        print(sql)
        db.sql ( sql, self.sidParser )
Example #14
0
def cdnas():
    """Write the cDNA section of the report to fp.

    Builds the temp tables 'mussource' (sources with _Organism_key 1) and
    'cdnas' (probes from those sources with _SegmentType_key 63468), then
    writes the probe count and the count of distinct markers linked to
    those probes through PRB_Marker.
    """

    fp.write('cDNAs:' + 2*CRT)

    #
    # number of mouse cDNAs
    #

    db.sql('select _Source_key into temporary table mussource from PRB_Source where _Organism_key = 1', None)
    db.sql('create index mussource_idx on mussource(_Source_key)', None)

    db.sql('''select p._Probe_key 
        into temporary table cdnas
	from mussource s, PRB_Probe p 
	where s._Source_key = p._Source_key 
	and p._SegmentType_key = 63468 
	''', None)
    db.sql('create index cdnas_idx on cdnas(_Probe_key)', None)

    probeCounts = db.sql('select count(_Probe_key) as ccount from cdnas', 'auto')
    for row in probeCounts:
        fp.write('mouse cDNAs             : ' + str(row['ccount']) + CRT)

    #
    # number of markers curated to mouse cDNAs
    #

    markerCounts = db.sql('''
	select count(distinct(pm._Marker_key)) as mcount 
	from cdnas c, PRB_Marker pm 
	where c._Probe_key = pm._Probe_key
	''', 'auto')
    for row in markerCounts:
        fp.write('Markers curated to cDNAs:  ' + str(row['mcount']) + CRT)
Example #15
0
def verifyMode():
    """Validate the processing mode and set DEBUG accordingly.

    'preview' turns DEBUG on; 'incremental' and 'load' turn it off, and
    'load' additionally deletes the MGI_Note rows for objectTypeKey and
    noteTypeKey.  Any other mode is passed to exit() with status 1.
    """

    global DEBUG

    if mode == 'preview':
        DEBUG = 1
    elif mode == 'incremental':
        DEBUG = 0
    elif mode == 'load':
        DEBUG = 0
        # a full load starts by clearing the existing notes of this type
        purge = 'delete from MGI_Note where _MGIType_key = %s and _NoteType_key = %s'
        db.sql(purge % (objectTypeKey, noteTypeKey), None)
    else:
        exit(1, 'Invalid Processing Mode:  %s\n' % (mode))
Example #16
0
def miscellaneous():
    """Write the alias and synonym sections for markerKey to fp.

    Each section is a '#'-framed heading followed by one line per row: the
    name left-justified to 30 columns, a TAB, and the creation date
    left-justified to 15 columns.
    """

    def writeRows(rows, nameColumn):
        # one report line per row: padded name, TAB, padded date, CRT
        for row in rows:
            fp.write(row[nameColumn].ljust(30) + TAB)
            fp.write(row['cdate'].ljust(15) + CRT)

    #
    # alias
    #

    fp.write('#\n# Alias\n#\n')

    aliasRows = db.sql('''
	    select alias,
		   cdate = convert(char(10), creation_date, 101)
	    from MRK_Alias_View where _Marker_key =  %s
	    order by alias
	    ''' % (markerKey), 'auto')
    writeRows(aliasRows, 'alias')

    #
    # synonym
    #

    fp.write('#\n# Synonym\n#\n')

    synonymRows = db.sql('''
	    select synonym,
		   cdate = convert(char(10), creation_date, 101)
	    from MGI_Synonym where _MGIType_key = 2 and _Object_key =  %s
	    ''' % (markerKey), 'auto')
    writeRows(synonymRows, 'synonym')
Example #17
0
def loadFileSource(file, tempno):
    """Load a tab-delimited cluster file into a temporary table.

    Creates the temporary table cluster_set<tempno> (cid, cmid) and inserts
    one row per line of the input file.

    Args:
        file: path of the input file; every line must hold exactly two
            tab-separated fields (cid, cmid).
        tempno: integer appended to the temp table name.

    Returns:
        0 on success, 1 if the input file does not exist.

    Raises:
        ValueError: if a line does not split into exactly two fields.
    """

    #
    #  Make sure the input file exists.
    #
    if not os.path.exists(file):
        print("Input file does not exist: %s" % file)
        return 1

    create_stmt = "create temporary table cluster_set%d " % tempno + \
                  "(cid varchar(30), cmid varchar(30))"
    insert_stmt = "insert into cluster_set%d " % tempno + \
                  "values ('%s', '%s')"

    # The context manager closes the file even when a db call fails.
    with open(file, 'r') as inFile:
        db.sql(create_stmt, None)

        for line in inFile:
            # Strip only the line terminator; slicing off the last character
            # would eat real data on a final line without a newline.
            cid, cmid = line.rstrip('\n').split('\t')

            # Double embedded single quotes so a value such as O'Brien cannot
            # terminate the SQL string literal early.
            cid = cid.replace("'", "''")
            cmid = cmid.replace("'", "''")
            db.sql(insert_stmt % (cid, cmid), None)

    return 0
Example #18
0
def processFile():
    """Run deleteSQL for every probe listed in the input file.

    The first tab-delimited field of each line is the probe ID.  IDs that
    loadlib.verifyObject() resolves to key 0 are skipped.  In DEBUG mode
    the statement is printed instead of executed.
    """

    for lineNum, line in enumerate(inputFile.readlines(), 1):

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            probeID = tokens[0]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyObject(probeID, mgiTypeKey, None, lineNum,
                                        errorFile)
        if probeKey == 0:
            continue

        statement = deleteSQL % (probeKey)
        if DEBUG:
            print(statement)
        else:
            db.sql(statement, None)
Example #19
0
def bcpFiles():
    """Load the reference association file with bcp.

    Skipped entirely when DEBUG is set or bcpon is false.  Otherwise
    refFile is closed, the bcp command line is written to diagFile and run
    through os.system(), and mgi_reference_assoc_seq is set from
    max(_Assoc_key) + 1.
    """

    if DEBUG or not bcpon:
        return

    refFile.close()

    delimiter = "|"
    template = 'cat %s | bcp %s..%s in %s -c -t"%s" -S%s -U%s'
    command = template % (passwordFileName, db.get_sqlDatabase(),
                          'MGI_Reference_Assoc', refFileName, delimiter,
                          db.get_sqlServer(), db.get_sqlUser())

    diagFile.write('%s\n' % command)
    os.system(command)

    # update mgi_reference_assoc_seq auto-sequence
    db.sql(
        ''' select setval('mgi_reference_assoc_seq', (select max(_Assoc_key) + 1 from MGI_Reference_Assoc)) ''',
        None)
def processB6():
    """Write one annotation line per B6 gene model to fpB6AnnotFile.

    The rawBiotype of each gene model is looked up in featureTypeLookup;
    rows whose biotype is not in the lookup are reported on stdout and
    skipped.

    Returns:
        0 always.
    """
    print('Processing B6')
    db.sql('''-- get b6 strain/markers and their accids
        select a.accid as b6ID, a._Object_key as _StrainMarker_key
        into temporary table b6Ids
        from ACC_Accession a
        where a._MGIType_key = 44 --MRK_StrainMarker
        and a._LogicalDB_key = 212 --MGI B6
        and a.preferred = 1''', None)

    db.sql('''create index idx1 on b6Ids(b6ID)''', None)

    geneModels = db.sql('''-- get the biotypes for the B6 gene models
        select b6.b6ID, sgm.rawBiotype as biotype
        from b6Ids b6, ACC_Accession a, SEQ_GeneModel sgm
        where b6.b6ID = a.accid
        and a._MGIType_key = 19
        and a._LogicalDB_key = 212 --MGI B6
        and a.preferred = 1
        and a._Object_key = sgm._Sequence_key''', 'auto')

    for row in geneModels:
        featureType = row['biotype']  # actually the feature type
        if featureType not in featureTypeLookup:
            print('Cannot resolve B6: %s to Feature Type' % featureType)
            continue

        # TAB-separated columns in output order, terminated by CRT
        columns = (featureTypeLookup[featureType], row['b6ID'], B6_JNUM,
                   EVIDENCE, B6_JNUM, QUALIFIER, EDITOR, DATE, NOTES,
                   B6_LDBNAME)
        fpB6AnnotFile.write(
            TAB.join(['%s' % (column,) for column in columns]) + CRT)

    return 0
Example #21
0
def getDagKey (
    dag,        # str. corresponds to DAG_DAG.name
    vocab = None    # str.vocab name or integer vocab key
    ):
    # Purpose: return the _DAG_key for the given 'dag' name
    # Returns: integer
    # Assumes: nothing
    # Effects: queries the database
    # Throws: 1. VocloadlibError if the query does not return exactly one
    #   row for the given 'dag';
    #   2. propagates any exceptions raised by db.sql() or getVocabKey()
    # Notes: Since the DAG_DAG.name field does not require unique
    #   values, we may also need to know which 'vocab' it relates to.
    #   A false 'vocab' (None, 0, '') means "search by name only".

    if vocab:
        # isinstance (rather than an exact type comparison) so that str
        # subclasses are also resolved to a key before the %d formatting
        if isinstance(vocab, str):
            vocab = getVocabKey (vocab)
        result = db.sql ('''select dd._DAG_key
                from DAG_DAG dd, VOC_VocabDAG vvd
                where dd._DAG_key = vvd._DAG_key
                    and vvd._Vocab_key = %d
                    and dd.name = \'%s\'''' % (vocab, dag))
    else:
        result = db.sql ('''select _DAG_key
                from DAG_DAG
                where name = \'%s\'''' % dag)

    # zero rows and multiple rows are both treated as "unknown dag"
    if len(result) != 1:
        raise VocloadlibError(unknown_dag % dag)
    return result[0]['_DAG_key']
Example #22
0
def updateDatabase(cmds):
    """Run the given SQL commands against the database in batches of 100.

    The login is taken from MGD_DBSERVER, MGD_DBNAME, MGD_DBUSER and the
    first line of the file named by MGD_DBPASSWORDFILE.

    Args:
        cmds: sequence of SQL command strings; it is not modified.

    Any failure while running the batches is handed to bailout().
    """
    dbServer = os.environ['MGD_DBSERVER']
    dbName = os.environ['MGD_DBNAME']
    dbUser = os.environ['MGD_DBUSER']
    dbPasswordFile = os.environ['MGD_DBPASSWORDFILE']

    # Read the password through a context manager so the handle is closed,
    # and use the str method: string.strip() does not exist in Python 3.
    with open(dbPasswordFile, 'r') as passwordFile:
        dbPassword = passwordFile.readline().strip()
    db.set_sqlLogin(dbUser, dbPassword, dbServer, dbName)

    # process in batches of 100
    batchSize = 100
    total = len(cmds)

    try:
        db.useOneConnection(1)
        # Walk the list by offset instead of re-slicing the remainder on
        # every pass.
        for start in range(0, total, batchSize):
            print('Current running time (secs): %s' %
                  (time.time() - STARTTIME))
            db.sql(cmds[start:start + batchSize], 'auto')
        db.useOneConnection(0)
    except:
        # Deliberately broad: the exception types db.sql() can raise are
        # not visible here, and every failure must end in bailout().
        bailout('Failed during database updates')

    print('Processed %d updates to SEQ_Sequence._SequenceStatus_key' % total)
    print('Total running time (secs): %s' % (time.time() - STARTTIME))
    return
def processMGP():
    """Write one annotation line per MGP gene model to fpMgpAnnotFile.

    The rawBiotype of each gene model is looked up in featureTypeLookup;
    rows whose biotype is not in the lookup are reported on stdout and
    skipped.

    Returns:
        0 always.
    """
    print('Processing MGP')
    db.sql('''-- get mgp strain/markers and their accids
        select a.accid as mgpID, a._Object_key as _StrainMarker_key
        into temporary table mgpIds
        from ACC_Accession a
        where a._MGIType_key = 44 --MRK_StrainMarker
        and a._LogicalDB_key = 209 --MGP
        and a.preferred = 1''', None)
    db.sql('''create index idx2 on mgpIds(mgpID)''', None)

    geneModels = db.sql('''-- get the biotypes for the MGP gene models
        select mgp.mgpID, sgm.rawBiotype as biotype
        from mgpIds mgp, ACC_Accession a, SEQ_GeneModel sgm
        where mgp.mgpID = a.accid
        and a._MGIType_key = 19
        and a._LogicalDB_key = 209 --MGP
        and a.preferred = 1
        and a._Object_key = sgm._Sequence_key''', 'auto')

    for row in geneModels:
        featureType = row['biotype']  # actually the feature type
        if featureType not in featureTypeLookup:
            print('Cannot resolve MGP: %s to Feature Type' % featureType)
            continue

        # TAB-separated columns in output order, terminated by CRT
        columns = (featureTypeLookup[featureType], row['mgpID'], MGP_JNUM,
                   EVIDENCE, MGP_JNUM, QUALIFIER, EDITOR, DATE, NOTES,
                   MGP_LDBNAME)
        fpMgpAnnotFile.write(
            TAB.join(['%s' % (column,) for column in columns]) + CRT)

    return 0
Example #24
0
def loadDictionaries():
    """Populate chromosomeList, assayDict and inputChrList.

    chromosomeList gets the chromosomes returned by the MRK_Chromosome
    query, assayDict maps assay description -> _Assay_Type_key, and
    inputChrList gets each distinct value of the second '|'-delimited
    field in the input file.  The input file is closed afterwards.
    """

    global chromosomeList, assayDict, inputChrList

    chromosomeRows = db.sql('''select chromosome from MRK_Chromosome 
		where _Organism_key = 1 
		and chromosome not in ('UN') 
		order by sequenceNum''', 'auto')
    for row in chromosomeRows:
        chromosomeList.append(row['chromosome'])

    assayRows = db.sql('select * from MLD_Assay_Types', 'auto')
    for row in assayRows:
        assayDict[row['description']] = row['_Assay_Type_key']

    # create unique list of chromosomes from input file
    for line in inputFile.readlines():
        chromosome = line[:-1].split('|')[1]
        if chromosome in inputChrList:
            continue
        inputChrList.append(chromosome)
    inputFile.close()
Example #25
0
def processFile():
    """Execute deleteSQL for each probe ID found in the input file.

    The probe ID is the first tab-delimited field of a line.  Lines whose
    ID verifies to key 0 are skipped; with DEBUG set the statement is only
    printed.
    """

    lineNum = 0

    for line in inputFile.readlines():
        lineNum += 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            probeID = tokens[0]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyObject(
            probeID, mgiTypeKey, None, lineNum, errorFile)

        if probeKey != 0:
            statement = deleteSQL % (probeKey)
            if DEBUG:
                print(statement)
            else:
                db.sql(statement, None)
Example #26
0
def make_datasets():
    """Render an HTML page for each dataset and record its file name.

    For every row returned by the dataset query, the row is extended with
    its group record, the json module and a relative public path, rendered
    through templates/dataset.html, written to public/datasets/<fname>,
    and the dataset's html_filename column is updated.
    """
    db.connect()

    # The template environment does not depend on the dataset, so it is
    # created once rather than once per row.
    jenv = Environment(loader=FileSystemLoader("templates"))

    for dataset in db.sql("select * from dataset limit 10", as_dict=True):

        # set group
        group = db.sql(
            "select `group` from dataset_group where dataset=%s limit 1",
            dataset["name"])[0][0]
        dataset["group_info"] = db.sql("select * from `group` where name=%s",
                                       group,
                                       as_dict=True)[0]

        # other libs
        dataset["json"] = json
        dataset["public_path"] = "../"  # all paths relative to

        # make page
        html = jenv.get_template("dataset.html").render(dataset)

        # NOTE(review): the regex already removes spaces along with every
        # other non-word character, so the replace() below never fires.
        # Kept as-is because stored html_filename values depend on the
        # current result -- confirm whether underscores were intended.
        fname = re.sub(r'\W+', '', dataset["name"].lower()).replace(
            " ", "_") + ".html"
        with open(os.path.join("public", "datasets", fname), "w") as htmlfile:
            htmlfile.write(html)

        db.conn.execute("update dataset set html_filename=%s where name=%s",
                        (fname, dataset["name"]))
Example #27
0
def getPrimaryKeys():
    """Work out the next primary keys and store them in module globals.

    exptKey and accKey are one past the current maximum of their tables,
    or 1000 when the query yields NULL.  mgiKey is one past maxNumericPart
    for the mgiPrefix row of ACC_AccessionMax.
    """

    global exptKey, accKey, mgiKey

    def nextOrDefault(rows):
        # a NULL maximum is replaced by the fixed starting key
        candidate = rows[0]['maxKey']
        if candidate is None:
            return 1000
        return candidate

    exptKey = nextOrDefault(db.sql('''select max(_Expt_key) + 1 as maxKey 
		from MLD_Expts''', 'auto'))

    accKey = nextOrDefault(db.sql('''select max(_Accession_key) + 1  as maxKey
		from ACC_Accession''', 'auto'))

    mgiRows = db.sql('''select maxNumericPart + 1 as maxKey
		from ACC_AccessionMax 
		where prefixPart = '%s' ''' % (mgiPrefix), 'auto')
    mgiKey = mgiRows[0]['maxKey']
Example #28
0
def executeBCP():
    """Load MGI_Synonym.bcp into MGI_Synonym and reset its sequence.

    Closes synFile, commits, logs the bcpin.csh command line to diagFile
    and runs it through os.system(), then sets mgi_synonym_seq to the
    largest _synonym_key in MGI_Synonym.
    """

    synFile.close()
    db.commit()

    bcpScript = '%s/bin/bcpin.csh' % os.environ['PG_DBUTILS']
    arguments = (bcpScript, db.get_sqlServer(), db.get_sqlDatabase(),
                 'MGI_Synonym', datadir, 'MGI_Synonym.bcp')
    loadCommand = '%s %s %s %s %s %s "|" "\\n" mgd' % arguments

    diagFile.write('%s\n' % loadCommand)
    os.system(loadCommand)

    # update mgi_synonym_seq auto-sequence
    resetSequence = ''' select setval('mgi_synonym_seq', (select max(_synonym_key) from MGI_Synonym)) '''
    db.sql(resetSequence, None)
    db.commit()
Example #29
0
def init():
    # Purpose: Set the database user/password, open the input, load and
    #       QC report files, and build the homology and mouse marker
    #       lookups from the database
    # Returns: Nothing; exit() is called with a message if a file
    #       cannot be opened
    # Assumes: Nothing
    # Effects: switches db to a single connection and queries the database
    # Throws: Nothing explicitly; non-I/O errors propagate

    global egToMarkerDict, mgiToMarkerDict
    global fpInFile, fpClustererFile, fpLoadFile, fpQcRpt

    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # Only I/O failures are reported as "could not open"; a bare except
    # would also swallow unrelated errors such as KeyboardInterrupt.
    try:
        fpInFile = open(inFilePath, 'r')
    except IOError:
        exit('Could not open file for reading %s\n' % inFilePath)

    try:
        fpLoadFile = open(loadFilePath, 'w')
    except IOError:
        exit('Could not open file for writing %s\n' % loadFilePath)

    try:
        fpQcRpt = open(qcRptPath, 'w')
    except IOError:
        exit('Could not open file for writing %s\n' % qcRptPath)

    # Create lookup of homology IDs to their [organism key, marker key]
    results = db.sql(
        '''select a.accid, a._object_key as markerKey, m._organism_key
        from acc_accession a, mrk_marker m 
        where a._mgitype_key = 2 
        and a._logicalDB_key in (47, 64, 172)
        and a._object_key = m._marker_key
        and m._marker_status_key = 1''', 'auto')
    for r in results:
        homologyLookup[r['accid']] = [
            int(r['_organism_key']),
            int(r['markerKey'])
        ]

    # Create lookup of mouse MGI IDs to their marker keys
    results = db.sql(
        '''select a.accid, a._object_key as markerKey
        from acc_accession a, mrk_marker m
        where a._mgitype_key = 2
        and a._logicalDB_key = 1
        and a.prefixPart = 'MGI:'
        and a._object_key = m._marker_key
        and m._marker_status_key = 1''', 'auto')
    for r in results:
        mouseLookup[r['accid']] = r['markerKey']

    return
Example #30
0
def qtl2():
    """Write the 'mapping but no allele' QTL sections to fp2.

    Both sections extend a base query (query1, then query2) with the same
    filters: no MRK_Notes row, at least one MLD_Expt_Marker row, and no
    non-wild-type allele.  The first section lists markers ordered by
    symbol, the second lists reference IDs ordered by numericPart; each
    ends with a row count.
    """

    # shared filter tail; the slots are the base query and the order-by column
    filterTemplate = '''%s
	  and not exists (select 1 from MRK_Notes n where m._Marker_key = n._Marker_key)
    	  and exists (select 1 from MLD_Expt_Marker mld where m._Marker_key = mld._Marker_key)
    	  and not exists (select 1 from ALL_Allele al where m._Marker_key = al._Marker_key and al.isWildType = 0)
	  order by %s
    	'''

    fp2.write('QTL markers that have mapping but no allele associated (yes/no)\n')
    fp2.write('mapping = yes/alleles = no\n')

    markerRows = db.sql(filterTemplate % (query1, 'symbol'), 'auto')

    for row in markerRows:
        cells = (row['mgiID'], mgi_utils.prvalue(row['refID']),
                 row['symbol'], row['name'])
        for cell in cells:
            fp2.write(cell + TAB)
        fp2.write(CRT)

    fp2.write(CRT + '(%d rows affected)' % (len(markerRows)) + CRT)

    fp2.write('\n\nQTL References with map records that have QTL associated w/o Alleles:\n\n')

    referenceRows = db.sql(filterTemplate % (query2, 'numericPart'), 'auto')

    for row in referenceRows:
        fp2.write(mgi_utils.prvalue(row['refID']) + CRT)
    fp2.write(CRT + '(%d rows affected)' % (len(referenceRows)) + CRT)
Example #31
0
def createVocabulary (
    vocabName,  # string; name of vocabulary to be created
    refsKey,    # integer; reference for the vocabulary
    ldbKey      # integer; logical db for the vocabulary
    ):
    # Purpose: create a new vocabulary with the given 'vocabName'
    # Returns: integer _Vocab_key for the new vocabulary
    # Assumes: nobody is inserting records into VOC_Vocab while this
    #   function is running
    # Modifies: writes to the VOC_Vocab table in the database
    # Throws: whatever bailout() raises if the insert fails

    # find the current highest vocab key

    cmd1 = 'SELECT MAX(_Vocab_key) FROM VOC_Vocab'

    results = db.sql (cmd1, 'auto')

    # MAX() over an empty table still yields one row, holding NULL, so
    # both "no rows" and "NULL maximum" must start the keys at 1.
    highestKey = None
    if results:
        highestKey = results[0]['']

    if highestKey is None:
        vocabKey = 1
    else:
        vocabKey = highestKey + 1

    # add a new record for the new vocabulary

    cmd2 = '''INSERT VOC_Vocab (_Vocab_key, _Refs_key, _LogicalDB_key,
			isSimple, isPrivate, name)
		VALUES (%d, %d, %d, %d, %d, '%s')''' % (vocabKey, refsKey,
                        ldbKey, 1, 0, vocabName)
    try:
        results = db.sql (cmd2, 'auto')
    except:
        # Deliberately broad: the exception types db.sql() can raise are
        # not visible here, and any failure must be reported via bailout().
        bailout ('Cannot create new vocabulary "%s" as key %d' % \
                (vocabName, vocabKey))

    return vocabKey
Example #32
0
def executeBCP():
    """BCP the synonym file into MGI_Synonym, then reset mgi_synonym_seq.

    synFile is closed and the transaction committed first.  The bcpin.csh
    command line is written to diagFile before it is run via os.system().
    """

    synFile.close()
    db.commit()

    script = os.environ['PG_DBUTILS'] + '/bin/bcpin.csh'
    server = db.get_sqlServer()
    database = db.get_sqlDatabase()

    command = '%s %s %s %s %s %s "|" "\\n" mgd' % (
        script, server, database, 'MGI_Synonym', datadir, 'MGI_Synonym.bcp')

    diagFile.write('%s\n' % command)
    os.system(command)

    # update mgi_synonym_seq auto-sequence
    db.sql(
        ''' select setval('mgi_synonym_seq', (select max(_synonym_key) from MGI_Synonym)) ''',
        None)
    db.commit()
Example #33
0
def bcpFiles():
    """Log the queued SQL, then (when enabled) run it and BCP the notes.

    Every statement in execSQL is always written to diagFile.  When DEBUG
    is set or bcpon is false nothing else happens.  Otherwise notesFile is
    closed, the statements are executed and committed, and the notes BCP
    command is logged and run via os.system().
    """

    # the queued statements are logged even in debug / no-bcp mode
    for statement in execSQL:
        diagFile.write(statement + '\n')

    if DEBUG or not bcpon:
        return

    notesFile.close()

    db.commit()

    # execute the sql deletions
    for statement in execSQL:
        db.sql(statement, None)
    db.commit()

    loadCommand = bcpCommand % (notesTable, notesFileName)
    diagFile.write('%s\n' % loadCommand)
    os.system(loadCommand)

    db.commit()
Example #34
0
def bcpFiles(
    recordsProcessed    # number of records processed (integer)
    ):
    """BCP the image, pane and accession files and bump the accession max.

    Does nothing when DEBUG is set or bcpon is false.  Otherwise all five
    output files are closed, the three BCP commands are logged to diagFile
    and run via os.system(), and ACC_setMax is called with
    recordsProcessed.
    """

    global referenceKey

    if DEBUG or not bcpon:
        return

    outputs = (outImageFile, outPaneFile, outAccFile,
               outCopyrightFile, outCaptionFile)
    for handle in outputs:
        handle.close()

    db.commit()

    # only these three tables are loaded here
    commands = [
        bcpCommand % (imageTable, iFileName),
        bcpCommand % (paneTable, pFileName),
        bcpCommand % (accTable, aFileName),
    ]
    for command in commands:
        diagFile.write('%s\n' % command)
        os.system(command)

    # update the max Accession ID value
    db.sql('''select * from ACC_setMax (%d)''' % (recordsProcessed), None)
    db.commit()
Example #35
0
def updateMarkerType():
    """
    Apply the new marker type to every marker in markersToUpdateDict.

    markersToUpdateDict maps an MGI ID to a marker type term.  For each
    entry the term is translated to its key through mkrTypeToKeyDict, the
    marker key is looked up with the MARKER_KEY query, and the UPDATE
    statement is run with (type key, updatedByKey, marker key).  All
    updates are committed together at the end.

    Raises KeyError when a term is missing from mkrTypeToKeyDict and
    IndexError when the MARKER_KEY query returns no row.
    """
    # indentation is spaces only: mixing a tab-indented loop body with
    # space-indented siblings is rejected by Python 3 (TabError)
    for mgiID in markersToUpdateDict:
        typeTerm = markersToUpdateDict[mgiID]
        mrkTypeKey = mkrTypeToKeyDict[typeTerm]
        results = db.sql(MARKER_KEY % mgiID, 'auto')
        mrkKey = results[0]['_Marker_key']
        db.sql(UPDATE % (mrkTypeKey, updatedByKey, mrkKey), None)
    db.commit()
Example #36
0
def updateMarkerType():
    """
    Apply the new marker type to every marker in markersToUpdateDict.

    Each (MGI ID, type term) pair is resolved to a marker type key via
    mkrTypeToKeyDict and to a marker key via the MARKER_KEY query, then
    the UPDATE statement is executed.  A single commit follows the loop.
    """
    for accID, termName in markersToUpdateDict.items():
        newTypeKey = mkrTypeToKeyDict[termName]
        markerRows = db.sql(MARKER_KEY % accID, 'auto')
        markerKey = markerRows[0]['_Marker_key']
        db.sql(UPDATE % (newTypeKey, updatedByKey, markerKey), None)
    db.commit()
def updateAll():
    """
    Update all the annotation extension display notes.

    Deletes every mgi_note row whose _notetype_key is
    DISPLAY_NOTE_TYPE_KEY, then pages through the annotation extension
    properties in batches of 10000.  Each batch is transformed to its
    display form and written to the note and note-chunk BCP files.
    Once all batches are written, both files are loaded with db.bcp
    into MGI_Note and MGI_NoteChunk.
    """

    # drop existing notes
    cmd = '''
    delete from mgi_note
    where _notetype_key = %d
    ''' % DISPLAY_NOTE_TYPE_KEY
    db.sql(cmd, None)

    # get _note_key to use for inserts
    startingNoteKey = _queryMaxNoteKey() + 1

    # begin batch processing
    batchSize = 10000
    offset = 0
    properties = _queryAnnotExtensions(limit=batchSize, offset=offset)
    providerLinkMap = _queryProviderLinkMap()

    # Nested "with" blocks close noteFile even when opening chunkFile
    # fails, and close both files if any batch raises.
    with open(NOTE_BCP_FILE, 'w') as noteFile:
        with open(NOTECHUNK_BCP_FILE, 'w') as chunkFile:
            while properties:

                # setup the lookups for IDs to display values
                _createTempIDTable(properties)
                termIDMap = _queryTermIDMap()
                markerIDMap = _queryMarkerIDMap()

                # transform the properties to their display/links
                properties = transformProperties(
                    properties, termIDMap, markerIDMap, providerLinkMap)

                # write BCP files
                _writeToBCPFile(properties, noteFile, chunkFile,
                                startingNoteKey)

                # fetch new batch of properties; the key counter advances
                # by a whole batch regardless of how many rows were written
                # (presumably at most one note key per property -- TODO confirm)
                startingNoteKey += batchSize
                offset += batchSize
                properties = _queryAnnotExtensions(limit=batchSize,
                                                   offset=offset)

    # insert the new data
    db.bcp(NOTE_BCP_FILE, 'MGI_Note')
    db.bcp(NOTECHUNK_BCP_FILE, 'MGI_NoteChunk')
Example #38
0
def writeAccBCP():
    '''
    Write accession BCP records for the rows in WRK_EntrezGene_Bucket0.

    Two passes are made over the bucket for the current taxId: first the
    rows with refRequired = 1, then the rows with refRequired = 0.  Every
    row produces one line in accFile; rows of the first pass also produce
    one line in accrefFile.  The module-level accKey is incremented once
    per row.

    Returns nothing.
    '''

    global accKey, userKey

    selectFrom = 'select _Object_key, _LogicalDB_key, accID, private, geneID ' + \
        'from WRK_EntrezGene_Bucket0 '

    # (where-clause template, whether a line is also written to accrefFile)
    passes = (
        ('where taxID = %s and refRequired = 1 ', True),
        ('where taxID = %s and refRequired = 0', False),
    )

    for whereTemplate, withReference in passes:

        rows = db.sql(selectFrom + whereTemplate % (taxId), 'auto')

        for row in rows:

            # -1 means the object key has to come from the gene ID lookup
            markerKey = row['_Object_key']
            if markerKey == -1:
                markerKey = geneIDtoMarkerKey[row['geneID']]

            prefixPart, numericPart = accessionlib.split_accnum(row['accID'])
            accFields = (
                accKey, row['accID'], mgi_utils.prvalue(prefixPart),
                mgi_utils.prvalue(numericPart), row['_LogicalDB_key'],
                markerKey, mgiTypeKey, row['private'],
                userKey, userKey, loaddate, loaddate)
            accFile.write(
                '%d|%s|%s|%s|%d|%d|%s|%d|1|%s|%s|%s|%s\n' % accFields)

            if withReference:
                refFields = (accKey, referenceKey, userKey, userKey,
                             loaddate, loaddate)
                accrefFile.write('%d|%s|%s|%s|%s|%s\n' % refFields)

            accKey = accKey + 1
Example #39
0
def init():
    # Purpose: Initialization of database connection and file descriptors,
    #       and next available database keys
    # Returns: nothing; calls exit(1, message) when a file cannot be opened
    # Assumes: Nothing
    # Effects: opens a database connection; sets the module-level file
    #       descriptors and the nextClusterKey / nextMemberKey /
    #       nextAccessionKey counters
    # Throws: KeyError if MGD_DBUSER or MGD_DBPASSWORDFILE is not set

    global fpInFile, fpClusterBCP, fpMemberBCP, fpAccessionBCP
    global nextClusterKey, nextMemberKey, nextAccessionKey

    # create file descriptors for input/output files; only I/O failures
    # are turned into an exit message so that unrelated errors (and
    # Ctrl-C) are not mislabelled as "Could not open file"
    try:
        fpInFile = open(inFile, 'r')
    except (IOError, OSError):
        exit(1, 'Could not open file %s\n' % inFile)

    try:
        fpClusterBCP = open(clusterBCP, 'w')
    except (IOError, OSError):
        exit(1, 'Could not open file %s\n' % clusterBCP)

    try:
        fpMemberBCP = open(memberBCP, 'w')
    except (IOError, OSError):
        exit(1, 'Could not open file %s\n' % memberBCP)

    try:
        fpAccessionBCP = open(accessionBCP, 'w')
    except (IOError, OSError):
        exit(1, 'Could not open file %s\n' % accessionBCP)

    # get next ACC_Accession, MRK_Cluster and MRK_ClusterMember key
    user = os.environ['MGD_DBUSER']
    passwordFileName = os.environ['MGD_DBPASSWORDFILE']
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # max() + 1 is NULL on an empty table, hence the fallback of 1000
    results = db.sql('''select max(_Cluster_key) + 1 as nextKey
	    from MRK_Cluster''', 'auto')
    if results[0]['nextKey'] is None:
        nextClusterKey = 1000
    else:
        nextClusterKey = results[0]['nextKey']

    results = db.sql('''select max(_ClusterMember_key) + 1 as nextKey
	    from MRK_ClusterMember''', 'auto')
    if results[0]['nextKey'] is None:
        nextMemberKey = 1000
    else:
        nextMemberKey = results[0]['nextKey']

    # same guard and fallback as the two keys above, so an empty
    # ACC_Accession table does not leave the counter set to None
    results = db.sql('''select max(_Accession_key) + 1 as nextKey
	    from ACC_Accession''', 'auto')
    if results[0]['nextKey'] is None:
        nextAccessionKey = 1000
    else:
        nextAccessionKey = results[0]['nextKey']

    return
Example #40
0
def setPrimaryKeys():
    """
    Set the module-level refKey and aliasKey counters.

    Each is read from the 'maxKey' column of the first row returned by a
    "max(key) + 1" query against PRB_Reference and PRB_Alias respectively.
    """

    global refKey, aliasKey

    refKey = db.sql(
        'select maxKey = max(_Reference_key) + 1 from PRB_Reference',
        'auto')[0]['maxKey']

    aliasKey = db.sql(
        'select maxKey = max(_Alias_key) + 1 from PRB_Alias',
        'auto')[0]['maxKey']
Example #41
0
def processByAllele(objectKey):
    # Purpose: processes data for a specific Allele
    # objectKey is concatenated into the SQL text, so it must be a string
    # Effects: runs querySQL1 and querySQL2 restricted to the allele, sets
    #       the module-level deleteSQL and then calls process('sql')

    global deleteSQL

    alleleClause = " and aa._Allele_key = " + objectKey

    for baseQuery in (querySQL1, querySQL2):
        db.sql(baseQuery + alleleClause, None)

    deleteSQL = deleteSQLAllele % (objectKey)
    process('sql')
Example #42
0
def bcpFiles():
    # Purpose: BCPs the data into the database
    # Returns: 0 in every case (also when DEBUG == 'true' skips the load)
    # Assumes: connection to the database
    # Effects: closes the output files, runs one BCP shell command per
    #       table (logged to fpDiagFile first), then resets five
    #       auto-sequences and commits
    # Throws: Nothing

    if DEBUG == 'true':
        return 0

    closeFiles()

    prefix = '%s %s %s' % (BCP_COMMAND, db.get_sqlServer(), db.get_sqlDatabase())
    suffix = '"|" "\\n" mgd'

    # (table, bcp file) pairs in load order
    loads = (
        (alleleTable, alleleFileName),
        (mutationTable, mutationFileName),
        (refTable, refFileName),
        (accTable, accFileName),
        (noteTable, noteFileName),
        (annotTable, annotFileName),
    )
    commands = ['%s %s "/" %s %s' % (prefix, tableName, fileName, suffix)
                for tableName, fileName in loads]

    db.commit()

    for command in commands:
        fpDiagFile.write('%s\n' % command)
        os.system(command)

    # one statement per auto-sequence that has to catch up with the load
    sequenceResets = (
        ''' select setval('all_allele_mutation_seq', (select max(_Assoc_key) from ALL_Allele_Mutation)) ''',
        ''' select setval('all_allele_seq', (select max(_Allele_key) from ALL_Allele)) ''',
        ''' select setval('mgi_reference_assoc_seq', (select max(_Assoc_key) + 1 from MGI_Reference_Assoc)) ''',
        ''' select setval('mgi_note_seq', (select max(_Note_key) from MGI_Note)) ''',
        ''' select setval('voc_annot_seq', (select max(_Annot_key) from VOC_Annot)) ''',
    )
    for resetSQL in sequenceResets:
        db.sql(resetSQL, None)

    db.commit()

    return 0
Example #43
0
def createExcluded():
    """
    Build the temporary table 'excluded' and index it.

    The table holds the _Probe_key of every PRB_Notes row whose note
    starts with 'The source of the material used to create this cDNA
    probe was different'.  A begin and an end timestamp are printed
    around the work.
    """

    # print(...) with a single argument behaves the same on Python 2 and 3
    print('excluded begin...%s' % (mgi_utils.date()))
    db.sql('''select _Probe_key INTO TEMPORARY TABLE excluded from PRB_Notes 
	where note like 'The source of the material used to create this cDNA probe was different%'
	''', None)
    db.sql('create index idx1 on excluded(_Probe_key)', None)
    print('excluded end...%s' % (mgi_utils.date()))
Example #44
0
def processByAssay(objectKey):
    # Purpose: processes data for a specific Assay
    # objectKey is concatenated into the SQL text, so it must be a string
    # Effects: runs querySQL1 restricted to the assay and querySQL2 with an
    #       always-false condition, sets the module-level deleteSQL and
    #       then calls process('sql')

    global deleteSQL

    assayQuery = ''.join((querySQL1, " and e._Assay_key = ", objectKey))
    db.sql(assayQuery, None)

    # "0 = 1" makes the second query run but match no rows
    db.sql(querySQL2 + " and 0 = 1", None)

    deleteSQL = deleteSQLAssay % (objectKey)

    # NOTE(review): isQuerySQL2 is not declared global, so this only binds
    # a local that nothing reads -- confirm whether a module-level flag
    # was meant to be cleared here.
    isQuerySQL2 = 0

    process('sql')
Example #45
0
def processFile():
    '''
    Translate the bad-name/good-name input file into the output file.

    Lines starting with '!' are skipped.  Each remaining line is split on
    delim; the bad name is field 0 for parseType 'Library' and field 1
    otherwise, and the good name is always field 2.  The good name is
    looked up in the table that corresponds to parseType.  When it exists
    and differs from the bad name, a line is written to outputFile (with
    the strain accession ID in the first column for 'Strains', and an
    empty first column otherwise).  When it does not exist, an
    'Invalid good name' message goes to errorFile.

    Returns nothing.
    '''

    # For each line in the input file

    for line in inputFile.readlines():

        if line[0] == '!':
            continue

        # drop the last character (the newline) before splitting
        tokens = line[:-1].split(delim)

        try:
            if parseType == 'Library':
                badName = tokens[0]
            else:
                badName = tokens[1]
            goodName = tokens[2]

        except IndexError:
            # too few fields on the line
            errorFile.write('Invalid line: %s\n' % (line))
            continue

        # NOTE(review): a parseType other than the four below leaves
        # 'results' unset (or stale from the previous line).
        if parseType == 'Tissues':
            results = db.sql('select _Tissue_key from PRB_Tissue where tissue = "%s"' % (goodName), 'auto')
        elif parseType == 'Cell':
            results = db.sql('select term from VOC_Term where term = "%s"' % (goodName), 'auto')
        elif parseType == 'Library':
            results = db.sql('select _Source_key from PRB_Source where name = "%s"' % (goodName), 'auto')
        elif parseType == 'Strains':
            results = db.sql('select a.accID from PRB_Strain_Acc_View a, PRB_Strain s ' + \
                'where s.strain = "%s" ' % (goodName) + \
                'and s._Strain_key *= a._Object_key ' + \
                'and a._LogicalDB_key = 1 ' + \
                'and a.prefixPart = "MGI:" ' + \
                'and a.preferred = 1', 'auto')

        if len(results) > 0 and badName != goodName:
            # The strain branch has to test the same value as the query
            # selection above ('Strains'); the former test against 'strain'
            # never matched, so the accession ID was never written.
            if parseType == 'Strains':
                outputFile.write(mgi_utils.prvalue(results[0]['accID']) + delim + goodName + delim + badName + delim + createdBy + '\n')
            else:
                outputFile.write(delim + goodName + delim + badName + delim + createdBy + '\n')
        elif len(results) == 0:
            errorFile.write('Invalid good name: %s\n' % (goodName))
Example #46
0
def setPrimaryKeys():
    """
    Set the module-level refKey and aliasKey counters.

    Each is read from the 'maxKey' column of the first row returned by a
    "max(key) + 1" query against PRB_Reference and PRB_Alias respectively.
    """

    global refKey, aliasKey

    referenceRows = db.sql(
        'select max(_Reference_key) + 1 as maxKey from PRB_Reference', 'auto')
    refKey = referenceRows[0]['maxKey']

    aliasRows = db.sql(
        'select max(_Alias_key) + 1 as maxKey from PRB_Alias', 'auto')
    aliasKey = aliasRows[0]['maxKey']
Example #47
0
def verifyGenotype():
    """
    Report allele pairs that the given user created more than once.

    The first query collects, into the temp table #allelepair, every
    allele-pair combination created by the login 'createdby' that occurs
    more than once.  The second query joins that table to the display
    values.  When nothing is found, the module-level testPassed is set to
    'pass' and a single summary line is written to fpLogTest; otherwise
    one line per duplicate is written using the current value of
    testPassed.

    Returns 0.
    """

    global testPassed

    # the login is substituted through '%s'; without a placeholder the
    # formatting below raises "not all arguments converted"
    query = '''
        select ap._Marker_key, ap._Allele_key_1, ap._Allele_key_2, 
	       ap._MutantCellLine_key_1, ap._MutantCellLine_key_2, ap._PairState_key
	into #allelepair
        from GXD_Genotype g, GXD_AllelePair ap, MGI_User u
        where g._Genotype_key = ap._Genotype_key
        and g._CreatedBy_key = u._User_key
	and u.login = '%s'
        group by _Marker_key, _Allele_key_1, _Allele_key_2, _MutantCellLine_key_1, _MutantCellLine_key_2, _PairState_key
        having count(*) > 1
	''' % (createdby)

    #print query
    # no result parser is needed for a "select into"; pass None like the
    # other non-fetching db.sql calls rather than the string 'None'
    db.sql(query, None)

    query2 = '''
	select a.symbol as alleleSymbol, ma.accID as markerID, aa.accID as alleleID,
		mcl.cellLine as mutantID, t.term as alleleState
	from #allelepair ap, ALL_Allele a, ACC_Accession ma, ACC_Accession aa, ALL_CellLine mcl, VOC_Term t
	where ap._Allele_key_1 = a._Allele_key
     	and ap._Marker_key = ma._Object_key
     	and ma._MGIType_key = 2
     	and ma._LogicalDB_key = 1
     	and ap._Allele_key_1 = aa._Object_key
     	and aa._MGIType_key = 11
     	and aa._LogicalDB_key = 1
     	and ap._MutantCellLine_key_1 = mcl._CellLine_key
	and ap._PairState_key = t._Term_key
	'''

    #print query2
    results = db.sql(query2, 'auto')
    if len(results) == 0:
        testPassed = 'pass'

        fpLogTest.write(testDisplay % \
            (testPassed, 'duplicate genotypes', '', '', '', '', '', '', '', '', '', ''))
    else:
        for r in results:
            alleleSymbol = r['alleleSymbol']
            markerID = r['markerID']
            alleleID = r['alleleID']
            mutantID = r['mutantID']
            # bound under the name the write below uses (it used to be
            # bound as alleleStatus, which raised NameError)
            alleleState = r['alleleState']
            # twelve values, the same count the "pass" branch feeds to
            # testDisplay -- NOTE(review): confirm against testDisplay
            fpLogTest.write(testDisplay % \
                (testPassed, 'duplicate genotypes', '', '', \
                 alleleSymbol, markerID, alleleID, mutantID, \
                 alleleState, '', '', ''))

    return 0
Example #48
0
def bcpFiles():
    '''
    Bulk-load the experiment, experiment-marker, accession and note files.

    Closes the four output files, commits, then builds one
    $PG_DBUTILS/bin/bcpin.csh command per table using the current working
    directory as the file location.  All four commands are written to
    diagFile first and then run through the shell in the same order.
    Finally mld_expts_seq and mld_expt_marker_seq are reset to the largest
    key of their tables, with a commit after each.

    Returns nothing.  The exit status of the shell commands is not checked.
    '''

    exptFile.close()
    exptMarkerFile.close()
    accFile.close()
    noteFile.close()
    db.commit()

    loaderScript = os.environ['PG_DBUTILS'] + '/bin/bcpin.csh'
    workDir = os.getcwd()
    template = '%s %s %s %s %s %s "%s" "\\n" mgd'

    # (table, bcp file) pairs in load order
    targets = (
        ('MLD_Expts', exptFileName),
        ('MLD_Expt_Marker', exptMarkerFileName),
        ('ACC_Accession', accFileName),
        ('MLD_Notes', noteFileName),
    )
    commands = [
        template % (loaderScript, db.get_sqlServer(), db.get_sqlDatabase(),
                    tableName, workDir, fileName, bcpdelim)
        for tableName, fileName in targets
    ]

    # log every command before the first one is executed
    for command in commands:
        diagFile.write('%s\n' % command)

    for command in commands:
        os.system(command)

    # bring both auto-sequences back in step, committing after each
    sequenceResets = (
        ''' select setval('mld_expts_seq', (select max(_Expt_key) from MLD_Expts)) ''',
        ''' select setval('mld_expt_marker_seq', (select max(_Assoc_key) from MLD_Expt_Marker)) ''',
    )
    for resetSQL in sequenceResets:
        db.sql(resetSQL, None)
        db.commit()
Example #49
0
def deleteByUser():
    # Purpose: delete records created by current load
    # Returns: nothing
    # Assumes: a connection has been made to the database
    # Effects: prints a timestamp and a progress message, deletes every
    #       row of 'table' whose _CreatedBy_key equals CREATEDBY_KEY, and
    #       commits
    # Throws: nothing

    # print(...) with a single argument behaves the same on Python 2 and 3
    print('%s' % mgi_utils.date())
    print('Deleting records for this user')
    db.sql('''delete from %s where _CreatedBy_key = %s''' % (table, CREATEDBY_KEY), None)
    db.commit()
Example #50
0
def init():
    """
    Open the files and fill the two module-level lookups.

    markerToUniprotLookup maps a lower-cased MGI ID to the list of
    Association objects found by the first query.  markerLookup maps a
    lower-cased MGI ID to a Marker object carrying the organism key,
    marker status key and the accession's 'preferred' value.

    Returns nothing.
    """
    global markerToUniprotLookup
    global markerLookup
    openFiles()

    # load lookups
    # lookup of existing uniprot load associations
    results = db.sql('''select a1.accid as uniprotID, a1._LogicalDB_key, 
	m.symbol, a2.accid as mgiID
    from ACC_Accession a1, MRK_Marker m, ACC_Accession a2
    where a1. _MGIType_key = 2
    and a1._LogicalDB_key in (13, 41)
    and a1._CreatedBy_key = 1442 /*uniprotload_assocload*/
    and a1._Object_key = m._Marker_key
    and m._Organism_key = 1
    and m._Marker_Status_key = 1
    and m._Marker_key = a2._Object_key
    and a2. _MGIType_key = 2
    and a2._LogicalDB_key = 1
    and a2.preferred = 1
    and a2.prefixPart = 'MGI:' ''', 'auto')

    for r in results:
        a = Association()
        uniprotID = r['uniprotID'].lower()
        a.uniprotID = uniprotID
        mgiID = r['mgiID'].lower()
        a.mgiID = mgiID
        a.logicalDbKey = r['_LogicalDB_key']  # swiss-prot or trembl

        # setdefault replaces the Python-2-only has_key() test
        markerToUniprotLookup.setdefault(mgiID, []).append(a)

    # load lookup of all marker MGI IDs
    results = db.sql('''select m.symbol, m._Organism_key, 
	m._Marker_Status_key, a.accid as mgiID, a.preferred
    from ACC_Accession a, MRK_Marker m
    where a. _MGIType_key = 2
    and a._LogicalDB_key = 1
    and a.prefixPart = 'MGI:'
    and a._Object_key = m._Marker_key
    ''', 'auto')
    for r in results:
        m = Marker()
        m.markerID = r['mgiID'].lower()
        # spelled exactly like the selected column (_Organism_key); the
        # former '_Organism_Key' only worked if result keys ignore case
        m.organism = r['_Organism_key']
        m.markerStatus = r['_Marker_Status_key']
        m.markerPreferred = r['preferred']

        markerLookup[m.markerID] = m

    return
Example #51
0
def processCommandLine():
    """
    Configure the database connection from the two command-line arguments.

    sys.argv[1] is used as the server and sys.argv[2] as the database.
    bailout() is called when the argument count is wrong or when a test
    query against MGI_dbInfo fails.
    """
    argv = sys.argv
    if len(argv) != 3:
        bailout('Incorrect command-line; need two parameters.')

    db.set_sqlServer(argv[1])
    db.set_sqlDatabase(argv[2])
    db.useOneConnection(1)

    # a trivial query proves that the server/database pair is usable
    probeQuery = 'select count(1) from MGI_dbInfo'
    try:
        db.sql(probeQuery, 'auto')
    except:
        bailout('Cannot query database %s..%s' % (argv[1], argv[2]))
Example #52
0
 def runSQL(self, sql, label):
     """
     Run SQL and return the results of db.sql(..., 'auto').

     sql   -- a list of SQL statements, or a single string that is split
              into statements on self.SQLSEPARATOR
     label -- text passed to verbose() before the SQL is run

     The elapsed time is reported through verbose() afterwards.
     """
     startTime = time.time()
     verbose(label + '...')
     # isinstance also accepts str subclasses, unlike an exact type match
     if isinstance(sql, str):
         sql = sql.split(self.SQLSEPARATOR)
     results = db.sql(sql, 'auto')
     verbose("SQL time: %8.3f seconds\n" % (time.time() - startTime))
     return results
Example #53
0
def deleteByUser():
    # Purpose: delete records created by current load
    # Returns: nothing
    # Assumes: a connection has been made to the database
    # Effects: prints a timestamp and a progress message, deletes every
    #       row of 'table' whose _CreatedBy_key equals CREATEDBY_KEY, and
    #       commits
    # Throws: nothing

    timestamp = mgi_utils.date()
    print('%s' % timestamp)
    print('Deleting records for this user')

    statement = '''delete from %s where _CreatedBy_key = %s''' % (
        table, CREATEDBY_KEY)
    db.sql(statement, None)
    db.commit()
Example #54
0
def processSusceptibility():
    """
    Insert an ACC_Accession row for each OMIM term carrying RO:0003304.

    Reads the OBO file named by the OBO_FILE environment variable.  Within
    each [Term] stanza an 'id: OMIM:' line sets the current OMIM ID
    (OMIM:000000 is skipped), and every following
    'relationship: RO:0003304' line of that stanza produces one insert
    whose object key comes from loadlib.verifyObject() for the first
    token after the relationship.  All inserts are committed at the end.

    Returns 0.
    """

    # insert statement
    INSERT_ACCESSION = '''insert into ACC_Accession 
      values ((select max(_Accession_key) + 1 from ACC_Accession), 
	   '%s', '%s', %s, 15, %s, 13, 0, 0)
    '''

    doFileName = os.environ['OBO_FILE']

    omimIdValue = 'id: OMIM:'
    relValue = 'relationship: RO:0003304'
    skipValue = 'OMIM:000000'
    foundOMIM = 0
    omimId = None

    # "with" closes the file even when a lookup or insert raises
    with open(doFileName, 'r') as doFile:

        for line in doFile:

            # find [Term]
            # find relationship: RO:0003304

            # lines keep their trailing newline, so compare the stripped
            # text; otherwise the flag is never reset between stanzas and
            # a relationship is attributed to an earlier term's OMIM ID
            if line.strip() == '[Term]':
                foundOMIM = 0

            elif line[:9] == omimIdValue:
                omimId = line[4:-1]
                if omimId == skipValue:
                    continue
                foundOMIM = 1

            elif foundOMIM and line[:24] == relValue:

                tokens = line[25:-1].split(' ')
                doId = tokens[0]

                prefixPart, numericPart = accessionlib.split_accnum(omimId)
                objectKey = loadlib.verifyObject(doId, 13, None, None, None)
                addSQL = INSERT_ACCESSION % (omimId, prefixPart, numericPart, objectKey)
                db.sql(addSQL, None)

    db.commit()
    return 0
Example #55
0
def writeAccBCP():
    '''
    # requires:
    #
    # effects:
    #   Creates appropriate BCP records: one accFile line per row of
    #   WRK_EntrezGene_Bucket0 for the current taxId, plus one accrefFile
    #   line for each row that has refRequired = 1.  The module-level
    #   accKey is incremented once per row.
    #
    # returns:
    #   nothing
    #
    '''

    global accKey, userKey

    # Indentation is spaces only.  The "else:" lines used to be indented
    # with spaces inside tab-indented blocks, which Python 3 rejects
    # with TabError.

    # records that require a reference

    results = db.sql('select _Object_key, _LogicalDB_key, accID, private, geneID ' + \
        'from WRK_EntrezGene_Bucket0 ' + \
        'where taxID = %s and refRequired = 1 ' % (taxId), 'auto')

    for r in results:

        # -1 means the object key has to come from the gene ID lookup
        if r['_Object_key'] == -1:
            objectKey = geneIDtoMarkerKey[r['geneID']]
        else:
            objectKey = r['_Object_key']

        prefixPart, numericPart = accessionlib.split_accnum(r['accID'])
        accFile.write('%d|%s|%s|%s|%d|%d|%s|%d|1|%s|%s|%s|%s\n'
            % (accKey, r['accID'], mgi_utils.prvalue(prefixPart), mgi_utils.prvalue(numericPart), r['_LogicalDB_key'], objectKey, mgiTypeKey, r['private'], userKey, userKey, loaddate, loaddate))
        accrefFile.write('%d|%s|%s|%s|%s|%s\n' % (accKey, referenceKey, userKey, userKey, loaddate, loaddate))
        accKey = accKey + 1

    # records that don't require a reference

    results = db.sql('select _Object_key, _LogicalDB_key, accID, private, geneID ' + \
        'from WRK_EntrezGene_Bucket0 ' + \
        'where taxID = %s and refRequired = 0' % (taxId), 'auto')

    for r in results:

        if r['_Object_key'] == -1:
            objectKey = geneIDtoMarkerKey[r['geneID']]
        else:
            objectKey = r['_Object_key']

        prefixPart, numericPart = accessionlib.split_accnum(r['accID'])
        accFile.write('%d|%s|%s|%s|%d|%d|%s|%d|1|%s|%s|%s|%s\n'
            % (accKey, r['accID'], mgi_utils.prvalue(prefixPart), mgi_utils.prvalue(numericPart), r['_LogicalDB_key'], objectKey, mgiTypeKey, r['private'], userKey, userKey, loaddate, loaddate))
        accKey = accKey + 1
Example #56
0
def setPrimaryKeys():
    """
    Set the module-level starting keys used by the load.

    Each key is the 'maxKey' column of the first row returned by its
    query.  The queries run in a fixed order because the nextval() calls
    advance their sequences as a side effect.
    """

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, mutationKey, mutantKey, annotKey

    def fetchMaxKey(query):
        # run one query and pull 'maxKey' out of its first row
        return db.sql(query, 'auto')[0]['maxKey']

    alleleKey = fetchMaxKey(''' select nextval('all_allele_seq') as maxKey ''')

    refAssocKey = fetchMaxKey(
        ''' select nextval('mgi_reference_assoc_seq') as maxKey ''')

    accKey = fetchMaxKey(
        'select max(_Accession_key) + 1 as maxKey from ACC_Accession')

    noteKey = fetchMaxKey(''' select nextval('mgi_note_seq') as maxKey ''')

    mgiKey = fetchMaxKey(
        ''' select max(maxNumericPart) + 1 as maxKey from ACC_AccessionMax where prefixPart = '%s' '''
        % (mgiPrefix))

    mutationKey = fetchMaxKey(
        ''' select nextval('all_allele_mutation_seq') as maxKey ''')

    mutantKey = fetchMaxKey(
        ''' select nextval('all_allele_cellline_seq') as maxKey ''')

    annotKey = fetchMaxKey(''' select nextval('voc_annot_seq') as maxKey ''')
Example #57
0
def init():
    '''
    Open the output files and fetch the starting keys.

    effects:
    1. Opens diagFile, accFile, accrefFile and markerFile for writing;
       exit(1, message) is called when one of them cannot be opened
    2. Sets accKey to max(_Accession_key) + 1 and markerKey to the next
       value of mrk_marker_seq
    3. Sets userKey from loadlib.verifyUser(user, 0, None)

    returns: nothing
    '''

    global accFile, accrefFile, markerFile, diagFile
    global accKey, userKey, markerKey

    def openForWriting(fileName):
        # any failure to open is reported through exit()
        try:
            return open(fileName, 'w')
        except:
            exit(1, 'Could not open file %s\n' % fileName)

    diagFile = openForWriting(diagFileName)
    accFile = openForWriting(accFileName)
    accrefFile = openForWriting(accrefFileName)
    markerFile = openForWriting(markerFileName)

    #
    # Get next available primary key
    #

    accKey = db.sql(
        'select max(_Accession_key) + 1 as maxKey from ACC_Accession',
        'auto')[0]['maxKey']

    markerKey = db.sql(''' select nextval('mrk_marker_seq') as maxKey ''',
                       'auto')[0]['maxKey']

    userKey = loadlib.verifyUser(user, 0, None)
Example #58
0
def enroll():
    """
    Register a new user from the JSON body of a POST request.

    Reads 'username', 'email', 'password' and 'bankName' from the request
    JSON, derives a directory name from the encrypted username, creates
    "Users/<name>/" and inserts a row into the users table.

    Side effects: rebinds the module globals username and user_directory.

    Returns one of the plain strings "created user", "fail create user",
    "user already exists", or "fail" (for any non-POST request).
    """
    global username
    global user_directory

    if request.method != 'POST':
        return "fail"

    data = request.get_json()

    username = data['username']
    email = data['email']
    password = data['password']
    bank = data['bankName']

    # Encrypt the user's password.
    # NOTE(review): AES-ECB with a shared key is reversible encryption,
    # not password hashing; flagged here, deliberately left unchanged so
    # stored values stay compatible.
    cipher = AES.new(
        os.getenv("PASSWORD_ECD").encode('utf-8'),
        AES.MODE_ECB)  # never use ECB in strong systems obviously
    encoded_password = base64.b64encode(
        cipher.encrypt(password.encode('utf-8').rjust(32)))

    # Encrypt the username to obtain the server-side storage path
    encoded_path = base64.b64encode(
        cipher.encrypt(username.encode('utf-8').rjust(32))).decode('utf-8')
    user_path = removeSpecialChars(encoded_path)

    user_directory = "Users/" + user_path + "/"

    if os.path.exists(user_directory):
        return "user already exists"

    try:
        os.makedirs(user_directory)
        db.sql(
            "INSERT INTO users (user_id, password, email, pathvoice, isvoice, bank) VALUES (%s, %s, %s, %s, %s, %s)",
            (username, encoded_password, email, user_path, 1, bank))
        print("[ * ] Directory ", username, " Created ...")
        return "created user"
    except (ValueError, OSError) as error:
        # os.makedirs reports failures (permissions, a concurrent request
        # creating the same directory) as OSError, so it is caught here
        # too instead of escaping the handler
        print(error)
        return "fail create user"
Example #59
0
def createBCPFile():
    """
    Write the accession bcp file for the GENSAT associations.

    Every EntrezGene ID in the temp table is matched (case-insensitively)
    to the marker that carries it as an accession ID.  One tab-delimited
    record per match is written to fpAccBCPFile, except for IDs present
    in badIDs.  The module-level accKey is incremented once per record
    written, and the number of records is printed at the end.

    Returns nothing.
    """
    global accKey

    # print(...) with a single argument behaves the same on Python 2 and 3
    print('Create the bcp file for the GENSAT associations')

    #
    # Find the marker key that the EntrezGene ID should be associated with.
    # Do not make an association for any EntrezGene IDs that are on the
    # discrepancy report.
    #
    cmds = []
    cmds.append('select t.entrezgeneID, a._Object_key as markerKey ' + \
                'from ' + tempTable + ' t, ACC_Accession a ' + \
                'where lower(t.entrezgeneID) = lower(a.accID) and ' + \
                      'a._MGIType_key = ' + str(markerMGITypeKey) + ' and ' + \
                      'a._LogicalDB_key = ' + str(egLogicalDBKey) + ' ' + \
                'order by t.entrezgeneID')

    results = db.sql(cmds, 'auto')

    count = 0

    #
    # Write the records to the bcp file.
    #
    for r in results[0]:
        entrezgeneID = r['entrezgeneID']
        markerKey = r['markerKey']

        #
        # Skip the EntrezGene ID if it was written to the discrepancy report.
        # ("in" replaces dict.has_key(), which Python 3 removed.)
        #
        if entrezgeneID in badIDs:
            continue

        #
        # Get the prefix and numeric parts of the EntrezGene ID and write
        # a record to the bcp file.
        #
        (prefixPart, numericPart) = accessionlib.split_accnum(entrezgeneID)

        fields = [
            str(accKey),
            entrezgeneID,
            prefixPart,
            str(numericPart),
            str(gensatLogicalDBKey),
            str(markerKey),
            str(markerMGITypeKey),
            PRIVATE,
            PREFERRED,
            str(createdByKey),
            str(createdByKey),
            loadDate,
            loadDate,
        ]
        fpAccBCPFile.write(TAB.join(fields) + NL)

        count = count + 1
        accKey = accKey + 1

    print('Number of GENSAT associations: ' + str(count))

    return
Example #60
0
def doCount(label, q):
    """
    Run the SQL statements in q and print the resulting count.

    label -- text printed after the count
    q     -- list of sql stmts, the last one selecting the count as 'num'

    Writes the count, right-aligned in 7 columns, a tab and the label to
    stdout.
    """
    resultSets = db.sql(q, 'auto')
    lastSet = resultSets[-1]
    total = lastSet[0]['num']
    sys.stdout.write("%7d\t%s\n" % (total, label))