def init():
    '''
    # requires:
    #
    # effects:
    # 1. Processes command line options
    # 2. Initializes local DBMS parameters
    # 3. Initializes global file descriptors
    #
    # returns:
    #
    '''

    global synFile, diagFile
    global synKey, userKey

    try:
        diagFile = open(diagFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        synFile = open(synFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % synFileName)

    #
    # Get next available primary key
    #

    results = db.sql(''' select nextval('mgi_synonym_seq') as maxKey ''', 'auto')
    synKey = results[0]['maxKey']

    userKey = loadlib.verifyUser(user, 0, None)
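# The loaders in this section call an exit(status, message) helper that is not
# shown in this excerpt. A minimal sketch under that assumption (the real helper
# may also write end-of-run timestamps and close the bcp files):

import sys

def exit(status, message=None):
    # hypothetical sketch: log the message to the diagnostic/error files if
    # they are open, then terminate with the given status
    if message is not None:
        sys.stderr.write('\n' + str(message) + '\n')
        try:
            diagFile.write('\n' + str(message) + '\n')
            errorFile.write('\n' + str(message) + '\n')
            diagFile.close()
            errorFile.close()
        except:
            pass
    sys.exit(status)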
def init():
    '''
    # requires:
    #
    # effects:
    # 1. Processes command line options
    # 2. Initializes local DBMS parameters
    # 3. Initializes global file descriptors
    #
    # returns:
    #
    '''

    global accFile, accrefFile, markerFile, diagFile
    global accKey, userKey, markerKey

    try:
        diagFile = open(diagFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        accFile = open(accFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % accFileName)

    try:
        accrefFile = open(accrefFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % accrefFileName)

    try:
        markerFile = open(markerFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % markerFileName)

    #
    # Get next available primary key
    #

    results = db.sql('select max(_Accession_key) + 1 as maxKey from ACC_Accession', 'auto')
    accKey = results[0]['maxKey']

    results = db.sql(''' select nextval('mrk_marker_seq') as maxKey ''', 'auto')
    markerKey = results[0]['maxKey']

    userKey = loadlib.verifyUser(user, 0, None)
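# The try/open/except/exit pattern above is repeated for every file an init()
# routine touches. A hypothetical helper (openFileOrExit is my name, not part
# of the source) could factor it out:

def openFileOrExit(fileName, mode='w'):
    # open a file or report the failure and stop, mirroring the inline blocks above
    try:
        return open(fileName, mode)
    except IOError:
        exit(1, 'Could not open file %s\n' % fileName)

# usage sketch:
# diagFile = openFileOrExit(diagFileName, 'w')
# accFile = openFileOrExit(accFileName, 'w')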
def processFile():

    global execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            probeID = tokens[0]
            notes = tokens[1]
            createdBy = tokens[2]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe: %s\n' % (probeID))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # Notes
        # automatically deletes any existing notes for this probe

        if mode in ('preview', 'load'):
            execSQL.append(deleteSQL % (probeKey))

        if len(notes) > 0:
            notesFile.write('%s\t%s\t%s\t%s\n' % (probeKey, notes, loaddate, loaddate))
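# processFile() above only queues the per-probe delete statements in execSQL;
# this excerpt does not show how they are executed. A plausible flush step,
# stated as an assumption rather than the loader's actual code:

def processSQL():
    # run each queued statement, then commit
    for cmd in execSQL:
        db.sql(cmd, None)
    db.commit()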
def setPrimaryKeys():
    """
    Assign global primary key variables using max keys from database
    """

    global seqKey, assocKey, accKey, userKey

    results = db.sql("select max(_Sequence_key) + 1 as maxKey from %s" % (seqTable), "auto")
    seqKey = results[0]["maxKey"]

    results = db.sql("select max(_Assoc_key) + 1 as maxKey from %s" % (sourceTable), "auto")
    assocKey = results[0]["maxKey"]

    results = db.sql("select max(_Accession_key) + 1 as maxKey from %s" % (accTable), "auto")
    accKey = results[0]["maxKey"]

    userKey = loadlib.verifyUser(os.environ['MGD_DBUSER'], 1, None)
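# max(key) + 1 returns NULL on an empty table, so results[0]['maxKey'] can come
# back as None. A defensive sketch (the helper name and the fallback of 1000 are
# assumptions; the MGI_Translation loader later in this section uses the same
# fallback value):

def nextKey(table, column, default=1000):
    results = db.sql('select max(%s) + 1 as maxKey from %s' % (column, table), 'auto')
    key = results[0]['maxKey']
    if key is None:
        key = default
    return key

# usage sketch:
# accKey = nextKey('ACC_Accession', '_Accession_key')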
def init():
    global createdByKey, refKey, accKey

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFile)

    #
    # Get the created by key for the user.
    #
    createdByKey = loadlib.verifyUser(createdBy, 0, None)

    #
    # Get the reference key for the J-Number.
    #
    refKey = loadlib.verifyReference(jNumber, 0, None)

    #
    # Get the next available accession key.
    #
    results = db.sql('select max(_Accession_key) + 1 as maxKey from ACC_Accession', 'auto')
    accKey = results[0]['maxKey']

    return
def processFile(): ''' # requires: # # effects: # Reads input file # Verifies and Processes each line in the input file # # returns: # nothing # ''' global referenceKey global exptDict, seqExptDict lineNum = 0 note = '' # For each line in the input file inputFile = open(inputFileName, 'r') for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = string.split(line[:-1], '|') try: markerID = tokens[0] chromosome = tokens[1] updateChr = tokens[2] band = tokens[3] assay = tokens[4] description = tokens[5] jnum = tokens[6] createdBy = tokens[7] except: # if it's not a valid line, assume it's the note note = line continue # exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) markerKey, markerSymbol = verifyMarker(markerID, lineNum) assayKey = verifyAssay(assay) referenceKey = loadlib.verifyReference(jnum, 0, errorFile) createdByKey = loadlib.verifyUser(createdBy, 0, errorFile) error = not verifyChromosome(chromosome, lineNum) if markerKey == 0 or \ assayKey == 0 or \ referenceKey == 0 or \ createdByKey == 0: # set error flag to true error = 1 # if errors, continue to next record if error: continue # if no errors, process # run once...needs the reference if lineNum == 1: createExperimentMaster() # determine experiment key for this chromosome # if it doesn't exist, create it if not exptDict.has_key(chromosome): createExperimentBCP(chromosome) if not exptDict.has_key(chromosome): errorFile.write('Cannot Find Experiment Key For Chromosome (%d): %s\n' % (lineNum, chromosome)) chrExptKey = 0 else: chrExptKey = exptDict[chromosome] # if errors, continue to next record if chrExptKey == 0: continue # add marker to experiment marker file bcpWrite(exptMarkerFile, \ [chrExptKey, \ markerKey, \ alleleKey, \ assayKey, \ seqExptDict[chrExptKey], \ markerSymbol, \ description, \ matrixData, \ loaddate, loaddate]) # increment marker sequence number for the experiment seqExptDict[chrExptKey] = seqExptDict[chrExptKey] + 1 # end of "for line in inputFile.readlines():" if len(note) > 0: bcpWrite(noteFile, [referenceKey, note, loaddate, loaddate])
def processFile():
    # requires:
    #
    # effects:
    #    Reads input file
    #    Verifies and Processes each line in the input file
    #
    # returns:
    #    nothing
    #

    global strainalleleKey

    lineNum = 0
    notDeleted = 1

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            strainID = tokens[0]
            alleleID = tokens[1]
            qualifier = tokens[2]
            createdBy = tokens[3]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # left-pad the strain ID with zeros to 6 characters
        if len(strainID) == 4:
            strainID = '00' + strainID
        if len(strainID) == 3:
            strainID = '000' + strainID
        if len(strainID) == 2:
            strainID = '0000' + strainID
        if len(strainID) == 1:
            strainID = '00000' + strainID

        strainKey = loadlib.verifyObject(strainID, strainTypeKey, None, lineNum, errorFile)

        # this could generate an error because the ID is a marker, not an allele
        # just ignore the error in the error file if it gets resolved later
        alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile)

        markerKey = 0
        if alleleKey == 0:
            markerKey = loadlib.verifyObject(alleleID, markerTypeKey, None, lineNum, errorFile)

        qualifierKey = verifyQualifier(qualifier, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if notDeleted:
            db.sql('delete from PRB_Strain_Marker where _CreatedBy_key = %s' % (createdByKey), None)
            notDeleted = 0

        # if Allele found, resolve to Marker
        if alleleKey > 0:
            results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto')
            if len(results) > 0:
                markerKey = results[0]['_Marker_key']
        elif markerKey == 0:
            errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID))
            error = 1

        if strainKey == 0 or markerKey == 0 or qualifierKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        if alleleKey == 0:
            alleleKey = ''

        strainFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainalleleKey, strainKey, markerKey, alleleKey, qualifierKey,
               createdByKey, createdByKey, loaddate, loaddate))

        strainalleleKey = strainalleleKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    db.sql('select * from ACC_setMax (%d);' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql(''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None)
    db.commit()
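# The chain of length checks above left-pads strainID with zeros to six
# characters for inputs of one to four characters. str.zfill expresses the same
# intent more compactly; note it also pads a five-character ID, which the
# original if-chain does not, so this is a possible simplification rather than
# a drop-in replacement:

for strainID in ('7', '42', '123456'):
    padded = strainID.zfill(6)   # '000007', '000042', '123456'
    print(padded)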
def init():
    # requires:
    #
    # effects:
    # 1. Processes command line options
    # 2. Initializes local DBMS parameters
    # 3. Initializes global file descriptors/file names
    # 4. Initializes global keys
    #
    # returns:
    #

    global inputFile, diagFile, errorFile, errorFileName, diagFileName
    global refFileName, refFile
    global mgiTypeKey
    global refAssocKey, createdByKey

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    fdate = mgi_utils.date('%m%d%Y')	# current date

    head, tail = os.path.split(inputFileName)

    diagFileName = tail + '.' + fdate + '.diagnostics'
    errorFileName = tail + '.' + fdate + '.error'
    refFileName = tail + '.MGI_Reference_Assoc.bcp'

    try:
        inputFile = open(inputFileName, 'r')
    except:
        exit(1, 'Could not open file %s\n' % inputFileName)

    try:
        diagFile = open(diagFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        errorFile = open(errorFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % errorFileName)

    try:
        refFile = open(refFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % refFileName)

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    # Set Log File Descriptor
    db.set_sqlLogFD(diagFile)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
def processFile(): global primerKey, refKey, aliasKey, accKey, mgiKey lineNum = 0 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = string.split(line[:-1], '\t') try: markerSymbol = tokens[0] # not used markerIDs = string.split(tokens[1], '|') name = tokens[2] jnum = tokens[3] regionCovered = tokens[4] sequence1 = tokens[5] sequence2 = tokens[6] productSize = tokens[7] notes = tokens[8] sequenceIDs = tokens[9] aliasList = string.split(tokens[10], '|') createdBy = tokens[11] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) # marker IDs markerList = [] for markerID in markerIDs: markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile) if len(markerID) > 0 and markerKey == 0: errorFile.write('Invalid Marker: %s, %s\n' % (name, markerID)) error = 1 elif len(markerID) > 0: markerList.append(markerKey) referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile) createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) # sequence IDs seqAccList = string.split(sequenceIDs, '|') # if errors, continue to next record if error: continue # if no errors, process the primer primerFile.write('%d\t%s\t\t%d\t%d\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\n' \ % (primerKey, name, NA, vectorKey, segmentTypeKey, mgi_utils.prvalue(sequence1), \ mgi_utils.prvalue(sequence2), mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(productSize), \ createdByKey, createdByKey, loaddate, loaddate)) for markerKey in markerList: if markerList.count(markerKey) == 1: markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \ % (primerKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate)) else: errorFile.write('Invalid Marker Duplicate: %s, %s\n' % (name, markerID)) # loaddate)) refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' % (refKey, primerKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate)) # aliases for alias in aliasList: if len(alias) == 0: continue aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \ % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate)) aliasKey = aliasKey + 1 # MGI Accession ID for the marker accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \ % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate)) newPrimerFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \ % (markerSymbol, string.join(markerIDs, '|'), name, jnum, regionCovered, sequence1, sequence2, productSize, notes, sequenceIDs, createdBy, mgiPrefix, mgiKey)) accKey = accKey + 1 mgiKey = mgiKey + 1 # sequence accession ids for acc in seqAccList: if len(acc) == 0: continue prefixPart, numericPart = accessionlib.split_accnum(acc) accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \ % (accKey, acc, prefixPart, numericPart, logicalDBKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate)) accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \ % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate)) accKey = accKey + 1 # notes if len(notes) > 0: noteFile.write('%s|1\t%s\t%s\t%s\n' \ % (primerKey, notes, loaddate, loaddate)) refKey = refKey + 1 primerKey = primerKey + 1 # end of "for line in inputFile.readlines():" # # Update the AccessionMax value # if not DEBUG: db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
def init(): global diagFile, errorFile, inputFile, errorFileName, diagFileName global outSetFile, outMemberFile global setKey, setMemberKey, createdByKey, mgiTypeKey, useSetKey global DEBUG db.useOneConnection(1) db.set_sqlUser(user) db.set_sqlPasswordFromFile(passwordFileName) diagFileName = '%s/setload.diagnostics' % (outputDir) errorFileName = '%s/setload.error' % (outputDir) try: diagFile = open(diagFileName, 'w') except: exit(1, 'Could not open file %s\n' % diagFileName) try: errorFile = open(errorFileName, 'w') except: exit(1, 'Could not open file %s\n' % errorFileName) try: inputFile = open(inputFileName, 'r') except: exit(1, 'Could not open file %s\n' % inputFileName) # Output Files try: fullPathSetFile = '%s/%s' % (outputDir, outSetFileName) outSetFile = open(fullPathSetFile, 'w') except: exit(1, 'Could not open file %s\n' % fullPathSetFile) try: fullPathMemberFile = '%s/%s' % (outputDir, outMemberFileName) outMemberFile = open(fullPathMemberFile, 'w') except: exit(1, 'Could not open file %s\n' % fullPathMemberFile) # Log all SQL db.set_sqlLogFunction(db.sqlLogAll) diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date())) diagFile.write('Server: %s\n' % (db.get_sqlServer())) diagFile.write('Database: %s\n' % (db.get_sqlDatabase())) errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date())) if mode == 'preview': DEBUG = 1 bcpon = 0 elif mode != 'load': exit(1, 'Invalid Processing Mode: %s\n' % (mode)) results = db.sql('select max(_Set_key) + 1 as maxKey from MGI_Set', 'auto') setKey = results[0]['maxKey'] createdByKey = loadlib.verifyUser(createdBy, 0, errorFile) mgiTypeKey = loadlib.verifyMGIType(setType, 0, errorFile) # # use existing MGI_Set, or create a new one # results = db.sql('select _Set_key from MGI_Set where _MGIType_key = %s and name = \'%s\'' % (mgiTypeKey, setName), 'auto') if len(results) > 0: for r in results: setKey = r['_Set_key'] # delete/reload db.sql('delete from MGI_SetMember where _Set_key = %s' % (setKey), None) else: outSetFile.write(str(setKey) + TAB + \ str(mgiTypeKey) + TAB + \ str(setName) + TAB + \ '1' + TAB + \ str(createdByKey) + TAB + str(createdByKey) + TAB + \ loaddate + TAB + loaddate + CRT) results = db.sql('select max(_SetMember_key) + 1 as maxKey from MGI_SetMember', 'auto') setMemberKey = results[0]['maxKey'] return
def init():
    global bcpCommand
    global diagFile, errorFile, inputFile, errorFileName, diagFileName
    global outImageFile, outPaneFile, outAccFile
    global outCopyrightFile, outCaptionFile
    global inImageFile, inPaneFile
    global createdByKey

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    bcpCommand = bcpCommand + db.get_sqlServer() + ' ' + db.get_sqlDatabase() + ' %s ' + currentDir + ' %s "\\t" "\\n" mgd'

    diagFileName = currentDir + '/gxdimageload.diagnostics'
    errorFileName = currentDir + '/gxdimageload.error'

    try:
        diagFile = open(diagFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        errorFile = open(errorFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % errorFileName)

    # Input Files

    try:
        inImageFile = open(inImageFileName, 'r')
    except:
        exit(1, 'Could not open file %s\n' % inImageFileName)

    try:
        inPaneFile = open(inPaneFileName, 'r')
    except:
        exit(1, 'Could not open file %s\n' % inPaneFileName)

    # Output Files

    try:
        outImageFile = open(outImageFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % outImageFileName)

    try:
        outPaneFile = open(outPaneFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % outPaneFileName)

    try:
        outAccFile = open(outAccFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % outAccFileName)

    try:
        outCaptionFile = open(outCaptionFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % outCaptionFileName)

    try:
        outCopyrightFile = open(outCopyrightFileName, 'w')
    except:
        exit(1, 'Could not open file %s\n' % outCopyrightFileName)

    db.setTrace(True)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

    return
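# init() above only assembles bcpCommand, leaving two %s placeholders (table
# name and bcp file name); the execution step is not part of this excerpt.
# One plausible invocation, assuming the command is run with os.system during
# the bcp phase (the helper name and the table/file pairing are illustrative):

import os

def runBCP(table, bcpFileName):
    bcpRun = bcpCommand % (table, bcpFileName)
    diagFile.write('%s\n' % (bcpRun))
    os.system(bcpRun)

# usage sketch:
# runBCP('IMG_Image', outImageFileName)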
def processFile(): global refKey, aliasKey global execProbeSQL global execAssaySQL global execRefSQL lineNum = 0 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = string.split(line[:-1], '\t') try: fromID = tokens[0] name = tokens[1] toID = tokens[2] jnum = tokens[3] createdBy = tokens[4] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile) toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile) referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile) createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) if fromKey == 0: errorFile.write('Invalid Probe "From": %s\n' % (fromID)) error = 1 if toKey == 0: errorFile.write('Invalid Probe "To": %s\n' % (toID)) error = 1 if referenceKey == 0: errorFile.write('Invalid Reference: %s\n' % (jnum)) error = 1 if createdByKey == 0: errorFile.write('Invalid Creator: %s\n\n' % (createdBy)) error = 1 # check that all genes are the same checkGenesSQL = ''' select f.* from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a where f._Probe_key = %s and t._Probe_key = %s and p._Probe_key = %s and p._ProbePrep_key = a._ProbePrep_key and f._Marker_key = t._Marker_key and f._Marker_key = a._Marker_key ''' % (fromKey, toKey, fromKey) checkGenes = db.sql(checkGenesSQL, 'auto') if len(checkGenes) == 0: errorFile.write( 'Gene of GenePaint, Eurexpress and Assay are not the same: %s, %s\n' % (fromID, toID)) error = 1 # check that the J: is on at least one Assay checkJAssaySQL = ''' select a.* from GXD_ProbePrep p, GXD_Assay a where p._Probe_key = %s and p._ProbePrep_key = a._ProbePrep_key and a._Refs_key = %s ''' % (fromKey, referenceKey) checkJAssay = db.sql(checkJAssaySQL, 'auto') if len(checkJAssay) == 0: errorFile.write( 'J: is not on any Assays attached to the probe: %s\n' % (fromID)) error = 1 # if errors, continue to next record if error: continue # add alias using fromID name (from) to toID refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \ % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate)) aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \ % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate)) refKey = refKey + 1 aliasKey = aliasKey + 1 # move assay information from fromID to toID execAssaySQL.append(updateAssaySQL % (toKey, fromKey)) # move fromID (from) references to toID execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey)) # delete fromID (from) execProbeSQL.append(deleteProbeSQL % (fromKey))
def processFile():

    global refKey, aliasKey, execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            probeID = tokens[0]
            markerIDs = string.split(tokens[1], '|')
            jnum = tokens[2]
            relationship = tokens[3]
            aliasList = string.split(tokens[4], '|')
            createdBy = tokens[5]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        refsKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe: %s\n' % (probeID))
            error = 1

        if refsKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        results = db.sql('''select _Reference_key from PRB_Reference
            where _Probe_key = %s and _Refs_key = %s ''' % (probeKey, refsKey), 'auto')
        referenceKey = results[0]['_Reference_key']

        if referenceKey == 0:
            errorFile.write('Invalid Probe/Reference: %s\n' % (jnum))
            error = 1

        # marker IDs

        markerList = []
        for markerID in markerIDs:

            if markerID == 'none':
                break

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker: %s\n' % (markerID))
                error = 1
            else:
                markerList.append(markerKey)

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, refsKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate: %s\n' % (markerID))

        if referenceKey > 0:
            refKey = referenceKey
        else:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, refsKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # only used if referenceKey == 0
        refKey = refKey + 1
def processFile(): ''' # requires: # # effects: # Reads input file # Verifies and Processes each line in the input file # # returns: # nothing # ''' global referenceKey global exptDict, seqExptDict lineNum = 0 note = '' # For each line in the input file inputFile = open(inputFileName, 'r') for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = str.split(line[:-1], '|') try: mappingKey = tokens[0] markerID = tokens[1] chromosome = tokens[2] updateChr = tokens[3] band = tokens[4] assay = tokens[5] description = tokens[6] jnum = tokens[7] createdBy = tokens[8] except: # if it's not a valid line, assume it's the note note = line continue # exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) markerKey, markerSymbol = verifyMarker(markerID, lineNum) assayKey = verifyAssay(assay) referenceKey = loadlib.verifyReference(jnum, 0, errorFile) createdByKey = loadlib.verifyUser(createdBy, 0, errorFile) error = not verifyChromosome(chromosome, lineNum) if markerKey == 0 or \ assayKey == 0 or \ referenceKey == 0 or \ createdByKey == 0: # set error flag to true error = 1 # if errors, continue to next record if error: continue # if no errors, process # run once...needs the reference if lineNum == 1: createExperimentMaster() # determine experiment key for this chromosome # if it doesn't exist, create it if chromosome not in exptDict: createExperimentBCP(chromosome) if chromosome not in exptDict: errorFile.write( 'Cannot Find Experiment Key For Chromosome (%d): %s\n' % (lineNum, chromosome)) chrExptKey = 0 else: chrExptKey = exptDict[chromosome] # if errors, continue to next record if chrExptKey == 0: continue # add marker to experiment marker file bcpWrite(exptMarkerFile, \ [mappingKey, \ chrExptKey, \ markerKey, \ alleleKey, \ assayKey, \ seqExptDict[chrExptKey], \ description, \ matrixData, \ loaddate, loaddate]) # increment marker sequence number for the experiment seqExptDict[chrExptKey] = seqExptDict[chrExptKey] + 1 # end of "for line in inputFile.readlines():" if len(note) > 0: bcpWrite(noteFile, [referenceKey, note, loaddate, loaddate])
def processFile(): global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey lineNum = 0 # For each line in the input file for line in inputFile.readlines(): lineNum = lineNum + 1 #print line # Split the line into tokens tokens = line[:-1].split('\t') try: name = tokens[0] alleleIDs = tokens[1] strainType = tokens[2] species = tokens[3] isStandard = tokens[4] createdBy = tokens[5] mutantNote = tokens[6] colonyNote = tokens[7] annotations = tokens[8].split('|') except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) strainExistKey = verifyStrain(name, lineNum) strainTypeKey = verifyStrainType(strainType, lineNum) speciesKey = verifySpecies(species, lineNum) createdByKey = loadlib.verifyUser(createdBy, 0, errorFile) # if the strain exist, but with no colony id note, create one if strainExistKey > 0: print 'strain in database checking colony note : %s' % line if (not checkColonyNote(strainExistKey) ): #print 'colony note not in the database: %s' % colonyNote createNote(strainExistKey, colonyNote, mgiColonyNoteTypeKey, createdByKey) else: print 'colony note in database: %s' % colonyNote continue else: print 'strain not in database : %s' % line # if strain does not exist and verification failed on strain type, # species or createdBy, skip the record if strainTypeKey == 0 or speciesKey == 0 \ or createdByKey == 0: #print 'verification failed on strain type, species or createdBy: %s %s %s ' % (strainTypeKey, speciesKey, createdByKey) continue # if no errors, process strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate, isGeneticBackground, createdByKey, createdByKey, cdate, cdate)) # if Allele found, resolve to Marker allAlleles = alleleIDs.split('|') for a in allAlleles: alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile) #print 'makeStrains.py allele: %s marker key: %s' % (a, alleleKey) results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto') markerKey = results[0]['_Marker_key'] markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey, createdByKey, createdByKey, cdate, cdate)) strainmarkerKey = strainmarkerKey + 1 # MGI Accession ID for the strain if isStandard == '1': accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \ % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey, createdByKey, createdByKey, cdate, cdate)) accKey = accKey + 1 # storing data in MGI_Note/MGI_NoteChunk # Colony ID Note if len(colonyNote) > 0: createNote(strainKey, colonyNote, mgiColonyNoteTypeKey, createdByKey) # storing data in MGI_Note/MGI_NoteChunk # Mutant Cell Line of Origin Note if len(mutantNote) > 0: createNote(strainKey, mutantNote, mgiMutOrigNoteTypeKey, createdByKey) # # Annotations # # _AnnotType_key = 1009 = "Strain/Attributes" # _Qualifier_key = 1614158 = null # for a in annotations: # strain annotation type annotTypeKey = 1009 # this is a null qualifier key annotQualifierKey = 1614158 annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile) if annotTermKey == 0: continue annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate)) annotKey = annotKey + 1 mgiKey = mgiKey + 1 strainKey = strainKey + 1 # end of "for line in inputFile.readlines():" # # Update the AccessionMax value # if not DEBUG: db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
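# checkColonyNote() and createNote() are called above but not defined in this
# excerpt. A rough sketch of createNote(), assuming it appends one record to the
# MGI_Note bcp file in the same pipe-delimited, single-table layout used by the
# other strain loaders in this section and advances the global noteKey (the real
# helper may instead insert directly or use the older MGI_NoteChunk layout):

def createNote(objectKey, note, noteTypeKey, createdByKey):
    global noteKey
    noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
        % (noteKey, objectKey, mgiNoteObjectKey, noteTypeKey, note, \
           createdByKey, createdByKey, cdate, cdate))
    noteKey = noteKey + 1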
def processFile():
    '''
    # requires:
    #
    # effects:
    #    Reads input file
    #    Verifies and Processes each line in the input file
    #
    # returns:
    #    nothing
    #
    '''

    results = db.sql('select max(_Translation_key) + 1 as maxKey from MGI_Translation', 'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        transKey = 1000

    lineNum = 0

    # sequence number of bad name in translation list
    seq = 1

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
            continue

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)

        if objectKey == 0 or userKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # add term to translation file
        bcpWrite(transFile, [transKey, transTypeKey, objectKey, term, seq, userKey, userKey, loaddate, loaddate])
        transKey = transKey + 1
        seq = seq + 1

    # end of "for line in inputFile.readlines():"

    if newTransType:
        bcpWrite(transTypeFile, [transTypeKey, mgiTypeKey, vocabKey, transTypeName, transCompression, 0, userKey, userKey, loaddate, loaddate])
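# bcpWrite() is used by several loaders in this section but is not defined in
# this excerpt. A minimal sketch of what it presumably does (the delimiter and
# the str() coercion are assumptions):

def bcpWrite(fp, values, delimiter='|'):
    # join the column values with the bcp delimiter and write one record line
    fp.write(delimiter.join([str(v) for v in values]) + '\n')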
def processFile():

    global execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            probeID = tokens[0]
            markerIDs = string.split(tokens[1], '|')
            jnum = tokens[2]
            relationship = tokens[3]
            createdBy = tokens[4]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe: %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # marker IDs

        markerList = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker: %s, %s\n' % (probeID, markerID))
                error = 1
            else:
                markerList.append(markerKey)

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s|%s|%d|%s|%s|%s|%s|%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate: %s, %s\n' % (probeID, markerID))
def processFile(): global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey, mutationKey global alleleLookup lineNum = 0 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = line[:-1].split('\t') #print line try: markerID = tokens[0] symbol = tokens[1] name = tokens[2] alleleStatus = tokens[3] alleleType = tokens[4] alleleSubtypes = tokens[5] collectionKey = tokens[6] germLine = tokens[7] references = tokens[8] strainOfOrigin = tokens[9] mutantCellLine = tokens[10] molecularNotes = tokens[11] driverNotes = tokens[12] ikmcNotes = tokens[13] mutations = tokens[14] inheritanceMode = tokens[15] isMixed = tokens[16] isExtinct = tokens[17] createdBy = tokens[18] createMCL = tokens[19] createNote = tokens[20] setStatus = tokens[21] existingAlleleID = tokens[22] ikmcSymbol = tokens[23] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) # creator createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) if createdByKey == 0: continue # processing for IKMC-only if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0: processFileIKMC(createMCL, createNote, setStatus, \ symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \ createdByKey, existingAlleleID) continue # marker key markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile) # hard-coded # _vocab_key = 73 (Marker-Allele Association Status) # _term_key = 4268545 (Curated) markerStatusKey = 4268545 # _vocab_key = 37 (Allele Status) alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile) # _vocab_key = 38 (Allele Type) alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile) # _vocab_key = 61 (Allele Transmission) germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile) # _vocab_key = 36 (Allele Molecular Mutation) allMutations = mutations.split('|') # _vocab_key = 35 (Allele Status) inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile) # strains strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile) # reference refKey = loadlib.verifyReference(jnum, lineNum, errorFile) # if errors, continue to next record # errors are stored (via loadlib) in the .error log if markerKey == 0 \ or markerStatusKey == 0 \ or alleleStatusKey == 0 \ or alleleTypeKey == 0 \ or germLineKey == 0 \ or allMutations == 0 \ or inheritanceModeKey == 0 \ or strainOfOriginKey == 0 \ or refKey == 0 \ or createdByKey == 0: continue # if no errors, process the allele # not specified/testing #collectionKey = 11025586 # allele (master) alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \ alleleStatusKey, germLineKey, collectionKey, symbol, name, \ isExtinct, isMixed, refKey, markerStatusKey, \ createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate)) # molecular mutation for mutation in allMutations: mutationTermKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile) mutationFile.write('%s|%s|%s|%s|%s\n' \ % (mutationKey, alleleKey, mutationTermKey, loaddate, loaddate)) mutationKey = mutationKey + 1 # # allele references # allReferences = references.split('||') for reference in allReferences: refType, refID = reference.split('|') refKey = loadlib.verifyReference(refID, lineNum, errorFile) if refType == 'Original': refAssocTypeKey = 1011 elif refType == 'Transmission': refAssocTypeKey = 1023 elif refType == 'Molecular': 
refAssocTypeKey = 1012 refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) refAssocKey = refAssocKey + 1 # # allele subtypes # allSubtypes = alleleSubtypes.split('|') for s in allSubtypes: # _vocab_key = 93 (Allele Subtype) alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile) annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \ qualifierKey, loaddate, loaddate)) annotKey = annotKey + 1 # # mutant cell line # if len(mutantCellLine) > 0: addMutantCellLine(alleleKey, mutantCellLine, createdByKey) # MGI Accession ID for the allelearker accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \ % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) # storing data in MGI_Note # molecular notes mgiNoteSeqNum = 1 if len(molecularNotes) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \ molecularNotes, createdByKey, createdByKey, loaddate, loaddate)) noteKey = noteKey + 1 # driver notes # TR12662/MGI_Relationship._Category_key = 1006 # removed noteFile code # place hodler for MGI_Relationship code # the IKMC is the only product using this and IKMC does not add any driver note #mgiNoteSeqNum = 1 #if len(driverNotes) > 0: # ikmc notes useIKMCnotekey = 0 if len(ikmcNotes) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \ ikmcNotes, createdByKey, createdByKey, loaddate, loaddate)) useIKMCnotekey = noteKey noteKey = noteKey + 1 # Print out a new text file and attach the new MGI Allele IDs as the last field if createdBy == 'ikmc_alleleload': newAlleleFile.write('%s\t%s%s\t%s\n' \ % (mgi_utils.prvalue(ikmcNotes), \ mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \ mgi_utils.prvalue(ikmcSymbol))) else: newAlleleFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%s\n' \ % (mgi_utils.prvalue(markerID), \ mgi_utils.prvalue(symbol), \ mgi_utils.prvalue(name), \ mgi_utils.prvalue(alleleStatus), \ mgi_utils.prvalue(alleleType), \ mgi_utils.prvalue(alleleSubtype), \ mgi_utils.prvalue(collection), \ mgi_utils.prvalue(germLine), \ mgi_utils.prvalue(references), \ mgi_utils.prvalue(strainOfOrigin), \ mgi_utils.prvalue(mutantCellLine), \ mgi_utils.prvalue(allMutations), \ mgi_utils.prvalue(inheritanceMode), \ mgi_utils.prvalue(isMixed), \ mgi_utils.prvalue(isExtinct), \ mgi_utils.prvalue(refKey), \ mgi_utils.prvalue(markerStatusKey), \ mgi_utils.prvalue(createdBy), \ mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey))) # save symbol/alleleKey/ikmc note key alleleLookup[symbol] = [] alleleLookup[symbol].append( (alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey))) accKey = accKey + 1 mgiKey = mgiKey + 1 alleleKey = alleleKey + 1 # end of "for line in inputFile.readlines():" # # Update the AccessionMax value # if not DEBUG: db.sql('select * from ACC_setMax(%d)' % (lineNum), None) db.commit()
def processFile(): global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey global alleleLookup lineNum = 0 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = line[:-1].split('\t') #print line try: markerID = tokens[0] symbol = tokens[1] name = tokens[2] alleleStatus = tokens[3] alleleType = tokens[4] alleleSubtypes = tokens[5] collectionKey = tokens[6] germLine = tokens[7] references = tokens[8] strainOfOrigin = tokens[9] mutantCellLine = tokens[10] molecularNotes = tokens[11] driverNotes = tokens[12] ikmcNotes = tokens[13] mutations = tokens[14] inheritanceMode = tokens[15] isMixed = tokens[16] isExtinct = tokens[17] createdBy = tokens[18] createMCL = tokens[19] createNote = tokens[20] setStatus = tokens[21] existingAlleleID = tokens[22] ikmcSymbol = tokens[23] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) # creator createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) if createdByKey == 0: continue # processing for IKMC-only if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0: processFileIKMC(createMCL, createNote, setStatus, \ symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \ createdByKey, existingAlleleID) continue # marker key markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile) # hard-coded # _vocab_key = 73 (Marker-Allele Association Status) # _term_key = 4268545 (Curated) markerStatusKey = 4268545 # _vocab_key = 37 (Allele Status) alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile) # _vocab_key = 38 (Allele Type) alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile) # _vocab_key = 61 (Allele Transmission) germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile) # _vocab_key = 36 (Allele Molecular Mutation) allMutations = mutations.split('|') # _vocab_key = 35 (Allele Status) inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile) # strains strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile) # reference refKey = loadlib.verifyReference(jnum, lineNum, errorFile) # if errors, continue to next record # errors are stored (via loadlib) in the .error log if markerKey == 0 \ or markerStatusKey == 0 \ or alleleStatusKey == 0 \ or alleleTypeKey == 0 \ or germLineKey == 0 \ or allMutations == 0 \ or inheritanceModeKey == 0 \ or strainOfOriginKey == 0 \ or refKey == 0 \ or createdByKey == 0: continue # if no errors, process the allele # not specified/testing #collectionKey = 11025586 # allele (master) alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \ alleleStatusKey, germLineKey, collectionKey, symbol, name, \ isExtinct, isMixed, refKey, markerStatusKey, \ createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate)) # molecular mutation for mutation in allMutations: mutationKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile) mutationFile.write('%s|%s|%s|%s\n' \ % (alleleKey, mutationKey, loaddate, loaddate)) # # allele references # allReferences = references.split('||') for reference in allReferences: refType, refID = reference.split('|') refKey = loadlib.verifyReference(refID, lineNum, errorFile) if refType == 'Original': refAssocTypeKey = 1011 elif refType == 'Transmission': refAssocTypeKey = 1023 elif refType == 'Molecular': refAssocTypeKey = 1012 
refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) refAssocKey = refAssocKey + 1 # # allele subtypes # allSubtypes = alleleSubtypes.split('|') for s in allSubtypes: # _vocab_key = 93 (Allele Subtype) alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile) annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \ qualifierKey, loaddate, loaddate)) annotKey = annotKey + 1 # # mutant cell line # if len(mutantCellLine) > 0: addMutantCellLine(alleleKey, mutantCellLine, createdByKey) # MGI Accession ID for the allelearker accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \ % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) # storing data in MGI_Note/MGI_NoteChunk # molecular notes mgiNoteSeqNum = 1 if len(molecularNotes) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, mgiNoteSeqNum, molecularNotes, createdByKey, createdByKey, loaddate, loaddate)) noteKey = noteKey + 1 # driver notes # TR12662/MGI_Relationship._Category_key = 1006 # removed noteFile code # place hodler for MGI_Relationship code # the IKMC is the only product using this and IKMC does not add any driver note #mgiNoteSeqNum = 1 #if len(driverNotes) > 0: # ikmc notes useIKMCnotekey = 0 if len(ikmcNotes) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, 1, ikmcNotes, createdByKey, createdByKey, loaddate, loaddate)) useIKMCnotekey = noteKey noteKey = noteKey + 1 # Print out a new text file and attach the new MGI Allele IDs as the last field if createdBy == 'ikmc_alleleload': newAlleleFile.write('%s\t%s%s\t%s\n' \ % (mgi_utils.prvalue(ikmcNotes), \ mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \ mgi_utils.prvalue(ikmcSymbol))) else: newAlleleFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%s\n' \ % (mgi_utils.prvalue(markerID), \ mgi_utils.prvalue(symbol), \ mgi_utils.prvalue(name), \ mgi_utils.prvalue(alleleStatus), \ mgi_utils.prvalue(alleleType), \ mgi_utils.prvalue(alleleSubtype), \ mgi_utils.prvalue(collection), \ mgi_utils.prvalue(germLine), \ mgi_utils.prvalue(references), \ mgi_utils.prvalue(strainOfOrigin), \ mgi_utils.prvalue(mutantCellLine), \ mgi_utils.prvalue(allMutations), \ mgi_utils.prvalue(inheritanceMode), \ mgi_utils.prvalue(isMixed), \ mgi_utils.prvalue(isExtinct), \ mgi_utils.prvalue(refKey), \ mgi_utils.prvalue(markerStatusKey), \ mgi_utils.prvalue(createdBy), \ mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey))) # save symbol/alleleKey/ikmc note key alleleLookup[symbol] = [] alleleLookup[symbol].append((alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey))) accKey = accKey + 1 mgiKey = mgiKey + 1 alleleKey = alleleKey + 1 # end of "for line in inputFile.readlines():" # # Update the AccessionMax value # if not DEBUG: db.sql('select * from ACC_setMax(%d)' % (lineNum), None) db.commit()
def processFile(): global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey lineNum = 0 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = line[:-1].split('\t') try: id = tokens[0] externalPrefix = id externalNumeric = '' #(externalPrefix, externalNumeric) = id.split(':') name = tokens[1] alleleIDs = tokens[2] strainType = tokens[3] species = tokens[4] isStandard = tokens[5] sooNote = tokens[6] externalLDB = tokens[7] externalTypeKey = tokens[8] annotations = tokens[9] createdBy = tokens[10] mutantNote = tokens[11] isPrivate = tokens[12] impcColonyNote = tokens[13] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) strainExistKey = verifyStrain(name, lineNum) strainTypeKey = verifyStrainType(strainType, lineNum) speciesKey = verifySpecies(species, lineNum) createdByKey = loadlib.verifyUser(createdBy, 0, errorFile) if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0: # set error flag to true error = 1 # if errors, continue to next record if error: continue # if no errors, process strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate, isGeneticBackground, createdByKey, createdByKey, cdate, cdate)) # if Allele found, resolve to Marker if len(alleleIDs) > 0: allAlleles = alleleIDs.split('|') for a in allAlleles: alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile) if alleleKey == 0: continue results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto') markerKey = results[0]['_Marker_key'] markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey, createdByKey, createdByKey, cdate, cdate)) strainmarkerKey = strainmarkerKey + 1 # MGI Accession ID for all strain accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \ % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey, createdByKey, createdByKey, cdate, cdate)) accKey = accKey + 1 # external accession id # % (accKey, id, '', id, externalLDB, strainKey, externalTypeKey, #for ids that contain prefix:numeric accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \ % (accKey, id, externalPrefix, externalNumeric, externalLDB, strainKey, externalTypeKey, createdByKey, createdByKey, cdate, cdate)) accKey = accKey + 1 # storing data in MGI_Note/MGI_NoteChunk # Strain of Origin Note if len(sooNote) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, strainKey, mgiNoteObjectKey, mgiStrainOriginTypeKey, \ createdByKey, createdByKey, cdate, cdate)) noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, 1, sooNote, createdByKey, createdByKey, cdate, cdate)) noteKey = noteKey + 1 # storing data in MGI_Note/MGI_NoteChunk # Mutant Cell Line of Origin Note if len(mutantNote) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, strainKey, mgiNoteObjectKey, mgiMutantOriginTypeKey, \ createdByKey, createdByKey, cdate, cdate)) if len(mutantNote) > 0: noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, 1, mutantNote, createdByKey, createdByKey, cdate, cdate)) noteKey = noteKey + 1 # storing data in MGI_Note/MGI_NoteChunk # IMPC Colony Note if len(impcColonyNote) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, strainKey, mgiNoteObjectKey, mgiIMPCColonyTypeKey, \ createdByKey, createdByKey, cdate, cdate)) noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, 1, sooNote, 
createdByKey, createdByKey, cdate, cdate)) noteKey = noteKey + 1 # # Annotations # # _AnnotType_key = 1009 # _Qualifier_ke = 1614158 # if len(annotations) > 0: annotations = annotations.split('|') for a in annotations: # strain annotation type annotTypeKey = 1009 # this is a null qualifier key annotQualifierKey = 1614158 annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile) if annotTermKey == 0: continue annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate)) annotKey = annotKey + 1 mgiKey = mgiKey + 1 strainKey = strainKey + 1 # end of "for line in inputFile.readlines():" # # Update the AccessionMax value # db.sql('select * from ACC_setMax (%d)' % (lineNum), None) db.commit() # update prb_strain_marker_seq auto-sequence db.sql(''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None) db.commit() # update voc_annot_seq auto-sequence db.sql(''' select setval('voc_annot_seq', (select max(_Annot_key) from VOC_Annot)) ''', None) db.commit()
def processFile():

    global refKey, aliasKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            probeID = probeName = tokens[0]
            jnum = tokens[1]
            aliasList = string.split(tokens[2], '|')
            createdBy = tokens[3]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        if probeID.find('MGI:') >= 0:
            probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        else:
            probeKey, probeID = verifyProbe(probeName, lineNum, errorFile)

        probeReferenceKey = verifyProbeReference(probeID, jnum, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe: %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        #if probeReferenceKey == 0:
        #    errorFile.write('Invalid Probe Reference: %s, %s\n' % (probeID, jnum))
        #    error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # create a new probe-reference key if one does not already exist
        # else use the existing probe-reference key

        if probeReferenceKey == 0:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            aliasrefKey = refKey
            refKey = refKey + 1
        else:
            #errorFile.write('Probe/Reference Already Exists: %s\n' % (tokens))
            aliasrefKey = probeReferenceKey

        # aliases

        for alias in aliasList:

            if len(alias) == 0:
                continue

            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, aliasrefKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1
def processFile():
    # requires:
    #
    # effects:
    #    Reads input file
    #    Verifies and Processes each line in the input file
    #
    # returns:
    #    nothing
    #

    global refAssocKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            accID = tokens[0]
            jnum = tokens[1]
            refAssocType = tokens[2]
            createdBy = tokens[3]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if objectKey == 0 or \
           referenceKey == 0 or \
           refAssocTypeKey == 0 or \
           createdByKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the marker

        # could move to verifyDuplicate routine
        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        if refDict.has_key(key):
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))
            continue

        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, referenceKey, objectKey, mgiTypeKey, refAssocTypeKey,
               createdByKey, createdByKey, loaddate, loaddate))

        refAssocKey = refAssocKey + 1
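# refDict is used above as a duplicate guard, but its construction is not shown.
# A plausible preload, assuming it is seeded from the existing
# MGI_Reference_Assoc rows for this MGI type (the preload itself is an
# assumption, not code from the source):

refDict = {}
results = db.sql('''select _Object_key, _Refs_key, _RefAssocType_key
        from MGI_Reference_Assoc where _MGIType_key = %s''' % (mgiTypeKey), 'auto')
for r in results:
    key = '%s:%s:%s' % (r['_Object_key'], r['_Refs_key'], r['_RefAssocType_key'])
    refDict[key] = 1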
def main():

    global userKey

    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'S:D:U:P:K:')
    except:
        showUsage()

    server = None
    database = None
    user = None
    password = None
    objectKey = None

    for opt in optlist:
        if opt[0] == '-S':
            server = opt[1]
        elif opt[0] == '-D':
            database = opt[1]
        elif opt[0] == '-U':
            user = opt[1]
        elif opt[0] == '-P':
            password = string.strip(open(opt[1], 'r').readline())
        elif opt[0] == '-K':
            objectKey = opt[1]
        else:
            showUsage()

    if server is None or \
       database is None or \
       user is None or \
       password is None or \
       objectKey is None:
        showUsage()

    db.set_sqlLogin(user, password, server, database)
    db.useOneConnection(1)

    userKey = loadlib.verifyUser(user, 0, None)

    # call functions based on the way the program is invoked
    scriptName = os.path.basename(sys.argv[0])

    # initialize the cre-system lookups
    initCreSystems()

    # all of these invocations will only affect a certain subset of data
    if scriptName == 'allelecrecache.py':
        processAll()
    elif scriptName == 'allelecrecacheByAllele.py':
        processByAllele(objectKey)
    elif scriptName == 'allelecrecacheByAssay.py':
        processByAssay(objectKey)

    db.commit()
    db.useOneConnection(0)

    return
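# showUsage() is called above for a bad or missing option but is not defined in
# this excerpt. A minimal sketch of what it likely does (the usage text is
# illustrative only):

import sys

def showUsage():
    usage = 'usage: %s -S server -D database -U user -P password-file -K object-key\n' % (sys.argv[0])
    sys.stderr.write(usage)
    sys.exit(1)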
def processFile(): global lineNum global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = line[:-1].split('\t') try: id = tokens[0] externalPrefix = id externalNumeric = '' #(externalPrefix, externalNumeric) = id.split(':') name = tokens[1] alleleIDs = tokens[2] strainType = tokens[3] species = tokens[4] isStandard = tokens[5] sooNote = tokens[6] externalLDB = tokens[7] externalTypeKey = tokens[8] annotations = tokens[9] createdBy = tokens[10] mutantNote = tokens[11] isPrivate = tokens[12] impcColonyNote = tokens[13] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) strainExistKey = verifyStrain(name, lineNum) strainTypeKey = verifyStrainType(strainType, lineNum) speciesKey = verifySpecies(species, lineNum) createdByKey = loadlib.verifyUser(createdBy, 0, errorFile) if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0: # set error flag to true error = 1 # if errors, continue to next record if error: continue # if no errors, process strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate, isGeneticBackground, createdByKey, createdByKey, cdate, cdate)) # if Allele found, resolve to Marker if len(alleleIDs) > 0: allAlleles = alleleIDs.split('|') for a in allAlleles: alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile) if alleleKey == 0: continue if alleleKey == None: continue results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto') markerKey = results[0]['_Marker_key'] if markerKey != None: markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey, createdByKey, createdByKey, cdate, cdate)) else: markerFile.write('%s|%s||%s|%s|%s|%s|%s|%s\n' \ % (strainmarkerKey, strainKey, alleleKey, qualifierKey, createdByKey, createdByKey, cdate, cdate)) strainmarkerKey = strainmarkerKey + 1 # MGI Accession ID for all strain # all private = 0 (false) accFile.write('%d|%s%d|%s|%s|1|%d|%d|%s|1|%s|%s|%s|%s\n' \ % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey, isPrivate, createdByKey, createdByKey, cdate, cdate)) accKey = accKey + 1 # external accession id # % (accKey, id, '', id, externalLDB, strainKey, externalTypeKey, #for ids that contain prefix:numeric accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \ % (accKey, id, externalPrefix, externalNumeric, externalLDB, strainKey, externalTypeKey, createdByKey, createdByKey, cdate, cdate)) accKey = accKey + 1 # storing data in MGI_Note # Strain of Origin Note if len(sooNote) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, strainKey, mgiNoteObjectKey, mgiStrainOriginTypeKey, sooNote, \ createdByKey, createdByKey, cdate, cdate)) noteKey = noteKey + 1 # storing data in MGI_Note # Mutant Cell Line of Origin Note if len(mutantNote) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, strainKey, mgiNoteObjectKey, mgiMutantOriginTypeKey, mutantNote, \ createdByKey, createdByKey, cdate, cdate)) noteKey = noteKey + 1 # storing data in MGI_Note # IMPC Colony Note if len(impcColonyNote) > 0: noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, strainKey, mgiNoteObjectKey, mgiIMPCColonyTypeKey, impcColonyNote, \ createdByKey, createdByKey, cdate, cdate)) noteKey = noteKey + 1 # # Annotations # # _AnnotType_key = 1009 # 
_Qualifier_key = 1614158 # if len(annotations) > 0: annotations = annotations.split('|') for a in annotations: # strain annotation type annotTypeKey = 1009 # this is a null qualifier key annotQualifierKey = 1614158 annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile) if annotTermKey == 0: continue annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate)) annotKey = annotKey + 1 mgiKey = mgiKey + 1 strainKey = strainKey + 1
def processFile(): global probeKey, refKey, aliasKey, accKey, mgiKey lineNum = 0 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = string.split(line[:-1], '\t') try: name = tokens[0] jnum = tokens[1] parentID = tokens[2] sourceName = tokens[3] organism = tokens[4] strain = tokens[5] tissue = tokens[6] gender = tokens[7] cellLine = tokens[8] age = tokens[9] vectorType = tokens[10] segmentType = tokens[11] regionCovered = tokens[12] insertSite = tokens[13] insertSize = tokens[14] markerIDs = string.split(tokens[15], '|') relationship = tokens[16] sequenceIDs = tokens[17] aliasList = string.split(tokens[18], '|') notes = tokens[19] rawnotes = tokens[20] createdBy = tokens[21] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) isParent = 0 isSource = 0 parentProbeKey = ''; sourceKey = 0 if parentID != '': isParent = 1 if sourceName != '': isSource = 1 if not isParent and not isSource: organismKey = sourceloadlib.verifyOrganism(organism, lineNum, errorFile) strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile) tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile) genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile) cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum, errorFile) vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile) segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile) sourceKey = sourceloadlib.verifySource(segmentTypeKey, \ vectorKey, organismKey, strainKey, \ tissueKey, genderKey, cellLineKey, age, lineNum, errorFile) if organismKey == 0 or strainKey == 0 or tissueKey == 0 or \ genderKey == 0 or cellLineKey == 0 or vectorKey == 0 or \ segmentTypeKey == 0 or sourceKey == 0: errorFile.write('%s, %s, %s, %s, %s, %s, %s, %s\n' % (segmentType, vectorType, organism, strain, tissue, gender, cellLine, age)) error = 1 elif not isParent and isSource: vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile) segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile) sourceKey = sourceloadlib.verifyLibrary(sourceName, lineNum, errorFile) if vectorKey == 0 or segmentTypeKey == 0 or sourceKey == 0: error = 1 # parent from = yes, source given = yes or no (ignored) else: parentProbeKey, sourceKey = verifyParentProbe(parentID, lineNum, errorFile) vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile) segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile) if parentProbeKey == 0 or sourceKey == 0 or vectorKey == 0 or segmentTypeKey == 0: error = 1 referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile) createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) if referenceKey == 0: errorFile.write('Invalid Reference: %s\n' % (jnum)) error = 1 if createdByKey == 0: errorFile.write('Invalid Creator: %s\n\n' % (createdBy)) error = 1 # marker IDs markerList = [] for markerID in markerIDs: markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile) if len(markerID) > 0 and markerKey == 0: errorFile.write('Invalid Marker: %s, %s\n' % (name, markerID)) error = 1 elif len(markerID) > 0: markerList.append(markerKey) # sequence IDs seqAccDict = {} for seqID in string.split(sequenceIDs, '|'): if len(seqID) > 0: [logicalDB, acc] = string.split(seqID, ':') logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum, errorFile) if logicalDBKey > 0: seqAccDict[acc] = logicalDBKey # if errors, continue to next record 
if error: continue # if no errors, process the probe probeFile.write('%d\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\n' \ % (probeKey, name, parentProbeKey, sourceKey, vectorKey, segmentTypeKey, mgi_utils.prvalue(regionCovered), \ mgi_utils.prvalue(insertSite), mgi_utils.prvalue(insertSize), createdByKey, createdByKey, loaddate, loaddate)) for markerKey in markerList: if markerList.count(markerKey) == 1: markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \ % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate)) else: errorFile.write('Invalid Marker Duplicate: %s, %s\n' % (name, markerID)) refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \ % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate)) # aliases for alias in aliasList: if len(alias) == 0: continue aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \ % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate)) aliasKey = aliasKey + 1 # MGI Accession ID for the marker accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \ % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate)) # Print out a new text file and attach the new MGI Probe IDs as the last field newProbeFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \ % (name, jnum, \ mgi_utils.prvalue(sourceName), \ organism, \ mgi_utils.prvalue(strain), \ mgi_utils.prvalue(tissue), \ mgi_utils.prvalue(gender), \ mgi_utils.prvalue(cellLine), \ mgi_utils.prvalue(age), \ mgi_utils.prvalue(vectorType), \ mgi_utils.prvalue(segmentType), \ mgi_utils.prvalue(regionCovered) + \ mgi_utils.prvalue(insertSite), \ mgi_utils.prvalue(insertSize), \ string.join(markerIDs, '|'), \ relationship, \ mgi_utils.prvalue(sequenceIDs), \ string.join(aliasList, '|'), \ mgi_utils.prvalue(notes), \ createdBy, mgiPrefix, mgiKey)) # Print out a raw note file if len(rawnotes) > 0: rawNoteFile.write('%s%d\t%s\n' % (mgiPrefix, mgiKey, rawnotes)) # Notes if len(notes) > 0: noteFile.write('%s\t%s\t%s\t%s\n' % (probeKey, notes, loaddate, loaddate)) accKey = accKey + 1 mgiKey = mgiKey + 1 # sequence accession ids for acc in seqAccDict.keys(): prefixPart, numericPart = accessionlib.split_accnum(acc) accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \ % (accKey, acc, prefixPart, numericPart, seqAccDict[acc], probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate)) accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \ % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate)) accKey = accKey + 1 refKey = refKey + 1 probeKey = probeKey + 1 # end of "for line in inputFile.readlines():" # # Update the AccessionMax value # if not DEBUG: db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
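#
# Illustrative note, not part of the original script: accessionlib.split_accnum()
# is used above to populate the prefixPart/numericPart columns of ACC_Accession,
# so it is expected to split a sequence ID such as 'AB123456' into a text prefix
# and an integer numeric part, roughly ('AB', 123456).  The exact return values
# are an assumption here; only the (prefixPart, numericPart) ordering is taken
# from the call site above.
#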
results = db.sql(cmd, 'auto') for r in results: outBCP.write(str(r['_Map_key']) + DL + \ str(r['_Object_key']) + DL + \ r['chromosome'] + DL + \ str(r['startCoordinate']) + DL + \ str(r['endCoordinate']) + DL + \ str(r['strand']) + DL + \ str(r['mapUnits']) + DL + \ str(r['provider']) + DL + \ str(r['version']) + DL + \ str(userKey) + DL + str(userKey) + DL + \ loaddate + DL + loaddate + NL) outBCP.close() # # Main Routine # userKey = loadlib.verifyUser(os.environ['MGD_DBUSER'], 1, None) db.useOneConnection(1) print '%s' % mgi_utils.date() createBCP() print '%s' % mgi_utils.date() db.useOneConnection(0)
def processFile(): # Purpose: processes input file # Returns: nothing # Assumes: nothing # Effects: nothing # Throws: nothing global libraryName, libraryID, libraryKey, logicalDBKey global segmentTypeKey, vectorTypeKey, organismKey, referenceKey, strainKey, tissueKey global age, ageMin, ageMax, genderKey, cellLineKey, createdByKey global strainNS, tissueNS, genderNS, cellLineNS, ageNS lineNum = 0 # retrieve next available primary key for Library record results = db.sql( 'select max(_Source_key) + 1 as maxKey from %s' % (libraryTable), 'auto') newlibraryKey = results[0]['maxKey'] strainNS = sourceloadlib.verifyStrain(NS, 0, None) tissueNS = sourceloadlib.verifyTissue(NS, 0, None) genderNS = sourceloadlib.verifyGender(NS, 0, None) cellLineNS = sourceloadlib.verifyCellLine(NS, 0, None) ageNS = NS # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens try: [libraryName, \ logicalDB, \ libraryID, \ segmentType, \ vectorType, \ organism, \ strain, \ tissue, \ age, \ gender, \ cellLine, \ jnum, \ note, \ cloneCollections, \ createdBy] = string.split(line[:-1], TAB) except: exit(1, 'Invalid Line (line: %d): %s\n' % (lineNum, line)) continue libraryKey = sourceloadlib.verifyLibrary(libraryName, lineNum, errorFile) if len(logicalDB) > 0: logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum, errorFile) else: logicalDBKey = 0 if libraryKey == 0 and len(libraryID) > 0: libraryKey = sourceloadlib.verifyLibraryID(libraryID, logicalDBKey, lineNum, errorFile) segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile) vectorTypeKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile) organismKey = sourceloadlib.verifyOrganism(organism, lineNum, errorFile) strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile) tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile) genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile) cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum, errorFile) ageMin, ageMax = sourceloadlib.verifyAge(age, lineNum, errorFile) referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile) createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) if segmentTypeKey == 0 or \ vectorTypeKey == 0 or \ strainKey == 0 or \ tissueKey == 0 or \ genderKey == 0 or \ cellLineKey == 0 or \ organismKey == 0 or \ referenceKey == 0 or \ createdByKey == 0 or \ ageMin is None: # set error flag to true error = 1 # print str(segmentTypeKey) # print str(vectorTypeKey) # print str(strainKey) # print str(tissueKey) # print str(genderKey) # print str(cellLineKey) # print str(organismKey) # print str(referenceKey) # print str(createdByKey) # print str(ageMin) errorFile.write('Errors: %s\n' % (libraryName)) # if errors, continue to next record if error: continue # if no errors, continue processing # process new library if libraryKey == 0: libraryKey = newlibraryKey addLibrary() # increment primary keys newlibraryKey = newlibraryKey + 1 # else, process existing library else: updateLibrary() addCloneCollections(cloneCollections) return
def init(): ''' # requires: # # effects: # 1. Processes command line options # 2. Initializes local DBMS parameters # 3. Initializes global file descriptors/file names # # returns: # ''' global inputFile, diagFile, errorFile, errorFileName, diagFileName global passwordFileName global noteFile, noteFileName, noteChunkFile, noteChunkFileName, sqlFile, sqlFileName global mode global noteTypeName global objectTypeKey, createdByKey global mgiObjects try: optlist, args = getopt.getopt(sys.argv[1:], 'S:D:U:P:M:I:O:T:') except: showUsage() # # Set server, database, user, passwords depending on options # specified by user. # server = None database = None user = None password = None for opt in optlist: if opt[0] == '-S': server = opt[1] elif opt[0] == '-D': database = opt[1] elif opt[0] == '-U': user = opt[1] elif opt[0] == '-P': passwordFileName = opt[1] elif opt[0] == '-M': mode = opt[1] elif opt[0] == '-I': inputFileName = opt[1] elif opt[0] == '-O': objectType = opt[1] elif opt[0] == '-T': noteTypeName = re.sub('"', '', opt[1]) else: showUsage() # Initialize db.py DBMS parameters password = string.strip(open(passwordFileName, 'r').readline()) db.set_sqlLogin(user, password, server, database) db.useOneConnection(1) head, tail = os.path.split(inputFileName) diagFileName = tail + '.diagnostics' errorFileName = tail + '.error' noteFileName = tail + '.' + noteTable + '.bcp' noteChunkFileName = tail + '.' + noteChunkTable + '.bcp' sqlFileName = tail + '.sql' try: inputFile = open(inputFileName, 'r') except: exit(1, 'Could not open file %s\n' % inputFileName) try: diagFile = open(diagFileName, 'w') except: exit(1, 'Could not open file %s\n' % diagFileName) try: errorFile = open(errorFileName, 'w') except: exit(1, 'Could not open file %s\n' % errorFileName) try: noteFile = open(noteFileName, 'w') except: exit(1, 'Could not open file %s\n' % noteFileName) try: noteChunkFile = open(noteChunkFileName, 'w') except: exit(1, 'Could not open file %s\n' % noteChunkFileName) try: sqlFile = open(sqlFileName, 'w') except: exit(1, 'Could not open file %s\n' % sqlFileName) # Set Log File Descriptor try: db.set_sqlLogFD(diagFile) except: pass diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date())) diagFile.write('Server: %s\n' % (server)) diagFile.write('Database: %s\n' % (database)) diagFile.write('User: %s\n' % (user)) diagFile.write('Input File: %s\n' % (inputFileName)) diagFile.write('Object Type: %s\n' % (objectType)) diagFile.write('Note Type: %s\n' % (noteTypeName)) errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date())) objectTypeKey = accessionlib.get_MGIType_key(objectType) createdByKey = loadlib.verifyUser(db.get_sqlUser(), 0, errorFile) results = db.sql(''' select accID, _Object_key from ACC_Accession where _MGIType_key = %s and _LogicalDB_key = 1 and prefixPart = 'MGI:' and preferred = 1 ''' % (objectTypeKey), 'auto') for r in results: mgiObjects[r['accID']] = r['_Object_key']
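#
# Illustrative sketch, not part of the original script: mgiObjects maps each
# preferred MGI accession ID of the requested object type to its _Object_key,
# so per-line processing can resolve IDs from the input file without a
# database round trip.  Assuming a hypothetical variable mgiID holds the ID
# column read from one input line, the lookup would be:
#
#   if mgiID not in mgiObjects:
#       errorFile.write('Invalid MGI ID: %s\n' % (mgiID))
#   else:
#       objectKey = mgiObjects[mgiID]
#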
def processFile(): # Purpose: Read the input file, resolve values to keys. Create bcp files # Returns: 1 if error, else 0 # Assumes: file descriptors have been initialized # Effects: exits if the line does not have 15 columns # Throws: Nothing global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey global alleleLookup, alleleMutationKey lineNum = 0 # For each line in the input file for line in fpInputFile.readlines(): error = 0 lineNum = lineNum + 1 print('%s: %s' % (lineNum, line)) # Split the line into tokens tokens = line[:-1].split('\t') try: markerID = tokens[0] markerSymbol = tokens[1] mutationType = tokens[2] # IMPC allele type description = tokens[3] colonyID = tokens[4] strainOfOrigin = tokens[5] alleleSymbol = tokens[6] alleleName = tokens[7] inheritanceMode = tokens[8] alleleType = tokens[9] # IMPC allele class alleleSubType = tokens[10] alleleStatus = tokens[11] transmission = tokens[12] collection = tokens[13] jNum = tokens[14] createdBy = tokens[15] except: print('exiting with invalid line') exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) print('validating data and getting keys') # marker key markerKey = loadlib.verifyMarker(markerID, lineNum, fpErrorFile) # _vocab_key = 36 (Allele Molecular Mutation) mutationList = str.split(mutationType, ';') if len(mutationList) > 1: print('mutationList: %s' % mutationList) mutationKeyList = [] for m in mutationList: mutationKey = loadlib.verifyTerm('', 36, m, lineNum, fpErrorFile) if mutationKey != 0: mutationKeyList.append(mutationKey) if len(mutationKeyList) > 1: print('mutationKeyList: %s' % mutationKeyList) # strains strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, fpErrorFile) # _vocab_key = 35 (Allele Inheritance Mode) inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, fpErrorFile) # _vocab_key = 38 (Allele Type) alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, fpErrorFile) # _vocab_key = 93 (Allele Subtype) subTypeList = str.split(alleleSubType, ';') if len(subTypeList) > 1: print('subTypeList: %s' % subTypeList) subTypeKeyList = [] for s in subTypeList: if s != '': # if we have a subtype, get it's key subTypeKey = loadlib.verifyTerm('', 93, s, lineNum, fpErrorFile) if subTypeKey != 0: subTypeKeyList.append(subTypeKey) if len(subTypeKeyList) > 1: print('subTypeKeyList: %s' % subTypeKeyList) # _vocab_key = 37 (Allele Status) alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, fpErrorFile) # _vocab_key = 61 (Allele Transmission) transmissionKey = loadlib.verifyTerm('', 61, transmission, lineNum, fpErrorFile) # _vocab_key = 92 collectionKey = loadlib.verifyTerm('', 92, collection, lineNum, fpErrorFile) # _vocab_key = 73 (Marker-Allele Association Status) # _term_key = 4268545 (Curated) markerStatusKey = 4268545 # reference refKey = loadlib.verifyReference(jNum, lineNum, fpErrorFile) # creator createdByKey = loadlib.verifyUser(createdBy, lineNum, fpErrorFile) if createdByKey == 0: continue print('checking for missing data') # if errors, continue to next record # errors are stored (via loadlib) in the .error log if markerKey == 0 \ or mutationKeyList == [] \ or strainOfOriginKey == 0 \ or inheritanceModeKey == 0 \ or alleleTypeKey == 0 \ or alleleStatusKey == 0 \ or transmissionKey == 0 \ or collectionKey == 0 \ or refKey == 0 \ or createdByKey == 0: print('missing data, skipping this line') continue # if no errors, process the allele print('writing to allele file') # allele (isWildType = 0) 
fpAlleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \ alleleStatusKey, transmissionKey, collectionKey, alleleSymbol, alleleName, \ isExtinct, isMixed, refKey, markerStatusKey, \ createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate)) # molecular mutation for mutationKey in mutationKeyList: fpMutationFile.write('%s|%s|%s|%s|%s\n' \ % (alleleMutationKey, alleleKey, mutationKey, loaddate, loaddate)) alleleMutationKey += 1 # reference associations # Original fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (refAssocKey, refKey, alleleKey, mgiTypeKey, origRefTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) refAssocKey = refAssocKey + 1 # Molecular fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (refAssocKey, refKey, alleleKey, mgiTypeKey, molRefTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) refAssocKey = refAssocKey + 1 # allele subtype for subTypeKey in subTypeKeyList: fpAnnotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \ % (annotKey, annotTypeKey, alleleKey, subTypeKey, \ qualifierKey, loaddate, loaddate)) annotKey = annotKey + 1 # MGI Accession ID for the allele alleleID = '%s%s' % (mgiPrefix, mgiKey) fpAccFile.write('%s|%s|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \ % (accKey, alleleID, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \ createdByKey, createdByKey, loaddate, loaddate)) # storing data in MGI_Note # molecular note fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, alleleKey, mgiTypeKey, molecularNoteTypeKey, description,\ createdByKey, createdByKey, loaddate, loaddate)) noteKey = noteKey + 1 # colony ID note fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (noteKey, alleleKey, mgiTypeKey, colonyIdNoteTypeKey, colonyID, \ createdByKey, createdByKey, loaddate, loaddate)) noteKey = noteKey + 1 # Print out a new text file and attach the new MGI Allele IDs # as the last field fpNewAlleleRptFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \ % (mgi_utils.prvalue(alleleID), \ mgi_utils.prvalue(alleleSymbol), \ mgi_utils.prvalue(alleleName), \ mgi_utils.prvalue(markerID), \ mgi_utils.prvalue(markerSymbol), \ mgi_utils.prvalue(colonyID))) accKey = accKey + 1 mgiKey = mgiKey + 1 alleleKey = alleleKey + 1 # # Update the AccessionMax value # print('DEBUG: %s' % DEBUG) if DEBUG == 'false': db.sql('select * from ACC_setMax(%d)' % (lineNum), None) db.commit() return 0
password = string.strip(open(opt[1], 'r').readline()) elif opt[0] == '-K': objectKey = opt[1] else: showUsage() if server is None or \ database is None or \ user is None or \ password is None or \ objectKey is None: showUsage() db.set_sqlLogin(user, password, server, database) db.useOneConnection(1) userKey = loadlib.verifyUser(user, 0, None) # call functions based on the way the program is invoked scriptName = os.path.basename(sys.argv[0]) # all of these invocations will only affect a certain subset of data if scriptName == 'allelecombination.py': processAll() elif scriptName == 'allelecombinationByAllele.py': processByAllele(objectKey) elif scriptName == 'allelecombinationByMarker.py':
def processFile(): global refKey, aliasKey global execProbeSQL global execAssaySQL global execRefSQL lineNum = 0 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = string.split(line[:-1], '\t') try: fromID = tokens[0] name = tokens[1] toID = tokens[2] jnum = tokens[3] createdBy = tokens[4] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile) toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile) referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile) createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) if fromKey == 0: errorFile.write('Invalid Probe "From": %s\n' % (fromID)) error = 1 if toKey == 0: errorFile.write('Invalid Probe "To": %s\n' % (toID)) error = 1 if referenceKey == 0: errorFile.write('Invalid Reference: %s\n' % (jnum)) error = 1 if createdByKey == 0: errorFile.write('Invalid Creator: %s\n\n' % (createdBy)) error = 1 # check that all genes are the same checkGenesSQL = ''' select f.* from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a where f._Probe_key = %s and t._Probe_key = %s and p._Probe_key = %s and p._ProbePrep_key = a._ProbePrep_key and f._Marker_key = t._Marker_key and f._Marker_key = a._Marker_key ''' % (fromKey, toKey, fromKey) checkGenes = db.sql(checkGenesSQL, 'auto') if len(checkGenes) == 0: errorFile.write('Gene of GenePaint, Eurexpress and Assay are not the same: %s, %s\n' % (fromID, toID)) error = 1 # check that the J: is on at least one Assay checkJAssaySQL = ''' select a.* from GXD_ProbePrep p, GXD_Assay a where p._Probe_key = %s and p._ProbePrep_key = a._ProbePrep_key and a._Refs_key = %s ''' % (fromKey, referenceKey) checkJAssay = db.sql(checkJAssaySQL, 'auto') if len(checkJAssay) == 0: errorFile.write('J: is not on any Assays attached to the probe: %s\n' % (fromID)) error = 1 # if errors, continue to next record if error: continue # add alias using fromID name (from) to toID refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \ % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate)) aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \ % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate)) refKey = refKey + 1 aliasKey = aliasKey + 1 # move assay information from fromID to toID execAssaySQL.append(updateAssaySQL % (toKey, fromKey)) # move fromID (from) references to toID execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey)) # delete fromID (from) execProbeSQL.append(deleteProbeSQL % (fromKey))
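#
# Illustrative sketch, not part of the original script: updateAssaySQL,
# updateRefSQL and deleteProbeSQL are defined elsewhere in this loader and are
# not shown here.  Judging only from how they are used above (move assays and
# references from the merged probe to the surviving one, then remove the merged
# probe), updateAssaySQL would be something along the lines of the hypothetical
# template below; the real statements may differ.
#
#   updateAssaySQL = 'update GXD_ProbePrep set _Probe_key = %s where _Probe_key = %s'
#   # used as: execAssaySQL.append(updateAssaySQL % (toKey, fromKey))
#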
def init(): global diagFile, errorFile, inputFile, errorFileName, diagFileName global outSetFile, outMemberFile global setKey, setMemberKey, createdByKey, mgiTypeKey, useSetKey global DEBUG db.useOneConnection(1) db.set_sqlUser(user) db.set_sqlPasswordFromFile(passwordFileName) diagFileName = '%s/setload.diagnostics' % (outputDir) errorFileName = '%s/setload.error' % (outputDir) try: diagFile = open(diagFileName, 'w') except: exit(1, 'Could not open file %s\n' % diagFileName) try: errorFile = open(errorFileName, 'w') except: exit(1, 'Could not open file %s\n' % errorFileName) try: inputFile = open(inputFileName, 'r') except: exit(1, 'Could not open file %s\n' % inputFileName) # Output Files try: fullPathSetFile = '%s/%s' % (outputDir, outSetFileName) outSetFile = open(fullPathSetFile, 'w') except: exit(1, 'Could not open file %s\n' % fullPathSetFile) try: fullPathMemberFile = '%s/%s' % (outputDir, outMemberFileName) outMemberFile = open(fullPathMemberFile, 'w') except: exit(1, 'Could not open file %s\n' % fullPathMemberFile) # Log all SQL db.set_sqlLogFunction(db.sqlLogAll) diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date())) diagFile.write('Server: %s\n' % (db.get_sqlServer())) diagFile.write('Database: %s\n' % (db.get_sqlDatabase())) errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date())) if mode == 'preview': DEBUG = 1 bcpon = 0 elif mode != 'load': exit(1, 'Invalid Processing Mode: %s\n' % (mode)) results = db.sql('select max(_Set_key) + 1 as maxKey from MGI_Set', 'auto') setKey = results[0]['maxKey'] createdByKey = loadlib.verifyUser(createdBy, 0, errorFile) mgiTypeKey = loadlib.verifyMGIType(setType, 0, errorFile) # # use existing MGI_Set, or create a new one # results = db.sql( 'select _Set_key from MGI_Set where _MGIType_key = %s and name = \'%s\'' % (mgiTypeKey, setName), 'auto') if len(results) > 0: for r in results: setKey = r['_Set_key'] # delete/reload db.sql('delete from MGI_SetMember where _Set_key = %s' % (setKey), None) else: outSetFile.write(str(setKey) + TAB + \ str(mgiTypeKey) + TAB + \ str(setName) + TAB + \ '1' + TAB + \ str(createdByKey) + TAB + str(createdByKey) + TAB + \ loaddate + TAB + loaddate + CRT) results = db.sql( 'select max(_SetMember_key) + 1 as maxKey from MGI_SetMember', 'auto') setMemberKey = results[0]['maxKey'] return
def init(): # requires: # # effects: # 1. Processes command line options # 2. Initializes local DBMS parameters # 3. Initializes global file descriptors/file names # 4. Initializes global keys # # returns: # global diagFileName, errorFileName, synFileName global inputFile, diagFile, errorFile, synFile global mgiTypeKey, createdByKey, referenceKey db.useOneConnection(1) db.set_sqlUser(user) db.set_sqlPasswordFromFile(passwordFileName) head, tail = os.path.split(inputFileName) diagFileName = logDir + '/' + tail + '.diagnostics' errorFileName = logDir + '/' + tail + '.error' synFileName = 'MGI_Synonym.bcp' print inputFileName print logDir try: inputFile = open(inputFileName, 'r') except: exit(1, 'Could not open file %s\n' % inputFileName) try: diagFile = open(diagFileName, 'w') except: exit(1, 'Could not open file %s\n' % diagFileName) try: errorFile = open(errorFileName, 'w') except: exit(1, 'Could not open file %s\n' % errorFileName) try: synFile = open(outputDir + '/' + synFileName, 'w') except: exit(1, 'Could not open file %s\n' % synFileName) # Log all SQL db.set_sqlLogFunction(db.sqlLogAll) diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date())) diagFile.write('Server: %s\n' % (db.get_sqlServer())) diagFile.write('Database: %s\n' % (db.get_sqlDatabase())) diagFile.write('Object Type: %s\n' % (mgiType)) diagFile.write('Input File: %s\n' % (inputFileName)) errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date())) mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile) createdByKey = loadlib.verifyUser(createdBy, 0, errorFile) # if reference is J:0, then no reference is given if jnum == 'J:0': referenceKey = '' else: referenceKey = loadlib.verifyReference(jnum, 0, errorFile) # exit if we can't resolve mgiType, createdBy or jnum if mgiTypeKey == 0 or \ createdByKey == 0 or \ referenceKey == 0: exit(1) if mode == 'reload': print 'mode is: %s, deleting synonyms' % mode sys.stdout.flush() db.sql('delete from MGI_Synonym ' + \ 'where _MGIType_key = %d ' % (mgiTypeKey) + \ 'and _CreatedBy_key = %d ' % (createdByKey), None)
def processFile(): ''' # requires: # # effects: # Reads input file # Verifies and Processes each line in the input file # # returns: # nothing # ''' results = db.sql( 'select max(_Translation_key) + 1 as maxKey from MGI_Translation', 'auto') transKey = results[0]['maxKey'] if transKey is None: transKey = 1000 lineNum = 0 # sequence number of bad name in translation list seq = 1 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = string.split(line[:-1], '\t') try: objectID = tokens[0] objectDescription = tokens[1] term = tokens[2] userID = tokens[3] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) continue if vocabKey > 0: objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile) else: objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile) userKey = loadlib.verifyUser(userID, lineNum, errorFile) if objectKey == 0 or userKey == 0: # set error flag to true error = 1 # if errors, continue to next record if error: continue # if no errors, process # add term to translation file bcpWrite(transFile, [ transKey, transTypeKey, objectKey, term, seq, userKey, userKey, loaddate, loaddate ]) transKey = transKey + 1 seq = seq + 1 # end of "for line in inputFile.readlines():" if newTransType: bcpWrite(transTypeFile, [ transTypeKey, mgiTypeKey, vocabKey, transTypeName, transCompression, 0, userKey, userKey, loaddate, loaddate ])
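#
# Illustrative sketch, not part of the original script: bcpWrite() is defined
# elsewhere in this loader.  Assuming it simply stringifies each column value,
# joins them with the bcp field delimiter, and terminates the row, a minimal
# stand-in (the tab delimiter is an assumption) could be:
#
#   def bcpWrite(fp, columns):
#       # write one bcp row: stringify each column and delimit with tabs
#       fp.write('\t'.join([str(c) for c in columns]) + '\n')
#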
def processAssayFile(): global assayAssay, assayKey, accKey, mgiKey lineNum = 0 # For each line in the input file for line in inAssayFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = string.split(line[:-1], TAB) try: assayID = tokens[0] markerID = tokens[1] jnum = tokens[2] assayType = tokens[3] reporterGene = tokens[4] note = tokens[5] createdBy = tokens[6] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile) referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile) assayTypeKey = gxdloadlib.verifyAssayType(assayType, lineNum, errorFile) createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) if markerKey == 0 or referenceKey == 0 or assayTypeKey == 0: # set error flag to true error = 1 if len(reporterGene) > 0: reporterGeneKey = gxdloadlib.verifyReporterGene(reporterGene, lineNum, errorFile) if reporterGeneKey == 0: error = 1 else: reporterGeneKey = '' # if errors, continue to next record if error: continue if assayProbePrep.has_key(assayID): probePrepKey = assayProbePrep[assayID] else: probePrepKey = '' # if no errors, process outAssayFile.write(str(assayKey) + TAB + \ str(assayTypeKey) + TAB + \ str(referenceKey) + TAB + \ str(markerKey) + TAB + \ str(probePrepKey) + TAB + \ TAB + \ TAB + \ str(reporterGeneKey) + TAB + \ str(createdByKey) + TAB + \ str(createdByKey) + TAB + \ loaddate + TAB + loaddate + CRT) if len(note) > 0: i = 0 while i < len(note): outAssayNoteFile.write(str(assayKey) + TAB + \ note[i:i+ASSAY_NOTE_LENGTH] + TAB + \ loaddate + TAB + loaddate + CRT) i = i + ASSAY_NOTE_LENGTH # MGI Accession ID for the assay outAccFile.write(str(accKey) + TAB + \ mgiPrefix + str(mgiKey) + TAB + \ mgiPrefix + TAB + \ str(mgiKey) + TAB + \ accLogicalDBKey + TAB + \ str(assayKey) + TAB + \ assayMgiTypeKey + TAB + \ accPrivate + TAB + \ accPreferred + TAB + \ str(createdByKey) + TAB + \ str(createdByKey) + TAB + \ loaddate + TAB + loaddate + CRT) assayAssay[assayID] = assayKey accKey = accKey + 1 mgiKey = mgiKey + 1 assayKey = assayKey + 1 # end of "for line in inAssayFile.readlines():" return lineNum
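#
# Illustrative sketch, not part of the original script: the note handling above
# writes one assay-note row per ASSAY_NOTE_LENGTH-sized slice of the note text.
# The same slicing expressed as a small stand-alone helper (the helper name is
# hypothetical and the function is not used by the loader):

def chunkNote(note, size):
    # split a string into consecutive slices of at most `size` characters
    return [note[i:i + size] for i in range(0, len(note), size)]

# e.g. chunkNote('abcdefgh', 3) returns ['abc', 'def', 'gh']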
def processFile(): # requires: # # effects: # Reads input file # Verifies and Processes each line in the input file # # returns: # nothing # global strainalleleKey lineNum = 0 notDeleted = 1 # For each line in the input file for line in inputFile.readlines(): error = 0 lineNum = lineNum + 1 # Split the line into tokens tokens = str.split(line[:-1], '\t') try: strainID = tokens[0] alleleID = tokens[1] qualifier = tokens[2] createdBy = tokens[3] except: exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line)) if len(strainID) == 4: strainID = '00' + strainID if len(strainID) == 3: strainID = '000' + strainID if len(strainID) == 2: strainID = '0000' + strainID if len(strainID) == 1: strainID = '00000' + strainID strainKey = loadlib.verifyObject(strainID, strainTypeKey, None, lineNum, errorFile) # this could generate an error because the ID is a marker, not an allele # just ignore the error in the error file if it gets resolved later alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile) markerKey = 0 if alleleKey == 0: markerKey = loadlib.verifyObject(alleleID, markerTypeKey, None, lineNum, errorFile) qualifierKey = verifyQualifier(qualifier, lineNum) createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile) if notDeleted: db.sql( 'delete from PRB_Strain_Marker where _CreatedBy_key = %s' % (createdByKey), None) notDeleted = 0 # if Allele found, resolve to Marker if alleleKey > 0: results = db.sql( 'select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto') if len(results) > 0: markerKey = results[0]['_Marker_key'] elif markerKey == 0: errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID)) error = 1 if strainKey == 0 or markerKey == 0 or qualifierKey == 0: # set error flag to true error = 1 # if errors, continue to next record if error: continue # if no errors, process if alleleKey == 0: alleleKey = '' strainFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \ % (strainalleleKey, strainKey, markerKey, alleleKey, qualifierKey, createdByKey, createdByKey, loaddate, loaddate)) strainalleleKey = strainalleleKey + 1 # end of "for line in inputFile.readlines():" # # Update the AccessionMax value # db.sql('select * from ACC_setMax (%d);' % (lineNum), None) db.commit() # update prb_strain_marker_seq auto-sequence db.sql( ''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None) db.commit()
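#
# Illustrative note, not part of the original script: the strainID handling
# above left-pads 1- to 4-character IDs with zeroes to six characters.  The
# same normalization can be written in one call with str.zfill:
#
#   strainID = strainID.zfill(6)   # e.g. '42' -> '000042'
#
# Note that zfill(6) would also pad a 5-character ID, which the original code
# leaves untouched, so the two are only equivalent if 5-character IDs never
# occur in the input.
#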