def buildPaneKeyLookup ():
    """Fill paneKeyLookup with figure label -> image pane key pairs.

    effects:
        Resolves the module-level jNumber to a reference key and stores
        it in the global refKey.
        Queries IMG_Image / IMG_ImagePane for images of type
        FULLSIZE_IMAGE_TYPE_KEY attached to that reference and records
        each figure label's pane key in the global paneKeyLookup.
        Prints every lookup entry as it is added.

    returns:
        nothing
    """
    global paneKeyLookup, refKey

    # Reference key for the J-Number.
    refKey = loadlib.verifyReference(jNumber, 0, None)

    # All figure labels and their image pane keys for that reference.
    query = (
        'select i.figureLabel, ip._ImagePane_key '
        'from IMG_Image i, IMG_ImagePane ip '
        'where i._Image_key = ip._Image_key '
        'and i._ImageType_key = %d '
        'and i._Refs_key = %d'
    ) % (FULLSIZE_IMAGE_TYPE_KEY, refKey)
    rows = db.sql(query, 'auto')

    for row in rows:
        label = row['figureLabel']
        imagePaneKey = row['_ImagePane_key']
        paneKeyLookup[label] = imagePaneKey
        print('paneKeyLookup[' + label + '] = ' + str(imagePaneKey))

    return
def init ():
    """Open the database connection and initialize the global keys.

    effects:
        Configures the db module with the module-level user and
        passwordFile and asks it to reuse one connection.
        Sets the globals createdByKey (from createdBy), refKey (from
        jNumber) and accKey (max(_Accession_key) + 1 from ACC_Accession).

    returns:
        nothing
    """
    global createdByKey, refKey, accKey

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFile)

    # Created-by key for the user.
    createdByKey = loadlib.verifyUser(createdBy, 0, None)

    # Reference key for the J-Number.
    refKey = loadlib.verifyReference(jNumber, 0, None)

    # Next available accession key.
    maxKeyQuery = 'select max(_Accession_key) + 1 as maxKey from ACC_Accession'
    firstRow = db.sql(maxKeyQuery, 'auto')[0]
    accKey = firstRow['maxKey']

    return
def processFile():
    """Read the reference-association input file and write one bcp row per valid line.

    requires:
        Module globals inputFile, errorFile, refFile, refDict,
        mgiTypeKey, loaddate and refAssocKey are set before the call.

    effects:
        Reads every line of inputFile; each line is tab-delimited:
        accession ID, J-Number, reference association type, created-by.
        Verifies each field; a line with any key resolving to 0 is
        skipped.
        A line whose object:reference:type key is already in refDict is
        reported to errorFile as a duplicate and skipped.
        Writes one pipe-delimited row to refFile per accepted line and
        advances the global refAssocKey.
        Calls exit(1, message) on a line with fewer than four fields.

    returns:
        nothing
    """
    global refAssocKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens.  Only the line terminator is
        # stripped, so a final line without a newline keeps its last
        # character.
        tokens = line.rstrip('\n').split('\t')

        try:
            accID = tokens[0]
            jnum = tokens[1]
            refAssocType = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # if errors, continue to next record
        if objectKey == 0 or \
           referenceKey == 0 or \
           refAssocTypeKey == 0 or \
           createdByKey == 0:
            continue

        # if no errors, process the association

        # could move to verifyDuplicate routine
        # NOTE(review): refDict is not updated here, so the same
        # association repeated within one input file is only caught if
        # refDict already contained it -- confirm that is intended.
        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        if key in refDict:
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))
            continue

        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, referenceKey, objectKey, mgiTypeKey, refAssocTypeKey,
               createdByKey, createdByKey, loaddate, loaddate))

        refAssocKey = refAssocKey + 1
def processFile():
    """Read the probe input file and write bcp rows for every valid probe.

    requires:
        Module globals set up elsewhere: inputFile, errorFile, the output
        files (probeFile, markerFile, refFile, aliasFile, accFile,
        accRefFile, newProbeFile, rawNoteFile, noteFile), mgiTypeKey,
        mgiPrefix, loaddate, DEBUG and the running keys declared global
        below.

    effects:
        Reads every line of inputFile (22 tab-delimited fields).
        Resolves the probe source one of three ways:
          - no parent, no source name: verifies the individual source
            attributes and looks the source up via verifySource;
          - no parent, source name given: looks the library up by name;
          - parent given: takes the source from the parent probe.
        Verifies reference, creator, markers and sequence IDs.
        Lines with any verification error are skipped.
        For each accepted line writes the probe, marker, reference,
        alias, accession, note and "new probe" rows and advances the
        global keys.
        Calls exit(1, message) on a line with too few fields.
        Unless DEBUG is set, calls ACC_setMax with the number of lines
        read.

    returns:
        nothing
    """
    global probeKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.  Only the line terminator is
        # stripped, so a final line without a newline keeps its last
        # character.
        tokens = line.rstrip('\n').split('\t')

        try:
            name = tokens[0]
            jnum = tokens[1]
            parentID = tokens[2]
            sourceName = tokens[3]
            organism = tokens[4]
            strain = tokens[5]
            tissue = tokens[6]
            gender = tokens[7]
            cellLine = tokens[8]
            age = tokens[9]
            vectorType = tokens[10]
            segmentType = tokens[11]
            regionCovered = tokens[12]
            insertSite = tokens[13]
            insertSize = tokens[14]
            markerIDs = tokens[15].split('|')
            relationship = tokens[16]
            sequenceIDs = tokens[17]
            aliasList = tokens[18].split('|')
            notes = tokens[19]
            rawnotes = tokens[20]
            createdBy = tokens[21]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        isParent = parentID != ''
        isSource = sourceName != ''
        parentProbeKey = ''
        sourceKey = 0

        if not isParent and not isSource:
            # parent = no, source name = no: build the source from its parts
            organismKey = sourceloadlib.verifyOrganism(organism, lineNum, errorFile)
            strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
            tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
            genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
            cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifySource(segmentTypeKey, \
                vectorKey, organismKey, strainKey, \
                tissueKey, genderKey, cellLineKey, age, lineNum, errorFile)

            if organismKey == 0 or strainKey == 0 or tissueKey == 0 or \
               genderKey == 0 or cellLineKey == 0 or vectorKey == 0 or \
               segmentTypeKey == 0 or sourceKey == 0:
                errorFile.write('%s, %s, %s, %s, %s, %s, %s, %s\n'
                    % (segmentType, vectorType, organism, strain, tissue, gender, cellLine, age))
                error = 1

        elif not isParent and isSource:
            # parent = no, source name = yes: look the library up by name
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifyLibrary(sourceName, lineNum, errorFile)

            if vectorKey == 0 or segmentTypeKey == 0 or sourceKey == 0:
                error = 1

        else:
            # parent from = yes, source given = yes or no (ignored)
            parentProbeKey, sourceKey = verifyParentProbe(parentID, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)

            if parentProbeKey == 0 or sourceKey == 0 or vectorKey == 0 or segmentTypeKey == 0:
                error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # marker IDs: keep each ID next to its key so that later messages
        # can name the marker they are actually about
        markerPairs = []
        for markerID in markerIDs:
            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)
            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker: %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markerPairs.append((markerID, markerKey))
        markerList = [pair[1] for pair in markerPairs]

        # sequence IDs, each written as logicalDB:accession
        seqAccDict = {}
        for seqID in sequenceIDs.split('|'):
            if len(seqID) > 0:
                logicalDB, acc = seqID.split(':')
                logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum, errorFile)
                if logicalDBKey > 0:
                    seqAccDict[acc] = logicalDBKey

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the probe

        probeFile.write('%d\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\n' \
            % (probeKey, name, parentProbeKey, sourceKey, vectorKey, segmentTypeKey,
               mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(insertSite),
               mgi_utils.prvalue(insertSize),
               createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once is reported and not written at all
        for markerID, markerKey in markerPairs:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship,
                       createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate: %s, %s\n' % (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the probe
        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, probeKey, mgiTypeKey,
               createdByKey, createdByKey, loaddate, loaddate))

        # Print out a new text file and attach the new MGI Probe IDs as the last field
        # NOTE(review): regionCovered and insertSite are concatenated into
        # one output field here (the original joins them with "+"), and
        # parentID/rawnotes are not echoed -- confirm this layout is what
        # the consumers of newProbeFile expect.
        newProbeFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
            % (name, jnum, \
            mgi_utils.prvalue(sourceName), \
            organism, \
            mgi_utils.prvalue(strain), \
            mgi_utils.prvalue(tissue), \
            mgi_utils.prvalue(gender), \
            mgi_utils.prvalue(cellLine), \
            mgi_utils.prvalue(age), \
            mgi_utils.prvalue(vectorType), \
            mgi_utils.prvalue(segmentType), \
            mgi_utils.prvalue(regionCovered) + \
            mgi_utils.prvalue(insertSite), \
            mgi_utils.prvalue(insertSize), \
            '|'.join(markerIDs), \
            relationship, \
            mgi_utils.prvalue(sequenceIDs), \
            '|'.join(aliasList), \
            mgi_utils.prvalue(notes), \
            createdBy, mgiPrefix, mgiKey))

        # Print out a raw note file
        if len(rawnotes) > 0:
            rawNoteFile.write('%s%d\t%s\n' % (mgiPrefix, mgiKey, rawnotes))

        # Notes
        if len(notes) > 0:
            noteFile.write('%s\t%s\t%s\t%s\n' % (probeKey, notes, loaddate, loaddate))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        for acc in seqAccDict:
            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, seqAccDict[acc], probeKey, mgiTypeKey,
                   createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        refKey = refKey + 1
        probeKey = probeKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
def processFile():
    """Read the probe-alias input file and write reference and alias bcp rows.

    requires:
        Module globals inputFile, errorFile, refFile, aliasFile, loaddate
        and the running keys refKey and aliasKey are set before the call.

    effects:
        Reads every line of inputFile; each line is tab-delimited:
        probe ID or name, J-Number, "|"-separated aliases, created-by.
        A first field containing "MGI:" is verified as a probe ID via
        loadlib.verifyProbe; otherwise the file-level verifyProbe looks
        the probe up by name and returns its key and ID.
        Lines with an invalid probe, reference or creator are reported to
        errorFile and skipped.
        When no probe/reference association exists yet
        (verifyProbeReference returned 0) a new row is written to refFile
        and refKey is advanced; otherwise the existing key is reused.
        Writes one aliasFile row per non-empty alias.
        Calls exit(1, message) on a line with fewer than four fields.

    returns:
        nothing
    """
    global refKey, aliasKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.  Only the line terminator is
        # stripped, so a final line without a newline keeps its last
        # character.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = probeName = tokens[0]
            jnum = tokens[1]
            aliasList = tokens[2].split('|')
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        if probeID.find('MGI:') >= 0:
            probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        else:
            probeKey, probeID = verifyProbe(probeName, lineNum, errorFile)

        probeReferenceKey = verifyProbeReference(probeID, jnum, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe: %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # create a new probe-reference key if one does not already exist
        # else use the existing probe-reference key
        if probeReferenceKey == 0:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            aliasrefKey = refKey
            refKey = refKey + 1
        else:
            aliasrefKey = probeReferenceKey

        # aliases
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, aliasrefKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1
def processFile():
    '''
    # requires:
    #	module globals inputFileName, errorFile, exptMarkerFile, noteFile,
    #	exptDict, seqExptDict, alleleKey, matrixData and loaddate
    #
    # effects:
    #	Reads input file (pipe-delimited: marker ID, chromosome,
    #	update-chromosome flag, band, assay, description, J-Number,
    #	created-by)
    #	Verifies and Processes each line in the input file
    #	A line with fewer than eight fields is kept as the note text
    #	Creates the experiment master once, on the first valid line
    #	Writes one experiment-marker row per valid line and, at the end,
    #	one note row if a note line was seen
    #
    # returns:
    #	nothing
    #
    '''

    global referenceKey
    global exptDict, seqExptDict

    lineNum = 0
    note = ''
    masterCreated = False

    # For each line in the input file; the file is closed when the block
    # is left, including on an exception
    with open(inputFileName, 'r') as inputFile:

        for line in inputFile.readlines():

            lineNum = lineNum + 1

            # Split the line into tokens.  Only the line terminator is
            # stripped, so a final line without a newline keeps its last
            # character.
            tokens = line.rstrip('\n').split('|')

            try:
                markerID = tokens[0]
                chromosome = tokens[1]
                updateChr = tokens[2]
                band = tokens[3]
                assay = tokens[4]
                description = tokens[5]
                jnum = tokens[6]
                createdBy = tokens[7]
            except IndexError:
                # if it's not a valid line, assume it's the note
                note = line
                continue

            markerKey, markerSymbol = verifyMarker(markerID, lineNum)
            assayKey = verifyAssay(assay)
            referenceKey = loadlib.verifyReference(jnum, 0, errorFile)
            createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
            error = not verifyChromosome(chromosome, lineNum)

            if markerKey == 0 or \
               assayKey == 0 or \
               referenceKey == 0 or \
               createdByKey == 0:
                # set error flag to true
                error = 1

            # if errors, continue to next record
            if error:
                continue

            # if no errors, process

            # run once...needs the reference.  Keyed on a flag rather than
            # on lineNum == 1 so the master is still created when the
            # first line of the file is invalid or is the note.
            if not masterCreated:
                createExperimentMaster()
                masterCreated = True

            # determine experiment key for this chromosome
            # if it doesn't exist, create it
            if chromosome not in exptDict:
                createExperimentBCP(chromosome)

            if chromosome not in exptDict:
                errorFile.write('Cannot Find Experiment Key For Chromosome (%d): %s\n' % (lineNum, chromosome))
                chrExptKey = 0
            else:
                chrExptKey = exptDict[chromosome]

            # if errors, continue to next record
            if chrExptKey == 0:
                continue

            # add marker to experiment marker file
            bcpWrite(exptMarkerFile, \
                [chrExptKey, \
                markerKey, \
                alleleKey, \
                assayKey, \
                seqExptDict[chrExptKey], \
                markerSymbol, \
                description, \
                matrixData, \
                loaddate, loaddate])

            # increment marker sequence number for the experiment
            seqExptDict[chrExptKey] = seqExptDict[chrExptKey] + 1

        # end of "for line in inputFile.readlines():"

    if len(note) > 0:
        bcpWrite(noteFile, [referenceKey, note, loaddate, loaddate])
def processFile():
    """Read the probe-merge input file and queue the work to merge each pair.

    requires:
        Module globals inputFile, errorFile, refFile, aliasFile,
        mgiTypeKey, loaddate, the SQL templates updateAssaySQL,
        updateRefSQL and deleteProbeSQL, the lists execAssaySQL,
        execRefSQL and execProbeSQL, and the running keys refKey and
        aliasKey are set before the call.

    effects:
        Reads every line of inputFile; each line is tab-delimited:
        "from" probe ID, name, "to" probe ID, J-Number, created-by.
        Verifies all four keys, then checks in the database that
          - the two probes and an assay on the "from" probe share a
            marker, and
          - the reference is on at least one assay of the "from" probe.
        Every failed check is reported to errorFile and the line skipped.
        For an accepted line writes a reference row for the "to" probe
        and an alias row carrying the name, advances refKey/aliasKey, and
        appends the assay-update, reference-update and probe-delete
        statements to the exec* lists (they are not executed here).
        Calls exit(1, message) on a line with fewer than five fields.

    returns:
        nothing
    """
    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.  Only the line terminator is
        # stripped, so a final line without a newline keeps its last
        # character.
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if fromKey == 0:
            errorFile.write('Invalid Probe "From": %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To": %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same
        checkGenesSQL = (
            ' select f.* '
            'from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a '
            'where f._Probe_key = %s '
            'and t._Probe_key = %s '
            'and p._Probe_key = %s '
            'and p._ProbePrep_key = a._ProbePrep_key '
            'and f._Marker_key = t._Marker_key '
            'and f._Marker_key = a._Marker_key '
        ) % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write('Gene of GenePaint, Eurexpress and Assay are not the same: %s, %s\n' % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = (
            ' select a.* '
            'from GXD_ProbePrep p, GXD_Assay a '
            'where p._Probe_key = %s '
            'and p._ProbePrep_key = a._ProbePrep_key '
            'and a._Refs_key = %s '
        ) % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            errorFile.write('J: is not on any Assays attached to the probe: %s\n' % (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID
        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))

        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
def processFile():
    """Read the probe/marker update file and write marker and alias bcp rows.

    requires:
        Module globals inputFile, errorFile, markerFile, refFile,
        aliasFile, loaddate, the deleteSQL template, the execSQL list and
        the running keys refKey and aliasKey are set before the call.

    effects:
        Reads every line of inputFile; each line is tab-delimited:
        probe ID, "|"-separated marker IDs, J-Number, relationship,
        "|"-separated aliases, created-by.
        Verifies probe, reference and creator, and looks up the existing
        PRB_Reference key for the probe/reference pair.
        Marker IDs are verified in order; the literal "none" stops marker
        processing for the line.
        Lines with any error are reported to errorFile and skipped.
        For each marker listed exactly once writes a markerFile row and
        queues a delete statement in execSQL; markers listed more than
        once are reported and not written.
        Writes one aliasFile row per non-empty alias.
        Calls exit(1, message) on a line with fewer than six fields.

    returns:
        nothing
    """
    global refKey, aliasKey, execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.  Only the line terminator is
        # stripped, so a final line without a newline keeps its last
        # character.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            aliasList = tokens[4].split('|')
            createdBy = tokens[5]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        refsKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe: %s\n' % (probeID))
            error = 1

        if refsKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        results = db.sql((
            'select _Reference_key '
            'from PRB_Reference '
            'where _Probe_key = %s and _Refs_key = %s '
        ) % (probeKey, refsKey), 'auto')

        # No row comes back when the pair has no PRB_Reference record,
        # which includes every line whose probe or reference key is 0.
        # Treat that as key 0 so the line is reported and skipped instead
        # of results[0] raising IndexError and aborting the load.
        if len(results) > 0:
            referenceKey = results[0]['_Reference_key']
        else:
            referenceKey = 0

        if referenceKey == 0:
            errorFile.write('Invalid Probe/Reference: %s\n' % (jnum))
            error = 1

        # marker IDs: keep each ID next to its key so that later messages
        # can name the marker they are actually about
        markerPairs = []
        for markerID in markerIDs:

            if markerID == 'none':
                break

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker: %s\n' % (markerID))
                error = 1
            else:
                markerPairs.append((markerID, markerKey))
        markerList = [pair[1] for pair in markerPairs]

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        for markerID, markerKey in markerPairs:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, refsKey, relationship,
                       createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate: %s\n' % (markerID))

        # NOTE(review): a referenceKey of 0 is rejected above, so the
        # else branch is only reached if the key is neither 0 nor
        # positive; and assigning the existing key to the global refKey
        # (incremented below) overwrites the running counter.  Both are
        # kept as found -- confirm the intended behavior.
        if referenceKey > 0:
            refKey = referenceKey
        else:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, refsKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases; an empty alias field yields one empty string, which is
        # skipped as in the other probe loaders
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # only used if referenceKey == 0
        refKey = refKey + 1
def processFile():
    """Read the probe/marker update file and write marker and alias bcp rows.

    requires:
        Module globals inputFile, errorFile, markerFile, refFile,
        aliasFile, loaddate, the deleteSQL template, the execSQL list and
        the running keys refKey and aliasKey are set before the call.

    effects:
        Reads every line of inputFile; each line is tab-delimited:
        probe ID, "|"-separated marker IDs, J-Number, relationship,
        "|"-separated aliases, created-by.
        Verifies probe, reference and creator, and looks up the existing
        PRB_Reference key for the probe/reference pair.
        Marker IDs are verified in order; the literal "none" stops marker
        processing for the line.
        Lines with any error are reported to errorFile and skipped.
        For each marker listed exactly once writes a markerFile row and
        queues a delete statement in execSQL; markers listed more than
        once are reported and not written.
        Writes one aliasFile row per non-empty alias.
        Calls exit(1, message) on a line with fewer than six fields.

    returns:
        nothing
    """
    global refKey, aliasKey, execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.  Only the line terminator is
        # stripped, so a final line without a newline keeps its last
        # character.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            aliasList = tokens[4].split('|')
            createdBy = tokens[5]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        refsKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe: %s\n' % (probeID))
            error = 1

        if refsKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        lookupSQL = (
            'select _Reference_key '
            'from PRB_Reference '
            'where _Probe_key = %s and _Refs_key = %s '
        ) % (probeKey, refsKey)
        results = db.sql(lookupSQL, 'auto')

        # An empty result (no PRB_Reference record for the pair, e.g.
        # because the probe or reference key is 0) is mapped to key 0 so
        # the line is reported and skipped; indexing results[0] directly
        # would raise IndexError and abort the whole load.
        referenceKey = 0
        if len(results) > 0:
            referenceKey = results[0]['_Reference_key']

        if referenceKey == 0:
            errorFile.write('Invalid Probe/Reference: %s\n' % (jnum))
            error = 1

        # marker IDs: remember which ID produced which key so the
        # duplicate message below names the right marker
        markerPairs = []
        for markerID in markerIDs:

            if markerID == 'none':
                break

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker: %s\n' % (markerID))
                error = 1
            else:
                markerPairs.append((markerID, markerKey))
        markerList = [pair[1] for pair in markerPairs]

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        for markerID, markerKey in markerPairs:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, refsKey, relationship,
                       createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate: %s\n' % (markerID))

        # NOTE(review): a referenceKey of 0 is rejected above, so the
        # else branch is only reached if the key is neither 0 nor
        # positive; and assigning the existing key to the global refKey
        # (incremented below) overwrites the running counter.  Both are
        # kept as found -- confirm the intended behavior.
        if referenceKey > 0:
            refKey = referenceKey
        else:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, refsKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases; an empty alias field yields one empty string, which is
        # skipped as in the other probe loaders
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # only used if referenceKey == 0
        refKey = refKey + 1
def init():
    """Initialize database access, open all files and resolve the global keys.

    requires:
        Module globals user, passwordFileName, inputFileName, logDir,
        outputDir, mgiType, createdBy, jnum and mode are set (they are
        read here, not assigned).

    effects:
        1. Initializes local DBMS parameters
        2. Initializes global file descriptors/file names: the input
           file, the diagnostics and error logs under logDir, and
           MGI_Synonym.bcp under outputDir
        3. Writes the run header to the diagnostics and error logs
        4. Initializes global keys (mgiTypeKey, createdByKey,
           referenceKey); a jnum of 'J:0' means no reference and yields
           an empty referenceKey
        5. In 'reload' mode deletes the existing MGI_Synonym rows for
           this MGI type created by this user
        Calls exit(1, message) when a file cannot be opened and exit(1)
        when a key cannot be resolved.

    returns:
        nothing
    """
    global diagFileName, errorFileName, synFileName
    global inputFile, diagFile, errorFile, synFile
    global mgiTypeKey, createdByKey, referenceKey

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # log files are named after the input file, without its directory
    tail = os.path.split(inputFileName)[1]

    diagFileName = logDir + '/' + tail + '.diagnostics'
    errorFileName = logDir + '/' + tail + '.error'
    synFileName = 'MGI_Synonym.bcp'
    synFilePath = outputDir + '/' + synFileName

    print(inputFileName)
    print(logDir)

    try:
        inputFile = open(inputFileName, 'r')
    except (IOError, OSError):
        exit(1, 'Could not open file %s\n' % inputFileName)

    try:
        diagFile = open(diagFileName, 'w')
    except (IOError, OSError):
        exit(1, 'Could not open file %s\n' % diagFileName)

    try:
        errorFile = open(errorFileName, 'w')
    except (IOError, OSError):
        exit(1, 'Could not open file %s\n' % errorFileName)

    try:
        synFile = open(synFilePath, 'w')
    except (IOError, OSError):
        # report the path that was actually tried, not the bare file name
        exit(1, 'Could not open file %s\n' % synFilePath)

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

    # if reference is J:0, then no reference is given
    if jnum == 'J:0':
        referenceKey = ''
    else:
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)

    # exit if we can't resolve mgiType, createdBy or jnum
    if mgiTypeKey == 0 or \
       createdByKey == 0 or \
       referenceKey == 0:
        exit(1)

    if mode == 'reload':
        print('mode is: %s, deleting synonyms' % mode)
        sys.stdout.flush()
        db.sql('delete from MGI_Synonym ' + \
            'where _MGIType_key = %d ' % (mgiTypeKey) + \
            'and _CreatedBy_key = %d ' % (createdByKey), None)
def processFile():
    """Read the probe-merge input file and queue the work to merge each pair.

    requires:
        Module globals inputFile, errorFile, refFile, aliasFile,
        mgiTypeKey, loaddate, the SQL templates updateAssaySQL,
        updateRefSQL and deleteProbeSQL, the lists execAssaySQL,
        execRefSQL and execProbeSQL, and the running keys refKey and
        aliasKey are set before the call.

    effects:
        Reads every line of inputFile; each line is tab-delimited:
        "from" probe ID, name, "to" probe ID, J-Number, created-by.
        Verifies all four keys, then checks in the database that
          - the two probes and an assay on the "from" probe share a
            marker, and
          - the reference is on at least one assay of the "from" probe.
        Every failed check is reported to errorFile and the line skipped.
        For an accepted line writes a reference row for the "to" probe
        and an alias row carrying the name, advances refKey/aliasKey, and
        appends the assay-update, reference-update and probe-delete
        statements to the exec* lists (they are not executed here).
        Calls exit(1, message) on a line with fewer than five fields.

    returns:
        nothing
    """
    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.  Only the line terminator is
        # stripped, so a final line without a newline keeps its last
        # character.
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if fromKey == 0:
            errorFile.write('Invalid Probe "From": %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To": %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same
        checkGenesSQL = (
            ' select f.* '
            'from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a '
            'where f._Probe_key = %s '
            'and t._Probe_key = %s '
            'and p._Probe_key = %s '
            'and p._ProbePrep_key = a._ProbePrep_key '
            'and f._Marker_key = t._Marker_key '
            'and f._Marker_key = a._Marker_key '
        ) % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write(
                'Gene of GenePaint, Eurexpress and Assay are not the same: %s, %s\n' % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = (
            ' select a.* '
            'from GXD_ProbePrep p, GXD_Assay a '
            'where p._Probe_key = %s '
            'and p._ProbePrep_key = a._ProbePrep_key '
            'and a._Refs_key = %s '
        ) % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            errorFile.write(
                'J: is not on any Assays attached to the probe: %s\n' % (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID
        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))

        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
def processFile():
    '''
    # requires:
    #	module globals inputFileName, errorFile, exptMarkerFile, noteFile,
    #	exptDict, seqExptDict, alleleKey, matrixData and loaddate
    #
    # effects:
    #	Reads input file (pipe-delimited: mapping key, marker ID,
    #	chromosome, update-chromosome flag, band, assay, description,
    #	J-Number, created-by)
    #	Verifies and Processes each line in the input file
    #	A line with fewer than nine fields is kept as the note text
    #	Creates the experiment master once, on the first valid line
    #	Writes one experiment-marker row per valid line and, at the end,
    #	one note row if a note line was seen
    #
    # returns:
    #	nothing
    #
    '''

    global referenceKey
    global exptDict, seqExptDict

    lineNum = 0
    note = ''
    masterCreated = False

    # For each line in the input file; the file is closed when the block
    # is left, including on an exception
    with open(inputFileName, 'r') as inputFile:

        for line in inputFile.readlines():

            lineNum = lineNum + 1

            # Split the line into tokens.  Only the line terminator is
            # stripped, so a final line without a newline keeps its last
            # character.
            tokens = line.rstrip('\n').split('|')

            try:
                mappingKey = tokens[0]
                markerID = tokens[1]
                chromosome = tokens[2]
                updateChr = tokens[3]
                band = tokens[4]
                assay = tokens[5]
                description = tokens[6]
                jnum = tokens[7]
                createdBy = tokens[8]
            except IndexError:
                # if it's not a valid line, assume it's the note
                note = line
                continue

            markerKey, markerSymbol = verifyMarker(markerID, lineNum)
            assayKey = verifyAssay(assay)
            referenceKey = loadlib.verifyReference(jnum, 0, errorFile)
            createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
            error = not verifyChromosome(chromosome, lineNum)

            if markerKey == 0 or \
               assayKey == 0 or \
               referenceKey == 0 or \
               createdByKey == 0:
                # set error flag to true
                error = 1

            # if errors, continue to next record
            if error:
                continue

            # if no errors, process

            # run once...needs the reference.  Keyed on a flag rather than
            # on lineNum == 1 so the master is still created when the
            # first line of the file is invalid or is the note.
            if not masterCreated:
                createExperimentMaster()
                masterCreated = True

            # determine experiment key for this chromosome
            # if it doesn't exist, create it
            if chromosome not in exptDict:
                createExperimentBCP(chromosome)

            if chromosome not in exptDict:
                errorFile.write(
                    'Cannot Find Experiment Key For Chromosome (%d): %s\n' % (lineNum, chromosome))
                chrExptKey = 0
            else:
                chrExptKey = exptDict[chromosome]

            # if errors, continue to next record
            if chrExptKey == 0:
                continue

            # add marker to experiment marker file
            bcpWrite(exptMarkerFile, \
                [mappingKey, \
                chrExptKey, \
                markerKey, \
                alleleKey, \
                assayKey, \
                seqExptDict[chrExptKey], \
                description, \
                matrixData, \
                loaddate, loaddate])

            # increment marker sequence number for the experiment
            seqExptDict[chrExptKey] = seqExptDict[chrExptKey] + 1

        # end of "for line in inputFile.readlines():"

    if len(note) > 0:
        bcpWrite(noteFile, [referenceKey, note, loaddate, loaddate])
def processFile():
    """Read the allele input file and write bcp rows for every valid allele.

    requires:
        Module globals set up elsewhere: inputFile, errorFile, the output
        files (alleleFile, mutationFile, refFile, annotFile, accFile,
        noteFile, newAlleleFile), mgiTypeKey, mgiPrefix, annotTypeKey,
        qualifierKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey,
        mgiIKMCNoteTypeKey, loaddate, DEBUG, alleleLookup and the running
        keys declared global below.

    effects:
        Reads every line of inputFile (24 tab-delimited fields).
        Lines with an invalid creator are skipped.
        Lines with any of createMCL / createNote / setStatus filled in
        are handed to processFileIKMC and nothing else is done for them.
        Otherwise verifies marker, vocabulary terms, strain and
        reference; a line with any key resolving to 0 is skipped.
        For each accepted line writes the allele, mutation, reference,
        subtype annotation, accession, note and "new allele" rows,
        records the allele in alleleLookup and advances the global keys.
        Calls exit(1, message) on a line with too few fields.
        Unless DEBUG is set, calls ACC_setMax with the number of lines
        read and commits.

    returns:
        nothing
    """
    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey, mutationKey
    global alleleLookup

    # MGI_Reference_Assoc type keys by the reference type named in the input
    refAssocTypeKeys = {
        'Original': 1011,
        'Transmission': 1023,
        'Molecular': 1012,
    }

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens.  Only the line terminator is
        # stripped, so a final line without a newline keeps its last
        # character.
        tokens = line.rstrip('\n').split('\t')

        try:
            markerID = tokens[0]
            symbol = tokens[1]
            name = tokens[2]
            alleleStatus = tokens[3]
            alleleType = tokens[4]
            alleleSubtypes = tokens[5]
            collectionKey = tokens[6]
            germLine = tokens[7]
            references = tokens[8]
            strainOfOrigin = tokens[9]
            mutantCellLine = tokens[10]
            molecularNotes = tokens[11]
            driverNotes = tokens[12]
            ikmcNotes = tokens[13]
            mutations = tokens[14]
            inheritanceMode = tokens[15]
            isMixed = tokens[16]
            isExtinct = tokens[17]
            createdBy = tokens[18]
            createMCL = tokens[19]
            createNote = tokens[20]
            setStatus = tokens[21]
            existingAlleleID = tokens[22]
            ikmcSymbol = tokens[23]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus, \
                symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \
                createdByKey, existingAlleleID)
            continue

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation); the individual
        # terms are verified when they are written, below
        allMutations = mutations.split('|')

        # _vocab_key = 35, looked up with the inheritance mode term
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

        # reference
        # NOTE(review): jnum is not assigned anywhere in this function;
        # presumably it is a module-level value -- confirm, since the
        # per-allele references come from the "references" field.
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        # (the original also compared the allMutations list with 0, which
        # can never be true, so that test is dropped)
        if 0 in (markerKey, markerStatusKey, alleleStatusKey, alleleTypeKey,
                 germLineKey, inheritanceModeKey, strainOfOriginKey,
                 refKey, createdByKey):
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, germLineKey, collectionKey, symbol, name, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutation in allMutations:
            mutationTermKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
            mutationFile.write('%s|%s|%s|%s|%s\n' \
                % (mutationKey, alleleKey, mutationTermKey, loaddate, loaddate))
            mutationKey = mutationKey + 1

        #
        # allele references: "type|J-Number" pairs separated by "||"
        #
        allReferences = references.split('||')
        for reference in allReferences:
            refType, refID = reference.split('|')
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            # An unrecognized type used to leave refAssocTypeKey unbound
            # (NameError) or carrying the previous reference's value;
            # report it and skip that reference instead.
            if refType not in refAssocTypeKeys:
                errorFile.write('Invalid Reference Type (%d): %s\n' % (lineNum, refType))
                continue
            refAssocTypeKey = refAssocTypeKeys[refType]

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
                createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        #
        # allele subtypes
        #
        allSubtypes = alleleSubtypes.split('|')
        for s in allSubtypes:
            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)
            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
                qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele
        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note

        # molecular notes
        if len(molecularNotes) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
                molecularNotes, createdByKey, createdByKey, loaddate, loaddate))
            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # place holder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes: nine fields, the same layout as the molecular note
        # row (the original format string had only eight placeholders for
        # these nine values and raised TypeError)
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
                ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))
            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field

        if createdBy == 'ikmc_alleleload':
            newAlleleFile.write('%s\t%s%s\t%s\n' \
                % (mgi_utils.prvalue(ikmcNotes), \
                mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
                mgi_utils.prvalue(ikmcSymbol)))
        else:
            # The original referenced alleleSubtype and collection, which
            # are never assigned in this function (the parsed fields are
            # alleleSubtypes and collectionKey), and its format string
            # did not match the number of values.  The row is assembled
            # by joining the fields so the two cannot disagree; the MGI
            # ID (prefix + number) remains the last field.
            newAlleleFields = [
                markerID,
                symbol,
                name,
                alleleStatus,
                alleleType,
                alleleSubtypes,
                collectionKey,
                germLine,
                references,
                strainOfOrigin,
                mutantCellLine,
                allMutations,
                inheritanceMode,
                isMixed,
                isExtinct,
                refKey,
                markerStatusKey,
                createdBy,
            ]
            newAlleleFile.write('%s\t%s%s\n' \
                % ('\t'.join(['%s' % (mgi_utils.prvalue(f)) for f in newAlleleFields]), \
                mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey)))

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = []
        alleleLookup[symbol].append((alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()
def processFile():
    # Purpose: reads the primer input file and, for every valid line,
    #          writes rows to the primer, marker, reference, alias,
    #          accession, accession-reference and note output files
    # Returns: nothing
    # Assumes: the module-level file handles and key counters used below
    #          have been set up before this is called
    # Effects: increments primerKey, refKey, aliasKey, accKey and mgiKey;
    #          calls ACC_setMax with the number of lines read unless DEBUG
    #          is set
    # Throws: nothing; exit() is called for a line with fewer than 12 columns

    global primerKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            markerSymbol = tokens[0]    # only echoed back to newPrimerFile
            markerIDs = tokens[1].split('|')
            name = tokens[2]
            jnum = tokens[3]
            regionCovered = tokens[4]
            sequence1 = tokens[5]
            sequence2 = tokens[6]
            productSize = tokens[7]
            notes = tokens[8]
            sequenceIDs = tokens[9]
            aliasList = tokens[10].split('|')
            createdBy = tokens[11]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # marker IDs
        # markerIDByKey remembers which input ID produced each key so that
        # a duplicate can be reported by the ID that actually caused it
        markerList = []
        markerIDByKey = {}
        for markerID in markerIDs:
            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)
            if len(markerID) == 0:
                continue
            if markerKey == 0:
                errorFile.write('Invalid Marker: %s, %s\n' % (name, markerID))
                error = 1
            else:
                markerList.append(markerKey)
                markerIDByKey[markerKey] = markerID

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # a key of 0 means the value could not be resolved; skip the record
        # rather than writing rows that carry a 0 key
        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # sequence IDs
        seqAccList = sequenceIDs.split('|')

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the primer

        primerFile.write('%d\t%s\t\t%d\t%d\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\n' \
            % (primerKey, name, NA, vectorKey, segmentTypeKey, mgi_utils.prvalue(sequence1), \
            mgi_utils.prvalue(sequence2), mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(productSize), \
            createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once on the line is reported, not written
        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (primerKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate: %s, %s\n' % (name, markerIDByKey[markerKey]))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' % (refKey, primerKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the primer
        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # echo the input record with the new MGI ID appended as the last field
        newPrimerFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
            % (markerSymbol, '|'.join(markerIDs), name, jnum, regionCovered, sequence1, sequence2, productSize, notes, sequenceIDs, createdBy, mgiPrefix, mgiKey))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        for acc in seqAccList:
            if len(acc) == 0:
                continue
            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, logicalDBKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        # notes
        if len(notes) > 0:
            noteFile.write('%s|1\t%s\t%s\t%s\n' \
                % (primerKey, notes, loaddate, loaddate))

        refKey = refKey + 1
        primerKey = primerKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
def processImageFile():
    # Purpose: converts each line of the image input file into rows for
    #          the image, accession, copyright and caption output files
    # Returns: the number of input lines read
    # Assumes: the module-level file handles, constants and key counters
    #          used below have been set up before this is called
    # Effects: records pixID -> imageKey in imagePix and advances imageKey,
    #          accKey and mgiKey for every record written
    # Throws: nothing; exit() is called for a line that cannot be parsed

    global imageKey, accKey, mgiKey
    global imagePix
    global referenceKey

    lineNum = 0

    for line in inImageFile.readlines():

        lineNum += 1
        fields = line[:-1].split('\t')

        # the first ten columns are required; anything shorter is fatal
        try:
            (jnum, fullsizeKey, imageClass, pixID, xdim, ydim,
             figureLabel, copyrightNote, imageNote, imageInfo) = fields[:10]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # both lookups always run so that each can log its own problem
        imageClassKey = loadlib.verifyTerm('', imageVocabClassKey, imageClass, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # skip the record when either value could not be resolved
        if imageClassKey == 0 or referenceKey == 0:
            continue

        creator = str(createdByKey)
        auditColumns = [creator, creator, loaddate, loaddate]

        # image record (the column after the reference key is left empty)
        outImageFile.write(TAB.join([
            str(imageKey),
            str(gxdMgiTypeKey),
            str(imageClassKey),
            str(FSimageTypeKey),
            str(referenceKey),
            '',
            xdim,
            ydim,
            figureLabel] + auditColumns) + CRT)

        # MGI Accession ID for the image
        mgiAccID = mgiPrefix + str(mgiKey)
        outAccFile.write(TAB.join([
            str(accKey),
            mgiAccID,
            mgiPrefix,
            str(mgiKey),
            accLogicalDBKey,
            str(imageKey),
            imageMgiTypeKey,
            accPrivate,
            accPreferred] + auditColumns) + CRT)
        accKey += 1
        mgiKey += 1

        # pix accession ID, unless the ID is empty or mentions GUDMAP
        if pixID and 'GUDMAP' not in pixID:
            outAccFile.write(TAB.join([
                str(accKey),
                pixPrefix + str(pixID),
                pixPrefix,
                pixID,
                pixLogicalDBKey,
                str(imageKey),
                imageMgiTypeKey,
                pixPrivate,
                accPreferred] + auditColumns) + CRT)
            accKey += 1

        # extra accession ID supplied as "logicalDBKey|ID"
        if imageInfo:
            imageLogicalDBKey, imageID = imageInfo.split('|')
            outAccFile.write(TAB.join([
                str(accKey),
                imageID,
                imageID,
                '',
                imageLogicalDBKey,
                str(imageKey),
                imageMgiTypeKey,
                accPrivate,
                accPreferred] + auditColumns) + CRT)
            accKey += 1

        # Copyrights
        if copyrightNote:
            outCopyrightFile.write(mgiAccID + TAB + copyrightNote + CRT)

        # Notes
        if imageNote:
            outCaptionFile.write(mgiAccID + TAB + imageNote + CRT)

        imagePix[pixID] = imageKey
        imageKey += 1

    return lineNum
def processFile():
    # Purpose: processes the library input file; each valid line either
    #          adds a new library or updates an existing one, then adds its
    #          clone collections
    # Returns: nothing
    # Assumes: the module-level file handles and constants used below have
    #          been set up before this is called
    # Effects: sets the module-level library attributes listed in the
    #          global statements for each line before calling addLibrary()
    #          or updateLibrary(); writes a message to errorFile for lines
    #          that fail validation
    # Throws: nothing; exit() is called for a line that does not have
    #         exactly 15 columns

    global libraryName, libraryID, libraryKey, logicalDBKey
    global segmentTypeKey, vectorTypeKey, organismKey, referenceKey, strainKey, tissueKey
    global age, ageMin, ageMax, genderKey, cellLineKey, createdByKey
    global strainNS, tissueNS, genderNS, cellLineNS, ageNS

    lineNum = 0

    # retrieve next available primary key for Library record
    results = db.sql('select maxKey = max(_Source_key) + 1 from %s' % (libraryTable), 'auto')
    newlibraryKey = results[0]['maxKey']

    # keys for the NS value of each source attribute
    strainNS = sourceloadlib.verifyStrain(NS, 0, None)
    tissueNS = sourceloadlib.verifyTissue(NS, 0, None)
    genderNS = sourceloadlib.verifyGender(NS, 0, None)
    cellLineNS = sourceloadlib.verifyCellLine(NS, 0, None)
    ageNS = NS

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        try:
            [libraryName, \
             logicalDB, \
             libraryID, \
             segmentType, \
             vectorType, \
             organism, \
             strain, \
             tissue, \
             age, \
             gender, \
             cellLine, \
             jnum, \
             note, \
             cloneCollections, \
             createdBy] = line[:-1].split(TAB)
        except ValueError:
            exit(1, 'Invalid Line (line: %d): %s\n' % (lineNum, line))
            continue

        libraryKey = sourceloadlib.verifyLibrary(libraryName, lineNum)

        if len(logicalDB) > 0:
            logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum, errorFile)
        else:
            logicalDBKey = 0

        # fall back to the library ID when the name did not resolve
        if libraryKey == 0 and len(libraryID) > 0:
            libraryKey = sourceloadlib.verifyLibraryID(libraryID, logicalDBKey, lineNum, errorFile)

        segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
        vectorTypeKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
        # resolve the organism for this line so the check below tests the
        # current record rather than whatever organismKey held before
        organismKey = sourceloadlib.verifyOrganism(organism, lineNum, errorFile)
        strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
        tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
        genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
        cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum, errorFile)
        ageMin, ageMax = sourceloadlib.verifyAge(age, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if segmentTypeKey == 0 or \
           vectorTypeKey == 0 or \
           strainKey == 0 or \
           tissueKey == 0 or \
           genderKey == 0 or \
           cellLineKey == 0 or \
           organismKey == 0 or \
           referenceKey == 0 or \
           createdByKey == 0 or \
           ageMin is None:
            # set error flag to true
            error = 1
            errorFile.write('Errors: %s\n' % (libraryName))

        # if errors, continue to next record
        if error:
            continue

        # if no errors, continue processing

        # process new library
        if libraryKey == 0:
            libraryKey = newlibraryKey
            addLibrary()

            # increment primary keys
            newlibraryKey = newlibraryKey + 1

        # else, process existing library
        else:
            updateLibrary()

        addCloneCollections(cloneCollections)

    return
def processFile():
    # Purpose: reads the probe/marker input file and, for every valid
    #          line, writes one probe/marker row per marker and queues the
    #          matching delete statement
    # Returns: nothing
    # Assumes: the module-level file handles, deleteSQL and loaddate have
    #          been set up before this is called
    # Effects: appends to execSQL; writes to markerFile and errorFile
    # Throws: nothing; exit() is called for a line with fewer than 5 columns

    global execSQL

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe: %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # marker IDs
        # markerIDByKey remembers which input ID produced each key so that
        # a duplicate can be reported by the ID that actually caused it
        markerList = []
        markerIDByKey = {}
        for markerID in markerIDs:
            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)
            if markerKey == 0:
                # this input has no name column; identify the record by
                # its probe ID
                errorFile.write('Invalid Marker: %s, %s\n' % (probeID, markerID))
                error = 1
            else:
                markerList.append(markerKey)
                markerIDByKey[markerKey] = markerID

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # a marker listed more than once on the line is reported, not written
        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s|%s|%d|%s|%s|%s|%s|%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate: %s, %s\n' % (probeID, markerIDByKey[markerKey]))
def processFile():
    # Purpose: Read the input file, resolve values to keys. Create bcp files
    # Returns: 0 (lines with unresolved values are skipped, not reported
    #          through the return value)
    # Assumes: file descriptors have been initialized
    # Effects: exits if a line has fewer than 16 tab-delimited columns;
    #          increments the allele, mutation, reference association,
    #          annotation, note, accession and MGI key counters; calls
    #          ACC_setMax and commits when DEBUG is 'false'
    # Throws: Nothing

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup, alleleMutationKey

    lineNum = 0

    # For each line in the input file
    for line in fpInputFile.readlines():

        lineNum = lineNum + 1
        print('%s: %s' % (lineNum, line))

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            markerID = tokens[0]
            markerSymbol = tokens[1]
            mutationType = tokens[2]        # IMPC allele type
            description = tokens[3]
            colonyID = tokens[4]
            strainOfOrigin = tokens[5]
            alleleSymbol = tokens[6]
            alleleName = tokens[7]
            inheritanceMode = tokens[8]
            alleleType = tokens[9]          # IMPC allele class
            alleleSubType = tokens[10]
            alleleStatus = tokens[11]
            transmission = tokens[12]
            collection = tokens[13]
            jNum = tokens[14]
            createdBy = tokens[15]
        except IndexError:
            print('exiting with invalid line')
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        print('validating data and getting keys')

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, fpErrorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        # the column may hold several ';'-separated terms; keep only the
        # ones that resolve
        mutationList = mutationType.split(';')
        if len(mutationList) > 1:
            print('mutationList: %s' % mutationList)
        mutationKeyList = []
        for m in mutationList:
            mutationKey = loadlib.verifyTerm('', 36, m, lineNum, fpErrorFile)
            if mutationKey != 0:
                mutationKeyList.append(mutationKey)
        if len(mutationKeyList) > 1:
            print('mutationKeyList: %s' % mutationKeyList)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, fpErrorFile)

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, fpErrorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, fpErrorFile)

        # _vocab_key = 93 (Allele Subtype)
        # subtypes are optional: empty entries are ignored and an empty
        # list does not cause the line to be skipped
        subTypeList = alleleSubType.split(';')
        if len(subTypeList) > 1:
            print('subTypeList: %s' % subTypeList)
        subTypeKeyList = []
        for s in subTypeList:
            if s != '':
                # if we have a subtype, get its key
                subTypeKey = loadlib.verifyTerm('', 93, s, lineNum, fpErrorFile)
                if subTypeKey != 0:
                    subTypeKeyList.append(subTypeKey)
        if len(subTypeKeyList) > 1:
            print('subTypeKeyList: %s' % subTypeKeyList)

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, fpErrorFile)

        # _vocab_key = 61 (Allele Transmission)
        transmissionKey = loadlib.verifyTerm('', 61, transmission, lineNum, fpErrorFile)

        # _vocab_key = 92
        collectionKey = loadlib.verifyTerm('', 92, collection, lineNum, fpErrorFile)

        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # reference
        refKey = loadlib.verifyReference(jNum, lineNum, fpErrorFile)

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, fpErrorFile)
        if createdByKey == 0:
            continue

        print('checking for missing data')

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        if markerKey == 0 \
           or mutationKeyList == [] \
           or strainOfOriginKey == 0 \
           or inheritanceModeKey == 0 \
           or alleleTypeKey == 0 \
           or alleleStatusKey == 0 \
           or transmissionKey == 0 \
           or collectionKey == 0 \
           or refKey == 0 \
           or createdByKey == 0:
            print('missing data, skipping this line')
            continue

        # if no errors, process the allele
        print('writing to allele file')

        # allele (isWildType = 0)
        fpAlleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, transmissionKey, collectionKey, alleleSymbol, alleleName, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutationKey in mutationKeyList:
            fpMutationFile.write('%s|%s|%s|%s|%s\n' \
                % (alleleMutationKey, alleleKey, mutationKey, loaddate, loaddate))
            alleleMutationKey += 1

        # reference associations; the same reference is attached twice,
        # once per association type

        # Original
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, origRefTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # Molecular
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, molRefTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # allele subtype
        for subTypeKey in subTypeKeyList:
            fpAnnotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, alleleKey, subTypeKey, \
                qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        # MGI Accession ID for the allele
        alleleID = '%s%s' % (mgiPrefix, mgiKey)
        fpAccFile.write('%s|%s|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, alleleID, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note

        # molecular note
        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, molecularNoteTypeKey, description, \
            createdByKey, createdByKey, loaddate, loaddate))
        noteKey = noteKey + 1

        # colony ID note; same nine-column layout as the molecular note
        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, colonyIdNoteTypeKey, colonyID, \
            createdByKey, createdByKey, loaddate, loaddate))
        noteKey = noteKey + 1

        # Print out a new text file with the new MGI Allele ID as the
        # first field
        fpNewAlleleRptFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (mgi_utils.prvalue(alleleID), \
            mgi_utils.prvalue(alleleSymbol), \
            mgi_utils.prvalue(alleleName), \
            mgi_utils.prvalue(markerID), \
            mgi_utils.prvalue(markerSymbol), \
            mgi_utils.prvalue(colonyID)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #
    # Update the AccessionMax value
    #
    print('DEBUG: %s' % DEBUG)
    if DEBUG == 'false':
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()

    return 0
def processAssayFile():
    # Purpose: turns each line of the assay input file into rows for the
    #          assay, assay-note and accession output files
    # Returns: the number of input lines read
    # Assumes: the module-level file handles, constants and key counters
    #          used below have been set up before this is called
    # Effects: records assayID -> assayKey in assayAssay and advances
    #          assayKey, accKey and mgiKey for every record written
    # Throws: nothing; exit() is called for a line that cannot be parsed

    global assayAssay, assayKey, accKey, mgiKey

    lineNum = 0

    for line in inAssayFile.readlines():

        lineNum += 1
        fields = line[:-1].split(TAB)

        # the first seven columns are required; anything shorter is fatal
        try:
            (assayID, markerID, jnum, assayType,
             reporterGene, note, createdBy) = fields[:7]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # every lookup runs so that each one can log its own problem
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        assayTypeKey = gxdloadlib.verifyAssayType(assayType, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        skipRecord = markerKey == 0 or referenceKey == 0 or assayTypeKey == 0

        # the reporter gene is optional, but must resolve when supplied
        reporterGeneKey = ''
        if len(reporterGene) > 0:
            reporterGeneKey = gxdloadlib.verifyReporterGene(reporterGene, lineNum, errorFile)
            if reporterGeneKey == 0:
                skipRecord = True

        if skipRecord:
            continue

        # probe prep key, if one was recorded for this assay ID
        probePrepKey = ''
        if assayProbePrep.has_key(assayID):
            probePrepKey = assayProbePrep[assayID]

        creator = str(createdByKey)
        auditColumns = [creator, creator, loaddate, loaddate]

        # assay record (two columns after the probe prep key are left empty)
        outAssayFile.write(TAB.join([
            str(assayKey),
            str(assayTypeKey),
            str(referenceKey),
            str(markerKey),
            str(probePrepKey),
            '',
            '',
            str(reporterGeneKey)] + auditColumns) + CRT)

        # the note is written in pieces of at most ASSAY_NOTE_LENGTH characters
        for start in range(0, len(note), ASSAY_NOTE_LENGTH):
            outAssayNoteFile.write(TAB.join([
                str(assayKey),
                note[start:start + ASSAY_NOTE_LENGTH],
                loaddate,
                loaddate]) + CRT)

        # MGI Accession ID for the assay
        outAccFile.write(TAB.join([
            str(accKey),
            mgiPrefix + str(mgiKey),
            mgiPrefix,
            str(mgiKey),
            accLogicalDBKey,
            str(assayKey),
            assayMgiTypeKey,
            accPrivate,
            accPreferred] + auditColumns) + CRT)

        assayAssay[assayID] = assayKey

        accKey += 1
        mgiKey += 1
        assayKey += 1

    return lineNum
def processFile():
    # Purpose: reads the allele input file and, for every valid line,
    #          writes rows to the allele, mutation, reference, annotation,
    #          accession, note and note-chunk output files plus a report
    #          line carrying the new MGI ID
    # Returns: nothing
    # Assumes: the module-level file handles, constants and key counters
    #          used below have been set up before this is called
    # Effects: increments alleleKey, refAssocKey, accKey, noteKey, mgiKey
    #          and annotKey; stores each new allele in alleleLookup; hands
    #          IKMC update lines to processFileIKMC(); calls ACC_setMax and
    #          commits unless DEBUG is set
    # Throws: nothing; exit() is called for a line with fewer than 24 columns

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            markerID = tokens[0]
            symbol = tokens[1]
            name = tokens[2]
            alleleStatus = tokens[3]
            alleleType = tokens[4]
            alleleSubtypes = tokens[5]
            collectionKey = tokens[6]
            germLine = tokens[7]
            references = tokens[8]
            strainOfOrigin = tokens[9]
            mutantCellLine = tokens[10]
            molecularNotes = tokens[11]
            driverNotes = tokens[12]
            ikmcNotes = tokens[13]
            mutations = tokens[14]
            inheritanceMode = tokens[15]
            isMixed = tokens[16]
            isExtinct = tokens[17]
            createdBy = tokens[18]
            createMCL = tokens[19]
            createNote = tokens[20]
            setStatus = tokens[21]
            existingAlleleID = tokens[22]
            ikmcSymbol = tokens[23]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        # any of these three columns marks the line as an update to an
        # existing allele, which is handled entirely by processFileIKMC()
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus, \
                symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \
                createdByKey, existingAlleleID)
            continue

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        allMutations = mutations.split('|')

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

        # reference
        # NOTE(review): jnum is not assigned anywhere in this function;
        # presumably a module-level value -- confirm
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        # NOTE(review): allMutations is a list, so "allMutations == 0" can
        # never be true; mutation terms are only resolved further below
        if markerKey == 0 \
           or markerStatusKey == 0 \
           or alleleStatusKey == 0 \
           or alleleTypeKey == 0 \
           or germLineKey == 0 \
           or allMutations == 0 \
           or inheritanceModeKey == 0 \
           or strainOfOriginKey == 0 \
           or refKey == 0 \
           or createdByKey == 0:
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, germLineKey, collectionKey, symbol, name, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutation in allMutations:
            mutationKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
            mutationFile.write('%s|%s|%s|%s\n' \
                % (alleleKey, mutationKey, loaddate, loaddate))

        #
        # allele references
        #
        # the column holds 'type|reference ID' pairs separated by '||'
        allReferences = references.split('||')
        for reference in allReferences:
            refType, refID = reference.split('|')
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            # NOTE(review): any other refType leaves refAssocTypeKey unset
            # (first pair) or carrying the previous pair's value -- confirm
            # that only these three types can occur
            if refType == 'Original':
                refAssocTypeKey = 1011
            elif refType == 'Transmission':
                refAssocTypeKey = 1023
            elif refType == 'Molecular':
                refAssocTypeKey = 1012

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
                createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        #
        # allele subtypes
        #
        allSubtypes = alleleSubtypes.split('|')
        for s in allSubtypes:
            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)
            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
                qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele
        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note/MGI_NoteChunk

        # molecular notes
        mgiNoteSeqNum = 1
        if len(molecularNotes) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
                createdByKey, createdByKey, loaddate, loaddate))
            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, mgiNoteSeqNum, molecularNotes, createdByKey, createdByKey, loaddate, loaddate))
            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # placeholder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes
        # useIKMCnotekey stays 0 when the allele has no IKMC note
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
                createdByKey, createdByKey, loaddate, loaddate))
            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))
            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field

        if createdBy == 'ikmc_alleleload':
            newAlleleFile.write('%s\t%s%s\t%s\n' \
                % (mgi_utils.prvalue(ikmcNotes), \
                mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
                mgi_utils.prvalue(ikmcSymbol)))
        else:
            # columns 6 and 7 echo the subtype and collection values as
            # they were read from the input line
            reportFields = [
                markerID,
                symbol,
                name,
                alleleStatus,
                alleleType,
                alleleSubtypes,
                collectionKey,
                germLine,
                references,
                strainOfOrigin,
                mutantCellLine,
                allMutations,
                inheritanceMode,
                isMixed,
                isExtinct,
                refKey,
                markerStatusKey,
                createdBy,
            ]
            # joining the fields keeps the number of columns in step with
            # the list above instead of relying on a hand-counted format
            reportColumns = [str(mgi_utils.prvalue(f)) for f in reportFields]
            reportColumns.append(str(mgi_utils.prvalue(mgiPrefix)) + str(mgi_utils.prvalue(mgiKey)))
            newAlleleFile.write('\t'.join(reportColumns) + '\n')

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = []
        alleleLookup[symbol].append((alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()
def processFile():
    # Purpose: reads the probe input file and, for every valid line,
    #          writes rows to the probe, marker, reference, alias,
    #          accession, accession-reference and note output files plus a
    #          report line carrying the new MGI ID
    # Returns: nothing
    # Assumes: the module-level file handles, constants and key counters
    #          used below have been set up before this is called
    # Effects: increments probeKey, refKey, aliasKey, accKey and mgiKey;
    #          calls ACC_setMax with the number of lines read unless DEBUG
    #          is set
    # Throws: nothing; exit() is called for a line with fewer than 22 columns

    global probeKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            name = tokens[0]
            jnum = tokens[1]
            parentID = tokens[2]
            sourceName = tokens[3]
            organism = tokens[4]
            strain = tokens[5]
            tissue = tokens[6]
            gender = tokens[7]
            cellLine = tokens[8]
            age = tokens[9]
            vectorType = tokens[10]
            segmentType = tokens[11]
            regionCovered = tokens[12]
            insertSite = tokens[13]
            insertSize = tokens[14]
            markerIDs = tokens[15].split('|')
            relationship = tokens[16]
            sequenceIDs = tokens[17]
            aliasList = tokens[18].split('|')
            notes = tokens[19]
            rawnotes = tokens[20]
            createdBy = tokens[21]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        isParent = 0
        isSource = 0
        parentProbeKey = ''
        sourceKey = 0

        if parentID != '':
            isParent = 1

        if sourceName != '':
            isSource = 1

        # parent from = no, source given = no:
        # resolve the source from its individual attributes
        if not isParent and not isSource:
            organismKey = sourceloadlib.verifyOrganism(organism, lineNum, errorFile)
            strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
            tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
            genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
            cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifySource(segmentTypeKey, \
                vectorKey, organismKey, strainKey, \
                tissueKey, genderKey, cellLineKey, age, lineNum, errorFile)

            if organismKey == 0 or strainKey == 0 or tissueKey == 0 or \
               genderKey == 0 or cellLineKey == 0 or vectorKey == 0 or \
               segmentTypeKey == 0 or sourceKey == 0:
                errorFile.write('%s, %s, %s, %s, %s, %s, %s, %s\n' % (segmentType, vectorType, organism, strain, tissue, gender, cellLine, age))
                error = 1

        # parent from = no, source given = yes:
        # resolve the source by its name
        elif not isParent and isSource:
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifyLibrary(sourceName, lineNum, errorFile)

            if vectorKey == 0 or segmentTypeKey == 0 or sourceKey == 0:
                error = 1

        # parent from = yes, source given = yes or no (ignored)
        else:
            parentProbeKey, sourceKey = verifyParentProbe(parentID, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)

            if parentProbeKey == 0 or sourceKey == 0 or vectorKey == 0 or segmentTypeKey == 0:
                error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # marker IDs
        # markerIDByKey remembers which input ID produced each key so that
        # a duplicate can be reported by the ID that actually caused it
        markerList = []
        markerIDByKey = {}
        for markerID in markerIDs:
            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)
            if len(markerID) == 0:
                continue
            if markerKey == 0:
                errorFile.write('Invalid Marker: %s, %s\n' % (name, markerID))
                error = 1
            else:
                markerList.append(markerKey)
                markerIDByKey[markerKey] = markerID

        # sequence IDs
        # each entry is 'logicalDB:accession'; entries whose logical DB
        # does not resolve are dropped without failing the record
        seqAccDict = {}
        for seqID in sequenceIDs.split('|'):
            if len(seqID) > 0:
                [logicalDB, acc] = seqID.split(':')
                logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum, errorFile)
                if logicalDBKey > 0:
                    seqAccDict[acc] = logicalDBKey

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the probe

        probeFile.write('%d\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\n' \
            % (probeKey, name, parentProbeKey, sourceKey, vectorKey, segmentTypeKey, mgi_utils.prvalue(regionCovered), \
            mgi_utils.prvalue(insertSite), mgi_utils.prvalue(insertSize), createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once on the line is reported, not written
        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate: %s, %s\n' % (name, markerIDByKey[markerKey]))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases
        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the probe
        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # Print out a new text file and attach the new MGI Probe IDs as the last field
        # NOTE(review): regionCovered and insertSite are concatenated into a
        # single column here ('+' rather than ','); kept as-is because the
        # format string is sized for it -- confirm this is intended
        newProbeFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
            % (name, jnum, \
            mgi_utils.prvalue(sourceName), \
            organism, \
            mgi_utils.prvalue(strain), \
            mgi_utils.prvalue(tissue), \
            mgi_utils.prvalue(gender), \
            mgi_utils.prvalue(cellLine), \
            mgi_utils.prvalue(age), \
            mgi_utils.prvalue(vectorType), \
            mgi_utils.prvalue(segmentType), \
            mgi_utils.prvalue(regionCovered) + \
            mgi_utils.prvalue(insertSite), \
            mgi_utils.prvalue(insertSize), \
            '|'.join(markerIDs), \
            relationship, \
            mgi_utils.prvalue(sequenceIDs), \
            '|'.join(aliasList), \
            mgi_utils.prvalue(notes), \
            createdBy, mgiPrefix, mgiKey))

        # Print out a raw note file
        if len(rawnotes) > 0:
            rawNoteFile.write('%s%d\t%s\n' % (mgiPrefix, mgiKey, rawnotes))

        # Notes
        if len(notes) > 0:
            noteFile.write('%s\t%s\t%s\t%s\n' % (probeKey, notes, loaddate, loaddate))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        for acc in seqAccDict:
            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, seqAccDict[acc], probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        refKey = refKey + 1
        probeKey = probeKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)