def processFile():
    """Run the probe delete statement for each probe ID in the input file.

    Reads the module-level ``inputFile`` line by line.  The first
    tab-delimited field of every line is treated as a probe accession ID and
    resolved with ``loadlib.verifyObject``; IDs that resolve to key 0 are
    skipped.  For every other key, ``deleteSQL`` is executed through
    ``db.sql`` -- unless ``DEBUG`` is set, in which case the statement is
    only printed.

    Returns nothing.
    """
    for lineNum, line in enumerate(inputFile, 1):

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character.
        # str.split always yields at least one element, so tokens[0] cannot
        # fail and needs no exception handler.
        probeID = line.rstrip('\n').split('\t')[0]

        probeKey = loadlib.verifyObject(probeID, mgiTypeKey, None, lineNum, errorFile)
        if probeKey == 0:
            continue

        if DEBUG:
            # dry run: show the statement instead of executing it
            print(deleteSQL % (probeKey))
        else:
            db.sql(deleteSQL % (probeKey), None)
def processSusceptibility():
    """Add OMIM accession IDs to the DO terms named in the OBO file.

    The path of the OBO file is read from the ``OBO_FILE`` environment
    variable.  The file is scanned for stanzas of the form::

        [Term]
        id: OMIM:nnnnnn
        relationship: RO:0003304 <DO id> ...

    For every such relationship line the DO id is resolved with
    ``loadlib.verifyObject`` and one row is inserted into ``ACC_Accession``
    that attaches the OMIM id to the resolved object key.  Stanzas whose id
    is ``OMIM:000000`` are ignored.

    Returns:
        0, always.

    Raises:
        KeyError: if ``OBO_FILE`` is not set in the environment.
    """
    # NOTE(review): the literals 15 and 13 are presumably the OMIM logical DB
    # key and the DO-term MGI type key -- confirm against the schema.
    # NOTE(review): the statement is built by string interpolation from file
    # content; acceptable only while the OBO file is a trusted input.
    INSERT_ACCESSION = '''insert into ACC_Accession
        values ((select max(_Accession_key) + 1 from ACC_Accession),
        '%s', '%s', %s, 15, %s, 13, 0, 0)
        '''

    termTag = '[Term]'
    omimIdValue = 'id: OMIM:'
    relValue = 'relationship: RO:0003304'
    skipValue = 'OMIM:000000'

    omimId = None
    foundOMIM = False

    # 'with' closes the file even if a lookup or insert raises
    with open(os.environ['OBO_FILE'], 'r') as doFile:
        for rawLine in doFile:

            # Lines arrive with their line terminator attached; without
            # stripping it the '[Term]' comparison can never succeed and the
            # OMIM id of one stanza leaks into the following ones.
            line = rawLine.rstrip('\r\n')

            if line.strip() == termTag:
                # new stanza: forget the OMIM id of the previous one
                foundOMIM = False

            elif line.startswith(omimIdValue):
                # drop the leading 'id: '
                omimId = line[4:]
                foundOMIM = omimId != skipValue

            elif foundOMIM and line.startswith(relValue):
                # the DO id is the first word after the relationship tag
                doId = line[len(relValue) + 1:].split(' ')[0]

                objectKey = loadlib.verifyObject(doId, 13, None, None, None)
                if objectKey == 0:
                    # unresolved DO id: do not insert an accession row that
                    # points at object key 0
                    continue

                prefixPart, numericPart = accessionlib.split_accnum(omimId)
                db.sql(INSERT_ACCESSION % (omimId, prefixPart, numericPart, objectKey), None)

    db.commit()
    return 0
def process():
    """Write one set-member record for each valid line of the input file.

    Each line of the module-level ``inputFile`` must hold at least two
    tab-delimited fields: the accession ID of the set member and its label.
    The accession ID is resolved with ``loadlib.verifyObject``; lines whose
    ID resolves to key 0 are skipped.  For every other line a tab-delimited
    record is written to ``outMemberFile`` and the module-level
    ``setMemberKey`` counter is advanced.  The sequence number written to
    each record starts at 1 and counts written records only.

    Returns nothing.
    """
    global setMemberKey

    sequenceNum = 1

    for lineNum, line in enumerate(inputFile, 1):

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split(TAB)

        try:
            setMember = tokens[0]
            setLabel = tokens[1]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load -- confirm.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(setMember, mgiTypeKey, "", lineNum, errorFile)
        if objectKey == 0:
            continue

        fields = [
            str(setMemberKey),
            str(setKey),
            str(objectKey),
            str(setLabel),
            str(sequenceNum),
            str(createdByKey),
            str(createdByKey),
            loaddate,
            loaddate,
        ]
        outMemberFile.write(TAB.join(fields) + CRT)

        setMemberKey = setMemberKey + 1
        sequenceNum = sequenceNum + 1
def process():
    """Write one set-member record for each valid line of the input file.

    Each line of the module-level ``inputFile`` must hold at least two
    tab-delimited fields: the accession ID of the set member and its label.
    The accession ID is resolved with ``loadlib.verifyObject``; lines whose
    ID resolves to key 0 are skipped.  For every other line a tab-delimited
    record is written to ``outMemberFile`` and the module-level
    ``setMemberKey`` counter is advanced.  The sequence number written to
    each record starts at 1 and counts written records only.

    Returns nothing.
    """
    global setMemberKey

    sequenceNum = 1

    for lineNum, line in enumerate(inputFile, 1):

        # str.split replaces the string-module function, which no longer
        # exists in Python 3; rstrip (rather than line[:-1]) keeps the last
        # character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split(TAB)

        try:
            setMember = tokens[0]
            setLabel = tokens[1]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load -- confirm.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(setMember, mgiTypeKey, "", lineNum, errorFile)
        if objectKey == 0:
            continue

        fields = [
            str(setMemberKey),
            str(setKey),
            str(objectKey),
            str(setLabel),
            str(sequenceNum),
            str(createdByKey),
            str(createdByKey),
            loaddate,
            loaddate,
        ]
        outMemberFile.write(TAB.join(fields) + CRT)

        setMemberKey = setMemberKey + 1
        sequenceNum = sequenceNum + 1
def processFile():
    """Verify each input line and write a reference-association record.

    Each line of the module-level ``inputFile`` must hold four tab-delimited
    fields: object accession ID, J number, reference association type and
    created-by login.  All four are resolved to keys; a line is skipped when
    any key comes back 0.  A line is reported to ``errorFile`` as a
    duplicate, and skipped, when its object/reference/type combination is
    already present in ``refDict`` or was already written earlier in this
    run.  Every remaining line produces one '|'-delimited record in
    ``refFile`` and advances the module-level ``refAssocKey``.

    Returns nothing.
    """
    global refAssocKey

    # object:reference:type combinations written during this run, so that a
    # line repeated inside the input file is caught as well
    writtenKeys = set()

    for lineNum, line in enumerate(inputFile, 1):

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        try:
            accID = tokens[0]
            jnum = tokens[1]
            refAssocType = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load -- confirm.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # run every verification so that each one gets the chance to report
        # its own problem for this line
        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if 0 in (objectKey, referenceKey, refAssocTypeKey, createdByKey):
            continue

        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        if key in refDict or key in writtenKeys:
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))
            continue
        writtenKeys.add(key)

        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, referenceKey, objectKey, mgiTypeKey, refAssocTypeKey,
               createdByKey, createdByKey, loaddate, loaddate))

        refAssocKey = refAssocKey + 1
def processFile():
    '''
    Verify each input line and write a translation record for it.

    The starting translation key is max(_Translation_key) + 1 from
    MGI_Translation, or 1000 when that query yields None.

    Each line of the module-level inputFile must hold four tab-delimited
    fields: object ID, object description, term (the translation text) and
    user login.  The object is resolved with loadlib.verifyTerm when
    vocabKey > 0 and with loadlib.verifyObject otherwise.  Lines whose
    object or user resolves to key 0 are skipped; every other line is
    written to transFile through bcpWrite.

    When newTransType is set, one translation-type record is written to
    transTypeFile after the loop, stamped with the most recent user key
    that passed verification.

    Returns nothing.
    '''

    results = db.sql('select maxKey = max(_Translation_key) + 1 from MGI_Translation', 'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        transKey = 1000

    # sequence number of bad name in translation list
    seq = 1

    # Most recent user key that passed verification.  Tracked separately so
    # that the translation-type record is neither stamped with key 0 (last
    # line had an invalid user) nor hits an undefined name (empty file).
    validUserKey = 0

    for lineNum, line in enumerate(inputFile, 1):

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load.  The 'continue' only
            # matters if it ever returns.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
            continue

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)
        if userKey != 0:
            validUserKey = userKey

        # if errors, continue to next record
        if objectKey == 0 or userKey == 0:
            continue

        # add term to translation file
        bcpWrite(transFile, [
            transKey, transTypeKey, objectKey, term, seq,
            userKey, userKey, loaddate, loaddate,
        ])
        transKey = transKey + 1
        seq = seq + 1

    if newTransType:
        if validUserKey == 0:
            # no line carried a valid user, so no translation was written
            # either; a type record owned by key 0 would be useless
            errorFile.write('Translation Type not written: no valid user in input file\n')
        else:
            bcpWrite(transTypeFile, [
                transTypeKey, mgiTypeKey, vocabKey, transTypeName,
                transCompression, 0, validUserKey, validUserKey,
                loaddate, loaddate,
            ])
def _genotypeDebug(message, *args):
    """Print a diagnostic message when the module-level DEBUG flag is set.

    ``args``, when given, are interpolated into ``message`` with ``%``, and
    only if the message is really going to be printed.
    """
    if DEBUG:
        if args:
            message = message % args
        print(message)


def _logGenotypeError(logit):
    """Write an error message to both the diagnostic and the curator log."""
    fpLogDiag.write(logit)
    fpLogCur.write(logit)


def _findGenotypeID(querySQL):
    """Run a genotype lookup and return the accID of its last row, or ''."""
    _genotypeDebug(querySQL)
    results = db.sql(querySQL, 'auto')

    if len(results) > 1:
        _genotypeDebug('    More than one genotype - last one wins')
        _genotypeDebug('    %s', results)

    genotypeID = ''
    for r in results:
        genotypeID = r['accID']

    _genotypeDebug('    genotypeID: %s', genotypeID)
    return genotypeID


def getGenotypes():
    """Resolve the genotype of every HTMP input line and write the load files.

    For each line of ``fpHTMPInput`` the marker, allele, mutant cell line
    and strain are looked up, the allele state is checked, and a query
    against ``genotypes`` looks for an existing genotype ID.

    * Lines that fail a check are logged to ``fpLogDiag``/``fpLogCur`` and
      copied to ``fpHTMPError``.
    * The first line seen for a genotype is written to ``fpGenotype`` and
      its order number recorded in the module-level ``genotypeOrderDict``;
      later lines for the same genotype are copied to ``fpHTMPDup``.
    * After all lines are read, one line per (genotype order, MP id) pair
      is written to ``fpHTMP``, prefixed with the genotype order number.
      When the lines of a pair carry more than one gender value, 'Male' and
      'Female' are replaced by 'Both' in the line written.

    Returns:
        0, always.

    Raises:
        IndexError: if an input line has fewer than 12 tab-delimited fields.
    """
    global genotypeOrderDict

    # The lookups for the different allele states differ only in the
    # conditions on the second allele and the second mutant cell line.
    genotypeQuery = '''
        select g.accID
        from genotypes g
        where g._Marker_key = %s
        and g._Allele_key_1 = %s
        and g._Allele_key_2 %s
        and g._MutantCellLine_key_1 %s %s
        and g._MutantCellLine_key_2 %s
        and g.term = '%s'
        and g._Strain_key = %s
        '''

    genotypeOrder = 1

    # annotations organized by order/mpID
    # 'order' indicates uniq genotype
    # key = order + '|' + mpID
    # value = list of lines
    annotDict = {}

    for lineNum, line in enumerate(fpHTMPInput, 1):

        _genotypeDebug('\nNEW LINE:  %s', line)

        error = 0

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        # sc 2/6/2016 - a subtlety:
        # if genotypeID remains '', the genotype is not in the database
        # if it is assigned an ID from the database, it is still written to
        # the genotypeload input file because this file is used as input to
        # the annotation load.  The genotypeload will only create
        # a genotype if the genotypeID field is ''
        genotypeID = ''

        # Columns used here (0-based):
        #   2 mutant cell line ID, 3 MP id, 4 allele ID, 5 allele state,
        #   7 marker ID, 9 strain name, 10 gender (read in the final pass),
        #   11 colony ID
        mutantID = tokens[2]
        mutantID2 = mutantID
        mpID = tokens[3]
        alleleID = tokens[4]
        alleleID2 = alleleID
        alleleState = tokens[5]
        markerID = tokens[7]
        strainName = tokens[9]
        colonyID = tokens[11]

        # marker

        if len(markerID) > 0:
            markerKey = loadlib.verifyMarker(markerID, lineNum, fpLogDiag)
        else:
            markerKey = 0

        if markerKey == 0:
            _logGenotypeError(errorDisplay % (markerID, lineNum, '8', line))
            error = 1

        _genotypeDebug('    markerID: %s markerKey: %s', markerID, markerKey)

        # allele

        if len(alleleID) > 0:
            alleleKey = loadlib.verifyObject(alleleID, 11, None, lineNum, fpLogDiag)
        else:
            alleleKey = 0

        if alleleKey == 0:
            _logGenotypeError(errorDisplay % (alleleID, lineNum, '5', line))
            error = 1

        _genotypeDebug('    alleleID: %s alleleKey: %s', alleleID, alleleKey)

        # mutant
        # mutantSQL/mutantKey form the SQL comparison used in the genotype
        # lookup: '= <key>' when an MCL is given, 'is null' otherwise

        if len(mutantID) > 0:
            mutantKey = alleleloadlib.verifyMutnatCellLine(mutantID, lineNum, fpLogDiag)
            mutantSQL = '='
        else:
            mutantSQL = 'is'
            mutantKey = 'null'

        #
        # if the MCL in the input file does not match the Allele/MCL
        # association in MGD (i.e. the mutantKey returned from the
        # alleleloadlib lookup is 0), then add the Genotype with null MCLs
        # (see TR12508).  This is deliberately not treated as an error.
        #
        if mutantKey == 0:
            mutantID = ''
            mutantID2 = ''

        _genotypeDebug('    mutantID: %s mutantKey: %s', mutantID, mutantKey)

        # strain should have been added by the previous makeStrains.sh
        # wrapper but in case it was not...

        strainID = ''
        strainKey = 0

        # NS strain does not have colony ID, so don't check
        if strainName == 'Not Specified':
            results = db.sql('''
                select * from strains where strain = '%s'
                ''' % strainName, 'auto')
        else:
            results = db.sql('''
                select * from strains where strain = '%s' and colonyID like'%%%s%%'
                ''' % (strainName, colonyID), 'auto')

        for r in results:
            strainID = r['strainID']
            strainKey = r['_Strain_key']

        if strainKey == 0:
            # logged only: a missing strain does not reject the line
            _logGenotypeError(errorDisplay % (strainName + '|' + colonyID, lineNum, '10', line))

        _genotypeDebug('    strainName: %s strainID %s strainKey: %s\n', strainName, strainID, strainKey)

        # if allele is Heterzygous, then marker must have a wild-type allele
        if alleleState == 'Heterozygous':

            _genotypeDebug('    if allele is Heterzygous, then marker must have a wild-type allele, get it')

            #
            # for heterzygous, allele 2 = the wild type allele
            #    (marker symbol + '<+>')
            # find the wild type allele accession id
            #
            querySQL = '''
                select awt.accID
                from ALL_Allele wt, ACC_Accession awt
                where wt._Marker_key = %s
                and wt.name = 'wild type'
                and wt._Allele_key = awt._Object_key
                and awt._MGIType_key = 11
                and awt._LogicalDB_key = 1
                and awt.preferred = 1
                ''' % (markerKey)

            _genotypeDebug(querySQL)

            results = db.sql(querySQL, 'auto')
            for r in results:
                # found the wild type, so set it
                alleleID2 = r['accID']
                mutantID2 = ''
                _genotypeDebug('    found wild type and alleleID2: %s mutantID2: %s', alleleID2, mutantID2)

            # alleleID2 still equal to alleleID means no wild type was found
            if alleleID == alleleID2:
                logit = errorDisplay % (markerID, lineNum, '8', line)
                logit = logit + 'no wild type allele exists for this marker'
                _logGenotypeError(logit)
                error = 1

        # if error, continue to next line
        if error:
            fpHTMPError.write(line)
            continue

        #
        # check alleleState
        #

        _genotypeDebug('\n    Check AlleleState:')

        if alleleState == 'Homozygous':

            _genotypeDebug('    Homozygous : querying to find genotype')

            genotypeID = _findGenotypeID(genotypeQuery % (
                markerKey, alleleKey, '= %s' % alleleKey,
                mutantSQL, mutantKey, '%s %s' % (mutantSQL, mutantKey),
                alleleState, strainKey))

        elif alleleState == 'Heterozygous':

            _genotypeDebug('    Heterozygous : querying to find genotype')

            genotypeID = _findGenotypeID(genotypeQuery % (
                markerKey, alleleKey, '!= %s' % alleleKey,
                mutantSQL, mutantKey, 'is null',
                alleleState, strainKey))

        elif alleleState in ('Hemizygous', 'Indeterminate'):

            _genotypeDebug('    querying to find genotype :  %s', alleleState)

            alleleID2 = ''
            mutantID2 = ''

            if alleleState == 'Hemizygous':

                # a hemizygous pair state is only valid for X or Y markers
                querySQL = '''
                    select chromosome from MRK_Marker where _Marker_key = %s''' % markerKey
                results = db.sql(querySQL, 'auto')

                for r in results:
                    if r['chromosome'] == 'X':
                        alleleState = 'Hemizygous X-linked'
                        _genotypeDebug('      %s', alleleState)
                    elif r['chromosome'] == 'Y':
                        alleleState = 'Hemizygous Y-linked'
                        _genotypeDebug('      %s', alleleState)
                    else:
                        logit = errorDisplay % (alleleState, lineNum, '6', line)
                        logit = logit + 'pair state %s does not match chromosome %s' \
                            % (alleleState, r['chromosome'])
                        _genotypeDebug('      %s', logit)
                        _logGenotypeError(logit)
                        error = 1
                        break

            genotypeID = _findGenotypeID(genotypeQuery % (
                markerKey, alleleKey, 'is null',
                mutantSQL, mutantKey, 'is null',
                alleleState, strainKey))

        else:
            logit = errorDisplay % (alleleState, lineNum, '6', line)
            _genotypeDebug('    logging error:')
            _genotypeDebug('    %s', logit)
            _logGenotypeError(logit)
            error = 1

        # if error, continue to next line
        if error:
            fpHTMPError.write(line)
            continue

        #
        # check genotype unique-ness
        #
        # duplicate genotypes WITHIN the input file, doesn't mean the
        # genotype isn't in the database
        #

        _genotypeDebug('    check genotype uniqueness')

        #
        # set uniqueness
        # isConditional is always 0, so we do not need to specify this value
        #
        # NOTE(review): the parts are concatenated without a separator, so
        # different key combinations could in principle produce the same
        # string.  The format is left alone because genotypeOrderDict is
        # module-level and may be read elsewhere.
        #
        key = str(markerKey) + str(alleleKey) + str(alleleState) + str(strainKey) + str(mutantKey)

        _genotypeDebug('    unique key is: %s', key)

        dupGeno = key in genotypeOrderDict
        if dupGeno:
            useOrder = str(genotypeOrderDict[key])
            _genotypeDebug('    duplicate genotype and order is: %s', useOrder)
        else:
            useOrder = str(genotypeOrder)

        # uniq genotype/mpID key
        # add line to dictionary by that key for later processing
        # (HDP-2 US161, TR11792)
        currentMP = useOrder + '|' + mpID
        annotDict.setdefault(currentMP, []).append(line)

        if dupGeno:
            fpHTMPDup.write(line)
            continue

        #
        # save genotype order
        #

        _genotypeDebug('    saving genotype order genotypeOrderDict[%s] = %s', key, genotypeOrder)
        genotypeOrderDict[key] = genotypeOrder

        #
        # add to genotype mgi-format file
        #

        _genotypeDebug('    writing genotype to genotype file')

        fpGenotype.write(genotypeLine % (
            genotypeOrder, genotypeID, strainID, strainName,
            markerID, alleleID, mutantID, alleleID2, mutantID2,
            conditional, existsAs, generalNote, privateNote, alleleState,
            compound, createdBy))

        genotypeOrder = genotypeOrder + 1

    #
    # write one line per genotype/MP id (HDP-2 US161, TR11792)
    #
    for key in annotDict:

        order = key.split('|', 1)[0]
        lineList = annotDict[key]

        # collect the gender value of every line of this genotype/MP id
        genderSet = set(l.split('\t')[10] for l in lineList)

        # if multi lines, the only difference is gender
        # just get the last (or only) line in the list; prepend the order number
        line = order + '\t' + lineList[-1]

        # if there are multi gender values in the set, update line to 'Both'
        if len(genderSet) > 1:
            # Don't bother to look at values. If already 'Both', we're golden
            # otherwise just update the line to 'Both'
            line = line.replace('Male', 'Both')
            line = line.replace('Female', 'Both')

        fpHTMP.write(line)

    return 0
def processFile():
    """Verify each input line and write the bcp records for a new strain.

    Each line of the module-level ``inputFile`` must hold 14 tab-delimited
    fields: external ID, strain name, allele IDs ('|'-delimited), strain
    type, species, isStandard, strain-of-origin note, external logical DB,
    external type key, annotations ('|'-delimited), created-by login,
    mutant-cell-line-of-origin note, isPrivate and IMPC colony note.

    A line is skipped when the strain already exists (``verifyStrain``
    returns a key > 0) or when strain type, species or user resolve to key
    0.  For every other line this writes the strain record, one
    strain/marker record per valid allele, the MGI and external accession
    records, up to three note/note-chunk pairs and one annotation record
    per valid annotation term, and advances the module-level key counters.

    After the loop ``ACC_setMax`` is called with the number of lines read
    and the ``prb_strain_marker_seq`` and ``voc_annot_seq`` sequences are
    reset to the current maximum keys.

    Returns nothing.
    """
    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # strain annotation type ("_AnnotType_key = 1009")
    annotTypeKey = 1009
    # this is a null qualifier key
    annotQualifierKey = 1614158

    lineNum = 0

    for line in inputFile:

        lineNum = lineNum + 1

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        try:
            externalID = tokens[0]
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load -- confirm.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # the external ID is stored whole as the prefix part, with an empty
        # numeric part (it is not split at ':')
        externalPrefix = externalID
        externalNumeric = ''

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # if errors, continue to next record
        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        # if no errors, process

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate,
               isGeneticBackground, createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker
        if len(alleleIDs) > 0:
            for a in alleleIDs.split('|'):
                alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)
                if alleleKey == 0:
                    continue

                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto')
                markerKey = results[0]['_Marker_key']

                markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                       createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain
        accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
               createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id
        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
            % (accKey, externalID, externalPrefix, externalNumeric, externalLDB,
               strainKey, externalTypeKey, createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Strain of Origin Note
        if len(sooNote) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiStrainOriginTypeKey,
                   createdByKey, createdByKey, cdate, cdate))
            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, sooNote, createdByKey, createdByKey, cdate, cdate))
            noteKey = noteKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Mutant Cell Line of Origin Note
        if len(mutantNote) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiMutantOriginTypeKey,
                   createdByKey, createdByKey, cdate, cdate))
            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, mutantNote, createdByKey, createdByKey, cdate, cdate))
            noteKey = noteKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # IMPC Colony Note
        if len(impcColonyNote) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiIMPCColonyTypeKey,
                   createdByKey, createdByKey, cdate, cdate))
            # the chunk carries the colony note itself (this used to write
            # the strain-of-origin note text by mistake)
            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, impcColonyNote, createdByKey, createdByKey, cdate, cdate))
            noteKey = noteKey + 1

        #
        # Annotations
        #
        if len(annotations) > 0:
            for a in annotations.split('|'):
                annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
                if annotTermKey == 0:
                    continue

                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, strainKey, annotTermKey,
                       annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    #
    # Update the AccessionMax value
    #
    # NOTE(review): this passes the number of lines read, not the number of
    # MGI ids actually assigned -- confirm that over-counting is intended.
    db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql('''
        select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker))
        ''', None)
    db.commit()

    # update voc_annot_seq auto-sequence
    db.sql('''
        select setval('voc_annot_seq', (select max(_Annot_key) from VOC_Annot))
        ''', None)
    db.commit()
def processFile():
    """Verify each input line and write the bcp records for a new strain.

    Each line of the module-level ``inputFile`` must hold nine tab-delimited
    fields: strain name, allele IDs ('|'-delimited), strain type, species,
    isStandard, created-by login, mutant-cell-line-of-origin note, colony
    note and annotations ('|'-delimited).

    * Strain already in the database (``verifyStrain`` returns a key > 0):
      no strain is written; a colony note is created for the existing
      strain when ``checkColonyNote`` reports that it has none.
    * Strain not in the database: the line is skipped when strain type,
      species or user resolve to key 0.  Otherwise the strain record, one
      strain/marker record per valid allele, the MGI accession record (only
      when isStandard is '1'), the colony and mutant notes and one
      annotation record per valid term are written, and the module-level
      key counters are advanced.

    After the loop ``ACC_setMax`` is called with the number of lines read,
    unless ``DEBUG`` is set.

    Returns nothing.
    """
    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # _AnnotType_key = 1009 = "Strain/Attributes"
    annotTypeKey = 1009
    # _Qualifier_key = 1614158 = null
    annotQualifierKey = 1614158

    lineNum = 0

    for line in inputFile:

        lineNum = lineNum + 1

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        try:
            name = tokens[0]
            alleleIDs = tokens[1]
            strainType = tokens[2]
            species = tokens[3]
            isStandard = tokens[4]
            createdBy = tokens[5]
            mutantNote = tokens[6]
            colonyNote = tokens[7]
            annotations = tokens[8].split('|')
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load -- confirm.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # if the strain exist, but with no colony id note, create one
        if strainExistKey > 0:
            print('strain in database checking colony note : %s' % line)
            if checkColonyNote(strainExistKey):
                print('colony note in database: %s' % colonyNote)
            elif len(colonyNote) > 0 and createdByKey != 0:
                # same conditions as for a new strain: never store an empty
                # note or one owned by an unverified user
                createNote(strainExistKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)
            continue

        print('strain not in database : %s' % line)

        # if strain does not exist and verification failed on strain type,
        # species or createdBy, skip the record
        if strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        # if no errors, process

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate,
               isGeneticBackground, createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker
        for a in alleleIDs.split('|'):
            alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)
            if alleleKey == 0:
                # unknown (or empty) allele ID: the marker query would
                # return no row and results[0] would raise IndexError
                continue

            results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto')
            markerKey = results[0]['_Marker_key']

            markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                   createdByKey, createdByKey, cdate, cdate))
            strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for the strain
        if isStandard == '1':
            accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
                % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
                   createdByKey, createdByKey, cdate, cdate))
            accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Colony ID Note
        if len(colonyNote) > 0:
            createNote(strainKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)

        # storing data in MGI_Note/MGI_NoteChunk
        # Mutant Cell Line of Origin Note
        if len(mutantNote) > 0:
            createNote(strainKey, mutantNote, mgiMutOrigNoteTypeKey, createdByKey)

        #
        # Annotations
        #
        for a in annotations:
            annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
            if annotTermKey == 0:
                continue

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, strainKey, annotTermKey,
                   annotQualifierKey, cdate, cdate))
            annotKey = annotKey + 1

        # NOTE(review): mgiKey advances for every new strain, although an
        # MGI id is only written when isStandard is '1' -- confirm intended.
        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    #
    # Update the AccessionMax value
    #
    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
def processFile():
    '''
    Verify each input line and write a translation record for it.

    The starting translation key is max(_Translation_key) + 1 from
    MGI_Translation, or 1000 when that query yields None.

    Each line of the module-level inputFile must hold four tab-delimited
    fields: object ID, object description, term (the translation text) and
    user login.  The object is resolved with loadlib.verifyTerm when
    vocabKey > 0 and with loadlib.verifyObject otherwise.  Lines whose
    object or user resolves to key 0 are skipped; every other line is
    written to transFile through bcpWrite.

    When newTransType is set, one translation-type record is written to
    transTypeFile after the loop, stamped with the most recent user key
    that passed verification.

    Returns nothing.
    '''

    results = db.sql('select maxKey = max(_Translation_key) + 1 from MGI_Translation', 'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        transKey = 1000

    # sequence number of bad name in translation list
    seq = 1

    # Most recent user key that passed verification.  Tracked separately so
    # that the translation-type record is neither stamped with key 0 (last
    # line had an invalid user) nor hits an undefined name (empty file).
    validUserKey = 0

    for lineNum, line in enumerate(inputFile, 1):

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load.  The 'continue' only
            # matters if it ever returns.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
            continue

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)
        if userKey != 0:
            validUserKey = userKey

        # if errors, continue to next record
        if objectKey == 0 or userKey == 0:
            continue

        # add term to translation file
        bcpWrite(transFile, [
            transKey, transTypeKey, objectKey, term, seq,
            userKey, userKey, loaddate, loaddate,
        ])
        transKey = transKey + 1
        seq = seq + 1

    if newTransType:
        if validUserKey == 0:
            # no line carried a valid user, so no translation was written
            # either; a type record owned by key 0 would be useless
            errorFile.write('Translation Type not written: no valid user in input file\n')
        else:
            bcpWrite(transTypeFile, [
                transTypeKey, mgiTypeKey, vocabKey, transTypeName,
                transCompression, 0, validUserKey, validUserKey,
                loaddate, loaddate,
            ])
def processFile():
    """Verify each probe-merge line and queue the records and SQL for it.

    Each line of the module-level ``inputFile`` must hold five tab-delimited
    fields: "from" probe ID, alias name, "to" probe ID, J number and
    created-by login.  A line is rejected, with one message per problem
    written to ``errorFile``, when

    * any of the four IDs resolves to key 0,
    * the query joining both probes' markers with the assay marker of the
      "from" probe returns no row, or
    * no assay attached to the "from" probe carries the reference.

    For an accepted line a reference record is written to ``refFile`` and
    an alias record to ``aliasFile`` (both for the "to" probe), and three
    statements are appended to the module-level lists ``execAssaySQL``,
    ``execRefSQL`` and ``execProbeSQL``, built from ``updateAssaySQL``,
    ``updateRefSQL`` and ``deleteProbeSQL`` respectively.

    Returns nothing.
    """
    # the exec*SQL lists are only appended to, so only the two counters
    # that are rebound need a global declaration
    global refKey, aliasKey

    for lineNum, line in enumerate(inputFile, 1):

        error = 0

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load -- confirm.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if fromKey == 0:
            errorFile.write('Invalid Probe "From": %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To": %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same
        checkGenesSQL = '''
            select f.*
            from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a
            where f._Probe_key = %s
            and t._Probe_key = %s
            and p._Probe_key = %s
            and p._ProbePrep_key = a._ProbePrep_key
            and f._Marker_key = t._Marker_key
            and f._Marker_key = a._Marker_key
            ''' % (fromKey, toKey, fromKey)

        if len(db.sql(checkGenesSQL, 'auto')) == 0:
            errorFile.write('Gene of GenePaint, Eurexpress and Assay are not the same: %s, %s\n' % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = '''
            select a.*
            from GXD_ProbePrep p, GXD_Assay a
            where p._Probe_key = %s
            and p._ProbePrep_key = a._ProbePrep_key
            and a._Refs_key = %s
            ''' % (fromKey, referenceKey)

        if len(db.sql(checkJAssaySQL, 'auto')) == 0:
            errorFile.write('J: is not on any Assays attached to the probe: %s\n' % (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))

        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
def processFile():
    """Verify each input line and write a synonym record for it.

    Each line of the module-level ``inputFile`` must hold three
    tab-delimited fields: object accession ID, synonym and synonym type.

    A line is reported to ``errorFile`` as a duplicate, and skipped, when
    the synonym is already listed for that accession ID in ``synDict`` or
    was already written for it earlier in this run.  A line is also skipped
    when the object or the synonym type resolves to key 0 or the synonym is
    empty.  Every other line produces one '|'-delimited record in
    ``synFile`` and advances the module-level ``synKey``.

    Returns nothing.
    """
    global synKey

    # (accID, synonym) pairs written during this run, so that a line
    # repeated inside the input file is caught as well
    writtenSynonyms = set()

    for lineNum, line in enumerate(inputFile, 1):

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        try:
            accID = tokens[0]
            synonym = tokens[1]
            synType = tokens[2]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load -- confirm.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)

        # membership is tested on the dictionary itself: a hash lookup
        # instead of a scan over a list of all its keys for every line
        isDuplicate = (accID, synonym) in writtenSynonyms
        if not isDuplicate and accID in synDict:
            isDuplicate = synonym in synDict[accID]

        if isDuplicate:
            errorFile.write('Duplicate synonym: %s for %s\n' % (synonym, accID))
            continue

        synTypeKey = verifySynonymType(synType, lineNum)

        if len(synonym) == 0:
            errorFile.write('Invalid Synonym:Empty (%d) %s\n' % (lineNum, synonym))

        # if errors, continue to next record
        if objectKey == 0 or synTypeKey == 0 or len(synonym) == 0:
            continue

        # if no errors, process

        synFile.write('%d|%d|%d|%d|%s|%s|%s|%s|%s|%s\n' \
            % (synKey, objectKey, mgiTypeKey, synTypeKey, referenceKey, synonym,
               createdByKey, createdByKey, loaddate, loaddate))

        writtenSynonyms.add((accID, synonym))
        synKey = synKey + 1
def processFile():
    """Verify each input line and write a strain/marker/allele record.

    Each line of the module-level ``inputFile`` must hold four tab-delimited
    fields: strain ID, allele (or marker) ID, qualifier and created-by
    login.

    * Strain IDs of one to four characters are left-padded with zeros to
      six characters before they are looked up.
    * The second field is first looked up as an allele; if that fails it is
      looked up as a marker.  For an allele, the marker is taken from
      ``ALL_Allele``.
    * Once, for the first verified user found in the file, all
      ``PRB_Strain_Marker`` rows created by that user are deleted.
    * A line is skipped when strain, marker, qualifier or user resolve to
      key 0.  Otherwise one '|'-delimited record is written to
      ``strainFile`` (with an empty allele field when only a marker was
      found) and the module-level ``strainalleleKey`` is advanced.

    After the loop ``ACC_setMax`` is called with the number of lines read
    and the ``prb_strain_marker_seq`` sequence is reset to the current
    maximum key.

    Returns nothing.
    """
    global strainalleleKey

    lineNum = 0
    notDeleted = 1

    for line in inputFile:

        error = 0
        lineNum = lineNum + 1

        # rstrip (rather than line[:-1]) so that a final line without a
        # trailing newline does not lose its last character
        tokens = line.rstrip('\n').split('\t')

        try:
            strainID = tokens[0]
            alleleID = tokens[1]
            qualifier = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            # NOTE(review): exit() is called with (status, message), so it
            # must be the loader's own helper rather than the builtin;
            # presumably it terminates the load -- confirm.
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # NOTE(review): five-character IDs are left as they are, exactly as
        # before -- confirm whether they should be padded as well.
        if 1 <= len(strainID) <= 4:
            strainID = '0' * (6 - len(strainID)) + strainID

        strainKey = loadlib.verifyObject(strainID, strainTypeKey, None, lineNum, errorFile)

        # this could generate an error because the ID is a marker, not an allele
        # just ignore the error in the error file if it gets resolved later
        alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile)
        markerKey = 0
        if alleleKey == 0:
            markerKey = loadlib.verifyObject(alleleID, markerTypeKey, None, lineNum, errorFile)

        qualifierKey = verifyQualifier(qualifier, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # Delete this user's existing records once.  Wait for a verified
        # user: with key 0 the delete would match nothing and would never
        # be attempted again.  'delete from' is used because PostgreSQL
        # (whose setval() is called below) requires the FROM keyword.
        if notDeleted and createdByKey != 0:
            db.sql('delete from PRB_Strain_Marker where _CreatedBy_key = %s' % (createdByKey), None)
            notDeleted = 0

        # if Allele found, resolve to Marker
        if alleleKey > 0:
            results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto')
            if len(results) > 0:
                markerKey = results[0]['_Marker_key']

        elif markerKey == 0:
            errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID))
            error = 1

        # a record owned by an unverified user (key 0) is rejected like any
        # other unresolved key
        if strainKey == 0 or markerKey == 0 or qualifierKey == 0 or createdByKey == 0:
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # marker-only association: the allele column is written empty
        if alleleKey == 0:
            alleleKey = ''

        strainFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainalleleKey, strainKey, markerKey, alleleKey, qualifierKey,
               createdByKey, createdByKey, loaddate, loaddate))

        strainalleleKey = strainalleleKey + 1

    #
    # Update the AccessionMax value
    #
    db.sql('select * from ACC_setMax (%d);' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql('''
        select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker))
        ''', None)
    db.commit()
def processFile():
    #
    # requires:
    #   the module-level names used below (inputFile, errorFile, refFile,
    #   aliasFile, db, loadlib, exit, mgiTypeKey, loaddate, refKey, aliasKey,
    #   the exec*SQL lists and the update*/delete* SQL templates) are set up
    #   before this function is called
    #
    # effects:
    #   Reads the tab-delimited input file; the columns are
    #       "from" probe ID, alias name, "to" probe ID, J number, created-by
    #   Verifies each line; every failed check is reported in errorFile and
    #   the line is skipped
    #   For each valid line:
    #       writes one reference record to refFile
    #       writes one alias record to aliasFile
    #       queues SQL in execAssaySQL, execRefSQL and execProbeSQL that
    #       moves the assays and references to the "to" probe and deletes
    #       the "from" probe (the SQL is only collected here, not run)
    #
    # returns:
    #   nothing
    #

    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    lineNum = 0

    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.
        # Only the newline is stripped: slicing off the last character
        # would truncate a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            # too few columns; exit() is the file's own (status, message) helper
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # a key of 0 means the value could not be verified
        if fromKey == 0:
            errorFile.write('Invalid Probe "From": %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To": %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference: %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator: %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same:
        # both probes and an assay of the "from" probe must share a marker
        checkGenesSQL = (
            ' select f.* '
            'from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a '
            'where f._Probe_key = %s '
            'and t._Probe_key = %s '
            'and p._Probe_key = %s '
            'and p._ProbePrep_key = a._ProbePrep_key '
            'and f._Marker_key = t._Marker_key '
            'and f._Marker_key = a._Marker_key '
        ) % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write('Gene of GenePaint, Eurexpress and Assay are not the same: %s, %s\n' % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = (
            ' select a.* '
            'from GXD_ProbePrep p, GXD_Assay a '
            'where p._Probe_key = %s '
            'and p._ProbePrep_key = a._ProbePrep_key '
            'and a._Refs_key = %s '
        ) % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            errorFile.write('J: is not on any Assays attached to the probe: %s\n' % (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID
        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))

        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
def processFile():
    #
    # requires:
    #   the module-level names used below (inputFile, errorFile, the output
    #   files strainFile/markerFile/accFile/noteFile/annotFile, db, loadlib,
    #   exit, the verify* helpers, cdate, mgiPrefix, qualifierKey,
    #   isGeneticBackground, the *TypeKey/*ObjectKey values and the key
    #   counters declared global below) are set up before this is called
    #
    # effects:
    #   Reads the tab-delimited input file (14 columns per line)
    #   Skips a line when verifyStrain returns a key greater than 0
    #   (presumably the strain already exists - confirm against verifyStrain)
    #   or when the strain type, species or user cannot be verified
    #   For every remaining line writes '|'-delimited records:
    #       one strain record
    #       one strain/marker record per resolvable allele ID
    #       two accession records (MGI ID and external ID)
    #       one note record per non-empty note column
    #       one annotation record per resolvable annotation term
    #   Advances the global key counters as records are written
    #
    # returns:
    #   nothing
    #

    global lineNum
    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # strain annotation type
    annotTypeKey = 1009

    # this is a null qualifier key
    annotQualifierKey = 1614158

    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.
        # Only the newline is stripped: slicing off the last character
        # would truncate a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            externalID = tokens[0]
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except IndexError:
            # too few columns; exit() is the file's own (status, message) helper
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # the external ID is not split into prefix:numeric;
        # the whole ID is stored as the prefix and the numeric part is empty
        externalPrefix = externalID
        externalNumeric = ''

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)

        # pass the real line number so a bad user can be located in the input
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate,
               isGeneticBackground, createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker
        if len(alleleIDs) > 0:
            for alleleID in alleleIDs.split('|'):

                alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile)
                if alleleKey == 0 or alleleKey is None:
                    continue

                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto')

                # a verified allele key with no ALL_Allele row: report it
                # instead of failing on results[0]
                if len(results) == 0:
                    errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID))
                    continue

                # an allele without a marker gets an empty marker column
                markerKey = results[0]['_Marker_key']
                if markerKey is None:
                    markerKey = ''

                markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (strainmarkerKey, strainKey, markerKey, alleleKey,
                       qualifierKey, createdByKey, createdByKey, cdate, cdate))

                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain
        # the private flag is taken from the isPrivate input column
        accFile.write('%d|%s%d|%s|%s|1|%d|%d|%s|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
               isPrivate, createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id
        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
            % (accKey, externalID, externalPrefix, externalNumeric, externalLDB,
               strainKey, externalTypeKey, createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note, one record per non-empty note column:
        #   Strain of Origin Note
        #   Mutant Cell Line of Origin Note
        #   IMPC Colony Note
        strainNotes = (
            (mgiStrainOriginTypeKey, sooNote),
            (mgiMutantOriginTypeKey, mutantNote),
            (mgiIMPCColonyTypeKey, impcColonyNote),
        )
        for noteTypeKey, noteText in strainNotes:
            if len(noteText) > 0:
                noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (noteKey, strainKey, mgiNoteObjectKey, noteTypeKey, noteText, \
                       createdByKey, createdByKey, cdate, cdate))
                noteKey = noteKey + 1

        #
        # Annotations
        #
        # _AnnotType_key = 1009
        # _Qualifier_key = 1614158
        #
        if len(annotations) > 0:
            for term in annotations.split('|'):

                annotTermKey = loadlib.verifyTerm('', 27, term, lineNum, errorFile)
                if annotTermKey == 0:
                    continue

                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, strainKey, annotTermKey,
                       annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1
def processFile():
    #
    # requires:
    #   the module-level names used below (inputFile, errorFile, strainFile,
    #   db, loadlib, verifyQualifier, exit, loaddate, strainTypeKey,
    #   alleleTypeKey, markerTypeKey, strainalleleKey) are set up before
    #   this function is called
    #
    # effects:
    #   Reads the tab-delimited input file; the columns are
    #       strain ID, allele-or-marker ID, qualifier, created-by
    #   Verifies each line and writes one '|'-delimited record to strainFile
    #   for every line that passes verification
    #   Deletes the PRB_Strain_Marker rows of the first created-by user
    #   that resolves to a non-zero key
    #   Calls ACC_setMax and resets prb_strain_marker_seq after the last line
    #
    # returns:
    #   nothing
    #

    global strainalleleKey

    lineNum = 0
    notDeleted = 1

    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.
        # Only the newline is stripped: slicing off the last character
        # would truncate a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            strainID = tokens[0]
            alleleID = tokens[1]
            qualifier = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            # too few columns; exit() is the file's own (status, message) helper
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # left-pad strain IDs of 1 to 4 characters with zeros to 6 characters;
        # empty IDs and IDs of 5 or more characters are left untouched
        if 1 <= len(strainID) <= 4:
            strainID = '0' * (6 - len(strainID)) + strainID

        strainKey = loadlib.verifyObject(strainID, strainTypeKey, None, lineNum, errorFile)

        # this could generate an error because the ID is a marker, not an allele
        # just ignore the error in the error file if it gets resolved later
        alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile)
        markerKey = 0

        if alleleKey == 0:
            markerKey = loadlib.verifyObject(alleleID, markerTypeKey, None, lineNum, errorFile)

        qualifierKey = verifyQualifier(qualifier, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # One-time purge of this user's existing rows.
        # Wait for a resolved user: purging with key 0 would use up the
        # one-time flag without removing the rows this load replaces.
        # "delete from" is used because PostgreSQL requires the FROM keyword.
        if notDeleted and createdByKey != 0:
            db.sql(
                'delete from PRB_Strain_Marker where _CreatedBy_key = %s' % (createdByKey),
                None)
            notDeleted = 0

        # if Allele found, resolve to Marker
        if alleleKey > 0:
            results = db.sql(
                'select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),
                'auto')
            if len(results) > 0:
                markerKey = results[0]['_Marker_key']

        # the ID resolved neither as an allele nor as a marker
        elif markerKey == 0:
            errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID))
            error = 1

        # a key of 0 means the value could not be verified; an unverified
        # user is rejected as well so that no record is written with key 0
        if strainKey == 0 or markerKey == 0 or qualifierKey == 0 or createdByKey == 0:
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # a marker-only line has no allele: leave that column empty
        if alleleKey == 0:
            alleleKey = ''

        # an allele without a marker: leave the column empty instead of
        # writing the text "None" into the output file
        if markerKey is None:
            markerKey = ''

        strainFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainalleleKey, strainKey, markerKey, alleleKey,
               qualifierKey, createdByKey, createdByKey, loaddate, loaddate))

        strainalleleKey = strainalleleKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #
    db.sql('select * from ACC_setMax (%d);' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql(
        ''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''',
        None)
    db.commit()