def processFile():
    '''
    # requires:
    #	inputFile, errorFile and the bcp output files are open, and the
    #	running primary keys (alleleKey, accKey, ...) are initialized
    #
    # effects:
    #	Reads the tab-delimited allele input file (24 columns per line).
    #	Lines that request IKMC-only processing (createMCL, createNote or
    #	setStatus populated) are delegated to processFileIKMC.
    #	For every other line that passes validation, writes the allele,
    #	mutation, reference, subtype annotation, accession and note bcp
    #	records plus one line in the new-allele report, then advances the
    #	running keys.
    #	Calls exit() if a line has fewer than 24 columns.
    #
    # returns:
    #	nothing
    '''

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup

    # reference association type keys, by the type name used in the input
    refAssocTypeKeys = {
        'Original': 1011,
        'Transmission': 1023,
        'Molecular': 1012,
    }

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens.
        # Strip only the line terminator: line[:-1] would drop a data
        # character from a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            markerID = tokens[0]
            symbol = tokens[1]
            name = tokens[2]
            alleleStatus = tokens[3]
            alleleType = tokens[4]
            alleleSubtypes = tokens[5]
            collectionKey = tokens[6]
            germLine = tokens[7]
            references = tokens[8]
            strainOfOrigin = tokens[9]
            mutantCellLine = tokens[10]
            molecularNotes = tokens[11]
            driverNotes = tokens[12]	# parsed but not loaded (see driver notes below)
            ikmcNotes = tokens[13]
            mutations = tokens[14]
            inheritanceMode = tokens[15]
            isMixed = tokens[16]
            isExtinct = tokens[17]
            createdBy = tokens[18]
            createMCL = tokens[19]
            createNote = tokens[20]
            setStatus = tokens[21]
            existingAlleleID = tokens[22]
            ikmcSymbol = tokens[23]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus, \
                symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \
                createdByKey, existingAlleleID)
            continue

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        allMutations = mutations.split('|')

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

        # reference
        # NOTE(review): jnum is not assigned anywhere in this function;
        # presumably it is a module-level value -- confirm.
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        # (the former "allMutations == 0" test compared a list with 0 and
        # could never be true, so it has been dropped)
        if markerKey == 0 \
                or markerStatusKey == 0 \
                or alleleStatusKey == 0 \
                or alleleTypeKey == 0 \
                or germLineKey == 0 \
                or inheritanceModeKey == 0 \
                or strainOfOriginKey == 0 \
                or refKey == 0 \
                or createdByKey == 0:
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, germLineKey, collectionKey, symbol, name, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        # NOTE(review): a mutation term that fails verification is still
        # written with whatever key verifyTerm returned -- confirm intended.
        for mutation in allMutations:
            mutationKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
            mutationFile.write('%s|%s|%s|%s\n' \
                % (alleleKey, mutationKey, loaddate, loaddate))

        #
        # allele references ("type|id" pairs separated by "||")
        #
        allReferences = references.split('||')
        for reference in allReferences:
            refType, refID = reference.split('|')
            # refKey is deliberately re-assigned: the report line below
            # prints the key of the last reference processed
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            refAssocTypeKey = refAssocTypeKeys.get(refType)
            if refAssocTypeKey is None:
                # previously an unknown type silently re-used the key of
                # the preceding reference (or raised NameError)
                errorFile.write('Invalid Reference Type (%d): %s\n' % (lineNum, refType))
                continue

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
                createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        #
        # allele subtypes
        #
        allSubtypes = alleleSubtypes.split('|')
        for s in allSubtypes:
            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)
            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
                qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele
        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note/MGI_NoteChunk
        # molecular notes
        mgiNoteSeqNum = 1
        if len(molecularNotes) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
                createdByKey, createdByKey, loaddate, loaddate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, mgiNoteSeqNum, molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # placeholder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
                createdByKey, createdByKey, loaddate, loaddate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field
        if createdBy == 'ikmc_alleleload':
            newAlleleFile.write('%s\t%s%s\t%s\n' \
                % (mgi_utils.prvalue(ikmcNotes), \
                mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
                mgi_utils.prvalue(ikmcSymbol)))
        else:
            # The column list drives the output, so the number of columns
            # can no longer disagree with a hand-counted format string.
            # (The old code referenced the undefined names alleleSubtype
            # and collection and supplied 20 values for 18 placeholders.)
            reportFields = [
                markerID, symbol, name, alleleStatus, alleleType,
                alleleSubtypes, collectionKey, germLine, references,
                strainOfOrigin, mutantCellLine, allMutations,
                inheritanceMode, isMixed, isExtinct, refKey,
                markerStatusKey, createdBy,
            ]
            newAlleleFile.write('%s\t%s%s\n' \
                % ('\t'.join(['%s' % mgi_utils.prvalue(f) for f in reportFields]), \
                mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey)))

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = []
        alleleLookup[symbol].append((alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()
def sanityCheck(biotypeVocab, biotypeTerm, mcvTerms, primaryMCVTerm, markerType, lineNum):
    '''
    #
    # requires:
    #	biotypeVocab: biotype vocabulary name (Ensembl, NCBI, MGP,
    #		EnsemblR or VISTA)
    #	biotypeTerm: term within that vocabulary
    #	mcvTerms: '|'-delimited list of MCV/feature type terms
    #	primaryMCVTerm: the primary MCV/feature type term
    #	markerType: marker type name
    #	lineNum: input line number, used in the error messages
    #
    # effects:
    #	Sets the globals biotypeVocabKey, biotypeTermKey, mcvTermKeys,
    #	markerTypeKey and primaryMCVTermKey to the resolved keys.
    #	biotypeVocabKey and biotypeTermKey are 0 when the vocabulary is
    #	not recognised.
    #
    # returns:
    #	List [] of error messages if sanity check fails
    #	Empty list [] if all sanity checks pass
    #
    '''

    global biotypeVocabKey
    global biotypeTermKey
    global mcvTermKeys
    global markerTypeKey
    global primaryMCVTermKey

    errors = []
    mcvTermKeys = []

    #
    # BioType Vocabularies
    #

    vocabKeyByName = {
        'Ensembl': ENSEMBL_VOCAB_KEY,
        'NCBI': NCBI_VOCAB_KEY,
        'MGP': MGP_VOCAB_KEY,
        'EnsemblR': ENSEMBLREG_VOCAB_KEY,
        'VISTA': VISTAREG_VOCAB_KEY,
    }

    biotypeVocabKey = vocabKeyByName.get(biotypeVocab, 0)
    if biotypeVocab not in vocabKeyByName:
        errors.append(INVALID_VOCAB_ERROR % (lineNum, biotypeVocab))

    # Reset first so that an invalid vocabulary cannot leave the term key
    # of a previous call in the global.
    biotypeTermKey = 0

    # Lookup the biotype _term_key for this vocab/term
    if biotypeVocabKey:
        biotypeTermKey = loadlib.verifyTerm('', biotypeVocabKey, biotypeTerm, lineNum, errorFile)
        if biotypeTermKey == 0:
            errors.append(INVALID_BIOTYPE_TERM_ERROR % (lineNum, biotypeTerm, biotypeVocab))

    # lookup the _marker_type_key
    markerTypeKey = loadlib.verifyMarkerType(markerType, lineNum, errorFile)
    if markerTypeKey == 0:
        errors.append(INVALID_MARKER_TYPE_ERROR % (lineNum, markerType))

    #
    # mcv/feature types
    #
    for mcvTerm in mcvTerms.split('|'):
        mcvTermKey = loadlib.verifyTerm('', MCV_VOCAB_KEY, mcvTerm, lineNum, errorFile)
        if mcvTermKey == 0:
            errors.append(INVALID_MCV_TERM_ERROR % (lineNum, mcvTerm))
        else:
            mcvTermKeys.append(mcvTermKey)

    # lookup the primary feature type
    primaryMCVTermKey = loadlib.verifyTerm('', MCV_VOCAB_KEY, primaryMCVTerm, lineNum, errorFile)
    if primaryMCVTermKey == 0:
        # this is an MCV term failure, so report it as one (it was
        # previously reported with the marker type message)
        errors.append(INVALID_MCV_TERM_ERROR % (lineNum, primaryMCVTerm))

    return errors
def processFile():
    # Purpose: Read the input file, resolve values to keys. Create bcp files
    # Returns: 0 (invalid lines terminate the load through exit())
    # Assumes: file descriptors have been initialized
    # Effects: exits if the line does not have 16 columns;
    #	advances the global running keys for every allele written;
    #	updates the accession max value when DEBUG is 'false'
    # Throws: Nothing

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup, alleleMutationKey

    lineNum = 0

    # For each line in the input file
    for line in fpInputFile.readlines():

        lineNum = lineNum + 1
        print('%s: %s' % (lineNum, line))

        # Split the line into tokens.
        # Strip only the line terminator: line[:-1] would drop a data
        # character from a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            markerID = tokens[0]
            markerSymbol = tokens[1]
            mutationType = tokens[2]	# IMPC allele type
            description = tokens[3]
            colonyID = tokens[4]
            strainOfOrigin = tokens[5]
            alleleSymbol = tokens[6]
            alleleName = tokens[7]
            inheritanceMode = tokens[8]
            alleleType = tokens[9]	# IMPC allele class
            alleleSubType = tokens[10]
            alleleStatus = tokens[11]
            transmission = tokens[12]
            collection = tokens[13]
            jNum = tokens[14]
            createdBy = tokens[15]
        except IndexError:
            print('exiting with invalid line')
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        print('validating data and getting keys')

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, fpErrorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        mutationList = mutationType.split(';')
        if len(mutationList) > 1:
            print('mutationList: %s' % mutationList)
        mutationKeyList = []
        for m in mutationList:
            mutationKey = loadlib.verifyTerm('', 36, m, lineNum, fpErrorFile)
            if mutationKey != 0:
                mutationKeyList.append(mutationKey)
        if len(mutationKeyList) > 1:
            print('mutationKeyList: %s' % mutationKeyList)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, fpErrorFile)

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, fpErrorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, fpErrorFile)

        # _vocab_key = 93 (Allele Subtype)
        subTypeList = alleleSubType.split(';')
        if len(subTypeList) > 1:
            print('subTypeList: %s' % subTypeList)
        subTypeKeyList = []
        for s in subTypeList:
            if s != '':	# if we have a subtype, get its key
                subTypeKey = loadlib.verifyTerm('', 93, s, lineNum, fpErrorFile)
                if subTypeKey != 0:
                    subTypeKeyList.append(subTypeKey)
        if len(subTypeKeyList) > 1:
            print('subTypeKeyList: %s' % subTypeKeyList)

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, fpErrorFile)

        # _vocab_key = 61 (Allele Transmission)
        transmissionKey = loadlib.verifyTerm('', 61, transmission, lineNum, fpErrorFile)

        # _vocab_key = 92
        collectionKey = loadlib.verifyTerm('', 92, collection, lineNum, fpErrorFile)

        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # reference
        refKey = loadlib.verifyReference(jNum, lineNum, fpErrorFile)

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, fpErrorFile)
        if createdByKey == 0:
            continue

        print('checking for missing data')

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        if markerKey == 0 \
                or mutationKeyList == [] \
                or strainOfOriginKey == 0 \
                or inheritanceModeKey == 0 \
                or alleleTypeKey == 0 \
                or alleleStatusKey == 0 \
                or transmissionKey == 0 \
                or collectionKey == 0 \
                or refKey == 0 \
                or createdByKey == 0:
            print('missing data, skipping this line')
            continue

        # if no errors, process the allele
        print('writing to allele file')

        # allele (isWildType = 0)
        # isExtinct/isMixed are not input columns; they are read from
        # names defined outside this function
        fpAlleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, transmissionKey, collectionKey, alleleSymbol, alleleName, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutationKey in mutationKeyList:
            fpMutationFile.write('%s|%s|%s|%s|%s\n' \
                % (alleleMutationKey, alleleKey, mutationKey, loaddate, loaddate))
            alleleMutationKey += 1

        # reference associations

        # Original
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, origRefTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # Molecular
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, molRefTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # allele subtype
        for subTypeKey in subTypeKeyList:
            fpAnnotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, alleleKey, subTypeKey, \
                qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        # MGI Accession ID for the allele
        alleleID = '%s%s' % (mgiPrefix, mgiKey)
        fpAccFile.write('%s|%s|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, alleleID, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
            createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note

        # molecular note
        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, molecularNoteTypeKey, description,\
            createdByKey, createdByKey, loaddate, loaddate))
        noteKey = noteKey + 1

        # colony ID note
        # nine values need nine placeholders; the previous eight-placeholder
        # format raised TypeError ("not all arguments converted")
        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, colonyIdNoteTypeKey, colonyID, \
            createdByKey, createdByKey, loaddate, loaddate))
        noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs
        # as the first field
        fpNewAlleleRptFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (mgi_utils.prvalue(alleleID), \
            mgi_utils.prvalue(alleleSymbol), \
            mgi_utils.prvalue(alleleName), \
            mgi_utils.prvalue(markerID), \
            mgi_utils.prvalue(markerSymbol), \
            mgi_utils.prvalue(colonyID)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #
    # Update the AccessionMax value
    #
    print('DEBUG: %s' % DEBUG)
    if DEBUG == 'false':
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()

    return 0
def processFile():
    '''
    # requires:
    #	inputFile, errorFile and the bcp output files are open, and the
    #	global running keys are initialized
    #
    # effects:
    #	Reads the tab-delimited strain input file (14 columns per line).
    #	For each strain that is not already in the database and whose
    #	strain type, species and creator all verify, writes the strain,
    #	strain/marker, accession, note and annotation bcp records and
    #	advances the global running keys.
    #	Calls exit() on a line with too few columns.
    #
    # returns:
    #	nothing
    '''

    global lineNum
    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # every strain annotation uses the same type and the null qualifier
    strainAnnotTypeKey = 1009
    nullQualifierKey = 1614158

    for line in inputFile.readlines():

        lineNum += 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            externalID = tokens[0]
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # the whole external id is stored as the prefix part
        externalPrefix = externalID
        externalNumeric = ''

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # skip strains already in the database and records that failed
        # any verification
        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        strainRow = (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate,
            isGeneticBackground, createdByKey, createdByKey, cdate, cdate)
        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' % strainRow)

        # if Allele found, resolve to Marker
        if alleleIDs:
            for alleleID in alleleIDs.split('|'):

                alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile)
                if alleleKey == 0 or alleleKey is None:
                    continue

                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto')
                markerKey = results[0]['_Marker_key']

                if markerKey is None:
                    # allele without a marker: marker column is left empty
                    markerFile.write('%s|%s||%s|%s|%s|%s|%s|%s\n' \
                        % (strainmarkerKey, strainKey, alleleKey, qualifierKey,
                        createdByKey, createdByKey, cdate, cdate))
                else:
                    markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                        % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                        createdByKey, createdByKey, cdate, cdate))

                strainmarkerKey += 1

        # MGI Accession ID for the strain; privacy comes from the input
        accFile.write('%d|%s%d|%s|%s|1|%d|%d|%s|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
            isPrivate, createdByKey, createdByKey, cdate, cdate))
        accKey += 1

        # external accession id
        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
            % (accKey, externalID, externalPrefix, externalNumeric, externalLDB, strainKey,
            externalTypeKey, createdByKey, createdByKey, cdate, cdate))
        accKey += 1

        # storing data in MGI_Note: Strain of Origin, Mutant Cell Line of
        # Origin and IMPC Colony notes, in that order, one row per
        # non-empty note
        notesByType = (
            (mgiStrainOriginTypeKey, sooNote),
            (mgiMutantOriginTypeKey, mutantNote),
            (mgiIMPCColonyTypeKey, impcColonyNote),
        )
        for noteTypeKey, noteText in notesByType:
            if not noteText:
                continue
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, noteTypeKey, noteText,
                createdByKey, createdByKey, cdate, cdate))
            noteKey += 1

        #
        # Annotations
        #
        # _AnnotType_key = 1009
        # _Qualifier_key = 1614158
        #
        if annotations:
            for annotTerm in annotations.split('|'):

                annotTermKey = loadlib.verifyTerm('', 27, annotTerm, lineNum, errorFile)
                if annotTermKey == 0:
                    continue

                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, strainAnnotTypeKey, strainKey, annotTermKey,
                    nullQualifierKey, cdate, cdate))
                annotKey += 1

        mgiKey += 1
        strainKey += 1
def processFile():
    '''
    # requires:
    #	inputFile, errorFile, transFile (and transTypeFile when a new
    #	translation type is being created) are open
    #
    # effects:
    #	Reads input file
    #	Verifies and Processes each line in the input file:
    #	each line holds object ID, object description, term and user ID.
    #	Writes one translation record per valid line and, if newTransType
    #	is set, one translation type record after the last line.
    #	Calls exit() on a line with fewer than 4 columns, or when a new
    #	translation type is requested but no input line had a valid user.
    #
    # returns:
    #	nothing
    '''

    results = db.sql(
        'select maxKey = max(_Translation_key) + 1 from MGI_Translation',
        'auto')
    transKey = results[0]['maxKey']

    if transKey is None:
        transKey = 1000

    lineNum = 0

    # sequence number of bad name in translation list
    seq = 1

    # most recent user key that verified; used as the creator of a new
    # translation type record
    typeUserKey = None

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens.
        # str.split replaces string.split (which no longer exists in
        # Python 3, where the old bare "except:" turned the resulting
        # AttributeError into a bogus "Invalid Line"); rstrip removes only
        # the line terminator.
        tokens = line.rstrip('\n').split('\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
            # only reached if exit() returns -- TODO confirm it never does
            continue

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)
        if userKey != 0:
            typeUserKey = userKey

        # if errors, continue to next record
        if objectKey == 0 or userKey == 0:
            continue

        # if no errors, process

        # add term to translation file
        bcpWrite(transFile, [
            transKey, transTypeKey, objectKey, term, seq, userKey, userKey,
            loaddate, loaddate
        ])
        transKey = transKey + 1
        seq = seq + 1

    # end of "for line in inputFile.readlines():"

    if newTransType:
        # The old code used whatever userKey the last line left behind:
        # unbound for an empty input file, 0 when the last user was invalid.
        if typeUserKey is None:
            exit(1, 'No valid user found for new translation type: %s\n' % (transTypeName))
        bcpWrite(transTypeFile, [
            transTypeKey, mgiTypeKey, vocabKey, transTypeName,
            transCompression, 0, typeUserKey, typeUserKey, loaddate, loaddate
        ])
def processImageFile():
    '''
    # requires:
    #	inImageFile, errorFile and the image/accession/copyright/caption
    #	output files are open
    #
    # effects:
    #	Reads the tab-delimited image input file (10 columns per line).
    #	For every line whose image class and reference verify, writes the
    #	image record, its MGI accession id, an optional pix id accession
    #	(non-empty pix ids that do not contain 'GUDMAP'), an optional
    #	accession from the 'logicalDBKey|id' column, and the copyright and
    #	caption notes.
    #	Records pixID -> image key in the global imagePix lookup and
    #	advances the global image/accession/MGI keys.
    #	Calls exit() on a line with fewer than 10 columns.
    #
    # returns:
    #	the number of input lines read
    '''

    global imageKey, accKey, mgiKey
    global imagePix
    global referenceKey

    def writeRow(fp, fields):
        # write one delimited output row; every field is already a string
        fp.write(TAB.join(fields) + CRT)

    lineNum = 0

    # For each line in the input file
    for line in inImageFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens.
        # str.split replaces string.split (removed in Python 3); rstrip
        # removes only the line terminator, so a final line without a
        # newline keeps its last character.
        tokens = line.rstrip('\n').split('\t')

        try:
            jnum = tokens[0]
            # tokens[1] (full size key) is not used by this load
            imageClass = tokens[2]
            pixID = tokens[3]
            xdim = tokens[4]
            ydim = tokens[5]
            figureLabel = tokens[6]
            copyrightNote = tokens[7]
            imageNote = tokens[8]
            imageInfo = tokens[9]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        imageClassKey = loadlib.verifyTerm('', imageVocabClassKey, imageClass, lineNum, errorFile)
        if imageClassKey == 0:
            error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        if referenceKey == 0:
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        imageTypeKey = FSimageTypeKey
        auditFields = [str(createdByKey), str(createdByKey), loaddate, loaddate]

        # image record; the empty field follows the reference key
        writeRow(outImageFile, [
            str(imageKey),
            str(gxdMgiTypeKey),
            str(imageClassKey),
            str(imageTypeKey),
            str(referenceKey),
            '',
            xdim,
            ydim,
            figureLabel,
        ] + auditFields)

        # MGI Accession ID for the image
        mgiAccID = mgiPrefix + str(mgiKey)

        writeRow(outAccFile, [
            str(accKey),
            mgiAccID,
            mgiPrefix,
            str(mgiKey),
            accLogicalDBKey,
            str(imageKey),
            imageMgiTypeKey,
            accPrivate,
            accPreferred,
        ] + auditFields)

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # pix id accession, skipped for empty and GUDMAP pix ids
        if pixID and 'GUDMAP' not in pixID:
            writeRow(outAccFile, [
                str(accKey),
                pixPrefix + str(pixID),
                pixPrefix,
                pixID,
                pixLogicalDBKey,
                str(imageKey),
                imageMgiTypeKey,
                pixPrivate,
                accPreferred,
            ] + auditFields)

            accKey = accKey + 1

        # additional accession id given as 'logicalDBKey|id'
        if len(imageInfo) > 0:
            imageLogicalDBKey, imageID = imageInfo.split('|')
            writeRow(outAccFile, [
                str(accKey),
                imageID,
                imageID,
                '',
                imageLogicalDBKey,
                str(imageKey),
                imageMgiTypeKey,
                accPrivate,
                accPreferred,
            ] + auditFields)

            accKey = accKey + 1

        # Copyrights
        if len(copyrightNote) > 0:
            outCopyrightFile.write(mgiAccID + TAB + copyrightNote + CRT)

        # Notes
        if len(imageNote) > 0:
            outCaptionFile.write(mgiAccID + TAB + imageNote + CRT)

        imagePix[pixID] = imageKey
        imageKey = imageKey + 1

    # end of "for line in inImageFile.readlines():"

    return lineNum
def processFile():
    '''
    # requires:
    #	inputFile, errorFile and the bcp output files are open, and the
    #	global running keys are initialized
    #
    # effects:
    #	Reads the tab-delimited strain input file (14 columns per line).
    #	For each strain that is not already in the database and whose
    #	strain type, species and creator all verify, writes the strain,
    #	strain/marker, accession, note/note chunk and annotation bcp
    #	records and advances the global running keys.
    #	After the last line, updates the accession max value and the
    #	prb_strain_marker_seq and voc_annot_seq sequences.
    #	Calls exit() on a line with fewer than 14 columns.
    #
    # returns:
    #	nothing
    '''

    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens.
        # Strip only the line terminator: line[:-1] would drop a data
        # character from a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            externalID = tokens[0]
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # the whole external id is stored as the prefix part
        externalPrefix = externalID
        externalNumeric = ''

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # if errors (or the strain already exists), continue to next record
        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        # if no errors, process

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate,
            isGeneticBackground, createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker
        if len(alleleIDs) > 0:
            for a in alleleIDs.split('|'):
                alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)

                if alleleKey == 0:
                    continue

                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto')
                markerKey = results[0]['_Marker_key']

                markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                    createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain
        accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
            createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id
        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
            % (accKey, externalID, externalPrefix, externalNumeric, externalLDB, strainKey,
            externalTypeKey, createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk:
        # Strain of Origin, Mutant Cell Line of Origin and IMPC Colony
        # notes, in that order. Each non-empty note gets one note row and
        # one chunk row holding its own text (the IMPC colony chunk used
        # to be written with the strain of origin text).
        notesByType = (
            (mgiStrainOriginTypeKey, sooNote),
            (mgiMutantOriginTypeKey, mutantNote),
            (mgiIMPCColonyTypeKey, impcColonyNote),
        )
        for noteTypeKey, noteText in notesByType:
            if len(noteText) == 0:
                continue

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, noteTypeKey, \
                createdByKey, createdByKey, cdate, cdate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, noteText, createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        #
        # Annotations
        #
        # _AnnotType_key = 1009
        # _Qualifier_key = 1614158
        #

        if len(annotations) > 0:
            for a in annotations.split('|'):

                # strain annotation type
                annotTypeKey = 1009

                # this is a null qualifier key
                annotQualifierKey = 1614158

                annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
                if annotTermKey == 0:
                    continue

                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql(''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None)
    db.commit()

    # update voc_annot_seq auto-sequence
    db.sql(''' select setval('voc_annot_seq', (select max(_Annot_key) from VOC_Annot)) ''', None)
    db.commit()
def processResultsFile(referenceKey):
    '''
    # requires:
    #	referenceKey: _Refs_key of the reference whose image panes may be
    #		attached to results
    #	inResultsFile, errorFile and the result output files are open;
    #	assaySpecimen maps 'assayID:specimenID' to a specimen key
    #
    # effects:
    #	Loads the 'figureLabel|paneLabel' -> image pane key lookup for
    #	the reference into the global imagePaneLookup.
    #	Reads the tab-delimited results file (9 columns per line).
    #	A new result record (and its image pane records) is written each
    #	time the assay, specimen or result id changes from the previous
    #	processed line; a result structure record is written for every
    #	processed line.
    #	Lines that fail verification are skipped; lines with an unknown
    #	assay/specimen are reported in the error file and skipped.
    #	Calls exit() on a line with fewer than 9 columns.
    #
    # returns:
    #	nothing
    '''

    global resultKey
    global imagePaneLookup

    prevAssay = 0
    prevSpecimen = 0
    prevResult = 0
    lineNum = 0

    #
    # build imagePaneLookup lookup of figure label|pane label keys
    # J:226028/227123
    #
    results = db.sql('''
        select i.figureLabel, p.paneLabel, p._ImagePane_key
        from IMG_Image i, IMG_ImagePane p
        where i._Image_key = p._Image_key
        and i._Refs_key = %s
        ''' % (referenceKey), 'auto')
    for r in results:
        paneLabel = r['paneLabel']
        if paneLabel is None:
            paneLabel = ''
        key = r['figureLabel'] + '|' + paneLabel
        # a repeated figure/pane label keeps only the last pane key
        imagePaneLookup[key] = [r['_ImagePane_key']]

    # For each line in the input file
    for line in inResultsFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens.
        # str.split replaces string.split (removed in Python 3); rstrip
        # removes only the line terminator.
        tokens = line.rstrip('\n').split(TAB)

        try:
            assayID = tokens[0]
            specimenID = tokens[1]
            resultID = tokens[2]
            strength = tokens[3]
            pattern = tokens[4]
            emapaID = tokens[5]
            structureTS = tokens[6]
            resultNote = tokens[7]
            imagePanes = tokens[8]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        strengthKey = gxdloadlib.verifyStrength(strength, lineNum, errorFile)
        patternKey = gxdloadlib.verifyPattern(pattern, lineNum, errorFile)
        structureKey = loadlib.verifyTerm(emapaID, 90, '', lineNum, errorFile)

        # if errors, continue to next record
        if strengthKey == 0 or patternKey == 0 or structureKey == 0:
            continue

        # if no errors, process

        key = '%s:%s' % (assayID, specimenID)

        # "in" replaces dict.has_key, which no longer exists in Python 3
        if key not in assaySpecimen:
            errorFile.write('Cannot find Assay:Speciman key "%s"\n' % (key))
            errorFile.write(str(tokens) + '\n\n')
            continue

        specimenKey = assaySpecimen[key]

        # a change at an outer level forces a new result
        if prevAssay != assayID:
            prevSpecimen = 0

        if prevSpecimen != specimenKey:
            prevResult = 0

        if prevResult != resultID:

            resultKey = resultKey + 1

            outResultFile.write(
                str(resultKey) + TAB + \
                str(specimenKey) + TAB + \
                str(strengthKey) + TAB + \
                str(patternKey) + TAB + \
                resultID + TAB + \
                mgi_utils.prvalue(resultNote) + TAB + \
                loaddate + TAB + loaddate + CRT)

            # image panes that are not in the lookup are silently ignored
            for image in imagePanes.split(','):
                if image in imagePaneLookup:
                    imageKey = imagePaneLookup[image][0]
                    outResultImageFile.write(str(resultKey) + TAB + \
                        str(imageKey) + TAB + \
                        loaddate + TAB + loaddate + CRT)

        outResultStFile.write(
            str(resultKey) + TAB + \
            str(structureKey) + TAB + \
            str(structureTS) + TAB + \
            loaddate + TAB + loaddate + CRT)

        prevAssay = assayID
        prevSpecimen = specimenKey
        prevResult = resultID

    # end of "for line in inResultsFile.readlines():"

    return
def processFile():
    '''
    # requires:
    #	inputFile, errorFile and the bcp output files are open, and the
    #	global running keys are initialized
    #
    # effects:
    #	Reads the tab-delimited strain input file (9 columns per line).
    #	A strain that already exists only gets a colony id note, and only
    #	if checkColonyNote reports that it has none.
    #	A new strain whose strain type, species and creator verify gets
    #	strain, strain/marker, accession (standard strains only), note
    #	and annotation records.
    #	Updates the accession max value unless DEBUG is set.
    #	Calls exit() on a line with fewer than 9 columns.
    #
    # returns:
    #	nothing
    '''

    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens.
        # Strip only the line terminator: line[:-1] would drop a data
        # character from a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            name = tokens[0]
            alleleIDs = tokens[1]
            strainType = tokens[2]
            species = tokens[3]
            isStandard = tokens[4]
            createdBy = tokens[5]
            mutantNote = tokens[6]
            colonyNote = tokens[7]
            annotations = tokens[8].split('|')
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # print(...) with a single pre-formatted string produces the same
        # output under Python 2 and is valid syntax under Python 3

        # if the strain exist, but with no colony id note, create one
        if strainExistKey > 0:
            print('strain in database checking colony note : %s' % line)
            if (not checkColonyNote(strainExistKey)):
                createNote(strainExistKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)
            else:
                print('colony note in database: %s' % colonyNote)
            continue
        else:
            print('strain not in database : %s' % line)

        # if strain does not exist and verification failed on strain type,
        # species or createdBy, skip the record
        if strainTypeKey == 0 or speciesKey == 0 \
                or createdByKey == 0:
            continue

        # if no errors, process

        # isPrivate/isGeneticBackground are not input columns; they are
        # read from names defined outside this function
        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate,
            isGeneticBackground, createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker
        if len(alleleIDs) > 0:
            for a in alleleIDs.split('|'):
                alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)

                # An allele that failed verification has already been
                # reported by loadlib; skip it instead of querying for a
                # key that cannot match and indexing an empty result.
                if not alleleKey:
                    continue

                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey), 'auto')
                markerKey = results[0]['_Marker_key']

                markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                    createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for the strain
        if isStandard == '1':
            accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
                % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
                createdByKey, createdByKey, cdate, cdate))
            accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Colony ID Note
        if len(colonyNote) > 0:
            createNote(strainKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)

        # storing data in MGI_Note/MGI_NoteChunk
        # Mutant Cell Line of Origin Note
        if len(mutantNote) > 0:
            createNote(strainKey, mutantNote, mgiMutOrigNoteTypeKey, createdByKey)

        #
        # Annotations
        #
        # _AnnotType_key = 1009 = "Strain/Attributes"
        # _Qualifier_key = 1614158 = null
        #

        for a in annotations:

            # strain annotation type
            annotTypeKey = 1009

            # this is a null qualifier key
            annotQualifierKey = 1614158

            annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
            if annotTermKey == 0:
                continue

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
            annotKey = annotKey + 1

        # the MGI id counter advances for every new strain, whether or
        # not an accession record was written for it
        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
def processFile():
    '''
    # requires:
    #   the module-level names used below (db, inputFile, transFile,
    #   transTypeFile, vocabKey, mgiTypeKey, transTypeKey, loaddate, ...)
    #   to be set up before the call
    #
    # effects:
    #   Reads input file (tab-delimited: object ID, object description,
    #   term, user ID)
    #   Verifies and Processes each line in the input file:
    #   the object is verified as a vocabulary term when vocabKey > 0,
    #   otherwise as an MGI object; lines whose object or user fails
    #   verification are skipped; every other line becomes one row in
    #   the translation bcp file
    #   Writes one translation type row when newTransType is set
    #
    # returns:
    #   nothing
    #
    '''

    # next free primary key; max() is NULL on an empty table
    results = db.sql('select maxKey = max(_Translation_key) + 1 from MGI_Translation', 'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        transKey = 1000

    lineNum = 0

    # sequence number of bad name in translation list
    seq = 1

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens.  Strip only the line terminator:
        # slicing off the last character would truncate the user ID on a
        # final line that has no newline.  The str method replaces the
        # deprecated string.split() function.
        tokens = line.rstrip('\n').split('\t')

        try:
            # fails with ValueError exactly when fewer than 4 columns exist
            objectID, objectDescription, term, userID = tokens[:4]
        except ValueError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
            continue

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)

        # if errors, continue to next record
        if objectKey == 0 or userKey == 0:
            continue

        # if no errors, process
        # add term to translation file
        bcpWrite(transFile, [transKey, transTypeKey, objectKey, term, seq,
                             userKey, userKey, loaddate, loaddate])
        transKey = transKey + 1
        seq = seq + 1

    # end of "for line in inputFile.readlines():"

    # NOTE(review): userKey is whatever the last input line produced.  It
    # is unbound if the input file was empty and may be 0 if the last
    # line's user failed verification - confirm how the creator of a new
    # translation type should be chosen.
    if newTransType:
        bcpWrite(transTypeFile, [transTypeKey, mgiTypeKey, vocabKey, transTypeName,
                                 transCompression, 0, userKey, userKey, loaddate, loaddate])
def processFile():
    '''
    #
    # requires:
    #   the module-level names used below (inputFile, the output file
    #   handles, the key counters, type keys, loaddate, db, DEBUG,
    #   alleleLookup) to be set up before the call
    #
    # effects:
    #   Reads the tab-delimited input file, 24 columns per line.
    #   Lines with createMCL, createNote or setStatus filled in are
    #   handed to processFileIKMC and nothing else is written for them.
    #   For every other line that passes verification, writes the allele
    #   row plus its mutation, reference, subtype annotation, accession
    #   and note rows, echoes the line with its new MGI ID to
    #   newAlleleFile, and records the allele in alleleLookup.
    #   Calls ACC_setMax and commits when DEBUG is false.
    #
    # returns:
    #   nothing
    #
    '''

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey, mutationKey
    global alleleLookup

    # MGI_Note row layout, shared by both note types.  The IKMC note used
    # to carry 8 placeholders for 9 values and raised TypeError.
    noteRow = '%s|%s|%s|%s|%s|%s|%s|%s|%s\n'

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Strip only the line terminator: slicing off the last character
        # would truncate the last column of a final line with no newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            # fails with ValueError exactly when fewer than 24 columns exist;
            # driverNotes is parsed but unused (see "driver notes" below)
            (markerID, symbol, name, alleleStatus, alleleType,
             alleleSubtypes, collectionKey, germLine, references,
             strainOfOrigin, mutantCellLine, molecularNotes, driverNotes,
             ikmcNotes, mutations, inheritanceMode, isMixed, isExtinct,
             createdBy, createMCL, createNote, setStatus,
             existingAlleleID, ikmcSymbol) = tokens[:24]
        except ValueError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus,
                            symbol, ikmcSymbol, mutantCellLine, ikmcNotes,
                            createdByKey, existingAlleleID)
            continue

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation); each term is
        # verified when its row is written below
        allMutations = mutations.split('|')

        # _vocab_key = 35, used for the inheritance mode column
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

        # reference
        # NOTE(review): jnum is not assigned in this function; presumably a
        # module-level J: number - confirm it is defined before the call
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        # (markerStatusKey is a non-zero constant, allMutations is a list
        # and createdByKey was checked above, so those tests were dead)
        if 0 in (markerKey, alleleStatusKey, alleleTypeKey, germLineKey,
                 inheritanceModeKey, strainOfOriginKey, refKey):
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey,
               alleleStatusKey, germLineKey, collectionKey, symbol, name,
               isExtinct, isMixed, refKey, markerStatusKey,
               createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutation in allMutations:
            mutationTermKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
            mutationFile.write('%s|%s|%s|%s|%s\n' \
                % (mutationKey, alleleKey, mutationTermKey, loaddate, loaddate))
            mutationKey = mutationKey + 1

        #
        # allele references
        #
        # each entry is "type|id"; entries are separated by "||".
        # refKey is deliberately reassigned: the last reference's key is
        # what the newAlleleFile echo below reports.
        #
        for reference in references.split('||'):
            refType, refID = reference.split('|')
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            # NOTE(review): an unrecognised refType leaves refAssocTypeKey
            # unbound (first reference) or carrying the previous
            # reference's value - confirm how such input should be handled
            if refType == 'Original':
                refAssocTypeKey = 1011
            elif refType == 'Transmission':
                refAssocTypeKey = 1023
            elif refType == 'Molecular':
                refAssocTypeKey = 1012

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey,
                   createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        #
        # allele subtypes
        #
        for s in alleleSubtypes.split('|'):
            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)
            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey,
                   qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele
        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey,
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note

        # molecular notes
        if len(molecularNotes) > 0:
            noteFile.write(noteRow \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey,
                   molecularNotes, createdByKey, createdByKey, loaddate, loaddate))
            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # placeholder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:
            noteFile.write(noteRow \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey,
                   ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))
            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field
        if createdBy == 'ikmc_alleleload':
            newAlleleFile.write('%s\t%s%s\t%s\n' \
                % (mgi_utils.prvalue(ikmcNotes),
                   mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey),
                   mgi_utils.prvalue(ikmcSymbol)))
        else:
            # Echo of the input columns followed by the new MGI ID.  The
            # original had 18 placeholders for 20 values and named
            # alleleSubtype / collection, which are never assigned here;
            # the input columns in those positions are alleleSubtypes and
            # collectionKey.  The format is derived from the field count
            # so the two cannot drift apart again.
            # NOTE(review): allMutations is the split list, not the raw
            # mutations column - confirm which one the echo should carry
            echoed = (markerID, symbol, name, alleleStatus, alleleType,
                      alleleSubtypes, collectionKey, germLine, references,
                      strainOfOrigin, mutantCellLine, allMutations,
                      inheritanceMode, isMixed, isExtinct, refKey,
                      markerStatusKey, createdBy)
            rowFormat = '%s\t' * len(echoed) + '%s%s\n'
            rowValues = [mgi_utils.prvalue(v) for v in echoed]
            rowValues.append(mgi_utils.prvalue(mgiPrefix))
            rowValues.append(mgi_utils.prvalue(mgiKey))
            newAlleleFile.write(rowFormat % tuple(rowValues))

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = [(alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey))]

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #
    # NOTE(review): the original indentation was lost; the commit is
    # assumed to belong to the non-DEBUG branch together with ACC_setMax
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()
def sanityCheck(biotypeVocab, biotypeTerm, mcvTerms, primaryMCVTerm, markerType, lineNum):
    '''
    #
    # requires:
    #   biotypeVocab: 'Ensembl', 'NCBI' or 'MGP'
    #   biotypeTerm: term to look up in that vocabulary
    #   mcvTerms: '|'-delimited MCV/feature type terms
    #   primaryMCVTerm: the primary feature type term
    #   markerType: marker type name
    #   lineNum: input line number, used in the error messages
    #
    # effects:
    #   Resolves each input to its key and stores the results in the
    #   globals biotypeVocabKey, biotypeTermKey, mcvTermKeys,
    #   markerTypeKey and primaryMCVTermKey
    #
    # returns:
    #   List [] of error messages if sanity check fails
    #   Empty list [] if all sanity checks pass
    #
    '''

    global biotypeVocabKey
    global biotypeTermKey
    global mcvTermKeys
    global markerTypeKey
    global primaryMCVTermKey

    errors = []

    # reset the per-call results up front so an invalid vocabulary cannot
    # leave the previous line's biotype term key behind
    mcvTermKeys = []
    biotypeVocabKey = 0
    biotypeTermKey = 0

    #
    # BioType Vocabularies
    #
    vocabKeys = {
        'Ensembl': ENSEMBL_VOCAB_KEY,
        'NCBI': NCBI_VOCAB_KEY,
        'MGP': MGP_VOCAB_KEY,
    }

    if biotypeVocab in vocabKeys:
        biotypeVocabKey = vocabKeys[biotypeVocab]
    else:
        errors.append(INVALID_VOCAB_ERROR % (lineNum, biotypeVocab))

    # Lookup the biotype _term_key for this vocab/term
    if biotypeVocabKey:
        biotypeTermKey = loadlib.verifyTerm('', biotypeVocabKey, biotypeTerm, lineNum, errorFile)
        if biotypeTermKey == 0:
            errors.append(INVALID_BIOTYPE_TERM_ERROR % (lineNum, biotypeTerm, biotypeVocab))

    # lookup the _marker_type_key
    markerTypeKey = loadlib.verifyMarkerType(markerType, lineNum, errorFile)
    if markerTypeKey == 0:
        errors.append(INVALID_MARKER_TYPE_ERROR % (lineNum, markerType))

    #
    # mcv/feature types
    #
    for mcvTerm in mcvTerms.split('|'):
        mcvKey = loadlib.verifyTerm('', MCV_VOCAB_KEY, mcvTerm, lineNum, errorFile)
        if mcvKey == 0:
            errors.append(INVALID_MCV_TERM_ERROR % (lineNum, mcvTerm))
        else:
            mcvTermKeys.append(mcvKey)

    # lookup the primary feature type; it is an MCV term, so a failure is
    # reported as an invalid MCV term (it used to be reported with the
    # marker type message)
    primaryMCVTermKey = loadlib.verifyTerm('', MCV_VOCAB_KEY, primaryMCVTerm, lineNum, errorFile)
    if primaryMCVTermKey == 0:
        errors.append(INVALID_MCV_TERM_ERROR % (lineNum, primaryMCVTerm))

    return errors