Exemple #1
0
def processFile():

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')
	#print line
        try:
	    markerID = tokens[0]
	    symbol = tokens[1]
	    name = tokens[2]
	    alleleStatus = tokens[3]
	    alleleType = tokens[4]
	    alleleSubtypes = tokens[5]
	    collectionKey = tokens[6]
	    germLine = tokens[7]
	    references = tokens[8]
	    strainOfOrigin = tokens[9]
	    mutantCellLine = tokens[10]
	    molecularNotes = tokens[11]
	    driverNotes = tokens[12]
	    ikmcNotes = tokens[13]
	    mutations = tokens[14]
	    inheritanceMode = tokens[15]
	    isMixed = tokens[16]
	    isExtinct = tokens[17]
	    createdBy = tokens[18]
	    createMCL = tokens[19]
	    createNote = tokens[20]
	    setStatus = tokens[21]
	    existingAlleleID = tokens[22]
	    ikmcSymbol = tokens[23]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

	# creator
	createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

	# processing for IKMC-only
	if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
		processFileIKMC(createMCL, createNote, setStatus, \
			symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \
			createdByKey, existingAlleleID)
		continue

	# marker key
	markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

	# hard-coded
	# _vocab_key = 73 (Marker-Allele Association Status)
	# _term_key = 4268545 (Curated)
	markerStatusKey = 4268545

	# _vocab_key = 37 (Allele Status)
	alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

	# _vocab_key = 38 (Allele Type)
	alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

	# _vocab_key = 61 (Allele Transmission)
	germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

	# _vocab_key = 36 (Allele Molecular Mutation)
	allMutations = mutations.split('|')

	# _vocab_key = 35 (Allele Status)
	inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

	# strains
	strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

	# reference
	refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
	# errors are stored (via loadlib) in the .error log

        if markerKey == 0 \
		or markerStatusKey == 0 \
		or alleleStatusKey == 0 \
		or alleleTypeKey == 0 \
		or germLineKey == 0 \
		or allMutations == 0 \
		or inheritanceModeKey == 0 \
		or strainOfOriginKey == 0 \
		or refKey == 0 \
		or createdByKey == 0:
            continue

        # if no errors, process the allele

	# not specified/testing
	#collectionKey = 11025586

	# allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
	    alleleStatusKey, germLineKey, collectionKey, symbol, name, \
	    isExtinct, isMixed, refKey, markerStatusKey, \
	    createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

	# molecular mutation
	for mutation in allMutations:
		mutationKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
        	mutationFile.write('%s|%s|%s|%s\n' \
	    	% (alleleKey, mutationKey, loaddate, loaddate))

	#
	# allele references
	#
	allReferences = references.split('||')
	for reference in allReferences:
		refType, refID = reference.split('|')
		refKey = loadlib.verifyReference(refID, lineNum, errorFile)

		if refType == 'Original':
			refAssocTypeKey = 1011
		elif refType == 'Transmission':
			refAssocTypeKey = 1023
		elif refType == 'Molecular':
			refAssocTypeKey = 1012

        	refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
	    		% (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
	       		createdByKey, createdByKey, loaddate, loaddate))
		refAssocKey = refAssocKey + 1

	#
	# allele subtypes
	#
	allSubtypes = alleleSubtypes.split('|')
	for s in allSubtypes:

		# _vocab_key = 93 (Allele Subtype)
		alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)

        	annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                	% (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
        			qualifierKey, loaddate, loaddate))
		annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allelearker

        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
	       createdByKey, createdByKey, loaddate, loaddate))

	# storing data in MGI_Note/MGI_NoteChunk
	# molecular notes

	mgiNoteSeqNum = 1
	if len(molecularNotes) > 0:

	    noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
		% (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
		   createdByKey, createdByKey, loaddate, loaddate))

	    noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
		% (noteKey, mgiNoteSeqNum, molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

	    noteKey = noteKey + 1

	# driver notes
	# TR12662/MGI_Relationship._Category_key = 1006
	# removed noteFile code
	# place hodler for MGI_Relationship code
	# the IKMC is the only product using this and IKMC does not add any driver note
	#mgiNoteSeqNum = 1
	#if len(driverNotes) > 0:

	# ikmc notes
	useIKMCnotekey = 0
	if len(ikmcNotes) > 0:

	    noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
		% (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
		   createdByKey, createdByKey, loaddate, loaddate))

	    noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
		% (noteKey, 1, ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

	    useIKMCnotekey = noteKey
	    noteKey = noteKey + 1

	# Print out a new text file and attach the new MGI Allele IDs as the last field

	if createdBy == 'ikmc_alleleload':
        	newAlleleFile.write('%s\t%s%s\t%s\n' \
	    	% (mgi_utils.prvalue(ikmcNotes), \
			mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
			mgi_utils.prvalue(ikmcSymbol)))
	else:
        	newAlleleFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%s\n' \
	    	% (mgi_utils.prvalue(markerID), \
	       	mgi_utils.prvalue(symbol), \
	       	mgi_utils.prvalue(name), \
	       	mgi_utils.prvalue(alleleStatus), \
	       	mgi_utils.prvalue(alleleType), \
	       	mgi_utils.prvalue(alleleSubtype), \
	       	mgi_utils.prvalue(collection), \
	       	mgi_utils.prvalue(germLine), \
	       	mgi_utils.prvalue(references), \
	       	mgi_utils.prvalue(strainOfOrigin), \
	       	mgi_utils.prvalue(mutantCellLine), \
	       	mgi_utils.prvalue(allMutations), \
	       	mgi_utils.prvalue(inheritanceMode), \
	       	mgi_utils.prvalue(isMixed), \
	       	mgi_utils.prvalue(isExtinct), \
	       	mgi_utils.prvalue(refKey), \
	       	mgi_utils.prvalue(markerStatusKey), \
	       	mgi_utils.prvalue(createdBy), \
	       	mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey)))

	# save symbol/alleleKey/ikmc note key
	alleleLookup[symbol] = []
	alleleLookup[symbol].append((alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
	db.commit()
def sanityCheck(biotypeVocab, biotypeTerm, mcvTerms, primaryMCVTerm,
                markerType, lineNum):
    '''
    #
    # requires:
    #
    # effects:
    #
    # returns:
    #   List [] of error messages if sanity check fails
    #   Empty list [] if all sanity checks pass
    #
    '''

    global biotypeVocabKey
    global biotypeTermKey
    global mcvTermKeys
    global markerTypeKey
    global primaryMCVTermKey

    errors = []
    mcvTermKeys = []

    #
    # BioType Vocabularies
    #
    biotypeVocabKey = 0

    if biotypeVocab == 'Ensembl':
        biotypeVocabKey = ENSEMBL_VOCAB_KEY
    elif biotypeVocab == 'NCBI':
        biotypeVocabKey = NCBI_VOCAB_KEY
    elif biotypeVocab == 'MGP':
        biotypeVocabKey = MGP_VOCAB_KEY
    elif biotypeVocab == 'EnsemblR':
        biotypeVocabKey = ENSEMBLREG_VOCAB_KEY
    elif biotypeVocab == 'VISTA':
        biotypeVocabKey = VISTAREG_VOCAB_KEY
    else:
        errors.append(INVALID_VOCAB_ERROR % (lineNum, biotypeVocab))

    # Lookup the biotype _term_key for this vocab/term
    if biotypeVocabKey:
        biotypeTermKey = loadlib.verifyTerm('', biotypeVocabKey, biotypeTerm,
                                            lineNum, errorFile)
        if biotypeTermKey == 0:
            errors.append(INVALID_BIOTYPE_TERM_ERROR %
                          (lineNum, biotypeTerm, biotypeVocab))

    # lookup the _marker_type_key
    markerTypeKey = loadlib.verifyMarkerType(markerType, lineNum, errorFile)
    if markerTypeKey == 0:
        errors.append(INVALID_MARKER_TYPE_ERROR % (lineNum, markerType))

    #
    # mcv/feature types
    #
    tokens = mcvTerms.split('|')
    for r in tokens:
        t = loadlib.verifyTerm('', MCV_VOCAB_KEY, r, lineNum, errorFile)
        if t == 0:
            errors.append(INVALID_MCV_TERM_ERROR % (lineNum, r))
        else:
            mcvTermKeys.append(t)

    # lookup the primary feature type
    primaryMCVTermKey = loadlib.verifyTerm('', MCV_VOCAB_KEY, primaryMCVTerm,
                                           lineNum, errorFile)
    if primaryMCVTermKey == 0:
        errors.append(INVALID_MARKER_TYPE_ERROR % (lineNum, primaryMCVTerm))

    return errors
Exemple #3
0
def processFile():
    # Purpose: Read the input file, resolve values to keys. Create bcp files
    # Returns: 1 if error,  else 0
    # Assumes: file descriptors have been initialized
    # Effects: exits if the line does not have 15 columns
    # Throws: Nothing

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup, alleleMutationKey

    lineNum = 0
    # For each line in the input file

    for line in fpInputFile.readlines():

        error = 0
        lineNum = lineNum + 1
        print('%s: %s' % (lineNum, line))
        # Split the line into tokens
        tokens = line[:-1].split('\t')
        try:
            markerID = tokens[0]
            markerSymbol = tokens[1]
            mutationType = tokens[2]  # IMPC allele type
            description = tokens[3]
            colonyID = tokens[4]
            strainOfOrigin = tokens[5]
            alleleSymbol = tokens[6]
            alleleName = tokens[7]
            inheritanceMode = tokens[8]
            alleleType = tokens[9]  # IMPC allele class
            alleleSubType = tokens[10]
            alleleStatus = tokens[11]
            transmission = tokens[12]
            collection = tokens[13]
            jNum = tokens[14]
            createdBy = tokens[15]

        except:
            print('exiting with invalid line')
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        print('validating data and getting keys')
        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, fpErrorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        mutationList = str.split(mutationType, ';')
        if len(mutationList) > 1:
            print('mutationList: %s' % mutationList)
        mutationKeyList = []
        for m in mutationList:
            mutationKey = loadlib.verifyTerm('', 36, m, lineNum, fpErrorFile)
            if mutationKey != 0:
                mutationKeyList.append(mutationKey)
        if len(mutationKeyList) > 1:
            print('mutationKeyList: %s' % mutationKeyList)
        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum,
                                                       fpErrorFile)

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode,
                                                lineNum, fpErrorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum,
                                           fpErrorFile)

        # _vocab_key = 93 (Allele Subtype)
        subTypeList = str.split(alleleSubType, ';')
        if len(subTypeList) > 1:
            print('subTypeList: %s' % subTypeList)
        subTypeKeyList = []
        for s in subTypeList:
            if s != '':  # if we have a subtype, get it's key
                subTypeKey = loadlib.verifyTerm('', 93, s, lineNum,
                                                fpErrorFile)
                if subTypeKey != 0:
                    subTypeKeyList.append(subTypeKey)
        if len(subTypeKeyList) > 1:
            print('subTypeKeyList: %s' % subTypeKeyList)

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum,
                                             fpErrorFile)

        # _vocab_key = 61 (Allele Transmission)
        transmissionKey = loadlib.verifyTerm('', 61, transmission, lineNum,
                                             fpErrorFile)

        # _vocab_key = 92
        collectionKey = loadlib.verifyTerm('', 92, collection, lineNum,
                                           fpErrorFile)

        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # reference
        refKey = loadlib.verifyReference(jNum, lineNum, fpErrorFile)

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, fpErrorFile)
        if createdByKey == 0:
            continue

        print('checking for missing data')
        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        if markerKey == 0 \
                or mutationKeyList == [] \
                or strainOfOriginKey == 0 \
                or inheritanceModeKey == 0 \
                or alleleTypeKey == 0 \
                or alleleStatusKey == 0 \
                or transmissionKey == 0 \
                or collectionKey == 0 \
                or refKey == 0 \
                or createdByKey == 0:
            print('missing data, skipping this line')
            continue

        # if no errors, process the allele
        print('writing to allele file')
        # allele (isWildType = 0)
        fpAlleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, transmissionKey, collectionKey, alleleSymbol, alleleName, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutationKey in mutationKeyList:
            fpMutationFile.write('%s|%s|%s|%s|%s\n' \
                % (alleleMutationKey, alleleKey, mutationKey, loaddate, loaddate))
            alleleMutationKey += 1

        # reference associations

        # Original
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, origRefTypeKey, \
                        createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # Molecular
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, molRefTypeKey, \
                        createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # allele subtype
        for subTypeKey in subTypeKeyList:
            fpAnnotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, alleleKey, subTypeKey, \
                            qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        # MGI Accession ID for the allele
        alleleID = '%s%s' % (mgiPrefix, mgiKey)
        fpAccFile.write('%s|%s|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, alleleID, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note
        # molecular note

        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, molecularNoteTypeKey, description,\
               createdByKey, createdByKey, loaddate, loaddate))

        noteKey = noteKey + 1

        # colony ID note
        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, colonyIdNoteTypeKey, colonyID, \
               createdByKey, createdByKey, loaddate, loaddate))

        noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs
        # as the last field

        fpNewAlleleRptFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
        % (mgi_utils.prvalue(alleleID), \
        mgi_utils.prvalue(alleleSymbol), \
        mgi_utils.prvalue(alleleName), \
        mgi_utils.prvalue(markerID), \
        mgi_utils.prvalue(markerSymbol), \
        mgi_utils.prvalue(colonyID)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #
    # Update the AccessionMax value
    #
    print('DEBUG: %s' % DEBUG)
    if DEBUG == 'false':
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()

    return 0
Exemple #4
0
def processFile():

    global lineNum
    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            id = tokens[0]
            externalPrefix = id
            externalNumeric = ''
            #(externalPrefix, externalNumeric) = id.split(':')
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate, isGeneticBackground,
	       createdByKey, createdByKey, cdate, cdate))

	# if Allele found, resolve to Marker

        if len(alleleIDs) > 0:
            allAlleles = alleleIDs.split('|')
            for a in allAlleles:
                alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)
                if alleleKey == 0:
                    continue
                if alleleKey == None:
                    continue
                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
                markerKey = results[0]['_Marker_key']
                if markerKey != None:
                    markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey, 
                    createdByKey, createdByKey, cdate, cdate))
                else:
                    markerFile.write('%s|%s||%s|%s|%s|%s|%s|%s\n' \
                    % (strainmarkerKey, strainKey, alleleKey, qualifierKey, 
                    createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain
        # all private = 0 (false)

        accFile.write('%d|%s%d|%s|%s|1|%d|%d|%s|1|%s|%s|%s|%s\n' \
                % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey, 
                isPrivate, createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id
        # % (accKey, id, '', id, externalLDB, strainKey, externalTypeKey, 
        #for ids that contain prefix:numeric
        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
          % (accKey, id, externalPrefix, externalNumeric, externalLDB, strainKey, externalTypeKey, 
             createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note
        # Strain of Origin Note

        if len(sooNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiStrainOriginTypeKey, sooNote, \
                   createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        # storing data in MGI_Note
        # Mutant Cell Line of Origin Note

        if len(mutantNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiMutantOriginTypeKey, mutantNote, \
                   createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        # storing data in MGI_Note
        # IMPC Colony Note

        if len(impcColonyNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiIMPCColonyTypeKey, impcColonyNote, \
                   createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        #
        # Annotations
        #
        # _AnnotType_key = 1009
        # _Qualifier_ke = 1614158
        #

        if len(annotations) > 0:
            annotations = annotations.split('|')
            for a in annotations:

                # strain annotation type
                annotTypeKey = 1009

                # this is a null qualifier key
                annotQualifierKey = 1614158

                annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
                if annotTermKey == 0:
                    continue
    
                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                  % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1
Exemple #5
0
def processFile():
    '''
	# requires:
	#
	# effects:
	#	Reads input file
	#	Verifies and Processes each line in the input file
	#
	# returns:
	#	nothing
	#
	'''

    results = db.sql(
        'select maxKey = max(_Translation_key) + 1 from MGI_Translation',
        'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        transKey = 1000

    lineNum = 0

    # sequence number of bad name in translation list
    seq = 1

    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
            continue

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey,
                                           objectDescription, lineNum,
                                           errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey,
                                             objectDescription, lineNum,
                                             errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)

        if objectKey == 0 or userKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # add term to translation file
        bcpWrite(transFile, [
            transKey, transTypeKey, objectKey, term, seq, userKey, userKey,
            loaddate, loaddate
        ])
        transKey = transKey + 1
        seq = seq + 1


#	end of "for line in inputFile.readlines():"

    if newTransType:
        bcpWrite(transTypeFile, [
            transTypeKey, mgiTypeKey, vocabKey, transTypeName,
            transCompression, 0, userKey, userKey, loaddate, loaddate
        ])
Exemple #6
0
def processImageFile():

    global imageKey, accKey, mgiKey
    global imagePix
    global referenceKey

    lineNum = 0

    # For each line in the input file

    for line in inImageFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
	    jnum = tokens[0]
	    fullsizeKey = tokens[1]
	    imageClass = tokens[2]
	    pixID = tokens[3]
	    xdim = tokens[4]
	    ydim = tokens[5]
	    figureLabel = tokens[6]
	    copyrightNote = tokens[7]
	    imageNote = tokens[8]
	    imageInfo = tokens[9]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        imageClassKey = loadlib.verifyTerm('', imageVocabClassKey, imageClass, lineNum, errorFile)
        if imageClassKey == 0:
            error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        if referenceKey == 0:
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

	imageTypeKey = FSimageTypeKey

        outImageFile.write(str(imageKey) + TAB + \
	    str(gxdMgiTypeKey) + TAB + \
	    str(imageClassKey) + TAB + \
	    str(imageTypeKey) + TAB + \
	    str(referenceKey) + TAB + \
	    TAB + \
	    xdim + TAB + \
	    ydim + TAB + \
	    figureLabel + TAB + \
	    str(createdByKey) + TAB + \
	    str(createdByKey) + TAB + \
	    loaddate + TAB + loaddate + CRT)

        # MGI Accession ID for the image

	mgiAccID = mgiPrefix + str(mgiKey)

	outAccFile.write(str(accKey) + TAB + \
	    mgiPrefix + str(mgiKey) + TAB + \
	    mgiPrefix + TAB + \
	    str(mgiKey) + TAB + \
	    accLogicalDBKey + TAB + \
	    str(imageKey) + TAB + \
	    imageMgiTypeKey + TAB + \
	    accPrivate + TAB + \
	    accPreferred + TAB + \
	    str(createdByKey) + TAB + \
	    str(createdByKey) + TAB + \
	    loaddate + TAB + loaddate + CRT)

        accKey = accKey + 1
        mgiKey = mgiKey + 1

	if pixID.find('GUDMAP') < 0 and len(pixID) > 0:
	    outAccFile.write(str(accKey) + TAB + \
	        pixPrefix + str(pixID) + TAB + \
	        pixPrefix + TAB + \
	        pixID + TAB + \
	        pixLogicalDBKey + TAB + \
	        str(imageKey) + TAB + \
	        imageMgiTypeKey + TAB + \
	        pixPrivate + TAB + \
	        accPreferred + TAB + \
	        str(createdByKey) + TAB + \
	        str(createdByKey) + TAB + \
	        loaddate + TAB + loaddate + CRT)
    
            accKey = accKey + 1

	if len(imageInfo) > 0:

	    imageLogicalDBKey, imageID = imageInfo.split('|')

	    outAccFile.write(str(accKey) + TAB + \
	        imageID + TAB + \
	        imageID + TAB + \
	        TAB + \
	        imageLogicalDBKey + TAB + \
	        str(imageKey) + TAB + \
	        imageMgiTypeKey + TAB + \
	        accPrivate + TAB + \
	        accPreferred + TAB + \
	        str(createdByKey) + TAB + \
	        str(createdByKey) + TAB + \
	        loaddate + TAB + loaddate + CRT)
    
            accKey = accKey + 1

	# Copyrights

	if len(copyrightNote) > 0:
            outCopyrightFile.write(mgiAccID + TAB + copyrightNote + CRT)

	# Notes

	if len(imageNote) > 0:
            outCaptionFile.write(mgiAccID + TAB + imageNote + CRT)

	imagePix[pixID] = imageKey
        imageKey = imageKey + 1

    #	end of "for line in inImageFile.readlines():"

    return lineNum
Exemple #7
0
def processFile():

    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
	    id = tokens[0]
	    externalPrefix = id
	    externalNumeric = ''
	    #(externalPrefix, externalNumeric) = id.split(':')
	    name = tokens[1]
	    alleleIDs = tokens[2]
	    strainType = tokens[3]
	    species = tokens[4]
	    isStandard = tokens[5]
	    sooNote = tokens[6]
	    externalLDB = tokens[7]
            externalTypeKey = tokens[8]
	    annotations = tokens[9]
	    createdBy = tokens[10]
	    mutantNote = tokens[11]
	    isPrivate = tokens[12]
	    impcColonyNote = tokens[13]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

	strainExistKey = verifyStrain(name, lineNum)
	strainTypeKey = verifyStrainType(strainType, lineNum)
	speciesKey = verifySpecies(species, lineNum)
	createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate, isGeneticBackground,
	       createdByKey, createdByKey, cdate, cdate))

	# if Allele found, resolve to Marker

	if len(alleleIDs) > 0:
	    allAlleles = alleleIDs.split('|')
	    for a in allAlleles:
		alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)
		if alleleKey == 0:
		    continue
	    	results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
		markerKey = results[0]['_Marker_key']

		markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
	    		% (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey, 
	       		createdByKey, createdByKey, cdate, cdate))
		strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain

        accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
        	% (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey, 
		createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id
        # % (accKey, id, '', id, externalLDB, strainKey, externalTypeKey, 
	#for ids that contain prefix:numeric
        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
          % (accKey, id, externalPrefix, externalNumeric, externalLDB, strainKey, externalTypeKey, 
	     createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Strain of Origin Note

        if len(sooNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiStrainOriginTypeKey, \
                   createdByKey, createdByKey, cdate, cdate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, sooNote, createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Mutant Cell Line of Origin Note

        if len(mutantNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiMutantOriginTypeKey, \
                   createdByKey, createdByKey, cdate, cdate))

            if len(mutantNote) > 0:
                noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (noteKey, 1, mutantNote, createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # IMPC Colony Note

        if len(impcColonyNote) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, strainKey, mgiNoteObjectKey, mgiIMPCColonyTypeKey, \
                   createdByKey, createdByKey, cdate, cdate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, sooNote, createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

	#
        # Annotations
        #
	# _AnnotType_key = 1009
	# _Qualifier_ke = 1614158
	#

	if len(annotations) > 0:
	    annotations = annotations.split('|')
	    for a in annotations:

	        # strain annotation type
	        annotTypeKey = 1009

	        # this is a null qualifier key
	        annotQualifierKey = 1614158

	        annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
	        if annotTermKey == 0:
		    continue
    
                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                  % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql(''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None)
    db.commit()

    # update voc_annot_seq auto-sequence
    db.sql(''' select setval('voc_annot_seq', (select max(_Annot_key) from VOC_Annot)) ''', None)
    db.commit()
Exemple #8
0
def processResultsFile(referenceKey):

    global resultKey
    global imagePaneLookup

    prevAssay = 0
    prevSpecimen = 0
    prevResult = 0
    lineNum = 0

    #
    # build imagePaneLookup lookup of figure label|pane label keys
    # J:226028/227123
    #
    results = db.sql('''
	select i.figureLabel, p.paneLabel, p._ImagePane_key 
	from IMG_Image i, IMG_ImagePane p 
	where i._Image_key = p._Image_key
	and i._Refs_key = %s
    	''' % (referenceKey), 'auto')
    for r in results:
	paneLabel = r['paneLabel']
	if paneLabel == None:
	    paneLabel = ''
	key = r['figureLabel'] + '|' + paneLabel
    	value =  r['_ImagePane_key']
	imagePaneLookup[key] = []
	imagePaneLookup[key].append(value)
    #print imagePaneLookup['Zmiz1_b41_E11.5b_JL|']

    # For each line in the input file

    for line in inResultsFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], TAB)

        try:
	    assayID = tokens[0]
	    specimenID = tokens[1]
	    resultID = tokens[2]
	    strength = tokens[3]
	    pattern = tokens[4]
	    emapaID = tokens[5]
	    structureTS = tokens[6]
	    resultNote = tokens[7]
	    imagePanes = tokens[8]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

	strengthKey = gxdloadlib.verifyStrength(strength, lineNum, errorFile)
	patternKey = gxdloadlib.verifyPattern(pattern, lineNum, errorFile)

	structureKey = loadlib.verifyTerm(emapaID, 90, '', lineNum, errorFile)

        if strengthKey == 0 or patternKey == 0 or structureKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

	key = '%s:%s' % (assayID, specimenID)

	if not assaySpecimen.has_key(key):
	    errorFile.write('Cannot find Assay:Speciman key "%s"\n' % (key))
	    errorFile.write(str(tokens) + '\n\n')
	    continue

	specimenKey = assaySpecimen[key]

	if prevAssay != assayID:
	    prevSpecimen = 0

        if prevSpecimen != specimenKey:
	    prevResult = 0

	if prevResult != resultID:

            resultKey = resultKey + 1

            outResultFile.write(
	        str(resultKey) + TAB + \
	        str(specimenKey) + TAB + \
	        str(strengthKey) + TAB + \
	        str(patternKey) + TAB + \
	        resultID + TAB + \
	        mgi_utils.prvalue(resultNote) + TAB + \
	        loaddate + TAB + loaddate + CRT)

            for image in string.split(imagePanes,','):
		if image in imagePaneLookup:
		    imageKey = imagePaneLookup[image][0]
                    outResultImageFile.write(str(resultKey) + TAB + \
					str(imageKey) + TAB + \
	        			loaddate + TAB + loaddate + CRT)
		#else:
		    #print image

	outResultStFile.write(
	    str(resultKey) + TAB + \
	    str(structureKey) + TAB + \
	    str(structureTS) + TAB + \
	    loaddate + TAB + loaddate + CRT)

	prevAssay = assayID
	prevSpecimen = specimenKey
	prevResult = resultID

    #	end of "for line in inResultsFile.readlines():"

    return
Exemple #9
0
def processFile():

    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        lineNum = lineNum + 1
	#print line
        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
	    name = tokens[0]
	    alleleIDs = tokens[1]
	    strainType = tokens[2]
	    species = tokens[3]
	    isStandard = tokens[4]
	    createdBy = tokens[5]
	    mutantNote = tokens[6]
	    colonyNote = tokens[7]
	    annotations = tokens[8].split('|')
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

	strainExistKey = verifyStrain(name, lineNum)
	strainTypeKey = verifyStrainType(strainType, lineNum)
	speciesKey = verifySpecies(species, lineNum)
	createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

	# if the strain exist, but with no colony id note, create one
	if strainExistKey > 0:
	    print 'strain in database checking colony note : %s' % line
	    if (not checkColonyNote(strainExistKey) ):
		#print 'colony note not in the database: %s' % colonyNote
		createNote(strainExistKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)
	    else:
		print 'colony note in database: %s'  % colonyNote
	    continue
	else: 
	    print 'strain not in database : %s' % line

	# if strain does not exist and  verification failed on strain type, 
	# species or createdBy, skip the record
        if strainTypeKey == 0 or speciesKey == 0 \
		or createdByKey == 0:
	    #print 'verification failed on strain type, species or createdBy: %s %s %s ' % (strainTypeKey, speciesKey, createdByKey)
            continue

        # if no errors, process
        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, 
		isPrivate, isGeneticBackground, createdByKey, createdByKey, 
		    cdate, cdate))

	# if Allele found, resolve to Marker
	allAlleles = alleleIDs.split('|')

	for a in allAlleles:
		alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)
		#print 'makeStrains.py allele: %s marker key: %s' % (a, alleleKey)
	    	results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
		markerKey = results[0]['_Marker_key']

		markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
		    % (strainmarkerKey, strainKey, markerKey, alleleKey, 
			qualifierKey, createdByKey, createdByKey, cdate, cdate))
		strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for the strain
	if isStandard == '1':
	    accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
	    % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey, 
	       createdByKey, createdByKey, cdate, cdate))
	    accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Colony ID Note

        if len(colonyNote) > 0:
	    createNote(strainKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)

        # storing data in MGI_Note/MGI_NoteChunk
        # Mutant Cell Line of Origin Note
        if len(mutantNote) > 0:
	    createNote(strainKey, mutantNote, mgiMutOrigNoteTypeKey, createdByKey)

	#
        # Annotations
        #
	# _AnnotType_key = 1009 =  "Strain/Attributes"
	# _Qualifier_key = 1614158 =  null
	#

	for a in annotations:

	    # strain annotation type
	    annotTypeKey = 1009

	    # this is a null qualifier key
	    annotQualifierKey = 1614158

	    annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
	    if annotTermKey == 0:
		continue

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
              % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
            annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
def processFile():
	'''
	# requires:
	#
	# effects:
	#	Reads input file
	#	Verifies and Processes each line in the input file
	#
	# returns:
	#	nothing
	#
	'''

	results = db.sql('select maxKey = max(_Translation_key) + 1 from MGI_Translation', 'auto')
	transKey = results[0]['maxKey']
	if transKey is None:
		transKey = 1000

	lineNum = 0

	# sequence number of bad name in translation list
	seq = 1

	# For each line in the input file

	for line in inputFile.readlines():

		error = 0
		lineNum = lineNum + 1

		# Split the line into tokens
		tokens = string.split(line[:-1], '\t')

		try:
			objectID = tokens[0]
			objectDescription = tokens[1]
			term = tokens[2]
			userID = tokens[3]
		except:
			exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
			continue

		if vocabKey > 0:
		    objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile)
		else:
		    objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile)

		userKey = loadlib.verifyUser(userID, lineNum, errorFile)

		if objectKey == 0 or userKey == 0:
			# set error flag to true
			error = 1

		# if errors, continue to next record
		if error:
			continue

		# if no errors, process

		# add term to translation file
		bcpWrite(transFile, [transKey, transTypeKey, objectKey, term, seq, userKey, userKey, loaddate, loaddate])
		transKey = transKey + 1
		seq = seq + 1

#	end of "for line in inputFile.readlines():"

	if newTransType:
		bcpWrite(transTypeFile, [transTypeKey, mgiTypeKey, vocabKey, transTypeName, transCompression, 0, userKey, userKey, loaddate, loaddate])
Exemple #11
0
def processFile():

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey, mutationKey
    global alleleLookup

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')
        #print line
        try:
            markerID = tokens[0]
            symbol = tokens[1]
            name = tokens[2]
            alleleStatus = tokens[3]
            alleleType = tokens[4]
            alleleSubtypes = tokens[5]
            collectionKey = tokens[6]
            germLine = tokens[7]
            references = tokens[8]
            strainOfOrigin = tokens[9]
            mutantCellLine = tokens[10]
            molecularNotes = tokens[11]
            driverNotes = tokens[12]
            ikmcNotes = tokens[13]
            mutations = tokens[14]
            inheritanceMode = tokens[15]
            isMixed = tokens[16]
            isExtinct = tokens[17]
            createdBy = tokens[18]
            createMCL = tokens[19]
            createNote = tokens[20]
            setStatus = tokens[21]
            existingAlleleID = tokens[22]
            ikmcSymbol = tokens[23]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus, \
                    symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \
                    createdByKey, existingAlleleID)
            continue

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum,
                                             errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum,
                                           errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        allMutations = mutations.split('|')

        # _vocab_key = 35 (Allele Status)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode,
                                                lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum,
                                                       errorFile)

        # reference
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log

        if markerKey == 0 \
                or markerStatusKey == 0 \
                or alleleStatusKey == 0 \
                or alleleTypeKey == 0 \
                or germLineKey == 0 \
                or allMutations == 0 \
                or inheritanceModeKey == 0 \
                or strainOfOriginKey == 0 \
                or refKey == 0 \
                or createdByKey == 0:
            continue

        # if no errors, process the allele

        # not specified/testing
        #collectionKey = 11025586

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, germLineKey, collectionKey, symbol, name, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutation in allMutations:
            mutationTermKey = loadlib.verifyTerm('', 36, mutation, lineNum,
                                                 errorFile)
            mutationFile.write('%s|%s|%s|%s|%s\n' \
            % (mutationKey, alleleKey, mutationTermKey, loaddate, loaddate))
            mutationKey = mutationKey + 1

        #
        # allele references
        #
        allReferences = references.split('||')
        for reference in allReferences:
            refType, refID = reference.split('|')
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            if refType == 'Original':
                refAssocTypeKey = 1011
            elif refType == 'Transmission':
                refAssocTypeKey = 1023
            elif refType == 'Molecular':
                refAssocTypeKey = 1012

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
                    createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        #
        # allele subtypes
        #
        allSubtypes = alleleSubtypes.split('|')
        for s in allSubtypes:

            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum,
                                                  errorFile)

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
                            qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allelearker

        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note
        # molecular notes

        mgiNoteSeqNum = 1
        if len(molecularNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
                   molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # place hodler for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note
        #mgiNoteSeqNum = 1
        #if len(driverNotes) > 0:

        # ikmc notes
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
                   ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field

        if createdBy == 'ikmc_alleleload':
            newAlleleFile.write('%s\t%s%s\t%s\n' \
            % (mgi_utils.prvalue(ikmcNotes), \
                    mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
                    mgi_utils.prvalue(ikmcSymbol)))
        else:
            newAlleleFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%s\n' \
            % (mgi_utils.prvalue(markerID), \
            mgi_utils.prvalue(symbol), \
            mgi_utils.prvalue(name), \
            mgi_utils.prvalue(alleleStatus), \
            mgi_utils.prvalue(alleleType), \
            mgi_utils.prvalue(alleleSubtype), \
            mgi_utils.prvalue(collection), \
            mgi_utils.prvalue(germLine), \
            mgi_utils.prvalue(references), \
            mgi_utils.prvalue(strainOfOrigin), \
            mgi_utils.prvalue(mutantCellLine), \
            mgi_utils.prvalue(allMutations), \
            mgi_utils.prvalue(inheritanceMode), \
            mgi_utils.prvalue(isMixed), \
            mgi_utils.prvalue(isExtinct), \
            mgi_utils.prvalue(refKey), \
            mgi_utils.prvalue(markerStatusKey), \
            mgi_utils.prvalue(createdBy), \
            mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey)))

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = []
        alleleLookup[symbol].append(
            (alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()
def sanityCheck(biotypeVocab, biotypeTerm, mcvTerms, primaryMCVTerm, markerType, lineNum):
    '''
    #
    # requires:
    #
    # effects:
    #
    # returns:
    #   List [] of error messages if sanity check fails
    #   Empty list [] if all sanity checks pass
    #
    '''

    global biotypeVocabKey
    global biotypeTermKey
    global mcvTermKeys
    global markerTypeKey
    global primaryMCVTermKey

    errors = []
    mcvTermKeys = []

    #
    # BioType Vocabularies
    #
    biotypeVocabKey = 0

    if biotypeVocab == 'Ensembl':
	biotypeVocabKey = ENSEMBL_VOCAB_KEY
    elif biotypeVocab == 'NCBI':
	biotypeVocabKey = NCBI_VOCAB_KEY
    elif biotypeVocab == 'MGP':
	biotypeVocabKey = MGP_VOCAB_KEY
    else:
        errors.append( INVALID_VOCAB_ERROR % (lineNum, biotypeVocab) )

    # Lookup the biotype _term_key for this vocab/term
    if biotypeVocabKey:
	biotypeTermKey = loadlib.verifyTerm('', biotypeVocabKey, biotypeTerm, lineNum, errorFile)
	if biotypeTermKey == 0:
	    errors.append( INVALID_BIOTYPE_TERM_ERROR % (lineNum, biotypeTerm, biotypeVocab) )

    # lookup the _marker_type_key
    markerTypeKey = loadlib.verifyMarkerType(markerType, lineNum, errorFile)
    if markerTypeKey == 0:
        errors.append( INVALID_MARKER_TYPE_ERROR % (lineNum, markerType) )

    # 
    # mcv/feature types
    #
    tokens = mcvTerms.split('|')
    for r in tokens:
	t = loadlib.verifyTerm('', MCV_VOCAB_KEY, r, lineNum, errorFile)
	if t == 0:
            errors.append( INVALID_MCV_TERM_ERROR % (lineNum, r) )
    	else:
            mcvTermKeys.append(t)

    # lookup the primary feature type
    primaryMCVTermKey = loadlib.verifyTerm('', MCV_VOCAB_KEY, primaryMCVTerm, lineNum, errorFile)
    if primaryMCVTermKey == 0:
        errors.append( INVALID_MARKER_TYPE_ERROR % (lineNum, primaryMCVTerm) )

    return errors