Esempio n. 1
0
def buildPaneKeyLookup ():
    """
    Fill the global paneKeyLookup with figure label -> image pane key
    entries for the reference identified by jNumber.

    effects:
        Sets the global refKey to the value loadlib.verifyReference()
        returns for jNumber.
        Queries IMG_Image / IMG_ImagePane for that reference, restricted
        to FULLSIZE_IMAGE_TYPE_KEY, stores every returned row in
        paneKeyLookup and echoes each stored entry to stdout.

    returns:
        nothing
    """
    global paneKeyLookup, refKey

    # Resolve the J-Number to its reference key.
    refKey = loadlib.verifyReference(jNumber, 0, None)

    # One row per (figure label, image pane key) pair of the reference.
    paneQuery = '''select i.figureLabel, ip._ImagePane_key
                     from IMG_Image i, IMG_ImagePane ip
                     where i._Image_key = ip._Image_key
		     and i._ImageType_key = %d 
                     and i._Refs_key = %d''' % (FULLSIZE_IMAGE_TYPE_KEY, refKey)

    for row in db.sql(paneQuery, 'auto'):
        label = row['figureLabel']
        pane = row['_ImagePane_key']
        paneKeyLookup[label] = pane
        print('paneKeyLookup[' + label + '] = ' + str(pane))
Esempio n. 2
0
def init ():
    """
    Configure the database connection and set the global keys used by
    the load.

    effects:
        Configures the db module (single connection, user, password file).
        Sets the globals createdByKey, refKey and accKey.

    returns:
        nothing
    """
    global createdByKey, refKey, accKey

    # Database connection settings.
    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFile)

    # Keys for the "created by" user and for the J-Number reference.
    createdByKey = loadlib.verifyUser(createdBy, 0, None)
    refKey = loadlib.verifyReference(jNumber, 0, None)

    # Next available accession key: one past the current maximum.
    maxKeyQuery = 'select max(_Accession_key) + 1 as maxKey from ACC_Accession'
    accKey = db.sql(maxKeyQuery, 'auto')[0]['maxKey']
Esempio n. 3
0
def processFile():
    """
    Read the input file and write one reference-association record for
    every valid, not-yet-known line.

    requires:
        The globals inputFile, errorFile, refFile, refDict, mgiTypeKey,
        loaddate and refAssocKey have been set up by the caller.

    effects:
        Each tab-delimited line must hold at least: accession ID,
        J-Number, reference association type, created-by user.
        Lines with fewer fields terminate the load through exit().
        Lines for which any verification returns 0 are skipped.
        Lines whose object/reference/type combination is already in
        refDict are reported to errorFile as duplicates and skipped.
        Every remaining line is written to refFile and advances the
        global refAssocKey.

    returns:
        nothing
    """

    global refAssocKey

    lineNum = 0

    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Strip only the line terminator: slicing off the last character
        # would truncate the final field of a last line that has no
        # trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            accID = tokens[0]
            jnum = tokens[1]
            refAssocType = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            # too few fields on the line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # a key of 0 is treated as a failed verification: skip the record
        if objectKey == 0 or \
           referenceKey == 0 or \
           refAssocTypeKey == 0 or \
           createdByKey == 0:
            continue

        # could move to verifyDuplicate routine
        # NOTE(review): refDict is not updated here, so a combination that
        # is repeated inside the input file itself is not reported as a
        # duplicate -- confirm that this is intended.
        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        if key in refDict:
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))
            continue

        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, referenceKey, objectKey, mgiTypeKey, refAssocTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        refAssocKey = refAssocKey + 1
Esempio n. 4
0
def processFile():
    """
    Read the probe input file and write the bcp records for every valid
    line.

    requires:
        The global file handles (inputFile, errorFile, probeFile,
        markerFile, refFile, aliasFile, accFile, accRefFile, noteFile,
        rawNoteFile, newProbeFile), the next-available keys (probeKey,
        refKey, aliasKey, accKey, mgiKey) and the settings mgiPrefix,
        mgiTypeKey, loaddate and DEBUG have been set up by the caller.

    effects:
        Each tab-delimited line must hold at least 22 fields; a shorter
        line terminates the load through exit().
        The probe source is resolved in one of three ways: from the
        individual source attributes (no parent, no source name), from
        a named library (source name only), or from the parent probe.
        Reference, creator, markers and sequence IDs are verified.
        Lines with errors are skipped.  Valid lines produce probe,
        marker, reference, alias, accession, accession-reference, note
        and "new probe" records and advance the global keys.
        Unless DEBUG is set, ACC_setMax is called with the number of
        lines read.

    returns:
        nothing
    """

    global probeKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator: slicing off the last character
        # would truncate the final field of a last line that has no
        # trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            name = tokens[0]
            jnum = tokens[1]
            parentID = tokens[2]
            sourceName = tokens[3]
            organism = tokens[4]
            strain = tokens[5]
            tissue = tokens[6]
            gender = tokens[7]
            cellLine = tokens[8]
            age = tokens[9]
            vectorType = tokens[10]
            segmentType = tokens[11]
            regionCovered = tokens[12]
            insertSite = tokens[13]
            insertSize = tokens[14]
            markerIDs = tokens[15].split('|')
            relationship = tokens[16]
            sequenceIDs = tokens[17]
            aliasList = tokens[18].split('|')
            notes = tokens[19]
            rawnotes = tokens[20]
            createdBy = tokens[21]
        except IndexError:
            # too few fields on the line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        isParent = parentID != ''
        isSource = sourceName != ''
        parentProbeKey = ''
        sourceKey = 0

        # parent = no, source given = no: build the source from its parts
        if not isParent and not isSource:
            organismKey = sourceloadlib.verifyOrganism(organism, lineNum, errorFile)
            strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
            tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
            genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
            cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifySource(segmentTypeKey, \
                vectorKey, organismKey, strainKey, \
                tissueKey, genderKey, cellLineKey, age, lineNum, errorFile)

            if organismKey == 0 or strainKey == 0 or tissueKey == 0 or \
               genderKey == 0 or cellLineKey == 0 or vectorKey == 0 or \
               segmentTypeKey == 0 or sourceKey == 0:
                errorFile.write('%s, %s, %s, %s, %s, %s, %s, %s\n' % (segmentType, vectorType, organism, strain, tissue, gender, cellLine, age))
                error = 1

        # parent = no, source given = yes: look the source up by name
        elif not isParent and isSource:
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifyLibrary(sourceName, lineNum, errorFile)

            if vectorKey == 0 or segmentTypeKey == 0 or sourceKey == 0:
                error = 1

        # parent = yes, source given = yes or no (ignored)
        else:
            parentProbeKey, sourceKey = verifyParentProbe(parentID, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum, errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(segmentType, lineNum, errorFile)

            if parentProbeKey == 0 or sourceKey == 0 or vectorKey == 0 or segmentTypeKey == 0:
                error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # marker IDs
        # markerIDList[i] is the input ID that resolved to markerList[i];
        # it is kept so that a duplicate can be reported by its own ID.

        markerList = []
        markerIDList = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markerList.append(markerKey)
                markerIDList.append(markerID)

        # sequence IDs ("logicalDB:accession", separated by '|')
        # entries whose logical DB does not resolve to a positive key are
        # left out without flagging the line as an error
        seqAccDict = {}
        for seqID in sequenceIDs.split('|'):
            if len(seqID) > 0:
                [logicalDB, acc] = seqID.split(':')
                logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum, errorFile)
                if logicalDBKey > 0:
                    seqAccDict[acc] = logicalDBKey

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the probe

        probeFile.write('%d\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\n' \
            % (probeKey, name, parentProbeKey, sourceKey, vectorKey, segmentTypeKey, mgi_utils.prvalue(regionCovered), \
            mgi_utils.prvalue(insertSite), mgi_utils.prvalue(insertSize), createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once on the line is reported (once per
        # occurrence) instead of being written
        for listedID, markerKey in zip(markerIDList, markerList):
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' % (name, listedID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the probe

        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # Print out a new text file and attach the new MGI Probe IDs as the last field
        # NOTE(review): regionCovered and insertSite are joined with '+'
        # into a single column here -- confirm this is intended rather
        # than a missing separator.

        newProbeFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
            % (name, jnum, \
            mgi_utils.prvalue(sourceName), \
            organism, \
            mgi_utils.prvalue(strain), \
            mgi_utils.prvalue(tissue), \
            mgi_utils.prvalue(gender), \
            mgi_utils.prvalue(cellLine), \
            mgi_utils.prvalue(age), \
            mgi_utils.prvalue(vectorType), \
            mgi_utils.prvalue(segmentType), \
            mgi_utils.prvalue(regionCovered) + \
            mgi_utils.prvalue(insertSite), \
            mgi_utils.prvalue(insertSize), \
            '|'.join(markerIDs), \
            relationship, \
            mgi_utils.prvalue(sequenceIDs), \
            '|'.join(aliasList), \
            mgi_utils.prvalue(notes), \
            createdBy, mgiPrefix, mgiKey))

        # Print out a raw note file

        if len(rawnotes) > 0:
            rawNoteFile.write('%s%d\t%s\n' % (mgiPrefix, mgiKey, rawnotes))

        # Notes

        if len(notes) > 0:
            noteFile.write('%s\t%s\t%s\t%s\n' % (probeKey, notes, loaddate, loaddate))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        for acc in seqAccDict:
            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, seqAccDict[acc], probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        refKey = refKey + 1
        probeKey = probeKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
Esempio n. 5
0
def processFile():
    """
    Read the input file and write alias records (and, where needed, new
    probe-reference records) for every valid line.

    requires:
        The globals inputFile, errorFile, refFile, aliasFile, loaddate,
        refKey and aliasKey have been set up by the caller.

    effects:
        Each tab-delimited line must hold at least: probe (MGI ID or
        name), J-Number, '|'-separated aliases, created-by user.
        Lines with fewer fields terminate the load through exit().
        Lines whose probe, reference or creator verifies to 0 are
        reported to errorFile and skipped.
        If the probe has no association with the reference yet, one is
        written to refFile and the global refKey advances.  Every
        non-empty alias is written to aliasFile and advances the global
        aliasKey.

    returns:
        nothing
    """

    global refKey, aliasKey

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator: slicing off the last character
        # would truncate the final field of a last line that has no
        # trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = probeName = tokens[0]
            jnum = tokens[1]
            aliasList = tokens[2].split('|')
            createdBy = tokens[3]
        except IndexError:
            # too few fields on the line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # a value containing 'MGI:' is verified as an ID; anything else
        # goes through the local verifyProbe(), which also returns the ID
        if 'MGI:' in probeID:
            probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        else:
            probeKey, probeID = verifyProbe(probeName, lineNum, errorFile)

        probeReferenceKey = verifyProbeReference(probeID, jnum, lineNum,
                                                 errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        # probeReferenceKey == 0 is not an error: the association is
        # created further down.

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # create a new probe-reference key if one does not already exist
        # else use the existing probe-reference key

        if probeReferenceKey == 0:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            aliasrefKey = refKey
            refKey = refKey + 1
        else:
            aliasrefKey = probeReferenceKey

        # aliases

        for alias in aliasList:

            if len(alias) == 0:
                continue

            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, aliasrefKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1
Esempio n. 6
0
def processFile():
    """
    Read the input file and write one experiment-marker record for every
    valid line.

    requires:
        The globals inputFileName, errorFile, exptMarkerFile, noteFile,
        exptDict, seqExptDict, alleleKey, matrixData and loaddate have
        been set up by the caller.

    effects:
        Each '|'-delimited line must hold at least: marker ID,
        chromosome, update-chromosome flag, band, assay, description,
        J-Number, created-by user.  A line with fewer fields is taken
        to be the note (the last such line wins).
        Lines that fail any verification are skipped.
        createExperimentMaster() is called once, before the first valid
        line is processed.  An experiment is created per chromosome on
        first use via createExperimentBCP().
        Sets the global referenceKey for every line that has enough
        fields; the note, if any, is written with the last value set.

    returns:
        nothing
    """

    global referenceKey
    global exptDict, seqExptDict

    lineNum = 0
    note = ''

    # The "master" used to be created only when line 1 was valid, so a
    # leading note or a bad first line meant it was never created.
    masterCreated = 0

    # Read everything up front so the file is closed even on an error.
    inputFile = open(inputFileName, 'r')
    try:
        lines = inputFile.readlines()
    finally:
        inputFile.close()

    for line in lines:

        lineNum = lineNum + 1

        # Strip only the line terminator: slicing off the last character
        # would truncate the final field of a last line that has no
        # trailing newline.
        tokens = line.rstrip('\n').split('|')

        try:
            markerID = tokens[0]
            chromosome = tokens[1]
            updateChr = tokens[2]    # read but not used in this function
            band = tokens[3]         # read but not used in this function
            assay = tokens[4]
            description = tokens[5]
            jnum = tokens[6]
            createdBy = tokens[7]
        except IndexError:
            # if it's not a valid line, assume it's the note
            note = line
            continue

        markerKey, markerSymbol = verifyMarker(markerID, lineNum)
        assayKey = verifyAssay(assay)
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
        error = not verifyChromosome(chromosome, lineNum)

        if markerKey == 0 or \
           assayKey == 0 or \
           referenceKey == 0 or \
           createdByKey == 0:
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # run once...needs the reference
        if not masterCreated:
            createExperimentMaster()
            masterCreated = 1

        # determine experiment key for this chromosome
        # if it doesn't exist, create it

        if chromosome not in exptDict:
            createExperimentBCP(chromosome)

        if chromosome not in exptDict:
            errorFile.write('Cannot Find Experiment Key For Chromosome (%d): %s\n' % (lineNum, chromosome))
            continue

        chrExptKey = exptDict[chromosome]
        if chrExptKey == 0:
            continue

        # add marker to experiment marker file
        bcpWrite(exptMarkerFile, \
            [chrExptKey, \
            markerKey, \
            alleleKey, \
            assayKey, \
            seqExptDict[chrExptKey], \
            markerSymbol, \
            description, \
            matrixData, \
            loaddate, loaddate])

        # increment marker sequence number for the experiment
        seqExptDict[chrExptKey] = seqExptDict[chrExptKey] + 1

    if len(note) > 0:
        bcpWrite(noteFile, [referenceKey, note, loaddate, loaddate])
Esempio n. 7
0
def processFile():
    """
    Read the input file and, for every valid line, record the merge of
    a "from" probe into a "to" probe.

    requires:
        The globals inputFile, errorFile, refFile, aliasFile, mgiTypeKey,
        loaddate, refKey, aliasKey, the SQL templates updateAssaySQL,
        updateRefSQL and deleteProbeSQL and the lists execAssaySQL,
        execRefSQL and execProbeSQL have been set up by the caller.

    effects:
        Each tab-delimited line must hold at least: from ID, name,
        to ID, J-Number, created-by user.  Lines with fewer fields
        terminate the load through exit().
        A line is reported to errorFile and skipped when a key verifies
        to 0, when the gene check query returns no row, or when the
        reference is not on any assay of the "from" probe.
        A valid line writes a reference record for the "to" probe and
        an alias record carrying the given name, advances the global
        refKey and aliasKey, and queues the assay, reference and probe
        SQL statements for later execution.

    returns:
        nothing
    """

    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator: slicing off the last character
        # would truncate the final field of a last line that has no
        # trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            # too few fields on the line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if fromKey == 0:
            errorFile.write('Invalid Probe "From":  %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To":  %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same
        checkGenesSQL = '''
			select f.*
			from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a
			where f._Probe_key = %s
			and t._Probe_key = %s
			and p._Probe_key = %s
			and p._ProbePrep_key = a._ProbePrep_key
			and f._Marker_key = t._Marker_key
			and f._Marker_key = a._Marker_key
			''' % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write('Gene of GenePaint, Eurexpress and Assay are not the same:  %s, %s\n' % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = '''
			 select a.*
			 from GXD_ProbePrep p, GXD_Assay a
			 where p._Probe_key = %s
			 and p._ProbePrep_key = a._ProbePrep_key
			 and a._Refs_key = %s
			 ''' % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            errorFile.write('J: is not on any Assays attached to the probe:  %s\n' % (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))
        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
Esempio n. 8
0
def processFile():
    """
    Read the input file and write probe/marker and alias records for
    every valid line.

    requires:
        The globals inputFile, errorFile, markerFile, refFile, aliasFile,
        loaddate, refKey, aliasKey, the SQL template deleteSQL and the
        list execSQL have been set up by the caller.

    effects:
        Each tab-delimited line must hold at least: probe ID,
        '|'-separated marker IDs, J-Number, relationship, '|'-separated
        aliases, created-by user.  Lines with fewer fields terminate the
        load through exit().
        A line is reported to errorFile and skipped when probe,
        reference, creator or a marker verifies to 0, or when the probe
        has no PRB_Reference row for the reference.
        For a valid line every marker listed exactly once is written to
        markerFile and a delete statement for it is queued in execSQL;
        every non-empty alias is written to aliasFile.

    returns:
        nothing
    """

    global refKey, aliasKey, execSQL

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator: slicing off the last character
        # would truncate the final field of a last line that has no
        # trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            aliasList = tokens[4].split('|')
            createdBy = tokens[5]
        except IndexError:
            # too few fields on the line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        refsKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if refsKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        results = db.sql('''select _Reference_key from PRB_Reference
		where _Probe_key = %s
		and _Refs_key = %s
		''' % (probeKey, refsKey), 'auto')

        # No row (always the case for an invalid probe or reference) is
        # reported like a zero key instead of failing on results[0].
        if results:
            referenceKey = results[0]['_Reference_key']
        else:
            referenceKey = 0

        if referenceKey == 0:
            errorFile.write('Invalid Probe/Reference:  %s\n' % (jnum))
            error = 1

        # marker IDs
        # markerIDList[i] is the input ID that resolved to markerList[i];
        # it is kept so that a duplicate can be reported by its own ID.

        markerList = []
        markerIDList = []
        for markerID in markerIDs:

            # 'none' ends the marker list
            if markerID == 'none':
                break

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker:  %s\n' % (markerID))
                error = 1
            else:
                markerList.append(markerKey)
                markerIDList.append(markerID)

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # a marker listed more than once on the line is reported (once per
        # occurrence) instead of being written
        for listedID, markerKey in zip(markerIDList, markerList):
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, refsKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s\n' % (listedID))

        # NOTE(review): a referenceKey of 0 was already rejected above, so
        # the "else" branch is only reached for a negative key, and the
        # global refKey ends up as the last existing key + 1 -- confirm
        # that nothing relies on refKey after this function.
        if referenceKey > 0:
            refKey = referenceKey
        else:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, refsKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:

            # an empty alias field yields one empty string; skip it
            if len(alias) == 0:
                continue

            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # only used if referenceKey == 0
        refKey = refKey + 1
Esempio n. 9
0
def processFile():
    """
    Read the input file and write probe/marker and alias records for
    every valid line.

    requires:
        The globals inputFile, errorFile, markerFile, refFile, aliasFile,
        loaddate, refKey, aliasKey, the SQL template deleteSQL and the
        list execSQL have been set up by the caller.

    effects:
        Each tab-delimited line must hold at least: probe ID,
        '|'-separated marker IDs, J-Number, relationship, '|'-separated
        aliases, created-by user.  Lines with fewer fields terminate the
        load through exit().
        A line is reported to errorFile and skipped when probe,
        reference, creator or a marker verifies to 0, or when the probe
        has no PRB_Reference row for the reference.
        For a valid line every marker listed exactly once is written to
        markerFile and a delete statement for it is queued in execSQL;
        every non-empty alias is written to aliasFile.

    returns:
        nothing
    """

    global refKey, aliasKey, execSQL

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Remove just the newline; line[:-1] would eat a real character
        # when the last line of the file is not newline-terminated.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            aliasList = tokens[4].split('|')
            createdBy = tokens[5]
        except IndexError:
            # too few fields on the line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        refsKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if refsKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        results = db.sql(
            '''select _Reference_key from PRB_Reference
		where _Probe_key = %s
		and _Refs_key = %s
		''' % (probeKey, refsKey), 'auto')

        # An empty result (always the case for an invalid probe or
        # reference) is handled like a zero key rather than raising
        # IndexError on results[0].
        referenceKey = 0
        if len(results) > 0:
            referenceKey = results[0]['_Reference_key']

        if referenceKey == 0:
            errorFile.write('Invalid Probe/Reference:  %s\n' % (jnum))
            error = 1

        # marker IDs
        # verifiedMarkers holds (input ID, marker key) pairs so that a
        # duplicate can be reported by its own ID.

        verifiedMarkers = []
        for markerID in markerIDs:

            # 'none' ends the marker list
            if markerID == 'none':
                break

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker:  %s\n' % (markerID))
                error = 1
            else:
                verifiedMarkers.append((markerID, markerKey))

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        markerList = [pair[1] for pair in verifiedMarkers]

        # a marker listed more than once on the line is reported (once per
        # occurrence) instead of being written
        for listedID, markerKey in verifiedMarkers:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (probeKey, markerKey, refsKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s\n' % (listedID))

        # NOTE(review): a referenceKey of 0 was already rejected above, so
        # the "else" branch is only reached for a negative key, and the
        # global refKey ends up as the last existing key + 1 -- confirm
        # that nothing relies on refKey after this function.
        if referenceKey > 0:
            refKey = referenceKey
        else:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, refsKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:

            # an empty alias field yields one empty string; skip it
            if len(alias) == 0:
                continue

            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # only used if referenceKey == 0
        refKey = refKey + 1
Esempio n. 10
0
def init():
    """
    Set up the database connection, the global file handles and the
    global keys used by the synonym load.

    requires:
        The module-level settings user, passwordFileName, inputFileName,
        logDir, outputDir, mgiType, createdBy, jnum and mode are set.

    effects:
        1. Initializes local DBMS parameters
        2. Initializes global file descriptors/file names
           (input, diagnostics, error and MGI_Synonym.bcp files)
        3. Initializes global keys (mgiTypeKey, createdByKey,
           referenceKey)
        4. In 'reload' mode, deletes the MGI_Synonym rows of this
           MGI type that were created by this user

        Terminates through exit() if a file cannot be opened or if the
        MGI type, the user or the reference verifies to 0.

    returns:
        nothing
    """

    global diagFileName, errorFileName, synFileName
    global inputFile, diagFile, errorFile, synFile
    global mgiTypeKey, createdByKey, referenceKey

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # log files are named after the input file
    tail = os.path.basename(inputFileName)
    diagFileName = logDir + '/' + tail + '.diagnostics'
    errorFileName = logDir + '/' + tail + '.error'
    synFileName = 'MGI_Synonym.bcp'

    print(inputFileName)
    print(logDir)

    inputFile = _openOrExit(inputFileName, 'r')
    diagFile = _openOrExit(diagFileName, 'w')
    errorFile = _openOrExit(errorFileName, 'w')
    # the bcp file lives in outputDir; report that full path on failure
    synFile = _openOrExit(outputDir + '/' + synFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

    # if reference is J:0, then no reference is given
    # ('' does not compare equal to 0, so it passes the check below)
    if jnum == 'J:0':
        referenceKey = ''
    else:
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)

    # exit if we can't resolve mgiType, createdBy or jnum
    if mgiTypeKey == 0 or \
       createdByKey == 0 or \
       referenceKey == 0:
        exit(1)

    if mode == 'reload':
        print('mode is: %s, deleting synonyms' % mode)
        sys.stdout.flush()
        db.sql('delete from MGI_Synonym ' + \
            'where _MGIType_key = %d ' % (mgiTypeKey) + \
            'and _CreatedBy_key = %d ' % (createdByKey), None)


def _openOrExit(path, fileMode):
    # Open path in fileMode; terminate the load through exit() on failure.
    try:
        return open(path, fileMode)
    except IOError:
        exit(1, 'Could not open file %s\n' % path)
Esempio n. 11
0
def processFile():
    """
    Read the input file and, for every valid line, record the merge of
    a "from" probe into a "to" probe.

    requires:
        The globals inputFile, errorFile, refFile, aliasFile, mgiTypeKey,
        loaddate, refKey, aliasKey, the SQL templates updateAssaySQL,
        updateRefSQL and deleteProbeSQL and the lists execAssaySQL,
        execRefSQL and execProbeSQL have been set up by the caller.

    effects:
        Each tab-delimited line must hold at least: from ID, name,
        to ID, J-Number, created-by user.  Lines with fewer fields
        terminate the load through exit().
        A line is reported to errorFile and skipped when a key verifies
        to 0, when the gene check query returns no row, or when the
        reference is not on any assay of the "from" probe.
        A valid line writes a reference record for the "to" probe and
        an alias record carrying the given name, advances the global
        refKey and aliasKey, and queues the assay, reference and probe
        SQL statements for later execution.

    returns:
        nothing
    """

    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Remove just the newline; line[:-1] would eat a real character
        # when the last line of the file is not newline-terminated.
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            # too few fields on the line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum,
                                       errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum,
                                     errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if fromKey == 0:
            errorFile.write('Invalid Probe "From":  %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To":  %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same
        checkGenesSQL = '''
			select f.*
			from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a
			where f._Probe_key = %s
			and t._Probe_key = %s
			and p._Probe_key = %s
			and p._ProbePrep_key = a._ProbePrep_key
			and f._Marker_key = t._Marker_key
			and f._Marker_key = a._Marker_key
			''' % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write(
                'Gene of GenePaint, Eurexpress and Assay are not the same:  %s, %s\n'
                % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = '''
			 select a.*
			 from GXD_ProbePrep p, GXD_Assay a
			 where p._Probe_key = %s
			 and p._ProbePrep_key = a._ProbePrep_key
			 and a._Refs_key = %s
			 ''' % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            errorFile.write(
                'J: is not on any Assays attached to the probe:  %s\n' %
                (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))
        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
Esempio n. 12
0
def processFile():
    """
    Read the input file and write one experiment-marker record for every
    valid line.

    requires:
        The globals inputFileName, errorFile, exptMarkerFile, noteFile,
        exptDict, seqExptDict, alleleKey, matrixData and loaddate have
        been set up by the caller.

    effects:
        Each '|'-delimited line must hold at least: mapping key,
        marker ID, chromosome, update-chromosome flag, band, assay,
        description, J-Number, created-by user.  A line with fewer
        fields is taken to be the note (the last such line wins).
        Lines that fail any verification are skipped.
        createExperimentMaster() is called once, before the first valid
        line is processed.  An experiment is created per chromosome on
        first use via createExperimentBCP().
        Sets the global referenceKey for every line that has enough
        fields; the note, if any, is written with the last value set.

    returns:
        nothing
    """

    global referenceKey
    global exptDict, seqExptDict

    lineNum = 0
    note = ''

    # The "master" used to be created only when line 1 was valid, so a
    # leading note or a bad first line meant it was never created.
    masterCreated = False

    # Read everything up front so the file is closed even on an error.
    inputFile = open(inputFileName, 'r')
    try:
        lines = inputFile.readlines()
    finally:
        inputFile.close()

    for line in lines:

        lineNum = lineNum + 1

        # Strip only the line terminator: slicing off the last character
        # would truncate the final field of a last line that has no
        # trailing newline.
        tokens = line.rstrip('\n').split('|')

        try:
            mappingKey = tokens[0]
            markerID = tokens[1]
            chromosome = tokens[2]
            updateChr = tokens[3]    # read but not used in this function
            band = tokens[4]         # read but not used in this function
            assay = tokens[5]
            description = tokens[6]
            jnum = tokens[7]
            createdBy = tokens[8]
        except IndexError:
            # if it's not a valid line, assume it's the note
            note = line
            continue

        markerKey, markerSymbol = verifyMarker(markerID, lineNum)
        assayKey = verifyAssay(assay)
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)
        error = not verifyChromosome(chromosome, lineNum)

        if markerKey == 0 or \
           assayKey == 0 or \
           referenceKey == 0 or \
           createdByKey == 0:
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # run once...needs the reference
        if not masterCreated:
            createExperimentMaster()
            masterCreated = True

        # determine experiment key for this chromosome
        # if it doesn't exist, create it

        if chromosome not in exptDict:
            createExperimentBCP(chromosome)

        if chromosome not in exptDict:
            errorFile.write(
                'Cannot Find Experiment Key For Chromosome (%d): %s\n' %
                (lineNum, chromosome))
            continue

        chrExptKey = exptDict[chromosome]
        if chrExptKey == 0:
            continue

        # add marker to experiment marker file
        bcpWrite(exptMarkerFile, \
                [mappingKey, \
                chrExptKey, \
                markerKey, \
                alleleKey, \
                assayKey, \
                seqExptDict[chrExptKey], \
                description, \
                matrixData, \
                loaddate, loaddate])

        # increment marker sequence number for the experiment
        seqExptDict[chrExptKey] = seqExptDict[chrExptKey] + 1

    if len(note) > 0:
        bcpWrite(noteFile, [referenceKey, note, loaddate, loaddate])
Esempio n. 13
0
def processFile():
    # Purpose: read the allele input file and, for every line that passes
    #          verification, write the rows needed to create one allele
    # Returns: nothing
    # Assumes: the input/output file handles, the key counters named in the
    #          global statements and the other module-level values used
    #          below have been initialized by the rest of the script
    # Effects: writes to the allele, mutation, reference, annotation,
    #          accession, note and new-allele files; advances the key
    #          counters; stores each new allele in alleleLookup; unless
    #          DEBUG is set, calls ACC_setMax and commits
    # Throws: nothing directly; a line with fewer than 24 columns is handed
    #         to exit()

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey, mutationKey
    global alleleLookup

    # reference association type key for each label accepted in the
    # "references" column
    refAssocTypeKeys = {
        'Original': 1011,
        'Transmission': 1023,
        'Molecular': 1012,
    }

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into its 24 tab-delimited columns (in this order);
        # extra trailing columns are ignored, too few columns is fatal
        tokens = line[:-1].split('\t')
        try:
            (markerID, symbol, name, alleleStatus, alleleType,
             alleleSubtypes, collectionKey, germLine, references,
             strainOfOrigin, mutantCellLine, molecularNotes, driverNotes,
             ikmcNotes, mutations, inheritanceMode, isMixed, isExtinct,
             createdBy, createMCL, createNote, setStatus, existingAlleleID,
             ikmcSymbol) = tokens[:24]
        except ValueError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus,
                            symbol, ikmcSymbol, mutantCellLine, ikmcNotes,
                            createdByKey, existingAlleleID)
            continue

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum,
                                             errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum,
                                           errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation); each term is verified
        # when its row is written below
        allMutations = mutations.split('|')

        # _vocab_key = 35
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode,
                                                lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum,
                                                       errorFile)

        # reference
        # NOTE(review): jnum is not assigned anywhere in this function;
        # presumably it is a module-level value -- confirm
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        # (createdByKey was already checked above; markerStatusKey is a
        # non-zero constant)
        if markerKey == 0 \
                or alleleStatusKey == 0 \
                or alleleTypeKey == 0 \
                or germLineKey == 0 \
                or inheritanceModeKey == 0 \
                or strainOfOriginKey == 0 \
                or refKey == 0:
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, germLineKey, collectionKey, symbol, name, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutation in allMutations:
            mutationTermKey = loadlib.verifyTerm('', 36, mutation, lineNum,
                                                 errorFile)
            mutationFile.write('%s|%s|%s|%s|%s\n' \
                % (mutationKey, alleleKey, mutationTermKey, loaddate, loaddate))
            mutationKey = mutationKey + 1

        #
        # allele references: "type|J:#" pairs separated by "||"
        #
        for reference in references.split('||'):
            refType, refID = reference.split('|')
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            # an unrecognized type used to reuse the previous association
            # type (or fail on an unbound name); report it and skip instead
            if refType not in refAssocTypeKeys:
                errorFile.write('Invalid Reference Type (%d): %s\n'
                                % (lineNum, refType))
                continue
            refAssocTypeKey = refAssocTypeKeys[refType]

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
                    createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        #
        # allele subtypes
        #
        for s in alleleSubtypes.split('|'):

            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum,
                                                  errorFile)

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
                            qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele

        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note
        # molecular notes

        if len(molecularNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
                   molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # placeholder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes (same nine columns as the molecular note row)
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
                   ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field

        if createdBy == 'ikmc_alleleload':
            newAlleleFile.write('%s\t%s%s\t%s\n' \
            % (mgi_utils.prvalue(ikmcNotes), \
                    mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
                    mgi_utils.prvalue(ikmcSymbol)))
        else:
            # one tab-terminated column per value, then the new MGI ID; the
            # format is built from the value list so the two cannot drift
            # apart again
            reportValues = [mgi_utils.prvalue(value) for value in (
                markerID, symbol, name, alleleStatus, alleleType,
                alleleSubtypes, collectionKey, germLine, references,
                strainOfOrigin, mutantCellLine, mutations, inheritanceMode,
                isMixed, isExtinct, refKey, markerStatusKey, createdBy)]
            newAlleleFile.write(
                ('%s\t' * len(reportValues)) % tuple(reportValues)
                + '%s%s\n' % (mgi_utils.prvalue(mgiPrefix),
                              mgi_utils.prvalue(mgiKey)))

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = [
            (alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey))]

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()
Esempio n. 14
0
def processFile():
    # Purpose: read the input file and write one reference association row
    #          for every line that verifies and is not already in refDict
    # Returns: nothing
    # Assumes: inputFile, errorFile, refFile, refDict, mgiTypeKey, loaddate
    #          and refAssocKey have been initialized by the rest of the
    #          script
    # Effects: writes to refFile and errorFile; advances refAssocKey
    # Throws: nothing directly; a line with fewer than 4 columns is handed
    #         to exit()

    global refAssocKey

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            accID = tokens[0]
            jnum = tokens[1]
            refAssocType = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # all four lookups run even if an earlier one fails, so every
        # problem with the line is reported by the verify routines
        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum,
                                         errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # if errors, continue to next record
        if objectKey == 0 or \
           referenceKey == 0 or \
           refAssocTypeKey == 0 or \
           createdByKey == 0:
            continue

        # if no errors, process the association

        # could move to verifyDuplicate routine
        # NOTE(review): refDict is only read here, never updated, so a
        # duplicate that appears twice within the input file is not caught
        # by this check -- confirm that is intended

        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        if key in refDict:
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))
            continue

        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, referenceKey, objectKey, mgiTypeKey, refAssocTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        refAssocKey = refAssocKey + 1
Esempio n. 15
0
def processFile():
    # Purpose: read the primer input file and, for every line that passes
    #          verification, write the rows needed to create one primer
    # Returns: nothing
    # Assumes: the input/output file handles, the key counters named in the
    #          global statement and the other module-level values used
    #          below have been initialized by the rest of the script
    # Effects: writes to the primer, marker, reference, alias, accession,
    #          accession-reference, note and new-primer files; advances the
    #          key counters; unless DEBUG is set, calls ACC_setMax
    # Throws: nothing directly; a line with fewer than 12 columns is handed
    #         to exit()

    global primerKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            markerSymbol = tokens[0]    # only echoed to the new-primer file
            markerIDs = tokens[1].split('|')
            name = tokens[2]
            jnum = tokens[3]
            regionCovered = tokens[4]
            sequence1 = tokens[5]
            sequence2 = tokens[6]
            productSize = tokens[7]
            notes = tokens[8]
            sequenceIDs = tokens[9]
            aliasList = tokens[10].split('|')
            createdBy = tokens[11]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # marker IDs: keep each ID next to its key so that later messages
        # can name the ID they are about

        markerList = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markerList.append((markerID, markerKey))

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # a key of 0 must not reach the bcp files
        if referenceKey == 0 or createdByKey == 0:
            error = 1

        # sequence IDs
        seqAccList = sequenceIDs.split('|')

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the primer

        primerFile.write('%d\t%s\t\t%d\t%d\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\n' \
            % (primerKey, name, NA, vectorKey, segmentTypeKey, mgi_utils.prvalue(sequence1), \
            mgi_utils.prvalue(sequence2), mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(productSize), \
            createdByKey, createdByKey, loaddate, loaddate))

        # a marker key that occurs more than once on the line is reported
        # (once per occurrence) and not written at all
        markerKeys = [markerKey for (markerID, markerKey) in markerList]
        for markerID, markerKey in markerList:
            if markerKeys.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (primerKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' % (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' % (refKey, primerKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                    % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the primer

        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # echo the input columns with the new MGI ID as the last field
        newPrimerFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
            % (markerSymbol, '|'.join(markerIDs), name, jnum, regionCovered, sequence1, sequence2, productSize, notes, sequenceIDs, createdBy, mgiPrefix, mgiKey))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        for acc in seqAccList:

            if len(acc) == 0:
                continue

            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, logicalDBKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        # notes

        if len(notes) > 0:
            noteFile.write('%s|1\t%s\t%s\t%s\n' \
                % (primerKey, notes, loaddate, loaddate))

        refKey = refKey + 1
        primerKey = primerKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
Esempio n. 16
0
def processImageFile():
    # Purpose: read the image input file and, for every line that passes
    #          verification, write the image row, its accession rows and
    #          any copyright/caption rows
    # Returns: the number of input lines read
    # Assumes: the input/output file handles, the key counters named in the
    #          global statements, createdByKey and the other module-level
    #          values used below have been initialized by the rest of the
    #          script
    # Effects: writes to the image, accession, copyright and caption
    #          files; advances imageKey, accKey and mgiKey; records
    #          pixID -> imageKey in imagePix; leaves the last verified
    #          reference key in the global referenceKey
    # Throws: nothing directly; a line with fewer than 10 columns is handed
    #         to exit()

    global imageKey, accKey, mgiKey
    global imagePix
    global referenceKey

    lineNum = 0

    # For each line in the input file

    for line in inImageFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            jnum = tokens[0]
            # tokens[1] (full size key) is not used here
            imageClass = tokens[2]
            pixID = tokens[3]
            xdim = tokens[4]
            ydim = tokens[5]
            figureLabel = tokens[6]
            copyrightNote = tokens[7]
            imageNote = tokens[8]
            imageInfo = tokens[9]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        imageClassKey = loadlib.verifyTerm('', imageVocabClassKey, imageClass, lineNum, errorFile)
        if imageClassKey == 0:
            error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        if referenceKey == 0:
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        imageTypeKey = FSimageTypeKey

        # created by / modified by / creation date / modification date;
        # these four columns end every row written below
        auditFields = [str(createdByKey), str(createdByKey), loaddate, loaddate]

        # the empty string keeps the column after the reference key empty
        outImageFile.write(TAB.join([
            str(imageKey),
            str(gxdMgiTypeKey),
            str(imageClassKey),
            str(imageTypeKey),
            str(referenceKey),
            '',
            xdim,
            ydim,
            figureLabel] + auditFields) + CRT)

        # MGI Accession ID for the image

        mgiAccID = mgiPrefix + str(mgiKey)

        outAccFile.write(TAB.join([
            str(accKey),
            mgiAccID,
            mgiPrefix,
            str(mgiKey),
            accLogicalDBKey,
            str(imageKey),
            imageMgiTypeKey,
            accPrivate,
            accPreferred] + auditFields) + CRT)

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # pix accession ID, unless the ID is empty or contains 'GUDMAP'

        if len(pixID) > 0 and pixID.find('GUDMAP') < 0:
            outAccFile.write(TAB.join([
                str(accKey),
                pixPrefix + str(pixID),
                pixPrefix,
                pixID,
                pixLogicalDBKey,
                str(imageKey),
                imageMgiTypeKey,
                pixPrivate,
                accPreferred] + auditFields) + CRT)

            accKey = accKey + 1

        # additional accession ID given as "logical DB key|ID"; the ID is
        # written in both the accession and prefix columns and the numeric
        # column is left empty

        if len(imageInfo) > 0:

            imageLogicalDBKey, imageID = imageInfo.split('|')

            outAccFile.write(TAB.join([
                str(accKey),
                imageID,
                imageID,
                '',
                imageLogicalDBKey,
                str(imageKey),
                imageMgiTypeKey,
                accPrivate,
                accPreferred] + auditFields) + CRT)

            accKey = accKey + 1

        # Copyrights

        if len(copyrightNote) > 0:
            outCopyrightFile.write(mgiAccID + TAB + copyrightNote + CRT)

        # Notes

        if len(imageNote) > 0:
            outCaptionFile.write(mgiAccID + TAB + imageNote + CRT)

        imagePix[pixID] = imageKey
        imageKey = imageKey + 1

    #	end of "for line in inImageFile.readlines():"

    return lineNum
Esempio n. 17
0
def processFile():
    # Purpose: read the library input file; add a library record for each
    #          verified line whose library is not found, update it
    #          otherwise, then add its clone collections
    # Returns: nothing
    # Assumes: inputFile, errorFile, libraryTable, NS, TAB and the db,
    #          loadlib and sourceloadlib modules are available at module
    #          level
    # Effects: sets the module-level values named in the global statements
    #          for use by addLibrary()/updateLibrary(); writes to errorFile
    # Throws: nothing directly; a line that does not split into exactly 15
    #         columns is handed to exit()

    global libraryName, libraryID, libraryKey, logicalDBKey
    global segmentTypeKey, vectorTypeKey, organismKey, referenceKey, strainKey, tissueKey
    global age, ageMin, ageMax, genderKey, cellLineKey, createdByKey
    global strainNS, tissueNS, genderNS, cellLineNS, ageNS

    lineNum = 0

    # next available primary key for a new Library record
    maxKeyRows = db.sql(
        'select maxKey = max(_Source_key) + 1 from %s' % (libraryTable),
        'auto')
    nextLibraryKey = maxKeyRows[0]['maxKey']

    # keys for the "Not Specified" value of each source attribute
    strainNS = sourceloadlib.verifyStrain(NS, 0, None)
    tissueNS = sourceloadlib.verifyTissue(NS, 0, None)
    genderNS = sourceloadlib.verifyGender(NS, 0, None)
    cellLineNS = sourceloadlib.verifyCellLine(NS, 0, None)
    ageNS = NS

    for line in inputFile.readlines():

        lineNum += 1

        # the line must hold exactly these 15 tab-delimited columns
        try:
            columns = string.split(line[:-1], TAB)
            (libraryName, logicalDB, libraryID, segmentType, vectorType,
             organism, strain, tissue, age, gender, cellLine, jnum, note,
             cloneCollections, createdBy) = columns
        except:
            exit(1, 'Invalid Line (line: %d): %s\n' % (lineNum, line))
            continue

        # look the library up by name first, then by accession ID
        libraryKey = sourceloadlib.verifyLibrary(libraryName, lineNum)

        if len(logicalDB) == 0:
            logicalDBKey = 0
        else:
            logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum,
                                                   errorFile)

        if libraryKey == 0 and len(libraryID) > 0:
            libraryKey = sourceloadlib.verifyLibraryID(libraryID, logicalDBKey,
                                                       lineNum, errorFile)

        # resolve every attribute, even after a failure, so that all
        # problems with the line are reported
        # NOTE(review): the organism column is read above but organismKey
        # is never assigned in this function; the validity test below uses
        # whatever value the module-level organismKey already holds --
        # confirm this is intended
        segmentTypeKey = sourceloadlib.verifySegmentType(
            segmentType, lineNum, errorFile)
        vectorTypeKey = sourceloadlib.verifyVectorType(vectorType, lineNum,
                                                       errorFile)
        strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
        tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
        genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
        cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum,
                                                   errorFile)
        ageMin, ageMax = sourceloadlib.verifyAge(age, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        recordIsValid = (segmentTypeKey != 0 and
                         vectorTypeKey != 0 and
                         strainKey != 0 and
                         tissueKey != 0 and
                         genderKey != 0 and
                         cellLineKey != 0 and
                         organismKey != 0 and
                         referenceKey != 0 and
                         createdByKey != 0 and
                         ageMin is not None)

        # report the library by name and move on to the next record
        if not recordIsValid:
            errorFile.write('Errors:  %s\n' % (libraryName))
            continue

        if libraryKey != 0:
            # existing library
            updateLibrary()
        else:
            # new library: claim the next primary key
            libraryKey = nextLibraryKey
            addLibrary()
            nextLibraryKey += 1

        addCloneCollections(cloneCollections)

    return
Esempio n. 18
0
def processFile():
    # Purpose: read the probe/marker input file and, for every line that
    #          passes verification, write one association row per marker
    #          and queue the matching delete statement
    # Returns: nothing
    # Assumes: inputFile, errorFile, markerFile, deleteSQL, execSQL and
    #          loaddate have been initialized by the rest of the script
    # Effects: writes to markerFile and errorFile; appends to execSQL
    # Throws: nothing directly; a line with fewer than 5 columns is handed
    #         to exit()

    global execSQL

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = line[:-1].split('\t')

        try:
            probeID = tokens[0]
            markerIDs = tokens[1].split('|')
            jnum = tokens[2]
            relationship = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # marker IDs: keep each ID next to its key so that later messages
        # can name the ID they are about.  The input has no name column,
        # so messages identify the record by its probe ID.

        markerList = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (probeID, markerID))
                error = 1
            else:
                markerList.append((markerID, markerKey))

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # a marker key that occurs more than once on the line is reported
        # (once per occurrence) and not written at all
        markerKeys = [markerKey for (markerID, markerKey) in markerList]
        for markerID, markerKey in markerList:
            if markerKeys.count(markerKey) == 1:
                markerFile.write('%s|%s|%d|%s|%s|%s|%s|%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' %
                                (probeID, markerID))
Esempio n. 19
0
def processFile():
    # Purpose: Read the input file, resolve values to keys. Create bcp files
    # Returns: 0 (always; lines that fail verification are skipped, not
    #          reported through the return value)
    # Assumes: file descriptors, the key counters named in the global
    #          statements and the other module-level values used below
    #          have been initialized
    # Effects: exits (via exit()) if a line has fewer than 16 columns;
    #          writes to the allele, mutation, reference, annotation,
    #          accession, note and new-allele report files; prints progress
    #          to stdout; advances the key counters; when DEBUG is 'false',
    #          calls ACC_setMax and commits
    # Throws: Nothing

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup, alleleMutationKey

    lineNum = 0
    # For each line in the input file

    for line in fpInputFile.readlines():

        lineNum = lineNum + 1
        print('%s: %s' % (lineNum, line))
        # Split the line into tokens
        tokens = line[:-1].split('\t')
        try:
            markerID = tokens[0]
            markerSymbol = tokens[1]
            mutationType = tokens[2]  # IMPC allele type
            description = tokens[3]
            colonyID = tokens[4]
            strainOfOrigin = tokens[5]
            alleleSymbol = tokens[6]
            alleleName = tokens[7]
            inheritanceMode = tokens[8]
            alleleType = tokens[9]  # IMPC allele class
            alleleSubType = tokens[10]
            alleleStatus = tokens[11]
            transmission = tokens[12]
            collection = tokens[13]
            jNum = tokens[14]
            createdBy = tokens[15]

        except IndexError:
            print('exiting with invalid line')
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        print('validating data and getting keys')
        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, fpErrorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        # ';'-delimited; terms that do not verify are dropped
        mutationList = mutationType.split(';')
        if len(mutationList) > 1:
            print('mutationList: %s' % mutationList)
        mutationKeyList = []
        for m in mutationList:
            mutationKey = loadlib.verifyTerm('', 36, m, lineNum, fpErrorFile)
            if mutationKey != 0:
                mutationKeyList.append(mutationKey)
        if len(mutationKeyList) > 1:
            print('mutationKeyList: %s' % mutationKeyList)
        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum,
                                                       fpErrorFile)

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode,
                                                lineNum, fpErrorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum,
                                           fpErrorFile)

        # _vocab_key = 93 (Allele Subtype)
        # ';'-delimited and optional; terms that do not verify are dropped
        subTypeList = alleleSubType.split(';')
        if len(subTypeList) > 1:
            print('subTypeList: %s' % subTypeList)
        subTypeKeyList = []
        for s in subTypeList:
            if s != '':  # if we have a subtype, get its key
                subTypeKey = loadlib.verifyTerm('', 93, s, lineNum,
                                                fpErrorFile)
                if subTypeKey != 0:
                    subTypeKeyList.append(subTypeKey)
        if len(subTypeKeyList) > 1:
            print('subTypeKeyList: %s' % subTypeKeyList)

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum,
                                             fpErrorFile)

        # _vocab_key = 61 (Allele Transmission)
        transmissionKey = loadlib.verifyTerm('', 61, transmission, lineNum,
                                             fpErrorFile)

        # _vocab_key = 92
        collectionKey = loadlib.verifyTerm('', 92, collection, lineNum,
                                           fpErrorFile)

        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # reference
        refKey = loadlib.verifyReference(jNum, lineNum, fpErrorFile)

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, fpErrorFile)
        if createdByKey == 0:
            continue

        print('checking for missing data')
        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        # (createdByKey was already checked above)
        if markerKey == 0 \
                or mutationKeyList == [] \
                or strainOfOriginKey == 0 \
                or inheritanceModeKey == 0 \
                or alleleTypeKey == 0 \
                or alleleStatusKey == 0 \
                or transmissionKey == 0 \
                or collectionKey == 0 \
                or refKey == 0:
            print('missing data, skipping this line')
            continue

        # if no errors, process the allele
        print('writing to allele file')
        # allele (isWildType = 0)
        fpAlleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, transmissionKey, collectionKey, alleleSymbol, alleleName, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutationKey in mutationKeyList:
            fpMutationFile.write('%s|%s|%s|%s|%s\n' \
                % (alleleMutationKey, alleleKey, mutationKey, loaddate, loaddate))
            alleleMutationKey += 1

        # reference associations: the same reference is attached twice,
        # once per association type

        # Original
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, origRefTypeKey, \
                        createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # Molecular
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, molRefTypeKey, \
                        createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # allele subtype
        for subTypeKey in subTypeKeyList:
            fpAnnotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, alleleKey, subTypeKey, \
                            qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        # MGI Accession ID for the allele
        alleleID = '%s%s' % (mgiPrefix, mgiKey)
        fpAccFile.write('%s|%s|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, alleleID, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note
        # molecular note

        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, molecularNoteTypeKey, description,\
               createdByKey, createdByKey, loaddate, loaddate))

        noteKey = noteKey + 1

        # colony ID note (same nine columns as the molecular note row)
        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, colonyIdNoteTypeKey, colonyID, \
               createdByKey, createdByKey, loaddate, loaddate))

        noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs
        # as the first field

        fpNewAlleleRptFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
        % (mgi_utils.prvalue(alleleID), \
        mgi_utils.prvalue(alleleSymbol), \
        mgi_utils.prvalue(alleleName), \
        mgi_utils.prvalue(markerID), \
        mgi_utils.prvalue(markerSymbol), \
        mgi_utils.prvalue(colonyID)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #
    # Update the AccessionMax value
    #
    print('DEBUG: %s' % DEBUG)
    if DEBUG == 'false':
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()

    return 0
Esempio n. 20
0
def processAssayFile():
    # requires:
    #   The module-level file handles (inAssayFile, outAssayFile,
    #   outAssayNoteFile, outAccFile, errorFile), the assayProbePrep and
    #   assayAssay dictionaries, the key counters and the constants
    #   referenced below are initialized before this is called.
    #
    # effects:
    #   Reads the tab-delimited assay input file.
    #   Verifies each line; a line that fails any verification is skipped.
    #   For each valid line writes one assay row, one row per
    #   ASSAY_NOTE_LENGTH-sized chunk of the note, and one MGI accession row.
    #   Records assayID -> assayKey in assayAssay and advances assayKey,
    #   accKey and mgiKey.
    #   Calls exit() if a line has fewer than 7 fields.
    #
    # returns:
    #   the number of lines read from the input file
    #

    global assayAssay, assayKey, accKey, mgiKey

    lineNum = 0

    for lineNum, line in enumerate(inAssayFile.readlines(), 1):

        # Strip only the line terminator.  line[:-1] would also drop the
        # last character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split(TAB)

        # Extra trailing fields are ignored; too few fields is fatal.
        try:
            (assayID, markerID, jnum, assayType,
             reporterGene, note, createdBy) = tokens[:7]
        except ValueError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        assayTypeKey = gxdloadlib.verifyAssayType(assayType, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # A key of 0 marks the line as an error.  The creator key is
        # checked as well, as the other loaders in this file do, so that a
        # row with a 0 creator is never written.
        error = markerKey == 0 or referenceKey == 0 \
            or assayTypeKey == 0 or createdByKey == 0

        # the reporter gene is optional; only verify it when given
        if reporterGene:
            reporterGeneKey = gxdloadlib.verifyReporterGene(reporterGene, lineNum, errorFile)
            if reporterGeneKey == 0:
                error = 1
        else:
            reporterGeneKey = ''

        # if errors, continue to next record
        if error:
            continue

        # probe prep key is blank when none is registered for this assay
        probePrepKey = assayProbePrep.get(assayID, '')

        # if no errors, process

        # the two columns after the probe prep key are left empty
        assayRow = [
            str(assayKey),
            str(assayTypeKey),
            str(referenceKey),
            str(markerKey),
            str(probePrepKey),
            '',
            '',
            str(reporterGeneKey),
            str(createdByKey),
            str(createdByKey),
            loaddate,
            loaddate,
        ]
        outAssayFile.write(TAB.join(assayRow) + CRT)

        # the note is written in chunks of at most ASSAY_NOTE_LENGTH characters
        for start in range(0, len(note), ASSAY_NOTE_LENGTH):
            noteRow = [
                str(assayKey),
                note[start:start + ASSAY_NOTE_LENGTH],
                loaddate,
                loaddate,
            ]
            outAssayNoteFile.write(TAB.join(noteRow) + CRT)

        # MGI Accession ID for the assay

        accRow = [
            str(accKey),
            mgiPrefix + str(mgiKey),
            mgiPrefix,
            str(mgiKey),
            accLogicalDBKey,
            str(assayKey),
            assayMgiTypeKey,
            accPrivate,
            accPreferred,
            str(createdByKey),
            str(createdByKey),
            loaddate,
            loaddate,
        ]
        outAccFile.write(TAB.join(accRow) + CRT)

        assayAssay[assayID] = assayKey
        accKey = accKey + 1
        mgiKey = mgiKey + 1
        assayKey = assayKey + 1

    return lineNum
Esempio n. 21
0
def processFile():
    # requires:
    #   The module-level file handles (inputFile, markerFile, errorFile),
    #   the execSQL list, the deleteSQL template and loaddate are
    #   initialized before this is called.
    #
    # effects:
    #   Reads the tab-delimited input file of probe/marker associations
    #   (probe ID, '|'-separated marker IDs, J number, relationship,
    #   creator).
    #   Verifies each line; a line with any invalid value is reported in
    #   the error file and skipped.
    #   For each marker that occurs exactly once on a valid line, writes a
    #   probe/marker row and queues the matching delete statement in
    #   execSQL.  A marker listed more than once on a line is reported and
    #   not written.
    #   Calls exit() if a line has fewer than 5 fields.
    #
    # returns:
    #   nothing
    #

    global execSQL

    lineNum = 0

    for lineNum, line in enumerate(inputFile.readlines(), 1):

        error = 0

        # Strip only the line terminator.  line[:-1] would also drop the
        # last character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID, markerField, jnum, relationship, createdBy = tokens[:5]
        except ValueError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        markerIDs = markerField.split('|')

        probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # marker IDs
        # Keep each ID next to its key so that messages written later name
        # the marker they are about.

        markers = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if markerKey == 0:
                # report against the probe ID; this input has no name field
                errorFile.write('Invalid Marker:  %s, %s\n' % (probeID, markerID))
                error = 1
            else:
                markers.append((markerID, markerKey))

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        markerKeys = [key for (mID, key) in markers]

        for markerID, markerKey in markers:
            if markerKeys.count(markerKey) == 1:
                markerFile.write('%s|%s|%d|%s|%s|%s|%s|%s\n' \
                    % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
                execSQL.append(deleteSQL % (probeKey, markerKey))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' % (probeID, markerID))
Esempio n. 22
0
def processFile():
    # requires:
    #   The module-level file handles (inputFile, refFile, aliasFile,
    #   errorFile), the refKey and aliasKey counters and loaddate are
    #   initialized before this is called.
    #
    # effects:
    #   Reads the tab-delimited input file of probe aliases (probe ID or
    #   probe name, J number, '|'-separated aliases, creator).
    #   Verifies each line; a line with an invalid probe, reference or
    #   creator is reported in the error file and skipped.
    #   Writes a new probe-reference row when the probe/reference pair has
    #   no existing key, then one alias row per non-empty alias.
    #   Advances refKey and aliasKey as rows are written.
    #   Calls exit() if a line has fewer than 4 fields.
    #
    # returns:
    #   nothing
    #

    global refKey, aliasKey

    lineNum = 0

    for lineNum, line in enumerate(inputFile.readlines(), 1):

        error = 0

        # Strip only the line terminator.  line[:-1] would also drop the
        # last character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeName, jnum, aliasField, createdBy = tokens[:4]
        except ValueError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeID = probeName
        aliasList = aliasField.split('|')

        # A first field containing 'MGI:' is verified as an accession ID;
        # anything else goes to this file's verifyProbe(), which returns
        # both the probe key and the probe ID.
        if 'MGI:' in probeID:
            probeKey = loadlib.verifyProbe(probeID, lineNum, errorFile)
        else:
            probeKey, probeID = verifyProbe(probeName, lineNum, errorFile)

        probeReferenceKey = verifyProbeReference(probeID, jnum, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if probeKey == 0:
            errorFile.write('Invalid Probe:  %s\n' % (probeID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        # A probeReferenceKey of 0 is not an error: it means the
        # probe-reference row is created below.

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # create a new probe-reference key if one does not already exist
        # else use the existing probe-reference key

        if probeReferenceKey == 0:
            refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
                % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            aliasrefKey = refKey
            refKey = refKey + 1
        else:
            aliasrefKey = probeReferenceKey

        # aliases; empty entries (e.g. from a blank alias field) are skipped

        for alias in aliasList:

            if not alias:
                continue

            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (aliasKey, aliasrefKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1
Esempio n. 23
0
def processFile():
    # requires:
    #	The module-level input/output file handles, key counters,
    #	alleleLookup and the constants referenced below are initialized
    #	before this is called.
    #
    # effects:
    #	Reads the tab-delimited allele input file (24 fields per line).
    #	Lines whose creator cannot be verified are skipped.
    #	Lines with createMCL, createNote or setStatus set are handed to
    #	processFileIKMC() and nothing else is written for them.
    #	For every other line whose keys all verify, writes the allele,
    #	molecular mutation, reference, subtype annotation, accession and
    #	note rows, writes a line to the new-allele report, records the
    #	allele in alleleLookup and advances the key counters.
    #	Unless DEBUG is set, calls ACC_setMax and commits at the end.
    #
    # returns:
    #	nothing
    #

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        # NOTE(review): error is assigned here but never read in this function.
        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        # NOTE(review): line[:-1] also removes the last character of a final
        # line that has no trailing newline.
        tokens = line[:-1].split('\t')
	#print line
        try:
	    markerID = tokens[0]
	    symbol = tokens[1]
	    name = tokens[2]
	    alleleStatus = tokens[3]
	    alleleType = tokens[4]
	    alleleSubtypes = tokens[5]
	    collectionKey = tokens[6]
	    germLine = tokens[7]
	    references = tokens[8]
	    strainOfOrigin = tokens[9]
	    mutantCellLine = tokens[10]
	    molecularNotes = tokens[11]
	    driverNotes = tokens[12]
	    ikmcNotes = tokens[13]
	    mutations = tokens[14]
	    inheritanceMode = tokens[15]
	    isMixed = tokens[16]
	    isExtinct = tokens[17]
	    createdBy = tokens[18]
	    createMCL = tokens[19]
	    createNote = tokens[20]
	    setStatus = tokens[21]
	    existingAlleleID = tokens[22]
	    ikmcSymbol = tokens[23]
        except:
            # too few fields on the line is fatal
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

	# creator
	createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

	# processing for IKMC-only
	# any of the three IKMC action fields being non-empty routes the line
	# to processFileIKMC() and skips the regular allele processing below
	if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
		processFileIKMC(createMCL, createNote, setStatus, \
			symbol, ikmcSymbol, mutantCellLine, ikmcNotes, \
			createdByKey, existingAlleleID)
		continue

	# marker key
	markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

	# hard-coded
	# _vocab_key = 73 (Marker-Allele Association Status)
	# _term_key = 4268545 (Curated)
	markerStatusKey = 4268545

	# _vocab_key = 37 (Allele Status)
	alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

	# _vocab_key = 38 (Allele Type)
	alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

	# _vocab_key = 61 (Allele Transmission)
	germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

	# _vocab_key = 36 (Allele Molecular Mutation)
	# only split here; each mutation term is verified when it is written
	allMutations = mutations.split('|')

	# _vocab_key = 35
	# NOTE(review): this comment originally read "(Allele Status)", the same
	# label as vocab 37; the value verified is the inheritance mode --
	# confirm the vocabulary name.
	inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

	# strains
	strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

	# reference
	# NOTE(review): jnum is not assigned anywhere in this function;
	# presumably a module-level value -- confirm it is defined.
	refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
	# errors are stored (via loadlib) in the .error log
	# NOTE(review): allMutations is a list, so "allMutations == 0" below
	# can never be true; markerStatusKey is the constant set above.

        if markerKey == 0 \
		or markerStatusKey == 0 \
		or alleleStatusKey == 0 \
		or alleleTypeKey == 0 \
		or germLineKey == 0 \
		or allMutations == 0 \
		or inheritanceModeKey == 0 \
		or strainOfOriginKey == 0 \
		or refKey == 0 \
		or createdByKey == 0:
            continue

        # if no errors, process the allele

	# not specified/testing
	#collectionKey = 11025586

	# allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
	    alleleStatusKey, germLineKey, collectionKey, symbol, name, \
	    isExtinct, isMixed, refKey, markerStatusKey, \
	    createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

	# molecular mutation
	# NOTE(review): mutationKey is written without being checked for 0.
	for mutation in allMutations:
		mutationKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
        	mutationFile.write('%s|%s|%s|%s\n' \
	    	% (alleleKey, mutationKey, loaddate, loaddate))

	#
	# allele references
	#
	# references is a '||'-separated list of 'type|J number' pairs
	# NOTE(review): refKey is reassigned in this loop, so the value used in
	# the new-allele report further down is the last reference's key.
	# NOTE(review): refAssocTypeKey is only assigned for the three types
	# tested; any other refType leaves it unbound or holding the value
	# from a previous iteration.
	allReferences = references.split('||')
	for reference in allReferences:
		refType, refID = reference.split('|')
		refKey = loadlib.verifyReference(refID, lineNum, errorFile)

		if refType == 'Original':
			refAssocTypeKey = 1011
		elif refType == 'Transmission':
			refAssocTypeKey = 1023
		elif refType == 'Molecular':
			refAssocTypeKey = 1012

        	refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
	    		% (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
	       		createdByKey, createdByKey, loaddate, loaddate))
		refAssocKey = refAssocKey + 1

	#
	# allele subtypes
	#
	# one annotation row per '|'-separated subtype
	allSubtypes = alleleSubtypes.split('|')
	for s in allSubtypes:

		# _vocab_key = 93 (Allele Subtype)
		alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)

        	annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                	% (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
        			qualifierKey, loaddate, loaddate))
		annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele

        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
	       createdByKey, createdByKey, loaddate, loaddate))

	# storing data in MGI_Note/MGI_NoteChunk
	# molecular notes
	# one note row plus one chunk row (sequence number 1) per allele

	mgiNoteSeqNum = 1
	if len(molecularNotes) > 0:

	    noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
		% (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
		   createdByKey, createdByKey, loaddate, loaddate))

	    noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
		% (noteKey, mgiNoteSeqNum, molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

	    noteKey = noteKey + 1

	# driver notes
	# TR12662/MGI_Relationship._Category_key = 1006
	# removed noteFile code
	# placeholder for MGI_Relationship code
	# the IKMC is the only product using this and IKMC does not add any driver note
	#mgiNoteSeqNum = 1
	#if len(driverNotes) > 0:

	# ikmc notes
	# useIKMCnotekey stays 0 when the line has no IKMC note
	useIKMCnotekey = 0
	if len(ikmcNotes) > 0:

	    noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
		% (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
		   createdByKey, createdByKey, loaddate, loaddate))

	    noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
		% (noteKey, 1, ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

	    useIKMCnotekey = noteKey
	    noteKey = noteKey + 1

	# Print out a new text file and attach the new MGI Allele IDs as the last field
	# NOTE(review): in the non-IKMC branch, alleleSubtype and collection are
	# not assigned anywhere in this function (the parsed fields are
	# alleleSubtypes and collectionKey) -- presumably a NameError unless
	# they exist at module level; confirm.
	# NOTE(review): also confirm that the number of %s placeholders in the
	# non-IKMC format string matches the number of values supplied, and
	# that allMutations (a list) is the intended value for that column.

	if createdBy == 'ikmc_alleleload':
        	newAlleleFile.write('%s\t%s%s\t%s\n' \
	    	% (mgi_utils.prvalue(ikmcNotes), \
			mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
			mgi_utils.prvalue(ikmcSymbol)))
	else:
        	newAlleleFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%s\n' \
	    	% (mgi_utils.prvalue(markerID), \
	       	mgi_utils.prvalue(symbol), \
	       	mgi_utils.prvalue(name), \
	       	mgi_utils.prvalue(alleleStatus), \
	       	mgi_utils.prvalue(alleleType), \
	       	mgi_utils.prvalue(alleleSubtype), \
	       	mgi_utils.prvalue(collection), \
	       	mgi_utils.prvalue(germLine), \
	       	mgi_utils.prvalue(references), \
	       	mgi_utils.prvalue(strainOfOrigin), \
	       	mgi_utils.prvalue(mutantCellLine), \
	       	mgi_utils.prvalue(allMutations), \
	       	mgi_utils.prvalue(inheritanceMode), \
	       	mgi_utils.prvalue(isMixed), \
	       	mgi_utils.prvalue(isExtinct), \
	       	mgi_utils.prvalue(refKey), \
	       	mgi_utils.prvalue(markerStatusKey), \
	       	mgi_utils.prvalue(createdBy), \
	       	mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey)))

	# save symbol/alleleKey/ikmc note key
	# the entry for a symbol is replaced, not extended, if the symbol repeats
	alleleLookup[symbol] = []
	alleleLookup[symbol].append((alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #
    # lineNum is the number of lines read, including skipped lines

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
	db.commit()
Esempio n. 24
0
def processFile():
    # requires:
    #   The module-level file handles (inputFile, primerFile, markerFile,
    #   refFile, aliasFile, accFile, accRefFile, noteFile, newPrimerFile,
    #   errorFile), the key counters and the constants referenced below
    #   are initialized before this is called.
    #
    # effects:
    #   Reads the tab-delimited primer input file (12 fields per line).
    #   Verifies each line; a line with an invalid marker, reference or
    #   creator is reported in the error file and skipped.
    #   For each valid line writes the primer, marker, reference, alias,
    #   accession and note rows plus a line in the new-primer report, and
    #   advances the key counters.
    #   Unless DEBUG is set, calls ACC_setMax with the number of lines read.
    #   Calls exit() if a line has fewer than 12 fields.
    #
    # returns:
    #   nothing
    #

    global primerKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    for lineNum, line in enumerate(inputFile.readlines(), 1):

        error = 0

        # Strip only the line terminator.  line[:-1] would also drop the
        # last character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        # markerSymbol is not verified; it is only echoed to the report
        try:
            (markerSymbol, markerField, name, jnum, regionCovered,
             sequence1, sequence2, productSize, notes, sequenceIDs,
             aliasField, createdBy) = tokens[:12]
        except ValueError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        markerIDs = markerField.split('|')
        aliasList = aliasField.split('|')

        # marker IDs
        # Keep each ID next to its key so that the duplicate message below
        # names the marker it is about.  Empty IDs are ignored.

        markers = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markers.append((markerID, markerKey))

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # Reject unresolved references and creators the same way the probe
        # loader in this file does, instead of writing rows with a 0 key.
        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # sequence IDs
        seqAccList = sequenceIDs.split('|')

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the primer

        primerFile.write('%d\t%s\t\t%d\t%d\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\n' \
            % (primerKey, name, NA, vectorKey, segmentTypeKey, mgi_utils.prvalue(sequence1), \
            mgi_utils.prvalue(sequence2), mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(productSize), \
            createdByKey, createdByKey, loaddate, loaddate))

        # A marker listed more than once on the line is reported and not
        # written.
        # NOTE(review): relationship is not assigned in this function; it is
        # read from module scope -- confirm it is defined there.
        markerKeys = [key for (mID, key) in markers]

        for markerID, markerKey in markers:
            if markerKeys.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
                    % (primerKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' %
                                (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' %
                      (refKey, primerKey, referenceKey, createdByKey,
                       createdByKey, loaddate, loaddate))

        # aliases; empty entries are skipped

        for alias in aliasList:
            if not alias:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
                    % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the primer

        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # echo the input line with the new MGI ID as the last field

        newPrimerFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
           % (markerSymbol, '|'.join(markerIDs), name, jnum, regionCovered, sequence1, sequence2, productSize, notes, sequenceIDs, createdBy, mgiPrefix, mgiKey))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids; each gets an accession row and an
        # accession-reference row

        for acc in seqAccList:

            if not acc:
                continue

            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, logicalDBKey, primerKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        # notes

        if len(notes) > 0:
            noteFile.write('%s|1\t%s\t%s\t%s\n' \
                % (primerKey, notes, loaddate, loaddate))

        refKey = refKey + 1
        primerKey = primerKey + 1

    #
    # Update the AccessionMax value
    # lineNum is the number of lines read, including skipped lines
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
Esempio n. 25
0
def init():
    # requires:
    #   The module-level settings read below (user, passwordFileName,
    #   inputFileName, logDir, outputDir, mgiType, createdBy, jnum, mode)
    #   are set before this is called.
    #
    # effects:
    # 1. Initializes the database connection parameters
    # 2. Initializes global file names and opens the input, diagnostics,
    #    error and synonym output files
    # 3. Initializes global keys (MGI type, creator, reference)
    # 4. In 'reload' mode, deletes the existing synonyms for this MGI type
    #    and creator
    #
    #   Calls exit() if a file cannot be opened or a key cannot be resolved.
    #
    # returns:
    #   nothing
    #

    global diagFileName, errorFileName, synFileName
    global inputFile, diagFile, errorFile, synFile
    global mgiTypeKey, createdByKey, referenceKey

    def openOrExit(fileName, fileMode):
        # Open fileName in fileMode; report the path through exit() when
        # the open fails.
        try:
            return open(fileName, fileMode)
        except (IOError, OSError):
            exit(1, 'Could not open file %s\n' % fileName)

    db.useOneConnection(1)
    db.set_sqlUser(user)
    db.set_sqlPasswordFromFile(passwordFileName)

    # log files are named after the input file, without its directory
    tail = os.path.basename(inputFileName)
    diagFileName = logDir + '/' + tail + '.diagnostics'
    errorFileName = logDir + '/' + tail + '.error'
    synFileName = 'MGI_Synonym.bcp'

    print(inputFileName)
    print(logDir)

    inputFile = openOrExit(inputFileName, 'r')
    diagFile = openOrExit(diagFileName, 'w')
    errorFile = openOrExit(errorFileName, 'w')
    # the failure message names the full path that was tried, not just
    # the bare file name
    synFile = openOrExit(outputDir + '/' + synFileName, 'w')

    # Log all SQL
    db.set_sqlLogFunction(db.sqlLogAll)

    diagFile.write('Start Date/Time: %s\n' % (mgi_utils.date()))
    diagFile.write('Server: %s\n' % (db.get_sqlServer()))
    diagFile.write('Database: %s\n' % (db.get_sqlDatabase()))
    diagFile.write('Object Type: %s\n' % (mgiType))
    diagFile.write('Input File: %s\n' % (inputFileName))

    errorFile.write('Start Date/Time: %s\n\n' % (mgi_utils.date()))

    mgiTypeKey = loadlib.verifyMGIType(mgiType, 0, errorFile)
    createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

    # if reference is J:0, then no reference is given; the empty string
    # also keeps the "== 0" check below from firing
    if jnum == 'J:0':
        referenceKey = ''
    else:
        referenceKey = loadlib.verifyReference(jnum, 0, errorFile)

    # exit if we can't resolve mgiType, createdBy or jnum
    if mgiTypeKey == 0 or createdByKey == 0 or referenceKey == 0:
        exit(1)

    if mode == 'reload':
        print('mode is: %s, deleting synonyms' % mode)
        sys.stdout.flush()
        db.sql('delete from MGI_Synonym ' + \
            'where _MGIType_key = %d ' % (mgiTypeKey) + \
            'and _CreatedBy_key = %d ' % (createdByKey), None)
Esempio n. 26
0
def processFile():
    # requires:
    #	The module-level file handles, key counters and constants
    #	referenced below are initialized before this is called.
    #
    # effects:
    #	Reads the tab-delimited probe input file (22 fields per line).
    #	Resolves the probe's source in one of three ways, depending on
    #	whether a parent probe ID and/or a source name is given.
    #	Lines that fail verification are skipped.
    #	For each valid line writes the probe, marker, reference, alias,
    #	accession, accession-reference and note rows, a line in the
    #	new-probe report and (when present) a raw-note line, and advances
    #	the key counters.
    #	Unless DEBUG is set, calls ACC_setMax with the number of lines read.
    #
    # returns:
    #	nothing
    #

    global probeKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        # NOTE(review): line[:-1] also removes the last character of a final
        # line that has no trailing newline.
        tokens = string.split(line[:-1], '\t')

        try:
            name = tokens[0]
            jnum = tokens[1]
            parentID = tokens[2]
            sourceName = tokens[3]
            organism = tokens[4]
            strain = tokens[5]
            tissue = tokens[6]
            gender = tokens[7]
            cellLine = tokens[8]
            age = tokens[9]
            vectorType = tokens[10]
            segmentType = tokens[11]
            regionCovered = tokens[12]
            insertSite = tokens[13]
            insertSize = tokens[14]
            markerIDs = string.split(tokens[15], '|')
            relationship = tokens[16]
            sequenceIDs = tokens[17]
            aliasList = string.split(tokens[18], '|')
            notes = tokens[19]
            rawnotes = tokens[20]
            createdBy = tokens[21]
        except:
            # too few fields on the line is fatal
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        isParent = 0
        isSource = 0
        parentProbeKey = ''
        sourceKey = 0

        if parentID != '':
            isParent = 1

        if sourceName != '':
            isSource = 1

        # parent from = no, source given = no:
        # resolve the source from its individual attributes
        if not isParent and not isSource:
            organismKey = sourceloadlib.verifyOrganism(organism, lineNum,
                                                       errorFile)
            strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
            tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
            genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
            cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum,
                                                       errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum,
                                                       errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifySource(segmentTypeKey, \
         vectorKey, organismKey, strainKey, \
         tissueKey, genderKey, cellLineKey, age, lineNum, errorFile)

            if organismKey == 0 or strainKey == 0 or tissueKey == 0 or \
                      genderKey == 0 or cellLineKey == 0 or vectorKey == 0 or \
                      segmentTypeKey == 0 or sourceKey == 0:
                errorFile.write('%s, %s, %s, %s, %s, %s, %s, %s\n' %
                                (segmentType, vectorType, organism, strain,
                                 tissue, gender, cellLine, age))
                error = 1

        # parent from = no, source given = yes:
        # resolve the source by its library name
        elif not isParent and isSource:
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum,
                                                       errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifyLibrary(sourceName, lineNum,
                                                    errorFile)

            if vectorKey == 0 or segmentTypeKey == 0 or sourceKey == 0:
                error = 1

# parent from = yes, source given = yes or no (ignored)
# the source key comes from the parent probe
        else:
            parentProbeKey, sourceKey = verifyParentProbe(
                parentID, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum,
                                                       errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)

            if parentProbeKey == 0 or sourceKey == 0 or vectorKey == 0 or segmentTypeKey == 0:
                error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

# marker IDs
# empty IDs are ignored; a non-empty ID that does not verify is an error

        markerList = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markerList.append(markerKey)

# sequence IDs
# each entry is 'logicalDB:accession'; the dictionary maps accession -> logical DB key
# NOTE(review): an entry without exactly one ':' makes the unpacking below
# raise ValueError; an entry whose logical DB does not verify is dropped
# without setting the error flag.
        seqAccDict = {}
        for seqID in string.split(sequenceIDs, '|'):
            if len(seqID) > 0:
                [logicalDB, acc] = string.split(seqID, ':')
                logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum,
                                                       errorFile)
                if logicalDBKey > 0:
                    seqAccDict[acc] = logicalDBKey

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the probe

        probeFile.write('%d\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\n' \
            % (probeKey, name, parentProbeKey, sourceKey, vectorKey, segmentTypeKey, mgi_utils.prvalue(regionCovered), \
     mgi_utils.prvalue(insertSite), mgi_utils.prvalue(insertSize), createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once on the line is reported, not written
        # NOTE(review): markerID in the duplicate message is whatever the
        # verification loop above left behind (the last ID on the line), not
        # necessarily the duplicated marker.
        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
      % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' %
                                (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
  % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
      % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the probe

        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # Print out a new text file and attach the new MGI Probe IDs as the last field
        # NOTE(review): parentID is not written to this report, and
        # regionCovered and insertSite are joined with "+" into a single
        # column -- possibly a missing comma; check against the number of
        # placeholders in the format string before changing it.

        newProbeFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
     % (name, jnum, \
     mgi_utils.prvalue(sourceName), \
     organism, \
     mgi_utils.prvalue(strain), \
     mgi_utils.prvalue(tissue), \
     mgi_utils.prvalue(gender), \
     mgi_utils.prvalue(cellLine), \
     mgi_utils.prvalue(age), \
     mgi_utils.prvalue(vectorType), \
     mgi_utils.prvalue(segmentType), \
     mgi_utils.prvalue(regionCovered) + \
     mgi_utils.prvalue(insertSite), \
     mgi_utils.prvalue(insertSize), \
     string.join(markerIDs, '|'), \
     relationship, \
     mgi_utils.prvalue(sequenceIDs), \
     string.join(aliasList, '|'), \
     mgi_utils.prvalue(notes), \
     createdBy, mgiPrefix, mgiKey))

        # Print out a raw note file
        # keyed by the new MGI ID rather than the probe key

        if len(rawnotes) > 0:
            rawNoteFile.write('%s%d\t%s\n' % (mgiPrefix, mgiKey, rawnotes))

# Notes

        if len(notes) > 0:
            noteFile.write('%s\t%s\t%s\t%s\n' %
                           (probeKey, notes, loaddate, loaddate))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        # each gets an accession row and an accession-reference row
        for acc in seqAccDict.keys():
            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, seqAccDict[acc], probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        refKey = refKey + 1
        probeKey = probeKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #
    # lineNum is the number of lines read, including skipped lines

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)