コード例 #1
0
ファイル: probedelete.py プロジェクト: mgijax/probeload
def processFile():
    """Delete every probe listed in the input file.

    Reads ``inputFile`` line by line; the first tab-delimited field of each
    line is taken as the probe accession ID and passed to
    ``loadlib.verifyObject``. IDs that come back with key 0 are skipped.
    For the rest, ``deleteSQL`` is either printed (when ``DEBUG`` is set)
    or executed through ``db.sql``.
    """

    for lineNum, line in enumerate(inputFile.readlines(), 1):

        # Strip only the line terminator: slicing with [:-1] would eat the
        # last character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = tokens[0]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyObject(probeID, mgiTypeKey, None, lineNum,
                                        errorFile)
        if probeKey == 0:
            continue

        statement = deleteSQL % (probeKey)

        if DEBUG:
            # Single-argument call form prints the same text whether print
            # is a statement or a function.
            print(statement)
            continue

        db.sql(statement, None)
コード例 #2
0
ファイル: probedelete.py プロジェクト: mgijax/probeload
def processFile():
    """Remove the probes named in the input file from the database.

    Each line of ``inputFile`` is split on tabs and its first field is
    resolved with ``loadlib.verifyObject``. A key of 0 means the line is
    skipped. Otherwise ``deleteSQL`` is filled in with the key and either
    printed (``DEBUG``) or run through ``db.sql``.
    """

    lineNum = 0

    for line in inputFile.readlines():

        lineNum += 1

        # rstrip('\n') instead of line[:-1]: a last line without a newline
        # must not lose its final character.
        tokens = line.rstrip('\n').split('\t')

        try:
            probeID = tokens[0]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        probeKey = loadlib.verifyObject(probeID, mgiTypeKey, None, lineNum, errorFile)

        if probeKey == 0:
            continue

        if DEBUG:
            # dry run: show the statement instead of executing it
            print(deleteSQL % (probeKey))
            continue

        db.sql(deleteSQL % (probeKey), None)
コード例 #3
0
ファイル: DOpostprocess.py プロジェクト: mgijax/vocload
def processSusceptibility():
    """Insert ACC_Accession rows for OMIM terms that carry RO:0003304 links.

    Scans the OBO file named by the ``OBO_FILE`` environment variable.
    Within a ``[Term]`` stanza whose ``id:`` line is an OMIM id (other than
    the ``OMIM:000000`` placeholder), every ``relationship: RO:0003304``
    line produces one insert built from the OMIM id, its prefix/numeric
    parts (``accessionlib.split_accnum``) and the key that
    ``loadlib.verifyObject`` returns for the related id. Commits once after
    the whole file has been read.

    Returns:
        0 on completion.
    """

    # insert statement
    INSERT_ACCESSION = '''insert into ACC_Accession 
      values ((select max(_Accession_key) + 1 from ACC_Accession), 
	   '%s', '%s', %s, 15, %s, 13, 0, 0)
    '''

    omimIdValue = 'id: OMIM:'
    relValue = 'relationship: RO:0003304'
    skipValue = 'OMIM:000000'
    foundOMIM = 0
    omimId = None

    # 'with' guarantees the file is closed even if a lookup or insert raises
    with open(os.environ['OBO_FILE'], 'r') as doFile:

        for rawLine in doFile:

            # Drop the line terminator up front. The stanza test below
            # compared the raw line (which still ended in a newline) with
            # '[Term]', so it never matched and foundOMIM was never reset
            # between terms.
            line = rawLine.rstrip('\r\n')

            if line == '[Term]':
                # new stanza: forget the OMIM id of the previous one
                foundOMIM = 0

            elif line.startswith(omimIdValue):
                # keep everything after 'id: '
                omimId = line[4:]
                if omimId == skipValue:
                    continue
                foundOMIM = 1

            elif foundOMIM and line.startswith(relValue):
                # the related id is the first token after the relationship name
                doId = line[len(relValue) + 1:].split(' ')[0]

                prefixPart, numericPart = accessionlib.split_accnum(omimId)
                objectKey = loadlib.verifyObject(doId, 13, None, None, None)
                # NOTE(review): the statement is assembled by string
                # formatting; values come straight from the OBO file.
                addSQL = INSERT_ACCESSION % (omimId, prefixPart, numericPart, objectKey)
                db.sql(addSQL, None)

    db.commit()
    return 0
コード例 #4
0
ファイル: setload.py プロジェクト: mgijax/setload
def process():
    """Write one set-member record for each valid line of the input file.

    Every line of ``inputFile`` must hold at least two tab-delimited
    fields: the member's accession id and its label. Members that
    ``loadlib.verifyObject`` cannot resolve (key 0) are skipped. Each
    accepted member is written to ``outMemberFile`` as a ``TAB``-delimited
    record, and ``setMemberKey`` and the sequence number advance by one.
    """

    global setKey, setMemberKey

    sequenceNum = 1

    for lineNum, line in enumerate(inputFile.readlines(), 1):

        # strip only the newline so an unterminated last line stays intact
        tokens = line.rstrip('\n').split(TAB)

        try:
            setMember = tokens[0]
            setLabel = tokens[1]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(setMember, mgiTypeKey, "", lineNum,
                                         errorFile)
        if objectKey == 0:
            continue

        # created-by/modified-by and creation/modification dates are
        # written as the same value twice
        fields = [
            str(setMemberKey),
            str(setKey),
            str(objectKey),
            str(setLabel),
            str(sequenceNum),
            str(createdByKey),
            str(createdByKey),
            loaddate,
            loaddate,
        ]
        outMemberFile.write(TAB.join(fields) + CRT)

        setMemberKey = setMemberKey + 1
        sequenceNum = sequenceNum + 1

    return
コード例 #5
0
ファイル: setload.py プロジェクト: mgijax/setload
def process():
    """Turn the input file into set-member records.

    Each line of ``inputFile`` is split on ``TAB``; field 0 is the member
    accession id and field 1 its label. The id is resolved with
    ``loadlib.verifyObject`` and lines that resolve to key 0 are skipped.
    Accepted members are appended to ``outMemberFile`` and
    ``setMemberKey`` is advanced for the next record.
    """

    global setKey, setMemberKey

    lineNum = 0
    sequenceNum = 1

    for line in inputFile.readlines():

        lineNum += 1

        # rstrip('\n') rather than line[:-1], which would chop a real
        # character off a final line lacking a newline
        tokens = line.rstrip('\n').split(TAB)

        try:
            setMember = tokens[0]
            setLabel = tokens[1]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(setMember, mgiTypeKey, "", lineNum, errorFile)

        if objectKey == 0:
            continue

        record = TAB.join([
            str(setMemberKey),
            str(setKey),
            str(objectKey),
            str(setLabel),
            str(sequenceNum),
            str(createdByKey),
            str(createdByKey),
            loaddate,
            loaddate,
        ])
        outMemberFile.write(record + CRT)

        setMemberKey += 1
        sequenceNum += 1

    return
コード例 #6
0
ファイル: referenceload.py プロジェクト: mgijax/referenceload
def processFile():
    """Verify each input line and write reference associations.

    Every line of ``inputFile`` must carry four tab-delimited fields:
    object accession id, J number, reference association type and creator.
    Each field is resolved to a key. If any key is 0 the line is skipped.
    Lines whose object/reference/type key combination is already present
    in ``refDict`` are reported to ``errorFile`` as duplicates. All other
    lines are written to ``refFile`` as a '|'-delimited record and
    ``refAssocKey`` is incremented.

    Returns nothing.
    """

    global refAssocKey

    for lineNum, line in enumerate(inputFile.readlines(), 1):

        # strip only the newline; line[:-1] would truncate an unterminated
        # final line
        tokens = line.rstrip('\n').split('\t')

        try:
            accID = tokens[0]
            jnum = tokens[1]
            refAssocType = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # all four lookups run before this test so every problem on the
        # line gets a chance to be reported
        if 0 in (objectKey, referenceKey, refAssocTypeKey, createdByKey):
            continue

        # could move to verifyDuplicate routine
        # NOTE(review): refDict is only read here, so a pair repeated
        # inside the input file is not caught by this check - confirm
        # that is intended.
        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        if key in refDict:
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))
            continue

        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                      % (refAssocKey, referenceKey, objectKey, mgiTypeKey,
                         refAssocTypeKey, createdByKey, createdByKey,
                         loaddate, loaddate))

        refAssocKey = refAssocKey + 1
コード例 #7
0
def processFile():
    """Read the input file and write translation records.

    The next translation key is taken from the database (1000 when the
    query yields None). Each input line must hold four tab-delimited
    fields: object id, object description, term and user id. The object is
    resolved as a vocabulary term when ``vocabKey`` is positive, otherwise
    as an MGI object. Lines whose object or user resolves to 0 are
    skipped. Accepted lines are written to ``transFile`` via ``bcpWrite``.
    When ``newTransType`` is set, one translation-type record is written
    after the loop.

    Returns nothing.
    """

    results = db.sql(
        'select maxKey = max(_Translation_key) + 1 from MGI_Translation',
        'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        transKey = 1000

    # sequence number of bad name in translation list
    seq = 1

    for lineNum, line in enumerate(inputFile.readlines(), 1):

        # strip only the newline so a final line without one is not
        # truncated
        tokens = line.rstrip('\n').split('\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
            continue

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey,
                                           objectDescription, lineNum,
                                           errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey,
                                             objectDescription, lineNum,
                                             errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)

        # skip the record when either lookup failed
        if objectKey == 0 or userKey == 0:
            continue

        # add term to translation file
        bcpWrite(transFile, [
            transKey, transTypeKey, objectKey, term, seq, userKey, userKey,
            loaddate, loaddate
        ])
        transKey = transKey + 1
        seq = seq + 1

    if newTransType:
        # NOTE(review): userKey is whatever the last input line produced;
        # it is unbound if the input file was empty and may be 0 if the
        # last line failed verification.
        bcpWrite(transTypeFile, [
            transTypeKey, mgiTypeKey, vocabKey, transTypeName,
            transCompression, 0, userKey, userKey, loaddate, loaddate
        ])
コード例 #8
0
ファイル: makeGenotype.py プロジェクト: mgijax/htmpload
def getGenotypes():
    """Resolve each HTMP input line to a genotype and write the output files.

    For every tab-delimited line read from fpHTMPInput this function:

    * verifies the marker, allele and mutant cell line ids and looks the
      strain up in the 'strains' table;
    * for 'Heterozygous' lines, fetches the marker's wild type allele id;
    * queries 'genotypes' for an existing genotype matching the allele
      state ('Homozygous', 'Heterozygous', 'Hemizygous', 'Indeterminate');
      any other state is logged as an error;
    * writes the line to fpHTMPError when a check set the error flag, to
      fpHTMPDup when the same genotype key was already seen in
      genotypeOrderDict, and otherwise writes one genotypeLine record to
      fpGenotype and remembers the genotype order.

    Accepted lines are also grouped by 'order|mpID'; after the loop one
    line per group is written to fpHTMP, prefixed with the order number,
    with 'Male'/'Female' replaced by 'Both' when the group holds more than
    one gender value.

    Returns 0.
    """

    global genotypeOrderDict
    lineNum = 0
    genotypeOrder = 1
    
    # annotations organized by order/mpID
    # 'order' indicates uniq genotype
    # key = order + '|' + mpID
    # value = list of lines
    annotDict = {}

    for line in fpHTMPInput.readlines():

	if DEBUG:
	    print '\nNEW LINE: ', line

	error = 0
	lineNum = lineNum + 1

        tokens = line[:-1].split('\t')

        # sc 2/6/2016 - a subtlety:
        # if genotypeID  remains '', the genotype is not in the database
        # if it is assigned an ID from the database, it is still written to
        # the genotypeload input file because this file is used as input to the
	# annotation load.  The genotypeload will only create
        # a genotype if the genotypeID field is ''

	genotypeID = ''

	phenotypingCenter = tokens[0]
	annotationCenter = tokens[1]

	mutantID = tokens[2]
	mutantID2 = mutantID
	mpID = tokens[3]
        alleleID = tokens[4]
	alleleID2 = alleleID
        alleleState = tokens[5]
        alleleSymbol = tokens[6]
        markerID = tokens[7]
	strainName = tokens[9]
        gender = tokens[10]
        colonyID = tokens[11]

	# marker

	if len(markerID) > 0:
            markerKey = loadlib.verifyMarker(markerID, lineNum, fpLogDiag)
        else:
	    markerKey = 0

        if markerKey == 0:
            logit = errorDisplay % (markerID, lineNum, '8', line)
            fpLogDiag.write(logit)
            fpLogCur.write(logit)
            error = 1

	if DEBUG:
	    print '    markerID: %s markerKey: %s' % (markerID, markerKey)

	# allele

	if len(alleleID) > 0:
            alleleKey = loadlib.verifyObject(alleleID, 11, None, lineNum, fpLogDiag)
        else:
	    alleleKey = 0

        if alleleKey == 0:
            logit = errorDisplay % (alleleID, lineNum, '5', line)
            fpLogDiag.write(logit)
            fpLogCur.write(logit)
            error = 1

	if DEBUG:
	    print '    alleleID: %s alleleKey: %s' % (alleleID, alleleKey)

	# mutant
	# mutantSQL/mutantKey are spliced into the genotype queries below as
	# the comparison operator and value: '= <key>' or 'is null'.
	# NOTE(review): mutantKey2 and mutantSQL2 are not read anywhere in
	# this function.

	if len(mutantID) > 0:
           mutantKey = alleleloadlib.verifyMutnatCellLine(mutantID, lineNum, fpLogDiag)
           mutantKey2 = mutantKey
           mutantSQL = mutantSQL2 = '='

        else:
	    mutantSQL = 'is'
	    mutantKey = 'null'

	#
	# if the MCL in the input file does not match the Allele/MCL association in MGD,
	# (i.e. the mutantKey returned from the alleleloadlib lookup is null),
	# then add the Genotype with null MCLs (see TR12508).
	#
        if mutantKey == 0:
	    mutantID = ''
	    mutantID2 = ''
            #logit = errorDisplay % (mutantID, lineNum, '3', line)
            #fpLogDiag.write(logit)
            #fpLogCur.write(logit)
            #error = 1

	if DEBUG:
	    print '    mutantID: %s mutantKey: %s' % (mutantID, mutantKey)

	# strain should have been added by the previous makeStrains.sh 
	# wrapper but in case it was not...
 
        strainID = ''
        strainKey = 0

	# NS strain does not have colony ID, so don't check
	if strainName == 'Not Specified':
	    results = db.sql(''' select * from strains where strain = '%s' ''' % strainName, 'auto')
	else:
	    results = db.sql(''' select * from strains where strain = '%s' and colonyID like'%%%s%%' ''' % (strainName, colonyID), 'auto')

	# if several rows match, the last one wins
	for r in results:
	   strainID = r['strainID']
	   strainKey = r['_Strain_key']

	# NOTE(review): a missing strain is logged but, unlike the marker and
	# allele checks above, does not set the error flag.
	if strainKey == 0:
	    logit = errorDisplay % (strainName + '|' + colonyID, lineNum, '10', line)
	    fpLogDiag.write(logit)
	    fpLogCur.write(logit)
	if DEBUG:
	    print '    strainName: %s strainID %s strainKey: %s\n' % (strainName, strainID, strainKey)

	# if allele is Heterozygous, then marker must have a wild-type allele
        if alleleState == 'Heterozygous':

	    if DEBUG:
		print '    if allele is Heterzygous, then marker must have a wild-type allele, get it'
	    #
	    # for heterozygous, allele 2 = the wild type allele 
	    #    (marker symbol + '<+>')
	    # find the wild type allele accession id
	    #

	    querySQL = '''
		select awt.accID
			from ALL_Allele wt, ACC_Accession awt
			where wt._Marker_key = %s
			and wt.name = 'wild type'
			and wt._Allele_key = awt._Object_key
		        and awt._MGIType_key = 11
		        and awt._LogicalDB_key = 1
		        and awt.preferred = 1
		''' % (markerKey)

	    if DEBUG:
		print querySQL

	    results = db.sql(querySQL, 'auto')
	    for r in results:
		# found the wild type, so set it
		alleleID2 = r['accID']
		mutantID2 = ''

	    if DEBUG:
		print '    found wild type and alleleID2: %s mutantID2: %s' % (alleleID2, mutantID2)

	    # alleleID2 still equal to alleleID means the query above did not
	    # replace it with a different (wild type) accession id
	    if alleleID == alleleID2:
                logit = errorDisplay % (markerID, lineNum, '8', line)
	        logit = logit + 'no wild type allele exists for this marker'
                fpLogDiag.write(logit)
                fpLogCur.write(logit)
                error = 1

        # if error, continue to next line
        if error:
	    fpHTMPError.write(line)
            continue

	#
	# check alleleState
	#

	if DEBUG:
	    print '\n    Check AlleleState:'

        if alleleState == 'Homozygous':

	    if DEBUG:
		print '    Homozygous : querying to find genotype'

	    querySQL = '''
		select g.accID
			from genotypes g
			where g._Marker_key = %s
			and g._Allele_key_1 = %s
			and g._Allele_key_2 = %s
			and g._MutantCellLine_key_1 %s %s
			and g._MutantCellLine_key_2 %s %s
			and g.term = '%s'
			and g._Strain_key = %s
		''' % (markerKey, alleleKey, alleleKey, mutantSQL, mutantKey, mutantSQL, mutantKey, alleleState, strainKey)

	    if DEBUG:
		print querySQL

	    results = db.sql(querySQL, 'auto')

	    if len(results) > 1:
		if DEBUG:
		    print '    More than one genotype - last one wins'
		    print '    %s' % results

	    for r in results:
		genotypeID = r['accID']

	    if DEBUG:
		print '    genotypeID: %s' % genotypeID

        elif alleleState == 'Heterozygous':

	    #
	    # for heterozygous, allele 2 = the wild type allele 
	    #   (marker symbol + '<+>')
	    # find the wild type allele accession id
	    #

	    if DEBUG:
		print '    Heterozygous : querying to find genotype'

	    querySQL = '''
		select g.accID
			from genotypes g
			where g._Marker_key = %s
			and g._Allele_key_1 = %s
			and g._Allele_key_2 != %s
			and g._MutantCellLine_key_1 %s %s
			and g._MutantCellLine_key_2 is null
			and g.term = '%s'
			and g._Strain_key = %s
		''' % (markerKey, alleleKey, alleleKey, mutantSQL, mutantKey, alleleState, strainKey)

	    if DEBUG:
		print querySQL

	    results = db.sql(querySQL, 'auto')

	    if len(results) > 1:
		if DEBUG:
		    print '    More than one genotype - last one wins'
		    print '    %s' % results

	    for r in results:
		genotypeID = r['accID']

	    if DEBUG:
		print '    genotypeID: %s' % genotypeID

	elif alleleState in ('Hemizygous', 'Indeterminate'):

	    if DEBUG:
		print '    querying to find genotype : ', alleleState

	    alleleID2 = ''
	    mutantID2 = ''

	    if alleleState == 'Hemizygous':

	        # refine the state by the marker's chromosome; anything other
	        # than X or Y is an error
	        querySQL = '''
		    select chromosome 
			from MRK_Marker 
			where _Marker_key = %s''' % markerKey

	        results = db.sql(querySQL, 'auto')

	        for r in results:

		    if r['chromosome'] == 'X':
		        alleleState = 'Hemizygous X-linked'
		        if DEBUG:
		            print '    ', alleleState

		    elif r['chromosome'] == 'Y':
		        alleleState = 'Hemizygous Y-linked'
		        if DEBUG:
		            print '    ', alleleState

		    else:
            		logit = errorDisplay % (alleleState, lineNum, '6', line)
			logit = logit + 'pair state %s does not match chromosome %s' % (alleleState, r['chromosome'])
			if DEBUG:
			    print '    ', logit

            		fpLogDiag.write(logit)
            		fpLogCur.write(logit)
	    		error = 1
			break

	    # the genotype query below still runs when error was just set;
	    # the line is rejected by the error check after this if/elif chain
	    querySQL = '''
		select g.accID
			from genotypes g
			where g._Marker_key = %s
			and g._Allele_key_1 = %s
			and g._Allele_key_2 is null
			and g._MutantCellLine_key_1 %s %s
			and g._MutantCellLine_key_2 is null
			and g.term = '%s'
			and g._Strain_key = %s
		''' % (markerKey, alleleKey, mutantSQL, mutantKey, alleleState, strainKey)
	    
	    if DEBUG:
		print querySQL

	    results = db.sql(querySQL, 'auto')

	    if len(results) > 1:
		if DEBUG:
		    print '    More than one genotype - last one wins'
		    print '    %s' % results

	    for r in results:
		genotypeID = r['accID']

	    if DEBUG:
		print '    genotypeID: %s' % genotypeID

	else:
            logit = errorDisplay % (alleleState, lineNum, '6', line)

	    if DEBUG:
		print '    logging error:'
		print '    ' + errorDisplay % (alleleState, lineNum, '6', line)

            fpLogDiag.write(logit)
            fpLogCur.write(logit)
	    error = 1

        # if error, continue to next line
        if error:
	    fpHTMPError.write(line)
            continue

	#
	# check genotype unique-ness
	#
	# duplicate genotypes WITHIN the input file, doesn't mean the genotype
        # isn't in the database

	dupGeno = 0
	useOrder = str(genotypeOrder)

	if DEBUG:
	    print '    check genotype uniqueness'

	#
	# set uniqueness
	# isConditional is always 0, so we do not need to specify this value
	#
	# NOTE(review): the parts are concatenated without a separator, so
	# different key combinations could produce the same string.
	key = str(markerKey) + str(alleleKey) + str(alleleState) + str(strainKey) + str(mutantKey)

	if DEBUG:
	    print '    unique key is: %s' % key

	if genotypeOrderDict.has_key(key):
	    dupGeno = 1
	    useOrder = str(genotypeOrderDict[key])
	    if DEBUG:
		print '    duplicate genotype and order is: %s' % useOrder

	# uniq genotype/mpID key
	currentMP = useOrder + '|' + mpID

	#### new code HDP-2 US161 support TR11792 ####
	# add line to dictionary by currentMP key for later processing
	if not annotDict.has_key(currentMP):
	    annotDict[currentMP] = []
	annotDict[currentMP].append(line)

	if dupGeno:
	    fpHTMPDup.write(line)
	    continue

	#
	# save genotype order
	#
	if DEBUG:
	    print '    saving genotype order genotypeOrderDict[%s] = %s' % (key, genotypeOrder)
        genotypeOrderDict[key] = genotypeOrder

	#
	# add to genotype mgi-format file
	#

	if DEBUG:
	    print '    writing genotype to  genotype file'

	fpGenotype.write(genotypeLine % (\
		genotypeOrder, genotypeID, strainID, strainName, \
		markerID, alleleID, mutantID, alleleID2, mutantID2, \
		conditional, existsAs, generalNote, privateNote, alleleState, \
		compound, createdBy))

	genotypeOrder = genotypeOrder + 1

    #### new code HDP-2 US161 support TR11792 ####
    # iterate through annotDict

    for key in annotDict.keys():
	order, mpID = key.split('|')
	lineList = annotDict[key]
	genderSet = set([])

	# get the gender for each line and add to the set
	for line in lineList:
	    tokens = line.split('\t')
	    genderSet.add(tokens[10])

	# if multi lines, the only difference is gender
	# just get the last (or only) line in the list; prepend the order number
	# ('line' still holds the final element of lineList from the loop above)
	line = order + '\t' + line

	# if there are multi gender values in the set, update line to 'Both'
	if len(genderSet) > 1:
	    # Don't bother to look at values. If already 'Both', we're golden
	    # otherwise just update the line to 'Both'
	    # NOTE(review): replace() acts on the whole line, not only the
	    # gender column.
	    line = line.replace('Male', 'Both')
	    line = line.replace('Female', 'Both')

	# now write out the line
	fpHTMP.write(line)

    return 0
コード例 #9
0
ファイル: referenceload.py プロジェクト: mgijax/referenceload
def processFile():
    """Read the input file and emit reference-association records.

    Each line of ``inputFile`` supplies, tab-delimited: object accession
    id, J number, reference association type and creator. The four values
    are resolved to keys. A line is dropped when any key is 0, or
    reported to ``errorFile`` as a duplicate when its
    object/reference/type combination is already in ``refDict``. Every
    other line is written to ``refFile`` and ``refAssocKey`` advances by
    one.

    Returns nothing.
    """

    global refAssocKey

    lineNum = 0

    for line in inputFile.readlines():

        lineNum += 1

        # rstrip('\n') keeps the last character of a final line that has
        # no newline; line[:-1] would drop it
        tokens = line.rstrip('\n').split('\t')

        try:
            accID, jnum, refAssocType, createdBy = tokens[0], tokens[1], tokens[2], tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum,
                                         errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        refAssocTypeKey = verifyRefAssocType(refAssocType, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # any failed lookup disqualifies the record
        if objectKey == 0 or referenceKey == 0 \
                or refAssocTypeKey == 0 or createdByKey == 0:
            continue

        # could move to verifyDuplicate routine
        key = '%s:%s:%s' % (objectKey, referenceKey, refAssocTypeKey)
        # 'in' replaces dict.has_key(), which no longer exists in Python 3
        if key in refDict:
            errorFile.write('Duplicate (%d) %s\n' % (lineNum, line))
            continue

        record = (refAssocKey, referenceKey, objectKey, mgiTypeKey,
                  refAssocTypeKey, createdByKey, createdByKey,
                  loaddate, loaddate)
        refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' % record)

        refAssocKey = refAssocKey + 1
コード例 #10
0
ファイル: strainload.py プロジェクト: mgijax/strainload
def processFile():
    """Convert each input line into the bcp records for one new strain.

    Every line of ``inputFile`` must hold 14 tab-delimited fields. A line
    is skipped when the strain already exists (``verifyStrain`` returns a
    positive key) or when the strain type, species or creator cannot be
    resolved. For an accepted line the function writes:

    * the strain record (``strainFile``);
    * one strain/marker record per resolvable allele (``markerFile``);
    * an MGI accession id and the external accession id (``accFile``);
    * a note plus note chunk for each non-empty strain-of-origin, mutant
      cell line and IMPC colony note (``noteFile``/``noteChunkFile``);
    * one annotation per resolvable annotation term (``annotFile``).

    After the loop the accession maximum is updated and the strain-marker
    and annotation sequences are reset, each followed by a commit.
    """

    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # kept as a plain counter: the final value is used after the loop
    lineNum = 0

    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # strip only the newline so an unterminated last line keeps its
        # final character
        tokens = line.rstrip('\n').split('\t')

        try:
            externalID = tokens[0]
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # the external id is stored whole as the prefix part, with an
        # empty numeric part
        externalPrefix = externalID
        externalNumeric = ''

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # skip strains that already exist and lines with unresolved values
        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                         % (strainKey, speciesKey, strainTypeKey, name, isStandard,
                            isPrivate, isGeneticBackground,
                            createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker
        if len(alleleIDs) > 0:
            for a in alleleIDs.split('|'):
                alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)
                if alleleKey == 0:
                    continue
                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
                markerKey = results[0]['_Marker_key']

                markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                                 % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                                    createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain
        accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n'
                      % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
                         createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id
        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n'
                      % (accKey, externalID, externalPrefix, externalNumeric, externalLDB,
                         strainKey, externalTypeKey,
                         createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk:
        # Strain of Origin Note, Mutant Cell Line of Origin Note, IMPC
        # Colony Note. One loop handles all three so that each chunk is
        # written with its own text; the IMPC colony chunk used to be
        # written with the strain-of-origin text.
        notes = (
            (sooNote, mgiStrainOriginTypeKey),
            (mutantNote, mgiMutantOriginTypeKey),
            (impcColonyNote, mgiIMPCColonyTypeKey),
        )
        for noteText, noteTypeKey in notes:

            if len(noteText) == 0:
                continue

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n'
                           % (noteKey, strainKey, mgiNoteObjectKey, noteTypeKey,
                              createdByKey, createdByKey, cdate, cdate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n'
                                % (noteKey, 1, noteText, createdByKey, createdByKey, cdate, cdate))

            noteKey = noteKey + 1

        #
        # Annotations
        #
        # _AnnotType_key = 1009
        # _Qualifier_key = 1614158
        #
        if len(annotations) > 0:
            for a in annotations.split('|'):

                # strain annotation type
                annotTypeKey = 1009

                # this is a null qualifier key
                annotQualifierKey = 1614158

                annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
                if annotTermKey == 0:
                    continue

                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n'
                                % (annotKey, annotTypeKey, strainKey, annotTermKey,
                                   annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    #
    # Update the AccessionMax value
    #
    # NOTE(review): the increment is the number of lines read, which also
    # counts lines that were skipped above - confirm that is intended.
    db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql(''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None)
    db.commit()

    # update voc_annot_seq auto-sequence
    db.sql(''' select setval('voc_annot_seq', (select max(_Annot_key) from VOC_Annot)) ''', None)
    db.commit()
コード例 #11
0
ファイル: makeStrains.py プロジェクト: mgijax/htmpload
def processFile():
    """Create bcp records for strains in the input file that are not yet loaded.

    Each line of ``inputFile`` must hold nine tab-delimited fields: name,
    allele ids, strain type, species, standard flag, creator, mutant cell
    line note, colony note and annotations ('|'-delimited).

    * If the strain already exists, a colony note is created for it when
      ``checkColonyNote`` reports none, and the line is otherwise done.
    * If the strain type, species or creator cannot be resolved, the line
      is skipped.
    * Otherwise the strain, strain/marker, accession, note and annotation
      records are written and the strain and MGI keys advance.

    Unless ``DEBUG`` is set, the accession maximum is updated at the end.
    """

    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # kept as a plain counter: the final value is used after the loop
    lineNum = 0

    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # strip only the newline so an unterminated last line keeps its
        # final character
        tokens = line.rstrip('\n').split('\t')

        try:
            name = tokens[0]
            alleleIDs = tokens[1]
            strainType = tokens[2]
            species = tokens[3]
            isStandard = tokens[4]
            createdBy = tokens[5]
            mutantNote = tokens[6]
            colonyNote = tokens[7]
            annotations = tokens[8].split('|')
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # if the strain exist, but with no colony id note, create one
        if strainExistKey > 0:
            print('strain in database checking colony note : %s' % line)
            if not checkColonyNote(strainExistKey):
                createNote(strainExistKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)
            else:
                print('colony note in database: %s' % colonyNote)
            continue

        print('strain not in database : %s' % line)

        # if strain does not exist and verification failed on strain type,
        # species or createdBy, skip the record
        if strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        # if no errors, process
        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                         % (strainKey, speciesKey, strainTypeKey, name, isStandard,
                            isPrivate, isGeneticBackground, createdByKey, createdByKey,
                            cdate, cdate))

        # if Allele found, resolve to Marker.
        # An empty allele field, or an id that verifyObject cannot resolve
        # (key 0), is skipped so the marker lookup is not run for it.
        if len(alleleIDs) > 0:
            for a in alleleIDs.split('|'):
                alleleKey = loadlib.verifyObject(a, alleleTypeKey, None, lineNum, errorFile)
                if alleleKey == 0:
                    continue
                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
                markerKey = results[0]['_Marker_key']

                markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                                 % (strainmarkerKey, strainKey, markerKey, alleleKey,
                                    qualifierKey, createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for the strain
        if isStandard == '1':
            accFile.write('%d|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n'
                          % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
                             createdByKey, createdByKey, cdate, cdate))
            accKey = accKey + 1

        # storing data in MGI_Note/MGI_NoteChunk
        # Colony ID Note
        if len(colonyNote) > 0:
            createNote(strainKey, colonyNote, mgiColonyNoteTypeKey, createdByKey)

        # Mutant Cell Line of Origin Note
        if len(mutantNote) > 0:
            createNote(strainKey, mutantNote, mgiMutOrigNoteTypeKey, createdByKey)

        #
        # Annotations
        #
        # _AnnotType_key = 1009 =  "Strain/Attributes"
        # _Qualifier_key = 1614158 =  null
        #
        for a in annotations:

            # strain annotation type
            annotTypeKey = 1009

            # this is a null qualifier key
            annotQualifierKey = 1614158

            annotTermKey = loadlib.verifyTerm('', 27, a, lineNum, errorFile)
            if annotTermKey == 0:
                continue

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n'
                            % (annotKey, annotTypeKey, strainKey, annotTermKey,
                               annotQualifierKey, cdate, cdate))
            annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1

    #
    # Update the AccessionMax value
    #
    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
コード例 #12
0
def processFile():
    """Verify each input line and write translation records.

    Starts numbering at the next free translation key reported by the
    database, or 1000 when the query yields None. Each line of
    ``inputFile`` must supply four tab-delimited fields: object id, object
    description, term and user id. The object is resolved with
    ``loadlib.verifyTerm`` when ``vocabKey`` is positive and with
    ``loadlib.verifyObject`` otherwise. Lines whose object or user
    resolves to 0 are skipped, and the rest are written to ``transFile``.
    When ``newTransType`` is set, a translation-type record is written
    after all lines are processed.

    Returns nothing.
    """

    results = db.sql('select maxKey = max(_Translation_key) + 1 from MGI_Translation', 'auto')
    transKey = results[0]['maxKey']
    if transKey is None:
        transKey = 1000

    lineNum = 0

    # sequence number of bad name in translation list
    seq = 1

    for line in inputFile.readlines():

        lineNum += 1

        # rstrip('\n') instead of line[:-1], which would cut a character
        # off a final line that has no newline
        tokens = line.rstrip('\n').split('\t')

        try:
            objectID = tokens[0]
            objectDescription = tokens[1]
            term = tokens[2]
            userID = tokens[3]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))
            continue

        if vocabKey > 0:
            objectKey = loadlib.verifyTerm(objectID, vocabKey, objectDescription, lineNum, errorFile)
        else:
            objectKey = loadlib.verifyObject(objectID, mgiTypeKey, objectDescription, lineNum, errorFile)

        userKey = loadlib.verifyUser(userID, lineNum, errorFile)

        # if either lookup failed, continue to next record
        if objectKey == 0 or userKey == 0:
            continue

        # add term to translation file
        bcpWrite(transFile, [transKey, transTypeKey, objectKey, term, seq, userKey, userKey, loaddate, loaddate])
        transKey += 1
        seq += 1

    if newTransType:
        # NOTE(review): userKey comes from the last line read; it is
        # unbound for an empty input file and may be 0 if that line failed
        # verification.
        bcpWrite(transTypeFile, [transTypeKey, mgiTypeKey, vocabKey, transTypeName, transCompression, 0, userKey, userKey, loaddate, loaddate])
コード例 #13
0
ファイル: probeassay.py プロジェクト: mgijax/probeload
def processFile():
    '''
    Validate each line of the tab-delimited input file and queue the
    alias/reference rows and SQL needed to fold the "from" probe into
    the "to" probe.

    Expected fields per line: from probe ID, alias name, to probe ID,
    J number, creator.

    effects:
        Writes validation failures to errorFile.
        For each valid line writes one row to refFile and one row to
        aliasFile, advances the global refKey/aliasKey counters, and
        appends one statement each to execAssaySQL, execRefSQL and
        execProbeSQL.

    returns:
        nothing
    '''

    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator; line[:-1] would chop the last
        # character of the final field on a last line with no newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            # fewer than five fields on this line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum,
                                       errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum,
                                     errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # a key of 0 is reported as an invalid value for that field
        if fromKey == 0:
            errorFile.write('Invalid Probe "From":  %s\n' % (fromID))
            error = 1

        if toKey == 0:
            errorFile.write('Invalid Probe "To":  %s\n' % (toID))
            error = 1

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

        # check that all genes are the same
        # (run even when a key above is 0, so the error file lists every
        # failed check for the line)
        checkGenesSQL = '''
			select f.*
			from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a
			where f._Probe_key = %s
			and t._Probe_key = %s
			and p._Probe_key = %s
			and p._ProbePrep_key = a._ProbePrep_key
			and f._Marker_key = t._Marker_key
			and f._Marker_key = a._Marker_key
			''' % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write(
                'Gene of GenePaint, Eurexpress and Assay are not the same:  %s, %s\n'
                % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = '''
			 select a.*
			 from GXD_ProbePrep p, GXD_Assay a
			 where p._Probe_key = %s
			 and p._ProbePrep_key = a._ProbePrep_key
			 and a._Refs_key = %s
			 ''' % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            errorFile.write(
                'J: is not on any Assays attached to the probe:  %s\n' %
                (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))
        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
コード例 #14
0
ファイル: synonymload.py プロジェクト: mgijax/synonymload
def processFile():
    '''
    Read the tab-delimited input file and write one synonym row per
    valid line.

    Expected fields per line: accession ID, synonym, synonym type.

    A line is skipped when the synonym is already listed for that
    accession ID in synDict, when the object or synonym type lookup
    returns 0, or when the synonym is empty.

    effects:
        Writes rejected lines to errorFile.
        Writes accepted lines to synFile and advances the global synKey.

    returns:
        nothing
    '''

    global synKey

    lineNum = 0

    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Strip only the line terminator; line[:-1] would chop the last
        # character of the synonym type on a last line with no newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            accID = tokens[0]
            synonym = tokens[1]
            synType = tokens[2]
        except IndexError:
            # fewer than three fields on this line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)

        # Membership is tested on the mapping itself; testing against a
        # synDict.keys() list is a linear scan per line on Python 2.
        if accID in synDict and synonym in synDict[accID]:
            errorFile.write('Duplicate synonym: %s for %s\n' % (synonym, accID))
            continue

        synTypeKey = verifySynonymType(synType, lineNum)

        emptySynonym = len(synonym) == 0
        if emptySynonym:
            errorFile.write('Invalid Synonym:Empty (%d) %s\n' % (lineNum, synonym))

        # a key of 0 marks a failed lookup; skip the record
        if objectKey == 0 or synTypeKey == 0 or emptySynonym:
            continue

        synFile.write('%d|%d|%d|%d|%s|%s|%s|%s|%s|%s\n' \
            % (synKey, objectKey, mgiTypeKey, synTypeKey, referenceKey, synonym, createdByKey, createdByKey, loaddate, loaddate))
        synKey = synKey + 1
コード例 #15
0
ファイル: strainalleleload.py プロジェクト: mgijax/strainload
def processFile():
    '''
    Read the tab-delimited input file and write one strain/marker row
    per valid line.

    Expected fields per line: strain ID, allele (or marker) ID,
    qualifier, creator.

    effects:
        On the first line read, deletes the PRB_Strain_Marker rows
        created by that line's user.
        Writes rejected alleles to errorFile.
        Writes accepted lines to strainFile and advances the global
        strainalleleKey.
        After the loop, calls ACC_setMax with the number of lines read
        and resets the prb_strain_marker_seq sequence.

    returns:
        nothing
    '''

    global strainalleleKey

    lineNum = 0
    notDeleted = 1

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator; line[:-1] would chop the last
        # character of the creator on a last line with no newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            strainID = tokens[0]
            alleleID = tokens[1]
            qualifier = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            # fewer than four fields on this line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # IDs of 1-4 characters are left-padded with zeros to 6 characters.
        # NOTE(review): 5-character IDs are left unpadded, exactly as
        # before -- confirm whether they should also be padded.
        if 1 <= len(strainID) <= 4:
            strainID = strainID.rjust(6, '0')

        strainKey = loadlib.verifyObject(strainID, strainTypeKey, None, lineNum, errorFile)

        # this could generate an error because the ID is a marker, not an allele
        # just ignore the error in the error file if it gets resolved later
        alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile)
        markerKey = 0

        if alleleKey == 0:
            markerKey = loadlib.verifyObject(alleleID, markerTypeKey, None, lineNum, errorFile)

        qualifierKey = verifyQualifier(qualifier, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if notDeleted:
            # NOTE(review): runs once, with the first line's user key, even
            # if that key is 0 -- confirm this is the intended behaviour.
            db.sql('delete PRB_Strain_Marker where _CreatedBy_key = %s' % (createdByKey), None)
            notDeleted = 0

        # if Allele found, resolve to Marker

        if alleleKey > 0:
            results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
            if len(results) > 0:
                markerKey = results[0]['_Marker_key']

        elif markerKey == 0:
            errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID))
            error = 1

        if strainKey == 0 or markerKey == 0 or qualifierKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # a marker-only record is written with an empty allele column
        if alleleKey == 0:
            alleleKey = ''

        strainFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainalleleKey, strainKey, markerKey, alleleKey, qualifierKey, createdByKey, createdByKey, loaddate, loaddate))

        strainalleleKey = strainalleleKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    db.sql('select * from ACC_setMax (%d);' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql(''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''', None)
    db.commit()
コード例 #16
0
ファイル: probeassay.py プロジェクト: mgijax/probeload
def processFile():
    '''
    Validate each line of the tab-delimited input file and queue the
    alias/reference rows and SQL needed to fold the "from" probe into
    the "to" probe.

    Expected fields per line: from probe ID, alias name, to probe ID,
    J number, creator.

    effects:
        Writes validation failures to errorFile.
        For each valid line writes one row to refFile and one row to
        aliasFile, advances the global refKey/aliasKey counters, and
        appends one statement each to execAssaySQL, execRefSQL and
        execProbeSQL.

    returns:
        nothing
    '''

    global refKey, aliasKey
    global execProbeSQL
    global execAssaySQL
    global execRefSQL

    lineNum = 0

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator; line[:-1] would chop the last
        # character of the creator on a last line with no newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            fromID = tokens[0]
            name = tokens[1]
            toID = tokens[2]
            jnum = tokens[3]
            createdBy = tokens[4]
        except IndexError:
            # fewer than five fields on this line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        fromKey = loadlib.verifyObject(fromID, mgiTypeKey, None, lineNum, errorFile)
        toKey = loadlib.verifyObject(toID, mgiTypeKey, None, lineNum, errorFile)
        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # Report every lookup that came back as 0, in field order.
        keyChecks = (
            (fromKey, 'Invalid Probe "From":  %s\n', fromID),
            (toKey, 'Invalid Probe "To":  %s\n', toID),
            (referenceKey, 'Invalid Reference:  %s\n', jnum),
            (createdByKey, 'Invalid Creator:  %s\n\n', createdBy),
        )
        for key, message, value in keyChecks:
            if key == 0:
                errorFile.write(message % (value))
                error = 1

        # check that all genes are the same
        # (run even when a key above is 0, so the error file lists every
        # failed check for the line)
        checkGenesSQL = '''
			select f.*
			from PRB_Marker f, PRB_Marker t, GXD_ProbePrep p, GXD_Assay a
			where f._Probe_key = %s
			and t._Probe_key = %s
			and p._Probe_key = %s
			and p._ProbePrep_key = a._ProbePrep_key
			and f._Marker_key = t._Marker_key
			and f._Marker_key = a._Marker_key
			''' % (fromKey, toKey, fromKey)

        checkGenes = db.sql(checkGenesSQL, 'auto')
        if len(checkGenes) == 0:
            errorFile.write('Gene of GenePaint, Eurexpress and Assay are not the same:  %s, %s\n' % (fromID, toID))
            error = 1

        # check that the J: is on at least one Assay
        checkJAssaySQL = '''
			 select a.*
			 from GXD_ProbePrep p, GXD_Assay a
			 where p._Probe_key = %s
			 and p._ProbePrep_key = a._ProbePrep_key
			 and a._Refs_key = %s
			 ''' % (fromKey, referenceKey)

        checkJAssay = db.sql(checkJAssaySQL, 'auto')
        if len(checkJAssay) == 0:
            errorFile.write('J: is not on any Assays attached to the probe:  %s\n' % (fromID))
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # add alias using fromID name (from) to toID

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
            % (refKey, toKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
        aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            % (aliasKey, refKey, name, createdByKey, createdByKey, loaddate, loaddate))
        refKey = refKey + 1
        aliasKey = aliasKey + 1

        # move assay information from fromID to toID
        execAssaySQL.append(updateAssaySQL % (toKey, fromKey))

        # move fromID (from) references to toID
        execRefSQL.append(updateRefSQL % (toKey, fromKey, referenceKey))

        # delete fromID (from)
        execProbeSQL.append(deleteProbeSQL % (fromKey))
コード例 #17
0
def processFile():
    '''
    Read the tab-delimited input file and, for every valid line, write
    the strain row plus its marker, accession, note and annotation rows.

    Expected fields per line (14): external ID, name, '|'-separated
    allele IDs, strain type, species, isStandard, strain-of-origin note,
    external logical DB, external type key, '|'-separated annotations,
    creator, mutant cell line note, isPrivate, IMPC colony note.

    A line is skipped when verifyStrain returns a key > 0 for the name,
    or when the strain type, species or user lookup returns 0.

    effects:
        Writes to strainFile, markerFile, accFile, noteFile and
        annotFile, and advances the global lineNum, strainKey,
        strainmarkerKey, accKey, mgiKey, annotKey and noteKey counters.

    returns:
        nothing
    '''

    global lineNum
    global strainKey, strainmarkerKey, accKey, mgiKey, annotKey, noteKey

    # strain annotation type (_AnnotType_key)
    annotTypeKey = 1009

    # this is a null qualifier key (_Qualifier_key)
    annotQualifierKey = 1614158

    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Strip only the line terminator; line[:-1] would chop the last
        # character of the final field on a last line with no newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            externalID = tokens[0]
            name = tokens[1]
            alleleIDs = tokens[2]
            strainType = tokens[3]
            species = tokens[4]
            isStandard = tokens[5]
            sooNote = tokens[6]
            externalLDB = tokens[7]
            externalTypeKey = tokens[8]
            annotations = tokens[9]
            createdBy = tokens[10]
            mutantNote = tokens[11]
            isPrivate = tokens[12]
            impcColonyNote = tokens[13]
        except IndexError:
            # fewer than fourteen fields on this line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # The external ID is stored whole as the accession prefix, with an
        # empty numeric part; it is not split on ':'.
        externalPrefix = externalID
        externalNumeric = ''

        strainExistKey = verifyStrain(name, lineNum)
        strainTypeKey = verifyStrainType(strainType, lineNum)
        speciesKey = verifySpecies(species, lineNum)
        # NOTE(review): 0 is passed where the other lookups pass lineNum --
        # confirm this is intentional.
        createdByKey = loadlib.verifyUser(createdBy, 0, errorFile)

        # skip the line if verifyStrain found the name or a lookup failed
        if strainExistKey > 0 or strainTypeKey == 0 or speciesKey == 0 or createdByKey == 0:
            continue

        strainFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainKey, speciesKey, strainTypeKey, name, isStandard, isPrivate, isGeneticBackground,
               createdByKey, createdByKey, cdate, cdate))

        # if Allele found, resolve to Marker

        if len(alleleIDs) > 0:
            for alleleID in alleleIDs.split('|'):
                alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None, lineNum, errorFile)
                if alleleKey is None or alleleKey == 0:
                    continue
                results = db.sql('select _Marker_key from ALL_Allele where _Allele_key = %s' % (alleleKey),  'auto')
                markerKey = results[0]['_Marker_key']
                if markerKey is not None:
                    markerFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                        % (strainmarkerKey, strainKey, markerKey, alleleKey, qualifierKey,
                           createdByKey, createdByKey, cdate, cdate))
                else:
                    # allele has no marker: leave the marker column empty
                    markerFile.write('%s|%s||%s|%s|%s|%s|%s|%s\n' \
                        % (strainmarkerKey, strainKey, alleleKey, qualifierKey,
                           createdByKey, createdByKey, cdate, cdate))
                strainmarkerKey = strainmarkerKey + 1

        # MGI Accession ID for all strain
        # all private = 0 (false)

        accFile.write('%d|%s%d|%s|%s|1|%d|%d|%s|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, strainKey, mgiTypeKey,
               isPrivate, createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # external accession id

        accFile.write('%d|%s|%s|%s|%s|%s|%s|0|1|%s|%s|%s|%s\n' \
            % (accKey, externalID, externalPrefix, externalNumeric, externalLDB, strainKey, externalTypeKey,
               createdByKey, createdByKey, cdate, cdate))
        accKey = accKey + 1

        # storing data in MGI_Note: Strain of Origin Note, Mutant Cell Line
        # of Origin Note, IMPC Colony Note -- one row per non-empty note,
        # written in that order

        notesByType = (
            (mgiStrainOriginTypeKey, sooNote),
            (mgiMutantOriginTypeKey, mutantNote),
            (mgiIMPCColonyTypeKey, impcColonyNote),
        )
        for noteTypeKey, noteText in notesByType:
            if len(noteText) > 0:
                noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                    % (noteKey, strainKey, mgiNoteObjectKey, noteTypeKey, noteText, \
                       createdByKey, createdByKey, cdate, cdate))
                noteKey = noteKey + 1

        #
        # Annotations
        #

        if len(annotations) > 0:
            for annotTerm in annotations.split('|'):

                # 27 is the vocabulary key handed to verifyTerm
                # (presumably the strain attribute vocabulary -- TODO confirm)
                annotTermKey = loadlib.verifyTerm('', 27, annotTerm, lineNum, errorFile)
                if annotTermKey == 0:
                    continue

                annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, strainKey, annotTermKey, annotQualifierKey, cdate, cdate))
                annotKey = annotKey + 1

        mgiKey = mgiKey + 1
        strainKey = strainKey + 1
コード例 #18
0
ファイル: synonymload.py プロジェクト: mgijax/synonymload
def processFile():
    '''
    Read the tab-delimited input file and write one synonym row per
    valid line.

    Expected fields per line: accession ID, synonym, synonym type.

    A line is skipped when the synonym is already listed for that
    accession ID in synDict, when the object or synonym type lookup
    returns 0, or when the synonym is empty.

    effects:
        Writes rejected lines to errorFile.
        Writes accepted lines to synFile and advances the global synKey.

    returns:
        nothing
    '''

    global synKey

    lineNum = 0

    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Strip only the line terminator; line[:-1] would chop the last
        # character of the synonym type on a last line with no newline.
        fields = line.rstrip('\n').split('\t')

        try:
            accID = fields[0]
            synonym = fields[1]
            synType = fields[2]
        except IndexError:
            # fewer than three fields on this line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        objectKey = loadlib.verifyObject(accID, mgiTypeKey, None, lineNum, errorFile)

        # Membership is tested on the mapping itself; testing against a
        # synDict.keys() list is a linear scan per line on Python 2.
        if accID in synDict and synonym in synDict[accID]:
            errorFile.write('Duplicate synonym: %s for %s\n' % (synonym, accID))
            continue

        synTypeKey = verifySynonymType(synType, lineNum)

        synonymIsEmpty = len(synonym) == 0
        if synonymIsEmpty:
            errorFile.write('Invalid Synonym:Empty (%d) %s\n' % (lineNum, synonym))

        # a key of 0 marks a failed lookup; skip the record
        if objectKey == 0 or synTypeKey == 0 or synonymIsEmpty:
            continue

        synFile.write('%d|%d|%d|%d|%s|%s|%s|%s|%s|%s\n' \
            % (synKey, objectKey, mgiTypeKey, synTypeKey, referenceKey, synonym, createdByKey, createdByKey, loaddate, loaddate))
        synKey = synKey + 1
コード例 #19
0
def processFile():
    '''
    Read the tab-delimited input file and write one strain/marker row
    per valid line.

    Expected fields per line: strain ID, allele (or marker) ID,
    qualifier, creator.

    effects:
        On the first line read, deletes the PRB_Strain_Marker rows
        created by that line's user.
        Writes rejected alleles to errorFile.
        Writes accepted lines to strainFile and advances the global
        strainalleleKey.
        After the loop, calls ACC_setMax with the number of lines read
        and resets the prb_strain_marker_seq sequence.

    returns:
        nothing
    '''

    global strainalleleKey

    lineNum = 0
    notDeleted = 1

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator; line[:-1] would chop the last
        # character of the creator on a last line with no newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            strainID = tokens[0]
            alleleID = tokens[1]
            qualifier = tokens[2]
            createdBy = tokens[3]
        except IndexError:
            # fewer than four fields on this line
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # IDs of 1-4 characters are left-padded with zeros to 6 characters.
        # NOTE(review): 5-character IDs are left unpadded, exactly as
        # before -- confirm whether they should also be padded.
        if 1 <= len(strainID) <= 4:
            strainID = strainID.rjust(6, '0')

        strainKey = loadlib.verifyObject(strainID, strainTypeKey, None,
                                         lineNum, errorFile)

        # this could generate an error because the ID is a marker, not an allele
        # just ignore the error in the error file if it gets resolved later
        alleleKey = loadlib.verifyObject(alleleID, alleleTypeKey, None,
                                         lineNum, errorFile)
        markerKey = 0

        if alleleKey == 0:
            markerKey = loadlib.verifyObject(alleleID, markerTypeKey, None,
                                             lineNum, errorFile)

        qualifierKey = verifyQualifier(qualifier, lineNum)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if notDeleted:
            # NOTE(review): runs once, with the first line's user key, even
            # if that key is 0 -- confirm this is the intended behaviour.
            db.sql(
                'delete PRB_Strain_Marker where _CreatedBy_key = %s' %
                (createdByKey), None)
            notDeleted = 0

        # if Allele found, resolve to Marker

        if alleleKey > 0:
            results = db.sql(
                'select _Marker_key from ALL_Allele where _Allele_key = %s' %
                (alleleKey), 'auto')
            if len(results) > 0:
                markerKey = results[0]['_Marker_key']

        elif markerKey == 0:
            errorFile.write('Invalid Allele (%s): %s\n' % (lineNum, alleleID))
            error = 1

        if strainKey == 0 or markerKey == 0 or qualifierKey == 0:
            # set error flag to true
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        # a marker-only record is written with an empty allele column
        if alleleKey == 0:
            alleleKey = ''

        strainFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (strainalleleKey, strainKey, markerKey, alleleKey, qualifierKey, createdByKey, createdByKey, loaddate, loaddate))

        strainalleleKey = strainalleleKey + 1

    # end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    db.sql('select * from ACC_setMax (%d);' % (lineNum), None)
    db.commit()

    # update prb_strain_marker_seq auto-sequence
    db.sql(
        ''' select setval('prb_strain_marker_seq', (select max(_StrainMarker_key) from PRB_Strain_Marker)) ''',
        None)
    db.commit()