예제 #1
0
def qtl2():
    '''
    Write the "mapping = yes/alleles = no" QTL report to fp2.

    Section 1 extends the module-level query1 with three filters (no
    MRK_Notes row, at least one MLD_Expt_Marker row, no ALL_Allele row
    with isWildType = 0) and writes one tab-delimited line per row.
    Section 2 applies the same filters to query2 and writes one refID per
    line.  Each section ends with a "(N rows affected)" trailer.
    '''

    fp2.write('QTL markers that have mapping but no allele associated (yes/no)\n')
    fp2.write('mapping = yes/alleles = no\n')

    markerRows = db.sql('''%s
	  and not exists (select 1 from MRK_Notes n where m._Marker_key = n._Marker_key)
    	  and exists (select 1 from MLD_Expt_Marker mld where m._Marker_key = mld._Marker_key)
    	  and not exists (select 1 from ALL_Allele al where m._Marker_key = al._Marker_key and al.isWildType = 0)
	  order by symbol
    	''' % (query1), 'auto')

    for row in markerRows:
        # every column is followed by TAB, then the line is terminated
        columns = (
            row['mgiID'],
            mgi_utils.prvalue(row['refID']),
            row['symbol'],
            row['name'],
        )
        for column in columns:
            fp2.write(column + TAB)
        fp2.write(CRT)

    fp2.write(CRT + '(%d rows affected)' % (len(markerRows)) + CRT)

    fp2.write('\n\nQTL References with map records that have QTL associated w/o Alleles:\n\n')

    referenceRows = db.sql('''%s
	  and not exists (select 1 from MRK_Notes n where m._Marker_key = n._Marker_key)
    	  and exists (select 1 from MLD_Expt_Marker mld where m._Marker_key = mld._Marker_key)
    	  and not exists (select 1 from ALL_Allele al where m._Marker_key = al._Marker_key and al.isWildType = 0)
	  order by numericPart
    	''' % (query2), 'auto')

    for row in referenceRows:
        fp2.write(mgi_utils.prvalue(row['refID']) + CRT)
    fp2.write(CRT + '(%d rows affected)' % (len(referenceRows)) + CRT)
예제 #2
0
def writeAccBCP():
    '''
        # requires:
        #
        # effects:
        #	Writes one accFile line for every WRK_EntrezGene_Bucket0 row
        #	of the current taxId.  Rows selected with refRequired = 1 are
        #	handled first and also get an accrefFile line.
        #	The global accKey is incremented once per row written.
        #
        # returns:
        #	nothing
        #
        '''

    global accKey, userKey

    selectFrom = 'select _Object_key, _LogicalDB_key, accID, private, geneID ' \
        'from WRK_EntrezGene_Bucket0 '

    # (where clause, also write an accrefFile line?)
    # records that require a reference come first
    passes = (
        ('where taxID = %s and refRequired = 1 ' % (taxId), True),
        ('where taxID = %s and refRequired = 0' % (taxId), False),
    )

    for whereClause, withReference in passes:

        for row in db.sql(selectFrom + whereClause, 'auto'):

            # an _Object_key of -1 is resolved through the geneID lookup
            objectKey = row['_Object_key']
            if objectKey == -1:
                objectKey = geneIDtoMarkerKey[row['geneID']]

            prefixPart, numericPart = accessionlib.split_accnum(row['accID'])

            accFields = (accKey, row['accID'], mgi_utils.prvalue(prefixPart),
                         mgi_utils.prvalue(numericPart),
                         row['_LogicalDB_key'], objectKey, mgiTypeKey,
                         row['private'], userKey, userKey, loaddate, loaddate)
            accFile.write('%d|%s|%s|%s|%d|%d|%s|%d|1|%s|%s|%s|%s\n' % accFields)

            if withReference:
                accrefFields = (accKey, referenceKey, userKey, userKey,
                                loaddate, loaddate)
                accrefFile.write('%d|%s|%s|%s|%s|%s\n' % accrefFields)

            accKey = accKey + 1
def writeRecord(i, r, e):
    '''
    Write one 15-field, tab-delimited record to fp.

    i is written as field 2, r supplies the _Term_key, qualifier, termID
    and mDate columns, and e[0], e[1], e[2] supply fields 6-8.  Nothing is
    written when r's _Term_key has no entry in dag.
    '''

    termKey = r['_Term_key']

    # if we can't find the DAG for the Term, skip it
    if termKey not in dag:
        return

    # field 4: the qualifier with surrounding whitespace removed,
    # or an empty string when it is null
    qualifier = ''
    if r['qualifier'] is not None:
        qualifier = r['qualifier'].strip()

    # fields 1-14 in output order; '' marks a field that is left empty
    leadingFields = (
        FIELD1,                         # field 1
        i,                              # field 2
        '',                             # field 3
        qualifier,                      # field 4
        r['termID'],                    # field 5
        mgi_utils.prvalue(e[0]),        # field 6
        mgi_utils.prvalue(e[1]),        # field 7
        mgi_utils.prvalue(e[2]),        # field 8
        dag[termKey],                   # field 9
        '',                             # field 10
        '',                             # field 11
        FIELD12,                        # field 12
        '',                             # field 13
        str(r['mDate']),                # field 14
    )
    for fieldValue in leadingFields:
        fp.write(fieldValue + TAB)

    # field 15 is the last one: no trailing TAB, just the line terminator
    fp.write(FIELD15)
    fp.write(CRT)
예제 #4
0
def createBCP():
    '''
    Build <datadir>/<table>.bcp from sequence/probe accession matches.

    Steps, each bracketed by timestamped progress messages:
      1. sequences1: pairs of ACC_Accession rows (MGI type 19 and MGI type
         3) that share a logical DB and a case-insensitive accID.
      2. rows whose probe appears in the "excluded" table are deleted.
      3. sequences2: sequences1 joined to ACC_AccessionReference.
      4. one output line per (sequenceKey, probeKey, refsKey) group, using
         the maximum userKey and mdate of the group.

    The output file is closed even when a query or a write fails.
    '''

    outBCP = open('%s/%s.bcp' % (datadir, table), 'w')

    try:
        print('sequences1 begin...%s' % (mgi_utils.date()))
        db.sql('''select s._Object_key as sequenceKey, p._Object_key as probeKey, p._Accession_key 
		INTO TEMPORARY TABLE sequences1 
		from ACC_Accession s, ACC_Accession p 
		where s._MGIType_key = 19 
		and lower(s.accID) = lower(p.accID) 
		and p._MGIType_key = 3 
		and s._LogicalDB_key = p._LogicalDB_key
		''', None)
        db.sql('create index idx2 on sequences1 (sequenceKey)', None)
        db.sql('create index idx3 on sequences1 (probeKey)', None)
        db.sql('create index idx4 on sequences1 (_Accession_key)', None)
        print('sequences1 end...%s' % (mgi_utils.date()))

        print('deletion begin...%s' % (mgi_utils.date()))
        db.sql('delete from sequences1 using excluded e where sequences1.probeKey = e._Probe_key', None)
        print('deletion end...%s' % (mgi_utils.date()))
        db.commit()

        print('sequences2 begin...%s' % (mgi_utils.date()))
        db.sql('''select s.sequenceKey, s.probeKey, ar._Refs_key as refskey, 
			ar._ModifiedBy_key as userKey, ar.modification_date as mdate 
		INTO TEMPORARY TABLE sequences2 
		from sequences1 s, ACC_AccessionReference ar 
		where s._Accession_key = ar._Accession_key
		''', None)
        db.sql('create index idx5 on sequences2 (sequenceKey, probeKey, refsKey, userKey, mdate)', None)
        db.sql('create index idx6 on sequences2 (userKey)', None)
        db.sql('create index idx7 on sequences2 (mdate)', None)
        print('sequences2 end...%s' % (mgi_utils.date()))

        print('final begin...%s' % (mgi_utils.date()))
        results = db.sql('''select distinct sequenceKey, probeKey, refsKey, 
		max(userKey) as userKey, max(mdate) as mdate 
		from sequences2 
		group by sequenceKey, probeKey, refsKey
		''', 'auto')
        print('final end...%s' % (mgi_utils.date()))

        for r in results:
            # userKey is written twice and loaddate twice, in that order
            outBCP.write(mgi_utils.prvalue(r['sequenceKey']) + DL + \
                mgi_utils.prvalue(r['probeKey']) + DL + \
                mgi_utils.prvalue(r['refsKey']) + DL + \
                r['mdate'] + DL + \
                mgi_utils.prvalue(r['userKey']) + DL + mgi_utils.prvalue(r['userKey']) + DL + \
                loaddate + DL + loaddate + NL)
    finally:
        # release the file handle on both the success and the failure path
        outBCP.close()
예제 #5
0
def writeAccBCP():
    '''
    # requires:
    #
    # effects:
    #	Writes one accFile line for every WRK_EntrezGene_Bucket0 row of
    #	the current taxId.  Rows selected with refRequired = 1 are handled
    #	first and also get an accrefFile line.
    #	The global accKey is incremented once per row written.
    #
    # returns:
    #	nothing
    #
    '''

    global accKey, userKey

    accFormat = '%d|%s|%s|%s|%d|%d|%s|%d|1|%s|%s|%s|%s\n'
    accrefFormat = '%d|%s|%s|%s|%s|%s\n'

    columns = 'select _Object_key, _LogicalDB_key, accID, private, geneID ' + \
        'from WRK_EntrezGene_Bucket0 '

    # (where clause, also write an accrefFile line?)
    # records that require a reference come first
    passes = (
        ('where taxID = %s and refRequired = 1 ' % (taxId), True),
        ('where taxID = %s and refRequired = 0' % (taxId), False),
    )

    for whereClause, withReference in passes:

        rows = db.sql(columns + whereClause, 'auto')

        for row in rows:

            # an _Object_key of -1 is resolved through the geneID lookup
            if row['_Object_key'] == -1:
                objectKey = geneIDtoMarkerKey[row['geneID']]
            else:
                objectKey = row['_Object_key']

            prefixPart, numericPart = accessionlib.split_accnum(row['accID'])

            accFile.write(accFormat % (
                accKey,
                row['accID'],
                mgi_utils.prvalue(prefixPart),
                mgi_utils.prvalue(numericPart),
                row['_LogicalDB_key'],
                objectKey,
                mgiTypeKey,
                row['private'],
                userKey, userKey,
                loaddate, loaddate))

            if withReference:
                accrefFile.write(accrefFormat % (
                    accKey, referenceKey, userKey, userKey, loaddate, loaddate))

            accKey = accKey + 1
예제 #6
0
def qtl4():
    '''
    Write to fp4 the markers (type key 6, status key 1, organism key 1,
    without any MRK_Notes row) that are associated through "refs" with
    reference key 22864 or 85477: one tab-delimited line per row followed
    by a "(N rows affected)" trailer.
    '''

    fp4.write('QTL markers that have assigned reference of J:23000 or J:85000\n')

    sql = '''
          select a1.accID as mgiID, a2.accID as refID, m.symbol, m.name
          from MRK_Marker m, ACC_Accession a1, ACC_Accession a2, refs r
          where m._Marker_Type_key = 6
	  and m._Marker_Status_key = 1
          and m._Organism_key = 1
          and m._Marker_key = a1._Object_key
          and a1._MGIType_key = 2
          and a1._Logicaldb_key = 1
          and a1.prefixpart = 'MGI:'
          and a1.preferred = 1
          and m._Marker_key = r._Marker_key
	  and r._Refs_key in (22864, 85477)
    	  and r._Refs_key = a2._Object_key
    	  and a2._MGIType_key = 1
    	  and a2._Logicaldb_key = 1
    	  and a2.prefixpart = 'J:'
    	  and a2.preferred = 1
	  and not exists (select 1 from MRK_Notes n where m._Marker_key = n._Marker_key)
	  order by symbol
        '''
    rows = db.sql(sql, 'auto')

    for row in rows:
        # every column is followed by TAB, then the line is terminated
        columns = (
            row['mgiID'],
            mgi_utils.prvalue(row['refID']),
            row['symbol'],
            row['name'],
        )
        for column in columns:
            fp4.write(column + TAB)
        fp4.write(CRT)

    fp4.write(CRT + '(%d rows affected)' % (len(rows)) + CRT)
예제 #7
0
파일: snapshot.py 프로젝트: mgijax/ei
def history():
    '''
    Write the "History" section for markerKey to fp: a three-line banner,
    then one line per MRK_History_View row in sequenceNum order, with each
    column left-justified to a fixed width and separated by TAB.
    '''

    fp.write('#\n# History\n#\n')

    rows = db.sql('''
	    select name, history, historyName, event, eventReason, symbol, markerName,
		   edate = convert(char(10), event_date, 101),
		   cdate = convert(char(10), creation_date, 101)
	    from MRK_History_View
	    where _Marker_key =  %s
	    order by sequenceNum
	    ''' % (markerKey), 'auto')

    for r in rows:
        # (value, width) for every column that is followed by TAB;
        # only edate is passed through prvalue
        paddedColumns = (
            (r['history'], 15),
            (r['name'], 35),
            (r['historyName'], 35),
            (r['symbol'], 15),
            (r['markerName'], 35),
            (r['event'], 15),
            (r['eventReason'], 15),
            (mgi_utils.prvalue(r['edate']), 15),
        )
        for value, width in paddedColumns:
            fp.write(value.ljust(width) + TAB)

        # last column ends the line
        fp.write(r['cdate'].ljust(15) + CRT)
예제 #8
0
파일: snapshot.py 프로젝트: mgijax/ei
def goAnnotations():
    '''
    Write the "GO annotations" section for markerKey to fp: a three-line
    banner, then one line per annotation/evidence row (annotation type key
    1000) ordered by accID and jnumID, with each column left-justified to
    a fixed width and separated by TAB.
    '''

    fp.write('#\n# GO annotations\n#\n')

    rows = db.sql('''
	    select a.accID, a.term, a.qualifier, e.evidenceCode, e.jnumID, e.createdBy,
		   cdate = convert(char(10), a.creation_date, 101)
	    from VOC_Annot_View a, VOC_Evidence_View e
	    where a._AnnotType_key = 1000 and _Object_key =  %s
	    and a._Annot_key = e._Annot_key
	    order by a.accID, e.jnumID
	    ''' % (markerKey), 'auto')

    for r in rows:
        # (value, width) for every column that is followed by TAB;
        # only the qualifier is passed through prvalue
        paddedColumns = (
            (r['accID'], 15),
            (mgi_utils.prvalue(r['qualifier']), 5),
            (r['jnumID'], 10),
            (r['evidenceCode'], 10),
            (r['createdBy'], 30),
            (r['term'], 100),
        )
        for value, width in paddedColumns:
            fp.write(value.ljust(width) + TAB)

        # last column ends the line
        fp.write(r['cdate'].ljust(15) + CRT)
예제 #9
0
def processFile():
    '''
    # requires:
    #
    # effects:
    #	Reads inputFile line by line, skipping lines that start with '!'.
    #	For 'Library' the bad name is column 1, otherwise column 2; the
    #	good name is always column 3.
    #	The good name is looked up in the table that matches parseType.
    #	If it exists and differs from the bad name, a translation line
    #	is written to outputFile; for 'Strains' the line starts with the
    #	accID returned by the lookup, otherwise with an empty column.
    #	If it does not exist, the name is reported in errorFile.
    #
    # returns:
    #	nothing
    #
    '''

    # For each line in the input file

    for line in inputFile.readlines():

        if line[0] == '!':
            continue

        tokens = line[:-1].split(delim)

        try:
            if parseType == 'Library':
                badName = tokens[0]
            else:
                badName = tokens[1]
            goodName = tokens[2]

        except IndexError:
            # too few columns on this line
            errorFile.write('Invalid line: %s\n' % (line))
            continue

        # NOTE(review): parseType is expected to be one of the four values
        # below; any other value leaves "results" unset (or stale).
        if parseType == 'Tissues':
            results = db.sql('select _Tissue_key from PRB_Tissue where tissue = "%s"' % (goodName), 'auto')
        elif parseType == 'Cell':
            results = db.sql('select term from VOC_Term where term = "%s"' % (goodName), 'auto')
        elif parseType == 'Library':
            results = db.sql('select _Source_key from PRB_Source where name = "%s"' % (goodName), 'auto')
        elif parseType == 'Strains':
            results = db.sql('select a.accID from PRB_Strain_Acc_View a, PRB_Strain s ' + \
                'where s.strain = "%s" ' % (goodName) + \
                'and s._Strain_key *= a._Object_key ' + \
                'and a._LogicalDB_key = 1 ' + \
                'and a.prefixPart = "MGI:" ' + \
                'and a.preferred = 1', 'auto')

        if len(results) > 0 and badName != goodName:
            # The strain query is the only one that selects accID.  This
            # test used to compare against 'strain', which never matches
            # the 'Strains' value used above, so the accID was never written.
            if parseType == 'Strains':
                outputFile.write(mgi_utils.prvalue(results[0]['accID']) + delim + goodName + delim + badName + delim + createdBy + '\n')
            else:
                outputFile.write(delim + goodName + delim + badName + delim + createdBy + '\n')
        elif len(results) == 0:
            errorFile.write('Invalid good name: %s\n' % (goodName))
예제 #10
0
def createBCPfile():
    '''
    Write <outDir>/<table>.bcp with one line per row of the GO term query
    (vocabulary key 4, preferred accession of logical DB 'GO', joined to
    the term's DAG).  Columns: a running cache key starting at 1, term
    key, DAG key, DAG abbreviation, accID and term, separated by COLDL and
    terminated by LINEDL.

    The output file is closed even when the query or a write fails.
    '''

    print('Creating %s.bcp...' % (table))

    cacheBCP = open(outDir + '/%s.bcp' % (table), 'w')

    try:
        results = db.sql('''select t._Term_key, n._DAG_key, t.term, a.accID, d.abbreviation 
		from VOC_Term t, ACC_Accession a, ACC_LogicalDB ldb, VOC_VocabDAG vd, DAG_Node n, DAG_DAG d
		where t._Vocab_key = 4 
		and t._Term_key = a._Object_key 
		and a._MGIType_key = 13 
		and a.preferred = 1 
		and ldb._logicaldb_key = a._logicaldb_key
		and ldb.name = \'GO\'
		and t._Vocab_key = vd._Vocab_key 
		and t._Term_key = n._Object_key 
		and vd._DAG_key = n._DAG_key 
		and n._DAG_key = d._DAG_key 
		''', 'auto')

        cacheKey = 1
        for r in results:
            cacheBCP.write(mgi_utils.prvalue(cacheKey) + COLDL + \
                           mgi_utils.prvalue(r['_Term_key']) + COLDL + \
                           mgi_utils.prvalue(r['_DAG_key']) + COLDL + \
                           mgi_utils.prvalue(r['abbreviation']) + COLDL + \
                           mgi_utils.prvalue(r['accID']) + COLDL + \
                           mgi_utils.prvalue(r['term']) + LINEDL)
            cacheKey = cacheKey + 1
    finally:
        # release the file handle on both the success and the failure path
        cacheBCP.close()
예제 #11
0
def qtl3():
    '''
    Write the "mapping = no/alleles = yes" QTL report to fp3.

    The module-level query1 is extended with three filters (no MRK_Notes
    row, no MLD_Expt_Marker row, at least one ALL_Allele row with
    isWildType = 0); one tab-delimited line is written per row, followed
    by a "(N rows affected)" trailer.
    '''

    fp3.write('QTL markers that have no mapping but have allele associated (no/yes)\n')
    fp3.write('mapping = no/alleles = yes\n')

    rows = db.sql('''%s
	  and not exists (select 1 from MRK_Notes n where m._Marker_key = n._Marker_key)
    	  and not exists (select 1 from MLD_Expt_Marker mld where m._Marker_key = mld._Marker_key)
    	  and exists (select 1 from ALL_Allele al where m._Marker_key = al._Marker_key and al.isWildType = 0)
	  order by symbol
    	''' % (query1), 'auto')

    for row in rows:
        # every column is followed by TAB, then the line is terminated
        columns = (
            row['mgiID'],
            mgi_utils.prvalue(row['refID']),
            row['symbol'],
            row['name'],
        )
        for column in columns:
            fp3.write(column + TAB)
        fp3.write(CRT)

    fp3.write(CRT + '(%d rows affected)' % (len(rows)) + CRT)
예제 #12
0
def qtl5():
    '''
    Write to fp5 the markers with type key 6 and status key 3, ordered by
    creation date: one tab-delimited line per row (creation date, status,
    MGI ID, reference ID of the history row with sequenceNum 1, symbol,
    name) followed by a "(N rows affected)" trailer.
    '''

    fp5.write('QTL markers that are Reserved\n')

    sql = '''
          select m._Marker_key, 
		to_char(m.creation_date, 'MM/dd/yyyy') as creation_date,
		t.status,
	  	a1.accID as mgiID, 
		r.jnumID as refID, 
		m.symbol, 
		m.name
          from ACC_Accession a1, 
		MRK_Status t,
		MRK_Marker m,
		MRK_History h,
		BIB_Citation_Cache r
          where m._Marker_Type_key = 6
          and m._Marker_Status_key = 3
          and m._Marker_key = a1._Object_key
          and a1._MGIType_key = 2
          and a1._Logicaldb_key = 1
          and a1.prefixpart = 'MGI:'
          and a1.preferred = 1
	  and m._Marker_Status_key = t._Marker_Status_key
	  and m._Marker_key = h._Marker_key
	  and h.sequenceNum = 1
	  and h._Refs_key = r._Refs_key
	  order by m.creation_date
        '''
    rows = db.sql(sql, 'auto')

    for row in rows:
        # every column is followed by TAB, then the line is terminated
        columns = (
            row['creation_date'],
            row['status'],
            row['mgiID'],
            mgi_utils.prvalue(row['refID']),
            row['symbol'],
            row['name'],
        )
        for column in columns:
            fp5.write(column + TAB)
        fp5.write(CRT)

    fp5.write(CRT + '(%d rows affected)' % (len(rows)) + CRT)
예제 #13
0
파일: snapshot.py 프로젝트: mgijax/ei
def marker():
    '''
    Write the "Marker" section for markerKey to fp: a three-line banner,
    then one line per MRK_Marker row with symbol, name, chromosome,
    cytogenetic offset and creation date, each left-justified to a fixed
    width and separated by TAB.
    '''

    fp.write('#\n# Marker\n#\n')

    rows = db.sql('''
	    select symbol, name, chromosome, cytogeneticOffset,
		   cdate = convert(char(10), creation_date, 101)
	    from MRK_Marker 
	    where _Marker_key =  %s
	    ''' % (markerKey), 'auto')

    for r in rows:
        # (value, width) for every column that is followed by TAB;
        # only the cytogenetic offset is passed through prvalue
        paddedColumns = (
            (r['symbol'], 30),
            (r['name'], 30),
            (r['chromosome'], 5),
            (mgi_utils.prvalue(r['cytogeneticOffset']), 5),
        )
        for value, width in paddedColumns:
            fp.write(value.ljust(width) + TAB)

        # last column ends the line
        fp.write(r['cdate'].ljust(15) + CRT)
예제 #14
0
def processImagePaneFile():
    '''
    Turn every tab-delimited line of inPaneFile (pix ID, pane label, pane
    width, pane height) into one outPaneFile record keyed by the global
    paneKey, which is incremented per line.  The pix ID is translated
    through imagePix and the pane X/Y origin is always written as 0.

    A line with fewer than four columns is reported through exit().

    Returns the number of input lines read.
    '''

    global imagePix, paneKey

    lineNum = 0

    for line in inPaneFile.readlines():

        lineNum += 1

        # drop the final character of the line and split on tabs
        tokens = line[:-1].split('\t')

        try:
            pixID = tokens[0]
            paneLabel = tokens[1]
            paneWidth = tokens[2]
            paneHeight = tokens[3]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        paneX = 0
        paneY = 0

        # output columns in order; loaddate is written twice
        record = [
            str(paneKey),
            str(imagePix[pixID]),
            mgi_utils.prvalue(paneLabel),
            str(paneX),
            str(paneY),
            str(paneWidth),
            str(paneHeight),
            loaddate,
            loaddate,
        ]
        outPaneFile.write(TAB.join(record) + CRT)

        paneKey += 1

    return lineNum
예제 #15
0
def writeRecord(results, labelStatusKey, priority, labelType, labelTypeName):
    '''
    Write one outBCP line per row of results and print a progress message.

    results        -- rows providing '_Allele_key' and 'label' (and
                      'labelTypeName' when labelTypeName is None)
    labelStatusKey -- written unchanged on every line
    priority       -- written unchanged on every line
    labelType      -- written unchanged on every line
    labelTypeName  -- name written on every line; when None, each row's
                      own 'labelTypeName' value is used instead

    Columns are separated by COLDL and each line ends with LINEDL;
    loaddate is written twice at the end of every line.
    '''

    for r in results:

        # Resolve the name per row.  Assigning to the labelTypeName
        # parameter here would make the first row's name stick for every
        # following row, because the parameter would no longer be None.
        if labelTypeName is None:
            rowLabelTypeName = r['labelTypeName']
        else:
            rowLabelTypeName = labelTypeName

        outBCP.write(mgi_utils.prvalue(r['_Allele_key']) + COLDL + \
            mgi_utils.prvalue(labelStatusKey) + COLDL + \
            mgi_utils.prvalue(priority) + COLDL + \
            mgi_utils.prvalue(r['label']) + COLDL + \
            mgi_utils.prvalue(labelType) + COLDL + \
            mgi_utils.prvalue(rowLabelTypeName) + COLDL + \
            loaddate + COLDL + \
            loaddate + LINEDL)

    print('processed (%d) records...%s' % (len(results), mgi_utils.date()))
예제 #16
0
    if providers.has_key(key):
	    provider = providers[key]
    else:
	    provider = noneDisplay

    fp.write(r['accID'] + TAB)
    fp.write(r['markerType'] + TAB)
    fp.write(featureType + TAB)
    fp.write(r['symbol'] + TAB)
    fp.write(r['name'] + TAB)

    # prefer to display genomic chromosome (associated with coordinates) 
    # rather than genetic chromosome (associated with cM / cytoband)
    if r['genomicChromosome']:
	fp.write(r['genomicChromosome'] + TAB)
    else:
	fp.write(r['chromosome'] + TAB)

    fp.write(str(r['startCoordinate']) + TAB)
    fp.write(str(r['endCoordinate']) + TAB)
    fp.write(mgi_utils.prvalue(r['strand']) + TAB)
    fp.write(genomeBuild + TAB)
    fp.write(provider + TAB)
    fp.write(r['displayName'] + TAB)

    fp.write(CRT)

reportlib.finish_nonps(fp)
db.useOneConnection(0)
예제 #17
0
for r in results:

    key = r['_marker_key']

    # Markers whose feature type keys include 11928467
    # ('mutation defined region') are left out of the report.
    # Markers without feature types keep the empty default.
    fTypes = ''
    if key in featureTypes:
        mcvKeyList = featureTypeByKey[key]
        if 11928467 in mcvKeyList:
            continue
        fTypes = '|'.join(featureTypes[key])

    fp1.write(mgi_utils.prvalue(r['accid']) + TAB)

    # the genomic chromosome wins over the genetic one when it is set
    fp1.write((r['genomicChromosome'] or r['chromosome']) + TAB)

    fp1.write(r['cmposition'] + TAB)

    # start / end / strand of the first coordinate record, or three
    # empty columns when the marker has no coordinates
    if key in coords:
        for coordField in ('startC', 'endC', 'strand'):
            fp1.write(mgi_utils.prvalue(coords[key][0][coordField]) + TAB)
    else:
        fp1.write(TAB + TAB + TAB)
예제 #18
0
# accession IDs of logical DB 9 attached to each marker in "markers"
results = db.sql('''
	select m._Marker_key, a.accID 
	from markers m, ACC_Accession a 
        where m._Marker_key = a._Object_key 
        and a._MGIType_key = 2 
        and a._LogicalDB_key = 9 
	order by m._Marker_key, a.accID
	''', 'auto')

# marker key -> list of accession IDs, in query order
seqIDs = {}
for r in results:
    seqIDs.setdefault(r['_Marker_key'], []).append(r['accID'])

# one report line per marker: symbol, name, MGI ID, chromosome, then the
# space-separated accession IDs (left empty when the marker has none)
results = db.sql('select * from markers order by symbol, mgiID', 'auto')
for r in results:

    leadingColumns = [
        mgi_utils.prvalue(r['symbol']),
        mgi_utils.prvalue(r['name']),
        mgi_utils.prvalue(r['mgiID']),
        mgi_utils.prvalue(r['chromosome']),
    ]
    fp.write(reportlib.TAB.join(leadingColumns) + reportlib.TAB)

    if r['_Marker_key'] in seqIDs:
        fp.write(' '.join(seqIDs[r['_Marker_key']]))

    fp.write(reportlib.CRT)

reportlib.finish_nonps(fp)


예제 #19
0
def processFile():
    # Purpose: Read the input file, resolve values to keys. Create bcp files
    # Returns: 0
    # Assumes: file descriptors have been initialized
    # Effects: calls exit() if a line has fewer than 16 tab-delimited columns;
    #          lines whose values cannot be resolved are skipped (the
    #          verify* helpers are handed fpErrorFile for reporting);
    #          when DEBUG is 'false', calls ACC_setMax and commits
    # Throws: Nothing

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup, alleleMutationKey

    lineNum = 0
    # For each line in the input file

    for line in fpInputFile.readlines():

        lineNum = lineNum + 1
        print('%s: %s' % (lineNum, line))
        # Split the line into tokens
        tokens = line[:-1].split('\t')
        try:
            markerID = tokens[0]
            markerSymbol = tokens[1]
            mutationType = tokens[2]  # IMPC allele type
            description = tokens[3]
            colonyID = tokens[4]
            strainOfOrigin = tokens[5]
            alleleSymbol = tokens[6]
            alleleName = tokens[7]
            inheritanceMode = tokens[8]
            alleleType = tokens[9]  # IMPC allele class
            alleleSubType = tokens[10]
            alleleStatus = tokens[11]
            transmission = tokens[12]
            collection = tokens[13]
            jNum = tokens[14]
            createdBy = tokens[15]

        except IndexError:
            # too few columns on this line
            print('exiting with invalid line')
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        print('validating data and getting keys')
        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, fpErrorFile)

        # _vocab_key = 36 (Allele Molecular Mutation)
        # the column may hold several ';'-separated terms
        mutationList = mutationType.split(';')
        if len(mutationList) > 1:
            print('mutationList: %s' % mutationList)
        mutationKeyList = []
        for m in mutationList:
            mutationKey = loadlib.verifyTerm('', 36, m, lineNum, fpErrorFile)
            if mutationKey != 0:
                mutationKeyList.append(mutationKey)
        if len(mutationKeyList) > 1:
            print('mutationKeyList: %s' % mutationKeyList)
        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum,
                                                       fpErrorFile)

        # _vocab_key = 35 (Allele Inheritance Mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode,
                                                lineNum, fpErrorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum,
                                           fpErrorFile)

        # _vocab_key = 93 (Allele Subtype)
        # optional, ';'-separated; empty entries are ignored
        subTypeList = alleleSubType.split(';')
        if len(subTypeList) > 1:
            print('subTypeList: %s' % subTypeList)
        subTypeKeyList = []
        for s in subTypeList:
            if s != '':  # if we have a subtype, get its key
                subTypeKey = loadlib.verifyTerm('', 93, s, lineNum,
                                                fpErrorFile)
                if subTypeKey != 0:
                    subTypeKeyList.append(subTypeKey)
        if len(subTypeKeyList) > 1:
            print('subTypeKeyList: %s' % subTypeKeyList)

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum,
                                             fpErrorFile)

        # _vocab_key = 61 (Allele Transmission)
        transmissionKey = loadlib.verifyTerm('', 61, transmission, lineNum,
                                             fpErrorFile)

        # _vocab_key = 92
        collectionKey = loadlib.verifyTerm('', 92, collection, lineNum,
                                           fpErrorFile)

        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # reference
        refKey = loadlib.verifyReference(jNum, lineNum, fpErrorFile)

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, fpErrorFile)
        if createdByKey == 0:
            continue

        print('checking for missing data')
        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        if markerKey == 0 \
                or mutationKeyList == [] \
                or strainOfOriginKey == 0 \
                or inheritanceModeKey == 0 \
                or alleleTypeKey == 0 \
                or alleleStatusKey == 0 \
                or transmissionKey == 0 \
                or collectionKey == 0 \
                or refKey == 0 \
                or createdByKey == 0:
            print('missing data, skipping this line')
            continue

        # if no errors, process the allele
        print('writing to allele file')
        # allele (isWildType = 0)
        fpAlleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
            alleleStatusKey, transmissionKey, collectionKey, alleleSymbol, alleleName, \
            isExtinct, isMixed, refKey, markerStatusKey, \
            createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutationKey in mutationKeyList:
            fpMutationFile.write('%s|%s|%s|%s|%s\n' \
                % (alleleMutationKey, alleleKey, mutationKey, loaddate, loaddate))
            alleleMutationKey += 1

        # reference associations

        # Original
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, origRefTypeKey, \
                        createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # Molecular
        fpRefFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (refAssocKey, refKey, alleleKey, mgiTypeKey, molRefTypeKey, \
                        createdByKey, createdByKey, loaddate, loaddate))
        refAssocKey = refAssocKey + 1

        # allele subtype
        for subTypeKey in subTypeKeyList:
            fpAnnotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                    % (annotKey, annotTypeKey, alleleKey, subTypeKey, \
                            qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        # MGI Accession ID for the allele
        alleleID = '%s%s' % (mgiPrefix, mgiKey)
        fpAccFile.write('%s|%s|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, alleleID, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note
        # molecular note

        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, molecularNoteTypeKey, description,\
               createdByKey, createdByKey, loaddate, loaddate))

        noteKey = noteKey + 1

        # colony ID note
        # Same nine columns as the molecular note above.  The format
        # string used to have only eight placeholders for the nine values,
        # which raises TypeError ("not all arguments converted").
        fpNoteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (noteKey, alleleKey, mgiTypeKey, colonyIdNoteTypeKey, colonyID, \
               createdByKey, createdByKey, loaddate, loaddate))

        noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs
        # as the first field

        fpNewAlleleRptFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
        % (mgi_utils.prvalue(alleleID), \
        mgi_utils.prvalue(alleleSymbol), \
        mgi_utils.prvalue(alleleName), \
        mgi_utils.prvalue(markerID), \
        mgi_utils.prvalue(markerSymbol), \
        mgi_utils.prvalue(colonyID)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #
    # Update the AccessionMax value
    #
    # NOTE(review): lineNum counts every line read, including skipped ones,
    # while mgiKey only advances for alleles actually written -- confirm
    # that passing lineNum here is intended.
    print('DEBUG: %s' % DEBUG)
    if DEBUG == 'false':
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()

    return 0
예제 #20
0
def processFile():
    '''
    # requires:
    #
    # effects:
    #	Reads inputFile line by line, skipping lines that start with '!'.
    #	For 'Library' the bad name is column 1, otherwise column 2; the
    #	good name is always column 3.
    #	The good name is looked up in the table that matches parseType.
    #	If it exists and differs from the bad name, a translation line
    #	is written to outputFile; for 'Strains' the line starts with the
    #	accID returned by the lookup, otherwise with an empty column.
    #	If it does not exist, the name is reported in errorFile.
    #
    # returns:
    #	nothing
    #
    '''

    # For each line in the input file

    for line in inputFile.readlines():

        if line[0] == '!':
            continue

        tokens = line[:-1].split(delim)

        try:
            if parseType == 'Library':
                badName = tokens[0]
            else:
                badName = tokens[1]
            goodName = tokens[2]

        except IndexError:
            # too few columns on this line
            errorFile.write('Invalid line: %s\n' % (line))
            continue

        # NOTE(review): parseType is expected to be one of the four values
        # below; any other value leaves "results" unset (or stale).
        if parseType == 'Tissues':
            results = db.sql(
                'select _Tissue_key from PRB_Tissue where tissue = "%s"' %
                (goodName), 'auto')
        elif parseType == 'Cell':
            results = db.sql(
                'select term from VOC_Term where term = "%s"' % (goodName),
                'auto')
        elif parseType == 'Library':
            results = db.sql(
                'select _Source_key from PRB_Source where name = "%s"' %
                (goodName), 'auto')
        elif parseType == 'Strains':
            results = db.sql(
                'select a.accID from PRB_Strain_Acc_View a, PRB_Strain s ' +
                'where s.strain = "%s" ' % (goodName) +
                'and s._Strain_key *= a._Object_key ' +
                'and a._LogicalDB_key = 1 ' +
                'and a.prefixPart = "MGI:" ' +
                'and a.preferred = 1', 'auto')

        if len(results) > 0 and badName != goodName:
            # The strain query is the only one that selects accID.  This
            # test used to compare against 'strain', which never matches
            # the 'Strains' value used above, so the accID was never written.
            if parseType == 'Strains':
                outputFile.write(
                    mgi_utils.prvalue(results[0]['accID']) + delim + goodName +
                    delim + badName + delim + createdBy + '\n')
            else:
                outputFile.write(delim + goodName + delim + badName + delim +
                                 createdBy + '\n')
        elif len(results) == 0:
            errorFile.write('Invalid good name: %s\n' % (goodName))
예제 #21
0
    # column 7
    if genomicToTranscript.has_key(genomicID):
        fp.write(string.join(genomicToTranscript[genomicID], ' '))
    fp.write(TAB)

    # column 8
    if genomicToProtein.has_key(genomicID):
        fp.write(string.join(genomicToProtein[genomicID], ' '))
    fp.write(TAB)

    # column 9: feature types
    if featureTypes.has_key(r['_Marker_key']):	
        fp.write(string.join(featureTypes[r['_Marker_key']], '|'))
        fp.write(TAB)

    # column 10-11-12
    if coords.has_key(key):
        fp.write(mgi_utils.prvalue(coords[r['_Marker_key']][0]['startC']) + TAB)
        fp.write(mgi_utils.prvalue(coords[r['_Marker_key']][0]['endC']) + TAB)
        fp.write(mgi_utils.prvalue(coords[r['_Marker_key']][0]['strand']) + TAB)
    else:
        fp.write(TAB + TAB + TAB)

    # column 13: biotypes
    if bioTypes.has_key(r['_Marker_key']):	
        fp.write(string.join(bioTypes[r['_Marker_key']], '|'))
    fp.write(CRT)

reportlib.finish_nonps(fp)
예제 #22
0
def processFile():
    '''
    Read the tab-delimited primer input file and write one set of output
    rows per input line.

    Each line must carry at least 12 tab-separated fields, in this order:
    marker symbol, '|'-delimited marker IDs, name, J number, region covered,
    sequence 1, sequence 2, product size, notes, '|'-delimited sequence IDs,
    '|'-delimited aliases, created-by login.

    effects:
        Writes to primerFile, markerFile, refFile, aliasFile, accFile,
        accRefFile, noteFile, newPrimerFile and errorFile.
        Advances the global counters primerKey, refKey, aliasKey, accKey
        and mgiKey.
        Calls exit(1, ...) for a line with too few fields.
        Unless DEBUG is set, runs ACC_setMax with the number of lines read.

    returns:
        nothing
    '''

    global primerKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator; slicing with [:-1] would eat the
        # last character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            markerSymbol = tokens[0]    # only echoed to newPrimerFile
            markerIDs = tokens[1].split('|')
            name = tokens[2]
            jnum = tokens[3]
            regionCovered = tokens[4]
            sequence1 = tokens[5]
            sequence2 = tokens[6]
            productSize = tokens[7]
            notes = tokens[8]
            sequenceIDs = tokens[9]
            aliasList = tokens[10].split('|')
            createdBy = tokens[11]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # marker IDs: keep (ID, key) pairs so that later messages can name
        # the ID that actually caused the problem
        markerPairs = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) == 0:
                continue

            if markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            else:
                markerPairs.append((markerID, markerKey))

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # sequence IDs
        seqAccList = sequenceIDs.split('|')

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the primer

        primerFile.write('%d\t%s\t\t%d\t%d\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\n'
            % (primerKey, name, NA, vectorKey, segmentTypeKey,
               mgi_utils.prvalue(sequence1), mgi_utils.prvalue(sequence2),
               mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(productSize),
               createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once on the line is reported and is not
        # written to markerFile at all
        markerKeys = [pair[1] for pair in markerPairs]
        for markerID, markerKey in markerPairs:
            if markerKeys.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n'
                    % (primerKey, markerKey, referenceKey, relationship,
                       createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' % (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n'
            % (refKey, primerKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n'
                % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the primer

        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n'
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, primerKey, mgiTypeKey,
               createdByKey, createdByKey, loaddate, loaddate))

        # echo the input record with the new MGI ID appended as the last field
        newPrimerFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n'
            % (markerSymbol, '|'.join(markerIDs), name, jnum, regionCovered,
               sequence1, sequence2, productSize, notes, sequenceIDs, createdBy,
               mgiPrefix, mgiKey))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        for acc in seqAccList:

            if len(acc) == 0:
                continue

            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n'
                % (accKey, acc, prefixPart, numericPart, logicalDBKey, primerKey, mgiTypeKey,
                   createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n'
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        # notes

        if len(notes) > 0:
            noteFile.write('%s|1\t%s\t%s\t%s\n'
                % (primerKey, notes, loaddate, loaddate))

        refKey = refKey + 1
        primerKey = primerKey + 1

    #   end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
예제 #23
0
def processFileIKMC(createMCL, createNote, setStatus, \
        symbol, ikmcSymbol, mutantCellLine, ikmcNotes, createdByKey, existingAlleleID):
    '''
    Handle one IKMC-only input record: optionally add a mutant cell line,
    queue an allele status update, and add/update the IKMC colony note.

    createMCL : '' (skip) or an allele key; a value of 0 means "use the
                allele key saved in alleleLookup for this symbol"
    createNote: '' (skip), or one of
                    0::colony(s)             note text for the saved note key
                    <allele key>::           add/extend note for existing allele
                    <note key>||<old notes>  rewrite an existing note
    setStatus : '' (skip) or the allele key whose status is updated

    effects:
        May call addMutantCellLine, append SQL text to ikmcSQLs, write to
        noteFile/noteChunkFile, add an entry to alleleLookup and advance the
        global noteKey.  Always writes one line to newAlleleFile.

    returns:
        nothing
    '''

    global noteKey, ikmcSQLs

    #
    # add new MCLs to new/existing alleles
    #
    if len(createMCL) > 0:

        if DEBUG:
            print('%s %s' % (symbol, createMCL))

        if int(createMCL) == 0:
            aKey = alleleLookup[symbol][0][0]
        else:
            aKey = createMCL

        addMutantCellLine(aKey, mutantCellLine, createdByKey)

    #
    # set allele/status = Approved for existing "reserved" alleles
    #
    if len(setStatus) > 0:
        ikmcSQLs.append('update ALL_Allele set _Allele_Status_key = 847114 where _Allele_key = %s' % (setStatus))

    #
    # Add IKMC Colony/Note to a new or existing allele
    #
    # child exists/ikmc note exists : update existing note
    #   || => _Note_key||existing colony notes
    #
    # child exists/ikmc note does not exist : add note
    #   :: => allele/child key
    #
    # new allele/child/non-duplicate IKMC Colony
    #   0::colony(s)
    #
    # blank => do nothing
    #
    # NOTE(review): note text is interpolated into the SQL strings unescaped;
    # a note containing a single quote would produce invalid SQL.
    #

    if len(createNote) > 0:

        if DEBUG:
            print('createNote:  %s' % (symbol))

        tokens = createNote.split('::')

        # Only the key parse decides the format.  Keeping the try this narrow
        # lets a genuine failure below (e.g. a symbol missing from
        # alleleLookup) surface as itself instead of being rerouted into the
        # '||' handler.
        try:
            childKey = int(tokens[0])
        except ValueError:
            childKey = None

        # child exists, note exists : update existing note
        if childKey is None:
            if DEBUG:
                print(createNote)

            tokens = createNote.split('||')
            nKey = tokens[0]
            note = tokens[1] + '|' + ikmcNotes
            ikmcSQLs.append('''update MGI_NoteChunk set note = '%s' where _Note_key = %s;''' % (note, nKey))

        # duplicate child, additional note : add note to new child
        elif childKey == 0:
            nKey = alleleLookup[symbol][0][1]
            note = tokens[1]
            ikmcSQLs.append('''update MGI_NoteChunk set note = '%s' where _Note_key = %s;''' % (note, nKey))

        # child exists, note does not exist : add note to existing child
        else:
            aKey = tokens[0]
            note = ikmcNotes

            if symbol in alleleLookup:
                nKey = alleleLookup[symbol][0][1]
                ikmcSQLs.append('''update MGI_NoteChunk set note = rtrim(note) || '|%s' where _Note_key = %s;''' % (note, nKey))
            else:
                noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n'
                    % (noteKey, aKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey,
                       createdByKey, createdByKey, loaddate, loaddate))

                noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n'
                    % (noteKey, 1, note, createdByKey, createdByKey, loaddate, loaddate))

                # save symbol/aKey/ikmc note key/allele id
                alleleLookup[symbol] = []
                alleleLookup[symbol].append((aKey, noteKey, 'missing allele id (1)'))

                noteKey = noteKey + 1

    #
    # print out the proper allele id
    #
    if len(existingAlleleID) > 0:
        printAlleleID = existingAlleleID
    elif symbol in alleleLookup:
        printAlleleID = alleleLookup[symbol][0][2]
    else:
        printAlleleID = 'missing allele id (2)'

    newAlleleFile.write('%s\t%s\t%s\n'
        % (mgi_utils.prvalue(ikmcNotes),
           mgi_utils.prvalue(printAlleleID),
           mgi_utils.prvalue(ikmcSymbol)))
예제 #24
0
def processFile():
    '''
    Read the tab-delimited allele input file and write the output rows for
    every new allele it describes.

    Each line must carry at least 24 tab-separated fields, in this order:
    marker ID, symbol, name, allele status, allele type, '|'-delimited
    subtypes, collection key, germ line transmission, references
    ('type|ID' pairs separated by '||'), strain of origin, mutant cell line,
    molecular notes, driver notes, IKMC notes, '|'-delimited mutations,
    inheritance mode, isMixed, isExtinct, created-by login, createMCL,
    createNote, setStatus, existing allele ID, IKMC symbol.

    Lines with createMCL, createNote or setStatus filled in are handed to
    processFileIKMC and nothing else is written for them here.

    effects:
        Writes to alleleFile, mutationFile, refFile, annotFile, accFile,
        noteFile, newAlleleFile and (via loadlib) errorFile.
        Adds an entry to alleleLookup for every allele written.
        Advances the global counters alleleKey, refAssocKey, accKey,
        noteKey, mgiKey, annotKey and mutationKey.
        Calls exit(1, ...) for a line with too few fields.
        Unless DEBUG is set, runs ACC_setMax with the number of lines read
        and commits.

    returns:
        nothing
    '''

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey, mutationKey
    global alleleLookup

    # reference association type key for each reference type in the input
    refAssocTypeKeys = {
        'Original': 1011,
        'Transmission': 1023,
        'Molecular': 1012,
    }

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        lineNum = lineNum + 1

        # Strip only the line terminator; slicing with [:-1] would eat the
        # last character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            markerID = tokens[0]
            symbol = tokens[1]
            name = tokens[2]
            alleleStatus = tokens[3]
            alleleType = tokens[4]
            alleleSubtypes = tokens[5]
            collectionKey = tokens[6]
            germLine = tokens[7]
            references = tokens[8]
            strainOfOrigin = tokens[9]
            mutantCellLine = tokens[10]
            molecularNotes = tokens[11]
            driverNotes = tokens[12]    # parsed but not used (see "driver notes" below)
            ikmcNotes = tokens[13]
            mutations = tokens[14]
            inheritanceMode = tokens[15]
            isMixed = tokens[16]
            isExtinct = tokens[17]
            createdBy = tokens[18]
            createMCL = tokens[19]
            createNote = tokens[20]
            setStatus = tokens[21]
            existingAlleleID = tokens[22]
            ikmcSymbol = tokens[23]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if len(createMCL) > 0 or len(createNote) > 0 or len(setStatus) > 0:
            processFileIKMC(createMCL, createNote, setStatus,
                            symbol, ikmcSymbol, mutantCellLine, ikmcNotes,
                            createdByKey, existingAlleleID)
            continue

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation); verified one by one below
        allMutations = mutations.split('|')

        # _vocab_key = 35
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

        # reference
        # NOTE(review): jnum is not assigned anywhere in this function; it is
        # presumably a module-level name -- confirm, otherwise this line
        # raises NameError for every non-IKMC record.
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        # (allMutations is a list and can never equal 0, so it is not tested)

        if 0 in (markerKey, markerStatusKey, alleleStatusKey, alleleTypeKey,
                 germLineKey, inheritanceModeKey, strainOfOriginKey, refKey,
                 createdByKey):
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey,
               alleleStatusKey, germLineKey, collectionKey, symbol, name,
               isExtinct, isMixed, refKey, markerStatusKey,
               createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        for mutation in allMutations:
            mutationTermKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
            mutationFile.write('%s|%s|%s|%s|%s\n'
                % (mutationKey, alleleKey, mutationTermKey, loaddate, loaddate))
            mutationKey = mutationKey + 1

        #
        # allele references
        #
        for reference in references.split('||'):
            refType, refID = reference.split('|')
            # refKey deliberately keeps the key of the last reference; it is
            # echoed to newAlleleFile below
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            # An unknown type used to leave refAssocTypeKey unbound (first
            # reference) or silently reuse the previous reference's type.
            if refType not in refAssocTypeKeys:
                errorFile.write('Invalid Reference Type (%d): %s\n' % (lineNum, refType))
                continue
            refAssocTypeKey = refAssocTypeKeys[refType]

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey,
                   createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        #
        # allele subtypes
        #
        for s in alleleSubtypes.split('|'):

            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n'
                % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey,
                   qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele

        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n'
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey,
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note
        # molecular notes

        if len(molecularNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey,
                   molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # place holder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:

            # nine columns, same layout as the molecular note row above (the
            # format string used to have only eight placeholders for these
            # nine values, which raises TypeError)
            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n'
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey,
                   ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field

        if createdBy == 'ikmc_alleleload':
            newAlleleFile.write('%s\t%s%s\t%s\n'
                % (mgi_utils.prvalue(ikmcNotes),
                   mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey),
                   mgi_utils.prvalue(ikmcSymbol)))
        else:
            # The previous version referenced the undefined names
            # alleleSubtype/collection and its format string did not match
            # the number of values; the row is now built from the list so
            # the two cannot drift apart.
            # NOTE(review): allMutations is a list, so it is echoed as its
            # list representation -- confirm whether the raw 'mutations'
            # text is wanted instead.
            echoFields = (markerID, symbol, name, alleleStatus, alleleType,
                          alleleSubtypes, collectionKey, germLine, references,
                          strainOfOrigin, mutantCellLine, allMutations,
                          inheritanceMode, isMixed, isExtinct, refKey,
                          markerStatusKey, createdBy)
            echoColumns = ['%s' % (mgi_utils.prvalue(field),) for field in echoFields]
            echoColumns.append('%s%s' % (mgi_utils.prvalue(mgiPrefix),
                                         mgi_utils.prvalue(mgiKey)))
            newAlleleFile.write('\t'.join(echoColumns) + '\n')

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = []
        alleleLookup[symbol].append(
            (alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey)))

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #   end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()
예제 #25
0
	and ma._LogicalDB_key = 1 
	and ma.preferred = 1 
	order by p.accID
	''', 'auto')

# Collapse the query rows into one report line per probe:
# accID, name, then the probe's marker IDs separated by commas.
# A change in _Probe_key marks the start of the next probe.
prevProbe = 0
markers = []

for r in results:

    if prevProbe != r['_Probe_key']:
        # finish the previous probe's line before starting the new one
        if len(markers) > 0:
            fp.write(','.join(markers))

        if prevProbe > 0:
            fp.write(reportlib.CRT)

        fp.write(mgi_utils.prvalue(r['accID']) + reportlib.TAB)
        fp.write(mgi_utils.prvalue(r['name']) + reportlib.TAB)

        prevProbe = r['_Probe_key']
        markers = []

    markers.append(r['markerID'])

# the last probe's markers are still pending when the loop ends
fp.write(','.join(markers))
fp.write(reportlib.CRT)
reportlib.finish_nonps(fp)

예제 #26
0
def processFile():
    '''
    Read the tab-delimited primer input file and write one set of output
    rows per input line.

    Each line must carry at least 12 tab-separated fields, in this order:
    marker symbol, '|'-delimited marker IDs, name, J number, region covered,
    sequence 1, sequence 2, product size, notes, '|'-delimited sequence IDs,
    '|'-delimited aliases, created-by login.

    effects:
        Writes to primerFile, markerFile, refFile, aliasFile, accFile,
        accRefFile, noteFile, newPrimerFile and errorFile.
        Advances the global counters primerKey, refKey, aliasKey, accKey
        and mgiKey.
        Calls exit(1, ...) for a line with too few fields.
        Unless DEBUG is set, runs ACC_setMax with the number of lines read.

    returns:
        nothing
    '''

    global primerKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0

    # For each line in the input file
    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Strip only the line terminator; slicing with [:-1] would eat the
        # last character of a final line that has no trailing newline.
        tokens = line.rstrip('\n').split('\t')

        try:
            markerSymbol = tokens[0]  # only echoed to newPrimerFile
            markerIDs = tokens[1].split('|')
            name = tokens[2]
            jnum = tokens[3]
            regionCovered = tokens[4]
            sequence1 = tokens[5]
            sequence2 = tokens[6]
            productSize = tokens[7]
            notes = tokens[8]
            sequenceIDs = tokens[9]
            aliasList = tokens[10].split('|')
            createdBy = tokens[11]
        except IndexError:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # marker IDs: remember which ID produced which key, so the duplicate
        # check below can report the ID that is really duplicated
        verifiedMarkers = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) == 0:
                continue

            if markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            else:
                verifiedMarkers.append((markerID, markerKey))

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        # sequence IDs
        seqAccList = sequenceIDs.split('|')

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the primer

        primerFile.write(
            '%d\t%s\t\t%d\t%d\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\n' %
            (primerKey, name, NA, vectorKey, segmentTypeKey,
             mgi_utils.prvalue(sequence1), mgi_utils.prvalue(sequence2),
             mgi_utils.prvalue(regionCovered), mgi_utils.prvalue(productSize),
             createdByKey, createdByKey, loaddate, loaddate))

        # a marker listed more than once on the line is reported and is not
        # written to markerFile at all
        verifiedKeys = [entry[1] for entry in verifiedMarkers]
        for markerID, markerKey in verifiedMarkers:
            if verifiedKeys.count(markerKey) == 1:
                markerFile.write(
                    '%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' %
                    (primerKey, markerKey, referenceKey, relationship,
                     createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' %
                                (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' %
                      (refKey, primerKey, referenceKey, createdByKey,
                       createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write(
                '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                (aliasKey, refKey, alias, createdByKey, createdByKey,
                 loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the primer

        accFile.write(
            '%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' %
            (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, primerKey,
             mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # echo the input record with the new MGI ID appended as the last field
        newPrimerFile.write(
            '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' %
            (markerSymbol, '|'.join(markerIDs), name, jnum, regionCovered,
             sequence1, sequence2, productSize, notes, sequenceIDs, createdBy,
             mgiPrefix, mgiKey))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        for acc in seqAccList:

            if len(acc) == 0:
                continue

            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write(
                '%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' %
                (accKey, acc, prefixPart, numericPart, logicalDBKey,
                 primerKey, mgiTypeKey, createdByKey, createdByKey,
                 loaddate, loaddate))
            accRefFile.write(
                '%s\t%s\t%s\t%s\t%s\t%s\n' %
                (accKey, referenceKey, createdByKey, createdByKey,
                 loaddate, loaddate))
            accKey = accKey + 1

        # notes

        if len(notes) > 0:
            noteFile.write('%s|1\t%s\t%s\t%s\n' %
                           (primerKey, notes, loaddate, loaddate))

        refKey = refKey + 1
        primerKey = primerKey + 1

    #   end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)
예제 #27
0
def process(mode):
    '''
    Purpose: process data using either 'sql' or 'bcp' mode

    mode 'bcp': every cache row is written as a delimited line to
                $ALLCACHEBCPDIR/ALL_Cre_Cache.bcp, keys starting at 1.
    mode 'sql': deleteSQL is run and committed first, then every cache row
                is inserted through insertSQL1/insertSQL2, keys starting
                after the current max(_Cache_key) of ALL_Cre_Cache.

    Rows come from toprocess1 (one cache row per cre-system label returned
    by processCreSystems) and, when isQuerySQL2 == 1, from toprocess2
    (one cache row each, structure/cre-system columns left empty).

    The bcp file is closed even if processing fails part way through.

    returns:
        nothing
    '''

    db.sql('create index idx1 on toprocess1(_Allele_key)', None)
    db.sql('create index idx2 on toprocess2(_Allele_key)', None)

    outBCP = None
    if mode == 'bcp':
        outBCP = open(os.environ['ALLCACHEBCPDIR'] + '/ALL_Cre_Cache.bcp', 'w')
    else:
        db.sql(deleteSQL, None)
        db.commit()

    try:
        #
        # next available primary key
        #

        nextMaxKey = None
        if mode == 'sql':
            results = db.sql('select max(_Cache_key) as cacheKey from ALL_Cre_Cache', 'auto')
            for r in results:
                nextMaxKey = r['cacheKey']

        # empty table (max() is null) or bcp mode: start from zero
        if nextMaxKey is None:
            nextMaxKey = 0

        nextMaxKey = nextMaxKey + 1

        results = db.sql('select * from toprocess1', 'auto')
        for r in results:

            creSystemsList = processCreSystems(r['_EMAPA_Term_key'], r['emapaTerm'], r['_Stage_key'])

            # column values shared by every cache row built from this record,
            # in output order
            rowValues = [
                r['_Allele_key'],
                r['_Allele_Type_key'],
                r['_EMAPA_Term_key'],
                r['_Stage_key'],
                r['_Assay_key'],
                r['accID'],
                r['symbol'],
                r['name'],
                r['alleleType'],
                r['driverGene'],
                r['emapaTerm'],
                r['age'],
                r['ageMin'],
                r['ageMax'],
                r['expressed'],
                r['hasImage'],
            ]

            for printCreLabel in creSystemsList:
                if mode == 'sql':
                    db.sql(insertSQL1 % tuple([str(nextMaxKey)] + rowValues +
                                              [printCreLabel, userKey, userKey]), None)
                else:
                    columns = [str(nextMaxKey)]
                    columns.extend([mgi_utils.prvalue(value) for value in rowValues])
                    columns.append(mgi_utils.prvalue(printCreLabel))
                    columns.append(mgi_utils.prvalue(userKey))
                    columns.append(mgi_utils.prvalue(userKey))
                    columns.append(loaddate)
                    columns.append(loaddate)
                    outBCP.write(COLDL.join(columns) + LINEDL)
                nextMaxKey = nextMaxKey + 1

        #
        # select the remaining Cre data (those alleles without genotypes/structures)
        # cre-system is always empty (null)
        #

        if isQuerySQL2 == 1:

            results = db.sql('select * from toprocess2', 'auto')
            for r in results:

                # nextMaxKey already holds an unused key at this point, so
                # incrementing before use skips one key value; kept as is so
                # the generated keys do not change
                nextMaxKey = nextMaxKey + 1

                if mode == 'sql':
                    db.sql(insertSQL2 % (str(nextMaxKey),
                                         r['_Allele_key'],
                                         r['_Allele_Type_key'],
                                         r['accID'],
                                         r['symbol'],
                                         r['name'],
                                         r['alleleType'],
                                         r['driverGene'],
                                         userKey, userKey), None)
                else:
                    empty = mgi_utils.prvalue('')
                    columns = [str(nextMaxKey),
                               mgi_utils.prvalue(r['_Allele_key']),
                               mgi_utils.prvalue(r['_Allele_Type_key'])]
                    # three blanks in the positions toprocess1 rows use for
                    # _EMAPA_Term_key, _Stage_key and _Assay_key
                    columns.extend([empty] * 3)
                    columns.extend([mgi_utils.prvalue(r['accID']),
                                    mgi_utils.prvalue(r['symbol']),
                                    mgi_utils.prvalue(r['name']),
                                    mgi_utils.prvalue(r['alleleType']),
                                    mgi_utils.prvalue(r['driverGene'])])
                    # seven blanks in the positions toprocess1 rows use for
                    # emapaTerm through the cre-system label
                    columns.extend([empty] * 7)
                    columns.extend([mgi_utils.prvalue(userKey),
                                    mgi_utils.prvalue(userKey),
                                    loaddate,
                                    loaddate])
                    outBCP.write(COLDL.join(columns) + LINEDL)

    finally:
        if outBCP is not None:
            outBCP.close()
예제 #28
0
def processFileIKMC(createMCL, createNote, setStatus, \
        symbol, ikmcSymbol, mutantCellLine, ikmcNotes, createdByKey, existingAlleleID):
    '''
    Handle one IKMC-only input record: optionally add a mutant cell line,
    queue an allele status update, and add/update the IKMC colony note.

    createMCL : '' (skip) or an allele key; a value of 0 means "use the
                allele key saved in alleleLookup for this symbol"
    createNote: '' (skip), or one of
                    0::colony(s)             note text for the saved note key
                    <allele key>::           add/extend note for existing allele
                    <note key>||<old notes>  rewrite an existing note
    setStatus : '' (skip) or the allele key whose status is updated

    effects:
        May call addMutantCellLine, append SQL text to ikmcSQLs, write to
        noteFile, add an entry to alleleLookup and advance the global
        noteKey.  Always writes one line to newAlleleFile.

    returns:
        nothing
    '''

    global noteKey, ikmcSQLs

    #
    # add new MCLs to new/existing alleles
    #
    if len(createMCL) > 0:

        if DEBUG:
            print(symbol, createMCL)

        if int(createMCL) == 0:
            aKey = alleleLookup[symbol][0][0]
        else:
            aKey = createMCL

        addMutantCellLine(aKey, mutantCellLine, createdByKey)

    #
    # set allele/status = Approved for existing "reserved" alleles
    #
    if len(setStatus) > 0:
        ikmcSQLs.append(
            'update ALL_Allele set _Allele_Status_key = 847114 where _Allele_key = %s'
            % (setStatus))

    #
    # Add IKMC Colony/Note to a new or existing allele
    #
    # child exists/ikmc note exists : update existing note
    #   || => _Note_key||existing colony notes
    #
    # child exists/ikmc note does not exist : add note
    #   :: => allele/child key
    #
    # new allele/child/non-duplicate IKMC Colony
    #   0::colony(s)
    #
    # blank => do nothing
    #
    # NOTE(review): note text is interpolated into the SQL strings unescaped;
    # a note containing a single quote would produce invalid SQL.
    #

    if len(createNote) > 0:

        if DEBUG:
            print('createNote: ', symbol)

        tokens = createNote.split('::')

        # Only the key parse decides the format.  Keeping the try this narrow
        # lets a genuine failure below (e.g. a symbol missing from
        # alleleLookup) surface as itself instead of being rerouted into the
        # '||' handler.
        try:
            childKey = int(tokens[0])
        except ValueError:
            childKey = None

        # child exists, note exists : update existing note
        if childKey is None:
            if DEBUG:
                print(createNote)

            tokens = createNote.split('||')
            nKey = tokens[0]
            note = tokens[1] + '|' + ikmcNotes
            ikmcSQLs.append(
                '''update MGI_Note set note = '%s' where _Note_key = %s;''' %
                (note, nKey))

        # duplicate child, additional note : add note to new child
        elif childKey == 0:
            nKey = alleleLookup[symbol][0][1]
            note = tokens[1]
            ikmcSQLs.append(
                '''update MGI_Note set note = '%s' where _Note_key = %s;'''
                % (note, nKey))

        # child exists, note does not exist : add note to existing child
        else:
            aKey = tokens[0]
            note = ikmcNotes

            if symbol in alleleLookup:
                nKey = alleleLookup[symbol][0][1]
                ikmcSQLs.append(
                    '''update MGI_Note set note = rtrim(note) || '|%s' where _Note_key = %s;'''
                    % (note, nKey))
            else:
                noteFile.write(
                    '%s|%s|%s|%s|%s|%s|%s|%s|%s\n' %
                    (noteKey, aKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey,
                     note, createdByKey, createdByKey, loaddate, loaddate))

                # save symbol/aKey/ikmc note key/allele id
                alleleLookup[symbol] = []
                alleleLookup[symbol].append(
                    (aKey, noteKey, 'missing allele id (1)'))

                noteKey = noteKey + 1

    #
    # print out the proper allele id
    #
    if len(existingAlleleID) > 0:
        printAlleleID = existingAlleleID
    elif symbol in alleleLookup:
        printAlleleID = alleleLookup[symbol][0][2]
    else:
        printAlleleID = 'missing allele id (2)'

    newAlleleFile.write('%s\t%s\t%s\n' %
                        (mgi_utils.prvalue(ikmcNotes),
                         mgi_utils.prvalue(printAlleleID),
                         mgi_utils.prvalue(ikmcSymbol)))
예제 #29
0
    key = r['mgiID']
    value = r['symbol']

    # print one row of the marker record

    if not markerList.has_key(key):
	fp.write(r['mgiID'] + TAB)
        fp.write(r['symbol'] + TAB)
	fp.write('MGI' + TAB)
	fp.write(r['mgiID'] + TAB)
	fp.write(r['featureType'] + TAB)
	fp.write(CRT)
	markerList[key] = value

    # print row of the gene model sequence
    fp.write(r['mgiID'] + TAB)
    fp.write(r['symbol'] + TAB)
    fp.write(r['provider'] + TAB)
    fp.write(r['accID'] + TAB)
    fp.write(mgi_utils.prvalue(r['rawbiotype']) + TAB)

    if r['_Qualifier_key'] == 615419:
	fp.write('Representative')
    fp.write(CRT)

fp.write(CRT + '(%d genes affected)' % (len(markerList)) + CRT)

db.useOneConnection(0)
reportlib.finish_nonps(fp)	# non-postscript file

예제 #30
0
def doGAFFinish():
    '''
    Write one GAF line to fp for every row of gomarker2 (ordered by symbol,
    termID) whose term has a DAG abbreviation listed in dagQualifier; rows
    whose object/evidence key is in forPROC are also written to fp2.

    Output format:

    The GO format has the following columns:

      1.  Database designation (MGI)
      2.  MGI Marker ID (MGI:xxxx)
      3.  Symbol
      4.  Qualifier
      5.  GO id
      6.  MGI ID of Reference (MGI:MGI:xxxx|PMID:xxxx)
      7.  Evidence abbreviation
      8.  Inferred From
      9.  GO DAG Abbreviation (F, P, C)
      10. Gene name
      11. Gene synonym(s) - list of |-delimited synonyms
      12. Marker Type or Protein (gene)
      13. Species (taxon:10090)
      14. Modification Date (YYYYMMDD)
      15. Assigned By
      16. Properties/Values (occurs_in, part_of, etc.)
      17. Isoform

    returns:
        nothing
    '''

    #
    # process results
    #
    results = db.sql('select * from gomarker2 order by symbol, termID', 'auto')

    for r in results:

        termKey = r['_Term_key']

        if termKey not in dag:
            continue

        if dag[termKey] not in dagQualifier:
            continue

        markerKey = r['_Object_key']
        refsKey = r['_Refs_key']

        # marker key + evidence key identifies one annotation in the lookups
        objectKey = str(markerKey) + ':' + str(r['_AnnotEvidence_key'])

        # the 17 column values, in output order
        columns = []

        # columns 1-5
        columns.append(MGIPREFIX)
        columns.append(str(r['markerID']))
        columns.append(r['symbol'])

        if r['qualifier'] is not None:
            qualifier = r['qualifier'].strip()
        else:
            qualifier = ''

        columns.append(qualifier)
        columns.append(r['termID'])

        # column 6; reference
        # the MGI reference always comes first, followed by the PubMed ID
        # or, when there is none, the goRefDict entry (if any)
        references = [MGIPREFIX + ':' + r['refID']]
        if refsKey in pubMed:
            references.append('PMID:' + pubMed[refsKey])
        elif refsKey in goRefDict:
            references.append(goRefDict[refsKey])
        columns.append('|'.join(references))

        # column 7
        columns.append(r['evidenceCode'])

        # column 8
        columns.append(mgi_utils.prvalue(r['inferredFrom']).replace('MGI:', 'MGI:MGI:'))

        # column 9-10
        columns.append(dag[termKey])
        columns.append(r['name'])

        # column 11
        if markerKey in syns:
            columns.append('|'.join(syns[markerKey]))
        else:
            columns.append('')

        # column 12
        # if marker is associated with an isoform (via go/annotation)
        # or marker is associated with a protein (via marker/sequence cache)
        #       print 'protein'
        # else, print marker type (ex. 'gene')

        if objectKey in isoformsProtein or markerKey in proteins:
            columns.append('protein')
        else:
            columns.append(r['markerType'])

        # column 13
        columns.append(SPECIES)

        # column 14
        columns.append(str(r['mDate']))

        # column 15; assigned by

        # remove "GOA_"; for example:  "GOA_IntAct" ==> "IntAct"
        # remove "NOCTUA_"; for example:  "NOCTUA_MGI" ==> "MGI"
        assignedBy = r['assignedBy']

        if 'NOCTUA_' in assignedBy:
            assignedBy = assignedBy.replace('NOCTUA_', '')

        elif 'GOA_' in assignedBy:
            assignedBy = assignedBy.replace('GOA_', '')

        elif assignedBy in assignedByList1:
            assignedBy = 'UniProt'

        elif assignedBy in assignedByList2:
            pass

        # else use default (MGIPREFIX)
        else:
            assignedBy = MGIPREFIX

        columns.append(assignedBy)

        #
        # column 16
        # contains property/value information
        # see lib_py_report/go_annot_extensions.py for list of excluded properties
        properties = ''
        if objectKey in gafCol16Lookup:
            properties = ''.join(gafCol16Lookup[objectKey])
        columns.append(properties)

        # column 17
        # if isoformProtein = true
        #    then use isoformsProtein
        isoforms = ''
        if objectKey in isoformsProtein:
            isoforms = '|'.join(isoformsProtein[objectKey])
        columns.append(isoforms)

        reportRow = TAB.join(columns) + CRT

        fp.write(reportRow)

        #
        # TR11060
        # subset of UniProtKB:xxxx-?? only
        #
        if objectKey in forPROC:
            fp2.write(reportRow)
예제 #31
0
def addGPADReportRow(reportRow, r):
    '''
    Append GPAD columns 3 through 12 for one annotation to reportRow.

    reportRow: row text built so far; the new columns are appended to it
    r: one result row, read by key: _Object_key, _AnnotEvidence_key,
       _Term_key, _Refs_key, inferredFrom, qualifier, termID, refID,
       evidenceCode, mDate, assignedBy

    Columns 3 through 11 are each followed by TAB; column 12 is followed
    by CRT.

    returns: the extended reportRow
    '''

    key = r['_AnnotEvidence_key']

    # the GAF column 16 lookup is keyed by "object key:annotation evidence key"
    objectKey = str(r['_Object_key']) + ':' + str(key)

    #   3. Qualifier

    # default relation: use gpadCol3Lookup when it has this annotation,
    # else 'involved_in' for InterPro-inferred annotations in DAG 'P',
    # else the qualifier configured for the term's DAG
    if key in gpadCol3Lookup:
        defaultRelation = '|'.join(gpadCol3Lookup[key])
    elif r['inferredFrom'] is not None \
            and r['inferredFrom'].find('InterPro:') >= 0 \
            and dag[r['_Term_key']] == 'P':
        defaultRelation = 'involved_in'
    else:
        defaultRelation = dagQualifier[dag[r['_Term_key']]]

    # qualifier from MGD annotations
    qualifier = ''
    if r['qualifier'] is not None:
        qualifier = r['qualifier'].strip()

    if qualifier == '':
        gpadQualifier = defaultRelation
    elif qualifier == 'NOT':
        # NOT is combined with the default relation
        gpadQualifier = qualifier + '|' + defaultRelation
    else:
        gpadQualifier = qualifier

    reportRow = reportRow + gpadQualifier + TAB

    #   4. GO ID
    reportRow = reportRow + r['termID'] + TAB

    #   5. DB:Reference(s)
    # the MGI reference always comes first; a PubMed id is preferred over
    # a goRefDict entry as the second reference
    references = [MGIPREFIX + ':' + r['refID']]
    if r['_Refs_key'] in pubMed:
        references.append('PMID:' + pubMed[r['_Refs_key']])
    elif r['_Refs_key'] in goRefDict:
        references.append(goRefDict[r['_Refs_key']])
    reportRow = reportRow + '|'.join(references) + TAB

    #   6. Evidence Code
    if key in evidenceLookup:
        reportRow = reportRow + evidenceLookup[key][0]
    elif r['evidenceCode'] in ecoLookupByEvidence:
        reportRow = reportRow + ecoLookupByEvidence[r['evidenceCode']]
    else:
        reportRow = reportRow + 'NOT FOUND'
    reportRow = reportRow + TAB

    #   7. With (or)From
    # every 'MGI:' in the value is written as 'MGI:MGI:'
    inferredFrom = mgi_utils.prvalue(r['inferredFrom']).replace('MGI:', 'MGI:MGI:')
    reportRow = reportRow + mgi_utils.prvalue(inferredFrom) + TAB

    #   8. Interacting taxon ID (left empty when there is none)
    if key in taxonLookup:
        reportRow = reportRow + taxonLookup[key][0]
    reportRow = reportRow + TAB

    #   9. Date
    reportRow = reportRow + str(r['mDate']) + TAB

    #   10. Assigned by
    assignedBy = r['assignedBy']

    if assignedBy.find('NOCTUA_') >= 0:
        # remove "NOCTUA_"; for example:  "NOCTUA_MGI" ==> "MGI"
        assignedBy = assignedBy.replace('NOCTUA_', '')
    elif assignedBy.find('GOA_') >= 0:
        # remove "GOA_"; for example:  "GOA_IntAct" ==> "IntAct"
        assignedBy = assignedBy.replace('GOA_', '')
    elif assignedBy in assignedByList1:
        assignedBy = 'UniProt'
    elif assignedBy not in assignedByList2:
        # not in either list: use default (MGIPREFIX)
        assignedBy = MGIPREFIX
    # values in assignedByList2 are written unchanged

    reportRow = reportRow + assignedBy + TAB

    #   11. Annotation Extension
    # the GPAD lookup wins; otherwise fall back to the GAF column 16 value
    extensions = ''
    if key in gpadCol11Lookup:
        extensions = ','.join(gpadCol11Lookup[key])
    elif objectKey in gafCol16Lookup:
        extensions = ''.join(gafCol16Lookup[objectKey])
    reportRow = reportRow + extensions + TAB

    #   12. Annotation Properties
    properties = ''
    if key in gpadCol12Lookup:
        properties = '|'.join(gpadCol12Lookup[key])
    reportRow = reportRow + properties + CRT

    return reportRow
예제 #32
0
def processFile():
    '''
    Read the allele input file and write the bcp records for each allele.

    Each input line is tab-delimited and must have at least 24 columns;
    a shorter line ends the load through exit(1, message).

    For each line:
      - the creator is verified; the line is skipped if that fails
      - a line with createMCL, createNote or setStatus set is handed to
        processFileIKMC() and nothing else is written for it
      - otherwise the marker, vocabulary terms, strain and reference are
        verified and the line is skipped if any of them comes back as 0
      - records are written to alleleFile, mutationFile, refFile,
        annotFile, accFile, noteFile/noteChunkFile and newAlleleFile,
        and alleleLookup[symbol] is set

    After the loop, unless DEBUG is set, ACC_setMax is called and the
    transaction is committed.

    returns: nothing
    '''

    global alleleKey, refAssocKey, accKey, noteKey, mgiKey, annotKey
    global alleleLookup

    # reference association type key for each reference type name
    # that may appear in the input file
    refAssocTypeKeys = {
        'Original': 1011,
        'Transmission': 1023,
        'Molecular': 1012,
    }

    lineNum = 0

    # For each line in the input file

    for line in inputFile:

        lineNum = lineNum + 1

        # Split the line into tokens; strip only the line terminator so a
        # last line without a newline does not lose a data character
        tokens = line.rstrip('\n').split('\t')

        try:
            markerID = tokens[0]
            symbol = tokens[1]
            name = tokens[2]
            alleleStatus = tokens[3]
            alleleType = tokens[4]
            alleleSubtypes = tokens[5]
            collectionKey = tokens[6]
            germLine = tokens[7]
            references = tokens[8]
            strainOfOrigin = tokens[9]
            mutantCellLine = tokens[10]
            molecularNotes = tokens[11]
            driverNotes = tokens[12]
            ikmcNotes = tokens[13]
            mutations = tokens[14]
            inheritanceMode = tokens[15]
            isMixed = tokens[16]
            isExtinct = tokens[17]
            createdBy = tokens[18]
            createMCL = tokens[19]
            createNote = tokens[20]
            setStatus = tokens[21]
            existingAlleleID = tokens[22]
            ikmcSymbol = tokens[23]
        except IndexError:
            # too few columns
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # creator
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)
        if createdByKey == 0:
            continue

        # processing for IKMC-only
        if createMCL or createNote or setStatus:
            processFileIKMC(createMCL, createNote, setStatus,
                            symbol, ikmcSymbol, mutantCellLine, ikmcNotes,
                            createdByKey, existingAlleleID)
            continue

        # marker key
        markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

        # hard-coded
        # _vocab_key = 73 (Marker-Allele Association Status)
        # _term_key = 4268545 (Curated)
        markerStatusKey = 4268545

        # _vocab_key = 37 (Allele Status)
        alleleStatusKey = loadlib.verifyTerm('', 37, alleleStatus, lineNum, errorFile)

        # _vocab_key = 38 (Allele Type)
        alleleTypeKey = loadlib.verifyTerm('', 38, alleleType, lineNum, errorFile)

        # _vocab_key = 61 (Allele Transmission)
        germLineKey = loadlib.verifyTerm('', 61, germLine, lineNum, errorFile)

        # _vocab_key = 36 (Allele Molecular Mutation); verified one by one below
        allMutations = mutations.split('|')

        # _vocab_key = 35 (inheritance mode)
        inheritanceModeKey = loadlib.verifyTerm('', 35, inheritanceMode, lineNum, errorFile)

        # strains
        strainOfOriginKey = sourceloadlib.verifyStrain(strainOfOrigin, lineNum, errorFile)

        # reference
        # NOTE(review): jnum is not assigned anywhere in this function, so
        # it must come from module scope -- confirm it is defined there
        refKey = loadlib.verifyReference(jnum, lineNum, errorFile)

        # if errors, continue to next record
        # errors are stored (via loadlib) in the .error log
        #
        # markerStatusKey is a constant, allMutations is a list and
        # createdByKey was already checked above, so none of those can be 0
        # here and they are left out of the test

        if 0 in (markerKey, alleleStatusKey, alleleTypeKey, germLineKey,
                 inheritanceModeKey, strainOfOriginKey, refKey):
            continue

        # if no errors, process the allele

        # allele (master)
        alleleFile.write('%d|%s|%s|%s|%s|%s|%s|%s|%s|%s|0|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
            % (alleleKey, markerKey, strainOfOriginKey, inheritanceModeKey, alleleTypeKey, \
               alleleStatusKey, germLineKey, collectionKey, symbol, name, \
               isExtinct, isMixed, refKey, markerStatusKey, \
               createdByKey, createdByKey, createdByKey, loaddate, loaddate, loaddate))

        # molecular mutation
        # NOTE(review): the result of verifyTerm is written even when it is 0
        for mutation in allMutations:
            mutationKey = loadlib.verifyTerm('', 36, mutation, lineNum, errorFile)
            mutationFile.write('%s|%s|%s|%s\n' \
                % (alleleKey, mutationKey, loaddate, loaddate))

        #
        # allele references
        # "references" is a '||'-delimited list of 'type|reference id' pairs
        #
        for reference in references.split('||'):
            refType, refID = reference.split('|')

            # refKey is reassigned here; the value of the last reference
            # is the one echoed to newAlleleFile below
            refKey = loadlib.verifyReference(refID, lineNum, errorFile)

            # an unknown type used to leave refAssocTypeKey unset (or
            # holding the value from an earlier reference); report it and
            # skip the association instead
            if refType not in refAssocTypeKeys:
                errorFile.write('Invalid Reference Type (%d): %s\n' % (lineNum, refType))
                continue
            refAssocTypeKey = refAssocTypeKeys[refType]

            refFile.write('%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (refAssocKey, refKey, alleleKey, mgiTypeKey, refAssocTypeKey, \
                   createdByKey, createdByKey, loaddate, loaddate))
            refAssocKey = refAssocKey + 1

        #
        # allele subtypes
        #
        for s in alleleSubtypes.split('|'):

            # _vocab_key = 93 (Allele Subtype)
            alleleSubtypeKey = loadlib.verifyTerm('', 93, s, lineNum, errorFile)

            annotFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (annotKey, annotTypeKey, alleleKey, alleleSubtypeKey, \
                   qualifierKey, loaddate, loaddate))
            annotKey = annotKey + 1

        #
        # mutant cell line
        #
        if len(mutantCellLine) > 0:
            addMutantCellLine(alleleKey, mutantCellLine, createdByKey)

        # MGI Accession ID for the allele

        accFile.write('%s|%s%d|%s|%s|1|%d|%d|0|1|%s|%s|%s|%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, alleleKey, mgiTypeKey, \
               createdByKey, createdByKey, loaddate, loaddate))

        # storing data in MGI_Note/MGI_NoteChunk
        # molecular notes

        mgiNoteSeqNum = 1
        if len(molecularNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiMolecularNoteTypeKey, \
                   createdByKey, createdByKey, loaddate, loaddate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, mgiNoteSeqNum, molecularNotes, createdByKey, createdByKey, loaddate, loaddate))

            noteKey = noteKey + 1

        # driver notes
        # TR12662/MGI_Relationship._Category_key = 1006
        # removed noteFile code
        # placeholder for MGI_Relationship code
        # the IKMC is the only product using this and IKMC does not add any driver note

        # ikmc notes
        useIKMCnotekey = 0
        if len(ikmcNotes) > 0:

            noteFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, alleleKey, mgiNoteObjectKey, mgiIKMCNoteTypeKey, \
                   createdByKey, createdByKey, loaddate, loaddate))

            noteChunkFile.write('%s|%s|%s|%s|%s|%s|%s\n' \
                % (noteKey, 1, ikmcNotes, createdByKey, createdByKey, loaddate, loaddate))

            useIKMCnotekey = noteKey
            noteKey = noteKey + 1

        # Print out a new text file and attach the new MGI Allele IDs as the last field

        if createdBy == 'ikmc_alleleload':
            newAlleleFile.write('%s\t%s%s\t%s\n' \
                % (mgi_utils.prvalue(ikmcNotes), \
                   mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey), \
                   mgi_utils.prvalue(ikmcSymbol)))
        else:
            # This branch used to name two variables that are never
            # assigned here (alleleSubtype, collection) and passed more
            # values than the format string had placeholders for.  The
            # input columns are echoed (alleleSubtypes, collectionKey and
            # the raw mutations string) and the line is joined so the
            # column count always matches the values.
            echoed = [markerID, symbol, name, alleleStatus, alleleType,
                      alleleSubtypes, collectionKey, germLine, references,
                      strainOfOrigin, mutantCellLine, mutations,
                      inheritanceMode, isMixed, isExtinct, refKey,
                      markerStatusKey, createdBy]
            fields = ['%s' % mgi_utils.prvalue(value) for value in echoed]
            fields.append('%s%s' % (mgi_utils.prvalue(mgiPrefix), mgi_utils.prvalue(mgiKey)))
            newAlleleFile.write('\t'.join(fields) + '\n')

        # save symbol/alleleKey/ikmc note key
        alleleLookup[symbol] = [(alleleKey, useIKMCnotekey, mgiPrefix + str(mgiKey))]

        accKey = accKey + 1
        mgiKey = mgiKey + 1
        alleleKey = alleleKey + 1

    #	end of "for line in inputFile:"

    #
    # Update the AccessionMax value
    #
    # NOTE(review): lineNum counts every input line read, including lines
    # that were skipped or handed to processFileIKMC, while mgiKey only
    # advances for alleles written above -- confirm this is the intended
    # argument for ACC_setMax
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax(%d)' % (lineNum), None)
        db.commit()
예제 #33
0
    ''', 'auto')

s = ''
count = 0

for r in results:

    stage = r['stage']

    age = r['age']
    m = re.search('[0-9]',age)

    # if age has no numeric specified, print it out; probable error

    if m == None:
       	s = s + r['mgi'] + TAB + r['jnum'] + TAB + mgi_utils.prvalue(r['label']) + CRT
       	count = count + 1
       	continue

    start = m.start()
    range = age[start:]

    # parse by range "-" or list ","

    m = re.search('[-,]', range)

    if m == None:
       	minAge = string.atof(range)
       	maxAge = minAge
    else:
        delim = m.start()
예제 #34
0
파일: gelload.py 프로젝트: mgijax/assayload
def processGelLaneFile():
    '''
    Read the gel lane input file and write gel lane and gel lane
    structure records.

    Each input line is tab-delimited and must have at least 13 columns;
    a shorter line ends the load through exit(1, message).

    A lane is identified by "assayID:laneID".  The first line seen for a
    lane writes a record to outGelLaneFile and remembers its key in
    assayGelLane; later lines for the same lane only add a structure
    record to outGelLaneStFile.  Structure records are written only when
    the control column is "No".

    A line is skipped when the genotype, RNA type or control cannot be
    verified, or when it has a structure that cannot be verified.

    returns: nothing
    '''

    global assayGelLane, gelLaneKey

    lineNum = 0

    # For each line in the input file

    for line in inGelLaneFile:

        lineNum = lineNum + 1

        # Split the line into tokens; strip only the line terminator so a
        # last line without a newline does not lose a data character
        tokens = line.rstrip('\n').split('\t')

        try:
            assayID = tokens[0]
            laneID = tokens[1]
            laneLabel = tokens[2]
            genotypeID = tokens[3]
            rnaType = tokens[4]
            control = tokens[5]
            sampleAmount = tokens[6]
            gender = tokens[7]
            age = tokens[8]
            ageNote = tokens[9]
            laneNote = tokens[10]
            emapaID = tokens[11]
            structureTS = tokens[12]
        except IndexError:
            # too few columns
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        # if control is set to "No", then there *is* a structure
        # else there are no structures
        hasStructure = (control == "No")

        genotypeKey = gxdloadlib.verifyGenotype(genotypeID, lineNum, errorFile)
        rnaTypeKey = gxdloadlib.verifyGelRNAType(rnaType, lineNum, errorFile)
        controlKey = gxdloadlib.verifyGelControl(control, lineNum, errorFile)
        ageMin, ageMax = agelib.ageMinMax(age)

        error = 0

        if hasStructure:
            structureKey = gxdloadlib.verifyTerm(emapaID, 90, '', lineNum, errorFile)
            if structureKey == 0:
                error = 1

        #
        # if age = "Not Specified", then ageMin/ageMax = -1 which is < 0
        # so ageMin < 0 or ageMax < 0 is deliberately not treated as an error
        #

        if genotypeKey == 0 or rnaTypeKey == 0 or controlKey == 0:
            error = 1

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process

        key = '%s:%s' % (assayID, laneID)

        if key not in assayGelLane:

            # this lane has not been added to the gel lane file yet

            outGelLaneFile.write(TAB.join([
                str(gelLaneKey),
                str(assayAssay[assayID]),
                str(genotypeKey),
                str(rnaTypeKey),
                str(controlKey),
                str(laneID),
                laneLabel,
                mgi_utils.prvalue(sampleAmount),
                gender,
                age,
                str(ageMin),
                str(ageMax),
                mgi_utils.prvalue(ageNote),
                mgi_utils.prvalue(laneNote),
                loaddate,
                loaddate]) + CRT)

            laneKey = gelLaneKey
            assayGelLane[key] = gelLaneKey
            gelLaneKey = gelLaneKey + 1

        else:

            # the lane exists already; this line adds another structure to it

            laneKey = assayGelLane[key]

        if hasStructure:
            outGelLaneStFile.write(
                str(laneKey) + TAB + \
                str(structureKey) + TAB + \
                loaddate + TAB + loaddate + CRT)

    #	end of "for line in inGelLaneFile:"

    return
예제 #35
0
             and a1._LogicalDB_key = 1 
             and a1.prefixPart = 'MGI:' 
             and a1.preferred = 1 
             and p._Marker_key = a2._Object_key 
	     and a2._MGIType_key = 2 
             and a2._LogicalDB_key = 1 
             and a2.prefixPart = 'MGI:' 
             and a2.preferred = 1 
       order by p.symbol
       ''', 'auto')

# one tab-delimited report line per result row, in this column order:
# symbol, marker name, probe name, marker ID, probe ID,
# primer 1 sequence, primer 2 sequence, product size, chromosome, cM offset
for r in results:
    columns = [
        r['symbol'],
        r['mname'],
        r['pname'],
        r['markerID'],
        r['probeID'],
        mgi_utils.prvalue(r['primer1sequence']),
        mgi_utils.prvalue(r['primer2sequence']),
        mgi_utils.prvalue(r['productSize']),
        r['chromosome'],
        str(r['cmoffset']),
    ]
    fp.write(TAB.join(columns) + CRT)

reportlib.finish_nonps(fp)
예제 #36
0
파일: gelload.py 프로젝트: mgijax/assayload
def processGelBandFile():
    '''
    Read the gel band input file and write gel row and gel band records.

    Each input line is tab-delimited and must have at least 8 columns;
    a shorter line ends the load through exit(1, message).

    A line is skipped when its band units or band strength cannot be
    verified.  A gel row record is written to outGelRowFile each time the
    assay ID differs from the one on the previous processed line; every
    processed line writes one record to outGelBandFile, pointing at the
    lane key stored in assayGelLane for "assayID:laneID" and at the
    current gel row key.

    returns: nothing
    '''

    global gelRowKey, gelBandKey

    lineNum = 0

    # assay ID of the previous processed line
    # NOTE(review): only a change of assay starts a new gel row; rowID is
    # written to the row record but is not compared between lines (the
    # original also initialised prevLane/prevRow and never used them) --
    # confirm the input carries one row per assay
    prevAssay = None

    # For each line in the input file

    for line in inGelBandFile:

        lineNum = lineNum + 1

        # Split the line into tokens; strip only the line terminator so a
        # last line without a newline does not lose a data character
        tokens = line.rstrip('\n').split('\t')

        try:
            assayID = tokens[0]
            laneID = tokens[1]
            rowID = tokens[2]
            bandSize = tokens[3]
            bandUnits = tokens[4]
            bandStrength = tokens[5]
            rowNote = tokens[6]
            bandNote = tokens[7]
        except IndexError:
            # too few columns
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        unitsKey = gxdloadlib.verifyGelUnits(bandUnits, lineNum, errorFile)
        strengthKey = gxdloadlib.verifyGelStrength(bandStrength, lineNum, errorFile)

        # if errors, continue to next record
        if unitsKey == 0 or strengthKey == 0:
            continue

        # new Assay means new Row

        if prevAssay != assayID:

            # the row key is advanced before it is written
            gelRowKey = gelRowKey + 1

            outGelRowFile.write(TAB.join([
                str(gelRowKey),
                str(assayAssay[assayID]),
                str(unitsKey),
                str(rowID),
                mgi_utils.prvalue(bandSize),
                mgi_utils.prvalue(rowNote),
                loaddate,
                loaddate]) + CRT)

            prevAssay = assayID

        # determine the lane key based on assayID and laneID
        laneKey = assayGelLane['%s:%s' % (assayID, laneID)]

        outGelBandFile.write(TAB.join([
            str(gelBandKey),
            str(laneKey),
            str(gelRowKey),
            str(strengthKey),
            mgi_utils.prvalue(bandNote),
            loaddate,
            loaddate]) + CRT)

        gelBandKey = gelBandKey + 1

    #	end of "for line in inGelBandFile:"

    return
예제 #37
0
def processFile():

    global probeKey, refKey, aliasKey, accKey, mgiKey

    lineNum = 0
    # For each line in the input file

    for line in inputFile.readlines():

        error = 0
        lineNum = lineNum + 1

        # Split the line into tokens
        tokens = string.split(line[:-1], '\t')

        try:
            name = tokens[0]
            jnum = tokens[1]
            parentID = tokens[2]
            sourceName = tokens[3]
            organism = tokens[4]
            strain = tokens[5]
            tissue = tokens[6]
            gender = tokens[7]
            cellLine = tokens[8]
            age = tokens[9]
            vectorType = tokens[10]
            segmentType = tokens[11]
            regionCovered = tokens[12]
            insertSite = tokens[13]
            insertSize = tokens[14]
            markerIDs = string.split(tokens[15], '|')
            relationship = tokens[16]
            sequenceIDs = tokens[17]
            aliasList = string.split(tokens[18], '|')
            notes = tokens[19]
            rawnotes = tokens[20]
            createdBy = tokens[21]
        except:
            exit(1, 'Invalid Line (%d): %s\n' % (lineNum, line))

        isParent = 0
        isSource = 0
        parentProbeKey = ''
        sourceKey = 0

        if parentID != '':
            isParent = 1

        if sourceName != '':
            isSource = 1

        if not isParent and not isSource:
            organismKey = sourceloadlib.verifyOrganism(organism, lineNum,
                                                       errorFile)
            strainKey = sourceloadlib.verifyStrain(strain, lineNum, errorFile)
            tissueKey = sourceloadlib.verifyTissue(tissue, lineNum, errorFile)
            genderKey = sourceloadlib.verifyGender(gender, lineNum, errorFile)
            cellLineKey = sourceloadlib.verifyCellLine(cellLine, lineNum,
                                                       errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum,
                                                       errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifySource(segmentTypeKey, \
         vectorKey, organismKey, strainKey, \
         tissueKey, genderKey, cellLineKey, age, lineNum, errorFile)

            if organismKey == 0 or strainKey == 0 or tissueKey == 0 or \
                      genderKey == 0 or cellLineKey == 0 or vectorKey == 0 or \
                      segmentTypeKey == 0 or sourceKey == 0:
                errorFile.write('%s, %s, %s, %s, %s, %s, %s, %s\n' %
                                (segmentType, vectorType, organism, strain,
                                 tissue, gender, cellLine, age))
                error = 1

        elif not isParent and isSource:
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum,
                                                       errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)
            sourceKey = sourceloadlib.verifyLibrary(sourceName, lineNum,
                                                    errorFile)

            if vectorKey == 0 or segmentTypeKey == 0 or sourceKey == 0:
                error = 1

# parent from = yes, source given = yes or no (ignored)
        else:
            parentProbeKey, sourceKey = verifyParentProbe(
                parentID, lineNum, errorFile)
            vectorKey = sourceloadlib.verifyVectorType(vectorType, lineNum,
                                                       errorFile)
            segmentTypeKey = sourceloadlib.verifySegmentType(
                segmentType, lineNum, errorFile)

            if parentProbeKey == 0 or sourceKey == 0 or vectorKey == 0 or segmentTypeKey == 0:
                error = 1

        referenceKey = loadlib.verifyReference(jnum, lineNum, errorFile)
        createdByKey = loadlib.verifyUser(createdBy, lineNum, errorFile)

        if referenceKey == 0:
            errorFile.write('Invalid Reference:  %s\n' % (jnum))
            error = 1

        if createdByKey == 0:
            errorFile.write('Invalid Creator:  %s\n\n' % (createdBy))
            error = 1

# marker IDs

        markerList = []
        for markerID in markerIDs:

            markerKey = loadlib.verifyMarker(markerID, lineNum, errorFile)

            if len(markerID) > 0 and markerKey == 0:
                errorFile.write('Invalid Marker:  %s, %s\n' % (name, markerID))
                error = 1
            elif len(markerID) > 0:
                markerList.append(markerKey)

# sequence IDs
        seqAccDict = {}
        for seqID in string.split(sequenceIDs, '|'):
            if len(seqID) > 0:
                [logicalDB, acc] = string.split(seqID, ':')
                logicalDBKey = loadlib.verifyLogicalDB(logicalDB, lineNum,
                                                       errorFile)
                if logicalDBKey > 0:
                    seqAccDict[acc] = logicalDBKey

        # if errors, continue to next record
        if error:
            continue

        # if no errors, process the probe

        probeFile.write('%d\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\n' \
            % (probeKey, name, parentProbeKey, sourceKey, vectorKey, segmentTypeKey, mgi_utils.prvalue(regionCovered), \
     mgi_utils.prvalue(insertSite), mgi_utils.prvalue(insertSize), createdByKey, createdByKey, loaddate, loaddate))

        for markerKey in markerList:
            if markerList.count(markerKey) == 1:
                markerFile.write('%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n' \
      % (probeKey, markerKey, referenceKey, relationship, createdByKey, createdByKey, loaddate, loaddate))
            else:
                errorFile.write('Invalid Marker Duplicate:  %s, %s\n' %
                                (name, markerID))

        refFile.write('%s\t%s\t%s\t0\t0\t%s\t%s\t%s\t%s\n' \
  % (refKey, probeKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))

        # aliases

        for alias in aliasList:
            if len(alias) == 0:
                continue
            aliasFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
      % (aliasKey, refKey, alias, createdByKey, createdByKey, loaddate, loaddate))
            aliasKey = aliasKey + 1

        # MGI Accession ID for the marker

        accFile.write('%s\t%s%d\t%s\t%s\t1\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
            % (accKey, mgiPrefix, mgiKey, mgiPrefix, mgiKey, probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))

        # Print out a new text file and attach the new MGI Probe IDs as the last field

        newProbeFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%d\n' \
     % (name, jnum, \
     mgi_utils.prvalue(sourceName), \
     organism, \
     mgi_utils.prvalue(strain), \
     mgi_utils.prvalue(tissue), \
     mgi_utils.prvalue(gender), \
     mgi_utils.prvalue(cellLine), \
     mgi_utils.prvalue(age), \
     mgi_utils.prvalue(vectorType), \
     mgi_utils.prvalue(segmentType), \
     mgi_utils.prvalue(regionCovered) + \
     mgi_utils.prvalue(insertSite), \
     mgi_utils.prvalue(insertSize), \
     string.join(markerIDs, '|'), \
     relationship, \
     mgi_utils.prvalue(sequenceIDs), \
     string.join(aliasList, '|'), \
     mgi_utils.prvalue(notes), \
     createdBy, mgiPrefix, mgiKey))

        # Print out a raw note file

        if len(rawnotes) > 0:
            rawNoteFile.write('%s%d\t%s\n' % (mgiPrefix, mgiKey, rawnotes))

# Notes

        if len(notes) > 0:
            noteFile.write('%s\t%s\t%s\t%s\n' %
                           (probeKey, notes, loaddate, loaddate))

        accKey = accKey + 1
        mgiKey = mgiKey + 1

        # sequence accession ids
        for acc in seqAccDict.keys():
            prefixPart, numericPart = accessionlib.split_accnum(acc)
            accFile.write('%s\t%s\t%s\t%s\t%s\t%d\t%d\t0\t1\t%s\t%s\t%s\t%s\n' \
                % (accKey, acc, prefixPart, numericPart, seqAccDict[acc], probeKey, mgiTypeKey, createdByKey, createdByKey, loaddate, loaddate))
            accRefFile.write('%s\t%s\t%s\t%s\t%s\t%s\n' \
                % (accKey, referenceKey, createdByKey, createdByKey, loaddate, loaddate))
            accKey = accKey + 1

        refKey = refKey + 1
        probeKey = probeKey + 1

    #	end of "for line in inputFile.readlines():"

    #
    # Update the AccessionMax value
    #

    if not DEBUG:
        db.sql('select * from ACC_setMax (%d)' % (lineNum), None)