# Example #1
0
def weightedLoad(infile,weightthresh=None):
	"""Read an alignment and return it as a weighted sparse indicator matrix.

	The alignment is read with ``simformat.read(infile)``. Row ``i`` of the
	result belongs to record ``i``; for every position ``j`` of that record
	the column ``j*Q + residue`` is set to the record's weight, where
	``residue`` is the integer ``intConv`` produced for that position.
	(``Q`` is a module-level constant -- presumably the alphabet size;
	confirm against its definition.)

	Parameters:
		infile: passed straight to ``simformat.read``.
		weightthresh: if given, it must be one of the cutoffs stored in the
			alignment header; the per-record weights then come from
			``simformat.getinvnormsim`` for that cutoff. If ``None``, every
			record gets weight 1.

	Returns:
		The matrix converted to CSC format, with shape ``(N, Q*Width)`` where
		``N`` is the number of records and ``Width`` is the length of the
		first record.

	Raises:
		Exception: if ``weightthresh`` cannot be located in the header's
			cutoffs.
	"""
	myAlign = simformat.read(infile)
	myHeader = myAlign.header

	if weightthresh is not None:
		try:
			weightsindex = myHeader.cutoffs.index(weightthresh)
		except Exception:
			# Deliberately still broad (any lookup failure is reported as a bad
			# cutoff), but unlike a bare ``except:`` this no longer swallows
			# KeyboardInterrupt / SystemExit.
			raise Exception("No such weighting cutoff, valid cutoffs are: " + repr(myHeader.cutoffs))
		simformat.annotateAlignment(myAlign)
		weights = S.array(simformat.getinvnormsim(myAlign,weightsindex))
	else:
		weights = S.ones(len(myAlign))

	N = len(myAlign)
	# The width is taken from the first record, so at least one record is required.
	Width = len(myAlign[0])

	# LIL is cheap to fill element by element; it is converted to CSC on return.
	Matrix = sp.lil_matrix((N,Q*Width))

	# zip stops at the shorter of the two sequences, exactly as izip did.
	for i,(seqRec,one_weight) in enumerate(zip(myAlign,weights)):
		seq_as_ints = intConv(seqRec.seq.tostring())
		for j,residue in enumerate(seq_as_ints):
			Matrix[i,j*Q + residue] = one_weight

	return Matrix.tocsc()
def main():
	"""Command-line entry point: annotate an alignment with similarity counts.

	Expects two positional arguments, ``<inputFAA>`` and ``<outputSIM>``.
	The input is read with ``simformat.read``, optionally reduced with
	``uniqSeqs``, and annotated with ``simformat.annotateAlignment``. For
	every cutoff, each record gets the number of records whose similarity
	to it (1 - Hamming distance over the character codes) is strictly
	greater than that cutoff. These counts are stored as a list of ints under
	``annotations["weights"]`` and the alignment is written with
	``simformat.write``.

	Cutoffs come from ``--ids`` (comma separated percentages, divided by 100)
	unless ``--fids`` (comma separated fractions, used as given) is supplied.

	Exits with a non-zero status after printing the help text when the
	number of positional arguments is not exactly two.
	"""
	#parse commandline arguments
	parser = OptionParser(usage="Usage: %prog [options] <inputFAA> <outputSIM>")
	# NOTE(review): default=True combined with store_true means this flag can
	# never be switched off from the command line -- confirm that is intended.
	parser.add_option('-u','--unique',dest='unique',default=True,action='store_true')
	parser.add_option('-i','--ids',dest='cutoffs',type='string',default='100,98,95,90,85,80,75,70')
	parser.add_option('-f','--fids',dest='fcutoffs',type='string',default=None)
	options, args = parser.parse_args()
	if len(args) != 2:
		parser.print_help()
		# A usage error must not look like success to calling scripts.
		sys.exit(1)

	#read the alignment from file
	with open(args[0]) as infile:
		myAlignment = simformat.read(infile)

	if options.unique:
		myAlignment._records = uniqSeqs(myAlignment._records)

	simformat.annotateAlignment(myAlignment)

	# Explicit lists (rather than map objects) keep the array construction
	# correct regardless of whether map() is lazy.
	if options.fcutoffs is None:
		thresholds = S.array([float(c) for c in options.cutoffs.split(',')])/100.0
	else:
		thresholds = S.array([float(c) for c in options.fcutoffs.split(',')])
	myAlignment.header.cutoffs = thresholds

	# One vector of character codes per record, so that the Hamming distance
	# is the fraction of positions that differ.
	AsVects = [S.array([ord(c) for c in record.seq.tostring()]) for record in myAlignment]

	AllSimilarities = 1.0 - D.cdist(AsVects,AsVects,'hamming')

	weights = S.zeros((len(myAlignment),len(thresholds)))

	# NOTE(review): the comparison is strict, so a cutoff of 1.0 counts nothing
	# (not even the record itself) -- confirm whether ">=" was intended.
	for col, oneThresh in enumerate(thresholds):
		weights[:,col] = (AllSimilarities > oneThresh).sum(1)

	for i, row in enumerate(weights):
		myAlignment._records[i].annotations["weights"] = [int(w) for w in row]

	#write output file
	with open(args[1],"w") as outfile:
		simformat.write(outfile,myAlignment)