Esempio n. 1
0
def GetFrameStats(data, inRunningSum, inRunningVoiceSum, inRunningCount,
                  hpfCutoff=80, lpfCutoff=1000):
  """Compute peak and running-average magnitudes for one frame of samples.

  The frame is passed through a band-pass filter built from
  ``filters.hpf(len(data), hpfCutoff)`` and ``filters.lpf(len(data), lpfCutoff)``
  (combined with ``filters.mult`` and applied with ``filters.apply``), and the
  absolute values of both the raw and the filtered samples are folded into the
  running totals supplied by the caller.

  NOTE(review): the units of the cut-offs (Hz vs. bin index) are defined by
  the ``filters`` module, which is not visible here -- confirm before changing
  the defaults.

  Args:
    data: sequence of numeric samples for this frame.
    inRunningSum: running sum of ``abs(sample)`` over all previous frames.
    inRunningVoiceSum: same, for the filtered ("voice") samples.
    inRunningCount: number of samples accumulated in previous frames.
    hpfCutoff: cut-off handed to ``filters.hpf`` (default 80, as before).
    lpfCutoff: cut-off handed to ``filters.lpf`` (default 1000, as before).

  Returns:
    A tuple ``(maxVal, voicedata, voiceMax, outRunningCount, outRunningSum,
    outRunningAvg, outRunningVoiceSum, outRunningVoiceAvg)``.  For an empty
    frame the two maxima are 0; while no samples have been accumulated at all
    the two averages are 0.
  """
  Ndata = len(data)
  voicefilter = filters.mult(filters.hpf(Ndata, hpfCutoff),
                             filters.lpf(Ndata, lpfCutoff))
  # filters.apply also returns a second value, which this function never uses
  voicedata, _ = filters.apply(data, voicefilter)

  # take the magnitudes once; they feed both the maxima and the running sums
  absData = [abs(d) for d in data]
  absVoice = [abs(d) for d in voicedata]

  # max() raises ValueError on an empty sequence, so guard empty frames
  maxVal = max(absData) if absData else 0
  voiceMax = max(absVoice) if absVoice else 0

  outRunningCount = inRunningCount + Ndata
  outRunningSum = inRunningSum + sum(absData)
  outRunningVoiceSum = inRunningVoiceSum + sum(absVoice)

  if outRunningCount:
    outRunningAvg = outRunningSum / outRunningCount
    outRunningVoiceAvg = outRunningVoiceSum / outRunningCount
  else:
    # nothing accumulated yet: avoid dividing by zero
    outRunningAvg = 0
    outRunningVoiceAvg = 0

  return (maxVal, voicedata, voiceMax, outRunningCount, outRunningSum,
          outRunningAvg, outRunningVoiceSum, outRunningVoiceAvg)
Esempio n. 2
0
def _findMedian( sortedArray ):
	"""Return the median of an already-sorted, non-empty sequence."""
	arrayLen = len( sortedArray )
	middle = arrayLen // 2
	if arrayLen % 2:
		# odd length: the single central element
		return sortedArray[ middle ]
	# even length: mean of the two central elements
	return ( sortedArray[ middle - 1 ] + sortedArray[ middle ] ) / 2.0


def _printPlotStatistics( label, yAxis ):
	"""Print range, mean, median, IQR, standard deviation and variance of yAxis."""
	ordered = sorted( yAxis )
	count = len( ordered )
	if not count:
		# min( ), max( ) and the median are undefined for an empty trace
		return

	median = _findMedian( ordered )
	# the quartiles are the medians of the lower and upper halves (the central
	# element is left out for odd counts); with a single point both halves are
	# empty, so fall back to the whole array
	q1 = _findMedian( ordered[ : count // 2 ] or ordered )
	q3 = _findMedian( ordered[ ( count + 1 ) // 2 : ] or ordered )
	min_ = min( yAxis )
	max_ = max( yAxis )
	print( "Plot statistics for %s:" % label )
	print( "\tRange:               %g (%g - %g)" % ( max_ - min_, min_, max_ ))
	print( "\tMean:                %g" % numerical.mean( yAxis ))
	print( "\tMedian:              %g" % median )
	print( "\tInterquartile Range: %g (%g - %g)" % ( q3 - q1, q1, q3 ))
	print( "\tStandard Deviation:  %g" % numerical.std( yAxis ))
	print( "\tVariance:            %g" % numerical.var( yAxis ))


def _readPeakBars( rows, options ):
	"""Turn peak-table rows into bar-graph arrays, one bar per scan.

	Each row must provide the keys 'Scan', 'RT(min)', 'M/Z', 'LC_Noise' and
	'Int'.  Rows outside the options.minTime/options.maxTime window, with a
	signal-to-noise ratio below options.filterLevel, or outside the
	options.mass +/- options.massWindow selection are dropped.  Consecutive
	rows that share a scan number are merged into one bar: the most intense
	peak wins when options.bpc is set, otherwise intensities and noise are
	summed.

	Returns ( barRt, barIntensity, barNoise, labels ).  The three arrays hold
	three entries per bar ( [rt, rt, rt] and [0, value, 0] ) so that a line
	plot draws vertical bars; labels holds one "(m/z, m/z, ...)" string per bar.
	"""
	scan = numerical.empty( 0, numerical.uint64 ) # scan number
	barRt = numerical.empty( 0, numerical.float64 ) # retention time
	barIntensity = numerical.empty( 0, numerical.float64 )
	barNoise = numerical.empty( 0, numerical.float64 )
	labels = [ ]

	for line in rows:
		try:
			scanValue = int( line[ 'Scan' ])
			rtValue = float( line[ 'RT(min)' ])
			mzValue = float( line[ 'M/Z' ])
			noiseValue = float( line[ 'LC_Noise' ])
			intValue = float( line[ 'Int' ])
		except ( ValueError, TypeError, IndexError ):
			# TypeError covers short rows, whose missing fields arrive as None
			# (assuming rows come from csv.DictReader)
			if options.verbosity:
				sys.stderr.write( "Skipping line %s\n" % ( line, ))
			continue

		if ( rtValue < options.minTime or
				( options.maxTime and rtValue > options.maxTime )):
			continue

		# the zero-noise test comes first so the division below cannot fail
		if (( not noiseValue ) or
				intValue / noiseValue < options.filterLevel or
				( options.mass and
					abs( options.mass - mzValue ) > options.massWindow )):
			if options.verbosity:
				sys.stderr.write( "Dropping line %s\n" % ( line, ))
			continue

		# using plot( ) produces a more responsive graph than vlines( )
		if len( scan ) and scanValue == scan[ -1 ]:
			# same scan as the previous row: update the middle entry of the last bar
			if options.bpc:
				if intValue > barIntensity[ -2 ]:
					barIntensity[ -2 ] = intValue
					barNoise[ -2 ] = noiseValue
					labels[ -1 ] = "(%.2f," % ( mzValue - 0.005 ) #truncate, don't round
			else:
				barIntensity[ -2 ] += intValue
				barNoise[ -2 ] += noiseValue
				labels[ -1 ] += " %.2f," % ( mzValue - 0.005 ) #truncate, don't round
		else:
			# appending [0, value, 0] allows us to plot a bar graph using lines
			barRt = numerical.append( barRt, [ rtValue, rtValue, rtValue ])
			barIntensity = numerical.append( barIntensity, [ 0, intValue, 0 ])
			barNoise = numerical.append( barNoise, [ 0, noiseValue, 0 ])
			scan = numerical.append( scan, scanValue )
			if labels:
				labels[ -1 ] = labels[ -1 ][ :-1 ] + ')' # replace the last , with )
			labels.append( "(%.2f," % ( mzValue - 0.005 )) #truncate, don't round

	if labels:
		labels[ -1 ] = labels[ -1 ][ :-1 ] + ')' # replace the last , with )

	return barRt, barIntensity, barNoise, labels


def main( options=None, args=None ):
	"""Plot chromatograms and peak lists for the given files.

	Arguments whose name ends in a raw-data extension (.csv, .mzdata, .mzxml,
	.mzxml.xml, .json, .json.gz) are loaded through mzlib.RawData and drawn as
	a total-ion or base-peak chromatogram.  Every remaining argument is read
	as a peak table with DictReader and drawn as intensity bars and/or a noise
	trace.  The figure is saved to options.outputFile when that is set and
	shown interactively unless options.scriptMode is set.

	Parameters:
	  options -- object carrying the settings read here: scriptMode,
	             outputFile, width, height, dpi, lineWidth, markerAlpha,
	             shortFilename, mass, massWindow, minTime, maxTime, bpc,
	             lpfThreshold, hpfThreshold, normalize, filterLevel,
	             verbosity, massLabels, showPeaks, removeNoise, connectPeaks,
	             showNoise and showLegend.
	  args    -- list of input file paths.  Raw-data paths are removed from
	             this list in place.  None is treated as an empty list.

	Side effects: rotates the colour lists in the global COLORS so that each
	plotted trace gets the next colour; exits with status -1 when a file
	cannot be read.
	"""
	global COLORS

	if args is None:
		args = [ ]

	# the backend has to be selected before pylab is imported
	if options.scriptMode:
		matplotlib.use( 'Agg' )
	import pylab

	thisFigure = pylab.figure( )

	if options.outputFile:
		thisFigure.set_size_inches((
			float( options.width ) / options.dpi,
			float( options.height ) / options.dpi ))

	pylab.subplots_adjust(
		left = 0.6 / options.width * options.dpi,
		right = 1.0 - 0.2 / options.width * options.dpi,
		top = 1.0 - 0.45 / options.height * options.dpi,
		bottom = 0.5 / options.height * options.dpi )

	# check the extension to see if this is xmass input data; walk backwards
	# so that popping an entry does not shift the indices still to be visited
	rawTypes = ( '.csv', '.mzdata', '.mzxml', '.mzxml.xml',
		'.json', '.json.gz' )
	rawFiles = [ ]
	for i in range( len( args ) - 1, -1, -1 ):
		if args[ i ].lower( ).endswith( rawTypes ):
			rawFiles.append( args.pop( i ))

	for r in rawFiles:
		ref = mzlib.RawData( )
		if not ref.read( r ):
			sys.stderr.write( "Error: Unable to load data from '%s'\n" % r )
			sys.exit( -1 )

		if options.shortFilename:
			filename = os.path.basename( r )
		else:
			filename = r

		# apply any filters
		if options.mass:
			ref.onlyMz( options.mass, options.massWindow )

		if options.maxTime:
			ref.onlyScans( options.minTime, options.maxTime )
		elif options.minTime:
			ref.onlyScans( options.minTime )

		rt = [ scan[ "retentionTime" ] for scan in ref if scan[ "msLevel" ] == 1 ]
		if options.bpc:
			yAxis = ref.bpc( 1 )
		else:
			yAxis = ref.tic( 1 )

		if filters:
			if options.lpfThreshold and options.hpfThreshold:
				yAxis = filters.bpf( yAxis, options.hpfThreshold, options.lpfThreshold )
			elif options.lpfThreshold:
				yAxis = filters.lpf( yAxis, options.lpfThreshold )
			elif options.hpfThreshold:
				yAxis = filters.hpf( yAxis, options.hpfThreshold )

		if options.normalize:
			if len( yAxis ):
				max_ = max( yAxis )
				if max_:
					yAxis = [ x / max_ for x in yAxis ]
			label = filename + " (normalized)"
		else:
			label = filename

		pylab.plot( rt, yAxis, COLORS['ref'][0], alpha = options.markerAlpha,
			linewidth = options.lineWidth, label = label )
		# rotate the palette so the next trace gets a different colour
		COLORS['ref'] = COLORS['ref'][1:] + [ COLORS['ref'][0]]

		if not options.scriptMode:
			_printPlotStatistics( label, yAxis )

	# The following section of code is specific to the OmicsDP data formats.
	# You can safely delete this section if you are using this software outside
	# of that environment.
	# BEGIN READING DLTs
	for fileIndex, arg in enumerate( args ):
		try:
			f = open( arg )
		except IOError:
			sys.stderr.write( "Error: unable to read file '%s'\n" % arg )
			sys.exit( -1 )
		# close the file as soon as it has been parsed, even if parsing raises
		try:
			barRt, barIntensity, barNoise, labels = _readPeakBars(
				DictReader( f ), options )
		finally:
			f.close( )

		if options.shortFilename:
			filename = os.path.basename( arg )
		else:
			filename = arg

		if options.normalize:
			if len( barIntensity ):
				max_ = max( barIntensity )
				if max_:
					barIntensity /= max_
					barNoise /= max_

		if options.massLabels:
			# the value of bar i sits in the middle of its [0, value, 0] triple
			for i, text in enumerate( labels ):
				pylab.annotate( text,
					( barRt[ 3 * i + 1 ], barIntensity[ 3 * i + 1 ]), size=9 )

		# calculate alpha based on which file this is in the list
		alpha = ( options.markerAlpha - options.markerAlpha *
			( fileIndex / float( len( args ))) * 0.75 )

		if options.showPeaks:
			if not options.removeNoise:
				barIntensity += barNoise

			if options.normalize:
				label = ( "%s - intensity (%d peaks, normalized)" %
					( filename, len( barIntensity ) // 3 ))
			else:
				label = ( "%s - intensity (%d peaks)" %
					( filename, len( barIntensity ) // 3 ))
			pylab.plot( barRt, barIntensity, COLORS['intensity'][0],
				linewidth = options.lineWidth * 2, alpha = alpha, label = label )

		if options.connectPeaks:
			pylab.plot( barRt[ 2::3 ], barIntensity[ 1::3 ], COLORS['intensity'][0],
				alpha = alpha, linewidth = options.lineWidth )
		COLORS['intensity'] = COLORS['intensity'][1:] + [ COLORS['intensity'][0]]

		if options.showNoise:
			if options.normalize:
				label = ( "%s - noise (%d points, normalized)" %
					( filename, len( barNoise ) // 3 ))
			else:
				label = ( "%s - noise (%d points)" %
					( filename, len( barNoise ) // 3 ))
			pylab.plot( barRt[ 2::3 ], barNoise[ 1::3 ], COLORS['noise'][0],
				alpha = alpha, linewidth = options.lineWidth, label = label )
			COLORS['noise'] = COLORS['noise'][1:] + [ COLORS['noise'][0]]

		if len( barRt ):
			#draw a horizontal black line at 0
			pylab.plot( [ barRt[ 1 ], barRt[ -2 ]], [ 0, 0 ], 'k',
				linewidth = options.lineWidth )
	# END READING DLTs

	if options.showLegend:
		pylab.legend( loc="upper left", prop=FontProperties( size='small' ))

	pylab.grid( )
	axes = thisFigure.get_axes( )[ 0 ]
	axes.set_xlabel( "Time (min)" )
	axes.set_ylabel( "Intensity" )
	axes.ticklabel_format( style="scientific", axis="y", scilimits=(3,3) )

	if not len( rawFiles ):
		if options.bpc:
			axes.set_title( "Base Peaks" )
		else:
			axes.set_title( "Peaks" )
	elif options.bpc:
		if options.mass:
			axes.set_title(
				"Selected Base Peak Chromatogram (M/Z: %f, Tolerance: %f)" %
				( options.mass, options.massWindow ))
		else:
			axes.set_title( "Base Peak Chromatogram" )
	else:
		if options.mass:
			axes.set_title(
				"Selected Ion Chromatogram (M/Z: %f, Tolerance: %f)" %
				( options.mass, options.massWindow ))
		else:
			axes.set_title( "Total Ion Chromatogram" )

	if options.outputFile:
		thisFigure.savefig( options.outputFile, dpi=options.dpi )
	if not options.scriptMode:
		pylab.show( )