예제 #1
0
def reconstruct_audio(matches, distances, bounds, target_file, source_files, p, win, hop, beta=2.0):
    """
    Make a new audio signal based on matches and source media.

     inputs:
      matches - matches from match_sequences (indexed here as matches[i, :])
      distances - distances from match_sequences (indexed here as distances[i, j])
      bounds - handed to _bounds_to_locator / _bounds_to_index to turn a match
               into an entry of source_files and a position within it
      target_file - file name of target media (the one to reconstruct)
      source_files - list of file names of source media (the database)
      p - feature parameters (only p['nhop'] is read directly in this function)
      win - sequence length
      hop - sequence hop
      beta - stiffness coefficient for mixing based on distances [2.0]
     returns:
      the result of _sequence_overlap_add over the per-sequence signals
    """
    nhop = p['nhop']
    y_list = list()
    hamm = hamming(nhop * 2)[:nhop]  # first half of a 2*nhop window, used as the edge taper
    for i in range(len(matches)):
        x = _fetch_audio(target_file, i, p, win, hop)
        y = zeros((win * nhop))
        for j, m in enumerate(matches[i, :]):
            yy = _fetch_audio(source_files[_bounds_to_locator(m, bounds)],
                              _bounds_to_index(m, bounds), p, win, hop)
            y += yy * exp(-beta * distances[i, j])  # weight match contribution by distance prior
        # Energy balance: scale the output rms to the input rms. A mix with zero
        # rms cannot be rescaled (the division would fill y with NaN/inf), so it
        # is left untouched.
        y_rms = rms_flat(y)
        if y_rms > 0:
            y *= rms_flat(x) / y_rms
        if win > 1 and hop < win:
            y[:nhop] *= hamm  # taper the head
            y[:-nhop - 1:-1] *= hamm  # taper the tail (reversed view of the last nhop samples)
        y_list.append(y)
    return _sequence_overlap_add(y_list, p, win, hop)
예제 #2
0
def reconstruct_audio(matches, distances, bounds, target_file, source_files, p, win, hop, beta=2.0):
    """
    Make a new audio signal based on matches and source media.

     inputs:
      matches - matches from match_sequences (indexed here as matches[i, :])
      distances - distances from match_sequences (indexed here as distances[i, j])
      bounds - handed to _bounds_to_locator / _bounds_to_index to turn a match
               into an entry of source_files and a position within it
      target_file - file name of target media (the one to reconstruct)
      source_files - list of file names of source media (the database)
      p - feature parameters (only p['nhop'] is read directly in this function)
      win - sequence length
      hop - sequence hop
      beta - stiffness coefficient for mixing based on distances [2.0]
     returns:
      the result of _sequence_overlap_add over the per-sequence signals
    """
    nhop = p['nhop']
    y_list = list()
    hamm = hamming(nhop * 2)[:nhop]  # first half of a 2*nhop window, used as the edge taper
    for i in range(len(matches)):
        x = _fetch_audio(target_file, i, p, win, hop)
        y = zeros((win * nhop))
        for j, m in enumerate(matches[i, :]):
            yy = _fetch_audio(source_files[_bounds_to_locator(m, bounds)],
                              _bounds_to_index(m, bounds), p, win, hop)
            y += yy * exp(-beta * distances[i, j])  # weight match contribution by distance prior
        # Energy balance: scale the output rms to the input rms. A mix with zero
        # rms cannot be rescaled (the division would fill y with NaN/inf), so it
        # is left untouched.
        y_rms = rms_flat(y)
        if y_rms > 0:
            y *= rms_flat(x) / y_rms
        if win > 1 and hop < win:
            y[:nhop] *= hamm  # taper the head
            y[:-nhop - 1:-1] *= hamm  # taper the tail (reversed view of the last nhop samples)
        y_list.append(y)
    return _sequence_overlap_add(y_list, p, win, hop)
예제 #3
0
파일: nist.py 프로젝트: issfangks/milo-lab
 def verify_formation(self, html_writer, thermodynamics, name=None):
     """
     Write two HTML tables of prediction errors, per compound and per reaction.

     For every row from self.SelectRowsFromNist() the error is
     row_data.dG0_r minus row_data.PredictReactionEnergy(thermodynamics);
     rows whose prediction is NaN are skipped. Each error is recorded for
     every CID returned by row_data.GetAllCids() and for row_data.reaction.
     Both tables are sorted by descending RMSE and written with
     html_writer.write_table.

     html_writer    - object providing write_table(rowdicts, headers, decimal=...)
     thermodynamics - passed through to PredictReactionEnergy
     name           - accepted but not used by this method
     """
     cid2errors = defaultdict(list)
     cid2refs = defaultdict(set)
     reaction2errors = defaultdict(list)
     reaction2refs = defaultdict(set)
     for row_data in self.SelectRowsFromNist():
         dG0_est = row_data.PredictReactionEnergy(thermodynamics)
         if np.isnan(dG0_est):
             continue
         err = row_data.dG0_r - dG0_est
         ref = (row_data.ref_id, row_data.url)
         for cid in row_data.GetAllCids():
             cid2errors[cid].append(err)
             cid2refs[cid].add(ref)
         reaction2errors[row_data.reaction].append(err)
         reaction2refs[row_data.reaction].add(ref)

     def stats_row(err_list, refs):
         # Columns shared by both tables: error statistics plus reference links.
         urls = ', '.join(['<a href="%s">%s</a>' % (url, ref_id)
                           for ref_id, url in refs])
         return {'RMSE': rms_flat(err_list),
                 'E[err]': np.mean(err_list),
                 '#err': len(err_list),
                 'std[err]': np.std(err_list),
                 'URLs': urls}

     # .items() (rather than the Python-2-only .iteritems()) works on both
     # Python 2 and Python 3.
     rowdicts = []
     for cid, err_list in cid2errors.items():
         rowdict = stats_row(err_list, cid2refs[cid])
         rowdict['cid'] = 'C%05d' % cid
         rowdict['name'] = self.kegg.cid2name(cid)
         rowdicts.append(rowdict)

     rowdicts.sort(key=lambda x: x['RMSE'], reverse=True)
     html_writer.write_table(rowdicts, ['#', 'cid', 'name', 'RMSE',
                                        '#err', 'E[err]', 'std[err]', 'URLs'], decimal=1)

     rowdicts = []
     for reaction, err_list in reaction2errors.items():
         rowdict = stats_row(err_list, reaction2refs[reaction])
         rowdict['reaction'] = reaction.to_hypertext(show_cids=False)
         rowdicts.append(rowdict)

     rowdicts.sort(key=lambda x: x['RMSE'], reverse=True)
     html_writer.write_table(rowdicts, ['#', 'reaction', 'RMSE',
                                        '#err', 'E[err]', 'std[err]', 'URLs'], decimal=1)
    def Report(self, est, title):
        """
        Write a titled report comparing the estimates *est* with self.b.

        Emits the title, a CDF figure of the absolute residuals over the
        finite entries of *est* (with RMSE and N in the figure title), and a
        toggled table with one row per column of self.b, sorted by
        descending absolute error. Rows whose estimate is not finite get an
        '|err|' of 0.
        """
        writer = self.html_writer
        writer.write('</br><b>%s</b><br>\n' % title)

        is_finite = np.isfinite(est)
        abs_resid = abs(self.b[is_finite] - est[is_finite])
        fig = plt.figure(figsize=(5,5), dpi=60)
        cdf(list(abs_resid.flat), figure=fig)
        caption = "RMSE = %.1f, N = %d" % (rms_flat(abs_resid.flat), abs_resid.shape[1])
        plt.title(caption)
        plt.xlabel(r"$|\Delta_r G^{'\circ} obs - \Delta_r G^{'\circ} est|$ [kJ/mol]")
        plt.ylabel(r"CDF")
        writer.embed_matplotlib_figure(fig)

        def abs_error(col):
            # Non-finite estimates are reported with a zero error.
            if not np.isfinite(est[0, col]):
                return 0
            return abs(self.b[0, col] - est[0, col])

        rowdicts = [
            {'row': col,
             'type': self.obs_types[col],
             'reaction': UnifiedGroupContribution.row2hypertext(self.S[:, col], self.cids),
             'anchored': self.anchored[0, col],
             'obs': self.b[0, col],
             'est': est[0, col],
             '|err|': abs_error(col)}
            for col in xrange(self.b.shape[1])
        ]
        rowdicts.sort(key=lambda row: row['|err|'], reverse=True)

        writer.insert_toggle(start_here=True, label="Show table")
        column_names = ['row', 'type', 'reaction', 'anchored', 'obs', 'est', '|err|']
        writer.write_table(rowdicts, headers=column_names, decimal=1)
        writer.div_end()
예제 #5
0
def noise_brown(ncols, nrows=1, weight=1, filter=None, filterargs=None):
    '''Return 1/f^2 noise of shape (nrows, ncols), obtained by taking
    the cumulative sum of gaussian white noise, with rms weight.

    If filter is not None, this function will apply the filter coefficients obtained
    by:
    ::

        >>> b, a = filter(**filterargs)
        >>> signal = scipy.signal.lfilter(b, a, signal)

    A filterargs of None is treated as "no keyword arguments", so a filter
    factory that needs no arguments can be passed on its own.
    '''
    # Extra leading samples that are generated (and filtered) but then dropped
    # from every row.
    n_discard = 10000

    if filter is not None:
        coeff_b, coeff_a = list(filter(**(filterargs or {})))

    noise = np.empty((nrows, ncols))
    for i in range(nrows):
        signal = np.random.normal(size=ncols + n_discard).cumsum()
        if filter is not None:
            signal = ss.lfilter(coeff_b, coeff_a, signal)
        row = signal[n_discard:]
        # Root-mean-square computed with numpy directly; this avoids importing
        # matplotlib.mlab.rms_flat, which newer Matplotlib releases no longer ship.
        rms = np.sqrt(np.mean(np.abs(row) ** 2))
        noise[i, :] = row / rms * weight
    return noise
예제 #6
0
파일: tools.py 프로젝트: dragly/LFPy
def noise_brown(ncols, nrows=1, weight=1, filter=None, filterargs=None):
    '''Return 1/f^2 noise of shape (nrows, ncols), obtained by taking
    the cumulative sum of gaussian white noise, with rms weight.

    If filter is not None, this function will apply the filter coefficients obtained
    by:
    ::

        >>> b, a = filter(**filterargs)
        >>> signal = scipy.signal.lfilter(b, a, signal)

    A filterargs of None is treated as "no keyword arguments", so a filter
    factory that needs no arguments can be passed on its own.
    '''
    # Extra leading samples that are generated (and filtered) but then dropped
    # from every row.
    n_discard = 10000

    if filter is not None:
        coeff_b, coeff_a = list(filter(**(filterargs or {})))

    noise = np.empty((nrows, ncols))
    for i in range(nrows):
        signal = np.random.normal(size=ncols + n_discard).cumsum()
        if filter is not None:
            signal = ss.lfilter(coeff_b, coeff_a, signal)
        row = signal[n_discard:]
        # Root-mean-square computed with numpy directly; this avoids importing
        # matplotlib.mlab.rms_flat, which newer Matplotlib releases no longer ship.
        rms = np.sqrt(np.mean(np.abs(row) ** 2))
        noise[i, :] = row / rms * weight
    return noise
    
예제 #7
0
def binned_plot(x, y, bins, y_type='mean', figure=None, plot_counts=True):
    """
    Draw a bar chart of a statistic of y, grouped by the bin that x falls in.

    x, y        - equally indexed sequences; y[i] is assigned to the bin of x[i]
    bins        - inner bin edges; together with min(x) and max(x) they define
                  len(bins) + 1 bins, each open on the left and closed on the
                  right (a value equal to an edge belongs to the lower bin)
    y_type      - 'mean', 'rmse' or 'std': the statistic drawn for each bin
    figure      - figure to draw into; a new one is created when not given
    plot_counts - when true, label every non-empty bar with its sample count

    Empty bins are drawn with height 0.0. Raises ValueError for an unknown
    y_type. Returns None.
    """
    reducers = {'mean': np.mean, 'rmse': rms_flat, 'std': np.std}
    if y_type not in reducers:
        raise ValueError("unknown y_type: %r" % (y_type,))
    reduce_bin = reducers[y_type]

    edges = list(sorted(bins))
    n_bins = len(edges) + 1
    # Outer edges are only needed for the bar geometry.
    bins_array = np.array([min(x)] + edges + [max(x)])

    binned_y = {}
    for i, x_i in enumerate(x):
        # The bin index is the number of inner edges strictly below x_i. This
        # keeps the smallest and the largest sample inside the first and last
        # bin without relying on an epsilon offset of the outer edges.
        bin_index = int(np.searchsorted(edges, x_i, side='left'))
        binned_y.setdefault(bin_index, []).append(y[i])

    y_count = []
    y_vec = []
    for j in range(n_bins):
        values = np.array(binned_y.get(j, []))
        y_count.append(len(values))
        y_vec.append(reduce_bin(values) if len(values) else 0.0)

    bin_width = bins_array[1:] - bins_array[0:-1]
    bin_center = (bins_array[1:] + bins_array[0:-1]) / 2

    if not figure:
        figure = plt.figure()
    if hasattr(figure, 'hold'):
        # Only present on older Matplotlib figures.
        figure.hold(True)
    # Positional x with an explicit align='edge' puts the left side of every
    # bar on its lower bin edge regardless of the installed Matplotlib's
    # keyword name and default alignment.
    plt.bar(bins_array[0:-1], y_vec, width=bin_width, align='edge', figure=figure)
    if plot_counts:
        for j in range(n_bins):
            if y_count[j] > 0:
                plt.text(bin_center[j], y_vec[j], '%d' % y_count[j],
                         horizontalalignment='center', fontsize='small')
예제 #8
0
def crest_factor(signal):
    """
    Crest factor of a 1D signal: peak absolute value divided by its RMS.

    The RMS is computed with numpy in floating point, so integer sample
    arrays do not overflow when squared. When the RMS is exactly zero it is
    replaced by 1e-6 to avoid dividing by zero, which makes an all-zero
    signal return 0.0.
    """
    magnitude = np.absolute(signal)
    peak = np.amax(magnitude)
    rms = np.sqrt(np.mean(magnitude.astype(float) ** 2))
    if rms == 0:
        rms = .000001
    return peak / rms
예제 #9
0
def analyse_rotation(img, plot=False, line_spacing=False):
    """
    Estimate the rotation of an image from its radon transform.

    img          - image handed to cv2.cvtColor with cv2.COLOR_BGR2GRAY
    plot         - when true, print the rotation and show a 2x2 diagnostic figure
    line_spacing - when true, print the line spacing (in pixels) estimated from
                   the spectrum of the busiest sinogram column

    Returns 90 minus the index of the sinogram column with the largest RMS.
    """
    # converting to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    I = gray - mean(gray)  # Demean; make the brightness extend above and below zero

    # Do the radon transform
    with warnings.catch_warnings():  # Some warning inside the radon
        warnings.simplefilter("ignore")
        sinogram = radon(I)

    # Find the RMS value of each sinogram column and find the "busiest"
    # rotation, where the transform is lined up perfectly with the alternating
    # dark text and white lines. The RMS is computed directly, which removes
    # the need for the deprecated rms_flat helper and for silencing its warning.
    r = array([mean(abs(line) ** 2) ** 0.5 for line in sinogram.transpose()])
    busiest = argmax(r)
    rotation = 90 - busiest

    if line_spacing or plot:
        # The busy row must be picked by its column index (busiest), not by
        # the derived angle, which would address a different column.
        row = sinogram[:, busiest]
        N = len(row)

        # Take spectrum of busy row and find line spacing
        window = blackman(N)
        spectrum = rfft(row * window)
        frequency = argmax(abs(spectrum))

    if line_spacing:
        spacing = N / frequency  # pixels
        print('Line spacing: {:.2f} pixels'.format(spacing))

    if plot:
        print('Rotation: {:.2f} degrees'.format(rotation))
        plt.subplot(2, 2, 1)
        plt.imshow(I)

        plt.subplot(2, 2, 2)
        plt.imshow(sinogram.T, aspect='auto')
        plt.gray()

        # Rows of sinogram.T are sinogram columns, so mark the busiest index.
        plt.axhline(busiest, color='r')

        plt.subplot(2, 2, 3)
        plt.plot(row)

        plt.plot(row * window)

        plt.subplot(2, 2, 4)
        plt.plot(abs(spectrum))
        plt.axvline(frequency, color='r')
        plt.yscale('log')
        plt.show()

    return rotation
예제 #10
0
def rms_spectrum_test(song='tainted', tuning_f0=110., channel=0):
    """
    Extract spectral RMS power for equal temperament pitches
    inputs:
        song - directory name of song (contains: song/mix_000.wav and song/mix_100.wav non-autotuned and autotuned mixes)
        tuning_f0 - lowest frequency to track melody (110Hz = A440Hz/4) [110]
        channel - whether to use 0=left, 1=right, or 2=both channels [0]
    outputs:
        dict {'nontuned_rms': ..., 'autotuned_rms': ...} holding rms_flat of the
        per-band values of the non-autotuned and autotuned mixes
    """
    def pick_channel(samples):
        # Signals without a second axis are used as they are.
        if len(samples.shape) <= 1:
            return samples
        if channel == 2:  # mix the channels
            return samples.mean(1)
        return samples[:, channel]  # extract given channel

    x0, sr, fmt = wavread(os.sep.join([song, 'mix_000.wav']))
    x1, sr, fmt = wavread(os.sep.join([song, 'mix_100.wav']))
    x0 = pick_channel(x0)
    x1 = pick_channel(x1)

    # Short-time Fourier analysis
    stft_settings = dict(nfft=8192, wfft=8192, nhop=2048)
    F0 = LinearFrequencySpectrum(x0, **stft_settings)
    F1 = LinearFrequencySpectrum(x1, **stft_settings)

    # Nearest analysis bin for every equal temperament frequency.
    eq_freqs = tuning_f0 * 2**(arange(0, 5, 1 / 12.))
    eq_bins = array([argmin(abs(F0._fftfrqs - f)) for f in eq_freqs])

    def band_rms(spectrum):
        # Root mean square along axis 1 of the selected rows.
        return (normalize(spectrum.X)[eq_bins]**2).mean(1)**0.5

    df0 = band_rms(F0)
    df1 = band_rms(F1)

    band_freqs = F0._fftfrqs[eq_bins]
    figure()
    semilogx(band_freqs, df0)
    semilogx(band_freqs, df1)
    legend(['Original vocals', 'Autotuned vocals'], loc=0)
    title(song + ': ET bands untuned/tuned vocals mixed with background',
          fontsize=20)
    xlabel('Equal Temperament Bands (Hz)', fontsize=20)
    ylabel('Power', fontsize=20)
    grid()

    result = {}
    result['nontuned_rms'] = rms_flat(df0)
    result['autotuned_rms'] = rms_flat(df1)
    return result
예제 #11
0
    def compute_skew(cls, image):
        """
        Estimate a skew value for *image* from its radon transform.

        The image is demeaned, radon-transformed, and the row of the
        transposed sinogram with the largest RMS is located. Returns
        (90 - that index) / 100.
        """
        # Demean; make the brightness extend above and below zero
        centered = image - np.mean(image)

        sinogram = radon(centered)

        # RMS of every row of the transposed sinogram; the "busiest" one is
        # where the transform lines up with the alternating dark text and
        # white lines.
        rms_values = np.array(
            [rms_flat(projection) for projection in sinogram.transpose()])
        busiest = np.argmax(rms_values)
        return (90 - busiest) / 100
예제 #12
0
def rms_spectrum_test(song='tainted', tuning_f0=110., channel=0):
    """
    Extract spectral RMS power for equal temperament pitches
    inputs:
        song - directory name of song (contains: song/mix_000.wav and song/mix_100.wav non-autotuned and autotuned mixes)
        tuning_f0 - lowest frequency to track melody (110Hz = A440Hz/4) [110]
        channel - whether to use 0=left, 1=right, or 2=both channels [0]
    outputs:
        dict {'nontuned_rms': ..., 'autotuned_rms': ...} holding rms_flat of the
        per-band values of the non-autotuned and autotuned mixes
    """
    def pick_channel(samples):
        # Signals without a second axis are used as they are.
        if len(samples.shape) <= 1:
            return samples
        if channel == 2:  # mix the channels
            return samples.mean(1)
        return samples[:, channel]  # extract given channel

    x0, sr, fmt = wavread(os.sep.join([song, 'mix_000.wav']))
    x1, sr, fmt = wavread(os.sep.join([song, 'mix_100.wav']))
    x0 = pick_channel(x0)
    x1 = pick_channel(x1)

    # Short-time Fourier analysis
    stft_settings = dict(nfft=8192, wfft=8192, nhop=2048)
    F0 = LinearFrequencySpectrum(x0, **stft_settings)
    F1 = LinearFrequencySpectrum(x1, **stft_settings)

    # Nearest analysis bin for every equal temperament frequency.
    eq_freqs = tuning_f0*2**(arange(0, 5, 1/12.))
    eq_bins = array([argmin(abs(F0._fftfrqs - f)) for f in eq_freqs])

    def band_rms(spectrum):
        # Root mean square along axis 1 of the selected rows.
        return (normalize(spectrum.X)[eq_bins]**2).mean(1)**0.5

    df0 = band_rms(F0)
    df1 = band_rms(F1)

    band_freqs = F0._fftfrqs[eq_bins]
    figure()
    semilogx(band_freqs, df0)
    semilogx(band_freqs, df1)
    legend(['Original vocals', 'Autotuned vocals'], loc=0)
    title(song + ': ET bands untuned/tuned vocals mixed with background', fontsize=20)
    xlabel('Equal Temperament Bands (Hz)', fontsize=20)
    ylabel('Power', fontsize=20)
    grid()

    result = {}
    result['nontuned_rms'] = rms_flat(df0)
    result['autotuned_rms'] = rms_flat(df1)
    return result
    def Report(self, est, title):
        """
        Write a titled report comparing the estimates *est* with self.b.

        Emits the title, a CDF figure of the absolute residuals over the
        finite entries of *est* (with RMSE and N in the figure title), and a
        toggled table with one row per column of self.b, sorted by
        descending absolute error. Rows whose estimate is not finite get an
        '|err|' of 0.
        """
        writer = self.html_writer
        writer.write('</br><b>%s</b><br>\n' % title)

        is_finite = np.isfinite(est)
        abs_resid = abs(self.b[is_finite] - est[is_finite])
        fig = plt.figure(figsize=(5, 5), dpi=60)
        cdf(list(abs_resid.flat), figure=fig)
        caption = "RMSE = %.1f, N = %d" % (rms_flat(abs_resid.flat),
                                           abs_resid.shape[1])
        plt.title(caption)
        plt.xlabel(
            r"$|\Delta_r G^{'\circ} obs - \Delta_r G^{'\circ} est|$ [kJ/mol]")
        plt.ylabel(r"CDF")
        writer.embed_matplotlib_figure(fig)

        def abs_error(col):
            # Non-finite estimates are reported with a zero error.
            if not np.isfinite(est[0, col]):
                return 0
            return abs(self.b[0, col] - est[0, col])

        rowdicts = [
            {
                'row': col,
                'type': self.obs_types[col],
                'reaction': UnifiedGroupContribution.row2hypertext(
                    self.S[:, col], self.cids),
                'anchored': self.anchored[0, col],
                'obs': self.b[0, col],
                'est': est[0, col],
                '|err|': abs_error(col),
            }
            for col in xrange(self.b.shape[1])
        ]
        rowdicts.sort(key=lambda row: row['|err|'], reverse=True)

        writer.insert_toggle(start_here=True, label="Show table")
        column_names = [
            'row', 'type', 'reaction', 'anchored', 'obs', 'est', '|err|'
        ]
        writer.write_table(rowdicts, headers=column_names, decimal=1)
        writer.div_end()
예제 #14
0
def main(argv):
    """
    Print the rotation estimated for the image given with -f/--file.

    argv is parsed with getopt ("-h", "-f <filename>", "--file=<filename>").
    The image is converted to grayscale, demeaned and radon-transformed; the
    index of the sinogram column with the largest RMS is taken as the
    rotation, and -(90 - rotation) is printed with two decimals.

    Exits with the usage text when too few arguments are given, on -h, on a
    getopt error (status 2), or when no file name was supplied (status 2).
    """
    usage = 'Usage: rotation.py -f <filename>'
    filename = ''

    if len(sys.argv) < 3:
        print(usage)
        sys.exit()
    try:
        opts, args = getopt.getopt(argv, "hf:", ["file="])
    except getopt.GetoptError:
        print('rotation.py -f <filename>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-f", "--file"):
            filename = arg

    if not filename:
        # Arguments were present but none of them named a file; stop here
        # instead of handing an empty path to Image.open.
        print(usage)
        sys.exit(2)

    try:
        # Optional helper module: refines the peak position around the
        # integer argmax.
        from parabolic import parabolic

        def argmax(x):
            return parabolic(x, numpy.argmax(x))[0]
    except ImportError:
        from numpy import argmax

    # Load file, converting to grayscale
    I = asarray(Image.open(filename).convert('L'))
    I = I - mean(I)  # Demean; make the brightness extend above and below zero

    # Do the radon transform
    sinogram = radon(I)

    # Find the RMS value of each sinogram column and find "busiest" rotation,
    # where the transform is lined up perfectly with the alternating dark
    # text and white lines
    r = array([rms_flat(line) for line in sinogram.transpose()])
    rotation = argmax(r)

    print('{:.2f}'.format(-(90 - rotation)))
예제 #15
0
    def verify_formation(self, html_writer, thermodynamics, name=None):
        """
        Write two HTML tables of prediction errors, per compound and per reaction.

        For every row from self.SelectRowsFromNist() the error is
        row_data.dG0_r minus row_data.PredictReactionEnergy(thermodynamics);
        rows whose prediction is NaN are skipped. Each error is recorded for
        every CID returned by row_data.GetAllCids() and for row_data.reaction.
        Both tables are sorted by descending RMSE and written with
        html_writer.write_table.

        html_writer    - object providing write_table(rowdicts, headers, decimal=...)
        thermodynamics - passed through to PredictReactionEnergy
        name           - accepted but not used by this method
        """
        cid2errors = defaultdict(list)
        cid2refs = defaultdict(set)
        reaction2errors = defaultdict(list)
        reaction2refs = defaultdict(set)
        for row_data in self.SelectRowsFromNist():
            dG0_est = row_data.PredictReactionEnergy(thermodynamics)
            if np.isnan(dG0_est):
                continue
            err = row_data.dG0_r - dG0_est
            ref = (row_data.ref_id, row_data.url)
            for cid in row_data.GetAllCids():
                cid2errors[cid].append(err)
                cid2refs[cid].add(ref)
            reaction2errors[row_data.reaction].append(err)
            reaction2refs[row_data.reaction].add(ref)

        def stats_row(err_list, refs):
            # Columns shared by both tables: error statistics plus reference links.
            urls = ', '.join([
                '<a href="%s">%s</a>' % (url, ref_id) for ref_id, url in refs
            ])
            return {
                'RMSE': rms_flat(err_list),
                'E[err]': np.mean(err_list),
                '#err': len(err_list),
                'std[err]': np.std(err_list),
                'URLs': urls
            }

        # .items() (rather than the Python-2-only .iteritems()) works on both
        # Python 2 and Python 3.
        rowdicts = []
        for cid, err_list in cid2errors.items():
            rowdict = stats_row(err_list, cid2refs[cid])
            rowdict['cid'] = 'C%05d' % cid
            rowdict['name'] = self.kegg.cid2name(cid)
            rowdicts.append(rowdict)

        rowdicts.sort(key=lambda x: x['RMSE'], reverse=True)
        html_writer.write_table(
            rowdicts,
            ['#', 'cid', 'name', 'RMSE', '#err', 'E[err]', 'std[err]', 'URLs'],
            decimal=1)

        rowdicts = []
        for reaction, err_list in reaction2errors.items():
            rowdict = stats_row(err_list, reaction2refs[reaction])
            rowdict['reaction'] = reaction.to_hypertext(show_cids=False)
            rowdicts.append(rowdict)

        rowdicts.sort(key=lambda x: x['RMSE'], reverse=True)
        html_writer.write_table(
            rowdicts,
            ['#', 'reaction', 'RMSE', '#err', 'E[err]', 'std[err]', 'URLs'],
            decimal=1)
예제 #16
0
def main(argv):
    """
    Print the rotation estimated for the image given with -f/--file.

    argv is parsed with getopt ("-h", "-f <filename>", "--file=<filename>").
    The image is converted to grayscale, demeaned and radon-transformed; the
    position of the sinogram column with the largest RMS is taken as the
    rotation, and -(90 - rotation) is printed with two decimals. The spectrum
    of that column is then used to compute a line spacing in pixels.

    Exits with the usage text when too few arguments are given, on -h, on a
    getopt error (status 2), or when no file name was supplied (status 2).
    """
    usage = 'Usage: rotation_spacing.py -f <filename>'
    filename = ''

    if len(sys.argv) < 3:
        print(usage)
        sys.exit()
    try:
        opts, args = getopt.getopt(argv, "hf:", ["file="])
    except getopt.GetoptError:
        print('rotation_spacing.py -f <filename>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-f", "--file"):
            filename = arg

    if not filename:
        # Arguments were present but none of them named a file; stop here
        # instead of handing an empty path to Image.open.
        print(usage)
        sys.exit(2)

    try:
        # More accurate peak finding from
        # https://gist.github.com/endolith/255291#file-parabolic-py
        from parabolic import parabolic

        def argmax(x):
            return parabolic(x, numpy.argmax(x))[0]
    except ImportError:
        from numpy import argmax

    # Load file, converting to grayscale
    I = asarray(Image.open(filename).convert('L'))
    I = I - mean(I)  # Demean; make the brightness extend above and below zero

    # Do the radon transform
    sinogram = radon(I)

    # Find the RMS value of each sinogram column and find "busiest" rotation,
    # where the transform is lined up perfectly with the alternating dark
    # text and white lines
    r = array([rms_flat(line) for line in sinogram.transpose()])
    rotation = argmax(r)

    print('{:.2f}'.format(-(90 - rotation)))

    # The busy row. With the parabolic argmax the rotation can be fractional,
    # so round it to the nearest whole column before indexing.
    row = sinogram[:, int(round(rotation))]
    N = len(row)

    # Take spectrum of busy row and find line spacing
    window = blackman(N)
    spectrum = rfft(row * window)
    frequency = argmax(abs(spectrum))
    # NOTE(review): line_spacing is computed but not printed or returned in
    # the visible code - confirm whether it was meant to be reported.
    line_spacing = N / frequency  # pixels
예제 #17
0
def main():
    """
    Fit Pseudoisomeric Reactant Contributions (PRC) and write the reports.

    Steps, in order:
      1. Define the anchored compounds (fixed_cids: CID -> (nH, dG0)).
      2. Load the Alberty table from ../data/public_data.sqlite.
      3. Build S, dG0 and cids with NistRegression.ReverseTransform and cache
         them as ../res/prc_CID.txt, prc_S.txt and prc_dG0.txt - or reload
         them from those files when ../res/prc_S.txt already exists.
      4. Solve the least-squares problem with the anchored values fixed.
      5. Write ../res/regression_fast.html (stoichiometric matrix, formation
         energies, PRC-vs-Alberty plot, sparse null-space and the energies as
         a function of the free variables), ../res/prc_results.csv and
         ../res/prc_latex.txt.
    """
    kegg = Kegg.getInstance()
    prefix = '../res/prc_'

    fixed_cids = {}  # a dictionary from CID to pairs of (nH, dG0)

    # Alberty formation energies directly measured, linearly independent:
    fixed_cids[1] = (2, -237.19)  # H2O
    fixed_cids[9] = (1, -1096.1)  # HPO3(-2)
    fixed_cids[14] = (4, -79.31)  # NH4(+1)
    fixed_cids[59] = (0, -744.53)  # SO4(-2)
    fixed_cids[288] = (1, -586.77)  # HCO3(-1)

    # Alberty zeros:
    fixed_cids[3] = (26, 0.0)  # NAD(ox)
    fixed_cids[10] = (32, 0.0)  # CoA
    fixed_cids[127] = (30, 0.0)  # glutathione(ox)
    fixed_cids[376] = (28, 0.0)  # retinal(ox)

    # Directly measured values
    fixed_cids[4] = (27, 22.65)  # NAD(red) -- relative to NAD(ox)
    fixed_cids[212] = (13, -194.5)  # adenosine
    # Deliberately not anchored:
    #   294 (12, -409.2) inosine - linearly dependent on other 'anchors'
    # Alberty zeros which are not in NIST:
    #   524 cytochrome c(ox), 16 FAD(ox), 139 ferredoxin(ox), 61 FMN(ox),
    #   343 thioredoxin(ox), 399 ubiquinone(ox)

    public_db = SqliteDatabase("../data/public_data.sqlite")
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        public_db, 'alberty_pseudoisomers', label=None, name='Alberty')
    alberty_cid2dG0 = {}
    alberty_cid2nH = {}
    for cid in alberty.get_all_cids():
        pmap = alberty.cid2PseudoisomerMap(cid)
        alberty_dG0, _dG0_tag, alberty_nH, _z, _nMg = pmap.GetMostAbundantPseudoisomer(
            pH=default_pH, I=default_I, pMg=default_pMg, T=default_T)
        alberty_cid2nH[cid] = alberty_nH
        alberty_cid2dG0[cid] = alberty_dG0

    if not os.path.exists(prefix + 'S.txt'):
        db = SqliteDatabase("../res/gibbs.sqlite")
        nist_regression = NistRegression(db)

        # nH per compound: anchored value first, then Alberty, then the most
        # abundant pseudoisomer from the dissociation data, else 0.
        cid2nH = {}
        for cid in nist_regression.nist.GetAllCids():
            if cid in fixed_cids:
                cid2nH[cid] = fixed_cids[cid][0]
            elif cid in alberty_cid2nH:
                cid2nH[cid] = alberty_cid2nH[cid]
            else:
                tmp = nist_regression.dissociation.GetMostAbundantPseudoisomer(
                    cid,
                    pH=default_pH,
                    I=default_I,
                    pMg=default_pMg,
                    T=default_T)
                if tmp is not None:
                    cid2nH[cid] = tmp[0]
                else:
                    logging.warning(
                        'The most abundant pseudoisomer of %s (C%05d) '
                        'cannot be resolved. Using nH = 0.' %
                        (kegg.cid2name(cid), cid))
                    cid2nH[cid] = 0

        S, dG0, cids = nist_regression.ReverseTransform(cid2nH=cid2nH)

        # export the raw data matrices to text files
        C = np.array([[cid, cid2nH.get(cid, 0)] for cid in cids])
        np.savetxt(prefix + 'CID.txt', C, fmt='%d', delimiter=',')
        np.savetxt(prefix + 'S.txt', S, fmt='%g', delimiter=',')
        np.savetxt(prefix + 'dG0.txt', dG0, fmt='%.2f', delimiter=',')
    else:
        C = np.loadtxt(prefix + 'CID.txt', delimiter=',')
        cids = [int(cid) for cid in C[:, 0]]
        cid2nH = {}
        for i, cid in enumerate(cids):
            cid2nH[cid] = int(C[i, 1])
        S = np.loadtxt(prefix + 'S.txt', delimiter=',')
        dG0 = np.loadtxt(prefix + 'dG0.txt', delimiter=',')
        dG0 = np.reshape(dG0, (dG0.shape[0], 1))

    html_writer = HtmlWriter('../res/regression_fast.html')
    html_writer.write("<h1>Pseudoisomeric Reactant Contributions</h1>\n")
    html_writer.write("<p>The stoichiometric matrix (S):")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, S, cids)
    html_writer.div_end()
    html_writer.write('</p>')

    # Column index of every anchored compound -> its fixed dG0.
    # NOTE(review): this loop used to also stack a unit row per anchored
    # compound onto a copy of S ("S_extended"), but that matrix was never
    # passed on; only index2value reaches the solver. Confirm that was intended.
    index2value = {}
    for cid, (_nH, dG0_fixed) in fixed_cids.items():
        index2value[cids.index(cid)] = dG0_fixed

    solution, _K = LinearRegression.LeastSquaresWithFixedPoints(S, dG0, index2value)
    cid2dG0 = {}
    for i, cid in enumerate(cids):
        cid2dG0[cid] = solution[i]

    # Calculate the Kernel of the reduced stoichiometric matrix (after removing
    # the columns of the fixed compounds).
    cids_red = [cid for cid in cids if cid not in fixed_cids]
    index_red = [i for i in range(len(cids)) if i not in index2value]
    S_red = S[:, index_red]
    K_red = LinearRegression.Kernel(S_red)

    # Find all CIDs that are completely determined and do not depend on any
    # free variable. In other words, all zero columns in K_red.
    determined_indices = np.where(
        np.sum(abs(K_red), 0) < 1e-10)[0]  # all zero-columns in reducedK
    determined_cids = set(cids_red[i] for i in determined_indices)

    dict_list = []
    plot_data = []
    for cid in cids:
        d = {
            'CID': 'C%05d' % cid,
            'Compound': kegg.cid2name(cid),
            'nH': '%d' % cid2nH[cid],
            'dG0 (PRC)': '%.1f' % cid2dG0[cid]
        }
        if cid in alberty_cid2dG0:
            d['dG0 (Alberty)'] = '%.1f' % alberty_cid2dG0[cid]
            if cid not in fixed_cids:
                plot_data.append(
                    (alberty_cid2dG0[cid], cid2dG0[cid], kegg.cid2name(cid)))
        else:
            d['dG0 (Alberty)'] = ''

        if cid in fixed_cids:
            d['Depends on'] = 'anchored'
        elif cid in determined_cids:
            d['Depends on'] = 'fixed compounds'
        else:
            d['Depends on'] = 'kernel dimensions'

        dict_list.append(d)

    dict_list.sort(key=lambda d: (d['Depends on'], d['CID']))
    html_writer.write(
        "<p>Formation energies determined by the linear constraints:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(dict_list,
                            headers=[
                                '#', 'Compound', 'CID', 'nH', 'dG0 (PRC)',
                                'dG0 (Alberty)', 'Depends on'
                            ])
    html_writer.write('</font>')
    html_writer.div_end()
    html_writer.write('</p>')

    # Plot a comparison between PRC and Alberty formation energies
    fig = plt.figure(figsize=(8, 8), dpi=80)
    plt.plot([point[0] for point in plot_data],
             [point[1] for point in plot_data],
             'b.',
             figure=fig)
    for alberty_value, prc_value, compound_name in plot_data:
        plt.text(alberty_value, prc_value, compound_name, fontsize=6)
    # Raw strings: same text as before, without invalid escape sequences.
    plt.xlabel(r'Alberty $\Delta_f G^\circ$')
    plt.ylabel(r'PRC $\Delta_f G^\circ$')
    html_writer.write("<p>Plot comparing PRC and Alberty results:")
    html_writer.insert_toggle(start_here=True)
    html_writer.embed_matplotlib_figure(fig)
    html_writer.div_end()
    html_writer.write("</p>")

    K_sparse = SparseKernel(S_red).Solve()
    html_writer.write(
        "<p>The sparse null-space of the reduced stoichiometric matrix:")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, K_sparse, cids_red)
    html_writer.div_end()
    html_writer.write("</p>")

    dict_list = []
    index2string_html = dict(
        (i, "V<sub>%02d</sub>" % i) for i in range(K_sparse.shape[0]))
    index2string = dict((i, "V%d" % i) for i in range(K_sparse.shape[0]))
    for i, cid in enumerate(cids_red):
        d = {}
        d['KEGG ID'] = '<a href="%s">C%05d</a>' % (kegg.cid2link(cid), cid)
        d['KEGG ID plain'] = 'C%05d' % cid
        d['Compound'] = kegg.cid2name(cid)
        d['nH'] = '%d' % cid2nH[cid]

        if cid in alberty_cid2dG0:
            d['dG0 (Alberty)'] = '%.1f' % alberty_cid2dG0[cid]
        else:
            d['dG0 (Alberty)'] = ''

        d['dG0 (PRC)'] = '%.1f' % cid2dG0[cid]
        d['dG0 (PRC) plain'] = '%.1f' % cid2dG0[cid]

        # 0/1 pattern of the kernel vectors this compound takes part in,
        # reversed, used as the primary sort key below.
        indic = np.where(abs(K_sparse[:, i]) > 1e-10, 1, 0).tolist()
        indic.reverse()
        d['order_key'] = indic
        if mlab.rms_flat(K_sparse[:, i]) > 1e-10:
            d['dG0 (PRC)'] += " + (" + vector2string(K_sparse[:, i],
                                                     index2string_html) + ")"
            d['dG0 (PRC) plain'] += " + (" + vector2string(
                K_sparse[:, i], index2string) + ")"
        dict_list.append(d)

    dict_list.sort(key=lambda d: (d['order_key'], d['KEGG ID plain']))

    # Export the results to CSV; the with-block closes (and flushes) the file.
    with open('../res/prc_results.csv', 'w') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(
            ['KEGG ID', 'Compound', 'nH', 'dG0 (PRC)', 'dG0 (Alberty)'])
        for d in dict_list:
            csv_writer.writerow([
                d['KEGG ID plain'], d['Compound'], d['nH'], d['dG0 (PRC) plain'],
                d['dG0 (Alberty)']
            ])

    html_writer.write(
        "<p>All formation energies as a function of the free variables:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(dict_list,
                            headers=[
                                '#', 'KEGG ID', 'Compound', 'nH', 'dG0 (PRC)',
                                'dG0 (Alberty)'
                            ])
    html_writer.write('</font>')
    html_writer.div_end()
    html_writer.write('</p>')

    with open('../res/prc_latex.txt', 'w') as fp:
        fp.write(
            latex.table2LaTeX(dict_list,
                              headers=[
                                  '#', 'KEGG ID plain', 'Compound', 'nH',
                                  'dG0 (PRC) plain', 'dG0 (Alberty)'
                              ]))
예제 #18
0
            if LinearRegression.MatrixRank(self.K) < self.dimension + 1:
                self.K[self.dimension, :] = 0
            else:
                # normalize the kernel vector so that it will have nice coefficients
                g = min(abs(coeffs[nonzero_indices]))
                self.K[self.dimension, :] /= g
                #if sum(self.K[:, self.dimension] < 0.0):
                #    self.K[:, self.dimension] *= -1.0

                v = self.K[self.dimension, :]
                self.AddLinearConstraint(v)
                self.dimension += 1
                return v

    def Solve(self):
        """Return ``self.K``, first exhausting the iterator if nothing was found yet.

        While ``self.dimension`` is still 0 the object is iterated to
        completion (the yielded vectors are discarded); in every case the
        matrix stored in ``self.K`` is returned.
        """
        if self.dimension != 0:
            return self.K
        # Drain the iterator purely for its side effects on self.K.
        for _unused in self:
            continue
        return self.K


if __name__ == '__main__':
    # Demo: the third row of A is the sum of the first two, so A is
    # rank-deficient; iterate over the kernel vectors produced by
    # SparseKernel and report the RMS of A * K^T.
    A = np.array([[1, 0, 1, 1, 2, 1, 1], [0, 1, 1, 1, 2, 1, 1],
                  [1, 1, 2, 2, 4, 2, 2]])
    K = SparseKernel(A)
    # Single-argument print(...) is valid on both Python 2 and Python 3 and
    # emits exactly what the former print statements did (including the
    # separator space that a comma-separated print statement inserts).
    print(A)
    for v in K:
        print("nullvector:  %s" % ', '.join(['%g' % x for x in v]))
    print("RMS(A*K.T) = %s" % (mlab.rms_flat(np.dot(A, K.Solve().T)),))
예제 #19
0
    def AnalyzeTrainingSet(self, skip_formations=True):
        """Write a cross-validation report for the training observations.

        For every retained observation the fitted value and its residual are
        tabulated, and a leave-one-out (LOO) estimate is obtained by refitting
        with LinearRegression.LeastSquares on all the other observations.
        An RMSE summary, a table and a residual scatter plot are written to
        self.html_writer; progress is reported through logging.

        Args:
            skip_formations: when true, observations whose type is
                KeggObservation.TYPE_FORMATION are left out of the report.

        Returns:
            None.
        """
        n_obs = self.group_matrix.shape[1]
        rowdicts = []
        # Fitted values for all observations and their deviation from the data.
        fit_results = np.dot(self.group_contributions, self.group_matrix)
        residuals = fit_results - self.obs_values
        
        # Column-header symbol depends on whether the data is transformed.
        if self.transformed:
            sym = symbol_d_G0_prime
        else:
            sym = symbol_d_G0
        for i in xrange(n_obs):
            # Acid-base, Mg and redox observations are never reported.
            if self.obs_types[i] in [KeggObservation.TYPE_ACID_BASE,
                                     KeggObservation.TYPE_MG,
                                     KeggObservation.TYPE_REDOX]:
                continue
            if skip_formations and self.obs_types[i] == KeggObservation.TYPE_FORMATION:
                continue

            # The row is appended before the LOO step, so an observation whose
            # LOO fit is skipped below still appears (with NaN LOO columns and
            # sortkey 0).
            rowdict = {'Observation':self.obs_ids[i]}
            rowdict[sym + ' (obs)'] = self.obs_values[0, i]
            rowdict[sym + ' (fit)'] = fit_results[0, i]
            rowdict[sym + ' (res)'] = residuals[0, i]
            rowdict['LOO ' + sym + ' (fit)'] = np.nan
            rowdict['LOO ' + sym + ' (res)'] = np.nan
            rowdict['sortkey'] = 0
            rowdicts.append(rowdict)
            logging.info('Fit Error = %.1f' % residuals[0, i])

            # leave out the row corresponding with observation 'i'
            logging.info('Cross validation, leaving-one-out: ' + self.obs_ids[i])
            # NOTE(review): .pop() relies on range() returning a list (Python 2).
            subset = range(n_obs)
            subset.pop(i)
            loo_group_contributions, loo_nullspace = LinearRegression.LeastSquares(
                self.group_matrix[:, subset], self.obs_values[:, subset])
            
            # A larger null-space than the full fit's means that dropping
            # observation i lost information; no LOO estimate is recorded.
            if loo_nullspace.shape[1] > self.group_nullspace.shape[1]:
                logging.warning('example %d is not linearly dependent in the other examples' % i)
                continue
            rowdict['LOO ' + sym + ' (fit)'] = float(np.dot(loo_group_contributions, self.group_matrix[:, i]))
            rowdict['LOO ' + sym + ' (res)'] = \
                rowdict['LOO ' + sym + ' (fit)'] - self.obs_values[0, i]
            # Rows are later sorted by the magnitude of the LOO residual.
            rowdict['sortkey'] = abs(rowdict['LOO ' + sym + ' (res)'])
            logging.info('LOO Error = %.1f' % rowdict['LOO ' + sym + ' (res)'])
        
        logging.info("writing the table of estimation errors for each compound")
        self.html_writer.write('</br><b>Cross validation table</b>')
        self.html_writer.insert_toggle(start_here=True)
        self.html_writer.write('<font size="1">\n')
        obs_vec = np.matrix([row[sym + ' (obs)'] for row in rowdicts])
        resid_vec = np.matrix([row[sym + ' (res)'] for row in rowdicts])
        rmse = rms_flat(resid_vec.flat)
        
        # The LOO RMSE only uses rows for which a LOO residual was computed
        # (the others are still NaN).
        loo_resid_vec = np.matrix([row['LOO ' + sym + ' (res)']
                                  for row in rowdicts])
        loo_rmse = rms_flat(loo_resid_vec[np.isfinite(loo_resid_vec)].flat)

        self.html_writer.write_ul(['fit RMSE = %.1f [kJ/mol]' % rmse,
                                   'leave-one-out RMSE = %.1f [kJ/mol]' % loo_rmse])
        logging.info("Goodness of fit: RMSE = %.1f [kJ/mol]" % rmse)
        logging.info("Leave-one-out test: RMSE = %.1f [kJ/mol]" % loo_rmse)

        headers = ['Observation',
                   sym + ' (obs)',
                   sym + ' (fit)',
                   sym + ' (res)',
                   'LOO ' + sym + ' (fit)',
                   'LOO ' + sym + ' (res)']
        # Largest |LOO residual| first.
        rowdicts.sort(key=lambda(x):x['sortkey'], reverse=True)
        self.html_writer.write_table(rowdicts, headers, decimal=1)
        self.html_writer.write('</font>\n')
        self.html_writer.div_end()
        
        self.html_writer.write('</br><b>Cross-validation figure</b>')
        self.html_writer.insert_toggle(start_here=True)
        
        # Scatter of fit residual against observed value; points whose
        # residual exceeds twice the fit RMSE are labelled with their id.
        obs_vs_err_fig = plt.figure(figsize=[6.0, 6.0], dpi=100)
        plt.plot(obs_vec.T, resid_vec.T, '.')
        plt.xlabel('Observation')
        plt.ylabel('Estimated (PGC) Residuals')
        plt.hold(True)
        for row in rowdicts:
            if abs(row[sym + ' (res)']) > 2*rmse:
                plt.text(row[sym + ' (obs)'],
                         row[sym + ' (res)'],
                         row['Observation'], fontsize=4,
                         figure=obs_vs_err_fig)
        plt.title('Observed vs. Fitted (PGC) Residuals', figure=obs_vs_err_fig)
        self.html_writer.embed_matplotlib_figure(obs_vs_err_fig)
        self.html_writer.div_end()
예제 #20
0
def two_way_comparison(html_writer, thermo1, thermo2, reaction_list, name=None):
    """
    Compare the reaction-energy estimates of two thermodynamic sources.

    Every reaction is evaluated with both sources at fixed conditions
    (pH 7, pMg 14, I = 0.1, T = 298.15); reactions for which either
    prediction raises MissingReactionEnergy are dropped. A scatter plot of
    the two estimates and a table sorted by decreasing absolute difference
    are written to HTML.

    Args:
        html_writer: writer used to embed the figure and write the table.
        thermo1: a Thermodynamics object that provides dG estimates.
        thermo2: a Thermodynamics object that provides dG estimates.
        reaction_list: iterable of reactions to evaluate.
        name: optional prefix of the embedded figure's name ("<name>_eval").
            When None the figure is embedded without a name.

    Returns:
        (0, 0) when no reaction could be estimated by both sources;
        otherwise None.
    """
    pH, pMg, I, T = (7, 14, 0.1, 298.15)

    total_list = []

    for reaction in reaction_list:
        try:
            dG0_pred1 = reaction.PredictReactionEnergy(thermo1, pH=pH, pMg=pMg, I=I, T=T)
            dG0_pred2 = reaction.PredictReactionEnergy(thermo2, pH=pH, pMg=pMg, I=I, T=T)
        except MissingReactionEnergy:
            continue

        total_list.append([dG0_pred1, dG0_pred2, reaction])

    if not total_list:
        return 0, 0

    # plot the profile graph
    plt.rcParams['text.usetex'] = False
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.size'] = 8
    plt.rcParams['lines.linewidth'] = 2
    plt.rcParams['lines.markersize'] = 2
    plt.rcParams['figure.dpi'] = 100

    # Only rows where both estimates are finite take part in the plot and
    # in the RMSE.
    data_mat = np.array([(x[0], x[1]) for x in total_list])
    non_nan = list(np.isfinite(data_mat.sum(1)).nonzero()[0].flat)

    fig2 = plt.figure(figsize=(5,5))
    plt.plot(data_mat[non_nan,0], data_mat[non_nan,1], 'b.')
    rmse = rms_flat((data_mat[non_nan,0] - data_mat[non_nan,1]).flat)
    plt.text(-50, 40, r'RMSE = %.1f [kJ/mol]' % (rmse))
    plt.xlabel(r'$\Delta G_r^\circ$ from %s [kJ/mol]' % thermo1.name)
    plt.ylabel(r'$\Delta G_r^\circ$ from %s [kJ/mol]' % thermo2.name)
    plt.plot([-200, 200], [-200, 200], 'k--')
    plt.axis([-200, 200, -200, 200])

    # 'name' defaults to None; concatenating it unconditionally used to
    # raise TypeError, so fall back to an unnamed figure in that case.
    if name is not None:
        html_writer.embed_matplotlib_figure(fig2, name=name + "_eval")
    else:
        html_writer.embed_matplotlib_figure(fig2)

    table_headers = ["#", '|diff|', "dG'0 (%s)" % thermo1.name,
                     "dG'0 (%s)" % thermo2.name,
                     "reaction", "rid"]
    dict_list = []
    for row in total_list:
        d = {}
        # A NaN estimate gets difference 0, which puts it at the bottom of
        # the table after the descending sort.
        if np.isnan(row[0]) or np.isnan(row[1]):
            d["|diff|"] = 0
        else:
            d["|diff|"] = abs(row[0] - row[1])
        d["dG'0 (%s)" % thermo1.name] = row[0]
        d["dG'0 (%s)" % thermo2.name] = row[1]
        d['reaction'] = row[2].to_hypertext(show_cids=True)
        if row[2].rid is not None:
            d['rid'] = '<a href="%s">R%05d</a>' % (row[2].get_link(), row[2].rid)
        else:
            d['rid'] = ''
        dict_list.append(d)
    dict_list.sort(key=lambda d:d['|diff|'], reverse=True)
    html_writer.write_table(dict_list, table_headers, decimal=1)
예제 #21
0
    def LinearRegression(self, S, obs_dG0_r, cids, cid2nH_nMg,
                         prior_thermodynamics=None):
        """Estimate compound formation energies from observed reaction energies.

        Solves obs_dG0_r ~ est_dG0_f * S by least squares. When a prior is
        given, compounds whose (nH, nMg) species is found in the prior keep
        the prior's formation energy and only the remaining part of the
        system is fitted. The fit is reported to self.html_writer and the
        results are stored as pseudoisomer maps in self.cid2pmap_dict.

        Args:
            S: stoichiometric matrix; rows follow `cids`, columns are the
                observed reactions (it is indexed as S[:, reaction]).
            obs_dG0_r: row matrix of observed reaction energies, one entry
                per column of S.
            cids: compound ids, in the row order of S.
            cid2nH_nMg: maps a compound id to its (nH, nMg) pair.
            prior_thermodynamics: optional source of known formation
                energies, queried through cid2PseudoisomerMap().

        Returns:
            None.
        """
        logging.info("Regression matrix is %d x %d" % \
                     (S.shape[0], S.shape[1]))

        # Reference label for each compound; overwritten below for compounds
        # whose value is taken from the prior.
        cid2ref = dict((cid, 'PRC') for cid in cids)
        if prior_thermodynamics:
            # Normalize the contribution of compounds which have formation energies
            # given in the prior. Perform the regression only on the residuals
            # remaining after the normalization (note that the stoichiometric
            # matrix must also be trimmed).
            cid_index_prior = []
            dG0_prior = []
            for i, cid in enumerate(cids):
                nH, nMg = cid2nH_nMg[cid]
                try:
                    pmap_prior = prior_thermodynamics.cid2PseudoisomerMap(cid)
                except MissingCompoundFormationEnergy:
                    continue
                # Use the prior only if it contains the same (nH, nMg) species.
                for p_nH, p_z, p_nMg, dG0 in pmap_prior.ToMatrix():
                    if nH == p_nH and p_nMg == nMg:
                        cid_index_prior.append(i)
                        dG0_prior.append(dG0)
                        cid2ref[cid] = pmap_prior.GetRef(p_nH, p_z, p_nMg)
                        break
            
            # Indicator matrix: column j selects the j-th compound that has a
            # prior value.
            S_prior = np.matrix(np.zeros((len(cids), len(cid_index_prior))))
            for j, i in enumerate(cid_index_prior):
                S_prior[i, j] = 1
            dG0_prior = np.matrix(dG0_prior)
            g, _ = LinearRegression.LeastSquares(S_prior, dG0_prior)
            # NOTE(review): P_C / P_L are presumably the projections onto the
            # column space of S_prior and onto its complement - confirm
            # against LinearRegression.ColumnProjection.
            P_C, P_L = LinearRegression.ColumnProjection(S_prior)
            prior_dG0_r = g * P_C * S
            new_obs_dG0_r = obs_dG0_r - prior_dG0_r
            new_S = P_L * S
            
            # Find all reactions in new_S which are completely zero. This means that
            # they are completely determined by the prior.
            zero_cols = (abs(new_S).sum(0) < 1e-10).nonzero()[1]
            rowdicts = []
            for j in zero_cols.flat:
                rowdict = {}
                rowdict['reaction'] = NistRegression.row2hypertext(S[:, j], cids)
                rowdict['|error|'] = abs(new_obs_dG0_r[0, j])
                rowdict['error'] = new_obs_dG0_r[0, j]
                rowdict['NIST'] = obs_dG0_r[0, j]
                rowdict['prior'] = prior_dG0_r[0, j]
                rowdicts.append(rowdict)
            # '|error|' is only a sort key; it is not among the table headers.
            rowdicts.sort(key=lambda x:x['|error|'], reverse=True)
            self.html_writer.write('</br><b>Alberty Errors</b>\n')
            self.html_writer.write_table(rowdicts,
                                         headers=['reaction', 'error', 'NIST', 'prior'],
                                         decimal=1)
            
            est_dG0_f, _ = LinearRegression.LeastSquares(new_S, new_obs_dG0_r)
            # Compounds covered by the prior take the prior value verbatim.
            for j, i in enumerate(cid_index_prior):
                est_dG0_f[0, i] = dG0_prior[0, j]
        else:
            est_dG0_f, _ = LinearRegression.LeastSquares(S, obs_dG0_r)
        
        # Goodness of fit on the original (untrimmed) system.
        est_dG0_r = est_dG0_f * S
        residuals = est_dG0_r - obs_dG0_r
        rmse = rms_flat(residuals.flat)
        logging.info("Regression results for reverse transformed data:")
        logging.info("N = %d, RMSE = %.1f" % (S.shape[1], rmse))
       
        self.html_writer.write('<p>RMSE = %.1f [kJ/mol]</p>\n' % rmse)
        rowdicts = []
        headers = ['#', 'Reaction',
                   symbol_dr_G0 + ' (obs)',
                   symbol_dr_G0 + ' (fit)',
                   symbol_dr_G0 + ' (res)']
        for i in xrange(S.shape[1]):
            rowdict = {}
            rowdict['Reaction'] = NistRegression.row2hypertext(S[:, i], cids)
            rowdict[symbol_dr_G0 + ' (obs)'] = obs_dG0_r[0, i]
            rowdict[symbol_dr_G0 + ' (fit)'] = est_dG0_r[0, i]
            rowdict[symbol_dr_G0 + ' (res)'] = residuals[0, i]
            rowdicts.append(rowdict)
        # Worst-fitted reactions first.
        rowdicts.sort(key=lambda x:abs(x[symbol_dr_G0 + ' (res)']), reverse=True)
        self.html_writer.write_table(rowdicts, headers, decimal=1)

        # copy the solution into the diss_tables of all the compounds,
        # and then generate their PseudoisomerMaps.
        for i, cid in enumerate(cids):
            nH, nMg = cid2nH_nMg[cid]
            diss_table = self.GetDissociation().GetDissociationTable(cid)
            # Charge of the fitted species, offset from the table's minimum
            # charge by the number of extra hydrogens.
            z = diss_table.min_charge + (nH - diss_table.min_nH)
            diss_table.SetFormationEnergyByNumHydrogens(est_dG0_f[0, i], nH, nMg)
            pmap = diss_table.GetPseudoisomerMap(nH, nMg)
            pmap.SetRef(nH, z, nMg, cid2ref[cid])
            self.cid2pmap_dict[cid] = pmap
예제 #22
0
            nonzero_indices = np.nonzero(g_plus > 0.5)[0].tolist() + np.nonzero(g_minus > 0.5)[0].tolist()
            self.K[self.dimension, nonzero_indices] = coeffs[nonzero_indices]
            
            if LinearRegression.MatrixRank(self.K) < self.dimension+1:
                self.K[self.dimension, :] = 0
            else:
                # normalize the kernel vector so that it will have nice coefficients
                g = min(abs(coeffs[nonzero_indices]))
                self.K[self.dimension, :] /= g
                #if sum(self.K[:, self.dimension] < 0.0):
                #    self.K[:, self.dimension] *= -1.0
                
                v = self.K[self.dimension, :]
                self.AddLinearConstraint(v)
                self.dimension += 1
                return v
        
    def Solve(self):
        """Return the matrix ``self.K``, computing it first when necessary.

        If ``self.dimension`` is 0 the object is iterated until exhaustion
        (ignoring the yielded items) before ``self.K`` is handed back;
        otherwise ``self.K`` is returned as it is.
        """
        needs_iteration = (self.dimension == 0)
        if not needs_iteration:
            return self.K
        # The loop body is intentionally empty: only the iteration's side
        # effects matter here.
        for _ignored in self:
            continue
        return self.K

if __name__ == '__main__':
    # Demo on a small rank-deficient matrix (row 3 = row 1 + row 2): print
    # every kernel vector yielded by SparseKernel and the RMS of A * K^T.
    A = np.array([[1, 0, 1, 1, 2, 1, 1],[0, 1, 1, 1, 2, 1, 1],[1, 1, 2, 2, 4, 2, 2]])
    K = SparseKernel(A)
    # print(...) with a single pre-formatted argument behaves identically on
    # Python 2 and Python 3; the two spaces after "nullvector:" reproduce the
    # separator the original comma-separated print statement inserted.
    print(A)
    for v in K:
        print("nullvector:  %s" % ', '.join(['%g' % x for x in v]))
    print("RMS(A*K.T) = %s" % (mlab.rms_flat(np.dot(A, K.Solve().T)),))
예제 #23
0
    def verify_results(self, key, thermodynamics, html_writer):
        """Calculate all the dG0_r for the reaction from NIST and compare to
           the measured data.
        
        Rows containing a compound that `thermodynamics` does not know, or
        whose prediction raises MissingCompoundFormationEnergy, are skipped.
        Three figures (observed vs. estimated, histogram of residuals, error
        vs. number of measurements) and a table sorted by decreasing error
        are written to HTML.
        
        Args:
            key: The name of this group of results; also used as the id of
                the collapsible HTML section.
            thermodynamics: a Thermodynamics object that provides dG estimates.
            html_writer: to write HTML.
        
        Returns:
            None.
        """
        
        logging.info("calculate the correlation between %s's predictions and the NIST database" % key)
        
        known_cid_set = thermodynamics.get_all_cids()
        dG0_obs_vec = []
        dG0_est_vec = []
       
        # A mapping from each K_type label to a pair of lists:
        # (measured values, predicted values)
        evaluation_map = {}
        total_list = []
        
        # Number of NIST rows in which each compound takes part.
        cid2count = {}
        for row_data in self.data:
            for cid in row_data.GetAllCids():
                cid2count[cid] = cid2count.get(cid, 0) + 1
        
        for row_data in self.data:
            unknown_set = set(row_data.GetAllCids()).difference(known_cid_set)

            if unknown_set:
                logging.debug("a compound in (%s) doesn't have a dG0_f" % row_data.origin)
                continue
            
            label = row_data.K_type
            
            if label not in evaluation_map:
                evaluation_map[label] = ([], [])
            
            try:
                dG0_pred = row_data.PredictReactionEnergy(thermodynamics)
            except MissingCompoundFormationEnergy:
                logging.debug("a compound in (%s) doesn't have a dG0_f" % row_data.origin)
                continue
                
            dG0_obs_vec.append(row_data.dG0_r)
            dG0_est_vec.append(dG0_pred)
            evaluation_map[label][0].append(row_data.dG0_r)
            evaluation_map[label][1].append(dG0_pred)
            # The least-measured compound of the reaction.
            n_measurements = min([cid2count[cid] for cid in row_data.GetAllCids()])
            error = abs(row_data.dG0_r - dG0_pred)

            total_list.append([error, row_data.dG0_r, dG0_pred, 
                               row_data.sparse, row_data.pH, row_data.pMg, 
                               row_data.I, row_data.T, row_data.evaluation, 
                               n_measurements])
        
        # plot the profile graph
        rcParams['text.usetex'] = False
        rcParams['legend.fontsize'] = 12
        rcParams['font.family'] = 'sans-serif'
        rcParams['font.size'] = 16
        rcParams['lines.linewidth'] = 2
        rcParams['lines.markersize'] = 3
        rcParams['figure.figsize'] = [8.0, 6.0]
        rcParams['figure.dpi'] = 100
        
        fig1 = figure()
        hold(True)
        
        # Cycle through the palette instead of popping from it, so that more
        # labels than colors no longer raises IndexError.
        colors = ['purple', 'orange', 'lightgreen', 'red', 'cyan']
        for i, e in enumerate(sorted(evaluation_map.keys())):
            measured, predicted = evaluation_map[e]
            resid = np.array(measured) - np.array(predicted)
            label = '%s (N = %d, RMSE = %.2f [kJ/mol])' % (e, len(measured), rms_flat(resid.flat))
            c = colors[i % len(colors)]
            plot(measured, predicted, marker='.', linestyle='None', markerfacecolor=c, markeredgecolor=c, markersize=5, label=label)
        
        legend(loc='upper left')
        
        resid = np.array(dG0_obs_vec) - np.array(dG0_est_vec)
        rmse = rms_flat(resid.flat)
        title(r'N = %d, RMSE = %.1f [kJ/mol]' % (len(dG0_obs_vec), rmse), fontsize=14)
        xlabel(r'$\Delta_{obs} G^\circ$ [kJ/mol]', fontsize=14)
        ylabel(r'$\Delta_{est} G^\circ$ [kJ/mol]', fontsize=14)
        min_x = min(dG0_obs_vec)
        max_x = max(dG0_obs_vec)
        plot([min_x, max_x], [min_x, max_x], 'k--')
        axis([-60, 60, -60, 60])
        
        fig2 = figure()
        hist([(row[1] - row[2]) for row in total_list], bins=arange(-50, 50, 0.5))
        title(r'RMSE = %.1f [kJ/mol]' % rmse, fontsize=14)
        xlabel(r'$\Delta_{obs} G^\circ - \Delta_{est} G^\circ$ [kJ/mol]', fontsize=14)
        ylabel(r'no. of measurements', fontsize=14)

        fig3 = figure()
        plot([row[9] for row in total_list], [abs(row[1] - row[2]) for row in total_list], '.')
        # The title has no conversion specifier; applying '% rmse' to it
        # raised "TypeError: not all arguments converted".
        title(r'The effect of the number of measurements on the estimation error', fontsize=14)
        xlabel(r'minimum no. of measurements among reaction compounds', fontsize=14)
        ylabel(r'$|| \Delta_{obs} G^\circ - \Delta_{est} G^\circ ||$ [kJ/mol]', fontsize=14)
        xscale('log')
        
        html_writer.write("<h2>%s</h2>" % key)
        
        html_writer.embed_matplotlib_figure(fig1, width=400, height=300)
        html_writer.embed_matplotlib_figure(fig2, width=400, height=300)
        
        # The third figure and the table live in a section hidden by default.
        html_writer.write('<input type="button" class="button" onclick="return toggleMe(\'%s\')" value="Show">\n' % (key))
        html_writer.write('<div id="%s" style="display:none">' % key)

        html_writer.embed_matplotlib_figure(fig3, width=400, height=300)

        table_headers = ["|error|", "dG0(obs)", "dG0(pred)", "reaction", "pH", "pMg", "I", "T", "evaluation", "min_num_measurements"]
        html_writer.write("<table>\n")
        html_writer.write("<tr><td>" + "</td><td>".join(table_headers) + "</td></tr>\n")
        
        # Rows start with the absolute error, so the reverse sort lists the
        # worst predictions first.
        for row in sorted(total_list, reverse=True):
            sparse_reaction = row[3]
            row[3] = self.kegg.sparse_to_hypertext(sparse_reaction, show_cids=False)
            html_writer.write("<tr><td>" + "</td><td>".join(["%.1f" % x for x in row[:3]] + [str(x) for x in row[3:]]) + "</td></tr>\n")
        html_writer.write("</table>\n")
        html_writer.write("</div><br>\n")
예제 #24
0
    def verify_results(self, html_writer, thermodynamics, name=None):
        """Calculate all the dG0_r for the reaction from NIST and compare to
           the measured data.
        
        Write results to HTML: an observed-vs-estimated scatter plot, a
        binned plot of the error against pH, a histogram of the residuals
        and a table of all rows.
        
        Args:
            html_writer: writer used to embed the figures and the table.
            thermodynamics: a Thermodynamics object that provides dG estimates.
            name: optional prefix for the embedded figures' names
                ("_eval", "_pH" and "_hist" are appended); when falsy the
                figures are embedded without a name.

        Returns:
            A pair (N, unique_rmse): the number of rows with a finite
            estimate, and the RMS over the per-reaction RMS errors.
            Returns (0, 0) when no row has a finite estimate.
        """

        dG0_obs_vec = []
        dG0_est_vec = []

        # A mapping from each quality label (derived from the NIST evaluation
        # grade, see eval_to_label) to the results of the relevant measurements
        evaluation_map = {}
        rowdicts = []
        finite_rowdicts = []

        # Grade 'A' is reported as high quality, every other grade as low.
        eval_to_label = {
            'A': 'high quality',
            'B': 'low quality',
            'C': 'low quality',
            'D': 'low quality',
            'E': 'low quality'
        }

        for row_data in self.SelectRowsFromNist():
            rowdict = {}
            label = eval_to_label[row_data.evaluation]
            if label not in evaluation_map:
                evaluation_map[label] = ([], [])
            rowdict[symbol_dr_G0_prime + ' (obs)'] = np.round(
                row_data.dG0_r, 1)
            # '_reaction' keeps the reaction object itself, used below to
            # group errors per unique reaction.
            rowdict['_reaction'] = row_data.reaction
            rowdict['reaction'] = row_data.reaction.to_hypertext(
                show_cids=False)
            if row_data.reaction.rid is not None:
                rowdict['rid'] = '<a href="%s">R%05d</a>' % (
                    row_data.reaction.get_link(), row_data.reaction.rid)
            else:
                rowdict['rid'] = ''
            rowdict['pH'] = row_data.pH
            rowdict['pMg'] = row_data.pMg
            rowdict['I'] = row_data.I
            rowdict['T'] = row_data.T
            rowdict['eval.'] = row_data.evaluation
            rowdict['url'] = '<a href="%s">%s</a>' % (row_data.url,
                                                      row_data.ref_id)

            dG0_est = row_data.PredictReactionEnergy(thermodynamics)
            if np.isfinite(dG0_est):
                dG0_obs_vec.append(row_data.dG0_r)
                dG0_est_vec.append(dG0_est)
                evaluation_map[label][0].append(row_data.dG0_r)
                evaluation_map[label][1].append(dG0_est)
                rowdict[symbol_dr_G0_prime + ' (est)'] = np.round(dG0_est, 1)
                rowdict['residual'] = np.round(row_data.dG0_r - dG0_est, 3)
                rowdict['|error|'] = abs(rowdict['residual'])
                # Negated so that an ascending sort lists the largest error first.
                rowdict['sort_key'] = -rowdict['|error|']
                finite_rowdicts.append(rowdict)
            else:
                # Rows without a finite estimate sort after all the others.
                rowdict['sort_key'] = 1

            rowdicts.append(rowdict)

        rowdicts.sort(key=lambda x: x['sort_key'])

        if not dG0_obs_vec:
            return 0, 0

        # RMSE over unique reactions: first the RMS error of each reaction's
        # measurements, then the RMS of those per-reaction values.
        unique_reaction_dict = defaultdict(list)
        for rowdict in finite_rowdicts:
            unique_reaction_dict[rowdict['_reaction']].append(
                rowdict['|error|'])
        unique_rmse_list = [
            rms_flat(error_list)
            for error_list in unique_reaction_dict.values()
        ]
        unique_rmse = rms_flat(unique_rmse_list)

        # Plain RMSE over all individual measurements.
        resid = np.array(dG0_obs_vec) - np.array(dG0_est_vec)
        rmse = rms_flat(resid.flat)

        # plot the profile graph
        plt.rcParams['text.usetex'] = False
        plt.rcParams['legend.fontsize'] = 10
        plt.rcParams['font.family'] = 'sans-serif'
        plt.rcParams['font.size'] = 12
        plt.rcParams['lines.linewidth'] = 1
        plt.rcParams['lines.markersize'] = 3

        # Figure 1: observed vs. estimated, one series per quality label.
        fig1 = plt.figure(figsize=(6, 6), dpi=90)
        plt.hold(True)

        # One color per label; eval_to_label yields at most two labels.
        colors = ['purple', 'orange']
        for i, label in enumerate(sorted(evaluation_map.keys())):
            measured, predicted = evaluation_map[label]
            plt.plot(measured,
                     predicted,
                     marker='.',
                     linestyle='None',
                     markerfacecolor=colors[i],
                     markeredgecolor=colors[i],
                     markersize=5,
                     label=label,
                     figure=fig1)

        plt.legend(loc='lower right')

        plt.text(-50,
                 40,
                 r'RMSE = %.1f [kJ/mol]' % (unique_rmse),
                 fontsize=14,
                 figure=fig1)
        plt.xlabel(r'observed $\Delta_r G^{\'\circ}$ [kJ/mol]',
                   fontsize=14,
                   figure=fig1)
        plt.ylabel(r'estimated $\Delta_r G^{\'\circ}$ [kJ/mol]',
                   fontsize=14,
                   figure=fig1)
        #min_x = min(dG0_obs_vec)
        #max_x = max(dG0_obs_vec)
        plt.plot([-60, 60], [-60, 60], 'k--', figure=fig1)
        plt.axis([-60, 60, -60, 60])
        if name:
            html_writer.embed_matplotlib_figure(fig1, name=name + "_eval")
        else:
            html_writer.embed_matplotlib_figure(fig1)

        # Figure 2: error as a function of pH, binned.
        fig2 = plt.figure(figsize=(6, 6), dpi=90)
        binned_plot(x=[rowdict['pH'] for rowdict in finite_rowdicts],
                    y=[rowdict['|error|'] for rowdict in finite_rowdicts],
                    bins=[5, 6, 7, 8, 9],
                    y_type='rmse',
                    figure=fig2)
        plt.xlim((4, 11))
        plt.ylim((0, 12))
        plt.title(r'effect of pH', fontsize=14, figure=fig2)
        plt.xlabel('pH', fontsize=14, figure=fig2)
        plt.ylabel(r'RMSE ($\Delta_r G^{\'\circ}$) [kJ/mol]',
                   fontsize=14,
                   figure=fig2)
        if name:
            html_writer.embed_matplotlib_figure(fig2, name=name + "_pH")
        else:
            html_writer.embed_matplotlib_figure(fig2)

        # Figure 3: histogram of the residuals, titled with the plain RMSE.
        fig3 = plt.figure(figsize=(6, 6), dpi=90)
        plt.hist([rowdict['residual'] for rowdict in finite_rowdicts],
                 bins=np.arange(-50, 50, 0.5))
        plt.title(r'RMSE = %.1f [kJ/mol]' % rmse, fontsize=14, figure=fig3)
        plt.xlabel(r'residual $\Delta_r G^{\'\circ}$ [kJ/mol]',
                   fontsize=14,
                   figure=fig3)
        plt.ylabel(r'no. of measurements', fontsize=14, figure=fig3)
        if name:
            html_writer.embed_matplotlib_figure(fig3, name=name + "_hist")
        else:
            html_writer.embed_matplotlib_figure(fig3)

        # The table contains every row, including those without an estimate.
        table_headers = [
            "#", "|error|", symbol_dr_G0_prime + " (obs)",
            symbol_dr_G0_prime + " (est)", "reaction", "rid", "pH", "pMg", "I",
            "T", "eval.", "url"
        ]
        html_writer.write_table(rowdicts, table_headers, decimal=1)

        return len(dG0_obs_vec), unique_rmse
예제 #25
0
    def two_way_comparison(self, html_writer, thermo1, thermo2, name=None):
        """
            Compare the estimation errors of two different evaluation methods.
        
        Every row returned by self.SelectRowsFromNist() is predicted with
        both sources; rows for which a prediction raises
        MissingReactionEnergy are logged and skipped. Two figures (error of
        one source against the other, and observed vs. estimated for each
        source) and a table sorted by decreasing disagreement between the
        two predictions are written to HTML.
        
        Args:
            html_writer: writer used to embed the figures and the table.
            thermo1: a Thermodynamics object that provides dG estimates.
            thermo2: a Thermodynamics object that provides dG estimates.
            name: optional prefix for the embedded figures' names ("_corr"
                and "_eval" are appended). When None the figures are
                embedded without a name.

        Returns:
            (0, 0) when no row could be estimated by both sources;
            otherwise None.
        """

        total_list = []

        for row_data in self.SelectRowsFromNist():
            try:
                dG0_pred1 = row_data.PredictReactionEnergy(thermo1)
                dG0_pred2 = row_data.PredictReactionEnergy(thermo2)
            except MissingReactionEnergy as e:
                logging.debug("the reaction in (%s) cannot be estimated: %s" %
                              (row_data.ref_id, str(e)))
                continue

            total_list.append([
                row_data.dG0_r, dG0_pred1, dG0_pred2, row_data.reaction,
                row_data.pH, row_data.pMg, row_data.I, row_data.T,
                row_data.evaluation, row_data.url
            ])

        if not total_list:
            return 0, 0

        # plot the profile graph
        plt.rcParams['text.usetex'] = False
        plt.rcParams['font.family'] = 'sans-serif'
        plt.rcParams['font.size'] = 8
        plt.rcParams['lines.linewidth'] = 2
        plt.rcParams['lines.markersize'] = 2
        plt.rcParams['figure.dpi'] = 100

        # Columns: 0 = observed, 1 = prediction of thermo1, 2 = of thermo2.
        data_mat = np.array(total_list)
        fig1 = plt.figure(figsize=(4, 4))
        plt.hold(True)
        error1 = data_mat[:, 0] - data_mat[:, 1]
        error2 = data_mat[:, 0] - data_mat[:, 2]

        max_err = max(error1.max(), error2.max())
        min_err = min(error1.min(), error2.min())
        plt.plot([min_err, max_err], [min_err, max_err], 'k--', figure=fig1)
        plt.plot(error1, error2, '.', figure=fig1)
        plt.title("Error Comparison per Reaction (in kJ/mol)")
        plt.xlabel(thermo1.name, figure=fig1)
        plt.ylabel(thermo2.name, figure=fig1)
        # 'name' defaults to None; concatenating it unconditionally used to
        # raise TypeError, so fall back to an unnamed figure in that case.
        if name is not None:
            html_writer.embed_matplotlib_figure(fig1, name=name + "_corr")
        else:
            html_writer.embed_matplotlib_figure(fig1)

        fig2 = plt.figure(figsize=(7, 3))
        for i, thermo in enumerate([thermo1, thermo2]):
            fig2.add_subplot(1, 2, i + 1)
            plt.plot(data_mat[:, 0], data_mat[:, i + 1], 'b.')
            rmse = rms_flat((data_mat[:, 0] - data_mat[:, i + 1]).flat)
            plt.text(-50, 40, r'RMSE = %.1f [kJ/mol]' % (rmse))
            plt.xlabel(r'observed $\Delta G_r^\circ$ from NIST [kJ/mol]')
            plt.ylabel(r'estimated $\Delta G_r^\circ$ using %s [kJ/mol]' %
                       thermo.name)
            plt.plot([-60, 60], [-60, 60], 'k--')
            plt.axis([-60, 60, -60, 60])

        if name is not None:
            html_writer.embed_matplotlib_figure(fig2, name=name + "_eval")
        else:
            html_writer.embed_matplotlib_figure(fig2)

        table_headers = [
            "dG'0 (obs)",
            "dG'0 (%s)" % thermo1.name,
            "dG'0 (%s)" % thermo2.name, "reaction", "rid", "pH", "pMg", "I",
            "T", "eval.", "url"
        ]
        dict_list = []
        # Rows where the two predictions disagree the most come first.
        # ('lambda x:' replaces the Python-2-only 'lambda (x):' spelling.)
        for row in sorted(total_list,
                          key=lambda x: abs(x[1] - x[2]),
                          reverse=True):
            d = {}
            d["dG'0 (obs)"] = '%.1f' % row[0]
            d["dG'0 (%s)" % thermo1.name] = '%.1f' % row[1]
            d["dG'0 (%s)" % thermo2.name] = '%.1f' % row[2]
            d['reaction'] = row[3].to_hypertext(show_cids=False)
            if row[3].rid is not None:
                d['rid'] = '<a href="%s">R%05d</a>' % (row[3].get_link(),
                                                       row[3].rid)
            else:
                d['rid'] = ''
            d['pH'] = '%.1f' % row[4]
            d['pMg'] = '%.1f' % row[5]
            d['I'] = '%.2f' % row[6]
            d['T'] = '%.1f' % row[7]
            d['eval.'] = row[8]
            if row[9]:
                d['url'] = '<a href="%s">link</a>' % row[9]
            else:
                d['url'] = ''
            dict_list.append(d)
        html_writer.write_table(dict_list, table_headers)
예제 #26
0
# Load the comparison table. Columns 1, 2 and 3 are plotted below as the
# iAF1260 value, the UGCM estimation and the TECRDB observation; column 0
# only supplies the range of the diagonal guide line.
data = np.loadtxt(DATA_FNAME, delimiter=',', dtype='float')

# Row indices that hold a finite value in each of the three columns.
feist_idx, ugcm_idx, nist_idx = (
    set(np.nonzero(np.isfinite(data[:, column]))[0].flat)
    for column in (1, 2, 3))

# Rows usable for a like-for-like comparison: finite in all three columns.
comp_idx = list(feist_idx & ugcm_idx & nist_idx)
minG = np.min(data[comp_idx, 0])
maxG = np.max(data[comp_idx, 0])


def _comparison_panel(position, column, x_label):
    """Draw one panel comparing `column` against the TECRDB column (3).

    Returns the per-row error (column minus TECRDB) and its RMS.
    """
    plt.subplot(1, 2, position)
    error = data[comp_idx, column] - data[comp_idx, 3]
    rms = rms_flat(error)
    plt.plot(data[comp_idx, column], data[comp_idx, 3], '.g')
    plt.plot([minG, maxG], [minG, maxG], ':k')
    plt.ylabel('TECRDB observation [kJ/mol]')
    plt.xlabel(x_label)
    plt.title('N = %d, RMSE = %.1f [kJ/mol]' % (len(comp_idx), rms))
    return error, rms


plt.figure(figsize=(10, 5), dpi=90)
err_feist_nist, rms_feist_nist = _comparison_panel(
    1, 1, 'value in iAF1260 [kJ/mol]')
err_ugcm_nist, rms_ugcm_nist = _comparison_panel(
    2, 2, 'UGCM estimation [kJ/mol]')
plt.tight_layout()
    def Loo(self, no_anchoring=True):
        """Run a leave-one-out analysis over the reaction observations.

        Each column of ``self.S`` that is a 'reaction' observation, is not
        anchored and is not empty is left out in turn. Its energy is then
        estimated from the remaining columns with
        ``self._GetChemicalReactionEnergies`` and compared with the observed
        value in ``self.b``. The estimates are handed to ``self.Report``, and
        a per-class RMSE list plus a table sorted by decreasing absolute
        error are written through ``self.html_writer``.

        Args:
            no_anchoring: when true, the anchoring flags of the remaining
                observations are zeroed before the estimation.
        """
        n_cols = self.S.shape[1]
        # start from all-NaN so that skipped columns stay marked as missing
        dG0_r_ugc = np.matrix(np.zeros((3, n_cols))) * np.nan
        dG0_r_pgc = np.matrix(np.zeros((1, n_cols))) * np.nan

        table_rows = []
        ugc_errors_by_class = defaultdict(list)
        pgc_errors_by_class = defaultdict(list)
        for col in xrange(n_cols):
            # skip non-reactions, anchored observations and empty reactions
            if (self.obs_types[col] != 'reaction'
                    or self.anchored[0, col]
                    or abs(self.S[:, col]).sum(0) < self.epsilon):
                continue

            # every column index except the one being left out
            others = [j for j in xrange(n_cols) if j != col]
            train_S = self.S[:, others].copy()
            train_anchored = self.anchored[0, others]
            if no_anchoring:
                train_anchored = train_anchored * 0

            train_b = self.b[:, others].copy()
            test_S = self.S[:, col].copy()
            ugc_estimate, parts, pgc_estimate = \
                self._GetChemicalReactionEnergies(train_S, self.cids, train_b,
                                                  train_anchored, test_S,
                                                  self.cids)
            dG0_r_ugc[:, col] = ugc_estimate
            dG0_r_pgc[0, col] = pgc_estimate

            # classify by which entries of 'parts' exceed epsilon
            # (rows 0..3 are reported below as ANCH, PRC, PGC and NULL)
            in_kernel = parts[3, 0] > self.epsilon
            uses_prc = parts[1, 0] > self.epsilon
            uses_pgc = parts[2, 0] > self.epsilon
            if in_kernel:
                label = 'kernel'
            elif uses_prc and uses_pgc:
                label = 'PRC + PGC'
            elif uses_prc:
                label = 'PRC'
            elif uses_pgc:
                label = 'PGC'
            else:
                label = 'anchored'

            observed = self.b[0, col]
            est_b = float(dG0_r_ugc[:, col].sum(0))
            ugc_err = observed - est_b
            ugc_errors_by_class[label].append(ugc_err)

            pgc_err = observed - dG0_r_pgc[0, col]
            pgc_errors_by_class[label].append(pgc_err)

            table_rows.append({
                'row': col,
                'type': self.obs_types[col],
                'reaction': UnifiedGroupContribution.row2hypertext(
                    self.S[:, col], self.cids),
                'obs': observed,
                'est': est_b,
                'est(PGC)': dG0_r_pgc[0, col],
                # non-finite errors get 0 so they sort to the end
                '|err|': abs(ugc_err) if np.isfinite(ugc_err) else 0,
                'est_ANCH': dG0_r_ugc[0, col],
                'est_PRC': dG0_r_ugc[1, col],
                'est_PGC': dG0_r_ugc[2, col],
                'part_ANCH': parts[0, 0],
                'part_PRC': parts[1, 0],
                'part_PGC': parts[2, 0],
                'part_NULL': parts[3, 0],
                'class': label,
            })

        # one summary line per class
        class_errors = [
            '%s: N = %d, rmse(UGC) = %.1f kJ/mol, rmse(PGC) = %.1f kJ/mol'
            % (label, len(ugc_errs), rms_flat(ugc_errs),
               rms_flat(pgc_errors_by_class[label]))
            for label, ugc_errs in ugc_errors_by_class.items()
        ]

        self.Report(dG0_r_ugc.sum(0), 'UGC - Leave one out')
        self.Report(dG0_r_pgc, 'PGC - Leave one out')

        table_rows.sort(key=lambda entry: entry['|err|'], reverse=True)
        writer = self.html_writer
        writer.write('<h2>Linear Regression Leave-One-Out Analysis</h2>\n')
        writer.insert_toggle(start_here=True, label="Show table")
        writer.write_ul(class_errors)
        columns = [
            'row', 'type', 'reaction', 'class', 'obs', 'est', 'est(PGC)',
            '|err|', 'est_ANCH', 'est_PRC', 'est_PGC', 'part_ANCH',
            'part_PRC', 'part_PGC', 'part_NULL'
        ]
        writer.write_table(table_rows, headers=columns, decimal=1)
        writer.div_end()
예제 #28
0
def main(argv):
    """Print the detected rotation of the text lines in an image.

    Usage: ``rotation_spacing.py -f <filename>``

    The image is converted to grayscale and demeaned, its Radon transform is
    computed, and the projection with the largest RMS value is taken as the
    "busiest" rotation. The value ``-(90 - rotation)`` is printed with two
    decimals.

    Args:
        argv: the option list handed to ``getopt.getopt``.
    """
    filename = ''

    # NOTE: the argument count is checked on sys.argv, not on the argv
    # parameter.
    if len(sys.argv) < 3:
        print('Usage: rotation_spacing.py -f <filename>')
        sys.exit()
    try:
        opts, _args = getopt.getopt(argv, "hf:", ["file="])
    except getopt.GetoptError:
        print('rotation_spacing.py -f <filename>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Usage: rotation_spacing.py -f <filename>')
            sys.exit()
        elif opt in ("-f", "--file"):
            filename = arg

    try:
        # More accurate peak finding from
        # https://gist.github.com/endolith/255291#file-parabolic-py
        from parabolic import parabolic

        def argmax(x):
            return parabolic(x, numpy.argmax(x))[0]
    except ImportError:
        from numpy import argmax

    # Load file, converting to grayscale
    I = asarray(Image.open(filename).convert('L'))
    I = I - mean(I)  # Demean; make the brightness extend above and below zero

    # Do the radon transform
    sinogram = radon(I)

    # Find the RMS value of each row and find "busiest" rotation,
    # where the transform is lined up perfectly with the alternating dark
    # text and white lines
    r = array([rms_flat(line) for line in sinogram.transpose()])
    rotation = argmax(r)

    print('{:.2f}'.format(-(90 - rotation)))

    # Select the busy row. With the parabolic peak finder, argmax yields a
    # fractional position, which is not a valid array index; index with the
    # nearest integer while the printed rotation keeps the refined value.
    row = sinogram[:, int(round(rotation))]
    N = len(row)

    # Take spectrum of busy row and find line spacing
    # (the result is not used further in this function)
    window = blackman(N)
    spectrum = rfft(row * window)
    frequency = argmax(abs(spectrum))
    line_spacing = N / frequency  # pixels
예제 #29
0
    def verify_results(self, key, thermodynamics, html_writer):
        """Calculate all the dG0_r for the reaction from NIST and compare to
           the measured data.

        For every row of ``self.data`` whose compounds are all known to
        ``thermodynamics``, the reaction energy is predicted and compared with
        the measured ``dG0_r``. Three figures (observed vs. estimated grouped
        by ``K_type``, histogram of residuals, error vs. number of
        measurements) and a table of all rows sorted in descending order are
        written to HTML.

        Args:
            key: The name of this group of results; also used as the HTML
                heading and as the id of the collapsible section.
            thermodynamics: a Thermodynamics object that provides dG estimates.
            html_writer: to write HTML.
        """

        logging.info(
            "calculate the correlation between %s's predictions and the NIST database"
            % key)

        known_cid_set = thermodynamics.get_all_cids()
        dG0_obs_vec = []
        dG0_est_vec = []

        # A mapping from each label (here the K_type of the row) to a pair of
        # lists: the measured values and the matching predictions
        evaluation_map = {}
        total_list = []

        # count in how many rows each compound appears
        cid2count = {}
        for row_data in self.data:
            for cid in row_data.GetAllCids():
                cid2count[cid] = cid2count.get(cid, 0) + 1

        for row_data in self.data:
            unknown_set = set(row_data.GetAllCids()).difference(known_cid_set)

            if unknown_set:
                logging.debug("a compound in (%s) doesn't have a dG0_f" %
                              row_data.origin)
                continue

            label = row_data.K_type

            if label not in evaluation_map:
                evaluation_map[label] = ([], [])

            try:
                dG0_pred = row_data.PredictReactionEnergy(thermodynamics)
            except MissingCompoundFormationEnergy:
                logging.debug("a compound in (%s) doesn't have a dG0_f" %
                              row_data.origin)
                continue

            dG0_obs_vec.append(row_data.dG0_r)
            dG0_est_vec.append(dG0_pred)
            evaluation_map[label][0].append(row_data.dG0_r)
            evaluation_map[label][1].append(dG0_pred)
            # the least-measured compound of the reaction
            n_measurements = min(
                [cid2count[cid] for cid in row_data.GetAllCids()])
            error = abs(row_data.dG0_r - dG0_pred)

            total_list.append([
                error, row_data.dG0_r, dG0_pred, row_data.sparse, row_data.pH,
                row_data.pMg, row_data.I, row_data.T, row_data.evaluation,
                n_measurements
            ])

        # plot the profile graph
        rcParams['text.usetex'] = False
        rcParams['legend.fontsize'] = 12
        rcParams['font.family'] = 'sans-serif'
        rcParams['font.size'] = 16
        rcParams['lines.linewidth'] = 2
        rcParams['lines.markersize'] = 3
        rcParams['figure.figsize'] = [8.0, 6.0]
        rcParams['figure.dpi'] = 100

        fig1 = figure()
        hold(True)

        colors = ['purple', 'orange', 'lightgreen', 'red', 'cyan']
        for i, e in enumerate(sorted(evaluation_map.keys())):
            measured, predicted = evaluation_map[e]
            resid = np.array(measured) - np.array(predicted)
            label = '%s (N = %d, RMSE = %.2f [kJ/mol])' % (
                e, len(measured), rms_flat(resid.flat))
            # cycle through the palette so that more labels than colors do
            # not exhaust the list
            c = colors[i % len(colors)]
            plot(measured,
                 predicted,
                 marker='.',
                 linestyle='None',
                 markerfacecolor=c,
                 markeredgecolor=c,
                 markersize=5,
                 label=label)

        legend(loc='upper left')

        resid = np.array(dG0_obs_vec) - np.array(dG0_est_vec)
        rmse = rms_flat(resid.flat)
        title(r'N = %d, RMSE = %.1f [kJ/mol]' % (len(dG0_obs_vec), rmse),
              fontsize=14)
        xlabel(r'$\Delta_{obs} G^\circ$ [kJ/mol]', fontsize=14)
        ylabel(r'$\Delta_{est} G^\circ$ [kJ/mol]', fontsize=14)
        min_x = min(dG0_obs_vec)
        max_x = max(dG0_obs_vec)
        plot([min_x, max_x], [min_x, max_x], 'k--')
        axis([-60, 60, -60, 60])

        fig2 = figure()
        hist([(row[1] - row[2]) for row in total_list],
             bins=arange(-50, 50, 0.5))
        title(r'RMSE = %.1f [kJ/mol]' % rmse, fontsize=14)
        xlabel(r'$\Delta_{obs} G^\circ - \Delta_{est} G^\circ$ [kJ/mol]',
               fontsize=14)
        ylabel(r'no. of measurements', fontsize=14)

        fig3 = figure()
        plot([row[9] for row in total_list],
             [abs(row[1] - row[2]) for row in total_list], '.')
        # this title has no conversion specifier, so it must not be
        # %-formatted (doing so raises TypeError)
        title(
            r'The effect of the number of measurements on the estimation error',
            fontsize=14)
        xlabel(r'minimum no. of measurements among reaction compounds',
               fontsize=14)
        ylabel(r'$|| \Delta_{obs} G^\circ - \Delta_{est} G^\circ ||$ [kJ/mol]',
               fontsize=14)
        xscale('log')

        html_writer.write("<h2>%s</h2>" % key)

        html_writer.embed_matplotlib_figure(fig1, width=400, height=300)
        html_writer.embed_matplotlib_figure(fig2, width=400, height=300)

        # the third figure and the table go into a section toggled by a button
        html_writer.write(
            '<input type="button" class="button" onclick="return toggleMe(\'%s\')" value="Show">\n'
            % (key))
        html_writer.write('<div id="%s" style="display:none">' % key)

        html_writer.embed_matplotlib_figure(fig3, width=400, height=300)

        table_headers = [
            "|error|", "dG0(obs)", "dG0(pred)", "reaction", "pH", "pMg", "I",
            "T", "evaluation", "min_num_measurements"
        ]
        html_writer.write("<table>\n")
        html_writer.write("<tr><td>" + "</td><td>".join(table_headers) +
                          "</td></tr>\n")

        for row in sorted(total_list, reverse=True):
            # replace the sparse reaction by its hypertext form for display
            sparse_reaction = row[3]
            row[3] = self.kegg.sparse_to_hypertext(sparse_reaction,
                                                   show_cids=False)
            html_writer.write("<tr><td>" +
                              "</td><td>".join(["%.1f" % x for x in row[:3]] +
                                               [str(x) for x in row[3:]]) +
                              "</td></tr>\n")
        html_writer.write("</table>\n")
        html_writer.write("</div><br>\n")
    def Loo(self, no_anchoring=True):
        """Leave-one-out evaluation of the reaction observations.

        For each column of ``self.S`` that is a non-anchored, non-empty
        'reaction' observation, the column is removed from the training data
        and its energy is estimated by ``self._GetChemicalReactionEnergies``
        from the remaining columns. Errors relative to ``self.b`` are grouped
        by classification, the estimates are passed to ``self.Report``, and
        a summary list and a table sorted by decreasing absolute error are
        written through ``self.html_writer``.

        Args:
            no_anchoring: when true, the anchoring flags of the remaining
                observations are zeroed before the estimation.
        """
        n_cols = self.S.shape[1]
        # all-NaN placeholders; only the evaluated columns get filled in
        dG0_r_ugc = np.matrix(np.zeros((3, n_cols))) * np.nan
        dG0_r_pgc = np.matrix(np.zeros((1, n_cols))) * np.nan

        table_rows = []
        ugc_errors_by_class = defaultdict(list)
        pgc_errors_by_class = defaultdict(list)
        for col in xrange(n_cols):
            # only non-anchored, non-empty reactions are evaluated
            if (self.obs_types[col] != 'reaction'
                    or self.anchored[0, col]
                    or abs(self.S[:, col]).sum(0) < self.epsilon):
                continue

            # indices of all columns but the current one
            others = [j for j in xrange(n_cols) if j != col]
            train_S = self.S[:, others].copy()
            train_anchored = self.anchored[0, others]
            if no_anchoring:
                train_anchored = train_anchored * 0

            train_b = self.b[:, others].copy()
            test_S = self.S[:, col].copy()
            ugc_estimate, parts, pgc_estimate = \
                self._GetChemicalReactionEnergies(
                    train_S, self.cids, train_b, train_anchored, test_S,
                    self.cids)
            dG0_r_ugc[:, col] = ugc_estimate
            dG0_r_pgc[0, col] = pgc_estimate

            # the class depends on which entries of 'parts' exceed epsilon
            # (rows 0..3 are reported below as ANCH, PRC, PGC and NULL)
            in_kernel = parts[3, 0] > self.epsilon
            uses_prc = parts[1, 0] > self.epsilon
            uses_pgc = parts[2, 0] > self.epsilon
            if in_kernel:
                label = 'kernel'
            elif uses_prc and uses_pgc:
                label = 'PRC + PGC'
            elif uses_prc:
                label = 'PRC'
            elif uses_pgc:
                label = 'PGC'
            else:
                label = 'anchored'

            observed = self.b[0, col]
            est_b = float(dG0_r_ugc[:, col].sum(0))
            ugc_err = observed - est_b
            ugc_errors_by_class[label].append(ugc_err)

            pgc_err = observed - dG0_r_pgc[0, col]
            pgc_errors_by_class[label].append(pgc_err)

            table_rows.append({
                'row': col,
                'type': self.obs_types[col],
                'reaction': UnifiedGroupContribution.row2hypertext(
                    self.S[:, col], self.cids),
                'obs': observed,
                'est': est_b,
                'est(PGC)': dG0_r_pgc[0, col],
                # a non-finite error is stored as 0 and so sorts last
                '|err|': abs(ugc_err) if np.isfinite(ugc_err) else 0,
                'est_ANCH': dG0_r_ugc[0, col],
                'est_PRC': dG0_r_ugc[1, col],
                'est_PGC': dG0_r_ugc[2, col],
                'part_ANCH': parts[0, 0],
                'part_PRC': parts[1, 0],
                'part_PGC': parts[2, 0],
                'part_NULL': parts[3, 0],
                'class': label,
            })

        # per-class count and RMSE of both estimators
        class_errors = [
            '%s: N = %d, rmse(UGC) = %.1f kJ/mol, rmse(PGC) = %.1f kJ/mol'
            % (label, len(ugc_errs), rms_flat(ugc_errs),
               rms_flat(pgc_errors_by_class[label]))
            for label, ugc_errs in ugc_errors_by_class.items()
        ]

        self.Report(dG0_r_ugc.sum(0), 'UGC - Leave one out')
        self.Report(dG0_r_pgc, 'PGC - Leave one out')

        table_rows.sort(key=lambda entry: entry['|err|'], reverse=True)
        writer = self.html_writer
        writer.write('<h2>Linear Regression Leave-One-Out Analysis</h2>\n')
        writer.insert_toggle(start_here=True, label="Show table")
        writer.write_ul(class_errors)
        columns = ['row', 'type', 'reaction', 'class', 'obs', 'est',
                   'est(PGC)', '|err|', 'est_ANCH', 'est_PRC', 'est_PGC',
                   'part_ANCH', 'part_PRC', 'part_PGC', 'part_NULL']
        writer.write_table(table_rows, headers=columns, decimal=1)
        writer.div_end()
예제 #31
0
    def AnalyzeTrainingSet(self, skip_formations=True):
        """Report fit and leave-one-out residuals of the training observations.

        The fitted values are the product of ``self.group_contributions`` and
        ``self.group_matrix``. For each reported observation the regression
        is repeated without that observation and the left-out value is
        predicted. A table sorted by decreasing leave-one-out error and a
        residual-vs-observation figure are written through
        ``self.html_writer``.

        Args:
            skip_formations: when true, observations of type
                ``KeggObservation.TYPE_FORMATION`` are not reported.
        """
        n_obs = self.group_matrix.shape[1]
        table_rows = []
        fit_results = np.dot(self.group_contributions, self.group_matrix)
        residuals = fit_results - self.obs_values

        sym = symbol_d_G0_prime if self.transformed else symbol_d_G0

        # column titles shared by the row dictionaries and the table header
        col_obs = sym + ' (obs)'
        col_fit = sym + ' (fit)'
        col_res = sym + ' (res)'
        col_loo_fit = 'LOO ' + sym + ' (fit)'
        col_loo_res = 'LOO ' + sym + ' (res)'

        for i in xrange(n_obs):
            obs_type = self.obs_types[i]
            if obs_type in (KeggObservation.TYPE_ACID_BASE,
                            KeggObservation.TYPE_MG,
                            KeggObservation.TYPE_REDOX):
                continue
            if skip_formations and obs_type == KeggObservation.TYPE_FORMATION:
                continue

            # LOO columns start as NaN and are filled in further down, unless
            # the left-out example cannot be predicted
            entry = {
                'Observation': self.obs_ids[i],
                col_obs: self.obs_values[0, i],
                col_fit: fit_results[0, i],
                col_res: residuals[0, i],
                col_loo_fit: np.nan,
                col_loo_res: np.nan,
                'sortkey': 0,
            }
            table_rows.append(entry)
            logging.info('Fit Error = %.1f' % residuals[0, i])

            # leave out the column corresponding with observation 'i'
            logging.info('Cross validation, leaving-one-out: ' +
                         self.obs_ids[i])
            subset = [j for j in xrange(n_obs) if j != i]
            loo_group_contributions, loo_nullspace = \
                LinearRegression.LeastSquares(self.group_matrix[:, subset],
                                              self.obs_values[:, subset])

            # a larger nullspace without the example means the others cannot
            # predict it
            if loo_nullspace.shape[1] > self.group_nullspace.shape[1]:
                logging.warning(
                    'example %d is not linearly dependent in the other examples'
                    % i)
                continue

            loo_fit = float(
                np.dot(loo_group_contributions, self.group_matrix[:, i]))
            loo_res = loo_fit - self.obs_values[0, i]
            entry[col_loo_fit] = loo_fit
            entry[col_loo_res] = loo_res
            entry['sortkey'] = abs(loo_res)
            logging.info('LOO Error = %.1f' % loo_res)

        logging.info(
            "writing the table of estimation errors for each compound")
        writer = self.html_writer
        writer.write('</br><b>Cross validation table</b>')
        writer.insert_toggle(start_here=True)
        writer.write('<font size="1">\n')
        obs_vec = np.matrix([entry[col_obs] for entry in table_rows])
        resid_vec = np.matrix([entry[col_res] for entry in table_rows])
        rmse = rms_flat(resid_vec.flat)

        # rows whose LOO residual stayed NaN are excluded from the LOO RMSE
        loo_resid_vec = np.matrix([entry[col_loo_res] for entry in table_rows])
        loo_rmse = rms_flat(loo_resid_vec[np.isfinite(loo_resid_vec)].flat)

        writer.write_ul([
            'fit RMSE = %.1f [kJ/mol]' % rmse,
            'leave-one-out RMSE = %.1f [kJ/mol]' % loo_rmse
        ])
        logging.info("Goodness of fit: RMSE = %.1f [kJ/mol]" % rmse)
        logging.info("Leave-one-out test: RMSE = %.1f [kJ/mol]" % loo_rmse)

        headers = [
            'Observation', col_obs, col_fit, col_res, col_loo_fit, col_loo_res
        ]
        table_rows.sort(key=lambda entry: entry['sortkey'], reverse=True)
        writer.write_table(table_rows, headers, decimal=1)
        writer.write('</font>\n')
        writer.div_end()

        writer.write('</br><b>Cross-validation figure</b>')
        writer.insert_toggle(start_here=True)

        obs_vs_err_fig = plt.figure(figsize=[6.0, 6.0], dpi=100)
        plt.plot(obs_vec.T, resid_vec.T, '.')
        plt.xlabel('Observation')
        plt.ylabel('Estimated (PGC) Residuals')
        plt.hold(True)
        # label the observations whose residual exceeds twice the RMSE
        for entry in table_rows:
            if abs(entry[col_res]) > 2 * rmse:
                plt.text(entry[col_obs],
                         entry[col_res],
                         entry['Observation'],
                         fontsize=4,
                         figure=obs_vs_err_fig)
        plt.title('Observed vs. Fitted (PGC) Residuals', figure=obs_vs_err_fig)
        writer.embed_matplotlib_figure(obs_vs_err_fig)
        writer.div_end()
예제 #32
0
파일: nist.py 프로젝트: issfangks/milo-lab
    def verify_results(self, html_writer, thermodynamics, name=None):
        """Compare estimated reaction energies with the selected NIST rows.

        Every row returned by ``self.SelectRowsFromNist()`` is estimated with
        ``thermodynamics``. Three figures (observed vs. estimated, RMSE
        binned by pH, histogram of residuals) and a table of all rows are
        written to HTML.

        Args:
            html_writer: receives the figures and the table.
            thermodynamics: a Thermodynamics object that provides dG estimates.
            name: optional prefix for the names of the embedded figures.

        Returns:
            ``(0, 0)`` when no row has a finite estimate; otherwise the number
            of finite estimates and the RMSE taken over the per-reaction RMSE
            values.
        """
        dG0_obs_vec = []
        dG0_est_vec = []

        # Maps a quality label (derived from the NIST evaluation grade) to a
        # pair of lists: measured values and the matching estimates
        evaluation_map = {}
        rowdicts = []
        finite_rowdicts = []

        eval_to_label = {'A': 'high quality'}
        eval_to_label.update(dict.fromkeys('BCDE', 'low quality'))

        obs_key = symbol_dr_G0_prime + ' (obs)'
        est_key = symbol_dr_G0_prime + ' (est)'

        for row_data in self.SelectRowsFromNist():
            label = eval_to_label[row_data.evaluation]
            evaluation_map.setdefault(label, ([], []))
            reaction = row_data.reaction
            rowdict = {
                obs_key: np.round(row_data.dG0_r, 1),
                '_reaction': reaction,
                'reaction': reaction.to_hypertext(show_cids=False),
                'rid': ('<a href="%s">R%05d</a>' % (reaction.get_link(),
                                                    reaction.rid)
                        if reaction.rid is not None else ''),
                'pH': row_data.pH,
                'pMg': row_data.pMg,
                'I': row_data.I,
                'T': row_data.T,
                'eval.': row_data.evaluation,
                'url': '<a href="%s">%s</a>' % (row_data.url,
                                                row_data.ref_id),
            }

            dG0_est = row_data.PredictReactionEnergy(thermodynamics)
            if not np.isfinite(dG0_est):
                # rows without a finite estimate sort after all the others
                rowdict['sort_key'] = 1
            else:
                measured_list, estimated_list = evaluation_map[label]
                dG0_obs_vec.append(row_data.dG0_r)
                dG0_est_vec.append(dG0_est)
                measured_list.append(row_data.dG0_r)
                estimated_list.append(dG0_est)
                rowdict[est_key] = np.round(dG0_est, 1)
                rowdict['residual'] = np.round(row_data.dG0_r - dG0_est, 3)
                rowdict['|error|'] = abs(rowdict['residual'])
                # negated so that the largest errors come first
                rowdict['sort_key'] = -rowdict['|error|']
                finite_rowdicts.append(rowdict)

            rowdicts.append(rowdict)

        rowdicts.sort(key=lambda entry: entry['sort_key'])

        if not dG0_obs_vec:
            return 0, 0

        # RMSE per distinct reaction first, then the RMSE over those values
        errors_by_reaction = defaultdict(list)
        for entry in finite_rowdicts:
            errors_by_reaction[entry['_reaction']].append(entry['|error|'])
        unique_rmse_list = [rms_flat(errors)
                            for errors in errors_by_reaction.values()]
        unique_rmse = rms_flat(unique_rmse_list)

        resid = np.array(dG0_obs_vec) - np.array(dG0_est_vec)
        rmse = rms_flat(resid.flat)

        def embed(fig, suffix):
            # only pass a figure name when the caller supplied a prefix
            if name:
                html_writer.embed_matplotlib_figure(fig, name=name+suffix)
            else:
                html_writer.embed_matplotlib_figure(fig)

        # plot the profile graph
        for param, value in (('text.usetex', False),
                             ('legend.fontsize', 10),
                             ('font.family', 'sans-serif'),
                             ('font.size', 12),
                             ('lines.linewidth', 1),
                             ('lines.markersize', 3)):
            plt.rcParams[param] = value

        fig1 = plt.figure(figsize=(6,6), dpi=90)
        plt.hold(True)

        colors = ['purple', 'orange']
        for i, label in enumerate(sorted(evaluation_map)):
            measured, predicted = evaluation_map[label]
            color = colors[i]
            plt.plot(measured, predicted, marker='.', linestyle='None',
                     markerfacecolor=color, markeredgecolor=color,
                     markersize=5, label=label, figure=fig1)

        plt.legend(loc='lower right')

        plt.text(-50, 40, r'RMSE = %.1f [kJ/mol]' % (unique_rmse), fontsize=14,
                 figure=fig1)
        plt.xlabel(r'observed $\Delta_r G^{\'\circ}$ [kJ/mol]', fontsize=14, figure=fig1)
        plt.ylabel(r'estimated $\Delta_r G^{\'\circ}$ [kJ/mol]', fontsize=14, figure=fig1)
        plt.plot([-60, 60], [-60, 60], 'k--', figure=fig1)
        plt.axis([-60, 60, -60, 60])
        embed(fig1, "_eval")

        fig2 = plt.figure(figsize=(6,6), dpi=90)
        ph_values = [entry['pH'] for entry in finite_rowdicts]
        abs_errors = [entry['|error|'] for entry in finite_rowdicts]
        binned_plot(x=ph_values, y=abs_errors, bins=[5,6,7,8,9],
                    y_type='rmse', figure=fig2)
        plt.xlim((4, 11))
        plt.ylim((0, 12))
        plt.title(r'effect of pH', fontsize=14, figure=fig2)
        plt.xlabel('pH', fontsize=14, figure=fig2)
        plt.ylabel(r'RMSE ($\Delta_r G^{\'\circ}$) [kJ/mol]',
                   fontsize=14, figure=fig2)
        embed(fig2, "_pH")

        fig3 = plt.figure(figsize=(6,6), dpi=90)
        plt.hist([entry['residual'] for entry in finite_rowdicts],
                 bins=np.arange(-50, 50, 0.5))
        plt.title(r'RMSE = %.1f [kJ/mol]' % rmse, fontsize=14, figure=fig3)
        plt.xlabel(r'residual $\Delta_r G^{\'\circ}$ [kJ/mol]',
                   fontsize=14, figure=fig3)
        plt.ylabel(r'no. of measurements', fontsize=14, figure=fig3)
        embed(fig3, "_hist")

        table_headers = ["#", "|error|", obs_key, est_key,
                         "reaction", "rid", "pH", "pMg", "I", "T",
                         "eval.", "url"]
        html_writer.write_table(rowdicts, table_headers, decimal=1)

        return len(dG0_obs_vec), unique_rmse
예제 #33
0
def show_rotation_spacing(filename):
    """
    Automatically detect rotation and line spacing of an image of text using
    Radon transform

    If image is rotated by the inverse of the output, the lines will be
    horizontal (though they may be upside-down depending on the original image)

    It doesn't work with black borders

    The detected rotation and line spacing are printed, and a 2x2 figure
    (image, sinogram, busiest projection, its spectrum) is shown.

    Args:
        filename: path of the image file to analyse.
    """
    from skimage.transform import radon
    from PIL import Image
    from numpy import asarray, mean, array, blackman
    import numpy
    from numpy.fft import rfft
    import matplotlib.pyplot as plt
    try:
        from matplotlib.mlab import rms_flat
    except ImportError:
        # newer Matplotlib releases no longer ship mlab.rms_flat; this is
        # the same root-mean-square computation
        def rms_flat(a):
            return numpy.sqrt(numpy.mean(numpy.abs(a) ** 2))
    try:
        # More accurate peak finding from
        # https://gist.github.com/endolith/255291#file-parabolic-py
        from parabolic import parabolic

        def argmax(x):
            return parabolic(x, numpy.argmax(x))[0]
    except ImportError:
        from numpy import argmax

    # Load file, converting to grayscale
    I = asarray(Image.open(filename).convert('L'))
    I = I - mean(I)  # Demean; make the brightness extend above and below zero
    plt.subplot(2, 2, 1)
    plt.imshow(I)

    # Do the radon transform and display the result
    sinogram = radon(I)

    plt.subplot(2, 2, 2)
    plt.imshow(sinogram.T, aspect='auto')
    plt.gray()

    # Find the RMS value of each row and find "busiest" rotation,
    # where the transform is lined up perfectly with the alternating dark
    # text and white lines
    r = array([rms_flat(line) for line in sinogram.transpose()])
    rotation = argmax(r)
    print('Rotation: {:.2f} degrees'.format(90 - rotation))
    plt.axhline(rotation, color='r')

    # Plot the busy row. With the parabolic peak finder, argmax yields a
    # fractional position, which is not a valid array index; index with the
    # nearest integer while the reported rotation keeps the refined value.
    row = sinogram[:, int(round(rotation))]
    N = len(row)
    plt.subplot(2, 2, 3)
    plt.plot(row)

    # Take spectrum of busy row and find line spacing
    window = blackman(N)
    spectrum = rfft(row * window)
    plt.plot(row * window)
    frequency = argmax(abs(spectrum))
    line_spacing = N / frequency  # pixels
    print('Line spacing: {:.2f} pixels'.format(line_spacing))

    plt.subplot(2, 2, 4)
    plt.plot(abs(spectrum))
    plt.axvline(frequency, color='r')
    plt.yscale('log')
    plt.show()
예제 #34
0
파일: nist.py 프로젝트: issfangks/milo-lab
    def two_way_comparison(self, html_writer, thermo1, thermo2, name=None):
        """
            Compare the estimation errors of two different evaluation methods.

        Every row returned by ``self.SelectRowsFromNist()`` is estimated with
        both methods; rows that either method cannot estimate are skipped.
        An error-vs-error scatter plot, one observed-vs-estimated plot per
        method, and a table sorted by decreasing disagreement between the two
        estimates are written to HTML.

        Args:
            html_writer: receives the figures and the table.
            thermo1: a Thermodynamics object that provides dG estimates.
            thermo2: a Thermodynamics object that provides dG estimates.
            name: optional prefix for the names of the embedded figures.

        Returns:
            ``(0, 0)`` when no row could be estimated by both methods;
            otherwise nothing.
        """

        total_list = []

        for row_data in self.SelectRowsFromNist():
            try:
                dG0_pred1 = row_data.PredictReactionEnergy(thermo1)
                dG0_pred2 = row_data.PredictReactionEnergy(thermo2)
            except MissingReactionEnergy as e:
                logging.debug("the reaction in (%s) cannot be estimated: %s" % (row_data.ref_id, str(e)))
                continue

            total_list.append([row_data.dG0_r, dG0_pred1, dG0_pred2,
                               row_data.reaction, row_data.pH, row_data.pMg,
                               row_data.I, row_data.T, row_data.evaluation,
                               row_data.url])

        if not total_list:
            return 0, 0

        def embed(fig, suffix):
            # name defaults to None, so only build a figure name when a
            # prefix was given (same convention as verify_results)
            if name:
                html_writer.embed_matplotlib_figure(fig, name=name + suffix)
            else:
                html_writer.embed_matplotlib_figure(fig)

        # plot the profile graph
        plt.rcParams['text.usetex'] = False
        plt.rcParams['font.family'] = 'sans-serif'
        plt.rcParams['font.size'] = 8
        plt.rcParams['lines.linewidth'] = 2
        plt.rcParams['lines.markersize'] = 2
        plt.rcParams['figure.dpi'] = 100

        # only columns 0..2 (observed, estimate 1, estimate 2) are used below
        data_mat = np.array(total_list)
        fig1 = plt.figure(figsize=(4, 4))
        plt.hold(True)
        error1 = data_mat[:, 0] - data_mat[:, 1]
        error2 = data_mat[:, 0] - data_mat[:, 2]

        max_err = max(error1.max(), error2.max())
        min_err = min(error1.min(), error2.min())
        plt.plot([min_err, max_err], [min_err, max_err], 'k--', figure=fig1)
        plt.plot(error1, error2, '.', figure=fig1)
        plt.title("Error Comparison per Reaction (in kJ/mol)")
        plt.xlabel(thermo1.name, figure=fig1)
        plt.ylabel(thermo2.name, figure=fig1)
        embed(fig1, "_corr")

        fig2 = plt.figure(figsize=(7, 3))
        for i, thermo in enumerate([thermo1, thermo2]):
            fig2.add_subplot(1, 2, i + 1)
            plt.plot(data_mat[:, 0], data_mat[:, i + 1], 'b.')
            rmse = rms_flat((data_mat[:, 0] - data_mat[:, i + 1]).flat)
            plt.text(-50, 40, r'RMSE = %.1f [kJ/mol]' % (rmse))
            plt.xlabel(r'observed $\Delta G_r^\circ$ from NIST [kJ/mol]')
            plt.ylabel(r'estimated $\Delta G_r^\circ$ using %s [kJ/mol]' % thermo.name)
            plt.plot([-60, 60], [-60, 60], 'k--')
            plt.axis([-60, 60, -60, 60])

        embed(fig2, "_eval")

        table_headers = ["dG'0 (obs)", "dG'0 (%s)" % thermo1.name,
                         "dG'0 (%s)" % thermo2.name, "reaction", "rid", "pH",
                         "pMg", "I", "T", "eval.", "url"]
        dict_list = []
        # rows where the two methods disagree the most come first
        for row in sorted(total_list, key=lambda r: abs(r[1] - r[2]),
                          reverse=True):
            d = {}
            d["dG'0 (obs)"] = '%.1f' % row[0]
            d["dG'0 (%s)" % thermo1.name] = '%.1f' % row[1]
            d["dG'0 (%s)" % thermo2.name] = '%.1f' % row[2]
            d['reaction'] = row[3].to_hypertext(show_cids=False)
            if row[3].rid is not None:
                d['rid'] = '<a href="%s">R%05d</a>' % (row[3].get_link(), row[3].rid)
            else:
                d['rid'] = ''
            d['pH'] = '%.1f' % row[4]
            d['pMg'] = '%.1f' % row[5]
            d['I'] = '%.2f' % row[6]
            d['T'] = '%.1f' % row[7]
            d['eval.'] = row[8]
            if row[9]:
                d['url'] = '<a href="%s">link</a>' % row[9]
            else:
                d['url'] = ''
            dict_list.append(d)
        html_writer.write_table(dict_list, table_headers)
예제 #35
0
# Script fragment: estimate the rotation of the text lines in an image from
# its Radon transform and plot the intermediate results.
# `filename` and `argmax` are defined outside this fragment.

# Load file, converting to grayscale
I = asarray(Image.open(filename).convert('L'))
I = I - mean(I)  # Demean; make the brightness extend above and below zero
plt.subplot(2, 2, 1)
plt.imshow(I)

# Do the radon transform and display the result
sinogram = radon(I)

plt.subplot(2, 2, 2)
plt.imshow(sinogram.T, aspect='auto')
plt.gray()

# Find the RMS value of each row and find "busiest" rotation,
# where the transform is lined up perfectly with the alternating dark
# text and white lines
r = array([rms_flat(line) for line in sinogram.transpose()])
rotation = argmax(r)
print('Rotation: {:.2f} degrees'.format(90 - rotation))
plt.axhline(rotation, color='r')

# Plot the busy row
# NOTE(review): `rotation` is used as an array index here, so this only works
# if `argmax` returns an integer - confirm against its definition.
row = sinogram[:, rotation]
N = len(row)
plt.subplot(2, 2, 3)
plt.plot(row)

# Take spectrum of busy row and find line spacing
# (the row is windowed before the FFT; the windowed row is drawn as well)
window = blackman(N)
spectrum = rfft(row * window)
plt.plot(row * window)
frequency = argmax(abs(spectrum))
예제 #36
0
def main():
    """
    Run the regression with anchored compounds and write the
    "Pseudoisomeric Reactant Contributions" (PRC) report.

    Steps, in order:
      1. Define the anchor compounds (fixed_cids: CID -> (nH, dG0)).
      2. Read the "alberty_pseudoisomers" table from
         ../data/public_data.sqlite and record, per CID, the nH and dG0 of
         its most abundant pseudoisomer at the default pH, I, pMg and T.
      3. Obtain S, dG0 and cids. When ../res/prc_S.txt does not exist they
         are computed with NistRegression.ReverseTransform (database
         ../res/gibbs.sqlite) and cached in ../res/prc_CID.txt,
         ../res/prc_S.txt and ../res/prc_dG0.txt; otherwise they are loaded
         back from those three files.
      4. Solve the least-squares problem with the anchors as fixed points.
      5. Write ../res/regression_fast.html, ../res/prc_results.csv and
         ../res/prc_latex.txt.

    Takes no arguments and returns nothing.

    Raises:
        ValueError: if one of the anchored CIDs is missing from `cids`
            (raised by `cids.index`).
    """
    kegg = Kegg.getInstance()
    prefix = "../res/prc_"

    fixed_cids = {}  # a dictionary from CID to pairs of (nH, dG0)

    # Alberty formation energies directly measured, linearly independent:
    fixed_cids[1] = (2, -237.19)  # H2O
    fixed_cids[9] = (1, -1096.1)  # HPO3(-2)
    fixed_cids[14] = (4, -79.31)  # NH4(+1)
    fixed_cids[59] = (0, -744.53)  # SO4(-2)
    fixed_cids[288] = (1, -586.77)  # HCO3(-1)

    # Alberty zeros:
    fixed_cids[3] = (26, 0.0)  # NAD(ox)
    fixed_cids[10] = (32, 0.0)  # CoA
    fixed_cids[127] = (30, 0.0)  # glutathione(ox)
    fixed_cids[376] = (28, 0.0)  # retinal(ox)

    # Directly measured values
    fixed_cids[4] = (27, 22.65)  # NAD(red) -- relative to NAD(ox)
    fixed_cids[212] = (13, -194.5)  # adenosine
    # fixed_cids[294] = (12, -409.2) # inosine - linearly dependent on other 'anchors'

    # Alberty zeros which are not in NIST:
    # fixed_cids[524] = ( 0, 0.0) # cytochrome c(ox)
    # fixed_cids[16]  = (31, 0.0) # FAD(ox)
    # fixed_cids[139] = ( 0, 0.0) # ferredoxin(ox)
    # fixed_cids[61]  = (19, 0.0) # FMN(ox)
    # fixed_cids[343] = ( 0, 0.0) # thioredoxin(ox)
    # fixed_cids[399] = (90, 0.0) # ubiquinone(ox)

    # Reference values: nH and dG0 of the most abundant pseudoisomer of every
    # compound in the Alberty table, at the default conditions.
    public_db = SqliteDatabase("../data/public_data.sqlite")
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        public_db, "alberty_pseudoisomers", label=None, name="Alberty"
    )
    alberty_cid2dG0 = {}
    alberty_cid2nH = {}
    for cid in alberty.get_all_cids():
        pmap = alberty.cid2PseudoisomerMap(cid)
        dG0, _dG0_tag, nH, _z, _nMg = pmap.GetMostAbundantPseudoisomer(
            pH=default_pH, I=default_I, pMg=default_pMg, T=default_T
        )
        alberty_cid2nH[cid] = nH
        alberty_cid2dG0[cid] = dG0

    if not os.path.exists(prefix + "S.txt"):
        # No cached matrices: run the reverse transform and cache its output.
        db = SqliteDatabase("../res/gibbs.sqlite")
        nist_regression = NistRegression(db)

        # Choose nH per compound: anchors first, then the Alberty table, then
        # the dissociation data, falling back to 0 with a warning.
        cid2nH = {}
        for cid in nist_regression.nist.GetAllCids():
            if cid in fixed_cids:
                cid2nH[cid] = fixed_cids[cid][0]
            elif cid in alberty_cid2nH:
                cid2nH[cid] = alberty_cid2nH[cid]
            else:
                tmp = nist_regression.dissociation.GetMostAbundantPseudoisomer(
                    cid, pH=default_pH, I=default_I, pMg=default_pMg, T=default_T
                )
                if tmp is not None:
                    cid2nH[cid] = tmp[0]
                else:
                    logging.warning(
                        "The most abundant pseudoisomer of %s (C%05d) "
                        "cannot be resolved. Using nH = 0." % (kegg.cid2name(cid), cid)
                    )
                    cid2nH[cid] = 0

        # nist_regression.std_diff_threshold = 2.0 # the threshold over which to print an analysis of a reaction
        # nist_regression.nist.T_range = None#(273.15 + 24, 273.15 + 40)
        S, dG0, cids = nist_regression.ReverseTransform(cid2nH=cid2nH)

        # export the raw data matrices to text files

        C = np.array([[cid, cid2nH.get(cid, 0)] for cid in cids])
        np.savetxt(prefix + "CID.txt", C, fmt="%d", delimiter=",")
        np.savetxt(prefix + "S.txt", S, fmt="%g", delimiter=",")
        np.savetxt(prefix + "dG0.txt", dG0, fmt="%.2f", delimiter=",")
    else:
        # Cached matrices exist: rebuild cids, cid2nH, S and dG0 from them.
        C = np.loadtxt(prefix + "CID.txt", delimiter=",")
        cids = [int(cid) for cid in C[:, 0]]
        cid2nH = {}
        for i, cid in enumerate(cids):
            cid2nH[cid] = int(C[i, 1])
        S = np.loadtxt(prefix + "S.txt", delimiter=",")
        dG0 = np.loadtxt(prefix + "dG0.txt", delimiter=",")
        dG0 = np.reshape(dG0, (dG0.shape[0], 1))  # make dG0 a column vector

    html_writer = HtmlWriter("../res/regression_fast.html")
    html_writer.write("<h1>Pseudoisomeric Reactant Contributions</h1>\n")
    html_writer.write("<p>The stoichiometric matrix (S):")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, S, cids)
    html_writer.div_end()
    html_writer.write("</p>")

    # Map the column index of every anchored compound to its fixed dG0; the
    # regression receives the anchors through this mapping.
    index2value = {}
    for cid, (_nH, dG0_fixed) in fixed_cids.items():
        index2value[cids.index(cid)] = dG0_fixed

    x, _K = LinearRegression.LeastSquaresWithFixedPoints(S, dG0, index2value)
    cid2dG0 = {}
    for i, cid in enumerate(cids):
        cid2dG0[cid] = x[i]

    # Calculate the Kernel of the reduced stoichiometric matrix (after removing
    # the columns of the fixed compounds).
    cids_red = [cid for cid in cids if cid not in fixed_cids]
    index_red = [i for i in range(len(cids)) if i not in index2value]
    S_red = S[:, index_red]
    K_red = LinearRegression.Kernel(S_red)

    # print "Reduced Stoichiometric Matrix:"
    # print matrix2string(S_red, cids_red, kegg)
    # print '-'*80

    # Find all CIDs that are completely determined and do not depend on any
    # free variable. In other words, all zero columns of the reduced kernel.
    dict_list = []

    determined_indices = np.where(np.sum(abs(K_red), 0) < 1e-10)[0]  # all zero-columns in reducedK
    determined_cids = set(cids_red[i] for i in determined_indices)
    plot_data = []  # (Alberty dG0, PRC dG0, compound name) for non-anchored compounds
    for cid in cids:
        d = {
            "CID": "C%05d" % cid,
            "Compound": kegg.cid2name(cid),
            "nH": "%d" % cid2nH[cid],
            "dG0 (PRC)": "%.1f" % cid2dG0[cid],
        }
        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
            if cid not in fixed_cids:
                plot_data.append((alberty_cid2dG0[cid], cid2dG0[cid], kegg.cid2name(cid)))
        else:
            d["dG0 (Alberty)"] = ""

        if cid in fixed_cids:
            d["Depends on"] = "anchored"
        elif cid in determined_cids:
            d["Depends on"] = "fixed compounds"
        else:
            d["Depends on"] = "kernel dimensions"

        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["Depends on"], row["CID"]))
    html_writer.write("<p>Formation energies determined by the linear constraints:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list, headers=["#", "Compound", "CID", "nH", "dG0 (PRC)", "dG0 (Alberty)", "Depends on"]
    )
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    # Plot a comparison between PRC and Alberty formation energies
    fig = plt.figure(figsize=(8, 8), dpi=80)
    plt.plot([p[0] for p in plot_data], [p[1] for p in plot_data], "b.", figure=fig)
    for dG0_alberty, dG0_prc, name in plot_data:
        plt.text(dG0_alberty, dG0_prc, name, fontsize=6)
    # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
    plt.xlabel(r"Alberty $\Delta_f G^\circ$")
    plt.ylabel(r"PRC $\Delta_f G^\circ$")
    html_writer.write("<p>Plot comparing PRC and Alberty results:")
    html_writer.insert_toggle(start_here=True)
    html_writer.embed_matplotlib_figure(fig)
    html_writer.div_end()
    html_writer.write("</p>")

    K_sparse = SparseKernel(S_red).Solve()
    html_writer.write("<p>The sparse null-space of the reduced stoichiometric matrix:")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, K_sparse, cids_red)
    html_writer.div_end()
    html_writer.write("</p>")

    # One row per non-anchored compound: its PRC value plus, when its column
    # of K_sparse is non-zero, the combination of free variables V_i it
    # depends on (an HTML flavour and a plain-text flavour).
    dict_list = []
    index2string_html = {i: "V<sub>%02d</sub>" % i for i in range(K_sparse.shape[0])}
    index2string = {i: "V%d" % i for i in range(K_sparse.shape[0])}
    for i, cid in enumerate(cids_red):
        d = {}
        d["KEGG ID"] = '<a href="%s">C%05d</a>' % (kegg.cid2link(cid), cid)
        d["KEGG ID plain"] = "C%05d" % cid
        d["Compound"] = kegg.cid2name(cid)
        d["nH"] = "%d" % cid2nH[cid]

        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
        else:
            d["dG0 (Alberty)"] = ""

        d["dG0 (PRC)"] = "%.1f" % cid2dG0[cid]
        d["dG0 (PRC) plain"] = "%.1f" % cid2dG0[cid]

        # Sort key: 0/1 flags of the free variables involved, last one first.
        indic = np.where(abs(K_sparse[:, i]) > 1e-10, 1, 0).tolist()
        indic.reverse()
        d["order_key"] = indic
        if mlab.rms_flat(K_sparse[:, i]) > 1e-10:
            d["dG0 (PRC)"] += " + (" + vector2string(K_sparse[:, i], index2string_html) + ")"
            d["dG0 (PRC) plain"] += " + (" + vector2string(K_sparse[:, i], index2string) + ")"
        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["order_key"], row["KEGG ID plain"]))

    # Export the results to CSV
    # NOTE(review): on Python 3 the csv docs recommend opening with newline="";
    # it is omitted so the call stays valid on Python 2 as well.
    with open("../res/prc_results.csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["KEGG ID", "Compound", "nH", "dG0 (PRC)", "dG0 (Alberty)"])
        for d in dict_list:
            csv_writer.writerow(
                [d["KEGG ID plain"], d["Compound"], d["nH"], d["dG0 (PRC) plain"], d["dG0 (Alberty)"]]
            )

    html_writer.write("<p>All formation energies as a function of the free variables:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(dict_list, headers=["#", "KEGG ID", "Compound", "nH", "dG0 (PRC)", "dG0 (Alberty)"])
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    # Export the same table as LaTeX
    with open("../res/prc_latex.txt", "w") as fp:
        fp.write(
            latex.table2LaTeX(
                dict_list, headers=["#", "KEGG ID plain", "Compound", "nH", "dG0 (PRC) plain", "dG0 (Alberty)"]
            )
        )
예제 #37
0
# Two-panel scatter plot: columns 1 and 2 of the data table are each compared
# against column 3, restricted to rows where all three columns are finite.
data = np.loadtxt(DATA_FNAME, delimiter=',', dtype='float')

#plt.plot(data[:, 0], data[:, 1], '.')

# Row indices holding a finite value in column 1, 2 and 3 respectively
finite = np.isfinite(data)
feist_idx = set(np.nonzero(finite[:, 1])[0])
ugcm_idx = set(np.nonzero(finite[:, 2])[0])
nist_idx = set(np.nonzero(finite[:, 3])[0])

# Rows usable for the comparison, and the range of column 0 over those rows
comp_idx = list(feist_idx & ugcm_idx & nist_idx)
col0 = data[comp_idx, 0]
minG = np.min(col0)
maxG = np.max(col0)

nist_obs = data[comp_idx, 3]
diagonal = [minG, maxG]  # end points of the dotted y = x reference line
title_fmt = 'N = %d, RMSE = %.1f [kJ/mol]'

plt.figure(figsize=(10, 5), dpi=90)

# Left panel: column 1 (labelled iAF1260) against the observations
plt.subplot(1, 2, 1)
feist_vals = data[comp_idx, 1]
err_feist_nist = feist_vals - nist_obs
rms_feist_nist = rms_flat(err_feist_nist)
plt.plot(feist_vals, nist_obs, '.g')
plt.plot(diagonal, diagonal, ':k')
plt.xlabel('value in iAF1260 [kJ/mol]')
plt.ylabel('TECRDB observation [kJ/mol]')
plt.title(title_fmt % (len(comp_idx), rms_feist_nist))

# Right panel: column 2 (labelled UGCM) against the observations
plt.subplot(1, 2, 2)
ugcm_vals = data[comp_idx, 2]
err_ugcm_nist = ugcm_vals - nist_obs
rms_ugcm_nist = rms_flat(err_ugcm_nist)
plt.plot(ugcm_vals, nist_obs, '.g')
plt.plot(diagonal, diagonal, ':k')
plt.xlabel('UGCM estimation [kJ/mol]')
plt.ylabel('TECRDB observation [kJ/mol]')
plt.title(title_fmt % (len(comp_idx), rms_ugcm_nist))
plt.tight_layout()
예제 #38
0
def two_way_comparison(html_writer,
                       thermo1,
                       thermo2,
                       reaction_list,
                       name=None):
    """
    Compare the reaction energies predicted by two estimation methods.

    Every reaction for which both methods yield a prediction is kept. The two
    predictions are plotted against each other (annotated with their RMSE over
    the rows where both values are finite) and listed in an HTML table sorted
    by decreasing absolute difference.

    Note: this function overwrites several global ``plt.rcParams`` entries.

    Args:
        html_writer: the writer that receives the figure and the table.
        thermo1: a Thermodynamics object that provides dG estimates.
        thermo2: a Thermodynamics object that provides dG estimates.
        reaction_list: iterable of reactions; a reaction is skipped when
            PredictReactionEnergy raises MissingReactionEnergy for either
            method.
        name: optional prefix for the embedded figure, which is then named
            ``name + "_eval"``. When None the figure is embedded without a
            name.

    Returns:
        ``(0, 0)`` when no reaction could be evaluated by both methods;
        otherwise None.
    """
    pH, pMg, I, T = (7, 14, 0.1, 298.15)

    # Rows of [prediction by thermo1, prediction by thermo2, reaction]
    total_list = []
    for reaction in reaction_list:
        try:
            dG0_pred1 = reaction.PredictReactionEnergy(thermo1,
                                                       pH=pH,
                                                       pMg=pMg,
                                                       I=I,
                                                       T=T)
            dG0_pred2 = reaction.PredictReactionEnergy(thermo2,
                                                       pH=pH,
                                                       pMg=pMg,
                                                       I=I,
                                                       T=T)
        except MissingReactionEnergy:
            continue

        total_list.append([dG0_pred1, dG0_pred2, reaction])

    if not total_list:
        return 0, 0

    # plot the profile graph
    plt.rcParams['text.usetex'] = False
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.size'] = 8
    plt.rcParams['lines.linewidth'] = 2
    plt.rcParams['lines.markersize'] = 2
    plt.rcParams['figure.dpi'] = 100

    data_mat = np.array([(row[0], row[1]) for row in total_list])
    # Indices of the rows in which both predictions are finite
    non_nan = list(np.isfinite(data_mat.sum(1)).nonzero()[0].flat)

    fig2 = plt.figure(figsize=(5, 5))
    plt.plot(data_mat[non_nan, 0], data_mat[non_nan, 1], 'b.')
    rmse = rms_flat((data_mat[non_nan, 0] - data_mat[non_nan, 1]).flat)
    plt.text(-50, 40, r'RMSE = %.1f [kJ/mol]' % (rmse))
    plt.xlabel(r'$\Delta G_r^\circ$ from %s [kJ/mol]' % thermo1.name)
    plt.ylabel(r'$\Delta G_r^\circ$ from %s [kJ/mol]' % thermo2.name)
    plt.plot([-200, 200], [-200, 200], 'k--')
    plt.axis([-200, 200, -200, 200])

    # `name` defaults to None, and None + "_eval" would raise a TypeError, so
    # only build the figure name when a prefix was actually given.
    if name is None:
        html_writer.embed_matplotlib_figure(fig2)
    else:
        html_writer.embed_matplotlib_figure(fig2, name=name + "_eval")

    header1 = "dG'0 (%s)" % thermo1.name
    header2 = "dG'0 (%s)" % thermo2.name
    table_headers = ["#", '|diff|', header1, header2, "reaction", "rid"]

    dict_list = []
    for dG0_1, dG0_2, reaction in total_list:
        d = {}
        # A NaN prediction gets a difference of 0 so the row sorts last.
        if np.isnan(dG0_1) or np.isnan(dG0_2):
            d["|diff|"] = 0
        else:
            d["|diff|"] = abs(dG0_1 - dG0_2)
        d[header1] = dG0_1
        d[header2] = dG0_2
        d['reaction'] = reaction.to_hypertext(show_cids=True)
        if reaction.rid is not None:
            d['rid'] = '<a href="%s">R%05d</a>' % (reaction.get_link(),
                                                   reaction.rid)
        else:
            d['rid'] = ''
        dict_list.append(d)
    dict_list.sort(key=lambda d: d['|diff|'], reverse=True)
    html_writer.write_table(dict_list, table_headers, decimal=1)