def returnoptimised():
    """Returns the curvatures optimised to produce the smallest rms at the
    output, z=150mm in this case."""
    output = 150
    optimised = optimise(func=optimiserms, x0=[0.005, -0.005], args=(output,),
                         bounds=[(0, 0.5), (-0.5, 0)], maxfun=200,
                         approx_grad=True)
    print(optimised)
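
# A minimal sketch of how `optimise` may be bound here, assuming it is
# scipy.optimize.fmin_l_bfgs_b: every keyword used above (func, x0, args,
# bounds, maxfun, approx_grad) matches that routine's signature. The
# `optimiserms` below is a hypothetical stand-in for the real ray-traced rms
# objective, included only to make the sketch self-contained and runnable.
from scipy.optimize import fmin_l_bfgs_b as optimise

def optimiserms(curvatures, output):
    # Hypothetical placeholder: any smooth function of the two surface
    # curvatures would do for demonstration; the real objective traces rays
    # to z = output and returns the rms spot size there.
    c1, c2 = curvatures
    return (c1 - 0.01) ** 2 + (c2 + 0.01) ** 2
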
import copy

import numpy as np


def iterative_assignment(exp_peaks, calculated_shifts, H_labels,
                         rounded_integrals, settings):

    calculated_shifts = np.array(calculated_shifts)
    H_labels = np.array(H_labels)
    exp_peaks = np.array(exp_peaks)

    lnum = 0
    new_assigned_shifts = []
    old_assigned_shifts = [1]

    # iterate until the assignment stops changing
    while old_assigned_shifts != new_assigned_shifts:

        if lnum == 0:
            scaled_shifts = external_scale_proton_shifts(calculated_shifts)
            scaled_mu = 0
            scaled_std = 1
        else:
            old_assigned_shifts = copy.copy(new_assigned_shifts)
            old_assigned_peaks = copy.copy(new_assigned_peaks)
            scaled_shifts, slope, intercept = internal_scale_proton_shifts(
                old_assigned_shifts, old_assigned_peaks, calculated_shifts)
            scaled_std = 1

        # assign methyl groups first

        # find methyl groups
        m_protons = methyl_protons(
            settings.InputFiles[0].split('.sdf')[0] + ".sdf")
        m_shifts = np.array([])

        # find the average shifts of these groups
        for m_group in m_protons:
            s = 0
            for proton in m_group:
                w = np.where(H_labels == proton)
                s += scaled_shifts[w] / 3
            m_shifts = np.hstack((m_shifts, s))

        # find peaks these can be assigned to: each methyl consumes an
        # integral of three
        methyl_peaks = []
        rounded_integrals = np.array(rounded_integrals)
        w = (rounded_integrals - (rounded_integrals % 3)) // 3
        for ind, peak in enumerate(sorted(list(set(exp_peaks)))[::-1]):
            methyl_peaks += [peak] * w[ind]

        # create difference matrix
        diff_matrix = np.zeros((len(m_shifts), len(methyl_peaks)))
        for ind1, i in enumerate(m_shifts):
            for ind2, j in enumerate(methyl_peaks):
                diff_matrix[ind1, ind2] = j - i

        prob_matrix = proton_probabilities(diff_matrix, scaled_mu, scaled_std)
        prob_matrix = prob_matrix ** 2
        prob_matrix = 1 - prob_matrix

        vertical_ind, horizontal_ind = optimise(prob_matrix)

        # unpack this assignment
        opt_labelsm = []
        opt_shiftsm = []
        opt_peaksm = []

        for j in vertical_ind:
            opt_labelsm.extend(m_protons[j])

        for i in horizontal_ind:
            opt_peaksm += 3 * [methyl_peaks[i]]

        for label in opt_labelsm:
            w = np.where(H_labels == label)
            opt_shiftsm.append(calculated_shifts[w][0])

        # remove assigned shifts/peaks/labels from the lists still to assign
        calculated_shiftsp = copy.copy(calculated_shifts)
        exp_peaksp = copy.copy(exp_peaks)
        scaled_shiftsp = copy.copy(scaled_shifts)
        H_labelsp = copy.copy(H_labels)

        # peaks
        for p in opt_peaksm:
            w = np.where(exp_peaksp == p)[0][0]
            exp_peaksp = np.delete(exp_peaksp, w)

        # shifts
        for s in opt_shiftsm:
            w = np.where(calculated_shiftsp == s)[0][0]
            calculated_shiftsp = np.delete(calculated_shiftsp, w)
            scaled_shiftsp = np.delete(scaled_shiftsp, w)

        # labels
        for l in opt_labelsm:
            w = np.where(H_labelsp == l)[0][0]
            H_labelsp = np.delete(H_labelsp, w)

        # assign everything else
        diff_matrix = np.zeros((len(calculated_shiftsp), len(exp_peaksp)))
        for ind1, i in enumerate(scaled_shiftsp):
            for ind2, j in enumerate(exp_peaksp):
                diff_matrix[ind1, ind2] = j - i

        prob_matrix = proton_probabilities(diff_matrix, scaled_mu, scaled_std)

        # zero the probability of any assignment more than 1 ppm out
        prob_matrix[abs(diff_matrix) >= 1] = 0

        # find any rows that are all zeros and drop them as unassignable,
        # keeping shifts and labels aligned with the reduced matrix
        b = np.where(np.sum(prob_matrix, 1) == 0)
        prob_matrix = np.delete(prob_matrix, b, 0)
        unassignable_shifts = calculated_shiftsp[b]
        ccalculated_shiftsp = np.delete(calculated_shiftsp, b)
        H_labelsp = np.delete(H_labelsp, b)

        prob_matrix = prob_matrix ** 2
        prob_matrix = 1 - prob_matrix

        vertical_ind, horizontal_ind = optimise(prob_matrix)

        opt_peaksp = exp_peaksp[horizontal_ind]
        opt_shiftsp = ccalculated_shiftsp[vertical_ind]
        opt_labelsp = H_labelsp[vertical_ind]

        # resolve any cross assignments before combining with the methyl
        # assignments
        opt_shiftsp, opt_peaksp, opt_labelsp = removecrossassignments(
            opt_peaksp, opt_shiftsp, opt_labelsp)

        # combine these assignments
        opt_peaks = np.hstack((opt_peaksm, opt_peaksp))
        opt_shifts = np.hstack((opt_shiftsm, opt_shiftsp))
        opt_labels = np.hstack((opt_labelsm, opt_labelsp))

        # check for any shifts that have not been assigned
        copyshifts = list(copy.copy(calculated_shifts))
        copylabels = list(copy.copy(H_labels))
        for shift, label in zip(opt_shifts, opt_labels):
            copyshifts.remove(shift)
            copylabels.remove(label)

        # assign these to the closest peaks - regardless of integrals
        for shift, label in zip(copyshifts, copylabels):
            mindiff = np.abs(exp_peaks - shift).argmin()
            opt_peaks = np.append(opt_peaks, exp_peaks[mindiff])
            opt_labels = np.append(opt_labels, label)
            opt_shifts = np.append(opt_shifts, shift)

        # sort output wrt original H labels
        indv = []
        for label in opt_labels:
            wh = np.where(H_labels == label)
            indv.append(wh[0][0])

        assigned_shifts = opt_shifts[indv]
        assigned_peaks = opt_peaks[indv]
        assigned_labels = opt_labels[indv]

        ind = np.argsort(assigned_shifts)
        assigned_shifts = assigned_shifts[ind].tolist()
        assigned_peaks = assigned_peaks[ind].tolist()
        assigned_labels = assigned_labels[ind].tolist()

        lnum += 1

        new_assigned_shifts = copy.copy(assigned_shifts)
        new_assigned_peaks = copy.copy(assigned_peaks)

    return assigned_shifts, assigned_peaks, assigned_labels, scaled_shifts
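
# A minimal sketch of the assignment step assumed above: `optimise` takes a
# cost matrix and returns (row_indices, column_indices), which is the same
# contract as scipy.optimize.linear_sum_assignment. Whether the real helper
# does extra padding or preprocessing is an assumption; this stand-in covers
# only the rectangular min-cost matching used here.
from scipy.optimize import linear_sum_assignment

def optimise_sketch(cost_matrix):
    # Solve min-cost bipartite matching on the (possibly rectangular) cost
    # matrix: each row (calculated shift) is paired with at most one column
    # (experimental peak) so that the total cost is minimised.
    vertical_ind, horizontal_ind = linear_sum_assignment(cost_matrix)
    return vertical_ind, horizontal_ind
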
import numpy as np
from scipy.stats import norm


def editsolvent_removal2(solvent, y_data, x_data, picked_peaks, peak_regions,
                         grouped_peaks, total_params, uc):

    picked_peaks = np.array(picked_peaks)

    # define the solvents: expected shifts (ppm) and J coupling patterns (Hz)
    if solvent == 'chloroform':
        exp_ppm = [7.26]
        Jv = [[]]
    elif solvent == 'dimethylsulfoxide':
        exp_ppm = [2.50]
        Jv = [[1.9, 1.9]]
    elif solvent == 'methanol':
        exp_ppm = [4.78, 3.31]
        Jv = [[], [1.7, 1.7]]
    elif solvent == 'benzene':
        exp_ppm = [7.16]
        Jv = [[]]
    elif solvent == 'pyridine':
        exp_ppm = [8.74, 7.58, 7.22]
        Jv = [[], [], []]
    else:
        exp_ppm = []
        Jv = [[]]

    # find picked peaks ppm values
    picked_peaks_ppm = x_data[picked_peaks]

    # make differences vector for referencing against multiple solvent peaks
    differences = []
    peaks_to_remove = []
    solvent_regions = []

    # now remove each solvent peak in turn
    for ind1, speak_ppm in enumerate(exp_ppm):

        # if only a singlet is expected for this peak, find the solvent peak
        # based on amplitude and position
        if len(Jv[ind1]) == 0:
            probs = norm.pdf(abs(picked_peaks_ppm - speak_ppm),
                             loc=0, scale=0.1) * y_data[picked_peaks]

            # find the maximum probability
            w = np.argmax(probs)

            # append this peak to the removal list
            peaks_to_remove.append(picked_peaks[w])

            # append this to the list of differences
            differences.append(speak_ppm - picked_peaks_ppm[w])

        # if the peak displays a splitting pattern then we have to be more
        # selective: solve an assignment problem against the projected pattern
        else:
            amp_res = []
            dist_res = []
            pos_res = []

            # limit the search to peaks +/- 1 ppm either side
            srange = (picked_peaks_ppm > speak_ppm - 1) * \
                     (picked_peaks_ppm < speak_ppm + 1)

            for peak in picked_peaks_ppm[srange]:
                fit_s_peaks, amp_vector, fit_s_y = new_first_order_peak(
                    peak, Jv[ind1], np.arange(len(x_data)), 0.1, uc, 1)

                diff_matrix = np.zeros((len(fit_s_peaks), len(picked_peaks)))
                for i, f in enumerate(fit_s_peaks):
                    for j, g in enumerate(picked_peaks):
                        diff_matrix[i, j] = abs(f - g)

                # minimise these distances
                vertical_ind, horizontal_ind = optimise(diff_matrix)
                closest_peaks = np.sort(picked_peaks[horizontal_ind])

                # use the fitted (GSD) data to find amplitudes of these peaks
                closest_amps = []
                for cpeak in closest_peaks:
                    closest_amps.append(total_params['A' + str(cpeak)])

                # find the amplitude residual between the closest peaks and
                # the predicted pattern; normalise these amplitudes first
                amp_vector = [i / max(amp_vector) for i in amp_vector]
                closest_amps = [i / max(closest_amps) for i in closest_amps]

                # append to the residual vectors
                amp_res.append(sum([abs(amp_vector[i] - closest_amps[i])
                                    for i in range(len(amp_vector))]))
                dist_res.append(np.sum(np.abs(closest_peaks - fit_s_peaks)))
                pos_res.append(norm.pdf(abs(peak - speak_ppm),
                                        loc=0, scale=0.5))

            # normalise the three metrics
            pos_res = [1 - i / max(pos_res) for i in pos_res]
            dist_res = [i / max(dist_res) for i in dist_res]
            amp_res = [i / max(amp_res) for i in amp_res]

            # calculate the mean of the metrics for each peak
            g_mean = [(dist_res[i] + amp_res[i] + pos_res[i]) / 3
                      for i in range(0, len(amp_res))]

            # compare the residuals and find the minimum
            minres = np.argmin(g_mean)

            # append the closest peaks to the vector
            fit_s_peaks, amp_vector, fit_s_y = new_first_order_peak(
                picked_peaks_ppm[srange][minres], Jv[ind1],
                np.arange(len(x_data)), 0.1, uc, 1)

            diff_matrix = np.zeros((len(fit_s_peaks), len(picked_peaks)))
            for i, f in enumerate(fit_s_peaks):
                for j, g in enumerate(picked_peaks):
                    diff_matrix[i, j] = abs(f - g)

            # minimise these distances
            vertical_ind, horizontal_ind = optimise(diff_matrix)
            closest_peaks = np.sort(picked_peaks[horizontal_ind])

            for peak in closest_peaks:
                ind3 = np.abs(picked_peaks - peak).argmin()
                peaks_to_remove.append(picked_peaks[ind3])
                differences.append(picked_peaks_ppm[ind3] - uc.ppm(peak))

    # find the region each solvent peak is in and append it to the list
    for peak in peaks_to_remove:
        for ind2, region in enumerate(peak_regions):
            if (peak > region[0]) & (peak < region[-1]):
                solvent_regions.append(ind2)
                break

    # now remove the selected peaks from the picked peaks list and the
    # grouped peaks
    w = np.searchsorted(picked_peaks, peaks_to_remove)
    picked_peaks = np.delete(picked_peaks, w)

    for ind4, peak in enumerate(peaks_to_remove):
        grouped_peaks[solvent_regions[ind4]] = np.delete(
            grouped_peaks[solvent_regions[ind4]],
            np.where(grouped_peaks[solvent_regions[ind4]] == peak))

    # record which regions need resimulating
    solvent_region_ind = sorted(list(set(solvent_regions)))

    # now reference the spectrum; `differences` holds the ppm offsets between
    # expected and found solvent peaks
    s_differences = sum(differences)
    x_data = x_data + s_differences

    return peak_regions, picked_peaks, grouped_peaks, x_data, solvent_region_ind
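
# A small, self-contained illustration of the singlet solvent-scoring rule
# used above: candidate peaks are scored by a Gaussian position prior centred
# on the expected solvent shift, multiplied by peak amplitude, and the
# best-scoring peak is flagged as solvent. All values below are made up for
# the demonstration.
import numpy as np
from scipy.stats import norm

picked_ppm = np.array([7.95, 7.29, 7.10, 2.17])   # hypothetical peak positions
amplitudes = np.array([0.2, 1.0, 0.4, 0.9])       # hypothetical peak heights
expected = 7.26                                   # residual CHCl3 shift

scores = norm.pdf(abs(picked_ppm - expected), loc=0, scale=0.1) * amplitudes
print(picked_ppm[np.argmax(scores)])              # -> 7.29, flagged as solvent
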
import copy

import numpy as np


def iterative_assignment(picked_peaks, spectral_xdata_ppm,
                         total_spectral_ydata, calculated_shifts, C_labels):

    calculated_shifts = np.array(calculated_shifts)
    original_C_labels = np.array(C_labels)

    s = np.argsort(np.array(calculated_shifts))
    calculated_shifts = calculated_shifts[s]
    scaled_shifts = copy.copy(calculated_shifts)
    C_labels = original_C_labels[s]

    exp_peaks = spectral_xdata_ppm[picked_peaks]

    new_assigned_peaks = []
    new_assigned_shifts = []

    for lnum in range(0, 2):

        if lnum == 0:
            scaled_shifts = external_scale_carbon_shifts(calculated_shifts)
            scaled_mu = 0
            scaled_std = 2.486068603518297
            copy_calc_shifts = copy.copy(calculated_shifts)
        elif lnum == 1:
            old_assigned_shifts = copy.copy(new_assigned_shifts)
            old_assigned_peaks = copy.copy(new_assigned_peaks)
            scaled_shifts, slope, intercept = internal_scale_carbon_shifts(
                old_assigned_shifts, old_assigned_peaks, calculated_shifts)
            scaled_mu = 0
            scaled_std = 10
            copy_calc_shifts = copy.copy(calculated_shifts)

        # calculate difference matrix
        diff_matrix = np.zeros((len(calculated_shifts), len(exp_peaks)))
        for ind1, i in enumerate(scaled_shifts):
            for ind2, j in enumerate(exp_peaks):
                diff_matrix[ind1, ind2] = j - i

        # calculate position matrix, zeroing any errors larger than 10 ppm
        # and any nans
        pos_matrix = carbon_probabilities(diff_matrix, scaled_mu, scaled_std)
        pos_matrix[abs(diff_matrix) >= 10] = 0
        pos_matrix[np.isnan(pos_matrix)] = 0

        # calculate amplitude matrix
        amp_matrix = amp_kde(total_spectral_ydata, picked_peaks, pos_matrix,
                             calculated_shifts)

        # duplicate the pos matrix along the horizontal to allow multiple
        # assignment weighting
        pos_matrixc = copy.copy(pos_matrix)
        for d in range(0, len(calculated_shifts) - 1):
            pos_matrix = np.hstack((pos_matrix, pos_matrixc))

        # calculate the probability matrix
        prob_matrix = (pos_matrix * amp_matrix) ** 0.5

        # check for any shifts that have zero probabilities for all peaks
        b = np.where(np.sum(prob_matrix, 1) == 0)
        prob_matrix = np.delete(prob_matrix, b, 0)
        unassignable_shifts = calculated_shifts[b]
        copy_calc_shifts = np.delete(copy_calc_shifts, b)
        copy_labels = np.delete(C_labels, b)

        # do the assignment, folding the duplicated columns back to peaks
        vertical_ind, horizontal_ind = optimise(1 - prob_matrix)
        horizontal_ind = horizontal_ind % len(picked_peaks)

        opt_peaks = exp_peaks[horizontal_ind]
        opt_shifts = copy_calc_shifts[vertical_ind]
        opt_labels = copy_labels[vertical_ind]

        # do some sorting
        so = np.argsort(opt_shifts)
        new_assigned_peaks = opt_peaks[so]
        new_assigned_shifts = opt_shifts[so]
        new_assigned_labels = opt_labels[so]

    ############################

    # in the third round only reassign shifts that have had a change of bias
    old_assigned_shifts = copy.copy(new_assigned_shifts)
    old_assigned_peaks = copy.copy(new_assigned_peaks)

    new_assigned_shifts = copy.copy(new_assigned_shifts)
    new_assigned_peaks = copy.copy(new_assigned_peaks)
    new_assigned_labels = copy.copy(new_assigned_labels)

    bias_weights = []

    # count how many times each peak has been assigned
    ampdivide = np.zeros(len(picked_peaks))
    peak_amps = total_spectral_ydata[picked_peaks]
    reassign_shifts_ind = []

    for i in old_assigned_peaks:
        w = np.where(exp_peaks == i)
        ampdivide[w] += 1

    c = 0
    for shift, peak in zip(old_assigned_shifts, old_assigned_peaks):

        # find where peaks are within a 20 ppm window
        w = np.where((exp_peaks < peak + 10) & (exp_peaks > peak - 10))[0]

        if len(w) > 0:
            # find the amplitude of the assigned peak, given how many times
            # it has already been assigned
            assigned_amp = (peak_amps[exp_peaks == peak]
                            / ampdivide[exp_peaks == peak])[0]

            # find the amplitude of the max peak in the 20 ppm window, given
            # how many times it would be assigned if the current shift were
            # assigned to it as well
            div_amps = peak_amps / (ampdivide + 1)
            pi = np.where(exp_peaks == peak)
            div_amps[pi] = peak_amps[pi] / ampdivide[pi]

            max_window_amp = np.max(div_amps[w])
            ratio = max_window_amp / assigned_amp

            if ratio > 1:
                bias_weights.append(ratio)
                reassign_shifts_ind.append(c)
        c += 1

    # reassign the shifts with a bias ratio above one, in order of bias, to
    # the peak within ten ppm with the largest unassigned amplitude
    bias_weights = np.array(bias_weights)
    reassign_shifts = np.array(old_assigned_shifts)[reassign_shifts_ind]

    s = np.argsort(bias_weights)
    reassign_shifts = reassign_shifts[s]
    reassign_shifts_ind = np.array(reassign_shifts_ind)[s]

    for shift, ind in zip(reassign_shifts, reassign_shifts_ind):

        # find the peak this shift is assigned to
        p = new_assigned_peaks[ind]
        pi = np.where(exp_peaks == p)

        new_peak_amps = peak_amps / (ampdivide + 1)
        new_peak_amps[pi] = peak_amps[pi] / ampdivide[pi]

        # find peaks within 10 ppm
        w = np.where((exp_peaks < p + 10) & (exp_peaks > p - 10))[0]

        if len(w) > 0:
            assigned_peak = exp_peaks[w[np.argmax(new_peak_amps[w])]]
            new_assigned_peaks[ind] = assigned_peak

            # recalculate estimated peak heights
            ampdivide = np.zeros(len(picked_peaks))
            for i in new_assigned_peaks:
                w = np.where(exp_peaks == i)
                ampdivide[w] += 1

    #############################

    # remove cross assignments
    new_assigned_shifts, new_assigned_peaks, new_assigned_labels = \
        removecrossassignments(new_assigned_peaks, new_assigned_shifts,
                               new_assigned_labels)

    # sort output wrt original C labels
    assigned_labels = []
    assigned_shifts = []
    assigned_peaks = []

    for label in original_C_labels:
        wh = np.where(new_assigned_labels == label)[0]
        assigned_labels.append(label)
        if len(wh) > 0:
            assigned_shifts.append(new_assigned_shifts[wh[0]])
            assigned_peaks.append(new_assigned_peaks[wh[0]])
        else:
            assigned_shifts.append('')
            assigned_peaks.append('')

    return assigned_shifts, assigned_peaks, assigned_labels, scaled_shifts
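
# A toy illustration of the multiple-assignment trick used above: tiling the
# peak columns N times before running the assignment lets up to N calculated
# shifts land on the same experimental peak, and `% n_peaks` folds the column
# indices back to real peaks. linear_sum_assignment is assumed here as a
# stand-in for `optimise`.
import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[0.1, 0.9],     # 3 shifts, 2 peaks: one peak must be shared
                 [0.2, 0.8],
                 [0.9, 0.1]])
tiled = np.hstack([cost] * cost.shape[0])   # duplicate columns horizontally
rows, cols = linear_sum_assignment(tiled)
print(rows, cols % cost.shape[1])           # peaks [0, 0, 1]: peak 0 is shared
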