Example #1
def returnoptimised():
    "Returns the curvatures optimised to produce the smallest rms at the output, z=150mm in this case."
    output = 150
    optimised = optimise(func=optimiserms,
                         x0=[0.005, -0.005],
                         args=(output, ),
                         bounds=[(0, 0.5), (-0.5, 0)],
                         maxfun=200,
                         approx_grad=True)
    print(optimised)
    return optimised
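The keyword arguments here (func, x0, args, bounds, maxfun, approx_grad) match SciPy's L-BFGS-B wrapper, so `optimise` is presumably an alias for `scipy.optimize.fmin_l_bfgs_b`. A minimal sketch under that assumption, with a hypothetical `toy_rms` objective standing in for `optimiserms`:

# Sketch only: `optimise` is assumed to be scipy.optimize.fmin_l_bfgs_b and
# `toy_rms` is a made-up stand-in for `optimiserms`.
import numpy as np
from scipy.optimize import fmin_l_bfgs_b as optimise

def toy_rms(curvatures, output):
    # hypothetical objective: the rms grows quadratically with the curvatures
    return float(np.sum(np.array(curvatures) ** 2))

x_opt, rms_opt, info = optimise(func=toy_rms,
                                x0=[0.005, -0.005],
                                args=(150,),
                                bounds=[(0, 0.5), (-0.5, 0)],
                                maxfun=200,
                                approx_grad=True)
print(x_opt, rms_opt)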
Example #2
def iterative_assignment(exp_peaks, calculated_shifts, H_labels,
                         rounded_integrals, settings):

    calculated_shifts = np.array(calculated_shifts)

    H_labels = np.array(H_labels)

    lnum = 0

    new_assigned_shifts = []
    old_assigned_shifts = [1]

    #print("calc shifts",calculated_shifts)

    while old_assigned_shifts != new_assigned_shifts:

        if lnum == 0:

            scaled_shifts = external_scale_proton_shifts(calculated_shifts)

            scaled_mu = 0

            scaled_std = 1

        else:
            old_assigned_shifts = copy.copy(new_assigned_shifts)
            old_assigned_peaks = copy.copy(new_assigned_peaks)

            scaled_shifts, slope, intercept = internal_scale_proton_shifts(
                old_assigned_shifts, old_assigned_peaks, calculated_shifts)

            scaled_std = 1

        ###############assign methyl groups first

        #find methyl groups

        m_protons = methyl_protons(settings.InputFiles[0].split('.sdf')[0] +
                                   ".sdf")

        m_shifts = np.array([])

        # find the average shifts of these groups

        for m_group in m_protons:

            s = 0

            for proton in m_group:

                w = np.where(H_labels == proton)

                s += scaled_shifts[w] / 3

            m_shifts = np.hstack((m_shifts, s))

        #find peaks these can be assigned to

        methyl_peaks = []

        rounded_integrals = np.array(rounded_integrals)

        w = (rounded_integrals - (rounded_integrals % 3)) // 3

        for ind, peak in enumerate(sorted(list(set(exp_peaks)))[::-1]):
            methyl_peaks += [peak] * w[ind]

        #create difference matrix

        diff_matrix = np.zeros((len(m_shifts), len(methyl_peaks)))

        for ind1, i in enumerate(m_shifts):
            for ind2, j in enumerate(methyl_peaks):
                diff_matrix[ind1, ind2] = j - i

        prob_matrix = proton_probabilities(diff_matrix, scaled_mu, scaled_std)

        prob_matrix = prob_matrix**2

        prob_matrix = 1 - prob_matrix

        vertical_ind, horizontal_ind = optimise(prob_matrix)

        #unpack this assignment

        opt_labelsm = []

        opt_shiftsm = []

        opt_peaksm = []

        for j in vertical_ind:

            opt_labelsm.extend(m_protons[j])

        for i in horizontal_ind:

            opt_peaksm += 3 * [methyl_peaks[i]]

        for label in opt_labelsm:

            w = np.where(H_labels == label)

            opt_shiftsm.append(calculated_shifts[w][0])

        #remove shifts/peaks/labels from the lists still to be assigned

        calculated_shiftsp = copy.copy(calculated_shifts)

        exp_peaksp = copy.copy(exp_peaks)

        scaled_shiftsp = copy.copy(scaled_shifts)

        H_labelsp = copy.copy(H_labels)

        #peaks

        for p in opt_peaksm:

            w = np.where(exp_peaksp == p)[0][0]

            exp_peaksp = np.delete(exp_peaksp, w)

        #shifts

        for s in opt_shiftsm:

            w = np.where(calculated_shiftsp == s)[0][0]

            calculated_shiftsp = np.delete(calculated_shiftsp, w)
            scaled_shiftsp = np.delete(scaled_shiftsp, w)

        #labels

        for l in opt_labelsm:

            w = np.where(H_labelsp == l)[0][0]

            H_labelsp = np.delete(H_labelsp, w)

        ###############assign everything else

        diff_matrix = np.zeros((len(calculated_shiftsp), len(exp_peaksp)))

        for ind1, i in enumerate(scaled_shiftsp):
            for ind2, j in enumerate(exp_peaksp):
                diff_matrix[ind1, ind2] = j - i

        prob_matrix = proton_probabilities(diff_matrix, scaled_mu, scaled_std)

        # zero out shift/peak pairs more than 1 ppm apart so the zero-row check
        # below can flag unassignable shifts
        prob_matrix[abs(diff_matrix) >= 1] = 0

        ##############################find any rows that are all zeros

        b = np.where(np.sum(prob_matrix, 1) == 0)

        prob_matrix[b] = -np.inf

        prob_matrix = np.delete(prob_matrix, b, 0)

        unassignable_shifts = calculated_shiftsp[b]

        ccalculated_shiftsp = np.delete(calculated_shiftsp, b)

        ##############################

        prob_matrix = prob_matrix**2

        prob_matrix = 1 - prob_matrix

        vertical_ind, horizontal_ind = optimise(prob_matrix)

        opt_peaksp = exp_peaksp[horizontal_ind]

        opt_shiftsp = ccalculated_shiftsp[vertical_ind]

        opt_labelsp = H_labelsp[vertical_ind]

        # remove cross assignments among the non-methyl protons before combining
        opt_shiftsp, opt_peaksp, opt_labelsp = removecrossassignments(
            opt_peaksp, opt_shiftsp, opt_labelsp)

        ################ combine these assignments

        opt_peaks = np.hstack((opt_peaksm, opt_peaksp))

        opt_shifts = np.hstack((opt_shiftsm, opt_shiftsp))

        opt_labels = np.hstack((opt_labelsm, opt_labelsp))

        #check for any shifts that have not been assigned

        copyshifts = list(copy.copy(calculated_shifts))
        copylabels = list(copy.copy(H_labels))

        for shift, label in zip(opt_shifts, opt_labels):

            copyshifts.remove(shift)
            copylabels.remove(label)

        #assign these to the closest peaks - regardless of integrals

        for shift, label in zip(copyshifts, copylabels):

            mindiff = np.abs(np.array(exp_peaks) - shift).argmin()

            opt_peaks = np.append(opt_peaks, exp_peaks[mindiff])

            opt_labels = np.append(opt_labels, label)

            opt_shifts = np.append(opt_shifts, shift)

        #### sort output wrt original H labels

        indv = []

        for label in opt_labels:

            wh = np.where(H_labels == label)

            indv.append(wh[0][0])

        assigned_shifts = opt_shifts[indv]

        assigned_peaks = opt_peaks[indv]

        assigned_labels = opt_labels[indv]

        ind = np.argsort(assigned_shifts)

        assigned_shifts = assigned_shifts[ind].tolist()
        assigned_peaks = assigned_peaks[ind].tolist()
        assigned_labels = assigned_labels[ind].tolist()

        lnum += 1

        new_assigned_shifts = copy.copy(assigned_shifts)
        new_assigned_peaks = copy.copy(assigned_peaks)

    return assigned_shifts, assigned_peaks, assigned_labels, scaled_shifts
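In this and the next two examples, `optimise` is called on a cost matrix and returns a pair of row and column index arrays (vertical_ind, horizontal_ind), which is the behaviour of a linear sum assignment solver; these examples also rely on numpy (as np), copy and scipy.stats.norm being imported. A minimal sketch assuming `optimise` wraps `scipy.optimize.linear_sum_assignment`:

# Sketch only: `optimise` here is assumed to behave like
# scipy.optimize.linear_sum_assignment, pairing each row (calculated shift)
# with a column (experimental peak) so that the total cost is minimal.
import numpy as np
from scipy.optimize import linear_sum_assignment as optimise

cost = np.array([[0.1, 0.9, 0.8],
                 [0.7, 0.2, 0.9],
                 [0.8, 0.9, 0.3]])

vertical_ind, horizontal_ind = optimise(cost)
print(vertical_ind, horizontal_ind)  # -> [0 1 2] [0 1 2] for this toy matrix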
Example #3
def editsolvent_removal2(solvent, y_data, x_data, picked_peaks, peak_regions,
                         grouped_peaks, total_params, uc):
    picked_peaks = np.array(picked_peaks)

    # define the solvents

    if solvent == 'chloroform':

        exp_ppm = [7.26]

        Jv = [[]]

    elif solvent == 'dimethylsulfoxide':

        exp_ppm = [2.50]

        Jv = [[1.9, 1.9]]

    elif solvent == 'methanol':

        exp_ppm = [4.78, 3.31]

        Jv = [[], [1.7, 1.7]]

    elif solvent == 'benzene':

        exp_ppm = [7.16]

        Jv = [[]]

    elif solvent == 'pyridine':

        exp_ppm = [8.74, 7.58, 7.22]

        Jv = [[], [], []]

    else:
        exp_ppm = []
        Jv = [[]]

    # find picked peaks ppm values

    picked_peaks_ppm = x_data[picked_peaks]

    # make differences vector for referencing against multiple solvent peaks

    differences = []

    peaks_to_remove = []

    solvent_regions = []

    # now remove each peak in turn

    for ind1, speak_ppm in enumerate(exp_ppm):

        # if only a singlet is expected for this peak find solvent peak based on amplitude and position

        if len(Jv[ind1]) == 0:

            probs = norm.pdf(abs(picked_peaks_ppm - speak_ppm),
                             loc=0,
                             scale=0.1) * y_data[picked_peaks]

            # find the maximum probability

            w = np.argmax(probs)

            # append this to the list to remove

            peaks_to_remove.append(picked_peaks[w])

            # append this to the list of differences

            differences.append(speak_ppm - picked_peaks_ppm[w])

        # if the peak displays a splitting pattern then we have to be a bit more selective
        # do optimisation problem with projected peaks
        else:

            amp_res = []
            dist_res = []
            pos_res = []

            # limit the search to peaks within 1 ppm either side

            srange = (picked_peaks_ppm > speak_ppm - 1) * (picked_peaks_ppm <
                                                           speak_ppm + 1)

            for peak in picked_peaks_ppm[srange]:

                # print("picked ppm ", peak)

                fit_s_peaks, amp_vector, fit_s_y = new_first_order_peak(
                    peak, Jv[ind1], np.arange(len(x_data)), 0.1, uc, 1)

                diff_matrix = np.zeros((len(fit_s_peaks), len(picked_peaks)))

                for i, f in enumerate(fit_s_peaks):

                    for j, g in enumerate(picked_peaks):
                        diff_matrix[i, j] = abs(f - g)

                # minimise these distances

                vertical_ind, horizontal_ind = optimise(diff_matrix)

                closest_peaks = np.sort(picked_peaks[horizontal_ind])

                closest_amps = []

                for cpeak in closest_peaks:
                    closest_amps.append(total_params['A' + str(cpeak)])

                # find the amplitude residual between the closest peaks and the predicted pattern

                # normalise these amplitudes

                amp_vector = [i / max(amp_vector) for i in amp_vector]

                closest_amps = [i / max(closest_amps) for i in closest_amps]

                # append to the vector

                amp_res.append(
                    sum([
                        abs(amp_vector[i] - closest_amps[i])
                        for i in range(len(amp_vector))
                    ]))

                dist_res.append(np.sum(np.abs(closest_peaks - fit_s_peaks)))

                pos_res.append(
                    norm.pdf(abs(peak - speak_ppm), loc=0, scale=0.5))

                # use the gsd data to find amplitudes of these peaks

            pos_res = [1 - i / max(pos_res) for i in pos_res]

            dist_res = [i / max(dist_res) for i in dist_res]

            amp_res = [i / max(amp_res) for i in amp_res]

            # calculate the arithmetic mean of the three metrics for each peak

            g_mean = [(dist_res[i] + amp_res[i] + pos_res[i]) / 3
                      for i in range(0, len(amp_res))]

            # compare the residuals and find the minimum

            minres = np.argmin(g_mean)

            # append the closest peaks to the vector

            fit_s_peaks, amp_vector, fit_s_y = new_first_order_peak(
                picked_peaks_ppm[srange][minres], Jv[ind1],
                np.arange(len(x_data)), 0.1, uc, 1)

            diff_matrix = np.zeros((len(fit_s_peaks), len(picked_peaks)))

            for i, f in enumerate(fit_s_peaks):

                for j, g in enumerate(picked_peaks):
                    diff_matrix[i, j] = abs(f - g)

            # minimise these distances

            vertical_ind, horizontal_ind = optimise(diff_matrix)

            closest_peaks = np.sort(picked_peaks[horizontal_ind])

            for peak in closest_peaks:
                ind3 = np.abs(picked_peaks - peak).argmin()

                peaks_to_remove.append(picked_peaks[ind3])

                differences.append(picked_peaks_ppm[ind3] - uc.ppm(peak))

    # find the region this peak is in and append it to the list

    for peak in peaks_to_remove:

        for ind2, region in enumerate(peak_regions):

            if (peak > region[0]) & (peak < region[-1]):
                solvent_regions.append(ind2)
                break

    # now remove the selected peaks from the picked peaks list and grouped peaks

    w = np.searchsorted(picked_peaks, peaks_to_remove)

    picked_peaks = np.delete(picked_peaks, w)

    for ind4, peak in enumerate(peaks_to_remove):
        grouped_peaks[solvent_regions[ind4]] = np.delete(
            grouped_peaks[solvent_regions[ind4]],
            np.where(grouped_peaks[solvent_regions[ind4]] == peak))

    # resimulate the solvent regions

    solvent_region_ind = sorted(list(set(solvent_regions)))

    # now need to reference the spectrum

    # differences = list of differences in ppm found_solvent_peaks - expected_solvent_peaks

    s_differences = sum(differences)

    x_data = x_data + s_differences

    return peak_regions, picked_peaks, grouped_peaks, x_data, solvent_region_ind
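For the singlet case above, each picked peak is scored by a Gaussian of its distance from the expected solvent shift, weighted by its amplitude, and the best-scoring peak is removed as solvent. A minimal standalone illustration with made-up numbers:

# Illustration only: the peak positions and amplitudes below are invented.
import numpy as np
from scipy.stats import norm

expected_ppm = 7.26                                   # e.g. residual chloroform
picked_peaks_ppm = np.array([7.80, 7.27, 7.10, 2.15])
peak_amplitudes = np.array([0.2, 5.0, 0.4, 1.0])

probs = norm.pdf(np.abs(picked_peaks_ppm - expected_ppm),
                 loc=0, scale=0.1) * peak_amplitudes
w = np.argmax(probs)
print(picked_peaks_ppm[w])                            # 7.27 is treated as solvent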
Example #4
def iterative_assignment(picked_peaks, spectral_xdata_ppm,
                         total_spectral_ydata, calculated_shifts, C_labels):
    calculated_shifts = np.array(calculated_shifts)

    original_C_labels = np.array(C_labels)

    s = np.argsort(np.array(calculated_shifts))

    calculated_shifts = calculated_shifts[s]

    scaled_shifts = copy.copy(calculated_shifts)

    C_labels = original_C_labels[s]

    exp_peaks = spectral_xdata_ppm[picked_peaks]

    new_assigned_peaks = []

    new_assigned_shifts = []

    for lnum in range(0, 2):

        if lnum == 0:

            scaled_shifts = external_scale_carbon_shifts(calculated_shifts)

            scaled_mu = 0

            scaled_std = 2.486068603518297

            copy_calc_shifts = copy.copy(calculated_shifts)

        elif lnum == 1:

            old_assigned_shifts = copy.copy(new_assigned_shifts)

            old_assigned_peaks = copy.copy(new_assigned_peaks)

            scaled_shifts, slope, intercept = internal_scale_carbon_shifts(
                old_assigned_shifts, old_assigned_peaks, calculated_shifts)
            scaled_mu = 0

            scaled_std = 10

            copy_calc_shifts = copy.copy(calculated_shifts)

        ####calculate difference matrix

        diff_matrix = np.zeros((len(calculated_shifts), len(exp_peaks)))

        for ind1, i in enumerate(scaled_shifts):
            for ind2, j in enumerate(exp_peaks):
                diff_matrix[ind1, ind2] = j - i

        ####find any errors larger than 10 ppm and nans
        ####calculate pos matrix

        pos_matrix = carbon_probabilities(diff_matrix, scaled_mu, scaled_std)

        pos_matrix[abs(diff_matrix) >= 10] = 0

        pos_matrix[np.isnan(pos_matrix)] = 0

        ####calculate amp matrix

        amp_matrix = amp_kde(total_spectral_ydata, picked_peaks, pos_matrix,
                             calculated_shifts)

        ####duplicate the pos matrix along the horizontal to allow multiple assignment weighting

        pos_matrixc = copy.copy(pos_matrix)

        for d in range(0, len(calculated_shifts) - 1):
            pos_matrix = np.hstack((pos_matrix, pos_matrixc))

        ####calculate the probability matrix

        prob_matrix = (pos_matrix * amp_matrix)**0.5

        ####check for any shifts that have zero probabilities for all peaks

        b = np.where(np.sum(prob_matrix, 1) == 0)

        prob_matrix = np.delete(prob_matrix, b, 0)

        unassignable_shifts = calculated_shifts[b]

        copy_calc_shifts = np.delete(copy_calc_shifts, b)

        copy_labels = np.delete(C_labels, b)

        ####do the assignment

        vertical_ind, horizontal_ind = optimise(1 - prob_matrix)

        horizontal_ind = horizontal_ind % len(picked_peaks)

        opt_peaks = exp_peaks[horizontal_ind]

        opt_shifts = copy_calc_shifts[vertical_ind]

        opt_labels = copy_labels[vertical_ind]

        ####do some sorting

        so = np.argsort(opt_shifts)

        new_assigned_peaks = opt_peaks[so]

        new_assigned_shifts = opt_shifts[so]

        new_assigned_labels = opt_labels[so]

    ############################
    # in the third round only reassign shifts that have had a change of bias

    old_assigned_shifts = copy.copy(new_assigned_shifts)

    old_assigned_peaks = copy.copy(new_assigned_peaks)

    new_assigned_shifts = copy.copy(new_assigned_shifts)

    new_assigned_peaks = copy.copy(new_assigned_peaks)

    new_assigned_labels = copy.copy(new_assigned_labels)

    bias_weights = []

    # count how many times each experimental peak has already been assigned

    ampdivide = np.zeros(len(picked_peaks))

    peak_amps = total_spectral_ydata[picked_peaks]

    reassign_shifts_ind = []

    for i in old_assigned_peaks:
        w = np.where(exp_peaks == i)

        ampdivide[w] += 1

    c = 0

    for shift, peak in zip(old_assigned_shifts, old_assigned_peaks):

        # find where peaks are within 20 ppm window

        w = np.where((exp_peaks < peak + 10) & (exp_peaks > peak - 10))[0]

        if len(w) > 0:

            # find maximum peak height within this window - when taking into account how many times the peak has already been assigned

            # find amplitude of peak given how many times it has been assigned

            assigned_amp = (peak_amps[exp_peaks == peak] /
                            ampdivide[exp_peaks == peak])[0]

            # find amplitude of max peak in the 20 ppm window given how many times it would be assigned if the current shift was assigned to it as well

            div_amps = peak_amps / (ampdivide + 1)

            pi = np.where(exp_peaks == peak)

            div_amps[pi] = peak_amps[pi] / ampdivide[pi]

            max_window_amp = np.max(div_amps[w])

            ratio = max_window_amp / assigned_amp

            if ratio > 1:
                bias_weights.append(ratio)

                reassign_shifts_ind.append(c)

        c += 1

    ####reassign the shifts with a bias ratio above one, in order of bias, to the peak within ten ppm with the largest unassigned amplitude

    bias_weights = np.array(bias_weights)

    reassign_shifts = np.array(old_assigned_shifts)[reassign_shifts_ind]

    s = np.argsort(bias_weights)

    reassign_shifts = reassign_shifts[s]

    reassign_shifts_ind = np.array(reassign_shifts_ind)[s]

    for shift, ind in zip(reassign_shifts, reassign_shifts_ind):

        # find peak this shift is assigned to

        p = new_assigned_peaks[ind]

        pi = np.where(exp_peaks == p)

        new_peak_amps = peak_amps / (ampdivide + 1)

        new_peak_amps[pi] = peak_amps[pi] / (ampdivide[pi])

        # find peaks within 10 ppm

        w = np.where((exp_peaks < p + 10) & (exp_peaks > p - 10))[0]

        if len(w) > 0:
            assigned_peak = exp_peaks[w[np.argmax(new_peak_amps[w])]]

            new_assigned_peaks[ind] = assigned_peak

        # recalculate estimated peak heights

        ampdivide = np.zeros(len(picked_peaks))

        for i in new_assigned_peaks:
            w = np.where(exp_peaks == i)

            ampdivide[w] += 1

    #############################

    # remove cross assignments

    new_assigned_shifts, new_assigned_peaks, new_assigned_labels = removecrossassignments(
        new_assigned_peaks, new_assigned_shifts, new_assigned_labels)

    #### sort output wrt original C labels

    assigned_labels = []

    assigned_shifts = []

    assigned_peaks = []

    for label in original_C_labels:

        wh = np.where(new_assigned_labels == label)[0]

        assigned_labels.append(label)

        if len(wh) > 0:

            assigned_shifts.append(new_assigned_shifts[wh[0]])

            assigned_peaks.append(new_assigned_peaks[wh[0]])

        else:
            assigned_shifts.append('')

            assigned_peaks.append('')

    return assigned_shifts, assigned_peaks, assigned_labels, scaled_shifts
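The carbon assignment above merges position and amplitude evidence with a geometric mean, (pos_matrix * amp_matrix) ** 0.5, so a shift/peak pairing has to score reasonably on both terms before the assignment step minimises 1 - prob_matrix. A small illustration with invented matrices:

# Illustration only: both matrices are invented; rows are calculated shifts,
# columns are experimental peaks.
import numpy as np

pos_matrix = np.array([[0.9, 0.1],
                       [0.2, 0.8]])   # positional agreement
amp_matrix = np.array([[0.5, 0.9],
                       [0.7, 0.1]])   # amplitude plausibility

prob_matrix = (pos_matrix * amp_matrix) ** 0.5
print(prob_matrix)
# the assignment step then minimises 1 - prob_matrix over shift/peak pairs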
Example #5
def returnoptimised():
    "Returns the curvatures optimised to produce the smallest rms at the output, z=150mm in this case."
    output = 150
    optimised = optimise(func=optimiserms, x0=[0.005,-0.005], args=(output,), bounds=[(0,0.5),(-0.5,0)], maxfun=200, approx_grad=True)
    print(optimised)
    return optimised