def test_most_probable_isotopic_composition():
    """Check the most probable isotopic composition, and its probability, for several formulae."""

    # A single fluorine atom
    fluorine_result = Formula.from_string('F').most_probable_isotopic_composition()
    assert fluorine_result == (Formula({"F[19]": 1, "F[18]": 0}), 1.0)

    # Two bromine atoms
    Br2 = Formula.from_string("Br2")
    expected_br2 = Formula({"Br[79]": 1, "Br[81]": 1})
    assert Br2.most_probable_isotopic_composition()[0] == expected_br2
    br2_probability = Br2.most_probable_isotopic_composition()[1]
    assert rounders(br2_probability, "0.000") == decimal.Decimal("0.5")

    # Six carbon and six bromine atoms
    C6Br6 = Formula.from_string("C6Br6")
    expected_c6br6 = Formula({"C[12]": 6, "C[13]": 0, "Br[79]": 3, "Br[81]": 3})
    assert C6Br6.most_probable_isotopic_composition()[0] == expected_c6br6
    c6br6_probability = C6Br6.most_probable_isotopic_composition()[1]
    assert rounders(c6br6_probability, "0.000") == decimal.Decimal("0.293")

    # Ten fluorine atoms
    f10_result = Formula.from_string("F10").most_probable_isotopic_composition()
    assert f10_result == (Formula({"F[19]": 10}), 1.0)

    # Only expand the isotopes of the elements that were asked for
    cf4_result = Formula.from_string("CF4").most_probable_isotopic_composition(
        elements_with_isotopes=['F'],
    )
    assert cf4_result == (Formula({'C': 1, "F[19]": 4}), 1.0)  # yapf: disable
def test_mass_from_composition():
    """The integer-keyed and symbol-keyed compositions must give the same mass."""
    expected = decimal.Decimal("41.988172")

    mass_by_number = mass_from_composition({11: 1, 9: 1})
    assert rounders(mass_by_number, "0.000000") == expected

    mass_by_symbol = mass_from_composition({"Na": 1, 'F': 1})
    assert mass_by_number == mass_by_symbol
    assert rounders(mass_by_symbol, "0.000000") == expected
def generate_spectrum_image(self, sample, rt_data, ms_data, path):
    """
    Plot one mass spectrum per row of ``rt_data`` for the given sample.

    :param sample: Label used with ``.loc`` to pick the sample's entry out of
        each row of ``rt_data`` and ``ms_data``; also used in the plot label.
    :type sample:
    :param rt_data: Table indexed by position (``.iloc``) holding the retention times.
    :type rt_data:
    :param ms_data: Table indexed by position (``.iloc``) holding objects with
        ``mass_list`` and ``mass_spec`` attributes.
    :type ms_data:
    :param path: Passed to ``PlotSpectrum`` as its ``mode`` argument.
    :type path:

    :return: None
    :rtype: None
    """

    # Imported locally, as in the original implementation.
    from GSMatch.GSMatch_Core.charts import PlotSpectrum

    n_rows = len(rt_data)

    for row_idx in range(n_rows):
        retention_time = rt_data.iloc[row_idx].loc[sample]
        spectrum = ms_data.iloc[row_idx].loc[sample]

        spectrum_points = numpy.column_stack((spectrum.mass_list, spectrum.mass_spec))
        plot_label = "{} {}".format(sample, rounders(retention_time, "0.000"))

        # TODO: Use mass range given in settings
        PlotSpectrum(
            spectrum_points,
            label=plot_label,
            xlim=(45, 500),
            mode=path,
        )

    return
def test_calculate_mass():
    """Check monoisotopic mass, average mass and m/z as calculated from formula strings."""
    formula_string = "(C6H5)2NH"

    # Monoisotopic mass from a formula string
    monoisotopic = Formula.from_string(formula_string).monoisotopic_mass
    assert rounders(monoisotopic, "0.000000") == decimal.Decimal("169.089149")

    # Average mass / molecular weight from a formula string
    average = Formula.from_string(formula_string).average_mass
    assert rounders(average, "0.00") == decimal.Decimal("169.22")

    # Two independently parsed formulae give the same average mass
    first_average = Formula.from_string(formula_string).average_mass
    second_average = Formula.from_string(formula_string).average_mass
    assert first_average == second_average

    # m/z of the charged formula string against the explicitly charged formula
    mz = Formula.from_string("C12H13N+").get_mz()
    charged_average = Formula.from_string("C12H13N", charge=1).average_mass
    assert mz == charged_average
def test_mass_from_composition__formula():
    """Check ``mass_from_composition`` on compositions parsed from strings."""
    naf_composition = string_to_composition("NaF")
    naf_mass = mass_from_composition(naf_composition)  # type: ignore
    assert rounders(naf_mass, "0.000000") == decimal.Decimal("41.988172")

    fluoride_composition = string_to_composition("F/-")
    Fminus = mass_from_composition(fluoride_composition)  # type: ignore
    difference = Fminus - 18.998403163 - 5.489e-4
    assert abs(difference) < 1e-7
def SetValue(self, value):
    """
    Store ``value`` and update the text controls to match.

    :param value: The new value. A falsy value clears both text controls.
    """
    self.value = value

    if not self.value:
        # Nothing to display: blank both fields.
        self.metric_value.ChangeValue('')
        self.inches_value.ChangeValue('')
        return

    rounded = rounders(self.value, "0.0000")
    self.metric_value.SetValue(str(rounded))
def make_alignment_table(self):
    """
    Build the retention time alignment table for the report.

    A peak is included when enough experiments contain it
    (``filter_min_experiments``) and at least one of its retention times lies
    between ``filter_min_rt`` and ``filter_max_rt``.

    :return: The table, with the two header rows set to repeat.
    :rtype: Table
    """
    panel = self.alignment_panel
    experiment_names = panel.project.experiment_name_list

    # Two header rows: a spanning title, then one column heading per experiment
    top_row = ['', Paragraph("Retention Time (minutes)", styles["Center"])]
    header_row = [Paragraph("Peak No.", styles["Normal"])]
    header_row.extend(Paragraph(name, styles["Right"]) for name in experiment_names)
    rows = [top_row, header_row]

    for peak in panel.project.rt_alignment.itertuples():
        row_data = []
        for cell in peak:
            rounded = rounders(cell, "0.00000")
            # Missing retention times are shown as a dash
            row_data.append("-" if rounded.is_nan() else rounded)
        row_data[0] = peak.Index

        n_present = panel.n_experiments - Counter(row_data)["-"]
        if n_present < panel.filter_min_experiments:
            continue

        # As long as one of the peaks is in range, add the peak
        in_range = any(
            value != "-" and panel.filter_min_rt <= value <= panel.filter_max_rt
            for value in row_data[1:]
        )
        if in_range:
            rows.append([Paragraph(str(x), styles["Right"]) for x in row_data])

    # Column widths are shared out in proportion: 1.5 parts for the peak
    # number and 2 parts for each experiment
    peak_no_prop = 1.5
    expr_prop = 2
    n_experiments = len(experiment_names)
    base_total = peak_no_prop + expr_prop * n_experiments
    peak_no_width = self.inner_width * (peak_no_prop / base_total)
    expr_width = self.inner_width * (expr_prop / base_total)
    col_widths = [peak_no_width] + [expr_width] * n_experiments

    alignment_table_style = TableStyle([
        # (x, y)
        ('FONTSIZE', (0, 0), (-1, -1), font_size),
        ("SPAN", (1, 0), (-1, 0)),
        ('ALIGN', (1, 0), (-1, 0), 'CENTER'),
        ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ("LINEBELOW", (0, 0), (-1, 1), 1, colors.black),
        ("LINEBELOW", (0, 2), (-1, -1), 0.5, colors.lightgrey),
    ])

    return Table(
        rows,
        colWidths=col_widths,
        rowHeights=[None] * len(rows),
        style=alignment_table_style,
        hAlign="LEFT",
        repeatRows=2,
    )
def __init__(
        self,
        parent,
        label,
        value='',
        id=wx.ID_ANY,
        pos=wx.DefaultPosition,
        size=wx.DefaultSize,
        style=wx.TAB_TRAVERSAL,
        name="CalibreMeasurementPanel"
        ):
    """
    :param parent: The parent window.
    :type parent: wx.Window
    :param label: Passed to ``set_label`` once the controls have been created.
    :type label:
    :param value: The initial value. A truthy value is converted to a Decimal
        and shown in the metric text control; otherwise the controls start empty.
    :type value:
    :param id: An identifier for the panel. wx.ID_ANY is taken to mean a default.
    :type id: wx.WindowID, optional
    :param pos: The panel position. The value wx.DefaultPosition indicates a default position,
        chosen by either the windowing system or wxWidgets, depending on platform.
    :type pos: wx.Point, optional
    :param size: The panel size. The value wx.DefaultSize indicates a default size,
        chosen by either the windowing system or wxWidgets, depending on platform.
    :type size: wx.Size, optional
    :param style: The window style. See wx.Panel. wx.TAB_TRAVERSAL is always added.
    :type style: int, optional
    :param name: Window name.
    :type name: str, optional
    """

    self.label = label
    self.value = Decimal(value) if value else ''

    # begin wxGlade: CalibreMeasurementPanel.__init__
    wx.Panel.__init__(
        self, parent, id, pos, size,
        style=style | wx.TAB_TRAVERSAL,
        name=name,
    )
    self.metric_value = wx.TextCtrl(self, wx.ID_ANY, "")
    self.inches_value = wx.TextCtrl(self, wx.ID_ANY, "")

    self.__set_properties()
    self.__do_layout()

    # Each text control reacts both to edits and to the Enter key
    handlers = (
        (self.on_metric_change, self.metric_value),
        (self.on_inches_change, self.inches_value),
    )
    for handler, control in handlers:
        for event_type in (wx.EVT_TEXT, wx.EVT_TEXT_ENTER):
            self.Bind(event_type, handler, control)
    # end wxGlade

    self.set_label(self.label)

    if self.value:
        initial_text = str(rounders(self.value, "0.0000"))
        self.metric_value.SetValue(initial_text)
def mm(inch):
    """
    Convert inches to mm

    The result is rounded using the format "0.0000".

    :param inch: The size in inch
    :type inch: str, int, float or decimal.Decimal

    :return: The size in mm
    :rtype: str
    """

    millimetres = Decimal(inch) * _conversion_factor
    rounded = rounders(millimetres, "0.0000")
    return str(rounded)
def round_rt(rt):
    """
    Round a retention time using a ten decimal place format.

    Ten decimal places is what Pandas writes JSON data as, so there is no need
    for greater precision.

    :param rt: The retention time to round.
    :type rt:

    :return: The value returned by ``rounders`` for ``rt``.
    :rtype:
    """

    ten_decimal_places = "0.0000000000"
    return rounders(rt, ten_decimal_places)
def inch(mm):
    """
    Convert mm to inches

    The result is rounded using the format "0.000".

    :param mm: The size in mm
    :type mm: str, int, float or decimal.Decimal

    :return: The size in inches
    :rtype: str
    """

    inches = Decimal(mm) / _conversion_factor
    rounded = rounders(inches, "0.000")
    return str(rounded)
def on_inches_change(self, event):  # wxGlade: CalibreMeasurementPanel.<event_handler>
    """
    Handle a change to the inches text control and refresh the metric control.

    :param event: The wx event; it is always skipped so other handlers still run.
    """
    text = self.inches_value.GetValue()

    if text == ".":
        # A lone decimal point is expanded to "0." and the caret moved to the end
        self.inches_value.ChangeValue("0.")
        wx.CallAfter(self.inches_value.SetInsertionPointEnd)
    elif not text:
        self.value = ''
    else:
        rounded_inches = str(rounders(text, "0.000"))
        self.value = mm(rounded_inches)

    self.metric_value.ChangeValue(self.value)
    event.Skip()
def create_msp(sample_name, mass_list, mass_spec):
    """
    Generate .MSP files for NIST MS Search

    The file is written to ``MSP/<sample_name>.MSP`` relative to the current
    working directory; the ``MSP`` directory is created if it does not exist.

    :param sample_name: Used for the ``Name:`` line and for the file name.
    :param mass_list: The masses; each is rounded with the format "0.0".
    :param mass_spec: The intensities, paired with ``mass_list`` by position.
    """

    if not os.path.exists("MSP"):
        os.makedirs("MSP")

    msp_path = os.path.join("MSP", sample_name + ".MSP")

    # The context manager closes the file even if one of the writes fails
    with open(msp_path, "w") as msp_file:
        msp_file.write("Name: {}\n".format(sample_name))
        msp_file.write("Num Peaks: {}\n".format(len(mass_list)))
        for mass, intensity in zip(mass_list, mass_spec):
            msp_file.write("{} {},\n".format(rounders(mass, "0.0"), intensity))
def _populate_ctrl(self): itemDataMap = {} for peak_idx, peak in enumerate(self.peak_list): peak_data = (peak.UID, rounders(peak.rt / 60, "0.000"), f'{rounders(peak.area, "0.000"):,}') self.expr_list.Append(peak_data) itemDataMap[peak_idx] = peak_data self.expr_list.SetItemData(peak_idx, peak_idx) self.itemDataMap = itemDataMap
def create_msp(self, sample_name, mass_list, mass_spec):
    """
    Generate .MSP files for NIST MS Search

    The file is written to ``<self.config.msp_dir>/<sample_name>.MSP``.

    :param sample_name: The name of the sample
    :type sample_name: str
    :param mass_list: The masses; each is rounded with the format "0.0".
    :type mass_list:
    :param mass_spec: The intensities, paired with ``mass_list`` by position.
    :type mass_spec:
    """

    msp_path = os.path.join(self.config.msp_dir, sample_name + ".MSP")

    # The context manager closes the file even if one of the writes fails
    with open(msp_path, "w") as msp_file:
        msp_file.write("Name: {}\n".format(sample_name))
        msp_file.write("Num Peaks: {}\n".format(len(mass_list)))
        for mass, intensity in zip(mass_list, mass_spec):
            msp_file.write("{} {},\n".format(rounders(mass, "0.0"), intensity))
def _filter_peak_list(self): # Filter peaks filtered_peak_list = [] for peak in self.peak_list: if not rounders(self.filter_min_rt, "0.00") <= rounders(peak.rt, "0.00") <= rounders(self.filter_max_rt, "0.00"): print("RT") print(self.filter_min_rt, peak.rt, rounders(peak.rt, "0.00"), self.filter_max_rt) continue if not rounders(self.filter_min_area, "0.00") <= rounders(peak.area, "0.00") <= rounders( self.filter_max_area, "0.00"): print("Area") print(self.filter_min_area, peak.area, rounders(peak.area, "0.00"), self.filter_max_area) continue filtered_peak_list.append(peak) return filtered_peak_list
def make_peak_list_tab(self):
    """
    Create the "Peak List" page of the notebook.

    The page is a ``SorterPanel`` with UID, RT and Area columns, holding one row
    for each peak in ``self.experiment.peak_list_data``.
    """
    # (heading, alignment, width) for each column, in display order
    column_specs = (
        ("UID", wx.LIST_FORMAT_LEFT, 150),
        ("RT", wx.LIST_FORMAT_RIGHT, 80),
        ("Area", wx.LIST_FORMAT_RIGHT, 130),
    )

    self.experiment_peaks = SorterPanels.SorterPanel(self.notebook, wx.ID_ANY)
    for heading, alignment, width in column_specs:
        self.experiment_peaks.AppendColumn(heading, format=alignment, width=width)

    for peak in self.experiment.peak_list_data:
        rt_cell = rounders(peak.rt / 60, "0.000")
        area_cell = f'{rounders(peak.area, "0.000"):,}'
        self.experiment_peaks.Append((peak.UID, rt_cell, area_cell))

    self.notebook.AddPage(self.experiment_peaks, "Peak List")
def _populate_table(self): for peak in self.project.rt_alignment.itertuples(): row_data = [] for experiment in peak: rt = rounders(experiment, "0.00000") if rt.is_nan(): rt = "-" row_data.append(rt) row_data[0] = peak.Index if self.n_experiments - Counter( row_data)["-"] >= self.filter_min_experiments: for experiment in row_data[1:]: if experiment != "-": if self.filter_min_rt <= experiment <= self.filter_max_rt: self.alignment_table.Append(row_data) break # As long as one of the peaks is in range, add the peak
def GetValues(self):
    """
    Returns a list of the current contents of the control.

    Empty items are left out. When ``self.decimal_places`` is -1 the values are
    converted to Decimal without rounding; otherwise they are rounded using
    ``self._rounders_string``.

    :return: The non-empty values, in the order they appear in the control.
    :rtype: list
    """

    item_texts = (
        self.m_listCtrl.GetItemText(index)
        for index in range(self.m_listCtrl.GetItemCount())
    )
    non_empty = [text for text in item_texts if text]

    if self.decimal_places == -1:
        # Don't format
        return [Decimal(text) for text in non_empty]

    return [rounders(text, self._rounders_string) for text in non_empty]
def _populate_ms_sim_grid(self): if self.selected_peak: peak = self.selected_peak # print(peak.ms_comparison) print(type(peak.ms_comparison)) for samples, sim_score in peak.ms_comparison.items(): # print(samples) left_sample, right_sample = samples.split(" & ") print(left_sample, right_sample) left_sample_idx = self._expr_name_list.index(left_sample) right_sample_idx = self._expr_name_list.index(right_sample) print(left_sample_idx) print(right_sample_idx) rounded_sim_score = str(rounders(sim_score, "0.000")) self.ms_similarity_grid.SetCellValue(left_sample_idx, right_sample_idx, rounded_sim_score) self.ms_similarity_grid.SetCellValue(right_sample_idx, left_sample_idx, rounded_sim_score)
def add_peak_compound(self, peak, show_peak_number=True):
    """
    Append a table of the hits for ``peak``, followed by a spacer, to ``self.elements``.

    :param peak: The peak to report. Its ``rt``, ``area``, ``peak_number`` and
        ``hits`` attributes are read.
    :param show_peak_number: Whether to show the peak number after the retention time.
    :type show_peak_number: bool, optional
    """
    # NOTE(review): as written, ``.replace(' ', ' ')`` swaps a space for a space
    # and so does nothing; presumably the replacement was meant to be a
    # non-breaking space -- confirm against the original file.
    rt_string = str(rounders(peak.rt / 60, '0.000000')).rjust(15).replace(' ', ' ')
    area_string = f"{rounders(peak.area, '0.000000'):,}".rjust(25).replace(' ', ' ')
    peak_no_string = str(peak.peak_number).rjust(4).replace(' ', ' ')

    rt_text = f"Retention Time: {rt_string} minutes"
    if show_peak_number:
        rt_text += f"{' '*8}Peak Number: {peak_no_string}"
    rt_paragraph = Paragraph(rt_text, styles["Normal"])

    # Title row (retention time and area), then the column headings
    title_row = [
        rt_paragraph,
        '',
        Paragraph(f"Peak Area: {area_string}", styles["Normal"]),
        '',
        '',
    ]
    heading_row = [
        Paragraph("Hit Num.", styles["Normal"]),
        Paragraph("Name", styles["Normal"]),
        Paragraph("CAS", styles["Right"]),
        Paragraph("Match", styles["Right"]),
        Paragraph("R Match", styles["Right"]),
    ]
    rows = [title_row, heading_row]

    # One row per hit, numbered from 1
    for hit_number, hit in enumerate(peak.hits, start=1):
        rows.append([
            Paragraph(f'{hit_number}', styles["Right"]),
            Paragraph(f"{hit.name}", styles["Normal"]),
            Paragraph(f"{hit.cas}", styles["Right"]),
            Paragraph(f"{hit.match_factor}", styles["Right"]),
            Paragraph(f"{hit.reverse_match_factor}", styles["Right"]),
        ])

    # Column widths as twentieths of the usable page width
    col_widths = [
        self.inner_width * (parts / 20)
        for parts in (2.2, 10.6, 3, 2, 2.2)
    ]

    hits_style = TableStyle([
        # (x, y)
        ('FONTSIZE', (0, 0), (-1, -1), font_size),
        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
        ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ("LINEAFTER", (0, 0), (-2, -1), 0.5, colors.lightgrey),
        ("LINEBELOW", (0, 0), (-1, 1), 1, colors.black),
        ("LINEABOVE", (0, 0), (-1, 0), 1, colors.black),
        ("LINEBELOW", (0, 2), (-1, -1), 0.5, colors.lightgrey),
        ("SPAN", (0, 0), (1, 0)),
        ("SPAN", (2, 0), (4, 0)),
    ])

    hits_table = Table(
        rows,
        colWidths=col_widths,
        rowHeights=[None] * len(rows),
        style=hits_style,
        hAlign="LEFT",
        repeatRows=2,
    )
    self.elements.append(hits_table)
    self.elements.append(Spacer(1, cm/2))
def import_processing(
        jcamp_file,
        spectrum_csv_file,
        report_csv_file,
        combined_csv_file,
        bb_points=9,
        bb_scans=2,
        noise_thresh=2,
        target_range=(0, 120),
        tophat_struct="1.5m",
        nistpath="../MSSEARCH",
        base_peak_filter=['73'],
        ExprDir=".",
        ):
    """
    Process a JCAMP file: detect peaks, search each one against NIST and write the reports.

    :param jcamp_file: The JCAMP file to read. Its base name, without the
        extension, is used as the sample name.
    :param spectrum_csv_file: Output file for the binned mass spectra (OpenChrom-like CSV).
    :param report_csv_file: Output file for the peak report (turbomass-like CSV).
    :param combined_csv_file: Output file for the GunShotMatch Combine-like CSV.
    :param bb_points: Passed to ``BillerBiemann`` as ``points``.
    :param bb_scans: Passed to ``BillerBiemann`` as ``scans``.
    :param noise_thresh: Passed to ``num_ions_threshold`` along with the noise level.
    :param target_range: Lower (exclusive) and upper (inclusive) retention time
        limits, in minutes, for peaks to be reported.
    :param tophat_struct: Passed to ``tophat`` as ``struct``.
    :param nistpath: Stored in the module-level ``nist_path``.
    :param base_peak_filter: Base peak masses to discard; each value is converted to ``int``.
    :param ExprDir: Directory for the TIC, peak list and experiment files.

    :return: 0
    :rtype: int
    """

    global nist_path
    nist_path = nistpath

    # Parameters
    base_peak_filter = [int(x) for x in base_peak_filter]
    target_range = tuple(target_range)
    sample_name = os.path.splitext(os.path.basename(jcamp_file))[0]
    number_of_peaks = 80

    data = JCAMP_reader(jcamp_file)

    # list of all retention times, in seconds
    times = data.get_time_list()
    # get Total Ion Chromatogram
    tic = data.get_tic()
    # RT Range, time step, no. scans, min, max, mean and median m/z
    data.info()

    # Mass Binning
    im = build_intensity_matrix_i(data)  # convert to intensity matrix

    print(" Minimum m/z bin: {}".format(im.get_min_mass()))
    print(" Maximum m/z bin: {}".format(im.get_max_mass()))

    # Write Binned Mass Spectra to OpenChrom-like CSV file.
    # The context manager closes the file even if a write fails.
    ms = im.get_ms_at_index(0)  # first mass spectrum
    with open(spectrum_csv_file, 'w') as spectrum_csv:
        spectrum_csv.write('RT(milliseconds);RT(minutes) - NOT USED BY IMPORT;RI;')
        spectrum_csv.write(';'.join(str(mz) for mz in ms.mass_list))
        spectrum_csv.write("\n")

        for scan, scan_time in enumerate(times):
            spectrum_csv.write("{};{};{};".format(
                int(scan_time * 1000),
                rounders((scan_time / 60), "0.0000000000"),
                0,
            ))
            ms = im.get_ms_at_index(scan)
            spectrum_csv.write(';'.join(str(intensity) for intensity in ms.mass_spec))
            spectrum_csv.write('\n')

    ## Data filtering

    # Note that Turbomass does not use smoothing for qualitative method.
    # Top-hat baseline Correction seems to bring down noise,
    #  retaining shapes, but keeps points on actual peaks

    _, n_mz = im.get_size()  # number of scans, number of bins
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic_smooth = savitzky_golay(ic)
        ic_bc = tophat(ic_smooth, struct=tophat_struct)
        im.set_ic_at_index(ii, ic_bc)

    # Peak Detection based on Biller and Biemann, 1974, with a window
    #  of n points, and combining y scans if they apex next to each other
    peak_list = BillerBiemann(im, points=bb_points, scans=bb_scans)

    print(" Number of peaks identified before filtering: {}".format(len(peak_list)))

    # Filtering peak lists with automatic noise filtering
    noise_level = window_analyzer(tic)
    peak_list = num_ions_threshold(peak_list, noise_thresh, noise_level)
    # why use 2 for number of ions above threshold?
    print(" Number of peaks identified: {}".format(len(peak_list)))

    # Peak Areas
    peak_area_list = []
    filtered_peak_list = []

    for peak in peak_list:
        apex_spectrum = peak.get_mass_spectrum()
        apex_mass_list = apex_spectrum.mass_list
        apex_mass_spec = apex_spectrum.mass_spec
        base_peak_intensity = max(apex_mass_spec)
        # Index of the first ion that has the maximum intensity
        base_peak_index = [
            index for index, intensity in enumerate(apex_mass_spec)
            if intensity == base_peak_intensity
        ][0]
        base_peak_mass = apex_mass_list[base_peak_index]

        if base_peak_mass in base_peak_filter:
            # skip the peak if the base peak is at e.g. m/z 73, i.e. septum bleed
            continue

        area = peak_sum_area(im, peak)
        peak.set_area(area)
        peak_area_list.append(area)
        filtered_peak_list.append(peak)

    # Save the TIC and Peak List
    tic.write(os.path.join(ExprDir, "{}_tic.dat".format(sample_name)), formatting=False)
    store_peaks(filtered_peak_list, os.path.join(ExprDir, "{}_peaks.dat".format(sample_name)))

    # Indices of the peaks, ordered from smallest to largest area
    # from https://stackoverflow.com/questions/16878715/how-to-find-the-index-of-n-largest-elements-in-a-list-or-np-array-python?lq=1
    top_peaks = sorted(range(len(peak_area_list)), key=lambda x: peak_area_list[x])

    report_buffer = []

    # enumerate gives each peak's position in top_peaks directly, rather than
    # searching the list again for every peak
    for peak_number, index in enumerate(top_peaks, start=1):
        peak = filtered_peak_list[index]

        # Retention time (minutes, 3dp)
        RT = rounders(peak.get_rt() / 60, "0.000")

        if not target_range[0] < RT <= target_range[1]:
            continue  # skip the peak if it is outside the desired range

        # scan number, not that we really need it as the peak object has the spectrum
        apex_index = data.get_index_at_time(peak.get_rt())
        Scan = apex_index + 1
        # TIC intensity, as proxy for Peak height, which should be from baseline
        Height = '{:,}'.format(rounders(tic.get_intensity_at_index(apex_index), "0"))
        # Peak area, originally in "intensity seconds", so dividing by 60 to
        # get "intensity minutes" like turbomass uses
        Area = '{:,}'.format(rounders(peak.get_area() / 60, "0.0"))

        report_buffer.append([peak_number, RT, Scan, Height, Area])

    # Largest area first, limited to the requested number of peaks,
    # then ordered by scan number
    filtered_report_buffer = report_buffer[::-1][:number_of_peaks]
    filtered_report_buffer.sort(key=operator.itemgetter(2))

    # Write to turbomass-like CSV file and GunShotMatch Combine-like CSV file.
    # The context managers close both files even if a search or write fails.
    with open(report_csv_file, "w") as report_csv, open(combined_csv_file, "w") as combine_csv:
        combine_csv.write(sample_name)
        combine_csv.write("\n")

        report_csv.write("#;RT;Scan;Height;Area\n")
        combine_csv.write("Retention Time;Peak Area;;Lib;Match;R Match;Name;CAS Number;Scan\n")

        for index, row in enumerate(filtered_report_buffer):
            report_csv.write(";".join([str(i) for i in row]))

            ms = im.get_ms_at_index(row[2] - 1)
            msp_name = "{}_{}".format(sample_name, row[1])

            create_msp(msp_name, ms.mass_list, ms.mass_spec)
            matches_dict = nist_ms_comparison(msp_name, ms.mass_list, ms.mass_spec)

            combine_csv.write("{};{};Page {} of 80;;;;;;{}\n".format(row[1], row[4], index + 1, row[2]))

            for hit in range(1, 6):
                hit_data = matches_dict["Hit{}".format(hit)]
                report_csv.write(str(hit_data))
                report_csv.write(";")
                combine_csv.write(";;{};{};{};{};{};{};\n".format(
                    hit,
                    hit_data["Lib"],
                    hit_data["MF"],
                    hit_data["RMF"],
                    hit_data["Name"],
                    hit_data["CAS"],
                ))
            report_csv.write("\n")

            time.sleep(2)

    # Create an experiment
    expr = Experiment(sample_name, filtered_peak_list)
    expr.sele_rt_range(["{}m".format(target_range[0]), "{}m".format(target_range[1])])
    store_expr(os.path.join(ExprDir, "{}.expr".format(sample_name)), expr)

    return 0
def propgrid(self):
    """
    Returns a property for wx.propgrid.PropertyGrid

    The property class and the initial value are chosen from ``self.type``.
    The property is created with ``self.label`` and ``self.name``, and is given
    ``self.help`` as its help string when that is set.

    :return: The property, or ``NotImplemented`` if ``self.type`` is not handled.
    :rtype:
    """

    # Default to the string form of the value; Measurement values are passed through unchanged
    if isinstance(self.type, Measurement):
        value = self.value
    else:
        value = str(self.value)

    arguments = dict()
    property_function = wx.propgrid.StringProperty
    # TODO: specify rounding format for floats

    if self.type == str:  # String
        if self.value is None:
            value = "Not Specified"
    elif self.type == longstr:  # LongString
        property_function = wx.propgrid.LongStringProperty
    elif self.type == datetime:  # Date and Time
        # self.value is passed to fromtimestamp, so it is treated as a timestamp
        value = datetime.datetime.fromtimestamp(self.value).strftime("%d/%m/%Y %H:%M:%S")
    elif self.type in (int, float):
        # A missing number is replaced with -1; note that this also updates self.value
        if self.value is None:
            self.value = -1
        value = self.type(self.value)
        if self.type == int:  # Int
            property_function = wx.propgrid.IntProperty
        elif self.type == float:  # Float
            property_function = wx.propgrid.FloatProperty
    elif self.type == Decimal:  # Decimal, displayed as a string with requested formatting
        if self.decimal_format:
            value = str(rounders(self.value, self.decimal_format))
        else:
            value = str(Decimal(self.value))
    elif self.type == format:  # Filetype
        value = lookup_filetype(self.value)
    elif self.type == dir:  # Directory
        property_function = wx.propgrid.DirProperty
    # NOTE(review): this branch tests for ``rtrange`` but the check below compares
    # against ``RTRange`` -- confirm that both names are intended.
    elif self.type in {MassRange, rtrange}:
        value = self.value
        if self.type == MassRange:  # Mass Range
            property_function = MassRange
        elif self.type == RTRange:  # Retention Time Range
            property_function = RTRange
    elif self.type in {list, fixed_list}:
        # NOTE(review): the nesting of this branch was reconstructed from
        # whitespace-mangled source -- confirm that the property class is only
        # chosen for editable lists and that ``else`` pairs with ``self.editable``.
        if self.editable:
            arguments = dict(
                labels=self.dropdown_choices,
                values=list(range(len(self.dropdown_choices))),
            )
            if self.type == list:  # EditEnumProperty
                # TODO: Finish ComboBoxProperty. wx.propgrid.EditEnumProperty will have to do for now
                property_function = wx.propgrid.EditEnumProperty
            elif self.type == fixed_list:  # EnumProperty
                # The value passed to the property is the index of the current choice
                value = self.dropdown_choices.index(self.value)
                property_function = wx.propgrid.EnumProperty
        else:
            if self.value is None:
                value = ""
    elif self.type == bool:
        value = bool(self.value)
        property_function = wx.propgrid.BoolProperty
    elif self.type == CalibreProperty:
        property_function = CalibreProperty
    elif isinstance(self.type, Measurement):
        property_function = MeasurementProperty
        arguments["decimal_places"] = self.type.decimal_places
    else:
        return NotImplemented

    prop = property_function(self.label, self.name, value=value, **arguments)
    if self.help:
        prop.SetHelpString(self.help)
    if self.type == bool:
        # Show booleans as a checkbox
        prop.SetAttribute(wx.propgrid.PG_BOOL_USE_CHECKBOX, True)
    return prop
def identify_compounds(self, rt_alignment, n_hits=10):
    """
    Identify the compounds that produced each of the peaks in the Chromatogram

    The peaks whose rounded retention time appears in the alignment for this
    experiment are searched, and the hits are written to a
    ``<name>_COMBINED.csv`` file.

    :param rt_alignment: Indexed with ``self.name`` to obtain the retention
        times to keep.
    :type rt_alignment:
    :param n_hits: The number of hits to return from NIST MS Search
    :type n_hits: int

    :return: 0
    :rtype: int
    """

    print(f"Identifying Compounds for {self.name}")

    rt_list = rt_alignment[self.name]
    # tic = self.tic
    n_peaks = 80

    print(rt_list)

    # Obtain area for each peak
    # NOTE(review): peak_area_list is filled but not read again in this function.
    peak_area_list = []
    for peak in self.peak_list:
        area = peak.get_area()
        peak_area_list.append(area)

    # Write output to CSV file
    # NOTE(review): the output directory is a hard-coded absolute path.
    combined_csv_file = os.path.join("/home/domdf/.config/GunShotMatch", "{}_COMBINED.csv".format(self.name))
    with open(combined_csv_file, "w") as combine_csv:

        # Sample name and header row
        combine_csv.write(f"{self.name}\n{csv_header_row}\n")

        report_buffer = []
        # Filter to those peaks present in all samples, by UID
        for peak in self.peak_list:

            # if str(rounders(peak.get_rt()/60,"0.000")) in rt_list:
            # print(peak.get_rt()/60.0)
            # TODO: there is a simpler way to do this as part of the DPA functions
            # DDF 20/11/19

            # limit to 10 decimal places as that's what Pandas writes JSON data as; no need for greater precision
            print(rounders(peak.get_rt() / 60, "0.0000000000"))
            if rounders(peak.get_rt() / 60, "0.0000000000") in rt_list:
                print(internal_config.nist_path)
                # Row layout: [0] blank, [1] RT in minutes, [2] blank,
                # [3] mass spectrum, [4] formatted area, [5] the peak itself
                report_buffer.append([
                    '',
                    # rounders(peak.get_rt()/60,"0.000"),
                    (peak.get_rt() / 60),
                    '',
                    peak.get_mass_spectrum(),
                    # '{:,}'.format(rounders(peak.get_area()/60,"0.0"))
                    '{:,}'.format(peak.get_area() / 60),
                    peak
                ])

        # TODO: I thought this was supposed to filter to show the 80 largest peaks,
        # but I'm not sure it actually does that
        # DDF 20/11/19

        # Reverse list order
        report_buffer = report_buffer[::-1]
        # Get last 80 peaks
        report_buffer = report_buffer[:n_peaks]
        # Sort by retention time
        report_buffer.sort(key=operator.itemgetter(1))

        # Iterate over peaks
        for row_idx, row in enumerate(report_buffer):
            # TODO: some tidying up here; is writing to disk the most efficient?
            # DDF 20/11/19

            # Get mass spectrum
            ms = row[3]

            # NOTE(review): qualified_peak collects the hits below but is not
            # stored or returned within this function.
            qualified_peak = QualifiedPeak.from_peak(row[5])

            # Create MSP file for the peak
            create_msp("{}_{}".format(self.name, row[1]), ms.mass_list, ms.mass_spec)

            # NOTE(review): the source was whitespace-mangled, so it is unclear
            # whether the commented-out text below ends after ``ms.mass_list,``
            # or also covers ``ms.mass_spec,`` -- confirm against the original file.
            matches_dict = self.nist_ms_comparison(
                "{}_{}".format(self.name, row[1]),
                # ms.mass_list,
                ms.mass_spec, n_hits)

            combine_csv.write("{};{};Page {} of 80;;;;;;{}\n".format(
                row[1], row[4], row_idx + 1, row[2]))

            for hit in range(1, n_hits + 1):
                search_result = pyms_nist_search.SearchResult.from_pynist(
                    matches_dict["Hit{}".format(hit)])

                # The library column is written as an empty string
                combine_csv.write(';;{};{};{};{};{};{};\n'.format(
                    hit,
                    '',  # search_result.library,
                    search_result.match_factor,
                    search_result.reverse_match_factor,
                    search_result.name,
                    search_result.cas,
                ))

                qualified_peak.hits.append(search_result)

            time.sleep(2)

    return 0
def test_rounders():
    """``rounders`` returns a Decimal rounded according to the given format."""
    rounded = utils.rounders(1234.5678, "0.0")
    assert isinstance(rounded, decimal.Decimal)

    rounded_again = utils.rounders(1234.5678, "0.0")
    assert str(rounded_again) == "1234.6"
def test_masses(formula, mass, exact_mass):
    """
    Check ``mass`` and ``exact_mass`` of a formula, rounded with the format "0.00000".

    :param formula: The formula string to parse.
    :param mass: The expected ``mass``, in a form accepted by ``decimal.Decimal``.
    :param exact_mass: The expected ``exact_mass``, in a form accepted by ``decimal.Decimal``.
    """
    f = Formula.from_string(formula)
    print(f)

    rounded_mass = rounders(f.mass, "0.00000")
    assert rounded_mass == decimal.Decimal(mass)

    rounded_exact_mass = rounders(f.exact_mass, "0.00000")
    assert rounded_exact_mass == decimal.Decimal(exact_mass)
def test_relative_atomic_masses():
    """The first entry of ``relative_atomic_masses`` rounds to 1.0079."""
    first_entry = relative_atomic_masses[0]
    expected = decimal.Decimal("1.0079")
    assert rounders(first_entry, "0.0000") == expected
def add_peak_compound(self, peak):
    """
    Append a table of the hits for ``peak``, with statistics, to ``self.elements``.

    The first row of the table holds a nested table with the retention time,
    peak area, similarity, peak number and number of experiments. The second
    row holds the column headings, and each following row describes one hit.
    A spacer is appended after the table.

    :param peak: The peak to report. Its ``rt``, ``rt_stdev``, ``area``,
        ``area_stdev``, ``average_ms_comparison``, ``ms_comparison_stdev``,
        ``peak_number`` and ``hits`` attributes are read, as is ``len(peak)``.
    """
    # Widths shared by the "mean ± rsd" column groups, as fractions of the page width
    rsd_width = self.inner_width * (1.9 / 25)
    pm_width = self.inner_width * (0.5 / 25)
    mf_width = self.inner_width * (1.5 / 25)

    col_widths = [
        self.inner_width * (0.5 / 25),
        self.inner_width * (1 / 25),  # Hit Num.
        pm_width,
        rsd_width,
        self.inner_width * (9 / 25),  # Name
        self.inner_width * (2.8 / 25),  # CAS
        mf_width,  # Match
        pm_width,
        rsd_width,
        mf_width,  # R Match
        pm_width,
        rsd_width,
    ]
    # Final column width should be remaining width
    col_widths.append(self.inner_width - sum(col_widths))

    hits_style = TableStyle([
        # (x, y)
        # Whole Table
        ('FONTSIZE', (0, 0), (-1, -1), font_size),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        # Lines
        ("LINEAFTER", (0, 0), (-2, -1), 0.5, colors.lightgrey),
        ("LINEBELOW", (0, 0), (-1, 1), 1, colors.black),
        ("LINEABOVE", (0, 0), (-1, 0), 1, colors.black),
        ("LINEBELOW", (0, 2), (-1, -1), 0.5, colors.lightgrey),
        # rt_area table
        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
        ('LEFTPADDING', (0, 0), (-1, 0), 0),
        ('RIGHTPADDING', (0, 0), (-1, 0), 0),
        ("SPAN", (0, 0), (-1, 0)),
        # Hit Num
        ("SPAN", (1, 1), (3, 1)),
        ('ALIGN', (2, 1), (2, -1), 'CENTER'),
        ('ALIGN', (1, 1), (1, 1), 'CENTER'),
        ('RIGHTPADDING', (2, 1), (2, -1), 0),
        ('LEFTPADDING', (2, 1), (2, -1), 0),
        # Match
        ("SPAN", (6, 1), (8, 1)),
        ('ALIGN', (7, 1), (7, -1), 'CENTER'),
        ('ALIGN', (6, 1), (6, 1), 'CENTER'),
        ('RIGHTPADDING', (7, 1), (7, -1), 0),
        ('LEFTPADDING', (7, 1), (7, -1), 0),
        # R Match
        ("SPAN", (9, 1), (11, 1)),
        ('ALIGN', (10, 1), (10, -1), 'CENTER'),
        ('ALIGN', (9, 1), (9, 1), 'CENTER'),
        ('RIGHTPADDING', (10, 1), (10, -1), 0),
        ('LEFTPADDING', (10, 1), (10, -1), 0),
    ])

    rt_string = str(rounders(peak.rt / 60, '0.000000'))
    area_string = f"{rounders(peak.area, '0.000000'):,}"
    # NOTE(review): as written, ``.replace(' ', ' ')`` swaps a space for a space
    # and so does nothing; presumably the replacement was meant to be a
    # non-breaking space -- confirm against the original file.
    peak_no_string = str(peak.peak_number).rjust(4).replace(' ', ' ')

    rt_area_table_style = TableStyle([
        # (x, y)
        ('FONTSIZE', (0, 0), (-1, -1), font_size),
        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
        ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ("LINEAFTER", (2, 0), (2, -1), 0.5, colors.lightgrey),
        ("LINEAFTER", (5, 0), (5, -1), 0.5, colors.lightgrey),
        ("LINEAFTER", (8, 0), (8, -1), 0.5, colors.lightgrey),
        ("LINEAFTER", (10, 0), (10, -1), 0.5, colors.lightgrey),
        ("LINEBELOW", (0, 0), (-1, 0), 0.5, colors.lightgrey),
        # ("LINEAFTER", (0, 0), (-1, -1), 0.5, colors.lightgrey),  # Diagnostic lines
    ])

    # Column widths of the nested table, derived from the widths of the hits
    # table. The statements below are order-dependent: each append uses the
    # sum of the widths added so far.
    rt_area_table_col_widths = [
        self.inner_width * (0.5 / 25) + pm_width + rsd_width,  # RT / Area label
        self.inner_width * (1 / 25),  # xbar
        self.inner_width * (5 / 25),  # mean
        pm_width,  # ±
        rsd_width,  # rsd
        col_widths[4] - self.inner_width * (5 / 25) - pm_width - rsd_width,  # spacer
        col_widths[5],  # Similarity / Peak No. label
        self.inner_width * (1 / 25),  # xbar / peak no.
        mf_width,  # mean
        pm_width,  # ±
        rsd_width,  # rsd
    ]
    # End of No. Experiments column should align with end of `R Match x`
    rt_area_table_col_widths.append(
        sum(col_widths[:-1]) - sum(rt_area_table_col_widths))
    # The trailing comma below only wraps the (unused) result of append in a tuple
    rt_area_table_col_widths.append(self.inner_width * (1 / 25)),  # No. Experiments value
    # Final column width should be remaining width
    rt_area_table_col_widths.append(self.inner_width - sum(rt_area_table_col_widths))

    # Two rows: retention time and similarity, then peak area, peak number and
    # number of experiments. The figure after each "±" is the standard
    # deviation divided by the mean, formatted as a percentage.
    rt_area_table_rows = [[
        Paragraph(f"Retention Time:", styles["Normal"]),
        Paragraph(f"{self.xbar} =", styles["Right"]),
        Paragraph(f"{rt_string} minutes", styles["Right"]),
        Paragraph(f"±", styles["Center"]),
        Paragraph(f"{peak.rt_stdev / peak.rt:.2%}", styles["Right"]),
        '',
        Paragraph(f"Similarity:", styles["Normal"]),
        Paragraph(f"{self.xbar} =", styles["Right"]),
        Paragraph(f"{peak.average_ms_comparison:.1f}", styles["Right"]),
        Paragraph(f"±", styles["Center"]),
        Paragraph(
            f"{peak.ms_comparison_stdev / peak.average_ms_comparison:3.2%}",
            styles["Right"]),
        '',
        '',
        '',
    ], [
        Paragraph(f"Peak Area:", styles["Normal"]),
        Paragraph(f"{self.xbar} =", styles["Right"]),
        Paragraph(f"{area_string}", styles["Right"]),
        Paragraph(f"±", styles["Center"]),
        Paragraph(
            f"{peak.area_stdev / peak.area:.2%}",
            styles["Right"]),
        '',
        Paragraph(f"Peak Number:", styles["Normal"]),
        Paragraph(f"{peak_no_string}", styles["Right"]),
        '',
        '',
        '',
        Paragraph(f"No. Experiments:", styles["Normal"]),
        Paragraph(f"{len(peak)}", styles["Right"]),
        '',
    ]]

    # Keyword arguments shared by the nested table and the hits table
    align_repeat = dict(hAlign="LEFT", repeatRows=2)

    # Row 0 holds the nested table; row 1 holds the column headings
    rows = [[
        Table(
            rt_area_table_rows,
            colWidths=rt_area_table_col_widths,
            style=rt_area_table_style,
            rowHeights=[None, None],
            **align_repeat,
        ),
        '',
        '',
        '',
        '',
        '',
    ], [
        '',
        Paragraph(f"Hit Num. {self.xbar}", styles["Center"]),
        '',
        '',
        Paragraph("Name", styles["Normal"]),
        Paragraph("CAS", styles["Center"]),
        Paragraph(f"Match {self.xbar}", styles["Center"]),
        '',
        '',
        Paragraph(f"R Match {self.xbar}", styles["Center"]),
        '',
        '',
        Paragraph("Freq.", styles["Center"]),
    ]]

    hit_list = list(enumerate(peak.hits))

    def make_stats_para(mean, stdev):
        # Three cells: the mean, a "±" sign, and stdev / mean as a percentage
        rsd = self.convert_spaces(f"{stdev / mean:3.2%}", 7)
        return [
            Paragraph(f"{mean:.1f}", styles["Right"]),
            Paragraph("±", styles["Center"]),
            Paragraph(f"{rsd}", styles["Right"]),
        ]

    # One row per hit; the first cell is the hit's position, counted from 1
    for hit_number, hit in hit_list:
        rows.append([
            Paragraph(f'{hit_number + 1}', styles["Center"]),
            *make_stats_para(hit.average_hit_number, hit.hit_number_stdev),
            Paragraph(f"{hit.name}", styles["Normal"]),
            Paragraph(f"{hit.cas}", styles["Center"]),
            *make_stats_para(hit.match_factor, hit.match_factor_stdev),
            *make_stats_para(hit.reverse_match_factor,
                             hit.reverse_match_factor_stdev),
            Paragraph(f"{len(hit)}", styles["Center"]),
        ])

    self.elements.append(
        Table(
            rows,
            colWidths=col_widths,
            rowHeights=[None] * len(rows),
            style=hits_style,
            **align_repeat,
        ))
    self.elements.append(Spacer(1, cm / 3))