def nist_ms_comparison(sample_name, mass_list, mass_spec):
	"""
	Search the NIST MS database for ``sample_name`` and return the top 5 hits.

	An MSP file named ``MSP/<sample_name>.MSP`` must already exist.

	:param sample_name: Name of the sample to search for.
	:param mass_list: Unused here; kept for interface compatibility with callers.
	:param mass_spec: Unused here; kept for interface compatibility with callers.

	:return: Mapping of hit label ("Hit1".."Hit5") to a dict with keys
		"Name", "MF", "RMF", "CAS" and "Lib".
	:rtype: dict
	"""

	try:
		pynist.generate_ini(nist_path, "mainlib", 5)

		raw_output = pynist.nist_db_connector(nist_path, "MSP/{}.MSP".format(sample_name))

		# Normalise the hit headers so each hit row becomes "Hit<n>;field;field;..."
		for i in range(1, 6):
			raw_output = raw_output.replace("Hit {}  : ".format(i), "Hit{};".format(i))
		raw_output = raw_output.replace("<<", '"').replace(">>", '"').split("\n")

		matches_dict = {}

		# Rows 1..5 of the output hold the five hits; split each on ";" and
		# strip the field tags (MF:, RMF:, CAS:, Lib:) and stray spaces.
		for i in range(1, 6):
			row = raw_output[i].split(";")
			matches_dict[row[0]] = {
				"Name": row[1],
				"MF": row[3].replace("MF:", '').replace(" ", ''),
				"RMF": row[4].replace("RMF:", '').replace(" ", ''),
				"CAS": row[6].replace("CAS:", '').replace(" ", ''),
				"Lib": row[8].replace("Lib:", '').replace(" ", ''),
				}

	except Exception:
		# Restore the NIST INI file before aborting; a partially-completed
		# search leaves the external tool in a bad state.
		traceback.print_exc()  # print the error
		pynist.reload_ini(nist_path)
		sys.exit(1)

	print("\n")
	pynist.reload_ini(nist_path)
	return matches_dict
Beispiel #2
0
    def nist_ms_comparison(self, sample_name, n_hits=5):
        """
        Search the NIST MS database for ``sample_name`` and return the top hits.

        ``<sample_name>.MSP`` must already exist in ``internal_config.msp_dir``.

        :param sample_name: Name of the sample to search for.
        :type sample_name: str
        :param n_hits: Number of database hits to return.
        :type n_hits: int

        :return: Mapping of hit label ("Hit1".."Hit<n_hits>") to a dict
            with keys "Name", "MF", "RMF" and "CAS".
        :rtype: dict
        """

        try:
            pynist.generate_ini(internal_config.nist_path, "mainlib", n_hits)

            raw_output = pynist.nist_db_connector(
                internal_config.nist_path,
                os.path.join(internal_config.msp_dir,
                             "{}.MSP".format(sample_name)))

            # Normalise the hit headers (spacing varies between NIST versions)
            # so each hit row becomes "Hit<n>;field;field;...".
            for i in range(n_hits + 1):
                for header in ("Hit {}  : ", "Hit {} : ", "Hit {}: "):
                    raw_output = raw_output.replace(
                        header.format(i), "Hit{};".format(i))

            # Compound names are wrapped in <<...>> and may themselves contain
            # ";"; convert to quotes so csv.reader keeps them as one field.
            raw_output = raw_output.replace("<<", '"').replace(">>",
                                                               '"').split("\n")

            matches_dict = {}

            for i in range(1, n_hits + 1):
                row = list(
                    csv.reader([raw_output[i]], delimiter=";",
                               quotechar='"'))[0]

                # Strip the field tags (MF:, RMF:, CAS:) and stray spaces.
                matches_dict[row[0]] = {
                    "Name": row[1],
                    "MF": row[3].replace("MF:", '').replace(" ", ''),
                    "RMF": row[4].replace("RMF:", '').replace(" ", ''),
                    "CAS": row[6].replace("CAS:", '').replace(" ", ''),
                }

        except Exception:
            # Restore the NIST INI file before aborting; a partially-completed
            # search leaves the external tool in a bad state.
            traceback.print_exc()  # print the error
            pynist.reload_ini(internal_config.nist_path)
            sys.exit(1)

        print("\r\033[KSearch Complete")  # , end='')
        pynist.reload_ini(internal_config.nist_path)
        return matches_dict
Beispiel #3
0
    def run(self):
        """
        Run the post-quantitative processing pipeline for this project.

        Which stages run is controlled by the ``do_*`` flags on
        ``self.config``: qualitative processing, merging, hit counting
        (chart data), spectra export, and chart generation.

        NOTE(review): relies on module-level ``rt_alignment`` and
        ``ms_alignment`` mappings keyed by prefix — confirm they are
        populated before ``run()`` is called.
        """
        # Indicate which steps to perform
        print(f"do_qualitative: {self.config.do_qualitative}")
        print(f"do_merge: {self.config.do_merge}")
        print(f"do_counter: {self.config.do_counter}")
        print(f"do_spectra: {self.config.do_spectra}")
        print(f"do_charts: {self.config.do_charts}")

        # Loads the experiment file created during Quantitative Processing
        for prefix in self.config.prefixList:
            file_name = os.path.join(self.config.expr_dir, prefix + ".expr")
            self.expr_list.append(load_expr(file_name))

        if self.config.do_qualitative:
            print("Qualitative Processing in Progress...")
            for prefix in self.config.prefixList:
                # print(list(rt_alignment[prefix]))
                self.qualitative_processing(prefix, list(rt_alignment[prefix]))

        if self.config.do_merge:
            self.merge()

        if self.config.do_counter:
            # Build the per-compound chart data table, indexed by compound name.
            chart_data = self.match_counter(self.ms_comparisons(ms_alignment))
            chart_data = chart_data.set_index("Compound", drop=True)

            # remove duplicate compounds:
            # chart_data_count = Counter(chart_data["Compound"])
            chart_data_count = Counter(chart_data.index)
            # Rows for compounds that appear more than once are aggregated
            # (summed per prefix, std-dev recomputed) into replacement_data,
            # then the duplicates are dropped and the aggregates re-appended.
            replacement_data = {
                "Compound": [],
                f"{self.lot_name} Peak Area": [],
                f"{self.lot_name} Standard Deviation": []
            }

            for prefix in self.config.prefixList:
                replacement_data[prefix] = []

            for compound in chart_data_count:
                if chart_data_count[compound] > 1:
                    replacement_data["Compound"].append(compound)
                    replacement_data[f"{self.lot_name} Peak Area"].append(
                        sum(chart_data.loc[compound,
                                           f"{self.lot_name} Peak Area"]))

                    peak_data = []
                    for prefix in self.config.prefixList:
                        replacement_data[prefix].append(
                            sum(chart_data.loc[compound, prefix]))
                        peak_data.append(sum(chart_data.loc[compound, prefix]))

                    replacement_data[
                        f"{self.lot_name} Standard Deviation"].append(
                            numpy.std(peak_data))

                    chart_data = chart_data.drop(compound, axis=0)

            replacement_data = pandas.DataFrame(replacement_data)
            replacement_data = replacement_data.set_index("Compound",
                                                          drop=False)
            # NOTE(review): DataFrame.append was removed in pandas >= 2.0;
            # this requires pandas < 2.0 (or migration to pandas.concat).
            chart_data = chart_data.append(replacement_data, sort=False)
            chart_data.sort_index(inplace=True)
            chart_data = chart_data.drop("Compound", axis=1)
            chart_data['Compound Names'] = chart_data.index

            # Persist the chart data so later runs can skip do_counter.
            chart_data.to_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)),
                              sep=";")
        else:
            # do_counter skipped: reload the chart data written by a previous run.
            chart_data = pandas.read_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)),
                                         sep=";",
                                         index_col=0)

        # chart_data = chart_data.set_index("Compound", drop=True)

        if self.config.do_spectra:
            self.generate_spectra_from_alignment(rt_alignment, ms_alignment)

            # Write Mass Spectra to OpenChrom-like CSV files

            def generate_spectra_csv(rt_data, ms_data, name):
                # Write Mass Spectra to OpenChrom-like CSV files
                # Header row: RT columns then one column per m/z taken from
                # the first spectrum (assumes all spectra share one mass list
                # — TODO confirm).

                ms = ms_data[0]  # first mass spectrum

                spectrum_csv_file = os.path.join(self.config.spectra_dir,
                                                 self.lot_name,
                                                 f"{name}_data.csv")
                # NOTE(review): plain open/close with no try/finally — a
                # `with` block would guarantee the file handle is released.
                spectrum_csv = open(spectrum_csv_file, 'w')
                spectrum_csv.write(
                    'RT(milliseconds);RT(minutes) - NOT USED BY IMPORT;RI;')
                spectrum_csv.write(';'.join(str(mz) for mz in ms.mass_list))
                spectrum_csv.write("\n")

                # One row per retention time: RT in ms, RT in minutes, RI=0,
                # then the intensity for each m/z channel.
                for rt, ms in zip(rt_data, ms_data):
                    spectrum_csv.write(
                        f"{int(rt * 60000)};{rounders(rt, '0.0000000000')};0;")
                    spectrum_csv.write(';'.join(
                        str(intensity) for intensity in ms.mass_spec))
                    spectrum_csv.write('\n')
                spectrum_csv.close()

            for prefix in self.config.prefixList:
                print(prefix)
                # print(rt_alignment[prefix])
                # print(ms_alignment[prefix])
                generate_spectra_csv(rt_alignment[prefix],
                                     ms_alignment[prefix], prefix)

        if self.config.do_charts:
            print("\nGenerating Charts")

            # Re-save chart data so the charts always reflect what is plotted.
            chart_data.to_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)),
                              sep=";")

            maybe_make(os.path.join(self.config.charts_dir, self.lot_name))

            if chart_data.empty:
                print("ALERT: No peaks were found for compounds that have")
                print("       previously been reported in literature.")
                print("       Check the results for more information\n")

            else:
                # Imported lazily so chart dependencies are only needed when
                # do_charts is enabled.
                from GSMatch.GSMatch_Core.charts import box_whisker_wrapper, radar_chart_wrapper, \
                 mean_peak_area_wrapper, \
                 peak_area_wrapper

                # from GSMatch.GSMatch_Core.charts import peak_area_wrapper, radar_chart_wrapper

                radar_chart_wrapper(chart_data, [self.lot_name],
                                    use_log=10,
                                    legend=False,
                                    mode=os.path.join(self.config.charts_dir,
                                                      self.lot_name,
                                                      "radar_log10_peak_area"))
                radar_chart_wrapper(chart_data, [self.lot_name],
                                    use_log=False,
                                    legend=False,
                                    mode=os.path.join(self.config.charts_dir,
                                                      self.lot_name,
                                                      "radar_peak_area"))
                mean_peak_area_wrapper(chart_data, [self.lot_name],
                                       mode=os.path.join(
                                           self.config.charts_dir,
                                           self.lot_name, "mean_peak_area"))
                peak_area_wrapper(chart_data,
                                  self.lot_name,
                                  self.config.prefixList,
                                  mode=os.path.join(self.config.charts_dir,
                                                    self.lot_name,
                                                    "peak_area_percentage"))
                peak_area_wrapper(chart_data,
                                  self.lot_name,
                                  self.config.prefixList,
                                  percentage=False,
                                  mode=os.path.join(self.config.charts_dir,
                                                    self.lot_name,
                                                    "peak_area"))
                peak_area_wrapper(chart_data,
                                  self.lot_name,
                                  self.config.prefixList,
                                  use_log=10,
                                  mode=os.path.join(
                                      self.config.charts_dir, self.lot_name,
                                      "log10_peak_area_percentage"))

                samples_to_compare = [(self.lot_name, self.config.prefixList)]

                box_whisker_wrapper(chart_data,
                                    samples_to_compare,
                                    mode=os.path.join(self.config.charts_dir,
                                                      self.lot_name,
                                                      "box_whisker"))

        # Record which prefixes were processed in a .info file for the lot.
        with open(
                os.path.join(self.config.results_dir, f"{self.lot_name}.info"),
                "w") as info_file:
            for prefix in self.config.prefixList:
                info_file.write(f"{prefix}\n")

        # TODO: self.make_archive()

        pynist.reload_ini(self.config.nist_path)

        print("\nComplete.")