Code example #1
    def __init__(self, left_sample, right_sample, config=None):
        from domdf_python_tools.paths import maybe_make

        if config is None:
            from GSMatch.GSMatch_Core.Config import GSMConfig
            self.config = GSMConfig("config.ini")
        else:
            self.config = config  # GSMConfig object

        maybe_make(os.path.join(self.config.charts_dir, "Comparison"))

        with open(left_sample, "r") as info_file:
            self.left_prefixList = [
                x.rstrip("\r\n") for x in info_file.readlines()
            ]

        with open(right_sample, "r") as info_file:
            self.right_prefixList = [
                x.rstrip("\r\n") for x in info_file.readlines()
            ]

        self.left_sample = os.path.splitext(os.path.split(left_sample)[-1])[0]
        self.right_sample = os.path.splitext(
            os.path.split(right_sample)[-1])[0]

        print(self.left_sample, self.left_prefixList)
        print(self.right_sample, self.right_prefixList)

        self.comparison_name = f"{self.left_sample} v {self.right_sample}"
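
The constructor above calls maybe_make to ensure the output directory exists before anything is written into it. As a minimal standalone sketch of that behaviour (matching the tests in examples #7 and #9 below):

import os
from domdf_python_tools.paths import maybe_make

# Creates the directory if it is missing; a no-op if it already exists,
# so it is safe to call unconditionally on every run.
maybe_make(os.path.join("charts", "Comparison"))

# Missing intermediate directories raise FileNotFoundError unless
# parents=True is passed.
maybe_make(os.path.join("charts", "a", "b"), parents=True)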
Code example #2
    def generate_spectra_from_alignment(self, rt_data, ms_data):
        """
		Mass Spectra Images

		:param rt_data:
		:type rt_data:
		:param ms_data:
		:type ms_data:

		:return:
		:rtype:
		"""

        path = os.path.join(self.config.spectra_dir, self.lot_name)
        maybe_make(path)

        print("\nGenerating mass spectra images. Please wait.")

        # Delete Existing Files
        for filename in os.listdir(path):
            os.unlink(os.path.join(path, filename))

        if len(rt_data) > 20:
            arguments = [(sample, rt_data, ms_data, path)
                         for sample in rt_data.columns.values]
            with Pool(len(arguments)) as p:
                p.map(self.spectrum_image_wrapper, arguments)
        else:
            for sample in rt_data.columns.values:
                self.generate_spectrum_image(sample, rt_data, ms_data, path)
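
Note that Pool(len(arguments)) starts one worker process per sample column once the alignment is large enough. If that oversubscribes the machine, capping the pool size is a common alternative (a sketch, not the project's code):

import os
from multiprocessing import Pool

def map_capped(func, arguments):
    # Cap the pool size at the CPU count instead of one process per task.
    with Pool(min(len(arguments), os.cpu_count() or 1)) as pool:
        return pool.map(func, arguments)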
Code example #3
File: Config.py  Project: domdfcoding/GunShotMatch
    def log_dir(self, value):
        """
		Sets the directory where log files will be stored.
		The directory will be created if it does not already exist.

		:param value:
		:type value: str or :class:`pathlib.Path`
		"""

        self._log_dir = str(relpath2(value))
        maybe_make(self._log_dir)
Code example #4
File: Config.py  Project: domdfcoding/GunShotMatch
    def msp_dir(self, value):
        """
		Sets the directory where MSP files for NIST MS Search will be stored.
		The directory will be created if it does not already exist.

		:param value:
		:type value: str or :class:`pathlib.Path`
		"""

        self._msp_dir = str(relpath2(value))
        maybe_make(self._msp_dir)
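
Examples #3 and #4 are property setters; the @log_dir.setter / @msp_dir.setter decorators are cropped out of these excerpts. A minimal sketch of the pattern, assuming relpath2 is importable from domdf_python_tools.paths alongside maybe_make, as the original project's usage suggests:

from domdf_python_tools.paths import maybe_make, relpath2

class Config:
    def __init__(self):
        self.log_dir = "logs"  # hypothetical default; triggers the setter below

    @property
    def log_dir(self) -> str:
        return self._log_dir

    @log_dir.setter
    def log_dir(self, value):
        # Store a normalised relative path and create the directory immediately.
        self._log_dir = str(relpath2(value))
        maybe_make(self._log_dir)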
Code example #5
File: Config.py  Project: domdfcoding/GunShotMatch
	def log_dir(self, value):
		"""
		Sets the directory where log files will be stored.
		The directory will be created if it does not already exist.

		:type value: str or pathlib.Path
		"""
		
		if value:
			if not isinstance(value, pathlib.Path):
				value = pathlib.Path(value)
			
			self._log_dir = value
			maybe_make(self._log_dir)
Code example #6
File: Config.py  Project: domdfcoding/GunShotMatch
	def msp_dir(self, value):
		"""
		Sets the directory where MSP files for NIST MS Search will be stored.
		The directory will be created if it does not already exist.

		:type value: str or pathlib.Path
		"""
		
		if value:
			if not isinstance(value, pathlib.Path):
				value = pathlib.Path(value)
				
			self._msp_dir = value
			maybe_make(self._msp_dir)
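
Examples #5 and #6 are another revision of the same setters from the same project: they skip falsy values, store a pathlib.Path rather than a str, and drop the relpath2 normalisation. The pattern is otherwise unchanged; assigning to the property records the location and immediately creates it with maybe_make.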
Code example #7
def test_maybe_make_parents(tmp_pathplus):
    test_dir = tmp_pathplus / "maybe_make" / "child1" / "child2"

    assert test_dir.exists() is False

    # Without parents=True should raise an error

    with pytest.raises(FileNotFoundError):
        paths.maybe_make(test_dir)

    # Maybe make the directory
    paths.maybe_make(test_dir, parents=True)

    assert test_dir.exists()
Code example #8
File: Config.py  Project: domdfcoding/GunShotMatch
	def __load_defaults(self):
		# Ensure the config directory exists, then write a default config file
		maybe_make(self._configfile.parent)
		
		self._configfile.write_text("""
[main]

[paths]

[recent_projects]

[charts]
		""")
		
		self._load_from_file()
		return
Code example #9
def test_maybe_make_string(tmp_pathplus):
    test_dir = tmp_pathplus / "maybe_make"

    assert test_dir.exists() is False

    # Maybe make the directory
    paths.maybe_make(str(test_dir))

    assert test_dir.exists()

    # Maybe make the directory
    paths.maybe_make(str(test_dir))

    assert test_dir.exists()

    # Delete the directory and replace with a file
    test_dir.rmdir()
    assert not test_dir.exists()
    test_dir.touch()
    assert test_dir.exists()
    assert test_dir.is_file()

    paths.maybe_make(str(test_dir))
    assert test_dir.exists()
    assert test_dir.is_file()
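
The closing assertions are the point of this test: when a regular file already occupies the path, maybe_make neither replaces it nor raises, and the file is left untouched.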
Code example #10
def init_repo(repo_path: pathlib.Path, templates: Environment) -> List[str]:
    """
	Initialise a new repository, creating the necessary files to get started.

	:param repo_path: Path to the repository root.
	:param templates: The Jinja2 environment whose globals provide the project settings.
	"""

    repo_path = PathPlus(repo_path)
    templates.globals["len"] = len

    init_repo_templates = Environment(  # nosec: B701
        loader=jinja2.FileSystemLoader(str(init_repo_template_dir)),
        undefined=jinja2.StrictUndefined,
    )
    init_repo_templates.globals.update(templates.globals)

    # package
    (repo_path / templates.globals["import_name"]).maybe_make()

    repo_license = templates.globals["license"]
    if repo_license in license_init_file_lookup:
        __init__ = init_repo_templates.get_template(
            f"{license_init_file_lookup[repo_license]}._py")
    else:
        __init__ = init_repo_templates.get_template("generic._py")

    __init__path = repo_path / templates.globals["import_name"] / "__init__.py"
    __init__path.write_clean(__init__.render())

    # tests
    if templates.globals["enable_tests"]:
        maybe_make(repo_path / templates.globals["tests_dir"])
        (repo_path / templates.globals["tests_dir"] / "__init__.py").touch()
        (repo_path / templates.globals["tests_dir"] /
         "requirements.txt").touch()

    # docs
    docs_files: Sequence[str]

    if templates.globals["enable_docs"]:
        docs_files = enable_docs(repo_path, templates, init_repo_templates)
    else:
        docs_files = ()

    # other
    for filename in {"README.rst"}:
        template = init_repo_templates.get_template(filename)
        (repo_path / filename).write_clean(template.render())

    (repo_path / "LICENSE").write_clean(
        get_license_text(
            repo_license,
            copyright_years=datetime.datetime.today().year,
            author=templates.globals["author"],
            project_name=templates.globals["modname"],
        ))

    # Touch requirements file
    (repo_path / "requirements.txt").touch()

    return [
        posixpath.join(templates.globals["import_name"], "__init__.py"),
        posixpath.join(templates.globals["tests_dir"], "__init__.py"),
        *docs_files,
        posixpath.join(templates.globals["tests_dir"], "requirements.txt"),
        "requirements.txt",
        "LICENSE",
        "README.rst",
    ]
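
Example #10 mixes the standalone function (maybe_make(repo_path / ...)) with the equivalent PathPlus method ((repo_path / ...).maybe_make()). A minimal sketch of the method form, with hypothetical paths:

from domdf_python_tools.paths import PathPlus

pkg_dir = PathPlus("my_repo") / "my_package"  # hypothetical layout
pkg_dir.maybe_make(parents=True)  # same semantics as maybe_make(pkg_dir, parents=True)
(pkg_dir / "__init__.py").touch()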
Code example #11
def hit_data(samples):
    # print(request.args)
    samples = samples.split("/")

    maybe_make("cache")  # Internal Cache Directory

    if "data" in request.args:
        # print(request.args.get("data", type=str))
        hit = ConsolidatedSearchResult.from_quoted_string(
            request.args.get("data", type=str))
        # print(hit)

        CAS = hit.cas
        Name = hit.name

        if CAS.replace("-", '').replace("0", '') == '':
            # CAS Number is all zeros
            pickle_name = hashlib.md5(Name.encode("utf-8")).hexdigest()
            # html_file_name = os.path.join(html_file_directory, f"{pickle_name}_{rt}.html")

            if (cache_dir / pickle_name).exists():
                with open(cache_dir / pickle_name, "rb") as f:
                    comp = pickle.load(f)
            else:
                # Check that a connection can be established to the PubChem server
                try:
                    urllib.request.urlopen(API_BASE, timeout=2)

                except urllib.error.HTTPError as e:
                    if e.code == 400:
                        pass
                    else:
                        raise e

                except urllib.error.URLError:
                    warnings.warn(
                        "Unable to connect to PubChem server. Check your internet connection and try again."
                    )
                    return render_template(
                        "properties_template_offline.html",
                        hit=hit,
                        samples=samples,
                    )

                try:
                    comp = get_compounds(CAS, 'name')[0]
                except IndexError:
                    comp = None

                # Save to cache
                with open(cache_dir / pickle_name, "wb") as f:
                    pickle.dump(comp, f)

        else:
            if (cache_dir / CAS).exists():
                with open(cache_dir / CAS, "rb") as f:
                    comp = pickle.load(f)
            else:
                # if True:
                try:
                    comp = get_compounds(CAS, 'name')[0]
                except IndexError:
                    comp = None

                # Save to cache
                with open(cache_dir / CAS, "wb") as f:
                    pickle.dump(comp, f)

        # TODO: Combine information from hit.reference_data and comp, e.g. synonyms

        return render_template(
            "properties_template_2.html",
            comp=comp,
            hit=hit,
            samples=samples,
        )

    else:
        # Legacy mode

        index = request.args.get("index", 0, type=int)
        filename = request.args.get("filename", '', type=str)

        peak_data = []

        if filename == '':
            return "Please provide a filename with ?filename=", 400

        with open(filename, "r") as jsonfile:
            for i, peak in enumerate(jsonfile):
                if i == index:
                    peak_data = json.loads(peak)

        if not peak_data:
            # Index was out of range
            return "Peak index out of range", 400

        CAS = peak_data["hits"][0]["CAS"]
        Name = peak_data["hits"][0]["Name"]
        rt = peak_data["average_rt"]

        if CAS.replace("-", '').replace("0", '') == '':
            # CAS Number is all zeros
            pickle_name = hashlib.md5(Name.encode("utf-8")).hexdigest()
            # html_file_name = os.path.join(html_file_directory, f"{pickle_name}_{rt}.html")

            if (cache_dir / pickle_name).exists():
                with open(cache_dir / pickle_name, "rb") as f:
                    comp = pickle.load(f)
            else:
                # if True:
                comp = get_compounds(Name, 'name')[0]
                # Save to cache
                with open(cache_dir / pickle_name, "wb") as f:
                    pickle.dump(comp, f)

        else:
            if (cache_dir / CAS).exists():
                with open(cache_dir / CAS, "rb") as f:
                    comp = pickle.load(f)
            else:
                # if True:
                comp = get_compounds(CAS, 'name')[0]
                # Save to cache
                with open(cache_dir / CAS, "wb") as f:
                    pickle.dump(comp, f)

        return render_template(
            "properties_template.html",
            comp=comp,
            data=peak_data,
            samples=samples,
        )
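
Both branches of hit_data repeat the same pickle-backed cache-or-fetch pattern, keyed on the CAS number (or an MD5 of the compound name when the CAS is all zeros). A condensed sketch of that pattern; the helper name is hypothetical:

import pickle

def cached_fetch(cache_dir, key, fetch):
    # Return the pickled object for ``key``; on a miss, call ``fetch()``
    # and cache the result (even None, so failed lookups are not retried).
    cache_file = cache_dir / key
    if cache_file.exists():
        with open(cache_file, "rb") as f:
            return pickle.load(f)
    comp = fetch()
    with open(cache_file, "wb") as f:
        pickle.dump(comp, f)
    return comp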
Code example #12
    def run(self):
        # Indicate which steps to perform
        print(f"do_qualitative: {self.config.do_qualitative}")
        print(f"do_merge: {self.config.do_merge}")
        print(f"do_counter: {self.config.do_counter}")
        print(f"do_spectra: {self.config.do_spectra}")
        print(f"do_charts: {self.config.do_charts}")

        # Loads the experiment file created during Quantitative Processing
        for prefix in self.config.prefixList:
            file_name = os.path.join(self.config.expr_dir, prefix + ".expr")
            self.expr_list.append(load_expr(file_name))

        if self.config.do_qualitative:
            print("Qualitative Processing in Progress...")
            for prefix in self.config.prefixList:
                # print(list(rt_alignment[prefix]))
                self.qualitative_processing(prefix, list(rt_alignment[prefix]))

        if self.config.do_merge:
            self.merge()

        if self.config.do_counter:
            chart_data = self.match_counter(self.ms_comparisons(ms_alignment))
            chart_data = chart_data.set_index("Compound", drop=True)

            # remove duplicate compounds:
            # chart_data_count = Counter(chart_data["Compound"])
            chart_data_count = Counter(chart_data.index)
            replacement_data = {
                "Compound": [],
                f"{self.lot_name} Peak Area": [],
                f"{self.lot_name} Standard Deviation": []
            }

            for prefix in self.config.prefixList:
                replacement_data[prefix] = []

            for compound in chart_data_count:
                if chart_data_count[compound] > 1:
                    replacement_data["Compound"].append(compound)
                    replacement_data[f"{self.lot_name} Peak Area"].append(
                        sum(chart_data.loc[compound,
                                           f"{self.lot_name} Peak Area"]))

                    peak_data = []
                    for prefix in self.config.prefixList:
                        replacement_data[prefix].append(
                            sum(chart_data.loc[compound, prefix]))
                        peak_data.append(sum(chart_data.loc[compound, prefix]))

                    replacement_data[
                        f"{self.lot_name} Standard Deviation"].append(
                            numpy.std(peak_data))

                    chart_data = chart_data.drop(compound, axis=0)

            replacement_data = pandas.DataFrame(replacement_data)
            replacement_data = replacement_data.set_index("Compound",
                                                          drop=False)
            chart_data = chart_data.append(replacement_data, sort=False)
            chart_data.sort_index(inplace=True)
            chart_data = chart_data.drop("Compound", axis=1)
            chart_data['Compound Names'] = chart_data.index

            chart_data.to_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)),
                              sep=";")
        else:
            chart_data = pandas.read_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)),
                                         sep=";",
                                         index_col=0)

        # chart_data = chart_data.set_index("Compound", drop=True)

        if self.config.do_spectra:
            self.generate_spectra_from_alignment(rt_alignment, ms_alignment)

            # Write Mass Spectra to OpenChrom-like CSV files

            def generate_spectra_csv(rt_data, ms_data, name):
                # Write Mass Spectra to OpenChrom-like CSV files

                ms = ms_data[0]  # first mass spectrum

                spectrum_csv_file = os.path.join(self.config.spectra_dir,
                                                 self.lot_name,
                                                 f"{name}_data.csv")
                with open(spectrum_csv_file, 'w') as spectrum_csv:
                    spectrum_csv.write(
                        'RT(milliseconds);RT(minutes) - NOT USED BY IMPORT;RI;')
                    spectrum_csv.write(';'.join(str(mz) for mz in ms.mass_list))
                    spectrum_csv.write("\n")

                    for rt, ms in zip(rt_data, ms_data):
                        spectrum_csv.write(
                            f"{int(rt * 60000)};{rounders(rt, '0.0000000000')};0;")
                        spectrum_csv.write(';'.join(
                            str(intensity) for intensity in ms.mass_spec))
                        spectrum_csv.write('\n')

            for prefix in self.config.prefixList:
                print(prefix)
                # print(rt_alignment[prefix])
                # print(ms_alignment[prefix])
                generate_spectra_csv(rt_alignment[prefix],
                                     ms_alignment[prefix], prefix)

        if self.config.do_charts:
            print("\nGenerating Charts")

            chart_data.to_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)),
                              sep=";")

            maybe_make(os.path.join(self.config.charts_dir, self.lot_name))

            if chart_data.empty:
                print("ALERT: No peaks were found for compounds that have")
                print("       previously been reported in literature.")
                print("       Check the results for more information\n")

            else:
                from GSMatch.GSMatch_Core.charts import (
                    box_whisker_wrapper, mean_peak_area_wrapper,
                    peak_area_wrapper, radar_chart_wrapper)

                # from GSMatch.GSMatch_Core.charts import peak_area_wrapper, radar_chart_wrapper

                radar_chart_wrapper(chart_data, [self.lot_name],
                                    use_log=10,
                                    legend=False,
                                    mode=os.path.join(self.config.charts_dir,
                                                      self.lot_name,
                                                      "radar_log10_peak_area"))
                radar_chart_wrapper(chart_data, [self.lot_name],
                                    use_log=False,
                                    legend=False,
                                    mode=os.path.join(self.config.charts_dir,
                                                      self.lot_name,
                                                      "radar_peak_area"))
                mean_peak_area_wrapper(chart_data, [self.lot_name],
                                       mode=os.path.join(
                                           self.config.charts_dir,
                                           self.lot_name, "mean_peak_area"))
                peak_area_wrapper(chart_data,
                                  self.lot_name,
                                  self.config.prefixList,
                                  mode=os.path.join(self.config.charts_dir,
                                                    self.lot_name,
                                                    "peak_area_percentage"))
                peak_area_wrapper(chart_data,
                                  self.lot_name,
                                  self.config.prefixList,
                                  percentage=False,
                                  mode=os.path.join(self.config.charts_dir,
                                                    self.lot_name,
                                                    "peak_area"))
                peak_area_wrapper(chart_data,
                                  self.lot_name,
                                  self.config.prefixList,
                                  use_log=10,
                                  mode=os.path.join(
                                      self.config.charts_dir, self.lot_name,
                                      "log10_peak_area_percentage"))

                samples_to_compare = [(self.lot_name, self.config.prefixList)]

                box_whisker_wrapper(chart_data,
                                    samples_to_compare,
                                    mode=os.path.join(self.config.charts_dir,
                                                      self.lot_name,
                                                      "box_whisker"))

        with open(
                os.path.join(self.config.results_dir, f"{self.lot_name}.info"),
                "w") as info_file:
            for prefix in self.config.prefixList:
                info_file.write(f"{prefix}\n")

        # TODO: self.make_archive()

        pynist.reload_ini(self.config.nist_path)

        print("\nComplete.")
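
One forward-compatibility note on the do_counter branch above: DataFrame.append was removed in pandas 2.0, and pandas.concat is the drop-in replacement:

import pandas

def append_rows(chart_data: pandas.DataFrame, replacement_data: pandas.DataFrame) -> pandas.DataFrame:
    # Drop-in replacement for chart_data.append(replacement_data, sort=False),
    # which was removed in pandas 2.0.
    return pandas.concat([chart_data, replacement_data], sort=False)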