def __init__(self, left_sample, right_sample, config=None):
    from domdf_python_tools.paths import maybe_make

    if config is None:
        from GSMatch.GSMatch_Core.Config import GSMConfig
        self.config = GSMConfig("config.ini")
    else:
        self.config = config  # GSMConfig object

    maybe_make(os.path.join(self.config.charts_dir, "Comparison"))

    # Read the list of sample prefixes for each side of the comparison
    with open(left_sample, "r") as info_file:
        self.left_prefixList = [x.rstrip("\r\n") for x in info_file.readlines()]

    with open(right_sample, "r") as info_file:
        self.right_prefixList = [x.rstrip("\r\n") for x in info_file.readlines()]

    # The sample names are the filenames without directory or extension
    self.left_sample = os.path.splitext(os.path.split(left_sample)[-1])[0]
    self.right_sample = os.path.splitext(os.path.split(right_sample)[-1])[0]

    print(self.left_sample, self.left_prefixList)
    print(self.right_sample, self.right_prefixList)

    self.comparison_name = f"{self.left_sample} v {self.right_sample}"
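# For context: each *_sample argument is the path to a plain-text file listing
# one sample prefix per line; the file's basename (minus extension) becomes the
# sample name. Hypothetical contents of such a file:
#
#     SAMPLE_A_01
#     SAMPLE_A_02
#     SAMPLE_A_03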
def generate_spectra_from_alignment(self, rt_data, ms_data):
    """
    Generate mass spectra images for the aligned peaks.

    :param rt_data: Aligned retention time data, one column per sample.
    :type rt_data: pandas.DataFrame
    :param ms_data: Aligned mass spectra corresponding to the retention times.
    :type ms_data: pandas.DataFrame
    """

    path = os.path.join(self.config.spectra_dir, self.lot_name)
    maybe_make(path)

    print("\nGenerating mass spectra images. Please wait.")

    # Delete existing files
    for filename in os.listdir(path):
        os.unlink(os.path.join(path, filename))

    if len(rt_data) > 20:
        # For larger alignments, generate the images in parallel,
        # one worker per sample
        arguments = [(sample, rt_data, ms_data, path) for sample in rt_data.columns.values]

        with Pool(len(arguments)) as p:
            p.map(self.spectrum_image_wrapper, arguments)
    else:
        for sample in rt_data.columns.values:
            self.generate_spectrum_image(sample, rt_data, ms_data, path)
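# For context: Pool.map passes each tuple above as a single argument, so the
# wrapper presumably unpacks it before delegating. A minimal sketch; the real
# spectrum_image_wrapper is not shown in this snippet:
def spectrum_image_wrapper(self, args):
    sample, rt_data, ms_data, path = args
    return self.generate_spectrum_image(sample, rt_data, ms_data, path)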
def log_dir(self, value):
    """
    Sets the directory where log files will be stored.

    The directory will be created if it does not already exist.

    :param value: The log directory.
    :type value: str or :class:`pathlib.Path`
    """

    self._log_dir = str(relpath2(value))
    maybe_make(self._log_dir)
def msp_dir(self, value):
    """
    Sets the directory where MSP files for NIST MS Search will be stored.

    The directory will be created if it does not already exist.

    :param value: The MSP directory.
    :type value: str or :class:`pathlib.Path`
    """

    self._msp_dir = str(relpath2(value))
    maybe_make(self._msp_dir)
def log_dir(self, value):
    """
    Sets the directory where log files will be stored.

    The directory will be created if it does not already exist.

    :type value: str or pathlib.Path
    """

    if value:
        if not isinstance(value, pathlib.Path):
            value = pathlib.Path(value)

        self._log_dir = value
        maybe_make(self._log_dir)
def msp_dir(self, value):
    """
    Sets the directory where MSP files for NIST MS Search will be stored.

    The directory will be created if it does not already exist.

    :type value: str or pathlib.Path
    """

    if value:
        if not isinstance(value, pathlib.Path):
            value = pathlib.Path(value)

        self._msp_dir = value
        maybe_make(self._msp_dir)
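# For context: the two pairs of setters above are the write half of property
# pairs; the relpath2-based versions store a string path while these store a
# pathlib.Path. A minimal sketch of the usual wiring, with names assumed rather
# than taken from the source:
import pathlib

class SettingsSketch:

    def __init__(self):
        self._log_dir = None

    @property
    def log_dir(self):
        return self._log_dir

    @log_dir.setter
    def log_dir(self, value):
        if value:
            if not isinstance(value, pathlib.Path):
                value = pathlib.Path(value)

            self._log_dir = value
            value.mkdir(parents=True, exist_ok=True)  # stand-in for maybe_make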
def test_maybe_make_parents(tmp_pathplus):
    test_dir = tmp_pathplus / "maybe_make" / "child1" / "child2"

    assert test_dir.exists() is False

    # Without parents=True, a missing parent should raise an error
    with pytest.raises(FileNotFoundError):
        paths.maybe_make(test_dir)

    # Maybe make the directory
    paths.maybe_make(test_dir, parents=True)

    assert test_dir.exists()
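# The behaviour exercised by these tests can be approximated with pathlib
# alone. A minimal sketch, not the library's actual implementation:
import pathlib

def maybe_make_sketch(directory, parents=False):
    # Create the directory if it is missing; do nothing if the path already
    # exists, even as a file (see test_maybe_make_string below). A missing
    # parent still raises FileNotFoundError unless parents=True.
    try:
        pathlib.Path(directory).mkdir(parents=parents)
    except FileExistsError:
        pass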
def __load_defaults(self):
    # Touch the file, writing a skeleton configuration for _load_from_file to parse
    maybe_make(self._configfile.parent)
    self._configfile.write_text("""
[main]
[paths]
[recent_projects]
[charts]
""")
    self._load_from_file()
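# For illustration only: the skeleton written above parses cleanly with the
# standard library's configparser, yielding four empty sections (how
# _load_from_file actually consumes the file is not shown here):
import configparser

parser = configparser.ConfigParser()
parser.read_string("\n[main]\n[paths]\n[recent_projects]\n[charts]\n")
assert set(parser.sections()) == {"main", "paths", "recent_projects", "charts"}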
def test_maybe_make_string(tmp_pathplus):
    test_dir = tmp_pathplus / "maybe_make"

    assert test_dir.exists() is False

    # Maybe make the directory
    paths.maybe_make(str(test_dir))
    assert test_dir.exists()

    # Maybe make the directory again; should be a no-op
    paths.maybe_make(str(test_dir))
    assert test_dir.exists()

    # Delete the directory and replace with a file
    test_dir.rmdir()
    assert not test_dir.exists()
    test_dir.touch()
    assert test_dir.exists()
    assert test_dir.is_file()

    # An existing file at the path is left in place
    paths.maybe_make(str(test_dir))
    assert test_dir.exists()
    assert test_dir.is_file()
def init_repo(repo_path: pathlib.Path, templates: Environment) -> List[str]:
    """
    Initialise a new repository, creating the necessary files to get started.

    :param repo_path: Path to the repository root.
    :param templates: The Jinja2 template environment; its globals provide the project settings.
    """

    repo_path = PathPlus(repo_path)
    templates.globals["len"] = len

    init_repo_templates = Environment(  # nosec: B701
        loader=jinja2.FileSystemLoader(str(init_repo_template_dir)),
        undefined=jinja2.StrictUndefined,
        )
    init_repo_templates.globals.update(templates.globals)

    # package
    (repo_path / templates.globals["import_name"]).maybe_make()

    repo_license = templates.globals["license"]

    if repo_license in license_init_file_lookup:
        __init__ = init_repo_templates.get_template(f"{license_init_file_lookup[repo_license]}._py")
    else:
        __init__ = init_repo_templates.get_template("generic._py")

    __init__path = repo_path / templates.globals["import_name"] / "__init__.py"
    __init__path.write_clean(__init__.render())

    # tests
    if templates.globals["enable_tests"]:
        maybe_make(repo_path / templates.globals["tests_dir"])
        (repo_path / templates.globals["tests_dir"] / "__init__.py").touch()
        (repo_path / templates.globals["tests_dir"] / "requirements.txt").touch()

    # docs
    docs_files: Sequence[str]

    if templates.globals["enable_docs"]:
        docs_files = enable_docs(repo_path, templates, init_repo_templates)
    else:
        docs_files = ()

    # other
    for filename in {"README.rst"}:
        template = init_repo_templates.get_template(filename)
        (repo_path / filename).write_clean(template.render())

    (repo_path / "LICENSE").write_clean(
        get_license_text(
            repo_license,
            copyright_years=datetime.datetime.today().year,
            author=templates.globals["author"],
            project_name=templates.globals["modname"],
            ))

    # Touch requirements file
    (repo_path / "requirements.txt").touch()

    return [
        posixpath.join(templates.globals["import_name"], "__init__.py"),
        posixpath.join(templates.globals["tests_dir"], "__init__.py"),
        *docs_files,
        posixpath.join(templates.globals["tests_dir"], "requirements.txt"),
        "requirements.txt",
        "LICENSE",
        "README.rst",
        ]
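# Note the two spellings used above: maybe_make() as a function on a plain path
# and as a method of PathPlus. Both create the directory only if it does not
# already exist ("build" below is a hypothetical path):
from domdf_python_tools.paths import PathPlus, maybe_make

maybe_make("build")             # function form
PathPlus("build").maybe_make()  # method form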
def hit_data(samples):
    samples = samples.split("/")

    maybe_make("cache")  # Internal cache directory

    if "data" in request.args:
        hit = ConsolidatedSearchResult.from_quoted_string(request.args.get("data", type=str))

        CAS = hit.cas
        Name = hit.name

        if CAS.replace("-", '').replace("0", '') == '':
            # CAS number is all zeros; cache under a hash of the name instead
            pickle_name = hashlib.md5(Name.encode("utf-8")).hexdigest()

            if (cache_dir / pickle_name).exists():
                with open(cache_dir / pickle_name, "rb") as f:
                    comp = pickle.load(f)
            else:
                # Check that a connection can be established to the PubChem server
                try:
                    urllib.request.urlopen(API_BASE, timeout=2)
                except urllib.error.HTTPError as e:
                    if e.code != 400:
                        raise e
                except urllib.error.URLError:
                    warnings.warn("Unable to connect to PubChem server. Check your internet connection and try again.")
                    return render_template(
                        "properties_template_offline.html",
                        hit=hit,
                        samples=samples,
                        )

                try:
                    comp = get_compounds(CAS, 'name')[0]
                except IndexError:
                    comp = None

                # Save to cache
                with open(cache_dir / pickle_name, "wb") as f:
                    pickle.dump(comp, f)
        else:
            if (cache_dir / CAS).exists():
                with open(cache_dir / CAS, "rb") as f:
                    comp = pickle.load(f)
            else:
                try:
                    comp = get_compounds(CAS, 'name')[0]
                except IndexError:
                    comp = None

                # Save to cache
                with open(cache_dir / CAS, "wb") as f:
                    pickle.dump(comp, f)

        # TODO: Combine information from hit.reference_data and comp, e.g. synonyms

        return render_template(
            "properties_template_2.html",
            comp=comp,
            hit=hit,
            samples=samples,
            )

    else:
        # Legacy mode
        index = request.args.get("index", 0, type=int)
        filename = request.args.get("filename", '', type=str)

        peak_data = []

        if filename == '':
            return "Please provide a filename with ?filename=", 400

        with open(filename, "r") as jsonfile:
            for i, peak in enumerate(jsonfile):
                if i == index:
                    peak_data = json.loads(peak)

        if not peak_data:
            # Index was out of range
            return "Peak index out of range", 400

        CAS = peak_data["hits"][0]["CAS"]
        Name = peak_data["hits"][0]["Name"]
        rt = peak_data["average_rt"]

        if CAS.replace("-", '').replace("0", '') == '':
            # CAS number is all zeros; cache under a hash of the name instead
            pickle_name = hashlib.md5(Name.encode("utf-8")).hexdigest()

            if (cache_dir / pickle_name).exists():
                with open(cache_dir / pickle_name, "rb") as f:
                    comp = pickle.load(f)
            else:
                comp = get_compounds(Name, 'name')[0]

                # Save to cache
                with open(cache_dir / pickle_name, "wb") as f:
                    pickle.dump(comp, f)
        else:
            if (cache_dir / CAS).exists():
                with open(cache_dir / CAS, "rb") as f:
                    comp = pickle.load(f)
            else:
                comp = get_compounds(CAS, 'name')[0]

                # Save to cache
                with open(cache_dir / CAS, "wb") as f:
                    pickle.dump(comp, f)

        return render_template(
            "properties_template.html",
            comp=comp,
            data=peak_data,
            samples=samples,
            )
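# The look-ups above repeat a simple pickle-backed cache pattern. Distilled as
# an illustrative helper (not part of the application):
import pickle

def cached_lookup(cache_path, fetch):
    # Return the cached object if present; otherwise fetch, cache and return it
    if cache_path.exists():
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    result = fetch()

    with open(cache_path, "wb") as f:
        pickle.dump(result, f)

    return result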
def run(self):
    # Indicate which steps to perform
    print(f"do_qualitative: {self.config.do_qualitative}")
    print(f"do_merge: {self.config.do_merge}")
    print(f"do_counter: {self.config.do_counter}")
    print(f"do_spectra: {self.config.do_spectra}")
    print(f"do_charts: {self.config.do_charts}")

    # Load the experiment files created during Quantitative Processing
    for prefix in self.config.prefixList:
        file_name = os.path.join(self.config.expr_dir, prefix + ".expr")
        self.expr_list.append(load_expr(file_name))

    if self.config.do_qualitative:
        print("Qualitative Processing in Progress...")
        for prefix in self.config.prefixList:
            self.qualitative_processing(prefix, list(rt_alignment[prefix]))

    if self.config.do_merge:
        self.merge()

    if self.config.do_counter:
        chart_data = self.match_counter(self.ms_comparisons(ms_alignment))
        chart_data = chart_data.set_index("Compound", drop=True)

        # Remove duplicate compounds, summing their peak areas
        chart_data_count = Counter(chart_data.index)

        replacement_data = {
            "Compound": [],
            f"{self.lot_name} Peak Area": [],
            f"{self.lot_name} Standard Deviation": [],
            }

        for prefix in self.config.prefixList:
            replacement_data[prefix] = []

        for compound in chart_data_count:
            if chart_data_count[compound] > 1:
                replacement_data["Compound"].append(compound)
                replacement_data[f"{self.lot_name} Peak Area"].append(
                    sum(chart_data.loc[compound, f"{self.lot_name} Peak Area"]))

                peak_data = []
                for prefix in self.config.prefixList:
                    replacement_data[prefix].append(sum(chart_data.loc[compound, prefix]))
                    peak_data.append(sum(chart_data.loc[compound, prefix]))

                replacement_data[f"{self.lot_name} Standard Deviation"].append(numpy.std(peak_data))

                chart_data = chart_data.drop(compound, axis=0)

        replacement_data = pandas.DataFrame(replacement_data)
        replacement_data = replacement_data.set_index("Compound", drop=False)
        chart_data = chart_data.append(replacement_data, sort=False)

        chart_data.sort_index(inplace=True)
        chart_data = chart_data.drop("Compound", axis=1)
        chart_data["Compound Names"] = chart_data.index

        chart_data.to_csv(
            os.path.join(self.config.csv_dir, f"{self.lot_name}_CHART_DATA.csv"),
            sep=";",
            )
    else:
        chart_data = pandas.read_csv(
            os.path.join(self.config.csv_dir, f"{self.lot_name}_CHART_DATA.csv"),
            sep=";",
            index_col=0,
            )

    if self.config.do_spectra:
        self.generate_spectra_from_alignment(rt_alignment, ms_alignment)

        # Write mass spectra to OpenChrom-like CSV files
        def generate_spectra_csv(rt_data, ms_data, name):
            ms = ms_data[0]  # First mass spectrum, used for the list of m/z values

            spectrum_csv_file = os.path.join(self.config.spectra_dir, self.lot_name, f"{name}_data.csv")

            with open(spectrum_csv_file, 'w') as spectrum_csv:
                spectrum_csv.write('RT(milliseconds);RT(minutes) - NOT USED BY IMPORT;RI;')
                spectrum_csv.write(';'.join(str(mz) for mz in ms.mass_list))
                spectrum_csv.write("\n")

                for rt, ms in zip(rt_data, ms_data):
                    spectrum_csv.write(f"{int(rt * 60000)};{rounders(rt, '0.0000000000')};0;")
                    spectrum_csv.write(';'.join(str(intensity) for intensity in ms.mass_spec))
                    spectrum_csv.write('\n')

        for prefix in self.config.prefixList:
            print(prefix)
            generate_spectra_csv(rt_alignment[prefix], ms_alignment[prefix], prefix)

    if self.config.do_charts:
        print("\nGenerating Charts")

        chart_data.to_csv(
            os.path.join(self.config.csv_dir, f"{self.lot_name}_CHART_DATA.csv"),
            sep=";",
            )

        maybe_make(os.path.join(self.config.charts_dir, self.lot_name))

        if chart_data.empty:
            print("ALERT: No peaks were found for compounds that have")
            print("       previously been reported in literature.")
            print("       Check the results for more information\n")
        else:
            from GSMatch.GSMatch_Core.charts import (
                box_whisker_wrapper,
                mean_peak_area_wrapper,
                peak_area_wrapper,
                radar_chart_wrapper,
                )

            radar_chart_wrapper(
                chart_data,
                [self.lot_name],
                use_log=10,
                legend=False,
                mode=os.path.join(self.config.charts_dir, self.lot_name, "radar_log10_peak_area"),
                )
            radar_chart_wrapper(
                chart_data,
                [self.lot_name],
                use_log=False,
                legend=False,
                mode=os.path.join(self.config.charts_dir, self.lot_name, "radar_peak_area"),
                )
            mean_peak_area_wrapper(
                chart_data,
                [self.lot_name],
                mode=os.path.join(self.config.charts_dir, self.lot_name, "mean_peak_area"),
                )
            peak_area_wrapper(
                chart_data,
                self.lot_name,
                self.config.prefixList,
                mode=os.path.join(self.config.charts_dir, self.lot_name, "peak_area_percentage"),
                )
            peak_area_wrapper(
                chart_data,
                self.lot_name,
                self.config.prefixList,
                percentage=False,
                mode=os.path.join(self.config.charts_dir, self.lot_name, "peak_area"),
                )
            peak_area_wrapper(
                chart_data,
                self.lot_name,
                self.config.prefixList,
                use_log=10,
                mode=os.path.join(self.config.charts_dir, self.lot_name, "log10_peak_area_percentage"),
                )

            samples_to_compare = [(self.lot_name, self.config.prefixList)]

            box_whisker_wrapper(
                chart_data,
                samples_to_compare,
                mode=os.path.join(self.config.charts_dir, self.lot_name, "box_whisker"),
                )

    with open(os.path.join(self.config.results_dir, f"{self.lot_name}.info"), "w") as info_file:
        for prefix in self.config.prefixList:
            info_file.write(f"{prefix}\n")

    # TODO: self.make_archive()
    pynist.reload_ini(self.config.nist_path)

    print("\nComplete.")
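# Illustrative shape of the OpenChrom-like CSV written by generate_spectra_csv
# above (all values hypothetical; 372000 ms corresponds to an RT of 6.2 minutes):
#
#   RT(milliseconds);RT(minutes) - NOT USED BY IMPORT;RI;50;51;52
#   372000;6.2000000000;0;1205;880;4310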