def write_intensities_stream(self, file_name: PathLike):
    """
    Loop over all scans and, for each scan, write the intensities to the
    given file, one intensity per line.

    Intensities from different scans are joined without any delimiters.

    :param file_name: Output file name.

    :authors: Vladimir Likic, Dominic Davis-Foster (pathlib support)
    """  # noqa: D400

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name)

    print(" -> Writing scans to a file")

    # Use a context manager so the file is closed even if a write fails
    # (previously an explicit open()/close() pair could leak the handle).
    with file_name.open('w', encoding="UTF-8") as fp:
        for scan in self._scan_list:
            for intensity in scan.intensity_list:
                fp.write(f"{intensity:8.4f}\n")
def run(self, filename: PathLike):
    """
    Parse configuration from the given file.

    :param filename: The filename of the YAML configuration file.
    """

    config_file = PathPlus(filename)

    if not config_file.is_file():
        raise FileNotFoundError(str(config_file))

    # Validate the file against a JSON schema built from the configuration
    # variables before attempting to parse it.
    with tempfile.TemporaryDirectory() as tmpdir:
        schema_file = PathPlus(tmpdir) / "schema.json"
        schema = make_schema(*self.config_vars)
        schema["additionalProperties"] = self.allow_unknown_keys
        schema_file.dump_json(schema)
        validate_files(schema_file, config_file)

    with config_file.open() as file:
        raw_config_vars: Mapping[str, Any] = YAML(typ="safe", pure=True).load(file)

    # Each variable is handled by a ``visit_<name>`` method when one is
    # defined on this class, falling back to the variable's own ``get``.
    parsed_config_vars: MutableMapping[str, Any] = {}

    for var in self.config_vars:
        visitor = getattr(self, f"visit_{var.__name__}", var.get)
        parsed_config_vars[var.__name__] = visitor(raw_config_vars)

    return self.custom_parsing(raw_config_vars, parsed_config_vars, config_file)
def read_expr_list(file_name: PathLike) -> List[Experiment]:
    """
    Reads the set of experiment files and returns a list of
    :class:`pyms.Experiment.Experiment` objects.

    :param file_name: The name of the file which lists experiment dump file names, one file per line.

    :return: A list of Experiment instances.

    :author: Vladimir Likic
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    # 'with' guarantees the list file is closed even if reading fails
    # (previously an explicit open()/close() pair could leak the handle).
    with file_name.open(encoding="UTF-8") as fp:
        exprfiles = fp.readlines()

    # Each line names one experiment dump file; surrounding whitespace
    # (including the trailing newline) is stripped before loading.
    return [load_expr(exprfile.strip()) for exprfile in exprfiles]
def write(self, file_name: PathLike, minutes: bool = False, formatting: bool = True):
    """
    Writes the ion chromatogram to the specified file.

    :param file_name: The name of the output file
    :param minutes: A boolean value indicating whether to write time in minutes
    :param formatting: Whether to format the numbers in the output.

    :authors: Lewis Lee, Vladimir Likic, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name)

    with file_name.open('w', encoding="UTF-8") as fp:
        # Work on a copy so the chromatogram's own time list is never modified.
        times = copy.deepcopy(self._time_list)

        if minutes:
            # Convert retention times from seconds to minutes.
            times = [t / 60.0 for t in times]

        for idx, time_ in enumerate(times):
            intensity = self._intensity_array[idx]
            if formatting:
                fp.write(f"{time_:8.4f} {intensity:#.6e}\n")
            else:
                fp.write(f"{time_} {intensity}\n")
def get_metadata_for_file(filename: PathLike) -> Dict[str, Any]:
    """
    Returns the EXIF metadata for ``filename``, as a ``key: value`` mapping.

    :param filename:

    :raises FileNotFoundError: If ``filename`` does not exist.
    :raises ValueError: If exiftool's output could not be parsed.
    """

    filename = PathPlus(filename)

    if not filename.is_file():
        raise FileNotFoundError(filename)

    # get the tags
    with filename.open("rb") as fp:
        data = exifread.process_file(fp, details=False, debug=False)

    if data:
        return {k: str(v) for k, v in data.items()}
    else:
        # using exiftool as a backup for some files including videos
        with exiftool.ExifTool() as et:
            try:
                data = et.get_metadata(str(filename))
            except json.decoder.JSONDecodeError as e:
                # Bug fix: the message previously never said which file failed
                # (the f-string had no placeholder). Chain the original error
                # so the JSON failure is visible in the traceback.
                raise ValueError(
                        f"Could not parse EXIF data for {filename} or no EXIF data found."
                        ) from e

        return dict(data)
def from_jcamp(cls: Type[_M], file_name: PathLike) -> _M:
    """
    Create a MassSpectrum from a JCAMP-DX file.

    :param file_name: Path of the file to read.

    :raises ValueError: If the file contains an odd number of data values
        (i.e. the values do not form complete x, y pairs).

    :authors: Qiao Wang, Andrew Isaac, Vladimir Likic, David Kainer,
        Dominic Davis-Foster
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    print(f" -> Reading JCAMP file '{file_name}'")

    xydata = []
    last_tag = None

    # Use a context manager so the file handle is always closed
    # (previously the handle was opened and never closed).
    with file_name.open('r', encoding="UTF-8") as lines:
        for line in lines:
            if line.strip():
                if line.startswith("##"):
                    # key word or information fields
                    fields = line.split('=', 1)
                    # lstrip("##") removes any leading '#' characters
                    current_tag = fields[0].lstrip("##").upper()
                    last_tag = current_tag
                    if current_tag.startswith("END"):
                        break
                else:
                    # Numeric continuation lines belong to the most recent tag;
                    # only accumulate values for recognised xy-data tags.
                    if last_tag in xydata_tags:
                        for item in re.split(r",| ", line.strip()):
                            if item.strip():
                                xydata.append(float(item.strip()))

    # By this point we should have all of the xydata
    if len(xydata) % 2 == 1:
        # Improved error message (was "data not in pair !" with a TODO).
        raise ValueError(
                f"The file contains an odd number of data values ({len(xydata)}); "
                "expected complete x, y pairs."
                )

    # De-interleave [x0, y0, x1, y1, ...] into masses and intensities.
    mass_list = xydata[0::2]
    intensity_list = xydata[1::2]

    return cls(mass_list, intensity_list)
def export_leco_csv(self, file_name: PathLike):
    """
    Exports data in LECO CSV format.

    :param file_name: The name of the output file.

    :raises TypeError: If a mass or intensity value is not a number.

    :authors: Andrew Isaac, Vladimir Likic, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    # exist_ok avoids the racy is_dir()/mkdir() check-then-act pair.
    file_name.parent.mkdir(parents=True, exist_ok=True)

    mass_list = self._mass_list
    time_list = self._time_list
    vals = self._intensity_array

    # Format is text header with:
    # "Scan","Time",...
    # and the rest is "TIC" or m/z as text, i.e. "50","51"...
    # The following lines are:
    # scan_number,time,value,value,...
    # scan_number is an int, rest seem to be fixed format floats.
    # The format is 0.000000e+000

    # 'with' ensures the handle is closed even when a TypeError is
    # raised part-way through writing (previously it leaked).
    with file_name.open('w', encoding="UTF-8") as fp:
        # write header
        fp.write('"Scan","Time"')
        for mass in mass_list:
            if not is_number(mass):
                raise TypeError("mass list datum not a number")
            fp.write(f',"{int(mass):d}"')
        fp.write("\r\n")  # windows CR/LF

        # write lines
        for ii, time_ in enumerate(time_list):
            fp.write(f"{ii},{time_:#.6e}")
            for datum in vals[ii]:
                if not is_number(datum):
                    raise TypeError("datum not a number")
                fp.write(f",{datum:#.6e}")
            fp.write("\r\n")
def load_object(file_name: PathLike) -> object:
    """
    Loads an object previously dumped with :func:`~.dump_object`.

    :param file_name: Name of the object dump file.

    :return: Object contained in the file.

    :authors: Vladimir Likic, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name)

    # Bug fix: the file must be opened for *reading* in binary mode.
    # The previous mode "wb" truncated the dump file on open, destroying
    # the data before pickle.load could ever read it.
    # NOTE: pickle is unsafe on untrusted input; only load trusted dumps.
    with file_name.open("rb") as fp:
        return pickle.load(fp)
def file_lines(file_name: PathLike, strip: bool = False) -> List[str]:
    """
    Returns lines from a file, as a list.

    :param file_name: Name of a file
    :param strip: If True, lines are pre-processed. Newline characters are
        removed, leading and trailing whitespaces are removed, and lines
        starting with '#' are discarded

    :return: A list of lines

    :authors: Vladimir Likic, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    with file_name.open(encoding="UTF-8") as fp:
        lines = fp.readlines()

    if strip:
        # Strip whitespace, then drop empty lines and '#' comments in a
        # single pass (previously done with an O(n^2) list.remove loop).
        stripped = [line.strip() for line in lines]
        lines = [line for line in stripped if line and not line.startswith('#')]

    return lines
def dump_to_file(self, data: Union[MutableMapping, Sequence], filename: PathLike, mode: str = 'w'):
    """
    Dump the given data to the specified file.

    :param data:
    :param filename:
    :param mode: ``'w'`` to overwrite the file, ``'a'`` to append to it.
    """

    target = PathPlus(filename)

    if 'w' in mode:
        # Overwrite: emit the explanatory header followed by the document.
        header = "# Configuration for 'repo_helper' (https://github.com/repo-helper/repo_helper)"
        target.write_lines([header, self.dumps(data, explicit_start=True)])
    elif 'a' in mode:
        # Append: separate from the existing content with a blank line.
        with target.open('a') as fp:
            fp.write('\n')
            fp.write(self.dumps(data, explicit_start=False))
def import_leco_csv(file_name: PathLike) -> IntensityMatrix:
    """
    Imports data in LECO CSV format.

    :param file_name: Path of the file to read.

    :return: Data as an IntensityMatrix.

    :authors: Andrew Isaac, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    # NOTE(review): this handle is never explicitly closed; consider a
    # 'with' block in a future change.
    lines_list = file_name.open('r')
    data = []  # intensity rows, one list of floats per scan
    time_list = []  # retention time of each scan
    mass_list = []  # m/z value for each data column, taken from the header

    # Format is text header with:
    # "Scan","Time",...
    # and the rest is "TIC" or m/z as text, i.e. "50","51"...
    # The following lines are:
    # scan_number,time,value,value,...
    # scan_number is an int, rest seem to be fixed format floats.
    # The format is 0.000000e+000

    num_mass = 0  # number of mass (data) columns found in the header
    FIRST = True  # True until the first numeric header column is seen
    HEADER = True  # True while the first non-empty line (the header) is parsed
    data_col = -1  # index of the first data column (inferred from the header)
    time_col = -1  # index of the time column (named "time" or inferred)
    # get each line
    for line in lines_list:
        cols = -1  # current column index within this line
        data_row = []
        if len(line.strip()) > 0:
            data_list = line.strip().split(',')

            # get each value in line
            for item in data_list:
                item = item.strip()
                item = item.strip("'\"")  # remove quotes (in header)

                # Get header
                if HEADER:
                    cols += 1
                    if len(item) > 0:
                        if item.lower().find("time") > -1:
                            time_col = cols
                        try:
                            value = float(item)
                            # find 1st col with number as header
                            if FIRST and value > 1:  # assume >1 mass
                                data_col = cols
                                # assume time col is previous col
                                if time_col < 0:
                                    time_col = cols - 1
                                FIRST = False
                            mass_list.append(value)
                            num_mass += 1
                        except ValueError:
                            # non-numeric header cell (e.g. "Scan") - skip
                            pass

                # Get rest
                else:
                    cols += 1
                    if len(item) > 0:
                        try:
                            value = float(item)
                            if cols == time_col:
                                time_list.append(value)
                            elif cols >= data_col:
                                data_row.append(value)
                        except ValueError:
                            # non-numeric datum - skip
                            pass

            # check row length
            if not HEADER:
                if len(data_row) == num_mass:
                    data.append(data_row)
                else:
                    # row has the wrong number of values; drop it
                    warn("ignoring row")

            # the first non-empty line was the header; all later lines are data
            HEADER = False

    # check col lengths
    if len(time_list) != len(data):
        warn("number of data rows and time list length differ")

    return IntensityMatrix(time_list, mass_list, data)
def write_mass_hunter_csv(
        alignment: Alignment,
        file_name: PathLike,
        top_ion_list: List[int],
        ):  # , peak_list_name):
    """
    Creates a csv file with UID, common and qualifying ions and their
    ratios for mass hunter interpretation.

    :param alignment: alignment object to write to file
    :param file_name: name of the output file.
    :param top_ion_list: a list of the common ions for each peak in the
        averaged peak list for the alignment.
    """  # noqa: D400

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name)

    # NOTE(review): opened before the top_ion_list check below, so a
    # ValueError leaves the (empty) file created and the handle open.
    fp = file_name.open('w', encoding="UTF-8")

    if top_ion_list is None:
        raise ValueError("List of common ions must be supplied")

    # write headers
    fp.write(
            '"UID","Common Ion","Qual Ion 1","ratio QI1/CI","Qual Ion 2",'
            '"ratio QI2/CI","l window delta","r window delta"\n'
            )

    # Per-peak-position accumulators for computing the average retention time.
    rtsums: List[float] = []
    rtcounts = []

    # The following two arrays will become list of lists
    # such that:
    # areas = [ [align1_peak1, align2_peak1, .....,alignn_peak1]
    #           [align1_peak2, ................................]
    #           .............................................
    #           [align1_peakm,....................,alignn_peakm] ]
    areas = []  # type: ignore
    new_peak_lists = []  # type: ignore
    rtmax = []  # latest retention time seen at each peak position
    rtmin = []  # earliest retention time seen at each peak position

    for peak_list in alignment.peakpos:
        index = 0  # position of the peak within this alignment's peak list

        for peak in peak_list:
            # on the first iteration, populate the lists
            if len(areas) < len(peak_list):
                areas.append([])
                new_peak_lists.append([])
                rtsums.append(0)
                rtcounts.append(0)
                rtmax.append(0.0)
                rtmin.append(0.0)

            if peak is not None:
                rt = peak.rt

                # get the area of the common ion for the peak
                # an area of 'na' shows that while the peak was
                # aligned, the common ion was not present
                area = peak.get_ion_area(top_ion_list[index])

                areas[index].append(area)
                new_peak_lists[index].append(peak)

                # The following code to the else statement is
                # just for calculating the average rt
                rtsums[index] += rt
                rtcounts[index] += 1

                # quick workaround for weird problem when
                # attempting to set rtmin to max time above
                if rtmin[index] == 0.0:
                    rtmin[index] = 5400.0

                if rt > rtmax[index]:
                    rtmax[index] = rt

                if rt < rtmin[index]:
                    rtmin[index] = rt

            else:
                # peak missing in this alignment; keep the column aligned
                areas[index].append(None)

            index += 1

    out_strings = []
    compo_peaks = []
    index = 0

    # now write the strings for the file
    for area_list in areas:
        # write initial info:
        # peak unique id, peak average rt
        compo_peak = composite_peak(new_peak_lists[index])
        if compo_peak is None:
            continue

        compo_peaks.append(compo_peak)
        peak_UID = compo_peak.UID
        peak_UID_string = f'"{peak_UID}"'

        # calculate the time from the leftmost peak to the average
        l_window_delta = compo_peak.rt - rtmin[index]
        # print("l_window", l_window_delta, "rt", compo_peak.rt, "rt_min", rtmin[index])
        r_window_delta = rtmax[index] - compo_peak.rt

        common_ion = top_ion_list[index]
        # The UID's first two '-'-separated fields hold the qualifying ions.
        qual_ion_1 = int(peak_UID_string.split('-')[0].strip('"'))
        qual_ion_2 = int(peak_UID_string.split('-')[1])

        # A qualifying ion must differ from the common ion; substitute the
        # third-highest m/z when either one collides with it.
        if qual_ion_1 == common_ion:
            qual_ion_1 = compo_peak.get_third_highest_mz()
        elif qual_ion_2 == common_ion:
            qual_ion_2 = compo_peak.get_third_highest_mz()
        else:
            pass

        ci_intensity = compo_peak.get_int_of_ion(common_ion)
        q1_intensity = compo_peak.get_int_of_ion(qual_ion_1)
        q2_intensity = compo_peak.get_int_of_ion(qual_ion_2)

        try:
            q1_ci_ratio = float(q1_intensity) / float(ci_intensity)
        except TypeError:
            # if no area available for that ion
            q1_ci_ratio = 0.0
        except ZeroDivisionError:
            # shouldn't happen but does!!
            q1_ci_ratio = 0.01

        try:
            q2_ci_ratio = float(q2_intensity) / float(ci_intensity)
        except TypeError:
            q2_ci_ratio = 0.0
        except ZeroDivisionError:
            # shouldn't happen, but does!!
            q2_ci_ratio = 0.01

        out_strings.append(
                ','.join([
                        peak_UID,
                        f"{common_ion}",
                        f"{qual_ion_1}",
                        f"{q1_ci_ratio * 100:.1f}",
                        f"{qual_ion_2}",
                        f"{q2_ci_ratio * 100:.1f}",
                        f"{(l_window_delta + 1.5) / 60:.2f}",
                        f"{(r_window_delta + 1.5) / 60:.2f}",
                        ])
                )

        index += 1

    # now write the file
    # print("length of areas[0]", len(areas[0]))
    # print("lenght of areas", len(areas))
    # print("length of out_strings", len(out_strings))
    for row in out_strings:
        fp.write(f"{row}\n")

    # dump_object(compo_peaks, peak_list_name)

    fp.close()