Ejemplo n.º 1
0
    def write_intensities_stream(self, file_name: PathLike):
        """
        Write the intensities of every scan to a file, one intensity per line.

        Scans are written in the order they appear in ``self._scan_list``.
        Intensities from different scans are joined without any delimiters.

        :param file_name: Output file name.

        :raises TypeError: If ``file_name`` is rejected by ``is_path``.

        :authors: Vladimir Likic, Dominic Davis-Foster (pathlib support)
        """

        if not is_path(file_name):
            raise TypeError(
                "'file_name' must be a string or a PathLike object")

        file_name = prepare_filepath(file_name)

        print(" -> Writing scans to a file")

        # The context manager closes the file even if formatting or writing
        # an intensity fails part-way through.
        with file_name.open('w', encoding="UTF-8") as fp:
            for scan in self._scan_list:
                fp.writelines(f"{i:8.4f}\n" for i in scan.intensity_list)
Ejemplo n.º 2
0
    def run(self, filename: PathLike):
        """
        Validate the given YAML configuration file and parse its values.

        A JSON schema built from ``self.config_vars`` is written to a temporary
        directory and the file is validated against it. Each configuration
        variable is then parsed with this object's ``visit_<name>`` method when
        one exists, falling back to the variable's own ``get``.

        :param filename: The filename of the YAML configuration file.

        :return: Whatever ``self.custom_parsing`` returns for the raw values,
            the parsed values and the file path.

        :raises FileNotFoundError: If ``filename`` is not an existing file.
        """

        filename = PathPlus(filename)

        if not filename.is_file():
            raise FileNotFoundError(str(filename))

        # The schema only needs to exist on disk for the duration of validation.
        with tempfile.TemporaryDirectory() as tmpdir:
            schema_file = PathPlus(tmpdir) / "schema.json"
            schema = make_schema(*self.config_vars)
            schema["additionalProperties"] = self.allow_unknown_keys
            schema_file.dump_json(schema)
            validate_files(schema_file, filename)

        parsed_config_vars: MutableMapping[str, Any] = {}

        with filename.open() as file:
            loader = YAML(typ="safe", pure=True)
            raw_config_vars: Mapping[str, Any] = loader.load(file)

        for var in self.config_vars:
            # A ``visit_<name>`` method on this object overrides the default getter.
            parser = getattr(self, f"visit_{var.__name__}", var.get)
            parsed_config_vars[var.__name__] = parser(raw_config_vars)

        return self.custom_parsing(
            raw_config_vars,
            parsed_config_vars,
            filename,
        )
Ejemplo n.º 3
0
def read_expr_list(file_name: PathLike) -> List[Experiment]:
    """
    Reads the set of experiment files and returns a list of :class:`pyms.Experiment.Experiment` objects.

    Each non-blank line of ``file_name`` is stripped of surrounding whitespace
    and passed to ``load_expr``; the results are returned in file order.

    :param file_name: The name of the file which lists experiment dump file names, one file per line.

    :return: A list of Experiment instances.

    :raises TypeError: If ``file_name`` is rejected by ``is_path``.

    :author: Vladimir Likic
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    # The context manager closes the listing even if reading it fails.
    with file_name.open(encoding="UTF-8") as fp:
        exprfiles = [line.strip() for line in fp]

    # Blank lines (for example a trailing empty line) do not name a file, so
    # they are skipped rather than handed to ``load_expr`` as an empty name.
    return [load_expr(exprfile) for exprfile in exprfiles if exprfile]
Ejemplo n.º 4
0
    def write(self,
              file_name: PathLike,
              minutes: bool = False,
              formatting: bool = True):
        """
        Write the ion chromatogram to a file as ``time intensity`` lines.

        :param file_name: The name of the output file
        :param minutes: If true, each time is divided by 60 before being written.
        :param formatting: If true, times are written as ``8.4f`` and
            intensities as ``#.6e``; otherwise both use their default string form.

        :raises TypeError: If ``file_name`` is rejected by ``is_path``.

        :authors: Lewis Lee, Vladimir Likic, Dominic Davis-Foster (pathlib support)
        """

        if not is_path(file_name):
            raise TypeError(
                "'file_name' must be a string or a PathLike object")

        file_name = prepare_filepath(file_name)

        with file_name.open('w', encoding="UTF-8") as fp:

            # Work on a copy so the unit conversion never touches the stored times.
            times = copy.deepcopy(self._time_list)

            if minutes:
                for idx, seconds in enumerate(times):
                    times[idx] = seconds / 60.0

            for idx, time_ in enumerate(times):
                if formatting:
                    row = f"{time_:8.4f} {self._intensity_array[idx]:#.6e}\n"
                else:
                    row = f"{time_} {self._intensity_array[idx]}\n"

                fp.write(row)
Ejemplo n.º 5
0
def get_metadata_for_file(filename: PathLike) -> Dict[str, Any]:
    """
    Return the EXIF metadata for ``filename``, as a ``key: value`` mapping.

    The file is first read with ``exifread``; when that yields tags, they are
    returned with every value converted to a string. Otherwise ``exiftool`` is
    used as a fallback and its metadata is returned as a plain dictionary.

    :param filename: Path of the file to read the metadata from.

    :raises FileNotFoundError: If ``filename`` is not an existing file.
    :raises ValueError: If the ``exiftool`` fallback fails with a JSON decode error.
    """

    filename = PathPlus(filename)

    if not filename.is_file():
        raise FileNotFoundError(filename)

    # First attempt: read the tags directly from the file contents.
    with filename.open("rb") as fp:
        tags = exifread.process_file(fp, details=False, debug=False)

    if tags:
        return {key: str(value) for key, value in tags.items()}

    # using exiftool as a backup for some files including videos
    with exiftool.ExifTool() as et:
        try:
            fallback = et.get_metadata(str(filename))
        except json.decoder.JSONDecodeError:
            raise ValueError(
                f"Could not parse EXIF data for {filename} or no EXIF data found."
            )

    return dict(fallback)
Ejemplo n.º 6
0
    def from_jcamp(cls: Type[_M], file_name: PathLike) -> _M:
        """
        Create a MassSpectrum from a JCAMP-DX file.

        Lines starting with ``##`` are labelled records; the label is the text
        before the first ``=``, upper-cased and with leading ``#`` removed.
        Reading stops at the first label starting with ``END``. Every other
        non-blank line is parsed as comma- or space-separated numbers, but only
        while the most recent label is one of ``xydata_tags``. The numbers are
        then split into alternating mass / intensity values.

        :param file_name: Path of the file to read.

        :return: ``cls(mass_list, intensity_list)``.

        :raises TypeError: If ``file_name`` is rejected by ``is_path``.
        :raises ValueError: If an odd number of data values was read, or if a
            data item cannot be converted to ``float``.

        :authors: Qiao Wang, Andrew Isaac, Vladimir Likic, David Kainer, Dominic Davis-Foster
        """

        if not is_path(file_name):
            raise TypeError(
                "'file_name' must be a string or a PathLike object")

        file_name = prepare_filepath(file_name, mkdirs=False)

        print(f" -> Reading JCAMP file '{file_name}'")

        xydata = []
        last_tag = None

        # The context manager closes the file on every exit path, including
        # the early ``break`` and a failed float conversion.
        with file_name.open('r', encoding="UTF-8") as fp:
            for line in fp:
                if not line.strip():
                    continue

                if line.startswith("##"):
                    # key word or information
                    last_tag = line.split('=', 1)[0].lstrip('#').upper()

                    if last_tag.startswith("END"):
                        break

                elif last_tag in xydata_tags:
                    for item in re.split(r",| ", line.strip()):
                        item = item.strip()
                        # Consecutive separators produce empty items; skip them.
                        if item:
                            xydata.append(float(item))

        # By this point we should have all of the xydata
        if len(xydata) % 2 == 1:
            # TODO: This means the data is not in x, y pairs
            #  Make a better error message
            raise ValueError("data not in pair !")

        # Even positions hold masses, odd positions the matching intensities.
        mass_list = xydata[0::2]
        intensity_list = xydata[1::2]

        return cls(mass_list, intensity_list)
Ejemplo n.º 7
0
	def export_leco_csv(self, file_name: PathLike):
		"""
		Exports data in LECO CSV format.

		The first row is the header ``"Scan","Time"`` followed by every mass
		as a quoted integer. Each following row holds the scan index, the scan
		time and the intensities for that scan, with the floats written in
		``#.6e`` format. Every row ends with CR/LF.

		:param file_name: The name of the output file.

		:raises TypeError: If ``file_name`` is rejected by ``is_path``, or if a
			mass or an intensity is rejected by ``is_number``.

		:authors: Andrew Isaac, Vladimir Likic, Dominic Davis-Foster (pathlib support)
		"""

		if not is_path(file_name):
			raise TypeError("'file_name' must be a string or a PathLike object")

		file_name = prepare_filepath(file_name, mkdirs=False)

		# ``exist_ok`` avoids failing if the directory appears between check and creation.
		file_name.parent.mkdir(parents=True, exist_ok=True)

		mass_list = self._mass_list
		time_list = self._time_list
		vals = self._intensity_array

		# Format is text header with:
		# "Scan","Time",...
		# and the rest is "TIC" or m/z as text, i.e. "50","51"...
		# The following lines are:
		# scan_number,time,value,value,...
		# scan_number is an int, rest seem to be fixed format floats.
		# The format is 0.000000e+000

		# ``newline=''`` disables newline translation, so the explicit "\r\n"
		# is written as-is instead of becoming "\r\r\n" on Windows.
		# The context manager closes the file even when a TypeError is raised below.
		with file_name.open('w', encoding="UTF-8", newline='') as fp:

			# write header
			fp.write('"Scan","Time"')
			for mass in mass_list:
				if not is_number(mass):
					raise TypeError("mass list datum not a number")
				fp.write(f',"{int(mass):d}"')
			fp.write("\r\n")  # windows CR/LF

			# write lines
			for ii, time_ in enumerate(time_list):
				fp.write(f"{ii},{time_:#.6e}")
				for value in vals[ii]:
					if not is_number(value):
						raise TypeError("datum not a number")
					fp.write(f",{value:#.6e}")
				fp.write("\r\n")
Ejemplo n.º 8
0
def load_object(file_name: PathLike) -> object:
    """
    Loads an object previously dumped with :func:`~.dump_object`.

    :param file_name: Name of the object dump file.

    :return: Object contained in the file.

    :raises TypeError: If ``file_name`` is rejected by ``is_path``.

    :authors: Vladimir Likic, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    # Reading must not create directories, so ``mkdirs`` is disabled here.
    file_name = prepare_filepath(file_name, mkdirs=False)

    # The file has to be opened for binary *reading*: "wb" would truncate the
    # dump and make ``pickle.load`` fail on a write-only handle.
    # NOTE(security): unpickling can execute arbitrary code; only load dump
    # files that come from a trusted source.
    with file_name.open("rb") as fp:
        return pickle.load(fp)
Ejemplo n.º 9
0
def file_lines(file_name: PathLike, strip: bool = False) -> List[str]:
    """
    Returns lines from a file, as a list.

    :param file_name: Name of a file
    :param strip: If True, lines are pre-processed. Newline characters are
        removed, leading and trailing whitespaces are removed, and empty lines
        and lines starting with '#' are discarded. If False, the lines are
        returned exactly as read, including their line endings.

    :return: A list of lines

    :raises TypeError: If ``file_name`` is rejected by ``is_path``.

    :authors: Vladimir Likic, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    with file_name.open(encoding="UTF-8") as fp:
        lines = fp.readlines()

    if strip:
        # Strip first, then drop empty lines and comments in a single pass.
        # The '#' test is applied after stripping, so indented comments are
        # discarded as well.
        stripped = (line.strip() for line in lines)
        lines = [line for line in stripped if line and not line.startswith('#')]

    return lines
Ejemplo n.º 10
0
    def dump_to_file(self,
                     data: Union[MutableMapping, Sequence],
                     filename: PathLike,
                     mode: str = 'w'):
        """
        Dump the given data to the specified file.

        When ``mode`` contains ``'w'`` the file is written with a header
        comment line followed by the dumped data with an explicit document
        start. Otherwise, when ``mode`` contains ``'a'``, a newline and the
        dumped data (without an explicit document start) are appended. Any
        other mode does nothing.

        :param data: The data to serialise with ``self.dumps``.
        :param filename: The file to write to.
        :param mode: The write mode; checked for ``'w'`` first, then ``'a'``.
        """

        target = PathPlus(filename)

        if 'w' in mode:
            header = "# Configuration for 'repo_helper' (https://github.com/repo-helper/repo_helper)"
            document = self.dumps(data, explicit_start=True)
            target.write_lines([header, document])
            return

        if 'a' in mode:
            with target.open('a') as fp:
                # Separate the new document from whatever the file already holds.
                fp.write('\n')
                fp.write(self.dumps(data, explicit_start=False))
Ejemplo n.º 11
0
def import_leco_csv(file_name: PathLike) -> IntensityMatrix:
	"""
	Imports data in LECO CSV format.

	The first non-blank line is treated as the header: every field that
	parses as a number is taken as a mass, the column whose name contains
	"time" is the time column, and the first numeric header greater than 1
	marks the first data column. When no time column has been found by then,
	the column before the first data column is used. In every following
	non-blank line the value in the time column is appended to the time list
	and numeric values from the first data column onwards form the data row.
	Rows whose length differs from the number of masses are skipped with a
	warning.

	:param file_name: Path of the file to read.

	:return: Data as an IntensityMatrix.

	:raises TypeError: If ``file_name`` is rejected by ``is_path``.

	:authors: Andrew Isaac, Dominic Davis-Foster (pathlib support)
	"""

	if not is_path(file_name):
		raise TypeError("'file_name' must be a string or a PathLike object")

	file_name = prepare_filepath(file_name, mkdirs=False)

	data = []
	time_list = []
	mass_list = []

	# Format is text header with:
	# "Scan","Time",...
	# and the rest is "TIC" or m/z as text, i.e. "50","51"...
	# The following lines are:
	# scan_number,time,value,value,...
	# scan_number is an int, rest seem to be fixed format floats.
	# The format is 0.000000e+000

	num_mass = 0
	first_mass = True
	in_header = True
	data_col = -1
	time_col = -1

	# The context manager makes sure the input file is closed once parsing ends.
	with file_name.open('r') as lines_list:
		for line in lines_list:
			line = line.strip()
			if not line:
				continue

			data_row = []

			for cols, item in enumerate(line.split(',')):
				item = item.strip().strip("'\"")  # remove quotes (in header)
				if not item:
					continue

				if in_header and "time" in item.lower():
					time_col = cols

				try:
					value = float(item)
				except ValueError:
					# Non-numeric fields (e.g. the header names) carry no data.
					continue

				if in_header:
					# find 1st col with number as header
					if first_mass and value > 1:  # assume >1 mass
						data_col = cols
						# assume time col is previous col
						if time_col < 0:
							time_col = cols - 1
						first_mass = False
					mass_list.append(value)
					num_mass += 1
				elif cols == time_col:
					time_list.append(value)
				elif cols >= data_col:
					data_row.append(value)

			if in_header:
				# Only the first non-blank line is the header.
				in_header = False
			elif len(data_row) == num_mass:
				data.append(data_row)
			else:
				warn("ignoring row")

	# check col lengths
	if len(time_list) != len(data):
		warn("number of data rows and time list length differ")

	return IntensityMatrix(time_list, mass_list, data)
Ejemplo n.º 12
0
def write_mass_hunter_csv(
		alignment: Alignment,
		file_name: PathLike,
		top_ion_list: List[int],
		):
	"""
	Creates a csv file with UID, common and qualifying ions and their
	ratios for mass hunter interpretation.

	One row is written per aligned peak position for which ``composite_peak``
	returns a peak. The two qualifying ions are read from the first two
	``-``-separated fields of the composite peak's UID; whichever of them
	equals the common ion is replaced by the peak's third highest m/z. The
	window deltas are the distances from the composite peak's retention time
	to the smallest and largest retention time at that position, plus 1.5,
	divided by 60.

	:param alignment: alignment object to write to file
	:param file_name: name of the output file.

	:param top_ion_list: a list of the common ions for each peak in the
		averaged peak list for the alignment.

	:raises TypeError: If ``file_name`` is rejected by ``is_path``.
	:raises ValueError: If ``top_ion_list`` is ``None``.
	"""  # noqa: D400

	if not is_path(file_name):
		raise TypeError("'file_name' must be a string or a PathLike object")

	file_name = prepare_filepath(file_name)

	# Validate before the output file is opened, so bad input neither
	# truncates an existing file nor leaves a handle open.
	if top_ion_list is None:
		raise ValueError("List of common ions must be supplied")

	# The following two arrays will become list of lists
	# such that:
	# areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
	#            [align1_peak2, ................................]
	#              .............................................
	#            [align1_peakm,....................,alignn_peakm]  ]
	areas = []  # type: ignore
	new_peak_lists = []  # type: ignore
	rtmax = []
	rtmin = []

	for peak_list in alignment.peakpos:
		for index, peak in enumerate(peak_list):
			# on the first iteration, populate the lists
			if len(areas) < len(peak_list):
				areas.append([])
				new_peak_lists.append([])
				rtmax.append(0.0)
				rtmin.append(0.0)

			if peak is None:
				areas[index].append(None)
				continue

			rt = peak.rt

			# get the area of the common ion for the peak
			# an area of 'na' shows that while the peak was
			# aligned, the common ion was not present
			area = peak.get_ion_area(top_ion_list[index])

			areas[index].append(area)
			new_peak_lists[index].append(peak)

			# quick workaround for weird problem when
			# attempting to set rtmin to max time above
			if rtmin[index] == 0.0:
				rtmin[index] = 5400.0

			if rt > rtmax[index]:
				rtmax[index] = rt

			if rt < rtmin[index]:
				rtmin[index] = rt

	out_strings = []

	# ``enumerate`` keeps the index in step with the peak position even when a
	# position is skipped; a manually incremented counter would fall behind
	# after the first skip and every later position would be lost.
	for index, peaks in enumerate(new_peak_lists):

		compo_peak = composite_peak(peaks)
		if compo_peak is None:
			continue

		peak_UID = compo_peak.UID
		peak_UID_string = f'"{peak_UID}"'

		# calculate the time from the leftmost peak to the average
		l_window_delta = compo_peak.rt - rtmin[index]
		r_window_delta = rtmax[index] - compo_peak.rt

		common_ion = top_ion_list[index]
		uid_fields = peak_UID_string.split('-')
		qual_ion_1 = int(uid_fields[0].strip('"'))
		qual_ion_2 = int(uid_fields[1])

		# A qualifying ion must differ from the common ion.
		if qual_ion_1 == common_ion:
			qual_ion_1 = compo_peak.get_third_highest_mz()
		elif qual_ion_2 == common_ion:
			qual_ion_2 = compo_peak.get_third_highest_mz()

		ci_intensity = compo_peak.get_int_of_ion(common_ion)
		q1_intensity = compo_peak.get_int_of_ion(qual_ion_1)
		q2_intensity = compo_peak.get_int_of_ion(qual_ion_2)

		try:
			q1_ci_ratio = float(q1_intensity) / float(ci_intensity)
		except TypeError:  # if no area available for that ion
			q1_ci_ratio = 0.0
		except ZeroDivisionError:
			# shouldn't happen but does!!
			q1_ci_ratio = 0.01

		try:
			q2_ci_ratio = float(q2_intensity) / float(ci_intensity)
		except TypeError:
			q2_ci_ratio = 0.0
		except ZeroDivisionError:
			# shouldn't happen, but does!!
			q2_ci_ratio = 0.01

		out_strings.append(
				','.join([
						peak_UID,
						f"{common_ion}",
						f"{qual_ion_1}",
						f"{q1_ci_ratio * 100:.1f}",
						f"{qual_ion_2}",
						f"{q2_ci_ratio * 100:.1f}",
						f"{(l_window_delta + 1.5) / 60:.2f}",
						f"{(r_window_delta + 1.5) / 60:.2f}",
						])
				)

	# The file is only opened once every row has been built; the context
	# manager closes it even if a write fails.
	with file_name.open('w', encoding="UTF-8") as fp:
		# write headers
		fp.write(
				'"UID","Common Ion","Qual Ion 1","ratio QI1/CI","Qual Ion 2",'
				'"ratio QI2/CI","l window delta","r window delta"\n'
				)

		for row in out_strings:
			fp.write(f"{row}\n")