Beispiel #1
0
def _jcamp_parse_values(line):
    """
    Return the numbers found on one comma-separated JCAMP data line.

    :param line: A line that does not start with ``##``.

    :return: The non-empty comma-separated fields converted to ``float``, in order.
    :rtype: list
    """

    return [float(item.strip()) for item in line.strip().split(',') if item.strip()]


def _jcamp_make_scan(values):
    """
    Build a ``Scan`` from a flat ``[x0, y0, x1, y1, ...]`` list.

    :param values: Alternating mass / intensity values.

    :return: ``Scan(masses, intensities)``

    :raises ValueError: If ``values`` has an odd number of elements.
    """

    if len(values) % 2 == 1:
        # TODO: This means the data is not in x, y pairs
        #  Make a better error message
        raise ValueError("data not in pair !")

    # Even positions are masses, odd positions are intensities.
    return Scan(values[0::2], values[1::2])


def JCAMP_reader(file_name: Union[str, os.PathLike]) -> GCMS_data:
    """
    Generic reader for JCAMP DX files

    Lines starting with ``##`` are treated as ``##LABEL=value`` records; every
    other non-blank line is treated as comma-separated x, y data. Retention
    times are taken from ``##PAGE= T=...`` records and from
    ``##RETENTION_TIME`` records.

    :param file_name: Path of the file to read
    :type file_name: str or os.PathLike

    :return: GC-MS data object
    :rtype: :class:`pyms.GCMS.Class.GCMS_data`

    :raises TypeError: If ``file_name`` is not accepted by ``is_path``.
    :raises ValueError: If a scan's data is not in x, y pairs, or if the number
        of retention times differs from the number of scans.

    :authors: Qiao Wang, Andrew Isaac, Vladimir Likic, David Kainer, Dominic Davis-Foster (pathlib support)
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    print(f" -> Reading JCAMP file '{file_name}'")

    data = []  # flat x, y values of the scan currently being collected
    page_idx = 0
    xydata_idx = 0
    time_list = []
    scan_list = []

    # Header values are parsed but are not used by the rest of this function.
    header_info = {}

    # The context manager guarantees the file is closed, even if parsing fails.
    with file_name.open('r') as fp:
        for line in fp:

            if not line.strip():
                continue

            if line.startswith("##"):
                # key word or information.
                # partition() copes with records that have no '=' (empty value)
                # instead of failing with an IndexError.
                label, _, value = line.rstrip("\r\n").partition('=')
                label = label.lstrip('#').upper()
                value = value.strip()

                if "PAGE" in label:
                    if "T=" in value:
                        # PAGE contains retention time starting with T=
                        # FileConverter Pro style.
                        # NB: lstrip() removes leading 'T' and '=' characters.
                        time_list.append(float(value.lstrip("T=")))
                    page_idx += 1

                elif "RETENTION_TIME" in label:
                    # OpenChrom style
                    time = float(value)  # rt for the scan to be submitted

                    # Skip the time if it was just recorded; this can happen
                    # when both ##PAGE and ##RETENTION_TIME are specified.
                    # An empty list means nothing was recorded yet.
                    if not time_list or time_list[-1] != time:
                        time_list.append(time)

                elif label in xydata_tags:
                    xydata_idx += 1

                elif label in header_info_fields:
                    if value.isdigit():
                        header_info[label] = int(value)
                    elif is_float(value):
                        header_info[label] = float(value)
                    else:
                        header_info[label] = value

                continue

            # Line doesn't start with ##: data
            if page_idx > 1 or xydata_idx > 1:
                # A new page / data table has started since the collected
                # values began, so they form a complete scan.
                scan_list.append(_jcamp_make_scan(data))
                data = []
                if page_idx > 1:
                    page_idx = 1
                if xydata_idx > 1:
                    xydata_idx = 1

            data.extend(_jcamp_parse_values(line))

    # get last scan (appended even when no data was collected)
    scan_list.append(_jcamp_make_scan(data))

    # sanity check
    time_len = len(time_list)
    scan_len = len(scan_list)
    if time_len != scan_len:
        print(time_list)
        print(scan_list)
        raise ValueError(
            f"Number of time points ({time_len}) does not equal the number of scans ({scan_len})"
        )

    return GCMS_data(time_list, scan_list)
Beispiel #2
0
    def from_jcamp(cls,
                   file_name: PathLike,
                   ignore_warnings: bool = True) -> "ReferenceData":
        """
        Create a ReferenceData object from a JCAMP-DX file.

        Header records (``##LABEL=value``) are read until a label starting
        with ``END`` is reached. Non-blank lines that do not start with ``##``
        are appended to the value of the preceding label, if that label was
        stored as a header.

        :param file_name: Path of the file to read.
        :param ignore_warnings: Whether warnings about invalid tags
            (``JcampTagWarning``) should be suppressed.

        :raises KeyError: If one of the headers used to build the object
            (``TITLE``, ``CAS REGISTRY NO``, ``$NIST MASS SPEC NO``, ``ORIGIN``,
            ``MOLFORM``, ``MW``) was not found in the file.

        :authors: Qiao Wang, Andrew Isaac, Vladimir Likic, David Kainer, Dominic Davis-Foster
        """

        with warnings.catch_warnings():

            if ignore_warnings:
                warnings.simplefilter("ignore", JcampTagWarning)

            file_name = PathPlus(file_name)

            # Commented this line because it also gets printed when the MassSpectrum is created
            # print(f" -> Reading JCAMP file '{file_name}'")
            last_tag = None

            # Dictionary containing header information
            header_info: Dict[str, Any] = {}

            for line in file_name.read_lines():

                if not line.strip():
                    continue

                if not line.startswith("##"):
                    # Continuation of the previous record.
                    if last_tag in header_info:
                        # Format the stored value as text first: it may have
                        # been converted to int/float, and ``+=`` with a str
                        # would then raise TypeError.
                        header_info[last_tag] = f"{header_info[last_tag]}{line}"
                    continue

                # key word or information.
                # partition() yields an empty value for records without '='
                # instead of failing with an IndexError.
                label, _, value = line.rstrip("\r\n").partition('=')
                current_tag = last_tag = label.lstrip('#').upper()
                value = value.strip()

                if current_tag.startswith("END"):
                    break

                elif current_tag in xydata_tags:
                    # Spectrum data is not parsed in this loop.
                    continue

                elif current_tag in header_info_fields:
                    if value.isdigit():
                        header_info[current_tag] = int(value)
                    elif is_float(value):
                        header_info[current_tag] = float(value)
                    else:
                        header_info[current_tag] = value
                else:
                    warnings.warn(current_tag, JcampTagWarning)

            return cls(
                name=header_info["TITLE"],
                cas=header_info["CAS REGISTRY NO"],
                nist_no=header_info["$NIST MASS SPEC NO"],
                contributor=header_info["ORIGIN"],
                formula=header_info["MOLFORM"],
                mw=header_info["MW"],
                mass_spec=MassSpectrum.from_jcamp(file_name),
            )