Beispiel #1
0
def diff(data1, data2):
    """
    @summary: Compares two GCMS_data objects

    @param data1: GCMS data set 1
    @type data1: pyms.GCMS.Class.GCMS_data
    @param data2: GCMS data set 2
    @type data2: pyms.GCMS.Class.GCMS_data

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # get time attributes
    time_list1 = data1.get_time_list()
    time_list2 = data2.get_time_list()

    #
    # First, check if two data sets have the same number of retention
    # times.
    #
    if not len(time_list1) == len(time_list2):
        print " -> The number of retention time points different."
        print " First data set: %d time points" % (len(time_list1))
        print " Second data set: %d time points" % (len(time_list2))
        print " Data sets are different."
        return
    else:
        time_rmsd = rmsd(time_list1, time_list2)
        print " Data sets have the same number of time points."
        print "   Time RMSD: %.2e" % (time_rmsd)

    #
    # Second, check if each scan has the same number of m/z intensities
    #

    print " Checking for consistency in scan lengths ...",
    sys.stdout.flush()

    scan_list1 = data1.get_scan_list()
    scan_list2 = data2.get_scan_list()
    if not len(scan_list1) == len(scan_list2):
        # since the number of rention times are the same, this indicated
        # some unexpected problem with data
        error("inconsistency in data detected")

    N = len(scan_list1)

    for ii in range(N):
        scan1 = scan_list1[ii]
        scan2 = scan_list2[ii]
        mass_list1 = scan1.get_mass_list()
        mass_list2 = scan2.get_mass_list()
        if len(mass_list1) != len(mass_list2):
            print "\n Different number of points detected in scan no. %d" % (
                ii)
            print " Data sets are different."
            return

    print "OK"

    #
    # Third, if here, calculate the max RMSD for m/z and intensities
    #

    print " Calculating maximum RMSD for m/z values and intensities ...",
    sys.stdout.flush()

    max_mass_rmsd = 0.0
    max_intensity_rmsd = 0.0

    for ii in range(N):
        scan1 = scan_list1[ii]
        scan2 = scan_list2[ii]
        mass_list1 = scan1.get_mass_list()
        mass_list2 = scan2.get_mass_list()
        intensity_list1 = scan1.get_intensity_list()
        intensity_list2 = scan2.get_intensity_list()
        mass_rmsd = rmsd(mass_list1, mass_list2)
        if mass_rmsd > max_mass_rmsd:
            max_mass_rmsd = mass_rmsd
        intensity_rmsd = rmsd(intensity_list1, intensity_list2)
        if intensity_rmsd > max_intensity_rmsd:
            max_intensity_rmsd = intensity_rmsd

    print "\n   Max m/z RMSD: %.2e" % (max_mass_rmsd)
    print "   Max intensity RMSD: %.2e" % (max_intensity_rmsd)
Beispiel #2
0
def diff(data1, data2):
    """
    Print a comparison report for two GCMS_data objects.

    The comparison runs in three stages and stops at the first stage
    that finds a structural difference:

    1. the number of retention time points (the time RMSD is printed
       when the counts agree);
    2. the number of m/z points in each pair of corresponding scans;
    3. the largest per-scan RMSD of the m/z values and of the
       intensities.

    All results are printed to standard output; nothing is returned.

    :param data1: GCMS data set 1
    :type data1: pyms.GCMS.Class.GCMS_data
    :param data2: GCMS data set 2
    :type data2: pyms.GCMS.Class.GCMS_data

    :raises ValueError: if the time point counts agree but the two data
        sets hold a different number of scans

    :author: Qiao Wang
    :author: Andrew Isaac
    :author: Vladimir Likic
    """

    times_a = data1.get_time_list()
    times_b = data2.get_time_list()

    # Stage 1: both data sets must have equally many retention times.
    if len(times_a) != len(times_b):
        print(" The number of retention time points differ.")
        print(f"\tFirst data set: {len(times_a):d} time points")
        print(f"\tSecond data set: {len(times_b):d} time points")
        print(" Data sets are different.")
        return

    time_rmsd = rmsd(times_a, times_b)
    print(" Data sets have the same number of time points.")
    print(f"   Time RMSD: {time_rmsd:.2e}")

    # Stage 2: corresponding scans must hold equally many m/z points.
    # No newline is emitted so the verdict is appended to this line;
    # flush so the message shows before the (possibly slow) check runs.
    print(" Checking for consistency in scan lengths ...", end='')
    sys.stdout.flush()

    scans_a = data1.get_scan_list()
    scans_b = data2.get_scan_list()
    if len(scans_a) != len(scans_b):
        # The retention time counts already matched, so a mismatch here
        # indicates some unexpected problem with the data.
        raise ValueError("inconsistency in data detected")

    # The scan lists are known to be equally long from here on, so
    # pairing them with zip() visits every scan of both data sets.
    for scan_no, (scan_a, scan_b) in enumerate(zip(scans_a, scans_b)):
        masses_a = scan_a.get_mass_list()
        masses_b = scan_b.get_mass_list()
        if len(masses_a) != len(masses_b):
            print(
                f"\n Different number of points detected in scan no. {scan_no:d}"
            )
            print(" Data sets are different.")
            return

    print("OK")

    # Stage 3: the data sets are structurally identical, so report the
    # worst per-scan RMSD for the m/z values and for the intensities.
    print(
        " Calculating maximum RMSD for m/z values and intensities ...",
        end='',
    )
    sys.stdout.flush()

    worst_mass_rmsd = 0.0
    worst_intensity_rmsd = 0.0

    for scan_a, scan_b in zip(scans_a, scans_b):
        masses_a = scan_a.get_mass_list()
        masses_b = scan_b.get_mass_list()
        intensities_a = scan_a.get_intensity_list()
        intensities_b = scan_b.get_intensity_list()
        # max() keeps the current value unless the new one is strictly
        # greater, exactly like an explicit "if new > current" update.
        worst_mass_rmsd = max(worst_mass_rmsd, rmsd(masses_a, masses_b))
        worst_intensity_rmsd = max(
            worst_intensity_rmsd, rmsd(intensities_a, intensities_b)
        )

    print(f"\n   Max m/z RMSD: {worst_mass_rmsd:.2e}")
    print(f"   Max intensity RMSD: {worst_intensity_rmsd:.2e}")
Beispiel #3
0
def diff(data1, data2):

    """
    @summary: Compares two GCMS_data objects

    @param data1: GCMS data set 1
    @type data1: pyms.GCMS.Class.GCMS_data
    @param data2: GCMS data set 2
    @type data2: pyms.GCMS.Class.GCMS_data

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # get time attributes
    time_list1 = data1.get_time_list()
    time_list2 = data2.get_time_list()

    #
    # First, check if two data sets have the same number of retention
    # times.
    #
    if not len(time_list1) == len(time_list2):
        print " -> The number of retention time points different."
        print " First data set: %d time points" % (len(time_list1))
        print " Second data set: %d time points" % (len(time_list2))
        print " Data sets are different."
        return
    else:
        time_rmsd = rmsd(time_list1, time_list2)
        print " Data sets have the same number of time points."
        print "   Time RMSD: %.2e" % ( time_rmsd )

    #
    # Second, check if each scan has the same number of m/z intensities
    #

    print " Checking for consistency in scan lengths ...",
    sys.stdout.flush()

    scan_list1 = data1.get_scan_list()
    scan_list2 = data2.get_scan_list()
    if not len(scan_list1) == len(scan_list2):
        # since the number of rention times are the same, this indicated
        # some unexpected problem with data
        error("inconsistency in data detected")

    N = len(scan_list1)

    for ii in range(N):
        scan1 = scan_list1[ii]
        scan2 = scan_list2[ii]
        mass_list1 = scan1.get_mass_list()
        mass_list2 = scan2.get_mass_list()
        if len(mass_list1) != len(mass_list2):
            print "\n Different number of points detected in scan no. %d" % ( ii )
            print " Data sets are different."
            return

    print "OK"

    #
    # Third, if here, calculate the max RMSD for m/z and intensities
    #

    print " Calculating maximum RMSD for m/z values and intensities ...",
    sys.stdout.flush()

    max_mass_rmsd = 0.0
    max_intensity_rmsd = 0.0

    for ii in range(N):
        scan1 = scan_list1[ii]
        scan2 = scan_list2[ii]
        mass_list1 = scan1.get_mass_list()
        mass_list2 = scan2.get_mass_list()
        intensity_list1 = scan1.get_intensity_list()
        intensity_list2 = scan2.get_intensity_list()
        mass_rmsd = rmsd(mass_list1, mass_list2)
        if mass_rmsd > max_mass_rmsd:
            max_mass_rmsd = mass_rmsd
        intensity_rmsd = rmsd(intensity_list1, intensity_list2)
        if intensity_rmsd > max_intensity_rmsd:
            max_intensity_rmsd = intensity_rmsd

    print "\n   Max m/z RMSD: %.2e" % ( max_mass_rmsd )
    print "   Max intensity RMSD: %.2e" % ( max_intensity_rmsd )