Exemple #1
0
def store_expr(file_name, expr):
    """
    @summary: Stores an experiment to a file by pickling it

    @param file_name: The name of the output file
    @type file_name: StringType
    @param expr: An experiment object
    @type expr: pyms.Experiment.Class.Experiment

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    @author: Andrew Isaac
    """

    if not isinstance(expr, Experiment):
        error("argument not an instance of the class 'Experiment'")

    if not is_str(file_name):
        error("'file_name' not a string")

    # Pickle protocol 1 is a binary format, so the file has to be opened
    # in binary mode; text mode may translate line endings and corrupt
    # the stream on some platforms.
    fp = open(file_name, 'wb')
    try:
        cPickle.dump(expr, fp, 1)
    finally:
        # release the handle even when pickling fails
        fp.close()
Exemple #2
0
    def write_intensities_stream(self, file_name):

        """
        @summary: Writes all intensities to a file

        Loops over all scans and, for each scan, writes its
        intensities to the file, one intensity per line, formatted
        as "%8.4f". Intensities from different scans are joined
        without any delimiters.

        @param file_name: Output file name
        @type file_name: StringType

        @return: none
        @rtype: NoneType

        @author: Vladimir Likic
        """

        if not is_str(file_name):
            error("'file_name' must be a string")

        # parenthesised form prints the same text under Python 2 and 3
        print(" -> Writing scans to a file")

        fp = open_for_writing(file_name)

        try:
            for scan in self.__scan_list:
                for intensity in scan.get_intensity_list():
                    fp.write("%8.4f\n" % (intensity))
        finally:
            # close the file even if a scan cannot be written
            close_for_writing(fp)
Exemple #3
0
    def write_intensities_stream(self, file_name):

        """
        @summary: Writes all intensities to a file

        Every scan is visited in order and each of its intensities
        is written on a line of its own using the "%8.4f" format.
        Nothing separates the intensities of one scan from those of
        the next.

        @param file_name: Output file name
        @type file_name: StringType

        @return: none
        @rtype: NoneType

        @author: Vladimir Likic
        """

        if not is_str(file_name):
            error("'file_name' must be a string")

        # parenthesised form prints the same text under Python 2 and 3
        print(" -> Writing scans to a file")

        fp = open_for_writing(file_name)

        try:
            for scan in self.__scan_list:
                for intensity in scan.get_intensity_list():
                    fp.write("%8.4f\n" % (intensity))
        finally:
            # close the file even if a scan cannot be written
            close_for_writing(fp)
Exemple #4
0
def read_expr_list(file_name):
    """
    @summary: Reads the set of experiment files and returns a list of
        Experiment objects

    Each non-blank line of 'file_name' is stripped of surrounding
    whitespace and passed to load_expr(); blank lines are skipped.

    @param file_name: The name of the file which lists experiment
        dump file names, one file per line
    @type file_name: StringType

    @return: A list of Experiment instances, in file order
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("file_name argument must be a string")
    try:
        fp = open(file_name, 'r')
    except IOError:
        error("error opening file '%s' for reading" % file_name)

    try:
        exprfiles = fp.readlines()
    finally:
        # release the handle even when reading fails
        fp.close()

    exprl = []

    for exprfile in exprfiles:

        # str.strip() replaces string.strip(), which exists only in the
        # Python 2 'string' module
        exprfile = exprfile.strip()

        # a blank line (e.g. a trailing newline) cannot name a file
        if not exprfile:
            continue

        exprl.append(load_expr(exprfile))

    return exprl
Exemple #5
0
def load_peaks(file_name):
    """
    @summary: Loads the peak_list stored with 'store_peaks'

    The unpickled object must be a non-empty list whose first element
    is a Peak; anything else is reported through error().

    @param file_name: File name of peak list
    @type file_name: StringType

    @return: The list of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' not a string")

    # NOTE(review): unpickling can execute arbitrary code, so only load
    # peak files that come from a trusted source.
    # Pickle data is binary, hence the 'rb' mode.
    fp = open(file_name, 'rb')
    try:
        peak_list = cPickle.load(fp)
    finally:
        # release the handle even when unpickling fails
        fp.close()

    if not is_list(peak_list):
        error("'file_name' is not a List")
    # 'or', not 'and': with 'and' an empty list raised IndexError on
    # peak_list[0] and a non-empty list was never type-checked at all.
    if not len(peak_list) > 0 or not isinstance(peak_list[0], Peak):
        error("'peak_list' must be a list of Peak objects")

    return peak_list
Exemple #6
0
    def write(self, file_name, minutes=False):

        """
        @summary: Writes the ion chromatogram to the specified file

        One line is written per time point, holding the time value
        and the value stored at the same index of the intensity
        array, formatted as "%8.4f %#.6e".

        @param file_name: Output file name
        @type file_name: StringType
        @param minutes: If true, every time value is divided by 60.0
            before it is written
        @type minutes: BooleanType

        @return: none
        @rtype: NoneType

        @author: Lewis Lee
        @author: Vladimir Likic
        """

        if not is_str(file_name):
            error("'file_name' must be a string")

        out = open_for_writing(file_name)

        # work on a private copy so the stored time list is left untouched
        times = copy.deepcopy(self.__time_list)

        if minutes:
            for idx, value in enumerate(times):
                times[idx] = value/60.0

        for idx, value in enumerate(times):
            out.write("%8.4f %#.6e\n" % (value, self.__ia[idx]))

        close_for_writing(out)
Exemple #7
0
def time_str_secs(time_str):
    """
    @summary: Resolves time string of the form "<NUMBER>s" or "<NUMBER>m",
        returns time in seconds

    The last character selects the unit (case-insensitive); everything
    before it must be accepted by is_str_num(). Malformed input,
    including the empty string, is reported through error().

    @param time_str: A time string, which must be of the form
        "<NUMBER>s" or "<NUMBER>m" where "<NUMBER>" is a valid number
    @type time_str: StringType

    @return: Time in seconds
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_str(time_str):
        error("time string not a string")

    # An empty string has no unit character; report it rather than
    # failing with an IndexError on time_str[-1] below.
    if len(time_str) == 0:
        error("improper time string")

    time_number = time_str[:-1]
    time_spec = time_str[-1].lower()

    if not is_str_num(time_number):
        # parenthesised form prints the same text under Python 2 and 3
        print(" --> received time string '%s'" % (time_number))
        error("improper time string")

    if time_spec not in ("s", "m"):
        error("time string must end with either 's' or 'm'")

    seconds = float(time_number)

    if time_spec == "m":
        # minutes -> seconds
        seconds = seconds * 60.0

    return seconds
Exemple #8
0
    def write(self, file_name, minutes=False):

        """
        @summary: Writes the ion chromatogram to the specified file

        Each output line holds one time value followed by the value
        found at the same index of the intensity array, using the
        "%8.4f %#.6e" format.

        @param file_name: Output file name
        @type file_name: StringType
        @param minutes: If true, every time value is divided by 60.0
            before it is written
        @type minutes: BooleanType

        @return: none
        @rtype: NoneType

        @author: Lewis Lee
        @author: Vladimir Likic
        """

        if not is_str(file_name):
            error("'file_name' must be a string")

        out = open_for_writing(file_name)

        # a deep copy keeps the stored time list unchanged
        times = copy.deepcopy(self.__time_list)

        if minutes:
            for pos, secs in enumerate(times):
                times[pos] = secs/60.0

        for pos, stamp in enumerate(times):
            out.write("%8.4f %#.6e\n" % (stamp, self.__ia[pos]))

        close_for_writing(out)
Exemple #9
0
def time_str_secs(time_str):

    """
    @summary: Resolves time string of the form "<NUMBER>s" or "<NUMBER>m",
        returns time in seconds

    The trailing character is the unit (matched case-insensitively)
    and the leading part must be accepted by is_str_num(). Malformed
    input, including the empty string, is reported through error().

    @param time_str: A time string, which must be of the form
        "<NUMBER>s" or "<NUMBER>m" where "<NUMBER>" is a valid number
    @type time_str: StringType

    @return: Time in seconds
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_str(time_str):
        error("time string not a string")

    # Guard the empty string explicitly: indexing time_str[-1] on it
    # would otherwise raise IndexError instead of a readable error.
    if len(time_str) == 0:
        error("improper time string")

    time_number = time_str[:-1]
    time_spec = time_str[-1].lower()

    if not is_str_num(time_number):
        # parenthesised form prints the same text under Python 2 and 3
        print(" --> received time string '%s'" % (time_number))
        error("improper time string")

    if time_spec not in ("s", "m"):
        error("time string must end with either 's' or 'm'")

    seconds = float(time_number)

    if time_spec == "m":
        # minutes -> seconds
        seconds = seconds*60.0

    return seconds
Exemple #10
0
def load_expr(file_name):

    """
    @summary: Loads an experiment saved with 'store_expr'

    @param file_name: Experiment file name
    @type file_name: StringType

    @return: The unpickled Experiment object
    @rtype: pyms.Experiment.Class.Experiment

    @author: Vladimir Likic
    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' not a string")

    # NOTE(review): unpickling can execute arbitrary code, so only load
    # experiment files that come from a trusted source.
    fp = open(file_name,'rb')
    try:
        expr = cPickle.load(fp)
    finally:
        # release the handle even when unpickling fails
        fp.close()

    if not isinstance(expr, Experiment):
        error("'file_name' is not an Experiment object")

    return expr
Exemple #11
0
def store_expr(file_name, expr):

    """
    @summary: Stores an experiment to a file by pickling it

    @param file_name: The name of the output file
    @type file_name: StringType
    @param expr: An experiment object
    @type expr: pyms.Experiment.Class.Experiment

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    @author: Andrew Isaac
    """

    if not isinstance(expr, Experiment):
        error("argument not an instance of the class 'Experiment'")

    if not is_str(file_name):
        error("'file_name' not a string")

    # binary mode matches the binary pickle protocol (1) used below
    fp = open(file_name,'wb')
    try:
        cPickle.dump(expr, fp, 1)
    finally:
        # release the handle even when pickling fails
        fp.close()
Exemple #12
0
def load_peaks(file_name):

    """
    @summary: Loads the peak_list stored with 'store_peaks'

    The unpickled object must be a non-empty list whose first element
    is a Peak; anything else is reported through error().

    @param file_name: File name of peak list
    @type file_name: StringType

    @return: The list of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' not a string")

    # NOTE(review): unpickling can execute arbitrary code, so only load
    # peak files that come from a trusted source.
    # Pickle data is binary, hence the 'rb' mode.
    fp = open(file_name,'rb')
    try:
        peak_list = cPickle.load(fp)
    finally:
        # release the handle even when unpickling fails
        fp.close()

    if not is_list(peak_list):
        error("'file_name' is not a List")
    # 'or', not 'and': with 'and' an empty list raised IndexError on
    # peak_list[0] and a non-empty list was never type-checked at all.
    if not len(peak_list) > 0 or not isinstance(peak_list[0], Peak):
        error("'peak_list' must be a list of Peak objects")

    return peak_list
Exemple #13
0
def load_expr(file_name):
    """
    @summary: Loads an experiment saved with 'store_expr'

    @param file_name: Experiment file name
    @type file_name: StringType

    @return: The unpickled Experiment object
    @rtype: pyms.Experiment.Class.Experiment

    @author: Vladimir Likic
    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' not a string")

    # NOTE(review): unpickling can execute arbitrary code, so only load
    # experiment files that come from a trusted source.
    # Pickle data is binary; reading it in text mode can corrupt the
    # stream on platforms that translate line endings.
    fp = open(file_name, 'rb')
    try:
        expr = cPickle.load(fp)
    finally:
        # release the handle even when unpickling fails
        fp.close()

    if not isinstance(expr, Experiment):
        error("'file_name' is not an Experiment object")

    return expr
Exemple #14
0
    def write(self, file_root):

        """
        @summary: Writes the entire raw data to two files, one
            'file_root'.I.csv (intensities) and 'file_root'.mz.csv
            (m/z values).

            This method writes two CSV files, containing intensities
            and corresponding m/z values. Each scan becomes one
            comma-separated row in each file, values formatted as
            "%.4f". In general these are not two-dimensional
            matrices, because different scans may have different
            number of m/z values recorded.

        @param file_root: The root for the output file names
        @type file_root: StringType

        @return: none
        @rtype: NoneType

        @author: Vladimir Likic
        """

        if not is_str(file_root):
            error("'file_root' must be a string")

        file_name1 = file_root + ".I.csv"
        file_name2 = file_root + ".mz.csv"

        # parenthesised form prints the same text under Python 2 and 3
        print(" -> Writing intensities to '%s'" % ( file_name1 ))
        print(" -> Writing m/z values to '%s'" % ( file_name2 ))

        fp1 = open_for_writing(file_name1)
        fp2 = open_for_writing(file_name2)

        try:
            for scan in self.__scan_list:

                intensity_list = scan.get_intensity_list()
                mass_list = scan.get_mass_list()

                # join() puts the commas between values, so no
                # first-element special case (or reused loop index)
                # is needed
                fp1.write(",".join(["%.4f" % (v) for v in intensity_list]))
                fp1.write("\n")

                fp2.write(",".join(["%.4f" % (v) for v in mass_list]))
                fp2.write("\n")
        finally:
            # close both files even if a scan cannot be written
            close_for_writing(fp1)
            close_for_writing(fp2)
Exemple #15
0
    def write(self, file_root):

        """
        @summary: Writes the entire raw data to two files, one
            'file_root'.I.csv (intensities) and 'file_root'.mz.csv
            (m/z values).

            Every scan contributes one comma-separated row to each
            of the two files, with values formatted as "%.4f". In
            general the files are not two-dimensional matrices,
            because different scans may have different number of
            m/z values recorded.

        @param file_root: The root for the output file names
        @type file_root: StringType

        @return: none
        @rtype: NoneType

        @author: Vladimir Likic
        """

        if not is_str(file_root):
            error("'file_root' must be a string")

        file_name1 = file_root + ".I.csv"
        file_name2 = file_root + ".mz.csv"

        # parenthesised form prints the same text under Python 2 and 3
        print(" -> Writing intensities to '%s'" % ( file_name1 ))
        print(" -> Writing m/z values to '%s'" % ( file_name2 ))

        fp1 = open_for_writing(file_name1)
        fp2 = open_for_writing(file_name2)

        try:
            for scan in self.__scan_list:

                intensity_list = scan.get_intensity_list()
                mass_list = scan.get_mass_list()

                # join() places the separators, replacing the manual
                # "first element" test and the inner loops that reused
                # the outer loop index
                fp1.write(",".join(["%.4f" % (v) for v in intensity_list]))
                fp1.write("\n")

                fp2.write(",".join(["%.4f" % (v) for v in mass_list]))
                fp2.write("\n")
        finally:
            # close both files even if a scan cannot be written
            close_for_writing(fp1)
            close_for_writing(fp2)
Exemple #16
0
def sele_peaks_by_rt(peaks, rt_range):

    """
    @summary: Selects the peaks whose retention time lies strictly
        between two limits

    @param peaks: A list of peak objects
    @type peaks: ListType
    @param rt_range: A list of two time strings, the lower and the
        upper retention time limit, in that order. Both are converted
        with time_str_secs() and the lower must be less than the upper
    @type rt_range: ListType

    @return: The peaks with lower < retention time < upper, in their
        original order
    @rtype: ListType
    """

    if not is_peak_list(peaks):
        error("'peaks' not a peak list")

    if is_list(rt_range):
        if len(rt_range) != 2:
            error("'rt_range' must have exactly two elements")

        if not (is_str(rt_range[0]) and is_str(rt_range[1])):
            error("lower/upper retention time limits must be strings")
    else:
        error("'rt_range' not a list")

    lower = time_str_secs(rt_range[0])
    upper = time_str_secs(rt_range[1])

    if not lower < upper:
        error("lower retention time limit must be less than upper")

    # both bounds are exclusive
    return [peak for peak in peaks if lower < peak.get_rt() < upper]
Exemple #17
0
    def export_leco_csv(self, file_name):

        """
        @summary: Exports data in LECO CSV format

        The first line is a header: "Scan","Time" followed by every
        mass list entry as a quoted integer. Each following line
        holds the scan index, the scan time and that scan's
        intensity values, the floats written with "%#.6e". Lines end
        with CR/LF.

        @param file_name: File name
        @type file_name: StringType

        @return: none
        @rtype: NoneType

        @author: Andrew Isaac
        @author: Vladimir Likic
        """

        if not is_str(file_name):
            error("'file_name' is not a string")

        masses = self.__mass_list
        times = self.__time_list
        matrix = self.__intensity_matrix

        out = open_for_writing(file_name)

        # header line
        out.write("\"Scan\",\"Time\"")
        for mass in masses:
            if not is_number(mass):
                error("mass list datum not a number")
            else:
                out.write(",\"%d\"" % int(mass))
        out.write("\r\n")  # windows CR/LF

        # one data line per scan: scan_number,time,value,value,...
        for scan_no, scan_time in enumerate(times):
            out.write("%s,%#.6e" % (scan_no, scan_time))
            for datum in matrix[scan_no]:
                if not is_number(datum):
                    error("datum not a number")
                else:
                    out.write(",%#.6e" % (datum))
            out.write("\r\n")

        close_for_writing(out)
Exemple #18
0
    def export_leco_csv(self, file_name):

        """
        @summary: Exports data in LECO CSV format

        A header line ("Scan","Time" plus each mass list entry as a
        quoted integer) is followed by one line per scan holding
        the scan index, the scan time and the scan's intensity
        values, with floats written as "%#.6e". Every line is
        terminated with CR/LF.

        @param file_name: File name
        @type file_name: StringType

        @return: none
        @rtype: NoneType

        @author: Andrew Isaac
        @author: Vladimir Likic
        """

        if not is_str(file_name):
            error("'file_name' is not a string")

        masses = self.__mass_list
        times = self.__time_list
        matrix = self.__intensity_matrix

        out = open_for_writing(file_name)

        # header line
        out.write("\"Scan\",\"Time\"")
        for mass in masses:
            if not is_number(mass):
                error("mass list datum not a number")
            else:
                out.write(",\"%d\"" % int(mass))
        out.write("\r\n")  # windows CR/LF

        # data lines: scan_number,time,value,value,...
        for row, stamp in enumerate(times):
            out.write("%s,%#.6e" % (row, stamp))
            for datum in matrix[row]:
                if not is_number(datum):
                    error("datum not a number")
                else:
                    out.write(",%#.6e" % (datum))
            out.write("\r\n")

        close_for_writing(out)
Exemple #19
0
def window_sele_points(ic, window_sele, half_window=False):

    """
    @summary: Converts window selection parameter into points based
        on the time step in an ion chromatogram

    @param ic: ion chromatogram object relevant for the conversion;
        only its get_time_step() is used, and only for time strings
    @type ic: pyms.GCMS.Class.IonChromatogram

    @param window_sele: The window selection parameter. This can be
        an integer or time string. If integer, taken as the number
        of points. If a string, must be of the form "<NUMBER>s" or
        "<NUMBER>m", specifying a time in seconds or minutes,
        respectively
    @type window_sele: IntType or StringType

    @param half_window: Specifies whether to return half-window. An
        integer window must then be odd
    @type half_window: BooleanType

    @return: The number of points in the window (at least 2), or in
        the half-window (at least 1)
    @rtype: IntType

    @author: Vladimir Likic
    """

    if not (is_int(window_sele) or is_str(window_sele)):
        error("'window' must be an integer or a string")

    if not is_int(window_sele):
        # time string: seconds first, then whole time steps
        seconds = time_str_secs(window_sele)
        step = ic.get_time_step()

        if half_window:
            seconds = seconds*0.5

        points = int(math.floor(seconds/step))
    elif not half_window:
        points = window_sele
    elif window_sele % 2 == 0:
        error("window must be an odd number of points")
    else:
        points = int(math.floor(window_sele*0.5))

    if half_window:
        if points < 1:
            error("window too small (half window=%d)" % (points))
    else:
        if points < 2:
            error("window too small (window=%d)" % (points))

    return points
Exemple #20
0
def window_sele_points(ic, window_sele, half_window=False):
    """
    @summary: Converts window selection parameter into points based
        on the time step in an ion chromatogram

    @param ic: ion chromatogram object relevant for the conversion;
        its get_time_step() is consulted only for time strings
    @type ic: pyms.GCMS.Class.IonChromatogram

    @param window_sele: The window selection parameter. This can be
        an integer or time string. If integer, taken as the number
        of points. If a string, must be of the form "<NUMBER>s" or
        "<NUMBER>m", specifying a time in seconds or minutes,
        respectively
    @type window_sele: IntType or StringType

    @param half_window: Specifies whether to return half-window. An
        integer window must then be odd
    @type half_window: BooleanType

    @return: The number of points in the window (at least 2), or in
        the half-window (at least 1)
    @rtype: IntType

    @author: Vladimir Likic
    """

    if not (is_int(window_sele) or is_str(window_sele)):
        error("'window' must be an integer or a string")

    if not is_int(window_sele):
        # time string: convert to seconds, then count whole time steps
        duration = time_str_secs(window_sele)
        step = ic.get_time_step()

        if half_window:
            duration = duration * 0.5

        points = int(math.floor(duration / step))
    elif not half_window:
        points = window_sele
    elif window_sele % 2 == 0:
        error("window must be an odd number of points")
    else:
        points = int(math.floor(window_sele * 0.5))

    if half_window:
        if points < 1:
            error("window too small (half window=%d)" % (points))
    else:
        if points < 2:
            error("window too small (window=%d)" % (points))

    return points
Exemple #21
0
    def export_ascii(self, root_name, format='dat'):

        """
        @summary: Exports the intensity matrix, retention time vector, and
        m/z vector to the ascii format

        By default, export_ascii("NAME") will create NAME.im.dat, NAME.mz.dat,
        and NAME.rt.dat holding the intensity matrix, m/z vector, and
        retention time vector, with values separated by a single space.
        If format='csv', the values are separated by commas and the files
        are named NAME.im.csv, NAME.mz.csv, and NAME.rt.csv.

        @param root_name: Root name for the output files
        @type root_name: StringType
        @param format: Output format, either 'dat' or 'csv'
        @type format: StringType

        @return: none
        @rtype: NoneType

        @author: Milica Ng
        @author: Andrew Isaac
        @author: Vladimir Likic
        """

        if not is_str(root_name):
            error("'root_name' is not a string")

        if format == 'csv':
            delimiter = ","
            suffix = ".csv"
        elif format == 'dat':
            delimiter = " "
            suffix = ".dat"
        else:
            error("unkown format '%s'. Only 'dat' or 'csv' supported" % format)

        # files are written in this order: intensities, m/z, retention times
        outputs = (
            ('.im', self.__intensity_matrix),
            ('.mz', self.__mass_list),
            ('.rt', self.__time_list),
        )
        for tag, data in outputs:
            save_data(root_name+tag+suffix, data, sep=delimiter)
Exemple #22
0
    def export_ascii(self, root_name, format='dat'):

        """
        @summary: Exports the intensity matrix, retention time vector, and
        m/z vector to the ascii format

        With the default format, export_ascii("NAME") writes NAME.im.dat
        (intensity matrix), NAME.mz.dat (m/z vector) and NAME.rt.dat
        (retention time vector), values separated by a single space.
        With format='csv' the values are comma-separated and the files
        are NAME.im.csv, NAME.mz.csv, and NAME.rt.csv.

        @param root_name: Root name for the output files
        @type root_name: StringType
        @param format: Output format, either 'dat' or 'csv'
        @type format: StringType

        @return: none
        @rtype: NoneType

        @author: Milica Ng
        @author: Andrew Isaac
        @author: Vladimir Likic
        """

        if not is_str(root_name):
            error("'root_name' is not a string")

        if format == 'csv':
            delimiter = ","
            suffix = ".csv"
        elif format == 'dat':
            delimiter = " "
            suffix = ".dat"
        else:
            error("unkown format '%s'. Only 'dat' or 'csv' supported" % format)

        # written in this order: intensities, m/z, retention times
        targets = (
            ('.im', self.__intensity_matrix),
            ('.mz', self.__mass_list),
            ('.rt', self.__time_list),
        )
        for tag, payload in targets:
            save_data(root_name+tag+suffix, payload, sep=delimiter)
Exemple #23
0
def mzML_reader(file_name):

    """
    @summary: A reader for mzML files, returns
        a GC-MS data object

    Every spectrum's peaks are split into a mass list and an
    intensity list. A scan is recorded only for spectra that carry a
    time element (accession "MS:1000016"); spectra without one are
    skipped.

    @param file_name: The name of the mzML file
    @type file_name: StringType

    @return: GC-MS data object built from the collected times and scans
    @rtype: GCMS_data

    @author: Sean O'Callaghan
    """

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        mzml_file = pymzml.run.Reader(file_name)
    except Exception:
        # 'Exception' instead of a bare except, so KeyboardInterrupt
        # and SystemExit are not reported as a file problem
        error("Cannot open file '%s'" % file_name)

    # parenthesised form prints the same text under Python 2 and 3
    print(" -> Reading mzML file '%s'" % (file_name))

    scan_list = []
    time_list = []

    for spectrum in mzml_file:
        mass_list = []
        intensity_list = []

        for mz, i in spectrum.peaks:
            mass_list.append(mz)
            intensity_list.append(i)

        for element in spectrum.xmlTree:
            # Some spectra have no time value; they produce no scan.
            if element.get('accession') == "MS:1000016":  # time value
                # The value is multiplied by 60 to get seconds --
                # assumes the file stores the time in minutes; TODO confirm
                time_list.append(60*float(element.get('value')))
                scan_list.append(Scan(mass_list, intensity_list))

    print("time: %d" % len(time_list))
    print("scan: %d" % len(scan_list))

    data = GCMS_data(time_list, scan_list)

    return data
Exemple #24
0
def store_peaks(peak_list, file_name):
    """
    @summary: Stores the list of peak objects by pickling it to a file

    @param peak_list: A list of peak objects
    @type peak_list: pyms.Peaks.Class.Peak
    @param file_name: File name to store peak list
    @type file_name: StringType

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' must be a string")

    # Pickle protocol 1 is a binary format, so the file has to be opened
    # in binary mode; text mode may translate line endings and corrupt
    # the stream on some platforms.
    fp = open(file_name, 'wb')
    try:
        cPickle.dump(peak_list, fp, 1)
    finally:
        # release the handle even when pickling fails
        fp.close()
Exemple #25
0
    def __init__(self, expr_code, peak_list):

        """
        @summary: Creates an experiment from an identifier and its peaks

        Both arguments are validated and then stored on the instance.

        @param expr_code: Unique identifier for the experiment
        @type expr_code: StringType
        @param peak_list: A list of peak objects
        @type peak_list: ListType
        """

        code_is_string = is_str(expr_code)
        if not code_is_string:
            error("'expr_code' must be a string")

        peaks_are_valid = is_peak_list(peak_list)
        if not peaks_are_valid:
            error("'peak_list' must be a list of Peak objects")

        self.__peak_list = peak_list
        self.__expr_code = expr_code
Exemple #26
0
def file_lines(file_name, filter=False):

    """
    @summary: Returns lines from a file, as a list

    @param file_name: Name of a file
    @type file_name: StringType
    @param filter: If True, lines are pre-processed: leading and
        trailing whitespace (including the newline character) is
        removed, and empty lines and lines starting with '#' are
        discarded
    @type filter: BooleanType

    @return: A list of lines
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    fp = open_for_reading(file_name)
    lines = fp.readlines()
    close_for_reading(fp)

    if filter:
        # strip leading and trailing whitespace
        stripped = [line.strip() for line in lines]

        # Drop empty lines and comments in a single pass. This keeps the
        # same lines, in the same order, as collecting the unwanted
        # lines and calling list.remove() for each one, without the
        # quadratic cost.
        lines = [line for line in stripped
                 if len(line) > 0 and line[0] != "#"]

    return lines
Exemple #27
0
def store_peaks(peak_list, file_name):

    """
    @summary: Stores the list of peak objects by pickling it to a file

    @param peak_list: A list of peak objects
    @type peak_list: pyms.Peaks.Class.Peak
    @param file_name: File name to store peak list
    @type file_name: StringType

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' must be a string")

    # The pickle is written with the binary protocol 1, so the file is
    # opened in binary mode; text mode may translate line endings and
    # corrupt the stream on some platforms.
    fp = open(file_name,'wb')
    try:
        cPickle.dump(peak_list, fp, 1)
    finally:
        # release the handle even when pickling fails
        fp.close()
Exemple #28
0
def file_lines(file_name, filter=False):
    """
    @summary: Returns lines from a file, as a list

    @param file_name: Name of a file
    @type file_name: StringType
    @param filter: If True, lines are pre-processed: leading and
        trailing whitespace (including the newline character) is
        removed, and empty lines and lines starting with '#' are
        discarded
    @type filter: BooleanType

    @return: A list of lines
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    fp = open_for_reading(file_name)
    lines = fp.readlines()
    close_for_reading(fp)

    if filter:
        # strip leading and trailing whitespace
        stripped = [line.strip() for line in lines]

        # Keep everything that is neither empty nor a comment. One pass
        # gives the same result as removing the unwanted lines one by
        # one with list.remove(), which rescans the list every time.
        lines = [line for line in stripped
                 if len(line) > 0 and line[0] != "#"]

    return lines
Exemple #29
0
def open_for_writing(file_name):
    """
    @summary: Opens a file in write mode ("w") and returns the open
        file object

    A failure to open the file (IOError) is reported through error().

    @param file_name: Name of the file to be opened for writing
    @type file_name: StringType

    @return: Pointer to the opened file
    @rtype: FileType

    @author: Vladimir Likic
    """

    name_is_string = is_str(file_name)
    if not name_is_string:
        error("'file_name' is not a string")

    try:
        handle = open(file_name, "w")
    except IOError:
        message = "Cannot open '%s' for writing" % (file_name)
        error(message)

    return handle
Exemple #30
0
def open_for_writing(file_name):

    """
    @summary: Opens the named file in write mode ("w") and hands back
        the open file object

    If the file cannot be opened (IOError), the problem is reported
    through error().

    @param file_name: Name of the file to be opened for writing
    @type file_name: StringType

    @return: Pointer to the opened file
    @rtype: FileType

    @author: Vladimir Likic
    """

    name_is_string = is_str(file_name)
    if not name_is_string:
        error("'file_name' is not a string")

    try:
        handle = open(file_name, "w")
    except IOError:
        message = "Cannot open '%s' for writing" % (file_name)
        error(message)

    return handle
Exemple #31
0
def read_expr_list(file_name):

    """
    @summary: Reads the set of experiment files and returns a list of
        Experiment objects

    Each non-blank line of 'file_name' is stripped of surrounding
    whitespace and passed to load_expr(); blank lines are skipped.

    @param file_name: The name of the file which lists experiment
        dump file names, one file per line
    @type file_name: StringType

    @return: A list of Experiment instances, in file order
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("file_name argument must be a string")
    try:
        fp = open(file_name, 'r')
    except IOError:
        error("error opening file '%s' for reading" % file_name)

    try:
        exprfiles = fp.readlines()
    finally:
        # release the handle even when reading fails
        fp.close()

    exprl = []

    for exprfile in exprfiles:

        # str.strip() replaces string.strip(), which exists only in the
        # Python 2 'string' module
        exprfile = exprfile.strip()

        # a blank line (e.g. a trailing newline) cannot name a file
        if not exprfile:
            continue

        exprl.append(load_expr(exprfile))

    return exprl
Exemple #32
0
def ANDI_reader(file_name):

    """
    @summary: A reader for ANDI-MS NetCDF files, returns
        a GC-MS data object

        The mass and intensity values are read as two flat sequences
        of equal length. Scan boundaries are recovered by assuming
        that masses are in ascending order within a scan, so a
        decrease in mass marks the start of a new scan.

    @param file_name: The name of the ANDI-MS file
    @type file_name: StringType

    @return: The result of GCMS_data(time_list, scan_list)
    @rtype: GCMS_data

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    ## TODO: use 'point_count' and allow for zero len scans

    # the keys used to retrieve certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        # read-only access is all a reader needs; "r+" would demand
        # write permission on the file
        rootgrp = Dataset(file_name, "r")
    except Exception:  ## <TODO> to find out if netCDF4 throws specific errors that we can use here
        error("Cannot open file '%s'" % file_name)

    print(" -> Reading netCDF file '%s'" % (file_name))

    # copy everything into plain lists so the dataset can be closed
    # before the scans are assembled
    try:
        mass_values = rootgrp.variables[__MASS_STRING][:].tolist()
        intensity_values = rootgrp.variables[__INTENSITY_STRING][:].tolist()
        time_list = rootgrp.variables[__TIME_STRING][:].tolist()
    finally:
        rootgrp.close()

    if not len(mass_values) == len(intensity_values):
        error("length of mass_list is not equal to length of intensity_list !")
    if len(mass_values) == 0:
        error("file '%s' contains no mass values" % file_name)

    scan_list = []
    mass_list = [mass_values[0]]
    intensity_list = [intensity_values[0]]
    for i in range(1, len(mass_values)):
        mass = mass_values[i]
        # assume masses in ascending order until new scan
        if mass_list[-1] <= mass:
            mass_list.append(mass)
            intensity_list.append(intensity_values[i])
        # new scan
        else:
            scan_list.append(Scan(mass_list, intensity_list))
            mass_list = [mass]
            intensity_list = [intensity_values[i]]
    # store final scan
    scan_list.append(Scan(mass_list, intensity_list))

    # sanity check
    if not len(time_list) == len(scan_list):
        error("number of time points does not equal the number of scans")

    return GCMS_data(time_list, scan_list)
Exemple #33
0
    def trim(self, begin=None, end=None):

        """
        @summary: trims data in the time domain

        @param begin: begin parameter designating start time or
            scan number
        @type begin: IntType or StrType
        @param end: end parameter designating end time or
            scan number
        @type end: IntType or StrType

            The arguments 'begin' and 'end' can be either integers
            (in which case they are taken as the first/last scan
            number for trimming) or strings in which case they are
            treated as time strings and converted to scan numbers.

            At present both 'begin' and 'end' must be of the same
            type, either both scan numbers or time strings.

            If both arguments are None nothing is changed.

        @return: none
        @rtype: NoneType

        @author: Vladimir Likic
        """

        # trim called with defaults: there is nothing to trim
        if begin is None and end is None:
            print("Nothing to do.")
            return  # exit immediately

        N = len(self.__scan_list)

        # process 'begin' and 'end'
        if begin is None:
            first_scan = 0
        elif is_int(begin):
            # scan numbers start at one, list indices at zero
            first_scan = begin - 1
        elif is_str(begin):
            secs = time_str_secs(begin)
            first_scan = self.get_index_at_time(secs) + 1
        else:
            error("invalid 'begin' argument")

        if end is None:
            last_scan = N - 1
        elif is_int(end):
            # NOTE(review): unlike 'begin', an integer 'end' is used as
            # an index without subtracting one -- confirm this is intended
            last_scan = end
        elif is_str(end):
            secs = time_str_secs(end)
            last_scan = self.get_index_at_time(secs) + 1
        else:
            error("invalid 'end' argument")

        # sanity checks
        if not last_scan > first_scan:
            error("last scan=%d, first scan=%d" % (last_scan, first_scan))
        elif first_scan < 0:
            # scan number 1 maps to index 0 and is valid
            error("scan number must be at least one")
        elif last_scan > N - 1:
            error("last scan=%d, total number of scans=%d" % (last_scan, N))

        print("Trimming data to between %d and %d scans" %
              (first_scan + 1, last_scan + 1))

        # the checks above guarantee 0 <= first_scan < last_scan <= N-1,
        # so an inclusive slice selects exactly the wanted scans
        scan_list_new = self.__scan_list[first_scan:last_scan + 1]
        time_list_new = self.__time_list[first_scan:last_scan + 1]

        # update info
        self.__scan_list = scan_list_new
        self.__set_time(time_list_new)
        self.__set_min_max_mass()
        self.__calc_tic()
Exemple #34
0
def ANDI_writer(file_name, im):

    """
    @summary: A writer for ANDI-MS NetCDF files, stores the contents
        of an IntensityMatrix in a netCDF file

        Only intensities greater than zero are written. For each such
        entry the corresponding mass and intensity are stored as one
        point; the number of points per scan is stored alongside the
        scan times. The data is collected and checked before the
        output file is created, so a failed check does not truncate
        an existing file.

    @param file_name: The name of the ANDI-MS file
    @type file_name: StringType
    @param im: The IntensityMatrix
    @type im: pyms.GCMS.Class.IntensityMatrix

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    """

    # netCDF header info for compatability
    # (reference dump of the header of an example ANDI-MS file)
    # attributes
    #dataset_completeness   0 CHAR     6 C1+C2
    #dataset_origin         4 CHAR    16 Santa Clara, CA
    #experiment_date_time_stamp   7 CHAR    20 20081218044500+1100
    #experiment_title       6 CHAR     7 mix ma
    #experiment_type       10 CHAR    25 Centroided Mass Spectrum
    #external_file_ref_0    9 CHAR     8 MA_5C.M
    #languages              3 CHAR     8 English
    #ms_template_revision   1 CHAR     6 1.0.1
    #netcdf_file_date_time_stamp   5 CHAR    20 20090114001531+1100
    #netcdf_revision        2 CHAR     6 2.3.2
    #number_of_times_calibrated  12 INT      1 0
    #number_of_times_processed  11 INT      1 1
    #operator_name          8 CHAR    12 Dave and Su
    #raw_data_intensity_format  25 CHAR     6 Float
    #raw_data_mass_format  23 CHAR     6 Float
    #raw_data_time_format  24 CHAR     6 Short
    #sample_state          13 CHAR    12 Other State
    #test_detector_type    18 CHAR    20 Electron Multiplier
    #test_ionization_mode  16 CHAR    16 Electron Impact
    #test_ionization_polarity  17 CHAR    18 Positive Polarity
    #test_ms_inlet         15 CHAR    17 Capillary Direct
    #test_resolution_type  19 CHAR    20 Constant Resolution
    #test_scan_direction   21 CHAR     3 Up
    #test_scan_function    20 CHAR    10 Mass Scan
    #test_scan_law         22 CHAR     7 Linear
    #test_separation_type  14 CHAR    18 No Chromatography

    # dimensions
    #_128_byte_string       6    128
    #_16_byte_string        3     16
    #_255_byte_string       7    255
    #_2_byte_string         0      2
    #_32_byte_string        4     32
    #_4_byte_string         1      4
    #_64_byte_string        5     64
    #_8_byte_string         2      8
    #error_number          10      1
    #instrument_number     12      1
    #point_number           9 554826   X
    #range                  8      2
    #scan_number           11   9865

    # variables
    #a_d_coaddition_factor   2 SHORT      0 scan_number(9865)
    #a_d_sampling_rate      1 DOUBLE     0 scan_number(9865)
    #actual_scan_number     7 INT        0 scan_number(9865)
    #error_log              0 CHAR       0 error_number(1), _64_byte_string(64)
    #flag_count            15 INT        0 scan_number(9865)
    #instrument_app_version  27 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_comments   28 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_fw_version  25 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_id         20 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_mfr        21 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_model      22 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_name       19 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_os_version  26 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_serial_no  23 CHAR       0 instrument_number(1), _32_byte_string(32)
    #instrument_sw_version  24 CHAR       0 instrument_number(1), _32_byte_string(32)
    #intensity_values      18 FLOAT      3 point_number(554826)
    #inter_scan_time        5 DOUBLE     0 scan_number(9865)
    #mass_range_max        10 DOUBLE     0 scan_number(9865)
    #mass_range_min         9 DOUBLE     0 scan_number(9865)
    #mass_values           16 FLOAT      2 point_number(554826)
    #point_count           14 INT        0 scan_number(9865)
    #resolution             6 DOUBLE     0 scan_number(9865)
    #scan_acquisition_time   3 DOUBLE     0 scan_number(9865)
    #scan_duration          4 DOUBLE     0 scan_number(9865)
    #scan_index            13 INT        0 scan_number(9865)
    #time_range_max        12 DOUBLE     0 scan_number(9865)
    #time_range_min        11 DOUBLE     0 scan_number(9865)
    #time_values           17 FLOAT      2 point_number(554826)
    #total_intensity        8 DOUBLE     1 scan_number(9865)

    # variable information
    #intensity_values attributes
    #name                 idx type   len value
    #-------------------- --- ----   --- -----
    #add_offset             1 DOUBLE   1 0.0
    #scale_factor           2 DOUBLE   1 1.0
    #units                  0 CHAR    26 Arbitrary Intensity Units

    #mass_values attributes
    #name                 idx type   len value
    #-------------------- --- ----   --- -----
    #scale_factor           1 DOUBLE   1 1.0
    #units                  0 CHAR     4 M/Z

    #time_values attributes
    #name                 idx type   len value
    #-------------------- --- ----   --- -----
    #scale_factor           1 DOUBLE   1 1.0
    #units                  0 CHAR     8 Seconds

    #total_intensity attributes
    #name                 idx type   len value
    #-------------------- --- ----   --- -----
    #units                  0 CHAR    26 Arbitrary Intensity Units

    # netCDF dimension names
    __POINT_NUMBER = "point_number"
    __SCAN_NUMBER = "scan_number"

    # the keys used to create certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"
    __POINT_COUNT = "point_count"

    if not is_str(file_name):
        error("'file_name' must be a string")

    mass_list = im.get_mass_list()
    time_list = im.get_time_list()

    # direct access, don't modify
    intensity_matrix = im.intensity_matrix

    # compress by ignoring zero intensities
    # included for consistency with imported netCDF format
    mass_values = []
    intensity_values = []
    point_count_values = []
    # iterating the rows directly also copes with an empty matrix
    for row in intensity_matrix:
        pc = 0  # point count
        for mass, intensity in zip(mass_list, row):
            if intensity > 0:
                mass_values.append(mass)
                intensity_values.append(intensity)
                pc += 1
        point_count_values.append(pc)

    # sanity checks (done before the output file is created/truncated)
    if not len(time_list) == len(point_count_values):
        error("number of time points does not equal the number of scans")

    try:
        # Open netCDF file in overwrite mode, creating it if inexistent.
        nc = CDF(file_name, NC.WRITE|NC.TRUNC|NC.CREATE)
        # Automatically set define and data modes.
        nc.automode()
    except CDFError:
        error("Cannot create file '%s'" % file_name)

    # make sure the file is closed even if writing fails part-way
    try:
        # create dimensions
        # total number of data points
        dim_point_number = nc.def_dim(__POINT_NUMBER, len(mass_values))
        # number of scans
        dim_scan_number = nc.def_dim(__SCAN_NUMBER, len(point_count_values))

        # create variables
        # points
        var_mass_values = nc.def_var(__MASS_STRING, NC.FLOAT,
            dim_point_number)
        var_intensity_values = nc.def_var(__INTENSITY_STRING, NC.FLOAT,
            dim_point_number)
        # scans
        var_time_list = nc.def_var(__TIME_STRING, NC.DOUBLE, dim_scan_number)
        var_point_count_values = nc.def_var(__POINT_COUNT, NC.INT,
            dim_scan_number)

        # populate variables
        # points
        var_mass_values[:] = mass_values
        var_intensity_values[:] = intensity_values
        # scans
        var_time_list[:] = time_list
        var_point_count_values[:] = point_count_values
    finally:
        # close file
        nc.close()
Exemple #35
0
    def import_leco_csv(self, file_name):
        """
        @summary: Imports data in LECO CSV format

            The first non-blank line is treated as the header; every
            header entry that parses as a number is taken as a mass.
            Each following non-blank line is one scan. A scan whose
            number of data values differs from the number of masses is
            ignored with a warning, together with its time value, so
            that times and rows stay aligned.

        @param file_name: File name
        @type file_name: StringType

        @return: none; the mass list, time list and intensity matrix
            are stored on this object
        @rtype: NoneType

        @author: Andrew Isaac
        """

        if not is_str(file_name):
            error("'file_name' not a string")

        data = []
        time_list = []
        mass_list = []

        # Format is text header with:
        # "Scan","Time",...
        # and the rest is "TIC" or m/z as text, i.e. "50","51"...
        # The following lines are:
        # scan_number,time,value,value,...
        # scan_number is an int, rest seem to be fixed format floats.
        # The format is 0.000000e+000

        num_mass = 0
        FIRST = True
        HEADER = True
        data_col = -1
        time_col = -1

        lines_list = open(file_name, 'r')
        # close the file even if parsing fails
        try:
            # get each line
            for line in lines_list:
                line = line.strip()
                if len(line) == 0:
                    continue

                data_row = []
                row_time = None
                # get each value in line; 'cols' is the column index
                for cols, item in enumerate(line.split(',')):
                    item = item.strip()
                    item = item.strip('\'"')  # remove quotes (in header)
                    if len(item) == 0:
                        continue

                    # Get header
                    if HEADER:
                        if item.lower().find("time") > -1:
                            time_col = cols
                        try:
                            value = float(item)
                        except ValueError:
                            continue
                        # find 1st col with number as header
                        if FIRST and value > 1:  # assume >1 mass
                            data_col = cols
                            # assume time col is previous col
                            if time_col < 0:
                                time_col = cols - 1
                            FIRST = False
                        mass_list.append(value)
                        num_mass += 1
                    # Get rest
                    else:
                        try:
                            value = float(item)
                        except ValueError:
                            continue
                        if cols == time_col:
                            row_time = value
                        elif cols >= data_col:
                            data_row.append(value)

                # check row length
                if not HEADER:
                    if len(data_row) == num_mass:
                        data.append(data_row)
                        # only keep the time of rows that are kept
                        if row_time is not None:
                            time_list.append(row_time)
                    else:
                        print("Warning: ignoring row")

                HEADER = False
        finally:
            lines_list.close()

        # check col lengths
        if len(time_list) != len(data):
            print("Warning: number of data rows and time list length differ")

        self.__mass_list = mass_list
        self.__time_list = time_list
        self.__intensity_matrix = data
        # Direct access for speed (DANGEROUS)
        self.intensity_matrix = self.__intensity_matrix
Exemple #36
0
def JCAMP_reader(file_name):

    """
    @summary: Generic reader for JCAMP DX files, produces GC-MS data
       object

        Lines starting with '#' are keyword lines: every "##PAGE" line
        supplies the time of a scan (third '='-separated field) and
        "##DATA TABLE" lines are counted. Lines without any '#' are
        comma-separated data, read as alternating mass and intensity
        values. The values collected so far are turned into a scan
        when a data line follows a new page or data table, and once
        more at the end of the file.

    @param file_name: The name of the JCAMP file
    @type file_name: StringType

    @return: The result of GCMS_data(time_list, scan_list)
    @rtype: GCMS_data

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    def _scan_from_pairs(values):
        # values alternate mass, intensity, mass, intensity, ...
        # (an even length guarantees equally long mass/intensity lists)
        if len(values) % 2 == 1:
            error("data not in pair !")
        return Scan(values[0::2], values[1::2])

    if not is_str(file_name):
        error("'file_name' not a string")

    print(" -> Reading JCAMP file '%s'" % (file_name))

    data = []
    page_idx = 0
    xydata_idx = 0
    time_list = []
    scan_list = []

    lines_list = open(file_name, 'r')
    # close the file even if parsing fails
    try:
        for line in lines_list:
            if len(line.strip()) == 0:
                continue
            prefix = line.find('#')
            # key word or information
            if prefix == 0:
                fields = line.split('=')
                if fields[0].find("##PAGE") >= 0:
                    # rt for the scan to be submitted
                    time_list.append(float(fields[2].strip()))
                    page_idx = page_idx + 1
                elif fields[0].find("##DATA TABLE") >= 0:
                    xydata_idx = xydata_idx + 1
            # data
            elif prefix == -1:
                if page_idx > 1 or xydata_idx > 1:
                    # a new page or data table has started since the
                    # last data line: store the finished scan first
                    scan_list.append(_scan_from_pairs(data))
                    data = []
                    if page_idx > 1:
                        page_idx = 1
                    if xydata_idx > 1:
                        xydata_idx = 1
                for item in line.strip().split(','):
                    item = item.strip()
                    if not len(item) == 0:
                        data.append(float(item))
    finally:
        lines_list.close()

    # get last scan
    scan_list.append(_scan_from_pairs(data))

    # sanity check
    if not len(time_list) == len(scan_list):
        error("number of time points does not equal the number of scans")

    return GCMS_data(time_list, scan_list)
Exemple #37
0
def ANDI_reader(file_name):

    """
    @summary: A reader for ANDI-MS NetCDF files, returns
        a GC-MS data object

        The mass and intensity values are read as two flat sequences
        of equal length. Scan boundaries are recovered by assuming
        that masses are in ascending order within a scan, so a
        decrease in mass marks the start of a new scan.

    @param file_name: The name of the ANDI-MS file
    @type file_name: StringType

    @return: The result of GCMS_data(time_list, scan_list)
    @rtype: GCMS_data

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    ## TODO: use 'point_count' and allow for zero len scans

    # the keys used to retrieve certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        # named 'cdf_file' so the builtin 'file' is not shadowed
        cdf_file = CDF(file_name)
    except CDFError:
        error("Cannot open file '%s'" % file_name)

    print(" -> Reading netCDF file '%s'" % (file_name))

    # copy everything into plain lists so the file can be closed
    # before the scans are assembled
    try:
        mass_values = cdf_file.var(__MASS_STRING).get().tolist()
        intensity_values = cdf_file.var(__INTENSITY_STRING).get().tolist()
        time_list = cdf_file.var(__TIME_STRING).get().tolist()
    finally:
        cdf_file.close()

    if not len(mass_values) == len(intensity_values):
        error("length of mass_list is not equal to length of intensity_list !")
    if len(mass_values) == 0:
        error("file '%s' contains no mass values" % file_name)

    scan_list = []
    mass_list = [mass_values[0]]
    intensity_list = [intensity_values[0]]
    for i in range(1, len(mass_values)):
        mass = mass_values[i]
        # assume masses in ascending order until new scan
        if mass_list[-1] <= mass:
            mass_list.append(mass)
            intensity_list.append(intensity_values[i])
        # new scan
        else:
            scan_list.append(Scan(mass_list, intensity_list))
            mass_list = [mass]
            intensity_list = [intensity_values[i]]
    # store final scan
    scan_list.append(Scan(mass_list, intensity_list))

    # sanity check
    if not len(time_list) == len(scan_list):
        error("number of time points (%d) does not equal the number of scans (%d)"%(len(time_list), len(scan_list)))

    return GCMS_data(time_list, scan_list)
Exemple #38
0
def mzML_reader(file_name):
    """
    @summary: A reader for mzML files, returns
        a GC-MS data object

        Each spectrum contributes one scan per element of its XML
        tree carrying the accession "MS:1000016" (the time value);
        spectra without such an element are left out. The time value
        is multiplied by 60 before it is stored.

    @param file_name: The name of the mzML file
    @type file_name: StringType

    @return: The result of GCMS_data(time_list, scan_list)
    @rtype: GCMS_data

    @author: Sean O'Callaghan
    """

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        mzml_file = pymzml.run.Reader(file_name)
    except Exception:
        # 'Exception' rather than a bare except, so that
        # KeyboardInterrupt/SystemExit are not reported as a bad file
        error("Cannot open file '%s'" % file_name)

    # Best effort: under MPI only rank 0 prints, listing the files of
    # all ranks. Any failure in this part falls back to a plain
    # per-process message.
    try:  # avoid printing from each rank
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        if rank == 0:
            file_names = []

            for source in range(1, size):
                recv_buffer = ""
                file_n = comm.recv(recv_buffer, source)
                file_names.append(file_n)

            print(" -> Reading mzML files:")
            print(file_name)
            for file_n in file_names:
                print(file_n)
        else:
            comm.send(file_name, dest=0)
    except Exception:
        print(" -> Reading mzML file '%s'" % (file_name))

    scan_list = []
    time_list = []

    for spectrum in mzml_file:
        mass_list = []
        intensity_list = []

        for mz, intensity in spectrum.peaks:
            mass_list.append(mz)
            intensity_list.append(intensity)

        for element in spectrum.xmlTree:
            # For some reason there are spectra with no time value,
            # Ignore these????????????
            if element.get('accession') == "MS:1000016":  #time value
                # We need time in seconds not minutes
                time_list.append(60 * float(element.get('value')))
                scan_list.append(Scan(mass_list, intensity_list))

    return GCMS_data(time_list, scan_list)
Exemple #39
0
def mzML_reader(file_name):

    """
    @summary: A reader for mzML files, returns
        a GC-MS data object

        Each spectrum contributes one scan per element of its XML
        tree carrying the accession "MS:1000016" (the time value);
        spectra without such an element are left out. The time value
        is multiplied by 60 before it is stored.

    @param file_name: The name of the mzML file
    @type file_name: StringType

    @return: The result of GCMS_data(time_list, scan_list)
    @rtype: GCMS_data

    @author: Sean O'Callaghan
    """

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        mzml_file = pymzml.run.Reader(file_name)
    except Exception:
        # 'Exception' rather than a bare except, so that
        # KeyboardInterrupt/SystemExit are not reported as a bad file
        error("Cannot open file '%s'" % file_name)

    # Best effort: under MPI only rank 0 prints, listing the files of
    # all ranks. Any failure in this part falls back to a plain
    # per-process message.
    try:  # avoid printing from each rank
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        if rank == 0:
            file_names = []

            for source in range(1, size):
                recv_buffer = ""
                file_n = comm.recv(recv_buffer, source)
                file_names.append(file_n)

            print(" -> Reading mzML files:")
            print(file_name)
            for file_n in file_names:
                print(file_n)
        else:
            comm.send(file_name, dest=0)
    except Exception:
        print(" -> Reading mzML file '%s'" % (file_name))

    scan_list = []
    time_list = []

    for spectrum in mzml_file:
        mass_list = []
        intensity_list = []

        for mz, intensity in spectrum.peaks:
            mass_list.append(mz)
            intensity_list.append(intensity)

        for element in spectrum.xmlTree:
            # For some reason there are spectra with no time value,
            # Ignore these????????????
            if element.get('accession') == "MS:1000016":  #time value
                # We need time in seconds not minutes
                time_list.append(60 * float(element.get('value')))
                scan_list.append(Scan(mass_list, intensity_list))

    return GCMS_data(time_list, scan_list)
Exemple #40
0
def ANDI_reader(file_name):

    """
    @summary: A reader for ANDI-MS NetCDF files, returns
        a GC-MS data object

        The mass and intensity values are read as two flat sequences
        of equal length. Scan boundaries are recovered by assuming
        that masses are in ascending order within a scan, so a
        decrease in mass marks the start of a new scan.

    @param file_name: The name of the ANDI-MS file
    @type file_name: StringType

    @return: The result of GCMS_data(time_list, scan_list)
    @rtype: GCMS_data

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    ## TODO: use 'point_count' and allow for zero len scans

    # the keys used to retrieve certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        # named 'cdf_file' so the builtin 'file' is not shadowed
        cdf_file = CDF(file_name)
    except CDFError:
        error("Cannot open file '%s'" % file_name)

    # Best effort: under MPI only rank 0 prints, listing the files of
    # all ranks. Any failure in this part falls back to a plain
    # per-process message.
    try:  # avoid printing from each rank
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        if rank == 0:
            file_names = []

            for source in range(1, size):
                recv_buffer = ""
                file_n = comm.recv(recv_buffer, source)
                file_names.append(file_n)

            print(" -> Reading netCDF files:")
            print(file_name)
            for file_n in file_names:
                print(file_n)
        else:
            comm.send(file_name, dest=0)
    except Exception:
        # 'Exception' rather than a bare except, so that
        # KeyboardInterrupt/SystemExit are not swallowed here
        print(" -> Reading netCDF file '%s'" % (file_name))

    # copy everything into plain lists so the file can be closed
    # before the scans are assembled
    try:
        mass_values = cdf_file.var(__MASS_STRING).get().tolist()
        intensity_values = cdf_file.var(__INTENSITY_STRING).get().tolist()
        time_list = cdf_file.var(__TIME_STRING).get().tolist()
    finally:
        cdf_file.close()

    if not len(mass_values) == len(intensity_values):
        error("length of mass_list is not equal to length of intensity_list !")
    if len(mass_values) == 0:
        error("file '%s' contains no mass values" % file_name)

    scan_list = []
    mass_list = [mass_values[0]]
    intensity_list = [intensity_values[0]]
    for i in range(1, len(mass_values)):
        mass = mass_values[i]
        # assume masses in ascending order until new scan
        if mass_list[-1] <= mass:
            mass_list.append(mass)
            intensity_list.append(intensity_values[i])
        # new scan
        else:
            scan_list.append(Scan(mass_list, intensity_list))
            mass_list = [mass]
            intensity_list = [intensity_values[i]]
    # store final scan
    scan_list.append(Scan(mass_list, intensity_list))

    # sanity check
    if not len(time_list) == len(scan_list):
        error("number of time points (%d) does not equal the number of scans (%d)"%(len(time_list), len(scan_list)))

    return GCMS_data(time_list, scan_list)
Exemple #41
0
    def trim(self, begin=None, end=None):

        """
        @summary: trims data in the time domain

        @param begin: begin parameter designating start time or
            scan number
        @type begin: IntType or StrType
        @param end: end parameter designating end time or
            scan number
        @type end: IntType or StrType

            The arguments 'begin' and 'end' can be either integers
            (in which case they are taken as the first/last scan
            number for trimming) or strings in which case they are
            treated as time strings and converted to scan numbers.

            At present both 'begin' and 'end' must be of the same
            type, either both scan numbers or time strings.

            If both arguments are None nothing is changed.

        @return: none
        @rtype: NoneType

        @author: Vladimir Likic
        """

        # trim called with defaults: there is nothing to trim
        if begin is None and end is None:
            print("Nothing to do.")
            return  # exit immediately

        N = len(self.__scan_list)

        # process 'begin' and 'end'
        if begin is None:
            first_scan = 0
        elif is_int(begin):
            # scan numbers start at one, list indices at zero
            first_scan = begin - 1
        elif is_str(begin):
            secs = time_str_secs(begin)
            first_scan = self.get_index_at_time(secs) + 1
        else:
            error("invalid 'begin' argument")

        if end is None:
            last_scan = N - 1
        elif is_int(end):
            # NOTE(review): unlike 'begin', an integer 'end' is used as
            # an index without subtracting one -- confirm this is intended
            last_scan = end
        elif is_str(end):
            secs = time_str_secs(end)
            last_scan = self.get_index_at_time(secs) + 1
        else:
            error("invalid 'end' argument")

        # sanity checks
        if not last_scan > first_scan:
            error("last scan=%d, first scan=%d" % (last_scan, first_scan))
        elif first_scan < 0:
            # scan number 1 maps to index 0 and is valid
            error("scan number must be at least one")
        elif last_scan > N - 1:
            error("last scan=%d, total number of scans=%d" % (last_scan, N))

        print("Trimming data to between %d and %d scans" %
              (first_scan + 1, last_scan + 1))

        # the checks above guarantee 0 <= first_scan < last_scan <= N-1,
        # so an inclusive slice selects exactly the wanted scans
        scan_list_new = self.__scan_list[first_scan:last_scan + 1]
        time_list_new = self.__time_list[first_scan:last_scan + 1]

        # update info
        self.__scan_list = scan_list_new
        self.__set_time(time_list_new)
        self.__set_min_max_mass()
        self.__calc_tic()
Exemple #42
0
    def import_leco_csv(self, file_name):
        """
        @summary: Imports data in LECO CSV format

            The first non-blank line is treated as the header; every
            header entry that parses as a number is taken as a mass.
            Each following non-blank line is one scan. A scan whose
            number of data values differs from the number of masses is
            ignored with a warning, together with its time value, so
            that times and rows stay aligned.

        @param file_name: File name
        @type file_name: StringType

        @return: none; the mass list, time list and intensity matrix
            are stored on this object
        @rtype: NoneType

        @author: Andrew Isaac
        """

        if not is_str(file_name):
            error("'file_name' not a string")

        data = []
        time_list = []
        mass_list = []

        # Format is text header with:
        # "Scan","Time",...
        # and the rest is "TIC" or m/z as text, i.e. "50","51"...
        # The following lines are:
        # scan_number,time,value,value,...
        # scan_number is an int, rest seem to be fixed format floats.
        # The format is 0.000000e+000

        num_mass = 0
        FIRST = True
        HEADER = True
        data_col = -1
        time_col = -1

        lines_list = open(file_name, 'r')
        # close the file even if parsing fails
        try:
            # get each line
            for line in lines_list:
                line = line.strip()
                if len(line) == 0:
                    continue

                data_row = []
                row_time = None
                # get each value in line; 'cols' is the column index
                for cols, item in enumerate(line.split(',')):
                    item = item.strip()
                    item = item.strip('\'"')  # remove quotes (in header)
                    if len(item) == 0:
                        continue

                    # Get header
                    if HEADER:
                        if item.lower().find("time") > -1:
                            time_col = cols
                        try:
                            value = float(item)
                        except ValueError:
                            continue
                        # find 1st col with number as header
                        if FIRST and value > 1:  # assume >1 mass
                            data_col = cols
                            # assume time col is previous col
                            if time_col < 0:
                                time_col = cols - 1
                            FIRST = False
                        mass_list.append(value)
                        num_mass += 1
                    # Get rest
                    else:
                        try:
                            value = float(item)
                        except ValueError:
                            continue
                        if cols == time_col:
                            row_time = value
                        elif cols >= data_col:
                            data_row.append(value)

                # check row length
                if not HEADER:
                    if len(data_row) == num_mass:
                        data.append(data_row)
                        # only keep the time of rows that are kept
                        if row_time is not None:
                            time_list.append(row_time)
                    else:
                        print("Warning: ignoring row")

                HEADER = False
        finally:
            lines_list.close()

        # check col lengths
        if len(time_list) != len(data):
            print("Warning: number of data rows and time list length differ")

        self.__mass_list = mass_list
        self.__time_list = time_list
        self.__intensity_matrix = data
        # Direct access for speed (DANGEROUS)
        self.intensity_matrix = self.__intensity_matrix
Exemple #43
0
def save_data(file_name,
              data,
              format_str="%.6f",
              prepend="",
              sep=" ",
              compressed=False):
    """
    @summary: Saves a list of numbers or a list of lists of numbers
    to a file with specific formatting

    All elements of 'data' are validated before the output file is
    opened, so invalid input does not leave a partially written file
    behind. An empty 'data' list is treated as an empty vector and
    produces an empty file.

    @param file_name: Name of a file
    @type file_name: StringType
    @param data: A list of numbers, or a list of lists
    @type data: ListType
    @param format_str: A format string for individual entries
    @type format_str: StringType
    @param prepend: A string, printed before each row
    @type prepend: StringType
    @param sep: A string, printed between the numbers of a row
    @type sep: StringType
    @param compressed: A boolean. If True, the output will be gzipped
    @type compressed: BooleanType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    if not is_list(data):
        error("'data' is not a list")

    if not is_str(prepend):
        error("'prepend' is not a string")

    if not is_str(sep):
        error("'sep' is not a string")

    # decide whether data is a vector or matrix; an empty list has no
    # first element to inspect and is handled as an empty vector
    data_is_matrix = len(data) > 0 and not is_number(data[0])

    # validate everything up front, before the file is touched
    if data_is_matrix:
        for row in data:
            if not is_list(row):
                error("not all elements of the list are lists")
        for row in data:
            for datum in row:
                if not is_number(datum):
                    error("datum not a number")
    else:
        for item in data:
            if not is_number(item):
                error("not all elements of the list are numbers")

    fp = open_for_writing(file_name)
    try:
        if data_is_matrix:
            for row in data:
                # 'sep' goes between entries only, never after the last one
                entries = [format_str % (datum) for datum in row]
                fp.write(prepend + sep.join(entries) + "\n")
        else:
            for item in data:
                fp.write(prepend + (format_str % (item)) + "\n")
    finally:
        # release the handle even if formatting or writing fails
        close_for_writing(fp)

    if compressed:
        # An argument list (no shell) passes the file name through verbatim,
        # so names with spaces or shell metacharacters are handled safely.
        import subprocess
        try:
            status = subprocess.call(['gzip', file_name])
        except OSError:
            # the gzip executable could not be started
            status = -1
        if status != 0:
            error("gzip compress failed")
Exemple #44
0
def save_data(file_name, data, format_str="%.6f", prepend="", sep=" ",
              compressed=False):

    """
    @summary: Saves a list of numbers or a list of lists of numbers
    to a file with specific formatting

    Input is checked completely before the output file is opened, so a
    failed check does not leave a truncated file. An empty 'data' list
    is accepted and yields an empty file.

    @param file_name: Name of a file
    @type file_name: StringType
    @param data: A list of numbers, or a list of lists
    @type data: ListType
    @param format_str: A format string for individual entries
    @type format_str: StringType
    @param prepend: A string, printed before each row
    @type prepend: StringType
    @param sep: A string, printed between the numbers of a row
    @type sep: StringType
    @param compressed: A boolean. If True, the output will be gzipped
    @type compressed: BooleanType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    if not is_list(data):
        error("'data' is not a list")

    if not is_str(prepend):
        error("'prepend' is not a string")

    if not is_str(sep):
        error("'sep' is not a string")

    # decide whether data is a vector or matrix and normalise both shapes
    # to a list of rows; an empty list counts as a vector with no rows
    if len(data) == 0 or is_number(data[0]):
        for item in data:
            if not is_number(item):
                error("not all elements of the list are numbers")
        # a vector is written one number per line, i.e. one-element rows
        rows = [[item] for item in data]
    else:
        for item in data:
            if not is_list(item):
                error("not all elements of the list are lists")
        for item in data:
            for datum in item:
                if not is_number(datum):
                    error("datum not a number")
        rows = data

    # the file is only opened once the input is known to be valid
    fp = open_for_writing(file_name)
    try:
        for row in rows:
            fp.write(prepend)
            # separator between entries, none after the last entry
            fp.write(sep.join([format_str % (datum) for datum in row]))
            fp.write("\n")
    finally:
        # always release the handle, even when a write fails
        close_for_writing(fp)

    if compressed:
        # Run gzip without a shell: the file name is passed as a single
        # argument, so spaces and shell metacharacters cannot break or
        # alter the command.
        import subprocess
        try:
            status = subprocess.call(['gzip', file_name])
        except OSError:
            # gzip is missing or could not be executed
            status = -1
        if status != 0:
            error("gzip compress failed")
Exemple #45
0
def ANDI_writer(file_name, im):
    """
    @summary: A writer for ANDI-MS NetCDF files; stores the mass
        values, intensity values, scan acquisition times and per-scan
        point counts of an IntensityMatrix

    The data is flattened and sanity-checked before the output file is
    created, so a failed check does not truncate an existing file.

    @param file_name: The name of the ANDI-MS file to write
    @type file_name: StringType
    @param im: The IntensityMatrix
    @type im: pyms.GCMS.Class.IntensityMatrix

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    """

    # netCDF header info for compatibility
    # attributes
    #dataset_completeness   0 CHAR     6 C1+C2
    #dataset_origin         4 CHAR    16 Santa Clara, CA
    #experiment_date_time_stamp   7 CHAR    20 20081218044500+1100
    #experiment_title       6 CHAR     7 mix ma
    #experiment_type       10 CHAR    25 Centroided Mass Spectrum
    #external_file_ref_0    9 CHAR     8 MA_5C.M
    #languages              3 CHAR     8 English
    #ms_template_revision   1 CHAR     6 1.0.1
    #netcdf_file_date_time_stamp   5 CHAR    20 20090114001531+1100
    #netcdf_revision        2 CHAR     6 2.3.2
    #number_of_times_calibrated  12 INT      1 0
    #number_of_times_processed  11 INT      1 1
    #operator_name          8 CHAR    12 Dave and Su
    #raw_data_intensity_format  25 CHAR     6 Float
    #raw_data_mass_format  23 CHAR     6 Float
    #raw_data_time_format  24 CHAR     6 Short
    #sample_state          13 CHAR    12 Other State
    #test_detector_type    18 CHAR    20 Electron Multiplier
    #test_ionization_mode  16 CHAR    16 Electron Impact
    #test_ionization_polarity  17 CHAR    18 Positive Polarity
    #test_ms_inlet         15 CHAR    17 Capillary Direct
    #test_resolution_type  19 CHAR    20 Constant Resolution
    #test_scan_direction   21 CHAR     3 Up
    #test_scan_function    20 CHAR    10 Mass Scan
    #test_scan_law         22 CHAR     7 Linear
    #test_separation_type  14 CHAR    18 No Chromatography

    # dimensions
    #_128_byte_string       6    128
    #_16_byte_string        3     16
    #_255_byte_string       7    255
    #_2_byte_string         0      2
    #_32_byte_string        4     32
    #_4_byte_string         1      4
    #_64_byte_string        5     64
    #_8_byte_string         2      8
    #error_number          10      1
    #instrument_number     12      1
    #point_number           9 554826   X
    #range                  8      2
    #scan_number           11   9865

    # variables
    #a_d_coaddition_factor   2 SHORT      0 scan_number(9865)
    #a_d_sampling_rate      1 DOUBLE     0 scan_number(9865)
    #actual_scan_number     7 INT        0 scan_number(9865)
    #error_log              0 CHAR       0 error_number(1), _64_byte_string(64)
    #flag_count            15 INT        0 scan_number(9865)
    #instrument_app_version  27 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_comments   28 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_fw_version  25 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_id         20 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_mfr        21 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_model      22 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_name       19 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_os_version  26 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_serial_no  23 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #instrument_sw_version  24 CHAR       0 instrument_number(1),
    #_32_byte_string(32)
    #intensity_values      18 FLOAT      3 point_number(554826)
    #inter_scan_time        5 DOUBLE     0 scan_number(9865)
    #mass_range_max        10 DOUBLE     0 scan_number(9865)
    #mass_range_min         9 DOUBLE     0 scan_number(9865)
    #mass_values           16 FLOAT      2 point_number(554826)
    #point_count           14 INT        0 scan_number(9865)
    #resolution             6 DOUBLE     0 scan_number(9865)
    #scan_acquisition_time   3 DOUBLE     0 scan_number(9865)
    #scan_duration          4 DOUBLE     0 scan_number(9865)
    #scan_index            13 INT        0 scan_number(9865)
    #time_range_max        12 DOUBLE     0 scan_number(9865)
    #time_range_min        11 DOUBLE     0 scan_number(9865)
    #time_values           17 FLOAT      2 point_number(554826)
    #total_intensity        8 DOUBLE     1 scan_number(9865)

    # variable information
    #intensity_values attributes

    #name                 idx type   len value
    #-------------------- --- ----   --- -----
    #add_offset             1 DOUBLE   1 0.0
    #scale_factor           2 DOUBLE   1 1.0
    #units                  0 CHAR    26 Arbitrary Intensity Units

    #mass_values attributes

    #name                 idx type   len value
    #-------------------- --- ----   --- -----
    #scale_factor           1 DOUBLE   1 1.0
    #units                  0 CHAR     4 M/Z

    #time_values attributes

    #name                 idx type   len value
    #-------------------- --- ----   --- -----
    #scale_factor           1 DOUBLE   1 1.0
    #units                  0 CHAR     8 Seconds

    #total_intensity attributes

    #name                 idx type   len value
    #-------------------- --- ----   --- -----
    #units                  0 CHAR    26 Arbitrary Intensity Units

    # netCDF dimension names
    __POINT_NUMBER = "point_number"
    __SCAN_NUMBER = "scan_number"

    # the keys used to create certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"
    __POINT_COUNT = "point_count"

    if not is_str(file_name):
        error("'file_name' must be a string")

    mass_list = im.get_mass_list()
    time_list = im.get_time_list()

    # direct access, don't modify
    intensity_matrix = im.intensity_matrix

    # all rows are assumed to have the same length as the first one
    if len(intensity_matrix) > 0:
        num_cols = len(intensity_matrix[0])
    else:
        num_cols = 0

    # compress by ignoring zero intensities
    # included for consistency with imported netCDF format
    mass_values = []
    intensity_values = []
    point_count_values = []
    for row in intensity_matrix:
        pc = 0  # point count
        for col in range(num_cols):
            intensity = row[col]
            if intensity > 0:
                mass_values.append(mass_list[col])
                intensity_values.append(intensity)
                pc += 1
        point_count_values.append(pc)

    # sanity checks (done before the output file is created/truncated)
    if not len(time_list) == len(point_count_values):
        error("number of time points does not equal the number of scans")

    try:
        # Open netCDF file in overwrite mode, creating it if inexistent.
        nc = CDF(file_name, NC.WRITE | NC.TRUNC | NC.CREATE)
        # Automatically set define and data modes.
        nc.automode()
    except CDFError:
        error("Cannot create file '%s'" % file_name)

    try:
        # create dimensions
        # total number of data points
        dim_point_number = nc.def_dim(__POINT_NUMBER, len(mass_values))
        # number of scans
        dim_scan_number = nc.def_dim(__SCAN_NUMBER, len(point_count_values))

        # create variables
        # points
        var_mass_values = nc.def_var(__MASS_STRING, NC.FLOAT,
                                     dim_point_number)
        var_intensity_values = nc.def_var(__INTENSITY_STRING, NC.FLOAT,
                                          dim_point_number)
        # scans
        var_time_list = nc.def_var(__TIME_STRING, NC.DOUBLE, dim_scan_number)
        var_point_count_values = nc.def_var(__POINT_COUNT, NC.INT,
                                            dim_scan_number)

        # populate variables
        # points
        var_mass_values[:] = mass_values
        var_intensity_values[:] = intensity_values
        # scans
        var_time_list[:] = time_list
        var_point_count_values[:] = point_count_values
    finally:
        # close file, also when defining or populating a variable fails
        nc.close()
Exemple #46
0
def ANDI_reader(file_name):
    """
    @summary: A reader for ANDI-MS NetCDF files, returns
        a GC-MS data object

    The flat mass/intensity arrays are split into scans wherever the
    mass value decreases. The file is opened read-only and closed as
    soon as the three variables have been read.

    @param file_name: The name of the ANDI-MS file
    @type file_name: StringType

    @return: GC-MS data object built from the time list and the scans
    @rtype: GCMS_data

    @raise RuntimeError: if the file cannot be opened, holds no mass
        values, or the number of time points differs from the number
        of scans

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    ## TODO: use 'point_count' and allow for zero len scans

    # the keys used to retrieve certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        # read-only access is sufficient: nothing is written back, and
        # the 'format' keyword only matters when a file is created
        rootgrp = Dataset(file_name, "r")
    except Exception:
        raise RuntimeError("Cannot open file '%s'" % file_name)
    print(" -> Reading netCDF file '%s'" % (file_name))

    try:
        mass_values = rootgrp.variables[__MASS_STRING][:].tolist()
        intensity_values = rootgrp.variables[__INTENSITY_STRING][:].tolist()
        time_list = rootgrp.variables[__TIME_STRING][:].tolist()
    finally:
        # everything needed is now held in plain lists
        rootgrp.close()

    # checked before any element is accessed
    if not len(mass_values) == len(intensity_values):
        error("length of mass_list is not equal to length of intensity_list !")
    if len(mass_values) == 0:
        raise RuntimeError("no mass values found in file '%s'" % file_name)

    scan_list = []
    mass_list = [mass_values[0]]
    intensity_list = [intensity_values[0]]
    for mass, intensity in zip(mass_values[1:], intensity_values[1:]):
        # assume masses in ascending order until new scan
        if mass_list[-1] <= mass:
            mass_list.append(mass)
            intensity_list.append(intensity)
        # new scan
        else:
            scan_list.append(Scan(mass_list, intensity_list))
            mass_list = [mass]
            intensity_list = [intensity]
    # store final scan
    scan_list.append(Scan(mass_list, intensity_list))

    # sanity check
    if not len(time_list) == len(scan_list):
        raise RuntimeError(
            "number of time points does not equal the number of scans")

    data = GCMS_data(time_list, scan_list)

    return data
Exemple #47
0
def ANDI_reader(file_name):
    """
    @summary: A reader for ANDI-MS NetCDF files, returns
        a GC-MS data object

    The flat mass/intensity arrays are split into scans wherever the
    mass value decreases. The dataset is closed once the three
    variables have been copied into numpy arrays.

    @param file_name: The name of the ANDI-MS file
    @type file_name: StringType

    @return: GC-MS data object built from the time list and the scans
    @rtype: GCMS_data

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    @author: Tony Chen
    """

    # the keys used to retrieve certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        dataset = Dataset(file_name, 'r')
    except Exception:
        # 'Exception' rather than a bare except, so that
        # KeyboardInterrupt/SystemExit are not swallowed here
        error("Cannot open file '%s'" % file_name)

    print(" -> Reading netCDF file '%s'" % (file_name))

    try:
        mass_values = np.array(dataset.variables[__MASS_STRING])
        intensity_values = np.array(dataset.variables[__INTENSITY_STRING])
        time_list = np.array(dataset.variables[__TIME_STRING])
    finally:
        # the data has been copied into arrays; release the file handle
        dataset.close()

    # checked before any element is accessed
    if not len(mass_values) == len(intensity_values):
        error("length of mass_list is not equal to length of intensity_list !")
    if len(mass_values) == 0:
        error("no mass values found in file '%s'" % file_name)

    scan_list = list()
    mass_list = [mass_values[0]]
    intensity_list = [intensity_values[0]]
    for mass, intensity in zip(mass_values[1:], intensity_values[1:]):
        # assume masses in ascending order until new scan
        if mass_list[-1] <= mass:
            mass_list.append(mass)
            intensity_list.append(intensity)
        # new scan
        else:
            scan_list.append(Scan(mass_list, intensity_list))
            mass_list = [mass]
            intensity_list = [intensity]
    # store final scan
    scan_list.append(Scan(mass_list, intensity_list))

    # sanity check
    if not len(time_list) == len(scan_list):
        error("number of time points does not equal the number of scans")

    data = GCMS_data(time_list, scan_list)
    return data
Exemple #48
0
def ANDI_reader(file_name):
    """
    @summary: A reader for ANDI-MS NetCDF files, returns
        a GC-MS data object

    The flat mass/intensity arrays are split into scans wherever the
    mass value decreases. If the file holds more time points than
    scans, the surplus time points are dropped; fewer time points
    than scans is reported as an error.

    @param file_name: The name of the ANDI-MS file
    @type file_name: StringType

    @return: GC-MS data object built from the time list and the scans
    @rtype: GCMS_data

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    ## TODO: use 'point_count' and allow for zero len scans

    # the keys used to retrieve certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        nc = CDF(file_name)
    except CDFError:
        error("Cannot open file '%s'" % file_name)

    print(" -> Reading netCDF file '%s'" % (file_name))

    try:
        mass_values = nc.var(__MASS_STRING).get().tolist()
        intensity_values = nc.var(__INTENSITY_STRING).get().tolist()
        time_list = nc.var(__TIME_STRING).get().tolist()
    finally:
        # everything needed is now held in plain lists
        nc.close()

    # checked before any element is accessed
    if not len(mass_values) == len(intensity_values):
        error("length of mass_list is not equal to length of intensity_list !")
    if len(mass_values) == 0:
        error("no mass values found in file '%s'" % file_name)

    scan_list = []
    mass_list = [mass_values[0]]
    intensity_list = [intensity_values[0]]
    for mass, intensity in zip(mass_values[1:], intensity_values[1:]):
        # assume masses in ascending order until new scan
        if mass_list[-1] <= mass:
            mass_list.append(mass)
            intensity_list.append(intensity)
        # new scan
        else:
            scan_list.append(Scan(mass_list, intensity_list))
            mass_list = [mass]
            intensity_list = [intensity]
    # store final scan
    scan_list.append(Scan(mass_list, intensity_list))

    # sanity check
    if len(time_list) > len(scan_list):
        #JT: Debug for old gcms data
        #JT: time longer than scans so trim
        print("Time list is")
        print(len(time_list) - len(scan_list))
        print("longer than scan list. Trimming....")
        time_list = time_list[0:len(scan_list)]
        print(len(time_list))
        print(len(scan_list))
    elif len(time_list) < len(scan_list):
        # trimming cannot repair a time list that is too short
        error("number of time points does not equal the number of scans")

    data = GCMS_data(time_list, scan_list)

    return data