def __init__(self, time_list, scan_list):

    """
    @summary: Initialize the GC-MS data

    Only the first element of each list is type-checked. The retention
    times are handed to the private time setter, the scans are stored,
    and the mass range and TIC helpers are then invoked.

    @param time_list: List of scan retention times
    @type time_list: ListType
    @param scan_list: List of Scan objects
    @type scan_list: ListType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # Test for emptiness before looking at element 0, so that an empty
    # list is reported through error() instead of a bare IndexError.
    if not is_list(time_list) or len(time_list) == 0 \
            or not is_number(time_list[0]):
        error("'time_list' must be a list of numbers")

    if not is_list(scan_list) or len(scan_list) == 0 \
            or not isinstance(scan_list[0], Scan):
        error("'scan_list' must be a list of Scan objects")

    self.__set_time(time_list)
    self.__scan_list = scan_list
    self.__set_min_max_mass()
    self.__calc_tic()
def __init__(self, mass_list, intensity_list):

    """
    @summary: Initialise the MassSpectrum

    Only the first element of each list is type-checked; the two lists
    must have the same length.

    @param mass_list: List of binned masses
    @type mass_list: ListType
    @param intensity_list: List of binned intensities
    @type intensity_list: ListType

    @author: Andrew Isaac
    @author: Qiao Wang
    @author: Vladimir Likic
    """

    # Test for emptiness before looking at element 0, so that an empty
    # list is reported through error() instead of a bare IndexError.
    if not is_list(mass_list) or len(mass_list) == 0 \
            or not is_number(mass_list[0]):
        error("'mass_list' must be a list of numbers")

    if not is_list(intensity_list) or len(intensity_list) == 0 \
            or not is_number(intensity_list[0]):
        error("'intensity_list' must be a list of numbers")

    if not len(mass_list) == len(intensity_list):
        error("'mass_list' is not the same size as 'intensity_list'")

    #TODO: should these be public, or accessed through methods???
    # Both attributes are public and hold the caller's lists directly
    # (no copy is made).
    self.mass_list = mass_list
    self.mass_spec = intensity_list
def rmsd(list1, list2):

    """
    @summary: Calculates RMSD for the 2 lists

    The two data sets are compared element by element, so they must be
    non-empty and of equal length.

    @param list1: First data set
    @type list1: ListType, TupleType, or numpy.core.ndarray
    @param list2: Second data set
    @type list2: ListType, TupleType, or numpy.core.ndarray

    @return: RMSD value
    @rtype: FloatType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    if not is_list(list1):
        error("argument neither list nor array")

    if not is_list(list2):
        error("argument neither list nor array")

    # A length mismatch used to either raise IndexError (list2 shorter)
    # or silently ignore the tail of list2 (list2 longer).
    if len(list1) != len(list2):
        error("arguments must have the same length")

    # Guard the division below.
    if len(list1) == 0:
        error("arguments must not be empty")

    # Accumulate the squared differences (named so as not to shadow the
    # builtin sum()).
    sq_sum = 0.0
    for a, b in zip(list1, list2):
        sq_sum = sq_sum + (a - b) ** 2

    return math.sqrt(sq_sum / len(list1))
def __init__(self, mass_list, intensity_list):

    """
    @summary: Initialize the Scan data

    Only the first element of each list is type-checked; the two lists
    must have the same length. The minimum and maximum mass are cached
    at construction time.

    @param mass_list: mass values
    @type mass_list: ListType
    @param intensity_list: intensity values
    @type intensity_list: ListType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # Test for emptiness before looking at element 0, so that an empty
    # list is reported through error() instead of a bare IndexError.
    if not is_list(mass_list) or len(mass_list) == 0 \
            or not is_number(mass_list[0]):
        error("'mass_list' must be a list of numbers")

    if not is_list(intensity_list) or len(intensity_list) == 0 \
            or not is_number(intensity_list[0]):
        error("'intensity_list' must be a list of numbers")

    # Each mass needs exactly one intensity.
    if not len(mass_list) == len(intensity_list):
        error("'mass_list' is not the same size as 'intensity_list'")

    self.__mass_list = mass_list
    self.__intensity_list = intensity_list
    self.__min_mass = min(mass_list)
    self.__max_mass = max(mass_list)
def load_peaks(file_name):

    """
    @summary: Loads the peak_list stored with 'store_peaks'

    The file is unpickled and the result is checked to be a list whose
    items are all Peak objects. An empty list is returned unchanged.

    @param file_name: File name of peak list
    @type file_name: StringType

    @return: The list of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' not a string")

    # NOTE(security): unpickling can execute arbitrary code embedded in
    # the file; only load peak lists from trusted sources.
    fp = open(file_name, 'r')
    try:
        peak_list = cPickle.load(fp)
    finally:
        # Release the handle even if unpickling fails.
        fp.close()

    if not is_list(peak_list):
        error("'file_name' is not a List")

    # The previous test ("not len > 0 and not isinstance(...)") could
    # never report a bad non-empty list and raised IndexError on an
    # empty one; check the items directly instead.
    for peak in peak_list:
        if not isinstance(peak, Peak):
            error("'peak_list' must be a list of Peak objects")

    return peak_list
def load_peaks(file_name):

    """
    @summary: Loads the peak_list stored with 'store_peaks'

    The file is unpickled and the result is checked to be a list whose
    items are all Peak objects. An empty list is returned unchanged.

    @param file_name: File name of peak list
    @type file_name: StringType

    @return: The list of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' not a string")

    # NOTE(security): unpickling can execute arbitrary code embedded in
    # the file; only load peak lists from trusted sources.
    fp = open(file_name, 'r')
    try:
        peak_list = cPickle.load(fp)
    finally:
        # Release the handle even if unpickling fails.
        fp.close()

    if not is_list(peak_list):
        error("'file_name' is not a List")

    # The previous test ("not len > 0 and not isinstance(...)") could
    # never report a bad non-empty list and raised IndexError on an
    # empty one; check the items directly instead.
    for peak in peak_list:
        if not isinstance(peak, Peak):
            error("'peak_list' must be a list of Peak objects")

    return peak_list
def exprl2alignment(exprl):

    """
    @summary: Converts experiments into alignments

    Each Experiment in the input is wrapped in its own Class.Alignment;
    the results are returned in input order.

    @param exprl: The list of experiments to be converted into
        alignment objects
    @type exprl: ListType

    @return: A list of Class.Alignment objects, one per experiment
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_list(exprl):
        error("the argument is not a list")

    alignments = []

    for expr in exprl:
        if isinstance(expr, Experiment):
            alignments.append(Class.Alignment(expr))
        else:
            error("list items must be 'Experiment' instances")

    return alignments
def is_peak_list(peaks):

    """
    @summary: Returns True if 'peaks' is a valid peak list, False
        otherwise

    A valid peak list is anything accepted by is_list() whose items are
    all Peak instances; an empty list therefore counts as valid.

    @param peaks: A list of peak objects
    @type peaks: ListType

    @return: A boolean indicator
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    if not is_list(peaks):
        return False

    # One non-Peak item is enough to reject the whole list.
    for candidate in peaks:
        if not isinstance(candidate, Peak):
            return False

    return True
def __init__(self, ia, time_list, mass=None):

    """
    @summary: Initialize the ion chromatogram

    Only the first element of 'time_list' is type-checked. The time
    step, minimum and maximum retention time are computed once at
    construction time.

    @param ia: Ion chromatogram intensity values
    @type ia: numpy.array
    @param time_list: A list of ion chromatogram retention times
    @type time_list: ListType
    @param mass: Mass of ion chromatogram (Null if TIC)
    @type mass: IntType

    @author: Lewis Lee
    @author: Vladimir Likic
    """

    if not isinstance(ia, numpy.ndarray):
        error("'ia' must be a numpy array")

    # Test for emptiness before looking at element 0, so that an empty
    # list is reported through error() instead of a bare IndexError.
    if not is_list(time_list) or len(time_list) == 0 \
            or not is_number(time_list[0]):
        error("'time_list' must be a list of numbers")

    if len(ia) != len(time_list):
        error("Intensity array and time list differ in length")

    self.__ia = ia
    self.__time_list = time_list
    self.__mass = mass
    self.__time_step = self.__calc_time_step(time_list)
    self.__min_rt = min(time_list)
    self.__max_rt = max(time_list)
def MAD(v):

    """
    @summary: median absolute deviation

    Computes the median of the absolute deviations of the data from
    their median, divided by the constant 0.6745.

    @param v: A list or array
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: median absolute deviation
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    centre = median(v)

    # Absolute distance of every point from the median.
    abs_devs = [math.fabs(x - centre) for x in v]

    return median(abs_devs)/0.6745
def median(v):

    """
    @summary: Returns a median of a list or numpy array

    The input is not modified. For an even number of points the median
    is the mean of the two middle values.

    @param v: Input list or array
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: The median of the input list
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    # An empty input has no median; previously this raised IndexError.
    if len(v) == 0:
        error("argument is empty")

    # sorted() returns a new list for any sequence, so the caller's data
    # is left untouched and tuples (which have no .sort()) work as well.
    ordered = sorted(v)

    n = len(ordered)
    # Floor division keeps the index an integer on every Python version.
    mid = n // 2

    if n % 2 == 0:
        # even number of points
        return (ordered[mid - 1] + ordered[mid])/2.0

    # odd number of points
    return ordered[mid]
def std(v):

    """
    @summary: Calculates standard deviation

    The sum of squared deviations from the mean is divided by N-1, so
    at least two data points are required.

    @param v: A list or array
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Standard deviation
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    # With fewer than two points the N-1 divisor below is zero (or
    # negative); previously this surfaced as ZeroDivisionError.
    if len(v) < 2:
        error("argument must contain at least two elements")

    v_mean = mean(v)

    # Sum of squared deviations from the mean.
    sq_dev = 0.0
    for e in v:
        d = e - v_mean
        sq_dev = sq_dev + d*d

    return math.sqrt(sq_dev/float(len(v) - 1))
def amin(v):

    """
    @summary: Finds the minimum element in a list or array

    When the minimum occurs more than once, the index of its first
    occurrence is returned.

    @param v: A list or array
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Tuple (mini, minv), where minv is the minimum element in
        the list and mini is its index
    @rtype: TupleType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    if len(v) == 0:
        error("argument is empty")

    # Seed with the first element. The earlier version seeded with
    # max(v) and a strict '<' test, so a single-element or all-equal
    # input never updated the index and was reported as a failure.
    mini = 0
    minv = v[0]

    for ii in range(1, len(v)):
        if v[ii] < minv:
            minv = v[ii]
            mini = ii

    return mini, minv
def median_bounds(im, peak, shared=True):

    """
    @summary: Calculates the median of the left and right bounds found
        for each apexing peak mass

    For every mass with non-zero intensity in the peak's mass spectrum,
    the ion chromatogram is extracted from the intensity matrix and
    passed to ion_area() together with the apex index. The left and
    right values it returns are collected and their medians returned.

    @param im: The originating IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param peak: The Peak object
    @type peak: pyms.Peak.Class.Peak
    @param shared: Include shared ions shared with neighbouring peak
    @type shared: BooleanType

    @return: median left and right boundary offset in points
    @rtype: TupleType

    @author: Andrew Isaac
    """

    mat = im.get_matrix_list()
    ms = peak.get_mass_spectrum()
    apex = im.get_index_at_time(peak.get_rt())

    # prefer the stored apex index when it lies strictly within one
    # point of the index derived from the retention time
    stored = peak.get_pt_bounds()
    if is_list(stored) and apex - 1 < stored[1] < apex + 1:
        apex = stored[1]

    # indices of the peak masses that have non-zero intensity
    nonzero_ii = []
    for ii in xrange(len(ms.mass_list)):
        if ms.mass_spec[ii] > 0:
            nonzero_ii.append(ii)

    # collect the per-ion boundaries
    lefts = []
    rights = []
    for ii in nonzero_ii:
        # ion chromatogram: column ii of every scan
        ia = [row[ii] for row in mat]
        _area, left, right, l_share, r_share = ion_area(ia, apex)
        if shared or not l_share:
            lefts.append(left)
        if shared or not r_share:
            rights.append(right)

    # either list can be empty (no non-zero masses, or shared is False
    # and every boundary was flagged as shared); fall back to 0 then
    l_med = 0
    if lefts:
        l_med = median(lefts)

    r_med = 0
    if rights:
        r_med = median(rights)

    return l_med, r_med
def median_bounds(im, peak, shared=True):

    """
    @summary: Calculates the median of the left and right bounds found
        for each apexing peak mass

    For every mass with non-zero intensity in the peak's mass spectrum,
    the ion chromatogram is extracted from the intensity matrix and
    passed to ion_area() together with the apex index. The left and
    right values it returns are collected and their medians returned.

    @param im: The originating IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param peak: The Peak object
    @type peak: pyms.Peak.Class.Peak
    @param shared: Include shared ions shared with neighbouring peak
    @type shared: BooleanType

    @return: median left and right boundary offset in points
    @rtype: TupleType

    @author: Andrew Isaac
    """

    mat = im.get_matrix_list()
    ms = peak.get_mass_spectrum()
    apex = im.get_index_at_time(peak.get_rt())

    # prefer the stored apex index when it lies strictly within one
    # point of the index derived from the retention time
    stored = peak.get_pt_bounds()
    if is_list(stored) and apex - 1 < stored[1] < apex + 1:
        apex = stored[1]

    # indices of the peak masses that have non-zero intensity
    nonzero_ii = []
    for ii in xrange(len(ms.mass_list)):
        if ms.mass_spec[ii] > 0:
            nonzero_ii.append(ii)

    # collect the per-ion boundaries
    lefts = []
    rights = []
    for ii in nonzero_ii:
        # ion chromatogram: column ii of every scan
        ia = [row[ii] for row in mat]
        _area, left, right, l_share, r_share = ion_area(ia, apex)
        if shared or not l_share:
            lefts.append(left)
        if shared or not r_share:
            rights.append(right)

    # either list can be empty (no non-zero masses, or shared is False
    # and every boundary was flagged as shared); fall back to 0 then
    l_med = 0
    if lefts:
        l_med = median(lefts)

    r_med = 0
    if rights:
        r_med = median(rights)

    return l_med, r_med
def get_maxima_indices(ion_intensities, points=3):

    """
    @summary: Find local maxima.

    A window of 'points' scans (rounded down to an odd number) is slid
    over the intensities. The centre of a window is reported when it is
    strictly greater than everything on both sides. A flat top is
    reported once, at the mid point between its rising edge and the
    last point before it falls.

    @param ion_intensities: A list of intensities for a single ion
    @type ion_intensities: ListType
    @param points: Peak if maxima over 'points' number of scans; must
        be at least 2 (an even value is rounded up to the next odd one)
    @type points: IntType

    @return: A list of scan indices
    @rtype: ListType

    @author: Andrew Isaac
    """

    # Test for emptiness before looking at element 0, so that an empty
    # list is reported through error() instead of a bare IndexError.
    if not is_list(ion_intensities) or len(ion_intensities) == 0 \
            or not is_number(ion_intensities[0]):
        error("'ion_intensities' must be a List of numbers")

    # number of scans on each side of the window centre
    half = int(points) // 2

    # With half == 0 both window sides are empty and max() of an empty
    # slice would raise ValueError.
    if half < 1:
        error("'points' must be at least 2")

    points = 2*half + 1  # ensure odd number of points

    peak_point = []
    # index of the latest rising edge onto a plateau, or -1 if none
    edge = -1

    for index in range(len(ion_intensities) - points + 1):

        centre = index + half
        mid = ion_intensities[centre]
        left_max = max(ion_intensities[index:centre])
        right_max = max(ion_intensities[centre + 1:index + points])

        # The three cases are mutually exclusive.
        if mid > left_max and mid > right_max:
            # max in middle
            peak_point.append(centre)
            edge = -1  # ignore previous rising edge
        elif mid > left_max and mid == right_max:
            # flat from rise (left of peak?)
            edge = centre  # ignore previous rising edge, update latest
        elif mid == left_max and mid > right_max:
            # fall from flat
            if edge > -1:
                # mid point of the plateau
                peak_point.append((edge + centre) // 2)
            edge = -1

    return peak_point
def sele_peaks_by_rt(peaks, rt_range):

    """
    @summary: Selects peaks from a retention time range

    Both limits are converted with time_str_secs(); a peak is kept when
    its retention time lies strictly between the two limits.

    @param peaks: A list of peak objects
    @type peaks: ListType
    @param rt_range: A list of two time strings, specifying lower and
        upper retention times
    @type rt_range: ListType

    @return: A list of peak objects
    @rtype: ListType
    """

    if not is_peak_list(peaks):
        error("'peaks' not a peak list")

    if is_list(rt_range):
        if len(rt_range) != 2:
            error("'rt_range' must have exactly two elements")

        if not is_str(rt_range[0]) or not is_str(rt_range[1]):
            error("lower/upper retention time limits must be strings")
    else:
        error("'rt_range' not a list")

    lower = time_str_secs(rt_range[0])
    upper = time_str_secs(rt_range[1])

    if not lower < upper:
        error("lower retention time limit must be less than upper")

    # both limits are exclusive
    return [peak for peak in peaks if lower < peak.get_rt() < upper]
def mean(v):

    """
    @summary: Calculates the mean

    @param v: A non-empty list or array
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Mean
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    # Guard the division below; an empty input used to surface as
    # ZeroDivisionError.
    if len(v) == 0:
        error("argument is empty")

    # Start from 0.0 so the accumulation is done in floating point.
    return sum(v, 0.0)/float(len(v))
def set_pt_bounds(self, pt_bounds):

    """
    @summary: Sets peak boundaries in points

    The list is stored as given (no copy is made) after checking that
    it holds exactly three integers.

    @param pt_bounds: A list containing left, apex, and right
        peak boundaries in points, left and right are offsets
    @type pt_bounds: ListType

    @return: none
    @rtype: NoneType
    """

    if not is_list(pt_bounds):
        error("'pt_bounds' must be a list")

    if len(pt_bounds) == 3:
        # every entry has to be an integer
        for bound in pt_bounds:
            if not is_int(bound):
                error("'pt_bounds' element not an integer")
    else:
        error("'pt_bounds' must have exactly 3 elements")

    self.__pt_bounds = pt_bounds
def __init__(self, time_list, mass_list, intensity_matrix):

    """
    @summary: Initialize the IntensityMatrix data

    Only the first element of each list (and the first element of the
    first matrix row) is type-checked. The matrix must have one row per
    entry of 'time_list' and its first row must have one column per
    entry of 'mass_list'.

    If mpi4py can be imported, the communicator, rank information, the
    matrix dimensions (M, N) and this rank's row/column ranges are
    stored as extra public attributes; otherwise they are not set.

    @param time_list: Retention time values
    @type time_list: ListType
    @param mass_list: Binned mass values
    @type mass_list: ListType
    @param intensity_matrix: Binned intensity values per scan
    @type intensity_matrix: ListType

    @author: Andrew Isaac
    """

    # sanity check
    # Emptiness is tested before indexing so that empty input is
    # reported through error() instead of a bare IndexError.
    if not is_list(time_list) or len(time_list) == 0 \
            or not is_number(time_list[0]):
        error("'time_list' must be a list of numbers")

    if not is_list(mass_list) or len(mass_list) == 0 \
            or not is_number(mass_list[0]):
        error("'mass_list' must be a list of numbers")

    if not is_list(intensity_matrix) or \
            len(intensity_matrix) == 0 or \
            not is_list(intensity_matrix[0]) or \
            len(intensity_matrix[0]) == 0 or \
            not is_number(intensity_matrix[0][0]):
        error("'intensity_matrix' must be a list, of a list, of numbers")

    if not len(time_list) == len(intensity_matrix):
        error("'time_list' is not the same length as 'intensity_matrix'")

    if not len(mass_list) == len(intensity_matrix[0]):
        error("'mass_list' is not the same size as 'intensity_matrix'"
            " width")

    self.__time_list = time_list
    self.__mass_list = mass_list
    self.__intensity_matrix = intensity_matrix

    self.__min_mass = min(mass_list)
    self.__max_mass = max(mass_list)

    # Direct access for speed (DANGEROUS): public alias of the very
    # same list object as the private attribute.
    self.intensity_matrix = self.__intensity_matrix

    # Try to include parallelism.
    # Only a failed import means "run in serial"; the bookkeeping lives
    # in the else-branch so that a genuine error there is no longer
    # silently swallowed.
    try:
        from mpi4py import MPI
    except ImportError:
        # If we can't import mpi4py then continue in serial.
        pass
    else:
        comm = MPI.COMM_WORLD
        num_ranks = comm.Get_size()
        rank = comm.Get_rank()
        M, N = len(intensity_matrix), len(intensity_matrix[0])
        # Half-open (start, stop) row and column ranges for this rank;
        # floor division keeps the bounds integral.
        lrr = (rank*M//num_ranks, (rank + 1)*M//num_ranks)
        lcr = (rank*N//num_ranks, (rank + 1)*N//num_ranks)
        m, n = (lrr[1] - lrr[0], lcr[1] - lcr[0])
        self.comm = comm
        self.num_ranks = num_ranks
        self.rank = rank
        self.M = M
        self.N = N
        self.local_row_range = lrr
        self.local_col_range = lcr
        self.m = m
        self.n = n
def save_data(file_name, data, format_str="%.6f", prepend="", sep=" ",
        compressed=False):

    """
    @summary: Saves a list of numbers or a list of lists of numbers
        to a file with specific formatting

    A list of numbers is written one number per line. A list of lists
    is written one inner list per line, its numbers joined by 'sep'.
    All of 'data' is validated before the file is opened, so invalid
    input does not leave a truncated or partially written file behind.
    An empty list produces an empty file.

    @param file_name: Name of a file
    @type: StringType
    @param data: A list of numbers, or a list of lists
    @type: ListType
    @param format_str: A format string for individual entries
    @type: StringType
    @param prepend: A string, printed before each row
    @type: StringType
    @param sep: A string, printed between the numbers of a row
    @type: StringType
    @param compressed: A boolean. If True, the output will be gzipped
        by running the external 'gzip' program on the written file
    @type: BooleanType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    if not is_list(data):
        error("'data' is not a list")

    if not is_str(prepend):
        error("'prepend' is not a string")

    if not is_str(sep):
        error("'sep' is not a string")

    # decide whether data is a vector or matrix; the first element
    # determines the kind, and an empty list is treated as a vector
    data_is_matrix = len(data) > 0 and not is_number(data[0])

    if data_is_matrix:
        for row in data:
            if not is_list(row):
                error("not all elements of the list are lists")
            for datum in row:
                if not is_number(datum):
                    error("datum not a number")
    else:
        for item in data:
            if not is_number(item):
                error("not all elements of the list are numbers")

    fp = open_for_writing(file_name)

    try:
        for row in data:
            fp.write(prepend)
            if data_is_matrix:
                # separator goes between entries, not after the last
                fp.write(sep.join([format_str % (datum) for datum in row]))
            else:
                fp.write(format_str % (row))
            fp.write("\n")
    finally:
        # close the file even if formatting or writing fails
        close_for_writing(fp)

    if compressed:
        # Pass the file name as a separate argument rather than through
        # a shell, so spaces and shell metacharacters are harmless.
        import subprocess
        try:
            status = subprocess.call(['gzip', file_name])
        except OSError:
            # gzip could not be started at all
            status = -1
        if status != 0:
            error("gzip compress failed")
def save_data(file_name, data, format_str="%.6f", prepend="", sep=" ",
        compressed=False):

    """
    @summary: Saves a list of numbers or a list of lists of numbers
        to a file with specific formatting

    A list of numbers is written one number per line. A list of lists
    is written one inner list per line, its numbers joined by 'sep'.
    All of 'data' is validated before the file is opened, so invalid
    input does not leave a truncated or partially written file behind.
    An empty list produces an empty file.

    @param file_name: Name of a file
    @type: StringType
    @param data: A list of numbers, or a list of lists
    @type: ListType
    @param format_str: A format string for individual entries
    @type: StringType
    @param prepend: A string, printed before each row
    @type: StringType
    @param sep: A string, printed between the numbers of a row
    @type: StringType
    @param compressed: A boolean. If True, the output will be gzipped
        by running the external 'gzip' program on the written file
    @type: BooleanType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    if not is_list(data):
        error("'data' is not a list")

    if not is_str(prepend):
        error("'prepend' is not a string")

    if not is_str(sep):
        error("'sep' is not a string")

    # decide whether data is a vector or matrix; the first element
    # determines the kind, and an empty list is treated as a vector
    data_is_matrix = len(data) > 0 and not is_number(data[0])

    if data_is_matrix:
        for row in data:
            if not is_list(row):
                error("not all elements of the list are lists")
            for datum in row:
                if not is_number(datum):
                    error("datum not a number")
    else:
        for item in data:
            if not is_number(item):
                error("not all elements of the list are numbers")

    fp = open_for_writing(file_name)

    try:
        for row in data:
            fp.write(prepend)
            if data_is_matrix:
                # separator goes between entries, not after the last
                fp.write(sep.join([format_str % (datum) for datum in row]))
            else:
                fp.write(format_str % (row))
            fp.write("\n")
    finally:
        # close the file even if formatting or writing fails
        close_for_writing(fp)

    if compressed:
        # Pass the file name as a separate argument rather than through
        # a shell, so spaces and shell metacharacters are harmless.
        import subprocess
        try:
            status = subprocess.call(['gzip', file_name])
        except OSError:
            # gzip could not be started at all
            status = -1
        if status != 0:
            error("gzip compress failed")