예제 #1
0
파일: __init__.py 프로젝트: manor/mz
    def ric(self, *args, **kwargs):
        '''Deprecated alias of xic(); retained for backwards compatibility.

        Logs a deprecation message at level 40, then forwards every
        positional and keyword argument to xic() unchanged and returns
        whatever xic() returns.'''

        deprecation_notice = 'ric() is deprecated: use xic() instead'
        logger_message(40, deprecation_notice)

        result = self.xic(*args, **kwargs)
        return result
예제 #2
0
파일: precursor_peaks.py 프로젝트: manor/mz
def extract_peaks(peak_data_path, time_window=(0.5, 0.5), mz_window=(0.1, 0.1),
                  plot_ms1=False, plot_xic=False, peak_area=False,
                  reporter_ions=False, peakfilter=None, plot_ms2=False,
                  ion_list=('b', 'y'), instrument='ESI-TRAP', isMZD=False, im_size=(8.0,6.0)):
    '''Coroutine-style generator that annotates report rows with precursor
    peak information taken from the MS data file at peak_data_path.

    Usage: prime the generator once (the first value yielded is None), then
    send() one row at a time. For every row sent, the generator yields a
    (row, image_tuples) pair. Sending a falsy row ends the loop. The data
    file is closed when the generator finishes, is closed, or raises.

    Each row is a mapping. Keys read: 'Experimental mz', 'ms2 time' and
    otherwise 'Spectrum Description'; with plot_ms2 also 'Peptide Sequence',
    'Variable Modifications', 'Charge' and 'Peptide Score'. Keys written:
    'MS2 Time', 'Peak Time', 'Peak Intensity', 'Peak Width (sec)',
    'Peak Comment', plus 'Peak Area' (if peak_area) and 'Rep114'..'Rep117'
    (if reporter_ions).

    Rows whose scan cannot be determined (scan number 0 in a .dta-style
    description, or a description matching none of the known formats) are
    yielded back unmodified with an empty image list.

    Parameters:
      peak_data_path -- path handed to API.mzFile.
      time_window    -- (before, after) offsets around the scan time used
                        for the XIC, clamped to the file's time range.
      mz_window      -- (below, above) offsets around the experimental m/z.
      plot_ms1, plot_xic, plot_ms2 -- which plots to add to image_tuples.
      peak_area      -- also store calc_peak_area(xic) in the row.
      reporter_ions  -- also store the maximum MS2 intensity within 0.02 of
                        114.11, 115.11, 116.11 and 117.11.
      peakfilter     -- passed through to mz_file.xic().
      ion_list       -- ion series for the MS2 plot; replaced by ('c', 'z')
                        when instrument is 'ETD-TRAP'.
      isMZD          -- if true, image_tuples carry the raw plot data
                        (kinds 'xic', 'ms1', 'ms2'); otherwise PNG files are
                        written into myTemp and referenced with kind 'image'.
      im_size        -- image size passed to the mz_image drawing functions.

    Each image_tuples entry is (column name, kind, payload).'''

    mz_file = API.mzFile(peak_data_path)

    # Everything after opening the file is guarded so that the handle is
    # released even if setup fails or the generator is closed right after
    # being primed (i.e. while suspended at the initial yield).
    try:
        if instrument == 'ETD-TRAP':
            ion_list = ('c', 'z')

        logger_message(10, 'MS File opened...')

        (first_time,last_time) = mz_file.time_range()

        logger_message(30, 'Accessing Precursor Peaks ...')

        # all the MS2 scans, as (time, mz) pairs
        ms2_scans = [(t,mz) for t,mz,sn,st,sm in mz_file.scan_info(first_time, last_time)
                     if st == 'MS2']

        # Recognised 'Spectrum Description' formats. Both pieces of wiff_reg
        # are raw strings so the backslashes reach the regex engine untouched.
        raw_reg = re.compile(r'.+\.(\d+)\.(\d+)\.(.+)\.dta.*', flags=re.I)
        wiff_reg = re.compile(r'.*\(sample number (\d+)\), Elution: (.+) min, '
                              r'Period: (\d+), Cycle\(s\): (\d+).*? \(Experiment (\d+)\).*?', flags=re.I)
        wiff_reg2 = re.compile(r'(?:Locus:)?(\d+)\.(\d+)\.(\d+)\.(\d+)\.(\d+)', flags=re.I)

        # doubles as a 'generic' scan regex {file_name}/scans/{scan_time}
        url_reg = re.compile(r'((?:http://.+/files/)?(?:.+)/scans/(\d+(?:\.\d+)?))(?:/.*)?', flags=re.I)

        # The windows do not change between rows, so unpack them once.
        (time_first_half, time_second_half) = time_window
        (mz_first_half, mz_second_half) = mz_window

        row = (yield None) # generator initialization

        while row:
            image_tuples = []

            mz = row['Experimental mz']
            # NOTE(review): read as 'ms2 time' but written back below as
            # 'MS2 Time' -- presumably rows are case-insensitive; confirm.
            scan_time = row.get('ms2 time', None)

            if scan_time:
                scan = scan_time
            else:
                spec_desc = row['Spectrum Description']

                raw_m = raw_reg.match(spec_desc)
                wiff_m = wiff_reg.match(spec_desc)
                wiff_m2 = wiff_reg2.match(spec_desc)
                url_m = url_reg.match(spec_desc)
                if raw_m:
                    scan = int(raw_m.group(1))
                    if scan == 0:
                        # No usable scan number: hand the row back untouched
                        # and start over with the next row, rather than
                        # falling through with scan_time still unset.
                        row = yield (row, image_tuples)
                        continue
                    else:
                        scan_time = mz_file.scan_time_from_scan_name(scan)
                elif wiff_m:
                    scan = (int(wiff_m.group(4)), int(wiff_m.group(5)))
                    scan_time = mz_file.scan_time_from_scan_name(scan)
                elif wiff_m2:
                    # Both groups come from wiff_m2; wiff_m is None in this branch.
                    scan = (int(wiff_m2.group(4)), int(wiff_m2.group(5)))
                    scan_time = mz_file.scan_time_from_scan_name(scan)
                elif url_m:
                    scan = url_m.group(1)
                    scan_time = float(url_m.group(2))
                else:
                    # Unrecognised description: hand the row back untouched
                    # and start over with the next row.
                    row = yield (row, image_tuples)
                    continue

            logger_message(10,'Scan = %s, Scan Time = %s, mz = %s' % (str(scan), str(scan_time), str(mz)))

            floatmz = float(mz)

            start_mz = floatmz - mz_first_half
            end_mz = floatmz + mz_second_half

            # keep the window strictly inside the file's time range
            start_time = max(scan_time - time_first_half, first_time + 0.00001)
            end_time = min(scan_time + time_second_half, last_time - 0.00001)

            logger_message(20, 'Making XIC..')
            logger_message(10, '%s %s' % (str(start_time), str(end_time)))

            # xic is indexed below as a sequence of (time, intensity) pairs
            xic = mz_file.xic(start_time, end_time, start_mz, end_mz, peakfilter)
            (max_time, max_int) = max(xic, key=lambda x: x[1])
            max_xic_index = xic.index((max_time,max_int))

            # plot XIC
            if plot_xic:
                # Intensity at the MS2 scan time, linearly interpolated between
                # the two XIC points around it; defaults to the last intensity.
                scan_dot = (scan_time, xic[-1][1])
                for (i,r) in enumerate(xic[1:]):
                    if r[0] > scan_time:
                        # indexes are shifted because of slice: xic[i] is the previous xic
                        slope = float(xic[i][1] - r[1]) / float(xic[i][0] - r[0])
                        scan_dot = (scan_time,
                                    slope * (scan_time - r[0]) + r[1])
                        break

                xic_times = [i[0] for i in xic] # X axis
                xic_ints = [i[1] for i in xic] # Y axis

                # other MS2 scans that fall inside the same time/mz window
                bin_times = [a[0] for a in ms2_scans
                             if (a[0] != scan_time
                                 and start_time < a[0] <= end_time
                                 and start_mz <= a[1] <= end_mz)]
                bin_ints = []

                for bt in bin_times:
                    for (i,r) in enumerate(xic[1:]):
                        if r[0] > bt:
                            # indexes are shifted because of slice: xic[i] is the previous xic
                            slope = float(xic[i][1] - r[1]) / float(xic[i][0] - r[0])
                            bin_ints.append(slope * (bt - r[0]) + r[1])
                            break
                        if i == len(xic) - 2:
                            # ran off the end: use the last XIC intensity
                            bin_ints.append(r[1])

                if isMZD:
                    image_tuples.append(('Peak Width (sec)', 'xic',
                                         (mz, xic_times, xic_ints, scan_dot, bin_times, bin_ints)))
                else:
                    (h, pngpath) = mkstemp(suffix='.png', prefix='xic', dir=myTemp)
                    os.close(h)

                    logger_message(20, 'Drawing XIC Plot...')

                    mz_image.make_xic_im(pngpath, mz, xic_times, xic_ints, scan_dot, bin_times, bin_ints, im_size=im_size)

                    logger_message(20, 'Inserting XIC Plot into Spreadsheet...')

                    image_tuples.append(('Peak Width (sec)', 'image', pngpath))

            #precursor mass graph info
            if plot_ms1:
                # time of the XIC point just before the first one that lies
                # after the MS2 scan; falls back to the MS2 scan time itself
                precursorScanTime = scan_time
                for (i,r) in enumerate(xic[1:]):
                    if r[0] > scan_time:
                        precursorScanTime = xic[i][0]
                        break

                data = mz_file.scan(precursorScanTime)

                scan_mode = data.mode

                # only the points within 2.0 of the experimental m/z
                xy = [ (x,y) for (x,y) in data if abs(x - floatmz) <= 2.0 ]

                pm_scanDot = (floatmz, max(i[1] for i in xy) if xy else 0.0)

                # don't want to use last entry, and can't use first entry.
                for (i,(x,y)) in enumerate(xy[1:-1]):
                    if x > floatmz:
                        # indexes are shifted because of slice: xy[i] is the previous xy
                        slope = float(xy[i][1] - y) / float(xy[i][0] - x)
                        pm_scanDot = (floatmz, slope * (floatmz - x) + y)
                        break

                if isMZD:
                    image_tuples.append(('Experimental mz', 'ms1',
                                         (mz, xy, scan_mode, pm_scanDot)))
                else:
                    (h, pngpath) = mkstemp(suffix='.png', prefix='pm', dir=myTemp)
                    os.close(h)

                    logger_message(20, 'Drawing Precursor Mass Plot...')

                    mz_image.make_ms1_im(pngpath, mz, xy, scan_mode, pm_scanDot, im_size=im_size)

                    logger_message(20, 'Inserting Precursor Mass Plot into Spreadsheet...')

                    image_tuples.append(('Experimental mz', 'image', pngpath))

            if plot_ms2:
                ms_ms_scan = mz_file.scan(scan_time)
                scan_mode = ms_ms_scan.mode

                peptide = mz_pep_format(row['Peptide Sequence'], row['Variable Modifications'] or '')
                charge = row['Charge']
                score = row['Peptide Score']

                if isMZD:
                    image_tuples.append(('Peptide Sequence', 'ms2',
                                         (ms_ms_scan, scan_mode, peptide,
                                          None, ion_list, charge, score)))
                else:
                    (h, pngpath) = mkstemp(suffix='.png', prefix='ms2', dir=myTemp)
                    os.close(h)

                    logger_message(20, 'Drawing MS MS Mass Plot...')

                    mz_image.make_ms2_im(pngpath, ms_ms_scan, scan_mode, peptide,
                                                 None, ion_list, charge, score, im_size=im_size)

                    logger_message(20, 'Inserting MS MS Plot into Spreadsheet...')

                    image_tuples.append(('Peptide Sequence', 'image', pngpath))

            #Calculate Peak Width at half max (FWHM)
            halfIntensity = max_int / 2.0

            reverseScans = xic[::-1]

            beforeHalfTime = start_time
            afterHalfTime = end_time
            peak_comment = 'Good'

            # Walk backwards from the point just before the maximum;
            # reverseScans[i+ind_off] is the neighbour one step closer to it.
            ind_off = len(reverseScans) - max_xic_index - 1
            if max_xic_index > 0:
                for i,r in enumerate(reverseScans[-max_xic_index:]):
                    if r[1] < halfIntensity:
                        beforeSlope = (float(reverseScans[i+ind_off][1] - r[1])
                                       / float(reverseScans[i+ind_off][0] - r[0]))
                        beforeHalfTime = (halfIntensity - r[1]) / beforeSlope + r[0]
                        break
                else:
                    # reached end (beginning) without falling below half intensity
                    beforeHalfTime = r[0]
                    peak_comment = 'Half Intensity Not Reached Before Peak'
            else:
                # if the peak is at the beginning, then that'll be the 'before time'
                beforeHalfTime = xic[0][0]

            # Walk forwards starting at the maximum itself; xic[i+ind_off]
            # is the previous point.
            ind_off = max_xic_index - 1
            for i,r in enumerate(xic[max_xic_index:]):
                if r[1] < halfIntensity:
                    afterSlope = (float(r[1] - xic[i+ind_off][1])
                                  / float(r[0] - xic[i+ind_off][0]))
                    afterHalfTime = (halfIntensity - r[1]) / afterSlope + r[0]
                    break
            else:
                # reached end without falling below half intensity,
                # including the case where max is at end of the XIC
                afterHalfTime = r[0]
                if peak_comment != 'Half Intensity Not Reached Before Peak':
                    peak_comment = 'Half Intensity Not Reached After Peak'
                else:
                    peak_comment = 'Half Intensity Not Reached at Either End'

            # a peak on the edge of the window overrides the comments above
            if max_xic_index == 0:
                peak_comment = 'Peak at Beginning of XIC'
            elif max_xic_index == len(xic)-1:
                peak_comment = 'Peak at End of XIC'

            # presumably XIC times are in minutes, hence the factor of 60
            # for a width in seconds -- TODO confirm
            peak_width = (afterHalfTime - beforeHalfTime) * 60.0

            row['MS2 Time'] = scan_time
            row['Peak Time'] = max_time
            row['Peak Intensity'] = max_int
            row['Peak Width (sec)'] = peak_width
            row['Peak Comment'] = peak_comment

            if peak_area:
                logger_message(20,'Calculating Peak Area...')

                row['Peak Area'] = calc_peak_area(xic)

            if reporter_ions:
                # the MS2 scan was already fetched if it was plotted
                if not plot_ms2:
                    ms_ms_scan = mz_file.scan(scan_time)

                halfwindow = 0.02

                # highest intensity near each reporter mass, or 0 if none
                row['Rep114'] = max([m[1] for m in ms_ms_scan if abs(m[0] - 114.11) <= halfwindow] or [0])
                row['Rep115'] = max([m[1] for m in ms_ms_scan if abs(m[0] - 115.11) <= halfwindow] or [0])
                row['Rep116'] = max([m[1] for m in ms_ms_scan if abs(m[0] - 116.11) <= halfwindow] or [0])
                row['Rep117'] = max([m[1] for m in ms_ms_scan if abs(m[0] - 117.11) <= halfwindow] or [0])


            row = yield (row, image_tuples)
    finally:
        mz_file.close()
예제 #3
0
파일: protein_pilot.py 프로젝트: manor/mz
    def format(self, new_file_name=None):
        '''Convert the tab-delimited file self.orig_file into a report.

        The first line of the input is taken as the header row. Output
        columns are a copy of multiplierz.mzReport.default_columns plus
        'MS2 Time' and 'Protein Coverage', minus 'Peptide Rank' and 'Query',
        followed by every input header that is not a key of self.reps; one
        occurrence each of 'Unused', 'Contrib' and 'Sc' is then removed.
        Input columns that are keys of self.reps are stored under the
        mapped name.

        Derived columns:
          'Variable Modifications' -- self.convert_var_mod('Modifications')
          'Missed Cleavages'       -- count of 'missed' in 'Cleavages'
          'Spectrum Description'   -- self.convert_spectrum('Spectrum', 'Time')
          'Protein Matches'        -- number of input lines sharing the row's
                                      accession

        new_file_name -- output path; defaults to the input file name
                         prefixed with "mz_" in the same directory.

        Both the input file and the report writer are closed even if an
        error occurs while reading or writing.'''

        rows = []

        protein_matches = defaultdict(int)

        with open(self.orig_file) as fh:
            #Change header names
            header_line = fh.readline()
            headers = header_line.strip().split('\t')

            new_headers = multiplierz.mzReport.default_columns[:]
            new_headers.extend(('MS2 Time', 'Protein Coverage'))
            new_headers.remove('Peptide Rank')
            new_headers.remove('Query')
            new_headers.extend(h for h in headers if h not in self.reps)
            if 'Unused' in new_headers:
                new_headers.remove('Unused')
            if 'Contrib' in new_headers:
                new_headers.remove('Contrib')
            if 'Sc' in new_headers:
                new_headers.remove('Sc')

            for line in fh:
                new_data = dict.fromkeys(new_headers)
                # Strip only the line terminator: slicing off the last
                # character would eat real data when the final line has no
                # trailing newline.
                data = dict(zip(headers, line.rstrip('\n').split('\t')))

                protein_matches[data['Accessions']] += 1

                for h in data:
                    if h in self.reps and self.reps[h] in new_data:
                        new_data[self.reps[h]] = data[h]
                    elif h in new_data:
                        new_data[h] = data[h]
                    elif h not in ('Unused','Contrib','Sc'):
                        logger_message(10, 'Missing key: %s' % h)

                if 'Modifications' in data:
                    new_data['Variable Modifications'] = self.convert_var_mod(data['Modifications'])

                if 'Cleavages' in data:
                    new_data['Missed Cleavages'] = data['Cleavages'].count('missed')

                if 'Time' in data and 'Spectrum' in data:
                    new_data['Spectrum Description'] =  self.convert_spectrum(data['Spectrum'], data['Time'])

                rows.append(new_data)

        # Counts are keyed by the input 'Accessions' value but looked up by
        # the output 'Accession Number' -- presumably self.reps maps one to
        # the other; verify against the class definition.
        for row in rows:
            row['Protein Matches'] = protein_matches[row['Accession Number']]

        dir_split = os.path.split(self.orig_file)
        if not new_file_name:
            new_file_name = os.path.join(dir_split[0], "mz_" + dir_split[1])

        report = mz.Report.writer(new_file_name, columns=new_headers)
        try:
            for row in rows:
                report.write(row)
        finally:
            report.close()
예제 #4
0
파일: genmod.py 프로젝트: manor/mz
from mz import logger_message
from unimod import UnimodDatabase
import os

# Directory containing this module; unimod.xml is looked up alongside it.
myData = os.path.dirname(os.path.abspath(__file__))

unimod = None
try:
    # load the unimod database
    unimod = UnimodDatabase(os.path.join(myData, 'unimod.xml'))
except IOError:
    # NOTE(review): the stated intent was that a missing unimod.xml should
    # not prevent multiplierz in general from functioning (only unimod-based
    # features would break). As written, however, the code logs the problem
    # at level 50 and then calls sys.exit() with no argument, which raises
    # SystemExit with a zero ("success") exit status -- confirm which
    # behavior is actually wanted.
    logger_message(50, ('Warning: %s not found.\n'
                        'Please download a copy from http://www.unimod.org\n'
                        'or use the copy on your local Mascot server')
    % (os.path.join(myData, 'unimod.xml')))
    import sys
    sys.exit()

# Module-level lookup tables built from the loaded database.
AW = unimod.elements # atomic masses
amino_acids = unimod.amino_acids # amino acid masses
name = unimod.mods() # modifications; each item is used as a key below
# delta[n] is whatever unimod.get_mod_delta(n) returns for modification n
delta = {}
for n in name:
    delta[n] = unimod.get_mod_delta(n)
# sites[n] lists the distinct first elements of the (s, p) pairs found in all
# specificity groups of modification n; order is arbitrary (it comes from a set)
sites = {}
for n in name:
    # The list -> set -> list round trip only removes duplicates, should any
    # occur across specificity "groups" (the original author notes not fully
    # understanding those groups).
    sites[n] = list(set([s for specs in unimod.get_mod_specificities(n).values() for (s,p) in specs]))
# starts empty; presumably filled in further down the module (not visible here)
neutral_losses = {}