Example #1
    def write(self, scan, title, mass, charge=None):
        self.file.write('BEGIN IONS\n')
        self.file.write('TITLE=%s\n' % title)
        if charge:  # Don't even bother with 0 charge, either.
            charge = str(int(charge))
            self.file.write('CHARGE=%s\n' % charge)
        if mass:  # 0 mass gives errors, and is usually due to null values.
            self.file.write('PEPMASS=%s\n' % mass)

        if len(scan) >= 10000:
            vprint("Scan at %s has %d datapoints, Mascot only allows "
                   "10000; removing least-intense points." %
                   (title, len(scan)))
            scan.sort(key=lambda x: x[1], reverse=True)
            scan = scan[:9999]
            scan.sort(key=lambda x: x[0])

        for pt in scan:
            if len(pt) == 2:
                self.file.write("%s\t%s\n" % (pt[0], pt[1]))
            elif len(pt) >= 3:
                self.file.write("%s\t%s\t%s\n" % (pt[0], pt[1], pt[2]))
            else:
                raise ValueError(
                    "Scan datapoints must have both MZ and intensity!")

        self.file.write("END IONS\n")
Example #2
def parse_to_generator(mgffile,
                       labelType=(lambda x: x),
                       header=False,
                       rawStrings=False):
    """
    Loads a Mascot Generic Format file and returns it in dict form.

    labelType can be a callable object that transforms the 'TITLE='
    value of an MGF entry into what will be used for the corresponding
    key in the dict.  If this is unspecified, the key will be the
    whole TITLE value.
    
    If "header" is set to True the output dict has an extra entry
    of key 'header' that contains the MGF header info.
    
    If raw_strings is set to True, the charge, pepmass, etc are returned
    as 
    """

    f = open(mgffile, "r")
    topMatter = True
    for line in f:
        if "BEGIN IONS" in line:
            topMatter = False
            entry = {}
            key = None
            rt = charge = mass = None
            spectrum = []
            for line in f:
                if 'END IONS' in line:
                    break
                elif '=' in line:
                    # Split only on the first '='; TITLE values may contain '='.
                    field, value = line.split('=', 1)

                    if (not rawStrings) and field == 'CHARGE':
                        value = int(value.strip('\n\r+ '))
                    elif (not rawStrings) and field == "PEPMASS":
                        value = float(value.split()[0].strip())
                    else:
                        value = value.strip()

                    entry[field.strip().lower()] = value
                    if field == 'TITLE':
                        key = value.strip()
                elif line.strip():  # Skip blank lines within an entry.
                    spectrum.append(tuple(map(float, line.split())))

            entry["spectrum"] = spectrum

            key = labelType(key)
            yield entry
        elif topMatter and '=' in line:
            field, value = line.split('=', 1)
            if field in MGFTopMatter:
                #data['header'][field.lower()] = value
                continue
        elif 'SEARCH=' in line or 'MASS=' in line:
            continue
        else:
            vprint("Unexpected line: %s" % line)
Example #3
    def __init__(self, data_file):
        self.datafile = data_file
        #self.source = CreateObject("{7e3450b1-75e7-49b2-9be7-64cbb2458c56}")
        self.source = Dispatch("{7e3450b1-75e7-49b2-9be7-64cbb2458c56}")
        self.source.OpenFile(data_file)

        cole, level, _, _, desc, explevel, exppar = self.source.GetInfo

        descwords = desc.split()
        filename = descwords[2].strip(',')
        if filename.split('_')[1].upper() == 'MSMS':
            level = 2
            precursor = float(filename.split('_')[2])
        elif filename.split('_')[1].upper() == 'MS':
            level = 1
            precursor = '-'
        else:
            vprint(
                "Unparsable T2D file name; MS level and precursor mass unavailable."
            )
            level = -1
            precursor = '-'  # Prevents a NameError when building self._info below.

        mzrange = float(descwords[-3].strip('(')), float(
            descwords[-1].strip(')'))

        self._info = {
            'Collision Energy': float(cole),
            'MS Level': int(level),
            'Precursor': float(precursor) if precursor != '-' else None,
            'Range': mzrange
        }
Example #4
    def __init__(self, filename):
        self.filename = filename
        assert os.path.exists(filename)
        if filename.lower().endswith('featurepickle'):
            vprint("Legacy mode enabled.")
            self.data = pickle.load(open(filename, 'rb'))
            self.mode = 'pickle'
        else:
            self.connection = sqlite3.connect(filename)
            self.data = self.connection.cursor()
            self.mode = 'sql'

            self.decoder = newMarshal
Example #5
    def filters(self):
        """
        Thermo-style filter strings for all spectra; used for compatibility with
        various legacy functions.
        """

        ionization = self.source.MSScanFileInformation.IonModes
        if not ionization:
            vprint(
                "Could not determine separation/ionization; defaulting to GCMS."
            )
            separator = 'GC'
        elif ionization & (4 | 2):  # Bitwise OR and AND.
            separator = 'GC'
        else:
            separator = 'TOF'

        colEs = self.source.MSScanFileInformation.CollisionEnergy
        if len(colEs) == 1:
            colE = colEs[0]
        else:
            colE = None

        if not self._filters:
            self._filters = []
            for rt, mz, index, level, polarity in self.scan_info():
                scanObj = self.source.GetSpectrum_6(index)
                rangeobj = scanObj.MeasuredMassRange.QueryInterface(
                    bc.IRange)  # Yep, definitely spectrum-specific.

                if colE:  # Singular collision energy in the file.
                    energy = colE
                else:
                    energy = float(scanObj.CollisionEnergy)

                if level != 'MS1':
                    precstr = '%.4f@%.2f' % (mz, energy)
                else:
                    precstr = ''
                string = "%s MS %s NSI Full ms%s %s[%.2f-%.2f]" % (
                    separator, polarity,
                    int(level[2]) if level != 'MS1' else '', precstr,
                    (rangeobj.Start), (rangeobj.End))
                self._filters.append((rt, string))

        return self._filters
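For illustration, the format string at the end of filters() produces Thermo-style strings of the following shape (the polarity comes from scan_info(); the m/z, energy, and range values here are invented):

# MS1 scan; no level number or precursor segment:
#   "TOF MS + NSI Full ms [100.00-1700.00]"
# MS2 scan with precursor 455.2041 at collision energy 30:
#   "TOF MS + NSI Full ms2 455.2041@30.00[100.00-1700.00]"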
Example #6
    def __init__(self, filename):
        self.filename = filename
        assert os.path.exists(filename)
        if filename.lower().endswith('featurepickle'):
            vprint("Legacy mode enabled.")
            self.data = pickle.load(open(filename, 'rb'))
            self.mode = 'pickle'
        else:
            self.connection = sqlite3.connect(filename)
            self.data = self.connection.cursor()
            self.mode = 'sql'

            self.data.execute("SELECT data FROM features WHERE ind=1")
            testfeature = str(self.data.fetchone()[0])
            if '\n' in testfeature:
                # Old non-base64 encoded feature file!
                self.decoder = oldMarshal
            else:
                self.decoder = newMarshal
Example #7
def save_feature_database(features, outputfile, overwrite=None):
    """
    Saves a SQLite-mode feature database. Result file will have the
    extension '.features' .
    """

    if os.path.exists(outputfile):
        if overwrite:
            os.remove(outputfile)
        else:
            raise IOError("Target file %s already exists!" % outputfile)

    conn = sqlite3.connect(outputfile)
    cur = conn.cursor()

    createTable = "CREATE TABLE features(ind int, mz real, startscan int, endscan int, data text)"
    cur.execute(createTable)

    vprint("Created table.")
    for index, feature in enumerate(features):
        mz = feature.mz
        startscan, endscan = feature.scanrange
        featureData = base64.b64encode(pickle.dumps(feature, protocol=2))
        cur.execute('INSERT INTO features VALUES (?, ?, ?, ?, ?)',
                    (index, mz, startscan, endscan, featureData.decode()))

        if index % 100 == 0:
            conn.commit()

    #print("TEST MODE")
    #sidechannel = open(outputfile + "SIDECHANNEL.pickle", 'wb')
    #pickle.dump(list(enumerate(features)), sidechannel)
    #sidechannel.close()

    vprint("Indexing...")
    createIndex = "CREATE INDEX mzindex ON features(mz, startscan)"
    cur.execute(createIndex)
    vprint("Analyzing...")
    cur.execute("ANALYZE")

    vprint("Final SQLite commit...")
    conn.commit()

    conn.close()
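Reading an entry back reverses the pickle -> base64 -> text encoding used above, as the reader __init__ in Example #6 suggests. A hedged sketch; load_feature is a hypothetical helper, not part of the library:

import base64
import pickle
import sqlite3

def load_feature(featurefile, index):
    # Fetch one encoded feature row and decode it back to a Feature object.
    conn = sqlite3.connect(featurefile)
    cur = conn.cursor()
    cur.execute("SELECT data FROM features WHERE ind=?", (index,))
    encoded = cur.fetchone()[0]
    conn.close()
    return pickle.loads(base64.b64decode(encoded))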
Example #8
def detect_features(datafile, **constants):
    """
    Runs the feature detection algorithm on the target data file (currently,
    only Thermo .RAW is supported.)  Returns the path to the feature data
    file.
    
    Optional arguments:
    - tolerance (default 10): MZ tolerance in parts-per-million for all determinations
    of peak identity.  Should usually correspond to the mass precision of the
    source instrument.
    - force (default False): If True, feature detection is run even if a
    feature data file already exists for the target data.
    """

    if 'outputfile' in constants:
        featurefile = constants['outputfile']
    else:
        featurefile = datafile + '.features'

    if 'tolerance' in constants and constants['tolerance']:
        global tolerance
        tolerance = constants['tolerance']
        if tolerance < 1:
            print("\n\n\nWARNING- tolerance value for SILAC analysis "
                  "should now be in PPM!\n\n\n")
    else:
        tolerance = 10

    if 'partial' in constants:
        # This is primarily for testing purposes only.
        scanrange = constants['partial']
    else:
        scanrange = None

    if 'force' in constants:
        force = constants['force']
    else:
        force = False

    if 'whitelist_psms' in constants:
        whitelist_mzs = constants['whitelist_psms']
        featurefile = datafile + '.partial%s.features' % (str(
            hash(frozenset(whitelist_mzs)))[:5])
    else:
        whitelist_mzs = None

    if 'peak_picking_params' in constants:
        peak_pick_params = constants['peak_picking_params']
    elif 'tolerance' in constants and constants['tolerance']:
        peak_pick_params = {'tolerance': constants['tolerance']}
    else:
        peak_pick_params = {'tolerance': 10}

    if os.path.exists(featurefile) and not force:
        vprint("Feature data file already exists: %s" % featurefile)
        return featurefile

    setGlobals(constants)

    times = []
    times.append(time.clock())
    data = mzFile(datafile)

    times.append(time.clock())
    vprint("Opened data file; getting isotopes...")

    scaninfo = [x for x in data.scan_info(0, 99999999) if x[3] == 'MS1']
    rtLookup = dict([(x[2], x[0]) for x in scaninfo])
    scaninfo = [x[2] for x in scaninfo]

    if scanrange:
        scaninfo = [x for x in scaninfo if scanrange[0] < x < scanrange[1]]

    data.close()

    que = multiprocessing.Queue(maxsize=20)
    reader = multiprocessing.Process(target=dataReaderProc,
                                     args=(datafile, que, scaninfo))
    reader.start()

    isotopeData = deque()
    thing = que.get(block=True)
    picking_time = 0  # Accumulated peak-picking time; diagnostic only.
    while thing != 'done':
        scanNum, scan = thing
        pick_start = time.clock()
        isotopeData.append((scanNum, peak_pick_PPM(scan,
                                                   **peak_pick_params)[0]))
        picking_time += time.clock() - pick_start

        thing = que.get(block=True)

        if verbose_mode and len(isotopeData) % 100 == 0:
            print(len(isotopeData))  # Shielded by explicit verbose_mode check.

    reader.join()
    # Could just discard the un-feature'd peaks immediately.
    vprint("Isotopic features acquired; finding features over time...")

    times.append(time.clock())

    ms1ToIndex = {}
    indexToMS1 = {}
    for index, scanNum in enumerate(scaninfo):
        ms1ToIndex[scanNum] = index
        indexToMS1[index] = scanNum

    isotopesByChargePoint = defaultdict(lambda: defaultdict(
        lambda: ProximityIndexedSequence([], lambda x: x[0][0])))
    allIsotopes = []
    for scanNum, isotopesByCharge in isotopeData:
        scanIndex = ms1ToIndex[scanNum]
        for charge, isotopes in isotopesByCharge.items():
            for isoSeq in isotopes:
                isotopesByChargePoint[charge][scanIndex].add(isoSeq)
                allIsotopes.append((isoSeq, scanIndex, charge))

    del isotopeData

    for scanlookup in isotopesByChargePoint.values():
        for proxseq in scanlookup.values():
            proxseq.rebalance()

    if whitelist_mzs:
        vprint("Screening out irrelevant MZs; starting with %s..." %
               len(allIsotopes))
        allIsotopes.sort(key=lambda x: x[0][0][0])
        whitelist_mzs = sorted(list(set([round(x, 2) for x in whitelist_mzs])))
        isoAcc = []
        whitemz = whitelist_mzs.pop()
        while allIsotopes:
            iso = allIsotopes.pop()
            mz = iso[0][0][0]
            while whitelist_mzs and whitemz - mz > whitelist_tol:
                whitemz = whitelist_mzs.pop()
            if abs(whitemz - mz) < whitelist_tol:
                isoAcc.append(iso)

        allIsotopes = isoAcc
        vprint("...%s remain." % len(allIsotopes))

    allIsotopes.sort(key=lambda x: x[0][0][1])

    times.append(time.clock())

    seenIsotopes = set()
    # Can assume isotopic sequences are unique because floats.
    # (But it may not be a valid assumption, because detectors
    # and floating point approximations!)

    featureList = []
    while allIsotopes:
        highIso, highScan, highChg = allIsotopes.pop()
        if tuple(highIso) in seenIsotopes:
            continue

        centerIndex, (centerMZ, _) = max(enumerate(highIso),
                                         key=lambda x: x[1][1])

        newFeature = [[highScan, highIso]]
        curScan = highScan
        continuing = True
        lastSeen = rtLookup[indexToMS1[curScan]]
        while continuing:  # Trailing the feature backwards.
            curScan -= 1
            try:
                curRT = rtLookup[indexToMS1[curScan]]
            except KeyError:
                assert curScan < min(indexToMS1.keys())
                break

            scanSeqs = isotopesByChargePoint[highChg][curScan].returnRange(
                centerMZ - 2, centerMZ + 1.5)
            scanSeqs.sort(key=lambda x: x[centerIndex][1], reverse=True)

            found = False
            for iso in scanSeqs:  # These are known to have centerMZ in common.
                # The indexes between iso and highIso may not be equivalent
                # if there's sub-C12 peak(s) in either.  For a first draft
                # this can be considered a feature, since C12s should be
                # consistent throughout features, but in some cases like
                # single-scan-dropouts of the C12 this is insufficient
                # and such discrepancies should be accounted for.

                if (inPPM(tolerance, iso[0][0], highIso[0][0])
                        and inPPM(tolerance, iso[1][0], highIso[1][0])
                        and tuple(iso) not in seenIsotopes):
                    newFeature.append([curScan, iso])
                    found = True
                    break  # From "for iso in scanSeqs"

            if found:
                lastSeen = curRT
            elif abs(curRT - lastSeen) > dropoutTimeTolerance:
                continuing = False

        curScan = highScan
        continuing = True
        lastSeen = rtLookup[indexToMS1[curScan]]
        while continuing:  # Trailing the feature forwards; mostly repeat code.
            curScan += 1
            try:
                curRT = rtLookup[indexToMS1[curScan]]
            except KeyError:
                assert curScan > max(indexToMS1.keys())
                break

            scanSeqs = isotopesByChargePoint[highChg][curScan].returnRange(
                centerMZ - 2, centerMZ + 1.5)
            scanSeqs.sort(key=lambda x: x[centerIndex][1], reverse=True)

            found = False
            for iso in scanSeqs:  # These are known to have centerMZ in common.
                # Ditto.

                if (inPPM(tolerance, iso[0][0], highIso[0][0])
                        and inPPM(tolerance, iso[1][0], highIso[1][0])
                        and tuple(iso) not in seenIsotopes):
                    newFeature.append([curScan, iso])
                    found = True
                    break  # From "for iso in scanSeqs"

            if found:
                lastSeen = curRT
            elif abs(curRT - lastSeen) > dropoutTimeTolerance:
                continuing = False

        if len(newFeature) > 1:
            featureList.append((highChg, newFeature))

        for _, iso in newFeature:
            seenIsotopes.add(tuple(iso))
    times.append(time.clock())

    for chg, feature in featureList:
        for stage in feature:
            stage[0] = indexToMS1[stage[0]]

    class idLookup():
        def __getitem__(self, thing):
            return thing

    lookup = idLookup()

    if scanrange:
        featurefile = datafile + ('%s-%s.features' % scanrange)

    featureObjects = []
    for chg, feature in featureList:
        newfeature = Feature()
        for scan, envelope in feature:
            newfeature.add(envelope, scan, chg)

        newfeature.calculate_bounds(lookup)

        #newfeature.prepareBoxes(lookup)
        #newfeature.prepareBoxes() # It's entirely different, for some reason?

        #test = Feature()
        #for scan, envelope in feature:
        #test.add(envelope, scan, chg)
        #test.calculate_bounds(lookup)

        #assert test.mz == newfeature.mz and test.charge == newfeature.charge

        featureObjects.append(newfeature)
    save_feature_database(featureObjects, featurefile)

    vprint("Saved feature file.")
    times.append(time.clock())

    return featurefile
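A minimal usage sketch; 'sample.raw' is a placeholder path, and the keyword names follow the constants dict read at the top of detect_features():

featurefile = detect_features('sample.raw', tolerance=10, force=False)
print("Feature data written to %s" % featurefile)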
Example #9
def extract(datafile,
            outputfile=None,
            default_charge=2,
            centroid=True,
            scan_type=None,
            deisotope_and_reduce_charge=True,
            maximum_precursor_mass=15999,
            long_ms1=False,
            derive_precursor_via='All',
            deisotope_and_reduce_MS1_args={},
            deisotope_and_reduce_MS2_args={},
            min_mz=140,
            precursor_tolerance=0.005,
            isobaric_labels=None,
            label_tolerance=0.01,
            channel_corrections=None,
            prec_info_file=None,
            region_based_labels=False):
    """
    Converts a mzAPI-compatible data file to MGF.
    
    Writes only MS2 spectra where these can be determined, otherwise takes
    every spectrum in the file.  Likewise writes the precursor charge
    and mass if these can be determined.
    
    deisotope_and_reduce_charge deisotopes and charge-reduces each MS2
    spectrum, which generally improves results from peptide database search
    algorithms. However, it should be disabled for very low-resolution scans.
    """

    for key, val in [('tolerance', 0.01), ('min_peaks', 2),
                     ('enforce_isotopic_ratios', True)]:
        if key not in deisotope_and_reduce_MS1_args:
            deisotope_and_reduce_MS1_args[key] = val

    if not outputfile:
        outputfile = datafile + '.mgf'

    if os.path.exists(outputfile):
        assert outputfile.lower().endswith('mgf'), (
            "Overwriting a non-MGF file %s with "
            "the MGF extractor is probably a mistake." % outputfile)

    data = mzFile(datafile)
    from multiplierz.mgf.extraction import _extractor_
    extractor = _extractor_(data, datafile, default_charge, centroid,
                            scan_type, deisotope_and_reduce_charge,
                            derive_precursor_via, maximum_precursor_mass,
                            long_ms1, deisotope_and_reduce_MS1_args,
                            deisotope_and_reduce_MS2_args, min_mz,
                            precursor_tolerance, isobaric_labels,
                            label_tolerance, channel_corrections,
                            prec_info_file, region_based_labels)
    writer = MGF_Writer(outputfile)

    for scan, title, mz, charge in extractor.run():
        writer.write(scan, title, mass=mz, charge=charge)
    writer.close()

    if extractor.inconsistent_precursors:
        vprint("Precursor inconsistencies: %s/%s" %
               (extractor.inconsistent_precursors, extractor.scans_written))

    return outputfile
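A usage sketch for this wrapper; every keyword has a default, so only the data file path (a placeholder here) is required:

mgf_path = extract('sample.raw', default_charge=2, centroid=True)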
Example #10
def extract(datafile,
            outputfile=None,
            default_charge=2,
            centroid=True,
            scan_type=None,
            deisotope_and_reduce_charge=True,
            deisotope_and_reduce_args={},
            min_mz=140,
            precursor_tolerance=0.005,
            isobaric_labels=None,
            label_tolerance=0.01):
    """
    Converts a mzAPI-compatible data file to MGF.
    
    Writes only MS2 spectra where these can be determined, otherwise takes
    every spectrum in the file.  Likewise writes the precursor charge
    and mass if these can be determined.
    
    deisotope_and_reduce_charge deisotopes and charge-reduces each MS2
    spectrum, which generally improves results from peptide database search
    algorithms. However, it should be disabled for very low-resolution scans.
    """
    # Currently doesn't compensate for injection time! Would be required in
    # order to deal with iTRAQ/TMT labels.

    from multiplierz.spectral_process import deisotope_reduce_scan, peak_pick
    from multiplierz.spectral_process import centroid as centroid_func  # Distinct from 'centroid' argument.

    def _get_precursor(mz, possible_prec, charge):
        try:
            return min([
                x for x in possible_prec if (charge == None or x[1] == charge)
            ],
                       key=lambda x: abs(x[0] - mz))
        except ValueError:
            return None, None

    if not outputfile:
        outputfile = datafile + '.mgf'

    if os.path.exists(outputfile):
        assert outputfile.lower().endswith('mgf'), (
            "Overwriting a non-MGF file %s with "
            "the MGF extractor is probably a mistake." % outputfile)

    writer = MGF_Writer(outputfile)

    data = mzFile(datafile)
    scanInfo = data.scan_info()

    # Sort scanInfo into time order, so that for .WIFF files
    # we can still use the previous-MS1 method to look up precursor charges.
    scanInfo.sort(key=lambda x: x[0])

    if datafile.lower().endswith('.raw'):  # May also exist for WIFF?
        filters = dict(data.filters())

        # For RAW files only, there's the option to filter by a given
        # scan type.  (It would be more efficient in many cases to
        # actually split files in a single run, though.)
        if scan_type:
            assert (scan_type.lower()
                    in ['cid', 'hcd', 'etd',
                        'etdsa']), ("Invalid scan type %s, must be one "
                                    "of (CID, HCD, ETD, ETDSA).") % scan_type
            typestr = "@%s" % scan_type.lower()

            scanInfo = [
                x for x in scanInfo
                if x[3] == 'MS1' or typestr in filters[x[0]]
            ]
    else:
        filters = None
        assert not scan_type, "Scan type filtering only enabled with .RAW format files."

    if isobaric_labels:
        assert centroid, "Isobaric tags can only be read from centroided data; set 'centroid' to True."

    if not isobaric_labels:
        labels = []
    elif isobaric_labels == 4 or isobaric_labels == '4plex':
        labels = zip([114, 115, 116, 117], [114.11, 115.11, 116.11, 117.12])
    elif isobaric_labels == 6 or isobaric_labels == '6plex':
        labels = zip([126, 127, 128, 129, 130, 131],
                     [126.127, 127.131, 128.134, 129.138, 130.141, 131.138])
    elif isobaric_labels == 8 or isobaric_labels == '8plex':
        labels = zip(
            [113, 114, 115, 116, 117, 118, 119, 121],
            [113.11, 114.11, 115.11, 116.11, 117.12, 118.12, 119.12, 121.12])
    elif isobaric_labels == 10 or isobaric_labels == '10plex':
        labels = zip([
            '126', '127N', '127C', '128N', '128C', '129N', '129C', '130N',
            '130C', '131'
        ], [
            126.127726, 127.124761, 127.131081, 128.128116, 128.134436,
            129.131471, 129.137790, 130.134825, 130.141145, 131.138180
        ])

        assert label_tolerance < 0.005, (
            "label_tolerance must be lower "
            "than 0.005 for 10-plex experiments! (Currently %s)" %
            label_tolerance)
    else:
        raise NotImplementedError, ("Labels of type %s not recognized.\n"
                                    "Should be one of [4,6,8,10] or None.")

    def read_labels(scan):
        partscan = [x for x in scan if x[0] < labels[-1][1] + 3]
        if not partscan:
            return dict([(str(l), '0') for l in zip(*labels)[0]])

        # This should probably actually sum all points within
        # the tolerance range.
        scan_values = {}
        for label, mz in labels:
            nearpt = min(partscan, key=lambda x: abs(x[0] - mz))
            if abs(nearpt[0] - mz) < label_tolerance:
                scan_values[str(label)] = '%.3f' % nearpt[1]
            else:
                scan_values[str(label)] = '0'  # Report noise value?

        return scan_values

    inconsistent_precursors = 0
    scans_written = 0

    lastMS1 = None
    lastMS1ScanName = None
    recal_factor = 1
    calibrant = RAW_CAL_MASS
    for time, mz, scanNum, scanLevel, scanMode in scanInfo:
        scanName = scanNum if isinstance(scanNum, int) else time

        if scanLevel == 'MS1':
            lastMS1ScanName = scanName

            possible_precursors = None

            def calculate_precursors(calibrant):
                if data.format == 'raw':
                    lastMS1 = data.lscan(lastMS1ScanName)
                    lastMS1, calibrant = raw_scan_recalibration(
                        lastMS1, calibrant)
                else:
                    try:
                        lastMS1 = data.scan(lastMS1ScanName, centroid=True)
                    except NotImplementedError:
                        lastMS1 = centroid_func(data.scan(lastMS1ScanName))

                envelopes = peak_pick(lastMS1,
                                      tolerance=0.01,
                                      min_peaks=2,
                                      enforce_isotopic_ratios=True)[0]
                return sum([[(x[0][0], c) for x in xs]
                            for c, xs in envelopes.items()], []), calibrant

            continue
        elif scanLevel == 'MS3':
            continue
        elif lastMS1ScanName == None:
            continue

        # Each file type handles centroiding differently (or not at all.)
        if data.format == 'raw':
            scan = data.scan(scanName, centroid=centroid)

            scan, calibrant = raw_scan_recalibration(scan, calibrant)
        elif data.format == 'wiff':
            # explicit_numbering, of course, can't be active here.
            scan = data.scan(scanName)
            if centroid:
                scan = centroid_func(scan)
        elif data.format == 'd':
            scan = data.scan(scanName, centroid=centroid)
            if centroid and not scan:
                # mzAPI.D returns empty if centroid data is not present in
                # the file, but that can be corrected by external centroiding.
                scan = centroid_func(data.scan(scanName, centroid=False))
        else:
            raise NotImplementedError, "Extractor does not handle type %s" % data.format

        if filters and not mz:
            mz = float(filters[time].split('@')[0].split(' ')[-1])

        mzP = None
        chargeP = None
        if "scanPrecursor" in dir(data):
            assert isinstance(scanName, int)
            mzP, chargeP = data.scanPrecursor(scanName)

        if not mzP:  # .scanPrecursor sometimes returns charge and not mzP.
            if possible_precursors == None:
                possible_precursors, calibrant = calculate_precursors(
                    calibrant)

            mzP, chargeP = _get_precursor(mz, possible_precursors, chargeP)
            if not mzP:
                # Release presumed charge possibly obtained from scanPrecursor.
                mzP, chargeP = _get_precursor(mz, possible_precursors, None)
                if mz and chargeP:
                    inconsistent_precursors += 1

        if mzP and (abs(mz - mzP) < 2 or not mz):
            mz = mzP
            charge = chargeP
        else:
            charge = default_charge

        if not charge:
            charge = default_charge

        if not mz:
            import warnings
            errmgf = os.path.abspath(datafile)
            warnings.warn('Unable to recover all precursor masses from %s' %
                          errmgf)
        else:
            if labels:
                scan_labels = read_labels(scan)
            else:
                scan_labels = {}

            title = standard_title_write(datafile,
                                         rt=time,
                                         mz=mz,
                                         mode=scanMode,
                                         scan=scanNum,
                                         **scan_labels)

            # Should expand extract() call to include arguments to this.
            if deisotope_and_reduce_charge and centroid:
                if ('tolerance' not in deisotope_and_reduce_args
                        or not deisotope_and_reduce_args['tolerance']):
                    deisotope_and_reduce_args[
                        'tolerance'] = precursor_tolerance
                scan = deisotope_reduce_scan(scan, **deisotope_and_reduce_args)
            scan = [x for x in scan if x[0] > min_mz]
            assert charge, title
            writer.write(scan, title, mass=mz, charge=charge)
            scans_written += 1

    writer.close()

    if inconsistent_precursors:
        vprint("Precursor inconsistencies: %s/%s" %
               (inconsistent_precursors, scans_written))

    return outputfile
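The MGF produced here can be read back with parse_to_generator() from Example #2; a hedged round-trip sketch with a placeholder path:

mgf_path = extract('sample.raw')
count = sum(1 for _ in parse_to_generator(mgf_path))
print("%d spectra extracted" % count)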