Exemple #1
0
def extract(file, directory, timestep=1.0, output="txt"):
    """Extract time-binned spectra from a single data file.

    If the file name contains a "ramp_<start>_<end>_<step>" token, a
    collision-voltage ramp is reconstructed from the name and handed to
    parse(); otherwise the whole acquisition is binned with *timestep*.

    :param file: File name (not the full path) of the data file.
    :param directory: Directory containing *file*.
    :param timestep: Width of each time bin.
    :param output: "txt" for per-bin text files, "hdf5" for one HDF5 file.
    :return: True when a ramp was found and parsed; otherwise the value
        returned by parse() for the plain time-binned extraction.
    """
    print(file)  # py3 print(): the original py2 statement is a syntax error on py3
    path = os.path.join(directory, file)
    name = os.path.splitext(file)[0]
    newdir = os.path.join(directory, name)
    if output == "hdf5":
        # HDF5 output is written next to the source file, not a subdirectory.
        newdir = directory
    # str.split replaces the long-removed string.split module function.
    splits = name.split("_")
    for i, s in enumerate(splits):
        if s.lower() == "ramp":
            start = float(splits[i + 1])
            end = float(splits[i + 2])
            step = float(splits[i + 3])
            # end + step makes the range inclusive of the final voltage.
            volts = np.arange(start, end + step, step)
            times = np.arange(0.0, len(volts) * timestep, timestep)
            print("Ramp:", times, volts)
            parse(path, times, timestep, volts, name, newdir, output=output)
            return True
    if os.path.splitext(file)[1] == ".mzML":
        maxtime = mzMLimporter(path).get_max_time()
    else:
        maxtime = DataImporter(path).get_max_time()

    times = np.arange(0, maxtime, timestep)
    # Propagate parse()'s output path (the original silently returned None).
    return parse(path, times, timestep, None, name, newdir, output=output)
Exemple #2
0
def parse_multiple(paths,
                   timestep,
                   newdir,
                   starttp,
                   endtp,
                   voltsarr=None,
                   outputname=None):
    """Merge time-binned spectra from several files into one HDF5 dataset.

    :param paths: Full paths of the input data files.
    :param timestep: Width of each time bin.
    :param newdir: Directory in which <outputname>.hdf5 is written.
    :param starttp: First time point (inclusive) to extract.
    :param endtp: Last time point (exclusive) to extract.
    :param voltsarr: Optional per-file sequences of collision voltages,
        indexed by time-bin number; None disables voltage annotation.
    :param outputname: Base name (without extension) of the output file.
    :return: Full path of the written HDF5 file.
    """
    outfile = outputname + ".hdf5"
    outpath = os.path.join(newdir, outfile)
    hdf = h5py.File(outpath, "a")
    # Start from a clean dataset group; KeyError means it did not exist yet
    # (narrowed from a bare except that hid real errors).
    try:
        del hdf["ms_dataset"]
    except KeyError:
        pass
    msdataset = hdf.require_group("ms_dataset")

    if voltsarr is not None:
        msdataset.attrs["v1name"] = "Collision Voltage"
    else:
        msdataset.attrs["v1name"] = "timestart"
    msdataset.attrs["timestep"] = timestep
    msdataset.attrs["v2name"] = "Original File"
    config = hdf.require_group("config")
    config.attrs["metamode"] = -1  # marks the file as metadata-mode

    num = 0  # running index of written spectra across all files
    v = 0    # index of the current file within voltsarr
    print(starttp, endtp, timestep)
    for path in paths:
        if not os.path.isfile(path):
            print("File not found: ", path)
            continue
        if os.path.splitext(path)[1] == ".mzML":
            d = mzMLimporter(path)
        else:
            d = DataImporter(path)
        for t in np.arange(starttp, endtp, timestep):
            data = d.get_data(time_range=(t, t + timestep))
            if ud.isempty(data):
                continue
            group = msdataset.require_group(str(num))
            replace_dataset(group, "raw_data", data=data)
            # Annotate the collision voltage when one is available for this
            # file/bin; the narrowed except replaces the old bare except.
            try:
                if voltsarr[v] is not None:
                    temp = int(t / timestep)
                    group.attrs["Collision Voltage"] = voltsarr[v][temp]
            except (TypeError, IndexError):
                pass  # no voltages supplied, or the bin index is out of range
            group.attrs["timestart"] = t
            group.attrs["timeend"] = t + timestep
            group.attrs["timemid"] = t + timestep * 0.5
            # Windows-style path split; the last component is the file name.
            splits = path.split(sep="\\")
            group.attrs["Original File"] = splits[-1]
            num += 1
        v += 1
    msdataset.attrs["num"] = num
    hdf.close()
    return outpath
Exemple #3
0
def auto_from_wizard(data, filename, mode):
    """Build a MetaUniDec HDF5 file from wizard-table rows.

    Each row of *data* is (name, var1, var2, start, stop, path). When both
    start and stop parse as floats, the spectrum is trimmed to that scan
    range (mode == 1) or time range (mode == 0); otherwise the whole file
    is imported.

    :param data: Iterable of wizard rows as described above.
    :param filename: Path of the HDF5 file to create.
    :param mode: 1 for scan ranges, 0 for time ranges.
    :return: None; the result is written via eng.data.export_hdf5().
    """
    eng = mudeng.MetaUniDec()
    eng.data.new_file(filename)
    # Renamed the loop variable: the original reused "d" for both the row
    # and the importer, and clobbered the "data" parameter inside the loop.
    for row in data:
        v1 = row[1]
        v2 = row[2]
        f = row[0]
        # Non-numeric or missing bounds mean "use the whole file"
        # (narrowed from bare excepts).
        try:
            start = float(row[3])
        except (ValueError, TypeError, IndexError):
            start = None
        try:
            stop = float(row[4])
        except (ValueError, TypeError, IndexError):
            stop = None
        path = row[5]

        if start is None or stop is None:
            eng.data.add_file(path=path)
        else:
            print(start, stop)  # py3 print(): py2 statement breaks on py3
            if os.path.splitext(path)[1] == ".mzML":
                importer = mzMLimporter(path)
            else:
                importer = DataImporter(path)
            if mode == 1:
                trimmed = importer.get_data(scan_range=(start, stop))
            elif mode == 0:
                trimmed = importer.get_data(time_range=(start, stop))
            eng.data.add_data(trimmed, path)
        eng.data.spectra[-1].var1 = v1
        eng.data.spectra[-1].var2 = v2
        eng.data.spectra[-1].name = f
    eng.data.export_hdf5()
Exemple #4
0
def extract_scans_multiple_files(files,
                                 dirs,
                                 startscan=1.0,
                                 endscan=1.0,
                                 outputname="Combined",
                                 existing_path=None):
    """Combine one scan range from several files into a single HDF5 file.

    :param files: File names of the inputs.
    :param dirs: Directories containing each file, matched by position.
    :param startscan: First scan of the range (coerced to int).
    :param endscan: Last scan of the range (coerced to int).
    :param outputname: Base name of the output HDF5 when creating one.
    :param existing_path: Optional existing HDF5 to append to; when given,
        the ms_dataset group is kept rather than recreated.
    :return: Full path of the written HDF5 file.
    """
    paths = []
    names = []
    startscan = int(float(startscan))
    endscan = int(float(endscan))
    newdir = None  # directory of the last input; the output is written there
    for f, d in zip(files, dirs):
        path = os.path.join(d, f)
        name = os.path.splitext(f)[0]
        newdir = d
        paths.append(path)
        names.append(name)
    outfile = outputname + ".hdf5"
    if existing_path is None:
        outpath = os.path.join(newdir, outfile)
    else:
        outpath = existing_path
    hdf = h5py.File(outpath, "a")
    if existing_path is None:
        # Rebuild the dataset group from scratch for a fresh output file;
        # KeyError means it did not exist yet (narrowed from bare except).
        try:
            del hdf["ms_dataset"]
        except KeyError:
            pass
    msdataset = hdf.require_group("ms_dataset")
    msdataset.attrs["v1name"] = "timemid"
    msdataset.attrs["v2name"] = "Original File"
    config = hdf.require_group("config")
    config.attrs["metamode"] = -1  # marks the file as metadata-mode
    num = 0  # running index of written spectra
    for path in paths:
        if not os.path.isfile(path):
            print("File not found: ", path)
            continue
        if os.path.splitext(path)[1] == ".mzML":
            d = mzMLimporter(path)
        else:
            d = DataImporter(path)
        data = d.get_data(scan_range=(startscan, endscan))
        if not ud.isempty(data):
            group = msdataset.require_group(str(num))
            replace_dataset(group, "raw_data", data=data)
            # get_times_from_scans returns (start, mid, end) times.
            times = d.get_times_from_scans([startscan, endscan])
            group.attrs["timestart"] = times[0]
            group.attrs["timeend"] = times[2]
            group.attrs["timemid"] = times[1]
            group.attrs["scanstart"] = startscan
            group.attrs["scanend"] = endscan
            # Windows-style path split; the last component is the file name.
            splits = path.split(sep="\\")
            group.attrs["Original File"] = splits[-1]
            num += 1
    msdataset.attrs["num"] = num
    hdf.close()
    return outpath
Exemple #5
0
def parse(path, times, timestep, volts, outputheader, directory, output="txt"):
    """Bin a data file by time and write each bin as txt or into one HDF5.

    :param path: Full path of the input data file.
    :param times: Start time of each bin.
    :param timestep: Width of each time bin.
    :param volts: Optional per-bin collision voltages; None disables them.
    :param outputheader: Base name used for the output file(s).
    :param directory: Output directory.
    :param output: "txt" for per-bin text files, "hdf5" for one HDF5 file.
    :return: Path of the last file written, or None when nothing was
        written (the original raised UnboundLocalError in that case).
    """
    outpath = None
    if not os.path.isfile(path):
        print("File not found:", path)
        return outpath

    if output == "hdf5":
        outfile = outputheader + ".hdf5"
        outpath = os.path.join(directory, outfile)
        hdf = h5py.File(outpath, "a")
        # Start clean; KeyError means the group did not exist yet
        # (narrowed from a bare except).
        try:
            del hdf["ms_dataset"]
        except KeyError:
            pass
        msdataset = hdf.require_group("ms_dataset")

        if volts is not None:
            msdataset.attrs["v1name"] = "Collision Voltage"
        else:
            msdataset.attrs["v1name"] = "timestart"
        msdataset.attrs["timestep"] = timestep
        config = hdf.require_group("config")
        config.attrs["metamode"] = -1  # marks the file as metadata-mode

    num = 0  # count of non-empty bins written to HDF5
    if os.path.splitext(path)[1] == ".mzML":
        d = mzMLimporter(path)
    else:
        d = DataImporter(path)
    for v, time in enumerate(times):
        data = d.get_data(time_range=(time, time + timestep))
        if ud.isempty(data):
            continue
        if output == "txt":
            if volts is not None:
                outfile = outputheader + "_" + str(int(volts[v])) + ".txt"
            else:
                outfile = outputheader + "_" + str(v) + ".txt"
            if not os.path.isdir(directory):
                os.mkdir(directory)
            outpath = os.path.join(directory, outfile)
            np.savetxt(outpath, data)
            print("Saved:", outpath)
        elif output == "hdf5":
            group = msdataset.require_group(str(v))
            replace_dataset(group, "raw_data", data=data)
            if volts is not None:
                group.attrs["Collision Voltage"] = volts[v]
            group.attrs["timestart"] = time
            group.attrs["timeend"] = time + timestep
            group.attrs["timemid"] = time + timestep * 0.5
            num += 1
    if output == "hdf5":
        msdataset.attrs["num"] = num
        hdf.close()
    return outpath
Exemple #6
0
def extract(file, directory, timestep=1.0, output="txt"):
    """Extract time-binned spectra from a single data file.

    If the file name contains a "ramp_<start>_<end>_<step>" token, a
    collision-voltage ramp is reconstructed from the name and handed to
    parse(); otherwise the whole acquisition is binned with *timestep*.

    :param file: File name (not the full path) of the data file.
    :param directory: Directory containing *file*.
    :param timestep: Width of each time bin.
    :param output: "txt" for per-bin text files, "hdf5" for one HDF5 file.
    :return: True when a ramp was found and parsed; otherwise the output
        path returned by parse().
    """
    print(file)
    path = os.path.join(directory, file)
    name = os.path.splitext(file)[0]
    newdir = os.path.join(directory, name)
    if output == "hdf5":
        # HDF5 output is written next to the source file, not a subdirectory.
        newdir = directory
    #name=ud.smartdecode(name)
    splits = name.split(sep="_")
    # Only parse failures of the ramp token are recoverable here; the old
    # bare except also hid unrelated errors raised inside parse().
    try:
        for i, s in enumerate(splits):
            if s.lower() == "ramp":
                start = float(splits[i + 1])
                end = float(splits[i + 2])
                step = float(splits[i + 3])
                # end + step makes the range inclusive of the final voltage.
                volts = np.arange(start, end + step, step)
                times = np.arange(0.0, len(volts) * timestep, timestep)
                print("Ramp:", times, volts)
                parse(path,
                      times,
                      timestep,
                      volts,
                      name,
                      newdir,
                      output=output)
                return True
    except (ValueError, IndexError):
        print("Error parsing ramp keyword. Ignoring.")
    if os.path.splitext(file)[1] == ".mzML":
        maxtime = mzMLimporter(path).get_max_time()
    else:
        maxtime = DataImporter(path).get_max_time()

    times = np.arange(0, maxtime, timestep)
    outpath = parse(path, times, timestep, None, name, newdir, output=output)
    return outpath
Exemple #7
0
def extract_scans(file, directory, scanbins=1, output="txt"):
    """Extract scan-binned spectra from a single data file.

    :param file: File name (not the full path) of the data file.
    :param directory: Directory containing *file*.
    :param scanbins: Number of scans per bin (coerced to int).
    :param output: "txt" for per-bin text files, "hdf5" for one HDF5 file.
    :return: None; results are written to disk.
    """
    print(file)  # py3 print(): the original py2 statement is a syntax error on py3
    scanbins = int(float(scanbins))
    path = os.path.join(directory, file)

    if not os.path.isfile(path):
        print("File not found:", path)
        return

    if os.path.splitext(path)[1] == ".mzML":
        d = mzMLimporter(path)
    else:
        d = DataImporter(path)

    name = os.path.splitext(file)[0]
    newdir = os.path.join(directory, name)
    if output == "hdf5":
        # HDF5 output is written next to the source file, not a subdirectory.
        newdir = directory
    # Both importer types expose get_max_scans(); the original duplicated
    # identical if/else branches here and fetched an unused max time.
    maxscans = d.get_max_scans()
    print(maxscans)
    scans = np.arange(0, maxscans, scanbins)

    if output == "hdf5":
        outfile = name + ".hdf5"
        outpath = os.path.join(newdir, outfile)
        hdf = h5py.File(outpath, "a")
        # Start clean; KeyError means the group did not exist yet
        # (narrowed from a bare except).
        try:
            del hdf["ms_dataset"]
        except KeyError:
            pass
        msdataset = hdf.require_group("ms_dataset")
        msdataset.attrs["v1name"] = "timemid"
        config = hdf.require_group("config")
        config.attrs["metamode"] = -1  # marks the file as metadata-mode
    num = 0  # count of non-empty bins written to HDF5

    for v, scan in enumerate(scans):
        data = d.get_data(scan_range=(scan, scan + scanbins))
        if ud.isempty(data):
            continue
        if output == "txt":
            outfile = name + "_" + str(v) + ".txt"
            if not os.path.isdir(newdir):
                os.mkdir(newdir)
            outpath = os.path.join(newdir, outfile)
            np.savetxt(outpath, data)
            print("Saved:", outpath)
        elif output == "hdf5":
            group = msdataset.require_group(str(v))
            replace_dataset(group, "raw_data", data=data)
            # get_times_from_scans returns (start, mid, end) times.
            times = d.get_times_from_scans([scan, scan + scanbins])
            group.attrs["timestart"] = times[0]
            group.attrs["timeend"] = times[2]
            group.attrs["timemid"] = times[1]
            group.attrs["scanstart"] = scan
            group.attrs["scanend"] = scan + scanbins
            num += 1
    if output == "hdf5":
        msdataset.attrs["num"] = num
        hdf.close()
        print(outpath)