def storeBlob(data, path=None):
    """
    Writes the string *data* unchanged to a file opened in binary mode.

    *path* is resolved by ``_prepare_path``. If that yields ``None``
    nothing is written and ``None`` is returned.
    """
    assert isinstance(data, basestring)
    target = _prepare_path(path, extensions=None, store=False)
    if target is None:
        return None
    with open(target, "wb") as handle:
        handle.write(data)
def storeExcel(tab, path=None):
    """
    Saves *tab* in an ``.xls`` or ``.xlsx`` file.

    The table is converted with ``tab.to_pandas(do_format=True)`` and the
    resulting data frame is written without its index column.

    *path* is resolved by ``_prepare_path``. If that yields ``None``
    nothing is written and ``None`` is returned.
    """
    target = _prepare_path(path, extensions=["xls", "xlsx"], store=False)
    if target is None:
        return None
    tab.to_pandas(do_format=True).to_excel(target, index=False)
def loadPeakMap(path=None):
    """
    Loads a peakmap from an mzXML, mzML or mzData file.

    If *path* is missing, a dialog for file selection is opened instead.
    Returns ``None`` if no path could be determined, else the result of
    ``PeakMap.fromMSExperiment``.

    Raises ``Exception`` if the path does not exist or is not a file.
    """
    # local imports in order to keep namespaces clean
    import os.path
    import sys
    from pyopenms import MSExperiment, FileHandler
    from ..core.data_types import PeakMap

    source = _prepare_path(path, extensions=["mzML", "mzXML", "mzData"])
    if source is None:
        return None

    # open-ms returns an empty peakmap if the file does not exist, so the
    # checks are done here explicitly:
    if not os.path.exists(source):
        raise Exception("file %s does not exist" % source)
    if not os.path.isfile(source):
        raise Exception("path %s is not a file" % source)

    experiment = MSExperiment()
    handler = FileHandler()
    if sys.platform == "win32":
        # backslashes are needed for network shares
        source = source.replace("/", "\\")
    handler.loadExperiment(source, experiment)
    return PeakMap.fromMSExperiment(experiment)
def storeTable(tab, path=None, forceOverwrite=False, compressed=True,
               peakmap_cache_folder=None):
    """
    Writes the table in binary format, including all information such as
    the corresponding peak maps.

    The file name extension in ``path`` must be ``.table``.

    ``forceOverwrite`` must be set to ``True`` to overwrite an existing
    file.

    ``compressed`` replaces duplicate copies of the same peakmap by a
    single one to save space on disk.

    ``peakmap_cache_folder`` is a folder. If provided, the table data and
    the peakmaps are stored separately, so the table file can be loaded
    much faster and the peakmaps are loaded lazily only when their spectra
    are accessed. This speeds up workflows, but the developer must care
    about consistency: if the peakmap folder is deleted the table may
    become useless!

    Later the file can be loaded with ``emzed.io.loadTable``.
    """
    target = _prepare_path(path, extensions=["table"], store=False)
    if target is None:
        return None
    tab.store(target, forceOverwrite, compressed, peakmap_cache_folder)
def storeCSV(tab, path=None):
    """
    Writes *tab* to a textual ``.csv`` file via ``tab.storeCSV``.

    If *path* is not provided, a file dialog opens for choosing the file
    name and location. Returns ``None``.
    """
    target = _prepare_path(path, extensions=["csv"], store=False)
    if target is None:
        return None
    tab.storeCSV(target)
def storePeakMap(pm, path=None):
    """
    Writes peakmap *pm* to an mzXML, mzML or mzData file via ``pm.store``.

    The format used depends on the file extension given in *path*.
    If no *path* is given, a dialog for choosing an output file name is
    opened. Returns ``None``.
    """
    target = _prepare_path(path, extensions=["mzML", "mzXML", "mzData"],
                           store=False)
    if target is None:
        return None
    pm.store(target)
def loadBlob(path=None):
    """
    Reads a file in binary mode and wraps its content in a ``Blob``.

    The blob type is the upper-cased file extension without the leading
    dot, so a file without extension gets the empty string as type.

    *path* is resolved by ``_prepare_path``. If that yields ``None``,
    ``None`` is returned.
    """
    source = _prepare_path(path, None)
    if source is None:
        return None

    from emzed.core.data_types.col_types import Blob
    import os.path

    with open(source, "rb") as handle:
        content = handle.read()

    extension = os.path.splitext(source)[1]
    blob_type = extension[1:].upper()  # remove leading "."
    return Blob(content, blob_type)
def storeTable(tab, path=None, forceOverwrite=False, compressed=True):
    """
    Writes *tab* to a binary ``.table`` file via ``tab.store``.

    If *path* is not provided, a file dialog opens for choosing the file
    name and location. *path* must have the file extension ``.table``.

    *forceOverwrite* and *compressed* are passed on to ``tab.store``
    unchanged. Returns ``None``.
    """
    target = _prepare_path(path, extensions=["table"], store=False)
    if target is None:
        return None
    tab.store(target, forceOverwrite, compressed)
def loadPeakMap(path=None):
    """
    Loads a peakmap from an mzXML, mzML or mzData file via
    ``PeakMap.load``.

    If *path* is missing, a dialog for file selection is opened instead.
    Returns ``None`` if no path could be determined.
    """
    # local import in order to keep namespaces clean
    from ..core.data_types import PeakMap

    source = _prepare_path(path, extensions=["mzML", "mzXML", "mzData"])
    if source is None:
        return None
    return PeakMap.load(source)
def loadTable(path=None, compress_after_load=True):
    """
    Loads a table from a ``.table`` file via ``Table.load``.

    If *path* is missing, a dialog for file selection is opened instead.

    If *compress_after_load* is true, ``compressPeakMaps()`` is called on
    the loaded table before it is returned.

    Returns ``None`` if no path could be determined.
    """
    # local import in order to keep namespaces clean
    from ..core.data_types import Table

    source = _prepare_path(path, extensions=["table"])
    if source is None:
        return None

    table = Table.load(source)
    if compress_after_load:
        table.compressPeakMaps()
    return table
def loadExcel(path=None, sheetname=0, types=None, formats=None):
    """
    Loads one sheet of an ``.xls`` or ``.xlsx`` file as a table.

    `sheetname` is either an integer or a string indicating the sheet which
    will be extracted from the file. The index 0 refers to the first sheet.

    `types` is either None or a dictionary mapping column names to their
    types.

    `formats` is either None or a dictionary mapping column names to
    formats.

    Returns ``None`` if ``_prepare_path`` yields no path, else the result of
    ``Table.from_pandas``.
    """
    path = _prepare_path(path, extensions=["xls", "xlsx"])
    if path is None:
        return None

    from emzed.core.data_types import Table
    import pandas

    # The sheet is passed positionally on purpose: pandas < 0.14.0 requires
    # the argument, and the keyword was renamed from ``sheetname`` to
    # ``sheet_name`` in later releases, so only the positional form works
    # across pandas versions.
    df = pandas.read_excel(path, sheetname)
    return Table.from_pandas(df, types=types, formats=formats)
def loadCSV(path=None, sep=";", keepNone=False, **specialFormats):
    """
    Loads a csv file from *path* via ``Table.loadCSV``.

    If *path* is missing, ``_prepare_path`` is used to determine it;
    ``None`` is returned if no path could be determined.

    The signature documents the following options:

    - *sep*: the column separator.
    - *keepNone*: if set to True, "None" strings in the file are kept as
      strings, else they are converted to Python None values.
    - *specialFormats*: keyword arguments for setting formats of columns.
      Example: ``emzed.io.loadCSV("abc.csv", mz="%.3f")``

    NOTE(review): this implementation calls ``Table.loadCSV(path)`` with the
    path only; *sep*, *keepNone* and *specialFormats* are accepted but not
    forwarded. Confirm whether ``Table.loadCSV`` should receive them.
    """
    from ..core.data_types import Table

    source = _prepare_path(path, extensions=["csv"])
    if source is None:
        return None
    return Table.loadCSV(source)
def storePeakMap(pm, path=None):
    """
    Writes peakmap *pm* to an mzXML, mzML or mzData file.

    The format used depends on the file extension given in *path*.
    If no *path* is given, a dialog for choosing an output file name is
    opened. Returns ``None``.

    The peakmap is converted with ``pm.toMSExperiment()`` and written by a
    pyopenms ``FileHandler``.
    """
    # local imports in order to keep namespaces clean
    import sys
    from pyopenms import FileHandler

    target = _prepare_path(path, extensions=["mzML", "mzXML", "mzData"],
                           store=False)
    if target is None:
        return None

    if sys.platform == "win32":
        # backslashes are needed for network shares
        target = target.replace("/", "\\")

    experiment = pm.toMSExperiment()
    handler = FileHandler()
    handler.storeExperiment(target, experiment)
def loadCSV(path=None, sep=";", keepNone=False, **specialFormats):
    """
    Loads a table from a csv file.

    The first row of the file provides the column names: surrounding
    whitespace is stripped and each run of spaces is replaced by a single
    underscore. All further rows are data rows; every cell is stripped and
    converted with ``bestConvert``.

    *path*: file to read. If missing, ``_prepare_path`` is used to determine
    it; ``None`` is returned if no path could be determined.

    *sep*: column separator passed to ``csv.reader`` as delimiter.

    *keepNone*: if True, "None" strings in the file are handed to
    ``bestConvert`` like any other cell. Else they are converted to Python
    None values.

    *specialFormats*: keyword arguments mapping column names to formats,
    which override the formats guessed by ``guessFormatFor``. Names which
    are not column names of the file have no effect.
    Example: ``emzed.io.loadCSV("abc.csv", mz="%.3f")``

    The resulting table uses the base name of the file as title and records
    the absolute path under the meta key ``loaded_from``.

    Raises ``StopIteration`` if the file has no header row and
    ``IndexError`` if a data row has fewer cells than the header.
    """
    # local imports in order to keep namespaces clean
    import csv
    import os.path
    import re
    from ..core.data_types.table import (Table, common_type_for, bestConvert,
                                         guessFormatFor)

    path = _prepare_path(path, extensions=["csv"])
    if path is None:
        return None

    def convert(cell):
        # "None" strings become Python None unless the caller keeps them
        if not keepNone and cell == "None":
            return None
        return bestConvert(cell)

    with open(path, "r") as fp:
        reader = csv.reader(fp, delimiter=sep)
        # the builtin next() works on Python 2.6+ and Python 3, whereas the
        # reader.next() method only exists on Python 2
        header = next(reader)
        # reduce multiple spaces to single underscore
        colNames = [re.sub(" +", "_", name.strip()) for name in header]
        rows = [[convert(cell.strip()) for cell in row] for row in reader]

    # one (possibly empty) column per header entry, also if there are no rows
    columns = [[row[i] for row in rows] for i in range(len(colNames))]
    types = [common_type_for(column) for column in columns]

    formats = dict((name, guessFormatFor(name, type_))
                   for (name, type_) in zip(colNames, types))
    formats.update(specialFormats)
    formats = [formats[name] for name in colNames]

    title = os.path.basename(path)
    meta = dict(loaded_from=os.path.abspath(path))
    return Table._create(colNames, types, formats, rows, title, meta)