def open_many(filenames):
    """Open a list of filenames and return a dataset with all datasets concatenated.

    Entries are stripped of surrounding whitespace; blank entries and entries
    starting with ``#`` (comment lines, e.g. from a file list) are skipped.

    :param list[str] filenames: list of filenames/paths
    :rtype: Dataset
    """
    # Normalize once, then keep only real (non-blank, non-comment) entries.
    cleaned = (filename.strip() for filename in filenames)
    datasets = [open(filename) for filename in cleaned
                if filename and not filename.startswith("#")]
    return vaex.dataset.DatasetConcatenated(datasets=datasets)
def open(path, convert=False, shuffle=False, copy_index=True, *args, **kwargs):
    """Open a dataset from file given by path.

    Example:

    >>> ds = vaex.open('sometable.hdf5')
    >>> ds = vaex.open('somedata*.csv', convert='bigdata.hdf5')

    :param str path: local or absolute path to file, or glob string
    :param convert: convert files to an hdf5 file for optimization, can also be a path
    :param bool shuffle: shuffle converted dataset or not
    :param args: extra arguments for file readers that need it
    :param kwargs: extra keyword arguments
    :param bool copy_index: copy index when source is read via pandas
    :return: return dataset if file is supported, otherwise None
    :rtype: Dataset

    :Example:

    >>> import vaex as vx
    >>> vx.open('myfile.hdf5')
    <vaex.dataset.Hdf5MemoryMapped at 0x1136ee3d0>
    >>> vx.open('gadget_file.hdf5', 3) # this will read only particle type 3
    <vaex.dataset.Hdf5MemoryMappedGadget at 0x1136ef3d0>
    """
    import vaex
    try:
        # Resolve user-registered aliases to real paths/URLs first.
        if path in aliases:
            path = aliases[path]
        if path.startswith("http://") or path.startswith("ws://"):  # TODO: think about https and wss
            # Remote dataset: the path is "<server-url>/<dataset-name>".
            server, dataset = path.rsplit("/", 1)
            server = vaex.server(server, **kwargs)
            datasets = server.datasets(as_dict=True)
            if dataset not in datasets:
                raise KeyError("no such dataset '%s' at server, possible dataset names: %s" % (dataset, " ".join(datasets.keys())))
            return datasets[dataset]
        if path.startswith("cluster"):
            import vaex.distributed
            return vaex.distributed.open(path, *args, **kwargs)
        else:
            import vaex.file
            import glob
            # sort to get predictable behaviour (useful for testing)
            filenames = list(sorted(glob.glob(path)))
            ds = None
            if len(filenames) == 0:
                raise IOError('Could not open file: {}, it does not exist'.format(path))
            # Name of the hdf5 file a conversion would produce for this input.
            filename_hdf5 = _convert_name(filenames, shuffle=shuffle)
            if len(filenames) == 1:
                path = filenames[0]
                ext = os.path.splitext(path)[1]
                if os.path.exists(filename_hdf5) and convert:  # also check mtime?
                    # Converted file already exists: reuse it instead of re-reading
                    # the source. (The extra args/kwargs were meant for the original
                    # reader, not for the hdf5 file.)
                    ds = vaex.file.open(filename_hdf5)
                else:
                    if ext == '.csv':  # special support for csv.. should probably approach it a different way
                        ds = from_csv(path, copy_index=copy_index, **kwargs)
                    else:
                        ds = vaex.file.open(path, *args, **kwargs)
                    if convert:
                        ds.export_hdf5(filename_hdf5, shuffle=shuffle)
                        ds = vaex.file.open(filename_hdf5)  # argument were meant for pandas?
                if ds is None:
                    if os.path.exists(path):
                        raise IOError('Could not open file: {}, did you install vaex-hdf5?'.format(path))
                    # Fixed: this branch previously re-tested os.path.exists(path),
                    # making the "does not exist" error unreachable.
                    if not os.path.exists(path):
                        raise IOError('Could not open file: {}, it does not exist?'.format(path))
            elif len(filenames) > 1:
                # convert may be a truthy string, in which case it is the target path.
                if convert not in [True, False]:
                    filename_hdf5 = convert
                else:
                    filename_hdf5 = _convert_name(filenames, shuffle=shuffle)
                if os.path.exists(filename_hdf5) and convert:  # also check mtime
                    ds = open(filename_hdf5)
                else:
                    # with ProcessPoolExecutor() as executor:
                    # executor.submit(read_csv_and_convert, filenames, shuffle=shuffle, **kwargs)
                    datasets = []
                    for filename in filenames:
                        datasets.append(open(filename, convert=bool(convert), shuffle=shuffle, **kwargs))
                    ds = vaex.dataset.DatasetConcatenated(datasets)
                    if convert:
                        ds.export_hdf5(filename_hdf5, shuffle=shuffle)
                        ds = vaex.file.open(filename_hdf5, *args, **kwargs)
            if ds is None:
                raise IOError('Unknown error opening: {}'.format(path))
            return ds
    except:  # bare on purpose: log the failing path for any error, then re-raise
        logging.getLogger("vaex").error("error opening %r", path)
        raise