def test_export(self):
    """Export the test dataset both directly through the Dataset API and
    through the GUI export dialog, then verify the two files hold the
    same columns and values.

    Sweeps the cross product of: active fraction, exported column subset,
    byteorder, shuffle, and selection.  FITS export is only exercised for
    the big-endian byteorder ">" (presumably because FITS is inherently
    big-endian -- TODO confirm).
    """
    # Scratch output paths: one direct/UI pair per file format.
    path_hdf5 = tempfile.mktemp(".hdf5")
    path_hdf5_ui = tempfile.mktemp(".hdf5")
    path_fits = tempfile.mktemp(".fits")
    path_fits_ui = tempfile.mktemp(".fits")
    for dataset in [self.dataset]:
        self.app.dataset_selector.add(dataset)
        for fraction in [1, 0.5]:
            dataset.set_active_fraction(fraction)
            dataset.select("x > 3")
            length = len(dataset)  # NOTE(review): unused -- kept, possibly for debugging
            # TODO: gui doesn't export virtual columns, add "z" to this list
            for column_names in [["x", "y"], ["x"], ["y"]]:
                for byteorder in "=<>":
                    for shuffle in [False, True]:
                        for selection in [False, True]:
                            # HDF5 is exported for every byteorder; FITS only when byteorder is ">".
                            for export in [dataset.export_fits, dataset.export_hdf5] if byteorder == ">" else [dataset.export_hdf5]:
                                type = "hdf5" if export == dataset.export_hdf5 else "fits"
                                if shuffle and selection:
                                    continue  # TODO: export should fail on this combination
                                #print column_names, byteorder, shuffle, selection, type
                                # 1) Export directly through the dataset API.
                                if export == dataset.export_hdf5:
                                    path = path_hdf5
                                    path_ui = path_hdf5_ui
                                    export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection)
                                else:
                                    # export_fits takes no byteorder argument.
                                    path = path_fits
                                    path_ui = path_fits_ui
                                    export(path, column_names=column_names, shuffle=shuffle, selection=selection)
                                compare_direct = vx.open(path)
                                # 2) Export through the GUI, scripting the dialogs:
                                # first choose-dialog answers the selection question,
                                # the second picks the byteorder by index.
                                dialogs.set_choose(1 if selection else 0).then("=<>".index(byteorder))
                                # select columns
                                dialogs.set_select_many(True, [name in column_names for name in dataset.get_column_names()])
                                # Confirm dialog answers with `shuffle` -- presumably the shuffle prompt; TODO confirm.
                                counter_confirm = CallCounter(return_value=shuffle)
                                counter_info = CallCounter()
                                dialogs.dialog_confirm = counter_confirm
                                dialogs.dialog_info = counter_info
                                dialogs.get_path_save = lambda *args: path_ui
                                dialogs.ProgressExecution = dialogs.FakeProgressExecution
                                import sys
                                sys.stdout.flush()
                                self.app.export(type=type)
                                compare_ui = vx.open(path_ui)
                                column_names = column_names or ["x", "y", "z"]
                                # Both exports must contain the same set of columns ...
                                self.assertEqual(compare_direct.get_column_names(), compare_ui.get_column_names())
                                for column_name in column_names:
                                    values_ui = compare_ui.evaluate(column_name)
                                    values = compare_direct.evaluate(column_name)
                                    # ... and the same values; compared sorted, since
                                    # a shuffled export may reorder rows.
                                    self.assertEqual(sorted(values), sorted(values_ui))
def test_export(self):
    """Export the test dataset both directly through the Dataset API and
    through the GUI export dialog, then verify the two files hold the
    same columns and values.

    NOTE(review): this is a duplicate definition of ``test_export``; if both
    appear in one class, the later one shadows the earlier -- confirm intent.

    Sweeps the cross product of: active fraction, exported column subset,
    byteorder, shuffle, and selection.  FITS export is only exercised for
    the big-endian byteorder ">" (presumably because FITS is inherently
    big-endian -- TODO confirm).
    """
    # Scratch output paths: one direct/UI pair per file format.
    path_hdf5 = tempfile.mktemp(".hdf5")
    path_hdf5_ui = tempfile.mktemp(".hdf5")
    path_fits = tempfile.mktemp(".fits")
    path_fits_ui = tempfile.mktemp(".fits")
    for dataset in [self.dataset]:
        self.app.dataset_selector.add(dataset)
        for fraction in [1, 0.5]:
            dataset.set_active_fraction(fraction)
            dataset.select("x > 3")
            length = len(dataset)  # NOTE(review): unused -- kept, possibly for debugging
            # TODO: gui doesn't export virtual columns, add "z" to this list
            for column_names in [["x", "y"], ["x"], ["y"]]:
                for byteorder in "=<>":
                    for shuffle in [False, True]:
                        for selection in [False, True]:
                            # HDF5 is exported for every byteorder; FITS only when byteorder is ">".
                            for export in [dataset.export_fits, dataset.export_hdf5] if byteorder == ">" else [dataset.export_hdf5]:
                                type = "hdf5" if export == dataset.export_hdf5 else "fits"
                                if shuffle and selection:
                                    continue  # TODO: export should fail on this combination
                                #print column_names, byteorder, shuffle, selection, type
                                # 1) Export directly through the dataset API.
                                if export == dataset.export_hdf5:
                                    path = path_hdf5
                                    path_ui = path_hdf5_ui
                                    export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection)
                                else:
                                    # export_fits takes no byteorder argument.
                                    path = path_fits
                                    path_ui = path_fits_ui
                                    export(path, column_names=column_names, shuffle=shuffle, selection=selection)
                                compare_direct = vx.open(path)
                                # 2) Export through the GUI, scripting the dialogs:
                                # first choose-dialog answers the selection question,
                                # the second picks the byteorder by index.
                                dialogs.set_choose(1 if selection else 0).then("=<>".index(byteorder))
                                # select columns
                                dialogs.set_select_many(True, [name in column_names for name in dataset.get_column_names()])
                                # Confirm dialog answers with `shuffle` -- presumably the shuffle prompt; TODO confirm.
                                counter_confirm = CallCounter(return_value=shuffle)
                                counter_info = CallCounter()
                                dialogs.dialog_confirm = counter_confirm
                                dialogs.dialog_info = counter_info
                                dialogs.get_path_save = lambda *args: path_ui
                                dialogs.ProgressExecution = dialogs.FakeProgressExecution
                                import sys
                                sys.stdout.flush()
                                self.app.export(type=type)
                                compare_ui = vx.open(path_ui)
                                column_names = column_names or ["x", "y", "z"]
                                # Both exports must contain the same set of columns ...
                                self.assertEqual(compare_direct.get_column_names(), compare_ui.get_column_names())
                                for column_name in column_names:
                                    values_ui = compare_ui.evaluate(column_name)
                                    values = compare_direct.evaluate(column_name)
                                    # ... and the same values; compared sorted, since
                                    # a shuffled export may reorder rows.
                                    self.assertEqual(sorted(values), sorted(values_ui))
def export_fits(dataset, path, column_names=None, shuffle=False, selection=False, progress=None, virtual=True, sort=None, ascending=True):
    """Export (a selection of) *dataset* to a FITS binary table at *path*.

    :param DatasetLocal dataset: dataset to export
    :param str path: path for file
    :param list[str] column_names: list of column names to export or None for all columns
    :param bool shuffle: export rows in random order
    :param bool selection: export selection or not
    :param progress: progress callback that gets a progress fraction as argument and should return True to continue,
        or a default progress bar when progress=True
    :param bool virtual: When True, export virtual columns
    :param sort: passed through to vaex.export._export -- presumably an expression to sort rows by; TODO confirm
    :param bool ascending: sort direction, passed through to vaex.export._export
    :return:
    """
    if shuffle:
        # Pick a name for the temporary shuffle-index column that does not
        # collide with any existing column name.
        random_index_name = "random_index"
        while random_index_name in dataset.get_column_names():
            random_index_name += "_new"
    column_names = column_names or dataset.get_column_names(virtual=virtual, strings=True)
    logger.debug("exporting columns(fits): %r" % column_names)
    # Number of rows to write: full (active) length, or the selection count.
    N = len(dataset) if not selection else dataset.selected_length(selection)
    data_types = []
    data_shapes = []
    ucds = []
    units = []
    for column_name in column_names:
        if column_name in dataset.get_column_names(strings=True, virtual=False):
            # Real (materialized) column: keep its dtype and trailing shape.
            column = dataset.columns[column_name]
            shape = (N,) + column.shape[1:]
            dtype = column.dtype
            if dataset.is_string(column_name):
                # FITS stores fixed-width strings: size to the longest value.
                max_length = dataset[column_name].apply(lambda x: len(x)).max(selection=selection)
                dtype = np.dtype('S'+str(int(max_length)))
        else:
            # Virtual column (expression): exported as plain float64 scalars.
            dtype = np.float64().dtype
            shape = (N,)
        ucds.append(dataset.ucds.get(column_name))
        units.append(dataset.units.get(column_name))
        data_types.append(dtype)
        data_shapes.append(shape)
    if shuffle:
        # Reserve an extra int64 column in the output layout for the shuffle
        # index.  NOTE(review): this temporarily appends to the (possibly
        # caller-supplied) column_names list; it is removed again below.
        column_names.append(random_index_name)
        data_types.append(np.int64().dtype)
        data_shapes.append((N,))
        ucds.append(None)
        units.append(None)
    else:
        random_index_name = None
    # TODO: all expressions can have missing values.. how to support that?
    # Collect fill values of masked non-float columns so nulls round-trip.
    null_values = {key: dataset.columns[key].fill_value for key in dataset.get_column_names() if dataset.is_masked(key) and dataset.data_type(key).kind != "f"}
    # Pre-allocate the FITS file with the final column layout.
    empty(path, N, column_names, data_types, data_shapes, ucds, units, null_values=null_values)
    if shuffle:
        # Undo the temporary bookkeeping entries added above.
        del column_names[-1]
        del data_types[-1]
        del data_shapes[-1]
    # Reopen the pre-allocated file writable and stream the data into it.
    dataset_output = vaex.astro.fits.FitsBinTable(path, write=True)
    df_output = vaex.dataframe.DataFrameLocal(dataset_output)
    vaex.export._export(dataset_input=dataset, dataset_output=df_output, path=path, random_index_column=random_index_name, column_names=column_names, selection=selection, shuffle=shuffle, progress=progress, sort=sort, ascending=ascending)
    dataset_output.close()
def unique_column_names(dataset):
    """Return all column names of *dataset*, virtual columns included.

    The old implementation unioned ``dataset.column_names`` with
    ``dataset.virtual_columns.keys()``; ``get_column_names(virtual=True)``
    yields the same collection through the supported API.
    """
    names = dataset.get_column_names(virtual=True)
    return names