Python DataFrame.to_hdf Examples, pandas.DataFrame.to_hdf Python Examples

Example #1

0

Show file

File: CNN_1D.py Project: xub69961415/HyperSpec-WGAN

 def save_history(self, history_dir):
     """Write the recorded accuracy and loss curves to HDF5 files.

     The ``accuracy``/``val_accuracy`` and ``loss``/``val_loss`` series are
     read from ``self.history.history``.  Each pair becomes a frame with the
     columns ``Training`` and ``Validation`` and is stored under the key
     ``'history'`` in ``<history_dir>/accuracy.hdf5`` and
     ``<history_dir>/loss.hdf5`` respectively (both opened with mode ``'w'``).

     :param history_dir: directory prefix the two files are written below
     """
     recorded = self.history.history
     # Collect every series up front so a missing key fails before any file
     # is written.
     curves = (
         (f"{history_dir}/accuracy.hdf5",
          {'Training': recorded['accuracy'],
           'Validation': recorded['val_accuracy']}),
         (f"{history_dir}/loss.hdf5",
          {'Training': recorded['loss'],
           'Validation': recorded['val_loss']}),
     )
     for target, series in curves:
         DataFrame.from_dict(series).to_hdf(path_or_buf=target,
                                            key='history',
                                            mode='w')

Example #2

0

Show file

File: test_categorical.py Project: frreiss/pandas-fred

def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFrame):
    """Check that ``read_hdf`` can filter a categorical column with ``where``.

    ``df`` is written in table format with its ``col`` column converted to
    categorical; the frame read back with the ``where`` condition must equal
    ``expected`` once ``expected.col`` carries the same categories.
    """
    # GH39420
    df.col = df.col.astype("category")
    max_widths = {"col": 1}
    categorical_values = sorted(df.col.unique())
    expected.col = expected.col.astype("category")
    # ``set_categories`` returns a new Series; its ``inplace`` flag was removed
    # in pandas 2.0, so the result has to be assigned back.
    expected.col = expected.col.cat.set_categories(categorical_values)

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, key="df", format="table", min_itemsize=max_widths)
        result = read_hdf(path, where=where)
        tm.assert_frame_equal(result, expected)

Example #3

0

Show file

File: test_read.py Project: Aathi410/Pro123

def test_read_from_pathlib_path(setup_path):
    """Round-trip a frame through HDF5 addressed by a ``pathlib.Path``."""
    # GH11773
    frame = DataFrame(
        np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")
    )
    with ensure_clean_path(setup_path) as filename:
        hdf_path = Path(filename)
        frame.to_hdf(hdf_path, "df", mode="a")
        roundtripped = read_hdf(hdf_path, "df")

    # The comparison happens after the temporary path has been released.
    tm.assert_frame_equal(frame, roundtripped)

Example #4

0

Show file

File: test_read.py Project: MarceloDL-A/metodos_python

def test_read_nokey_table(setup_path):
    """``read_hdf`` without a key works for one table and fails for two."""
    # GH13231
    frame = DataFrame(
        {"i": range(5), "c": Series(list("abacd"), dtype="category")}
    )
    table_kwargs = {"mode": "a", "format": "table"}

    with ensure_clean_path(setup_path) as path:
        # A single dataset in the file: the key may be omitted.
        frame.to_hdf(path, "df", **table_kwargs)
        tm.assert_frame_equal(frame, read_hdf(path))

        # A second dataset makes the key-less read ambiguous.
        frame.to_hdf(path, "df2", **table_kwargs)
        with pytest.raises(
            ValueError,
            match="key must be provided when HDF5 file contains multiple datasets.",
        ):
            read_hdf(path)

Example #5

0

Show file

File: test_read.py Project: MarceloDL-A/metodos_python

def test_read_hdf_iterator(setup_path):
    """The first chunk of ``read_hdf(..., iterator=True)`` equals a direct read.

    The frame is stored in table format, read back once directly and once
    through a ``TableIterator``; the first item produced by the iterator must
    match the direct read.
    """
    df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
    df.index.name = "letters"
    df = df.set_index(keys="E", append=True)

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, key="df", mode="w", format="t")
        direct = read_hdf(path, "df")
        iterator = read_hdf(path, "df", iterator=True)
        assert isinstance(iterator, TableIterator)
        try:
            indirect = next(iterator.__iter__())
            tm.assert_frame_equal(direct, indirect)
        finally:
            # Close the store even when the comparison fails so the file
            # handle is not leaked.
            iterator.store.close()

Example #6

0

Show file

def opn(info, lines):
    """Build the operation sequence table and store it in the HDF5 store.

    Each line is split on whitespace.  An ``INGRP INDELT`` line updates the
    current time step: a bare number is taken as-is, ``hh:mm`` is converted to
    minutes.  A line whose first token is in ``ops`` adds one row made of the
    operation name, a segment id (first letter of the operation plus the
    zero-padded number) and the time step currently in effect.

    :param info: sequence whose first item is the HDF5 store to write to
    :param lines: text lines of the block being parsed
    """
    store, _parse, _path, *_ = info
    sequence = []
    for line in lines:
        tokens = line.split()
        keyword = tokens[0]
        if keyword == 'INGRP' and tokens[1] == 'INDELT':
            parts = tokens[2].split(':')
            if len(parts) == 1:
                indelt = int(parts[0])
            else:
                indelt = 60 * int(parts[0]) + int(parts[1])
        elif keyword in ops:
            segment = f'{keyword[0]}{int(tokens[1]):03d}'
            # NOTE(review): relies on an INGRP INDELT line having been seen
            # before the first operation line - confirm with the input format.
            sequence.append((keyword, segment, indelt))
    dfopn = DataFrame(sequence,
                      columns=['OPERATION', 'SEGMENT', 'INDELT_minutes'])
    dfopn.to_hdf(store, '/CONTROL/OP_SEQUENCE', data_columns=True)

Example #7

0

Show file

File: nexus.py Project: TuSun12379/diffractem

def _store_table_to_single_subset(tbl: pd.DataFrame,
                                  fn: str,
                                  path: str,
                                  subset: str,
                                  format: str = 'nexus'):
    """
    Helper function. Internal use only.
    """

    tbl_path = path.replace('%', subset)
    if format == 'table':
        try:
            tbl.to_hdf(fn, tbl_path, format='table', data_columns=True)
        except ValueError:
            tbl.to_hdf(fn, tbl_path, format='table')

    elif format == 'nexus':
        with h5py.File(fn, 'a') as fh:
            for key, val in tbl.iteritems():
                #print(f'Storing {key} ({val.shape}, {val.dtype}) to {fn}: {path}')
                grp = fh.require_group(tbl_path)
                grp.attrs['NX_class'] = 'NXcollection'
                k = key.replace('/', '_').replace('.', ' ')
                try:
                    if k not in grp:
                        ds = grp.require_dataset(k,
                                                 shape=val.shape,
                                                 dtype=val.dtype,
                                                 maxshape=(None, ))
                    else:
                        ds = grp[k]
                        if ds.shape[0] != val.shape[0]:
                            ds.resize(val.shape[0], axis=0)
                            #print('resizing', k)
                    ds[:] = val
                except (TypeError, OSError) as err:
                    if val.dtype == 'O':
                        val2 = val.astype('S')
                        if k in grp:
                            del grp[k]
                        ds = grp.require_dataset(k,
                                                 shape=val.shape,
                                                 dtype=val2.dtype,
                                                 maxshape=(None, ))
                        ds[:] = val2
                    else:
                        raise err

                ds.attrs['label'] = key
    else:
        raise ValueError('Storage format must be "table" or "nexus".')

Example #8

0

Show file

File: transcribe.py Project: andyherzberg/transcribe-comprehend

def export_df(df: pd.DataFrame, export_path: str,
              export_format: str = "pickle",
              hdf_key: Optional[str] = None
              ) -> None:
    """
    Exports the dataframe in a variety of formats. Dataframe is expected to have columns `transcript` and `index`

    When ``export_path`` has no file extension, ``'.' + export_format`` is
    appended (``'.xlsx'`` for the "excel" format); an existing extension is
    kept unchanged.

    :param df: The dataframe to be exported
    :param export_path: Where to write the exported dataframe
    :param export_format: One of "html", "csv", "json", "parquet", "pickle", "excel", "hdf"
    :param hdf_key: if hdf format is used, key to store the df under in the HDF5 file
    :raises ValueError: if ``export_format`` is not one of the supported formats
    :raises AssertionError: if ``export_format`` is "hdf" and ``hdf_key`` is None
    """
    _, ext = os.path.splitext(export_path)
    use_ext = '.' + export_format if len(ext) == 0 else ''
    if export_format == "html":
        # Background colours for the sentiment and speaker cells.
        color_dict = {
            'POSITIVE': 'limegreen',
            'NEGATIVE': 'red',
            'NEUTRAL': 'lightgrey',
            'MIXED': 'yellow'
        }
        spk_dict = {
            'ch_0': '#F0F8FF',
            'spk_0': '#F0F8FF',
            'ch_1': '#FFF8DC',
            'spk_1': '#FFF8DC'
        }
        df.set_index(
            ['transcript', 'recording', 'speaker', 'index']
        ).to_html(export_path + use_ext, encoding='utf-8',
                  formatters={
                      'pred_sent': lambda sent: f'<span style="background-color:{color_dict[sent]}">{sent}</span>',
                      'speaker': lambda speaker: f'<span style="background-color:{spk_dict[speaker]}">{speaker}</span>'
                  }, escape=False)
    elif export_format == "csv":
        df.to_csv(export_path + use_ext)
    elif export_format == "json":
        df.to_json(export_path + use_ext)
    elif export_format == "parquet":
        df.to_parquet(export_path + use_ext)
    elif export_format == "pickle":
        df.to_pickle(export_path + use_ext)
    elif export_format == "excel":
        # '.excel' is not a real extension; use '.xlsx' when one is appended.
        if use_ext:
            use_ext = '.xlsx'
        df.to_excel(export_path + use_ext, index=False)
    elif export_format == "hdf":
        # Raised explicitly (same exception type as the former ``assert``) so
        # the check is not stripped when Python runs with -O.
        if hdf_key is None:
            raise AssertionError("Parameter hdf_key must be informed if export format is hdf.")
        df.to_hdf(export_path + use_ext, key=hdf_key)
    else:
        raise ValueError(f"Unknown export format: {export_format}")

Example #9

0

Show file

File: test_read.py Project: MarceloDL-A/metodos_python

def test_read_missing_key_opened_store(setup_path):
    """A ``KeyError`` from ``read_hdf`` must leave an opened store usable."""
    # GH 28699
    with ensure_clean_path(setup_path) as path:
        DataFrame({"a": range(2), "b": range(2)}).to_hdf(path, "k1")

        with HDFStore(path, "r") as opened:
            expected_error = "'No object named k2 in the file'"
            with pytest.raises(KeyError, match=expected_error):
                read_hdf(opened, "k2")

            # The store has to survive the failed lookup: reading the key
            # that does exist must still succeed.
            read_hdf(opened, "k1")

Example #10

0

Show file

def save_timeseries(store, ts, savedict, siminfo, saveall, operation, segment,
                    activity):
    """Store the selected computed time series under ``RESULTS/...``.

    A series is selected when its ``savedict`` flag is truthy (or ``saveall``
    is truthy) and it is present in ``ts``.  The selected series become
    float32 columns, sorted by name, of a frame indexed by
    ``siminfo['tindex']``, written with blosc compression level 9 to
    ``RESULTS/{operation}_{segment}/{activity}``.  When nothing is selected
    a message is printed and nothing is written.
    """
    # save computed timeseries (at computation DELT)
    wanted = {name for name, flag in savedict.items() if flag or saveall}
    frame = DataFrame(index=siminfo['tindex'])
    for name in (wanted & set(ts.keys())):
        frame[name] = ts[name]
    frame = frame.astype(float32).sort_index(axis='columns')
    path = f'RESULTS/{operation}_{segment}/{activity}'
    if frame.empty:
        print('Save DataFrame Empty for', path)
        return
    frame.to_hdf(store, path, complib='blosc', complevel=9)

Example #11

0

Show file

File: utils.py Project: kushalkolar/mesmerize_manuscript_notebooks

    def save_dataframe(path: str, dataframe: pd.DataFrame, metadata: Optional[dict] = None,
                       metadata_method: str = 'json', raise_meta_fail: bool = True):
        """
        Save DataFrame to hdf5 file along with a meta data dict.

        Meta data dict can either be serialized with json and stored as a str in the hdf5 file, or recursively saved
        into hdf5 groups if the dict contains types that hdf5 can deal with. Experiment with both methods and see what works best

        Currently the hdf5 method can work with these types: [str, bytes, int, float, np.int, np.int8, np.int16,
        np.int32, np.int64, np.float, np.float16, np.float32, np.float64, np.float128, np.complex].

        How an object outside these types is handled by the 'recursive' method is decided by
        ``HdfTools._dicts_to_group``, which receives ``raise_meta_fail`` unchanged.

        :param path:            path to save the file to
        :param dataframe:       DataFrame to save in the hdf5 file
        :param metadata:        Any associated meta data to store along with the DataFrame in the hdf5 file
        :param metadata_method: method for storing the metadata dict, either 'json' or 'recursive'
        :param raise_meta_fail: raise an exception if recursive metadata saving encounters an unsupported object
        :raises FileExistsError: if ``path`` already exists as a file
        :raises TypeError: if the 'json' method meets values that are not JSON serializable
        """
        if os.path.isfile(path):
            raise FileExistsError(path)

        # The context manager closes the file even when metadata
        # serialization raises, so the handle is never leaked.
        with h5py.File(path, mode='w') as f:
            f.create_group('DATAFRAME')

            if metadata is not None:
                mg = f.create_group('META')
                mg.attrs['method'] = metadata_method

                if metadata_method == 'json':
                    # Try every key first so that all offending entries are
                    # reported in a single error.
                    bad_keys = []
                    for k, v in metadata.items():
                        try:
                            mg.create_dataset(k, data=json.dumps(v))
                        except TypeError as e:
                            bad_keys.append(f"{k}: {e}")

                    if bad_keys:
                        bad_keys = '\n'.join(bad_keys)
                        raise TypeError(f"The following meta data keys are not JSON serializable\n{bad_keys}")

                elif metadata_method == 'recursive':
                    HdfTools._dicts_to_group(h5file=f, path='META/', d=metadata, raise_meta_fail=raise_meta_fail)

        # The file must be closed before pandas reopens it in 'r+' mode.
        dataframe.to_hdf(path, key='DATAFRAME', mode='r+')

Example #12

0

Show file

File: test_retain_attributes.py Project: MarceloDL-A/metodos_python

def test_retain_index_attributes2(setup_path):
    """Check which index name is read back after appends to one HDF5 key.

    After rewriting the key with an index named ``"foo"`` the name is
    retained; after appending a frame whose index is named ``"bar"`` the
    stored index name is expected to be ``None``.
    """
    with ensure_clean_path(setup_path) as path:

        with catch_warnings(record=True):
            hourly = DataFrame(
                {"A": Series(range(3),
                             index=date_range("2000-1-1", periods=3, freq="H"))}
            )
            hourly.to_hdf(path, "data", mode="w", append=True)

            daily = DataFrame(
                {"A": Series(range(3),
                             index=date_range("2002-1-1", periods=3, freq="D"))}
            )
            daily.to_hdf(path, "data", append=True)

            # Start over (mode="w") with a named index.
            named_index = date_range("2000-1-1", periods=3, freq="H")
            named_index.name = "foo"
            DataFrame({"A": Series(range(3), index=named_index)}).to_hdf(
                path, "data", mode="w", append=True
            )

        assert read_hdf(path, "data").index.name == "foo"

        with catch_warnings(record=True):
            # Append data whose index carries a different name.
            renamed_index = date_range("2001-1-1", periods=3, freq="H")
            renamed_index.name = "bar"
            DataFrame({"A": Series(range(3), index=renamed_index)}).to_hdf(
                path, "data", append=True
            )

        assert read_hdf(path, "data").index.name is None

Example #13

0

Show file

File: test_store.py Project: attack68/pandas

def test_duplicate_column_name(setup_path):
    """Duplicate column names are rejected by fixed format, kept by table."""
    duplicated = DataFrame(columns=["a", "a"], data=[[0, 0]])

    with ensure_clean_path(setup_path) as path:
        with pytest.raises(
            ValueError, match="Columns index has to be unique for fixed format"
        ):
            duplicated.to_hdf(path, "df", format="fixed")

        # Table format accepts the same frame and round-trips it.
        duplicated.to_hdf(path, "df", format="table")
        roundtripped = read_hdf(path, "df")

        tm.assert_frame_equal(duplicated, roundtripped)
        assert duplicated.equals(roundtripped)
        assert roundtripped.equals(duplicated)

Example #14

0

Show file

def global_(info, lines):
    """Store the comment and the start/stop times of the GLOBAL block.

    ``lines[0]`` is kept (stripped) as the comment.  ``lines[1]`` is parsed
    with ``parseD`` into the fields ``SYR/SMO/SDA/SHR/SMI`` and
    ``EYR/EMO/EDA/EHR/EMI``, from which the start and stop times are built and
    truncated to 16 characters.  The three values are written as a one-column
    frame (``Info``) to ``/CONTROL/GLOBAL``.

    :param info: sequence starting with the HDF5 store and the parse table
    :param lines: text lines of the GLOBAL block
    """
    store, parse, path, *_ = info
    d = parseD(lines[1], parse['GLOBAL', 'START'])

    def _moment(prefix):
        # Date part plus hour/minute offsets.  'min' replaces the deprecated
        # 'T' unit alias for minutes.
        stamp = (
            Timestamp(f"{d[prefix + 'YR']}-{d[prefix + 'MO']}-{d[prefix + 'DA']}")
            + Timedelta(int(d[prefix + 'HR']), 'h')
            + Timedelta(int(d[prefix + 'MI']), 'min'))
        return str(stamp)[0:16]

    start = _moment('S')
    stop = _moment('E')
    data = [lines[0].strip(), start, stop]
    dfglobal = DataFrame(data,
                         index=['Comment', 'Start', 'Stop'],
                         columns=['Info'])
    dfglobal.to_hdf(store, key='/CONTROL/GLOBAL', data_columns=True)

Example #15

0

Show file

File: test_read.py Project: MarceloDL-A/metodos_python

def test_read_nokey(setup_path):
    """``read_hdf`` without a key works for one dataset and fails for two."""
    # GH10443
    frame = DataFrame(
        np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")
    )

    # Categorical dtype not supported for "fixed" format. So no need
    # to test with that dtype in the dataframe here.
    with ensure_clean_path(setup_path) as path:
        frame.to_hdf(path, "df", mode="a")
        tm.assert_frame_equal(frame, read_hdf(path))

        # With a second dataset present the key can no longer be guessed.
        frame.to_hdf(path, "df2", mode="a")
        with pytest.raises(
            ValueError,
            match="key must be provided when HDF5 file contains multiple datasets.",
        ):
            read_hdf(path)

Example #16

0

Show file

File: test_read.py Project: MarceloDL-A/metodos_python

def test_read_hdf_open_store(setup_path):
    """Reading through an already opened ``HDFStore`` matches a direct read
    and leaves the store open."""
    # GH10330
    # No check for non-string path_or-buf, and no test of open store
    df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
    df.index.name = "letters"
    df = df.set_index(keys="E", append=True)

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, key="df", mode="w")
        direct = read_hdf(path, "df")
        # The context manager closes the store even when an assertion fails.
        with HDFStore(path, mode="r") as store:
            indirect = read_hdf(store, "df")
            tm.assert_frame_equal(direct, indirect)
            assert store.is_open

Example #17

0

Show file

File: mainDoE.py Project: timcera/HSPsquared

def make_runlist(store, doe, doename):
    """Store the run table and return the per-run parameter overrides.

    ``doe`` rows are ``(Run, DataPath, Segment, Name, Value)``.  They are
    written in table format to ``{doename}/DoE``.  The returned mapping is
    keyed by the run (as a string), then by ``(operation, module, segment)``,
    then by table name, then by parameter name, with float values.  The
    data path is split on ``/`` into operation, module and the remaining
    parts, which are joined with ``_`` to form the table name.
    """
    runs_frame = DataFrame(
        doe, columns=['Run', 'DataPath', 'Segment', 'Name', 'Value'])
    runs_frame.to_hdf(store, f'{doename}/DoE', format='t', data_columns=True)

    rundict = defaultdict(defaultdict)
    for run, path, segment, name, value in doe:
        operation, module, *rest = path.split(sep='/', maxsplit=3)
        table = '_'.join(rest)
        # The inner defaultdict has no factory, so it behaves like a dict.
        per_run = rundict[f'{run}']
        tables = per_run.setdefault((operation, module, segment),
                                    defaultdict(dict))
        tables[table][name] = float(value)
    return rundict

Example #18

0

Show file

def ftables(info, llines):
    """Parse FTABLE blocks and store each one under ``/FTABLES/FTnnn``.

    A line with ``FTABLE`` at columns 2-7 starts a table: the number after it
    gives the table name and the following line gives the row and column
    counts.  A line with ``END`` at columns 2-4 writes the collected rows,
    using the first ``cols`` header names.  Every other line is parsed with
    ``parseD`` and added as a row.
    """
    store, parse, path, *_ = info
    header = ['Depth', 'Area', 'Volume',
              'Disch1', 'Disch2', 'Disch3', 'Disch4', 'Disch5']
    stream = iter(llines)
    for line in stream:
        if line[2:8] == 'FTABLE':
            name = f'FT{int(line[8:]):03d}'
            # The dimensions line is consumed here, not by the loop.
            rows, cols = next(stream).split()
            table_rows = []
        elif line[2:5] == 'END':
            DataFrame(table_rows, columns=header[:int(cols)]).to_hdf(
                store, f'/FTABLES/{name}', data_columns=True)
        else:
            table_rows.append(parseD(line, parse['FTABLES', 'FTABLE']))

Example #19

0

Show file

    def store_dataframe(self, name: str, df: pd.DataFrame) -> None:
        """Write a dataframe to ``<name>.h5``.

        The frame is stored under the key ``"df"`` with mode ``"w"``; the
        resulting file name is logged at debug level.

        Args:
            name: file name without the ``.h5`` suffix
            df: dataframe to store

        Returns:
            None
        """
        logger = self.log
        name += ".h5"
        logger.debug(f"{name=}")
        df.to_hdf(name, key="df", mode="w")

Example #20

0

Show file

def masslink(info, lines):
    """Parse MASS-LINK blocks and store them in ``/CONTROL/MASS_LINKS``.

    A line with ``MASS-LINK`` at columns 2-10 sets the current link number.
    Every other line that does not have ``END`` at columns 2-4 is parsed with
    ``parseD`` and tagged with ``MLNO`` (``ML`` plus the zero-padded link
    number).  If any rows were collected they are written after replacing
    ``'na'`` with ``''``, dropping ``TGRPN`` and adding an empty ``COMMENTS``
    column.
    """
    store, parse, path, *_ = info
    records = []
    for line in lines:
        if line[2:11] == 'MASS-LINK':
            name = line[12:].rstrip()
            continue
        if line[2:5] == 'END':
            continue
        d = parseD(line, parse['MASS-LINK', 'na'])
        d['MLNO'] = f'ML{int(name):03d}'
        records.append(d)

    if not records:
        return

    # The keys of the last parsed row define the column order.
    dfmasslink = DataFrame(records, columns=d).replace('na', '')
    del dfmasslink['TGRPN']
    dfmasslink['COMMENTS'] = ''
    dfmasslink.to_hdf(store, '/CONTROL/MASS_LINKS', data_columns=True)

Example #21

0

Show file

def gener(info, lines):
    """Parse the GENER sub-blocks and store each under ``GENER/<block>``.

    A line containing ``OPCODE`` or ``PARM`` selects the current sub-block.
    A line with ``END`` at columns 2-4 writes the rows collected so far to
    ``GENER/{current_block}`` and clears them.  Every other line is parsed
    with ``parseD`` for the current sub-block and collected.
    """
    store, parse, path, *_ = info
    sub_blocks = ['OPCODE', 'PARM']
    current_block = ''
    rows = []
    last_row = {}
    for line in lines:
        # END is tested first: an END line may also name the sub-block.
        if line[2:5] == 'END':
            DataFrame(rows, columns=last_row).to_hdf(
                store, key=f'GENER/{current_block}', data_columns=True)
            rows.clear()
            continue

        named = [s for s in sub_blocks if s in line]
        if named:
            current_block = named[0]
            continue

        last_row = parseD(line, parse['GENER', current_block])
        rows.append(last_row)

Example #22

0

Show file

def write_df_2_h5(df: pd.DataFrame, save_path: 'hdf5', key_name):
    """
    Store a report held as a DataFrame in an HDF5 file for convenient access.

    Nothing is written (and None is returned) when
    ``public_function.detect_df(df)`` is False.  The parent directory of
    ``save_path`` is created when it does not exist; the frame is then
    written in append mode (``mode='a'``).

    :param df: report data as a DataFrame
    :param key_name: key to store the DataFrame under
    :param save_path: target path (a ``.hdf5`` file)
    :return: None
    :raises ValueError: if ``save_path`` does not contain ``.hdf5``
    """
    if public_function.detect_df(df) is False:
        return None
    if '.hdf5' not in save_path:
        raise ValueError(f"{save_path} is not a h5(.hdf5) file.")
    h5_save_folder = os.path.dirname(save_path)
    # dirname() is '' for a bare file name, and os.makedirs('') would raise;
    # exist_ok avoids a race when another writer creates the folder first.
    if h5_save_folder:
        os.makedirs(h5_save_folder, exist_ok=True)
    df.to_hdf(save_path, key=key_name, mode='a')

Example #23

0

Show file

File: hdf.py Project: respec/HSPsquared

 def write_ts(self, data_frame: pd.DataFrame, category: Category,
              operation: str, segment: str, activity: str, *args: Any,
              **kwargs: Any) -> None:
     """Save a timeseries frame to the HDF5 store in table format.

     The frame is written to ``{operation}_{segment}/{activity}``, prefixed
     with ``RESULTS/`` when ``category`` is truthy.  Compression level 9 is
     used when a truthy ``compress`` keyword is passed; otherwise no
     compression level is set.
     """
     path = f'{operation}_{segment}/{activity}'
     path = ('RESULTS/' + path) if category else path
     complevel = 9 if kwargs.get('compress') else None
     data_frame.to_hdf(self._store,
                       path,
                       format='t',
                       data_columns=True,
                       complevel=complevel)

Example #24

0

Show file

def test_read_hdf_errors(setup_path):
    """``read_hdf`` raises ``IOError`` for a missing file and a closed store."""
    frame = DataFrame(
        np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")
    )

    with ensure_clean_path(setup_path) as path:
        # Nothing has been written yet, so the file does not exist.
        with pytest.raises(IOError, match=r"File [\S]* does not exist"):
            read_hdf(path, "key")

        frame.to_hdf(path, "df")

        # Open the store and close it again right away.
        closed_store = HDFStore(path, mode="r")
        closed_store.close()

        with pytest.raises(IOError, match="The HDFStore must be open for reading."):
            read_hdf(closed_store, "df")

Example #25

0

Show file

    def insert(self, symbol: str, ts: BiTimestamp, ticks: pd.DataFrame):
        """Write ``ticks`` for ``symbol`` to a new versioned HDF5 file.

        The target path is ``<base_path>/YYYY/MM/DD/<symbol>_<version>.h5``,
        with the date taken from ``ts.as_at()`` and the version supplied by
        ``self.index.insert``.  Parent directories are created as needed and
        the ticks are written under the key ``'ticks'`` with blosc
        compression at level 9.
        """
        self._check_closed('insert')
        as_at_date = ts.as_at()

        # The index decides the version, so it is handed a callable that
        # turns a version number into the splayed file path.
        def _versioned_path(version):
            relative = '{}/{:02d}/{:02d}/{}_{:04d}.h5'.format(
                as_at_date.year, as_at_date.month, as_at_date.day,
                symbol, version)
            return self.base_path.joinpath(relative)

        write_path = self.index.insert(symbol, as_at_date, _versioned_path)

        # do the tick write, with blosc compression
        write_path.parent.mkdir(parents=True, exist_ok=True)
        ticks.to_hdf(str(write_path), 'ticks', mode='w', append=False,
                     complevel=9, complib='blosc')

Example #26

0

Show file

    def run(self, features: pd.DataFrame, parent: FileAdapter) -> FileAdapter:
        """Extract survey/meta features and store them in the output file.

        The features returned by ``extract_meta_features`` get a ``file_id``
        column set to ``parent.id`` (when not empty) and are written to the
        default output under ``self.output_hdf5_key``.  The output metadata
        is then updated with the inferred standard groups.

        Raises:
            SoftPreconditionFailed: if the input ``features`` frame is empty.
        """
        if features.empty:
            raise SoftPreconditionFailed('Input features are empty')

        result = self.default_outputs()
        meta = extract_meta_features(features, config=meta_survey_config)
        if not meta.empty:
            meta.loc[:, 'file_id'] = parent.id
        self.logger.debug('Obtained %d survey/meta features', meta.shape[0])

        with pd.HDFStore(result.file, 'w') as store:
            meta.to_hdf(store, self.output_hdf5_key)

        standard_groups = infer_standard_groups(result.file_str)
        deep_update(result.metadata, {'standard': standard_groups})
        return result

Example #27

0

Show file

def store_voxels_ana_data(file_name: str, group_name: str,
                          voxels_df: pd.DataFrame,
                          voxels_dict: Dict[str, List[Any]]) -> None:
    """
    Merge new voxel data into the existing frame and store it.

    For the rows listed in ``voxels_dict['indexes']`` the ``newE`` column is
    set from ``voxels_dict['newE']`` and the ``track_id`` column from
    ``voxels_dict['trackID']`` (``voxels_df`` is modified in place).
    The frame is then written in table format, with all columns as data
    columns, to ``file_name`` under ``group_name + '/voxels'``.
    """
    # (frame column, dict key) pairs to copy onto the selected rows.
    updates = (('newE', 'newE'), ('track_id', 'trackID'))
    for column, source in updates:
        voxels_df.loc[voxels_dict['indexes'], column] = voxels_dict[source]

    hdf_key = group_name + '/voxels'
    voxels_df.to_hdf(file_name, hdf_key, format='table', data_columns=True)

Example #28

0

Show file

File: test_categorical.py Project: vincent-van-chen/pandas

def test_categorical_nan_only_columns(setup_path):
    """Categorical columns holding only NaN survive an HDF5 round trip."""
    # GH18413
    # Check that read_hdf with categorical columns with NaN-only values can
    # be read back.
    frame = DataFrame(
        {
            "a": ["a", "b", "c", np.nan],
            "b": [np.nan, np.nan, np.nan, np.nan],
            "c": [1, 2, 3, 4],
            "d": Series([None] * 4, dtype=object),
        }
    )
    # NOTE(review): "d" is filled from column "b", not from "d" itself -
    # confirm this is intended.
    for target, source in (("a", "a"), ("b", "b"), ("d", "b")):
        frame[target] = frame[source].astype("category")

    with ensure_clean_path(setup_path) as path:
        frame.to_hdf(path, "df", format="table", data_columns=True)
        roundtripped = read_hdf(path, "df")
        tm.assert_frame_equal(roundtripped, frame)

Example #29

0

Show file

File: dagmod.py Project: dcaseykc/restart

def rw(format: str, path: Path, df: pd.DataFrame):
    """Write a dataframe to <path> with extension <format>.

    ``.csv`` writes with ``DataFrame.to_csv``; ``.h5`` writes with
    ``DataFrame.to_hdf`` under the key ``"df"``.  Both overwrite the target
    (``mode="w"``).

    #   str <format> : file format for writing (either .csv or .h5)
    #   posix path <path> : path for writing
    #   pandas DataFrame <df> : dataframe to write

    Raises IllegalArgumentError for any other <format>.
    """
    if format == ".csv":

        df.to_csv(path, mode="w")

    elif format == ".h5":

        # The HDF key is the name of the group inside the file and must be a
        # string; the dataframe itself was being passed here by mistake.
        df.to_hdf(path, key="df", mode="w")

    else:

        raise IllegalArgumentError(
            "Must specify either .csv or .h5 as file format."
        )

Example #30

0

Show file

def test_complex_mixed_fixed(setup_path):
    """A frame mixing complex, numeric and string columns round-trips
    through the default HDF5 format."""
    ones = [1.0 + 1.0j] * 4
    complex64 = np.array(ones, dtype=np.complex64)
    complex128 = np.array(ones, dtype=np.complex128)
    columns = {
        "A": [1, 2, 3, 4],
        "B": ["a", "b", "c", "d"],
        "C": complex64,
        "D": complex128,
        "E": [1.0, 2.0, 3.0, 4.0],
    }
    frame = DataFrame(columns, index=list("abcd"))

    with ensure_clean_path(setup_path) as path:
        frame.to_hdf(path, "df")
        tm.assert_frame_equal(frame, read_hdf(path, "df"))

Example #31

0

Show file

File: hdf.py Project: Itay4/pandas

class HDF(BaseIO):
    """Read/write timings for ``to_hdf``/``read_hdf``, once per storage
    format listed in ``params``."""

    params = ['table', 'fixed']
    param_names = ['format']

    def setup(self, format):
        """Build the frame (5 float columns plus one string column, hourly
        index) and write it once so the read timing has a file to load."""
        self.fname = '__test__.h5'
        n_rows, n_cols = 100000, 5
        values = np.random.randn(n_rows, n_cols)
        column_names = ['float{}'.format(i) for i in range(n_cols)]
        hourly = date_range('20000101', periods=n_rows, freq='H')
        frame = DataFrame(values, columns=column_names, index=hourly)
        frame['object'] = tm.makeStringIndex(n_rows)
        self.df = frame
        self.df.to_hdf(self.fname, 'df', format=format)

    def time_read_hdf(self, format):
        """Time reading the stored frame back."""
        read_hdf(self.fname, 'df')

    def time_write_hdf(self, format):
        """Time writing the frame in the parametrized format."""
        self.df.to_hdf(self.fname, 'df', format=format)

Example #32

0

Show file

File: parse.py Project: cliburn/wide-format

    samples = []
    rows = []

    i = 0
    for i, line in enumerate(open(f)):
        if i < 10:
            continue
        snp, sample, g1, g2 = line.split('\t')[:4]
        genotype = ''.join([g1, g2])
        if sample == current_sample:
            rows.append((snp, genotype))
        else:
            print current_sample, i
            samples.append(current_sample)
            df = DataFrame(rows, columns=['SNP', current_sample])
            df.to_hdf(h5f, current_sample)
            rows = [(snp, genotype)]
            current_sample = sample

    # Part 2 - assemble wide format
    print 'Assembling wide format'
    df = pd.read_hdf(h5f, samples[0])
    for sample in samples[1:]:
        print sample
        df1 = pd.read_hdf(filename.replace('txt', 'h5'),sample)
        df = df.merge(df1, on='SNP')

    df.to_hdf(h5f, 'wide_format')
    print 'Wide format assembled'

Example #33

0

Show file

File: pandas_basic.py Project: CeasarSS/books

# Script fragment: `gdp` and `state_gdp` are defined outside this excerpt.
# Percentage change between consecutive rows; show the last rows.
gdp.pct_change().tail()
gdp.pct_change(periods=4).tail() # Quarterly data, annual difference

# Export the same frame to Excel (default and named sheet), CSV and JSON.
state_gdp.to_excel('state_gdp_from_dataframe.xls')
state_gdp.to_excel('state_gdp_from_dataframe_sheetname.xls', sheet_name='State GDP')
state_gdp.to_excel('state_gdp_from_dataframe.xlsx')
state_gdp.to_csv('state_gdp_from_dataframe.csv')
# NOTE(review): `StringIO.StringIO` and `.buf` look like the Python 2
# StringIO module - confirm the target interpreter.
sio = StringIO.StringIO()
state_gdp.to_json(sio)
sio.seek(0)
sio.buf[:50]

# Write one 1000 x 1000 frame of zeros as CSV, HDF5, compressed HDF5 and
# gzip-compressed CSV (the file names suggest a size comparison).
df = DataFrame(zeros((1000,1000)))
df.to_csv('size_test.csv')
df.to_hdf('size_test.h5','df') # h5 is the usual extension for HDF5
df.to_hdf('size_test_compressed.h5','df',complib='zlib',complevel=6)
f = gzip.open('size_test.csvz','w')
df.to_csv(f)
f.close()
# Read the gzip-compressed CSV back.
df_from_csvz = read_csv('size_test.csvz',compression='gzip')

# Export a plain array via DataFrame, without header row or index column.
x = randn(100,100)
DataFrame(x).to_csv('numpy_array.csv',header=False,index=False)

# Series codes and matching display names, in the same order.
codes = ['GDPC1','INDPRO','CPILFESL','UNRATE','GS10','GS1','BAA','AAA']
names = ['Real GDP','Industrial Production','Core CPI','Unemployment Rate',\
   '10 Year Yield','1 Year Yield','Baa Yield','Aaa Yield']
# r to disable escape
base_url = r'http://research.stlouisfed.org/fred2/data/'

Example #34

0

Show file

File: readRinexNav.py Project: januariocf/pyrinex

def readRinexNav(fn,odir=None):
    """
    Michael Hirsch
    It may actually be faster to read the entire file via f.read() and then .split()
    and asarray().reshape() to the final result, but I did it frame by frame.
    http://gage14.upc.es/gLAB/HTML/GPS_Navigation_Rinex_v2.11.html

    Reads a RINEX navigation file into a DataFrame with one row per record,
    indexed by the record epoch.  The first column is the satellite number
    (``sv``), followed by the 29 numeric fields of the record.

    :param fn: path of the navigation file (``~`` is expanded)
    :param odir: optional output directory; when given, the frame is also
        saved there as ``<input file stem>.h5`` under the key ``NAV``
    :return: the navigation data as a DataFrame
    :raises ValueError: if the file has no ``END OF HEADER`` line or no
        records after it
    """
    fn = Path(fn).expanduser()
    if odir:
        odir = Path(odir).expanduser()

    startcol = 3  # column where numerical data starts
    nfloat = 19   # number of text elements per float data number
    nline = 7     # number of lines per record

    sv = []
    epoch = []
    records = []
    with fn.open('r') as f:
        # Find the end of the header, which has non-constant length.
        # readline() returns '' at EOF, so a file without the marker ends
        # the scan instead of looping forever.
        for headln in iter(f.readline, ''):
            if 'END OF HEADER' in headln:
                break
        else:
            raise ValueError('no END OF HEADER line found in {}'.format(fn))

        # handle frame by frame
        for headln in iter(f.readline, ''):
            # handle the header
            sv.append(int(headln[:2]))
            year = int(headln[2:5])
            # two-digit year: 80-99 -> 19xx, below 80 -> 20xx
            if 80 <= year <= 99:
                year += 1900
            elif year < 80:
                year += 2000
            epoch.append(datetime(year=year,
                                  month=int(headln[5:8]),
                                  day=int(headln[8:11]),
                                  hour=int(headln[11:14]),
                                  minute=int(headln[14:17]),
                                  second=int(headln[17:20]),
                                  # single digit after the decimal point = tenths of a second
                                  microsecond=int(headln[21])*100000))
            # Now get the data.  rstrip() chomps newlines consistently across
            # platforms; [:-1] does not work for all line endings.
            records.append(headln[22:].rstrip() +
                           ''.join(f.readline()[startcol:].rstrip()
                                   for _ in range(nline)))

    if not sv:
        raise ValueError('no navigation records found in {}'.format(fn))

    # Fortran-style 'D' exponents are not understood by the float parser.
    raws = ''.join(rec + '\n' for rec in records).replace('D','E')

    strio = BytesIO(raws.encode())
    # genfromtxt returns a 1-D array for a single record; force 2-D so the
    # column stacking below also works for one-record files.
    darr = np.atleast_2d(np.genfromtxt(strio,delimiter=nfloat))

    nav= DataFrame(np.hstack((np.asarray(sv,int)[:,None],darr)), epoch,
               ['sv','SVclockBias','SVclockDrift','SVclockDriftRate','IODE',
                'Crs','DeltaN','M0','Cuc','Eccentricity','Cus','sqrtA','TimeEph',
                'Cic','OMEGA','CIS','Io','Crc','omega','OMEGA DOT','IDOT',
                'CodesL2','GPSWeek','L2Pflag','SVacc','SVhealth','TGD','IODC',
                'TransTime','FitIntvl'])

    if odir:
        # fn.name is a plain str without with_suffix(); swap the suffix on
        # the Path first, then take the file name.
        h5fn = odir/fn.with_suffix('.h5').name
        print('saving NAV data to {}'.format(h5fn))
        nav.to_hdf(h5fn,key='NAV',mode='a',complevel=6,append=False)

    return nav

Example #35

0

Show file

File: pandas_basic.py Project: Hillash/PythonDataAnalysis

# Script fragment: `gdp` and `state_gdp` are defined outside this excerpt.
# Percentage change between consecutive rows; show the last rows.
gdp.pct_change().tail()
gdp.pct_change(periods=4).tail()  # Quarterly data, annual difference

# Export the same frame to Excel (default and named sheet), CSV and JSON.
state_gdp.to_excel("state_gdp_from_dataframe.xls")
state_gdp.to_excel("state_gdp_from_dataframe_sheetname.xls", sheet_name="State GDP")
state_gdp.to_excel("state_gdp_from_dataframe.xlsx")
state_gdp.to_csv("state_gdp_from_dataframe.csv")
# NOTE(review): `StringIO.StringIO` and `.buf` look like the Python 2
# StringIO module - confirm the target interpreter.
sio = StringIO.StringIO()
state_gdp.to_json(sio)
sio.seek(0)
sio.buf[:50]

# Write one 1000 x 1000 frame of zeros as CSV, HDF5, compressed HDF5 and
# gzip-compressed CSV (the file names suggest a size comparison).
df = DataFrame(zeros((1000, 1000)))
df.to_csv("size_test.csv")
df.to_hdf("size_test.h5", "df")  # h5 is the usual extension for HDF5
df.to_hdf("size_test_compressed.h5", "df", complib="zlib", complevel=6)
f = gzip.open("size_test.csvz", "w")
df.to_csv(f)
f.close()
# Read the gzip-compressed CSV back.
df_from_csvz = read_csv("size_test.csvz", compression="gzip")

# Export a plain array via DataFrame, without header row or index column.
x = randn(100, 100)
DataFrame(x).to_csv("numpy_array.csv", header=False, index=False)

# Series codes; the matching display names follow in `names`.
codes = ["GDPC1", "INDPRO", "CPILFESL", "UNRATE", "GS10", "GS1", "BAA", "AAA"]
names = [
    "Real GDP",
    "Industrial Production",
    "Core CPI",
    "Unemployment Rate",