Example #1
 def save_history(self, history_dir):
     accuracy = {'Training': self.history.history['accuracy'],
                 'Validation': self.history.history['val_accuracy']}
     loss = {'Training': self.history.history['loss'],
             'Validation': self.history.history['val_loss']}
     # Build a DataFrame from each metric dict and write it to its own HDF5 file.
     DataFrame.from_dict(accuracy).to_hdf(f"{history_dir}/accuracy.hdf5",
                                          key='history',
                                          mode='w')
     DataFrame.from_dict(loss).to_hdf(f"{history_dir}/loss.hdf5",
                                      key='history',
                                      mode='w')
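For completeness, a minimal read-back sketch for the two files written above; the directory name is hypothetical, and pandas.read_hdf simply returns the frame stored under the 'history' key (PyTables must be installed).

import pandas as pd

history_dir = "histories"  # hypothetical directory passed to save_history
accuracy = pd.read_hdf(f"{history_dir}/accuracy.hdf5", key="history")
loss = pd.read_hdf(f"{history_dir}/loss.hdf5", key="history")
print(accuracy[["Training", "Validation"]].tail())
print(loss[["Training", "Validation"]].tail())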
Example #2
def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFrame):
    # GH39420
    # Check that read_hdf with categorical columns can filter by where condition.
    df.col = df.col.astype("category")
    max_widths = {"col": 1}
    categorical_values = sorted(df.col.unique())
    expected.col = expected.col.astype("category")
    expected.col.cat.set_categories(categorical_values, inplace=True)

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table", min_itemsize=max_widths)
        result = read_hdf(path, where=where)
        tm.assert_frame_equal(result, expected)
Example #3
def test_read_from_pathlib_path(setup_path):

    # GH11773
    expected = DataFrame(np.random.rand(4, 5),
                         index=list("abcd"),
                         columns=list("ABCDE"))
    with ensure_clean_path(setup_path) as filename:
        path_obj = Path(filename)

        expected.to_hdf(path_obj, "df", mode="a")
        actual = read_hdf(path_obj, "df")

    tm.assert_frame_equal(expected, actual)
Example #4
def test_read_nokey_table(setup_path):
    # GH13231
    df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", mode="a", format="table")
        reread = read_hdf(path)
        tm.assert_frame_equal(df, reread)
        df.to_hdf(path, "df2", mode="a", format="table")

        msg = "key must be provided when HDF5 file contains multiple datasets."
        with pytest.raises(ValueError, match=msg):
            read_hdf(path)
Example #5
def test_read_hdf_iterator(setup_path):
    df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
    df.index.name = "letters"
    df = df.set_index(keys="E", append=True)

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", mode="w", format="t")
        direct = read_hdf(path, "df")
        iterator = read_hdf(path, "df", iterator=True)
        assert isinstance(iterator, TableIterator)
        indirect = next(iterator.__iter__())
        tm.assert_frame_equal(direct, indirect)
        iterator.store.close()
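Besides iterator=True, read_hdf also accepts chunksize for table-format keys; a small hedged sketch follows (the file name and key are made up, and the key must have been written with format='table'):

import pandas as pd

# Iterate over a table-format key in fixed-size chunks instead of loading it at once.
for chunk in pd.read_hdf("example.h5", "df", chunksize=2):
    print(chunk.shape)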
Example #6
def opn(info, lines):
    store, parse, path, *_ = info
    lst = []
    for line in lines:
        tokens = line.split()
        if tokens[0] == 'INGRP' and tokens[1] == 'INDELT':
            s = tokens[2].split(':')
            indelt = int(s[0]) if len(s) == 1 else 60 * int(s[0]) + int(s[1])
        elif tokens[0] in ops:
            s = f'{tokens[0][0]}{int(tokens[1]):03d}'
            lst.append((tokens[0], s, indelt))
    dfopn = DataFrame(lst, columns=['OPERATION', 'SEGMENT', 'INDELT_minutes'])
    dfopn.to_hdf(store, '/CONTROL/OP_SEQUENCE', data_columns=True)
Example #7
def _store_table_to_single_subset(tbl: pd.DataFrame,
                                  fn: str,
                                  path: str,
                                  subset: str,
                                  format: str = 'nexus'):
    """
    Helper function. Internal use only.
    """

    tbl_path = path.replace('%', subset)
    if format == 'table':
        try:
            tbl.to_hdf(fn, tbl_path, format='table', data_columns=True)
        except ValueError:
            tbl.to_hdf(fn, tbl_path, format='table')

    elif format == 'nexus':
        with h5py.File(fn, 'a') as fh:
            for key, val in tbl.items():
                #print(f'Storing {key} ({val.shape}, {val.dtype}) to {fn}: {path}')
                grp = fh.require_group(tbl_path)
                grp.attrs['NX_class'] = 'NXcollection'
                k = key.replace('/', '_').replace('.', ' ')
                try:
                    if k not in grp:
                        ds = grp.require_dataset(k,
                                                 shape=val.shape,
                                                 dtype=val.dtype,
                                                 maxshape=(None, ))
                    else:
                        ds = grp[k]
                        if ds.shape[0] != val.shape[0]:
                            ds.resize(val.shape[0], axis=0)
                            #print('resizing', k)
                    ds[:] = val
                except (TypeError, OSError) as err:
                    if val.dtype == 'O':
                        val2 = val.astype('S')
                        if k in grp:
                            del grp[k]
                        ds = grp.require_dataset(k,
                                                 shape=val.shape,
                                                 dtype=val2.dtype,
                                                 maxshape=(None, ))
                        ds[:] = val2
                    else:
                        raise err

                ds.attrs['label'] = key
    else:
        raise ValueError('Storage format must be "table" or "nexus".')
Example #8
def export_df(df: pd.DataFrame, export_path: str,
              export_format: str = "pickle",
              hdf_key: Optional[str] = None
              ) -> None:
    """
    Exports the dataframe in a variety of formats. The dataframe is expected to have columns `transcript` and `index`.

    :param df: The dataframe to be exported
    :param export_path: Where to write the exported dataframe
    :param export_format: One of "html", "csv", "json", "parquet", "pickle", "excel", "hdf"
    :param hdf_key: if hdf format is used, key to store the df under in the HDF5 file
    """
    _, ext = os.path.splitext(export_path)
    use_ext = '.' + export_format if len(ext) == 0 else ''
    if export_format == "html":
        color_dict = {
            'POSITIVE': 'limegreen',
            'NEGATIVE': 'red',
            'NEUTRAL': 'lightgrey',
            'MIXED': 'yellow'
        }
        spk_dict = {
            'ch_0': '#F0F8FF',
            'spk_0': '#F0F8FF',
            'ch_1': '#FFF8DC',
            'spk_1': '#FFF8DC'
        }
        df.set_index(
            ['transcript', 'recording', 'speaker', 'index']
        ).to_html(export_path + use_ext, encoding='utf-8',
                  formatters={
                      'pred_sent': lambda sent: f'<span style="background-color:{color_dict[sent]}">{sent}</span>',
                      'speaker': lambda speaker: f'<span style="background-color:{spk_dict[speaker]}">{speaker}</span>'
                  }, escape=False)
    elif export_format == "csv":
        df.to_csv(export_path + use_ext)
    elif export_format == "json":
        df.to_json(export_path + use_ext)
    elif export_format == "parquet":
        df.to_parquet(export_path + use_ext)
    elif export_format == "pickle":
        df.to_pickle(export_path + use_ext)
    elif export_format == "excel":
        if len(use_ext) > 0:
            use_ext = '.xlsx'
        df.to_excel(export_path + use_ext, index=False)
    elif export_format == "hdf":
        assert hdf_key is not None, "Parameter hdf_key must be informed if export format is hdf."
        df.to_hdf(export_path + use_ext, hdf_key)
    else:
        raise ValueError(f"Unknown export format: {export_format}")
Example #9
def test_read_missing_key_opened_store(setup_path):
    # GH 28699
    with ensure_clean_path(setup_path) as path:
        df = DataFrame({"a": range(2), "b": range(2)})
        df.to_hdf(path, "k1")

        with HDFStore(path, "r") as store:

            with pytest.raises(KeyError, match="'No object named k2 in the file'"):
                read_hdf(store, "k2")

            # Test that the file is still open after a KeyError and that we can
            # still read from it.
            read_hdf(store, "k1")
Example #10
def save_timeseries(store, ts, savedict, siminfo, saveall, operation, segment,
                    activity):
    # save computed timeseries (at computation DELT)
    save = {k for k, v in savedict.items() if v or saveall}
    df = DataFrame(index=siminfo['tindex'])
    for y in (save & set(ts.keys())):
        df[y] = ts[y]
    df = df.astype(float32).sort_index(axis='columns')
    path = f'RESULTS/{operation}_{segment}/{activity}'
    if not df.empty:
        df.to_hdf(store, path, complib='blosc', complevel=9)
    else:
        print('Save DataFrame Empty for', path)
    return
Example #11
    def save_dataframe(path: str, dataframe: pd.DataFrame, metadata: Optional[dict] = None,
                       metadata_method: str = 'json', raise_meta_fail: bool = True):
        """
        Save DataFrame to hdf5 file along with a meta data dict.

        The metadata dict can either be serialized with json and stored as a str in the hdf5 file, or recursively saved
        into hdf5 groups if the dict contains types that hdf5 can deal with. Experiment with both methods and see what
        works best.

        Currently the hdf5 method can work with these types: [str, bytes, int, float, np.int, np.int8, np.int16,
        np.int32, np.int64, np.float, np.float16, np.float32, np.float64, np.float128, np.complex].

        If it encounters an object that is not one of these types, it will store whatever that object's __str__() method
        returns if raise_meta_fail is False, else it will raise an exception.

        :param path:            path to save the file to
        :param dataframe:       DataFrame to save in the hdf5 file
        :param metadata:        Any associated meta data to store along with the DataFrame in the hdf5 file
        :param metadata_method: method for storing the metadata dict, either 'json' or 'recursive'
        :param raise_meta_fail: raise an exception if recursive metadata saving encounters an unsupported object
        """
        if os.path.isfile(path):
            raise FileExistsError

        f = h5py.File(path, mode='w')

        f.create_group('DATAFRAME')

        if metadata is not None:
            mg = f.create_group('META')
            mg.attrs['method'] = metadata_method

            if metadata_method == 'json':
                bad_keys = []
                for k in metadata.keys():
                    try:
                        mg.create_dataset(k, data=json.dumps(metadata[k]))
                    except TypeError as e:
                        bad_keys.append(str(e))

                if len(bad_keys) > 0:
                    bad_keys = '\n'.join(bad_keys)
                    raise TypeError(f"The following meta data keys are not JSON serializable\n{bad_keys}")


            elif metadata_method == 'recursive':
                HdfTools._dicts_to_group(h5file=f, path='META/', d=metadata, raise_meta_fail=raise_meta_fail)

        f.close()

        dataframe.to_hdf(path, key='DATAFRAME', mode='r+')
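An illustrative call, assuming save_dataframe is exposed as a static method on the same HdfTools class that provides _dicts_to_group; the file name, frame, and metadata are made up (note it raises FileExistsError if the file already exists):

import pandas as pd

df = pd.DataFrame({'roi': [0, 1, 2], 'signal': [0.1, 0.5, 0.3]})
meta = {'sampling_rate': 30.0, 'origin': 'demo'}   # hypothetical metadata
HdfTools.save_dataframe('analysis.hdf', df, metadata=meta, metadata_method='json')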
Example #12
def test_retain_index_attributes2(setup_path):
    with ensure_clean_path(setup_path) as path:

        with catch_warnings(record=True):

            df = DataFrame({
                "A":
                Series(range(3),
                       index=date_range("2000-1-1", periods=3, freq="H"))
            })
            df.to_hdf(path, "data", mode="w", append=True)
            df2 = DataFrame({
                "A":
                Series(range(3),
                       index=date_range("2002-1-1", periods=3, freq="D"))
            })

            df2.to_hdf(path, "data", append=True)

            idx = date_range("2000-1-1", periods=3, freq="H")
            idx.name = "foo"
            df = DataFrame({"A": Series(range(3), index=idx)})
            df.to_hdf(path, "data", mode="w", append=True)

        assert read_hdf(path, "data").index.name == "foo"

        with catch_warnings(record=True):

            idx2 = date_range("2001-1-1", periods=3, freq="H")
            idx2.name = "bar"
            df2 = DataFrame({"A": Series(range(3), index=idx2)})
            df2.to_hdf(path, "data", append=True)

        assert read_hdf(path, "data").index.name is None
Example #13
def test_duplicate_column_name(setup_path):
    df = DataFrame(columns=["a", "a"], data=[[0, 0]])

    with ensure_clean_path(setup_path) as path:
        msg = "Columns index has to be unique for fixed format"
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, "df", format="fixed")

        df.to_hdf(path, "df", format="table")
        other = read_hdf(path, "df")

        tm.assert_frame_equal(df, other)
        assert df.equals(other)
        assert other.equals(df)
Example #14
def global_(info, lines):
    store, parse, path, *_ = info
    d = parseD(lines[1], parse['GLOBAL', 'START'])
    start = str(
        Timestamp(f"{d['SYR']}-{d['SMO']}-{d['SDA']}") +
        Timedelta(int(d['SHR']), 'h') + Timedelta(int(d['SMI']), 'T'))[0:16]
    stop = str(
        Timestamp(f"{d['EYR']}-{d['EMO']}-{d['EDA']}") +
        Timedelta(int(d['EHR']), 'h') + Timedelta(int(d['EMI']), 'T'))[0:16]
    data = [lines[0].strip(), start, stop]
    dfglobal = DataFrame(data,
                         index=['Comment', 'Start', 'Stop'],
                         columns=['Info'])
    dfglobal.to_hdf(store, '/CONTROL/GLOBAL', data_columns=True)
Example #15
def test_read_nokey(setup_path):
    # GH10443
    df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))

    # Categorical dtype not supported for "fixed" format. So no need
    # to test with that dtype in the dataframe here.
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", mode="a")
        reread = read_hdf(path)
        tm.assert_frame_equal(df, reread)
        df.to_hdf(path, "df2", mode="a")

        msg = "key must be provided when HDF5 file contains multiple datasets."
        with pytest.raises(ValueError, match=msg):
            read_hdf(path)
Example #16
def test_read_hdf_open_store(setup_path):
    # GH10330
    # No check for non-string path_or_buf, and no test of open store
    df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
    df.index.name = "letters"
    df = df.set_index(keys="E", append=True)

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", mode="w")
        direct = read_hdf(path, "df")
        store = HDFStore(path, mode="r")
        indirect = read_hdf(store, "df")
        tm.assert_frame_equal(direct, indirect)
        assert store.is_open
        store.close()
Example #17
def make_runlist(store, doe, doename):
    df = DataFrame(doe, columns=['Run', 'DataPath', 'Segment', 'Name', 'Value'])
    df.to_hdf(store, f'{doename}/DoE', format='t', data_columns=True)
    rundict = defaultdict(defaultdict)
    for line in doe:
        run, path, segment, name, value = line[:]
        operation, module, *temp = path.split(sep='/', maxsplit=3)
        table = '_'.join(temp)
        runstr = f'{run}'

        if (operation, module, segment) not in rundict[runstr]:
            rundict[runstr][operation, module, segment] = defaultdict(dict)
        rundict[runstr][operation, module, segment][table][name] = float(value)
    return rundict
Example #18
def ftables(info, llines):
    store, parse, path, *_ = info
    header=['Depth','Area','Volume','Disch1','Disch2','Disch3','Disch4','Disch5']
    lines = iter(llines)
    for line in lines:
        if line[2:8] == 'FTABLE':
            unit = int(line[8:])
            name = f'FT{unit:03d}'
            rows,cols = next(lines).split()
            lst = []
        elif line[2:5] == 'END':
            dfftable = DataFrame(lst, columns=header[0:int(cols)])
            dfftable.to_hdf(store, f'/FTABLES/{name}', data_columns=True)
        else:
            lst.append(parseD(line, parse['FTABLES','FTABLE']))
Example #19
    def store_dataframe(self, name: str, df: pd.DataFrame) -> None:
        """Serializes a dataframe in h5 format.

        Args:
            name: name of the file we want to save
            df: the dataframe to serialize

        Returns:
            None
        """
        log = self.log
        name = name + ".h5"
        log.debug(f"{name=}")
        df.to_hdf(name, key="df", mode="w")

        return None
Example #20
def masslink(info, lines):
    store, parse, path, *_ = info
    lst = []
    for line in lines:
        if line[2:11] == 'MASS-LINK':
            name = line[12:].rstrip()
        elif line[2:5] != 'END':
            d = parseD(line, parse['MASS-LINK','na'])
            d['MLNO'] = f'ML{int(name):03d}'
            lst.append(d)
    if lst:
        dfmasslink = DataFrame(lst, columns=d).replace('na','')
        del dfmasslink['TGRPN']
        dfmasslink['COMMENTS'] = ''
        dfmasslink.to_hdf(store, '/CONTROL/MASS_LINKS', data_columns=True)
Example #21
def gener(info, lines):
    store, parse, path, *_ = info
    lst = []
    sub_blocks = ['OPCODE', 'PARM']
    current_block = ''
    d = {}
    for line in lines:
        if line[2:5] == 'END':
            df = DataFrame(lst, columns=d)
            df.to_hdf(store, key=f'GENER/{current_block}', data_columns=True)
            lst.clear()
        elif any(s in line for s in sub_blocks):
            current_block = [s for s in sub_blocks if s in line][0]
        else:
            d = parseD(line, parse['GENER', current_block])
            lst.append(d)
Example #22
def write_df_2_h5(df: pd.DataFrame, save_path: str, key_name):
    """
    Save a report DataFrame to an HDF5 (.hdf5) file for convenient storage and retrieval.
    :param df: the report data as a DataFrame
    :param key_name: the key (name) under which the report DataFrame is stored
    :param save_path: the destination path (a .hdf5 file)
    :return:
    """
    if public_function.detect_df(df) is False:
        return None
    if '.hdf5' not in save_path:
        raise ValueError(f"{save_path} is not a h5(.hdf5) file.")
    h5_save_folder = os.path.dirname(save_path)
    if not os.path.exists(h5_save_folder):
        os.makedirs(h5_save_folder)
    df.to_hdf(save_path, key=key_name, mode='a')
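A hypothetical call to write_df_2_h5; the path and key are illustrative, and it assumes public_function.detect_df accepts the frame:

import pandas as pd

report = pd.DataFrame({'sku': ['A1', 'B2'], 'qty': [10, 3]})
write_df_2_h5(report, './reports/daily.hdf5', 'daily_report')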
Example #23
 def write_ts(self, data_frame: pd.DataFrame, category: Category,
              operation: str, segment: str, activity: str, *args: Any,
              **kwargs: Any) -> None:
     """Saves timeseries to HDF5"""
     path = f'{operation}_{segment}/{activity}'
     if category:
         path = 'RESULTS/' + path
     complevel = None
     if 'compress' in kwargs:
         if kwargs['compress']:
             complevel = 9
     data_frame.to_hdf(self._store,
                       path,
                       format='t',
                       data_columns=True,
                       complevel=complevel)
Example #24
def test_read_hdf_errors(setup_path):
    df = DataFrame(np.random.rand(4, 5),
                   index=list("abcd"),
                   columns=list("ABCDE"))

    with ensure_clean_path(setup_path) as path:
        msg = r"File [\S]* does not exist"
        with pytest.raises(IOError, match=msg):
            read_hdf(path, "key")

        df.to_hdf(path, "df")
        store = HDFStore(path, mode="r")
        store.close()

        msg = "The HDFStore must be open for reading."
        with pytest.raises(IOError, match=msg):
            read_hdf(store, "df")
Example #25
    def insert(self, symbol: str, ts: BiTimestamp, ticks: pd.DataFrame):
        self._check_closed('insert')
        as_at_date = ts.as_at()

        # compose a splay path based on YYYY/MM/DD, symbol and version and pass in as a functor
        # so it can be populated with the bitemporal version
        def create_write_path(version):
            return self.base_path.joinpath('{}/{:02d}/{:02d}/{}_{:04d}.h5'.format(as_at_date.year,
                                                                                  as_at_date.month,
                                                                                  as_at_date.day,
                                                                                  symbol, version))

        write_path = self.index.insert(symbol, as_at_date, create_write_path)

        # do the tick write, with blosc compression
        write_path.parent.mkdir(parents=True, exist_ok=True)
        ticks.to_hdf(str(write_path), 'ticks', mode='w', append=False, complevel=9, complib='blosc')
Example #26
    def run(self, features: pd.DataFrame, parent: FileAdapter) -> FileAdapter:

        if features.empty:
            raise SoftPreconditionFailed('Input features are empty')

        output_file = self.default_outputs()
        features = extract_meta_features(features, config=meta_survey_config)
        if not features.empty:
            features.loc[:, 'file_id'] = parent.id
        self.logger.debug('Obtained %d survey/meta features',
                          features.shape[0])

        with pd.HDFStore(output_file.file, 'w') as store:
            features.to_hdf(store, self.output_hdf5_key)
        deep_update(output_file.metadata,
                    {'standard': infer_standard_groups(output_file.file_str)})
        return output_file
Example #27
def store_voxels_ana_data(file_name: str, group_name: str,
                          voxels_df: pd.DataFrame,
                          voxels_dict: Dict[str, List[Any]]) -> None:
    """
    Adds the voxels' new data (coming in voxels_dict) to the pre-existing data,
    only for the corresponding voxels (those whose voxel_id's are listed in the
    incoming dict).
    The DataFrame is then stored under file_name / group_name / voxels.
    """
    voxels_df.loc[voxels_dict['indexes'], 'newE'] = voxels_dict['newE']
    voxels_df.loc[voxels_dict['indexes'], 'track_id'] = voxels_dict['trackID']
    #voxels_df.to_hdf(file_name, group_name + '/voxels', format='table',
    #                 data_columns='evt_id')
    voxels_df.to_hdf(file_name,
                     group_name + '/voxels',
                     format='table',
                     data_columns=True)
Example #28
def test_categorical_nan_only_columns(setup_path):
    # GH18413
    # Check that read_hdf with categorical columns with NaN-only values can
    # be read back.
    df = DataFrame({
        "a": ["a", "b", "c", np.nan],
        "b": [np.nan, np.nan, np.nan, np.nan],
        "c": [1, 2, 3, 4],
        "d": Series([None] * 4, dtype=object),
    })
    df["a"] = df.a.astype("category")
    df["b"] = df.b.astype("category")
    df["d"] = df.b.astype("category")
    expected = df
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table", data_columns=True)
        result = read_hdf(path, "df")
        tm.assert_frame_equal(result, expected)
Example #29
def rw(format: str, path: Path, df: pd.DataFrame):
    """Write a dataframe to <path> with extension <format>.
    #   str <format> : file format for writing (either .csv or .h5)
    #   posix path <path> : path for writing
    #   pandas DataFrame <df> : dataframe to write
    """
    if format == ".csv":

        df.to_csv(path, mode="w")

    elif format == ".h5":

        df.to_hdf(path, key="df", mode="w")

    else:

        raise IllegalArgumentError(
            "Must specify either .csv or .h5 as file format."
        )
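A hedged usage sketch for rw(); the paths are made up, and the ".h5" branch requires PyTables:

from pathlib import Path
import pandas as pd

frame = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
rw('.csv', Path('frame.csv'), frame)
rw('.h5', Path('frame.h5'), frame)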
Example #30
def test_complex_mixed_fixed(setup_path):
    complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                         dtype=np.complex64)
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                          dtype=np.complex128)
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)
Example #31
File: hdf.py Project: Itay4/pandas
class HDF(BaseIO):

    params = ['table', 'fixed']
    param_names = ['format']

    def setup(self, format):
        self.fname = '__test__.h5'
        N = 100000
        C = 5
        self.df = DataFrame(np.random.randn(N, C),
                            columns=['float{}'.format(i) for i in range(C)],
                            index=date_range('20000101', periods=N, freq='H'))
        self.df['object'] = tm.makeStringIndex(N)
        self.df.to_hdf(self.fname, 'df', format=format)

    def time_read_hdf(self, format):
        read_hdf(self.fname, 'df')

    def time_write_hdf(self, format):
        self.df.to_hdf(self.fname, 'df', format=format)
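A rough manual timing sketch, outside asv, assuming the HDF class above (and its BaseIO base) is importable and PyTables is installed; the repeat count and format choice are arbitrary:

import timeit

bench = HDF()
bench.setup('fixed')                          # writes __test__.h5 in fixed format
t = timeit.timeit(lambda: bench.time_read_hdf('fixed'), number=5)
print(f'mean read_hdf time: {t / 5:.4f} s')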
Example #32
    samples = []
    rows = []

    i = 0
    for i, line in enumerate(open(f)):
        if i < 10:
            continue
        snp, sample, g1, g2 = line.split('\t')[:4]
        genotype = ''.join([g1, g2])
        if sample == current_sample:
            rows.append((snp, genotype))
        else:
            print(current_sample, i)
            samples.append(current_sample)
            df = DataFrame(rows, columns=['SNP', current_sample])
            df.to_hdf(h5f, current_sample)
            rows = [(snp, genotype)]
            current_sample = sample

    # Part 2 - assemble wide format
    print('Assembling wide format')
    df = pd.read_hdf(h5f, samples[0])
    for sample in samples[1:]:
        print(sample)
        df1 = pd.read_hdf(filename.replace('txt', 'h5'), sample)
        df = df.merge(df1, on='SNP')

    df.to_hdf(h5f, 'wide_format')
    print('Wide format assembled')

Example #33
gdp.pct_change().tail()
gdp.pct_change(periods=4).tail() # Quarterly data, annual difference

state_gdp.to_excel('state_gdp_from_dataframe.xls')
state_gdp.to_excel('state_gdp_from_dataframe_sheetname.xls', sheet_name='State GDP')
state_gdp.to_excel('state_gdp_from_dataframe.xlsx')
state_gdp.to_csv('state_gdp_from_dataframe.csv')
sio = StringIO()  # requires: from io import StringIO
state_gdp.to_json(sio)
sio.seek(0)
sio.read(50)

df = DataFrame(zeros((1000,1000)))
df.to_csv('size_test.csv')
df.to_hdf('size_test.h5','df') # h5 is the usual extension for HDF5
df.to_hdf('size_test_compressed.h5','df',complib='zlib',complevel=6)
f = gzip.open('size_test.csvz','wt')  # text mode so to_csv can write strings
df.to_csv(f)
f.close()
df_from_csvz = read_csv('size_test.csvz',compression='gzip')

x = randn(100,100)
DataFrame(x).to_csv('numpy_array.csv',header=False,index=False)

codes = ['GDPC1','INDPRO','CPILFESL','UNRATE','GS10','GS1','BAA','AAA']
names = ['Real GDP','Industrial Production','Core CPI','Unemployment Rate',\
   '10 Year Yield','1 Year Yield','Baa Yield','Aaa Yield']
# r to disable escape
base_url = r'http://research.stlouisfed.org/fred2/data/'
Example #34
def readRinexNav(fn,odir=None):
    """
    Michael Hirsch
    It may actually be faster to read the entire file via f.read() and then .split()
    and asarray().reshape() to the final result, but I did it frame by frame.
    http://gage14.upc.es/gLAB/HTML/GPS_Navigation_Rinex_v2.11.html
    """
    fn = Path(fn).expanduser()
    if odir: odir = Path(odir).expanduser()
    
    startcol = 3 #column where numerical data starts
    nfloat=19 #number of text elements per float data number
    nline=7 #number of lines per record

    with fn.open('r') as f:
        #find end of header, which has non-constant length
        while True:
            if 'END OF HEADER' in f.readline(): break
        #handle frame by frame
        sv = []; epoch=[]; raws=''
        while True:
            headln = f.readline()
            if not headln: break
            #handle the header
            sv.append(headln[:2])
            year = int(headln[2:5])
            if 80<= year <=99:
                year+=1900
            elif year<80: #good till year 2180
                year+=2000
            epoch.append(datetime(year =year,
                                  month   =int(headln[5:8]),
                                  day     =int(headln[8:11]),
                                  hour    =int(headln[11:14]),
                                  minute  =int(headln[14:17]),
                                  second  =int(headln[17:20]),
                                  microsecond=int(headln[21])*100000))
            """
            now get the data.
            Use rstrip() to chomp newlines consistently on Windows and Python 2 & Python 3
            Specifically [:-1] doesn't work consistently on multi-platform line endings
            """
            raw = (headln[22:].rstrip() +
                    ''.join(f.readline()[startcol:].rstrip() for _ in range(nline)))
            raws += raw + '\n'

    raws = raws.replace('D','E')

    strio = BytesIO(raws.encode())
    darr = np.genfromtxt(strio,delimiter=nfloat)

    nav= DataFrame(np.hstack((np.asarray(sv,int)[:,None],darr)), epoch,
               ['sv','SVclockBias','SVclockDrift','SVclockDriftRate','IODE',
                'Crs','DeltaN','M0','Cuc','Eccentricity','Cus','sqrtA','TimeEph',
                'Cic','OMEGA','CIS','Io','Crc','omega','OMEGA DOT','IDOT',
                'CodesL2','GPSWeek','L2Pflag','SVacc','SVhealth','TGD','IODC',
                'TransTime','FitIntvl'])

    if odir:
        h5fn = (odir / fn.name).with_suffix('.h5')  # build the Path first; str has no .with_suffix
        print('saving NAV data to {}'.format(h5fn))
        nav.to_hdf(h5fn,key='NAV',mode='a',complevel=6,append=False)

    return nav
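An illustrative call; 'brdc0010.15n' is a made-up RINEX 2.11 navigation file name, and odir='.' writes the HDF5 copy next to the script:

nav = readRinexNav('brdc0010.15n', odir='.')
print(nav[['sv', 'SVclockBias', 'sqrtA']].head())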