Code Example #1
File: kraken.py Project: portfolioscout/tf
def storeHdf5(data, tag, path):
    hdf = HDFStore(path,'a')
    if tag in hdf.keys():
        hdf.append(tag,data)
    else:
        hdf.put(tag,data)
    hdf.close()          
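A minimal usage sketch of the pattern above, assuming hypothetical file and tag names. One caveat: put() writes the fixed format by default, which cannot be appended to later, so the sketch passes format='table' on the first write:

from pandas import DataFrame, HDFStore

def store_hdf5_table(data, tag, path):
    # Same idea as storeHdf5 above, but forcing the appendable table format.
    with HDFStore(path, 'a') as hdf:            # 'a' keeps existing nodes
        if '/' + tag in hdf.keys():             # keys() returns paths such as '/prices'
            hdf.append(tag, data)               # extend the existing table
        else:
            hdf.put(tag, data, format='table')  # table format allows later appends

store_hdf5_table(DataFrame({'price': [1.0, 2.0]}), 'prices', 'example.h5')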
Code Example #2
File: hdf.py Project: zhuomingliang/pandas
class HDFStorePanel(BaseIO):

    goal_time = 0.2

    def setup(self):
        self.fname = '__test__.h5'
        with warnings.catch_warnings(record=True):
            self.p = Panel(np.random.randn(20, 1000, 25),
                           items=['Item%03d' % i for i in range(20)],
                           major_axis=date_range('1/1/2000', periods=1000),
                           minor_axis=['E%03d' % i for i in range(25)])
            self.store = HDFStore(self.fname)
            self.store.append('p1', self.p)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.select('p1')

    def time_write_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.append('p2', self.p)
Code Example #3
File: hdf.py Project: bkandel/pandas
class HDFStorePanel(BaseIO):

    goal_time = 0.2

    def setup(self):
        self.fname = '__test__.h5'
        with warnings.catch_warnings(record=True):
            self.p = Panel(np.random.randn(20, 1000, 25),
                           items=['Item%03d' % i for i in range(20)],
                           major_axis=date_range('1/1/2000', periods=1000),
                           minor_axis=['E%03d' % i for i in range(25)])
            self.store = HDFStore(self.fname)
            self.store.append('p1', self.p)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.select('p1')

    def time_write_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.append('p2', self.p)
Code Example #4
def storeHdf5(data, tag, path):
    hdf = HDFStore(path, 'a')
    if tag in hdf.keys():
        hdf.append(tag, data)
    else:
        hdf.put(tag, data)
    hdf.close()
Code Example #5
File: mtr.py Project: mpenning/pymtr
    def store_results(self, result, index, columns, hdf5_file):
        self.df = DataFrame(result, columns=columns)
        self.df = self.df.set_index(index)
        self.df.sort_index(inplace=True)

        # Store the DataFrame as an HDF5 file...
        hdf = HDFStore(hdf5_file)
        # Append the dataframe, and ensure addr / host can be 17 chars long
        hdf.append('df', self.df, data_columns=list(columns), 
            min_itemsize={'addr': 17, 'host': 17})
        hdf.close()
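For context, a short sketch of what the min_itemsize argument guards against, with hypothetical values: PyTables freezes string column widths from the first chunk written, so reserving 17 characters up front lets later appends carry longer addr/host strings.

from pandas import DataFrame, HDFStore

df1 = DataFrame({'addr': ['10.0.0.1'], 'host': ['gw'], 'rtt': [1.2]})
df2 = DataFrame({'addr': ['192.168.100.250'], 'host': ['edge-router-01'], 'rtt': [3.4]})

with HDFStore('mtr_example.h5', 'w') as hdf:
    # Without min_itemsize the widths would be frozen at len('10.0.0.1') and
    # len('gw'), and the second append would fail because its strings are longer.
    hdf.append('df', df1, data_columns=['addr', 'host'],
               min_itemsize={'addr': 17, 'host': 17})
    hdf.append('df', df2, data_columns=['addr', 'host'])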
Code Example #6
File: test_file_handling.py Project: Aathi410/Pro123
def test_complibs_default_settings(setup_path):
    # GH15943
    df = tm.makeDataFrame()

    # Set complevel and check if complib is automatically set to
    # default value
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df", complevel=9)
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 9
                assert node.filters.complib == "zlib"

    # Set complib and check to see if compression is disabled
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df", complib="zlib")
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None

    # Check if not setting complib or complevel results in no compression
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df")
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None

    # Check if file-defaults can be overridden on a per table basis
    with ensure_clean_path(setup_path) as tmpfile:
        store = HDFStore(tmpfile)
        store.append("dfc", df, complevel=9, complib="blosc")
        store.append("df", df)
        store.close()

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None
            for node in h5file.walk_nodes(where="/dfc", classname="Leaf"):
                assert node.filters.complevel == 9
                assert node.filters.complib == "blosc"
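The behaviour the test exercises can be reproduced directly. A small sketch with a hypothetical file name: setting only complevel falls back to zlib, a per-key complib/complevel on append overrides the store default, and appending with neither gives no compression.

import numpy as np
from pandas import DataFrame, HDFStore

df = DataFrame(np.random.randn(100, 3), columns=list('abc'))

df.to_hdf('compress_example.h5', 'zlib_df', mode='w', complevel=9)  # complib defaults to zlib

with HDFStore('compress_example.h5') as store:
    store.append('blosc_df', df, complevel=9, complib='blosc')  # per-key override
    store.append('plain_df', df)                                # no compression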
Code Example #7
File: DataTil.py Project: TaxIPP-Life/til-core
    def store_to_liam(self):
        '''
        Saves the data in the format subsequently used by the Til model.
        Selects the variables that Til uses downstream.
        Calls Liam2 functions.
        '''
        path = self._output_name()
        h5file = tables.openFile(path, mode="w")

        ent_node = h5file.createGroup("/", "entities", "Entities")
        for ent_name in ['ind', 'foy', 'men', 'futur', 'past']:
            entity = eval('self.' + ent_name)
            if entity is not None:
                entity = entity.fillna(-1)
                try:
                    ent_table = entity.to_records(index=False)
                except:
                    pdb.set_trace()
                dtypes = ent_table.dtype
                final_name = of_name_to_til[ent_name]
                try:
                    table = h5file.createTable(ent_node, final_name, dtypes, title="%s table" % final_name)
                    table.append(ent_table)
                except:
                    pdb.set_trace()
                table.flush()

                if ent_name == 'men':
                    entity = entity.loc[entity['id']>-1]
                    ent_table2 = entity[['pond', 'id', 'period']].to_records(index=False)
                    dtypes2 = ent_table2.dtype
                    table = h5file.createTable(ent_node, 'companies', dtypes2, title="companies table")
                    table.append(ent_table2)
                    table.flush()
                if ent_name == 'ind':
                    ent_table2 = entity[['agem', 'sexe', 'pere', 'mere', 'id', 'findet', 'period']].to_records(
                        index = False)
                    dtypes2 = ent_table2.dtype
                    table = h5file.createTable(ent_node, 'register', dtypes2, title="register table")
                    table.append(ent_table2)
                    table.flush()
        h5file.close()

        # 3 - table longitudinal
        # Note: on conserve le format pandas ici
        store = HDFStore(path)
        for varname, table in self.longitudinal.iteritems():
            table['id'] = table.index
            store.append('longitudinal/' + varname, table)
        store.close()
Code Example #8
    def store_results(self, result, index, columns, hdf5_file):
        self.df = DataFrame(result, columns=columns)
        self.df = self.df.set_index(index)
        self.df.sort_index(inplace=True)

        # Store the DataFrame as an HDF5 file...
        hdf = HDFStore(hdf5_file)
        # Append the dataframe, and ensure addr / host can be 17 chars long
        hdf.append('df',
                   self.df,
                   data_columns=list(columns),
                   min_itemsize={
                       'addr': 17,
                       'host': 17
                   })
        hdf.close()
Code Example #9
File: funcs4pf.py Project: tsherwen/AC_tools
def pf2pandas(wd, files, vars=None, npwd=None, rmvars=None,   \
            debug=False):
    """ 
    Read in GEOS-Chem planeflight output and convert to HDF format

     - Converts date and time columns to datetime format indexes
     - the resultant HDF is in 2D list form 
    ( aka further processing required to 3D /2D output  )
        
    Note:
     - This function is limited by the csv read speed. For large csv output, expect
     significant processing times, or set it to run automatically after the model run.
     - Original files are not removed, so this function will double the space usage for
     output unless the original files are deleted.
    """

    # Ensure the working directory string has a trailing forward slash
    if wd[-1] != '/':
        wd += '/'

#    pfdate =( re.findall('\d+', file ) )[-1]
    if not isinstance(vars, list ):
        vars, sites = get_pf_headers( files[0], debug=debug )
    if not isinstance(npwd, str ):
        npwd = get_dir('npwd')
    hdf =HDFStore( npwd+ 'pf_{}_{}.h5'.format( wd.split('/')[-3], \
        wd.split('/')[-2], wd.split('/')[-1]  ))
    
    if debug:
        print hdf

    for file in files:
        print file#, pfdate

        # convert planeflight.log to DataFrame
        df = pf_csv2pandas( file, vars )
            
        if file==files[0]:
            hdf.put('d1', df, format='table', data_columns=True)
        else:
            hdf.append('d1', df, format='table', data_columns=True)

        if debug:
            print hdf['d1'].shape, hdf['d1'].index
        del df
    hdf.close()
Code Example #10
File: funcs4pf.py Project: BenNewsome/AC_tools
def pf2pandas(wd, files, vars=None, npwd=None, rmvars=None,   \
            debug=False):
    """ 
    Read in GEOS-Chem planeflight output and convert to HDF format

     - Converts date and time columns to datetime format indexes
     - the resultant HDF is in 2D list form 
    ( aka further processing required to 3D /2D output  )
        
    Note:
     - This function is limited by the csv read speed. For large csv output, expect
     significant processing times, or set it to run automatically after the model run.
     - Original files are not removed, so this function will double the space usage for
     output unless the original files are deleted.
    """

    # Ensure the working directory string has a trailing forward slash
    if wd[-1] != '/':
        wd += '/'

#    pfdate =( re.findall('\d+', file ) )[-1]
    if not isinstance(vars, list ):
        vars, sites = get_pf_headers( files[0], debug=debug )
    if not isinstance(npwd, str ):
        npwd = get_dir('npwd')
    hdf =HDFStore( npwd+ 'pf_{}_{}.h5'.format( wd.split('/')[-3], \
        wd.split('/')[-2], wd.split('/')[-1]  ))
    
    if debug:
        print hdf

    for file in files:
        print file#, pfdate

        # convert planeflight.log to DataFrame
        df = pf_csv2pandas( file, vars )
            
        if file==files[0]:
            hdf.put('d1', df, format='table', data_columns=True)
        else:
            hdf.append('d1', df, format='table', data_columns=True)

        if debug:
            print hdf['d1'].shape, hdf['d1'].index
        del df
    hdf.close()
Code Example #11
File: selector.py Project: haje01/wzdat
 def to_frame_hdf(self, store_path, store_key, df_cb=None, max_msg=None,
                  usecols=None, chunk_cnt=CHUNK_CNT):
     """Convert to Pandas DataFrame and save to HDF then returns
     HDFStore."""
     store = HDFStore(store_path, 'w')
     _c = self._to_frame_prop('to_frame_hdf', False)
     for df in self._to_frame_gen(_c, usecols, chunk_cnt):
         min_itemsize = {'kind': 20, 'msg': 255}
         # pytables not support unicode for now
         df['msg'] = df['msg'].apply(lambda m: m.encode('utf8'))
         if df_cb is not None:
             df_cb(df)
         if max_msg is not None:
             min_itemsize['msg'] = max_msg
         store.append(store_key, df, format='table',
                      min_itemsize=min_itemsize)
     store.flush()
     store.close()
     _c.pg.done()
Code Example #12
File: test_file_handling.py Project: Aathi410/Pro123
def test_open_args(setup_path):

    with tm.ensure_clean(setup_path) as path:

        df = tm.makeDataFrame()

        # create an in memory store
        store = HDFStore(
            path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0
        )
        store["df"] = df
        store.append("df2", df)

        tm.assert_frame_equal(store["df"], df)
        tm.assert_frame_equal(store["df2"], df)

        store.close()

    # the file should not have actually been written
    assert not os.path.exists(path)
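The extra keyword arguments above are handed through to PyTables/HDF5. With the CORE driver and the backing store disabled, the whole store lives in memory and nothing reaches disk, which is why the test checks that the path never exists. A stand-alone sketch with hypothetical names:

import numpy as np
from pandas import DataFrame, HDFStore

df = DataFrame(np.random.randn(5, 2), columns=['a', 'b'])

store = HDFStore('never_written.h5', mode='w',
                 driver='H5FD_CORE', driver_core_backing_store=0)
store['df'] = df          # held purely in memory
print(store['df'].shape)  # (5, 2)
store.close()             # nothing is flushed to never_written.h5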
Code Example #13
def csv2hdf5(csv_name, h5_name, dfname, option='frame'):
    """
    Convert a csv file to a dataframe in a hdf5

    Parameters:

    csv_name: string
              csv file name
    h5_name : string
              hdf5 file name
    dfname  : string
              dataframe name
    option  : string, 'frame' or 'table', default to 'frame'
              storage format used in the pytables file
    """

    table = read_csv(csv_name)
    store = HDFStore(h5_name)

    if option == 'frame':
        store.put(dfname, table)

    elif option == 'table':  # for frame_table à la pytables
        object_cols = table.dtypes[table.dtypes == 'object']
        print object_cols.index
        try:
            store.append(dfname, table)
        except:
            print table.get_dtype_counts()
            object_cols = table.dtypes[table.dtypes == 'object']

            for col in object_cols.index:
                print 'removing object column :', col
                del table[col]

            store.append(dfname, table)

    print store
    store.close()
Code Example #14
def csv2hdf5(csv_name, h5_name, dfname, option='frame'):
    """
    Convert a csv file to a dataframe in a hdf5

    Parameters:

    csv_name: string
              csv file name
    h5_name : string
              hdf5 file name
    dfname  : string
              dataframe name
    option  : string, 'frame' or 'table', default to 'frame'
              storage format used in the pytables file
    """

    table = read_csv(csv_name)
    store = HDFStore(h5_name)

    if option == 'frame':
        store.put(dfname, table)

    elif option == 'table': # for frame_table à la pytables
        object_cols =  table.dtypes[ table.dtypes == 'object']
        print object_cols.index
        try:
            store.append(dfname,table)
        except:
            print table.get_dtype_counts()
            object_cols =  table.dtypes[ table.dtypes == 'object']

            for col in object_cols.index:
                print 'removing object column :', col
                del table[col]

            store.append(dfname,table)

    print store
    store.close()
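A hedged usage sketch of the converter above, with hypothetical file names. With option='table' the frame lands in the appendable, queryable table format; as the except branch shows, object columns that make the append fail are simply dropped before retrying.

from pandas import HDFStore

csv2hdf5('survey.csv', 'survey.h5', 'survey_fixed')                  # fixed-format put
csv2hdf5('survey.csv', 'survey.h5', 'survey_table', option='table')  # appendable table

with HDFStore('survey.h5', 'r') as store:
    whole = store['survey_fixed']                              # whole frame only
    subset = store.select('survey_table', start=0, stop=100)   # tables allow partial reads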
Code Example #15
File: of2liam.py Project: TaxIPP-Life/til-core
def main(period=None):
    temps = time.clock()
    input_tab = "C:/openfisca/output/liam/" + "LiamLeg.h5"
    output_tab = "C:/Myliam2/Model/SimulTest.h5"

    store = HDFStore(input_tab)
    goal = HDFStore(output_tab)

    name_convertion = {"ind": "person", "foy": "declar", "men": "menage", "fam": "menage"}
    # on travaille d'abord sur l'ensemble des tables puis on selectionne chaque annee
    # step 1

    for ent in ("ind", "men", "foy", "fam"):
        dest = name_convertion[ent]
        tab_in = store[ent]
        tab_out = goal["entities/" + dest]
        # on jour sur les variable a garder
        # TODO: remonter au niveau de of_on_liam mais la c'est pratique du fait de
        # l'autre table
        ident = "id" + ent
        if ent == "ind":
            ident = "noi"
        # on garde les valeurs de depart
        to_remove = [x for x in tab_in.columns if x in tab_out.columns]
        # on retire les identifiant sauf celui qui deviendra id
        list_id = ["idmen", "idfoy", "idfam", "id", "quifoy", "quifam", "quimen", "noi"]
        list_id.remove(ident)
        to_remove = to_remove + [x for x in tab_in.columns if x in list_id]
        # on n4oublie pas de garder periode
        to_remove.remove("period")
        tab_in = tab_in.drop(to_remove, axis=1)
        tab_in = tab_in.rename(columns={ident: "id"})
        tab_out = merge(tab_in, tab_out, how="right", on=["id", "period"], sort=False)
        goal.remove("entities/" + dest)
        goal.append("entities/" + dest, tab_out)
    #        new_tab = np.array(tab_out.to_records())

    store.close()
    goal.close()
Code Example #16
def read_raw_tecplot_case_and_write_pandas_hdf5(
    case_folder,
    root                  = 0,
    output_file           = 0,
    serration_angle       = 0,
    angle_correction      = 0,
    height_correction     = 0,
    streamwise_correction = 0,
    overwrite             = False,
    time_step_limit       = 0,
    airfoil_normal        = False,
):
    from os.path import isfile,join,splitext
    from os import listdir
    from progressbar import ProgressBar,Percentage,Bar,ETA,SimpleProgress
    from pandas import HDFStore

    # File related things ######################################################
    if not output_file:
        output_file = case_folder+".hdf5"

    if airfoil_normal:
        output_file = output_file+"_AirfoilNormal"

    if not output_file.endswith('.hdf5'):
        output_file = output_file.replace(".hdf5","")+".hdf5"

    if isfile(output_file) and not overwrite:
        print "  Exiting; file exists:\n{0}".format(output_file)
        return 0
    else:
        print "  Writing\n{0}".format(output_file)
    # ##########################################################################


    hdf = HDFStore(output_file)

    time_step_files = sorted([f for f in listdir(join(root,case_folder)) \
             if splitext(f)[1] == '.dat'])

    if time_step_limit:
        time_step_files = time_step_files[:time_step_limit]

    progress = ProgressBar(
         widgets=[
             Bar(),' ',
             Percentage(),' ',
             ETA(), ' (file ',
             SimpleProgress(),')'], 
         maxval=len(time_step_files)
         ).start()

    cnt = 0
    for f,t in zip(time_step_files,range(len(time_step_files))):

       df_t = read_tecplot_file_and_correct_for_location_rotation(
           tecplot_file          = join(root,case_folder,f),
           serration_angle       = serration_angle,
           angle_correction      = angle_correction,
           height_correction     = height_correction,
           streamwise_correction = streamwise_correction,
           time_step             = t,
           airfoil_normal        = airfoil_normal,
       )

       df_t = get_vorticity(df_t)

       if cnt == 0:
           df = df_t.copy()
       else:
           df = df.append( df_t, ignore_index = True)
           #df = df.drop_duplicates()

           try:
               x_cnt = df.x.value_counts()
           except AttributeError:
               print df
               raise
           if not x_cnt.max() == x_cnt.min():
               print "  There's something wrong, counted {0} instances of x"\
                       .format(x_cnt.max())
               return 0

       if t == 30:
           hdf.put(case_folder,
                   df.convert_objects(), 
                   format='table', data_columns=True
                  )
       elif cnt == 30 and not t == cnt:
           hdf.append(case_folder,
                      df.convert_objects(), 
                      format='table', data_columns=True
                     )
           cnt = 0

       cnt += 1

       progress.update(t)

    progress.finish()
    hdf.close()

    return 1
Code Example #17
File: test_file_handling.py Project: Aathi410/Pro123
def test_multiple_open_close(setup_path):
    # gh-4409: open & close multiple times

    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.to_hdf(path, "df", mode="w", format="table")

        # single
        store = HDFStore(path)
        assert "CLOSED" not in store.info()
        assert store.is_open

        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

    with ensure_clean_path(setup_path) as path:

        if pytables._table_file_open_policy_is_strict:
            # multiples
            store1 = HDFStore(path)
            msg = (
                r"The file [\S]* is already opened\.  Please close it before "
                r"reopening in write mode\."
            )
            with pytest.raises(ValueError, match=msg):
                HDFStore(path)

            store1.close()
        else:

            # multiples
            store1 = HDFStore(path)
            store2 = HDFStore(path)

            assert "CLOSED" not in store1.info()
            assert "CLOSED" not in store2.info()
            assert store1.is_open
            assert store2.is_open

            store1.close()
            assert "CLOSED" in store1.info()
            assert not store1.is_open
            assert "CLOSED" not in store2.info()
            assert store2.is_open

            store2.close()
            assert "CLOSED" in store1.info()
            assert "CLOSED" in store2.info()
            assert not store1.is_open
            assert not store2.is_open

            # nested close
            store = HDFStore(path, mode="w")
            store.append("df", df)

            store2 = HDFStore(path)
            store2.append("df2", df)
            store2.close()
            assert "CLOSED" in store2.info()
            assert not store2.is_open

            store.close()
            assert "CLOSED" in store.info()
            assert not store.is_open

            # double closing
            store = HDFStore(path, mode="w")
            store.append("df", df)

            store2 = HDFStore(path)
            store.close()
            assert "CLOSED" in store.info()
            assert not store.is_open

            store2.close()
            assert "CLOSED" in store2.info()
            assert not store2.is_open

    # ops on a closed store
    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.to_hdf(path, "df", mode="w", format="table")

        store = HDFStore(path)
        store.close()

        msg = r"[\S]* file is not open!"
        with pytest.raises(ClosedFileError, match=msg):
            store.keys()

        with pytest.raises(ClosedFileError, match=msg):
            "df" in store

        with pytest.raises(ClosedFileError, match=msg):
            len(store)

        with pytest.raises(ClosedFileError, match=msg):
            store["df"]

        with pytest.raises(ClosedFileError, match=msg):
            store.select("df")

        with pytest.raises(ClosedFileError, match=msg):
            store.get("df")

        with pytest.raises(ClosedFileError, match=msg):
            store.append("df2", df)

        with pytest.raises(ClosedFileError, match=msg):
            store.put("df3", df)

        with pytest.raises(ClosedFileError, match=msg):
            store.get_storer("df2")

        with pytest.raises(ClosedFileError, match=msg):
            store.remove("df2")

        with pytest.raises(ClosedFileError, match=msg):
            store.select("df")

        msg = "'HDFStore' object has no attribute 'df'"
        with pytest.raises(AttributeError, match=msg):
            store.df
Code Example #18
File: utils.py Project: alybel/fintf
def load_from_store_or_yahoo(start=None, end=None, symbol=None):
    append = False
    hdf = HDFStore(settings.storage_path)
    today = dt.datetime.today().date()

    yahoo_symbol = symbol
    symbol = clean_symbol(symbol)

    # this case, earlier data than in store is requested. The table needs to be rewritten
    if symbol in hdf:
        df = hdf[symbol]
        start_store = df.index.min()
        if isinstance(start, str):
            start = dt.datetime.strptime(start, '%Y-%m-%d')
        if start_store.date() > start:
            hdf.remove(symbol)
            lprint('start date was earlier than the oldest date in the storage. storage needs to be rewritten.')

    if symbol in hdf:
        df = hdf[symbol]
        end_store = df.index.max()

        # check if today is a weekend day
        weekday = dt.datetime.today().weekday()
        last_trading_day = today
        if weekday in [5, 6]:
            correction = 1 if weekday == 5 else 2
            last_trading_day = today - dt.timedelta(correction)

        # if the last trading day is the max date in the store than do not reload data
        if last_trading_day == end_store.date():
            lprint('loaded %s data from storage.' % symbol)
            return df

        # if the last trading is younger that the last trading day, load the difference
        end = today + dt.timedelta(1)
        start = end_store
        append = True

    # if no store was found, use the start and end from above
    df = None
    count = 0
    while df is None and count < 10:
        try:
            df = get_yahoo_data(start=start, end=end, symbol=yahoo_symbol)
        except RemoteDataError:
            time.sleep(10 + int(np.random.rand() * 10))
        count += 1

    if df is None:
        raise Exception('Even after 10 trials data could not be loaded from yahoo')

    # remove blanks in the header
    df.columns = [x.replace(' ', '_') for x in df.columns]

    # store or append to hdf5 storage

    if symbol in hdf:
        # drop duplicates
        exist_df = hdf[symbol]
        df = df[~df.index.isin(exist_df.index)]

    if append:
        hdf.append(symbol, df, format='table', data_columns=True)
    else:
        df.drop_duplicates(inplace=True)
        hdf.put(symbol, df, format='table', data_columns=True)
    if not df.index.is_unique:
        lprint('index of %s is not unique' % symbol)
    return df
Code Example #19
File: hdf.py Project: Itay4/pandas
class HDFStoreDataFrame(BaseIO):

    def setup(self):
        N = 25000
        index = tm.makeStringIndex(N)
        self.df = DataFrame({'float1': np.random.randn(N),
                             'float2': np.random.randn(N)},
                            index=index)
        self.df_mixed = DataFrame({'float1': np.random.randn(N),
                                   'float2': np.random.randn(N),
                                   'string1': ['foo'] * N,
                                   'bool1': [True] * N,
                                   'int1': np.random.randint(0, N, size=N)},
                                  index=index)
        self.df_wide = DataFrame(np.random.randn(N, 100))
        self.start_wide = self.df_wide.index[10000]
        self.stop_wide = self.df_wide.index[15000]
        self.df2 = DataFrame({'float1': np.random.randn(N),
                              'float2': np.random.randn(N)},
                             index=date_range('1/1/2000', periods=N))
        self.start = self.df2.index[10000]
        self.stop = self.df2.index[15000]
        self.df_wide2 = DataFrame(np.random.randn(N, 100),
                                  index=date_range('1/1/2000', periods=N))
        self.df_dc = DataFrame(np.random.randn(N, 10),
                               columns=['C%03d' % i for i in range(10)])

        self.fname = '__test__.h5'

        self.store = HDFStore(self.fname)
        self.store.put('fixed', self.df)
        self.store.put('fixed_mixed', self.df_mixed)
        self.store.append('table', self.df2)
        self.store.append('table_mixed', self.df_mixed)
        self.store.append('table_wide', self.df_wide)
        self.store.append('table_wide2', self.df_wide2)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store(self):
        self.store.get('fixed')

    def time_read_store_mixed(self):
        self.store.get('fixed_mixed')

    def time_write_store(self):
        self.store.put('fixed_write', self.df)

    def time_write_store_mixed(self):
        self.store.put('fixed_mixed_write', self.df_mixed)

    def time_read_store_table_mixed(self):
        self.store.select('table_mixed')

    def time_write_store_table_mixed(self):
        self.store.append('table_mixed_write', self.df_mixed)

    def time_read_store_table(self):
        self.store.select('table')

    def time_write_store_table(self):
        self.store.append('table_write', self.df)

    def time_read_store_table_wide(self):
        self.store.select('table_wide')

    def time_write_store_table_wide(self):
        self.store.append('table_wide_write', self.df_wide)

    def time_write_store_table_dc(self):
        self.store.append('table_dc_write', self.df_dc, data_columns=True)

    def time_query_store_table_wide(self):
        self.store.select('table_wide', where="index > self.start_wide and "
                                              "index < self.stop_wide")

    def time_query_store_table(self):
        self.store.select('table', where="index > self.start and "
                                         "index < self.stop")

    def time_store_repr(self):
        repr(self.store)

    def time_store_str(self):
        str(self.store)

    def time_store_info(self):
        self.store.info()
Code Example #20
File: hdf.py Project: zhuomingliang/pandas
class HDFStoreDataFrame(BaseIO):

    goal_time = 0.2

    def setup(self):
        N = 25000
        index = tm.makeStringIndex(N)
        self.df = DataFrame(
            {
                'float1': np.random.randn(N),
                'float2': np.random.randn(N)
            },
            index=index)
        self.df_mixed = DataFrame(
            {
                'float1': np.random.randn(N),
                'float2': np.random.randn(N),
                'string1': ['foo'] * N,
                'bool1': [True] * N,
                'int1': np.random.randint(0, N, size=N)
            },
            index=index)
        self.df_wide = DataFrame(np.random.randn(N, 100))
        self.start_wide = self.df_wide.index[10000]
        self.stop_wide = self.df_wide.index[15000]
        self.df2 = DataFrame(
            {
                'float1': np.random.randn(N),
                'float2': np.random.randn(N)
            },
            index=date_range('1/1/2000', periods=N))
        self.start = self.df2.index[10000]
        self.stop = self.df2.index[15000]
        self.df_wide2 = DataFrame(np.random.randn(N, 100),
                                  index=date_range('1/1/2000', periods=N))
        self.df_dc = DataFrame(np.random.randn(N, 10),
                               columns=['C%03d' % i for i in range(10)])

        self.fname = '__test__.h5'

        self.store = HDFStore(self.fname)
        self.store.put('fixed', self.df)
        self.store.put('fixed_mixed', self.df_mixed)
        self.store.append('table', self.df2)
        self.store.append('table_mixed', self.df_mixed)
        self.store.append('table_wide', self.df_wide)
        self.store.append('table_wide2', self.df_wide2)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store(self):
        self.store.get('fixed')

    def time_read_store_mixed(self):
        self.store.get('fixed_mixed')

    def time_write_store(self):
        self.store.put('fixed_write', self.df)

    def time_write_store_mixed(self):
        self.store.put('fixed_mixed_write', self.df_mixed)

    def time_read_store_table_mixed(self):
        self.store.select('table_mixed')

    def time_write_store_table_mixed(self):
        self.store.append('table_mixed_write', self.df_mixed)

    def time_read_store_table(self):
        self.store.select('table')

    def time_write_store_table(self):
        self.store.append('table_write', self.df)

    def time_read_store_table_wide(self):
        self.store.select('table_wide')

    def time_write_store_table_wide(self):
        self.store.append('table_wide_write', self.df_wide)

    def time_write_store_table_dc(self):
        self.store.append('table_dc_write', self.df_dc, data_columns=True)

    def time_query_store_table_wide(self):
        self.store.select('table_wide',
                          where="index > self.start_wide and "
                          "index < self.stop_wide")

    def time_query_store_table(self):
        self.store.select('table',
                          where="index > self.start and "
                          "index < self.stop")

    def time_store_repr(self):
        repr(self.store)

    def time_store_str(self):
        str(self.store)

    def time_store_info(self):
        self.store.info()
Code Example #21
File: hdf.py Project: ygene2/pandas
class HDFStoreDataFrame(BaseIO):
    def setup(self):
        N = 25000
        index = tm.makeStringIndex(N)
        self.df = DataFrame(
            {"float1": np.random.randn(N), "float2": np.random.randn(N)}, index=index
        )
        self.df_mixed = DataFrame(
            {
                "float1": np.random.randn(N),
                "float2": np.random.randn(N),
                "string1": ["foo"] * N,
                "bool1": [True] * N,
                "int1": np.random.randint(0, N, size=N),
            },
            index=index,
        )
        self.df_wide = DataFrame(np.random.randn(N, 100))
        self.start_wide = self.df_wide.index[10000]
        self.stop_wide = self.df_wide.index[15000]
        self.df2 = DataFrame(
            {"float1": np.random.randn(N), "float2": np.random.randn(N)},
            index=date_range("1/1/2000", periods=N),
        )
        self.start = self.df2.index[10000]
        self.stop = self.df2.index[15000]
        self.df_wide2 = DataFrame(
            np.random.randn(N, 100), index=date_range("1/1/2000", periods=N)
        )
        self.df_dc = DataFrame(
            np.random.randn(N, 10), columns=["C%03d" % i for i in range(10)]
        )

        self.fname = "__test__.h5"

        self.store = HDFStore(self.fname)
        self.store.put("fixed", self.df)
        self.store.put("fixed_mixed", self.df_mixed)
        self.store.append("table", self.df2)
        self.store.append("table_mixed", self.df_mixed)
        self.store.append("table_wide", self.df_wide)
        self.store.append("table_wide2", self.df_wide2)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store(self):
        self.store.get("fixed")

    def time_read_store_mixed(self):
        self.store.get("fixed_mixed")

    def time_write_store(self):
        self.store.put("fixed_write", self.df)

    def time_write_store_mixed(self):
        self.store.put("fixed_mixed_write", self.df_mixed)

    def time_read_store_table_mixed(self):
        self.store.select("table_mixed")

    def time_write_store_table_mixed(self):
        self.store.append("table_mixed_write", self.df_mixed)

    def time_read_store_table(self):
        self.store.select("table")

    def time_write_store_table(self):
        self.store.append("table_write", self.df)

    def time_read_store_table_wide(self):
        self.store.select("table_wide")

    def time_write_store_table_wide(self):
        self.store.append("table_wide_write", self.df_wide)

    def time_write_store_table_dc(self):
        self.store.append("table_dc_write", self.df_dc, data_columns=True)

    def time_query_store_table_wide(self):
        self.store.select(
            "table_wide", where="index > self.start_wide and index < self.stop_wide"
        )

    def time_query_store_table(self):
        self.store.select("table", where="index > self.start and index < self.stop")

    def time_store_repr(self):
        repr(self.store)

    def time_store_str(self):
        str(self.store)

    def time_store_info(self):
        self.store.info()
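The query benchmarks above work because the table format keeps the index queryable; the where string is parsed by HDFStore.select, and variables such as self.start are resolved from the calling scope. A minimal stand-alone sketch with hypothetical names:

import numpy as np
from pandas import DataFrame, HDFStore, date_range

df = DataFrame({'v': np.random.randn(1000)},
               index=date_range('1/1/2000', periods=1000))
start, stop = df.index[200], df.index[400]

with HDFStore('query_example.h5', 'w') as store:
    store.append('table', df)   # table format: the index is queryable
    part = store.select('table', where='index > start and index < stop')
    print(len(part))            # only the rows in the window are read back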
Code Example #22
File: weatherStation.py Project: thejonty/sensoRaspi
#store = {} #dictionary to store objects
#hdf = store = {} #dictionary to store objects

am2320 = AM2320(1)

while (1):

    hdf = HDFStore(filename)
    store1 = []
    jj = 12
    curr_time = datetime.datetime.now()

    while (jj > 0):
        jj = jj - 1
        (t, h) = am2320.readSensor()
        #curr_time = datetime.datetime.now()
        #yr = datetime.datetime.strftime(curr_time, "%Y")
        #mn = datetime.datetime.strftime(curr_time, "%m")
        #dy = datetime.datetime.strftime(curr_time, "%d")
        #tm = datetime.datetime.strftime(curr_time, "%H:%m")
        #store1.append([yr,mn,dy,tm,t,h])
        store1.append([t, h])
        sleep(300)
    #df = DataFrame(store1,columns=('Year', 'Month', 'Day', 'Time', 'Temp', 'Humidity'))
    df = DataFrame(store1,
                   index=date_range(start=curr_time, periods=12, freq='300S'),
                   columns=('Temp', 'Humidity'))

    hdf.append('Weather1', df, format='table', data_columns=True)
#    print hdf['Weather1'].shape
Code Example #23
#Loading insee data
projection = HDFStore(
    'C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\France\sources\data_fr\proj_pop_insee\proj_pop.h5',
    'r')
projection_dataframe = projection[
    '/projpop0760_FECbasESPbasMIGbas']  # <-Do not know the precise meaning of this. For testing only

#Combining
concatened = concat([population, projection_dataframe], verify_integrity=True)
concatened = concatened.reset_index()
concatened['year'] = concatened.year.convert_objects(convert_numeric=True)
concatened = concatened.set_index(['age', 'sex', 'year'])

#Saving as HDF5 file
export = HDFStore('neo_population.h5')
export.append('pop', concatened, data_columns=concatened.columns)
export.close()
export = HDFStore('neo_population.h5', 'r')
print export

#Creating the simulation object
net_payments = Simulation()
net_payments.set_population(population)

France = 'France'
net_payments.set_country(France)
r = 0.0
g = 0.01
net_payments.set_discount_rate(r)
net_payments.set_growth_rate(g)
# print net_payments
Code Example #24
File: Carole_Bonnet.py Project: benjello/ga
projection = HDFStore(
    "C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\France\sources\data_fr\proj_pop_insee\proj_pop.h5", "r"
)
projection_dataframe = projection[
    "/projpop0760_FECbasESPbasMIGbas"
]  # <-Do not know the precise meaning of this. For testing only

# Combining
concatened = concat([population, projection_dataframe], verify_integrity=True)
concatened = concatened.reset_index()
concatened["year"] = concatened.year.convert_objects(convert_numeric=True)
concatened = concatened.set_index(["age", "sex", "year"])

# Saving as HDF5 file
export = HDFStore("neo_population.h5")
export.append("pop", concatened, data_columns=concatened.columns)
export.close()
export = HDFStore("neo_population.h5", "r")
print export


# Creating the simulation object
net_payments = Simulation()
net_payments.set_population(population)

France = "France"
net_payments.set_country(France)
r = 0.0
g = 0.01
net_payments.set_discount_rate(r)
net_payments.set_growth_rate(g)
Code Example #25
def read_raw_tecplot_folder_and_write_pandas_hdf5(
    case_folder,
    root                  = 0,
    output_file           = 0,
    output_root           = 0,
    overwrite             = False,
):
    from os.path     import isfile,join,splitext
    from os          import listdir
    from progressbar import ProgressBar,Percentage,Bar
    from progressbar import ETA,SimpleProgress
    from pandas      import DataFrame, HDFStore

    # File related things ######################################################
    if not output_file:
        output_file = case_folder+"_Aligned.hdf5"

    if not output_root:
        output_root = '/media/carlos/6E34D2CD34D29783/' +\
                '2015-02_SerrationPIV/TR_Data_Location_Calibrated_Article3'

    if not output_file.endswith('_Aligned.hdf5'):
        output_file = output_file.replace("_Aligned.hdf5","")+"_Aligned.hdf5"

    if 'STE' in case_folder or 'z10' in case_folder:
        output_file = output_file.replace( '.hdf5', '_AirfoilNormal.hdf5' )

    if isfile(join( output_root, output_file )) and not overwrite:
        print "  Exiting; file exists:\n      {0}".format(output_file)
        return 0
    else:
        print "  Writing\n      {0}".format(output_file)

    # ##########################################################################


    time_step_files = sorted(
        [join(root,case_folder,f) for f in listdir(join( root, case_folder )) \
         if splitext(f)[1] == '.dat']
    )

    progress = ProgressBar(
         widgets=[
             Bar(),' ',
             Percentage(),' ',
             ETA(), ' (file ',
             SimpleProgress(),')'], 
         maxval=len(time_step_files)
         ).start()

    cnt = 0

    hdf_store = HDFStore( join( output_root, output_file ) )

    for f,t in zip(time_step_files,range(len(time_step_files))):

       df_t = read_tecplot_file(
           tecplot_folder         = join( root, case_folder ),
           tecplot_time_step_file = f,
           time_step              = t,
       )

       if cnt == 0:
           df = df_t.copy()
       else:
           df = df.append( df_t, ignore_index = True)

       if cnt == 50:

           df = correct_df_translation_rotation( df )\
                   [['x','y','t','u','v','w']]

           df = df.sort_values( by = ['x','y','t'] )

           #df.set_index( ['x','y'], inplace = True)

           if t == 0:
               hdf_store.put( 'data', df , 
                                data_columns = ['x','y','t'],
                               format = 't')
           else:
               hdf_store.append( 'data', df , 
                                data_columns = ['x','y','t'],
                               format = 't')

           cnt = 0

           df = DataFrame()

       cnt += 1

       progress.update(t)

    progress.finish()

    hdf_store.close()

    return 1
Code Example #26
File: DataTil.py Project: leeseungho90/Til
    def store_to_liam(self):
        '''
        Saves the data in the format subsequently used by the Til model.
        Selects the variables that Til uses downstream.
        Calls Liam2 functions.
        '''

        path_param = os.path.join(path_model, "til_base_model\param", "globals.csv")
        path = os.path.join(path_model, self._output_name())
        h5file = tables.openFile( path, mode="w")
        # 1 - on met d'abord les global en recopiant le code de liam2
        # globals_def = {'periodic': {'path': 'param/globals.csv'}}
        globals_def = {'periodic': {'path': path_param}}

        const_node = h5file.createGroup("/", "globals", "Globals")
        localdir = path_model
        for global_name, global_def in globals_def.iteritems():
            print(" %s" % global_name)
            req_fields = ([('PERIOD', int)] if global_name == 'periodic'
                                            else [])
            kind, info = imp.load_def(localdir, global_name,
                                  global_def, req_fields)
            # comme dans import
#             if kind == 'ndarray':
#                 imp.array_to_disk_array(h5file, const_node, global_name, info,
#                                     title=global_name,
#                                     compression=compression)
#             else:
            assert kind == 'table'
            fields, numlines, datastream, csvfile = info
            imp.stream_to_table(h5file, const_node, global_name, fields,
                            datastream, numlines,
                            title="%s table" % global_name,
                            buffersize=10 * 2 ** 20,
                            compression=None)

        # 2 - ensuite on s'occupe des entities
        ent_node = h5file.createGroup("/", "entities", "Entities")
        for ent_name in ['ind','foy','men','futur','past']:
            entity = eval('self.'+ ent_name)
            if entity is not None:
                entity = entity.fillna(-1)
                ent_table = entity.to_records(index=False)
                dtypes = ent_table.dtype
                final_name = of_name_to_til[ent_name]
                table = h5file.createTable(ent_node, final_name, dtypes, title="%s table" % final_name)
                table.append(ent_table)
                table.flush()

                if ent_name == 'men':
                    entity = entity.loc[entity['id']>-1]
                    ent_table2 = entity[['pond','id','period']].to_records(index=False)
                    dtypes2 = ent_table2.dtype
                    table = h5file.createTable(ent_node, 'companies', dtypes2, title="companies table")
                    table.append(ent_table2)
                    table.flush()
                if ent_name == 'ind':
                    ent_table2 = entity[['agem','sexe','pere','mere','id','findet','period']].to_records(index=False)
                    dtypes2 = ent_table2.dtype
                    table = h5file.createTable(ent_node, 'register', dtypes2, title="register table")
                    table.append(ent_table2)
                    table.flush()
        h5file.close()

        # 3 - table longitudinal
        # Note: on conserve le format pandas ici
        store = HDFStore(path)
        for varname, tab in self.longitudinal.iteritems():
            #format to liam
            table = tab
            table['id'] = table.index

            store.append('longitudinal/' + varname, table)
        store.close()
Code Example #27
File: hydllp.py Project: mullenkamp/HydroPandas
def rd_hydstra(varto, sites=None, data_source='A', from_date=None, to_date=None, from_mod_date=None, to_mod_date=None, interval='day', qual_codes=[30, 20, 10, 11, 21, 18], concat_data=True, export=None):
    """
    Function to read in data from Hydstra's database using HYDLLP. This function extracts all sites with a specific variable code (varto).

    Parameters
    ----------
    varto : int or float
        The hydstra conversion data variable (140.00 is flow).
    sites: list of str
        List of sites to be returned. None includes all sites.
    data_source : str
        Hydstra datasource code (usually 'A').
    from_date: str
        The starting date for the returned data given other constraints.
    to_date: str
        The ending date for the returned data given other constraints.
    from_mod_date: str
        The starting date when the data has been modified.
    to_mod_date: str
        The ending date when the data has been modified.
    interval : str
        The frequency of the output data (year, month, day, hour, minute, second, period). If data_type is 'point', then interval cannot be 'period' (use anything else, it doesn't matter).
    qual_codes : list of int
        The quality codes for output.
    export_path: str
        Path string where the data should be saved, or None to not save the data.

    Return
    ------
    DataFrame
        In long format with site and time as a MultiIndex and data, qual_code, and hydstra_var_code as columns.
    """
    ### Parameters
    device_data_type = {100: 'mean', 140: 'mean', 143: 'mean', 450: 'mean', 110: 'mean', 130: 'mean', 10: 'tot'}

    today1 = date.today()
    dtype_dict = {'Site': 'varchar', 'HydstraCode': 'smallint', 'Time': 'date', 'Value': 'float', 'QualityCode': 'smallint', 'ModDate': 'date'}

    ### Determine the period lengths for all sites and variables
    sites_var_period = hydstra_sites_var_periods(varto=varto, sites=sites, data_source=data_source)
#    sites_list = sites_var_period.site.unique().tolist()
    varto_list = sites_var_period.varto.unique().astype('int32').tolist()

    ### Restrict period ranges - optional
    if isinstance(from_date, str):
        from_date1 = Timestamp(from_date)
        from_date_df = sites_var_period.from_date.apply(lambda x: x if x > from_date1 else from_date1)
        sites_var_period['from_date'] = from_date_df
    if isinstance(to_date, str):
        to_date1 = Timestamp(to_date)
        to_date_df = sites_var_period.to_date.apply(lambda x: x if x > to_date1 else to_date1)
        sites_var_period['to_date'] = to_date_df

    ### Only pull out data according to the modifcation date ranges - optional
    if isinstance(from_mod_date, str):
        sites_block = sites_var_period[sites_var_period.varfrom == sites_var_period.varto]
        varto_block = sites_block.varto.unique().astype('int32').tolist()

        chg1 = hydstra_data_changes(varto_block, sites_block.site.unique(), from_mod_date=from_mod_date, to_mod_date=to_mod_date).drop('to_date', axis=1)
        if 140 in varto_list:
            sites_flow = sites_var_period[(sites_var_period.varfrom != sites_var_period.varto) & (sites_var_period.varto == 140)]
            chg2 = rating_changes(sites_flow.site.unique().tolist(), from_mod_date=from_mod_date, to_mod_date=to_mod_date)
            chg1 = concat([chg1, chg2])

        chg1.rename(columns={'from_date': 'mod_date'}, inplace=True)
        chg3 = merge(sites_var_period, chg1, on=['site', 'varfrom', 'varto'])
        chg4 = chg3[chg3.to_date > chg3.mod_date].copy()
        chg4['from_date'] = chg4['mod_date']
        sites_var_period = chg4.drop('mod_date', axis=1).copy()

    ### Convert datetime to date as str
    sites_var_period2 = sites_var_period.copy()
    sites_var_period2['from_date'] = sites_var_period2['from_date'].dt.date.astype(str)
    sites_var_period2['to_date'] = sites_var_period2['to_date'].dt.date.astype(str)

    site_str_len = sites_var_period2.site.str.len().max()

    if isinstance(export, str):
            if export.endswith('.h5'):
                store = HDFStore(export, mode='a')

    data = DataFrame()
    for tup in sites_var_period2.itertuples(index=False):
        print('Processing site: ' + str(tup.site))
        varto = tup.varto
        data_type = device_data_type[varto]

        df = rd_hydstra_db([tup.site], data_type=data_type, start=tup.from_date, end=tup.to_date, varfrom=tup.varfrom, varto=varto, interval=interval, qual_codes=qual_codes)
        if df.empty:
            continue
        df['HydstraCode'] = varto
        if varto == 143:
            df.loc[:, 'data'] = df.loc[:, 'data'] * 0.001
            df['HydstraCode'] = 140
        ### Make sure the data types are correct
        df.rename(columns={'data': 'Value', 'qual_code': 'QualityCode'}, inplace=True)
        df.index.rename(['Site', 'Time'], inplace=True)
        df.loc[:, 'QualityCode'] = df['QualityCode'].astype('int32')
        df.loc[:, 'HydstraCode'] = df['HydstraCode'].astype('int32')
        df.loc[:, 'ModDate'] = today1
        if isinstance(export, dict):
            df = df.reset_index()
            from_date1 = str(df.Time.min().date())
            to_date1 = str(df.Time.max().date())
            del_rows_dict = {'where_col': {'Site': [str(tup.site)], 'HydstraCode': [str(df['HydstraCode'][0])]}, 'from_date':from_date1, 'to_date': to_date1, 'date_col': 'Time'}
            write_sql(df, dtype_dict=dtype_dict, del_rows_dict=del_rows_dict, drop_table=False, create_table=False, **export)
        elif isinstance(export, str):
            if export.endswith('.h5'):
                try:
                    store.append(key='var_' + str(varto), value=df, min_itemsize={'site': site_str_len})
                except Exception as err:
                    store.close()
                    raise err
        if concat_data:
            data = concat([data, df])
    if isinstance(export, str):
        store.close()
    if concat_data:
        return data
Code Example #28
def extract_relevant_data( case_list = [], exceptions = [], y_delta_locs = [],
                         x_2h_locs = [] , plot = False):
    """ This will extract the wall normal data at the spanwise location
    TE at a certain y density
    """

    from os                           import listdir
    from os.path                      import join,split
    from pandas                       import DataFrame, HDFStore, read_pickle
    from boundary_layer_routines      import return_bl_parameters
    from raw_data_processing_routines import decript_case_name
    from progressbar                  import ProgressBar,Percentage
    from progressbar                  import Bar,ETA,SimpleProgress
    from numpy                        import array, round, linspace
    from data_cleaning_routines       import show_surface_from_df

    x_2h_locs    = round( array( x_2h_locs ),    2 )
    y_delta_locs = round( array( y_delta_locs ), 2 )

    # Get the available HDF5 files #############################################
    hdf5_root = '/media/carlos/6E34D2CD34D29783/' +\
                '2015-02_SerrationPIV/TR_Data_Location_Calibrated_Article3'

    if not len(case_list):
        hdf5_files = [f for f in listdir( hdf5_root ) \
                      if f.endswith('.hdf5') \
                      and not f in exceptions ]
    else:
        hdf5_files = [f for f in listdir( hdf5_root ) \
                      if f.endswith('.hdf5') \
                      and f in case_list ]
    # ##########################################################################

    for hf in [join( hdf5_root, f ) for f in hdf5_files]:

        f = split( hf )[1].replace('_AirfoilNormal','')\
                .replace('_Aligned.hdf5','')

        print "   Extracting data from {0}".format(f)
        print "     at the normalized streamwise locations:"
        print "     {0}".format( x_2h_locs )


        hdf_t = HDFStore( hf, 'r' )

        # Get the available coordinates ########################################
        hf_coords = hdf_t.select('data', where = [ 't = 0' ], 
                                 columns = [ 'x', 'y' ] )
        # ######################################################################

        # Turn the non-dim requested locations into physical coords ############
        requested_locations = []
        requested_normalized_locations = []
        #for x,x_norm in zip(x_2h_locs * tooth_length, x_2h_locs):
        #    for y_d in y_delta_locs:
        #        bl_params = return_bl_parameters( f , [x] )
        #        d_99 = bl_params.delta_99.values[0]
        #        #if "STE" in f:
        #        #    d_99 = 9.4
        #        y = y_d * d_99
        #        requested_locations.append( (x,y) )
        #        requested_normalized_locations.append( ( x_norm, y_d ) )

        # Get the normalization locations depending on the case ################
        if 'z00' in f and not 'STE' in f:
            x_bl_loc = 40
        elif 'z05' in f:
            x_bl_loc = 20
        elif 'z10' in f or 'STE' in f:
            x_bl_loc = 0

        bl_params = return_bl_parameters( f , [x_bl_loc] )
        d_99 = bl_params.delta_99.values[0]

        for x,x_norm in zip(x_2h_locs * tooth_length, x_2h_locs):
            for y_d in y_delta_locs:
                y = y_d * d_99
                requested_locations.append( (x,y) )
                requested_normalized_locations.append( ( x_norm, y_d ) )
        print "    Normalizing to a BL thickness of {0:.2f} mm".\
                format(d_99)
        # ######################################################################

        available_xy_locs = hf_coords[
            ( hf_coords.x > min( x_2h_locs ) * 40. ) & \
            ( hf_coords.x < max( x_2h_locs ) * 40. ) & \
            ( hf_coords.y > min( y_delta_locs ) * d_99 ) & \
            ( hf_coords.y < max( y_delta_locs ) * d_99 )
        ][ ['x','y'] ]
              
        available_xy_locs = [tuple(x) for x in available_xy_locs.values]

        if plot:

            trailing_edge,phi,alpha,U,z = decript_case_name( f )

            if trailing_edge == 'serrated': device = 'Sr20R21'
            elif trailing_edge == 'straight': device = 'STE'
            elif trailing_edge == 'slitted': device = 'Slit20R21'

            case_name = "{0}_phi{1}_alpha{2}_U{3}_loc{4}_tr.dat".format(
                device, phi, alpha, U, z
            )

            df_av = read_pickle( 'averaged_data/' + case_name + '.p' )
            show_surface_from_df( df_av , points = available_xy_locs ,
                                plot_name = 'ReservedData/' + f + '.png'
                                )

        query   = ''
        cnt_all = 0

        cnt = 0
        time_series_hdf = HDFStore( 'ReservedData/' + f + '.hdf5' , 'w' )

        vertical_split_blocks = 10

        progress = ProgressBar(
             widgets=[
                 Bar(),' ',
                 Percentage(),' ',
                 ETA(), ' (query bunch  ',
                 SimpleProgress(),')'], 
             maxval = vertical_split_blocks
             ).start()

        # Don't try to get it all at once; split the vertical in 4 pieces
        y_ranges = linspace( 
            min( y_delta_locs ),
            max( y_delta_locs ),
            vertical_split_blocks
        ) * d_99

        xmin = min(x_2h_locs) * 40.
        xmax = max(x_2h_locs) * 40.

        for ymin, ymax in zip( y_ranges[:-1], y_ranges[1:] ):

            query = " x>={0} & x<{1} & y>={2} & y<{3} ".\
                    format( xmin, xmax, ymin, ymax )

            df_t = hdf_t.select(
                key   = 'data',
                where = [ query ],
            )

            # tag each sample with its nearest normalized streamwise (x/2h) and
            # wall-normal (y/delta) location; these become queryable data columns
            df_t['near_x_2h']    = round( df_t.x / 40.,  4 )
            df_t['near_y_delta'] = round( df_t.y / d_99, 4 )

            if not cnt:
                time_series_hdf.put( 'data', df_t , 
                                    data_columns = [
                                        'near_x_2h',
                                        'near_y_delta',
                                        't'
                                    ],
                                    format = 't')
            else:
                time_series_hdf.append( 'data', df_t , 
                                       data_columns = [
                                           'near_x_2h',
                                           'near_y_delta',
                                           't'
                                       ],
                               format = 't')

            cnt_all += 1
            cnt     += 1

            progress.update(cnt_all)

            df_t = DataFrame()


        progress.finish()
        hdf_t.close()
        time_series_hdf.close()
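The two columns added before each write (near_x_2h and near_y_delta) are declared as data columns, so the per-case store written above can later be queried by normalized location without reading the whole table. A minimal read-back sketch under that assumption; the case name and the query values below are placeholders:

from pandas import HDFStore

# hypothetical case name; in the script above this is the loop variable `f`
case = 'Sr20R21_phi0_alpha0_U20_loc00_tr'

with HDFStore('ReservedData/' + case + '.hdf5', 'r') as store:
    # query directly on the data columns declared when the table was written
    ts = store.select('data',
                      where=['near_x_2h == 0.5', 'near_y_delta == 0.1'])
    print(ts.shape)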
コード例 #29
0
def append_hdfs(self, df, path, key):
    try:
        store = HDFStore(path)
        try:
            store.append(key, df)
        finally:
            # release the file handle even if the append fails
            store.close()
    except Exception:
        print("Error appending data to {0} in {1}".format(key, path))
コード例 #31
0
ファイル: Carole_Bonnet.py プロジェクト: jsantoul/ga
def junk():
    # raw string so the backslashes in the Windows path are not read as escapes
    population = read_csv(
        r'C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\France\sources\data_fr\pop.csv',
        sep=',')
    # print population.columns
    population = population.set_index(['age', 'sex'])
    population = population.stack()
    population = population.reset_index()
    # convert_objects() was removed from pandas; assumes `from pandas import to_numeric`
    population['level_2'] = to_numeric(population.level_2, errors='coerce')

    population['year'] = population['level_2']
    population['pop'] = population[0]
    del population['level_2']
    del population[0]
    population = population.set_index(['age', 'sex', 'year'])

    #Remove the years 2007 and beyond to ensure integrity when combined with INSEE data
    year = list(range(1991, 2007, 1))
    filter_year = array(
        [x in year for x in population.index.get_level_values(2)])
    population = population.iloc[filter_year, :]

    # Loading INSEE data (raw string again for the Windows path)
    projection = HDFStore(
        r'C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\France\sources\data_fr\proj_pop_insee\proj_pop.h5',
        'r')
    projection_dataframe = projection[
        '/projpop0760_FECbasESPbasMIGbas']  # <-Do not know the precise meaning of this. For testing only

    #Combining
    concatened = concat([population, projection_dataframe],
                        verify_integrity=True)
    concatened = concatened.reset_index()
    concatened['year'] = to_numeric(concatened.year, errors='coerce')
    concatened = concatened.set_index(['age', 'sex', 'year'])

    #Saving as HDF5 file
    export = HDFStore('neo_population.h5')
    export.append('pop', concatened, data_columns=concatened.columns)
    export.close()
    export = HDFStore('neo_population.h5', 'r')
    print(export)

    #Creating the simulation object
    net_payments = Simulation()
    net_payments.set_population(population)

    France = 'France'
    net_payments.set_country(France)
    r = 0.0
    g = 0.01
    net_payments.set_discount_rate(r)
    net_payments.set_growth_rate(g)
    # print net_payments
    # print net_payments.growth_rate, net_payments.discount_rate, net_payments.country

    net_payments.load_population("neo_population.h5", 'pop')
    net_payments.load_profiles(
        r"C:\Users\Utilisateur\Documents\GitHub\ga\src\countries\profiles.h5",
        "profiles.h5")
    year_length = 100
    net_payments.set_population_projection(year_length=year_length,
                                           method="exp_growth",
                                           rate=0.02)
    net_payments.set_tax_projection(method="per_capita",
                                    typ=None,
                                    rate=g,
                                    discount_rate=r)

    net_payments.create_cohorts()

    #Creating a column with total taxes paid.
    for typ in net_payments._types:
        net_payments['total'] += hstack(net_payments[typ])

    print(net_payments['total'])
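Because the 'pop' table above is appended with its columns declared as data columns and the (age, sex, year) index is stored alongside them in table format, the file can be queried on reload rather than read whole. A small sketch under that assumption:

from pandas import HDFStore

with HDFStore('neo_population.h5', 'r') as store:
    # pull a single year without loading the full population table
    pop_2006 = store.select('pop', where=['year == 2006'])
    print(pop_2006.head())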
コード例 #32
0
import numpy as np
from pandas import HDFStore, DataFrame

# create (or open) an HDF5 file and open it in append mode
hdf = HDFStore('data/hdata.h5')

df = DataFrame(np.random.rand(1000, 3), columns=('A', 'B', 'C'))
# put the dataset in the storage
hdf.put('d1', df, format='table', data_columns=True)
print(hdf['d1'].shape)

hdf.append('d1',
           DataFrame(np.random.rand(5, 3), columns=('A', 'B', 'C')),
           format='table',
           data_columns=True)

df = DataFrame(np.random.rand(1000, 3), columns=('A', 'B', 'C'))
# put the dataset in the storage
hdf.put('d2', df, format='table', data_columns=True)
print(hdf['d2'].shape)

hdf.append('d2',
           DataFrame(np.random.rand(5, 3), columns=('A', 'B', 'C')),
           format='table',
           data_columns=True)
hdf.close()  # closes the file
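Since both tables were written in table format with data_columns=True, any column can be used in a where clause when the file is re-opened; a short sketch assuming the same path as above:

from pandas import HDFStore

with HDFStore('data/hdata.h5', mode='r') as hdf:
    # select only the rows of 'd1' where column A exceeds 0.9
    subset = hdf.select('d1', where=['A > 0.9'])
    print(subset.shape)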