Example #1
def test_chunk():
    print("debut")
    writer = None
    years = range(2011, 2012)
    filename = destination_dir + 'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
        #        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        import time

        tps = {}
        for nb_chunk in range(1, 5):
            deb_chunk = time.perf_counter()
            simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5',
                            num_table=3,
                            chunks_count=nb_chunk,
                            print_missing=False)
            simu.compute()
            tps[nb_chunk] = time.perf_counter() - deb_chunk

            voir = simu.output_table.table3['foy']
            print(len(voir))
            pdb.set_trace()
            agg3 = Aggregates()
            agg3.set_simulation(simu)
            agg3.compute()
            df1 = agg3.aggr_frame
            print(df1.to_string())

    print(tps)
    store.close()
Example #2
def append_store_mod( module, path_store, n_days_refresh=None, b_ptrk=False ):
    """ append all new rows in module.field to store. Resize store as appropriate. """ 
    store = HDFStore( path_store )
    for field in module.__dict__.keys():
        if ( type( getattr( module, field ) ) is DataFrame or type( getattr( module, field ) ) is Panel ) and "/{}".format( field ) in store.keys():
            if "tdate" in field:
                getattr( module, field ).to_hdf( path_store, field, mode='a', format='fixed' )
            else:
                solbasic.logger.info( "Working on {}...".format( field ) )
                df = store[ field ].copy()
                df_new = getattr( module, field ).copy()
                if n_days_refresh == None:
                    l_index = sorted( list( set( df_new.index ) - set( df.index ) ) )
                else:
                    l_index = sorted( list( df_new.index[ -n_days_refresh: ] ) )
                l_columns = sorted( list( set( df_new.columns ) - set( df.columns ) ) )
                l_columns_rev = sorted( list( set( df.columns ) - set( df_new.columns ) ) )
                if l_columns:
                    solbasic.logger.info( "Adding {} instruments: {}".format( len( l_columns ), l_columns ) )
                    for col in l_columns:
                        df[ col ] = np.nan
                if l_columns_rev:
                    for col in l_columns_rev:
                        df_new[ col ] = df[ col ]
                if l_index:
                    solbasic.logger.info( "Refreshing {} dates: {}".format( len( l_index ), l_index ) )
                    for ind in l_index:
                        df.ix[ ind ] = df_new.ix[ ind ]
                    df.to_hdf( path_store, field, mode='a', format='fixed' )
    store.close()
    if b_ptrk:
        ptrk_store( path_store )
Example #3
    def build_actualisation_groups(self, filename = None):
        '''
        Builds actualisation groups
        '''
        if filename is None:
            data_dir = CONF.get('paths', 'data_dir')
            fname = "actualisation_groups.h5"
            filename = os.path.join(data_dir, fname)

        store = HDFStore(filename)
        df = store['vars']
        coeff_list = sorted(unique(df['coeff'].dropna()))

        vars = dict()
        for coeff in coeff_list:
            vars[coeff] = list(df[ df['coeff']==coeff ]['var'])

        self.actualisation_vars = vars
        self.coeffs_df = store['names']
        self.coeffs_df['coeff'] = self.coeffs_df['coeff'].str.replace(' ','') # remove spaces



        yr = 1*self.survey_year
        self.coeffs_df['value'] = 1
        while yr < self.datesim_year:
            if yr in self.coeffs_df.columns:
                factor = self.coeffs_df[yr]
            else:
                factor = 1
            self.coeffs_df['value'] = self.coeffs_df['value']*factor
            yr += 1

        self.coeffs_df.set_index(['coeff'], inplace = True)
        store.close()
Example #4
    def save_simulation(self, filename, attribute_list = ['cohorts', 'aggregate_pv', 'percapita_pv', 
                        'cohorts_alt', 'aggregate_pv_alt', 'percapita_pv_alt'], has_alt = False):
        """
        Saves the output dataframe to an HDF store under the default directory.
        Warning: overwrites the .h5 file if it already exists!
        Warning: the data is saved as a dataframe; the Cohort has to be recreated when reading.

        Parameters
        ----------
        name : the name of the table inside the store
        filename : the name of the .h5 file where the table is stored. Created if it does not exist.
        """
        # Creating the filepath :
        ERF_HDF5_DATA_DIR = os.path.join(SRC_PATH,'countries',self.country,'sources','Output_folder/')
        store = HDFStore(os.path.join(os.path.dirname(ERF_HDF5_DATA_DIR),filename+'.h5'))
        
        # Loop over the simulation's attributes, saving only the ones that match the list
        # AND aren't empty
        from pandas import DataFrame

        for attrib, value in self.__dict__.items():
            if attrib in attribute_list and value is not None:

                # Transform the data within a cohort into a dataframe so HDFStore can handle it:
                record = DataFrame(index=value.index)
                for col in value.columns:
                    record[col] = value[col]
                print('saving')
                store[attrib] = record
            else:
                print('ignored')
        print(store)
        store.close()
Example #5
def main():
    # the loaded data is a DataFrame
    genedata = load_gene_dataset()
    
    # randomly split the dataset to three folds
    # this code should be improved in the future
    kfold = 3.0
    data_kfold = {}
    train, fold1 = train_test_split(genedata, test_size=1/kfold)
    data_kfold['fold1'] = fold1
    fold3, fold2 = train_test_split(train, test_size=0.5)
    data_kfold['fold2'] = fold2
    data_kfold['fold3'] = fold3
    
    # now we want to train a network for each fold
    # store the results in h5 file
    geneStore = HDFStore('predGeneExp1.h5')
    for i, key in enumerate(data_kfold):
        print(key)
        test_data = data_kfold[key]
        X_val, y_val = get_input_output(test_data)
        keys = list(data_kfold.keys())
        keys.remove(key)
        training_data = pd.concat([data_kfold[keys[0]],data_kfold[keys[1]]])
        X_train, y_train = get_input_output(training_data)
        print(keys)
        # use these data to train the network
        main_training(key, X_train, y_train, X_val, y_val, geneStore)
   
    # the h5 must be closed after using
    geneStore.close()
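The manual three-way split above mutates the key list and hard-codes the fold count; as an alternative, here is a minimal sketch of the same idea using sklearn.model_selection.KFold (assuming the load_gene_dataset and get_input_output helpers from the example):

from sklearn.model_selection import KFold

def iter_folds(genedata, n_splits=3, seed=0):
    # yield (training, validation) DataFrame pairs, one per fold
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for train_idx, val_idx in kf.split(genedata):
        yield genedata.iloc[train_idx], genedata.iloc[val_idx]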
Example #6
def storeHdf5(data, tag, path):
    hdf = HDFStore(path, 'a')
    if tag in hdf:  # membership works with or without the leading '/'
        hdf.append(tag, data)
    else:
        # store as a table so later calls can append to it
        hdf.put(tag, data, format='table')
    hdf.close()
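A minimal usage sketch for the helper above, with a hypothetical DataFrame and file name; the first call creates the node as a table, later calls append to it:

import pandas as pd

df = pd.DataFrame({"price": [1.0, 2.0]})
storeHdf5(df, "prices", "prices.h5")   # creates /prices
storeHdf5(df, "prices", "prices.h5")   # appends two more rows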
Example #7
def SAVE_ChangeDictOrder(_processedEvents):
    '''Change the nesting order for the final HDF database - instead of correct/attention, it will go attention/present/correct etc'''


    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    # Strip the '_EVENTS' suffix because the path in HDF must match exactly, otherwise nothing was saved
    all_event_names = sorted([name.replace('_EVENTS', '') for name in events_names if bef_aft_dict[bef_aft_switch + '_mat'] in name])

    store = HDFStore(h_path +bef_aft_dict[bef_aft_switch+ '_hdf'])

    for _data, recording in zip(_processedEvents, all_event_names):
        print('I')
        sname = recording.rfind("/") +1
        subId = recording[sname:-4].replace("-", "_")

        store[subId + '/events/attention/correct'] = _data['correct']['attention'].convert_objects()
        store[subId + '/events/motor/correct'] = _data['correct']['motor'].convert_objects()

        store[subId + '/events/attention/incorrect'] = _data['incorrect']['attention'].convert_objects()
        store[subId + '/events/motor/incorrect'] = _data['incorrect']['motor'].convert_objects()

        #print(_data['incorrect']['motor'].convert_objects())



    store.close()
Example #8
def download():
    """ Convenience method that downloads all the weather data required
    for the machine learning examples.
    """
    reader = GSODDataReader()
    year_list = range(2001, 2012)
    austin = reader.collect_data(year_list, exact_station=True,
        station_name='AUSTIN CAMP MABRY', state='TX', country='US')
    houston = reader.collect_data(year_list, exact_station=True,
        station_name='HOUSTON/D.W. HOOKS', state='TX', country='US')
    new_york = reader.collect_data(year_list, exact_station=True,
        station_name='NEW YORK/LA GUARDIA', state='NY', country='US')
    newark = reader.collect_data(year_list, exact_station=True,
        station_name='NEWARK INTL AIRPORT', state='NJ', country='US')
    punta_arenas = reader.collect_data(year_list, exact_station=True,
        station_name='PUNTA ARENAS', country='CH')
    wellington = reader.collect_data(year_list, exact_station=True,
        station_name='WELLINGTON AIRPORT', country='NZ')
    store = HDFStore('weather.h5')
    store['austin'] = austin
    store['houston'] = houston
    store['nyc'] = new_york
    store['newark'] = newark
    store['punta_arenas'] = punta_arenas
    store['wellington'] = wellington
    store.close()
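Reading the stations back out is symmetric; a minimal sketch, assuming weather.h5 was produced by download() above:

from pandas import HDFStore

store = HDFStore('weather.h5', mode='r')
print(store.keys())        # ['/austin', '/houston', '/nyc', ...]
austin = store['austin']   # DataFrame of Austin GSOD records
store.close()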
Example #9
def convert_fiducial(filename, output_type="csv"):
    '''
    Converts the fiducial comparison HDF5 files into a CSV file.

    Parameters
    ----------
    filename : str
        HDF5 file.
    output_type : str, optional
           Type of file to output.
    '''

    store = HDFStore(filename)
    data_columns = dict()
    for key in store.keys():
        data = store[key].sort(axis=1)
        mean_data = data.mean(axis=1)
        data_columns[key[1:]] = mean_data
    store.close()

    df = DataFrame(data_columns)

    output_name = "".join(filename.split(".")[:-1]) + "." + output_type

    df.to_csv(output_name)
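A usage sketch for the converter above, with a hypothetical file name; each HDF5 key becomes one CSV column holding the row-wise mean of that table:

convert_fiducial("fiducial_comparison.h5")                     # writes fiducial_comparison.csv
convert_fiducial("fiducial_comparison.h5", output_type="txt")  # same content, .txt extension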
Example #10
class HDFStorePanel(BaseIO):

    goal_time = 0.2

    def setup(self):
        self.fname = '__test__.h5'
        with warnings.catch_warnings(record=True):
            self.p = Panel(np.random.randn(20, 1000, 25),
                           items=['Item%03d' % i for i in range(20)],
                           major_axis=date_range('1/1/2000', periods=1000),
                           minor_axis=['E%03d' % i for i in range(25)])
            self.store = HDFStore(self.fname)
            self.store.append('p1', self.p)

    def teardown(self):
        self.store.close()
        self.remove(self.fname)

    def time_read_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.select('p1')

    def time_write_store_table_panel(self):
        with warnings.catch_warnings(record=True):
            self.store.append('p2', self.p)
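Panel was removed in pandas 1.0, so this benchmark only runs on older versions; a minimal sketch of the same append/select round trip on a current pandas, using a DataFrame instead:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(1000, 25),
                  index=pd.date_range('1/1/2000', periods=1000),
                  columns=['E%03d' % i for i in range(25)])
store = pd.HDFStore('__test__.h5')
store.append('p1', df)          # table format, appendable
roundtrip = store.select('p1')  # read it back
store.close()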
Example #11
def init_h5_database(database_name, meta_data, overwrite=False):
    """Initialize a h5 file for storing EEMs using a pandas DataFrame containing EEM meta data 
    
    Args:
        database_name (str): filename and relative path for h5 database
        meta_data (pandas DataFrame): DataFrame containing eem meta data from `pyeem.load_eem_meta_data` 
        function or created manually - see pyeem.load_eem_meta_data for required columns.  NOTE: do not use
        spaces or decimals in column names as this causes a warning when saving to H5 file format
        
    Returns:
        no return - data is saved as h5 and may be loaded using `pyeem.load_eem_data`
    """
    from pandas import HDFStore

    # check if h5 file exists and overwrite or warn
    if os.path.isfile(database_name):
        if overwrite is True:
            print('overwriting ' + database_name)
            os.remove(database_name)
        else:
            raise ValueError(
                "h5 file " + database_name +
                " exists. Choose new database name or set overwrite=True")

    # create a h5 file to store EEM meta data
    hdf = HDFStore(database_name)
    hdf.put('meta', meta_data, format='table', data_columns=True)
    hdf.close()
    return
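A minimal usage sketch, assuming a small hand-built meta-data frame (column names without spaces or decimals, as the docstring requests):

import pandas as pd

meta = pd.DataFrame({"sample_id": [1, 2], "description": ["blank", "quinine"]})
init_h5_database("eem_test.h5", meta, overwrite=True)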
Example #13
 def remove(self, path):
     s = HDFStore(self.path)
     if path in s:
         print("removing %s" % path)
         s.remove(path)
         s.flush(fsync=True)
     s.close()
Example #14
 def get(self, path):
     s = HDFStore(self.path)
     d = None
     if path in s:
         d = s[path]
     s.close()
     return d
Example #15
class engine(Engine):
    """Engine instance for writing data to a HDF5 file."""

    name = "HDF5"
    abbreviation = "hdf5"
    insert_limit = 1000
    required_opts = [
        ("file", "Enter the filename of your HDF5 file", "hdf5.h5"),
        ("table_name", "Format of table name", "{db}_{table}"),
        ("data_dir", "Install directory", DATA_DIR),
    ]

    def create_db(self):
        """Override create_db since an SQLite dataset needs to be created
        first followed by the creation of an empty HDFStore file.
        """
        file_path = os.path.join(self.opts["data_dir"], self.opts["file"])
        self.file = HDFStore(file_path)

    def create_table(self):
        """Don't create table for HDF5

        HDF5 doesn't create tables. Each database is a file which has been
        created. This overloads `create_table` to do nothing in this case.
        """
        return None

    def insert_data_from_file(self, filename):
        """Fill the table by fetching the dataframe from the
        SQLite engine and putting it into the HDFStore file.
        """
        table_name = self.table_name()
        df = self.fetch_table(table_name)
        self.file.put(table_name, df, data_columns=True)

    def fetch_table(self, table_name):
        """Return a table from sqlite dataset as pandas dataframe."""
        connection = self.get_sqlite_connection()
        sql_query = "SELECT * FROM {};".format(table_name)
        return pd.read_sql_query(sql_query, connection)

    def get_sqlite_connection(self):
        # self.get_input()
        file = self.opts["file"]
        file = (file.split("."))[0] + ".db"
        db_file = self.opts["data_dir"]
        full_path = os.path.join(db_file, file)
        return dbapi.connect(os.path.normpath(full_path))

    def get_connection(self):
        """Gets the db connection."""
        self.get_input()
        return DummyConnection()

    def disconnect(self):
        """Close the file after being written"""
        self.file.close()
        file = self.opts["file"]
        file = (file.split("."))[0] + ".db"
        os.remove(file)
Example #16
 def func_wrapper(*args, **kwargs):
     temporary_store = HDFStore(file_path)
     try:
         return func(*args, temporary_store=temporary_store, **kwargs)
     finally:
         gc.collect()
         temporary_store.close()
Example #17
def save_temp(dataframe, name = None, year = None, config_files_directory = default_config_files_directory):
    """
    Save a temporary table

    Parameters
    ----------
    dataframe : pandas DataFrame
                the dataframe to save
    name : string, default None

    year : integer, default None
           year of the data
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(config_files_directory = config_files_directory)
    store = HDFStore(hdf_file_path)
    log.info("{}".format(store))
    store_path = "{}/{}".format(year, name)

    if store_path in store.keys():
        del store["{}/{}".format(year, name)]

    dataframe.to_hdf(hdf_file_path, store_path)

    store.close()
    return True
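A usage sketch for the temporary-store helpers (hypothetical frame and name; get_tmp_file_path resolves the actual .h5 path from the configuration, and load_temp from Example #21 below reads the table back):

import pandas as pd

df = pd.DataFrame({"wage": [1200, 1500]})
save_temp(df, name="wages", year=2011)   # stored under the key "2011/wages"
df_again = load_temp(name="wages", year=2011)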
Example #18
 def in_store(self, path):
     s = HDFStore(self.path)
     val = False
     if path in s:
         val = True
     s.close()
     return val
Example #19
class PandasHDFHandler(FileHandler):
    r"""
    Handler for HDF5 files using Pandas.
    """
    def _open_for_read(self):
        self.handle = HDFStore(self.fname, mode='r')

    def _open_for_write(self):
        self.handle = HDFStore(self.fname)

    def list_items(self):
        keys = [key.strip('/') for key in self.handle.keys()]
        items = [(key, _get_type_from_attrs(self.handle.get_storer(key).attrs))
                 for key in keys if '/' not in key]
        # ---- for backward compatibility (LArray < 0.33) ----
        # axes
        items += [(key.split('/')[-1], 'Axis_Backward_Comp') for key in keys
                  if '__axes__' in key]
        # groups
        items += [(key.split('/')[-1], 'Group_Backward_Comp') for key in keys
                  if '__groups__' in key]
        return items

    def _read_item(self, key, typename, *args, **kwargs):
        if typename in _supported_typenames:
            hdf_key = '/' + key
        # ---- for backward compatibility (LArray < 0.33) ----
        elif typename == 'Axis_Backward_Comp':
            hdf_key = '__axes__/' + key
        elif typename == 'Group_Backward_Comp':
            hdf_key = '__groups__/' + key
        else:
            raise TypeError()
        return read_hdf(self.handle, hdf_key, *args, **kwargs)

    def _dump_item(self, key, value, *args, **kwargs):
        hdf_key = '/' + key
        if isinstance(value, (Array, Axis)):
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Group):
            hdf_axis_key = '/' + value.axis.name
            value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs)
        elif isinstance(value, _supported_scalars_types):
            s = pd.Series(data=value)
            self.handle.put(hdf_key, s)
            self.handle.get_storer(hdf_key).attrs.type = type(value).__name__
        else:
            raise TypeError()

    def _read_metadata(self):
        metadata = Metadata.from_hdf(self.handle)
        if metadata is None:
            metadata = Metadata()
        return metadata

    def _dump_metadata(self, metadata):
        metadata.to_hdf(self.handle)

    def close(self):
        self.handle.close()
Example #21
def load_temp(name=None,
              year=None,
              variables=None,
              config_files_directory=default_config_files_directory):
    """
    Load a temporary saved table

    Parameters
    ----------
    name : string, default None

    year : integer, default None
           year of the data
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(
        config_files_directory=config_files_directory)
    print(hdf_file_path)
    store = HDFStore(hdf_file_path)
    dataframe = store["{}/{}".format(year, name)]
    store.close()
    if variables is None:
        return dataframe
    else:
        return dataframe[variables].copy()
Example #22
 def _get(self, path):
     s = HDFStore(self.path)
     d = None
     if path in s:
         d = s[path]
     s.close()
     return d
Example #23
def writeHD5():
    """Write to local store.h5"""
    global Data1

    store = HDFStore(r'.\store.h5')
    store['listCrisis'] = Data1
    store.close()
Example #24
def AddSeqComp(mypath):
    """ Loads TestLogAll.h5 from the specified path, then calls 
    MeasurementGroupTools.AddSeqComp to recalculate seq components using FFT  

    Input:  Directory of the measurement campaign, e.g.: "aLabView2"
    Output: Results1.h5, Results1.pdf in the data subdirs.
    """
    from pandas import HDFStore, ExcelWriter
    import MeasurementGroupTools as mgt

    h5logs = HDFStore(mypath + "\\" + 'TestLogsAll.h5')
    TestLog = h5logs['TestLogsAll']

    dirs = TestLog[u'DirName'].unique()
    for dname in dirs:
        mysubdirpath = mypath + "\\" + dname
        print("Processing: " + dname)
        mgt.AddSeqComp(mysubdirpath, TestLog, dname)

    h5logs.put('TestLogsAll',TestLog)
    h5logs.close()

    writer = ExcelWriter(mypath + "\\" + 'TestLogsAll.xlsx')
    TestLog.to_excel(writer,'TestLogsAll') # the second argument defines sheet name
    writer.save()

    return
Example #28
 def put(self, path, obj):
     s = HDFStore(self.path)
     if path in s:
         print("updating %s" % path)
         s.remove(path)
     s[path] = obj
     s.close()
Example #29
def evaluate(model,
             test_hdf_file,
             get_batch,
             loss_function,
             batch_size,
             cuda=False):
    store_test = HDFStore(test_hdf_file)
    test_loss = 0
    accuracy = 0
    count = 0
    model.eval()
    if cuda:
        model = model.cuda()
    test_gen = get_batch(store_test, batch_size)
    for x, target, src_padding, target_padding in test_gen:
        if cuda:
            x, target = x.cuda(), target.cuda()
        out = model(x)
        loss = loss_function(out, target)
        acc = int(
            torch.all(out.argmax(dim=-1) == target, dim=-1).to(
                torch.int).sum()) / out.shape[0]
        test_loss += loss.item()
        accuracy += acc
        count += 1
    test_loss /= count
    accuracy /= count
    print("Test Loss :", test_loss)
    print("Test accuracy :", accuracy)
    store_test.close()
Example #31
def storeEEGinHDF():
    """Load EEG from 64 electrodes x ~30 min at 500 hz (large dataset)"""
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"

    all_eeg_names = sorted([
        name for name in eeg_names
        if bef_aft_dict[bef_aft_switch + '_mat'].replace("_EVENTS", "") in name
    ])
    store = HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf'])

    #Create a HDF database with a single-precision point (float 32)
    cnt = 0
    for recording in all_eeg_names:
        cnt = cnt + 1
        sname = recording.rfind("/") + 1

        subId = recording[sname:-4].replace("-", "_")

        sig = pd.DataFrame(
            sio.loadmat(recording,
                        struct_as_record=True)['eegToSave']).transpose()
        #Modified here to save  a filtered version from: store[subId + "/signal/f"] =  sig.convert_objects())

        store[subId + "/signal/filtered_30/"] = sig.convert_objects().apply(
            FilterData, axis=0)
        print(cnt)
    store.close()
Example #32
def show_temp(config_files_directory = default_config_files_directory):

    hdf_file_path = get_tmp_file_path(config_files_directory = config_files_directory)
    store = HDFStore(hdf_file_path)

    log.info("{}".format(store))
    store.close()
Example #35
def build_from_openfisca( directory = None):

    df_age_final = None
    for yr in range(2006,2010):
        simulation = SurveySimulation()
        simulation.set_config(year = yr)
        simulation.set_param()
        simulation.set_survey()


        df_age = get_age_structure(simulation)
        df_age[yr] = df_age['wprm']
        del df_age['wprm']
        if df_age_final is None:
            df_age_final = df_age
        else:
            df_age_final = df_age_final.merge(df_age)

    if directory is None:
        directory = os.path.dirname(__file__)

    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    print(df_age_final.dtypes)
    store.put("openfisca", df_age_final)
    store.close()
Example #37
def build_actualisation_group_names_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print(df.to_string())
    store.close()
Example #39
 def get_children_paths(self, node_path):
     s = HDFStore(self.path)
     node = s.get_node(node_path)
     children = []
     for child, df in node._v_children.items():
         children.append(df._v_pathname)
     s.close()
     return children
Example #40
 def get_children_paths(self, node_path):
     s = HDFStore(self.path)
     node = s.get_node(node_path)
     children = []
     for child, df in node._v_children.items():
         children.append(df._v_pathname)
     s.close()
     return children
Example #42
    def save(self, dataFile):
        """ save data to HDF"""
        print('Saving data to', dataFile)
        store = HDFStore(dataFile)
        for symbol in self.wp.items:
            store[symbol] = self.wp[symbol]

        store.close()
Example #44
def convert_to_3_tables(year=2006, survey_file=None, output_file=None):

    if survey_file is None:
        raise Exception(
            'You need a .h5 file with the survey to extract the variables from'
        )
    if output_file is None:
        output_file = survey_file
        raise Warning(
            'the survey file will be used to store the created tables')

    store = HDFStore(survey_file)
    output = HDFStore(output_file)
    print(output)

    simulation = SurveySimulation()
    simulation.set_config(year=year)
    table1 = store['survey_' + str(year)]

    for entity in ['ind', 'foy', 'men', 'fam']:
        key = 'survey_' + str(year) + '/' + str(entity)

        vars_matching_entity = vars_matching_entity_from_table(
            table1, simulation, entity)
        print(entity, vars_matching_entity_from_table)
        print('table1 enum')

        if entity == 'ind':
            print('INDIVIDUALS')
            print(table1['noindiv'])
            table_entity = table1.loc[:, vars_matching_entity]

        # we take care have all ident and selecting qui==0
        else:
            #             print '    entity :', entity
            #             print table1['noindiv'].head()
            position = 'qui' + entity
            #             print table1[position]
            table_entity = table1.ix[table1[position] == 0, [
                'noi', 'idmen', 'idfoy', 'idfam', 'quifoy', 'quimen', 'quifam'
            ] + vars_matching_entity]
            #             print table_entity.noi.head()
            table_entity = table_entity.rename_axis(table_entity['id' +
                                                                 entity],
                                                    axis=1)


#             print '    APRES'
#             print table_entity.noi.head()
        print(key)
        output.put(key, table_entity)

    del table1
    import gc
    gc.collect()

    store.close()
    output.close()
Example #45
class PandasHDFHandler(FileHandler):
    """
    Handler for HDF5 files using Pandas.
    """
    def _open_for_read(self):
        self.handle = HDFStore(self.fname, mode='r')

    def _open_for_write(self):
        self.handle = HDFStore(self.fname)

    def list_items(self):
        keys = [key.strip('/') for key in self.handle.keys()]
        # axes
        items = [(key.split('/')[-1], 'Axis') for key in keys if '__axes__' in key]
        # groups
        items += [(key.split('/')[-1], 'Group') for key in keys if '__groups__' in key]
        # arrays
        items += [(key, 'Array') for key in keys if '/' not in key]
        return items

    def _read_item(self, key, type, *args, **kwargs):
        if type == 'Array':
            hdf_key = '/' + key
        elif type == 'Axis':
            hdf_key = '__axes__/' + key
            kwargs['name'] = key
        elif type == 'Group':
            hdf_key = '__groups__/' + key
            kwargs['name'] = key
        else:
            raise TypeError()
        return key, read_hdf(self.handle, hdf_key, *args, **kwargs)

    def _dump_item(self, key, value, *args, **kwargs):
        if isinstance(value, LArray):
            hdf_key = '/' + key
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Axis):
            hdf_key = '__axes__/' + key
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Group):
            hdf_key = '__groups__/' + key
            hdf_axis_key = '__axes__/' + value.axis.name
            value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs)
        else:
            raise TypeError()

    def _read_metadata(self):
        metadata = Metadata.from_hdf(self.handle)
        if metadata is None:
            metadata = Metadata()
        return metadata

    def _dump_metadata(self, metadata):
        metadata.to_hdf(self.handle)

    def close(self):
        self.handle.close()
Example #46
def test_read_nokey_empty(setup_path):
    with ensure_clean_path(setup_path) as path:
        store = HDFStore(path)
        store.close()
        msg = re.escape(
            "Dataset(s) incompatible with Pandas data types, not table, or no "
            "datasets found in HDF5 file.")
        with pytest.raises(ValueError, match=msg):
            read_hdf(path)
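The error above is specific to an empty store; a minimal sketch of the non-empty case, where read_hdf without a key works only because the file holds exactly one object:

import pandas as pd
from pandas import HDFStore, read_hdf

df = pd.DataFrame({"a": [1, 2]})
store = HDFStore("single.h5")
store.put("only", df)
store.close()
roundtrip = read_hdf("single.h5")   # no key needed: exactly one dataset in the file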
Example #47
def write_file(format):
    outfile = '../inst/exampledata/pytables_' + format + '.h5'
    
    if os.path.isfile(outfile):
        os.remove(outfile)
    
    hdf = HDFStore(outfile)
    hdf.put('mydata', df, format=format, data_columns=True, encoding="utf-8")
    hdf.close()
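A usage sketch, assuming df is the module-level DataFrame the snippet relies on; 'fixed' and 'table' are the two layouts HDFStore.put accepts:

write_file('fixed')   # fast to read/write, but not appendable or queryable
write_file('table')   # appendable, supports where-clauses on data_columns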
Example #48
def run_convert(basedir, beam):

    print("Converting for:", beam)

    infile = basedir + "/lmon_p"+str(beam)+".root"
    outfile = basedir + "/HCal_p"+str(beam)+".h5"

    #lmon input
    inp = TFile.Open(infile)
    tree = inp.Get("DetectorTree")

    #load the tree
    ucal_edep_EMC = rt.EntryD()
    ucal_edep_HAC1 = rt.EntryD()
    ucal_edep_HAC2 = rt.EntryD()
    ucal_edep_layers = std.vector(float)()
    tree.SetBranchAddress("ucal_edep_EMC", AddressOf(ucal_edep_EMC, "v"))
    tree.SetBranchAddress("ucal_edep_HAC1", AddressOf(ucal_edep_HAC1, "v"))
    tree.SetBranchAddress("ucal_edep_HAC2", AddressOf(ucal_edep_HAC2, "v"))
    tree.SetBranchAddress("ucal_edep_layers", ucal_edep_layers)

    tree.GetEntry(0)
    nlay = ucal_edep_layers.size()

    #output DataFrame
    col = ["ucal_edep_EMC", "ucal_edep_HAC1", "ucal_edep_HAC2"]
    for i in range(nlay):
        col.append( "ucal_edep_layer"+str(i) )

    df_inp = []

    #event loop
    for iev in range(tree.GetEntriesFast()):

        tree.GetEntry(iev)

        lin = []
        lin.append(ucal_edep_EMC.v)
        lin.append(ucal_edep_HAC1.v)
        lin.append(ucal_edep_HAC2.v)

        for i in range(nlay):

            lin.append(ucal_edep_layers.at(i))

        df_inp.append(lin)

    df = DataFrame(df_inp, columns=col)

    print(df)

    out = HDFStore(outfile)
    out["hcal"] = df
    out.close()

    inp.Close()
Example #49
 def load(self,dataFile):
     """load data from HDF"""
     if os.path.exists(dataFile):
         store = HDFStore(dataFile)
         symbols = [str(s).strip('/') for s in list(store.keys()) ]   
         data = dict(list(zip(symbols,[store[symbol] for symbol in symbols])))
         self.wp = Panel(data)
         store.close()
     else:
         raise IOError('Data file does not exist')
Example #50
 def load(self,dataFile):
     """load data from HDF"""
     if os.path.exists(dataFile):
         store = HDFStore(dataFile)
         symbols = store.keys()    
         data = dict(zip(symbols,[store[symbol] for symbol in symbols]))
         self.wp = WidePanel(data)
         store.close()
     else:
         raise IOError('Data file does not exist')
Example #51
 def _put(self, path, obj):
     s = HDFStore(self.path)
     if path in s:
         print("updating %s" % path)
         s.remove(path)
         s.close()
     s = HDFStore(self.path)
     s[path] = obj
     s.flush(fsync=True)
     s.close()
Example #52
def anls():
	store = HDFStore('hdf5/divvy.h5')
	pd = store['divvy']
	store.close()
	df = reduce(lambda x,y: x.append(y),[pd[i] for i in pd.items])
	df.index = df.timestamp
	foo = list(map(lambda x: x[1], df.groupby('id')))
	for i in range(len(foo)): foo[i]['diff'] = foo[i].availableBikes.diff()
	for i in range(len(foo)): foo[i]['diff'].hist(range=[-5,5],bins=20)
	plt.show()
Example #53
def load_df(path, default=None):
    """Load a DataFrame from the '/logs' table of the HDF5 store at `path`."""
    try:
        store = HDFStore(path)
        print(store.keys())
        df = store.get('logs')
        store.close()
        return df
    except Exception:
        return default
Example #54
 def load(self, dataFile):
     """load data from HDF"""
     if os.path.exists(dataFile):
         store = HDFStore(dataFile)
         symbols = [str(s).strip('/') for s in store.keys()]
         data = dict(zip(symbols, [store[symbol] for symbol in symbols]))
         self.wp = WidePanel(data)
         store.close()
     else:
         raise IOError('Data file does not exist')
Example #55
 def test_store(self):
     final_store = HDFStore(self.store_path)
     print('----')
     print(final_store.keys())
     print('-' * 80)
     logs = final_store['/logs']
     print(type(logs))
     print(len(logs))
     print(logs.columns)
     final_store.close()
Example #56
 def _put(self, path, obj):
     s = HDFStore(self.path)
     if path in s:
         print("updating %s" % path)
         s.remove(path)
         s.close()
     s = HDFStore(self.path)
     s[path] = obj
     s.flush(fsync=True)
     s.close()
Example #57
    def store_results(self, result, index, columns, hdf5_file):
        self.df = DataFrame(result, columns=columns)
        self.df = self.df.set_index(index)
        self.df.sort_index(inplace=True)

        # Store the DataFrame as an HDF5 file...
        hdf = HDFStore(hdf5_file)
        # Append the dataframe, and ensure addr / host can be 17 chars long
        hdf.append('df', self.df, data_columns=list(columns), 
            min_itemsize={'addr': 17, 'host': 17})
        hdf.close()
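Because the frame is appended in table format with data_columns set, it can be filtered on read; a minimal sketch with a hypothetical file name and column value (the 'addr' and 'host' columns come from the example above):

from pandas import HDFStore

hdf = HDFStore('results.h5', mode='r')
subset = hdf.select('df', where="host == 'web01'")   # query on a data column
hdf.close()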
Example #58
def convert_fiducial(filename, output_type="csv", decimal_places=8,
                     append_comp=True, num_fids=5, return_name=True,
                     mode='mean', **kwargs):
    '''
    Converts the fiducial comparison HDF5 files into a CSV file.

    Parameters
    ----------
    filename : str
        HDF5 file.
    output_type : str, optional
           Type of file to output.
    decimal_places : int, optional
        Specify the number of decimal places to keep.
    append_comp : bool, optional
        Append on columns with fiducial numbers copy
    num_fids : int, optional
        Number of fiducials compared.
    '''

    store = HDFStore(filename)
    data_columns = dict()
    for key in store.keys():
        data = store[key].sort(axis=1)
        mean_data = timestep_choose(data, mode=mode, **kwargs)
        data_columns[key[1:]] = trunc_float(mean_data, decimal_places)
        comp_fids = store[key].index
    store.close()

    df = DataFrame(data_columns)

    if append_comp:
        fids = []
        for fid, num in zip(np.arange(0, num_fids - 1),
                            np.arange(num_fids - 1, 0, -1)):
            for _ in range(num):
                fids.append(fid)

        df["Fiducial 1"] = Series(np.asarray(fids).T, index=df.index)
        df["Fiducial 2"] = Series(comp_fids.T, index=df.index)

    for comp in all_comparisons:
        if comp in filename:
            break
    else:
        raise ValueError("Could not find a face comparison match for " +
                         filename)

    output_name = "fiducials" + comp[:-1] + "." + output_type

    df.to_csv(output_name)

    if return_name:
        return output_name