Example #1
def append_store_mod( module, path_store, n_days_refresh=None, b_ptrk=False ):
    """ append all new rows in module.field to store. Resize store as appropriate. """ 
    store = HDFStore( path_store )
    for field in module.__dict__.keys():
        if isinstance( getattr( module, field ), ( DataFrame, Panel ) ) and "/{}".format( field ) in store.keys():
            if "tdate" in field:
                getattr( module, field ).to_hdf( path_store, field, mode='a', format='fixed' )
            else:
                solbasic.logger.info( "Working on {}...".format( field ) )
                df = store[ field ].copy()
                df_new = getattr( module, field ).copy()
                if n_days_refresh is None:
                    l_index = sorted( list( set( df_new.index ) - set( df.index ) ) )
                else:
                    l_index = sorted( list( df_new.index[ -n_days_refresh: ] ) )
                l_columns = sorted( list( set( df_new.columns ) - set( df.columns ) ) )
                l_columns_rev = sorted( list( set( df.columns ) - set( df_new.columns ) ) )
                if l_columns:
                    solbasic.logger.info( "Adding {} instruments: {}".format( len( l_columns ), l_columns ) )
                    for col in l_columns:
                        df[ col ] = np.nan
                if l_columns_rev:
                    for col in l_columns_rev:
                        df_new[ col ] = df[ col ]
                if l_index:
                    solbasic.logger.info( "Refreshing {} dates: {}".format( len( l_index ), l_index ) )
                    for ind in l_index:
                        df.loc[ ind ] = df_new.loc[ ind ]
                    df.to_hdf( path_store, field, mode='a', format='fixed' )
    store.close()
    if b_ptrk:
        ptrk_store( path_store )
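
A standalone sketch of the pandas behaviour the function above relies on (file and key names are made up): with format='fixed', to_hdf replaces the whole key, so the merged frame has to be rebuilt in memory before it is written back, which is exactly what append_store_mod does with its df/df_new copies.

import numpy as np
import pandas as pd

path = "example_store.h5"  # hypothetical store
old = pd.DataFrame(np.random.randn(3, 2), columns=["A", "B"],
                   index=pd.date_range("2020-01-01", periods=3))
old.to_hdf(path, "prices", mode="a", format="fixed")

new = pd.DataFrame(np.random.randn(2, 2), columns=["A", "B"],
                   index=pd.date_range("2020-01-04", periods=2))
merged = pd.concat([old, new])                            # append the new rows in memory
merged.to_hdf(path, "prices", mode="a", format="fixed")   # rewrites the 'prices' key

with pd.HDFStore(path) as store:
    print(store["prices"].shape)                          # (5, 2)
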
Example #2
    def build_actualisation_groups(self, filename = None):
        '''
        Builds actualisation groups
        '''
        if filename is None:
            data_dir = CONF.get('paths', 'data_dir')
            fname = "actualisation_groups.h5"
            filename = os.path.join(data_dir, fname)

        store = HDFStore(filename)
        df = store['vars']
        coeff_list = sorted(unique(df['coeff'].dropna()))

        vars = dict()
        for coeff in coeff_list:
            vars[coeff] = list(df[ df['coeff']==coeff ]['var'])

        self.actualisation_vars = vars
        self.coeffs_df = store['names']
        self.coeffs_df['coeff'] = self.coeffs_df['coeff'].str.replace(' ','') # remove spaces



        yr = 1*self.survey_year
        self.coeffs_df['value'] = 1
        while yr < self.datesim_year:
            if yr in self.coeffs_df.columns:
                factor = self.coeffs_df[yr]
            else:
                factor = 1
            self.coeffs_df['value'] = self.coeffs_df['value']*factor
            yr += 1

        self.coeffs_df.set_index(['coeff'], inplace = True)
        store.close()
Example #3
    def run(self, fname, *args, **kwargs):
        while True:
            try:
                self._flock = os.open(self._lock,
                                      os.O_CREAT | os.O_EXCL | os.O_WRONLY)
                log.info("SafeHDF:%s lock:%s" % (self._lock, self._flock))
                break
            except (IOError, OSError) as e:
                log.error("IOError Error:%s" % (e))
                if self.countlock <= 10:
                    time.sleep(random.randint(1, 3))
                    self.countlock += 1
                else:
                    os.remove(self._lock)
                    log.error("count10 remove lock")

        HDFStore.__init__(self, fname, *args, **kwargs)
def load_temp(name=None,
              year=None,
              variables=None,
              config_files_directory=default_config_files_directory):
    """
    Load a temporary saved table

    Parameters
    ----------
    name : string, default None

    year : integer, default None
           year of the data
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(
        config_files_directory=config_files_directory)
    print(hdf_file_path)
    store = HDFStore(hdf_file_path)
    dataframe = store["{}/{}".format(year, name)]
    store.close()
    if variables is None:
        return dataframe
    else:
        return dataframe[variables].copy()
Example #5
class engine(Engine):
    """Engine instance for writing data to a HDF5 file."""

    name = "HDF5"
    abbreviation = "hdf5"
    insert_limit = 1000
    required_opts = [
        ("file", "Enter the filename of your HDF5 file", "hdf5.h5"),
        ("table_name", "Format of table name", "{db}_{table}"),
        ("data_dir", "Install directory", DATA_DIR),
    ]

    def create_db(self):
        """Override create_db since an SQLite dataset needs to be created
        first, followed by the creation of an empty HDFStore file.
        """
        file_path = os.path.join(self.opts["data_dir"], self.opts["file"])
        self.file = HDFStore(file_path)

    def create_table(self):
        """Don't create table for HDF5

        HDF5 doesn't create tables. Each database is a file which has already
        been created, so this overloads `create_table` to do nothing in this case.
        """
        return None

    def insert_data_from_file(self, filename):
        """Fill the table by fetching the dataframe from the
        SQLite engine and putting it into the HDFStore file.
        """
        table_name = self.table_name()
        df = self.fetch_table(table_name)
        self.file.put(table_name, df, data_columns=True)

    def fetch_table(self, table_name):
        """Return a table from sqlite dataset as pandas dataframe."""
        connection = self.get_sqlite_connection()
        sql_query = "SELECT * FROM {};".format(table_name)
        return pd.read_sql_query(sql_query, connection)

    def get_sqlite_connection(self):
        # self.get_input()
        file = self.opts["file"]
        file = (file.split("."))[0] + ".db"
        db_file = self.opts["data_dir"]
        full_path = os.path.join(db_file, file)
        return dbapi.connect(os.path.normpath(full_path))

    def get_connection(self):
        """Gets the db connection."""
        self.get_input()
        return DummyConnection()

    def disconnect(self):
        """Close the file after being written"""
        self.file.close()
        file = self.opts["file"]
        file = (file.split("."))[0] + ".db"
        os.remove(file)
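
The engine above mirrors each SQLite table into the HDF5 store. A hedged standalone sketch of that copy step using plain sqlite3 and pandas (file, table, and column names are made up; format="table" is chosen here so data_columns is meaningful):

import sqlite3
import pandas as pd

con = sqlite3.connect("hdf5.db")                      # the SQLite side of the pair
con.execute("CREATE TABLE IF NOT EXISTS demo_table (id INTEGER, name TEXT)")
con.execute("INSERT INTO demo_table VALUES (1, 'a'), (2, 'b')")
con.commit()

df = pd.read_sql_query("SELECT * FROM demo_table;", con)             # fetch_table step
with pd.HDFStore("hdf5.h5") as store:
    store.put("demo_table", df, format="table", data_columns=True)   # insert_data_from_file step
con.close()
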
 def get(self, path):
     s = HDFStore(self.path)
     d = None
     if path in s:
         d = s[path]
     s.close()
     return d
def save_temp(dataframe, name = None, year = None, config_files_directory = default_config_files_directory):
    """
    Save a temporary table

    Parameters
    ----------
    dataframe : pandas DataFrame
                the dataframe to save
    name : string, default None

    year : integer, default None
           year of the data
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(config_files_directory = config_files_directory)
    store = HDFStore(hdf_file_path)
    log.info("{}".format(store))
    store_path = "{}/{}".format(year, name)

    if store_path in store.keys():
        del store["{}/{}".format(year, name)]

    dataframe.to_hdf(hdf_file_path, store_path)

    store.close()
    return True
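
A hedged round-trip sketch combining save_temp with the load_temp shown earlier (the frame, name, and year are illustrative only, and it assumes both helpers are importable from the same module; both locate the store via get_tmp_file_path):

import pandas as pd

df = pd.DataFrame({"id": [1, 2, 3], "salaire": [1200.0, 1500.0, 990.0]})  # made-up data
save_temp(df, name="wages", year=2014)
restored = load_temp(name="wages", year=2014, variables=["salaire"])
print(restored.head())
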
 def func_wrapper(*args, **kwargs):
     temporary_store = HDFStore(file_path)
     try:
         return func(*args, temporary_store=temporary_store, **kwargs)
     finally:
         gc.collect()
         temporary_store.close()
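
The wrapper above is only the inner function of a decorator; func and file_path come from an enclosing scope that is not shown. A sketch of what that enclosing factory might look like (the name temporary_store_decorator is an assumption, not from the source):

import gc
from pandas import HDFStore

def temporary_store_decorator(file_path):
    # Hypothetical outer factory: binds file_path, then wraps func so that an
    # open HDFStore is injected as the temporary_store keyword argument.
    def decorator(func):
        def func_wrapper(*args, **kwargs):
            temporary_store = HDFStore(file_path)
            try:
                return func(*args, temporary_store=temporary_store, **kwargs)
            finally:
                gc.collect()
                temporary_store.close()
        return func_wrapper
    return decorator
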
def writeHD5():
    """Write to local store.h5"""
    global Data1

    store = HDFStore(r'.\store.h5')
    store['listCrisis'] = Data1
    store.close()
def show_temp(config_files_directory = default_config_files_directory):

    hdf_file_path = get_tmp_file_path(config_files_directory = config_files_directory)
    store = HDFStore(hdf_file_path)

    log.info("{}".format(store))
    store.close()
Example #11
def mix_models(output: pd.HDFStore, result_file):
    # +-
    ##
    glm_preds = output.get('test/glm')

    xgb_preds = output.get('test/xgb')

    assert glm_preds.shape[1] == xgb_preds.shape[1]

    if glm_preds.shape[0] != xgb_preds.shape[0]:
        logger.warning(
            'glm and xgb predictions in {0!r} have different lengths: {1}, {2}'
            .format(result_file, glm_preds.shape[0], xgb_preds.shape[0]))

    ##
    joined = pd.merge(glm_preds, xgb_preds, how='inner', on='Id')
    joined['Sales'] = 0.985 * (joined['PredictedSales_x'] +
                               joined['PredictedSales_y']) / 2
    assert joined.shape[0] == glm_preds.shape[0]

    joined = joined[['Id', 'Sales']]

    ##
    joined.to_csv(result_file, index=False)

    return joined
Example #12
def download():
    """ Convenience method that downloads all the weather data required
    for the machine learning examples.
    """
    reader = GSODDataReader()
    year_list = range(2001, 2012)
    austin = reader.collect_data(year_list, exact_station=True,
        station_name='AUSTIN CAMP MABRY', state='TX', country='US')
    houston = reader.collect_data(year_list, exact_station=True,
        station_name='HOUSTON/D.W. HOOKS', state='TX', country='US')
    new_york = reader.collect_data(year_list, exact_station=True,
        station_name='NEW YORK/LA GUARDIA', state='NY', country='US')
    newark = reader.collect_data(year_list, exact_station=True,
        station_name='NEWARK INTL AIRPORT', state='NJ', country='US')
    punta_arenas = reader.collect_data(year_list, exact_station=True,
        station_name='PUNTA ARENAS', country='CH')
    wellington = reader.collect_data(year_list, exact_station=True,
        station_name='WELLINGTON AIRPORT', country='NZ')
    store = HDFStore('weather.h5')
    store['austin'] = austin
    store['houston'] = houston
    store['nyc'] = new_york
    store['newark'] = newark
    store['punta_arenas'] = punta_arenas
    store['wellington'] = wellington
    store.close()
 def _get(self, path):
     s = HDFStore(self.path)
     d = None
     if path in s:
         d = s[path]
     s.close()
     return d
def convert_fiducial(filename, output_type="csv"):
    '''
    Converts the fiducial comparison HDF5 files into a CSV file.

    Parameters
    ----------
    filename : str
        HDF5 file.
    output_type : str, optional
           Type of file to output.
    '''

    store = HDFStore(filename)
    data_columns = dict()
    for key in store.keys():
        data = store[key].sort(axis=1)
        mean_data = data.mean(axis=1)
        data_columns[key[1:]] = mean_data
    store.close()

    df = DataFrame(data_columns)

    output_name = "".join(filename.split(".")[:-1]) + "." + output_type

    df.to_csv(output_name)
Example #15
    def save_simulation(self, filename, attribute_list = ['cohorts', 'aggregate_pv', 'percapita_pv', 
                        'cohorts_alt', 'aggregate_pv_alt', 'percapita_pv_alt'], has_alt = False):
        """
        Saves the output dataframes in an HDF store under the default directory.
        Warning: overwrites the .h5 file if it already exists!
        Warning: the data is saved as plain dataframes; the Cohort objects have to be recreated when reading.

        Parameters
        ----------
        attribute_list : the names of the simulation attributes (tables) to save inside the store
        filename : the name of the .h5 file where the tables are stored. Created if it does not exist.
        """
        # Creating the filepath :
        ERF_HDF5_DATA_DIR = os.path.join(SRC_PATH,'countries',self.country,'sources','Output_folder/')
        store = HDFStore(os.path.join(os.path.dirname(ERF_HDF5_DATA_DIR),filename+'.h5'))
        
        # Loop over the simulation's attributes, saving only the ones that match the list
        # AND aren't empty
        from pandas import DataFrame

        for attrib, value in self.__dict__.iteritems():
            if attrib in attribute_list and value is not None:
                
                #Transform the data within a cohort into a dataframe so HDFStore can handle it:
                record = DataFrame(index=value.index)
                for col in value.columns:
                    record[col] = value[col]
                print 'saving'
                store[attrib] = record
            else:
                print 'ignored'
        print store
        store.close()
Example #16
def main():
    # the loaded data is a DataFrame
    genedata = load_gene_dataset()
    
    # randomly split the dataset to three folds
    # this code should be improved in the future
    kfold = 3.0
    data_kfold = {}
    train, fold1 = train_test_split(genedata, test_size=1/kfold)
    data_kfold['fold1'] = fold1
    fold3, fold2 = train_test_split(train, test_size=0.5)
    data_kfold['fold2'] = fold2
    data_kfold['fold3'] = fold3
    
    # now we want to train a network for each fold
    # store the results in h5 file
    geneStore = HDFStore('predGeneExp1.h5')
    for i, key in enumerate(data_kfold):
        print(key)
        test_data = data_kfold[key]
        X_val, y_val = get_input_output(test_data)
        keys = list(data_kfold.keys())
        keys.remove(key)
        training_data = pd.concat([data_kfold[keys[0]],data_kfold[keys[1]]])
        X_train, y_train = get_input_output(training_data)
        print(keys)
        # use the these data to train the network
        main_training(key, X_train, y_train, X_val, y_val, geneStore)
   
    # the h5 must be closed after using
    geneStore.close()
 def in_store(self, path):
     s = HDFStore(self.path)
     val = False
     if path in s:
         val = True
     s.close()
     return val
def AddSeqComp(mypath):
    """ Loads TestLogAll.h5 from the specified path, then calls 
    MeasurementGroupTools.AddSeqComp to recalculate seq components using FFT  

    Input:  Directory of the measurment campaign, e.g.: "aLabView2"
    Output: Results1.h5, Results1.pdf in the data subdirs.
    """
    from pandas import HDFStore, ExcelWriter
    import MeasurementGroupTools as mgt

    h5logs = HDFStore(mypath + "\\" + 'TestLogsAll.h5')
    TestLog = h5logs['TestLogsAll']

    dirs = TestLog[u'DirName'].unique()
    for dname in dirs:
        mysubdirpath = mypath + "\\" + dname
        print "Processing: " + dname
        mgt.AddSeqComp(mysubdirpath, TestLog, dname)

    h5logs.put('TestLogsAll',TestLog)
    h5logs.close()

    writer = ExcelWriter(mypath + "\\" + 'TestLogsAll.xlsx')
    TestLog.to_excel(writer,'TestLogsAll') # the second argument defines sheet name
    writer.save()

    return
Example #20
def create_store(sub):
    hdf = HDFStore('all.h5')
    d = DataFrame(columns=[
        'SUB', 'SEED', 'SEED ROI', 'TARGET ROI', 'HEMISPHERE', 'DISTANCE',
        'STRENGTH', 'CAT1', 'CAT2', 'CAT3'
    ])
    for i in range(1, 181):
        LSfname = '../' + sub + '/out/L' + str(
            i) + '/matrix_seeds_to_all_targets'
        LDfname = '../' + sub + '/out/L' + str(
            i) + '/matrix_seeds_to_all_targets_lengths'
        RSfname = '../' + sub + '/out/R' + str(
            i) + '/matrix_seeds_to_all_targets'
        RDfname = '../' + sub + '/out/R' + str(
            i) + '/matrix_seeds_to_all_targets_lengths'
        ls = readS2R(LSfname)
        rs = readS2R(RSfname)
        ld = readS2R_L(LDfname)
        rd = readS2R_L(RDfname)
        numSeeds, numROIs = ls.shape
        for j in tqdm(range(numSeeds), total=numSeeds):
            for q in range(numROIs):
                tmp = Series([
                    sub, j + 1, i + 1, q + 1, 'L', ld[j, q], ls[j, q], '', '',
                    ''
                ])
                d = d.append(tmp, ignore_index=True)
        # numSeeds ,numROIs = rs.shape
        # for j in range(numSeeds):
        #     for q in range(numROIs):
        #         tmp = Series([sub,j+1,i+1,q+1,'R',rd[j,q],rs[j,q],'','',''])
        #         d = d.append(tmp,ignore_index=True)
        if i == 1: break
    hdf.put(sub, d)
Example #21
def test_chunk():
    print "debut"
    writer = None
    years = range(2011, 2012)
    filename = destination_dir + 'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
        #        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        import time

        tps = {}
        for nb_chunk in range(1, 5):
            deb_chunk = time.clock()
            simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5',
                            num_table=3,
                            chunks_count=nb_chunk,
                            print_missing=False)
            simu.compute()
            tps[nb_chunk] = time.clock() - deb_chunk

            voir = simu.output_table.table3['foy']
            print len(voir)
            pdb.set_trace()
            agg3 = Aggregates()
            agg3.set_simulation(simu)
            agg3.compute()
            df1 = agg3.aggr_frame
            print df1.to_string()

    print tps
    store.close()
Example #22
def compute_and_save_hist_as_pd(values     : np.array           ,
                                out_file   : pd.HDFStore        ,
                                hist_name  : str                ,
                                n_bins     : int                ,
                                range_hist : Tuple[float, float],
                                norm       : bool = False       )->None:
    """
    Computes 1d-histogram and saves it in a file.
    The name of the table inside the file must be provided.
    Parameters
    ----------
    values : np.array
        Array with values to be plotted.
    out_file: pd.HDFStore
        File where histogram will be saved.
    hist_name: string
        Name of the pd.Dataframe to contain the histogram.
    n_bins: int
        Number of bins to make the histogram.
    range_hist: length-2 tuple (optional)
        Range of the histogram.
    norm: bool
        If True, histogram will be normalized.
    """
    n, b = np.histogram(values, bins = n_bins,
                        range = range_hist,
                        density = norm)
    table = pd.DataFrame({'entries': n,
                          'magnitude': shift_to_bin_centers(b)})
    out_file.put(hist_name, table, format='table', data_columns=True)

    return
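
A short usage sketch for the histogram writer above (file and table names are hypothetical; it assumes compute_and_save_hist_as_pd and shift_to_bin_centers are importable from the same module):

import numpy as np
import pandas as pd

values = np.random.normal(0.0, 1.0, size=10000)
with pd.HDFStore("histograms.h5") as out_file:
    compute_and_save_hist_as_pd(values, out_file, "gauss_1d",
                                n_bins=50, range_hist=(-5.0, 5.0), norm=True)
    print(out_file["gauss_1d"].head())
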
def save_temp(dataframe,
              name=None,
              year=None,
              config_files_directory=default_config_files_directory):
    """
    Save a temporary table

    Parameters
    ----------
    dataframe : pandas DataFrame
                the dataframe to save
    name : string, default None

    year : integer, default None
           year of the data
    """
    if year is None:
        raise Exception("year is needed")
    if name is None:
        raise Exception("name is needed")
    hdf_file_path = get_tmp_file_path(
        config_files_directory=config_files_directory)
    store = HDFStore(hdf_file_path)
    log.info("{}".format(store))
    store_path = "{}/{}".format(year, name)

    if store_path in store.keys():
        del store["{}/{}".format(year, name)]

    dataframe.to_hdf(hdf_file_path, store_path)

    store.close()
    return True
Example #24
def build_from_openfisca( directory = None):

    df_age_final = None
    for yr in range(2006,2010):
        simulation = SurveySimulation()
        simulation.set_config(year = yr)
        simulation.set_param()
        simulation.set_survey()


        df_age = get_age_structure(simulation)
        df_age[yr] = df_age['wprm']
        del df_age['wprm']
        if df_age_final is None:
            df_age_final = df_age
        else:
            df_age_final = df_age_final.merge(df_age)

    if directory is None:
        directory = os.path.dirname(__file__)

    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    print df_age_final.dtypes
    store.put("openfisca", df_age_final)
    store.close()
 def put(self, path, obj):
     s = HDFStore(self.path)
     if path in s:
         print "updating %s" % path
         s.remove(path)
     s[path] = obj
     s.close()
Example #26
def test_chunk():
    print "debut"
    writer = None
    years = range(2011,2012)
    filename = destination_dir+'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
#        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year = yr)
        simu.set_param()
        import time

        tps = {}
        for nb_chunk in range(1,5):
            deb_chunk = time.clock()
            simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5', num_table=3, chunks_count=nb_chunk ,
                            print_missing=False)
            simu.compute()
            tps[nb_chunk] = time.clock() - deb_chunk

            voir = simu.output_table.table3['foy']
            print len(voir)
            pdb.set_trace()
            agg3 = Aggregates()
            agg3.set_simulation(simu)
            agg3.compute()
            df1 = agg3.aggr_frame
            print df1.to_string()

    print tps
    store.close()
Example #27
def test():

    directory = os.path.dirname(__file__)
    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    print store
    print store.keys()
Example #28
def SAVE_ChangeDictOrder(_processedEvents):
    '''Change the nesting order for the final HDF database - instead of correct/attention, it will go attention/present/correct etc'''


    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    #Replace the '_EVENTS' because the path in the HDF must match exactly, otherwise nothing was being saved
    all_event_names = sorted([name.replace('_EVENTS', '') for name in events_names if bef_aft_dict[bef_aft_switch + '_mat'] in name])

    store = HDFStore(h_path +bef_aft_dict[bef_aft_switch+ '_hdf'])

    for _data, recording in zip(_processedEvents, all_event_names):
        print('I')
        sname = recording.rfind("/") +1
        subId = recording[sname:-4].replace("-", "_")

        store[subId + '/events/attention/correct'] = _data['correct']['attention'].convert_objects()
        store[subId + '/events/motor/correct'] = _data['correct']['motor'].convert_objects()

        store[subId + '/events/attention/incorrect'] = _data['incorrect']['attention'].convert_objects()
        store[subId + '/events/motor/incorrect'] = _data['incorrect']['motor'].convert_objects()

        #print(_data['incorrect']['motor'].convert_objects())



    store.close()
 def func_wrapper(*args, **kwargs):
     temporary_store = HDFStore(file_path)
     try:
         return func(*args, temporary_store = temporary_store, **kwargs)
     finally:
         gc.collect()
         temporary_store.close()
Example #30
def storeEEGinHDF():
    """Load EEG from 64 electrodes x ~30 min at 500 hz (large dataset)"""
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"

    all_eeg_names = sorted([
        name for name in eeg_names
        if bef_aft_dict[bef_aft_switch + '_mat'].replace("_EVENTS", "") in name
    ])
    store = HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf'])

    #Create an HDF database with single-precision floats (float32)
    cnt = 0
    for recording in all_eeg_names:
        cnt = cnt + 1
        sname = recording.rfind("/") + 1

        subId = recording[sname:-4].replace("-", "_")

        sig = pd.DataFrame(
            sio.loadmat(recording,
                        struct_as_record=True)['eegToSave']).transpose()
        #Modified here to save a filtered version; previously: store[subId + "/signal/f"] = sig.convert_objects()

        store[subId + "/signal/filtered_30/"] = sig.convert_objects().apply(
            FilterData, axis=0)
        print(cnt)
    store.close()
Example #31
def SAVE_ChangeDictOrder(_processedEvents):
    '''Change the nesting order for the final HDF database - instead of correct/attention, it will go attention/present/correct etc'''

    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    #Replace the '_EVENTS' because the path in the HDF must match exactly, otherwise nothing was being saved
    all_event_names = sorted([
        name.replace('_EVENTS', '') for name in events_names
        if bef_aft_dict[bef_aft_switch + '_mat'] in name
    ])

    store = HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf'])

    for _data, recording in zip(_processedEvents, all_event_names):
        print('I')
        sname = recording.rfind("/") + 1
        subId = recording[sname:-4].replace("-", "_")

        store[subId + '/events/attention/correct'] = _data['correct'][
            'attention'].convert_objects()
        store[subId + '/events/motor/correct'] = _data['correct'][
            'motor'].convert_objects()

        store[subId + '/events/attention/incorrect'] = _data['incorrect'][
            'attention'].convert_objects()
        store[subId + '/events/motor/incorrect'] = _data['incorrect'][
            'motor'].convert_objects()

        #print(_data['incorrect']['motor'].convert_objects())

    store.close()
Example #32
    def save(self, store: pandas.HDFStore) -> None:
        """
        Save a model to an open HDFStore.

        Notes:
            Performs an IO operation.

        Args:
            store (pandas.HDFStore)

        Returns:
            None

        """
        # save the config as an attribute
        config = self.get_config()
        store.put('model', pandas.DataFrame())
        store.get_storer('model').attrs.config = config
        # save the parameters
        for i in range(self.num_weights):
            key = os.path.join('weights', 'weights' + str(i))
            self.weights[i].save_params(store, key)
        for i in range(self.num_layers):
            key = os.path.join('layers', 'layers' + str(i))
            self.layers[i].save_params(store, key)
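
A self-contained sketch of the attribute round-trip this save method relies on (hypothetical file and config; the model class itself is not reproduced): pandas allows arbitrary picklable metadata to be attached to a node via get_storer(...).attrs.

import pandas
from pandas import HDFStore

config = {"layers": [64, 32], "activation": "relu"}    # made-up config
with HDFStore("model_example.h5") as store:
    store.put("model", pandas.DataFrame())             # placeholder node, as in save() above
    store.get_storer("model").attrs.config = config    # attach metadata to the node

with HDFStore("model_example.h5") as store:
    print(store.get_storer("model").attrs.config)      # the config dict comes back intact
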
def evaluate(model,
             test_hdf_file,
             get_batch,
             loss_function,
             batch_size,
             cuda=False):
    store_test = HDFStore(test_hdf_file)
    test_loss = 0
    accuracy = 0
    count = 0
    model.eval()
    if cuda:
        model = model.cuda()
    test_gen = get_batch(store_test, batch_size)
    for x, target, src_padding, target_padding in test_gen:
        if cuda:
            x, target = x.cuda(), target.cuda()
        out = model(x)
        loss = loss_function(out, target)
        acc = int(
            torch.all(out.argmax(dim=-1) == target, dim=-1).to(
                torch.int).sum()) / out.shape[0]
        test_loss += loss.item()
        accuracy += acc
        count += 1
    test_loss /= count
    accuracy /= count
    print("Test Loss :", test_loss)
    print("Test accuracy :", accuracy)
    store_test.close()
Example #34
 def __init__(self,
              path: str,
              table: str,
              compute: Optional[Callable] = None) -> None:
     self.table = table
     if compute:
         self.store = PandasHDFStore(path,
                                     complevel=self.complevel,
                                     complib=self.complib)
         dataframe = compute()
         dataframe.sort_values(by="where", axis=0, inplace=True)
         self._mangle_where(dataframe)
         self.store.put(
             self.table,
             dataframe,
             append=False,
             format="table",
             expectedrows=len(dataframe),
             data_columns=[
                 "where_", "where_type", "who", "who_type", "when",
                 "when_type"
             ],
         )
     else:
         self.store = PandasHDFStore(path,
                                     complevel=self.complevel,
                                     complib=self.complib,
                                     mode="r")
Example #35
def build_actualisation_group_names_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print df.to_string()
    store.close()
 def save(self,dataFile):
     """ save data to HDF"""
     print('Saving data to', dataFile)
     store = HDFStore(dataFile)
     for symbol in self.wp.items:
         store[symbol] = self.wp[symbol]
         
     store.close()
Example #37
    def save(self, dataFile):
        """ save data to HDF"""
        print 'Saving data to', dataFile
        store = HDFStore(dataFile)
        for symbol in self.wp.items:
            store[symbol] = self.wp[symbol]

        store.close()
Example #38
 def __init__(self, delta=1.0, resize=True):
     self.store = HDFStore('../dataset/labels.h5')
     self.ava_table = self.store['labels_train']
     self.ava_path = "../dataset/AVA/data/"
     self.ava_data_path = os.path.join(os.getcwd(), self.ava_path)
     self.h5f = h5py.File(
         '../dataset/images_299x299_delta_{}.h5'.format(delta), 'w')
     self.delta = delta
def build_actualisation_group_names_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print df.to_string()
    store.close()
def show_temp(config_files_directory=default_config_files_directory):

    hdf_file_path = get_tmp_file_path(
        config_files_directory=config_files_directory)
    store = HDFStore(hdf_file_path)

    log.info("{}".format(store))
    store.close()
 def get_children_paths(self, node_path):
     s = HDFStore(self.path)
     node = s.get_node(node_path)
     children = []
     for child, df in node._v_children.items():
         children.append(df._v_pathname)
     s.close()
     return children
Example #43
class PandasHDFHandler(FileHandler):
    """
    Handler for HDF5 files using Pandas.
    """
    def _open_for_read(self):
        self.handle = HDFStore(self.fname, mode='r')

    def _open_for_write(self):
        self.handle = HDFStore(self.fname)

    def list_items(self):
        keys = [key.strip('/') for key in self.handle.keys()]
        # axes
        items = [(key.split('/')[-1], 'Axis') for key in keys if '__axes__' in key]
        # groups
        items += [(key.split('/')[-1], 'Group') for key in keys if '__groups__' in key]
        # arrays
        items += [(key, 'Array') for key in keys if '/' not in key]
        return items

    def _read_item(self, key, type, *args, **kwargs):
        if type == 'Array':
            hdf_key = '/' + key
        elif type == 'Axis':
            hdf_key = '__axes__/' + key
            kwargs['name'] = key
        elif type == 'Group':
            hdf_key = '__groups__/' + key
            kwargs['name'] = key
        else:
            raise TypeError()
        return key, read_hdf(self.handle, hdf_key, *args, **kwargs)

    def _dump_item(self, key, value, *args, **kwargs):
        if isinstance(value, LArray):
            hdf_key = '/' + key
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Axis):
            hdf_key = '__axes__/' + key
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Group):
            hdf_key = '__groups__/' + key
            hdf_axis_key = '__axes__/' + value.axis.name
            value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs)
        else:
            raise TypeError()

    def _read_metadata(self):
        metadata = Metadata.from_hdf(self.handle)
        if metadata is None:
            metadata = Metadata()
        return metadata

    def _dump_metadata(self, metadata):
        metadata.to_hdf(self.handle)

    def close(self):
        self.handle.close()
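
A small sketch of the key layout this handler expects (file and names are made up): arrays live at the root, while axes and groups sit under the '__axes__' and '__groups__' prefixes that list_items filters on.

from pandas import DataFrame, HDFStore

with HDFStore("example_session.h5") as handle:
    handle.put("population", DataFrame({"value": [1, 2, 3]}))          # an Array at the root
    handle.put("__axes__/time", DataFrame({"labels": [2020, 2021]}))   # an Axis
    handle.put("__groups__/recent", DataFrame({"labels": [2021]}))     # a Group
    print(handle.keys())
    # contains '/population', '/__axes__/time', '/__groups__/recent'
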
def convert_to_3_tables(year=2006, survey_file=None, output_file=None):

    if survey_file is None:
        raise Exception(
            'You need a .h5 file with the survey to extract the variables from'
        )
    if output_file is None:
        output_file = survey_file
        import warnings
        warnings.warn('the survey file will be used to store the created tables')

    store = HDFStore(survey_file)
    output = HDFStore(output_file)
    print output

    simulation = SurveySimulation()
    simulation.set_config(year=year)
    table1 = store['survey_' + str(year)]

    for entity in ['ind', 'foy', 'men', 'fam']:
        key = 'survey_' + str(year) + '/' + str(entity)

        vars_matching_entity = vars_matching_entity_from_table(
            table1, simulation, entity)
        print entity, vars_matching_entity
        print 'table1 enum'

        if entity == 'ind':
            print 'INDIVIDUALS'
            print table1['noindiv']
            table_entity = table1.loc[:, vars_matching_entity]

        # we make sure to keep all ident columns and to select rows where qui == 0
        else:
            #             print '    entity :', entity
            #             print table1['noindiv'].head()
            position = 'qui' + entity
            #             print table1[position]
            table_entity = table1.ix[table1[position] == 0, [
                'noi', 'idmen', 'idfoy', 'idfam', 'quifoy', 'quimen', 'quifam'
            ] + vars_matching_entity]
            #             print table_entity.noi.head()
            table_entity = table_entity.rename_axis(table_entity['id' +
                                                                 entity],
                                                    axis=1)


#             print '    APRES'
#             print table_entity.noi.head()
        print key
        output.put(key, table_entity)

    del table1
    import gc
    gc.collect()

    store.close()
    output.close()
Example #45
 def setup(self):
     self.fname = '__test__.h5'
     with warnings.catch_warnings(record=True):
         self.p = Panel(np.random.randn(20, 1000, 25),
                        items=['Item%03d' % i for i in range(20)],
                        major_axis=date_range('1/1/2000', periods=1000),
                        minor_axis=['E%03d' % i for i in range(25)])
         self.store = HDFStore(self.fname)
         self.store.append('p1', self.p)
Example #46
    def __init__(self, filename):
        """
        Parameters
        ----------
        filename : filename pointing to an existing HDFStore with
            valid data in it.

        """
        self._store = HDFStore(filename)
Example #47
def test_read_nokey_empty(setup_path):
    with ensure_clean_path(setup_path) as path:
        store = HDFStore(path)
        store.close()
        msg = re.escape(
            "Dataset(s) incompatible with Pandas data types, not table, or no "
            "datasets found in HDF5 file.")
        with pytest.raises(ValueError, match=msg):
            read_hdf(path)
Example #48
def run_convert(basedir, beam):

    print "Converting for:", beam

    infile = basedir + "/lmon_p"+str(beam)+".root"
    outfile = basedir + "/HCal_p"+str(beam)+".h5"

    #lmon input
    inp = TFile.Open(infile)
    tree = inp.Get("DetectorTree")

    #load the tree
    ucal_edep_EMC = rt.EntryD()
    ucal_edep_HAC1 = rt.EntryD()
    ucal_edep_HAC2 = rt.EntryD()
    ucal_edep_layers = std.vector(float)()
    tree.SetBranchAddress("ucal_edep_EMC", AddressOf(ucal_edep_EMC, "v"))
    tree.SetBranchAddress("ucal_edep_HAC1", AddressOf(ucal_edep_HAC1, "v"))
    tree.SetBranchAddress("ucal_edep_HAC2", AddressOf(ucal_edep_HAC2, "v"))
    tree.SetBranchAddress("ucal_edep_layers", ucal_edep_layers)

    tree.GetEntry(0)
    nlay = ucal_edep_layers.size()

    #output DataFrame
    col = ["ucal_edep_EMC", "ucal_edep_HAC1", "ucal_edep_HAC2"]
    for i in range(nlay):
        col.append( "ucal_edep_layer"+str(i) )

    df_inp = []

    #event loop
    for iev in xrange(tree.GetEntriesFast()):

        tree.GetEntry(iev)

        lin = []
        lin.append(ucal_edep_EMC.v)
        lin.append(ucal_edep_HAC1.v)
        lin.append(ucal_edep_HAC2.v)

        for i in xrange(nlay):

            lin.append(ucal_edep_layers.at(i))

        df_inp.append(lin)

    df = DataFrame(df_inp, columns=col)

    print df

    out = HDFStore(outfile)
    out["hcal"] = df
    out.close()

    inp.Close()
Example #49
 def test_store(self):    
     final_store = HDFStore(self.store_path)
     print '----'
     print final_store.keys()
     print '-' * 80
     logs = final_store['/logs']
     print type(logs)
     print len(logs)
     print logs.columns
     final_store.close()
 def _put(self, path, obj):
     s = HDFStore(self.path)
     if path in s:
         print("updating %s" % path)
         s.remove(path)
         s.close()
     s = HDFStore(self.path)
     s[path] = obj
     s.flush(fsync=True)
     s.close()
 def load(self,dataFile):
     """load data from HDF"""
     if os.path.exists(dataFile):
         store = HDFStore(dataFile)
         symbols = store.keys()    
         data = dict(zip(symbols,[store[symbol] for symbol in symbols]))
         self.wp = WidePanel(data)
         store.close()
     else:
         raise IOError('Data file does not exist')
 def load(self,dataFile):
     """load data from HDF"""
     if os.path.exists(dataFile):
         store = HDFStore(dataFile)
         symbols = [str(s).strip('/') for s in list(store.keys()) ]   
         data = dict(list(zip(symbols,[store[symbol] for symbol in symbols])))
         self.wp = Panel(data)
         store.close()
     else:
         raise IOError('Data file does not exist')
Example #53
def anls():
	store = HDFStore('hdf5/divvy.h5')
	pd = store['divvy']
	store.close()
	df = reduce(lambda x,y: x.append(y),[pd[i] for i in pd.items])
	df.index = df.timestamp
	foo = map(lambda x: x[1],df.groupby('id'))
	for i in range(len(foo)): foo[i]['diff'] = foo[i].availableBikes.diff()
	for i in range(len(foo)): foo[i]['diff'].hist(range=[-5,5],bins=20)
	plt.show()
def final_check(year=2006):
    test_filename = os.path.join(DATA_SOURCES_DIR, "test.h5")
    survey_filename = os.path.join(DATA_SOURCES_DIR, "survey.h5")

    store = HDFStore(test_filename)
    survey = HDFStore(survey_filename)

    final2 = store.get('survey_2006')
    print survey
    finalT = survey.get('survey_2006')

    varlist = [
        'adeben',
        'adfdap',
        'amois',
        'ancchom',
        'ancentr',
        'anciatm',
        'ancrech',
        'anref',
        'contra',
        'datant',
        'dimtyp',
        'ident',
        'idfoy',
        'noi',
        'nondic',
        'rabs',
        'RABSP',
        'RAISTP',
        'raistp',
        'rdem',
        'retrai',
        'sitant',
        'sp10',
        'sp11',
        'stc',
        'TXTPPB',
        ]

    for i in range(0, 10):
        varname = 'sp0' + str(i)
        varlist.append(varname)

    varlist = set(varlist)
    columns = final2.columns
    columns = set(columns)

    print varlist.difference(columns)
    print final2.loc[
        final2.idfoy == 603018901,
        ['idfoy', 'quifoy', 'idfam', 'quifam', 'idmen', 'quimen', 'noi']
        ].to_string()

    return
Example #55
 def refresh_population(self):
     '''
     Refresh after population update
     '''
     population_file = CONF.get('paths', 'population_file')         
     store_pop = HDFStore(population_file,'r')
     self.population = store_pop[self._param_widget.population_name]
     store_pop.close()
     population = self.population.reset_index()
     self._population_widget.set_dataframe(population)
     self._population_widget.update_view()
Example #56
    def store_results(self, result, index, columns, hdf5_file):
        self.df = DataFrame(result, columns=columns)
        self.df = self.df.set_index(index)
        self.df.sort_index(inplace=True)

        # Store the DataFrame as an HDF5 file...
        hdf = HDFStore(hdf5_file)
        # Append the dataframe, and ensure addr / host can be 17 chars long
        hdf.append('df', self.df, data_columns=list(columns), 
            min_itemsize={'addr': 17, 'host': 17})
        hdf.close()
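
Because the append above uses the table format with data_columns, the frame can later be queried on disk with select. A hedged read-back sketch (the file name and address value are made up; it assumes 'addr' was among the columns passed to store_results):

from pandas import HDFStore

with HDFStore("scan_results.h5") as hdf:
    # 'addr' was declared as a data column, so it can appear in a where clause.
    subset = hdf.select("df", where="addr == '192.168.0.1'")
    print(len(subset))
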
def convert_fiducial(filename, output_type="csv", decimal_places=8,
                     append_comp=True, num_fids=5, return_name=True,
                     mode='mean', **kwargs):
    '''
    Converts the fiducial comparison HDF5 files into a CSV file.

    Parameters
    ----------
    filename : str
        HDF5 file.
    output_type : str, optional
           Type of file to output.
    decimal_places : int, optional
        Specify the number of decimal places to keep.
    append_comp : bool, optional
        Append columns with the fiducial numbers compared.
    num_fids : int, optional
        Number of fiducials compared.
    '''

    store = HDFStore(filename)
    data_columns = dict()
    for key in store.keys():
        data = store[key].sort(axis=1)
        mean_data = timestep_choose(data, mode=mode, **kwargs)
        data_columns[key[1:]] = trunc_float(mean_data, decimal_places)
        comp_fids = store[key].index
    store.close()

    df = DataFrame(data_columns)

    if append_comp:
        fids = []
        for fid, num in zip(np.arange(0, num_fids - 1),
                            np.arange(num_fids - 1, 0, -1)):
            for _ in range(num):
                fids.append(fid)

        df["Fiducial 1"] = Series(np.asarray(fids).T, index=df.index)
        df["Fiducial 2"] = Series(comp_fids.T, index=df.index)

    for comp in all_comparisons:
        if comp in filename:
            break
    else:
        raise StandardError("Could not find a face comparison match for " +
                            filename)

    output_name = "fiducials" + comp[:-1] + "." + output_type

    df.to_csv(output_name)

    if return_name:
        return output_name
Example #58
    def __init__(self, *args, **kwargs):
        probe_interval = kwargs.pop("probe_interval", 1.0)
        self._lock = "%s.lock" % args[0]
        while True:
            try:
                self._flock = os.open(self._lock, os.O_CREAT |
                                                  os.O_EXCL |
                                                  os.O_WRONLY)
                break
            except FileExistsError:
                time.sleep(probe_interval)

        HDFStore.__init__(self, *args, **kwargs)
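
Both this example and Example #3 serialise access to the HDF5 file through an os.open(..., O_CREAT | O_EXCL) lock file. A minimal standalone sketch of that idiom (the function names are illustrative, not from the source):

import os
import time

def acquire_lock(lock_path, probe_interval=1.0):
    """Spin until the lock file can be created exclusively, then return its fd."""
    while True:
        try:
            return os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            time.sleep(probe_interval)

def release_lock(lock_path, fd):
    """Close the descriptor and remove the lock file so other writers can proceed."""
    os.close(fd)
    os.remove(lock_path)
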