Beispiel #1
0
    def munge_tables(self, hashval, start_date, end_date):
        """Collect cached rows whose ``date`` lies in [start_date, end_date].

        Row ranges are looked up in the ``cache_spec`` table, each range is
        read from ``self.localpath``, and the pieces are concatenated, sorted
        by ``date`` and filtered to the inclusive date window.

        Parameters
        ----------
        hashval
            Not used by this method; kept so existing callers keep working.
        start_date, end_date
            Inclusive bounds compared against the ``date`` column of the
            loaded rows and used to pick entries from ``cache_spec``.

        Returns
        -------
        The concatenated rows with ``start_date <= date <= end_date``.

        Notes
        -----
        If no ``cache_spec`` entry matches, ``pd.concat`` receives an empty
        list; current pandas raises ``ValueError`` in that case
        (NOTE(review): confirm for the pandas version in use).
        """
        store = self.store

        # NOTE(review): the second filter keeps specs with
        # end_date <= end_date, mirroring the '<=' of the first filter;
        # confirm that this is the intended overlap test.
        df_min = store_select(store, 'cache_spec',
                              where=[('start_date', '<=', start_date)]).reset_index()
        df_max = store_select(store, 'cache_spec',
                              where=[('end_date', '<=', end_date)]).reset_index()

        # pd.concat is the version-independent spelling of df_min.append(df_max);
        # a spec matched by both filters is kept once (keyed on _end_row).
        df_total = pd.concat([df_min, df_max]).drop_duplicates('_end_row')

        row_ranges = df_total[['_start_row', '_end_row']].values
        df_list = [
            store_select(store, self.localpath, start=start_row, stop=end_row)
            for start_row, end_row in row_ranges
        ]

        df_concat = pd.concat(df_list)
        # DataFrame.sort was replaced by sort_values in newer pandas; fall
        # back to the old name so older installations keep working.
        if hasattr(df_concat, 'sort_values'):
            df_concat = df_concat.sort_values('date')
        else:
            df_concat = df_concat.sort(['date'])

        in_window = (df_concat['date'] >= start_date) & (df_concat['date'] <= end_date)
        return df_concat[in_window]
Beispiel #2
0
    def select(self, query_filter, where=None):
        """Return the cached rows for ``query_filter``, caching them first if needed.

        ``self.cache_info`` supplies the (start, stop) row bounds. When it
        reports ``None`` the data is cached via ``self.cache_data`` and the
        bounds are looked up a second time. ``where`` is forwarded unchanged
        to ``store_select``.
        """
        bounds = self.cache_info(query_filter)
        if bounds is None:
            # Nothing cached yet for this filter: populate, then look up again.
            self.cache_data(query_filter)
            bounds = self.cache_info(query_filter)

        first_row, last_row = bounds
        return store_select(self.store, self.localpath, where=where,
                            start=first_row, stop=last_row)
Beispiel #3
0
 def cache_info(self, query_params):
     """Look up the row bounds recorded in ``cache_spec`` for ``query_params``.

     The parameters are converted with ``self.parameter_dict`` and the
     resulting items are passed as the ``where`` clause of the lookup.

     Returns ``None`` when the lookup raises ``KeyError``, returns ``None``
     or matches no rows; otherwise the tuple
     ``(result['_start_row'], result['_end_row'])``.
     """
     where_terms = self.parameter_dict(query_params).items()
     try:
         spec = store_select(self.store, 'cache_spec', where=where_terms)
     except KeyError:
         # A KeyError from the lookup is reported as "nothing cached".
         return None
     if spec is None or spec.shape[0] == 0:
         return None
     return spec['_start_row'], spec['_end_row']
Beispiel #4
0
 def cache_info(self, query_filter):
     """Look up the row bounds recorded in ``cache_spec`` for ``query_filter``.

     The filter is reduced to a hash with ``self.gethashval`` and the
     ``cache_spec`` table is queried for rows with that ``hashval``.

     Returns ``None`` when the lookup raises ``KeyError``, returns ``None``
     or matches no rows; otherwise the tuple
     ``(result['_start_row'], result['_end_row'])``.
     """
     digest = self.gethashval(query_filter)
     try:
         # where clause written in the pandas 0.13 style
         matches = store_select(self.store, 'cache_spec',
                                where=[('hashval', digest)])
     except KeyError:
         matches = None
     if matches is None or not matches.shape[0]:
         return None
     return matches['_start_row'], matches['_end_row']
Beispiel #5
0
 def cache_info(self, query_params):
     """Look up the row bounds recorded in ``cache_spec`` for ``query_params``.

     The parameters are converted with ``self.parameter_dict``, hashed with
     ``gethashval``, and the ``cache_spec`` table is queried for rows with
     that ``hashval``.

     Returns ``None`` when the lookup raises ``KeyError``, returns ``None``
     or matches no rows; otherwise the tuple
     ``(result['_start_row'], result['_end_row'])``.
     """
     key = gethashval(self.parameter_dict(query_params))
     try:
         rows = store_select(self.store, 'cache_spec',
                             where=[('hashval', key)])
     except KeyError:
         # A KeyError from the lookup is reported as "nothing cached".
         return None
     found = rows is not None and rows.shape[0] != 0
     return (rows['_start_row'], rows['_end_row']) if found else None
Beispiel #6
0
 def select(self, **kwargs):
     """Return the cached rows for the given query parameters.

     Every field named in ``self.cache_discrete_fields`` is wrapped in a
     one-element list unless it is already a list, tuple or ``np.ndarray``
     (a missing field therefore becomes ``[None]``). The optional ``where``
     keyword is removed from the parameters and forwarded to
     ``store_select``; a falsy ``where`` is passed as ``None``.

     ``self.cache_info`` supplies the (start, stop) row bounds; when it
     reports ``None`` the data is cached via ``self.cache_data`` and the
     bounds are looked up a second time.
     """
     sequence_types = (list, tuple, np.ndarray)
     for name in self.cache_discrete_fields:
         current = kwargs.get(name)
         if isinstance(current, sequence_types):
             continue
         kwargs[name] = [current]

     where = kwargs.pop('where', None)
     bounds = self.cache_info(kwargs)
     if bounds is None:
         # Nothing cached yet for these parameters: populate, then retry.
         self.cache_data(kwargs)
         bounds = self.cache_info(kwargs)
     first_row, last_row = bounds

     # Normalise any falsy filter (e.g. an empty list) to None.
     where = where if where else None
     return store_select(self.store, self.localpath,
                         where=where, start=first_row, stop=last_row)
Beispiel #7
0
 def _single_select(self, **kwargs):
     """Return the cached rows for one set of query parameters.

     The optional ``where`` keyword is removed from the parameters and
     forwarded to ``store_select``; a falsy ``where`` is passed as ``None``.
     ``self.cache_info`` supplies the row bounds; when it reports ``None``
     the data is cached via ``self.cache_data`` and the bounds are looked
     up a second time.
     """
     query_params = kwargs
     where = query_params.pop('where', None)
     cache_info = self.cache_info(query_params)
     if cache_info is None:
         self.cache_data(query_params)
         cache_info = self.cache_info(query_params)
     start_row, end_row = cache_info
     # The bounds arrive as pandas Series; reduce each to its first entry.
     # Positional .iloc[0] is used instead of [0], which is a label lookup
     # on an integer index and fails when the matched row is not labelled 0.
     start_row = start_row.iloc[0]
     end_row = end_row.iloc[0]
     if not where:
         where = None
     result = store_select(self.store, self.localpath,
                           where=where, start=start_row, stop=end_row)
     return result
Beispiel #8
0
 def query_min_itemsize(self):
     """Return the stored ``min_itemsize`` entry converted with ``to_dict()``.

     Returns ``None`` when selecting ``'min_itemsize'`` from the store
     raises ``KeyError``.
     """
     try:
         stored = store_select(self.store, 'min_itemsize')
     except KeyError:
         sizes = None
     else:
         sizes = stored.to_dict()
     return sizes