Example #1
0
    def load_data(self):
        """Execute ``self.query`` and write its result set into the store.

        Opens a connection through ``self.db()``, runs the query on a fresh
        cursor, derives column metadata with ``query_info`` and hands the
        open cursor to ``write_pandas_hdf_from_cursor`` with
        ``replace=True`` and a chunk size of 50000. ``self.min_itemsize``
        is overwritten with the value returned by ``query_info`` and the
        store is flushed once the write has finished.

        The cursor is closed even when the query or the write raises.
        ``self.col_types`` is left untouched: the datetime overrides are
        merged into a copy rather than written into the instance's dict.
        """
        with self.db() as db:
            logger.debug("connected db!")
            cur = db.cursor()
            try:
                logger.debug("query executing!")
                cur.execute(self.query)
                logger.debug("query returned!")
                logger.debug("cursor descr %s", cur.description)

                # Unset (falsy) settings fall back to empty containers.
                columns, min_itemsize, dt_fields = query_info(
                    cur,
                    min_itemsize=self.min_itemsize or {},
                    db_string_types=self.db_string_types or [],
                    db_datetime_types=self.db_datetime_types or [],
                )
                self.min_itemsize = min_itemsize
                logger.debug("queryinfo %s",
                             (columns, min_itemsize, dt_fields))

                # Force every field reported in dt_fields to
                # datetime64[ns]. Merge into a copy so the dict held in
                # self.col_types is not mutated, and so a missing
                # (None) col_types does not break the assignment.
                overrides = self.col_types
                dt_overrides = dict.fromkeys(dt_fields, 'datetime64[ns]')
                if dt_overrides:
                    overrides = dict(self.col_types or {})
                    overrides.update(dt_overrides)

                write_pandas_hdf_from_cursor(
                    self.store, self.localpath, cur,
                    columns, self.min_itemsize,
                    dtype_overrides=overrides,
                    min_item_padding=self.min_item_padding,
                    chunksize=50000,
                    replace=True)
            finally:
                # Release the cursor on both the success and error paths.
                cur.close()
            self.store.flush()
Example #2
0
    def cache_data(self, query_params, start_date, end_date):
        """Fetch rows for a date range and append them to the store.

        The first entry of ``self.fields`` whose name contains ``'date'``
        is used as the range column. ``query_params`` is AND-ed with
        ``start_date <= column <= end_date``, turned into a query by
        ``self.cache_query`` and executed on ``self.session``. The result
        is appended (``replace=False``) via
        ``write_pandas_hdf_from_cursor`` and the row range occupied by the
        new data is recorded with ``self.store_cache_spec``.

        ``self.min_itemsize`` is overwritten with the value returned by
        ``query_info``. ``self.col_types`` is left untouched: the datetime
        overrides are merged into a copy.

        Args:
            query_params: filter expression combined with the date range.
            start_date: inclusive lower bound for the date column.
            end_date: inclusive upper bound for the date column.

        Raises:
            ValueError: if no name in ``self.fields`` contains ``'date'``.
        """
        col_date = next((f for f in self.fields if 'date' in f), None)
        if col_date is None:
            # Previously this surfaced later as an UnboundLocalError.
            raise ValueError(
                "cache_data needs a field whose name contains 'date'")

        def current_nrows():
            # Falls back to 0 when self.table (or its nrows attribute)
            # raises AttributeError.
            try:
                return self.table.nrows
            except AttributeError:
                return 0

        all_query = and_(query_params,
                         column(col_date) >= start_date,
                         column(col_date) <= end_date)
        q = self.cache_query(all_query)
        log.debug("%s", q)

        cur = self.session.execute(q)

        # HACK: expose description/arraysize directly on the result object
        # before handing it to query_info -- presumably so it can be
        # treated like a DB-API cursor; confirm against query_info.
        cur.description = cur._cursor_description()
        cur.arraysize = 500

        # Unset (falsy) settings fall back to empty containers.
        columns, min_itemsize, dt_fields = query_info(
            cur,
            min_itemsize=self.min_itemsize or {},
            db_string_types=self.db_string_types or [],
            db_datetime_types=self.db_datetime_types or [],
        )
        self.min_itemsize = min_itemsize
        self.finalize_min_itemsize()

        # Force every field reported in dt_fields to datetime64[ns].
        # Merge into a copy so the dict held in self.col_types is not
        # mutated, and so a missing (None) col_types does not break the
        # assignment.
        overrides = self.col_types
        dt_overrides = dict.fromkeys(dt_fields, 'datetime64[ns]')
        if dt_overrides:
            overrides = dict(self.col_types or {})
            overrides.update(dt_overrides)

        starting_row = current_nrows()
        write_pandas_hdf_from_cursor(self.store, self.localpath, cur,
                                     columns, self.min_itemsize,
                                     dtype_overrides=overrides,
                                     min_item_padding=self.min_item_padding,
                                     chunksize=50000,
                                     replace=False)
        ending_row = current_nrows()

        self.store_cache_spec(query_params, starting_row, ending_row,
                              start_date, end_date)
Example #3
0
    def load_data(self):
        """Execute ``self.query`` and write its result set into the store.

        Opens a connection through ``self.db()``, runs the query on a fresh
        cursor, derives column metadata with ``query_info`` and hands the
        open cursor to ``write_pandas_hdf_from_cursor`` with
        ``replace=True`` and a chunk size of 50000. ``self.min_itemsize``
        is overwritten with the value returned by ``query_info`` and the
        store is flushed once the write has finished.

        The cursor is closed even when the query or the write raises.
        ``self.col_types`` is left untouched: the datetime overrides are
        merged into a copy rather than written into the instance's dict.
        """
        with self.db() as db:
            logger.debug("connected db!")
            cur = db.cursor()
            try:
                logger.debug("query executing!")
                cur.execute(self.query)
                logger.debug("query returned!")
                logger.debug("cursor descr %s", cur.description)

                # Unset (falsy) settings fall back to empty containers.
                columns, min_itemsize, dt_fields = query_info(
                    cur,
                    min_itemsize=self.min_itemsize or {},
                    db_string_types=self.db_string_types or [],
                    db_datetime_types=self.db_datetime_types or [])
                self.min_itemsize = min_itemsize
                logger.debug(
                    "queryinfo %s", (columns, min_itemsize, dt_fields))

                # Force every field reported in dt_fields to
                # datetime64[ns]. Merge into a copy so the dict held in
                # self.col_types is not mutated, and so a missing
                # (None) col_types does not break the assignment.
                overrides = self.col_types
                dt_overrides = dict.fromkeys(dt_fields, 'datetime64[ns]')
                if dt_overrides:
                    overrides = dict(self.col_types or {})
                    overrides.update(dt_overrides)

                write_pandas_hdf_from_cursor(
                    self.store,
                    self.localpath,
                    cur,
                    columns,
                    self.min_itemsize,
                    dtype_overrides=overrides,
                    min_item_padding=self.min_item_padding,
                    chunksize=50000,
                    replace=True)
            finally:
                # Release the cursor on both the success and error paths.
                cur.close()
            self.store.flush()