def load_data(self, df): """ Wraps the LOAD DATA DDL statement. Loads data into an OmniSciDB table from pandas.DataFrame or pyarrow.Table Parameters ---------- df: pandas.DataFrame or pyarrow.Table Returns ------- query : OmniSciDBQuery """ stmt = ddl.LoadData(self._qualified_name, df) return self._execute(stmt)
def read_csv( self, path: Union[str, Path], header: Optional[bool] = True, quotechar: Optional[str] = '"', delimiter: Optional[str] = ',', threads: Optional[int] = None, ) -> OmniSciDBQuery: """ Load data into an Omniscidb table from CSV file. Wraps the COPY FROM DML statement. Parameters ---------- path: str or pathlib.Path Path to the input data file header: bool, optional, default True Indicating whether the input file has a header line quotechar: str, optional, default '"' The character used to denote the start and end of a quoted item. delimiter: str, optional, default ',' threads: int, optional, default number of CPU cores on the system Number of threads for performing the data import. Returns ------- query : OmniSciDBQuery Examples -------- # assumptions: # - dataset can be found on ./datasets/functional_alltypes.csv # https://github.com/ibis-project/testing-data/blob/master/functional_alltypes.csv # - omnisci server is launched on localhost and using port: 6274 import ibis conn = ibis.omniscidb.connect( host="localhost", port="6274", user="******", password="******", ) t_name = "functional_alltypes" db_name = "ibis_testing" filename = "./datasets/functional_alltypes.csv" schema = ibis.schema( [ ('index', 'int64'), ('Unnamed__0', 'int64'), ('id', 'int32'), ('bool_col', 'bool'), ('tinyint_col', 'int16'), ('smallint_col', 'int16'), ('int_col', 'int32'), ('bigint_col', 'int64'), ('float_col', 'float32'), ('double_col', 'double'), ('date_string_col', 'string'), ('string_col', 'string'), ('timestamp_col', 'timestamp'), ('year_', 'int32'), ('month_', 'int32'), ] ) conn.create_table(t_name, schema=schema) db = conn.database(db_name) table = db.table(t_name) table.read_csv(filename, header=False, quotechar='"', delimiter=",") """ kwargs = { 'header': header, # 'quote' field couldn't be empty string for omnisci backend 'quote': quotechar if quotechar else '"', 'quoted': bool(quotechar), 'delimiter': delimiter, 'threads': threads, } stmt = ddl.LoadData(self._qualified_name, path, **kwargs) return self._execute(stmt)