def save_datasource(self, name, source_type, source, file_path=None):
    datasource_record = Datasource(company_id=self.company_id, name=name)

    if source_type == 'file' and (file_path is None):
        raise Exception('`file_path` argument required when source_type == "file"')

    ds_meta_dir = os.path.join(self.dir, name)
    os.mkdir(ds_meta_dir)

    session.add(datasource_record)
    session.commit()
    datasource_record = session.query(Datasource).filter_by(
        company_id=self.company_id, name=name).first()

    try:
        if source_type == 'file':
            source = os.path.join(ds_meta_dir, source)
            shutil.move(file_path, source)
            ds = FileDS(source)
            creation_info = {
                'class': 'FileDS',
                'args': [source],
                'kwargs': {}
            }
        elif source_type in self.config['integrations']:
            integration = self.config['integrations'][source_type]
            ds_class_map = {
                'clickhouse': ClickhouseDS,
                'mariadb': MariaDS,
                'mysql': MySqlDS,
                'postgres': PostgresDS,
                'mssql': MSSQLDS,
                'mongodb': MongoDS,
                'snowflake': SnowflakeDS
            }

            try:
                dsClass = ds_class_map[integration['type']]
            except KeyError:
                raise KeyError(f"Unknown DS type: {source_type}, type is {integration['type']}")

            if integration['type'] in ['clickhouse']:
                creation_info = {
                    'class': dsClass.__name__,
                    'args': [],
                    'kwargs': {
                        'query': source['query'],
                        'user': integration['user'],
                        'password': integration['password'],
                        'host': integration['host'],
                        'port': integration['port']
                    }
                }
                ds = dsClass(**creation_info['kwargs'])

            elif integration['type'] in ['mssql', 'postgres', 'mariadb', 'mysql']:
                creation_info = {
                    'class': dsClass.__name__,
                    'args': [],
                    'kwargs': {
                        'query': source['query'],
                        'user': integration['user'],
                        'password': integration['password'],
                        'host': integration['host'],
                        'port': integration['port']
                    }
                }

                if 'database' in integration:
                    creation_info['kwargs']['database'] = integration['database']

                if 'database' in source:
                    creation_info['kwargs']['database'] = source['database']

                ds = dsClass(**creation_info['kwargs'])

            elif integration['type'] == 'snowflake':
                creation_info = {
                    'class': dsClass.__name__,
                    'args': [],
                    'kwargs': {
                        'query': source['query'],
                        'schema': source['schema'],
                        'warehouse': source['warehouse'],
                        'database': source['database'],
                        'host': integration['host'],
                        'password': integration['password'],
                        'user': integration['user'],
                        'account': integration['account']
                    }
                }
                ds = dsClass(**creation_info['kwargs'])

            elif integration['type'] == 'mongodb':
                if isinstance(source['find'], str):
                    source['find'] = json.loads(source['find'])

                creation_info = {
                    'class': dsClass.__name__,
                    'args': [],
                    'kwargs': {
                        'database': source['database'],
                        'collection': source['collection'],
                        'query': source['find'],
                        'user': integration['user'],
                        'password': integration['password'],
                        'host': integration['host'],
                        'port': integration['port']
                    }
                }
                ds = dsClass(**creation_info['kwargs'])
        else:
            # This probably only happens for urls
            ds = FileDS(source)
            creation_info = {
                'class': 'FileDS',
                'args': [source],
                'kwargs': {}
            }

        df = ds.df

        if '' in df.columns or len(df.columns) != len(set(df.columns)):
            shutil.rmtree(ds_meta_dir)
            raise Exception('Each column in datasource must have unique non-empty name')

        datasource_record.creation_info = json.dumps(creation_info)
        datasource_record.data = json.dumps({
            'source_type': source_type,
            'source': source,
            'row_count': len(df),
            'columns': [dict(name=x) for x in list(df.keys())]
        })

        self.fs_store.put(name, f'datasource_{self.company_id}_{datasource_record.id}', self.dir)
    except Exception:
        if os.path.isdir(ds_meta_dir):
            shutil.rmtree(ds_meta_dir)
        raise

    session.commit()
    return self.get_datasource_obj(name, raw=True), name
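# Hedged usage sketch (not part of the original code): how the variant above might be
# called for an integration-backed datasource. `data_store` and the integration name
# 'my_clickhouse' are hypothetical placeholders; the `source` dict keys mirror what the
# branch above reads ('query', optionally 'database', or 'find'/'collection' for mongodb).
def _example_save_from_integration(data_store):
    # `source_type` must match a key under config['integrations'].
    ds_obj, final_name = data_store.save_datasource(
        name='sales_ds',
        source_type='my_clickhouse',
        source={'query': 'SELECT * FROM sales LIMIT 1000'}
    )
    return ds_obj, final_name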
def save_datasource(self, name, source_type, source, file_path=None):
    if source_type == 'file' and (file_path is None):
        raise Exception('`file_path` argument required when source_type == "file"')

    for i in range(1, 1000):
        if name in [x['name'] for x in self.get_datasources()]:
            previous_index = i - 1
            name = name.replace(f'__{previous_index}__', '')
            name = f'{name}__{i}__'
        else:
            break

    ds_meta_dir = os.path.join(self.dir, name)
    os.mkdir(ds_meta_dir)

    try:
        if source_type == 'file':
            source = os.path.join(ds_meta_dir, source)
            shutil.move(file_path, source)
            ds = FileDS(source)
            picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}
        elif source_type in self.config['integrations']:
            integration = self.config['integrations'][source_type]
            ds_class_map = {
                'clickhouse': ClickhouseDS,
                'mariadb': MariaDS,
                'mysql': MySqlDS,
                'postgres': PostgresDS,
                'mssql': MSSQLDS,
                'mongodb': MongoDS,
                'snowflake': SnowflakeDS
            }

            try:
                dsClass = ds_class_map[integration['type']]
            except KeyError:
                raise KeyError(f"Unknown DS type: {source_type}, type is {integration['type']}")

            if integration['type'] in ['clickhouse']:
                picklable = {
                    'class': dsClass.__name__,
                    'args': [],
                    'kwargs': {
                        'query': source['query'],
                        'user': integration['user'],
                        'password': integration['password'],
                        'host': integration['host'],
                        'port': integration['port']
                    }
                }
                ds = dsClass(**picklable['kwargs'])

            elif integration['type'] in ['mssql', 'postgres', 'mariadb', 'mysql']:
                picklable = {
                    'class': dsClass.__name__,
                    'args': [],
                    'kwargs': {
                        'query': source['query'],
                        'user': integration['user'],
                        'password': integration['password'],
                        'host': integration['host'],
                        'port': integration['port']
                    }
                }

                if 'database' in integration:
                    picklable['kwargs']['database'] = integration['database']

                if 'database' in source:
                    picklable['kwargs']['database'] = source['database']

                ds = dsClass(**picklable['kwargs'])

            elif integration['type'] == 'snowflake':
                picklable = {
                    'class': dsClass.__name__,
                    'args': [],
                    'kwargs': {
                        'query': source['query'],
                        'schema': source['schema'],
                        'warehouse': source['warehouse'],
                        'database': source['database'],
                        'host': integration['host'],
                        'password': integration['password'],
                        'user': integration['user'],
                        'account': integration['account']
                    }
                }
                ds = dsClass(**picklable['kwargs'])

            elif integration['type'] == 'mongodb':
                picklable = {
                    'class': dsClass.__name__,
                    'args': [],
                    'kwargs': {
                        'database': source['database'],
                        'collection': source['collection'],
                        'query': source['find'],
                        'user': integration['user'],
                        'password': integration['password'],
                        'host': integration['host'],
                        'port': integration['port']
                    }
                }
                ds = dsClass(**picklable['kwargs'])
        else:
            # This probably only happens for urls
            ds = FileDS(source)
            picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}

        df = ds.df

        if '' in df.columns or len(df.columns) != len(set(df.columns)):
            shutil.rmtree(ds_meta_dir)
            raise Exception('Each column in datasource must have unique name')

        # Not sure if needed
        # summary_analysis = self.get_analysis(ds.filter(limit=200))['data_analysis_v2']

        with open(os.path.join(ds_meta_dir, 'ds.pickle'), 'wb') as fp:
            pickle.dump(picklable, fp)

        with open(os.path.join(ds_meta_dir, 'metadata.json'), 'w') as fp:
            meta = {
                'name': name,
                'source_type': source_type,
                'source': source,
                'created_at': str(datetime.datetime.now()).split('.')[0],
                'updated_at': str(datetime.datetime.now()).split('.')[0],
                'row_count': len(df),
                'columns': [dict(name=x) for x in list(df.keys())]
            }
            json.dump(meta, fp, indent=4, sort_keys=True)

        with open(os.path.join(ds_meta_dir, 'versions.json'), 'wt') as fp:
            json.dump(self.config.versions, fp, indent=4, sort_keys=True)
    except Exception:
        if os.path.isdir(ds_meta_dir):
            shutil.rmtree(ds_meta_dir)
        raise

    return self.get_datasource_obj(name, raw=True), name
def save_datasource(self, name, source_type, source, file_path=None):
    if source_type == 'file' and (file_path is None):
        raise Exception('`file_path` argument required when source_type == "file"')

    for i in range(1, 1000):
        if name in [x['name'] for x in self.get_datasources()]:
            previous_index = i - 1
            name = name.replace(f'__{previous_index}__', '')
            name = f'{name}__{i}__'
        else:
            break

    ds_meta_dir = os.path.join(self.dir, name)
    os.mkdir(ds_meta_dir)

    ds_dir = os.path.join(ds_meta_dir, 'datasource')
    os.mkdir(ds_dir)

    if source_type == 'file':
        try:
            source = os.path.join(ds_dir, source)
            shutil.move(file_path, source)
            ds = FileDS(source)
        except Exception:
            shutil.rmtree(ds_meta_dir)
            raise

        picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}
    elif source_type in self.config['integrations']:
        integration = self.config['integrations'][source_type]

        dsClass = None
        picklable = {
            'args': [],
            'kwargs': {
                'query': source,
                'user': integration['user'],
                'password': integration['password'],
                'host': integration['host'],
                'port': integration['port']
            }
        }

        if integration['type'] == 'clickhouse':
            dsClass = ClickhouseDS
            picklable['class'] = 'ClickhouseDS'
        elif integration['type'] == 'mariadb':
            dsClass = MariaDS
            picklable['class'] = 'MariaDS'
        elif integration['type'] == 'mysql':
            dsClass = MySqlDS
            picklable['class'] = 'MySqlDS'
        elif integration['type'] == 'postgres':
            dsClass = PostgresDS
            picklable['class'] = 'PostgresDS'
        elif integration['type'] == 'mssql':
            dsClass = MSSQLDS
            picklable['class'] = 'MSSQLDS'
        else:
            raise ValueError(f'Unknown DS source_type: {source_type}')

        try:
            ds = dsClass(query=source,
                         user=integration['user'],
                         password=integration['password'],
                         host=integration['host'],
                         port=integration['port'])
        except Exception:
            shutil.rmtree(ds_meta_dir)
            raise
    else:
        # This probably only happens for urls
        print('Create URL data source !')
        try:
            ds = FileDS(source)
        except Exception:
            shutil.rmtree(ds_meta_dir)
            raise
        picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}

    df = ds.df
    df_with_types = cast_df_columns_types(df, self.get_analysis(df)['data_analysis_v2'])
    create_sqlite_db(os.path.join(ds_dir, 'sqlite.db'), df_with_types)

    with open(os.path.join(ds_dir, 'ds.pickle'), 'wb') as fp:
        pickle.dump(picklable, fp)

    with open(os.path.join(ds_dir, 'metadata.json'), 'w') as fp:
        meta = {
            'name': name,
            'source_type': source_type,
            'source': source,
            'created_at': str(datetime.datetime.now()).split('.')[0],
            'updated_at': str(datetime.datetime.now()).split('.')[0],
            'row_count': len(df),
            'columns': [dict(name=x) for x in list(df.keys())]
        }
        json.dump(meta, fp)

    return self.get_datasource_obj(name, raw=True), name
def save_datasource(self, name, source_type, source, file_path=None):
    print(name, source_type, source)

    if source_type == 'file' and (file_path is None):
        raise Exception('`file_path` argument required when source_type == "file"')

    for i in range(1, 1000):
        if name in [x['name'] for x in self.get_datasources()]:
            previous_index = i - 1
            name = name.replace(f'__{previous_index}__', '')
            name = f'{name}__{i}__'
        else:
            break

    ds_meta_dir = os.path.join(self.dir, name)
    os.mkdir(ds_meta_dir)

    ds_dir = os.path.join(ds_meta_dir, 'datasource')
    os.mkdir(ds_dir)

    print(source_type)
    if source_type == 'file':
        source = os.path.join(ds_dir, source)
        os.replace(file_path, source)
        ds = FileDS(source)
        picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}
    elif source_type == 'clickhouse':
        user = self.config['integrations']['default_clickhouse']['user']
        password = self.config['integrations']['default_clickhouse']['password']
        # TODO add host port params
        ds = ClickhouseDS(source, user=user, password=password)
        picklable = {
            'class': 'ClickhouseDS',
            'args': [source],
            'kwargs': {
                'user': user,
                'password': password
            }
        }
    elif source_type == 'mariadb':
        user = self.config['integrations']['default_mariadb']['user']
        password = self.config['integrations']['default_mariadb']['password']
        host = self.config['integrations']['default_mariadb']['host']
        port = self.config['integrations']['default_mariadb']['port']
        ds = MariaDS(source, user=user, password=password, host=host, port=port)
        picklable = {
            'class': 'MariaDS',
            'args': [source],
            'kwargs': {
                'user': user,
                'password': password,
                'host': host,
                'port': port
            }
        }
    else:
        # This probably only happens for urls
        print('Create URL data source !')
        ds = FileDS(source)
        picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}

    df = ds.df
    df_with_types = cast_df_columns_types(df, self.get_analysis(df)['data_analysis_v2'])
    create_sqlite_db(os.path.join(ds_dir, 'sqlite.db'), df_with_types)

    print(picklable)

    with open(os.path.join(ds_dir, 'ds.pickle'), 'wb') as fp:
        pickle.dump(picklable, fp)

    with open(os.path.join(ds_dir, 'metadata.json'), 'w') as fp:
        json.dump({
            'name': name,
            'source_type': source_type,
            'source': source,
            'created_at': str(datetime.datetime.now()).split('.')[0],
            'updated_at': str(datetime.datetime.now()).split('.')[0],
            'row_count': len(df),
            'columns': [dict(name=x) for x in list(df.keys())]
        }, fp)

    return self.get_datasource_obj(name, avoid_crash=True)
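# Hedged usage sketch (not part of the original code): saving a local CSV through the
# file branch shared by the variants above. `data_store` and both paths are hypothetical
# placeholders; when source_type == 'file', `source` is the target file name inside the
# datasource directory and `file_path` is the uploaded file that gets moved there.
def _example_save_from_file(data_store):
    return data_store.save_datasource(
        name='home_rentals',
        source_type='file',
        source='home_rentals.csv',
        file_path='/tmp/uploads/home_rentals.csv'
    )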