def put(self, name):
    '''add new datasource'''
    data = {}

    def on_field(field):
        name = field.field_name.decode()
        value = field.value.decode()
        data[name] = value

    def on_file(file):
        data['file'] = file.file_name.decode()

    temp_dir_path = tempfile.mkdtemp(prefix='datasource_file_')

    if request.headers['Content-Type'].startswith('multipart/form-data'):
        parser = multipart.create_form_parser(
            headers=request.headers,
            on_field=on_field,
            on_file=on_file,
            config={
                'UPLOAD_DIR': temp_dir_path.encode(),    # bytes required
                'UPLOAD_KEEP_FILENAME': True,
                'UPLOAD_KEEP_EXTENSIONS': True,
                'MAX_MEMORY_FILE_SIZE': 0
            }
        )

        # stream the request body into the parser in 8 KiB chunks
        while True:
            chunk = request.stream.read(8192)
            if not chunk:
                break
            parser.write(chunk)
        parser.finalize()
        parser.close()
    else:
        data = request.json
        if 'query' in data:
            query = request.json['query']
            source_type = request.json['integration_id']
            ca.default_store.save_datasource(name, source_type, query)
            os.rmdir(temp_dir_path)
            return ca.default_store.get_datasource(name)

    ds_name = data['name'] if 'name' in data else name
    source = data['source'] if 'source' in data else name
    source_type = data['source_type']
    if source_type == 'file':
        file_path = os.path.join(temp_dir_path, data['file'])
    else:
        file_path = None

    ca.default_store.save_datasource(ds_name, source_type, source, file_path)
    os.rmdir(temp_dir_path)

    return ca.default_store.get_datasource(ds_name)
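# --- Usage sketch (illustrative, not part of the handler) ---
# A minimal client-side call for the version above, assuming the resource is
# mounted at PUT /api/datasources/<name>; the route, host, and port are
# assumptions, and `requests` is used purely for illustration.
import requests

with open('home_rentals.csv', 'rb') as f:
    requests.put(
        'http://127.0.0.1:47334/api/datasources/home_rentals',
        data={'source_type': 'file', 'source': 'home_rentals.csv'},
        files={'file': f}
    )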
def put(self, name):
    '''add new datasource'''
    data = {}

    def on_field(field):
        name = field.field_name.decode()
        value = field.value.decode()
        data[name] = value

    file_object = None

    def on_file(file):
        nonlocal file_object
        data['file'] = file.file_name.decode()
        file_object = file.file_object

    temp_dir_path = tempfile.mkdtemp(prefix='datasource_file_')

    if request.headers['Content-Type'].startswith('multipart/form-data'):
        parser = multipart.create_form_parser(
            headers=request.headers,
            on_field=on_field,
            on_file=on_file,
            config={
                'UPLOAD_DIR': temp_dir_path.encode(),    # bytes required
                'UPLOAD_KEEP_FILENAME': True,
                'UPLOAD_KEEP_EXTENSIONS': True,
                'MAX_MEMORY_FILE_SIZE': 0
            }
        )

        while True:
            chunk = request.stream.read(8192)
            if not chunk:
                break
            parser.write(chunk)
        parser.finalize()
        parser.close()

        if file_object is not None and not file_object.closed:
            file_object.close()
    else:
        data = request.json
        if 'query' in data:
            source_type = request.json['integration_id']
            if source_type not in ca.default_store.config['integrations']:
                abort(400, f"{source_type} integration doesn't exist")

            # mongodb integrations expect the query under 'find'
            if ca.default_store.config['integrations'][source_type]['type'] == 'mongodb':
                data['find'] = data['query']

            ca.default_store.save_datasource(name, source_type, data)
            os.rmdir(temp_dir_path)
            return ca.default_store.get_datasource(name)

    ds_name = data['name'] if 'name' in data else name
    source = data['source'] if 'source' in data else name
    source_type = data['source_type']
    if source_type == 'file':
        file_path = os.path.join(temp_dir_path, data['file'])
    else:
        file_path = None

    ca.default_store.save_datasource(ds_name, source_type, source, file_path)
    os.rmdir(temp_dir_path)

    return ca.default_store.get_datasource(ds_name)
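# --- Usage sketch (illustrative) ---
# The JSON branch above pulls data through an existing integration. The
# integration name 'clickhouse_db', the query, and the route are all
# assumptions for illustration.
import requests

requests.put(
    'http://127.0.0.1:47334/api/datasources/rentals_from_db',
    json={
        'integration_id': 'clickhouse_db',
        'query': 'SELECT * FROM default.home_rentals LIMIT 1000'
    }
)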
def put(self, name):
    '''add new datasource'''
    data = {}

    def on_field(field):
        name = field.field_name.decode()
        value = field.value.decode()
        data[name] = value

    def on_file(file):
        data['file'] = file.file_name.decode()

    temp_dir_path = tempfile.mkdtemp(prefix='gateway_')

    if request.headers['Content-Type'].startswith('multipart/form-data'):
        parser = multipart.create_form_parser(
            headers=request.headers,
            on_field=on_field,
            on_file=on_file,
            config={
                'UPLOAD_DIR': temp_dir_path.encode(),    # bytes required
                'UPLOAD_KEEP_FILENAME': True,
                'UPLOAD_KEEP_EXTENSIONS': True,
                'MAX_MEMORY_FILE_SIZE': 0
            }
        )

        while True:
            chunk = request.stream.read(8192)
            if not chunk:
                break
            parser.write(chunk)
        parser.finalize()
        parser.close()
    else:
        data = request.json

    if 'name' in data:
        datasource_name = data['name']
    else:
        datasource_name = name

    datasource_type = data['source_type']

    if 'source' in data:
        datasource_source = data['source']
    else:
        datasource_source = name

    if datasource_type == 'file' and 'file' not in data:
        abort(400, "Argument 'file' is missing")

    # deduplicate the datasource name by appending '(i)'
    names = [x['name'] for x in get_datasources()]
    for i in range(1, 100):
        if datasource_name in names:
            previous_index = i - 1
            datasource_name = datasource_name.replace(f'({previous_index})', '')
            datasource_name += f'({i})'
        else:
            break

    os.mkdir(os.path.join(mindsdb.CONFIG.MINDSDB_DATASOURCES_PATH, datasource_name))
    os.mkdir(os.path.join(mindsdb.CONFIG.MINDSDB_DATASOURCES_PATH, datasource_name, 'resources'))
    ds_dir = os.path.join(mindsdb.CONFIG.MINDSDB_DATASOURCES_PATH, datasource_name, 'datasource')
    os.mkdir(ds_dir)

    if datasource_type == 'file':
        # move the uploaded file out of the temp dir into the datasource dir
        datasource_source = os.path.join(ds_dir, datasource_source)
        os.replace(
            os.path.join(temp_dir_path, data['file']),
            datasource_source
        )
    ds = FileDS(datasource_source)

    os.rmdir(temp_dir_path)

    df = ds.df
    columns = [dict(name=x) for x in list(df.keys())]
    row_count = len(df)

    df_with_types = cast_df_columns_types(df)
    create_sqlite_db(os.path.join(ds_dir, 'sqlite.db'), df_with_types)

    new_data_source = {
        'name': datasource_name,
        'source_type': datasource_type,
        'source': datasource_source,
        'missed_files': False,
        'created_at': datetime.datetime.now(),
        'updated_at': datetime.datetime.now(),
        'row_count': row_count,
        'columns': columns
    }

    save_datasource_metadata(new_data_source)

    return get_datasource(datasource_name)
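# --- Helper sketch (assumption) ---
# create_sqlite_db above is project-internal and not shown here; a plausible
# minimal equivalent persists the typed DataFrame into the datasource's
# sqlite file. The table name 'data' is an assumption.
import sqlite3
import pandas as pd

def create_sqlite_db_sketch(path, df):
    con = sqlite3.connect(path)
    df.to_sql('data', con, index=False, if_exists='replace')
    con.close()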
def put(self, name):
    '''add new datasource'''
    data = {}

    def on_field(field):
        name = field.field_name.decode()
        value = field.value.decode()
        data[name] = value

    file_object = None

    def on_file(file):
        nonlocal file_object
        data['file'] = file.file_name.decode()
        file_object = file.file_object

    temp_dir_path = tempfile.mkdtemp(prefix='datasource_file_')

    if request.headers['Content-Type'].startswith('multipart/form-data'):
        parser = multipart.create_form_parser(
            headers=request.headers,
            on_field=on_field,
            on_file=on_file,
            config={
                'UPLOAD_DIR': temp_dir_path.encode(),    # bytes required
                'UPLOAD_KEEP_FILENAME': True,
                'UPLOAD_KEEP_EXTENSIONS': True,
                'MAX_MEMORY_FILE_SIZE': 0
            }
        )

        while True:
            chunk = request.stream.read(8192)
            if not chunk:
                break
            parser.write(chunk)
        parser.finalize()
        parser.close()

        if file_object is not None and not file_object.closed:
            file_object.close()
    else:
        data = request.json
        if 'query' in data:
            integration_id = request.json['integration_id']
            integration = get_db_integration(integration_id, request.company_id)
            if integration is None:
                abort(400, f"{integration_id} integration doesn't exist")

            # mongodb integrations expect the query under 'find'
            if integration['type'] == 'mongodb':
                data['find'] = data['query']

            request.default_store.save_datasource(name, integration_id, data)
            os.rmdir(temp_dir_path)
            return request.default_store.get_datasource(name)

    ds_name = data['name'] if 'name' in data else name
    source = data['source'] if 'source' in data else name
    source_type = data['source_type']

    if source_type == 'file':
        file_path = os.path.join(temp_dir_path, data['file'])
        lp = file_path.lower()
        if lp.endswith(('.zip', '.tar.gz')):
            if lp.endswith('.zip'):
                with zipfile.ZipFile(file_path) as f:
                    f.extractall(temp_dir_path)
            elif lp.endswith('.tar.gz'):
                with tarfile.open(file_path) as f:
                    f.extractall(temp_dir_path)
            os.remove(file_path)
            files = os.listdir(temp_dir_path)
            if len(files) != 1:
                # the dir still holds the extracted entries, so remove recursively
                shutil.rmtree(temp_dir_path)
                return http_error(
                    400,
                    'Wrong content.',
                    'Archive must contain only one data file.'
                )
            file_path = os.path.join(temp_dir_path, files[0])
            source = files[0]
            if not os.path.isfile(file_path):
                # extracted entry is a directory, remove it recursively
                shutil.rmtree(temp_dir_path)
                return http_error(
                    400,
                    'Wrong content.',
                    'Archive must contain data file in root.'
                )
    else:
        file_path = None

    request.default_store.save_datasource(ds_name, source_type, source, file_path)
    os.rmdir(temp_dir_path)

    return request.default_store.get_datasource(ds_name)
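# --- Usage sketch (illustrative) ---
# Exercising the archive path added in this version: the .zip (or .tar.gz)
# must hold exactly one data file at its root. File names and the route are
# assumptions.
import zipfile
import requests

with zipfile.ZipFile('rentals.zip', 'w') as zf:
    zf.write('home_rentals.csv')    # single file in the archive root

with open('rentals.zip', 'rb') as f:
    requests.put(
        'http://127.0.0.1:47334/api/datasources/rentals',
        data={'source_type': 'file'},
        files={'file': f}
    )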