import json
import posixpath
import urllib.parse
from typing import Optional, Union

import elasticsearch
from sqlalchemy import and_, column, func, select
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql.expression import ColumnElement

# Project-local modules and constants defined elsewhere in this package:
# models, schemas, indexing_models, indexing_schemas, make_record_primary_key,
# and the TS_START / TS_STOP highlight sentinel strings.


def ts_locs_array(
    config: ColumnElement,
    text: ColumnElement,
    tsquery: ColumnElement,
) -> ColumnElement:
    """Return an array of [position, length] pairs locating each tsquery
    match in text, recovered from ts_headline sentinel delimiters."""
    options = f"HighlightAll = TRUE, StartSel = {TS_START}, StopSel = {TS_STOP}"
    delimited = func.ts_headline(config, text, tsquery, options)
    # Split the highlighted text on the start sentinel; every part after the
    # first begins with a match terminated by the stop sentinel.
    parts = func.unnest(func.string_to_array(delimited, TS_START)).alias()
    part = column(parts.name)
    # Length each part contributes to the original text, discounting the
    # embedded stop sentinel.
    part_len = func.length(part) - len(TS_STOP)
    # Offset of each match: running sum of the lengths of all preceding parts.
    match_pos = func.sum(part_len).over(rows=(None, -1)) + len(TS_STOP)
    match_len = func.strpos(part, TS_STOP) - 1
    # Skip the first part (the text before the first match) and aggregate.
    return func.array(
        select([postgresql.array([match_pos, match_len])])
        .select_from(parts)
        .offset(1)
        .as_scalar(),
    )
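
# --- Usage sketch (illustrative; not part of the original module) ----------
# A minimal example of composing ts_locs_array into a query. ``Page`` is a
# hypothetical mapped model with a text ``content`` column; everything else
# uses standard SQLAlchemy/PostgreSQL calls.
def _example_ts_locs_query(db: Session) -> None:
    from sqlalchemy import literal  # local import, used only by this sketch

    tsq = func.plainto_tsquery("english", "storage node")
    rows = db.query(
        Page.id,
        ts_locs_array(literal("english"), Page.content, tsq).label("locs"),
    ).filter(func.to_tsvector("english", Page.content).op("@@")(tsq))
    for page_id, locs in rows:
        # locs is an array of [position, length] pairs, suitable for driving
        # client-side highlighting without shipping ts_headline markup.
        print(page_id, locs)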
def handle_bucket_event(
    db: Session,
    ec: elasticsearch.Elasticsearch,
    event: indexing_schemas.BucketEventNotification,
):
    # Accumulate newline-delimited JSON actions and ship them to
    # Elasticsearch as a single bulk request at the end.
    bulk_operations = ""
    for record in event.Records:
        # Find workspace for event
        bucket = record.s3.bucket.name
        object_key = urllib.parse.unquote(record.s3.object.key)
        # Find the index whose root base_path prefixes the object key.
        # strpos is 1-based and returns 0 on no match, so "base_path is a
        # prefix of object_key" is strpos(object_key, base_path) == 1.
        parent_index: indexing_models.RootIndex = (
            db.query(indexing_models.RootIndex)
            .join(models.WorkspaceRoot)
            .filter(
                and_(
                    func.strpos(object_key, models.WorkspaceRoot.base_path) == 1,
                    models.WorkspaceRoot.bucket == bucket,
                )
            )
            .first()
        )
        if parent_index is None:
            raise ValueError(f"no index for object {object_key}")
        # Find the user who caused this event
        actor_db: Union[models.User, None] = (
            db.query(models.User)
            .join(models.S3Token)
            .filter(models.S3Token.access_key_id == record.userIdentity.principalId)
            .first()
        )
        # Find the workspace
        workspace: Optional[models.Workspace] = None
        workspace_prefix = ""
        workspace_inner_path = ""
        if parent_index.root.root_type in [
            schemas.RootType.PRIVATE,
            schemas.RootType.PUBLIC,
        ]:
            # Extrapolate path parts from the root type, assuming the layout
            # {scope}/{user}/{workspace}
            key_parts = object_key.split("/")
            scope = key_parts[0]
            user_name = key_parts[1]
            workspace_name = key_parts[2]
            workspace_inner_path = "/".join(key_parts[3:])
            workspace = (
                db.query(models.Workspace)
                .join(models.User)
                .filter(
                    and_(
                        models.Workspace.name == workspace_name,
                        models.User.username == user_name,
                    )
                )
                .first()
            )
            workspace_prefix = f"{scope}/{user_name}/{workspace_name}"
        elif parent_index.root.root_type == schemas.RootType.UNMANAGED:
            # Search again for a workspace whose base_path prefixes the key.
            # Note: str.lstrip removes a *character set*, not a prefix, so
            # slice the known prefix off instead.
            workspace_inner_path = object_key[
                len(parent_index.root.base_path):
            ].lstrip("/")
            workspace = (
                db.query(models.Workspace)
                .filter(
                    func.strpos(workspace_inner_path, models.Workspace.base_path) == 1
                )
                .first()
            )
            if workspace is None:
                raise ValueError(f"No workspace found for object {object_key}")
            workspace_prefix = workspace.base_path
            workspace_inner_path = workspace_inner_path[
                len(workspace.base_path):
            ].lstrip("/")
        if workspace is None:
            raise ValueError(f"No workspace found for object {object_key}")
        resource_owner: models.User = workspace.owner
        root: models.WorkspaceRoot = workspace.root
        node: models.StorageNode = root.storage_node
        primary_key_short_sha256 = make_record_primary_key(
            api_url=node.api_url,
            bucket=root.bucket,
            workspace_prefix=workspace_prefix,
            path=workspace_inner_path,
        )
        if record.eventName in [
            "s3:ObjectCreated:Put",
            "s3:ObjectCreated:Post",
            "s3:ObjectCreated:Copy",
        ]:
            # Create a new record in Elasticsearch. This could be an
            # overwrite of an existing object, so issue an upsert keyed on
            # the primary key rather than a plain insert.
            doc = indexing_schemas.IndexDocument(
                time=record.eventTime,
                size=record.s3.object.size,
                eTag=record.s3.object.eTag,
                workspace_id=workspace.id,
                workspace_name=workspace.name,
                owner_id=resource_owner.id,
                owner_name=resource_owner.username,
                bucket=record.s3.bucket.name,
                server=node.api_url,
                root_path=root.base_path,
                root_id=root.id,
                path=workspace_inner_path,
                extension=posixpath.splitext(workspace_inner_path)[-1],
                user_shares=[share.sharee.id for share in workspace.shares],
                # TODO: group shares
            )
            # Bulk bodies are NDJSON: an action line followed by a source line.
            bulk_operations += (
                json.dumps(
                    {
                        "update": {
                            "_index": parent_index.index_type,
                            "_id": primary_key_short_sha256,
                        }
                    }
                )
                + "\n"
            )
            bulk_operations += (
                indexing_schemas.ElasticUpsertIndexDocument(doc=doc).json() + "\n"
            )
        elif record.eventName in ["s3:ObjectRemoved:Delete"]:
            # Remove an existing record
            bulk_operations += (
                json.dumps(
                    {
                        "delete": {
                            "_index": parent_index.index_type,
                            "_id": primary_key_short_sha256,
                        }
                    }
                )
                + "\n"
            )
        else:
            raise ValueError(
                f"Bucket notification type unsupported: {record.eventName}"
            )
    ec.bulk(bulk_operations)
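
# --- Usage sketch (illustrative; not part of the original module) ----------
# Feeding a MinIO-style bucket notification through handle_bucket_event.
# This assumes BucketEventNotification is a pydantic model mirroring the S3
# notification JSON (hence parse_obj); every field value below is invented,
# and the object key must fall under a configured WorkspaceRoot for the
# lookups to succeed.
def _example_handle_put_event(db: Session, ec: elasticsearch.Elasticsearch) -> None:
    raw = {
        "Records": [
            {
                "eventName": "s3:ObjectCreated:Put",
                "eventTime": "2021-01-01T00:00:00.000Z",
                "userIdentity": {"principalId": "EXAMPLEACCESSKEY"},
                "s3": {
                    "bucket": {"name": "workspaces"},
                    "object": {
                        "key": "private/alice/notes/readme.md",
                        "size": 1024,
                        "eTag": "d41d8cd98f00b204e9800998ecf8427e",
                    },
                },
            }
        ]
    }
    event = indexing_schemas.BucketEventNotification.parse_obj(raw)
    # Looks up the root index and workspace for the key, then upserts one
    # document into Elasticsearch via a single bulk request.
    handle_bucket_event(db, ec, event)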