def create(
    cls,
    user: str,
    company: str,
    name: str,
    description: str,
    tags: Sequence[str] = None,
    system_tags: Sequence[str] = None,
    default_output_destination: str = None,
) -> str:
    """
    Create a new project from the passed values and save it.
    The project id is generated by database.utils.id() and both `created`
    and `last_update` are set to the same current UTC time.
    Returns project ID
    """
    timestamp = datetime.utcnow()
    project_fields = dict(
        id=database.utils.id(),
        user=user,
        company=company,
        name=name,
        description=description,
        tags=tags,
        system_tags=system_tags,
        default_output_destination=default_output_destination,
        created=timestamp,
        last_update=timestamp,
    )
    new_project = Project(**project_fields)
    new_project.save()
    return new_project.id
def merge_project(
    cls, company, source_id: str, destination_id: str
) -> Tuple[int, int, Set[str]]:
    """
    Merge the source project into the destination project.
    The non archived tasks and models of the source are re-assigned to the
    destination, the direct sub projects of the source are repositioned under
    the destination and then the source project is deleted.
    Raises ProjectSourceAndDestinationAreTheSame if both ids are equal and
    ProjectCannotBeMergedIntoItsChild if the source id is part of the
    destination path.
    Returns the amount of moved entities, the amount of moved sub projects
    and the set of all the affected project ids
    """
    with TimingContext("mongo", "move_project"):
        if source_id == destination_id:
            raise errors.bad_request.ProjectSourceAndDestinationAreTheSame(
                source=source_id
            )

        source = Project.get(company, source_id)
        destination = Project.get(company, destination_id)
        if source_id in destination.path:
            raise errors.bad_request.ProjectCannotBeMergedIntoItsChild(
                source=source_id, destination=destination_id
            )

        sub_projects = _get_sub_projects(
            [source.id], _only=("id", "name", "parent", "path")
        )
        children = sub_projects[source.id]
        cls.validate_projects_depth(
            projects=children,
            old_parent_depth=len(source.path) + 1,
            new_parent_depth=len(destination.path) + 1,
        )

        # re-assign the non archived tasks and models to the destination
        moved_entities = sum(
            entity_type.objects(
                company=company,
                project=source_id,
                system_tags__nin=[EntityVisibility.archived.value],
            ).update(upsert=False, project=destination_id)
            for entity_type in (Task, Model)
        )

        # reposition each direct child of the source under the destination
        moved_sub_projects = 0
        direct_children = Project.objects(company=company, parent=source_id)
        for moved_sub_projects, child in enumerate(direct_children, start=1):
            _reposition_project_with_children(
                project=child,
                children=[c for c in children if c.parent == child.id],
                parent=destination,
            )

        affected = {source.id, *(source.path or [])}
        source.delete()

        if destination:
            destination.update(last_update=datetime.utcnow())
            affected |= {destination.id, *(destination.path or [])}

        return moved_entities, moved_sub_projects, affected
def delete_project(
    company: str, project_id: str, force: bool, delete_contents: bool
) -> Tuple[DeleteProjectResult, Set[str]]:
    """
    Delete the project together with all of its sub projects.
    Unless `force` is set, the call fails with ProjectHasTasks/ProjectHasModels
    if the projects contain non archived tasks or models.
    If `delete_contents` is set then the tasks and models are deleted,
    otherwise they are only disassociated from the deleted projects.
    Returns the deletion result and the set of the affected project ids
    (the deleted projects and the parents of the requested project)
    """
    project = Project.get_for_writing(
        company=company, id=project_id, _only=("id", "path")
    )
    if not project:
        raise errors.bad_request.InvalidProjectId(id=project_id)

    project_ids = _ids_with_children([project_id])

    if not force:
        guarded_entities = (
            (Task, errors.bad_request.ProjectHasTasks),
            (Model, errors.bad_request.ProjectHasModels),
        )
        for entity_cls, error_cls in guarded_entities:
            non_archived = entity_cls.objects(
                project__in=project_ids,
                system_tags__nin=[EntityVisibility.archived.value],
            ).only("id")
            if non_archived:
                raise error_cls("use force=true to delete", id=project_id)

    if delete_contents:
        deleted_models, model_urls = _delete_models(projects=project_ids)
        deleted_tasks, event_urls, artifact_urls = _delete_tasks(
            company=company, projects=project_ids
        )
        urls = TaskUrls(
            model_urls=list(model_urls),
            event_urls=list(event_urls),
            artifact_urls=list(artifact_urls),
        )
        res = DeleteProjectResult(
            deleted_tasks=deleted_tasks, deleted_models=deleted_models, urls=urls,
        )
    else:
        with TimingContext("mongo", "update_children"):
            # Task is processed last so that updated_count ends up holding
            # the amount of the disassociated tasks
            for entity_cls in (Model, Task):
                updated_count = entity_cls.objects(project__in=project_ids).update(
                    project=None
                )
        res = DeleteProjectResult(disassociated_tasks=updated_count)

    affected = {*project_ids, *(project.path or [])}
    res.deleted = Project.objects(id__in=project_ids).delete()
    return res, affected
def update(cls, company: str, project_id: str, **fields):
    """
    Update the passed fields of the project and refresh its last_update time.
    A new name is accepted only if it keeps the project in its current
    location, otherwise CannotUpdateProjectLocation is raised. After a rename
    the names of all the sub projects are updated accordingly.
    Raises InvalidProjectId if the project is not available for writing.
    Returns the result of the project update call
    """
    with TimingContext("mongo", "projects_update"):
        project = Project.get_for_writing(company=company, id=project_id)
        if not project:
            raise errors.bad_request.InvalidProjectId(id=project_id)

        new_name = fields.pop("name", None)
        if new_name:
            new_name, new_location = _validate_project_name(new_name)
            _, current_location = _validate_project_name(project.name)
            if new_location != current_location:
                raise errors.bad_request.CannotUpdateProjectLocation(name=new_name)
            fields["name"] = new_name

        fields["last_update"] = datetime.utcnow()
        updated = project.update(upsert=False, **fields)

        if new_name:
            # the in-memory project still carries the name it was loaded with
            previous_name = project.name
            project.name = new_name
            sub_projects = _get_sub_projects(
                [project.id], _only=("id", "name", "path")
            )
            _update_subproject_names(
                project=project,
                children=sub_projects[project.id],
                old_name=previous_name,
            )

        return updated
def validate(
    cls,
    task: Task,
    validate_models=True,
    validate_parent=True,
    validate_project=True,
):
    """
    Validate the task properties that are enabled by the flags.
    The task project (if set) is always fetched for writing so that the
    modification of public projects is disabled; a missing project is
    reported only when validate_project is set.
    Raises InvalidTaskId for an invalid parent and InvalidProjectId for an
    invalid project
    """
    should_check_parent = (
        validate_parent
        and task.parent
        and not task.parent.startswith(deleted_prefix)
    )
    if should_check_parent:
        parent = Task.get(
            company=task.company,
            id=task.parent,
            _only=("id",),
            include_public=True,
        )
        if not parent:
            raise errors.bad_request.InvalidTaskId(
                "invalid parent", parent=task.parent
            )

    if task.project:
        project = Project.get_for_writing(company=task.company, id=task.project)
        if validate_project and not project:
            raise errors.bad_request.InvalidProjectId(id=task.project)

    if validate_models:
        cls.validate_input_models(task)
def _ensure_project(
    company: str, user: str, name: str, creation_params: dict = None
) -> Optional[Project]:
    """
    Makes sure that the project with the given name exists
    If needed auto-create the project and all the missing projects in the path to it
    :param company: the company of the project
    :param user: the user that is set on the auto-created projects
    :param name: full project name. Leading and trailing separators are stripped
    :param creation_params: optional Project field values that are applied to
        every project auto-created by this call (on top of the default empty
        description). Not applied to the projects that already exist
    Return the project or None if the name is empty
    """
    name = name.strip(name_separator)
    if not name:
        return None

    project = _get_writable_project_from_name(company, name)
    if project:
        return project

    now = datetime.utcnow()
    name, location = _validate_project_name(name)
    # keep the historical default of an empty description unless overridden
    extra_fields = {"description": "", **(creation_params or {})}
    project = Project(
        id=database.utils.id(),
        user=user,
        company=company,
        created=now,
        last_update=now,
        name=name,
        **extra_fields,
    )
    # recursively make sure that the whole parent chain exists
    parent = _ensure_project(
        company, user, location, creation_params=creation_params
    )
    _save_under_parent(project=project, parent=parent)
    if parent:
        parent.update(last_update=now)

    return project
def get_active_users(
    cls,
    company,
    project_ids: Sequence[str],
    user_ids: Optional[Sequence[str]] = None,
) -> Set[str]:
    """
    Collect the ids of the users that created projects, tasks or models
    in the given projects (sub projects included)
    If project_ids is empty then all the company projects are examined
    If user_ids are passed then only a subset of these users is returned
    """
    with TimingContext("mongo", "active_users_in_projects"):
        base_query = Q(company=company)
        if user_ids:
            base_query = base_query & Q(user__in=user_ids)

        entities_query = base_query
        projects_query = base_query
        if project_ids:
            project_ids = _ids_with_children(project_ids)
            entities_query = base_query & Q(project__in=project_ids)
            projects_query = base_query & Q(id__in=project_ids)

        users = set(Project.objects(projects_query).distinct(field="user"))
        for entity_cls in (Task, Model):
            users.update(entity_cls.objects(entities_query).distinct(field="user"))

        return users
def _ids_with_parents(project_ids: Sequence[str]) -> Sequence[str]:
    """
    Return the ids of the found projects together with the ids of all
    their parent projects (taken from the project paths), without duplicates
    """
    projects = Project.objects(id__in=project_ids).only("id", "path")

    parent_ids = set()
    for project in projects:
        if project.path:
            parent_ids.update(project.path)

    all_ids = {project.id for project in projects}
    all_ids.update(parent_ids)
    return list(all_ids)
def update_project_time(project_ids: Union[str, Sequence[str]]):
    """
    Set last_update of the given projects to the current UTC time.
    Accepts a single project id or a sequence of ids.
    Returns None when no ids are passed, otherwise the result of the update
    """
    if not project_ids:
        return

    ids = [project_ids] if isinstance(project_ids, str) else project_ids
    return Project.objects(id__in=ids).update(last_update=datetime.utcnow())
def move_project(
    cls, company: str, user: str, project_id: str, new_location: str
) -> Tuple[int, Set[str]]:
    """
    Move project with its sub projects from its current location to the target one.
    If the target location does not exist then it will be created. If it exists then
    it should be writable. The source location should be writable too.
    An empty target location resolves to no parent (top level).
    Raises ProjectSourceAndDestinationAreTheSame if the project is already
    located under the target and ProjectCannotBeMovedUnderItself if the target
    is the project itself or one of its sub projects.
    Return the number of moved projects + set of all the affected project ids
    """
    with TimingContext("mongo", "move_project"):
        project = Project.get(company, project_id)
        old_parent_id = project.parent
        old_parent = (
            Project.get_for_writing(company=project.company, id=old_parent_id)
            if old_parent_id
            else None
        )

        children = _get_sub_projects([project.id], _only=("id", "name", "path"))[
            project.id
        ]
        cls.validate_projects_depth(
            projects=[project, *children],
            old_parent_depth=len(project.path),
            new_parent_depth=_get_project_depth(new_location),
        )

        new_parent = _ensure_project(company=company, user=user, name=new_location)
        new_parent_id = new_parent.id if new_parent else None
        if old_parent_id == new_parent_id:
            raise errors.bad_request.ProjectSourceAndDestinationAreTheSame(
                location=new_parent.name if new_parent else ""
            )

        # The whole check applies only when there is a target parent. Without
        # the parentheses `and` binds tighter than `or`, so a move to the top
        # level (new_parent is None) dereferenced None
        if new_parent and (
            project_id == new_parent.id
            or project_id in (new_parent.path or [])
        ):
            raise errors.bad_request.ProjectCannotBeMovedUnderItself(
                project=project_id, parent=new_parent.id
            )

        moved = _reposition_project_with_children(
            project, children=children, parent=new_parent
        )

        now = datetime.utcnow()
        affected = set()
        # touch both the old and the new parents (whichever of them exist)
        for p in filter(None, (old_parent, new_parent)):
            p.update(last_update=now)
            affected.update({p.id, *(p.path or [])})

        return moved, affected
def get_all_ex(call: APICall, company_id: str, request: ProjectsGetRequest):
    """
    Return the projects matching the call data in call.result.data["projects"].
    If active users are requested then the search is limited to the projects
    where these users are active (an empty list is returned if there are none).
    If check_own_contents is requested together with explicit ids then the
    found requested projects are extended with their own contents info.
    If include_stats is requested then "stats" and "sub_projects" are added
    to each project
    """
    conform_tag_fields(call, call.data)
    allow_public = not request.non_public
    # remember the ids as requested by the caller: data["id"] may be replaced below
    requested_ids = call.data.get("id")

    with TimingContext("mongo", "projects_get_all"):
        data = call.data
        if request.active_users:
            active_ids = project_bll.get_projects_with_active_user(
                company=company_id,
                users=request.active_users,
                project_ids=requested_ids,
                allow_public=allow_public,
            )
            if not active_ids:
                call.result.data = {"projects": []}
                return
            data["id"] = active_ids

        _adjust_search_parameters(data, shallow_search=request.shallow_search)

        projects = Project.get_many_with_join(
            company=company_id, query_dict=data, allow_public=allow_public,
        )

        if request.check_own_contents and requested_ids:
            existing_requested_ids = {
                found["id"] for found in projects if found["id"] in requested_ids
            }
            if existing_requested_ids:
                contents = project_bll.calc_own_contents(
                    company=company_id, project_ids=list(existing_requested_ids)
                )
                for found in projects:
                    found.update(**contents.get(found["id"], {}))

        conform_output_tags(call, projects)

        if request.include_stats:
            unique_ids = {found["id"] for found in projects}
            stats, children = project_bll.get_project_stats(
                company=company_id,
                project_ids=list(unique_ids),
                specific_state=request.stats_for_state,
            )
            for found in projects:
                found_id = found["id"]
                found["stats"] = stats[found_id]
                found["sub_projects"] = children[found_id]

        call.result.data = {"projects": projects}
def find_or_create(
    cls,
    user: str,
    company: str,
    project_name: str,
    description: str,
    project_id: str = None,
    tags: Sequence[str] = None,
    system_tags: Sequence[str] = None,
    default_output_destination: str = None,
    parent_creation_params: dict = None,
) -> str:
    """
    Find a project by id or by name, or create a new one.
    If project_id is passed then it must exist in the company (otherwise
    InvalidProjectId is raised) and it is returned as is.
    Otherwise the project is looked up by the validated `project_name`
    and created through cls.create if it is not found.
    Raises ValidationError if neither id nor name are passed.
    Returns project ID
    """
    if not (project_id or project_name):
        raise errors.bad_request.ValidationError("project id or name required")

    if project_id:
        by_id = Project.objects(company=company, id=project_id).only("id").first()
        if not by_id:
            raise errors.bad_request.InvalidProjectId(id=project_id)
        return project_id

    project_name, _ = _validate_project_name(project_name)
    by_name = Project.objects(company=company, name=project_name).only("id").first()
    if by_name:
        return by_name.id

    creation_kwargs = dict(
        user=user,
        company=company,
        name=project_name,
        description=description,
        tags=tags,
        system_tags=system_tags,
        default_output_destination=default_output_destination,
        parent_creation_params=parent_creation_params,
    )
    return cls.create(**creation_kwargs)
def get_project_tags(
    cls,
    company_id: str,
    include_system: bool,
    projects: Sequence[str] = None,
    filter_: Dict[str, Sequence[str]] = None,
) -> Tuple[Sequence[str], Sequence[str]]:
    """
    Return the distinct tags of the company projects and, if include_system
    is set, their distinct system tags (an empty list otherwise).
    The search can be narrowed by list field filters (filter_ entries with
    empty values are skipped) and by project ids (sub projects are included)
    """
    with TimingContext("mongo", "get_tags_from_db"):
        query = Q(company=company_id)

        for field_name, field_values in (filter_ or {}).items():
            if not field_values:
                continue
            query = query & GetMixin.get_list_field_query(field_name, field_values)

        if projects:
            query = query & Q(id__in=_ids_with_children(projects))

        tags = Project.objects(query).distinct("tags")
        if include_system:
            system_tags = Project.objects(query).distinct("system_tags")
        else:
            system_tags = []

        return tags, system_tags
def create(call: APICall):
    """
    Create a project from the call data on behalf of the calling identity.
    Only the fields listed in create_fields are taken from the call data.
    Returns IdResponse with the id of the created project
    """
    caller = call.identity

    with translate_errors_context():
        fields = parse_from_call(call.data, create_fields, Project.get_fields())
        conform_tag_fields(call, fields, validate=True)

        project_id = ProjectBLL.create(
            user=caller.user, company=caller.company, **fields,
        )
        return IdResponse(id=project_id)
def _get_writable_project_from_name(
    company,
    name,
    _only: Optional[Sequence[str]] = ("id", "name", "path", "company", "parent"),
) -> Optional[Project]:
    """
    Look up the company project with the given name.
    If _only is not empty then only these fields are loaded.
    Returns the first match or None if the project is not found
    """
    query = Project.objects(company=company, name=name)
    limited = query.only(*_only) if _only else query
    return limited.first()
def _update_task_name(task: Task):
    """
    Rename the task after the last part of its project name.
    If the project already has tasks with the "pipeline" system tag whose
    names end with that base name (optionally followed by " #<number>") then
    " #<count of such tasks>" is appended to the new name.
    Does nothing if the task has no project or the project is not found
    """
    if not (task and task.project):
        return

    project = Project.objects(id=task.project).only("name").first()
    if not project:
        return

    # the part of the project name after the last "/"
    name_prefix = project.name.rsplit("/", 1)[-1]
    name_mask = re.compile(rf"{re.escape(name_prefix)}( #\d+)?$")
    same_name_tasks = Task.objects(
        project=task.project, system_tags__in=["pipeline"], name=name_mask
    )
    count = same_name_tasks.count()

    if count > 0:
        new_name = f"{name_prefix} #{count}"
    else:
        new_name = name_prefix
    task.update(name=new_name)
def _get_sub_projects(
    project_ids: Sequence[str], _only: Sequence[str] = ("id", "path")
) -> Mapping[str, Sequence[Project]]:
    """
    Map each of the passed parent project ids to the list of its child
    projects of all the levels (the projects that have the parent id in their path).
    If _only is not empty then only these fields (plus "path", which is
    required for the grouping) are loaded
    """
    query = Project.objects(path__in=project_ids)
    if _only:
        fields_to_load = set(_only) | {"path"}
        query = query.only(*fields_to_load)

    found = list(query)
    grouped = {}
    for parent_id in project_ids:
        grouped[parent_id] = [
            sub for sub in found if parent_id in (sub.path or [])
        ]
    return grouped
def get_by_id(call):
    """
    Return the project requested in call.data["project"] as a dictionary
    in call.result.data["project"].
    The project is searched with the company-or-none constraint of the
    calling identity. Raises InvalidProjectId if it is not found
    """
    assert isinstance(call, APICall)
    project_id = call.data["project"]

    with translate_errors_context():
        with TimingContext("mongo", "projects_by_id"):
            company_constraint = get_company_or_none_constraint(
                call.identity.company
            )
            project = Project.objects(Q(id=project_id) & company_constraint).first()

        if not project:
            raise errors.bad_request.InvalidProjectId(id=project_id)

        as_dict = project.to_proper_dict()
        conform_output_tags(call, as_dict)
        call.result.data = {"project": as_dict}
def get_all(call: APICall):
    """
    Return the projects matching the call data (public ones are allowed)
    in call.result.data["projects"].
    The search parameters are adjusted according to the optional
    "shallow_search" value of the call data (False if it is not passed)
    """
    conform_tag_fields(call, call.data)
    data = call.data
    shallow_search = data.get("shallow_search", False)
    _adjust_search_parameters(data, shallow_search=shallow_search)

    with translate_errors_context():
        with TimingContext("mongo", "projects_get_all"):
            projects = Project.get_many(
                company=call.identity.company,
                query_dict=data,
                parameters=data,
                allow_public=True,
            )
            conform_output_tags(call, projects)
            call.result.data = {"projects": projects}
def _reposition_project_with_children(
    project: Project, children: Sequence[Project], parent: Project
) -> int:
    """
    Place the project under the given parent (or at the top level if the
    parent is empty) and update the names and the paths of its children.
    The new project name is the parent name joined with the last part
    of the current project name.
    Returns the number of moved projects: the project itself plus the
    number returned by the sub projects update
    """
    previous_name = project.name
    previous_path = project.path

    base_name = previous_name.split(name_separator)[-1]
    parent_name = parent.name if parent else None
    # filter drops an empty parent name so that top level names get no separator
    project.name = name_separator.join(filter(None, (parent_name, base_name)))
    _save_under_parent(project, parent=parent)

    updated_children = _update_subproject_names(
        project=project,
        children=children,
        old_name=previous_name,
        update_path=True,
        old_path=previous_path,
    )
    return 1 + updated_children
def create(
    cls,
    user: str,
    company: str,
    name: str,
    description: str = "",
    tags: Sequence[str] = None,
    system_tags: Sequence[str] = None,
    default_output_destination: str = None,
    parent_creation_params: dict = None,
) -> str:
    """
    Create a new project under the location that is part of its name.
    The missing parent projects are ensured through _ensure_project, which
    receives parent_creation_params. The last_update time of the direct
    parent (if any) is refreshed.
    Raises ProjectPathExceedsMax if the name depth is greater than max_depth.
    Returns project ID
    """
    if _get_project_depth(name) > max_depth:
        raise errors.bad_request.ProjectPathExceedsMax(max_depth=max_depth)

    name, location = _validate_project_name(name)
    timestamp = datetime.utcnow()
    project_fields = dict(
        id=database.utils.id(),
        user=user,
        company=company,
        name=name,
        description=description,
        tags=tags,
        system_tags=system_tags,
        default_output_destination=default_output_destination,
        created=timestamp,
        last_update=timestamp,
    )
    new_project = Project(**project_fields)

    parent = _ensure_project(
        company=company,
        user=user,
        name=location,
        creation_params=parent_creation_params,
    )
    _save_under_parent(project=new_project, parent=parent)
    if parent:
        parent.update(last_update=timestamp)

    return new_project.id
def update(call: APICall):
    """
    update

    :summary: Update project information.
              See `project.create` for parameters.
              None values in the call data are kept and passed to the update.
    :return: updated - `int` - number of projects updated
             fields - `[string]` - updated fields
    """
    fields = parse_from_call(
        call.data,
        create_fields,
        Project.get_fields(),
        discard_none_values=False,
    )
    conform_tag_fields(call, fields, validate=True)

    project_id = call.data["project"]
    updated = ProjectBLL.update(
        company=call.identity.company, project_id=project_id, **fields
    )

    conform_output_tags(call, fields)
    call.result.data_model = UpdateResponse(updated=updated, fields=fields)
def validate_project_delete(company: str, project_id: str):
    """
    Count the entities that the deletion of the project would affect.
    The counts cover the project and all of its sub projects.
    Returns a dictionary with the keys "tasks", "models", "non_archived_tasks"
    and "non_archived_models".
    Raises InvalidProjectId if the project is not available for writing
    """
    project = Project.get_for_writing(
        company=company, id=project_id, _only=("id", "path")
    )
    if not project:
        raise errors.bad_request.InvalidProjectId(id=project_id)

    project_ids = _ids_with_children([project_id])
    entity_classes = (Task, Model)

    # total counts first, then the non archived ones (keeps the keys order)
    ret = {
        f"{entity_cls.__name__.lower()}s": entity_cls.objects(
            project__in=project_ids,
        ).count()
        for entity_cls in entity_classes
    }
    ret.update(
        {
            f"non_archived_{entity_cls.__name__.lower()}s": entity_cls.objects(
                project__in=project_ids,
                system_tags__nin=[EntityVisibility.archived.value],
            ).count()
            for entity_cls in entity_classes
        }
    )
    return ret
def _save_under_parent(project: Project, parent: Optional[Project]):
    """
    Set the parent and the path of the project and save it.
    With parent=None the project is saved at the top level (no parent, empty path),
    otherwise its path is the parent path followed by the parent id.
    Raises ValueError if the location part of the project name does not
    match the parent name (or is not empty when there is no parent)
    """
    location = project.name.rpartition(name_separator)[0]

    if parent:
        if location != parent.name:
            raise ValueError(
                f"Project location {location} does not match parent name {parent.name}"
            )
        new_parent_id = parent.id
        new_path = [*(parent.path or []), parent.id]
    else:
        if location:
            raise ValueError(
                f"Project location {location} does not match empty parent name"
            )
        new_parent_id = None
        new_path = []

    project.parent = new_parent_id
    project.path = new_path
    project.save()
def get_projects_with_active_user(
    cls,
    company: str,
    users: Sequence[str],
    project_ids: Optional[Sequence[str]] = None,
    allow_public: bool = True,
) -> Sequence[str]:
    """
    Get the ids of the projects that were created by the users or where the
    users created any tasks or models, including all the parents of these projects
    If allow_public is set then the company-or-none constraint is used
    instead of the exact company match
    If project ids are specified then the search covers these projects with
    their sub projects and only the specified ids are returned
    """
    if allow_public:
        company_constraint = get_company_or_none_constraint(company)
    else:
        company_constraint = Q(company=company)
    base_query = Q(user__in=users) & company_constraint

    entities_query = base_query
    user_projects_query = base_query
    if project_ids:
        ids_with_children = _ids_with_children(project_ids)
        entities_query = base_query & Q(project__in=ids_with_children)
        user_projects_query = base_query & Q(id__in=ids_with_children)

    found = {p.id for p in Project.objects(user_projects_query).only("id")}
    for entity_cls in (Task, Model):
        found.update(entity_cls.objects(entities_query).distinct(field="project"))

    found_ids = list(found)
    if not found_ids:
        return found_ids

    ids_with_parents = _ids_with_parents(found_ids)
    if not project_ids:
        return ids_with_parents

    return [pid for pid in ids_with_parents if pid in project_ids]
def _ids_with_children(project_ids: Sequence[str]) -> Sequence[str]:
    """
    Return the passed project ids together with the ids of all the projects
    that have any of them in their path, without duplicates
    """
    children = Project.objects(path__in=project_ids).only("id")
    all_ids = set(project_ids)
    all_ids.update(child.id for child in children)
    return list(all_ids)
def make_public(call: APICall, company_id, request: MakePublicRequest):
    """
    Make the requested projects public and store the outcome of
    Project.set_public in call.result.data.
    InvalidProjectId is passed to set_public as the error class for invalid ids.
    """
    # NOTE(review): assumes enabled=True is what turns the public state on in
    # Project.set_public (enabled=False contradicted the purpose of this
    # endpoint) - confirm against the set_public implementation
    call.result.data = Project.set_public(
        company_id, ids=request.ids, invalid_cls=InvalidProjectId, enabled=True
    )
def _cleanup_project(cls, project: Project):
    """
    Blank the user and the company of the project (in memory only) and
    pass its tags through cls._filter_out_export_tags
    """
    project.user = project.company = ""
    filtered_tags = cls._filter_out_export_tags(project.tags)
    project.tags = filtered_tags
def status_report(
    self,
    company_id: str,
    user_id: str,
    ip: str,
    report: StatusReportRequest,
    tags: Sequence[str] = None,
) -> None:
    """
    Write worker status report
    :param company_id: worker's company ID
    :param user_id: user_id ID under which this worker is running
    :param ip: worker IP
    :param report: the report itself
    :param tags: tags for this worker. None leaves the worker tags unchanged
    :raise bad_request.InvalidTaskId: the reported task was not found
    :raise server_error.DataError: any non API error while processing the report
    :return: None. The worker entry is saved even if the processing failed
    """
    entry = self._get_worker(company_id, user_id, report.worker)

    try:
        entry.ip = ip
        now = datetime.utcnow()
        entry.last_activity_time = now

        if tags is not None:
            entry.tags = tags
        if report.machine_stats:
            self._log_stats_to_es(
                company_id=company_id,
                company_name=entry.company.name,
                worker=report.worker,
                timestamp=report.timestamp,
                task=report.task,
                machine_stats=report.machine_stats,
            )

        entry.queue = report.queue

        if report.queues:
            entry.queues = report.queues

        if not report.task:
            entry.task = None
            entry.project = None
        else:
            with translate_errors_context():
                query = dict(id=report.task, company=company_id)
                update = dict(
                    last_worker=report.worker,
                    last_worker_report=now,
                    last_update=now,
                    last_change=now,
                )
                # modify(new=True, ...) returns the modified object
                task = Task.objects(**query).modify(new=True, **update)
                if not task:
                    raise bad_request.InvalidTaskId(**query)
                entry.task = IdNameEntry(id=task.id, name=task.name)

                entry.project = None
                if task.project:
                    project = Project.objects(id=task.project).only("name").first()
                    if project:
                        entry.project = IdNameEntry(
                            id=project.id, name=project.name
                        )

        entry.last_report_time = now
    except APIError:
        # API errors already carry the proper error information
        raise
    except Exception as e:
        msg = "Failed processing worker status report"
        log.exception(msg)
        # exceptions raised without arguments have empty args: indexing them
        # blindly would replace the real failure with an IndexError
        err = e.args[0] if e.args else repr(e)
        raise server_error.DataError(msg, err=err) from e
    finally:
        self._save_worker(entry)