class User(me.Document):
    """Application user: login credentials, roles, pickup addresses and
    optional laundryman-specific data."""

    # Basic information
    first_name = me.StringField(required=True, max_length=100)
    last_name = me.StringField(required=True, max_length=100)
    phone = me.IntField()

    # Default Login info
    email = me.EmailField(unique=True)
    password = me.StringField()

    # Authorization
    CLIENT = 'CLIENT'
    LAUNDRYMAN = 'LAUNDRYMAN'
    ADMIN = 'ADMIN'
    SUSPENDED = 'SUSPENDED'
    ROLE_CHOICES = (CLIENT, LAUNDRYMAN, ADMIN, SUSPENDED)
    # lambda so every new document gets its own fresh list
    roles = me.ListField(me.StringField(choices=ROLE_CHOICES),
                         default=lambda: ['CLIENT'])

    # Pickup information
    addresses = me.EmbeddedDocumentListField(Address)

    # Specific role information
    laundryman = me.EmbeddedDocumentField(LaundrymanDataEmbedded)

    # Meta
    updated_at = me.DateTimeField(default=datetime.utcnow)

    def is_laundryman(self):
        """Return True when the LAUNDRYMAN role is assigned to this user."""
        return any(role == self.LAUNDRYMAN for role in self.roles)

    def full_name(self):
        """Return the user's display name as 'first last'."""
        template = '{first_name} {last_name}'
        return template.format(first_name=self.first_name,
                               last_name=self.last_name)
class UserModel(m.Document):
    """Account document used for token-based authentication.

    `lookup` and `identify` follow the flask-praetorian callback
    convention; the token identity is the user's email.
    """

    registered_date = m.DateTimeField(default=datetime.datetime.now)
    email = m.StringField(required=True)
    password = m.StringField(required=True)
    personal = m.EmbeddedDocumentListField(PersonalModel)

    meta = {'db_alias': 'core', 'collection': 'users'}

    @classmethod
    def lookup(cls, email):
        """Return the first user matching `email`, or None."""
        matches = cls.objects(email=email)
        return matches.first()

    @classmethod
    def identify(cls, id):
        """Resolve a token identity (an email) back to a user, or None."""
        matches = cls.objects(email=id)
        return matches.first()

    @property
    def rolenames(self):
        """No role system is in use: every user has an empty role list."""
        return list()

    @property
    def identity(self):
        """The unique identity embedded in auth tokens — the email."""
        return self.email
class Order(mongoengine.DynamicDocument):
    """A restaurant's group order, keyed by restaurant name and end time."""

    rname = mongoengine.StringField(required=True)
    # default=list (a callable) so each document gets its own fresh list;
    # the original default=[] shared one mutable list across all instances.
    orders = mongoengine.EmbeddedDocumentListField(Orders, default=list)
    end = mongoengine.StringField(required=True)
    shipping = mongoengine.FloatField(required=True)
    ogshipping = mongoengine.FloatField(required=True)
    maxOrder = mongoengine.IntField(required=True)
    # Pass the function itself, not utcnow(): the original called it once at
    # import time, stamping every new document with that same fixed datetime.
    lastUpdate = mongoengine.DateTimeField(default=datetime.datetime.utcnow)

    meta = {
        'auto_create_index': False,
        'index_background': True,
        'indexes': ['rname', 'end'],
        'collection': 'orders'
    }

    def save(self, *args, **kwargs):
        """Persist the document, refreshing lastUpdate to the current UTC time."""
        self.lastUpdate = datetime.datetime.utcnow()
        return super(Order, self).save(*args, **kwargs)

    @staticmethod
    def get_all_restaurant_orders(rname):
        """Return all orders for restaurant `rname` as a list of dicts."""
        existing = Order.objects(rname=rname)
        all_orders = []
        for i in existing:
            all_orders.append(i.to_dict())
        return all_orders

    @staticmethod
    def get_restaurant_order(rname, end):
        """Return the order matching (rname, end), or None if absent."""
        existing = Order.objects(rname=rname, end=end).first()
        if not existing:
            return None
        return existing

    def to_dict(self):
        """Serialize this document via mongo_to_dict with no excluded fields."""
        return mongo_to_dict(self, [])
class Notebook(m.Document):
    """A named collection of notes, addressed by a unique random slug."""

    slug = m.StringField(max_length=255, unique=True)
    notes = m.EmbeddedDocumentListField(NoteReference)

    def clean(self):
        # Auto-assign a slug on first save.
        if not self.slug:
            self.slug = self.generate_slug()

    def add_note(self, note):
        """Attach `note` to this notebook and keep a lightweight reference.

        Saves the note; the notebook itself is not saved here.
        """
        note.notebook = self
        note.save()
        reference = NoteReference()
        reference.id = note.id
        reference.resume = note.resume
        self.notes.append(reference)

    def remove_note(self, note):
        """Drop every reference to `note` (the Note document is not deleted)."""
        to_remove = [x for x in self.notes if x.id == note.id]
        for reference in to_remove:
            self.notes.remove(reference)

    def list_notes(self):
        """Return a queryset of the Note documents referenced by this notebook."""
        ids = [x.id for x in self.notes]
        return Note.objects(id__in=ids)

    def generate_slug(self):
        """Return a random alphanumeric slug unused by any Notebook.

        Recurses on the (unlikely) event of a collision with an existing slug.
        """
        # base64.encodestring() was deprecated and removed in Python 3.9, and
        # it returned bytes, which made the str-pattern re.sub below raise
        # TypeError on Python 3; b64encode(...).decode() is the working
        # equivalent (no trailing newline; strip kept for safety).
        raw = base64.b64encode(os.urandom(8)).decode('ascii').strip()
        slug = re.sub('[^0-9A-Za-z]', '', raw)
        if Notebook.objects(slug=slug).count():
            slug = self.generate_slug()
        return slug
class ConditionalClassMixin(object):
    """Mixin that builds a query for a target collection by AND-ing
    together the query sets of its embedded conditions, always scoped
    to the current owner."""

    condition_resource_cls = None  # mongoengine model class the conditions target

    conditions = me.EmbeddedDocumentListField(BaseCondition)

    def owner_query(self):
        """Base query restricting resources to this object's owner."""
        return me.Q(owner=self.owner_id)

    def get_resources(self):
        """Return a queryset of resources satisfying every condition.

        Resources flagged deleted or missing are excluded when the target
        model declares those fields.
        """
        query = self.owner_query()
        for cond in self.conditions:
            query = query & cond.q
        fields = self.condition_resource_cls._fields
        if 'deleted' in fields:
            query = query & me.Q(deleted=None)
        if 'missing_since' in fields:
            query = query & me.Q(missing_since=None)
        return self.condition_resource_cls.objects(query)

    def get_ids(self):
        """Return the ids of every matching resource."""
        return [resource.id for resource in self.get_resources()]
class Car(mongoengine.Document):
    """A dealership car: identity, engine spec and service history."""

    model = mongoengine.StringField(required=True)
    make = mongoengine.StringField(required=True)
    year = mongoengine.IntField(required=True)
    mileage = mongoengine.FloatField(default=0.0)
    # Random 32-char hex identifier; a lambda so each new document gets
    # its own value rather than one generated once at import time.
    vi_number = mongoengine.StringField(default=lambda: str(uuid.uuid4()).replace('-', ''))
    engine = mongoengine.EmbeddedDocumentField(Engine, required=True)
    service_history = mongoengine.EmbeddedDocumentListField(ServiceHistory)

    # for service_app.py use this meta:
    # meta = {
    #     'db_alias': 'core',
    #     'collection': 'cars'
    # }

    # for q_and_a.py/
    meta = {
        'db_alias': 'dealership',
        'collection': 'cars',
        'indexes': [
            'service_history.price',
        ]
    }
class Group(gj.EmbeddedDocument):
    """A described group of devices, embedded in a parent document."""

    # NOTE(review): unique=True on a field of an EmbeddedDocument is not
    # enforced by MongoDB/mongoengine — confirm whether intid uniqueness
    # is validated elsewhere.
    intid = db.IntField(required=True, unique=True)
    description = db.StringField(required=True)
    devices = db.EmbeddedDocumentListField('Device')
class Organization(Owner):
    """An organization owner: a set of member users arranged into teams.

    An Organization may be a sub-org of a parent Organization, in which
    case views of its members/teams include the parent's (see as_dict).
    """

    name = me.StringField(required=True)
    members = me.ListField(me.ReferenceField(User), required=True)
    members_count = me.IntField(default=0)
    teams = me.EmbeddedDocumentListField(Team, default=_get_default_org_teams)
    teams_count = me.IntField(default=0)
    clouds_count = me.IntField(default=0)

    # These are assigned only to organization from now on
    promo_codes = me.ListField()
    selected_plan = me.StringField()
    enterprise_plan = me.DictField()

    enable_r12ns = me.BooleanField(required=True, default=False)
    default_monitoring_method = me.StringField(
        choices=config.MONITORING_METHODS)
    insights_enabled = me.BooleanField(default=config.HAS_INSIGHTS)
    ownership_enabled = me.BooleanField()
    created = me.DateTimeField(default=datetime.datetime.now)
    registered_by = me.StringField()

    # used to allow creation of sub-org
    super_org = me.BooleanField(default=False)
    parent = me.ReferenceField('Organization', required=False)

    meta = {'indexes': ['name']}

    @property
    def mapper(self):
        """Returns the `PermissionMapper` for the current Org context."""
        if config.HAS_RBAC:
            from mist.rbac.tasks import AsyncPermissionMapper
        else:
            from mist.api.dummy.mappings import AsyncPermissionMapper
        return AsyncPermissionMapper(self)

    def __str__(self):
        return 'Org %s (%d teams - %d members)' % (self.name,
                                                   len(self.teams),
                                                   len(self.members))

    def get_email(self):
        """Return the email of the first member of the Owners team."""
        return self.teams.get(name='Owners').members[0].email

    def get_emails(self):
        """Return the emails of all members of the Owners team."""
        emails = []
        for user in self.teams.get(name='Owners').members:
            emails.append(user.email)
        return emails

    def get_team(self, team_name):
        """Return the team named `team_name`; raise TeamNotFound otherwise."""
        try:
            return self.teams.get(name=team_name)
        except me.DoesNotExist:
            raise TeamNotFound("No team found with name '%s'." % team_name)

    def get_team_by_id(self, team_id):
        """Return the team with id `team_id`; raise TeamNotFound otherwise."""
        try:
            return self.teams.get(id=team_id)
        except me.DoesNotExist:
            raise TeamNotFound("No team found with id '%s'." % team_id)

    def add_member_to_team(self, team_name, user):
        """Add `user` to the named team, and to org members if missing."""
        team = self.get_team(team_name)
        if user not in team.members:
            team.members.append(user)
        if user not in self.members:
            self.members.append(user)

    def add_member_to_team_by_id(self, team_id, user):
        """Add `user` to the team with `team_id`, and to org members."""
        team = self.get_team_by_id(team_id)
        if user not in team.members:
            team.members.append(user)
        if user not in self.members:
            self.members.append(user)

    def remove_member_from_team(self, team_name, user):
        """Remove `user` from the named team (org membership is kept)."""
        team = self.get_team(team_name)
        for i, member in enumerate(team.members):
            if user == member:
                team.members.pop(i)
                break

    def remove_member_from_team_by_id(self, team_id, user):
        """Remove `user` from the team with `team_id` (org membership kept)."""
        team = self.get_team_by_id(team_id)
        for i, member in enumerate(team.members):
            if user == member:
                team.members.pop(i)
                break

    def remove_member_from_members(self, user):
        """Remove `user` from the org-wide members list."""
        for i, member in enumerate(self.members):
            if user == member:
                self.members.pop(i)
                break

    def as_dict(self):
        """Return a JSON-serializable view of the org.

        Includes active members, pending invitations, teams, and — for
        sub-orgs — the parent org's members and teams flagged with
        parent=True.
        """
        view = json.loads(self.to_json())
        view_id = view["_id"]
        del view["_id"]
        del view["_cls"]
        view["id"] = view_id
        view["members"] = []
        # Iterate over a copy: broken references are removed from
        # self.members inside the loop, and removing from the list being
        # iterated would silently skip the member that follows.
        for member in list(self.members):
            try:
                name = member.get_nice_name()
            except AttributeError:
                # Cannot dereference member
                try:
                    self.members.remove(member)
                    self.save()
                except Exception as e:
                    log.error("Failed to remove missing member from %s: %r" % (
                        self.name, e))
                continue
            view["members"].append({
                "id": member.id,
                "name": name,
                "email": member.email,
                "pending": False,
                "parent": False
            })
        team_pending_members = {}
        invitations = MemberInvitation.objects(org=self)
        for invitation in invitations:
            member = invitation.user
            name = ""
            name = (member.first_name or ' ') + (member.last_name or '')
            name = (name.strip() or member.email)
            view["members"].append({
                "id": member.id,
                "name": name,
                "email": member.email,
                "pending": True,
                "parent": False,
            })
            for team_id in invitation.teams:
                if team_id not in team_pending_members:
                    team_pending_members[team_id] = []
                team_pending_members[team_id].append(member.id)
        for team in view['teams']:
            team["parent"] = False
            if team['id'] in team_pending_members:
                team['members'].extend(team_pending_members[team['id']])
        # handle here the info from parent org
        if self.parent:
            view["parent_org_name"] = self.parent.name
            parent_org = self.parent.as_dict()
            parent_members = parent_org['members']
            parent_teams = parent_org['teams']
            for p_member in parent_members:
                p_member['parent'] = True
                view['members'].append(p_member)
            for p_team in parent_teams:
                p_team['parent'] = True
                view["teams"].append(p_team)
        return view

    def clean(self):
        """Validate org invariants before saving and refresh counters."""
        # make sure that each team's name is unique
        used = set()
        for team in self.teams:
            if team.name in used:
                raise me.ValidationError("Team name exists.")
            used.add(team.name)
        # make sure that all team members are also org members.
        # Rebuild the list instead of popping by enumerate() index: the
        # original popped from the live list using indices taken from a
        # copy, so after the first removal the indices went stale and the
        # wrong members could be dropped.
        for team in self.teams:
            team.members = [member for member in team.members
                            if member in self.members]
        # make sure that owners team is present
        try:
            owners = self.teams.get(name='Owners')
        except me.DoesNotExist:
            raise me.ValidationError("Owners team can't be removed.")
        # make sure that owners team is not empty
        if not owners.members:
            raise me.ValidationError("Owners team can't be empty.")
        if config.HAS_RBAC:
            # make sure owners policy allows all permissions
            if owners.policy.operator != 'ALLOW':
                owners.policy.operator = 'ALLOW'
                log.warning("Owners policy must be set to ALLOW. Updating...")
            # make sure owners policy doesn't contain specific rules
            if owners.policy.rules:
                raise me.ValidationError("Can't set policy rules for Owners.")
        # make sure org name is unique - we can't use the unique keyword on the
        # field definition because both User and Organization subclass Owner
        # but only Organization has a name
        if self.name and Organization.objects(name=self.name, id__ne=self.id):
            raise me.ValidationError("Organization with name '%s' "
                                     "already exists." % self.name)
        self.members_count = len(self.members)
        self.teams_count = len(self.teams)
        # Add schedule for metering.
        try:
            from mist.api.poller.models import MeteringPollingSchedule
            MeteringPollingSchedule.add(self, run_immediately=False)
        except Exception as exc:
            log.error('Error adding metering schedule for %s: %r', self, exc)
        super(Organization, self).clean()
class User(Owner):
    """A user account.

    Subclasses Owner; the unique-email constraint lives in clean()
    because Organization shares the Owner collection but has no email
    field, so a field-level unique index cannot be used.
    """

    email = HtmlSafeStrField()
    # NOTE: deprecated. Only still used to migrate old API tokens
    mist_api_token = me.StringField()
    last_name = HtmlSafeStrField(default='')
    # Pass the class (a callable) as default: the original default=Feedback()
    # built a single embedded document at import time, so every user without
    # explicit feedback referenced — and could mutate — the same object.
    feedback = me.EmbeddedDocumentField(Feedback, default=Feedback)
    activation_key = me.StringField()
    first_name = HtmlSafeStrField(default='')
    invitation_accepted = me.FloatField()
    invitation_date = me.FloatField()
    last_login = me.FloatField()
    password = me.StringField()
    password_set_token = me.StringField()
    password_set_token_created = me.FloatField()
    password_set_user_agent = me.StringField()
    registration_date = me.FloatField()
    registration_method = me.StringField()
    requested_demo = me.BooleanField()
    demo_request_date = me.FloatField()
    role = me.StringField()
    status = me.StringField()

    # these fields will exists only for org
    # when migration from user to org completes
    promo_codes = me.ListField()
    selected_plan = me.StringField()
    enterprise_plan = me.DictField()

    open_id_url = HtmlSafeStrField()
    password_reset_token_ip_addr = me.StringField()
    password_reset_token = me.StringField()
    password_reset_token_created = me.FloatField()
    whitelist_ip_token_ip_addr = me.StringField()
    whitelist_ip_token = me.StringField()
    whitelist_ip_token_created = me.FloatField()
    user_agent = me.StringField()
    username = me.StringField()
    can_create_org = me.BooleanField(default=True)
    beta_access = me.BooleanField(default=True)
    # default=list (a callable), not a shared [] evaluated once at import.
    ips = me.EmbeddedDocumentListField(WhitelistIP, default=list)

    meta = {
        'indexes': [
            {
                'fields': [
                    '$email',
                    '$first_name',
                    '$last_name',
                    '$username'
                ],
                'default_language': 'english',
                'weights': {'last_name': 10, 'first_name': 10}
            },
        ]
    }

    def __str__(self):
        return 'User %s' % self.email

    def set_password(self, password):
        """Hash `password` with pwd_context and persist it."""
        # could perform strength measuring first
        hashed_pwd = pwd_context.encrypt(password)
        self.password = hashed_pwd
        self.save()

    def check_password(self, password):
        """Return True if password matches, False otherwise.

        This will also update the password if it's using a deprecated
        scheme. If user.password is empty because the user registered
        through SSO then the password passed as argument should be empty
        otherwise False will be returned.
        """
        if not self.password or not password:
            return False
        ok, new_hash = pwd_context.verify_and_update(password, self.password)
        if not ok:
            return False
        if new_hash:
            # hashed password was using a deprecated scheme, update it
            log.info("Updating user's password.")
            self.password = new_hash
            self.save()
        return True

    def __eq__(self, other):
        # Return NotImplemented for foreign types instead of raising
        # AttributeError on `other.id`, so comparisons degrade gracefully.
        if not isinstance(other, User):
            return NotImplemented
        return self.id == other.id

    def __hash__(self):
        # Defining __eq__ sets __hash__ to None in Python 3, which would
        # make users unhashable; restore a hash consistent with __eq__.
        return hash(self.id)

    def clean(self):
        # make sure user.email is unique - we can't use the unique keyword on
        # the field definition because both User and Organization subclass
        # Owner but only user has an email field
        if User.objects(email=self.email, id__ne=self.id):
            raise me.ValidationError("User with email '%s' already exists."
                                     % self.email)
        super(User, self).clean()

    def get_nice_name(self):
        """Return a human-readable display name for the user."""
        try:
            if self.first_name and not self.last_name:
                return self.first_name + '(' + self.email + ')'
            else:
                name = (self.first_name or '') + ' ' + (self.last_name or '')
                return name.strip() or self.email
        except AttributeError:
            return self.email

    def get_ownership_mapper(self, org):
        """Return the `OwnershipMapper` in the specified Org context."""
        if config.HAS_RBAC:
            from mist.rbac.mappings import OwnershipMapper
        else:
            from mist.api.dummy.mappings import OwnershipMapper
        return OwnershipMapper(self, org)
class GatingStrategy(mongoengine.Document): """ A GatingTemplate is synonymous to what an immunologist would classically consider a "gating template"; it is a collection of 'gates' (Gate objects, in the case of CytoPy) that can be applied to multiple fcs files or an entire experiment in bulk. A user defines a GatingTemplate using a single example from an experiment, uses the object to preview gates and label child populations, and when satisfied with the performance save the GatingStrategy to the database to be applied to the remaining samples in the Experiment. Attributes ----------- template_name: str, required unique identifier for template gates: EmbeddedDocumentList list of Gate documents creation_date: DateTime date of creation last_edit: DateTime date of last edit flags: str, optional warnings associated to this gating template notes: str, optional free text comments """ name = mongoengine.StringField(required=True, unique=True) gates = mongoengine.ListField(mongoengine.ReferenceField(Gate, reverse_delete_rule=mongoengine.PULL)) actions = mongoengine.EmbeddedDocumentListField(Action) hyperparameter_search = mongoengine.DictField() creation_date = mongoengine.DateTimeField(default=datetime.now) last_edit = mongoengine.DateTimeField(default=datetime.now) flags = mongoengine.StringField(required=False) notes = mongoengine.StringField(required=False) meta = { 'db_alias': 'core', 'collection': 'gating_strategy' } def __init__(self, *args, **values): self.verbose = values.pop("verbose", True) self.print = vprint(verbose=self.verbose) super().__init__(*args, **values) self.filegroup = None def load_data(self, experiment: Experiment, sample_id: str): """ Load a FileGroup into the GatingStrategy ready for gating. 
Parameters ---------- experiment: Experiment sample_id: str Returns ------- None """ self.filegroup = experiment.get_sample(sample_id=sample_id) def list_gates(self) -> list: """ List name of existing Gates Returns ------- list """ return [g.gate_name for g in self.gates] def list_populations(self) -> list: """ Wrapper to FileGroup list_populations. Lists populations in associated FileGroup. Returns ------- list """ assert self.filegroup is not None, "No FileGroup associated" return list(self.filegroup.list_populations()) def _gate_exists(self, gate: str): """ Raises AssertionError if given gate does not exist Returns ------- None """ assert gate in self.list_gates(), f"Gate {gate} does not exist" def get_gate(self, gate: str) -> Gate: """ Given the name of a gate, return the Gate object Parameters ---------- gate: str Returns ------- Gate """ self._gate_exists(gate=gate) return [g for g in self.gates if g.gate_name == gate][0] def preview_gate(self, gate: str or Gate or ThresholdGate or PolygonGate or EllipseGate, create_plot_kwargs: dict or None = None, plot_gate_kwargs: dict or None = None): """ Preview the results of some given Gate Parameters ---------- gate: str or Gate or ThresholdGate or PolygonGate or EllipseGate Name of an existing Gate or a Gate object create_plot_kwargs: dict (optional) Additional arguments passed to CreatePlot plot_gate_kwargs: dict (optional) Additional arguments passed to plot_gate call of CreatePlot Returns ------- Matplotlib.Axes """ create_plot_kwargs = create_plot_kwargs or {} plot_gate_kwargs = plot_gate_kwargs or {} if isinstance(gate, str): gate = self.get_gate(gate=gate) parent_data = self.filegroup.load_population_df(population=gate.parent, transform=None, label_downstream_affiliations=False) gate.fit(data=parent_data) plot = CreatePlot(**create_plot_kwargs) return plot.plot_gate_children(gate=gate, parent=parent_data, **plot_gate_kwargs) def add_hyperparameter_grid(self, gate_name: str, params: dict, cost: str or None = 
None): """ Add a hyperparameter grid to search which applying the given gate to new data. This hyperparameter grid should correspond to valid hyperparameters for the corresponding gate. Invalid parameters will be ignored. Choice of the cost parameter to be minimised is dependent on the type of gate: * ThresholdGate: - "manhattan" (default): optimal parameters are those that result in the population whom's signature is of minimal distance to the original data used to define the gate. The manhattan distance is used as the distance metric. - "euclidean": optimal parameters are those that result in the population whom's signature is of minimal distance to the original data used to define the gate. The euclidean distance is used as the distance metric. - "threshold_dist": optimal parameters are those that result in the threshold whom's distance to the original threshold defined are smallest * PolygonGate & EllipseGate: - "hausdorff" (optional): parameters chosen that minimise the hausdorff distance between the polygon generated from new data and the original polgon gate created when the gate was defined - "manhattan" (default): optimal parameters are those that result in the population whom's signature is of minimal distance to the original data used to define the gate. The manhattan distance is used as the distance metric. - "euclidean": optimal parameters are those that result in the population whom's signature is of minimal distance to the original data used to define the gate. The euclidean distance is used as the distance metric. 
Parameters ---------- gate_name: str Gate to define hyperparameter grid for params: dict Grid of hyperparameters to be searched cost: str What to be minimised to choose optimal hyperparameters Returns ------- None """ assert gate_name in self.list_gates(), f"{gate_name} is not a valid gate" if isinstance(self.get_gate(gate_name), ThresholdGate): cost = cost or "manhattan" valid_metrics = ["manhattan", "threshold_dist", "euclidean"] err = f"For threshold gate 'cost' should either be one of {valid_metrics}" assert cost in valid_metrics, err if isinstance(self.get_gate(gate_name), PolygonGate) or isinstance(self.get_gate(gate_name), EllipseGate): cost = cost or "hausdorff" valid_metrics = ["hausdorff", "manhattan", "euclidean"] err = f"For threshold gate 'cost' should either be one of {valid_metrics}" assert cost in valid_metrics, err err = "'params' must be a dictionary with each key corresponding to a valid " \ "hyperparameter and each value a list of parameter values" assert isinstance(params, dict), err assert all([isinstance(x, list) for x in params.values()]), err self.hyperparameter_search[gate_name] = {"grid": params, "cost": cost} def apply_gate(self, gate: str or Gate or ThresholdGate or PolygonGate or EllipseGate, plot: bool = True, verbose: bool = True, add_to_strategy: bool = True, create_plot_kwargs: dict or None = None, plot_gate_kwargs: dict or None = None, hyperparam_search: bool = True, overwrite_method_kwargs: dict or None = None): """ Apply a gate to the associated FileGroup. The gate must be previously defined; children associated and labeled. Either a Gate object can be provided or the name of an existing gate saved to this GatingStrategy. 
Parameters ---------- gate: str or Gate or ThresholdGate or PolygonGate or EllipseGate Name of an existing Gate or a Gate object plot: bool (default=True) If True, returns a Matplotlib.Axes object of plotted gate verbose: bool (default=True) If True, print gating statistics to stdout and provide feedback add_to_strategy: bool (default=True) If True, append the Gate to the GatingStrategy create_plot_kwargs: dict (optional) Additional arguments passed to CreatePlot plot_gate_kwargs: dict (optional) Additional arguments passed to plot_gate call of CreatePlot hyperparam_search: bool (default=True) If True and hyperparameter grid has been defined for the chosen gate, then hyperparameter search is performed to find the optimal fit for the newly encountered data. overwrite_method_kwargs: dict, optional If a dictionary is provided (and hyperparameter search isn't defined for this gate) then method parameters are overwritten with these new parameters. Returns ------- Matplotlib.Axes or None """ if isinstance(gate, str): gate = self.get_gate(gate=gate) add_to_strategy = False if add_to_strategy: assert gate.gate_name not in self.list_gates(), \ f"Gate with name {gate.gate_name} already exists. 
To continue set add_to_strategy to False" create_plot_kwargs = create_plot_kwargs or {} plot_gate_kwargs = plot_gate_kwargs or {} parent_data = self.filegroup.load_population_df(population=gate.parent, transform=None, label_downstream_affiliations=False) original_method_kwargs = gate.method_kwargs.copy() if overwrite_method_kwargs is not None: gate.method_kwargs = overwrite_method_kwargs if gate.gate_name in self.hyperparameter_search.keys() and hyperparam_search: populations = hyperparameter_gate(gate=gate, grid=self.hyperparameter_search.get(gate.gate_name).get("grid"), cost=self.hyperparameter_search.get(gate.gate_name).get("cost"), parent=parent_data, verbose=verbose) elif gate.ctrl is None: populations = gate.fit_predict(data=parent_data) else: populations = self._control_gate(gate=gate) for p in populations: self.filegroup.add_population(population=p) if verbose: gate_stats(gate=gate, parent_data=parent_data, populations=populations) if add_to_strategy: self.gates.append(gate) if plot: plot = CreatePlot(**create_plot_kwargs) return plot.plot_population_geoms(parent=parent_data, children=populations, **plot_gate_kwargs) gate.method_kwargs = original_method_kwargs return None def apply_all(self, verbose: bool = True): """ Apply all the gates associated to this GatingStrategy Parameters ---------- verbose: bool (default=True) If True, print feedback to stdout Returns ------- None """ feedback = vprint(verbose) populations_created = [[c.name for c in g.children] for g in self.gates] populations_created = [x for sl in populations_created for x in sl] assert len(self.gates) > 0, "No gates to apply" err = "One or more of the populations generated from this gating strategy are already " \ "presented in the population tree" assert all([x not in self.list_populations() for x in populations_created]), err gates_to_apply = list(self.gates) actions_to_apply = list(self.actions) i = 0 iteration_limit = len(gates_to_apply) * 100 
feedback("=====================================================") while len(gates_to_apply) > 0: if i >= len(gates_to_apply): i = 0 gate = gates_to_apply[i] if gate.parent in self.list_populations(): if self.filegroup.population_stats(gate.parent).get("n") <= 3: raise ValueError(f"Insufficient events in parent population {gate.parent}") feedback(f"------ Applying {gate.gate_name} ------") self.apply_gate(gate=gate, plot=False, verbose=verbose, add_to_strategy=False) feedback("----------------------------------------") gates_to_apply = [g for g in gates_to_apply if g.gate_name != gate.gate_name] actions_applied_this_loop = list() for a in actions_to_apply: if a.left in self.list_populations() and a.right in self.list_populations(): feedback(f"------ Applying {a.action_name} ------") self.apply_action(action=a, print_stats=verbose, add_to_strategy=False) feedback("----------------------------------------") actions_applied_this_loop.append(a.action_name) actions_to_apply = [a for a in actions_to_apply if a.action_name not in actions_applied_this_loop] i += 1 iteration_limit -= 1 assert iteration_limit > 0, "Maximum number of iterations reached. This means that one or more parent " \ "populations are not being identified." def delete_actions(self, action_name: str): """ Delete an action associated to this GatingStrategy Parameters =========== action_name: str Returns ------- None """ self.actions = [a for a in self.actions if a.action_name != action_name] def apply_action(self, action: Action or str, print_stats: bool = True, add_to_strategy: bool = True): """ Apply an action, that is, a merge or subtraction: * Merge: merge two populations present in the current population tree. The merged population will have the combined index of both populations but will not inherit any clusters and will not be associated to any children downstream of either the left or right population. 
The population will be added to the tree as a descendant of the left populations parent * Subtraction: subtract the right population from the left population. The right population must either have the same parent as the left population or be downstream of the left population. The new population will descend from the same parent as the left population. The new population will have a PolygonGeom geom. Parameters ---------- action: Action print_stats: bool (default=True) Print population statistics to stdout add_to_strategy: bool (default=True) Add action to this GatingStrategy Returns ------- None """ if isinstance(action, str): matching_action = [a for a in self.actions if a.action_name == action] assert len(matching_action) == 1, f"{action} does not exist" action = matching_action[0] assert action.method in ["merge", "subtract"], "Accepted methods are: merge, subtract" assert action.left in self.list_populations(), f"{action.left} does not exist" assert action.right in self.list_populations(), f"{action.right} does not exist" left = self.filegroup.get_population(action.left) right = self.filegroup.get_population(action.right) if action.method == "merge": self.filegroup.merge_populations(left=left, right=right, new_population_name=action.new_population_name) else: self.filegroup.subtract_populations(left=left, right=right, new_population_name=action.new_population_name) if print_stats: new_pop_name = action.new_population_name or f"{action.method}_{left.population_name}_{right.population_name}" new_pop = self.filegroup.get_population(population_name=new_pop_name) print(f"------ {action.action_name} ------") parent_n = self.filegroup.get_population(left.parent).n print(f"Parent ({left.parent}) n: {parent_n}") print(f"Left pop ({left.population_name}) n: {left.n}; {left.n / parent_n * 100}%") print(f"Right pop ({right.population_name}) n: {right.n}; {right.n / parent_n * 100}%") print(f"New population n: {new_pop.n}; {new_pop.n / parent_n * 100}%") 
print("-----------------------------------") if add_to_strategy: self.actions.append(action) def delete_gate(self, gate_name: str): """ Remove a gate from this GatingStrategy. Note: populations generated from this gate will not be deleted. These populations must be deleted separately by calling the 'delete_population' method. Parameters ---------- gate_name: str Name of the gate for removal Returns ------- None """ self.gates = [g for g in self.gates if g.gate_name != gate_name] def delete_populations(self, populations: str or list): """ Delete given populations. Populations downstream from delete population(s) will also be removed. Parameters ---------- populations: list or str Either a list of populations (list of strings) to remove or a single population as a string. If a value of "all" is given, all populations are dropped. Returns ------- None """ self.filegroup.delete_populations(populations=populations) def plot_gate(self, gate: str, create_plot_kwargs: dict or None = None, **kwargs): """ Plot a gate. Must provide the name of a Gate currently associated to this GatingStrategy. This will plot the parent population this gate acts on along with the geometries that define the child populations the gate generates. Parameters ---------- gate: str or Gate or EllipseGate or ThresholdGate or PolygonGate create_plot_kwargs: dict Keyword arguments for CreatePlot object. See CytoPy.plotting.CreatePlot for details. kwargs: Keyword arguments for plot_gate call. See CytoPy.plotting.CreatePlot.plot_population_geom for details. Returns ------- Matplotlib.Axes """ create_plot_kwargs = create_plot_kwargs or {} assert isinstance(gate, str), "Provide the name of an existing Gate in this GatingStrategy" assert gate in self.list_gates(), \ f"Gate {gate} not recognised. Have you applied it and added it to the strategy?" 
gate = self.get_gate(gate=gate) parent = self.filegroup.load_population_df(population=gate.parent, transform=None, label_downstream_affiliations=False) plotting = CreatePlot(**create_plot_kwargs) return plotting.plot_population_geoms(parent=parent, children=[self.filegroup.get_population(c.name) for c in gate.children], **kwargs) def plot_backgate(self, parent: str, overlay: list, x: str, y: str or None = None, create_plot_kwargs: dict or None = None, **backgate_kwargs): """ Given some population as the backdrop (parent) and a list of one or more populations that occur downstream of the parent (overlay), plot the downstream populations as scatter plots over the top of the parent. Parameters ---------- parent: str overlay: list x: str y: str create_plot_kwargs Additional keyword arguments passed to CytoPy.flow.plotting.CreatePlot backgate_kwargs Additional keyword arguments passed to CytoPy.flow.plotting.CreatePlot.backgate Returns ------- Matplotlib.Axes """ assert parent in self.list_populations(), "Parent population does not exist" assert all([x in self.list_populations() for x in overlay]), "One or more given populations could not be found" downstream = self.filegroup.list_downstream_populations(population=parent) assert all([x in downstream for x in overlay]), \ "One or more of the given populations is not downstream of the given parent" plotting = CreatePlot(**create_plot_kwargs) parent = self.filegroup.load_population_df(population=parent, transform=None, label_downstream_affiliations=False) children = {x: self.filegroup.load_population_df(population=x, transform=None, label_downstream_affiliations=False) for x in overlay} return plotting.backgate(parent=parent, children=children, x=x, y=y, **backgate_kwargs) def plot_population(self, population: str, x: str, y: str or None = None, transform_x: str or None = "logicle", transform_y: str or None = "logicle", create_plot_kwargs: dict or None = None, **plot_kwargs): """ Plot an existing population in the 
associate FileGroup. Parameters ---------- population: str x: str y: str (optional) transform_x: str (optional; default="logicle") transform_y: str (optional; default="logicle") create_plot_kwargs: Additional keyword arguments passed to CytoPy.flow.plotting.CreatePlot plot_kwargs Additional keyword arguments passed to CytoPy.flow.plotting.CreatePlot.plot Returns ------- Matplotlib.Axes """ assert population in self.list_populations(), f"{population} does not exist" data = self.filegroup.load_population_df(population=population, transform=None, label_downstream_affiliations=False) create_plot_kwargs = create_plot_kwargs or {} plotting = CreatePlot(transform_x=transform_x, transform_y=transform_y, **create_plot_kwargs) return plotting.plot(data=data, x=x, y=y, **plot_kwargs) def print_population_tree(self, **kwargs): """ Print the population tree to stdout. Wraps CytoPy.data.fcs.FileGroup.print_population_tree Parameters ---------- kwargs See keyword arguments for CytoPy.data.fcs.FileGroup.print_population_tree Returns ------- None """ self.filegroup.print_population_tree(**kwargs) def edit_gate(self, gate_name: str, x_threshold: float or None = None, y_threshold: float or None = None, x_values: list or None = None, y_values: list or None = None): """ Edit an existing gate (i.e. the polygon or threshold shape that generates the resulting populations). The altered geometry will be applied to the parent population resulting this gate acts upon, resulting in new data. Populations downstream of this edit will also be effected but gates will not adapt dynamically, instead the static results of gating algorithms will still apply, but to a new dataset. For this reason, gates should be checked (similar to the effects of moving a gate in FlowJo). 
Parameters ---------- gate_name: str x_threshold: float (optional) Required for threshold geometries y_threshold: float (optional) Required for 2D threshold geometries x_values: list Required for Polygon geometries y_values: list Required for Polygon geometries Returns ------- None """ gate = self.get_gate(gate=gate_name) err = "Cannot edit a gate that has not been applied; gate children not present in population " \ "tree." assert all([x in self.filegroup.tree.keys() for x in [c.name for c in gate.children]]), err transforms = [gate.transformations.get(x, None) for x in ["x", "y"]] transforms = {k: v for k, v in zip([gate.x, gate.y], transforms) if k is not None} parent = self.filegroup.load_population_df(population=gate.parent, transform=transforms) for child in gate.children: pop = self.filegroup.get_population(population_name=child.name) if isinstance(pop.geom, ThresholdGeom): assert x_threshold is not None, "For threshold geometry, please provide x_threshold" if pop.geom.y_threshold is not None: assert y_threshold is not None, "For 2D threshold geometry, please provide y_threshold" update_threshold(population=pop, parent_data=parent, x_threshold=x_threshold, y_threshold=y_threshold) elif isinstance(pop.geom, PolygonGeom): assert x_values is not None and y_values is not None, \ "For polygon gate please provide x_values and y_values" update_polygon(population=pop, parent_data=parent, x_values=x_values, y_values=y_values) self._edit_downstream_effects(population_name=child.name) def _edit_downstream_effects(self, population_name: str): """ Echos the downstream effects of an edited gate by iterating over the Population dependencies and reapplying their geometries to the modified data. Should be called after 'edit_population'. 
Parameters ---------- population_name: str Returns ------- None """ downstream_populations = self.filegroup.list_downstream_populations(population=population_name) for pop in downstream_populations: pop = self.filegroup.get_population(pop) transforms = {k: v for k, v in zip([pop.geom.x, pop.geom.y], [pop.geom.transform_x, pop.geom.transform_y]) if k is not None} parent = self.filegroup.load_population_df(population=pop.parent, transform=transforms) if isinstance(pop.geom, ThresholdGeom): update_threshold(population=pop, parent_data=parent, x_threshold=pop.geom.x_threshold, y_threshold=pop.geom.y_threshold) elif isinstance(pop.geom, PolygonGeom): update_polygon(population=pop, parent_data=parent, x_values=pop.geom.x_values, y_values=pop.geom.y_values) def _control_gate(self, gate: Gate or ThresholdGate or PolygonGate or EllipseGate): """ Internal method for applying a gate using control data. Will first attempt to fetch the parent population for the control data (see CytoPy.data.fcs.FileGroup.load_ctrl_population_df) and then will fit the gate to this data. The resulting gate will be applied statically to the parent population from the primary data. Parameters ---------- gate: Gate or ThresholdGate or PolygonGate or EllipseGate Returns ------- list List of Populations """ assert gate.ctrl in self.filegroup.controls, f"FileGroup does not have data for {gate.ctrl}" ctrl_parent_data = self.filegroup.load_ctrl_population_df(ctrl=gate.ctrl, population=gate.parent, transform=None) # Fit control data populations = gate.fit_predict(data=ctrl_parent_data) updated_children = list() for p in populations: eq_child = [c for c in gate.children if c.name == p.population_name] assert len(eq_child) == 1, "Invalid gate. Estimated populations do not match children." 
eq_child = eq_child[0] eq_child.geom = p.geom updated_children.append(eq_child) gate.children = updated_children # Predict original data parent_data = self.filegroup.load_population_df(population=gate.parent, transform=None, label_downstream_affiliations=False) return gate.fit_predict(data=parent_data) def save(self, save_strategy: bool = True, save_filegroup: bool = True, *args, **kwargs): """ Save GatingStrategy and the populations generated for the associated FileGroup. Parameters ---------- save_filegroup: bool (default=True) save_strategy: bool (default=True) args: Positional arguments for mongoengine.document.save call kwargs: Keyword arguments for mongoengine.document.save call Returns ------- None """ if save_strategy: for g in self.gates: g.save() super().save(*args, **kwargs) if save_filegroup: if self.name not in self.filegroup.gating_strategy: self.filegroup.gating_strategy.append(self.name) if self.filegroup is not None: self.filegroup.save() def delete(self, delete_gates: bool = True, remove_associations: bool = True, *args, **kwargs): """ Delete gating strategy. If delete_gates is True, then associated Gate objects will also be deleted. If remove_associations is True, then populations generated from this gating strategy will also be deleted. 
Parameters ---------- delete_gates: bool (default=True) remove_associations: (default=True) args: Positional arguments for mongoengine.document.delete call kwargs: Keyword arguments for mongoengine.document.delete call Returns ------- """ super().delete(*args, **kwargs) populations = [[c.name for c in g.children] for g in self.gates] populations = list(set([x for sl in populations for x in sl])) if delete_gates: self.print("Deleting gates...") for g in self.gates: g.delete() if remove_associations: self.print("Deleting associated populations in FileGroups...") for f in progress_bar(FileGroup.objects(), verbose=self.verbose): if self.name in f.gating_strategy: f.gating_strategy = [gs for gs in f.gating_strategy if gs != self.name] f.delete_populations(populations=populations) f.save() self.print(f"{self.name} successfully deleted.")
class Population(mongoengine.EmbeddedDocument):
    """
    A population of cells identified by either a gate or supervised algorithm.
    Stores the index of events corresponding to a single population, where the
    index relates back to the primary data in the FileGroup in which a
    population is embedded. Populations also store Clusters generated from
    high dimensional clustering algorithms such as FlowSOM or PhenoGraph.
    These clusters are derived from this population.

    Parameters
    ----------
    population_name: str, required
        name of population
    n: int
        number of events associated to this population
    parent: str, required, (default: "root")
        name of parent population
    prop_of_parent: float, required
        proportion of events as a percentage of parent population
    prop_of_total: float, required
        proportion of events as a percentage of all events
    warnings: list, optional
        list of warnings associated to population
    geom: PopulationGeometry
        PopulationGeometry (see CytoPy.data.geometry) that defines the gate
        that captures this population.
    clusters: EmbeddedDocListField
        list of associated Cluster documents
    definition: str
        relevant for populations generated by a ThresholdGate; defines the
        source of this population e.g. "+" for a 1D threshold or "+-" for a
        2D threshold
    index: Numpy.Array
        numpy array storing index of events that belong to population
    signature: dict
        average of a population feature space (median of each channel); used
        to match children to newly identified populations for annotating
    """
    population_name = mongoengine.StringField()
    n = mongoengine.IntField()
    parent = mongoengine.StringField(required=True, default='root')
    prop_of_parent = mongoengine.FloatField()
    prop_of_total = mongoengine.FloatField()
    warnings = mongoengine.ListField()
    geom = mongoengine.EmbeddedDocumentField(PopulationGeometry)
    clusters = mongoengine.EmbeddedDocumentListField(Cluster)
    definition = mongoengine.StringField()
    signature = mongoengine.DictField()

    def __init__(self, *args, **kwargs):
        # If the Population existed previously, fetch the index
        self._index = kwargs.pop("index", None)
        self._ctrl_index = kwargs.pop("ctrl_index", dict())
        super().__init__(*args, **kwargs)

    @property
    def index(self):
        return self._index

    @index.setter
    def index(self, idx: np.array):
        assert isinstance(idx, np.ndarray), "idx should be type numpy.array"
        # Keep event count in sync with the stored index
        self.n = len(idx)
        self._index = np.array(idx)

    @property
    def ctrl_index(self):
        return self._ctrl_index

    def set_ctrl_index(self, **kwargs):
        # Each keyword maps a control name to the numpy index of its events
        for k, v in kwargs.items():
            assert isinstance(v, np.ndarray), "ctrl_idx should be type numpy.array"
            self._ctrl_index[k] = v

    def add_cluster(self, cluster: Cluster):
        """
        Add a new cluster generated from CytoPy.flow.clustering.main.Clustering.

        Parameters
        ----------
        cluster: Cluster

        Returns
        -------
        None
        """
        _id, tag = cluster.cluster_id, cluster.tag
        err = f"Cluster already exists with id: {_id}; tag: {tag}"
        assert not any([x.cluster_id == _id and x.tag == tag for x in self.clusters]), err
        self.clusters.append(cluster)

    def delete_cluster(self,
                       cluster_id: str or None = None,
                       tag: str or None = None,
                       meta_label: str or None = None):
        """
        Delete cluster using either cluster ID, tag, or meta label

        Parameters
        ----------
        cluster_id: str
        tag: str
        meta_label: str

        Returns
        -------
        None
        """
        err = "Must provide either cluster_id, tag or meta_label"
        # Exactly one selector must be given
        assert sum([x is not None for x in [cluster_id, tag, meta_label]]) == 1, err
        if cluster_id:
            self.clusters = [c for c in self.clusters if c.cluster_id != cluster_id]
        elif tag:
            self.clusters = [c for c in self.clusters if c.tag != tag]
        elif meta_label:
            self.clusters = [c for c in self.clusters if c.meta_label != meta_label]

    def delete_all_clusters(self, clusters: list or str = "all"):
        """
        Provide either a list of cluster IDs for deletion or give value of
        "all" to delete all clusters.

        Parameters
        ----------
        clusters: list or str (default="all")

        Returns
        -------
        None
        """
        if isinstance(clusters, list):
            self.clusters = [c for c in self.clusters if c.cluster_id not in clusters]
        else:
            self.clusters = []

    def list_clusters(self,
                      tag: str or None = None,
                      meta_label: str or None = None) -> List[str]:
        """
        List cluster IDs associated to a given tag or meta label

        Parameters
        ----------
        tag: str
        meta_label: str

        Returns
        -------
        List
        """
        if tag:
            return [c.cluster_id for c in self.clusters if c.tag == tag]
        elif meta_label:
            return [c.cluster_id for c in self.clusters if c.meta_label == meta_label]
        else:
            return [c.cluster_id for c in self.clusters]

    def get_clusters(self,
                     cluster_id: list or None = None,
                     tag: str or None = None,
                     meta_label: str or None = None) -> List[Cluster]:
        """
        Returns list of cluster objects by either cluster IDs, tag or meta label

        Parameters
        ----------
        cluster_id: list
        tag: str
        meta_label: str

        Returns
        -------
        list
        """
        err = "Provide list of cluster IDs and/or tag and/or meta_label"
        # Bug fix: the original called len() on an int (len(sum([...]))),
        # raising TypeError on every call, and omitted cluster_id from the
        # check despite the error message mentioning it.
        assert any(x is not None for x in [cluster_id, tag, meta_label]), err
        clusters = self.clusters
        if cluster_id:
            clusters = [c for c in clusters if c.cluster_id in cluster_id]
        # Bug fix: filter by equality rather than substring containment
        # ('c.tag in tag'), consistent with list_clusters above.
        if tag:
            clusters = [c for c in clusters if c.tag == tag]
        if meta_label:
            clusters = [c for c in clusters if c.meta_label == meta_label]
        return clusters
class Machine(OwnershipMixin, me.Document):
    """The basic machine model"""

    id = me.StringField(primary_key=True, default=lambda: uuid.uuid4().hex)

    cloud = me.ReferenceField('Cloud', required=True)
    owner = me.ReferenceField('Organization', required=True)
    location = me.ReferenceField('CloudLocation', required=False)
    size = me.ReferenceField('CloudSize', required=False)
    network = me.ReferenceField('Network', required=False)
    subnet = me.ReferenceField('Subnet', required=False)
    name = me.StringField()

    # Info gathered mostly by libcloud (or in some cases user input).
    # Be more specific about what this is.
    # We should perhaps come up with a better name.
    machine_id = me.StringField(required=True)
    hostname = me.StringField()
    public_ips = me.ListField()
    private_ips = me.ListField()
    ssh_port = me.IntField(default=22)
    OS_TYPES = ('windows', 'coreos', 'freebsd', 'linux', 'unix')
    os_type = me.StringField(default='unix', choices=OS_TYPES)
    rdp_port = me.IntField(default=3389)
    actions = me.EmbeddedDocumentField(Actions, default=lambda: Actions())
    extra = me.DictField()
    cost = me.EmbeddedDocumentField(Cost, default=lambda: Cost())
    image_id = me.StringField()
    # libcloud.compute.types.NodeState
    state = me.StringField(default='unknown',
                           choices=('running', 'starting', 'rebooting',
                                    'terminated', 'pending', 'unknown',
                                    'stopping', 'stopped', 'suspended',
                                    'error', 'paused', 'reconfiguring'))
    machine_type = me.StringField(default='machine',
                                  choices=('machine', 'vm', 'container',
                                           'hypervisor', 'container-host'))
    parent = me.ReferenceField('Machine', required=False)

    # We should think this through a bit.
    key_associations = me.EmbeddedDocumentListField(KeyAssociation)

    last_seen = me.DateTimeField()
    missing_since = me.DateTimeField()
    unreachable_since = me.DateTimeField()
    created = me.DateTimeField()

    monitoring = me.EmbeddedDocumentField(Monitoring,
                                          default=lambda: Monitoring())

    ssh_probe = me.EmbeddedDocumentField(SSHProbe, required=False)
    ping_probe = me.EmbeddedDocumentField(PingProbe, required=False)

    # Number of vCPUs gathered from various sources. This field is meant to
    # be updated ONLY by the mist.api.metering.tasks:find_machine_cores task.
    cores = me.IntField()

    meta = {
        'collection': 'machines',
        'indexes': [{
            'fields': ['cloud', 'machine_id'],
            'sparse': False,
            'unique': True,
            'cls': False,
        }, {
            'fields': ['monitoring.installation_status.activated_at'],
            'sparse': True,
            'unique': False
        }],
        'strict': False,
    }

    def __init__(self, *args, **kwargs):
        super(Machine, self).__init__(*args, **kwargs)
        # Attach the controller that implements machine operations.
        self.ctl = MachineController(self)

    def clean(self):
        # Remove any KeyAssociation, whose `keypair` has been deleted. Do NOT
        # perform an atomic update on self, but rather remove items from the
        # self.key_associations list by iterating over it and popping matched
        # embedded documents in order to ensure that the most recent list is
        # always processed and saved.
        for ka in reversed(range(len(self.key_associations))):
            if self.key_associations[ka].keypair.deleted:
                self.key_associations.pop(ka)

        # Populate owner field based on self.cloud.owner
        if not self.owner:
            self.owner = self.cloud.owner

        self.clean_os_type()

        if self.monitoring.method not in config.MONITORING_METHODS:
            self.monitoring.method = config.DEFAULT_MONITORING_METHOD

    def clean_os_type(self):
        """Clean self.os_type"""
        if self.os_type not in self.OS_TYPES:
            # Normalise case-insensitive matches; otherwise fall back to unix.
            for os_type in self.OS_TYPES:
                if self.os_type.lower() == os_type:
                    self.os_type = os_type
                    break
            else:
                self.os_type = 'unix'

    def delete(self):
        super(Machine, self).delete()
        mist.api.tag.models.Tag.objects(resource=self).delete()
        try:
            self.owner.mapper.remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)
        try:
            if self.owned_by:
                self.owned_by.get_ownership_mapper(self.owner).remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)

    def as_dict(self):
        # Return a dict as it will be returned to the API

        # tags as a list return for the ui
        tags = {
            tag.key: tag.value
            for tag in mist.api.tag.models.Tag.objects(
                resource=self).only('key', 'value')
        }
        # Optimize tags data structure for js...
        # Bug fix: dict.iteritems() is Python-2-only and raised
        # AttributeError under Python 3; .items() behaves the same on both.
        # The redundant isinstance check was dropped: tags is always a dict.
        tags = [{
            'key': key,
            'value': value
        } for key, value in tags.items()]
        return {
            'id': self.id,
            'hostname': self.hostname,
            'public_ips': self.public_ips,
            'private_ips': self.private_ips,
            'name': self.name,
            'ssh_port': self.ssh_port,
            'os_type': self.os_type,
            'rdp_port': self.rdp_port,
            'machine_id': self.machine_id,
            'actions': {action: self.actions[action]
                        for action in self.actions},
            'extra': self.extra,
            'cost': self.cost.as_dict(),
            'image_id': self.image_id,
            'state': self.state,
            'tags': tags,
            'monitoring':
                self.monitoring.as_dict() if self.monitoring and
                self.monitoring.hasmonitoring else '',
            'key_associations': [ka.as_dict() for ka in self.key_associations],
            'cloud': self.cloud.id,
            'location': self.location.id if self.location else '',
            'size': self.size.name if self.size else '',
            'cloud_title': self.cloud.title,
            'last_seen': str(self.last_seen.replace(tzinfo=None)
                             if self.last_seen else ''),
            'missing_since': str(self.missing_since.replace(tzinfo=None)
                                 if self.missing_since else ''),
            'unreachable_since': str(self.unreachable_since.replace(tzinfo=None)
                                     if self.unreachable_since else ''),
            'created': str(self.created.replace(tzinfo=None)
                           if self.created else ''),
            'machine_type': self.machine_type,
            'parent_id': self.parent.id if self.parent is not None else '',
            'probe': {
                'ping': (self.ping_probe.as_dict()
                         if self.ping_probe is not None
                         else PingProbe().as_dict()),
                'ssh': (self.ssh_probe.as_dict()
                        if self.ssh_probe is not None
                        else SSHProbe().as_dict()),
            },
            'cores': self.cores,
            'network': self.network.id if self.network else '',
            'subnet': self.subnet.id if self.subnet else '',
            'owned_by': self.owned_by.id if self.owned_by else '',
            'created_by': self.created_by.id if self.created_by else '',
        }

    def __str__(self):
        return 'Machine %s (%s) in %s' % (self.name, self.id, self.cloud)
class PollingSchedule(ShardedScheduleMixin, me.Document):
    """Base document for a periodic polling task scheduled via celerybeatmongo.

    Subclasses provide the `task` (and optionally `args`/`kwargs`) and the
    scheduler consults `interval`/`schedule` to decide when to run it.
    """

    meta = {
        'allow_inheritance': True,
        'strict': False,
        'indexes': ['shard_id', 'shard_update_at']
    }

    # We use a unique name for easy identification and to avoid running the
    # same schedule twice. The name is autopopulated during the invocation of
    # the `clean` method.
    name = me.StringField(unique=True)

    # The following fields are defined in celerybeatmongo.models.PeriodicTask.
    # Here, we define no fields in the base class, and expect subclasses to
    # either define their fields, or simply use properties.
    # task = me.StringField(required=True)
    # args = me.ListField()
    # kwargs = me.DictField()

    # Scheduling information. Don't edit them directly, just use the model
    # methods.
    default_interval = me.EmbeddedDocumentField(
        PollingInterval, required=True, default=PollingInterval(every=0))
    override_intervals = me.EmbeddedDocumentListField(PollingInterval)

    # Optional arguments.
    queue = me.StringField()
    exchange = me.StringField()
    routing_key = me.StringField()
    soft_time_limit = me.IntField()

    # Used internally by the scheduler.
    last_run_at = me.DateTimeField()
    total_run_count = me.IntField(min_value=0)
    run_immediately = me.BooleanField()

    def get_name(self):
        """Construct name based on self.task"""
        # `task` is a property that subclasses must implement; the base-class
        # implementation raises NotImplementedError, which we translate into
        # a descriptive placeholder name.
        try:
            return self.task.split('.')[-1]
        except NotImplementedError:
            return '%s: No task specified.' % self.__class__.__name__

    def clean(self):
        """Automatically set value of name"""
        self.name = self.get_name()

    @property
    def task(self):
        """Return task name for this schedule

        Subclasses should define an attribute, property or field to do this.
        """
        raise NotImplementedError()

    @property
    def args(self):
        """Return task args for this schedule"""
        return [str(self.id)]

    @property
    def kwargs(self):
        """Return task kwargs for this schedule"""
        return {}

    @property
    def enabled(self):
        """Whether this task is currently enabled or not"""
        # A zero/empty merged interval means the schedule is disabled.
        return bool(self.interval.timedelta)

    @property
    def interval(self):
        """Merge multiple intervals into one

        Returns a dynamic PollingInterval, with the highest frequency of any
        override schedule or the default schedule.
        """
        interval = self.default_interval
        # Non-expired overrides may only tighten (shorten) the interval.
        for i in self.override_intervals:
            if not i.expired():
                if not interval.timedelta or i.timedelta < interval.timedelta:
                    interval = i
        return interval

    @property
    def schedule(self):
        """Return a celery schedule instance

        This is used internally by celerybeatmongo scheduler
        """
        return celery.schedules.schedule(self.interval.timedelta)

    @property
    def expires(self):
        # No expiry for the scheduled task itself; overrides expire
        # individually via their own TTL.
        return None

    def add_interval(self, interval, ttl=300, name=''):
        """Add an override schedule to the scheduled task

        Override schedules must define an interval in seconds, as well as a
        TTL (time to live), also in seconds. Override schedules cannot be
        removed, so short TTL's should be used. You can however add a new
        override schedule again, thus practically extending the time where an
        override is in effect. Override schedules can only increase, not
        decrease frequency of the schedule, in relation to that defined in
        the `default_interval`.
        """
        assert isinstance(interval, int) and interval > 0
        assert isinstance(ttl, int) and 0 < ttl < 3600
        expires = datetime.datetime.now() + datetime.timedelta(seconds=ttl)
        self.override_intervals.append(
            PollingInterval(name=name, expires=expires, every=interval))

    def cleanup_expired_intervals(self):
        """Remove override schedules that have expired"""
        self.override_intervals = [
            override for override in self.override_intervals
            if not override.expired()
        ]

    def set_default_interval(self, interval):
        """Set default interval

        This is the interval used for this schedule, if there is no active
        override schedule with a smaller interval. The default interval never
        expires. To disable a task, simply set `enabled` equal to False.
        """
        self.default_interval = PollingInterval(name='default', every=interval)

    def __unicode__(self):
        return "%s %s" % (self.get_name(), self.interval or '(no interval)')
class Orders(mongoengine.EmbeddedDocument):
    """A single customer's order embedded within a restaurant Order document."""

    # Name of the customer placing the order.
    customer = mongoengine.StringField(required=True)
    # Order total (currency units — presumably the same currency as
    # Order.shipping; confirm with callers).
    total = mongoengine.FloatField(required=True)
    # Bug fix: use the callable `list` instead of the mutable literal `[]`
    # so each document gets a fresh list rather than a shared default.
    items = mongoengine.EmbeddedDocumentListField(Item, default=list)
    # Delivery address as free text.
    address = mongoengine.StringField(required=True)
class Panel(mongoengine.Document):
    """
    Document representation of channel/marker definition for an experiment. A
    panel, once associated to an experiment will standardise data upon input;
    when an fcs file is created in the database, it will be associated to an
    experiment and the channel/marker definitions in the fcs file will be
    mapped to the associated panel.

    Attributes
    -----------
    panel_name: str, required
        unique identifier for the panel
    markers: EmbeddedDocListField
        list of marker names; see NormalisedName
    channels: EmbeddedDocListField
        list of channels; see NormalisedName
    mappings: EmbeddedDocListField
        list of channel/marker mappings; see ChannelMap
    initiation_date: DateTime
        date of creation
    """
    panel_name = mongoengine.StringField(required=True, unique=True)
    markers = mongoengine.EmbeddedDocumentListField(NormalisedName)
    channels = mongoengine.EmbeddedDocumentListField(NormalisedName)
    mappings = mongoengine.EmbeddedDocumentListField(ChannelMap)
    initiation_date = mongoengine.DateTimeField(default=datetime.now)

    meta = {
        'db_alias': 'core',
        'collection': 'fcs_panels'
    }

    def create_from_excel(self, path: str) -> None:
        """
        Populate panel attributes from an excel template

        Parameters
        ----------
        path: str
            path of file

        Returns
        --------
        None
        """
        assert os.path.isfile(path), f'Error: no such file {path}'
        nomenclature, mappings = check_excel_template(path)
        for col_name, attr in zip(['channel', 'marker'], [self.channels, self.markers]):
            for name in mappings[col_name]:
                if not pd.isnull(name):
                    d = nomenclature[nomenclature['name'] == name].fillna('').to_dict(orient='list')
                    attr.append(NormalisedName(standard=d['name'][0],
                                               regex_str=d['regex'][0],
                                               case_sensitive=d['case'][0],
                                               permutations=d['permutations'][0]))
        mappings = mappings.fillna('').to_dict(orient='list')
        self.mappings = [ChannelMap(channel=c, marker=m)
                         for c, m in zip(mappings['channel'], mappings['marker'])]

    def create_from_dict(self, x: dict):
        """
        Populate panel attributes from a python dictionary

        Parameters
        ----------
        x: dict
            dictionary object containing panel definition

        Returns
        --------
        None
        """
        # Check validity of input dictionary
        err = 'Invalid template dictionary; must be a nested dictionary with parent keys: channels, markers'
        assert all([k in ['channels', 'markers', 'mappings'] for k in x.keys()]), err
        err = 'Invalid template dictionary; nested dictionaries must contain keys: name, regex, case, ' \
              'and permutations'
        for k in ['channels', 'markers']:
            # Bug fix: in Python 3 dict.keys() returns a view and never
            # compares equal to a list; compare as sets (order-independent).
            assert all([set(i.keys()) == {'name', 'regex', 'case', 'permutations'} for i in x[k]]), err
        assert type(x['mappings']) == list, 'Invalid template dictionary; mappings must be a list of tuples'
        err = 'Invalid template dictionary; mappings must be a list of tuples'
        # Bug fix: the original asserted `type(k) != tuple`, i.e. it REJECTED
        # exactly the tuples that the error message demands and that the
        # unpacking `for c, m in x['mappings']` below requires.
        assert all([type(k) == tuple for k in x['mappings']]), err
        self.markers = [NormalisedName(standard=k['name'],
                                       regex_str=k['regex'],
                                       case_sensitive=k['case'],
                                       permutations=k['permutations'])
                        for k in x['markers']]
        self.channels = [NormalisedName(standard=k['name'],
                                        regex_str=k['regex'],
                                        case_sensitive=k['case'],
                                        permutations=k['permutations'])
                         for k in x['channels']]
        self.mappings = [ChannelMap(channel=c, marker=m) for c, m in x['mappings']]

    def get_channels(self) -> iter:
        """
        Yields list of channels associated to panel

        Returns
        -------
        Generator
        """
        for cm in self.mappings:
            yield cm.channel

    def get_markers(self) -> iter:
        """
        Yields list of markers associated to panel

        Returns
        -------
        Generator
        """
        for cm in self.mappings:
            yield cm.marker
class Cases(mongoengine.Document):
    """Container document holding a list of embedded Case documents."""

    # Explicit integer primary key (overrides the default ObjectId `_id`).
    _id = mongoengine.IntField(required=True)
    # Bug fix: use the callable `list` instead of the mutable literal `[]`
    # so each document gets a fresh list rather than a shared default.
    cases = mongoengine.EmbeddedDocumentListField(Case, default=list)
    meta = {'db_alias': 'core', 'collection': 'cases'}
class Entry(me.Document):
    """A dated journal entry owned by a User, aggregating pain sub-entries
    and cached statistics over them."""

    user = me.ReferenceField('User', reverse_delete_rule=me.CASCADE,
                             required=True)
    date = me.DateTimeField(required=True, default=datetime.datetime.now)
    daytime = me.StringField()
    notes = me.StringField(max_length=500)
    stats = me.EmbeddedDocumentField('EntryStats')
    pain_subentries = me.EmbeddedDocumentListField('PainSubEntry')
    # Classes to be used for a feature implemented at a later date.
    mood_subentry = me.EmbeddedDocumentField('MoodSubEntry')
    medication_subentry = me.EmbeddedDocumentField('MedicationSubEntry')
    activity_subentry = me.EmbeddedDocumentField('ActivitySubEntry')

    def __repr__(self):
        return json.dumps(self.serialize(), sort_keys=True, indent=4)

    def serialize(self, comparisons=None, detail_level='high'):
        """Return a JSON-serializable dict of this entry.

        At 'medium'/'high' detail, stats and pain sub-entries are included
        (stats are recomputed and persisted as a side effect).
        """
        serialized = {
            'id': str(self.id),
            'date': self.date,
            'daytime': self.daytime
        }
        if comparisons:
            serialized['comparisons'] = comparisons
        # Bug fix: the original condition `detail_level == 'medium' or 'high'`
        # was always truthy ('high' is a non-empty string), so the detailed
        # section was emitted for every detail level.
        if detail_level in ('medium', 'high'):
            # Create the stats object if it does not exist.
            if self.stats is not None:
                stats = self.stats
            else:
                stats = EntryStats()
            stats.update(self.pain_subentries)
            self.stats = stats
            self.save()
            pain_serialized = [subentry.serialize(detail_level)
                               for subentry in self.pain_subentries]
            serialized.update({
                'pain_subentries': pain_serialized,
                'notes': self.notes,
                'stats': stats.serialize(),
            })
        return serialized

    # Given stats for an entry, creates an entry stats object and saves it to
    # the entry object.
    def create_stats(self, high, low, total, num_pain_subentries):
        # Guard against division by zero / meaningless stats.
        if num_pain_subentries <= 0:
            return
        # Create the stats object if it doesn't exist.
        if self.stats is not None:
            stats = self.stats
        else:
            stats = EntryStats()
        stats.high = high
        stats.low = low
        stats.avg = total / num_pain_subentries
        stats.num_body_parts = num_pain_subentries
        self.stats = stats
        self.save()
class User(Document):
    """Test schema: minimal user document used in tests."""

    # Display name of the user.
    name = db.StringField()
    # Contact e-mail address (format validated by EmailField; not unique).
    email = db.EmailField()
    # Embedded list of Address documents.
    address = db.EmbeddedDocumentListField(Address)
class Commit(mongoengine.Document):
    """
    *Concrete* class representing a version control system commit.
    """

    # Identifier of the commit in the VCS (e.g. a revision hash).
    vcs_hash = mongoengine.StringField()
    # Executables built from / associated with this commit.
    executables = mongoengine.EmbeddedDocumentListField(Executable)
class FileGroup(mongoengine.Document):
    """
    Document representation of a file group; a selection of related fcs files
    (e.g. a sample and it's associated controls). Event data is stored in an
    HDF5 file on disk (``h5path``); this document stores metadata and the
    population tree.

    Parameters
    ----------
    primary_id: str, required
        Unique ID to associate to group
    files: EmbeddedDocList
        List of File objects
    flags: str, optional
        Warnings associated to file group
    notes: str, optional
        Additional free text
    populations: EmbeddedDocList
        Populations derived from this file group
    gates: EmbeddedDocList
        Gate objects that have been applied to this file group
    collection_datetime: DateTime, optional
        Date and time of sample collection
    processing_datetime: DateTime, optional
        Date and time of sample processing
    """
    primary_id = mongoengine.StringField(required=True)
    data_directory = mongoengine.StringField(required=True)
    controls = mongoengine.ListField()
    compensated = mongoengine.BooleanField(default=False)
    collection_datetime = mongoengine.DateTimeField(required=False)
    processing_datetime = mongoengine.DateTimeField(required=False)
    populations = mongoengine.EmbeddedDocumentListField(Population)
    gating_strategy = mongoengine.ListField()
    valid = mongoengine.BooleanField(default=True)
    notes = mongoengine.StringField(required=False)
    meta = {'db_alias': 'core', 'collection': 'fcs_files'}

    def __init__(self, *args, **values):
        # "data"/"channels"/"markers" are construction-time-only arguments and
        # must be popped before mongoengine sees the remaining field values.
        data = values.pop("data", None)
        channels = values.pop("channels", None)
        markers = values.pop("markers", None)
        self.columns_default = values.pop("columns_default", "markers")
        assert self.columns_default in ["markers", "channels"], \
            "columns_default must be one of: 'markers', 'channels'"
        super().__init__(*args, **values)
        self.cell_meta_labels = {}
        if data is not None:
            # Creating a brand new FileGroup: persist the document first so an
            # id exists, then write the backing HDF5 file.
            assert not self.id, "This FileGroup has already been defined"
            assert channels is not None, "Must provide channels to create new FileGroup"
            assert markers is not None, "Must provide markers to create new FileGroup"
            self.save()
            self.h5path = os.path.join(self.data_directory,
                                       f"{self.id.__str__()}.hdf5")
            self._init_new_file(data=data, channels=channels, markers=markers)
        else:
            # Loading an existing FileGroup from the database.
            assert self.id is not None, "FileGroup has not been previously defined. Please provide primary data."
            self.h5path = os.path.join(self.data_directory,
                                       f"{self.id.__str__()}.hdf5")
            try:
                self._load_populations()
                self.tree = construct_tree(populations=self.populations)
            except AssertionError as err:
                warn(f"Failed to load data for {self.primary_id} ({self.id}); "
                     f"data may be corrupt or missing; {str(err)}")

    def data(self,
             source: str,
             sample_size: int or float or None = None) -> pd.DataFrame:
        """
        Load the FileGroup dataframe for the desired source file.

        Parameters
        ----------
        source: str
            Name of the file to load from e.g. either "primary" or the name of a control
        sample_size: int or float (optional)
            Sample the DataFrame

        Returns
        -------
        Pandas.DataFrame
        """
        with h5py.File(self.h5path, "r") as f:
            assert source in f.keys(
            ), f"Invalid source, expected one of: {f.keys()}"
            channels = [
                x.decode("utf-8") for x in f[f"mappings/{source}/channels"][:]
            ]
            markers = [
                x.decode("utf-8") for x in f[f"mappings/{source}/markers"][:]
            ]
            data = _column_names(df=pd.DataFrame(f[source][:]),
                                 channels=channels,
                                 markers=markers,
                                 preference=self.columns_default)
        if sample_size is not None:
            return uniform_downsampling(data=data, sample_size=sample_size)
        return data

    def _init_new_file(self, data: np.array, channels: List[str],
                       markers: List[str]):
        """
        Under the assumption that this FileGroup has not been previously defined,
        generate a HDF5 file and initialise the root Population

        Parameters
        ----------
        data: Numpy.Array
        channels: list
        markers: list

        Returns
        -------
        None
        """
        with h5py.File(self.h5path, "w") as f:
            f.create_dataset(name="primary", data=data)
            f.create_group("mappings")
            f.create_group("mappings/primary")
            f.create_dataset("mappings/primary/channels",
                             data=np.array(channels, dtype='S'))
            f.create_dataset("mappings/primary/markers",
                             data=np.array(markers, dtype='S'))
            f.create_group("index")
            f.create_group("index/root")
            f.create_group("clusters")
            f.create_group("clusters/root")
            f.create_group("cell_meta_labels")
        # Every event belongs to the root population.
        self.populations = [
            Population(population_name="root",
                       index=np.arange(0, data.shape[0]),
                       parent="root",
                       n=data.shape[0])
        ]
        self.tree = {"root": anytree.Node(name="root", parent=None)}
        self.save()

    def add_ctrl_file(self, ctrl_id: str, data: np.array,
                      channels: List[str], markers: List[str]):
        """
        Add a new control file to this FileGroup.

        Parameters
        ----------
        ctrl_id: str
        data: Numpy.Array
        channels: list
        markers: list

        Returns
        -------
        None
        """
        with h5py.File(self.h5path, "a") as f:
            assert ctrl_id not in self.controls, f"Entry for {ctrl_id} already exists"
            f.create_dataset(name=ctrl_id, data=data)
            f.create_group(f"mappings/{ctrl_id}")
            f.create_dataset(f"mappings/{ctrl_id}/channels",
                             data=np.array(channels, dtype='S'))
            f.create_dataset(f"mappings/{ctrl_id}/markers",
                             data=np.array(markers, dtype='S'))
        root = self.get_population(population_name="root")
        root.set_ctrl_index(**{ctrl_id: np.arange(0, data.shape[0])})
        self.controls.append(ctrl_id)
        self.save()

    def _load_populations(self):
        """
        Load indexes for existing populations from HDF5 file. This includes
        indexes for controls and clusters.

        Returns
        -------
        None
        """
        assert self._hdf5_exists(
        ), f"Could not locate FileGroup HDF5 record {self.h5path}"
        with h5py.File(self.h5path, "r") as f:
            if "cell_meta_labels" in f.keys():
                for meta in f["cell_meta_labels"].keys():
                    self.cell_meta_labels[meta] = f[
                        f"cell_meta_labels/{meta}"][:]
            for pop in self.populations:
                k = f"/index/{pop.population_name}"
                if k + "/primary" not in f.keys():
                    warn(
                        f"Population index missing for {pop.population_name}!")
                else:
                    pop.index = f[k + "/primary"][:]
                    # Every other key under the index group is a control index.
                    ctrls = [x for x in f[k].keys() if x != "primary"]
                    for c in ctrls:
                        pop.set_ctrl_index(**{c: f[k + f"/{c}"][:]})
                k = f"/clusters/{pop.population_name}"
                for c in pop.clusters:
                    if f"{c.cluster_id}_{c.tag}" not in f[k].keys():
                        warn(
                            f"Cluster index missing for {c.cluster_id}; tag {c.tag} in population {pop.population_name}!"
                        )
                    else:
                        c.index = f[k + f"/{c.cluster_id}_{c.tag}"][:]

    def add_population(self, population: Population):
        """
        Add a new Population to this FileGroup.

        Parameters
        ----------
        population: Population

        Returns
        -------
        None
        """
        err = f"Population with name '{population.population_name}' already exists"
        assert population.population_name not in self.tree.keys(), err
        self.populations.append(population)
        self.tree[population.population_name] = anytree.Node(
            name=population.population_name,
            parent=self.tree.get(population.parent))

    def load_ctrl_population_df(self,
                                ctrl: str,
                                population: str,
                                transform: str or dict or None = "logicle",
                                **kwargs):
        """
        Load the DataFrame for the events pertaining to a single population from a
        control. If the control is absent from this FileGroup it will raise an AssertionError.
        If the population has not been estimated for the given control, it will attempt
        to estimate the population using KNearestNeighbours classifier. See
        estimated_ctrl_population for details.

        Parameters
        ----------
        ctrl: str
            Name of the control sample to load
        population: str
            Name of the desired population
        transform: str or dict (optional)
            If given, transformation method applied to the columns of the DataFrame. If the
            value given is a string, it should be the name of the transform method applied
            to ALL columns. If it is a dictionary, keys should correspond to column names
            and values the transform to apply to said column.
        kwargs
            Additional keyword arguments passed to estimated_ctrl_population

        Returns
        -------
        Pandas.DataFrame
        """
        assert ctrl in self.controls, f"No such control {ctrl} associated to this FileGroup"
        if ctrl not in self.get_population(
                population_name=population).ctrl_index.keys():
            warn(
                f"Population {population} missing for control {ctrl}, will attempt to "
                f"estimate population using KNN")
            self.estimate_ctrl_population(ctrl=ctrl,
                                          population=population,
                                          **kwargs)
        idx = self.get_population(
            population_name=population).ctrl_index.get(ctrl)
        data = self.data(source=ctrl).loc[idx]
        if isinstance(transform, dict):
            data = apply_transform(data=data, features_to_transform=transform)
        elif isinstance(transform, str):
            data = apply_transform(data, transform_method=transform)
        return data

    def estimate_ctrl_population(self,
                                 ctrl: str,
                                 population: str,
                                 verbose: bool = True,
                                 scoring: str = "balanced_accuracy",
                                 **kwargs):
        """
        Estimate a population for a control sample by training a KNearestNeighbors classifier
        on the population in the primary data and using this model to predict membership
        in the control data. If n_neighbors parameter of Scikit-Learns KNearestNeighbors class
        is not given, it will be estimated using grid search cross-validation and optimisation
        of the given scoring parameter. See CytoPy.flow.neighbours for further details.

        Results of the population estimation will be saved to the populations ctrl_index property.

        Parameters
        ----------
        ctrl: str
            Control to estimate population for
        population: str
            Population to estimate
        verbose: bool (default=True)
        scoring: str (default="balanced_accuracy")
        kwargs: dict
            Additional keyword arguments passed to initiate KNearestNeighbors object

        Returns
        -------
        None
        """
        feedback = vprint(verbose=verbose)
        feedback(f"====== Estimating {population} for {ctrl} control ======")
        population = self.get_population(population_name=population)
        # Estimate the parent population for this control first, recursively,
        # if it has not been estimated already.
        if ctrl not in self.get_population(
                population_name=population.parent).ctrl_index.keys():
            feedback(
                f"Control missing parent {population.parent}, will attempt to estimate...."
            )
            self.estimate_ctrl_population(ctrl=ctrl,
                                          population=population.parent,
                                          verbose=verbose,
                                          scoring=scoring,
                                          **kwargs)
            feedback(
                f"{population.parent} estimated, resuming estimation of {population.population_name}...."
            )
        features = [
            x for x in [population.geom.x, population.geom.y] if x is not None
        ]
        transformations = {
            d: transform
            for d, transform in zip([population.geom.x, population.geom.y],
                                    [population.geom.transform_x,
                                     population.geom.transform_y])
            if d is not None
        }
        training_data = self.load_population_df(
            population=population.parent,
            transform=transformations,
            label_downstream_affiliations=False).copy()
        training_data["labels"] = 0
        # BUG FIX: original used chained indexing
        # `training_data.loc[population.index]["labels"] = 1`, which assigns
        # into a temporary copy and leaves every label 0 (the classifier would
        # be trained on a single class). Use a single .loc assignment.
        training_data.loc[population.index, "labels"] = 1
        labels = training_data["labels"].values
        n = kwargs.get("n_neighbors", None)
        if n is None:
            feedback("Calculating optimal n_neighbours by grid search CV...")
            n, score = calculate_optimal_neighbours(
                x=training_data[features].values,
                y=labels,
                scoring=scoring,
                **kwargs)
            feedback(
                f"Continuing with n={n}; chosen with balanced accuracy of {round(score, 3)}..."
            )
        # Estimate control population using KNN
        feedback("Training KNN classifier....")
        train_acc, val_acc, model = knn(data=training_data,
                                        features=features,
                                        labels=labels,
                                        n_neighbours=n,
                                        holdout_size=0.2,
                                        random_state=42,
                                        return_model=True,
                                        **kwargs)
        feedback(f"...training balanced accuracy score: {train_acc}")
        feedback(f"...validation balanced accuracy score: {val_acc}")
        feedback(
            f"Predicting {population.population_name} for {ctrl} control...")
        # BUG FIX: dropped `label_downstream_affiliations=False` from this
        # call; load_ctrl_population_df has no such parameter and would have
        # misforwarded it through **kwargs.
        # NOTE(review): the transform dict keys are the literal strings
        # "x"/"y" rather than the feature names used elsewhere — confirm this
        # is what apply_transform expects.
        ctrl_data = self.load_ctrl_population_df(
            ctrl=ctrl,
            population=population.parent,
            transform={
                "x": population.geom.transform_x,
                "y": population.geom.transform_y
            })
        ctrl_labels = model.predict(ctrl_data[features].values)
        ctrl_idx = ctrl_data.index.values[np.where(ctrl_labels == 1)]
        population.set_ctrl_index(**{ctrl: ctrl_idx})
        feedback("===============================================")

    def load_population_df(
            self,
            population: str,
            transform: str or dict or None = "logicle",
            label_downstream_affiliations: bool = False) -> pd.DataFrame:
        """
        Load the DataFrame for the events pertaining to a single population.

        Parameters
        ----------
        population: str
            Name of the desired population
        transform: str or dict (optional)
            If given, transformation method applied to the columns of the DataFrame. If the
            value given is a string, it should be the name of the transform method applied
            to ALL columns. If it is a dictionary, keys should correspond to column names
            and values the transform to apply to said column.
        label_downstream_affiliations: bool (default=False)
            If True, an additional column will be generated named "population_label" containing
            the end node membership of each event e.g. if you choose CD4+ population and
            there are subsequent populations belonging to this CD4+ population in a tree
            like: "CD4+ -> CD4+CD25+ -> CD4+CD25+CD45RA+" then the population label column
            will contain the name of the lowest possible "leaf" population that an event is
            assigned too.

        Returns
        -------
        Pandas.DataFrame
        """
        assert population in self.tree.keys(
        ), f"Invalid population, {population} does not exist"
        idx = self.get_population(population_name=population).index
        data = self.data(source="primary").loc[idx]
        if isinstance(transform, dict):
            data = apply_transform(data=data, features_to_transform=transform)
        elif isinstance(transform, str):
            data = apply_transform(data, transform_method=transform)
        if label_downstream_affiliations:
            return self._label_downstream_affiliations(parent=population,
                                                       data=data)
        return data

    def _label_downstream_affiliations(self, parent: str,
                                       data: pd.DataFrame) -> pd.DataFrame:
        """
        An additional column will be generated named "population_label" containing
        the end node membership of each event e.g. if you choose CD4+ population and
        there are subsequent populations belonging to this CD4+ population in a tree
        like: "CD4+ -> CD4+CD25+ -> CD4+CD25+CD45RA+" then the population label column
        will contain the name of the lowest possible "leaf" population that an event is
        assigned too.

        Parameters
        ----------
        parent: str
        data: Pandas.DataFrame

        Returns
        -------
        Pandas.DataFrame
        """
        data["population_label"] = None
        dependencies = self.list_downstream_populations(parent)
        for pop in dependencies:
            idx = self.get_population(pop).index
            # BUG FIX: original wrote to a column named 'label', leaving
            # 'population_label' entirely unset and always filled with parent.
            data.loc[idx, "population_label"] = pop
        data["population_label"].fillna(parent, inplace=True)
        return data

    def _hdf5_exists(self):
        """
        Tests if associated HDF5 file exists.

        Returns
        -------
        bool
        """
        return os.path.isfile(self.h5path)

    def list_gated_controls(self) -> Generator:
        """
        List ID of controls that have a cached index in each population of the
        saved population tree (i.e. they have been gated)

        Returns
        -------
        list
            List of control IDs for gated controls
        """
        # BUG FIX: `controls` is a mongoengine ListField; the original called
        # `self.controls()` which raises TypeError ('list' is not callable).
        for c in self.controls:
            # NOTE(review): assumes Population exposes a get_ctrl method
            # returning None when the control index is absent — confirm.
            if all([p.get_ctrl(c) is not None for p in self.populations]):
                yield c

    def list_populations(self) -> iter:
        """
        Yields list of population names

        Returns
        -------
        Generator
        """
        for p in self.populations:
            yield p.population_name

    def print_population_tree(self,
                              image: bool = False,
                              path: str or None = None):
        """
        Print population tree to stdout or save as an image if 'image' is True.

        Parameters
        ----------
        image: bool (default=False)
            Save tree as a png image
        path: str (optional)
            File path for image, ignored if 'image' is False.
            Defaults to working directory.

        Returns
        -------
        None
        """
        root = self.tree['root']
        if image:
            from anytree.exporter import DotExporter
            path = path or f'{os.getcwd()}/{self.id}_population_tree.png'
            DotExporter(root).to_picture(path)
        for pre, fill, node in anytree.RenderTree(root):
            print('%s%s' % (pre, node.name))

    def delete_clusters(self,
                        tag: str or None = None,
                        meta_label: str or None = None,
                        drop_all: bool = False):
        """
        Delete clusters from every population, selected either by tag or
        meta_label, or all clusters when drop_all is True.

        Parameters
        ----------
        tag: str (optional)
        meta_label: str (optional)
        drop_all: bool (default=False)

        Returns
        -------
        None
        """
        if drop_all:
            for p in self.populations:
                p.delete_all_clusters(clusters="all")
        elif tag:
            for p in self.populations:
                p.delete_cluster(tag=tag)
        elif meta_label:
            for p in self.populations:
                p.delete_cluster(meta_label=meta_label)
        else:
            raise ValueError(
                "If drop_all is False, must provide tag or meta_label")

    def delete_populations(self, populations: list or str) -> None:
        """
        Delete given populations. Populations downstream from delete population(s)
        will also be removed.

        Parameters
        ----------
        populations: list or str
            Either a list of populations (list of strings) to remove or a single population as a string.
            If a value of "all" is given, all populations are dropped.

        Returns
        -------
        None
        """
        if populations == "all":
            self.populations = [
                p for p in self.populations if p.population_name == "root"
            ]
            self.tree = {
                name: node
                for name, node in self.tree.items() if name == "root"
            }
        else:
            assert isinstance(
                populations,
                list), "Provide a list of population names for removal"
            assert "root" not in populations, "Cannot delete root population"
            downstream_effects = [
                self.list_downstream_populations(p) for p in populations
            ]
            downstream_effects = set(
                [x for sl in downstream_effects for x in sl])
            if len(downstream_effects) > 0:
                warn(
                    "The following populations are downstream of one or more of the "
                    "populations listed for deletion and will therefore be deleted: "
                    f"{downstream_effects}")
            populations = list(set(list(downstream_effects) + populations))
            self.populations = [
                p for p in self.populations
                if p.population_name not in populations
            ]
            # Detach deleted nodes before dropping them from the tree.
            for name in populations:
                self.tree[name].parent = None
            self.tree = {
                name: node
                for name, node in self.tree.items()
                if name not in populations
            }

    def get_population(self, population_name: str) -> Population:
        """
        Given the name of a population associated to the FileGroup, returns the Population
        object, with index and control index ready loaded.

        Parameters
        ----------
        population_name: str
            Name of population to retrieve from database

        Returns
        -------
        Population
        """
        assert population_name in list(self.list_populations(
        )), f'Population {population_name} does not exist'
        return [
            p for p in self.populations
            if p.population_name == population_name
        ][0]

    def get_population_by_parent(self, parent: str) -> Generator:
        """
        Given the name of some parent population, return a list of Population object whom's
        parent matches

        Parameters
        ----------
        parent: str
            Name of the parent population to search for

        Returns
        -------
        Generator
            List of Populations
        """
        for p in self.populations:
            if p.parent == parent and p.population_name != "root":
                yield p

    def list_downstream_populations(self, population: str) -> list or None:
        """For a given population find all dependencies

        Parameters
        ----------
        population : str
            population name

        Returns
        -------
        list or None
            List of populations dependent on given population
        """
        assert population in self.tree.keys(), f'population {population} does not exist; ' \
                                               f'valid population names include: {self.tree.keys()}'
        root = self.tree['root']
        node = self.tree[population]
        dependencies = [
            x.name
            for x in anytree.findall(root, filter_=lambda n: node in n.path)
        ]
        return [p for p in dependencies if p != population]

    def merge_populations(self,
                          left: Population,
                          right: Population,
                          new_population_name: str or None = None):
        """
        Merge two populations present in the current population tree.
        The merged population will have the combined index of both populations but
        will not inherit any clusters and will not be associated to any children
        downstream of either the left or right population. The population will be
        added to the tree as a descendant of the left populations parent.
        New population will be added to FileGroup.

        Parameters
        ----------
        left: Population
        right: Population
        new_population_name: str (optional)

        Returns
        -------
        None
        """
        self.add_population(
            merge_populations(left=left,
                              right=right,
                              new_population_name=new_population_name))

    def subtract_populations(self,
                             left: Population,
                             right: Population,
                             new_population_name: str or None = None):
        """
        Subtract the right population from the left population.
        The right population must either have the same parent as the left population
        or be downstream of the left population. The new population will descend from
        the same parent as the left population. The new population will have a
        PolygonGeom geom. New population will be added to FileGroup.

        Parameters
        ----------
        left: Population
        right: Population
        new_population_name: str (optional)

        Returns
        -------
        None
        """
        same_parent = left.parent == right.parent
        downstream = right.population_name in list(
            self.list_downstream_populations(left.population_name))
        assert same_parent or downstream, "Right population should share the same parent as the " \
                                          "left population or be downstream of the left population"
        new_population_name = new_population_name or f"subtract_{left.population_name}_{right.population_name}"
        new_idx = np.array([x for x in left.index if x not in right.index])
        x, y = left.geom.x, left.geom.y
        transform_x, transform_y = left.geom.transform_x, left.geom.transform_y
        parent_data = self.load_population_df(population=left.parent,
                                              transform={
                                                  x: transform_x,
                                                  y: transform_y
                                              })
        x_values, y_values = create_convex_hull(
            x_values=parent_data.loc[new_idx][x].values,
            y_values=parent_data.loc[new_idx][y].values)
        new_geom = PolygonGeom(x=x,
                               y=y,
                               transform_x=transform_x,
                               transform_y=transform_y,
                               x_values=x_values,
                               y_values=y_values)
        new_population = Population(population_name=new_population_name,
                                    parent=left.parent,
                                    n=len(new_idx),
                                    index=new_idx,
                                    geom=new_geom,
                                    warnings=left.warnings + right.warnings +
                                    ["SUBTRACTED POPULATION"])
        self.add_population(population=new_population)

    def _write_populations(self):
        """
        Write population data to disk.

        Returns
        -------
        None
        """
        root_n = self.get_population("root").n
        with h5py.File(self.h5path, "a") as f:
            if "cell_meta_labels" in f.keys():
                for meta, labels in self.cell_meta_labels.items():
                    ascii_labels = [
                        x.encode("ascii", "ignore") for x in labels
                    ]
                    f.create_dataset(f'/cell_meta_labels/{meta}',
                                     data=ascii_labels)
            for p in self.populations:
                # Refresh cached proportions before persisting indexes.
                parent_n = self.get_population(p.parent).n
                p.prop_of_parent = p.n / parent_n
                p.prop_of_total = p.n / root_n
                f.create_dataset(f'/index/{p.population_name}/primary',
                                 data=p.index)
                for ctrl, idx in p.ctrl_index.items():
                    f.create_dataset(f'/index/{p.population_name}/{ctrl}',
                                     data=idx)
                for cluster in p.clusters:
                    cluster.prop_of_events = cluster.n / p.n
                    f.create_dataset(
                        f'/clusters/{p.population_name}/{cluster.cluster_id}_{cluster.tag}',
                        data=cluster.index)

    def _hdf_reset_population_data(self):
        """
        For each population clear existing data ready for overwriting with
        current data.

        Returns
        -------
        None
        """
        with h5py.File(self.h5path, "a") as f:
            if "cell_meta_labels" in f.keys():
                for meta in self.cell_meta_labels.keys():
                    if meta in f["cell_meta_labels"]:
                        del f[f"cell_meta_labels/{meta}"]
            for p in self.populations:
                if p.population_name in f["index"].keys():
                    if "primary" in f[f"index/{p.population_name}"].keys():
                        del f[f"index/{p.population_name}/primary"]
                    for ctrl_id in p.ctrl_index.keys():
                        if ctrl_id in f[f"index/{p.population_name}"].keys():
                            del f[f"index/{p.population_name}/{ctrl_id}"]
                if p.population_name in f["clusters"].keys():
                    del f[f"clusters/{p.population_name}"]

    def population_stats(self, population: str):
        """
        Return a dict of summary statistics (event count and proportions of
        parent/root) for the named population.

        Parameters
        ----------
        population: str

        Returns
        -------
        dict
        """
        pop = self.get_population(population_name=population)
        parent = self.get_population(population_name=pop.parent)
        root = self.get_population(population_name="root")
        return {
            "population_name": population,
            "n": pop.n,
            "prop_of_parent": pop.n / parent.n,
            "prop_of_root": pop.n / root.n
        }

    def quantile_clean(self, upper: float = 0.999, lower: float = 0.001):
        """
        Add a "root_clean" population containing only events inside the given
        per-column quantile bounds of the primary data.
        """
        df = self.data(source="primary")
        for x in df.columns:
            df = df[(df[x] >= df[x].quantile(lower))
                    & (df[x] <= df[x].quantile(upper))]
        clean_pop = Population(population_name="root_clean",
                               index=df.index.values,
                               parent="root",
                               n=df.shape[0])
        self.add_population(clean_pop)

    def save(self, *args, **kwargs):
        # Calculate meta and save indexes to disk
        if self.populations:
            # self._hdf_create_population_grps()
            # Populate h5path for populations
            self._hdf_reset_population_data()
            self._write_populations()
        super().save(*args, **kwargs)

    def delete(self, delete_hdf5_file: bool = True, *args, **kwargs):
        """Delete the document; optionally remove the backing HDF5 file too."""
        super().delete(*args, **kwargs)
        if delete_hdf5_file:
            if os.path.isfile(self.h5path):
                os.remove(self.h5path)
            else:
                warn(f"Could not locate hdf5 file {self.h5path}")
class CielEntity(me.Document):
    """An entity scraped from the CIEL terminal system, with the screen
    panels captured for it (raw and HTML renderings plus field positions)."""
    type = me.StringField(verbose_name='Entity type (fastpath)', required=True)
    name = me.StringField(required=True)
    panels = me.EmbeddedDocumentListField(CielPanel)
    # NOTE(review): field name 'descripton' is misspelled; renaming would
    # change the stored document schema, so it is left as-is.
    descripton = me.StringField(verbose_name='Entity details', required=False)
    meta = {'ordering': ['fastpath']}

    @classmethod
    def scan_panels(cls, entity_type, entity_name):
        """Scan panels for the entity in CIEL.
        entity_type: wwartc
        entity_name: bananas-ptw

        Drives the terminal emulator through the entity's panels, capturing
        each screen (raw + HTML) and its field bounds, and upserting the
        result as CielPanel/CielField/FieldSet documents. Returns the saved
        CielEntity.
        """
        # Navigate the emulator to the entity's panel view.
        em = Emulator(visible=True)
        em.connect(HOST)
        em.ciel_login('IEVMLIR1', 'MLIIEVR1')
        em.send_str(entity_type)
        em.exec(BTN.F22)
        em.screen_skip()
        em.send_str('2')
        em.exec(BTN.TAB)
        em.send_str(entity_name)
        em.exec(BTN.ENTER)
        # Upsert the entity document.
        entity = CielEntity.objects(type=entity_type,
                                    name=entity_name).first()
        if not entity:
            entity = CielEntity(type=entity_type, name=entity_name)
            entity.save()
        panel_counter = 0
        # Page through panels until the emulator reports the BOTTOM marker.
        while True:
            panel_name = em.screen_get_name()
            # Update panel if exist
            panel = None
            for p in entity.panels:
                if p.name.lower() == panel_name.lower():
                    panel = p
                    break
            if panel is not None:
                panel.data_raw = '\n'.join(em.screen_get_data(html=False))
                panel.data_html = '\n'.join(em.screen_get_data(html=True))
            else:
                panel = CielPanel(
                    name=panel_name,
                    data_raw='\n'.join(em.screen_get_data(html=False)),
                    data_html='\n'.join(em.screen_get_data(html=True)))
                entity.panels.append(panel)
                entity.save()
            # Rebuild the panel's field list from the current screen bounds.
            panel.fields = []
            fields = em.field_get_bounds_all()
            for field in fields:
                flen = field['col_end'] - field['col_start']
                # pu.db
                # Upsert the FieldSet keyed by panel name + screen coords.
                fieldset = FieldSet.objects(
                    panel_name=panel.name,
                    coords__row=field['row'],
                    coords__column=field['col_start'],
                ).first()
                if not fieldset:
                    fieldset = FieldSet(dbset=None,
                                        panel_name=panel.name,
                                        coords=Coords(
                                            row=field['row'],
                                            column=field['col_start'],
                                            length=flen))
                    fieldset.save()
                cielfield = CielField(
                    fieldset=fieldset,
                    value=field['value'].strip(),
                )
                panel.fields.append(cielfield)
            entity.save()
            if em.screen_contains('.*BOTTOM.*'):
                break
            else:
                em.exec(BTN.PAGE_DOWN)
            panel_counter += 1
        em.terminate()
        return entity

    @classmethod
    def scan_dbset(cls, entity_type, library_name):
        """Returns dictionary of structure:
            {
                library_name : {
                    file_name : {
                        field_name : {
                            'text' : '',
                            'nulls' : '',
                            'length' : '',
                            'type' : '',
                            'scale' : '',
                        },
                    },
                },
            }
        """
        # Open an STRSQL session and bring up the field prompt for the library.
        em = Emulator(visible=True)
        em.connect(HOST)
        em.ciel_login('IEVMLIR1', 'MLIIEVR1')
        em.send_str('STRSQL')
        em.exec(BTN.F22)
        em.send_str('SELECT * FROM {}'.format(library_name))
        em.exec(BTN.F4)
        em.send_str('\t' * 10)
        em.field_set_id(2)
        em.exec(BTN.F4)
        # Library grabbing part
        data = {}
        # NOTE(review): flatdata is returned but never appended to — it looks
        # like flatitem was meant to be collected into it; confirm intent.
        flatdata = []

        # NOTE(review): ensure_row_exists is defined but never called; the
        # while-loop below duplicates its logic inline.
        def ensure_row_exists(row_library, row_file, row_field):
            if row_library not in data:
                data[row_library] = {}
            if row_file not in data[row_library]:
                data[row_library][row_file] = {}
            if row_field not in data[row_library][row_file]:
                data[row_library][row_file][row_field] = {}

        def grab_fields():
            # The field list is shown across three F11-cycled screen layouts
            # (text / type / nulls); merge the three passes row-by-row.
            rows = []

            def row_update(n, row, rows):
                if n >= len(rows):
                    rows.append(row)
                else:
                    rows[n].update(row)

            for i in range(0, 3):
                lines = em.screen_get_data(html=False)
                # Column slices below are fixed screen positions for each
                # layout; rows of interest occupy screen lines 6-19.
                if em.screen_contains('.*F11=Display nulls.*'):
                    print('Display nulls')
                    for n, line in enumerate(lines[6:20]):
                        rfield = line[6:20].strip()
                        rfile = line[25:44].strip()
                        rtext = line[44:80].strip()
                        row = {'field': rfield, 'file': rfile, 'text': rtext}
                        row_update(n, row, rows)
                elif em.screen_contains('.*F11=Display type.*'):
                    print('Display type')
                    for n, line in enumerate(lines[6:20]):
                        rlib = line[45:58].strip()
                        rnulls = line[58:80].strip()
                        row = {'library': rlib, 'nulls': rnulls}
                        row_update(n, row, rows)
                elif em.screen_contains('.*F11=Display text.*'):
                    print('Display text')
                    for n, line in enumerate(lines[6:20]):
                        rtype = line[44:65].strip().lower()
                        rlength = line[65:73].strip()
                        rscale = line[73:79].strip()
                        row = {
                            'type': rtype,
                            'length': rlength,
                            'scale': rscale
                        }
                        row_update(n, row, rows)
                # Scroll screen right
                em.exec(BTN.F11)
            return rows

        # Page down through the list until the Bottom marker appears.
        while (not em.screen_contains('.*Bottom.*')
               and em.screen_contains('.*More\.\.\..*')):
            rows = grab_fields()
            for row in rows:
                rlib = row['library']
                rfile = row['file']
                rfield = row['field']
                if rlib not in data:
                    data[rlib] = {}
                if rfile not in data[rlib]:
                    data[rlib][rfile] = {}
                if rfield not in data[rlib][rfile]:
                    data[rlib][rfile][rfield] = {}
                item = data[rlib][rfile][rfield]
                item['text'] = row['text']
                item['type'] = row['type']
                item['length'] = row['length']
                item['scale'] = row['scale']
                flatitem = {}
                # Only if not empty item
                if len(rfile) and len(rfield):
                    flatitem['clibrary'] = (rlib.lower()).strip()
                    flatitem['cfile'] = (rfile.lower()).strip()
                    flatitem['cfield'] = (rfield.lower()).strip()
                    # NOTE(review): 'ctype' is assigned twice (here and a few
                    # lines below); the later assignment wins.
                    flatitem['ctype'] = (row['type'].lower()).strip()
                    flatitem['cnulls'] = (row['nulls'].lower()).strip()
                    try:
                        flatitem['length'] = int(row['length'])
                    except ValueError:
                        flatitem['length'] = 0
                    try:
                        flatitem['cscale'] = int(row['scale'])
                    except ValueError:
                        flatitem['cscale'] = 0
                    flatitem['ctype'] = (row['type'].strip()).lower()
                    flatitem['ctext'] = (row['text'].strip()).lower()
                    # NOTE(review): file=flatitem['ctype'] looks like it should
                    # be flatitem['cfile'] — confirm before relying on the
                    # stored 'file' value.
                    dbset_item = CielDBSet(entity_type=entity_type,
                                           file=flatitem['ctype'],
                                           field=flatitem['cfield'],
                                           text=flatitem['ctext'],
                                           type=flatitem['ctype'])
                    dbset_item.save()
            em.exec(BTN.PAGE_DOWN)
        return data, flatdata
class Student(me.Document):
    """A student record with denormalized names and references to the
    Faculties/Groups/Curators documents, plus an embedded list of marks."""
    first_name = me.StringField(min_length=1, max_length=255, required=True)
    sur_name = me.StringField(min_length=1, max_length=255, required=True)
    faculty = me.StringField(min_length=1, max_length=255, required=True)
    id_faculty = me.ReferenceField(Faculties)
    group = me.StringField(min_length=1, max_length=255, required=True)
    id_group = me.ReferenceField(Groups)
    curator = me.StringField(min_length=1, max_length=255, required=True)
    id_curator = me.ReferenceField(Curators)
    mark_student = me.EmbeddedDocumentListField(Mark)

    def __str__(self):
        return f'{self.sur_name} {self.first_name} студент {self.faculty} факультета, {self.group} групи. ' \
               f'Куратор {self.curator} '

    def str_mark(self):
        """Return all of the student's marks as a single string.

        BUG FIX: the original did ``result += f('{m.name_item}: {m.mark}')``,
        which calls an undefined function ``f`` with a literal string (NameError
        at runtime) instead of formatting an f-string.
        """
        return ''.join(f'{m.name_item}: {m.mark}' for m in self.mark_student)


def student_from_curator(first_name, sur_name):
    """Return the students supervised by the curator with the given name,
    or an empty list if no such curator exists."""
    curators = Curators.objects.filter(first_name=first_name,
                                       sur_name=sur_name)
    if not curators:
        return []
    return Student.objects.filter(id_curator=curators[0])


def Excellent_Students_Faculties():
    """Return a report string listing, per faculty, the students whose
    average mark is >= 10.0 (computed with an aggregation pipeline)."""
    result = ''
    for faculty in Faculties.objects:
        result += faculty.name_faculty + '\n'
        # Unwind marks, average per student, keep averages >= 10.
        students = Student.objects(id_faculty=faculty.id).aggregate([{
            '$unwind': '$mark_student'
        }, {
            '$group': {
                '_id': '$_id',
                'average_mark': {
                    '$avg': '$mark_student.mark'
                }
            }
        }, {
            '$match': {
                'average_mark': {
                    '$gte': 10.0
                }
            }
        }])
        for student in students:
            result += f"\t{Student.objects(id=student['_id'])[0].__str__()}, середній бал {round(student['average_mark'], 2)} \n"
    return result
class FileGroup(mongoengine.Document):
    """
    Document representation of a file group; a selection of related fcs files
    (e.g. a sample and it's associated controls).

    Attributes
    ----------
    primary_id: str, required
        Unique ID to associate to group
    files: EmbeddedDocList
        List of File objects
    flags: str, optional
        Warnings associated to file group
    notes: str, optional
        Additional free text
    populations: EmbeddedDocList
        Populations derived from this file group
    gates: EmbeddedDocList
        Gate objects that have been applied to this file group
    collection_datetime: DateTime, optional
        Date and time of sample collection
    processing_datetime: DateTime, optional
        Date and time of sample processing
    valid: BooleanField (default=True)
        True if FileGroup is valid
    subject: ReferenceField
        Reference to Subject. If Subject is deleted, this field is nullified but
        the FileGroup will persist
    """
    primary_id = mongoengine.StringField(required=True)
    # Names of control samples added via add_ctrl_file.
    controls = mongoengine.ListField()
    compensated = mongoengine.BooleanField(default=False)
    collection_datetime = mongoengine.DateTimeField(required=False)
    processing_datetime = mongoengine.DateTimeField(required=False)
    populations = mongoengine.EmbeddedDocumentListField(Population)
    gating_strategy = mongoengine.ListField()
    valid = mongoengine.BooleanField(default=True)
    notes = mongoengine.StringField(required=False)
    subject = mongoengine.ReferenceField(
        Subject, reverse_delete_rule=mongoengine.NULLIFY)
    # Directory holding the backing HDF5 file; single-cell data lives on
    # disk, not in MongoDB.
    data_directory = mongoengine.StringField()
    meta = {'db_alias': 'core', 'collection': 'fcs_files'}

    def __init__(self, *args, **kwargs):
        # `data`, `channels` and `markers` are consumed here (popped) so they
        # never reach the mongoengine Document constructor; they are only
        # required when creating a brand-new FileGroup.
        data = kwargs.pop("data", None)
        channels = kwargs.pop("channels", None)
        markers = kwargs.pop("markers", None)
        super().__init__(*args, **kwargs)
        self._columns_default = "markers"
        self.cell_meta_labels = {}
        if self.id:
            # Existing document: hydrate tree and disk-backed state.
            self.h5path = os.path.join(self.data_directory,
                                       f"{self.id.__str__()}.hdf5")
            self.tree = construct_tree(populations=self.populations)
            self._load_cell_meta_labels()
            self._load_population_indexes()
        else:
            if any([x is None for x in [data, channels, markers]]):
                raise ValueError(
                    "New instance of FileGroup requires that data, channels, and markers "
                    "be provided to the constructor")
            # Must save first: the generated ObjectId names the HDF5 file.
            self.save()
            self.h5path = os.path.join(self.data_directory,
                                       f"{self.id.__str__()}.hdf5")
            self.init_new_file(data=data, channels=channels, markers=markers)

    @property
    def columns_default(self):
        # Whether DataFrames are labelled by marker names or channel names.
        return self._columns_default

    @columns_default.setter
    def columns_default(self, value: str):
        assert value in ["markers", "channels"], \
            "columns_default must be either 'markers' or 'channels'"
        self._columns_default = value

    @data_loaded
    def data(self, source: str, sample_size: int or float or None = None) -> pd.DataFrame:
        """
        Load the FileGroup dataframe for the desired source file.

        Parameters
        ----------
        source: str
            Name of the file to load from e.g. either "primary" or the name of a control
        sample_size: int or float (optional)
            Sample the DataFrame

        Returns
        -------
        Pandas.DataFrame

        Raises
        ------
        AssertionError
            Invalid source
        """
        with h5py.File(self.h5path, "r") as f:
            assert source in f.keys(), f"Invalid source, expected one of: {f.keys()}"
            # Mapping datasets are stored as bytes; decode to str.
            channels = [x.decode("utf-8")
                        for x in f[f"mappings/{source}/channels"][:]]
            markers = [x.decode("utf-8")
                       for x in f[f"mappings/{source}/markers"][:]]
            data = set_column_names(df=pd.DataFrame(f[source][:], dtype=np.float32),
                                    channels=channels,
                                    markers=markers,
                                    preference=self.columns_default)
            if sample_size is not None:
                return uniform_downsampling(data=data, sample_size=sample_size)
            return data

    def init_new_file(self, data: np.array, channels: List[str], markers: List[str]):
        """
        Under the assumption that this FileGroup has not been previously defined,
        generate a HDF5 file and initialise the root Population

        Parameters
        ----------
        data: numpy.ndarray
        channels: list
        markers: list

        Returns
        -------
        None
        """
        # Any stale file from a previous failed creation is discarded.
        if os.path.isfile(self.h5path):
            os.remove(self.h5path)
        with h5py.File(self.h5path, "w") as f:
            f.create_dataset(name="primary", data=data)
            f.create_group("mappings")
            f.create_group("mappings/primary")
            f.create_dataset("mappings/primary/channels",
                             data=np.array(channels, dtype='S'))
            f.create_dataset("mappings/primary/markers",
                             data=np.array(markers, dtype='S'))
            f.create_group("index")
            f.create_group("index/root")
            f.create_group("cell_meta_labels")
        # Root population contains every event.
        self.populations = [Population(population_name="root",
                                       index=np.arange(0, data.shape[0]),
                                       parent="root",
                                       n=data.shape[0],
                                       source="root")]
        self.tree = {"root": anytree.Node(name="root", parent=None)}
        self.save()

    def add_ctrl_file(self, ctrl_id: str, data: np.array, channels: List[str], markers: List[str]):
        """
        Add a new control file to this FileGroup.

        Parameters
        ----------
        ctrl_id: str
            Name of the control e.g ("CD45RA FMO" or "HLA-DR isotype control"
        data: numpy.ndarray
            Single cell events data obtained for this control
        channels: list
            List of channel names
        markers: list
            List of marker names

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If control already exists
        """
        with h5py.File(self.h5path, "a") as f:
            assert ctrl_id not in self.controls, f"Entry for {ctrl_id} already exists"
            f.create_dataset(name=ctrl_id, data=data)
            f.create_group(f"mappings/{ctrl_id}")
            f.create_dataset(f"mappings/{ctrl_id}/channels",
                             data=np.array(channels, dtype='S'))
            f.create_dataset(f"mappings/{ctrl_id}/markers",
                             data=np.array(markers, dtype='S'))
        self.controls.append(ctrl_id)
        self.save()

    @data_loaded
    def _load_cell_meta_labels(self):
        """
        Load single cell meta labels from disk

        Returns
        -------
        None
        """
        with h5py.File(self.h5path, "r") as f:
            if "cell_meta_labels" in f.keys():
                for meta in f["cell_meta_labels"].keys():
                    self.cell_meta_labels[meta] = np.array(
                        f[f"cell_meta_labels/{meta}"][:], dtype="U")

    @data_loaded
    def _load_population_indexes(self):
        """
        Load population level event index data from disk

        Returns
        -------
        None
        """
        with h5py.File(self.h5path, "r") as f:
            for p in self.populations:
                primary_index = h5_read_population_primary_index(
                    population_name=p.population_name, h5file=f)
                if primary_index is None:
                    # No index written for this population yet; leave as-is.
                    continue
                p.index = primary_index

    def add_population(self, population: Population):
        """
        Add a new Population to this FileGroup.

        Parameters
        ----------
        population: Population

        Returns
        -------
        None

        Raises
        ------
        DuplicatePopulationError
            Population already exists
        AssertionError
            Population is missing index
        """
        if population.population_name in self.tree.keys():
            err = f"Population with name '{population.population_name}' already exists"
            raise DuplicatePopulationError(err)
        assert population.index is not None, "Population index is empty"
        if population.n is None:
            population.n = len(population.index)
        self.populations.append(population)
        # Attach to the in-memory tree under its declared parent node.
        self.tree[population.population_name] = anytree.Node(
            name=population.population_name,
            parent=self.tree.get(population.parent))

    def update_population(self, pop: Population):
        """
        Replace an existing population. Population to replace identified using 'population_name' field.
        Note: this method does not allow you to edit the

        Parameters
        ----------
        pop: Population
            New population object

        Returns
        -------
        None
        """
        assert pop.population_name in self.list_populations(), 'Invalid population, does not exist'
        self.populations = [p for p in self.populations
                            if p.population_name != pop.population_name]
        self.populations.append(pop)

    def load_ctrl_population_df(self,
                                ctrl: str,
                                population: str,
                                classifier: str = "XGBClassifier",
                                classifier_params: dict or None = None,
                                scoring: str = "balanced_accuracy",
                                transform: str = "logicle",
                                transform_kwargs: dict or None = None,
                                verbose: bool = True,
                                evaluate_classifier: bool = True,
                                kfolds: int = 5,
                                n_permutations: int = 25,
                                sample_size: int = 10000) -> pd.DataFrame:
        """
        Load a population from an associated control. The assumption here is that control files
        have been collected at the same time as primary staining and differ by the absence or
        permutation of a marker/channel/stain. Therefore the population of interest in the
        primary staining will be used as training data to identify the equivalent population in
        the control.

        The user should specify the control file, the population they want
        (which MUST already exist in the primary staining) and the type of classifier to use.
        Additional parameters can be passed to control the classifier and stratified cross
        validation with permutation testing will be performed if evalidate_classifier is set to
        True.

        Parameters
        ----------
        ctrl: str
            Control file to estimate population for
        population: str
            Population of interest. MUST already exist in the primary staining.
        classifier: str (default='XGBClassifier')
            Classifier to use. String value should correspond to a valid Scikit-Learn classifier
            class name or XGBClassifier for XGBoost.
        classifier_params: dict, optional
            Additional keyword arguments passed when initiating the classifier
        scoring: str (default='balanced_accuracy')
            Method used to evaluate the performance of the classifier if evaluate_classifier is
            True. String value should be one of the functions of Scikit-Learn's classification
            metrics: https://scikit-learn.org/stable/modules/model_evaluation.html.
        transform: str (default='logicle')
            Transformation to be applied to data prior to classification
        transform_kwargs: dict, optional
            Additional keyword arguments applied to Transformer
        verbose: bool (default=True)
            Whether to provide feedback
        evaluate_classifier: bool (default=True)
            If True, stratified cross validation with permutating testing is applied prior to
            predicting control population, feeding back to stdout the performance of the
            classifier across k folds and n permutations
        kfolds: int (default=5)
            Number of cross validation rounds to perform if evaluate_classifier is True
        n_permutations: int (default=25)
            Number of rounds of permutation testing to perform if evaluate_classifier is True
        sample_size: int (default=10000)
            Number of events to sample from primary data for training

        Returns
        -------
        Pandas.DataFrame

        Raises
        ------
        AssertionError
            If desired population is not found in the primary staining

        MissingControlError
            If the chosen control does not exist
        """
        transform_kwargs = transform_kwargs or {}
        if ctrl not in self.controls:
            raise MissingControlError(
                f"No such control {ctrl} associated to this FileGroup")
        params = classifier_params or {}
        # NOTE(review): duplicate of the `transform_kwargs` normalisation
        # above — harmless but redundant.
        transform_kwargs = transform_kwargs or {}
        feedback = vprint(verbose=verbose)
        # `classifier` is rebound from a class name (str) to an instance here.
        classifier = build_sklearn_model(klass=classifier, **params)
        assert population in self.list_populations(), f"Desired population {population} not found"
        feedback(f"====== Estimating {population} for {ctrl} control ======")
        feedback("Loading data...")
        # `ctrl` is rebound from a name (str) to a DataFrame here.
        training, ctrl, transformer = _load_data_for_ctrl_estimate(
            filegroup=self,
            target_population=population,
            ctrl=ctrl,
            transform=transform,
            sample_size=sample_size,
            **transform_kwargs)
        features = [x for x in training.columns if x != "label"]
        # Keep only features present in both training and control data.
        features = [x for x in features if x in ctrl.columns]
        x, y = training[features], training["label"].values
        if evaluate_classifier:
            feedback("Evaluating classifier with permutation testing...")
            skf = StratifiedKFold(n_splits=kfolds, random_state=42, shuffle=True)
            score, permutation_scores, pvalue = permutation_test_score(
                classifier, x, y,
                cv=skf,
                n_permutations=n_permutations,
                scoring=scoring,
                n_jobs=-1,
                random_state=42)
            feedback(f"...Performance (without permutations): {round(score, 4)}")
            feedback(f"...Performance (average across permutations; standard dev): "
                     f"{round(np.mean(permutation_scores), 4)}; {round(np.std(permutation_scores), 4)}")
            feedback(f"...p-value (comparison of original score to permuations): {round(pvalue, 4)}")
        feedback("Predicting population for control data...")
        classifier.fit(x, y)
        ctrl_labels = classifier.predict(ctrl[features])
        training_prop_of_root = self.get_population(population).n / self.get_population("root").n
        ctrl_prop_of_root = np.sum(ctrl_labels) / ctrl.shape[0]
        feedback(f"{population}: {round(training_prop_of_root, 3)}% of root in primary data")
        feedback(f"Predicted in ctrl: {round(ctrl_prop_of_root, 3)}% of root in control data")
        # Keep only events predicted as members of the target population.
        ctrl = ctrl.iloc[np.where(ctrl_labels == 1)[0]]
        if transformer:
            return transformer.inverse_scale(data=ctrl, features=list(ctrl.columns))
        return ctrl

    def load_population_df(self,
                           population: str,
                           transform: str or dict or None = "logicle",
                           features_to_transform: list or None = None,
                           transform_kwargs: dict or None = None,
                           label_downstream_affiliations: bool = False) -> pd.DataFrame:
        """
        Load the DataFrame for the events pertaining to a single population.

        Parameters
        ----------
        population: str
            Name of the desired population
        transform: str or dict, optional (default="logicle")
            Transform to be applied; specify a value of None to not perform any transformation
        features_to_transform: list, optional
            Features (columns) to be transformed. If not provied, all columns transformed
        transform_kwargs: dict, optional
            Additional keyword arguments passed to Transformer
        label_downstream_affiliations: bool (default=False)
            If True, an additional column will be generated named "population_label" containing
            the end node membership of each event e.g. if you choose CD4+ population and there
            are subsequent populations belonging to this CD4+ population in a tree like:
            "CD4+ -> CD4+CD25+ -> CD4+CD25+CD45RA+" then the population label column will
            contain the name of the lowest possible "leaf" population that an event is assigned
            too.

        Returns
        -------
        Pandas.DataFrame

        Raises
        ------
        AssertionError
            Invalid population, does not exist
        """
        assert population in self.tree.keys(), f"Invalid population, {population} does not exist"
        idx = self.get_population(population_name=population).index
        data = self.data(source="primary").loc[idx]
        if transform is not None:
            features_to_transform = features_to_transform or list(data.columns)
            transform_kwargs = transform_kwargs or {}
            if isinstance(transform, dict):
                # Per-feature transform mapping: {feature: method}.
                data = apply_transform_map(data=data,
                                           feature_method=transform,
                                           kwargs=transform_kwargs)
            else:
                data = apply_transform(data=data,
                                       method=transform,
                                       features=features_to_transform,
                                       return_transformer=False,
                                       **transform_kwargs)
        if label_downstream_affiliations:
            return self._label_downstream_affiliations(parent=population, data=data)
        return data

    def _label_downstream_affiliations(self, parent: str, data: pd.DataFrame) -> pd.DataFrame:
        """
        An additional column will be generated named "population_label" containing
        the end node membership of each event e.g. if you choose CD4+ population and there
        are subsequent populations belonging to this CD4+ population in a tree like:
        "CD4+ -> CD4+CD25+ -> CD4+CD25+CD45RA+" then the population label column will
        contain the name of the lowest possible "leaf" population that an event is assigned
        too.

        Parameters
        ----------
        parent: str
        data: Pandas.DataFrame

        Returns
        -------
        Pandas.DataFrame
        """
        data["population_label"] = None
        dependencies = self.list_downstream_populations(parent)
        # Later (deeper) populations overwrite earlier labels, so events end
        # up tagged with their deepest matching population.
        for pop in dependencies:
            idx = self.get_population(pop).index
            data.loc[idx, 'population_label'] = pop
        # Events not in any downstream population belong to the parent itself.
        data["population_label"].fillna(parent, inplace=True)
        return data

    def _hdf5_exists(self):
        """
        Tests if associated HDF5 file exists.

        Returns
        -------
        bool
        """
        return os.path.isfile(self.h5path)

    def list_populations(self) -> list:
        """
        List population names

        Returns
        -------
        List
        """
        return [p.population_name for p in self.populations]

    def print_population_tree(self, image: bool = False, path: str or None = None):
        """
        Print population tree to stdout or save as an image if 'image' is True.

        Parameters
        ----------
        image: bool (default=False)
            Save tree as a png image
        path: str (optional)
            File path for image, ignored if 'image' is False.
            Defaults to working directory.

        Returns
        -------
        None
        """
        root = self.tree['root']
        if image:
            from anytree.exporter import DotExporter
            path = path or f'{os.getcwd()}/{self.id}_population_tree.png'
            DotExporter(root).to_picture(path)
        for pre, fill, node in anytree.RenderTree(root):
            print('%s%s' % (pre, node.name))

    def delete_populations(self, populations: list or str) -> None:
        """
        Delete given populations. Populations downstream from delete population(s)
        will also be removed.

        Parameters
        ----------
        populations: list or str
            Either a list of populations (list of strings) to remove or a single population as a
            string. If a value of "all" is given, all populations are dropped.

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If invalid value given for populations
        """
        if populations == "all":
            # Detach every node, then keep only "root" in both structures.
            for p in self.populations:
                self.tree[p.population_name].parent = None
            self.populations = [p for p in self.populations
                                if p.population_name == "root"]
            self.tree = {name: node for name, node in self.tree.items()
                         if name == "root"}
        else:
            assert isinstance(populations, list), "Provide a list of population names for removal"
            assert "root" not in populations, "Cannot delete root population"
            # Deleting a population implies deleting everything downstream.
            downstream_effects = [self.list_downstream_populations(p)
                                  for p in populations]
            downstream_effects = set([x for sl in downstream_effects for x in sl])
            if len(downstream_effects) > 0:
                warn("The following populations are downstream of one or more of the "
                     "populations listed for deletion and will therefore be deleted: "
                     f"{downstream_effects}")
            populations = list(set(list(downstream_effects) + populations))
            self.populations = [p for p in self.populations
                                if p.population_name not in populations]
            for name in populations:
                self.tree[name].parent = None
            self.tree = {name: node for name, node in self.tree.items()
                         if name not in populations}

    def get_population(self, population_name: str) -> Population:
        """
        Given the name of a population associated to the FileGroup, returns the Population
        object, with index and control index ready loaded.

        Parameters
        ----------
        population_name: str
            Name of population to retrieve from database

        Returns
        -------
        Population

        Raises
        ------
        MissingPopulationError
            If population doesn't exist
        """
        if population_name not in list(self.list_populations()):
            raise MissingPopulationError(
                f'Population {population_name} does not exist')
        return [p for p in self.populations
                if p.population_name == population_name][0]

    def get_population_by_parent(self, parent: str) -> Generator:
        """
        Given the name of some parent population, return a list of Population object whom's
        parent matches

        Parameters
        ----------
        parent: str
            Name of the parent population to search for

        Returns
        -------
        Generator
            List of Populations
        """
        for p in self.populations:
            # "root" is its own parent, so it is excluded explicitly.
            if p.parent == parent and p.population_name != "root":
                yield p

    def list_downstream_populations(self, population: str) -> list or None:
        """For a given population find all dependencies

        Parameters
        ----------
        population : str
            population name

        Returns
        -------
        list or None
            List of populations dependent on given population

        Raises
        ------
        AssertionError
            If Population does not exist
        """
        assert population in self.tree.keys(), f'population {population} does not exist; ' \
                                               f'valid population names include: {self.tree.keys()}'
        root = self.tree['root']
        node = self.tree[population]
        # A node N depends on `population` if `node` appears on N's path.
        dependencies = [x.name for x in
                        anytree.findall(root, filter_=lambda n: node in n.path)]
        return [p for p in dependencies if p != population]

    def merge_gate_populations(self,
                               left: Population or str,
                               right: Population or str,
                               new_population_name: str or None = None):
        """
        Merge two populations present in the current population tree.
        The merged population will have the combined index of both populations but
        will not inherit any clusters and will not be associated to any children
        downstream of either the left or right population. The population will be
        added to the tree as a descendant of the left populations parent. New
        population will be added to FileGroup.

        Parameters
        ----------
        left: Population
        right: Population
        new_population_name: str (optional)

        Returns
        -------
        None
        """
        if isinstance(left, str):
            left = self.get_population(left)
        if isinstance(right, str):
            right = self.get_population(right)
        self.add_population(
            merge_gate_populations(left=left, right=right,
                                   new_population_name=new_population_name))

    def merge_non_geom_populations(self, populations: list, new_population_name: str):
        """
        Merge multiple populations that are sourced either for classification or clustering methods.
        (Not supported for populations from autonomous gates)

        Parameters
        ----------
        populations: list
            List of populations to merge
        new_population_name: str
            Name of the new population

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If populations is invalid
        """
        pops = list()
        for p in populations:
            if isinstance(p, str):
                pops.append(self.get_population(p))
            elif isinstance(p, Population):
                pops.append(p)
            else:
                raise ValueError(
                    "populations should be a list of strings or list of Population objects")
        self.add_population(
            merge_non_geom_populations(populations=pops,
                                       new_population_name=new_population_name))

    def subtract_populations(self,
                             left: Population,
                             right: Population,
                             new_population_name: str or None = None):
        """
        Subtract the right population from the left population.
        The right population must either have the same parent as the left population
        or be downstream of the left population. The new population will descend from
        the same parent as the left population. The new population will have a
        PolygonGeom geom. New population will be added to FileGroup.

        Parameters
        ----------
        left: Population
        right: Population
        new_population_name: str (optional)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If left and right population are not sourced from root or Gate

        AssertionError
            If left and right population do not share the same parent or the right population
            is not downstream of the left population
        """
        same_parent = left.parent == right.parent
        downstream = right.population_name in list(
            self.list_downstream_populations(left.population_name))
        if left.source not in ["root", "gate"] or right.source not in ["root", "gate"]:
            raise ValueError(
                "Population source must be either 'root' or 'gate'")
        assert same_parent or downstream, "Right population should share the same parent as the " \
                                          "left population or be downstream of the left population"
        new_population_name = new_population_name or f"subtract_{left.population_name}_{right.population_name}"
        # Events in left but not in right.
        new_idx = np.setdiff1d(left.index, right.index)
        x, y = left.geom.x, left.geom.y
        transform_x, transform_y = left.geom.transform_x, left.geom.transform_y
        parent_data = self.load_population_df(population=left.parent,
                                              transform={x: transform_x,
                                                         y: transform_y})
        # Build a polygon gate geometry enclosing the remaining events.
        x_values, y_values = create_convex_hull(
            x_values=parent_data.loc[new_idx][x].values,
            y_values=parent_data.loc[new_idx][y].values)
        new_geom = PolygonGeom(x=x, y=y,
                               transform_x=transform_x,
                               transform_y=transform_y,
                               x_values=x_values,
                               y_values=y_values)
        new_population = Population(population_name=new_population_name,
                                    parent=left.parent,
                                    n=len(new_idx),
                                    index=new_idx,
                                    geom=new_geom,
                                    warnings=left.warnings + right.warnings + ["SUBTRACTED POPULATION"])
        self.add_population(population=new_population)

    def _write_populations(self):
        """
        Write population data to disk.

        Returns
        -------
        None
        """
        root_n = self.get_population("root").n
        with h5py.File(self.h5path, "r+") as f:
            for meta, labels in self.cell_meta_labels.items():
                # HDF5 string datasets are written as ascii bytes.
                ascii_labels = np.array(
                    [x.encode("ascii", "ignore") for x in labels])
                overwrite_or_create(file=f, data=ascii_labels,
                                    key=f"/cell_meta_labels/{meta}")
            for p in self.populations:
                parent_n = self.get_population(p.parent).n
                # NOTE(review): assigns the underscore-prefixed
                # `_prop_of_parent` while the sibling assignment uses the
                # public `prop_of_total` — looks like it may have been meant
                # to be `prop_of_parent`; confirm against Population's fields.
                p._prop_of_parent = p.n / parent_n
                p.prop_of_total = p.n / root_n
                overwrite_or_create(file=f, data=p.index,
                                    key=f"/index/{p.population_name}/primary")

    def population_stats(self, population: str, warn_missing: bool = False):
        """
        Returns a dictionary of statistics (number of events, proportion of parent, and proportion of all events)
        for the requested population.

        Parameters
        ----------
        population: str
        warn_missing: bool (default=False)

        Returns
        -------
        Dict
        """
        try:
            pop = self.get_population(population_name=population)
            parent = self.get_population(population_name=pop.parent)
            root = self.get_population(population_name="root")
            return {
                "population_name": population,
                "n": pop.n,
                "frac_of_parent": pop.n / parent.n,
                "frac_of_root": pop.n / root.n
            }
        except MissingPopulationError:
            # Missing populations yield zeroed stats rather than an error.
            if warn_missing:
                warn(f"{population} not present in {self.primary_id} FileGroup")
            return {
                "population_name": population,
                "n": 0,
                "frac_of_parent": 0,
                "frac_of_root": 0
            }

    def quantile_clean(self, upper: float = 0.999, lower: float = 0.001):
        """
        Iterate over every channel in the flow data and cut the upper and lower quartiles.

        Parameters
        ----------
        upper: float (default=0.999)
        lower: float (default=0.001)

        Returns
        -------
        None
        """
        df = self.load_population_df("root", transform="logicle")
        for x in df.columns:
            df = df[(df[x] >= df[x].quantile(lower)) & (df[x] <= df[x].quantile(upper))]
        clean_pop = Population(population_name="root_clean",
                               index=df.index.values,
                               parent="root",
                               source="root",
                               n=df.shape[0])
        self.add_population(clean_pop)

    def save(self, *args, **kwargs):
        """
        Save FileGroup and associated populations

        Returns
        -------
        None
        """
        # Calculate meta and save indexes to disk
        if self.populations:
            # Populate h5path for populations
            self._write_populations()
        super().save(*args, **kwargs)

    def delete(self, delete_hdf5_file: bool = True, *args, **kwargs):
        """
        Delete FileGroup

        Parameters
        ----------
        delete_hdf5_file: bool (default=True)

        Returns
        -------
        None
        """
        super().delete(*args, **kwargs)
        if delete_hdf5_file:
            if os.path.isfile(self.h5path):
                os.remove(self.h5path)
            else:
                warn(f"Could not locate hdf5 file {self.h5path}")
class Date(me.Document):
    # A single calendar day and the destinations associated with it.
    day = me.DateTimeField()
    # NOTE(review): EmbeddedDocumentListField is called without its required
    # `document_type` argument — with current mongoengine this raises at
    # class-definition time. The intended embedded document type is not
    # visible from this file; TODO confirm and supply it.
    destinations = me.EmbeddedDocumentListField()
class UserStatus(me.Document):
    """Tracks the authentication tokens currently issued to a user.

    One document per user, keyed by the user's uuid.
    """
    # Unique identifier of the user this status belongs to.
    uuid = me.StringField(unique=True, required=True)
    # Tokens issued to the user. BUG FIX: the original used `default=[]`,
    # a single shared mutable list; mongoengine documents recommend passing
    # a callable so each document gets its own fresh list.
    tokens = me.EmbeddedDocumentListField(Token, default=list)
class Pocket:
    # NOTE(review): this is a plain class, not a subclass of me.Document or
    # me.EmbeddedDocument, so mongoengine's metaclass will not process the
    # field below — confirm whether a Document base was intended.
    # NOTE(review): EmbeddedDocumentListField is missing its required
    # `document_type` argument; the intended embedded type is not visible
    # here — TODO confirm and supply it.
    items = me.EmbeddedDocumentListField()
class Machine(OwnershipMixin, me.Document):
    """The basic machine model"""
    id = me.StringField(primary_key=True, default=lambda: uuid.uuid4().hex)
    cloud = me.ReferenceField('Cloud', required=True,
                              reverse_delete_rule=me.CASCADE)
    owner = me.ReferenceField('Organization', required=True,
                              reverse_delete_rule=me.CASCADE)
    location = me.ReferenceField('CloudLocation', required=False,
                                 reverse_delete_rule=me.DENY)
    size = me.ReferenceField('CloudSize', required=False,
                             reverse_delete_rule=me.DENY)
    image = me.ReferenceField('CloudImage', required=False,
                              reverse_delete_rule=me.DENY)
    network = me.ReferenceField('Network', required=False,
                                reverse_delete_rule=me.NULLIFY)
    subnet = me.ReferenceField('Subnet', required=False,
                               reverse_delete_rule=me.NULLIFY)
    name = me.StringField()

    # Info gathered mostly by libcloud (or in some cases user input).
    # Be more specific about what this is.
    # We should perhaps come up with a better name.
    machine_id = me.StringField(required=True)

    hostname = me.StringField()
    public_ips = me.ListField()
    private_ips = me.ListField()
    ssh_port = me.IntField(default=22)
    OS_TYPES = ('windows', 'coreos', 'freebsd', 'linux', 'unix')
    os_type = me.StringField(default='unix', choices=OS_TYPES)
    rdp_port = me.IntField(default=3389)
    # Callable defaults so each Machine gets its own embedded instance.
    actions = me.EmbeddedDocumentField(Actions, default=lambda: Actions())
    extra = MistDictField()
    cost = me.EmbeddedDocumentField(Cost, default=lambda: Cost())
    # libcloud.compute.types.NodeState
    state = me.StringField(default='unknown',
                           choices=tuple(config.STATES.values()))
    machine_type = me.StringField(default='machine',
                                  choices=('machine', 'vm', 'container',
                                           'hypervisor', 'container-host',
                                           'ilo-host'))
    parent = me.ReferenceField('Machine', required=False,
                               reverse_delete_rule=me.NULLIFY)

    # Deprecated TODO: Remove in v5
    key_associations = me.EmbeddedDocumentListField(KeyAssociation)

    last_seen = me.DateTimeField()
    missing_since = me.DateTimeField()
    unreachable_since = me.DateTimeField()
    created = me.DateTimeField()

    monitoring = me.EmbeddedDocumentField(Monitoring,
                                          default=lambda: Monitoring())

    ssh_probe = me.EmbeddedDocumentField(SSHProbe, required=False)
    ping_probe = me.EmbeddedDocumentField(PingProbe, required=False)

    expiration = me.ReferenceField(Schedule, required=False,
                                   reverse_delete_rule=me.NULLIFY)

    # Number of vCPUs gathered from various sources. This field is meant to
    # be updated ONLY by the mist.api.metering.tasks:find_machine_cores task.
    cores = me.IntField()

    meta = {
        'collection': 'machines',
        'indexes': [
            {
                # A machine is uniquely identified by (cloud, machine_id).
                'fields': ['cloud', 'machine_id'],
                'sparse': False,
                'unique': True,
                'cls': False,
            },
            {
                'fields': ['monitoring.installation_status.activated_at'],
                'sparse': True,
                'unique': False
            }
        ],
        'strict': False,
    }

    def __init__(self, *args, **kwargs):
        # Every Machine instance carries its own controller for actions
        # (start/stop/etc.); `ctl` is runtime-only, not persisted.
        super(Machine, self).__init__(*args, **kwargs)
        self.ctl = MachineController(self)

    def clean(self):
        # Remove any KeyAssociation, whose `keypair` has been deleted. Do NOT
        # perform an atomic update on self, but rather remove items from the
        # self.key_associations list by iterating over it and popping matched
        # embedded documents in order to ensure that the most recent list is
        # always processed and saved.
        key_associations = KeyMachineAssociation.objects(machine=self)
        # Iterate indices in reverse so deletions don't shift later items.
        for ka in reversed(list(range(len(key_associations)))):
            if key_associations[ka].key.deleted:
                key_associations[ka].delete()

        # Reset key_associations in case self goes missing/destroyed. This is
        # going to prevent the machine from showing up as "missing" in the
        # corresponding keys' associated machines list.
        if self.missing_since:
            self.key_associations = []

        # Populate owner field based on self.cloud.owner
        if not self.owner:
            self.owner = self.cloud.owner

        self.clean_os_type()

        # Fall back to the configured default monitoring method when the
        # stored one is no longer valid.
        if self.monitoring.method not in config.MONITORING_METHODS:
            self.monitoring.method = config.DEFAULT_MONITORING_METHOD

    def clean_os_type(self):
        """Clean self.os_type"""
        if self.os_type not in self.OS_TYPES:
            # Try a case-insensitive match against the known OS types;
            # the for/else falls back to 'unix' when nothing matches.
            for os_type in self.OS_TYPES:
                if self.os_type.lower() == os_type:
                    self.os_type = os_type
                    break
            else:
                self.os_type = 'unix'

    def delete(self):
        # Cascade-delete the expiration schedule, tags, and ownership
        # mappings alongside the machine document itself.
        if self.expiration:
            self.expiration.delete()
        super(Machine, self).delete()
        mist.api.tag.models.Tag.objects(
            resource_id=self.id, resource_type='machine').delete()
        try:
            self.owner.mapper.remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)
        try:
            if self.owned_by:
                self.owned_by.get_ownership_mapper(self.owner).remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)

    def as_dict(self):
        # Return a dict as it will be returned to the API
        tags = {tag.key: tag.value
                for tag in mist.api.tag.models.Tag.objects(
                    resource_id=self.id, resource_type='machine'
                ).only('key', 'value')}
        try:
            if self.expiration:
                expiration = {
                    'id': self.expiration.id,
                    'action': self.expiration.task_type.action,
                    'date': self.expiration.schedule_type.entry.isoformat(),
                    # Seconds between the reminder and the expiration entry;
                    # 0 when no reminder is set.
                    'notify': self.expiration.reminder and int((
                        self.expiration.schedule_type.entry -
                        self.expiration.reminder.schedule_type.entry
                    ).total_seconds()) or 0,
                }
            else:
                expiration = None
        except Exception as exc:
            # A broken expiration reference is cleared and persisted so the
            # API response can still be produced.
            log.error("Error getting expiration for machine %s: %r" % (
                self.id, exc))
            self.expiration = None
            self.save()
            expiration = None

        try:
            # Round-trip through BSON-aware JSON to coerce non-serializable
            # values (ObjectId, datetime, ...) in `extra`.
            from bson import json_util
            extra = json.loads(json.dumps(self.extra,
                                          default=json_util.default))
        except Exception as exc:
            log.error('Failed to serialize extra metadata for %s: %s\n%s' % (
                self, self.extra, exc))
            extra = {}

        return {
            'id': self.id,
            'hostname': self.hostname,
            'public_ips': self.public_ips,
            'private_ips': self.private_ips,
            'name': self.name,
            'ssh_port': self.ssh_port,
            'os_type': self.os_type,
            'rdp_port': self.rdp_port,
            'machine_id': self.machine_id,
            'actions': {action: self.actions[action]
                        for action in self.actions},
            'extra': extra,
            'cost': self.cost.as_dict(),
            'state': self.state,
            'tags': tags,
            'monitoring': self.monitoring.as_dict()
                          if self.monitoring and self.monitoring.hasmonitoring
                          else '',
            'key_associations': [ka.as_dict()
                                 for ka in KeyMachineAssociation.objects(
                                     machine=self)],
            'cloud': self.cloud.id,
            'location': self.location.id if self.location else '',
            'size': self.size.name if self.size else '',
            'image': self.image.id if self.image else '',
            'cloud_title': self.cloud.title,
            # Datetimes are rendered naive (tzinfo stripped) or ''.
            'last_seen': str(self.last_seen.replace(tzinfo=None)
                             if self.last_seen else ''),
            'missing_since': str(self.missing_since.replace(tzinfo=None)
                                 if self.missing_since else ''),
            'unreachable_since': str(
                self.unreachable_since.replace(tzinfo=None)
                if self.unreachable_since else ''),
            'created': str(self.created.replace(tzinfo=None)
                           if self.created else ''),
            'machine_type': self.machine_type,
            'parent': self.parent.id if self.parent is not None else '',
            'probe': {
                'ping': (self.ping_probe.as_dict()
                         if self.ping_probe is not None
                         else PingProbe().as_dict()),
                'ssh': (self.ssh_probe.as_dict()
                        if self.ssh_probe is not None
                        else SSHProbe().as_dict()),
            },
            'cores': self.cores,
            'network': self.network.id if self.network else '',
            'subnet': self.subnet.id if self.subnet else '',
            'owned_by': self.owned_by.id if self.owned_by else '',
            'created_by': self.created_by.id if self.created_by else '',
            'expiration': expiration,
            'provider': self.cloud.ctl.provider
        }

    def __str__(self):
        return 'Machine %s (%s) in %s' % (self.name, self.id, self.cloud)
class User(me.DynamicDocument):
    """A user document backed by auth0 profile data.

    Holds the normalized auth0 fields, user-editable overrides for the
    profile, linked identities, and the portfolio content (education,
    awards, work history, portfolio items).
    """
    _id = me.StringField(required=True, primary_key=True)
    # Generic data from auth0 normalized fields
    name = me.StringField(required=True)
    picture = me.URLField(required=True)
    user_id = me.StringField(required=True, unique=True)
    email = me.EmailField(required=True)
    email_verified = me.BooleanField(required=True, default=False)
    given_name = me.StringField()
    family_name = me.StringField()
    # User-editable overrides shown on the student profile
    picture_editable = me.EmbeddedDocumentField(File)
    given_name_editable = me.StringField()
    family_name_editable = me.StringField()
    # Identities record which service(s) the user signed up with
    identities = me.EmbeddedDocumentListField(Identity)
    # Github related fields
    url = me.URLField()  # API URL
    html_url = me.URLField()  # PROFILE URL
    repos_url = me.URLField()
    # Generic information for the portfolio page
    description = me.StringField()
    tagline = me.StringField(max_length=280)
    skills = me.ListField(me.StringField(choices=const.skills))
    # Schooling information
    education = me.EmbeddedDocumentListField(School)
    # Awards displayed on the user's page
    awards = me.EmbeddedDocumentListField(Award)
    # Previous work history
    work_history = me.EmbeddedDocumentListField(Work)
    # Portfolio items for the user's page
    portfolio = me.EmbeddedDocumentListField(PortfolioItem)

    meta = {'collection': 'users'}

    def __repr__(self):
        """Default representation for the user object."""
        return '<User: {}>'.format(self.pk)

    def has_identity(self, provider):
        """Check whether any linked identity came from ``provider``.

        Args:
            provider (str): The provider to check for.

        Returns:
            bool: True when at least one identity matches the provider.
        """
        for ident in self.identities:
            if ident['provider'] == provider:
                return True
        return False

    @property
    def picture_normalized_url(self):
        """The editable picture's URL when set, else the auth0 picture."""
        if self.picture_editable is not None:
            return self.picture_editable.url
        return self.picture

    @property
    def given_name_normalized(self):
        """Editable given name, else auth0 given name, else full name."""
        if self.given_name_editable is not None:
            return self.given_name_editable
        if self.given_name is not None:
            return self.given_name
        return self.name

    @property
    def family_name_normalized(self):
        """Editable family name, else the auth0 family name (may be None)."""
        if self.family_name_editable is not None:
            return self.family_name_editable
        return self.family_name

    @property
    def name_normalized(self):
        """Display name assembled from the editable name parts.

        Uses both editable parts when present, a single part when only one
        is set, and falls back to the auth0 full name otherwise.
        """
        first = self.given_name_editable
        last = self.family_name_editable
        if first is not None and last is not None:
            return '{} {}'.format(first, last)
        if first is not None:
            return first
        if last is not None:
            return last
        return self.name

    @property
    def is_github_user(self):
        """Whether the user signed up through GitHub."""
        return self.has_identity('github')

    @property
    def is_google_user(self):
        """Whether the user signed up through Google OAuth2."""
        return self.has_identity('google-oauth2')

    @property
    def github_identity(self):
        """The user's GitHub identity, or None when there is none."""
        if not self.is_github_user:
            return None
        return next(ident for ident in self.identities
                    if ident.provider == 'github')

    @staticmethod
    def search(name=None, school_name=None, work_position=None,
               description=None, skills=None, limit=25, offset=0):
        """Run the default profile search; None arguments are ignored.

        Args:
            name (str): The name of the user to search for.
            school_name (str): The name of the school to search for.
            work_position (str): The previous work position to search for.
            description (str): The phrase to look for in the description.
            skills (list): The list of skills to search for.
            limit (int): The number of users to return.
            offset (int): The number of users to skip.

        Returns:
            tuple: ``(users, count)`` — the page of matching users and the
            total number of users matching the criteria.
        """
        criteria = {}
        # Text filters: applied only when a non-empty value was supplied.
        text_filters = (
            ('name__icontains', name),
            ('education__name__icontains', school_name),
            ('work_history__position__icontains', work_position),
            ('description__icontains', description),
        )
        for key, value in text_filters:
            if value:
                criteria[key] = value
        # Require all requested skills to be present.
        if skills:
            criteria['skills__all'] = skills
        matches = User.objects(**criteria).all()
        return matches[offset:offset + limit], matches.count()

    def add_repo(self, url, old_project=None):
        """Add (or update) a GitHub repo in the user's portfolio.

        Args:
            url (str): The url to the repo.
            old_project (Repo): The old project with the repo in it. When
                set, that project's repo is replaced instead of appending
                a new portfolio item.

        Returns:
            The id of the created or updated portfolio item.

        Raises:
            exc.IdentityError: When the user is not a GitHub user.
        """
        if not self.is_github_user:
            raise exc.IdentityError(self.user_id, 'github')
        fresh_repo = Repo(url, self.github_identity.user_id)
        if old_project is None:
            item = PortfolioItem(item_type='repo', repo=fresh_repo)
            self.portfolio.append(item)
            project_id = item._id
        else:
            old_project.repo = fresh_repo
            project_id = old_project._id
        self.save()
        return project_id
class Foo(mongoengine.Document): bars = mongoengine.EmbeddedDocumentListField(Bar)
class Order(me.Document):
    """A shop order: status lifecycle, product lines, and totals.

    An order belongs to one ``User``; its ``products`` are embedded
    ``Line_Order`` documents and ``sum`` is the cached order total.
    """
    # Order lifecycle states.
    ORDER_ACTIVE = 'active'
    ORDER_PROCESSED = 'processed'
    ORDER_COMPLETED = 'completed'
    ORDER_CANCELED = 'canceled'
    STATUS_CONSTANT = ((ORDER_CANCELED, 'order canceled'),
                       (ORDER_COMPLETED, 'order completed'),
                       (ORDER_ACTIVE, 'order active'),
                       (ORDER_PROCESSED, 'order processed'))
    # What the bot last asked the user for (checkout flow).
    REQUEST_TELEPHONE = 'request_telephone'
    REQUEST_NAME = 'request_name'
    LAST_REQUEST = ((REQUEST_TELEPHONE, 'request_telephone'),
                    (REQUEST_NAME, 'request_name'))

    # Per-user sequential order number (see get_max_num_orders).
    nom = me.IntField(min_value=1)
    # BUGFIX: pass the *callable* so each document gets its own creation
    # time; ``default=datetime.now()`` froze one timestamp at import time.
    date = me.DateTimeField(default=datetime.now)
    user = me.ReferenceField(User, reverse_delete_rule=me.DENY)
    # Cached order total, kept in sync by the mutating methods below.
    sum = me.DecimalField(min_value=0, force_string=True, default=0)
    status = me.StringField(min_length=5,
                            choices=STATUS_CONSTANT,
                            default=ORDER_ACTIVE,
                            required=True)
    products = me.EmbeddedDocumentListField(Line_Order)
    name_recipients = me.StringField(min_length=3, max_length=255)
    telephone_recipients = me.StringField(min_length=10,
                                          max_length=12,
                                          regex='^[0-9]*$')
    last_request = me.StringField(min_length=5, choices=LAST_REQUEST)
    id_message_cart = me.ListField()

    def get_text_status_order(self):
        """Return the user-facing text for this order's status."""
        status_texts = {
            Order.ORDER_CANCELED: Text.TEXT_ORDER_CANCELED,
            Order.ORDER_ACTIVE: Text.TEXT_ORDER_ACTIVE,
            Order.ORDER_COMPLETED: Text.TEXT_ORDER_COMPLETED,
        }
        # Any other status (i.e. 'processed') maps to the processed text.
        return Text.get_body(
            status_texts.get(self.status, Text.TEXT_ORDER_PROCESSED))

    def _refresh_line_and_total(self, line_product):
        """Recompute a line's sum from its count, refresh the order total,
        and persist the order."""
        line_product.sum = (line_product.count *
                            line_product.product.actual_price)
        self.sum = self.get_sum_order()
        self.save()

    def add_count_in_line(self, num: int):
        """Increment the quantity of the product line at index ``num``."""
        line_product = self.products[num]
        line_product.count += 1
        self._refresh_line_and_total(line_product)

    def sub_count_in_line(self, num: int):
        """Decrement the quantity of the line at index ``num``.

        A line's count never drops below 1; at 1 this is a no-op (the
        line is not removed and nothing is saved).
        """
        line_product = self.products[num]
        if line_product.count == 1:
            return
        line_product.count -= 1
        self._refresh_line_and_total(line_product)

    def add_product_to_order(self, product: Product, count: int):
        """Add ``count`` units of ``product``, merging into an existing
        line for the same product when one exists, then persist."""
        try:
            line_product = self.products.get(product=product)
            line_product.count += count
            line_product.sum = line_product.count * product.actual_price
        except me.DoesNotExist:
            # No line for this product yet — create one.
            self.products.create(product=product,
                                 count=count,
                                 sum=count * product.actual_price)
        self.sum = self.get_sum_order()
        self.save()

    def get_sum_order(self):
        """Return the order total as the sum of all line sums.

        NOTE: an aggregation pipeline ($unwind + $sum over products.sum)
        did not return correct results for this field (it worked for the
        count field but not for sum), so the total is computed in Python.
        """
        return sum((line.sum for line in self.products), 0)

    @classmethod
    def find_active_order(cls, user: User):
        """Return the user's active order, or None when there is none."""
        try:
            # .get() also raises MultipleObjectsReturned if duplicates
            # exist; only the not-found case is treated as "no order".
            order = cls.objects().get(
                Q(user=user) & Q(status=Order.ORDER_ACTIVE))
        except me.DoesNotExist:
            order = None
        return order

    @classmethod
    def get_max_num_orders(cls, user: User):
        """Return the highest order number (``nom``) for ``user``, 0 if
        the user has no orders."""
        cursor = cls.objects(user=user).aggregate([{
            '$group': {
                '_id': '$user',
                'max_num': {
                    '$max': '$nom'
                }
            }
        }])
        # next(cursor, None) is robust even when the cursor is empty.
        result = next(cursor, None)
        return result['max_num'] if result else 0

    @classmethod
    def get_count_orders(cls, user: User):
        """Return how many orders ``user`` has in total."""
        return cls.objects(user=user).count()

    @classmethod
    def create_order(cls, user: User):
        """Create and return a new order with the next sequential number."""
        return cls.objects.create(user=user,
                                  nom=cls.get_max_num_orders(user) + 1)

    @classmethod
    def get_active_order(cls, user: User) -> 'Order':
        """Return the user's active order, creating one if necessary."""
        active_order = cls.find_active_order(user)
        if not active_order:
            active_order = cls.create_order(user)
        return active_order

    @classmethod
    def get_count_products_in_active_order(cls, user):
        """Return the total item count across the user's active order,
        0 when there is no active order."""
        cursor = cls.objects(
            Q(user=user) & Q(status=Order.ORDER_ACTIVE)).aggregate([{
                '$unwind': '$products'
            }, {
                '$group': {
                    '_id': '$_id',
                    'count_products': {
                        '$sum': '$products.count'
                    }
                }
            }])
        result = next(cursor, None)
        return result['count_products'] if result else 0