Example #1
0
class User(me.Document):
    """User document holding name, contact, login, role and address fields."""

    # --- Basic information ---
    first_name = me.StringField(required=True, max_length=100)
    last_name = me.StringField(required=True, max_length=100)
    phone = me.IntField()

    # --- Default login info ---
    email = me.EmailField(unique=True)
    password = me.StringField()

    # --- Authorization ---
    CLIENT = 'CLIENT'
    LAUNDRYMAN = 'LAUNDRYMAN'
    ADMIN = 'ADMIN'
    SUSPENDED = 'SUSPENDED'
    ROLE_CHOICES = (CLIENT, LAUNDRYMAN, ADMIN, SUSPENDED)
    # The default is spelled as a literal because a lambda body cannot see
    # names that only exist in the class scope (such as CLIENT).
    roles = me.ListField(
        me.StringField(choices=ROLE_CHOICES),
        default=lambda: ['CLIENT'],
    )

    # --- Pickup information ---
    addresses = me.EmbeddedDocumentListField(Address)

    # --- Specific role information ---
    laundryman = me.EmbeddedDocumentField(LaundrymanDataEmbedded)

    # --- Meta ---
    # Only a creation-time default; nothing in this class refreshes it.
    updated_at = me.DateTimeField(default=datetime.utcnow)

    def is_laundryman(self):
        """Return True when the LAUNDRYMAN role is among this user's roles."""
        assigned_roles = self.roles
        return self.LAUNDRYMAN in assigned_roles

    def full_name(self):
        """Return the first and last name separated by a single space."""
        return '{} {}'.format(self.first_name, self.last_name)
Example #2
0
class UserModel(m.Document):
    """User record stored in the ``users`` collection of the ``core`` alias.

    Both lookup helpers, as well as ``identity``, are keyed on the email
    address. ``rolenames`` is always empty.
    """

    registered_date = m.DateTimeField(default=datetime.datetime.now)
    email = m.StringField(required=True)
    password = m.StringField(required=True)

    personal = m.EmbeddedDocumentListField(PersonalModel)

    meta = {
        'db_alias': 'core',
        'collection': 'users',
    }

    @classmethod
    def lookup(cls, email):
        """Return the first user whose email equals ``email``, or None."""
        matches = cls.objects(email=email)
        return matches.first()

    @classmethod
    def identify(cls, id):
        """Return the first user whose email equals ``id``, or None.

        The identity of a user is its email (see ``identity``), so the
        lookup is done on the email field.
        """
        matches = cls.objects(email=id)
        return matches.first()

    @property
    def rolenames(self):
        """Role names of this user; this model defines none."""
        return []

    @property
    def identity(self):
        """Value that identifies this user: the email address."""
        return self.email
Example #3
0
class Order(mongoengine.DynamicDocument):
    """Group order for a restaurant, stored in the ``orders`` collection.

    ``lastUpdate`` is refreshed on every ``save()`` call.
    """

    rname = mongoengine.StringField(required=True)
    # A callable default gives every document its own fresh list.
    orders = mongoengine.EmbeddedDocumentListField(Orders, default=list)
    end = mongoengine.StringField(required=True)
    shipping = mongoengine.FloatField(required=True)
    ogshipping = mongoengine.FloatField(required=True)
    maxOrder = mongoengine.IntField(required=True)
    # Pass the function itself, not its result: calling utcnow() here would
    # freeze the default at the moment this module was imported.
    lastUpdate = mongoengine.DateTimeField(default=datetime.datetime.utcnow)
    meta = {
        'auto_create_index': False,
        'index_background': True,
        'indexes': ['rname', 'end'],
        'collection': 'orders'
    }

    def save(self, *args, **kwargs):
        """Stamp ``lastUpdate`` with the current UTC time, then save.

        All arguments are forwarded unchanged to the parent ``save``; its
        return value is returned.
        """
        self.lastUpdate = datetime.datetime.utcnow()
        return super(Order, self).save(*args, **kwargs)

    @staticmethod
    def get_all_restaurant_orders(rname):
        """Return every order of restaurant ``rname`` as a list of dicts."""
        return [order.to_dict() for order in Order.objects(rname=rname)]

    @staticmethod
    def get_restaurant_order(rname, end):
        """Return the first order matching ``rname`` and ``end``, or None."""
        # first() already yields None when the query matches nothing.
        return Order.objects(rname=rname, end=end).first()

    def to_dict(self):
        """Convert this document to a dict via ``mongo_to_dict``."""
        return mongo_to_dict(self, [])
Example #4
0
class Notebook(m.Document):
    """Notebook identified by a unique slug, holding references to notes."""

    slug = m.StringField(max_length=255, unique=True)
    notes = m.EmbeddedDocumentListField(NoteReference)

    def clean(self):
        """Assign a generated slug when none has been set yet."""
        if not self.slug:
            self.slug = self.generate_slug()

    def add_note(self, note):
        """Attach ``note`` to this notebook and record a reference to it.

        The note is saved first so that it has an id to reference. The
        notebook itself is not saved here.
        """
        note.notebook = self
        note.save()

        reference = NoteReference()
        reference.id = note.id
        reference.resume = note.resume

        self.notes.append(reference)

    def remove_note(self, note):
        """Drop every reference whose id equals ``note.id``."""
        # Collect first, then remove, so the list is not mutated while it
        # is being iterated.
        to_remove = [ref for ref in self.notes if ref.id == note.id]

        for reference in to_remove:
            self.notes.remove(reference)

    def list_notes(self):
        """Return a queryset of the Note documents referenced here."""
        ids = [ref.id for ref in self.notes]
        return Note.objects(id__in=ids)

    def generate_slug(self):
        """Return a random alphanumeric slug not used by any Notebook.

        Eight random bytes are base64-encoded and every non-alphanumeric
        character is stripped. Candidates that are empty or already taken
        are discarded and a new one is drawn.
        """
        while True:
            # b64encode + decode works on both Python 2 and 3;
            # base64.encodestring no longer exists on Python >= 3.9 and
            # returned bytes on Python 3, which the str regex rejects.
            token = base64.b64encode(os.urandom(8)).decode('ascii')
            slug = re.sub(r'[^0-9A-Za-z]', '', token)

            if slug and not Notebook.objects(slug=slug).count():
                return slug
Example #5
0
class ConditionalClassMixin(object):
    """Mixin that turns a list of conditions into one combined query.

    The owner filter and the ``q`` of every condition are chained with the
    logical ``&`` operator and run against ``condition_resource_cls``.
    """

    # mongoengine document class that is queried; its `_fields` and
    # `objects` attributes are used below
    condition_resource_cls = None

    conditions = me.EmbeddedDocumentListField(BaseCondition)

    def owner_query(self):
        """Return the query restricting results to ``self.owner_id``."""
        return me.Q(owner=self.owner_id)

    def get_resources(self):
        """Return the queryset of resources matching all conditions.

        When the resource class declares a ``deleted`` and/or a
        ``missing_since`` field, only resources where that field is None
        are kept.
        """
        resource_cls = self.condition_resource_cls
        combined = self.owner_query()
        for cond in self.conditions:
            combined &= cond.q
        for marker_field in ('deleted', 'missing_since'):
            if marker_field in resource_cls._fields:
                combined &= me.Q(**{marker_field: None})
        return resource_cls.objects(combined)

    def get_ids(self):
        """Return the ids of all resources matched by ``get_resources``."""
        ids = []
        for resource in self.get_resources():
            ids.append(resource.id)
        return ids
Example #6
0
class Car(mongoengine.Document):
    """Car document with an embedded engine and a list of service records."""

    model = mongoengine.StringField(required=True)
    make = mongoengine.StringField(required=True)
    year = mongoengine.IntField(required=True)
    mileage = mongoengine.FloatField(default=0.0)
    # 32 hex characters: a random UUID written without dashes
    vi_number = mongoengine.StringField(default=lambda: uuid.uuid4().hex)

    engine = mongoengine.EmbeddedDocumentField(Engine, required=True)
    service_history = mongoengine.EmbeddedDocumentListField(ServiceHistory)

    # Alternative meta for service_app.py:
    # meta = {
    #     'db_alias': 'core',
    #     'collection': 'cars'
    # }

    # Meta used for q_and_a.py
    meta = {
        'db_alias': 'dealership',
        'collection': 'cars',
        'indexes': ['service_history.price'],
    }
Example #7
0
class Group(gj.EmbeddedDocument):
    """Embedded document describing a group and the devices it contains."""
    # NOTE(review): the base class comes from `gj` while the fields come
    # from `db` - presumably two aliases over mongoengine; confirm imports.

    # Required integer identifier, declared unique.
    intid = db.IntField(required=True, unique=True)
    # Required free-text description of the group.
    description = db.StringField(required=True)
    # Embedded documents of the class named 'Device' (referenced by name).
    devices = db.EmbeddedDocumentListField('Device')
Example #8
0
class Organization(Owner):
    """Organization: a named owner with member users grouped into teams.

    ``clean()`` enforces the invariants: unique team names, team members
    are org members, an ``Owners`` team exists and is not empty, and the
    org name is unique.
    """

    name = me.StringField(required=True)
    members = me.ListField(me.ReferenceField(User), required=True)
    members_count = me.IntField(default=0)
    teams = me.EmbeddedDocumentListField(Team, default=_get_default_org_teams)
    teams_count = me.IntField(default=0)
    clouds_count = me.IntField(default=0)
    # These are assigned only to organization from now on
    promo_codes = me.ListField()
    selected_plan = me.StringField()
    enterprise_plan = me.DictField()
    enable_r12ns = me.BooleanField(required=True, default=False)
    default_monitoring_method = me.StringField(
        choices=config.MONITORING_METHODS)

    insights_enabled = me.BooleanField(default=config.HAS_INSIGHTS)
    ownership_enabled = me.BooleanField()

    created = me.DateTimeField(default=datetime.datetime.now)
    registered_by = me.StringField()

    # used to allow creation of sub-org
    super_org = me.BooleanField(default=False)
    parent = me.ReferenceField('Organization', required=False)

    meta = {'indexes': ['name']}

    @property
    def mapper(self):
        """Returns the `PermissionMapper` for the current Org context."""
        # Imported lazily; which implementation is used depends on whether
        # RBAC is enabled in the configuration.
        if config.HAS_RBAC:
            from mist.rbac.tasks import AsyncPermissionMapper
        else:
            from mist.api.dummy.mappings import AsyncPermissionMapper
        return AsyncPermissionMapper(self)

    def __str__(self):
        return 'Org %s (%d teams - %d members)' % (self.name, len(self.teams),
                                                   len(self.members))

    def get_email(self):
        """Return the email of the first member of the Owners team."""
        return self.teams.get(name='Owners').members[0].email

    def get_emails(self):
        """Return the emails of all members of the Owners team."""
        return [user.email for user in self.teams.get(name='Owners').members]

    def get_team(self, team_name):
        """Return the team named `team_name` or raise TeamNotFound."""
        try:
            return self.teams.get(name=team_name)
        except me.DoesNotExist:
            raise TeamNotFound("No team found with name '%s'." % team_name)

    def get_team_by_id(self, team_id):
        """Return the team with id `team_id` or raise TeamNotFound."""
        try:
            return self.teams.get(id=team_id)
        except me.DoesNotExist:
            raise TeamNotFound("No team found with id '%s'." % team_id)

    def _add_member(self, team, user):
        """Add `user` to `team` and to the org members, skipping duplicates."""
        if user not in team.members:
            team.members.append(user)
        if user not in self.members:
            self.members.append(user)

    @staticmethod
    def _remove_first_match(members, user):
        """Pop the first entry of `members` that compares equal to `user`."""
        for i, member in enumerate(members):
            if user == member:
                members.pop(i)
                break

    def add_member_to_team(self, team_name, user):
        """Add `user` to the team named `team_name` and to the org."""
        self._add_member(self.get_team(team_name), user)

    def add_member_to_team_by_id(self, team_id, user):
        """Add `user` to the team with id `team_id` and to the org."""
        self._add_member(self.get_team_by_id(team_id), user)

    def remove_member_from_team(self, team_name, user):
        """Remove `user` from the team named `team_name`, if present."""
        self._remove_first_match(self.get_team(team_name).members, user)

    def remove_member_from_team_by_id(self, team_id, user):
        """Remove `user` from the team with id `team_id`, if present."""
        self._remove_first_match(self.get_team_by_id(team_id).members, user)

    def remove_member_from_members(self, user):
        """Remove `user` from the org members, if present."""
        self._remove_first_match(self.members, user)

    def as_dict(self):
        """Return a JSON-friendly dict view of the organization.

        Members are expanded to dicts with id, name, email and the
        `pending`/`parent` flags. Users with a pending invitation are
        included with `pending` set. When a parent org exists, its members
        and teams are appended with `parent` set.

        Side effect: members that can no longer be dereferenced are removed
        from the org, which is then saved.
        """
        view = json.loads(self.to_json())
        view["id"] = view.pop("_id")
        del view["_cls"]
        view["members"] = []
        # Iterate over a snapshot: stale entries are removed from
        # self.members inside the loop, and mutating a list while iterating
        # it would skip the entry following each removal.
        for member in list(self.members):
            try:
                name = member.get_nice_name()
            except AttributeError:  # Cannot dereference member
                try:
                    self.members.remove(member)
                    self.save()
                except Exception as e:
                    log.error("Failed to remove missing member from %s: %r",
                              self.name, e)
                continue
            view["members"].append({
                "id": member.id,
                "name": name,
                "email": member.email,
                "pending": False,
                "parent": False
            })
        team_pending_members = {}
        invitations = MemberInvitation.objects(org=self)
        for invitation in invitations:
            member = invitation.user
            # "<first> <last>", falling back to the email when both names
            # are empty. The separator is always inserted and stripped
            # afterwards so that two non-empty names are not glued together.
            name = (member.first_name or '') + ' ' + (member.last_name or '')
            name = (name.strip() or member.email)
            view["members"].append({
                "id": member.id,
                "name": name,
                "email": member.email,
                "pending": True,
                "parent": False,
            })
            for team_id in invitation.teams:
                team_pending_members.setdefault(team_id, []).append(member.id)
        for team in view['teams']:
            team["parent"] = False
            if team['id'] in team_pending_members:
                team['members'].extend(team_pending_members[team['id']])

        # handle here the info from parent org
        if self.parent:
            view["parent_org_name"] = self.parent.name
            parent_org = self.parent.as_dict()
            parent_members = parent_org['members']
            parent_teams = parent_org['teams']

            for p_member in parent_members:
                p_member['parent'] = True
                view['members'].append(p_member)

            for p_team in parent_teams:
                p_team['parent'] = True
                view["teams"].append(p_team)

        return view

    def clean(self):
        """Validate and normalise the organization before it is saved.

        Raises me.ValidationError when team names collide, the Owners team
        is missing or empty, the Owners policy contains rules (RBAC only),
        or another organization already uses the same name.
        """
        # make sure that each team's name is unique
        used = set()
        for team in self.teams:
            if team.name in used:
                raise me.ValidationError("Team name exists.")
            used.add(team.name)

        # make sure that all team members are also org members
        for team in self.teams:
            # Walk the snapshot from the end: popping index i then never
            # shifts the position of an entry that is still to be checked.
            for i, member in reversed(list(enumerate(team.members))):
                if member not in self.members:
                    team.members.pop(i)

        # make sure that owners team is present
        try:
            owners = self.teams.get(name='Owners')
        except me.DoesNotExist:
            raise me.ValidationError("Owners team can't be removed.")

        # make sure that owners team is not empty
        if not owners.members:
            raise me.ValidationError("Owners team can't be empty.")

        if config.HAS_RBAC:
            # make sure owners policy allows all permissions
            if owners.policy.operator != 'ALLOW':
                owners.policy.operator = 'ALLOW'
                log.warning("Owners policy must be set to ALLOW. Updating...")

            # make sure owners policy doesn't contain specific rules
            if owners.policy.rules:
                raise me.ValidationError("Can't set policy rules for Owners.")

        # make sure org name is unique - we can't use the unique keyword on the
        # field definition because both User and Organization subclass Owner
        # but only Organization has a name
        if self.name and Organization.objects(name=self.name, id__ne=self.id):
            raise me.ValidationError("Organization with name '%s' "
                                     "already exists." % self.name)

        self.members_count = len(self.members)
        self.teams_count = len(self.teams)

        # Add schedule for metering. Best effort: a failure is logged and
        # does not block validation.
        try:
            from mist.api.poller.models import MeteringPollingSchedule
            MeteringPollingSchedule.add(self, run_immediately=False)
        except Exception as exc:
            log.error('Error adding metering schedule for %s: %r', self, exc)

        super(Organization, self).clean()
Example #9
0
class User(Owner):
    """User owner: account data, credentials and token bookkeeping.

    Two users compare equal when their ids are equal.
    """

    email = HtmlSafeStrField()
    # NOTE: deprecated. Only still used to migrate old API tokens
    mist_api_token = me.StringField()
    last_name = HtmlSafeStrField(default='')
    # Pass the class (a callable), not an instance: a single Feedback()
    # instance would be shared as the default by every user document.
    feedback = me.EmbeddedDocumentField(Feedback, default=Feedback)

    activation_key = me.StringField()
    first_name = HtmlSafeStrField(default='')
    invitation_accepted = me.FloatField()
    invitation_date = me.FloatField()
    last_login = me.FloatField()
    password = me.StringField()
    password_set_token = me.StringField()
    password_set_token_created = me.FloatField()
    password_set_user_agent = me.StringField()
    registration_date = me.FloatField()
    registration_method = me.StringField()
    requested_demo = me.BooleanField()
    demo_request_date = me.FloatField()
    role = me.StringField()
    status = me.StringField()

    # these fields will exists only for org
    # when migration from user to org completes
    promo_codes = me.ListField()
    selected_plan = me.StringField()
    enterprise_plan = me.DictField()

    open_id_url = HtmlSafeStrField()

    password_reset_token_ip_addr = me.StringField()
    password_reset_token = me.StringField()
    password_reset_token_created = me.FloatField()
    whitelist_ip_token_ip_addr = me.StringField()
    whitelist_ip_token = me.StringField()
    whitelist_ip_token_created = me.FloatField()
    user_agent = me.StringField()
    username = me.StringField()

    can_create_org = me.BooleanField(default=True)
    beta_access = me.BooleanField(default=True)

    # Callable default so that each user gets its own list
    ips = me.EmbeddedDocumentListField(WhitelistIP, default=list)

    meta = {
        'indexes': [
            {
                'fields': [
                    '$email', '$first_name', '$last_name', '$username'
                ],
                'default_language': 'english',
                'weights': {'last_name': 10, 'first_name': 10}
            },
        ]
    }

    def __str__(self):
        return 'User %s' % self.email

    def set_password(self, password):
        """Hash `password`, store it on the user and save the user."""
        # could perform strength measuring first
        # NOTE(review): newer passlib releases appear to deprecate
        # `encrypt` in favour of `hash` - confirm against the pinned version.
        hashed_pwd = pwd_context.encrypt(password)
        self.password = hashed_pwd
        self.save()

    def check_password(self, password):
        """
        Return True if password matches, False otherwise.
        This will also update the password if it's using a deprecated scheme.
        False is returned whenever the stored password or the password
        passed as argument is empty, e.g. when the user registered through
        SSO and never set a password.
        """
        if not self.password or not password:
            return False
        ok, new_hash = pwd_context.verify_and_update(password, self.password)
        if not ok:
            return False
        if new_hash:
            # hashed password was using a deprecated scheme, update it
            log.info("Updating user's password.")
            self.password = new_hash
            self.save()
        return True

    def __eq__(self, other):
        """Compare by id; any object exposing an `id` attribute qualifies."""
        try:
            return self.id == other.id
        except AttributeError:
            # `other` has no id (e.g. None): let Python fall back to its
            # default comparison instead of raising.
            return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone makes a class unhashable on Python 3;
        # hash on the same key that equality uses.
        return hash(self.id)

    def clean(self):
        """Reject the user if another user already has the same email."""
        # make sure user.email is unique - we can't use the unique keyword on
        # the field definition because both User and Organization subclass
        # Owner but only user has an email field
        if User.objects(email=self.email, id__ne=self.id):
            raise me.ValidationError("User with email '%s' already exists."
                                     % self.email)

        super(User, self).clean()

    def get_nice_name(self):
        """Return a display name for the user.

        With only a first name set: the first name followed by the email
        in parentheses. Otherwise "<first> <last>" stripped, or the email
        when both names are empty.
        """
        try:
            if self.first_name and not self.last_name:
                return self.first_name + '(' + self.email + ')'
            name = (self.first_name or '') + ' ' + (self.last_name or '')
            return name.strip() or self.email
        except AttributeError:
            return self.email

    def get_ownership_mapper(self, org):
        """Return the `OwnershipMapper` in the specified Org context."""
        # Imported lazily; the implementation depends on whether RBAC is
        # enabled in the configuration.
        if config.HAS_RBAC:
            from mist.rbac.mappings import OwnershipMapper
        else:
            from mist.api.dummy.mappings import OwnershipMapper
        return OwnershipMapper(self, org)
Example #10
0
class GatingStrategy(mongoengine.Document):
    """
    A GatingStrategy is synonymous to what an immunologist would classically consider
    a "gating template"; it is a collection of 'gates' (Gate objects, in the case of CytoPy)
    that can be applied to multiple fcs files or an entire experiment in bulk. A user defines
    a GatingStrategy using a single example from an experiment, uses the object to preview gates
    and label child populations, and when satisfied with the performance saves the GatingStrategy
    to the database to be applied to the remaining samples in the Experiment.

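    Attributes
    -----------
    name: str, required
        unique identifier for the gating strategy
    gates: list of references to Gate documents
        gates that belong to this gating strategy
    actions: EmbeddedDocumentList
        list of Action documents
    hyperparameter_search: dict
        hyperparameter grid and cost metric, keyed by gate name
    creation_date: DateTime
        date of creation
    last_edit: DateTime
        date of last edit
    flags: str, optional
        warnings associated to this gating strategy
    notes: str, optional
        free text comments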
    """
    name = mongoengine.StringField(required=True, unique=True)
    gates = mongoengine.ListField(mongoengine.ReferenceField(Gate, reverse_delete_rule=mongoengine.PULL))
    actions = mongoengine.EmbeddedDocumentListField(Action)
    hyperparameter_search = mongoengine.DictField()
    creation_date = mongoengine.DateTimeField(default=datetime.now)
    last_edit = mongoengine.DateTimeField(default=datetime.now)
    flags = mongoengine.StringField(required=False)
    notes = mongoengine.StringField(required=False)
    meta = {
        'db_alias': 'core',
        'collection': 'gating_strategy'
    }

    def __init__(self, *args, **values):
        """
        Create the GatingStrategy.

        Parameters
        ----------
        args
            Positional arguments forwarded to the parent constructor
        values
            Keyword arguments forwarded to the parent constructor, except
            for "verbose" (default=True) which is consumed here
        """
        # "verbose" is popped so that it is not passed on to the parent
        # constructor together with the remaining values
        self.verbose = values.pop("verbose", True)
        self.print = vprint(verbose=self.verbose)
        super().__init__(*args, **values)
        # No FileGroup is attached until load_data() is called
        self.filegroup = None

    def load_data(self,
                  experiment: Experiment,
                  sample_id: str):
        """
        Fetch a sample from the given experiment and attach it to this
        GatingStrategy as the FileGroup to gate.

        Parameters
        ----------
        experiment: Experiment
            Experiment the sample is fetched from
        sample_id: str
            Identifier passed to the experiment's get_sample

        Returns
        -------
        None
        """
        sample = experiment.get_sample(sample_id=sample_id)
        self.filegroup = sample

    def list_gates(self) -> list:
        """
        Collect the gate_name of every Gate in this GatingStrategy

        Returns
        -------
        list
        """
        names = []
        for existing_gate in self.gates:
            names.append(existing_gate.gate_name)
        return names

    def list_populations(self) -> list:
        """
        List the populations of the associated FileGroup (delegates to
        the FileGroup's own list_populations). Raises AssertionError when
        no FileGroup has been loaded.

        Returns
        -------
        list
        """
        filegroup = self.filegroup
        assert filegroup is not None, "No FileGroup associated"
        populations = filegroup.list_populations()
        return list(populations)

    def _gate_exists(self,
                     gate: str):
        """
        Check that a gate with the given name is part of this
        GatingStrategy; raises AssertionError otherwise

        Parameters
        ----------
        gate: str
            Name of the gate to look for

        Returns
        -------
        None
        """
        known_gates = self.list_gates()
        assert gate in known_gates, f"Gate {gate} does not exist"

    def get_gate(self,
                 gate: str) -> Gate:
        """
        Look up a Gate object by its name. Raises AssertionError when no
        gate with that name exists.

        Parameters
        ----------
        gate: str
            Name of the gate

        Returns
        -------
        Gate
            The first gate whose gate_name equals the given name
        """
        self._gate_exists(gate=gate)
        matching = list(filter(lambda g: g.gate_name == gate, self.gates))
        return matching[0]

    def preview_gate(self,
                     gate: str or Gate or ThresholdGate or PolygonGate or EllipseGate,
                     create_plot_kwargs: dict or None = None,
                     plot_gate_kwargs: dict or None = None):
        """
        Fit a Gate to its parent population and plot the outcome, without
        adding any population to the FileGroup

        Parameters
        ----------
        gate: str or Gate or ThresholdGate or PolygonGate or EllipseGate
            Name of an existing Gate or a Gate object
        create_plot_kwargs: dict (optional)
            Additional arguments passed to CreatePlot
        plot_gate_kwargs: dict (optional)
            Additional arguments passed to plot_gate_children call of CreatePlot

        Returns
        -------
        Matplotlib.Axes
        """
        create_plot_kwargs = create_plot_kwargs if create_plot_kwargs else {}
        plot_gate_kwargs = plot_gate_kwargs if plot_gate_kwargs else {}
        # A name is resolved to the stored Gate; a Gate object is used as is
        gate_obj = self.get_gate(gate=gate) if isinstance(gate, str) else gate
        parent_df = self.filegroup.load_population_df(
            population=gate_obj.parent,
            transform=None,
            label_downstream_affiliations=False)
        gate_obj.fit(data=parent_df)
        plotter = CreatePlot(**create_plot_kwargs)
        return plotter.plot_gate_children(gate=gate_obj,
                                          parent=parent_df,
                                          **plot_gate_kwargs)

    def add_hyperparameter_grid(self,
                                gate_name: str,
                                params: dict,
                                cost: str or None = None):
        """
        Add a hyperparameter grid to search when applying the given gate to new data.
        This hyperparameter grid should correspond to valid hyperparameters for the
        corresponding gate. Invalid parameters will be ignored. Choice of the cost
        parameter to be minimised is dependent on the type of gate:
        * ThresholdGate:
            - "manhattan" (default): optimal parameters are those that result in the population whose signature
              is of minimal distance to the original data used to define the gate. The manhattan distance is used
              as the distance metric.
            - "euclidean": optimal parameters are those that result in the population whose signature
              is of minimal distance to the original data used to define the gate. The euclidean distance is used
              as the distance metric.
            - "threshold_dist": optimal parameters are those that result in the threshold
               whose distance to the original threshold defined is smallest
        * PolygonGate & EllipseGate:
            - "hausdorff" (default): parameters chosen that minimise the hausdorff distance
              between the polygon generated from new data and the original polygon gate created
              when the gate was defined
            - "manhattan": optimal parameters are those that result in the population whose signature
              is of minimal distance to the original data used to define the gate. The manhattan distance is used
              as the distance metric.
            - "euclidean": optimal parameters are those that result in the population whose signature
              is of minimal distance to the original data used to define the gate. The euclidean distance is used
              as the distance metric.

        Parameters
        ----------
        gate_name: str
            Gate to define hyperparameter grid for
        params: dict
            Grid of hyperparameters to be searched; every value must be a list
        cost: str, optional
            What to be minimised to choose optimal hyperparameters. When omitted,
            the default for the gate type (see above) is used.

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If the gate does not exist, the cost is not valid for the type of
            gate, or params is not a dictionary of lists
        """
        assert gate_name in self.list_gates(), f"{gate_name} is not a valid gate"
        # Resolve the gate once instead of once per type check
        gate = self.get_gate(gate_name)
        if isinstance(gate, ThresholdGate):
            cost = cost or "manhattan"
            valid_metrics = ["manhattan", "threshold_dist", "euclidean"]
            err = f"For threshold gate 'cost' should either be one of {valid_metrics}"
            assert cost in valid_metrics, err
        if isinstance(gate, (PolygonGate, EllipseGate)):
            cost = cost or "hausdorff"
            valid_metrics = ["hausdorff", "manhattan", "euclidean"]
            err = f"For polygon or ellipse gate 'cost' should either be one of {valid_metrics}"
            assert cost in valid_metrics, err
        err = "'params' must be a dictionary with each key corresponding to a valid " \
              "hyperparameter and each value a list of parameter values"
        assert isinstance(params, dict), err
        assert all(isinstance(x, list) for x in params.values()), err
        self.hyperparameter_search[gate_name] = {"grid": params,
                                                 "cost": cost}

    def apply_gate(self,
                   gate: str or Gate or ThresholdGate or PolygonGate or EllipseGate,
                   plot: bool = True,
                   verbose: bool = True,
                   add_to_strategy: bool = True,
                   create_plot_kwargs: dict or None = None,
                   plot_gate_kwargs: dict or None = None,
                   hyperparam_search: bool = True,
                   overwrite_method_kwargs: dict or None = None):
        """
        Apply a gate to the associated FileGroup. The gate must be previously defined;
        children associated and labeled. Either a Gate object can be provided or the name
        of an existing gate saved to this GatingStrategy.

        Parameters
        ----------
        gate: str or Gate or ThresholdGate or PolygonGate or EllipseGate
            Name of an existing Gate or a Gate object. When a name is given the
            gate already belongs to the strategy, so add_to_strategy is ignored.
        plot: bool (default=True)
            If True, returns a Matplotlib.Axes object of plotted gate
        verbose: bool (default=True)
            If True, print gating statistics to stdout and provide feedback
        add_to_strategy: bool (default=True)
            If True, append the Gate to the GatingStrategy
        create_plot_kwargs: dict (optional)
            Additional arguments passed to CreatePlot
        plot_gate_kwargs: dict (optional)
            Additional arguments passed to plot_population_geoms call of CreatePlot
        hyperparam_search: bool (default=True)
            If True and hyperparameter grid has been defined for the chosen gate,
            then hyperparameter search is performed to find the optimal fit for the
            newly encountered data.
        overwrite_method_kwargs: dict, optional
            If a dictionary is provided (and hyperparameter search isn't defined for this gate)
            then method parameters are overwritten with these new parameters for the
            duration of this call. The gate's original method parameters are
            restored before returning.

        Returns
        -------
        Matplotlib.Axes or None

        Raises
        ------
        AssertionError
            If add_to_strategy is True and a gate of the same name already exists
        """
        if isinstance(gate, str):
            gate = self.get_gate(gate=gate)
            add_to_strategy = False
        if add_to_strategy:
            assert gate.gate_name not in self.list_gates(), \
                f"Gate with name {gate.gate_name} already exists. To continue set add_to_strategy to False"
        create_plot_kwargs = create_plot_kwargs or {}
        plot_gate_kwargs = plot_gate_kwargs or {}
        parent_data = self.filegroup.load_population_df(population=gate.parent,
                                                        transform=None,
                                                        label_downstream_affiliations=False)
        original_method_kwargs = gate.method_kwargs.copy()
        try:
            if overwrite_method_kwargs is not None:
                gate.method_kwargs = overwrite_method_kwargs
            if hyperparam_search and gate.gate_name in self.hyperparameter_search:
                search = self.hyperparameter_search.get(gate.gate_name)
                populations = hyperparameter_gate(gate=gate,
                                                  grid=search.get("grid"),
                                                  cost=search.get("cost"),
                                                  parent=parent_data,
                                                  verbose=verbose)
            elif gate.ctrl is None:
                populations = gate.fit_predict(data=parent_data)
            else:
                populations = self._control_gate(gate=gate)
            for p in populations:
                self.filegroup.add_population(population=p)
            if verbose:
                gate_stats(gate=gate, parent_data=parent_data, populations=populations)
            if add_to_strategy:
                self.gates.append(gate)
            if not plot:
                return None
            plotter = CreatePlot(**create_plot_kwargs)
            return plotter.plot_population_geoms(parent=parent_data,
                                                 children=populations,
                                                 **plot_gate_kwargs)
        finally:
            # Restore on every exit path: previously the early return taken
            # when plotting (and any exception) left the overriding
            # parameters on the gate.
            gate.method_kwargs = original_method_kwargs

    def apply_all(self,
                  verbose: bool = True):
        """
        Apply every gate and action of this GatingStrategy to the
        associated FileGroup. A gate is applied once its parent population
        exists; an action is applied once both its left and right
        populations exist. Gates are revisited in a round-robin fashion
        until none are left.

        Parameters
        ----------
        verbose: bool (default=True)
            If True, print feedback to stdout

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If there are no gates, if a population this strategy would create
            already exists, or if the iteration limit (100 passes per gate)
            is exhausted
        ValueError
            If the parent population of a gate has 3 or fewer events
        """
        feedback = vprint(verbose)
        populations_created = [child.name for g in self.gates for child in g.children]
        assert len(self.gates) > 0, "No gates to apply"
        err = "One or more of the populations generated from this gating strategy are already " \
              "presented in the population tree"
        assert all([x not in self.list_populations() for x in populations_created]), err
        pending_gates = list(self.gates)
        pending_actions = list(self.actions)
        cursor = 0
        remaining_iterations = len(pending_gates) * 100
        feedback("=====================================================")
        while pending_gates:
            # Wrap around once the cursor runs past the pending gates
            if cursor >= len(pending_gates):
                cursor = 0
            gate = pending_gates[cursor]
            if gate.parent in self.list_populations():
                if self.filegroup.population_stats(gate.parent).get("n") <= 3:
                    raise ValueError(f"Insufficient events in parent population {gate.parent}")
                feedback(f"------ Applying {gate.gate_name} ------")
                self.apply_gate(gate=gate,
                                plot=False,
                                verbose=verbose,
                                add_to_strategy=False)
                feedback("----------------------------------------")
                pending_gates = [g for g in pending_gates if g.gate_name != gate.gate_name]
            # Populations are re-listed on every check because applying a
            # gate or an action adds new ones
            applied_this_pass = list()
            for action in pending_actions:
                if action.left in self.list_populations() and action.right in self.list_populations():
                    feedback(f"------ Applying {action.action_name} ------")
                    self.apply_action(action=action,
                                      print_stats=verbose,
                                      add_to_strategy=False)
                    feedback("----------------------------------------")
                    applied_this_pass.append(action.action_name)
            pending_actions = [action for action in pending_actions
                               if action.action_name not in applied_this_pass]
            cursor += 1
            remaining_iterations -= 1
            assert remaining_iterations > 0, "Maximum number of iterations reached. This means that one or more parent " \
                                             "populations are not being identified."

    def delete_actions(self,
                       action_name: str):
        """
        Drop every action with the given name from this GatingStrategy.
        Names that match no action leave the list of actions unchanged.

        Parameters
        ----------
        action_name: str
            Name of the action(s) to remove

        Returns
        -------
        None
        """
        retained = list()
        for existing in self.actions:
            if existing.action_name != action_name:
                retained.append(existing)
        self.actions = retained

    def apply_action(self,
                     action: Action or str,
                     print_stats: bool = True,
                     add_to_strategy: bool = True):
        """
        Apply an action (a merge or a subtraction) to two populations that exist
        in the current population tree:
            * Merge: combine the left and right populations through
            FileGroup.merge_populations.
            * Subtraction: remove the right population from the left population
            through FileGroup.subtract_populations.

        Parameters
        ----------
        action: Action or str
            The action to apply, or the name of an action already associated
            to this GatingStrategy
        print_stats: bool (default=True)
            Print population statistics to stdout
        add_to_strategy: bool (default=True)
            Append the action to the actions of this GatingStrategy

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If a named action cannot be resolved to exactly one action, the method
            is neither "merge" nor "subtract", or the left/right population is missing
        """
        if isinstance(action, str):
            # Resolve the name against the actions already stored on the strategy
            candidates = [a for a in self.actions if a.action_name == action]
            assert len(candidates) == 1, f"{action} does not exist"
            action = candidates[0]
        assert action.method in ["merge", "subtract"], "Accepted methods are: merge, subtract"
        assert action.left in self.list_populations(), f"{action.left} does not exist"
        assert action.right in self.list_populations(), f"{action.right} does not exist"
        left = self.filegroup.get_population(action.left)
        right = self.filegroup.get_population(action.right)
        if action.method == "merge":
            combine = self.filegroup.merge_populations
        else:
            combine = self.filegroup.subtract_populations
        combine(left=left,
                right=right,
                new_population_name=action.new_population_name)
        if print_stats:
            # Fall back to the "<method>_<left>_<right>" name when none was given
            default_name = f"{action.method}_{left.population_name}_{right.population_name}"
            new_pop_name = action.new_population_name or default_name
            new_pop = self.filegroup.get_population(population_name=new_pop_name)
            print(f"------ {action.action_name} ------")
            parent_n = self.filegroup.get_population(left.parent).n
            print(f"Parent ({left.parent}) n: {parent_n}")
            print(f"Left pop ({left.population_name}) n: {left.n}; {left.n / parent_n * 100}%")
            print(f"Right pop ({right.population_name}) n: {right.n}; {right.n / parent_n * 100}%")
            print(f"New population n: {new_pop.n}; {new_pop.n / parent_n * 100}%")
            print("-----------------------------------")
        if add_to_strategy:
            self.actions.append(action)

    def delete_gate(self,
                    gate_name: str):
        """
        Detach a gate from this GatingStrategy by name. Only the list of gates
        is modified here; populations are not touched by this method.

        Parameters
        ----------
        gate_name: str
            Name of the gate for removal

        Returns
        -------
        None
        """
        remaining = list()
        for candidate in self.gates:
            if candidate.gate_name != gate_name:
                remaining.append(candidate)
        self.gates = remaining

    def delete_populations(self,
                           populations: str or list):
        """
        Remove populations from the associated FileGroup.
        Thin wrapper that forwards to FileGroup.delete_populations.

        Parameters
        ----------
        populations: list or str
            Population name(s) handed unchanged to the FileGroup; see
            FileGroup.delete_populations for the accepted values.

        Returns
        -------
        None
        """
        target = self.filegroup
        target.delete_populations(populations=populations)

    def plot_gate(self,
                  gate: str,
                  create_plot_kwargs: dict or None = None,
                  **kwargs):
        """
        Plot a gate that is already associated to this GatingStrategy: the parent
        population the gate acts on is drawn together with the geometries of the
        child populations the gate generates.

        Parameters
        ----------
        gate: str
            Name of an existing gate in this GatingStrategy
        create_plot_kwargs: dict (optional)
            Keyword arguments used to construct the CreatePlot object
        kwargs:
            Keyword arguments forwarded to CreatePlot.plot_population_geoms

        Returns
        -------
        Whatever CreatePlot.plot_population_geoms returns

        Raises
        ------
        AssertionError
            If gate is not a string or is not listed by list_gates
        """
        assert isinstance(gate, str), "Provide the name of an existing Gate in this GatingStrategy"
        assert gate in self.list_gates(), \
            f"Gate {gate} not recognised. Have you applied it and added it to the strategy?"
        if not create_plot_kwargs:
            create_plot_kwargs = {}
        gate_obj = self.get_gate(gate=gate)
        parent_df = self.filegroup.load_population_df(population=gate_obj.parent,
                                                      transform=None,
                                                      label_downstream_affiliations=False)
        plotter = CreatePlot(**create_plot_kwargs)
        child_populations = list()
        for child in gate_obj.children:
            child_populations.append(self.filegroup.get_population(child.name))
        return plotter.plot_population_geoms(parent=parent_df,
                                             children=child_populations,
                                             **kwargs)

    def plot_backgate(self,
                      parent: str,
                      overlay: list,
                      x: str,
                      y: str or None = None,
                      create_plot_kwargs: dict or None = None,
                      **backgate_kwargs):
        """
        Given some population as the backdrop (parent) and a list of one or more
        populations that occur downstream of the parent (overlay), plot the downstream
        populations over the top of the parent.

        Parameters
        ----------
        parent: str
            Name of the backdrop population
        overlay: list
            Names of populations downstream of parent to overlay
        x: str
            Passed to CreatePlot.backgate as x
        y: str (optional)
            Passed to CreatePlot.backgate as y
        create_plot_kwargs: dict (optional)
            Keyword arguments used to construct the CreatePlot object; an empty
            dictionary is used when omitted
        backgate_kwargs
            Additional keyword arguments passed to CreatePlot.backgate

        Returns
        -------
        Whatever CreatePlot.backgate returns

        Raises
        ------
        AssertionError
            If parent or any overlay population does not exist, or if an overlay
            population is not downstream of parent
        """
        # Default of None cannot be unpacked with **; normalise it the same way
        # plot_gate and plot_population do.
        create_plot_kwargs = create_plot_kwargs or {}
        assert parent in self.list_populations(), "Parent population does not exist"
        assert all([x in self.list_populations() for x in overlay]), "One or more given populations could not be found"
        downstream = self.filegroup.list_downstream_populations(population=parent)
        assert all([x in downstream for x in overlay]), \
            "One or more of the given populations is not downstream of the given parent"
        plotting = CreatePlot(**create_plot_kwargs)
        parent = self.filegroup.load_population_df(population=parent,
                                                   transform=None,
                                                   label_downstream_affiliations=False)
        children = {x: self.filegroup.load_population_df(population=x,
                                                         transform=None,
                                                         label_downstream_affiliations=False)
                    for x in overlay}
        return plotting.backgate(parent=parent,
                                 children=children,
                                 x=x,
                                 y=y,
                                 **backgate_kwargs)

    def plot_population(self,
                        population: str,
                        x: str,
                        y: str or None = None,
                        transform_x: str or None = "logicle",
                        transform_y: str or None = "logicle",
                        create_plot_kwargs: dict or None = None,
                        **plot_kwargs):
        """
        Plot a population that exists in the associated FileGroup.

        Parameters
        ----------
        population: str
            Name of the population to plot
        x: str
            Passed to CreatePlot.plot as x
        y: str (optional)
            Passed to CreatePlot.plot as y
        transform_x: str (optional; default="logicle")
            Passed to the CreatePlot constructor
        transform_y: str (optional; default="logicle")
            Passed to the CreatePlot constructor
        create_plot_kwargs: dict (optional)
            Additional keyword arguments for the CreatePlot constructor
        plot_kwargs
            Additional keyword arguments passed to CreatePlot.plot

        Returns
        -------
        Whatever CreatePlot.plot returns

        Raises
        ------
        AssertionError
            If the population is not present in the population tree
        """
        assert population in self.list_populations(), f"{population} does not exist"
        # Data is loaded untransformed; the transforms are handed to CreatePlot instead
        population_df = self.filegroup.load_population_df(population=population,
                                                          transform=None,
                                                          label_downstream_affiliations=False)
        if not create_plot_kwargs:
            create_plot_kwargs = {}
        plotter = CreatePlot(transform_x=transform_x,
                             transform_y=transform_y,
                             **create_plot_kwargs)
        return plotter.plot(data=population_df, x=x, y=y, **plot_kwargs)

    def print_population_tree(self, **kwargs):
        """
        Display the population tree of the associated FileGroup.
        Delegates to FileGroup.print_population_tree.

        Parameters
        ----------
        kwargs
            Forwarded unchanged to FileGroup.print_population_tree

        Returns
        -------
        None
        """
        target = self.filegroup
        target.print_population_tree(**kwargs)

    def edit_gate(self,
                  gate_name: str,
                  x_threshold: float or None = None,
                  y_threshold: float or None = None,
                  x_values: list or None = None,
                  y_values: list or None = None):
        """
        Edit the geometry (threshold or polygon) of a gate that has already been
        applied. The new geometry is applied to the parent population the gate acts
        upon, updating each child population, and the change is then echoed to the
        populations downstream of each child by re-applying their stored geometries.
        Downstream gates do not adapt dynamically, so their results should be checked
        after an edit.

        Parameters
        ----------
        gate_name: str
            Name of the gate to edit
        x_threshold: float (optional)
            Required for threshold geometries
        y_threshold: float (optional)
            Required for 2D threshold geometries
        x_values: list (optional)
            Required for Polygon geometries
        y_values: list (optional)
            Required for Polygon geometries

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If the children of the gate are missing from the population tree or the
            values required for the geometry type are not provided
        """
        gate = self.get_gate(gate=gate_name)
        err = "Cannot edit a gate that has not been applied; gate children not present in population " \
              "tree."
        child_names = [c.name for c in gate.children]
        assert all([name in self.filegroup.tree.keys() for name in child_names]), err
        # Map each defined axis feature of the gate to its transformation
        axis_transforms = dict()
        for feature, axis in zip([gate.x, gate.y], ["x", "y"]):
            transform = gate.transformations.get(axis, None)
            if feature is not None:
                axis_transforms[feature] = transform
        parent_df = self.filegroup.load_population_df(population=gate.parent,
                                                      transform=axis_transforms)
        for child in gate.children:
            population = self.filegroup.get_population(population_name=child.name)
            geom = population.geom
            if isinstance(geom, ThresholdGeom):
                assert x_threshold is not None, "For threshold geometry, please provide x_threshold"
                if geom.y_threshold is not None:
                    assert y_threshold is not None, "For 2D threshold geometry, please provide y_threshold"
                update_threshold(population=population,
                                 parent_data=parent_df,
                                 x_threshold=x_threshold,
                                 y_threshold=y_threshold)
            elif isinstance(geom, PolygonGeom):
                assert x_values is not None and y_values is not None, \
                    "For polygon gate please provide x_values and y_values"
                update_polygon(population=population,
                               parent_data=parent_df,
                               x_values=x_values,
                               y_values=y_values)
            self._edit_downstream_effects(population_name=child.name)

    def _edit_downstream_effects(self,
                                 population_name: str):
        """
        Propagate an edit to the populations downstream of the given population:
        each downstream population has its own stored geometry re-applied to freshly
        loaded data from its parent. Called by 'edit_gate' for every child of the
        edited gate.

        Parameters
        ----------
        population_name: str
            Population whose downstream populations should be refreshed

        Returns
        -------
        None
        """
        for name in self.filegroup.list_downstream_populations(population=population_name):
            population = self.filegroup.get_population(name)
            geom = population.geom
            # Map each defined axis feature of the geometry to its transformation
            axis_transforms = dict()
            for feature, transform in zip([geom.x, geom.y],
                                          [geom.transform_x, geom.transform_y]):
                if feature is not None:
                    axis_transforms[feature] = transform
            parent_df = self.filegroup.load_population_df(population=population.parent,
                                                          transform=axis_transforms)
            if isinstance(population.geom, ThresholdGeom):
                update_threshold(population=population,
                                 parent_data=parent_df,
                                 x_threshold=population.geom.x_threshold,
                                 y_threshold=population.geom.y_threshold)
            elif isinstance(population.geom, PolygonGeom):
                update_polygon(population=population,
                               parent_data=parent_df,
                               x_values=population.geom.x_values,
                               y_values=population.geom.y_values)

    def _control_gate(self,
                      gate: Gate or ThresholdGate or PolygonGate or EllipseGate):
        """
        Internal method for applying a gate by way of control data. The parent
        population is loaded from the control named by the gate and the gate is fitted
        to it; the geometries found are copied onto the matching children of the gate,
        and the gate is then run on the parent population of the primary data.

        Parameters
        ----------
        gate: Gate or ThresholdGate or PolygonGate or EllipseGate

        Returns
        -------
        Result of gate.fit_predict on the primary parent data

        Raises
        ------
        AssertionError
            If the FileGroup has no data for the control of the gate, or a population
            estimated from the control does not match exactly one child of the gate
        """
        assert gate.ctrl in self.filegroup.controls, f"FileGroup does not have data for {gate.ctrl}"
        ctrl_df = self.filegroup.load_ctrl_population_df(ctrl=gate.ctrl,
                                                         population=gate.parent,
                                                         transform=None)
        # Step 1: fit the gate on the control data
        ctrl_populations = gate.fit_predict(data=ctrl_df)
        refreshed_children = list()
        for estimated in ctrl_populations:
            matches = [child for child in gate.children
                       if child.name == estimated.population_name]
            assert len(matches) == 1, "Invalid gate. Estimated populations do not match children."
            matched_child = matches[0]
            matched_child.geom = estimated.geom
            refreshed_children.append(matched_child)
        gate.children = refreshed_children
        # Step 2: run the gate on the primary data
        primary_df = self.filegroup.load_population_df(population=gate.parent,
                                                       transform=None,
                                                       label_downstream_affiliations=False)
        return gate.fit_predict(data=primary_df)

    def save(self,
             save_strategy: bool = True,
             save_filegroup: bool = True,
             *args,
             **kwargs):
        """
        Save GatingStrategy and the populations generated for the associated
        FileGroup.

        Parameters
        ----------
        save_strategy: bool (default=True)
            Save every gate of this strategy and then the strategy document itself
        save_filegroup: bool (default=True)
            Register the name of this strategy on the associated FileGroup (if not
            already listed) and save the FileGroup. Skipped when no FileGroup is
            associated.
        args:
            Positional arguments for mongoengine.document.save call
        kwargs:
            Keyword arguments for mongoengine.document.save call

        Returns
        -------
        None
        """
        if save_strategy:
            for g in self.gates:
                g.save()
            super().save(*args, **kwargs)
        # The None check must precede any attribute access on the FileGroup,
        # otherwise a strategy without a FileGroup raises AttributeError.
        if save_filegroup and self.filegroup is not None:
            if self.name not in self.filegroup.gating_strategy:
                self.filegroup.gating_strategy.append(self.name)
            self.filegroup.save()

    def delete(self,
               delete_gates: bool = True,
               remove_associations: bool = True,
               *args, **kwargs):
        """
        Delete this gating strategy.

        Parameters
        ----------
        delete_gates: bool (default=True)
            Also delete the Gate objects associated to this strategy
        remove_associations: bool (default=True)
            For every FileGroup that lists this strategy, remove the strategy name,
            delete the populations named by the children of its gates, and save
        args:
            Positional arguments for mongoengine.document.delete call
        kwargs:
            Keyword arguments for mongoengine.document.delete call

        Returns
        -------
        None
        """
        super().delete(*args, **kwargs)
        # Unique names of all populations the gates of this strategy generate
        populations = list({child.name for gate in self.gates for child in gate.children})
        if delete_gates:
            self.print("Deleting gates...")
            for gate in self.gates:
                gate.delete()
        if remove_associations:
            self.print("Deleting associated populations in FileGroups...")
            for fg in progress_bar(FileGroup.objects(), verbose=self.verbose):
                if self.name not in fg.gating_strategy:
                    continue
                fg.gating_strategy = [name for name in fg.gating_strategy if name != self.name]
                fg.delete_populations(populations=populations)
                fg.save()
        self.print(f"{self.name} successfully deleted.")
Example #11
0
class Population(mongoengine.EmbeddedDocument):
    """
    A population of cells identified by either a gate or supervised algorithm. Stores the
    index of events corresponding to a single population, where the index relates back
    to the primary data in the FileGroup in which a population is embedded.

    Populations also store Clusters generated from high dimensional clustering algorithms
    such as FlowSOM or PhenoGraph. These clusters are derived from this population.

    Parameters
    ----------
    population_name: str
        name of population
    n: int
        number of events associated to this population
    parent: str, required, (default: "root")
        name of parent population
    prop_of_parent: float
        proportion of events as a percentage of parent population
    prop_of_total: float
        proportion of events as a percentage of all events
    warnings: list, optional
        list of warnings associated to population
    geom: PopulationGeometry
        PopulationGeometry (see CytoPy.data.geometry) that defines the gate that
        captures this population.
    clusters: EmbeddedDocListField
        list of associated Cluster documents
    definition: str
        relevant for populations generated by a ThresholdGate; defines the source of this
        population e.g. "+" for a 1D threshold or "+-" for a 2D threshold
    index: Numpy.Array
        numpy array storing index of events that belong to population
    ctrl_index: dict
        numpy arrays of event indexes keyed by name (see set_ctrl_index)
    signature: dict
        average of a population feature space (median of each channel); used to match
        children to newly identified populations for annotating
    """
    population_name = mongoengine.StringField()
    n = mongoengine.IntField()
    parent = mongoengine.StringField(required=True, default='root')
    prop_of_parent = mongoengine.FloatField()
    prop_of_total = mongoengine.FloatField()
    warnings = mongoengine.ListField()
    geom = mongoengine.EmbeddedDocumentField(PopulationGeometry)
    clusters = mongoengine.EmbeddedDocumentListField(Cluster)
    definition = mongoengine.StringField()
    signature = mongoengine.DictField()

    def __init__(self, *args, **kwargs):
        # "index" and "ctrl_index" are not document fields; strip them from kwargs
        # before the mongoengine constructor sees them and keep them on the instance.
        self._index = kwargs.pop("index", None)
        self._ctrl_index = kwargs.pop("ctrl_index", dict())
        super().__init__(*args, **kwargs)

    @property
    def index(self):
        """Index of events belonging to this population (None if never set)."""
        return self._index

    @index.setter
    def index(self, idx: np.array):
        """Store a copy of idx as the index and update n to its length."""
        assert isinstance(idx, np.ndarray), "idx should be type numpy.array"
        self.n = len(idx)
        self._index = np.array(idx)

    @property
    def ctrl_index(self):
        """Dictionary of index arrays populated through set_ctrl_index."""
        return self._ctrl_index

    def set_ctrl_index(self, **kwargs):
        """
        Store one index array per keyword; the keyword is used as the dictionary key.

        Parameters
        ----------
        kwargs
            Numpy arrays keyed by name

        Returns
        -------
        None
        """
        for k, v in kwargs.items():
            assert isinstance(v, np.ndarray), "ctrl_idx should be type numpy.array"
            self._ctrl_index[k] = v

    def add_cluster(self,
                    cluster: Cluster):
        """
        Add a new cluster generated from CytoPy.flow.clustering.main.Clustering.

        Parameters
        ----------
        cluster: Cluster

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If a cluster with the same cluster_id and tag is already stored
        """
        _id, tag = cluster.cluster_id, cluster.tag
        err = f"Cluster already exists with id: {_id}; tag: {tag}"
        assert not any(x.cluster_id == _id and x.tag == tag for x in self.clusters), err
        self.clusters.append(cluster)

    def delete_cluster(self,
                       cluster_id: str or None = None,
                       tag: str or None = None,
                       meta_label: str or None = None):
        """
        Delete cluster using either cluster ID, tag, or meta label

        Parameters
        ----------
        cluster_id: str
        tag: str
        meta_label: str

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If not exactly one of cluster_id, tag and meta_label is given
        """
        err = "Must provide either cluster_id, tag or meta_label"
        assert sum([x is not None for x in [cluster_id, tag, meta_label]]) == 1, err
        # Dispatch with the same "is not None" test used for validation so that a
        # falsy-but-provided value (e.g. an empty string) is still honoured.
        if cluster_id is not None:
            self.clusters = [c for c in self.clusters if c.cluster_id != cluster_id]
        elif tag is not None:
            self.clusters = [c for c in self.clusters if c.tag != tag]
        else:
            self.clusters = [c for c in self.clusters if c.meta_label != meta_label]

    def delete_all_clusters(self,
                            clusters: list or str = "all"):
        """
        Provide either a list of cluster IDs for deletion or give value of "all"
        to delete all clusters.

        Parameters
        ----------
        clusters: list or str (default="all")
            Any value that is not a list removes every cluster

        Returns
        -------
        None
        """
        if isinstance(clusters, list):
            self.clusters = [c for c in self.clusters if c.cluster_id not in clusters]
        else:
            self.clusters = []

    def list_clusters(self,
                      tag: str or None = None,
                      meta_label: str or None = None) -> List[str]:
        """
        List cluster IDs associated to a given tag or meta label. When both are
        given the tag takes precedence; when neither is given all IDs are listed.

        Parameters
        ----------
        tag: str
        meta_label: str

        Returns
        -------
        List
        """
        if tag:
            return [c.cluster_id for c in self.clusters if c.tag == tag]
        elif meta_label:
            return [c.cluster_id for c in self.clusters if c.meta_label == meta_label]
        else:
            return [c.cluster_id for c in self.clusters]

    def get_clusters(self,
                     cluster_id: list or None = None,
                     tag: str or None = None,
                     meta_label: str or None = None) -> List[Cluster]:
        """
        Returns list of cluster objects by either cluster IDs, tag or meta label.
        When several filters are given, a cluster must satisfy all of them.

        Parameters
        ----------
        cluster_id: list
            Accepted cluster IDs (a single string is matched by equality)
        tag: str
            Tag to match (a list of accepted tags is also honoured)
        meta_label: str
            Meta label to match (a list of accepted labels is also honoured)

        Returns
        -------
        list

        Raises
        ------
        AssertionError
            If none of cluster_id, tag and meta_label is given
        """
        err = "Provide list of cluster IDs and/or tag and/or meta_label"
        assert any(x is not None for x in [cluster_id, tag, meta_label]), err

        def _matches(value, wanted):
            # A plain string is compared for equality: "value in wanted" would be a
            # substring test and raises when value is None. Any other container is
            # treated as a collection of accepted values.
            if isinstance(wanted, str):
                return value == wanted
            return value in wanted

        clusters = self.clusters
        if cluster_id:
            clusters = [c for c in clusters if _matches(c.cluster_id, cluster_id)]
        if tag:
            clusters = [c for c in clusters if _matches(c.tag, tag)]
        if meta_label:
            clusters = [c for c in clusters if _matches(c.meta_label, meta_label)]
        return clusters
Example #12
0
class Machine(OwnershipMixin, me.Document):
    """The basic machine model"""

    id = me.StringField(primary_key=True, default=lambda: uuid.uuid4().hex)

    cloud = me.ReferenceField('Cloud', required=True)
    owner = me.ReferenceField('Organization', required=True)
    location = me.ReferenceField('CloudLocation', required=False)
    size = me.ReferenceField('CloudSize', required=False)
    network = me.ReferenceField('Network', required=False)
    subnet = me.ReferenceField('Subnet', required=False)
    name = me.StringField()

    # Info gathered mostly by libcloud (or in some cases user input).
    # Be more specific about what this is.
    # We should perhaps come up with a better name.
    machine_id = me.StringField(required=True)
    hostname = me.StringField()
    public_ips = me.ListField()
    private_ips = me.ListField()
    ssh_port = me.IntField(default=22)
    OS_TYPES = ('windows', 'coreos', 'freebsd', 'linux', 'unix')
    os_type = me.StringField(default='unix', choices=OS_TYPES)
    rdp_port = me.IntField(default=3389)
    actions = me.EmbeddedDocumentField(Actions, default=lambda: Actions())
    extra = me.DictField()
    cost = me.EmbeddedDocumentField(Cost, default=lambda: Cost())
    image_id = me.StringField()
    # libcloud.compute.types.NodeState
    state = me.StringField(default='unknown',
                           choices=('running', 'starting', 'rebooting',
                                    'terminated', 'pending', 'unknown',
                                    'stopping', 'stopped', 'suspended',
                                    'error', 'paused', 'reconfiguring'))
    machine_type = me.StringField(default='machine',
                                  choices=('machine', 'vm', 'container',
                                           'hypervisor', 'container-host'))
    parent = me.ReferenceField('Machine', required=False)

    # We should think this through a bit.
    key_associations = me.EmbeddedDocumentListField(KeyAssociation)

    last_seen = me.DateTimeField()
    missing_since = me.DateTimeField()
    unreachable_since = me.DateTimeField()
    created = me.DateTimeField()

    monitoring = me.EmbeddedDocumentField(Monitoring,
                                          default=lambda: Monitoring())

    ssh_probe = me.EmbeddedDocumentField(SSHProbe, required=False)
    ping_probe = me.EmbeddedDocumentField(PingProbe, required=False)

    # Number of vCPUs gathered from various sources. This field is meant to
    # be updated ONLY by the mist.api.metering.tasks:find_machine_cores task.
    cores = me.IntField()

    meta = {
        'collection': 'machines',
        'indexes': [{
            'fields': ['cloud', 'machine_id'],
            'sparse': False,
            'unique': True,
            'cls': False,
        }, {
            'fields': ['monitoring.installation_status.activated_at'],
            'sparse': True,
            'unique': False
        }],
        'strict': False,
    }

    def __init__(self, *args, **kwargs):
        super(Machine, self).__init__(*args, **kwargs)
        # Controller object bound to this machine instance.
        self.ctl = MachineController(self)

    def clean(self):
        """Normalize the document: prune key associations whose keypair is
        deleted, default the owner to the cloud's owner, sanitize os_type and
        fall back to the default monitoring method when the current one is
        not configured."""
        # Remove any KeyAssociation, whose `keypair` has been deleted. Do NOT
        # perform an atomic update on self, but rather remove items from the
        # self.key_associations list by iterating over it and popping matched
        # embedded documents in order to ensure that the most recent list is
        # always processed and saved.
        # Iterating indexes in reverse keeps the remaining indexes valid
        # while popping.
        for ka in reversed(range(len(self.key_associations))):
            if self.key_associations[ka].keypair.deleted:
                self.key_associations.pop(ka)
        # Populate owner field based on self.cloud.owner
        if not self.owner:
            self.owner = self.cloud.owner
        self.clean_os_type()
        if self.monitoring.method not in config.MONITORING_METHODS:
            self.monitoring.method = config.DEFAULT_MONITORING_METHOD

    def clean_os_type(self):
        """Clean self.os_type

        Values already in OS_TYPES are left untouched. Otherwise the value is
        matched case-insensitively against OS_TYPES, falling back to 'unix'
        when nothing matches or when no value is set.
        """
        if self.os_type not in self.OS_TYPES:
            # `or ''` guards against os_type being None, which has no lower().
            normalized = (self.os_type or '').lower()
            if normalized in self.OS_TYPES:
                self.os_type = normalized
            else:
                self.os_type = 'unix'

    def delete(self):
        """Delete the machine, its tags, and its entries in the owner's
        mapper and in the ownership mapper of `owned_by` (if set). Mapper
        failures are logged rather than raised."""
        super(Machine, self).delete()
        mist.api.tag.models.Tag.objects(resource=self).delete()
        try:
            self.owner.mapper.remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)
        try:
            if self.owned_by:
                self.owned_by.get_ownership_mapper(self.owner).remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)

    def as_dict(self):
        """Return a dict as it will be returned to the API"""
        # Collect tags keyed by tag key first, so a repeated key keeps only
        # its last value.
        tags_by_key = {
            tag.key: tag.value
            for tag in mist.api.tag.models.Tag.objects(
                resource=self).only('key', 'value')
        }
        # Optimize tags data structure for js...
        # dict.items() is used because iteritems() only exists on Python 2.
        tags = [{
            'key': key,
            'value': value
        } for key, value in tags_by_key.items()]
        return {
            'id': self.id,
            'hostname': self.hostname,
            'public_ips': self.public_ips,
            'private_ips': self.private_ips,
            'name': self.name,
            'ssh_port': self.ssh_port,
            'os_type': self.os_type,
            'rdp_port': self.rdp_port,
            'machine_id': self.machine_id,
            'actions': {action: self.actions[action]
                        for action in self.actions},
            'extra': self.extra,
            'cost': self.cost.as_dict(),
            'image_id': self.image_id,
            'state': self.state,
            'tags': tags,
            'monitoring': (self.monitoring.as_dict()
                           if self.monitoring and self.monitoring.hasmonitoring
                           else ''),
            'key_associations': [ka.as_dict() for ka in self.key_associations],
            'cloud': self.cloud.id,
            'location': self.location.id if self.location else '',
            'size': self.size.name if self.size else '',
            'cloud_title': self.cloud.title,
            # Timestamps are rendered as strings with tzinfo stripped; unset
            # values become the empty string.
            'last_seen': str(
                self.last_seen.replace(tzinfo=None)
                if self.last_seen else ''),
            'missing_since': str(
                self.missing_since.replace(tzinfo=None)
                if self.missing_since else ''),
            'unreachable_since': str(
                self.unreachable_since.replace(tzinfo=None)
                if self.unreachable_since else ''),
            'created': str(
                self.created.replace(tzinfo=None)
                if self.created else ''),
            'machine_type': self.machine_type,
            'parent_id': self.parent.id if self.parent is not None else '',
            'probe': {
                'ping': (self.ping_probe.as_dict()
                         if self.ping_probe is not None
                         else PingProbe().as_dict()),
                'ssh': (self.ssh_probe.as_dict()
                        if self.ssh_probe is not None
                        else SSHProbe().as_dict()),
            },
            'cores': self.cores,
            'network': self.network.id if self.network else '',
            'subnet': self.subnet.id if self.subnet else '',
            'owned_by': self.owned_by.id if self.owned_by else '',
            'created_by': self.created_by.id if self.created_by else '',
        }

    def __str__(self):
        return 'Machine %s (%s) in %s' % (self.name, self.id, self.cloud)
Example #13
0
class PollingSchedule(ShardedScheduleMixin, me.Document):
    """Base document describing a periodically executed polling task.

    A schedule has one non-expiring default interval plus any number of
    temporary override intervals. The effective interval (see `interval`) is
    the shortest non-expired one. Subclasses must provide `task`.
    """

    meta = {
        'allow_inheritance': True,
        'strict': False,
        'indexes': ['shard_id', 'shard_update_at']
    }

    # We use a unique name for easy identification and to avoid running the
    # same schedule twice. The name is autopopulated during the invocation of
    # the `clean` method.
    name = me.StringField(unique=True)

    # The following fields are defined in celerybeatmongo.models.PeriodicTask.
    # Here, we define no fields in the base class, and expect subclasses to
    # either define their fields, or simply use properties.
    # task = me.StringField(required=True)
    # args = me.ListField()
    # kwargs = me.DictField()

    # Scheduling information. Don't edit them directly, just use the model
    # methods.
    # The default is a callable so that every schedule gets its own
    # PollingInterval instance instead of sharing a single class-level object.
    default_interval = me.EmbeddedDocumentField(
        PollingInterval, required=True,
        default=lambda: PollingInterval(every=0))
    override_intervals = me.EmbeddedDocumentListField(PollingInterval)

    # Optional arguments.
    queue = me.StringField()
    exchange = me.StringField()
    routing_key = me.StringField()
    soft_time_limit = me.IntField()

    # Used internally by the scheduler.
    last_run_at = me.DateTimeField()
    total_run_count = me.IntField(min_value=0)
    run_immediately = me.BooleanField()

    def get_name(self):
        """Construct name based on self.task

        Returns the last dotted component of `self.task`, or a placeholder
        string when the subclass has not implemented `task`.
        """
        try:
            return self.task.split('.')[-1]
        except NotImplementedError:
            return '%s: No task specified.' % self.__class__.__name__

    def clean(self):
        """Automatically set value of name"""
        self.name = self.get_name()

    @property
    def task(self):
        """Return task name for this schedule

        Subclasses should define an attribute, property or field to do this.
        """
        raise NotImplementedError()

    @property
    def args(self):
        """Return task args for this schedule (the schedule id as a string)"""
        return [str(self.id)]

    @property
    def kwargs(self):
        """Return task kwargs for this schedule"""
        return {}

    @property
    def enabled(self):
        """Whether this task is currently enabled or not

        Derived from the effective interval: the task counts as enabled when
        `self.interval.timedelta` is truthy.
        """
        return bool(self.interval.timedelta)

    @property
    def interval(self):
        """Merge multiple intervals into one

        Returns a dynamic PollingInterval, with the highest frequency of any
        override schedule or the default schedule.

        """
        interval = self.default_interval
        for i in self.override_intervals:
            if not i.expired():
                # A falsy timedelta on the current pick means "no interval
                # yet", so any live override wins over it.
                if not interval.timedelta or i.timedelta < interval.timedelta:
                    interval = i
        return interval

    @property
    def schedule(self):
        """Return a celery schedule instance

        This is used internally by celerybeatmongo scheduler
        """
        return celery.schedules.schedule(self.interval.timedelta)

    @property
    def expires(self):
        """Always None: no expiry is defined for the schedule itself."""
        return None

    def add_interval(self, interval, ttl=300, name=''):
        """Add an override schedule to the scheduled task

        Override schedules must define an interval in seconds, as well as a
        TTL (time to live), also in seconds. Override schedules cannot be
        removed, so short TTL's should be used. You can however add a new
        override schedule again, thus practically extending the time where an
        override is in effect.

        Override schedules can only increase, not decrease frequency of the
        schedule, in relation to that defined in the `default_interval`.

        Raises AssertionError if `interval` is not a positive int or `ttl` is
        not an int strictly between 0 and 3600.
        """
        assert isinstance(interval, int) and interval > 0
        assert isinstance(ttl, int) and 0 < ttl < 3600
        expires = datetime.datetime.now() + datetime.timedelta(seconds=ttl)
        self.override_intervals.append(
            PollingInterval(name=name, expires=expires, every=interval))

    def cleanup_expired_intervals(self):
        """Remove override schedules that have expired"""
        self.override_intervals = [
            override for override in self.override_intervals
            if not override.expired()
        ]

    def set_default_interval(self, interval):
        """Set default interval

        This is the interval used for this schedule, if there is no active
        override schedule with a smaller interval. The default interval never
        expires. `enabled` is a read-only property computed from the
        effective interval, so it cannot be assigned directly.
        """
        self.default_interval = PollingInterval(name='default', every=interval)

    def __unicode__(self):
        return "%s %s" % (self.get_name(), self.interval or '(no interval)')
Example #14
0
class Orders(mongoengine.EmbeddedDocument):
    """Embedded document holding one customer's order."""
    customer = mongoengine.StringField(required=True)
    total = mongoengine.FloatField(required=True)
    # Callable default: each document gets a fresh list rather than a
    # reference to one shared list literal.
    items = mongoengine.EmbeddedDocumentListField(Item, default=list)
    address = mongoengine.StringField(required=True)
Example #15
0
class Panel(mongoengine.Document):
    """
    Document representation of channel/marker definition for an experiment. A panel, once associated to an experiment
    will standardise data upon input; when an fcs file is created in the database, it will be associated to
    an experiment and the channel/marker definitions in the fcs file will be mapped to the associated panel.

    Attributes
    -----------
    panel_name: str, required
        unique identifier for the panel
    markers: EmbeddedDocListField
        list of marker names; see NormalisedName
    channels: EmbeddedDocListField
        list of channels; see NormalisedName
    mappings: EmbeddedDocListField
        list of channel/marker mappings; see ChannelMap
    initiation_date: DateTime
        date of creation

    """
    panel_name = mongoengine.StringField(required=True, unique=True)
    markers = mongoengine.EmbeddedDocumentListField(NormalisedName)
    channels = mongoengine.EmbeddedDocumentListField(NormalisedName)
    mappings = mongoengine.EmbeddedDocumentListField(ChannelMap)
    initiation_date = mongoengine.DateTimeField(default=datetime.now)
    meta = {
        'db_alias': 'core',
        'collection': 'fcs_panels'
    }

    def create_from_excel(self, path: str) -> None:
        """
        Populate panel attributes from an excel template

        Parameters
        ----------
        path: str
            path of file

        Returns
        --------
        None

        Raises
        ------
        AssertionError
            If no file exists at the given path
        """
        assert os.path.isfile(path), f'Error: no such file {path}'
        nomenclature, mappings = check_excel_template(path)
        for col_name, attr in zip(['channel', 'marker'], [self.channels, self.markers]):
            for name in mappings[col_name]:
                if not pd.isnull(name):
                    # Look up the nomenclature row describing this name; missing cells become ''
                    d = nomenclature[nomenclature['name'] == name].fillna('').to_dict(orient='list')
                    attr.append(NormalisedName(standard=d['name'][0],
                                               regex_str=d['regex'][0],
                                               case_sensitive=d['case'][0],
                                               permutations=d['permutations'][0]))
        mappings = mappings.fillna('').to_dict(orient='list')
        self.mappings = [ChannelMap(channel=c, marker=m)
                         for c, m in zip(mappings['channel'], mappings['marker'])]

    def create_from_dict(self, x: dict):
        """
        Populate panel attributes from a python dictionary

        Parameters
        ----------
        x: dict
            dictionary object containing panel definition; must hold the keys 'channels' and 'markers'
            (each a list of dictionaries with keys name, regex, case and permutations) and 'mappings'
            (a list of (channel, marker) tuples)

        Returns
        --------
        None

        Raises
        ------
        AssertionError
            If the dictionary does not follow the structure described above
        """

        # Check validity of input dictionary
        parent_keys = ['channels', 'markers', 'mappings']
        err = 'Invalid template dictionary; must be a nested dictionary with parent keys: channels, markers'
        assert all(k in parent_keys for k in x.keys()), err
        # Every parent key is read below, so they must all be present
        assert all(k in x for k in parent_keys), err
        err = f'Invalid template dictionary; nested dictionaries must contain keys: name, regex case, ' \
              f'and permutations'
        # Compare as sets: a dict keys view never compares equal to a list
        expected_keys = {'name', 'regex', 'case', 'permutations'}
        for k in ['channels', 'markers']:
            assert all(set(i.keys()) == expected_keys for i in x[k]), err

        assert isinstance(x['mappings'], list), 'Invalid template dictionary; mappings must be a list of tuples'
        err = 'Invalid template dictionary; mappings must be a list of tuples'
        assert all(isinstance(k, tuple) for k in x['mappings']), err
        self.markers = [NormalisedName(standard=k['name'],
                                       regex_str=k['regex'],
                                       case_sensitive=k['case'],
                                       permutations=k['permutations'])
                        for k in x['markers']]
        self.channels = [NormalisedName(standard=k['name'],
                                        regex_str=k['regex'],
                                        case_sensitive=k['case'],
                                        permutations=k['permutations'])
                         for k in x['channels']]
        self.mappings = [ChannelMap(channel=c, marker=m) for c, m in x['mappings']]

    def get_channels(self) -> iter:
        """
        Yields the channel of each channel/marker mapping associated to panel

        Returns
        -------
        Generator
        """
        for cm in self.mappings:
            yield cm.channel

    def get_markers(self) -> iter:
        """
        Yields the marker of each channel/marker mapping associated to panel

        Returns
        -------
        Generator
        """
        for cm in self.mappings:
            yield cm.marker
Example #16
0
class Cases(mongoengine.Document):
    """Document grouping a list of embedded Case records under an integer id."""
    _id = mongoengine.IntField(required=True)
    # Callable default: each document gets a fresh list rather than a
    # reference to one shared list literal.
    cases = mongoengine.EmbeddedDocumentListField(Case, default=list)
    meta = {'db_alias': 'core', 'collection': 'cases'}
Example #17
0
class Entry(me.Document):
    """A dated entry belonging to a user, holding notes, pain sub-entries and
    their aggregated statistics."""

    user = me.ReferenceField('User',
                             reverse_delete_rule=me.CASCADE,
                             required=True)
    date = me.DateTimeField(required=True, default=datetime.datetime.now)
    daytime = me.StringField()

    notes = me.StringField(max_length=500)

    stats = me.EmbeddedDocumentField('EntryStats')
    pain_subentries = me.EmbeddedDocumentListField('PainSubEntry')

    # Classes to be used for a feature implemented at a later date.
    mood_subentry = me.EmbeddedDocumentField('MoodSubEntry')
    medication_subentry = me.EmbeddedDocumentField('MedicationSubEntry')
    activity_subentry = me.EmbeddedDocumentField('ActivitySubEntry')

    def __repr__(self):
        # `serialize` returns the raw datetime under 'date', which json cannot
        # encode natively; fall back to str() for such values.
        # NOTE: `serialize` may save the document when stats are missing.
        return json.dumps(self.serialize(), sort_keys=True, indent=4,
                          default=str)

    def serialize(self, comparisons=None, detail_level='high'):
        """Return a dict representation of this entry.

        The id, date and daytime are always included. `comparisons` is added
        when truthy. For detail levels 'medium' and 'high' the notes, the
        serialized pain sub-entries and the stats are included as well; stats
        are created (and the entry saved) when they do not exist yet.
        """
        serialized = {
            'id': str(self.id),
            'date': self.date,
            'daytime': self.daytime
        }

        if comparisons:
            serialized['comparisons'] = comparisons

        # Membership test: `x == 'medium' or 'high'` is always truthy.
        if detail_level in ('medium', 'high'):

            # Create the stats object if it does not exist.
            if self.stats is not None:
                stats = self.stats
            else:
                stats = EntryStats()
                stats.update(self.pain_subentries)
                self.stats = stats
                self.save()

            pain_serialized = [
                subentry.serialize(detail_level)
                for subentry in self.pain_subentries
            ]

            serialized.update({
                'pain_subentries': pain_serialized,
                'notes': self.notes,
                'stats': stats.serialize(),
            })

        return serialized

    # Given stats for an entry, creates an entry stats object and saves it to
    # the entry object.
    def create_stats(self, high, low, total, num_pain_subentries):
        """Store high/low/average stats on the entry and save it.

        Does nothing when `num_pain_subentries` is zero or negative, which
        also avoids a division by zero when computing the average.
        """
        if num_pain_subentries <= 0:
            return

        # Create the stats object if it doesn't exist.
        if self.stats is not None:
            stats = self.stats
        else:
            stats = EntryStats()

        stats.high = high
        stats.low = low
        stats.avg = total / num_pain_subentries
        stats.num_body_parts = num_pain_subentries

        self.stats = stats
        self.save()
Example #18
0
class User(Document):
    """Test schema: a user with a name, an email and embedded addresses."""

    # Free-form name string (no constraints declared).
    name = db.StringField()
    # Email address, declared with the EmailField type.
    email = db.EmailField()
    # List of embedded Address documents.
    address = db.EmbeddedDocumentListField(Address)
Example #19
0
class Commit(mongoengine.Document):
    """
    *Concrete* class representing a version control system commit.
    """
    # Hash identifying the commit in the version control system.
    vcs_hash = mongoengine.StringField()
    # Embedded Executable documents stored with this commit.
    executables = mongoengine.EmbeddedDocumentListField(Executable)
Example #20
0
class FileGroup(mongoengine.Document):
    """
    Document representation of a file group; a selection of related fcs files (e.g. a sample and its associated
    controls)

    Attributes
    ----------
    primary_id: str, required
        Unique ID to associate to group
    data_directory: str, required
        Directory holding the HDF5 file of this group (the file is named after the document id)
    controls: list
        IDs of the control files that have been added to this group
    compensated: bool (default=False)
        NOTE(review): presumably whether the data have been compensated; confirm against callers
    collection_datetime: DateTime, optional
        Date and time of sample collection
    processing_datetime: DateTime, optional
        Date and time of sample processing
    populations: EmbeddedDocList
        Populations derived from this file group
    gating_strategy: list
        NOTE(review): presumably the gates applied to this file group; confirm against callers
    valid: bool (default=True)
        NOTE(review): presumably a validity flag for the file group; confirm against callers
    notes: str, optional
        Additional free text
    """
    primary_id = mongoengine.StringField(required=True)
    data_directory = mongoengine.StringField(required=True)
    controls = mongoengine.ListField()
    compensated = mongoengine.BooleanField(default=False)
    collection_datetime = mongoengine.DateTimeField(required=False)
    processing_datetime = mongoengine.DateTimeField(required=False)
    populations = mongoengine.EmbeddedDocumentListField(Population)
    gating_strategy = mongoengine.ListField()
    valid = mongoengine.BooleanField(default=True)
    notes = mongoengine.StringField(required=False)
    meta = {'db_alias': 'core', 'collection': 'fcs_files'}

    def __init__(self, *args, **values):
        """
        Create a new FileGroup from primary data, or attach to a previously saved one.

        The keyword arguments "data", "channels", "markers" and "columns_default" are
        consumed here; everything else is handed to the parent constructor. When "data"
        is given the document is saved and a new HDF5 file is initialised; otherwise the
        document must already have an id and its populations are loaded from the
        existing HDF5 file.
        """
        primary_data = values.pop("data", None)
        channel_names = values.pop("channels", None)
        marker_names = values.pop("markers", None)
        self.columns_default = values.pop("columns_default", "markers")
        assert self.columns_default in ["markers", "channels"], \
            "columns_default must be one of: 'markers', 'channels'"
        super().__init__(*args, **values)
        self.cell_meta_labels = {}

        if primary_data is None:
            # Existing FileGroup: locate its HDF5 file and rebuild the population tree
            assert self.id is not None, "FileGroup has not been previously defined. Please provide primary data."
            self.h5path = os.path.join(self.data_directory,
                                       f"{self.id.__str__()}.hdf5")
            try:
                self._load_populations()
                self.tree = construct_tree(populations=self.populations)
            except AssertionError as err:
                warn(f"Failed to load data for {self.primary_id} ({self.id}); "
                     f"data may be corrupt or missing; {str(err)}")
            return

        # New FileGroup: the id only exists after saving, so save before building the path
        assert not self.id, "This FileGroup has already been defined"
        assert channel_names is not None, "Must provide channels to create new FileGroup"
        assert marker_names is not None, "Must provide markers to create new FileGroup"
        self.save()
        self.h5path = os.path.join(self.data_directory,
                                   f"{self.id.__str__()}.hdf5")
        self._init_new_file(data=primary_data,
                            channels=channel_names,
                            markers=marker_names)

    def data(self,
             source: str,
             sample_size: int or float or None = None) -> pd.DataFrame:
        """
        Read the events of one source file from the HDF5 record as a DataFrame.

        Parameters
        ----------
        source: str
            Name of the file to load from e.g. either "primary" or the name of a control
        sample_size: int or float (optional)
            If given, the DataFrame is passed through uniform_downsampling with this sample size

        Returns
        -------
        Pandas.DataFrame
        """
        with h5py.File(self.h5path, "r") as h5:
            assert source in h5.keys(), f"Invalid source, expected one of: {h5.keys()}"
            # Column names are stored as byte strings alongside the data
            channels = [
                name.decode("utf-8")
                for name in h5[f"mappings/{source}/channels"][:]
            ]
            markers = [
                name.decode("utf-8")
                for name in h5[f"mappings/{source}/markers"][:]
            ]
            frame = _column_names(df=pd.DataFrame(h5[source][:]),
                                  channels=channels,
                                  markers=markers,
                                  preference=self.columns_default)
        if sample_size is None:
            return frame
        return uniform_downsampling(data=frame, sample_size=sample_size)

    def _init_new_file(self, data: np.array, channels: List[str],
                       markers: List[str]):
        """
        Write a fresh HDF5 file for this FileGroup and create its root Population.

        The file receives the primary data, the channel/marker mappings and empty
        groups for indexes, clusters and cell meta labels. The document is saved
        afterwards.

        Parameters
        ----------
        data: Numpy.Array
        channels: list
        markers: list

        Returns
        -------
        None
        """
        n_events = data.shape[0]
        with h5py.File(self.h5path, "w") as h5:
            h5.create_dataset(name="primary", data=data)
            for group in ("mappings", "mappings/primary"):
                h5.create_group(group)
            # Names are stored as fixed-width byte strings
            h5.create_dataset("mappings/primary/channels",
                              data=np.array(channels, dtype='S'))
            h5.create_dataset("mappings/primary/markers",
                              data=np.array(markers, dtype='S'))
            for group in ("index", "index/root", "clusters", "clusters/root",
                          "cell_meta_labels"):
                h5.create_group(group)
        root_population = Population(population_name="root",
                                     index=np.arange(0, n_events),
                                     parent="root",
                                     n=n_events)
        self.populations = [root_population]
        self.tree = {"root": anytree.Node(name="root", parent=None)}
        self.save()

    def add_ctrl_file(self, ctrl_id: str, data: np.array, channels: List[str],
                      markers: List[str]):
        """
        Add a new control file to this FileGroup.

        The control data and its channel/marker mappings are written to the HDF5
        file, every event of the control is registered in the root population's
        control index, and the document is saved.

        Parameters
        ----------
        ctrl_id: str
        data: Numpy.Array
        channels: list
        markers: list

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If a control with the given ID has already been added
        """
        # Validate before touching the HDF5 file so a rejected call has no side effects
        assert ctrl_id not in self.controls, f"Entry for {ctrl_id} already exists"
        with h5py.File(self.h5path, "a") as f:
            f.create_dataset(name=ctrl_id, data=data)
            f.create_group(f"mappings/{ctrl_id}")
            f.create_dataset(f"mappings/{ctrl_id}/channels",
                             data=np.array(channels, dtype='S'))
            f.create_dataset(f"mappings/{ctrl_id}/markers",
                             data=np.array(markers, dtype='S'))
        root = self.get_population(population_name="root")
        root.set_ctrl_index(**{ctrl_id: np.arange(0, data.shape[0])})
        self.controls.append(ctrl_id)
        self.save()

    def _load_populations(self):
        """
        Read cell meta labels plus the primary, control and cluster indexes of every
        population from the HDF5 file. Missing indexes produce a warning.

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If the HDF5 file cannot be found
        """
        assert self._hdf5_exists(), f"Could not locate FileGroup HDF5 record {self.h5path}"
        with h5py.File(self.h5path, "r") as h5:
            if "cell_meta_labels" in h5.keys():
                for meta in h5["cell_meta_labels"].keys():
                    self.cell_meta_labels[meta] = h5[f"cell_meta_labels/{meta}"][:]
            for pop in self.populations:
                index_group = f"/index/{pop.population_name}"
                primary_key = index_group + "/primary"
                if primary_key in h5.keys():
                    pop.index = h5[primary_key][:]
                    # Every other entry of the group is the index for a control
                    for ctrl in h5[index_group].keys():
                        if ctrl != "primary":
                            pop.set_ctrl_index(
                                **{ctrl: h5[index_group + f"/{ctrl}"][:]})
                else:
                    warn(
                        f"Population index missing for {pop.population_name}!")
                cluster_group = f"/clusters/{pop.population_name}"
                for cluster in pop.clusters:
                    if f"{cluster.cluster_id}_{cluster.tag}" in h5[cluster_group].keys():
                        cluster.index = h5[
                            cluster_group + f"/{cluster.cluster_id}_{cluster.tag}"][:]
                    else:
                        warn(
                            f"Cluster index missing for {cluster.cluster_id}; tag {cluster.tag} in population {pop.population_name}!"
                        )

    def add_population(self, population: Population):
        """
        Register a new Population and attach it to the population tree under its parent.

        Parameters
        ----------
        population: Population

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If a population of the same name is already in the tree
        """
        err = f"Population with name '{population.population_name}' already exists"
        assert population.population_name not in self.tree.keys(), err
        self.populations.append(population)
        parent_node = self.tree.get(population.parent)
        new_node = anytree.Node(name=population.population_name,
                                parent=parent_node)
        self.tree[population.population_name] = new_node

    def load_ctrl_population_df(self,
                                ctrl: str,
                                population: str,
                                transform: str or dict or None = "logicle",
                                **kwargs):
        """
        Load the events of a single population from a control as a DataFrame.
        If the population has no index for the given control yet, it is first
        estimated with estimate_ctrl_population (a warning is issued).

        Parameters
        ----------
        ctrl: str
            Name of the control sample to load
        population: str
            Name of the desired population
        transform: str or dict (optional)
            If given, transformation method applied to the columns of the DataFrame. If the
            value given is a string, it should be the name of the transform method applied
            to ALL columns. If it is a dictionary, keys should correspond to column names
            and values the transform to apply to said column.
        kwargs
            Additional keyword arguments passed to estimate_ctrl_population

        Returns
        -------
        Pandas.DataFrame

        Raises
        ------
        AssertionError
            If the control is not associated to this FileGroup
        """
        assert ctrl in self.controls, f"No such control {ctrl} associated to this FileGroup"
        known_ctrls = self.get_population(
            population_name=population).ctrl_index.keys()
        if ctrl not in known_ctrls:
            warn(
                f"Population {population} missing for control {ctrl}, will attempt to "
                f"estimate population using KNN")
            self.estimate_ctrl_population(ctrl=ctrl,
                                          population=population,
                                          **kwargs)
        # Fetch the population again: the index may only just have been estimated
        ctrl_idx = self.get_population(
            population_name=population).ctrl_index.get(ctrl)
        ctrl_df = self.data(source=ctrl).loc[ctrl_idx]
        if isinstance(transform, dict):
            return apply_transform(data=ctrl_df,
                                   features_to_transform=transform)
        if isinstance(transform, str):
            return apply_transform(ctrl_df, transform_method=transform)
        return ctrl_df

    def estimate_ctrl_population(self,
                                 ctrl: str,
                                 population: str,
                                 verbose: bool = True,
                                 scoring: str = "balanced_accuracy",
                                 **kwargs):
        """
        Estimate a population for a control sample by training a KNearestNeighbors classifier
        on the population in the primary data and using this model to predict membership
        in the control data. If n_neighbors is not given in kwargs, it will be estimated using
        grid search cross-validation and optimisation of the given scoring parameter.
        If the parent population has not been estimated for the control, it is estimated first
        (recursively).

        Results of the population estimation will be saved to the populations ctrl_index property.

        Parameters
        ----------
        ctrl: str
            Control to estimate population for
        population: str
            Population to estimate
        verbose: bool (default=True)
        scoring: str (default="balanced_accuracy")
        kwargs: dict
            Additional keyword arguments passed to calculate_optimal_neighbours and knn

        Returns
        -------
        None
        """
        feedback = vprint(verbose=verbose)
        feedback(f"====== Estimating {population} for {ctrl} control ======")
        population = self.get_population(population_name=population)
        if ctrl not in self.get_population(
                population_name=population.parent).ctrl_index.keys():
            feedback(
                f"Control missing parent {population.parent}, will attempt to estimate...."
            )
            self.estimate_ctrl_population(ctrl=ctrl,
                                          population=population.parent,
                                          verbose=verbose,
                                          scoring=scoring,
                                          **kwargs)
            feedback(
                f"{population.parent} estimated, resuming estimation of {population.population_name}...."
            )
        features = [
            x for x in [population.geom.x, population.geom.y] if x is not None
        ]
        # Transform to apply per feature column, keyed by column name
        transformations = {
            d: transform
            for d, transform in
            zip([population.geom.x, population.geom.y],
                [population.geom.transform_x, population.geom.transform_y])
            if d is not None
        }
        training_data = self.load_population_df(
            population=population.parent,
            transform=transformations,
            label_downstream_affiliations=False).copy()
        training_data["labels"] = 0
        # Single .loc call: chained `.loc[idx]["labels"] = 1` assigns to a temporary
        # copy and would leave every label at 0
        training_data.loc[population.index, "labels"] = 1
        labels = training_data["labels"].values
        n = kwargs.get("n_neighbors", None)
        if n is None:
            feedback("Calculating optimal n_neighbours by grid search CV...")
            n, score = calculate_optimal_neighbours(
                x=training_data[features].values,
                y=labels,
                scoring=scoring,
                **kwargs)
            feedback(
                f"Continuing with n={n}; chosen with balanced accuracy of {round(score, 3)}..."
            )
        # Estimate control population using KNN
        feedback("Training KNN classifier....")
        train_acc, val_acc, model = knn(data=training_data,
                                        features=features,
                                        labels=labels,
                                        n_neighbours=n,
                                        holdout_size=0.2,
                                        random_state=42,
                                        return_model=True,
                                        **kwargs)
        feedback(f"...training balanced accuracy score: {train_acc}")
        feedback(f"...validation balanced accuracy score: {val_acc}")
        feedback(
            f"Predicting {population.population_name} for {ctrl} control...")
        # The control data must receive the same per-column transforms as the training
        # data, so the column-keyed dictionary is reused here. No extra keyword is
        # passed: load_ctrl_population_df forwards unknown keywords to this method.
        ctrl_data = self.load_ctrl_population_df(
            ctrl=ctrl,
            population=population.parent,
            transform=transformations)
        ctrl_labels = model.predict(ctrl_data[features].values)
        ctrl_idx = ctrl_data.index.values[np.where(ctrl_labels == 1)]
        population.set_ctrl_index(**{ctrl: ctrl_idx})
        feedback("===============================================")

    def load_population_df(
            self,
            population: str,
            transform: str or dict or None = "logicle",
            label_downstream_affiliations: bool = False) -> pd.DataFrame:
        """
        Load the events of a single population from the primary data as a DataFrame.

        Parameters
        ----------
        population: str
            Name of the desired population
        transform: str or dict (optional)
            If given, transformation method applied to the columns of the DataFrame. If the
            value given is a string, it should be the name of the transform method applied
            to ALL columns. If it is a dictionary, keys should correspond to column names
            and values the transform to apply to said column.
        label_downstream_affiliations: bool (default=False)
            If True, the DataFrame is passed through _label_downstream_affiliations, which
            adds a "population_label" column intended to hold the name of the lowest
            downstream population each event belongs to.

        Returns
        -------
        Pandas.DataFrame

        Raises
        ------
        AssertionError
            If the population is not part of the population tree
        """
        assert population in self.tree.keys(), f"Invalid population, {population} does not exist"
        event_idx = self.get_population(population_name=population).index
        events = self.data(source="primary").loc[event_idx]
        if isinstance(transform, dict):
            events = apply_transform(data=events,
                                     features_to_transform=transform)
        elif isinstance(transform, str):
            events = apply_transform(events, transform_method=transform)
        if not label_downstream_affiliations:
            return events
        return self._label_downstream_affiliations(parent=population,
                                                   data=events)

    def _label_downstream_affiliations(self, parent: str,
                                       data: pd.DataFrame) -> pd.DataFrame:
        """
        An additional column will be generated named "population_label" containing
        the end node membership of each event e.g. if you choose CD4+ population and
        there are subsequent populations belonging to this CD4+ population in a tree
        like: "CD4+ -> CD4+CD25+ -> CD4+CD25+CD45RA+" then the population label column
        will contain the name of the lowest possible "leaf" population that an event is
        assigned to. Events belonging to no downstream population are labelled with
        the parent. The given DataFrame is modified in place and returned.

        Parameters
        ----------
        parent: str
        data: Pandas.DataFrame

        Returns
        -------
        Pandas.DataFrame
        """

        data["population_label"] = None
        dependencies = self.list_downstream_populations(parent)
        for pop in dependencies:
            idx = self.get_population(pop).index
            # Write into the label column itself; later populations overwrite earlier ones
            data.loc[idx, "population_label"] = pop
        # Reassign instead of chained inplace fillna, which may act on a temporary copy
        data["population_label"] = data["population_label"].fillna(parent)
        return data

    def _hdf5_exists(self):
        """
        Check whether the HDF5 file at self.h5path is present on disk.

        Returns
        -------
        bool
        """
        record_path = self.h5path
        return os.path.isfile(record_path)

    def list_gated_controls(self) -> Generator:
        """
        Yield the ID of each control for which every population returns a
        non-None value from get_ctrl (i.e. the control has been gated)

        Returns
        -------
        Generator
            Control IDs for gated controls
        """
        # `controls` is a list field, not a method, so it must not be called
        for c in self.controls:
            if all(p.get_ctrl(c) is not None for p in self.populations):
                yield c

    def list_populations(self) -> iter:
        """
        Yield the name of each population in ``self.populations``, in order.

        Returns
        -------
        Generator
            Population names
        """
        yield from (pop.population_name for pop in self.populations)

    def print_population_tree(self,
                              image: bool = False,
                              path: str or None = None):
        """
        Print the population tree to stdout. If 'image' is True the tree is
        additionally exported as a picture before it is printed.

        Parameters
        ----------
        image: bool (default=False)
            Also save the tree as a png image
        path: str (optional)
            File path for the image, ignored if 'image' is False.
            Defaults to "<working directory>/<id>_population_tree.png".

        Returns
        -------
        None
        """
        tree_root = self.tree['root']
        if image:
            # Imported lazily: only needed when an image is requested
            from anytree.exporter import DotExporter
            if not path:
                path = f'{os.getcwd()}/{self.id}_population_tree.png'
            DotExporter(tree_root).to_picture(path)
        for row in anytree.RenderTree(tree_root):
            prefix, _, node = row
            print('%s%s' % (prefix, node.name))

    def delete_clusters(self,
                        tag: str or None = None,
                        meta_label: str or None = None,
                        drop_all: bool = False):
        """
        Delete clusters from every population of this FileGroup.

        Exactly one mode is applied, checked in this order: 'drop_all', then
        'tag', then 'meta_label'.

        Parameters
        ----------
        tag: str (optional)
            Passed to each population's ``delete_cluster(tag=...)``
        meta_label: str (optional)
            Passed to each population's ``delete_cluster(meta_label=...)``
        drop_all: bool (default=False)
            If True, call ``delete_all_clusters(clusters="all")`` on each population

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If 'drop_all' is False and neither 'tag' nor 'meta_label' is given
        """
        if drop_all:
            for pop in self.populations:
                pop.delete_all_clusters(clusters="all")
            return
        if tag:
            for pop in self.populations:
                pop.delete_cluster(tag=tag)
            return
        if meta_label:
            for pop in self.populations:
                pop.delete_cluster(meta_label=meta_label)
            return
        raise ValueError(
            "If drop_all is False, must provide tag or meta_label")

    def delete_populations(self, populations: list or str) -> None:
        """
        Delete given populations. Populations downstream from deleted population(s)
        will also be removed (a warning lists them).

        Parameters
        ----------
        populations: list or str
            Either a list of populations (list of strings) to remove or a single population as a string.
            If a value of "all" is given, all populations except "root" are dropped.

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If 'populations' is neither a string nor a list, or if it contains "root"
        """
        if isinstance(populations, str):
            if populations == "all":
                self.populations = [
                    p for p in self.populations if p.population_name == "root"
                ]
                self.tree = {
                    name: node
                    for name, node in self.tree.items() if name == "root"
                }
                return
            # A single population name is documented as valid input; previously it
            # was rejected by the list assertion below.
            populations = [populations]
        assert isinstance(
            populations,
            list), "Provide a list of population names for removal"
        assert "root" not in populations, "Cannot delete root population"
        downstream_effects = set()
        for name in populations:
            downstream_effects.update(self.list_downstream_populations(name))
        if downstream_effects:
            warn(
                "The following populations are downstream of one or more of the "
                "populations listed for deletion and will therefore be deleted: "
                f"{downstream_effects}")
        to_remove = downstream_effects.union(populations)
        self.populations = [
            p for p in self.populations
            if p.population_name not in to_remove
        ]
        # Detach the nodes from the tree before dropping them from the lookup
        for name in to_remove:
            self.tree[name].parent = None
        self.tree = {
            name: node
            for name, node in self.tree.items() if name not in to_remove
        }

    def get_population(self, population_name: str) -> Population:
        """
        Return the Population object of this FileGroup whose name matches
        'population_name'.

        Parameters
        ----------
        population_name: str
            Name of population to retrieve

        Returns
        -------
        Population

        Raises
        ------
        AssertionError
            If no population with the given name exists
        """
        known_names = list(self.list_populations())
        assert population_name in known_names, f'Population {population_name} does not exist'
        matches = [
            candidate for candidate in self.populations
            if candidate.population_name == population_name
        ]
        return matches[0]

    def get_population_by_parent(self, parent: str) -> Generator:
        """
        Yield each Population whose 'parent' attribute equals the given name.
        The population named "root" is never yielded.

        Parameters
        ----------
        parent: str
            Name of the parent population to search for

        Returns
        -------
        Generator
            Matching Population objects
        """
        for pop in self.populations:
            if pop.parent != parent or pop.population_name == "root":
                continue
            yield pop

    def list_downstream_populations(self, population: str) -> list or None:
        """List every population that descends from the given population.

        The tree is searched from the "root" node for nodes whose path contains
        the node of 'population'; the population itself is excluded.

        Parameters
        ----------
        population : str
            population name

        Returns
        -------
        list or None
            Names of populations dependent on given population

        Raises
        ------
        AssertionError
            If 'population' is not a key of the population tree
        """
        assert population in self.tree.keys(), f'population {population} does not exist; ' \
                                               f'valid population names include: {self.tree.keys()}'
        tree_root = self.tree['root']
        target = self.tree[population]

        def _passes_through_target(candidate):
            return target in candidate.path

        matches = anytree.findall(tree_root, filter_=_passes_through_target)
        return [found.name for found in matches if found.name != population]

    def merge_populations(self,
                          left: Population,
                          right: Population,
                          new_population_name: str or None = None):
        """
        Merge two populations present in the current population tree and add
        the result to this FileGroup.

        The merge itself is delegated to the module-level ``merge_populations``
        function. As documented for this method, the merged population has the
        combined index of both populations, inherits no clusters, is not
        associated with children of either input, and descends from the left
        population's parent.

        Parameters
        ----------
        left: Population
        right: Population
        new_population_name: str (optional)

        Returns
        -------
        None
        """
        # Inside the method body this name resolves to the module-level function
        merged = merge_populations(left=left,
                                   right=right,
                                   new_population_name=new_population_name)
        self.add_population(merged)

    def subtract_populations(self,
                             left: Population,
                             right: Population,
                             new_population_name: str or None = None):
        """
        Subtract the right population from the left population.
        The right population must either have the same parent as the left population
        or be downstream of the left population. The new population will descend from
        the same parent as the left population. The new population will have a
        PolygonGeom geom built from the convex hull of the remaining events. New
        population will be added to FileGroup.

        Parameters
        ----------
        left: Population
        right: Population
        new_population_name: str (optional)
            Defaults to "subtract_<left name>_<right name>"

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If the right population neither shares the left population's parent
            nor is downstream of the left population
        """
        same_parent = left.parent == right.parent
        downstream = right.population_name in list(
            self.list_downstream_populations(left.population_name))
        assert same_parent or downstream, "Right population should share the same parent as the " \
                                          "left population or be downstream of the left population"
        new_population_name = new_population_name or f"subtract_{left.population_name}_{right.population_name}"
        # Vectorised membership test: keeps the order of the left index while
        # avoiding a linear scan of right.index for every left event.
        left_idx = np.asarray(left.index)
        new_idx = left_idx[~np.isin(left_idx, right.index)]
        x, y = left.geom.x, left.geom.y
        transform_x, transform_y = left.geom.transform_x, left.geom.transform_y
        parent_data = self.load_population_df(population=left.parent,
                                              transform={
                                                  x: transform_x,
                                                  y: transform_y
                                              })
        remaining = parent_data.loc[new_idx]
        x_values, y_values = create_convex_hull(
            x_values=remaining[x].values,
            y_values=remaining[y].values)
        new_geom = PolygonGeom(x=x,
                               y=y,
                               transform_x=transform_x,
                               transform_y=transform_y,
                               x_values=x_values,
                               y_values=y_values)
        new_population = Population(population_name=new_population_name,
                                    parent=left.parent,
                                    n=len(new_idx),
                                    index=new_idx,
                                    geom=new_geom,
                                    warnings=left.warnings + right.warnings +
                                    ["SUBTRACTED POPULATION"])
        self.add_population(population=new_population)

    def _write_populations(self):
        """
        Write population data to the HDF5 file at ``self.h5path``.

        Cell meta labels are written (ASCII encoded) only if the file already
        has a "cell_meta_labels" entry. For every population the proportions
        relative to its parent and to "root" are refreshed, and its primary
        index, control indexes and cluster indexes are written as datasets.

        Returns
        -------
        None
        """
        total_events = self.get_population("root").n
        with h5py.File(self.h5path, "a") as f:
            if "cell_meta_labels" in f.keys():
                for meta, labels in self.cell_meta_labels.items():
                    encoded = [
                        label.encode("ascii", "ignore") for label in labels
                    ]
                    f.create_dataset(f'/cell_meta_labels/{meta}',
                                     data=encoded)
            for p in self.populations:
                parent_events = self.get_population(p.parent).n
                p.prop_of_parent = p.n / parent_events
                p.prop_of_total = p.n / total_events
                f.create_dataset(f'/index/{p.population_name}/primary',
                                 data=p.index)
                for ctrl, idx in p.ctrl_index.items():
                    f.create_dataset(f'/index/{p.population_name}/{ctrl}',
                                     data=idx)
                for cluster in p.clusters:
                    cluster.prop_of_events = cluster.n / p.n
                    dataset_name = f'/clusters/{p.population_name}/{cluster.cluster_id}_{cluster.tag}'
                    f.create_dataset(dataset_name, data=cluster.index)

    def _hdf_reset_population_data(self):
        """
        Remove stored population data from the HDF5 file so that it can be
        rewritten with the current in-memory state.

        Deletes stored cell meta labels (for the labels currently held in
        memory), each population's primary and control indexes, and each
        population's cluster group, wherever they exist in the file.

        Returns
        -------
        None
        """
        with h5py.File(self.h5path, "a") as f:
            if "cell_meta_labels" in f.keys():
                for meta in self.cell_meta_labels.keys():
                    if meta not in f["cell_meta_labels"]:
                        continue
                    del f[f"cell_meta_labels/{meta}"]
            for p in self.populations:
                has_index = p.population_name in f["index"].keys()
                if has_index:
                    if "primary" in f[f"index/{p.population_name}"].keys():
                        del f[f"index/{p.population_name}/primary"]
                    for ctrl_id in p.ctrl_index.keys():
                        if ctrl_id not in f[f"index/{p.population_name}"].keys():
                            continue
                        del f[f"index/{p.population_name}/{ctrl_id}"]
                if p.population_name in f["clusters"].keys():
                    del f[f"clusters/{p.population_name}"]

    def population_stats(self, population: str):
        """
        Summarise the size of a population.

        Parameters
        ----------
        population: str
            Name of the population

        Returns
        -------
        dict
            Keys "population_name", "n" (event count), "prop_of_parent"
            (n divided by the parent population's n) and "prop_of_root"
            (n divided by the root population's n)
        """
        target = self.get_population(population_name=population)
        parent_pop = self.get_population(population_name=target.parent)
        root_pop = self.get_population(population_name="root")
        stats = {"population_name": population, "n": target.n}
        stats["prop_of_parent"] = target.n / parent_pop.n
        stats["prop_of_root"] = target.n / root_pop.n
        return stats

    def quantile_clean(self, upper: float = 0.999, lower: float = 0.001):
        """
        Create a "root_clean" population containing the primary events that lie
        within the [lower, upper] quantile range of every column.

        Columns are processed one after another, so the quantiles of a column
        are computed on the events that survived the previous columns.

        Parameters
        ----------
        upper: float (default=0.999)
            Upper quantile; events above it are dropped
        lower: float (default=0.001)
            Lower quantile; events below it are dropped

        Returns
        -------
        None
        """
        events = self.data(source="primary")
        for column in events.columns:
            values = events[column]
            above_floor = values >= values.quantile(lower)
            below_ceiling = values <= values.quantile(upper)
            events = events[above_floor & below_ceiling]
        clean_pop = Population(population_name="root_clean",
                               index=events.index.values,
                               parent="root",
                               n=events.shape[0])
        self.add_population(clean_pop)

    def save(self, *args, **kwargs):
        """
        Persist the document. When populations are present, the population
        data held in the HDF5 file is cleared and rewritten first.

        All arguments are forwarded to the parent class's ``save``.
        """
        has_populations = bool(self.populations)
        if has_populations:
            # Clear stale datasets, then write the current state
            self._hdf_reset_population_data()
            self._write_populations()
        super().save(*args, **kwargs)

    def delete(self, delete_hdf5_file: bool = True, *args, **kwargs):
        """
        Delete the document and, optionally, its HDF5 file.

        Parameters
        ----------
        delete_hdf5_file: bool (default=True)
            If True, the file at ``self.h5path`` is removed as well; a warning
            is issued when that file cannot be found.
        args, kwargs
            Forwarded to the parent class's ``delete``
        """
        super().delete(*args, **kwargs)
        if not delete_hdf5_file:
            return
        if not os.path.isfile(self.h5path):
            warn(f"Could not locate hdf5 file {self.h5path}")
            return
        os.remove(self.h5path)
Example #21
0
class CielEntity(me.Document):
    """
    An entity scraped from CIEL through a terminal emulator session, together
    with the panels (screens) captured for it.
    """
    type = me.StringField(verbose_name='Entity type (fastpath)', required=True)
    name = me.StringField(required=True)
    panels = me.EmbeddedDocumentListField(CielPanel)
    # NOTE(review): the attribute name is misspelled ("descripton"); it is left
    # untouched here because renaming it would change the stored document key.
    descripton = me.StringField(verbose_name='Entity details', required=False)

    # NOTE(review): this class declares no field called 'fastpath' (only the
    # verbose name of 'type' mentions it) - confirm the intended ordering key.
    meta = {'ordering': ['fastpath']}

    @classmethod
    def scan_panels(cls, entity_type, entity_name):
        """Scan panels for the entity in CIEL and store them on the entity.

        The entity document is looked up by type and name and created if it
        does not exist. Every screen reached by paging down is stored as a
        panel (matched case-insensitively by name) together with its fields.

        entity_type: e.g. wwartc
        entity_name: e.g. bananas-ptw

        Returns the saved entity document.
        """
        em = Emulator(visible=True)
        try:
            em.connect(HOST)
            # NOTE(review): credentials are hard-coded in source; consider
            # loading them from configuration instead.
            em.ciel_login('IEVMLIR1', 'MLIIEVR1')

            em.send_str(entity_type)
            em.exec(BTN.F22)
            em.screen_skip()

            em.send_str('2')
            em.exec(BTN.TAB)

            em.send_str(entity_name)
            em.exec(BTN.ENTER)

            entity = cls.objects(type=entity_type, name=entity_name).first()
            if not entity:
                entity = cls(type=entity_type, name=entity_name)
                entity.save()

            while True:
                panel_name = em.screen_get_name()
                data_raw = '\n'.join(em.screen_get_data(html=False))
                data_html = '\n'.join(em.screen_get_data(html=True))

                # Update the panel if it already exists, otherwise add it
                wanted = panel_name.lower()
                panel = next(
                    (p for p in entity.panels if p.name.lower() == wanted),
                    None)
                if panel is None:
                    panel = CielPanel(name=panel_name,
                                      data_raw=data_raw,
                                      data_html=data_html)
                    entity.panels.append(panel)
                else:
                    panel.data_raw = data_raw
                    panel.data_html = data_html
                entity.save()

                panel.fields = []
                for field in em.field_get_bounds_all():
                    fieldset = FieldSet.objects(
                        panel_name=panel.name,
                        coords__row=field['row'],
                        coords__column=field['col_start'],
                    ).first()

                    if not fieldset:
                        flen = field['col_end'] - field['col_start']
                        fieldset = FieldSet(dbset=None,
                                            panel_name=panel.name,
                                            coords=Coords(
                                                row=field['row'],
                                                column=field['col_start'],
                                                length=flen))
                        fieldset.save()

                    panel.fields.append(
                        CielField(fieldset=fieldset,
                                  value=field['value'].strip()))
                # One write per panel instead of one per field
                entity.save()

                if em.screen_contains('.*BOTTOM.*'):
                    break
                em.exec(BTN.PAGE_DOWN)
        finally:
            # Always close the emulator session, even if scraping fails
            em.terminate()
        return entity

    @classmethod
    def scan_dbset(cls, entity_type, library_name):
        """Scan the field definitions shown for ``library_name`` via STRSQL.

        Each non-empty row is also saved as a CielDBSet document.

        Returns a tuple ``(data, flatdata)``. ``data`` is a dictionary of
        structure:
        {
            library_name : {
            file_name : {
                field_name : {
                    'text' : '',
                    'nulls' : '',
                    'length' : '',
                    'type' : '',
                    'scale' : '',
                },
            },
            },
        }
        ``flatdata`` is a list with one normalised (lower-cased, stripped)
        dictionary per row that has both a file and a field name.
        """
        em = Emulator(visible=True)
        try:
            em.connect(HOST)
            # NOTE(review): credentials are hard-coded in source; consider
            # loading them from configuration instead.
            em.ciel_login('IEVMLIR1', 'MLIIEVR1')

            em.send_str('STRSQL')
            em.exec(BTN.F22)
            # NOTE(review): library_name is interpolated into the statement
            # as-is; callers must pass a trusted value.
            em.send_str('SELECT * FROM {}'.format(library_name))
            em.exec(BTN.F4)

            em.send_str('\t' * 10)
            em.field_set_id(2)
            em.exec(BTN.F4)

            # Library grabbing part
            data = {}
            flatdata = []

            def to_int(text):
                # Non-numeric (e.g. blank) cells count as 0
                try:
                    return int(text)
                except ValueError:
                    return 0

            def grab_fields():
                # The same 14 screen lines are read in three different
                # layouts (toggled with F11) and merged row by row.
                rows = []

                def row_update(n, row, rows):
                    if n >= len(rows):
                        rows.append(row)
                    else:
                        rows[n].update(row)

                for _ in range(3):
                    lines = em.screen_get_data(html=False)
                    if em.screen_contains('.*F11=Display nulls.*'):
                        print('Display nulls')
                        for n, line in enumerate(lines[6:20]):
                            rfield = line[6:20].strip()
                            rfile = line[25:44].strip()
                            rtext = line[44:80].strip()
                            row = {
                                'field': rfield,
                                'file': rfile,
                                'text': rtext
                            }
                            row_update(n, row, rows)

                    elif em.screen_contains('.*F11=Display type.*'):
                        print('Display type')
                        for n, line in enumerate(lines[6:20]):
                            rlib = line[45:58].strip()
                            rnulls = line[58:80].strip()
                            row = {'library': rlib, 'nulls': rnulls}
                            row_update(n, row, rows)

                    elif em.screen_contains('.*F11=Display text.*'):
                        print('Display text')
                        for n, line in enumerate(lines[6:20]):
                            rtype = line[44:65].strip().lower()
                            rlength = line[65:73].strip()
                            rscale = line[73:79].strip()
                            row = {
                                'type': rtype,
                                'length': rlength,
                                'scale': rscale
                            }
                            row_update(n, row, rows)
                    # Scroll screen right
                    em.exec(BTN.F11)
                return rows

            # NOTE(review): a page is only grabbed while 'More...' is shown and
            # 'Bottom' is not, so the final page appears to be skipped - confirm.
            while (not em.screen_contains('.*Bottom.*')
                   and em.screen_contains(r'.*More\.\.\..*')):

                for row in grab_fields():
                    rlib = row['library']
                    rfile = row['file']
                    rfield = row['field']

                    item = data.setdefault(rlib, {}).setdefault(
                        rfile, {}).setdefault(rfield, {})
                    item['text'] = row['text']
                    item['nulls'] = row['nulls']
                    item['type'] = row['type']
                    item['length'] = row['length']
                    item['scale'] = row['scale']

                    # Only if not empty item
                    if not (rfile and rfield):
                        continue

                    flatitem = {
                        'clibrary': rlib.lower().strip(),
                        'cfile': rfile.lower().strip(),
                        'cfield': rfield.lower().strip(),
                        'ctype': row['type'].strip().lower(),
                        'cnulls': row['nulls'].lower().strip(),
                        'length': to_int(row['length']),
                        'cscale': to_int(row['scale']),
                        'ctext': row['text'].strip().lower(),
                    }

                    dbset_item = CielDBSet(entity_type=entity_type,
                                           file=flatitem['cfile'],
                                           field=flatitem['cfield'],
                                           text=flatitem['ctext'],
                                           type=flatitem['ctype'])
                    dbset_item.save()
                    flatdata.append(flatitem)

                em.exec(BTN.PAGE_DOWN)
        finally:
            # Always close the emulator session, even if scraping fails
            em.terminate()
        return data, flatdata
Example #22
0
class Student(me.Document):
    """
    A student with denormalised faculty, group and curator names, references
    to the corresponding documents, and an embedded list of marks.
    """
    first_name = me.StringField(min_length=1, max_length=255, required=True)
    sur_name = me.StringField(min_length=1, max_length=255, required=True)

    faculty = me.StringField(min_length=1, max_length=255, required=True)
    id_faculty = me.ReferenceField(Faculties)

    group = me.StringField(min_length=1, max_length=255, required=True)
    id_group = me.ReferenceField(Groups)

    curator = me.StringField(min_length=1, max_length=255, required=True)
    id_curator = me.ReferenceField(Curators)

    mark_student = me.EmbeddedDocumentListField(Mark)

    def __str__(self):
        return f'{self.sur_name} {self.first_name} студент {self.faculty} факультета, {self.group} групи. ' \
               f'Куратор {self.curator} '

    def str_mark(self):
        """Return all marks as one string of "<item name>: <mark>" entries.

        Entries are concatenated without a separator.
        """
        return ''.join(f'{m.name_item}: {m.mark}' for m in self.mark_student)

    @staticmethod
    def student_from_curator(first_name, sur_name):
        """Return the students of the curator with the given name.

        Only the first matching curator is used. Returns an empty list when no
        curator matches, otherwise a queryset of Student documents.
        """
        curator = Curators.objects.filter(first_name=first_name,
                                          sur_name=sur_name).first()
        if curator is None:
            return []

        return Student.objects.filter(id_curator=curator)

    @staticmethod
    def Excellent_Students_Faculties(min_average=10.0):
        """Return a text report of students with a high average mark.

        For every faculty the report contains the faculty name followed by one
        line per student whose average mark is at least ``min_average``
        (default 10.0).
        """
        parts = []
        for faculty in Faculties.objects:
            parts.append(faculty.name_faculty + '\n')
            students = Student.objects(id_faculty=faculty.id).aggregate([{
                '$unwind':
                '$mark_student'
            }, {
                '$group': {
                    '_id': '$_id',
                    'average_mark': {
                        '$avg': '$mark_student.mark'
                    }
                }
            }, {
                '$match': {
                    'average_mark': {
                        '$gte': min_average
                    }
                }
            }])

            for student in students:
                description = str(Student.objects(id=student['_id'])[0])
                parts.append(
                    f"\t{description},  середній бал {round(student['average_mark'], 2)} \n"
                )

        return ''.join(parts)
Example #23
0
class FileGroup(mongoengine.Document):
    """
    Document representation of a file group; a selection of related fcs files (e.g. a sample and its associated
    controls). Documents are stored in the 'fcs_files' collection using the 'core' database alias.

    When constructing a FileGroup that has no ID yet, the keyword arguments 'data', 'channels' and 'markers'
    must be passed to the constructor; they are used to initialise the associated HDF5 file.

    Attributes
    ----------
    primary_id: str, required
        Unique ID to associate to group
    controls: list
        IDs of the control files added with 'add_ctrl_file'
    compensated: BooleanField (default=False)
        Presumably flags whether the data has been compensated (not used in the visible code; confirm)
    collection_datetime: DateTime, optional
        Date and time of sample collection
    processing_datetime: DateTime, optional
        Date and time of sample processing
    populations: EmbeddedDocList
        Populations derived from this file group
    gating_strategy: list
        Not used in the visible code; presumably records the gating strategy applied to this file group (confirm)
    valid: BooleanField (default=True)
        True if FileGroup is valid
    notes: str, optional
        Additional free text
    subject: ReferenceField
        Reference to Subject. If Subject is deleted, this field is nullified but
        the FileGroup will persist
    data_directory: str
        Directory containing the HDF5 file of this file group (named '<id>.hdf5')
    """
    primary_id = mongoengine.StringField(required=True)
    controls = mongoengine.ListField()
    compensated = mongoengine.BooleanField(default=False)
    collection_datetime = mongoengine.DateTimeField(required=False)
    processing_datetime = mongoengine.DateTimeField(required=False)
    populations = mongoengine.EmbeddedDocumentListField(Population)
    gating_strategy = mongoengine.ListField()
    valid = mongoengine.BooleanField(default=True)
    notes = mongoengine.StringField(required=False)
    subject = mongoengine.ReferenceField(
        Subject, reverse_delete_rule=mongoengine.NULLIFY)
    data_directory = mongoengine.StringField()
    meta = {'db_alias': 'core', 'collection': 'fcs_files'}

    def __init__(self, *args, **kwargs):
        """
        Initialise the FileGroup.

        The keyword arguments 'data', 'channels' and 'markers' are consumed
        here and not forwarded to the parent constructor. If the document
        already has an ID, the population tree, cell meta labels and population
        indexes are loaded. Otherwise the document is saved to obtain an ID and
        a new HDF5 file is initialised from the three keyword arguments.

        Raises
        ------
        ValueError
            If the document has no ID and any of 'data', 'channels' or
            'markers' is missing
        """
        data, channels, markers = (kwargs.pop(key, None)
                                   for key in ("data", "channels", "markers"))
        super().__init__(*args, **kwargs)
        self._columns_default = "markers"
        self.cell_meta_labels = {}
        if self.id:
            # Existing document: attach to the file and load cached state
            self.h5path = os.path.join(self.data_directory,
                                       f"{self.id.__str__()}.hdf5")
            self.tree = construct_tree(populations=self.populations)
            self._load_cell_meta_labels()
            self._load_population_indexes()
            return
        missing = [value is None for value in (data, channels, markers)]
        if any(missing):
            raise ValueError(
                "New instance of FileGroup requires that data, channels, and markers "
                "be provided to the constructor")
        # Saving first assigns the ID that the file name is derived from
        self.save()
        self.h5path = os.path.join(self.data_directory,
                                   f"{self.id.__str__()}.hdf5")
        self.init_new_file(data=data, channels=channels, markers=markers)

    @property
    def columns_default(self):
        """Preferred column naming, either "markers" or "channels"."""
        return self._columns_default

    @columns_default.setter
    def columns_default(self, value: str):
        # Only the two supported naming schemes are accepted
        allowed = ("markers", "channels")
        assert value in allowed, "columns_default must be either 'markers' or 'channels'"
        self._columns_default = value

    @data_loaded
    def data(self,
             source: str,
             sample_size: int or float or None = None) -> pd.DataFrame:
        """
        Read the events of one source file from the HDF5 file into a DataFrame.

        The events are loaded as float32 and the columns are named from the
        stored channel/marker mappings according to ``self.columns_default``.

        Parameters
        ----------
        source: str
            Name of the file to load from e.g. either "primary" or the name of a control
        sample_size: int or float (optional)
            If given, the DataFrame is passed through uniform_downsampling
            with this sample size before being returned

        Returns
        -------
        Pandas.DataFrame

        Raises
        ------
        AssertionError
            Invalid source
        """
        with h5py.File(self.h5path, "r") as f:
            assert source in f.keys(
            ), f"Invalid source, expected one of: {f.keys()}"
            stored_channels = f[f"mappings/{source}/channels"][:]
            channels = [raw.decode("utf-8") for raw in stored_channels]
            stored_markers = f[f"mappings/{source}/markers"][:]
            markers = [raw.decode("utf-8") for raw in stored_markers]
            events = pd.DataFrame(f[source][:], dtype=np.float32)
            data = set_column_names(df=events,
                                    channels=channels,
                                    markers=markers,
                                    preference=self.columns_default)
        if sample_size is None:
            return data
        return uniform_downsampling(data=data, sample_size=sample_size)

    def init_new_file(self, data: np.array, channels: List[str],
                      markers: List[str]):
        """
        Create a fresh HDF5 file for this FileGroup and initialise the root
        Population. Any existing file at ``self.h5path`` is removed first.

        The file receives the "primary" dataset, the channel and marker
        mappings for "primary", and the "index", "index/root" and
        "cell_meta_labels" groups. The document is saved afterwards.

        Parameters
        ----------
        data: numpy.ndarray
            Events of the primary file; the first axis is taken as the event count
        channels: list
        markers: list

        Returns
        -------
        None
        """
        if os.path.isfile(self.h5path):
            os.remove(self.h5path)
        channel_names = np.array(channels, dtype='S')
        marker_names = np.array(markers, dtype='S')
        with h5py.File(self.h5path, "w") as hdf:
            hdf.create_dataset(name="primary", data=data)
            hdf.create_group("mappings")
            hdf.create_group("mappings/primary")
            hdf.create_dataset("mappings/primary/channels",
                               data=channel_names)
            hdf.create_dataset("mappings/primary/markers",
                               data=marker_names)
            hdf.create_group("index")
            hdf.create_group("index/root")
            hdf.create_group("cell_meta_labels")
        n_events = data.shape[0]
        root_population = Population(population_name="root",
                                     index=np.arange(0, n_events),
                                     parent="root",
                                     n=n_events,
                                     source="root")
        self.populations = [root_population]
        self.tree = {"root": anytree.Node(name="root", parent=None)}
        self.save()

    def add_ctrl_file(self, ctrl_id: str, data: np.array, channels: List[str],
                      markers: List[str]):
        """
        Store a new control file in the HDF5 file, register its ID in
        ``self.controls`` and save the document.

        Parameters
        ----------
        ctrl_id: str
            Name of the control e.g ("CD45RA FMO" or "HLA-DR isotype control"
        data: numpy.ndarray
            Single cell events data obtained for this control
        channels: list
            List of channel names
        markers: list
            List of marker names

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If control already exists
        """
        with h5py.File(self.h5path, "a") as hdf:
            assert ctrl_id not in self.controls, f"Entry for {ctrl_id} already exists"
            hdf.create_dataset(name=ctrl_id, data=data)
            hdf.create_group(f"mappings/{ctrl_id}")
            channel_names = np.array(channels, dtype='S')
            hdf.create_dataset(f"mappings/{ctrl_id}/channels",
                               data=channel_names)
            marker_names = np.array(markers, dtype='S')
            hdf.create_dataset(f"mappings/{ctrl_id}/markers",
                               data=marker_names)
        self.controls.append(ctrl_id)
        self.save()

    @data_loaded
    def _load_cell_meta_labels(self):
        """
        Read every dataset stored under "cell_meta_labels" in the HDF5 file
        into ``self.cell_meta_labels`` as unicode arrays. Does nothing if the
        file has no "cell_meta_labels" entry.

        Returns
        -------
        None
        """
        with h5py.File(self.h5path, "r") as f:
            if "cell_meta_labels" not in f.keys():
                return
            for meta in f["cell_meta_labels"].keys():
                stored = f[f"cell_meta_labels/{meta}"][:]
                self.cell_meta_labels[meta] = np.array(stored, dtype="U")

    @data_loaded
    def _load_population_indexes(self):
        """
        Load the primary event index of each population from the HDF5 file.
        Populations for which no index is returned keep their current index.

        Returns
        -------
        None
        """
        with h5py.File(self.h5path, "r") as hdf:
            for pop in self.populations:
                stored_index = h5_read_population_primary_index(
                    population_name=pop.population_name, h5file=hdf)
                if stored_index is not None:
                    pop.index = stored_index

    def add_population(self, population: Population):
        """
        Register a new Population with this FileGroup.

        The population is appended to ``self.populations`` and a matching node is
        inserted into ``self.tree``, attached to the node of ``population.parent``.
        If the population has no event count (``n`` is None) it is set to the
        length of the population's index.

        Parameters
        ----------
        population: Population
            Population to add; must carry an index

        Returns
        -------
        None

        Raises
        ------
        DuplicatePopulationError
            A population of the same name is already present in the tree

        AssertionError
            Population index is None
        """
        name = population.population_name
        if name in self.tree.keys():
            raise DuplicatePopulationError(
                f"Population with name '{name}' already exists")
        assert population.index is not None, "Population index is empty"
        if population.n is None:
            population.n = len(population.index)
        self.populations.append(population)
        # dict.get yields None when the parent is not in the tree, in which case
        # the new node is created without a parent.
        parent_node = self.tree.get(population.parent)
        self.tree[name] = anytree.Node(name=name, parent=parent_node)

    def update_population(self, pop: Population):
        """
        Swap an existing population for a new Population object of the same name.

        Every entry of ``self.populations`` whose 'population_name' equals that of
        ``pop`` is dropped and ``pop`` is appended to the end of the list. The
        population tree (``self.tree``) is not modified by this method.

        Parameters
        ----------
        pop: Population
            New population object

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If no population named ``pop.population_name`` currently exists
        """
        target = pop.population_name
        assert target in self.list_populations(), 'Invalid population, does not exist'
        retained = [
            existing for existing in self.populations
            if existing.population_name != target
        ]
        retained.append(pop)
        self.populations = retained

    def load_ctrl_population_df(self,
                                ctrl: str,
                                population: str,
                                classifier: str = "XGBClassifier",
                                classifier_params: dict or None = None,
                                scoring: str = "balanced_accuracy",
                                transform: str = "logicle",
                                transform_kwargs: dict or None = None,
                                verbose: bool = True,
                                evaluate_classifier: bool = True,
                                kfolds: int = 5,
                                n_permutations: int = 25,
                                sample_size: int = 10000) -> pd.DataFrame:
        """
        Load a population from an associated control. The assumption here is that control files
        have been collected at the same time as primary staining and differ by the absence or
        permutation of a marker/channel/stain. Therefore the population of interest in the
        primary staining will be used as training data to identify the equivalent population in
        the control.

        The user should specify the control file, the population they want (which MUST already exist
        in the primary staining) and the type of classifier to use. Additional parameters can be
        passed to control the classifier and stratified cross validation with permutation testing
        will be performed if evaluate_classifier is set to True.

        Parameters
        ----------
        ctrl: str
            Control file to estimate population for
        population: str
            Population of interest. MUST already exist in the primary staining.
        classifier: str (default='XGBClassifier')
            Classifier to use. String value should correspond to a valid Scikit-Learn classifier class
            name or XGBClassifier for XGBoost.
        classifier_params: dict, optional
            Additional keyword arguments passed when initiating the classifier
        scoring: str (default='balanced_accuracy')
            Method used to evaluate the performance of the classifier if evaluate_classifier is True.
            String value should be one of the functions of Scikit-Learn's classification metrics:
            https://scikit-learn.org/stable/modules/model_evaluation.html.
        transform: str (default='logicle')
            Transformation to be applied to data prior to classification
        transform_kwargs: dict, optional
            Additional keyword arguments applied to Transformer
        verbose: bool (default=True)
            Whether to provide feedback
        evaluate_classifier: bool (default=True)
            If True, stratified cross validation with permutation testing is applied prior to
            predicting control population, feeding back the performance of the classifier
            across k folds and n permutations
        kfolds: int (default=5)
            Number of cross validation rounds to perform if evaluate_classifier is True
        n_permutations: int (default=25)
            Number of rounds of permutation testing to perform if evaluate_classifier is True
        sample_size: int (default=10000)
            Number of events to sample from primary data for training

        Returns
        -------
        Pandas.DataFrame
            Control events predicted to belong to the population (label equal to 1). If a
            transformer was returned when loading the data, the events are passed through its
            ``inverse_scale`` before being returned.

        Raises
        ------
        AssertionError
            If desired population is not found in the primary staining

        MissingControlError
            If the chosen control does not exist
        """
        if ctrl not in self.controls:
            raise MissingControlError(
                f"No such control {ctrl} associated to this FileGroup")
        params = classifier_params or {}
        transform_kwargs = transform_kwargs or {}
        feedback = vprint(verbose=verbose)
        model = build_sklearn_model(klass=classifier, **params)
        assert population in self.list_populations(
        ), f"Desired population {population} not found"
        feedback(f"====== Estimating {population} for {ctrl} control ======")
        feedback("Loading data...")
        training, ctrl_data, transformer = _load_data_for_ctrl_estimate(
            filegroup=self,
            target_population=population,
            ctrl=ctrl,
            transform=transform,
            sample_size=sample_size,
            **transform_kwargs)
        # Train only on columns present in both the training and control frames
        features = [
            col for col in training.columns
            if col != "label" and col in ctrl_data.columns
        ]
        x, y = training[features], training["label"].values
        if evaluate_classifier:
            feedback("Evaluating classifier with permutation testing...")
            skf = StratifiedKFold(n_splits=kfolds,
                                  random_state=42,
                                  shuffle=True)
            score, permutation_scores, pvalue = permutation_test_score(
                model,
                x,
                y,
                cv=skf,
                n_permutations=n_permutations,
                scoring=scoring,
                n_jobs=-1,
                random_state=42)
            feedback(
                f"...Performance (without permutations): {round(score, 4)}")
            feedback(
                f"...Performance (average across permutations; standard dev): "
                f"{round(np.mean(permutation_scores), 4)}; {round(np.std(permutation_scores), 4)}"
            )
            feedback(
                f"...p-value (comparison of original score to permuations): {round(pvalue, 4)}"
            )
        feedback("Predicting population for control data...")
        model.fit(x, y)
        ctrl_labels = model.predict(ctrl_data[features])
        training_prop_of_root = self.get_population(
            population).n / self.get_population("root").n
        # NOTE(review): summing the predictions assumes labels are 0/1 - confirm
        ctrl_prop_of_root = np.sum(ctrl_labels) / ctrl_data.shape[0]
        # Both values are fractions in [0, 1]; scale by 100 so the "%" in the
        # feedback message is accurate.
        feedback(
            f"{population}: {round(training_prop_of_root * 100, 3)}% of root in primary data"
        )
        feedback(
            f"Predicted in ctrl: {round(ctrl_prop_of_root * 100, 3)}% of root in control data"
        )
        ctrl_data = ctrl_data.iloc[np.where(ctrl_labels == 1)[0]]
        if transformer:
            return transformer.inverse_scale(data=ctrl_data,
                                             features=list(ctrl_data.columns))
        return ctrl_data

    def load_population_df(
            self,
            population: str,
            transform: str or dict or None = "logicle",
            features_to_transform: list or None = None,
            transform_kwargs: dict or None = None,
            label_downstream_affiliations: bool = False) -> pd.DataFrame:
        """
        Return the primary events that belong to one population as a DataFrame.

        Rows are selected from ``self.data(source="primary")`` using the index of
        the requested population, then optionally transformed and labelled.

        Parameters
        ----------
        population: str
            Name of the desired population
        transform: str or dict, optional (default="logicle")
            Transform to apply. A string is handed to ``apply_transform`` as the
            method for the chosen features; a dict is handed to
            ``apply_transform_map`` as the feature-to-method mapping. None skips
            transformation altogether.
        features_to_transform: list, optional
            Columns to transform when ``transform`` is a string. Defaults to all
            columns. Not used when ``transform`` is a dict.
        transform_kwargs: dict, optional
            Additional keyword arguments forwarded to the transform function
        label_downstream_affiliations: bool (default=False)
            If True, the result gains a "population_label" column naming, for each
            event, the downstream population it belongs to (or the requested
            population if it belongs to none).

        Returns
        -------
        Pandas.DataFrame

        Raises
        ------
        AssertionError
            Invalid population, does not exist
        """
        assert population in self.tree.keys(
        ), f"Invalid population, {population} does not exist"
        event_index = self.get_population(population_name=population).index
        events = self.data(source="primary").loc[event_index]
        if transform is not None:
            extra_kwargs = transform_kwargs or {}
            if isinstance(transform, dict):
                events = apply_transform_map(data=events,
                                             feature_method=transform,
                                             kwargs=extra_kwargs)
            else:
                columns = features_to_transform or list(events.columns)
                events = apply_transform(data=events,
                                         method=transform,
                                         features=columns,
                                         return_transformer=False,
                                         **extra_kwargs)
        if not label_downstream_affiliations:
            return events
        return self._label_downstream_affiliations(parent=population,
                                                   data=events)

    def _label_downstream_affiliations(self, parent: str,
                                       data: pd.DataFrame) -> pd.DataFrame:
        """
        An additional column will be generated named "population_label" containing
        the end node membership of each event e.g. if you choose CD4+ population and
        there are subsequent populations belonging to this CD4+ population in a tree
        like: "CD4+ -> CD4+CD25+ -> CD4+CD25+CD45RA+" then the population label column
        will contain the name of the lowest possible "leaf" population that an event is
        assigned too.

        Parameters
        ----------
        parent: str
        data: Pandas.DataFrame

        Returns
        -------
        Pandas.DataFrame
        """

        data["population_label"] = None
        dependencies = self.list_downstream_populations(parent)
        for pop in dependencies:
            idx = self.get_population(pop).index
            data.loc[idx, 'population_label'] = pop
        data["population_label"].fillna(parent, inplace=True)
        return data

    def _hdf5_exists(self):
        """
        Tests if associated HDF5 file exists.

        Returns
        -------
        bool
        """
        return os.path.isfile(self.h5path)

    def list_populations(self) -> list:
        """
        Collect the name of every population, in the order they are stored.

        Returns
        -------
        List
            The 'population_name' of each entry in ``self.populations``
        """
        names = []
        for pop in self.populations:
            names.append(pop.population_name)
        return names

    def print_population_tree(self,
                              image: bool = False,
                              path: str or None = None):
        """
        Render the population tree to stdout, optionally exporting it as an image.

        The text rendering is always printed, one line per node. When 'image' is
        True the tree is additionally exported with anytree's DotExporter.

        Parameters
        ----------
        image: bool (default=False)
            Also save the tree as an image
        path: str (optional)
            File path for the image, ignored if 'image' is False.
            Defaults to "<id>_population_tree.png" in the working directory.

        Returns
        -------
        None
        """
        root = self.tree['root']
        if image:
            # Imported lazily so the exporter is only required when used
            from anytree.exporter import DotExporter
            if not path:
                path = f'{os.getcwd()}/{self.id}_population_tree.png'
            DotExporter(root).to_picture(path)
        for prefix, _fill, node in anytree.RenderTree(root):
            line = '%s%s' % (prefix, node.name)
            print(line)

    def delete_populations(self, populations: list or str) -> None:
        """
        Delete given populations. Populations downstream from deleted population(s) will
        also be removed (a warning listing them is issued).

        Deleted populations are removed from ``self.populations``, their tree nodes
        are detached from their parents, and their entries are dropped from
        ``self.tree``.

        Parameters
        ----------
        populations: list or str
            Either a list of populations (list of strings) to remove or a single population as a string.
            If a value of "all" is given, all populations except "root" are dropped.

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If populations is neither a list nor a string, if it includes "root",
            or if a named population does not exist in the tree
        """
        if populations == "all":
            for p in self.populations:
                self.tree[p.population_name].parent = None
            self.populations = [
                p for p in self.populations if p.population_name == "root"
            ]
            self.tree = {
                name: node
                for name, node in self.tree.items() if name == "root"
            }
            return
        # A single population name is documented as valid input; treat it as a
        # one-element list rather than rejecting it.
        if isinstance(populations, str):
            populations = [populations]
        assert isinstance(
            populations,
            list), "Provide a list of population names for removal"
        assert "root" not in populations, "Cannot delete root population"
        downstream_effects = {
            name
            for p in populations
            for name in self.list_downstream_populations(p)
        }
        if downstream_effects:
            warn(
                "The following populations are downstream of one or more of the "
                "populations listed for deletion and will therefore be deleted: "
                f"{downstream_effects}")
        to_remove = downstream_effects.union(populations)
        self.populations = [
            p for p in self.populations
            if p.population_name not in to_remove
        ]
        # Detach nodes before dropping them so no remaining node keeps them as
        # a parent or child.
        for name in to_remove:
            self.tree[name].parent = None
        self.tree = {
            name: node
            for name, node in self.tree.items() if name not in to_remove
        }

    def get_population(self, population_name: str) -> Population:
        """
        Look up a Population of this FileGroup by name.

        Parameters
        ----------
        population_name: str
            Name of the population to retrieve

        Returns
        -------
        Population
            The first entry of ``self.populations`` whose 'population_name' matches

        Raises
        ------
        MissingPopulationError
            If population doesn't exist
        """
        for candidate in self.populations:
            if candidate.population_name == population_name:
                return candidate
        raise MissingPopulationError(
            f'Population {population_name} does not exist')

    def get_population_by_parent(self, parent: str) -> Generator:
        """
        Lazily iterate over the populations whose direct parent is ``parent``.

        The population named "root" is never yielded, whatever its parent is.

        Parameters
        ----------
        parent: str
            Name of the parent population to search for

        Returns
        -------
        Generator
            Yields matching Population objects in stored order
        """
        yield from (pop for pop in self.populations
                    if pop.population_name != "root" and pop.parent == parent)

    def list_downstream_populations(self, population: str) -> list or None:
        """For a given population find all dependencies

        The names of every node below the population's node in ``self.tree`` are
        returned (children, grandchildren and so on). The population itself is
        never included.

        Parameters
        ----------
        population : str
            population name

        Returns
        -------
        list
            List of populations dependent on given population; empty if there
            are none

        Raises
        ------
        AssertionError
            If Population does not exist
        """
        assert population in self.tree.keys(), f'population {population} does not exist; ' \
                                               f'valid population names include: {self.tree.keys()}'
        node = self.tree[population]
        # Walk only this node's subtree instead of scanning the whole tree from
        # the root and testing each node's path; this also finds descendants of
        # a node that is not reachable from "root".
        return [d.name for d in node.descendants if d.name != population]

    def merge_gate_populations(self,
                               left: Population or str,
                               right: Population or str,
                               new_population_name: str or None = None):
        """
        Merge two populations and add the result to this FileGroup.

        Either argument may be given as a population name, in which case the
        Population is looked up with ``get_population``. The merge itself is
        delegated to the module-level ``merge_gate_populations`` function and the
        population it returns is registered with ``add_population``.

        Parameters
        ----------
        left: Population or str
        right: Population or str
        new_population_name: str (optional)
            Forwarded unchanged to the merge function

        Returns
        -------
        None
        """
        left_pop = self.get_population(left) if isinstance(left, str) else left
        right_pop = self.get_population(right) if isinstance(right, str) else right
        merged = merge_gate_populations(left=left_pop,
                                        right=right_pop,
                                        new_population_name=new_population_name)
        self.add_population(merged)

    def merge_non_geom_populations(self, populations: list,
                                   new_population_name: str):
        """
        Merge several populations sourced from classification or clustering methods
        and add the result to this FileGroup.
        (Not supported for populations from autonomous gates)

        Names in ``populations`` are resolved with ``get_population``; Population
        objects are used as given. The merge is delegated to the module-level
        ``merge_non_geom_populations`` function and its result is registered with
        ``add_population``.

        Parameters
        ----------
        populations: list
            Populations to merge, as names (str) and/or Population objects
        new_population_name: str
            Name of the new population

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If an element of populations is neither a string nor a Population
        """
        resolved = []
        for item in populations:
            if not isinstance(item, (str, Population)):
                raise ValueError(
                    "populations should be a list of strings or list of Population objects"
                )
            resolved.append(
                self.get_population(item) if isinstance(item, str) else item)
        merged = merge_non_geom_populations(
            populations=resolved, new_population_name=new_population_name)
        self.add_population(merged)

    def subtract_populations(self,
                             left: Population,
                             right: Population,
                             new_population_name: str or None = None):
        """
        Create a population from the events of ``left`` that are not in ``right``.

        The right population must either share its parent with the left population
        or be downstream of it. The new population takes the left population's
        parent, the set difference of the two indexes, and a PolygonGeom built from
        the convex hull of the remaining events in the left geom's x/y features
        (parent data is loaded with the left geom's x/y transforms). Its warnings
        are those of both inputs plus "SUBTRACTED POPULATION". The result is added
        to the FileGroup with ``add_population``.

        Parameters
        ----------
        left: Population
        right: Population
        new_population_name: str (optional)
            Defaults to "subtract_<left name>_<right name>"

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If left and right population are not sourced from root or Gate
        AssertionError
            If left and right population do not share the same parent or the right population
            is not downstream of the left population
        """
        shares_parent = left.parent == right.parent
        is_downstream = right.population_name in list(
            self.list_downstream_populations(left.population_name))
        valid_sources = ["root", "gate"]
        if not (left.source in valid_sources
                and right.source in valid_sources):
            raise ValueError(
                "Population source must be either 'root' or 'gate'")
        assert shares_parent or is_downstream, "Right population should share the same parent as the " \
                                               "left population or be downstream of the left population"
        if not new_population_name:
            new_population_name = f"subtract_{left.population_name}_{right.population_name}"
        remaining_idx = np.setdiff1d(left.index, right.index)
        geom = left.geom
        x, y = geom.x, geom.y
        transform_x, transform_y = geom.transform_x, geom.transform_y
        parent_data = self.load_population_df(population=left.parent,
                                              transform={
                                                  x: transform_x,
                                                  y: transform_y
                                              })
        # Select the surviving events once and read both axes from that selection
        remaining_events = parent_data.loc[remaining_idx]
        x_values, y_values = create_convex_hull(
            x_values=remaining_events[x].values,
            y_values=remaining_events[y].values)
        hull_geom = PolygonGeom(x=x,
                                y=y,
                                transform_x=transform_x,
                                transform_y=transform_y,
                                x_values=x_values,
                                y_values=y_values)
        combined_warnings = left.warnings + right.warnings + [
            "SUBTRACTED POPULATION"
        ]
        subtracted = Population(population_name=new_population_name,
                                parent=left.parent,
                                n=len(remaining_idx),
                                index=remaining_idx,
                                geom=hull_geom,
                                warnings=combined_warnings)
        self.add_population(population=subtracted)

    def _write_populations(self):
        """
        Persist cell meta labels and population indexes to the HDF5 file.

        Each entry of ``self.cell_meta_labels`` is ASCII-encoded (characters that
        cannot be encoded are dropped) and written under "/cell_meta_labels/<name>".
        For every population the proportion of its parent and of root is
        recalculated from the event counts and its index is written under
        "/index/<population name>/primary".

        Returns
        -------
        None
        """
        root_n = self.get_population("root").n
        with h5py.File(self.h5path, "r+") as h5file:
            for label_name, labels in self.cell_meta_labels.items():
                encoded = np.array(
                    [label.encode("ascii", "ignore") for label in labels])
                overwrite_or_create(file=h5file,
                                    data=encoded,
                                    key=f"/cell_meta_labels/{label_name}")
            for pop in self.populations:
                parent = self.get_population(pop.parent)
                pop._prop_of_parent = pop.n / parent.n
                pop.prop_of_total = pop.n / root_n
                overwrite_or_create(file=h5file,
                                    data=pop.index,
                                    key=f"/index/{pop.population_name}/primary")

    def population_stats(self, population: str, warn_missing: bool = False):
        """
        Summarise a population: number of events, fraction of its parent and
        fraction of root.

        If the population, its parent, or root cannot be found, a dictionary with
        all statistics set to zero is returned instead of raising.

        Parameters
        ----------
        population: str
            Name of the population to summarise
        warn_missing: bool (default=False)
            If True, issue a warning when a lookup fails

        Returns
        -------
        Dict
            Keys: "population_name", "n", "frac_of_parent", "frac_of_root"
        """
        try:
            pop = self.get_population(population_name=population)
            parent = self.get_population(population_name=pop.parent)
            root = self.get_population(population_name="root")
        except MissingPopulationError:
            if warn_missing:
                warn(
                    f"{population} not present in {self.primary_id} FileGroup")
            return {
                "population_name": population,
                "n": 0,
                "frac_of_parent": 0,
                "frac_of_root": 0
            }
        else:
            return {
                "population_name": population,
                "n": pop.n,
                "frac_of_parent": pop.n / parent.n,
                "frac_of_root": pop.n / root.n
            }

    def quantile_clean(self, upper: float = 0.999, lower: float = 0.001):
        """
        Trim extreme events from root and store the remainder as "root_clean".

        The logicle-transformed root events are filtered one column at a time,
        keeping rows whose value lies between the lower and upper quantile of that
        column. Quantiles for each column are computed on the rows that survived
        the columns processed before it. The surviving events are added as a new
        population named "root_clean" with parent and source "root".

        Parameters
        ----------
        upper: float (default=0.999)
            Upper quantile; values above it are removed
        lower: float (default=0.001)
            Lower quantile; values below it are removed

        Returns
        -------
        None
        """
        events = self.load_population_df("root", transform="logicle")
        for channel in events.columns:
            values = events[channel]
            lower_cut = values.quantile(lower)
            upper_cut = values.quantile(upper)
            events = events[(values >= lower_cut) & (values <= upper_cut)]
        clean_pop = Population(population_name="root_clean",
                               index=events.index.values,
                               parent="root",
                               source="root",
                               n=events.shape[0])
        self.add_population(clean_pop)

    def save(self, *args, **kwargs):
        """
        Save the FileGroup document, writing population data to disk first.

        When the FileGroup has any populations, ``_write_populations`` is called
        before the parent class's ``save``. All arguments are forwarded to the
        parent ``save``.

        Returns
        -------
        None
        """
        has_populations = bool(self.populations)
        if has_populations:
            # Population statistics and indexes go to the HDF5 file
            self._write_populations()
        super().save(*args, **kwargs)

    def delete(self, delete_hdf5_file: bool = True, *args, **kwargs):
        """
        Delete the FileGroup document and, optionally, its HDF5 file.

        The parent class's ``delete`` runs first with the remaining arguments.
        The file at ``self.h5path`` is then removed if requested; a warning is
        issued if it cannot be found.

        Parameters
        ----------
        delete_hdf5_file: bool (default=True)
            Whether to also remove the associated HDF5 file from disk

        Returns
        -------
        None
        """
        super().delete(*args, **kwargs)
        if not delete_hdf5_file:
            return
        if not os.path.isfile(self.h5path):
            warn(f"Could not locate hdf5 file {self.h5path}")
            return
        os.remove(self.h5path)
Example #24
0
class Date(me.Document):
    """MongoEngine document with a ``day`` date-time field and a ``destinations`` embedded-document list field."""
    # Date-time value stored for this document
    day = me.DateTimeField()
    # NOTE(review): no embedded document class is passed here; mongoengine's
    # documented EmbeddedDocumentListField signature takes the document type as
    # its first argument - confirm which EmbeddedDocument belongs here.
    destinations = me.EmbeddedDocumentListField()
Example #25
0
class UserStatus(me.Document):
    """MongoEngine document with a unique, required ``uuid`` string and a list of embedded ``Token`` documents."""
    # Unique identifier; must be supplied for every document
    uuid = me.StringField(unique=True, required=True)
    # A callable default yields a fresh list per document; a literal ``[]`` is
    # one list object shared by the field definition.
    tokens = me.EmbeddedDocumentListField(Token, default=list)
Example #26
0
class Pocket:
    """Plain class declaring a single ``items`` embedded-document list field."""
    # NOTE(review): this class does not derive from a mongoengine document base,
    # and no embedded document class is passed to the field - confirm both are
    # intended.
    items = me.EmbeddedDocumentListField()
Example #27
0
class Machine(OwnershipMixin, me.Document):
    """The basic machine model.

    Each document describes one machine that belongs to a `Cloud` and is
    owned by an `Organization`. The pair (cloud, machine_id) is declared
    unique by the first index in `meta`. Every instance gets a
    `MachineController` attached as `self.ctl`.
    """

    id = me.StringField(primary_key=True, default=lambda: uuid.uuid4().hex)

    cloud = me.ReferenceField('Cloud', required=True,
                              reverse_delete_rule=me.CASCADE)
    owner = me.ReferenceField('Organization', required=True,
                              reverse_delete_rule=me.CASCADE)
    location = me.ReferenceField('CloudLocation', required=False,
                                 reverse_delete_rule=me.DENY)
    size = me.ReferenceField('CloudSize', required=False,
                             reverse_delete_rule=me.DENY)
    image = me.ReferenceField('CloudImage', required=False,
                              reverse_delete_rule=me.DENY)
    network = me.ReferenceField('Network', required=False,
                                reverse_delete_rule=me.NULLIFY)
    subnet = me.ReferenceField('Subnet', required=False,
                               reverse_delete_rule=me.NULLIFY)
    name = me.StringField()

    # Info gathered mostly by libcloud (or in some cases user input).
    # Be more specific about what this is.
    # We should perhaps come up with a better name.
    machine_id = me.StringField(required=True)
    hostname = me.StringField()
    public_ips = me.ListField()
    private_ips = me.ListField()
    ssh_port = me.IntField(default=22)
    OS_TYPES = ('windows', 'coreos', 'freebsd', 'linux', 'unix')
    os_type = me.StringField(default='unix', choices=OS_TYPES)
    rdp_port = me.IntField(default=3389)
    actions = me.EmbeddedDocumentField(Actions, default=lambda: Actions())
    extra = MistDictField()
    cost = me.EmbeddedDocumentField(Cost, default=lambda: Cost())
    # libcloud.compute.types.NodeState
    state = me.StringField(default='unknown',
                           choices=tuple(config.STATES.values()))
    machine_type = me.StringField(default='machine',
                                  choices=('machine', 'vm', 'container',
                                           'hypervisor', 'container-host',
                                           'ilo-host'))
    parent = me.ReferenceField('Machine', required=False,
                               reverse_delete_rule=me.NULLIFY)

    # Deprecated TODO: Remove in v5
    key_associations = me.EmbeddedDocumentListField(KeyAssociation)

    last_seen = me.DateTimeField()
    missing_since = me.DateTimeField()
    unreachable_since = me.DateTimeField()
    created = me.DateTimeField()

    monitoring = me.EmbeddedDocumentField(Monitoring,
                                          default=lambda: Monitoring())

    ssh_probe = me.EmbeddedDocumentField(SSHProbe, required=False)
    ping_probe = me.EmbeddedDocumentField(PingProbe, required=False)

    expiration = me.ReferenceField(Schedule, required=False,
                                   reverse_delete_rule=me.NULLIFY)

    # Number of vCPUs gathered from various sources. This field is meant to
    # be updated ONLY by the mist.api.metering.tasks:find_machine_cores task.
    cores = me.IntField()

    meta = {
        'collection': 'machines',
        'indexes': [
            {
                'fields': [
                    'cloud',
                    'machine_id'
                ],
                'sparse': False,
                'unique': True,
                'cls': False,
            }, {
                'fields': [
                    'monitoring.installation_status.activated_at'
                ],
                'sparse': True,
                'unique': False
            }
        ],
        'strict': False,
    }

    def __init__(self, *args, **kwargs):
        """Initialize the document and attach its controller as `self.ctl`."""
        super(Machine, self).__init__(*args, **kwargs)
        self.ctl = MachineController(self)

    def clean(self):
        """Normalize the document's fields.

        Deletes key associations whose key is marked as deleted, resets the
        deprecated `key_associations` list for missing machines, fills in a
        missing owner from the cloud, normalizes `os_type` and falls back to
        the default monitoring method when the current one is not supported.
        """
        # Delete every KeyMachineAssociation of this machine whose key has
        # been marked as deleted. The query is materialized into a list first
        # so that deleting documents cannot interfere with the iteration.
        for association in list(KeyMachineAssociation.objects(machine=self)):
            if association.key.deleted:
                association.delete()

        # Reset key_associations in case self goes missing/destroyed. This is
        # going to prevent the machine from showing up as "missing" in the
        # corresponding keys' associated machines list.
        if self.missing_since:
            self.key_associations = []

        # Populate owner field based on self.cloud.owner
        if not self.owner:
            self.owner = self.cloud.owner

        self.clean_os_type()

        if self.monitoring.method not in config.MONITORING_METHODS:
            self.monitoring.method = config.DEFAULT_MONITORING_METHOD

    def clean_os_type(self):
        """Clean self.os_type

        A value that matches one of `OS_TYPES` case-insensitively is replaced
        by its canonical lower-case form. Anything else, including None or an
        empty string, falls back to 'unix'.
        """
        if self.os_type in self.OS_TYPES:
            return
        # `or ''` keeps a missing value (None) from raising AttributeError
        # on `.lower()`; it simply ends up as the 'unix' fallback.
        candidate = (self.os_type or '').lower()
        self.os_type = candidate if candidate in self.OS_TYPES else 'unix'

    def delete(self):
        """Delete the machine together with its expiration schedule and tags.

        After the document is removed, the machine is also removed from the
        owner's mapper and, if `owned_by` is set, from that owner's ownership
        mapper. Failures in these two best-effort steps are logged only.
        """
        if self.expiration:
            self.expiration.delete()
        super(Machine, self).delete()
        mist.api.tag.models.Tag.objects(
            resource_id=self.id, resource_type='machine').delete()
        try:
            self.owner.mapper.remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)
        try:
            if self.owned_by:
                self.owned_by.get_ownership_mapper(self.owner).remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)

    def as_dict(self):
        """Return the machine as a dict, as it will be returned to the API.

        NOTE: this is not a pure read. If the expiration cannot be
        serialized, `self.expiration` is reset to None and the document is
        saved.
        """
        tags = {tag.key: tag.value for tag in mist.api.tag.models.Tag.objects(
            resource_id=self.id, resource_type='machine'
        ).only('key', 'value')}
        try:
            if self.expiration:
                expiration = {
                    'id': self.expiration.id,
                    'action': self.expiration.task_type.action,
                    'date': self.expiration.schedule_type.entry.isoformat(),
                    # Seconds between the reminder and the expiration itself,
                    # or 0 when no reminder is set.
                    'notify': int((
                        self.expiration.schedule_type.entry -
                        self.expiration.reminder.schedule_type.entry
                    ).total_seconds()) if self.expiration.reminder else 0,
                }
            else:
                expiration = None
        except Exception as exc:
            log.error("Error getting expiration for machine %s: %r" % (
                self.id, exc))
            # Drop the expiration that could not be serialized.
            self.expiration = None
            self.save()
            expiration = None

        try:
            from bson import json_util
            # Round-trip through JSON so that BSON-specific values in `extra`
            # are converted by json_util.default.
            extra = json.loads(json.dumps(self.extra,
                                          default=json_util.default))
        except Exception as exc:
            log.error('Failed to serialize extra metadata for %s: %s\n%s' % (
                self, self.extra, exc))
            extra = {}

        return {
            'id': self.id,
            'hostname': self.hostname,
            'public_ips': self.public_ips,
            'private_ips': self.private_ips,
            'name': self.name,
            'ssh_port': self.ssh_port,
            'os_type': self.os_type,
            'rdp_port': self.rdp_port,
            'machine_id': self.machine_id,
            'actions': {action: self.actions[action]
                        for action in self.actions},
            'extra': extra,
            'cost': self.cost.as_dict(),
            'state': self.state,
            'tags': tags,
            'monitoring':
                self.monitoring.as_dict() if self.monitoring and
                self.monitoring.hasmonitoring else '',
            'key_associations':
                [ka.as_dict() for ka in KeyMachineAssociation.objects(
                    machine=self)],
            'cloud': self.cloud.id,
            'location': self.location.id if self.location else '',
            'size': self.size.name if self.size else '',
            'image': self.image.id if self.image else '',
            'cloud_title': self.cloud.title,
            'last_seen': str(self.last_seen.replace(tzinfo=None)
                             if self.last_seen else ''),
            'missing_since': str(self.missing_since.replace(tzinfo=None)
                                 if self.missing_since else ''),
            'unreachable_since': str(
                self.unreachable_since.replace(tzinfo=None)
                if self.unreachable_since else ''),
            'created': str(self.created.replace(tzinfo=None)
                           if self.created else ''),
            'machine_type': self.machine_type,
            'parent': self.parent.id if self.parent is not None else '',
            'probe': {
                'ping': (self.ping_probe.as_dict()
                         if self.ping_probe is not None
                         else PingProbe().as_dict()),
                'ssh': (self.ssh_probe.as_dict()
                        if self.ssh_probe is not None
                        else SSHProbe().as_dict()),
            },
            'cores': self.cores,
            'network': self.network.id if self.network else '',
            'subnet': self.subnet.id if self.subnet else '',
            'owned_by': self.owned_by.id if self.owned_by else '',
            'created_by': self.created_by.id if self.created_by else '',
            'expiration': expiration,
            'provider': self.cloud.ctl.provider
        }

    def __str__(self):
        """Return a short description: name, id and cloud."""
        return 'Machine %s (%s) in %s' % (self.name, self.id, self.cloud)
class User(me.DynamicDocument):
    """Represents a user in the database."""
    _id = me.StringField(required=True, primary_key=True)
    # Generic data from auth0 normalized fields
    name = me.StringField(required=True)
    picture = me.URLField(required=True)
    user_id = me.StringField(required=True, unique=True)
    email = me.EmailField(required=True)
    email_verified = me.BooleanField(required=True, default=False)
    given_name = me.StringField()
    family_name = me.StringField()

    # Personalized information for the student profile to use
    picture_editable = me.EmbeddedDocumentField(File)
    given_name_editable = me.StringField()
    family_name_editable = me.StringField()

    # Identities are used to tell which service the user signed up with
    identities = me.EmbeddedDocumentListField(Identity)

    # Github related fields
    url = me.URLField()  # API URL
    html_url = me.URLField()  # PROFILE URL
    repos_url = me.URLField()

    # Generic information for portfolio
    description = me.StringField()
    tagline = me.StringField(max_length=280)
    skills = me.ListField(me.StringField(choices=const.skills))

    # Allows the user to store their schooling information
    education = me.EmbeddedDocumentListField(School)

    # Allows the user to store any awards they want displayed on their page
    awards = me.EmbeddedDocumentListField(Award)

    # Allows the user to store their previous work history
    work_history = me.EmbeddedDocumentListField(Work)

    # Portfolio holds a list of portfolio items for their page
    portfolio = me.EmbeddedDocumentListField(PortfolioItem)

    meta = {'collection': 'users'}

    def __repr__(self):
        """Short representation of the user, built from its primary key."""
        return '<User: {}>'.format(self.pk)

    def has_identity(self, provider):
        """Tells whether one of the user's identities comes from a provider.

        Args:
            provider (str): The provider to look for.

        Returns:
            bool: True if any identity has this provider, False otherwise.
        """
        for identity in self.identities:
            if provider == identity['provider']:
                return True
        return False

    @property
    def picture_normalized_url(self):
        """URL of the editable picture if one is set, else the default picture."""
        if self.picture_editable is not None:
            return self.picture_editable.url
        return self.picture

    @property
    def given_name_normalized(self):
        """First of the editable given name, the given name and the name that is set."""
        if self.given_name_editable is not None:
            return self.given_name_editable
        if self.given_name is not None:
            return self.given_name
        return self.name

    @property
    def family_name_normalized(self):
        """Editable family name if set, else the family name (which may be None)."""
        if self.family_name_editable is not None:
            return self.family_name_editable
        return self.family_name

    @property
    def name_normalized(self):
        """Display name built from the editable names, else the plain name."""
        given = self.given_name_editable
        family = self.family_name_editable
        if given is not None and family is not None:
            return '{} {}'.format(given, family)
        if given is not None:
            return given
        if family is not None:
            return family
        return self.name

    @property
    def is_github_user(self):
        """True if the user has a 'github' identity."""
        return self.has_identity('github')

    @property
    def is_google_user(self):
        """True if the user has a 'google-oauth2' identity."""
        return self.has_identity('google-oauth2')

    @property
    def github_identity(self):
        """The user's first github identity, or None for non-github users."""
        if not self.is_github_user:
            return None
        github_identities = (identity for identity in self.identities
                             if identity.provider == 'github')
        return next(github_identities)

    @staticmethod
    def search(name=None,
               school_name=None,
               work_position=None,
               description=None,
               skills=None,
               limit=25,
               offset=0):
        """ Default user search. Arguments left as None (or empty) are ignored.

        Args:
            name (str): Text the user's name must contain.
            school_name (str): Text the name of one of the user's schools must contain.
            work_position (str): Text one of the user's previous work positions must contain.
            description (str): Text the description must contain.
            skills (list): Skills that the user must all have.
            limit (int): The number of users to return.
            offset (int): The number of users to skip.

        Returns:
            tuple: (users, count), the requested page of matching users and the total number of matches.
        """

        # Text filters, each mapped to its case-insensitive "contains" lookup
        text_filters = (
            ('name__icontains', name),
            ('education__name__icontains', school_name),
            ('work_history__position__icontains', work_position),
            ('description__icontains', description),
        )
        criteria = {lookup: term for lookup, term in text_filters
                    if term is not None and term != ''}
        if skills is not None and len(skills) > 0:
            # The user has to have every one of the requested skills
            criteria['skills__all'] = skills

        # Run the query, then return the requested page and the total count
        matches = User.objects(**criteria).all()
        return matches[offset:limit + offset], matches.count()

    def add_repo(self, url, old_project=None):
        """Adds a repo to the user's portfolio, or updates an existing item.

        Args:
            url (str): The url to the repo.
            old_project (Repo): The old project with the repo in it. If set, the repo is updated rather than added.

        Returns:
            The `_id` of the portfolio item that was added or updated.

        Raises:
            exc.IdentityError: If the user is not a github user.
        """

        # Only github users can add repos
        if not self.is_github_user:
            raise exc.IdentityError(self.user_id, 'github')

        repo = Repo(url, self.github_identity.user_id)
        if old_project is not None:
            # Update the repo of the existing item
            old_project.repo = repo
            target = old_project
        else:
            target = PortfolioItem(item_type='repo', repo=repo)
            self.portfolio.append(target)
        item_id = target._id

        self.save()
        return item_id
Example #29
0
class Foo(mongoengine.Document):
    """Document holding a list of embedded ``Bar`` documents."""
    # List of embedded Bar documents.
    bars = mongoengine.EmbeddedDocumentListField(Bar)
Example #30
0
class Order(me.Document):
    """An order of a user: a numbered list of product lines with a status.

    `products` holds the order lines; `sum` is kept in step with them by the
    methods that change the lines.
    """
    ORDER_ACTIVE = 'active'
    ORDER_PROCESSED = 'processed'
    ORDER_COMPLETED = 'completed'
    ORDER_CANCELED = 'canceled'

    STATUS_CONSTANT = ((ORDER_CANCELED, 'order canceled'), (ORDER_COMPLETED,
                                                            'order completed'),
                       (ORDER_ACTIVE, 'order active'), (ORDER_PROCESSED,
                                                        'order processed'))

    REQUEST_TELEPHONE = 'request_telephone'
    REQUEST_NAME = 'request_name'
    LAST_REQUEST = ((REQUEST_TELEPHONE, 'request_telephone'), (REQUEST_NAME,
                                                               'request_name'))

    # Order number; create_order sets it to the user's highest number plus one.
    nom = me.IntField(min_value=1)
    # Pass the callable, not its result: `datetime.now()` would be evaluated
    # once when the class is defined and every order would share that value.
    date = me.DateTimeField(default=datetime.now)
    user = me.ReferenceField(User, reverse_delete_rule=me.DENY)
    sum = me.DecimalField(min_value=0, force_string=True, default=0)
    status = me.StringField(min_length=5,
                            choices=STATUS_CONSTANT,
                            default=ORDER_ACTIVE,
                            required=True)
    products = me.EmbeddedDocumentListField(Line_Order)
    name_recipients = me.StringField(min_length=3, max_length=255)
    telephone_recipients = me.StringField(min_length=10,
                                          max_length=12,
                                          regex='^[0-9]*$')
    last_request = me.StringField(min_length=5, choices=LAST_REQUEST)
    id_message_cart = me.ListField()

    def get_text_status_order(self):
        """Return the text body that matches the order's status.

        Any status other than canceled, active or completed gets the
        "processed" text.
        """
        if self.status == Order.ORDER_CANCELED:
            return Text.get_body(Text.TEXT_ORDER_CANCELED)
        elif self.status == Order.ORDER_ACTIVE:
            return Text.get_body(Text.TEXT_ORDER_ACTIVE)
        elif self.status == Order.ORDER_COMPLETED:
            return Text.get_body(Text.TEXT_ORDER_COMPLETED)
        else:
            return Text.get_body(Text.TEXT_ORDER_PROCESSED)

    def add_count_in_line(self, num: int):
        """Add one unit to the line at index `num`, update the sums and save."""
        line_product = self.products[num]
        line_product.count += 1
        line_product.sum = line_product.count * line_product.product.actual_price
        self.sum = self.get_sum_order()
        self.save()

    def sub_count_in_line(self, num: int):
        """Remove one unit from the line at index `num`, update the sums and save.

        A line with a count of 1 is left untouched and nothing is saved.
        """
        line_product = self.products[num]
        if line_product.count == 1:
            return
        line_product.count -= 1
        line_product.sum = line_product.count * line_product.product.actual_price
        self.sum = self.get_sum_order()
        self.save()

    def add_product_to_order(self, product: Product, count: int):
        """Add `count` units of `product` to the order and save.

        If the order already has a line for the product, its count and sum
        are increased; otherwise a new line is created.
        """
        try:
            line_product = self.products.get(product=product)
            line_product.count += count
            line_product.sum = line_product.count * product.actual_price
        except me.DoesNotExist:
            self.products.create(product=product,
                                 count=count,
                                 sum=count * product.actual_price)
        self.sum = self.get_sum_order()
        self.save()

    def get_sum_order(self):
        """Return the total of the `sum` values of all order lines (0 if none)."""
        # Workaround: an aggregation pipeline ($unwind on products, then
        # $group with $sum over '$products.sum') did not work for this field,
        # although the same approach works for the minimum and for the count
        # field. The total is therefore computed in Python instead.
        total_sum = 0
        for product in self.products:
            total_sum += product.sum
        return total_sum

    @classmethod
    def find_active_order(cls, user: User):
        """Return the user's order with status active, or None if there is none."""
        try:
            order = cls.objects().get(
                Q(user=user) & Q(status=Order.ORDER_ACTIVE))
        except me.DoesNotExist:
            order = None
        return order

    @classmethod
    def get_max_num_orders(cls, user: User):
        """Return the highest order number (`nom`) of the user's orders.

        Returns 0 when the user has no orders or none of them has a number.
        """
        max_num = cls.objects(user=user).aggregate([{
            '$group': {
                '_id': '$user',
                'max_num': {
                    '$max': '$nom'
                }
            }
        }])
        if max_num.alive:
            elem = max_num.next()
            # '$max' yields null when no matching order has a `nom`; fall
            # back to 0 so that callers can safely add 1 to the result.
            return elem['max_num'] or 0
        else:
            return 0

    @classmethod
    def get_count_orders(cls, user: User):
        """Return the number of orders of the user."""
        return cls.objects(user=user).count()

    @classmethod
    def create_order(cls, user: User):
        """Create and return a new order for the user with the next order number."""
        return cls.objects.create(user=user,
                                  nom=cls.get_max_num_orders(user) + 1)

    @classmethod
    def get_active_order(cls, user: User) -> 'Order':
        """Return the user's active order, creating a new one if there is none."""
        active_orders = cls.find_active_order(user)
        if not active_orders:
            active_orders = cls.create_order(user)
        return active_orders

    @classmethod
    def get_count_products_in_active_order(cls, user):
        """Return the summed `count` of all lines of the user's active order.

        Returns 0 when the aggregation yields no result.
        """
        sums = cls.objects(
            Q(user=user) & Q(status=Order.ORDER_ACTIVE)
        ).aggregate([
            {'$unwind': '$products'},
            {
                '$group': {
                    '_id': '$_id',
                    'count_products': {'$sum': '$products.count'},
                }
            },
        ])
        if sums.alive:
            elem = sums.next()
            return elem['count_products']
        else:
            return 0