Beispiel #1
0
class LinearClassifierModel(LinUCBPredictorModel):
    """Predictor model that keeps confusion-matrix counters and an AUC score
    next to whatever `LinUCBPredictorModel` already stores."""

    # Confusion-matrix counters; each starts at zero.
    true_positives = fields.NumField(db_field='tp', default=0)
    false_positives = fields.NumField(db_field='fp', default=0)
    true_negatives = fields.NumField(db_field='tn', default=0)
    false_negatives = fields.NumField(db_field='fn', default=0)
    auc = fields.NumField(db_field='auc')

    @property
    def classifier_class(self):
        """Classifier implementation to use; kept as a property so another
        class can be plugged in easily."""
        from solariat_bottle.db.predictors.classifiers import PassiveAggresiveClassifier, QuantileGradientBoostingClassifier
        return QuantileGradientBoostingClassifier

    def reset_performance_stats(self):
        """Zero the four confusion-matrix counters and save the model."""
        for counter in ('true_positives', 'false_positives',
                        'true_negatives', 'false_negatives'):
            setattr(self, counter, 0)
        self.save()

    def score(self, filtered_context, filtered_actions):
        """Delegate scoring to the underlying classifier (`self.clf`)."""
        classifier = self.clf
        return classifier.score(filtered_context, filtered_actions)

    def to_json(self, fields_to_show=None):
        """Return the base JSON extended with the metrics summary string and
        a one-element quality list holding the AUC."""
        payload = super(LinearClassifierModel,
                        self).to_json(fields_to_show=fields_to_show)
        payload['perfomance_metrics'] = self.performance_metrics
        payload['quality'] = [{'measure': 'AUC', 'score': self.auc}]
        return payload

    @property
    def performance_metrics(self):
        """Human-readable summary of precision, recall and the raw counters.

        Precision and recall are rounded to two decimals; either one is
        reported as the string 'NaN' when both counters it depends on are
        falsy.
        """
        tp, fp = self.true_positives, self.false_positives
        fn, tn = self.false_negatives, self.true_negatives

        def rounded_share(hits, misses):
            # Share of `hits` in `hits + misses`, rounded via string formatting.
            if not (hits or misses):
                return 'NaN'
            return float("%.2f" % (hits / float(hits + misses)))

        precision = rounded_share(tp, fp)
        recall = rounded_share(tp, fn)

        template = "Precision: %s;  Recall: %s, TP: %s, FP: %s, FN: %s, TN: %s"
        return template % (precision, recall, tp, fp, fn, tn)

    def class_validity_check(self, values, min_samples_thresould):
        """True when there are at least `min_samples_thresould` values and
        they contain more than one distinct value."""
        if len(values) < min_samples_thresould:
            return False
        return len(set(values)) > 1
Beispiel #2
0
class LinearRegressorModel(LinUCBPredictorModel):
    """Predictor model that stores regression error statistics next to
    whatever `LinUCBPredictorModel` already stores."""

    # Running statistics cleared by reset_performance_stats().
    avg_error = fields.NumField(db_field='avg', default=0)
    nr_scores = fields.NumField(db_field='nr_scores', default=0)
    # Quality measures exposed through to_json().
    rmse = fields.NumField(db_field='rmse')
    mse = fields.NumField(db_field='mse')  # mean square error
    mae = fields.NumField(db_field='mae')  # mean absolute error
    r2_score = fields.NumField(db_field='re')
    fraction_below_quantile = fields.NumField(db_field='fbq')

    @property
    def classifier_class(self):
        """Regressor implementation to use; kept as a property so another
        class can be plugged in easily."""
        from solariat_bottle.db.predictors.classifiers import PassiveAggresiveRegressor, QuantileGradentBoostingRegressor
        return QuantileGradentBoostingRegressor

    @property
    def performance_metrics(self):
        """Summary string with the average error (two decimals) and the
        number of predictions."""
        template = "Avg Error: %.2f;  Number of Predictions: %s"
        return template % (self.avg_error, self.nr_scores)

    def reset_performance_stats(self):
        """Zero the running error statistics and save the model."""
        for stat in ('avg_error', 'nr_scores'):
            setattr(self, stat, 0)
        self.save()

    def to_json(self, fields_to_show=None):
        """Return the base JSON extended with the metrics summary string and
        the list of quality measures."""
        payload = super(LinearRegressorModel,
                        self).to_json(fields_to_show=fields_to_show)
        payload['perfomance_metrics'] = self.performance_metrics
        # (label shown to the consumer, attribute holding the score), in output order
        quality_attrs = (
            ('RMSE', 'rmse'),
            ('MSE', 'mse'),
            ('FBQ', 'fraction_below_quantile'),
            ('MAE', 'mae'),
            ('R2S', 'r2_score'),
        )
        payload['quality'] = [{'measure': label, 'score': getattr(self, attr)}
                              for label, attr in quality_attrs]
        return payload

    def class_validity_check(self, values, min_samples_thresould):
        """True when there are at least `min_samples_thresould` values."""
        enough_samples = len(values) >= min_samples_thresould
        return enough_samples
class StateVector(SonDocument):
    """Snapshot of a conversation's state at one point in time.

    Holds the discrete status, the satisfaction estimate and the per-contact
    and per-brand post counters and timestamps.
    """
    time_stamp = fields.DateTimeField(db_field='ts', required=True)

    # Discrete status value from Conversation Model
    status     = fields.NumField(db_field='ss', required=True)

    # The resulting estimate of satisfaction of the contact. Want this to be improving!
    satisfaction  = fields.NumField(db_field='se', default=0.0)

    ###### Features of Interest ###########################################
    # These can be expanded as needed. Idea is that we will track the state
    # elements that help discriminate reasonable score updates with each event

    # Contact Stats.
    contact_post_count_total = fields.NumField(db_field='ct', default=0)
    contact_post_count_last  = fields.NumField(db_field='cl', default=0)
    contact_post_ts          = fields.DateTimeField(db_field='cs', default=NEVER)

    # Brand Stats
    brand_post_count_total   = fields.NumField(db_field='bt', default=0)
    brand_post_count_last    = fields.NumField(db_field='bl', default=0)
    brand_post_ts            = fields.DateTimeField(db_field='ls', default=NEVER)
    # NOTE(review): mutable default; confirm the field layer copies it per instance.
    intentions               = fields.ListField(fields.NumField(), db_field='in', default=[])
    speaker                  = fields.StringField(db_field='s', default=None)

    # The direction of the last post
    # NOTE(review): "UNNOWN" looks like a typo for "UNKNOWN"; left unchanged
    # because the value is read at runtime (see get_intention_key).
    direction                = fields.StringField(db_field='d', default="UNNOWN")

    @property
    def key(self):
        ''' The key is based only on time. They should be unique in a conversation. '''
        stamp = self.time_stamp
        # Every component down to the second takes part. Leaving the minute
        # out would make two states in the same hour with equal seconds
        # share a key (and therefore compare equal).
        return "%d:%d:%d:%d:%d:%d" % (
            stamp.year,
            stamp.month,
            stamp.day,
            stamp.hour,
            stamp.minute,
            stamp.second)

    @property
    def next_speaker(self):
        ''' Prediction for next speaker: BRAND while WAITING/HOLDING, else CONTACT. '''
        assert self.status not in TERMINAL_STATUSES

        if self.status in [WAITING, HOLDING]:
            return BRAND

        return CONTACT

    def __str__(self):
        return "%d:%s" % (
            self.status,
            self.key)

    def __eq__(self, other):
        # String keys are compared rather than the datetimes themselves: the
        # original author reported that direct datetime comparison failed.
        if other and isinstance(other, StateVector):
            return self.key == other.key

        return False

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; keep the two consistent.
        return not self.__eq__(other)

    def is_valid(self):
        '''
        Validation criteria for the state vector. Can add to this...

        Returns False for the INITIAL status; otherwise True when both
        "last" post counters are zero or exactly one of them is positive.
        '''
        # It is not a valid state if it is in the initial category. This is because
        # we will only be creating a conversation with a post, and we should always
        # be able to determine one of the alternate states based on that.
        if self.status == INITIAL:
            return False

        return xor(self.brand_post_count_last == 0 and self.contact_post_count_last == 0,
                   xor(self.brand_post_count_last > 0, self.contact_post_count_last > 0))

    @classmethod
    def default(cls, status=INITIAL):
        ''' Build a vector with the given status, stamped with timeslot.now(). '''
        return cls(status=status, time_stamp=timeslot.now())

    def get_intention_key(self):
        '''
        Return an upper-cased "direction:speaker:intentions:status" key,
        "TERMINATED" for terminal statuses, or "" when there are no intentions.
        '''
        if self.status in TERMINAL_STATUSES:
            return "TERMINATED"
        if not self.intentions:
            return ""

        names = " ".join(SATYPE_ID_TO_NAME_MAP[str(x)] for x in self.intentions)
        key = "%s:%s:%s:%s" % (self.direction, self.speaker, names, STATUS_LABELS[self.status])
        return key.upper()
class ConversationStateMachine(Document):
    '''
    The conversation state machine integrates the logic for modeling dialogs
    and assessing call quality. The basic idea is that it captures a sequence
    of state changes.
    '''

    # Primitives for state handling
    def _get_state(self):
        ''' Latest state in the history, or a default state when it is empty. '''
        if self.state_history == []:
            return StateVector.default()
        return self.state_history[-1]

    def _set_state(self, state_vector):
        ''' Append a new state to the history; it must pass is_valid(). '''
        assert state_vector.is_valid(), state_vector.status
        self.state_history.append(state_vector)

    channel           = fields.ReferenceField(Channel, db_field='cl', required=False)
    state_history     = fields.ListField(fields.EmbeddedDocumentField(StateVector), db_field='h')
    policy_name       = fields.StringField(db_field='py', default="DEFAULT")
    # Reading yields the latest state; assigning appends to state_history.
    state             = property(_get_state, _set_state)
    quality_score     = fields.NumField(db_field='qy', default=0.0)

    @property
    def policy(self):
        ''' Policy registered under policy_name, falling back to a SimplePolicy. '''
        return POLICY_MAP.get(self.policy_name, SimplePolicy())

    @property
    def terminated(self):
        ''' True when the current status is one of TERMINAL_STATUSES. '''
        return self.state.status in TERMINAL_STATUSES

    @property
    def next_speaker(self):
        ''' Prediction of who should speak next '''
        return self.state.next_speaker

    def handle_clock_tick(self, time_stamp=None):
        '''
        Clock tick events sent to govern termination of in-progress
        conversations.

        time_stamp defaults to dt.now() and must not be earlier than the
        current state's time stamp (checked with an assert).
        '''
        if time_stamp is None:
            time_stamp = dt.now()
        # time_stamp = timeslot.utc(time_stamp)

        # If terminated there is nothing to do
        if self.terminated:
            return

        # Should be later than the last state update
        assert time_stamp >= self.state.time_stamp

        # If we can terminate, do so. Termination will change the state. But
        # we will ignore it if it does not impact termination. In the future we
        # could change this to update the state incrementally and trigger
        # an alert of some kind.
        # NOTE(review): the explicit `== True` is kept on purpose so that only
        # a result equal to True terminates; confirm the policy returns a bool.
        if self.policy.should_be_terminated(self.state, time_stamp) == True:
            current_state           = self.state
            new_state               = self.policy.get_final_state(self.state, time_stamp)
            new_state.satisfaction  = self.policy.calc_final_satisfaction(current_state, new_state)
            self.state              = new_state
            result                  = self.policy.calc_scores(self.state_history)
            self.quality_score      = result["quality_score"]
            self.quality_score_rounded = result["quality_score_rounded"]
            self.quality_star_score = result["quality_star_score"]
            self.quality_label      = result["quality_label"]
            self.customer_satisfaction = result["customer_satisfaction"]

    def handle_post(self, post=None, vector=None):
        '''
        Advance the state with a post. When post is given, its vector is
        built with make_post_vector and any passed vector is ignored;
        otherwise the supplied vector is used directly.
        '''
        if post:
            s = "HANDLE POST: %s" % post.content
            # Single parenthesised argument: identical output with the
            # Python 2 print statement, and still parses if print is a function.
            print(s.encode("utf-8"))
            vector = make_post_vector(post, self.channel)
        self.state = self.policy.update_state_with_post(self.state, vector)
        # self.save()

    def save(self, **kw):
        ''' Persistence is deliberately disabled: the first assert always fails. '''
        assert False, "Lets try to apply CSM without saving CSM entities to mongodb"
        assert self.state_history != [], "Must have a state"
        assert self.state.status != INITIAL
        super(ConversationStateMachine, self).save(**kw)

    def handle_conversation(self, conversation, time_stamp=None):
        ''' Feed every post of the conversation, then send one clock tick. '''
        for post in conversation.query_posts():
            self.handle_post(post)
        self.handle_clock_tick(time_stamp=time_stamp)

    def get_conversation_quality(self, conversation, time_stamp=None):
        ''' Process the whole conversation and return its quality label. '''
        self.handle_conversation(conversation, time_stamp)
        return self.get_quality_label()

    def get_quality_label(self):
        """ At this point we know the conversation score (in range [-1, 1]),
        this function should map the score to verbose value.

        Requires a terminated conversation (checked with an assert). The
        score is truncated with int() before the lookup.
        """
        assert self.terminated
        return ConversationQualityTrends.CATEGORY_MAP_INVERSE[int(self.quality_score)+1]
Beispiel #5
0
class Group(Document):
    """A named set of users inside an account.

    A group carries roles and lists of channels, smart tags, journey
    types/tags, funnels and predictors; `update` grants or revokes the
    group's permissions on those objects.
    """
    name = fields.StringField(required=True)
    account = fields.ReferenceField('Account', db_field='acnt')
    description = fields.StringField()
    members = fields.ListField(fields.ReferenceField('User'))
    roles = fields.ListField(fields.NumField(choices=USER_ROLES.keys()), db_field='ur')

    # Objects the group is associated with (smart tags are Channel references).
    channels = fields.ListField(fields.ReferenceField('Channel'), db_field='chs')
    smart_tags = fields.ListField(fields.ReferenceField('Channel'), db_field='sts')
    journey_types = fields.ListField(fields.ReferenceField('JourneyType'), db_field='jty')
    journey_tags = fields.ListField(fields.ReferenceField('JourneyTag'), db_field='jtg')
    funnels = fields.ListField(fields.ReferenceField('Funnel'), db_field='fnl')
    predictors = fields.ListField(fields.ReferenceField('BasePredictor'), db_field='prd')

    manager = GroupManager

    def save(self, **kw):
        """Save the group, refusing duplicates of (name, account).

        Raises:
            AppException: another group with the same name exists for the
                same account.
        """
        # check that no other groups exist with the same combination of
        # name and account
        name = kw.get('name', self.name)
        account = kw.get('account', self.account)
        for g in Group.objects.find(name=name, account=account):
            if self.id != g.id:
                raise AppException('A group with same name exists for this account')
        super(Group, self).save(**kw)

    def to_dict(self):
        """Return a dict representation; referenced documents become string ids."""
        from solariat.utils.timeslot import datetime_to_timestamp_ms
        return {'id': str(self.id),
                'created_at': datetime_to_timestamp_ms(self.created),
                'members': [str(_.id) for _ in self.members],
                'smart_tags': [str(_.id) for _ in self.smart_tags],
                'channels': [str(_.id) for _ in self.channels],
                'journey_types': [str(_.id) for _ in self.journey_types],
                'journey_tags': [str(_.id) for _ in self.journey_tags],
                'funnels': [str(_.id) for _ in self.funnels],
                'predictors': [str(_.id) for _ in self.predictors],
                'roles': self.roles,
                'name': self.name,
                'description': self.description,
                'members_total': self.members_total}

    @staticmethod
    def _default_group_dict(group_id, account, role, label):
        """Build the dict shared by the role-wide default groups of an account."""
        from solariat.utils.timeslot import datetime_to_timestamp_ms, now
        title = 'All %s of account %s' % (label, account.name)
        return {'id': group_id,
                'created_at': datetime_to_timestamp_ms(now()),
                'members': [],
                'smart_tags': [],
                'channels': [],
                'journey_types': [],
                'journey_tags': [],
                'funnels': [],
                'predictors': [],
                'roles': [role],
                'name': title,
                'description': title,
                'members_total': 'N/A'}

    @staticmethod
    def analysts(account):
        """ Return a dict representation of a default group for all analysts of an account """
        return Group._default_group_dict(default_analyst_group(account), account,
                                         ANALYST, 'Analysts')

    @staticmethod
    def agents(account):
        """ Return a dict representation of a default group for all agents of an account """
        return Group._default_group_dict(default_agent_group(account), account,
                                         AGENT, 'Agents')

    def _role_check(self, member, removed_roles):
        """True when `member` still shares a role with the group once
        `removed_roles` are discounted from both sides."""
        remaining_group_roles = set(role for role in self.roles if role not in removed_roles)
        remaining_user_roles = set(role for role in member.user_roles if role not in removed_roles)
        return bool(remaining_user_roles.intersection(remaining_group_roles))

    def _sync_channel_perms(self, user, current_docs, requested_ids, via_service_channel):
        """Revoke, then grant, group permissions on Channel documents.

        `current_docs` are the documents stored on the group and
        `requested_ids` the ids that should be stored afterwards. Both sides
        are compared as string ids. With `via_service_channel`, inbound
        channels are replaced by their service channel when one is found.
        """
        from solariat_bottle.db.channel.base import Channel

        saved_ids = set(str(doc.id) for doc in current_docs)
        wanted_ids = set(str(c_id) for c_id in requested_ids)
        added = [c_id for c_id in requested_ids if str(c_id) not in saved_ids]
        removed = [doc.id for doc in current_docs if str(doc.id) not in wanted_ids]

        def resolve(channel):
            if via_service_channel and channel.is_inbound:
                return get_service_channel(channel) or channel
            return channel

        # Remove acl permissions for removed objects
        for channel in Channel.objects.find(id__in=[ObjectId(c_id) for c_id in removed]):
            resolve(channel).del_perm(user, group=self, to_save=True)
        # Update acl for objects which this group was given access to
        for channel in Channel.objects.find(id__in=[ObjectId(c_id) for c_id in added]):
            resolve(channel).add_perm(user, group=self, to_save=True)

    def _sync_object_perms(self, user, model, current_docs, requested_ids):
        """Grant group permissions on newly requested `model` objects and
        revoke them on the ones no longer requested."""
        saved_ids = set(str(doc.id) for doc in current_docs)
        requested = set(requested_ids)
        for obj in model.objects.find(id__in=requested - saved_ids):
            obj.add_perm(user, group=self, to_save=True)
        for obj in model.objects.find(id__in=saved_ids - requested):
            obj.del_perm(user, group=self, to_save=True)

    def update(self, user, name, description, members, roles, channels, smart_tags=None,
                journey_types=None, journey_tags=None, funnels=None, predictors=None):
        """Apply new settings to the group and save it.

        Args:
            user: passed to add_perm/del_perm as the acting user.
            name, description: new values for the matching fields.
            members: user ids; each one gets this group added to its groups.
            roles: role values, converted with int().
            channels: channel ids the group should have afterwards.
            smart_tags, journey_types, journey_tags, funnels, predictors:
                ids for the matching fields. A falsy value (None or empty)
                skips that field entirely, so it cannot be cleared this way.
        """
        # First, handle any changes in roles. If new ones were added, we automatically need
        # to add extra members. If any were removed, we need to remove batch of members
        from solariat_bottle.db.user import User
        from solariat_bottle.db.journeys.journey_type import JourneyType
        from solariat_bottle.db.journeys.journey_tag import JourneyTag
        from solariat_bottle.db.funnel import Funnel
        from solariat_bottle.db.predictors.base_predictor import BasePredictor

        o_roles = [int(role) for role in roles]
        new_roles = [role for role in o_roles if role not in self.roles]
        removed_roles = [role for role in self.roles if role not in o_roles]
        # Some users have implicit access due to their role. Check if we need to remove this
        # based on the role we just set
        full_user_access = User.objects.find(groups__in=[self.id])
        removed_members = [member for member in full_user_access
                           if not self._role_check(member, removed_roles)]
        # For member that were a part only because of a role on this group, remove them now
        for member in removed_members:
            if self.id in member.groups:
                member.groups.remove(self.id)
                member.save()
        # For new members that would have access because of the role of the group, add group
        for new_member in User.objects.find(account=self.account, user_roles__in=new_roles):
            if self.id not in new_member.groups:
                new_member.groups.append(self.id)
                new_member.save()
        # Now for actual hard specified members, also add group
        user_ids = [User.objects.get(u_id).id for u_id in members]
        User.objects.coll.update({'_id': {'$in': user_ids}},
                                 {'$addToSet': {User.groups.db_field: self.id}},
                                 multi=True)

        self._sync_channel_perms(user, self.channels, channels, via_service_channel=True)
        if smart_tags:
            self._sync_channel_perms(user, self.smart_tags, smart_tags,
                                     via_service_channel=False)
        if journey_types:
            self._sync_object_perms(user, JourneyType, self.journey_types, journey_types)
        if journey_tags:
            self._sync_object_perms(user, JourneyTag, self.journey_tags, journey_tags)
        if funnels:
            self._sync_object_perms(user, Funnel, self.funnels, funnels)
        if predictors:
            self._sync_object_perms(user, BasePredictor, self.predictors, predictors)

        self.name = name
        self.description = description
        self.members = members
        self.roles = o_roles
        self.channels = channels
        if smart_tags:
            self.smart_tags = smart_tags
        if journey_types:
            self.journey_types = journey_types
        if journey_tags:
            self.journey_tags = journey_tags
        if funnels:
            self.funnels = funnels
        if predictors:
            self.predictors = predictors
        self.save()

    def add_user(self, user, perms='r'):
        """
        Add a user to the group; `user` may be an email string or an object.

        `perms` is accepted for compatibility and not used here.
        Returns True when the user was resolved, False otherwise.
        """
        from ..db.account import _get_user
        user = _get_user(user)
        if not user:
            return False
        user.update(addToSet__groups=self.id)
        if user not in self.members:
            self.members.append(user)
            self.save()
        return True

    def can_edit(self, user):
        """Admins and staff may edit the group."""
        return user.is_admin or user.is_staff

    def del_user(self, user, perms='rw'):
        """
        Remove a user from the group; `user` may be an email string or an object.

        `perms` is accepted for compatibility and not used here.
        Returns True when the user was resolved, False otherwise.
        """
        from ..db.account import _get_user
        user = _get_user(user)
        if not user:
            return False
        # Pull the same value that add_user/update/clear_users store and
        # pull (self.id), not its string form, so the entry actually matches.
        user.update(pull__groups=self.id)
        if user in self.members:
            # self.members.remove(user) fails in ListBridge, so rebuild the list
            self.members = [member for member in self.members if member.id != user.id]
            self.save()
        return True

    def clear_users(self):
        """Pull this group's id from every user that currently lists it."""
        from solariat_bottle.db.user import User
        u_ids = [u.id for u in User.objects.find(groups__in=[self.id])]
        User.objects.coll.update({'_id': {'$in': u_ids}},
                                 {'$pull': {User.groups.db_field: self.id}},
                                 multi=True)

    def get_all_users(self):
        """
        Return the users whose `groups` contain this group.
        """
        from solariat_bottle.db.user import User
        return User.objects.find(groups__in=[self.id])[:]

    def get_users(self, current_user):
        """Same as get_all_users, without `current_user`."""
        return [u for u in self.get_all_users() if u != current_user]

    @property
    def members_total(self):
        """Number of users whose `groups` contain this group."""
        from solariat_bottle.db.user import User
        return User.objects(groups__in=[self.id]).count()

    def __unicode__(self):
        return self.name
Beispiel #6
0
 # Small document type declaring four fields; no methods are defined here.
 class Doc(Document):
     journey_id = fields.ObjectIdField()
     # `now` is passed as the default without being called here
     created_at = fields.DateTimeField(default=now)
     # presumably a Net Promoter Score - TODO confirm
     nps = fields.NumField()
     # only 0, 1 or 2 are accepted; their meaning is not visible here
     status = fields.NumField(choices=[0, 1, 2])