Example 1
class ExtendedEmbeddedStats(EmbeddedStatsBase):
    ''' A structure holding all the stats we need within a slot. It is
    used as an embedded document in a list field and is designed to support
    faceted analysis of the data, so each slot carries an underlying list of
    embedded documents.
    '''
    # Filter Criteria
    agent = fields.NumField(db_field='at', default=0)
    is_leaf = fields.BooleanField(db_field='if', default=True)
    intention = fields.NumField(db_field='in', default=0)
    language = fields.NumField(db_field='le', default=Lang.ALL)

    # Metrics
    topic_count = fields.NumField(db_field='tt', default=0)

    countable_keys = [
        'topic_count'
    ]  # These are the keys that keep counts, so we know which fields to
    # update during the increment process.
    comparable_keys = ['agent', 'is_leaf', 'intention', 'language']

    def __str__(self):
        return "%s(agent=%s, is_leaf=%-5s, intention='%s', language='%s', topic_count=%d)" % (
            self.__class__.__name__, self.agent, bool(self.is_leaf),
            SATYPE_ID_TO_NAME_MAP.get(str(self.intention), self.intention),
            get_lang_code(self.language), self.topic_count)
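
A hedged usage sketch of how the key lists above are typically consumed; the merge loop below is illustrative, since the real logic lives in EmbeddedStatsBase, which is not shown here:

def merge_stats(existing, new):
    # Hypothetical helper: fold a new ExtendedEmbeddedStats entry into an
    # existing list, matching on comparable_keys and summing countable_keys.
    for item in existing:
        if all(getattr(item, k) == getattr(new, k)
               for k in ExtendedEmbeddedStats.comparable_keys):
            for k in ExtendedEmbeddedStats.countable_keys:
                setattr(item, k, getattr(item, k) + getattr(new, k))
            return existing
    existing.append(new)
    return existing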
Example 2
class ReportsEmbeddedStats(EmbeddedStats):
    # Additional metrics for Reports
    response_volume = fields.NumField(db_field='rv', default=0)
    response_time = fields.NumField(db_field='rt', default=0)
    post_count = fields.NumField(db_field='pt', default=0)

    countable_keys = ['response_volume', 'response_time', 'post_count']
Example 3
class QueuedHistoricData(Document):
    DATASIFT_DEFAULT = 0
    TWITTER_API_DM = 1
    SOLARIAT_POST_DATA = 2
    TWITTER_API_PUBLIC = 3

    # TWITTER_API_PUBLIC is also handled by solariat_post_data below,
    # so include it in the allowed formats.
    DATA_FORMATS = (DATASIFT_DEFAULT, TWITTER_API_DM, SOLARIAT_POST_DATA,
                    TWITTER_API_PUBLIC)

    subscription = fields.ReferenceField(BaseHistoricalSubscription,
                                         db_field='sub')
    timestamp = fields.NumField(db_field='tsp')
    post_data = fields.StringField(db_field='pd')
    post_data_format = fields.NumField(choices=DATA_FORMATS,
                                       default=DATASIFT_DEFAULT,
                                       db_field='fmt')

    indexes = [('subscription', 'timestamp')]

    @property
    def solariat_post_data(self):
        data = json.loads(self.post_data)
        transform = {
            self.SOLARIAT_POST_DATA: lambda x: x,
            self.DATASIFT_DEFAULT: datasift_to_post_dict,
            self.TWITTER_API_DM: twitter_dm_to_post_dict,
            self.TWITTER_API_PUBLIC: twitter_status_to_post_dict
        }[self.post_data_format]
        try:
            data = transform(data)
        except KeyError:
            data['_transform_error'] = True
        return data
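
A hedged sketch of how queued items might be decoded with the property above; the query and the 'subscription' variable are illustrative:

# Hypothetical usage: iterate queued items for a subscription and decode them.
for item in QueuedHistoricData.objects(subscription=subscription):
    post_dict = item.solariat_post_data
    if post_dict.get('_transform_error'):
        continue  # skip items the transform could not map
    # feed post_dict into the normal post-creation pipeline here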
Example 4
class _ScheduledTaskDoc(Document):

    collection = 'ScheduledTask'
    started_time = fields.DateTimeField()
    last_run = fields.DateTimeField()
    next_run = fields.DateTimeField()
    _interval = fields.NumField()
    state = fields.NumField()

    def get_interval(self):
        return timedelta(milliseconds=int(self._interval) * 1000)

    def __init__(self, data=None, **kwargs):

        interval = kwargs.pop('interval', None)
        super(_ScheduledTaskDoc, self).__init__(data, **kwargs)

        if data is None:
            if not isinstance(interval, timedelta):
                raise Exception(
                    "Interval should be an instance of 'timedelta'")
            self._interval = interval.total_seconds()
            self.started_time = init_with_default(kwargs, 'started_time',
                                                  datetime.utcnow())
            self.last_run = init_with_default(kwargs, 'last_run',
                                              self.started_time)
            self.state = init_with_default(kwargs, 'state',
                                           TaskStateEnum.WAIT_NEXT)
            self.set_next_run()

    def set_next_run(self):
        self.last_run = datetime.utcnow()
        self.next_run = self.last_run + self.get_interval()
        self.save()
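
A minimal usage sketch, assuming a working DB connection (the constructor persists the document via set_next_run):

# Hypothetical usage: schedule a task that should run every five minutes.
task = _ScheduledTaskDoc(interval=timedelta(minutes=5))
assert task.next_run == task.last_run + task.get_interval()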
Example 5
class EmbeddedStats(EmbeddedStatsBase):
    ''' A structure holding the minimal stats we need within a slot for
    reporting. It is a lean version of ExtendedEmbeddedStats.
    '''
    agent = fields.NumField(db_field='at', default=0)
    language = fields.NumField(db_field='le', default=Lang.ALL)

    comparable_keys = ['agent', 'language']
Example 6
class FollowerTrackingStatus(Document):
    channel = fields.ObjectIdField(db_field='cl')
    twitter_handle = fields.StringField(db_field='th')
    followers_count = fields.NumField(default=0, db_field='fc')
    followers_synced = fields.NumField(default=0, db_field='fs')
    sync_status = fields.StringField(default='idle',
                                     db_field='sy',
                                     choices=('idle', 'sync'))

    indexes = [Index(('channel', 'twitter_handle'), unique=True)]
Example 7
class EventLog(AuthDocument):
    "Store information about various events in the db"
    type_id = fields.NumField(required=True, db_field='ti')
    name = fields.NameField(required=True, db_field='ne')
    timestamp = fields.NumField(default=time.time)
    ip_address = fields.StringField(db_field='ia', default=get_remote_ip)
    user = fields.StringField(default='anonymous', db_field='ur')
    account = fields.StringField(default='solariat', db_field='at')
    note = fields.StringField(db_field='nte')
    extra_info = fields.DictField(db_field='ei')
Example 8
class TwitterRateLimit(Document):
    manager = TwitterRateLimitManager

    id = fields.StringField(db_field='_id')
    remaining = fields.NumField()
    limit = fields.NumField()
    reset = fields.NumField()
    delay = fields.NumField()

    def is_manual(self):
        return self.delay is not None
Example 9
class BaseScore(AuthDocument):

    created = fields.DateTimeField(default=now)
    matching_engine = fields.ObjectIdField()
    model_id = fields.ObjectIdField(null=True)
    counter = fields.NumField(default=1)
    cumulative_latency = fields.NumField(required=True)

    indexes = [
        ('matching_engine', 'created'),
    ]

    @property
    def latency(self):
        return 1.0 * self.cumulative_latency / (self.counter or 1)
Example 10
class ModelMixin(object):

    packed_clf = fields.BinaryField()  # WARNING: 2MB limit!
    counter = fields.NumField(default=0)  # Use to track iterations

    configuration = fields.DictField()

    @property
    def classifier_class(self):
        "So we can easily plug in other classifier classes if we want."
        return None

    @property
    def clf(self):
        if not hasattr(self, '_clf') or not self._clf:
            kwargs = dict()
            if self.packed_clf:
                kwargs['model'] = self.packed_clf

            if self.configuration:
                kwargs.update(self.configuration)

            if hasattr(self, 'model_type'):
                kwargs['model_type'] = self.model_type
            self._clf = self.classifier_class(**kwargs)

        return self._clf

    def pack_model(self):
        # make sure we also save classifier state (pickled and zipped)
        #print 'save(): _clf=%r' % self._clf
        self.packed_clf = self.clf.packed_model
        self.counter += 1
Example 11
class LocalModelsMixin(object):

    # packed_clf = fields.BinaryField()  # WARNING: 2MB limit!
    clf_map = fields.DictField()
    counter = fields.NumField(default=0)  # Use to track iterations

    configuration = fields.DictField()

    @property
    def classifier_class(self):
        "So we can easily plug in other classifier classes if we want."
        return None

    @property
    def clf(self):
        if not hasattr(self, '_clf') or not self._clf:
            self._clf = self.classifier_class(predictor_model=self)
        return self._clf

    def delete(self, *args, **kwargs):
        from solariat_bottle.db.predictors.base_predictor import LocalModel
        LocalModel.objects.remove(predictor_model=self)

    def delete_local_models(self):
        from solariat_bottle.db.predictors.base_predictor import LocalModel
        LocalModel.objects.remove(predictor_model=self)
Example 12
class FAQDocumentInfo(ArchivingAuthDocument):
    collection = 'FAQDocInfo'

    channel = fields.ReferenceField('Channel', db_field='ch')
    _answer_df = fields.StringField()
    _query_df = fields.StringField()
    _stemmer = fields.StringField()
    query_count = fields.NumField()

    indexes = [('channel'), ]
    # indexes = [('channel'), (('_query_df', TEXT), None, 'english')]

    def __get_answer_df(self):
        return json.loads(self._answer_df)

    def __set_answer_df(self, answer_df):
        self._answer_df = json.dumps(answer_df)

    answer_df = property(__get_answer_df, __set_answer_df)

    def __get_query_df(self):
        return json.loads(self._query_df)

    def __set_query_df(self, query_df):
        self._query_df = json.dumps(query_df)

    query_df = property(__get_query_df, __set_query_df)

    def __get_stemmer(self):
        return json.loads(self._stemmer)

    def __set_stemmer(self, stemmer):
        self._stemmer = json.dumps(stemmer)

    stemmer = property(__get_stemmer, __set_stemmer)
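
A hedged sketch of the JSON-backed properties above; the channel and document-frequency values are illustrative:

# Hypothetical usage: the properties transparently (de)serialize JSON strings.
doc = FAQDocumentInfo(channel=channel)
doc.query_df = {'billing': 12, 'outage': 7}   # stored internally as the _query_df JSON string
doc.query_count = sum(doc.query_df.values())
doc.save()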
Example 13
class NPSOutcome(Post):

    manager = NPSOutcomeManager
    PROFILE_CLASS = NPSProfile

    case_number = fields.StringField(db_field='cr', required=True)
    response_type = fields.StringField(db_field='rp', required=True)
    score = fields.NumField(db_field='se', required=True)
    profile_data = fields.DictField(db_field='pd')

    indexes = [('response_type', ), ('_created', )]

    @property
    def computed_tags(self):
        return list(
            set(self._computed_tags +
                [str(smt.id)
                 for smt in self.accepted_smart_tags] + self.assigned_tags))

    @classmethod
    def gen_id(cls,
               is_inbound,
               actor_id,
               _created,
               in_reply_to_native_id,
               parent_event=None):
        actor_num = cls.get_actor(True, actor_id).actor_num
        packed = pack_event_id(actor_num, _created)
        return packed

    def to_dict(self, fields2show=None):
        base_dict = super(NPSOutcome, self).to_dict(fields2show=fields2show)
        base_dict.pop('profile_data')
        return base_dict
Example 14
class PredictorModelData(SonDocument):
    """Embedded model information to be used in Predictor
    """
    model_id = fields.ObjectIdField()  # reference to PredictorModel

    # denormalized from PredictorModel
    display_name = fields.StringField()
    weight = fields.NumField()
    task_data = fields.EmbeddedDocumentField(TaskData)

    @staticmethod
    def _get_model_data(model):
        return dict(model_id=model.id,
                    display_name=model.display_name,
                    weight=model.weight,
                    task_data=model.task_data)

    @classmethod
    def init_with_model(cls, model):
        return cls(**cls._get_model_data(model))

    def sync_with_model_instance(self, model):
        self.__dict__.update(self._get_model_data(model))

    def __eq__(self, other):
        return isinstance(other, self.__class__) and other.model_id == self.model_id

    def __hash__(self):
        return hash(str(self.model_id))
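
A minimal sketch of keeping the embedded copy in sync with its source model; 'model' is assumed to be a PredictorModel instance:

# Hypothetical usage: build the denormalized record, then refresh it after a change.
data = PredictorModelData.init_with_model(model)
model.weight = 0.5
data.sync_with_model_instance(model)
assert data.weight == model.weight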
Example 15
class FacebookRateLimitInfo(Document):
    access_token = fields.StringField()
    failed_request_time = fields.DateTimeField()
    error_code = fields.NumField(null=True, choices=FB_RATE_LIMIT_ERRORS + [None])
    path = fields.StringField()
    wait_until = fields.DateTimeField()
    channel = fields.StringField()
    log_item = fields.ObjectIdField()

    indexes = [('access_token', 'error_code')]
    manager = FacebookRateLimitInfoManager
    LIMITS_CONFIG = {
        THROTTLING_USER: BackOffStrategy(30*60, 30*60, 1.0),
        THROTTLING_APP: BackOffStrategy(225, 60*60, 2.0),
        ERROR_MISUSE: BackOffStrategy(60 * 60, 24 * 60 * 60, 3.0),
        THROTTLING_API_PATH: BackOffStrategy(60, 60*60, 2.0)
    }

    @property
    def wait_time(self):
        return (utc(self.wait_until) - utc(self.failed_request_time)).total_seconds()

    @property
    def remaining_time(self):
        return (utc(self.wait_until) - now()).total_seconds()

    @property
    def exc(self):
        return FacebookRateLimitError(
            code=self.error_code,
            remaining_time=self.remaining_time,
            path=self.path)
Example 16
class PostFilter(Document):
    '''
    Internal structure representing the integration
    with a data stream provider.
    '''
    filter_type_id = fields.NumField(db_field='fd', choices=FILTER_TYPE_IDS)

    # How many entries
    entry_count = fields.NumField(db_field='et', default=0)

    # How many more entries can you handle
    spare_capacity = fields.NumField(db_field='sy',
                                     default=POSTFILTER_CAPACITY)

    datasift_hash = fields.StringField(db_field='dh')

    last_update = fields.DateTimeField(db_field='lu', default=datetime.now)
    last_sync = fields.DateTimeField(db_field='ls')

    def _update_item(self, n):
        self.update(inc__entry_count=n,
                    inc__spare_capacity=-n,
                    set__last_update=datetime.now())

    def add_item(self):
        ''' Increment counters'''
        self._update_item(1)

    def remove_item(self):
        ''' Decrement counters or remove if empty '''
        if self.entry_count >= 2:
            self._update_item(-1)
        else:
            self.delete()

    def set_datasift_hash(self, datasift_hash):
        "Atomically set the datasift hash and update last_sync."

        return self.objects.coll.find_and_modify(
            query={'_id': self.id},
            update={
                '$set': {
                    self.fields['datasift_hash'].db_field: datasift_hash,
                    self.fields['last_sync'].db_field: datetime.now()
                }
            },
            new=True)
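
A hedged sketch of the counter lifecycle above; the filter lookup is illustrative:

# Hypothetical usage: track one more entry on a post filter.
pf = PostFilter.objects.get(filter_id)
pf.add_item()       # entry_count += 1, spare_capacity -= 1, last_update refreshed
pf.remove_item()    # decrements the counters, or deletes the filter once empty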
Example 17
class FooBar(Document):
    name = fields.StringField(db_field='nm')
    status = fields.StringField(db_field='stts', choices=['active', 'deactivated', 'suspended'])
    counter = fields.NumField(db_field='cntr')
    created_at = fields.DateTimeField(db_field='crtd')
    updated_at = fields.DateTimeField(db_field='updtd')
    active = fields.BooleanField(db_field='actv')
    stages = fields.ListField(fields.StringField(), db_field='stgs')
Example 18
class ABCMultiClassPredictor(AuthDocument):

    collection = 'ABCMultiPreditor'

    abc_predictors = fields.ListField(
        fields.ObjectIdField())  # Just a grouping of binary predictors
    inclusion_threshold = fields.NumField(default=0.25)
    is_dirty = fields.BooleanField()

    __classes = None

    @property
    def classes(self):
        if not self.__classes:
            options = [
                ABCPredictor.objects.get(o_id) for o_id in self.abc_predictors
            ]
            self.__classes = options
        return self.__classes

    def to_dict(self, fields_to_show=None):
        base_dict = super(ABCMultiClassPredictor,
                          self).to_dict(fields_to_show=fields_to_show)
        base_dict['classes'] = [seg.to_dict() for seg in self.classes]
        return base_dict

    def score(self, customer_profile):
        scores = []
        for option in self.classes:
            scores.append(
                (option.display_name, option.score(customer_profile)))
        return scores

    def match(self, customer_profile):
        max_score = 0
        best_option = None
        for option in self.classes:
            option_score = option.score(customer_profile)
            if option_score > max_score:
                best_option = option
                max_score = option_score
        if max_score > self.inclusion_threshold:
            return True, best_option
        return False, None

    def accept(self, customer_profile, accepted_option):
        for option in self.classes:
            if option.id == accepted_option.id:
                option.accept(customer_profile)
            else:
                option.reject(customer_profile)
        self.is_dirty = True
        self.save()

    def reject(self, customer_profile, rejected_option):
        rejected_option.reject(customer_profile)
        self.is_dirty = True
        self.save()
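
A minimal sketch of the scoring/feedback loop above, assuming 'customer_profile' is a profile document accepted by the underlying ABCPredictor instances:

# Hypothetical usage: pick the best matching class and feed the outcome back.
matched, option = multi_predictor.match(customer_profile)
if matched:
    multi_predictor.accept(customer_profile, option)  # train the winner, reject the rest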
Example 19
class BaseProfile(AuthDocument):
    manager = ProfileManager

    allow_inheritance = True
    collection = "BaseProfiles"

    account_id = fields.ObjectIdField()
    first_name = fields.StringField()
    last_name = fields.StringField()
    age = fields.NumField()
    sex = fields.StringField()
    location = fields.StringField()
    seniority = fields.StringField()
    assigned_labels = fields.ListField(fields.ObjectIdField())
    date_of_birth = fields.StringField()
    attached_data = fields.DictField()
    products = fields.ListField(fields.StringField())
    actor_num = AutoIncrementField(counter_name='ActorCounter', db_field='ar')
    created_at = fields.DateTimeField(default=now)

    linked_profile_ids = fields.ListField(fields.StringField())

    indexes = ['actor_num', 'linked_profile_ids']

    @property
    def linked_profiles(self):
        from solariat_bottle.db.user_profiles.user_profile import UserProfile
        return UserProfile.objects(id__in=self.linked_profile_ids)[:]

    def get_profile_of_type(self, typename):
        if not isinstance(typename, basestring):
            typename = typename.__name__

        for profile in self.linked_profiles:
            if profile.__class__.__name__ == typename:
                return profile

    def add_profile(self, profile):
        new_id = str(profile.id)
        if new_id not in self.linked_profile_ids:
            self.linked_profile_ids.append(new_id)
        self.update(addToSet__linked_profile_ids=new_id)

    def get_age(self):
        # The best guess we can make is from the date of birth, if present and properly formatted
        if self.date_of_birth:
            try:
                dob = datetime.strptime(self.date_of_birth, AGE_FORMAT)
                return relativedelta(datetime.now(), dob).years
            except Exception, ex:
                LOGGER.error(ex)
        # Next, if actual age is present, use that but also store updated dob
        if self.age:
            dob = datetime.now() - relativedelta(years=self.age)
            self.date_of_birth = dob.strftime(AGE_FORMAT)
            self.save()
            return self.age
        return None
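
A hedged sketch of the fallback in get_age; AGE_FORMAT is defined elsewhere in the codebase and the profile below is illustrative:

# Hypothetical usage: a profile with only 'age' set gets a derived date_of_birth.
profile = BaseProfile(age=30)
profile.get_age()              # returns 30 and back-fills date_of_birth
assert profile.date_of_birth   # now stored in AGE_FORMAT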
Example 20
class EventSequenceStatsMixin(object):

    account_id = fields.ObjectIdField(db_field='aid')
    channels = fields.ListField(fields.ObjectIdField(), db_field='chs')
    stage_sequence_names = fields.ListField(fields.StringField(),
                                            db_field='sseqnm')
    status = fields.NumField(db_field='ss',
                             choices=JourneyStageType.STATUSES,
                             default=JourneyStageType.IN_PROGRESS)
    smart_tags = fields.ListField(fields.ObjectIdField(), db_field='sts')
    journey_tags = fields.ListField(fields.ObjectIdField(), db_field='jts')
    journey_type_id = fields.ObjectIdField(db_field='jt')
    journey_attributes = fields.DictField(db_field='jyas')

    def __get_journey_type(self):
        if hasattr(self, '_f_journey_type'):
            return self._f_journey_type
        else:
            self._f_journey_type = JourneyType.objects.get(
                self.journey_type_id)
            return self._f_journey_type

    def __set_journey_type(self, journey_type):
        self._f_journey_type = journey_type

    journey_type = property(__get_journey_type, __set_journey_type)

    @classmethod
    def translate_static_key_name(cls, key_name):
        # translate any static key, leave anything else the same
        if key_name == cls.status.db_field:
            return 'status'
        return key_name

    @classmethod
    def translate_static_key_value(cls, key_name, key_value):
        # translate any static key, leave anything else the same
        if key_name == cls.status.db_field:
            return JourneyStageType.STATUS_TEXT_MAP[key_value]
        return key_value

    @property
    def full_journey_attributes(self):
        # Dynamically defined attributes plus any statically defined ones
        # worth considering in facets or analysis
        from copy import deepcopy
        base_attributes = deepcopy(self.journey_attributes)
        base_attributes['status'] = self.status
        return base_attributes

    @property
    def account(self):
        # TODO Check this for performance. Should cache.
        return Account.objects.get(self.account_id)

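A minimal sketch of the key-translation helpers above; the raw db key 'ss' comes from the status field definition:

# Hypothetical usage: map a raw db key/value for 'status' back to readable form.
key = EventSequenceStatsMixin.translate_static_key_name('ss')   # -> 'status'
value = EventSequenceStatsMixin.translate_static_key_value(
    'ss', JourneyStageType.IN_PROGRESS)                         # -> readable status text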
Example 21
class Score(SonDocument):
    name = fields.StringField()
    score = fields.NumField()

    def __hash__(self):
        return hash(self.id)

    @property
    def id(self):
        return self.name, str(self.score)
Example 22
class BaseProfileLabel(AuthDocument, ClassifierMixin):
    allow_inheritance = True
    collection = 'ProfileLabel'

    account_id = fields.ObjectIdField()
    display_name = fields.StringField()
    _feature_index = fields.NumField()

    @property
    def feature_index(self):
        if self._feature_index is None:
            self._feature_index = NumberSequences.advance(
                str(self.account_id) + '__' + self.__class__.__name__)
            self.save()
        return self._feature_index

    @classmethod
    def get_match(cls, profile):
        matches = []
        for label in cls.objects(account_id=profile.account_id):
            if label.match(profile):
                matches.append(label)
        if not matches:
            LOGGER.warning("Found no match for profile %s and class %s" %
                           (profile, cls))
            return None
        if len(matches) > 1:
            LOGGER.warning(
                "Found more than one match for profile %s and class %s" %
                (profile, cls))
        return matches[0]

    def save(self):
        self.packed_clf = self.clf.packed_model
        super(BaseProfileLabel, self).save()

    def make_profile_vector(self, profile):
        return {
            "content":
            profile.assigned_labels + [profile.location] + [str(profile.age)]
        }

    def match(self, profile):
        if self.id in profile.assigned_labels:
            return True
        if self.clf.score(
                self.make_profile_vector(profile)) > self.inclusion_threshold:
            return True
        return False

    def accept(self, profile):
        self.clf.train([self.make_profile_vector(profile)], [1])

    def reject(self, profile):
        self.clf.train([self.make_profile_vector(profile)], [0])
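
A hedged sketch of the classification loop above, assuming 'profile' is a BaseProfile with assigned_labels, location and age populated:

# Hypothetical usage: find the best label for a profile and reinforce it.
label = BaseProfileLabel.get_match(profile)
if label is not None:
    label.accept(profile)   # positive training example for this label's classifier
    label.save()            # re-packs and persists the classifier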
Example 23
class ConversationEmbeddedStats(EmbeddedStats):

    # Metrics
    count = fields.NumField(db_field='cn', default=0)

    countable_keys = ['count']

    # def __hash__(self):
    #     return hash(self.agent)

    def __str__(self):
        return "|agent=%s;count=%s|" % (self.agent, self.count)
Example 24
class CallEvent(Event):

    collection = 'Post'
    manager = CallEventManager
    PROFILE_CLASS = CallProfile

    call_duration = fields.NumField(db_field='cn')
    catmap = fields.StringField(
        db_field='cp'
    )  # Sample values: '', 'ENQUIRE_CreditManagement', 'CALLBACK_ACB_A_NOANSWER', 'REPORT_FAULT_Number', 'ENQUIRE_MailBox', 'NO_INPUT_Detected'
    custtype = fields.StringField(
        db_field='ce'
    )  # Sample values: HOME, BUSINESS, NOT, WHOLESALE; Mostly empty
    servtype = fields.StringField(
        db_field='se'
    )  # Sample values: INTERNET, MOBILE, FIXED, PAYTV, WIRELESS, ADSL, CABLE, BROADBAND, WIFI, ADSL, DIALUP, CABLE,
    transfer_type = fields.StringField(
        db_field='te')  # Sample values: CTI, ABANDON, HANGUP, CTI_S/S
    sn_segment = fields.StringField(
        db_field='st'
    )  # Sample values: '', Business, Industry, PlatinumConsumer, BusinessCross, Wholesale, Corporate, Test
    sn_contact_reason = fields.StringField(
        db_field=''
    )  # Sample values: 'RequestUsage', 'RequestHelpTcom', 'EnquireExistingComplaint', 'EnquirePlan', 'AssignPlantTCS', 'EscalateInternal717', 'RequestGlobalRoaming'
    sn_contact_type = fields.StringField(
        db_field='')  # Sample values: 'Direct', '', 'Outbound', 'Transfer'
    # call_start_time_melb = fields.StringField(db_field='') # Sample values: '2014-08-11 13:03:27', '2014-08-25 17:32:55', '2014-08-11 13:03:24'
    sn_service = fields.StringField(
        db_field=''
    )  # Sample values: '', 'BigpondMusic', 'FoxtelMobile', 'FoxtelBusiness', 'Internet', 'Test', 'ISDN', 'Fax', 'NextG', 'Prepaid', 'DisabilityPhone', 'NextGWirelessLink'
    od_apptag = fields.StringField(
        db_field=''
    )  # Sample values: '', 'enquire-MESSAGE_SERVICES', 'reportFault-PHONE', 'enquire-WAKE_UP_CALL', 'enquire-NBN', 'upgrade-MOBILE_PHONE', 'enquire-BILL', 'enquire-WIRELESS_NETWORK', 'disconnect-INTERNET'
    balance_at_call = fields.NumField(
        db_field=''
    )  # Sample values: '', '360.07', '360.05', '360.04', '360.03', '360.02', '360.01'
    sec_description = fields.StringField(
        db_field=''
    )  # Sample values: '', 'MCO - Broadband faults', 'NEI - Rental lines', 'NHA - General enquiry', '583 - Payphone', 'NHN - Mobile credit', 'N49 - Chk number', 'TSR - Telstra Shop 3 Faults', 'NNJ - Lost phone', 'NEH - Refunds', '359 - Err at Bill/Account', 'ND3 - Account details'
Example 25
class BaseFeedback(AuthDocument):

    created = fields.DateTimeField(default=now)
    action = fields.DictField()
    context = fields.DictField()
    matching_engine = fields.ObjectIdField()
    model_id = fields.ObjectIdField(null=True)
    reward = fields.NumField()

    # predicted score
    est_reward = fields.NumField()

    context_vector = fields.DictField()
    action_vector = fields.DictField()

    # scoring latency in ms
    score_runtime = fields.NumField()  # time taken in millisecond to compute score

    # scoring error %
    score_diff = fields.NumField()  # (reward - score) / reward

    indexes = [('matching_engine', 'created'), ]
Example 26
class TaskData(SonDocument):
    updated_at = fields.DateTimeField()
    total = fields.NumField()
    done = fields.NumField()

    @property
    def status(self):
        return 'training' if 0 < self.done < self.total else 'idle'

    @property
    def progress(self):
        return int(100 * (self.done or 0.0) / (self.total or 1.0))

    def to_dict(self, fields_to_show=None):
        task = self
        json_data = super(TaskData, self).to_dict(fields_to_show)
        json_data.update(
            progress=self.progress,
            status=task.status,
            updated_at=task.updated_at and str(task.updated_at)
        )
        return json_data
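
A minimal sketch of the derived properties above; the values are illustrative:

# Hypothetical usage: a partially completed training task.
task = TaskData(total=200, done=50, updated_at=datetime.utcnow())
assert task.status == 'training'   # 0 < done < total
assert task.progress == 25         # integer percentage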
Example 27
class MultiEventTag(EventTag):

    feature_extractor = MultiChannelTagVectorizer()

    event_lookup_horizon = fields.NumField(default=5)

    @property
    def classifier_class(self):
        "So we can easily plug in other classifier classes if we want."
        return MultiEventFilterClassifier

    def get_features(self, last_event):
        full_vector = self.feature_extractor.construct_feature_space(last_event, self.features_metadata)
        return self.feature_extractor.merge_event_sequence_vectors(full_vector, self.event_lookup_horizon)
Example 28
class FacebookRequestLog(Document):
    channel = fields.ObjectIdField(null=True, db_field='cl')
    access_token = fields.StringField(db_field='tok')
    path = fields.StringField(db_field='uri')
    method = fields.StringField(db_field='m')
    args = fields.StringField(db_field='arg')
    post_args = fields.StringField(db_field='parg')
    start_time = fields.DateTimeField(db_field='ts')
    end_time = fields.DateTimeField(db_field='et')
    elapsed = fields.NumField(db_field='el')
    error = fields.StringField(db_field='er', null=True)

    indexes = [('start_time', 'access_token', 'path')]
    manager = FacebookRequestLogManager
Example 29
class PredictorModel(Document):
    collection = 'PredictorModel'
    allow_inheritance = True

    version = fields.NumField()
    predictor = fields.ReferenceField('BasePredictor')
    parent = fields.ObjectIdField()
    weight = fields.NumField()
    display_name = fields.StringField()
    description = fields.StringField()
    # is_active = fields.BooleanField(default=False)
    task_data = fields.EmbeddedDocumentField(TaskData)
    last_run = fields.DateTimeField()
    context_features = fields.ListField(fields.DictField())
    action_features = fields.ListField(fields.DictField())
    train_data_percentage = fields.NumField(default=80)
    n_rows = fields.NumField()
    min_samples_thresould = fields.NumField(default=1)

    from_dt = fields.DateTimeField()
    to_dt = fields.DateTimeField()

    def score(self, *args, **kwargs):
        pass

    def feedback(self, *args, **kwargs):
        pass

    def search(self, *args, **kwargs):
        pass

    def to_json(self, *args, **kwargs):
        from solariat_bottle.db.predictors.base_predictor import PredictorConfigurationConversion
        data = super(PredictorModel, self).to_json(*args, **kwargs)
        data = PredictorConfigurationConversion.python_to_json(data)
        return data
Example 30
class GroupingConfig(SonDocument):
    MIN_GRP_TIMEOUT, MAX_GRP_TIMEOUT = 0, 7 * 24 * 60 * 60  # from 0 seconds to 7 days
    DEFAULT_GRP_TIMEOUT = 120  # seconds
    is_enabled = fields.BooleanField(default=False)
    group_by_type = fields.BooleanField(default=True)
    grouping_timeout = fields.NumField(default=DEFAULT_GRP_TIMEOUT)  # seconds

    @classmethod
    def validate_grouping_timeout(cls, timeout):
        allowed_types = (int, float)
        if not isinstance(timeout, allowed_types):
            raise ValueError("%s is not instance of %s" %
                             (timeout, allowed_types))
        if not (timeout == 0
                or cls.MIN_GRP_TIMEOUT <= timeout <= cls.MAX_GRP_TIMEOUT):
            raise ValueError(
                "%s is not in range of [%s, %s]" %
                (timeout, cls.MIN_GRP_TIMEOUT, cls.MAX_GRP_TIMEOUT))
        return timeout
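
A hedged sketch of how the validator above behaves:

# Hypothetical usage: validate a grouping timeout before storing it.
GroupingConfig.validate_grouping_timeout(300)      # ok, returns 300
GroupingConfig.validate_grouping_timeout(10 ** 7)  # raises ValueError: out of range
GroupingConfig.validate_grouping_timeout('120')    # raises ValueError: wrong type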