class LocalModelsMixin(object):
    """Mixin for predictor documents whose trained classifiers live in
    separate ``LocalModel`` documents (keyed back to this predictor model)
    rather than in a packed binary field on the document itself.
    """
    # packed_clf = fields.BinaryField()  # WARNING: 2MB limit!
    clf_map = fields.DictField()          # mapping kept for the local models
    counter = fields.NumField(default=0)  # Use to track iterations
    configuration = fields.DictField()    # free-form classifier configuration

    @property
    def classifier_class(self):
        "So we can easily plugin other classifier classes if we want."
        return None

    @property
    def clf(self):
        # Lazily construct the classifier on first access and cache it on
        # the instance; subclasses supply `classifier_class`.
        if not hasattr(self, '_clf') or not self._clf:
            self._clf = self.classifier_class(predictor_model=self)
        return self._clf

    def delete(self, *args, **kwargs):
        # FIX: body was a verbatim copy of delete_local_models(); delegate
        # instead so the cleanup logic exists in one place.
        # NOTE(review): super().delete() is intentionally NOT called here
        # (matching the original behavior) -- only the attached LocalModel
        # documents are removed; confirm the document itself should survive.
        self.delete_local_models()

    def delete_local_models(self):
        # Remove every LocalModel document attached to this predictor model.
        from solariat_bottle.db.predictors.base_predictor import LocalModel
        LocalModel.objects.remove(predictor_model=self)
class JourneyStage(ABCPredictor):
    """One stage of a customer journey, persisted per journey instance.

    Implements the ABCPredictor precondition/rule hooks so events can be
    matched against a concrete stage.
    """
    collection = 'JourneyStage'
    manager = JourneyStageManager

    journey_id = fields.ObjectIdField(db_field='jo')
    # Reference to a JourneyStageType document.
    stage_type_id = fields.ObjectIdField(db_field='st')
    stage_name = fields.StringField(db_field='sn')
    # Embedded doc with any effort info we're going to track.
    effort_info = fields.DictField(db_field='ef')
    # Embedded doc with any reward info we're going to track.
    reward_info = fields.DictField(db_field='ri')
    start_date = fields.DateTimeField(db_field='sd')
    end_date = fields.DateTimeField(db_field='ed')
    # Timestamp of the most recent event seen for this stage.
    last_updated = fields.DateTimeField(db_field='lu')
    # Keep track of the event itself.
    last_event = fields.EventIdField(db_field='le')

    def check_preconditions(self, event):
        # An explicit, truthy stage_id on the event is both precondition
        # and acceptance rule.
        stage_id = getattr(event, 'stage_id', None)
        if stage_id:
            return str(self.id) == stage_id
        # If a specific set of journeys came with the event, this stage's
        # journey must be among them.
        journeys = getattr(event, 'journeys', None)
        if journeys:
            return self.journey_id in journeys
        return True

    def rule_based_match(self, object):
        # Only an explicit stage_id on the object yields a rule-based match.
        stage_id = getattr(object, 'stage_id', None)
        if stage_id:
            return str(self.id) == stage_id
        return False

    def process_event(self, event):
        # Record the event as the latest activity on this stage.
        self.update(set__last_event=event.data['_id'],
                    set__last_updated=event.datetime_from_id)

    def get_journey_stage_type(self):
        # Resolve the referenced JourneyStageType document.
        from solariat_bottle.db.journeys.journey_type import JourneyStageType
        return JourneyStageType.objects.get(self.stage_type_id)
class VOCServiceChannel(VOCChannel, ServiceChannel):
    """Service channel for Voice-Of-Customer data, pairing inbound and
    outbound VOC channels and propagating permissions to its smart tags.
    """
    classifiers = fields.DictField(db_field="cz")
    manager = VOCServiceChannelManager

    @property
    def InboundChannelClass(self):
        return InboundVOCChannel

    @property
    def OutboundChannelClass(self):
        return OutboundVOCChannel

    @property
    def platform(self):
        return "VOC"

    def add_perm(self, user, group=None, to_save=True):
        # Grant on the service channel itself, then mirror the grant onto
        # every smart-tag channel referenced by `classifiers`.
        super(VOCServiceChannel, self).add_perm(user, group, to_save)
        tag_ids = self.classifiers.values()
        for smart_tag in SmartTagChannel.objects.find(id__in=tag_ids):
            smart_tag.add_perm(user, group, to_save)

    def post_received(self, post):
        # For now skip this, we don't know if / what we want to process here
        return

    def find_direction(self, post):
        # For now just assume all posts are actionable if posted in one
        # of the channels.
        return 'direct'
class NPSOutcome(Post):
    """Post subclass holding a single NPS survey response."""
    manager = NPSOutcomeManager
    PROFILE_CLASS = NPSProfile

    case_number = fields.StringField(db_field='cr', required=True)
    response_type = fields.StringField(db_field='rp', required=True)
    score = fields.NumField(db_field='se', required=True)  # the NPS score given
    profile_data = fields.DictField(db_field='pd')         # raw respondent profile payload

    indexes = [('response_type', ), ('_created', )]

    @property
    def computed_tags(self):
        # Union (de-duplicated via set) of inherited computed tags,
        # accepted smart tags and manually assigned tags.
        return list(
            set(self._computed_tags +
                [str(smt.id) for smt in self.accepted_smart_tags] +
                self.assigned_tags))

    @classmethod
    def gen_id(cls,
               is_inbound,
               actor_id,
               _created,
               in_reply_to_native_id,
               parent_event=None):
        # Id is the actor number packed together with the creation time.
        # NOTE(review): `is_inbound`, `in_reply_to_native_id` and
        # `parent_event` are accepted but ignored here; get_actor is always
        # called with is_inbound=True -- confirm this is intentional.
        actor_num = cls.get_actor(True, actor_id).actor_num
        packed = pack_event_id(actor_num, _created)
        return packed

    def to_dict(self, fields2show=None):
        # Serialize like a Post but strip the raw profile_data payload.
        base_dict = super(NPSOutcome, self).to_dict(fields2show=fields2show)
        base_dict.pop('profile_data')
        return base_dict
class TextChannelFilterItem(Document):
    """A single training item (accepted or rejected text) belonging to an
    AuthTextClassifier channel filter.
    """
    content = fields.StringField(db_field='ct')          # the raw item text
    channel_filter = fields.ReferenceField(AuthTextClassifier, db_field='cr')
    vector = fields.DictField(db_field='vr')             # extracted feature vector
    # Whether the item was accepted or rejected by the filter/classifier.
    filter_type = fields.StringField(choices=['rejected', 'accepted'],
                                     default='rejected',
                                     db_field='fe')
class ModelMixin(object):
    """Mixin that stores a pickled/zipped classifier directly on the
    document (``packed_clf``) and lazily rebuilds it on access.
    """
    packed_clf = fields.BinaryField()     # WARNING: 2MB limit!
    counter = fields.NumField(default=0)  # Use to track iterations
    configuration = fields.DictField()

    @property
    def classifier_class(self):
        "So we can easily plugin other classifier classes if we want."
        return None

    @property
    def clf(self):
        # Build the classifier on first access and keep it cached on the
        # instance. Stored state, configuration and an optional model_type
        # attribute all feed into the constructor.
        if not (hasattr(self, '_clf') and self._clf):
            init_kwargs = dict()
            if self.packed_clf:
                init_kwargs['model'] = self.packed_clf
            if self.configuration:
                init_kwargs.update(self.configuration)
            if hasattr(self, 'model_type'):
                init_kwargs['model_type'] = self.model_type
            self._clf = self.classifier_class(**init_kwargs)
        return self._clf

    def pack_model(self):
        # make sure we also save classifier state (pickled and zipped)
        self.packed_clf = self.clf.packed_model
        self.counter += 1
class LocalStoreChannelFilter(ChannelFilter):
    """Channel filter whose accepted/rejected items are embedded in the
    document itself rather than stored externally.
    """
    _accepted_items = fields.ListField(fields.DictField(), db_field='as')
    _rejected_items = fields.ListField(fields.DictField(), db_field='rs')

    @property
    def accepted_items(self):
        return self._accepted_items

    @property
    def rejected_items(self):
        return self._rejected_items

    def reset(self):
        """Drop every accepted and rejected item and persist the change."""
        self._accepted_items = []
        self._rejected_items = []
        self.save()
class BaseProfile(AuthDocument):
    """Base customer/actor profile that can link to several platform
    profiles (Twitter, Facebook, ...) via ``linked_profile_ids``.
    """
    manager = ProfileManager
    allow_inheritance = True
    collection = "BaseProfiles"

    account_id = fields.ObjectIdField()
    first_name = fields.StringField()
    last_name = fields.StringField()
    age = fields.NumField()
    sex = fields.StringField()
    location = fields.StringField()
    seniority = fields.StringField()
    assigned_labels = fields.ListField(fields.ObjectIdField())
    date_of_birth = fields.StringField()   # stored as string in AGE_FORMAT
    attached_data = fields.DictField()
    products = fields.ListField(fields.StringField())
    actor_num = AutoIncrementField(counter_name='ActorCounter', db_field='ar')
    created_at = fields.DateTimeField(default=now)
    # Ids (stringified) of linked platform-specific UserProfile documents.
    linked_profile_ids = fields.ListField(fields.StringField())

    indexes = ['actor_num', 'linked_profile_ids']

    @property
    def linked_profiles(self):
        # Materialize the linked UserProfile documents.
        from solariat_bottle.db.user_profiles.user_profile import UserProfile
        return UserProfile.objects(id__in=self.linked_profile_ids)[:]

    def get_profile_of_type(self, typename):
        # Accepts either a class or a class name; returns the first linked
        # profile of that concrete type, or None if there is no match.
        if not isinstance(typename, basestring):
            typename = typename.__name__
        for profile in self.linked_profiles:
            if profile.__class__.__name__ == typename:
                return profile

    def add_profile(self, profile):
        # Link a platform profile; addToSet keeps the stored list duplicate
        # free even under concurrent updates.
        new_id = str(profile.id)
        if new_id not in self.linked_profile_ids:
            self.linked_profile_ids.append(new_id)
        self.update(addToSet__linked_profile_ids=new_id)

    def get_age(self):
        # Best guess we can make is by date of birth if present and properly formatted
        if self.date_of_birth:
            try:
                dob = datetime.strptime(self.date_of_birth, AGE_FORMAT)
                return relativedelta(datetime.now(), dob).years
            except Exception, ex:
                LOGGER.error(ex)
        # Next, if actual age is present, use that but also store updated dob
        # (NOTE: this getter persists a derived date_of_birth as a side effect).
        if self.age:
            dob = datetime.now() - relativedelta(years=self.age)
            self.date_of_birth = dob.strftime(AGE_FORMAT)
            self.save()
            return self.age
        return None
class WidgetModel(Document):
    """ A WidgetModel is a abstract widget that can be instantiated to
        ConcreteWidget and used in corresponding typed dashboard """
    title = fields.StringField(required=True, unique=True)
    description = fields.StringField()
    settings = fields.DictField()  # free-form widget configuration
    created = fields.DateTimeField(default=datetime.now)
class EventSequenceStatsMixin(object):
    """Mixin with the denormalized journey/event-sequence fields shared by
    journey statistics documents, plus helpers to translate static keys.
    """
    account_id = fields.ObjectIdField(db_field='aid')
    channels = fields.ListField(fields.ObjectIdField(), db_field='chs')
    stage_sequence_names = fields.ListField(fields.StringField(),
                                            db_field='sseqnm')
    status = fields.NumField(db_field='ss',
                             choices=JourneyStageType.STATUSES,
                             default=JourneyStageType.IN_PROGRESS)
    smart_tags = fields.ListField(fields.ObjectIdField(), db_field='sts')
    journey_tags = fields.ListField(fields.ObjectIdField(), db_field='jts')
    journey_type_id = fields.ObjectIdField(db_field='jt')
    journey_attributes = fields.DictField(db_field='jyas')

    def __get_journey_type(self):
        # Cache the JourneyType lookup on the instance.
        if hasattr(self, '_f_journey_type'):
            return self._f_journey_type
        else:
            self._f_journey_type = JourneyType.objects.get(
                self.journey_type_id)
            return self._f_journey_type

    def __set_journey_type(self, journey_type):
        self._f_journey_type = journey_type

    journey_type = property(__get_journey_type, __set_journey_type)

    @classmethod
    def translate_static_key_name(cls, key_name):
        # translate any static key, leave anything else the same
        if key_name == cls.status.db_field:
            return 'status'
        return key_name

    @classmethod
    def translate_static_key_value(cls, key_name, key_value):
        # translate any static key, leave anything else the same
        if key_name == cls.status.db_field:
            return JourneyStageType.STATUS_TEXT_MAP[key_value]
        return key_value

    @property
    def full_journey_attributes(self):
        # Dynamic defined plus any static defined attributes worth
        # considering in facets or analysis
        from copy import deepcopy
        base_attributes = deepcopy(self.journey_attributes)
        base_attributes['status'] = self.status
        return base_attributes

    @property
    def account(self):
        # TODO Check this for performance. Should cache.
        return Account.objects.get(self.account_id)

    # FIX: removed two stray duplicated statements
    # (`event_id = EventIdField().to_mongo(event_id)`) that sat at class
    # scope, referenced an undefined `event_id` name, and would have raised
    # NameError when the module was imported.
class EventLog(AuthDocument):
    "Store information about various events in db"
    type_id = fields.NumField(required=True, db_field='ti')
    name = fields.NameField(required=True, db_field='ne')
    timestamp = fields.NumField(default=time.time)  # unix time of the event
    ip_address = fields.StringField(db_field='ia', default=get_remote_ip)
    user = fields.StringField(default='anonymous', db_field='ur')
    account = fields.StringField(default='solariat', db_field='at')
    note = fields.StringField(db_field='nte')
    extra_info = fields.DictField(db_field='ei')  # free-form event payload
class ABCPredictor(AuthDocument, ClassifierMixin):
    """Abstract base for rule-plus-classifier predictors.

    Scoring pipeline (see `score`): preconditions gate everything (0 if
    they fail), an explicit rule match short-circuits to 1, otherwise the
    wrapped classifier scores the extracted feature vector.
    """
    allow_inheritance = True
    collection = 'ABCPredictor'

    # Hold the precondition as string in a grammar
    precondition = fields.StringField()
    acceptance_rule = fields.StringField()  # Hold any acceptance rule
    is_dirty = fields.BooleanField()  # set when training happened since last refresh
    # Any hints the classifier can use will be stored as a JSON here
    features_metadata = fields.DictField()

    feature_extractor = BaseFeatureExtractor()

    def get_features(self, object):
        # Build the feature vector for `object` using any stored metadata hints.
        return self.feature_extractor.construct_feature_space(
            object, self.features_metadata)

    def save(self):
        # Persist the pickled classifier state alongside the document.
        # NOTE(review): signature narrows the base save() -- callers passing
        # keyword args (e.g. upsert) would break; confirm none do.
        self.packed_clf = self.clf.packed_model
        super(ABCPredictor, self).save()

    def match(self, object):
        # True when the score clears the inclusion threshold (from ClassifierMixin).
        if self.score(object) > self.inclusion_threshold:
            return True
        return False

    def score(self, object):
        if not self.check_preconditions(object):
            return 0
        if self.rule_based_match(object):
            return 1
        return self.clf.score(self.get_features(object))

    def accept(self, object):
        # Positive training sample; flags the model dirty and persists.
        features = self.get_features(object)
        self.clf.train([features], [1])
        self.is_dirty = True
        self.save()

    def reject(self, object):
        # Negative training sample; flags the model dirty and persists.
        self.clf.train([self.get_features(object)], [0])
        self.is_dirty = True
        self.save()

    @abstractmethod
    def check_preconditions(self, object):
        # Default: delegate to the feature extractor's precondition check.
        return self.feature_extractor.check_preconditions(
            object, self.features_metadata)

    @abstractmethod
    def rule_based_match(self, object):
        pass
class BaseFeedback(AuthDocument):
    """One feedback/reward record for a matching-engine scoring decision,
    including the observed reward, the predicted score and scoring metrics.
    """
    created = fields.DateTimeField(default=now)
    action = fields.DictField()
    context = fields.DictField()
    matching_engine = fields.ObjectIdField()
    model_id = fields.ObjectIdField(null=True)
    reward = fields.NumField()          # observed outcome value
    # predicted score
    est_reward = fields.NumField()
    context_vector = fields.DictField()
    action_vector = fields.DictField()

    # scoring latency in ms
    score_runtime = fields.NumField()  # time taken in millisecond to compute score
    # scoring error %
    score_diff = fields.NumField()  # (reward - score) / reward

    indexes = [('matching_engine', 'created'), ]
class QueueMessage(Document):
    """A queued post awaiting delivery, with lease-style reservation
    (``reserved_until``) and batch grouping (``batch_token``).
    """
    manager = QueueMessageManager

    channel_id = fields.ListField(fields.StringField())
    created_at = fields.DateTimeField()
    reserved_until = fields.DateTimeField()  # lease expiry for in-flight messages
    post_data = fields.DictField()           # serialized post payload
    batch_token = fields.StringField()       # groups messages fetched together

    indexes = [
        ('channel_id', 'reserved_until'),
        ('batch_token', ),
    ]
class PredictorModel(Document):
    """One trained (or trainable) model version belonging to a BasePredictor,
    carrying its feature configuration and training window.
    """
    collection = 'PredictorModel'
    allow_inheritance = True

    version = fields.NumField()
    predictor = fields.ReferenceField('BasePredictor')
    parent = fields.ObjectIdField()
    weight = fields.NumField()
    display_name = fields.StringField()
    description = fields.StringField()
    # is_active = fields.BooleanField(default=False)
    task_data = fields.EmbeddedDocumentField(TaskData)
    last_run = fields.DateTimeField()
    context_features = fields.ListField(fields.DictField())
    action_features = fields.ListField(fields.DictField())
    train_data_percentage = fields.NumField(default=80)
    n_rows = fields.NumField()
    # NOTE: misspelling ("thresould") kept -- it is part of the stored schema
    # and the public attribute name.
    min_samples_thresould = fields.NumField(default=1)
    from_dt = fields.DateTimeField()
    to_dt = fields.DateTimeField()

    def score(self, *args, **kwargs):
        pass

    def feedback(self, *args, **kwargs):
        pass

    def search(self, *args, **kwargs):
        pass

    def to_json(self, *args, **kwargs):
        # Serialize, then convert python-side configuration to its JSON form.
        from solariat_bottle.db.predictors.base_predictor import PredictorConfigurationConversion
        serialized = super(PredictorModel, self).to_json(*args, **kwargs)
        return PredictorConfigurationConversion.python_to_json(serialized)
class FAQQueryEvent(Event):
    """Event recording a user query against the FAQ system."""
    query = fields.StringField()                           # the raw user query
    speech_acts = fields.ListField(fields.DictField())     # parsed speech acts
    manager = FAQEventManager
    PROFILE_CLASS = WebProfile

    @classmethod
    def patch_post_kw(cls, kw):
        # FAQ events need no post-kwargs patching.
        pass

    @property
    def platform(self):
        return 'FAQ'

    def to_dict(self, fields2show=None):
        # FIX: forward fields2show to the parent serializer -- it was
        # accepted but silently dropped, unlike sibling Event/Post
        # subclasses (e.g. NPSOutcome.to_dict) which pass it through.
        base_dict = super(FAQQueryEvent, self).to_dict(fields2show=fields2show)
        # Embed the full actor representation for consumers.
        base_dict['actor'] = self.actor.to_dict()
        return base_dict
class DashboardWidget(Document):
    """A single widget placed on a dashboard; its display configuration
    lives in the free-form ``settings`` dict.
    """
    created = fields.DateTimeField(db_field='c', default=datetime.now)
    settings = fields.DictField(db_field='s')
    order = fields.NumField(db_field='o')
    title = fields.StringField(db_field='t', required=True)
    user = fields.ReferenceField(User, db_field='u')
    dashboard_id = fields.ObjectIdField(required=True)

    manager = DashboardWidgetManager

    def to_dict(self):
        # Base attributes first; settings keys win on collision.
        data = {
            'title': self.title,
            'order': self.order,
            'id': str(self.id),
            'dashboard_id': str(self.dashboard_id),
        }
        data.update(self.settings)
        return data

    def copy_to(self, dashboard):
        # Clone this widget onto another dashboard, owned by that
        # dashboard's owner. Settings override the base attributes.
        data = {
            'title': self.title,
            'user': dashboard.owner,
            'dashboard_id': dashboard.id,
        }
        data.update(self.settings)
        return DashboardWidget.objects.create_by_user(**data)

    def delete(self):
        # Detach from the owning dashboard before removing the document.
        owner_dashboard = Dashboard.objects.get_by_user(self.user,
                                                        id=self.dashboard_id)
        owner_dashboard._remove_widget(self)
        super(DashboardWidget, self).delete()

    def __repr__(self):
        return "<DashboardWidget: %s; id: %s>" % (self.title, self.id)
class ChannelFilterItem(Document, ESMixin):
    """A channel-filter training item mirrored into ElasticSearch via
    ESMixin; unique per (channel_filter, item_id).
    """
    ESCollection = ChannelFilterItemCollection()
    manager = ChannelFilterItemManager

    item_id = PostOrResponseId(db_field='it')
    channel_filter = fields.ReferenceField(ChannelFilter, db_field='cr')
    content = fields.StringField(db_field='ct')
    vector = fields.DictField(db_field='vr')  # feature vector; must contain 'content'
    filter_type = fields.StringField(choices=['rejected', 'starred'],
                                     default='rejected',
                                     db_field='fe')
    is_active = fields.BooleanField(default=True, db_field='ia')

    indexes = [Index(('channel_filter', 'item_id'), unique=True)]

    def to_dict(self):
        ''' Used for pushing to ES '''
        # Flatten references to plain strings so the document is ES-friendly.
        d = super(ChannelFilterItem, self).to_dict()
        d['content'] = self.vector['content']
        d['filter_type'] = str(self.filter_type)
        d['channel'] = str(self.channel_filter.channel.id)
        d['item_id'] = str(d['item_id'])
        return d
class Dashboard(AuthDocument):
    """A user-owned dashboard: an ordered collection of widget ids plus
    sharing and filter metadata.
    """
    collection = 'Dashboard'
    manager = DashboardManager

    type_id = fields.ObjectIdField(required=True)  # references a DashboardType
    title = fields.StringField(required=True)
    description = fields.StringField()
    owner = fields.ReferenceField(User)
    author = fields.ReferenceField(User)   # original creator (kept on copy)
    widgets = fields.ListField(fields.ObjectIdField())
    shared_to = fields.ListField(fields.ObjectIdField())
    filters = fields.DictField()
    created = fields.DateTimeField(default=datetime.now)

    admin_roles = {STAFF, ADMIN, REVIEWER, ANALYST}

    def to_dict(self, fields_to_show=None):
        rv = super(Dashboard, self).to_dict()
        rv['widgets'] = map(str, self.widgets)
        rv['shared_to'] = map(str, self.shared_to)
        rv['owner_name'] = '%s %s' % (self.owner.first_name or '',
                                      self.owner.last_name or '')
        rv['author_name'] = '%s %s' % (self.author.first_name or '',
                                       self.author.last_name or '')
        rv['owner_email'] = self.owner.email
        rv['author_email'] = self.author.email
        rv['account_id'] = str(self.owner.account.id)
        rv['type'] = DashboardType.objects.get(self.type_id).type
        return rv

    def __repr__(self):
        return "<Dashboard: %s; id: %s>" % (self.title, self.id)

    def _add_widget(self, widget):
        """Append the widget's id and persist."""
        self.widgets.append(widget.id)
        self.save()

    def _remove_widget(self, widget):
        """ widget is not automatically deleted. To delete, use
        `.delete_widget()` instead. `widget.dashboard_id` will still point
        to this dashboard.
        """
        self.widgets.remove(widget.id)
        self.save()

    def delete_widget(self, widget):
        # Accept a widget instance or an id (string/ObjectId).
        if isinstance(widget, (basestring, fields.ObjectId)):
            widget = DashboardWidget.objects.get(widget)
        widget.delete()

    def delete(self):
        # Deleting each widget also detaches it via DashboardWidget.delete.
        for widget_id in self.widgets:
            self.delete_widget(widget_id)
        # NOTE(review): delegates to delete_by_user on the parent with the
        # owner -- confirm this (rather than plain delete) is intended.
        super(Dashboard, self).delete_by_user(self.owner)

    def copy_to(self, user, title=None, description=None):
        # Clone this dashboard (and all widgets) for another user; the
        # original owner is recorded as the copy's author.
        dashboard_data = {
            'type_id': self.type_id,
            'title': title or self.title,
            'description': description or self.description,
            'author': self.owner,
            'owner': user,
            'widgets': [],
            'shared_to': [],
            'filters': self.filters,
        }
        # FIX: create_by_user is having role error
        dashboard = Dashboard.objects.create_by_user(user, **dashboard_data)
        #dashboard = Dashboard.objects.create(**dashboard_data)
        for widget_id in self.widgets:
            widget = DashboardWidget.objects.get(widget_id)
            widget.copy_to(dashboard)
        return dashboard
class InsightsAnalysis(Document):
    """A user-triggered classification/regression analysis run over journey
    or predictive-matching data, with progress tracking and cached results.
    """
    # Result-dict keys used by the analysis processors.
    KEY_WEIGHT = 'discriminative_weight'
    KEY_RANK = 'rank'
    KEY_SCORE = 'score'
    KEY_VALUES = 'values'
    KEY_CROSSTAB = 'crosstab_results'
    KEY_VALUE_TYPE = 'value_type'
    KEY_PIE = 'pie'
    KEY_BAR = 'bar'
    KEY_BOX = 'boxplot'
    KEY_SCATTER = 'scatter'

    CLASSIFICATION_TYPE = 'classification'
    REGRESSION_TYPE = 'regression'

    BOOLEAN_METRIC = 'Boolean'
    NUMERIC_METRIC = 'Numeric'
    LABEL_METRIC = 'Label'

    METRIC_CONVERTED = "converted"
    METRIC_ABANDONED = "abandoned"
    METRIC_STUCK = "stuck"

    IDX_UNKNOWN = -1
    IDX_SKIP = -2

    NUM_TIMERANGE_SLOTS = 7  # number of buckets for time-range results

    user = fields.ObjectIdField(db_field='usr')
    title = fields.StringField(db_field='te')
    created_at = fields.NumField(db_field='ca')
    account_id = fields.ObjectIdField(db_field='ac')
    filters = fields.DictField(db_field='ft', required=True)
    analysis_type = fields.StringField(choices=[CLASSIFICATION_TYPE,
                                                REGRESSION_TYPE],
                                       db_field='at')
    application = fields.StringField(db_field='an')  # e.g. application which's used for the analysis
    analyzed_metric = fields.StringField(db_field='me')
    metric_type = fields.StringField(choices=[BOOLEAN_METRIC,
                                              NUMERIC_METRIC,
                                              LABEL_METRIC],
                                     db_field='mt')
    metric_values = fields.ListField(fields.StringField(), db_field='mv')
    metric_values_range = fields.ListField(fields.NumField(), db_field='mvr')  # e.g. min/max Numeric values or unique labels
    progress = fields.NumField(db_field='pg', default=0)
    _results = fields.StringField(db_field='rt')            # JSON-encoded results
    _timerange_results = fields.StringField(db_field='trt')  # JSON-encoded time-bucketed results
    status_message = fields.StringField(db_field='msg')

    # Transient, per-instance computation state (not persisted).
    _cached_from_date = None
    _cached_to_date = None
    time_increment = None

    @property
    def status_progress(self):
        # Map the numeric progress value onto a (status, percent) pair.
        if self.progress == PROGRESS_STOPPED:
            return STATUS_STOPPED, 0
        elif self.progress == 0:
            return STATUS_QUEUE, self.progress
        elif self.progress == PROGRESS_DONE:
            return STATUS_DONE, self.progress
        elif self.progress == PROGRESS_ERROR:
            return STATUS_ERROR, 0
        else:
            return STATUS_IN_PROGRESS, self.progress

    def is_stopped(self):
        return self.progress == PROGRESS_STOPPED

    def compute_class_names(self):
        """Human-readable class names for the analyzed metric; falls back to
        the raw metric_values on any parsing error."""
        import json
        metric_names = []
        try:
            if self.analyzed_metric == "stage-paths":
                for metric in self.metric_values:
                    metric_info = json.loads(metric)
                    metric_names.append("%s at step %s" % (metric_info['stage'],
                                                           metric_info['step']))
                return metric_names
            if self.analyzed_metric == "paths-comparison":
                for metric in self.metric_values:
                    metric_info = json.loads(metric)
                    metric_names.append(
                        "%s %s (%s)" % (metric_info['measure'],
                                        metric_info['path'],
                                        metric_info['metric_value']))
                return metric_names
            if self.metric_type == self.NUMERIC_METRIC and self.analysis_type == self.CLASSIFICATION_TYPE:
                # Three buckets: below, between, above the two thresholds.
                metric_values = [
                    '%s(%s:%s)' % (self.analyzed_metric,
                                   self.metric_values_range[0],
                                   self.metric_values[0]),
                    "%s(%s:%s)" % (self.analyzed_metric,
                                   self.metric_values[0],
                                   self.metric_values[1]),
                    "%s(%s:%s)" % (self.analyzed_metric,
                                   self.metric_values[1],
                                   self.metric_values_range[1])]
                return metric_values
        except:
            # NOTE(review): bare except deliberately swallows parse errors
            # and falls back to raw values below.
            import logging
            logging.exception(__name__)
        return self.metric_values

    def to_dict(self, fields2show=None):
        base_dict = super(InsightsAnalysis, self).to_dict()
        # Replace the raw JSON-string fields with their decoded forms.
        base_dict.pop('_results')
        base_dict.pop('_timerange_results')
        base_dict['results'] = self.results
        base_dict['timerange_results'] = self.timerange_results
        base_dict['status'] = self.status_progress
        base_dict['metric_values'] = self.compute_class_names()
        base_dict['metric_values_range'] = self.metric_values_range
        base_dict['level'] = self.get_timerange_level()
        return base_dict

    def get_timerange_level(self):
        # Best-effort timeslot level from the filter dates; None on failure.
        try:
            return guess_timeslot_level(parse_datetime(self.filters['from']),
                                        parse_datetime(self.filters['to']))
        except:
            LOGGER.warn('Unknown period to determine the timerange level')

    def get_user(self):
        return User.objects.get(self.user)

    def initialize_timeslot_counts(self):
        # Build {class_idx: {timeslot_ms: 0}} over NUM_TIMERANGE_SLOTS
        # buckets; requires _cached_from_date/_cached_to_date to be set.
        time_results = {}
        self.time_increment = (self._cached_to_date - self._cached_from_date).days * 24 / float(self.NUM_TIMERANGE_SLOTS)
        for class_idx in range(-1, self.get_num_classes()):
            time_results[class_idx] = dict()
            for slot_idx in xrange(self.NUM_TIMERANGE_SLOTS):
                timeslot = datetime_to_timestamp_ms(self._cached_from_date +
                                                    timedelta(hours=self.time_increment * slot_idx))
                time_results[class_idx][timeslot] = 0
        return time_results

    def get_num_classes(self):
        # Numeric metrics add one boundary bucket; others add unknown+skip.
        if self.metric_type == self.NUMERIC_METRIC:
            return len(self.metric_values) + 1
        else:
            return len(self.metric_values) + 2

    def get_timeslot_index(self, item):
        # Find the first slot whose start is past the item's created_at and
        # return that slot's timestamp (ms).
        for idx in xrange(self.NUM_TIMERANGE_SLOTS):
            if hasattr(item, 'created_at') and utc(item.created_at) > self._cached_from_date + timedelta(hours=self.time_increment * idx):
                continue
            else:
                break
        return datetime_to_timestamp_ms(self._cached_from_date +
                                        timedelta(hours=self.time_increment * idx))

    def process(self):
        # Dispatch to the processor matching the user's selected application.
        if self.application is None:
            self.application = self.get_user().account.selected_app
        if self.application == "Journey Analytics":
            # process_journeys_analysis.ignore(self)
            process_journeys_analysis(self)
        elif self.application == "Predictive Matching":
            # process_predictive_analysis.ignore(self)
            process_predictive_analysis(self)

    def save(self, **kw):
        # Manual upsert handling: update existing documents in place,
        # insert (and capture the new id) otherwise.
        if 'upsert' not in kw:
            kw['upsert'] = False
        # import json
        # analysis_file = open('analysis_' + str(self.id) + '.json', 'w')
        # json_data = {}
        # from bson import ObjectId
        # for key, val in self.data.iteritems():
        #     if not isinstance(val, ObjectId):
        #         json_data[key] = val
        # json.dump(json_data, analysis_file)
        # analysis_file.close()
        if self.id:
            self.objects.update(self.data, **kw)
        else:
            self.id = self.objects.insert(self.data, **kw)

    def start(self):
        # dummy call (https://bugs.launchpad.net/openobject-server/+bug/947231/comments/8)
        datetime.strptime('2011-01-01', '%Y-%m-%d')
        self.process()

    def stop(self):
        self.progress = PROGRESS_STOPPED
        self.save()

    def restart(self):
        self.progress = 0
        self.save()
        self.start()

    def terminate(self):
        self.progress = PROGRESS_ERROR
        self.status_message = 'Process had been terminated.'
        self.save()

    @property
    def timerange_results(self):
        # Decoded time-bucketed results; empty dict when not computed yet.
        if self._timerange_results:
            return json.loads(self._timerange_results)
        return {}

    @property
    def results(self):
        # Just in case we need some post-processing done
        if self._results:
            return json.loads(self._results)
        return {}
class JobStatus(ArchivingAuthDocument):
    """Lifecycle record of one background job: status transitions, timing,
    resubmission bookkeeping and access control.
    """
    STATUSES = PENDING, RUNNING, ABANDONED, SUCCESSFUL, FAILED, \
        RESUBMITTED, SLEEPING, TERMINATED = \
        'Pending', 'Running', 'Abandoned', 'Successful', 'Failed', \
        'Resubmitted', 'Sleeping', 'Terminated'

    RUNNABLE_STATUSES = PENDING, SLEEPING

    collection = 'jobs'

    account = fields.ObjectIdField(null=True)
    topic = fields.StringField()
    name = fields.StringField()
    args = fields.PickledField()
    kwargs = fields.PickledField()
    metadata = fields.DictField(null=True)  # holds git_commit, resubmitted info
    created_at = fields.DateTimeField()
    started_date = fields.DateTimeField()
    completion_date = fields.DateTimeField()
    status = fields.StringField(choices=STATUSES)
    state = fields.DictField()              # job-specific progress state
    last_activity = fields.DateTimeField()
    awake_at = fields.DateTimeField(null=True)  # wake-up time for SLEEPING jobs

    @property
    def git_commit(self):
        return (self.metadata or {}).get('git_commit')

    @property
    def resubmission_info(self):
        return (self.metadata or {}).get('resubmitted')

    def abandon(self):
        """Atomically flip PENDING -> ABANDONED; True only when this call
        won the compare-and-set against the collection."""
        if self.status == self.PENDING:
            self.status = self.ABANDONED
            # Guarded update: only modifies the doc if it is still PENDING,
            # so concurrent abandons cannot both succeed.
            res = self.objects.coll.update(
                {
                    self.F.id: self.id,
                    self.F.status: self.PENDING
                }, {"$set": {
                    self.F.status: self.ABANDONED
                }})
            if isinstance(res, dict) and res.get('nModified') == 1:
                return True

    def resume(self):
        """Resubmit a FAILED job; returns [old_job, new_job] with cross-links
        stored in both jobs' metadata."""
        if self.status != self.FAILED:
            raise RuntimeError("Job can not be resumed in '{}' state.".format(
                self.status))

        from solariat_bottle.jobs.manager import manager
        job = manager.registry.get(self.name)
        res = job.submit(self.topic, self.name, self.args, self.kwargs,
                         self.metadata)
        # updating old job
        meta = self.metadata or {}
        meta.update(resubmitted={
            'new_id': res.job_instance.id,
            'result': str(res.submission_result)
        })
        self.update(status=JobStatus.RESUBMITTED, metadata=meta)

        # updating new job
        meta = res.job_instance.metadata or {}
        meta.update(resubmitted={'old_id': self.id})
        res.job_instance.update(metadata=meta)
        return [self, res.job_instance]

    def can_edit(self, user_or_group, admin_roles=None):
        # NOTE(review): relies on self.admin_roles when admin_roles is None
        # -- presumably provided by a base class; confirm it exists here.
        if admin_roles is None:
            admin_roles = self.admin_roles
        account_check = user_or_group.is_staff or (
            user_or_group.is_admin
            and user_or_group.account.id == self.account)
        edit_check = (bool(
            set(admin_roles).intersection(set(user_or_group.user_roles)))
                      or (hasattr(user_or_group, 'is_superuser')
                          and user_or_group.is_superuser))
        return account_check and edit_check

    @property
    def wait_time(self):
        # Seconds spent queued; None until both timestamps exist.
        if self.started_date and self.created_at:
            return (utc(self.started_date or now()) -
                    utc(self.created_at)).total_seconds()

    @property
    def execution_time(self):
        # Seconds spent running; None until both timestamps exist.
        if self.completion_date and self.started_date:
            now_ = now()
            return (utc(self.completion_date or now_) -
                    utc(self.started_date or now_)).total_seconds()
class UserProfile(Document):
    """Platform-agnostic user profile; __init__ may dynamically retype the
    instance to a platform-specific subclass based on the id's suffix.
    """
    collection = 'UserProfile'
    allow_inheritance = True

    _created = fields.DateTimeField(default=now)
    updated_at = fields.DateTimeField(default=now, db_field='ts')
    native_id = fields.StringField(
        db_field='ui',
        required=False)  # Note: ui is a name for UserProfile.user_id field
    # All Channels this user has been engaged through
    engaged_channels = fields.ListField(fields.ReferenceField(Channel))
    platform_data = fields.DictField(db_field='pd')
    actor_num = AutoIncrementField(counter_name="ActorCounter", db_field='ar')

    manager = UserProfileManager

    indexes = [('native_id', ), ('engaged_channels', )]

    def __init__(self, data=None, **kw):
        """For compatibility with untyped UserProfile. This constructor can
        be deleted once all profiles in the UserProfile collection have the
        type information in the _t field
        """

        def _get_class_by_id(profile_id):
            # Profile ids may end with DELIMITER + platform index; map that
            # index back to the platform-specific profile class.
            from solariat_bottle.db.user_profiles.social_profile import DELIMITER, TwitterProfile, FacebookProfile
            pos = unicode(profile_id).rfind(DELIMITER) + 1
            if pos == 0:
                # No delimiter found: keep the current class.
                return self.__class__

            platform = None
            try:
                index = int(profile_id[pos:])
            except ValueError:
                logger.info(
                    u"Could not obtain platform from profile id: {}".format(
                        profile_id))
            else:
                platform = PLATFORM_BY_INDEX.get(index)
            class_ = {
                TwitterProfile.platform: TwitterProfile,
                FacebookProfile.platform: FacebookProfile
            }.get(platform, self.__class__)
            return class_

        if data:
            profile_id = data.get('_id')
        else:
            profile_id = kw.get('id')
        if isinstance(profile_id, basestring):
            # Retype this instance before the base constructor runs.
            self.__class__ = _get_class_by_id(profile_id)
        super(UserProfile, self).__init__(data, **kw)

    @property
    def screen_name(self):
        return self.native_id

    @staticmethod
    def extract_native_id(data):
        """Pull a native id out of a raw profile dict, falling back to a
        fresh ObjectId when none is present; always returns a string."""
        assert isinstance(data, dict), u"%s is not dict" % repr(data)
        native_id = None
        if 'platform_data' in data:
            native_id = data['platform_data'].get('id')
        if not native_id:
            native_id = data.get('id', data.get('native_id'))
        if not native_id:
            native_id = ObjectId()
        return str(native_id)

    @property
    def created(self):
        return utc(self._created)

    @classmethod
    def anonymous_profile(cls, platform=None):
        # Shared placeholder profile for unauthenticated actors.
        data = {'id': 'anonymous'}
        return cls.objects.get_or_create(**data)

    @classmethod
    def non_existing_profile(cls):
        # Sentinel profile representing a removed/unknown user.
        try:
            profile = cls.objects.get(native_id=NATIVE_REMOVED_PROFILE)
        except cls.DoesNotExist:
            profile = cls.objects.create(native_id=NATIVE_REMOVED_PROFILE)
        return profile

    def update_history(self, channel):
        # Record engagement with a channel (persists immediately).
        if channel not in self.engaged_channels:
            self.engaged_channels.append(channel)
            self.save()

    def has_history(self, channel):
        # True if engaged with the channel directly or via its service channel.
        service_channel = get_service_channel(
            channel) if channel and not channel.is_dispatch_channel else None
        return (channel and (channel in self.engaged_channels
                             or service_channel in self.engaged_channels))

    def get_conversations(self, user, channel=None):
        '''All conversations for this contact - subject to access controls'''
        from solariat_bottle.db.conversation import Conversation
        conv_list = sorted(Conversation.objects.find_by_user(user,
                                                             contacts=self.id),
                           key=lambda c: c.last_modified)
        if channel is not None:
            conv_list = [
                conv for conv in conv_list if str(channel.id) in conv.channels
            ]
        return conv_list

    def to_dict(self, fields2show=None):
        doc = super(UserProfile, self).to_dict(fields2show)
        doc.update(_type=self.__class__.__name__,
                   screen_name=self.screen_name)
        return doc
class PostStats(Document):
    """Accumulated statistics updates for a post, keyed by a custom id."""
    id = fields.CustomIdField()
    stats_updates = fields.ListField(fields.DictField())  # ordered update payloads
class TwitterRelationsMixin(object):
    """Deprecated. We are using general short-term cache for determining
    a relationship status and do not sync followers/friends lists anymore
    """
    # All profile ids from EnterpriseTwitterChannel which are following the user
    # so this user is friend of users in followed_by_brands list
    followed_by_brands = fields.ListField(fields.StringField(), db_field='fc')
    # All profile ids from EnterpriseTwitterChannel which are followed by user,
    # so this user is follower of users in follows_brands list
    follows_brands = fields.ListField(fields.StringField(), db_field='fb')
    # Audit trail of relation changes: {'a': action, 'u': brand id, 't': time}
    relations_history = fields.ListField(fields.DictField(), db_field='rh')

    @staticmethod
    def in_set_ignore_case(items, source):
        # Case-insensitive membership test of source's brand profile id.
        lower = lambda x: x.lower()
        return lower(get_brand_profile_id(source)) in set(map(lower, items))

    def is_followed(self, source):
        return self.in_set_ignore_case(self.followed_by_brands, source)

    # Alias: being a "friend" of the brand means the brand follows this user.
    is_friend = is_followed

    def log_relation(self, action, source):
        """Append an audit entry for `action`; timestamp comes from the
        channel's own status_update when available, else now()."""
        doc = {'a': action, 'u': get_brand_profile_id(source), 't': now()}
        if isinstance(source, Channel) and hasattr(source, 'status_update'):
            doc['t'] = source.status_update
        self.update(push__relations_history=doc)

    def add_follower(self, source):
        self.log_relation('add_follower', source)
        self.update(addToSet__followed_by_brands=get_brand_profile_id(source))

    def remove_follower(self, source):
        self.log_relation('remove_follower', source)
        self.update(pull__followed_by_brands=get_brand_profile_id(source))

    def is_follower(self, source):
        return self.in_set_ignore_case(self.follows_brands, source)

    def add_friend(self, source):
        self.log_relation('add_friend', source)
        self.update(addToSet__follows_brands=get_brand_profile_id(source))

    def remove_friend(self, source):
        self.log_relation('remove_friend', source)
        self.update(pull__follows_brands=get_brand_profile_id(source))

    def update_relations(self, platform, relation, data, channel):
        """Sync friend/follower relations from a raw platform payload.

        Items in `data` are either bare profile ids (looked up, skipped if
        unknown) or full profile dicts (parsed and upserted).
        """
        manager = self.objects

        def get_params(up_data):
            if platform == 'Twitter':
                from solariat_bottle.daemons.twitter.parsers import parse_user_profile
                return parse_user_profile(up_data)
            return {}

        for user_profile_data in data:
            if isinstance(user_profile_data, (basestring, int)):
                user_p = manager.find_one(user_id=str(user_profile_data))
                if not user_p:
                    continue
            else:
                user_p = manager.upsert(platform,
                                        get_params(user_profile_data))
            # Note the inversion: a FRIEND of the brand gains the brand as
            # follower, and vice versa (relation is from the brand's side).
            if relation == RELATION_FRIEND:
                user_p.add_follower(channel)
            elif relation == RELATION_FOLLOWER:
                user_p.add_friend(channel)
class CustomerJourney(AuthDocument, EventSequenceStatsMixin):
    """One customer's journey: a sequence of events grouped into stages,
    tracked in parallel under several stage-naming strategies (default /
    platform / event type), plus schema-computed journey attributes.
    """
    # Keys of entries in `journey_attributes_schema`
    FEAT_TYPE = 'type'
    FEAT_LABEL = 'label'
    FEAT_EXPR = 'field_expr'
    FEAT_NAME = 'name'

    collection = "CustomerJourney"
    manager = CustomerJourneyManager

    stage_name = fields.StringField(
        db_field='fs')  # stage_name of current_stage
    # Dict in the form:
    # <strategy_type> : <list of index__stage_name>. strategy_type can be for now (default, platform, event_type)
    stage_sequences = fields.DictField(db_field='sseq')
    # Dict in the form
    # index__stage_name: {actual attributes computed for this specific stage}
    stage_information = fields.DictField(db_field='si')
    customer_id = fields.BaseField(
        db_field='ci')  # dynamic profiles may use custom id type
    customer_name = fields.StringField(
        db_field='cn')  # Just for quick access w/o extra db call
    agent_ids = fields.ListField(fields.ObjectIdField(), db_field='ag')
    agent_names = fields.ListField(
        fields.StringField(),
        db_field='ans')  # Just for quick access w/o extra db calls
    journey_tags = fields.ListField(fields.ObjectIdField(), db_field='jts')
    channels = fields.ListField(fields.ObjectIdField(), db_field='chls')
    last_updated = fields.DateTimeField(db_field='lu')

    # time spent by events in each stage-eventtype status
    node_sequence = fields.ListField(fields.DictField(), db_field='nds')
    node_sequence_agr = fields.ListField(fields.StringField(), db_field='ndsn')

    # time spent by events in each stage-eventtype status
    journey_attributes_schema = fields.ListField(fields.DictField(),
                                                 db_field='jas')
    first_event_date = fields.DateTimeField(db_field='fed')
    last_event_date = fields.DateTimeField(db_field='led')

    indexes = [('journey_type_id', 'journey_tags'), ('journey_attributes', ),
               ('journey_type_id', 'channels'), ('customer_id', ),
               ('agent_ids', )]

    # Class-level cache of compiled BaseParser instances, keyed by
    # expression text + joined context keys (see apply_schema).
    parsers_cache = dict()

    @classmethod
    def to_mongo(cls, data, fill_defaults=True):
        """
        Same as super method, except parser.evaluate is skipped
        (would be called in process_event)
        """
        return super(CustomerJourney, cls).to_mongo(data,
                                                    fill_defaults=fill_defaults,
                                                    evaluate=False)

    @classmethod
    def metric_label(cls, metric, param, value):
        """Human-readable label for a facet metric value (status codes and
        journey type ids are translated to display text)."""
        # from solariat_bottle.db.predictors.customer_segment import CustomerSegment
        if param == 'status':
            value = JourneyStageType.STATUS_TEXT_MAP[value]
        if param == 'journey_type_id':
            value = JourneyType.objects.get(value).display_name
        #value = value[0] if type(value) in [list, tuple] and value else value if value is not None else 'N/A'
        if value is None:
            value = 'N/A'
        return str(value)

    def ui_repr(self):
        """Short one-line summary shown in the UI."""
        base_repr = "Status: %s; Start date: %s; End date: %s;" % (
            self.status, self.first_event_date, self.last_event_date)
        if self.customer_name:
            base_repr += " Customer: %s;" % self.customer_name
        if self.agent_names:
            base_repr += " Agents: %s;" % self.agent_names
        return base_repr

    def to_dict(self, *args, **kwargs):
        # from solariat_bottle.db.predictors.customer_segment import CustomerSegment
        base_dict = super(CustomerJourney, self).to_dict()
        base_dict['agents'] = map(str, self.agents)
        base_dict['channels'] = map(str, self.channels)
        base_dict['smart_tags'] = map(str, self.smart_tags)
        base_dict['journey_tags'] = map(str, self.journey_tags)
        base_dict['status'] = JourneyStageType.STATUS_TEXT_MAP[self.status]
        base_dict['string_repr'] = self.ui_repr()
        base_dict['journey_attributes'] = self.journey_attributes
        return base_dict

    def handle_add_tag(self, tag_id):
        tag_id = ObjectId(tag_id)
        self.update(addToSet__smart_tags=tag_id)

    def handle_remove_tag(self, tag_id):
        tag_id = ObjectId(tag_id)
        self.update(pull__smart_tags=tag_id)

    def apply_schema(self, expression, context):
        """Evaluate one schema expression against `context`, memoizing the
        compiled parser in `parsers_cache`. Returns None on TypeError
        (e.g. when a context value is missing/None)."""
        hash_key = str(expression) + '__'.join(context)
        if hash_key in CustomerJourney.parsers_cache:
            parser = CustomerJourney.parsers_cache[hash_key]
        else:
            parser = BaseParser(expression, context.keys())
            CustomerJourney.parsers_cache[hash_key] = parser
        try:
            value = parser.evaluate(context)
        except TypeError:
            value = None
        return value

    def process_event(self, event, customer, agent, journey_stage_type):
        """Fold one incoming event into this journey: update denormalized
        agent/customer/channel info, advance stage sequences for every
        strategy, and recompute schema-driven journey attributes.

        NOTE(review): events older than `last_updated` still update agents/
        channels and stage transitions, but not status/dates -- presumably
        intentional out-of-order handling; confirm before changing.
        """
        self._current_event = event
        received_event_from_past = False
        created_at = utc(event.created_at)
        last_updated = utc(self.last_updated) if self.last_updated else None
        if last_updated and created_at < last_updated:
            # log.error("=========RECEIVED EVENT FROM THE PAST %s %s < last updated %s" % (
            #     event, event.created_at, self.last_updated))
            received_event_from_past = True
        # IMPORTANT: No mongo calls should be done here at all!
        if agent:
            if agent.id not in self.agent_ids:
                self.agent_ids.append(agent.id)
                # TODO: This needs to be enforced on profile dynamic classes as a separate specific
                # column (can be optional)
                self.agent_names.append(str(agent))
        # TODO: Same as for agent profile, this needs to be set on dynamic class level
        self.customer_name = str(customer)
        if event.channels[0] not in self.channels:
            self.channels.append(event.channels[0])
        if not received_event_from_past:
            if journey_stage_type:
                self.status = journey_stage_type.status
            self.last_event_date = event.created_at
            self.last_updated = event.created_at
        # TODO: This whole strategy switch will need to be changed to be defined somehow on journey level
        # TODO: ISSSUE for the last stage the information is not copied. Will need to do this on journey closure.
        for strategy in [
                STRATEGY_DEFAULT, STRATEGY_PLATFORM, STRATEGY_EVENT_TYPE
        ]:
            self.check_for_stage_transition(strategy, event,
                                            journey_stage_type)
        schema_computed_attributes = dict()
        # All of these need to be returned directly from customer data (no extra mongo calls!)
        expression_context = dict(agents=self.agents,
                                  customer_profile=self.customer_profile,
                                  current_event=event,
                                  event_sequence=self.event_sequence,
                                  current_stage=self.current_stage,
                                  previous_stage=self.previous_stage,
                                  stage_sequence=self.stage_sequence)
        # for k in self.field_names:
        #     expression_context[k] = getattr(self, k)
        # adding func with @property decorator to context
        for key in CustomerJourney.get_properties():
            expression_context[key] = getattr(self, key)
        for schema_entry in self.journey_attributes_schema:
            expression = schema_entry[self.FEAT_EXPR]
            f_name = schema_entry[self.FEAT_NAME]
            schema_computed_attributes[f_name] = self.apply_schema(
                expression, expression_context)
            # Computed attributes become visible to later schema entries.
            expression_context[f_name] = schema_computed_attributes[f_name]
        self.journey_attributes = schema_computed_attributes
        if self.status in [
                JourneyStageType.COMPLETED, JourneyStageType.TERMINATED
        ]:
            # Journey is closed: flatten node_sequence keys for aggregation.
            self.node_sequence_agr = []
            for i, item in enumerate(self.node_sequence):
                key, value = item.items()[0]
                self.node_sequence_agr.append(key)

    @classmethod
    def get_properties(cls):
        """ returns all list of member funcs decorated with @property """
        from copy import deepcopy
        base = deepcopy(cls.field_names)
        base = [
            field for field in base
            if field not in ('is_archived', '_t', 'acl', 'match_expression',
                             'journey_type_id', 'display_name', 'account_id',
                             'id', 'available_stages')
        ]
        base.extend([
            name for name, value in vars(cls).items()
            if isinstance(value, property)
        ])
        return base

    @property
    def CONSTANT_DATE_NOW(self):  # For being picked up by context
        from datetime import datetime
        return datetime.now()

    @property
    def CONSTANT_ONE_DAYS(self):
        from datetime import timedelta
        return timedelta(hours=24)

    @property
    def CONSTANT_ONE_HOUR(self):
        from datetime import timedelta
        return timedelta(hours=1)

    def check_for_stage_transition(self, strategy, event, journey_stage_type):
        """Advance the stage sequence for one strategy if the event implies
        a new stage; also maintains node_sequence for the event-type strategy.
        """
        current_stage = self.get_current_stage(strategy)
        if strategy == STRATEGY_DEFAULT:
            new_stage = journey_stage_type.display_name if journey_stage_type else current_stage
        elif strategy == STRATEGY_EVENT_TYPE:
            new_stage = journey_stage_type.display_name + ':' + str(
                event.event_type) if journey_stage_type else current_stage
        elif strategy == STRATEGY_PLATFORM:
            new_stage = journey_stage_type.display_name + ':' + event.platform if journey_stage_type else current_stage
        if new_stage != current_stage:
            stage_index = self.get_current_index(strategy)
            new_stage_value = STAGE_INDEX_SEPARATOR.join(
                [new_stage, str(stage_index + 1)])
            if current_stage is not None:
                # Closing the previous stage: snapshot its computed info.
                full_stage_name = STAGE_INDEX_SEPARATOR.join(
                    [current_stage, str(stage_index)])
                self.stage_information[
                    full_stage_name] = self.compute_stage_information(strategy)
            self.stage_sequences[strategy] = self.stage_sequences.get(
                strategy, []) + [new_stage_value]
            if strategy == STRATEGY_DEFAULT:
                self.stage_name = journey_stage_type.display_name
                self.stage_sequence_names.append(new_stage)
        if strategy == STRATEGY_EVENT_TYPE:
            if current_stage is None and new_stage is None:
                return
            # TODO: This is still kind of hard coded for MPC
            if new_stage != current_stage:
                self.node_sequence.append({new_stage: 1})
            else:
                self.node_sequence[-1][new_stage] += 1

    def compute_stage_information(self, strategy):
        """Snapshot journey attributes plus start/end dates for the stage
        currently being closed under `strategy`."""
        info = dict()
        for key, val in self.journey_attributes.iteritems():
            info[key] = val
        info['end_date'] = self.last_event_date
        if len(self.stage_sequences.get(strategy, [])) <= 1:
            info['start_date'] = self.first_event_date
        else:
            # Stage starts where the previous stage ended.
            info['start_date'] = self.stage_information[
                self.stage_sequences[strategy][-2]]['end_date']
        return info

    def close_journey(self):
        """Snapshot information for the last (still open) stage of every
        strategy and persist the journey."""
        for strategy in [
                STRATEGY_DEFAULT, STRATEGY_PLATFORM, STRATEGY_EVENT_TYPE
        ]:
            current_stage = self.get_current_stage(strategy)
            stage_index = self.get_current_index(strategy)
            if current_stage is not None:
                full_stage_name = STAGE_INDEX_SEPARATOR.join(
                    [current_stage, str(stage_index)])
                self.stage_information[
                    full_stage_name] = self.compute_stage_information(strategy)
        self.save()

    def get_current_stage(self, strategy_type):
        # Last entry of the sequence, stripped of its index suffix; None if empty.
        if not self.stage_sequences.get(strategy_type):
            return None
        else:
            return self.stage_sequences.get(strategy_type)[-1].split(
                STAGE_INDEX_SEPARATOR)[0]

    def get_current_index(self, strategy_type):
        # Index suffix of the last entry; -1 if the sequence is empty.
        if not self.stage_sequences.get(strategy_type):
            return -1
        else:
            return int(
                self.stage_sequences.get(strategy_type)[-1].split(
                    STAGE_INDEX_SEPARATOR)[1])

    def stage_sequence_by_strategy(self, strategy):
        # Stage names (index suffixes stripped) in order for one strategy.
        return [
            val.split(STAGE_INDEX_SEPARATOR)[0]
            for val in self.stage_sequences[strategy]
        ]

    def __get_agents(self):
        # Lazily loaded & memoized list of agent profile documents.
        if hasattr(self, '_agents'):
            return self._agents
        else:
            self._agents = self.account.get_agent_profile_class().objects.find(
                id__in=self.agent_ids)[:]
            return self._agents

    def __set_agents(self, agents):
        self._agents = agents

    agents = property(__get_agents, __set_agents)

    def __get_customer_profile(self):
        # Lazily loaded & memoized customer profile document.
        if hasattr(self, '_customer_profile'):
            return self._customer_profile
        else:
            self._customer_profile = self.account.get_customer_profile_class(
            ).objects.get(self.customer_id)
            return self._customer_profile

    def __set_customer_profile(self, customer_profile):
        self._customer_profile = customer_profile

    customer_profile = property(__get_customer_profile,
                                __set_customer_profile)

    def __get_current_event(self):
        # Event set by process_event, else the last event of the sequence.
        if hasattr(self, '_current_event'):
            return self._current_event
        else:
            self._current_event = self.event_sequence[
                -1] if self.event_sequence else None
            return self._current_event

    def __set_current_event(self, event):
        self._current_event = event

    current_event = property(__get_current_event, __set_current_event)

    def __get_event_sequence(self):
        # Lazily loaded & memoized list of the customer's events in
        # [first_event_date, last_event_date]; empty when the customer or
        # the date range is missing.
        if hasattr(self, '_event_sequence'):
            return self._event_sequence
        else:
            from solariat_bottle.db.account import Account
            account = Account.objects.get(self.account_id)
            CustomerProfile = account.get_customer_profile_class()
            try:
                customer = CustomerProfile.objects.get(self.customer_id)
            except CustomerProfile.DoesNotExist:
                self._event_sequence = []
                return self._event_sequence
            if self.first_event_date and self.last_event_date:
                events = Event.objects.events_for_actor(
                    self.first_event_date, self.last_event_date,
                    customer.actor_num)[:]
                self._event_sequence = events
                return self._event_sequence
                # event_type_ids = [x.event_type for x in events]
                # event_types = EventType.objects(id__in=event_type_ids)[:]
                # event_type_map = {str(x.id): x.name for x in event_types}
                # return [event_type_map[x.event_type] for x in events]
            self._event_sequence = []
            return self._event_sequence

    def __set_event_sequence(self, event_sequence):
        self._event_sequence = event_sequence

    event_sequence = property(__get_event_sequence, __set_event_sequence)

    @property
    def current_stage(self):
        # Current stage name under the default strategy, or None.
        if len(self.stage_sequences.get(STRATEGY_DEFAULT, [])) == 0:
            return None
        else:
            last_stage = self.stage_sequences[STRATEGY_DEFAULT][-1].split(
                STAGE_INDEX_SEPARATOR)[0]
            return last_stage

    @property
    def nps(self):
        # Max of the schema-computed nps attribute and the score of the
        # current event when it is an NPSOutcome.
        nps1 = self.journey_attributes.get('nps')
        event = self.current_event
        from solariat_bottle.db.post.nps import NPSOutcome
        if isinstance(event, NPSOutcome):
            nps2 = self.current_event.score
        else:
            nps2 = None
        return max(nps1, nps2)

    @staticmethod
    def nps_value_to_label(value):
        """Map an NPS score (0-10, or None) to its standard category label."""
        if value is None:
            return 'n/a'
        elif 0 <= value <= 6:
            return 'detractor'
        elif value in (7, 8):
            return 'passive'
        elif value in (9, 10):
            return 'promoter'
        else:
            raise Exception("invalid nps value (%r given)" % value)

    @property
    def nps_category(self):
        # from solariat_bottle.views.facets import nps_value_to_label
        # NOTE(review): self.nps never returns the string 'N/A' from the code
        # above, so this guard looks vestigial -- confirm before removing.
        if self.nps == 'N/A':
            return 'N/A'
        else:
            return self.nps_value_to_label(self.nps)

    @property
    def previous_stage(self):
        # Second-to-last stage name under the default strategy, or None.
        if self.current_stage is None:
            return None
        if len(self.stage_sequences.get(STRATEGY_DEFAULT, [])) <= 1:
            return None
        else:
            last_stage = self.stage_sequences[STRATEGY_DEFAULT][-2].split(
                STAGE_INDEX_SEPARATOR)[0]
            return last_stage

    @property
    def stage_sequence(self):
        # All stage names (index suffixes stripped) under the default strategy.
        if len(self.stage_sequences.get(STRATEGY_DEFAULT, [])) == 0:
            return []
        else:
            return [
                val.split(STAGE_INDEX_SEPARATOR)[0]
                for val in self.stage_sequences[STRATEGY_DEFAULT]
            ]

    @property
    def first_event(self):
        event_sequence = self.event_sequence
        if event_sequence:
            return event_sequence[0]
        else:
            return None

    @property
    def is_abandoned(self):
        # 1 when the journey was terminated, else 0 (numeric for aggregation).
        if self.status == JourneyStageType.TERMINATED:
            return 1
        else:
            return 0

    @property
    def days(self):
        # Whole days between first and last event; None when dates missing.
        if self.first_event_date and self.last_event_date:
            return (utc(self.last_event_date) -
                    utc(self.first_event_date)).days
        else:
            return None
class EnterpriseTwitterChannel(TwitterChannel):
    "Channel with twitter specific information for tracking"
    manager = EnterpriseTwitterChannelManager

    twitter_handle = fields.StringField(default='')
    twitter_handle_data = fields.DictField()
    # BUGFIX: pass the callable, not its result. `default=datetime.utcnow()`
    # evaluated once at import time, so every new channel shared the same
    # stale timestamp; passing the function defers evaluation to creation.
    status_update = fields.DateTimeField(db_field='st', default=datetime.utcnow)
    followers_count = fields.NumField(default=0, db_field='fc')
    friends_count = fields.NumField(default=0, db_field='frc')
    is_inbound = fields.BooleanField(db_field='in', default=False)

    def get_twitter_profile(self):
        """Return the twitter profile JSON for this channel's credentials,
        served from the `twitter_handle_data` cache and refreshed whenever
        the access-token hash changes. Returns None when not authenticated.
        """
        if not self.is_authenticated:
            return None

        def fetch_api_me():
            from solariat_bottle.utils.tweet import TwitterApiWrapper, JSONParser
            api = TwitterApiWrapper.init_with_channel(self, parser=JSONParser())
            return api.me()

        # NOTE(review): `& (1 << 8)` keeps a single bit, not the low 8 bits
        # (`& ((1 << 8) - 1)` would). Left as-is because changing it would
        # invalidate every cached profile -- confirm intent before fixing.
        token_hash = hash(
            "%s%s" % (self.access_token_key, self.access_token_secret)) & (1 << 8)
        data = dict(self.twitter_handle_data or {})
        if data and 'hash' in data and 'profile' in data and data[
                'hash'] == token_hash:
            res = data['profile']
        else:
            # Cache miss or credentials changed: refetch and persist.
            api_me_json = fetch_api_me()
            data = {'hash': token_hash, 'profile': api_me_json}
            self.update(twitter_handle_data=data,
                        twitter_handle=api_me_json['screen_name'])
            res = api_me_json
        return res

    @property
    def twitter_handle_id(self):
        """The twitter user id (id_str) for this channel, or None."""
        if get_var('ON_TEST') and not self.twitter_handle_data:
            return self.twitter_handle
        profile_data = self.get_twitter_profile()
        if profile_data:
            return profile_data['id_str']

    @property
    def type_name(self):
        return "Enterprise Twitter"

    @property
    def type_id(self):
        return 1

    @property
    def is_dispatch_channel(self):
        return True

    @property
    def initial_status(self):
        return 'Active'

    def on_suspend(self):
        # Record suspension time truncated to whole seconds.
        self.update(status='Suspended',
                    status_update=datetime.utcnow().replace(microsecond=0))

    def tracked_entities(self):
        """Entities to track for this channel; empty unless active."""
        if self.status not in {'Active', 'Interim'}:
            return []
        return [('USER_NAME', [self.twitter_handle], self, ['en'])]

    def pre_save(self):
        "Track/untrack twitter_handle"
        from solariat_bottle.db.tracking import PostFilterStream
        stream = PostFilterStream.get()
        stream.untrack_channel(self)
        if self.twitter_handle and self.status != 'Archived':
            stream.track('USER_NAME', [self.twitter_handle], [self])

    def save(self, pre_save=True):
        if pre_save:
            self.pre_save()
        super(EnterpriseTwitterChannel, self).save()

    def save_by_user(self, user, **kw):
        if self.can_edit(user):
            self.pre_save()
        super(EnterpriseTwitterChannel, self).save_by_user(user, **kw)

    def follow_user(self, user_profile, silent_ex=False):
        """ For the given user profile, first do the actual twitter follow then
        also update the user profile object locally so we can quickly get the
        required status.

        If :param silent_ex: is set to true, any exception from twitter call
        is ignored. (e.g. autofollow on DM creation).
        """
        from solariat_bottle.tasks.twitter import tw_follow
        tw_follow(channel=self, user_profile=user_profile, silent_ex=silent_ex)
        self.update(inc__friends_count=1)

    def unfollow_user(self, user_profile, silent_ex=False):
        """ For the given user profile, first do the actual twitter unfollow
        then also update the user profile object locally so we can quickly get
        the required status.

        If :param silent_ex: is set to true, any exception from twitter call
        is ignored. (e.g. autofollow on DM creation).
        """
        from solariat_bottle.tasks.twitter import tw_unfollow
        tw_unfollow(channel=self, user_profile=user_profile,
                    silent_ex=silent_ex)
        self.update(inc__friends_count=-1)

    def get_attached_service_channels(self):
        service_channel = self.get_service_channel()
        return service_channel and [service_channel] or []

    def get_service_channel(self):
        """The TwitterServiceChannel whose outbound is this channel's
        user-tracking channel, or None."""
        channel = self.get_user_tracking_channel()
        if channel:
            channel = TwitterServiceChannel.objects.find_one(
                outbound=channel.id)
        return channel

    def get_user_tracking_channel(self):
        """Case-insensitive lookup of the UserTrackingChannel tracking this
        channel's handle within the same account; None when ambiguous or
        missing (both cases are logged)."""
        if self.twitter_handle is None:
            return None
        # candidates = UserTrackingChannel.objects.find(usernames__in=get_sync_usernames_list([self.twitter_handle]),
        #                                               account=self.account)[:]
        # case-insensitive lookup for service channel
        from solariat_bottle.db.tracking import PostFilterEntry, TrackingNLP
        usernames_list = get_sync_usernames_list([self.twitter_handle])
        usernames_list = map(TrackingNLP.normalize_kwd, usernames_list)
        candidate_channel_ids = set()
        for pfe in PostFilterEntry.objects.coll.find(
                {PostFilterEntry.F.entry: {
                    '$in': usernames_list
                }}, fields=[PostFilterEntry.F.channels]):
            chs = pfe[PostFilterEntry.F.channels]
            if not isinstance(chs, (list, tuple)):
                chs = [chs]
            for ch in chs:
                if hasattr(ch, 'id'):
                    candidate_channel_ids.add(ch.id)
                else:
                    candidate_channel_ids.add(ch)
        candidates = UserTrackingChannel.objects(id__in=candidate_channel_ids,
                                                 account=self.account)[:]
        if candidates:
            if len(candidates) == 1:
                return candidates[0]
            else:
                LOGGER.warning(
                    "We have multiple candidates for service channel matching for enterprise channel %s"
                    % self)
                return None
        LOGGER.warning(
            "No service channel candidates were found for outbound channel %s. "
            "Some outbound channel filtering might not work.", self.title)
        return None

    def get_outbound_channel(self, user):
        # A dispatch channel is its own outbound channel.
        return self
class ChannelType(ArchivingAuthDocument):
    """Account-scoped, schema-driven channel type from which a dynamic
    Channel subclass is generated; supports re-syncing stored documents
    after a schema change."""
    STATUSES = IN_SYNC, SYNCING, OUT_OF_SYNC = 'IN_SYNC', 'SYNCING', 'OUT_OF_SYNC'

    manager = ChannelTypeManager

    # from base.Channel
    account = fields.ReferenceField(Account, db_field='at', required=True)
    name = fields.StringField(required=True)
    description = fields.StringField()
    schema = fields.ListField(fields.DictField())
    sync_status = fields.StringField(choices=STATUSES, default=IN_SYNC)
    is_locked = fields.BooleanField(default=False)
    mongo_collection = fields.StringField()
    created_at = fields.DateTimeField(default=now)
    updated_at = fields.DateTimeField(default=now)

    _channel_class = None  # lazily built dynamic Channel subclass (per type)

    @property
    def data_class_name(self):
        # keep classname unique system wide, to exclude collisions in MetaDocument.Registry[name]
        # when creating instance of Channel for different accounts
        return '%s%s' % (self.name.encode('utf8'), self.account.id)

    def get_channel_class(self):
        """Build (once) and return the dynamic channel class for this type."""
        if self._channel_class is None:
            newclass = SchemaBased.create_data_class(
                self.data_class_name,
                self.schema,
                self.mongo_collection,
                inherit_from=DynamicEventsImporterChannel,
                _platform=self.name)
            self._channel_class = newclass
        return self._channel_class

    def update(self, *args, **kwargs):
        """Persist updates; when the schema changes, invalidate the cached
        dynamic class and its metaclass-registry entry so it is rebuilt."""
        if 'schema' in kwargs and kwargs['schema'] != self.schema:
            self._channel_class = None
            from solariat.db.abstract import MetaDocument
            try:
                del MetaDocument.Registry[self.data_class_name]
            except KeyError:
                # BUGFIX: was a bare `except: pass`, which silently swallowed
                # every error (including KeyboardInterrupt). Only a missing
                # registry entry is expected and safe to ignore here.
                pass
        return super(ChannelType, self).update(*args, **kwargs)

    def apply_sync(self, user):
        """Re-apply the current schema to all existing documents of this type.

        Converts every stored document into a temporary sync collection,
        collecting per-column conversion errors. Only when conversion is
        error-free are the originals replaced in bulk. Returns the (possibly
        empty) dict of sync errors; raises ImproperStateError unless the
        type is currently OUT_OF_SYNC.
        """
        if self.sync_status != self.OUT_OF_SYNC:
            raise ImproperStateError(self)
        self.update(sync_status=self.SYNCING)
        sync_errors = defaultdict(list)
        sync_coll = self.mongo_collection + 'Sync' + str(user.account.id)
        ChClass = self.get_channel_class()
        # Temporary class bound to the scratch collection for converted docs.
        SyncClass = SchemaBased.create_data_class(
            self.data_class_name,
            self.schema,
            sync_coll,
            inherit_from=DynamicEventsImporterChannel,
            _platform=self.name)
        temp_coll = SyncClass.objects.coll
        bulk_insert = temp_coll.initialize_unordered_bulk_op()
        for doc in ChClass.objects.coll.find({'channel_type_id': self.id}):
            # Start from the ORM-declared fields, converting db values.
            synced_data = {}
            for fname, field in ChClass.fields.iteritems():
                val = doc.get(field.db_field)
                if val is None:
                    continue
                synced_data[fname] = field.to_python(val)
            try:
                # Then apply the (possibly changed) schema column types.
                for col in self.schema:
                    if KEY_EXPRESSION in col:
                        continue  # computed columns are not stored
                    col_name = col[KEY_NAME]
                    val = doc.get(col_name)
                    synced_data[col_name] = apply_shema_type(
                        val, col[KEY_TYPE])
                bulk_insert.insert(SyncClass(**synced_data).data)
            except Exception as ex:
                LOGGER.info('Sync error:\n\n %s', ex, exc_info=True)
                SchemaBased._put_sync_error(sync_errors, col_name, val, ex)
        if not sync_errors:
            try:
                bulk_insert.execute()
            except Exception as ex:
                LOGGER.info('Error inserting synced data %s',
                            ex,
                            exc_info=True)
                self.update(sync_status=self.OUT_OF_SYNC)
                temp_coll.drop()
                raise
            else:
                # Replace originals with the converted documents, in bulk.
                bulk_update = ChClass.objects.coll.initialize_unordered_bulk_op(
                )
                for doc in temp_coll.find():
                    bulk_update.find({'_id': doc['_id']}).replace_one(doc)
                bulk_update.execute()
                temp_coll.drop()
                self.update(sync_status=self.IN_SYNC, updated_at=utc(now()))
                return {}
        self.update(sync_status=self.OUT_OF_SYNC)
        temp_coll.drop()
        return sync_errors
class ChannelStats(ChannelAuthDocument):
    "Store stats for month, day and hour"
    manager = ChannelStatsManager

    channel = fields.ObjectIdField(required=True,
                                   unique_with='time_slot',
                                   db_field='cl')
    # The time slot is a numeric encoding of elapsed time
    # see utils.timeslot for details
    time_slot = fields.NumField(required=True, db_field="ts")

    number_of_posts = fields.NumField(default=0, db_field='nop')
    feature_counts = fields.DictField(db_field="fc")
    number_of_rejected_posts = fields.NumField(default=0, db_field='norp')
    number_of_starred_posts = fields.NumField(default=0, db_field='nosp')
    number_of_discarded_posts = fields.NumField(default=0, db_field='nodp')
    number_of_highlighted_posts = fields.NumField(default=0, db_field='nohp')
    number_of_actionable_posts = fields.NumField(default=0, db_field="noaep")
    number_of_assigned_posts = fields.NumField(default=0, db_field="noadp")
    number_of_replied_posts = fields.NumField(default=0, db_field="noalp")
    number_of_accepted_posts = fields.NumField(default=0, db_field="noacp")
    number_of_false_negative = fields.NumField(default=0, db_field="nofn")
    number_of_true_positive = fields.NumField(default=0, db_field="notp")
    number_of_false_positive = fields.NumField(default=0, db_field="nofp")

    # Quality Measures
    cumulative_relevance = fields.NumField(default=0.0, db_field="cr")
    cumulative_intention = fields.NumField(default=0.0, db_field="ci")

    # Outbound Statistics
    number_of_impressions = fields.NumField(default=0, db_field="noi")
    number_of_clicks = fields.NumField(default=0, db_field="noc")

    # BUGFIX: the second spec was ('time_slot') -- parenthesized string, not
    # a tuple -- inconsistent with every other index declaration in the file.
    indexes = [('channel', ), ('time_slot', )]

    @property
    def level(self):
        # Granularity (month/day/hour) decoded from the time slot.
        return decode_timeslot(self.time_slot)[1]

    @property
    def mean_relevance(self):
        # Average relevance per post; 0.0 when no posts were counted.
        if self.number_of_posts:
            return self.cumulative_relevance / self.number_of_posts
        return 0.0

    @property
    def mean_intention(self):
        # Average intention per actionable post; 0.0 when none were counted.
        if self.number_of_actionable_posts:
            return self.cumulative_intention / self.number_of_actionable_posts
        return 0.0

    def to_dict(self, fields2show=None):
        """Serialize, replacing the cumulative quality measures with their
        means and adding the decoded time-slot level."""
        result = ChannelAuthDocument.to_dict(self, fields2show)
        del result['cumulative_relevance']
        del result['cumulative_intention']
        result['mean_relevance'] = self.mean_relevance
        result['mean_intention'] = self.mean_intention
        result['level'] = self.level
        return result

    def __str__(self):
        "String repr"
        return str(self.time_slot)

    @classmethod
    def _new_bulk_operation(cls, ordered=False):
        """ Allocates a new bulk DB operation
            (only available in PyMongo 2.7+)
        """
        coll = cls.objects.coll
        if ordered:
            return coll.initialize_ordered_bulk_op()
        else:
            return coll.initialize_unordered_bulk_op()

    def inc(self, field_name, value, bulk=None):
        """ Issue an update DB operation that increments a specified field
            in the corresponding document.

            bulk -- an optional <BulkOperationBuilder> instance to store
                    the postponed $inc operation instead of doing it right
                    away (only available in PyMongo 2.7+)
        """
        if not isinstance(value, (int, float)):
            raise AppException("%s must be integer or float" % value)
        query = self.__class__.get_query(time_slot=self.time_slot,
                                         channel=str(self.channel))
        update = {'$inc': {self.fields[field_name].db_field: value}}
        if bulk is None:
            # sending a DB request right away
            coll = self.objects.coll
            return coll.update(query, update, upsert=True)
        else:
            # adding a postponed DB request to the bulk set
            return bulk.find(query).upsert().update_one(update)

    def set(self, field_name, value, bulk=None):
        """ Issue an update DB operation that sets a specified field
            in the corresponding document.

            bulk -- an optional <BulkOperationBuilder> instance to store
                    the postponed $set operation instead of doing it right
                    away (only available in PyMongo 2.7+)
        """
        if not isinstance(value, (int, float)):
            raise AppException("%s must be integer or float" % value)
        query = self.__class__.get_query(time_slot=self.time_slot,
                                         channel=str(self.channel))
        update = {'$set': {self.fields[field_name].db_field: value}}
        if bulk is None:
            # sending a DB request right away
            coll = self.objects.coll
            return coll.update(query, update, upsert=True)
        else:
            # adding a postponed DB request to the bulk set
            return bulk.find(query).upsert().update_one(update)

    def inc_feature_counts(self, speech_acts, bulk=None):
        """ Update SpeechAct stats.

            Issue an update DB operation that increments SpeechAct counters
            in the corresponding document.

            bulk -- an optional <BulkOperationBuilder> instance to store
                    the postponed $inc operation instead of doing it right
                    away (only available in PyMongo 2.7+)
        """
        increments = {}
        field_name = self.fields['feature_counts'].db_field

        def add_increment(int_id):
            # Dotted key targets one intention counter in the sub-document.
            key = '%s.%s' % (field_name, int_id)
            increments[key] = 1

        for sa in speech_acts:
            int_id = sa['intention_type_id']
            if int_id:
                add_increment(int_id)
        if increments:
            # if there is at least one intention -- increment a counter for ALL also
            add_increment(ALL_INTENTIONS.oid)
        query = self.__class__.get_query(time_slot=self.time_slot,
                                         channel=str(self.channel))
        update = {'$inc': increments}
        if bulk is None:
            # sending a DB request right away
            coll = self.objects.coll
            return coll.update(query, update, upsert=True)
        else:
            # adding a postponed DB request to the bulk set
            return bulk.find(query).upsert().update_one(update)

    def reload(self):
        """Refresh this instance's data from the DB (no-op with a warning
        when the document no longer exists)."""
        source = ChannelStats.objects.find_one(time_slot=self.time_slot,
                                               channel=self.channel)
        if source is None:
            LOGGER.warning(
                "ChannelStats.reload() could not find a document for: channel=%s, time_slot=%s",
                self.channel, self.time_slot)
            #LOGGER.warning("Found instead only:")
            #for s in ChannelStats.objects():
            #    LOGGER.warning(' - %s %s', s.channel, s.time_slot)
        else:
            self.data = source.data
class FacebookServiceChannel(FacebookUserMixin, FacebookChannel, ServiceChannel):
    """Service channel for Facebook: tracks pages/groups/events, keeps the
    channel in sync with its dispatch (EnterpriseFacebookChannel) counterpart,
    and manages realtime-update subscriptions for tracked items."""

    # -- pages --
    # monitored facebook pages
    # NOTE: name should be same as in EnterpriseFacebookChannel
    facebook_page_ids = fields.ListField(fields.StringField())
    facebook_pages = fields.ListField(fields.DictField())       # current pages data
    all_facebook_pages = fields.ListField(fields.DictField())   # all accessible pages data
    page_admins = fields.DictField()  # {page_id: [list of facebook users json]...}
    tracked_fb_message_threads_ids = fields.ListField(fields.StringField())

    # -- groups --
    # NOTE: name should be same as in EnterpriseFacebookChannel
    tracked_fb_group_ids = fields.ListField(fields.StringField())
    tracked_fb_groups = fields.ListField(fields.DictField())
    all_fb_groups = fields.ListField(fields.DictField())

    # -- events --
    # NOTE: name should be same as in EnterpriseFacebookChannel
    tracked_fb_event_ids = fields.ListField(fields.StringField())
    tracked_fb_events = fields.ListField(fields.DictField())
    all_fb_events = fields.ListField(fields.DictField())

    # -- user --
    facebook_handle_id = fields.StringField()
    facebook_access_token = fields.StringField()

    pull_activity_md = fields.DictField()  # info about the times of the last data-pull operations
    fb_pull_mode = fields.NumField(default=PULL_MODE_RARE)
    # NOTE(review): stored as strings; post_received() writes an epoch-seconds
    # string via strftime("%s") -- confirm consumers expect that format.
    last_post_received = fields.StringField()
    last_pull_success = fields.StringField()

    @property
    def InboundChannelClass(self):
        # Concrete inbound channel class used by the ServiceChannel machinery.
        return InboundFacebookChannel

    @property
    def OutboundChannelClass(self):
        # Concrete outbound channel class used by the ServiceChannel machinery.
        return OutboundFacebookChannel

    @property
    def DispatchChannelClass(self):
        # Channel class used for dispatching (posting) to Facebook.
        return EnterpriseFacebookChannel

    def find_direction(self, post):
        # For now just assume all posts are actionable if posted in one
        # of the users pages.
        return 'direct'

    def set_dispatch_channel(self, value):
        """Set the dispatch channel and immediately sync tokens/handles from it."""
        super(FacebookServiceChannel, self).set_dispatch_channel(value)
        self.sync_with_account_channel(value)

    def sync_with_account_channel(self, efc):
        """Copy auth data from the account (dispatch) channel `efc`.

        With a valid access token: copy handle id + token and refresh the
        cached "me" profile. Without one: clear page/event tracking data.
        Always invalidates the cached channel description first.
        """
        self.update(_cached_channel_description=None)
        if efc.facebook_access_token:
            self.update(
                facebook_handle_id=efc.facebook_handle_id,
                facebook_access_token=efc.facebook_access_token,
            )
            self.set_facebook_me(efc.facebook_me())
        else:
            # No token -> nothing can be tracked; wipe page/event state.
            # NOTE(review): handle id / access token themselves are not
            # cleared here -- confirm that is intentional.
            self.update(
                facebook_page_ids=[],
                facebook_pages=[],
                all_facebook_pages=[],
                tracked_fb_event_ids=[],
                tracked_fb_events=[],
                all_fb_events=[]
            )

    def get_access_token(self, user):
        """ Try to get the access token for this channel.

        Resolves the outbound (dispatch) channel for `user` and returns its
        access token, re-syncing this channel from it on the way.

        Raises FacebookConfigurationException when no outbound channel exists,
        it has no token, or any other error occurs while resolving it (the
        original exception is logged and wrapped).
        """
        # if self.facebook_handle_id:
        #     return self.facebook_access_token
        # else:
        try:
            efc = self.get_outbound_channel(user)
            if efc:
                if efc.facebook_access_token:
                    self.sync_with_account_channel(efc)  # TODO: do we need sync here?
                    return efc.facebook_access_token
                error_msg = "Channel %s has no access token. Did you login to facebook from configuration page?" % (efc.title)
                raise FacebookConfigurationException(error_msg)
            else:
                error_msg = 'Please create and configure a channel of type "Facebook : Account" first.'
                raise FacebookConfigurationException(error_msg)
        except Exception as ex:
            # Broad catch: any failure is surfaced to the caller as a
            # configuration problem. `ex.message` is Python-2-only.
            LOGGER.error(ex)
            raise FacebookConfigurationException(ex.message)

    def get_outbound_channel(self, user):
        '''
        Get the outbound channel based on user access, channel configuration,
        and as a last resort, channel configurations

        Resolution order:
          1. explicit dispatch_channel, if set;
          2. the single active EnterpriseFacebookChannel candidate, if unique;
          3. the user's configured outbound channel, if among candidates;
          4. the account's configured outbound channel, if among candidates;
          otherwise raises FacebookConfigurationException (ambiguous setup).
        '''
        # The configured channel is only necessary, or correct, if this is no service
        # channel, or if there is a service channel with multiple candidates
        if self.dispatch_channel:
            return self.dispatch_channel

        configured_user_channel = user.get_outbound_channel(self.platform)
        configured_account_channel = self.account.get_outbound_channel(self.platform)
        candidates = EnterpriseFacebookChannel.objects.find_by_user(user,
                                                                    account=self.account,
                                                                    status='Active')[:]

        # If there are no candidates for the service channel, then do not return anything.
        if not candidates:
            return None
        else:
            if len(candidates) == 1:
                return candidates[0]
            if configured_user_channel in candidates:
                return configured_user_channel
            if configured_account_channel in candidates:
                return configured_account_channel
            error_msg = "There are multiple Facebook : Account channels on this account: "
            error_msg += "Channels: (%s), Account: %s. You need to set one in user profile or account settings." % (
                [c.title for c in candidates], self.account.name)
            raise FacebookConfigurationException(error_msg)

    def track_fb_group(self, group, user):
        """Start tracking a group; `group` must be a dict with 'id' and 'name'."""
        assert isinstance(group, dict) and {'id', 'name'} <= set(group), 'Wrong group object'
        self.__add_to_tracking(user, group, 'tracked_fb_group_ids', 'tracked_fb_groups')

    def untrack_fb_group(self, group, user):
        """Stop tracking a group; `group` must be a dict with 'id' and 'name'."""
        assert isinstance(group, dict) and {'id', 'name'} <= set(group), 'Wrong group object'
        self.__remove_from_tracking(user, group, 'tracked_fb_group_ids', 'tracked_fb_groups')

    def track_fb_event(self, event, user):
        """Start tracking an event and register it with FacebookTracking."""
        assert isinstance(event, dict) and {'id', 'name'} <= set(event), 'Wrong event object'
        self._handle_tracking("add", [], [event['id']])
        self.__add_to_tracking(user, event, 'tracked_fb_event_ids', 'tracked_fb_events')

    def untrack_fb_event(self, event, user):
        """Stop tracking an event and deregister it from FacebookTracking."""
        assert isinstance(event, dict) and {'id', 'name'} <= set(event), 'Wrong event object'
        self._handle_tracking("remove", [], [event['id']])
        self.__remove_from_tracking(user, event, 'tracked_fb_event_ids', 'tracked_fb_events')

    def add_facebook_page(self, page, user):
        """Start tracking a page, register it, and record its admins."""
        assert isinstance(page, dict) and {'id', 'name'} <= set(page), 'Wrong page object'
        self._handle_tracking("add", [page['id']])
        self.__add_to_tracking(user, page, 'facebook_page_ids', 'facebook_pages')
        update_page_admins(self, page)

    def remove_facebook_page(self, page, user):
        """Stop tracking a page and deregister it from FacebookTracking."""
        assert isinstance(page, dict) and {'id', 'name'} <= set(page), 'Wrong page object'
        self._handle_tracking("remove", [page['id']])
        self.__remove_from_tracking(user, page, 'facebook_page_ids', 'facebook_pages')

    def post_received(self, post):
        """ Adds post to conversations.

        Also stamps last_post_received (epoch-seconds string) for PMs.
        Precondition: the post belongs to this channel's inbound/outbound
        pair and is a tracked post.
        """
        from solariat_bottle.db.post.base import UntrackedPost
        assert set(post.channels).intersection([self.inbound, self.outbound])
        assert not isinstance(post, UntrackedPost), "It should be tracked if we received it."
        conv = self.upsert_conversation(post, contacts=False)
        if post.is_pm:
            self.last_post_received = datetime.utcnow().strftime("%s")
            self.save()

    def _handle_tracking(self, action, pages=None, events=None):
        """Forward an add/remove tracking `action` for pages/events to
        FacebookTracking. The literal string 'all' expands to everything
        currently tracked on this channel."""
        LOGGER.info(u"Invoked {}[{}]._handle_tracking action={} pages={} events={}".format(
            self.__class__.__name__, self.id, action, pages, events))
        if pages == 'all':
            pages = self.facebook_page_ids
        if events == 'all':
            events = self.tracked_fb_event_ids

        if pages:
            FacebookTracking.objects.handle_channel_event(action, self, pages, PAGE)
        if events:
            FacebookTracking.objects.handle_channel_event(action, self, events, EVENT)

    def on_active(self):
        """Activate channel: re-register all tracking, resubscribe realtime
        updates, and cascade activation to inbound/outbound channels."""
        self.status = 'Active'
        self.update(set__status='Active')
        self._handle_tracking("add", "all", "all")
        subscribe_realtime_updates(self.facebook_pages)
        self.inbound_channel.on_active()
        self.outbound_channel.on_active()

    def on_suspend(self):
        """Suspend channel: drop tracking registrations, unsubscribe realtime
        updates, and cascade suspension to inbound/outbound channels."""
        self.status = 'Suspended'
        self.update(set__status='Suspended')
        self._handle_tracking("remove", "all", "all")
        unsubscribe_realtime_updates(self.facebook_pages)
        self.inbound_channel.on_suspend()
        self.outbound_channel.on_suspend()

    def archive(self):
        # Drop all tracking registrations before archiving.
        self._handle_tracking("remove", "all", "all")
        return super(FacebookServiceChannel, self).archive()

    def list_outbound_channels(self, user):
        """All EnterpriseFacebookChannels visible to `user` on this account."""
        return EnterpriseFacebookChannel.objects.find_by_user(user, account=self.account)

    def __invalidate_channel_descriptions(self, user):
        """Clear cached channel descriptions on self and on every active
        dispatch-channel candidate, so they are recomputed on next access."""
        self._cached_channel_description = None  # Make sure we no longer consider same cached channel
        candidates = EnterpriseFacebookChannel.objects.find_by_user(user,
                                                                    account=self.account,
                                                                    status='Active')[:]
        for candidate in candidates:
            candidate._cached_channel_description = None
            candidate.save()

    def __add_to_tracking(self, user, item, id_fields, tracked_field):
        """Append `item` to the (ids list, dicts list) pair named by
        `id_fields`/`tracked_field`, mirror the id onto the outbound channel,
        and subscribe realtime updates for non-event items with a token.

        No-op if the item's id is already tracked.
        """
        if str(item['id']) in getattr(self, id_fields):
            return

        getattr(self, tracked_field).append(item)
        # NOTE(review): appends item['id'] unstringified while the duplicate
        # check above uses str(item['id']) -- confirm ids are always strings
        # upstream, otherwise duplicates could slip in.
        getattr(self, id_fields).append(item['id'])
        self.__invalidate_channel_descriptions(user)
        self.save()
        # Events are pulled, not pushed; only page-like items with an
        # access_token get a realtime subscription.
        if self.status == 'Active' and 'access_token' in item and item.get('type') != "event":
            subscribe_realtime_updates([item])

        # Mirror the id to the dispatch channel so the facebook bot has access.
        efc = self.get_outbound_channel(user)
        if efc:
            getattr(efc, id_fields).append(str(item['id']))
            efc.save()

    def __remove_from_tracking(self, user, item, ids_field, tracked_field):
        """Inverse of __add_to_tracking: drop `item` from both lists, remove
        the mirrored id from the outbound channel, and unsubscribe realtime
        updates for non-event items with a token. Missing ids are ignored."""
        tracked = getattr(self, tracked_field)
        # filter() returns a list under Python 2, matching the field type.
        setattr(self, tracked_field,
                filter(lambda p: str(p['id']) != str(item['id']), list(tracked)))
        try:
            getattr(self, ids_field).remove(str(item['id']))
        except ValueError:
            pass  # id was not tracked; removal is best-effort

        self.__invalidate_channel_descriptions(user)
        self.save()

        # Now also add page to dispatch so facebook bot will have access
        efc = self.get_outbound_channel(user)
        if efc:
            try:
                getattr(efc, ids_field).remove(str(item['id']))
            except ValueError:
                pass
            efc.save()

        if self.status == 'Active' and 'access_token' in item and item.get('type') != "event":
            unsubscribe_realtime_updates([item])

    def get_outbound_ids(self):
        """Collect the set of ids this channel can post from: tracked page
        ids, their Admin users' ids, the channel's own handle id, and ids of
        all accessible pages."""
        outbound_ids = set()
        for page_id, users in (self.page_admins or {}).viewitems():
            outbound_ids.add(page_id)
            for user in users:
                if user.get('role') == 'Admin' and user.get('id'):
                    outbound_ids.add(user['id'])

        if self.facebook_handle_id:
            outbound_ids.add(str(self.facebook_handle_id))

        for page_data in list(self.all_facebook_pages):
            if page_data.get('id'):
                outbound_ids.add(page_data['id'])
        return outbound_ids
class TaskState(Document):
    """Persisted state record for a task.

    Both fields are free-form dicts; their exact schemas are defined by the
    code that writes them (not visible here).
    """
    # Parameters the task was created/invoked with -- presumably the task's
    # input arguments; verify against the writer.
    params = fields.DictField()
    # Mutable state/progress data recorded for the task.
    state = fields.DictField()