def run(self):
    post_data = None
    authtoken = None
    expired_authtoken = None
    while True:
        if not authtoken:
            authtoken = self.get_authtoken(expired_authtoken)
        if not post_data:
            post_data = self.task_queue.get()
        url = '%s/api/v1.2/posts?token=%s' % (self.options.url, authtoken)
        headers = {'Content-Type': 'application/json'}
        try:
            self.handle_connection(url, 'POST', post_data, headers)
        except ApplicationError as err:
            if str(err) == 'Auth token %s is expired' % authtoken:
                LOGGER.info(err)
                expired_authtoken = authtoken
                authtoken = None
            else:
                LOGGER.error(err)
                post_data = None
                self.task_queue.task_done()
        except InfrastructureError as err:
            LOGGER.error(err, exc_info=True)
            gevent.sleep(self.sleep_timeout)
        else:
            post_data = None
            self.task_queue.task_done()
def get_service_channel(self, lookup_by_page_ids=True):
    candidates = self.get_attached_service_channels()
    if not candidates and lookup_by_page_ids:
        # Fall back to lookup by access token, then by page ids
        if self.facebook_access_token:
            candidates = FacebookServiceChannel.objects.find(
                account=self.account,
                facebook_access_token=self.facebook_access_token)[:]
        if not candidates:
            candidates = FacebookServiceChannel.objects.find(
                account=self.account,
                facebook_page_ids__in=self.facebook_page_ids)[:]
        if not candidates:
            return None
        if len(candidates) == 1:
            return candidates[0]
        else:
            LOGGER.error(
                "We have multiple candidates for service channel matching "
                "for enterprise channel %s" % self)
            return None

    if len(candidates) > 1:
        LOGGER.warn("We have multiple candidates for service channel matching "
                    "for enterprise channel %s" % self)
    if candidates:
        return candidates[0]
def __get_token_for_item(channel, object_id):
    from solariat_bottle.settings import LOGGER
    from solariat_bottle.tasks.exceptions import FacebookCommunicationException
    from solariat_bottle.db.post.facebook import FacebookPost
    from solariat_bottle.db.channel.facebook import EnterpriseFacebookChannel

    try:
        if isinstance(channel, EnterpriseFacebookChannel):
            from solariat_bottle.utils.post import get_service_channel
            channel = get_service_channel(channel)
        fb_post = FacebookPost.objects.get(_native_id=str(object_id))
        post_type = fb_post.wrapped_data['source_type']
        source_ids = set(map(str, get_page_id_candidates(fb_post)))
        try:
            if post_type == 'Event':
                token = [event for event in channel.all_fb_events
                         if str(event['id']) in source_ids][0]['access_token']
            else:
                token = [page for page in channel.facebook_pages
                         if str(page['id']) in source_ids][0]['access_token']
        except Exception:
            LOGGER.error("Failed to get page access token for object_id=%s "
                         "and channel=%s" % (object_id, channel))
            # fall back to the channel-wide token
            token = channel.facebook_access_token
        return token
    except Exception:
        # assumption: the outer handler is missing from the source; re-raise
        # so failures still propagate to the caller
        raise
def unsubscribe_realtime_updates(pages):
    for page in pages:
        G = facebook_driver.GraphAPI(page['access_token'])
        try:
            G.delete_object('%s/subscribed_apps' % page['id'])
        except facebook.GraphAPIError as e:
            LOGGER.error(e)
def read_schema(self):
    from solariat_bottle.utils.predictor_events import translate_column, get_type

    analysis_temp_file = tempfile.TemporaryFile('r+')
    headers = self.csv_file.readline()
    if not headers:
        raise CsvDataValidationError('Input file is empty')
    analysis_temp_file.write(headers)
    for idx, line_data in enumerate(self.csv_file.readlines(), start=1):
        analysis_temp_file.write(line_data)
        if idx == self.MAX_ANALYSIS_LINES:
            break
    analysis_temp_file.seek(0)

    schema_json = []
    try:
        dataframe = pandas.read_csv(analysis_temp_file, sep=self.sep)
    except Exception as ex:
        LOGGER.error('Cannot parse file:', exc_info=True)
        raise CsvDataValidationError('Cannot parse file %s' % str(ex))
    for col in dataframe.columns:
        schema_entry = dict(name=translate_column(col),
                            type=get_type(dataframe[col].dtype, dataframe[col].values))
        schema_json.append(schema_entry)
    return schema_json
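# Hypothetical sketch (not part of the module) of the dtype inspection that
# read_schema delegates to translate_column/get_type: pandas infers a dtype
# per column, which then maps to a schema type. Python 2 StringIO is assumed,
# matching the codebase.
def _demo_dtype_inference():
    import pandas
    from StringIO import StringIO

    frame = pandas.read_csv(StringIO("age,name\n31,bob\n25,ann\n"), sep=',')
    # numeric columns come back as int64/float64, strings as object ('O')
    return [(col, str(frame[col].dtype)) for col in frame.columns]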
def main(manager=manager):
    """ Supposed to be invoked by cron periodically """
    # 1. check sleeping jobs
    _now = now()
    for job_status in JobStatus.objects.find(status=JobStatus.SLEEPING):
        if utc(job_status.awake_at) > _now:
            continue
        job = manager.registry.get(job_status.name)
        manager.producer.send_message(job.topic, job_status)
        LOGGER.info('Job: %s awakened and sent to execution.', job_status.id)

    # 2. check timed out jobs
    for job_status in JobStatus.objects.find(status=JobStatus.RUNNING):
        job = manager.registry.get(job_status.name)
        last_activity = job_status.last_activity or job_status.started_date
        if _now - utc(last_activity) < timedelta(seconds=job.timeout):
            continue
        job_status.update(completion_date=now(), status=JobStatus.TERMINATED)
        LOGGER.info('Job: %s terminated. No activity last %s seconds.',
                    job_status.id, job.timeout)
        if job.terminate_handler:
            try:
                job.terminate_handler(*job_status.args, **job_status.kwargs)
                LOGGER.info('terminate_handler complete for Job: %s.', job_status.id)
            except Exception as ex:
                LOGGER.error('Error executing terminate_handler: %s', ex, exc_info=True)
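# Standalone sketch (hypothetical helper, plain datetimes standing in for the
# project's now()/utc() utilities) of the inactivity check above: a RUNNING
# job is terminated once now - last_activity reaches job.timeout seconds.
def _demo_job_timed_out():
    from datetime import datetime, timedelta

    timeout = 300  # stand-in for job.timeout
    last_activity = datetime(2016, 1, 1, 12, 0, 0)
    _now = datetime(2016, 1, 1, 12, 10, 0)
    return _now - last_activity >= timedelta(seconds=timeout)  # True: terminate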
def __gen_authtoken(self):
    " request new auth token "
    post_data = {'username': self.options.username,
                 'password': self.options.password}
    post_data = json.dumps(post_data)
    self.conn.setopt(pycurl.URL, '%s/api/v1.2/authtokens' % self.options.url)
    self.conn.setopt(pycurl.POSTFIELDS, post_data)
    self.conn.setopt(pycurl.HTTPHEADER, ['Content-Type: application/json'])
    while True:
        self.reset_buff()
        try:
            response = self.handle_connection()
        except FeedAPIError as err:
            LOGGER.error(err)
            time.sleep(self.sleep_timeout)
        else:
            authtoken = response['item']['token'].encode('utf-8')
            self.__class__.authtoken = authtoken
            return authtoken
def run(self):
    inp_queue = self.inp_queue
    start_time = time.time()
    while not self.stopped():
        # make sure we intercept all errors
        try:
            task = inp_queue.get()
            if task is self.QUIT or task == 'QUIT':
                LOGGER.debug('received QUIT signal %s' % self)
                break
            start_time = time.time()
            self._busy = True  # just started our post processing
            post_fields = self.preprocess_post(task)
            if not post_fields:
                LOGGER.warning('no post_fields in: %s', task)
                continue
            # LOGGER.debug('creating post %r %s', post_fields.get('content'), inp_queue.qsize())
            if self.assign_channels(post_fields):
                self.create_post(**post_fields)
            else:
                LOGGER.info('skipping post %r' % post_fields.get('content'))
                self.inc_skipped()
            self._busy = False  # just finished our post processing
        except Exception as err:
            LOGGER.error(err, exc_info=True)
def run(self):
    self.ds_client = None
    while not self.stopped():
        try:
            del self.ds_client  # to garbage-collect the old client ASAP
            self._running = False
            if not get_var('ON_TEST'):
                self.ds_client = DatasiftClient(ds_login=self.ds_login,
                                                ds_api_key=self.ds_api_key,
                                                bot_instance=self,
                                                sanity_checker=self.checker)
            else:
                self.ds_client = TestDatasiftClient(bot_instance=self)
            self.ds_client.connect()
            self._running = True
            LOGGER.info('connected to %s', self.ds_client.WEBSOCKET_BASE_URL)
            self.checker.set_client(self.ds_client)
            self.ds_subscriber.set_client(self.ds_client)
            self.ds_client.run()  # receives posts from Datasift
        except Exception as e:
            LOGGER.error(e, exc_info=True)
            sleep(5)  # wait a bit on any unexpected error
def check_alive(self):
    connection_timeout = int(time.time()) - self.keepalive_lastcheck
    if connection_timeout > self.keepalive_timeout:
        LOGGER.error('[@%s] TCP connection timeout', self.me)
        self.keepalive_lastcheck = int(time.time())
        return False
    return True
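# The keepalive arithmetic above in standalone form (hypothetical helper):
# the connection counts as alive while no more than `timeout` seconds have
# passed since the last check.
def _demo_keepalive_alive(last_check_ts, timeout, now_ts):
    return (int(now_ts) - last_check_ts) <= timeout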
def tw_unfollow(channel, user_profile, silent_ex=False):
    """ Task for unfollowing a twitter screen name.

    :param channel: Used to grab the twitter credentials we are using
        for the unfollow action
    :param user_profile: The profile whose twitter screen name we are
        going to unfollow
    :param silent_ex: Optional, if true any exceptions will just be
        silently ignored
    """
    from solariat_bottle.settings import get_var, LOGGER
    from solariat_bottle.db.user_profiles.user_profile import (
        UserProfile, get_brand_profile_id, get_native_user_id_from_channel)

    result = {}
    if not get_var('ON_TEST') and get_var('APP_MODE') == 'prod':
        from solariat_bottle.utils.oauth import get_twitter_oauth_credentials
        (consumer_key, consumer_secret, _,
         access_token_key, access_token_secret) = get_twitter_oauth_credentials(channel)
        try:
            result = tw_destroy_friendship(
                consumer_key=consumer_key,
                consumer_secret=consumer_secret,
                access_token_key=access_token_key,
                access_token_secret=access_token_secret,
                screen_name=user_profile.screen_name)
        except Exception as ex:
            LOGGER.error("tasks.tw_unfollow: " + str(ex))
            if silent_ex:
                result = dict(error=str(ex))
            else:
                raise
    return result
def _get_request_data():
    """Convert request data to a dict.

    Data can arrive url-encoded, as a plain HTML form, or as JSON sent
    with ``curl -d '{"var": "value"}'``.
    """
    import json

    form = {}
    if request.method in ['PUT', 'POST']:
        form = request.form
        if not [x for x in form.values() if x] and len(form.keys()):
            # no values - there is json from -d {...} coming
            try:
                form = json.loads("".join(form.keys()))
            except ValueError:
                msg = "Could not parse JSON from %s" % "".join(form.keys())
                LOGGER.error(msg)
                raise RuntimeError(msg)

    body_data = {}
    if request.data:
        try:
            body_data = json.loads(request.data)
            if not isinstance(body_data, dict):
                raise ValueError
        except ValueError:
            raise RuntimeError("Could not load JSON from %s" % request.data)

    return dict(form.items() + request.args.items()
                + body_data.items() + request.files.items())
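# Hypothetical sketch of the "-d JSON arrives as a form key" case handled
# above: without a JSON content type, Flask parses curl -d '{"var": "value"}'
# as a single form key with an empty value, which the helper re-joins and
# decodes.
def _demo_form_json_reassembly():
    import json

    form = {'{"var": "value"}': ''}  # what request.form looks like for -d JSON
    if not [x for x in form.values() if x] and len(form.keys()):
        form = json.loads("".join(form.keys()))
    return form  # {'var': 'value'}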
def get_age(self):
    # Best guess we can make is by date of birth, if present and properly formatted
    if self.date_of_birth:
        try:
            dob = datetime.strptime(self.date_of_birth, AGE_FORMAT)
            return relativedelta(datetime.now(), dob).years
        except Exception as ex:
            LOGGER.error(ex)
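# Minimal sketch of the age computation, assuming AGE_FORMAT is '%Y-%m-%d'
# (hypothetical; the real constant is defined elsewhere in the codebase).
def _demo_age():
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    dob = datetime.strptime('1990-06-15', '%Y-%m-%d')
    return relativedelta(datetime.now(), dob).years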
def process(self):
    if not self.channel:
        from solariat_bottle.settings import LOGGER
        LOGGER.error("Subscription %s: channel is broken" % self)
        return None
    # tw_process_historic_subscription.ignore(self)
    tw_process_historic_subscription(self)
def get_page_admins(page):
    api = facebook_driver.GraphAPI(page['access_token'])
    try:
        res = api.get_object('/%s/roles' % page['id'])
    except facebook.GraphAPIError as e:
        LOGGER.error(e)
        return None
    return res['data']
def get_object(self, sender_id, fields=None):
    try:
        if fields is not None:
            result = self.api.get_object(sender_id, fields=fields)
        else:
            result = self.api.get_object(sender_id)
        return result
    except Exception as ex:
        LOGGER.error("GET for id %s with fields %s raised %s" % (sender_id, fields, ex))
        raise
def _handle_http_response(self, response):
    "JSON-decode the response; return {'ok': False} if decoding fails"
    try:
        resp = json.loads(response.data)
        if not resp.get('ok', None):
            LOGGER.error(resp.get('error', 'unknown error'))
        return resp
    except ValueError:
        LOGGER.error("Could not decode %s" % response.data)
        return {'ok': False}
def get_datasift_hash():
    "Gets a datasift hash from database"
    try:
        db = get_connection()
        res = db.PostFilterStream.find_one({'_id': 'datasift_stream1'})
        if res and 'dh' in res and res['dh']:
            return res['dh']
    except Exception as e:
        LOGGER.error(e, exc_info=True)
    return None
def get(self, name):
    with self.lock:
        if name not in self.registry:
            LOGGER.info('No Job registered for: %s, trying to import.', name)
            try:
                self._import_module(name)
            except ImportError:
                LOGGER.error('Cannot import job module:', exc_info=True)
                raise RegistryError('No Job registered for: %s' % name)
        return self.registry[name]
def fix_for_neg_value(results, params, pipeline):
    ''' Workaround for negative values so we do not destroy the UI'''
    fixed = False
    for r in results:
        for p in params:
            if p not in r:
                break
            if r[p] != abs(r[p]):
                fixed = True
                r[p] = abs(r[p])
    if fixed:
        LOGGER.error("Negative counts for %s query", pipeline)
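# Usage sketch with dummy data (assumes LOGGER is configured as in the
# surrounding module): negative counts are clamped in place and the offending
# pipeline is logged once.
def _demo_fix_for_neg_value():
    results = [{'count': -3}, {'count': 5}]
    fix_for_neg_value(results, ['count'], pipeline=[{'$match': {}}])
    return results  # [{'count': 3}, {'count': 5}]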
def _validate_schema(self, schema_json):
    schema = self.schema or self.discovered_schema
    current_fields = {col[KEY_NAME] for col in schema
                      if KEY_EXPRESSION not in col}
    input_fields = {col[KEY_NAME] for col in schema_json}
    if len(current_fields & input_fields) != len(current_fields):
        LOGGER.error('Input schema columns: %s \n\nare different from '
                     'current: %s' % (input_fields, current_fields))
        raise SchemaValidationError(
            'Input schema columns are different from current.')
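# The validation above reduces to a set check: every non-expression column of
# the current schema must also appear in the input (extra input columns are
# fine). A standalone illustration with hypothetical field names:
def _demo_schema_columns_ok():
    current_fields = {'name', 'age'}
    input_fields = {'name', 'age', 'extra'}
    return len(current_fields & input_fields) == len(current_fields)  # True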
def run(self):
    post_data = None
    authtoken = None
    expired_authtoken = None
    while True:
        if not authtoken:
            authtoken = self.get_authtoken(expired_authtoken)
        if not post_data:
            post_data = self.task_queue.get()
            # This is used both by datasift and by twitter_bot_dm.
            # Just to be safe, in case we receive a dict with no 'channels' key,
            # do the processing here (as is the case with twitter_bot);
            # otherwise assume it was done before (as is the case with datasift_bot).
            if isinstance(post_data, dict) and 'channels' not in post_data:
                channels = handle_post('Twitter', post_data['user_profile'], post_data)
                if channels:
                    channels = [str(c.id) for c in channels]
                    post_data['channels'] = channels
                # we need this only for getting channels
                if 'direct_message' in post_data:
                    del post_data['direct_message']
                post_data = json.dumps(post_data)
        self.reset_buff()
        self.conn.setopt(pycurl.POSTFIELDS, post_data)
        self.conn.setopt(
            pycurl.URL,
            '%s/api/v1.2/posts?token=%s' % (self.options.url, authtoken))
        self.conn.setopt(pycurl.HTTPHEADER, ['Content-Type: application/json'])
        try:
            self.handle_connection()
        except ApplicationError as err:
            if str(err) == 'Auth token %s is expired' % authtoken:
                LOGGER.info(err)
                expired_authtoken = authtoken
                authtoken = None
            else:
                LOGGER.error(err)
                post_data = None
                self.task_queue.task_done()
        except InfrastructureError as err:
            LOGGER.error(err)
            time.sleep(self.sleep_timeout)
        else:
            post_data = None
            self.task_queue.task_done()
def api_data_to_model_data(user, api_data, raw_db_fields=False,
                           fields_mapping=None, profile_schema=None,
                           profile_class=None):
    """ Helper to transform incoming API data to our model format. """
    if fields_mapping is None:
        fields_mapping = dict()
    agent_data = api_data
    for key, val in api_data.iteritems():
        if isinstance(val, dict):
            api_data[key] = AgentsAPIView.replace_dots_in_dict(val)

    if profile_schema is None:
        agent_profile_schema = user.account.agent_profile._get()
    else:
        agent_profile_schema = profile_schema
    if profile_class is None:
        AgentProfile = agent_profile_schema.get_data_class()
    else:
        AgentProfile = profile_class

    schema_changed = False
    for key in agent_data.keys():
        if key not in AgentProfile.fields:
            inferred_type = get_type_mapping(agent_data[key])
            if inferred_type:
                if agent_profile_schema.schema:
                    agent_profile_schema.schema.append({KEY_NAME: key, KEY_TYPE: inferred_type})
                    agent_profile_schema._data_cls = None
                else:
                    agent_profile_schema.discovered_schema.append({KEY_NAME: key, KEY_TYPE: inferred_type})
                    agent_profile_schema._raw_data_cls = None
                fields_mapping[key] = key
                schema_changed = True
            else:
                agent_data.pop(key)
        else:
            fields_mapping[key] = AgentProfile.fields[key].db_field
            field_type = reversed_mapping[AgentProfile.fields[key].__class__]
            try:
                field_val = apply_shema_type(agent_data[key], field_type)
                if isinstance(field_val, dict):
                    field_val = AgentsAPIView.replace_dots_in_dict(field_val)
                agent_data[key] = field_val
            except Exception as ex:
                LOGGER.error("Failed to apply schema for field %s and value %s. Got error %s" % (
                    key, agent_data[key], ex))
                agent_data.pop(key)
def fb_put_comment_by_channel(channel, object_id, message):
    from solariat_bottle.settings import LOGGER
    from solariat_bottle.tasks.exceptions import FacebookCommunicationException

    try:
        return get_facebook_api(channel).put_comment(
            object_id, force_bytes(message, 'utf-8', 'replace'))
    except Exception as ex:
        LOGGER.error("Failure posting comment to facebook. "
                     "Exc: %s, Channel: %s, Object_id: %s, Message: %s"
                     % (ex, channel, object_id, message))
        raise FacebookCommunicationException(ex.message)
def process_journeys_analysis(analysis_tpl):
    start_time = utc_now()
    # We pass 1 journey_type id as array
    journey_type_instance = JourneyType.objects.get(analysis_tpl.filters['journey_type'][0])
    # JourneysAnalysis (data parsing, filtering etc.)
    j_analysis = JourneysAnalysis(analysis_tpl, journey_type_instance)
    try:
        journey_filters = j_analysis.parsed_journeys_filters()
        # We only use these for matching to classes in case of conversion
        # analysis, pop them from filters
        if 'funnel_id' in journey_filters:
            journey_filters.pop('funnel_id')
        if 'stage_id' in journey_filters:
            journey_filters.pop('stage_id')

        match = journey_filters
        initial_pipeline = [{"$match": match}]
        pipe = initial_pipeline.append
        pipe({'$sort': {CustomerJourney.F.first_event_date: 1}})

        if 'from_dt' in journey_filters:
            journey_filters.pop('from_dt')
            journey_filters.pop('to_dt')

        timeline_filter = deepcopy(journey_filters)
        timeline_filter.update({
            'level': analysis_tpl.get_timerange_level(),
            'computed_metric': analysis_tpl.analyzed_metric,
            'plot_type': 'timeline',
        })
        params = dict(filters=journey_filters,
                      initial_pipeline=initial_pipeline,
                      start_time=start_time,
                      timeline_filter=timeline_filter)
        analysis_process = AnalysisProcess(j_analysis, params)
        if analysis_tpl.analysis_type == analysis_tpl.CLASSIFICATION_TYPE:
            analysis_process.classification()
        elif analysis_tpl.analysis_type == analysis_tpl.REGRESSION_TYPE:
            analysis_process.regression()
    except AnalysisTerminationException as ex:
        LOGGER.error(ex)
        j_analysis.analysis.status_message = str(ex)
        j_analysis.analysis.progress = PROGRESS_ERROR
        j_analysis.analysis.save()
        manager.produce_state_update({'error': str(ex)})
        return
def run(self):
    while not self.stopped():
        try:
            # process usernames in reconnect bucket
            self.process_reconnects()
            self.reconnect_stat.log_frequent_reconnects(len(self.streams))
            # scan db for changes
            if not self.last_sync or (time.time() - self.last_sync) > self.SYNC_INTERVAL:
                self.sync_streams()
                self.last_sync = time.time()
        except Exception as err:
            LOGGER.error(err, exc_info=True)
def run(self):
    cmd_queue = self.cmd_queue
    cur_hash = None
    ds_client = None
    while not self.stopped():
        # make sure we intercept all errors
        try:
            # react on commands, simultaneously making a 10 sec pause
            try:
                cmd, arg = cmd_queue.get(block=True,
                                         timeout=10 if not get_var('ON_TEST') else 1)
                LOGGER.debug('received %s command', cmd)
                if cmd == 'CLIENT':
                    ds_client = arg
                    cur_hash = None
                elif cmd == 'QUIT':
                    break
            except Empty:
                LOGGER.debug("timeout (it's okay)")

            if ds_client is None:
                continue
            if ds_client.terminated:
                LOGGER.warning('ds_client is terminated')
                ds_client = None
                continue

            # get current datasift hash from the db
            ds_hash = get_datasift_hash()
            if not ds_hash:
                continue

            # subscribe/unsubscribe if necessary
            if not cur_hash:
                ds_client.subscribe(ds_hash)
            elif cur_hash != ds_hash:
                ds_client.unsubscribe(cur_hash)
                ds_client.subscribe(ds_hash)

            # remember the current hash
            cur_hash = ds_hash
        except Exception as err:
            LOGGER.error(err, exc_info=True)
def run(self, task):
    # make sure we intercept all errors
    try:
        post_fields = self.preprocess_post(task)
        if not post_fields:
            LOGGER.warning('no post_fields in: %s', task)
            return
        if self.assign_channels(post_fields):
            self.create_post(**post_fields)
        else:
            LOGGER.info('skipping post %r' % post_fields.get('content'))
    except Exception as err:
        LOGGER.error(err, exc_info=True)
def wrapper(*args, **kw):
    data = dict()
    data.update(request.view_args)
    data.update(request.args)
    channel_id = data.get(ch_key)
    try:
        channel = get_doc_or_error(Channel, channel_id)
        kw['channel'] = channel
        return view_func(*args, **kw)
    except Exception as e:
        from solariat_bottle.settings import LOGGER
        LOGGER.error(e)
        return jsonify(ok=False,
                       error="No channel exists for id=%s" % channel_id)
def __init__(self, user, kwargs):
    super(KafkaFeedApiPostCreator, self).__init__(user, kwargs)

    class Options(dict):
        __getattr__ = dict.__getitem__

    self.options = Options(username=user,
                           password=kwargs['password'],
                           url=kwargs['url'],
                           retries=kwargs.get('retries', 3))
    if not self.options.password:
        err_msg = "Configuration Error: password and url are required"
        LOGGER.error("%s %s" % (err_msg, self.options))
        raise RuntimeError(err_msg)
    self.user_agent = kwargs.pop('user_agent', 'FeedApi-PostCreator')
    self.sleep_timeout = 30