Example 1
    def run(self):

        post_data = None
        authtoken = None
        expired_authtoken = None

        while True:
            if not authtoken:
                authtoken = self.get_authtoken(expired_authtoken)
            if not post_data:
                post_data = self.task_queue.get()

            url = '%s/api/v1.2/posts?token=%s' % (self.options.url, authtoken)
            headers = {'Content-Type': 'application/json'}

            try:
                self.handle_connection(url, 'POST', post_data, headers)
            except ApplicationError as err:
                if str(err) == 'Auth token %s is expired' % authtoken:
                    LOGGER.info(err)
                    expired_authtoken = authtoken
                    authtoken = None
                else:
                    LOGGER.error(err)
                    post_data = None
                    self.task_queue.task_done()
            except InfrastructureError as err:
                LOGGER.error(err, exc_info=True)
                gevent.sleep(self.sleep_timeout)
            else:
                post_data = None
                self.task_queue.task_done()
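
The loop above retries the same payload with a fresh token when the server reports expiry, and only pulls the next queue item after a successful POST. A minimal, self-contained sketch of that pattern, with hypothetical stubs (FakeAPI, TokenExpired) standing in for the real queue, auth endpoint and HTTP client:

class TokenExpired(Exception):
    pass

class FakeAPI(object):
    "Stub endpoint: the first token it issues expires after one use."
    def __init__(self):
        self.issued = 0

    def fetch_token(self):
        self.issued += 1
        return 'token-%d' % self.issued

    def post(self, data, token):
        if token == 'token-1':
            raise TokenExpired(token)
        print('posted %r with %s' % (data, token))

def drain(api, tasks):
    token = data = None
    while tasks or data is not None:
        if token is None:
            token = api.fetch_token()
        if data is None:
            data = tasks.pop(0)
        try:
            api.post(data, token)
        except TokenExpired:
            token = None   # refresh the token, keep the same payload
        else:
            data = None    # payload delivered; take the next task

drain(FakeAPI(), ['a', 'b'])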
Example 2
    def get_service_channel(self, lookup_by_page_ids=True):
        candidates = self.get_attached_service_channels()
        if not candidates and lookup_by_page_ids:
            # Fallback to lookup by token/page ids
            if self.facebook_access_token:
                candidates = FacebookServiceChannel.objects.find(
                    account=self.account,
                    facebook_access_token=self.facebook_access_token)[:]
            if not candidates:
                candidates = FacebookServiceChannel.objects.find(
                    account=self.account,
                    facebook_page_ids__in=self.facebook_page_ids)[:]
            if not candidates:
                return None

            if len(candidates) == 1:
                return candidates[0]
            else:
                LOGGER.error(
                    "We have multiple candidates for service channel matching for enterprise channel %s" %
                    self)
                return None

        if len(candidates) > 1:
            LOGGER.warning("We have multiple candidates for service channel matching "
                           "for enterprise channel %s" % self)
        if candidates:
            return candidates[0]
Example 3
def __get_token_for_item(channel, object_id):

    from solariat_bottle.settings import LOGGER
    from solariat_bottle.tasks.exceptions import FacebookCommunicationException
    from solariat_bottle.db.post.facebook import FacebookPost
    from solariat_bottle.db.channel.facebook import EnterpriseFacebookChannel
    try:
        if isinstance(channel, EnterpriseFacebookChannel):
            from solariat_bottle.utils.post import get_service_channel
            channel = get_service_channel(channel)

        fb_post = FacebookPost.objects.get(_native_id=str(object_id))
        post_type = fb_post.wrapped_data['source_type']
        source_ids = set(map(str, get_page_id_candidates(fb_post)))

        try:
            if post_type == 'Event':
                token = [
                    event for event in channel.all_fb_events
                    if str(event['id']) in source_ids
                ][0]['access_token']
            else:
                token = [
                    page for page in channel.facebook_pages
                    if str(page['id']) in source_ids
                ][0]['access_token']
        except Exception as ex:
            LOGGER.error(
                "Failed to get page access token for object_id=%s and channel=%s"
                % (object_id, channel))
            token = channel.facebook_access_token
        return token
    except Exception as ex:
        # The function's closing handler is truncated in the source; this is a
        # plausible reconstruction following the sibling Facebook tasks, using
        # the otherwise-unused FacebookCommunicationException import above.
        LOGGER.error("Failed to get token for object_id=%s and channel=%s"
                     % (object_id, channel), exc_info=True)
        raise FacebookCommunicationException(str(ex))
Example 4
def unsubscribe_realtime_updates(pages):
    for page in pages:
        G = facebook_driver.GraphAPI(page['access_token'])
        try:
            G.delete_object('%s/subscribed_apps' % page['id'])
        except facebook.GraphAPIError as e:
            LOGGER.error(e)
Example 5
    def read_schema(self):
        from solariat_bottle.utils.predictor_events import translate_column, get_type

        analysis_temp_file = tempfile.TemporaryFile('r+')
        headers = self.csv_file.readline()
        if not headers:
            raise CsvDataValidationError('Input file is empty')
        analysis_temp_file.write(headers)

        for idx, line_data in enumerate(self.csv_file.readlines(), start=1):
            analysis_temp_file.write(line_data)
            if idx == self.MAX_ANALYSIS_LINES:
                break

        analysis_temp_file.seek(0)
        schema_json = []
        try:
            dataframe = pandas.read_csv(analysis_temp_file, sep=self.sep)
        except Exception as ex:
            LOGGER.error('Cannot parse file:', exc_info=True)
            raise CsvDataValidationError('Cannot parse file %s' % str(ex))

        for col in dataframe.columns:
            schema_entry = dict(name=translate_column(col),
                                type=get_type(dataframe[col].dtype,
                                              dataframe[col].values))
            schema_json.append(schema_entry)

        return schema_json
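
read_schema samples at most MAX_ANALYSIS_LINES data rows into a temporary file and lets pandas infer the column dtypes. The same idea in a self-contained sketch; translate_column and get_type are project-specific, so a simplified dtype-to-name mapping is assumed here:

import io
import pandas

def infer_schema(csv_text, max_lines=50):
    # keep the header plus at most max_lines data rows, as read_schema does
    sample = '\n'.join(csv_text.splitlines()[:max_lines + 1])
    frame = pandas.read_csv(io.StringIO(sample))
    type_names = {'int64': 'integer', 'float64': 'double', 'object': 'string'}
    return [dict(name=col,
                 type=type_names.get(str(frame[col].dtype), 'string'))
            for col in frame.columns]

print(infer_schema(u'age,name\n31,alice\n42,bob\n'))
# [{'name': 'age', 'type': 'integer'}, {'name': 'name', 'type': 'string'}]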
Example 6
def main(manager=manager):
    """ Supposed to be invoked by cron periodically
    """

    # 1. check sleeping jobs
    _now = now()
    for job_status in JobStatus.objects.find(status=JobStatus.SLEEPING):
        if utc(job_status.awake_at) > _now:
            continue

        job = manager.registry.get(job_status.name)
        manager.producer.send_message(job.topic, job_status)
        LOGGER.info('Job: %s awakened and sent to execution.', job_status.id)

    # 2. check timed out jobs
    for job_status in JobStatus.objects.find(status=JobStatus.RUNNING):
        job = manager.registry.get(job_status.name)

        last_activity = job_status.last_activity or job_status.started_date
        if _now - utc(last_activity) < timedelta(seconds=job.timeout):
            continue

        job_status.update(completion_date=now(), status=JobStatus.TERMINATED)
        LOGGER.info('Job: %s terminated. No activity last %s seconds.',
                    job_status.id, job.timeout)
        if job.terminate_handler:
            try:
                job.terminate_handler(*job_status.args, **job_status.kwargs)
                LOGGER.info('terminate_handler complete for Job: %s.',
                            job_status.id)
            except Exception as ex:
                LOGGER.error('Error executing terminate_handler: %s',
                             ex,
                             exc_info=True)
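
The timeout branch reduces to a staleness check against the job's last activity; a minimal sketch with plain datetimes (the real code routes through the utc()/now() helpers and JobStatus fields):

from datetime import datetime, timedelta

def is_timed_out(last_activity, timeout_seconds, _now=None):
    _now = _now or datetime.utcnow()
    return _now - last_activity >= timedelta(seconds=timeout_seconds)

started = datetime.utcnow() - timedelta(seconds=90)
print(is_timed_out(started, 60))  # True: 90s of silence exceeds the 60s budget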
Example 7
    def __gen_authtoken(self):
        " request new auth token "

        post_data = {
            'username': self.options.username,
            'password': self.options.password
        }
        post_data = json.dumps(post_data)

        self.conn.setopt(pycurl.URL,
                         '%s/api/v1.2/authtokens' % self.options.url)
        self.conn.setopt(pycurl.POSTFIELDS, post_data)
        self.conn.setopt(pycurl.HTTPHEADER, ['Content-Type: application/json'])

        while True:
            self.reset_buff()
            try:
                response = self.handle_connection()
            except FeedAPIError as err:
                LOGGER.error(err)
                time.sleep(self.sleep_timeout)
            else:
                authtoken = response['item']['token'].encode('utf-8')
                self.__class__.authtoken = authtoken
                return authtoken
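
For reference, the same JSON POST as a free-standing pycurl call; the URL and credentials are placeholders, and the response bytes are collected via WRITEFUNCTION, which is what reset_buff()/handle_connection() manage inside the class above:

import json
from io import BytesIO

import pycurl

buf = BytesIO()
conn = pycurl.Curl()
conn.setopt(pycurl.URL, 'https://example.com/api/v1.2/authtokens')
conn.setopt(pycurl.POSTFIELDS, json.dumps({'username': 'u', 'password': 'p'}))
conn.setopt(pycurl.HTTPHEADER, ['Content-Type: application/json'])
conn.setopt(pycurl.WRITEFUNCTION, buf.write)
conn.perform()
print(buf.getvalue())
conn.close()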
Example 8
    def run(self):
        inp_queue = self.inp_queue
        start_time = time.time()

        while not self.stopped():
            # make sure we intercept all errors
            try:
                task = inp_queue.get()
                if task is self.QUIT or task == 'QUIT':
                    LOGGER.debug('received QUIT signal %s' % self)
                    break
                start_time = time.time()
                self._busy = True  # Just started doing our post processing
                post_fields = self.preprocess_post(task)
                if not post_fields:
                    LOGGER.warning('no post_fields in: %s', task)
                    continue

                # LOGGER.debug('creating post %r %s', post_fields.get('content'), inp_queue.qsize())

                if self.assign_channels(post_fields):
                    self.create_post(**post_fields)
                else:
                    LOGGER.info('skipping post %r' %
                                post_fields.get('content'))
                    self.inc_skipped()

                self._busy = False  # Just Finished doing our post processing
            except Exception as err:
                LOGGER.error(err, exc_info=True)
            finally:
                # body truncated in the source; clearing the busy flag here is
                # a plausible reconstruction
                self._busy = False
Example 9
    def run(self):
        self.ds_client = None

        while not self.stopped():
            try:
                del self.ds_client  # to garbage-collect the old client ASAP
                self._running = False

                if not get_var('ON_TEST'):
                    self.ds_client = DatasiftClient(ds_login=self.ds_login,
                                                    ds_api_key=self.ds_api_key,
                                                    bot_instance=self,
                                                    sanity_checker=self.checker)
                else:
                    self.ds_client = TestDatasiftClient(bot_instance=self)

                self.ds_client.connect()
                self._running = True

                LOGGER.info('connected to %s', self.ds_client.WEBSOCKET_BASE_URL)

                self.checker.set_client(self.ds_client)
                self.ds_subscriber.set_client(self.ds_client)

                self.ds_client.run()  # receives posts from Datasift
            except Exception as e:
                LOGGER.error(e, exc_info=True)
                sleep(5)  # wait a bit on any unexpected error
Example 10
    def check_alive(self):
        # elapsed seconds since the last keepalive check
        elapsed = int(time.time()) - self.keepalive_lastcheck
        if elapsed > self.keepalive_timeout:
            LOGGER.error('[@%s] TCP connection timeout', self.me)
            self.keepalive_lastcheck = int(time.time())
            return False
        return True
Example 11
def tw_unfollow(channel, user_profile, silent_ex=False):
    """
    Task for unfollowing a twitter screen name.

    :param channel: Used to grab the twitter credentials we are using for the unfollow action.
    :param user_profile: The user profile whose twitter screen name we are going to unfollow
    :param silent_ex: Optional, if true any exceptions will just be silently ignored
    """
    from solariat_bottle.settings import get_var, LOGGER
    from solariat_bottle.db.user_profiles.user_profile import UserProfile, get_brand_profile_id, get_native_user_id_from_channel
    result = {}
    if not get_var('ON_TEST') and get_var('APP_MODE') == 'prod':
        from solariat_bottle.utils.oauth import get_twitter_oauth_credentials
        (consumer_key, consumer_secret, _, access_token_key,
         access_token_secret) = get_twitter_oauth_credentials(channel)
        try:
            result = tw_destroy_friendship(
                consumer_key=consumer_key,
                consumer_secret=consumer_secret,
                access_token_key=access_token_key,
                access_token_secret=access_token_secret,
                screen_name=user_profile.screen_name)
        except Exception as ex:
            LOGGER.error("tasks.tw_unfollow: " + str(ex))
            if silent_ex:
                result = dict(error=str(ex))
            else:
                raise
Example 12
def _get_request_data():
    """Convert request data to dict
    Data could come with url encoded or as plain html form,
    or with JSON send with curl -d '{"var": "value"}

    """
    import json
    form = {}
    if request.method in ['PUT', 'POST']:
        form = request.form
        if form and not any(form.values()):
            # no values - there is json from -d {...} coming
            try:
                form = json.loads("".join(form.keys()))
            except ValueError:
                msg = "Could not parse JSON from %s" % "".join(form.keys())
                LOGGER.error(msg)
                raise RuntimeError(msg)

    body_data = {}
    if request.data:
        try:
            body_data = json.loads(request.data)
            if not isinstance(body_data, dict):
                raise ValueError
        except ValueError:
            raise RuntimeError("Could not load JSON from %s" % request.data)

    return dict(form.items() + request.args.items() + body_data.items() +
                request.files.items())
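
The Python 2 idiom dict(a.items() + b.items() + ...) on the last line merges the sources with left-to-right precedence. A rough modern-Flask equivalent of the whole helper, exercised through the test client (route name and payload are illustrative):

from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/echo', methods=['POST', 'PUT'])
def echo():
    merged = {}
    merged.update(request.form.items())                  # url-encoded / HTML form
    merged.update(request.args.items())                  # query string
    merged.update(request.get_json(silent=True) or {})   # curl -d '{...}'
    return jsonify(merged)

with app.test_client() as client:
    print(client.post('/echo?a=1', json={'b': 2}).get_json())
    # {'a': '1', 'b': 2}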
Example 13
    def get_age(self):
        # Best guess we can make is by date of birth, if present and properly formatted
        if self.date_of_birth:
            try:
                dob = datetime.strptime(self.date_of_birth, AGE_FORMAT)
                return relativedelta(datetime.now(), dob).years
            except Exception as ex:
                LOGGER.error(ex)
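
The dateutil call in isolation, for reference; AGE_FORMAT is project-specific, so a plain '%Y-%m-%d' format is assumed:

from datetime import datetime
from dateutil.relativedelta import relativedelta

dob = datetime.strptime('1990-06-15', '%Y-%m-%d')
print(relativedelta(datetime.now(), dob).years)  # whole years elapsed since dob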
Example 14
    def process(self):
        if not self.channel:
            from solariat_bottle.settings import LOGGER

            LOGGER.error("Subscription %s: channel is broken" % self)
            return None

        # tw_process_historic_subscription.ignore(self)
        tw_process_historic_subscription(self)
Example 15
def get_page_admins(page):
    api = facebook_driver.GraphAPI(page['access_token'])
    try:
        res = api.get_object('/%s/roles' % page['id'])
    except facebook.GraphAPIError as e:
        LOGGER.error(e)
        return None

    return res['data']
Example 16
    def get_object(self, sender_id, fields=None):
        try:
            if fields is not None:
                result = self.api.get_object(sender_id, fields=fields)
            else:
                result = self.api.get_object(sender_id)
            return result
        except Exception as ex:
            LOGGER.error("GET for id %s with fields %s raised %s" % (sender_id, fields, ex))
            raise
Example 17
    def _handle_http_response(self, response):
        "JSON-decode the response; log and return {'ok': False} on any issue"
        try:
            resp = json.loads(response.data)
            if not resp.get('ok', None):
                LOGGER.error(resp.get('error', 'unknown error'))
            return resp
        except ValueError:
            LOGGER.error("Could not decode %s" % response.data)
            return {'ok': False}
Example 18
def get_datasift_hash():
    "Gets a datasift hash from database"
    try:
        db = get_connection()
        res = db.PostFilterStream.find_one({'_id': 'datasift_stream1'})
        if res and 'dh' in res and res['dh']:
            return res['dh']
    except Exception as e:
        LOGGER.error(e, exc_info=True)

    return None
Example 19
    def get(self, name):
        with self.lock:
            if name not in self.registry:
                LOGGER.info('No Job registered for: %s, trying to import.',
                            name)
                try:
                    self._import_module(name)
                except ImportError:
                    LOGGER.error('Cannot import job module:', exc_info=True)
                    raise RegistryError('No Job registered for: %s' % name)
            return self.registry[name]
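
The lock-guarded lookup lazily imports the module that is expected to register the job as an import side effect. A self-contained sketch of that pattern; the name-to-module convention is an assumption, since _import_module is not shown:

import importlib
import threading

class RegistryError(Exception):
    pass

class JobRegistry(object):
    def __init__(self):
        # RLock: the imported module calls add() on this same thread
        # while get() still holds the lock
        self.lock = threading.RLock()
        self.registry = {}

    def add(self, name, job):
        with self.lock:
            self.registry[name] = job

    def get(self, name):
        with self.lock:
            if name not in self.registry:
                # assume 'pkg.module.job' lives in 'pkg.module', and that
                # importing it calls add('pkg.module.job', ...) at module scope
                try:
                    importlib.import_module(name.rsplit('.', 1)[0])
                except ImportError:
                    raise RegistryError('No Job registered for: %s' % name)
            return self.registry[name]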
Example 20
def fix_for_neg_value(results, params, pipeline):
    ''' Workaround for negative values so we do not destroy the UI'''
    fixed = False
    for r in results:
        for p in params:
            if p not in r:
                break
            if r[p] != abs(r[p]):
                fixed = True
            r[p] = abs(r[p])
    if fixed:
        LOGGER.error("Negative counts for %s query", pipeline)
Example 21
    def _validate_schema(self, schema_json):
        schema = self.schema or self.discovered_schema
        current_fields = {
            col[KEY_NAME]
            for col in schema if KEY_EXPRESSION not in col
        }
        input_fields = {col[KEY_NAME] for col in schema_json}
        if len(current_fields & input_fields) != len(current_fields):
            LOGGER.error('Input schema columns: %s\n\ndiffer from '
                         'current: %s' % (input_fields, current_fields))
            raise SchemaValidationError(
                'Input schema columns differ from current.')
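
The length-of-intersection comparison is a subset test in disguise; an equivalent, arguably clearer spelling using set operators:

current_fields = {'age', 'name'}
input_fields = {'age', 'name', 'city'}

assert current_fields <= input_fields          # every current column is present
assert current_fields - input_fields == set()  # i.e. nothing is missing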
Example 22
    def run(self):

        post_data = None
        authtoken = None
        expired_authtoken = None

        while True:
            if not authtoken:
                authtoken = self.get_authtoken(expired_authtoken)
            if not post_data:
                post_data = self.task_queue.get()
            # This is used both by datasift and by twitter_bot_dm.
            # Just to be safe, in case we receive a dict with no 'channels' key,
            # do the processing here (as is the case with twitter_bot);
            # otherwise assume it was done before (as is the case with datasift_bot).
            if isinstance(post_data, dict) and 'channels' not in post_data:
                channels = handle_post('Twitter', post_data['user_profile'],
                                       post_data)
                if channels:
                    channels = [str(c.id) for c in channels]
                    post_data['channels'] = channels

                # we need this for getting channels only
                if 'direct_message' in post_data:
                    del post_data['direct_message']

                post_data = json.dumps(post_data)

            self.reset_buff()
            self.conn.setopt(pycurl.POSTFIELDS, post_data)
            self.conn.setopt(
                pycurl.URL,
                '%s/api/v1.2/posts?token=%s' % (self.options.url, authtoken))
            self.conn.setopt(pycurl.HTTPHEADER,
                             ['Content-Type: application/json'])

            try:
                self.handle_connection()
            except ApplicationError as err:
                if str(err) == 'Auth token %s is expired' % authtoken:
                    LOGGER.info(err)
                    expired_authtoken = authtoken
                    authtoken = None
                else:
                    LOGGER.error(err)
                    post_data = None
                    self.task_queue.task_done()
            except InfrastructureError as err:
                LOGGER.error(err)
                time.sleep(self.sleep_timeout)
            else:
                post_data = None
                self.task_queue.task_done()
Example 23
    def api_data_to_model_data(user, api_data, raw_db_fields=False,
                               fields_mapping=None, profile_schema=None, profile_class=None):
        """
        Helper to transform incoming API data to our model format.
        """
        if fields_mapping is None:
            fields_mapping = dict()
        agent_data = api_data
        for key, val in api_data.iteritems():
            if isinstance(val, dict):
                api_data[key] = AgentsAPIView.replace_dots_in_dict(val)

        if profile_schema is None:
            agent_profile_schema = user.account.agent_profile._get()
        else:
            agent_profile_schema = profile_schema

        if profile_class is None:
            AgentProfile = agent_profile_schema.get_data_class()
        else:
            AgentProfile = profile_class

        schema_changed = False
        for key in agent_data.keys():
            if key not in AgentProfile.fields:
                inferred_type = get_type_mapping(agent_data[key])
                if inferred_type:
                    if agent_profile_schema.schema:
                        agent_profile_schema.schema.append({KEY_NAME: key,
                                                            KEY_TYPE: inferred_type})
                        agent_profile_schema._data_cls = None
                    else:
                        agent_profile_schema.discovered_schema.append({KEY_NAME: key,
                                                                       KEY_TYPE: inferred_type})
                        agent_profile_schema._raw_data_cls = None
                    fields_mapping[key] = key
                    schema_changed = True
                else:
                    agent_data.pop(key)
            else:
                fields_mapping[key] = AgentProfile.fields[key].db_field
                field_type = reversed_mapping[AgentProfile.fields[key].__class__]
                try:
                    field_val = apply_shema_type(agent_data[key], field_type)
                    if isinstance(field_val, dict):
                        field_val = AgentsAPIView.replace_dots_in_dict(field_val)
                    agent_data[key] = field_val
                except Exception as ex:
                    LOGGER.error("Failed to apply schema for field %s and value %s. Got error %s" % (
                        key, agent_data[key], ex))
                    agent_data.pop(key)
Example 24
def fb_put_comment_by_channel(channel, object_id, message):

    from solariat_bottle.settings import LOGGER
    from solariat_bottle.tasks.exceptions import FacebookCommunicationException

    try:
        return get_facebook_api(channel).put_comment(
            object_id, force_bytes(message, 'utf-8', 'replace'))
    except Exception as ex:
        LOGGER.error(
            "Failure posting comment to facebook. Exc: %s,  Channel: %s, Object_id: %s, Message: %s"
            % (ex, channel, object_id, message))
        raise FacebookCommunicationException(ex.message)
Example 25
def process_journeys_analysis(analysis_tpl):
    start_time = utc_now()
    # filters['journey_type'] holds a single journey_type id in an array
    journey_type_instance = JourneyType.objects.get(
        analysis_tpl.filters['journey_type'][0])
    # JourneysAnalysis handles data parsing, filtering etc.
    j_analysis = JourneysAnalysis(analysis_tpl, journey_type_instance)

    try:
        journey_filters = j_analysis.parsed_journeys_filters()
        # We only use these for matching to classes in case of conversion analysis; pop them from filters
        if 'funnel_id' in journey_filters:
            journey_filters.pop('funnel_id')
        if 'stage_id' in journey_filters:
            journey_filters.pop('stage_id')

        match = journey_filters
        initial_pipeline = [{"$match": match}]
        pipe = initial_pipeline.append
        pipe({'$sort': {CustomerJourney.F.first_event_date: 1}})

        if 'from_dt' in journey_filters:
            journey_filters.pop('from_dt')
            journey_filters.pop('to_dt')

        timeline_filter = deepcopy(journey_filters)
        timeline_filter.update({
            'level': analysis_tpl.get_timerange_level(),
            'computed_metric': analysis_tpl.analyzed_metric,
            'plot_type': 'timeline'
        })

        params = dict(filters=journey_filters,
                      initial_pipeline=initial_pipeline,
                      start_time=start_time,
                      timeline_filter=timeline_filter)
        analysis_process = AnalysisProcess(j_analysis, params)

        if analysis_tpl.analysis_type == analysis_tpl.CLASSIFICATION_TYPE:
            analysis_process.classification()
        elif analysis_tpl.analysis_type == analysis_tpl.REGRESSION_TYPE:
            analysis_process.regression()

    except AnalysisTerminationException as ex:
        LOGGER.error(ex)
        j_analysis.analysis.status_message = str(ex)
        j_analysis.analysis.progress = PROGRESS_ERROR
        j_analysis.analysis.save()
        manager.produce_state_update({'error': str(ex)})
        return
Example 26
    def run(self):
        while not self.stopped():
            try:
                # process usernames in reconnect bucket
                self.process_reconnects()
                self.reconnect_stat.log_frequent_reconnects(len(self.streams))

                # scan db for changes
                if not self.last_sync or (time.time() -
                                          self.last_sync) > self.SYNC_INTERVAL:
                    self.sync_streams()
                    self.last_sync = time.time()
            except Exception as err:
                LOGGER.error(err, exc_info=True)
            finally:
                # body truncated in the source; a short pause between scans is
                # a plausible reconstruction
                time.sleep(1)
Example 27
    def run(self):
        cmd_queue = self.cmd_queue
        cur_hash  = None
        ds_client = None

        while not self.stopped():
            # make sure we intercept all errors
            try:
                # react on commands, waiting up to 10 seconds for the next one
                try:
                    cmd, arg = cmd_queue.get(block=True, timeout=10 if not get_var('ON_TEST') else 1)
                    LOGGER.debug('received %s command', cmd)
                    if cmd == 'CLIENT':
                        ds_client = arg
                        cur_hash  = None
                    elif cmd == 'QUIT':
                        break
                except Empty:
                    LOGGER.debug("timeout (it's okay)")

                if ds_client is None:
                    continue

                if ds_client.terminated:
                    LOGGER.warning('ds_client is terminated')
                    ds_client = None
                    continue

                # get current datasift hash from the db
                ds_hash = get_datasift_hash()
                if not ds_hash:
                    continue

                # subscribe/unsubscribe if necessary
                if not cur_hash:
                    ds_client.subscribe(ds_hash)

                elif cur_hash != ds_hash:
                    ds_client.unsubscribe(cur_hash)
                    ds_client.subscribe(ds_hash)

                # remember the current hash
                cur_hash = ds_hash

            except Exception as err:
                LOGGER.error(err, exc_info=True)
Example 28
    def run(self, task):
        # make sure we intercept all errors
        try:
            post_fields = self.preprocess_post(task)
            if not post_fields:
                LOGGER.warning('no post_fields in: %s', task)
                return

            if self.assign_channels(post_fields):
                self.create_post(**post_fields)
            else:
                LOGGER.info('skipping post %r' % post_fields.get('content'))

        except Exception as err:
            LOGGER.error(err, exc_info=True)
Example 29
        def wrapper(*args, **kw):
            data = dict()
            data.update(request.view_args)
            data.update(request.args)
            channel_id = data.get(ch_key)
            try:
                channel = get_doc_or_error(Channel, channel_id)
                kw['channel'] = channel
                return view_func(*args, **kw)
            except Exception as e:
                from solariat_bottle.settings import LOGGER

                LOGGER.error(e)
                return jsonify(ok=False,
                               error="No channel exists for id=%s" %
                               channel_id)
Example 30
    def __init__(self, user, kwargs):
        super(KafkaFeedApiPostCreator, self).__init__(user, kwargs)

        class Options(dict):
            __getattr__ = dict.__getitem__

        self.options = Options(username=user,
                               password=kwargs['password'],
                               url=kwargs['url'],
                               retries=kwargs.get('retries', 3))
        if not (self.options.password and self.options.url):
            err_msg = "Configuration Error: password and url are required"
            LOGGER.error("%s %s" % (err_msg, self.options))
            raise RuntimeError(err_msg)

        self.user_agent = kwargs.pop('user_agent', 'FeedApi-PostCreator')
        self.sleep_timeout = 30
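
The Options trick in isolation: aliasing __getattr__ to dict.__getitem__ gives attribute-style access to dict keys, with the caveat that a missing key raises KeyError rather than AttributeError:

class Options(dict):
    __getattr__ = dict.__getitem__

opts = Options(username='feed-api', retries=3)
print('%s %s' % (opts.username, opts['retries']))  # feed-api 3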