def start(self):
     super().start()
     if self.polling_interval().total_seconds() > 0:
         self._poll_job = Job(self.poll, self.polling_interval(), True)
         spawn(self.poll)
     else:
         self.logger.info("No poll job")
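Across these examples the scheduler is driven through the same positional pattern: Job(target, delta, repeatable, *args, **kwargs), where delta is a timedelta and any extra arguments are forwarded to the target. The snippet below is a minimal, hypothetical stand-in written against that assumed signature, not the real nio scheduler implementation; it is only meant to make the semantics of the calls in these examples concrete.

from datetime import timedelta
from threading import Timer


class Job(object):
    """Hypothetical stand-in for the nio scheduler Job used in these examples.

    Assumed behavior: call target(*args, **kwargs) after `delta`; if
    `repeatable` is True, keep calling it every `delta` until cancel().
    """

    def __init__(self, target, delta, repeatable, *args, **kwargs):
        self._target = target
        self._seconds = delta.total_seconds()
        self._repeatable = repeatable
        self._args = args
        self._kwargs = kwargs
        self._timer = None
        self._schedule()

    def _schedule(self):
        self._timer = Timer(self._seconds, self._run)
        self._timer.daemon = True
        self._timer.start()

    def _run(self):
        self._target(*self._args, **self._kwargs)
        if self._repeatable:
            self._schedule()

    def cancel(self):
        if self._timer is not None:
            self._timer.cancel()


if __name__ == '__main__':
    import time
    job = Job(print, timedelta(seconds=1), True, "tick")  # repeats every second
    time.sleep(3.5)
    job.cancel()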
Example #2
 def start(self):
     super().start()
     self.counter = 0
     # Schedule interval simulations for the future
     self._job = Job(self._simulate, self.interval(), True)
     # But also simulate right away
     self._simulate()
Example #3
 def start(self):
     super().start()
     # Like crontab, check to run jobs every minute
     self._job = Job(self._cron, timedelta(minutes=1), True)
     # Run a cron cycle immediately, but in a new thread since it
     # might take some time and we don't want it to hold up start
     spawn(self._cron)
Example #4
    def _start_expiry_job(self):
        # Stop the existing job, if it exists
        self._stop_expiry_job()

        self._heartbeat_expiry_job = Job(
            self._no_heartbeat_response,
            timedelta(seconds=self._heartbeat_timeout),
            repeatable=False)
Example #5
class Collector(object):

    """ A block mixin that provides collection/buffering functionality.

    By including this mixin, your block will have a `collect` property
    added to it which defines how long to collect for. Any subsequent
    calls to notify_signals will not reach the block router until the
    collection window has finished.

    This is useful for high volume blocks that wish to buffer their output.

    If the collect property is set to 0, signals are notified immediately,
    as if this mixin weren't included.
    """

    collect = TimeDeltaProperty(
        title='Collect Timeout', default={"seconds": 1})

    def __init__(self):
        super().__init__()
        self._collect_job = None
        self._collect_lock = Lock()
        self._sigs_out = []

    def start(self):
        # Start the collection job, if we want to be collecting
        if self._are_we_collecting():
            self._collect_job = Job(self._dump_signals, self.collect, True)
        super().start()

    def stop(self):
        if self._collect_job:
            self._collect_job.cancel()
        super().stop()

    def notify_signals(self, signals):
        """Override the notify signals call to keep collecting"""
        if self._are_we_collecting():
            with self._collect_lock:
                self._sigs_out.extend(signals)
        else:
            super().notify_signals(signals)

    def _are_we_collecting(self):
        """Return True if we should be collecting signals"""
        return self.collect.total_seconds() > 0

    def _dump_signals(self):
        """Notify any signals we have collected this window.

        This gets called by the scheduled Job.
        """
        with self._collect_lock:
            if len(self._sigs_out):
                super().notify_signals(self._sigs_out)
                self._sigs_out = []
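A hypothetical block composed with this mixin might look like the sketch below. The import paths and the Block class are assumptions based on the nio framework used throughout these examples; the mixin must come before Block so its notify_signals override takes effect.

from nio.block.base import Block      # assumed import path


class BufferedEmitter(Collector, Block):
    """Hypothetical block: everything it notifies passes through the
    Collector window configured by the `collect` property."""

    def process_signals(self, signals):
        # With collect > 0 these are buffered and flushed by the scheduled
        # _dump_signals job; with collect == 0 they pass straight through.
        self.notify_signals(signals)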
Example #6
 def start(self):
     super().start()
     self._authorize()
     self._construct_url()
     self._search_job = Job(
         self._search_tweets,
         self.interval(),
         False,
         self._url
     )
Example #7
 def start(self):
     now = datetime.utcnow()
     latest = self._last_emission or now
     delta = self.interval - (now - latest)
     self._emission_job = Job(
         self.emit,
         delta,
         False,
         reset=True
     )
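This one-shot job pairs with an emit(reset=True) method shown in Example #32: computing delta from the last emission lets the block wait out only the remainder of the interval (useful after a restart with persistence), and the reset call then swaps the one-shot job for a repeating one. Below is a condensed, hypothetical sketch of that hand-off, using the Job semantics assumed earlier.

from datetime import datetime, timedelta


class EmitOnInterval(object):
    """Condensed sketch of the start()/emit() hand-off; not a real block."""

    def __init__(self, interval=timedelta(seconds=10), last_emission=None):
        self.interval = interval
        self._last_emission = last_emission
        self._emission_job = None

    def start(self):
        now = datetime.utcnow()
        latest = self._last_emission or now
        # Wait out only what remains of the interval since the last emission
        delta = self.interval - (now - latest)
        self._emission_job = Job(self.emit, delta, False, reset=True)

    def emit(self, reset=False):
        if reset:
            # Replace the one-shot bootstrap job with the steady repeating job
            self._emission_job.cancel()
            self._emission_job = Job(self.emit, self.interval, True)
        self._last_emission = datetime.utcnow()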
Example #8
class CounterFast(Block):

    version = VersionProperty("0.1.1")
    frequency = ObjectProperty(
        Frequency, title="Report Frequency", default=Frequency())

    def configure(self, context):
        super().configure(context)
        self._cumulative_count = 0
        self._cumulative_count_lock = Lock()

        if self.frequency().enabled():
            self._tracker = FrequencyTracker(
                total_seconds(self.frequency().averaging_interval()))

    def start(self):
        if self.frequency().enabled():
            self._job = Job(self.report_frequency,
                            self.frequency().report_interval(), True)

    def process_signals(self, signals):
        count = len(signals)
        self.logger.debug("Ready to process {} signals".format(count))

        with self._cumulative_count_lock:
            if self.frequency().enabled():
                self._tracker.record(count)
            self._cumulative_count += count
            cumulative_count = self._cumulative_count
        signal = Signal({
            "count": count,
            "cumulative_count": cumulative_count,
        })
        self.notify_signals([signal])

    def report_frequency(self):
        self.logger.debug("Reporting signal frequency")
        signal = Signal({"count_frequency": self._tracker.get_frequency()})
        self.notify_signals([signal])

    def stop(self):
        try:
            self._job.cancel()
        except AttributeError:
            pass
        super().stop()

    def reset(self):
        with self._cumulative_count_lock:
            self._cumulative_count = 0
        return True

    def value(self):
        return self._cumulative_count
Example #9
 def process_signals(self, signals):
     self.for_each_group(self.process_group, signals)
     # Start a new job if property is checked and there is no active job
     if self.signal_start() and not self._active_job:
         self._emission_job = Job(
             self._emit_job,
             self.interval(),
             False,
             group=None,
             reset=False,
         )
         self._active_job = True  # Added flag for active job
Example #10
 def start(self):
     # Start emission job on service start if bool property is not checked
     if self.interval() and not self.signal_start():
         now = datetime.utcnow()
         latest = self._last_emission or now
         delta = self.interval() - (now - latest)
         self._emission_job = Job(
             self._emit_job,
             delta,
             False,
             group=None,
             reset=True,
         )
Example #11
class SafeTrigger():
    """ Guarantees notifying signals every interval, regardless of count """

    interval = TimeDeltaProperty(title='Interval',
                                 default={'seconds': 1},
                                 order=0)
    max_count = IntProperty(title='Max Count', default=1, order=1)

    def __init__(self):
        super().__init__()
        self._job = None
        self.stop_event = Event()
        self.signal_lock = Lock()

    def start(self):
        super().start()
        self._job = Job(self._emit, self.interval(), True)
        # Run an emit cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._emit)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()

        self.stop_event.set()
        super().stop()

    def _emit(self):
        """ Called every *interval* to generate then notify the signals """
        self.logger.debug("New generation cycle requested")
        count = 0
        signals = []

        # Stop any currently running simulator threads
        self.stop_event.set()
        # We only want one simulator thread simulating at a time
        with self.signal_lock:
            # Ok, we're running, so clear the event and wait
            self.stop_event.clear()
            self.logger.debug("Starting generation...")
            while count < self.max_count() and not self.stop_event.is_set():
                signals.extend(self.generate_signals(1))
                count += 1

        self.logger.debug("Notifying {} signals".format(len(signals)))
        self.notify_signals(signals)
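SafeTrigger expects the composing block to provide generate_signals; a hypothetical pairing might look like this (the Block and Signal import paths are assumptions about the nio framework):

import random

from nio.block.base import Block      # assumed import path
from nio.signal.base import Signal    # assumed import path


class RandomTrigger(SafeTrigger, Block):
    """Hypothetical block: emits up to max_count random-valued signals
    per interval, relying on SafeTrigger for the scheduling above."""

    def generate_signals(self, n=1):
        return [Signal({"value": random.random()}) for _ in range(n)]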
Example #12
 def _start_emit_job(self):
     if self.interval.total_seconds() >= 0:
         self._emit_job = Job(
             self.emit,
             self.interval,
             True
         )
Example #13
 def start(self):
     super().start()
     self.counter = 0
     # Schedule interval simulations for the future
     self._job = Job(self._simulate, self.interval(), True)
     # But also simulate right away
     self._simulate()
Example #14
    def _retry_poll(self, paging=False):
        """ Helper method to schedule polling retries.

        """
        if self._poll_job is not None:
            self._poll_job.cancel()
            self._poll_job = None
        if self._retry_count < self.retry_limit():
            self.logger.debug("Retrying the polling job...")
            self._retry_count += 1
            self._retry_job = Job(self.poll,
                                  self._retry_interval,
                                  False,
                                  paging=paging,
                                  in_retry=True)
            self._update_retry_interval()
        else:
            self.logger.error("Out of retries. "
                              "Aborting and changing status to Error.")
            status_signal = BlockStatusSignal(RunnerStatus.error,
                                              'Out of retries.')

            # Leaving source for backwards compatibility
            # In the future, you will know that a status signal is a block
            # status signal when it contains service_name and name
            #
            # TODO: Remove when source gets added to status signals in nio
            setattr(status_signal, 'source', 'Block')

            self.notify_management_signal(status_signal)
Example #15
 def _schedule_signal_expiration_job(self, group, input_id):
     """ Schedule expiration job, cancelling existing job first """
     if self._expiration_jobs[group][input_id]:
         self._expiration_jobs[group][input_id].cancel()
     self._expiration_jobs[group][input_id] = Job(
         self._signal_expiration_job, self.expiration(), False, group,
         input_id)
Example #16
    def start(self):
        super().start()

        # If we have configured a backup interval, set up the job to
        # periodically save to persistence
        if self.backup_interval.total_seconds() > 0:
            self._backup_job = Job(self._save, self.backup_interval, True)
Example #17
    def start(self):
        super().start()
        self._publisher.open()

        # create publishing repeatable job
        self._job = Job(self._deliver_stats,
                        timedelta(seconds=self._stats_timeout), True)
Example #18
 def start(self):
     super().start()
     # Like crontab, check to run jobs every minute
     self._job = Job(self._cron, timedelta(minutes=1), True)
     # Run a cron cycle immediately, but in a new thread since it
     # might take some time and we don't want it to hold up start
     spawn(self._cron)
Example #19
class SafeTrigger():

    """ Guarantees notifying signals every interval, regardless of count """

    interval = TimeDeltaProperty(title='Interval', default={'seconds': 1})
    max_count = IntProperty(title='Max Count', default=1)

    def __init__(self):
        super().__init__()
        self._job = None
        self.stop_event = Event()
        self.signal_lock = Lock()

    def start(self):
        super().start()
        self._job = Job(self._emit, self.interval, True)
        # Run an emit cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._emit)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()

        self.stop_event.set()
        super().stop()

    def _emit(self):
        """ Called every *interval* to generate then notify the signals """
        self._logger.debug("New generation cycle requested")
        count = 0
        signals = []

        # Stop any currently running simulator threads
        self.stop_event.set()
        # We only want one simulator thread simulating at a time
        with self.signal_lock:
            # Ok, we're running, so clear the event and wait
            self.stop_event.clear()
            self._logger.debug("Starting generation...")
            while count < self.max_count and not self.stop_event.is_set():
                signals.extend(self.generate_signals(1))
                count += 1

        self._logger.debug("Notifying {} signals".format(len(signals)))
        self.notify_signals(signals)
Example #20
    def _start_expiry_job(self):
        # Stop the existing job, if it exists
        self._stop_expiry_job()

        self._heartbeat_expiry_job = Job(
            self._no_heartbeat_response,
            timedelta(seconds=self._heartbeat_timeout),
            repeatable=False)
Example #21
 def _schedule_timeout_job(self, signal, key, interval, repeatable):
     self.logger.debug("Scheduling new timeout job for group {}, "
                       "interval={} repeatable={}".format(
                           key, interval, repeatable))
     self._jobs[key][interval] = Job(self._timeout_job, interval,
                                     repeatable, signal, key, interval)
     if repeatable:
         self._repeatable_jobs[key][interval] = signal
Example #22
 def start(self):
     super().start()
     self._start_emit_job()
     self._backup_job = Job(
         self._backup,
         self.backup_interval,
         True
     )
Example #23
    def start_heartbeats(self):
        """ Start a job which will periodically send heartbeats to the server.

        This method will also start a job that will wait for responses in case
        the server doesn't respond in time.
        """
        # Since we are starting a new heartbeat cycle, cancel anything
        # that was outstanding
        self.stop_heartbeats()

        # Start a job that will send heartbeats indefinitely
        self._heartbeat_job = Job(self._heartbeat_func,
                                  timedelta(seconds=self._heartbeat_interval),
                                  repeatable=True)

        # Also start a job that will wait for heartbeat timeouts
        self._start_expiry_job()
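start_heartbeats leans on stop_heartbeats and _start_expiry_job (the latter appears in Examples #4 and #20). The counterparts below are plausible sketches of what those cancellation methods might look like; they are assumptions, not the project's actual code.

    def stop_heartbeats(self):
        """ Hypothetical counterpart: cancel the repeating heartbeat sender
        and any pending expiry job.
        """
        if self._heartbeat_job is not None:
            self._heartbeat_job.cancel()
            self._heartbeat_job = None
        self._stop_expiry_job()

    def _stop_expiry_job(self):
        # Hypothetical: assumes the expiry job attribute defaults to None
        if self._heartbeat_expiry_job is not None:
            self._heartbeat_expiry_job.cancel()
            self._heartbeat_expiry_job = None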
Example #24
class IntervalTrigger():
    """Generate signals at a regular interval up to total_signals"""

    total_signals = IntProperty(title="Total Number of Signals",
                                default=-1,
                                order=4)
    interval = TimeDeltaProperty(title='Interval',
                                 default={'seconds': 1},
                                 order=0)

    def __init__(self):
        super().__init__()
        self.counter = None
        self._job = None

    def start(self):
        super().start()
        self.counter = 0
        # Schedule interval simulations for the future
        self._job = Job(self._simulate, self.interval(), True)
        # But also simulate right away
        self._simulate()

    def _simulate(self):
        sigs = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(sigs, list):
            sigs = list(sigs)
        # Add however many signals were generated (in case multiple
        # signals mixin was used) to the counter and notify them
        self.counter += len(sigs)
        # self.counter - self.total_signals() yields the number of signals
        # that should be removed
        if self.counter > self.total_signals() and self.total_signals() >= 0:
            sigs_to_remove = self.counter - self.total_signals()
            sigs = sigs[:-1 * sigs_to_remove]
        self.notify_signals(sigs)
        if self.total_signals() > 0 and \
                self.counter >= self.total_signals():
            self._job.cancel()

    def stop(self):
        """ Stop the simulator thread. """
        self._job.cancel()
        super().stop()
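A worked illustration of the trimming logic in _simulate: with total_signals set to 5 and two signals generated per cycle, the third cycle pushes the counter to 6, so one signal is sliced off before notification and the job is then cancelled.

counter, total_signals = 6, 5
sigs = ["sig5", "sig6"]                    # signals from the third cycle
sigs_to_remove = counter - total_signals   # -> 1
sigs = sigs[:-1 * sigs_to_remove]          # -> ["sig5"]; the sixth is dropped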
Example #25
 def _start_emit_job(self):
     ''' Start job that emits signals from the queue '''
     if self.interval() and self.interval().total_seconds() > 0:
         # only schedule if the interval is a positive number
         self._emit_job = Job(
             self.emit,
             self.interval(),
             True
         )
Example #26
 def _emit_job(self, group, reset=False):
     self.logger.debug('Emitting signals')
     if reset:
         self._emission_job.cancel()
         self._emission_job = Job(
             self._emit_job,
             self.interval(),
             True,
             group=group,
         )
     self._last_emission = datetime.utcnow()
     signals = self._get_emit_signals(group)
     self._active_job = False
     if signals:
         self.logger.debug('Notifying {} signals'.format(len(signals)))
         self.notify_signals(signals)
     else:
         self.logger.debug('No signals to notify')
Example #27
 def start(self):
     super().start()
     self._authorize()
     self._construct_url()
     self._search_job = Job(
         self._search_tweets,
         self.interval(),
         False,
         self._url
     )
Example #28
 def start(self):
     super().start()
     self._authorize()
     self._start()
     spawn(self._run_stream)
     self._notify_job = Job(
         self._notify_results,
         self.notify_freq(),
         True
     )
Example #29
 def start(self):
     super().start()
     if self.polling_interval().total_seconds() > 0:
         self._poll_job = Job(
             self.poll,
             self.polling_interval(),
             True
         )
         spawn(self.poll)
     else:
         self.logger.info("No poll job")
Example #30
class IntervalTrigger():
    """Generate signals at a regular interval up to total_signals"""

    total_signals = IntProperty(title="Total Number of Signals", default=-1, order=4)
    interval = TimeDeltaProperty(title='Interval', default={'seconds': 1}, order=0)

    def __init__(self):
        super().__init__()
        self.counter = None
        self._job = None

    def start(self):
        super().start()
        self.counter = 0
        # Schedule interval simulations for the future
        self._job = Job(self._simulate, self.interval(), True)
        # But also simulate right away
        self._simulate()

    def _simulate(self):
        sigs = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(sigs, list):
            sigs = list(sigs)
        # Add however many signals were generated (in case multiple
        # signals mixin was used) to the counter and notify them
        self.counter += len(sigs)
        # self.counter - self.total_signals() yields the number of signals
        # that should be removed
        if self.counter > self.total_signals() and self.total_signals() >= 0:
            sigs_to_remove = self.counter - self.total_signals()
            sigs = sigs[:-1 * sigs_to_remove]
        self.notify_signals(sigs)
        if self.total_signals() > 0 and \
                self.counter >= self.total_signals():
            self._job.cancel()

    def stop(self):
        """ Stop the simulator thread. """
        self._job.cancel()
        super().stop()
Example #31
    def handle_reconnect(self):
        self._disconnect_client()

        # Don't need to reconnect if we are stopping
        if self._stopping:
            return

        if self._reconnection_job is not None:
            self._logger.debug("Reconnection job already scheduled")
            return

        self._logger.warning(
            "Attempting to reconnect in {0} seconds.".format(self._timeout))
        self._reconnection_job = Job(
            self._connect_to_gateway,
            timedelta(seconds=self._timeout),
            repeatable=False)
Example #32
 def emit(self, reset=False):
     self._logger.debug('Emitting signals')
     if reset:
         self._emission_job.cancel()
         self._emission_job = Job(
             self.emit,
             self.interval,
             True
         )
     self._last_emission = datetime.utcnow()
     signals = self._get_emit_signals()
     if signals:
         self._logger.debug('Notifying {} signals'.format(len(signals)))
         self.notify_signals(signals)
     elif self.timeout:
         self._logger.debug('Notifying timeout signal')
         self.notify_signals([Signal({self.timeout_attr: True})])
     else:
         self._logger.debug('No signals to notify')
     if self.use_persistence:
         self.persistence.store('last_emission', self._last_emission)
         self._backup()
Example #33
    def _epilogue(self):
        """ This can be overridden in user-defined blocks.

        Defines behavior after a query has been fully processed,
        when we are ready for the next query. That is, when paging
        is done and retries are cleared.

        """
        if self.polling_interval().total_seconds() > 0:
            self._poll_job = self._poll_job or Job(
                self.poll, self.polling_interval(), True)
        self._increment_idx()
        if self.queries():
            self.logger.debug("Preparing to query for: %s" %
                              self.current_query)
Example #34
    def _search_tweets(self, url):
        rsp = requests.get(url, auth=self._auth)
        status = rsp.status_code
        if status == 200:
            data = rsp.json()
            tweets = data['statuses']
            next_results = data['search_metadata'].get('next_results')
            self.notify_signals([Signal(t) for t in tweets])
            if next_results is not None:
                self._search_tweets(
                    "{0}{1}".format(SEARCH_URL, next_results)
                )
            else:
                self.logger.debug("Scheduling next search...")
                self._search_job = Job(
                    self._search_tweets,
                    self.interval(),
                    False,
                    self._url
                )

        else:
            self.logger.error(
                "Twitter search failed with status {0}".format(status))
Example #35
    def __get_publisher(self, topic, ttl):
        with self._cache_lock:
            publisher, prev_job = self._cache[topic]
            if prev_job is not None:
                prev_job.cancel()

            job = (Job(
                self.__close_publisher,
                ttl,
                False,
                topic,
            ) if ttl.total_seconds() >= 0 else None)

            self._cache[topic] = (publisher, job)

            return publisher
Example #36
    def _init_access_token(self):
        try:
            self._access_token = self.get_access_token('openid email')
            self.logger.debug("Obtained access token: {}".format(
                self._access_token))

            if self._reauth_job:
                self._reauth_job.cancel()

            # Remember to reauthenticate at a certain point if it's configured
            if self.reauth_interval().total_seconds() > 0:
                self._reauth_job = Job(self._init_access_token,
                                       self.reauth_interval(), False)

        except OAuth2Exception:
            self.logger.exception('Error obtaining access token')
            self._access_token = None
Example #37
    def start_heartbeats(self):
        """ Start a job which will periodically send heartbeats to the server.

        This method will also start a job that will wait for responses in case
        the server doesn't respond in time.
        """
        # Since we are starting a new heartbeat cycle, cancel anything
        # that was outstanding
        self.stop_heartbeats()

        # Start a job that will send heartbeats indefinitely
        self._heartbeat_job = Job(
            self._heartbeat_func,
            timedelta(seconds=self._heartbeat_interval),
            repeatable=True)

        # Also start a job that will wait for heartbeat timeouts
        self._start_expiry_job()
Example #38
 def process_group_signals(self, signals, group, input_id='repeat'):
     if input_id == 'cancel':
         self._cancel_group_job(group)
         return
     if len(signals) == 0:
         return
     signal = signals[-1]
     repeats_remaining = self.max_repeats(signal)
     with self._group_locks[group]:
         self._cancel_group_job(group)
         if repeats_remaining == 0:
             # They don't want to repeat, ignore
             return
         self.logger.debug("Setting up repeat for group {}".format(group))
         self.notifications[group]['signal'] = signal
         self.notifications[group]['num_remaining'] = repeats_remaining
         self.notifications[group]['job'] = Job(target=self.notify_group,
                                                delta=self.interval(signal),
                                                repeatable=True,
                                                group=group)
Example #39
    def _search_tweets(self, url):
        rsp = requests.get(url, auth=self._auth)
        status = rsp.status_code
        if status == 200:
            data = rsp.json()
            tweets = data['statuses']
            next_results = data['search_metadata'].get('next_results')
            self.notify_signals([Signal(t) for t in tweets])
            if next_results is not None:
                self._search_tweets(
                    "{0}{1}".format(SEARCH_URL, next_results)
                )
            else:
                self.logger.debug("Scheduling next search...")
                self._search_job = Job(
                    self._search_tweets,
                    self.interval(),
                    False,
                    self._url
                )

        else:
            self.logger.error(
                "Twitter search failed with status {0}".format(status))
Example #40
 def start(self):
     super().start()
     # use _start_time if it was loaded from persistence
     self._start_time = self._start_time or _time()
     self._averaging_seconds = self.averaging_interval().total_seconds()
     self._job = Job(self.report_frequency, self.report_interval(), True)
Example #41
class Buffer(Block):

    interval = TimeDeltaProperty(title='Buffer Interval')
    interval_duration = TimeDeltaProperty(title='Interval Duration')
    timeout = BoolProperty(title='Buffer Timeout', default=False)
    timeout_attr = StringProperty(title='Timeout Attributes', visible=False, default="timeout")
    use_persistence = BoolProperty(title='Use Persistence?', visible=False, default=False)

    def __init__(self):
        super().__init__()
        self._last_emission = None
        self._cache = defaultdict(list)
        self._cache_lock = Lock()
        self._emission_job = None

    def configure(self, context):
        super().configure(context)
        if self.use_persistence:
            self._last_emission = self.persistence.load('last_emission')
            self._cache = self.persistence.load('cache') or defaultdict(list)
            # For backwards compatibility, make sure cache is a dict.
            if not isinstance(self._cache, dict):
                self._cache = defaultdict(list)

    def start(self):
        now = datetime.utcnow()
        latest = self._last_emission or now
        delta = self.interval - (now - latest)
        self._emission_job = Job(
            self.emit,
            delta,
            False,
            reset=True
        )

    def stop(self):
        if self.use_persistence:
            self._backup()

    def emit(self, reset=False):
        self._logger.debug('Emitting signals')
        if reset:
            self._emission_job.cancel()
            self._emission_job = Job(
                self.emit,
                self.interval,
                True
            )
        self._last_emission = datetime.utcnow()
        signals = self._get_emit_signals()
        if signals:
            self._logger.debug('Notifying {} signals'.format(len(signals)))
            self.notify_signals(signals)
        elif self.timeout:
            self._logger.debug('Notifying timeout signal')
            self.notify_signals([Signal({self.timeout_attr: True})])
        else:
            self._logger.debug('No signals to notify')
        if self.use_persistence:
            self.persistence.store('last_emission', self._last_emission)
            self._backup()

    def _get_emit_signals(self):
        with self._cache_lock:
            now = int(time())
            signals = []
            if self.interval_duration:
                # Remove old signals from cache.
                old = now - int(self.interval_duration.total_seconds())
                self._logger.debug(
                    'Removing signals from cache older than {}'.format(old))
                cache_times = sorted(self._cache.keys())
                for cache_time in cache_times:
                    if cache_time < old:
                        del self._cache[cache_time]
                    else:
                        break
            for cache in self._cache:
                signals.extend(self._cache[cache])
            if not self.interval_duration:
                # Clear cache every time if duration is not set.
                self._logger.debug('Clearing cache of signals')
                self._cache = defaultdict(list)
            return signals

    def process_signals(self, signals):
        with self._cache_lock:
            now = int(time())
            self._cache[now].extend(signals)

    def _backup(self):
        self.persistence.store('cache', self._cache)
        self.persistence.save()
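A small worked illustration of the cache pruning in _get_emit_signals, assuming an interval_duration of 60 seconds (the keys are arbitrary epoch-second timestamps):

from collections import defaultdict

cache = defaultdict(list, {100: ['a'], 150: ['b'], 170: ['c']})
now, duration_seconds = 200, 60
old = now - duration_seconds               # -> 140
for cache_time in sorted(cache.keys()):
    if cache_time < old:
        del cache[cache_time]              # drops the bucket at 100
    else:
        break
signals = [s for key in cache for s in cache[key]]   # -> ['b', 'c']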
Example #42
    def _connect_to_streaming(self):
        """Set up a connection to the Twitter Streaming API.

        This method will build the connection and save it in self._stream. On
        a valid connection, it will reset the reconnection and monitoring jobs

        Returns
            success (bool): Whether or not the connection succeeded. If any
                errors occur during connection, it will not schedule the
                reconnects, but rather just return False.
        """

        try:
            self._conn = http.client.HTTPSConnection(
                host=self.streaming_host,
                timeout=45)

            req_headers = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'Accept': '*/*'
            }

            conn_url = 'https://{0}/{1}'.format(
                self.streaming_host,
                self.streaming_endpoint)

            # get the signed request with the proper oauth creds
            req = self._get_oauth_request(conn_url, self.get_params())

            self.logger.debug("Connecting to {0}".format(conn_url))

            if self.get_request_method() == "POST":
                self._conn.request(self.get_request_method(),
                                   conn_url,
                                   body=req.to_postdata(),
                                   headers=req_headers)
            else:
                self._conn.request(self.get_request_method(),
                                   req.to_url(),
                                   headers=req_headers)

            response = self._conn.getresponse()

            if response.status != 200:
                self.logger.warning(
                    'Status: {} returned from twitter: {}'.format(
                        response.status, response.read()))
                return False
            else:
                self.logger.debug('Connected to Streaming API Successfully')

                # Clear any reconnects we had
                if self._rc_job is not None:
                    self.logger.error("We were reconnecting, now we're done!")
                    self._rc_job.cancel()
                    self._rc_delay = timedelta(seconds=1)
                    self._rc_job = None

                self._last_rcv = datetime.utcnow()

                self._monitor_job = Job(
                    self._monitor_connection,
                    self.rc_interval(),
                    True
                )

                self._stream = response
                # Return true, we are connected!
                return True

        except Exception as e:
            self.logger.error('Error opening connection : {0}'.format(e))
            return False
Example #43
class TwitterStreamBlock(Block):

    """ A parent block for communicating with the Twitter Streaming API.

    Properties:
        fields (list(str)): Outgoing signals will pull these fields
            from incoming tweets. When empty/unset, all fields are
            included.
        notify_freq (timedelta): The interval between signal notifications.
        creds: Twitter app credentials, see above. Defaults to global settings.
        rc_interval (timedelta): Time to wait between receipts (either tweets
            or heartbeats) before attempting to reconnect to Twitter Streaming.

    """
    notify_freq = TimeDeltaProperty(default={"seconds": 2},
                                    title='Notification Frequency')
    creds = ObjectProperty(TwitterCreds, title='Credentials',
                           default=TwitterCreds())
    rc_interval = TimeDeltaProperty(default={"seconds": 90},
                                    title='Reconnect Interval')

    streaming_host = None
    streaming_endpoint = None
    verify_url = 'https://api.twitter.com/1.1/account/verify_credentials.json'

    def __init__(self):
        super().__init__()
        self._result_signals = defaultdict(list)
        self._result_lock = defaultdict(Lock)
        self._lock_lock = Lock()
        self._stop_event = Event()
        self._stream = None
        self._last_rcv = datetime.utcnow()
        self._limit_count = 0

        # Jobs to run throughout execution
        self._notify_job = None    # notifies signals
        self._monitor_job = None   # checks for heartbeats
        self._rc_job = None        # attempts reconnects
        self._rc_delay = timedelta(seconds=1)

    def start(self):
        super().start()
        self._authorize()
        self._start()
        spawn(self._run_stream)
        self._notify_job = Job(
            self._notify_results,
            self.notify_freq(),
            True
        )

    def _start(self):
        """ Override in blocks that need to run code before start """
        pass

    def stop(self):
        self._stop_event.set()
        self._notify_job.cancel()
        if self._monitor_job is not None:
            self._monitor_job.cancel()
        if self._rc_job is not None:
            self._rc_job.cancel()
        super().stop()

    def _run_stream(self):
        """ The main thread for the Twitter block. Reads from Twitter
        streaming, parses and queues results.

        """

        # If we had an existing stream, close it. We will open our own
        if self._stream:
            self._stream.close()
            self._stream = None

        # This is a new stream so reset the limit count
        self._limit_count = 0

        # Try to connect, if we can't, don't start streaming, but try reconnect
        if not self._connect_to_streaming():
            self._setup_reconnect_attempt()
            return

        while True:
            if self._stop_event.is_set():
                break

            line = None
            try:
                line = self._read_line()
            except Exception as e:
                # Error while getting the tweet, this probably indicates a
                # disconnection so let's try to reconnect
                self.logger.error("While streaming: %s" % str(e))
                self._setup_reconnect_attempt()
                break

            if line and len(line):
                self._record_line(line)

    def _read_line(self):
        """Read the next line off of the stream.

        This will first read the length of the line, then read the next
        N bytes based on the length. It will return the read line if it reads
        successfully. Otherwise, returns None.

        Raises:
            Exception: if there was an error reading bytes - this will most
                likely indicate a disconnection
        """
        # build the length buffer
        buf = bytes('', 'utf-8')
        while not buf or buf[-1] != ord('\n'):
            bytes_read = self._read_bytes(1)
            if bytes_read:
                buf += bytes_read
            else:
                raise Exception("No bytes read from stream")

        # checking to see if it's a 'keep-alive'
        if len(buf) <= 2:
            # only received \r\n so it is a keep-alive. move on.
            self.logger.debug('Received a keep-alive signal from Twitter.')
            self._last_rcv = datetime.utcnow()
            return None

        return self._read_bytes(int(buf))

    def _read_bytes(self, n_bytes):
        """Read N bytes off of the current stream.

        Returns:
            len (int): number of bytes actually read - None if no bytes read
        """
        bytes_read = self._stream.read(n_bytes)
        return bytes_read if len(bytes_read) > 0 else None

    def get_params(self):
        """ Return URL connection parameters here """
        return {}

    def _connect_to_streaming(self):
        """Set up a connection to the Twitter Streaming API.

        This method will build the connection and save it in self._stream. On
        a valid connection, it will reset the reconnection and monitoring jobs

        Returns
            success (bool): Whether or not the connection succeeded. If any
                errors occur during connection, it will not schedule the
                reconnects, but rather just return False.
        """

        try:
            self._conn = http.client.HTTPSConnection(
                host=self.streaming_host,
                timeout=45)

            req_headers = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'Accept': '*/*'
            }

            conn_url = 'https://{0}/{1}'.format(
                self.streaming_host,
                self.streaming_endpoint)

            # get the signed request with the proper oauth creds
            req = self._get_oauth_request(conn_url, self.get_params())

            self.logger.debug("Connecting to {0}".format(conn_url))

            if self.get_request_method() == "POST":
                self._conn.request(self.get_request_method(),
                                   conn_url,
                                   body=req.to_postdata(),
                                   headers=req_headers)
            else:
                self._conn.request(self.get_request_method(),
                                   req.to_url(),
                                   headers=req_headers)

            response = self._conn.getresponse()

            if response.status != 200:
                self.logger.warning(
                    'Status: {} returned from twitter: {}'.format(
                        response.status, response.read()))
                return False
            else:
                self.logger.debug('Connected to Streaming API Successfully')

                # Clear any reconnects we had
                if self._rc_job is not None:
                    self.logger.error("We were reconnecting, now we're done!")
                    self._rc_job.cancel()
                    self._rc_delay = timedelta(seconds=1)
                    self._rc_job = None

                self._last_rcv = datetime.utcnow()

                self._monitor_job = Job(
                    self._monitor_connection,
                    self.rc_interval(),
                    True
                )

                self._stream = response
                # Return true, we are connected!
                return True

        except Exception as e:
            self.logger.error('Error opening connection : {0}'.format(e))
            return False

    def _setup_reconnect_attempt(self):
        """Add the reconnection job and double the delay for the next one"""
        if self._monitor_job is not None:
            self._monitor_job.cancel()

        self.logger.debug("Reconnecting in %d seconds" %
                           self._rc_delay.total_seconds())
        self._rc_job = Job(self._run_stream,
                           self._rc_delay, False)
        self._rc_delay *= 2

    def get_request_method(self):
        return "GET"

    def _get_oauth_request(self, conn_url, request_params):
        """This function uses the oauthCreds passed from the transducer to
        sign the request.
        """
        request_params['oauth_version'] = '1.0'
        request_params['oauth_nonce'] = oauth.generate_nonce()
        request_params['oauth_timestamp'] = int(time.time())

        req = oauth.Request(method=self.get_request_method(),
                            url=conn_url,
                            parameters=request_params)

        req.sign_request(
            signature_method=oauth.SignatureMethod_HMAC_SHA1(),
            consumer=oauth.Consumer(
                self.creds().consumer_key(), self.creds().app_secret()),
            token=oauth.Token(
                self.creds().oauth_token(), self.creds().oauth_token_secret())
        )

        return req

    def _record_line(self, line):
        """ Decode the line and add it to the end of the list """
        try:
            # reset the last received timestamp
            self._last_rcv = datetime.utcnow()
            data = json.loads(line.decode('utf-8'))
            self.create_signal(data)
        except Exception as e:
            self.logger.error("Could not parse line: %s" % str(e))

    def create_signal(self, data):
        """ Override this method in the block implementation

        Append the new Signal to the appropriate list in the dictionary
        `self._result_signals`, where the key is the name of the block output.
        Below is an example implementation, meant to be overridden.
        """
        self.logger.debug("Default message type")
        data = self.filter_results(data)
        if data:
            with self._get_result_lock('default'):
                self._result_signals['default'].append(Signal(data))

    def _get_result_lock(self, key):
        with self._lock_lock:
            return self._result_lock[key]

    def filter_results(self, data):
        return data

    def _notify_results(self):
        """Method to be called from the notify job, will notify any tweets
        that have been buffered by the block, then clear the buffer.

        """
        for output in self._result_signals:
            with self._get_result_lock(output):
                signals = self._result_signals[output]
                if signals:
                    self.notify_signals(signals, output)
                    self._result_signals[output] = []

    def _monitor_connection(self):
        """ Scheduled to run every self.rc_interval. Makes sure that some
        data has been received in the last self.rc_interval.

        """
        current_time = datetime.utcnow()
        time_since_data = current_time - self._last_rcv
        if time_since_data > self.rc_interval():
            self.logger.warning("No data received, we might be disconnected")
            self._setup_reconnect_attempt()

    def _authorize(self):
        """ Prepare the OAuth handshake and verify.

        """
        try:
            auth = OAuth1(self.creds().consumer_key(),
                          self.creds().app_secret(),
                          self.creds().oauth_token(),
                          self.creds().oauth_token_secret())
            resp = requests.get(self.verify_url, auth=auth)
            if resp.status_code != 200:
                raise Exception("Status %s" % resp.status_code)
        except Exception:
            self.logger.exception("Authentication Failed for consumer key: %s" %
                                  self.creds().consumer_key())
Example #44
class Queue(GroupBy, Block):
    """ Queue block.

    A NIO block for queueing up signals. As signals pile up,
    the Queue block releases a configurable number at a configurable
    interval. If incoming signals would overflow the queue, signals
    are popped off the front as needed.

    If a 'group_by' string is configured, incoming signals are divided
    and grouped by the value of that attribute. The configured capacity
    applies to *each* such queue, not the block as a whole.

    """
    version = StringProperty(default='1.0')
    interval = TimeDeltaProperty(title='Notification Interval')
    backup_interval = TimeDeltaProperty(title='Backup Interval',
                                        visible=False,
                                        default={"minutes": 10})
    capacity = IntProperty(default=100, title='Capacity')
    group_by = ExpressionProperty(default='null', attr_default='null', title='Group By')
    chunk_size = IntProperty(default=1, title='Chunk Size')
    reload = BoolProperty(default=False, title='Auto-Reload?')
    uniqueness = ExpressionProperty(title='Queue Uniqueness Expression',
                                    attr_default=None)
    update = BoolProperty(title='Update Non-Unique Signals', default=False)

    def __init__(self):
        super().__init__()
        self._queues = defaultdict(list)
        self._queue_locks = {
            'null': Lock()
        }
        self._meta_lock = Lock()
        self._emit_job = None
        self._backup_job = None

    def configure(self, context):
        super().configure(context)
        self._load()

    def start(self):
        super().start()
        self._start_emit_job()
        self._backup_job = Job(
            self._backup,
            self.backup_interval,
            True
        )

    def stop(self):
        if self._emit_job is not None:
            self._emit_job.cancel()
        self._backup_job.cancel()
        self._backup()

    def process_signals(self, signals):
        self._logger.debug("Processing {} signals".format(len(signals)))
        self.for_each_group(self._push_group, signals)

    def pop(self, grp="null"):
        ''' Remove the top n signals from the specified queue.

        Args:
            grp (str): The queue from which to pop.
            count (int): The number of signals to pop off.
            reload (bool): If True, put popped signals back on queue.

        Returns:
            top_n (list): 'Count' signals from the front of the queue.

        '''
        count = self.chunk_size
        reload = self.reload
        # lock the queue we're popping from
        self._logger.debug("pop: {} {} {}".format(grp, count, reload))
        with self._get_lock(grp):
            # check out the front of the queue
            top_n = self._queues[grp][0:count]
            self._logger.debug(
                "Removing %d signals from %s_queue" % (len(top_n), grp))
            self._queues[grp][:] = self._queues[grp][len(top_n):]
            # If reloading, put signal back on queue.
            if reload:
                self._logger.debug("Reloading {}_queue".format(grp))
                self._queues[grp].extend(top_n)
        return top_n

    def push(self, signal, grp):
        ''' Add a signal to the back of the queue.

        Args:
            signal (Signal): The signal to add.
            grp (str): Group to add signal to.

        Returns:
            None

        '''
        queue = self._queues[grp]

        # check for uniqueness if property is set
        try:
            unique_val = self.uniqueness(signal)
            self._logger.debug(
                "Testing uniqueness for signal: {}".format(unique_val))
        except Exception as e:
            unique_val = None
            self._logger.warning(
                "Uniqueness expression failed. Using value of None.")

        if unique_val is not None:
            for idx, sig in enumerate(queue):
                try:
                    sig_val = self.uniqueness(sig)
                except Exception as e:
                    sig_val = None
                if sig_val == unique_val:
                    self._logger.debug(
                        "Signal {} already in {}_queue".format(sig_val, grp)
                    )
                    if self.update:
                        queue[idx] = signal
                    return

        # pop one off the top of that queue if it's at capacity
        if len(queue) == self.capacity:
            self._logger.debug(
                "Pushing signal and capactity of {}_signal is full: {}".format(
                    grp, self.capacity
                )
            )
            queue.pop(0)

        self._logger.debug("Appending signal to {}_queue".format(grp))
        queue.append(signal)

    def _push_group(self, signals, group):
        # lock the queue before appending
        with self._get_lock(group):
            for signal in signals:
                self.push(signal, group)

    def _get_lock(self, grp="null"):
        ''' Returns the lock for a particular queue.

        Note that we're maintaining a synchronized dictionary of locks
        alongside our dict of queues.

        '''
        with self._meta_lock:
            self._queue_locks[grp] = self._queue_locks.get(grp, Lock())
        return self._queue_locks[grp]

    def _start_emit_job(self):
        if self.interval.total_seconds() >= 0:
            self._emit_job = Job(
                self.emit,
                self.interval,
                True
            )

    def emit(self):
        ''' Notify the configured number of signals from the front of the queue.

        '''
        signals_to_notify = self.for_each_group(self.pop)
        if signals_to_notify:
            self._logger.debug(
                "Notifying {} signals".format(len(signals_to_notify))
            )
            self.notify_signals(signals_to_notify)

    def _load(self):
        prev_queues = self.persistence.load('queues')
        # if persisted dictionary is not defaultdict, convert it
        if prev_queues:
            self._queues = defaultdict(list, prev_queues)
        # build _groups for groupby mixin
        self._groups = list(self._queues.keys())

    def _backup(self):
        ''' Persist the current state of the queues using the persistence module.

        '''
        # store the serialized signals and save to disk
        # grab the meta_lock so nobody else can interact with the queues during
        # serialization
        self._logger.debug("Persistence: backing up to file")
        self._meta_lock.acquire()
        self.persistence.store("queues", self._queues)
        self._meta_lock.release()
        self.persistence.save()

    def _inspect_group(self, response, group):
        response_group = {'count': 0, 'signals': []}
        query = response.get('query', '{{ True }}')
        ignored_signals = []
        for signal in self._queues.get(group, []):
            try:
                eval = Evaluator(query, None).evaluate(signal)
            except:
                eval = False
            if eval:
                response_group['signals'].append(json.loads(json.dumps(
                    signal.to_dict(), indent=4,
                    separators=(',', ': '), default=str)))
                response_group['count'] += 1
                response['count'] += 1
            else:
                ignored_signals.append(signal)
        response['groups'][group] = response_group
        return response, ignored_signals

    def view(self, query, group):
        ''' Command to view the signals that are in the queue.

        If no group parameter is specified, all queues are returned.
        '''
        self._logger.debug("Command: view")
        response = {}
        response['query'] = query
        response['group'] = group
        response['count'] = 0
        response['groups'] = {}

        if group and group in self._queues:
            # if group exists, return only the specified group
            self._view_group(group, response)
        elif not group:
            # if no group is specified in params, return all groups
            self.for_each_group(self._view_group,
                                kwargs={'response': response})

        return response

    def _view_group(self, group, response):
        with self._get_lock(group):
            response, _ = self._inspect_group(response, group)

    def remove(self, query, group):
        ''' Remove signals from *group* where *query* is True.

        Signals are not notified.

        '''
        self._logger.debug("Command: remove")
        response = {}
        response['query'] = query
        response['group'] = group
        response['count'] = 0
        response['groups'] = {}

        if group and group in self._queues:
            # if group exists, remove from only the specified group
            self._remove_from_group(group, response, query)
        elif not group:
            # if no group is specified in params, return all groups
            self.for_each_group(self._remove_from_group,
                                kwargs={'response': response,
                                        'query': query})

        return response

    def _remove_from_group(self, group, response, query):
        with self._get_lock(group):
            response, signals = self._inspect_group(response, group)
            # signals that don't match the query stay in the queue.
            self._queues[group] = signals

    def update_props(self, props):
        ''' Updates the *interval* property.

        The next scheduled emit job will be canceled and a new repeatable emit
        job is started.

        '''
        self._logger.debug("Command: update_props")
        response = {}

        if props is None or not isinstance(props, dict):
            response['message'] = \
                "'props' needs to be a dictionary: {}".format(props)
            return response

        # Update *interval*.
        interval = props.get('interval')
        if interval and isinstance(interval, dict) and \
                (interval.get('days') or interval.get('seconds') \
                 or interval.get('microseconds')):
            days = interval.get('days', 0)
            seconds = interval.get('seconds', 0)
            microseconds = interval.get('microseconds', 0)
            interval = timedelta(days, seconds, microseconds)
            response['interval'] = interval
            response['prev_interval'] = self.interval
            # cancel emit job and restart with new interval
            if self._emit_job is not None:
                self._emit_job.cancel()
            self.interval = interval
            self._start_emit_job()
            self._logger.info('Interval has been updated to {}'.format(interval))
        elif interval:
            response['message'] = "'interval' needs to be a timedelta dict: {}".format(interval)

        return response
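For the update_props command above, props must be a dict whose 'interval' entry supplies at least one of days, seconds, or microseconds. A hypothetical invocation, with queue_block standing in for a configured instance of this block:

# 'queue_block' is assumed to be a running instance of the Queue block
response = queue_block.update_props({"interval": {"seconds": 5}})
# The current emit job is cancelled and rescheduled, and the response
# echoes both the new interval and the previous one.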
Example #45
 def start(self):
     super().start()
     self._start_emit_job()
     self._backup_job = Job(self._backup, self.backup_interval, True)
Example #46
class SignalRate(GroupBy, Persistence, Block):

    report_interval = TimeDeltaProperty(default={"seconds": 1},
                                        title="Report Interval")
    averaging_interval = TimeDeltaProperty(default={"seconds": 5},
                                           title="Averaging Interval")
    version = VersionProperty("0.1.1")

    def __init__(self):
        super().__init__()
        self._signal_counts = defaultdict(deque)
        self._signals_lock = Lock()
        self._job = None
        self._start_time = None
        self._averaging_seconds = None

    def persisted_values(self):
        """ Overridden from persistence mixin """
        return ['_start_time', '_signal_counts']

    def configure(self, context):
        super().configure(context)
        # This is just for backwards compatibility with persistence
        if self._signal_counts.default_factory == list:
            self._signal_counts.default_factory = deque
            for group in self._signal_counts:
                self._signal_counts[group] = deque(self._signal_counts[group])

    def start(self):
        super().start()
        # use _start_time if it was loaded from persistence
        self._start_time = self._start_time or _time()
        self._averaging_seconds = self.averaging_interval().total_seconds()
        self._job = Job(self.report_frequency, self.report_interval(), True)

    def process_signals(self, signals, input_id='default'):
        # Record the count for each group in this list of signals
        self.for_each_group(self.record_count, signals)

    def record_count(self, signals, group):
        """ Save the time and the counts for each group received """
        with self._signals_lock:
            self._signal_counts[group].append((_time(), len(signals)))

    def report_frequency(self):
        signals = []

        self.for_each_group(self.get_frequency, sigs_out=signals)

        self.logger.debug("Current counts: {}".format(self._signal_counts))

        if signals:
            self.notify_signals(signals)

    def get_frequency(self, group, sigs_out):
        """ Get the frequency for a group and add it to sigs_out """
        with self._signals_lock:
            ctime = _time()
            self._signal_counts[group] = self.trim_old_signals(
                self._signal_counts[group], ctime)

            signals = copy(self._signal_counts[group])

        # Add up all of our current counts
        total_count = sum(grp[1] for grp in signals)

        # If we haven't reached a full period, divide by elapsed time
        rate = total_count / min(ctime - self._start_time,
                                 self._averaging_seconds)

        sigs_out.append(Signal({"group": group, "rate": rate}))

    def trim_old_signals(self, signal_counts, ctime):
        """ Take some signal counts and get rid of old ones """
        while len(signal_counts) and \
                ctime - signal_counts[0][0] >= self._averaging_seconds:
            signal_counts.popleft()
        return signal_counts

    def stop(self):
        if self._job:
            self._job.cancel()
        super().stop()
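
# A hedged, standalone sketch (helper name is illustrative) of the
# sliding-window rate that get_frequency/trim_old_signals compute above:
# counts older than the averaging window are dropped, and the remaining
# total is divided by the elapsed time until a full window has passed.
from collections import deque
from time import time as _time

def window_rate(counts, start_time, window_seconds, now=None):
    """counts is a deque of (timestamp, count) pairs; returns signals/sec."""
    now = _time() if now is None else now
    while counts and now - counts[0][0] >= window_seconds:
        counts.popleft()
    total = sum(count for _, count in counts)
    elapsed = min(now - start_time, window_seconds)
    return total / elapsed if elapsed > 0 else 0.0

# e.g. 10 signals counted 2 seconds ago, block started 4 seconds ago,
# 5-second window -> 10 / 4 = 2.5 signals per second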
Exemple #47
0
class Queue(Persistence, GroupBy, Block):
    """ Queue block.

    A NIO block for queueing up signals. As signals pile up,
    the Queue block releases a configurable number at a configurable
    interval. If incoming signals would overflow the queue, signals
    are popped off the front as needed.

    If a 'group_by' string is configured, incoming signals are divided
    and grouped by the value of that attribute. The configured capacity
    applies to *each* such queue, not the block as a whole.

    """
    version = VersionProperty("1.0.1")
    interval = TimeDeltaProperty(title='Notification Interval',
                                 default={'seconds': 1},
                                 allow_none=True)
    capacity = IntProperty(default=100, title='Capacity')
    chunk_size = IntProperty(default=1, title='Chunk Size')
    reload = BoolProperty(default=False, title='Auto-Reload?')
    uniqueness = Property(title='Queue Uniqueness Expression',
                          allow_none=True,
                          default="{{ None }}")
    update = BoolProperty(title='Update Non-Unique Signals', default=False)

    def persisted_values(self):
        return ["_queues"]

    def __init__(self):
        super().__init__()
        self._queues = defaultdict(list)
        self._queue_locks = defaultdict(Lock)
        self._meta_lock = Lock()
        self._emit_job = None

    def configure(self, context):
        super().configure(context)
        # Make sure persisted queues don't exceed the configured capacity
        for queue_name, queue_values in self._queues.items():
            self._queues[queue_name] = queue_values[:self.capacity()]
        # build _groups for groupby mixin
        self._groups = set(self._queues.keys())

    def start(self):
        super().start()
        self._start_emit_job()

    def stop(self):
        if self._emit_job is not None:
            self._emit_job.cancel()
        super().stop()

    def process_signals(self, signals):
        self.logger.debug("Processing {} signals".format(len(signals)))
        self.for_each_group(self._push_group, signals)

    def pop(self, grp):
        ''' Remove up to `chunk_size` signals from the front of a queue.

        The number of signals popped and whether they are reloaded onto
        the queue come from the `chunk_size` and `reload` properties.

        Args:
            grp (str): The queue from which to pop.

        Returns:
            top_n (list): Up to `chunk_size` signals from the front of
                the queue.

        '''
        count = self.chunk_size()
        reload = self.reload()
        # lock the queue we're popping from
        self.logger.debug("pop: {} {} {}".format(grp, count, reload))
        with self._get_lock(grp):
            # check out the front of the queue
            top_n = self._queues[grp][0:count]
            self.logger.debug(
                "Removing %d signals from %s_queue" % (len(top_n), grp))
            self._queues[grp][:] = self._queues[grp][len(top_n):]
            # If reloading, put signal back on queue.
            if reload:
                self.logger.debug("Reloading {}_queue".format(grp))
                self._queues[grp].extend(top_n)
        return top_n

    def push(self, signal, grp):
        ''' Add a signal to the back of the queue.

        Args:
            signal (Signal): The signal to add.
            grp (str): Group to add signal to.

        Returns:
            None

        '''
        queue = self._queues[grp]

        # check for uniqueness if property is set
        try:
            unique_val = self.uniqueness(signal)
            self.logger.debug(
                "Testing uniqueness for signal: {}".format(unique_val))
        except Exception as e:
            unique_val = None
            self.logger.warning(
                "Uniqueness expression failed. Using value of None.")

        if unique_val is not None:
            for idx, sig in enumerate(queue):
                try:
                    sig_val = self.uniqueness(sig)
                except Exception as e:
                    sig_val = None
                if sig_val == unique_val:
                    self.logger.debug(
                        "Signal {} already in {}_queue".format(sig_val, grp)
                    )
                    if self.update():
                        queue[idx] = signal
                    return

        # pop one off the top of that queue if it's at capacity
        if len(queue) == self.capacity():
            self.logger.debug(
                "{}_queue is at capacity ({}); popping oldest signal".format(
                    grp, self.capacity()
                )
            )
            queue.pop(0)

        self.logger.debug("Appending signal to {}_queue".format(grp))
        queue.append(signal)

    def _push_group(self, signals, group):
        # lock the queue before appending
        with self._get_lock(group):
            for signal in signals:
                self.push(signal, group)

    def _get_lock(self, grp):
        ''' Returns the lock for a particular queue.

        Note that we're maintaining a synchronized dictionary of locks
        alongside our dict of queues.

        '''
        with self._meta_lock:
            self._queue_locks[grp] = self._queue_locks.get(grp, Lock())
        return self._queue_locks[grp]

    def _start_emit_job(self):
        ''' Start job that emits signals from the queue '''
        if self.interval() and self.interval().total_seconds() > 0:
            # only schedule if the interval is a positive number
            self._emit_job = Job(
                self.emit,
                self.interval(),
                True
            )

    def emit(self):
        ''' Notify the configured number of signals from the front of the queue.

        '''
        signals_to_notify = self.for_each_group(self.pop)
        if signals_to_notify:
            self.logger.debug(
                "Notifying {} signals".format(len(signals_to_notify))
            )
            self.notify_signals(signals_to_notify)

    def _inspect_group(self, response, group):
        response_group = {'count': 0, 'signals': []}
        query = response.get('query', '{{ True }}')
        ignored_signals = []
        for signal in self._queues.get(group, []):
            try:
                eval = Evaluator(query).evaluate(signal)
            except:
                eval = False
            if eval:
                response_group['signals'].append(
                    json.loads(json.dumps(
                        signal.to_dict(),
                        indent=4, separators=(',', ': '),
                        default=str))
                )
                response_group['count'] += 1
                response['count'] += 1
            else:
                ignored_signals.append(signal)
        response['groups'][group] = response_group
        return response, ignored_signals

    def view(self, query, group):
        ''' Command to view the signals that are in the queue.

        If no group parameter is specified, all queues are returned.
        '''
        self.logger.debug("Command: view")
        response = {}
        response['query'] = query
        response['group'] = group
        response['count'] = 0
        response['groups'] = {}

        if group and group in self._queues:
            # if group exists, return only the specified group
            self._view_group(group, response)
        elif not group:
            # if no group is specified in params, return all groups
            self.for_each_group(self._view_group,
                                **{'response': response})

        return response

    def _view_group(self, group, response):
        with self._get_lock(group):
            response, _ = self._inspect_group(response, group)

    def remove(self, query, group):
        ''' Remove signals from *group* where *query* is True.

        Signals are not notified.

        '''
        self.logger.debug("Command: remove")
        response = {}
        response['query'] = query
        response['group'] = group
        response['count'] = 0
        response['groups'] = {}

        if group and group in self._queues:
            # if group exists, remove from only the specified group
            self._remove_from_group(group, response, query)
        elif not group:
            # if no group is specified in params, remove from all groups
            self.for_each_group(self._remove_from_group,
                                **{'response': response, 'query': query})
        return response

    def _remove_from_group(self, group, response, query):
        with self._get_lock(group):
            response, signals = self._inspect_group(response, group)
            # signals that don't match the query stay in the queue, but if
            # there are no signals remaining, delete the entire queue.
            if len(signals) > 0:
                self._queues[group] = signals
            else:
                # _queues is a dict with keys that make up the set _groups.
                # These must be kept in sync when removing keys in order to
                # maintain the true state of the block. If these objects are
                # not synced, a "view" or "remove" command for all groups will
                # show that groups which have previously been expired are still
                # present, due to the for_each_group() call, which uses the
                # _groups set to iterate over the groups.
                self.logger.debug("Deleting empty queue {}.".format(group))
                self._queues.pop(group, None)
                self._groups.remove(group)

    def update_props(self, props):
        ''' Updates the *interval* property.

        The next scheduled emit job will be canceled and a new repeatable
        emit job started with the new interval.

        '''
        self.logger.debug("Command: update_props")
        response = {}

        if props is None or not isinstance(props, dict):
            response['message'] = \
                "'props' needs to be a dictionary: {}".format(props)
            return response

        # Update *interval*.
        interval = props.get('interval')
        if interval and isinstance(interval, dict) and \
                (interval.get('days') or
                 interval.get('seconds') or interval.get('microseconds')):
            days = interval.get('days', 0)
            seconds = interval.get('seconds', 0)
            microseconds = interval.get('microseconds', 0)
            interval = timedelta(days, seconds, microseconds)
            response['interval'] = interval
            response['prev_interval'] = self.interval
            # cancel emit job and restart with new interval
            if self._emit_job is not None:
                self._emit_job.cancel()
            self._start_emit_job()
            self.interval = interval
            self.logger.info(
                'Interval has been updated to {}'.format(interval))
        elif interval:
            response['message'] = \
                "'interval' needs to be a timedelta dict: {}".format(interval)

        return response
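
# A hedged, standalone sketch (plain list in place of the block's queues,
# helper name illustrative) of the push semantics above: a signal whose
# uniqueness value matches an existing entry either replaces it (update)
# or is dropped, otherwise the oldest entry is popped once the queue is
# at capacity and the new signal is appended.
def push(queue, signal, capacity, key=None, update=False):
    if key is not None:
        unique_val = key(signal)
        for idx, existing in enumerate(queue):
            if key(existing) == unique_val:
                if update:
                    queue[idx] = signal
                return
    if len(queue) >= capacity:
        queue.pop(0)
    queue.append(signal)

# e.g. with key=lambda s: s['id'], capacity=2 and update=True, pushing two
# signals with the same id keeps only the newest of the pair.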
Exemple #48
0
class ARTSS(Block):

    """ A block for communicating with a artss gateway server.

    Properties:
        host (str): location of the socket.io server.
        port (int): socket.io server port.
        version (enum): Which version of socketIO to use
        max_retry (int): Specifies maximum time to wait before a retry

    """

    version = VersionProperty("1.0.1")
    host = StringProperty(title="Host", default="")
    port = IntProperty(title="Port", default=10001)
    history_count = IntProperty(title="Event History Count", default=2)
    connection_timeout = IntProperty(title="Connection Timeout", default=5)
    max_retry = TimeDeltaProperty(title="Max Connection Retry Time", default={"seconds": 300})

    def __init__(self):
        super().__init__()
        self._client = None

        self._timeout = 1
        self._connected = False
        self._reconnection_job = None
        self._stopping = False
        self._polling = False

    def configure(self, context):
        super().configure(context)

        # override loggers with our own
        from .protocol.artss import set_logger as set_protocol_logger

        set_protocol_logger(self._logger)

        from .gateway import set_logger as set_gateway_logger

        set_gateway_logger(self._logger)

        self._client = ARTSSGateway(self.history_count)
        self._connect_to_gateway()

    def stop(self):
        """ Stop the block by disconnecting the client.
        """
        self._stopping = True

        # Cancel any pending reconnects if any
        self._cancel_reconnection_job()

        self._disconnect_client()
        super().stop()

    def handle_reconnect(self):
        self._disconnect_client()

        # Don't need to reconnect if we are stopping
        if self._stopping:
            return

        if self._reconnection_job is not None:
            self._logger.debug("Reconnection job already scheduled")
            return

        self._logger.warning("Attempting to reconnect in {0} seconds.".format(self._timeout))
        self._reconnection_job = Job(self._connect_to_gateway, timedelta(seconds=self._timeout), repeatable=False)

    def _cancel_reconnection_job(self):
        if self._reconnection_job:
            self._reconnection_job.cancel()
            self._reconnection_job = None

    def _connect_to_gateway(self):
        try:
            # clean up any connection job if any
            self._cancel_reconnection_job()

            self._logger.info("Connecting to {0}:{1}".format(self.host, self.port))
            self._client.connect(
                self.host, self.port, timeout=self.connection_timeout if self.connection_timeout else None
            )
            self._connected = True
            self._logger.info("Connected successfully")

            # Reset the timeout
            self._timeout = 1
        except:
            self._timeout *= 2
            # Make sure our timeout is not getting out of hand
            self._timeout = min(self._timeout, self.max_retry.total_seconds())
            self._logger.exception("Error connecting")
            self.handle_reconnect()

    def process_signals(self, signals):
        """ Used to poll gateway. """

        # Don't do any processing or polling if the block is stopping.
        # The connection may be closed and we don't want to re-open
        if self._stopping:
            return

        if self._connected and not self._polling:
            try:
                self._polling = True
                events = self._client.poll()
                if events:
                    signals = []
                    for event in events:
                        signals.append(Signal(event.to_dict()))

                    self._logger.debug("Notifying: {0} signals".format(len(signals)))
                    self.notify_signals(signals)

            except Exception as e:
                self._logger.exception("Polling ARTSS Gateway")

                # connection problems are handled through reconnect
                if isinstance(e, ARTSSGatewayConnectionClosed):
                    self.handle_reconnect()
            finally:
                self._polling = False

    def _disconnect_client(self):
        """ Safely close the client and remove the reference """
        try:
            # Try to close the client if it's open
            self._connected = False
            self._client.disconnect()
        except:
            # If we couldn't close, it's fine. Either the client wasn't
            # opened or it didn't want to respond. That's what we get for
            # being nice and cleaning up our connection
            self._logger.info("Error closing gateway connection", exc_info=True)
Exemple #49
0
class Buffer(Persistence, GroupBy, Block):

    version = VersionProperty("0.1.1")
    signal_start = BoolProperty(title='Start Interval On Signal?',
                                default=False)
    interval = TimeDeltaProperty(title='Buffer Interval',
                                 default={'seconds': 1},
                                 allow_none=True)
    interval_duration = TimeDeltaProperty(title='Interval Duration',
                                          allow_none=True)

    def __init__(self):
        super().__init__()
        self._last_emission = None
        self._cache = defaultdict(lambda: defaultdict(list))
        self._cache_lock = Lock()
        self._emission_job = None
        self._active_job = False

    def persisted_values(self):
        return ['_last_emission', '_cache']

    def start(self):
        super().start()
        # Start emission job on service start unless 'signal_start' is set
        if self.interval() and not self.signal_start():
            now = datetime.utcnow()
            latest = self._last_emission or now
            delta = self.interval() - (now - latest)
            self._emission_job = Job(
                self._emit_job,
                delta,
                False,
                group=None,
                reset=True,
            )

    def emit(self, group=None):
        self._emit_job(group)

    def _emit_job(self, group, reset=False):
        self.logger.debug('Emitting signals')
        if reset:
            self._emission_job.cancel()
            self._emission_job = Job(
                self._emit_job,
                self.interval(),
                True,
                group=group,
            )
        self._last_emission = datetime.utcnow()
        signals = self._get_emit_signals(group)
        self._active_job = False
        if signals:
            self.logger.debug('Notifying {} signals'.format(len(signals)))
            self.notify_signals(signals)
        else:
            self.logger.debug('No signals to notify')

    def _get_emit_signals(self, group=None):
        signals = []
        with self._cache_lock:
            if not group:
                for group in self._cache.keys():
                    signals.extend(self._get_emit_signals_for_group(group))
            else:
                signals.extend(self._get_emit_signals_for_group(group))
        return signals

    def _get_emit_signals_for_group(self, group):
        now = int(time())
        signals = []
        cache_times = sorted(self._cache[group].keys())
        if self.interval_duration():
            # Remove old signals from cache.
            old = now - int(self.interval_duration().total_seconds())
            self.logger.debug(
                'Removing signals from cache older than {}'.format(old))
            for cache_time in cache_times:
                if cache_time < old:
                    del self._cache[group][cache_time]
                else:
                    break
        for cache in cache_times:
            signals.extend(self._cache[group][cache])
        if not self.interval_duration():
            # Clear cache every time if duration is not set.
            self.logger.debug('Clearing cache of signals')
            self._cache[group] = defaultdict(list)
        return signals

    def process_signals(self, signals):
        self.for_each_group(self.process_group, signals)
        # Start a new job if property is checked and there is no active job
        if self.signal_start() and not self._active_job:
            self._emission_job = Job(
                self._emit_job,
                self.interval(),
                False,
                group=None,
                reset=False,
            )
            self._active_job = True  # Added flag for active job

    def process_group(self, signals, key):
        with self._cache_lock:
            now = int(time())
            self._cache[key][now].extend(signals)
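
# A hedged, standalone sketch (helper name is illustrative) of how start()
# above computes the delay before the first emission when a previous
# emission time was loaded from persistence, so a restart resumes the
# emission cycle instead of resetting it.
from datetime import datetime

def first_emission_delay(interval, last_emission=None, now=None):
    """Return how long to wait before the first emission."""
    now = now or datetime.utcnow()
    latest = last_emission or now
    return interval - (now - latest)

# e.g. with a 60-second interval and a last emission 45 seconds ago the
# first emission fires after roughly 15 seconds; with no persisted
# emission it waits a full interval.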
Exemple #50
0
 def start(self):
     super().start()
     self._job = Job(self._emit, self.interval(), True)
     # Run an emit cycle immediately, but in a new thread since it
     # might take some time and we don't want it to hold up start
     spawn(self._emit)
Exemple #51
0
class CronTrigger():

    """ Notify signals accoriding to cron-like timetable """

    cron = ObjectProperty(CronConf, title='Cron Schedule', default=CronConf())

    def __init__(self):
        super().__init__()
        self._job = None
        self._cron_specs = None

    def configure(self, context):
        super().configure(context)
        # TODO: check that the config is valid cron syntax
        self._cron_specs = [self.cron.minute,
                            self.cron.hour,
                            self.cron.day_of_month,
                            self.cron.month,
                            self.cron.day_of_week]

    def start(self):
        super().start()
        # Like crontab, check to run jobs every minute
        self._job = Job(self._cron, timedelta(minutes=1), True)
        # Run a cron cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._cron)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()
        super().stop()

    def _cron(self):
        """ Called every minute to check if cron job should notify signals """
        self._logger.debug("Checking if cron emit should run")
        now = datetime.utcnow()
        now = [str(now.minute),
               str(now.hour),
               str(now.day),
               str(now.month),
               str(now.weekday())]
        if self._check_cron(now):
            spawn(self._emit)

    def _check_cron(self, now):
        """ Return True if cron property matches with `now`

        `now` is list containing the 5 cron field
        """
        for i in range(5):
            # '*' should match no matter what
            if self._cron_specs[i] == '*':
                now[i] = '*'
        # TODO: handle more interesting cron settings than just numbers and '*'
        return now == self._cron_specs

    def _emit(self):
        self._logger.debug("Generating signals")
        signals = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(signals, list):
            signals = list(signals)
        if signals:
            self._logger.debug("Notifying {} signals".format(len(signals)))
            self.notify_signals(signals)
        else:
            self._logger.debug("No signals generated")
Exemple #52
0
class TwitterSearch(Block):
    version = VersionProperty("1.0.0")
    interval = TimeDeltaProperty(title="Query Interval",
                                 default={"minutes": 10})
    tweet_text = ListProperty(StringType, title="Text includes", default=[])
    hashtags = ListProperty(StringType, title="Hashtags", default=[])
    _from = StringProperty(title="From user", default='')
    _to = StringProperty(title="To user", default='')
    at = ListProperty(StringType, title="Referenced users", default=[])
    geo = ObjectProperty(GeoCode, title="Geographical")
    count = IntProperty(title="Max Results", default=25)
    lookback = IntProperty(title="Query Lookback (days)", default=-1)
    creds = ObjectProperty(TwitterCreds, title="Credentials")
    tude = SelectProperty(
        TwitterAttitude,
        default=TwitterAttitude.NEUTRAL,
        title="Tone"
    )
    operator = SelectProperty(
        TwitterQueryOp,
        default=TwitterQueryOp.AND,
        title="Query Operator"
    )
    result_type = SelectProperty(
        TwitterResultType,
        default=TwitterResultType.MIXED,
        title="Result Type"
    )

    def __init__(self):
        super().__init__()
        self._auth = None
        self._url = None
        self._search_job = None

    def configure(self, context):
        super().configure(context)

    def start(self):
        super().start()
        self._authorize()
        self._construct_url()
        self._search_job = Job(
            self._search_tweets,
            self.interval(),
            False,
            self._url
        )

    def stop(self):
        super().stop()
        self._search_job.cancel()

    def _search_tweets(self, url):
        rsp = requests.get(url, auth=self._auth)
        status = rsp.status_code
        if status == 200:
            data = rsp.json()
            tweets = data['statuses']
            next_results = data['search_metadata'].get('next_results')
            self.notify_signals([Signal(t) for t in tweets])
            if next_results is not None:
                self._search_tweets(
                    "{0}{1}".format(SEARCH_URL, next_results)
                )
            else:
                self.logger.debug("Scheduling next search...")
                self._search_job = Job(
                    self._search_tweets,
                    self.interval(),
                    False,
                    self._url
                )

        else:
            self.logger.error(
                "Twitter search failed with status {0}".format(status))

    def _construct_url(self):
        self._url = "{0}?".format(SEARCH_URL)

        query = self._process_query()

        if query:
            self._append_param('q', sep=self.operator().value, vals=query)

        if self.geo().latitude():
            self._append_param('geo', ',', 'mi',
                               [self.geo().latitude(),
                                self.geo().longitude(),
                                self.geo().radius()])

        if self.lookback() >= 0:
            now = datetime.utcnow() - timedelta(days=self.lookback())
            vals = [now.year, now.month, now.day]
            self._append_param('since', '-', vals=vals)

        if self.count():
            self._append_param('count', vals=[self.count()])

        self._append_param('result_type', vals=[self.result_type().value])

    def _append_param(self, p_name, sep='', end='', vals=[]):
        val_str = quote(sep.join([str(v) for v in vals]) + end)
        self._url += "{0}={1}&".format(p_name, val_str)

    def _process_query(self):
        values = []
        values.extend(self.tweet_text())
        for h in self.hashtags():
            values.append("#{0}".format(h))
        for u in self.at():
            values.append("@{0}".format(u))
        if self._from():
            values.append("from:{0}".format(self._from()))
        if self._to():
            values.append("to:{0}".format(self._to()))
        if self.tude().value:
            values.append(self.tude().value)
        return values

    def _authorize(self):
        """ Prepare the OAuth handshake and verify.

        """
        try:
            self._auth = OAuth1(self.creds().consumer_key(),
                                self.creds().app_secret(),
                                self.creds().oauth_token(),
                                self.creds().oauth_token_secret())
            resp = requests.get(VERIFY_CREDS_URL, auth=self._auth)
            if resp.status_code != 200:
                raise Exception("Status %s" % resp.status_code)
        except Exception as e:
            self.logger.error("Authentication Failed"
                              "for consumer key: %s" %
                              self.creds().consumer_key())
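
# A hedged, standalone sketch (URL is a placeholder) of the query-string
# building that _append_param above performs: values are joined with a
# separator, suffixed, URL-encoded, and appended as "name=value&".
from urllib.parse import quote

def append_param(url, name, vals, sep='', end=''):
    """Return url with an encoded name=value& pair appended."""
    val_str = quote(sep.join(str(v) for v in vals) + end)
    return url + "{0}={1}&".format(name, val_str)

# e.g. append_param("https://example.invalid/search?", "geo",
#                   [40.7, -74.0, 5], ',', 'mi')
# -> "https://example.invalid/search?geo=40.7%2C-74.0%2C5mi&"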
Exemple #53
0
class CronTrigger():
    """ Notify signals accoriding to cron-like timetable """

    cron = ObjectProperty(CronConf, title='Cron Schedule', default=CronConf())
    utc = BoolProperty(title='UTC', default=False)

    def __init__(self):
        super().__init__()
        self._job = None
        self._cron_specs = None

    def configure(self, context):
        super().configure(context)
        # TODO: check that the config is valid cron syntax
        self._cron_specs = [
            self.cron().minute(),
            self.cron().hour(),
            self.cron().day_of_month(),
            self.cron().month(),
            self.cron().day_of_week()
        ]

    def start(self):
        super().start()
        # Like crontab, check to run jobs every minute
        self._job = Job(self._cron, timedelta(minutes=1), True)
        # Run a cron cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._cron)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()
        super().stop()

    def _cron(self):
        """ Called every minute to check if cron job should notify signals """
        self.logger.debug("Checking if cron emit should run")
        if (self.utc()):
            now = datetime.utcnow()
        else:
            now = datetime.now()
        now = [
            str(now.minute),
            str(now.hour),
            str(now.day),
            str(now.month),
            str(now.weekday())
        ]
        if self._check_cron(now):
            spawn(self._emit)

    def _check_cron(self, now):
        """ Return True if cron property matches with `now`

        `now` is list containing the 5 cron field
        """
        for i in range(5):
            # '*' should match no matter what
            if self._cron_specs[i] == '*':
                now[i] = '*'
        # TODO: handle more interesting cron settings than just numbers and '*'
        return now == self._cron_specs

    def _emit(self):
        self.logger.debug("Generating signals")
        signals = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(signals, list):
            signals = list(signals)
        if signals:
            self.logger.debug("Notifying {} signals".format(len(signals)))
            self.notify_signals(signals)
        else:
            self.logger.debug("No signals generated")
Exemple #54
0
class HeartbeatHandler(object):
    """ A class that can send and handle socket.io heartbeats """
    def __init__(self, send_callback, timeout_callback, heartbeat_interval,
                 heartbeat_timeout, logger):
        """ Create a heartbeat handler with some timing parameters

        Args:
            send_callback (func): A function to call when sending heartbeats
            timeout_callback (func): A function to call when a heartbeat
                response is not received in time
            heartbeat_interval (int): How often (secs) to send heartbeats
            heartbeat_timeout (int): How long (secs) to wait for a heartbeat
                response from the server
            logger (Logger): Where to log information and diagnostics
        """
        super().__init__()
        self._heartbeat_func = send_callback
        self._timeout_func = timeout_callback
        self._heartbeat_job = None
        self._heartbeat_expiry_job = None
        self._heartbeat_interval = heartbeat_interval
        self._heartbeat_timeout = heartbeat_timeout
        self.logger = logger

    def handle_heartbeat_response(self):
        """ Handle a response heartbeat from the server """
        self.logger.debug("Heartbeat PONG received")
        # Restart the heartbeat expiry job
        self._start_expiry_job()

    def start_heartbeats(self):
        """ Start a job which will periodically send heartbeats to the server.

        This method will also start a job that will wait for responses in case
        the server doesn't respond in time.
        """
        # Since we are starting a new heartbeat cycle, cancel anything
        # that was outstanding
        self.stop_heartbeats()

        # Start a job that will send heartbeats indefinitely
        self._heartbeat_job = Job(self._heartbeat_func,
                                  timedelta(seconds=self._heartbeat_interval),
                                  repeatable=True)

        # Also start a job that will wait for heartbeat timeouts
        self._start_expiry_job()

    def stop_heartbeats(self):
        self._stop_expiry_job()
        self._stop_heartbeat_job()

    def _start_expiry_job(self):
        # Stop the existing job, if it exists
        self._stop_expiry_job()

        self._heartbeat_expiry_job = Job(
            self._no_heartbeat_response,
            timedelta(seconds=self._heartbeat_timeout),
            repeatable=False)

    def _stop_heartbeat_job(self):
        """ Cancel and remove the job that sends heartbeats """
        if self._heartbeat_job:
            self._heartbeat_job.cancel()
        self._heartbeat_job = None

    def _stop_expiry_job(self):
        """ Cancel and remove the job that waits for responses """
        if self._heartbeat_expiry_job:
            self._heartbeat_expiry_job.cancel()
        self._heartbeat_expiry_job = None

    def _no_heartbeat_response(self):
        """ Called when a heartbeat request has expired.

        All we are going to do in here is tell the client we timed out. We
        don't want to stop sending heartbeats, maybe the next one will go
        through and the server will respond which will kick start the expiry
        process again.
        """
        self.logger.warning(
            "No heartbeat response was received...reconnecting")
        self._timeout_func()
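
# A hedged, standalone sketch of the same ping/expiry pattern using
# threading.Timer instead of the framework's Job scheduler (SimpleHeartbeat
# is an illustrative name, and stop/cancel handling is omitted for brevity):
# heartbeats are sent on a fixed interval, while a separate timeout is
# re-armed every time a response arrives.
import threading

class SimpleHeartbeat:

    def __init__(self, send, on_timeout, interval, timeout):
        self._send = send
        self._on_timeout = on_timeout
        self._interval = interval
        self._timeout = timeout
        self._expiry = None

    def start(self):
        self._beat()
        self._arm_expiry()

    def _beat(self):
        # Send a heartbeat and schedule the next one
        self._send()
        threading.Timer(self._interval, self._beat).start()

    def _arm_expiry(self):
        # Restart the countdown that fires if no response comes back in time
        if self._expiry:
            self._expiry.cancel()
        self._expiry = threading.Timer(self._timeout, self._on_timeout)
        self._expiry.start()

    def handle_response(self):
        # A response arrived in time, so push the expiry further out
        self._arm_expiry()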
Exemple #55
0
class Persistence(object):

    """ A block mixin that provides persistence for some variables.

    To use, override the persisted_values function and define which variables
    on your class you wish to have persisted. The values should be strings
    that correspond to the variable names to be saved.

    """

    backup_interval = TimeDeltaProperty(
        visible=False, title='Backup Interval', default={"seconds": 60 * 60})
    use_persistence = BoolProperty(
        title='Load from Persistence?', default=True)

    def __init__(self):
        super().__init__()
        self._backup_job = None

    def persisted_values(self):
        """ Return a dictionary containing the values to be persisted.

        This function should be overridden in a Block that wishes to use
        persistence. Return a dictionary where each key is the name to
        persist the value under (useful if the block's attribute names
        change) and each value is the name of the instance attribute to
        save and load into.

        For example, if your block class has an instance level attribute called
        `_values` and you wish to save it, you could return this dictionary:

            {
               "values": "_values"
            }

        """
        return {}

    def _load(self):
        """ Load the values from persistence """
        self._logger.debug("Loading from persistence")
        for persist_key, persist_target in self.persisted_values().items():
            if self.persistence.has_key(persist_key):
                loaded = self.persistence.load(persist_key)
                self._logger.debug("Loaded value {} for attribute {}".format(
                    loaded, persist_target))
                # Set the loaded value to the attribute on this class
                setattr(self, persist_target, loaded)

    def _save(self):
        """ Save the values to persistence """
        self._logger.debug("Saving to persistence")
        for persist_key, persist_target in self.persisted_values().items():
            self.persistence.store(persist_key, getattr(self, persist_target))
        self.persistence.save()

    def configure(self, context):
        super().configure(context)
        if self.use_persistence:
            self._load()

    def start(self):
        super().start()

        # If we have configured a backup interval, set up the job to
        # periodically save to persistence
        if self.backup_interval.total_seconds() > 0:
            self._backup_job = Job(self._save, self.backup_interval, True)

    def stop(self):
        if self._backup_job:
            self._backup_job.cancel()

        # Do one last save before stopping
        self._save()
        super().stop()
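
# A hedged, standalone sketch (plain dict in place of the persistence
# store, helper names illustrative) of the key -> attribute mapping that
# persisted_values() describes: each key is stored under its own name and
# loaded back onto the named instance attribute.
def save_values(obj, store, mapping):
    """Copy each mapped attribute of obj into the store."""
    for key, attr in mapping.items():
        store[key] = getattr(obj, attr)

def load_values(obj, store, mapping):
    """Restore each mapped attribute of obj from the store, if present."""
    for key, attr in mapping.items():
        if key in store:
            setattr(obj, attr, store[key])

# e.g. with mapping {"values": "_values"}, save_values(block, store, mapping)
# stores block._values under "values" and load_values restores it later.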
Exemple #56
0
 def start(self):
     super().start()
     self._job = Job(self._emit, self.interval, True)
     # Run an emit cycle immediately, but in a new thread since it
     # might take some time and we don't want it to hold up start
     spawn(self._emit)