Example #1
class Stagger(Block):

    version = VersionProperty("1.0.1")
    period = TimeDeltaProperty(title='Period', default={"seconds": 1})
    min_interval = TimeDeltaProperty(title='Minimum Interval',
                                     advanced=True,
                                     default={"microseconds": 100000})

    def process_signals(self, signals, input_id=None):
        stagger_period = self._get_stagger_period(len(signals))
        self.logger.debug("{} signals received, notifying every {}".format(
            len(signals), stagger_period))

        # Launch the notification mechanism in a new thread so that it can
        # sleep between notifications
        stagger_data = StaggerData(
            stagger_period,
            math.ceil(self.period() / stagger_period),
            signals,
            self.notify_signals,
            self.logger,
        )
        stagger_data.start_notify()

    def _get_stagger_period(self, num_signals):
        """ Returns the stagger period based on a number of signals """
        return max(self.period() / num_signals, self.min_interval())
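
The stagger math above is easy to reproduce with the standard library alone. A minimal sketch, assuming the property defaults shown in the block (a one-second period and a 100 ms minimum interval):

from datetime import timedelta

period = timedelta(seconds=1)                  # mirrors the `period` default
min_interval = timedelta(microseconds=100000)  # mirrors the `min_interval` default

def stagger_period(num_signals):
    # Spread the period evenly across the signals, but never drop below
    # the configured minimum interval
    return max(period / num_signals, min_interval)

print(stagger_period(4))    # 0:00:00.250000 -> four notifications, 250 ms apart
print(stagger_period(100))  # 0:00:00.100000 -> clamped to the minimum interval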
Example #2
class RethinkDBBase(LimitLock, Retry, Block):
    """
    A block for communicating with a RethinkDB server.

    Properties:
        host (str): server host to connect to
        port (int): port on the server host, default rethink port is 28015
        database_name (str): database name to access
        connect_timeout (interval): time to wait for a successful connection
    """

    version = VersionProperty('1.0.0')
    host = StringProperty(title='Host', default='[[RETHINKDB_HOST]]')
    port = IntProperty(title='Port', default='[[RETHINKDB_PORT]]')
    database_name = StringProperty(title='DB name', default='test')
    connect_timeout = TimeDeltaProperty(title="Connect timeout",
                                        default={"seconds": 20},
                                        visible=False)

    def process_signals(self, signals):
        self.execute_with_lock(self._locked_process_signals,
                               10,
                               signals=signals)

    def _locked_process_signals(self, signals):
        pass
Example #3
class SignalRepeater(GroupBy, Block):

    version = VersionProperty('0.1.1')
    max_repeats = IntProperty(title='Max Repeats', default=-1)
    interval = TimeDeltaProperty(title='Repeat Interval',
                                 default={'seconds': 10})

    def configure(self, context):
        super().configure(context)
        self.notifications = defaultdict(dict)
        self._group_locks = defaultdict(Lock)

    def stop(self):
        for group in copy(self.notifications):
            self._cancel_group_job(group)
        super().stop()

    def _cancel_group_job(self, group):
        job = self.notifications[group].get('job')
        if job:
            self.logger.debug("Cancelling job for group {}".format(group))
            job.cancel()
            del self.notifications[group]

    def process_group_signals(self, signals, group, input_id='repeat'):
        if input_id == 'cancel':
            self._cancel_group_job(group)
            return
        if len(signals) == 0:
            return
        signal = signals[-1]
        repeats_remaining = self.max_repeats(signal)
        with self._group_locks[group]:
            self._cancel_group_job(group)
            if repeats_remaining == 0:
                # They don't want to repeat, ignore
                return
            self.logger.debug("Setting up repeat for group {}".format(group))
            self.notifications[group]['signal'] = signal
            self.notifications[group]['num_remaining'] = repeats_remaining
            self.notifications[group]['job'] = Job(target=self.notify_group,
                                                   delta=self.interval(signal),
                                                   repeatable=True,
                                                   group=group)

    def notify_group(self, group):
        with self._group_locks[group]:
            notification = self.notifications[group]
            if notification.get('num_remaining', 0) != 0:
                notification['num_remaining'] -= 1
                self.logger.debug(
                    "Notifying signal for group {}, {} remaining".format(
                        group, notification['num_remaining']))
                self.notify_signals([notification['signal']])
            else:
                self._cancel_group_job(group)
Example #4
class SafeTrigger():
    """ Guarantees notifying signals every interval, regardless of count """

    interval = TimeDeltaProperty(title='Interval',
                                 default={'seconds': 1},
                                 order=0)
    max_count = IntProperty(title='Max Count', default=1, order=1)

    def __init__(self):
        super().__init__()
        self._job = None
        self.stop_event = Event()
        self.signal_lock = Lock()

    def start(self):
        super().start()
        self._job = Job(self._emit, self.interval(), True)
        # Run an emit cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._emit)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()

        self.stop_event.set()
        super().stop()

    def _emit(self):
        """ Called every *interval* to generate then notify the signals """
        self.logger.debug("New generation cycle requested")
        count = 0
        signals = []

        # Stop any currently running simulator threads
        self.stop_event.set()
        # We only want one simulator thread simulating at a time
        with self.signal_lock:
            # Ok, we're running, so clear the event and wait
            self.stop_event.clear()
            self.logger.debug("Starting generation...")
            while count < self.max_count() and not self.stop_event.is_set():
                signals.extend(self.generate_signals(1))
                count += 1

        self.logger.debug("Notifying {} signals".format(len(signals)))
        self.notify_signals(signals)
Example #5
class IntervalTrigger():
    """Generate signals at a regular interval up to total_signals"""

    total_signals = IntProperty(title="Total Number of Signals",
                                default=-1,
                                order=4)
    interval = TimeDeltaProperty(title='Interval',
                                 default={'seconds': 1},
                                 order=0)

    def __init__(self):
        super().__init__()
        self.counter = None
        self._job = None

    def start(self):
        super().start()
        self.counter = 0
        # Schedule interval simulations for the future
        self._job = Job(self._simulate, self.interval(), True)
        # But also simulate right away
        self._simulate()

    def _simulate(self):
        sigs = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(sigs, list):
            sigs = list(sigs)
        # Add however many signals were generated (a multiple-signals mixin
        # may return more than one) to the counter and notify them
        self.counter += len(sigs)
        # self.counter - self.total_signals() yields the number of signals
        # that should be removed before notifying
        if self.counter > self.total_signals() and self.total_signals() >= 0:
            sigs_to_remove = self.counter - self.total_signals()
            sigs = sigs[:-1 * sigs_to_remove]
        self.notify_signals(sigs)
        if self.total_signals() > 0 and \
                self.counter >= self.total_signals():
            self._job.cancel()

    def stop(self):
        """ Stop the simulator thread. """
        self._job.cancel()
        super().stop()
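
The trimming in _simulate deserves a worked example. A small sketch under assumed values (total_signals of 10, a counter already at 9, and a hypothetical batch of three signals):

total_signals = 10
counter = 9
sigs = ['s10', 's11', 's12']        # hypothetical batch from generate_signals()

counter += len(sigs)                # counter is now 12
if counter > total_signals >= 0:
    sigs_to_remove = counter - total_signals   # 2 signals too many
    sigs = sigs[:-1 * sigs_to_remove]          # drop them from the end
print(sigs)                         # ['s10'] -- exactly 10 signals ever notified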
Example #6
class Debounce(GroupBy, Block):

    interval = TimeDeltaProperty(title='Debounce Interval',
                                 default={
                                     'days': 0,
                                     'seconds': 1,
                                     'microseconds': 0
                                 })
    version = VersionProperty("0.1.1")

    def __init__(self):
        super().__init__()
        self._last_emission = defaultdict(lambda: None)

    def process_group_signals(self, signals, group, input_id):
        """Check configured interval and return a signal if valid."""
        now = datetime.utcnow()
        if self._last_emission[group] is None or \
                now - self._last_emission[group] > self.interval(signals[-1]):
            self._last_emission[group] = now
            return signals[:1]
        else:
            return []
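
A quick, standard-library sketch of the debounce check above, assuming the default one-second interval; the timestamps are made up:

from datetime import datetime, timedelta

interval = timedelta(seconds=1)
last_emission = None

def passes_debounce(now):
    global last_emission
    if last_emission is None or now - last_emission > interval:
        last_emission = now
        return True                 # first signal of the batch gets through
    return False                    # still inside the debounce window

t0 = datetime(2024, 1, 1, 12, 0, 0)
print(passes_debounce(t0))                                 # True
print(passes_debounce(t0 + timedelta(milliseconds=300)))   # False
print(passes_debounce(t0 + timedelta(seconds=2)))          # True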
Example #7
class FacebookFeed(RESTPolling):
    """ This block polls the Facebook Graph API, using the feed endpoint

    Params:
        phrase (str): The phrase with which to search posts. Need not be
            url-quoted.
        limit (int): Maximum number of posts contained in each response.
        lookback (timedelta): Initial window of desirable posts (for the
            very first request).

    """
    URL_FORMAT = ("https://graph.facebook.com/v2.2/" "{}/{}?since={}&limit={}")
    TOKEN_URL_FORMAT = ("https://graph.facebook.com/oauth"
                        "/access_token?client_id={0}&client_secret={1}"
                        "&grant_type=client_credentials")

    creds = ObjectProperty(Creds, title='Credentials', default=Creds())
    lookback = TimeDeltaProperty(title='Lookback', default={"seconds": 0})
    limit = IntProperty(title='Limit (per poll)', default=10)
    feed_type = SelectProperty(FeedType,
                               default=FeedType.FEED,
                               title='Feed Type')
    version = VersionProperty("1.0.2")

    def __init__(self):
        super().__init__()
        self._url = None
        self._paging_field = "paging"
        self._created_field = "created_time"
        self._access_token = None

    def configure(self, context):
        super().configure(context)
        lb = self._unix_time(datetime.utcnow() - self.lookback())
        self._freshest = [lb] * self._n_queries

    def _authenticate(self):
        """ Overridden from the RESTPolling block.

        Generates and records the access token for pending requests.

        """
        if self.creds().consumer_key() is None or \
                self.creds().app_secret() is None:
            self.logger.error("You need a consumer key and app secret, yo")
        else:
            self._access_token = self._request_access_token()

    def _process_response(self, resp):
        """ Extract fresh posts from the Facebook graph api response object.

        Args:
            resp (Response)

        Returns:
            signals (list(Signal)): The list of signals to notify, each of
                which corresponds to a fresh FB post.
            paging (bool): Denotes whether or not paging requests are
                necessary.

        """
        signals = []
        resp = resp.json()
        fresh_posts = posts = resp['data']
        paging = resp.get(self._paging_field) is not None
        self.logger.debug("Facebook response contains %d posts" % len(posts))

        # we shouldn't see empty responses, but we'll protect our necks.
        if len(posts) > 0:
            self.update_freshness(posts)
            fresh_posts = self.find_fresh_posts(posts)
            paging = len(fresh_posts) == self.limit()

            # store the timestamp of the oldest fresh post for use in url
            # preparation later.
            if len(fresh_posts) > 0:
                self.prev_stalest = self.created_epoch(fresh_posts[-1])

        signals = [FacebookSignal(p) for p in fresh_posts]
        self.logger.debug("Found %d fresh posts" % len(signals))

        return signals, paging

    def _request_access_token(self):
        """ Request an access token directly from facebook.

        Args:
            None

        Returns:
            token (str): The access token, which goes on the end of a request.

        """
        resp = requests.get(
            self.TOKEN_URL_FORMAT.format(self.creds().consumer_key(),
                                         self.creds().app_secret()))
        status = resp.status_code

        # If the token request fails, try to use the configured app id
        # and secret. This probably won't work, but the docs say that it
        # should. for more info, see:
        # https://developers.facebook.com/docs/facebook-login/access-tokens
        token = "%s|%s" % (self.creds().consumer_key(),
                           self.creds().app_secret())
        if status == 200:
            token = resp.text.split('access_token=')[1]
        else:
            self.logger.error("Facebook token request failed with status %d" %
                              status)
        return token

    def _prepare_url(self, paging=False):
        """ Overridden from RESTPolling block.

        Appends the access token to the format string and builds the headers
        dictionary. When not paging, we do some string interpolation to get
        our arguments into the request url. When paging, we append the until
        parameter to the end of the existing url.

        Args:
            paging (bool): Are we paging?

        Returns:
            headers (dict): Contains the (case sensitive) http headers.

        """
        headers = {"Content-Type": "application/json"}
        fmt = "%s&access_token=%s" % (self.URL_FORMAT, self._access_token)
        if not paging:
            self.paging_url = None
            feed_type = self.feed_type().value
            self.url = fmt.format(self.current_query, feed_type,
                                  self.freshest - 2, self.limit())
        else:
            self.paging_url = "%s&until=%d" % (self.url, self.prev_stalest)

        return headers

    def _on_failure(self, resp, paging, url):
        execute_retry = True
        try:
            status_code = resp.status_code
            resp = resp.json()
            err_code = resp.get('error', {}).get('code')
            if (status_code == 404 and err_code in [803, 2500]
                    or status_code == 500 and err_code == 2):
                # Page feed requests require only an access token [1] but user
                # feed requests require a user access token with read_stream
                # permission [2].
                # [1]: https://developers.facebook.com/docs/graph-api/
                # reference/v2.2/page/feed
                # [2]: https://developers.facebook.com/docs/graph-api/
                # reference/v2.2/user/feed
                self.logger.warning("Skipping feed: {}".format(
                    self.current_query))
                execute_retry = False
                self._increment_idx()
        finally:
            self.logger.error(
                "Polling request of {} returned status {}: {}".format(
                    url, status_code, resp))
            if execute_retry:
                self._retry(paging)
class AccelerometerChip(Block):
    """ A block enriches incoming signals with the current values of a
    set of input pins.

    """
    version = VersionProperty("0.1.2")
    signal_name = StringProperty(title="Name", default="value")
    address = IntProperty(default=0x53, title="Address")
    chip = SelectProperty(ChipTypes, title="Chip", default=ChipTypes.ADXL345)
    interval = TimeDeltaProperty(title="Sampling Period",
                                 default={"microseconds": 50000})
    sample = SelectProperty(SampleTypes,
                            title="Sample Type",
                            default=SampleTypes.Stats)
    range = SelectProperty(Ranges, title="G Range", default=Ranges._2G)

    def configure(self, context):
        super().configure(context)
        if self.chip() == ChipTypes.ADXL345:
            obj = get_adxl345()

        self._accel = obj(self.address())
        self._accel.set_range(self.range().value)

        self._job = None
        if self.sample() != SampleTypes.Last:
            self._samples = []
            self._thread = threading.Thread(target=self._sample_threaded)
            self._kill = False
            self._thread.start()

    def stop(self):
        super().stop()
        self._kill = True

    def _sample_threaded(self):
        sleeptime = self.interval().total_seconds()
        while not self._kill:
            self._sample()
            time.sleep(sleeptime)

    def _sample(self):
        self._samples.append(self._accel.read())

    def process_signals(self, signals):
        if self.sample() == SampleTypes.Last:
            value = self._accel.read()
            gval = sum(n**2 for n in value)
            gval = math.sqrt(gval)
            value = {
                "last": value,
                "last_magnitude": gval,
            }
        else:
            samples = []
            pop = self._samples.pop
            # get data in threadsafe way
            while self._samples:
                samples.append(pop(0))

            if not samples:
                self.logger.error("Accelerometer has no samples!")
                return

            x, y, z = zip(*samples)
            # get gs squared
            x_gs = map(math.pow, x, itertools.repeat(2))
            y_gs = map(math.pow, y, itertools.repeat(2))
            z_gs = map(math.pow, z, itertools.repeat(2))

            # add them together x^2 + y^2 + z^2
            sample_gs = map(sum, zip(x_gs, y_gs, z_gs))

            # take their square root to get the vector value
            sample_gs = tuple(map(math.sqrt, sample_gs))

            max_g = max(sample_gs)
            min_g = min(sample_gs)
            max_i = sample_gs.index(max_g)
            min_i = sample_gs.index(min_g)
            mean_gs = statistics.mean(sample_gs)
            if len(sample_gs) >= 2:
                stdev_gs = statistics.stdev(sample_gs, mean_gs)
            else:
                stdev_gs = None

            value = {
                "max": samples[max_i],
                "min": samples[min_i],
                "mean": mean_gs,
                "stdev": stdev_gs,
                "last": samples[-1],
                "max_magnitude": max_g,
                "min_magnitude": min_g,
                "last_magnitude": sample_gs[-1]
            }

        name = self.signal_name()
        for s in signals:
            setattr(s, name, value)

        self.notify_signals(signals)
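
The map/zip pipeline in the statistics branch can be hard to follow; here is a readable restatement using only the standard library and a few made-up (x, y, z) samples:

import math
import statistics

samples = [(0.0, 0.0, 1.0), (0.1, 0.0, 1.0), (0.0, 0.2, 0.9)]  # hypothetical readings

# per-sample vector magnitude: sqrt(x^2 + y^2 + z^2)
magnitudes = [math.sqrt(x * x + y * y + z * z) for x, y, z in samples]

summary = {
    "max": samples[magnitudes.index(max(magnitudes))],
    "min": samples[magnitudes.index(min(magnitudes))],
    "mean": statistics.mean(magnitudes),
    "stdev": statistics.stdev(magnitudes) if len(magnitudes) >= 2 else None,
    "last": samples[-1],
    "max_magnitude": max(magnitudes),
    "min_magnitude": min(magnitudes),
    "last_magnitude": magnitudes[-1],
}
print(summary)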
class ControlBands(GroupBy, Persistence, Block):

    band_interval = TimeDeltaProperty(default={"days": 1},
                                      title="Band Interval")
    value_expr = Property(default="{{ $value }}", title="Value")
    version = VersionProperty("1.0.2")

    def __init__(self):
        super().__init__()
        self._band_values = defaultdict(list)
        self._signals_lock = Lock()

    def process_signals(self, signals, input_id='default'):
        sigs_out = self.for_each_group(self.record_values, signals)
        if sigs_out:
            self.notify_signals(sigs_out)

    def persisted_values(self):
        """ Overridden from persistence mixin """
        return ['_band_values']

    def record_values(self, signals, group):
        """ Save the time and the list of signals for each group.

        This will return signals with the mean/band data included on them """
        sigs_out = []

        with self._signals_lock:
            ctime = _time()
            # First get rid of the old values
            self.trim_old_values(group, ctime)

            prev_values = self._get_current_values(group)
            self.logger.debug(
                "Previous values for group: {}".format(prev_values))
            # Start off a new band data using the latest value from the
            # previous band data objects
            new_values = BandData(prev_values.last_val)

            for sig in signals:
                try:
                    # the value must be a floating point value
                    value = float(self.value_expr(sig))

                    # Add the moving range data to the signal and add it to
                    # the list of signals to notify
                    sigs_out.append(
                        self._enrich_signal(sig, prev_values + new_values,
                                            value))

                    # Now account for the latest value in the moving range data
                    new_values.register_value(value)
                except:
                    self.logger.exception(
                        "Unable to determine value for signal {}".format(sig))

            # Archive the new values
            if new_values.count_items:
                self._band_values[group].append((ctime, new_values))

        return sigs_out

    def _enrich_signal(self, signal, band_data, value):
        """ Add relevant band data to the signal.

        Args:
            signal: The signal that we should add data to
            band_data (BandData): A single BandData object containing the
                current moving range information
            value: The value this signal contributed to the band data. This is
                used to determine how many deviations from the mean it is.

        Returns:
            The signal with updated data
        """
        range_mean = band_data.get_mean()
        range_deviation = band_data.get_range()
        if range_deviation != 0:
            deviations = (value - range_mean) / range_deviation
        else:
            deviations = 0

        class BandSignalData():
            def __init__(self, value, mean, deviation, deviations):
                self.value = value
                self.mean = mean
                self.deviation = deviation
                self.deviations = deviations

            def to_dict(self):
                """represent all BandSignalData attributes as a dict"""
                return self.__dict__

        setattr(
            signal, 'band_data',
            BandSignalData(value, range_mean, range_deviation,
                           deviations).to_dict())

        return signal

    def _get_current_values(self, group):
        """ Returns a single BandData object for a group.

        This will make use of the __add__ function in the BandData class to
        sum together all of the current data points in the group. The result
        will be a single BandData object with all of the previously saved
        points accounted for. """
        cur_values = self._band_values[group]
        if len(cur_values) > 1:
            # Sum every BandData (after the first), using the first one as
            # the starting point
            return sum([data[1] for data in cur_values[1:]], cur_values[0][1])
        elif len(cur_values) == 1:
            return cur_values[0][1]
        else:
            return BandData()

    def trim_old_values(self, group, ctime):
        """ Remove any "old" saved values for a given group """
        group_values = self._band_values[group]
        self.logger.debug("Trimming old values - had {} items".format(
            len(group_values)))
        group_values[:] = [
            data for data in group_values
            if data[0] > ctime - self.band_interval().total_seconds()
        ]
        self.logger.debug("Now has {} items".format(len(group_values)))
Example #10
class Interval(PropertyHolder):
    interval = TimeDeltaProperty(title='Interval', default={})
    repeatable = BoolProperty(title='Repeatable', default=False)
class SlidingWindow(GroupBy, Block):
    """Creates a sliding window of signals.

    Examples:

    { min_signals = 1, max_signals = 3 }
    input:  ----1------2--------3--------4--------5-->
    output: ----•------•--------•--------•--------•-->
              [1]  [1,2]  [1,2,3]  [2,3,4]  [3,4,5]


    { min_signals = 3, max_signals = 3 }
    input:  ----1------2--------3--------4--------5-->
    output: --------------------•--------•--------•-->
                          [1,2,3]  [2,3,4]  [3,4,5]


    { min_signals = 1, max_signals = 3, expiration: { milliseconds: 500 } }
    input:  ----1------2--------3--------4--| >500ms |---5-->
    output: ----•------•--------•--------•--|        |---•-->
              [1]  [1,2]  [1,2,3]  [2,3,4]             [5]
    """

    """TODO
    - [x] Window Expiration
    - [ ] Implement Group
    - [ ] Implement Persistence
    - [ ] Use Signal Expiration
    """

    version = VersionProperty("0.0.1")
    min_signals = IntProperty(default=1, title='Min Signals')
    max_signals = IntProperty(default=20, title='Max Signals')
    expiration = TimeDeltaProperty(title='Window Expiration',
                                   allow_none=True)

    def __init__(self):
        super().__init__()
        self._buffers = defaultdict(list)
        self._last_recv = defaultdict(lambda: datetime.min)

    def expire(self):
        self.logger.debug('Clearing the buffer window')
        self._buffers.clear()

    def process_group_signals(self, signals, group, input_id=None):
        now = datetime.utcnow()

        hasExpiration = self.expiration() is not None
        if hasExpiration and (self._last_recv[group] + self.expiration()) < now:
            self.logger.debug('The buffer window has expired')
            self._buffers[group].clear()

        self._last_recv[group] = now

        for signal in signals:
            self._buffers[group].append(signal)

        del self._buffers[group][:-self.max_signals()]

        if len(self._buffers[group]) >= self.min_signals():
            return self._buffers[group]
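
The one-line trim in process_group_signals uses a negative-slice delete to keep only the newest entries; a quick illustration:

buf = [1, 2, 3, 4, 5]
max_signals = 3
del buf[:-max_signals]    # delete everything except the last max_signals items
print(buf)                # [3, 4, 5]

short = [1, 2]
del short[:-max_signals]  # fewer items than the limit: the slice is empty,
print(short)              # [1, 2]    so nothing is removed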
Example #12
class Buffer(Persistence, GroupBy, Block):

    version = VersionProperty("0.1.1")
    signal_start = BoolProperty(title='Start Interval On Signal?',
                                default=False)
    interval = TimeDeltaProperty(title='Buffer Interval',
                                 default={'seconds': 1},
                                 allow_none=True)
    interval_duration = TimeDeltaProperty(title='Interval Duration',
                                          allow_none=True)

    def __init__(self):
        super().__init__()
        self._last_emission = None
        self._cache = defaultdict(lambda: defaultdict(list))
        self._cache_lock = Lock()
        self._emission_job = None
        self._active_job = False

    def persisted_values(self):
        return ['_last_emission', '_cache']

    def start(self):
        # Start emission job on service start if bool property is not checked
        if self.interval() and not self.signal_start():
            now = datetime.utcnow()
            latest = self._last_emission or now
            delta = self.interval() - (now - latest)
            self._emission_job = Job(
                self._emit_job,
                delta,
                False,
                group=None,
                reset=True,
            )

    def emit(self, group=None):
        self._emit_job(group)

    def _emit_job(self, group, reset=False):
        self.logger.debug('Emitting signals')
        if reset:
            self._emission_job.cancel()
            self._emission_job = Job(
                self._emit_job,
                self.interval(),
                True,
                group=group,
            )
        self._last_emission = datetime.utcnow()
        signals = self._get_emit_signals(group)
        self._active_job = False
        if signals:
            self.logger.debug('Notifying {} signals'.format(len(signals)))
            self.notify_signals(signals)
        else:
            self.logger.debug('No signals to notify')

    def _get_emit_signals(self, group=None):
        signals = []
        with self._cache_lock:
            if not group:
                for group in self._cache.keys():
                    signals.extend(self._get_emit_signals_for_group(group))
            else:
                signals.extend(self._get_emit_signals_for_group(group))
        return signals

    def _get_emit_signals_for_group(self, group):
        now = int(time())
        signals = []
        cache_times = sorted(self._cache[group].keys())
        if self.interval_duration():
            # Remove old signals from cache.
            old = now - int(self.interval_duration().total_seconds())
            self.logger.debug(
                'Removing signals from cache older than {}'.format(old))
            for cache_time in cache_times:
                if cache_time < old:
                    del self._cache[group][cache_time]
                else:
                    break
        for cache in cache_times:
            signals.extend(self._cache[group][cache])
        if not self.interval_duration():
            # Clear cache every time if duration is not set.
            self.logger.debug('Clearing cache of signals')
            self._cache[group] = defaultdict(list)
        return signals

    def process_signals(self, signals):
        self.for_each_group(self.process_group, signals)
        # Start a new job if property is checked and there is no active job
        if self.signal_start() and not self._active_job:
            self._emission_job = Job(
                self._emit_job,
                self.interval(),
                False,
                group=None,
                reset=False,
            )
            self._active_job = True  # Added flag for active job

    def process_group(self, signals, key):
        with self._cache_lock:
            now = int(time())
            self._cache[key][now].extend(signals)
Example #13
class MergeStreams(Persistence, GroupBy, Block):
    """ Take two input streams and combine signals together. """

    expiration = TimeDeltaProperty(default={}, title="Stream Expiration")
    notify_once = BoolProperty(default=True, title="Notify Once?")
    version = VersionProperty('0.1.0')

    def _default_signals_dict(self):
        return {"input_1": {}, "input_2": {}}

    def _default_expiration_jobs_dict(self):
        return {"input_1": None, "input_2": None}

    def __init__(self):
        super().__init__()
        self._signals = defaultdict(self._default_signals_dict)
        self._signals_lock = defaultdict(Lock)
        self._expiration_jobs = defaultdict(self._default_expiration_jobs_dict)

    def persisted_values(self):
        """Persist signals only when no expiration (ttl) is configured.

        Signals at each input will be persisted between block restarts except
        when an expiration is configured. TODO: Improve this feature so signals
        are always persisted and then properly removed after loading once the
        expiration has passed.
        """
        if self.expiration():
            return []
        else:
            return ["_signals"]

    def process_group_signals(self, signals, group, input_id):
        merged_signals = []
        with self._signals_lock[group]:
            for signal in signals:
                self._signals[group][input_id] = signal
                signal1 = self._signals[group]["input_1"]
                signal2 = self._signals[group]["input_2"]
                if signal1 and signal2:
                    merged_signal = self._merge_signals(signal1, signal2)
                    merged_signals.append(merged_signal)
                    if self.notify_once():
                        self._signals[group]["input_1"] = {}
                        self._signals[group]["input_2"] = {}
            if self.expiration():
                self._schedule_signal_expiration_job(group, input_id)
        return merged_signals

    def _merge_signals(self, signal1, signal2):
        """ Merge signals 1 and 2 and clear from memory if only notify once """
        sig_1_dict = signal1.to_dict()
        sig_2_dict = signal2.to_dict()

        self._fix_to_dict_hidden_attr_bug(sig_1_dict)
        self._fix_to_dict_hidden_attr_bug(sig_2_dict)
        merged_signal_dict = {}
        merged_signal_dict.update(sig_1_dict)
        merged_signal_dict.update(sig_2_dict)
        return Signal(merged_signal_dict)

    def _fix_to_dict_hidden_attr_bug(self, signal_dict):
        """ Remove special attributes from dictionary

        n.io has a bug when using Signal.to_dict(hidden=True). It should
        include private attributes (i.e. attributes starting with '_') but not
        special attributes (i.e. attributes starting with '__').

        """
        for key in list(signal_dict.keys()):
            if key.startswith('__'):
                del signal_dict[key]

    def _schedule_signal_expiration_job(self, group, input_id):
        """ Schedule expiration job, cancelling existing job first """
        if self._expiration_jobs[group][input_id]:
            self._expiration_jobs[group][input_id].cancel()
        self._expiration_jobs[group][input_id] = Job(
            self._signal_expiration_job, self.expiration(), False, group,
            input_id)

    def _signal_expiration_job(self, group, input_id):
        self._signals[group][input_id] = {}
        self._expiration_jobs[group][input_id] = None
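
The merge itself is two plain dict updates, so input_2 wins whenever both signals carry the same attribute. A minimal sketch with made-up signal dictionaries:

sig_1_dict = {"id": 7, "temp": 21.5}                    # latest signal on input_1
sig_2_dict = {"id": 7, "temp": 22.0, "humidity": 40}    # latest signal on input_2

merged_signal_dict = {}
merged_signal_dict.update(sig_1_dict)
merged_signal_dict.update(sig_2_dict)   # applied last, so its 'temp' wins
print(merged_signal_dict)               # {'id': 7, 'temp': 22.0, 'humidity': 40}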
class TwitterSearch(Block):
    version = VersionProperty("1.0.0")
    interval = TimeDeltaProperty(title="Query Interval",
                                 default={"minutes": 10})
    tweet_text = ListProperty(StringType, title="Text includes", default=[])
    hashtags = ListProperty(StringType, title="Hashtags", default=[])
    _from = StringProperty(title="From user", default='')
    _to = StringProperty(title="To user", default='')
    at = ListProperty(StringType, title="Referenced users", default=[])
    geo = ObjectProperty(GeoCode, title="Geographical")
    count = IntProperty(title="Max Results", default=25)
    lookback = IntProperty(title="Query Lookback (days)", default=-1)
    creds = ObjectProperty(TwitterCreds, title="Credentials")
    tude = SelectProperty(
        TwitterAttitude,
        default=TwitterAttitude.NEUTRAL,
        title="Tone"
    )
    operator = SelectProperty(
        TwitterQueryOp,
        default=TwitterQueryOp.AND,
        title="Query Operator"
    )
    result_type = SelectProperty(
        TwitterResultType,
        default=TwitterResultType.MIXED,
        title="Result Type"
    )

    def __init__(self):
        super().__init__()
        self._auth = None
        self._url = None
        self._search_job = None

    def configure(self, context):
        super().configure(context)

    def start(self):
        super().start()
        self._authorize()
        self._construct_url()
        self._search_job = Job(
            self._search_tweets,
            self.interval(),
            False,
            self._url
        )

    def stop(self):
        super().stop()
        self._search_job.cancel()

    def _search_tweets(self, url):
        rsp = requests.get(url, auth=self._auth)
        status = rsp.status_code
        if status == 200:
            data = rsp.json()
            tweets = data['statuses']
            next_results = data['search_metadata'].get('next_results')
            self.notify_signals([Signal(t) for t in tweets])
            if next_results is not None:
                self._search_tweets(
                    "{0}{1}".format(SEARCH_URL, next_results)
                )
            else:
                self.logger.debug("Scheduling next search...")
                self._search_job = Job(
                    self._search_tweets,
                    self.interval(),
                    False,
                    self._url
                )

        else:
            self.logger.error(
                "Twitter search failed with status {0}".format(status))

    def _construct_url(self):
        self._url = "{0}?".format(SEARCH_URL)

        query = self._process_query()

        if query:
            self._append_param('q', sep=self.operator().value, vals=query)

        if self.geo().latitude():
            self._append_param('geo', ',', 'mi',
                               [self.geo().latitude(),
                                self.geo().longitude(),
                                self.geo().radius()])

        if self.lookback() >= 0:
            now = datetime.utcnow() - timedelta(days=self.lookback())
            vals = [now.year, now.month, now.day]
            self._append_param('since', '-', vals=vals)

        if self.count():
            self._append_param('count', vals=[self.count()])

        self._append_param('result_type', vals=[self.result_type().value])

    def _append_param(self, p_name, sep='', end='', vals=[]):
        val_str = quote(sep.join([str(v) for v in vals]) + end)
        self._url += "{0}={1}&".format(p_name, val_str)

    def _process_query(self):
        values = []
        values.extend(self.tweet_text())
        for h in self.hashtags():
            values.append("#{0}".format(h))
        for u in self.at():
            values.append("@{0}".format(u))
        if self._from():
            values.append("from:{0}".format(self._from()))
        if self._to():
            values.append("to:{0}".format(self._to()))
        if self.tude().value:
            values.append(self.tude().value)
        return values

    def _authorize(self):
        """ Prepare the OAuth handshake and verify.

        """
        try:
            self._auth = OAuth1(self.creds().consumer_key(),
                                self.creds().app_secret(),
                                self.creds().oauth_token(),
                                self.creds().oauth_token_secret())
            resp = requests.get(VERIFY_CREDS_URL, auth=self._auth)
            if resp.status_code != 200:
                raise Exception("Status %s" % resp.status_code)
        except Exception as e:
            self.logger.error("Authentication Failed"
                              "for consumer key: %s" %
                              self.creds().consumer_key())
Example #15
class SocketIO(Retry, Block):
    """ A block for communicating with a socket.io server.

    Properties:
        host (str): location of the socket.io server.
        port (int): socket.io server port.
        room (str): socket.io room.
        content (Expression): Content to send to socket.io room.
        listen (bool): Whether or not the block should listen to messages
            FROM the SocketIo room.

    """
    version = VersionProperty('2.0.0')
    host = StringProperty(title='SocketIo Host', default="127.0.0.1")
    port = IntProperty(title='Port', default=443)
    room = StringProperty(title='Socket.io Room', default="default")
    content = Property(title='Content',
                       default="{{ json.dumps($to_dict(), default=str) }}",
                       visible=False)
    listen = BoolProperty(title="Listen to SocketIo Room", default=False)
    connect_timeout = TimeDeltaProperty(title="Connect timeout",
                                        default={"seconds": 10},
                                        visible=False)
    start_without_server = BoolProperty(title="Allow Service Start On Failed "
                                        "Connection",
                                        default=False)
    wsp = SelectProperty(WS_Protocols,
                         title="Websocket Protocol",
                         default="ws")

    def __init__(self):
        super().__init__()
        self._sid = ""
        self._hb_interval = -1  # Heartbeat interval
        self._hb_timeout = -1  # Heartbeat timeout
        self._transports = ""  # Valid transports
        self._client = None
        self._client_ready = False
        # This bounded semaphore will ensure that only one thread can be
        # connecting to the client at a time
        self._connection_semaphore = BoundedSemaphore(1)
        self._socket_url_protocol = "http"
        self._socket_url_base = ""
        self._stopping = False
        self._disconnect_thread = None

    def configure(self, context):
        super().configure(context)
        self._build_socket_url_base()
        # Connect to the socket before starting the block
        # This connection won't happen with a retry, so if the socket
        # server is not running, the connection will fail. In this case,
        # if the user has specified that the service should start anyway,
        # attempt to reconnect based on the given retry strategy.

        try:
            self._connect_to_socket()
        except:
            if self.start_without_server():
                self.logger.info('Could not connect to web socket. Service '
                                 'will be started and this block will attempt '
                                 'to reconnect using given retry strategy.')
                self._disconnect_thread = spawn(self.handle_disconnect)
            else:
                raise

    def stop(self):
        """ Stop the block by closing the client.

        """
        self._stopping = True
        self.logger.debug("Shutting down socket.io client")

        if self._disconnect_thread:
            self._disconnect_thread.join()

        self._close_client()
        super().stop()

    def handle_disconnect(self):
        """ What to do when the client reports a problem """
        # Don't need to reconnect if we are stopping, the close was expected
        if self._stopping:
            return

        try:
            self.logger.info("Attempting to reconnect to the socket")
            self.execute_with_retry(self.reconnect_client)
        except:
            self.logger.exception("Failed to reconnect - giving up")

            status_signal = BlockStatusSignal(RunnerStatus.error,
                                              'Out of retries.')
            self.notify_management_signal(status_signal)

    def reconnect_client(self):
        # Only allow one connection at a time by wrapping this call in a
        # bounded semaphore
        self.logger.debug("Acquiring connection semaphore")
        if not self._connection_semaphore.acquire(blocking=False):
            self.logger.warning("Already reconnecting, ignoring request")
            return
        self.logger.debug("Connection semaphore acquired")
        try:
            self._close_client()
            self._connect_to_socket()
        finally:
            self.logger.debug("Releasing connection semaphore")
            self._connection_semaphore.release()

    def handle_data(self, data):
        """Handle data coming from the web socket

        data will be a dictionary, containing an event and data
        that was sent, in the form of a python dictionary.
        """
        if data.get('event', '') != 'recvData':
            # We don't care about this event, it's not data
            return
        try:
            sig = Signal(data['data'])
            self.notify_signals([sig])
        except:
            self.logger.warning("Could not parse socket data", exc_info=True)

    def _connect_to_socket(self):
        connected = Event()
        self._do_handshake()

        url = self._get_ws_url()
        self.logger.info("Connecting to %s" % url)
        self._create_client(url, connected)
        self.logger.info("Connected to socket successfully")

        # Give the client some time to report that it's connected,
        # don't return from this method until that happens
        if not connected.wait(self.connect_timeout().total_seconds()):
            self.logger.warning("Connect response not received in time")
            self._close_client()
            raise Exception("Did not connect in time")
        else:
            self._client_ready = True

    def process_signals(self, signals):
        """ Send content to the socket.io room. """

        # Don't do any processing or sending if the block is stopping.
        # The connection may be closed and we don't want to re-open
        if self._stopping:
            return

        if not self._client or not self._client_ready:
            self.logger.warning("Tried to send to a non-existent or "
                                "terminated web socket, dropping signals")
            return

        for signal in signals:
            try:
                message = self.content(signal)
                self._client.sender.send_event('pub', message)
            except:
                self.logger.exception("Could not send message")

    def _close_client(self):
        """ Safely close the client and remove the reference """
        try:
            # The client isn't ready if we're closing
            self._client_ready = False
            # Try to close the client if it's open
            if self._client:
                self._client.close()
        except:
            # If we couldn't close, it's fine. Either the client wasn't
            # opened or it didn't want to respond. That's what we get for
            # being nice and cleaning up our connection
            self.logger.info("Error closing client", exc_info=True)
        finally:
            self._client = None

    def _create_client(self, url, connected_event):
        """ Create a WS client object.

        This will close any existing clients and re-create a client
        object.

        By the time this function returns, the client is connected and
        ready to send data.
        """
        # We will only want to handle incoming data if the block
        # has been configured to do so
        if self.listen():
            data_callback = self.handle_data
        else:
            data_callback = None

        self._client = SocketIOWebSocketClient(
            url=url,
            room=self.room(),
            connect_event=connected_event,
            heartbeat_interval=self._hb_interval,
            heartbeat_timeout=self._hb_timeout,
            data_callback=data_callback,
            disconnect_callback=self.handle_disconnect,
            logger=self.logger)

        self._client.connect()

    def _build_socket_url_base(self):
        host = self.host().strip()
        # Default to http protocol
        # See if they included an http or https in front of the host,
        host_matched = re.match('^(https?)://(.*)$', host)
        if host_matched:
            self._socket_url_protocol = host_matched.group(1)
            host = host_matched.group(2)

        self._socket_url_base = "{}:{}/socket.io/".format(host, self.port())

    def _do_handshake(self):
        """ Perform the socket io handshake.

        This function will set the proper variables like heartbeat timeout
        and the sid. It will also make sure that websockets is a valid
        transport for this socket.io server.
        """
        handshake_url = self._get_handshake_url()
        self.logger.debug(
            "Making handshake request to {}".format(handshake_url))

        handshake = requests.get(handshake_url)

        if handshake.status_code != 200:
            raise Exception("Could not complete handshake: %s" %
                            handshake.text)

        self._parse_handshake_response(handshake.text)

        self.logger.debug("Handshake successful, sid=%s" % self._sid)

        # Make sure the server reports that they can handle websockets
        if 'websocket' not in self._transports:
            raise Exception("Websocket is not a valid transport for server")

    def _get_handshake_url(self):
        """ Get the URL to perform the initial handshake request to """
        return "{}://{}?transport=polling".format(self._socket_url_protocol,
                                                  self._socket_url_base)

    def _parse_handshake_response(self, resp_text):
        """ Parse a socket.io v1 handshake response.

        Expected response should look like:
            \0xxxx {"sid":"xxx", "upgrades":["websocket","polling",..],
            pingInterval:xxxx, pingTimeout:xxxx}
        """
        self.logger.debug("Parsing handshake response: {}".format(resp_text))
        matches = re.search('({.*})', resp_text)

        resp = json.loads(matches.group(1))

        self._sid = resp['sid']
        self._hb_interval = int(resp['pingInterval']) / 1000
        self._hb_timeout = int(resp['pingTimeout']) / 1000
        self._transports = resp['upgrades']

    def _get_ws_url(self):
        """ Get the websocket URL to communciate with """
        return "{}://{}?transport=websocket&sid={}".format(
            self.wsp().value, self._socket_url_base, self._sid)
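
A short sketch of what _parse_handshake_response extracts, run against an illustrative payload shaped like the docstring's example (all values are made up):

import json
import re

resp_text = '97:0{"sid":"abc123","upgrades":["websocket","polling"],' \
            '"pingInterval":25000,"pingTimeout":60000}'

payload = json.loads(re.search('({.*})', resp_text).group(1))
print(payload['sid'])                        # abc123
print(int(payload['pingInterval']) / 1000)   # 25.0 -> heartbeat interval in seconds
print('websocket' in payload['upgrades'])    # True -> websocket transport allowed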
Example #16
class NioCommand(OAuth2ServiceAccount, Block):

    version = VersionProperty("0.1.2")
    params = ListProperty(URLParameter, title="Command Parameters", default=[])
    host = StringProperty(title="n.io Host", default="[[NIOHOST]]")
    port = IntProperty(title="n.io Port", default="[[NIOPORT]]")
    service_name = Property(title="Service Name", default='')
    block_name = Property(title="Block Name (optional)", default='')
    command_name = Property(title="Command Name", default='')
    security_method = SelectProperty(SecurityMethod,
                                     default=SecurityMethod.BASIC,
                                     title='Security Method')
    basic_auth_creds = ObjectProperty(BasicAuthCreds,
                                      title='Credentials (BasicAuth)')

    # We should periodically re-authenticate with Google; this is the interval
    # at which to do so.
    # Ideally, we would use the expiry time in the OAuth token that we get
    # back, but that would require a non-backwards-compatible change to the
    # OAuth2 mixin, so for now an extra non-visible property will have to do.
    reauth_interval = TimeDeltaProperty(title="Reauthenticate Interval",
                                        visible=False,
                                        default={'seconds':
                                                 2400})  # Default to 40 mins

    def __init__(self):
        super().__init__()
        self._access_token = None
        self._reauth_job = None

    def configure(self, context):
        super().configure(context)
        if self.security_method() == SecurityMethod.OAUTH:
            self._init_access_token()

    def process_signals(self, signals):
        output_sigs = []
        for signal in signals:
            try:
                url, headers = self._get_url(signal)
                if url:
                    resp = requests.get(url, headers=headers)
                    sigs = self._process_response(resp)
                    output_sigs.extend(sigs)
            except Exception:
                self.logger.exception('Failed to process signals')
        if output_sigs:
            self.notify_signals(output_sigs)

    def _process_response(self, resp):
        status = resp.status_code
        if status != 200:
            self.logger.error(
                "Status {0} returned while requesting : {1}".format(
                    status, resp))
        try:
            data = resp.json()
        except:
            data = resp.text
        sigs = self._build_signals(data)
        return sigs

    def _build_signals(self, data):
        sigs = []
        if isinstance(data, dict):
            sigs.append(Signal(data))
        elif isinstance(data, list):
            for d in data:
                sigs.extend(self._build_signals(d))
        else:
            sigs.append(Signal({'resp': data}))
        return sigs

    def _init_access_token(self):
        try:
            self._access_token = self.get_access_token('openid email')
            self.logger.debug("Obtained access token: {}".format(
                self._access_token))

            if self._reauth_job:
                self._reauth_job.cancel()

            # Remember to reauthenticate at a certain point if it's configured
            if self.reauth_interval().total_seconds() > 0:
                self._reauth_job = Job(self._init_access_token,
                                       self.reauth_interval(), False)

        except OAuth2Exception:
            self.logger.exception('Error obtaining access token')
            self._access_token = None

    def _get_params(self, signal):
        """ Return a dictionary of any configured URL parameters """
        params = dict()
        for param in self.params():
            try:
                params[param.prop_name(signal)] = param.prop_value(signal)
            except Exception:
                self.logger.exception('Failed to evaluate command params')
        return params

    def _get_url(self, signal):
        try:
            service = self.service_name(signal)
            block = self.block_name(signal)
            command = self.command_name(signal)
        except Exception:
            self.logger.exception('Failed to evaluate command definition')
            return None, None
        if not service or not command:
            self.logger.error(
                '`Service Name` and `Command Name` are required parameters')
            return None, None
        if not block:
            url = "http://{}:{}/services/{}/{}?{}".format(
                self.host(), self.port(), service, command,
                urlencode(self._get_params(signal)))
        else:
            url = "http://{}:{}/services/{}/{}/{}?{}".format(
                self.host(), self.port(), service, block, command,
                urlencode(self._get_params(signal)))
        headers = self._get_headers()
        self.logger.debug('Commanding: {} {}'.format(url, headers))
        return url, headers

    def _get_headers(self):
        headers = {"Content-Type": "application/json"}
        if self.security_method() == SecurityMethod.OAUTH:
            headers.update(self.get_access_token_headers())
        if self.security_method() == SecurityMethod.BASIC:
            # HTTP Basic auth credentials: "username:password", base64-encoded
            user = '{}:{}'.format(self.basic_auth_creds().username(),
                                  self.basic_auth_creds().password())
            b64 = base64.b64encode(user.encode('ascii')).decode('ascii')
            headers.update({'Authorization': 'Basic {}'.format(b64)})
        return headers
class PubSubConnectivity(object):
    """ Adds connectivity awareness to pubsub blocks
    """
    timeout = TimeDeltaProperty(title='Connect Timeout',
                                default={'seconds': 2},
                                advanced=True)

    def __init__(self):
        super().__init__()

        # make sure it inherits from Block's root class, since this mixin
        # assumes access to notify_management_signal, status, logger, etc.
        if not isinstance(self, BlockBase):
            raise ValueError(
                "PubSubConnectivity requires it's use within a Block instance")

        self._connected = None
        self._connected_lock = RLock()
        self._warning_status_set = False
        self._connected_event = Event()

    def conn_configure(self, is_connected):
        """ Sets up instance for connectivity checks

        Args:
            is_connected (callable): function to invoke to establish initial
                connectivity status
        """
        with self._connected_lock:
            connected = is_connected()
            self.logger.info("Starting in: '{}' state".format(
                "connected" if connected else "disconnected"))
            self._connected = connected

        if not connected:
            # per spec, hold the configure method hoping to get connected
            if not self._connected_event.wait(self.timeout().total_seconds()):
                self._notify_disconnection()

    def conn_on_connected(self):
        with self._connected_lock:
            # remove any possible wait for on_connected event
            self._connected_event.set()
            self._connected = True

        # if there was a warning status formerly notified then
        # notify "recovery"
        if self._warning_status_set:
            self.status.remove(RunnerStatus.warning)
            # notify status change
            signal = BlockStatusSignal(RunnerStatus.started,
                                       message="Block is connected")
            self.notify_management_signal(signal)

    def conn_on_disconnected(self):
        # ignore disconnections when stopping/stopped
        if self.status.is_set(RunnerStatus.stopping) or \
           self.status.is_set(RunnerStatus.stopped):
            return

        with self._connected_lock:
            self._connected_event.clear()
            self._connected = False
        self._notify_disconnection()

    def _notify_disconnection(self):
        with self._connected_lock:
            # double check that we are disconnected before notifying
            if not self._connected:
                signal = BlockStatusSignal(RunnerStatus.warning,
                                           message="Block is not connected")
                self.notify_management_signal(signal)
                # set block in warning status
                self.status.add(RunnerStatus.warning)
                self._warning_status_set = True
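
A minimal usage sketch of the mixin above. The transport class below is a made-up stand-in; a real block wires conn_on_connected / conn_on_disconnected to its pubsub client's callbacks and passes an is_connected callable to conn_configure:

class _IllustrativeTransport(object):
    """ Hypothetical stand-in for a real pubsub client. """

    def __init__(self, on_connected, on_disconnected):
        self._on_connected = on_connected
        self._on_disconnected = on_disconnected

    def is_connected(self):
        return True

class ExampleConnectedBlock(PubSubConnectivity, Block):

    def configure(self, context):
        super().configure(context)
        # wire the client's connection events to the mixin's callbacks
        self._client = _IllustrativeTransport(
            on_connected=self.conn_on_connected,
            on_disconnected=self.conn_on_disconnected)
        # establish the initial state; this may block for up to `timeout`
        # while waiting for a connection
        self.conn_configure(self._client.is_connected)
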
class RESTPolling(Block):
    """ A base class for blocks that poll restful web services.

    """
    polling_interval = TimeDeltaProperty(title='Polling Interval',
                                         default={"seconds": 20})
    retry_interval = TimeDeltaProperty(title='Retry Interval',
                                       default={"seconds": 60})
    queries = ListProperty(StringType, title='Query Strings', default=[])
    include_query = StringProperty(title='Include Query Field',
                                   allow_none=True)
    retry_limit = IntProperty(title='Retry Limit', default=3)

    def __init__(self):
        super().__init__()
        self._n_queries = 0
        self._url = None
        self._paging_url = None
        self._page_num = 1
        self._idx = 0
        self._poll_job = None
        self._retry_job = None
        self._retry_interval = None
        self._etags = [None]
        self._modifieds = [None]
        self._freshest = [None]
        self._prev_freshest = [None]
        self._prev_stalest = [None]
        self._curr_fresh = None
        self._curr_stale = None
        self._poll_lock = Lock()
        self._retry_count = 0
        self._auth = None
        self._recent_posts = None
        self._num_locks = 0
        self._max_locks = 5  # the max number of lock acquirers that can wait

        # this should be overridden in child blocks to refer to the actual
        # "created at" field for items returned from the particular service
        self._created_field = 'created_at'

    def configure(self, context):
        super().configure(context)
        self._authenticate()
        self._retry_interval = self.retry_interval()
        self._n_queries = len(self.queries())
        self._etags *= self._n_queries
        self._modifieds *= self._n_queries
        self._prev_freshest *= self._n_queries
        self._prev_stalest *= self._n_queries
        self._recent_posts = [None] * self._n_queries

    def start(self):
        super().start()
        if self.polling_interval().total_seconds() > 0:
            self._poll_job = Job(self.poll, self.polling_interval(), True)
            spawn(self.poll)
        else:
            self.logger.info("No poll job")

    def stop(self):
        super().stop()
        if self._poll_job is not None:
            self._poll_job.cancel()
        if self._retry_job is not None:
            self._retry_job.cancel()

    def process_signals(self, signals):
        if self._retry_job is None:
            for signal in signals:
                self.poll()
        else:
            self.logger.debug("A 'retry' is currently scheduled. "
                              "Ignoring incoming signals.")

    def poll(self, paging=False, in_retry=False):
        """ Called from user-defined block. Assumes that self.url contains
        the fully-formed endpoint intended for polling.

        Signals are notified from here.

        Args:
            paging (bool): Are we paging?
            in_retry (bool): whether poll was called from a retry job

        Returns:
            None

        """
        if self._n_queries == 0:
            return

        if self._num_locks >= self._max_locks:
            self.logger.warning(
                "Currently {} locks waiting to be acquired. This is more than "
                "the max of {}. Ignoring poll".format(self._num_locks,
                                                      self._max_locks))
            return

        # Increment the number of lock waiters so we don't build up too many
        self._num_locks += 1
        with self._poll_lock:
            if self._retry_job is None or in_retry:
                self._locked_poll(paging)
            else:
                self.logger.debug("A 'retry' is already scheduled. "
                                  "Skipping this poll.")

        self._num_locks -= 1

    def _locked_poll(self, paging=False):
        """ Execute the poll, while being assured that resources are locked """

        if not paging:
            # This is the first page of a new query.
            self._recent_posts[self._idx] = {}
            self.page_num = 1

        headers = self._prepare_url(paging)
        url = self.paging_url or self.url

        self.logger.debug("{}: {}".format("Paging" if paging else "Polling",
                                          url))

        resp = self._execute_request(url, headers, paging)
        if resp is None:
            return

        self.etag = self.etag if paging else resp.headers.get('ETag')
        self.modified = self.modified if paging \
            else resp.headers.get('Last-Modified')

        try:
            if not self._validate_response(resp):
                self._on_failure(resp, paging, url)
            else:
                self._on_success(resp, paging)
        except Exception as e:
            self.logger.exception(e)
            self.logger.warning(
                "Error processing polling response: {}: {}".format(
                    type(e).__name__, str(e)))

    def _on_failure(self, resp, paging, url):
        """ This can be overridden in user-defined blocks.

        Defines how failed polling requests will be handled.

        """
        try:
            status_code = resp.status_code
            resp = resp.json()
        except Exception:
            # Response is not json.
            # This is fine. We're just logging a warning about the resp.
            pass
        finally:
            self.logger.warning(
                "Polling request of {} returned status {}: {}".format(
                    url, status_code, resp))
            self._retry(paging)

    def _on_success(self, resp, paging):
        """ This can be overridden in user-defined blocks.

        Defines how successful polling requests will be handled.

        """
        self._reset_retry_cycle()

        signals, paging = self._process_response(resp)
        self.logger.debug('signals pre-remove-duplicates: %s' % signals)
        signals = self._discard_duplicate_posts(signals)
        self.logger.debug('signals post-remove-duplicates: %s' % signals)

        # add the include_query attribute if it is configured
        if self.include_query() and signals is not None:
            for s in signals:
                setattr(s, self.include_query(), unquote(self.current_query))

        if signals:
            self.notify_signals(signals)

        if paging:
            self.page_num += 1
            self._paging()
        else:
            self._epilogue()

    def _reset_retry_cycle(self):
        """ This can be overridden in user-defined blocks.

        Logic for cleaning up retry jobs and counters goes here.

        """
        # cancel the retry job if we were in a retry cycle
        if self._retry_job is not None:
            self._retry_job.cancel()
            self._retry_job = None
        self._retry_interval = self.retry_interval()
        # this poll was a success so reset the retry count
        self._retry_count = 0

    def _epilogue(self):
        """ This can be overridden in user-defined blocks.

        Defines behavior after a query has been fully processed,
        when we are ready for the next query. That is, when paging
        is done and retries are cleared.

        """
        if self.polling_interval().total_seconds() > 0:
            self._poll_job = self._poll_job or Job(
                self.poll, self.polling_interval(), True)
        self._increment_idx()
        if self.queries():
            self.logger.debug("Preparing to query for: %s" %
                              self.current_query)

    def _authenticate(self):
        """ This should be overridden in user-defined blocks.

        This is where an oauth handshake would take place or a url would
        be enriched with auth data.

        """
        pass

    def _validate_response(self, resp):
        """ This can be overridden in user-defined blocks.

        This is where we determine if a response is bad and we need a retry.

        Returns:
            validation (bool): True if response is good, False if bad.

        """
        return resp.status_code == 200 or resp.status_code == 304

    def _retry(self, paging):
        """

        This is where we determine what to do on a bad poll response.

        """
        self.logger.debug("Attempting to re-authenticate.")
        self._authenticate()
        self.logger.debug("Attempting to retry poll.")
        self._retry_poll(paging)

    def _prepare_url(self, paging):
        """ This should be overridden in user-defined blocks.

        Makes any necessary amendments, interpolations, etc. to self._url.

        """
        pass

    def _process_response(self, resp):
        """ This should be overridden in user-defined blocks.

        Do what thou wilt with the polling response.

        Args:
            resp (Response): A Response object (from requests lib)

        Returns:
            signals (list(Signal)): A list of signal object to notify.
            paging (dict/list/obj): Paging data, possibly None, from the
                recorded response.

        """
        pass

    def _paging(self):
        """ This can be overridden in user-defined blocks.

        Logic for handling paging situations.

        """
        # cancel the polling job while we are paging
        if self._poll_job is not None:
            self._poll_job.cancel()
            self._poll_job = None

        self._locked_poll(True)

    def _update_retry_interval(self):
        """ This should be overridden in user-defined blocks.

        Implement your retry strategy here. Exponential backoff? War?

        """
        self.logger.debug("Updating retry interval from {} to {}".format(
            self._retry_interval, self._retry_interval * 2))
        self._retry_interval *= 2

    def _retry_poll(self, paging=False):
        """ Helper method to schedule polling retries.

        """
        if self._poll_job is not None:
            self._poll_job.cancel()
            self._poll_job = None
        if self._retry_count < self.retry_limit():
            self.logger.debug("Retrying the polling job...")
            self._retry_count += 1
            self._retry_job = Job(self.poll,
                                  self._retry_interval,
                                  False,
                                  paging=paging,
                                  in_retry=True)
            self._update_retry_interval()
        else:
            self.logger.error("Out of retries. "
                              "Aborting and changing status to Error.")
            status_signal = BlockStatusSignal(RunnerStatus.error,
                                              'Out of retries.')

            # Leaving source for backwards compatibility
            # In the future, you will know that a status signal is a block
            # status signal when it contains service_name and name
            #
            # TODO: Remove when source gets added to status signals in nio
            setattr(status_signal, 'source', 'Block')

            self.notify_management_signal(status_signal)

    def update_freshness(self, posts):
        """ Bookkeeping for the state of the current query's polling.

        """
        self._curr_fresh = self.created_epoch(posts[0])
        self._curr_stale = self.created_epoch(posts[-1])
        if self._poll_job is not None:
            if self.prev_freshest is None or \
                    self.freshest > self.prev_freshest:
                self.prev_freshest = self.freshest
            self.freshest = self._curr_fresh

    def find_fresh_posts(self, posts):
        """ This can be overridden in user-defined blocks, if desired.

        Returns only those posts which were created after the newest
        post from the previous round of polling on the current query
        string.

        Note that self.created_epoch expects dictionaries.
        Reimplement that method if your posts use another structure.

        Args:
            posts (list(dict)): A list of posts.

        Returns:
            posts (list(dict)): The amended list of posts.

        """
        posts = [
            p for p in posts
            if self.created_epoch(p) > (self.prev_freshest or 0)
        ]
        return posts

    def _discard_duplicate_posts(self, posts):
        """ Removes sigs that were already found by another query.

        Each query acts independently so if a post matches multiple
        queries, then it will be notified for each one. This method
        keeps track of the all the most recent posts for each query
        and discards posts if they are already here.

        Args:
            posts (list(dict)): A list of posts.
            first_page (bool): True if this is the first page of query.

        Returns:
            posts (list(dict)): The amended list of posts.

        """
        # No need to try to discard posts if there is only one query.
        if self._n_queries <= 1:
            return posts

        # Return only posts that are not in self._recent_posts.
        result = []
        for post in posts:
            post_id = self._get_post_id(post)
            is_dupe = False
            valid_records = [r for r in self._recent_posts if r is not None]
            for record in valid_records:
                if post_id in record:
                    is_dupe = True
                    break

            if not post_id or not is_dupe:
                result.append(post)
                if post_id:
                    # only remember posts that have a usable id
                    self._recent_posts[self._idx][post_id] = True

        return result

    def _get_post_id(self, post):
        """ Returns a uniquely identifying string for a post.

        This should be overridden in user-defined blocks.

        Args:
            post (dict): A post.
        Returns:
            id (string): A string that uniquely identifies a
                         post. None indicates that the post should
                         be treated as unique.
        """
        return None

    def created_epoch(self, post):
        """ Helper function to return the seconds since the epoch
        for the given post's 'created_time.

        Args:
            post (dict): Should contain a 'created_time' key.

        Returns:
            seconds (int): post[created_time] in seconds since epoch.

        """
        dt = self._parse_date(post.get(self._created_field, ''))
        return self._unix_time(dt)

    def _parse_date(self, date):
        """ Parses the service's date string format into a native datetime.

        This should be overridden in user-defined blocks.

        """
        exp = r"(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})"
        m = re.match(exp, date)
        return datetime(*[int(n) for n in m.groups(0)])

    def _execute_request(self, url, headers, paging):
        """ Execute the request, accounting for possible errors

        """
        # Requests won't generally throw exceptions, but this provides a
        # bit of convenience for the block developer.
        self.logger.debug('executing GET request with: url: %s, headers: %s, '
                          'paging: %s' % (url, headers, paging))
        resp = None
        try:
            if self._auth is not None:
                resp = requests.get(url, headers=headers, auth=self._auth)
            else:
                resp = requests.get(url, headers=headers)
        except Exception as e:
            self.logger.warning("GET request failed, details: %s" % e)

            # Use the usual retry strategy to resolve the error
            self._retry(paging)
        finally:
            return resp

    def _unix_time(self, dt):
        epoch = datetime.utcfromtimestamp(0)
        delta = dt - epoch
        return int(delta.total_seconds())

    def _increment_idx(self):
        self._idx = (self._idx + 1) % self._n_queries

    @property
    def current_query(self):
        return quote(self.queries()[self._idx])

    @property
    def url(self):
        return self._url

    @url.setter
    def url(self, url):
        self._url = url

    @property
    def paging_url(self):
        return self._paging_url

    @paging_url.setter
    def paging_url(self, url):
        self._paging_url = url

    @property
    def page_num(self):
        return self._page_num

    @page_num.setter
    def page_num(self, num):
        self._page_num = num

    @property
    def etag(self):
        return self._etags[self._idx]

    @etag.setter
    def etag(self, etag):
        self._etags[self._idx] = etag

    @property
    def modified(self):
        return self._modifieds[self._idx]

    @modified.setter
    def modified(self, modified):
        self._modifieds[self._idx] = modified

    @property
    def freshest(self):
        return self._freshest[self._idx]

    @freshest.setter
    def freshest(self, timestamp):
        self._freshest[self._idx] = timestamp

    @property
    def prev_freshest(self):
        return self._prev_freshest[self._idx]

    @prev_freshest.setter
    def prev_freshest(self, timestamp):
        self._prev_freshest[self._idx] = timestamp

    @property
    def prev_stalest(self):
        return self._prev_stalest[self._idx]

    @prev_stalest.setter
    def prev_stalest(self, timestamp):
        self._prev_stalest[self._idx] = timestamp
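
To make the override hooks above concrete, here is a minimal illustrative subclass. The endpoint, query parameter, and response shape are invented for the example; a real service block substitutes its own:

class ExamplePollingBlock(RESTPolling):
    """ Illustrative subclass that polls a hypothetical JSON endpoint. """

    URL_FORMAT = "https://api.example.com/posts?q={}&page={}"

    def _prepare_url(self, paging):
        # build self.url (and self.paging_url when paging) for the current query
        self.url = self.URL_FORMAT.format(self.current_query, 1)
        if paging:
            self.paging_url = self.URL_FORMAT.format(
                self.current_query, self.page_num)
        return {"Accept": "application/json"}

    def _process_response(self, resp):
        # assume a body like {"posts": [{"id": ..., "created_at": ...}], ...}
        body = resp.json()
        posts = self.find_fresh_posts(body.get('posts', []))
        if posts:
            self.update_freshness(posts)
        signals = [Signal(post) for post in posts]
        # return truthy paging data only if the service reports another page
        return signals, body.get('next_page')

    def _get_post_id(self, post):
        # deduplicate across queries using the service's post id
        post_id = post.get('id')
        return str(post_id) if post_id is not None else None
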
Example #19
0
class SignalRate(GroupBy, Persistence, Block):

    report_interval = TimeDeltaProperty(default={"seconds": 1},
                                        title="Report Interval")
    averaging_interval = TimeDeltaProperty(default={"seconds": 5},
                                           title="Averaging Interval")
    version = VersionProperty("0.1.1")

    def __init__(self):
        super().__init__()
        self._signal_counts = defaultdict(deque)
        self._signals_lock = Lock()
        self._job = None
        self._start_time = None
        self._averaging_seconds = None

    def persisted_values(self):
        """ Overridden from persistence mixin """
        return ['_start_time', '_signal_counts']

    def configure(self, context):
        super().configure(context)
        # This is just for backwards compatibility with persistence
        if self._signal_counts.default_factory == list:
            self._signal_counts.default_factory = deque
            for group in self._signal_counts:
                self._signal_counts[group] = deque(self._signal_counts[group])

    def start(self):
        super().start()
        # use _start_time if it was loaded from persistence
        self._start_time = self._start_time or _time()
        self._averaging_seconds = self.averaging_interval().total_seconds()
        self._job = Job(self.report_frequency, self.report_interval(), True)

    def process_signals(self, signals, input_id='default'):
        # Record the count for each group in this list of signals
        self.for_each_group(self.record_count, signals)

    def record_count(self, signals, group):
        """ Save the time and the counts for each group received """
        with self._signals_lock:
            self._signal_counts[group].append((_time(), len(signals)))

    def report_frequency(self):
        signals = []

        self.for_each_group(self.get_frequency, sigs_out=signals)

        self.logger.debug("Current counts: {}".format(self._signal_counts))

        if signals:
            self.notify_signals(signals)

    def get_frequency(self, group, sigs_out):
        """ Get the frequency for a group and add it to sigs_out """
        with self._signals_lock:
            ctime = _time()
            self._signal_counts[group] = self.trim_old_signals(
                self._signal_counts[group], ctime)

            signals = copy(self._signal_counts[group])

        # Add up all of our current counts
        total_count = sum(grp[1] for grp in signals)

        # If we haven't reached a full period, divide by elapsed time
        rate = total_count / min(ctime - self._start_time,
                                 self._averaging_seconds)

        sigs_out.append(Signal({"group": group, "rate": rate}))

    def trim_old_signals(self, signal_counts, ctime):
        """ Take some signal counts and get rid of old ones """
        while len(signal_counts) and \
                ctime - signal_counts[0][0] >= self._averaging_seconds:
            signal_counts.popleft()
        return signal_counts

    def stop(self):
        if self._job:
            self._job.cancel()
        super().stop()
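
For example, the rate computed in get_frequency above works out as follows (numbers are illustrative):

# With a 5-second Averaging Interval, 10 signals counted over the full window
# give 10 / 5 = 2.0 signals/sec. If the block has only been running for 2
# seconds, the same formula divides by the elapsed time instead:
elapsed = 2.0                # seconds since the block started
averaging_seconds = 5.0      # configured Averaging Interval
total_count = 4              # signals recorded so far
rate = total_count / min(elapsed, averaging_seconds)   # -> 2.0 signals/sec
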
class DynamicPublisher(PubSubConnectivity, TerminatorBlock):
    version = VersionProperty("0.2.0")
    topic = StringProperty(
        title="Topic",
        default="",
        order=0)

    ttl = TimeDeltaProperty(
        title="Time-to-live",
        advanced=True,
        order=0,
        default=dict(seconds=600))
    is_local = BoolProperty(
        advanced=True,
        default=False,
        order=1,
        title="Local Publisher?")
    local_identifier = StringProperty(
        advanced=True,
        default='[[INSTANCE_ID]]',
        order=2,
        title='Local Identifier')

    def __init__(self):
        super().__init__()
        self._cache = keydefaultdict(
            lambda topic: (self.__create_publisher(topic), None))
        self._cache_lock = Lock()
        self._is_local = False
        self._local_id = None

    def configure(self, context):
        super().configure(context)
        self._is_local = self.is_local()
        if self._is_local:
            self._local_id = self.local_identifier()

    def stop(self):
        with self._cache_lock:
            for topic in self._cache:
                (pub, job) = self._cache[topic]
                if job is not None:
                    job.cancel()
                pub.close()

            self._cache.clear()

    def process_signals(self, in_signals):
        """ Publish each group of signals """
        ttl = self.ttl()
        groups = defaultdict(list)

        for signal in in_signals:
            try:
                topic = self.topic(signal)
                if self._is_local and self._local_id:
                    topic = '{}.{}'.format(self._local_id, topic)
            except Exception:
                self.logger.exception('topic expression failed, ignoring signal')
                continue
            groups[topic].append(signal)

        for topic, out_signals in groups.items():
            try:
                if self._is_local:
                    out_signals = [Signal({"signals": b64encode(pickle.dumps(out_signals))})]

                self.__get_publisher(topic, ttl).send(out_signals)
            except pickle.PicklingError:
                self.logger.exception("Pickling based pickle error")
            except TypeError:
                self.logger.exception("Unable to encode pickled signals")
            except PublisherError:  # pragma no cover
                self.logger.exception('Error publishing {:n} signals to "{}"'.format(len(out_signals), topic))
            except Exception:
                self.logger.exception("Error processing signals")

    def __close_publisher(self, topic):
        with self._cache_lock:
            self.logger.info('removing expired publisher for "{}"'.format(topic))
            pub, _ = self._cache.pop(topic)
            pub.close()

    def __create_publisher(self, topic):
        self.logger.info('creating new publisher for "{}"'.format(topic))
        publisher = Publisher(topic=topic)

        try:
            publisher.open(
                on_connected=self.conn_on_connected,
                on_disconnected=self.conn_on_disconnected)
        except TypeError:
            self.logger.warning(
                'Connecting to an outdated communication module')
            # try previous interface
            publisher.open()
            # no need to configure connectivity if not supported
            return publisher

        self.conn_configure(publisher.is_connected)
        return publisher

    def __get_publisher(self, topic, ttl):
        with self._cache_lock:
            publisher, prev_job = self._cache[topic]
            if prev_job is not None:
                prev_job.cancel()

            job = (Job(
                self.__close_publisher,
                ttl,
                False,
                topic,
            ) if ttl.total_seconds() >= 0 else None)

            self._cache[topic] = (publisher, job)

            return publisher
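
Note that keydefaultdict used in __init__ above is not part of the standard library; a common recipe (assumed here) is a defaultdict whose factory is called with the missing key, which is what lets the cache build a publisher per topic on first access:

from collections import defaultdict

class keydefaultdict(defaultdict):
    """ defaultdict variant whose factory receives the missing key. """

    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        value = self[key] = self.default_factory(key)
        return value
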