class Stagger(Block):

    version = VersionProperty("1.0.1")
    period = TimeDeltaProperty(title='Period', default={"seconds": 1})
    min_interval = TimeDeltaProperty(title='Minimum Interval',
                                     advanced=True,
                                     default={"microseconds": 100000})

    def process_signals(self, signals, input_id=None):
        stagger_period = self._get_stagger_period(len(signals))
        self.logger.debug("{} signals received, notifying every {}".format(
            len(signals), stagger_period))
        # Launch the notification mechanism in a new thread so that it can
        # sleep between notifications
        stagger_data = StaggerData(
            stagger_period,
            math.ceil(self.period() / stagger_period),
            signals,
            self.notify_signals,
            self.logger,
        )
        stagger_data.start_notify()

    def _get_stagger_period(self, num_signals):
        """ Returns the stagger period based on a number of signals """
        return max(self.period() / num_signals, self.min_interval())
class RethinkDBBase(LimitLock, Retry, Block):

    """ A block for communicating with a RethinkDB server.

    Properties:
        host (str): server host to connect to
        port (int): port on the server host, default rethink port is 28015
        database_name (str): database name to access
        connect_timeout (interval): time to wait for a successful connection
    """

    version = VersionProperty('1.0.0')
    host = StringProperty(title='Host', default='[[RETHINKDB_HOST]]')
    port = IntProperty(title='Port', default='[[RETHINKDB_PORT]]')
    database_name = StringProperty(title='DB name', default='test')
    connect_timeout = TimeDeltaProperty(title="Connect timeout",
                                        default={"seconds": 20},
                                        visible=False)

    def process_signals(self, signals):
        self.execute_with_lock(
            self._locked_process_signals, 10, signals=signals)

    def _locked_process_signals(self, signals):
        pass
class SignalRepeater(GroupBy, Block):

    version = VersionProperty('0.1.1')
    max_repeats = IntProperty(title='Max Repeats', default=-1)
    interval = TimeDeltaProperty(title='Repeat Interval',
                                 default={'seconds': 10})

    def configure(self, context):
        super().configure(context)
        self.notifications = defaultdict(dict)
        self._group_locks = defaultdict(Lock)

    def stop(self):
        for group in copy(self.notifications):
            self._cancel_group_job(group)
        super().stop()

    def _cancel_group_job(self, group):
        job = self.notifications[group].get('job')
        if job:
            self.logger.debug("Cancelling job for group {}".format(group))
            job.cancel()
        del self.notifications[group]

    def process_group_signals(self, signals, group, input_id='repeat'):
        if input_id == 'cancel':
            self._cancel_group_job(group)
            return
        if len(signals) == 0:
            return
        signal = signals[-1]
        repeats_remaining = self.max_repeats(signal)
        with self._group_locks[group]:
            self._cancel_group_job(group)
            if repeats_remaining == 0:
                # They don't want to repeat, ignore
                return
            self.logger.debug("Setting up repeat for group {}".format(group))
            self.notifications[group]['signal'] = signal
            self.notifications[group]['num_remaining'] = repeats_remaining
            self.notifications[group]['job'] = Job(
                target=self.notify_group,
                delta=self.interval(signal),
                repeatable=True,
                group=group)

    def notify_group(self, group):
        with self._group_locks[group]:
            notification = self.notifications[group]
            if notification.get('num_remaining', 0) != 0:
                notification['num_remaining'] -= 1
                self.logger.debug(
                    "Notifying signal for group {}, {} remaining".format(
                        group, notification['num_remaining']))
                self.notify_signals([notification['signal']])
            else:
                self._cancel_group_job(group)
class SafeTrigger():

    """ Guarantees notifying signals every interval, regardless of count """

    interval = TimeDeltaProperty(title='Interval',
                                 default={'seconds': 1},
                                 order=0)
    max_count = IntProperty(title='Max Count', default=1, order=1)

    def __init__(self):
        super().__init__()
        self._job = None
        self.stop_event = Event()
        self.signal_lock = Lock()

    def start(self):
        super().start()
        self._job = Job(self._emit, self.interval(), True)
        # Run an emit cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._emit)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()
        self.stop_event.set()
        super().stop()

    def _emit(self):
        """ Called every *interval* to generate then notify the signals """
        self.logger.debug("New generation cycle requested")
        count = 0
        signals = []

        # Stop any currently running simulator threads
        self.stop_event.set()

        # We only want one simulator thread simulating at a time
        with self.signal_lock:
            # Ok, we're running, so clear the event and wait
            self.stop_event.clear()
            self.logger.debug("Starting generation...")
            while count < self.max_count() and not self.stop_event.is_set():
                signals.extend(self.generate_signals(1))
                count += 1

        self.logger.debug("Notifying {} signals".format(len(signals)))
        self.notify_signals(signals)
class IntervalTrigger():

    """Generate signals at a regular interval up to total_signals"""

    total_signals = IntProperty(title="Total Number of Signals", default=-1,
                                order=4)
    interval = TimeDeltaProperty(title='Interval',
                                 default={'seconds': 1},
                                 order=0)

    def __init__(self):
        super().__init__()
        self.counter = None
        self._job = None

    def start(self):
        super().start()
        self.counter = 0
        # Schedule interval simulations for the future
        self._job = Job(self._simulate, self.interval(), True)
        # But also simulate right away
        self._simulate()

    def _simulate(self):
        sigs = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(sigs, list):
            sigs = list(sigs)
        # Add however many signals were generated (in case multiple
        # signals mixin was used) to the counter and notify them
        self.counter += len(sigs)
        # If the counter has passed total_signals, trim the excess
        # (self.counter - self.total_signals()) signals from the end
        if self.counter > self.total_signals() and self.total_signals() >= 0:
            sigs_to_remove = self.counter - self.total_signals()
            sigs = sigs[:-1 * sigs_to_remove]
        self.notify_signals(sigs)
        if self.total_signals() > 0 and \
                self.counter >= self.total_signals():
            self._job.cancel()

    def stop(self):
        """ Stop the simulator thread. """
        self._job.cancel()
        super().stop()
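# A minimal sketch (not from the source) of how a trigger mixin such as
# IntervalTrigger is typically combined with a block that supplies
# generate_signals(): the mixin handles scheduling and the total_signals
# cutoff, while the block only produces signals. The CounterSimulator block
# name and its "sim" attribute are hypothetical.
class CounterSimulator(IntervalTrigger, Block):

    version = VersionProperty("0.1.0")

    def __init__(self):
        super().__init__()
        self._count = 0

    def generate_signals(self, n=1):
        # Yield n simple signals; the mixin converts the generator to a list
        for _ in range(n):
            self._count += 1
            yield Signal({"sim": self._count})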
class Debounce(GroupBy, Block):

    interval = TimeDeltaProperty(title='Debounce Interval', default={
        'days': 0, 'seconds': 1, 'microseconds': 0})
    version = VersionProperty("0.1.1")

    def __init__(self):
        super().__init__()
        self._last_emission = defaultdict(lambda: None)

    def process_group_signals(self, signals, group, input_id):
        """ Check configured interval and return a signal if valid. """
        now = datetime.utcnow()
        if self._last_emission[group] is None or \
                now - self._last_emission[group] > self.interval(signals[-1]):
            self._last_emission[group] = now
            return signals[:1]
        else:
            return []
class FacebookFeed(RESTPolling):

    """ This block polls the Facebook Graph API, using the feed endpoint

    Params:
        phrase (str): The phrase with which to search posts. Need not be
            url-quoted.
        limit (int): Maximum number of posts contained in each response.
        lookback (timedelta): Initial window of desirable posts (for the
            very first request).
    """
    URL_FORMAT = ("https://graph.facebook.com/v2.2/"
                  "{}/{}?since={}&limit={}")
    TOKEN_URL_FORMAT = ("https://graph.facebook.com/oauth"
                        "/access_token?client_id={0}&client_secret={1}"
                        "&grant_type=client_credentials")

    creds = ObjectProperty(Creds, title='Credentials', default=Creds())
    lookback = TimeDeltaProperty(title='Lookback', default={"seconds": 0})
    limit = IntProperty(title='Limit (per poll)', default=10)
    feed_type = SelectProperty(FeedType, default=FeedType.FEED,
                               title='Feed Type')
    version = VersionProperty("1.0.2")

    def __init__(self):
        super().__init__()
        self._url = None
        self._paging_field = "paging"
        self._created_field = "created_time"
        self._access_token = None

    def configure(self, context):
        super().configure(context)
        lb = self._unix_time(datetime.utcnow() - self.lookback())
        self._freshest = [lb] * self._n_queries

    def _authenticate(self):
        """ Overridden from the RESTPolling block.

        Generates and records the access token for pending requests.
        """
        if self.creds().consumer_key() is None or \
                self.creds().app_secret() is None:
            self.logger.error("You need a consumer key and app secret, yo")
        else:
            self._access_token = self._request_access_token()

    def _process_response(self, resp):
        """ Extract fresh posts from the Facebook graph api response object.

        Args:
            resp (Response)

        Returns:
            signals (list(Signal)): The list of signals to notify, each of
                which corresponds to a fresh FB post.
            paging (bool): Denotes whether or not paging requests are
                necessary.
        """
        signals = []
        resp = resp.json()
        fresh_posts = posts = resp['data']
        paging = resp.get(self._paging_field) is not None
        self.logger.debug("Facebook response contains %d posts" % len(posts))

        # we shouldn't see empty responses, but we'll protect our necks.
        if len(posts) > 0:
            self.update_freshness(posts)
            fresh_posts = self.find_fresh_posts(posts)
            paging = len(fresh_posts) == self.limit()

            # store the timestamp of the oldest fresh post for use in url
            # preparation later.
            if len(fresh_posts) > 0:
                self.prev_stalest = self.created_epoch(fresh_posts[-1])

        signals = [FacebookSignal(p) for p in fresh_posts]
        self.logger.debug("Found %d fresh posts" % len(signals))

        return signals, paging

    def _request_access_token(self):
        """ Request an access token directly from facebook.

        Args:
            None

        Returns:
            token (str): The access token, which goes on the end of a
                request.
        """
        resp = requests.get(
            self.TOKEN_URL_FORMAT.format(self.creds().consumer_key(),
                                         self.creds().app_secret()))
        status = resp.status_code

        # If the token request fails, try to use the configured app id
        # and secret. This probably won't work, but the docs say that it
        # should. For more info, see:
        # https://developers.facebook.com/docs/facebook-login/access-tokens
        token = "%s|%s" % (self.creds().consumer_key(),
                           self.creds().app_secret())

        if status == 200:
            token = resp.text.split('access_token=')[1]
        else:
            self.logger.error(
                "Facebook token request failed with status %d" % status)
        return token

    def _prepare_url(self, paging=False):
        """ Overridden from RESTPolling block.

        Appends the access token to the format string and builds the
        headers dictionary. If paging, we do some string interpolation
        to get our arguments into the request url. Otherwise, we append
        the until parameter to the end.

        Args:
            paging (bool): Are we paging?

        Returns:
            headers (dict): Contains the (case sensitive) http headers.
        """
        headers = {"Content-Type": "application/json"}
        fmt = "%s&access_token=%s" % (self.URL_FORMAT, self._access_token)
        if not paging:
            self.paging_url = None
            feed_type = self.feed_type().value
            self.url = fmt.format(self.current_query, feed_type,
                                  self.freshest - 2, self.limit())
        else:
            self.paging_url = "%s&until=%d" % (self.url, self.prev_stalest)
        return headers

    def _on_failure(self, resp, paging, url):
        execute_retry = True
        try:
            status_code = resp.status_code
            resp = resp.json()
            err_code = resp.get('error', {}).get('code')
            if (status_code == 404 and err_code in [803, 2500] or
                    status_code == 500 and err_code == 2):
                # Page feed requests require only an access token [1] but
                # user feed requests require a user access token with
                # read_stream permission [2].
                # [1]: https://developers.facebook.com/docs/graph-api/
                #      reference/v2.2/page/feed
                # [2]: https://developers.facebook.com/docs/graph-api/
                #      reference/v2.2/user/feed
                self.logger.warning("Skipping feed: {}".format(
                    self.current_query))
                execute_retry = False
                self._increment_idx()
        finally:
            self.logger.error(
                "Polling request of {} returned status {}: {}".format(
                    url, status_code, resp))
            if execute_retry:
                self._retry(paging)
class AccelerometerChip(Block):

    """ A block that enriches incoming signals with the current values of a
    set of input pins.
    """

    version = VersionProperty("0.1.2")
    signal_name = StringProperty(title="Name", default="value")
    address = IntProperty(default=0x53, title="Address")
    chip = SelectProperty(ChipTypes, title="Chip", default=ChipTypes.ADXL345)
    interval = TimeDeltaProperty(title="Sampling Period",
                                 default={"microseconds": 50000})
    sample = SelectProperty(SampleTypes, title="Sample Type",
                            default=SampleTypes.Stats)
    range = SelectProperty(Ranges, title="G Range", default=Ranges._2G)

    def configure(self, context):
        super().configure(context)
        if self.chip() == ChipTypes.ADXL345:
            obj = get_adxl345()
        self._accel = obj(self.address())
        self._accel.set_range(self.range().value)
        self._job = None
        if self.sample() != SampleTypes.Last:
            self._samples = []
            self._thread = threading.Thread(target=self._sample_threaded)
            self._kill = False
            self._thread.start()

    def stop(self):
        super().stop()
        self._kill = True

    def _sample_threaded(self):
        sleeptime = self.interval().seconds \
            + self.interval().microseconds * 1e-6
        while not self._kill:
            self._sample()
            time.sleep(sleeptime)

    def _sample(self):
        self._samples.append(self._accel.read())

    def process_signals(self, signals):
        if self.sample() == SampleTypes.Last:
            value = self._accel.read()
            gval = sum(n**2 for n in value)
            gval = math.sqrt(gval)
            value = {
                "last": value,
                "last_magnitude": gval,
            }
        else:
            samples = []
            pop = self._samples.pop
            # get data in a threadsafe way
            while self._samples:
                samples.append(pop(0))
            if not samples:
                self.logger.error("Accelerometer has no samples!")
                return
            x, y, z = zip(*samples)
            # get gs squared
            x_gs = map(math.pow, x, itertools.repeat(2))
            y_gs = map(math.pow, y, itertools.repeat(2))
            z_gs = map(math.pow, z, itertools.repeat(2))
            # add them together: x^2 + y^2 + z^2
            sample_gs = map(sum, zip(x_gs, y_gs, z_gs))
            # take their square root to get the vector magnitude
            sample_gs = tuple(map(math.sqrt, sample_gs))
            max_g = max(sample_gs)
            min_g = min(sample_gs)
            max_i = sample_gs.index(max_g)
            min_i = sample_gs.index(min_g)
            mean_gs = statistics.mean(sample_gs)
            if len(sample_gs) >= 2:
                stdev_gs = statistics.stdev(sample_gs, mean_gs)
            else:
                stdev_gs = None
            value = {
                "max": samples[max_i],
                "min": samples[min_i],
                "mean": mean_gs,
                "stdev": stdev_gs,
                "last": samples[-1],
                "max_magnitude": max_g,
                "min_magnitude": min_g,
                "last_magnitude": sample_gs[-1]
            }
        name = self.signal_name()
        for s in signals:
            setattr(s, name, value)
        self.notify_signals(signals)
class ControlBands(GroupBy, Persistence, Block):

    band_interval = TimeDeltaProperty(default={"days": 1},
                                      title="Band Interval")
    value_expr = Property(default="{{ $value }}", title="Value")
    version = VersionProperty("1.0.2")

    def __init__(self):
        super().__init__()
        self._band_values = defaultdict(list)
        self._signals_lock = Lock()

    def process_signals(self, signals, input_id='default'):
        sigs_out = self.for_each_group(self.record_values, signals)
        if sigs_out:
            self.notify_signals(sigs_out)

    def persisted_values(self):
        """ Overridden from persistence mixin """
        return ['_band_values']

    def record_values(self, signals, group):
        """ Save the time and the list of signals for each group.

        This will return signals with the mean/band data included on them
        """
        sigs_out = []
        with self._signals_lock:
            ctime = _time()
            # First get rid of the old values
            self.trim_old_values(group, ctime)
            prev_values = self._get_current_values(group)
            self.logger.debug(
                "Previous values for group: {}".format(prev_values))

            # Start off a new band data using the latest value from the
            # previous band data objects
            new_values = BandData(prev_values.last_val)

            for sig in signals:
                try:
                    # the value must be a floating point value
                    value = float(self.value_expr(sig))
                    # Add the moving range data to the signal and add it to
                    # the list of signals to notify
                    sigs_out.append(
                        self._enrich_signal(
                            sig, prev_values + new_values, value))
                    # Now account for the latest value in the moving range
                    # data
                    new_values.register_value(value)
                except:
                    self.logger.exception(
                        "Unable to determine value for signal {}".format(
                            sig))

            # Archive the new values
            if new_values.count_items:
                self._band_values[group].append((ctime, new_values))

        return sigs_out

    def _enrich_signal(self, signal, band_data, value):
        """ Add relevant band data to the signal.

        Args:
            signal: The signal that we should add data to
            band_data (BandData): A single BandData object containing the
                current moving range information
            value: The value this signal contributed to the band data. This
                is used to determine how many deviations from the mean it
                is.

        Returns:
            The signal with updated data
        """
        range_mean = band_data.get_mean()
        range_deviation = band_data.get_range()
        if range_deviation != 0:
            deviations = (value - range_mean) / range_deviation
        else:
            deviations = 0

        class BandSignalData():
            def __init__(self, value, mean, deviation, deviations):
                self.value = value
                self.mean = mean
                self.deviation = deviation
                self.deviations = deviations

            def to_dict(self):
                """ Represent all BandSignalData attributes as a dict """
                return self.__dict__

        setattr(
            signal, 'band_data',
            BandSignalData(
                value, range_mean, range_deviation, deviations).to_dict())
        return signal

    def _get_current_values(self, group):
        """ Returns a single BandData object for a group.

        This will make use of the __add__ function in the BandData class to
        sum together all of the current data points in the group. The
        result will be a single BandData object with all of the previously
        saved points accounted for.
        """
        cur_values = self._band_values[group]
        if len(cur_values) > 1:
            # Sum every BandData (after the first), using the first one as
            # the starting point
            return sum(
                [data[1] for data in cur_values[1:]], cur_values[0][1])
        elif len(cur_values) == 1:
            return cur_values[0][1]
        else:
            return BandData()

    def trim_old_values(self, group, ctime):
        """ Remove any "old" saved values for a given group """
        group_values = self._band_values[group]
        self.logger.debug("Trimming old values - had {} items".format(
            len(group_values)))
        group_values[:] = [
            data for data in group_values
            if data[0] > ctime - self.band_interval().total_seconds()
        ]
        self.logger.debug("Now has {} items".format(len(group_values)))
class Interval(PropertyHolder):
    interval = TimeDeltaProperty(title='Interval', default={})
    repeatable = BoolProperty(title='Repeatable', default=False)
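# A minimal sketch (not from the source) of how a PropertyHolder such as
# Interval is typically consumed: a block embeds it with ObjectProperty (the
# same pattern used by FacebookFeed's Creds and NioCommand's BasicAuthCreds
# below) and evaluates the nested properties by calling them. The
# ScheduledBlock name and its "schedule" property are hypothetical.
class ScheduledBlock(Block):

    schedule = ObjectProperty(Interval, title='Schedule', default=Interval())

    def start(self):
        super().start()
        # TimeDeltaProperty values configured as dicts (e.g. {"seconds": 1})
        # evaluate to datetime.timedelta instances when called
        delta = self.schedule().interval()
        repeat = self.schedule().repeatable()
        self._job = Job(self._run, delta, repeat)

    def _run(self):
        self.notify_signals([Signal({"ran_at": datetime.utcnow()})])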
class SlidingWindow(GroupBy, Block):

    """Creates a sliding window of signals.

    Examples:
    { min_signals = 1, max_signals = 3 }
    input:  ----1------2--------3--------4--------5-->
    output: ----•------•--------•--------•--------•-->
              [1]    [1,2]  [1,2,3]  [2,3,4]  [3,4,5]

    { min_signals = 3, max_signals = 3 }
    input:  ----1------2--------3--------4--------5-->
    output: --------------------•--------•--------•-->
                            [1,2,3]  [2,3,4]  [3,4,5]

    { min_signals = 1, max_signals = 3, expiration: { milliseconds: 500 } }
    input:  ----1------2--------3--------4--| >500ms |---5-->
    output: ----•------•--------•--------•--|        |---•-->
              [1]    [1,2]  [1,2,3]  [2,3,4]            [5]
    """

    """TODO
    - [x] Window Expiration
    - [ ] Implement Group
    - [ ] Implement Persistence
    - [ ] Use Signal Expiration
    """

    version = VersionProperty("0.0.1")
    min_signals = IntProperty(default=1, title='Min Signals')
    max_signals = IntProperty(default=20, title='Max Signals')
    expiration = TimeDeltaProperty(title='Window Expiration', allow_none=True)

    def __init__(self):
        super().__init__()
        self._buffers = defaultdict(list)
        self._last_recv = defaultdict(lambda: datetime.min)

    def expire(self):
        self.logger.debug('Clearing the buffer window')
        self._buffers.clear()

    def process_group_signals(self, signals, group, input_id=None):
        now = datetime.utcnow()
        has_expiration = self.expiration() is not None
        if has_expiration and \
                (self._last_recv[group] + self.expiration()) < now:
            self.logger.debug('The buffer window has expired')
            self._buffers[group].clear()
        self._last_recv[group] = now
        for signal in signals:
            self._buffers[group].append(signal)
        del self._buffers[group][:-self.max_signals()]
        if len(self._buffers[group]) >= self.min_signals():
            return self._buffers[group]
class Buffer(Persistence, GroupBy, Block):

    version = VersionProperty("0.1.1")
    signal_start = BoolProperty(title='Start Interval On Signal?',
                                default=False)
    interval = TimeDeltaProperty(title='Buffer Interval',
                                 default={'seconds': 1},
                                 allow_none=True)
    interval_duration = TimeDeltaProperty(title='Interval Duration',
                                          allow_none=True)

    def __init__(self):
        super().__init__()
        self._last_emission = None
        self._cache = defaultdict(lambda: defaultdict(list))
        self._cache_lock = Lock()
        self._emission_job = None
        self._active_job = False

    def persisted_values(self):
        return ['_last_emission', '_cache']

    def start(self):
        # Start the emission job on service start unless signal_start is
        # checked
        if self.interval() and not self.signal_start():
            now = datetime.utcnow()
            latest = self._last_emission or now
            delta = self.interval() - (now - latest)
            self._emission_job = Job(
                self._emit_job,
                delta,
                False,
                group=None,
                reset=True,
            )

    def emit(self, group=None):
        self._emit_job(group)

    def _emit_job(self, group, reset=False):
        self.logger.debug('Emitting signals')
        if reset:
            self._emission_job.cancel()
            self._emission_job = Job(
                self._emit_job,
                self.interval(),
                True,
                group=group,
            )
        self._last_emission = datetime.utcnow()
        signals = self._get_emit_signals(group)
        self._active_job = False
        if signals:
            self.logger.debug('Notifying {} signals'.format(len(signals)))
            self.notify_signals(signals)
        else:
            self.logger.debug('No signals to notify')

    def _get_emit_signals(self, group=None):
        signals = []
        with self._cache_lock:
            if not group:
                for group in self._cache.keys():
                    signals.extend(self._get_emit_signals_for_group(group))
            else:
                signals.extend(self._get_emit_signals_for_group(group))
        return signals

    def _get_emit_signals_for_group(self, group):
        now = int(time())
        signals = []
        cache_times = sorted(self._cache[group].keys())
        if self.interval_duration():
            # Remove old signals from cache.
            old = now - int(self.interval_duration().total_seconds())
            self.logger.debug(
                'Removing signals from cache older than {}'.format(old))
            for cache_time in cache_times:
                if cache_time < old:
                    del self._cache[group][cache_time]
                else:
                    break
        for cache in cache_times:
            signals.extend(self._cache[group][cache])
        if not self.interval_duration():
            # Clear cache every time if duration is not set.
            self.logger.debug('Clearing cache of signals')
            self._cache[group] = defaultdict(list)
        return signals

    def process_signals(self, signals):
        self.for_each_group(self.process_group, signals)
        # Start a new job if signal_start is checked and there is no
        # active job
        if self.signal_start() and not self._active_job:
            self._emission_job = Job(
                self._emit_job,
                self.interval(),
                False,
                group=None,
                reset=False,
            )
            self._active_job = True  # Added flag for active job

    def process_group(self, signals, key):
        with self._cache_lock:
            now = int(time())
            self._cache[key][now].extend(signals)
class MergeStreams(Persistence, GroupBy, Block):

    """ Take two input streams and combine signals together. """

    expiration = TimeDeltaProperty(default={}, title="Stream Expiration")
    notify_once = BoolProperty(default=True, title="Notify Once?")
    version = VersionProperty('0.1.0')

    def _default_signals_dict(self):
        return {"input_1": {}, "input_2": {}}

    def _default_expiration_jobs_dict(self):
        return {"input_1": None, "input_2": None}

    def __init__(self):
        super().__init__()
        self._signals = defaultdict(self._default_signals_dict)
        self._signals_lock = defaultdict(Lock)
        self._expiration_jobs = defaultdict(
            self._default_expiration_jobs_dict)

    def persisted_values(self):
        """ Persist signals only when no expiration (ttl) is configured.

        Signals at each input will be persisted between block restarts
        except when an expiration is configured.

        TODO: Improve this feature so signals are always persisted and then
        properly removed after loading once the expiration has passed.
        """
        if self.expiration():
            return []
        else:
            return ["_signals"]

    def process_group_signals(self, signals, group, input_id):
        merged_signals = []
        with self._signals_lock[group]:
            for signal in signals:
                self._signals[group][input_id] = signal
                signal1 = self._signals[group]["input_1"]
                signal2 = self._signals[group]["input_2"]
                if signal1 and signal2:
                    merged_signal = self._merge_signals(signal1, signal2)
                    merged_signals.append(merged_signal)
                    if self.notify_once():
                        self._signals[group]["input_1"] = {}
                        self._signals[group]["input_2"] = {}
            if self.expiration():
                self._schedule_signal_expiration_job(group, input_id)
        return merged_signals

    def _merge_signals(self, signal1, signal2):
        """ Merge signals 1 and 2 and clear from memory if only notify once
        """
        sig_1_dict = signal1.to_dict()
        sig_2_dict = signal2.to_dict()
        self._fix_to_dict_hidden_attr_bug(sig_1_dict)
        self._fix_to_dict_hidden_attr_bug(sig_2_dict)
        merged_signal_dict = {}
        merged_signal_dict.update(sig_1_dict)
        merged_signal_dict.update(sig_2_dict)
        return Signal(merged_signal_dict)

    def _fix_to_dict_hidden_attr_bug(self, signal_dict):
        """ Remove special attributes from dictionary

        n.io has a bug when using Signal.to_dict(hidden=True). It should
        include private attributes (i.e. attributes starting with '_') but
        not special attributes (i.e. attributes starting with '__').
        """
        for key in list(signal_dict.keys()):
            if key.startswith('__'):
                del signal_dict[key]

    def _schedule_signal_expiration_job(self, group, input_id):
        """ Schedule expiration job, cancelling existing job first """
        if self._expiration_jobs[group][input_id]:
            self._expiration_jobs[group][input_id].cancel()
        self._expiration_jobs[group][input_id] = Job(
            self._signal_expiration_job, self.expiration(), False,
            group, input_id)

    def _signal_expiration_job(self, group, input_id):
        self._signals[group][input_id] = {}
        self._expiration_jobs[group][input_id] = None
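# A small illustration (not part of the block) of the merge precedence
# implied by _merge_signals above: because sig_2_dict is applied last via
# dict.update, attributes from input_2 win when both inputs carry the same
# attribute name. The attribute names here are hypothetical.
s1 = Signal({"id": 1, "temp": 20})   # arrived at input_1
s2 = Signal({"id": 1, "temp": 22})   # arrived at input_2
merged = {}
merged.update(s1.to_dict())
merged.update(s2.to_dict())
assert merged["temp"] == 22  # input_2 value overrides input_1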
class TwitterSearch(Block):

    version = VersionProperty("1.0.0")
    interval = TimeDeltaProperty(title="Query Interval",
                                 default={"minutes": 10})
    tweet_text = ListProperty(StringType, title="Text includes", default=[])
    hashtags = ListProperty(StringType, title="Hashtags", default=[])
    _from = StringProperty(title="From user", default='')
    _to = StringProperty(title="To user", default='')
    at = ListProperty(StringType, title="Referenced users", default=[])
    geo = ObjectProperty(GeoCode, title="Geographical")
    count = IntProperty(title="Max Results", default=25)
    lookback = IntProperty(title="Query Lookback (days)", default=-1)
    creds = ObjectProperty(TwitterCreds, title="Credentials")
    tude = SelectProperty(
        TwitterAttitude,
        default=TwitterAttitude.NEUTRAL,
        title="Tone"
    )
    operator = SelectProperty(
        TwitterQueryOp,
        default=TwitterQueryOp.AND,
        title="Query Operator"
    )
    result_type = SelectProperty(
        TwitterResultType,
        default=TwitterResultType.MIXED,
        title="Result Type"
    )

    def __init__(self):
        super().__init__()
        self._auth = None
        self._url = None
        self._search_job = None

    def configure(self, context):
        super().configure(context)

    def start(self):
        super().start()
        self._authorize()
        self._construct_url()
        self._search_job = Job(
            self._search_tweets,
            self.interval(),
            False,
            self._url
        )

    def stop(self):
        super().stop()
        self._search_job.cancel()

    def _search_tweets(self, url):
        rsp = requests.get(url, auth=self._auth)
        status = rsp.status_code
        if status == 200:
            data = rsp.json()
            tweets = data['statuses']
            next_results = data['search_metadata'].get('next_results')
            self.notify_signals([Signal(t) for t in tweets])
            if next_results is not None:
                self._search_tweets(
                    "{0}{1}".format(SEARCH_URL, next_results)
                )
            else:
                self.logger.debug("Scheduling next search...")
                self._search_job = Job(
                    self._search_tweets,
                    self.interval(),
                    False,
                    self._url
                )
        else:
            self.logger.error(
                "Twitter search failed with status {0}".format(status))

    def _construct_url(self):
        self._url = "{0}?".format(SEARCH_URL)
        query = self._process_query()
        if query:
            self._append_param('q', sep=self.operator().value, vals=query)
        if self.geo().latitude():
            self._append_param('geo', ',', 'mi',
                               [self.geo().latitude(),
                                self.geo().longitude(),
                                self.geo().radius()])
        if self.lookback() >= 0:
            now = datetime.utcnow() - timedelta(days=self.lookback())
            vals = [now.year, now.month, now.day]
            self._append_param('since', '-', vals=vals)
        if self.count():
            self._append_param('count', vals=[self.count()])
        self._append_param('result_type', vals=[self.result_type().value])

    def _append_param(self, p_name, sep='', end='', vals=[]):
        val_str = quote(sep.join([str(v) for v in vals]) + end)
        self._url += "{0}={1}&".format(p_name, val_str)

    def _process_query(self):
        values = []
        values.extend(self.tweet_text())
        for h in self.hashtags():
            values.append("#{0}".format(h))
        for u in self.at():
            values.append("@{0}".format(u))
        if self._from():
            values.append("from:{0}".format(self._from()))
        if self._to():
            values.append("to:{0}".format(self._to()))
        if self.tude().value:
            values.append(self.tude().value)
        return values

    def _authorize(self):
        """ Prepare the OAuth handshake and verify. """
        try:
            self._auth = OAuth1(self.creds().consumer_key(),
                                self.creds().app_secret(),
                                self.creds().oauth_token(),
                                self.creds().oauth_token_secret())
            resp = requests.get(VERIFY_CREDS_URL, auth=self._auth)
            if resp.status_code != 200:
                raise Exception("Status %s" % resp.status_code)
        except Exception as e:
            self.logger.error("Authentication failed for consumer key: %s" %
                              self.creds().consumer_key())
class SocketIO(Retry, Block):

    """ A block for communicating with a socket.io server.

    Properties:
        host (str): location of the socket.io server.
        port (int): socket.io server port.
        room (str): socket.io room.
        content (Expression): Content to send to socket.io room.
        listen (bool): Whether or not the block should listen to messages
            FROM the SocketIo room.
    """

    version = VersionProperty('2.0.0')
    host = StringProperty(title='SocketIo Host', default="127.0.0.1")
    port = IntProperty(title='Port', default=443)
    room = StringProperty(title='Socket.io Room', default="default")
    content = Property(
        title='Content',
        default="{{ json.dumps($to_dict(), default=str) }}",
        visible=False)
    listen = BoolProperty(title="Listen to SocketIo Room", default=False)
    connect_timeout = TimeDeltaProperty(
        title="Connect timeout", default={"seconds": 10}, visible=False)
    start_without_server = BoolProperty(title="Allow Service Start On Failed "
                                              "Connection", default=False)
    wsp = SelectProperty(WS_Protocols, title="Websocket Protocol",
                         default="ws")

    def __init__(self):
        super().__init__()
        self._sid = ""
        self._hb_interval = -1  # Heartbeat interval
        self._hb_timeout = -1  # Heartbeat timeout
        self._transports = ""  # Valid transports
        self._client = None
        self._client_ready = False
        # This bounded semaphore will ensure that only one thread can be
        # connecting to the client at a time
        self._connection_semaphore = BoundedSemaphore(1)
        self._socket_url_protocol = "http"
        self._socket_url_base = ""
        self._stopping = False
        self._disconnect_thread = None

    def configure(self, context):
        super().configure(context)
        self._build_socket_url_base()
        # Connect to the socket before starting the block
        # This connection won't happen with a retry, so if the socket
        # server is not running, the connection will fail. In this case,
        # if the user has specified that the service should start anyways,
        # attempt to reconnect based off of the given retry strategy.
        try:
            self._connect_to_socket()
        except:
            if self.start_without_server():
                self.logger.info('Could not connect to web socket. Service '
                                 'will be started and this block will '
                                 'attempt to reconnect using given retry '
                                 'strategy.')
                self._disconnect_thread = spawn(self.handle_disconnect)
            else:
                raise

    def stop(self):
        """ Stop the block by closing the client. """
        self._stopping = True
        self.logger.debug("Shutting down socket.io client")
        if self._disconnect_thread:
            self._disconnect_thread.join()
        self._close_client()
        super().stop()

    def handle_disconnect(self):
        """ What to do when the client reports a problem """
        # Don't need to reconnect if we are stopping, the close was expected
        if self._stopping:
            return
        try:
            self.logger.info("Attempting to reconnect to the socket")
            self.execute_with_retry(self.reconnect_client)
        except:
            self.logger.exception("Failed to reconnect - giving up")
            status_signal = BlockStatusSignal(
                RunnerStatus.error, 'Out of retries.')
            self.notify_management_signal(status_signal)

    def reconnect_client(self):
        # Only allow one connection at a time by wrapping this call in a
        # bounded semaphore
        self.logger.debug("Acquiring connection semaphore")
        if not self._connection_semaphore.acquire(blocking=False):
            self.logger.warning("Already reconnecting, ignoring request")
            return
        self.logger.debug("Connection semaphore acquired")
        try:
            self._close_client()
            self._connect_to_socket()
        finally:
            self.logger.debug("Releasing connection semaphore")
            self._connection_semaphore.release()

    def handle_data(self, data):
        """ Handle data coming from the web socket

        data will be a dictionary, containing an event and data that was
        sent, in the form of a python dictionary.
        """
        if data.get('event', '') != 'recvData':
            # We don't care about this event, it's not data
            return
        try:
            sig = Signal(data['data'])
            self.notify_signals([sig])
        except:
            self.logger.warning("Could not parse socket data",
                                exc_info=True)

    def _connect_to_socket(self):
        connected = Event()
        self._do_handshake()

        url = self._get_ws_url()
        self.logger.info("Connecting to %s" % url)
        self._create_client(url, connected)
        self.logger.info("Connected to socket successfully")

        # Give the client some time to report that it's connected,
        # don't return from this method until that happens
        if not connected.wait(self.connect_timeout().total_seconds()):
            self.logger.warning("Connect response not received in time")
            self._close_client()
            raise Exception("Did not connect in time")
        else:
            self._client_ready = True

    def process_signals(self, signals):
        """ Send content to the socket.io room. """
        # Don't do any processing or sending if the block is stopping.
        # The connection may be closed and we don't want to re-open
        if self._stopping:
            return
        if not self._client or not self._client_ready:
            self.logger.warning("Tried to send to a non-existent or "
                                "terminated web socket, dropping signals")
            return
        for signal in signals:
            try:
                message = self.content(signal)
                self._client.sender.send_event('pub', message)
            except:
                self.logger.exception("Could not send message")

    def _close_client(self):
        """ Safely close the client and remove the reference """
        try:
            # The client isn't ready if we're closing
            self._client_ready = False
            # Try to close the client if it's open
            if self._client:
                self._client.close()
        except:
            # If we couldn't close, it's fine. Either the client wasn't
            # opened or it didn't want to respond. That's what we get for
            # being nice and cleaning up our connection
            self.logger.info("Error closing client", exc_info=True)
        finally:
            self._client = None

    def _create_client(self, url, connected_event):
        """ Create a WS client object.

        This will close any existing clients and re-create a client
        object.

        By the time this function returns, the client is connected and
        ready to send data.
        """
        # We will only want to handle incoming data if the block
        # has been configured to do so
        if self.listen():
            data_callback = self.handle_data
        else:
            data_callback = None

        self._client = SocketIOWebSocketClient(
            url=url,
            room=self.room(),
            connect_event=connected_event,
            heartbeat_interval=self._hb_interval,
            heartbeat_timeout=self._hb_timeout,
            data_callback=data_callback,
            disconnect_callback=self.handle_disconnect,
            logger=self.logger)

        self._client.connect()

    def _build_socket_url_base(self):
        host = self.host().strip()
        # Default to http protocol
        # See if they included an http or https in front of the host
        host_matched = re.match('^(https?)://(.*)$', host)
        if host_matched:
            self._socket_url_protocol = host_matched.group(1)
            host = host_matched.group(2)

        self._socket_url_base = "{}:{}/socket.io/".format(host, self.port())

    def _do_handshake(self):
        """ Perform the socket io handshake.

        This function will set the proper variables like heartbeat timeout
        and the sid. It will also make sure that websockets is a valid
        transport for this socket.io server.
        """
        handshake_url = self._get_handshake_url()
        self.logger.debug(
            "Making handshake request to {}".format(handshake_url))

        handshake = requests.get(handshake_url)

        if handshake.status_code != 200:
            raise Exception("Could not complete handshake: %s" %
                            handshake.text)

        self._parse_handshake_response(handshake.text)

        self.logger.debug("Handshake successful, sid=%s" % self._sid)

        # Make sure the server reports that they can handle websockets
        if 'websocket' not in self._transports:
            raise Exception("Websocket is not a valid transport for server")

    def _get_handshake_url(self):
        """ Get the URL to perform the initial handshake request to """
        return "{}://{}?transport=polling".format(self._socket_url_protocol,
                                                  self._socket_url_base)

    def _parse_handshake_response(self, resp_text):
        """ Parse a socket.io v1 handshake response.

        Expected response should look like:
            \0xxxx {"sid":"xxx", "upgrades":["websocket","polling",..],
            pingInterval:xxxx, pingTimeout:xxxx}
        """
        self.logger.debug("Parsing handshake response: {}".format(resp_text))
        matches = re.search('({.*})', resp_text)

        resp = json.loads(matches.group(1))

        self._sid = resp['sid']
        self._hb_interval = int(resp['pingInterval']) / 1000
        self._hb_timeout = int(resp['pingTimeout']) / 1000
        self._transports = resp['upgrades']

    def _get_ws_url(self):
        """ Get the websocket URL to communicate with """
        return "{}://{}?transport=websocket&sid={}".format(
            self.wsp().value,
            self._socket_url_base,
            self._sid)
class NioCommand(OAuth2ServiceAccount, Block):

    version = VersionProperty("0.1.2")
    params = ListProperty(URLParameter, title="Command Parameters",
                          default=[])
    host = StringProperty(title="n.io Host", default="[[NIOHOST]]")
    port = IntProperty(title="n.io Port", default="[[NIOPORT]]")
    service_name = Property(title="Service Name", default='')
    block_name = Property(title="Block Name (optional)", default='')
    command_name = Property(title="Command Name", default='')
    security_method = SelectProperty(SecurityMethod,
                                     default=SecurityMethod.BASIC,
                                     title='Security Method')
    basic_auth_creds = ObjectProperty(BasicAuthCreds,
                                      title='Credentials (BasicAuth)')
    # We should periodically re-authenticate with Google; this is the
    # interval to do so.
    # Ideally, we would use the expiry time in the OAuth token that we get
    # back, but that will require a non-backwards compatible change to the
    # OAuth2 mixin, so for now, having an extra non-visible property will
    # have to do
    reauth_interval = TimeDeltaProperty(title="Reauthenticate Interval",
                                        visible=False,
                                        default={'seconds': 2400})
                                        # Default to 40 mins

    def __init__(self):
        super().__init__()
        self._access_token = None
        self._reauth_job = None

    def configure(self, context):
        super().configure(context)
        if self.security_method() == SecurityMethod.OAUTH:
            self._init_access_token()

    def process_signals(self, signals):
        output_sigs = []
        for signal in signals:
            try:
                url, headers = self._get_url(signal)
                if url:
                    resp = requests.get(url, headers=headers)
                    sigs = self._process_response(resp)
                    output_sigs.extend(sigs)
            except Exception:
                self.logger.exception('Failed to process signals')
        if output_sigs:
            self.notify_signals(output_sigs)

    def _process_response(self, resp):
        status = resp.status_code
        if status != 200:
            self.logger.error(
                "Status {0} returned while requesting : {1}".format(
                    status, resp))
        try:
            data = resp.json()
        except:
            data = resp.text
        sigs = self._build_signals(data)
        return sigs

    def _build_signals(self, data):
        sigs = []
        if isinstance(data, dict):
            sigs.append(Signal(data))
        elif isinstance(data, list):
            for d in data:
                sigs.extend(self._build_signals(d))
        else:
            sigs.append(Signal({'resp': data}))
        return sigs

    def _init_access_token(self):
        try:
            self._access_token = self.get_access_token('openid email')
            self.logger.debug("Obtained access token: {}".format(
                self._access_token))
            if self._reauth_job:
                self._reauth_job.cancel()
            # Remember to reauthenticate at a certain point if it's
            # configured
            if self.reauth_interval().total_seconds() > 0:
                self._reauth_job = Job(self._init_access_token,
                                       self.reauth_interval(),
                                       False)
        except OAuth2Exception:
            self.logger.exception('Error obtaining access token')
            self._access_token = None

    def _get_params(self, signal):
        """ Return a dictionary of any configured URL parameters """
        params = dict()
        for param in self.params():
            try:
                params[param.prop_name(signal)] = param.prop_value(signal)
            except Exception:
                self.logger.exception('Failed to evaluate command params')
        return params

    def _get_url(self, signal):
        try:
            service = self.service_name(signal)
            block = self.block_name(signal)
            command = self.command_name(signal)
        except Exception:
            self.logger.exception('Failed to evaluate command definition')
            return None, None
        if not service or not command:
            self.logger.error(
                '`Service Name` and `Command Name` are required parameters')
            return None, None
        if not block:
            url = "http://{}:{}/services/{}/{}?{}".format(
                self.host(),
                self.port(),
                service,
                command,
                urlencode(self._get_params(signal)))
        else:
            url = "http://{}:{}/services/{}/{}/{}?{}".format(
                self.host(),
                self.port(),
                service,
                block,
                command,
                urlencode(self._get_params(signal)))
        headers = self._get_headers()
        self.logger.debug('Commanding: {} {}'.format(url, headers))
        return url, headers

    def _get_headers(self):
        headers = {"Content-Type": "application/json"}
        if self.security_method() == SecurityMethod.OAUTH:
            headers.update(self.get_access_token_headers())
        if self.security_method() == SecurityMethod.BASIC:
            # Build the HTTP Basic Auth header from the configured
            # credentials ("username:password", base64-encoded). The format
            # string here was redacted in the source and has been
            # reconstructed from the standard Basic Auth scheme.
            user = '{}:{}'.format(self.basic_auth_creds().username(),
                                  self.basic_auth_creds().password())
            b64 = base64.b64encode(user.encode('ascii')).decode('ascii')
            headers.update({'Authorization': 'Basic {}'.format(b64)})
        return headers
class PubSubConnectivity(object):

    """ Adds connectivity awareness to pubsub blocks """

    timeout = TimeDeltaProperty(title='Connect Timeout',
                                default={'seconds': 2},
                                advanced=True)

    def __init__(self):
        super().__init__()
        # make sure it inherits from Block's root class since this class
        # assumes access to notify_management_signal, status, logger, etc.
        if not isinstance(self, BlockBase):
            raise ValueError(
                "PubSubConnectivity requires its use within a Block "
                "instance")
        self._connected = None
        self._connected_lock = RLock()
        self._warning_status_set = False
        self._connected_event = Event()

    def conn_configure(self, is_connected):
        """ Sets up instance for connectivity checks

        Args:
            is_connected (callable): function to invoke to establish
                initial connectivity status
        """
        with self._connected_lock:
            connected = is_connected()
            self.logger.info("Starting in: '{}' state".format(
                "connected" if connected else "disconnected"))
            self._connected = connected

        if not connected:
            # per spec, hold the configure method hoping to get connected
            if not self._connected_event.wait(
                    self.timeout().total_seconds()):
                self._notify_disconnection()

    def conn_on_connected(self):
        with self._connected_lock:
            # remove any possible wait for on_connected event
            self._connected_event.set()
            self._connected = True
            # if there was a warning status formerly notified then
            # notify "recovery"
            if self._warning_status_set:
                self.status.remove(RunnerStatus.warning)
                # notify status change
                signal = BlockStatusSignal(RunnerStatus.started,
                                           message="Block is connected")
                self.notify_management_signal(signal)

    def conn_on_disconnected(self):
        # ignore disconnections when stopping/stopped
        if self.status.is_set(RunnerStatus.stopping) or \
                self.status.is_set(RunnerStatus.stopped):
            return
        with self._connected_lock:
            self._connected_event.clear()
            self._connected = False
            self._notify_disconnection()

    def _notify_disconnection(self):
        with self._connected_lock:
            # double check that we are disconnected before notifying
            if not self._connected:
                signal = BlockStatusSignal(RunnerStatus.warning,
                                           message="Block is not connected")
                self.notify_management_signal(signal)
                # set block in warning status
                self.status.add(RunnerStatus.warning)
                self._warning_status_set = True
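# A minimal sketch of how a pubsub block wires in PubSubConnectivity,
# mirroring the pattern DynamicPublisher uses below: pass the mixin's
# callbacks to Publisher.open() and then hand the publisher's is_connected
# to conn_configure(). The block name and "topic" property here are
# hypothetical; only the mixin hooks are taken from the source.
class ConnectivityAwarePublisher(PubSubConnectivity, TerminatorBlock):

    topic = StringProperty(title="Topic", default="example")

    def configure(self, context):
        super().configure(context)
        self._publisher = Publisher(topic=self.topic())
        self._publisher.open(
            on_connected=self.conn_on_connected,
            on_disconnected=self.conn_on_disconnected)
        # establish the initial connected/disconnected state, possibly
        # waiting up to the configured timeout for a connection
        self.conn_configure(self._publisher.is_connected)

    def process_signals(self, signals):
        self._publisher.send(signals)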
class RESTPolling(Block):

    """ A base class for blocks that poll restful web services. """

    polling_interval = TimeDeltaProperty(title='Polling Interval',
                                         default={"seconds": 20})
    retry_interval = TimeDeltaProperty(title='Retry Interval',
                                       default={"seconds": 60})
    queries = ListProperty(StringType, title='Query Strings', default=[])
    include_query = StringProperty(title='Include Query Field',
                                   allow_none=True)
    retry_limit = IntProperty(title='Retry Limit', default=3)

    def __init__(self):
        super().__init__()
        self._n_queries = 0
        self._url = None
        self._paging_url = None
        self._page_num = 1
        self._idx = 0
        self._poll_job = None
        self._retry_job = None
        self._retry_interval = None
        self._etags = [None]
        self._modifieds = [None]
        self._freshest = [None]
        self._prev_freshest = [None]
        self._prev_stalest = [None]
        self._curr_fresh = None
        self._curr_stale = None
        self._poll_lock = Lock()
        self._retry_count = 0
        self._auth = None
        self._recent_posts = None
        self._num_locks = 0
        self._max_locks = 5  # the max number of lock acquirers that can wait

        # this should be overridden in child blocks to refer to the actual
        # "created at" field for items returned from the particular service
        self._created_field = 'created_at'

    def configure(self, context):
        super().configure(context)
        self._authenticate()
        self._retry_interval = self.retry_interval()
        self._n_queries = len(self.queries())
        self._etags *= self._n_queries
        self._modifieds *= self._n_queries
        self._prev_freshest *= self._n_queries
        self._prev_stalest *= self._n_queries
        self._recent_posts = [None] * self._n_queries

    def start(self):
        super().start()
        if self.polling_interval().total_seconds() > 0:
            self._poll_job = Job(self.poll, self.polling_interval(), True)
            spawn(self.poll)
        else:
            self.logger.info("No poll job")

    def stop(self):
        super().stop()
        if self._poll_job is not None:
            self._poll_job.cancel()
        if self._retry_job is not None:
            self._retry_job.cancel()

    def process_signals(self, signals):
        if self._retry_job is None:
            for signal in signals:
                self.poll()
        else:
            self.logger.debug("A 'retry' is currently scheduled. "
                              "Ignoring incoming signals.")

    def poll(self, paging=False, in_retry=False):
        """ Called from user-defined block.

        Assumes that self.url contains the fully-formed endpoint intended
        for polling.

        Signals are notified from here.

        Args:
            paging (bool): Are we paging?
            in_retry (bool): Was poll called from a retry job?

        Returns:
            None
        """
        if self._n_queries == 0:
            return
        if self._num_locks >= self._max_locks:
            self.logger.warning(
                "Currently {} locks waiting to be acquired. This is more "
                "than the max of {}. Ignoring poll".format(
                    self._num_locks, self._max_locks))
            return
        # Increment the number of lock waiters so we don't build up too many
        self._num_locks += 1
        with self._poll_lock:
            if self._retry_job is None or in_retry:
                self._locked_poll(paging)
            else:
                self.logger.debug("A 'retry' is already scheduled. "
                                  "Skipping this poll.")
        self._num_locks -= 1

    def _locked_poll(self, paging=False):
        """ Execute the poll, while being assured that resources are locked
        """
        if not paging:
            # This is the first page of a new query.
            self._recent_posts[self._idx] = {}
            self.page_num = 1

        headers = self._prepare_url(paging)
        url = self.paging_url or self.url

        self.logger.debug("{}: {}".format("Paging" if paging else "Polling",
                                          url))
        resp = self._execute_request(url, headers, paging)
        if resp is None:
            return

        self.etag = self.etag if paging else resp.headers.get('ETag')
        self.modified = self.modified if paging \
            else resp.headers.get('Last-Modified')

        try:
            if not self._validate_response(resp):
                self._on_failure(resp, paging, url)
            else:
                self._on_success(resp, paging)
        except Exception as e:
            self.logger.exception(e)
            self.logger.warning(
                "Error processing polling response: {}: {}".format(
                    type(e).__name__, str(e)))

    def _on_failure(self, resp, paging, url):
        """ This can be overridden in user-defined blocks.

        Defines how failed polling requests will be handled.
        """
        try:
            status_code = resp.status_code
            resp = resp.json()
        except:
            # Response is not json.
            # This is fine. We're just logging a warning about the resp.
            pass
        finally:
            self.logger.warning(
                "Polling request of {} returned status {}: {}".format(
                    url, status_code, resp))
            self._retry(paging)

    def _on_success(self, resp, paging):
        """ This can be overridden in user-defined blocks.

        Defines how successful polling requests will be handled.
        """
        self._reset_retry_cycle()

        signals, paging = self._process_response(resp)
        self.logger.debug('signals pre-remove-duplicates: %s' % signals)
        signals = self._discard_duplicate_posts(signals)
        self.logger.debug('signals post-remove-duplicates: %s' % signals)

        # add the include_query attribute if it is configured
        if self.include_query() and signals is not None:
            for s in signals:
                setattr(s, self.include_query(),
                        unquote(self.current_query))

        if signals:
            self.notify_signals(signals)

        if paging:
            self.page_num += 1
            self._paging()
        else:
            self._epilogue()

    def _reset_retry_cycle(self):
        """ This can be overridden in user-defined blocks.

        Logic for cleaning up retry jobs and counters goes here.
        """
        # cancel the retry job if we were in a retry cycle
        if self._retry_job is not None:
            self._retry_job.cancel()
            self._retry_job = None
        self._retry_interval = self.retry_interval()

        # this poll was a success so reset the retry count
        self._retry_count = 0

    def _epilogue(self):
        """ This can be overridden in user-defined blocks.

        Defines behavior after a query has been fully processed, when we
        are ready for the next query. That is, when paging is done and
        retries are cleared.
        """
        if self.polling_interval().total_seconds() > 0:
            self._poll_job = self._poll_job or Job(
                self.poll, self.polling_interval(), True)
        self._increment_idx()
        if self.queries():
            self.logger.debug("Preparing to query for: %s" %
                              self.current_query)

    def _authenticate(self):
        """ This should be overridden in user-defined blocks.

        This is where an oauth handshake would take place or a url would
        be enriched with auth data.
        """
        pass

    def _validate_response(self, resp):
        """ This can be overridden in user-defined blocks.

        This is where we determine if a response is bad and we need a
        retry.

        Returns:
            validation (bool): True if response is good, False if bad.
        """
        return resp.status_code == 200 or resp.status_code == 304

    def _retry(self, paging):
        """ This is where we determine what to do on a bad poll response.
        """
        self.logger.debug("Attempting to re-authenticate.")
        self._authenticate()
        self.logger.debug("Attempting to retry poll.")
        self._retry_poll(paging)

    def _prepare_url(self, paging):
        """ This should be overridden in user-defined blocks.

        Makes any necessary amendments, interpolations, etc. to self._url.
        """
        pass

    def _process_response(self, resp):
        """ This should be overridden in user-defined blocks.

        Do what thou wilt with the polling response.

        Args:
            resp (Response): A Response object (from requests lib)

        Returns:
            signals (list(Signal)): A list of signal objects to notify.
            paging (dict/list/obj): Paging data, possibly None, from the
                recorded response.
        """
        pass

    def _paging(self):
        """ This can be overridden in user-defined blocks.

        Logic for handling paging situations.
        """
        # cancel the polling job while we are paging
        if self._poll_job is not None:
            self._poll_job.cancel()
            self._poll_job = None
        self._locked_poll(True)

    def _update_retry_interval(self):
        """ This should be overridden in user-defined blocks.

        Implement your retry strategy here. Exponential backoff? War?
        """
        self.logger.debug("Updating retry interval from {} to {}".format(
            self._retry_interval, self._retry_interval * 2))
        self._retry_interval *= 2

    def _retry_poll(self, paging=False):
        """ Helper method to schedule polling retries. """
        if self._poll_job is not None:
            self._poll_job.cancel()
            self._poll_job = None
        if self._retry_count < self.retry_limit():
            self.logger.debug("Retrying the polling job...")
            self._retry_count += 1
            self._retry_job = Job(self.poll, self._retry_interval, False,
                                  paging=paging, in_retry=True)
            self._update_retry_interval()
        else:
            self.logger.error("Out of retries. "
                              "Aborting and changing status to Error.")
            status_signal = BlockStatusSignal(RunnerStatus.error,
                                              'Out of retries.')

            # Leaving source for backwards compatibility
            # In the future, you will know that a status signal is a block
            # status signal when it contains service_name and name
            #
            # TODO: Remove when source gets added to status signals in nio
            setattr(status_signal, 'source', 'Block')

            self.notify_management_signal(status_signal)

    def update_freshness(self, posts):
        """ Bookkeeping for the state of the current query's polling. """
        self._curr_fresh = self.created_epoch(posts[0])
        self._curr_stale = self.created_epoch(posts[-1])
        if self._poll_job is not None:
            if self.prev_freshest is None or \
                    self.freshest > self.prev_freshest:
                self.prev_freshest = self.freshest
            self.freshest = self._curr_fresh

    def find_fresh_posts(self, posts):
        """ This can be overridden in user-defined blocks, if desired.

        Returns only those posts which were created after the newest post
        from the previous round of polling on the current query string.

        Note that self.created_epoch expects dictionaries. Reimplement that
        method if you have another structure for posts.

        Args:
            posts (list(dict)): A list of posts.

        Returns:
            posts (list(dict)): The amended list of posts.
        """
        posts = [
            p for p in posts
            if self.created_epoch(p) > (self.prev_freshest or 0)
        ]
        return posts

    def _discard_duplicate_posts(self, posts):
        """ Removes posts that were already found by another query.

        Each query acts independently, so if a post matches multiple
        queries it will be notified for each one. This method keeps track
        of all the most recent posts for each query and discards posts if
        they are already here.

        Args:
            posts (list(dict)): A list of posts.
            first_page (bool): True if this is the first page of query.

        Returns:
            posts (list(dict)): The amended list of posts.
        """
        # No need to try to discard posts if there is only one query.
        if self._n_queries <= 1:
            return posts

        # Return only posts that are not in self._recent_posts.
        result = []
        for post in posts:
            post_id = self._get_post_id(post)
            is_dupe = False
            valid_records = [r for r in self._recent_posts if r is not None]
            for record in valid_records:
                if post_id in record:
                    is_dupe = True
                    break
            if not post_id or not is_dupe:
                result.append(post)
                self._recent_posts[self._idx][post_id] = True
        return result

    def _get_post_id(self, post):
        """ Returns a uniquely identifying string for a post.

        This should be overridden in user-defined blocks.

        Args:
            post (dict): A post.

        Returns:
            id (string): A string that uniquely identifies a post.
                None indicates that the post should be treated as unique.
        """
        return None

    def created_epoch(self, post):
        """ Helper function to return the seconds since the epoch for the
        given post's 'created_time'.

        Args:
            post (dict): Should contain a 'created_time' key.

        Returns:
            seconds (int): post[created_time] in seconds since epoch.
        """
        dt = self._parse_date(post.get(self._created_field, ''))
        return self._unix_time(dt)

    def _parse_date(self, date):
        """ Parses the service's date string format into a native datetime.

        This should be overridden in user-defined blocks.
        """
        exp = r"(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})"
        m = re.match(exp, date)
        return datetime(*[int(n) for n in m.groups(0)])

    def _execute_request(self, url, headers, paging):
        """ Execute the request, accounting for possible errors """
        # Requests won't generally throw exceptions, but this provides a
        # bit of convenience for the block developer.
        self.logger.debug('executing GET request with: url: %s, headers: '
                          '%s, paging: %s' % (url, headers, paging))
        resp = None
        try:
            if self._auth is not None:
                resp = requests.get(url, headers=headers, auth=self._auth)
            else:
                resp = requests.get(url, headers=headers)
        except Exception as e:
            self.logger.warning("GET request failed, details: %s" % e)
            # Use the usual retry strategy to resolve the error
            self._retry(paging)
        finally:
            return resp

    def _unix_time(self, dt):
        epoch = datetime.utcfromtimestamp(0)
        delta = dt - epoch
        return int(delta.total_seconds())

    def _increment_idx(self):
        self._idx = (self._idx + 1) % self._n_queries

    @property
    def current_query(self):
        return quote(self.queries()[self._idx])

    @property
    def url(self):
        return self._url

    @url.setter
    def url(self, url):
        self._url = url

    @property
    def paging_url(self):
        return self._paging_url

    @paging_url.setter
    def paging_url(self, url):
        self._paging_url = url

    @property
    def page_num(self):
        return self._page_num

    @page_num.setter
    def page_num(self, num):
        self._page_num = num

    @property
    def etag(self):
        return self._etags[self._idx]

    @etag.setter
    def etag(self, etag):
        self._etags[self._idx] = etag

    @property
    def modified(self):
        return self._modifieds[self._idx]

    @modified.setter
    def modified(self, modified):
        self._modifieds[self._idx] = modified

    @property
    def freshest(self):
        return self._freshest[self._idx]

    @freshest.setter
    def freshest(self, timestamp):
        self._freshest[self._idx] = timestamp

    @property
    def prev_freshest(self):
        return self._prev_freshest[self._idx]

    @prev_freshest.setter
    def prev_freshest(self, timestamp):
        self._prev_freshest[self._idx] = timestamp

    @property
    def prev_stalest(self):
        return self._prev_stalest[self._idx]

    @prev_stalest.setter
    def prev_stalest(self, timestamp):
        self._prev_stalest[self._idx] = timestamp
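# A minimal sketch (hypothetical service, URL, and field names) of what a
# RESTPolling subclass overrides, following the same pattern FacebookFeed
# uses above: build the URL in _prepare_url, turn the response into signals
# plus a paging flag in _process_response, and let the base class handle
# scheduling, retries, and duplicate filtering.
class ExamplePolling(RESTPolling):

    URL_FORMAT = "https://api.example.com/search?q={}&page={}"

    def __init__(self):
        super().__init__()
        self._created_field = "created_time"  # service's "created at" field

    def _prepare_url(self, paging=False):
        headers = {"Content-Type": "application/json"}
        self.url = self.URL_FORMAT.format(self.current_query, self.page_num)
        return headers

    def _process_response(self, resp):
        posts = resp.json().get('results', [])
        if posts:
            self.update_freshness(posts)
            posts = self.find_fresh_posts(posts)
        paging = len(posts) > 0  # keep paging while fresh posts remain
        return [Signal(p) for p in posts], paging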
class SignalRate(GroupBy, Persistence, Block):

    report_interval = TimeDeltaProperty(default={"seconds": 1},
                                        title="Report Interval")
    averaging_interval = TimeDeltaProperty(default={"seconds": 5},
                                           title="Averaging Interval")
    version = VersionProperty("0.1.1")

    def __init__(self):
        super().__init__()
        self._signal_counts = defaultdict(deque)
        self._signals_lock = Lock()
        self._job = None
        self._start_time = None
        self._averaging_seconds = None

    def persisted_values(self):
        """ Overridden from persistence mixin """
        return ['_start_time', '_signal_counts']

    def configure(self, context):
        super().configure(context)
        # This is just for backwards compatibility with persistence
        if self._signal_counts.default_factory == list:
            self._signal_counts.default_factory = deque
            for group in self._signal_counts:
                self._signal_counts[group] = deque(
                    self._signal_counts[group])

    def start(self):
        super().start()
        # use _start_time if it was loaded from persistence
        self._start_time = self._start_time or _time()
        self._averaging_seconds = self.averaging_interval().total_seconds()
        self._job = Job(self.report_frequency, self.report_interval(), True)

    def process_signals(self, signals, input_id='default'):
        # Record the count for each group in this list of signals
        self.for_each_group(self.record_count, signals)

    def record_count(self, signals, group):
        """ Save the time and the counts for each group received """
        with self._signals_lock:
            self._signal_counts[group].append((_time(), len(signals)))

    def report_frequency(self):
        signals = []
        self.for_each_group(self.get_frequency, sigs_out=signals)
        self.logger.debug("Current counts: {}".format(self._signal_counts))
        if signals:
            self.notify_signals(signals)

    def get_frequency(self, group, sigs_out):
        """ Get the frequency for a group and add it to sigs_out """
        with self._signals_lock:
            ctime = _time()
            self._signal_counts[group] = self.trim_old_signals(
                self._signal_counts[group], ctime)
            signals = copy(self._signal_counts[group])

        # Add up all of our current counts
        total_count = sum(grp[1] for grp in signals)
        # If we haven't reached a full period, divide by elapsed time
        rate = total_count / min(ctime - self._start_time,
                                 self._averaging_seconds)
        sigs_out.append(Signal({"group": group, "rate": rate}))

    def trim_old_signals(self, signal_counts, ctime):
        """ Take some signal counts and get rid of old ones """
        while len(signal_counts) and \
                ctime - signal_counts[0][0] >= self._averaging_seconds:
            signal_counts.popleft()
        return signal_counts

    def stop(self):
        if self._job:
            self._job.cancel()
        super().stop()
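# A small worked example (assumed numbers) of the rate computation in
# get_frequency above: with averaging_interval = 5 seconds and counts of 10
# and 20 signals recorded inside the window, once the block has run longer
# than the window the reported rate is (10 + 20) / 5 = 6.0 signals per
# second. During the first few seconds after start, the divisor is the
# elapsed time instead of the full window, so the rate is not
# underestimated before a complete window has accumulated.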
class DynamicPublisher(PubSubConnectivity, TerminatorBlock):

    version = VersionProperty("0.2.0")
    topic = StringProperty(
        title="Topic",
        default="",
        order=0)
    ttl = TimeDeltaProperty(
        title="Time-to-live",
        advanced=True,
        order=0,
        default=dict(seconds=600))
    is_local = BoolProperty(
        advanced=True,
        default=False,
        order=1,
        title="Local Publisher?")
    local_identifier = StringProperty(
        advanced=True,
        default='[[INSTANCE_ID]]',
        order=2,
        title='Local Identifier')

    def __init__(self):
        super().__init__()
        self._cache = keydefaultdict(
            lambda topic: (self.__create_publisher(topic), None))
        self._cache_lock = Lock()
        self._is_local = False
        self._local_id = None

    def configure(self, context):
        super().configure(context)
        self._is_local = self.is_local()
        if self._is_local:
            self._local_id = self.local_identifier()

    def stop(self):
        with self._cache_lock:
            for topic in self._cache:
                (pub, job) = self._cache[topic]
                if job is not None:
                    job.cancel()
                pub.close()
            self._cache.clear()

    def process_signals(self, in_signals):
        """ Publish each group of signals """
        ttl = self.ttl()
        groups = defaultdict(list)

        for signal in in_signals:
            try:
                topic = self.topic(signal)
                if self._is_local and self._local_id:
                    topic = '{}.{}'.format(self._local_id, topic)
            except Exception:
                self.logger.exception(
                    'topic expression failed, ignoring signal')
                continue
            groups[topic].append(signal)

        for topic, out_signals in groups.items():
            try:
                if self._is_local:
                    out_signals = [Signal({
                        "signals": b64encode(pickle.dumps(out_signals))})]
                self.__get_publisher(topic, ttl).send(out_signals)
            except pickle.PicklingError:
                self.logger.exception("Pickling based pickle error")
            except TypeError:
                self.logger.exception("Unable to encode pickled signals")
            except PublisherError:  # pragma no cover
                self.logger.exception(
                    'Error publishing {:n} signals to "{}"'.format(
                        len(out_signals), topic))
            except:
                self.logger.exception("Error processing signals")

    def __close_publisher(self, topic):
        with self._cache_lock:
            self.logger.info(
                'removing expired publisher for "{}"'.format(topic))
            pub, _ = self._cache.pop(topic)
            pub.close()

    def __create_publisher(self, topic):
        self.logger.info('creating new publisher for "{}"'.format(topic))
        publisher = Publisher(topic=topic)

        try:
            publisher.open(
                on_connected=self.conn_on_connected,
                on_disconnected=self.conn_on_disconnected)
        except TypeError as e:
            self.logger.warning(
                'Connecting to an outdated communication module')
            # try previous interface
            publisher.open()
            # no need to configure connectivity if not supported
            return publisher

        self.conn_configure(publisher.is_connected)
        return publisher

    def __get_publisher(self, topic, ttl):
        with self._cache_lock:
            publisher, prev_job = self._cache[topic]
            if prev_job is not None:
                prev_job.cancel()
            job = (Job(
                self.__close_publisher,
                ttl,
                False,
                topic,
            ) if ttl.total_seconds() >= 0 else None)
            self._cache[topic] = (publisher, job)
            return publisher