def check_correctness_of_output(in_stream, check_list):
    def check(v, index, check_list):
        # v is in_list[index]
        assert v == check_list[index]
        next_index = index + 1
        return next_index
    sink_element(func=check, in_stream=in_stream, state=0,
                 check_list=check_list)
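# A hedged usage sketch: the Stream class and sink_element are assumed to come
# from the same agent framework used above; the stream name and data are illustrative.
s = Stream('test_stream')
check_correctness_of_output(in_stream=s, check_list=[0, 1, 4, 9])
s.extend([0, 1, 4, 9])
# ... then advance the framework's scheduler (e.g. a step()/run() call, whatever
# the enclosing framework provides) so the sink agent runs the assertions.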
def create_agent_to_publish_out_stream(self, out_stream, publication):
    def f(element_of_stream):
        # Don't publish _no_value elements.
        if element_of_stream is _no_value:
            return element_of_stream
        # Publish elements that are not _no_value.
        message = (publication, element_of_stream)
        json_payload = json.dumps(message)
        self.channel.basic_publish(
            exchange='the_exchange',
            routing_key=publication,
            body=json_payload)
    # Create the agent: a sink which executes f()
    # for each element of out_stream. The input
    # stream of the sink is an output stream of
    # the process.
    sink_element(func=f, in_stream=out_stream)
    return
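# Hypothetical wiring, assuming RabbitMQ via pika (which provides the
# channel.basic_publish call used above); the exchange name and type are illustrative.
import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.exchange_declare(exchange='the_exchange', exchange_type='direct')
# The enclosing object would store this channel as self.channel before
# create_agent_to_publish_out_stream() is called.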
def datastore_file_agent(in_streams, out_streams):
    """ Agent that manages writing to and creating datastore files
    @in_streams - Stream of (n, e, z, t)
    @out_streams - Stream of (n, e, z, t) - stream itself is not modified
    """
    def write_to_datastore_file(nezt, file_ptr):
        """ Writes the input stream element to the given file_ptr """
        accelerations = nezt[:3]
        sample_timestamp = nezt[3]
        file_ptr.write("%20.5f" % sample_timestamp + ' ' +
                       ' '.join("%10.7f" % x for x in accelerations) + '\n')

    def write_to_file(stream, state):
        """ Writes a stream element to file. Creates a new file after
        FILE_STORE_INTERVAL passes.
        @param stream - element of the input stream of acceleration data.
        @param state - tuple of (latest_timestamp, curr_file_ptr, curr_filename,
                                 n_sample_cnt, n_writing_error)
        """
        timestamp = stream[3]
        latest_timestamp, curr_file_ptr, curr_filename, n_sample_cnt, n_writing_error = state
        # update the datastore file if FILE_STORE_INTERVAL passed
        if timestamp >= latest_timestamp + self.file_store_interval:
            logging.info('File %s store interval elapsed with %s samples, rate %s samples/second',
                         curr_filename, n_sample_cnt,
                         float(n_sample_cnt) / self.file_store_interval)
            if curr_file_ptr is not None:
                curr_file_ptr.close()
            curr_filename = self.MakeFilename(timestamp)
            curr_file_ptr = open(curr_filename, 'w')
            latest_timestamp = timestamp
            n_sample_cnt = 0
        try:
            write_to_datastore_file(stream, curr_file_ptr)
            n_sample_cnt += 1
        except Exception:
            n_writing_error += 1
            if n_writing_error <= 10:
                print('Error writing sample to file {} with timestamp {}'.format(
                    curr_filename, timestamp))
        return stream, (latest_timestamp, curr_file_ptr, curr_filename,
                        n_sample_cnt, n_writing_error)

    sink_element(func=write_to_file, in_stream=in_streams,
                 state=(-self.file_store_interval, None, None, 0, 0))
def g(in_streams, out_streams):
    def print_element(v):
        print('stream element is', v)
    sink_element(func=print_element, in_stream=in_streams[0])
def g(in_stream, **kwargs):
    # func is defined in the enclosing scope of this wrapper.
    sink_element(func, in_stream, **kwargs)
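# A sketch of how such a wrapper is typically produced: func is captured from an
# enclosing scope (a closure). The helper name make_sink_wrapper is illustrative,
# not part of the framework.
def make_sink_wrapper(func):
    def g(in_stream, **kwargs):
        sink_element(func, in_stream, **kwargs)
    return g

print_sink = make_sink_wrapper(lambda v: print(v))
# print_sink(some_stream) would then attach a printing sink agent to some_stream.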
class PhidgetsSensor(object):

    def __init__(self, config=None):
        if not config:
            config = {}
        self.phidget_attached = False
        self.collect_samples = True
        self.picker = True
        self.sensor_data_lock = threading.Lock()

        self.delta_fields = set()
        self.sensor_data = {}
        self.sensor_data['Type'] = 'ACCELEROMETER_3_AXIS'
        self.sensor_data['Calibrated'] = True
        self.sensor_data['Model'] = 'Phidgets 1056'
        self.decimation = PHIDGETS_DECIMATION
        if 'software_version' in config:
            self.software_version = config.get('software_version')
        else:
            self.software_version = 'PyCSN Unknown'
        if 'decimation' in config:
            self.decimation = config.get('decimation')
        if 'picker' in config:
            self.picker = config.get('picker')
        if 'pick_threshold' in config:
            self.pick_threshold = config.get('pick_threshold')
        else:
            self.pick_threshold = PICKER_THRESHOLD
        if 'serial' in config:
            self.sensor_data['Serial'] = config.get('serial')
        else:
            self.sensor_data['Serial'] = ''
        if 'datastore' in config:
            self.datastore = config.get('datastore')
        else:
            self.datastore = '/var/tmp/phidgetsdata'
        #if 'pick_queue' in config:
        #    self.pick_queue = config.get('pick_queue')
        #else:
        #    self.pick_queue = None
        if 'latitude' in config:
            self.latitude = config.get('latitude')
        else:
            self.latitude = 0.0
        if 'longitude' in config:
            self.longitude = config.get('longitude')
        else:
            self.longitude = 0.0
        if 'floor' in config:
            self.floor = config.get('floor')
        else:
            self.floor = '1'
        if 'client_id' in config:
            self.client_id = config.get('client_id')
        else:
            self.client_id = 'Unknown'
        if 'stomp' in config:
            self.stomp = config.get('stomp')
        else:
            self.stomp = None
        if 'stomp_topic' in config:
            self.stomp_topic = config.get('stomp_topic')
        else:
            self.stomp_topic = None
        if 'connect_stomp' in config:
            self.connect_stomp = config.get('connect_stomp')
        else:
            self.connect_stomp = None
        self.datastore_uploaded = self.datastore + '/uploaded'
        self.datastore_corrupted = self.datastore + '/corrupted'
        logging.info('Sensor datastore directory is %s', self.datastore)

        # Make sure data directory exists
        try:
            os.makedirs(self.datastore)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                logging.error('Error making directory %s', self.datastore)
                raise exception
        try:
            os.makedirs(self.datastore_uploaded)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                logging.error('Error making directory %s', self.datastore_uploaded)
                raise exception
        try:
            os.makedirs(self.datastore_corrupted)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                logging.error('Error making directory %s', self.datastore_corrupted)
                raise exception

        self.sensor_data['Units'] = 'g'
        self.sensor_data['Num_samples'] = 50
        self.sensor_data['Sample_window_size'] = 1
        self.accelerometer = None
        self.time_start = None
        self.reported_clock_drift = 0.0
        self.file_store_interval = 600.0
        self.last_phidgets_timestamp = None
        self.last_datastore_filename = None
        self.last_datastore_filename_uploaded = None
        self.writing_errors = 0
        self.last_sample_seconds = time.time()
        # a lock for updating the timing variables
        self.timing_lock = threading.Lock()
        self.timing_gradient = 0.0
        self.timing_intercept = 0.0
        self.timing_base_time = time.time()
        self.recent_picks = []
        # Everything looks OK, start the collect samples and picking threads and the Phidget
        """
        self.sensor_raw_data_queue = Queue.Queue()

        raw_data_thread = threading.Thread(target=self.ProcessSensorReading, args=['raw'])
        raw_data_thread.setDaemon(True)
        raw_data_thread.start()

        self.sensor_raw_data_queue.join()
        logging.info('Raw Data Thread started')

        # start the Picker thread, if this is required
        if self.picker:
            self.sensor_readings_queue = Queue.Queue()
            thread = threading.Thread(target=self.Picker, args=['Simple Threshold'])
            thread.setDaemon(True)
            thread.start()
            logging.info('Picker started')
            self.sensor_readings_queue.join()
            logging.info('Sensor readings thread started')
        else:
            logging.info('This Phidget will not calculate or send picks')
        """
        self.sensor_raw_data_queue = Queue.Queue()

        raw_data_thread = threading.Thread(target=self.Picker, args=['Simple Threshold'])
        raw_data_thread.setDaemon(True)
        raw_data_thread.start()
        logging.info('Picker started')

        self.sensor_raw_data_queue.join()

        logging.info('Client initialised: now starting Phidget')

        self.data_buffer = []

        # Start the Phidget
        self.StartPhidget()

        if not self.phidget_attached:
            # A bare `raise` here has no active exception; raise an explicit error instead.
            raise RuntimeError('Phidget could not be attached')

        self.sensor_data['Serial'] = str(self.accelerometer.getSerialNum())

        self.DisplayDeviceInfo()

    def StartPhidget(self):
        # Initialise the Phidgets sensor
        self.phidget_attached = False
        self.time_start = None
        try:
            if self.accelerometer:
                self.accelerometer.closePhidget()
            self.accelerometer = Spatial()
            self.accelerometer.setOnAttachHandler(self.AccelerometerAttached)
            self.accelerometer.setOnDetachHandler(self.AccelerometerDetached)
            self.accelerometer.setOnErrorhandler(self.AccelerometerError)
            self.accelerometer.setOnSpatialDataHandler(self.SpatialData)
            #self.accelerometer.enableLogging(Phidget.PhidgetLogLevel.PHIDGET_LOG_WARNING, None)
            #self.accelerometer.enableLogging(Phidget.PhidgetLogLevel.PHIDGET_LOG_VERBOSE, None)
            self.accelerometer.openPhidget()
            self.accelerometer.waitForAttach(10000)
            # set data rate in milliseconds (we will decimate from 4ms to 20ms later)
            # we now do this in the Attach handler
            #self.accelerometer.setDataRate(PHIDGETS_NOMINAL_DATA_INTERVAL_MS)
        except RuntimeError as e:
            logging.error("Runtime Exception: %s", e.details)
            return
        except PhidgetException as e:
            logging.error("Phidget Exception: %s. Is the Phidget not connected?", e.details)
            return
        self.phidget_attached = True

    def Picker(self, args):
        logging.info('Initialise picker %s', args)
        delta = PHIDGETS_NOMINAL_DATA_INTERVAL_MS * self.decimation * 0.001
        LTA_count = int(LTA / delta)

        def pick_orientation(scaled, timestamps, orientation):
            """ Sends picks on a single orientation, either 'n', 'e', or 'z'. """
            # ---------------------------------------------------------------
            # CREATE AGENTS AND STREAMS
            # ---------------------------------------------------------------
            # 1. DECIMATE SCALED DATA.
            # Window of size DECIMATION is decimated to its average.
            decimated = Stream('decimated')
            map_window(lambda v: sum(v) / float(len(v)),
                       scaled, decimated,
                       window_size=self.decimation, step_size=self.decimation)

            # 2. DECIMATE TIMESTAMPS.
            # Window of size DECIMATION is decimated to its last value.
            decimated_timestamps = Stream('decimated_timestamps')
            map_window(lambda window: window[-1],
                       timestamps, decimated_timestamps,
                       window_size=self.decimation, step_size=self.decimation)

            # 3. DEMEAN (subtract mean from) DECIMATED STREAM.
            # Subtract mean of window from the window's last value.
            # Move sliding window forward by 1 step.
            demeaned = Stream('demeaned', initial_value=[0.0] * (LTA_count - 1))
            map_window(lambda window: window[-1] - sum(window) / float(len(window)),
                       decimated, demeaned,
                       window_size=LTA_count, step_size=1)

            # 4. MERGE TIMESTAMPS WITH DEMEANED ACCELERATIONS.
            # Merges decimated_timestamps and demeaned to get timestamped_data.
            timestamped_data = Stream('timestamped_data')
            zip_streams(in_streams=[decimated_timestamps, demeaned],
                        out_stream=timestamped_data)

            # 5. DETECT PICKS.
            # Output a pick if the value part of the time_value (t_v) exceeds threshold.
            picks = Stream('picks')
            filter_element(lambda t_v: abs(t_v[1]) > self.pick_threshold,
                           timestamped_data, picks)

            # 6. QUENCH PICKS.
            # An element is a (timestamp, value).
            # Start a new quench when timestamp > QUENCH_PERIOD + last_quench.
            # Update the last quench when a new quench is initiated.
            # Initially the last_quench (i.e. state) is 0.
            quenched_picks = Stream('quenched_picks')

            # f is the filtering function
            def f(timestamped_value, last_quench, QUENCH_PERIOD):
                timestamp, value = timestamped_value
                new_quench = timestamp > QUENCH_PERIOD + last_quench
                last_quench = timestamp if new_quench else last_quench
                # return filter condition (new_quench) and next state (last_quench)
                return new_quench, last_quench

            filter_element(f, picks, quenched_picks, state=0, QUENCH_PERIOD=2)

            # 7. SEND QUENCHED PICKS.
            self.send_event(quenched_picks)

        def picker_agent(nezt):
            # nezt is a stream of 'data_entry' in the original code.
            # Declare acceleration streams in n, e, z orientations and timestamp stream.
            n, e, z, t = (Stream('raw_north'), Stream('raw_east'),
                          Stream('raw_vertical'), Stream('timestamps'))

            # split the nezt stream into its components
            split_element(lambda nezt: [nezt[0], nezt[1], nezt[2], nezt[3]],
                          in_stream=nezt, out_streams=[n, e, z, t])

            # Determine picks for each orientation after scaling. Note negative scale for East.
            # Parameters of pick_orientation are:
            # (0) stream of accelerations in a single orientation, scaled by multiplying by SCALE.
            # (1) timestamps
            # (2) The orientation 'n', 'e', or 'z'
            pick_orientation(f_mul(n, PHIDGETS_ACCELERATION_TO_G), t, 'n')
            pick_orientation(f_mul(e, -PHIDGETS_ACCELERATION_TO_G), t, 'e')
            pick_orientation(f_mul(z, PHIDGETS_ACCELERATION_TO_G), t, 'z')

        # out_streams is not used; a default is supplied so the
        # single-argument call below works.
        def datastore_file_agent(in_streams, out_streams=None):
            """ Agent that manages writing to and creating datastore files
            @in_streams - Stream of (n, e, z, t)
            @out_streams - Stream of (n, e, z, t) - stream itself is not modified
            """
            def write_to_datastore_file(nezt, file_ptr):
                """ Writes the input stream element to the given file_ptr """
                accelerations = nezt[:3]
                sample_timestamp = nezt[3]
                file_ptr.write("%20.5f" % sample_timestamp + ' ' +
                               ' '.join("%10.7f" % x for x in accelerations) + '\n')

            def write_to_file(stream, state):
                """ Writes a stream element to file. Creates a new file after
                FILE_STORE_INTERVAL passes.
                @param stream - element of the input stream of acceleration data.
                @param state - tuple of (latest_timestamp, curr_file_ptr, curr_filename,
                                         n_sample_cnt, n_writing_error)
                """
                timestamp = stream[3]
                latest_timestamp, curr_file_ptr, curr_filename, n_sample_cnt, n_writing_error = state
                # update the datastore file if FILE_STORE_INTERVAL passed
                if timestamp >= latest_timestamp + self.file_store_interval:
                    logging.info('File %s store interval elapsed with %s samples, rate %s samples/second',
                                 curr_filename, n_sample_cnt,
                                 float(n_sample_cnt) / self.file_store_interval)
                    if curr_file_ptr is not None:
                        curr_file_ptr.close()
                    curr_filename = self.MakeFilename(timestamp)
                    curr_file_ptr = open(curr_filename, 'w')
                    latest_timestamp = timestamp
                    n_sample_cnt = 0
                try:
                    write_to_datastore_file(stream, curr_file_ptr)
                    n_sample_cnt += 1
                except Exception:
                    n_writing_error += 1
                    if n_writing_error <= 10:
                        print('Error writing sample to file {} with timestamp {}'.format(
                            curr_filename, timestamp))
                return stream, (latest_timestamp, curr_file_ptr, curr_filename,
                                n_sample_cnt, n_writing_error)

            sink_element(func=write_to_file, in_stream=in_streams,
                         state=(-self.file_store_interval, None, None, 0, 0))

        nezt = Stream('nezt')
        picker_agent(nezt)
        datastore_file_agent(nezt)

        while True:
            e, sample_timestamp = self.sensor_raw_data_queue.get()
            if self.collect_samples:
                if self.time_start is None:
                    self.time_start = sample_timestamp
                    self.last_phidgets_timestamp = 0.0
                for index, spatialData in enumerate(e.spatialData):
                    phidgets_timestamp = spatialData.Timestamp.seconds + \
                        (spatialData.Timestamp.microSeconds * 0.000001)
                    stream_data = (spatialData.Acceleration[1],
                                   spatialData.Acceleration[0],
                                   spatialData.Acceleration[2],
                                   sample_timestamp)
                    nezt.extend([stream_data])
                    if self.last_phidgets_timestamp:
                        sample_increment = int(round(
                            (phidgets_timestamp - self.last_phidgets_timestamp) / PHIDGETS_NOMINAL_DATA_INTERVAL))
                        if sample_increment > 4 * self.decimation:
                            logging.warn('Missing >3 samples: last sample %s current sample %s missing samples %s',
                                         self.last_phidgets_timestamp, phidgets_timestamp, sample_increment)
                        elif sample_increment == 0:
                            logging.warn('Excess samples: last sample %s current sample %s equiv samples %s',
                                         self.last_phidgets_timestamp, phidgets_timestamp, sample_increment)
                    self.last_phidgets_timestamp = phidgets_timestamp

    def DisplayDeviceInfo(self):
        print("|------------|----------------------------------|--------------|------------|")
        print("|- Attached -|- Type -|- Serial No. -|- Version -|")
-|- Version -|") print("|------------|----------------------------------|--------------|------------|") print("|- %8s -|- %30s -|- %10d -|- %8d -|" % (self.accelerometer.isAttached(), self.accelerometer.getDeviceName(), self.accelerometer.getSerialNum(), self.accelerometer.getDeviceVersion())) print("|------------|----------------------------------|--------------|------------|") print("Number of Axes: %i" % (self.accelerometer.getAccelerationAxisCount())) print('Max Acceleration Axis 0: {} Min Acceleration Axis 0: {}'.format(self.accelerometer.getAccelerationMax(0), self.accelerometer.getAccelerationMin(0))) print('Max Acceleration Axis 1: {} Min Acceleration Axis 1: {}'.format(self.accelerometer.getAccelerationMax(1), self.accelerometer.getAccelerationMin(1))) print('Max Acceleration Axis 2: {} Min Acceleration Axis 2: {}'.format(self.accelerometer.getAccelerationMax(2), self.accelerometer.getAccelerationMin(2))) def setFileStoreInterval(self, file_store_interval): # sets the interval for writing the data to a new file self.file_store_interval = file_store_interval #Event Handler Callback Functions def AccelerometerAttached(self, e): attached = e.device self.phidget_attached = True logging.info("Accelerometer %s Attached!", attached.getSerialNum()) # set data rate in milliseconds (we will decimate from 4ms to 20ms later) self.accelerometer.setDataRate(PHIDGETS_NOMINAL_DATA_INTERVAL_MS) logging.info("Phidget data rate interval set to %s milliseconds", PHIDGETS_NOMINAL_DATA_INTERVAL_MS) def AccelerometerDetached(self, e): detached = e.device self.phidget_attached = False logging.error('Accelerometer %s Detached!',(detached.getSerialNum())) def AccelerometerError(self, e): try: source = e.device logging.error("Accelerometer %s: Phidget Error %s: %s", source.getSerialNum(), e.eCode, e.description) except PhidgetException as e: logging.error("Phidget Exception %s: %s", e.code, e.details) def AccelerometerAccelerationChanged(self, e): source = e.device logging.error("Accelerometer %s: Axis %s: %s", source.getSerialNum(), e.index, e.acceleration) def MakeFilename(self, timestamp): timestamp_datetime = datetime.datetime.fromtimestamp(timestamp).strftime(FILESTORE_NAMING) sps = int (1000 / (PHIDGETS_NOMINAL_DATA_INTERVAL_MS * self.decimation)) return self.datastore + '/' + str(sps) + '_' + timestamp_datetime + '.dat' def setTimingFitVariables(self, base_time, gradient, intercept): # this is called by the main client which is monitoring the system clock compared with NTP # we acquire a lock on the timing variables to update them with self.timing_lock: self.timing_base_time = base_time self.timing_gradient = gradient self.timing_intercept = intercept def GetNtpCorrectedTimestamp(self): # from the current system time we use the NTP thread's line fit to estimate the true time time_now = time.time() # we ensure that the timing variables are not being updated concurrently by acquiring a lock on them with self.timing_lock: offset_estimate = (time_now-self.timing_base_time) * self.timing_gradient + self.timing_intercept return time_now + offset_estimate def StopSampleCollection(self): # This stops more samples being collected, and closes the current samples file self.collect_samples = False self.datastore_file.close() def StartSampleCollection(self): # This restarts sample collection (into files) self.collect_samples = True def SpatialData(self, e): if not self.phidget_attached: return sample_timestamp = self.GetNtpCorrectedTimestamp() self.sensor_raw_data_queue.put((e, sample_timestamp)) def 
    def to_dict(self):
        # No need to persist anything that doesn't change.
        details = {}
        details['module'] = 'PhidgetsSensor'
        details['class'] = 'PhidgetsSensor'
        with self.sensor_data_lock:
            details['sensor_id'] = self.sensor_data
            details['serial'] = self.sensor_data['Serial']
            details['datastore'] = self.datastore
            details['decimation'] = self.decimation
            details['picker'] = self.picker
            details['pick_threshold'] = self.pick_threshold
            if self.last_datastore_filename_uploaded:
                details['last_upload'] = self.last_datastore_filename_uploaded
            else:
                details['last_upload'] = ''
        return details

    def get_metadata(self):
        logging.error('PhidgetSensor get_metadata Not implemented')

    def datafiles_not_yet_uploaded(self):
        # Returns a list of files that are older than last_datastore_filename (or current time) in the
        # data file directory that have not yet been uploaded
        # we subtract 10 minutes off the time to avoid finding the currently open file (although we could in
        # principle trap that)
        file_list = []
        last_file_date = self.GetNtpCorrectedTimestamp() - 600.0
        logging.info('Searching for files older than %s', last_file_date)
        for f in os.listdir(self.datastore):
            filename = self.datastore + '/' + f
            if filename == self.datastore_filename:
                logging.info('Will not add currently opened file %s', filename)
                continue
            if os.path.isfile(filename):
                try:
                    #t = os.path.getctime(filename)
                    file_date = os.path.getctime(filename)
                    if file_date < last_file_date:
                        if len(file_list) < 20:
                            logging.info('Not yet uploaded: %s', filename)
                        elif len(file_list) == 20:
                            logging.info('Not yet uploaded: %s (will not show more)', filename)
                        file_list.append(filename)
                except Exception:
                    logging.error('Error getting file time for %s', filename)
        return file_list

    def mark_file_uploaded(self, filename):
        # when a sensor data file has been successfully uploaded to the server we move it to the uploaded directory
        try:
            shutil.copy2(filename, self.datastore_uploaded)
            self.last_datastore_filename_uploaded = filename
            os.remove(filename)
        except Exception:
            logging.warning('Failed to move %s to %s', filename, self.datastore_uploaded)

    def mark_file_corrupted(self, filename):
        # when a sensor data file fails to convert to stream we move it to the corrupt directory
        try:
            shutil.copy2(filename, self.datastore_corrupted)
        except Exception:
            logging.warning('Failed to move %s to %s', filename, self.datastore_corrupted)
        # always remove the corrupt file from the main directory
        try:
            os.remove(filename)
        except Exception:
            logging.error('Failed to delete %s', filename)

    def set_sensor_id(self, sensor_id):
        with self.sensor_data_lock:
            self.sensor_data['sensor_id'] = sensor_id

    def send_event(self, in_stream):
        """
        pick_message = {'timestamp': event_time,
                        'station': self.client_id,
                        'latitude': self.latitude,
                        'longitude': self.longitude,
                        'demeaned_accelerations': values,
                        'floor': self.floor,
                        }
        try:
            message_body = json.dumps(pick_message)
            m = Message()
            m.set_body(message_body)
            self.pick_queue.write(m)
            #logging.info('Sent pick to Amazon SQS: %s', pick_message)
        except Exception, e:
            logging.error('Failed to send pick message: %s', e)
        """
        def send_pick_to_finder(values):
            # values: (time stamp, accs)
            # Send pick to FinDer
            station_name = self.client_id
            station_name = station_name[0:1] + station_name[3:]
            finder_accs = [acc * G_TO_CMS2 for acc in values[1:]]
            channel = 'HNN'
            if finder_accs[1] > 0.0:
                channel = 'HNE'
            if finder_accs[2] > 0.0:
                channel = 'HNZ'
            pick_datetime = datetime.datetime.utcfromtimestamp(float(values[0]))
            pick_time = pick_datetime.strftime(TIMESTAMP_NAMING)
            finder_location = str(self.latitude) + ' ' + str(self.longitude)
            timenow_timestamp = self.GetNtpCorrectedTimestamp()
            timenow = datetime.datetime.utcfromtimestamp(timenow_timestamp)
            activemq_message = '1 ' + timenow.strftime(TIMESTAMP_NAMING)[:-3] + ' ' + self.software_version + '\n'
            line = '%s CSN.%s.%s.-- %s %s %s %s\n' % \
                (finder_location, station_name, channel, pick_time,
                 abs(finder_accs[0]), abs(finder_accs[1]), abs(finder_accs[2]))
            activemq_message += line
            activemq_message += 'ENDOFDATA\n'
            logging.info("ActiveMQ message to send:\n%s", activemq_message[:-1])
            try:
                self.stomp.put(activemq_message, destination=self.stomp_topic)
            except Exception as err:
                logging.error('Error sending pick to ActiveMQ %s', err)
                logging.info('Trying to reconnect to ActiveMQ broker')
                self.stomp = self.connect_stomp()

        sink_element(func=send_pick_to_finder, in_stream=in_stream)
def h(in_streams, out_streams):
    def print_output(v):
        print(v)
    sink_element(func=print_output, in_stream=in_streams[0])
def target():
    """
    This is the target function of this process. This function has the
    following steps:
    1. Create the in_streams of this process, i.e., the in_streams of
       the compute_func of the process.
    2. Create in_stream_signals, with an in_stream_signal corresponding
       to each in_stream.
    3. Create the out_streams of this process, i.e. the out_streams of
       the compute_func of this process.
    4. Create the computational agent (compute_func) of this process.
    5. For each out_stream of compute_func, create an agent to copy the
       out_stream to its buffer, and then copy the buffer to each
       in_stream to which it is connected.
    6. For each in_stream of compute_func, create an agent to copy its
       input buffer into the in_stream.
    7. Create the scheduler for this process. Starting the scheduler
       starts the thread that executes compute_func for this agent.
    8. Create the source threads for each source in this process. The
       source_thread gets data from a source, puts the data into a
       buffer, and then copies the buffer to each in_queue to which the
       source is connected.
    9. Start the scheduler and source threads.
    10. Join the scheduler and source threads.
    """
    # STEP 1
    # CREATE THE IN_STREAMS OF COMPUTE_FUNC
    # and compute the dict, name_to_stream.
    # in_streams is the list of in_streams of this process.
    self.in_streams = []
    # name_to_stream is a dict where the key is the name of an
    # input or output stream and the value is the stream itself.
    self.name_to_stream = {}
    for in_stream_name, in_stream_type in self.in_stream_names_types:
        in_stream = Stream(name=in_stream_name)
        self.in_streams.append(in_stream)
        self.name_to_stream[in_stream_name] = in_stream

    # STEP 2
    # CREATE IN_STREAM_SIGNALS, which is a list of input streams, with
    # one in_stream_signal for each in_stream.
    # in_stream_signal[j] is the stream that tells
    # this process that it has data to be read into
    # in_stream[j]. The name of an in_stream_signal associated with an
    # in_stream called 's' is 's_signal_'.
    self.in_stream_signals = []
    for in_stream in self.in_streams:
        in_stream_signal_name = in_stream.name + '_signal_'
        in_stream_signal = Stream(name=in_stream_signal_name)
        self.in_stream_signals.append(in_stream_signal)
        self.name_to_stream[in_stream_signal_name] = in_stream_signal

    # STEP 3
    # CREATE THE OUT_STREAMS FOR COMPUTE_FUNC.
    # out_streams is a list of the output streams of this process.
    self.out_streams = []
    for out_stream_name, out_stream_type in self.out_stream_names_types:
        out_stream = Stream(out_stream_name)
        self.out_streams.append(out_stream)
        self.name_to_stream[out_stream_name] = out_stream

    # STEP 4
    # CREATE THE COMPUTE AGENT FOR THIS PROCESS.
    self.compute_func(self.in_streams, self.out_streams, **self.keyword_args)

    # STEP 5
    # CREATE AGENTS TO COPY EACH OUT_STREAM OF COMPUTE_FUNC TO IN_STREAMS.
    # Note: Create an agent for each out_stream of compute_func and
    # create an agent for each source. This agent copies the elements
    # in each out_stream into the in_streams to which it is connected.
    # See copy_stream().
    #
    # self.out_stream_names_types is a list of pairs:
    # (out_stream_name, out_stream_type)
    for out_stream_name, out_stream_type in self.out_stream_names_types:
        # STEP 5.1: Get parameters of each agent.
        # Step 5.1.1 Get the out_stream with the specified name.
        out_stream = self.name_to_stream[out_stream_name]
        # Step 5.1.2 Get the buffer and buffer_ptr into which this
        # out_stream is copied.
        buffer, buffer_ptr = self.out_to_buffer[out_stream_name]
        # Step 5.1.3 Get the list of pairs (q, in_stream_signal_name)
        # connected to this out_stream.
        q_and_in_stream_signal_names = \
            self.out_to_q_and_in_stream_signal_names[out_stream_name]
        # STEP 5.2: Make an agent that copies out_stream to the in_streams to
        # which it is connected. The input stream to this agent is out_stream.
        # stream_name is a keyword argument of copy_stream().
        sink_list(func=self.copy_stream, in_stream=out_stream,
                  stream_name=out_stream_name)

    # STEP 6
    # CREATE AGENTS TO COPY BUFFERS TO IN_STREAMS.
    # For each in_stream of this process, create an agent that
    # copies data from the input buffer of this in_stream into
    # the in_stream.
    # This agent subscribes to the in_stream_signal associated
    # with this in_stream. When in_stream_signal gets a message
    # (start, end) this agent copies the buffer segment between
    # start and end into the in_stream.
    # copy_buffer_segment() is the function executed by the agent
    # when a new message arrives. This function extends out_stream
    # with the segment of the buffer specified by the message.
    for in_stream_name, in_stream_type in self.in_stream_names_types:
        in_stream_signal_name = in_stream_name + '_signal_'
        # Get the in_stream_signal stream from its name.
        in_stream_signal = self.name_to_stream[in_stream_signal_name]
        # Get the in_stream from its name.
        in_stream = self.name_to_stream[in_stream_name]
        # Get the buffer that feeds this in_stream.
        buffer, buffer_ptr = self.in_to_buffer[in_stream_name]
        # Create agents
        sink_element(
            func=copy_buffer_segment,
            in_stream=in_stream_signal,
            out_stream=in_stream,
            buffer=buffer, in_stream_type=in_stream_type)

    # STEP 7
    # CREATE A NEW STREAM.SCHEDULER FOR THIS PROCESS
    # Specify the scheduler, input_queue and name_to_stream for
    # this process.
    Stream.scheduler = ComputeEngine(self)
    # input_queue is the queue into which all streams for this
    # process are routed.
    Stream.scheduler.input_queue = self.in_queue
    # The scheduler for a process uses a dict, name_to_stream.
    # name_to_stream[stream_name] is the stream with the name stream_name.
    Stream.scheduler.name_to_stream = self.name_to_stream

    # STEP 8
    # CREATE SOURCE_THREADS
    source_threads = []
    for source_name, description in self.sources.items():
        # thread_creation_func returns a thread which
        # gets data from a source with name source_name and then
        # uses self.copy_stream to copy the data into a
        # buffer associated with this source, and
        # informs all in_streams connected to this source that
        # new data has arrived.
        thread_target = description['func']
        if 'keyword_args' in description.keys():
            self.source_keyword_args = description['keyword_args']
        else:
            self.source_keyword_args = {}
        # Get the source_thread for the source with this name.
        #source_thread = thread_creation_func(self.copy_stream, source_name)
        source_thread = self.create_source_thread(
            thread_target, source_name, **self.source_keyword_args)
        source_threads.append(source_thread)

    # STEP 9
    # START SOURCE THREADS AND START SCHEDULER.
    # Starting the scheduler starts a thread --- the main thread --- of this
    # process. The scheduler thread gets a ready agent from the in_queue of
    # this process and then executes the next step of the agent.
    Stream.scheduler.start()
    for source_thread in source_threads:
        source_thread.start()

    # STEP 10
    # JOIN SOURCE THREADS AND JOIN SCHEDULER.
    for source_thread in source_threads:
        source_thread.join()
    Stream.scheduler.join()
    return
def compute_func(in_streams, out_streams):
    sink_element(func=tweet_analyzer, in_stream=in_streams[0])
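# tweet_analyzer is not shown in the example above; a hypothetical stand-in
# could be any single-argument sink function that inspects each tweet element
# as it arrives, for example:
def tweet_analyzer(tweet):
    # Illustrative only: look at one field of the tweet element and report it.
    text = tweet.get('text', '') if isinstance(tweet, dict) else str(tweet)
    print('analyzing tweet of length', len(text))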