def test():
    """Feed two batches into max_min_of_windows_in_stream and print results.

    Expected values are handed to check() before any input arrives and
    check_empty() is called last (presumably to confirm that every
    expected value was produced -- verify against the check helpers).
    """
    source = Stream('x')
    out_y, out_z = max_min_of_windows_in_stream(source)
    out_y.set_name('y')
    out_z.set_name('z')

    check(out_y, [5])
    check(out_z, [3])

    for batch in ([3, 5], [11, 15]):
        source.extend(batch)
        for stream in (source, out_y, out_z):
            stream.print_recent()
        print('')

    check_empty()
def main():
    """Show list -> list functions wrapped as stream -> stream functions."""
    # Functions: list -> list
    def square(lst):
        return [item * item for item in lst]

    def double(lst):
        return [2 * item for item in lst]

    def even(lst):
        return [item for item in lst if item % 2 == 0]

    # Functions: stream -> stream, all built from the same stream_func
    # configuration and differing only in the wrapped list function.
    list_to_stream = partial(stream_func, f_type='list', num_outputs=1)
    stream_square = partial(list_to_stream, f=square)
    stream_double = partial(list_to_stream, f=double)
    stream_even = partial(list_to_stream, f=even)

    # Create the input stream; its name is 'input'.
    source = Stream('input')

    # squared = stream_square(source), doubled = stream_double(source),
    # squared_double = stream_square(doubled), i.e. it could also be
    # written stream_square(stream_double(source)); evens is produced by
    # the even() filter.
    squared = stream_square(source)
    doubled = stream_double(source)
    squared_double = stream_square(doubled)
    evens = stream_even(source)

    # Names make the printed output easier to read.
    squared.set_name('square of input')
    doubled.set_name('double of input')
    squared_double.set_name('square of double of input')
    evens.set_name('even values in input')

    steps = (
        ('add [3, 5] to the tail of the input stream', [3, 5]),
        ('add [2, 6] to the tail of the input stream', [2, 6]),
    )
    for message, batch in steps:
        print('')
        print(message)
        # Add values to the tail of the input stream.
        source.extend(batch)
        # Print the most recent values of the input and every output.
        for stream in (source, squared, doubled, squared_double, evens):
            stream.print_recent()
def test():
    """Feed two batches into exp_smooth_max_and_std_of_windows_in_stream.

    Expected values are handed to check() up front; check_empty() runs
    last (presumably the final verification step -- confirm in the check
    helpers).
    """
    source = Stream('x')
    out_y, out_z = exp_smooth_max_and_std_of_windows_in_stream(source)
    out_y.set_name('y')
    out_z.set_name('z')

    check(out_y, [1.6000000000000001, 4.3200000000000003])
    check(out_z, [0.65319726474218087, 0.78383671769061702])

    for batch in ([1, 2, 3, 4, 5], [6, 12, 13, 14, 15]):
        source.extend(batch)
        for stream in (source, out_y, out_z):
            stream.print_recent()
        print('')

    check_empty()
def main():
    """Assert that an Agent with a copying transition forwards its inputs.

    Two input streams are connected to two output streams through agent
    'A'. After each extend() the stop indices of the outputs, the copied
    values, and the per-agent start indices of the inputs are asserted.
    """
    def copy(list_of_in_lists, state):
        # For every input: emit the unread slice and report its stop
        # index as the new start position. State is passed through.
        copied = []
        new_starts = []
        for in_list in list_of_in_lists:
            copied.append(in_list.list[in_list.start:in_list.stop])
            new_starts.append(in_list.stop)
        return (copied, state, new_starts)

    in_0 = Stream('input_stream_0', num_in_memory=32)
    in_1 = Stream('input_stream_1', num_in_memory=32)
    out_0 = Stream('output_stream_0', num_in_memory=32)
    out_1 = Stream('output_stream_1', num_in_memory=32)

    A = Agent(in_streams=[in_0, in_1],
              out_streams=[out_0, out_1],
              transition=copy,
              name='A')

    first_batch = list(range(10))
    second_batch = list(range(10, 25))

    in_0.extend(first_batch)
    assert out_0.stop == 10
    assert out_1.stop == 0
    assert out_0.recent[:10] == first_batch
    assert in_0.start == {A: 10}
    assert in_1.start == {A: 0}

    in_1.extend(second_batch)
    assert out_0.stop == 10
    assert out_1.stop == 15
    assert out_0.recent[:10] == first_batch
    assert out_1.recent[:15] == second_batch
    assert in_0.start == {A: 10}
    assert in_1.start == {A: 15}
def main():
    """Run a stateful window function (largest std so far) over random data."""
    def max_of_std(lst, state):
        # state holds the largest standard deviation seen so far; the
        # same value is emitted and kept as the next state.
        largest = max(np.array(lst).std(), state)
        return (largest, largest)

    window_size = 10
    step_size = 10

    print("example_1")
    # NOTE(review): the message below names mean_and_sigma(), but the
    # window function used here is max_of_std().
    print("example function from list to value: mean_and_sigma() ")
    print('%s %s' % ("window_size = ", window_size))
    print('%s %s' % ("step_size = ", step_size))
    print("")

    # source is the in_stream; max_of_std() is the function on the window.
    source = Stream('x')
    result = stream_func(source, f_type='window', f=max_of_std,
                         num_outputs=1, state=0.0,
                         window_size=window_size, step_size=step_size)
    result.set_name('z')

    source.extend([random.random() for _ in range(30)])
    source.print_recent()
    result.print_recent()
def main():
    """Demonstrate stateful window functions on a stream of random numbers.

    Three derived streams are built from the input stream x:
      * y     -- max_of_std over windows of x,
      * z     -- max_of_std over windows of y,
      * means -- mean of a sliding window of x, maintained incrementally.
    """
    def mean_of_window(stream):
        # The running-sum update in mean() adds the newest value and
        # removes the oldest value of the previous window, which is only
        # correct when the window advances by exactly one element.
        mean_window_size = 2
        mean_step_size = 1

        def mean(lst, state):
            sum_window, next_value_dropped = state
            if sum_window is None:
                # First window: no running sum yet.
                sum_window = sum(lst)
            else:
                sum_window = sum_window + lst[-1] - next_value_dropped
            # float() keeps the result a true mean for integer inputs.
            window_mean = sum_window / float(len(lst))
            # The head of this window leaves when the window slides.
            next_value_dropped = lst[0]
            return (window_mean, (sum_window, next_value_dropped))

        return stream_func(
            inputs=stream, f_type='window', f=mean, num_outputs=1,
            state=(None, None),
            window_size=mean_window_size, step_size=mean_step_size)

    def max_of_std(lst, state):
        # Emit the window's standard deviation only when it exceeds the
        # largest value seen so far; otherwise emit _no_value.
        a = np.array(lst).std()
        if a > state:
            state = a
            return (a, state)
        else:
            return (_no_value, state)

    x = Stream('x')
    # x is the input stream.
    g = partial(stream_func, f_type='window', f=max_of_std,
                num_outputs=1, state=0.0, window_size=10, step_size=10)
    y = g(x)
    z = g(y)
    means = mean_of_window(x)

    y.set_name('y')
    z.set_name('z')
    means.set_name('means')

    x.extend([random.random() for _ in range(30)])
    x.print_recent()
    y.print_recent()
    z.print_recent()
    means.print_recent()
def main():
    """Split one integer stream into an evens stream and an odds stream."""
    # Function: list -> tuple of two lists
    def even_odd(list_of_integers):
        evens_list = [n for n in list_of_integers if n % 2 == 0]
        odds_list = [n for n in list_of_integers if n % 2]
        return (evens_list, odds_list)

    # Function: stream -> two streams, built from even_odd.
    stream_even_odd = partial(stream_func, f_type='list', f=even_odd,
                              num_outputs=2)

    # Create the input stream; its name is 'input_0'.
    source = Stream('input_0')

    evens, odds = stream_even_odd(source)

    # Names make the printed output easier to read.
    evens.set_name('even numbers in x')
    odds.set_name('odd numbers in x')

    steps = (
        ('Adding [3, 5, 8] to input stream', [3, 5, 8]),
        ('Adding [4, 6, 2, 1] to the input stream', [4, 6, 2, 1]),
    )
    for message, batch in steps:
        print('')
        print(message)
        # Add values to the tail of the input stream.
        source.extend(batch)
        # Print recent values of the streams.
        print('recent values of input streams')
        source.print_recent()
        print('recent values of output streams')
        evens.print_recent()
        odds.print_recent()
def main():
    """Accumulate the difference of window means of two streams."""
    def sum_diff_of_means(list_of_two_lists, cumulative):
        first, second = list_of_two_lists
        cumulative += np.mean(first) - np.mean(second)
        # The running total is both the emitted value and the next state.
        return (cumulative, cumulative)

    left = Stream('x')
    right = Stream('y')
    total = stream_func([left, right], f_type='window',
                        f=sum_diff_of_means, num_outputs=1, state=0,
                        window_size=2, step_size=2)
    total.set_name('z')

    batches = (
        ([3, 5], [2]),
        ([11, 15], [4, -10, -12]),
    )
    for left_batch, right_batch in batches:
        left.extend(left_batch)
        right.extend(right_batch)
        for stream in (left, right, total):
            stream.print_recent()
        print('')
def test():
    """Feed two streams into sum_diffs_means_of_windows and print results.

    The expected output is handed to check() first; check_empty() is
    called at the end (presumably the final verification -- confirm in
    the check helpers).
    """
    left = Stream('x')
    right = Stream('y')
    result = sum_diffs_means_of_windows([left, right])
    result.set_name('z')

    check(result, [1.0])

    batches = (
        ([3, 5], [2]),
        ([11, 15], [4, -10, -12]),
    )
    for left_batch, right_batch in batches:
        left.extend(left_batch)
        right.extend(right_batch)
        for stream in (left, right, result):
            stream.print_recent()
        print('')

    check_empty()
def test():
    """Feed (sensor_id, value) pairs into stream_split_by_sensor_id.

    Expected values for both output streams are handed to check() before
    any input is added; check_empty() is called last.
    """
    # Create the input stream; its name is 'input_0'.
    source = Stream('input_0')

    avg_id_0, avg_id_1 = stream_split_by_sensor_id(source)

    # Names make the printed output easier to read.
    avg_id_0.set_name('average of id_0 sensors in x')
    avg_id_1.set_name('average of id_1 sensors in x')

    check(avg_id_0, [2.0, 3.0, 5.0, 4.0, 4.0])
    check(avg_id_1, [5.0, 3.0, 3.0, 4.0, 5.0, 6.0])

    def show_outputs():
        avg_id_0.print_recent()
        avg_id_1.print_recent()

    print('')
    print('Adding ([(0,2), (0,4), (1,5), (1,1), (0,9)]')
    print('to the input stream.')
    # Add values to the tail of the input stream.
    source.extend([(0, 2), (0, 4), (1, 5), (1, 1), (0, 9)])

    # Print recent values of the streams.
    print('')
    print('recent values of input streams')
    source.print_recent()
    print('')
    print('recent values of output streams')
    show_outputs()

    print('')
    print('')
    print('Adding ([(1,3), (1,7), (0,1), (1,9), (1,11), (0,4)])')
    print('to the input stream.')
    # Add more values to the tail of the input stream.
    source.extend([(1, 3), (1, 7), (0, 1), (1, 9), (1, 11), (0, 4)])

    # Print recent values of the streams.
    print('recent values of input streams')
    print('')
    source.print_recent()
    print('recent values of output streams')
    print('')
    show_outputs()

    check_empty()
def test():
    """Feed integers into stream_even_odd and print both output streams.

    Expected values are handed to check() before input is added;
    check_empty() is called last.
    """
    # Create the input stream; its name is 'input_0'.
    source = Stream('input_0')

    evens, odds = stream_even_odd(source)

    # Names make the printed output easier to read.
    evens.set_name('even numbers in x')
    odds.set_name('odd numbers in x')

    check(evens, [8, 4, 6, 2])
    check(odds, [3, 5])

    # NOTE(review): the messages mention three input streams, but only
    # one stream is extended here.
    steps = (
        ('Adding [3, 5, 8], [1, 7, 2], [2, 3] to 3 input streams',
         [3, 5, 8]),
        ('Adding [4, 6, 2], [2, 3, 8], [5, 3, 0, -1] to 3 input streams',
         [4, 6, 2]),
    )
    for message, batch in steps:
        print('')
        print(message)
        # Add values to the tail of the input stream.
        source.extend(batch)
        # Print recent values of the streams.
        print('recent values of input streams')
        source.print_recent()
        print('recent values of output streams')
        evens.print_recent()
        odds.print_recent()

    check_empty()
def test():
    """Connect two inputs to two outputs with echo() and print the outputs."""
    first_in = Stream(name='in_1')
    second_in = Stream(name='in_2')
    first_out = Stream(name='out_1')
    second_out = Stream(name='out_2')

    # Printed once before anything has been connected or fed in.
    first_out.print_recent()

    # NOTE(review): out_1 is expected to hold the values fed to in_2 and
    # vice versa; if echo() forwards in_1 -> out_1 these expectations
    # look swapped -- confirm against echo().
    check(first_out, [13, 14, 15])
    check(second_out, [3, 4, 5])

    echo([first_in, second_in], [first_out, second_out])

    first_in.extend([3, 4, 5])
    second_in.extend([13, 14, 15])

    for stream in (first_out, second_out):
        stream.print_recent()

    check_empty()
def main():
    """Run linear_filter over samples loaded from a .npy file and plot it.

    The file name is hard-coded and must exist in the working directory.
    """
    b, a = butter_bandpass(lowcut=0.1, highcut=5.0, fs=50, order=5)
    # Initial filter state: the coefficients plus a zero vector of
    # length len(a) - 1.
    state = (b, a, np.zeros(len(a) - 1))

    filename = '20110111000000.2D.OBS34.SXZ.npy'
    data_array = np.load(filename)
    print('%s %s' % ('len(data_array)', len(data_array)))
    # Only the first 1000000 samples are used.
    data_array = data_array[:1000000]

    source = Stream('x')
    print('%s %s' % ('a', a))
    print('%s %s' % ('b', b))

    filtered = stream_func(source, f_type='window', f=linear_filter,
                           num_outputs=1, state=state,
                           window_size=len(b), step_size=1)
    source.extend(data_array)
    filtered.set_name('y')

    # Plot the output from index 5000 up to the stream's stop index.
    plt.plot(filtered.recent[5000:filtered.stop])
    plt.show()
    plt.close()
def test():
    """Attach number_even_odd to one input via stream_agent and print outputs."""
    source = Stream(name='in_1')
    first_out = Stream(name='out_1')
    second_out = Stream(name='out_2')

    check(first_out, [4, 8])
    check(second_out, [3, 5])

    stream_agent(inputs=source,
                 outputs=[first_out, second_out],
                 f_type='element',
                 f=number_even_odd)

    source.extend([3, 4, 5, 8])

    for stream in (first_out, second_out):
        stream.print_recent()

    check_empty()
def test():
    """Feed two batches through stream_cumulative and stream_average."""
    # Create the input stream; its name is 'input'.
    source = Stream('input')

    # cumulative is stream_cumulative(source); twice_cumulative applies
    # stream_cumulative again to that result.
    cumulative = stream_cumulative(source)
    twice_cumulative = stream_cumulative(cumulative)
    # average is the stream returned by stream_average(source).
    average = stream_average(source)

    # Names make the printed output easier to read.
    cumulative.set_name('cumulative sum of input')
    twice_cumulative.set_name('cumulative sum of cumulative sum of input')
    average.set_name('average of input')

    check(cumulative, [3, 8, 18, 20, 25, 36])
    check(twice_cumulative, [3, 11, 29, 49, 74, 110])
    check(average, [3.0, 4.0, 6.0, 5.0, 5.0, 6.0])

    steps = (
        ('add values [3, 5, 10] to the tail of the input stream.',
         [3, 5, 10]),
        ('add values [2, 5, 11] to the tail of the input stream.',
         [2, 5, 11]),
    )
    for message, batch in steps:
        print('')
        print(message)
        # Add values to the tail of the input stream.
        source.extend(batch)
        # Print the most recent values of the input and the outputs.
        for stream in (source, cumulative, twice_cumulative, average):
            stream.print_recent()

    check_empty()
def main():
    """Drive dynamic_window_agent with a function that requests a reset."""
    # EXAMPLE FUNCTIONS ON WINDOWS
    # The window function receives the list of values in the window and
    # the window state, and returns (output, state).
    min_window_size = 3
    max_window_size = 7
    step_size = 2

    input_stream = Stream('in')
    output_stream = Stream('out')

    # state = [current_window_size, steady_state, reset]
    state = [0, False, False]

    def f(lst, state):
        if sum(lst) > 100:
            # state[2] is set to True to reset the window.
            state[2] = True
        return (lst, state)

    dynamic_window_agent(
        f, input_stream, output_stream, state,
        min_window_size, max_window_size, step_size)

    # Feed the values 0..9 one at a time.
    for value in range(10):
        input_stream.extend([value])
        input_stream.print_recent()
        output_stream.print_recent()
        print("\n")

    # Feed two batches of five values: 20..24 and 30..34.
    for tens in range(2, 4):
        input_stream.extend(range(10 * tens, 10 * tens + 5))
        print('')
        input_stream.print_recent()
        print('')
        output_stream.print_recent()
        print('')
def test():
    """Feed paired values into inrange_and_outlier_streams and print results.

    The first batch has y equal to x; the second batch has y equal to
    10*x. Expected values are handed to check() before any input is
    added; check_empty() is called last.
    """
    # Create the two input streams.
    x = Stream('input_0')
    y = Stream('input_1')

    inrange_stream, outlier_stream = inrange_and_outlier_streams([x, y])

    # Names make the printed output easier to read.
    inrange_stream.set_name('inrange')
    outlier_stream.set_name('outlier')

    check(inrange_stream,
          [(10, 10), (11, 11), (12, 12), (13, 13), (14, 14),
           (18, 180), (19, 190)])
    check(outlier_stream, [(15, 150), (16, 160), (17, 170)])

    def print_streams():
        # Print recent values of the input and output streams.
        print('recent values of input streams')
        x.print_recent()
        y.print_recent()
        print('recent values of output streams')
        inrange_stream.print_recent()
        outlier_stream.print_recent()

    print('')
    # Add values to the tails of both streams: y == x.
    x.extend(range(10, 15, 1))
    y.extend(range(10, 15, 1))
    print_streams()

    print('')
    print('Adding [15, 16, ...19], [150, 160,..190] to 2 streams.')
    # Add more values to the tails of both streams: y == 10*x.
    x_list = range(15, 20, 1)
    y_list = [10 * v for v in x_list]
    x.extend(x_list)
    y.extend(y_list)
    print_streams()

    print('')
    print('The regression parameters take some time to adjust')
    # The message previously read "x = 10*y", the inverse of the data
    # built above (y_list = 10 * x_list).
    print('to the new slope. Initially y = x, then y = 10*x')

    check_empty()
def main():
    """Generate random-number streams driven by a trigger stream."""
    def single_stream_of_random_numbers(trigger_stream):
        def ran():
            return random.random()

        return stream_func(inputs=None, f_type='element', f=ran,
                           num_outputs=1, call_streams=[trigger_stream])

    def stream_of_normal_and_pareto(clock_stream, b):
        from scipy.stats import norm, pareto

        def normal_and_pareto():
            # One draw from each distribution.
            normal_draw = norm.rvs(size=1)[0]
            pareto_draw = pareto.rvs(b, size=1)[0]
            return [normal_draw, pareto_draw]

        return stream_func(inputs=None, f_type='element',
                           f=normal_and_pareto, num_outputs=2,
                           call_streams=[clock_stream])

    trigger = Stream('trigger')
    uniform = single_stream_of_random_numbers(trigger_stream=trigger)
    normal, heavy_tail = stream_of_normal_and_pareto(
        clock_stream=trigger, b=2.62171653214)

    uniform.set_name('random')
    normal.set_name('normal')
    heavy_tail.set_name('pareto')

    trigger.extend(['tick', 'tick'])

    for stream in (trigger, uniform, normal, heavy_tail):
        stream.print_recent()
def main():
    """Plot two sine components, their sum, and the filtered sum.

    An 8 Hz sine and a half-amplitude 16 Hz sine are summed and fed into
    the stream x; the result of filter(b, a, input_stream=x) is plotted
    with the title 'output'. Each plot shows samples from index 4000 on.
    """
    x = Stream('x')
    fs = 250
    b, a = butter_bandpass(lowcut=4, highcut=10, fs=fs, order=5)

    hertz_1 = 8
    hertz_2 = 16
    # 5000 samples spanning 0..20.
    t = np.linspace(0, 20, 5000)
    z_1 = np.sin(2 * np.pi * hertz_1 * t)
    z_2 = 0.5 * np.sin(2 * np.pi * hertz_2 * t)
    z_3 = z_1 + z_2

    # Show each component and then their sum.
    for wave in (z_1, z_2, z_3):
        plt.plot(wave[4000:])
        plt.show()
        plt.close()

    # NOTE(review): `filter` is called with an input_stream keyword, so
    # it is presumably a project function shadowing the builtin --
    # confirm where it is defined.
    y = filter(b, a, input_stream=x)
    x.extend(z_3)

    plt.plot(y.recent[4000:y.stop])
    plt.title('output')
    plt.show()
    plt.close()
def test():
    """Feed two batches into g() and print the input and output streams."""
    # Create the input stream; its name is 'input'.
    source = Stream('input')

    result = g(source)
    result.set_name('output')

    check(result, [3, 1, 2, 5, 2, 4, 7])

    # First add 3, 2, 5 and then 4, 7 to the tail of the input stream,
    # printing both streams after each batch.
    for batch in ([3, 2, 5], [4, 7]):
        source.extend(batch)
        source.print_recent()
        result.print_recent()

    check_empty()
def main():
    """Apply ksigma over windows of a rising and then a falling ramp."""
    print("example_1")
    print('')

    source = Stream('x')
    result = stream_func(source, f_type='window', f=ksigma,
                         num_outputs=1,
                         window_size=WINDOW_SIZE, step_size=STEP_SIZE)
    result.set_name('y')

    # 0..19 followed by 20..1.
    for batch in (range(20), range(20, 0, -1)):
        source.extend(batch)
        source.print_recent()
        result.print_recent()
        print('')
def main():
    """Compute the max and min of windows of a stream as two streams."""
    def max_and_min(lst):
        largest = max(lst)
        smallest = min(lst)
        return (largest, smallest)

    source = Stream('x')
    maxima, minima = stream_func(source, f_type='window', f=max_and_min,
                                 num_outputs=2,
                                 window_size=2, step_size=2)
    maxima.set_name('y')
    minima.set_name('z')

    for batch in ([3, 5], [11, 15]):
        source.extend(batch)
        for stream in (source, maxima, minima):
            stream.print_recent()
        print('')
def main():
    """Drive dynamic_window_func one value at a time, with a reset at i == 10."""
    # EXAMPLE FUNCTIONS ON WINDOWS
    # The window function receives the list of values in the window and
    # the window state, and returns (output, state).
    min_window_size = 2
    max_window_size = 11
    step_size = 2

    input_stream = Stream('in')

    # state = [current_window_size, steady_state, reset]
    state = [0, False, False]

    def f(lst, state):
        # Show the window contents each time the function is called.
        print(lst)
        return (lst, state)

    f_stream = partial(dynamic_window_func,
                       f=f,
                       state=state,
                       min_window_size=min_window_size,
                       max_window_size=max_window_size,
                       step_size=step_size)
    output_stream = f_stream(inputs=input_stream)

    for i in range(1, 15):
        print('%s %s' % ("Adding ", i))
        input_stream.extend([i])
        if i == 10:
            # Request a window reset through the shared state list.
            state[2] = True
        print("\n")
def main():
    """Run pick_h_and_v_in_stream and quench_stream on twelve samples.

    Each sample is an (index, [three values]) pair.
    """
    samples = [
        (0, [3, 4, 1]),
        (1, [4, 3, 2]),
        (2, [2, 2, 3]),
        (3, [1, 2, 1]),
        (4, [0, 0, 2]),
        (5, [1, 0, 1]),
        (6, [0, 1, 1]),
        (7, [2, 0, 1]),
        (8, [1.5, 2, 1]),
        (9, [2, 1.5, 3]),
        (10, [3, 4, 1]),
        (11, [4, 3, 0]),
    ]

    source = Stream("x")
    h, v = pick_h_and_v_in_stream(source)
    h_quenched = quench_stream(h)
    v_quenched = quench_stream(v)

    h.set_name("h")
    v.set_name("v")
    h_quenched.set_name("h quenched")
    v_quenched.set_name("v quenched")

    source.extend(samples)

    source.print_recent()
    # A blank line precedes each derived stream.
    for stream in (h, v, h_quenched, v_quenched):
        print('')
        stream.print_recent()
def main():
    """Fill an output stream with random numbers whenever a trigger ticks.

    stream_agent is attached with no inputs and with the trigger stream
    as its call stream; two 'tick' values are then added to the trigger
    and both streams are printed.
    """
    def single_stream_of_random_numbers(trigger_stream, out_stream):
        def ran():
            return random.random()

        return stream_agent(
            inputs=None,
            f_type='element',
            f=ran,
            outputs=out_stream,
            call_streams=[trigger_stream])

    trigger = Stream('trigger')
    r = Stream('output')

    single_stream_of_random_numbers(trigger, r)

    trigger.extend(['tick', 'tick'])
    trigger.print_recent()
    r.print_recent()
def test():
    """Feed paired values into inrange_and_outlier_streams (a=1, b=0, delta=3).

    Expected values are handed to check() before any input is added;
    check_empty() is called last.
    """
    first = Stream("input_0")
    second = Stream("input_1")

    inrange_stream, outlier_stream = inrange_and_outlier_streams(
        x_and_y_streams=[first, second], a=1, b=0, delta=3)

    inrange_stream.set_name("inrange")
    outlier_stream.set_name("outlier")

    check(inrange_stream,
          [((3, 4), 0.0), ((8, 8), 1.0 / 3.0), ((12, 12), 0.4)])
    check(outlier_stream,
          [((5, 9), 0.5), ((10, 15), 0.5), ((21, 11), 0.5)])

    batches = (
        ([3, 5, 8, 10], [4, 9, 8, 15]),
        ([12, 21, 13], [12, 11]),
    )
    for first_batch, second_batch in batches:
        print('')
        # Add values to the tails of both input streams.
        first.extend(first_batch)
        second.extend(second_batch)

        # Print recent values of the streams.
        print("recent values of input streams")
        first.print_recent()
        second.print_recent()
        print("recent values of output streams")
        inrange_stream.print_recent()
        outlier_stream.print_recent()

    check_empty()
def main():
    """Emit the difference of window means of two streams."""
    def diff_of_means(list_of_two_lists):
        first, second = list_of_two_lists
        first_mean = np.mean(first)
        second_mean = np.mean(second)
        return first_mean - second_mean

    left = Stream('x')
    right = Stream('y')
    difference = stream_func([left, right], f_type='window',
                             f=diff_of_means, num_outputs=1,
                             window_size=2, step_size=2)
    difference.set_name('z')

    batches = (
        ([3, 5], [2]),
        ([11, 15], [4, -10, -12]),
    )
    for left_batch, right_batch in batches:
        left.extend(left_batch)
        right.extend(right_batch)
        for stream in (left, right, difference):
            stream.print_recent()
        print('')
class Stream_Learn(object):
    """
    Stream framework for machine learning.

    Given data for training and predicting along with functions to learn
    and predict, `run` returns a `Stream` of predictions. Both batch and
    continual learning are supported.

    Parameters
    ----------
    data_train : `Stream` or numpy.ndarray or other
        An object containing data to be trained on. A `Stream` selects
        continual learning: it contains tuples where each tuple is a row
        of data with at least `num_features` values. It may also contain
        non-tuples provided `filter_func` is used to extract tuples in
        the correct format. A numpy array selects batch learning: it
        must have at least `num_features` columns. In both cases the
        first `num_features` values / columns are x and any additional
        ones are the output y data. Any other object is passed to
        `train_func` without being split into x and y.
    data_out : `Stream`
        A `Stream` containing data to generate predictions on. Tuples
        are split into x (first `num_features` values) and y (the rest);
        non-tuples are passed to `predict_func` unsplit.
    train_func : function
        A function that trains a model. It is called with four
        arguments (x, y, model, window_state) and returns the trained
        model object.
        For a `Stream`, x has dimensions i x `num_features`, where
        `min_window_size` <= i <= `max_window_size`, and y has
        dimensions i x num_outputs, where num_outputs is the number of y
        outputs for an input (0 for unsupervised learning). model is the
        model returned by the previous call (None initially).
        For a numpy array, x is N x `num_features` and y is
        N x num_outputs, where N is the total number of training
        examples; model and window_state are both None.
        For any other object, the call is
        train_func(data_train, None, None, None).
        window_state holds the values (current_window_size,
        steady_state, reset, `step_size`, `max_window_size`), where
        current_window_size is the number of points in the window,
        steady_state is a boolean that tells whether the window has
        reached `max_window_size`, and reset is a boolean that can be
        set to True to reset the window.
    predict_func : function
        A function called as predict_func(x, y, model) that returns the
        prediction output for one row. For a tuple row, x has
        `num_features` values and y has the remaining values (empty for
        unsupervised learning). For a non-tuple row, the call is
        predict_func(row, None, model).
    min_window_size : int
        The minimum size of the window to train on for continual
        learning. Ignored for batch learning.
    max_window_size : int
        The maximum size of the window to train on for continual
        learning. Ignored for batch learning.
    step_size : int
        The number of tuples to move the window by for continual
        learning. Ignored for batch learning.
    num_features : int
        The number of features in the data.
    filter_func : function, optional
        A function that filters data for training, called as
        filter_func(x, y, model) for tuple rows or
        filter_func(row, None, model) for non-tuple rows. It returns a
        pair (train_on, data): train_on is True if the row is to be
        trained on and False if it is an outlier; data is the row in the
        format described for `data_train`. It is only consulted once a
        model has been trained; before that every row is used.
    all_func : function, optional
        A function that processes the data for usage such as
        visualization, called as
        all_func(x, y, model, state, window_state); it returns an
        updated state object. x and y are numpy arrays shaped as for
        `train_func` in continual learning, model is the current model,
        state is whatever the previous call returned (None initially),
        and window_state has the same layout as for `train_func`.

    """

    def __init__(self, data_train, data_out, train_func, predict_func,
                 min_window_size, max_window_size, step_size, num_features,
                 filter_func=None, all_func=None):
        self.data_train = data_train
        self.data_out = data_out
        self.train_func = train_func
        self.predict_func = predict_func
        self.min_window_size = min_window_size
        self.max_window_size = max_window_size
        self.step_size = step_size
        self.num_features = num_features
        self.filter_func = filter_func
        self.all_func = all_func
        # [current_window_size, steady_state, reset, step_size,
        #  max_window_size]; reset() flips index 2.
        self.window_state = [
            0, False, False, self.step_size, self.max_window_size
        ]

    def _initialize(self):
        """Reset the model and create the stream of rows to train on."""
        self.trained = False
        self.model = None
        self.x_train = Stream('x_train')
        self.state = None

    def _filter_f(self, n):
        """Forward row `n` to `x_train` unless `filter_func` rejects it."""
        # filter_func is None or the model is not trained: keep the row.
        if not self.trained or self.filter_func is None:
            self.x_train.extend([n])
            return

        if isinstance(n, tuple):
            x = n[0:self.num_features]
            y = n[self.num_features:]
            train_data, data = self.filter_func(x, y, self.model)
        else:
            train_data, data = self.filter_func(n, None, self.model)

        if train_data:
            self.x_train.extend([data])

    def _split(self, lst):
        """Return the window `lst` as numpy arrays (x, y) split by column."""
        data = np.array(lst)
        return data[:, 0:self.num_features], data[:, self.num_features:]

    def _train(self, lst, state):
        """Window callback: retrain the model on the rows in `lst`."""
        x, y = self._split(lst)
        self.model = self.train_func(x, y, self.model, state)
        self.trained = True
        # Steady state reached and a reset requested: discard the model
        # so that training starts over.
        if state[1] and state[2]:
            self.model = None
            self.trained = False
        return (_no_value, state)

    def _predict(self, n):
        """Element callback: predict for row `n` once a model exists."""
        if not self.trained:
            return _no_value
        if not isinstance(n, tuple):
            return self.predict_func(n, None, self.model)
        x = n[0:self.num_features]
        y = n[self.num_features:]
        return self.predict_func(x, y, self.model)

    def _all_f(self, lst, state):
        """Window callback: hand the rows in `lst` to `all_func`."""
        x, y = self._split(lst)
        self.state = self.all_func(x, y, self.model, self.state, state)
        return (_no_value, state)

    def _init_streams(self):
        """Build the partially-applied stream functions used by `run`."""
        self.stream_filter = partial(stream_func, f_type='element',
                                     f=self._filter_f, num_outputs=0)
        self.stream_train = partial(dynamic_window_func, f=self._train,
                                    min_window_size=self.min_window_size,
                                    max_window_size=self.max_window_size,
                                    step_size=self.step_size,
                                    state=self.window_state)
        self.stream_predict = partial(stream_func, f_type='element',
                                      f=self._predict, num_outputs=1)
        # A separate list from self.window_state, with the same five
        # values that all_func is documented to receive.
        self.stream_all = partial(dynamic_window_func, f=self._all_f,
                                  min_window_size=self.min_window_size,
                                  max_window_size=self.max_window_size,
                                  step_size=self.step_size,
                                  state=[0, False, False, self.step_size,
                                         self.max_window_size])

    def run(self):
        """
        Runs the framework and returns a `Stream` of outputs.

        Returns
        -------
        y_predict : `Stream`
            A `Stream` containing outputs as returned by `predict_func`.

        """
        self._initialize()
        self._init_streams()
        self.model_stream = Stream('model')
        self.all_stream = Stream('all')

        # Continual learning
        if isinstance(self.data_train, Stream):
            self.stream_filter(self.data_train)
            self.stream_train(inputs=self.x_train)
            if self.all_func is not None:
                self.stream_all(inputs=self.data_train)

        # Batch learning with numpy array
        elif isinstance(self.data_train, np.ndarray):
            x = self.data_train[:, 0:self.num_features]
            y = self.data_train[:, self.num_features:]
            self.model = self.train_func(x, y, None, None)
            self.trained = True

        # Batch learning
        else:
            self.model = self.train_func(self.data_train, None, None, None)
            self.trained = True

        y_predict = self.stream_predict(self.data_out)
        return y_predict

    def reset(self):
        """
        Resets the training window to `min_window_size`.

        After resetting, the window has the last `min_window_size`
        points in the `Stream` `x_train`. For example, if
        `max_window_size` is 100, `min_window_size` is 2, and the window
        contains points [1, 100], after resetting the window contains
        points [98, 99].

        Notes
        -----
        If reset() is called before the window has reached
        `max_window_size`, the window will continue increasing in size
        until it reaches `max_window_size`. Then, the window will reset
        to `min_window_size`.

        """
        self.window_state[2] = True
def generate_stream_of_random_integers(stream_length = 10, max_integer = 100):
    """Return a new Stream extended with random integers.

    Parameters
    ----------
    stream_length : int
        How many integers are drawn.
    max_integer : int
        Upper bound passed to randint(0, max_integer).

    Returns
    -------
    Stream
        An unnamed stream that has been extended with the drawn values.
    """
    values = []
    for _ in range(stream_length):
        values.append(randint(0, max_integer))

    stream = Stream()
    stream.extend(values)
    return stream
# Output streams for the three values returned by h().
x = Stream('x')
y = Stream('y')
z = Stream('z')

print_stream(x)
print_stream(y)
print_stream(z)


def h(window, window_size, step_size, threshold, min_window_size):
    """Return [max, min, max - min] of window plus adjusted sizes.

    The window and step sizes both grow by one when the spread of the
    window is below threshold; otherwise both shrink by one as long as
    window_size is still above min_window_size.
    """
    highest = max(window)
    lowest = min(window)
    spread = highest - lowest

    if spread < threshold:
        delta = 1
    elif window_size > min_window_size:
        delta = -1
    else:
        delta = 0
    window_size += delta
    step_size += delta

    # print to help understand the program
    print('%s %s %s %s' % ('dif = ', spread, 'window_size = ', window_size))
    return ([highest, lowest, spread], window_size, step_size)


initial_window_size = 6
initial_step_size = 6

# NOTE(review): w is not defined in this part of the file; it is assumed
# to be created earlier -- confirm.
awf(w, [x, y, z], h, initial_window_size, initial_step_size,
    threshold=15, min_window_size=1)

w.extend([randint(0, 20) for _ in range(60)])
def main(): # Functions: list -> element def mean(list_of_numbers): return sum(list_of_numbers)/float(len(list_of_numbers)) def average_of_running_means(list_of_numbers, state): """ See example_element_single_in_single_out_stateful.py """ current_value = mean(list_of_numbers) n, cum = state n += 1 cum += current_value state = (n, cum) return (cum/float(n), state) # Functions: stream -> stream. # The n-th element of the output stream is f() applied to the n-th # elements of each of the input streams. # Function mean is defined above, and functions sum and max are the # standard Python functions. ## stream_sum = partial(stream_func, f_type='element', f=sum, num_outputs=1) ## stream_max = partial(stream_func, f_type='element', f=max, num_outputs=1) stream_running_mean = partial(stream_func, f_type='element', f=average_of_running_means, num_outputs=1, state = (0,0.0)) stream_mean = partial(stream_func, f_type='element', f=mean, num_outputs=1) # Create stream x, and give it name 'x'. x = Stream('input_0') y = Stream('input_1') z = Stream('input_2') # u is the stream returned by stream_sum([x,y]) and # v is the stream returned by stream_max([x,y]) # w is the stream returned by stream_mean([x,y]). # u[i] = sum(x[i],y[i]) # v[i] = max(x[i],y[i]) # w[i] = mean(x[i],y[i]) u = stream_running_mean([x,y,z]) ## v = stream_max([x,y,z]) w = stream_mean([x,y,z]) # Give names to streams u, v, and w. This is helpful in reading output. u.set_name('running mean of inputs') ## v.set_name('max of inputs') w.set_name('mean of inputs') print print 'Adding [3, 5, 8], [1, 7, 2], [2, 3] to 3 input streams' # Add values to the tail of stream x. 
x.extend([3, 5, 8]) y.extend([1, 7, 2]) z.extend([2, 3]) print # Print recent values of the streams print 'recent values of input streams' x.print_recent() y.print_recent() z.print_recent() print print 'recent values of output streams' print 'stateless stream function:' w.print_recent() print 'stateful stream function:' u.print_recent() print print print 'Adding [4, 6, 2], [2, 3, 8], [5, 3, 0, -1] to 3 input streams' # Add more values to the tail of stream x. x.extend([4, 6, 2]) y.extend([2, 3, 8]) z.extend([5, 3, 0, -1]) # Print recent values of the streams print 'recent values of input streams' x.print_recent() y.print_recent() z.print_recent() print print 'recent values of output streams' print 'stateless stream function:' w.print_recent() print 'stateful stream function:' u.print_recent()
f=add_list, f_args=(100,)) agent_5 = stream_agent( inputs=in_stream, outputs=out_stream_5, f_type='list', f=add_list_with_state, f_args=(100,), state=200) agent_6 = stream_agent( inputs=in_stream, outputs=out_stream_6, f_type='element', f=multiply_elements, f_args=(2,)) in_stream.extend(range(3)) out_stream_1.print_recent() out_stream_2.print_recent() out_stream_3.print_recent() out_stream_4.print_recent() out_stream_5.print_recent() out_stream_6.print_recent()
if state is None: state = Geomap.Geomap(llcrnrlat=20, llcrnrlon=-126, urcrnrlat=60, urcrnrlon=-65) state.clear() state.plot(x, kmeans.findClosestCentroids(x, model.centroids), s=70) # state.plot(model.centroids, color = 'Red', s = 50) return state x = Stream('x') m = KMeans.KMeansStream(draw=False, output=False, k=5) model = Stream_Learn(x, x, m.train, m.predict, 5, 30, 1, 2, all_func=all_func) y = model.run() r = requests.get('http://stream.meetup.com/2/rsvps', stream=True) i = 0 for line in r.iter_lines(): if line: data = json.loads(line) lat, lon = data['group']['group_lat'], data['group']['group_lon'] if data['group']['group_country'] == 'us': x.extend([(lat, lon)]) print i i += 1
# Driver fragment: feed synthetic points through a Stream_Learn model that
# uses a streaming linear-regression learner.
# NOTE(review): i, w, output, num_points, num_features, the window
# parameters, all_func and print_stream are defined outside this fragment.
# NOTE(review): the nesting below was reconstructed from a collapsed line;
# `print i` is assumed to sit at loop level, not inside the `if` -- confirm.
m = LinearRegression.LinearRegressionStream(draw=False, output=output, alpha=0.001)
x = Stream('x')
linear_regression.init_plot()
# The same stream x is passed both as training data and as the data to
# predict on.
model = Stream_Learn(data_train=x, data_out=x, train_func=m.train,
                     predict_func=m.predict,
                     min_window_size=min_window_size,
                     max_window_size=max_window_size,
                     step_size=step_size, num_features=num_features,
                     all_func=all_func)
y = model.run()
# Apply print_stream to each element of the prediction stream; no output
# stream is produced (num_outputs=0).
stream_func(inputs=y, f=print_stream, f_type='element', num_outputs=0)
while i < num_points:
    # Shift the second weight a little on every step, so the relation
    # between features and target changes over time.
    w[1] += 0.01
    # Every feature of the i-th sample equals i.
    x_value = np.ones((1, num_features)) * i
    # Column vector: a leading 1 followed by the features
    # (presumably the bias term -- confirm).
    x_b = np.hstack((np.ones((1, 1)), x_value)).transpose()
    # assumes w is a (num_features + 1, 1) column vector -- TODO confirm
    y_value = w.transpose().dot(x_b)[0][0]
    # The stream element is the tuple (features..., target).
    values = x_value.tolist()[0]
    values.append(y_value)
    x.extend([tuple(values)])
    # Reset the model every 100 points, but not at i == 0.
    if i % 100 == 0 and i != 0:
        model.reset()
    print i
    i += 1
print "Average error: ", m.avg_error
# Agent form of the sliding-window mean: np.mean is applied to windows of
# stream x and the results are placed on mean_of_x_a.
# NOTE(review): x, window_size, step_size and N are defined outside this
# fragment.
mean_of_x_a = Stream('Mean of x for agent')
stream_agent(
    inputs=x,
    outputs=mean_of_x_a,
    f_type='window',
    f=np.mean,
    window_size=window_size,
    step_size=step_size)
print_stream(mean_of_x_a)

# Drive the example.
# Add values to stream x.
x.extend([random.random() for _ in range(N)])

#_____________________________________________________
# EXAMPLE 2. STANDARD DEVIATION OF SLIDING WINDOW
#_____________________________________________________

# SPECIFICATION:
# Write a function that has a single input stream and
# that returns a stream whose elements are the standard
# deviations of sliding windows of its input stream.

# HOW TO DEVELOP THE STREAMING PROGRAM.
# First step:
# Write a function that has a parameter that
# is a list or array and that returns the standard
if __name__ == "__main__":
    # Batch training set: 100 rows where column 0 holds 0..99 and column 1
    # holds twice that value.  The columns are filled with array operations
    # rather than a `for i in ...` loop: such a loop left the sample
    # counter `i` at 99, so the prediction loop below produced 99 fewer
    # samples than num_points (and none when num_points <= 99).
    x = np.zeros((100, 2))
    x[:, 0] = np.arange(100)
    x[:, 1] = 2 * x[:, 0]

    # Stream of feature tuples for which predictions are requested.
    predict_stream = Stream('predict')
    # NOTE(review): train_function, predict_function, num_features,
    # num_points and print_stream are defined outside this block.
    model = Stream_Learn(data_train=x, data_out=predict_stream,
                         train_func=train_function,
                         predict_func=predict_function,
                         min_window_size=2, max_window_size=2,
                         step_size=1, num_features=num_features)
    y = model.run()
    # Apply print_stream to each element of the prediction stream; no
    # output stream is produced (num_outputs=0).
    stream_func(inputs=y, f=print_stream, f_type='element', num_outputs=0)

    # Emit exactly num_points samples, counting from 0.
    i = 0
    while i < num_points:
        # One sample with num_features values, each uniform in [-1, 1).
        x_value = np.random.rand(1, num_features) * 2 - 1
        values = x_value.tolist()[0]
        predict_stream.extend([tuple(values)])
        i += 1
# Driver fragment: feed synthetic points through a Stream_Learn model.
# NOTE(review): m, x, i, w, num_points, num_features, the window
# parameters, all_func and print_stream are defined outside this fragment.
# NOTE(review): the nesting below was reconstructed from a collapsed line;
# `print i` is assumed to sit at loop level, not inside the `if` -- confirm.
linear_regression.init_plot()
# The same stream x is passed both as training data and as the data to
# predict on.
model = Stream_Learn(data_train=x, data_out=x, train_func=m.train,
                     predict_func=m.predict,
                     min_window_size=min_window_size,
                     max_window_size=max_window_size,
                     step_size=step_size, num_features=num_features,
                     all_func=all_func)
y = model.run()
# Apply print_stream to each element of the prediction stream; no output
# stream is produced (num_outputs=0).
stream_func(inputs=y, f=print_stream, f_type='element', num_outputs=0)
while i < num_points:
    # Shift the second weight a little on every step, so the relation
    # between features and target changes over time.
    w[1] += 0.01
    # Every feature of the i-th sample equals i.
    x_value = np.ones((1, num_features)) * i
    # Column vector: a leading 1 followed by the features
    # (presumably the bias term -- confirm).
    x_b = np.hstack((np.ones((1, 1)), x_value)).transpose()
    # assumes w is a (num_features + 1, 1) column vector -- TODO confirm
    y_value = w.transpose().dot(x_b)[0][0]
    # The stream element is the tuple (features..., target).
    values = x_value.tolist()[0]
    values.append(y_value)
    x.extend([tuple(values)])
    # Reset the model every 100 points, but not at i == 0.
    if i % 100 == 0 and i != 0:
        model.reset()
    print i
    i += 1
print "Average error: ", m.avg_error
# Settings for the streaming k-means example.
k = 5                   # passed to KMeansStream as k and used as min_window_size
max_window_size = 1000  # max_window_size handed to Stream_Learn
num_points = 15000      # number of points pushed onto the stream
step_size = 1           # step_size handed to Stream_Learn

if __name__ == "__main__":
    # NOTE(review): num_centroids, draw and output are defined outside
    # this fragment.
    i = 0
    centroids = kmeans.initialize(num_centroids, -5, 5)
    x = Stream('x')
    m = KMeans.KMeansStream(draw=draw, output=output, k=k)
    # The same stream x is passed both as training data and as the data to
    # predict on.
    model = Stream_Learn(data_train=x, data_out=x, train_func=m.train,
                         predict_func=m.predict, min_window_size=k,
                         max_window_size=max_window_size,
                         step_size=step_size, num_features=2)
    y = model.run()
    while i < num_points:
        # Choose one centroid at random ...
        index = np.random.randint(0, num_centroids)
        # ... and move it by a random offset: z is uniform in [-1, 1) per
        # coordinate, so the shift z * 2 lies in [-2, 2).
        z = np.random.rand(1, 2) * 2 - 1
        centroids[index] = centroids[index].reshape(1, 2) + z * 2
        # Push one point derived from the moved centroid onto the stream
        # as a tuple.
        # NOTE(review): presumably initializeDataCenter(center, 1, 1)
        # samples a single point around the centroid -- confirm in kmeans.
        x.extend([tuple(kmeans.initializeDataCenter(centroids[index], 1, 1).tolist()[0])])
        print i
        i += 1
    print "Average number of iterations: ", m.avg_iterations
    print "Average error: ", m.avg_error