class DataProcessor(object):
    """Worker that consumes JSON messages from a queue and optionally
    plots them live and/or appends them to per-instance CSV files.

    `headers` maps a target type ('system'/'process') to its ordered list
    of data fields; `targets` maps a target type to its instance names.
    A dedicated worker thread is started from __init__ and runs process().
    """

    def __init__(self, queue, headers, targets):
        self.log = Logger(DATA_PROC_LOG_FILE, level=V_DEBUG)
        # Main thread communication
        self.keep_running = True
        self.transmit = queue
        self.headers = headers
        self.targets = targets
        # print data
        self.printing = False
        self.base_data = None
        self.fig = ()
        self.ax = ()
        # store data
        self.local_store = False
        self.files = {}
        # Launching Thread
        self.thr = threading.Thread(target=self.process, args=(), name='process_thread')
        self.start()

    ###
    ### Process Thread
    ###
    def start(self):
        """Start the worker thread (called once from __init__)."""
        self.log.info('[MAIN THREAD] Starting process thread')
        self.thr.start()
        self.log.debug('[MAIN THREAD] Process thread started')

    def stop(self):
        """Ask the worker thread to exit after its current iteration."""
        self.keep_running = False
        self.log.info('[MAIN THREAD] Asked processing thread end')

    def process(self):
        """Worker loop: pull JSON messages off the queue until stop().

        The 1 s queue timeout keeps the loop responsive to keep_running.
        """
        while self.keep_running:
            self.log.debug('[PROCESS THREAD] Getting data')
            try:
                data = self.transmit.get(timeout=1)
                data = json.loads(data)
                self.log.debug('[PROCESS THREAD] Got data {}'.format(data))
                if self.printing:
                    # build_print_data() mutates self.base_data in place; its
                    # None return value was previously bound to an unused local.
                    self.build_print_data(data)
                    self.log.debug('[PROCESS THREAD] Printing')
                    multi_print_dic(self.base_data, self.print_data)
                    self.log.debug('[PROCESS THREAD] Printed')
                if self.local_store:
                    self.process_store(data)
            except Empty:
                self.log.debug('[PROCESS THREAD] No data')
        self.log.info('[PROCESS THREAD] End of thread')

    ###
    ### Print utilities
    ###
    def start_print(self):
        """Initialise the plot structures and enable live printing."""
        self.log.info('[MAIN THREAD] Start printing')
        self.build_print_headers()
        self.log.debug('[MAIN THREAD] Built headers')
        self.print_data = multi_init_print(self.base_data)
        self.log.debug('[MAIN THREAD] Graphics initiated')
        self.printing = True

    def stop_print(self):
        """Disable live printing (plot structures are kept)."""
        self.log.info('[MAIN THREAD] Stop printing')
        self.printing = False

    def build_print_headers(self):
        """Build base_data as {instance: {data_field: []}} for every target."""
        ret = {}
        for types in self.targets:
            for instance in self.targets[types]:
                ret[instance] = {}
                for data_field in self.headers[types]:
                    ret[instance][data_field] = []
        self.base_data = ret
        self.log.debug('[DATA THREAD] Header: {}'.format(self.base_data))

    def build_print_data(self, dico):
        """Append one message's values into base_data (mutates in place)."""
        for target in dico:
            for data_field in dico[target]:
                # Easy to handle data sequence length here
                self.base_data[target][data_field].append(dico[target][data_field])

    ####
    #### Storage utilities
    ####
    def process_store(self, dico):
        """Append one CSV row per open file from the received message."""
        for target in self.files:
            try:
                field_type = 'system' if target == 'system' else 'process'
                res = [dico[target][data_field] for data_field in self.headers[field_type]]
            except (KeyError, AttributeError):
                # Missing target/field in the message: store a placeholder row.
                # (dict lookups raise KeyError; the original caught only
                # AttributeError, so a malformed message crashed the thread)
                res = range(len(dico))
            # was: print >> file, ... (Python-2-only chevron syntax)
            self.files[target].write(list_to_csv(res) + '\n')
            self.log.debug('[PROCESS THREAD] Stored {}'.format(list_to_csv(res)))

    def start_store(self, dirname=None):
        """Create the record directory, open one file per instance, write
        the CSV headers and enable storage.

        Returns the list of opened file paths.
        """
        # Make record dir
        if not dirname:
            # os.path.join needs a string; time.time() returns a float.
            dirname = str(time.time())
        directory = os.path.join(LOCAL_DATA_DIR, dirname)
        self.log.info('[MAIN THREAD] Starting local storage in {}'.format(directory))
        if os.path.isdir(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)
        self.log.debug('[MAIN THREAD] Made local record dir')
        # Open files
        for types in self.targets:
            for instance in self.targets[types]:
                filename = os.path.join(directory, instance)
                self.files[instance] = open(filename, 'w')
                self.log.debug('[MAIN THREAD] Opened {}'.format(filename))
        # Write headers
        for key in self.files:
            field_type = 'system' if key == 'system' else 'process'
            self.files[key].write(list_to_csv(self.headers[field_type]) + '\n')
            self.log.debug('[MAIN THREAD] wrote {} in file {}'.format(list_to_csv(self.headers[field_type]), key))
        # Ask start storing
        self.local_store = True
        self.log.debug('[MAIN THREAD] End start local')
        return [os.path.join(directory, instance) for instance in self.files]

    def stop_store(self):
        """Disable storage and close every open file."""
        self.log.info('[MAIN THREAD] Stopping storage')
        self.local_store = False
        for key in self.files:
            self.files[key].close()
            self.log.debug('closed {}'.format(key))
class RemoteClient(object):
    """Control-channel client: drives a remote recording/training server
    over a TCP socket and owns a DataClient for the data channel."""

    def __init__(self, ip, transmit):
        if not os.path.isdir(DATA_DIR):
            os.makedirs(DATA_DIR)
        # Logging
        self.log = Logger(CLIENT_LOG_FILE, D_VERB)
        self.log.info('[MAIN THREAD] Instantiated client')
        # Central data
        self.receiving = False
        self.training = False
        self.paused = False
        self.define_headers()
        self.targets = {}
        # Workers
        self.data_client = DataClient(transmit, ip)
        # Connection
        self.connect(ip)

    def connect(self, ip):
        """Open the control socket to the server on SOC_PORT_CTRL."""
        self.soc_ctrl = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.soc_ctrl.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # (dropped an unused `my_ip = socket.gethostbyname('')` local)
        self.log.debug('[MAIN THREAD] connecting...')
        self.soc_ctrl.connect((ip, SOC_PORT_CTRL))
        self.log.info('[MAIN THREAD] Client connected to server')

    def define_headers(self):
        # Headers start empty; kept as a method for symmetry with the server.
        head = {}
        self.headers = head

    def add_target(self, target, name):
        """Record `name` under target type `target` in the local table."""
        if target in self.targets:
            self.targets[target].append(name)
        else:
            self.targets[target] = [name]

    def remove_target(self, target, name):
        """Forget `name`; logs an error when it was never recorded."""
        # The original had two branches emitting the same error message;
        # merged into a single guard with identical behavior.
        if target in self.targets and name in self.targets[target]:
            self.targets[target].remove(name)
            self.log.info('[MAIN THREAD] Removed {} named {}'.format(target, name))
        else:
            self.log.error('[MAIN THREAD] Asked to remove {} named {} while not recorded'.format(target, name))

    def start_record(self, target, name):
        """Ask the server to record `target`/`name`; track it locally on SYNC."""
        self.log.debug('[MAIN THREAD] Asking server to start recording')
        msg = MSG_SEP.join([START_RECORD, target, name])
        answer = send_data(self.soc_ctrl, msg)
        self.log.info('[MAIN THREAD] Server asked to start recording')
        if answer == SYNC:
            self.add_target(target, name)
            self.log.info('[MAIN THREAD] Added {} named {}'.format(target, name))
        else:
            self.log.warn('[MAIN THREAD] Could not add {} named {} because of server answer'.format(target, name))

    def stop_record(self, target, name):
        """Ask the server to stop recording `target`/`name`; untrack on SYNC."""
        self.log.debug('[MAIN THREAD] Asking server to stop recording')
        msg = MSG_SEP.join([STOP_RECORD, target, name])
        answer = send_data(self.soc_ctrl, msg)
        self.log.info('[MAIN THREAD] Server asked to stop recording')
        if answer == SYNC:
            self.remove_target(target, name)
        else:
            self.log.warn('[MAIN THREAD] Could not remove {} named {} because of server answer'.format(target, name))

    def start_receive(self):
        """Ask the server to start sending and launch the data client."""
        if not self.receiving:
            self.receiving = True
            self.log.debug('[MAIN THREAD] Asking server to start sending')
            status = send_data(self.soc_ctrl, START_SEND)
            self.log.info('[MAIN THREAD] Server asked to start sending')
            if status == FAIL:
                self.log.error('[MAIN THREAD] Client tried to receive but server denied it')
            else:
                self.data_client.start()
                self.log.info('[MAIN THREAD] Client is receiving')
                self.log.debug("[MAIN THREAD] DATA THREAD started")
        else:
            self.log.warn("[MAIN THREAD] Asked to start receiving while already receiving")

    def start_training(self):
        """Ask the server to start its training loop."""
        if not self.training:
            self.training = True
            self.log.debug('[MAIN THREAD] Asking server to start training')
            status = send_data(self.soc_ctrl, START_TRAIN)
            self.log.info('[MAIN THREAD] Server asked to start training')
            if status == FAIL:
                self.log.error('[MAIN THREAD] Server refused to start training')
            else:
                self.log.info('[MAIN THREAD] Server is training')
        else:
            self.log.warn("[MAIN THREAD] Asked to start training while already training")

    def stop_training(self):
        """Ask the server to stop training."""
        if self.training:
            self.training = False
            self.log.debug('[MAIN THREAD] Asking server to stop training')
            status = send_data(self.soc_ctrl, STOP_TRAIN)
            self.log.info('[MAIN THREAD] Server asked to stop training')
            if status == FAIL:
                self.log.error('[MAIN THREAD] Server refused to stop training')
            else:
                self.log.info('[MAIN THREAD] Server has stopped training')
        else:
            self.log.warn("[MAIN THREAD] Asked to stop training while not training")

    def pause_training(self):
        """Ask the server to pause training."""
        if not self.paused:
            self.paused = True
            self.log.debug('[MAIN THREAD] Asking server to pause training')
            status = send_data(self.soc_ctrl, PAUSE_TRAIN)
            self.log.info('[MAIN THREAD] Server asked to pause training')
            if status == FAIL:
                self.log.error('[MAIN THREAD] Server refused to pause training')
            else:
                self.log.info('[MAIN THREAD] Server is paused')
        else:
            self.log.warn("[MAIN THREAD] Asked to paused training while already paused")

    def resume_training(self):
        """Ask the server to resume a paused training."""
        if self.paused:
            self.paused = False
            self.log.debug('[MAIN THREAD] Asking server to resume training')
            status = send_data(self.soc_ctrl, RESUME_TRAIN)
            self.log.info('[MAIN THREAD] Server asked to resume training')
            if status == FAIL:
                self.log.error('[MAIN THREAD] Server refused to resume training')
            else:
                self.log.info('[MAIN THREAD] Server has resumed training')
        else:
            self.log.warn("[MAIN THREAD] Asked to resume training while not paused")

    def stop_receive(self):
        """Stop the data client thread and mark the channel as closed."""
        if self.receiving:
            self.log.debug('[MAIN THREAD] Closing data channel. Exiting data client thread')
            self.data_client.stop()
            self.log.info("[MAIN THREAD] Asked server to stop receiving")
            self.receiving = False
        else:
            # was: "...while already receiving" — wrong sense for this branch
            self.log.warn("[MAIN THREAD] Asked to stop receiving while not receiving")

    def stop_process(self):
        """Shut down the data channel and close the control socket.

        NOTE(review): the original also called self.stop_print() and
        self.stop_store(), whose data_processor delegation is commented out
        in this class — they do not exist and raised AttributeError. Those
        calls were removed.
        """
        self.stop_receive()
        self.soc_ctrl.close()

    def stop_all(self):
        """Ask the server to stop everything, then shut down locally."""
        # Send while the control socket is still open (the original closed
        # it in stop_process() first, then sent on the closed socket).
        send_data(self.soc_ctrl, STOP_ALL)
        self.stop_process()
class NLL_Trainer(object):
    """Mini-batch SGD trainer minimising the negative log-likelihood (NLL)
    of `model`, with theano-compiled train/validation/test functions.

    The training loop runs in a worker thread (start_training) and can
    stream model weights through `transmit` while running (start_record).
    """
    def __init__(
            self, transmit, model, train_set, valid_set, test_set,
            batch_size = 600, learning_rate = 0.13, cost = None,
            test_func = None, regularization_factor = 0
    ):
        """Build the symbolic graph and compile the theano functions.

        transmit  -- queue used to ship recorded weights out of the trainer
        model     -- object exposing input, output, pred, params,
                     drop_weights, add_target, remove_target, load_weights
        *_set     -- (inputs, labels) pairs of theano shared variables
        cost      -- optional cost expression; defaults to the NLL
        test_func -- optional scoring expression builder; defaults to _errors
        """
        self.log = Logger(TRAINER_LOG_FILE, level=V_DEBUG)
        self.transmit = transmit
        self.is_recording = False
        self.is_paused = False
        self.is_running = False
        self.labels = T.ivector('y')  # labels, presented as 1D vector of [int] labels
        self.index = T.lscalar()  # index to a minibatch
        self.train_set = train_set
        self.valid_set = valid_set
        self.test_set = test_set
        self.batch_size = theano.shared(0)
        self.reg = theano.shared(regularization_factor)
        self.set_batch_size(batch_size)
        # Validate and record once per epoch by default.
        self.validation_frequency = self.n_train_batches
        self.record_frequency = self.n_train_batches
        self.improvement_step_threshold = 0.0001
        self.n_epochs = 1000
        self.learning_rate = theano.shared(learning_rate)
        if not test_func:
            test_func = self._errors
        self.model = model
        # Mean NLL of the correct labels under the model's output distribution.
        self.nll = -T.mean(T.log(self.model.output)[T.arange(self.labels.shape[0]), self.labels])
        if not cost:
            cost = self.nll
        else:
            # NOTE(review): adds the raw regularisation factor, not a weight
            # norm — original comment said: sqrt(sum of squared weights).
            cost = cost + self.reg
        # One gradient per model parameter; plain SGD parameter updates.
        g_params = [T.grad(cost=cost, wrt=param) for param in self.model.params]
        updates = [(param, param - self.learning_rate * g_param)
                   for param, g_param in zip(self.model.params, g_params)]
        # train_model(i): one SGD step on minibatch i; returns cost + gradients.
        self.train_model = theano.function(
            inputs=[self.index],
            outputs=[cost] + g_params,
            #outputs = cost,
            updates=updates,
            givens={
                model.input: self.train_set[0][self.index * self.batch_size: (self.index + 1) * self.batch_size],
                self.labels: self.train_set[1][self.index * self.batch_size: (self.index + 1) * self.batch_size]
            }
        )
        # test_model(i): score on test minibatch i (no updates).
        self.test_model = theano.function(
            inputs=[self.index],
            outputs=test_func(),
            givens={
                model.input: self.test_set[0][self.index * self.batch_size: (self.index + 1) * self.batch_size],
                self.labels: self.test_set[1][self.index * self.batch_size: (self.index + 1) * self.batch_size]
            }
        )
        # validate_model_2(i): score on validation minibatch i.
        self.validate_model_2 = theano.function(
            inputs=[self.index],
            outputs=test_func(),
            givens={
                model.input: self.valid_set[0][self.index * self.batch_size: (self.index + 1) * self.batch_size],
                self.labels: self.valid_set[1][self.index * self.batch_size: (self.index + 1) * self.batch_size]
            }
        )
        # validate_model_1(): score on the whole validation set at once.
        self.validate_model_1 = theano.function(
            inputs=[],
            outputs=test_func(),
            givens={
                model.input: self.valid_set[0],
                self.labels: self.valid_set[1]
            }
        )

    ###
    ### Non client-accessible functions
    ###
    def _errors(self):
        ''' Ratio of errors in the prediction '''
        assert self.labels.ndim == self.model.pred.ndim
        return T.mean(T.neq(self.model.pred, self.labels))

    def _training_process(self):
        """ Training loop (runs in the worker thread started by start_training). """
        timer = Timer()
        minibatch_avg_cost = 0
        best_validation_loss = numpy.inf
        test_score = 0.
        # NOTE: the measured time does not exclude pause/resume periods
        start_time = timeit.default_timer()
        epoch = 0
        while (epoch < self.n_epochs) and self.is_running:
            # Idle loop while paused (see pause_training/resume_training).
            while self.is_paused:
                time.sleep(1)
            epoch = epoch + 1
            for minibatch_index in xrange(self.n_train_batches):
                # minibatch_index within an epoch
                res = timer.time(self.train_model, minibatch_index)
                cost = res[0]
                #print res[1].shape
                #self.transmit.put({0:res[1] * 10})
                minibatch_avg_cost += cost
                iter = (epoch - 1) * self.n_train_batches + minibatch_index
                # Iter = number of minibatch passed
                if (iter + 1) % self.record_frequency == 0 and self.is_recording:
                    data = self.model.drop_weights()
                    #print type(data)
                    #print len(data)
                    #print data[0].shape
                    if data:
                        self.transmit.put(data)
                    else:
                        print 'WHAT WENT WRONG? EMPTY DATA DROPPEND BY MODEL IN TRAINER'
                if (iter + 1) % self.validation_frequency == 0:
                    training_score = minibatch_avg_cost / self.validation_frequency
                    minibatch_avg_cost = 0
                    this_validation_loss = timer.time(self.validate_model_1)
                    self.log.info(
                        'epoch %i, minibatch %i/%i, validation error %f %%' % (
                            epoch,
                            minibatch_index + 1,
                            self.n_train_batches,
                            this_validation_loss * 100.
                        )
                    )
                    self.log.verb(
                        'epoch %i, minibatch %i/%i, training scor %f %%' % (
                            epoch,
                            minibatch_index + 1,
                            self.n_train_batches,
                            training_score
                        )
                    )
                    # NOTE(review): the `or True` forces this branch, making
                    # the else (early stop + test evaluation) unreachable —
                    # looks like a debugging leftover. TODO confirm intent.
                    if this_validation_loss < best_validation_loss or True:
                        best_validation_loss = this_validation_loss
                    else:
                        self.is_running = False
                        test_losses = [self.test_model(i) for i in xrange(self.n_test_batches)]
                        test_score = numpy.mean(test_losses)
                        self.log.info(
                            (
                                ' epoch %i, minibatch %i/%i, test error of'
                                ' best model %f %%'
                            ) % (
                                epoch,
                                minibatch_index + 1,
                                self.n_train_batches,
                                test_score * 100.
                            )
                        )
        end_time = timeit.default_timer()
        self.log.info(
            (
                'Optimization complete with best validation score of %f %%,'
                'with test performance %f %%'
            ) % (best_validation_loss * 100., test_score * 100.)
        )
        self.log.info('The code run for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time)))
        self.log.info('The code for file ' + os.path.split(__file__)[1] +
                      ' ran for %.1fs' % ((end_time - start_time)))
        print 'Validation: avg time = {} || total time = {}'.format(timer.get_avg_time(self.validate_model_1), timer.get_total_time(self.validate_model_1))
        print 'Training: avg time = {} || total time = {}'.format(timer.get_avg_time(self.train_model), timer.get_total_time(self.train_model))

    ###
    ### Should any value be reinitalised?
    ###
    def set_batch_size(self, batch_size):
        """Update the shared batch size and recompute the batch counts.

        NOTE: `/` is floor division here under Python 2 (int operands).
        """
        self.batch_size.set_value(batch_size)
        self.n_train_batches = self.train_set[0].get_value(borrow=True).shape[0] / batch_size
        self.n_valid_batches = self.valid_set[0].get_value(borrow=True).shape[0] / batch_size
        self.n_test_batches = self.test_set[0].get_value(borrow=True).shape[0] / batch_size

    ###
    ### Client-accessible functions
    ###
    def start_training(self):
        ''' Runs the training process in a worker thread. Two steps:
                - Initialisation of the training values (best validation
                  loss and test_score).
                - Training loop.
        '''
        self.is_running = True
        thread = threading.Thread(target = self._training_process, name = 'training process', args = ())
        thread.start()

    def stop_training(self):
        ''' Terminates the training process in a clean manner.
            Does nothing if the training is paused.
            Scores and timings are printed at the end of the thread.
        '''
        if (not self.is_paused and self.is_running):
            self.is_running = False
            return True
        else:
            return False

    def pause_training(self):
        ''' If the training is running and not paused, this method returns
            True and induces an idle loop with 1 s frequency until
            resume_training is called. Otherwise, returns False.
        '''
        if not self.is_running or self.is_paused:
            return False
        self.is_paused = True
        return True

    def resume_training(self):
        ''' If the training is running and paused, this method ends the
            idle loop and returns True. Otherwise does nothing and
            returns False.
        '''
        if self.is_paused and self.is_running:
            self.is_paused = False
            return True
        else:
            return False

    def add_target(self, target = None):
        ''' Sets the target layer as to be recorded. See model.target method. '''
        return self.model.add_target(target)

    def remove_target(self, target = None):
        ''' Sets the target layer as not to be recorded. See model.target method. '''
        return self.model.remove_target(target)

    def start_record(self):
        ''' Asks model to start putting weights in the transmit queue. '''
        self.is_recording = True

    def stop_record(self):
        ''' Asks model to stop putting weights in the transmit queue. '''
        self.is_recording = False

    def load_model_weights(self, weight_file = None):
        ''' Sets the model's weights to the values serialized in weight_file.

            NOTE(review): always returns False — `ret_val` from
            model.load_weights() is never returned. TODO confirm intent.
        '''
        if not self.is_running or self.is_paused:
            ret_val = self.model.load_weights(weight_file)
        else:
            self.log.error('Asked for weight loading while training is running')
        return False

    def set_parameter(self, parameter, value):
        ''' Sets trainer's parameter to value and returns the new value.
            If parameter is not an attribute of trainer, returns None.
        '''
        self.log.debug('Setting Parameter {} to {}'.format(parameter, value))
        try:
            if parameter == 'batch_size':
                val = int(value)
                self.set_batch_size(val)
            elif parameter == 'learning_rate':
                val = float(value)
                self.learning_rate.set_value(val)
            else:
                # Generic numeric attribute (e.g. n_epochs, frequencies).
                val = float(value)
                setattr(self, parameter, val)
            new_val = getattr(self, parameter)
            self.log.hist('Has set {} to {}'.format(parameter, new_val))
            return new_val
        except (ValueError, AttributeError) as e:
            # NOTE(review): `val` is unbound here when the int()/float()
            # conversion itself raised — this log line would then NameError.
            self.log.error('Failed to set {} as {}. Error is {}'.format(parameter, val, e))
            return None

    def get_parameter(self, parameter):
        ''' Returns the value of trainer's parameter.
            If parameter is not an attribute of trainer, returns None.
        '''
        try:
            val = getattr(self, parameter)
            self.log.info('Got parameter {}={}'.format(parameter, val))
            return val
        except AttributeError as e:
            self.log.error('Error Getting parameter {}: {}'.format(parameter, e))
            return None
class DataClient(object):
    """Opens the data channel to the server and forwards every received
    message into the shared queue from a dedicated receive thread."""

    def __init__(self, queue, ip):
        self.log = Logger(DATA_CLIENT_LOG_FILE, D_VERB)
        self.log.info('[MAIN THREAD] Instantiatie data_client')
        self.transmit = queue      # queue shared with the consumer side
        self.receiving = False
        self.remote_ip = ip
        self.my_ip = socket.gethostbyname(socket.gethostname())
        # Headers could be used to check structure integrity of received data
        # Data integrity check put in data_processor
        # self.headers = None

    def start(self):
        """Connect the data socket and start the receive thread."""
        self.soc_data = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.soc_data.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.log.debug('[MAIN THREAD] Connecting to server data channel')
        self.soc_data.connect((self.remote_ip, SOC_PORT_DATA))
        self.log.info('[MAIN THREAD] Data Channel Connected')
        self.data_receive = threading.Thread(target = self.receive, args = ())
        self.log.info('[MAIN THREAD] Starting DATA THREAD')
        self.receiving = True
        self.data_receive.start()
        self.log.debug('[MAIN THREAD] DATA THREAD started')

    def stop(self):
        """Ask the receive loop to stop after its current blocking read."""
        self.log.debug("[MAIN THREAD] Stop command sent")
        self.receiving = False
        self.log.info("[MAIN THREAD] Asked DATA THREAD to stop receiving")

    def receive(self):
        """Receive loop (runs in the DATA THREAD): forward server messages
        to the queue until stopped or empty data arrives."""
        #FIXME_1 : recv_data is blocking. If nothing is sent and asked to stop, it will block program exit
        while self.receiving:
            self.log.debug('[DATA THREAD] waiting for data from server')
            data = recv_data(self.soc_data)
            self.log.debug('[DATA THREAD] Received data {}\n'.format(data))
            if data:
                self.transmit.put(data)
                self.log.debug('[DATA THREAD] Transmitted data ')
            else:
                # Not sure this should exist
                self.log.info('[DATA THREAD] Empty data received. Closing socket ')
                self.soc_data.close()
                break
            if not self.receiving:
                self.log.info('[DATA THREAD] self.receiving is False. Closing socket ')
                self.soc_data.close()
                self.log.info('[DATA THREAD] Exiting thread \n')
                # (the closing `break` of this branch sits on the next source line)
                break
# --- NOTE(review): this chunk mixes fragments from several definitions. ---
# Tail of a plotting-initialisation function whose `def` lies outside this
# chunk (presumably the multi_init_print/init_print helper — TODO confirm):
# it draws the figure and returns the blitting handles.
    log.debug([fig])
    fig.canvas.draw()
    fig.canvas.flush_events()
    return fig, ax_ret, line_ret, background_ret


def multi_print_fast(multi_dico, print_data):
    """Redraw every target's plot: one print_dic_fast() call per key of
    `multi_dico`, using the matching handle tuple from `print_data`."""
    log.verb('Multiprinting: {}'.format(multi_dico))
    for keys in multi_dico:
        print_dic_fast(multi_dico[keys], print_data[keys])


def print_dic_fast(dico, (fig, ax, lines, backgrounds)):
    """Fast (blit-based) redraw of one target's curves.

    `dico` maps data-field names to value sequences; the second argument is
    a Python-2-only tuple parameter holding the matplotlib handles.
    """
    log.verb('Printing: {}'.format(dico))
    keys = dico.keys()
    for ind in range(len(keys)):
        log.debug('back in loop')
        ### Should do better here
        ydata = dico[keys[ind]]
        xdata = range(len(ydata))   # x axis is simply the sample index
        log.debug('Before drawing')
        # Matplotlib blitting: restore the cached background, update the
        # line, redraw only that artist, then blit its axes bounding box.
        fig.canvas.restore_region(backgrounds[ind])
        lines[ind].set_data(xdata,ydata)
        log.debug([ax[ind]])
        log.debug([fig])
        ax[ind].draw_artist(lines[ind])
        fig.canvas.blit(ax[ind].bbox)
        log.debug('Has drawn data {}'.format(lines[ind].get_data()))
        #column.set_title(keys[ind])
        if ind >= len(keys):
            # (function body continues beyond this chunk; note this condition
            # can never hold inside `range(len(keys))` — likely dead code.
            # TODO confirm against the full file)
class DataManager(object):
    """Server-side fan-out: reads messages from a queue in a worker thread
    and sends each connected client the subset of data matching the targets
    registered for it in `connection_table`."""

    def __init__(self, transmit, connection_table):
        self.step = D_STEP
        self.timeout = int(D_TIMEOUT / self.step)
        self.log = Logger(DATA_LOG_FILE, D_VERB)
        self.run = True
        self.receivers = []                        # accepted data-channel sockets
        self.transmit = transmit
        self.connection_table = connection_table   # control socket -> target list
        self.data_thread = threading.Thread(target=self.process_loop, name="data managing", args=())
        self.log.info("Starting DATA THREAD")
        self.data_thread.start()
        self.log.debug("DATA THREAD Started")

    def process_loop(self):
        """Worker loop: forward every queued message to all receivers.

        TODO: add a get() timeout so the thread notices self.run going
        False while the queue stays idle (currently blocks indefinitely).
        """
        while self.run:
            self.log.debug("[DATA THREAD] Waiting for queue")
            data = self.transmit.get()
            self.log.debug("[DATA THREAD] Got {}".format(data))
            # Iterate over a snapshot: process_send() may remove dead
            # connections from self.receivers mid-loop (the original mutated
            # the list while iterating it). The loop variable is also renamed
            # from `socket`, which shadowed the socket module.
            for conn in list(self.receivers):
                self.process_send(conn, data)

    def quit(self):
        """Ask the worker loop to stop (effective after the next message)."""
        self.run = False

    def start_send(self):
        """Accept one data-channel connection in a background thread."""
        self.init_thread = threading.Thread(target=self.init_connection, name="init_send_connection", args=())
        self.log.info("[MAIN THREAD] Starting INIT THREAD")
        self.init_thread.start()
        self.log.debug("[MAIN THREAD] INIT THREAD Started")

    def init_connection(self):
        """Listen on SOC_PORT_DATA and register the first incoming connection."""
        soc_data = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        soc_data.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        soc_data.bind(("", SOC_PORT_DATA))
        soc_data.listen(1)
        self.log.info("[INIT THREAD] Waiting for a connection")
        connection, client_address = soc_data.accept()
        self.log.info("[INIT THREAD] Connection accepted from {}".format(client_address))
        self.receivers.append(connection)

    def process_send(self, connection, data):
        """Send `connection` its target-filtered view of `data` as JSON;
        drop the connection when the send reports failure."""
        targets = self.get_client_targets(connection)
        self.log.debug("[DATA THREAD] targets are {}".format(targets))
        sub_data = self.get_sub_dict(data, targets)
        self.log.debug("[DATA THREAD] sub_data is {}".format(sub_data))
        mess = json.dumps(sub_data)
        self.log.debug("[DATA THREAD] Sending data {}".format(mess))
        status = send_data(connection, mess)
        if status == "":
            self.receivers.remove(connection)
            self.log.info("[DATA THREAD] connection removed")
        self.log.debug("[DATA THREAD] Data sent")

    def get_sub_dict(self, data, targets):
        """Return the sub-dict of `data` restricted to keys listed in `targets`."""
        return dict([(key, data[key]) for key in targets if key in data])

    def get_client_targets(self, connection):
        """Look up the target list registered for `connection`'s peer address.

        Returns [] (and logs an error) when the address is unknown.
        """
        client_address = connection.getpeername()[0]
        targets = None
        for client in self.connection_table:
            self.log.debug("[DATA THREAD] Checking with potential address {} ".format(client.getpeername()))
            if client.getpeername()[0] == client_address:
                targets = self.connection_table[client]
        if targets is not None:
            return targets
        else:
            self.log.error("[DATA THREAD] Could not find client {} in connection table".format(client_address))
            return []

    def stop_send(self):
        """Close and forget every data-channel socket."""
        self.log.info("[MAIN THREAD] Stopping DATA THREAD")
        tmp = self.receivers
        self.receivers = []
        for elem in tmp:
            elem.close()
            self.log.debug("[MAIN THREAD] Closed data socket")

    def is_sending(self):
        """True when at least one receiver is connected."""
        return len(self.receivers) > 0