def validate_dataset(self):
    ''' This function validates the train & test datasets '''
    if (len(os.listdir(self.paths['train_dataset_path'])) == 0) or (
            len(os.listdir(self.paths['test_dataset_path'])) == 0):
        utils.print_head(
            f"Train or Test dataset not found!\nKindly check {self.paths['train_dataset_path']} and {self.paths['test_dataset_path']}.",
            color='red')
        exit()
def load_training_dataset(self):
    ''' This function loads the training dataset '''
    utils.print_head('TRAINING dataset loaded...', 'darkcyan')
    self.train_datagen = utils.load_train_dataset(self.paths['train_dataset_path'], self.config)
    self.val_datagen = utils.load_validation_dataset(self.paths['train_dataset_path'], self.config)
    # Save the class names so predictions can map indices back to labels
    classes = np.array(list(self.train_datagen.class_indices.keys()))
    np.save(f"{self.paths['class_file_path']}/{self.paths['class_file_name']}", classes)
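# A minimal sketch of the class-name round-trip above (np.save here, np.load at
# prediction time). 'classes.npy' and the labels are illustrative placeholders,
# not the project's real paths; note that np.save appends '.npy' when the
# filename lacks it, so the saved name must match what np.load receives later.
import numpy as np

classes = np.array(['cat', 'dog', 'horse'])
np.save('classes.npy', classes)

loaded = np.load('classes.npy')
print(loaded[1])  # -> 'dog'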
def validate_directories(self, paths):
    ''' This function validates the common directories '''
    for key, value in paths.items():
        # create the directory if it does not exist
        if self.check_directory_existance(value) == False:
            # os.mkdir returns None, so it cannot be used as a condition
            os.mkdir(value)
            utils.print_head(f"{key} directory created !!", color='green')
def get_validated_paths(self):
    # validate config file
    self.validate_config()
    # validate common directories
    self.validate_directories(self.paths)
    # validate dataset
    self.validate_dataset()
    utils.print_head('Validated folders structure & dataset...', 'darkcyan')
def get_weights_path(self, weight_name):
    ''' This function loads the weight file (latest/custom) '''
    if weight_name == 'latest':
        utils.print_head("\nUsing latest weight file for testing model....\n", color='darkcyan')
        # Find the latest weights file; '*' matches everything, narrow the
        # pattern (e.g. '*.csv') for a specific format
        list_of_weights = glob.glob(f'{self.paths["model_weights_path"]}/*.h5')
        latest_weight = max(list_of_weights, key=os.path.getctime)
        return latest_weight
    else:
        utils.print_head(f"Loading weight file {weight_name} for testing model....", color='darkcyan')
        return f"{self.paths['model_weights_path']}/{weight_name}"
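# Standalone sketch of the "latest weights" lookup used above: glob collects
# the .h5 checkpoints and max(..., key=os.path.getctime) picks the most
# recently created one. 'weights/' is a hypothetical directory.
import glob
import os

list_of_weights = glob.glob('weights/*.h5')
if list_of_weights:
    latest_weight = max(list_of_weights, key=os.path.getctime)
    print(f"Most recent checkpoint: {latest_weight}")
else:
    print("No .h5 checkpoints found in weights/")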
def bootstrap(self):
    print_head()
    self.verify_environment()
    self.load_config()
    self.bootstrap_storage_mixin()
    self.bootstrap_plugins()
    self.verify_plugin_settings()
    puts("Bootstrapping complete.")
    puts("\nStarting core processes:")

    # Scheduler
    scheduler_thread = Process(target=self.bootstrap_scheduler)
    # scheduler_thread.daemon = True

    # Bottle
    bottle_thread = Process(target=self.bootstrap_bottle)
    # bottle_thread.daemon = True

    # XMPP Listener
    xmpp_thread = Process(target=self.bootstrap_xmpp)
    # xmpp_thread.daemon = True

    with indent(2):
        try:
            # Start up processes.
            xmpp_thread.start()
            scheduler_thread.start()
            bottle_thread.start()
            errors = self.get_startup_errors()
            if len(errors) > 0:
                default_room = self.get_room_from_name_or_id(settings.DEFAULT_ROOM)["room_id"]
                error_message = "FYI, I ran into some problems while starting up:"
                for err in errors:
                    error_message += "\n%s\n" % err
                self.send_room_message(default_room, error_message, color="yellow")
                puts(colored.red(error_message))
            while True:
                time.sleep(100)
        except (KeyboardInterrupt, SystemExit):
            scheduler_thread.terminate()
            bottle_thread.terminate()
            xmpp_thread.terminate()
            # print function with end='' keeps the progress dots on the same line
            print('\n\nReceived keyboard interrupt, quitting threads.', end='')
            while (scheduler_thread.is_alive() or
                   bottle_thread.is_alive() or
                   xmpp_thread.is_alive()):
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(0.5)
def evaluate_mode(self):
    # load model
    self.load_model()
    # load testing dataset
    self.load_testing_dataset()
    steps = self.test_datagen.samples // self.config.batch_size
    if steps < 1:
        steps = self.test_datagen.samples
    utils.print_head('Evaluate model on testing dataset..', 'darkcyan')
    best_scores = self.model.evaluate_generator(self.test_datagen, steps=steps)
    utils.print_head(f"Best Weight's Accuracy: {utils.font_bold(round(best_scores[1] * 100, 2))}%", color='darkcyan')
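# Tiny sketch of the steps guard above: integer division yields 0 when the
# dataset holds fewer samples than one batch, so the code falls back to
# per-sample evaluation. The numbers are made up for illustration.
samples, batch_size = 20, 32
steps = samples // batch_size  # -> 0
if steps < 1:
    steps = samples
print(steps)  # -> 20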
def train_model(self):
    ''' This function compiles the selected model for training '''
    # get the user-selected model
    self.model = self.get_selected_model(self.config.selected_model)
    utils.print_head(
        f"Selected Model : {self.config.selected_model}, Epochs : {self.config.epochs}, "
        f"Batch Size : {self.config.batch_size}, Learning Rate : {self.config.lr_rate}, "
        f"Optimizer : {self.config.optimizer}",
        color='purple')
    self.model.compile(loss=self.config.loss,
                       optimizer=self.get_optimizer(),
                       metrics=["accuracy"])
    # save the freshly compiled model
    self.model.save(
        f"{self.paths['model_path']}/{self.config.model_name}--{self.config.selected_model}"
        f"--{self.config.epochs}_e--{self.config.lr_rate}_lr--{self.config.batch_size}_batch"
        f"--{self.config.optimizer}.h5")
    return self.model
def prepare_csv_url_dataset(project_name, config, dataset_path, paths):
    ''' This function loads a csv dataset containing image urls, and extracts images and labels '''
    utils.print_head(
        "Preparing dataset of provided .csv file having urls & labels... ",
        color='darkcyan')
    # create a folder to store images if it does not exist
    if os.path.exists(paths['images_path']) == False:
        os.mkdir(paths['images_path'])
    # load csv
    try:
        with open(f'{dataset_path}/{config.csv_filename}', 'r') as csv_file:
            # download counters
            saved, unable_to_download, already_existed = 0, 0, 0
            # process the file row by row
            for csv_row in tqdm(reader(csv_file)):
                # skip empty cells and the header row
                if (csv_row[config.image_url_column_index] != '' and
                        csv_row[config.image_url_column_index] != config.image_url_column_name):
                    url = csv_row[config.image_url_column_index]
                    # find the image extension
                    image_extension = url[url.rfind('.'):]
                    # get the label and store images in the respective folders
                    label_column_name = csv_row[config.image_label_column_index].strip("[], '' ")
                    label = f"{paths['images_path']}/{label_column_name}"
                    if os.path.exists(label) == False:
                        os.mkdir(label)
                    # build the image name
                    name = f'{csv_row[config.image_name_column_index].replace("/", " ")}{image_extension}'
                    # skip images that already exist (check inside the label folder)
                    if os.path.isfile(f'{label}/{name}'):
                        already_existed += 1
                        continue
                    else:
                        try:
                            urllib.request.urlretrieve(url=url, filename=f'{label}/{name}')
                            saved += 1
                        except:
                            utils.print_head(f"Unable to download {name}", color='red')
                            unable_to_download += 1
                            continue
            utils.print_head(
                f"** Dataset Status **\nTotal images processed : {saved+unable_to_download+already_existed}\n"
                f"Saved images : {saved}\nUnable to download : {unable_to_download}\n"
                f"Already Existing : {already_existed}",
                color='purple')
    except:
        utils.print_head(
            "CSV data file is not provided!\nCheck `csv_filename` in config.py file & `dataset` folder...",
            color='red')
        exit()
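# Hedged sketch of the per-row download step above: fetch one image URL into
# its label folder, keeping the extension from the URL. The URL and paths are
# placeholders, not values from the project's config.
import os
import urllib.request

url = 'https://example.com/images/sample.jpg'
label_dir = 'images/cats'
os.makedirs(label_dir, exist_ok=True)

name = f"sample{url[url.rfind('.'):]}"  # keep the '.jpg' extension
try:
    urllib.request.urlretrieve(url=url, filename=f'{label_dir}/{name}')
except Exception as err:
    print(f'Unable to download {name}: {err}')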
def evaluate_mode(self):
    ''' This function evaluates the training model '''
    utils.print_head('Evaluating model.....', 'darkcyan')
    # load the test dataset if present
    self.load_testing_dataset()
    # count evaluation steps from the test set (not the training set)
    steps = self.test_datagen.samples // self.config.batch_size
    if steps < 1:
        steps = self.test_datagen.samples
    latest_scores = self.model.evaluate_generator(self.test_datagen, steps=steps)
    self.model.load_weights(f"{self.get_dynamic_weights_name()}")
    best_scores = self.model.evaluate_generator(self.test_datagen, steps=steps)
    utils.print_head(f"Latest Weight's Accuracy: {utils.font_bold(round(latest_scores[1] * 100, 2))}%", color='green')
    print(f"Best Weight's Accuracy: {utils.font_bold(round(best_scores[1] * 100, 2))}%")
def get_selected_model(self, model_to_trigger):
    ''' This function triggers the model selected in config.py '''
    available_models = {
        'vgg16': self.vgg16,
        'fer': self.fer,
        'customized_model_name': self.customized_model_name
    }
    selected_model = available_models.get(
        model_to_trigger,
        lambda: utils.print_head("Invalid Model Selection!!", color='red'))
    return selected_model()
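# Minimal standalone sketch of the dict-based dispatch above: dict.get returns
# a fallback callable for unknown keys, so the call site stays uniform. The
# builder functions here are stand-ins for the real model constructors.
def vgg16():
    return 'building VGG16'

def fer():
    return 'building FER'

available_models = {'vgg16': vgg16, 'fer': fer}

build = available_models.get('vgg16', lambda: 'Invalid Model Selection!!')
print(build())  # -> 'building VGG16'
print(available_models.get('resnet', lambda: 'Invalid Model Selection!!')())  # fallback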
def prepare_json_dataset(project_name, config, dataset_path, paths):
    ''' This function loads a json dataset, and extracts images and labels '''
    utils.print_head("Preparing dataset from provided .json file... ", color='darkcyan')
    # create a folder to store images if it does not exist
    if os.path.exists(paths['images_path']) == False:
        os.mkdir(paths['images_path'])
    # load json
    try:
        saved, unable_to_download, already_existed = 0, 0, 0
        with open(config.json_filename) as json_file:
            # load the json
            data = json.load(json_file)
            for item in tqdm(data):
                # get the url
                url = item[config.image_url_key_name]
                # find the image extension
                image_extension = url[url.rfind('.'):]
                # get the image label
                label = item[config.image_label_key_name]
                label = f'{paths["images_path"]}/{label}'
                if os.path.exists(label) == False:
                    os.mkdir(label)
                # get the image name
                name = f'{item[config.image_name_key_name].replace("/", " ")}{image_extension}'
                # skip images that already exist (check inside the label folder)
                if os.path.isfile(f'{label}/{name}'):
                    already_existed += 1
                    continue
                else:
                    try:
                        # save the image
                        urllib.request.urlretrieve(url=url, filename=f'{label}/{name}')
                        saved += 1
                    except:
                        utils.print_head(f"Unable to download : {name}", color='red')
                        unable_to_download += 1
                        continue
        utils.print_head(
            f"** Dataset Status **\nTotal images processed : {saved+unable_to_download+already_existed}\n"
            f"Saved images : {saved}\nUnable to download : {unable_to_download}\n"
            f"Already Existing : {already_existed}",
            color='purple')
    except:
        utils.print_head(
            "JSON data file is not provided!\nCheck `json_filename` in config.py file & `dataset` folder...",
            color='red')
        exit()
def train_model(self):
    ''' This function trains the model '''
    utils.print_head('Model Training Initiated...', 'darkcyan')
    if self.model_name == 'new':
        self.model = self.modelObj.train_model()
    else:
        # load the model to resume training
        self.get_last_model(f"{self.paths['model_path']}/{self.model_name}")
    # fit the model
    self.model.fit_generator(self.train_datagen,
                             steps_per_epoch=self.train_datagen.samples // self.config.batch_size,
                             validation_data=self.val_datagen,
                             validation_steps=self.val_datagen.samples // self.config.batch_size,
                             epochs=self.config.epochs,
                             callbacks=self.get_model_callbacks())
    # calculate the score (evaluate_generator returns plain metrics,
    # so wrapping the result in tqdm adds nothing)
    latest_scores = self.model.evaluate_generator(self.train_datagen,
                                                  steps=self.train_datagen.samples // self.config.batch_size)
    print("Score : ", latest_scores)
def predict_class(self):
    utils.print_head('Prediction Initiated...', color='darkcyan')
    images_path = glob.glob(self.paths['predict_input_path'] + '/*.*')
    for image_path in tqdm(images_path):
        try:
            image, tailored_image = self.preprocessObj.operations(image_path)
            pred = self.model.predict(tailored_image)
            # index of the highest-probability class
            result = np.where(pred == np.amax(pred))
            resultidx = result[1][0]
            class_name = self.classes[resultidx]
            img_with_class = self.draw_class(image, class_name)
            plt.imsave(
                f'{self.paths["predict_output_path"]}/{class_name}-{os.path.basename(image_path)}',
                img_with_class)
        except Exception as e:
            utils.print_head(f'Prediction failed for {image_path}', color='red')
            print(e)
def resume_last_model(self):
    ''' This function loads & resumes the last trained model '''
    utils.print_head("Resuming last trained model....", color='darkcyan')
    # Find the latest saved model
    list_of_model = glob.glob(f"{self.paths['model_path']}/*.h5")
    try:
        latest_model = max(list_of_model, key=os.path.getctime)
        self.model = load_model(f"{latest_model}")
    except:
        utils.print_head("No pre-trained model found..!!", color='red')
        utils.print_head("Training model from initial epoch......", color='darkcyan')
        self.model = self.modelObj.train_model()
def prepare_csv_pixel_dataset(project_name, config, dataset_path, paths):
    ''' This function loads a csv dataset, and extracts images and labels '''
    utils.print_head(
        "Preparing dataset of provided .csv file having image pixels & labels... ",
        color='darkcyan')
    # load csv
    try:
        dataset = pd.read_csv(f'{dataset_path}/{config.csv_filename}')
    except:
        utils.print_head(
            "CSV data file is not provided!\nCheck `csv_filename` in config file & `dataset` folder...",
            color='red')
        exit()
    # create a folder to store images if it does not exist
    if os.path.exists(paths['images_path']) == False:
        os.mkdir(paths['images_path'])
    ## Labels
    # create folders for the labels found in the dataset
    for folder_name in dataset[config.label_column_name].unique():
        if os.path.exists(f"{paths['images_path']}/{str(folder_name)}") == False:
            os.mkdir(f"{paths['images_path']}/{str(folder_name)}")
    labels = dataset[config.label_column_name].tolist()
    ## Images
    # save all the pixel values to the list 'pixels_of_images'
    pixels_of_images = dataset[config.images_column_name].tolist()
    for image_label, image_pixels in zip(labels, pixels_of_images):
        # image_pixels contains the pixel values of a single image
        image = [int(pixel) for pixel in image_pixels.split(' ')]
        # convert the pixel list into a 2-D array
        try:
            image = np.asarray(image).reshape(config.img_height, config.img_width)
        except:
            utils.print_head(
                "The configured img_height x img_width does not match the number of pixels in the row!",
                color='red')
            continue  # skip rows that cannot be reshaped
        # save the image as a .png file
        image_name = str(uuid.uuid4())
        cv2.imwrite(
            f"{paths['images_path']}/{str(image_label)}/{image_name}.png",
            image)
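# Standalone sketch of one row's pixel-string -> image conversion above,
# assuming a 48x48 grayscale image (FER-style); the pixel string is synthetic
# and 'sample.png' is a placeholder output path.
import numpy as np
import cv2

img_height, img_width = 48, 48
image_pixels = ' '.join(['128'] * (img_height * img_width))  # fake CSV cell

image = np.asarray([int(p) for p in image_pixels.split(' ')],
                   dtype=np.uint8).reshape(img_height, img_width)
cv2.imwrite('sample.png', image)  # one .png per CSV row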
def get_last_model(self, model_name):
    if self.model_name == 'resume':
        self.resume_last_model()
    # try to load the requested model
    else:
        # check that the trained model exists
        if os.path.exists(f"{model_name}"):
            utils.print_head(f"Resuming given model : {model_name}", color='darkcyan')
            # load the model if it exists
            self.model = load_model(f"{model_name}")
        else:
            utils.print_head("Model not found !!", color='red')
            utils.print_head("Training model from initial epoch......", color='darkcyan')
            self.resume_last_model()
def validate_presence_of_items_in_folder(self, path):
    ''' This function validates the non-emptiness of the folder '''
    if len(os.listdir(path)) == 0:
        utils.print_head(f"Data not found !\nKindly check {path}.", color='red')
        exit()
def load_model(self):
    utils.print_head('Model loaded for Testing...', 'darkcyan')
    self.model = keras.models.load_model(self.weights_path)
    self.classes = np.load(f"{self.paths['class_file_path']}/{self.paths['class_file_name']}")
def load_testing_dataset(self):
    ''' This function loads the testing dataset '''
    utils.print_head('Testing dataset loaded...', 'darkcyan')
    self.test_datagen = utils.load_test_dataset(self.paths['test_dataset_path'], self.config)
def get_best_dynamic_weights_name(self):
    ''' This function names the best weight file '''
    utils.print_head(f"Saving Best Weight : {self.paths['model_weights_path']}/Best-{self.run_id}.h5", color='darkcyan')
    return f"{self.paths['model_weights_path']}/Best-{self.run_id}.h5"
def load_model(self):
    utils.print_head('Load model for prediction...', color='darkcyan')
    self.model = keras.models.load_model(self.weights_path)
    self.classes = np.load(f"{self.paths['class_file_path']}/{self.paths['class_file_name']}")
def validate_config(self):
    ''' This function validates the config.py file '''
    config_path = f"{self.paths['project_path']}/config.py"
    if self.check_directory_existance(config_path) == False:
        utils.print_head(f"{config_path} file not found !!", color='red')
        exit()