class Data():
    """Data preparation entry point for the Caffe Acute Lymphoblastic Leukemia CNN."""

    def __init__(self):
        """
        Build the helper objects, load the configuration and open the
        log file used by the data script.
        """

        helpers = Helpers()
        confs = helpers.loadConfs()
        logFile = helpers.setLogFile(confs["Settings"]["Logs"]["allCNN"])

        self.Helpers = helpers
        self.confs = confs
        self.logFile = logFile
        self.CaffeHelpers = CaffeHelpers(confs, helpers, logFile)

        helpers.logMessage(logFile, "allCNN", "Status", "Data init complete")

    def sortData(self):
        """
        Run every data preparation stage, in order, then log completion.
        """

        # The stages are order dependent: the old LMDB is removed first and
        # the mean is computed last.
        stages = (
            "deleteLMDB",
            "sortLabels",
            "sortTrainingData",
            "recreatePaperData",
            "createTrainingLMDB",
            "createValidationLMDB",
            "computeMean",
        )
        for stage in stages:
            getattr(self.CaffeHelpers, stage)()

        self.Helpers.logMessage(
            self.logFile, "allCNN", "Status", "Data sorting complete")
class Data():
    """Core Data class wrapper: splits the dataset and hands it to DataProcess."""

    def __init__(self):
        """
        Create the helpers, load the configuration, open the data log
        and prepare the empty placeholders used by this class.
        """

        helpers = Helpers()
        self.Helpers = helpers
        self.confs = helpers.loadConfs()
        self.logFile = helpers.setLogFile(
            self.confs["Settings"]["Logs"]["DataLogDir"])

        self.DataProcess = DataProcess()
        self.labelsToName = {}

        helpers.logMessage(self.logFile, "init", "INFO", "Init complete")

    def sortData(self):
        """
        Shuffle the dataset with the configured seed, split it into
        training and validation sets, convert both to TFRecords and
        write the label files.
        """

        # The timer values are not used afterwards; the call is kept because
        # it is part of the original sequence.
        _humanStart, _clockStart = self.Helpers.timerStart()

        self.Helpers.logMessage(
            self.logFile, "sortData", "INFO",
            "Loading & preparing training data")

        dataPaths, classes = self.DataProcess.processFilesAndClasses()

        # Class names are converted with int() to obtain their ids.
        classIds = list(map(int, classes))
        nameToId = dict(zip(classes, classIds))

        # Size of the validation slice, as a fraction of all files.
        classifier = self.confs["Classifier"]
        numValidation = int(classifier["ValidationSize"] * len(dataPaths))

        self.Helpers.logMessage(
            self.logFile, "sortData", "Validation Size", str(numValidation))
        self.Helpers.logMessage(
            self.logFile, "sortData", "Class Size", str(len(classes)))

        # Seeded in-place shuffle keeps the split reproducible.
        random.seed(classifier["RandomSeed"])
        random.shuffle(dataPaths)

        validationFiles = dataPaths[:numValidation]
        trainingFiles = dataPaths[numValidation:]

        for splitName, splitFiles in (('train', trainingFiles),
                                      ('validation', validationFiles)):
            self.DataProcess.convertToTFRecord(splitName, splitFiles, nameToId)

        # Inverse mapping (id -> class name) goes to the labels file.
        idToName = dict(zip(classIds, classes))
        self.DataProcess.writeLabels(idToName)

        self.Helpers.logMessage(
            self.logFile, "sortData", "COMPLETE", "Completed sorting data!")
class Data():
    """
    Core Data class.

    Discovers the labelled image directories below the configured
    dataset directory and converts the images into sharded TFRecord
    files. All paths and tunables are read from ``self.confs``.
    """

    def __init__(self):
        """
        Sets up all default requirements and placeholders
        needed for this class.
        """

        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.logFile = self.Helpers.setLogFile(self.confs["Settings"]["Logs"]["DataLogDir"])

    def getLabelsAndDirectories(self):
        """
        Returns a list of classes/labels and directories.

        Every sub-directory of the dataset directory (except
        ``.ipynb_checkpoints``) contributes its name as a label and its
        full path as a directory. Both lists are in the same order.
        """

        datasetDir = self.confs["Classifier"]["DatasetDir"]

        labels = []
        directories = []

        # A single listing feeds both lists, so they cannot disagree if the
        # directory changes while it is being scanned.
        for name in os.listdir(datasetDir):
            if name == '.ipynb_checkpoints':
                continue
            path = os.path.join(datasetDir, name)
            if os.path.isdir(path):
                labels.append(name)
                directories.append(path)

        return labels, directories

    def processFilesAndClasses(self):
        """
        Returns a list of filenames and classes/labels.

        Only files whose extension is listed in the ``ValidIType``
        setting are returned. The labels are returned sorted.
        """

        labels, directories = self.getLabelsAndDirectories()
        validTypes = self.confs["Classifier"]["ValidIType"]

        data = []
        for directory in directories:
            for filename in os.listdir(directory):
                if os.path.splitext(filename)[1] in validTypes:
                    data.append(os.path.join(directory, filename))

        return data, sorted(labels)

    def convertToTFRecord(self, split_name, filenames, labels_to_ids):
        """
        Converts the given filenames to a TFRecord dataset.

        split_name:    'train' or 'validation' (anything else trips the
                       assertion below).
        filenames:     image paths; the name of each file's parent
                       directory is used as its class name.
        labels_to_ids: mapping from class name to integer class id.
        """

        # Kept as an assert so the exception type seen by callers is unchanged.
        assert split_name in ['train', 'validation']

        shards = self.confs["Classifier"]["Shards"]
        total = len(filenames)
        num_per_shard = int(math.ceil(total / float(shards)))

        self.Helpers.logMessage(self.logFile, "convertToTFRecord", "INFO",
                                "Number of files: " + str(total))
        self.Helpers.logMessage(self.logFile, "convertToTFRecord", "INFO",
                                "Number per shard: " + str(num_per_shard))

        with tf.Graph().as_default():
            image_reader = ImageReader()

            with tf.Session('') as sess:

                for shard_id in range(shards):
                    output_filename = self.getDatasetFilename(split_name, shard_id)
                    self.Helpers.logMessage(self.logFile, "convertToTFRecord", "STATUS",
                                            "Saving: " + str(output_filename))

                    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
                        start_ndx = shard_id * num_per_shard
                        # The last shard may be shorter than the others.
                        end_ndx = min((shard_id + 1) * num_per_shard, total)

                        for i in range(start_ndx, end_ndx):
                            sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                                i + 1, total, shard_id))
                            sys.stdout.flush()
                            print("")

                            # Read the file inside a context manager so the
                            # handle is closed before the next image is opened.
                            with tf.gfile.FastGFile(filenames[i], 'rb') as image_file:
                                image_data = image_file.read()
                            height, width = image_reader.read_image_dims(sess, image_data)

                            class_name = os.path.basename(os.path.dirname(filenames[i]))
                            class_id = labels_to_ids[class_name]

                            self.Helpers.logMessage(self.logFile, "convertToTFRecord", "INFO",
                                                    "class_name: " + str(class_name))
                            self.Helpers.logMessage(self.logFile, "convertToTFRecord", "INFO",
                                                    "class_id: " + str(class_id))

                            # NOTE(review): the format tag is always b'jpg', even
                            # if ValidIType admits other extensions - confirm.
                            example = self.imageToTFExample(
                                image_data, b'jpg', height, width, class_id)
                            tfrecord_writer.write(example.SerializeToString())

        sys.stdout.write('\n')
        sys.stdout.flush()

    def getDatasetFilename(self, split_name, shard_id):
        """
        Gets the dataset filename.

        The name has the form ``<TFRecordFile>_<split>_<shard>-of-<Shards>.tfrecord``
        (shard numbers zero padded to five digits) and is placed in the
        dataset directory.
        """

        output_filename = '%s_%s_%05d-of-%05d.tfrecord' % (
            self.confs["Classifier"]["TFRecordFile"],
            split_name,
            shard_id,
            self.confs["Classifier"]["Shards"])

        return os.path.join(self.confs["Classifier"]["DatasetDir"], output_filename)

    def int64Feature(self, values):
        """
        Returns a TF-Feature of int64s.

        A single value is wrapped in a list; tuples and lists are
        passed through unchanged.
        """

        if not isinstance(values, (tuple, list)):
            values = [values]

        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    def bytesFeature(self, values):
        """
        Returns a TF-Feature of bytes holding the single given value.
        """

        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))

    def imageToTFExample(self, image_data, image_format, height, width, class_id):
        """
        Converts an image to a TF Example with the encoded bytes, format,
        class label, height and width features.
        """

        return tf.train.Example(features=tf.train.Features(feature={
            'image/encoded': self.bytesFeature(image_data),
            'image/format': self.bytesFeature(image_format),
            'image/class/label': self.int64Feature(class_id),
            'image/height': self.int64Feature(height),
            'image/width': self.int64Feature(width)
        }))

    def writeLabels(self, labels_to_labels):
        """
        Writes the files with the list of class names.

        labels_to_labels maps an integer label to its class name. The
        ``Classes`` file receives one label per line; the ``Labels``
        file (inside the dataset directory) receives ``label:class_name``
        lines.
        """

        labels_filename = os.path.join(
            self.confs["Classifier"]["DatasetDir"],
            self.confs["Classifier"]["Labels"])

        with tf.gfile.Open(self.confs["Classifier"]["Classes"], 'w') as f:
            for label in labels_to_labels:
                f.write('%s\n' % (label))

        with tf.gfile.Open(labels_filename, 'w') as f:
            for label in labels_to_labels:
                class_name = labels_to_labels[label]
                f.write('%d:%s\n' % (label, class_name))
class Data():
    """
    Image augmentation helper.

    Each method writes one or more augmented copies of an image to disk
    and counts the files it produced in ``self.filesMade``.
    """

    def __init__(self):
        """
        Sets up all default requirements and placeholders
        needed for this class.
        """

        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        # Target size handed to cv2.resize: a square of ImgDims x ImgDims.
        self.fixed = tuple((self.confs["Settings"]["ImgDims"],
                            self.confs["Settings"]["ImgDims"]))
        self.filesMade = 0
        self.trainingDir = self.confs["Settings"]["TrainDir"]

    def writeImage(self, filename, image):
        """
        Writes an image based on the filepath and the image provided.

        Nothing is written (and a message is printed) when either
        argument is None. A failed write is reported but never raised.
        """

        if filename is None:
            print("Filename does not exist, file cannot be written.")
            return

        if image is None:
            print("Image does not exist, file cannot be written.")
            return

        try:
            # cv2.imwrite signals most failures by returning False rather
            # than raising, so the return value has to be checked as well.
            written = cv2.imwrite(filename, image)
        except Exception:
            written = False

        if not written:
            print("File was not written! "+filename)

    def resize(self, filePath, savePath, show = False):
        """
        Reads the image at filePath, resizes it to the configured fixed
        dimensions, writes it to savePath and returns the resized image.
        """

        image = cv2.resize(cv2.imread(filePath), self.fixed)
        self.writeImage(savePath, image)
        self.filesMade += 1
        print("Resized image written to: " + savePath)

        if show is True:
            plt.imshow(image)
            plt.show()

        return image

    def grayScale(self, image, grayPath, show = False):
        """
        Writes a grayscale copy of the image to the filepath provided.

        Returns the untouched input image together with the grayscale copy.
        """

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        self.writeImage(grayPath, gray)
        self.filesMade += 1
        print("Grayscaled image written to: " + grayPath)

        if show is True:
            plt.imshow(gray)
            plt.show()

        return image, gray

    def equalizeHist(self, gray, histPath, show = False):
        """
        Writes histogram equalized copy of the image to the filepath provided.

        Returns the equalized image.
        """

        # Equalize once and reuse the result for both the file and the return.
        hist = cv2.equalizeHist(gray)
        self.writeImage(histPath, hist)
        self.filesMade += 1
        print("Histogram equalized image written to: " + histPath)

        if show is True:
            plt.imshow(hist)
            plt.show()

        return hist

    def reflection(self, image, horPath, verPath, show = False):
        """
        Writes reflected copies of the image to the filepath provided.

        Returns both flipped images as (horImg, verImg).
        """

        # flipCode 0 flips around the x-axis (top <-> bottom).
        horImg = cv2.flip(image, 0)
        self.writeImage(horPath, horImg)
        self.filesMade += 1
        print("Horizontally reflected image written to: " + horPath)

        if show is True:
            plt.imshow(horImg)
            plt.show()

        # flipCode 1 flips around the y-axis (left <-> right).
        verImg = cv2.flip(image, 1)
        self.writeImage(verPath, verImg)
        self.filesMade += 1
        print("Vertical reflected image written to: " + verPath)

        if show is True:
            plt.imshow(verImg)
            plt.show()

        return horImg, verImg

    def gaussian(self, filePath, gaussianPath, show = False):
        """
        Writes gaussian blurred copy of the image to the filepath provided.

        Returns the blurred image.
        """

        # NOTE(review): a scalar sigma is applied along every axis of the
        # array, and the image is read with matplotlib but written with
        # OpenCV - confirm both are intended before changing the output.
        gaussianBlur = ndimage.gaussian_filter(plt.imread(filePath), sigma=5.11)
        self.writeImage(gaussianPath, gaussianBlur)
        self.filesMade += 1
        print("Gaussian image written to: " + gaussianPath)

        if show is True:
            plt.imshow(gaussianBlur)
            plt.show()

        return gaussianBlur

    def translate(self, image, filePath, show = False):
        """
        Writes transformed copy of the image to the filepath provided.

        The image is shifted by 84 pixels in x and 56 pixels in y; the
        uncovered border is filled with a constant colour. Returns the
        translated image.
        """

        # shape is (height, width[, channels]); slicing also accepts 2-D images.
        height, width = image.shape[:2]

        translated = cv2.warpAffine(
            image, np.float32([[1, 0, 84], [0, 1, 56]]), (width, height),
            borderMode=cv2.BORDER_CONSTANT, borderValue=(144, 159, 162))

        self.writeImage(filePath, translated)
        self.filesMade += 1
        print("Translated image written to: " + filePath)

        if show is True:
            plt.imshow(translated)
            plt.show()

        return translated

    def rotation(self, path, filePath, filename, show = False):
        """
        Writes rotated copies of the image to the filepath provided.

        Twenty copies are produced, each rotated by a random whole number
        of degrees in [-180, 180] and scaled by 0.70. Each copy is saved
        in ``path`` as ``<degrees>-<index>-<filename>``.
        """

        image = cv2.imread(filePath)
        height, width = image.shape[:2]
        # OpenCV expects the rotation centre as (x, y), i.e. (width, height).
        centre = (width / 2, height / 2)

        for i in range(0, 20):
            randDeg = random.randint(-180, 180)
            matrix = cv2.getRotationMatrix2D(centre, randDeg, 0.70)
            rotated = cv2.warpAffine(
                image, matrix, (width, height),
                borderMode=cv2.BORDER_CONSTANT, borderValue=(144, 159, 162))

            fullPath = os.path.join(
                path, str(randDeg) + '-' + str(i) + '-' + filename)

            self.writeImage(fullPath, rotated)
            self.filesMade += 1
            print("Rotated image written to: " + fullPath)

            if show is True:
                plt.imshow(rotated)
                plt.show()

    def processDataset(self):
        """
        Runs all of the above functions saving the new dataset to the
        Augmented directory.

        Every sub-directory of the training directory is mirrored in the
        augmented directory; every ``.jpg`` inside it is resized and then
        augmented. Other files are reported and skipped.
        """

        for directory in os.listdir(self.trainingDir):

            # Skip none data directories
            if(directory==".ipynb_checkpoints" or directory=="__pycache__"):
                continue

            self.filesMade = 0
            path = os.path.join(self.confs["Settings"]["TrainDir"], directory)
            sortedPath = os.path.join(self.confs["Settings"]["AugDir"], directory)

            # Only real class directories are mirrored; a stray file in the
            # training directory must not create an empty augmented folder.
            if not os.path.isdir(path):
                continue

            # Stops program from crashing if augmented folders do not exist
            if not os.path.exists(sortedPath):
                os.makedirs(sortedPath)

            fCount = 0
            for filename in os.listdir(path):
                if not filename.endswith('.jpg'):
                    print("File was not jpg! "+filename)
                    continue

                filePath = os.path.join(path, filename)
                fileSortedPath = os.path.join(sortedPath, filename)

                image = self.resize(filePath, fileSortedPath, False)
                image, gray = self.grayScale(
                    image, os.path.join(sortedPath, "Gray-"+filename), False)
                self.equalizeHist(
                    gray, os.path.join(sortedPath, "Hist-"+filename), False)
                self.reflection(
                    image, os.path.join(sortedPath, "Hor-"+filename),
                    os.path.join(sortedPath, "Ver-"+filename), False)
                # Blur and rotation work from the resized copy on disk.
                self.gaussian(
                    fileSortedPath, os.path.join(sortedPath, "Gaus-"+filename), False)
                self.translate(
                    image, os.path.join(sortedPath, "Trans-"+filename), False)
                self.rotation(sortedPath, fileSortedPath, filename)

                fCount += 1
                print("Total augmented files created so far " + str(self.filesMade))
                print("")

            print("AML/ALL Augmentation: " + self.Helpers.currentDateTime())
            print("Added filters to " + str(fCount) + " files in the " + str(directory) + " directory")
            print("Total of " + str(self.filesMade) + " augmented files created.")
            print("")
class Classifier():
    """
    Movidius based classifier.

    Opens the first Movidius device found, allocates the Inception graph
    on it and loads the category names.
    """

    def __init__(self):
        """
        Loads the configuration, opens the log file, sets the default
        placeholders and connects to the Movidius device.
        """

        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.logFile = self.Helpers.setLogFile(
            self.confs["Settings"]["Logs"]["DataLogDir"])

        self.Helpers.logMessage(self.logFile, "init", "INFO", "Init complete")

        self.movidius = None

        self.mean = 128
        # Float literal so the value is 0.0078125 even where "/" on two
        # ints is integer division (which would give 0).
        self.std = 1 / 128.0

        self.categories = []

        self.graphfile = None
        self.graph = None
        self.reqsize = None

        self.extensions = [".jpg", ".png"]

        self.CheckDevices()

    def CheckDevices(self):
        """
        Opens the first enumerated Movidius device.

        Logs a warning and exits the program (raises SystemExit) when no
        device is found.
        """

        # sys.exit() instead of quit(): quit() is injected by the site
        # module for interactive use and is not guaranteed to exist.
        import sys

        #mvnc.SetGlobalOption(mvnc.GlobalOption.LOGLEVEL, 2)
        devices = mvnc.EnumerateDevices()
        if not devices:
            self.Helpers.logMessage(self.logFile, "CheckDevices", "WARNING",
                                    "No Movidius Devices Found")
            sys.exit()

        self.movidius = mvnc.Device(devices[0])
        self.movidius.OpenDevice()

        self.Helpers.logMessage(self.logFile, "CheckDevices", "STATUS",
                                "Movidius Connected")

    def AllocateGraph(self, graphfile):
        """
        Allocates the given graph file contents on the device and stores
        the result in ``self.graph``.
        """

        self.graph = self.movidius.AllocateGraph(graphfile)

    def LoadInception(self):
        """
        Reads the Inception graph file, allocates it on the device and
        appends the category names from ``Model/classes.txt`` to
        ``self.categories``. The line ``classes`` is skipped.
        """

        networkPath = self.confs["Classifier"]["NetworkPath"]
        self.reqsize = self.confs["Classifier"]["ImageSize"]

        with open(networkPath + self.confs["Classifier"]["InceptionGraph"],
                  mode='rb') as f:
            self.graphfile = f.read()

        self.AllocateGraph(self.graphfile)

        self.Helpers.logMessage(self.logFile, "LoadInception", "STATUS",
                                "Graph Allocated")

        # The with block closes the file; no explicit close() is needed.
        with open(networkPath + 'Model/classes.txt', 'r') as f:
            for line in f:
                cat = line.split('\n')[0]
                if cat != 'classes':
                    self.categories.append(cat)

        self.Helpers.logMessage(self.logFile, "LoadInception", "STATUS",
                                "Categories Loaded")
class Trainer():
    """
    Trainer class.

    Builds TF-Slim datasets from the TFRecord shards in the configured
    dataset directory and batches them for training or evaluation.
    """

    # Handed to slim.dataset.Dataset by getSplit(). Defined at class level so
    # the attribute always exists; an instance may still assign its own.
    items_to_descriptions = {
        'image': 'The decoded image of the example.',
        'label': 'The integer class label of the example.'
    }

    def __init__(self):
        """
        Sets up all default requirements and placeholders
        needed for this class.
        """

        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.logFile = self.Helpers.setLogFile(
            self.confs["Settings"]["Logs"]["DataLogDir"])

        self.labelsToName = {}

        self.Helpers.logMessage(self.logFile, "init", "INFO", "Init complete")

    def getSplit(self, split_name):
        """
        Obtains the training/validation split.

        split_name: 'train' or 'validation'.

        Returns a slim.dataset.Dataset reading the shards that match the
        configured FilePattern for the split.

        Raises ValueError for any other split_name.
        """

        #Check whether the split_name is train or validation
        if split_name not in ['train', 'validation']:
            raise ValueError(
                'The split_name %s is not recognized. Please input either train or validation as the split_name'
                % (split_name))

        classifier = self.confs["Classifier"]
        datasetDir = classifier["DatasetDir"]

        #Create the full path for a general FilePattern to locate the tfrecord_files
        FilePattern_path = os.path.join(
            datasetDir, classifier["FilePattern"] % (split_name))

        #Count the total number of examples in all of these shards. The shard
        #prefix follows the TFRecordFile setting; '200label' is the fallback
        #used when the setting is absent.
        FilePattern_for_counting = (
            classifier.get("TFRecordFile", "200label") + '_' + split_name)

        tfrecords_to_count = [
            os.path.join(datasetDir, shard_name)
            for shard_name in os.listdir(datasetDir)
            if shard_name.startswith(FilePattern_for_counting)
        ]

        num_samples = 0
        for tfrecord_file in tfrecords_to_count:
            num_samples += sum(
                1 for _ in tf.python_io.tf_record_iterator(tfrecord_file))

        #Create a reader, which must be a TFRecord reader in this case
        reader = tf.TFRecordReader

        #Create the keys_to_features dictionary for the decoder
        keys_to_features = {
            'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpg'),
            'image/class/label':
            tf.FixedLenFeature([], tf.int64,
                               default_value=tf.zeros([], dtype=tf.int64)),
        }

        #Create the items_to_handlers dictionary for the decoder.
        items_to_handlers = {
            'image': slim.tfexample_decoder.Image(),
            'label': slim.tfexample_decoder.Tensor('image/class/label'),
        }

        #Start to create the decoder
        decoder = slim.tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        #Create the labels_to_name file
        labels_to_name_dict = self.labelsToName

        #Actually create the dataset
        dataset = slim.dataset.Dataset(
            data_sources=FilePattern_path,
            decoder=decoder,
            reader=reader,
            num_readers=4,
            num_samples=num_samples,
            num_classes=classifier["NumClasses"],
            labels_to_name=labels_to_name_dict,
            items_to_descriptions=self.items_to_descriptions)

        return dataset

    def loadBatch(self, dataset, is_training=True):
        """
        Loads a batch for training.

        dataset:     the dataset returned by getSplit().
        is_training: forwarded to the Inception preprocessing.

        Returns (images, raw_images, labels): the preprocessed images,
        the crop-or-padded raw images and their labels, batched with the
        configured BatchSize.
        """

        batchSize = self.confs["Classifier"]["BatchSize"]
        imageSize = self.confs["Classifier"]["ImageSize"]

        #First create the data_provider object
        data_provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=24 + 3 * batchSize,
            common_queue_min=24)

        #Obtain the raw image using the get method
        raw_image, label = data_provider.get(['image', 'label'])

        #Perform the correct preprocessing for this image depending if it is training or evaluating
        image = Classes.inception_preprocessing.preprocess_image(
            raw_image, imageSize, imageSize, is_training)

        #As for the raw images, we just do a simple reshape to batch it up
        raw_image = tf.image.resize_image_with_crop_or_pad(
            raw_image, imageSize, imageSize)

        #Batch up the image by enqueing the tensors internally in a FIFO queue and dequeueing many elements with tf.train.batch.
        images, raw_images, labels = tf.train.batch(
            [image, raw_image, label],
            batch_size=batchSize,
            num_threads=4,
            capacity=4 * batchSize,
            allow_smaller_final_batch=True)

        return images, raw_images, labels