import glob
import os
import random


def create_map_files_from_folders(data_dir, split=0.8, number_of_samples=800):
    # Each immediate subfolder of data_dir is treated as one class; write a
    # map file of "absolute_path<TAB>class_index" lines.
    with open(os.path.join(data_dir, 'images.txt'), mode='w') as f:
        path, classes, _ = next(os.walk(data_dir))
        for cls in classes:
            candidates = [
                x for x in glob.glob(os.path.join(path, cls, '*'))
                if not x.endswith('txt')
            ]
            for image_file in candidates[:number_of_samples]:
                if image_file.endswith(('png', 'jpg', 'jpeg')):
                    f.write("{}\t{}\n".format(os.path.abspath(image_file),
                                              classes.index(cls)))
    with open(os.path.join(data_dir, 'images.txt')) as f:
        images = f.readlines()
    # Heuristic: if the first 20 lines carry fewer than 2 distinct labels, the
    # file is still grouped by class and needs shuffling. (The original called
    # set.difference() with no arguments, which is a no-op copy of the set.)
    first_labels = set(image.split('\t')[-1] for image in images[:20])
    if len(first_labels) < 2:
        random.shuffle(images)
    train_images = images[:int(len(images) * split)]
    test_images = images[int(len(images) * split):]
    with open(os.path.join(data_dir, 'finalize_network.txt'), mode='w') as t:
        t.writelines(train_images)
    with open(os.path.join(data_dir, 'test.txt'), mode='w') as test:
        test.writelines(test_images)


def create_map_files_from_folders(data_dir, split=0.8, number_of_training_samples=800):
    # Variant that also reserves a fixed evaluation set of 100 images.
    with open(os.path.join(data_dir, 'images.txt'), mode='w') as f:
        path, classes, _ = next(os.walk(data_dir))
        for cls in classes:
            candidates = glob.glob(os.path.join(path, cls, '*'))
            for image_file in candidates[:number_of_training_samples]:
                if image_file.endswith(('png', 'jpg')):
                    f.write(f"{os.path.abspath(image_file)}\t{classes.index(cls)}\n")
    with open(os.path.join(data_dir, 'images.txt')) as f:
        images = f.readlines()
    # Same shuffle heuristic as above, with the no-op set.difference() removed.
    first_labels = set(image.split('\t')[-1] for image in images[:20])
    if len(first_labels) < 2:  # fewer than 2 labels means not shuffled yet
        random.shuffle(images)
    # Take the last 100 (post-shuffle) lines as the evaluation set; this
    # assumes the map file contains at least 100 images.
    eval_images = [images.pop() for _ in range(100)]
    train_images = images[:int(len(images) * split)]
    test_images = images[int(len(images) * split):]
    with open(os.path.join(data_dir, 'finalize_network.txt'), mode='w') as t:
        t.writelines(train_images)
    with open(os.path.join(data_dir, 'test.txt'), mode='w') as test:
        test.writelines(test_images)
    with open(os.path.join(data_dir, 'evaluate.txt'), mode='w') as e:
        e.writelines(eval_images)


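# Usage sketch (hypothetical paths, not from the original code). Both variants
# above assume a layout where each immediate subfolder of data_dir is a class:
#   data_dir/
#       cats/   img001.jpg, img002.jpg, ...
#       dogs/   img001.jpg, img002.jpg, ...
# Calling the second variant (the definition in scope after both) produces
# images.txt, finalize_network.txt (train split), test.txt, and evaluate.txt
# inside data_dir:
create_map_files_from_folders('./data', split=0.8, number_of_training_samples=800)

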
def main(max_epochs, image_directory, lr, number_of_samples, samples_per_minibatch, model_details):
    # create_mean_file, create_reader and finalize_network are project-level
    # helpers assumed to be defined elsewhere in this repository.
    create_map_files_from_folders(image_directory)
    pixel_dimensions = model_details['image_dims']
    classes = next(os.walk(image_directory))[1]
    all_images_map_file = os.path.join(image_directory, 'images.txt')
    mean_file = create_mean_file(all_images_map_file, pixel_dimensions, 'mean_file.xml')
    with open(all_images_map_file) as f:
        samples_per_epoch_train = len(f.readlines())
    print(f"Classes: {classes}")
    print(f"Max epochs: {max_epochs}")
    print(f"Learning rate: {lr}")
    print(f"Minibatch Size: {samples_per_minibatch}")
    print(f"Image width and height: {pixel_dimensions}")
    print(f"Number of training samples: {number_of_samples}")
    reader = create_reader(map_file=all_images_map_file,
                           pixel_dimensions=pixel_dimensions,
                           classes=classes,
                           train=True,
                           total_number_of_samples=max_epochs * samples_per_epoch_train,
                           mean_file=mean_file)
    finalize_network(reader=reader,
                     samples_per_epoch=samples_per_epoch_train,
                     max_amount_of_epochs=max_epochs,
                     samples_per_minibatch=samples_per_minibatch,
                     pixel_dimensions=pixel_dimensions,
                     classes=classes,
                     learning_rate=lr,
                     model_details=model_details)


def create_map_files_from_folders(image_directory):
    # Single-argument variant used by the main() above: shuffles the map-file
    # lines before writing, so no post-hoc shuffle check is needed.
    with open(os.path.join(image_directory, 'images.txt'), mode='w') as f:
        path, classes, _ = next(os.walk(image_directory))
        files = []
        for cls in classes:
            candidates = [
                x for x in glob.glob(os.path.join(path, cls, '*'))
                if not x.endswith('txt')
            ]
            for image_file in candidates[:3200]:
                if image_file.endswith(('png', 'jpg', 'jpeg')):
                    files.append(f"{os.path.abspath(image_file)}\t{classes.index(cls)}\n")
        random.shuffle(files)
        f.writelines(files)


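# A minimal sketch of consuming the map file this variant produces, assuming
# the tab-separated "path<TAB>class_index" layout written above ('./data' is a
# placeholder directory, not from the original code):
with open(os.path.join('./data', 'images.txt')) as map_file:
    for line in map_file:
        image_path, class_index = line.rstrip('\n').split('\t')
        # image_path is absolute; class_index is the numeric label as a string

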
def main(max_epochs, data_dir, test_dir, output_dir, lr, dimensions, number_of_samples, samples_per_minibatch, with_tf):
    # create_mean_file, create_reader, train, evaluate_batch and
    # FULL_DATA_SWEEP are assumed to be defined or imported elsewhere.
    create_map_files_from_folders(data_dir, split=0.7, number_of_samples=number_of_samples)
    classes = next(os.walk(data_dir))[1]
    train_map_file = os.path.join(data_dir, 'finalize_network.txt')
    test_map_file = os.path.join(data_dir, 'test.txt')
    mean_file = create_mean_file(train_map_file, dimensions, 'mean_file.xml')
    if test_dir:
        # With a separate test directory, test against its map file instead of
        # the held-out split of data_dir.
        create_map_files_from_folders(test_dir, split=0.7, number_of_samples=number_of_samples)
        test_map_file = os.path.join(test_dir, 'test.txt')
    max_amount_of_epochs = max_epochs
    with open(train_map_file) as f:
        samples_per_epoch_train = len(f.readlines())
    with open(test_map_file) as f:
        samples_per_epoch_test = len(f.readlines())
    print("Classes: {}".format(classes))
    print("Max epochs: {}".format(max_epochs))
    print("Learning rate: {}".format(lr))
    print("Minibatch Size: {}".format(samples_per_minibatch))
    print("Image width and height: {}".format(dimensions))
    print("Number of training samples: {}".format(number_of_samples))
    # run.log_metric('learning_rate', lr)
    # run.log_metric("Minibatch Size", samples_per_minibatch)
    # run.log_metric("shape", dimensions['width'])
    # run.log_metric("samples", number_of_samples)
    # run.log_metric("data_dir", data_dir)
    # run.log_metric("test_dir", test_dir)
    reader_train = create_reader(map_file=train_map_file,
                                 dimensions=dimensions,
                                 classes=classes,
                                 train=True,
                                 total_number_of_samples=max_amount_of_epochs * samples_per_epoch_train,
                                 mean_file=mean_file)
    reader_test = create_reader(map_file=test_map_file,
                                dimensions=dimensions,
                                classes=classes,
                                train=False,
                                total_number_of_samples=FULL_DATA_SWEEP,
                                mean_file=mean_file)
    reader_eval = create_reader(map_file=test_map_file,
                                dimensions=dimensions,
                                classes=classes,
                                train=False,
                                total_number_of_samples=FULL_DATA_SWEEP,
                                mean_file=mean_file)
    network = train(reader_train=reader_train,
                    reader_test=reader_test,
                    samples_per_epoch=samples_per_epoch_train,
                    max_amount_of_epochs=max_amount_of_epochs,
                    samples_per_minibatch=samples_per_minibatch,
                    dimensions=dimensions,
                    classes=classes,
                    learning_rate=lr,
                    output_directory=output_dir,
                    with_tf=with_tf)
    evaluate_batch(network,
                   reader_eval,
                   samples_per_epoch_test,
                   classes,
                   output_directory=output_dir,
                   number_of_samples=number_of_samples,
                   with_tf=with_tf,
                   samples_per_minibatch=samples_per_minibatch,
                   data_dir=data_dir,
                   test_dir=test_dir)


def main(max_epochs, data_dir, test_dir, output_dir, lr, dimensions, number_of_samples, samples_per_minibatch, tf_model_url):
    # 'run' is assumed to be an experiment-tracking handle (e.g. an Azure ML
    # Run object) created elsewhere; run.log(name, value) records a metric.
    create_map_files_from_folders(data_dir, split=0.7, number_of_training_samples=number_of_samples)
    classes = next(os.walk(data_dir))[1]
    train_map_file = os.path.join(data_dir, 'finalize_network.txt')
    test_map_file = os.path.join(data_dir, 'test.txt')
    # eval_map_file = os.path.join(data_dir, 'evaluate.txt')
    if test_dir:
        create_map_files_from_folders(test_dir, split=0.7, number_of_training_samples=number_of_samples)
        test_map_file = os.path.join(test_dir, 'test.txt')
    max_amount_of_epochs = max_epochs
    with open(train_map_file) as f:
        samples_per_epoch_train = len(f.readlines())
    with open(test_map_file) as f:
        samples_per_epoch_test = len(f.readlines())
    print(f"Classes: {classes}")
    print(f"Max epochs: {max_epochs}")
    print(f"Learning rate: {lr}")
    run.log('learning_rate', lr)
    print(f"Minibatch Size: {samples_per_minibatch}")
    run.log("Minibatch Size", samples_per_minibatch)
    print(f"Image width and height: {dimensions}")
    run.log("shape", dimensions['width'])
    print(f"Number of training samples: {number_of_samples}")
    run.log("samples", number_of_samples)
    run.log("data_dir", data_dir)
    run.log("test_dir", test_dir)
    reader_train = create_reader(map_file=train_map_file,
                                 dimensions=dimensions,
                                 classes=classes,
                                 train=True,
                                 total_number_of_samples=max_amount_of_epochs * samples_per_epoch_train)
    reader_test = create_reader(map_file=test_map_file,
                                dimensions=dimensions,
                                classes=classes,
                                train=False,
                                total_number_of_samples=FULL_DATA_SWEEP)
    reader_eval = create_reader(map_file=test_map_file,
                                dimensions=dimensions,
                                classes=classes,
                                train=False,
                                total_number_of_samples=FULL_DATA_SWEEP)
    network = train(reader_train=reader_train,
                    reader_test=reader_test,
                    samples_per_epoch=samples_per_epoch_train,
                    max_amount_of_epochs=max_amount_of_epochs,
                    samples_per_minibatch=samples_per_minibatch,
                    dimensions=dimensions,
                    classes=classes,
                    learning_rate=lr,
                    output_directory=output_dir,
                    tf_model_url=tf_model_url)
    evaluate_batch(network,
                   reader_eval,
                   samples_per_epoch_test,
                   classes,
                   output_directory=output_dir,
                   number_of_samples=number_of_samples)


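# Hypothetical entry point for the last main() above; every value here is an
# illustrative assumption, since the section does not show how main() is
# invoked. 'dimensions' is inferred to be a dict with at least a 'width' key,
# because main() logs dimensions['width'].
if __name__ == '__main__':
    main(max_epochs=30,
         data_dir='./data',
         test_dir=None,  # fall back to the held-out split of data_dir
         output_dir='./outputs',
         lr=0.01,
         dimensions={'width': 224, 'height': 224, 'depth': 3},
         number_of_samples=800,
         samples_per_minibatch=64,
         tf_model_url=None)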