def build_model(self):
    """Train, persist and evaluate the classification model.

    A previously saved model is loaded through ``self.load_model``; when that
    raises ``OSError`` a fresh model from ``self.custom_sequential_model()``
    is used instead. The model is compiled, fitted on the training iterator
    (validated on the validation iterator), and saved under the project root
    at ``self.model_save_loc``. An accuracy-per-epoch plot is written to
    ``graph.png`` inside that location, and the confusion matrix and test
    accuracy for the test iterator are logged.

    :return: None
    """
    train_it, val_it, test_it = self.get_train_test_set(self.training_loc)
    try:
        model = self.load_model(self.model_save_loc)
    except OSError:
        model = self.custom_sequential_model()

    model.compile(
        loss="categorical_crossentropy",
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )
    history = model.fit(
        train_it, epochs=self.epochs, validation_data=val_it, batch_size=100
    )
    model.save(join_paths([get_project_root(), self.model_save_loc]))

    # Plot on a dedicated figure and close it afterwards; drawing on the
    # implicit global figure would overlay curves from earlier calls and
    # keep the figure alive for the lifetime of the process.
    figure = plt.figure()
    try:
        plt.plot(history.history["accuracy"], label="accuracy")
        plt.plot(history.history["val_accuracy"], label="val_accuracy")
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy")
        plt.ylim([0.5, 1])
        plt.legend(loc="lower right")
        plt.savefig(
            join_paths([get_project_root(), self.model_save_loc, "graph.png"])
        )
    finally:
        plt.close(figure)

    # ``Sequential.predict_classes`` was removed in TensorFlow 2.6. For a
    # multi-class output (the model is compiled with categorical
    # cross-entropy) it was defined as the arg-max over the last axis.
    pred = model.predict(test_it).argmax(axis=-1)
    # NOTE(review): this comparison assumes the test iterator yields samples
    # in the same order as ``test_it.labels`` (i.e. no shuffling) - confirm.
    logger.info(metrics.confusion_matrix(test_it.labels, pred))
    _, test_acc = model.evaluate(test_it, verbose=2)
    logger.info(test_acc)
def get_train_test_set(self, dir_path):
    """Build the dataset iterators for the three data splits.

    :param dir_path: directory containing ``train``, ``val`` and ``test``
        sub-directories
    :return: tuple ``(train_it, val_it, test_it)`` as produced by
        ``self.get_dataset_it``
    """
    # One iterator per split, created in train / val / test order.
    iterators = [
        self.get_dataset_it(join_paths([dir_path, split]))
        for split in ("train", "val", "test")
    ]
    return tuple(iterators)
def test_create_file_segments(self):
    """
    Check that create_file_segments writes the expected number of cropped
    files for the two downloaded recordings.

    :raises AssertionError:
    :return void:
    """

    def rooted(relative_path):
        # Resolve a path relative to the project root.
        return join_paths([get_project_root(), relative_path])

    file_path = rooted('tests/test_fixtures/test_training_set.txt')

    # Fetch the two source recordings the fixture file refers to.
    storage = AzureInterface('elephant-sound-data')
    downloads = (
        (
            'TestSet/nn01d/nn01d_20180127_000000.wav',
            'data/segments/TrainingSet/nn01d/nn01d_20180127_000000.wav',
        ),
        (
            'TrainingSet/nn01b/nn01b_20180220_000000.wav',
            'data/segments/TrainingSet/nn01b/nn01b_20180220_000000.wav',
        ),
    )
    for blob_name, local_relative in downloads:
        storage.download_from_azure(blob_name, rooted(local_relative))

    create_file_segments(file_path)

    # Expected number of cropped segment files per recording folder.
    expected_counts = (
        ('data/segments/CroppedTrainingSet/nn01d', 3),
        ('data/segments/CroppedTrainingSet/nn01b', 4),
    )
    for cropped_dir, expected in expected_counts:
        assert len(os.listdir(rooted(cropped_dir))) == expected
def run_model(self, dir_path):
    """Run the saved model on the images found in ``dir_path``.

    :param dir_path: data directory, relative to the project root
    :return: result of ``model.predict_classes`` for the dataset iterator,
        or ``None`` when loading the model raises ``OSError``
    """
    data_it = self.get_dataset_it(join_paths([get_project_root(), dir_path]))
    model_location = join_paths([get_project_root(), self.model_save_loc])
    try:
        model = self.load_model(model_location)
    except OSError:
        # Without a model there is nothing to predict; report and give up.
        logger.info("model {} not loaded".format(self.model_save_loc))
        return None
    return model.predict_classes(data_it)
def write_box_to_file(self, image, elephants, image_filename):
    """Store a boxed image in a sub-folder named after the elephant count.

    :param image: image data handed to ``cv2.imwrite``
    :param elephants: collection whose length names the target sub-folder
    :param image_filename: source file name; ``mono_`` is replaced by
        ``boxed_`` for the stored file
    :return: None
    """
    boxed_name = image_filename.replace("mono_", "boxed_")
    count_label = str(len(elephants))

    # Make sure the per-count folder exists before writing into it.
    os.makedirs(join_paths([self.target_folder, count_label]), exist_ok=True)

    boxed_path = join_paths([self.target_folder, count_label, boxed_name])
    cv2.imwrite(boxed_path, image)
    logger.info(f"Boxed image stored as {boxed_path}")
def create_app():
    """Create and configure the Flask application.

    Sets up logging, applies the database configuration, initialises the
    database and migration extensions, registers the ``db`` manager command
    and attaches all blueprints.

    :return: the configured Flask application
    """
    setup_logging()
    app = Flask(__name__, template_folder='application/templates/')

    settings = {
        'SQLALCHEMY_DATABASE_URI': 'sqlite:///elephantscounter.sqlite3',
        'SQLALCHEMY_TRACK_MODIFICATIONS': False,
        'DEBUG': True,
    }
    app.config.update(settings)

    db.init_app(app)
    migrations_dir = join_paths(
        [get_project_root(), 'application/persistence/migrations']
    )
    migrate.init_app(app, db, MIGRATION_DIR=migrations_dir)

    manager = Manager(app)
    manager.add_command('db', MigrateCommand)

    # Page blueprints first, then the api blueprints.
    blueprints = (
        data_analysis,
        data_import,
        data_processing,
        demo,
        events,
        elephant_blueprint,
        blob_blueprint,
    )
    for blueprint in blueprints:
        app.register_blueprint(blueprint)

    return app
def test_run_pipeline(self):
    """Run the pipeline on the test fixtures and expect a count of 1 for
    each of the first three results.

    :raises AssertionError:
    """
    fixtures_dir = join_paths([get_project_root(), "tests/test_fixtures/"])
    value = pipeline_run(fixtures_dir, "data/demo/test_spec_image_labels.csv")
    for position in range(3):
        assert value[position] == 1
def download_from_azure(self, source_file, dest_file):
    """Download a blob from ``self.container_name`` into a local file.

    A missing blob is logged and otherwise ignored; in that case no local
    file is created or truncated.

    :param source_file: name of the blob to download
    :param dest_file: destination path; it is joined onto the project root
        before the file is opened
    :return: None
    """
    try:
        blob = BlobClient(
            account_url=env.AZURE_STORAGE_ACCOUNT,
            container_name=self.container_name,
            blob_name=source_file,
            credential=env.STORAGE_SAS_KEY,
        )
        # Start the download before opening the destination: opening with
        # "wb+" first would leave an empty (or truncated) file behind when
        # the blob does not exist.
        data = blob.download_blob()
        with open(join_paths([get_project_root(), dest_file]), "wb+") as f:
            data.readinto(f)
    except azure.core.exceptions.ResourceNotFoundError:
        logger.info('Blob not found %s', source_file)
async def on_event_batch(self, partition_context, events):
    """Queue the file named in each received event, then checkpoint.

    :param partition_context: object whose ``update_checkpoint`` coroutine
        is awaited once the batch has been handled
    :param events: iterable of events exposing ``body_as_str()``
    """
    for event in events:
        logger.info("Got new event to process!")
        # The body is parsed as a Python literal and must contain a
        # 'filename' entry.
        event_data = ast.literal_eval(event.body_as_str())
        logger.info("Received file name in queue: %s", event_data['filename'])
        self.audio_events_queue.insert_message_queue(
            join_paths([self.dest_folder, event_data['filename']])
        )
    # When the shared flag reports completion the process exits here, i.e.
    # before the checkpoint below is updated.
    if self.flag['finished']:
        import sys
        sys.exit(0)
    await partition_context.update_checkpoint()
def send_to_iot(source_dir, flag, container_name, dest_folder):
    """Send every file in ``source_dir`` to the hub and flag completion.

    :param source_dir: directory to send, relative to the project root
    :param flag: mutable mapping; its ``'finished'`` entry is set to ``True``
        once sending has completed
    :param container_name: forwarded to ``write_to_hub``
    :param dest_folder: forwarded to ``write_to_hub``
    :return: None
    """
    source_path = join_paths([get_project_root(), source_dir])
    files_to_send = get_files_in_dir(source_path)
    sent_counter = {'count': 0}

    # Add a delay to let the receiver catch up.
    time.sleep(10)

    upload = write_to_hub(
        source_path,
        files_to_send,
        sent_counter,
        limit=len(files_to_send),
        container_name=container_name,
        dest_folder=dest_folder,
    )
    asyncio.run(upload)

    logger.info('finished sending data!!!')
    flag['finished'] = True
def run_processing():
    """Download all queued audio files and run the pipeline on them.

    Reads ``queue_name`` and ``container_name`` from the request arguments,
    downloads the blob named in every dequeued message into
    ``data/imported_data``, runs the pipeline on that folder and finally
    deletes the processed messages.

    :return: an empty dict
    """
    args = request.args
    queue_name = args.get("queue_name")
    container_name = args.get("container_name")

    events_queue = AudioEventsQueue(queue_name)
    pending = events_queue.dequeue_message_queue()
    storage = AzureInterface(container_name=container_name)

    # Materialise the messages: they are iterated now and deleted later.
    pending = list(pending)
    for message in pending:
        local_name = message["content"].split("/")[-1]
        local_path = join_paths(
            [get_project_root(), "data/imported_data", local_name]
        )
        storage.download_from_azure(message["content"], dest_file=local_path)

    logger.info("about to run pipeline on {}!".format("data/imported_data"))
    pipeline_run("data/imported_data", "data/labels/spec_images_labels.csv")

    events_queue.delete_processed_messages(pending)
    logger.info("Deleted processed messages")
    return {}
def run_processing():
    """Fetch queued audio files from blob storage and process them.

    The queue and container names come from the request arguments. Each
    dequeued message names a blob, which is downloaded into
    ``data/imported_data``; the pipeline is then run on that folder and the
    handled messages are deleted from the queue.

    :return: an empty dict
    """
    queue_name = request.args.get('queue_name')
    container_name = request.args.get('container_name')

    queue = AudioEventsQueue(queue_name)
    dequeued = queue.dequeue_message_queue()
    blob_store = AzureInterface(container_name=container_name)

    # Keep a concrete list so the same messages can be deleted afterwards.
    messages = list(dequeued)
    for message in messages:
        # Only the last path component is used as the local file name.
        base_name = message['content'].rsplit('/', 1)[-1]
        target = join_paths([get_project_root(), 'data/imported_data', base_name])
        blob_store.download_from_azure(message['content'], dest_file=target)

    logger.info('about to run pipeline on {}!'.format('data/imported_data'))
    pipeline_run('data/imported_data', 'data/labels/spec_images_labels.csv')

    queue.delete_processed_messages(messages)
    logger.info('Deleted processed messages')
    return {}
async def send_spectrogram(counter):
    """Upload each file and announce it with a device message, forever.

    Relies on ``list_of_files``, ``container_name``, ``source_path``,
    ``dest_folder`` and ``device_client`` from the enclosing scope. The
    loop has no exit condition of its own.

    :param counter: mutable mapping; its ``'count'`` entry is incremented
        after every file that has been sent
    """
    # Pause (in seconds) between two consecutive sends.
    sleep_interval = 5
    while True:
        for f in list_of_files:
            payload = json.dumps({
                'capturedate': time.time(),
                'filename': f,
                'finished': 'False'
            })
            # The file is uploaded first, then the message naming it is sent.
            azure_interface = AzureInterface(container_name)
            azure_interface.send_to_azure(join_paths([source_path, f]),
                                          dest_folder,
                                          f,
                                          media_file=True)
            msg = build_message(payload)
            await device_client.send_message(msg)
            logger.info("done sending file " + str(f))
            counter['count'] += 1
            logger.info(counter['count'])
            await asyncio.sleep(sleep_interval)
def pipeline_run(folder_path, csv_file_path):
    """Run the full detection pipeline on a folder and report the counts.

    Every file in ``folder_path`` is analysed into ``data/demo/spectrogram``,
    the spectrograms are converted to monochrome, elephants are located in
    the monochrome images, and the CNN is run on the boxed images. One
    request per input file is then sent to the ``add_elephant_count``
    endpoint.

    :param folder_path: folder holding the input files
    :param csv_file_path: labels CSV, relative to the project root
    :return: the value returned by ``run_cnn``
    """
    # Step 1: turn every input file into a spectrogram.
    for audio_file in get_files_in_dir(folder_path):
        source_path = join_paths([folder_path, audio_file])
        spectrogram_dir = join_paths(
            [get_project_root(), "data/demo/spectrogram"]
        )
        analyse_sound_data(file_path=source_path, dest_path=spectrogram_dir)

    # Step 2: monochrome versions of all spectrograms.
    spectrogram_files_full = []
    for name in get_files_in_dir("data/demo/spectrogram"):
        spectrogram_files_full.append(
            join_paths([get_project_root(), "data/demo/spectrogram", name])
        )
    mono_target = join_paths([get_project_root(), "data/demo/spectrogram_mono"])
    create_mono_spectrograms(
        spectrogram_files_full, target_folder=mono_target, write_file=True
    )

    # Step 3: bounding boxes, then the CNN on the boxed images.
    mono_dir = join_paths([get_project_root(), "data/demo/spectrogram_mono"])
    boxed_dir = join_paths([get_project_root(), "data/demo/spectrogram_bb"])
    labels_csv = join_paths([get_project_root(), csv_file_path])
    find_elephants_in_images(mono_dir, boxed_dir, labels_csv)
    value = run_cnn("binaries/resnet", "data/demo/spectrogram_bb")

    # Step 4: report one count per input file.
    endpoint = "http://0.0.0.0:5000/elephants/add_elephant_count/"
    for index, file_path in enumerate(get_files_in_dir(folder_path)):
        # The device id is the part of the file name before the first "_".
        device_id = file_path.split("/")[-1].split("_")[0]
        query = {
            "latitude": "20",
            "longitude": "30",
            "start_time": "2020-01-10 06:30:23",
            "end_time": "2021-01-11 06:30:23",
            "device_id": device_id,
            "number_of_elephants": value[index],
        }
        requests.get(url=endpoint, params=query)

    logger.info("Number of elephants found after running pipeline %s", str(value))
    return value
def create_monochrome(self, image_filename, write_file=False):
    """Convert an image file to a black-and-white image.

    The image is read, converted to grayscale and thresholded at 127 into a
    binary image.

    :param image_filename: path of the image to read
    :param write_file: when true, the result is also written to
        ``self.target_folder`` with ``spec_image_`` replaced by ``mono_`` in
        the file name
    :return: the thresholded image
    """
    logger.info(f"Making monochrome image of file {image_filename}...")
    source_image = cv2.imread(image_filename)
    grayscale = cv2.cvtColor(source_image, cv2.COLOR_BGR2GRAY)
    _, monochrome = cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)

    if not write_file:
        return monochrome

    create_necessary_directories(self.target_folder)
    # Only the last path component is kept for the output file name.
    base_name = image_filename.split("/")[-1]
    mono_name = base_name.replace("spec_image_", "mono_")
    mono_path = join_paths([self.target_folder, mono_name])
    cv2.imwrite(mono_path, monochrome)
    logger.info(f"Monochrome image stored as {mono_path}")
    return monochrome
def send_to_azure(self, original_file, dir_path, filename, media_file=True, remove_file=False):
    """Upload a local file to blob storage.

    Any exception raised while uploading is logged and not propagated.

    :param original_file: path of the local file to upload
    :param dir_path: folder part of the blob name
    :param filename: file part of the blob name
    :param media_file: when true, the blob type is passed explicitly as
        ``"BlockBlob"``
    :param remove_file: when true, the local file is removed after the upload
    :return: None
    """
    logger.info("Storing " + filename + " in Azure Blob..." + dir_path)
    # Media files name the blob type explicitly; others use the default.
    upload_options = {"blob_type": "BlockBlob"} if media_file else {}
    try:
        client = self.blob_service_client.get_blob_client(
            container=self.container_name,
            blob=join_paths([dir_path, filename]),
        )
        with open(original_file, "rb") as data:
            client.upload_blob(data, **upload_options)

        # delete local file
        if remove_file:
            os.remove(original_file)
        logger.info("Done uploading file! %s", dir_path + filename)
    except Exception as e:
        logger.info("Error while uploading " + filename + ": " + str(e))
def pipeline_run(folder_path, csv_file_path):
    """Process a folder of input files end to end and post the results.

    Stages, in order: spectrogram generation for every file in
    ``folder_path``, monochrome conversion, elephant detection on the
    monochrome images, CNN inference on the boxed images, and one request
    per input file to the ``add_elephant_count`` endpoint.

    :param folder_path: folder holding the input files
    :param csv_file_path: labels CSV, relative to the project root
    :return: the value returned by ``run_cnn``
    """

    def rooted(*parts):
        # Resolve path components against the project root.
        return join_paths([get_project_root(), *parts])

    for file in get_files_in_dir(folder_path):
        analyse_sound_data(
            file_path=join_paths([folder_path, file]),
            dest_path=rooted('data/demo/spectrogram'),
        )

    spectrogram_files_full = [
        rooted('data/demo/spectrogram', file)
        for file in get_files_in_dir('data/demo/spectrogram')
    ]
    create_mono_spectrograms(
        spectrogram_files_full,
        target_folder=rooted('data/demo/spectrogram_mono'),
        write_file=True,
    )

    find_elephants_in_images(
        rooted('data/demo/spectrogram_mono'),
        rooted('data/demo/spectrogram_bb'),
        rooted(csv_file_path),
    )
    value = run_cnn('binaries/resnet', 'data/demo/spectrogram_bb')

    for index, file_path in enumerate(get_files_in_dir(folder_path)):
        file_name = file_path.split("/")[-1]
        # The text before the first underscore identifies the device.
        device_id = file_name.split('_')[0]
        URL = 'http://0.0.0.0:5000/elephants/add_elephant_count/'
        requests.get(
            url=URL,
            params={
                'latitude': '20',
                'longitude': '30',
                'start_time': '2020-01-10 06:30:23',
                'end_time': '2021-01-11 06:30:23',
                'device_id': device_id,
                'number_of_elephants': value[index],
            },
        )

    logger.info('Number of elephants found after running pipeline %s', str(value))
    return value
def delete_images(directories, file_name):
    """Remove ``file_name`` from each of the given directories.

    :param directories: iterable of directories, relative to the project root
    :param file_name: name of the file to remove from every directory
    :return: None
    """
    # Lazily resolved so each path is built right before it is removed.
    targets = (
        join_paths([get_project_root(), directory, file_name])
        for directory in directories
    )
    for target in targets:
        os.remove(target)
def get_files_in_dir(path):
    """List the entries of a directory given relative to the project root.

    :param path: directory path, joined onto the project root
    :return: list of entry names as returned by ``os.listdir``
    """
    directory = join_paths([get_project_root(), path])
    return os.listdir(directory)
def train_vgg_cnn(training_loc):
    """Build the VGG elephant counter model from the given training data.

    :param training_loc: training data location, relative to the project root
    :return: None
    """
    training_path = join_paths([get_project_root(), training_loc])
    counter = ElephantCounterVGG(training_loc=training_path)
    counter.build_model()
# CLI command "generate_file_segments": takes one positional argument.
@data_processing.cli.command("generate_file_segments")
@click.argument("file_name")
@click.pass_context
def generate_file_segments(context, file_name):
    """Command to generate the file segments from the original video.

    :return void:
    """
    # ``context`` is received from click but not used; the work is delegated
    # to create_file_segments.
    create_file_segments(file_name)


# CLI command "generate_training_data": ``output_folder`` is optional; its
# default is computed once, when this module is imported.
@data_processing.cli.command("generate_training_data")
@click.argument("input_folder")
@click.argument(
    "output_folder", default=join_paths([get_project_root(), "data/training_data"])
)
@click.pass_context
def generate_training_data(context, input_folder, output_folder):
    """Command to generate the training data based of an input folder.

    :param context:
    :param string input_folder:
    :param string output_folder:
    :return void:
    """
    model_preprocessing = ModelPreprocessing(input_folder, output_folder)
    model_preprocessing.split_images_into_right_format()


@data_processing.cli.command("cleanup_data")
import logging from datetime import datetime from flask import Blueprint, render_template, request from elephantcallscounter import db from elephantcallscounter.application.persistence.models.elephants import \ Elephants from elephantcallscounter.common.constants import LOCATION from elephantcallscounter.utils.path_utils import get_project_root, join_paths logger = logging.getLogger(__name__) template_folder_loc = join_paths([get_project_root(), "app/templates"]) elephant_blueprint = Blueprint( "elephant", __name__, url_prefix="/elephants", template_folder=template_folder_loc ) @elephant_blueprint.route("/elephants_count/") def elephant_counter(): start_time = request.args.get("start_time") end_time = request.args.get("end_time") start_time = datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S") end_time = datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S") elephants = db.session.query(Elephants).all() logger.info(start_time) logger.info(end_time) elephant_output = [ {elephant.device_id: elephant.number_of_elephants} for elephant in elephants
def load_model(model_save_loc):
    """Load a saved Keras model.

    :param model_save_loc: model location, relative to the project root
    :return: the model returned by ``keras.models.load_model``
    """
    model_path = join_paths([get_project_root(), model_save_loc])
    return keras.models.load_model(model_path)
# CLI command 'generate_file_segments': takes one positional argument.
@data_processing.cli.command('generate_file_segments')
@click.argument('file_name')
@click.pass_context
def generate_file_segments(context, file_name):
    """
    Command to generate the file segments from the original video.

    :return void:
    """
    # ``context`` is received from click but not used; the work is delegated
    # to create_file_segments.
    create_file_segments(file_name)


# CLI command 'generate_training_data': ``output_folder`` is optional; its
# default is computed once, when this module is imported.
@data_processing.cli.command('generate_training_data')
@click.argument('input_folder')
@click.argument('output_folder', default=join_paths([get_project_root(), 'data/training_data']))
@click.pass_context
def generate_training_data(context, input_folder, output_folder):
    """
    Command to generate the training data based of an input folder.

    :param context:
    :param string input_folder:
    :param string output_folder:
    :return void:
    """
    model_preprocessing = ModelPreprocessing(input_folder, output_folder)
    model_preprocessing.split_images_into_right_format()


@data_processing.cli.command('cleanup_data')
def cleanup_data():
def train_cnn(training_loc, model_name):
    """Build the ResNet elephant counter model from the given training data.

    :param training_loc: training data location, relative to the project root
    :param model_name: forwarded to ``ElephantCounterResnet``
    :return: None
    """
    training_path = join_paths([get_project_root(), training_loc])
    counter = ElephantCounterResnet(
        training_loc=training_path, model_name=model_name
    )
    counter.build_model()
def create_boxes(self, image_filename):
    """Box candidate rumbles in an image and count distinct elephants.

    The image is converted to monochrome, cropped to a fixed region of
    interest, and its contours are reduced to bounding rectangles.
    Rectangles taller than 5 and wider than 50 pixels are kept as rumble
    candidates; candidates that are not close to an already counted one are
    counted as elephants. The annotated region is written back into the
    image, which is stored through ``write_box_to_file`` when
    ``self.write_file`` is set.

    :param image_filename: file name inside ``self.image_folder``
    :return: tuple ``(image, number_of_elephants)``
    """
    logger.info(f'Creating boxes for {self.image_folder + image_filename}...')
    image = self.monochrome.create_monochrome(
        join_paths([get_project_root(), self.image_folder, image_filename])
    )

    # cut off the axes
    # source images are 640 x 480 pixels
    y_top = 60
    y_bottom = 425
    x_left = 82
    x_right = 570
    ROI = image[y_top:y_bottom, x_left:x_right]
    # Contours are searched on the inverted region.
    thresh_inverse = cv2.bitwise_not(ROI)

    # create contours
    contours, hierarchy = cv2.findContours(
        thresh_inverse, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )
    cv2.drawContours(ROI, contours, -1, (0, 255, 0), 1)

    # approximate contours to polygons + get bounding rects
    boxes = [None] * len(contours)
    elephant_rumbles = []
    for i, c in enumerate(contours):
        polygon = cv2.approxPolyDP(c, 3, True)
        boxes[i] = cv2.boundingRect(polygon)
        # (x, y, w, h), where x, y is the top left corner,
        # and w, h are the width and height respectively
        rect = boxes[i]
        width = rect[2]
        height = rect[3]

        # check if this can be an elephant
        if height > 5 and width > 50:
            # A candidate is represented by the centre of its rectangle.
            middle_x = math.floor(rect[0] + (width / 2))
            middle_y = math.floor(rect[1] + (height / 2))
            # NOTE(review): cv2.COLOR_BGR2HSV is a colour-conversion code,
            # yet it is passed in the colour position - confirm intended.
            cv2.rectangle(ROI, (int(boxes[i][0]), int(boxes[i][1])),
                          (int(boxes[i][0] + boxes[i][2]),
                           int(boxes[i][1] + boxes[i][3])),
                          cv2.COLOR_BGR2HSV, 2)
            elephant_rumbles.append((middle_x, middle_y))

    # count the elephants
    elephants = []
    for rumble in elephant_rumbles:
        # if the rumble has a similar frequency as others, don't count it
        # if the rumble has a similar mean time as others, don't count it
        # A counted elephant is "similar" when its centre is within 20 px
        # horizontally OR within 200 px vertically of this rumble.
        similar_rumbles = list(
            filter(
                lambda elephant: ((abs(elephant[0] - rumble[0]) < 20) or (
                    abs(elephant[1] - rumble[1]) < 200)), elephants))
        if len(similar_rumbles) < 1:
            logger.info(f'Unique elephant at {rumble}')
            elephants.append(rumble)
            # NOTE(review): cv2.COLOR_LAB2LBGR is likewise a conversion code
            # used as the marker colour - confirm intended.
            cv2.drawMarker(ROI, rumble, cv2.COLOR_LAB2LBGR, markerType = cv2.MARKER_STAR)

    logger.info(f'Found {len(elephants)} elephant(s) in image!')

    # put the ROI on top of the original image
    h, w = ROI.shape[0], ROI.shape[1]
    image[y_top:y_top + h, x_left:x_left + w] = ROI
    if self.write_file:
        self.write_box_to_file(image, elephants, image_filename)
    return image, len(elephants)