def build_model(self):
    """Train the CNN (resuming from a saved model if one exists), persist
    it, plot the accuracy history, and log test-set metrics.

    Loads a previously saved model from ``self.model_save_loc``; on
    ``OSError`` (no saved model) builds a fresh one via
    ``self.custom_sequential_model()``.

    :return void:
    """
    import numpy as np  # local import: used only for argmax below

    train_it, val_it, test_it = self.get_train_test_set(self.training_loc)
    try:
        # NOTE(review): load_model may already prepend the project root
        # internally (see the static load_model helper elsewhere in this
        # project) — confirm the relative location is the right argument.
        model = self.load_model(self.model_save_loc)
    except OSError:
        model = self.custom_sequential_model()
    model.compile(
        loss="categorical_crossentropy",
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )
    # NOTE(review): batch_size is ignored by fit() when the input is a
    # data iterator such as train_it — the iterator's own batch size wins.
    history = model.fit(
        train_it, epochs=self.epochs, validation_data=val_it, batch_size=100
    )
    model.save(join_paths([get_project_root(), self.model_save_loc]))
    plt.plot(history.history["accuracy"], label="accuracy")
    plt.plot(history.history["val_accuracy"], label="val_accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.ylim([0.5, 1])
    plt.legend(loc="lower right")
    plt.savefig(
        join_paths([get_project_root(), self.model_save_loc, "graph.png"])
    )
    # Sequential.predict_classes was deprecated and removed in TF 2.6;
    # derive class indices from the probability output instead.
    pred = np.argmax(model.predict(test_it), axis=1)
    logger.info(metrics.confusion_matrix(test_it.labels, pred))
    test_loss, test_acc = model.evaluate(test_it, verbose=2)
    logger.info(test_acc)
def test_create_file_segments(self):
    """ This tests the create_file_segments method.

    :raises AssertionError:
    :return void:
    """
    fixture_path = join_paths(
        [get_project_root(), 'tests/test_fixtures/test_training_set.txt'])
    azure_interface = AzureInterface('elephant-sound-data')
    # (blob name in the container, local destination relative to root)
    downloads = [
        ('TestSet/nn01d/nn01d_20180127_000000.wav',
         'data/segments/TrainingSet/nn01d/nn01d_20180127_000000.wav'),
        ('TrainingSet/nn01b/nn01b_20180220_000000.wav',
         'data/segments/TrainingSet/nn01b/nn01b_20180220_000000.wav'),
    ]
    for source_blob, local_rel in downloads:
        azure_interface.download_from_azure(
            source_blob, join_paths([get_project_root(), local_rel]))
    create_file_segments(fixture_path)
    # Each device folder should contain the expected number of crops.
    expected_counts = {
        'data/segments/CroppedTrainingSet/nn01d': 3,
        'data/segments/CroppedTrainingSet/nn01b': 4,
    }
    for crop_dir, expected in expected_counts.items():
        crops = os.listdir(join_paths([get_project_root(), crop_dir]))
        assert len(crops) == expected
def run_model(self, dir_path):
    """Run the saved model over a directory of data and predict classes.

    :param str dir_path: data directory relative to the project root.
    :return: predicted class indices, or None if the model failed to load.
    """
    import numpy as np  # local import: used only for argmax below

    data_it = self.get_dataset_it(join_paths([get_project_root(), dir_path]))
    try:
        # NOTE(review): self.load_model may already prepend the project
        # root (see the static load_model helper) — confirm this does not
        # join the root twice.
        model = self.load_model(
            join_paths([get_project_root(), self.model_save_loc])
        )
    except OSError:
        logger.info("model {} not loaded".format(self.model_save_loc))
    else:
        # predict_classes was removed in TF 2.6; use argmax over predict().
        return np.argmax(model.predict(data_it), axis=1)
def __init__(self, start_fresh, file_range=30):
    """This class handles the segmentation of files after reading from azure.

    :param bool start_fresh:
    :param int file_range:
    """
    self.file_range = file_range
    self.start_fresh = start_fresh
    # Both data sets live under the shared segments folder.
    segments_root = os.path.join(get_project_root(), "data", "segments")
    self.training_set = os.path.join(segments_root, "TrainingSet")
    self.crop_set = os.path.join(segments_root, "CroppedTrainingSet")
def analyse_sound_data(file_path, dest_path):
    """Analyse the sound data and generate spectrograms.

    :param str file_path:
    :param str dest_path:
    :return void:
    """
    read_location = os.path.join(get_project_root(), file_path)
    save_location = os.path.join(get_project_root(), dest_path)
    analyser = AnalyseSoundData(
        file_read_location=read_location,
        save_image_location=save_location,
        sr=1000,
        hop_length=256,
    )
    analyser.analyse_audio()
def find_elephants_command(context, dir_name, dest_folder, csv_file_path):
    """Command to analyse spectrograms and generate bounding box images of
    possible elephants.

    :param context:
    :param string dir_name:
    :param string dest_folder:
    :param string csv_file_path:
    :return void:
    """
    root = get_project_root()
    find_elephants_in_images(
        os.path.join(root, dir_name),
        os.path.join(root, dest_folder),
        os.path.join(root, csv_file_path),
    )
def create_app():
    """Build and fully configure the Flask application instance."""
    setup_logging()
    app = Flask(__name__, template_folder='application/templates/')
    app.config.update({
        'SQLALCHEMY_DATABASE_URI': 'sqlite:///elephantscounter.sqlite3',
        'SQLALCHEMY_TRACK_MODIFICATIONS': False,
        'DEBUG': True
    })
    db.init_app(app)
    migrations_path = join_paths(
        [get_project_root(), 'application/persistence/migrations'])
    migrate.init_app(app, db, MIGRATION_DIR=migrations_path)
    manager = Manager(app)
    manager.add_command('db', MigrateCommand)
    blueprints = (
        data_analysis,
        data_import,
        data_processing,
        demo,
        events,
        # api blueprints
        elephant_blueprint,
        blob_blueprint,
    )
    for blueprint in blueprints:
        app.register_blueprint(blueprint)
    return app
def test_run_pipeline(self):
    """Run the pipeline over the fixtures and check one elephant per file."""
    counts = pipeline_run(
        join_paths([get_project_root(), "tests/test_fixtures/"]),
        "data/demo/test_spec_image_labels.csv",
    )
    for index in range(3):
        assert counts[index] == 1
def download_from_azure(self, source_file, dest_file):
    """Download a blob into a local file, logging if the blob is missing.

    :param str source_file: blob name inside the container.
    :param str dest_file: destination path relative to the project root.
    :return void:
    """
    try:
        blob = BlobClient(
            account_url=env.AZURE_STORAGE_ACCOUNT,
            container_name=self.container_name,
            blob_name=source_file,
            credential=env.STORAGE_SAS_KEY,
        )
        destination = join_paths([get_project_root(), dest_file])
        with open(destination, "wb+") as handle:
            blob.download_blob().readinto(handle)
    except azure.core.exceptions.ResourceNotFoundError:
        # Best-effort: a missing blob is logged, not raised.
        logger.info('Blob not found %s', source_file)
def copy_data_from_s3_to_azure_fast():
    """ This is a multithreaded copy of data to azure from s3.

    :return void:
    """
    source_dir = os.path.join(
        get_project_root(), 'data', 'rumble_landscape_general')
    importer = AzureDataImporter(
        source_directory=source_dir,
        blob_string="project15team4.blob.core.windows.net",
        container_name="elephant-sound-data",
    )
    importer.send_to_copy_handler()
def send_to_iot(source_dir, flag, container_name, dest_folder):
    """Send every spectrogram in a folder to the IoT hub, then flag done.

    :param str source_dir: folder relative to the project root.
    :param dict flag: shared state; 'finished' is set True when done.
    :param str container_name:
    :param str dest_folder:
    :return void:
    """
    base_path = join_paths([get_project_root(), source_dir])
    spectrograms = get_files_in_dir(base_path)
    counter = {'count': 0}
    # Add a delay to let the receiver catch up.
    time.sleep(10)
    asyncio.run(
        write_to_hub(
            base_path,
            spectrograms,
            counter,
            limit=len(spectrograms),
            container_name=container_name,
            dest_folder=dest_folder,
        )
    )
    logger.info('finished sending data!!!')
    flag['finished'] = True
def run_processing(): queue_name = request.args.get("queue_name") container_name = request.args.get("container_name") audio_events_queue = AudioEventsQueue(queue_name) messages = audio_events_queue.dequeue_message_queue() azure_interface = AzureInterface(container_name=container_name) messages = [message for message in messages] for message in messages: file_path = message["content"].split("/")[-1] file_path = join_paths( [get_project_root(), "data/imported_data", file_path]) azure_interface.download_from_azure(message["content"], dest_file=file_path) logger.info("about to run pipeline on {}!".format("data/imported_data")) pipeline_run("data/imported_data", "data/labels/spec_images_labels.csv") audio_events_queue.delete_processed_messages(messages) logger.info("Deleted processed messages") return {}
def download_data_from_azure_fast(container_name, source_folder, dest_folder):
    """ This downloads all data from azure blob using azcopy.

    :param string container_name:
    :param string source_folder:
    :param string dest_folder:
    :return void:
    """
    os.makedirs(dest_folder, exist_ok=True)
    logger.info(f'Processing {source_folder}...')
    az_data_importer = AzureDataImporter(
        # NOTE(review): source_directory points at the s3 import folder and
        # looks unused for a download — confirm it is required here.
        source_directory=os.path.join(
            get_project_root(), 'data', 'rumble_landscape_general'),
        blob_string=env.AZURE_STORAGE_ACCOUNT,
        container_name=container_name)
    # The returned handle was assigned to an unused local (p1); dropped.
    az_data_importer.az_download_data_from_blob(
        source_path=source_folder, destination_path=dest_folder)
    logger.info(f'Processing {source_folder} finished!')
    logger.info(f'Sent file to {dest_folder}')
def copy_file_to_azure_fast(container_name, source_file_name, dest_folder):
    """ This copies data to azure blob using azcopy.

    :param string container_name:
    :param string source_file_name:
    :param string dest_folder:
    :return void:
    """
    logger.info(f'Processing {source_file_name}...')
    az_data_importer = AzureDataImporter(
        source_directory=os.path.join(
            get_project_root(), 'data', 'rumble_landscape_general'),
        blob_string=env.AZURE_STORAGE_ACCOUNT,
        container_name=container_name)
    logger.info('Sending file: %s to %s', source_file_name, dest_folder)
    # The returned handle was assigned to an unused local (p1); dropped.
    az_data_importer.az_upload_data_to_blob(
        source_path=source_file_name, destination_path=dest_folder)
    logger.info(f'Processing {source_file_name} finished!')
    logger.info(f'Sent file to {dest_folder}')
def run_processing():
    """Pull queued audio events, fetch their blobs, and process them."""
    queue_name = request.args.get('queue_name')
    container_name = request.args.get('container_name')
    audio_events_queue = AudioEventsQueue(queue_name)
    messages = audio_events_queue.dequeue_message_queue()
    azure_interface = AzureInterface(container_name=container_name)
    messages = [msg for msg in messages]
    for msg in messages:
        dest = join_paths(
            [get_project_root(), 'data/imported_data',
             msg['content'].split('/')[-1]])
        azure_interface.download_from_azure(msg['content'], dest_file=dest)
    logger.info('about to run pipeline on {}!'.format('data/imported_data'))
    pipeline_run('data/imported_data', 'data/labels/spec_images_labels.csv')
    audio_events_queue.delete_processed_messages(messages)
    logger.info('Deleted processed messages')
    return {}
def pipeline_run(folder_path, csv_file_path):
    """Run the full elephant-detection pipeline over a folder of audio files.

    Steps: generate spectrograms, convert them to monochrome, draw bounding
    boxes, run the CNN, then post a per-file elephant count to the API.

    :param str folder_path: folder containing the input audio files.
    :param str csv_file_path: labels csv path relative to the project root.
    :return: per-file elephant counts as produced by run_cnn.
    """
    for file in get_files_in_dir(folder_path):
        analyse_sound_data(
            file_path=join_paths([folder_path, file]),
            dest_path=join_paths([get_project_root(), "data/demo/spectrogram"]),
        )
    spectrogram_files_full = [
        join_paths([get_project_root(), "data/demo/spectrogram", file])
        for file in get_files_in_dir("data/demo/spectrogram")
    ]
    create_mono_spectrograms(
        spectrogram_files_full,
        target_folder=join_paths(
            [get_project_root(), "data/demo/spectrogram_mono"]),
        write_file=True,
    )
    find_elephants_in_images(
        join_paths([get_project_root(), "data/demo/spectrogram_mono"]),
        join_paths([get_project_root(), "data/demo/spectrogram_bb"]),
        join_paths([get_project_root(), csv_file_path]),
    )
    value = run_cnn("binaries/resnet", "data/demo/spectrogram_bb")
    # NOTE(review): hard-coded endpoint — consider moving to configuration.
    url = "http://0.0.0.0:5000/elephants/add_elephant_count/"
    for index, file_path in enumerate(get_files_in_dir(folder_path)):
        # Device id is the prefix of the file name, e.g. "nn01d_...".
        device_id = file_path.split("/")[-1].split("_")[0]
        # Fire-and-forget: the response was bound to an unused local (r).
        requests.get(
            url=url,
            params={
                "latitude": "20",
                "longitude": "30",
                "start_time": "2020-01-10 06:30:23",
                "end_time": "2021-01-11 06:30:23",
                "device_id": device_id,
                "number_of_elephants": value[index],
            },
        )
    logger.info("Number of elephants found after running pipeline %s", str(value))
    return value
def pipeline_run(folder_path, csv_file_path):
    """Run the full elephant-detection pipeline over a folder of audio files.

    Steps: generate spectrograms, convert them to monochrome, draw bounding
    boxes, run the CNN, then post a per-file elephant count to the API.

    :param str folder_path: folder containing the input audio files.
    :param str csv_file_path: labels csv path relative to the project root.
    :return: per-file elephant counts as produced by run_cnn.
    """
    for file in get_files_in_dir(folder_path):
        analyse_sound_data(
            file_path=join_paths([folder_path, file]),
            dest_path=join_paths(
                [get_project_root(), 'data/demo/spectrogram']))
    spectrogram_files_full = [
        join_paths([get_project_root(), 'data/demo/spectrogram', file])
        for file in get_files_in_dir('data/demo/spectrogram')
    ]
    create_mono_spectrograms(
        spectrogram_files_full,
        target_folder=join_paths(
            [get_project_root(), 'data/demo/spectrogram_mono']),
        write_file=True)
    find_elephants_in_images(
        join_paths([get_project_root(), 'data/demo/spectrogram_mono']),
        join_paths([get_project_root(), 'data/demo/spectrogram_bb']),
        join_paths([get_project_root(), csv_file_path]))
    value = run_cnn('binaries/resnet', 'data/demo/spectrogram_bb')
    # NOTE(review): hard-coded endpoint — consider moving to configuration.
    url = 'http://0.0.0.0:5000/elephants/add_elephant_count/'
    for index, file_path in enumerate(get_files_in_dir(folder_path)):
        # Device id is the prefix of the file name, e.g. "nn01d_...".
        device_id = file_path.split("/")[-1].split('_')[0]
        # Fire-and-forget: the response was bound to an unused local (r).
        requests.get(
            url=url,
            params={
                'latitude': '20',
                'longitude': '30',
                'start_time': '2020-01-10 06:30:23',
                'end_time': '2021-01-11 06:30:23',
                'device_id': device_id,
                'number_of_elephants': value[index]
            })
    logger.info('Number of elephants found after running pipeline %s',
                str(value))
    return value
def delete_images(directories, file_name):
    """Remove file_name from each of the given project-relative directories.

    :param list directories:
    :param str file_name:
    :return void:
    """
    root = get_project_root()
    for directory in directories:
        os.remove(join_paths([root, directory, file_name]))
def get_files_in_dir(path):
    """List directory entries for a path relative to the project root.

    :param str path:
    :return list:
    """
    full_path = join_paths([get_project_root(), path])
    return os.listdir(full_path)
def train_cnn(training_loc, model_name):
    """Build and train the resnet-based elephant counter.

    :param str training_loc: training data folder relative to project root.
    :param str model_name:
    :return void:
    """
    counter = ElephantCounterResnet(
        training_loc=join_paths([get_project_root(), training_loc]),
        model_name=model_name,
    )
    counter.build_model()
@data_processing.cli.command("generate_file_segments") @click.argument("file_name") @click.pass_context def generate_file_segments(context, file_name): """Command to generate the file segments from the original video. :return void: """ create_file_segments(file_name) @data_processing.cli.command("generate_training_data") @click.argument("input_folder") @click.argument( "output_folder", default=join_paths([get_project_root(), "data/training_data"]) ) @click.pass_context def generate_training_data(context, input_folder, output_folder): """Command to generate the training data based of an input folder. :param context: :param string input_folder: :param string output_folder: :return void: """ model_preprocessing = ModelPreprocessing(input_folder, output_folder) model_preprocessing.split_images_into_right_format() @data_processing.cli.command("cleanup_data")
def train_vgg_cnn(training_loc):
    """Build and train the VGG-based elephant counter.

    :param str training_loc: training data folder relative to project root.
    :return void:
    """
    full_training_loc = join_paths([get_project_root(), training_loc])
    ElephantCounterVGG(training_loc=full_training_loc).build_model()
def load_model(model_save_loc):
    """Load a persisted keras model from a project-relative location.

    :param str model_save_loc:
    :return: the loaded keras model.
    :raises OSError: if no saved model exists at the location.
    """
    full_path = join_paths([get_project_root(), model_save_loc])
    return keras.models.load_model(full_path)
import logging from datetime import datetime from flask import Blueprint, render_template, request from elephantcallscounter import db from elephantcallscounter.application.persistence.models.elephants import \ Elephants from elephantcallscounter.common.constants import LOCATION from elephantcallscounter.utils.path_utils import get_project_root, join_paths logger = logging.getLogger(__name__) template_folder_loc = join_paths([get_project_root(), "app/templates"]) elephant_blueprint = Blueprint( "elephant", __name__, url_prefix="/elephants", template_folder=template_folder_loc ) @elephant_blueprint.route("/elephants_count/") def elephant_counter(): start_time = request.args.get("start_time") end_time = request.args.get("end_time") start_time = datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S") end_time = datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S") elephants = db.session.query(Elephants).all() logger.info(start_time) logger.info(end_time) elephant_output = [ {elephant.device_id: elephant.number_of_elephants} for elephant in elephants
import os

from elephantcallscounter.utils.path_utils import get_project_root

# Default metadata files describing the labelled training and test sets.
TRAINING_FILE_PATH_DEFAULT = os.path.join(
    get_project_root(), 'data/metadata/nn_ele_hb_00-24hr_TrainingSet_v2.txt')
TEST_PATH_DEFAULT = os.path.join(
    get_project_root(), 'data/metadata/nn_ele_00-24hr_GeneralTest_v4.txt')

# When True, processing discards previous progress and starts from scratch.
RUN_FRESH = True

# Device id -> position; presumably latitude/longitude of each recording
# device in degrees — TODO confirm against the deployment map.
LOCATION = {
    'nn01a': {
        'lat': 2.5,
        'lng': 16.4
    },
    'nn01b': {
        'lat': 2.9,
        'lng': 16.3
    },
    'nn02a': {
        'lat': 2.2,
        'lng': 16.7
    },
    'nn01d': {
        'lat': 3.5,
        'lng': 17.8
    }
}
@data_processing.cli.command('generate_file_segments')
@click.argument('file_name')
@click.pass_context
def generate_file_segments(context, file_name):
    """ Command to generate the file segments from the original video.

    :param context:
    :param string file_name:
    :return void:
    """
    create_file_segments(file_name)


@data_processing.cli.command('generate_training_data')
@click.argument('input_folder')
@click.argument('output_folder',
                default=join_paths([get_project_root(),
                                    'data/training_data']))
@click.pass_context
def generate_training_data(context, input_folder, output_folder):
    """ Command to generate the training data based of an input folder.

    :param context:
    :param string input_folder:
    :param string output_folder:
    :return void:
    """
    model_preprocessing = ModelPreprocessing(input_folder, output_folder)
    model_preprocessing.split_images_into_right_format()


# The body of cleanup_data continues beyond this chunk.
@data_processing.cli.command('cleanup_data')
def cleanup_data():
def create_boxes(self, image_filename):
    """Detect candidate elephant rumbles in a spectrogram image.

    Finds contours on an inverted monochrome region of interest, keeps
    bounding boxes big enough to be a rumble, de-duplicates rumbles that
    are close in time/frequency, optionally writes the annotated image,
    and returns the annotated image with the elephant count.

    :param str image_filename: file name inside ``self.image_folder``.
    :return tuple: (annotated image, number of unique elephants found).
    """
    logger.info(f'Creating boxes for {self.image_folder + image_filename}...')
    image = self.monochrome.create_monochrome(
        join_paths([get_project_root(), self.image_folder, image_filename])
    )
    # cut off the axes
    # source images are 640 x 480 pixels
    y_top = 60
    y_bottom = 425
    x_left = 82
    x_right = 570
    ROI = image[y_top:y_bottom, x_left:x_right]
    # Invert so the dark spectrogram features become bright for contouring.
    thresh_inverse = cv2.bitwise_not(ROI)
    # create contours
    contours, hierarchy = cv2.findContours(
        thresh_inverse, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )
    cv2.drawContours(ROI, contours, -1, (0, 255, 0), 1)
    # approximate contours to polygons + get bounding rects
    boxes = [None] * len(contours)
    elephant_rumbles = []
    for i, c in enumerate(contours):
        polygon = cv2.approxPolyDP(c, 3, True)
        boxes[i] = cv2.boundingRect(polygon)
        # (x, y, w, h), where x, y is the top left corner,
        # and w, h are the width and height respectively
        rect = boxes[i]
        width = rect[2]
        height = rect[3]
        # check if this can be an elephant
        if height > 5 and width > 50:
            middle_x = math.floor(rect[0] + (width / 2))
            middle_y = math.floor(rect[1] + (height / 2))
            # NOTE(review): cv2.COLOR_BGR2HSV is a colour-conversion code,
            # not a colour, yet it is passed where cv2.rectangle expects a
            # colour value — confirm the intended rectangle colour.
            cv2.rectangle(ROI, (int(boxes[i][0]), int(boxes[i][1])),
                          (int(boxes[i][0] + boxes[i][2]),
                           int(boxes[i][1] + boxes[i][3])),
                          cv2.COLOR_BGR2HSV, 2)
            elephant_rumbles.append((middle_x, middle_y))
    # count the elephants
    elephants = []
    for rumble in elephant_rumbles:
        # if the rumble has a similar frequency as others, don't count it
        # if the rumble has a similar mean time as others, don't count it
        similar_rumbles = list(
            filter(
                lambda elephant: ((abs(elephant[0] - rumble[0]) < 20) or (
                    abs(elephant[1] - rumble[1]) < 200)),
                elephants))
        if len(similar_rumbles) < 1:
            logger.info(f'Unique elephant at {rumble}')
            elephants.append(rumble)
            # NOTE(review): cv2.COLOR_LAB2LBGR is likewise a conversion
            # code passed as the marker colour — confirm intent.
            cv2.drawMarker(ROI, rumble, cv2.COLOR_LAB2LBGR,
                           markerType=cv2.MARKER_STAR)
    logger.info(f'Found {len(elephants)} elephant(s) in image!')
    # put the ROI on top of the original image
    h, w = ROI.shape[0], ROI.shape[1]
    image[y_top:y_top + h, x_left:x_left + w] = ROI
    if self.write_file:
        self.write_box_to_file(image, elephants, image_filename)
    return image, len(elephants)