Example #1
def predict(model, device, test_loader):

	# set the model to evaluation mode
	model.eval()

	pred_results = np.asarray([])
	#test_pred = torch.LongTensor()
	#print('Testing..')
	loghub.logMsg(msg="{}: Predicting...".format(__name__), otherlogs=["test_acc"])

	# Disable gradient tracking (we are only doing inference)
	with torch.no_grad():
		# for every testing batch
		for i_batch, sample_batched in enumerate(test_loader):
			# for every batch, extract data (16, 1, 40, 500); the label is not used during prediction
			data, invalid_label = sample_batched

			# Map the variables to the current device (CPU or GPU)
			data = data.to(device, dtype=torch.float)

			# forward pass through the model
			output = model(data)

			# get the predictions
			pred = output.argmax(dim=1, keepdim=True)

			# collate the predicted results
			pred = pred.cpu().numpy().reshape(-1)	# flatten to 1-D (np.squeeze would yield a 0-d array for a batch of size 1)
			pred_results = np.concatenate((pred_results, pred))
			#test_pred = torch.cat((test_pred, pred), dim=0)
			
	return pred_results
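
A minimal usage sketch for predict() follows; it assumes a trained model, a torch.utils.data.DataLoader named test_loader, and the torch/numpy imports used above are already in scope.

# Hedged usage sketch (assumed names: model, test_loader).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

pred_results = predict(model, device, test_loader)

# pred_results is a 1-D numpy array of predicted class indices, one entry per test sample
print(pred_results.shape)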
Example #2
	def load_all_data(self, include_test=False, with_labels=True):
		"""
			load all data, extract the features and save as filename
		"""

		# Read the training & testing data from the csv file
		#print("Loading all data...")
		loghub.logMsg(msg="{}: Loading all data...".format(__name__), otherlogs=["test_acc"])
		self.train_data_list, self.train_label_list, self.train_label_indices = self.__read_DCASE_csv_file(self.train_csv_filepath, "train")
		if with_labels:
			self.test_data_list, self.test_label_list, self.test_label_indices = self.__read_DCASE_csv_file(self.test_csv_filepath, "test")
		else:
			self.test_data_list, self.test_label_list, self.test_label_indices = self.__read_DCASE_csv_file(self.test_csv_filepath, "evaluate")
		self.audio_files = self.train_data_list + self.test_data_list
		self.audio_labels = self.train_label_list + self.test_label_list
		self.audio_label_indices = self.train_label_indices + self.test_label_indices
		self.data_type = [0] * len(self.train_data_list) + [1] * len(self.test_data_list)

		self.base = len(self.train_data_list)
		if include_test:
			self.train_data_list = self.train_data_list + self.test_data_list
			self.train_label_list = self.train_label_list + self.test_label_list
			self.train_label_indices = self.train_label_indices + self.test_label_indices

		self.data_type = np.asarray(self.data_type)
		#print("All data loaded.")	
		loghub.logMsg(msg="{}: All data loaded.".format(__name__), otherlogs=["test_acc"])
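
A hedged sketch of how DatasetManager.load_all_data() might be driven, using the dataset paths that appear later in these examples; the paths are illustrative placeholders.

# Hedged usage sketch; paths are illustrative placeholders.
train_labels_dir = '../Dataset/train/train_labels.csv'
test_labels_dir = '../Dataset/test/test_labels.csv'
root_dir = '../Dataset'

data_manager = DatasetManager(train_labels_dir, test_labels_dir, root_dir)
data_manager.load_all_data(include_test=False, with_labels=True)

# After loading, the combined lists hold the train entries first, then the test entries,
# and data_type marks each entry with 0 (train) or 1 (test).
print(len(data_manager.audio_files), data_manager.data_type[:5])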
Example #3
def test(args, model, device, test_loader, data_type):

	# set the model to evaluation mode
	model.eval()

	# init test loss
	test_loss = 0
	correct = 0
	pred_results = np.asarray([])
	#print('Testing..')
	loghub.logMsg(msg="{}: Testing...".format(__name__), otherlogs=["test_acc"])

	# Disable gradient tracking (we are only doing inference)
	with torch.no_grad():
		# for every testing batch
		for i_batch, sample_batched in enumerate(test_loader):

			# for every batch, extract data (16, 1, 40, 500) and label (16, 1)
			data, label = sample_batched

			# Map the variables to the current device (CPU or GPU)
			data = data.to(device, dtype=torch.float)
			label = label.to(device, dtype=torch.long)

			# forward pass through the model
			output = model(data)

			# accumulate the batchwise loss
			test_loss += F.nll_loss(output, label, reduction='sum').item()

			# get the predictions
			pred = output.argmax(dim=1, keepdim=True)

			# accumulate the correct predictions
			correct += pred.eq(label.view_as(pred)).sum().item()

			# collate the predicted results
			pred = pred.cpu().numpy().reshape(-1)	# flatten to 1-D (np.squeeze would yield a 0-d array for a batch of size 1)
			pred_results = np.concatenate((pred_results, pred))
			
	# normalize the test loss with the number of test samples
	test_loss /= len(test_loader.dataset)

	# print the results
	#print('Model prediction on ' + data_type + ': Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
	#	test_loss, correct, len(test_loader.dataset),
	#	100. * correct / len(test_loader.dataset)))
	loghub.logMsg(msg="{}: Model prediction on {}: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
		__name__, data_type, test_loss, correct, len(test_loader.dataset),
		100. * correct / len(test_loader.dataset)), otherlogs=["test_acc"])

	return pred_results
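
Because F.nll_loss is called with reduction='sum', the batch losses add up over individual samples, so dividing by len(test_loader.dataset) at the end gives the mean per-sample loss even when the last batch is smaller. A small self-contained illustration of the same accuracy bookkeeping on dummy tensors (all values here are made up):

# Hedged illustration of the accuracy bookkeeping used above, on dummy data.
import torch

output = torch.tensor([[0.1, 0.7, 0.2],     # argmax -> class 1
                       [0.8, 0.1, 0.1]])    # argmax -> class 0
label = torch.tensor([1, 2])

pred = output.argmax(dim=1, keepdim=True)             # shape (2, 1)
correct = pred.eq(label.view_as(pred)).sum().item()   # 1 correct out of 2
print(correct)  # 1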
Example #4
	def apply_k_fold(self, K=5):
		"""
			K (int): K folds
			Split the train data into K folds and return a list of (train_indices, test_indices) tuples, one per fold:
				- Fold #1 (train_indices, test_indices)
				- ....
				- Fold #K (train_indices, test_indices)
		"""
		# check that data have been loaded
		if not self.train_data_list:
			#print("Data have not been loaded. Running data_manager.load_all_data()...")
			loghub.logMsg(msg="{}: Data have not been loaded. Running data_manager.load_all_data()...".format(__name__), otherlogs=["test_acc"], level="warning")
			self.load_all_data()

		# Initialize array
		kfolds_arr = []
		for i in range(K):
			kfolds_arr.append([])			# axis 0 = folds

		# K FOLDS
		fold_counter = 0
		for i in range(len(self.train_data_list)):
			kfolds_arr[fold_counter].append(i)
			fold_counter = (fold_counter + 1) % K

		# Generate the cross validation array 
		kfolds = []							# axis 0 = folds
		# For each folds
		for i in range(K): 
			# Initialize the array
			test_indices = []
			train_indices = []

			# let the fold index be the test indices
			test_indices = kfolds_arr[i]

			# combine the rest to be the train indices
			for j in range(K):
				if i == j:
					continue
				train_indices += kfolds_arr[j]

			kfolds.append((train_indices, test_indices))

		return kfolds
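
The fold assignment above is a simple round-robin over the training indices. A standalone sketch of the same scheme for 7 samples and K=3 (expected folds: [0, 3, 6], [1, 4], [2, 5]):

# Hedged standalone sketch of the round-robin K-fold split used above.
K = 3
n_samples = 7

kfolds_arr = [[] for _ in range(K)]
for i in range(n_samples):
    kfolds_arr[i % K].append(i)         # same effect as the fold_counter above

kfolds = []
for i in range(K):
    test_indices = kfolds_arr[i]
    train_indices = [idx for j in range(K) if j != i for idx in kfolds_arr[j]]
    kfolds.append((train_indices, test_indices))

print(kfolds_arr)   # [[0, 3, 6], [1, 4], [2, 5]]
print(kfolds[0])    # ([1, 4, 2, 5], [0, 3, 6])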
Example #5
	def get_data_index_from_map(self, idx, data_type):
		"""
			All data are loaded into a single main file in prepare_data(). This is to get the index of the data 
			in the main file based on self.train_idx_map or self.test_idx_map 

			data_type (string): two types ["train" or "test"]
		"""

		if (not self.train_idx_map) or (not self.test_idx_map):
			# Mapping is empty
			return idx 					
		else:
			# Mapping is not empty
			if data_type == "train":
				return self.train_idx_map[idx]
			elif data_type == "test":
				return self.test_idx_map[idx]
			else:
				#print("Error! Invalid data type")
				loghub.logMsg(msg="{}: Error! Invalid data type".format(__name__), otherlogs=["test_acc"], level="error")
				return
Example #6
def NormalizeData(train_labels_dir, root_dir, dcase_dataset):
    """
        Compute the mean/std that will be used to normalize the dataset
    """

    # concatenate the mel spectrograms in time-dimension, this variable accumulates the spectrograms
    melConcat = np.asarray([])

    # flag for the first element
    flag = 0

    # iterate over the samples in a random order (the order does not affect the computed statistics)
    rand = np.random.permutation(len(dcase_dataset))

    # for all the training samples
    for i in range(len(dcase_dataset)):

        # extract the sample
        sample = dcase_dataset[rand[i]]
        data, label = sample
        # print because we like to see it working
        #print('NORMALIZATION (FEATURE SCALING) : ' + str(i) + ' - data shape: ' + str(data.shape) + ', label: ' + str(label) + ', current accumulation size: ' + str(melConcat.shape))
        loghub.logMsg(
            msg=
            "{}: NORMALIZATION (FEATURE SCALING) : {} - data shape: {}, label: {}, current accumulation size: {}"
            .format(__name__, str(i), str(data.shape), str(label),
                    str(melConcat.shape)),
            level="info")
        if flag == 0:
            # get the data and init melConcat for the first time
            melConcat = data
            flag = 1
        else:
            # concatenate spectrograms from second iteration
            melConcat = np.concatenate((melConcat, data), axis=2)
    # extract std and mean
    std = np.std(melConcat, axis=2)
    mean = np.mean(melConcat, axis=2)

    return mean, std
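
Concatenating every spectrogram along the time axis can use a lot of memory on larger datasets. Below is a hedged alternative sketch that accumulates running sums instead; it assumes each sample has shape (channels, n_mels, time), consistent with the (C, 40, 500) shapes mentioned elsewhere in these examples, and it produces the same per-(channel, mel-bin) mean and population std as np.mean/np.std over the concatenated time axis.

# Hedged alternative sketch: running-sum normalization statistics.
import numpy as np

def normalize_data_streaming(dcase_dataset):
    total = None      # running sum per (channel, mel bin)
    total_sq = None   # running sum of squares per (channel, mel bin)
    count = 0         # number of time frames accumulated

    for i in range(len(dcase_dataset)):
        data, label = dcase_dataset[i]
        if total is None:
            total = np.zeros(data.shape[:2])
            total_sq = np.zeros(data.shape[:2])
        total += data.sum(axis=2)
        total_sq += (data ** 2).sum(axis=2)
        count += data.shape[2]

    mean = total / count
    std = np.sqrt(total_sq / count - mean ** 2)   # population std, like np.std(..., axis=2)
    return mean, std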
Example #7
	def prepare_test_data(self, test_csv="test_dataset.csv"):
		"""
			This is used when testing the model. Instead of preparing both train/test csv files as prepare_data() does,
			this function only prepares the test csv.
		"""

		# Prepare csv file path
		test_filepath = os.path.join(self.root_dir, test_csv)

		# Extract data for test.csv
		test_csv_data = []
		for i in range(self.get_test_data_size()):
			# Get dataset
			dataset = []
			dataset.append(self.test_data_list[i])
			test_csv_data.append(dataset)

		# Write into test csv file
		util.write_to_csv_file(test_csv_data, test_filepath)

		#print("Test Data Labels generated in %s (test)" % test_filepath)
		loghub.logMsg(msg="{}: Test Data Labels generated in {} (test)".format(__name__, test_filepath), otherlogs=["test_acc"])

		return test_filepath
Example #8
def train(args, model, device, train_loader, optimizer, epoch):
	model.train()

	# training module
	for batch_idx, sample_batched in enumerate(train_loader):

		# for every batch, extract data (16, 1, 40, 500) and label (16, 1)
		data, label = sample_batched

		# Map the variables to the current device (CPU or GPU)
		data = data.to(device, dtype=torch.float)
		label = label.to(device, dtype=torch.long)

		# set initial gradients to zero : https://discuss.pytorch.org/t/why-do-we-need-to-set-the-gradients-manually-to-zero-in-pytorch/4903/9
		optimizer.zero_grad()

		# pass the data into the model
		output = model(data)

		# get the loss using the predictions and the label
		loss = F.nll_loss(output, label)

		# backpropagate the losses
		loss.backward()

		# update the model parameters : https://discuss.pytorch.org/t/how-are-optimizer-step-and-loss-backward-related/7350
		optimizer.step()

		# Printing the results
		if batch_idx % args.log_interval == 0:
			#print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
			#	epoch, batch_idx * len(data), len(train_loader.dataset),
			#	100. * batch_idx / len(train_loader), loss.item()))
			loghub.logMsg(msg="{}: Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
				__name__, epoch, batch_idx * len(data), len(train_loader.dataset), 
				100. * batch_idx / len(train_loader), loss.item()), otherlogs=["test_acc"])
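
train() only needs an args object exposing log_interval, plus a model, device, data loader and optimizer. A minimal hedged sketch of one training run, mirroring the loop used in buildCNNModel further below (it assumes model, device and train_loader are already in scope):

# Hedged sketch: minimal training loop around train().
from argparse import Namespace
import torch.optim as optim

args = Namespace(log_interval=10, epochs=5)
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(1, args.epochs + 1):
    train(args, model, device, train_loader, optimizer, epoch)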
Example #9
def build_stack_model():
    """
		Stacking (Meta Ensembling) - Ensemble Technique to combine multiple models to generate a new model

		Referenced from http://blog.kaggle.com/2016/12/27/a-kagglers-guide-to-model-stacking-in-practice/
		Referenced from https://towardsdatascience.com/how-to-train-an-image-classifier-in-pytorch-and-use-it-to-perform-basic-inference-on-single-images-99465a1e9bf5
	"""

    # 0. Split training & test data (should be the same as the one used to train the models) ##############################

    # MOVED TO GLOBAL VARIABLES
    """
	train_labels_dir = '../Dataset/train/train_labels.csv'
	test_labels_dir = '../Dataset/test/test_labels.csv'
	root_dir = '../Dataset'
	processed_root_dir = 'processed_data'
	"""

    # Load all the dataset
    data_manager = DatasetManager(train_labels_dir, test_labels_dir, root_dir)
    data_manager.load_all_data(include_test=True)

    # 1. Partition Training Data into K folds #############################################################################

    kfolds = data_manager.apply_k_fold(K_FOLD)

    # 2. Create 2 datasets (train_meta & test_meta) with n empty columns (M1, M2, ... Mn) where n = number of models ##############################

    # use k-fold of train data to fill up
    train_meta = np.empty(
        (data_manager.get_train_data_size(),
         len(save_models)))  # (n x m) where n = audio data, m = model
    # use all of train data to fill up
    test_meta = np.empty(
        (data_manager.get_test_data_size(),
         len(save_models)))  # (n x m) where n = audio data, m = model

    # 3. Apply K-fold cross validation to fill up empty columns (M1, M2, .... Mn) of train_meta with prediction results for each fold ##############################

    #print("Getting Prediction Results to fill in train_meta")
    loghub.logMsg(
        msg="{}: Getting Prediction Results to fill in train_meta".format(
            __name__),
        otherlogs=["test_acc"])
    fold = 0  # fold counter
    for train, validate in kfolds:  # train, validate is a list of index
        #print("Cross Validation Fold #%i..." % (fold+1))
        loghub.logMsg(msg="{}: Cross Validation Fold #{}...".format(
            __name__, (fold + 1)),
                      otherlogs=["test_acc"])

        # For each model
        for i in range(len(save_models)):
            #print("Fold #%i for model (%s)..." % ((fold+1), save_models[i]))
            loghub.logMsg(msg="{}: Fold #{} for model ({})...".format(
                __name__, (fold + 1), save_models[i]),
                          otherlogs=["test_acc"])

            # Get feature index
            fid = feat_indices[i]

            # Load/Preprocess Feature for model
            preprocessed_features_filepath = os.path.join(
                processed_root_dir, preprocessed_features[i])
            data_manager.load_feature(fid, preprocessed_features_filepath)

            # Prepare data
            train_csv, test_csv = data_manager.prepare_data(
                train_indices=train,
                test_indices=validate,
                train_csv=temp_train_csv_file,
                test_csv=temp_test_csv_file,
                train_only=True)

            # Load Normalized data
            norm_std = os.path.join(processed_root_dir,
                                    fold_norm_stds[i][fold])
            norm_mean = os.path.join(processed_root_dir,
                                     fold_norm_means[i][fold])

            # Build Model & get prediction results
            model, predictions = bm.buildCNNModel(
                train_csv=train_csv,
                test_csv=test_csv,
                norm_std=norm_std,
                norm_mean=norm_mean,
                data_manager=data_manager,
                num_of_channel=num_of_channels[i],
                save_model=False)

            # Fill up train_meta with the prediction results on the validation fold (written to test.csv)
            for j in range(len(validate)):
                v_idx = validate[j]
                train_meta[v_idx][i] = predictions[j]  # data x model

        #print("End of Fold #%i." % (fold+1))
        loghub.logMsg(msg="{}: End of Fold #{}".format(__name__, (fold + 1)),
                      otherlogs=["test_acc"])
        fold += 1

    #print("Train_meta generated successfully.")
    loghub.logMsg(
        msg="{}: Train_meta generated successfully.".format(__name__),
        otherlogs=["test_acc"])

    # 4. Fit each model to the full training dataset & make predictions on the test dataset, store into test_meta ##############################

    #print("Getting Prediction Results to fill in test_meta...")
    loghub.logMsg(
        msg="{}: Getting Prediction Results to fill in test_meta...".format(
            __name__),
        otherlogs=["test_acc"])

    # For each model
    for i in range(len(save_models)):
        # Get feature index
        fid = feat_indices[i]

        # Load/Preprocess Feature for model
        preprocessed_features_filepath = os.path.join(processed_root_dir,
                                                      preprocessed_features[i])
        data_manager.load_feature(fid, preprocessed_features_filepath)

        # Prepare data
        train_csv, test_csv = data_manager.prepare_data(
            train_csv=temp_train_csv_file, test_csv=temp_test_csv_file)

        # Get Normalized preprocessed data file
        norm_std = os.path.join(processed_root_dir, norm_stds[i])
        norm_mean = os.path.join(processed_root_dir, norm_means[i])

        # Get save model
        model_name = os.path.join(processed_root_dir, save_models[i])

        # Build Model & get prediction results
        model, predictions = bm.buildCNNModel(
            train_csv=train_csv,
            test_csv=test_csv,
            norm_std=norm_std,
            norm_mean=norm_mean,
            data_manager=data_manager,
            num_of_channel=num_of_channels[i],
            saved_model_name=model_name,
            save_model=True)

        # Fill up test_meta with the prediction results on the test data
        for j in range(data_manager.get_test_data_size()):
            test_meta[j][i] = predictions[j]  # data x model

    #print("Test_meta generated successfully.")
    loghub.logMsg(msg="{}: Test_meta generated successfully.".format(__name__),
                  otherlogs=["test_acc"])

    # 5. Fit (stacking model S) to train_meta, using (M1, M2, ... Mn) as features. ############################################################
    # 6. Use the stacked model S to make final predictions on test_meta ############################################################

    # get the training/testing label
    train_meta_labels = np.asarray(data_manager.train_label_indices)
    test_meta_labels = np.asarray(data_manager.test_label_indices)

    # Fit and Train classifier Model (step 5 & 6)
    classifier = ClassifierModel(train_meta, train_meta_labels, test_meta,
                                 test_meta_labels)
    predicts = classifier.run_decision_tree_classification()

    # Evaluate
    precision, recall, f1_measure = classifier.evaluate_prediction(predicts)
    correct, total = classifier.get_accuracy(predicts)
    percentage = 100 * correct / total

    #print("Stacked Model Prediction:\nAccuracy: {}/{} ({:.0f}%)\n\tPrecision: {}\n\tRecall: {}\n\tF1 Measure:{}".format(
    #	correct, total, percentage, precision, recall, f1_measure))
    loghub.logMsg(
        msg=
        "{}: Stacked Model Prediction:\nAccuracy: {}/{} ({:.0f}%)\n\tPrecision: {}\n\tRecall: {}\n\tF1 Measure:{}"
        .format(__name__, correct, total, percentage, precision, recall,
                f1_measure),
        otherlogs=["test_acc"])

    # 7. Save the ensemble model ########################################################################################################################

    stacked_model_filepath = os.path.join(processed_root_dir,
                                          stacked_model_name)
    classifier.save_model(stacked_model_filepath)
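
train_meta and test_meta each have one row per sample and one column per base model, holding that model's predicted class index. Below is a small hedged sketch of the final stacking step (steps 5 and 6), using scikit-learn's DecisionTreeClassifier as a stand-in for ClassifierModel.run_decision_tree_classification, whose implementation is not shown in these examples; all values are made up.

# Hedged sketch of the stacking step, with sklearn standing in for ClassifierModel.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

train_meta = np.array([[0, 0], [1, 1], [2, 1], [0, 1], [2, 2], [1, 0]])   # 6 samples x 2 base models
train_meta_labels = np.array([0, 1, 2, 0, 2, 1])
test_meta = np.array([[0, 0], [1, 2], [2, 2], [1, 1]])                    # 4 samples x 2 base models

stacker = DecisionTreeClassifier()
stacker.fit(train_meta, train_meta_labels)    # step 5: fit the stacker on train_meta
predicts = stacker.predict(test_meta)         # step 6: predict on test_meta
print(predicts)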
Example #10
def predict_with_stack_model(with_labels=True):
    """
		load previously saved models to predict labels on the test data

		with_labels (bool): Indicates whether the test data has labels.
			- evaluation data has no labels
			- test data has labels
	"""

    # 1. Load the Testing Data #######################################################################################

    # MOVE TO GLOBAL VARIABLES
    """
	train_labels_dir = '../Dataset/train/train_labels.csv'
	test_labels_dir = '../Dataset/test/test_labels.csv'
	eval_labels_dir = "../Dataset/evaluate/evaluate_labels.csv"
	root_dir = '../Dataset'
	processed_root_dir = 'processed_data'
	"""

    # Load all the dataset
    if with_labels:
        # Test Dataset (with labels)
        data_manager = DatasetManager(train_labels_dir, test_labels_dir,
                                      root_dir)
        # Load all the dataset
        data_manager.load_all_data(with_labels=True)
    else:
        # Evaluation Dataset (with no labels)
        data_manager = DatasetManager("", eval_labels_dir, root_dir)
        # Load all the dataset
        data_manager.load_all_data(with_labels=False)

    # Initialize the input_vector for stacked model
    input_vect = np.empty(
        (data_manager.get_test_data_size(),
         len(save_models)))  # (n x m) where n = audio data, m = model

    # 2. Get Prediction Results from each Model #######################################################################

    # For each model
    for i in range(len(save_models)):
        # Get feature index
        fid = feat_indices[i]

        # Preprocess Feature for model
        if with_labels:
            # Test Dataset (with labels)
            preprocessed_features_filepath = os.path.join(
                processed_root_dir, preprocessed_features[i])
        else:
            # Evaluation Dataset (with no labels)
            preprocessed_features_filepath = os.path.join(
                processed_root_dir, preprocessed_features_test[i])

        data_manager.load_feature(
            fid, preprocessed_features_filepath
        )  # NOTE: this should eventually be removed; when predicting on unknown audio we will not have preprocessed the audio file, so leave it blank

        # Prepare data
        if with_labels:
            # Test Dataset (with labels)
            train_csv, test_csv = data_manager.prepare_data(
                train_csv=temp_train_csv_file, test_csv=temp_test_csv_file)
        else:
            # Evaluation Dataset (with no labels)
            test_csv = data_manager.prepare_test_data(
                test_csv=temp_test_csv_file)

        # Get Normalized preprocessed data file
        norm_std = os.path.join(processed_root_dir, norm_stds[i])
        norm_mean = os.path.join(processed_root_dir, norm_means[i])

        # Get saved model path
        saved_model_path = os.path.join(processed_root_dir, save_models[i])

        # Test the saved model & get prediction results
        if with_labels:
            # Test Dataset (with labels)
            predictions = bm.testCNNModel(saved_model_path=saved_model_path,
                                          test_csv=test_csv,
                                          norm_std=norm_std,
                                          norm_mean=norm_mean,
                                          data_manager=data_manager,
                                          num_of_channel=num_of_channels[i],
                                          with_labels=with_labels)
        else:
            # Evaluation Dataset (with no labels)
            predictions = bm.testCNNModel(saved_model_path=saved_model_path,
                                          test_csv=test_csv,
                                          norm_std=norm_std,
                                          norm_mean=norm_mean,
                                          data_manager=data_manager,
                                          num_of_channel=num_of_channels[i],
                                          with_labels=with_labels)

        # Fill up the input_vector with predictions results from model
        for j in range(data_manager.get_test_data_size()):
            input_vect[j][i] = predictions[j]

    # 3. Get Prediction Results from Stack Model based on input_vector  ####################################################

    # Load the stacked model
    stacked_model_filepath = os.path.join(processed_root_dir,
                                          stacked_model_name)
    stacked_em = pickle.load(open(stacked_model_filepath, 'rb'))

    # Get Prediction Results
    predicts = stacked_em.predict(input_vect)

    # Print prediction Accuracy
    if with_labels:
        # Test Dataset (with labels)
        correct, total = util.compare_list_elements(
            predicts, data_manager.test_label_indices)
        percentage = 100 * correct / total
        #print("Stacked Model Prediction Accuracy: {}/{} ({:.0f}%)".format(correct, total, percentage))
        loghub.logMsg(
            msg="{}: Stacked Model Prediction Accuracy: {}/{} ({:.0f}%)".
            format(__name__, correct, total, percentage),
            otherlogs=["test_acc"])

        #np.set_printoptions(precision=2)

        # Plot non-normalized confusion matrix
        #mk.plot_confusion_matrix(data_manager.test_label_indices, predicts, classes=[
        #	'airport', 'bus', 'metro', 'metro_station', 'park', 'public_square', 'shopping_mall',
        #	'street_pedestrian', 'street_traffic', 'tram'
        #	], title='Confusion matrix')

        #plt.show()
    else:
        # Evaluation Dataset (with no labels)
        # Store the prediction results
        dcase_eval_data = DCASEDataset(eval_labels_dir, root_dir, data_manager)

        results = []
        headers = ["filename", "label", "label_index"]
        for i in range(len(dcase_eval_data) - 1):
            result = []
            # Get prediction results for each audio file
            result.append(dcase_eval_data.datalist[
                i + 1])  # first line is header...(so add 1 to skip it)
            pred_idx = int(predicts[i])
            result.append(dcase_eval_data.default_labels[pred_idx])
            result.append(pred_idx)
            # Add to list
            results.append(result)
        # Write to csv file
        util.write_to_csv_file(results, predict_results_csv, headers)
    parser.add_argument("--em",
                        help="Ensemble Mode",
                        choices=['build', "test", 'predict'])
    parser.add_argument("--ename",
                        help="Stacked Model name (eg. stackedModel.sav)")
    process_arguments(parser)

    # 2. Set up logging
    loghub.init_main_logger(os.path.join("log_files", main_log))
    loghub.setup_logger("test_acc", os.path.join("log_files", test_accu_log))

    # 3. Run Ensemble Learning
    if ensemble_mode == 0:
        #print("Building Stacked Ensemble Model (Meta Ensembling)...")
        loghub.logMsg(
            msg="{}: Building Stacked Ensemble Model (Meta Ensembling)...".
            format(__name__),
            otherlogs=["test_acc"])
        build_stack_model()
    elif ensemble_mode == 1:
        #print("Testing Stacked Ensemble Model...")
        loghub.logMsg(
            msg="{}: Testing Stacked Ensemble Model...".format(__name__),
            otherlogs=["test_acc"])
        predict_with_stack_model(with_labels=True)
    elif ensemble_mode == 2:
        #print("Predicting with Stacked Ensemble Model...")
        loghub.logMsg(
            msg="{}: Predicting with Stacked Ensemble Model...".format(
                __name__),
            otherlogs=["test_acc"])
        predict_with_stack_model(with_labels=False)
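
process_arguments() is not shown in these examples, so the following is only a hypothetical sketch of how the --em choice might be mapped onto the ensemble_mode codes 0/1/2 used above; the function body and the globals it sets are assumptions, not the original implementation.

# Hypothetical sketch (assumed helper): map --em onto the ensemble_mode codes used above.
def process_arguments(parser):
    global ensemble_mode, stacked_model_name
    args = parser.parse_args()
    mode_map = {"build": 0, "test": 1, "predict": 2}
    ensemble_mode = mode_map.get(args.em, 0)
    if args.ename:
        stacked_model_name = args.ename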
Example #12
def buildCNNModel(train_csv,
                  test_csv,
                  norm_std,
                  norm_mean,
                  data_manager,
                  num_of_channel,
                  split_valid=False,
                  saved_model_name="",
                  test_batch_size=16,
                  batch_size=16,
                  epochs=200,
                  lr=0.01,
                  no_cuda=False,
                  seed=1,
                  log_interval=10,
                  save_model=True):
    """
		Build and Train CNN model
		
		Required Parameters:
			train_csv (string): file that contains all train data labels.
			test_csv (string): file that contains all test data labels.
			norm_std (string): file that contains the normalized std 
			norm_mean (string): file that contains the normalized mean 
			data_manager (DataManager): contains all the loaded train/test dataset
			num_of_channel (int): number of channels for input features
			split_valid (bool): True = split train data into train/validate, False = use test data as validate data
			saved_model_name (string): file name to use when saving the model

		Optional Parameters
			batch_size (int): input batch size for training
			test_batch_size (int): input batch size of testing
			epochs (int): number of epochs to train
			lr (float): learning rate 
			no_cuda (bool): disables CUDA training
			seed (int): random seed
			log_interval (int): how many batches to wait before logging training status
			save_model (bool): for saving the current model
	"""

    # Step 0: Setting up Training Settings ##################################################

    # Training settings
    use_cuda = not no_cuda and torch.cuda.is_available()

    torch.manual_seed(seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    args = {
        "batch_size": batch_size,
        "test_batch_size": test_batch_size,
        "epochs": epochs,
        "lr": lr,
        "no_cuda": no_cuda,
        "seed": seed,
        "log_interval": log_interval,
        "save_model": save_model
    }
    args = Namespace(**args)

    # Step 1a: Preparing Data - Extract data ###########################################################

    # init the train and test directories
    train_labels_dir = train_csv
    test_labels_dir = test_csv
    root_dir = data_manager.root_dir

    # Step 1b: Preparing Data - Transform Data #########################################################

    # Compute Normalization Score
    if os.path.isfile(norm_std) and os.path.isfile(norm_mean):
        #print("Loading Normalization Data...")
        loghub.logMsg(msg="{}: Loading Normalization Data...".format(__name__),
                      otherlogs=["test_acc"])
        # load the npy files
        mean = np.load(norm_mean)
        std = np.load(norm_std)
    else:
        # Run the normalization and save mean/std if not already computed
        #print('DATA NORMALIZATION : ACCUMULATING THE DATA')
        loghub.logMsg(
            msg="{}: DATA NORMALIZATION : ACCUMULATING THE DATA".format(
                __name__),
            otherlogs=["test_acc"])
        # Load dataset
        dcase_dataset = DCASEDataset(train_labels_dir, root_dir, data_manager,
                                     True)
        mean, std = NormalizeData(train_labels_dir, root_dir, dcase_dataset)
        # Save the normalization statistics
        np.save(norm_mean, mean)
        np.save(norm_std, std)
        #print('DATA NORMALIZATION COMPLETED')
        loghub.logMsg(msg="{}: DATA NORMALIZATION COMPLETED".format(__name__),
                      otherlogs=["test_acc"])

    # Convert to Torch Tensors
    mean = torch.from_numpy(mean)
    std = torch.from_numpy(std)

    # convert to torch variables
    mean = torch.reshape(
        mean, [num_of_channel, 40, 1]
    )  # numpy broadcast (CxHxW). last dimension is 1 -> which will be automatically broadcasted to 500 (time)
    std = torch.reshape(std, [num_of_channel, 40, 1])

    # init the data_transform
    data_transform = transforms.Compose(
        [cnn.ToTensor(), cnn.Normalize(mean, std)])

    #print("Preparing Data...")
    loghub.logMsg(msg="{}: Preparing Data...".format(__name__),
                  otherlogs=["test_acc"])

    # init the datasets
    dcase_dataset = DCASEDataset(csv_file=train_labels_dir,
                                 root_dir=root_dir,
                                 data_manager=data_manager,
                                 is_train_data=True,
                                 transform=data_transform)
    dcase_dataset_test = DCASEDataset(csv_file=test_labels_dir,
                                      root_dir=root_dir,
                                      data_manager=data_manager,
                                      is_train_data=False,
                                      transform=data_transform)

    # Step 1c: Preparing Data - Load Data ###############################################################

    # set number of cpu workers in parallel
    kwargs = {'num_workers': 16, 'pin_memory': True} if use_cuda else {}

    # get the training and testing data loader
    train_loader = torch.utils.data.DataLoader(dcase_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)

    valid_loader = torch.utils.data.DataLoader(dcase_dataset_test,
                                               batch_size=args.test_batch_size,
                                               shuffle=False,
                                               **kwargs)

    test_loader = torch.utils.data.DataLoader(dcase_dataset_test,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              **kwargs)

    # Update data loader
    if split_valid:
        # Split Train data into train/validate data
        valid_ratio = 0.2
        num_train_data = len(dcase_dataset)
        indices = list(range(num_train_data))
        split = int(np.floor(valid_ratio * num_train_data))
        np.random.shuffle(indices)
        train_idx, valid_idx = indices[split:], indices[:split]
        # Initialize Random Sampler
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)

        # get the training and testing data loader
        train_loader = torch.utils.data.DataLoader(dcase_dataset,
                                                   batch_size=args.batch_size,
                                                   sampler=train_sampler,
                                                   **kwargs)

        valid_loader = torch.utils.data.DataLoader(
            dcase_dataset,
            batch_size=args.test_batch_size,
            sampler=valid_sampler,
            **kwargs)

    # Step 2: Build Model ###############################################################

    # init the model
    model = BaselineASC(num_of_channel).to(device)

    # init the optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Step 3: Train Model ###############################################################

    #print('MODEL TRAINING START')
    loghub.logMsg(msg="{}: MODEL TRAINING START".format(__name__),
                  otherlogs=["test_acc"])
    # train the model
    for epoch in range(1, args.epochs + 1):
        cnn.train(args, model, device, train_loader, optimizer, epoch)
        #print("MODEL: %s" % saved_model_name)
        loghub.logMsg(msg="{}: EPOCH {} - MODEL: {}".format(
            __name__, epoch, saved_model_name),
                      otherlogs=["test_acc"])
        cnn.test(args, model, device, valid_loader, "Validation Data")
        #cnn.test(args, model, device, train_loader, 'Training Data')
        #cnn.test(args, model, device, test_loader, 'Testing Data')

    #print('MODEL TRAINING END')
    loghub.logMsg(msg="{}: MODEL TRAINING END".format(__name__),
                  otherlogs=["test_acc"])

    # Step 4. Test Model ###############################################################

    #print("Model TESTING START")
    loghub.logMsg(msg="{}: MODEL TESTING START".format(__name__),
                  otherlogs=["test_acc"])
    # test the model
    if split_valid:
        predictions = cnn.test(args, model, device, valid_loader,
                               "Validation Data")
    else:
        predictions = cnn.test(args, model, device, test_loader,
                               "Testing Data")

    #print("Model TESTING END")
    loghub.logMsg(msg="{}: MODEL TESTING END".format(__name__),
                  otherlogs=["test_acc"])

    # Step 5: Save Model ################################################################

    # save the model
    if (args.save_model):
        torch.save(model.state_dict(), saved_model_name)

    return model, predictions
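
A hedged usage sketch for buildCNNModel(), assuming DatasetManager and the load_feature() feature indices shown later in these examples; all file names and paths below are illustrative placeholders rather than the project's actual configuration.

# Hedged usage sketch; paths and the chosen feature are illustrative.
data_manager = DatasetManager('../Dataset/train/train_labels.csv',
                              '../Dataset/test/test_labels.csv',
                              '../Dataset')
data_manager.load_all_data()
data_manager.load_feature(0, 'processed_data/mono_spec.npy')   # feature 0 = mono mel spectrogram

train_csv, test_csv = data_manager.prepare_data()

model, predictions = buildCNNModel(train_csv=train_csv,
                                   test_csv=test_csv,
                                   norm_std='processed_data/norm_std.npy',
                                   norm_mean='processed_data/norm_mean.npy',
                                   data_manager=data_manager,
                                   num_of_channel=1,
                                   epochs=10,
                                   saved_model_name='baseline_asc.pt',
                                   save_model=True)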
Example #13
def testCNNModel(saved_model_path,
                 test_csv,
                 norm_std,
                 norm_mean,
                 data_manager,
                 num_of_channel,
                 with_labels,
                 test_batch_size=16,
                 no_cuda=False,
                 seed=1):
    """
		Test the trained CNN model

		Required Parameters:
			saved_model_path (string): path to the saved CNN model
			test_csv (string): file that contains all test data labels.
			norm_std (string): file that contains the normalized std 
			norm_mean (string): file that contains the normalized mean 
			data_manager (DataManager): contains all the loaded train/test dataset
			num_of_channel (int): number of channels for input features
			with_labels (bool): Indicator if test_data has labels

		Optional Parameters
			test_batch_size (int): input batch size of testing
			no_cuda (bool): disables CUDA training
			seed (int): random seed
	"""

    # Step 0: Setting up Test Settings ##################################################

    # Test settings
    use_cuda = not no_cuda and torch.cuda.is_available()

    torch.manual_seed(seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    args = {
        "test_batch_size": test_batch_size,
        "no_cuda": no_cuda,
        "seed": seed,
    }
    args = Namespace(**args)

    # Step 1a: Preparing Data - Extract data ###########################################################

    # init the test directories
    test_labels_dir = test_csv
    root_dir = data_manager.root_dir

    # Step 1b: Preparing Data - Transform Data #########################################################

    # Load normalization score
    #print("Loading Normalization Data...")
    loghub.logMsg(msg="{}: Loading Normalization Data...".format(__name__),
                  otherlogs=["test_acc"])
    mean = np.load(norm_mean)
    std = np.load(norm_std)
    #print('Normalization Data Loaded.')
    loghub.logMsg(msg="{}: Normalization Data Loaded.".format(__name__),
                  otherlogs=["test_acc"])

    # Convert to Torch Tensors
    mean = torch.from_numpy(mean)
    std = torch.from_numpy(std)

    # convert to torch variables
    mean = torch.reshape(
        mean, [num_of_channel, 40, 1]
    )  # numpy broadcast (CxHxW). last dimension is 1 -> which will be automatically broadcasted to 500 (time)
    std = torch.reshape(std, [num_of_channel, 40, 1])

    # init the data_transform
    data_transform = transforms.Compose(
        [cnn.ToTensor(), cnn.Normalize(mean, std)])

    #print("Preparing Data...")
    loghub.logMsg(msg="{}: Preparing Data...".format(__name__),
                  otherlogs=["test_acc"])

    # init the datasets
    dcase_dataset_test = DCASEDataset(csv_file=test_labels_dir,
                                      root_dir=root_dir,
                                      data_manager=data_manager,
                                      is_train_data=False,
                                      transform=data_transform)

    # Step 1c: Preparing Data - Load Data ###############################################################

    # set number of cpu workers in parallel
    kwargs = {'num_workers': 16, 'pin_memory': True} if use_cuda else {}

    # get the testing data loader
    test_loader = torch.utils.data.DataLoader(dcase_dataset_test,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              **kwargs)

    # Step 2: Test Model ###############################################################

    #print("Model TESTING START...")
    loghub.logMsg(msg="{}: Model TESTING START...".format(__name__),
                  otherlogs=["test_acc"])

    # load the model
    model = BaselineASC(num_of_channel).to(device)
    model.load_state_dict(torch.load(saved_model_path))

    # test the model
    if with_labels:
        predictions = cnn.test(args, model, device, test_loader,
                               "Testing Data")
    else:
        # Evaluation Dataset (with no labels)
        predictions = cnn.predict(model, device, test_loader)

    #print("Model TESTING END.")
    loghub.logMsg(msg="{}: Model TESTING END.".format(__name__),
                  otherlogs=["test_acc"])

    return predictions
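
A matching hedged sketch for testCNNModel(), assuming a data_manager loaded with load_all_data(with_labels=False) as in predict_with_stack_model above; the model and normalization paths are illustrative placeholders.

# Hedged usage sketch; paths are illustrative placeholders.
test_csv = data_manager.prepare_test_data(test_csv="test_dataset.csv")

predictions = testCNNModel(saved_model_path='processed_data/baseline_asc.pt',
                           test_csv=test_csv,
                           norm_std='processed_data/norm_std.npy',
                           norm_mean='processed_data/norm_mean.npy',
                           data_manager=data_manager,
                           num_of_channel=1,
                           with_labels=False)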
Example #14
	def load_feature(self, feature_index, filename):
		"""
			filename (string): name of the file to save the extracted features eg feature.npy
			feature_index (int): index to indicate which feature to extract
			Load or Extract the features for all audio files.
		"""

		# check that data have been loaded
		if not self.audio_files:
			#print("Data have not been loaded. Running data_manager.load_all_data()...")
			loghub.logMsg(msg="{}: Data have not been loaded. Running data_manager.load_all_data()...".format(__name__), otherlogs=["test_acc"], level="warning")
			self.load_all_data()

		# Extract features
		#print("Loading/Extracting feature %i from audio files..." % feature_index)
		loghub.logMsg(msg="{}: Loading/Extracting feature {} from audio files...".format(__name__, feature_index), otherlogs=["test_acc"])

		if os.path.isfile(filename):
			# file already exists
			self.audio_data = np.load(filename) 
		else:
			# file does not exist (extract the feature spectrograms and save the data)
			mel_specs = []

			specA = specB = None

			# Load preprocessed data if exists
			if feature_index == 3:
				if os.path.isfile("processed_data/left_spec.npy") and os.path.isfile("processed_data/right_spec.npy"):
					specA = np.load("processed_data/left_spec.npy")
					specB = np.load("processed_data/right_spec.npy")
			elif feature_index == 6:
				if os.path.isfile("processed_data/LR_spec.npy") and os.path.isfile("processed_data/diff_spec.npy"):
					specA = np.load("processed_data/LR_spec.npy")
					specB = np.load("processed_data/diff_spec.npy")
			elif feature_index == 8:
				if os.path.isfile("processed_data/hpss_spec.npy") and os.path.isfile("processed_data/mono_spec.npy"):
					specA = np.load("processed_data/hpss_spec.npy")
					specB = np.load("processed_data/mono_spec.npy")
			elif feature_index == 15:
				if os.path.isfile("processed_data/mfcc_left_spec.npy") and os.path.isfile("processed_data/mfcc_right_spec.npy"):
					specA = np.load("processed_data/mfcc_left_spec.npy")
					specB = np.load("processed_data/mfcc_right_spec.npy")
			elif feature_index == 16:
				if os.path.isfile("processed_data/mfcc_LR_spec.npy") and os.path.isfile("processed_data/mfcc_diff_spec.npy"):
					specA = np.load("processed_data/mfcc_LR_spec.npy")
					specB = np.load("processed_data/mfcc_diff_spec.npy")
			elif feature_index == 17:
				if os.path.isfile("processed_data/hpssmono_spec.npy") and os.path.isfile("processed_data/LR_spec.npy"):
					specA = np.load("processed_data/hpssmono_spec.npy")
					specB = np.load("processed_data/LR_spec.npy")
			elif feature_index == 18:
				if os.path.isfile("processed_data/mono_spec.npy") and os.path.isfile("processed_data/LRD_spec.npy"):
					specA = np.load("processed_data/mono_spec.npy")
					specB = np.load("processed_data/LRD_spec.npy")
			elif feature_index == 19:
				if os.path.isfile("processed_data/mfcc_mono_spec.npy") and os.path.isfile("processed_data/mfcc_LRD_spec.npy"):
					specA = np.load("processed_data/mfcc_mono_spec.npy")
					specB = np.load("processed_data/mfcc_LRD_spec.npy")

			# Extract features from audio file
			for i in range(len(self.audio_files)):
				wav_name = os.path.join(self.root_dir, self.audio_files[i])

				if feature_index == 0:
					# Extracting Mel Spectrogram for Mono Channel (1 channel)
					mel_specs.append(ap.extract_mel_spectrogram_for_mono_channel(wav_name))
				elif feature_index == 1:
					# Extracting Mel Spectrogram for Left Channel (1 channel)
					mel_specs.append(ap.extract_mel_spectrogram_for_left_channel(wav_name))
				elif feature_index == 2:
					# Extracting Mel Spectrogram for Right Channel (1 channel)
					mel_specs.append(ap.extract_mel_spectrogram_for_right_channel(wav_name))
				elif feature_index == 3:
					# Extracting Mel Spectrogram for left & right Channel (2 channel)
					if specA is not None and specB is not None:
						mel_specs.append(ap.combine_left_and_right_mel_spectrogram(wav_name, specA[i], specB[i]))
					else:
						mel_specs.append(ap.combine_left_and_right_mel_spectrogram(wav_name))
				elif feature_index == 4:
					# Extracting Mel Spectrogram for difference of left & right Channel (1 channel)
					mel_specs.append(ap.extract_mel_spectrogram_for_difference_of_left_right_channel(wav_name))
				elif feature_index == 5:
					# Extracting Mel Spectrogram for sum of left & right Channel (1 channel)
					mel_specs.append(ap.extract_mel_spectrogram_for_sum_of_left_right_channel(wav_name))
				elif feature_index == 6:
					# Extracting Mel Spectrogram of left & right & leftrightdiff Channel (3 channel)
					if specA is not None and specB is not None:
						mel_specs.append(ap.combine_left_right_with_LRdifference(wav_name, specA[i], specB[i]))
					else:
						mel_specs.append(ap.combine_left_right_with_LRdifference(wav_name))
				elif feature_index == 7:
					# Extracting Mel Spectrogram of mono Channel with hpss applied (2 channel)
					mel_specs.append(ap.extract_mel_spectrogram_for_hpss(wav_name))
				elif feature_index == 8:
					# Extracting Mel Spectrogram of mono Channel & hpss (3 channel)
					if specA is not None and specB is not None:
						mel_specs.append(ap.combine_hpss_and_mono_mel_spectrogram(wav_name, specA[i], specB[i]))
					else:
						mel_specs.append(ap.combine_hpss_and_mono_mel_spectrogram(wav_name))
				elif feature_index == 9:
					# Extracting Chroma feature (1 channel)
					mel_specs.append(ap.extract_chroma_for_mono_channel(wav_name))
				elif feature_index == 10:
					# Extracting Zero Crossing feature (1 channel)
					mel_specs.append(ap.extract_zero_crossing_for_mono_channel(wav_name))
				elif feature_index == 11:
					# Extracting MFCC feature from mono channel (1 channel)
					mel_specs.append(ap.extract_mfcc_for_mono_channel(wav_name))
				elif feature_index == 12:
					# Extracting MFCC feature from left channel (1 channel)
					mel_specs.append(ap.extract_mfcc_spectrogram_for_left_channel(wav_name))
				elif feature_index == 13:
					# Extracting MFCC feature from right channel (1 channel)
					mel_specs.append(ap.extract_mfcc_spectrogram_for_right_channel(wav_name))
				elif feature_index == 14:
					# Extracting MFCC feature from difference of left & right channel (1 channel)
					mel_specs.append(ap.extract_mfcc_spectrogram_for_difference_of_left_right_channel(wav_name))
				elif feature_index == 15:
					# Extracting MFCC feature from left & right & leftrightdiff channel (3 channel)
					if specA is not None and specB is not None:
						mel_specs.append(ap.combine_mfcc_left_and_right(wav_name, specA[i], specB[i]))
					else:
						mel_specs.append(ap.combine_mfcc_left_and_right(wav_name))
				elif feature_index == 16:
					# Extracting MFCC feature from left & right & leftrightdiff channel (3 channel)
					if specA is not None and specB is not None:
						mel_specs.append(ap.combine_mfcc_left_right_with_LRdifference(wav_name, specA[i], specB[i]))
					else:
						mel_specs.append(ap.combine_mfcc_left_right_with_LRdifference(wav_name))
				elif feature_index == 17:
					# Combine left mel + right mel + hpss + mono mel
					if specA is not None and specB is not None:
						mel_specs.append(ap.extract_early_fusion_left_right_3f(wav_name, specA[i], specB[i]))
					else:
						mel_specs.append(ap.extract_early_fusion_left_right_3f(wav_name))
				elif feature_index == 18:
					# Combine left mel + right mel + diff mel + mono mel
					if specA is not None and specB is not None:
						mel_specs.append(ap.extract_early_fusion_left_right_diff_mono(wav_name, specA[i], specB[i]))
					else:
						mel_specs.append(ap.extract_early_fusion_left_right_diff_mono(wav_name))
				elif feature_index == 19:
					# Combine left mfcc + right mfcc + diff mfcc + mono mfcc
					if specA is not None and specB is not None:
						mel_specs.append(ap.extract_early_fusion_MFCC_left_right_diff_mono(wav_name, specA[i], specB[i]))
					else:
						mel_specs.append(ap.extract_early_fusion_MFCC_left_right_diff_mono(wav_name))

			if filename:
				np.save(filename, mel_specs)

			mel_specs = np.asarray(mel_specs)
			self.audio_data = mel_specs

		#print("Feature %i extracted." % feature_index)
		loghub.logMsg(msg="{}: Feature {} extracted.".format(__name__, feature_index), otherlogs=["test_acc"])
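
The long if/elif chain above maps a feature index onto an extraction function in the audio-processing module imported as ap. Below is a hedged refactoring sketch of the same dispatch using a lookup table, shown only for the single-argument extractors whose names appear in the code above (it assumes ap is already imported):

# Hedged refactoring sketch: table-driven dispatch for some single-argument extractors above.
SIMPLE_EXTRACTORS = {
    0: ap.extract_mel_spectrogram_for_mono_channel,
    1: ap.extract_mel_spectrogram_for_left_channel,
    2: ap.extract_mel_spectrogram_for_right_channel,
    4: ap.extract_mel_spectrogram_for_difference_of_left_right_channel,
    5: ap.extract_mel_spectrogram_for_sum_of_left_right_channel,
    11: ap.extract_mfcc_for_mono_channel,
}

def extract_simple_feature(feature_index, wav_name):
    extractor = SIMPLE_EXTRACTORS[feature_index]
    return extractor(wav_name)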
Example #15
	def prepare_data(self, train_indices=None, test_indices=None, train_only=False, train_csv="train_dataset.csv", test_csv="test_dataset.csv"):
		"""
			train_indices (array of index): indices of all training audio files
			test_indices (array of index): indices of all testing audio files
			train_only (bool): indicator on whether train_indices and test_indices are all from training data
			train_csv (string): filename of newly generated train dataset
			test_csv (string): filename of newly generated test dataset
			Prepare data for training/testing the model. Since all the features are loaded and stored in a
			single main data file, this function generates a train.csv and test.csv which will be used
			to build the model. The index of each audio file in train.csv/test.csv is mapped to its index
			in the main data file. The purpose is to improve efficiency by not re-extracting all
			the features whenever the train/test split changes.
		"""

		#print("Generating train.csv and test.csv for building model...")
		loghub.logMsg(msg="{}: Generating train.csv and test.csv for building model...".format(__name__), otherlogs=["test_acc"])

		self.train_idx_map = []
		self.test_idx_map = []

		if train_indices is None and test_indices is None:
			# using the original indices order
			train_indices = np.arange(self.get_train_data_size())	# Train indices = all of train data
			test_indices = np.arange(self.get_test_data_size())		# Test indices = all of test data

		# Extract data for train.csv
		train_csv_data = []
		for i in range(len(train_indices)):
			# get index
			index = train_indices[i]
			# Get Dataset
			dataset = []
			dataset.append(self.train_data_list[index])
			dataset.append(self.train_label_list[index])
			dataset.append(self.train_label_indices[index])
			train_csv_data.append(dataset)
			# Map index to main data list
			self.train_idx_map.append(index)

		# Extract data for test.csv
		test_csv_data = []
		base = self.base						# main data = train + test (hence index of test starts after train)
		for i in range(len(test_indices)):
			# get index
			index = test_indices[i]
			# check if test_indices is from train or test data
			if train_only:
				# test_indices is a validation set (from training data)
				# Get dataset
				dataset = []
				dataset.append(self.train_data_list[index])
				dataset.append(self.train_label_list[index])
				dataset.append(self.train_label_indices[index])
				test_csv_data.append(dataset)
				# Map index to main data list
				self.test_idx_map.append(index)					# index = index of self.audio
			else:
				# test indices is a test set (from testing data)
				# Get dataset
				dataset = []
				dataset.append(self.test_data_list[index])
				dataset.append(self.test_label_list[index])
				dataset.append(self.test_label_indices[index])
				test_csv_data.append(dataset)
				# Map index to main data list
				self.test_idx_map.append(base + index)			# base+index = index of self.audio

		# Prepare csv file path
		train_filepath = os.path.join(self.root_dir, train_csv)
		test_filepath = os.path.join(self.root_dir, test_csv)

		# Write into train csv file
		util.write_to_csv_file(train_csv_data, train_filepath)

		# Write into test csv file
		util.write_to_csv_file(test_csv_data, test_filepath)

		#print("Data labels generated in %s (train) and %s (test)" % (train_filepath, test_filepath))
		loghub.logMsg(msg="{}: Data labels generated in {} (train) and {} (test)".format(__name__, train_filepath, test_filepath), otherlogs=["test_acc"])

		return train_filepath, test_filepath
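
A hedged sketch tying apply_k_fold() and prepare_data() together, mirroring the cross-validation loop in the stacking example above (the csv file names are illustrative placeholders):

# Hedged usage sketch combining apply_k_fold() and prepare_data().
kfolds = data_manager.apply_k_fold(K=5)

for fold, (train_indices, validate_indices) in enumerate(kfolds):
    train_csv, test_csv = data_manager.prepare_data(train_indices=train_indices,
                                                    test_indices=validate_indices,
                                                    train_csv="fold_train.csv",
                                                    test_csv="fold_test.csv",
                                                    train_only=True)
    # train_csv / test_csv can now be passed to buildCNNModel() for this fold.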
Example #16
def main():

    # Initialize Timer
    timer = StopWatch()
    timer.startTimer()

    # Step 0: Setting up Training Settings ##################################################

    # Training settings
    parser = argparse.ArgumentParser(
        description='PyTorch Baseline code for ASC Group Project (CS4347)')
    parser.add_argument('--batch-size',
                        type=int,
                        default=16,
                        metavar='N',
                        help='input batch size for training (default: 16)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=16,
                        metavar='N',
                        help='input batch size for testing (default: 16)')
    parser.add_argument('--epochs',
                        type=int,
                        default=200,
                        metavar='N',
                        help='number of epochs to train (default: 200)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    # Step 1a: Preparing Data - Extract data ###########################################################

    # init the train and test directories
    train_labels_dir = '../Dataset/train/train_labels.csv'
    test_labels_dir = '../Dataset/test/test_labels.csv'
    root_dir = '../Dataset'

    # Load all the dataset
    data_manager = DatasetManager(train_labels_dir, test_labels_dir, root_dir)
    data_manager.load_all_data(include_test=False)

    # Load/Preprocess Feature for model
    data_manager.load_feature(feature_index, preprocessed_features)

    # Prepare data
    train_labels_dir, test_labels_dir = data_manager.prepare_data(
        train_csv=temp_train_csv_file, test_csv=temp_test_csv_file)

    # Step 1b: Preparing Data - Transform Data #########################################################

    # Compute Normalization score
    if os.path.isfile(preprocessed_norm_mean_file) and os.path.isfile(
            preprocessed_norm_std_file):
        # normalization statistics already computed: load the saved mean and std from the npy files
        mean = np.load(preprocessed_norm_mean_file)
        std = np.load(preprocessed_norm_std_file)
    else:
        # If not, run the normalization and save the mean/std
        #print('DATA NORMALIZATION : ACCUMULATING THE DATA')
        loghub.logMsg(
            msg="{}: DATA NORMALIZATION : ACCUMULATING THE DATA".format(
                __name__),
            otherlogs=["test_acc"])
        # load the dataset
        dcase_dataset = DCASEDataset(train_labels_dir, root_dir, data_manager,
                                     True)
        mean, std = NormalizeData(train_labels_dir, root_dir, dcase_dataset)
        np.save(preprocessed_norm_mean_file, mean)
        np.save(preprocessed_norm_std_file, std)
        #print('DATA NORMALIZATION COMPLETED')
        loghub.logMsg(msg="{}: DATA NORMALIZATION COMPLETED".format(__name__),
                      otherlogs=["test_acc"])

    # Convert to Torch Tensors
    mean = torch.from_numpy(mean)
    std = torch.from_numpy(std)

    # convert to torch variables
    mean = torch.reshape(
        mean, [num_of_channel, 40, 1]
    )  # numpy broadcast (CxHxW). last dimension is 1 -> which will be automatically broadcasted to 500 (time)
    std = torch.reshape(std, [num_of_channel, 40, 1])

    # init the data_transform
    data_transform = transforms.Compose(
        [cnn.ToTensor(), cnn.Normalize(mean, std)])

    # init the datasets
    dcase_dataset = DCASEDataset(csv_file=train_labels_dir,
                                 root_dir=root_dir,
                                 data_manager=data_manager,
                                 is_train_data=True,
                                 transform=data_transform)
    dcase_dataset_test = DCASEDataset(csv_file=test_labels_dir,
                                      root_dir=root_dir,
                                      data_manager=data_manager,
                                      is_train_data=False,
                                      transform=data_transform)

    # Step 1c: Preparing Data - Load Data ###############################################################

    # set number of cpu workers in parallel
    kwargs = {'num_workers': 16, 'pin_memory': True} if use_cuda else {}

    # get the training and testing data loader
    train_loader = torch.utils.data.DataLoader(dcase_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)

    test_loader = torch.utils.data.DataLoader(dcase_dataset_test,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              **kwargs)

    # Step 2: Build Model ###############################################################

    # init the model
    model = BaselineASC(num_of_channel).to(device)

    # init the optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Step 3: Train Model ###############################################################

    #print('MODEL TRAINING START')
    loghub.logMsg(msg="{}: MODEL TRAINING START.".format(__name__),
                  otherlogs=["test_acc"])
    # train the model
    for epoch in range(1, args.epochs + 1):
        cnn.train(args, model, device, train_loader, optimizer, epoch)
        cnn.test(args, model, device, train_loader, 'Training Data')
        cnn.test(args, model, device, test_loader, 'Test Data')

    #print('MODEL TRAINING END')
    loghub.logMsg(msg="{}: MODEL TRAINING END.".format(__name__),
                  otherlogs=["test_acc"])

    # Step 4: Save Model ################################################################

    # save the model
    if (args.save_model):
        torch.save(model.state_dict(), saved_model)

    # stop timer
    timer.stopTimer()
    timer.printElapsedTime()