from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.optimizers import RMSprop
sys.path.insert(0, "../")
from youtube_audioset import get_recursive_sound_names, get_all_sound_names
from youtube_audioset import EXPLOSION_SOUNDS, MOTOR_SOUNDS, \
                             WOOD_SOUNDS, HUMAN_SOUNDS, NATURE_SOUNDS, AMBIENT_SOUNDS, IMPACT_SOUNDS
import balancing_dataset

########################################################################
# get all the sounds
#
# get_all_sound_names returns a pair that is unpacked into
# AMBIENT_SOUNDS / IMPACT_SOUNDS.
# NOTE(review): this rebinds the AMBIENT_SOUNDS and IMPACT_SOUNDS names
# imported from youtube_audioset in the import section -- confirm that
# shadowing the imported values is intended.
#
# Each *_sounds variable below holds the result of expanding one of the
# imported *_SOUNDS constants with get_recursive_sound_names. The "../"
# argument is presumably the path to the ontology directory -- TODO confirm
# against youtube_audioset.
########################################################################
AMBIENT_SOUNDS, IMPACT_SOUNDS = get_all_sound_names("../")
explosion_sounds = get_recursive_sound_names(EXPLOSION_SOUNDS, "../")
motor_sounds = get_recursive_sound_names(MOTOR_SOUNDS, "../")
wood_sounds = get_recursive_sound_names(WOOD_SOUNDS, "../")
human_sounds = get_recursive_sound_names(HUMAN_SOUNDS, "../")
nature_sounds = get_recursive_sound_names(NATURE_SOUNDS, "../")

########################################################################
# Read the balanced data
# Note that this is binary classification.
# Balancing must be  [ Ambient ] vs  [ Impact ]
#
# Both flags are passed as 0 here; their exact meaning is defined in
# balancing_dataset.balanced_data (not visible in this file) -- presumably
# 0 disables the audiomoth / mixed-sound examples, TODO confirm.
########################################################################
DATA_FRAME = balancing_dataset.balanced_data(audiomoth_flag=0,
                                             mixed_sounds_flag=0)

########################################################################
# Binarize the labels
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, hamming_loss
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam
sys.path.insert(0, "../")
from youtube_audioset import get_recursive_sound_names, get_all_sound_names
from youtube_audioset import EXPLOSION_SOUNDS, MOTOR_SOUNDS, WOOD_SOUNDS, \
                             HUMAN_SOUNDS, NATURE_SOUNDS, DOMESTIC_SOUNDS, TOOLS_SOUNDS
import balancing_dataset

########################################################################
# Get all sound names
#
# Each variable holds the result of expanding one imported *_SOUNDS
# constant with get_recursive_sound_names; "../" is presumably the path
# to the ontology directory -- TODO confirm against youtube_audioset.
########################################################################
explosion_sounds = get_recursive_sound_names(EXPLOSION_SOUNDS, "../")
motor_sounds = get_recursive_sound_names(MOTOR_SOUNDS, "../")
wood_sounds = get_recursive_sound_names(WOOD_SOUNDS, "../")
human_sounds = get_recursive_sound_names(HUMAN_SOUNDS, "../")
nature_sounds = get_recursive_sound_names(NATURE_SOUNDS, "../")
domestic_sounds = get_recursive_sound_names(DOMESTIC_SOUNDS, "../")
tools = get_recursive_sound_names(TOOLS_SOUNDS, "../")
# Disabled: no Wild_animals constant is imported in the visible code.
#wild_animals=get_recursive_sound_names(Wild_animals)

########################################################################
# Importing balanced data from the function.
# Including audiomoth annotated files for training
#
# NOTE(review): the line above says audiomoth files are included, but the
# call passes audiomoth_flag=0. Confirm in balancing_dataset.balanced_data
# which value actually enables them and fix either the comment or the flag.
########################################################################
DATA_FRAME = balancing_dataset.balanced_data(audiomoth_flag=0,
                                             mixed_sounds_flag=0)
from mlxtend.plotting import plot_learning_curves, plot_decision_regions
from mlxtend.plotting import plot_confusion_matrix

from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import RMSprop

from keras_tqdm import TQDMNotebookCallback

from youtube_audioset import get_data, get_recursive_sound_names, get_all_sound_names
from youtube_audioset import explosion_sounds, motor_sounds, wood_sounds, human_sounds, nature_sounds

# get_all_sound_names returns a pair, unpacked here into the ambient and
# impact sound-name collections.
ambient_sounds, impact_sounds = get_all_sound_names()

# Each name imported from youtube_audioset is rebound to the result of
# expanding it with get_recursive_sound_names, so from here on these
# variables hold the expanded results rather than the imported values.
explosion_sounds = get_recursive_sound_names(explosion_sounds)
motor_sounds = get_recursive_sound_names(motor_sounds)
wood_sounds = get_recursive_sound_names(wood_sounds)
human_sounds = get_recursive_sound_names(human_sounds)
nature_sounds = get_recursive_sound_names(nature_sounds)

# Read the balanced data created by running balancing_datasets.py.
# The path is relative to the current working directory.
# NOTE(security): pickle.load can execute arbitrary code; only load pickle
# files produced by a trusted run of the balancing script.
with open('balanced_data.pkl', 'rb') as f:
    df = pickle.load(f)
print(df.shape)
# Expose the 'labels_name' column under the additional name 'labels'.
df['labels'] = df['labels_name']

# Binarize the labels
# The binarizer is fitted on the concatenation of ambient and impact names.
name_bin = LabelBinarizer().fit(ambient_sounds + impact_sounds)
# Expand each entry of 'labels' into one column per label; rows with fewer
# labels are padded with the string 'None'.
labels_split = df['labels'].apply(pd.Series).fillna('None')
# Only the first label column is transformed in the visible code.
labels_binarized = name_bin.transform(labels_split[labels_split.columns[0]])
Beispiel #4
0
def main(predictions_cfg_json,
         path_for_dataframe_with_features,
         save_misclassified_examples=None,
         path_to_save_prediction_csv=None):
    """Run every configured binary model on a pickled feature dataframe.

    For each label described in the prediction configuration, the matching
    model is loaded, run on the examples of the dataframe, and its
    probabilities / rounded predictions are appended to the dataframe as
    ``<label>_Probability`` and ``<label>_Prediction`` columns. When the
    dataframe has a ``labels_name`` column, the ground truth is derived from
    it, added as ``<label>_Actual``, and a confusion matrix, classification
    report, accuracy and Hamming loss are printed per label.

    Args:
        predictions_cfg_json: Passed to ``import_predict_configuration_json``.
            The result must be a mapping from label name to a configuration
            with the keys ``"positiveLabels"``, ``"ontology"`` (containing
            ``"extension"``), ``"networkCfgJson"`` and ``"train"``
            (containing ``"outputWeightFile"``).
        path_for_dataframe_with_features: Path of a pickle file holding a
            pandas DataFrame with a ``features`` column and, optionally, a
            ``labels_name`` column.
        save_misclassified_examples: Optional path *prefix* for the
            misclassified-example pickles. It is concatenated (not joined)
            with the file name, so a directory must include its trailing
            separator. Only used when the dataframe is labeled.
        path_to_save_prediction_csv: Optional path of the CSV file that
            receives all predictions (without the ``features`` column).

    Returns:
        None. Results are printed and optionally written to disk.
    """

    ##############################################################################
    # Import json data
    ##############################################################################
    config_datas = import_predict_configuration_json(predictions_cfg_json)

    ##############################################################################
    # read the dataframe with feature and labels_name column
    ##############################################################################
    print("Importing Data...")
    # NOTE(security): pickle.load can execute arbitrary code; only pass paths
    # to pickle files that come from a trusted source.
    with open(path_for_dataframe_with_features, "rb") as file_obj:
        data_frame = pickle.load(file_obj)
    # Re-number the rows 0..n-1 so label rows and feature rows can be matched
    # by index after filtering.
    data_frame.index = range(0, data_frame.shape[0])

    is_dataframe_labeled = 'labels_name' in data_frame.columns

    if is_dataframe_labeled:
        print("Categorizing labels in dataframe...")
        ##############################################################################
        # Check if labels fall into positive label designation
        ##############################################################################
        labels_binarized = pd.DataFrame()

        for label_name, config_data in config_datas.items():
            positive_labels = get_recursive_sound_names(
                designated_sound_names=config_data["positiveLabels"],
                path_to_ontology="../../",
                ontology_extension_paths=config_data["ontology"]["extension"])

            # 1.0 for rows with at least one (lower-cased) label among the
            # positive labels, 0.0 otherwise.
            labels_binarized[label_name] = 1.0 * data_frame['labels_name'].apply(
                lambda arr, positives=positive_labels: any(
                    x.lower() in positives for x in arr))

    ##############################################################################
    # Keep only the examples whose feature array has exactly 10 rows
    # (presumably one row per second of audio -- TODO confirm)
    ##############################################################################
    df_test = data_frame.loc[data_frame.features.apply(
        lambda x: x.shape[0] == 10)]

    if is_dataframe_labeled:
        labels_filtered = labels_binarized.loc[df_test.index, :]

    ##############################################################################
    # preprocess the data into required structure
    ##############################################################################
    x_test = np.array(df_test.features.apply(lambda x: x.flatten()).tolist())
    # NOTE(review): a copy scaled by 1/255 used to be computed here but was
    # never used; the models receive the unscaled features. Confirm that this
    # matches the preprocessing used during training.

    ##############################################################################
    # reshaping the test data so as to align with input for model
    ##############################################################################
    clf2_test = x_test.reshape((-1, 1280, 1))

    ##############################################################################
    # Implementing using the keras usual prediction technique
    ##############################################################################
    for label_name, config_data in config_datas.items():

        model = load_model(config_data["networkCfgJson"],
                           config_data["train"]["outputWeightFile"])

        print("\nLoaded " + label_name + " model from disk")

        ##############################################################################
        # Predict on test data
        ##############################################################################
        prediction_prob = model.predict(clf2_test).ravel()
        prediction = prediction_prob.round()

        # Add results to data frame
        df_test.insert(len(df_test.columns), label_name + "_Probability",
                       prediction_prob)
        df_test.insert(len(df_test.columns), label_name + "_Prediction",
                       prediction)

        if not is_dataframe_labeled:
            # Without ground truth there is nothing to evaluate.
            continue

        ##############################################################################
        # Target for the test labels
        ##############################################################################
        target = labels_filtered[label_name].values
        print('Target shape: %s' % (target.shape,))

        ##############################################################################
        # To get the Misclassified examples
        ##############################################################################
        df_test.insert(len(df_test.columns), label_name + '_Actual', target)
        misclassified = prediction != target
        print('\nMisclassified number of examples for ' + label_name + " : " +
              str(df_test.loc[misclassified].shape[0]))

        ##############################################################################
        #  misclassified examples are to be saved
        ##############################################################################
        if save_misclassified_examples:
            misclassified_pickle_file = save_misclassified_examples + \
                          "misclassified_examples_br_model_"+label_name+".pkl"
            # Pickle data is binary: the file must be opened with "wb".
            # Text mode can corrupt the stream on platforms that translate
            # newlines and fails outright on Python 3.
            with open(misclassified_pickle_file, "wb") as pickle_file:
                pickle.dump(
                    df_test[misclassified].drop(["features"], axis=1),
                    pickle_file)

        ##############################################################################
        # Print confusion matrix and classification_report
        ##############################################################################
        print('Confusion Matrix for ' + label_name)
        print('============================================')
        print(confusion_matrix(target, prediction))

        ##############################################################################
        # print classification report
        ##############################################################################
        print('Classification Report for ' + label_name)
        print('============================================')
        print(classification_report(target, prediction))

        ##############################################################################
        # calculate accuracy and hamming loss
        ##############################################################################
        accuracy = accuracy_score(target, prediction)
        hamming = hamming_loss(target, prediction)
        print('Hamming Loss : %s' % hamming)
        print('Accuracy : %s' % accuracy)

    ##############################################################################
    # save the predictions in CSV format (feature arrays are left out)
    ##############################################################################
    if path_to_save_prediction_csv:
        df_test.drop(["features"], axis=1).to_csv(path_to_save_prediction_csv)
Beispiel #5
0
# Get the data of each sound category separately, then concatenate them to
# get balanced data. get_req_sounds is called with the path taken from the
# parsed command-line arguments (RESULT) and its result is unpacked into
# eight per-category objects.
MOT, HUM, WOD, EXP, DOM, TOOLS, WILD, NAT = frequency_component_files.get_req_sounds(
    RESULT.path_for_goertzel_components)

# Try to balance the number of examples.
# Here we need to balance as Impact vs Ambient, not as multilabel sounds.
# Each category is truncated to a fixed maximum number of rows before
# concatenation; the row index is rebuilt (ignore_index=True).
DATA_FRAME = pd.concat([
    MOT[:3000], HUM[:1700], WOD[:500], EXP[:1200], DOM[:1100], TOOLS[:1500],
    WILD[:1000], NAT[:8000]
],
                       ignore_index=True)

# Build the label name collections using the youtube_audioset functions.
# NOTE(review): the lower-case *_sounds names used as arguments are not
# defined in this snippet -- presumably imported from youtube_audioset.
AMBIENT_SOUNDS, IMPACT_SOUNDS = get_all_sound_names()
EXPLOSION = get_recursive_sound_names(explosion_sounds)
MOTOR = get_recursive_sound_names(motor_sounds)
WOOD = get_recursive_sound_names(wood_sounds)
HUMAN = get_recursive_sound_names(human_sounds)
NATURE = get_recursive_sound_names(nature_sounds)
DOMESTIC = get_recursive_sound_names(domestic_sounds)

# Binarize the labels
# The binarizer is fitted on the concatenation of ambient and impact names.
NAME_BIN = LabelBinarizer().fit(AMBIENT_SOUNDS + IMPACT_SOUNDS)
# One column per label position; rows with fewer labels are padded with the
# string 'None'.
LABELS_SPLIT = DATA_FRAME['labels_name'].apply(pd.Series).fillna('None')
# Start from the first label column, then OR in every column so that a row
# gets a 1 for each of its labels. The first column is OR-ed with itself
# once, which is redundant but harmless.
LABELS_BINARIZED = NAME_BIN.transform(LABELS_SPLIT[LABELS_SPLIT.columns[0]])
for column in LABELS_SPLIT.columns:
    LABELS_BINARIZED |= NAME_BIN.transform(LABELS_SPLIT[column])
# Wrap the result in a DataFrame with one column per binarizer class.
LABELS_BINARIZED = pd.DataFrame(LABELS_BINARIZED, columns=NAME_BIN.classes_)
#Shuffle the data
Beispiel #6
0
                    '--path_for_goertzel_components',
                    action='store',
                    help=HELP)
RESULT = PARSER.parse_args()

#################################################################################
# get the balanced data
# Both flags are passed as 0; their meaning is defined in
# balancing_dataset_goertzel.balanced_data (not visible here).
#################################################################################
DATA_FRAME = balancing_dataset_goertzel.balanced_data(audiomoth_flag=0,
                                                      mixed_sounds_flag=0)

#################################################################################
# getting recursive label names
#################################################################################
AMBIENT_SOUNDS, IMPACT_SOUNDS = get_all_sound_names("../")
EXPLOSION = get_recursive_sound_names(EXPLOSION_SOUNDS, "../")
MOTOR = get_recursive_sound_names(MOTOR_SOUNDS, "../")
WOOD = get_recursive_sound_names(WOOD_SOUNDS, "../")
HUMAN = get_recursive_sound_names(HUMAN_SOUNDS, "../")
NATURE = get_recursive_sound_names(NATURE_SOUNDS, "../")
DOMESTIC = get_recursive_sound_names(DOMESTIC_SOUNDS, "../")
# The tools expansion gets its own name. It used to be assigned to DOMESTIC,
# which silently discarded the domestic sound names computed just above.
TOOLS = get_recursive_sound_names(TOOLS_SOUNDS, "../")

#################################################################################
# Binarize the labels
#################################################################################
# The binarizer is fitted on the concatenation of ambient and impact names.
NAME_BIN = LabelBinarizer().fit(AMBIENT_SOUNDS + IMPACT_SOUNDS)
# One column per label position; rows with fewer labels are padded with the
# string 'None'.
LABELS_SPLIT = DATA_FRAME['labels_name'].apply(pd.Series).fillna('None')
# Start from the first label column, then OR in every column so that a row
# gets a 1 for each of its labels. The first column is OR-ed with itself
# once, which is redundant but harmless.
LABELS_BINARIZED = NAME_BIN.transform(LABELS_SPLIT[LABELS_SPLIT.columns[0]])
for column in LABELS_SPLIT.columns:
    LABELS_BINARIZED |= NAME_BIN.transform(LABELS_SPLIT[column])
Beispiel #7
0
# existing youtube ontology
ontologyExtFiles = CONFIG_DATA["ontology"]["extension"]

# If single file or null, then convert to list
if ontologyExtFiles is None:
    ontologyExtFiles = []
elif not isinstance(ontologyExtFiles, list):
    ontologyExtFiles = [ontologyExtFiles]

# All paths to ontology extension files are relative to the location of the
# model configuration file.
# A real list is built on purpose: the paths are passed to
# get_recursive_sound_names twice below, and a lazy map() iterator (Python 3)
# would already be exhausted on the second use.
ontologyExtFiles = [PATH_TO_DIRECTORY_OF_CONFIG + ext_path
                    for ext_path in ontologyExtFiles]

# Grab all the positive labels
POSITIVE_LABELS = get_recursive_sound_names(CONFIG_DATA["positiveLabels"],
                                            "../", ontologyExtFiles)

# If negative labels were provided, then collect them
# Otherwise, assume all examples that are not positive are negative
if CONFIG_DATA["negativeLabels"] is None:
    NEGATIVE_LABELS = None
else:
    # Grab all the negative labels
    NEGATIVE_LABELS = get_recursive_sound_names(CONFIG_DATA["negativeLabels"],
                                                "../", ontologyExtFiles)
    # Make sure there is no overlap between negative and positive labels
    NEGATIVE_LABELS = NEGATIVE_LABELS.difference(POSITIVE_LABELS)

#############################################################################
# Importing dataframes from the function
#############################################################################