from sklearn.preprocessing import LabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.optimizers import RMSprop
sys.path.insert(0, "../")
from youtube_audioset import get_recursive_sound_names, get_all_sound_names
from youtube_audioset import EXPLOSION_SOUNDS, MOTOR_SOUNDS, \
                             WOOD_SOUNDS, HUMAN_SOUNDS, NATURE_SOUNDS, AMBIENT_SOUNDS, IMPACT_SOUNDS
import balancing_dataset

########################################################################
# Resolve the sound-name collections
########################################################################
# Rebinds the AMBIENT_SOUNDS / IMPACT_SOUNDS names imported from
# youtube_audioset to the pair returned by get_all_sound_names("../").
AMBIENT_SOUNDS, IMPACT_SOUNDS = get_all_sound_names("../")

# Run get_recursive_sound_names over each category constant, in the same
# order as before and with the same "../" argument, and bind the results
# to the lower-case names.
# NOTE(review): presumably "../" points at the repository root that
# youtube_audioset reads its data from -- confirm in that module.
(explosion_sounds,
 motor_sounds,
 wood_sounds,
 human_sounds,
 nature_sounds) = (
     get_recursive_sound_names(category, "../")
     for category in (EXPLOSION_SOUNDS,
                      MOTOR_SOUNDS,
                      WOOD_SOUNDS,
                      HUMAN_SOUNDS,
                      NATURE_SOUNDS)
 )

########################################################################
# Load the balanced data
# This is a binary classification task, so the balancing has to be
# [ Ambient ] vs [ Impact ].
########################################################################
# NOTE(review): both flags are passed as 0; presumably this excludes the
# audiomoth and mixed-sound data -- confirm in balancing_dataset.
DATA_FRAME = balancing_dataset.balanced_data(
    audiomoth_flag=0,
    mixed_sounds_flag=0,
)

########################################################################
from ipywidgets import IntProgress

from mlxtend.plotting import plot_learning_curves, plot_decision_regions
from mlxtend.plotting import plot_confusion_matrix

from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import RMSprop

from keras_tqdm import TQDMNotebookCallback

from youtube_audioset import get_data, get_recursive_sound_names, get_all_sound_names
from youtube_audioset import explosion_sounds, motor_sounds, wood_sounds, human_sounds, nature_sounds

# Unpack the two collections returned by get_all_sound_names(); they are
# concatenated further down to fit the label binarizer.
ambient_sounds, impact_sounds = get_all_sound_names()

# Pass each category imported from youtube_audioset through
# get_recursive_sound_names and rebind the name to the result. The input
# tuple is built before any rebinding happens, and the calls run in the
# same order as the original one-per-line assignments.
(explosion_sounds,
 motor_sounds,
 wood_sounds,
 human_sounds,
 nature_sounds) = map(
     get_recursive_sound_names,
     (explosion_sounds,
      motor_sounds,
      wood_sounds,
      human_sounds,
      nature_sounds),
 )

# Load the balanced data written out by balancing_datasets.py.
# NOTE(review): unpickling runs arbitrary code embedded in the file, so
# 'balanced_data.pkl' must come from a trusted source.
with open('balanced_data.pkl', 'rb') as f:
    df = pickle.load(f)

# Report the shape as loaded, i.e. before the 'labels' column is assigned.
print(df.shape)

# Expose the 'labels_name' column under the 'labels' key as well.
df['labels'] = df['labels_name']

# Fit the label binarizer on the ambient names followed by the impact
# names. fit() returns the estimator itself, so constructing first and
# fitting afterwards leaves name_bin bound to the same fitted object.
name_bin = LabelBinarizer()
name_bin.fit(ambient_sounds + impact_sounds)