indepent_features = ['mfcc', 'spectral_contrast'] print('Constructing datasets') print('X') # the ind vars X = pd.DataFrame(D['X'][DATA_SET][indepent_features]) print('Y') # the dependent var Y = pd.DataFrame(D['Y'][DATA_SET], columns=['genre_top']) print('train/validation split') # Test and train split using encoded Y labels (vector of 0s with one 1) trainx, valx, trainy, valy = train_test_split( X.values, encode(Y), # one hot encoder, see ANN_encode.py test_size=VALIDATION_PERCENT, # validation size random_state=EXPERIMENT_SEED) sample = trainx[0].copy() print('Data done!\n\n********') net = 0 history = 0 callback = 0 # Use this for pre trained models net = ANN(trained_model=MODEL_NAME) if (MODEL_NAME_2 != ''):
def getHistory(data_set_size):
    """Train an ANN genre classifier on one dataset subset and return its
    training history.

    Loads features/labels through the Data Management interface, builds a
    train/validation split, constructs a single-hidden-layer network and
    trains it.

    Parameters
    ----------
    data_set_size : str
        Which subset to train on: 'small', 'medium' or 'cleanLarge'.

    Returns
    -------
    The history object produced by ``net.train`` (presumably a Keras-style
    History; see ANN.py / ANN_callbacks.py -- TODO confirm).
    """
    # --- Experiment configuration ------------------------------------
    EXPERIMENT_SEED = 42        # seed for the train/validation split
    VALIDATION_PERCENT = 0.1    # fraction of the data held out for validation
    DEFAULT_H_ACTIVATION = 'relu'
    DEFAULT_O_ACTIVATION = 'softmax'
    DEFAULT_LOSS = 'categorical_crossentropy'
    DEFAULT_BATCH = 200
    DEFAULT_EPOCHS = 200
    DATA_SET = data_set_size

    # --- Load the Data Management interface --------------------------
    # Project-local modules live outside this package, so extend sys.path
    # before importing them (kept function-local, as in the original).
    import sys
    sys.path.append('../Back_End/')
    sys.path.append('../Data_Management/')
    import CSVInterface
    import song_result_interface
    import pandasDB

    print('Initializing Data Management interface...')
    # reads the data from the csv
    reader = CSVInterface.featRead()
    DB = pandasDB.DataBase()

    # D['X'][subset] -> feature frame, D['Y'][subset] -> 'genre_top' labels.
    # One loop replaces the original's three copy-pasted getSubset blocks.
    D = {'X': {}, 'Y': {}}
    for subset in ('small', 'medium', 'cleanLarge'):
        D['X'][subset] = reader.getSubset(
            reader.getFrame('features'),
            sub=subset,
        )
        D['Y'][subset] = reader.getSubset(
            reader.getFrame('track')['genre_top'],
            sub=subset,
        )

    # Features used as the independent variables.
    independent_features = ['mfcc', 'spectral_contrast']

    print('Constructing datasets')
    print('X')
    # the independent vars
    X = pd.DataFrame(D['X'][DATA_SET][independent_features])
    print('Y')
    # the dependent var
    Y = pd.DataFrame(D['Y'][DATA_SET], columns=['genre_top'])

    print('train/validation split')
    # Split using encoded Y labels (one-hot vectors; see ANN_encode.py).
    # NOTE(review): these are used as a *validation* set despite the names
    # the original gave them -- VALIDATION_PERCENT sizes the held-out part.
    trainx, valx, trainy, valy = train_test_split(
        X.values,
        encode(Y),
        test_size=VALIDATION_PERCENT,
        random_state=EXPERIMENT_SEED,
    )
    # Keep one input row around so the net can be sized from the data.
    sample = trainx[0].copy()
    print('Data done!\n\n********')

    # --- Build the neural network ------------------------------------
    print('\nBuilding neural net')
    print('input : {}'.format(len(sample)))
    print('output: {}\n'.format(NUM_GENRES))

    net = ANN(p=Parameter(
        num_input=len(sample),
        num_hidden_layers=1,
        nodes_per_hidden=len(sample) + 1,
        num_output=NUM_GENRES,
        hidden_activation=DEFAULT_H_ACTIVATION,
        output_activation=DEFAULT_O_ACTIVATION,
        initialize=False,
        loss_function=DEFAULT_LOSS,
        features=independent_features,
    ))

    # Train the network.  Returns the training history and a callback
    # object that can extract model information at the end of training
    # events (see ANN_callbacks.py); only the history is returned here.
    h, callback = net.train(
        trainx,
        trainy,
        num_iter=DEFAULT_EPOCHS,
        testing=(valx, np.array(valy)),
        batch=DEFAULT_BATCH,
        interactive=False,
    )
    return h