def main(argv):
    if len(argv) != 4:
        print("Usage: python3 get_genre.py audiopath low medium high")
        exit()
    #le = LabelEncoder().fit(GENRES)
    # ------------------------------- #
    ## LOAD TRAINED GENRENET MODEL
    net = genreNet()
    net.load_state_dict(torch.load(MODELPATH, map_location='cpu'))
    # ------------------------------- #
    ## LOAD AUDIO
    audio_path = argv[0]
    y_val, sr = load(audio_path, mono=True, sr=22050)
    y = y_val[sr * 30:sr * 60]    # classify on the 30-60 s window only
    # ------------------------------- #
    ## GET CHUNKS OF AUDIO SPECTROGRAMS
    S = melspectrogram(y, sr).T
    S = S[:(S.shape[0] // 128) * 128]    # trim to a whole number of 128-frame chunks
    num_chunk = S.shape[0] // 128
    data_chunks = np.split(S, num_chunk)
    # ------------------------------- #
    ## CLASSIFY SPECTROGRAMS
    genres = list()
    for i, data in enumerate(data_chunks):
        data = torch.FloatTensor(data).view(1, 1, 128, 128)
        preds = net(data)
        pred_val, pred_index = preds.max(1)
        pred_index = pred_index.data.numpy()[0]
        pred_val = np.exp(pred_val.data.numpy()[0])
        #pred_genre = le.inverse_transform(pred_index)
        if pred_val >= 0.5:    # keep only confident chunk predictions
            genres.append(pred_index)
    # ------------------------------- #
    #s = float(sum([v for k, v in dict(Counter(genres)).items()]))
    #pos_genre = sorted([(k, v / s * 100) for k, v in dict(Counter(genres)).items()], key=lambda x: x[1], reverse=True)
    #for genre, pos in pos_genre:
    #    print("%10s: \t%.2f\t%%" % (genre, pos))
    genre_index = str(np.bincount(genres).argmax())    # most frequent predicted genre index
    low = argv[1]
    medium = argv[2]
    high = argv[3]
    if genre_index in low:
        print("Applying Low Effect!")
        pitch_shift(y_val, sr)
    elif genre_index in medium:
        print("Applying Medium Effect!")
        bass_operation(y_val, sr)
    elif genre_index in high:
        print("Applying High Effect!")
        #print("high")    # debug leftover
        overlay(y_val, sr)
    return
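# A minimal, hypothetical entry point for the effect-applying variant above, assuming it
# lives in get_genre.py and that low/medium/high are strings of genre indices (e.g.
# "01", "234", "56789"); this guard is a sketch, not part of the original script.
if __name__ == "__main__":
    import sys
    main(sys.argv[1:])    # drop sys.argv[0] (the script name) so argv[0] is the audio path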
def main(argv):
    if len(argv) != 1:
        print("Usage: python3 get_genre.py audiopath")
        exit()
    le = LabelEncoder().fit(GENRES)
    # ------------------------------- #
    ## LOAD TRAINED GENRENET MODEL
    net = genreNet()
    net.load_state_dict(torch.load(MODELPATH, map_location='cuda:0'))
    # ------------------------------- #
    ## LOAD AUDIO
    audio_path = argv[0]
    y, sr = load(audio_path, mono=True, sr=22050)
    # ------------------------------- #
    ## GET CHUNKS OF AUDIO SPECTROGRAMS
    S = melspectrogram(y, sr).T
    S = S[:(S.shape[0] // 128) * 128]    # trim to a whole number of 128-frame chunks
    num_chunk = S.shape[0] // 128
    data_chunks = np.split(S, num_chunk)
    # ------------------------------- #
    ## CLASSIFY SPECTROGRAMS
    genres = []
    for i, data in enumerate(data_chunks):
        data = torch.FloatTensor(data).view(1, 1, 128, 128)
        preds = net(data)
        pred_val, pred_index = preds.max(1)
        pred_index = pred_index.data.numpy()[0]
        pred_val = np.exp(pred_val.data.numpy()[0])
        pred_genre = le.inverse_transform([pred_index])[0]
        #if pred_val >= 0.5:
        genres.append(pred_genre)    # confidence threshold disabled; every chunk counts
    # ------------------------------- #
    s = float(sum([v for k, v in dict(Counter(genres)).items()]))
    #pos_genre = sorted([(k, v / s) for k, v in dict(Counter(genres)).items()], key=lambda x: x[1], reverse=True)
    pos_genre = [(k, v / s) for k, v in dict(Counter(genres)).items()]
    result = dict.fromkeys(GENRES, 0)
    for genre, pos in pos_genre:
        result[genre] = pos
    str_out = re.sub(r'\.mp3$', '', os.path.basename(audio_path))    # strip only the trailing .mp3 extension
    for genre in GENRES:
        str_out = str_out + ' ' + str(result[genre])
    print(str_out)
    return
def main(argv):
    if len(argv) != 1:
        print("Usage: python3 get_genre.py audiopath")
        exit()
    le = LabelEncoder().fit(GENRES)
    # ------------------------------- #
    ## LOAD TRAINED GENRENET MODEL
    net = genreNet()
    net.load_state_dict(torch.load(MODELPATH, map_location='cpu'))
    # ------------------------------- #
    ## LOAD AUDIO
    audio_path = argv[0]
    y, sr = load(audio_path, mono=True, sr=22050)
    # ------------------------------- #
    ## GET CHUNKS OF AUDIO SPECTROGRAMS
    S = melspectrogram(y, sr).T
    S = S[:(S.shape[0] // 128) * 128]    # trim to a whole number of 128-frame chunks
    num_chunk = S.shape[0] // 128
    data_chunks = np.split(S, num_chunk)
    # ------------------------------- #
    ## CLASSIFY SPECTROGRAMS
    genres = list()
    for i, data in enumerate(data_chunks):
        data = torch.FloatTensor(data).view(1, 1, 128, 128)
        preds = net(data)
        pred_val, pred_index = preds.max(1)
        pred_index = pred_index.data.numpy()
        pred_val = np.exp(pred_val.data.numpy()[0])
        pred_genre = le.inverse_transform(pred_index).item()
        if pred_val >= 0.5:    # keep only confident chunk predictions
            genres.append(pred_genre)
    # ------------------------------- #
    s = float(sum([v for k, v in dict(Counter(genres)).items()]))
    pos_genre = sorted([(k, v / s * 100) for k, v in dict(Counter(genres)).items()],
                       key=lambda x: x[1], reverse=True)
    for genre, pos in pos_genre:
        print("%10s: \t%.2f\t%%" % (genre, pos))
    return
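# A standalone sanity check of the chunking arithmetic used above (a sketch with a
# synthetic spectrogram; the 128x128 chunk size is implied by the model's input view
# of (1, 1, 128, 128)).
import numpy as np

S = np.random.rand(1292, 128)            # roughly 30 s at sr=22050 with librosa's default hop of 512
S = S[:(S.shape[0] // 128) * 128]        # 1292 -> 1280 frames
chunks = np.split(S, S.shape[0] // 128)  # 10 chunks of shape (128, 128)
assert all(chunk.shape == (128, 128) for chunk in chunks)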
def main(argv):
    if len(argv) != 1:
        print("Usage: python3 get_genre.py audiopath")
        exit()
    le = LabelEncoder().fit(GENRES)
    # ------------------------------- #
    ## LOAD TRAINED GENRENET MODEL
    net = genreNet()
    net.load_state_dict(torch.load(MODELPATH, map_location='cpu'))
    # ------------------------------- #
    ## LOAD AUDIO
    audio_path = argv[0]
    y, sr = load(audio_path, mono=True, sr=22050)
    # ------------------------------- #
    ## GET CHUNKS OF AUDIO SPECTROGRAMS
    S = melspectrogram(y, sr).T
    S = S[:(S.shape[0] // 128) * 128]    # trim to a whole number of 128-frame chunks
    num_chunk = S.shape[0] // 128
    data_chunks = np.split(S, num_chunk)
    # ------------------------------- #
    ## CLASSIFY SPECTROGRAMS
    genres = list()
    added = 0
    total = 0
    for i, data in enumerate(data_chunks):
        data = torch.FloatTensor(data).view(1, 1, 128, 128)
        preds = net(data)
        pred_val = preds.item()    # single scalar output per chunk in this variant
        print(pred_val)
        added += pred_val
        total += 1
    # ------------------------------- #
    print('Predicted: {}'.format((added / total) + 1960))    # mean chunk prediction plus a fixed 1960 offset
    # NOTE: `genres` is never populated in this variant, so the summary below prints nothing.
    s = float(sum([v for k, v in dict(Counter(genres)).items()]))
    pos_genre = sorted([(k, v / s * 100) for k, v in dict(Counter(genres)).items()],
                       key=lambda x: x[1], reverse=True)
    print('Should output something.')
    for genre, pos in pos_genre:
        print("%10s: \t%.2f\t%%" % (genre, pos))
    return
def main():
    ## DATA
    data = Data(GENRES, DATAPATH)
    data.make_raw_data()
    data.save()
    data = Data(GENRES, DATAPATH)
    data.load()

    ## SET
    set_ = Set(data)
    set_.make_dataset()
    set_.save()
    set_ = Set(data)
    set_.load()
    x_train, y_train = set_.get_train_set()
    x_valid, y_valid = set_.get_valid_set()
    x_test, y_test = set_.get_test_set()

    TRAIN_SIZE = len(x_train)
    VALID_SIZE = len(x_valid)
    TEST_SIZE = len(x_test)

    net = genreNet()
    net.cuda()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4)

    EPOCH_NUM = 200
    BATCH_SIZE = 16

    tra_loss = []
    tra_acc = []
    val_acc = []
    val_loss = []
    for epoch in range(EPOCH_NUM):
        inp_train, out_train = Variable(torch.from_numpy(x_train)).float().cuda(), \
                               Variable(torch.from_numpy(y_train)).long().cuda()
        inp_valid, out_valid = Variable(torch.from_numpy(x_valid)).float().cuda(), \
                               Variable(torch.from_numpy(y_valid)).long().cuda()

        ## TRAIN PHASE
        train_loss = 0
        for i in range(0, TRAIN_SIZE, BATCH_SIZE):
            optimizer.zero_grad()    # <-- OPTIMIZER: reset gradients once per batch
            x_train_batch, y_train_batch = inp_train[i:i + BATCH_SIZE], out_train[i:i + BATCH_SIZE]
            pred_train_batch = net(x_train_batch)
            loss_train_batch = criterion(pred_train_batch, y_train_batch)
            train_loss += loss_train_batch.data.cpu().numpy()
            loss_train_batch.backward()
            optimizer.step()         # <-- OPTIMIZER
        epoch_train_loss = (train_loss * BATCH_SIZE) / TRAIN_SIZE
        tra_loss.append(epoch_train_loss)    # keep the same per-sample scale as val_loss

        train_sum = 0
        for i in range(0, TRAIN_SIZE, BATCH_SIZE):
            pred_train = net(inp_train[i:i + BATCH_SIZE])
            indices_train = pred_train.max(1)[1]
            train_sum += (indices_train == out_train[i:i + BATCH_SIZE]).sum().data.cpu().numpy()
        train_accuracy = train_sum / float(TRAIN_SIZE)
        tra_acc.append(train_accuracy)

        ## VALIDATION PHASE
        valid_loss = 0
        for i in range(0, VALID_SIZE, BATCH_SIZE):
            x_valid_batch, y_valid_batch = inp_valid[i:i + BATCH_SIZE], out_valid[i:i + BATCH_SIZE]
            pred_valid_batch = net(x_valid_batch)
            loss_valid_batch = criterion(pred_valid_batch, y_valid_batch)
            valid_loss += loss_valid_batch.data.cpu().numpy()
        epoch_valid_loss = (valid_loss * BATCH_SIZE) / VALID_SIZE
        val_loss.append(epoch_valid_loss)

        valid_sum = 0
        for i in range(0, VALID_SIZE, BATCH_SIZE):
            pred_valid = net(inp_valid[i:i + BATCH_SIZE])
            indices_valid = pred_valid.max(1)[1]
            valid_sum += (indices_valid == out_valid[i:i + BATCH_SIZE]).sum().data.cpu().numpy()
        valid_accuracy = valid_sum / float(VALID_SIZE)
        val_acc.append(valid_accuracy)

        print("Epoch: %d\t\tTrain loss : %.2f\t\tValid loss : %.2f\t\tTrain acc : %.2f\t\tValid acc : %.2f" %
              (epoch + 1, epoch_train_loss, epoch_valid_loss, train_accuracy, valid_accuracy))

    ## SAVE GENRENET MODEL
    torch.save(net.state_dict(), MODELPATH)
    print("-> pytorch model is saved")

    save_dir = "../utils/"
    plt.plot(tra_loss)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("Training Loss")
    plt.tight_layout()
    plt.savefig("./evaluate.png", format="png", bbox_inches="tight")
    plt.close()

    plt.plot(tra_acc)
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.title("Training Accuracy")
    plt.tight_layout()
    plt.savefig("./tra_acc.png", format="png", bbox_inches="tight")
    plt.close()

    plt.plot(val_loss)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("Validation Loss")
    plt.tight_layout()
    plt.savefig("./val_loss.png", format="png", bbox_inches="tight")
    plt.close()

    plt.plot(val_acc)
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.title("Validation Accuracy")
    plt.tight_layout()
    plt.savefig("./val_acc.png", format="png", bbox_inches="tight")
    plt.close()

    ## EVALUATE TEST ACCURACY
    test_acc = []
    inp_test, out_test = Variable(torch.from_numpy(x_test)).float().cuda(), \
                         Variable(torch.from_numpy(y_test)).long().cuda()
    test_sum = 0
    for i in range(0, TEST_SIZE, BATCH_SIZE):
        pred_test = net(inp_test[i:i + BATCH_SIZE])
        indices_test = pred_test.max(1)[1]
        test_sum += (indices_test == out_test[i:i + BATCH_SIZE]).sum().data.cpu().numpy()
    test_acc.append(test_sum / float(TEST_SIZE))
    test_accuracy = test_sum / float(TEST_SIZE)
    print("Test acc: %.2f" % test_accuracy)

    plt.plot(test_acc)
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.title("Testing Accuracy")
    plt.tight_layout()
    plt.savefig("./test_acc.png", format="png", bbox_inches="tight")
    plt.close()
    return
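# The accuracy passes above run the network with autograd enabled; a helper like this
# (a sketch, not part of the original script) does the same batched argmax comparison
# under torch.no_grad(), which skips graph construction during evaluation.
def batched_accuracy(net, inp, out, batch_size=16):
    correct = 0
    with torch.no_grad():
        for i in range(0, len(inp), batch_size):
            preds = net(inp[i:i + batch_size])    # (batch, num_classes) scores
            correct += (preds.max(1)[1] == out[i:i + batch_size]).sum().item()
    return correct / float(len(inp))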
def main():
    ## DATA
    #data = Data_rank(RANKS, DATAPATH)
    #data.make_raw_data()   # computing features
    #data.make_data_sets()

    ## SET
    data = Data_rank(RANKS, DATAPATH)
    set_ = Set(data)
    set_.chunk_size = 160    # files
    set_.make_chunks()
    #x_train, y_train = set_.get_train_set()
    #TRAIN_SIZE = len(x_train)

    #rates = [5e-1, 5e-2, 5e-3, 5e-4, 5e-5]
    EPOCH_NUM = 250
    BATCH_SIZE = 64

    # load checkpoint if one exists
    point = 0
    net = genreNet()
    #optimizer = torch.optim.RMSprop(net.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-5)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-2, weight_decay=1e-5)
    #scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[EPOCH_NUM, 0.95])
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
    if not os.path.isfile(MODELCHECKPOINT):
        print("Starting from zero point")
        point = -1
    else:
        checkpoint = torch.load(MODELCHECKPOINT)
        net = checkpoint['model']
        optimizer = torch.optim.Adam(net.parameters())
        optimizer.load_state_dict(checkpoint['optimizer'])    # restore the saved optimizer state as well
        point = checkpoint['epoch']
        net.load_state_dict(checkpoint['state_dict'])
        #scheduler.load_state_dict(checkpoint['scheduler'])
        print("Starting from the checkpoint " + MODELCHECKPOINT + ' from epoch: ' + str(point))
    criterion = torch.nn.CrossEntropyLoss()
    net.cuda()

    for epoch in range(EPOCH_NUM):
        if epoch > point:
            ## TRAIN PHASE
            c_iter = 0
            num_chunks = len(set_.train_chunks)
            for chunk in set_.train_chunks:
                x_train, y_train = set_.get_set(chunk)
                if x_train is not None:
                    TRAIN_SIZE = len(x_train)
                    inp_, out_ = Variable(torch.from_numpy(x_train)).float().cuda(), \
                                 Variable(torch.from_numpy(y_train)).long().cuda()
                    train_loss = 0
                    for i in range(0, TRAIN_SIZE, BATCH_SIZE):
                        optimizer.zero_grad()    # <-- OPTIMIZER
                        x_train_batch, y_train_batch = inp_[i:i + BATCH_SIZE], out_[i:i + BATCH_SIZE]
                        pred_train_batch = net(x_train_batch)
                        loss_train_batch = criterion(pred_train_batch, y_train_batch)
                        train_loss += loss_train_batch.data.cpu().item()
                        loss_train_batch.backward()
                        #clip_grad_norm_(net.parameters(), 5)
                        optimizer.step()         # <-- OPTIMIZER
                    epoch_train_loss = train_loss / (TRAIN_SIZE / BATCH_SIZE)

                    train_sum = 0
                    for i in range(0, TRAIN_SIZE, BATCH_SIZE):
                        pred_train = net(inp_[i:i + BATCH_SIZE])
                        indices_train = pred_train.max(1)[1]
                        train_sum += (indices_train == out_[i:i + BATCH_SIZE]).sum().data.cpu().item()
                    train_accuracy = train_sum / float(TRAIN_SIZE)
                    print("Epoch: %d\t\tIter: %d/%d\t\tTrain loss : %.2f\t\tTrain accuracy: %.2f" %
                          (epoch + 1, c_iter + 1, num_chunks, epoch_train_loss, train_accuracy))
                    del inp_, out_
                c_iter += 1
            #scheduler.step()

            ## SAVE CHECKPOINT
            checkpoint = {'model': genreNet(),
                          'state_dict': net.state_dict(),
                          'optimizer': optimizer.state_dict(),
                          #'scheduler': scheduler.state_dict(),
                          'epoch': epoch}
            torch.save(checkpoint, MODELCHECKPOINT)
            print("Saving checkpoint for epoch " + str(epoch))

            ## VALIDATION PHASE
            torch.cuda.empty_cache()
            valid_loss = 0
            valid_sum = 0
            valid_total = 0    # number of validation samples accumulated over all chunks
            for v_chunk in set_.valid_chunks:
                x_valid, y_valid = set_.get_set(v_chunk)
                if x_valid is not None:
                    VALID_SIZE = len(x_valid)
                    valid_total += VALID_SIZE
                    inp_, out_ = Variable(torch.from_numpy(x_valid)).float().cuda(), \
                                 Variable(torch.from_numpy(y_valid)).long().cuda()
                    for i in range(0, VALID_SIZE, BATCH_SIZE):
                        x_valid_batch, y_valid_batch = inp_[i:i + BATCH_SIZE], out_[i:i + BATCH_SIZE]
                        pred_valid_batch = net(x_valid_batch)
                        loss_valid_batch = criterion(pred_valid_batch, y_valid_batch)
                        valid_loss += loss_valid_batch.data.cpu().item()
                    for i in range(0, VALID_SIZE, BATCH_SIZE):
                        pred_valid = net(inp_[i:i + BATCH_SIZE])
                        indices_valid = pred_valid.max(1)[1]
                        valid_sum += (indices_valid == out_[i:i + BATCH_SIZE]).sum().data.cpu().item()
                    del inp_, out_
            valid_accuracy = valid_sum / float(valid_total)              # averaged over all chunks, not just the last one
            epoch_valid_loss = (valid_loss * BATCH_SIZE) / valid_total
            print("Epoch: %d\t\tTrain loss : %.2f\t\tValid loss : %.2f\t\tTrain acc : %.2f\t\tValid acc : %.2f" %
                  (epoch + 1, epoch_train_loss, epoch_valid_loss, train_accuracy, valid_accuracy))

            torch.save(net.state_dict(), MODELPATH)
            print('-> pytorch model is saved.')

    ## EVALUATE TEST ACCURACY
    torch.cuda.empty_cache()
    test_sum = 0
    test_total = 0    # number of test samples accumulated over all chunks
    for t_chunk in set_.test_chunks:
        x_test, y_test = set_.get_set(t_chunk)
        if x_test is not None:
            TEST_SIZE = len(x_test)
            test_total += TEST_SIZE
            inp_, out_ = Variable(torch.from_numpy(x_test)).float().cuda(), \
                         Variable(torch.from_numpy(y_test)).long().cuda()
            for i in range(0, TEST_SIZE, BATCH_SIZE):
                pred_test = net(inp_[i:i + BATCH_SIZE])
                indices_test = pred_test.max(1)[1]
                test_sum += (indices_test == out_[i:i + BATCH_SIZE]).sum().data.cpu().item()
            del inp_, out_
    test_accuracy = test_sum / float(test_total)
    print("Test acc: %.2f" % test_accuracy)
    return
def main():
    ## DATA
    data = Data(GENRES, DATAPATH)
    #data.make_raw_data()
    #data.save()
    data = Data(GENRES, DATAPATH)
    data.load()

    ## SET
    set_ = Set(data)
    set_.make_dataset()
    set_.save()
    set_ = Set(data)
    set_.load()
    x_train, y_train = set_.get_train_set()
    x_valid, y_valid = set_.get_valid_set()
    x_test, y_test = set_.get_test_set()

    TRAIN_SIZE = len(x_train)
    VALID_SIZE = len(x_valid)
    TEST_SIZE = len(x_test)

    net = genreNet()
    net.cuda()
    #criterion = torch.nn.CrossEntropyLoss()
    criterion = torch.nn.L1Loss()    # NOTE: L1Loss expects a float target shaped like the prediction; the long class labels below would likely need casting/reshaping
    optimizer = torch.optim.RMSprop(net.parameters(), lr=1e-4)

    EPOCH_NUM = 100    # 250
    BATCH_SIZE = 16    # 16

    for epoch in range(EPOCH_NUM):
        inp_train, out_train = Variable(torch.from_numpy(x_train)).float().cuda(), \
                               Variable(torch.from_numpy(y_train)).long().cuda()
        inp_valid, out_valid = Variable(torch.from_numpy(x_valid)).float().cuda(), \
                               Variable(torch.from_numpy(y_valid)).long().cuda()

        ## TRAIN PHASE
        train_loss = 0
        for i in range(0, TRAIN_SIZE, BATCH_SIZE):
            optimizer.zero_grad()    # <-- OPTIMIZER
            x_train_batch, y_train_batch = inp_train[i:i + BATCH_SIZE], out_train[i:i + BATCH_SIZE]
            pred_train_batch = net(x_train_batch)
            loss_train_batch = criterion(pred_train_batch, y_train_batch)
            train_loss += loss_train_batch.data.cpu().numpy()    #[0]
            loss_train_batch.backward()
            optimizer.step()         # <-- OPTIMIZER
        epoch_train_loss = (train_loss * BATCH_SIZE) / TRAIN_SIZE

        train_sum = 0
        for i in range(0, TRAIN_SIZE, BATCH_SIZE):
            pred_train = net(inp_train[i:i + BATCH_SIZE])
            indices_train = pred_train.max(0)[0]    #(1)[1]
            train_sum += (indices_train == out_train[i:i + BATCH_SIZE]).sum().data.cpu().numpy()    #[0]
        train_accuracy = train_sum / float(TRAIN_SIZE)

        ## VALIDATION PHASE
        valid_loss = 0
        for i in range(0, VALID_SIZE, BATCH_SIZE):
            x_valid_batch, y_valid_batch = inp_valid[i:i + BATCH_SIZE], out_valid[i:i + BATCH_SIZE]
            pred_valid_batch = net(x_valid_batch)
            loss_valid_batch = criterion(pred_valid_batch, y_valid_batch)
            valid_loss += loss_valid_batch.data.cpu().numpy()    #[0]
        epoch_valid_loss = (valid_loss * BATCH_SIZE) / VALID_SIZE

        valid_sum = 0
        for i in range(0, VALID_SIZE, BATCH_SIZE):
            pred_valid = net(inp_valid[i:i + BATCH_SIZE])
            indices_valid = pred_valid.max(0)[0]    # 1 1
            valid_sum += (indices_valid == out_valid[i:i + BATCH_SIZE]).sum().data.cpu().numpy()    #[0]
        valid_accuracy = valid_sum / float(VALID_SIZE)

        print("Epoch: %d\t\tTrain loss : %.2f\t\tValid loss : %.2f\t\tTrain acc : %.2f\t\tValid acc : %.2f" %
              (epoch + 1, epoch_train_loss, epoch_valid_loss, train_accuracy, valid_accuracy))

    ## SAVE GENRENET MODEL
    torch.save(net.state_dict(), MODELPATH)
    print('-> pytorch model is saved.')

    ## EVALUATE TEST ACCURACY
    inp_test, out_test = Variable(torch.from_numpy(x_test)).float().cuda(), \
                         Variable(torch.from_numpy(y_test)).long().cuda()
    test_sum = 0
    for i in range(0, TEST_SIZE, BATCH_SIZE):
        pred_test = net(inp_test[i:i + BATCH_SIZE])
        indices_test = pred_test.max(0)[0]    # [0][0]
        test_sum += (indices_test == out_test[i:i + BATCH_SIZE]).sum().data.cpu().numpy()    #[0]
    test_accuracy = test_sum / float(TEST_SIZE)
    print("Test acc: %.2f" % test_accuracy)
    return