def prepare_tabular_data(dataset_dir, concat_train_valid):
    """Load train/val/test tabular splits and one-hot encode their labels.

    Parameters
    ----------
    dataset_dir : path of the saved datasets to load.
    concat_train_valid : when True, fold the validation split into the
        training split (the returned validation entries become None).

    Returns
    -------
    (datasets_orig, datasets_enc, label_encoder) where each datasets_* is
    ((X_train, y_train), (X_val, y_val), (X_test, y_test)) and datasets_enc
    carries the one-hot encoded labels.
    """
    logging.info('Loading datsets from: {}'.format(dataset_dir))
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = \
        load_datasets(dataset_dir)

    if concat_train_valid:
        # Merge validation into training; downstream code treats None as
        # "no validation split".
        X_train = pd.concat([X_train, X_val])
        y_train = pd.concat([y_train, y_val])
        X_val, y_val = None, None

    # Fit the label encoder on every label that can appear. pd.concat drops
    # None entries silently, so this works whether or not y_val is None.
    label_encoder, _ = utility.encode_labels(
        pd.concat([y_train, y_val, y_test]), encoder=None)

    # One-hot encode each split with the shared encoder (the output layer
    # has one node per class).
    _, y_train_enc = utility.encode_labels(y_train, encoder=label_encoder)
    _, y_test_enc = utility.encode_labels(y_test, encoder=label_encoder)
    y_val_enc = None
    if X_val is not None:
        _, y_val_enc = utility.encode_labels(y_val, encoder=label_encoder)

    datasets_orig = (X_train, y_train), (X_val, y_val), (X_test, y_test)
    datasets_enc = (X_train, y_train_enc), (X_val, y_val_enc), \
        (X_test, y_test_enc)
    return datasets_orig, datasets_enc, label_encoder
def get_y_classif(self, labels):
    """Return the encoded classification targets for *labels*.

    Delegates entirely to ``u.encode_labels``; see that helper for the
    exact return contract (originally documented as the one-hot array of
    shape [num_samples, num_labels] plus the class count).

    Inputs:
        -> labels, list of string
    Outputs:
        -> whatever ``u.encode_labels(labels)`` returns
    """
    encoded = u.encode_labels(labels)
    return encoded
def main():
    """Fit a ZCA whitening transform on the full training set and save it.

    Reads the training metadata CSV, encodes its labels, loads every image
    through one full-dataset batch, fits ``utility.ZCA`` on that array and
    writes the resulting matrix and mean to a .mat file.
    """
    # Load metadata and encode label column in place.
    meta_df = pd.read_csv('data/train.csv')
    meta_df, _ = utility.encode_labels(meta_df)

    # One loader batch holds the whole dataset (batch_size=128 at 32px here).
    loader = dl.get_full_data_loader(meta_df,
                                     data_dir='data/train_images',
                                     batch_size=128,
                                     image_size=32)
    images = next(iter(loader))[0].numpy()

    # Fit the whitening transform and persist its parameters for reuse.
    whitener = utility.ZCA()
    whitener.fit(images)
    savemat("data/zca_data.mat",
            {"zca_matrix": whitener.ZCA_mat, "zca_mean": whitener.mean})
def prepare_sequence_data(dataset_dir, time_steps, concat_train_valid):
    """Load the splits and turn flows into fixed-length sequences for an LSTM.

    Parameters:
        dataset_dir: directory passed to load_datasets().
        time_steps: sequence length handed to utility.extract_flow_sequences.
        concat_train_valid: if True, fold the validation split into training
            (validation entries of the returned tuples become None).

    Returns:
        (datasets_orig, datasets_enc, label_encoder) where each datasets_* is
        ((X_train, y_train), (X_val, y_val), (X_test, y_test)); datasets_enc
        holds one-hot labels reshaped to (num_sequences, time_steps, classes).
    """
    # Load data
    logging.info('Loading datsets from: {}'.format(dataset_dir))
    datasets_loaded = load_datasets(dataset_dir)
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = datasets_loaded
    if concat_train_valid:
        # Merge validation into training; None marks "no validation split"
        # for all the guards below.
        X_train = pd.concat([X_train, X_val])
        y_train = pd.concat([y_train, y_val])
        X_val = None
        y_val = None
    # datasets_orig = (X_train, y_train), (X_val, y_val), (X_test, y_test)
    # Prepare sequences of flows (for LSTM input)
    logging.info('Preparing flow sequences')
    t0 = time.time()
    # NOTE(review): extract_flow_sequences appears to return (X_sequences,
    # y_sequences); exact shapes depend on utility — the reshapes below imply
    # y_*_seq is (num_sequences, time_steps). Confirm against utility.
    X_train, y_train_seq = utility.extract_flow_sequences(
        X_train, y_train, time_steps, None)
    y_val_seq = None
    if X_val is not None:
        X_val, y_val_seq = utility.extract_flow_sequences(
            X_val, y_val, time_steps, None)
    X_test, y_test_seq = utility.extract_flow_sequences(
        X_test, y_test, time_steps, None)
    logging.info('Extracting flows complete. time_taken = {:.2f} sec'.format(
        time.time() - t0))
    # One-hot encode class labels (needed as output layer has multiple nodes)
    # Fit the encoder on the union of all labels so every split shares the
    # same class index mapping.
    y_list = [y_train_seq.flatten()]
    if y_val_seq is not None:
        y_list.append(y_val_seq.flatten())
    y_list.append(y_test_seq.flatten())
    all_y = np.hstack(y_list)
    label_encoder, all_y_enc = utility.encode_labels(all_y, encoder=None)
    unused, y_train_enc = utility.encode_labels(y_train_seq.flatten(),
                                                encoder=label_encoder)
    y_val_enc = None
    if y_val_seq is not None:
        unused, y_val_enc = utility.encode_labels(y_val_seq.flatten(),
                                                  encoder=label_encoder)
    unused, y_test_enc = utility.encode_labels(y_test_seq.flatten(),
                                               encoder=label_encoder)
    # Reshape flat one-hot rows back to (num_sequences, time_steps, classes);
    # all_y_enc.shape[1] is the number of classes seen by the encoder.
    y_train_enc = y_train_enc.reshape(y_train_seq.shape[0],
                                      y_train_seq.shape[1],
                                      all_y_enc.shape[1])
    if y_val_seq is not None:
        y_val_enc = y_val_enc.reshape(y_val_seq.shape[0],
                                      y_val_seq.shape[1],
                                      all_y_enc.shape[1])
    y_test_enc = y_test_enc.reshape(y_test_seq.shape[0],
                                    y_test_seq.shape[1],
                                    all_y_enc.shape[1])
    # batch_size * time_steps in the prepared seq
    # Sequence extraction can drop trailing flows, so the original label
    # series are trimmed to the number of flows actually kept.
    train_num_flows = X_train.shape[0] * X_train.shape[1]
    test_num_flows = X_test.shape[0] * X_test.shape[1]
    y_values = None
    if y_val is not None:
        val_num_flows = X_val.shape[0] * X_val.shape[1]
        y_values = y_val[0:val_num_flows]
    datasets_orig = (X_train, y_train[0:train_num_flows]), (X_val, y_values), (
        X_test, y_test[0:test_num_flows])
    datasets_enc = (X_train, y_train_enc), (X_val, y_val_enc), (X_test,
                                                                y_test_enc)
    return datasets_orig, datasets_enc, label_encoder
def train_model():
    """Run one wandb sweep trial: build loaders, model, optimizer and train.

    Reads all hyperparameters (batch_size, resnet_type, use_feature_extract,
    optimizer, learning_rate, epochs, scheduler) from ``wandb.config`` and
    delegates the training loop to ``trainer.train_model``.

    Raises:
        ValueError: if ``config.optimizer`` is not one of
            'sgd' / 'rmsprop' / 'adam'.
    """
    # Init device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Initialize a new wandb run; config holds the sweep's hyperparameters.
    wandb.init()
    config = wandb.config
    # Load the meta data file and encode its labels.
    df = pd.read_csv('data/train.csv')
    df = df.drop(['timestamp'], axis=1)
    df, _ = utility.encode_labels(df)
    num_classes = len(df['label'].value_counts())
    # Build the dataset loaders (augmented training split).
    train_loader, valid_loader = dl.get_train_valid_loader(
        df,
        data_dir='data/train_images',
        batch_size=config.batch_size,
        image_size=IMAGE_SIZE,
        augment=True,
        random_seed=0)
    # Make resnet
    model = utility.initialize_net(num_classes, config.resnet_type,
                                   config.use_feature_extract)
    model = model.to(device)
    # Gather the parameters to be optimized/updated in this run.
    params_to_update = utility.get_model_params_to_train(
        model, config.use_feature_extract)
    # Define criterion + optimizer
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == 'sgd':
        optimizer = optim.SGD(params_to_update, lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(params_to_update, lr=config.learning_rate)
    elif config.optimizer == 'adam':
        optimizer = optim.Adam(params_to_update, lr=config.learning_rate)
    else:
        # Fail fast with a clear message; previously an unrecognized value
        # left `optimizer` unbound and crashed below with a NameError.
        raise ValueError(
            'Unsupported optimizer: {!r}'.format(config.optimizer))
    # Define scheduler (one cycle over the whole run).
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer,
        max_lr=10,
        epochs=config.epochs,
        anneal_strategy=config.scheduler,
        steps_per_epoch=len(train_loader))
    trainer.train_model(device=device,
                        model=model,
                        optimizer=optimizer,
                        criterion=criterion,
                        train_loader=train_loader,
                        valid_loader=valid_loader,
                        scheduler=scheduler,
                        epochs=config.epochs,
                        send_to_wandb=True)
def main():
    """Train every configured network at every configured image resolution.

    For each size in IMAGE_SIZES: optionally (re)fit and save a ZCA whitening
    matrix, build augmented train / plain valid loaders, then for each net in
    NETS initialize (or resume) a model and run the training loop, saving the
    trained weights per net type.
    """
    # Load the meta data file
    df = pd.read_csv('./data/train.csv')
    df, label_encoder = utility.encode_labels(df)
    num_classes = len(df['label'].value_counts())
    # Persist the encoder classes so inference can decode predictions later.
    np.save('./data/label_encoder_classes.npy', label_encoder.classes_)
    # Generate the ZCA matrix if enabled
    for image_size in IMAGE_SIZES:  # train for every res
        if APPLY_ZCA_TRANS:
            print("Making ZCA matrix ...")
            # One full-dataset batch is used to fit the whitening transform.
            data_loader = dl.get_full_data_loader(df,
                                                  data_dir=DATA_DIR,
                                                  batch_size=BATCH_SIZE,
                                                  image_size=image_size)
            train_dataset_arr = next(iter(data_loader))[0].numpy()
            zca = utility.ZCA()
            zca.fit(train_dataset_arr)
            zca_dic = {"zca_matrix": zca.ZCA_mat, "zca_mean": zca.mean}
            # NOTE(review): the same path is overwritten for every
            # image_size — only the last resolution's matrix survives.
            savemat("./data/zca_data.mat", zca_dic)
            print("Completed making ZCA matrix")
        # Define normalization (standard ImageNet channel statistics).
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )
        # Define specific transforms: heavy augmentation for training only.
        train_transform = transforms.Compose([
            utility.AddPadding(),
            transforms.Resize((image_size, image_size)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=(-90, 90)),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ColorJitter(.4, .4, .4),
            transforms.ToTensor(),
            normalize
        ])
        valid_transform = transforms.Compose([
            utility.AddPadding(),
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            normalize
        ])
        # Create a train and valid dataset
        train_dataset = dl.HotelImagesDataset(df,
                                              root_dir=DATA_DIR,
                                              transform=train_transform)
        valid_dataset = dl.HotelImagesDataset(df,
                                              root_dir=DATA_DIR,
                                              transform=valid_transform)
        # Get a train and valid data loader
        train_loader, valid_loader = dl.get_train_valid_loader(
            train_dataset, valid_dataset, batch_size=BATCH_SIZE,
            random_seed=0)
        for net_type in NETS:  # train for every net
            model = utility.initialize_net(num_classes, net_type,
                                           feature_extract=FEATURE_EXTRACT)
            # If old model exists, take state from it (resume training).
            if path.exists(f"./models/model_{net_type}.pt"):
                print("Resuming training on trained model ...")
                model = utility.load_latest_model(
                    model, f'./models/model_{net_type}.pt')
            # Gather the parameters to be optimized/updated in this run.
            params_to_update = utility.get_model_params_to_train(
                model, FEATURE_EXTRACT)
            # Send model to GPU
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")
            model = model.to(device)
            # Make criterion
            criterion = nn.CrossEntropyLoss()
            # Make optimizer + scheduler (LR drops when val loss plateaus).
            optimizer = torch.optim.SGD(params_to_update,
                                        lr=0.01,
                                        momentum=0.9)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.01, patience=3)
            trained_model = trainer.train_model(
                device=device,
                model=model,
                optimizer=optimizer,
                criterion=criterion,
                train_loader=train_loader,
                valid_loader=valid_loader,
                scheduler=scheduler,
                net_type=net_type,
                epochs=EPOCHS,
                apply_zca_trans=APPLY_ZCA_TRANS)
            utility.save_current_model(trained_model,
                                       f"./models/model_{net_type}.pt")
def main(classes_num=20, gid=0, random_state=0, \
        bs=100, learn_rate=0.0001, \
        val_num=1, stop_num=20,
        origin=True, vocal=True, remix=True,
        CRNN_model=True, CRNNx2_model=False,
        debug=False):
    """Train a CRNN artist classifier on original / vocal / remixed audio.

    Parameters:
        classes_num: number of artist classes.
        gid: GPU id (currently unused in this body — TODO confirm).
        random_state: dataset split seed; also names the save folder.
        bs: batch size; learn_rate: Adam learning rate.
        val_num: validate every val_num epochs.
        stop_num: early-stop patience in epochs (0/None disables).
        origin/vocal/remix: which training streams to use.
        CRNN_model/CRNNx2_model: which architecture to build.
        debug: use small zero-filled dummy arrays instead of loading data.

    Side effects: creates save folders, saves the best model state dict and
    test result arrays, prints progress. Requires CUDA (.cuda() throughout).
    """
    start_time = time.time()

    # Save-folder layout encodes which training streams were enabled.
    save_folder = '../save/'+str(random_state)+'/'
    if origin and vocal and remix:
        save_folder = save_folder + '/all/'
    elif origin:
        save_folder = save_folder + '/ori/'
    elif vocal:
        save_folder = save_folder + '/voc/'
    elif remix:
        save_folder = save_folder + '/remix/'
    if not os.path.exists(save_folder+'/model/'):
        os.makedirs(save_folder+'/model/')
    if not os.path.exists(save_folder+'/result/'):
        os.makedirs(save_folder+'/result/')

    # Effectively "train until early stopping" — stop_num is the real limit.
    epoch_num = 10000

    print('Loading pretrain model ...')
    # Classifier = model.CRNN2D_elu(224,classes_num)
    # Classifier.float()
    # Classifier.cuda()
    # Classifier.train()
    # NOTE(review): if both CRNN_model and CRNNx2_model are False,
    # Classifier is never bound and the code fails later with a NameError.
    if CRNN_model:
        Classifier = model.CRNN2D_elu(224,classes_num)
        Classifier.float()
        Classifier.cuda()
        Classifier.train()
    elif CRNNx2_model:
        Classifier = model.CRNN2D_elu2(288,classes_num)
        Classifier.float()
        Classifier.cuda()
        Classifier.train()

    print('Loading training data ...')
    # Hard-coded dataset locations on the original author's machine.
    artist_folder=f'/home/bill317996/189/homes/kevinco27/dataset/artist20_mix'
    song_folder=f'/home/bill317996/189/homes/kevinco27/ICASSP2020_meledy_extraction/music-artist-classification-crnn/song_data_mix'
    voc_folder=f'/home/bill317996/189/homes/kevinco27/ICASSP2020_meledy_extraction/music-artist-classification-crnn/song_data_open_unmix_vocal_2'
    bgm_folder = f'/home/bill317996/189/homes/kevinco27/ICASSP2020_meledy_extraction/music-artist-classification-crnn/song_data_open_unmix_kala'
    # random_states = [0,21,42]

    if debug:
        # Dummy zero arrays shaped like pre-sliced data (presumably
        # (n, mel_bins, frames) = (n, 128, 157) — TODO confirm).
        Y_train, X_train, S_train, V_train, B_train,\
        Y_test, X_test, S_test, V_test, B_test,\
        Y_val, X_val, S_val, V_val, B_val = \
            np.zeros(11437, dtype=int), np.zeros((11437, 128, 157)), np.zeros(11437), np.zeros((11437, 128, 157)), np.zeros((11437, 128, 157)), \
            np.zeros(11437, dtype=int), np.zeros((11437, 128, 157)), np.zeros(11437), np.zeros((11437, 128, 157)), np.zeros((11437, 128, 157)), \
            np.zeros(11437, dtype=int), np.zeros((11437, 128, 157)), np.zeros(11437), np.zeros((11437, 128, 157)), np.zeros((11437, 128, 157))
        # Ensure at least two label values so the encoder/metrics work.
        Y_train[0] = 1
        Y_val[0] = 1
        Y_test[0] = 1
    else:
        # Y=labels, X=mix spectrograms, S=song ids, V=vocal stems,
        # B=background stems (per split) — inferred from usage below.
        Y_train, X_train, S_train, V_train, B_train,\
        Y_test, X_test, S_test, V_test, B_test,\
        Y_val, X_val, S_val, V_val, B_val = \
            utility.load_dataset_album_split_da(song_folder_name=song_folder,
                                                artist_folder=artist_folder,
                                                voc_song_folder=voc_folder,
                                                bgm_song_folder=bgm_folder,
                                                nb_classes=classes_num,
                                                random_state=random_state)
    if not debug:
        print("Loaded and split dataset. Slicing songs...")
        slice_length = 157
        # Create slices out of the songs
        X_train, Y_train, S_train, V_train, B_train = utility.slice_songs_da(X_train, Y_train, S_train, V_train, B_train,
                                                                             length=slice_length)
        X_val, Y_val, S_val, V_val, B_val = utility.slice_songs_da(X_val, Y_val, S_val, V_val, B_val,
                                                                   length=slice_length)
        X_test, Y_test, S_test, V_test, B_test = utility.slice_songs_da(X_test, Y_test, S_test, V_test, B_test,
                                                                        length=slice_length)
        print("Training set label counts:", np.unique(Y_train, return_counts=True))

    # # Encode the target vectors into one-hot encoded vectors
    # The same encoder (le, enc) is reused for all three splits.
    Y_train, le, enc = utility.encode_labels(Y_train)
    Y_test, le, enc = utility.encode_labels(Y_test, le, enc)
    Y_val, le, enc = utility.encode_labels(Y_val, le, enc)
    # Keep only the integer class index column for CrossEntropyLoss.
    Y_train = Y_train[:,0]
    Y_test = Y_test[:,0]
    Y_val = Y_val[:,0]

    print(X_train.shape, Y_train.shape, S_train.shape, V_train.shape, B_train.shape)
    print(X_val.shape, Y_val.shape, S_val.shape, V_val.shape, B_val.shape)
    print(X_test.shape, Y_test.shape, S_test.shape, V_test.shape, B_test.shape)

    #####################################
    # numpy to tensor to data_loader
    # train
    X_train = torch.from_numpy(X_train).float()
    Y_train = torch.from_numpy(Y_train).long()
    V_train = torch.from_numpy(V_train).float()
    B_train = torch.from_numpy(B_train).float()
    if origin:
        original_set = Dataset_2(data_tensor=X_train, target_tensor=Y_train)
        original_loader = Data.DataLoader(dataset=original_set, batch_size=bs,
                                          shuffle=True)
    if vocal or remix:
        vocal_set = Dataset_2(data_tensor=V_train, target_tensor=Y_train)
        vocal_loader = Data.DataLoader(dataset=vocal_set, batch_size=bs,
                                       shuffle=True)
    if remix:
        # Backgrounds are unlabeled; they get mixed with vocals on the fly.
        bgm_set = Dataset_1(data_tensor=B_train)
        bgm_loader = Data.DataLoader(dataset=bgm_set, batch_size=bs,
                                     shuffle=True)
    # val — validate on vocal stems only when training vocal-only.
    if vocal and not origin:
        X_val = torch.from_numpy(V_val).float()
        Y_val = torch.from_numpy(Y_val).long()
    else:
        X_val = torch.from_numpy(X_val).float()
        Y_val = torch.from_numpy(Y_val).long()
    val_set = Dataset_2(data_tensor=X_val, target_tensor=Y_val)
    val_loader = Data.DataLoader(dataset=val_set, batch_size=bs,
                                 shuffle=False)
    # Test — loader yields (mix, label, song id, vocal stem) per item.
    X_test = torch.from_numpy(X_test).float()
    Y_test = torch.from_numpy(Y_test).long()
    V_test = torch.from_numpy(V_test).float()
    test_o_set = Dataset_4(data_tensor=X_test, target_tensor1=Y_test,
                           target_tensor2=S_test, target_tensor3=V_test)
    test_o_loader = Data.DataLoader(dataset=test_o_set, batch_size=bs,
                                    shuffle=False)
    # test_v_set = Dataset_3(data_tensor=V_test, target_tensor1=Y_test, target_tensor2=S_test)
    # test_v_loader = Data.DataLoader(dataset=test_v_set, batch_size=bs, shuffle=False)
    #####################################

    best_epoch = 0
    best_F1 = 0
    CELoss = nn.CrossEntropyLoss()
    opt = optim.Adam(Classifier.parameters(),lr=learn_rate)

    print('Start training ...')
    # start_time = time.time()
    early_stop_flag = False
    for epoch in range(epoch_num):
        if early_stop_flag:
            # Final summary. NOTE(review): test_F1_* are only bound after
            # the first validation improvement — relies on at least one
            # improvement having occurred before early stop triggers.
            print('rs: ', random_state)
            print('Origin: ', origin, ' | Vocal: ', vocal, ' | Remix: ', remix)
            print('CRNN: ', CRNN_model, ' | CRNNx2: ', CRNNx2_model)
            print(' best_epoch: ', best_epoch, ' | best_val_F1: %.2f'% best_F1)
            print(' Test original | frame level: %.2f'% test_F1_frame_o, ' | songs level: %.2f'% test_F1_songs_o)
            if vocal:
                print(' Test vocal | frame level: %.2f'% test_F1_frame_v, ' | songs level: %.2f'% test_F1_songs_v)
            break
        if stop_num:
            # Stop when no val improvement for stop_num epochs (takes
            # effect at the top of the next iteration).
            if epoch - best_epoch >= stop_num:
                early_stop_flag = True
                print('Early Stop!')

        all_loss = 0
        Classifier.train()
        # batch_h is a fresh random hidden state per batch (shape
        # (1, batch, 32)) fed to the recurrent part of the classifier.
        if origin:
            for step, (batch_x, batch_y) in enumerate(original_loader):
                opt.zero_grad()
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                pred_y, emb = Classifier(batch_x, batch_h)
                loss = CELoss(pred_y, batch_y)
                loss.backward()
                opt.step()
                # NOTE(review): accumulating the tensor (not loss.item())
                # keeps autograd references alive — works but wastes memory.
                all_loss += loss
        if vocal:
            for step, (batch_x, batch_y) in enumerate(vocal_loader):
                opt.zero_grad()
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                pred_y, emb = Classifier(batch_x, batch_h)
                loss = CELoss(pred_y, batch_y)
                loss.backward()
                opt.step()
                all_loss += loss
        if remix:
            # On-the-fly remix: add vocal and background energies in the
            # linear power domain, then convert back to dB.
            for step, ((batch_x, batch_y), batch_b) in enumerate(zip(vocal_loader,bgm_loader)):
                opt.zero_grad()
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                batch_b = batch_b.cuda()
                batch_x = 10.0*torch.log10((10.0**(batch_x/10.0)) + (10.0**(batch_b/10.0)))
                pred_y, emb = Classifier(batch_x, batch_h)
                loss = CELoss(pred_y, batch_y)
                loss.backward()
                opt.step()
                all_loss += loss
        print('epoch: ', epoch, ' | Loss: %.4f'% all_loss, ' | time: %.2f'% (time.time()-start_start if False else time.time()-start_time), '(s)')
        start_time = time.time()

        if epoch % val_num == 0:
            # Frame-level validation: weighted F1 over all slices.
            Classifier.eval()
            frame_true = []
            frame_pred = []
            for step, (batch_x, batch_y) in enumerate(val_loader):
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                pred_y, emb = Classifier(batch_x, batch_h)
                pred_y = pred_y.detach().cpu().numpy()
                batch_y = batch_y.detach().cpu().numpy()
                for i in range(len(pred_y)):
                    frame_true.append(batch_y[i])
                    frame_pred.append(np.argmax(pred_y[i]) )
            val_F1 = f1_score(frame_true, frame_pred, average='weighted')
            print(' val F1: %.2f'% val_F1)
            if best_F1 < val_F1:
                # New best: checkpoint, then evaluate on the test set.
                best_F1 = val_F1
                best_epoch = epoch
                print(' best_epoch: ', best_epoch, ' | best_val_F1: %.2f'% best_F1)
                torch.save({'Classifier_state_dict': Classifier.state_dict()
                            }, save_folder+'/model/CRNN2D_elu_model_state_dict')
                # Test on original mixes; song-level prediction is majority
                # voting of per-slice argmax predictions per song id.
                frame_true = []
                frame_pred = []
                songs_true = []
                songs_pred = []
                songs_list = []
                songs_vote_dict = {}
                songs_true_dict = {}
                emb_list = []
                for step, (batch_x, batch_y, batch_song, batch_v) in enumerate(test_o_loader):
                    batch_x = batch_x.cuda()
                    batch_y = batch_y.cuda()
                    batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                    pred_y, emb = Classifier(batch_x, batch_h)
                    pred_y = pred_y.detach().cpu().numpy()
                    batch_y = batch_y.detach().cpu().numpy()
                    emb = emb.detach().cpu().numpy()
                    batch_v = batch_v.detach().cpu().numpy()
                    for i in range(len(pred_y)):
                        frame_true.append(batch_y[i])
                        frame_pred.append(np.argmax(pred_y[i]))
                        emb_list.append(emb[i])
                        # NOTE(review): vote vector is hard-coded to 20
                        # classes, ignoring classes_num — confirm intent.
                        onehot = np.zeros(20)
                        onehot[np.argmax(pred_y[i])] += 1
                        if batch_song[i] not in songs_list:
                            songs_list.append(batch_song[i])
                            songs_true_dict[batch_song[i]] = batch_y[i]
                            songs_vote_dict[batch_song[i]] = onehot
                        else:
                            songs_vote_dict[batch_song[i]] += onehot
                for song in songs_list:
                    songs_true.append(songs_true_dict[song])
                    songs_pred.append(np.argmax(songs_vote_dict[song]))
                np.savez(save_folder+'/result/ori_result.npz', \
                    pred=np.array(frame_pred), true=np.array(frame_true), emb=np.array(emb_list))
                test_F1_frame_o = f1_score(frame_true, frame_pred, average='weighted')
                test_F1_songs_o = f1_score(songs_true, songs_pred, average='weighted')
                print(' Test original | frame level: %.2f'% test_F1_frame_o, ' | songs level: %.2f'% test_F1_songs_o)
                if vocal:
                    # Repeat test evaluation using vocal stems (batch_v)
                    # as input on the same loader.
                    frame_true = []
                    frame_pred = []
                    songs_true = []
                    songs_pred = []
                    songs_list = []
                    songs_vote_dict = {}
                    songs_true_dict = {}
                    for step, (batch_x, batch_y, batch_song, batch_v) in enumerate(test_o_loader):
                        batch_x = batch_v.cuda()
                        batch_y = batch_y.cuda()
                        batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                        pred_y, emb = Classifier(batch_x, batch_h)
                        pred_y = pred_y.detach().cpu().numpy()
                        batch_y = batch_y.detach().cpu().numpy()
                        for i in range(len(pred_y)):
                            frame_true.append(batch_y[i])
                            frame_pred.append(np.argmax(pred_y[i]))
                            onehot = np.zeros(20)
                            onehot[np.argmax(pred_y[i])] += 1
                            if batch_song[i] not in songs_list:
                                songs_list.append(batch_song[i])
                                songs_true_dict[batch_song[i]] = batch_y[i]
                                songs_vote_dict[batch_song[i]] = onehot
                            else:
                                songs_vote_dict[batch_song[i]] += onehot
                    for song in songs_list:
                        songs_true.append(songs_true_dict[song])
                        songs_pred.append(np.argmax(songs_vote_dict[song]))
                    test_F1_frame_v = f1_score(frame_true, frame_pred, average='weighted')
                    test_F1_songs_v = f1_score(songs_true, songs_pred, average='weighted')
                    print(' Test vocal | frame level: %.2f'% test_F1_frame_v, ' | songs level: %.2f'% test_F1_songs_v)