def prepare_tabular_data(dataset_dir, concat_train_valid):
    """Load tabular train/val/test splits and one-hot encode their labels.

    Inputs:
        dataset_dir: directory passed to load_datasets.
        concat_train_valid: when True, fold the validation split into the
            training split; the validation slots become None.

    Returns:
        (datasets_orig, datasets_enc, label_encoder) where each datasets_*
        is ((X_train, y_train), (X_val, y_val), (X_test, y_test)).
    """
    logging.info('Loading datsets from: {}'.format(dataset_dir))
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = \
        load_datasets(dataset_dir)

    if concat_train_valid:
        # Merge validation into training and blank out the val slots.
        X_train = pd.concat([X_train, X_val])
        y_train = pd.concat([y_train, y_val])
        X_val = y_val = None

    # Fit the encoder on labels from every split so the one-hot width
    # covers all classes.  pd.concat silently drops None entries, so this
    # also works after the concat_train_valid branch above.
    label_encoder, _ = utility.encode_labels(
        pd.concat([y_train, y_val, y_test]), encoder=None)

    # One-hot encode each split with the shared encoder (the output layer
    # has one node per class, hence the one-hot targets).
    _, y_train_enc = utility.encode_labels(y_train, encoder=label_encoder)
    if X_val is not None:
        _, y_val_enc = utility.encode_labels(y_val, encoder=label_encoder)
    else:
        y_val_enc = None
    _, y_test_enc = utility.encode_labels(y_test, encoder=label_encoder)

    datasets_orig = (X_train, y_train), (X_val, y_val), (X_test, y_test)
    datasets_enc = ((X_train, y_train_enc), (X_val, y_val_enc),
                    (X_test, y_test_enc))

    return datasets_orig, datasets_enc, label_encoder
Example #2
0
    def get_y_classif(self, labels):
        '''
        Return the expected outputs for classification.

        Thin wrapper around u.encode_labels; per the original note this is
        a one-hot encoding of each sample's label.

        Inputs:
            -> labels, list of string

        Outputs (as documented by the original author — actual types/shapes
        depend on u.encode_labels, TODO confirm against that helper):
            -> y, numpy array, shape = [num_samples, num_labels]
            -> num_class, int
        '''
        return u.encode_labels(labels)
Example #3
0
def main():
    """Fit a ZCA whitening transform on the training images and save it.

    Reads the training metadata, loads every training image in a single
    batch, fits utility.ZCA on it, and persists the whitening matrix and
    mean to a MATLAB .mat file.
    """
    # Labels must be integer-encoded before the data loader is built.
    frame = pd.read_csv('data/train.csv')
    frame, _ = utility.encode_labels(frame)

    loader = dl.get_full_data_loader(frame,
                                     data_dir='data/train_images',
                                     batch_size=128,
                                     image_size=32)
    # The "full" loader yields the whole dataset in its first batch;
    # pull that batch and convert the image tensor to a numpy array.
    images = next(iter(loader))[0].numpy()

    whitener = utility.ZCA()
    whitener.fit(images)

    # Persist the fitted statistics for later reuse.
    savemat("data/zca_data.mat",
            {"zca_matrix": whitener.ZCA_mat, "zca_mean": whitener.mean})
def prepare_sequence_data(dataset_dir, time_steps, concat_train_valid):
    """Load splits, slice flows into fixed-length sequences, one-hot labels.

    Inputs:
        dataset_dir: directory passed to load_datasets.
        time_steps: sequence length handed to utility.extract_flow_sequences
            (presumably the LSTM window length — confirm against that helper).
        concat_train_valid: when True, fold the validation split into the
            training split; validation slots become None.

    Returns:
        (datasets_orig, datasets_enc, label_encoder) where each datasets_*
        is ((X_train, y_train), (X_val, y_val), (X_test, y_test)).
    """
    # Load data
    logging.info('Loading datsets from: {}'.format(dataset_dir))
    datasets_loaded = load_datasets(dataset_dir)

    (X_train, y_train), (X_val, y_val), (X_test, y_test) = datasets_loaded

    if concat_train_valid:
        # Merge validation into training; pd.concat tolerates this, and the
        # None markers below route around the missing split everywhere else.
        X_train = pd.concat([X_train, X_val])
        y_train = pd.concat([y_train, y_val])
        X_val = None
        y_val = None

    # datasets_orig = (X_train, y_train), (X_val, y_val), (X_test, y_test)

    # Prepare sequences of flows (for LSTM input)
    logging.info('Preparing flow sequences')
    t0 = time.time()

    # extract_flow_sequences returns reshaped X plus per-sequence labels;
    # y_*_seq appears to be 2-D (sequences x time_steps) given the reshapes
    # further down — TODO confirm against utility.extract_flow_sequences.
    X_train, y_train_seq = utility.extract_flow_sequences(
        X_train, y_train, time_steps, None)

    y_val_seq = None
    if X_val is not None:
        X_val, y_val_seq = utility.extract_flow_sequences(
            X_val, y_val, time_steps, None)
    X_test, y_test_seq = utility.extract_flow_sequences(
        X_test, y_test, time_steps, None)

    logging.info('Extracting flows complete. time_taken = {:.2f} sec'.format(
        time.time() - t0))

    # One-hot encode class labels (needed as output layer has multiple nodes).
    # The encoder is fit on the labels of every available split so the
    # one-hot width covers all classes.
    y_list = [y_train_seq.flatten()]
    if y_val_seq is not None:
        y_list.append(y_val_seq.flatten())
    y_list.append(y_test_seq.flatten())
    all_y = np.hstack(y_list)

    label_encoder, all_y_enc = utility.encode_labels(all_y, encoder=None)
    unused, y_train_enc = utility.encode_labels(y_train_seq.flatten(),
                                                encoder=label_encoder)
    y_val_enc = None
    if y_val_seq is not None:
        unused, y_val_enc = utility.encode_labels(y_val_seq.flatten(),
                                                  encoder=label_encoder)
    unused, y_test_enc = utility.encode_labels(y_test_seq.flatten(),
                                               encoder=label_encoder)

    # Restore the sequence structure that flatten() removed:
    # (num_sequences, time_steps, num_classes).
    y_train_enc = y_train_enc.reshape(y_train_seq.shape[0],
                                      y_train_seq.shape[1], all_y_enc.shape[1])
    if y_val_seq is not None:
        y_val_enc = y_val_enc.reshape(y_val_seq.shape[0], y_val_seq.shape[1],
                                      all_y_enc.shape[1])
    y_test_enc = y_test_enc.reshape(y_test_seq.shape[0], y_test_seq.shape[1],
                                    all_y_enc.shape[1])

    # batch_size * time_steps in the prepared seq.  Sequencing may have
    # dropped trailing flows, so the original labels are truncated to the
    # number of flows that survived.
    train_num_flows = X_train.shape[0] * X_train.shape[1]
    test_num_flows = X_test.shape[0] * X_test.shape[1]

    y_values = None
    if y_val is not None:
        val_num_flows = X_val.shape[0] * X_val.shape[1]
        y_values = y_val[0:val_num_flows]

    datasets_orig = (X_train, y_train[0:train_num_flows]), (X_val, y_values), (
        X_test, y_test[0:test_num_flows])

    datasets_enc = (X_train, y_train_enc), (X_val, y_val_enc), (X_test,
                                                                y_test_enc)

    return datasets_orig, datasets_enc, label_encoder
Example #5
0
def train_model():
    """Run a single wandb-configured training trial.

    Reads hyperparameters from ``wandb.config`` (batch_size, resnet_type,
    use_feature_extract, optimizer, learning_rate, scheduler, epochs),
    builds the data loaders and model, then delegates the training loop to
    ``trainer.train_model`` with metrics sent to wandb.
    """
    # Init device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize a new wandb run
    wandb.init()

    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config

    # Load the meta data file; timestamp is unused by training.
    df = pd.read_csv('data/train.csv')
    df = df.drop(['timestamp'], axis=1)
    df, _ = utility.encode_labels(df)
    num_classes = len(df['label'].value_counts())

    # Build the dataset
    train_loader, valid_loader = dl.get_train_valid_loader(
        df,
        data_dir='data/train_images',
        batch_size=config.batch_size,
        image_size=IMAGE_SIZE,
        augment=True,
        random_seed=0)

    # Make resnet
    model = utility.initialize_net(num_classes, config.resnet_type,
                                   config.use_feature_extract)
    model = model.to(device)

    # Gather the parameters to be optimized/updated in this run.
    params_to_update = utility.get_model_params_to_train(
        model, config.use_feature_extract)

    # Define criterion + optimizer
    criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'sgd':
        optimizer = optim.SGD(params_to_update, lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(params_to_update, lr=config.learning_rate)
    elif config.optimizer == 'adam':
        optimizer = optim.Adam(params_to_update, lr=config.learning_rate)
    else:
        # Fail fast with a clear message: previously an unknown value left
        # `optimizer` unbound and the scheduler below raised a NameError.
        raise ValueError(
            'Unsupported optimizer: {!r} (expected sgd, rmsprop or adam)'
            .format(config.optimizer))

    # Define scheduler (anneal_strategy comes from the sweep config).
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer,
        max_lr=10,
        epochs=config.epochs,
        anneal_strategy=config.scheduler,
        steps_per_epoch=len(train_loader))

    trainer.train_model(device=device,
                        model=model,
                        optimizer=optimizer,
                        criterion=criterion,
                        train_loader=train_loader,
                        valid_loader=valid_loader,
                        scheduler=scheduler,
                        epochs=config.epochs,
                        send_to_wandb=True)
Example #6
0
def main():
    """Train one model per (image size, network type) combination.

    For each resolution in IMAGE_SIZES: optionally fit and save a ZCA
    whitening matrix, build augmented train / plain valid datasets, then for
    each architecture in NETS resume (if a checkpoint exists) or start
    training and save the resulting model.
    """
    # Load the meta data file
    df = pd.read_csv('./data/train.csv')
    df, label_encoder = utility.encode_labels(df)
    num_classes = len(df['label'].value_counts())
    # Persist the label mapping so inference can decode predictions later.
    np.save('./data/label_encoder_classes.npy', label_encoder.classes_)

    # Generate the ZCA matrix if enabled
    for image_size in IMAGE_SIZES:  # train for every res
        if APPLY_ZCA_TRANS:
            print("Making ZCA matrix ...")
            data_loader = dl.get_full_data_loader(df,
                                                  data_dir=DATA_DIR,
                                                  batch_size=BATCH_SIZE,
                                                  image_size=image_size)
            # First batch of the "full" loader holds the whole dataset.
            train_dataset_arr = next(iter(data_loader))[0].numpy()
            zca = utility.ZCA()
            zca.fit(train_dataset_arr)
            zca_dic = {"zca_matrix": zca.ZCA_mat, "zca_mean": zca.mean}
            savemat("./data/zca_data.mat", zca_dic)
            print("Completed making ZCA matrix")

        # Define normalization (standard ImageNet statistics).
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )

        # Define specific transforms: heavy augmentation for training,
        # only pad/resize/normalize for validation.
        train_transform = transforms.Compose([
            utility.AddPadding(),
            transforms.Resize((image_size, image_size)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=(-90, 90)),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ColorJitter(.4, .4, .4),
            transforms.ToTensor(), normalize
        ])
        valid_transform = transforms.Compose([
            utility.AddPadding(),
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(), normalize
        ])

        # Create a train and valid dataset
        train_dataset = dl.HotelImagesDataset(df,
                                              root_dir=DATA_DIR,
                                              transform=train_transform)
        valid_dataset = dl.HotelImagesDataset(df,
                                              root_dir=DATA_DIR,
                                              transform=valid_transform)

        # Get a train and valid data loader
        train_loader, valid_loader = dl.get_train_valid_loader(
            train_dataset, valid_dataset, batch_size=BATCH_SIZE, random_seed=0)
        for net_type in NETS:  # train for every net
            model = utility.initialize_net(num_classes,
                                           net_type,
                                           feature_extract=FEATURE_EXTRACT)

            # If old model exists, take state from it
            if path.exists(f"./models/model_{net_type}.pt"):
                print("Resuming training on trained model ...")
                model = utility.load_latest_model(
                    model, f'./models/model_{net_type}.pt')

            # Gather the parameters to be optimized/updated in this run.
            params_to_update = utility.get_model_params_to_train(
                model, FEATURE_EXTRACT)

            # Send model to GPU
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")
            model = model.to(device)

            # Make criterion
            criterion = nn.CrossEntropyLoss()

            # Make optimizer + scheduler (LR drops on validation plateau).
            optimizer = torch.optim.SGD(params_to_update,
                                        lr=0.01,
                                        momentum=0.9)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                                   mode='min',
                                                                   factor=0.01,
                                                                   patience=3)

            trained_model = trainer.train_model(
                device=device,
                model=model,
                optimizer=optimizer,
                criterion=criterion,
                train_loader=train_loader,
                valid_loader=valid_loader,
                scheduler=scheduler,
                net_type=net_type,
                epochs=EPOCHS,
                apply_zca_trans=APPLY_ZCA_TRANS)

            # Checkpoint the model so the next run can resume from it.
            utility.save_current_model(trained_model,
                                       f"./models/model_{net_type}.pt")
Example #7
0
def main(classes_num=20, gid=0, random_state=0, \
            bs=100, learn_rate=0.0001, \
            val_num=1, stop_num=20,
            origin=True, vocal=True, remix=True,
            CRNN_model=True, CRNNx2_model=False,
            debug=False):

    start_time = time.time()

    save_folder = '../save/'+str(random_state)+'/'

    if origin and vocal and remix:
        save_folder = save_folder + '/all/'
    elif origin:
        save_folder = save_folder + '/ori/'
    elif vocal:
        save_folder = save_folder + '/voc/'
    elif remix:
        save_folder = save_folder + '/remix/'

    if not os.path.exists(save_folder+'/model/'):
        os.makedirs(save_folder+'/model/')
    if not os.path.exists(save_folder+'/result/'):
        os.makedirs(save_folder+'/result/')

    epoch_num = 10000

    print('Loading pretrain model ...')

    # Classifier = model.CRNN2D_elu(224,classes_num)
    # Classifier.float()
    # Classifier.cuda()
    # Classifier.train()

    if CRNN_model:
        Classifier = model.CRNN2D_elu(224,classes_num)
        Classifier.float()
        Classifier.cuda()
        Classifier.train()
    elif CRNNx2_model:
        Classifier = model.CRNN2D_elu2(288,classes_num)
        Classifier.float()
        Classifier.cuda()
        Classifier.train()

    print('Loading training data ...')

    artist_folder=f'/home/bill317996/189/homes/kevinco27/dataset/artist20_mix'
    song_folder=f'/home/bill317996/189/homes/kevinco27/ICASSP2020_meledy_extraction/music-artist-classification-crnn/song_data_mix'
    voc_folder=f'/home/bill317996/189/homes/kevinco27/ICASSP2020_meledy_extraction/music-artist-classification-crnn/song_data_open_unmix_vocal_2'
    bgm_folder = f'/home/bill317996/189/homes/kevinco27/ICASSP2020_meledy_extraction/music-artist-classification-crnn/song_data_open_unmix_kala'
    # random_states = [0,21,42]
    
    

    if debug:
        Y_train, X_train, S_train, V_train, B_train,\
        Y_test, X_test, S_test, V_test, B_test,\
        Y_val, X_val, S_val, V_val, B_val = \
        np.zeros(11437, dtype=int), np.zeros((11437, 128, 157)), np.zeros(11437), np.zeros((11437, 128, 157)), np.zeros((11437, 128, 157)), \
        np.zeros(11437, dtype=int), np.zeros((11437, 128, 157)), np.zeros(11437), np.zeros((11437, 128, 157)), np.zeros((11437, 128, 157)), \
        np.zeros(11437, dtype=int), np.zeros((11437, 128, 157)), np.zeros(11437), np.zeros((11437, 128, 157)), np.zeros((11437, 128, 157)) 

        Y_train[0] = 1
        Y_val[0] = 1
        Y_test[0] = 1
    else:
        Y_train, X_train, S_train, V_train, B_train,\
        Y_test, X_test, S_test, V_test, B_test,\
        Y_val, X_val, S_val, V_val, B_val = \
            utility.load_dataset_album_split_da(song_folder_name=song_folder,
                                             artist_folder=artist_folder,
                                             voc_song_folder=voc_folder,
                                             bgm_song_folder=bgm_folder,
                                             nb_classes=classes_num,
                                             random_state=random_state)

    if not debug:
        print("Loaded and split dataset. Slicing songs...")

        slice_length = 157

        # Create slices out of the songs
        X_train, Y_train, S_train, V_train, B_train = utility.slice_songs_da(X_train, Y_train, S_train, V_train, B_train,
                                                        length=slice_length)
        X_val, Y_val, S_val, V_val, B_val = utility.slice_songs_da(X_val, Y_val, S_val, V_val, B_val,
                                                  length=slice_length)
        X_test, Y_test, S_test, V_test, B_test = utility.slice_songs_da(X_test, Y_test, S_test, V_test, B_test,
                                                     length=slice_length)

        print("Training set label counts:", np.unique(Y_train, return_counts=True))

    

        # # Encode the target vectors into one-hot encoded vectors
        Y_train, le, enc = utility.encode_labels(Y_train)
        Y_test, le, enc = utility.encode_labels(Y_test, le, enc)
        Y_val, le, enc = utility.encode_labels(Y_val, le, enc)

        Y_train = Y_train[:,0]
        Y_test = Y_test[:,0]
        Y_val = Y_val[:,0]


    print(X_train.shape, Y_train.shape, S_train.shape, V_train.shape, B_train.shape)
    print(X_val.shape, Y_val.shape, S_val.shape, V_val.shape, B_val.shape)
    print(X_test.shape, Y_test.shape, S_test.shape, V_test.shape, B_test.shape)

    #####################################
    # numpy to tensor to data_loader
    # train

    X_train = torch.from_numpy(X_train).float()
    Y_train = torch.from_numpy(Y_train).long()
    V_train = torch.from_numpy(V_train).float()
    B_train = torch.from_numpy(B_train).float()

    if origin:
        original_set = Dataset_2(data_tensor=X_train, target_tensor=Y_train)
        original_loader = Data.DataLoader(dataset=original_set, batch_size=bs, shuffle=True)
    if vocal or remix:
        vocal_set = Dataset_2(data_tensor=V_train, target_tensor=Y_train)
        vocal_loader = Data.DataLoader(dataset=vocal_set, batch_size=bs, shuffle=True)
    if remix:
        bgm_set = Dataset_1(data_tensor=B_train)
        bgm_loader = Data.DataLoader(dataset=bgm_set, batch_size=bs, shuffle=True)

    # val
    if vocal and not origin:
        X_val = torch.from_numpy(V_val).float()
        Y_val = torch.from_numpy(Y_val).long()
    else:
        X_val = torch.from_numpy(X_val).float()
        Y_val = torch.from_numpy(Y_val).long()

    val_set = Dataset_2(data_tensor=X_val, target_tensor=Y_val)
    val_loader = Data.DataLoader(dataset=val_set, batch_size=bs, shuffle=False)

    # Test

    X_test = torch.from_numpy(X_test).float()
    Y_test = torch.from_numpy(Y_test).long()
    V_test = torch.from_numpy(V_test).float()

    test_o_set = Dataset_4(data_tensor=X_test, target_tensor1=Y_test, target_tensor2=S_test, target_tensor3=V_test)
    test_o_loader = Data.DataLoader(dataset=test_o_set, batch_size=bs, shuffle=False)

    # test_v_set = Dataset_3(data_tensor=V_test, target_tensor1=Y_test, target_tensor2=S_test)
    # test_v_loader = Data.DataLoader(dataset=test_v_set, batch_size=bs, shuffle=False)

    #####################################

    best_epoch = 0
    best_F1 = 0

    CELoss = nn.CrossEntropyLoss()

    opt = optim.Adam(Classifier.parameters(),lr=learn_rate)

    print('Start training ...')

    # start_time = time.time()
    early_stop_flag = False
    for epoch in range(epoch_num):
        if early_stop_flag:
            print('rs: ', random_state)
            print('Origin: ', origin, ' | Vocal: ', vocal, ' | Remix: ', remix)
            print('CRNN: ', CRNN_model, ' | CRNNx2: ', CRNNx2_model)
            print('     best_epoch: ', best_epoch, ' | best_val_F1: %.2f'% best_F1)
            print('     Test original | frame level: %.2f'% test_F1_frame_o, ' | songs level: %.2f'% test_F1_songs_o)
            if vocal:
                print('     Test vocal | frame level: %.2f'% test_F1_frame_v, ' | songs level: %.2f'% test_F1_songs_v)
            break
        if stop_num:
            if epoch - best_epoch >= stop_num:
                early_stop_flag = True
                print('Early Stop!')
        all_loss = 0
        Classifier.train()

        if origin:
            for step, (batch_x, batch_y) in enumerate(original_loader):
                
                opt.zero_grad()

                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                
                
                pred_y, emb = Classifier(batch_x, batch_h)

                
                loss = CELoss(pred_y, batch_y)
                
                loss.backward()
                opt.step()

                all_loss += loss
        if vocal:
            for step, (batch_x, batch_y) in enumerate(vocal_loader):
                
                opt.zero_grad()

                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                

                pred_y, emb = Classifier(batch_x, batch_h)


                loss = CELoss(pred_y, batch_y)

                loss.backward()
                opt.step()

                all_loss += loss
        if remix:
            for step, ((batch_x, batch_y), batch_b) in enumerate(zip(vocal_loader,bgm_loader)):
                
                opt.zero_grad()

                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                batch_b = batch_b.cuda()

                batch_x = 10.0*torch.log10((10.0**(batch_x/10.0)) + (10.0**(batch_b/10.0)))
                
                pred_y, emb = Classifier(batch_x, batch_h)

                loss = CELoss(pred_y, batch_y)

                loss.backward()
                opt.step()

                all_loss += loss

        print('epoch: ', epoch, ' | Loss: %.4f'% all_loss, ' | time: %.2f'% (time.time()-start_time), '(s)')
        start_time = time.time()
        if epoch % val_num == 0:

            Classifier.eval()

            frame_true = []
            frame_pred = []

            for step, (batch_x, batch_y) in enumerate(val_loader):
                
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_h = torch.randn(1, batch_x.size(0), 32).cuda()
                
                pred_y, emb = Classifier(batch_x, batch_h)

                pred_y = pred_y.detach().cpu().numpy()
                batch_y = batch_y.detach().cpu().numpy()

                for i in range(len(pred_y)):               
                    frame_true.append(batch_y[i])
                    frame_pred.append(np.argmax(pred_y[i]) )
                
            val_F1 = f1_score(frame_true, frame_pred, average='weighted')
            print('     val F1: %.2f'% val_F1)
            
                
            if best_F1 < val_F1:
                best_F1 = val_F1
                best_epoch = epoch

                print('     best_epoch: ', best_epoch, ' | best_val_F1: %.2f'% best_F1)

                torch.save({'Classifier_state_dict': Classifier.state_dict()
                            }, save_folder+'/model/CRNN2D_elu_model_state_dict')

                frame_true = []
                frame_pred = []

                songs_true = []
                songs_pred = []

                songs_list = []

                songs_vote_dict = {}
                songs_true_dict = {}

                emb_list = []

                for step, (batch_x, batch_y, batch_song, batch_v) in enumerate(test_o_loader):
                    
                    batch_x = batch_x.cuda()
                    batch_y = batch_y.cuda()
                    batch_h = torch.randn(1, batch_x.size(0), 32).cuda()

                    pred_y, emb = Classifier(batch_x, batch_h)

                    pred_y = pred_y.detach().cpu().numpy()
                    batch_y = batch_y.detach().cpu().numpy()
                    emb = emb.detach().cpu().numpy()
                    batch_v = batch_v.detach().cpu().numpy()

                    for i in range(len(pred_y)):                
                        frame_true.append(batch_y[i])
                        frame_pred.append(np.argmax(pred_y[i]))

                        emb_list.append(emb[i])
                        
                        

                        onehot = np.zeros(20)
                        onehot[np.argmax(pred_y[i])] += 1

                        if batch_song[i] not in songs_list:
                            songs_list.append(batch_song[i])
                            songs_true_dict[batch_song[i]] = batch_y[i]
                            songs_vote_dict[batch_song[i]] = onehot

                        else:
                            songs_vote_dict[batch_song[i]] += onehot

                for song in songs_list:
                    songs_true.append(songs_true_dict[song])
                    songs_pred.append(np.argmax(songs_vote_dict[song]))

                np.savez(save_folder+'/result/ori_result.npz', \
                    pred=np.array(frame_pred), true=np.array(frame_true), emb=np.array(emb_list))

                    
                test_F1_frame_o = f1_score(frame_true, frame_pred, average='weighted')
                test_F1_songs_o = f1_score(songs_true, songs_pred, average='weighted')

                print('     Test original | frame level: %.2f'% test_F1_frame_o, ' | songs level: %.2f'% test_F1_songs_o)

                if vocal:
                    frame_true = []
                    frame_pred = []

                    songs_true = []
                    songs_pred = []

                    songs_list = []

                    songs_vote_dict = {}
                    songs_true_dict = {}

                    for step, (batch_x, batch_y, batch_song, batch_v) in enumerate(test_o_loader):
                        
                        batch_x = batch_v.cuda()
                        batch_y = batch_y.cuda()
                        batch_h = torch.randn(1, batch_x.size(0), 32).cuda()

                        pred_y, emb = Classifier(batch_x, batch_h)

                        pred_y = pred_y.detach().cpu().numpy()
                        batch_y = batch_y.detach().cpu().numpy()

                        for i in range(len(pred_y)):                
                            frame_true.append(batch_y[i])
                            frame_pred.append(np.argmax(pred_y[i]))

                            onehot = np.zeros(20)
                            onehot[np.argmax(pred_y[i])] += 1
                            
                            if batch_song[i] not in songs_list:
                                songs_list.append(batch_song[i])
                                songs_true_dict[batch_song[i]] = batch_y[i]
                                songs_vote_dict[batch_song[i]] = onehot
                            else:
                                songs_vote_dict[batch_song[i]] += onehot

                    for song in songs_list:
                        songs_true.append(songs_true_dict[song])
                        songs_pred.append(np.argmax(songs_vote_dict[song]))
                        
                    test_F1_frame_v = f1_score(frame_true, frame_pred, average='weighted')
                    test_F1_songs_v = f1_score(songs_true, songs_pred, average='weighted')
                    print('     Test vocal | frame level: %.2f'% test_F1_frame_v, ' | songs level: %.2f'% test_F1_songs_v)