# Beispiel #1
# 0

if __name__ == '__main__':

    # Segment sizes of the pretrained local encoders; their output widths
    # (512 + 512 + 768) fix the global model's input dimension.
    segment_size_list = [18, 27, 54]
    n_inputs = 512 + 512 + 768
    local_models = []
    for segment_size in segment_size_list:
        loc_model = local_model(segment_size).cuda()
        loc_model.load_state_dict(
            torch.load(
                os.path.join(ENCODER_FOLDER,
                             'local_model_' + str(segment_size) + '.pt')))
        loc_model.eval()
        local_models.append(loc_model)
    model = global_model(n_inputs, 512).cuda()
    model.load_state_dict(
        torch.load(
            os.path.join(ENCODER_FOLDER, 'global_model_18_27_54_9051_123.pt')))
    # FIX: switch to eval mode after loading, as done at every other load
    # site in this file; otherwise dropout/batch-norm stay in train mode
    # during inference.
    model.eval()

    # Clip metadata table and the pre-scraped description texts.
    db = pd.read_csv(CLIP_INFO_FILE, sep="\t")
    description_dict = json.load(
        open(os.path.join(AWS_FOLDER, 'descriptions.json')))

    # Process every split; handle_set is defined elsewhere in the project.
    for name in ['train', 'test', 'valid']:
        handle_set(name, output=True)
# Beispiel #2
# 0
def train_with_audio():
    """Fine-tune the review language model conditioned on audio embeddings.

    Loads the pretrained audio encoders and a language-model checkpoint,
    then for every (track, review) pair in ``pairs.json`` encodes the
    track's audio once and trains the language model to predict the review
    tokens conditioned on that embedding. Saves a checkpoint after every
    epoch and prints a per-epoch loss.

    NOTE(review): depends on module-level globals (REVIEWS_FOLDER,
    DECODER_FOLDER, ENCODER_FOLDER, MSD_SPLIT_FOLDER, MSD_NPY_FOLDER,
    embedding_dim, hidden_dim, music_dim, epochs, MyLoader, conn_pf, ...)
    defined outside this chunk.
    """
    indexer = json.load(open(os.path.join(REVIEWS_FOLDER, 'indexer.json')))
    reverse_indexer = json.load(
        open(os.path.join(REVIEWS_FOLDER, 'reverse_indexer.json')))
    vocab_size = len(indexer)

    language_model = LanguageModel(vocab_size, embedding_dim, hidden_dim,
                                   music_dim).cuda()
    language_model.load_state_dict(
        torch.load(os.path.join(DECODER_FOLDER, 'LanguageModel_4.pt')))
    language_model.train()
    # Only parameters with requires_grad are optimized (some layers of the
    # loaded model are presumably frozen -- TODO confirm).
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, language_model.parameters()))
    loss_function = nn.CrossEntropyLoss()

    # Audio encoders stay frozen (eval mode); they only provide features.
    n_inputs = 512 + 512 + 768
    segment_size_list = [18, 27, 54]
    local_models = []
    for segment_size in segment_size_list:
        loc_model = local_model(segment_size).cuda()
        loc_model.load_state_dict(
            torch.load(
                os.path.join(ENCODER_FOLDER,
                             'local_model_' + str(segment_size) + '.pt')))
        loc_model.eval()
        local_models.append(loc_model)
    model = global_model(n_inputs, 512).cuda()
    model.load_state_dict(
        torch.load(
            os.path.join(ENCODER_FOLDER, 'global_model_18_27_54_9051_123.pt')))
    model.eval()

    # MSD-id -> 7digital-id -> npy path lookup tables.
    id7d_to_path = pickle.load(
        open(os.path.join(MSD_SPLIT_FOLDER, '7D_id_to_path.pkl'), 'rb'))
    idmsd_to_id7d = pickle.load(
        open(os.path.join(MSD_SPLIT_FOLDER, 'MSD_id_to_7D_id.pkl'), 'rb'))

    pairs = json.load(open(os.path.join(MSD_SPLIT_FOLDER, 'pairs.json')))

    for e in range(epochs):

        start_time = time()
        epoch_loss = 0
        batch_number = 0
        song_number = 0

        for track_id, value in pairs.items():
            song_number += 1

            # The first 5 songs are held out for generate_sample(), which
            # stops at song_number >= 6.
            if song_number < 6:
                print("for testing")
                continue

            review = value['review']
            review = np.expand_dims(np.array(review), axis=0)

            try:
                npy_path = os.path.join(
                    MSD_NPY_FOLDER + 'new_pitchfork',
                    id7d_to_path[idmsd_to_id7d[track_id]][:-9] + '.npy')
                X = np.load(npy_path)
            except KeyError:
                print("No key?")
                continue
            except FileNotFoundError:
                print("No audio?")
                continue

            try:
                X = torch.from_numpy(X[:, :1255]).unsqueeze(0)
                X = Variable(X).cuda().float()
                X = torch.cat([loc_model(X)[1] for loc_model in local_models],
                              dim=1)
                _, music = model(X)
            # FIX: was a bare "except:" -- keep the best-effort skip but do
            # not swallow KeyboardInterrupt/SystemExit.
            except Exception:
                print("Weird song (too short?)")
                print(X.shape)
                continue

            train_loader = MyLoader(review)
            for in_data, out_data in train_loader:
                batch_number += 1
                optimizer.zero_grad()
                X = Variable(torch.from_numpy(in_data).long()).cuda()
                Y = Variable(torch.from_numpy(out_data).long()).cuda()
                # Broadcast the single song embedding over batch and time.
                expanded_music = music.unsqueeze(1).expand(
                    X.shape[0], X.shape[1], -1).contiguous()
                out = language_model(X, expanded_music)
                # Flatten (batch, seq, vocab) -> (batch*seq, vocab) for CE.
                out = out.view(out.shape[0] * out.shape[1], out.shape[2])
                Y = Y.view(-1)
                loss = loss_function(out, Y)
                # "music" is reused across batches, so its graph is retained.
                loss.backward(retain_graph=True)
                optimizer.step()
                epoch_loss += loss.data.item()

        # FIX: guard the average -- batch_number is 0 if every song was
        # skipped, which previously raised ZeroDivisionError.
        total_loss = epoch_loss / batch_number if batch_number else float('nan')
        print("Epoch: {0}, train_loss: {1:.8f}, time: {2:.4f}".format(
            e + 1, total_loss,
            time() - start_time))
        generate_sample(language_model, cuda=True)
        torch.save(
            language_model.state_dict(),
            os.path.join(DECODER_FOLDER,
                         'LanguageModel_audio_' + str(e) + '.pt'))

    conn_pf.close()
def train(segment_size_list):
    """Train the global auto-tagging model on top of frozen local encoders.

    Parameters
    ----------
    segment_size_list : list[int]
        Segment sizes of the pretrained local models to stack; each size
        contributes a fixed feature width to the global model's input.

    NOTE(review): depends on module-level globals (train_dataset, seed,
    learning_rate, momentum, normalization, n_songs, epochs, batch_size,
    number_labels, number_tags, folder constants, CustomDataset, ...)
    defined outside this chunk.
    """
    if train_dataset == 'MTAT':
        train_list_pub = pickle.load(
            open(os.path.join(MTAT_SPLIT_FOLDER, 'train_list_pub.cP'), 'rb'))
    if train_dataset == 'MSD':
        id7d_to_path = pickle.load(open(os.path.join(MSD_SPLIT_FOLDER, '7D_id_to_path.pkl'), 'rb'))
        idmsd_to_id7d = pickle.load(
            open(os.path.join(MSD_SPLIT_FOLDER, 'MSD_id_to_7D_id.pkl'), 'rb'))
        train_list_pub_id = pickle.load(
            open(os.path.join(MSD_SPLIT_FOLDER, 'filtered_list_train.cP'), 'rb'))
        # BUG FIX: iterate over the id list just loaded (train_list_pub_id),
        # not train_list_pub, which is undefined on this branch and raised
        # NameError (cf. the identical, correct code in test()).
        train_list_pub = [id7d_to_path[idmsd_to_id7d[song]][:-9] + '.npy'
                          for song in train_list_pub_id]
        # NOTE(review): the shuffle below does not permute train_list_pub_id,
        # so MSD features/labels may be misaligned -- verify against caller.
    total_train_size = len(train_list_pub)
    index = list(range(total_train_size))

    # Shuffle the paths together with their original indices (MTAT labels
    # are looked up by original index).
    combined = list(zip(train_list_pub, index))
    random.seed(seed)
    random.shuffle(combined)
    train_list_pub[:], index[:] = zip(*combined)

    # Feature width contributed by each supported segment size.
    n_inputs = 0
    for segment_size in segment_size_list:
        if segment_size == 18:
            n_inputs += 512
        if segment_size == 27:
            n_inputs += 512
        if segment_size == 54:
            n_inputs += 768
        if segment_size == 108:
            n_inputs += 1024
        if segment_size == 216:
            n_inputs += 1280

    local_models = []
    for segment_size in segment_size_list:
        loc_model = local_model(segment_size).cuda()
        # BUG FIX: the original inverted the calls -- it passed the bare
        # checkpoint filename to torch.load and the joined path to
        # load_state_dict (cf. the correct pattern used elsewhere in this file).
        loc_model.load_state_dict(
            torch.load(
                os.path.join(ENCODER_FOLDER,
                             'local_model_' + str(segment_size) + '.pt')))
        loc_model.eval()
        local_models.append(loc_model)
    model = global_model(n_inputs, 512).cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=learning_rate,
        momentum=momentum,
        nesterov=True
    )
    loss_function = nn.MultiLabelSoftMarginLoss()

    # Songs are loaded and trained on in chunks of n_songs to bound memory.
    for start in range(0, total_train_size, n_songs):
        print("Loading datasets...", start)
        stop = min(start + n_songs, total_train_size)
        if train_dataset == 'MTAT':
            train_features = np.concatenate(
                [np.load(os.path.join(MTAT_NPY_FOLDER, 'training/' + train_list_pub[i]))
                 for i in range(start, stop)])
            train_labels = np.load(
                os.path.join(MTAT_SPLIT_FOLDER, 'y_train_pub.npy'))[
                [index[i] for i in range(start, stop)]]
        if train_dataset == 'MSD':
            train_features = np.concatenate(
                [np.expand_dims(np.load(os.path.join(MSD_NPY_FOLDER, 'testing/' + train_list_pub[i]))[:, :1255], axis=0)
                 for i in range(start, stop)])
            idmsd_to_tag = pickle.load(
                open(os.path.join(MSD_SPLIT_FOLDER, 'msd_id_to_tag_vector.cP'), 'rb'))
            train_labels = np.concatenate(
                [idmsd_to_tag[idmsd] for idmsd in train_list_pub_id[start:stop]],
                axis=1)
        if normalization:
            # Per-feature standardization over the current chunk only.
            mean = np.mean(train_features, axis=0)
            var = np.var(train_features, axis=0)
            train_features = (train_features - mean) / np.sqrt(var)

        train_data = CustomDataset(train_features, train_labels)
        train_size = len(train_data)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
        print("Datasets loaded")

        print("Begin training...")
        for e in range(epochs):
            epoch_loss = 0
            correct = 0
            batch_number = 0
            model.train()
            for data, label in train_loader:
                batch_number += 1
                optimizer.zero_grad()
                X = Variable(data).cuda()
                # Frozen local encoders produce the concatenated input features.
                X = torch.cat([loc_model(X)[1] for loc_model in local_models], dim=1)
                Y = Variable(label).cuda().float()
                out, _ = model(X)
                pred = (out.data > 0.50).float()
                predicted = pred.eq(Y.data.view_as(pred))
                correct += predicted.sum()
                loss = loss_function(out, Y)
                loss.backward()
                optimizer.step()
                # BUG FIX: loss.data[0] fails on 0-dim tensors in modern
                # PyTorch; .item() works on all versions (cf. train_with_audio).
                epoch_loss += loss.item()
            total_loss = epoch_loss / batch_number
            train_accuracy = correct / (train_size * number_labels)
            # print("Epoch: {0}, loss: {1:.8f}".format(e+1, total_loss))
            # print("Epoch: {0}, train_accuracy: {1:.8f}".format(e+1, train_accuracy))

    print(number_tags)
    torch.save(model.state_dict(), os.path.join(ENCODER_FOLDER, 'global_model.pt'))
    print("Finished training")
def generate_sample(language_model,
                    forward=100,
                    width=15,
                    cuda=False,
                    with_gumbel=False,
                    gumbel_weight=1.0):
    """Generate sample reviews for the first five songs in ``pairs.json``.

    Parameters
    ----------
    language_model : LanguageModel
        Trained decoder used for generation.
    forward : int
        Number of tokens to generate.
    width : int
        Search width for ``random_search`` (CUDA path only).
    cuda : bool
        Run the encoders and generation on the GPU.
    with_gumbel, gumbel_weight
        Passed through to the language model's sampling routine.

    NOTE(review): ``reverse_indexer`` is still assumed to exist at module
    level (as an int-indexable mapping) -- verify where it is defined.
    """
    pairs = json.load(open(os.path.join(MSD_SPLIT_FOLDER, 'pairs.json')))
    song_number = 0

    # BUG FIX: these lookup tables were referenced below but never loaded
    # in this function (they were locals of train_with_audio, which calls
    # this), raising NameError unless identically-named globals existed.
    id7d_to_path = pickle.load(
        open(os.path.join(MSD_SPLIT_FOLDER, '7D_id_to_path.pkl'), 'rb'))
    idmsd_to_id7d = pickle.load(
        open(os.path.join(MSD_SPLIT_FOLDER, 'MSD_id_to_7D_id.pkl'), 'rb'))

    # Same frozen encoder stack as in training.
    n_inputs = 512 + 512 + 768
    segment_size_list = [18, 27, 54]
    local_models = []
    for segment_size in segment_size_list:
        loc_model = local_model(segment_size)
        if cuda:
            loc_model = loc_model.cuda()
        loc_model.load_state_dict(
            torch.load(
                os.path.join(ENCODER_FOLDER,
                             'local_model_' + str(segment_size) + '.pt')))
        loc_model.eval()
        local_models.append(loc_model)
    model = global_model(n_inputs, 512)
    if cuda:
        model = model.cuda()
    model.load_state_dict(
        torch.load(
            os.path.join(ENCODER_FOLDER, 'global_model_18_27_54_9051_123.pt')))
    model.eval()

    for track_id, value in pairs.items():
        song_number += 1
        # Only the first 5 songs (the ones skipped by train_with_audio).
        if song_number >= 6:
            break
        else:
            try:
                npy_path = os.path.join(
                    MSD_NPY_FOLDER + 'new_pitchfork',
                    id7d_to_path[idmsd_to_id7d[track_id]][:-9] + '.npy')
                X = np.load(npy_path)
            except KeyError:
                print("No key?")
                # BUG FIX: when the special-case fallback below does not
                # apply, X stayed undefined and the resulting NameError was
                # masked by the bare "except:" further down -- skip instead.
                if track_id != 'AROBTTH':
                    continue
                try:
                    # Hard-coded fallback for "A Rush of Blood to the Head".
                    npy_path = os.path.join(MSD_NPY_FOLDER, '2975.npy')
                    X = np.load(npy_path)
                except Exception:  # was a bare except
                    print('A rush of blood to the head not found')
                    continue
            except FileNotFoundError:
                print(npy_path)
                print("No audio?")
                continue

            try:
                X = torch.from_numpy(X[:, :1255]).unsqueeze(0)
                X = Variable(X).float()
                if cuda:
                    X = X.cuda()
                X = torch.cat([loc_model(X)[1] for loc_model in local_models],
                              dim=1)
                _, music = model(X)
            # FIX: was a bare "except:" -- keep the best-effort skip but do
            # not swallow KeyboardInterrupt/SystemExit.
            except Exception:
                print("Weird song (too short?)")
                print(X.shape)
                continue

            # Seed generation with the start token (index 0).
            initialization = [0]
            initialization = np.expand_dims(np.array(initialization), axis=0)
            initialization = Variable(
                torch.from_numpy(np.transpose(initialization)).long())
            if cuda:
                generated = language_model.random_search(
                    initialization.cuda(),
                    forward,
                    width=width,
                    m=music,
                    with_gumbel=with_gumbel,
                    gumbel_weight=gumbel_weight).cpu().data.numpy()
            else:
                generated = torch.max(language_model.generate(
                    initialization,
                    forward,
                    m=music,
                    with_gumbel=with_gumbel,
                    gumbel_weight=gumbel_weight),
                                      dim=2)[1].data.numpy()[0, :]
            print('----------------')
            print(value['title'])
            print(value['album'])
            print(value['artist'])
            print(' '.join([reverse_indexer[i] for i in generated]))
def test(segment_size_list):
    """Evaluate the trained global tagging model and print its AUC.

    Parameters
    ----------
    segment_size_list : list[int]
        Segment sizes of the pretrained local models to stack; each size
        contributes a fixed feature width to the global model's input.

    NOTE(review): depends on module-level globals (test_dataset,
    normalization, n_songs, batch_size, folder constants, CustomDataset,
    AUCMeter, ...) defined outside this chunk.
    """
    if test_dataset == 'MTAT':
        test_list_pub = pickle.load(open(os.path.join(MTAT_SPLIT_FOLDER, 'test_list_pub.cP'), 'rb'))
    if test_dataset == 'MSD':
        id7d_to_path = pickle.load(open(os.path.join(MSD_SPLIT_FOLDER, '7D_id_to_path.pkl'), 'rb'))
        idmsd_to_id7d = pickle.load(
            open(os.path.join(MSD_SPLIT_FOLDER, 'MSD_id_to_7D_id.pkl'), 'rb'))
        test_list_pub_id = pickle.load(
            open(os.path.join(MSD_SPLIT_FOLDER, 'filtered_list_test.cP'), 'rb'))
        test_list_pub = [id7d_to_path[idmsd_to_id7d[song]][:-9] + '.npy' for song in test_list_pub_id]
        # The large path maps are no longer needed once paths are resolved.
        del id7d_to_path, idmsd_to_id7d

    total_test_size = len(test_list_pub)

    # Feature width contributed by each supported segment size.
    n_inputs = 0
    for segment_size in segment_size_list:
        if segment_size == 18:
            n_inputs += 512
        if segment_size == 27:
            n_inputs += 512
        if segment_size == 54:
            n_inputs += 768
        if segment_size == 108:
            n_inputs += 1024
        if segment_size == 216:
            n_inputs += 1280

    local_models = []
    for segment_size in segment_size_list:
        loc_model = local_model(segment_size).cuda()
        # BUG FIX: the original inverted the calls -- it passed the bare
        # checkpoint filename to torch.load and the joined path to
        # load_state_dict (cf. the correct pattern used elsewhere in this file).
        loc_model.load_state_dict(
            torch.load(
                os.path.join(ENCODER_FOLDER,
                             'local_model_' + str(segment_size) + '.pt')))
        loc_model.eval()
        local_models.append(loc_model)
    model = global_model(n_inputs, 512).cuda()
    model.load_state_dict(torch.load(os.path.join(ENCODER_FOLDER, 'global_model_18_27_54_9051_123.pt')))
    model.eval()
    auc = AUCMeter()

    # Hoisted out of the chunk loop: the tag-vector table is identical for
    # every chunk and was previously re-read from disk each iteration.
    if test_dataset == 'MSD':
        idmsd_to_tag = pickle.load(
            open(os.path.join(MSD_SPLIT_FOLDER, 'msd_id_to_tag_vector.cP'), 'rb'))

    # Songs are evaluated in chunks of n_songs to bound memory.
    for start in range(0, total_test_size, n_songs):
        print("Loading dataset...", start)
        stop = min(start + n_songs, total_test_size)
        if test_dataset == 'MTAT':
            test_features = np.concatenate(
                [np.load(os.path.join(MTAT_NPY_FOLDER, 'testing/' + test_list_pub[i]))
                 for i in range(start, stop)])
            test_labels = np.load(
                os.path.join(MTAT_SPLIT_FOLDER, 'y_test_pub.npy'))[start:stop]
        if test_dataset == 'MSD':
            test_features = np.concatenate(
                [np.expand_dims(np.load(os.path.join(MSD_NPY_FOLDER, 'testing/' + test_list_pub[i]))[:, :1255], axis=0)
                 for i in range(start, stop)])
            test_labels = np.concatenate(
                [idmsd_to_tag[idmsd] for idmsd in test_list_pub_id[start:stop]],
                axis=1)

        if normalization:
            # Per-feature standardization over the current chunk only.
            mean = np.mean(test_features, axis=0)
            var = np.var(test_features, axis=0)
            test_features = (test_features - mean) / np.sqrt(var)

        test_data = CustomDataset(test_features, test_labels)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)
        print("Dataset loaded")

        for data, labels in test_loader:
            X = Variable(data).cuda()
            # Frozen local encoders produce the concatenated input features.
            X = torch.cat([loc_model(X)[1] for loc_model in local_models], dim=1)
            out, _ = model(X)
            auc_out = np.reshape(out.data.cpu().numpy(), -1)
            auc_target = np.reshape(labels, -1)
            auc.add(auc_out, auc_target)

        del test_features, test_labels, test_data, test_loader

    auc_tuple = auc.value()
    print("AUC = ", auc_tuple[0])