batch_size=len(idx2song),
            pl_length=dp_train.out_size - 1,
            feat_dim=model.FEAT_DIM,
        )

        # initialize network
        # NOTE(review): `net` is constructed above this chunk (the opening
        # lines of the call are cut off at the top of the file) -- Network
        # presumably wraps the model graph; confirm against the Network class.
        my_net = Network(net)

        # load previously fit parameters
        # Parameters are read from "<out_dir>/<model.name>_params.pkl",
        # i.e. the file written when the model was trained/fit earlier.
        params_file = os.path.join(out_dir, '{}_params.pkl'.format(model.name))
        my_net.load(file_path=params_file)

        # shape withheld continuations for evaluation
        # Only the playlist-target matrix is kept (features discarded via `_`).
        _, Y_cont = shape_data(playlists_idx,
                               songs_idx,
                               idx2song,
                               features,
                               subset=cont_idx)

        # songs in the "query" playlists need to be masked to make sure that
        # they are not recommended as continuations
        _, Y_query = shape_data(playlists_idx,
                                songs_idx,
                                idx2song,
                                features,
                                subset=query_idx)

        # get num of song occurrences when model was fit for cold-start analysis
        # Y_fit = Y_query
        # NOTE(review): occurrences are counted over the *query* matrix (the
        # commented-out alias above suggests Y_query stands in for Y_fit here);
        # axis=1 sums each row -- presumably songs x playlists orientation,
        # confirm against shape_data.
        train_occ = np.asarray(Y_query.sum(axis=1)).flatten()
    # provide data information
    # Summarize how playlists/songs fall into the train/valid/fit/query/cont
    # splits before doing anything else.
    show_data_splits(playlists_idx, songs_idx, idx2song, song2artist,
                     train_idx, valid_idx, fit_idx, query_idx, cont_idx)

    # provide model information
    print('\nNetwork:')
    show_design(model)

    if args.train:
        #
        # train the hybrid model while validating on withheld playlists
        #

        # prepare input song features and playlist targets at training
        X_train, Y_train = shape_data(
            playlists_idx, songs_idx, idx2song, features,
            mode='train', subset=train_idx
        )

        # prepare input song features and playlist targets at validation
        # NOTE(review): validation uses mode='test' -- presumably shape_data
        # shapes targets differently for evaluation; confirm in shape_data.
        X_valid, Y_valid = shape_data(
            playlists_idx, songs_idx, idx2song, features,
            mode='test', subset=valid_idx
        )

        # preprocess input features if required
        # use the training song features to standardize the validation data
        # (the scaler is fit on X_train only, so no validation statistics leak
        # into preprocessing; RobustScaler centers/scales by median and IQR,
        # which is less sensitive to outliers than mean/std scaling)
        if model.standardize:
            scaler = prep.RobustScaler()
            X_train = scaler.fit_transform(X_train)
            X_valid = scaler.transform(X_valid)
# --- extraction artifact: "Beispiel #3" example-separator left over from the
# scraped source; the snippet below belongs to a different script (matrix
# factorization baseline) and does not continue the code above ---
        # build the test split: playlists disjoint from training whose songs
        # are divided into a visible "query" part and a withheld continuation
        # NOTE(review): test_idx_dsj / train_idx_cnt / test_idx_cnt are defined
        # above this chunk -- the seen/withheld semantics are inferred from the
        # names and the comments below; confirm against the split code.
        query_idx = np.intersect1d(test_idx_dsj, train_idx_cnt)
        cont_idx = np.intersect1d(test_idx_dsj, test_idx_cnt)

        # provide data information
        show_data_splits(playlists_idx, songs_idx, idx2song, song2artist,
                         train_idx, valid_idx, fit_idx, query_idx, cont_idx)

        #
        # extend the playlists in the query split and evaluate the
        # continuations by comparing them to actual withheld continuations
        #

        # prepare song-playlist matrix in test continuations
        # (idx2song/features are None: only the binary song-playlist targets
        # are needed for this pure-factorization model, no content features)
        _, Y_cont = shape_data(playlists_idx,
                               songs_idx,
                               idx2song=None,
                               features=None,
                               subset=cont_idx)

        # prepare song-playlist matrix in test queries
        # used to mask songs from queries
        _, Y_query = shape_data(playlists_idx,
                                songs_idx,
                                idx2song=None,
                                features=None,
                                subset=query_idx)

        # prepare song-playlist matrix in fit playlists
        # NOTE(review): this call is truncated in this chunk (the
        # features=None/subset arguments and closing parenthesis are missing)
        # -- the file is a concatenation of scraped snippets; restore the tail
        # of this call from the original script before running.
        _, Y_fit = shape_data(playlists_idx,
                              songs_idx,
                              idx2song=None,
        # provide model information
        print('\nMatrix Factorization:')
        show_design(model)

        if args.train:
            #
            # train the factorization while validating on withheld playlists
            # for tuning purposes, one fold is enough
            #

            # `fold` comes from an enclosing loop above this chunk; training
            # is deliberately done only on the first fold (fold == 0).
            if fold > 0:
                print('\nI won\'t train anymore. Just did it for one fold.')
                break

            # prepare song-playlist matrix in training
            # (features are irrelevant for the factorization: idx2song and
            # features are None, only the target matrix is used)
            _, Y_train = shape_data(
                playlists_idx, songs_idx, idx2song=None, features=None,
                subset=train_idx
            )

            # prepare song-playlist matrix in validation
            _, Y_valid = shape_data(
                playlists_idx, songs_idx, idx2song=None, features=None,
                subset=valid_idx
            )

            # train the model
            # NOTE(review): this call is cut off at the end of the chunk (no
            # closing parenthesis visible) -- further arguments may follow in
            # the original script.
            train(
                model=model,
                train_target=Y_train,
                valid_target=Y_valid,
                out_dir=out_dir,
                use_gpu=args.use_gpu