Code example #1
0
def load_piece_list(piece_names, aug_config=NO_AUGMENT):
    """
    Collect piece data for a list of pieces.

    Parameters
    ----------
    piece_names : list of str
        Names of the pieces to load from DATA_ROOT_MSMD.
    aug_config : dict
        Augmentation configuration forwarded to prepare_piece_data.

    Returns
    -------
    (all_images, all_specs, all_o2c_maps) : tuple of lists
        One entry per successfully loaded piece; pieces that fail to
        load are reported and skipped.
    """
    all_images = []
    all_specs = []
    all_o2c_maps = []

    # BUG FIX: this loop previously read `for ip in tqdm(2, ncols=70)`,
    # which raises TypeError (an int is not iterable) — a leftover from a
    # debugging edit that tried to limit the run to 2 pieces (see the old
    # comment "changed range from len(piece_names) to 2"). Iterate over
    # the full piece list again.
    for piece_name in tqdm(piece_names, ncols=70):

        try:
            image, specs, o2c_maps = prepare_piece_data(DATA_ROOT_MSMD, piece_name,
                                                        aug_config=aug_config, require_audio=False)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; report the failure and move on.
            print("Problems with loading piece %s" % piece_name)
            print(sys.exc_info()[0])
            continue

        # keep stuff
        all_images.append(image)
        all_specs.append(specs)
        all_o2c_maps.append(o2c_maps)

    return all_images, all_specs, all_o2c_maps
Code example #2
0
    def initialize_sheet_db(self, pieces, keep_snippets=False):
        """Initialize the sheet-music retrieval database.

        For each piece: load its sheet image and alignment data, cut it
        into snippets via an AudioScoreRetrievalPool, embed the snippets
        with view 1 of the embedding network, and append the resulting
        codes and piece ids (and optionally the half-sized snippet
        images) to the instance-level pools.

        Parameters
        ----------
        pieces : list of str
            Piece identifiers resolvable under ROOT_DIR.
        keep_snippets : bool
            If True, also store each snippet image (resized by factor
            0.5) in ``self.sheet_snippets`` for later inspection.
        """
        print("Initializing sheet music db ...")

        self.id_to_piece = dict()
        # FIX: `np.int` was a deprecated alias of the builtin `int` and was
        # removed in NumPy >= 1.24; use `int` directly (same dtype).
        self.sheet_snippet_ids = np.zeros(0, dtype=int)
        self.sheet_snippet_codes = np.zeros((0, self.embed_network.code_dim),
                                            dtype=np.float32)
        # snippets are stored at half resolution (see resize below)
        self.sheet_snippets = np.zeros(
            (0, self.snippet_shape[0] // 2, self.snippet_shape[1] // 2),
            dtype=np.uint8)

        # initialize retrieval pool
        for piece_idx, piece in enumerate(pieces):
            print(" (%03d / %03d) %s" % (piece_idx + 1, len(pieces), piece))

            # load piece
            self.id_to_piece[piece_idx] = piece
            piece_image, piece_specs, piece_o2c_maps = prepare_piece_data(
                ROOT_DIR, piece, require_audio=False)

            # initialize data pool with piece (no augmentation, fixed order,
            # so snippet indices stay aligned with the stored codes/ids)
            piece_pool = AudioScoreRetrievalPool([piece_image], [piece_specs],
                                                 [piece_o2c_maps],
                                                 data_augmentation=NO_AUGMENT,
                                                 shuffle=False)

            # embed sheet snippets of piece
            snippets = np.zeros((piece_pool.shape[0], 1, self.sheet_shape[0],
                                 self.sheet_shape[1]),
                                dtype=np.uint8)
            for i in range(piece_pool.shape[0]):

                # get image snippet (pool yields (sheet, spec) pairs;
                # take the sheet view)
                snippet, _ = piece_pool[i:i + 1]
                snippet = snippet[0, 0]
                snippets[i, 0] = snippet

                # keep sheet snippets
                if keep_snippets:
                    snippet = resize_image(snippet, rsz=0.5).astype(
                        np.uint8)[np.newaxis]
                    self.sheet_snippets = np.concatenate(
                        (self.sheet_snippets, snippet))

            # compute sheet snippet codes
            codes = self.embed_network.compute_view_1(snippets)

            # keep codes
            self.sheet_snippet_codes = np.concatenate(
                (self.sheet_snippet_codes, codes))

            # save id of piece (one id entry per snippet)
            piece_ids = np.ones(piece_pool.shape[0], dtype=int) * piece_idx
            self.sheet_snippet_ids = np.concatenate(
                (self.sheet_snippet_ids, piece_ids))

        print("%s sheet snippet codes of %d pieces collected" %
              (self.sheet_snippet_codes.shape[0], len(pieces)))
Code example #3
0
    def initialize_audio_db(self, pieces, augment, keep_snippets=False):
        """Initialize the audio retrieval database.

        For each piece: load its spectrograms and alignment data, cut
        them into excerpts via an AudioScoreRetrievalPool, embed the
        excerpts with view 2 of the embedding network, and append the
        resulting codes and piece ids to the instance-level pools.

        Parameters
        ----------
        pieces : list of str
            Piece identifiers resolvable under ROOT_DIR.
        augment : dict
            Augmentation configuration used both when loading the piece
            and inside the retrieval pool.
        keep_snippets : bool
            Present for symmetry with initialize_sheet_db; excerpt
            storage is not implemented yet (see TODO below).
        """
        print("Initializing audio db ...")

        self.id_to_perform = dict()
        # FIX: `np.int` was a deprecated alias of the builtin `int` and was
        # removed in NumPy >= 1.24; use `int` directly (same dtype).
        self.perform_excerpt_ids = np.zeros(0, dtype=int)
        self.perform_excerpt_codes = np.zeros((0, self.embed_network.code_dim),
                                              dtype=np.float32)
        self.perform_excerpts = np.zeros(
            (0, self.excerpt_shape[0] // 2, self.excerpt_shape[1] // 2),
            dtype=np.uint8)

        # initialize retrieval pool
        for piece_idx, piece in enumerate(pieces):
            print(" (%03d / %03d) %s" % (piece_idx + 1, len(pieces), piece))

            # load piece
            self.id_to_perform[piece_idx] = piece
            piece_image, piece_specs, piece_o2c_maps = prepare_piece_data(
                ROOT_DIR, piece, aug_config=augment, require_audio=False)

            # initialize data pool with piece (fixed order so excerpt
            # indices stay aligned with the stored codes/ids)
            piece_pool = AudioScoreRetrievalPool([piece_image], [piece_specs],
                                                 [piece_o2c_maps],
                                                 data_augmentation=augment,
                                                 shuffle=False)

            # embed audio excerpt of piece
            excerpts = np.zeros((piece_pool.shape[0], 1, self.spec_shape[0],
                                 self.spec_shape[1]),
                                dtype=np.float32)
            for j in range(piece_pool.shape[0]):

                # get spectrogram excerpt (pool yields (sheet, spec) pairs;
                # take the spectrogram view)
                _, spec = piece_pool[j:j + 1]
                excerpts[j, 0] = spec

                # TODO: keep excerpt snippets
                # don't know yet how much sense this makes
                if keep_snippets:
                    pass

            # compute audio excerpt code
            codes = self.embed_network.compute_view_2(excerpts)

            # keep code
            self.perform_excerpt_codes = np.concatenate(
                (self.perform_excerpt_codes, codes))

            # save id of piece (one id entry per excerpt)
            piece_ids = np.ones(piece_pool.shape[0], dtype=int) * piece_idx
            self.perform_excerpt_ids = np.concatenate(
                (self.perform_excerpt_ids, piece_ids))

        print("%s audio excerpts of %d pieces collected" %
              (self.perform_excerpt_codes.shape[0], len(pieces)))
Code example #4
0
    # window shapes expected by the two network views
    # (drop the leading channel/batch dimension of INPUT_SHAPE_*)
    sheet_win_shape = model.INPUT_SHAPE_1[1:]
    spec_win_shape = model.INPUT_SHAPE_2[1:]

    # select pieces: evaluate on the test split of the configured split file
    split = load_split(args.train_split)
    te_pieces = split["test"]
    # pieces = [te_pieces[te_pieces.index("BachJS__BWV790__bwv790")]]
    pieces = te_pieces

    # collect pixel errors for all pieces
    piece_pxl_errors = dict()

    for piece in pieces:

        print("\nTarget Piece: %s" % piece)
        piece_image, piece_specs, piece_o2c_maps = prepare_piece_data(
            ROOT_DIR, piece, aug_config=test_augment, require_audio=False)

        # initialize data pool with piece
        # (shuffle=False keeps sheet/spectrogram pairs in score order)
        piece_pool = AudioScoreRetrievalPool([piece_image], [piece_specs],
                                             [piece_o2c_maps],
                                             data_augmentation=test_augment,
                                             shuffle=False)

        # compute spectrogram from file
        if args.real_audio:
            # NOTE(review): hard-coded, machine-specific audio path;
            # pieces without a recorded performance are skipped.
            audio_file = "/home/matthias/cp/data/sheet_localization/real_music/0_real_audio/%s.flac" % piece
            if not os.path.exists(audio_file):
                continue
            # .T — presumably transposes to (frames, bins); verify against
            # the processor's output convention
            spec = processor.process(audio_file).T
        # use pre-computed spectrogram
        # NOTE(review): excerpt is truncated here — the else-branch body
        # lies outside this snippet.
        else:
Code example #5
0
        # build the audio database from the test pieces and cache it
        # (this branch belongs to an if-statement outside this excerpt)
        a2s_srv.initialize_audio_db(pieces=te_pieces, augment=test_augment, keep_snippets=False)
        a2s_srv.save_audio_db_file("audio_db_file.pkl")

    # load sheet music data base
    else:
        a2s_srv.load_audio_db_file("audio_db_file.pkl")

    # run full evaluation: for every test piece, retrieve the performance
    # from its sheet image and record the rank of the correct piece
    if args.full_eval:
        print(col.print_colored("\nRunning full evaluation:", col.UNDERLINE))

        ranks = []
        for tp in te_pieces:

            # load piece (only the sheet image is needed here)
            piece_image, _, _ = prepare_piece_data(ROOT_DIR, tp, aug_config=test_augment, require_audio=False)

            # detect piece from spectrogram
            ret_result, ret_votes = a2s_srv.detect_performance(piece_image, top_k=len(te_pieces), n_candidates=args.n_candidates, verbose=False)
            if tp in ret_result:
                # 1-based rank of the correct piece plus its vote ratio
                rank = ret_result.index(tp) + 1
                ratio = ret_votes[ret_result.index(tp)]
            else:
                # not retrieved at all: pessimistically assign the worst rank
                rank = len(ret_result)
                ratio = 0.0
            ranks.append(rank)
            # rank 1 (exact hit) prints blue, everything else warns
            color = col.OKBLUE if ranks[-1] == 1 else col.WARNING
            print(col.print_colored("rank: %02d (%.2f) " % (ranks[-1], ratio), color) + tp)

        # report results (aggregation continues beyond this excerpt)
        ranks = np.asarray(ranks)
Code example #6
0
# path to MSMD data set
# NOTE(review): machine-specific absolute path — adjust per installation
DATA_ROOT_MSMD = '/Users/abdelrahman/Desktop/Bachelor References/msmd_aug'

# for now we select just some pieces
# (this could be also the entire training set)
piece_names = [
    'BachCPE__cpe-bach-rondo__cpe-bach-rondo', 'BachJS__BWV259__bwv-259',
    'AdamA__giselle__giselle'
]

# load every selected piece up front; each piece contributes its sheet
# image, spectrograms, and onset-to-coordinate alignment maps
all_piece_images = []
all_piece_specs = []
all_piece_o2c_maps = []
for piece_name in piece_names:

    piece_image, piece_specs, piece_o2c_maps = prepare_piece_data(
        DATA_ROOT_MSMD, piece_name, require_audio=False)

    # keep stuff
    all_piece_images.append(piece_image)
    all_piece_specs.append(piece_specs)
    all_piece_o2c_maps.append(piece_o2c_maps)

# path to network parameters
# NOTE(review): machine-specific absolute path — adjust per installation
param_file = "/Users/abdelrahman/Desktop/resultdumper/mutopia_ccal_cont_est_UV/params_all_split_mutopia_full_aug.pkl"

# this function is called before a snippet is fed to the network
# (for the present model it resizes the image by a factor of 2)
prepare_sheet_img = model.prepare

# initialize retrieval wrapper
embed_network = RetrievalWrapper(model,