def load_piece_list(piece_names, aug_config=NO_AUGMENT):
    """Collect image, spectrogram and onset-to-coordinate data for pieces.

    Parameters
    ----------
    piece_names : list of str
        Names of the pieces to load from ``DATA_ROOT_MSMD``.
    aug_config : dict, optional
        Augmentation configuration forwarded to ``prepare_piece_data``
        (default: ``NO_AUGMENT``).

    Returns
    -------
    tuple of (list, list, list)
        Per-piece images, spectrograms, and onset-to-coordinate maps.
        Pieces that fail to load are reported and skipped.
    """
    all_images = []
    all_specs = []
    all_o2c_maps = []

    # BUG FIX: the original iterated `tqdm(2, ncols=70)`, which raises a
    # TypeError (tqdm needs an iterable, not an int). A debug hack had
    # replaced `range(len(piece_names))` with `2`. Iterate the piece
    # names directly, restoring the full list.
    for piece_name in tqdm(piece_names, ncols=70):

        try:
            image, specs, o2c_maps = prepare_piece_data(
                DATA_ROOT_MSMD, piece_name,
                aug_config=aug_config, require_audio=False)
        except Exception:
            # best-effort loading: report the failure and move on
            # (a bare `except:` would also trap KeyboardInterrupt)
            print("Problems with loading piece %s" % piece_name)
            print(sys.exc_info()[0])
            continue

        # keep stuff
        all_images.append(image)
        all_specs.append(specs)
        all_o2c_maps.append(o2c_maps)

    return all_images, all_specs, all_o2c_maps
def initialize_sheet_db(self, pieces, keep_snippets=False):
    """Build the sheet-music retrieval database.

    Embeds all sheet snippets of the given pieces with the model's
    view-1 network and stores the resulting codes along with the
    owning piece's id.

    Parameters
    ----------
    pieces : list of str
        Piece names to load from ``ROOT_DIR``.
    keep_snippets : bool, optional
        If True, additionally keep a half-resolution uint8 copy of
        every sheet snippet (e.g. for later visualization).
    """
    print("Initializing sheet music db ...")
    self.id_to_piece = dict()

    # FIX: `np.int` was removed in NumPy 1.24; the builtin `int` is the
    # exact dtype the alias used to mean.
    self.sheet_snippet_ids = np.zeros(0, dtype=int)
    self.sheet_snippet_codes = np.zeros((0, self.embed_network.code_dim),
                                        dtype=np.float32)
    self.sheet_snippets = np.zeros(
        (0, self.snippet_shape[0] // 2, self.snippet_shape[1] // 2),
        dtype=np.uint8)

    # initialize retrieval pool
    for piece_idx, piece in enumerate(pieces):
        print(" (%03d / %03d) %s" % (piece_idx + 1, len(pieces), piece))

        # load piece
        self.id_to_piece[piece_idx] = piece
        piece_image, piece_specs, piece_o2c_maps = prepare_piece_data(
            ROOT_DIR, piece, require_audio=False)

        # initialize data pool with piece (no augmentation, fixed order)
        piece_pool = AudioScoreRetrievalPool([piece_image], [piece_specs],
                                             [piece_o2c_maps],
                                             data_augmentation=NO_AUGMENT,
                                             shuffle=False)

        # embed sheet snippets of piece
        snippets = np.zeros((piece_pool.shape[0], 1, self.sheet_shape[0],
                             self.sheet_shape[1]), dtype=np.uint8)
        for i in range(piece_pool.shape[0]):

            # get image snippet
            snippet, _ = piece_pool[i:i + 1]
            snippet = snippet[0, 0]
            snippets[i, 0] = snippet

            # keep sheet snippets (downsized by 2 to save memory)
            if keep_snippets:
                snippet = resize_image(snippet, rsz=0.5).astype(
                    np.uint8)[np.newaxis]
                self.sheet_snippets = np.concatenate(
                    (self.sheet_snippets, snippet))

        # compute sheet snippet codes
        codes = self.embed_network.compute_view_1(snippets)

        # keep codes
        self.sheet_snippet_codes = np.concatenate(
            (self.sheet_snippet_codes, codes))

        # save id of piece for every snippet embedded above
        piece_ids = np.ones(piece_pool.shape[0], dtype=int) * piece_idx
        self.sheet_snippet_ids = np.concatenate(
            (self.sheet_snippet_ids, piece_ids))

    print("%s sheet snippet codes of %d pieces collected"
          % (self.sheet_snippet_codes.shape[0], len(pieces)))
def initialize_audio_db(self, pieces, augment, keep_snippets=False):
    """Build the audio (performance) retrieval database.

    Embeds all spectrogram excerpts of the given pieces with the
    model's view-2 network and stores the resulting codes along with
    the owning piece's id.

    Parameters
    ----------
    pieces : list of str
        Piece names to load from ``ROOT_DIR``.
    augment : dict
        Augmentation configuration forwarded to ``prepare_piece_data``
        and to the retrieval pool.
    keep_snippets : bool, optional
        Placeholder flag — keeping audio excerpts is not implemented
        yet (see TODO below).
    """
    print("Initializing audio db ...")
    self.id_to_perform = dict()

    # FIX: `np.int` was removed in NumPy 1.24; the builtin `int` is the
    # exact dtype the alias used to mean.
    self.perform_excerpt_ids = np.zeros(0, dtype=int)
    self.perform_excerpt_codes = np.zeros((0, self.embed_network.code_dim),
                                          dtype=np.float32)
    self.perform_excerpts = np.zeros(
        (0, self.excerpt_shape[0] // 2, self.excerpt_shape[1] // 2),
        dtype=np.uint8)

    # initialize retrieval pool
    for piece_idx, piece in enumerate(pieces):
        print(" (%03d / %03d) %s" % (piece_idx + 1, len(pieces), piece))

        # load piece
        self.id_to_perform[piece_idx] = piece
        piece_image, piece_specs, piece_o2c_maps = prepare_piece_data(
            ROOT_DIR, piece, aug_config=augment, require_audio=False)

        # initialize data pool with piece
        piece_pool = AudioScoreRetrievalPool([piece_image], [piece_specs],
                                             [piece_o2c_maps],
                                             data_augmentation=augment,
                                             shuffle=False)

        # embed audio excerpt of piece
        excerpts = np.zeros((piece_pool.shape[0], 1, self.spec_shape[0],
                             self.spec_shape[1]), dtype=np.float32)
        for j in range(piece_pool.shape[0]):

            # get spectrogram excerpt
            _, spec = piece_pool[j:j + 1]
            excerpts[j, 0] = spec

            # TODO: keep excerpt snippets
            # don't know yet how much sense this makes
            if keep_snippets:
                pass

        # compute audio excerpt code
        codes = self.embed_network.compute_view_2(excerpts)

        # keep code
        self.perform_excerpt_codes = np.concatenate(
            (self.perform_excerpt_codes, codes))

        # save id of piece for every excerpt embedded above
        piece_ids = np.ones(piece_pool.shape[0], dtype=int) * piece_idx
        self.perform_excerpt_ids = np.concatenate(
            (self.perform_excerpt_ids, piece_ids))

    print("%s audio excerpts of %d pieces collected"
          % (self.perform_excerpt_codes.shape[0], len(pieces)))
sheet_win_shape = model.INPUT_SHAPE_1[1:] spec_win_shape = model.INPUT_SHAPE_2[1:] # select pieces split = load_split(args.train_split) te_pieces = split["test"] # pieces = [te_pieces[te_pieces.index("BachJS__BWV790__bwv790")]] pieces = te_pieces # collect pixel errors for all pieces piece_pxl_errors = dict() for piece in pieces: print("\nTarget Piece: %s" % piece) piece_image, piece_specs, piece_o2c_maps = prepare_piece_data( ROOT_DIR, piece, aug_config=test_augment, require_audio=False) # initialize data pool with piece piece_pool = AudioScoreRetrievalPool([piece_image], [piece_specs], [piece_o2c_maps], data_augmentation=test_augment, shuffle=False) # compute spectrogram from file if args.real_audio: audio_file = "/home/matthias/cp/data/sheet_localization/real_music/0_real_audio/%s.flac" % piece if not os.path.exists(audio_file): continue spec = processor.process(audio_file).T # use pre-computed spectrogram else:
a2s_srv.initialize_audio_db(pieces=te_pieces, augment=test_augment, keep_snippets=False) a2s_srv.save_audio_db_file("audio_db_file.pkl") # load sheet music data base else: a2s_srv.load_audio_db_file("audio_db_file.pkl") # run full evaluation if args.full_eval: print(col.print_colored("\nRunning full evaluation:", col.UNDERLINE)) ranks = [] for tp in te_pieces: # load piece piece_image, _, _ = prepare_piece_data(ROOT_DIR, tp, aug_config=test_augment, require_audio=False) # detect piece from spectrogram ret_result, ret_votes = a2s_srv.detect_performance(piece_image, top_k=len(te_pieces), n_candidates=args.n_candidates, verbose=False) if tp in ret_result: rank = ret_result.index(tp) + 1 ratio = ret_votes[ret_result.index(tp)] else: rank = len(ret_result) ratio = 0.0 ranks.append(rank) color = col.OKBLUE if ranks[-1] == 1 else col.WARNING print(col.print_colored("rank: %02d (%.2f) " % (ranks[-1], ratio), color) + tp) # report results ranks = np.asarray(ranks)
# path to MSMD data set DATA_ROOT_MSMD = '/Users/abdelrahman/Desktop/Bachelor References/msmd_aug' # for now we select just some pieces # (this could be also the entire training set) piece_names = [ 'BachCPE__cpe-bach-rondo__cpe-bach-rondo', 'BachJS__BWV259__bwv-259', 'AdamA__giselle__giselle' ] all_piece_images = [] all_piece_specs = [] all_piece_o2c_maps = [] for piece_name in piece_names: piece_image, piece_specs, piece_o2c_maps = prepare_piece_data( DATA_ROOT_MSMD, piece_name, require_audio=False) # keep stuff all_piece_images.append(piece_image) all_piece_specs.append(piece_specs) all_piece_o2c_maps.append(piece_o2c_maps) # path to network parameters param_file = "/Users/abdelrahman/Desktop/resultdumper/mutopia_ccal_cont_est_UV/params_all_split_mutopia_full_aug.pkl" # this function is called before a snippet is fed to the network # (for the present model it resizes the image by a factor of 2) prepare_sheet_img = model.prepare # initialize retrieval wrapper embed_network = RetrievalWrapper(model,