Exemple #1
0
    def fingerprint_file(self, file_path: str, song_name: str = None) -> None:
        """
        Fingerprint a single audio file and store the result in the database
        so that it can be queried later.

        Files whose hash is already present in ``self.songhashes_set`` are
        skipped, and a message is printed instead.

        :param file_path: path to the file.
        :param song_name: song name associated to the audio file; when falsy,
            the name derived from ``file_path`` is used instead.
        """
        fallback_name = decoder.get_audio_name_from_path(file_path)
        digest = decoder.unique_hash(file_path)
        if not song_name:
            song_name = fallback_name

        # Guard: never fingerprint the same file twice.
        if digest in self.songhashes_set:
            print(f"{song_name} already fingerprinted, continuing...")
            return

        song_name, hashes, file_hash = Dejavu._fingerprint_worker(
            file_path,
            self.limit,
            song_name=song_name,
        )

        # Register the song first: the hashes are stored against its id.
        song_id = self.db.insert_song(song_name, file_hash)
        self.db.insert_hashes(song_id, hashes)
        self.db.set_song_fingerprinted(song_id)

        # Presumably refreshes the cached set of known hashes checked above --
        # confirm against the helper's definition.
        self.__load_fingerprinted_audio_hashes()
Exemple #2
0
    def begin(self):
        """
        Run the recognizer on every test file and record the outcome.

        For each entry of ``self.test_files`` the method runs
        ``python dejavu.py -r file <path>`` in a subprocess, parses what it
        prints, and fills the ``[line][col]`` cell of ``self.result_match``
        ('yes', 'no' or 'invalid'), ``self.result_matching_times``,
        ``self.result_query_duration`` and ``self.result_match_confidence``.

        The row (``line``) is looked up from the song name and the column
        (``col``) from the "<digit>sec" token, both taken from the file name.
        """

        def record_failure(line, col, status):
            # A failed lookup has no timing or confidence to report.
            self.result_match[line][col] = status
            self.result_matching_times[line][col] = 0
            self.result_query_duration[line][col] = 0
            self.result_match_confidence[line][col] = 0

        for f in self.test_files:
            log_msg('--------------------------------------------------')
            log_msg(f'file: {f}')

            # get column: first "<digit>sec" token of the name that is one of
            # the configured test lengths
            col = self.get_column_id([
                x for x in re.findall(r"[0-9]sec", f) if x in self.test_seconds
            ][0])

            # format: XXXX_offset_length.mp3, we also take into account underscores within XXXX
            splits = get_audio_name_from_path(f).split("_")
            song = "_".join(splits[:-2])
            line = self.get_line_id(song)

            # check_output returns bytes; decode before any comparison with
            # str, otherwise the "None" check below can never be true.
            raw_output = subprocess.check_output([
                "python", "dejavu.py", '-r', 'file',
                join(self.test_folder, f)
            ]).strip()
            output = raw_output.decode('utf-8')

            result = None
            matches = []
            if output != "None":
                # The script prints a Python-style dict, not JSON: swap the
                # quotes and drop the bytes prefix so json can parse it.
                # NOTE(review): this breaks if a value contains a quote.
                result = json.loads(
                    output.replace("'", '"').replace(': b"', ':"'))
                matches = result[RESULTS]

            if not matches:
                # Either the literal "None" was printed or the result list is
                # empty; both are recorded as "no match".
                log_msg('No match')
                record_failure(line, col, 'no')
            else:
                # which song did we predict? We consider only the first match.
                match = matches[0]
                song_result = match[SONG_NAME]
                log_msg(f'song: {song}')
                log_msg(f'song_result: {song_result}')

                if song_result != song:
                    log_msg('invalid match')
                    record_failure(line, col, 'invalid')
                else:
                    log_msg('correct match')
                    # NOTE(review): looks like leftover debug output; kept so
                    # stdout is unchanged.
                    print(self.result_match)
                    self.result_match[line][col] = 'yes'
                    query_duration = round(result[TOTAL_TIME], 3)
                    self.result_query_duration[line][col] = query_duration
                    self.result_match_confidence[line][col] = match[
                        HASHES_MATCHED]

                    # using replace in f for getting rid of underscores in name
                    song_start_time = re.findall("_[^_]+", f.replace(song, ""))
                    song_start_time = song_start_time[0].lstrip("_ ")

                    # Presumably converts the reported offset into seconds --
                    # confirm the constants match the fingerprinting settings.
                    result_start_time = round(
                        (match[OFFSET] * DEFAULT_WINDOW_SIZE *
                         DEFAULT_OVERLAP_RATIO) / DEFAULT_FS, 0)

                    # A difference of exactly one (either sign) is treated as
                    # an exact hit.
                    delta = int(result_start_time) - int(song_start_time)
                    if abs(delta) == 1:
                        delta = 0
                    self.result_matching_times[line][col] = delta

                    log_msg(f'query duration: {query_duration}')
                    log_msg(f'confidence: {match[HASHES_MATCHED]}')
                    log_msg(f'song start_time: {song_start_time}')
                    log_msg(f'result start time: {result_start_time}')

                    if delta == 0:
                        log_msg('accurate match')
                    else:
                        log_msg('inaccurate match')
            log_msg('--------------------------------------------------\n')