def compare_file(self, filepath, max_common=10): """ process file to fingerprints and search in database for similar, setting treshold to 1% ? """ print filepath # check file format and use appropriate handler #file_format = filepath.split('.')[-1] filename, file_extension = os.path.splitext(filepath) if not file_extension: file_extension = '.mp3' handler = formatHandlerFactory.get_handler(file_extension) # get tags artist, title, genre = self.extract_tags(filepath) print 'tags:', artist, title, genre ### make fp # prepare wav rate, data = handler.decode_to_wav(filepath) #process data hashes = fp.process_file(rate, data, filepath, vizualize=False) similarity_dict = {} similarity_list = [] for h in hashes: found = self.db_session.query(Fingerprint).filter(Fingerprint.fp_hash == h) #similarity_dict.update similarity_list += [h.song_id for h in found] from collections import Counter results = Counter(similarity_list).most_common(max_common) file_hash = unique_hash(filepath) song = Song(title, artist, file_hash) print results return (song, results)
def fingerprint_file(self, filepath): """ process file to fingerprints and store them in database """ # check file format and use appropriate handler #file_format = filepath.split('.')[-1] filename, file_extension = os.path.splitext(filepath) handler = formatHandlerFactory.get_handler(file_extension) # get file_hash to not process the same file file_hash = unique_hash(filepath) exists = self.db_session.query(Song).filter(Song.file_hash == file_hash).first() print 'exists', exists if exists: print 'file %s already processed in db' % filepath return None # get tags artist, title, genres = self.extract_tags(filepath) print 'tags:', artist, title, genres ### make fp # prepare wav rate, data = handler.decode_to_wav(filepath) #process data hashes = fp.process_file(rate, data, filepath, vizualize=False) # db.insert_song song = self.save_song(title, artist, genres, file_hash) # db.insert_hashes self.save_hashes(song, hashes) return None