def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every new audio file under ``path`` and store the results.

    Files whose ``decoder.unique_hash`` is already in ``self.songhashes_set``
    are skipped. The remaining files are fingerprinted in a process pool and
    each result is written to ``self.db`` as soon as it arrives. Progress is
    reported through ``dejavu.shared.UITEXTLOGGER`` and on stdout.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        # Don't refingerprint already fingerprinted files.
        if decoder.unique_hash(filename) in self.songhashes_set:
            skipped = "%s already fingerprinted, continuing..." % filename
            dejavu.shared.UITEXTLOGGER.emit(skipped)
            print(skipped)
            continue
        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit)
                    for filename in filenames_to_fingerprint]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them.
        while True:
            try:
                song_name, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only worker failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                dejavu.shared.UITEXTLOGGER.emit("Failed fingerprinting")
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here.
                traceback.print_exc(file=sys.stdout)
            else:
                saving = "Saving finger prints to Database for %s" % song_name
                dejavu.shared.UITEXTLOGGER.emit(saving)
                print(saving)

                sid = self.db.insert_song(song_name, file_hash)
                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()

                finished = ("Finished saving finger prints to Database for %s"
                            % song_name)
                dejavu.shared.UITEXTLOGGER.emit(finished)
                print(finished)
    except BaseException:
        # Something escaped (DB error, Ctrl-C, ...): stop the workers at once
        # instead of leaving them running behind the caller's back.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def fingerprint_directory(self, path, extensions, nprocesses=None,
                          splited=False, splited_song_name=""):
    """Fingerprint every new audio file under ``path`` and store the results.

    Files whose ``decoder.path_to_songname`` is already in
    ``self.songnames_set`` are skipped. The rest are fingerprinted in a
    process pool and written to ``self.db`` as results arrive.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
        splited: When true *and* ``splited_song_name`` is non-empty, the
            hashes of all files are stored under one song row.
        splited_song_name: Name of that shared song row. If ``splited`` is
            true but this is empty, every file gets its own song row (the
            previous code failed with an unbound ``sid`` in that case).
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    # A shared song row needs both the flag and a usable name.
    share_song_row = bool(splited and splited_song_name)

    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        # Don't refingerprint already fingerprinted files.
        if decoder.path_to_songname(filename) in self.songnames_set:
            print("%s already fingerprinted, continuing..." % filename)
            continue
        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit)
                    for filename in filenames_to_fingerprint]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        shared_sid = None
        if share_song_row:
            shared_sid = self.db.insert_song(splited_song_name)

        # Loop till we have all of them.
        while True:
            try:
                song_name, hashes = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only worker failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here.
                traceback.print_exc(file=sys.stdout)
            else:
                if share_song_row:
                    sid = shared_sid
                else:
                    sid = self.db.insert_song(song_name)

                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()
    except BaseException:
        # Something escaped (DB error, Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def recognize_directory(self, path, extensions):
    """Run recognition on every matching file under ``path`` and report hits.

    Each file found by ``decoder.find_files`` is passed to
    ``self.recognize_file``. When the result's ``"confidence"`` reaches
    ``self.confidence_limit``, a timestamped message is printed and also
    sent to ``logging.info``.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
    """
    for filename, _ in decoder.find_files(path, extensions):
        result = self.recognize_file(filename)

        # An empty/None result cannot be subscripted below; treat it as
        # "nothing recognized" instead of aborting the whole directory scan.
        if not result:
            continue

        if result["confidence"] >= self.confidence_limit:
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            msg = "[%s]From file %s we recognized: %s" % (
                timestamp, filename, result)
            # Single-argument print() behaves the same on Python 2 and 3.
            print(msg)
            logging.info(msg)
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every audio file under ``path`` and upload results in batches.

    All files found by ``decoder.find_files`` are fingerprinted in a process
    pool. Results are collected into batches of ``BATCH_NUMBER`` entries and
    each batch is handed to ``call_fingerprint_create_api``. Any remainder is
    sent after the last result; an empty batch is never sent.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit)
                    for filename, _ in decoder.find_files(path, extensions)]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them.
        batch_list = []
        while True:
            try:
                song_name, hashes, file_hash, filename = next(iterator)
                # Materialised inside the try so a failure while consuming
                # the hashes is reported like any other per-file failure.
                hashes_list = list(hashes)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only per-file failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here.
                traceback.print_exc(file=sys.stdout)
            else:
                batch_list.append({
                    "song_name": song_name,
                    "hashes_list": hashes_list,
                    "file_hash": file_hash,
                    "filename": filename,
                })
                # ">=" rather than "==" so a non-positive BATCH_NUMBER still
                # flushes instead of accumulating forever.
                if len(batch_list) >= BATCH_NUMBER:
                    call_fingerprint_create_api(batch_list)
                    batch_list = []

        # Flush the remainder, but never push an empty batch to the API.
        if batch_list:
            call_fingerprint_create_api(batch_list)
    except BaseException:
        # Something escaped (API error, Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every new audio file under ``path`` and store the results.

    Files whose ``decoder.path_to_songname`` is already in
    ``self.songnames_set`` are skipped. The rest are fingerprinted in a
    process pool and written to ``self.db`` as results arrive.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        # Don't refingerprint already fingerprinted files.
        if decoder.path_to_songname(filename) in self.songnames_set:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s already fingerprinted, continuing..." % filename)
            continue
        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit)
                    for filename in filenames_to_fingerprint]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them.
        while True:
            try:
                song_name, hashes = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only worker failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here.
                import sys
                import traceback
                traceback.print_exc(file=sys.stdout)
            else:
                sid = self.db.insert_song(song_name)
                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()
    except BaseException:
        # Something escaped (DB error, Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every new audio file under ``path`` and store the results.

    Files whose ``decoder.unique_hash`` is already in ``self.songhashes_set``
    are skipped. The rest are fingerprinted in a process pool; each result
    (name, hashes, file hash, audio length) is written to ``self.db`` as it
    arrives. Progress and failures go to the ``'dejavu'`` logger.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    log = logging.getLogger('dejavu')

    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        # Don't refingerprint already fingerprinted files.
        if decoder.unique_hash(filename) in self.songhashes_set:
            # Logger.warn is a deprecated alias; warning() is the real API.
            log.warning("%s already fingerprinted, continuing...", filename)
            continue
        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit)
                    for filename in filenames_to_fingerprint]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them.
        while True:
            try:
                song_name, hashes, file_hash, audio_length = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only worker failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                log.exception("Failed fingerprinting")
            else:
                log.debug("Inserting %s in database", song_name)
                sid = self.db.insert_song(song_name, file_hash, audio_length)
                # Coerce the second element of every hash entry to a plain
                # int and de-duplicate before inserting.
                self.db.insert_hashes(
                    sid, {(entry[0], int(entry[1])) for entry in hashes})
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()
                log.info("%s inserted in database", song_name)
    except BaseException:
        # Something escaped (DB error, Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every new audio file under ``path`` and store the results.

    A file is skipped when ``self.db.get_song_by_hash`` already returns a
    song for its ``decoder.unique_hash``. The rest are fingerprinted in a
    process pool and written to ``self.db`` as results arrive.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        # Don't refingerprint already fingerprinted files.
        known_song = self.db.get_song_by_hash(decoder.unique_hash(filename))
        if known_song is not None:
            logger.info("%s already fingerprinted, continuing...", filename)
            continue
        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit)
                    for filename in filenames_to_fingerprint]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them.
        while True:
            try:
                song_name, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only worker failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                logger.error("Failed fingerprinting")
                # Print traceback because we can't reraise it here.
                traceback.print_exc(file=sys.stdout)
            else:
                sid = self.db.insert_song(song_name, file_hash)
                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
    except BaseException:
        # Something escaped (DB error, Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def fingerprint_directory(self, path, extensions, output_dir=None,
                          nprocesses=None):
    """Fingerprint every audio file under ``path`` into ``.fingerp`` text files.

    All files found by ``decoder.find_files`` are fingerprinted in a process
    pool (the worker is given ``None`` as its second argument). For each
    result a file ``<output_dir>/<song_name>.fingerp`` is written with one
    ``"<hash> - <offset>"`` line per entry, sorted by the second element.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        output_dir: Directory that receives the ``.fingerp`` files.
            NOTE(review): the default ``None`` cannot be joined into a path,
            so every file is then reported as "Failed fingerprinting";
            callers are expected to pass a real directory.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    # Prepare _fingerprint_worker input: one (filename, None) pair per file.
    worker_input = [(filename, None)
                    for filename, _ in decoder.find_files(path, extensions)]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them.
        while True:
            # The write stays inside the try on purpose: a failure for one
            # song is reported and the remaining songs are still processed.
            try:
                song_name, hashes, _ = next(iterator)

                target = os.path.join(output_dir, song_name + '.fingerp')
                lines = [str(entry[0]) + " - " + str(entry[1]) + "\n"
                         for entry in sorted(hashes, key=lambda t: t[1])]
                # "with" guarantees the handle is closed even if a write
                # fails; closing also flushes, so no per-line flush needed.
                with open(target, 'w') as out:
                    out.writelines(lines)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only per-file failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here.
                traceback.print_exc(file=sys.stdout)
    except BaseException:
        # Something escaped (Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every new audio file under ``path`` using async pool tasks.

    Files whose ``decoder.path_to_songname`` is already in
    ``self.songnames_set`` are skipped. For every other file one
    ``_fingerprint_worker(filename, self.db, self.limit)`` task is submitted
    to a process pool, and this method blocks until every task has either
    finished or failed. Failures are printed to stdout and otherwise ignored.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    pool = multiprocessing.Pool(nprocesses)
    try:
        results = []
        for filename, _ in decoder.find_files(path, extensions):
            # Don't refingerprint already fingerprinted files.
            if decoder.path_to_songname(filename) in self.songnames_set:
                # Single-argument print() behaves the same on Python 2 and 3.
                print("%s already fingerprinted, continuing..." % filename)
                continue
            results.append(pool.apply_async(
                _fingerprint_worker, (filename, self.db, self.limit)))

        # Poll the outstanding tasks until none are left. Rebuilding the
        # list each round avoids the O(n) list.remove() per finished task.
        while results:
            still_running = []
            for result in results:
                # TODO: Handle errors gracefully and return them to the
                # callee in some way.
                try:
                    result.get(timeout=2)
                except multiprocessing.TimeoutError:
                    still_running.append(result)
                except Exception:
                    # Only task failures are swallowed; KeyboardInterrupt
                    # and SystemExit must still be able to stop the run.
                    import sys
                    import traceback
                    traceback.print_exc(file=sys.stdout)
            results = still_running
    except BaseException:
        # Something escaped (Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def fingerprint_translation_record_directory(self, path, extensions,
                                             nprocesses=None):
    """Fingerprint new non-recording files under ``path``, then delete them.

    Files whose base name contains ``"_recording"`` are ignored, and files
    whose ``decoder.unique_hash`` is already in ``self.songhashes_set`` are
    skipped. The rest are fingerprinted in a process pool and stored through
    ``self.db.insert_song`` / ``self.db.insert_repeat_hashes``. Once all
    results have been consumed, every file that was queued is removed from
    disk. Messages go to stdout and ``self.logger``.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: File extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    msg = "Starting fingerprint translation record directory"
    print(msg)
    self.logger.info(msg)

    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        if "_recording" in os.path.basename(filename):
            continue

        # Don't refingerprint already fingerprinted files.
        if decoder.unique_hash(filename) in self.songhashes_set:
            msg = "%s already fingerprinted, continuing..." % filename
            print(msg)
            self.logger.info(msg)
            continue

        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit)
                    for filename in filenames_to_fingerprint]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them.
        while True:
            try:
                song_name, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only worker failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                msg = "Failed fingerprinting"
                print(msg)
                self.logger.info(msg)
                # Print traceback because we can't reraise it here.
                traceback.print_exc(file=sys.stdout)
            else:
                sid = self.db.insert_song(song_name, file_hash)
                self.db.insert_repeat_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()

        # NOTE(review): every queued file is deleted, including those whose
        # fingerprinting failed above. The worker result does not carry the
        # file name, so failures cannot be excluded here; confirm that this
        # is the intended behavior.
        for filename in filenames_to_fingerprint:
            os.remove(filename)
    except BaseException:
        # Something escaped (DB error, Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every new audio file under ``path`` and store the results.

    Files whose ``decoder.unique_hash`` is already in ``self.songhashes_set``
    are skipped. The rest are fingerprinted in a process pool and written to
    ``self.db`` as results arrive.

    Per the original author's note, each audio file is first split into
    channels and each channel is then fingerprinted piece by piece; that
    work happens inside ``_fingerprint_worker``, which is not shown here.

    Args:
        path: Directory containing the audio files.
        extensions: File extensions to look for.
        nprocesses: Number of worker processes. ``None`` or ``0`` selects
            ``multiprocessing.cpu_count()`` (1 if that is unavailable);
            negative values fall back to 1.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        # cpu_count() reports how many CPUs the current system has.
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        if nprocesses <= 0:
            nprocesses = 1

    # Paths of the audio files that still need fingerprinting.
    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        # Don't refingerprint already fingerprinted files: hash the file
        # (SHA-1 according to the original comment) and compare it against
        # the hashes of all known songs.
        if decoder.unique_hash(filename) in self.songhashes_set:
            print("%s already fingerprinted, continuing..." % filename)
            continue
        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit)
                    for filename in filenames_to_fingerprint]

    # Process pool. Each task handles one song; with e.g. 5 songs and
    # 4 workers, the fifth song is picked up by whichever worker frees up.
    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks. imap_unordered is like map: it applies the
        # function to every item of the iterable, yielding results as they
        # complete.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them.
        while True:
            try:
                # Result of _fingerprint_worker() for one audio file.
                song_name, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only worker failures are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here.
                traceback.print_exc(file=sys.stdout)
            else:
                # Original author's note: song_name, file_hash and hashes
                # could be dumped to a text file here for a colleague, once
                # it is clear how his song recognition consumes them.

                # Store the song name and its file hash in the song table.
                sid = self.db.insert_song(song_name, file_hash)
                # Store the hashes in the fingerprint table keyed by the new
                # song id (sid acts as the foreign key).
                self.db.insert_hashes(sid, hashes)
                # Flag the song row as fully fingerprinted.
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()
    except BaseException:
        # Something escaped (DB error, Ctrl-C, ...): stop the workers at once.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()