def _calculate_features(self, files):
    """
    Run get_all_features on every file of the audio folder listed in files

    :param files: file names, relative to self.audio_path
    :return extracted: results of get_all_features, in the order of files
    """
    print('Calculating features...')
    file_count = len(files)
    extracted = []
    for done, filename in enumerate(files):
        # report how many files were finished before this one
        progress(percent=scale(0, file_count, done), name=filename)
        track_path = os.path.join(self.audio_path, filename)
        extracted.append(get_all_features(track_path))
    progress(1)
    return extracted
def _calulate_distances(self, features):
    """
    Call get_distance once for every unordered pair of entries in features

    :param features: indexable collection of feature objects exposing .name
    :return distances: get_distance results, ordered by first then second
        index of the pair
    """
    print('Calculating distances...')
    count = len(features)
    pair_total = sum_n(count)
    distances = []
    done = 0
    for first_idx, first in enumerate(features):
        # only pair with later entries so that each pair is visited once
        for second_idx in range(first_idx + 1, count):
            second = features[second_idx]
            label = first.name + ' <==> ' + second.name
            progress(percent=scale(0, pair_total, done), name=label)
            distances.append(get_distance(first, second))
            done += 1
    progress(1)
    return distances
def _not_present_in_db(self, file_list):
    """
    Checks which files in audio_resources still have at least one pairing
    with a later file of file_list that is not recorded in the database

    Every file is hashed exactly once up front; the hashes are then reused
    for all pair checks instead of re-reading the later file of each pair.

    NOTE(review): a file is only appended while it is the first element of
    a pair, so the last entry of file_list (and a lone file) can never be
    returned -- confirm that callers expect this.

    :param file_list: list of files in audio_resources folder
    :return to_be_processed: list of files which are in audio_resources
        and not yet processed, in the order of file_list
    """
    to_be_processed = []
    size = len(file_list)
    total_prog = sum_n(size + 1)
    curr_prog = 0
    progress(0)

    # one hash per file; assumes the files do not change during this call
    hashes = [
        sha256sum(os.path.join(self.audio_path, name)) for name in file_list
    ]

    for i, ifile in enumerate(file_list):
        ifile_sha = hashes[i]
        for j in range(i + 1, size):
            # the membership test short-circuits the database query once
            # the file has already been queued
            if ifile not in to_be_processed and not self.db.is_hashes_present(
                    ifile_sha, hashes[j]):
                to_be_processed.append(ifile)
            curr_prog += 1
            progress(scale(0, total_prog, curr_prog))
    progress(1)
    return to_be_processed
def _save_dist_to_db_new(self, distances):
    """
    Save every distance whose hash pair is not yet present in the database

    Progress advances once per entry of distances, whether the entry was
    saved or skipped, so the bar no longer stalls on already-stored pairs.

    :param distances: sized collection of distance objects exposing
        .hash1 and .hash2
    """
    print('Saving to DB...')
    total_prog = len(distances)
    progress(0)
    for done, dist in enumerate(distances, start=1):
        # pairs that are already stored are left untouched
        if not self.db.is_hashes_present(dist.hash1, dist.hash2):
            self.db.save_feature_distances(dist)
        progress(scale(0, total_prog, done))
    progress(1)
def calc_feature(self):
    """
    Loop through the audio_resources folder and calculate features of each
    track and the distance between them and then save the distance to the
    database

    For every pair of files whose hashes are not yet present in the
    database, the features of both files are extracted in worker threads
    and handed to self._save_dist_to_db in a further thread. Features of
    the first file of a pair are extracted at most once and reused for all
    of its pairs. Prints a summary message when finished; returns None.
    """
    start_time = time.time()
    if not self.db.create_tables():
        print('Problems in creating tables')
        return

    file_list, file_list_size = self._get_files_from_folder()
    processed_file_list = []
    hash_cache = {}

    def file_hash(path):
        # Hash each file at most once per run instead of once per pair;
        # assumes the files do not change while this method is running.
        if path not in hash_cache:
            hash_cache[path] = sha256sum(path)
        return hash_cache[path]

    # thread of the most recently started save; None until a pair is handled
    dist_thread = None
    total_prog = sum_n(file_list_size + 1)
    curr_prog = 0
    print('Processing...')
    # start progress bar in console
    progress(curr_prog)

    for i in range(file_list_size):
        ifile = file_list[i]
        if ifile in processed_file_list:
            continue
        processed_file_list.append(ifile)
        ipath = os.path.join(self.audio_path, ifile)
        ifile_sha = file_hash(ipath)
        # features of ifile are only extracted once a missing pair is found
        update_ifile = True
        ifeature = None

        for j in range(i + 1, file_list_size):
            jpath = os.path.join(self.audio_path, file_list[j])
            # skip pairs whose hashes are already in the distance table
            if self.db.is_hashes_present(ifile_sha, file_hash(jpath)):
                continue

            # extract the features of both files concurrently
            if update_ifile:
                ithread = StartThread(target=get_all_features,
                                      args=(ipath, ))
                ithread.start()
            jthread = StartThread(target=get_all_features, args=(jpath, ))
            jthread.start()
            if update_ifile:
                # join() is used for the value it returns
                ifeature = ithread.join()
                update_ifile = False
                curr_prog += 1
                progress(scale(0, total_prog, curr_prog))
            jfeature = jthread.join()

            # the previous save ran while these features were extracted;
            # wait for it so that only one save is in flight at a time
            if dist_thread is not None:
                dist_thread.join()
            dist_thread = StartThread(target=self._save_dist_to_db,
                                      args=(ifeature, jfeature, ))
            dist_thread.start()
            curr_prog += 1
            progress(scale(0, total_prog, curr_prog))

    if dist_thread is not None:
        dist_thread.join()
        # the line break inside the message is written as an escape; a
        # single-quoted literal must not span physical lines
        message = (
            'Done! \nTook %.0f seconds to calculate features and '
            'distances between ' % (time.time() - start_time)
            + str(file_list_size) + ' files')
    else:
        message = 'Data collected from database'
    progress(1)
    print()
    print(message)