def run(input_path, output_path, model_path, fps=10):
    """Extract frames from a video, classify them and move the positives to the results folder.

    :param input_path: path to the input video (converted with ``str`` before use)
    :param output_path: working folder; it is handed to ``init`` and the frame,
        processed and result subfolders are resolved below it
    :param model_path: path of the pipeline loaded through ``job.load``
    :param fps: number of frames extracted per second of the video
    :return: None; returns early when no features were produced
    """
    init(output_path)
    pipeline = job.load(model_path)
    extractor = ft.FeatureExtractor(haralick_dist=4, clip_limit=4.0, hist_size=[8, 3, 3])

    frames_dir = join(output_path, SUBFOLDER_FRAMES)
    utl.extract_video_frames(str(input_path), frames_dir, frames_per_second=fps)

    # Order frames by the integer that follows the first '_' in the file stem,
    # so that e.g. "..._10" sorts after "..._9".
    frame_paths = sorted(
        os.listdir(frames_dir),
        key=lambda f: int(os.path.splitext(f)[0].split('_')[1]))

    features = job.Parallel(n_jobs=-1, verbose=0, timeout=200)(
        job.delayed(process_video_frame)(output_path, f, extractor)
        for f in tqdm(frame_paths, total=len(frame_paths)))

    X_test = np.array(features)
    if X_test.shape[0] == 0:
        return

    y_pred = pipeline.predict(X_test)
    print(f'VPRO> SVM found {sum(y_pred)} retina frames')

    # Plain loop instead of a comprehension: the renames are pure side effects.
    # NOTE(review): frames are moved out of SUBFOLDER_PROCESSED, presumably
    # written there by process_video_frame - confirm.
    for frame_name, label in zip(frame_paths, y_pred):
        if label == 1:
            os.rename(join(output_path, SUBFOLDER_PROCESSED, frame_name),
                      join(output_path, SUBFOLDER_RESULTS, frame_name))
def process(self, rate, sample, path):
    """Denoise one audio sample and act on its segmented sounds.

    When segments were found, features are extracted (NaNs replaced via
    ``numpy.nan_to_num``) and then, depending on ``app_config``:

    * ``stream``: segments are classified and saved; if any segment is
      labelled ``1`` a calming sound is played.
    * ``data_store``: features are stored and the file is moved as processed.

    When no segments were found and ``data_store`` is set, the file is moved
    from the ``categorized_data`` path to the ``no_segments`` path.

    :param rate: sample rate handed to the noise remover and feature extractor
    :param sample: raw audio sample
    :param path: path of the source file (used for storing / moving)
    """
    filtered_sample = self.noise_remover.remove_noise(sample, rate)
    # The segmentation result is read back from the noise remover after the
    # remove_noise call above.
    segmented_sounds = self.noise_remover.segmentator.Sounds

    if segmented_sounds:
        feature_extractor = features.FeatureExtractor(app_config, rate)
        extracted_features = feature_extractor.process(filtered_sample, segmented_sounds)
        extracted_features = numpy.nan_to_num(extracted_features)

        if app_config.stream:
            # If streaming audio: play audio if bark, and save raw segments
            # to bark/non_bark folders
            segments_analysis = self.kiwi_finder.find_individual_calls(extracted_features)
            self.save_segments(segmented_sounds, segments_analysis, filtered_sample, rate)
            if 1 in segments_analysis:
                # Detect bark
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("Bark detected, playing rain for 5 seconds")
                call(["aplay", "calming_sounds/rain.wav", "-d", "5"])
        elif app_config.data_store:
            # If reading audio from disk: save features into db, and
            # regenerate models
            self.store_features(extracted_features, path)
            self.move_processed_file(path)
    elif app_config.data_store:
        # File with no segments: move it out of the categorized data
        new_path = path.replace("categorized_data", "no_segments")
        shutil.move(path, new_path)
        print("Moved file from categorized_data to no_segments folder " + new_path)
def attribute(self, files):
    """Predict an author for every file.

    Each file is analysed on its own with a fresh feature extractor, the
    classifier's first prediction is converted to an index into
    ``self.authors``, and the result is returned as ``{file: author}``.
    """
    predicted = {}
    for path in files:
        extractor = features.FeatureExtractor(np.inf)
        prediction = self.classifier.predict(extractor.analyze_texts([path]))
        author_index = int(prediction[0])
        predicted[path] = self.authors[author_index]
    return predicted
def run(input_path: str, output_path: str, model_path: str, fps: int = 10, majority: float = 0.65,
        only_frames: bool = False) -> None:
    """
    Extract frames from a video, classify them in batches and write the detected snippets to disk.

    :param input_path: path to input video
    :param output_path: path to folder for temporary and result files, will overwrite existing folder
    :param model_path: path to sklearn pipeline (created with snippet_extraction_training.ipynb)
    :param fps: number of frames that are extracted per second of the video
    :param majority: percentage (0.0 to 1.0) of frames that have to show meaningful information
    :param only_frames: forwarded unchanged to ``write_snippets_to_disk``
    :return: None; returns early when no feature rows were produced
    """
    init(output_path)
    pipeline = job.load(model_path)
    extractor = ft.FeatureExtractor(haralick_dist=4, clip_limit=4.0, hist_size=[8, 3, 3])

    frames_dir = join(output_path, SUBFOLDER_FRAMES)
    utl.extract_video_frames(input_path, frames_dir, frames_per_second=fps)

    # Order frames by the integer that follows the first '_' in the file stem.
    file_paths = sorted(os.listdir(frames_dir),
                        key=lambda f: int(os.path.splitext(f)[0].split('_')[1]))

    # Seed with an empty (0, 156) float64 block so the result keeps its
    # 156-column shape even when there are no frames. np.float64 replaces the
    # np.float alias, which no longer exists in current NumPy releases.
    batches = [np.empty((0, 156), dtype=np.float64)]
    for i in trange(0, len(file_paths), BATCH_SIZE):
        # NOTE(review): for the last batch this index range can run past
        # len(file_paths); presumably process_batch_frame copes with that - confirm.
        features = job.Parallel(n_jobs=-1, verbose=0)(
            job.delayed(process_batch_frame)(f, file_paths, output_path, extractor)
            for f in range(i, i + BATCH_SIZE))
        batches.append(np.array(features))

    # Concatenate once instead of re-copying the accumulated array per batch.
    X_test = np.concatenate(batches, axis=0)
    if X_test.shape[0] == 0:
        return

    y_pred = pipeline.predict(X_test)
    snippet_idxs = majority_vote(y_pred, majority=majority)
    print(f'VPRO> SVM found {len(snippet_idxs)} retina snippets')
    print(f'VPRO> Writing frames to {join(output_path, SUBFOLDER_RESULTS)}')
    write_snippets_to_disk(snippet_idxs, output_path,
                           name=os.path.splitext(os.path.basename(input_path))[0],
                           fps=fps, only_frames=only_frames)
def identify(self, author_files):
    """Extract features per author and train a two-author classifier.

    ``author_files`` maps an author to that author's files. Features are
    stored in ``self.author_features`` and author names, in iteration order,
    in ``self.authors``. Only the first two authors are handed to the
    classifier.
    """
    self.author_features = {}
    self.authors = []
    for name, texts in author_files.items():
        feats = features.FeatureExtractor(5000).analyze_texts(texts)
        self.author_features[name] = feats
        self.authors.append(name)

    # Previously tried alternatives: classify.LogisticRegression(),
    # classify.SVM_RBF(), classify.LinearSVM()
    self.classifier = classify.AdaBoost()
    first_author, second_author = self.authors[0], self.authors[1]
    self.classifier.preprocess(self.author_features[first_author],
                               self.author_features[second_author])
    self.classifier.train()
def run(self):
    """Process every fetched recording and report the results.

    Recordings without segmented sounds are skipped; for the others features
    are extracted, individual calls identified and the per-file result is
    written through the reporter. The reporter is cleaned up at the end.
    """
    recordings = self.fetcher.get_next_recording(
        data_store=app_config.data_store, bucket_name=app_config.bucket)
    for rate, sample, sample_name in recordings:
        cleaned = self.noise_remover.remove_noise(sample, rate)
        # Segments are read back from the noise remover after denoising.
        sounds = self.noise_remover.segmentator.Sounds
        if not sounds:
            continue

        extractor = features.FeatureExtractor(app_config, rate)
        feats = extractor.process(cleaned, sounds)
        calls = self.kiwi_finder.find_individual_calls(feats)
        per_file = self.kiwi_finder.find_kiwi(calls, sounds, rate)
        self.reporter.write_results(per_file, calls, sample_name,
                                    cleaned, rate, sounds)
    self.reporter.cleanup()
def worker(self):
    """Consume recordings from ``self.recordings_q`` until the ``"STOP"`` sentinel.

    For every ``(rate, sample, sample_name)`` item the sample is denoised,
    segmented, featurised and classified. One tuple
    ``(result_per_file, kiwi_calls, sample_name, filtered_sample, rate,
    segmented_sounds, exception)`` is put on ``self.output_q`` per item.
    If processing fails, ``exception`` holds the error and every result that
    was not computed for this item is ``None``.
    """
    kiwi_finder = identification.KiwiFinder(self.app_config)
    noise_remover = noise_reduction.NoiseRemover()
    for rate, sample, sample_name in iter(self.recordings_q.get, "STOP"):
        # Reset per-item results so a failure neither leaves these names
        # unbound (first item) nor leaks values from the previous item.
        result_per_file = None
        kiwi_calls = None
        filtered_sample = None
        segmented_sounds = None
        exception = None
        try:
            filtered_sample = noise_remover.remove_noise(sample, rate)
            segmented_sounds = noise_remover.segmentator.Sounds
            feature_extractor = features.FeatureExtractor(
                self.app_config, rate)
            extracted_features = feature_extractor.process(
                signal=filtered_sample, segments=segmented_sounds)
            kiwi_calls = kiwi_finder.find_individual_calls(
                extracted_features)
            result_per_file = kiwi_finder.find_kiwi(
                kiwi_calls, segmented_sounds, rate)
        except Exception as ex:
            # Deliberately broad: the error is forwarded to the consumer
            # instead of terminating the worker.
            exception = ex
        self.output_q.put(
            (result_per_file, kiwi_calls, sample_name, filtered_sample,
             rate, segmented_sounds, exception))
import recordings_io
import noise_reduction
import features
import identification

# Script: read every recording, detect kiwi calls and report the results.
# NOTE(review): `configuration` and `reporting` are used below but are not
# imported in the visible part of the file - presumably imported above; confirm.
app_config = configuration.Configurator().parse_arguments()
reporter = reporting.Reporter(location=app_config.data_store, write_to_stdout=app_config.write_stdout)
walker = recordings_io.get_recordings_walker(data_store=app_config.data_store, bucket=app_config.bucket)
kiwi_finder = identification.KiwiFinder()

for rate, sample, sample_name in walker.read_wave():
    # A fresh noise remover is created for every recording.
    noise_remover = noise_reduction.NoiseRemover()
    try:
        filtered_sample = noise_remover.remove_noise(sample, rate)
    except ValueError:
        # Fall back to the unfiltered sample when noise removal rejects it.
        filtered_sample = sample
    segmented_sounds = noise_remover.segmentator.get_segmented_sounds()
    feature_extractor = features.FeatureExtractor()
    extracted_features = feature_extractor.process(filtered_sample, rate, segmented_sounds)
    kiwi_calls = kiwi_finder.find_individual_calls(extracted_features)
    result_per_file = kiwi_finder.find_kiwi(kiwi_calls)
    reporter.write_results(result_per_file, kiwi_calls, sample_name, filtered_sample, rate, segmented_sounds)

reporter.cleanup()
def train(SEED, area_width, area_height, AUG=True):
    """Train an ``MODEL.AACNN`` model for one seed and save the best weights.

    File lists are read from the seed-specific IEMOCAP train/dev/test CSVs
    (third tab-separated field of each line). Train and dev files are both
    used for training; the test CSV supplies the validation files. After each
    epoch every validation item is scored, WA (overall accuracy) and UA (mean
    of the four per-label accuracies) are computed, and weights are saved to
    ``./models/<MODEL_NAME>`` whenever ``WA + UA`` exceeds the best so far.

    :param SEED: seed passed to ``setup_seed`` and used in the CSV and model names
    :param area_width: passed to ``MODEL.AACNN`` and used in the model name
    :param area_height: passed to ``MODEL.AACNN`` and used in the model name
    :param AUG: when true, the ``.5``-suffixed variant of every train/dev file
        is added as a second training set
    :return: None

    Relies on module-level names not defined here (``learning_rate``,
    ``Epochs``, ``BATCH_SIZE``, ``RATE``, ``T_stride``, ``T_overlop``,
    ``LABEL_DICT1``, ``FEATURES_TO_USE``).
    """
    setup_seed(SEED)
    MODEL_NAME = 'AUG_area{}x{}_seed{}'.format(area_width, area_height, SEED)
    data_dir = '/program/xumingke/IEMOCAP/'
    train_files = []
    train_files2 = []
    valid_files = []
    with open(data_dir + '/IEMOCAP_train_{}.csv'.format(SEED)) as f:
        fr = f.readlines()
        for line in fr:
            train_files.append(data_dir + '/' + line.split('\t')[2])
            if (AUG):
                # range(0) is empty, so this loop never adds anything.
                for i in range(0):
                    train_files.append(data_dir + '/' + line.split('\t')[2] + '.' + str(i + 1))
                # range(1) runs once with i == 0, i.e. only the '.5' variant.
                for i in range(1):
                    train_files2.append(data_dir + '/' + line.split('\t')[2] + '.' + str(i + 5))
    # The dev split is added to the training lists as well.
    with open(data_dir + '/IEMOCAP_dev_{}.csv'.format(SEED)) as f:
        fr = f.readlines()
        for line in fr:
            train_files.append(data_dir + '/' + line.split('\t')[2])
            if (AUG):
                for i in range(0):
                    train_files.append(data_dir + '/' + line.split('\t')[2] + '.' + str(i + 1))
                for i in range(1):
                    train_files2.append(data_dir + '/' + line.split('\t')[2] + '.' + str(i + 5))
    # The test split is what gets evaluated after every epoch.
    with open(data_dir + '/IEMOCAP_test_{}.csv'.format(SEED)) as f:
        fr = f.readlines()
        for line in fr:
            valid_files.append(data_dir + '/' + line.split('\t')[2])
    train_X, train_y = train_data_process(train_files, LABEL_DICT1, RATE, T_stride, T_overlop)
    train_X2, train_y2 = train_data_process(train_files2, LABEL_DICT1, RATE, T_stride, T_overlop)
    train_y = tf.concat([train_y, train_y2], 0)
    val_dict = valid_data_process(valid_files, LABEL_DICT1, RATE, T_stride, 1.6)
    feature_extractor = features.FeatureExtractor(rate=RATE)
    train_X_features = feature_extractor.get_features(FEATURES_TO_USE, train_X)
    train_X_features2 = feature_extractor.get_features(FEATURES_TO_USE, train_X2)
    valid_features_dict = {}
    # `i` is the key of val_dict; the enumerate index is discarded.
    for _, i in enumerate(val_dict):
        X1 = feature_extractor.get_features(FEATURES_TO_USE, val_dict[i]['X'])
        valid_features_dict[i] = {'X': X1, 'y': val_dict[i]['y']}
    # Append a trailing axis to both feature sets, then stack them along axis 0.
    train_X_features = tf.expand_dims(train_X_features, -1)
    train_X_features2 = tf.expand_dims(train_X_features2, -1)
    train_X_features = tf.concat([train_X_features, train_X_features2], 0)
    train_X_features = tf.cast(train_X_features, tf.float32)
    # Shuffle buffer covers the whole training set.
    train_ds = tf.data.Dataset.from_tensor_slices(
        (train_X_features, train_y)).shuffle(train_X_features.shape[0]).batch(BATCH_SIZE)
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, decay=1e-6)
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')
    model = MODEL.AACNN(area_height, area_width)

    def train_step(images, labels):
        # One optimisation step; also updates the running train metrics.
        with tf.GradientTape() as tape:
            predictions = model(images)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        train_loss(loss)
        train_accuracy(labels, predictions)

    def test_step(images, labels):
        # Not called anywhere in this function.
        predictions = model(images)
        t_loss = loss_object(labels, predictions)
        test_loss(t_loss)
        test_accuracy(labels, predictions)

    print('training...')
    logging.warning('training seed={}'.format(SEED))
    maxWA = 0
    maxUA = 0
    maxACC = 0
    for epoch in range(Epochs):
        # Reset the evaluation metrics at the start of each epoch
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()
        # tq = tqdm(total=len(train_y))
        for step, (images, labels) in enumerate(train_ds):
            train_step(images, labels)
            # tq.update(BATCH_SIZE)
        # tq.close()
        template = 'Epoch {}, Loss: {}, Accuracy: {}\n'
        print(
            template.format(
                epoch + 1,
                train_loss.result(),
                train_accuracy.result() * 100,
            ))
        logging.warning(
            template.format(
                epoch + 1,
                train_loss.result(),
                train_accuracy.result() * 100,
            ))
        correct = 0
        # Per-label counters; indexed by the label value (four labels).
        label_correct = [0, 0, 0, 0]
        label_total = [0, 0, 0, 0]
        for _, i in enumerate(valid_features_dict):
            x, y = valid_features_dict[i]['X'], valid_features_dict[i]['y']
            x = tf.expand_dims(x, -1)
            x = tf.cast(x, tf.float32)
            # Only the first label of the item is used.
            y = np.array([y[0]])
            out = model(x)
            # Average the model outputs over axis 0 into a single prediction.
            out = tf.reduce_mean(out, 0, keepdims=True)
            label_total[y[0]] += 1
            # The metric is reset after every item, so a positive value here
            # means this single item was classified correctly.
            if (test_accuracy(y, out) > 0):
                correct += 1
                label_correct[y[0]] += 1
            test_accuracy.reset_states()
        # NOTE(review): raises ZeroDivisionError if any of the four labels
        # has no validation item.
        label_acc = [
            label_correct[0] / label_total[0],
            label_correct[1] / label_total[1],
            label_correct[2] / label_total[2],
            label_correct[3] / label_total[3]
        ]
        UA = (label_acc[0] + label_acc[1] + label_acc[2] + label_acc[3]) / 4
        if (correct / len(valid_features_dict) > maxWA):
            maxWA = correct / len(valid_features_dict)
        if (UA > maxUA):
            maxUA = UA
        # Checkpoint criterion: sum of WA and UA.
        ACC = (correct / len(valid_features_dict)) + UA
        if (ACC > maxACC):
            print('saving model (WA:{},UA:{})\n'.format(
                correct / len(valid_features_dict), UA))
            logging.warning('saving model (WA:{},UA:{})\n'.format(
                correct / len(valid_features_dict), UA))
            model.save_weights('./models/{}'.format(MODEL_NAME))
            maxACC = ACC
        print('label_correct:{}\nUA:{}'.format(label_correct, label_acc))
        print('maxWA:{}\nmaxUA:{}'.format(maxWA, maxUA))
        logging.warning('label_correct:{}\nUA:{}'.format(
            label_correct, label_acc))
        logging.warning('maxWA:{}\nmaxUA:{}'.format(maxWA, maxUA))
    print('end training on seed:{}'.format(SEED))
    logging.warning('end training on seed:{}'.format(SEED))
    del model
return val_dict if __name__ == '__main__': NOISE_TYPE = [NOISE_TYPE_0, NOISE_TYPE_1, NOISE_TYPE_2, NOISE_TYPE_3] for noise_type in range(len(NOISE_TYPE)): for noise_proportion in range(TRAIN_DIVIDE): for seed in SEED: MODEL_NAME = 'MACNN_NoiseAdditional_CL_{}_seed{}'.format( noise_type, seed, Amplitude_Factor) MODEL_PATH = 'models/{}_{}.pth'.format(MODEL_NAME, FEATURES_TO_USE) learning_rate = 0.001 setup_seed(seed) feature_extractor = features.FeatureExtractor(rate=RATE) train_X, train_y, val_dict = process_data( WAV_PATH, t=T_stride, train_overlap=T_overlop, noise_type=NOISE_TYPE[3], train_divide=TRAIN_DIVIDE, noise_proportion=noise_proportion, noise_path=NOISE_PATH, seed=seed) train_X_features = feature_extractor.get_features( FEATURES_TO_USE, train_X) valid_features_dict = {} for _, i in enumerate(val_dict): X1 = feature_extractor.get_features( FEATURES_TO_USE, val_dict[i]['X'])