Example #1
import os
from os.path import join

import joblib as job  # provides job.load / job.Parallel / job.delayed below
import numpy as np
from tqdm import tqdm

# Assumed project-local names, defined elsewhere in this module: init,
# process_video_frame, SUBFOLDER_FRAMES, SUBFOLDER_PROCESSED, SUBFOLDER_RESULTS,
# and the modules ft (feature extraction) and utl (video utilities).


def run(input_path, output_path, model_path, fps=10):
    init(output_path)
    pipeline = job.load(model_path)
    extractor = ft.FeatureExtractor(haralick_dist=4,
                                    clip_limit=4.0,
                                    hist_size=[8, 3, 3])

    utl.extract_video_frames(str(input_path),
                             join(output_path, SUBFOLDER_FRAMES),
                             frames_per_second=fps)

    frame_paths = sorted(
        os.listdir(join(output_path, SUBFOLDER_FRAMES)),
        key=lambda f: int(os.path.splitext(f)[0].split('_')[1]))
    features = job.Parallel(n_jobs=-1, verbose=0, timeout=200)(
        job.delayed(process_video_frame)(output_path, f, extractor)
        for f in tqdm(frame_paths, total=len(frame_paths)))
    X_test = np.array(features)
    if X_test.shape[0] == 0:
        return

    y_pred = pipeline.predict(X_test)

    print(f'VPRO> SVM found {sum(y_pred)} retina frames')
    for i, y in enumerate(y_pred):
        if y == 1:
            os.rename(join(output_path, SUBFOLDER_PROCESSED, frame_paths[i]),
                      join(output_path, SUBFOLDER_RESULTS, frame_paths[i]))
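A minimal invocation sketch for this `run`, assuming the pipeline was saved with joblib; every path below is a hypothetical placeholder, not from the original project:

# Hypothetical usage sketch: all paths are placeholders.
if __name__ == '__main__':
    run(input_path='videos/exam_01.mp4',
        output_path='out/exam_01',
        model_path='models/svm_pipeline.joblib',
        fps=10)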
Example #2
    def process(self, rate, sample, path):
        filtered_sample = self.noise_remover.remove_noise(sample, rate)
        segmented_sounds = self.noise_remover.segmentator.Sounds

        if segmented_sounds:
            feature_extractor = features.FeatureExtractor(app_config, rate)
            extracted_features = feature_extractor.process(filtered_sample, segmented_sounds)
            # Replace NaNs and infinities so the classifier never sees invalid values
            extracted_features = numpy.nan_to_num(extracted_features)

            if app_config.stream:  # Streaming audio: play audio on a bark and save raw segments to bark/non-bark folders
                segments_analysis = self.kiwi_finder.find_individual_calls(extracted_features)

                self.save_segments(segmented_sounds, segments_analysis, filtered_sample, rate)
                if 1 in segments_analysis:  # At least one segment was classified as a bark
                    print("Bark detected, playing rain for 5 seconds")
                    # `call` is assumed to be subprocess.call (from subprocess import call)
                    call(["aplay", "calming_sounds/rain.wav", "-d", "5"])

            elif app_config.data_store:  # Reading audio from disk: save features into the db and regenerate models
                self.store_features(extracted_features, path)
                self.move_processed_file(path)

        elif app_config.data_store:  # No segments found: move the file aside instead of processing it
            old_path = path
            new_path = path.replace("categorized_data", "no_segments")
            shutil.move(old_path, new_path)
            print("Moved file from categorized_data to no_segments folder " + new_path)
Example #3
    def attribute(self, files):
        authors = dict()
        for file in files:
            # np.inf presumably lifts the feature cap (cf. FeatureExtractor(5000) in Example #5)
            fe = features.FeatureExtractor(np.inf)
            feats = fe.analyze_texts([file])
            pred = self.classifier.predict(feats)
            authors[file] = self.authors[int(pred[0])]
        return authors
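A hypothetical call pattern for `attribute`, assuming the classifier was first trained via the `identify` method shown in Example #5; the `attributor` instance and all file names are placeholders:

# Hypothetical usage sketch: attributor, authors, and file names are placeholders.
attributor.identify({'austen': ['austen_train.txt'],
                     'dickens': ['dickens_train.txt']})
results = attributor.attribute(['unknown_1.txt', 'unknown_2.txt'])
for path, author in results.items():
    print('%s Author: %s' % (path, author))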
Example #4
def run(input_path: str, output_path: str, model_path: str, fps: int = 10,
        majority: float = 0.65, only_frames: bool = False) -> None:
    """
    :param input_path: path to input video
    :param output_path: path to folder for temporary and result files, will overwrite exiting folder
    :param model_path: path to sklearn pipeline (created with snippet_extraction_training.ipynb)
    :param fps: number of frames that extracted per second of the video
    :param majority: percentage (0.0 to 1.0) of frames that have to show meaningful information
    :return:
    """

    init(output_path)
    pipeline = job.load(model_path)
    extractor = ft.FeatureExtractor(haralick_dist=4, clip_limit=4.0, hist_size=[8, 3, 3])

    utl.extract_video_frames(input_path, join(output_path, SUBFOLDER_FRAMES), frames_per_second=fps)

    X_test = np.empty((0, 156), dtype=np.float64)
    file_paths = sorted(os.listdir(join(output_path, SUBFOLDER_FRAMES)),
                        key=lambda f: int(os.path.splitext(f)[0].split('_')[1]))
    for i in trange(0, len(file_paths), BATCH_SIZE):
        # Clamp the batch end so the last, possibly partial, batch stays in range
        batch_end = min(i + BATCH_SIZE, len(file_paths))
        features = job.Parallel(n_jobs=-1, verbose=0)(
            job.delayed(process_batch_frame)(f, file_paths, output_path, extractor)
            for f in range(i, batch_end))
        X_test = np.append(X_test, np.array(features), axis=0)

    if X_test.shape[0] == 0:
        return

    y_pred = pipeline.predict(X_test)

    snippet_idxs = majority_vote(y_pred, majority=majority)

    print(f'VPRO> SVM found {len(snippet_idxs)} retina snippets')
    print(f'VPRO> Writing frames to {join(output_path, SUBFOLDER_RESULTS)}')

    write_snippets_to_disk(snippet_idxs, output_path,
                           name=os.path.splitext(os.path.basename(input_path))[0],
                           fps=fps, only_frames=only_frames)
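`majority_vote` is project code that is not shown here. A minimal sketch of the thresholding it plausibly performs, assuming it slides a fixed window of `window` consecutive frames (one second at the default fps=10) over the per-frame predictions and keeps windows in which at least `majority` of the frames are predicted positive:

# Hypothetical sketch of majority_vote; the real helper is project code, not shown.
def majority_vote_sketch(y_pred, majority=0.65, window=10):
    idxs = []
    for start in range(0, len(y_pred) - window + 1, window):
        positive = sum(y_pred[start:start + window])
        if positive / window >= majority:  # enough frames show meaningful content
            idxs.append(start)
    return idxs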
Example #5
    def identify(self, author_files):
        self.author_features = dict()
        self.authors = list()
        for author, files in author_files.items():
            # 5000 presumably caps the number of features the extractor keeps
            fe = features.FeatureExtractor(5000)
            self.author_features[author] = fe.analyze_texts(files)
            self.authors.append(author)

        # Alternatives tried here: classify.LogisticRegression(),
        # classify.SVM_RBF(), classify.LinearSVM()
        self.classifier = classify.AdaBoost()
        # Only the first two authors are used, so this trains a binary classifier
        self.classifier.preprocess(self.author_features[self.authors[0]],
                                   self.author_features[self.authors[1]])
        self.classifier.train()
Example #6
    def run(self):
        for rate, sample, sample_name in self.fetcher.get_next_recording(
                data_store=app_config.data_store,
                bucket_name=app_config.bucket):

            filtered_sample = self.noise_remover.remove_noise(sample, rate)
            segmented_sounds = self.noise_remover.segmentator.Sounds
            if segmented_sounds:
                feature_extractor = features.FeatureExtractor(app_config, rate)
                extracted_features = feature_extractor.process(
                    filtered_sample, segmented_sounds)

                kiwi_calls = self.kiwi_finder.find_individual_calls(
                    extracted_features)
                result_per_file = self.kiwi_finder.find_kiwi(
                    kiwi_calls, segmented_sounds, rate)
                self.reporter.write_results(result_per_file, kiwi_calls,
                                            sample_name, filtered_sample, rate,
                                            segmented_sounds)
        self.reporter.cleanup()
Example #7
    def worker(self):
        kiwi_finder = identification.KiwiFinder(self.app_config)
        noise_remover = noise_reduction.NoiseRemover()

        for rate, sample, sample_name in iter(self.recordings_q.get, "STOP"):
            # Pre-initialize the results so the output tuple is well-defined
            # even when an exception interrupts the pipeline below
            result_per_file, kiwi_calls = None, None
            filtered_sample, segmented_sounds = None, None
            exception = None
            try:
                filtered_sample = noise_remover.remove_noise(sample, rate)
                segmented_sounds = noise_remover.segmentator.Sounds
                feature_extractor = features.FeatureExtractor(
                    self.app_config, rate)
                extracted_features = feature_extractor.process(
                    signal=filtered_sample, segments=segmented_sounds)
                kiwi_calls = kiwi_finder.find_individual_calls(
                    extracted_features)
                result_per_file = kiwi_finder.find_kiwi(
                    kiwi_calls, segmented_sounds, rate)
            except Exception as ex:
                exception = ex
            self.output_q.put(
                (result_per_file, kiwi_calls, sample_name, filtered_sample,
                 rate, segmented_sounds, exception))
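The loop above relies on `iter(self.recordings_q.get, "STOP")`, which calls `get` repeatedly until it returns the sentinel value. A self-contained illustration of that pattern (names here are illustrative, not from the project):

# Minimal, runnable illustration of the "STOP" sentinel pattern.
import queue
import threading

q = queue.Queue()

def worker():
    for item in iter(q.get, "STOP"):  # blocks on q.get until the sentinel arrives
        print("processing", item)

t = threading.Thread(target=worker)
t.start()
q.put((44100, b"samples", "rec_001.wav"))
q.put("STOP")                         # tells the worker loop to exit
t.join()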
Example #8
import configuration
import features
import identification
import noise_reduction
import recordings_io
import reporting

app_config = configuration.Configurator().parse_arguments()
reporter = reporting.Reporter(location=app_config.data_store,
                              write_to_stdout=app_config.write_stdout)
walker = recordings_io.get_recordings_walker(data_store=app_config.data_store,
                                             bucket=app_config.bucket)
kiwi_finder = identification.KiwiFinder()

for rate, sample, sample_name in walker.read_wave():
    noise_remover = noise_reduction.NoiseRemover()
    try:
        filtered_sample = noise_remover.remove_noise(sample, rate)
    except ValueError:
        filtered_sample = sample

    segmented_sounds = noise_remover.segmentator.get_segmented_sounds()

    feature_extractor = features.FeatureExtractor()
    extracted_features = feature_extractor.process(filtered_sample, rate,
                                                   segmented_sounds)

    kiwi_calls = kiwi_finder.find_individual_calls(extracted_features)
    result_per_file = kiwi_finder.find_kiwi(kiwi_calls)
    reporter.write_results(result_per_file, kiwi_calls, sample_name,
                           filtered_sample, rate, segmented_sounds)

reporter.cleanup()
Example #9
def train(SEED, area_width, area_height, AUG=True):
    setup_seed(SEED)
    MODEL_NAME = 'AUG_area{}x{}_seed{}'.format(area_width, area_height, SEED)
    data_dir = '/program/xumingke/IEMOCAP/'

    def read_file_list(csv_path, files, aug_files=None):
        # Column 2 of each tab-separated line holds the relative wav path
        with open(csv_path) as f:
            for line in f:
                rel_path = line.split('\t')[2]
                files.append(data_dir + '/' + rel_path)
                if aug_files is not None:
                    # Augmented copies are stored next to the original as '<name>.5'
                    aug_files.append(data_dir + '/' + rel_path + '.5')

    train_files = []
    train_files2 = []
    valid_files = []
    read_file_list(data_dir + '/IEMOCAP_train_{}.csv'.format(SEED),
                   train_files, train_files2 if AUG else None)
    read_file_list(data_dir + '/IEMOCAP_dev_{}.csv'.format(SEED),
                   train_files, train_files2 if AUG else None)
    read_file_list(data_dir + '/IEMOCAP_test_{}.csv'.format(SEED), valid_files)

    train_X, train_y = train_data_process(train_files, LABEL_DICT1, RATE,
                                          T_stride, T_overlop)
    train_X2, train_y2 = train_data_process(train_files2, LABEL_DICT1, RATE,
                                            T_stride, T_overlop)
    train_y = tf.concat([train_y, train_y2], 0)
    val_dict = valid_data_process(valid_files, LABEL_DICT1, RATE, T_stride,
                                  1.6)
    feature_extractor = features.FeatureExtractor(rate=RATE)

    train_X_features = feature_extractor.get_features(FEATURES_TO_USE, train_X)
    train_X_features2 = feature_extractor.get_features(FEATURES_TO_USE,
                                                       train_X2)

    valid_features_dict = {}
    for i in val_dict:
        X1 = feature_extractor.get_features(FEATURES_TO_USE, val_dict[i]['X'])
        valid_features_dict[i] = {'X': X1, 'y': val_dict[i]['y']}

    train_X_features = tf.expand_dims(train_X_features, -1)
    train_X_features2 = tf.expand_dims(train_X_features2, -1)
    train_X_features = tf.concat([train_X_features, train_X_features2], 0)
    train_X_features = tf.cast(train_X_features, tf.float32)

    train_ds = tf.data.Dataset.from_tensor_slices(
        (train_X_features,
         train_y)).shuffle(train_X_features.shape[0]).batch(BATCH_SIZE)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                         decay=1e-6)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    model = MODEL.AACNN(area_height, area_width)

    def train_step(images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)

    def test_step(images, labels):
        predictions = model(images)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    print('training...')
    logging.warning('training seed={}'.format(SEED))
    maxWA = 0
    maxUA = 0
    maxACC = 0
    for epoch in range(Epochs):
        # Reset the evaluation metrics at the start of each epoch
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()
        for step, (images, labels) in enumerate(train_ds):
            train_step(images, labels)
        template = 'Epoch {}, Loss: {}, Accuracy: {}\n'
        message = template.format(epoch + 1, train_loss.result(),
                                  train_accuracy.result() * 100)
        print(message)
        logging.warning(message)

        correct = 0
        label_correct = [0, 0, 0, 0]
        label_total = [0, 0, 0, 0]

        for i in valid_features_dict:
            x, y = valid_features_dict[i]['X'], valid_features_dict[i]['y']
            x = tf.expand_dims(x, -1)
            x = tf.cast(x, tf.float32)
            y = np.array([y[0]])
            # Average the per-chunk outputs into one utterance-level prediction
            out = model(x)
            out = tf.reduce_mean(out, 0, keepdims=True)

            label_total[y[0]] += 1
            if test_accuracy(y, out) > 0:
                correct += 1
                label_correct[y[0]] += 1
            test_accuracy.reset_states()

        label_acc = [label_correct[c] / label_total[c] for c in range(4)]
        UA = sum(label_acc) / 4
        WA = correct / len(valid_features_dict)
        maxWA = max(maxWA, WA)
        maxUA = max(maxUA, UA)
        ACC = WA + UA
        if ACC > maxACC:
            message = 'saving model (WA:{},UA:{})\n'.format(WA, UA)
            print(message)
            logging.warning(message)
            model.save_weights('./models/{}'.format(MODEL_NAME))
            maxACC = ACC
        print('label_correct:{}\nUA:{}'.format(label_correct, label_acc))
        print('maxWA:{}\nmaxUA:{}'.format(maxWA, maxUA))
        logging.warning('label_correct:{}\nUA:{}'.format(label_correct, label_acc))
        logging.warning('maxWA:{}\nmaxUA:{}'.format(maxWA, maxUA))

    print('end training on seed:{}'.format(SEED))
    logging.warning('end training on seed:{}'.format(SEED))
    del model
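`setup_seed` is project code that is not shown here; a plausible minimal sketch, assuming it seeds the Python, NumPy, and TensorFlow RNGs this script uses:

# Hypothetical sketch of setup_seed: seed every RNG the training script touches.
import random
import numpy as np
import tensorflow as tf

def setup_seed_sketch(seed):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)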
Example #10
    return val_dict


if __name__ == '__main__':
    NOISE_TYPE = [NOISE_TYPE_0, NOISE_TYPE_1, NOISE_TYPE_2, NOISE_TYPE_3]
    for noise_type in range(len(NOISE_TYPE)):
        for noise_proportion in range(TRAIN_DIVIDE):
            for seed in SEED:
                MODEL_NAME = 'MACNN_NoiseAdditional_CL_{}_seed{}'.format(
                    noise_type, seed)
                MODEL_PATH = 'models/{}_{}.pth'.format(MODEL_NAME,
                                                       FEATURES_TO_USE)
                learning_rate = 0.001
                setup_seed(seed)

                feature_extractor = features.FeatureExtractor(rate=RATE)
                train_X, train_y, val_dict = process_data(
                    WAV_PATH,
                    t=T_stride,
                    train_overlap=T_overlop,
                    noise_type=NOISE_TYPE[noise_type],  # use the noise type selected by the loop
                    train_divide=TRAIN_DIVIDE,
                    noise_proportion=noise_proportion,
                    noise_path=NOISE_PATH,
                    seed=seed)
                train_X_features = feature_extractor.get_features(
                    FEATURES_TO_USE, train_X)
                valid_features_dict = {}
                for i in val_dict:
                    X1 = feature_extractor.get_features(
                        FEATURES_TO_USE, val_dict[i]['X'])