def main():
  """Detect visual objects in input frames and write the events to a TSV file.

  Frames come from exactly one of two sources:
    * --input_image_glob: a glob of image files; --frame_rate is then required
      and determines the timestep between frames.
    * --input_video_path: a video file; the frame rate is read from the file.

  Detected object events are converted to TSV rows and written to
  --output_tsv_path with a tab-delimited header row.

  Raises:
    ValueError: If both or neither of --input_image_glob and
      --input_video_path are provided, or if --input_image_glob is given
      without --frame_rate.
  """
  args = parser.parse_args()
  # NOTE(review): removed stray debug statement `print("50")` that was left
  # after argument parsing.
  if args.input_image_glob:
    if args.input_video_path:
      raise ValueError(
          "--input_image_glob and --input_video_path are mutually exclusive")
    if args.frame_rate is None:
      raise ValueError(
          "When --input_image_glob is provided, --frame_rate must be provided")
    frame_generator = object_detection.read_images(
        args.input_image_glob, frame_rate=args.frame_rate)
    timestep_s = 1.0 / args.frame_rate
  else:
    if not args.input_video_path:
      raise ValueError(
          "One of --input_image_glob and --input_video_path must be provided")
    frame_generator = object_detection.read_video_file(args.input_video_path)
    # Frame rate is assumed constant over the whole file.
    timestep_s = 1.0 / object_detection.get_video_fps(args.input_video_path)
    # TODO(cais): Support variable frame rate in video file.
  events = object_detection.detect_objects(frame_generator)
  tsv_rows = events_lib.convert_events_to_tsv_rows(
      events,
      tsv_data.VISUAL_OBJECTS_EVENTS_TIER,
      timestep_s=timestep_s)
  with open(args.output_tsv_path, mode="w") as f:
    tsv_writer = csv.writer(f, delimiter="\t")
    tsv_writer.writerow(tsv_data.COLUMN_HEADS)
    for row in tsv_rows:
      tsv_writer.writerow(row)
def main():
  """Extract audio events from mono WAV files and write them to a TSV file.

  --input_wav_paths is a comma-separated list of WAV file paths, processed in
  sorted order. Each file is fed to the audio-event extractor in fixed-size
  chunks; the resulting events are converted to TSV rows (ignoring YAMNet
  classes on the ignore list) and written to --output_tsv_path with a
  tab-delimited header row.

  Raises:
    ValueError: If any input WAV file is not mono.
  """
  args = parser.parse_args()
  events = []
  for wav_path in sorted(args.input_wav_paths.split(",")):
    sample_rate, samples = wavfile.read(wav_path)
    if len(samples.shape) != 1:
      raise ValueError("Only mono audio is supported")

    # TODO(#35): Resample the waveform if sample_rate doesn't meet the YAMNet
    # requirement.
    def waveform_generator():
      # Yield the waveform in fixed-size chunks of 16000 samples; the final
      # chunk may be shorter.
      chunk_size = 16000
      for start in range(0, len(samples), chunk_size):
        yield samples[start:start + chunk_size]

    events.extend(
        audio_events.extract_audio_events(
            waveform_generator, fs=sample_rate, threshold_score=0.5))

  tsv_rows = events_lib.convert_events_to_tsv_rows(
      events,
      tsv_data.AUDIO_EVENTS_TIER,
      ignore_class_names=audio_events.YAMNET_IGNORE_CLASS_NAMES)
  with open(args.output_tsv_path, mode="w") as f:
    tsv_writer = csv.writer(f, delimiter="\t")
    tsv_writer.writerow(tsv_data.COLUMN_HEADS)
    tsv_writer.writerows(tsv_rows)
def testConvertEventsToTsvRows_withFinalEmptyClasses_ignoresSilence(self):
  """Ignored classes and a trailing empty frame produce no TSV rows."""
  frame_classes = [
      [("Speech", 0.9)],
      [("Speech", 0.95)],
      [("Silence", 0.99)],
      [("Hands", 0.55)],
      [("Hands", 0.6)],
      [],
  ]
  actual_rows = events_lib.convert_events_to_tsv_rows(
      frame_classes, "AudioEvents1", ignore_class_names=("Silence",))
  expected_rows = [
      (0.0, 2.0, "AudioEvents1", "Speech"),
      (3.0, 5.0, "AudioEvents1", "Hands"),
  ]
  self.assertEqual(actual_rows, expected_rows)
def testConvertEventsToTsvRows_withOverlapping(self):
  """Overlapping class spans each get their own row with correct bounds."""
  frame_classes = [
      [("Speech", 0.6)],
      [("Speech", 0.6), ("Music", 0.3)],
      [("Speech", 0.5), ("Music", 0.4)],
      [("Music", 0.7)],
      [("Music", 0.8)],
  ]
  actual_rows = events_lib.convert_events_to_tsv_rows(
      frame_classes, "AudioEvents1")
  expected_rows = [
      (0.0, 3.0, "AudioEvents1", "Speech"),
      (1.0, 5.0, "AudioEvents1", "Music"),
  ]
  self.assertEqual(actual_rows, expected_rows)
def testConvertEventsToTsvRows_singleEventTypeAtATime(self):
  """Non-overlapping class spans separated by an empty frame yield two rows."""
  frame_classes = [
      [("Speech", 0.9)],
      [("Speech", 0.95)],
      [],
      [("Hands", 0.55)],
      [("Hands", 0.6)],
  ]
  actual_rows = events_lib.convert_events_to_tsv_rows(
      frame_classes, "AudioEvents1")
  expected_rows = [
      (0.0, 2.0, "AudioEvents1", "Speech"),
      (3.0, 5.0, "AudioEvents1", "Hands"),
  ]
  self.assertEqual(actual_rows, expected_rows)
def testCustomTimestep(self):
  """A non-default timestep_s scales all start and end times accordingly."""
  frame_classes = [
      [("Speech", 0.6)],
      [("Speech", 0.6), ("Music", 0.3)],
      [("Speech", 0.5), ("Music", 0.4)],
      [("Music", 0.7)],
      [("Music", 0.8)],
  ]
  actual_rows = events_lib.convert_events_to_tsv_rows(
      frame_classes, "AudioEvents1", timestep_s=2.5)
  expected_rows = [
      (0.0, 7.5, "AudioEvents1", "Speech"),
      (2.5, 12.5, "AudioEvents1", "Music"),
  ]
  self.assertEqual(actual_rows, expected_rows)