Ejemplo n.º 1
0
def main():
  """Detects objects in images or a video file and writes them to a TSV file.

  Frames are read either from the images matched by --input_image_glob (in
  which case --frame_rate is required) or from the video file given by
  --input_video_path. The frames are passed to
  `object_detection.detect_objects()`, and the resulting events are converted
  to rows that are written, after a header row, to --output_tsv_path.

  Raises:
    ValueError: If both or neither of --input_image_glob and
      --input_video_path are provided, or if --input_image_glob is provided
      without --frame_rate.
  """
  args = parser.parse_args()
  # NOTE(review): this looks like leftover debug output; confirm nothing
  # depends on it before removing.
  print("50")
  if args.input_image_glob:
    if args.input_video_path:
      raise ValueError(
          "--input_image_glob and --input_video_path are mutually exclusive")
    if args.frame_rate is None:
      raise ValueError(
          "When --input_image_glob is provided, --frame_rate must be provided")
    frame_generator = object_detection.read_images(
        args.input_image_glob, frame_rate=args.frame_rate)
    # The time step between consecutive frames is the inverse of the frame
    # rate.
    timestep_s = 1.0 / args.frame_rate
  else:
    if not args.input_video_path:
      raise ValueError(
          "One of --input_image_glob and --input_video_path must be provided")
    frame_generator = object_detection.read_video_file(args.input_video_path)
    # TODO(cais): Support variable frame rate in video file.
    timestep_s = 1.0 / object_detection.get_video_fps(args.input_video_path)
  events = object_detection.detect_objects(frame_generator)

  tsv_rows = events_lib.convert_events_to_tsv_rows(
      events,
      tsv_data.VISUAL_OBJECTS_EVENTS_TIER,
      timestep_s=timestep_s)
  # newline="" lets the csv module control line endings itself; without it,
  # platforms that translate "\n" on write (e.g., Windows) emit "\r\r\n".
  with open(args.output_tsv_path, mode="w", newline="") as f:
    tsv_writer = csv.writer(f, delimiter="\t")
    tsv_writer.writerow(tsv_data.COLUMN_HEADS)
    tsv_writer.writerows(tsv_rows)
Ejemplo n.º 2
0
def main():
    """Extracts audio events from mono WAV files and writes them to a TSV file.

    --input_wav_paths is a comma-separated list of WAV file paths. The files
    are processed in sorted path order; each one is read, split into
    fixed-length chunks and passed to `audio_events.extract_audio_events()`.
    The events from all files are concatenated, converted to rows and
    written, after a header row, to --output_tsv_path.

    Raises:
        ValueError: If any of the WAV files is not mono (i.e., its sample
            array is not one-dimensional).
    """
    args = parser.parse_args()

    wav_paths = sorted(args.input_wav_paths.split(","))
    events = []
    for wav_path in wav_paths:
        fs, xs = wavfile.read(wav_path)
        if len(xs.shape) != 1:
            raise ValueError("Only mono audio is supported")

        # TODO(#35): Resample waveform if fs doesn't meet YAMNet requirement.
        def waveform_generator():
            """Yields consecutive chunks of `xs`; the last one may be shorter."""
            # NOTE(review): chunk length in samples; presumably chosen to
            # match the input expected by YAMNet -- confirm.
            step_length = 16000
            for begin in range(0, len(xs), step_length):
                yield xs[begin:begin + step_length]

        # The generator function itself (not a generator object) is passed.
        # `extend()` consumes the result within this iteration, so the
        # closure reads the `xs` of the current file.
        events.extend(
            audio_events.extract_audio_events(waveform_generator,
                                              fs=fs,
                                              threshold_score=0.5))

    tsv_rows = events_lib.convert_events_to_tsv_rows(
        events,
        tsv_data.AUDIO_EVENTS_TIER,
        ignore_class_names=audio_events.YAMNET_IGNORE_CLASS_NAMES)
    # newline="" lets the csv module control line endings itself; without it,
    # platforms that translate "\n" on write (e.g., Windows) emit "\r\r\n".
    with open(args.output_tsv_path, mode="w", newline="") as f:
        tsv_writer = csv.writer(f, delimiter="\t")
        tsv_writer.writerow(tsv_data.COLUMN_HEADS)
        tsv_writer.writerows(tsv_rows)
Ejemplo n.º 3
0
 def testConvertEventsToTsvRows_withFinalEmptyClasses_ignoresSilence(self):
     # One entry per step. "Silence" is listed in `ignore_class_names` and
     # the final step has no classes at all; only the "Speech" and "Hands"
     # runs are expected in the output.
     step_events = [
         [("Speech", 0.9)],
         [("Speech", 0.95)],
         [("Silence", 0.99)],
         [("Hands", 0.55)],
         [("Hands", 0.6)],
         [],
     ]
     expected_rows = [
         (0.0, 2.0, "AudioEvents1", "Speech"),
         (3.0, 5.0, "AudioEvents1", "Hands"),
     ]
     actual_rows = events_lib.convert_events_to_tsv_rows(
         step_events, "AudioEvents1", ignore_class_names=("Silence", ))
     self.assertEqual(actual_rows, expected_rows)
Ejemplo n.º 4
0
 def testConvertEventsToTsvRows_withOverlapping(self):
     # "Speech" is present in steps 0-2 and "Music" in steps 1-4, so the two
     # runs overlap in steps 1 and 2; each is expected as its own row.
     speech_only = [("Speech", 0.6)]
     step_events = [
         speech_only,
         [("Speech", 0.6), ("Music", 0.3)],
         [("Speech", 0.5), ("Music", 0.4)],
         [("Music", 0.7)],
         [("Music", 0.8)],
     ]
     expected_rows = [
         (0.0, 3.0, "AudioEvents1", "Speech"),
         (1.0, 5.0, "AudioEvents1", "Music"),
     ]
     actual_rows = events_lib.convert_events_to_tsv_rows(
         step_events, "AudioEvents1")
     self.assertEqual(actual_rows, expected_rows)
Ejemplo n.º 5
0
 def testConvertEventsToTsvRows_singleEventTypeAtATime(self):
     # At most one class per step, with an empty step separating the
     # "Speech" run from the "Hands" run.
     step_events = [
         [("Speech", 0.9)],
         [("Speech", 0.95)],
         [],
         [("Hands", 0.55)],
         [("Hands", 0.6)],
     ]
     expected_rows = [
         (0.0, 2.0, "AudioEvents1", "Speech"),
         (3.0, 5.0, "AudioEvents1", "Hands"),
     ]
     actual_rows = events_lib.convert_events_to_tsv_rows(
         step_events, "AudioEvents1")
     self.assertEqual(actual_rows, expected_rows)
Ejemplo n.º 6
0
 def testCustomTimestep(self):
     # With timestep_s=2.5 the expected begin and end times are multiples of
     # 2.5 s: "Speech" spans steps 0-2 and "Music" spans steps 1-4.
     step_events = [
         [("Speech", 0.6)],
         [("Speech", 0.6), ("Music", 0.3)],
         [("Speech", 0.5), ("Music", 0.4)],
         [("Music", 0.7)],
         [("Music", 0.8)],
     ]
     expected_rows = [
         (0.0, 7.5, "AudioEvents1", "Speech"),
         (2.5, 12.5, "AudioEvents1", "Music"),
     ]
     actual_rows = events_lib.convert_events_to_tsv_rows(
         step_events, "AudioEvents1", timestep_s=2.5)
     self.assertEqual(actual_rows, expected_rows)