Example #1

# Imports shared by the examples below.
import json
import logging
import os
import sys

import cv2
import numpy as np
import pytesseract
import yaml
from PIL import Image, ImageDraw

# Detection, Recognition, load_font, FONTS, RECTANGLE_COLOR and FONT_COLOR
# are provided by the surrounding project and assumed to be in scope here.
def main():
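    # Suppress TensorFlow's INFO-level C++ log messages.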
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"

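    # The YAML config holds the video path plus the detection and recognition settings.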
    with open("config.yml", "r") as config_file:
        cfg = yaml.safe_load(config_file)

    det_cfg = cfg["detection"]
    rec_cfg = cfg["recognition"]

    logging.basicConfig(
        format="%(asctime)s %(module)-12s %(levelname)-8s %(message)s",
        level=cfg["log_level"])

    logging.info("Starting detection")

    detection = Detection(det_cfg)

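    # Collect the frames in which a subtitle region was detected.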
    found_frames = detection.detect_subtitle_region(cfg["video"])

    y_start, y_end = detection.get_subtitle_region()
    char_width = detection.get_char_width()
    char_dist = detection.get_char_dist()
    if char_width == 0 or char_dist == 0:
        logging.error("Char width is 0")
        return

    logging.info(
        "Found y pos ({}, {}), character width {}, character distance {}".
        format(y_start, y_end, char_width, char_dist))

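    # Load the recognition model, its weights and the character dictionary.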
    recognition = Recognition(rec_cfg["model"], rec_cfg["weights"],
                              rec_cfg["dictionary"])

    cjk = True
    font = font2 = None
    # Try the candidate fonts until one loads at the detected character width.
    for index, f in enumerate(FONTS):
        font = load_font(f, char_width)
        font2 = load_font(f, char_width // 2)
        if font is not None:
            logging.info("Loaded font {}".format(FONTS[index]))
            break
    if font is None:
        logging.error("No CJK font found")
        cjk = False

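    # Annotate each frame in which a subtitle was found.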
    for frame in found_frames:
        text = []
        img = Image.fromarray(frame)
        draw = ImageDraw.Draw(img)
        for char_region, start, stop in detection.detect_char_regions(
                frame[y_start:y_end, ]):
            res = recognition.recognize_character(char_region)
            text.append((start, stop, res[1], res[2]))

        for start, stop, char, prob in text:
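            # Draw a 3-pixel border by nesting three 1-pixel rectangles.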
            draw.rectangle([(start, y_start), (stop, y_end)],
                           outline=RECTANGLE_COLOR)
            draw.rectangle([(start + 1, y_start + 1), (stop - 1, y_end - 1)],
                           outline=RECTANGLE_COLOR)
            draw.rectangle([(start + 2, y_start + 2), (stop - 2, y_end - 2)],
                           outline=RECTANGLE_COLOR)

            probability = str(int(prob * 100)) + "%"
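            # Render the recognized character and its confidence above the box.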
            if cjk:
                draw.text((start, y_start - (stop - start)),
                          char,
                          fill=FONT_COLOR,
                          font=font)
                draw.text((start, y_start - 1.5 * (stop - start)),
                          probability,
                          fill=FONT_COLOR,
                          font=font2)
            else:
                logging.info("Detected character {} ({})".format(
                    char, probability))

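        # Show the annotated frame; press any key to advance to the next one.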
        cv2.imshow('image', np.array(img))
        cv2.waitKey(0)
        cv2.destroyAllWindows()

Example #2

def main():
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"

    with open(sys.argv[1], "r") as config_file:
        cfg = yaml.safe_load(config_file)

    print(cfg)

    det_cfg = cfg["detection"]
    rec_cfg = cfg["recognition"]

    logging.basicConfig(
        format="%(asctime)s %(module)-12s %(levelname)-8s %(message)s")

    logging.warning("Starting detection")

    detection = Detection(det_cfg)

    found_frames = detection.detect_subtitle_region(cfg["video"])

    y_start, y_end = detection.get_subtitle_region()
    char_width = detection.get_char_width()
    char_dist = detection.get_char_dist()
    if char_width == 0 or char_dist == 0:
        logging.error("Char width is 0")
        return

    logging.warning(
        "Found y pos ({}, {}), character width {}, character distance {}".
        format(y_start, y_end, char_width, char_dist))

    recognition = Recognition(rec_cfg["model"], rec_cfg["weights"],
                              rec_cfg["dictionary"])

    cjk = True
    font = font2 = None
    # Try the candidate fonts until one loads at the detected character width.
    for index, f in enumerate(FONTS):
        font = load_font(f, char_width)
        font2 = load_font(f, char_width // 2)
        if font is not None:
            logging.warning("Loaded font {}".format(FONTS[index]))
            break
    if font is None:
        logging.error("No CJK font found")
        cjk = False

    cap = cv2.VideoCapture(cfg["video"])
    save_image_seq = cfg["video_offset_start"]
    save_image_seq_end = cfg["video_offset_end"]
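    # Seek to the first frame of the configured range.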
    cap.set(cv2.CAP_PROP_POS_FRAMES, save_image_seq)
    # Frame rate and frame size are hard-coded for 29.97 fps 1080p material;
    # the output keeps the subtitle band plus 120 px of context above it.
    vout = cv2.VideoWriter(cfg["output_sub_video"],
                           cv2.VideoWriter_fourcc(*'mp4v'), 29.97,
                           (1920, 1080 - y_start + 120))
    vout.set(cv2.VIDEOWRITER_PROP_QUALITY, 0.1)
    if not vout.isOpened():
        logging.error("Could not open {}".format(cfg["output_sub_video"]))
        return

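    # Tesseract options: psm 7 treats the crop as a single text line,
    # chi_sim selects the simplified Chinese language model.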
    custom_config = r'--psm 7 -l chi_sim'
    frames_ocr = {}
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        text = []
        img = Image.fromarray(frame)
        draw = ImageDraw.Draw(img)
        # Only track the horizontal extent of the detected characters here;
        # this example delegates the actual OCR to Tesseract further below.
        x_start = 1920
        x_end = 0
        for char_region, start, stop in detection.detect_char_regions(
                frame[y_start:y_end, ], save_image=False,
                save_image_name="fill/seq_{}_{:06d}.tiff".format(
                    "{}", save_image_seq)):
            x_start = min(x_start, start)
            x_end = max(x_end, stop)
            # Per-character network recognition is disabled in this example:
            # res = recognition.recognize_character(char_region)
            # text.append((start, stop, res[1], res[2]))

        save_image_seq += 1
        if save_image_seq > save_image_seq_end:
            break

        for start, stop, char, prob in text:
            draw.rectangle([(start, y_start), (stop, y_end)],
                           outline=RECTANGLE_COLOR)
            draw.rectangle([(start + 1, y_start + 1), (stop - 1, y_end - 1)],
                           outline=RECTANGLE_COLOR)
            draw.rectangle([(start + 2, y_start + 2), (stop - 2, y_end - 2)],
                           outline=RECTANGLE_COLOR)

            probability = str(int(prob * 100)) + "%"
            if cjk:
                draw.text((start, y_start - (stop - start)), char,
                          fill=FONT_COLOR, font=font)
                draw.text((start, y_start - 1.5 * (stop - start)),
                          probability, fill=FONT_COLOR, font=font2)

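        # Write the subtitle band plus 120 px of context to the output video.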
        vout.write(frame[y_start - 120:1080, ])
        if x_start < x_end:
            gray = cv2.cvtColor(frame[y_start:y_end, x_start:x_end],
                                cv2.COLOR_BGR2GRAY)

            # Threshold: keep only the near-white subtitle pixels.
            ret, binary = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY)

            # Morphological closing to fill small gaps inside the glyphs.
            kernel = np.ones((3, 3), np.uint8)
            closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

            # Invert to get dark text on a white background for Tesseract.
            inv = cv2.bitwise_not(closing)

            img_rgb = cv2.cvtColor(inv, cv2.COLOR_GRAY2RGB)
            data_xml = pytesseract.image_to_alto_xml(img_rgb,
                                                     config=custom_config)
            print(str(save_image_seq) + " " + data_xml.decode('utf-8'))
            frames_ocr[save_image_seq] = data_xml.decode('utf-8')

    cap.release()
    vout.release()

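    # Persist the per-frame ALTO XML results as JSON, keyed by frame number.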
    with open(cfg['output_sub_ocr'], 'w') as outfile:
        json.dump(frames_ocr, outfile, sort_keys=True, indent=2)