Ejemplo n.º 1
0
def get_recipe(url, use_filter):
    # target Binging with Babish video
    transcript = YouTubeTranscriptApi.get_transcript(url)

    # Get verbs from db
    verbs = []
    with open("../../datasets/cooking_verbs.txt", 'r') as verb_file:
        verbs = verb_file.readlines()

    # ingredient extraction ============================
    db = Ingredients()
    ingreds = set([])
    actual_ingredients = []
    res = {}
    i = 0
    print('>>> Getting ingredients')
    for subtitle in transcript:
        i += 1
        text = subtitle['text']
        cur_ingredients = db.parse_ingredients(text)
        measurements = db.parse_measurements(text)
        if len(cur_ingredients) > 0:
            print('>>> Ingredients from line ' + str(i) + ': ')
            print(cur_ingredients)
        ingreds |= set(cur_ingredients)
        actual_ingredients += get_actual_ingredients(cur_ingredients,
                                                     measurements)

    print('>>> Ingredients detected: ')
    print(actual_ingredients)
    res['ingredients'] = actual_ingredients
    # ==================================================
    video_file = download_video(url)

    # steps extraction =================================
    times = ['seconds', 'minutes', 'hours', 'second', 'minute', 'hour']
    instructions = []
    pictures = []
    if use_filter:
        print('>>> Generating steps')
        i = 0
        for subtitle in transcript:
            text = subtitle['text']

            # Write all lines
            # with open('before_filter.txt', 'a') as before_file:
            #     before_file.write(text + '\n')

            # Remove lines without an ingredient, cooking verb, or time measurement
            for target in (list(ingreds) + verbs):
                if (target in text or len([t for t in times if (t in text)])):
                    instructions.append({'step': text})
                    if i % PICTURE_FREQUENCY == 0:
                        pictures.append(subtitle['start'])
                    print('>>> KEEPING LINE: ' + text)
                    # with open('after_filter.txt', 'a') as after_file:
                    #     after_file.write(text + '\n')
                    #     break
                    i += 1
                    break
    res['instructions'] = instructions
    print('>>> Steps Generation Complete')
    # ==================================================

    # frames extraction ================================
    print('>>> Extracting Frames...')
    file_names = extract_frames(video_file, pictures)
    i = 0
    for file_name in file_names:
        res['instructions'][i]['image'] = file_name
        i += PICTURE_FREQUENCY
    if os.path.exists(video_file):
        os.remove(video_file)
    # ==================================================
    print('>>> finished. result: ')
    print(res)
    return res