def generate(data):
    """Run the transcript -> keywords -> web scrape -> summary pipeline.

    Reads the module-level ``video_url`` and publishes every intermediate
    product through module-level globals: ``text``, ``keywords``,
    ``json_result`` and ``result``. As a side effect it (over)writes two
    files, resolved relative to the current working directory:

    * ``keywords.txt`` - the extracted keywords, one per line.
    * ``summary.txt``  - the generated summary.

    Args:
        data: Not used by this function; kept so existing callers that pass
            an argument keep working.

    Returns:
        None. Results are exposed only through the globals and files above.
    """
    # Only names that are *assigned* here need a ``global`` declaration;
    # ``video_url`` is merely read, so normal global lookup suffices.
    global json_result, keywords, text, result

    # Transcription and Cleaning
    text = youtube_transcribe(video_url)

    # Keywords Extractor
    # NOTE(review): 15 is presumably the number of keywords requested -
    # confirm against get_keywords().
    keywords = get_keywords(text, 15)
    print('\nKeywords:\n', keywords)

    # ``with`` guarantees the handle is closed even if join()/write() raises.
    with open("keywords.txt", "w") as keywords_file:
        keywords_file.write("\n".join(keywords))

    json_result = web_scrape(keywords)

    # Summarization
    # The summary percentage is fixed at 50 here rather than read from input.
    result = summary(text, 50)

    with open("summary.txt", "w") as summary_file:
        summary_file.write(result)
from keyframes import Image_Processing
from text_recognition_and_extraction import text_recognition
import io
import pytesseract

# Path to your tesseract executable.
# This is an absolute, machine-specific location; adjust it to the local
# Tesseract install before running.
pytesseract.pytesseract.tesseract_cmd = r'G:\himanshu\Tesseract-OCR\tesseract.exe'

if __name__ == '__main__':
    # Interactive command-line run: every input is prompted for on stdin and
    # every result is printed to stdout.

    # Transcription and Cleaning
    url = input("Enter the URL = ")
    text = youtube_transcribe(url)

    # Keywords Extractor
    # NOTE(review): 15 is presumably the number of keywords requested -
    # confirm against get_keywords().
    keywords = get_keywords(text, 15)
    print('\nKeywords:\n', keywords)

    # Summarization
    # int() raises ValueError if the entered text is not a valid integer;
    # the script does not catch it.
    percentage = int(
        input(
            "Enter the percentage of information in text you want as summary : "
        ))
    result = summary(text, percentage)
    print('\nSummary:\n', result)

    # Keyframe Extraction
    # Driven by the same URL and the keywords extracted above; the message
    # below reports the images as written to the 'out' folder.
    Image_Processing(url, keywords)
    print("Images Extracted in 'out' folder")

    # Text Recognition And Extraction