# (max duration sec, NUM_KEYWORDS, SUMMARY_PERCENT, SENTENCES_PER_PARA)
# Only these three tunables vary with video length; all other thresholds
# are constant across every duration bucket.
_DURATION_BUCKETS = (
    (900, 15, 60, 6),           # 0-15 min
    (1800, 18, 50, 5),          # 15-30 min
    (2700, 20, 40, 4),          # 30-45 min
    (3600, 22, 35, 4),          # 45-60 min
    (7200, 25, 30, 4),          # 1-2 hr
    (float('inf'), 30, 25, 4),  # more than 2 hr
)


def _thresholds_for(sec):
    """Return the tuning thresholds for a video *sec* seconds long.

    Replaces six near-identical if/elif branches in which only
    NUM_KEYWORDS, SUMMARY_PERCENT and SENTENCES_PER_PARA actually varied.
    Bucket boundaries and values are identical to the original chain.
    """
    for upper, num_keywords, summary_percent, sentences_per_para in _DURATION_BUCKETS:
        if sec <= upper:
            break
    return {
        'NUM_KEYWORDS': num_keywords,
        'SUMMARY_PERCENT': summary_percent,
        'NON_TEXT_LEN': 50,
        'SIMILAR_DISTANCE': 20,
        # Keyframe sampling jump grows linearly with duration —
        # presumably a frame interval; TODO confirm units against ImageProcessing.
        'INTERVAL_KEYFRAMES': (sec / 60) * 100,
        'SENTENCE_SIMILARITY': 0.35,
        'WORDS_PER_PARA': 20,
        'PERCENT_REDUCE': 0.6,
        'SENTENCES_PER_PARA': sentences_per_para,
    }


def main():
    """Serial pipeline: prompt for a YouTube URL, then transcribe, extract
    keywords, summarize, pull keyframes, build paragraphs/headings, scrape
    the web for references, and emit Brevis-Notes.docx/.pdf.

    Side effects: reads stdin, writes output files, and deletes the
    temporary 'res' folder at the end.
    """
    url = input("Enter the URL = ")
    sec = pafy.new(url).length
    print(f"\nVideo duration in sec = {sec}\n")

    # Duration-bucketed tuning thresholds.
    th = _thresholds_for(sec)

    start = time.perf_counter()

    # Transcription and Cleaning
    yt = YoutubeTranscribe(url)
    text = yt.youtube_transcribe()

    # Keywords Extractor
    num_keywords = th['NUM_KEYWORDS']
    words = KeywordsExtractor(text, num_keywords)
    keywords = words.ExtractKeywords()
    print(f'\nKeywords:\n {keywords}')

    # Summarization
    summ = Summarizer()
    percentage = th['SUMMARY_PERCENT']
    summary_result = summ.summary(text, percentage)
    print(f'\nSummary:\n {summary_result}')

    # Keyframe Extraction (Output : 'out' folder)
    print("\nExtracting Keyframes\n")
    ip = ImageProcessing(url, keywords)
    ip.img_processing(text_threshold=th['NON_TEXT_LEN'],
                      dis_threshold=th['SIMILAR_DISTANCE'],
                      jump=th['INTERVAL_KEYFRAMES'])

    # Paragraph and Headings (Output : paragraph_headings.txt)
    print("\nGenerating Paragraphs and Headings\n")
    pf = ParaFormation(summary_result)
    list_para = pf.paragraph(similarity_threshold=th['SENTENCE_SIMILARITY'],
                             word_threshold=th['WORDS_PER_PARA'],
                             percent_reduce=th['PERCENT_REDUCE'])
    ph = ParaHeadings(list_para)
    title_para = ph.get_titles_paras(sentence_threshold=th['SENTENCES_PER_PARA'])

    # Final Notes (Includes Web Scraping)
    print("\nGenerating Final Notes\n")
    scraped_results = Scrapper(keywords, 2, 2, 2)
    s = scraped_results.web_scrape()
    notes = Notes(url, s)
    notes.generate_notes()
    print("\nBrevis-Notes.docx and Brevis-Notes.pdf(on Windows) Generated\n")

    # Clean up the temporary working folder.
    if os.path.exists('res'):
        shutil.rmtree('res')

    finish = time.perf_counter()
    print(f'Serial: Finished in {round(finish-start, 2)} second(s)')
# (max duration sec, NUM_KEYWORDS, SUMMARY_PERCENT, SENTENCES_PER_PARA)
# Only these three tunables vary with video length; all other thresholds
# are constant across every duration bucket.
_DURATION_BUCKETS = (
    (900, 15, 60, 6),           # 0-15 min
    (1800, 18, 50, 5),          # 15-30 min
    (2700, 20, 40, 4),          # 30-45 min
    (3600, 22, 35, 4),          # 45-60 min
    (7200, 25, 30, 4),          # 1-2 hr
    (float('inf'), 30, 25, 4),  # more than 2 hr
)


def _thresholds_for(sec):
    """Return the tuning thresholds for a video *sec* seconds long.

    Replaces six near-identical if/elif branches in which only
    NUM_KEYWORDS, SUMMARY_PERCENT and SENTENCES_PER_PARA actually varied.
    Bucket boundaries and values are identical to the original chain.
    """
    for upper, num_keywords, summary_percent, sentences_per_para in _DURATION_BUCKETS:
        if sec <= upper:
            break
    return {
        'NUM_KEYWORDS': num_keywords,
        'SUMMARY_PERCENT': summary_percent,
        'NON_TEXT_LEN': 50,
        'SIMILAR_DISTANCE': 20,
        # Keyframe sampling jump grows linearly with duration —
        # presumably a frame interval; TODO confirm units against ImageProcessing.
        'INTERVAL_KEYFRAMES': (sec / 60) * 100,
        'SENTENCE_SIMILARITY': 0.35,
        'WORDS_PER_PARA': 20,
        'PERCENT_REDUCE': 0.6,
        'SENTENCES_PER_PARA': sentences_per_para,
    }


def main():
    """Parallel (multiprocessing) pipeline: transcribe a YouTube video, then
    run keyword/keyframe extraction and summarization in two worker
    processes before assembling the final Brevis notes.

    Side effects: reads stdin, writes output files, spawns two processes,
    and deletes the temporary 'res' folder at the end.
    """
    url = input("Enter the URL = ")
    sec = pafy.new(url).length
    print(f"\nVideo duration in sec = {sec}\n")

    # Duration-bucketed tuning thresholds.
    th = _thresholds_for(sec)

    # Starting the timer
    start = time.perf_counter()

    # Transcription and Cleaning
    yt = YoutubeTranscribe(url)
    text = yt.youtube_transcribe()

    # Queue used to receive results back from the worker process(es).
    Q = multiprocessing.Queue()

    # Run the keyword and summary stages in parallel.
    # NOTE(review): only Process_Extract_Keywords receives the Queue, yet a
    # single Q.get() below is treated as the scraped links — confirm the
    # summary worker communicates through some other channel.
    key_ext = multiprocessing.Process(
        target=Process_Extract_Keywords,
        args=(url, text, Q, th['NUM_KEYWORDS'], th['NON_TEXT_LEN'],
              th['SIMILAR_DISTANCE'], th['INTERVAL_KEYFRAMES']))
    summ_ext = multiprocessing.Process(
        target=Process_Get_Summary,
        args=(text, th['SUMMARY_PERCENT'], th['SENTENCE_SIMILARITY'],
              th['WORDS_PER_PARA'], th['PERCENT_REDUCE'],
              th['SENTENCES_PER_PARA']))

    # Start both processes simultaneously, then wait for both to finish.
    key_ext.start()
    summ_ext.start()
    key_ext.join()
    summ_ext.join()

    # Fetch scraped links produced by the keyword worker.
    scraped_res = Q.get()

    # Generate the final notes.
    notes = Notes(url, scraped_res)
    notes.generate_notes()
    print("\nBrevis-Notes.docx and Brevis-Notes.pdf(on Windows) Generated\n")

    # Remove the temporary res folder.
    if os.path.exists('res'):
        shutil.rmtree('res')

    # Stop the timer and report elapsed time.
    end = time.perf_counter()
    print(f"Finished in {round(end-start, 3)} second(s)")
def main():
    """Thread-pool pipeline: transcribe a YouTube URL, then fan out the
    remaining stages across two levels of a ThreadPoolExecutor.

    Level 1 runs keyword extraction and summarization concurrently;
    level 2 runs keyframe extraction, paragraph formation, and web
    scraping concurrently. Output: Brevis-Notes.docx/.pdf; the temporary
    'res' folder is removed at the end.
    """
    url = input("Enter the URL = ")
    start = time.perf_counter()

    # Transcription and Cleaning
    transcriber = YoutubeTranscribe(url)
    text = transcriber.youtube_transcribe()

    # Level 1 — keywords and summary in parallel.
    # (executor.submit gives a Future; executor.map gives results directly.)
    with ThreadPoolExecutor() as executor:
        num_keywords = 10
        keywords_future = executor.submit(
            KeywordsExtractor(text, num_keywords).ExtractKeywords)

        percentage = 40
        summary_list = list(
            executor.map(Summarizer().summary, [text], [percentage]))

    print(f"\nKeywords:\n {keywords_future.result()}")
    print(f"\nSummary:\n {summary_list[0]}")

    # Level 2 — keyframes, paragraphs and scraping in parallel.
    with ThreadPoolExecutor() as executor:
        # Keyframe Extraction (Output : 'out' folder)
        print("\nExtracting Keyframes\n")
        frame_results = list(
            executor.map(
                ImageProcessing(url, keywords_future.result()).img_processing,
                [50], [20], [1000]))

        # Paragraph and Headings (Output : paragraph_headings.txt)
        print("\nGenerating Paragraphs and Headings\n")
        para_future = executor.submit(
            ParaFormation(summary_list[0]).paragraph)

        print("\nScraping Web\n")
        scrape_future = executor.submit(
            Scrapper(keywords_future.result(), 2, 2, 2).web_scrape)

    print(len(os.listdir(os.path.join('res', 'out'))),
          "images extracted in 'out' folder")

    headings = ParaHeadings(para_future.result())
    title_para = headings.get_titles_paras(sentence_threshold=2)

    # Final Notes
    notes = Notes(url, scrape_future.result())
    notes.generate_notes()
    print("\nBrevis-Notes.docx and Brevis-Notes.pdf(on Windows) Generated\n")

    if os.path.exists('res'):
        shutil.rmtree('res')

    finish = time.perf_counter()
    print(f'Parallel: Finished in {round(finish-start, 2)} second(s)')
# (max duration sec, NUM_KEYWORDS, SUMMARY_PERCENT, SENTENCES_PER_PARA)
# Only these three tunables vary with video length; all other thresholds
# are constant across every duration bucket.
_DURATION_BUCKETS = (
    (900, 15, 60, 6),           # 0-15 min
    (1800, 18, 50, 5),          # 15-30 min
    (2700, 20, 40, 4),          # 30-45 min
    (3600, 22, 35, 4),          # 45-60 min
    (7200, 25, 30, 4),          # 1-2 hr
    (float('inf'), 30, 25, 4),  # more than 2 hr
)


def _thresholds_for(sec):
    """Return the tuning thresholds for a video *sec* seconds long.

    Replaces six near-identical if/elif branches in which only
    NUM_KEYWORDS, SUMMARY_PERCENT and SENTENCES_PER_PARA actually varied.
    Bucket boundaries and values are identical to the original chain.
    """
    for upper, num_keywords, summary_percent, sentences_per_para in _DURATION_BUCKETS:
        if sec <= upper:
            break
    return {
        'NUM_KEYWORDS': num_keywords,
        'SUMMARY_PERCENT': summary_percent,
        'NON_TEXT_LEN': 50,
        'SIMILAR_DISTANCE': 20,
        # Keyframe sampling jump grows linearly with duration —
        # presumably a frame interval; TODO confirm units against ImageProcessing.
        'INTERVAL_KEYFRAMES': (sec / 60) * 100,
        'SENTENCE_SIMILARITY': 0.35,
        'WORDS_PER_PARA': 20,
        'PERCENT_REDUCE': 0.6,
        'SENTENCES_PER_PARA': sentences_per_para,
    }


def gen():
    """Generate Brevis notes for the globally-configured `video_url` using
    two worker processes, then zip the results into Brevis_Notes.zip.

    Reads globals: video_url, text, option, scrape_json.
    Writes globals: path (absolute path of the zip), scrape_json.
    Side effects: spawns two processes, writes Brevis_Notes.zip, and
    removes the temporary 'res' folder.
    """
    global video_url
    global keywords
    global path
    global json_result
    global text
    global summary_result
    global scrape_json
    global option

    sec = pafy.new(video_url).length
    print(f"\nVideo duration in sec = {sec}\n")

    # Duration-bucketed tuning thresholds.
    th = _thresholds_for(sec)

    start = time.perf_counter()

    # Run keyword extraction and summarization in parallel processes.
    # NOTE(review): unlike the Queue-based main(), no Queue is passed here,
    # so the workers presumably publish results via globals or files —
    # confirm against Process_Extract_Keywords / Process_Get_Summary.
    key_ext = multiprocessing.Process(
        target=Process_Extract_Keywords,
        args=(video_url, text, th['NUM_KEYWORDS'], th['NON_TEXT_LEN'],
              th['SIMILAR_DISTANCE'], th['INTERVAL_KEYFRAMES']))
    summ_ext = multiprocessing.Process(
        target=Process_Get_Summary,
        args=(text, th['SUMMARY_PERCENT'], th['SENTENCE_SIMILARITY'],
              th['WORDS_PER_PARA'], th['PERCENT_REDUCE'],
              th['SENTENCES_PER_PARA']))

    # Start both processes simultaneously, then wait for both to finish.
    key_ext.start()
    summ_ext.start()
    key_ext.join()
    summ_ext.join()

    # Overview/Notes modes skip web references entirely.
    if option == "Overview" or option == "Notes":
        scrape_json = {}

    # Generate the final notes.
    notes = Notes(video_url, scrape_json)
    notes.generate_notes()
    print("\nBrevis-Notes.docx and Brevis-Notes.pdf(on Windows) Generated\n")

    # Bundle the generated documents (PDF only exists on Windows).
    # Renamed from `zip`, which shadowed the builtin.
    with ZipFile('Brevis_Notes.zip', 'w') as archive:
        print("Writing zip")
        if os.path.exists(os.path.join('res', 'Brevis-Notes.pdf')):
            archive.write(os.path.join('res', 'Brevis-Notes.pdf'),
                          arcname='Brevis-Notes.pdf')
        archive.write(os.path.join('res', 'Brevis-Notes.docx'),
                      arcname='Brevis-Notes.docx')

    path = os.path.abspath("Brevis_Notes.zip")

    if os.path.exists('res'):
        shutil.rmtree('res')

    finish = time.perf_counter()
    print(f'Gen Function: Finished in {round(finish-start, 2)} second(s)')
# (max duration sec, NUM_KEYWORDS, SUMMARY_PERCENT, SENTENCES_PER_PARA)
# Only these three tunables vary with video length; all other thresholds
# are constant across every duration bucket.
_DURATION_BUCKETS = (
    (900, 15, 60, 6),           # 0-15 min
    (1800, 18, 50, 5),          # 15-30 min
    (2700, 20, 40, 4),          # 30-45 min
    (3600, 22, 35, 4),          # 45-60 min
    (7200, 25, 30, 4),          # 1-2 hr
    (float('inf'), 30, 25, 4),  # more than 2 hr
)


def _thresholds_for(sec):
    """Return the tuning thresholds for a video *sec* seconds long.

    Replaces six near-identical if/elif branches in which only
    NUM_KEYWORDS, SUMMARY_PERCENT and SENTENCES_PER_PARA actually varied.
    Bucket boundaries and values are identical to the original chain.
    """
    for upper, num_keywords, summary_percent, sentences_per_para in _DURATION_BUCKETS:
        if sec <= upper:
            break
    return {
        'NUM_KEYWORDS': num_keywords,
        'SUMMARY_PERCENT': summary_percent,
        'NON_TEXT_LEN': 50,
        'SIMILAR_DISTANCE': 20,
        # Keyframe sampling jump grows linearly with duration —
        # presumably a frame interval; TODO confirm units against ImageProcessing.
        'INTERVAL_KEYFRAMES': (sec / 60) * 100,
        'SENTENCE_SIMILARITY': 0.35,
        'WORDS_PER_PARA': 20,
        'PERCENT_REDUCE': 0.6,
        'SENTENCES_PER_PARA': sentences_per_para,
    }


def gen():
    """Serial notes generation for the globally-configured `video_url`,
    branching on the global `option` ("Overview" / "Notes" / "Notes+Ref"),
    then zipping the results into Brevis_Notes.zip.

    Reads globals: video_url, keywords, summary_result, option, scrape_json.
    Writes globals: path (absolute path of the zip), scrape_json.
    Side effects: writes Brevis_Notes.zip and removes the temporary
    'res' folder.
    """
    global video_url
    global keywords
    global path
    global json_result
    global text
    global summary_result
    global scrape_json
    global option

    sec = pafy.new(video_url).length
    print(f"\nVideo duration in sec = {sec}\n")

    # Duration-bucketed tuning thresholds.
    th = _thresholds_for(sec)

    start = time.perf_counter()

    if option == "Overview":
        # Overview skips keyframes but downstream code expects the folder.
        if not os.path.exists(os.path.join('res', 'out')):
            os.mkdir(os.path.join('res', 'out'))
    elif option == "Notes" or option == "Notes+Ref":
        # Keyframe Extraction (Output : 'out' folder)
        print("\nExtracting Keyframes\n")
        ip = ImageProcessing(video_url, keywords)
        ip.img_processing(text_threshold=th['NON_TEXT_LEN'],
                          dis_threshold=th['SIMILAR_DISTANCE'],
                          jump=th['INTERVAL_KEYFRAMES'])

    # Paragraph and Headings (Output : paragraph_headings.txt)
    # NOTE(review): reconstructed from whitespace-mangled source; assumed to
    # run for every option (Overview included) since notes generation below
    # needs the paragraph output — confirm against the original layout.
    print("\nGenerating Paragraphs and Headings\n")
    pf = ParaFormation(summary_result)
    list_para = pf.paragraph(similarity_threshold=th['SENTENCE_SIMILARITY'],
                             word_threshold=th['WORDS_PER_PARA'],
                             percent_reduce=th['PERCENT_REDUCE'])
    ph = ParaHeadings(list_para)
    title_para = ph.get_titles_paras(sentence_threshold=th['SENTENCES_PER_PARA'])

    # Final Notes (web references only included for "Notes+Ref").
    print("\nGenerating Final Notes\n")
    if option == "Overview" or option == "Notes":
        scrape_json = {}

    notes = Notes(video_url, scrape_json)
    notes.generate_notes()
    print("\nBrevis-Notes.docx Generated\n")

    # Bundle the generated documents (PDF only exists on Windows).
    # Renamed from `zip`, which shadowed the builtin.
    with ZipFile('Brevis_Notes.zip', 'w') as archive:
        print("Writing zip")
        if os.path.exists(os.path.join('res', 'Brevis-Notes.pdf')):
            archive.write(os.path.join('res', 'Brevis-Notes.pdf'),
                          arcname='Brevis-Notes.pdf')
        archive.write(os.path.join('res', 'Brevis-Notes.docx'),
                      arcname='Brevis-Notes.docx')

    path = os.path.abspath("Brevis_Notes.zip")

    if os.path.exists('res'):
        shutil.rmtree('res')

    finish = time.perf_counter()
    print(f'Gen Function: Finished in {round(finish-start, 2)} second(s)')