Example #1
0
def main():
    """Serial Brevis pipeline.

    Prompts for a YouTube URL, transcribes the video, extracts keywords,
    summarizes the transcript, pulls keyframes, builds paragraphs with
    headings, and generates the final Brevis notes document — all steps
    run one after another.
    """

    # Transcription and Cleaning
    url = input("Enter the URL = ")

    sec = pafy.new(url).length
    print(f"\nVideo duration in sec = {sec}\n")

    # THRESHOLDS
    # Keyframe sampling interval grows linearly with the video length.
    DYNAMIC_INTERVAL = (sec / 60) * 100

    # Duration tiers: (upper bound in seconds, keywords to extract,
    # summary percent, sentences per paragraph).  Only these three values
    # vary between tiers; everything else is constant.
    DURATION_TIERS = (
        (900, 15, 60, 6),            # 0-15 min
        (1800, 18, 50, 5),           # 15-30 min
        (2700, 20, 40, 4),           # 30-45 min
        (3600, 22, 35, 4),           # 45-60 min
        (7200, 25, 30, 4),           # 1-2 hr
        (float('inf'), 30, 25, 4),   # more than 2 hr
    )
    for limit, NUM_KEYWORDS, SUMMARY_PERCENT, SENTENCES_PER_PARA in DURATION_TIERS:
        if sec <= limit:
            break

    # Tier-independent thresholds.
    NON_TEXT_LEN = 50
    SIMILAR_DISTANCE = 20
    INTERVAL_KEYFRAMES = DYNAMIC_INTERVAL
    SENTENCE_SIMILARITY = 0.35
    WORDS_PER_PARA = 20
    PERCENT_REDUCE = 0.6

    start = time.perf_counter()

    yt = YoutubeTranscribe(url)
    text = yt.youtube_transcribe()

    # Keywords Extractor
    words = KeywordsExtractor(text, NUM_KEYWORDS)
    keywords = words.ExtractKeywords()
    print(f'\nKeywords:\n {keywords}')

    # Summarization
    summ = Summarizer()
    summary_result = summ.summary(text, SUMMARY_PERCENT)
    print(f'\nSummary:\n {summary_result}')

    # Keyframe Extraction (Output : 'out' folder)
    print("\nExtracting Keyframes\n")
    ip = ImageProcessing(url, keywords)
    ip.img_processing(text_threshold=NON_TEXT_LEN,
                      dis_threshold=SIMILAR_DISTANCE,
                      jump=INTERVAL_KEYFRAMES)

    # Paragraph and Headings (Output : paragraph_headings.txt)
    print("\nGenerating Paragraphs and Headings\n")
    pf = ParaFormation(summary_result)
    list_para = pf.paragraph(similarity_threshold=SENTENCE_SIMILARITY,
                             word_threshold=WORDS_PER_PARA,
                             percent_reduce=PERCENT_REDUCE)
    ph = ParaHeadings(list_para)
    title_para = ph.get_titles_paras(sentence_threshold=SENTENCES_PER_PARA)

    # Final Notes (Includes Web Scraping)
    print("\nGenerating Final Notes\n")
    scraped_results = Scrapper(keywords, 2, 2, 2)
    s = scraped_results.web_scrape()
    notes = Notes(url, s)
    notes.generate_notes()
    print("\nBrevis-Notes.docx and Brevis-Notes.pdf(on Windows) Generated\n")

    # Remove the temporary working folder.
    if os.path.exists('res'):
        shutil.rmtree('res')

    finish = time.perf_counter()
    print(f'Serial: Finished in {round(finish-start, 2)} second(s)')
Example #2
0
def main():
    """Multiprocessing Brevis pipeline.

    Prompts for a YouTube URL, transcribes the video, then runs the
    keyword/keyframe worker and the summary/paragraph worker as two
    parallel processes before generating the final notes document.
    """

    url = input("Enter the URL = ")

    sec = pafy.new(url).length
    print(f"\nVideo duration in sec = {sec}\n")

    # THRESHOLDS
    # Keyframe sampling interval grows linearly with the video length.
    DYNAMIC_INTERVAL = (sec / 60) * 100

    # Duration tiers: (upper bound in seconds, keywords to extract,
    # summary percent, sentences per paragraph).  Only these three values
    # vary between tiers; everything else is constant.
    DURATION_TIERS = (
        (900, 15, 60, 6),            # 0-15 min
        (1800, 18, 50, 5),           # 15-30 min
        (2700, 20, 40, 4),           # 30-45 min
        (3600, 22, 35, 4),           # 45-60 min
        (7200, 25, 30, 4),           # 1-2 hr
        (float('inf'), 30, 25, 4),   # more than 2 hr
    )
    for limit, NUM_KEYWORDS, SUMMARY_PERCENT, SENTENCES_PER_PARA in DURATION_TIERS:
        if sec <= limit:
            break

    # Tier-independent thresholds.
    NON_TEXT_LEN = 50
    SIMILAR_DISTANCE = 20
    INTERVAL_KEYFRAMES = DYNAMIC_INTERVAL
    SENTENCE_SIMILARITY = 0.35
    WORDS_PER_PARA = 20
    PERCENT_REDUCE = 0.6

    # Starting the timer
    start = time.perf_counter()

    # Transcription and Cleaning
    yt = YoutubeTranscribe(url)
    text = yt.youtube_transcribe()

    # Multiprocessing queue to exchange data between the worker processes.
    Q = multiprocessing.Queue()
    # Run the keyword and summary pipelines in parallel processes.
    key_ext = multiprocessing.Process(
        target=Process_Extract_Keywords,
        args=(url, text, Q, NUM_KEYWORDS, NON_TEXT_LEN, SIMILAR_DISTANCE,
              INTERVAL_KEYFRAMES))
    summ_ext = multiprocessing.Process(
        target=Process_Get_Summary,
        args=(text, SUMMARY_PERCENT, SENTENCE_SIMILARITY, WORDS_PER_PARA,
              PERCENT_REDUCE, SENTENCES_PER_PARA))
    # Start both processes simultaneously.
    key_ext.start()
    summ_ext.start()
    # Wait until both processes have finished execution.
    key_ext.join()
    summ_ext.join()
    # Fetch the scraped links placed on the queue by the keyword worker.
    scraped_res = Q.get()

    # Generate the final notes document.
    notes = Notes(url, scraped_res)
    notes.generate_notes()
    print("\nBrevis-Notes.docx and Brevis-Notes.pdf(on Windows) Generated\n")

    # Remove the temporary res folder.
    if os.path.exists('res'):
        shutil.rmtree('res')

    # Stop the timer and report the elapsed time.
    end = time.perf_counter()
    print(f"Finished in {round(end-start, 3)} second(s)")
Example #3
0
def main():
    """Thread-pool Brevis pipeline.

    Prompts for a YouTube URL, transcribes it, then runs the remaining
    stages in two levels of concurrent tasks: level 1 extracts keywords
    and a summary, level 2 extracts keyframes, forms paragraphs and
    scrapes the web, before the final notes document is generated.
    """
    url = input("Enter the URL = ")

    start = time.perf_counter()

    # Transcription and Cleaning
    transcriber = YoutubeTranscribe(url)
    transcript = transcriber.youtube_transcribe()

    # Level1: keyword extraction and summarization run concurrently.
    # Two equivalent submission styles are used on purpose:
    #   executor.submit(fn)        -> Future, read via .result()
    #   executor.map(fn, a, b)     -> iterator of results
    with ThreadPoolExecutor() as executor:
        # Keywords Extractor
        # num_keywords=int(input("Enter number of keywords to be extracted : "))
        num_keywords = 10
        level1_results1 = executor.submit(
            KeywordsExtractor(transcript, num_keywords).ExtractKeywords)

        # Summarization
        percentage = 40
        level1_results2 = list(
            executor.map(Summarizer().summary, [transcript], [percentage]))

        print(f"\nKeywords:\n {level1_results1.result()}")
        print(f"\nSummary:\n {level1_results2[0]}")

    # Level2: keyframes, paragraph formation and web scraping in parallel.
    with ThreadPoolExecutor() as executor:
        # Keyframe Extraction (Output : 'out' folder)
        print("\nExtracting Keyframes\n")
        level2_results1 = list(
            executor.map(
                ImageProcessing(url, level1_results1.result()).img_processing,
                [50], [20], [1000]))

        # Paragraph and Headings (Output : paragraph_headings.txt)
        print("\nGenerating Paragraphs and Headings\n")
        level2_results2 = executor.submit(
            ParaFormation(level1_results2[0]).paragraph)

        print("\nScraping Web\n")
        level2_results3 = executor.submit(
            Scrapper(level1_results1.result(), 2, 2, 2).web_scrape)

        extracted = len(os.listdir(os.path.join('res', 'out')))
        print(extracted, "images extracted in 'out' folder")

    headings = ParaHeadings(level2_results2.result())
    title_para = headings.get_titles_paras(sentence_threshold=2)

    # Final Notes
    notes = Notes(url, level2_results3.result())
    notes.generate_notes()
    print("\nBrevis-Notes.docx and Brevis-Notes.pdf(on Windows) Generated\n")

    # Remove the temporary working folder.
    if os.path.exists('res'):
        shutil.rmtree('res')

    finish = time.perf_counter()

    print(f'Parallel: Finished in {round(finish-start, 2)} second(s)')
def gen():
    """Generate Brevis notes for the globally-configured video.

    Reads `video_url`, `text` and `option` from module globals, runs the
    keyword and summary worker processes in parallel, builds the notes
    document, zips it and stores the archive's absolute path in the
    global `path`.
    """
    global video_url
    global keywords
    global path
    global json_result
    global text
    global summary_result
    global scrape_json
    global option

    sec = pafy.new(video_url).length
    print(f"\nVideo duration in sec = {sec}\n")

    # THRESHOLDS
    # Keyframe sampling interval grows linearly with the video length.
    DYNAMIC_INTERVAL = (sec / 60) * 100

    # Duration tiers: (upper bound in seconds, keywords to extract,
    # summary percent, sentences per paragraph).  Only these three values
    # vary between tiers; everything else is constant.
    DURATION_TIERS = (
        (900, 15, 60, 6),            # 0-15 min
        (1800, 18, 50, 5),           # 15-30 min
        (2700, 20, 40, 4),           # 30-45 min
        (3600, 22, 35, 4),           # 45-60 min
        (7200, 25, 30, 4),           # 1-2 hr
        (float('inf'), 30, 25, 4),   # more than 2 hr
    )
    for limit, NUM_KEYWORDS, SUMMARY_PERCENT, SENTENCES_PER_PARA in DURATION_TIERS:
        if sec <= limit:
            break

    # Tier-independent thresholds.
    NON_TEXT_LEN = 50
    SIMILAR_DISTANCE = 20
    INTERVAL_KEYFRAMES = DYNAMIC_INTERVAL
    SENTENCE_SIMILARITY = 0.35
    WORDS_PER_PARA = 20
    PERCENT_REDUCE = 0.6

    start = time.perf_counter()

    # Run the keyword and summary pipelines in parallel processes.
    key_ext = multiprocessing.Process(target=Process_Extract_Keywords,
                                      args=(video_url, text, NUM_KEYWORDS,
                                            NON_TEXT_LEN, SIMILAR_DISTANCE,
                                            INTERVAL_KEYFRAMES))
    summ_ext = multiprocessing.Process(
        target=Process_Get_Summary,
        args=(text, SUMMARY_PERCENT, SENTENCE_SIMILARITY, WORDS_PER_PARA,
              PERCENT_REDUCE, SENTENCES_PER_PARA))
    # Start both processes simultaneously.
    key_ext.start()
    summ_ext.start()
    # Wait until both processes have finished execution.
    key_ext.join()
    summ_ext.join()

    # Scraped references are only included for the "Notes+Ref" option.
    if option == "Overview" or option == "Notes":
        scrape_json = {}
    # Generate the final notes document.
    notes = Notes(video_url, scrape_json)
    notes.generate_notes()
    print("\nBrevis-Notes.docx and Brevis-Notes.pdf(on Windows) Generated\n")

    # Bundle the generated documents into a zip archive.
    # (Named `zf` to avoid shadowing the builtin `zip`.)
    with ZipFile('Brevis_Notes.zip', 'w') as zf:
        print("Writing zip")
        # The PDF is only produced on Windows, so it may be absent.
        if os.path.exists(os.path.join('res', 'Brevis-Notes.pdf')):
            zf.write(os.path.join('res', 'Brevis-Notes.pdf'),
                     arcname='Brevis-Notes.pdf')
        zf.write(os.path.join('res', 'Brevis-Notes.docx'),
                 arcname='Brevis-Notes.docx')

    path = os.path.abspath("Brevis_Notes.zip")

    # Remove the temporary working folder.
    if os.path.exists('res'):
        shutil.rmtree('res')

    finish = time.perf_counter()

    print(f'Gen Function: Finished in {round(finish-start, 2)} second(s)')
Example #5
0
def gen():
    """Generate Brevis notes for the globally-configured video (serial).

    Reads `video_url`, `keywords`, `summary_result` and `option` from
    module globals; optionally extracts keyframes (for "Notes" /
    "Notes+Ref"), forms paragraphs with headings, builds the notes
    document, zips it and stores the archive's absolute path in the
    global `path`.
    """
    global video_url
    global keywords
    global path
    global json_result
    global text
    global summary_result
    global scrape_json
    global option

    sec = pafy.new(video_url).length
    print(f"\nVideo duration in sec = {sec}\n")

    # THRESHOLDS
    # Keyframe sampling interval grows linearly with the video length.
    DYNAMIC_INTERVAL = (sec / 60) * 100

    # Duration tiers: (upper bound in seconds, keywords to extract,
    # summary percent, sentences per paragraph).  Only these three values
    # vary between tiers; everything else is constant.
    DURATION_TIERS = (
        (900, 15, 60, 6),            # 0-15 min
        (1800, 18, 50, 5),           # 15-30 min
        (2700, 20, 40, 4),           # 30-45 min
        (3600, 22, 35, 4),           # 45-60 min
        (7200, 25, 30, 4),           # 1-2 hr
        (float('inf'), 30, 25, 4),   # more than 2 hr
    )
    for limit, NUM_KEYWORDS, SUMMARY_PERCENT, SENTENCES_PER_PARA in DURATION_TIERS:
        if sec <= limit:
            break

    # Tier-independent thresholds.
    NON_TEXT_LEN = 50
    SIMILAR_DISTANCE = 20
    INTERVAL_KEYFRAMES = DYNAMIC_INTERVAL
    SENTENCE_SIMILARITY = 0.35
    WORDS_PER_PARA = 20
    PERCENT_REDUCE = 0.6

    start = time.perf_counter()

    if option == "Overview":
        # No keyframes for an overview; just make sure the folder the
        # notes generator expects is present (robust even if 'res' is
        # missing, unlike a bare os.mkdir).
        os.makedirs(os.path.join('res', 'out'), exist_ok=True)

    elif option == "Notes" or option == "Notes+Ref":
        # Keyframe Extraction (Output : 'out' folder)
        print("\nExtracting Keyframes\n")
        ip = ImageProcessing(video_url, keywords)
        ip.img_processing(text_threshold=NON_TEXT_LEN,
                          dis_threshold=SIMILAR_DISTANCE,
                          jump=INTERVAL_KEYFRAMES)

    # Paragraph and Headings (Output : paragraph_headings.txt)
    print("\nGenerating Paragraphs and Headings\n")
    pf = ParaFormation(summary_result)
    list_para = pf.paragraph(similarity_threshold=SENTENCE_SIMILARITY,
                             word_threshold=WORDS_PER_PARA,
                             percent_reduce=PERCENT_REDUCE)
    ph = ParaHeadings(list_para)
    title_para = ph.get_titles_paras(sentence_threshold=SENTENCES_PER_PARA)

    # Final Notes (Includes Web Scraping)
    print("\nGenerating Final Notes\n")

    # Scraped references are only included for the "Notes+Ref" option.
    if option == "Overview" or option == "Notes":
        scrape_json = {}

    notes = Notes(video_url, scrape_json)
    notes.generate_notes()
    print("\nBrevis-Notes.docx Generated\n")

    # Bundle the generated documents into a zip archive.
    # (Named `zf` to avoid shadowing the builtin `zip`.)
    with ZipFile('Brevis_Notes.zip', 'w') as zf:
        print("Writing zip")
        # The PDF is only produced on Windows, so it may be absent.
        if os.path.exists(os.path.join('res', 'Brevis-Notes.pdf')):
            zf.write(os.path.join('res', 'Brevis-Notes.pdf'),
                     arcname='Brevis-Notes.pdf')
        zf.write(os.path.join('res', 'Brevis-Notes.docx'),
                 arcname='Brevis-Notes.docx')

    path = os.path.abspath("Brevis_Notes.zip")

    # Remove the temporary working folder.
    if os.path.exists('res'):
        shutil.rmtree('res')

    finish = time.perf_counter()

    print(f'Gen Function: Finished in {round(finish-start, 2)} second(s)')