Esempio n. 1
0
def quiz():
    """Generate quiz questions for a video and store them as JSON.

    The video ID is read from the ``v`` query parameter of the current
    request. The text stored for that ID in ``common.summary_dictionary`` is
    summarised, questions are generated from the summary, and every question
    whose correct answer occurs in a timestamped transcript line gets a
    ``jumpToTime`` entry (the value returned by ``utilities.get_sec`` for
    that line). Lines earlier than the previous match are skipped, so the
    jump times never go backwards from one question to the next.

    Questions whose answer is not found in any remaining line are left
    without a ``jumpToTime`` key.

    Returns:
        None. The serialised questions are printed and handed to
        ``redis_utilities.add_json`` under the video ID.
    """
    video_id = request.args.get('v')

    summary = summarizer.get_summary(
        str(redis_utilities.get_dict(common.summary_dictionary, video_id)))

    # Insert a space after every full stop that is immediately followed by a
    # non-whitespace character. (A second, unassigned `re.sub(".", ...)` call
    # used to follow; its result was discarded, so it has been removed.)
    summary = re.sub(r"\.(?=\S)", ". ", summary)

    json_sentences = json.loads(
        ibm_speech_to_text.get_timestamped_data(video_id))

    questions = question_generator.generate_quiz(summary, video_id)
    prev_jump_to_time = 0
    for question in questions:
        correct_answer = question['answers'][question['correctIndex']]
        for json_sentence in json_sentences:
            sentence_time = utilities.get_sec(json_sentence['time'])
            if (sentence_time >= prev_jump_to_time
                    and json_sentence['line'].find(correct_answer) >= 0):
                question['jumpToTime'] = sentence_time
                prev_jump_to_time = sentence_time
                break

    # Serialise once and reuse for both the debug print and the store.
    questions_json = json.dumps(questions)
    print(questions_json)
    redis_utilities.add_json(video_id, questions_json)
Esempio n. 2
0
    def updateVariable(self):
        """Classify the tokens of ``self.words`` into the instance's lists.

        Walks over every token except the last one and fills:

        * ``self.init_vars_dict`` - for a ``variable`` token, maps the next
          token to the token three positions ahead.
        * ``self.initial_flows`` - tokens ending in ``:`` that start with
          ``T`` or ``Cp`` (colon stripped).
        * ``self.change_times`` - tokens accepted by ``isTimeFormat``,
          converted with ``get_sec``; a leading ``0`` is inserted at the end.
        * ``self.gas_flows`` - the token preceding ``to <digits>``.
        * ``self.valves`` - dotted names followed by ``= open`` / ``= close``.
        * ``self.reactor_variables`` - names starting with ``Reactor`` that
          are assigned with ``=``, the literal tokens ``ReactorTemp`` /
          ``ReactorPress``, and every gas flow whose name contains none of
          ``.``, ``Run`` or ``Push``.

        Afterwards all lists are de-duplicated and sorted, reactor variables
        are removed from ``gas_flows``, and the stray token ``'up'`` is
        dropped from ``reactor_variables``.

        Returns:
            tuple: ``(gas_flows, valves, initial_flows, reactor_variables,
            valves, initial_flows, change_times, init_vars_dict)``. ``valves``
            and ``initial_flows`` appear twice; the shape is kept unchanged
            for existing callers.
        """
        for i, word in enumerate(self.words[:-1]):
            if word == 'variable':
                # Reactor variable declaration: name follows directly, the
                # value sits three tokens ahead.
                self.init_vars_dict[self.words[i + 1]] = self.words[i + 3]
            elif word.endswith(':'):
                # Initial variables (TMGa_1 etc.); endswith() is also safe
                # for an empty token, unlike word[-1].
                if word.startswith(('T', 'Cp')):
                    self.initial_flows.append(word.replace(':', ''))
            elif isTimeFormat(word):
                # From here on: time stamps and the variables changed at them.
                self.change_times.append(get_sec(word))
            elif word == 'to' and self.words[i + 1].isdigit():
                self.gas_flows.append(self.words[i - 1])
            elif word == '=':
                if self.words[i + 1] in ('open', 'close'):
                    # Only dotted names count as valves: the input files also
                    # produce non-valve words (e.g. "for", "Prepare") in
                    # front of "= open/close".
                    if '.' in self.words[i - 1]:
                        self.valves.append(self.words[i - 1])
                elif self.words[i - 1].startswith('Reactor'):
                    self.reactor_variables.append(self.words[i - 1])
            elif word in ('ReactorTemp', 'ReactorPress'):
                self.reactor_variables.append(word)

        # Gas flows without '.', 'Run' or 'Push' in their name are treated as
        # reactor variables.
        self.reactor_variables = self.reactor_variables + [
            gas for gas in set(self.gas_flows)
            if not ('.' in gas or 'Run' in gas or 'Push' in gas)
        ]
        # Subtract the reactor variables from the gas list, then de-duplicate
        # and sort everything.
        self.gas_flows = sorted(
            set(self.gas_flows) - set(self.reactor_variables))
        self.valves = sorted(set(self.valves))
        self.initial_flows = sorted(set(self.initial_flows))
        self.reactor_variables = sorted(set(self.reactor_variables))

        # 'up' (from phrases like "... up to 100") is not a real variable.
        # The previous `del self.reactor_variables['up']` indexed a list with
        # a string, always raised TypeError, and was silently swallowed by a
        # bare except, so 'up' was never removed.
        if 'up' in self.reactor_variables:
            self.reactor_variables.remove('up')

        self.change_times.insert(0, 0)

        return self.gas_flows, self.valves, self.initial_flows, self.reactor_variables, \
            self.valves, self.initial_flows, self.change_times, self.init_vars_dict
Esempio n. 3
0
def notes():
    # Returns a JSON string with two entries: "notes" (summary produced with
    # ratio 0.5) and "summary" (summary produced with the argument 100,
    # presumably a word count - TODO confirm against summarizer).
    #
    # NOTE(review): this function looks like a partially stripped-down copy
    # of a fuller implementation. As written it references names that are
    # never assigned in the visible code (`video_id`, `out_index`,
    # `keyword_list`), so it raises NameError at the first use of `video_id`.
    # The string literal below holds the disabled request handling.
    '''video_id = request.args.get('v')
    if not video_id:
        return {"Issue":"Please pass video ID v in query params"}

    text_file_path = text_folder_path+video_id+'.txt'
    json_file_path = json_folder_path+video_id+'.json'

    url = 'https://www.youtube.com/watch?v=' + video_id'''
    v = ''
    # NOTE(review): `video_id` is undefined here (its assignment is inside the
    # string literal above).
    if video_id not in common.summary_dictionary:
        utilities.transcribe(v, video_id)
    # Disabled alternative: fetch the VTT subtitles and convert them to text.
    '''if not utilities.check_if_video_exists(video_id):
        utilities.get_vtt_file(url)
        vtt_to_txt.convert_to_text(video_id)
    '''
    text = str(redis_utilities.get_dict(common.summary_dictionary, video_id))
    notes = summarizer.get_summary_with_ratio(text, 0.5)
    summary = summarizer.get_summary_with_word_count(text, 100)

    #result_list = get_key_phrases(notes, 'en')
    #global_keywords = []

    prevJumpToTime = 0
    json_sentences = json.loads(
        ibm_speech_to_text.get_timestamped_data(video_id))

    # NOTE(review): `out_index` and `keyword_list` are not defined anywhere in
    # this function; the loop that presumably produced them is missing.
    sentence = str(notes[out_index])
    note = {}
    note['sentence'] = sentence
    #note['keywords'] = keyword_list
    print(keyword_list)
    # need to improve
    if len(keyword_list) > 0:
        # Use the first keyword to locate the first transcript line, at or
        # after the previous jump time, that contains it.
        keyword = keyword_list[0]['keyword']
        for json_sentence in json_sentences:
            json_sentence_time = utilities.get_sec(json_sentence['time'])
            if json_sentence_time >= prevJumpToTime and json_sentence[
                    'line'].find(keyword) >= 0:
                note['jumpToTime'] = json_sentence_time
                prevJumpToTime = json_sentence_time
                break
    else:
        # No keywords: fall back to the previous jump time.
        note['jumpToTime'] = prevJumpToTime

    # NOTE(review): `note` is built above but never stored or returned; the
    # response only contains `notes` and `summary`.
    response = {"notes": notes, "summary": summary}
    return json.dumps(response)
Esempio n. 4
0
def home():
    """Build trivia questions for a YouTube video and return them as JSON.

    The video ID is read from the ``v`` query parameter. If
    ``utilities.check_if_video_exists`` reports the video as missing, its VTT
    file is fetched and converted to text first. The text file is then
    summarised, trivia questions are generated from the summary, and each
    question whose correct answer occurs in a timestamped sentence of the
    JSON file gets a ``jumpToTime`` entry. Sentences earlier than the
    previous match are skipped, so jump times never go backwards.

    Returns:
        dict | str: ``{"Issue": ...}`` when no video ID was supplied,
        otherwise the questions serialised with ``json.dumps``.
    """
    video_id = request.args.get('v')
    if not video_id:
        return {"Issue": "Please pass video ID v in query params"}

    url = 'https://www.youtube.com/watch?v=' + video_id
    # NOTE(review): video_id comes straight from the request and is
    # concatenated into file paths - consider validating its format.
    text_file_path = text_folder_path + video_id + '.txt'
    json_file_path = json_folder_path + video_id + '.json'

    if not utilities.check_if_video_exists(video_id):
        utilities.get_vtt_file(url)
        vtt_to_txt.convert_to_text(video_id)

    # Read via context managers so the file handles are closed promptly
    # (the previous bare open(...).read() calls leaked them).
    with open(text_file_path, 'r') as text_file:
        text = text_file.read()
    summary = summarizer.get_summary(text)

    # Insert a space after every full stop that is immediately followed by a
    # non-whitespace character. (A second, unassigned `re.sub(".", ...)` call
    # used to follow; its result was discarded, so it has been removed.)
    summary = re.sub(r"\.(?=\S)", ". ", summary)

    with open(json_file_path, 'r') as json_file:
        json_sentences = json.loads(json_file.read())

    questions = question_generator.generate_trivia(summary)
    prev_jump_to_time = 0
    for question in questions:
        correct_answer = question['answers'][question['correctIndex']]
        for json_sentence in json_sentences:
            sentence_time = utilities.get_sec(json_sentence['time'])
            if (sentence_time >= prev_jump_to_time
                    and json_sentence['line'].find(correct_answer) >= 0):
                question['jumpToTime'] = sentence_time
                prev_jump_to_time = sentence_time
                break

    return json.dumps(questions)
Esempio n. 5
0
def notes():
    """Build annotated notes and a short summary for a YouTube video.

    The video ID is read from the ``v`` query parameter. If
    ``utilities.check_if_video_exists`` reports the video as missing, its VTT
    file is fetched and converted to text first. The text is summarised
    twice (ratio 0.5 for the notes, argument 100 for the short summary) and
    key phrases are extracted from the notes.

    For every key-phrase result, the note at the same index is replaced by a
    dict with:

    * ``sentence`` - the original note as a string,
    * ``keywords`` - dicts with ``keyword``, ``definition`` and ``examples``
      for each accepted keyword that has at least one WordNet synset,
    * ``jumpToTime`` - time of the first transcript sentence, at or after
      the previous jump time, that contains the first keyword; or the
      previous jump time when the note has no keywords.

    NOTE(review): when a note has keywords but no remaining transcript
    sentence contains the first one, ``jumpToTime`` is left unset (unchanged
    from the previous behaviour).

    Returns:
        dict | str: ``{"Issue": ...}`` when no video ID was supplied,
        otherwise ``json.dumps({"notes": ..., "summary": ...})``.
    """
    video_id = request.args.get('v')
    if not video_id:
        return {"Issue": "Please pass video ID v in query params"}

    text_file_path = text_folder_path + video_id + '.txt'
    json_file_path = json_folder_path + video_id + '.json'

    url = 'https://www.youtube.com/watch?v=' + video_id

    if not utilities.check_if_video_exists(video_id):
        utilities.get_vtt_file(url)
        vtt_to_txt.convert_to_text(video_id)

    with open(text_file_path, 'r') as text_file:
        text = text_file.read()
    notes = summarizer.get_summary_with_ratio(text, 0.5)
    summary = summarizer.get_summary_with_word_count(text, 100)

    result_list = get_key_phrases(notes, 'en')

    # The timestamped sentences are the same for every note: load them once
    # instead of re-opening (and leaking) the file on each iteration.
    with open(json_file_path, 'r') as json_file:
        json_sentences = json.loads(json_file.read())

    # Keywords already attached to an earlier note; only used for membership.
    global_keywords = set()
    prev_jump_to_time = 0
    for out_index, cur_item in enumerate(result_list):
        keyword_list = [
            phrase['Text'].strip()
            for phrase in cur_item['KeyPhrases']
            if utilities.isGoodScore(phrase)
        ]

        if keyword_list:
            keyword_list = utilities.filter_nouns(keyword_list, 'en')

            # Drop keywords that an earlier note (or an earlier position in
            # this list) already used. A new list is built because removing
            # from the list while iterating over it skipped elements.
            fresh_keywords = []
            for keyword in keyword_list:
                if keyword not in global_keywords:
                    global_keywords.add(keyword.strip())
                    fresh_keywords.append(keyword)
            keyword_list = fresh_keywords

        # Keep only keywords WordNet knows, enriched with the first synset's
        # definition and examples.
        keyword_objects = []
        for keyword in keyword_list:
            print(keyword)
            try:
                syns = wordnet.synsets(keyword)
                if syns:
                    keyword_objects.append({
                        'keyword': keyword,
                        'definition': syns[0].definition(),
                        'examples': syns[0].examples(),
                    })
            except Exception:
                # Best effort: a failing lookup just drops this keyword.
                print('error', keyword)

        note = {
            'sentence': str(notes[out_index]),
            'keywords': keyword_objects,
        }
        print(keyword_objects)
        # need to improve
        if keyword_objects:
            keyword = keyword_objects[0]['keyword']
            for json_sentence in json_sentences:
                sentence_time = utilities.get_sec(json_sentence['time'])
                if (sentence_time >= prev_jump_to_time
                        and json_sentence['line'].find(keyword) >= 0):
                    note['jumpToTime'] = sentence_time
                    prev_jump_to_time = sentence_time
                    break
        else:
            note['jumpToTime'] = prev_jump_to_time

        notes[out_index] = note

    response = {"notes": notes, "summary": summary}
    return json.dumps(response)