Example 1
    def get(cls):
        res = cache.get("AccessToken")
        if res is not None:
            return res

        res = get_access_token() or ""

        cache.set("AccessToken", res, timeout=60 * 60)
        return res
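Example 1 is the cache-aside pattern most of these snippets share: try the cache, fall back to the real source, then populate the cache with a timeout. A minimal, self-contained sketch of that pattern, assuming a Flask-Caching style `cache` object (the SimpleCache backend and the load_access_token loader below are hypothetical stand-ins, not part of the original code):

from flask import Flask
from flask_caching import Cache

app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'SimpleCache'})

def load_access_token():
    # Hypothetical stand-in for the real loader (an HTTP call, a DB query, ...).
    return 'token-from-upstream'

def get_cached_access_token():
    token = cache.get('AccessToken')                    # 1. try the cache first
    if token is not None:
        return token
    token = load_access_token() or ''                   # 2. fall back to the source
    cache.set('AccessToken', token, timeout=60 * 60)    # 3. cache it for one hour
    return token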
Example 2
    def get_child_ids(cls, rid):
        child_ids = cache.get(cls.PREFIX_CHILDREN.format(rid))
        if not child_ids:
            from api.lib.perm.acl.role import RoleRelationCRUD
            child_ids = RoleRelationCRUD.get_child_ids(rid)
            cache.set(cls.PREFIX_CHILDREN.format(rid), child_ids, timeout=0)

        return child_ids
Example 3
    def get_by_name(cls, app_id, name):
        role = cache.get(cls.PREFIX_NAME.format(app_id, name))
        if role is None:
            role = Role.get_by(app_id=app_id, name=name, first=True, to_dict=False)
            if role is not None:
                cache.set(cls.PREFIX_NAME.format(app_id, name), role)

        return role
Example 4
    def get_parent_ids(cls, rid):
        parent_ids = cache.get(cls.PREFIX_PARENT.format(rid))
        if not parent_ids:
            from api.lib.perm.acl.role import RoleRelationCRUD
            parent_ids = RoleRelationCRUD.get_parent_ids(rid)
            cache.set(cls.PREFIX_PARENT.format(rid), parent_ids, timeout=0)

        return parent_ids
Example 5
    def get(cls, rid):
        role = cache.get(cls.PREFIX_ID.format(rid))
        if role is None:
            role = Role.get_by_id(rid)
            if role is not None:
                cache.set(cls.PREFIX_ID.format(rid), role)

        return role
Example 6
    def get(cls, key):
        perm = cache.get(cls.PREFIX_ID.format(key))
        perm = perm or cache.get(cls.PREFIX_NAME.format(key))
        if perm is None:
            perm = Permission.get_by_id(key)
            perm = perm or Permission.get_by(name=key, first=True, to_dict=False)
            if perm is not None:
                cache.set(cls.PREFIX_ID.format(key), perm)

        return perm
Example 7
    def get_resources(cls, rid):
        """
        :param rid: role id
        :return: {id2perms: {resource_id: [perm,]}, group2perms: {group_id: [perm, ]}}
        """
        resources = cache.get(cls.PREFIX_RESOURCES.format(rid))
        if not resources:
            from api.lib.perm.acl.role import RoleCRUD
            resources = RoleCRUD.get_resources(rid)
            cache.set(cls.PREFIX_RESOURCES.format(rid), resources, timeout=0)

        return resources or {}
Example 8
def context_query(query_text, blob_uri, op_result, search_output_mode=SearchOutputMode.EXACT_MATCH):
    match_result_cache = cache.get('match_result_cache') or dict()
    match_cache_key = MatchCacheKey(blob_uri, query_text, True)
    if match_cache_key in match_result_cache:
        return match_result_cache[match_cache_key]

    punctuation_translator = str.maketrans('', '', string.punctuation)
    match_results = []
    for result in op_result.results:
        # The response is valid only if there is at least one alternative
        # AND its transcript is non-empty.
        if len(result.alternatives) == 0 or not result.alternatives[0].transcript:
            continue
        tokens = [word.strip().lower() for word in result.alternatives[0].transcript.split(' ') if word != '']
        sanitized_tokens = [token.translate(punctuation_translator) for token in tokens]

        predictions = get_context_search_model().predict((result.alternatives[0].transcript, query_text))
        for prediction in predictions:
            answer = prediction[0].strip()
            if not answer:
                continue
            answer_tokens = answer.lower().translate(punctuation_translator).split(' ')

            sublist = find_sub_list(sanitized_tokens, answer_tokens)
            if len(sublist) == 0:
                continue
            sublist_bounds = sublist[0]

            if search_output_mode == SearchOutputMode.EXACT_MATCH:
                start_word_index = sublist_bounds[0]
                end_word_index = sublist_bounds[1]
                transcript = answer
            elif search_output_mode == SearchOutputMode.SENTENCE:
                # Sentence output is not implemented for context search; skip the
                # prediction rather than falling through with undefined indices.
                continue

            start_word = result.alternatives[0].words[start_word_index]
            end_word = result.alternatives[0].words[end_word_index]
            match_results.append({
                'matched_query': query_text,
                'start_time': get_word_time_seconds(start_word.start_time),
                'end_time': get_word_time_seconds(end_word.end_time),
                'confidence': prediction[2],
                'transcript': transcript
            })

    match_result_cache[match_cache_key] = match_results
    cache.set('match_result_cache', match_result_cache, timeout=0)

    return match_results
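Examples 8 and 11 both call a find_sub_list helper that is not shown in this listing. A hedged sketch of what it is assumed to do, based on how its result is used (inclusive (start, end) index pairs of every contiguous occurrence of sub inside lst):

def find_sub_list(lst, sub):
    # Assumed behaviour: return every (start_index, end_index) pair, with an
    # inclusive end bound, where `sub` occurs as a contiguous run inside `lst`.
    matches = []
    if not sub:
        return matches
    for start in range(len(lst) - len(sub) + 1):
        if lst[start:start + len(sub)] == sub:
            matches.append((start, start + len(sub) - 1))
    return matches

# Example: find_sub_list(['a', 'b', 'c', 'b', 'c'], ['b', 'c']) -> [(1, 2), (3, 4)]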
Example 9
 def set(cls, user):
     '''
     Cache the user under its id, username and nickname keys.
     '''
     cache.set(cls.PREFIX_ID.format(user.id), user)
     cache.set(cls.PREFIX_NAME.format(user.username), user)
     cache.set(cls.PREFIX_NICK.format(user.nickname), user)
Example 10
 def set(cls, type_id, attr_id, attr):
     cache.set(cls.PREFIX_ID.format(type_id, attr_id), attr)
Example 11
def string_query(query_text, blob_uri, op_result, search_output_mode=SearchOutputMode.EXACT_MATCH):
    match_result_cache = cache.get('match_result_cache') or dict()
    match_cache_key = MatchCacheKey(blob_uri, query_text, False)
    if match_cache_key in match_result_cache: return match_result_cache[match_cache_key]
    
    match_results = []
    for result in op_result.results:
        # The response is valid only if there is at least one alternative
        # AND its transcript is non-empty.
        if len(result.alternatives) == 0 or not result.alternatives[0].transcript: continue    
        sentences = [sentence.strip().lower() for sentence in nltk.tokenize.sent_tokenize(result.alternatives[0].transcript)]
        tokens = [word.strip().lower() for word in result.alternatives[0].transcript.split(' ') if word != '']
        for sentence in sentences:
            sublist_bounds = find_sub_list(tokens, sentence.split(' '))[0]
            sentence_word_infos = result.alternatives[0].words[sublist_bounds[0]:sublist_bounds[1]+1]

            matches = re.finditer(query_text, sentence)      
            match_cache = set()
            for match in matches:
                if search_output_mode == SearchOutputMode.EXACT_MATCH:
                    start_boundary = i = match.start()
                    
                    # Check if the match starts inside a word
                    if i != 0 and sentence[i - 1] != ' ':
                        start_boundary = sentence.rfind(' ', 0, i) + 1

                    end_boundary = j = match.end() - 1

                    # Check if the match ends inside a word
                    if j != len(sentence) - 1 and sentence[j + 1] != ' ':
                        end_boundary = sentence.find(' ', j)
                        if end_boundary == -1:
                            end_boundary = len(sentence) - 1
                        else:
                            end_boundary -= 1

                    # Add boundary
                    boundary = (start_boundary, end_boundary)
                    if boundary in match_cache: continue
                    match_cache.add(boundary)

                    start_word_index = sentence.count(' ', 0, start_boundary)
                    end_word_index = sentence.count(' ', 0, end_boundary)
                elif search_output_mode == SearchOutputMode.SENTENCE:
                    start_word_index = 0
                    end_word_index = len(sentence_word_infos) - 1

                confidence = result.alternatives[0].confidence
                start_word = sentence_word_infos[start_word_index]
                end_word = sentence_word_infos[end_word_index]
                transcript = sentence[match.start():match.end()]
                match_results.append({
                    'matched_query': transcript,
                    'start_time': get_word_time_seconds(start_word.start_time),
                    'end_time': get_word_time_seconds(end_word.end_time),
                    'confidence': confidence,
                    'transcript':  transcript
                })

    match_result_cache[match_cache_key] = match_results
    cache.set('match_result_cache', match_result_cache, timeout=0)

    return match_results
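Both query helpers also rely on MatchCacheKey and get_word_time_seconds, neither of which appears in this listing. A hedged sketch of plausible definitions (the field names and the Duration-style timestamp attributes are assumptions):

from collections import namedtuple

# Assumed: a hashable cache key; the third field distinguishes context search
# (True, Example 8) from plain string search (False, Example 11).
MatchCacheKey = namedtuple('MatchCacheKey', ['blob_uri', 'query_text', 'is_context_search'])

def get_word_time_seconds(word_time):
    # Assumed: `word_time` is a protobuf Duration-like value with `seconds`
    # and `nanos` fields, as the Speech API returns for word offsets.
    return word_time.seconds + word_time.nanos / 1e9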
Example 12
 def set(cls, app):
     cache.set(cls.PREFIX_ID.format(app.id), app)
     cache.set(cls.PREFIX_NAME.format(app.name), app)
Example 13
 def get(cls, id):
     role = cache.get(cls.PREFIX_ID.format(id))
     if role is None:
         role = Role.get_by_id(id)
         if role is not None:
             cache.set(cls.PREFIX_ID.format(id), role)

     return role
Example 14
 def set(cls, id, permission):
     cache.set(cls.PREFIX_ID.format(id), permission)
Example 15
 def set_count_error(cls, key, value):
     cache.set(cls.PREFIX_ERROR.format(key), value)
Example 16
def query():
    route_start_time = start_time = time.time()
    print('='*20)

    data = get_validator_data()
    query_text = data['query'].strip().lower()
    search_output_mode = data['search_output_mode']
    
    auth_filepath = Path(current_app.instance_path) / Path(current_app.config['GOOGLE_CLOUD_AUTH_FILENAME'])
    speech_client = speech.SpeechClient.from_service_account_json(auth_filepath)
    storage_client = storage.Client.from_service_account_json(auth_filepath)

    bucket = storage_client.get_bucket(current_app.config['GOOGLE_CLOUD_STORAGE_BUCKET_NAME'])

    with open(get_tmp_filepath(), 'w+b') as input_file:
        data['file_input'] = data['file_input'][data['file_input'].find(',')+1:]
        file_input = data['file_input'] + '=' * ((4 - len(data['file_input']) % 4) % 4)
        input_file.write(base64.b64decode(file_input))
        input_hash = hash_util.get_md5_str(file_input)

        print('Decoding and writing input file took {:.3f} seconds'.format(time.time() - start_time)) 
        start_time = time.time()

    input_hash_to_blob_uri = cache.get('input_hash_to_blob_uri') or dict()
    if input_hash in input_hash_to_blob_uri:
        blob_uri = input_hash_to_blob_uri[input_hash]
        blob = bucket.blob(blob_uri.replace('gs://{}/'.format(current_app.config['GOOGLE_CLOUD_STORAGE_BUCKET_NAME']), ''))
    else:
        try:
            audio, sample_rate = librosa.load(input_file.name)
        except Exception as exception:
            raise exceptions.AudioFileLoadError('file_input', exception)

        print('Librosa audio file loading took {:.3f} seconds'.format(time.time() - start_time))
        start_time = time.time()

        tmp_filepath = get_tmp_filepath()

        # Preprocess the audio data by converting it to a mono WAVE
        audio_mono = librosa.to_mono(audio)
        soundfile.write(tmp_filepath, audio_mono, sample_rate, subtype='PCM_16', format='wav')

        print('Audio processing and exporting took {:.3f} seconds'.format(time.time() - start_time))
        start_time = time.time()

        # Upload the file to GCS if it doesn't already exist
        bucket_audio_root = current_app.config['GOOGLE_CLOUD_STORAGE_BUCKET_AUDIO_ROOT']
        blob_root = bucket_audio_root
        if blob_root and not bucket_audio_root.endswith('/'):
            blob_root = bucket_audio_root + '/'

        with open(tmp_filepath, 'rb') as audio_file:
            blob_filename = '{}{}'.format(blob_root, hash_util.get_crc32_str(audio_file)) 
            blob = bucket.blob(blob_filename)
            if not blob.exists():
                blob.upload_from_file(audio_file, rewind=True)

            blob_uri = 'gs://{}/{}'.format(current_app.config['GOOGLE_CLOUD_STORAGE_BUCKET_NAME'], blob_filename)
            input_hash_to_blob_uri[input_hash] = blob_uri
            cache.set('input_hash_to_blob_uri', input_hash_to_blob_uri, timeout=0)

        # Remove audio file now that we are done with it
        tmp_filepath.unlink()

        print('Uploading to GCS took {:.3f} seconds'.format(time.time() - start_time))
        start_time = time.time()

    Path(input_file.name).unlink()

    transcription_cache = cache.get('transcription_cache') or dict()
    if blob_uri in transcription_cache:
        op_result = transcription_cache[blob_uri]
        print('Loaded {} from cache'.format(blob_uri))
    else:
        # This configuration is predetermined: all audio files are converted to
        # WAVE format with 16-bit PCM encoding. The sample rate is determined using librosa.
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=sample_rate,
            enable_speaker_diarization=True,
            enable_automatic_punctuation=True,
            language_code='en-US')

        operation = speech_client.long_running_recognize(config, types.RecognitionAudio(uri=blob_uri))
        op_result = operation.result()
        transcription_cache[blob_uri] = op_result
        cache.set('transcription_cache', transcription_cache, timeout=0)

    print('Transcription took {:.3f} seconds'.format(time.time() - start_time))
    start_time = time.time()

    if not data['is_context_search']:
        match_results = string_query(query_text, blob_uri, op_result, search_output_mode)
    else:
        match_results = context_query(query_text, blob_uri, op_result, search_output_mode)

    print('Search took {:.3f} seconds'.format(time.time() - start_time))

    end_time = time.time()

    access_link = blob.generate_signed_url(timedelta(hours=1))
    return jsonify(status_code=201, message='Query was successful!', matches=match_results, 
        access_link=access_link, elapsed_time=end_time - route_start_time, success=True)
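Example 16 leans on a few small helpers (get_tmp_filepath, hash_util.get_md5_str, hash_util.get_crc32_str) that are not included in this listing. A hedged sketch of what they are assumed to provide:

import hashlib
import tempfile
import uuid
import zlib
from pathlib import Path

def get_tmp_filepath():
    # Assumed: a unique Path inside the system temp directory.
    return Path(tempfile.gettempdir()) / uuid.uuid4().hex

def get_md5_str(text):
    # Assumed: hex MD5 digest of a string, used to key uploads by content.
    return hashlib.md5(text.encode('utf-8')).hexdigest()

def get_crc32_str(fileobj):
    # Assumed: CRC32 checksum of a file object's contents, used to name the blob.
    fileobj.seek(0)
    checksum = 0
    for chunk in iter(lambda: fileobj.read(64 * 1024), b''):
        checksum = zlib.crc32(chunk, checksum)
    fileobj.seek(0)
    return format(checksum & 0xFFFFFFFF, '08x')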
Example 17
 def set(cls, ct):
     cache.set("CIType::Name::{0}".format(ct.name), ct)
     cache.set("CIType::ID::{0}".format(ct.id), ct)
     cache.set("CIType::Alias::{0}".format(ct.alias), ct)
Example 18
 def set(cls, key, values):
     ci_type = CITypeCache.get(key)
     if ci_type is not None:
         cache.set("CITypeAttribute::ID::{0}".format(ci_type.id), values)
         cache.set("CITypeAttribute::Name::{0}".format(ci_type.name),
                   values)
Example 19
 def set(cls, key, values):
     ci_type = CITypeCache.get(key)
     if ci_type is not None:
         cache.set(cls.PREFIX_ID.format(ci_type.id), values)
         cache.set(cls.PREFIX_NAME.format(ci_type.name), values)
Example 20
 def set(cls, attr):
     cache.set('Field::ID::{0}'.format(attr.id), attr)
     cache.set('Field::Name::{0}'.format(attr.name), attr)
     cache.set('Field::Alias::{0}'.format(attr.alias), attr)
Example 21
 def set(cls, attr):
     cache.set(cls.PREFIX_ID.format(attr.id), attr)
     cache.set(cls.PREFIX_NAME.format(attr.name), attr)
     cache.set(cls.PREFIX_ALIAS.format(attr.alias), attr)
Example 22
 def set(cls, ct):
     cache.set("RelationType::Name::{0}".format(ct.name), ct)
     cache.set("RelationType::ID::{0}".format(ct.id), ct)
Example 23
 def set(cls, ct):
     cache.set(cls.PREFIX_NAME.format(ct.name), ct)
     cache.set(cls.PREFIX_ID.format(ct.id), ct)
     cache.set(cls.PREFIX_ALIAS.format(ct.alias), ct)