def get(cls):
    """Return the access token, consulting the cache first.

    The cache entry ``"AccessToken"`` is read exactly once, so an entry that
    expires between the presence check and the read can no longer cause a
    ``None`` to be returned.

    :return: the cached token when present; otherwise the result of
        ``get_access_token()`` (or ``""`` when that result is falsy), which is
        also stored in the cache with ``timeout=60 * 60``.
    """
    token = cache.get("AccessToken")
    if token is not None:
        return token

    # A falsy result is normalised to "" and cached like a real token.
    token = get_access_token() or ""
    cache.set("AccessToken", token, timeout=60 * 60)
    return token
def get_child_ids(cls, rid):
    """Return the child ids of role ``rid``, using the cache when possible.

    :param rid: role id used to build the cache key and to query the relation.
    :return: the truthy cached value, otherwise whatever
        ``RoleRelationCRUD.get_child_ids(rid)`` returns.
    """
    cache_key = cls.PREFIX_CHILDREN.format(rid)
    cached_ids = cache.get(cache_key)
    if cached_ids:
        return cached_ids

    # A missing OR empty cached value triggers a fresh lookup.
    # Imported inside the function, as in the original (presumably to avoid
    # a circular import -- confirm).
    from api.lib.perm.acl.role import RoleRelationCRUD

    fresh_ids = RoleRelationCRUD.get_child_ids(rid)
    cache.set(cache_key, fresh_ids, timeout=0)
    return fresh_ids
def get_by_name(cls, app_id, name):
    """Look up a role by application id and name, caching a successful lookup.

    :param app_id: application id, part of the cache key and the query.
    :param name: role name, part of the cache key and the query.
    :return: the cached role, the role returned by ``Role.get_by``, or ``None``
        when neither yields one.
    """
    cache_key = cls.PREFIX_NAME.format(app_id, name)
    cached_role = cache.get(cache_key)
    if cached_role is not None:
        return cached_role

    found_role = Role.get_by(app_id=app_id, name=name, first=True, to_dict=False)
    if found_role is None:
        # Misses are not cached.
        return None

    cache.set(cache_key, found_role)
    return found_role
def get_parent_ids(cls, rid):
    """Return the parent ids of role ``rid``, using the cache when possible.

    :param rid: role id used to build the cache key and to query the relation.
    :return: the truthy cached value, otherwise whatever
        ``RoleRelationCRUD.get_parent_ids(rid)`` returns.
    """
    cache_key = cls.PREFIX_PARENT.format(rid)
    cached_ids = cache.get(cache_key)
    if cached_ids:
        return cached_ids

    # A missing OR empty cached value triggers a fresh lookup.
    # Imported inside the function, as in the original (presumably to avoid
    # a circular import -- confirm).
    from api.lib.perm.acl.role import RoleRelationCRUD

    fresh_ids = RoleRelationCRUD.get_parent_ids(rid)
    cache.set(cache_key, fresh_ids, timeout=0)
    return fresh_ids
def get(cls, rid):
    """Return the role with id ``rid``, caching a successful lookup.

    :param rid: role id used for the cache key and for ``Role.get_by_id``.
    :return: the cached role, the role returned by ``Role.get_by_id``, or
        ``None`` when neither yields one.
    """
    cache_key = cls.PREFIX_ID.format(rid)
    cached_role = cache.get(cache_key)
    if cached_role is not None:
        return cached_role

    loaded_role = Role.get_by_id(rid)
    if loaded_role is not None:
        # Only real hits are written back; a miss stays uncached.
        cache.set(cache_key, loaded_role)
    return loaded_role
def get(cls, key):
    """Return a permission identified by ``key`` (tried as id, then as name).

    The cache is probed under the id key and then the name key. On a miss the
    permission is looked up with ``Permission.get_by_id`` and then
    ``Permission.get_by(name=...)``; a hit is written back under the id key.

    :param key: value used both as an id and as a name.
    :return: the permission found, or ``None``.
    """
    id_cache_key = cls.PREFIX_ID.format(key)
    cached_perm = cache.get(id_cache_key) or cache.get(cls.PREFIX_NAME.format(key))
    if cached_perm is not None:
        return cached_perm

    found_perm = Permission.get_by_id(key) or Permission.get_by(name=key, first=True, to_dict=False)
    if found_perm is not None:
        cache.set(id_cache_key, found_perm)
    return found_perm
def get_resources(cls, rid):
    """Return the resources of role ``rid``, using the cache when possible.

    :param rid: role id used to build the cache key and to query resources.
    :return: {id2perms: {resource_id: [perm,]}, group2perms: {group_id: [perm, ]}}
        or an empty dict when the lookup yields a falsy value.
    """
    cache_key = cls.PREFIX_RESOURCES.format(rid)
    cached_resources = cache.get(cache_key)
    if cached_resources:
        return cached_resources

    # A missing OR empty cached value triggers a fresh lookup.
    # Imported inside the function, as in the original (presumably to avoid
    # a circular import -- confirm).
    from api.lib.perm.acl.role import RoleCRUD

    loaded_resources = RoleCRUD.get_resources(rid)
    cache.set(cache_key, loaded_resources, timeout=0)
    return loaded_resources or {}
def context_query(query_text, blob_uri, op_result, search_output_mode=SearchOutputMode.EXACT_MATCH):
    """Run a context (model-based) search of ``query_text`` over a transcription.

    Results are memoised in the ``'match_result_cache'`` cache entry under
    ``MatchCacheKey(blob_uri, query_text, True)``.

    :param query_text: the question/query handed to the context search model.
    :param blob_uri: identifier of the audio blob; only used for the cache key.
    :param op_result: transcription result whose ``results[*].alternatives[0]``
        provides ``transcript`` and ``words``.
    :param search_output_mode: requested output mode. Sentence expansion was
        never implemented for context search (the original branch was a bare
        ``pass`` that left the word indices unbound or stale), so every mode
        currently reports the exact answer span.
    :return: list of dicts with keys ``matched_query``, ``start_time``,
        ``end_time``, ``confidence`` and ``transcript``.
    """
    match_result_cache = cache.get('match_result_cache') or dict()
    match_cache_key = MatchCacheKey(blob_uri, query_text, True)
    if match_cache_key in match_result_cache:
        return match_result_cache[match_cache_key]

    # Loop-invariant: strips every ASCII punctuation character.
    punctuation_translator = str.maketrans('', '', string.punctuation)

    match_results = []
    for result in op_result.results:
        # Check if the response is valid, which happens if and only if the result alternatives
        # is at least of length 1 AND the transcript is non-empty.
        if len(result.alternatives) == 0 or not result.alternatives[0].transcript:
            continue

        alternative = result.alternatives[0]
        tokens = [word.strip().lower() for word in alternative.transcript.split(' ') if word != '']
        sanitized_tokens = [token.translate(punctuation_translator) for token in tokens]

        predictions = get_context_search_model().predict((alternative.transcript, query_text))
        for prediction in predictions:
            answer = prediction[0].strip()
            if not answer:
                continue

            answer_tokens = answer.lower().translate(punctuation_translator).split(' ')
            sublist = find_sub_list(sanitized_tokens, answer_tokens)
            if len(sublist) == 0:
                # The answer could not be located in the transcript tokens.
                continue

            # Use the first occurrence of the answer. These values are bound
            # for every output mode so a SENTENCE request can neither crash
            # nor reuse indices from a previous prediction.
            sublist_bounds = sublist[0]
            start_word_index = sublist_bounds[0]
            end_word_index = sublist_bounds[1]
            transcript = answer

            # assumes alternative.words is index-aligned with the
            # whitespace-split transcript tokens -- TODO confirm
            start_word = alternative.words[start_word_index]
            end_word = alternative.words[end_word_index]
            match_results.append({
                'matched_query': query_text,
                'start_time': get_word_time_seconds(start_word.start_time),
                'end_time': get_word_time_seconds(end_word.end_time),
                'confidence': prediction[2],
                'transcript': transcript
            })

    match_result_cache[match_cache_key] = match_results
    cache.set('match_result_cache', match_result_cache, timeout=0)
    return match_results
def set(cls, user):
    """Store ``user`` in the cache under its id, username and nickname keys."""
    id_key = cls.PREFIX_ID.format(user.id)
    cache.set(id_key, user)

    name_key = cls.PREFIX_NAME.format(user.username)
    cache.set(name_key, user)

    nick_key = cls.PREFIX_NICK.format(user.nickname)
    cache.set(nick_key, user)
def set(cls, type_id, attr_id, attr):
    """Store ``attr`` in the cache under the key built from ``type_id`` and ``attr_id``."""
    cache_key = cls.PREFIX_ID.format(type_id, attr_id)
    cache.set(cache_key, attr)
def string_query(query_text, blob_uri, op_result, search_output_mode=SearchOutputMode.EXACT_MATCH):
    """Run a regular-expression search of ``query_text`` over a transcription.

    Results are memoised in the ``'match_result_cache'`` cache entry under
    ``MatchCacheKey(blob_uri, query_text, False)``.

    NOTE(review): the cache key does not include ``search_output_mode``, so a
    result computed for one mode is returned for the other -- confirm whether
    that is intended.
    NOTE(review): block nesting was reconstructed from whitespace-collapsed
    source; verify against the original layout.

    :param query_text: pattern passed to ``re.finditer`` for every sentence.
    :param blob_uri: identifier of the audio blob; only used for the cache key.
    :param op_result: transcription result whose ``results[*].alternatives[0]``
        provides ``transcript``, ``words`` and ``confidence``.
    :param search_output_mode: ``EXACT_MATCH`` reports the words touched by the
        match (expanded to whole words); ``SENTENCE`` reports the first and
        last word of the containing sentence.
    :return: list of dicts with keys ``matched_query``, ``start_time``,
        ``end_time``, ``confidence`` and ``transcript``.
    """
    match_result_cache = cache.get('match_result_cache') or dict()
    match_cache_key = MatchCacheKey(blob_uri, query_text, False)
    if match_cache_key in match_result_cache:
        return match_result_cache[match_cache_key]

    match_results = []
    for result in op_result.results:
        # Check if the response is valid, which happens if and only if the result alternatives
        # is at least of length 1 AND the transcript is non-empty.
        if len(result.alternatives) == 0 or not result.alternatives[0].transcript:
            continue

        alternative = result.alternatives[0]
        sentences = [sentence.strip().lower()
                     for sentence in nltk.tokenize.sent_tokenize(alternative.transcript)]
        tokens = [word.strip().lower() for word in alternative.transcript.split(' ') if word != '']

        for sentence in sentences:
            sentence_locations = find_sub_list(tokens, sentence.split(' '))
            if len(sentence_locations) == 0:
                # The sentence could not be aligned with the transcript words
                # (same guard context_query applies); skip it instead of
                # raising IndexError.
                continue
            sublist_bounds = sentence_locations[0]
            sentence_word_infos = alternative.words[sublist_bounds[0]:sublist_bounds[1] + 1]

            matches = re.finditer(query_text, sentence)
            # Word-aligned (start, end) spans already reported for this sentence.
            match_cache = set()
            for match in matches:
                if search_output_mode == SearchOutputMode.EXACT_MATCH:
                    start_boundary = i = match.start()
                    # Check if the match starts inside a word
                    if i != 0 and sentence[i - 1] != ' ':
                        start_boundary = sentence.rfind(' ', 0, i) + 1

                    end_boundary = j = match.end() - 1
                    # Check if the match ends inside a word
                    if j != len(sentence) - 1 and sentence[j + 1] != ' ':
                        end_boundary = sentence.find(' ', j)
                        if end_boundary == -1:
                            end_boundary = len(sentence) - 1
                        else:
                            end_boundary -= 1

                    # Add boundary
                    boundary = (start_boundary, end_boundary)
                    if boundary in match_cache:
                        continue
                    match_cache.add(boundary)

                    # The number of spaces before a character position is the
                    # index of the word containing it.
                    start_word_index = sentence.count(' ', 0, start_boundary)
                    end_word_index = sentence.count(' ', 0, end_boundary)
                elif search_output_mode == SearchOutputMode.SENTENCE:
                    start_word_index = 0
                    end_word_index = len(sentence_word_infos) - 1

                confidence = alternative.confidence
                start_word = sentence_word_infos[start_word_index]
                end_word = sentence_word_infos[end_word_index]
                transcript = sentence[match.start():match.end()]
                match_results.append({
                    'matched_query': transcript,
                    'start_time': get_word_time_seconds(start_word.start_time),
                    'end_time': get_word_time_seconds(end_word.end_time),
                    'confidence': confidence,
                    'transcript': transcript
                })

    match_result_cache[match_cache_key] = match_results
    cache.set('match_result_cache', match_result_cache, timeout=0)
    return match_results
def set(cls, app):
    """Store ``app`` in the cache under its id key and its name key."""
    id_key = cls.PREFIX_ID.format(app.id)
    cache.set(id_key, app)

    name_key = cls.PREFIX_NAME.format(app.name)
    cache.set(name_key, app)
def get(cls, id):
    """Return the role with the given id, caching a successful lookup.

    Fixes relative to the previous version: the cache is read from the same
    ``PREFIX_ID`` key it is written to (it used to read ``PREFIX_NAME``), the
    write no longer references the undefined name ``rid``, and the role is
    actually returned.

    :param id: role id used for the cache key and for ``Role.get_by_id``.
    :return: the cached role, the role returned by ``Role.get_by_id``, or
        ``None`` when neither yields one.
    """
    cache_key = cls.PREFIX_ID.format(id)
    role = cache.get(cache_key)
    if role is None:
        role = Role.get_by_id(id)
        if role is not None:
            # Only real hits are written back; a miss stays uncached.
            cache.set(cache_key, role)
    return role
def set(cls, id, permission):
    """Store ``permission`` in the cache under the id key built from ``id``."""
    cache_key = cls.PREFIX_ID.format(id)
    cache.set(cache_key, permission)
def set_count_error(cls, key, value):
    """Store ``value`` in the cache under the error key built from ``key``."""
    error_key = cls.PREFIX_ERROR.format(key)
    cache.set(error_key, value)
def query():
    """Handle a search request over a base64-encoded audio upload.

    Steps, in order: decode the upload to a temp file; reuse or create the
    GCS blob for it (converted to mono 16-bit PCM WAVE); reuse or request a
    Speech transcription; run ``string_query`` or ``context_query``; return a
    JSON response containing the matches and a signed link to the blob.

    Reads from the validated request data: ``query``, ``search_output_mode``,
    ``file_input`` and ``is_context_search``.

    NOTE(review): block nesting was reconstructed from whitespace-collapsed
    source; verify the extent of the ``with``/``else`` blocks.
    NOTE(review): temp files are not removed if an exception is raised
    before the corresponding ``unlink()`` call.

    :return: ``jsonify`` response with ``status_code``, ``message``,
        ``matches``, ``access_link``, ``elapsed_time`` and ``success``.
    :raises exceptions.AudioFileLoadError: when ``librosa.load`` fails.
    """
    route_start_time = start_time = time.time()
    print('='*20)
    data = get_validator_data()
    query_text = data['query'].strip().lower()
    search_output_mode = data['search_output_mode']
    # Both Google clients authenticate with the same service-account file
    # located in the Flask instance folder.
    auth_filepath = Path(current_app.instance_path) / Path(current_app.config['GOOGLE_CLOUD_AUTH_FILENAME'])
    speech_client = speech.SpeechClient.from_service_account_json(auth_filepath)
    storage_client = storage.Client.from_service_account_json(auth_filepath)
    bucket = storage_client.get_bucket(current_app.config['GOOGLE_CLOUD_STORAGE_BUCKET_NAME'])
    with open(get_tmp_filepath(), 'w+b') as input_file:
        # Drop everything up to and including the first comma
        # (presumably a "data:...;base64," prefix -- confirm with the client).
        data['file_input'] = data['file_input'][data['file_input'].find(',')+1:]
        # Pad with '=' so the length is a multiple of 4, as base64 requires.
        file_input = data['file_input'] + '=' * ((4 - len(data['file_input']) % 4) % 4)
        input_file.write(base64.b64decode(file_input))
    # The hash of the (padded) base64 text identifies identical uploads.
    input_hash = hash_util.get_md5_str(file_input)
    print('Decoding and writing input file took {:.3f} seconds'.format(time.time() - start_time))
    start_time = time.time()
    input_hash_to_blob_uri = cache.get('input_hash_to_blob_uri') or dict()
    if input_hash in input_hash_to_blob_uri:
        # Known upload: rebuild the blob handle from the cached gs:// URI.
        blob_uri = input_hash_to_blob_uri[input_hash]
        blob = bucket.blob(blob_uri.replace('gs://{}/'.format(current_app.config['GOOGLE_CLOUD_STORAGE_BUCKET_NAME']), ''))
    else:
        try:
            audio, sample_rate = librosa.load(input_file.name)
        except Exception as exception:
            raise exceptions.AudioFileLoadError('file_input', exception)
        print('Libroa audio file loading took {:.3f} seconds'.format(time.time() - start_time))
        start_time = time.time()
        tmp_filepath = get_tmp_filepath()
        # Preprocess the audio data by converting it to a mono 16-bit PCM WAVE file
        audio_mono = librosa.to_mono(audio)
        soundfile.write(tmp_filepath, audio_mono, sample_rate, subtype='PCM_16', format='wav')
        print('Audio processing and exporting took {:.3f} seconds'.format(time.time() - start_time))
        start_time = time.time()
        # Upload the file to GCS if it doesn't already exist
        bucket_audio_root = current_app.config['GOOGLE_CLOUD_STORAGE_BUCKET_AUDIO_ROOT']
        blob_root = bucket_audio_root
        # Ensure a non-empty root ends with '/' so it can prefix the blob name.
        if blob_root and not bucket_audio_root.endswith('/'):
            blob_root = bucket_audio_root + '/'
        with open(tmp_filepath, 'rb') as audio_file:
            # The blob is named after the CRC32 of the converted audio file.
            blob_filename = '{}{}'.format(blob_root, hash_util.get_crc32_str(audio_file))
            blob = bucket.blob(blob_filename)
            if not blob.exists():
                blob.upload_from_file(audio_file, rewind=True)
        blob_uri = 'gs://{}/{}'.format(current_app.config['GOOGLE_CLOUD_STORAGE_BUCKET_NAME'], blob_filename)
        input_hash_to_blob_uri[input_hash] = blob_uri
        cache.set('input_hash_to_blob_uri', input_hash_to_blob_uri, timeout=0)
        # Remove audio file now that we are done with it
        tmp_filepath.unlink()
        print('Uploading to GCS took {:.3f} seconds'.format(time.time() - start_time))
        start_time = time.time()
    # The decoded upload is no longer needed on either path.
    Path(input_file.name).unlink()
    transcription_cache = cache.get('transcription_cache') or dict()
    if blob_uri in transcription_cache:
        op_result = transcription_cache[blob_uri]
        print('Loaded {} from cache'.format(blob_uri))
    else:
        # This configuration is predetermined: all audio files are converted to
        # a WAVE format with 16-bit PCM encoding. Sample rate is determined using librosa.
        # NOTE(review): sample_rate is only assigned in the upload branch above;
        # if the blob URI was cached but its transcription is not, this looks
        # like it raises UnboundLocalError -- confirm.
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=sample_rate,
            enable_speaker_diarization=True,
            enable_automatic_punctuation=True,
            language_code='en-US')
        operation = speech_client.long_running_recognize(config, types.RecognitionAudio(uri=blob_uri))
        # Waits for the long-running recognition to finish.
        op_result = operation.result()
        transcription_cache[blob_uri] = op_result
        cache.set('transcription_cache', transcription_cache, timeout=0)
        print('Transcription took {:.3f} seconds'.format(time.time() - start_time))
        start_time = time.time()
    if not data['is_context_search']:
        match_results = string_query(query_text, blob_uri, op_result, search_output_mode)
    else:
        match_results = context_query(query_text, blob_uri, op_result, search_output_mode)
    print('Search took {:.3f} seconds'.format(time.time() - start_time))
    end_time = time.time()
    # Signed URL for the uploaded audio; the argument is a one-hour timedelta.
    access_link = blob.generate_signed_url(timedelta(hours=1))
    return jsonify(status_code=201, message='Query was successful!', matches=match_results, access_link=access_link, elapsed_time=end_time - route_start_time, success=True)
def set(cls, ct):
    """Store ``ct`` in the cache under its name, id and alias keys."""
    key_specs = (
        ("CIType::Name::{0}", "name"),
        ("CIType::ID::{0}", "id"),
        ("CIType::Alias::{0}", "alias"),
    )
    for template, field in key_specs:
        # Attributes are read lazily, one per write, in the listed order.
        cache.set(template.format(getattr(ct, field)), ct)
def set(cls, key, values):
    """Store ``values`` under the id and name keys of the CI type found for ``key``.

    Nothing is written when ``CITypeCache.get(key)`` returns ``None``.
    """
    ci_type = CITypeCache.get(key)
    if ci_type is None:
        return

    key_specs = (
        ("CITypeAttribute::ID::{0}", "id"),
        ("CITypeAttribute::Name::{0}", "name"),
    )
    for template, field in key_specs:
        cache.set(template.format(getattr(ci_type, field)), values)
def set(cls, key, values):
    """Store ``values`` under the id and name keys of the CI type found for ``key``.

    Nothing is written when ``CITypeCache.get(key)`` returns ``None``.
    """
    ci_type = CITypeCache.get(key)
    if ci_type is None:
        return

    id_key = cls.PREFIX_ID.format(ci_type.id)
    cache.set(id_key, values)

    name_key = cls.PREFIX_NAME.format(ci_type.name)
    cache.set(name_key, values)
def set(cls, attr):
    """Store ``attr`` in the cache under its id, name and alias keys."""
    key_specs = (
        ('Field::ID::{0}', 'id'),
        ('Field::Name::{0}', 'name'),
        ('Field::Alias::{0}', 'alias'),
    )
    for template, field in key_specs:
        # Attributes are read lazily, one per write, in the listed order.
        cache.set(template.format(getattr(attr, field)), attr)
def set(cls, attr):
    """Store ``attr`` in the cache under its id, name and alias keys."""
    id_key = cls.PREFIX_ID.format(attr.id)
    cache.set(id_key, attr)

    name_key = cls.PREFIX_NAME.format(attr.name)
    cache.set(name_key, attr)

    alias_key = cls.PREFIX_ALIAS.format(attr.alias)
    cache.set(alias_key, attr)
def set(cls, ct):
    """Store ``ct`` in the cache under its name key and its id key."""
    key_specs = (
        ("RelationType::Name::{0}", "name"),
        ("RelationType::ID::{0}", "id"),
    )
    for template, field in key_specs:
        # Attributes are read lazily, one per write, in the listed order.
        cache.set(template.format(getattr(ct, field)), ct)
def set(cls, ct):
    """Store ``ct`` in the cache under its name, id and alias keys."""
    name_key = cls.PREFIX_NAME.format(ct.name)
    cache.set(name_key, ct)

    id_key = cls.PREFIX_ID.format(ct.id)
    cache.set(id_key, ct)

    alias_key = cls.PREFIX_ALIAS.format(ct.alias)
    cache.set(alias_key, ct)