def _constructTrie(self):
    """Build a trie holding the lower-cased SQL vocabulary.

    For each of the four ``lh`` classes (keywords, functions, data types,
    operators) every entry of ``__members__`` is stringified, lower-cased
    and handed to ``Trie.insertList`` as one tuple, together with the
    matching entry of ``self.terminate_map``.

    Returns:
        The populated ``Trie``.
    """
    trie = Trie()
    # An ordered tuple of pairs keeps the insertion order fixed and never
    # needs to hash the classes themselves.
    groups = (
        (lh.SQLKeyword, 'Keyword'),
        (lh.SQLFunction, 'Function'),
        (lh.SQLDataType, 'DataType'),
        (lh.SQLOperator, 'Operator'),
    )
    for member_source, label in groups:
        words = tuple(
            str(member).lower()
            for _name, member in member_source.__members__.items()
        )
        trie.insertList(words, self.terminate_map[label])
    return trie
def __init__(self, lower=True):
    """Set up an empty trie and empty lookup tables.

    Args:
        lower: Stored on the instance as ``self.lower``.
    """
    self.trie = Trie()
    self.lower = lower
    self.space = ''
    # Word list -> type.
    self.ent2type = {}
    # Word list -> id; '<UNK>' starts out with id 0.
    self.ent2id = {'<UNK>': 0}
class LexiconBuilder(object):
    """Fill a ``Trie`` with tokens that all share one value."""

    def __init__(self, tokens, value):
        """Create the trie and immediately populate it via ``build``."""
        self.lexicon = Trie()
        # NOTE(review): the original comment here said the pkl is initialised
        # on every construction -- TODO confirm what "pkl" refers to.
        self.build(tokens, value)

    def build(self, tokens, value):
        """Add each token to the lexicon under ``value``.

        :param tokens: word list, e.g. ['television set', 'television', ...]
        :param value: ontology label for the words, e.g. 'tool', 'show_name', ...
        :return: None
        """
        for word in tokens:
            self.lexicon.add(word, value)

    def show(self, num):
        """Delegate to ``self.lexicon.show(num)`` and return its result."""
        shown = self.lexicon.show(num)
        return shown

    def search(self, word):
        """Look ``word`` up with ``value_flag=True`` and ``verbose=False``."""
        found = self.lexicon.search(word, value_flag=True, verbose=False)
        return found
class Gazetteer:
    """Word-list gazetteer: a trie plus type and id lookup tables.

    Word lists are joined with ``self.space`` (the empty string) to form the
    keys of ``ent2type`` and ``ent2id``.
    """

    def __init__(self, lower):
        """Create an empty gazetteer; ``lower`` enables lower-casing of words."""
        self.lower = lower
        self.space = ""
        self.trie = Trie()
        self.ent2type = {}  # word list -> type
        self.ent2id = {"<UNK>": 0}  # word list -> id

    def _prepare(self, word_list):
        """Return ``word_list`` lower-cased when ``self.lower`` is truthy."""
        if not self.lower:
            return word_list
        return [word.lower() for word in word_list]

    def enumerateMatchList(self, word_list):
        """Return ``self.trie.enumerateMatch`` for the prepared word list."""
        words = self._prepare(word_list)
        return self.trie.enumerateMatch(words, self.space)

    def insert(self, word_list, source):
        """Insert a word list into the trie and register its type and id.

        The first ``source`` seen for a key wins; ids are handed out in
        insertion order, starting from the current size of ``ent2id``.
        """
        words = self._prepare(word_list)
        self.trie.insert(words)
        key = self.space.join(words)
        self.ent2type.setdefault(key, source)
        self.ent2id.setdefault(key, len(self.ent2id))

    def searchId(self, word_list):
        """Return the id of the word list, or the ``"<UNK>"`` id if unknown."""
        key = self.space.join(self._prepare(word_list))
        try:
            return self.ent2id[key]
        except KeyError:
            return self.ent2id["<UNK>"]

    def searchType(self, word_list):
        """Return the type registered for the word list.

        For an unknown word list an error is printed and ``exit(0)`` is
        called (note: exit status 0).
        """
        key = self.space.join(self._prepare(word_list))
        if key not in self.ent2type:
            print(
                "Error in finding entity type at gazetteer.py, exit program! String:",
                key)
            exit(0)
        else:
            return self.ent2type[key]

    def size(self):
        """Return the number of entries in ``ent2type``."""
        return len(self.ent2type)

    def clean(self):
        """Drop all entries: new trie, empty tables, only ``"<UNK>"`` -> 0."""
        self.space = ""
        self.ent2id = {"<UNK>": 0}
        self.ent2type = {}
        self.trie = Trie()
class Lexicon(object):
    """Word-list lexicon: a trie plus lookup tables for entity type and id.

    Word lists are joined with ``self.space`` (the empty string) to form the
    keys of ``ent2type`` and ``ent2id``.  Id 0 is reserved for ``'<UNK>'``,
    which ``search_id`` returns for any word list that was never inserted.
    """

    def __init__(self, lower=True):
        """Create an empty lexicon.

        Args:
            lower: When truthy, every word is lower-cased before it is
                inserted or looked up.
        """
        self.lower = lower
        self.trie = Trie()
        self.ent2type = {}  # word list to type
        self.ent2id = {'<UNK>': 0}  # word list to id
        self.space = ''

    def enumerate_match_list(self, word_list):
        """Return ``self.trie.enumerate_match`` for the (lower-cased) words."""
        if self.lower:
            word_list = [word.lower() for word in word_list]
        return self.trie.enumerate_match(word_list, self.space)

    def insert(self, word_list, source):
        """Insert a word list into the trie and register its type and id.

        The first ``source`` seen for a key is kept; a new key receives the
        current size of ``ent2id`` as its id.
        """
        if self.lower:
            word_list = [word.lower() for word in word_list]
        self.trie.insert(word_list)
        string = self.space.join(word_list)
        if string not in self.ent2type:
            self.ent2type[string] = source
        if string not in self.ent2id:
            self.ent2id[string] = len(self.ent2id)

    def search_id(self, word_list):
        """Return the id of the word list, or the ``'<UNK>'`` id if unknown."""
        if self.lower:
            word_list = [word.lower() for word in word_list]
        string = self.space.join(word_list)
        if string in self.ent2id:
            return self.ent2id[string]
        return self.ent2id['<UNK>']

    def search_type(self, word_list):
        """Return the type registered for the word list.

        For an unknown word list an error is printed and ``exit(0)`` is
        called (note: exit status 0).
        """
        if self.lower:
            word_list = [word.lower() for word in word_list]
        string = self.space.join(word_list)
        if string in self.ent2type:
            return self.ent2type[string]
        print('Error in finding entity type at lexicon.py, exit programming')
        exit(0)

    def size(self):
        """Return the number of entries in ``ent2type``."""
        return len(self.ent2type)

    def clean(self):
        """Reset the lexicon to the same empty state ``__init__`` creates."""
        self.trie = Trie()
        self.ent2type = {}
        # Keep the '<UNK>' entry: search_id falls back to it for unknown
        # words, and it reserves id 0 so new insertions start at 1 again.
        self.ent2id = {'<UNK>': 0}
        self.space = ''
class Gazetteer:
    """Trie-backed gazetteer with word-list -> type and word-list -> id tables.

    Keys of both tables are the words joined with ``self.space`` (the empty
    string).
    """

    def __init__(self, lower):
        """Create an empty gazetteer; ``lower`` enables lower-casing of words."""
        self.lower = lower
        self.space = ""
        self.ent2id = {"<UNK>": 0}  # word list to id
        self.ent2type = {}  # word list to type
        self.trie = Trie()

    def enumerateMatchList(self, word_list):
        """Return ``self.trie.enumerateMatch`` for the (lower-cased) words."""
        tokens = [tok.lower() for tok in word_list] if self.lower else word_list
        return self.trie.enumerateMatch(tokens, self.space)

    def insert(self, word_list, source):
        """Insert a word list into the trie and register its type and id.

        An already-known key keeps its existing type and id.
        """
        tokens = [tok.lower() for tok in word_list] if self.lower else word_list
        self.trie.insert(tokens)
        entity = self.space.join(tokens)
        if entity not in self.ent2type:
            self.ent2type[entity] = source
        if entity not in self.ent2id:
            next_id = len(self.ent2id)
            self.ent2id[entity] = next_id

    def searchId(self, word_list):
        """Return the id of the word list, or the ``"<UNK>"`` id if unknown."""
        tokens = [tok.lower() for tok in word_list] if self.lower else word_list
        entity = self.space.join(tokens)
        if entity not in self.ent2id:
            entity = "<UNK>"
        return self.ent2id[entity]

    def searchType(self, word_list):
        """Return the type registered for the word list.

        For an unknown word list an error is printed and ``exit(0)`` is
        called (note: exit status 0).
        """
        tokens = [tok.lower() for tok in word_list] if self.lower else word_list
        entity = self.space.join(tokens)
        if entity not in self.ent2type:
            print ("Error in finding entity type at gazetteer.py, exit program! String:", entity)
            exit(0)
        else:
            return self.ent2type[entity]

    def size(self):
        """Return the number of entries in ``ent2type``."""
        count = len(self.ent2type)
        return count
def __init__(self, lower):
    """Set up an empty trie and empty lookup tables.

    Args:
        lower: Stored on the instance as ``self.lower``.
    """
    self.lower = lower
    self.space = ""
    self.trie = Trie()
    # Word list -> id; "<UNK>" starts out with id 0.
    self.ent2id = {"<UNK>": 0}
    # Word list -> type.
    self.ent2type = {}
def __init__(self, tokens, value):
    """Create an empty ``Trie`` lexicon and populate it via ``self.build``.

    :param tokens: forwarded unchanged to ``self.build``
    :param value: forwarded unchanged to ``self.build``
    """
    self.lexicon = Trie()
    self.build(tokens, value)
    # NOTE(review): original comment said the pkl is initialised every time
    # -- TODO confirm what "pkl" refers to.
def clean(self):
    """Discard all stored entries.

    Installs a new ``Trie``, empties the type table and resets the id
    table so that it holds only ``"<UNK>"`` with id 0.
    """
    self.space = ""
    self.ent2id = {"<UNK>": 0}
    self.ent2type = {}
    self.trie = Trie()
import os
from flask_restful import Resource
from flask import request
from flask import jsonify
from utils.utils import json_reader, get_movie_by_index, build_movie_hash_table
from utils.trie import Trie
from utils.search import MovieNameStrategy, GenreStrategy, ShowingTimeStrategy, SearchMovie

# Parent of the directory that contains this file; the mock data lives under it.
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
json_path = f'{parent_dir}/mockData/movies.json'

# Everything below runs once, when the module is imported.
# presumably json_reader parses the JSON file into the movie collection --
# verify against utils.utils
movies = json_reader(json_path)
trie = Trie()
# build_movie_trie is called on the class with the instance passed explicitly.
Trie.build_movie_trie(trie, movies)
movie_hash_table = build_movie_hash_table(movies)


class MovieListResource(Resource):
    """Resource whose GET handler reads optional movie filters from the query string."""

    def get(self):
        """Read ``genre``, ``name`` and ``showing_time`` from ``request.args``.

        Each value is ``None`` when the corresponding query parameter is absent.
        """
        genre = request.args.get('genre')
        name = request.args.get('name')
        showing_time = request.args.get('showing_time')
        # The three filters bundled into one dict.
        data_obj = {'name': name, 'genre': genre, 'showing_time': showing_time}
        # No filter supplied: the result is the complete movie list.
        if name is None and genre is None and showing_time is None:
            result = movies
def __init__(self, lower):
    """Initialise an empty gazetteer state.

    Args:
        lower: Stored on the instance as ``self.lower``.
    """
    self.trie = Trie()
    self.lower = lower
    self.space = ""
    # Word list -> type.
    self.ent2type = {}
    # Word list -> id; "<UNK>" starts out with id 0.
    self.ent2id = {"<UNK>": 0}
def clean(self):
    """Discard all stored entries.

    Installs a new ``Trie``, empties the type table and resets the id
    table so that it holds only ``'<UNK>'`` with id 0.
    """
    self.trie = Trie()
    self.ent2type = {}
    # The id table must keep its '<UNK>' entry: id lookups for unknown word
    # lists fall back to it, and it reserves id 0 so that new insertions
    # are numbered from 1 again.
    self.ent2id = {'<UNK>': 0}
    self.space = ''