def search_fx3():
    """Build a whoosh index over faq.csv and run a demo query for "还书".

    Fields declared with stored=True can be returned with search hits.
    """
    schema = Schema(问题=TEXT(stored=True, analyzer=ChineseAnalyzer()),
                    回答=TEXT(stored=True, analyzer=ChineseAnalyzer()))
    # Parse faq.csv; split each line exactly once (the original split twice).
    with open('faq.csv', 'r', encoding='utf-8') as f:
        rows = [line.strip().split(',') for line in f]
    texts = [row for row in rows if len(row) == 4]
    # Persist the index under indexdir/ (create_in overwrites any old index).
    indexdir = 'indexdir/'
    if not os.path.exists(indexdir):
        os.mkdir(indexdir)
    ix = create_in(indexdir, schema)
    # Add one document per CSV row, skipping the header row.
    writer = ix.writer()
    for row in texts[1:]:
        # BUGFIX: rows have 4 columns but were unpacked into 2 names,
        # which raised ValueError at runtime.  Assume Q/A are the first
        # two columns -- TODO confirm against the actual faq.csv layout.
        问题, 回答 = row[0], row[1]
        writer.add_document(问题=问题, 回答=回答)
    writer.commit()
    # Run a fixed demo query against the 问题 field.
    searcher = ix.searcher()
    # find = input("请输入检索内容:")
    results = searcher.find("问题", "还书")
    print('一共发现%d个回答。' % len(results))
    for i in range(min(10, len(results))):
        print(json.dumps(results[i].fields(), ensure_ascii=False))
def __init__(self):
    """Open the existing whoosh index, or create a fresh one on first run."""
    index_dir = CONFIG['app']['index_dir']
    if not whoosh.index.exists_in(index_dir):
        # First run: declare the note schema and create the index files.
        note_schema = Schema(note_id=NUMERIC(stored=True, unique=True),
                             notebook_id=NUMERIC(stored=True),
                             title=TEXT(analyzer=ChineseAnalyzer()),
                             snippet=TEXT(analyzer=ChineseAnalyzer()))
        self.index = whoosh.index.create_in(index_dir, note_schema)
    else:
        self.index = whoosh.index.open_dir(index_dir)
class DuanwenxueIndexSchema(SchemaClass):
    """Whoosh schema for duanwenxue articles.

    stored=True fields come back with search hits; sortable fields can
    order results; vector=True keeps per-document term vectors.
    """
    # Title matches are boosted 2x over body matches.
    title = TEXT(analyzer=ChineseAnalyzer(), stored=True, field_boost=2.0,
                 sortable=True, vector=True)
    author = TEXT(stored=True, sortable=True)
    date = DATETIME(stored=True, sortable=True)
    # Body text is searchable but not stored.
    content = TEXT(analyzer=ChineseAnalyzer(), vector=True)
    # Stored verbatim with the document, not indexed / not searchable.
    url = STORED()
    tag = KEYWORD(stored=True)
def __init__(self, idxd=True, faqfile='faq.csv', indexdir='indexdir/'):
    """FAQ searcher over a Q/A whoosh index.

    idxd: True opens the existing index dir; False builds it from faqfile.
    """
    # Create the schema; stored=True means the field can be returned
    # with search results.
    self.schema = Schema(Q=TEXT(stored=True, analyzer=ChineseAnalyzer()),
                         A=TEXT(stored=True, analyzer=ChineseAnalyzer()))
    # Persist schema/index data under indexdir.
    if not os.path.exists(indexdir):
        os.mkdir(indexdir)
    if idxd:
        self.idx = open_dir(indexdir)
    else:
        self.idx = create_in(indexdir, self.schema)
        # Index the CSV only when building a brand-new index.
        # NOTE(review): placement reconstructed from mangled source --
        # confirm self.index() is not meant to run on the open path too.
        self.index(faqfile)
    # Create the searcher used by later queries.
    self.searcher = self.idx.searcher()
def open_index(self, index_folder, create_new=False):
    """Open (or build) the whoosh index under *index_folder*.

    create_new=True wipes any existing index folder first.
    """
    self.index_folder = index_folder
    # Optionally start from scratch by removing the old folder.
    if create_new and os.path.exists(index_folder):
        shutil.rmtree(index_folder)
        print("deleted index folder: " + index_folder)
    if not os.path.exists(index_folder):
        os.mkdir(index_folder)
    already_indexed = index.exists_in(index_folder)
    analyzer = ChineseAnalyzer()
    # Metadata fields are heavily boosted so filename/tag/headline hits
    # outrank plain body-text hits.
    note_schema = Schema(path=ID(stored=True, unique=True),
                         filename=TEXT(stored=True, field_boost=100.0),
                         tags=KEYWORD(stored=True, scorable=True, field_boost=80.0),
                         headlines=KEYWORD(stored=True, scorable=True, field_boost=60.0),
                         doubleemphasiswords=KEYWORD(stored=True, scorable=True, field_boost=40.0),
                         emphasiswords=KEYWORD(stored=True, scorable=True, field_boost=20.0),
                         content=TEXT(stored=True, analyzer=analyzer),
                         time=STORED)
    if already_indexed:
        self.ix = index.open_dir(index_folder)
    else:
        self.ix = index.create_in(index_folder, note_schema)
class Document(db.Model):
    """Document record; whoosh indexes ``search_column``."""
    __tablename__ = u'documents'
    __searchable__ = [u'search_column']
    __analyzer__ = ChineseAnalyzer()
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.UnicodeText(256))
    path = db.Column(db.UnicodeText(256))   # object key inside the OSS bucket
    search_column = db.Column(db.UnicodeText(256))
    office = db.Column(db.UnicodeText(64))
    model = db.Column(db.UnicodeText(64))
    chapter = db.Column(db.UnicodeText(64))
    date = db.Column(db.UnicodeText(64))

    @property
    def get_url(self):
        """Return a signed OSS download URL for this document (5 min TTL).

        SECURITY NOTE(review): the OSS access/secret keys are hard-coded
        in source control -- rotate them and load from configuration.
        """
        filesystem = OSSFileAdmin(
            access_key='2zdr2JCTOpn9viiK',
            secret_key='2EnOtEoK90ycVpmjUn4BHVYYy5zmzx',
            bucket_name='filessystem',
            endpoint='https://oss-cn-shanghai.aliyuncs.com')
        return filesystem.storage.generate_url(file_path=self.path,
                                               expires=5 * 60)
def setUp(self):
    """Build a throwaway Flask app + in-memory SQLite DB with msearch.

    Defines a Post model indexed on title/content (Chinese analyzer)
    and seeds one post per entry in the module-level ``titles``.
    """
    class TestConfig(object):
        # In-memory SQLite and a fresh temp dir for the whoosh index.
        SQLALCHEMY_TRACK_MODIFICATIONS = True
        SQLALCHEMY_DATABASE_URI = 'sqlite://'
        DEBUG = True
        TESTING = True
        MSEARCH_INDEX_NAME = mkdtemp()
        MSEARCH_BACKEND = 'whoosh'

    self.app = Flask(__name__)
    self.app.config.from_object(TestConfig())
    self.db = SQLAlchemy(self.app)
    self.search = Search(self.app, db=self.db, analyzer=ChineseAnalyzer())
    db = self.db

    class Post(db.Model, ModelSaveMixin):
        __tablename__ = 'basic_posts'
        __searchable__ = ['title', 'content']
        id = db.Column(db.Integer, primary_key=True)
        title = db.Column(db.String(49))
        content = db.Column(db.Text)

        def __repr__(self):
            return '<Post:{}>'.format(self.title)

    self.Post = Post
    with self.app.test_request_context():
        self.db.create_all()
        # Seed fixture rows; ``titles`` comes from module scope.
        for (i, title) in enumerate(titles, 1):
            post = self.Post(title=title, content='content%d' % i)
            post.save(self.db)
def build_index(dir, indexdir='indexdir/'):
    """Build a whoosh index from the documents under *dir*.

    dir: passed through to doc_preprocess().
    indexdir: where index files are written (new parameter; defaults to
    the previously hard-coded location, so existing callers are unchanged).
    """
    # Load the well-processed documents as a dataframe.
    df = doc_preprocess(dir)
    # Apply the jieba Chinese analyzer to tokenize the documents.
    analyzer = ChineseAnalyzer()
    # stored=True fields can be returned to the user with search hits.
    schema = Schema(idx=ID(stored=True),
                    title=TEXT(stored=True, analyzer=analyzer),
                    author=ID(stored=False),
                    keyword=KEYWORD(analyzer=analyzer),
                    content=TEXT(stored=False, analyzer=analyzer))
    # Store the schema information under indexdir.
    if not os.path.exists(indexdir):
        os.mkdir(indexdir)
    ix = create_in(indexdir, schema)
    # Add one document per row; columns are positionally
    # (title, author, keyword, content) -- assumed from usage, TODO
    # confirm against doc_preprocess().
    writer = ix.writer()
    for idx, val in df.iterrows():
        writer.add_document(idx=str(idx), title=val[0], author=val[1],
                            keyword=val[2], content=val[3])
    writer.commit()
def gen_whoosh_database(if_rand=True, kind='1', kind_arr=None, post_type=None):
    """(Re)build the whoosh database from apps, posts, wikis and pages.

    BUGFIX: kind_arr/post_type used mutable default arguments ([] / {})
    that are shared across calls; None sentinels restore safe defaults
    with identical per-call behavior.
    """
    if kind_arr is None:
        kind_arr = []
    if post_type is None:
        post_type = {}
    analyzer = ChineseAnalyzer()
    schema = Schema(title=TEXT(stored=True, analyzer=analyzer),
                    catid=TEXT(stored=True),
                    type=TEXT(stored=True),
                    link=ID(unique=True, stored=True),
                    content=TEXT(stored=True, analyzer=analyzer))
    whoosh_db = 'database/whoosh'
    # Create the index on first run, otherwise append to the existing one.
    if not os.path.exists(whoosh_db):
        os.makedirs(whoosh_db)
        ix = create_in(whoosh_db, schema)
    else:
        ix = open_dir(whoosh_db)
    writer = ix.writer()
    do_for_app2(writer, rand=if_rand)
    do_for_post(writer, rand=if_rand, doc_type=post_type['1'])
    do_for_wiki(writer, rand=if_rand, doc_type=post_type['1'])
    do_for_page(writer, rand=if_rand, doc_type=post_type['1'])
    # BUGFIX: the loop variable previously shadowed the *kind* parameter;
    # use a distinct name so the parameter keeps its value.
    for app_kind in kind_arr:
        do_for_app(writer, rand=if_rand, kind=app_kind, doc_type=post_type)
    print('-' * 10)
    writer.commit()
def search_index(index_dir, keyword):
    """Search *keyword* in the 'content' field of the index at *index_dir*.

    BUGFIX: the original mixed Python 2 print statements with print()
    calls, raised a plain string ``raise ("...")`` (a TypeError at
    runtime), and used the Python-2-only ``unicode`` builtin.  Rewritten
    as consistent Python 3 with a proper exception.
    """
    analyzer = ChineseAnalyzer()
    # Schema kept for reference only; open_dir() loads the stored schema.
    schema = Schema(title=TEXT(stored=True),
                    path=ID(stored=True),
                    content=TEXT(stored=True, analyzer=analyzer))
    if not os.path.exists(index_dir):
        raise FileNotFoundError(" dir not exist")
    print("使用 索引目录 %s" % index_dir)
    ix = open_dir(index_dir)
    searcher = ix.searcher()
    parser = QueryParser("content", schema=ix.schema)
    keyword = str(keyword)
    print(keyword)
    print("result of " + keyword)
    q = parser.parse(keyword)
    # Use limit=None to retrieve every hit instead of the first 100.
    results = searcher.search(q, limit=100)
    count = 0
    for hit in results:
        count += 1
        print("#===================================")
        print("count=%s" % count)
        print("%s|%s" % (hit["path"], hit["title"]))
        print(chomp(hit["content"]))
class Post(db.Model):
    """Blog post; whoosh-searchable on title and content."""
    __tablename__ = "post"
    __searchable__ = ["title", "content"]
    __analyzer__ = ChineseAnalyzer()
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    title = db.Column(db.String(128), nullable=False)
    content = db.Column(LONGTEXT, nullable=False)           # raw (markdown?) source -- TODO confirm
    html_content = db.Column(LONGTEXT, nullable=False, default="")
    create_at = db.Column(db.DateTime, nullable=False)
    update_at = db.Column(db.DateTime, nullable=False)
    path = db.Column(db.String(256), nullable=False, unique=True)
    permalink = db.Column(db.String(128), nullable=False, unique=True)
    status = db.Column(db.Integer, nullable=False, default=1)  # 1: public, 2: deleted
    author_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
    category_id = db.Column(db.Integer, db.ForeignKey(Category.id), nullable=False)

    def __repr__(self):
        return "<Post %r>" % self.title

    def __init__(self, **kw):
        # Derive a stable permalink from *path* via HMAC-MD5 when one
        # was not supplied explicitly.
        super(Post, self).__init__(**kw)
        if self.path is not None and self.permalink is None:
            self.permalink = hmac.new(
                current_app.config["HMAC_KEY"].encode("utf-8"),
                self.path.encode("utf-8"),
                "md5",
            ).hexdigest()
def create_farm_index( ):
    """Index farm-product rows from MySQL into 'farm_products_index'.

    Returns True on success, False on any failure (original contract).
    """
    conn = None
    try:
        # SECURITY NOTE(review): database credentials are hard-coded in
        # source control; move them to configuration / environment vars.
        conn = ms.connect(host='120.79.14.47', user='******',
                          passwd='Kermi0116', db='farm_products',
                          charset="utf8")
        cu = conn.cursor(cursorclass=ms.cursors.DictCursor)
        cu.execute("set names utf8")
        cu.execute('SELECT id,maintype,type,place FROM `tendency`')
        iterms = cu.fetchall()
        if not iterms:
            return False
        analyzer = ChineseAnalyzer()
        schema = Schema(id=NUMERIC(stored=True, unique=True),
                        maintype=TEXT(stored=True, analyzer=analyzer),
                        type=TEXT(stored=True, analyzer=analyzer),
                        place=TEXT(stored=True, analyzer=analyzer))
        writer = create_index('farm_products_index', schema)
        if not writer:
            return False
        for iterm in iterms:
            writer.add_document(id=iterm['id'], maintype=iterm['maintype'],
                                type=iterm['type'], place=iterm['place'])
        writer.commit()
        return True
    except Exception:
        # BUGFIX: was a bare ``except:`` which also swallowed SystemExit/
        # KeyboardInterrupt; Exception keeps the best-effort contract.
        return False
    finally:
        # BUGFIX: the connection leaked whenever an exception fired
        # before conn.close(); always close it.
        if conn is not None:
            conn.close()
class Project(db.Model):
    """Project record; whoosh-searchable on title."""
    __tablename__ = u'projects'
    __searchable__ = [u'title']
    __analyzer__ = ChineseAnalyzer()
    id = db.Column(db.Integer, primary_key=True)
    """ title = db.Column(db.VARCHAR(255), unique=True) """
    title = db.Column(db.UnicodeText(256))
    model = db.Column(db.UnicodeText(64))
    chapter = db.Column(db.UnicodeText(64))
    date = db.Column(db.DATE, default=datetime.now)
    # One-to-many links to this project's tools and improvement advises.
    the_tools = db.relationship(u'Tool', backref=u'belong', lazy=u'dynamic')
    the_advises = db.relationship(u'Advise', backref=u'improve', lazy=u'dynamic')

    def __init__(self, title, model, chapter, date):
        self.title = title
        self.model = model
        self.chapter = chapter
        self.date = date

    def __unicode__(self):
        # Display string is intentionally Chinese
        # ("project / machine model / chapter / date").
        return u'项目:%s, 机型:%s, 章节:%s, 日期:%s' % (self.title, self.model, self.chapter, self.date)

    def __repr__(self):
        # NOTE(review): encoding to bytes here is a Python-2 idiom.
        return self.__unicode__().encode('utf-8')
def create_job_index( ):
    """Index job rows from MySQL into 'job_index'.

    Returns True on success, False on any failure (original contract).
    """
    conn = None
    try:
        # SECURITY NOTE(review): hard-coded DB credentials; move to config.
        conn = ms.connect(host='120.79.14.47', user='******',
                          passwd='Kermi0116', db='58city', charset="utf8")
        cu = conn.cursor(cursorclass=ms.cursors.DictCursor)
        cu.execute("set names utf8")
        cu.execute('SELECT id,jobName,type,area_a,company_name FROM `job`')
        iterms = cu.fetchall()
        if not iterms:
            return False
        analyzer = ChineseAnalyzer()
        schema = Schema(id=NUMERIC(stored=True, unique=True),
                        job_name=TEXT(stored=True, analyzer=analyzer),
                        area_a=TEXT(stored=True, analyzer=analyzer),
                        type=TEXT(stored=True),
                        company_name=TEXT(stored=True, analyzer=analyzer))
        writer = create_index('job_index', schema)
        if not writer:
            return False
        for iterm in iterms:
            # BUGFIX: area_a was selected and declared in the schema but
            # never written to the index; include it in each document.
            writer.add_document(id=iterm['id'], job_name=iterm['jobName'],
                                type=iterm['type'], area_a=iterm['area_a'],
                                company_name=iterm['company_name'])
        writer.commit()
        return True
    except Exception:
        # Narrowed from a bare except; contract is still False-on-failure.
        return False
    finally:
        # Close the connection even when an exception fires mid-way.
        if conn is not None:
            conn.close()
def buildIndex():
    """Build the whoosh index under ./index from the law files in ./laws.

    Only the first 100 subdirectories of ./laws are processed.
    """
    analyzer = ChineseAnalyzer()
    law_schema = Schema(title=TEXT(analyzer=analyzer, stored=True),
                        path=ID(stored=False),
                        content=TEXT(analyzer, stored=True))
    if not os.path.exists('index'):
        os.mkdir('index')
    ix = create_in('index', schema=law_schema)
    writer = ix.writer()
    base = './laws/'
    # Cap the run at the first 100 law directories.
    for subdir in os.listdir('./laws')[:100]:
        for filename in os.listdir(base + subdir):
            sections = FaLvQieFen.readLaw('laws/' + subdir + '/' + filename)
            for key in sections:
                section = sections[key]
                doc_title = section.Fa + section.name
                doc_content = section.content
                print(doc_title)
                print(doc_content)
                # The path field encodes directory/file/section-number
                # as the document id.
                doc_id = subdir + '/' + filename + '/' + str(section.No)
                writer.add_document(title=doc_title, content=doc_content,
                                    path=doc_id)
    writer.commit()
class Tool(db.Model):
    """Tool belonging to a Project; whoosh-searchable on project_title."""
    __tablename__ = u'tools'
    __searchable__ = [u'project_title']
    __analyzer__ = ChineseAnalyzer()
    id = db.Column(db.Integer, primary_key=True)
    project_title = db.Column(db.UnicodeText(64))  # denormalized copy of the owning project's title
    name = db.Column(db.UnicodeText(64))
    size = db.Column(db.UnicodeText())
    number = db.Column(db.Integer, default=1)
    description = db.Column(db.UnicodeText())
    project_id = db.Column(db.Integer, db.ForeignKey('projects.id'))

    def __unicode__(self):
        return u'<Tool {0!s}>'.format(self.name)

    def __repr__(self):
        # NOTE(review): encoding to bytes here is a Python-2 idiom.
        return self.__unicode__().encode('utf-8')

    @property
    def to_html(self):
        # Render this tool as one bootstrap table row (wrench glyphicon).
        return u"<tr><th><span class='glyphicon glyphicon-wrench' aria-hidden='true'>" \
               u"</span></th><th>{0}</th><th>{1}</th><th>{2}</th><th>{3}</th></tr>"\
            .format(self.name, self.size, self.number, self.description)
def build_whoosh_database():
    """Rebuild the whoosh database from all info records and posts."""
    analyzer = ChineseAnalyzer()
    schema = Schema(title=TEXT(stored=True, analyzer=analyzer),
                    type=TEXT(stored=True),
                    link=ID(stored=True),
                    content=TEXT(stored=True, analyzer=analyzer))
    ix = create_in(whoosh_database, schema)
    writer = ix.writer()
    # Index every info record: cnt_html is unescaped, then flattened
    # to plain text for the content field.
    for rec in MInfor().get_all():
        plain = html2text.html2text(
            tornado.escape.xhtml_unescape(rec.cnt_html))
        writer.add_document(
            title=rec.title,
            type='<span style="color:red;">[信息]</span>',
            link='/info/{0}'.format(rec.uid),
            content=plain,
        )
    # Index every post the same way.
    for rec in MPost().query_all():
        plain = html2text.html2text(
            tornado.escape.xhtml_unescape(rec.cnt_html))
        print(plain)
        writer.add_document(
            title=rec.title,
            type='<span style="color:blue;">[文档]</span>',
            link='/post/{0}.html'.format(rec.uid),
            content=plain,
        )
    writer.commit()
class Config: SECRET_KEY = os.environ.get('SECRET_KEY') or 'hard_to_guess_string' # [IMP] 已被flask-sqlalchemy删除] 当前request结束时自动commit对sqlalchemy的所有更改 # [IMP] 相应视图函数中需要增加db.session.commit()语句 SQLALCHEMY_COMMIT_ON_TEARDOWN = True # UserWarning: SQLALCHEMY_TRACK_MODIFICATIONS adds significant overhead and # will be disabled by default in the future. Set it to True to suppress this warning. SQLALCHEMY_TRACK_MODIFICATIONS = False SQLALCHEMY_RECORD_QUERIES = True # 启用缓慢查询记录功能的配置(FLASKY_SLOW_DB_QUERY_TIME=0.5) MAIL_SERVER = 'smtp.qq.com' # 由于GFW使用qq邮箱 MAIL_PORT = 587 MAIL_USE_TLS = True MAIL_USERNAME = os.environ.get('MAIL_USERNAME') or '*****@*****.**' MAIL_PASSWORD = os.environ.get( 'MAIL_PASSWORD') or 'lxkimgiejkuwbiai' # qq邮箱设置中生成的授权码,在第三方客户端中替代密码使用 FLASKY_MAIL_SUBJECT_PREFIX = '[Flasky]' FLASKY_MAIL_SENDER = 'Flasky Admin <*****@*****.**>' # 发件人名称字符串 FLASKY_ADMIN = os.environ.get('FLASKY_ADMIN') or '*****@*****.**' FLASKY_POSTS_PER_PAGE = 20 # pagination/分页:设置每页显示的条数 FLASKY_FOLLOWERS_PER_PAGE = 50 FLASKY_COMMENTS_PER_PAGE = 30 FLASKY_SLOW_DB_QUERY_TIME = 0.5 WHOOSH_ANALYZER = ChineseAnalyzer() # 设置whoosh搜索引擎的默认分析器(用于分词) @staticmethod # behave like plain functions except that you can call them from an instance or the class def init_app( app ): # staticmethods are used to group functions which have some logical connection to a class pass
def index_create(request):
    """Django view helper: (re)build the whoosh index from all News rows.

    Whoosh requires a Schema before the first index is created; the
    schema lists one entry per indexed field.  Field types used here:

    - whoosh.fields.TEXT: body text -- indexed (and optionally stored)
      with term positions so phrase searches work.
    - stored=True makes a field returnable with search hits.

    (whoosh.fields.ID indexes a value as a single unit -- useful for
    paths/URLs/dates; whoosh.fields.STORED stores without indexing.)
    """
    tokenizer = ChineseAnalyzer()  # jieba-based Chinese word segmentation
    news_schema = Schema(title=TEXT(stored=True, analyzer=tokenizer),
                         content=TEXT(analyzer=tokenizer))
    # "index" is the target directory; indexname names the index inside it.
    ix = create_in("index", schema=news_schema, indexname='index')
    writer = ix.writer()
    # Feed every news row into the index.
    for item in News.objects.all():
        writer.add_document(title=item.title, content=item.content)
    writer.commit()
class Channel(Base, db.Model, Utils):
    """Message channel; whoosh-searchable on name."""
    __tablename__ = 'channel'
    __searchable__ = ['name']
    __analyzer__ = ChineseAnalyzer()
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(45))
    introduce = Column(Text)
    creator = Column(ForeignKey('user.id'))
    messages = relationship('Message',
                            secondary='channel_2_message',
                            backref='m_channel',
                            lazy='dynamic')

    def creat_channel(self):
        # Persist this channel only when the name is not taken yet.
        # NOTE: the "creat" typo in the method name is kept -- callers
        # elsewhere depend on it.
        if not self.is_channel(self.name):
            new_theme = Channel(name=self.name,
                                introduce=self.introduce,
                                creator=self.creator)
            new_theme.save()

    def is_channel(self, name):
        """Return True when a channel named *name* already exists."""
        return db.session.query(Channel).filter(
            Channel.name == name).count() > 0

    def __repr__(self):
        return '<Channel %s>' % self.name
def __init__(self, root_dir):
    """Set up crawler state and the whoosh schema rooted at *root_dir*."""
    self.root_dir = root_dir
    self.prefix_len = len(self.root_dir)
    tokenizer = ChineseAnalyzer()
    # Anchor text is boosted 3x over page content; phrase=False drops
    # positional information for the anchor field.
    self.schema = Schema(
        title=ID(stored=True),
        url=ID(stored=True),
        content=TEXT(stored=True, analyzer=tokenizer, field_boost=1.0),
        type=ID(stored=True),
        anchor=TEXT(stored=True, analyzer=tokenizer, field_boost=3.0,
                    phrase=False),
        page_rank=NUMERIC(stored=True, numtype=float, bits=32),
        extension=ID(stored=True),
    )
    # Classify URLs by extension: images, office/text documents,
    # archives & media, and resources that are never parsed.
    self.img_pattern = re.compile(
        r'.+\.jpg|.+\.png|.+\.gif|.+\.bmp|.+\.jpeg', re.I)
    self.doc_pattern = re.compile(
        r'.+\.pdf|.+\.doc|.+\.docx|.+\.xls|.+\.xlsx|.+\.ppt|.+\.pptx|.+\.txt', re.I)
    self.file_pattern = re.compile(
        r'.+\.rar|.+\.mp4|.+\.mp3|.+\.zip|.+\.tar|.+\.gz|.+\.7z', re.I)
    self.unresolvable_pattern = re.compile(r'.+\.js|.+\.css', re.I)
    self.website_cnt = 0
    self.link_analyzer = LinkAnalyzer()
    # Index, writer and timestamp are created lazily elsewhere.
    self.ix = None
    self.writer = None
    self.time = None
def indexing(self):
    """Build separate whoosh indexes for text pages and document pages.

    Text pages go into indexdir_text, document pages into indexdir_doc;
    the two indexes share one schema (documents get an empty title).
    """
    if not os.path.exists("indexdir_text"):
        os.mkdir("indexdir_text")
    if not os.path.exists("indexdir_doc"):
        os.mkdir("indexdir_doc")
    # jieba-based Chinese tokenizer.
    analyser = ChineseAnalyzer()
    schema = Schema(ID=STORED,
                    url=TEXT(stored=True),
                    title=TEXT(stored=True, analyzer=analyser),
                    content=TEXT(analyzer=analyser))
    # Index the text pages.  IDIOM FIX: iterate items() instead of
    # keys() + repeated lookups, and stop shadowing the builtin ``id``.
    ix = create_in("indexdir_text", schema)
    writer = ix.writer()
    for page_id, page in self.text_url.items():
        writer.add_document(ID=page_id,
                            url=self.id_to_str[page_id],
                            title=page["title"],
                            content=page["text"])
    writer.commit()
    # Same procedure for document pages (no title available).
    ix = create_in("indexdir_doc", schema)
    writer = ix.writer()
    for page_id, page in self.doc_url.items():
        writer.add_document(ID=page_id,
                            url=self.id_to_str[page_id],
                            title="",
                            content=page["text"])
    writer.commit()
def insert_in(data, index_file='./index/job_index', is_job=True):
    """Append documents to an existing whoosh index.

    data: list of row dicts.  is_job selects the job layout
    (jobName/type/company_name) vs the farm-product layout
    (maintype/place).  Returns True on success, False on bad input or
    any failure (original contract).
    """
    try:
        if not isinstance(data, list):
            return False
        # CLEANUP: the original also built a ChineseAnalyzer and a Schema
        # here, but neither was ever used -- open_dir() loads the schema
        # stored with the index, so the dead code is removed.
        ix = open_dir(index_file)
        writer = ix.writer()
        if is_job:
            for iterm in data:
                writer.add_document(id=iterm['id'],
                                    job_name=iterm['jobName'],
                                    type=iterm['type'],
                                    company_name=iterm['company_name'])
        else:
            for iterm in data:
                writer.add_document(id=iterm['id'],
                                    maintype=iterm['maintype'],
                                    place=iterm['place'])
        writer.commit()
        return True
    except Exception:
        # Narrowed from a bare except; contract stays False-on-failure.
        return False
def build_index(self): analyzer = ChineseAnalyzer() # 创建索引模板 schema = Schema( newsId=ID(stored=True), newsTitle=TEXT(stored=True, analyzer=analyzer), newsUrl=ID(stored=True), newsClick=NUMERIC(stored=True, sortable=True), newsPublishTime=TEXT(stored=True), newsContent=TEXT(stored=False, analyzer=analyzer), # 文章内容太长了,不存 ) # 索引文件相关 import os.path if not os.path.exists('index'): os.mkdir('index') ix = create_in('index', schema) print('未发现索引文件,已构建.') else: ix = open_dir('index') print('发现索引文件并载入....') # 索引构建 writer = ix.writer() indexed_amount = 0 total_amount = self.pagesCollection.count_documents({}) false_amount = self.pagesCollection.count_documents({'indexed': 'False'}) print(false_amount, '/', total_amount) while True: try: row = self.pagesCollection.find_one({'indexed': 'False'}) if row is None: # all indexed is 'True' 所有条目已经处理 writer.commit() print('所有条目索引处理完毕.') break else: # get new row 获取了新的条目 writer.add_document( newsId=str(row['_id']), newsTitle=row['newsTitle'], newsUrl=row['newsUrl'], newsClick=int(row['newsClick']), newsPublishTime=row['newsPublishTime'], newsContent=row['newsContent'], ) # the end self.pagesCollection.update_one({'_id': row['_id']}, {'$set': {'indexed': 'True'}}) writer.commit() # 每次构建提交一次 writer = ix.writer() # 然后重新打开 indexed_amount += 1 print(indexed_amount, '/', false_amount, '/', total_amount) except: print(row['_id'], '异常.') print('已处理', indexed_amount, '/', total_amount, '项.') break
class Message(Base, db.Model, Utils):
    """User message; whoosh-searchable on body."""
    __tablename__ = 'message'
    __searchable__ = ['body']
    __analyzer__ = ChineseAnalyzer()
    id = Column(Integer, primary_key=True, autoincrement=True)
    body = Column(String(260))
    time_create = Column(String(45))
    time_update = Column(String(45))
    comment_count = Column(Integer)
    quote_count = Column(Integer)
    author_id = Column(ForeignKey('user.id'))
    quote_id = Column(Integer)  # id of the quoted Message
    # Message type: 0 plain, 1 reply, 2 reply & repost, 3 repost
    type = Column(
        Integer
    )
    channels = relationship('Channel',
                            secondary='channel_2_message',
                            backref='c_message',
                            lazy='dynamic')
    images = relationship('Image', backref='img_Message', lazy='dynamic')
    # NOTE(review): target 'Message' via the message_favo table looks
    # like it should point at the favoriting users -- confirm.
    favo_users = relationship('Message',
                              secondary=message_favo,
                              lazy='dynamic')

    def get_quoted_message(self):
        """Return the Message this one quotes (raises if absent)."""
        return db.session.query(Message).filter(
            Message.id == self.quote_id).one()

    def add_channel(self, channel_name, introduce=''):
        '''
        Attach this message to a channel, creating the channel if needed.
        :param channel_name: channel name
        :param introduce: channel introduction
        :return: Message
        '''
        if db.session.query(Channel).filter(
                Channel.name == channel_name).count() > 0:
            channel = db.session.query(Channel).filter(
                Channel.name == channel_name).first()
        else:
            channel = Channel(name=channel_name,
                              introduce=introduce,
                              creator=self.author_id)
        self.channels.append(channel)
        self.save()
        return self

    def add_images(self, img_url):
        """Attach an uploaded image; silently capped at 5 per message."""
        if self.images.count() < 5:
            image = Image(uploader=self.author_id,
                          uploade_time=tools.generate_timestamp(),
                          url=img_url)
            self.images.append(image)
            self.save()
        return self

    def __repr__(self):
        return '<Message %s>' % self.id
def init_search(self):
    """Initialize the search index.

    NOTE(review): ``idx`` is a local variable and is discarded when this
    method returns -- presumably other code reopens the index by name
    (INDEXNAME) later; confirm, or assign it to an attribute.
    """
    analyzer = ChineseAnalyzer()
    schema = Schema(title=TEXT(stored=True),
                    path=ID(stored=True),
                    content=TEXT(stored=True, analyzer=analyzer))
    # ix = create_in("indexdir", schema)
    # "indexdir" is the directory the index is created in; indexname
    # identifies this index within it.
    idx = create_in("indexdir", schema=schema, indexname=INDEXNAME)
def init_extensions(app):
    """Wire up Flask extensions, admin model views and the whoosh index."""
    global use_cache
    whoosh_searcher.init_app(app)
    configure_uploads(app, upload_photos)
    mail.init_app(app)
    admin.init_app(app)
    mongo.init_app(app, "MONGO")
    oauth.init_app(app)
    login_manager.init_app(app)
    # use_cache = app.config.get('USE_CACHE', False)
    # if use_cache:
    #     cache.init_app(app, {})
    with app.app_context():
        # Register the flask-admin model views (labels are Chinese UI text).
        admin.add_view(admin_view.RolesModelView(mongo.db['roles'], '角色管理'))
        admin.add_view(admin_view.UsersModelView(mongo.db['users'], '用户管理'))
        admin.add_view(
            admin_view.CatalogsModelView(mongo.db['catalogs'], '栏目管理',
                                         category='内容管理'))
        admin.add_view(
            admin_view.PostsModelView(mongo.db['posts'], '帖子管理',
                                      category='内容管理'))
        admin.add_view(
            admin_view.PassagewaysModelView(mongo.db['passageways'], '温馨通道',
                                            category='推广管理'))
        admin.add_view(
            admin_view.FriendLinksModelView(mongo.db['friend_links'], '友链管理',
                                            category='推广管理'))
        admin.add_view(
            admin_view.PagesModelView(mongo.db['pages'], '页面管理',
                                      category='推广管理'))
        admin.add_view(
            admin_view.FooterLinksModelView(mongo.db['footer_links'], '底部链接',
                                            category='推广管理'))
        admin.add_view(
            admin_view.AdsModelView(mongo.db['ads'], '广告管理',
                                    category='推广管理'))
        admin.add_view(admin_view.OptionsModelView(mongo.db['options'], '系统设置'))
        # Initialize the whoosh index for posts (jieba Chinese analyzer).
        chinese_analyzer = ChineseAnalyzer()
        post_schema = Schema(obj_id=ID(unique=True, stored=True),
                             title=TEXT(stored=True, analyzer=chinese_analyzer),
                             content=TEXT(stored=True, analyzer=chinese_analyzer),
                             create_at=DATETIME(stored=True),
                             catalog_id=ID(stored=True),
                             user_id=ID(stored=True))
        whoosh_searcher.add_index('posts', post_schema)
def __init__(self):
    """Define the floor schema and open the index right away."""
    self.file_index = None
    # Every field is stored so hits can show the full floor record;
    # only the floor text itself is tokenized (jieba Chinese analyzer).
    self.schema = Schema(
        floor_id=ID(stored=True),
        user_id=ID(stored=True),
        user_name=ID(stored=True),
        floor_content=TEXT(stored=True, analyzer=ChineseAnalyzer()),
        tie_id=ID(stored=True),
    )
    # Populate file_index immediately.
    self.open_index()
def init_extensions(app):
    """Register Flask extensions, admin views and the whoosh post index."""
    # init_app reads app.config's MONGO_URI, e.g.
    # MONGO_URI = 'mongodb://localhost:27017/pyfly'
    # to pick the target database (pyfly) and port (27017), and exposes
    # it as mongo.db.
    mongo.init_app(app)  # register the app with the mongo extension
    # Assigns login_manager to app.login_manager so the app can use its
    # login/logout machinery.
    login_manager.init_app(app)
    # Read the upload configuration and store it on the app.
    configure_uploads(app, upload_photos)
    admin.init_app(app)
    mail.init_app(app)
    whoosh_searcher.init_app(app)
    with app.app_context():
        # flask-admin model views (labels are Chinese UI text).
        admin.add_view(admin_view.OptionsModelView(mongo.db['options'], '系统设置'))
        admin.add_view(admin_view.UsersModelView(mongo.db['users'], '用户管理'))
        admin.add_view(
            admin_view.CatalogsModelView(mongo.db['catalogs'], '栏目管理',
                                         category='内容管理'))
        admin.add_view(
            admin_view.PostsModelView(mongo.db['posts'], '帖子管理',
                                      category='内容管理'))
        admin.add_view(
            admin_view.PassagewaysModelView(mongo.db['passageways'], '温馨通道',
                                            category='推广管理'))
        admin.add_view(
            admin_view.FriendLinksModelView(mongo.db['friend_links'], '友链管理',
                                            category='推广管理'))
        admin.add_view(
            admin_view.PagesModelView(mongo.db['pages'], '页面管理',
                                      category='推广管理'))
        admin.add_view(
            admin_view.FooterLinksModelView(mongo.db['footer_links'], '底部链接',
                                            category='推广管理'))
        admin.add_view(
            admin_view.AdsModelView(mongo.db['ads'], '广告管理',
                                    category='推广管理'))
        # Use jieba for Chinese word segmentation.
        chinese_analyzer = ChineseAnalyzer()
        # Build the index schema object for posts.
        post_schema = Schema(obj_id=ID(unique=True, stored=True),
                             title=TEXT(stored=True, analyzer=chinese_analyzer),
                             content=TEXT(stored=True, analyzer=chinese_analyzer),
                             create_at=DATETIME(stored=True),
                             catalog_id=ID(stored=True),
                             user_id=ID(stored=True))
        whoosh_searcher.add_index('posts', post_schema)
class job_info(db.Model):
    """Job posting record; whoosh-searchable on city/company/position.

    NOTE: class name is lowercase by project convention (should be
    PascalCase, but renaming would break existing callers).
    """
    __tablename__ = 'job_info'
    __searchable__ = ['city', 'companyName', 'positionName']
    __analyzer__ = ChineseAnalyzer()
    # Primary key is a client-generated UUID string (set in __init__).
    id = db.Column(db.String(45), primary_key=True)
    companyName = db.Column(db.String(45))
    industryField = db.Column(db.String(45))
    positionName = db.Column(db.String(45))
    jobNature = db.Column(db.String(45))
    createTime = db.Column(db.String(255))
    education = db.Column(db.String(255))
    financeStage = db.Column(db.String(45))
    companyLabelList = db.Column(db.String(255))
    companySize = db.Column(db.String(45))
    city = db.Column(db.String(255))
    district = db.Column(db.String(45))
    positionType = db.Column(db.String(45))
    positionAdvantage = db.Column(db.String(45))
    # Raw salary text plus parsed min/max/average values.
    Salary = db.Column(db.String(45))
    salaryMax = db.Column(db.Integer())
    salaryMin = db.Column(db.Integer())
    salaryAvg = db.Column(db.Integer())
    workYear = db.Column(db.String(45))
    jobinfo = db.relationship('Job_hire', backref='jobinfo', lazy='dynamic')

    def __init__(self, companyName, industryField, positionName, jobNature, createTime, financeStage,\
                 companyLabelList, companySize, city, district, positionType, education,\
                 positionAdvantage, Salary, salaryMax, salaryMin, salaryAvg, workYear):
        # Generate the UUID primary key and copy every field verbatim.
        self.id = str(uuid4())
        self.companyName = companyName
        self.industryField = industryField
        self.positionName = positionName
        self.jobNature = jobNature
        self.createTime = createTime
        self.financeStage = financeStage
        self.companyLabelList = companyLabelList
        self.companySize = companySize
        self.city = city
        self.district = district
        self.positionType = positionType
        self.education = education
        self.positionAdvantage = positionAdvantage
        self.Salary = Salary
        self.salaryMax = salaryMax
        self.salaryMin = salaryMin
        self.salaryAvg = salaryAvg
        self.workYear = workYear

    def __repr__(self):
        return '<Model job_info `{}`>'.format(self.id)