def search():
    """Full-text search over cards; digit-only terms also match attack/defense.

    Reads the raw query string from the request, strips characters that have
    special meaning to ``to_tsquery`` (``' ! ( ) :``), then runs one query per
    whitespace-separated term and concatenates the formatted result rows.

    :return: rendered ``searchTemplate.html`` with the combined result list.
    """
    raw = request.args.get('query')
    # One C-level pass instead of five chained .replace() calls.
    raw = raw.translate(str.maketrans('', '', "'!():"))
    search_list = []
    # Loop variable renamed: the original rebound the name ``search``,
    # shadowing this view function inside its own body.
    for term in raw.split(" "):
        # Full-text match clauses over every searchable text column.
        clauses = [
            func.to_tsvector('english', col).match(
                term, postgresql_regconfig='english')
            for col in (my_cards.c.text, my_cards.c.name, my_cards.c.cardType,
                        my_cards.c.subType, my_cards.c.family)
        ]
        if term.isdigit():
            # Numeric terms additionally match exact attack/defense values.
            clauses.append(my_cards.c.attack == int(term))
            clauses.append(my_cards.c.defense == int(term))
        search_data = conn.execute(select([my_cards]).where(or_(*clauses)))
        search_list += format_list(search_data)
    return render_template('searchTemplate.html', search_data=search_list)
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter.

    Parses *info* as a "|"-separated list of ``key::value`` pairs applied to
    the model's JSON ``info`` column.  With ``fulltextsearch == '1'`` each
    value is matched via ``to_tsvector`` and a ``ts_headline``/``ts_rank_cd``
    pair is produced for result highlighting and ranking; otherwise plain
    text equality on the extracted key is used.  When *info* contains no
    "::", the whole ``info`` column is compared to the JSON-encoded value.

    NOTE(review): ``'::' in info`` raises TypeError if *info* is None —
    presumably callers always pass a string here; verify against callers.

    :returns: tuple ``(clauses, headlines, order_by_ranks)``
    """
    clauses = []
    headlines = []
    order_by_ranks = []
    if '::' in info:
        pairs = info.split('|')
        for pair in pairs:
            if pair != '':
                k,v = pair.split("::")
                if fulltextsearch == '1':
                    # ``astext`` extracts the JSON value as text for matching.
                    vector = _entity_descriptor(model, 'info')[k].astext
                    clause = func.to_tsvector(vector).match(v)
                    clauses.append(clause)
                    # Only the first full-text pair contributes a headline.
                    if len(headlines) == 0:
                        headline = func.ts_headline(self.language, vector, func.to_tsquery(v))
                        headlines.append(headline)
                    order = func.ts_rank_cd(func.to_tsvector(vector), func.to_tsquery(v), 4).label('rank')
                    order_by_ranks.append(order)
                else:
                    clauses.append(_entity_descriptor(model, 'info')[k].astext == v)
    else:
        # Whole-column comparison against the JSON-encoded value.
        info = json.dumps(info)
        clauses.append(cast(_entity_descriptor(model, 'info'), Text) == info)
    return clauses, headlines, order_by_ranks
def setup_method(self, _):
    """Populate the FullTextSearch table with three public test entries.

    BUG FIX: the original assigned ``entry1.public = True`` three times
    (copy-paste error), leaving ``entry2``/``entry3`` with ``public`` unset;
    all three entries are clearly intended to be public.
    """
    import transaction
    from sqlalchemy import func
    from geoalchemy2 import WKTElement
    from c2cgeoportal_commons.models import DBSession
    from c2cgeoportal_commons.models.main import FullTextSearch

    entries = []
    for label, layer, words in (
        ("label 1", "layer1", "soleil travail"),
        ("label 2", "layer2", "pluie semaine"),
        ("label 3", "layer2", "vent neige"),
    ):
        entry = FullTextSearch()
        entry.label = label
        entry.layer_name = layer
        entry.ts = func.to_tsvector("french", words)
        entry.the_geom = WKTElement("POINT(-90 -45)", 21781)
        entry.public = True
        entries.append(entry)

    DBSession.add_all(entries)
    transaction.commit()
def search():
    """Admin search: redirect on exact identifier matches, else full-text user search.

    Exact matches on user email, GoCardless id/mandate, Stripe charge id or
    bank reference redirect straight to the relevant detail page; otherwise a
    tsvector search over user names and emails renders a result list.
    """
    q = request.args['q']

    email_exact = User.query.filter(User.email == q).one_or_none()
    if email_exact:
        return redirect(url_for('.user', user_id=email_exact.id))

    gc_exact = GoCardlessPayment.query.filter(
        (GoCardlessPayment.gcid == q) | (GoCardlessPayment.mandate == q)).one_or_none()
    if gc_exact:
        return redirect(url_for('.payment', payment_id=gc_exact.id))

    stripe_exact = StripePayment.query.filter(StripePayment.chargeid == q).one_or_none()
    if stripe_exact:
        return redirect(url_for('.payment', payment_id=stripe_exact.id))

    bank_exact = BankPayment.query.filter(BankPayment.bankref == q).one_or_none()
    if bank_exact:
        return redirect(url_for('.payment', payment_id=bank_exact.id))

    # Emails are searched with '@' replaced by a space so the local part and
    # the domain become separate lexemes.
    email_query = to_query(q.replace('@', ' '))

    # Careful with the following query. It'll stop using the indexes if you change the
    # functions applied to the indexed columns. Which isn't really the end of the world given
    # how small our dataset is, but I spent ages trying to work out how to get Alembic to add
    # those indexes. So humour me.
    results = User.query.filter(
        func.to_tsvector('simple', User.name).match(to_query(q)) |
        (func.to_tsvector('simple', func.replace(User.email, '@', ' ')).match(email_query))
    )
    return render_template('admin/search-results.html', q=q, results=results)
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter.

    Parses *info* as a "|"-separated list of ``key::value`` pairs applied to
    the model's JSON ``info`` column.  With ``fulltextsearch == '1'`` each
    value becomes a ``to_tsvector`` match clause plus ranking/headline
    expressions; otherwise plain text equality on the extracted key is used.
    When *info* contains no "::", the whole column is compared to the
    JSON-encoded value.

    NOTE(review): ``'::' in info`` raises TypeError for ``info=None`` —
    presumably callers always pass a string; verify against callers.

    :returns: tuple ``(clauses, headlines, order_by_ranks)``
    """
    clauses = []
    headlines = []
    order_by_ranks = []
    if '::' in info:
        pairs = info.split('|')
        for pair in pairs:
            if pair != '':
                k, v = pair.split("::")
                if fulltextsearch == '1':
                    # ``astext`` extracts the JSON value as plain text.
                    vector = _entity_descriptor(model, 'info')[k].astext
                    clause = func.to_tsvector(vector).match(v)
                    clauses.append(clause)
                    # Only the first full-text pair contributes a headline.
                    if len(headlines) == 0:
                        headline = func.ts_headline(
                            self.language, vector, func.to_tsquery(v))
                        headlines.append(headline)
                    order = func.ts_rank_cd(func.to_tsvector(vector),
                                            func.to_tsquery(v),
                                            4).label('rank')
                    order_by_ranks.append(order)
                else:
                    clauses.append(
                        _entity_descriptor(model, 'info')[k].astext == v)
    else:
        # Whole-column comparison against the JSON-encoded value.
        info = json.dumps(info)
        clauses.append(
            cast(_entity_descriptor(model, 'info'), Text) == info)
    return clauses, headlines, order_by_ranks
def setUp(self):
    """Create two users/roles and five FullTextSearch entries covering the
    public, private and role-restricted cases exercised by the search tests."""
    import transaction
    from sqlalchemy import func
    from geoalchemy import WKTSpatialElement
    from c2cgeoportal.models import FullTextSearch, User, Role
    from c2cgeoportal.models import DBSession

    user1 = User(username=u'__test_user1', password=u'__test_user1')
    role1 = Role(name=u'__test_role1', description=u'__test_role1')
    user1.role = role1

    user2 = User(username=u'__test_user2', password=u'__test_user2')
    role2 = Role(name=u'__test_role2', description=u'__test_role2')
    user2.role = role2

    # Public entry on layer1.
    entry1 = FullTextSearch()
    entry1.label = 'label1'
    entry1.layer_name = 'layer1'
    entry1.ts = func.to_tsvector('french', 'soleil travail')
    entry1.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry1.public = True

    # Private entry with no role restriction.
    entry2 = FullTextSearch()
    entry2.label = 'label2'
    entry2.layer_name = 'layer2'
    entry2.ts = func.to_tsvector('french', 'pluie semaine')
    entry2.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry2.public = False

    # Private entry restricted to role2.
    entry3 = FullTextSearch()
    entry3.label = 'label3'
    entry3.layer_name = 'layer3'
    entry3.ts = func.to_tsvector('french', 'vent neige')
    entry3.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry3.public = False
    entry3.role = role2

    # Public entry with the same indexed text as entry1 (duplicate hit case).
    entry4 = FullTextSearch()
    entry4.label = 'label4'
    entry4.layer_name = 'layer1'
    entry4.ts = func.to_tsvector('french', 'soleil travail')
    entry4.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry4.public = True

    # Public entry carrying an extra "params" payload.
    entry5 = FullTextSearch()
    entry5.label = 'label5'
    entry5.layer_name = 'layer1'
    entry5.ts = func.to_tsvector('french', 'params')
    entry5.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry5.public = True
    entry5.params = {'floor': 5}

    # Users/roles are persisted via the user objects; roles cascade from them.
    DBSession.add_all(
        [user1, user2, entry1, entry2, entry3, entry4, entry5])
    transaction.commit()
def setUp(self):  # noqa
    """Create two users/roles and five FullTextSearch entries (geoalchemy2
    variant, SRID 21781) covering public, private, role-restricted and
    params-carrying cases for the search tests."""
    import transaction
    from sqlalchemy import func
    from geoalchemy2 import WKTElement
    from c2cgeoportal.models import FullTextSearch, User, Role
    from c2cgeoportal.models import DBSession

    user1 = User(username=u"__test_user1", password=u"__test_user1")
    role1 = Role(name=u"__test_role1", description=u"__test_role1")
    user1.role_name = role1.name

    user2 = User(username=u"__test_user2", password=u"__test_user2")
    role2 = Role(name=u"__test_role2", description=u"__test_role2")
    user2.role_name = role2.name

    # Public entry on layer1.
    entry1 = FullTextSearch()
    entry1.label = "label1"
    entry1.layer_name = "layer1"
    entry1.ts = func.to_tsvector("french", "soleil travail")
    entry1.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry1.public = True

    # Private entry with no role restriction.
    entry2 = FullTextSearch()
    entry2.label = "label2"
    entry2.layer_name = "layer2"
    entry2.ts = func.to_tsvector("french", "pluie semaine")
    entry2.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry2.public = False

    # Private entry restricted to role2.
    entry3 = FullTextSearch()
    entry3.label = "label3"
    entry3.layer_name = "layer3"
    entry3.ts = func.to_tsvector("french", "vent neige")
    entry3.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry3.public = False
    entry3.role = role2

    # Public entry duplicating entry1's indexed text.
    entry4 = FullTextSearch()
    entry4.label = "label4"
    entry4.layer_name = "layer1"
    entry4.ts = func.to_tsvector("french", "soleil travail")
    entry4.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry4.public = True

    # Public entry carrying an extra "params" payload.
    entry5 = FullTextSearch()
    entry5.label = "label5"
    entry5.layer_name = "layer1"
    entry5.ts = func.to_tsvector("french", "params")
    entry5.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry5.public = True
    entry5.params = {"floor": 5}

    DBSession.add_all([user1, user2, role1, role2, entry1, entry2, entry3, entry4, entry5])
    transaction.commit()
def setUp(self):
    """Create two users/roles and five FullTextSearch entries (geoalchemy
    WKTSpatialElement variant) for the full-text search tests."""
    import transaction
    from sqlalchemy import func
    from geoalchemy import WKTSpatialElement
    from c2cgeoportal.models import FullTextSearch, User, Role
    from c2cgeoportal.models import DBSession

    user1 = User(username=u'__test_user1', password=u'__test_user1')
    role1 = Role(name=u'__test_role1', description=u'__test_role1')
    user1.role = role1

    user2 = User(username=u'__test_user2', password=u'__test_user2')
    role2 = Role(name=u'__test_role2', description=u'__test_role2')
    user2.role = role2

    # Public entry on layer1.
    entry1 = FullTextSearch()
    entry1.label = 'label1'
    entry1.layer_name = 'layer1'
    entry1.ts = func.to_tsvector('french', 'soleil travail')
    entry1.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry1.public = True

    # Private entry with no role restriction.
    entry2 = FullTextSearch()
    entry2.label = 'label2'
    entry2.layer_name = 'layer2'
    entry2.ts = func.to_tsvector('french', 'pluie semaine')
    entry2.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry2.public = False

    # Private entry restricted to role2.
    entry3 = FullTextSearch()
    entry3.label = 'label3'
    entry3.layer_name = 'layer3'
    entry3.ts = func.to_tsvector('french', 'vent neige')
    entry3.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry3.public = False
    entry3.role = role2

    # Public entry duplicating entry1's indexed text.
    entry4 = FullTextSearch()
    entry4.label = 'label4'
    entry4.layer_name = 'layer1'
    entry4.ts = func.to_tsvector('french', 'soleil travail')
    entry4.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry4.public = True

    # Public entry carrying an extra "params" payload.
    entry5 = FullTextSearch()
    entry5.label = 'label5'
    entry5.layer_name = 'layer1'
    entry5.ts = func.to_tsvector('french', 'params')
    entry5.the_geom = WKTSpatialElement("POINT(-90 -45)")
    entry5.public = True
    entry5.params = {'floor': 5}

    # Roles are persisted via the user relationship cascade.
    DBSession.add_all([user1, user2, entry1, entry2, entry3, entry4, entry5])
    transaction.commit()
def _apply_dynamic_fulltext_filters(ingredients, backup_search=False):
    """
    Applies full text filters similar to v1 but uses full text search approach
    for lexemes and speed up with index usage.
    N.B. that spaces in ingredient names will be replaced with '&'
    :param ingredients: List<string>: ["onion", "chicken", "pork tenderloin"]
    :param backup_search: when True, search up to five random '|'-joined
        3-combinations of the ingredients and also match recipe titles
    :return: list of OR clauses, one per (possibly combined) ingredient term
    """
    dynamic_filters = []
    # Cap on Ingredient rows matched per term; small queries get a tighter cap.
    max_ingredients = 500 if len(
        ingredients
    ) > 2 else 25  # 5 if backup_search else 250  # limits the amount of match ingredients; necessary in large or backup search
    # Default title check is a no-op (matches nothing); replaced below for
    # backup searches.
    title_subquery = lambda _: IngredientRecipe.recipe.in_(
        [-1])  # function to add title checking fo backup query
    if backup_search:
        max_ingredients = 10
        # searches for some combination of ingredients instead of individual
        # ingredients in the backup search
        ingredients = ['|'.join(i) for i in combinations(ingredients, 3)] \
            if len(ingredients) >= 3 else ['|'.join(ingredients)]
        # print(len(ingredients))
        # Sample at most 5 random combinations to bound query cost.
        random.shuffle(ingredients)
        ingredients = ingredients[:5]
        title_subquery = lambda ingredient: (IngredientRecipe.recipe.in_(
            db.session.query(Recipe.pk).filter(
                func.to_tsquery(FULLTEXT_INDEX_CONFIG, ingredient).op('@@')
                (func.to_tsvector(FULLTEXT_INDEX_CONFIG,
                                  func.coalesce(Recipe.title)))).limit(50)))
    # print(ingredients)
    for ingredient in ingredients:
        # Each term must match either a linked Ingredient name (ordered by
        # trigram similarity, capped at max_ingredients rows) or — in backup
        # mode — the recipe title.
        dynamic_filters.append(
            or_(
                IngredientRecipe.recipe.in_(
                    db.session.query(IngredientRecipe.recipe).filter(
                        IngredientRecipe.ingredient.in_(
                            db.session.query(Ingredient.pk).filter(
                                func.to_tsquery(FULLTEXT_INDEX_CONFIG,
                                                func.coalesce(ingredient)).op('@@')(
                                    func.to_tsvector(FULLTEXT_INDEX_CONFIG,
                                                     func.coalesce(Ingredient.name))
                                )
                            ).\
                            order_by(
                                desc(
                                    func.similarity(
                                        ingredient,
                                        Ingredient.name
                                    )
                                )
                            ).limit(max_ingredients)
                        )
                    ),
                ),
                title_subquery(ingredient)
            )
        )
    return dynamic_filters
def text_search(self, tquery_str: str) -> "BookmarkViewQueryBuilder":
    """Restrict the query to bookmarks whose title, description or crawled
    full text matches *tquery_str* (a ``to_tsquery`` expression).

    Returns ``self`` so calls can be chained.
    """
    self._query = self._query.outerjoin(
        FullText, FullText.url_uuid == SQLABookmark.url_uuid
    )
    # A bookmark may have no fulltext row at all, so coalesce to an empty
    # tsvector before concatenating.
    crawl_vec = func.coalesce(FullText.tsvector, func.to_tsvector(""))
    title_vec = func.to_tsvector(SQLABookmark.title)
    desc_vec = func.to_tsvector(SQLABookmark.description)
    self._combined_tsvector = title_vec.op("||")(desc_vec).op("||")(crawl_vec)
    self._tsquery = func.to_tsquery(tquery_str)
    self._query = self._query.filter(
        self._combined_tsvector.op("@@")(self._tsquery)
    )
    return self
def _get_fts_phrase_clauses(self, data):
    """Return full-text search phrase clauses.

    Each column in the parsed "fts_phrase" JSON must map to a dict with a
    required "query" string and an optional integer "distance" (default 1),
    used as the tsquery proximity operator ``<N>`` between tokens, e.g.
    "big red" with distance 2 becomes ``big <2> red``.

    :raises ValueError: if a setting is not a dict or "query" is missing.
    """
    q = self._parse_json('fts_phrase', data)
    err_base = 'invalid "fts_phrase" clause'
    clauses = []
    for col, settings in q.items():
        vector = self._get_vector(col)
        # Check params
        if not isinstance(settings, dict):
            msg = '{0}: {1} is not {2}'.format(err_base, col, dict)
            raise ValueError(msg)
        query = settings.get('query')
        if not query:
            msg = '{0}: "query" is required'.format(err_base)
            raise ValueError(msg)
        distance = settings.get('distance', 1)
        operator = ' <{}> '.format(distance)
        # Generate clause.  (The original also built unused ``tokens``/
        # ``word_clauses`` locals; removed as dead code.)
        query_str = operator.join(query.split())
        ts_query = func.to_tsquery(query_str)
        clause = func.to_tsvector(vector).op('@@')(ts_query)
        clauses.append(clause)
    return clauses
def _add_fts( self, item: "c2cgeoportal_commons.models.main.TreeItem", interface: "c2cgeoportal_commons.models.main.Interface", action: str, role: Optional["c2cgeoportal_commons.models.main.Role"], ) -> None: from c2cgeoportal_commons.models.main import FullTextSearch # pylint: disable=import-outside-toplevel key = ( item.name if self.options.name else item.id, interface.id, role.id if role is not None else None, ) if key not in self.imported: self.imported.add(key) for lang in self.languages: fts = FullTextSearch() fts.label = self._[lang].gettext(item.name) fts.role = role fts.interface = interface fts.lang = lang fts.public = role is None fts.ts = func.to_tsvector( self.fts_languages[lang], " ".join( [self.fts_normalizer(self._[lang].gettext(item.name))] + [v.strip() for m in item.get_metadatas("searchAlias") for v in m.value.split(",")] ), ) fts.actions = [{"action": action, "data": item.name}] fts.from_theme = True self.session.add(fts)
def _get_fts_clauses(self, data):
    """Build full-text search clauses from the parsed "fts" JSON filter.

    Each column maps to a dict with a required "query" string, an optional
    "operator" ("and"/"or", default "and") combining the per-word clauses,
    and an optional "prefix" flag (default True) appending ":*" to words.

    :raises ValueError: if a setting is not a dict or "query" is missing.
    """
    parsed = self._parse_json('fts', data)
    err_base = 'invalid "fts" clause'
    clauses = []
    for column, settings in parsed.items():
        vector = self._get_vector(column)
        # Parameter validation.
        if not isinstance(settings, dict):
            raise ValueError(
                '{0}: {1} is not {2}'.format(err_base, column, dict))
        query = settings.get('query')
        if not query:
            raise ValueError('{0}: "query" is required'.format(err_base))
        combine = or_ if settings.get('operator', 'and') == 'or' else and_
        suffix = ':*' if settings.get('prefix', True) else ''
        word_clauses = [
            func.to_tsvector(vector).match(word + suffix)
            for word in query.split()
        ]
        clauses.append(combine(*word_clauses))
    return clauses
def get_first_matching_any_ts_queries_at_column(query, ts_queries, column):
    """Return the first row of *query* whose *column* full-text matches any
    of *ts_queries* (NULL column values are treated as empty strings)."""
    vector = func.to_tsvector(cast(coalesce(column, ''), TEXT))
    match_any = or_(*(
        vector.match(one_query, postgresql_regconfig=LANGUAGE)
        for one_query in ts_queries
    ))
    return query.filter(match_any).first()
def make_fts_expr(languages, target, searchstring, op="&"):
    """Searches fulltext column, building ts_vector on the fly.

    `target` must have a gin index built with an ts_vector or this will be
    extremely slow.

    :param languages: iterable of postgresql language strings; one tsvector
        per language is built and concatenated with ``||``
    :param target: SQLAlchemy expression with type text
    :param searchstring: string of space-separated words to search
    :param op: operator used to join searchterms separated by space, | or &
    """
    languages = list(languages)
    prepared_searchstring = _prepare_searchstring(op, searchstring)
    # Concatenate a tsvector per language so a match in any language hits.
    tsvec = func.to_tsvector(languages[0], target)
    for language in languages[1:]:
        tsvec = tsvec.op("||")(func.to_tsvector(language, target))
    return make_fts_expr_tsvec(languages, tsvec, prepared_searchstring, op)
def search():
    """Generic expertise search page.

    GET renders the empty search form; POST filters users by country,
    locales, expertise domains and a free-text profile search, returning
    at most 20 results.
    """
    form = SearchForm()
    if request.method == 'GET':
        return render_template('search.html', form=form)
    if request.method == 'POST':
        query = User.query  #pylint: disable=no-member
        # 'ZZ' presumably means "no country selected" — verify against the
        # SearchForm country choices.
        if form.country.data and form.country.data != 'ZZ':
            query = query.filter(User.country == form.country.data)
        if form.locales.data:
            query = query.join(User.languages).filter(UserLanguage.locale.in_(
                form.locales.data))
        if form.expertise_domain_names.data:
            query = query.join(User.expertise_domains).filter(UserExpertiseDomain.name.in_(
                form.expertise_domain_names.data))
        if form.fulltext.data:
            # Free-text search over the space-joined profile fields.
            query = query.filter(func.to_tsvector(func.array_to_string(array([
                User.first_name, User.last_name, User.organization,
                User.position, User.projects]), ' ')).op('@@')(
                    func.plainto_tsquery(form.fulltext.data)))
        # TODO ordering by relevance
        return render_template('search-results.html',
                               title='Expertise search',
                               form=form,
                               results=query.limit(20).all())
def transcribe_with_aws(
    document_id: int,
    aws_bucket_name: str = "soundqesstt",
    lang: str = "de-DE",
    db: Session = Depends(deps.get_db),
) -> Any:
    """Transcribe a document with AWS and store its full-text search vector.

    Looks up the document, runs AWS transcription on it, then persists the
    transcribed text, its tsvector and the PostgreSQL regconfig matching the
    document language.

    :raises ValueError: if no regconfig is known for the document language.
    """
    document = crud.document.get(db, id=document_id)
    aws.transcribe_document(document, aws_bucket_name=aws_bucket_name, lang=lang)
    # transcribe_document is expected to attach a transcription — TODO:
    # prefer an explicit error; assert is stripped under ``python -O``.
    assert document.transcription
    document.fulltext = document.transcription.full_text
    document.fulltext_search_vector = func.to_tsvector(
        document.transcription.full_text)
    document.fulltext_regconfig = language_to_regconfig.get(
        Language(document.language))
    if not document.fulltext_regconfig:
        raise ValueError(
            f"Not regconfig known for language {document.language}.")
    db.commit()
    db.refresh(document)
    return document
def make_fulltext_indexed_bookmark(
    session: Session, user: User, bookmark: sut.Bookmark, full_text: str
):
    """Insert *bookmark* for *user* together with a crawl request/response
    pair and a FullText row, so text-search tests can match *full_text*."""
    # FIXME: this really shows the need for a library of common db functions
    url_uuid = sut.set_bookmark(session, user.user_uuid, bookmark)
    crawl_uuid = uuid4()
    body_uuid = uuid4()
    crawl_req = sut.CrawlRequest(
        crawl_uuid=crawl_uuid,
        url_uuid=url_uuid,
        requested=datetime(2018, 1, 3),
        got_response=True,
    )
    crawl_resp = sut.CrawlResponse(
        crawl_uuid=crawl_uuid,
        headers={"content-type": "text/html"},
        body_uuid=body_uuid,
        status_code=200,
    )
    fulltext_obj = sut.FullText(
        url_uuid=url_uuid,
        crawl_uuid=crawl_uuid,
        inserted=datetime.utcnow().replace(tzinfo=timezone.utc),
        full_text=full_text,
        tsvector=func.to_tsvector(full_text),
    )
    session.add_all([crawl_req, crawl_resp, fulltext_obj])
def create_fulltext_ingredient_search(ingredients, limit=DEFAULT_SEARCH_RESULT_SIZE, op=and_, backup_search=False):
    """
    Function to create a fulltext query to filter out all recipes not containing
    <min_ingredients> ingredients. Ranks by recipe that contains the most
    ingredients, and then ranks by match of ingredients list to the title.
    This could probably be improved by adding additional search criteria similar
    to the previous fulltext search approach in create_fulltext_search_query.

    :param ingredients: List<string> ["onion", "chicken", "peppers"]
    :param limit: number of recipes to return
    :param op: the operator (and_/or_) combining the per-ingredient filters
    :param backup_search: passed through to the dynamic filter builder
    :return: List<Recipe>
    """
    ingredients = _clean_and_stringify_ingredients_query(ingredients)
    # Hoisted: the same tsquery strings were previously rebuilt for every
    # rank term in the ORDER BY expression.
    any_ingredient = '|'.join(ingredients)
    all_ingredients = '&'.join(ingredients)
    return db.session.query(Recipe). \
        join(IngredientRecipe). \
        join(Ingredient). \
        filter(
            op(
                *_apply_dynamic_fulltext_filters(
                    ingredients, backup_search=backup_search)
            )
        ). \
        group_by(Recipe.pk). \
        order_by(desc(
            # Weighted sum: title match, ingredients-text match (any term),
            # per-ingredient name ranks, plus an all-terms "modifier" bonus.
            func.ts_rank_cd(
                func.to_tsvector(FULLTEXT_INDEX_CONFIG, func.coalesce(Recipe.title)),
                func.to_tsquery(FULLTEXT_INDEX_CONFIG, any_ingredient),
                32
            ) * RECIPE_TITLE_WEIGHT +
            func.ts_rank_cd(
                func.to_tsvector(FULLTEXT_INDEX_CONFIG, func.coalesce(Recipe.recipe_ingredients_text)),
                func.to_tsquery(FULLTEXT_INDEX_CONFIG, any_ingredient),
                32
            ) * RECIPE_INGREDIENTS_WEIGHT +
            func.sum(
                func.ts_rank(
                    func.to_tsvector(FULLTEXT_INDEX_CONFIG, func.coalesce(Ingredient.name)),
                    func.to_tsquery(FULLTEXT_INDEX_CONFIG, any_ingredient)
                )
            ) * INGREDIENTS_WEIGHT +
            func.ts_rank_cd(
                func.to_tsvector(FULLTEXT_INDEX_CONFIG, func.coalesce(Recipe.recipe_ingredients_text)),
                func.to_tsquery(FULLTEXT_INDEX_CONFIG, all_ingredients),
                32
            ) * RECIPE_MODIFIERS_WEIGHT
        )).limit(limit).all()
def test_match_tsvector(self):
    """``to_tsvector(col).match(str)`` compiles to ``@@ to_tsquery(...)``."""
    s = select([self.table_alt.c.id]).where(
        func.to_tsvector(self.table_alt.c.title).match('somestring'))
    self.assert_compile(
        s,
        'SELECT mytable.id '
        'FROM mytable '
        'WHERE to_tsvector(mytable.title) '
        '@@ to_tsquery(%(to_tsvector_1)s)')
def get_books_autocomplete(query, chunk, suggestions_per_query):
    """Return one page of book-title autocomplete suggestions.

    :param query: prefix typed by the user (":*" is appended for prefix match)
    :param chunk: 1-based page number
    :param suggestions_per_query: page size
    :return: dict with "results" (list of {"id", "title"}) and a "finished"
        flag that is True when no further page exists
    """
    limit = suggestions_per_query
    offset = limit * (int(chunk) - 1)
    # Build the prefix tsquery once instead of in both filter and order_by.
    prefix_query = query + ':*'
    entity_list = db.session\
        .query(Book.id, Book.title)\
        .filter(func.to_tsvector('simple', Book.title).match(prefix_query))\
        .order_by(func.ts_rank(func.to_tsvector('simple', Book.title),
                               prefix_query))\
        .offset(offset)\
        .limit(limit)\
        .all()
    # Fewer rows than the page size means there is no further page.
    finished = len(entity_list) < limit
    books = [{'id': b.id, 'title': b.title} for b in entity_list]
    return {'results': books, 'finished': finished}
def test_match_tsvectorconfig(self):
    """``to_tsvector('english', col).match(str)`` keeps the config argument
    on to_tsvector but not on to_tsquery."""
    s = select([self.table_alt.c.id]).where(func.to_tsvector("english", self.table_alt.c.title).match("somestring"))
    self.assert_compile(
        s,
        "SELECT mytable.id "
        "FROM mytable "
        "WHERE to_tsvector(%(to_tsvector_1)s, mytable.title) @@ "
        "to_tsquery(%(to_tsvector_2)s)",
    )
def test_match_tsvectorconfig_regconfig(self):
    """``postgresql_regconfig`` adds the 'english' config to to_tsquery."""
    s = select([self.table_alt.c.id]).where(
        func.to_tsvector('english', self.table_alt.c.title).match(
            'somestring', postgresql_regconfig='english'))
    self.assert_compile(
        s,
        'SELECT mytable.id '
        'FROM mytable '
        'WHERE to_tsvector(%(to_tsvector_1)s, mytable.title) @@ '
        """to_tsquery('english', %(to_tsvector_2)s)""")
def search():
    """Full-text search over cards; digit-only terms also match attack/defense.

    Reads the raw query string from the request, strips characters that have
    special meaning to ``to_tsquery`` (``' ! ( ) :``), then runs one query per
    whitespace-separated term and concatenates the formatted result rows.

    :return: rendered ``searchTemplate.html`` with the combined result list.
    """
    raw = request.args.get('query')
    # One C-level pass instead of five chained .replace() calls.
    raw = raw.translate(str.maketrans('', '', "'!():"))
    search_list = []
    # Loop variable renamed: the original rebound the name ``search``,
    # shadowing this view function inside its own body.
    for term in raw.split(" "):
        # Full-text match clauses over every searchable text column; the two
        # query variants previously duplicated this whole expression.
        clauses = [
            func.to_tsvector('english', col).match(
                term, postgresql_regconfig='english')
            for col in (my_cards.c.text, my_cards.c.name, my_cards.c.cardType,
                        my_cards.c.subType, my_cards.c.family)
        ]
        if term.isdigit():
            # Numeric terms additionally match exact attack/defense values.
            clauses.append(my_cards.c.attack == int(term))
            clauses.append(my_cards.c.defense == int(term))
        search_data = conn.execute(select([my_cards]).where(or_(*clauses)))
        search_list += format_list(search_data)
    return render_template('searchTemplate.html', search_data=search_list)
def test_match_tsvectorconfig_regconfig(self):
    """``postgresql_regconfig`` adds the 'english' config to to_tsquery."""
    s = select([self.table_alt.c.id]).where(
        func.to_tsvector('english', self.table_alt.c.title)
        .match('somestring', postgresql_regconfig='english')
    )
    self.assert_compile(
        s,
        'SELECT mytable.id '
        'FROM mytable '
        'WHERE to_tsvector(%(to_tsvector_1)s, mytable.title) @@ '
        """to_tsquery('english', %(to_tsvector_2)s)""")
def _search_columns(
    title_column: Any,
    text_column: Any,
    terms: Sequence[str],
    /,
) -> ColumnElement[Boolean]:
    """Match all *terms* (AND-combined) against the space-joined
    concatenation of the title and text columns, using the English config."""
    combined = title_column + text("' '") + text_column
    tsquery = ' & '.join(terms)
    return func.to_tsvector(text("'english'"), combined).match(
        tsquery, postgresql_regconfig='english')
def test_match_tsvector(self):
    """``to_tsvector(col).match(str)`` compiles to ``@@ to_tsquery(...)``."""
    s = select([self.table_alt.c.id]).where(
        func.to_tsvector(self.table_alt.c.title)
        .match('somestring')
    )
    self.assert_compile(
        s,
        'SELECT mytable.id '
        'FROM mytable '
        'WHERE to_tsvector(mytable.title) '
        '@@ to_tsquery(%(to_tsvector_1)s)')
def addItem():
    """Add an item to the inventory and inventory-history tables.

    Reads the item attributes from the submitted form.  If an identical item
    already exists only its quantity is increased; otherwise a new Inventory
    row plus a fulltext (inventoryTxt) row is created.  In both cases an
    inventoryHistory "Add" record is written, the session is committed and
    the user is redirected to the inventory display.
    """
    shape = request.values.get("shapeSelect")
    # A "New/Custom Type" selection means the real type was typed into the
    # custom text box instead of the dropdown.
    # Local renamed from ``type`` to avoid shadowing the builtin.
    if request.form.get("typeSelect") == "New/Custom Type":
        item_type = request.form.get("customTypeTb")
    else:
        item_type = request.form.get("typeSelect")
    connector = request.form.get("connectorSelect")
    dimension_one = getDimension(request.form.get("addDimensionOne"))
    dimension_two = getDimension(request.form.get("addDimensionTwo"))
    quantity = onlyNumberString(request.form.get("addQuantity"))
    available_quantity = quantity
    price = onlyFloatString(request.form.get("addPrice"))
    notes = request.form.get("addNotes")
    location = request.form.get("addLocation")

    # Check to see if item exists first. If not, add item to db.
    item = Inventory.query.filter_by(shape=shape) \
        .filter_by(type=item_type) \
        .filter_by(dimension_one=dimension_one) \
        .filter_by(dimension_two=dimension_two) \
        .filter_by(connector=connector) \
        .filter_by(notes=notes).first()
    if not item:
        item = Inventory(connector=connector, quantity=quantity,
                         available_quantity=available_quantity,
                         dimension_one=dimension_one,
                         dimension_two=dimension_two, price=price,
                         shape=shape, type=item_type, location=location,
                         notes=notes,
                         date_updated=datetime.datetime.now())
        db.session.add(item)
        # Commit so ``item.id`` is populated for the history row below.
        db.session.commit()
        infoString = createInfoString(shape, item_type, dimension_one,
                                      dimension_two, connector, notes)
        item_txt = inventoryTxt(info=infoString,
                                tsv=func.to_tsvector(infoString))
        history_item = inventoryHistory(quantity=quantity,
                                        date_updated=datetime.datetime.now(),
                                        operation="Add",
                                        inventory_id=item.id,
                                        order_id=1)
        db.session.add(history_item)
        db.session.add(item_txt)
    # Otherwise just add to quantity.
    else:
        item.quantity += int(quantity)
        history_item = inventoryHistory(quantity=quantity,
                                        date_updated=datetime.datetime.now(),
                                        operation="Add",
                                        inventory_id=item.id,
                                        order_id=1)
        db.session.add(history_item)
    db.session.commit()
    return redirect('/inventoryDisplay')
def find_authors(name=None, surname=None, book_title=None):
    """Return a query of authors matching the given prefix criteria,
    ordered by surname then name; ``None`` when no criterion is supplied.

    Each criterion is a full-text prefix match (":*" appended) using the
    'simple' text-search configuration; *book_title* matches via the
    author's books.
    """
    if not (name or surname or book_title):
        return None
    query = Author.query
    if name:
        query = query.filter(
            func.to_tsvector('simple', Author.name).match(
                name + ':*', postgresql_regconfig='simple'))
    if surname:
        query = query.filter(
            func.to_tsvector('simple', Author.surname).match(
                surname + ':*', postgresql_regconfig='simple'))
    if book_title:
        query = query.join(Author.books).filter(
            func.to_tsvector('simple', Book.title).match(
                book_title + ':*', postgresql_regconfig='simple'))
    return query.order_by(Author.surname.asc(), Author.name.asc())
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter.

    Parses *info* as a "|"-separated list of ``key::value`` pairs applied to
    the model's JSON ``info`` column.  With ``fulltextsearch == '1'`` each
    value becomes a ``to_tsvector`` match clause plus ts_headline/ts_rank_cd
    expressions; otherwise plain text equality on the extracted key is used.
    Without "::", dict values are compared whole and string values are
    JSON-decoded and matched with ``contains``.

    :returns: tuple ``(clauses, headlines, order_by_ranks)``
    """
    clauses = []
    headlines = []
    order_by_ranks = []
    if info and '::' in info:
        pairs = info.split('|')
        for pair in pairs:
            if pair != '':
                k, v = pair.split("::")
                if fulltextsearch == '1':
                    # ``astext`` extracts the JSON value as plain text.
                    vector = _entity_descriptor(model, 'info')[k].astext
                    clause = func.to_tsvector(vector).match(v)
                    clauses.append(clause)
                    # Only the first full-text pair contributes a headline.
                    if len(headlines) == 0:
                        headline = func.ts_headline(
                            self.language, vector, func.to_tsquery(v))
                        headlines.append(headline)
                    order = func.ts_rank_cd(
                        func.to_tsvector(vector),
                        func.to_tsquery(v), 4).label('rank')
                    order_by_ranks.append(order)
                else:
                    clauses.append(
                        _entity_descriptor(model, 'info')[k].astext == v)
    else:
        if isinstance(info, dict):
            clauses.append(_entity_descriptor(model, 'info') == info)
        # BUG FIX: the original tested ``type(info) == str`` twice — a broken
        # Python 2 port where the second branch checked ``unicode``.
        if isinstance(info, str):
            try:
                info = json.loads(info)
                # Kept as type() checks on purpose: isinstance would also
                # match bool (a subclass of int) and change behavior.
                if type(info) == int or type(info) == float:
                    info = '"%s"' % info
            except ValueError:
                info = '"%s"' % info
            clauses.append(_entity_descriptor(model, 'info').contains(info))
    return clauses, headlines, order_by_ranks
def test_match_tsvectorconfig(self):
    """``to_tsvector('english', col).match(str)`` keeps the config argument
    on to_tsvector but not on to_tsquery."""
    s = select([self.table_alt.c.id])\
        .where(
            func.to_tsvector(
                'english',
                self.table_alt.c.title
            )\
            .match('somestring')
        )
    self.assert_compile(s,
                        'SELECT mytable.id '
                        'FROM mytable '
                        'WHERE to_tsvector(%(to_tsvector_1)s, mytable.title) @@ '
                        'to_tsquery(%(to_tsvector_2)s)'
                        )
def setup_method(self, _):
    """Create users/roles and six FullTextSearch entries covering the
    public, private, role-restricted, params/actions and interface-bound
    cases exercised by the search tests."""
    import transaction
    from sqlalchemy import func
    from geoalchemy2 import WKTElement
    from c2cgeoportal_commons.models import DBSession
    from c2cgeoportal_commons.models.main import FullTextSearch, Role, Interface
    from c2cgeoportal_commons.models.static import User

    user1 = User(username="******", password="******")
    role1 = Role(name="__test_role1", description="__test_role1")
    user1.role_name = role1.name

    user2 = User(username="******", password="******")
    role2 = Role(name="__test_role2", description="__test_role2")
    user2.role_name = role2.name

    # Public entry on layer1.
    entry1 = FullTextSearch()
    entry1.label = "label1"
    entry1.layer_name = "layer1"
    entry1.ts = func.to_tsvector("french", "soleil travail")
    entry1.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry1.public = True

    # Private entry with no role restriction.
    entry2 = FullTextSearch()
    entry2.label = "label2"
    entry2.layer_name = "layer2"
    entry2.ts = func.to_tsvector("french", "pluie semaine")
    entry2.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry2.public = False

    # Private entry restricted to role2.
    entry3 = FullTextSearch()
    entry3.label = "label3"
    entry3.layer_name = "layer3"
    entry3.ts = func.to_tsvector("french", "vent neige")
    entry3.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry3.public = False
    entry3.role = role2

    # Public entry duplicating entry1's indexed text.
    entry4 = FullTextSearch()
    entry4.label = "label4"
    entry4.layer_name = "layer1"
    entry4.ts = func.to_tsvector("french", "soleil travail")
    entry4.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry4.public = True

    # Geometry-less entry carrying params and a client action.
    entry5 = FullTextSearch()
    entry5.label = "label5"
    entry5.ts = func.to_tsvector("french", "params")
    entry5.public = True
    entry5.params = {"floor": 5}
    entry5.actions = [{"action": "add_layer", "data": "layer1"}]

    # Entry bound to the "main" interface.
    entry6 = FullTextSearch()
    entry6.label = "label6"
    entry6.ts = func.to_tsvector("french", "params")
    entry6.interface = Interface("main")
    entry6.public = True

    DBSession.add_all([user1, user2, role1, role2, entry1, entry2, entry3, entry4, entry5, entry6])
    transaction.commit()
def setUp(self):  # noqa
    """Create users/roles and six FullTextSearch entries covering public,
    private, role-restricted, params/actions and interface-bound cases."""
    import transaction
    from sqlalchemy import func
    from geoalchemy2 import WKTElement
    from c2cgeoportal.models import FullTextSearch, User, Role, Interface
    from c2cgeoportal.models import DBSession

    user1 = User(username=u"__test_user1", password=u"__test_user1")
    role1 = Role(name=u"__test_role1", description=u"__test_role1")
    user1.role_name = role1.name

    user2 = User(username=u"__test_user2", password=u"__test_user2")
    role2 = Role(name=u"__test_role2", description=u"__test_role2")
    user2.role_name = role2.name

    # Public entry on layer1.
    entry1 = FullTextSearch()
    entry1.label = "label1"
    entry1.layer_name = "layer1"
    entry1.ts = func.to_tsvector("french", "soleil travail")
    entry1.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry1.public = True

    # Private entry with no role restriction.
    entry2 = FullTextSearch()
    entry2.label = "label2"
    entry2.layer_name = "layer2"
    entry2.ts = func.to_tsvector("french", "pluie semaine")
    entry2.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry2.public = False

    # Private entry restricted to role2.
    entry3 = FullTextSearch()
    entry3.label = "label3"
    entry3.layer_name = "layer3"
    entry3.ts = func.to_tsvector("french", "vent neige")
    entry3.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry3.public = False
    entry3.role = role2

    # Public entry duplicating entry1's indexed text.
    entry4 = FullTextSearch()
    entry4.label = "label4"
    entry4.layer_name = "layer1"
    entry4.ts = func.to_tsvector("french", "soleil travail")
    entry4.the_geom = WKTElement("POINT(-90 -45)", 21781)
    entry4.public = True

    # Geometry-less entry carrying params and a client action.
    entry5 = FullTextSearch()
    entry5.label = "label5"
    entry5.ts = func.to_tsvector("french", "params")
    entry5.public = True
    entry5.params = {"floor": 5}
    entry5.actions = [{"action": "add_layer", "data": "layer1"}]

    # Entry bound to the "main" interface.
    entry6 = FullTextSearch()
    entry6.label = "label6"
    entry6.ts = func.to_tsvector("french", "params")
    entry6.interface = Interface("main")
    entry6.public = True

    DBSession.add_all([user1, user2, role1, role2, entry1, entry2, entry3, entry4, entry5, entry6])
    transaction.commit()
def setUp(self):
    """Define the Product model, create its table and API, and seed two rows."""
    super(TestTSVectorOperators, self).setUp()

    class Product(self.Base):
        __tablename__ = 'product'
        id = Column(Integer, primary_key=True)
        name = Column(String)
        document = Column(TSVECTOR)

    self.Product = Product
    # Create the tables in the PostgreSQL database and expose the API.
    self.Base.metadata.create_all()
    self.manager.create_api(Product)
    # Common records used by the test cases.
    seed = [
        self.Product(id=1, name='Porsche 911',
                     document=func.to_tsvector('Porsche 911')),
        self.Product(id=2, name='Porsche 918',
                     document=func.to_tsvector('Porsche 918')),
    ]
    self.product1, self.product2 = seed
    self.session.add_all(seed)
    self.session.commit()
def vectorize_files(self):
    """ Extract the text from the indexed files and store it. """
    for locale, language in (('de_CH', 'german'), ('fr_CH', 'french')):
        chunks = []
        for name in self.indexed_files:
            attachment = SwissVote.__dict__[name].__get_by_locale__(self, locale)
            if attachment:
                # extract_pdf_info returns (pages, text); keep only the text.
                chunks.append(extract_pdf_info(attachment.reference.file)[1] or '')
        text = ' '.join(chunks).strip()
        setattr(self, f'searchable_text_{locale}',
                func.to_tsvector(language, text))
def get_authors_autocomplete(query, chunk, suggestions_per_query):
    """Return one page of author-name suggestions for a prefix query.

    Parameters
    ----------
    query: search prefix typed by the user
    chunk: 1-based page number (string or int)
    suggestions_per_query: page size

    Returns
    -------
    dict with 'results' (list of {'id', 'name'}) and 'finished'
    (True when this page is the last one).
    """
    limit = suggestions_per_query
    offset = limit * (int(chunk) - 1)
    # ":*" makes the tsquery a prefix match.
    prefix_query = query + ':*'
    entity_list = db.session\
        .query(Author.id, Author.surname, Author.name)\
        .filter((func.to_tsvector('simple', Author.name))
                .match(prefix_query)
                | (func.to_tsvector('simple', Author.surname))
                .match(prefix_query))\
        .order_by(func.ts_rank(func.to_tsvector('simple', Author.name),
                               # BUG FIX: ts_rank requires a tsquery; the plain
                               # text string previously passed has no implicit
                               # cast in PostgreSQL and made ORDER BY fail.
                               func.to_tsquery('simple', prefix_query)))\
        .offset(offset)\
        .limit(limit)\
        .all()
    finished = len(entity_list) < limit
    authors = [{
        'id': a.id,
        'name': ((a.surname + ' ' + a.name).strip()
                 if a.surname else a.name)
    } for a in entity_list]
    return {'results': authors, 'finished': finished}
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter.

    Builds SQLAlchemy filter clauses against the model's JSON ``info`` column.
    Two input forms are supported:

    * ``"k1::v1|k2::v2"`` — per-key filters; with ``fulltextsearch == '1'``
      each becomes a tsvector match (plus a ts_headline expression and a
      ts_rank_cd ordering for the first pair), otherwise an exact text match.
    * anything else — compared against the whole ``info`` value.

    Returns a 3-tuple ``(clauses, headlines, order_by_ranks)``.
    """
    clauses = []
    headlines = []
    order_by_ranks = []
    if info and '::' in info:
        # Key::value pairs, separated by '|'.
        pairs = info.split('|')
        for pair in pairs:
            if pair != '':
                k,v = pair.split("::")
                if fulltextsearch == '1':
                    # Full-text search on the JSON key's text value.
                    vector = _entity_descriptor(model, 'info')[k].astext
                    clause = func.to_tsvector(vector).match(v)
                    clauses.append(clause)
                    if len(headlines) == 0:
                        # Only the first pair contributes headline/rank.
                        headline = func.ts_headline(self.language, vector,
                                                    func.to_tsquery(v))
                        headlines.append(headline)
                        order = func.ts_rank_cd(func.to_tsvector(vector),
                                                func.to_tsquery(v), 4).label('rank')
                        order_by_ranks.append(order)
                else:
                    # Exact string comparison on the JSON key's text value.
                    clauses.append(_entity_descriptor(model,
                                                      'info')[k].astext == v)
    else:
        # Whole-value comparison. NOTE(review): `unicode` implies Python 2;
        # this branch would NameError on Python 3 — confirm target runtime.
        if type(info) == dict:
            clauses.append(_entity_descriptor(model, 'info') == info)
        if type(info) == str or type(info) == unicode:
            try:
                # Strings holding JSON scalars are re-quoted so that
                # .contains() matches the stored JSON representation.
                info = json.loads(info)
                if type(info) == int or type(info) == float:
                    info = '"%s"' % info
            except ValueError:
                info = '"%s"' % info
            clauses.append(_entity_descriptor(model, 'info').contains(info))
    return clauses, headlines, order_by_ranks
def column_vector(self, column):
    """Build the weighted to_tsvector expression for one column of NEW."""
    if column.name in RESERVED_WORDS:
        # Reserved identifiers must be quoted inside the trigger body.
        column.name = quote_identifier(column.name)
    expression = text("NEW.{column}".format(column=column.name))
    # Apply a registered vectorizer for this column, when one exists.
    try:
        transform = vectorizer[column]
    except KeyError:
        transform = None
    if transform is not None:
        expression = transform(expression)
    expression = func.coalesce(expression, text("''"))
    expression = func.to_tsvector(self.options["regconfig"], expression)
    weights = self.options["weights"]
    if column.name in weights:
        expression = func.setweight(expression, weights[column.name])
    return expression
def scrape_html(analysis):
    """Downloads and extracts content plus metadata for html page

    Parameters
    ----------
    analysis: analysis object to be scraped

    Returns
    -------
    analysis: The updated analysis object

    Raises
    ------
    Exception: on download failure, empty content, undetectable language,
        or non-English content.
    """
    a = newspaper.Article(analysis.gkg.document_identifier)
    a.download()
    if a.download_state != 2:
        # Temporary fix to deal with https://github.com/codelucas/newspaper/issues/280
        raise Exception("Retrieval Failed")
    a.parse()
    analysis.title = a.title
    analysis.authors = a.authors
    analysis.publication_date = a.publish_date or None
    text = re.sub(r'\s+', ' ', a.text)  # collapse all whitespace
    # Scraping should fail if text is length 0
    if len(text) == 0:
        raise Exception("Content is empty")
    text_clean = cleanup(text)  # Clean text for analysis steps
    text_ts = remove_wordcloud_stopwords(text_clean)
    try:
        analysis.language = detect(text)
    except LangDetectException:
        raise Exception("Unable to determine language")
    # BUG FIX: obtain the session before the non-English branch; previously
    # `session.commit()` there referenced the local `session` before its
    # assignment and raised UnboundLocalError.
    session = object_session(analysis)
    if analysis.language != 'en':
        session.commit()
        raise Exception("Article not in English")
    content = DocumentContent(analysis=[analysis], content=text,
                              content_clean=text_clean, content_type='text',
                              content_ts=func.to_tsvector(
                                  'simple_english', text_ts))
    session.add(content)
    session.commit()
    return analysis
def fulltext_search(self, query: str, including: bool):
    """ SELECT id, ts_headline(description, q)
        FROM (
          SELECT id, description, q
          FROM goods, to_tsquery('english', 'query & here') q
          WHERE to_tsvector('english', description) @@ q
        ) search_t;
    """
    prepared = self.__prepare_query(query, including)
    tsquery = func.to_tsquery('english', prepared)
    # Inner query: matching rows, carrying the tsquery along for headlining.
    matched = (
        self.__session
        .query(Goods.id, Goods.description, tsquery)
        .select_from(tsquery)
        .filter(func.to_tsvector('english', Goods.description)
                .match(prepared, postgresql_regconfig='english'))
        .subquery()
    )
    # Outer query: highlight the match inside each description.
    return self.__session.query(
        matched.c.id,
        func.ts_headline(matched.c.description, tsquery),
    ).all()
def load_specialty_lookup(session, filename):
    """ Seeds the table with provider specialties and their common english
    names, as well as the average claim amounts using a separate csv and a
    python dictionary. """
    with open(filename, 'rb') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            lookup = model.SpecialtyLookup()
            lookup.search_term = row[0].strip()
            lookup.search_tsv = func.to_tsvector(lookup.search_term)
            lookup.specialty = row[1].strip()
            # Average claim and standard deviation come from the static dict.
            stats = spec_dict.thedict[lookup.specialty]
            lookup.avg_claim = stats[0]
            lookup.stdev = stats[1]
            session.add(lookup)
    session.commit()
def handle_info_json(self, model, info, fulltextsearch=None):
    """Handle info JSON query filter."""
    clauses = []
    if '::' not in info:
        # No key::value pairs: compare the whole JSON value as text.
        serialized = json.dumps(info)
        clauses.append(cast(_entity_descriptor(model, 'info'), Text) == serialized)
        return clauses
    # "k1::v1|k2::v2" — one clause per non-empty pair.
    for pair in info.split('|'):
        if not pair:
            continue
        key, value = pair.split("::")
        descriptor = _entity_descriptor(model, 'info')[key].astext
        if fulltextsearch == '1':
            clauses.append(func.to_tsvector(descriptor).match(value))
        else:
            clauses.append(descriptor == value)
    return clauses
def load_procedures(session, filename): """ Seeds the procedures table with data from medicare's tab separated file. Since multiple doctors could file the same type of claim, each procedure could have multiple lines. This function checks if the procedure is already in the table using a dictionary. """ with open(filename, 'rb') as csvfile: print "Loading Procedures..." procedure_dict = {} # skip the header line and copyright statement header = csvfile.next() copyright = csvfile.next() lines = csv.reader(csvfile, delimiter = '\t') for line in lines: if line[12].strip() == 'US': # check if procedure was claimed in SF longzip = line[10].strip() short_zip = int(longzip[:5]) if short_zip in SF_ZIPS: procedure = model.Procedure() procedure.hcpcs_code = line[16].strip() procedure.hcpcs_descr = line[17].strip() procedure.hcpcs_tsv = func.to_tsvector(procedure.hcpcs_descr) if procedure_dict.get(procedure.hcpcs_code) == None: procedure_dict[procedure.hcpcs_code] = 0 session.add(procedure) print "Adding to session: ", procedure.hcpcs_code, procedure.hcpcs_descr session.commit()
def _add_fts(self, item, interface, action, role):
    """Queue one FullTextSearch row per configured language for a theme item."""
    from c2cgeoportal.models import FullTextSearch

    key = (
        item.name if self.options.name else item.id,
        interface.id,
        None if role is None else role.id,
    )
    if key in self.imported:
        # Already handled for this (item, interface, role) combination.
        return
    self.imported.add(key)
    for lang in self.languages:
        record = FullTextSearch()
        record.label = self._[lang].gettext(item.name)
        record.role = role
        record.interface = interface
        record.lang = lang
        record.public = role is None
        record.ts = func.to_tsvector(self.fts_languages[lang], record.label)
        record.actions = [{"action": action, "data": item.name}]
        record.from_theme = True
        self.session.add(record)
def test_match_tsvector(self):
    """to_tsvector(col).match(...) must compile to the @@ to_tsquery form."""
    stmt = select([self.table_alt.c.id]).where(
        func.to_tsvector(self.table_alt.c.title).match("somestring")
    )
    expected = (
        "SELECT mytable.id "
        "FROM mytable "
        "WHERE to_tsvector(mytable.title) "
        "@@ to_tsquery(%(to_tsvector_1)s)"
    )
    self.assert_compile(stmt, expected)
def __init__(self, url, content):
    """Store the page url and its content converted to a tsvector."""
    self.content = func.to_tsvector(content)
    self.url = url
def tsvector(obj):  # pragma: no cover
    """Return an English to_tsvector expression over obj's formatted text."""
    # Keep '{0}'.format(obj) (not str()) to preserve __format__ semantics.
    formatted = '{0}'.format(obj)
    return func.to_tsvector('english', formatted)
def cond_func(lang):
    """Build the tsvector @@ tsquery condition for one language.

    Semicolons in the target are treated as word separators before
    vectorizing. `target` and `prepared_searchstring` come from the
    enclosing scope.
    """
    vector = func.to_tsvector(lang, func.replace(target, ";", " "))
    tsquery = func.to_tsquery(lang, prepared_searchstring)
    return vector.op("@@")(tsquery)