def check_if_amendement_are_in_db(start_date, end_date, size, output_file):
    """
    Report which amendements returned by the search service are missing
    from the database.

    The search results are walked page by page. For every page, a hash is
    computed for each result and compared with the
    ``CONCAT(legislature, texteloi_id, numero)`` values stored for
    ``Amendement`` rows. Progress, each missing URL and the final number of
    missing amendements are printed; the missing URLs are then written to
    ``output_file``, one per line.

    Arguments:
        start_date: lower bound forwarded to the search service
        end_date: upper bound forwarded to the search service
        size: page size forwarded to the search service, also used to
            compute the page counters that are printed
        output_file: path of the file receiving the missing URLs
    """
    service = AmendementSearchService()
    print(u'Nombre total d\'amendement à checker : %s'
          % service.total_count(start_date=start_date, end_date=end_date))

    all_missing_urls = []
    for page in service.iter(start_date=start_date, end_date=end_date,
                             size=size):
        print("Page %s / %s" % (page.start / size, page.total_count / size))

        # NOTE(review): the hashes are derived from `url_amend` here but from
        # `url` when filtering below -- confirm both attributes exist on the
        # results and point to the same URL.
        page_hashes = [amendement_hash(result.url_amend)
                       for result in page.results]

        # Ask the database which of this page's hashes it already knows.
        hash_expr = SQL('CONCAT(legislature, texteloi_id, numero)')
        known_rows = (Amendement
                      .select(hash_expr.alias('hash'))
                      .where(hash_expr << page_hashes))
        known_hashes = [unicode(row.hash) for row in known_rows]

        absent_hashes = set(page_hashes).difference(known_hashes)
        page_missing_urls = [result.url for result in page.results
                             if amendement_hash(result.url) in absent_hashes]

        for url in page_missing_urls:
            print(u'Amendement manquant : %s' % url)
        all_missing_urls.extend(page_missing_urls)

    print(u'Nombre total d\'amendements manquants : %s'
          % len(all_missing_urls))

    with open(output_file, 'w') as f:
        f.write('\n'.join(all_missing_urls))
def search_for_tea(search_query, paginate_by=0, page=1):
    """
    Search for teas matching the given query.

    The query is split on whitespace. A tea is kept only if every word
    matches at least one of: its name, its vendor internal id (exact match),
    its description or its long description. Results are ordered by a
    relevance score built from the weights found in
    ``app.config['SEARCH_WEIGHTS']`` (keys ``name``, ``vendor_code``,
    ``desc`` and ``ldesc``); rows whose relevance is 0 are dropped.

    Arguments:
        search_query: the raw search string
        paginate_by: number of results per page; pagination is enabled only
            when this is positive
        page: the 1-based page to return when pagination is enabled

    Returns:
        Without pagination: a peewee query with the results, or ``[]`` when
        the query is empty (falsy or made only of whitespace).
        With pagination: a tuple ``(results, count, pages_count)`` where
        ``results`` is the paginated peewee query, or ``([], 0, 0)`` when
        the query is empty.

    Aborts with a 404 when pagination is enabled and ``page`` is not 1 and
    lies beyond the last page.
    """
    paginated = paginate_by > 0

    # A falsy query and a whitespace-only query both carry no search term.
    search_terms = search_query.split() if search_query else []
    if not search_terms:
        # Parenthesised on purpose: without the parentheses the conditional
        # expression only covers the first tuple element and a tuple is
        # returned even when pagination is disabled.
        return ([], 0, 0) if paginated else []

    weights = app.config['SEARCH_WEIGHTS']

    relevance = SQL('0')
    where_clause = SQL('1')

    for word in search_terms:
        # Each field matched by the word adds its weight to the relevance.
        relevance += (
            fn.IF(Tea.name.contains(word), weights['name'], 0) +
            fn.IF(Tea.vendor_internal_id == word, weights['vendor_code'], 0) +
            fn.IF(Tea.description.contains(word), weights['desc'], 0) +
            fn.IF(Tea.long_description.contains(word), weights['ldesc'], 0))

        # Every word has to match at least one field.
        where_clause &= (
            (Tea.name.contains(word)) |
            (Tea.vendor_internal_id == word) |
            (Tea.description.contains(word)) |
            (Tea.long_description.contains(word)))

    teas = (Tea.select(Tea.name, Tea.slug, Tea.description, Tea.illustration,
                       Tea.tips_raw, Tea.tips_mass, Tea.tips_volume,
                       Tea.tips_duration, Tea.tips_temperature,
                       TeaVendor.name.alias('vendor_name'),
                       TeaVendor.slug.alias('vendor_slug'),
                       relevance.alias('relevance'))
               .join(TeaVendor)
               .where(where_clause)
               .having(SQL('relevance') != 0)
               .order_by(SQL('relevance DESC')))

    if not paginated:
        return teas

    # The count uses the WHERE clause only (no join, no HAVING filter).
    count = Tea.select().where(where_clause).count()
    pages_count = int(math.ceil(float(count) / paginate_by))

    # Page 1 is always served, even when there is no result at all.
    if page != 1 and page > pages_count:
        abort(404)

    return teas.paginate(page, paginate_by), count, pages_count
def fts_query_kind(term, kind):
    """
    Build a full text search query restricted to one content type.

    The search phrase is bound as a parameter of a boolean-mode
    ``MATCH ... AGAINST`` expression over the `text`, subject, title,
    description, creator and publisher columns. That expression is selected
    under the ``relevance`` alias and used to sort the rows, highest first.
    The only filter applied here is on the content type; rows are not
    filtered on the match score.

    Arguments:
        term: a string representing the search phrase
        kind: one of (image|video), a string that the document's content
            type must contain

    Returns:
        peewee.SelectQuery
    """
    match_expression = (
        "MATCH (`text`, subject, title, description, creator, publisher) "
        "AGAINST (%s IN BOOLEAN MODE)"
    )
    score = SQL(match_expression, params=(term, ))

    query = Document.select(Document, score.alias("relevance"))
    query = query.where(Document.content_type.contains(kind))
    return query.order_by(SQL("relevance").desc())