def get(self):
    """Return one page of organizations as JSON, optionally filtered.

    Request arguments (parsed with ``reqparse``):
        group, type, issue: optional comma-separated strings. Every item
            longer than two characters is turned into the regex
            ``<item>.*`` (double quotes stripped) and matched against
            ``social_group``, ``type`` and ``geo_issue`` respectively.
        filters: required int; the value ``1`` enables the filters above.
        page: required int; multiplied by the page size (25) to compute
            how many documents to skip.

    Returns:
        The ``jsonify(organizations=...)`` response built from
        ``create_dic`` applied to the resulting cursor.
    """
    page_size = 25

    def build_regexes(raw_values):
        # Optional arguments parse to None when they are not supplied;
        # treat that as "no values" instead of crashing on None.split().
        regexes = []
        for item in (raw_values or '').split(','):
            if len(item) > 2:
                regex = Regex.from_native(
                    re.compile(str(item).replace("\"", "") + '.*'))
                # Flag toggle taken from the PyMongo Regex.from_native
                # example for patterns that are sent to the server.
                regex.flags ^= re.UNICODE
                regexes.append(regex)
        return regexes

    parser = reqparse.RequestParser()
    parser.add_argument('group', type=str, required=False)
    parser.add_argument('type', type=str, required=False)
    parser.add_argument('issue', type=str, required=False)
    parser.add_argument('filters', type=int, required=True)
    parser.add_argument('page', type=int, required=True)
    args = parser.parse_args()

    query = {}
    # Compare by value: ``is`` on integers relies on interpreter caching.
    if args.filters == 1:
        query = {
            "$or": [
                {"social_group": {"$in": build_regexes(args.group)}},
                {"type": {"$in": build_regexes(args.type)}},
                {"geo_issue": {"$in": build_regexes(args.issue)}}
            ]
        }

    cursor = mongo.db.organizations.find(query)
    s = create_dic(cursor.skip(args.page * page_size).limit(page_size))
    return jsonify(organizations=s)
def get(self, id=None):
    """Return directory entries as JSON, optionally filtered.

    When ``id`` is ``None`` the request arguments are parsed:
        figure, issue: optional comma-separated strings. Every item longer
            than two characters is turned into the regex ``<item>.*``
            (double quotes stripped) and matched against ``figure`` and
            ``federal_entity`` respectively.
        filters: required int; the value ``1`` enables the filters above.

    When ``id`` is given, no arguments are parsed and the whole collection
    is returned (the ``id`` value itself is not used in the query).

    Returns:
        The ``jsonify(directory=...)`` response built from ``create_dic``
        applied to the resulting cursor.
    """

    def build_regexes(raw_values):
        # Optional arguments parse to None when they are not supplied;
        # treat that as "no values" instead of crashing on None.split().
        regexes = []
        for item in (raw_values or '').split(','):
            if len(item) > 2:
                regex = Regex.from_native(
                    re.compile(str(item).replace("\"", "") + '.*'))
                # Flag toggle taken from the PyMongo Regex.from_native
                # example for patterns that are sent to the server.
                regex.flags ^= re.UNICODE
                regexes.append(regex)
        return regexes

    query = {}
    if id is None:
        parser = reqparse.RequestParser()
        parser.add_argument('figure', type=str, required=False)
        parser.add_argument('issue', type=str, required=False)
        parser.add_argument('filters', type=int, required=True)
        args = parser.parse_args()

        # Compare by value: ``is`` on integers relies on interpreter caching.
        if args.filters == 1:
            query = {
                "$or": [
                    {"figure": {"$in": build_regexes(args.figure)}},
                    {"federal_entity": {"$in": build_regexes(args.issue)}}
                ]
            }

    s = create_dic(mongo.db.directory.find(query))
    return jsonify(directory=s)
def find_by_primary_key(self, *kwargs):
    """Fetch the single document identified by its primary-key values.

    Args:
        *kwargs: positional values, one per entry of the document's
            ``pk_fields`` and in the same order.
            - ``id`` is queried as ``_id``.
            - ``company_key`` may be a list, in which case any of the
              listed keys matches (``$in``).
            - fields listed in ``_meta['insensitive_pk_fields']`` are
              matched case-insensitively.

    Returns:
        The first matching document.

    Raises:
        DocumentNotFound: when no document matches.
        IndexError: when fewer values than primary-key fields are passed.
    """
    document = self.__document
    args = {}
    for idx, pk in enumerate(document.pk_fields):
        value = kwargs[idx]
        if pk == "id":
            args["_id"] = value
        elif pk == 'company_key':
            args[pk] = {'$in': value} if isinstance(value, list) else value
        else:
            is_case_insensitive_pk = (
                'insensitive_pk_fields' in document._meta
                and pk in document._meta['insensitive_pk_fields']
            )
            if not is_case_insensitive_pk:
                args[pk] = value
            else:
                # Escape the value so regex metacharacters in a key (".",
                # "+", "(" ...) are matched literally; otherwise a key such
                # as "a.b" would also match "axb".
                regex = Regex.from_native(
                    re.compile("^" + re.escape(value) + "$", re.IGNORECASE))
                args[pk] = {"$regex": regex}

    doc = document.collection.find(args).limit(1)
    try:
        return doc[0]
    except IndexError:
        raise DocumentNotFound(
            "{} not found".format(document.collection_name))
def delete_by_prefix(self, collection_name, field, prefix):
    """Delete every document whose ``field`` starts with ``prefix``.

    The prefix is matched literally and case-insensitively.

    Args:
        collection_name: name of the collection in ``self.db``.
        field: document field the prefix is matched against.
        prefix: literal text the field value must start with.

    Any exception from the driver propagates to the caller unchanged.
    """
    # Escape the prefix: this pattern drives a delete, so an unescaped
    # metacharacter (e.g. ".") could remove documents that do not really
    # share the prefix.
    pattern = re.compile('^' + re.escape(prefix) + '.*')
    regex = Regex.from_native(pattern)
    # The pattern is compiled without IGNORECASE, so toggling the bit
    # switches case-insensitive matching on.
    regex.flags ^= re.IGNORECASE
    self.db[collection_name].delete_many({field: {'$regex': regex}})
def constructHashtags(self, keywords):
    """Convert keywords into case-insensitive BSON regexes.

    The first character of every keyword is dropped before the remainder
    is compiled (presumably a leading ``#`` -- confirm with the caller).

    Args:
        keywords: iterable of strings.

    Returns:
        A list with one ``Regex`` per keyword, in input order.
    """

    def to_bson_regex(keyword):
        bson_regex = Regex.from_native(re.compile(keyword[1:], re.IGNORECASE))
        # Flag toggle from the PyMongo from_native example.
        bson_regex.flags ^= re.UNICODE
        return bson_regex

    return [to_bson_regex(keyword) for keyword in keywords]
def like(field, value):
    """Translate a SQL ``LIKE``-style value into a MongoDB ``$regex`` filter.

    ``%`` at the start and/or end of ``value`` selects the match mode; all
    ``%`` characters are removed and the remaining text is matched
    literally and case-insensitively:

        ``%text%``  -> contains ``text``
        ``%text``   -> ends with ``text``
        ``text%``   -> starts with ``text``
        ``text``    -> equals ``text``

    Args:
        field: name of the document field to filter on.
        value: the LIKE expression.

    Returns:
        ``{field: {'$regex': Regex}}``.
    """
    escaped_value = re.escape(value.replace('%', ''))
    open_start = value.startswith('%')
    open_end = value.endswith('%')

    if open_start and open_end:
        pattern = '.*' + escaped_value + '.*'
    elif open_start:
        pattern = escaped_value + '$'
    elif open_end:
        pattern = '^' + escaped_value
    else:
        # No wildcard: previously the raw string reached Regex.from_native,
        # which only accepts compiled patterns and raised TypeError.
        pattern = '^' + escaped_value + '$'

    compiled = re.compile(pattern, re.IGNORECASE)
    return {field: {'$regex': Regex.from_native(compiled)}}
class RegularExpressionFieldTestCase(FieldTestCase):
    """Conversion checks for ``RegularExpressionField``."""

    # Shared fixtures: a native compiled pattern and its BSON counterpart.
    field = RegularExpressionField()
    pattern = re.compile('hello', re.UNICODE)
    regex = Regex.from_native(pattern)

    def test_to_mongo(self):
        """A BSON ``Regex`` is stored as an equal ``Regex``."""
        stored = self.field.to_mongo(self.regex)
        self.assertEqual(self.regex, stored)

    def test_to_python(self):
        """A BSON ``Regex`` is loaded as the native compiled pattern."""
        loaded = self.field.to_python(self.regex)
        self.assertEqual(self.pattern, loaded)
def users_mentioning_others_most(self):
    """Return the ten users with the most documents containing a mention.

    A document counts when its ``text`` matches the ``@name`` pattern
    below; documents are grouped by ``user`` and counted once each.

    Returns:
        The result of ``self._collection.aggregate`` for the pipeline.
    """
    mention_regex = Regex.from_native(
        re.compile("(?<=^|(?<=[^a-zA-Z0-9-_\\.]))@([A-Za-z]+[A-Za-z0-9_]+)"))

    only_mentions = {"$match": {"text": {"$regex": mention_regex}}}
    count_per_user = {"$group": {"_id": "$user", "count": {"$sum": 1}}}
    most_first = {"$sort": SON([("count", -1)])}
    top_ten = {"$limit": 10}

    stages = [only_mentions, count_per_user, most_first, top_ten]
    return self._collection.aggregate(stages, allowDiskUse=True)
def get_ist_item(self, path=",Whole Program,", starting=False, ending=True):
    """Find ``ist`` documents whose ``_id`` matches one of the given paths.

    Args:
        path: a single path string or a list of them; each is used as a
            regular-expression fragment and the fragments are OR-ed.
        starting: anchor every fragment at the start of ``_id`` (``^``).
        ending: anchor every fragment at the end of ``_id`` (``$``).

    Returns:
        The cursor returned by ``self.ist.find``.
    """
    candidates = path if type(path) is list else [path]
    head = "^" if starting else ""
    tail = "$" if ending else ""

    alternation = "|".join(head + candidate + tail for candidate in candidates)
    id_regex = Regex.from_native(re.compile(alternation))
    # Flag toggle from the PyMongo from_native example.
    id_regex.flags ^= re.UNICODE
    return self.ist.find({"_id": id_regex})
def find_by_name(name, database):
    """Find events whose "Исполнитель" field contains ``name``.

    The name is matched literally (regex-escaped) and case-insensitively.

    Args:
        name: text to look for.
        database: database object exposing an ``events`` collection.

    Returns:
        A cursor over the matches without ``_id``, sorted ascending by
        the same field.
    """
    literal = re.escape(name)
    contains_name = Regex.from_native(
        re.compile('.*' + literal + '.*', re.IGNORECASE))

    matches = database.events.find(
        {"Исполнитель": contains_name},
        {"_id": 0},
    )
    return matches.sort("Исполнитель", ASCENDING)
def search_recipe():
    """Search the ``recipes`` collection from the submitted form and render the results.

    Form fields read:
        type: recipe category; ignored when empty or ``ALL`` (any case).
        ingredient_search_type: ``ALL`` (any case) requires every
            ingredient (``$all``); anything else accepts any (``$in``).
        ingredient_list: one value per ingredient. Each is matched
            literally and case-insensitively, with ``-`` standing for a
            whitespace character.

    Returns:
        The rendered ``recipeapp/browse-recipes.html`` template.

    Any exception is logged and answered with ``abort(404)``; the MongoDB
    client is always closed.
    """
    client = None
    try:
        recipe_data = request.form
        client = pymongo.MongoClient(os.environ['MONGODB_URI'])
        db = client.get_default_database()
        recipe_collection = db['recipes']
        query_dict = dict()

        recipe_type = recipe_data.get('type', None)
        if recipe_type and recipe_type.upper() != 'ALL':
            query_dict.update({'type': recipe_type})

        ingredient_search_type = recipe_data.get('ingredient_search_type', None)
        if ingredient_search_type and ingredient_search_type.upper() == 'ALL':
            ingredient_search_op = '$all'
        else:
            ingredient_search_op = '$in'

        # form.get() returns only the first value as a single string, so
        # iterating it yielded one regex per *character*. getlist() returns
        # every submitted value; blank entries are dropped.
        ingredient_list = [
            ing_str for ing_str in recipe_data.getlist('ingredient_list')
            if ing_str
        ]
        if ingredient_list:
            query_dict.update({
                'ingredients': {
                    ingredient_search_op: [
                        # Escape user text so it is matched literally; the
                        # raw string avoids the invalid '\s' escape.
                        Regex.from_native(re.compile(
                            r'\s'.join(re.escape(part)
                                       for part in ing_str.split('-')),
                            re.I))
                        for ing_str in ingredient_list
                    ]
                }
            })

        recipe_list = list(recipe_collection.find(query_dict))
        return render_template(
            'recipeapp/browse-recipes.html',
            recipe_list=recipe_list
        )
    except Exception:
        logger.exception('Could not search recipe')
        abort(404)
    finally:
        if client:
            client.close()
def get_df_with_regex(
    self,
    data_source: MongoDataSource,
    field: str,
    regex: Pattern,
    permissions: Optional[str] = None,
    limit: Optional[int] = None,
) -> pd.DataFrame:
    """Return the rows of ``data_source`` whose ``field`` matches ``regex``.

    The normalized query's first stage must contain a ``$match``; it is
    AND-ed with a ``$regex`` condition on ``field``.

    Args:
        data_source: source describing the query; re-parsed into a new
            object so the caller's instance is not reassigned.
        field: name of the field the regex is applied to.
        regex: compiled Python pattern, converted with ``Regex.from_native``.
        permissions: forwarded to ``get_slice``.
        limit: forwarded to ``get_slice`` as ``limit``.

    Returns:
        The ``df`` attribute of the slice returned by ``get_slice``.
    """
    # Re-parse to work on a separate object (deepcopy-like).
    source = MongoDataSource.parse_obj(data_source)
    source.query = normalize_query(source.query, source.parameters)

    first_stage = source.query[0]
    existing_match = first_stage['$match']
    regex_condition = {field: {'$regex': Regex.from_native(regex)}}
    first_stage['$match'] = {'$and': [existing_match, regex_condition]}

    data_slice = self.get_slice(source, permissions, limit=limit)
    return data_slice.df
class RegularExpressionFieldTestCase(FieldTestCase):
    """Conversion checks for ``RegularExpressionField``."""

    # Shared fixtures: a native compiled pattern and its BSON counterpart.
    field = RegularExpressionField()
    pattern = re.compile('hello', re.UNICODE)
    regex = Regex.from_native(pattern)

    def assertPatternEquals(self, reg1, reg2):
        """Assert two compiled regular expression pattern objects are equal."""
        for attribute in ('pattern', 'flags'):
            self.assertEqual(getattr(reg1, attribute), getattr(reg2, attribute))

    def test_to_python(self):
        """Both a native pattern and a BSON ``Regex`` load as the native pattern."""
        for stored_value in (self.pattern, self.regex):
            loaded = self.field.to_python(stored_value)
            self.assertPatternEquals(self.pattern, loaded)

    def test_to_mongo(self):
        """A BSON ``Regex`` is stored as an equal ``Regex``."""
        stored = self.field.to_mongo(self.regex)
        self.assertEqual(self.regex, stored)
def get_list_of_nodes(self, sorting_order=None, name_filter=None, pagination=None):
    """List computation nodes (name, address, port) sorted by name.

    Args:
        sorting_order: optional mapping; ``sorting_order['name']`` of
            ``asc``/``desc`` (any case) picks the direction, anything
            else keeps ascending order.
        name_filter: optional mapping; ``name_filter['name']`` is used as
            a regex anchored at the start of the node name.
        pagination: optional mapping with ``skip`` and ``limit`` values,
            converted with ``int``; both default to 0.

    Returns:
        A list of documents containing ``name``, ``address`` and ``port``.
    """
    directions = {'asc': ASCENDING, 'desc': DESCENDING}
    direction = ASCENDING
    if sorting_order:
        direction = directions.get(sorting_order['name'].lower(), ASCENDING)

    criteria = {}
    if name_filter:
        criteria['name'] = Regex.from_native(
            re.compile('^' + name_filter['name']))

    skip, limit = 0, 0
    if pagination:
        skip = int(pagination['skip'])
        limit = int(pagination['limit'])

    projection = {'name': True, 'address': True, 'port': True, '_id': False}
    cursor = self.computation_nodes_collection.find(
        criteria,
        projection,
        sort=[('name', direction)],
        skip=skip,
        limit=limit,
    )
    return list(cursor)
def get_list_of_nodes(self,
                      sorting_order=None,
                      name_filter=None,
                      pagination=None,
                      address=None):
    """List computation nodes (name, address, port) sorted by name.

    Args:
        sorting_order: optional mapping; ``sorting_order['name']`` of
            ``asc``/``desc`` (any case) picks the direction, anything
            else keeps ascending order.
        name_filter: optional mapping; ``name_filter['name']`` is used as
            a regex anchored at the start of the node name.
        pagination: optional mapping with ``skip`` and ``limit`` values,
            converted with ``int``; both default to 0.
        address: optional exact value the node ``address`` must equal.

    Returns:
        A list of documents containing ``name``, ``address`` and ``port``.
    """
    directions = {'asc': ASCENDING, 'desc': DESCENDING}
    direction = ASCENDING
    if sorting_order:
        direction = directions.get(sorting_order['name'].lower(), ASCENDING)

    criteria = {}
    if name_filter:
        criteria['name'] = Regex.from_native(
            re.compile('^' + name_filter['name']))

    skip, limit = 0, 0
    if pagination:
        skip = int(pagination['skip'])
        limit = int(pagination['limit'])

    if address:
        criteria['address'] = address

    projection = {'name': True, 'address': True, 'port': True, '_id': False}
    cursor = self.computation_nodes_collection.find(
        criteria,
        projection,
        sort=[('name', direction)],
        skip=skip,
        limit=limit,
    )
    return list(cursor)
def search_recipe():
    """Search for a recipe in the MongoDB database

    Form fields read: ``type`` (recipe category, ignored when empty or
    ``ALL``), ``ingredient_search_type`` (``ALL`` requires every
    ingredient, anything else accepts any of them) and
    ``ingredient_list`` (one value per ingredient, matched literally and
    case-insensitively, with ``-`` standing for a whitespace character).

    Any exception is logged and answered with ``abort(404)``; the MongoDB
    client is always closed.

    Returns
    -------
    str
        The string representation of the search results web page to be sent to the
        Web client
    """
    client = None
    try:
        recipe_data = request.form
        client = pymongo.MongoClient(os.environ['MONGODB_URI'])
        db = client.get_default_database()
        recipe_collection = db['recipes']
        query_dict = dict()

        recipe_type = recipe_data.get('type', None)
        # Only filter the recipe results based on the recipe category when it is not set to ALL
        if recipe_type and recipe_type.upper() != 'ALL':
            query_dict.update({'type': recipe_type})

        ingredient_search_type = recipe_data.get('ingredient_search_type', None)
        if ingredient_search_type and ingredient_search_type.upper() == 'ALL':
            # The user has specified to filter based on all ingredients provided
            ingredient_search_op = '$all'
        else:
            # The user has specified to filter based on any of the ingredients provided
            ingredient_search_op = '$in'

        # form.get() returns only the first value as a single string, so
        # iterating it yielded one regex per *character*. getlist() returns
        # every submitted value; blank entries are dropped.
        ingredient_list = [
            ing_str for ing_str in recipe_data.getlist('ingredient_list')
            if ing_str
        ]
        if ingredient_list:
            # Add the ingredient-based filter when ingredients for search have been provided
            ingredient_patterns = []
            for ing_str in ingredient_list:
                # Escape user text so it is matched literally; the raw
                # string avoids the invalid '\s' escape sequence.
                source = r'\s'.join(
                    re.escape(part) for part in ing_str.split('-'))
                ingredient_patterns.append(
                    Regex.from_native(re.compile(source, re.I)))
            query_dict.update({
                'ingredients': {ingredient_search_op: ingredient_patterns}
            })

        # Issue the recipe find request based on the constructed rich query
        recipe_cursor = recipe_collection.find(query_dict)

        # Create a list of recipes found to be passed in the context of the Jinja template
        recipe_list = list(recipe_cursor)

        return render_template('recipeapp/browse-recipes.html',
                               recipe_list=recipe_list)
    except Exception:
        logger.exception('Could not search recipe')
        abort(404)
    finally:
        if client:
            client.close()
def calculate_sum_of_expenditure_types(self, query_params):
    """Sum the amount columns per municipality, year and data type.

    Builds a ``$match`` / ``$group`` / ``$project`` pipeline from
    ``query_params`` and runs it on ``mongo.db.opstine``.

    Args:
        query_params: dict read as follows.
            tipPodataka (required list): ``tipPodataka.slug`` must be in it
                unless the list is empty.
            godine (required list): ``godina`` must be in it unless empty.
            opstine (optional list): ``opstina.slug`` must be in it unless
                empty.
            filteri (optional dict): for each of ``ukupno``,
                ``sopstveniPrihodi``, ``prihodiBudzeta``, ``donacije`` and
                ``ostali`` an optional dict with ``veceIliJednako``
                (``$gte``) and/or ``manjeIliJednako`` (``$lte``).
            klasifikacija (optional dict): ``broj`` (required list of
                class numbers) and ``pocinjeSa`` (optional prefix). When
                both are set a document matches either; results are also
                grouped by ``klasifikacija.broj``.
            kategorijaRoditelj (optional list, only used when
                ``klasifikacija`` is absent): filters and groups by
                ``kategorijaRoditelj.broj``.

            Side effect: a non-empty ``klasifikacija['broj']`` is replaced
            in place by its stringified copy.

    Returns:
        The list of aggregated documents.
    """
    amount_fields = ('ukupno', 'sopstveniPrihodi', 'prihodiBudzeta',
                     'donacije', 'ostali')
    range_operators = (('veceIliJednako', '$gte'),
                       ('manjeIliJednako', '$lte'))

    # Build match pipeline
    match_filter = {}
    match = {"$match": match_filter}

    if query_params['tipPodataka'] != []:
        match_filter["tipPodataka.slug"] = {'$in': query_params['tipPodataka']}

    if "klasifikacija" in query_params:
        if query_params['klasifikacija']['broj'] != []:
            query_params['klasifikacija']['broj'] = [
                str(i) for i in query_params['klasifikacija']['broj']]

    # Value ranges: one loop replaces ten copy-pasted stanzas.
    if "filteri" in query_params:
        filters = query_params["filteri"]
        for field in amount_fields:
            if field not in filters:
                continue
            for bound_key, operator in range_operators:
                if bound_key in filters[field]:
                    match_filter.setdefault(field, {})[operator] = \
                        filters[field][bound_key]

    # Add other filters
    if query_params['godine'] != []:
        match_filter["godina"] = {'$in': query_params['godine']}

    if "opstine" in query_params:
        if query_params['opstine'] != []:
            match_filter["opstina.slug"] = {'$in': query_params['opstine']}

    # Build group pipeline
    group_id = {
        "opstina": "$opstina.latinica",
        "godina": "$godina",
        "tipPodataka": "$tipPodataka.vrednost"
    }
    group = {
        "$group": {
            "_id": group_id,
            "prihodiBudzeta": {"$sum": "$prihodiBudzeta"},
            "sopstveniPrihodi": {"$sum": "$sopstveniPrihodi"},
            "donacije": {"$sum": "$donacije"},
            "ostali": {"$sum": "$ostali"},
            "ukupno": {"$sum": "$ukupno"}
        }
    }

    # Build project pipeline
    projection = {
        "_id": 0,
        "opstina": "$_id.opstina",
        "godina": "$_id.godina",
        "tipPodataka": "$_id.tipPodataka",
        "prihodiBudzeta": "$prihodiBudzeta",
        "sopstveniPrihodi": "$sopstveniPrihodi",
        "donacije": "$donacije",
        "ostali": "$ostali",
        "ukupno": "$ukupno",
    }
    project = {"$project": projection}

    if "klasifikacija" in query_params:
        classification = query_params['klasifikacija']
        class_numbers = classification['broj']
        # A missing prefix is treated like an empty one; the unguarded
        # lookup raised KeyError when 'broj' was empty.
        starts_with = classification.get('pocinjeSa', '')

        prefix_regex = None
        if starts_with != '':
            # PyMongo takes Python patterns; convert to a BSON Regex and
            # apply the flag toggle from the from_native example.
            prefix_regex = Regex.from_native(re.compile("^%s" % starts_with))
            prefix_regex.flags ^= re.UNICODE

        if class_numbers != [] and prefix_regex is not None:
            # Either one of the picked class numbers or the prefix.
            match_filter['$or'] = [
                {"klasifikacija.broj": {'$in': class_numbers}},
                {"klasifikacija.broj": prefix_regex},
            ]
        elif class_numbers != []:
            match_filter["klasifikacija.broj"] = {'$in': class_numbers}
        elif prefix_regex is not None:
            match_filter["klasifikacija.broj"] = prefix_regex

        # Add this param to group and project stages.
        # NOTE(review): assumed to apply whenever 'klasifikacija' is
        # present, regardless of which filter branch ran -- confirm.
        group_id['klasifikacijaBroj'] = "$klasifikacija.broj"
        projection['klasifikacijaBroj'] = '$_id.klasifikacijaBroj'

    elif "kategorijaRoditelj" in query_params:
        if query_params['kategorijaRoditelj'] != []:
            match_filter["kategorijaRoditelj.broj"] = {
                '$in': query_params['kategorijaRoditelj']}

        # NOTE(review): assumed to apply whenever 'kategorijaRoditelj' is
        # present, even with an empty list -- confirm.
        group_id['kategorijaRoditelj'] = "$kategorijaRoditelj.broj"
        projection['kategorijaRoditelj'] = '$_id.kategorijaRoditelj'

    # Execute mongo request
    json_doc = mongo.db.opstine.aggregate([match, group, project])
    # PyMongo 2.x returns a dict holding the documents under 'result';
    # PyMongo 3+ returns a cursor, which is not subscriptable by key.
    if isinstance(json_doc, dict):
        return json_doc['result']
    return list(json_doc)
"$push": 1 } } }])) for f in filters: f["text"] = sum(f["text"]) filters = sorted(filters, key=lambda r: r['text'], reverse=True) print(filters[:10]) if (command == "3"): print( "Who is are the most mentioned Twitter users? (Provide the top five.)" ) pattern = re.compile( "(?<=^|(?<=[^a-zA-Z0-9-_\\.]))@([A-Za-z]+[A-Za-z0-9_]+)") regex = Regex.from_native(pattern) filters = list( db.tweets.aggregate([{ "$match": { "text": { "$regex": regex } } }, { "$project": { "user": "******", "texts": { "$split": ["$text", " "] } } }, {