def generate_search_query(search_string, filters, order, sort_by_arg, include_user=False):
    """
    Generates a query to search transfer accounts by their users' parameters. This is used by search_api,
    as well as the bulk disbursement API.
    :param search_string: The search query string
    :param filters: A SQLAlchemy filter object to apply to the query
    :param order: The order in which to display results. Use sqlalchemy.asc or sqlalchemy.desc
    :param sort_by_arg: Name of the column to sort by. Must be a key of sort_types_to_database_types
    :param include_user: If True, include the User and the search rank in the returned entities
    """
    sort_types_to_database_types = {
        'first_name': User.first_name,
        'last_name': User.last_name,
        'email': User.email,
        'date_account_created': User.created,
        'rank': 'rank',
        'balance': TransferAccount._balance_wei,
        'status': TransferAccount.is_approved,
    }
    if sort_by_arg not in sort_types_to_database_types:
        return {
            'message': f'Invalid sort_by value {sort_by_arg}. '
                       f'Please use one of the following: {sort_types_to_database_types.keys()}'
        }

    # To add a new searchable column, simply add a new SearchableColumn object!
    # And don't forget to add a trigram index on that column too -- see migration 33df5e72fca4 for reference
    user_search_columns = [
        SearchableColumn('first_name', User.first_name, rank=1.5),
        SearchableColumn('last_name', User.last_name, rank=1.5),
        SearchableColumn('phone', User.phone, rank=2),
        SearchableColumn('public_serial_number', User.public_serial_number, rank=2),
        SearchableColumn('location', User.location, rank=1),
        SearchableColumn('primary_blockchain_address', User.primary_blockchain_address, rank=2),
    ]

    sum_search = reduce(lambda x, y: x + y, [sc.get_similarity_query(search_string) for sc in user_search_columns])
    sort_by = sum_search if sort_by_arg == 'rank' else sort_types_to_database_types[sort_by_arg]
    # If there's no search string there are no ranks to sort by, so sort by account creation date instead
    sort_by = sort_types_to_database_types['date_account_created'] if sort_by_arg == 'rank' and not search_string else sort_by

    entities = [TransferAccount, sum_search, User] if include_user else [TransferAccount]
    final_query = db.session.query(TransferAccount, User, sum_search)\
        .outerjoin(TransferAccount, User.default_transfer_account_id == TransferAccount.id)\
        .filter(TransferAccount.is_ghost != True)\
        .with_entities(*entities)\
        .order_by(order(sort_by))

    # TODO: work out the difference between the above and
    # final_query = db.session.query(TransferAccount, User) \
    #     .outerjoin(TransferAccount, User.default_transfer_account_id == TransferAccount.id) \
    #     .with_entities(TransferAccount) \
    #     .order_by(order(sort_by))

    # Joining custom attributes is quite expensive, and we don't need them in a listing of search results
    if include_user:
        final_query = final_query.options(lazyload(User.custom_attributes))

    # If there is a search string, we only want to return ranked results!
    final_query = final_query.filter(sum_search != 0) if search_string else final_query

    return apply_filters(final_query, filters, User)
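# A minimal sketch of the kind of expression SearchableColumn.get_similarity_query is expected to
# return (an assumption for illustration -- the real SearchableColumn is defined elsewhere in the
# codebase). Each column contributes its pg_trgm similarity to the search string, weighted by its
# rank, and generate_search_query sums those contributions into `sum_search`; this is why each
# searchable column needs a trigram index.
from sqlalchemy import func


class _SearchableColumnSketch:
    def __init__(self, name, column, rank=1):
        self.name = name
        self.column = column
        self.rank = rank

    def get_similarity_query(self, search_string):
        # func.similarity renders the Postgres pg_trgm similarity() function;
        # coalesce guards against NULL columns so the weighted sum stays numeric.
        return func.coalesce(func.similarity(self.column, search_string), 0) * self.rank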
def execute_query(self, user_filters: dict = None, date_filter_attributes=None, enable_caching=True,
                  population_query_result=False, dont_include_timeseries=False,
                  start_date=None, end_date=None, group_by=None):
    """
    :param user_filters: dict of filters to apply to all metrics
    :param date_filter_attributes: lookup table indicating which date attribute to use when filtering by date
    :param enable_caching: set to False if you don't want the query result to be cached
    :param population_query_result: a representation of the number of users over time, used in
        post-processing of certain metrics. See postprocessing_actions.py for more details.
    :param dont_include_timeseries: if True, skips calculating timeseries data and only fetches
        aggregated_query and total_query
    :param start_date: start date for metrics queries (for calculating percent change within the date range)
    :param end_date: end date for metrics queries (for calculating percent change within the date range)
    :param group_by: name of the group-by used, used for metrics cache key names
    """
    actions = {
        'primary': self.query_actions,
        'aggregated_query': self.aggregated_query_actions,
        'total_query': self.total_query_actions,
        'start_day_query': self.total_query_actions,
        'end_day_query': self.total_query_actions
    }
    combinatory_strategies = {
        'primary': self.query_caching_combinatory_strategy,
        'aggregated_query': self.aggregated_query_caching_combinatory_strategy,
        'total_query': self.total_query_caching_combinatory_strategy,
        'start_day_query': metrics_cache.QUERY_ALL,
        'end_day_query': metrics_cache.QUERY_ALL
    }

    # Build the dict of queries to execute. Ungrouped metrics don't have aggregated queries,
    # and sometimes we only want aggregates and totals (based on dont_include_timeseries)
    if self.is_timeseries:
        if dont_include_timeseries:
            queries = {
                'total_query': self.total_query,
                'start_day_query': self.total_query,
                'end_day_query': self.total_query
            }
        else:
            queries = {
                'primary': self.query,
                'total_query': self.total_query,
                'start_day_query': self.total_query,
                'end_day_query': self.total_query
            }
        if self.aggregated_query:
            queries['aggregated_query'] = self.aggregated_query
        if None in queries.values():
            raise Exception('Timeseries query requires a query, and a total_query')
    else:
        queries = {
            'primary': self.query
        }

    results = {}
    for query in queries:
        user_filters = user_filters or {}

        # Apply stock filters
        filtered_query = queries[query]
        for f in self.stock_filters:
            filtered_query = filtered_query.filter(*f)

        # Validate that the filters we're applying are in the metric's filterable_by
        for f, _ in user_filters or []:
            if f not in self.filterable_by:
                raise Exception(f'{self.metric_name} not filterable by {f}')

        # Apply the applicable date filters
        if DATE in (self.filterable_by or []):
            if start_date or end_date:
                date_filter_attribute = date_filter_attributes[self.object_model]
                date_filters = []
                if start_date:
                    date_filters.append(date_filter_attribute >= start_date)
                if end_date:
                    date_filters.append(
                        date_filter_attribute <= datetime.datetime.strptime(end_date, "%Y-%m-%d") + datetime.timedelta(days=1)
                    )
                if not self.bypass_user_filters:
                    filtered_query = filtered_query.filter(*date_filters)

            # Handle start_day and end_day queries so we can have a percentage change for the whole day range
            if query in ['start_day_query', 'end_day_query']:
                date_filter_attribute = date_filter_attributes[self.object_model]
                # If a user-provided end date goes past today, just use today.
                # Also use today if the user doesn't provide a day.
                today = datetime.datetime.now().replace(minute=0, hour=0, second=0, microsecond=0)
                if not end_date or datetime.datetime.strptime(end_date, "%Y-%m-%d") > today:
                    last_day = today
                else:
                    last_day = datetime.datetime.strptime(end_date, "%Y-%m-%d")
                if not start_date:
                    # Get first date where data is present if no other date is given
                    first_day = metrics_cache.get_first_day(date_filter_attribute, enable_caching)
                else:
                    first_day = datetime.datetime.strptime(start_date, "%Y-%m-%d")

                day = first_day if query == 'start_day_query' else last_day
                date_filters = []
                # To filter for items on day n, we have to filter between day n and day n+1
                date_filters.append(date_filter_attribute >= day)
                date_filters.append(date_filter_attribute <= day + datetime.timedelta(days=1))
                filtered_query = filtered_query.filter(*date_filters)

        if not self.bypass_user_filters:
            filtered_query = filters.apply_filters(filtered_query, user_filters, self.object_model)

        strategy = combinatory_strategies[query]
        result = metrics_cache.execute_with_partial_history_cache(
            self.metric_name,
            filtered_query,
            self.object_model,
            strategy,
            enable_caching,
            group_by=group_by,
            query_name=query)

        if not actions[query]:
            results[query] = result
        else:
            results[query] = postprocessing_actions.execute_postprocessing(
                result, population_query_result, actions[query])

    if self.is_timeseries:
        result = {}
        # Get percentage change between first and last date
        start_value = results['start_day_query'] if 'start_day_query' in results else 0
        end_value = results['end_day_query'] if 'end_day_query' in results else 0
        try:
            increase = float(end_value) - float(start_value)
            percent_change = (increase / float(start_value)) * 100
        except ZeroDivisionError:
            percent_change = None

        if self.value_type not in VALUE_TYPES:
            raise Exception(f'{self.value_type} not a valid metric type!')
        result['type'] = {
            'value_type': self.value_type,
            'display_decimals': 2 if self.value_type == COUNT_AVERAGE else 0
        }
        if self.token:
            result['type']['currency_name'] = self.token.name
            result['type']['currency_symbol'] = self.token.symbol
            result['type']['display_decimals'] = self.token.display_decimals if self.token.display_decimals else 0

        if not dont_include_timeseries:
            result['timeseries'] = results['primary']
        if self.aggregated_query:
            result['aggregate'] = results['aggregated_query']
            result['aggregate']['total'] = results['total_query']
            result['aggregate']['percent_change'] = percent_change
        else:
            result['aggregate'] = {'total': results['total_query'], 'percent_change': percent_change}
        return result
    else:
        return results['primary']
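# Worked sketch of the percent-change step in execute_query (values are made up for illustration):
# with a start-day total of 40.0 and an end-day total of 50.0, increase = 10.0 and
# percent_change = (10.0 / 40.0) * 100 = 25.0. A start value of 0 raises ZeroDivisionError,
# which is reported as percent_change = None.
def _example_percent_change(start_value=40.0, end_value=50.0):
    try:
        increase = float(end_value) - float(start_value)
        return (increase / float(start_value)) * 100
    except ZeroDivisionError:
        return None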
def get(self):
    # HANDLE PARAM : search_string
    # Any search string. An empty string (or None) will just return everything!
    search_string = request.args.get('search_string') or ''

    # HANDLE PARAM : params
    # Standard filter object. Exact same as the ones Metrics uses!
    encoded_filters = request.args.get('params')
    filters = process_transfer_filters(encoded_filters)

    # HANDLE PARAM : order
    # Valid order types are: `ASC` and `DESC`
    # Default: DESC
    order_arg = request.args.get('order') or 'DESC'
    if order_arg.upper() not in ['ASC', 'DESC']:
        return {
            'message': 'Invalid order value \'{}\'. Please use \'ASC\' or \'DESC\''.format(order_arg)
        }
    order = asc if order_arg.upper() == 'ASC' else desc

    # HANDLE PARAM : sort_by
    # Valid sort types are: first_name, last_name, email, date_account_created, rank, balance, status
    # Default: rank
    sort_types_to_database_types = {
        'first_name': User.first_name,
        'last_name': User.last_name,
        'email': User.email,
        'date_account_created': User.created,
        'rank': 'rank',
        'balance': TransferAccount._balance_wei,
        'status': TransferAccount.is_approved,
    }
    sort_by_arg = request.args.get('sort_by') or 'rank'
    if sort_by_arg not in sort_types_to_database_types:
        return {
            'message': f'Invalid sort_by value {sort_by_arg}. '
                       f'Please use one of the following: {sort_types_to_database_types.keys()}'
        }

    # To add a new searchable column, simply add a new SearchableColumn object!
    # And don't forget to add a trigram index on that column too -- see migration 33df5e72fca4 for reference
    user_search_columns = [
        SearchableColumn('first_name', User.first_name, rank=1.5),
        SearchableColumn('last_name', User.last_name, rank=1.5),
        SearchableColumn('phone', User.phone, rank=2),
        SearchableColumn('public_serial_number', User.public_serial_number, rank=2),
        SearchableColumn('location', User.location, rank=1),
        SearchableColumn('primary_blockchain_address', User.primary_blockchain_address, rank=2),
    ]
    sum_search = reduce(lambda x, y: x + y, [sc.get_similarity_query(search_string) for sc in user_search_columns])
    sort_by = sum_search if sort_by_arg == 'rank' else sort_types_to_database_types[sort_by_arg]
    # If there's no search string there are no ranks to sort by, so sort by account creation date instead
    sort_by = sort_types_to_database_types['date_account_created'] if sort_by_arg == 'rank' and not search_string else sort_by

    final_query = db.session.query(TransferAccount, User, sum_search)\
        .with_entities(TransferAccount, sum_search)\
        .outerjoin(TransferAccount, User.default_transfer_account_id == TransferAccount.id)\
        .filter(TransferAccount.is_ghost != True)\
        .order_by(order(sort_by))

    # If there is a search string, we only want to return ranked results!
    final_query = final_query.filter(sum_search != 0) if search_string else final_query
    final_query = apply_filters(final_query, filters, User)

    transfer_accounts, total_items, total_pages, _ = paginate_query(final_query, ignore_last_fetched=True)
    accounts = [result_tuple[0] for result_tuple in transfer_accounts]

    if AccessControl.has_sufficient_tier(g.user.roles, 'ADMIN', 'admin'):
        result = transfer_accounts_schema.dump(accounts)
    elif AccessControl.has_any_tier(g.user.roles, 'ADMIN'):
        result = view_transfer_accounts_schema.dump(accounts)

    return {
        'message': 'Successfully Loaded.',
        'items': total_items,
        'pages': total_pages,
        'query_time': datetime.datetime.utcnow(),
        'data': {
            'transfer_accounts': result.data
        }
    }
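# Illustrative call sketch for the endpoint above. The URL path is a hypothetical placeholder --
# the actual route registration lives elsewhere -- but the query parameters mirror the
# HANDLE PARAM blocks in get(): search_string, params, order and sort_by.
def _example_search_request(test_client, auth_token):
    response = test_client.get(
        '/api/v1/transfer_account/search/',  # hypothetical path, for illustration only
        query_string={'search_string': 'fran', 'order': 'DESC', 'sort_by': 'rank'},
        headers={'Authorization': auth_token},
    )
    # The response envelope mirrors the dict returned by get() above
    return response.json['data']['transfer_accounts']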
def generate_search_query(search_string, filters, order, sort_by_arg, include_user=False, search_type=TRANSFER_ACCOUNT):
    """
    Generates a query to search transfer accounts or credit transfers by their users' parameters.
    This is used by search_api, as well as the bulk disbursement API.
    :param search_string: The search query string
    :param filters: A SQLAlchemy filter object to apply to the query
    :param order: The order in which to display results. Use sqlalchemy.asc or sqlalchemy.desc
    :param sort_by_arg: Name of the column to sort by. Must be a valid option for the given search_type
    :param include_user: If True, include the User and the search rank in the returned entities
    :param search_type: TRANSFER_ACCOUNT or CREDIT_TRANSFER
    """
    sender = aliased(User)
    recipient = aliased(User)
    sort_types_to_database_types = {
        TRANSFER_ACCOUNT: {
            'first_name': User.first_name,
            'last_name': User.last_name,
            'email': User.email,
            'date_account_created': User.created,
            'rank': 'rank',
            'balance': TransferAccount._balance_wei,
            'status': TransferAccount.is_approved,
        },
        CREDIT_TRANSFER: {
            'sender_first_name': sender.first_name,
            'sender_last_name': sender.last_name,
            'recipient_first_name': recipient.first_name,
            'recipient_last_name': recipient.last_name,
            'amount': CreditTransfer._transfer_amount_wei,
            'created': CreditTransfer.created,
            'id': CreditTransfer.id,
            'rank': 'rank',
        }
    }
    if sort_by_arg not in sort_types_to_database_types[search_type]:
        raise Exception(
            f'Invalid sort_by value {sort_by_arg}. '
            f'Please use one of the following: {sort_types_to_database_types[search_type].keys()}'
        )

    # To add a new searchable column, simply add a new SearchableColumn object!
    # And don't forget to add a trigram index on that column too -- see migration 33df5e72fca4 for reference
    user_search_columns = [
        SearchableColumn('first_name', User.first_name, rank=1.5),
        SearchableColumn('last_name', User.last_name, rank=1.5),
        SearchableColumn('phone', User.phone, rank=2),
        SearchableColumn('public_serial_number', User.public_serial_number, rank=2),
        SearchableColumn('location', User.location, rank=1),
        SearchableColumn('primary_blockchain_address', User.primary_blockchain_address, rank=2),
    ]

    sum_search = reduce(lambda x, y: x + y, [sc.get_similarity_query(search_string) for sc in user_search_columns])
    sort_by = sum_search if sort_by_arg == 'rank' else sort_types_to_database_types[search_type][sort_by_arg]

    # If there's no search string, the process is the same, just sort by account creation date
    if search_type == TRANSFER_ACCOUNT:
        # If the sort by argument is rank, but there are no ranks because there is no search string,
        # sort by date account created instead
        sort_by = sort_types_to_database_types[search_type]['date_account_created'] \
            if sort_by_arg == 'rank' and not search_string else sort_by

        entities = [TransferAccount, sum_search, User] if include_user else [TransferAccount]
        final_query = db.session.query(TransferAccount, User, sum_search)\
            .outerjoin(TransferAccount, User.default_transfer_account_id == TransferAccount.id)\
            .filter(TransferAccount.is_ghost != True)\
            .with_entities(*entities)\
            .order_by(order(sort_by))

        # Joining custom attributes is quite expensive, and we don't need them in a listing of search results
        if include_user:
            final_query = final_query.options(lazyload(User.custom_attributes))
    else:
        # If the sort by argument is rank, but there are no ranks because there is no search string,
        # sort by transfer creation order (id) instead
        sort_by = sort_types_to_database_types[search_type]['id'] \
            if sort_by_arg == 'rank' and not search_string else sort_by

        if search_string:
            final_query = db.session.query(CreditTransfer, sum_search)\
                .join(sender, sender.id == User.id)\
                .join(recipient, recipient.id == User.id)\
                .join(CreditTransfer, or_(
                    (recipient.id == CreditTransfer.recipient_user_id),
                    (sender.id == CreditTransfer.sender_user_id)))\
                .with_entities(CreditTransfer)\
                .order_by(order(sort_by))
        else:
            final_query = db.session.query(CreditTransfer)\
                .order_by(order(sort_by))

    # If there is a search string, we only want to return ranked results!
    final_query = final_query.filter(sum_search != 0) if search_string else final_query

    if search_type == CREDIT_TRANSFER:
        return apply_filters(final_query, filters, CreditTransfer)
    if search_type == TRANSFER_ACCOUNT:
        return apply_filters(final_query, filters, User)
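# Illustrative usage sketch: the same helper drives both search types. The argument values below
# are examples only; CREDIT_TRANSFER is the constant used in the search_type dispatch above.
def _example_credit_transfer_search(filters):
    from sqlalchemy import desc
    # Sorting by 'created' orders transfers chronologically; 'rank' would sort by search relevance
    query = generate_search_query('roo', filters, desc, 'created', search_type=CREDIT_TRANSFER)
    return query.all()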
def get(self):
    """
    This endpoint searches transfer accounts and credit transfers. It will check
    first name/last name/phone number/email address.
    Parameters:
        - search_string: Any string you want to search. When empty or not provided, all results will be returned.
        - search_type: Valid inputs are transfer_accounts and credit_transfers. Defaults to transfer_accounts.
        - order: Which order to return results in (ASC or DESC)
        - sort_by: What to sort by. `rank` works for both, and sorts by search relevance.
            - Transfer Accounts can be sorted by:
              'first_name', 'last_name', 'email', 'date_account_created', 'rank', 'balance', 'status'
            - Credit Transfers can be sorted by:
              'sender_first_name', 'sender_last_name', 'sender_email', 'sender_date_account_created',
              'recipient_first_name', 'recipient_last_name', 'recipient_email', 'recipient_date_account_created',
              'rank', 'amount', 'transfer_type', 'approval', 'date_transaction_created'
    Return Value:
        Results object, similar to the existing transfer_accounts and credit_transfers API return values
    """
    # HANDLE PARAM : search_string
    search_string = request.args.get('search_string') or ''

    # HANDLE PARAM : search_type
    # Valid search types are: `transfer_accounts` and `credit_transfers`
    # Default: transfer_accounts
    search_type = request.args.get('search_type') or 'transfer_accounts'
    if search_type not in ['transfer_accounts', 'credit_transfers']:
        response_object = {
            'message': 'Invalid search_type \'{}\'. Please use type \'transfer_accounts\' or \'credit_transfers\''
                .format(search_type),
        }
        return make_response(jsonify(response_object)), 400

    # HANDLE PARAM : sort_by
    # Valid params differ depending on search_type. See: sorting_options
    # Default: rank

    # Aliases used for joining the separate sender and recipient objects to transfers
    sender = aliased(User)
    recipient = aliased(User)

    # Build order by object
    sort_types_to_database_types = {
        'first_name': User.first_name,
        'last_name': User.last_name,
        'email': User.email,
        'date_account_created': User.created,
        'rank': 'rank',
        'balance': TransferAccount._balance_wei,
        'status': TransferAccount.is_approved,
        'amount': CreditTransfer.transfer_amount,
        'transfer_type': CreditTransfer.transfer_type,
        'approval': CreditTransfer.transfer_status,
        'date_transaction_created': CreditTransfer.resolved_date,
        'sender_first_name': sender.first_name,
        'sender_last_name': sender.last_name,
        'sender_email': sender.email,
        'sender_date_account_created': sender.created,
        'recipient_first_name': recipient.first_name,
        'recipient_last_name': recipient.last_name,
        'recipient_email': recipient.email,
        'recipient_date_account_created': recipient.created
    }

    # These lists are to validate the user input -- not using sort_types_to_database_types
    # since credit_transfers and transfer_accounts have unique options
    user_sorting_options = ['first_name', 'last_name', 'email', 'date_account_created']
    # sender_first_name, sender_last_name, etc...
    sender_sorting_options = list(map(lambda s: 'sender_' + s, user_sorting_options))
    # recipient_first_name, recipient_last_name, etc...
    recipient_sorting_options = list(map(lambda s: 'recipient_' + s, user_sorting_options))
    sorting_options = {
        'transfer_accounts': [*user_sorting_options, 'rank', 'balance', 'status'],
        'credit_transfers': [
            *sender_sorting_options,
            *recipient_sorting_options,
            'rank', 'amount', 'transfer_type', 'approval', 'date_transaction_created'
        ]
    }

    sort_by_arg = request.args.get('sort_by') or 'rank'
    if sort_by_arg not in sorting_options[search_type]:
        # Example output:
        # "Invalid sort_by value 'pizza'. Please use one of the following:
        #  'first_name', 'last_name', 'email', 'rank', 'balance', 'status', 'date_account_created'"
        response_object = {
            'message': 'Invalid sort_by value \'{}\'. Please use one of the following: {}'
                .format(sort_by_arg, ', '.join('\'{}\''.format(a) for a in sorting_options[search_type])),
        }
        return make_response(jsonify(response_object)), 400
    sort_by = sort_types_to_database_types[sort_by_arg]

    encoded_filters = request.args.get('params')
    filters = process_transfer_filters(encoded_filters)

    # HANDLE PARAM : order
    # Valid order types are: `ASC` and `DESC`
    # Default: DESC
    order_arg = request.args.get('order') or 'DESC'
    if order_arg not in ['ASC', 'DESC']:
        response_object = {
            'message': 'Invalid order value \'{}\'. Please use \'ASC\' or \'DESC\''.format(order_arg),
        }
        return make_response(jsonify(response_object)), 400
    order = desc
    if order_arg == 'ASC':
        order = asc

    # Note: Using tsquery wildcards here. Good docs on them here:
    # https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY
    # 'Fran deRoo' -> 'Fran:* | deRoo:*'
    # Matches strings like "Francine deRoos"
    # Will also match "Michiel deRoos" because of the or clause, but this will be ranked lower
    search_string = re.sub(r'\s+', ' ', search_string)
    search_terms = search_string.strip().split(' ')
    tsquery = ':* | '.join(search_terms) + ':*'

    # Return everything if the search string is empty
    if search_string == '':
        if search_type == 'transfer_accounts':
            final_query = TransferAccount.query.filter(TransferAccount.is_ghost != True)\
                .join(User, User.default_transfer_account_id == TransferAccount.id)
            final_query = apply_filters(final_query, filters, User)
            if sort_by_arg == 'rank':
                # There's no search rank when there's no query string, so do chrono instead
                final_query = final_query.order_by(order(User.created))
            else:
                final_query = final_query.order_by(order(sort_by))
            transfer_accounts, total_items, total_pages = paginate_query(final_query, TransferAccount)
            result = transfer_accounts_schema.dump(transfer_accounts)
            data = {'transfer_accounts': result.data}
        else:
            final_query = CreditTransfer.query.filter()\
                .outerjoin(sender, sender.default_transfer_account_id == CreditTransfer.sender_transfer_account_id)\
                .outerjoin(recipient, recipient.default_transfer_account_id == CreditTransfer.recipient_transfer_account_id)
            if sort_by_arg == 'rank':
                # There's no search rank when there's no query string, so do chrono instead
                final_query = final_query.order_by(order(CreditTransfer.created))
            else:
                final_query = final_query.order_by(order(sort_by))
            credit_transfers, total_items, total_pages = paginate_query(final_query, CreditTransfer)
            result = credit_transfers_schema.dump(credit_transfers)
            data = {'credit_transfers': result.data}
    else:
        # First get users who match the search string
        user_search_result = db.session.query(
            db.distinct(SearchView.id),
            SearchView,
            # This ugly (but functional) multi-tscolumn ranking is modified from Ben Smithgall's blog post
            # https://www.codeforamerica.org/blog/2015/07/02/multi-table-full-text-search-with-postgres-flask-and-sqlalchemy/
            db.func.max(db.func.full_text.ts_rank(
                db.func.setweight(db.func.coalesce(SearchView.tsv_email, ''), 'D')
                    .concat(db.func.setweight(db.func.coalesce(SearchView.tsv_phone, ''), 'A'))
                    .concat(db.func.setweight(db.func.coalesce(SearchView.tsv_first_name, ''), 'B'))
                    .concat(db.func.setweight(db.func.coalesce(SearchView.tsv_last_name, ''), 'B'))
                    .concat(db.func.setweight(db.func.coalesce(SearchView.tsv_public_serial_number, ''), 'A'))
                    .concat(db.func.setweight(db.func.coalesce(SearchView.tsv_primary_blockchain_address, ''), 'A'))
                    .concat(db.func.setweight(db.func.coalesce(SearchView.tsv_location, ''), 'C'))
                    .concat(db.func.setweight(db.func.coalesce(SearchView.tsv_default_transfer_account_id, ''), 'A')),
                db.func.to_tsquery(tsquery, postgresql_regconfig='english')
            )).label('rank')
        )\
            .group_by(SearchView)\
            .subquery()

        # Then use those results to join against TransferAccount or CreditTransfer
        if search_type == 'transfer_accounts':
            # TransferAccount Search Logic
            final_query = db.session.query(TransferAccount)\
                .join(user_search_result, user_search_result.c.default_transfer_account_id == TransferAccount.id)\
                .join(User, user_search_result.c.default_transfer_account_id == User.default_transfer_account_id)\
                .filter(user_search_result.c.rank > 0.0)\
                .filter(TransferAccount.is_ghost != True)
            final_query = apply_filters(final_query, filters, User)
            if sort_by_arg == 'rank':
                final_query = final_query.order_by(order(user_search_result.c.rank))
            else:
                final_query = final_query.order_by(order(sort_by))
            transfer_accounts, total_items, total_pages = paginate_query(final_query, TransferAccount)
            result = transfer_accounts_schema.dump(transfer_accounts)
            data = {'transfer_accounts': result.data}
        # CreditTransfer Search Logic
        else:
            sender_search_result = aliased(user_search_result)
            recipient_search_result = aliased(user_search_result)
            # Join the search results objects to sort by rank, as well as aliased user objects
            # (sender/recipient) for other sorting options
            final_query = db.session.query(CreditTransfer)\
                .outerjoin(sender_search_result,
                           sender_search_result.c.default_transfer_account_id == CreditTransfer.sender_transfer_account_id)\
                .outerjoin(recipient_search_result,
                           recipient_search_result.c.default_transfer_account_id == CreditTransfer.recipient_transfer_account_id)\
                .outerjoin(sender,
                           sender_search_result.c.default_transfer_account_id == sender.default_transfer_account_id)\
                .outerjoin(recipient,
                           recipient_search_result.c.default_transfer_account_id == recipient.default_transfer_account_id)\
                .filter(or_(recipient_search_result.c.rank > 0.0, sender_search_result.c.rank > 0.0))
            if sort_by_arg == 'rank':
                final_query = final_query.order_by(order(recipient_search_result.c.rank + sender_search_result.c.rank))
            else:
                final_query = final_query.order_by(order(sort_by))
            credit_transfers, total_items, total_pages = paginate_query(final_query, CreditTransfer)
            result = credit_transfers_schema.dump(credit_transfers)
            data = {'credit_transfers': result.data}

    response_object = {
        'message': 'Successfully Loaded.',
        'items': total_items,
        'pages': total_pages,
        'query_time': datetime.datetime.utcnow(),
        'data': data
    }
    bytes_data = orjson.dumps(response_object)
    resp = make_response(bytes_data, 200)
    resp.mimetype = 'application/json'
    return resp
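# Worked sketch of the tsquery construction used above (plain string manipulation, runnable on its
# own): repeated whitespace is collapsed, the string is split into terms, and the terms are joined
# with prefix-match wildcards. 'Fran  deRoo' becomes 'Fran:* | deRoo:*', which matches
# "Francine deRoos" and, at a lower rank via the OR clause, "Michiel deRoos".
def _example_build_tsquery(search_string='Fran  deRoo'):
    import re
    search_string = re.sub(r'\s+', ' ', search_string)
    search_terms = search_string.strip().split(' ')
    return ':* | '.join(search_terms) + ':*'  # -> 'Fran:* | deRoo:*'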