def substring(column, delimeter, session):
    """Return the text of `column` before the first occurrence of `delimeter`.

    SQLite lacks SUBSTRING_INDEX, so it is emulated with IIF/INSTR/SUBSTR:
    when the delimiter is present, take everything before it; otherwise
    return the column unchanged (matching SUBSTRING_INDEX semantics).

    :param column: SQLAlchemy column/expression to split
    :param delimeter: delimiter string (misspelled name kept for interface
        compatibility with existing callers)
    :param session: session whose bind determines the active SQL dialect
    :raises NotImplementedError: for dialects other than sqlite/mysql
        (previously this fell through and silently returned None)
    """
    dialect = session.bind.dialect.name
    if dialect == 'sqlite':
        return func.iif(
            func.instr(column, delimeter) > 0,
            func.substr(column, 1, func.instr(column, delimeter) - 1),
            column)
    elif dialect == 'mysql':
        return func.substring_index(column, delimeter, 1)
    # BUGFIX: an unsupported dialect used to return None implicitly, which
    # surfaced later as a confusing error during query construction.
    raise NotImplementedError(
        f"substring() is not implemented for dialect {dialect!r}")
def search_around(timestamp: T.number, ):
    """Search for entries in a time window around *timestamp* (epoch seconds).

    The window is asymmetric: 3 hours back, 2 minutes forward.
    Delegates to search_common with a `where` clause that compares the
    stored (string) datetime against the requested timestamp.
    """
    get_logger().info('/search_around %s', timestamp)
    utc_timestamp = timestamp  # old 'timestamp' name is legacy
    # TODO meh. use count/pagination instead?
    delta_back = timedelta(hours=3).total_seconds()
    delta_front = timedelta(minutes=2).total_seconds()
    # TODO not sure about delta_front.. but it also serves as quick hack to accommodate for all the truncations etc
    return search_common(
        url='http://dummy.org',  # NOTE: not used in the where query (below).. perhaps need to get rid of this
        where=lambda table, url: between(
            func.strftime(
                '%s',  # NOTE: it's tz aware, e.g. would distinguish +05:00 vs -03:00
                # this is a bit fragile, relies on cachew internal timestamp format, e.g.
                # 2020-11-10T06:13:03.196376+00:00 Europe/London
                func.substr(
                    table.c.dt,
                    1,  # substr is 1-indexed
                    # instr finds the first match, but if not found it defaults to 0.. which we hack by concatting with ' '
                    func.instr(
                        func.cast(table.c.dt, types.Unicode).op('||')
                        (' '), ' ') - 1,
                    # NOTE(review): the cast appears necessary — without it sqlite
                    # seems to treat ' ' as a datetime; confirm before removing
                )) - literal(utc_timestamp),
            literal(-delta_back),
            literal(delta_front),
        ),
    )
def _order_by(self, query, joins, sort_joins, sort_field, sort_desc):
    """Apply ordering; the dotted 'code' column gets numeric-aware handling."""
    query, joins = super(AffiliationView, self)._order_by(
        query, joins, sort_joins, sort_field, sort_desc)
    if sort_field.name == 'code':
        # The code column holds dotted values such as 1, 1.1, 1.2, 1.10,
        # 1.11, 10.1 — these sort incorrectly both numerically and
        # lexicographically. Pad every value out to a full dotted quad and
        # let MySQL's inet_aton turn it into a single sortable integer.
        padded = func.if_(
            func.instr(sort_field, '.') > 0,
            func.concat(sort_field, '.0.0'),    # eg. 10.2 -> 10.2.0.0
            func.concat(sort_field, '.0.0.0'))  # eg. 10 -> 10.0.0.0
        sort_key = func.inet_aton(padded)
        if sort_desc:
            sort_key = desc(sort_key)
        query = query.order_by(None).order_by(sort_key)
    return query, joins
def _order_by(self, query, joins, sort_joins, sort_field, sort_desc):
    """Apply ordering; the dotted 'code' column gets numeric-aware handling."""
    query, joins = super(AffiliationView, self)._order_by(
        query, joins, sort_joins, sort_field, sort_desc)
    if sort_field.name == 'code':
        # sort by the code field, which has entries like:
        #  1
        #  1.1
        #  1.2
        #  1.10
        #  1.11
        #  10.1
        #
        # these are hard to sort, because they don't sort correctly
        # numerically or lexicographically. Instead, we treat them
        # as parts of dotted-quad IP addresses and use mysql's inet_aton
        # to sort them.
        sort_field = func.inet_aton(
            func.if_(
                func.instr(sort_field, '.') > 0,
                func.concat(sort_field, '.0.0'),  # eg. 10.2
                func.concat(sort_field, '.0.0.0')))  # eg. 10
        if sort_desc:
            sort_field = desc(sort_field)
        query = query.order_by(None).order_by(sort_field)
    return query, joins
def substring(self, session, column, delimeter):
    """Return the portion of *column* that follows the delimiter.

    https://stackoverflow.com/a/57763081
    """
    dialect = session.bind.dialect.name
    if dialect == 'sqlite':
        # skip one past the delimiter position reported by instr()
        return func.substr(column, func.instr(column, delimeter) + 1)
    if dialect == 'mysql':
        # NOTE(review): substring_index(..., -1) splits on the LAST
        # occurrence while the sqlite branch splits on the FIRST — confirm
        # inputs only ever contain a single delimiter.
        return func.substring_index(column, delimeter, -1)
def department_for_keyword(keyword):
    """Return the Department whose keyword appears inside *keyword*, or None.

    When several department keywords match, prefer the longest (most
    specific) one; ties keep the first candidate seen.
    """
    db_session = DB_Session_Factory.get_db_session()
    candidates = db_session.query(Department_Keyword).filter(
        func.instr(keyword, Department_Keyword.keyword) > 0)
    best_match = max(candidates, key=lambda dk: len(dk.keyword), default=None)
    if best_match is None:
        return None
    return db_session.query(Department).get(best_match.department_id)
def genes(page: int, limit: int):
    """Index route for gene aliases: optional substring search plus pagination."""
    needle = request.args.get("search", type=str)
    alias_query = GeneAlias.query
    if needle:
        # substring match anywhere within the alias name
        alias_query = alias_query.filter(func.instr(GeneAlias.name, needle))
    # count distinct aliases regardless of the page window below
    total_count = alias_query.with_entities(
        func.count(func.distinct(GeneAlias.alias_id))).scalar()
    rows = alias_query.limit(limit).offset(page * (limit or 0)).all()
    if expects_json(request):
        return paginated_response(rows, page, total_count, limit)
    elif expects_csv(request):
        return csv_response(rows)
    abort(
        406,
        "Only 'text/csv' and 'application/json' HTTP accept headers supported")
def list_directories(dbs, account, bucket_name, dirname, order_by="dname"):
    """Query the immediate "directory" entries under *dirname* in a bucket,
    aggregating total size and latest mtime per directory name."""
    prefix = "" if dirname is None else dirname
    start = len(prefix) + 1
    # first path segment after the prefix, up to (and including) the next '/'
    dname_col = func.substr(
        BucketObject.name,
        start,
        func.instr(func.substr(BucketObject.name, start), "/"),
    ).label("dname")
    query = dbs.query(
        BucketObject.account,
        BucketObject.bucket_name,
        dname_col,
        func.sum(BucketObject.size).label("size"),
        func.max(BucketObject.mtime).label("mtime"),
    )
    query = query.filter_by(account=account, bucket_name=bucket_name)
    query = query.filter(BucketObject.name.like(prefix + "%"))
    query = query.group_by(BucketObject.account, BucketObject.bucket_name,
                           "dname")
    return query.order_by(order_by)
def list_datasets(page: int, limit: int) -> Response:
    """List datasets visible to the requesting user.

    Supports ordering (order_by/order_dir), per-column substring and enum
    filters taken from query parameters, and pagination via page/limit.
    Admins (or LOGIN_DISABLED) may query all datasets or assume an identity
    with ?user=<id>; regular users only see group-permitted datasets.
    """
    order_by = request.args.get("order_by", type=str)
    allowed_columns = [
        "dataset_type",
        "condition",
        "notes",
        "updated",
        "updated_by",
        "linked_files",
        "tissue_sample_type",
        "participant_codename",
        "family_codename",
    ]
    if order_by is None:
        order = None  # system default, likely dataset_id
    elif order_by == "updated_by":
        # BUGFIX: was `models.Dataset.updated_by.username`, which accesses
        # `.username` on the class-level relationship attribute and raises
        # AttributeError; order on the joined User column instead (this is
        # also what the updated_by filter below uses).
        order = models.User.username
    elif order_by == "linked_files":
        order = models.DatasetFile.path
    elif order_by == "tissue_sample_type":
        order = models.TissueSample.tissue_sample_type
    elif order_by == "participant_codename":
        order = models.Participant.participant_codename
    elif order_by == "family_codename":
        order = models.Family.family_codename
    elif order_by in allowed_columns:
        # Since this is an elif clause, we know special cases are already handled above.
        # order_by has been restricted to a known list, so we can safely use getattr
        order = getattr(models.Dataset, order_by)
    else:
        abort(400, description=f"order_by must be one of {allowed_columns}")

    if order:
        order_dir = request.args.get("order_dir", type=str)
        if order_dir == "desc":
            order = order.desc()
        elif order_dir == "asc":
            order = order.asc()
        else:
            abort(400, description="order_dir must be either 'asc' or 'desc'")

    filters = []
    notes = request.args.get("notes", type=str)
    if notes:
        filters.append(func.instr(models.Dataset.notes, notes))
    updated_by = request.args.get("updated_by", type=str)
    if updated_by:
        filters.append(func.instr(models.User.username, updated_by))
    linked_files = request.args.get("linked_files", type=str)
    if linked_files:
        filters.append(func.instr(models.DatasetFile.path, linked_files))
    participant_codename = request.args.get("participant_codename", type=str)
    if participant_codename:
        filters.append(
            func.instr(models.Participant.participant_codename,
                       participant_codename))
    family_codename = request.args.get("family_codename", type=str)
    if family_codename:
        filters.append(
            func.instr(models.Family.family_codename, family_codename))
    dataset_type = request.args.get("dataset_type", type=str)
    if dataset_type:
        filters.append(models.Dataset.dataset_type.in_(
            dataset_type.split(",")))
    condition = request.args.get("condition", type=str)
    if condition:
        filters.append(
            filter_in_enum_or_abort(models.Dataset.condition,
                                    models.DatasetCondition, condition))
    tissue_sample_type = request.args.get("tissue_sample_type", type=str)
    if tissue_sample_type:
        filters.append(
            filter_in_enum_or_abort(
                models.TissueSample.tissue_sample_type,
                models.TissueSampleType,
                tissue_sample_type,
            ))
    updated = request.args.get("updated", type=str)
    if updated:
        filters.append(filter_updated_or_abort(models.Dataset.updated,
                                               updated))

    if app.config.get("LOGIN_DISABLED") or current_user.is_admin:
        user_id = request.args.get("user")
    else:
        user_id = current_user.user_id

    if user_id:
        # Regular user or assumed identity, return only permitted datasets
        query = (models.Dataset.query.options(
            contains_eager(models.Dataset.tissue_sample).contains_eager(
                models.TissueSample.participant).contains_eager(
                    models.Participant.family),
            contains_eager(models.Dataset.tissue_sample).contains_eager(
                models.TissueSample.participant).joinedload(
                    models.Participant.institution),
            contains_eager(models.Dataset.files),
            contains_eager(models.Dataset.updated_by),
        ).join(models.Dataset.tissue_sample).join(
            models.TissueSample.participant
        ).join(models.Participant.family).outerjoin(models.Dataset.files).join(
            models.Dataset.updated_by).join(models.groups_datasets_table).join(
                models.users_groups_table,
                models.groups_datasets_table.columns.group_id ==
                models.users_groups_table.columns.group_id,
            ).filter(models.users_groups_table.columns.user_id == user_id,
                     *filters))
    else:
        # Admin or LOGIN_DISABLED, authorized to query all datasets
        query = (models.Dataset.query.options(
            contains_eager(models.Dataset.tissue_sample).contains_eager(
                models.TissueSample.participant).contains_eager(
                    models.Participant.family),
            contains_eager(models.Dataset.tissue_sample).contains_eager(
                models.TissueSample.participant).joinedload(
                    models.Participant.institution),
            contains_eager(models.Dataset.files),
            contains_eager(models.Dataset.updated_by),
        ).join(models.Dataset.tissue_sample).join(
            models.TissueSample.participant).join(
                models.Participant.family).outerjoin(
                    models.Dataset.files).join(
                        models.Dataset.updated_by).filter(*filters))

    # Count distinct parent keys so one-to-many joins don't inflate the total.
    total_count = query.with_entities(
        func.count(distinct(models.Dataset.dataset_id))).scalar()
    datasets = query.order_by(order).limit(limit).offset(page *
                                                         (limit or 0)).all()

    return jsonify({
        "data": [{
            **asdict(dataset),
            "tissue_sample_type": dataset.tissue_sample.tissue_sample_type,
            "participant_codename":
            dataset.tissue_sample.participant.participant_codename,
            "participant_type":
            dataset.tissue_sample.participant.participant_type,
            "institution": dataset.tissue_sample.participant.institution
            and dataset.tissue_sample.participant.institution.institution,
            "sex": dataset.tissue_sample.participant.sex,
            "family_codename":
            dataset.tissue_sample.participant.family.family_codename,
            # NOTE(review): these audit fields come from the tissue sample,
            # not the dataset itself — confirm that is intentional
            "created_by": dataset.tissue_sample.created_by.username,
            "updated_by": dataset.tissue_sample.updated_by.username,
        } for dataset in datasets],
        "page": page if limit else 0,
        "total_count": total_count,
    })
def list_datasets(page: int, limit: int) -> Response:
    """List datasets visible to the requesting user.

    Supports ordering (order_by/order_dir, always tie-broken by dataset_id),
    per-column substring/enum filters from query parameters, group_code
    filtering via subquery, and pagination via page/limit. Responds with
    JSON or CSV according to the request's Accept header.
    """
    order_by = request.args.get("order_by", type=str)
    allowed_columns = [
        "dataset_type",
        "condition",
        "notes",
        "updated",
        "updated_by",
        "linked_files",
        "tissue_sample_type",
        "participant_codename",
        "family_codename",
        "dataset_id",
    ]
    if order_by is None:
        order = None  # system default, likely dataset_id
    elif order_by == "updated_by":
        order = models.User.username
    elif order_by == "linked_files":
        order = models.DatasetFile.path
    elif order_by == "tissue_sample_type":
        order = models.TissueSample.tissue_sample_type
    elif order_by == "participant_codename":
        order = models.Participant.participant_codename
    elif order_by == "family_codename":
        order = models.Family.family_codename
    elif order_by in allowed_columns:
        # Since this is an elif clause, we know special cases are already handled above.
        # order_by has been restricted to a known list, so we can safely use getattr
        order = getattr(models.Dataset, order_by)
    else:
        abort(400, description=f"order_by must be one of {allowed_columns}")
    if order:
        order_dir = request.args.get("order_dir", type=str)
        # dataset_id is appended as a tie-breaker for a stable sort order
        if order_dir == "desc":
            order = (order.desc(), models.Dataset.dataset_id.desc())
        elif order_dir == "asc":
            order = (order.asc(), models.Dataset.dataset_id.asc())
        else:
            abort(400, description="order_dir must be either 'asc' or 'desc'")
    else:
        # Default ordering
        order = (models.Dataset.dataset_id, )
    filters = []
    notes = request.args.get("notes", type=str)
    if notes:
        filters.append(func.instr(models.Dataset.notes, notes))
    updated_by = request.args.get("updated_by", type=str)
    if updated_by:
        filters.append(func.instr(models.User.username, updated_by))
    dataset_id = request.args.get("dataset_id", type=str)
    if dataset_id:
        filters.append(models.Dataset.dataset_id == dataset_id)
    linked_files = request.args.get("linked_files", type=str)
    if linked_files:
        filters.append(func.instr(models.DatasetFile.path, linked_files))
    participant_codename = request.args.get("participant_codename", type=str)
    if participant_codename:
        filters.append(
            func.instr(models.Participant.participant_codename,
                       participant_codename))
    family_codename = request.args.get("family_codename", type=str)
    if family_codename:
        filters.append(
            func.instr(models.Family.family_codename, family_codename))
    dataset_type = request.args.get("dataset_type", type=str)
    if dataset_type:
        filters.append(models.Dataset.dataset_type.in_(
            dataset_type.split(",")))
    condition = request.args.get("condition", type=str)
    if condition:
        filters.append(
            filter_in_enum_or_abort(models.Dataset.condition,
                                    models.DatasetCondition, condition))
    tissue_sample_type = request.args.get("tissue_sample_type", type=str)
    if tissue_sample_type:
        filters.append(
            filter_in_enum_or_abort(
                models.TissueSample.tissue_sample_type,
                models.TissueSampleType,
                tissue_sample_type,
            ))
    updated = request.args.get("updated", type=str)
    if updated:
        filters.append(filter_updated_or_abort(models.Dataset.updated,
                                               updated))
    if app.config.get("LOGIN_DISABLED") or current_user.is_admin:
        user_id = request.args.get("user")
    else:
        user_id = current_user.user_id
    query = (
        models.Dataset.query.options(
            # tell the ORM that our join contains models we'd like to eager load, so it doesn't try to load them lazily
            # in my test, this sped up loading time by ~3x
            contains_eager(models.Dataset.tissue_sample).contains_eager(
                models.TissueSample.participant
            ).contains_eager(models.Participant.family),
            # eager load groups and files for speed, since we're not joining here, we can't order or search by these fields
            # ordering is probably not a great loss b/c one-to-many makes it tough,
            # but if we want to search, a solution might be a subquery (something similar to the filter in analyses.py:194-208)
            # FE will have to be adjusted to disallow sorting/filtering on these fields
            selectinload(models.Dataset.groups),
            selectinload(models.Dataset.files),
        )
        # join with these since they are 1-to-1 and we want to order/filter on some of their fields
        .join(models.Dataset.tissue_sample).join(
            models.TissueSample.participant).join(
                models.Participant.family).join(models.Dataset.updated_by))
    if user_id:
        # Regular user or assumed identity, return only permitted datasets
        query = (query.join(models.groups_datasets_table).join(
            models.users_groups_table,
            models.groups_datasets_table.columns.group_id ==
            models.users_groups_table.columns.group_id,
        ).filter(models.users_groups_table.columns.user_id == user_id,
                 *filters))
    else:
        # Admin or LOGIN_DISABLED, authorized to query all datasets
        query = query.options(joinedload(
            models.Dataset.groups)).filter(*filters)
    group_code = request.args.get("group_code", type=str)
    if group_code:
        # subquery so group membership filtering doesn't drop other groups
        # from the eager-loaded results
        subquery = (models.Dataset.query.join(models.Dataset.groups).filter(
            func.instr(models.Group.group_code, group_code)).with_entities(
                models.Dataset.dataset_id).subquery())
        query = query.filter(models.Dataset.dataset_id.in_(subquery))
    # total_count always refers to the number of unique datasets in the database
    total_count = query.with_entities(
        func.count(distinct(models.Dataset.dataset_id))).scalar()
    datasets = query.order_by(*order).limit(limit).offset(page *
                                                          (limit or 0)).all()
    results = [{
        **asdict(dataset),
        "tissue_sample_type": dataset.tissue_sample.tissue_sample_type,
        "participant_aliases":
        dataset.tissue_sample.participant.participant_aliases,
        "participant_codename":
        dataset.tissue_sample.participant.participant_codename,
        "participant_type": dataset.tissue_sample.participant.participant_type,
        "institution": dataset.tissue_sample.participant.institution
        and dataset.tissue_sample.participant.institution.institution,
        "sex": dataset.tissue_sample.participant.sex,
        "family_codename":
        dataset.tissue_sample.participant.family.family_codename,
        "family_aliases":
        dataset.tissue_sample.participant.family.family_aliases,
        "created_by": dataset.created_by.username,
        "updated_by": dataset.updated_by.username,
        "group_code": [group.group_code for group in dataset.groups],
    } for dataset in datasets]
    app.logger.debug(
        "%d datasets to be returned; %d limit; %d total_count",
        len(datasets),
        limit or -1,
        total_count,
    )
    if expects_json(request):
        return paginated_response(results, page, total_count, limit)
    elif expects_csv(request):
        return csv_response(
            results,
            filename="datasets_report.csv",
            colnames=[
                "family_codename",
                "participant_codename",
                "tissue_sample_type",
                "dataset_type",
                "condition",
                "notes",
                "linked_files",
                "updated",
                "updated_by",
                "dataset_id",
            ],
        )
    abort(
        406,
        "Only 'text/csv' and 'application/json' HTTP accept headers supported")
def username(cls):
    """Derive the username as everything before the '@' in the email column."""
    # sqlite does not have a split-string function, so locate the '@' with
    # instr() and take the prefix with substr() instead
    at_index = func.instr(cls.email, '@')
    return func.substr(cls.email, 1, at_index - 1)
def FindPeopleBySerialNumber(self, serial):
    """Return all Person rows owning a Bike whose serial contains *serial*."""
    matches = db.session.query(Person).join(Bike).filter(
        and_(
            # explicit link condition (redundant with the join, kept as-is)
            Bike.personID == Person.id,
            # substring match anywhere within the stored serial
            func.instr(Bike.serial, serial) > 0,
        ))
    return matches.all()
def list_participants(page: int, limit: int) -> Response:
    """List participants visible to the requesting user.

    Supports ordering (order_by/order_dir), per-column substring, enum and
    nullable-bool filters from query parameters, and pagination via
    page/limit. Regular users only see participants whose datasets belong
    to one of their groups; admins (or LOGIN_DISABLED) see everything and
    may assume an identity via ?user=<id>.
    """
    order_by = request.args.get("order_by", type=str)
    allowed_columns = [
        "family_codename",
        "participant_codename",
        "notes",
        "participant_type",
        "sex",
        "affected",
        "solved",
    ]
    if order_by is None:
        order = None  # system default, likely participant_id
    elif order_by == "family_codename":
        order = models.Family.family_codename
    elif order_by in allowed_columns:
        # Since this is an elif clause, we know family_codename is already handled above.
        # order_by has been restricted to a known list, so we can safely use getattr
        order = getattr(models.Participant, order_by)
    else:
        abort(400, description=f"order_by must be one of {allowed_columns}")
    if order:
        order_dir = request.args.get("order_dir", type=str)
        if order_dir == "desc":
            order = order.desc()
        elif order_dir == "asc":
            order = order.asc()
        else:
            abort(400, description="order_dir must be either 'asc' or 'desc'")
    filters = []
    family_codename = request.args.get("family_codename", type=str)
    if family_codename:
        filters.append(
            func.instr(models.Family.family_codename, family_codename))
    participant_codename = request.args.get("participant_codename", type=str)
    if participant_codename:
        filters.append(
            func.instr(models.Participant.participant_codename,
                       participant_codename))
    notes = request.args.get("notes", type=str)
    if notes:
        filters.append(func.instr(models.Participant.notes, notes))
    participant_type = request.args.get("participant_type", type=str)
    if participant_type:
        filters.append(
            filter_in_enum_or_abort(
                models.Participant.participant_type,
                models.ParticipantType,
                participant_type,
            ))
    sex = request.args.get("sex", type=str)
    if sex:
        filters.append(
            filter_in_enum_or_abort(models.Participant.sex, models.Sex, sex))
    affected = request.args.get("affected", type=str)
    if affected:
        filters.append(
            filter_nullable_bool_or_abort(models.Participant.affected,
                                          affected))
    solved = request.args.get("solved", type=str)
    if solved:
        filters.append(
            filter_nullable_bool_or_abort(models.Participant.solved, solved))
    if app.config.get("LOGIN_DISABLED") or current_user.is_admin:
        user_id = request.args.get("user")
    else:
        user_id = current_user.user_id
    if user_id:
        # Regular user or assumed identity, return only permitted participants
        query = (models.Participant.query.options(
            joinedload(models.Participant.institution),
            contains_eager(models.Participant.family),
            contains_eager(models.Participant.tissue_samples).contains_eager(
                models.TissueSample.datasets),
        ).join(models.Participant.family).outerjoin(
            models.Participant.tissue_samples).outerjoin(
                models.TissueSample.datasets).join(
                    models.groups_datasets_table,
                    models.Dataset.dataset_id ==
                    models.groups_datasets_table.columns.dataset_id,
                ).join(
                    models.users_groups_table,
                    models.groups_datasets_table.columns.group_id ==
                    models.users_groups_table.columns.group_id,
                ).filter(models.users_groups_table.columns.user_id == user_id,
                         *filters))
    else:
        # Admin or LOGIN_DISABLED, authorized to query all participants
        query = (models.Participant.query.options(
            joinedload(models.Participant.institution),
            contains_eager(models.Participant.family),
            joinedload(models.Participant.tissue_samples).joinedload(
                models.TissueSample.datasets),
        ).join(models.Participant.family).filter(*filters))
    # .count() returns the number of rows in the SQL response. When we join across a one-to-many
    # relationship, each parent row is multiplied by the number of children it has. This causes
    # .count() to disagree with len() as SQLAlchemy reroutes the duplicated rows back into their
    # mapped objects. In addition, .count() just wraps the main query in a subquery, so it can be
    # inefficient. Luckily, we can sidestep this whole problem efficiently by having the database
    # count the number of distinct parent primary keys returned.
    # https://gist.github.com/hest/8798884
    total_count = query.with_entities(
        func.count(distinct(models.Participant.participant_id))).scalar()
    participants = query.order_by(order).limit(limit).offset(
        page * (limit or 0)).all()
    return jsonify({
        "data": [{
            **asdict(participant),
            "family_codename": participant.family.family_codename,
            "institution": participant.institution.institution
            if participant.institution else None,
            "updated_by": participant.updated_by.username,
            "created_by": participant.created_by.username,
            "tissue_samples": [{
                **asdict(tissue_sample),
                "datasets": tissue_sample.datasets
            } for tissue_sample in participant.tissue_samples],
        } for participant in participants],
        "page": page if limit else 0,
        "total_count": total_count,
    })
def list_analyses(page: int, limit: int) -> Response:
    """List analyses visible to the requesting user.

    Supports ordering (order_by/order_dir), per-column filters, subquery
    filters on participant/family codenames, a multi-field 'search'
    parameter, and pagination via page/limit. Responds with JSON or CSV
    according to the request's Accept header.
    """
    app.logger.debug("Parsing query parameters..")
    order_by = request.args.get("order_by", type=str)
    allowed_columns = [
        "updated",
        "result_path",
        "notes",
        "analysis_state",
        "pipeline_id",
        "assignee",
        "requester",
        "priority",
        "requested",
        "analysis_id",
    ]
    # aliases allow joining User twice in one query (assignee vs requester)
    assignee_user = aliased(models.User)
    requester_user = aliased(models.User)
    app.logger.debug("Validating 'order_by' parameter..")
    if order_by is None:
        order = None  # system default, likely analysis_id
    elif order_by == "assignee":
        order = assignee_user.username
    elif order_by == "requester":
        order = requester_user.username
    elif order_by in allowed_columns:
        # Since this is an elif clause, we know special cases are already handled above.
        # order_by has been restricted to a known list, so we can safely use getattr
        order = getattr(models.Analysis, order_by)
    else:
        abort(400, description=f"order_by must be one of {allowed_columns}")
    if order:
        app.logger.debug("Validating 'order_dir' parameter..")
        order_dir = request.args.get("order_dir", type=str)
        if order_dir == "desc":
            order = order.desc()
        elif order_dir == "asc":
            order = order.asc()
        else:
            abort(400, description="order_dir must be either 'asc' or 'desc'")
        app.logger.debug("Ordering by '%s' in '%s' direction", order_by,
                         order_dir)
    app.logger.debug("Validating filter parameters..")
    filters = []
    assignee = request.args.get("assignee", type=str)
    if assignee:
        app.logger.debug("Filter by assignee: '%s'", assignee)
        filters.append(func.instr(assignee_user.username, assignee))
    requester = request.args.get("requester", type=str)
    if requester:
        app.logger.debug("Filter by requester: '%s'", requester)
        filters.append(func.instr(requester_user.username, requester))
    notes = request.args.get("notes", type=str)
    if notes:
        app.logger.debug("Filter by notes: '%s'", notes)
        filters.append(func.instr(models.Analysis.notes, notes))
    # this edges into the [GET] /:id endpoint, but the index/show payloads
    # diverge, causing issues for the FE, and this is technically still a filter
    analysis_id = request.args.get("analysis_id", type=str)
    if analysis_id:
        app.logger.debug("Filter by analysis_id: '%s'", analysis_id)
        filters.append(func.instr(models.Analysis.analysis_id, analysis_id))
    priority = request.args.get("priority", type=str)
    if priority:
        app.logger.debug("Filter by priority: '%s'", priority)
        filters.append(
            filter_in_enum_or_abort(
                models.Analysis.priority,
                models.PriorityType,
                priority,
            ))
    result_path = request.args.get("result_path", type=str)
    if result_path:
        app.logger.debug("Filter by result_path: '%s'", result_path)
        filters.append(func.instr(models.Analysis.result_path, result_path))
    updated = request.args.get("updated", type=str)
    if updated:
        app.logger.debug("Filter by updated: '%s'", updated)
        filters.append(
            filter_updated_or_abort(models.Analysis.updated, updated))
    requested = request.args.get("requested", type=str)
    if requested:
        app.logger.debug("Filter by requested: '%s'", requested)
        filters.append(
            filter_updated_or_abort(models.Analysis.requested, requested))
    analysis_state = request.args.get("analysis_state", type=str)
    if analysis_state:
        app.logger.debug("Filter by analysis_state: '%s'", analysis_state)
        filters.append(
            filter_in_enum_or_abort(
                models.Analysis.analysis_state,
                models.AnalysisState,
                analysis_state,
            ))
    pipeline_id = request.args.get("pipeline_id", type=str)
    if pipeline_id:
        try:
            filters.append(
                models.Analysis.pipeline_id.in_(
                    [int(pk) for pk in pipeline_id.split(",")]))
        except ValueError as err:
            abort(400, description=err)
        app.logger.debug("Filter by pipeline_id: '%s'", pipeline_id)
    app.logger.debug("Getting user_id..")
    if app.config.get("LOGIN_DISABLED") or current_user.is_admin:
        user_id = request.args.get("user")
    else:
        user_id = current_user.user_id
    app.logger.debug("user_id: '%s'", user_id)
    app.logger.debug("Querying and applying filters..")
    query = models.Analysis.query.options(
        selectinload(models.Analysis.datasets).options(
            selectinload(models.Dataset.tissue_sample).options(
                selectinload(models.TissueSample.participant).options(
                    selectinload(models.Participant.family)))))
    # only join the aliased User tables when their filters are in play
    if assignee:
        query = query.join(assignee_user, models.Analysis.assignee)
    if requester:
        query = query.join(requester_user, models.Analysis.requester)
    if user_id:
        # Regular user or assumed identity, return only permitted analyses
        query = (query.join(models.datasets_analyses_table).join(
            models.groups_datasets_table,
            models.datasets_analyses_table.columns.dataset_id ==
            models.groups_datasets_table.columns.dataset_id,
        ).join(
            models.users_groups_table,
            models.groups_datasets_table.columns.group_id ==
            models.users_groups_table.columns.group_id,
        ).filter(models.users_groups_table.columns.user_id == user_id,
                 *filters))
    else:
        # Admin or LOGIN_DISABLED, authorized to query all analyses
        query = query.filter(*filters)
    participant_codename = request.args.get("participant_codename", type=str)
    if participant_codename:
        app.logger.debug("Filtering by participant_codename: '%s' (subquery)",
                         participant_codename)
        # use a subquery on one-to-many-related fields instead of eager/filter so that the related fields themselves aren't excluded
        # (we want all analyses that have at least 1 particpant that matches the search, along with *all* related participants)
        subquery = (models.Analysis.query.join(models.Analysis.datasets).join(
            models.Dataset.tissue_sample).join(
                models.TissueSample.participant).filter(
                    func.instr(models.Participant.participant_codename,
                               participant_codename)).with_entities(
                                   models.Analysis.analysis_id).subquery())
        query = query.filter(models.Analysis.analysis_id.in_(subquery))
    family_codename = request.args.get("family_codename", type=str)
    if family_codename:
        app.logger.debug("Filtering by family_codename: '%s' (subquery)",
                         family_codename)
        subquery = (models.Analysis.query.join(models.Analysis.datasets).join(
            models.Dataset.tissue_sample).join(
                models.TissueSample.participant).join(
                    models.Participant.family).filter(
                        func.instr(models.Family.family_codename,
                                   family_codename)).with_entities(
                                       models.Analysis.analysis_id).subquery())
        query = query.filter(models.Analysis.analysis_id.in_(subquery))
    if request.args.get("search"):  # multifield search
        app.logger.debug("Searching across multiple fields by '%s'",
                         request.args.get("search"))
        subquery = (models.Analysis.query.join(models.Analysis.datasets).join(
            models.Dataset.tissue_sample).join(
                models.TissueSample.participant).join(
                    models.Participant.family).filter(
                        or_(
                            func.instr(models.Family.family_codename,
                                       request.args.get("search")),
                            func.instr(models.Family.family_aliases,
                                       request.args.get("search")),
                            func.instr(
                                models.Participant.participant_codename,
                                request.args.get("search"),
                            ),
                            func.instr(
                                models.Participant.participant_aliases,
                                request.args.get("search"),
                            ),
                        )).with_entities(
                            models.Analysis.analysis_id).subquery())
        query = query.filter(models.Analysis.analysis_id.in_(subquery))
    # count distinct primary keys so one-to-many joins don't inflate the total
    total_count = query.with_entities(
        func.count(distinct(models.Analysis.analysis_id))).scalar()
    analyses = query.order_by(order).limit(limit).offset(page *
                                                         (limit or 0)).all()
    app.logger.info("Query successful")
    results = [{
        **asdict(analysis),
        "participant_codenames": [
            d.tissue_sample.participant.participant_codename
            for d in analysis.datasets
        ],
        "family_codenames": [
            d.tissue_sample.participant.family.family_codename
            for d in analysis.datasets
        ],
        "requester": analysis.requester.username,
        "updated_by": analysis.updated_by.username,
        "assignee": analysis.assignee_id and analysis.assignee.username,
        "pipeline": analysis.pipeline,
    } for analysis in analyses]
    if expects_json(request):
        app.logger.debug("Returning paginated response..")
        return paginated_response(results, page, total_count, limit)
    elif expects_csv(request):
        app.logger.debug("Returning paginated response..")
        # CSV cannot serialize the pipeline model; flatten it to its name
        results = [{
            k: v if k != "pipeline" else v.pipeline_name
            for k, v in result.items()
        } for result in results]
        return csv_response(
            results,
            filename="analyses_report.csv",
            colnames=[
                "pipeline",
                "analysis_state",
                "participant_codenames",
                "family_codenames",
                "priority",
                "requester",
                "assignee",
                "updated",
                "result_path",
                "notes",
                "analysis_id",
            ],
        )
    abort(
        406,
        "Only 'text/csv' and 'application/json' HTTP accept headers supported")