def _query_join_relation(self, query: BaseQuery, root_relation: str) -> BaseQuery:
    """
    Add the outer joins required to reach a dotted column's root relation.

    Each ``(model, join condition)`` pair returned by
    ``self.get_related_model_and_join(root_relation)`` is appended to the
    query as an outer join.

    :param query: SQLAlchemy query object
    :param root_relation: The root part of a dotted column, so the root relation
    :return: The query with the joins applied
    """
    for target_model, join_condition in self.get_related_model_and_join(root_relation):
        if self.is_model_already_joined(query, target_model):
            # The same model may be joined more than once: when it is already
            # part of the query, join a fresh alias of it instead.
            target_model = aliased(target_model)
            # Rebuild the join condition so that its right-hand side is the
            # primary key of the alias; left side and operator are kept.
            alias_pk = self.get_pk(target_model)
            join_condition = BinaryExpression(
                join_condition.left, alias_pk, join_condition.operator
            )
        query = query.join(target_model, join_condition, isouter=True)
    return query
def query_articles_by_tag(self, tag: str, query_obj: BaseQuery = None):
    """Build a query for articles that have a tag named ``tag``.

    :param tag: Value compared for equality against ``Tag.name``.
    :param query_obj: Existing query to narrow down. When it is ``None``
        the search starts from ``Article.query``.
    :return: The query joined on ``Article.tags`` and filtered by tag name.
    """
    base_query = Article.query if query_obj is None else query_obj
    return base_query.join(Article.tags).filter(Tag.name == tag)
def _comparison_for(param: str):
    """Pick the comparison callable encoded by the bracketed suffix of ``param``.

    ``[gt]``, ``[gte]``, ``[lt]`` and ``[lte]`` select the matching ordering
    comparison; any other ending (including no suffix at all) selects
    equality.
    """
    # Function-scope import so the file's import block does not need to change.
    import operator

    suffix_comparisons = (
        ("[gt]", operator.gt),
        ("[gte]", operator.ge),
        ("[lt]", operator.lt),
        ("[lte]", operator.le),
    )
    for suffix, comparison in suffix_comparisons:
        if param.endswith(suffix):
            return comparison
    return operator.eq


def _repository_count(user) -> int:
    """Return how many repositories are attached to ``user``."""
    return len(user.repositories)


def _total_stars(user):
    """Return the sum of ``stars`` over all repositories of ``user``."""
    return sum(repo.stars for repo in user.repositories)


def _filter_users_by_metric(query: BaseQuery, param: str, value: str, metric) -> BaseQuery:
    """Restrict ``query`` to users whose ``metric`` compares true against ``value``.

    :param query: Query to restrict.
    :param param: Request parameter name; its suffix selects the comparison
        (see ``_comparison_for``).
    :param value: Raw parameter value; it must parse as an integer.
    :param metric: Callable that maps a user object to the number compared.
    :return: ``query`` filtered to the ids of the matching users.
    :raises ValueError: If ``value`` is not a valid integer.
    """
    compare = _comparison_for(param)
    # Parse once, before touching the database, instead of once per user.
    threshold = int(value)
    # The metric is computed in Python on the loaded users. Because this is
    # an inner join, users without any repository are never candidates.
    candidates = (
        query.join(GithubUserInfo.repositories)
        .distinct(GithubUserInfo.id)
        .all()
    )
    matching_ids = [
        user.id for user in candidates if compare(metric(user), threshold)
    ]
    return query.filter(GithubUserInfo.id.in_(matching_ids))


def apply_filter(query: BaseQuery) -> BaseQuery:
    """Apply the filters found in the request's query string to ``query``.

    Handled parameters:

    * ``username`` - ``LIKE`` match on ``GithubUserInfo.username`` with the
      value wrapped in ``%`` on both sides.
    * ``language`` - space-separated list; every entry must be contained in
      ``GithubRepositories.languages`` of a joined repository.
    * ``number_of_repositories`` - compares the user's repository count.
    * ``stars`` - compares the summed stars of the user's repositories.

    The last two accept an optional ``[gt]``, ``[gte]``, ``[lt]`` or
    ``[lte]`` suffix; without one of those the comparison is equality.

    ``fields``, ``sort``, ``page`` and ``limit`` are skipped here, and so are
    ``date...`` parameters (not implemented yet) and any other unrecognised
    name.

    :param query: Query to narrow down.
    :return: The filtered query. It is never materialised here, so the
        result always matches the annotated return type.
    :raises ValueError: If a numeric filter receives a non-integer value.
    """
    reserved_params = {"fields", "sort", "page", "limit"}

    for param, value in request.args.items():
        if param in reserved_params:
            continue

        if param == "username":
            query = query.filter(GithubUserInfo.username.like(f"%{value}%"))
        elif param == "language":
            # Join the repositories once, then add one condition per language.
            query = query.join(GithubUserInfo.repositories)
            for lang in value.split(" "):
                query = query.filter(
                    GithubRepositories.languages.contains(lang))
        elif param.startswith("number_of_repositories"):
            query = _filter_users_by_metric(
                query, param, value, _repository_count)
        elif param.startswith("stars"):
            query = _filter_users_by_metric(query, param, value, _total_stars)
        # TODO: "date..." filters are not implemented; last_commit_date
        # scraping still has to be added to the database. The planned
        # behaviour was to parse the value as "%d-%m-%Y", compare it with
        # GithubUserInfo.date using the same suffix convention, and skip the
        # parameter when the value does not parse.
        #
        # NOTE(review): the previous version contained a `return query.all()`
        # ahead of the final return. It handed callers a list instead of a
        # query and dropped every remaining filter, so parameters that reach
        # this point are now simply skipped.

    return query
def gather_muts_and_sites(
    self,
    mutations: BaseQuery,
    sites: BaseQuery,
    show_progress=True,
    occurrences_in: List[MutationSource] = None,
    intersection=None
) -> MotifsData:
    """Collect sites with motifs and the mutations around or breaking them.

    When ``occurrences_in`` is given, a mutation is weighted by the sum of
    its values in those sources (a source missing from the mutation's
    ``sources_map`` adds nothing). Otherwise every mutation counts as 1,
    so the counts represent distinct substitutions.

    :param mutations: Query of mutations. Only those with an affected site
        whose types contain ``self.site_type`` are examined.
    :param sites: Query of sites to accept.
    :param show_progress: Wrap the iteration over mutations in ``tqdm``,
        using the query's ``count()`` as the total.
    :param occurrences_in: Sources whose values are summed per mutation.
    :param intersection: When truthy, accepted sites are additionally joined
        on ``Mutation.affected_sites`` and must satisfy
        ``Mutation.in_sources`` for every listed source.
    :return: ``MotifsData`` holding the four collections built here.
    """
    if intersection:
        sites_with_mutations = sites.join(Mutation.affected_sites)
        source_conditions = [
            Mutation.in_sources(source) for source in intersection
        ]
        accepted_sites = sites_with_mutations.filter(
            and_(*source_conditions)).all()
    else:
        accepted_sites = sites.all()

    affects_site_of_type = Mutation.affected_sites.any(
        Site.types.contains(self.site_type))
    candidate_mutations = mutations.filter(affects_site_of_type)

    muts_around_sites_with_motif = defaultdict(dict)
    muts_breaking_sites_motif = defaultdict(dict)
    sites_with_broken_motif = defaultdict(set)

    sites_with_motif = select_sites_with_motifs(
        accepted_sites, self.site_specific_motifs)

    def count_occurrences(mut: Mutation):
        # Sum the per-source values, skipping sources the mutation lacks.
        return sum(
            mut.sources_map[source.name].get_value()
            for source in occurrences_in
            if source.name in mut.sources_map
        )

    def count_once(mut):
        return 1

    mutation_count = count_occurrences if occurrences_in else count_once

    is_affected = self.breaking_modes[self.mode]

    if show_progress:
        total_mutations = candidate_mutations.count()
        candidate_mutations = tqdm(candidate_mutations, total=total_mutations)

    for mutation in candidate_mutations:
        for site in mutation.affected_sites:
            if site not in accepted_sites:
                continue

            for motif_name, motif in self.site_specific_motifs.items():
                if site not in sites_with_motif[motif_name]:
                    continue

                count = mutation_count(mutation)
                muts_around_sites_with_motif[motif_name][mutation] = count

                mutated_sequence = mutate_sequence(site, mutation, offset=7)
                if not is_affected(mutated_sequence, motif):
                    continue

                sites_with_broken_motif[motif_name].add(site)
                muts_breaking_sites_motif[motif_name][mutation] = count

    return MotifsData(
        sites_with_motif=sites_with_motif,
        sites_with_broken_motif=sites_with_broken_motif,
        muts_around_sites_with_motif=muts_around_sites_with_motif,
        muts_breaking_sites_motif=muts_breaking_sites_motif,
    )