コード例 #1
0
ファイル: GeminiQuery.py プロジェクト: bgossele/gemini
    def parse_clause(self, clause, base_clause_parser, table):
        """
        Recursively turn a boolean clause into an expression tree.

        The clause is scanned from left to right. The first `||`, `&&` or
        standalone `NOT` found outside all round brackets splits the
        clause; the parts are parsed recursively and combined into an
        OR_expression, AND_expression or NOT_expression. Everything
        between `[` and `]` is skipped during the scan. When operators
        only occur inside round brackets, one enclosing bracket pair is
        removed and the remainder is parsed again. A clause without any
        operator is handed to `base_clause_parser` once the bracket pairs
        that enclose it as a whole have been removed.

        Args:
            clause: the clause text to parse.
            base_clause_parser: callable that receives an operator-free
                token; its return value is returned unchanged.
            table: passed on to recursive calls, to
                `self.get_partition_key` and to NOT_expression.

        Returns:
            An OR_expression, AND_expression or NOT_expression, or
            whatever `base_clause_parser` returns.

        Raises:
            SystemExit: (through sys.exit) when the round brackets are
                unbalanced, or when an operator sits inside brackets that
                do not enclose the whole clause.
        """

        def encloses_whole(text):
            """True if text opens with '(' whose partner is its last char."""
            if not (text.startswith('(') and text.endswith(')')):
                return False
            level = 0
            in_wildcard = False
            for pos, char in enumerate(text):
                if char == '[':
                    in_wildcard = True
                elif char == ']':
                    in_wildcard = False
                elif in_wildcard:
                    continue
                elif char == '(':
                    level += 1
                elif char == ')':
                    level -= 1
                    if level == 0:
                        # The first bracket closed here; it only wraps the
                        # whole text if this is the final character.
                        return pos == len(text) - 1
            return False

        def is_word_char(char):
            """True for characters that can be part of an identifier."""
            return char.isalnum() or char == '_'

        clause = clause.strip()
        length = len(clause)
        depth = 0
        nested_operator = False  # an operator was seen inside brackets
        in_wildcard_clause = False

        for i, char in enumerate(clause):
            if char == '[':
                in_wildcard_clause = True
            elif char == ']':
                in_wildcard_clause = False
            elif in_wildcard_clause:
                # Inside a wildcard: brackets and operators mean nothing.
                continue
            elif char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth < 0:
                    # A closing bracket without an opening one can never
                    # be repaired by a later '('.
                    sys.exit("ERROR in %s. Brackets don't match" % clause)
            elif i + 2 < length:
                # An operator only counts when text follows it.
                symbol = clause[i:i + 2]
                if symbol == "||" or symbol == "&&":
                    if depth != 0:
                        nested_operator = True
                        continue
                    left = self.parse_clause(clause[:i].strip(),
                                             base_clause_parser, table)
                    right = self.parse_clause(clause[i + 2:].strip(),
                                              base_clause_parser, table)
                    if symbol == "||":
                        return OR_expression(left, right)
                    return AND_expression(left, right)
                elif (i + 3 < length and clause[i:i + 3] == "NOT"
                      and (i == 0 or not is_word_char(clause[i - 1]))
                      and not is_word_char(clause[i + 3])):
                    # Only a standalone NOT is an operator; the letters
                    # N-O-T inside a longer word (e.g. NOTCH1) are data.
                    if depth != 0:
                        nested_operator = True
                        continue
                    # Only the text after NOT is parsed; anything in front
                    # of it is not used.
                    body = self.parse_clause(clause[i + 3:].strip(),
                                             base_clause_parser, table)
                    return NOT_expression(
                        body, table, self.get_partition_key(table),
                        self.n_variants)

        if depth != 0:
            sys.exit("ERROR in %s. Brackets don't match" % clause)

        if nested_operator:
            if not encloses_whole(clause):
                sys.exit("ERROR in %s. Boolean operator inside brackets "
                         "that do not enclose the whole clause" % clause)
            # Peel off one enclosing pair; the recursive call strips
            # whitespace and peels further pairs as needed.
            return self.parse_clause(clause[1:-1], base_clause_parser,
                                     table)

        # No boolean operators left: drop only the bracket pairs that wrap
        # the whole token, keeping brackets that belong to the token.
        token = clause
        while encloses_whole(token):
            token = token[1:-1]
        return base_clause_parser(token)
コード例 #2
0
ファイル: GeminiQuery.py プロジェクト: bgossele/gemini
    def run(self,
            query,
            gt_filter=None,
            show_variant_samples=False,
            variant_samples_delim=',',
            predicates=None,
            needs_genotypes=False,
            needs_genes=False,
            show_families=False,
            test_mode=False,
            needs_sample_names=False,
            nr_cores=1,
            start_time=-42,
            use_header=False,
            exp_id="Oink",
            timeout=10.0,
            batch_size=100):
        """
        Prepare and execute a query against a Gemini database.

        The query is passed through `self.formatter.format_query`, every
        `==` is replaced by `=`, and all keyword arguments are stored on
        the instance under the same names. The query type is then
        classified, the query is split with `get_query_parts`, a where
        expression is built (combined with the genotype filter when one
        is given) and `self._apply_query()` is called.

        `query` is required; `gt_filter` is an optional genotype filter.
        Any `predicates` passed in are appended to `self.predicates`.

        Calls sys.exit when `self._is_gt_filter_safe()` returns False.
        """
        formatted = self.formatter.format_query(query)
        self.query = formatted.replace('==', '=')
        self.gt_filter = gt_filter
        self.nr_cores = nr_cores
        self.start_time = start_time
        self.use_header = use_header
        self.exp_id = exp_id
        self.timeout = timeout
        self.batch_size = batch_size

        # The safety check runs after gt_filter has been stored.
        if self._is_gt_filter_safe() is False:
            sys.exit("ERROR: unsafe --gt-filter command.")

        self.show_variant_samples = show_variant_samples
        self.variant_samples_delim = variant_samples_delim
        self.test_mode = test_mode
        self.needs_genotypes = needs_genotypes
        self.needs_vcf_columns = (self.formatter.name == 'vcf')
        self.needs_sample_names = needs_sample_names

        self.needs_genes = needs_genes
        self.show_families = show_families
        if predicates:
            self.predicates += predicates

        # Put a space after every comma so that the SELECT columns end up
        # as separate whitespace-delimited tokens.
        self.query = self.query.replace(',', ', ')
        self.query_pieces = self.query.split()

        mentions_genotypes = any(
            piece.startswith("gt") or piece.startswith("(gt")
            or ".gt" in piece
            for piece in self.query_pieces)
        has_gt_filter = self.gt_filter is not None

        # A genotype filter always wins; otherwise the type depends on
        # whether any token of the query refers to a gt column.
        if has_gt_filter:
            self.gt_filter_exp = self._correct_genotype_filter()
            self.query_type = "filter-genotypes"
        elif mentions_genotypes:
            self.query_type = "select-genotypes"
        else:
            self.query_type = "no-genotypes"

        parts = get_query_parts(self.query)
        (self.requested_columns, self.from_table, where_clause,
         self.rest_of_query) = parts
        self.extra_columns = []

        if where_clause != '':
            self.where_exp = self.parse_where_clause(where_clause,
                                                     self.from_table)
            if has_gt_filter:
                # Rows must satisfy both the WHERE clause and the filter.
                self.where_exp = AND_expression(self.where_exp,
                                                self.gt_filter_exp)
        elif has_gt_filter:
            self.where_exp = self.gt_filter_exp
        else:
            self.where_exp = None

        self._apply_query()
        self.query_executed = True