Ejemplo n.º 1
0
    def run_simple_query(self, query):
        """
        Run `query` and return its rows.

        The query is split with get_query_parts(). If it has a WHERE
        clause, the clause is parsed and evaluated, and the statement that
        is finally executed is restricted to the resulting values of the
        table's partition key through an IN (...) list. Without a WHERE
        clause no restriction is added.

        Returns an empty OrderedDict when the WHERE clause matches nothing,
        otherwise the result of self.session.execute(). As a side effect
        the session's row_factory is set to ordered_dict_factory.

        Exits the process through sys.exit() when the generated statement
        is rejected with a cassandra.protocol.SyntaxException.
        """
        (requested_columns, from_table, where_clause,
         rest_of_query) = get_query_parts(query)

        # "*" is the sentinel this method uses for "no restriction" (see
        # the comparison below). Binding both names up front keeps them
        # defined when the query has no WHERE clause or when parsing
        # yields no expression.
        where_exp = None
        matches = "*"
        if where_clause != '':
            where_exp = self.parse_where_clause(where_clause, from_table)
        if where_exp is not None:
            matches = where_exp.evaluate(self.session, "*")

        if len(matches) == 0:
            return OrderedDict([])

        try:
            dink_query = "SELECT %s FROM %s" % (
                ','.join(requested_columns), from_table)
            if matches != "*":
                # NOTE(review): the values are interpolated into the
                # statement text rather than bound as parameters.
                if from_table.startswith('samples'):
                    # Tables whose name starts with 'samples' get their
                    # key values wrapped in single quotes.
                    in_clause = "','".join(matches)
                    dink_query += " WHERE %s IN ('%s')" % (
                        self.get_partition_key(from_table), in_clause)
                else:
                    in_clause = ",".join(map(str, matches))
                    dink_query += " WHERE %s IN (%s)" % (
                        self.get_partition_key(from_table), in_clause)
            dink_query += " " + rest_of_query
            self.session.row_factory = ordered_dict_factory
            return self.session.execute(dink_query)

        except cassandra.protocol.SyntaxException as e:
            # Single-argument print(...) emits the same text whether it is
            # parsed as a statement or as a function call.
            print("Cassandra error: {0}".format(e))
            sys.exit("The query issued (%s) has a syntax error." % query)
Ejemplo n.º 2
0
 def run_simple_query(self, query):
     """
     Run `query` and return its rows.

     The query is split with get_query_parts(). If it has a WHERE
     clause, the clause is parsed and evaluated, and the statement that
     is finally executed is restricted to the resulting values of the
     table's partition key through an IN (...) list. Without a WHERE
     clause no restriction is added.

     Returns an empty OrderedDict when the WHERE clause matches nothing,
     otherwise the result of self.session.execute(). As a side effect
     the session's row_factory is set to ordered_dict_factory.

     Exits the process through sys.exit() when the generated statement
     is rejected with a cassandra.protocol.SyntaxException.
     """
     (requested_columns, from_table, where_clause,
      rest_of_query) = get_query_parts(query)

     # "*" is the sentinel this method uses for "no restriction" (see
     # the comparison below). Binding both names up front keeps them
     # defined when the query has no WHERE clause or when parsing
     # yields no expression.
     where_exp = None
     matches = "*"
     if where_clause != '':
         where_exp = self.parse_where_clause(where_clause, from_table)
     if where_exp is not None:
         matches = where_exp.evaluate(self.session, "*")

     if len(matches) == 0:
         return OrderedDict([])

     try:
         dink_query = "SELECT %s FROM %s" % (
             ','.join(requested_columns), from_table)
         if matches != "*":
             # NOTE(review): the values are interpolated into the
             # statement text rather than bound as parameters.
             if from_table.startswith('samples'):
                 # Tables whose name starts with 'samples' get their
                 # key values wrapped in single quotes.
                 in_clause = "','".join(matches)
                 dink_query += " WHERE %s IN ('%s')" % (
                     self.get_partition_key(from_table), in_clause)
             else:
                 in_clause = ",".join(map(str, matches))
                 dink_query += " WHERE %s IN (%s)" % (
                     self.get_partition_key(from_table), in_clause)
         dink_query += " " + rest_of_query
         self.session.row_factory = ordered_dict_factory
         return self.session.execute(dink_query)

     except cassandra.protocol.SyntaxException as e:
         # Single-argument print(...) emits the same text whether it is
         # parsed as a statement or as a function call.
         print("Cassandra error: {0}".format(e))
         sys.exit("The query issued (%s) has a syntax error." % query)
Ejemplo n.º 3
0
    def run(self, query, gt_filter=None, show_variant_samples=False,
            variant_samples_delim=',', predicates=None,
            needs_genotypes=False, needs_genes=False, show_families=False,
            test_mode=False, needs_sample_names=False, nr_cores=1,
            start_time=-42, use_header=False, exp_id="Oink", timeout=10.0,
            batch_size=100):
        """
        Prepare and execute `query`, optionally narrowed by `gt_filter`.

        `query` is the SQL text (required) and `gt_filter` an optional
        genotype filter. The other options are recorded on the instance;
        `predicates`, when given, are appended to self.predicates.

        The method classifies the query ("no-genotypes",
        "select-genotypes" or "filter-genotypes"), builds self.where_exp
        from the WHERE clause and/or the genotype filter, calls
        self._apply_query() and finally sets self.query_executed.

        Exits through sys.exit() when _is_gt_filter_safe() returns False.
        """
        # '==' in the formatted query is rewritten to '='.
        formatted = self.formatter.format_query(query)
        self.query = formatted.replace('==', '=')
        self.gt_filter = gt_filter
        self.nr_cores = nr_cores
        self.start_time = start_time
        self.use_header = use_header
        self.exp_id = exp_id
        self.timeout = timeout
        self.batch_size = batch_size
        if self._is_gt_filter_safe() is False:
            sys.exit("ERROR: unsafe --gt-filter command.")

        self.show_variant_samples = show_variant_samples
        self.variant_samples_delim = variant_samples_delim
        self.test_mode = test_mode
        self.needs_genotypes = needs_genotypes
        self.needs_vcf_columns = (self.formatter.name == 'vcf')
        self.needs_sample_names = needs_sample_names
        self.needs_genes = needs_genes
        self.show_families = show_families
        if predicates:
            self.predicates += predicates

        # Put a space after every comma so that splitting on whitespace
        # separates the SELECT columns into individual tokens.
        self.query = self.query.replace(',', ', ')
        self.query_pieces = self.query.split()
        mentions_genotypes = any(
            piece.startswith(("gt", "(gt")) or ".gt" in piece
            for piece in self.query_pieces)

        # A genotype filter always wins; otherwise the type depends on
        # whether any token refers to a gt* column.
        if self.gt_filter is not None:
            self.gt_filter_exp = self._correct_genotype_filter()
            self.query_type = "filter-genotypes"
        elif mentions_genotypes:
            self.query_type = "select-genotypes"
        else:
            self.query_type = "no-genotypes"

        parts = get_query_parts(self.query)
        (self.requested_columns, self.from_table, where_clause,
         self.rest_of_query) = parts
        self.extra_columns = []

        if where_clause == '':
            # No WHERE clause: the genotype filter (if any) is the whole
            # condition.
            if self.gt_filter is not None:
                self.where_exp = self.gt_filter_exp
            else:
                self.where_exp = None
        else:
            self.where_exp = self.parse_where_clause(where_clause,
                                                     self.from_table)
            if self.gt_filter is not None:
                # Both conditions must hold.
                self.where_exp = AND_expression(self.where_exp,
                                                self.gt_filter_exp)

        self._apply_query()
        self.query_executed = True
Ejemplo n.º 4
0
    def run(self,
            query,
            gt_filter=None,
            show_variant_samples=False,
            variant_samples_delim=',',
            predicates=None,
            needs_genotypes=False,
            needs_genes=False,
            show_families=False,
            test_mode=False,
            needs_sample_names=False,
            nr_cores=1,
            start_time=-42,
            use_header=False,
            exp_id="Oink",
            timeout=10.0,
            batch_size=100):
        """
        Run an SQL `query`, with an optional genotype filter `gt_filter`.

        All options are stored on the instance (`predicates` are added to
        the existing self.predicates). The query is then tokenized and
        classified, self.where_exp is assembled from the WHERE clause and
        the genotype filter, and self._apply_query() is invoked. On
        return self.query_executed is True.

        Calls sys.exit() if _is_gt_filter_safe() reports False.
        """
        self.query = self.formatter.format_query(query).replace('==', '=')
        self.gt_filter = gt_filter
        self.nr_cores = nr_cores
        self.start_time = start_time
        self.use_header = use_header
        self.exp_id = exp_id
        self.timeout = timeout
        self.batch_size = batch_size
        if self._is_gt_filter_safe() is False:
            sys.exit("ERROR: unsafe --gt-filter command.")

        self.show_variant_samples = show_variant_samples
        self.variant_samples_delim = variant_samples_delim
        self.test_mode = test_mode
        self.needs_genotypes = needs_genotypes
        # Only the 'vcf' formatter asks for the VCF columns.
        self.needs_vcf_columns = self.formatter.name == 'vcf'
        self.needs_sample_names = needs_sample_names

        self.needs_genes = needs_genes
        self.show_families = show_families
        if predicates:
            self.predicates += predicates

        # Guarantee "comma + space" between SELECT columns, then split
        # the query on whitespace.
        self.query = self.query.replace(',', ', ')
        self.query_pieces = self.query.split()

        # Does any token refer to a gt* column (bare, parenthesised or
        # table-qualified)?
        references_gt = False
        for token in self.query_pieces:
            if (token.startswith("gt") or token.startswith("(gt")
                    or ".gt" in token):
                references_gt = True
                break

        if self.gt_filter is None:
            if references_gt:
                self.query_type = "select-genotypes"
            else:
                self.query_type = "no-genotypes"
        else:
            self.gt_filter_exp = self._correct_genotype_filter()
            self.query_type = "filter-genotypes"

        (self.requested_columns,
         self.from_table,
         where_clause,
         self.rest_of_query) = get_query_parts(self.query)
        self.extra_columns = []

        if where_clause == '':
            # Without a WHERE clause the genotype filter, when present,
            # is the only condition.
            self.where_exp = None
            if self.gt_filter is not None:
                self.where_exp = self.gt_filter_exp
        else:
            self.where_exp = self.parse_where_clause(
                where_clause, self.from_table)
            if self.gt_filter is not None:
                self.where_exp = AND_expression(
                    self.where_exp, self.gt_filter_exp)

        self._apply_query()
        self.query_executed = True