def run_simple_query(self, query):
    """
    Run a plain SELECT query and return the rows it produces.

    The query is split with `get_query_parts`. When it has a WHERE
    clause, that clause is parsed and evaluated to a collection of
    partition-key values, and the statement sent to the session is
    rewritten as `SELECT ... FROM ... WHERE <partition key> IN (...)`
    followed by the remainder of the original query.

    Returns an empty `OrderedDict` when the WHERE clause matches
    nothing, otherwise whatever `self.session.execute` returns.

    Side effects: sets `self.session.row_factory` to
    `ordered_dict_factory`. On a `cassandra.protocol.SyntaxException`
    the error is printed and the process is terminated via `sys.exit`.
    """
    (requested_columns, from_table,
     where_clause, rest_of_query) = get_query_parts(query)

    # "*" is the sentinel this method already uses for "no restriction"
    # (it is the start value handed to evaluate() and is tested below).
    # Defaulting to it keeps `matches` bound when the query has no WHERE
    # clause or when the clause does not parse to an expression.
    matches = "*"
    if where_clause != '':
        where_exp = self.parse_where_clause(where_clause, from_table)
        if where_exp is not None:
            matches = where_exp.evaluate(self.session, "*")

    if len(matches) == 0:
        return OrderedDict([])

    try:
        dink_query = "SELECT %s FROM %s" % (
            ','.join(requested_columns), from_table)
        if matches != "*":
            if from_table.startswith('samples'):
                # Keys of tables whose name starts with 'samples' are
                # emitted as quoted literals (presumably text keys --
                # confirm against the schema).
                in_clause = "','".join(matches)
                dink_query += " WHERE %s IN ('%s')" % (
                    self.get_partition_key(from_table), in_clause)
            else:
                in_clause = ",".join(map(str, matches))
                dink_query += " WHERE %s IN (%s)" % (
                    self.get_partition_key(from_table), in_clause)
        dink_query += " " + rest_of_query
        self.session.row_factory = ordered_dict_factory
        return self.session.execute(dink_query)
    except cassandra.protocol.SyntaxException as e:
        # Single-argument call form prints the same text whether `print`
        # is the Python 2 statement or the function.
        print("Cassandra error: {0}".format(e))
        sys.exit("The query issued (%s) has a syntax error." % query)
def run_simple_query(self, query):
    """
    Run a plain SELECT query and return the rows it produces.

    The query is split with `get_query_parts`. When it has a WHERE
    clause, that clause is parsed and evaluated to a collection of
    partition-key values, and the statement sent to the session is
    rewritten as `SELECT ... FROM ... WHERE <partition key> IN (...)`
    followed by the remainder of the original query.

    Returns an empty `OrderedDict` when the WHERE clause matches
    nothing, otherwise whatever `self.session.execute` returns.

    Side effects: sets `self.session.row_factory` to
    `ordered_dict_factory`. On a `cassandra.protocol.SyntaxException`
    the error is printed and the process is terminated via `sys.exit`.
    """
    (requested_columns, from_table,
     where_clause, rest_of_query) = get_query_parts(query)

    # "*" is the sentinel this method already uses for "no restriction"
    # (it is the start value handed to evaluate() and is tested below).
    # Defaulting to it keeps `matches` bound when the query has no WHERE
    # clause or when the clause does not parse to an expression.
    matches = "*"
    if where_clause != '':
        where_exp = self.parse_where_clause(where_clause, from_table)
        if where_exp is not None:
            matches = where_exp.evaluate(self.session, "*")

    if len(matches) == 0:
        return OrderedDict([])

    try:
        dink_query = "SELECT %s FROM %s" % (
            ','.join(requested_columns), from_table)
        if matches != "*":
            if from_table.startswith('samples'):
                # Keys of tables whose name starts with 'samples' are
                # emitted as quoted literals (presumably text keys --
                # confirm against the schema).
                in_clause = "','".join(matches)
                dink_query += " WHERE %s IN ('%s')" % (
                    self.get_partition_key(from_table), in_clause)
            else:
                in_clause = ",".join(map(str, matches))
                dink_query += " WHERE %s IN (%s)" % (
                    self.get_partition_key(from_table), in_clause)
        dink_query += " " + rest_of_query
        self.session.row_factory = ordered_dict_factory
        return self.session.execute(dink_query)
    except cassandra.protocol.SyntaxException as e:
        # Single-argument call form prints the same text whether `print`
        # is the Python 2 statement or the function.
        print("Cassandra error: {0}".format(e))
        sys.exit("The query issued (%s) has a syntax error." % query)
def run(self, query, gt_filter=None, show_variant_samples=False,
        variant_samples_delim=',', predicates=None,
        needs_genotypes=False, needs_genes=False,
        show_families=False, test_mode=False,
        needs_sample_names=False, nr_cores=1,
        start_time=-42, use_header=False,
        exp_id="Oink", timeout=10.0, batch_size=100):
    """
    Execute a query against a Gemini database.

    The caller supplies:

        1. (required) an SQL `query`.
        2. (optional) a genotype filter, `gt_filter`.

    The remaining keyword arguments are stored on the instance under
    the same names. The method classifies the query (`self.query_type`
    is one of "no-genotypes", "select-genotypes" or "filter-genotypes"),
    builds `self.where_exp` from the WHERE clause and/or the genotype
    filter, calls `self._apply_query()` and finally sets
    `self.query_executed` to True.

    Terminates the process via `sys.exit` when
    `self._is_gt_filter_safe()` returns False.
    """
    # Let the formatter adjust the query, then rewrite '==' as '='.
    formatted_query = self.formatter.format_query(query)
    self.query = formatted_query.replace('==', '=')
    self.gt_filter = gt_filter

    self.nr_cores = nr_cores
    self.start_time = start_time
    self.use_header = use_header
    self.exp_id = exp_id
    self.timeout = timeout
    self.batch_size = batch_size

    if self._is_gt_filter_safe() is False:
        sys.exit("ERROR: unsafe --gt-filter command.")

    self.show_variant_samples = show_variant_samples
    self.variant_samples_delim = variant_samples_delim
    self.test_mode = test_mode
    self.needs_genotypes = needs_genotypes
    # VCF columns are requested only when the formatter is the 'vcf' one.
    self.needs_vcf_columns = (self.formatter.name == 'vcf')
    self.needs_sample_names = needs_sample_names
    self.needs_genes = needs_genes
    self.show_families = show_families

    if predicates:
        self.predicates += predicates

    # Put a space after every comma so that the SELECT columns end up
    # as separate whitespace-delimited tokens.
    self.query = self.query.replace(',', ', ')
    self.query_pieces = self.query.split()

    # A token counts as genotype-related when it starts with "gt" or
    # "(gt", or contains ".gt" anywhere.
    mentions_genotypes = any(
        piece.startswith(("gt", "(gt")) or ".gt" in piece
        for piece in self.query_pieces)

    if self.gt_filter is not None:
        # A genotype filter always yields a "filter-genotypes" query,
        # whether or not the query text itself mentions genotypes.
        self.gt_filter_exp = self._correct_genotype_filter()
        self.query_type = "filter-genotypes"
    elif mentions_genotypes:
        self.query_type = "select-genotypes"
    else:
        self.query_type = "no-genotypes"

    query_parts = get_query_parts(self.query)
    (self.requested_columns, self.from_table,
     where_clause, self.rest_of_query) = query_parts
    self.extra_columns = []

    if where_clause != '':
        self.where_exp = self.parse_where_clause(where_clause,
                                                 self.from_table)
        if self.gt_filter is not None:
            # Both restrictions apply: combine them.
            self.where_exp = AND_expression(self.where_exp,
                                            self.gt_filter_exp)
    elif self.gt_filter is not None:
        self.where_exp = self.gt_filter_exp
    else:
        self.where_exp = None

    self._apply_query()
    self.query_executed = True
def run(self, query, gt_filter=None, show_variant_samples=False,
        variant_samples_delim=',', predicates=None,
        needs_genotypes=False, needs_genes=False,
        show_families=False, test_mode=False,
        needs_sample_names=False, nr_cores=1,
        start_time=-42, use_header=False,
        exp_id="Oink", timeout=10.0, batch_size=100):
    """
    Execute a query against a Gemini database.

    The caller supplies:

        1. (required) an SQL `query`.
        2. (optional) a genotype filter, `gt_filter`.

    The remaining keyword arguments are stored on the instance under
    the same names. The method classifies the query (`self.query_type`
    is one of "no-genotypes", "select-genotypes" or "filter-genotypes"),
    builds `self.where_exp` from the WHERE clause and/or the genotype
    filter, calls `self._apply_query()` and finally sets
    `self.query_executed` to True.

    Terminates the process via `sys.exit` when
    `self._is_gt_filter_safe()` returns False.
    """
    # Let the formatter adjust the query, then rewrite '==' as '='.
    formatted_query = self.formatter.format_query(query)
    self.query = formatted_query.replace('==', '=')
    self.gt_filter = gt_filter

    self.nr_cores = nr_cores
    self.start_time = start_time
    self.use_header = use_header
    self.exp_id = exp_id
    self.timeout = timeout
    self.batch_size = batch_size

    if self._is_gt_filter_safe() is False:
        sys.exit("ERROR: unsafe --gt-filter command.")

    self.show_variant_samples = show_variant_samples
    self.variant_samples_delim = variant_samples_delim
    self.test_mode = test_mode
    self.needs_genotypes = needs_genotypes
    # VCF columns are requested only when the formatter is the 'vcf' one.
    self.needs_vcf_columns = (self.formatter.name == 'vcf')
    self.needs_sample_names = needs_sample_names
    self.needs_genes = needs_genes
    self.show_families = show_families

    if predicates:
        self.predicates += predicates

    # Put a space after every comma so that the SELECT columns end up
    # as separate whitespace-delimited tokens.
    self.query = self.query.replace(',', ', ')
    self.query_pieces = self.query.split()

    # A token counts as genotype-related when it starts with "gt" or
    # "(gt", or contains ".gt" anywhere.
    mentions_genotypes = any(
        piece.startswith(("gt", "(gt")) or ".gt" in piece
        for piece in self.query_pieces)

    if self.gt_filter is not None:
        # A genotype filter always yields a "filter-genotypes" query,
        # whether or not the query text itself mentions genotypes.
        self.gt_filter_exp = self._correct_genotype_filter()
        self.query_type = "filter-genotypes"
    elif mentions_genotypes:
        self.query_type = "select-genotypes"
    else:
        self.query_type = "no-genotypes"

    query_parts = get_query_parts(self.query)
    (self.requested_columns, self.from_table,
     where_clause, self.rest_of_query) = query_parts
    self.extra_columns = []

    if where_clause != '':
        self.where_exp = self.parse_where_clause(where_clause,
                                                 self.from_table)
        if self.gt_filter is not None:
            # Both restrictions apply: combine them.
            self.where_exp = AND_expression(self.where_exp,
                                            self.gt_filter_exp)
    elif self.gt_filter is not None:
        self.where_exp = self.gt_filter_exp
    else:
        self.where_exp = None

    self._apply_query()
    self.query_executed = True