def check_crosstable_aggregate(self, other_table, col1, join1, col2=None, join2=None, sort=None, truncate=None, constraint={}): """ Check that col1 is the sorted array of values in col2 where join1 = join2 Here col2 and join2 default to col1 and join1, and join1 and join2 are allowed to be lists of columns sort defaults to col2, but can be a list of columns in other_table """ if col2 is None: col2 = col1 if truncate is not None: col1 = SQL("t1.{0}[:%s]" % (int(truncate))).format( Identifier(col1)) if sort is None: sort = SQL(" ORDER BY t2.{0}").format(Identifier(col2)) else: sort = SQL(" ORDER BY {0}").format( SQL(", ").join( SQL("t2.{0}").format(Identifier(col)) for col in sort)) return self._run_crosstable(col2, other_table, col1, join1, join2, constraint, subselect_wrapper="ARRAY", extra=sort)
def check_hecke_orbit_code(self): """ hecke_orbit_code is as defined """ if self.hecke_orbit_code != []: # test enabled assert len(self.hecke_orbit_code) == 2 hoc_column = self.hecke_orbit_code[0] if len(self.hecke_orbit_code[1]) == 4: N_column, k_column, i_column, x_column = self.hecke_orbit_code[ 1] else: assert len(self.hecke_orbit_code[1]) == 3 x_column = None N_column, k_column, i_column = self.hecke_orbit_code[1] # N + (k<<24) + ((i-1)<<36) + ((x-1)<<52) if x_column is None: return self._run_query( SQL("{0} != {1}::bigint + ({2}::integer::bit(64)<<24)::bigint + (({3}-1)::integer::bit(64)<<36)::bigint" ).format( *map(Identifier, [hoc_column, N_column, k_column, i_column]))) else: return self._run_query( SQL("{0} != {1}::bigint + ({2}::integer::bit(64)<<24)::bigint + (({3}-1)::integer::bit(64)<<36)::bigint + (({4}-1)::integer::bit(64)<<52)::bigint" ).format(*map(Identifier, [ hoc_column, N_column, k_column, i_column, x_column ])))
def check_amn(self): """ Check a_{mn} = a_m*a_n when (m,n) = 1 and m,n < some bound """ pairs = [(2, 3), (2, 5), (3, 4), (2, 7), (3, 5), (2, 9), (4, 5), (3, 7), (2, 11), (3, 8), (2, 13), (4, 7), (2, 15), (3, 10), (5, 6), (3, 11), (2, 17), (5, 7), (4, 9), (2, 19), (3, 13), (5, 8), (3, 14), (6, 7), (4, 11), (5, 9), (3, 16), (3, 17), (4, 13), (5, 11), (7, 8), (3, 19), (3, 20), (4, 15), (5, 12)][:15] query = SQL("NOT ({0})").format(SQL(" AND ").join(SQL("check_cc_prod(an_normalized[{0}:{0}], an_normalized[{1}:{1}], an_normalized[{2}:{2}])").format(Literal(int(m)), Literal(int(n)), Literal(int(m*n))) for m, n in pairs)) return self._run_query(query, ratio=0.1)
def check_string_concatenation(self, label_col, other_columns, constraint={}, sep='.', convert_to_base26={}): """ Check that the label_column is the concatenation of the other columns with the given separator Input: - ``label_col`` -- the label_column - ``other_columns`` -- the other columns from which we can deduce the label - ``constraint`` -- a dictionary, as passed to the search method - ``sep`` -- the separator for the join - ``convert_to_base26`` -- a dictionary where the keys are columns that we need to convert to base26, and the values is that the shift that we need to apply """ oc_converted = [ SQL('to_base26({0} + {1})').format( Identifier(col), Literal(int(convert_to_base26[col]))) if col in convert_to_base26 else Identifier(col) for col in other_columns ] #intertwine the separator oc = [ oc_converted[i // 2] if i % 2 == 0 else Literal(sep) for i in range(2 * len(oc_converted) - 1) ] return self._run_query( SQL(" != ").join([SQL(" || ").join(oc), Identifier(label_col)]), constraint)
def check_crosstable_dotprod(self, other_table, col1, join1, col2, join2=None, constraint={}): """ Check that col1 is the sum of the product of the values in the columns of col2 over rows of other_table with self.table.join1 = other_table.join2. There are some peculiarities of this method, resulting from its application to mf_subspaces. col1 is allowed to be a pair, in which case the difference col1[0] - col1[1] will be compared. col2 does not take value col1 as a default, since they are playing different roles. """ if isinstance(col1, list): if len(col1) != 2: raise ValueError("col1 must have length 2") col1 = SQL("t1.{0} - t1.{1}").format(Identifier(col1[0]), Identifier(col1[1])) dotprod = SQL("SUM({0})").format( SQL(" * ").join( SQL("t2.{0}").format(Identifier(col)) for col in col2)) return self._run_crosstable(dotprod, other_table, col1, join1, join2, constraint)
def check_sato_tate_value(self): """ for k>1 check that sato_tate_group is consistent with is_cm and char_order (it should be (k-1).2.3.cn where n=char_order if is_cm is false, and (k-1).2.1.dn if is_cm is true) """ return (self._run_query(SQL("sato_tate_group != (weight-1) || {0} || char_order").format(Literal(".2.3.c")), constraint={'is_cm':False, 'weight':{'$gt':1}}) + self._run_query(SQL("sato_tate_group != (weight-1) || {0} || char_order").format(Literal(".2.1.d")), constraint={'is_cm':True, 'weight':{'$gt':1}}))
def check_array_concatenation(self, a_columns, b_columns, constraint={}): if isinstance(a_columns, basestring): a_columns = [a_columns] if isinstance(b_columns, basestring): b_columns = [b_columns] return self._run_query(SQL("{0} != {1}").format( SQL(" || ").join(map(Identifier, a_columns)), SQL(" || ").join(map(Identifier, b_columns))), constraint=constraint)
def _check_arith(self, a_columns, b_columns, constraint, op): if isinstance(a_columns, basestring): a_columns = [a_columns] if isinstance(b_columns, basestring): b_columns = [b_columns] return self._run_query( SQL(" != ").join([ SQL(" %s " % op).join(map(Identifier, a_columns)), SQL(" %s " % op).join(map(Identifier, b_columns)) ]), constraint)
def _run_query(self, condition=None, constraint={}, values=None, table=None, query=None, ratio=1): """ Run a query to check a condition. The number of returned failures will be limited by the ``_cur_limit`` attribute of this ``TableChecker``. If ``_cur_label`` is set, only that label will be checked. INPUT: - ``condition`` -- an SQL object giving a condition on the search table - ``constraint`` -- a dictionary, as passed to the search method, or an SQL object - ``values`` -- a list of values to fill in for ``%s`` in the condition. - ``table`` -- an SQL object or string giving the table to execute this query on. Defaults to the table for this TableChecker. - ``query`` -- an SQL object giving the whole query, leaving out only the ``_cur_label`` and ``_cur_limit`` parts. Note that ``condition``, ``constraint``, ``table`` and ``ratio`` will be ignored if query is provided. - ``ratio`` -- the ratio of rows in the table to run this query on. """ if values is None: values = [] label_col = Identifier(self.label_col) if query is None: if table is None: table = self.table.search_table if isinstance(table, string_types): if ratio == 1: table = Identifier(table) else: table = SQL("{0} TABLESAMPLE SYSTEM({1})").format( Identifier(table), Literal(ratio)) # WARNING: the following is not safe from SQL injection, so be careful if you copy this code query = SQL("SELECT {0} FROM {1} WHERE {2}").format( label_col, table, condition) if not isinstance(constraint, Composable): constraint, cvalues = self.table._parse_dict(constraint) if constraint is not None: values = values + cvalues if constraint is not None: query = SQL("{0} AND {1}").format(query, constraint) if self._cur_label is not None: query = SQL("{0} AND {1} = %s").format(query, label_col) values += [self._cur_label] query = SQL("{0} LIMIT %s").format(query) cur = db._execute(query, values + [self._cur_limit]) return [rec[0] for rec in cur]
def check_newspaces_num_spaces(self): """ check that num_spaces matches the number of records in mf_newspaces with this level and weight and positive dimension """ # TIME about 2s # TODO: check that the number of char_orbits of level N and weight k is the same as the number of rows in mf_newspaces with this weight and level. The following doesn't work since num_spaces counts spaces with positive dimension # self.check_crosstable_count('char_dir_orbits', 'num_spaces', ['level', 'weight_parity'], ['modulus', 'parity'])) return self._run_crosstable(SQL("COUNT(*)"), 'mf_newspaces', 'num_spaces', ['level', 'weight'], extra=SQL(" AND t2.dim > 0"))
def check_self_dual_by_embeddings(self): """ if is_self_dual is present but field_poly is not present, check that embedding data in mf_hecke_cc is consistent with is_self_dual """ # TIME > 1300s # I expect this to take about 3/4h # we a create a temp table as we can't use aggregates under WHERE db._execute(SQL("CREATE TEMP TABLE tmp_cc AS SELECT t1.hecke_orbit_code, every(0 = all(t1.an_normalized[:][2:2] )) self_dual FROM mf_hecke_cc t1, mf_newforms t2 WHERE t1.hecke_orbit_code=t2.hecke_orbit_code AND t2.is_self_dual AND t2.field_poly is NULL GROUP BY t1.hecke_orbit_code")) query = SQL("SELECT t1.label FROM mf_newforms t1, tmp_cc t2 WHERE NOT t2.self_dual AND t1.hecke_orbit_code = t2.hecke_orbit_code") return self._run_query(query=query)
def _make_join(self, join1, join2): if not isinstance(join1, list): join1 = [join1] if join2 is None: join2 = join1 elif not isinstance(join2, list): join2 = [join2] if len(join1) != len(join2): raise ValueError("join1 and join2 must have the same length") join1 = [self._make_sql(j, "t1") for j in join1] join2 = [self._make_sql(j, "t2") for j in join2] return SQL(" AND ").join( SQL("{0} = {1}").format(j1, j2) for j1, j2 in zip(join1, join2))
def check_artin_degree(self): """ if present, we'd like to check that artin_field has Galois group of order artin_degree this is hard, so we just check that the degree of the polynomial is a divisor of artin_degree """ return self.check_divisible('artin_degree', SQL("array_length(artin_field, 1) - 1"), constraint={'artin_field':{'$exists':True}})
def check_inner_twists(self): """ check that inner_twists is consistent with inner_twist_count and that both are present if field_poly is set """ return (self._run_query(SQL("inner_twist_count != (SELECT SUM(s) FROM UNNEST((inner_twists[1:array_length(inner_twists,1)][2:2])) s)"), constraint={'inner_twist_count':{'$gt':0}}) + self.check_values({'inner_twists':{'$exists':True}, 'inner_twist_count':{'$gt':0}}, {'field_poly':{'$exists':True}}))
def check_embedding_m(self): """ check that embedding_m is consistent with conrey_label and embedding_index """ # About 250s query = SQL("WITH foo AS ( SELECT label, embedding_m, ROW_NUMBER() OVER ( PARTITION BY hecke_orbit_code ORDER BY conrey_index, embedding_index) FROM mf_hecke_cc) SELECT label FROM foo WHERE embedding_m != row_number") return self._run_query(query=query)
def check_an_length(self): """ check that an_normalized is a list of pairs of doubles of length at least 1000 """ # TIME > 3600s return self._run_query(SQL("array_length({0}, 1) < 1000 OR array_length({0}, 2) != 2").format( Identifier("an_normalized")))
def check_angles_interval(self): """ check that angles lie in (-0.5,0.5] """ # about 20 min query = SQL("array_min(angles) <= -0.5 OR array_max(angles) > 0.5") return self._run_query(query)
def check_array_len_gte_constant(self, column, limit, constraint={}): """ Length of array greater than or equal to limit """ return self._run_query( SQL("array_length({0}, 1) < %s").format(Identifier(column)), constraint, [limit])
def check_array_product(self, array_column, value_column, constraint={}): """ Checks that prod(array_column) == value_column """ return self._run_query( SQL("(SELECT PROD(s) FROM UNNEST({0}) s) != {1}").format( Identifier(array_column), Identifier(value_column)), constraint)
def check_trace_bound1(self): """ check that trace_bound = 1 if hecke_orbit_dims set and all dims distinct """ # TIME about 2s return self._run_query( SQL("hecke_orbit_dims!= ARRAY(SELECT DISTINCT UNNEST(hecke_orbit_dims) ORDER BY 1)" ), {'trace_bound': 1})
def check_trace_bound1_from_dims(self): """ check that trace_bound = 1 if hecke_orbit_dims set and all dims distinct """ # TIME about 2s return self._run_query( SQL("hecke_orbit_dims IS NOT NULL AND hecke_orbit_dims = ARRAY(SELECT DISTINCT UNNEST(hecke_orbit_dims) ORDER BY 1) AND num_forms > 1 AND trace_bound != 1" ))
def check_field_poly_is_cyclotomic(self): """ if hecke_ring_cyclotomic_generator > 0, check that field_poly_is_cyclotomic is set in mf_newforms record. """ # TIME about 2s # could be done with _run_crosstable from mf_newforms query = SQL("SELECT t1.label FROM mf_hecke_nf t1, mf_newforms t2 WHERE NOT t2.field_poly_is_cyclotomic AND t1.hecke_ring_cyclotomic_generator > 0 AND t1.label = t2.label") return self._run_query(query=query)
def check_parity_value(self): """ the value on -1 should agree with the parity for this char_orbit_index in char_dir_orbits """ # TIME about 500s return (self._run_crosstable(SQL("2*t2.values[1][2]"), 'char_dir_values', 'order', 'orbit_label', constraint={'parity': -1}, subselect_wrapper="ALL") + self._run_crosstable(SQL("t2.values[1][2]"), 'char_dir_values', 0, 'orbit_label', constraint={'parity': 1}, subselect_wrapper="ALL"))
def check_is_primitive(self): """ check that is_primitive is true if and only if modulus=conductor """ # TIME about 1s # Since we can't use constraint on modulus=conductor, we construct the constraint directly return self.check_iff({'is_primitive': True}, SQL("modulus = conductor"))
def check_letter_code(self, index_column, letter_code_column, constraint={}): return self._run_query( SQL("{0} != to_base26({1} - 1)").format( Identifier(letter_code_column), Identifier(index_column)), constraint)
def check_analytic_rank(self): """ if analytic_rank is present, check that matches order_of_vanishing in lfunctions record, and is are constant across the orbit """ # TIME about 1200s db._execute( SQL("CREATE TEMP TABLE temp_mftbl AS SELECT label, string_to_array(label,'.'), analytic_rank, dim FROM mf_newforms WHERE analytic_rank is NOT NULL" )) db._execute( SQL("CREATE TEMP TABLE temp_ltbl AS SELECT order_of_vanishing,(string_to_array(origin,'/'))[5:8],degree FROM lfunc_lfunctions WHERE origin LIKE 'ModularForm/GL2/Q/holomorphic%' and degree=2" )) db._execute( SQL("CREATE INDEX temp_ltbl_string_to_array_index on temp_ltbl using HASH(string_to_array)" )) db._execute( SQL("CREATE INDEX temp_mftbl_string_to_array_index on temp_mftbl using HASH(string_to_array)" )) query = SQL( "SELECT label FROM temp_mftbl t1 WHERE array_fill(t1.analytic_rank::smallint, ARRAY[t1.dim]) != ARRAY(SELECT t2.order_of_vanishing FROM temp_ltbl t2 WHERE t2.string_to_array = t1.string_to_array )" ) res = self._run_query(query=query) db._execute(SQL("DROP TABLE temp_mftbl")) db._execute(SQL("DROP TABLE temp_ltbl")) return res
def check_sum_AL_dims(self): """ If AL_dims is set, check that AL_dims sum to dim """ # TIME 0.3 s query = SQL( r'SELECT label FROM mf_newspaces t1 WHERE t1.dim !=( SELECT SUM(s.d) FROM (SELECT ((jsonb_array_elements("AL_dims"))->>1)::int d FROM mf_newspaces t2 WHERE t2.label = t1.label) s ) AND "AL_dims" is not NULL' ) return self._run_query(query=query)
def check_field_poly(self): """ if field_poly is set, check that is monic and of degree dim """ return self._run_query( SQL('array_length(field_poly, 1) = 1 AND field_poly[dim + 1] = 1' ), {'field_poly': { '$exists': True }})
def check_conrey_indexes(self): """ when grouped by hecke_orbit_code, check that conrey_indexs match conrey_indexes, embedding_index ranges from 1 to relative_dim (when grouped by conrey_index), and embedding_m ranges from 1 to dim """ # ps: In check_embedding_m and check_embedding_index, we already checked that embedding_m and check_embedding_index are in an increasing sequence query = SQL("WITH foo as (SELECT hecke_orbit_code, sort(array_agg(DISTINCT conrey_index)) conrey_indexes, count(DISTINCT embedding_index) relative_dim, count(embedding_m) dim FROM mf_hecke_cc GROUP BY hecke_orbit_code) SELECT t1.label FROM mf_newforms t1, foo WHERE t1.hecke_orbit_code = foo.hecke_orbit_code AND (t1.conrey_indexes != foo.conrey_indexes OR t1.relative_dim != foo.relative_dim OR t1.dim != foo.dim)") return self._run_query(query=query)
def _run_crosstable(self, quantity, other_table, col, join1, join2=None, constraint={}, values=[], subselect_wrapper="", extra=None): """ Checks that `quantity` matches col INPUT: - ``quantity`` -- a column name or an SQL object giving some quantity from the ``other_table`` - ``other_table`` -- the name of the other table - ``col`` -- an integer or the name of column to check against ``quantity`` - ``join1`` -- a column or list of columns on self on which we will join the two tables - ``join2`` -- a column or list of columns (default: `None`) on ``other_table`` on which we will join the two tables. If `None`, we take ``join2`` = ``join1``, see `_make_join` - ``constraint`` -- a dictionary, as passed to the search method - ``subselect_wrapper`` -- a string, e.g., "ARRAY" to convert the inner select query - ``extra`` -- SQL object to append to the subquery. This can hold additional constraints or set the sort order for the inner select query """ # WARNING: since it uses _run_query, this whole function is not safe against SQL injection, # so should only be run locally in data validation join = self._make_join(join1, join2) col = self._make_sql(col, "t1") if isinstance(quantity, basestring): quantity = SQL("t2.{0}").format(Identifier(quantity)) # This is unsafe subselect_wrapper = SQL(subselect_wrapper) if extra is None: extra = SQL("") condition = SQL( "{0} != {1}(SELECT {2} FROM {3} t2 WHERE {4}{5})").format( col, subselect_wrapper, quantity, Identifier(other_table), join, extra) return self._run_query(condition, constraint, values, table=SQL("{0} t1").format( Identifier(self.table.search_table)))