def parse_range3(arg, split0 = False):
    """
    Parse integers, integer ranges and comma-separated lists of these.

    INPUT:

    - ``arg`` -- the text to parse, such as ``"25"``, ``"2-10"`` or
      ``"4,9-16"``.  Spaces are removed when ``arg`` is exactly a ``str``.
    - ``split0`` -- when true, a range with negative lower end and positive
      upper end is split into a negative half (ending at -1) and a positive
      half (starting at 1).

    OUTPUT:

    A list.  A single value (or a range whose ends coincide) contributes one
    integer; a range contributes ``[low, high]``; a split range contributes
    two lists, each holding one or two integers.  Comma-separated pieces are
    parsed recursively and their results concatenated.

    Raises ``SearchParsingError`` when an end of a range is empty.
    """
    range_msg = "It needs to be an integer (such as 25), a range of integers (such as 2-10 or 2..10), or a comma-separated list of these (such as 4,9,16 or 4-25, 81-121)."
    if type(arg) is str:
        arg = arg.replace(' ', '')
    if ',' in arg:
        collected = []
        for chunk in [parse_range3(a, split0) for a in arg.split(',')]:
            collected = collected + chunk
        return collected
    # The search starts at index 1 so that a leading minus sign is not
    # mistaken for the range separator.
    if '-' not in arg[1:]:
        return [ZZ(str(arg))]
    dash = arg.index('-', 1)
    start, end = arg[:dash], arg[dash + 1:]
    if not start:
        raise SearchParsingError(range_msg)
    low = ZZ(str(start))
    if not end:
        raise SearchParsingError(range_msg)
    high = ZZ(str(end))
    if low == high:
        return [low]
    if not (split0 and low < 0 and high > 0):
        return [[low, high]]
    negative = [low] if low == -1 else [low, ZZ(-1)]
    positive = [high] if high == 1 else [ZZ(1), high]
    return [negative, positive]
def parse_galgrp(inp, query, qfield, err_msg=None, list_ok=True):
    """
    Translate a Galois group specification into constraints on ``query``.

    INPUT:

    - ``inp`` -- the user's text
    - ``query`` -- the query dictionary, modified in place
    - ``qfield`` -- a pair ``(galfield, nfield)``; ``galfield`` receives the
      ``nTj`` labels and ``nfield`` (when truthy and not already in
      ``query``) receives the first components of the group codes
    - ``err_msg`` -- optional replacement for the generic error message
    - ``list_ok`` -- if true the input goes through ``complete_group_codes``;
      otherwise it is upper-cased and goes through ``complete_group_code``

    A single value is stored directly; anything else is stored under ``$in``.
    A ``NameError`` raised while doing this is turned into a
    ``SearchParsingError``.
    """
    try:
        if list_ok:
            from lmfdb.galois_groups.transitive_group import complete_group_codes
            codes = complete_group_codes(inp)
        else:
            from lmfdb.galois_groups.transitive_group import complete_group_code
            codes = complete_group_code(inp.upper())

        galfield, nfield = qfield
        if nfield and nfield not in query:
            degrees = list(set([code[0] for code in codes]))
            query[nfield] = degrees[0] if len(degrees) == 1 else {'$in': degrees}
        # if nfield was already in the query, we could try to intersect it with the degrees
        labels = ['{}T{}'.format(code[0], code[1]) for code in codes]
        query[galfield] = labels[0] if len(labels) == 1 else {'$in': labels}
    except NameError:
        # Input that looks like a family name (A5, C12, D4, S7) gets a more
        # specific message than the generic format hint.
        if re.match(r'^[ACDS]\d+$', inp):
            raise SearchParsingError("The requested group is not in the database")
        if err_msg:
            raise SearchParsingError(err_msg)
        raise SearchParsingError("It needs to be a list made up of GAP id's, such as [4,1] or [12,5], transitive groups in nTj notation, such as 5T1, and <a title = 'Galois group labels' knowl='nf.galois_group.name'>group labels</a>")
def parse_equality_constraints(inp, query, qfield, prefix='a', parse_singleton=int, shift=0): # Note that postgres -> index is one-based
    """
    Parse constraints of the form ``<prefix>N=T`` separated by commas.

    For each constraint, ``query[qfield + '.' + str(N + shift)]`` is set to
    ``parse_singleton(T)``.

    Raises ``SearchParsingError`` if a piece does not contain exactly one
    ``=`` or if its left-hand side does not start with ``prefix``.
    """
    for constraint in inp.split(','):
        sides = constraint.strip().split('=')
        if len(sides) != 2:
            raise SearchParsingError("It must be a comma separated list of expressions of the form %sN=T"%(prefix))
        lhs, rhs = sides
        lhs = lhs.strip()
        if not lhs.startswith(prefix):
            raise SearchParsingError("%s does not start with %s"%(lhs, prefix))
        index = int(lhs[len(prefix):]) + shift
        value = parse_singleton(rhs.strip())
        query[qfield + '.%s'%index] = value
def prep_raw(inp, names={}):
    """
    Turn a user-supplied expression into a ``$raw`` value for the database search.

    INPUT:

    - ``inp`` -- a string from the website, already split up by commas and ``..`` range indicators
    - ``names`` -- a dictionary translating user input to column names; only keys of this dictionary are accepted as variables

    OUTPUT:

    A dictionary ``{'$raw': expr}``, where ``expr`` is the input with implicit multiplications inserted and full column names substituted for the short names

    This function raises a SearchParsingError if there is a syntax error or if a variable is not a key of ``names``
    """
    # level = 10 includes (a+b)(c+d) -> (a+b)*(c+d) which isn't safe in Sage but should be okay for us
    inp = implicit_mul(inp, level=10)

    def known_var(token):
        if token in names:
            return var(token)
        raise SearchParsingError("%s is not a column of this table" % token)

    # Sage's parser is used only to check that the input is well formed;
    # the parsed expression itself is discarded.
    checker = Parser(make_var=known_var)
    try:
        checker.parse_expression(inp)
    except SyntaxError:
        raise SearchParsingError("syntax error")
    # The capturing group keeps the identifiers in the split result, so they
    # can be swapped for column names and everything joined back together.
    tokens = re.split(r'([A-Za-z_]+)', inp)
    return {'$raw': "".join([names[tok] if tok in names else tok for tok in tokens])}
def parse_container(inp, query, qfield):
    """
    Store a permutation representation such as ``6T13`` in ``query[qfield]``.

    Every ``T`` is replaced by ``t``; the result must be digits, optionally
    followed by ``t`` and more digits.  Raises ``SearchParsingError``
    otherwise.
    """
    label = inp.replace('T','t')
    if not re.match(r'^\d+(t\d+)?$', label):
        raise SearchParsingError("You must specify a permutation representation, such as 6T13" )
    query[qfield] = str(label)
def parse_bracketed_rats(inp, query, qfield, maxlength=None, exactlength=None, split=True, process=None, listprocess=None, keepbrackets=False, extractor=None):
    """
    Parse a bracketed list of rational numbers such as ``[1/7,2,3]``.

    INPUT:

    - ``inp`` -- the user's text
    - ``query`` -- the query dictionary, modified in place
    - ``qfield`` -- the key to set, or an iterable of keys when ``extractor``
      is given
    - ``maxlength``, ``exactlength`` -- optional bounds on the number of
      entries (counted through the commas in ``inp``)
    - ``split`` -- store the entries as a list rather than as a string
    - ``process`` -- applied to each entry
    - ``listprocess`` -- applied to the whole list after ``process``
    - ``keepbrackets`` -- for the string form, keep the brackets and wrap
      every entry in single quotes
    - ``extractor`` -- maps the list to values that are paired with the keys
      in ``qfield``; a value disagreeing with one already in ``query`` is an
      error

    Raises ``SearchParsingError`` on malformed input, on a wrong number of
    entries, or on an inconsistent ``extractor`` value.
    """
    malformed = (
        not BRACKETED_RAT_RE.match(inp)
        or (maxlength is not None and inp.count(',') > maxlength - 1)
        or (exactlength is not None and inp.count(',') != exactlength - 1)
        or (exactlength is not None and inp == '[]' and exactlength > 0)
    )
    if malformed:
        # Tailor the description and the examples to the length restriction.
        if exactlength == 2:
            lstr = "pair of rational numbers"
            example = "[2,3/2] or [3,3]"
        elif exactlength == 1:
            lstr = "list of 1 rational number"
            example = "[2/5]"
        elif exactlength is not None:
            lstr = "list of %s rational numbers" % exactlength
            example = str(list(range(2,exactlength+2))).replace(", ","/13,") + " or " + str([3]*exactlength).replace(", ","/4,")
        elif maxlength is not None:
            lstr = "list of at most %s rational numbers" % maxlength
            example = str(list(range(2,maxlength+2))).replace(", ","/13,") + " or " + str([2]*max(1, maxlength-2)).replace(", ","/41,")
        else:
            lstr = "list of rational numbers"
            example = "[1/7,2,3] or [5,6/71]"
        raise SearchParsingError("It needs to be a %s in square brackets, such as %s." % (lstr, example))

    if inp == '[]':
        # Handled separately because str.split never returns an empty list.
        query[qfield] = [] if split else ''
        return

    rats = [QQ(entry) for entry in inp[1:-1].split(',')]
    if process is not None:
        rats = [process(r) for r in rats]
    if listprocess is not None:
        rats = listprocess(rats)

    if extractor is not None:
        for qf, v in zip(qfield, extractor(rats)):
            if qf in query and query[qf] != v:
                raise SearchParsingError("Inconsistent specification of %s: %s vs %s"%(qf, query[qf], v))
            query[qf] = v
        return
    if split:
        query[qfield] = rats
        return
    joined = ','.join([str(r) for r in rats])
    if keepbrackets:
        query[qfield] = ('[%s]' % joined).replace("[","['").replace("]","']").replace(",","','")
    else:
        query[qfield] = joined
def raise_power(ab):
    """
    Evaluate one factor of a factored discriminant.

    ``ab`` is either ``"A"`` or ``"AeB"``; the latter stands for ``A**B``.
    Raises ``SearchParsingError`` when ``ab`` contains more than one ``e``.
    """
    markers = ab.count("e")
    if markers > 1:
        raise SearchParsingError("Malformed absolute discriminant. It must be a sequence of strings AeB for A and B integers, joined by _s. For example, 2e7_3e5_11.")
    if markers == 0:
        return ZZ(ab)
    base, exponent = ab.split("e")
    return ZZ(base)**ZZ(exponent)
def parse_primes(inp, query, qfield, mode=None, radical=None):
    """
    Parse a comma-separated list of primes and hand it to ``_parse_subset``.

    The input must match ``LIST_POSINT_RE`` and every entry must pass
    ``is_prime(proof=False)``; otherwise ``SearchParsingError`` is raised.
    ``mode`` and ``radical`` are passed through to ``_parse_subset`` together
    with ``prod`` as the product function.
    """
    not_primes_msg = "It needs to be a prime (such as 5), or a comma-separated list of primes (such as 2,3,11)."
    if not LIST_POSINT_RE.match(inp):
        raise SearchParsingError(not_primes_msg)
    primes = [int(p) for p in inp.split(',')]
    for p in primes:
        if not ZZ(p).is_prime(proof=False):
            raise SearchParsingError(not_primes_msg)
    _parse_subset(primes, query, qfield, mode, radical, prod)
def parse_hmf_weight(inp, query, qfield):
    """
    Parse a weight given either as one integer or as a bracketed list.

    ``qfield`` is a pair ``(parallel_field, normal_field)``.  If ``inp``
    converts with ``int`` it is stored under ``parallel_field``; otherwise
    ``str(split_list(inp))`` is stored under ``normal_field``.

    Raises ``SearchParsingError`` if both attempts raise ``ValueError``.
    """
    parallel_key, list_key = qfield
    try:
        query[parallel_key] = int(inp)
        return
    except ValueError:
        pass  # not a single integer; try the list form below
    try:
        query[list_key] = str(split_list(inp))
    except ValueError:
        raise SearchParsingError("It must be either an integer (parallel weight) or a comma separated list of integers enclosed in brackets, such as 2, or [2,2], or [2,4,6].")
def parse_bool(inp, query, qfield, process=None, blank=[]):
    """
    Parse a yes/no style input into ``query[qfield]``.

    - Values listed in ``blank`` are ignored.
    - ``"True"``, ``"yes"``, ``"1"``, ``"even"`` store ``process(True)``.
    - ``"False"``, ``"no"``, ``"-1"``, ``"0"``, ``"odd"`` store
      ``process(False)``.
    - ``"Any"`` leaves ``query`` untouched.

    Anything else raises ``SearchParsingError``.  ``process`` defaults to the
    identity.
    """
    if inp in blank:
        return
    if process is None:
        process = lambda x: x
    # artin reps use parse_bool for an is_even parity field, hence even/odd
    true_words = ["True", "yes", "1", "even"]
    false_words = ["False", "no", "-1", "0", "odd"]
    if inp in true_words:
        flag = True
    elif inp in false_words:
        flag = False
    elif inp == "Any":
        # On the Galois groups page, these indicate "All"
        return
    else:
        raise SearchParsingError("It must be True or False.")
    query[qfield] = process(flag)
def make_sub_query(part):
    """
    Build the query dictionary for one count specification ``part``.

    ``initial_segment``, ``first_field``, ``parse_one`` and ``qfield`` are
    not defined in this function; presumably they come from an enclosing
    scope (confirm against the surrounding code).

    A bracketed ``part`` is split on commas and appended to
    ``initial_segment``.  When the result is a single entry and
    ``first_field`` is set, that entry is parsed with ``parse_one`` and
    stored under ``first_field``; otherwise the entries become a
    ``$startswith`` prefix on ``qfield``.  An unbracketed ``part`` is treated
    the same way as a single entry.

    Raises ``SearchParsingError`` for empty entries and, in the prefix form,
    for ranges.
    """
    part = part.strip()
    if not part:
        raise SearchParsingError("Every count specified must be nonempty.")

    if part[0] == '[':
        ispec = initial_segment + [x.strip() for x in part[1:-1].split(',')]
        if not all(ispec):
            raise SearchParsingError("Every count specified must be nonempty.")
        if len(ispec) == 1 and first_field is not None:
            return {first_field: parse_one(ispec[0])[1]}
        # Skip the first character so a leading minus sign is not read as a range.
        if any('-' in x[1:] for x in ispec):
            raise SearchParsingError("Ranges not supported.")
        return {qfield: {'$startswith':' '.join(ispec) + ' '}}

    if first_field is not None:
        return {first_field: parse_one(part)[1]}
    if '-' in part[1:]:
        raise SearchParsingError("Ranges not supported.")
    return {qfield: {'$startswith':'%s %s '%(' '.join(initial_segment), part)}}
def parse_restricted(inp, query, qfield, allowed, process=None, blank=[]):
    """
    Accept ``inp`` only if it is one of the ``allowed`` values.

    INPUT:

    - ``inp`` -- the user's text
    - ``query`` -- the query dictionary, modified in place
    - ``qfield`` -- the key to set
    - ``allowed`` -- the permitted values; each is compared through ``str``
    - ``process`` -- applied to ``inp`` before storing (default: identity)
    - ``blank`` -- values of ``inp`` that are silently ignored

    Raises ``SearchParsingError`` listing the permitted values when ``inp``
    is not among them.
    """
    if inp in blank:
        return
    if process is None:
        process = lambda x: x
    allowed = [str(a) for a in allowed]
    if inp not in allowed:
        # All four cases must form one chain: if the empty case were a
        # separate ``if``, it would fall through to the final ``else`` and
        # index into the empty list.
        if len(allowed) == 0:
            allowed_str = "unspecified"
        elif len(allowed) == 1:
            allowed_str = allowed[0]
        elif len(allowed) == 2:
            allowed_str = " or ".join(allowed)
        else:
            allowed_str = ", ".join(allowed[:-1]) + " or " + allowed[-1]
        raise SearchParsingError("It must be %s"%allowed_str)
    query[qfield] = process(inp)
def parse_floats(inp, query, qfield, allow_singletons=False):
    """
    Parse ranges of floats (and optionally single floats) into ``query``.

    With ``allow_singletons`` a single value becomes an interval of half a
    unit in the last decimal place typed on either side (``25.1`` becomes
    25.05 to 25.15; a value without a decimal point gets plus or minus 0.5).
    Without it, a single value raises ``SearchParsingError``.

    The input must match ``LIST_FLOAT_RE``.  The constraints produced by
    ``parse_range2`` are merged into ``query`` with ``collapse_ors``.
    """
    if allow_singletons:
        msg = "It needs to be an float (such as 25 or 25.0), a range of floats (such as 2.1-8.7), or a comma-separated list of these (such as 4,9.2,16 or 4-25.1, 81-121)."

        def parse_singleton(a):
            # Number of digits after the decimal point, as typed by the user.
            prec = 0
            if isinstance(a, string_types) and '.' in a:
                prec = len(a) - a.find('.') - 1
            a = float(a)
            half_step = 0.5 * 10**(-prec)
            return {'$gte': a - half_step, '$lte': a + half_step}
    else:
        msg = "It must be a range of floats (such as 2.1-8.7) or a comma-separated list of these (such as 4-25.1, 81-121)."

        def parse_singleton(a):
            raise SearchParsingError(msg)

    if not LIST_FLOAT_RE.match(inp):
        raise SearchParsingError(msg)
    collapse_ors(parse_range2(inp, qfield, parse_singleton, float), query)
def parse_signed_ints(inp, query, qfield, parse_one=None):
    """
    Parse signed integers and ranges into separate sign and absolute-value
    constraints.

    INPUT:

    - ``inp`` -- the user's text; it must match ``SIGNED_LIST_RE``
    - ``query`` -- the query dictionary, modified in place
    - ``qfield`` -- a pair ``(sign_field, abs_field)``; ``sign_field`` may be
      ``None``, in which case no sign constraint is stored
    - ``parse_one`` -- maps an integer to a ``(sign, value)`` pair; the
      default returns ``(sign, abs)`` and treats 0 as ``(1, 0)``

    The input is parsed with ``parse_range3(inp, split0=True)``.  A single
    part is written straight into ``query``; several parts are combined into
    an ``$or`` via ``collapse_ors``.
    """
    if parse_one is None:
        parse_one = lambda x: (int(x.sign()), int(x.abs())) if x != 0 else (1,0)
    sign_field, abs_field = qfield
    if not SIGNED_LIST_RE.match(inp):
        raise SearchParsingError("It needs to be an integer (such as 25), a range of integers (such as 2-10 or 2..10), or a comma-separated list of these (such as 4,9,16 or 4-25, 81-121).")

    def range_constraint(pair):
        # Returns the sign of the first endpoint and the bounds on the
        # absolute value.  For a negative range the endpoints swap roles,
        # because the larger absolute value belongs to the first endpoint.
        s0, d0 = parse_one(pair[0])
        s1, d1 = parse_one(pair[1])
        if s0 < 0:
            return s0, {'$gte': d1, '$lte': d0}
        return s0, {'$lte': d1, '$gte': d0}

    parsed = parse_range3(inp, split0 = True)
    if len(parsed) == 1:
        # if there is only one part, we don't need an $or
        only = parsed[0]
        if type(only) is list:
            sign, constraint = range_constraint(only)
        else:
            sign, constraint = parse_one(only)
        query[abs_field] = constraint
        if sign_field is not None:
            query[sign_field] = sign
        return

    clauses = []
    for x in parsed:
        if type(x) is not list:
            sign, constraint = parse_one(x)
        elif len(x) == 1:
            sign, constraint = parse_one(x[0])
        else:
            sign, constraint = range_constraint(x)
        if sign_field is None:
            clauses.append({abs_field: constraint})
        else:
            clauses.append({sign_field: sign, abs_field: constraint})
    collapse_ors(['$or', clauses], query)
def _parse_subset(inp, query, qfield, mode, radical, product): def add_condition(kwd): if qfield in query: query[qfield][kwd] = inp else: query[qfield] = {kwd: inp} if mode == 'exclude': add_condition('$notcontains') elif mode == 'subset': # sadly, jsonb GIN indexes don't support <@, so we don't want to use # $containedin if we can help it. # Even more sadly, even switching to querying on the radical doesn't help, # since the query planner still uses an index scan on the primary key. #if len(inp) <= 5 and radical is not None: # if radical in query: # raise SearchParsingError("Cannot specify containment and equality simultaneously") # query[radical] = {'$or': [product(X) for X in subsets(inp)]} #else: add_condition('$containedin') elif mode == 'include' or not mode: # include is the default add_condition('$contains') elif mode == 'exactly': if radical is not None: query[radical] = product(inp) return inp = sorted(inp) if inp: dup_free = [inp[0]] for i,x in enumerate(inp[1:]): if x != inp[i]: dup_free.append(x) else: dup_free = [] if qfield in query: raise SearchParsingError("Cannot specify containment and equality simultaneously") query[qfield] = dup_free else: raise ValueError("Unrecognized mode: programming error in LMFDB code")
def __call__(self, info, query, field=None, name=None, qfield=None, *args, **kwds):
    """
    Run the wrapped parser ``self.f`` on one entry of ``info``.

    INPUT:

    - ``info`` -- mapping holding the raw user input
    - ``query`` -- the query dictionary handed on to ``self.f``
    - ``field`` -- key to read from ``info`` (default ``self.default_field``)
    - ``name`` -- name used in error messages (default ``self.default_name``,
      or a capitalized form of ``field`` with underscores turned into spaces)
    - ``qfield`` -- passed to ``self.f``; defaults to ``field``, or to
      ``self.default_qfield`` when ``field`` is still ``None``
    - ``*args``, ``**kwds`` -- passed through to ``self.f``

    Returns without doing anything when the input is missing or falsy.  A
    ``ValueError``, ``AttributeError`` or ``TypeError`` is reported through
    ``flash_error``, ``info['err']`` is set to ``''`` and the exception is
    re-raised.
    """
    try:
        if field is None:
            field = self.default_field
        inp = info.get(field)
        if not inp:
            return
        if name is None:
            name = self.default_name
            if name is None:
                name = field.replace('_',' ').capitalize()
        inp = str(inp)
        if SPACES_RE.search(inp):
            raise SearchParsingError("You have entered spaces in between digits. Please add a comma or delete the spaces.")
        inp = clean_input(inp, self.clean_spaces)
        if qfield is None:
            qfield = self.default_qfield if field is None else field
        if self.prep_ranges:
            inp = prep_ranges(inp)
        if self.prep_plus:
            inp = inp.replace('+','')
        # Parsers constructed with pass_name receive the display name as an
        # extra positional argument before qfield.
        leading = (inp, query, name, qfield) if self.pass_name else (inp, query, qfield)
        self.f(*(leading + args), **kwds)
        if self.clean_info:
            info[field] = inp
    except (ValueError, AttributeError, TypeError) as err:
        template = "<span style='color:black'>%s</span> is not a valid input for <span style='color:black'>%s</span>. "
        if self.error_is_safe:
            # The error text becomes part of the format string itself.
            flash_error(template + str(err) + ".", inp, name)
        else:
            flash_error(template + "%s", inp, name, str(err))
        info['err'] = ''
        raise
def filtered_var(s):
    """
    Return ``var(s)`` when ``s`` is in ``names``; otherwise raise
    ``SearchParsingError``.

    ``names`` is not defined in this function; presumably it comes from an
    enclosing scope (confirm against the surrounding code).
    """
    if s in names:
        return var(s)
    raise SearchParsingError("%s is not a column of this table" % s)
def parse_singleton(a):
    """
    Reject a single value: always raises ``SearchParsingError(msg)``.

    The argument ``a`` is ignored.  ``msg`` is not defined in this function;
    presumably it comes from an enclosing scope (confirm against the
    surrounding code).
    """
    raise SearchParsingError(msg)
def parse_nf_elt(inp, query, name, qfield, field_label='field_label'):
    """
    Parse a number field element and store it in ``query[qfield]``.

    The field must already be present in ``query`` under ``field_label``;
    the part of that value before the first ``.`` is converted with ``int``
    and passed to ``pol_string_to_list`` as ``deg``.

    Raises ``SearchParsingError`` (mentioning ``name``) when no field has
    been specified.
    """
    if field_label in query:
        leading_part = query[field_label].split('.')[0]
        degree = int(leading_part)
        query[qfield] = pol_string_to_list(inp, deg=degree)
        return
    raise SearchParsingError("You must specify a field when searching by %s"%name)
def notq():
    """
    Always raise the ``SearchParsingError`` saying that the rational numbers
    cannot be a proper intermediate field.
    """
    raise SearchParsingError(r"The rational numbers $\Q$ cannot be a proper intermediate field.")
def input_to_subfield(inp):
    """
    Convert user input naming a number field into a ``.``-joined string of
    polynomial coefficients.

    The input is cleaned (unicode minus sign replaced, non-ASCII characters
    dropped) and then tried, in this order, as:

    - a number field label ``d.r.D.n``, looked up in ``db.nf_fields``;
    - a polynomial in ``x``, which must be irreducible and is reduced with
      ``polredabs``;
    - a nickname: ``Qi`` / ``Q(i)``, ``Qsqrt<d>`` / ``Qroot<d>``,
      ``Qzeta<n>``, or ``Qzeta<n>+`` / ``Qzeta<n>plus`` for the totally real
      subfield (via ``rcyclolookup``).

    OUTPUT:

    The coefficient string, or ``None`` when the cleaned input is empty.

    Raises ``SearchParsingError`` when the input denotes Q itself, is a label
    that the lookup does not find, is a reducible polynomial, or is not
    recognized.
    """
    def finish(result):
        return '.'.join([str(z) for z in result])

    def notq():
        raise SearchParsingError(r"The rational numbers $\Q$ cannot be a proper intermediate field.")

    # Change unicode dash with minus sign
    inp = inp.replace(u'\u2212', '-')
    # remove non-ascii characters from inp
    inp = re.sub(r'[^\x00-\x7f]', r'', inp)
    if len(inp) == 0:
        return None
    # Do we have a nf label
    if re.match(r'\d+\.\d+\.[0-9e_]+\.\d+',inp):
        from lmfdb import db
        myfield = db.nf_fields.lookup(inp)
        if myfield:
            return finish(myfield['coeffs'])
        else:
            raise SearchParsingError("It is not the label for a subfield in the database.")
    F = inp.lower() # keep original if needed
    # Is it a polynomial
    if 'x' in F:
        F1 = F.replace('^', '**')
        R = PolynomialRing(ZZ, 'x')
        pol = PolynomialRing(QQ,'x')(str(F1))
        # Clear denominators
        pol *= pol.denominator()
        if not pol.is_irreducible():
            raise SearchParsingError("It is not an irreducible polynomial.")
        coeffs = R(pari(pol).polredabs()).coefficients(sparse=False)
        if coeffs == [0,1]:
            # The polynomial x, i.e. Q itself
            notq()
        return finish(coeffs)
    # Nicknames
    if F == 'q':
        notq()
    if F in ['qi', 'q(i)']:
        return '1.0.1'
    if F[0] == 'q':
        if '(' in F and ')' in F:
            F=F.replace('(','').replace(')','')
            inp=inp.replace('(','').replace(')','')
        if F[1:5] in ['sqrt', 'root']:
            try:
                d = ZZ(str(F[5:])).squarefree_part()
            except (TypeError, ValueError):
                d = 0
            if d == 0 or d == 1:
                raise SearchParsingError("After {0}, the remainder must be a nonzero integer which is not a perfect square. Use {0}5 or {0}-11 for example.".format(inp[:5]))
            # Recursion has it use polredabs to get the polynomial
            return input_to_subfield("x^2 - (%s)" % d)
        # Look for cyclotomic
        if F[0:5] == 'qzeta':
            if '_' in F:
                F = F.replace('_','')
            match_obj = re.match(r'^qzeta(\d+)(\+|plus)?$', F)
            if not match_obj:
                raise SearchParsingError("After {0}, the remainder must be a positive integer or a positive integer followed by '+'. Use {0}5 or {0}19+, for example.".format(F[:5]))

            d = ZZ(str(match_obj.group(1)))
            if d % 4 == 2:
                # Q(zeta_6)=Q(zeta_3), etc.  d is even here, so floor
                # division is exact and keeps d an integer.
                d //= 2
            if d < 1:
                raise SearchParsingError("After {0}, the remainder must be a positive integer or a positive integer followed by '+'. Use {0}5 or {0}19+, for example.".format(F[:5]))
            if d==1: # asking for Q
                notq()

            if match_obj.group(2): # asking for the totally real field
                from lmfdb.number_fields.web_number_field import rcyclolookup
                if d < 5: # again, asking for subfield Q
                    notq()
                if d in rcyclolookup:
                    return input_to_subfield(rcyclolookup[d])
                else:
                    raise SearchParsingError("Subfield %s is not available." % F)
            # The full cyclotomic field: recurse on its defining polynomial
            f = pari.polcyclo(d)
            return input_to_subfield(str(f))
    raise SearchParsingError('It is not a valid field nickname or label, or a defining polynomial.')
def nf_string_to_label(FF): # parse Q, Qsqrt2, Qsqrt-4, Qzeta5, etc
    """
    Convert user input naming a number field into a number field label.

    Accepted forms:

    - ``Q`` and ``Qi`` / ``Q(i)``, which map to fixed labels;
    - ``Qsqrt<d>`` / ``Qroot<d>``, whose label is computed from the
      squarefree part of ``d``;
    - ``Qzeta<n>``, and ``Qzeta<n>+`` / ``Qzeta<n>plus`` for the totally real
      subfield, resolved through ``cyclolookup`` / ``rcyclolookup``;
    - a polynomial in ``x``, resolved with ``poly_to_field_label``;
    - a label ``d.r.D.n`` whose discriminant part may be factored as
      ``AeB`` pieces joined by ``_`` (``11.11.11e20.1``); it is expanded.

    Raises ``SearchParsingError`` for blank or unrecognized input and for
    fields that the lookups do not contain.
    """
    if FF in ['q', 'Q']:
        return '1.1.1.1'
    if FF.lower() in ['qi', 'q(i)']:
        return '2.0.4.1'
    # Change unicode dash with minus sign
    FF = FF.replace(u'\u2212', '-')
    # Remove non-ascii characters
    FF = re.sub(r'[^\x00-\x7f]', r'', FF)
    F = FF.lower() # keep original if needed
    if len(F) == 0:
        raise SearchParsingError("Entry for the field was left blank. You need to enter a field label, field name, or a polynomial.")
    if F[0] == 'q':
        if '(' in F and ')' in F:
            F=F.replace('(','').replace(')','')
        if F[1:5] in ['sqrt', 'root']:
            try:
                d = ZZ(str(F[5:])).squarefree_part()
            except (TypeError, ValueError):
                d = 0
            if d == 0:
                raise SearchParsingError("After {0}, the remainder must be a nonzero integer. Use {0}5 or {0}-11 for example.".format(FF[:5]))
            if d == 1:
                return '1.1.1.1'
            # Discriminant of Q(sqrt(d)) for squarefree d: 4d when d is 2 or
            # 3 mod 4, and d otherwise.
            if d % 4 in [2, 3]:
                D = 4 * d
            else:
                D = d
            absD = D.abs()
            s = 0 if D < 0 else 2
            return '2.%s.%s.1' % (s, str(absD))
        if F[0:5] == 'qzeta':
            if '_' in F:
                F = F.replace('_','')
            match_obj = re.match(r'^qzeta(\d+)(\+|plus)?$', F)
            if not match_obj:
                raise SearchParsingError("After {0}, the remainder must be a positive integer or a positive integer followed by '+'. Use {0}5 or {0}19+, for example.".format(F[:5]))
            d = ZZ(str(match_obj.group(1)))
            if d % 4 == 2:
                # Q(zeta_6)=Q(zeta_3), etc.  d is even here, so floor
                # division is exact and keeps d an integer for the lookups.
                d //= 2
            if match_obj.group(2): # asking for the totally real field
                from lmfdb.number_fields.web_number_field import rcyclolookup
                if d in rcyclolookup:
                    return rcyclolookup[d]
                else:
                    raise SearchParsingError('%s is not in the database.' % F)
            # Now not the totally real subfield
            from lmfdb.number_fields.web_number_field import cyclolookup
            if d in cyclolookup:
                return cyclolookup[d]
            else:
                raise SearchParsingError('%s is not in the database.' % F)
        raise SearchParsingError('It is not a valid field name or label, or a defining polynomial.')
    # check if a polynomial was entered (F is already lower case, so only
    # 'x' needs to be looked for)
    if 'x' in F:
        F1 = F.replace('^', '**')
        from lmfdb.number_fields.number_field import poly_to_field_label
        F1 = poly_to_field_label(F1)
        if F1:
            return F1
        raise SearchParsingError('%s does not define a number field in the database.'%F)
    # Expand out factored labels, like 11.11.11e20.1
    if not re.match(r'\d+\.\d+\.[0-9e_]+\.\d+',F):
        raise SearchParsingError("A number field label must be of the form d.r.D.n, such as 2.2.5.1.")
    parts = F.split(".")

    def raise_power(ab):
        # One factor of the discriminant: "A" or "AeB" (meaning A**B)
        if ab.count("e") == 0:
            return ZZ(ab)
        elif ab.count("e") == 1:
            a,b = ab.split("e")
            return ZZ(a)**ZZ(b)
        else:
            raise SearchParsingError("Malformed absolute discriminant. It must be a sequence of strings AeB for A and B integers, joined by _s. For example, 2e7_3e5_11.")

    parts[2] = str(prod(raise_power(c) for c in parts[2].split("_")))
    return ".".join(parts)
def parse_posints(inp, query, qfield, parse_singleton=int):
    """
    Parse positive integers, ranges and comma-separated lists into ``query``.

    The input must match ``LIST_POSINT_RE``; otherwise
    ``SearchParsingError`` is raised.  The constraints produced by
    ``parse_range2`` (with ``parse_singleton`` applied to single values) are
    merged into ``query`` with ``collapse_ors``.
    """
    if not LIST_POSINT_RE.match(inp):
        raise SearchParsingError("It needs to be a positive integer (such as 25), a range of positive integers (such as 2-10 or 2..10), or a comma-separated list of these (such as 4,9,16 or 4-25, 81-121).")
    constraints = parse_range2(inp, qfield, parse_singleton)
    collapse_ors(constraints, query)
def parse_rats(inp, query, qfield, process=None):
    """
    Parse rational numbers, ranges and comma-separated lists into ``query``.

    The input must match ``LIST_RAT_RE``; otherwise ``SearchParsingError``
    is raised.  ``process`` (default: identity) is handed to
    ``parse_range2rat``, and the resulting constraints are merged into
    ``query`` with ``collapse_ors``.
    """
    transform = process if process is not None else (lambda x: x)
    if not LIST_RAT_RE.match(inp):
        raise SearchParsingError("It needs to be a non-negative rational number (such as 4/3), a range of non-negative rational numbers (such as 2-5/2 or 2.5..10), or a comma-separated list of these (such as 4,9,16 or 4-25, 81-121).")
    constraints = parse_range2rat(inp, qfield, transform)
    collapse_ors(constraints, query)
def parse_bracketed_posints(inp, query, qfield, maxlength=None, exactlength=None, split=True, process=None, listprocess=None, check_divisibility=None, keepbrackets=False, extractor=None):
    """
    Parse a bracketed list of integers such as ``[1,2,3]``.

    INPUT:

    - ``inp`` -- the user's text; it must match ``BRACKETED_POSINT_RE``
    - ``query`` -- the query dictionary, modified in place
    - ``qfield`` -- the key to set, or an iterable of keys when ``extractor``
      is given
    - ``maxlength``, ``exactlength`` -- optional bounds on the number of
      entries (counted through the commas in ``inp``)
    - ``split`` -- store the entries as a list rather than as a string
    - ``process`` -- applied to each entry
    - ``listprocess`` -- applied to the whole list after ``process``
    - ``check_divisibility`` -- ``'decreasing'`` requires each entry to
      divide the previous one, ``'increasing'`` requires each entry to divide
      the next one
    - ``keepbrackets`` -- for the string form, keep the surrounding brackets
    - ``extractor`` -- maps the list to values that are paired with the keys
      in ``qfield``; a value conflicting with a constraint already in
      ``query`` replaces it by ``-1``

    Raises ``SearchParsingError`` on malformed input, on a wrong number of
    entries, or on a failed divisibility check.
    """
    if (not BRACKETED_POSINT_RE.match(inp) or
            (maxlength is not None and inp.count(',') > maxlength - 1) or
            (exactlength is not None and inp.count(',') != exactlength - 1) or
            (exactlength is not None and inp == '[]' and exactlength > 0)):
        # Tailor the description and the examples to the length restriction.
        if exactlength == 2:
            lstr = "pair of integers"
            example = "[6,2] or [32,32]"
        elif exactlength == 1:
            lstr = "list of 1 integer"
            example = "[2]"
        elif exactlength is not None:
            lstr = "list of %s integers" % exactlength
            example = str(list(range(2,exactlength+2))).replace(" ","") + " or " + str([3]*exactlength).replace(" ","")
        elif maxlength is not None:
            lstr = "list of at most %s integers" % maxlength
            example = str(list(range(2,maxlength+2))).replace(" ","") + " or " + str([2]*max(1, maxlength-2)).replace(" ","")
        else:
            lstr = "list of integers"
            example = "[1,2,3] or [5,6]"
        raise SearchParsingError("It needs to be a %s in square brackets, such as %s." % (lstr, example))

    if inp == '[]':
        # Handled separately because str.split never returns an empty list.
        query[qfield] = [] if split else ''
        return

    L = [int(a) for a in inp[1:-1].split(',')]
    # In both checks a zero divisor is rejected explicitly: ``%`` would
    # otherwise raise ZeroDivisionError instead of a parsing error.
    if check_divisibility == 'decreasing':
        # Check that each entry divides the previous
        for previous, current in zip(L, L[1:]):
            if current == 0 or previous % current != 0:
                raise SearchParsingError("Each entry must divide the previous, such as [4,2].")
    elif check_divisibility == 'increasing':
        # Check that each entry divides the next
        for current, following in zip(L, L[1:]):
            if current == 0 or following % current != 0:
                raise SearchParsingError("Each entry must divide the next, such as [2,4].")

    if process is not None:
        L = [process(a) for a in L]
    if listprocess is not None:
        L = listprocess(L)

    if extractor is not None:
        # This is currently only used by number field signatures
        # It assumes degree is fairly simple in the query
        for qf, v in zip(qfield, extractor(L)):
            if qf not in query:
                query[qf] = v
                continue
            existing = query[qf]
            # If used more generally we should check every modifier
            # value -1 is used to force empty search results
            if isinstance(existing, dict):
                conflict = (('$in' in existing and v not in existing['$in']) or
                            ('$gt' in existing and not v > existing['$gt']) or
                            ('$gte' in existing and not v >= existing['$gte']) or
                            ('$lt' in existing and not v < existing['$lt']) or
                            ('$lte' in existing and not v <= existing['$lte']))
                query[qf] = -1 if conflict else v
            elif v != existing:
                query[qf] = -1
    elif split:
        query[qfield] = L
    else:
        inp = '[%s]'%','.join([str(a) for a in L])
        query[qfield] = inp if keepbrackets else inp[1:-1]
def parse_rational(inp, query, qfield):
    """
    Store ``str(QQ(inp))`` in ``query[qfield]``.

    The input must match ``QQ_RE``; otherwise ``SearchParsingError`` is
    raised.
    """
    if not QQ_RE.match(inp):
        raise SearchParsingError("It needs to be a rational number.")
    query[qfield] = str(QQ(inp))