class Urlizer:
    """
    Convert any URLs in text into clickable links.

    Work on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.
    """

    # Characters stripped from the end of a candidate URL.
    trailing_punctuation_chars = ".,:;!"
    # (opening, closing) pairs that may wrap a candidate URL.
    wrapping_punctuation = [("(", ")"), ("[", "]")]

    # Matches words starting with an explicit http:// or https:// scheme.
    simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.IGNORECASE)
    # Matches scheme-less words: "www." prefixes or hosts ending in a known
    # gTLD (optionally followed by a path).
    simple_url_2_re = _lazy_re_compile(
        r"^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$",
        re.IGNORECASE,
    )
    # Splits text on runs of whitespace, angle brackets and quotes; the
    # capturing group keeps the separators in the split result.
    word_split_re = _lazy_re_compile(r"""([\s<>"']+)""")

    mailto_template = "mailto:{local}@{domain}"
    url_template = '<a href="{href}"{attrs}>{url}</a>'

    def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
        """
        If trim_url_limit is not None, truncate the URLs in the link text
        longer than this limit to trim_url_limit - 1 characters and append an
        ellipsis.

        If nofollow is True, give the links a rel="nofollow" attribute.

        If autoescape is True, autoescape the link text and URLs.
        """
        safe_input = isinstance(text, SafeData)

        words = self.word_split_re.split(str(text))
        return "".join(
            [
                self.handle_word(
                    word,
                    safe_input=safe_input,
                    trim_url_limit=trim_url_limit,
                    nofollow=nofollow,
                    autoescape=autoescape,
                )
                for word in words
            ]
        )

    def handle_word(
        self,
        word,
        *,
        safe_input,
        trim_url_limit=None,
        nofollow=False,
        autoescape=False,
    ):
        """
        Return `word` converted to an HTML link if it looks like a URL or an
        email address; otherwise return it as plain text.

        Plain text is marked safe when `safe_input` is True, escaped when
        `autoescape` is True, and returned unchanged otherwise. This also
        applies to email-like words whose domain can't be encoded with
        punycode, so such words are never emitted unescaped when autoescaping
        is requested.
        """
        if "." in word or "@" in word or ":" in word:
            # lead: Punctuation trimmed from the beginning of the word.
            # middle: State of the word.
            # trail: Punctuation trimmed from the end of the word.
            lead, middle, trail = self.trim_punctuation(word)
            # Make URL we want to point to.
            url = None
            nofollow_attr = ' rel="nofollow"' if nofollow else ""
            if self.simple_url_re.match(middle):
                url = smart_urlquote(html.unescape(middle))
            elif self.simple_url_2_re.match(middle):
                url = smart_urlquote("http://%s" % html.unescape(middle))
            elif ":" not in middle and self.is_email_simple(middle):
                local, domain = middle.rsplit("@", 1)
                try:
                    domain = punycode(domain)
                except UnicodeError:
                    # Not a linkable address: leave `url` unset so the word
                    # goes through the regular plain-text handling below
                    # instead of being returned raw.
                    pass
                else:
                    url = self.mailto_template.format(local=local, domain=domain)
                    # mailto: links never get rel="nofollow".
                    nofollow_attr = ""
            # Make link.
            if url:
                trimmed = self.trim_url(middle, limit=trim_url_limit)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    trimmed = escape(trimmed)
                middle = self.url_template.format(
                    href=escape(url),
                    attrs=nofollow_attr,
                    url=trimmed,
                )
                return mark_safe(f"{lead}{middle}{trail}")
        # Plain (non-link) word.
        if safe_input:
            return mark_safe(word)
        if autoescape:
            return escape(word)
        return word

    def trim_url(self, x, *, limit):
        """
        Return `x` truncated to `limit - 1` characters followed by an
        ellipsis when it is longer than `limit`; return it unchanged when
        `limit` is None or `x` already fits.
        """
        if limit is None or len(x) <= limit:
            return x
        return "%s…" % x[: max(0, limit - 1)]

    def trim_punctuation(self, word):
        """
        Trim trailing and wrapping punctuation from `word`. Return the items of
        the new state.
        """
        lead, middle, trail = "", word, ""
        # Continue trimming until middle remains unchanged.
        trimmed_something = True
        while trimmed_something:
            trimmed_something = False
            # Trim wrapping punctuation.
            for opening, closing in self.wrapping_punctuation:
                if middle.startswith(opening):
                    middle = middle[len(opening) :]
                    lead += opening
                    trimmed_something = True
                # Keep parentheses at the end only if they're balanced.
                if (
                    middle.endswith(closing)
                    and middle.count(closing) == middle.count(opening) + 1
                ):
                    middle = middle[: -len(closing)]
                    trail = closing + trail
                    trimmed_something = True
            # Trim trailing punctuation (after trimming wrapping punctuation,
            # as encoded entities contain ';'). Unescape entities to avoid
            # breaking them by removing ';'.
            middle_unescaped = html.unescape(middle)
            stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)
            if middle_unescaped != stripped:
                punctuation_count = len(middle_unescaped) - len(stripped)
                trail = middle[-punctuation_count:] + trail
                middle = middle[:-punctuation_count]
                trimmed_something = True
        return lead, middle, trail

    @staticmethod
    def is_email_simple(value):
        """Return True if value looks like an email address."""
        # An @ must be in the middle of the value.
        if "@" not in value or value.startswith("@") or value.endswith("@"):
            return False
        try:
            p1, p2 = value.split("@")
        except ValueError:
            # value contains more than one @.
            return False
        # Dot must be in p2 (e.g. example.com)
        if "." not in p2 or p2.startswith("."):
            return False
        return True
version = Database.version_info #if version < (1, 4, 0): # raise ImproperlyConfigured('mysqlclient 1.4.0 or newer is required; you have %s.' % Database.__version__) # MySQLdb returns TIME columns as timedelta -- they are more like timedelta in # terms of actual behavior as they are signed and include days -- and Django # expects time. django_conversions = { **conversions, **{FIELD_TYPE.TIME: backend_utils.typecast_time}, } # This should match the numerical portion of the version numbers (we can treat # versions like 5.0.24 and 5.0.24a as the same). server_version_re = _lazy_re_compile(r'(\d{1,2})\.(\d{1,2})\.(\d{1,2})') class CursorWrapper: """ A thin wrapper around MySQLdb's normal cursor class that catches particular exception instances and reraises them with the correct types. Implemented as a wrapper, rather than a subclass, so that it isn't stuck to the particular underlying representation returned by Connection.cursor(). """ codes_for_integrityerror = ( 1048, # Column cannot be null 1690, # BIGINT UNSIGNED value is out of range 3819, # CHECK constraint is violated 4025, # CHECK constraint failed
from io import BytesIO from django.utils.deprecation import RemovedInDjango40Warning from django.utils.functional import SimpleLazyObject, keep_lazy_text, lazy from django.utils.regex_helper import _lazy_re_compile from django.utils.translation import gettext as _, gettext_lazy, pgettext @keep_lazy_text def capfirst(x): """Capitalize the first letter of a string.""" return x and str(x)[0].upper() + str(x)[1:] # Set up regular expressions re_words = _lazy_re_compile(r'<[^>]+?>|([^<>\s]+)', re.S) re_chars = _lazy_re_compile(r'<[^>]+?>|(.)', re.S) re_tag = _lazy_re_compile(r'<(/)?(\S+?)(?:(\s*/)|\s.*?)?>', re.S) re_newlines = _lazy_re_compile(r'\r\n|\r') # Used in normalize_newlines re_camel_case = _lazy_re_compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))') @keep_lazy_text def wrap(text, width): """ A word-wrap function that preserves existing line breaks. Expects that existing line breaks are posix newlines. Preserve all white space except added line breaks consume the space on which they break the line.
# URL validation pattern, assembled from adjacent raw-string fragments. Each
# comment below describes the fragment(s) that follow it. Compiled
# case-insensitively (see the flags at the end), so the [a-z] classes also
# match upper-case letters.
# NOTE(review): ip_middle_octet and ip_last_octet are defined outside this
# block; their exact octet ranges are not visible here.
regex = _lazy_re_compile(  # noqa: W605
    r"^"
    # protocol identifier (http, https or ftp)
    r"(?:(?:https?|ftp)://)"
    # optional user:pass authentication
    r"(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+"
    r"(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?"
    # start of the host alternatives
    r"(?:"
    # private & local networks, captured in the "private_ip" group:
    # 10.x.x.x, 127.x.x.x, 169.254.x.x, 192.168.x.x and 172.16-31.x.x
    r"(?P<private_ip>"
    r"(?:(?:10|127)" + ip_middle_octet + r"{2}" + ip_last_octet + r")|"
    r"(?:(?:169\.254|192\.168)" + ip_middle_octet + ip_last_octet + r")|"
    r"(?:172\.(?:1[6-9]|2\d|3[0-1])" + ip_middle_octet + ip_last_octet + r"))"
    r"|"
    # private & local hosts, captured in the "private_host" group
    r"(?P<private_host>"
    r"(?:localhost))"
    r"|"
    # IP address in dotted notation, captured in the "public_ip" group.
    # The first octet must be 1-223, which excludes the 0.x.x.x network and
    # the reserved space >= 224.0.0.0.
    # Presumably ip_last_octet excludes network & broadcast addresses
    # (first & last IP address of each class) -- confirm at its definition.
    r"(?P<public_ip>"
    r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
    r"" + ip_middle_octet + r"{2}"
    r"" + ip_last_octet + r")"
    r"|"
    # Bracketed IPv6 literal.
    # IPv6 RegEx from https://stackoverflow.com/a/17871737
    r"\[("
    # 1:2:3:4:5:6:7:8
    r"([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|"
    # 1::    1:2:3:4:5:6:7::
    r"([0-9a-fA-F]{1,4}:){1,7}:|"
    # 1::8    1:2:3:4:5:6::8
    r"([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|"
    # 1::7:8    1:2:3:4:5::7:8    1:2:3:4:5::8
    r"([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|"
    # 1::6:7:8    1:2:3:4::6:7:8    1:2:3:4::8
    r"([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|"
    # 1::5:6:7:8    1:2:3::5:6:7:8    1:2:3::8
    r"([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|"
    # 1::4:5:6:7:8    1:2::4:5:6:7:8    1:2::8
    r"([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|"
    # 1::3:4:5:6:7:8    1::8
    r"[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|"
    # ::2:3:4:5:6:7:8    ::8    ::
    r":((:[0-9a-fA-F]{1,4}){1,7}|:)|"
    # fe80::7:8%eth0    fe80::7:8%1
    # (link-local IPv6 addresses with zone index)
    r"fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|"
    # ::255.255.255.255    ::ffff:255.255.255.255    ::ffff:0:255.255.255.255
    # (IPv4-mapped IPv6 addresses and IPv4-translated addresses)
    r"::(ffff(:0{1,4}){0,1}:){0,1}"
    r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}"
    r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|"
    # 2001:db8:3:4::192.0.2.33    64:ff9b::192.0.2.33
    # (IPv4-Embedded IPv6 Address)
    r"([0-9a-fA-F]{1,4}:){1,4}:"
    r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}"
    r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])"
    r")\]|"
    # host name (first label; may use the "xn--" punycode prefix)
    r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*"
    r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)"
    # domain name (zero or more further dot-separated labels)
    r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*"
    r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*"
    # TLD identifier (mandatory; at least two characters, digits are only
    # allowed in the "xn--" form)
    r"(?:\.(?:(?:xn--[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|"
    r"[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))"
    # end of the host alternatives
    r")"
    # optional port number (two to five digits)
    r"(?::\d{2,5})?"
    # optional resource path
    r"(?:/[-a-z\u00a1-\uffff\U00010000-\U0010ffff0-9._~%!$&'()*+,;=:@/]*)?"
    # optional query string
    r"(?:\?\S*)?"
    # optional fragment
    r"(?:#\S*)?"
    r"$",
    re.UNICODE | re.IGNORECASE)
class SafeExceptionReporterFilter:
    """
    Filter sensitive information out of error reports, relying on the
    annotations left by the sensitive_post_parameters and sensitive_variables
    decorators.
    """
    cleansed_substitute = '********************'
    hidden_settings = _lazy_re_compile('API|TOKEN|KEY|SECRET|PASS|SIGNATURE', flags=re.I)

    def cleanse_setting(self, key, value):
        """
        Return `value` with sensitive content replaced by the substitute.

        A value whose key matches `hidden_settings` is replaced outright.
        Dictionaries, lists and tuples are cleansed recursively (list and
        tuple items are checked with an empty key).
        """
        try:
            sensitive = self.hidden_settings.search(key)
        except TypeError:
            # Non-string keys can't be matched against the pattern.
            sensitive = False

        if sensitive:
            result = self.cleansed_substitute
        elif isinstance(value, dict):
            result = {
                name: self.cleanse_setting(name, item)
                for name, item in value.items()
            }
        elif isinstance(value, list):
            result = [self.cleanse_setting('', item) for item in value]
        elif isinstance(value, tuple):
            result = tuple(self.cleanse_setting('', item) for item in value)
        else:
            result = value

        # Wrap callables in CallableSettingWrapper before returning them.
        return CallableSettingWrapper(result) if callable(result) else result

    def get_safe_settings(self):
        """
        Return a dictionary of the upper-case settings with the values of
        sensitive settings replaced with stars (*********).
        """
        return {
            name: self.cleanse_setting(name, getattr(settings, name))
            for name in dir(settings)
            if name.isupper()
        }

    def get_safe_request_meta(self, request):
        """
        Return a dictionary of request.META with sensitive values redacted.
        """
        if hasattr(request, 'META'):
            return {
                name: self.cleanse_setting(name, item)
                for name, item in request.META.items()
            }
        return {}

    def is_active(self, request):
        """
        Return True when filtering should be applied, i.e. when DEBUG is
        False. With DEBUG set to True the site is not safe anyway.

        This hook exists so the filter can easily be switched on or off on a
        per-request basis.
        """
        return settings.DEBUG is False

    def get_cleansed_multivaluedict(self, request, multivaluedict):
        """
        Replace the keys in a MultiValueDict marked as sensitive with stars.
        This mitigates leaking sensitive POST parameters if something like
        request.POST['nonexistent_key'] throws an exception (#21098).
        """
        sensitive_names = getattr(request, 'sensitive_post_parameters', [])
        if not (self.is_active(request) and sensitive_names):
            return multivaluedict
        # Work on a copy so the caller's mapping is left untouched.
        cleansed = multivaluedict.copy()
        for param in sensitive_names:
            if param in cleansed:
                cleansed[param] = self.cleansed_substitute
        return cleansed

    def get_post_parameters(self, request):
        """
        Return the request's POST parameters with the values of those marked
        as sensitive replaced with stars (*********).
        """
        if request is None:
            return {}
        sensitive_names = getattr(request, 'sensitive_post_parameters', [])
        if not (self.is_active(request) and sensitive_names):
            return request.POST
        cleansed = request.POST.copy()
        if sensitive_names == '__ALL__':
            # Every parameter is sensitive.
            for param in cleansed:
                cleansed[param] = self.cleansed_substitute
        else:
            # Only the listed parameters are sensitive.
            for param in sensitive_names:
                if param in cleansed:
                    cleansed[param] = self.cleansed_substitute
        return cleansed

    def cleanse_special_types(self, request, value):
        """
        Return `value`, cleansed if it is a MultiValueDict. If checking the
        type raises, return a string describing the exception instead.
        """
        try:
            # The isinstance check may raise for lazy or otherwise complex
            # objects.
            needs_cleansing = isinstance(value, MultiValueDict)
        except Exception as exc:
            return '{!r} while evaluating {!r}'.format(exc, value)
        if not needs_cleansing:
            return value
        # request.POST is the MultiValueDict we usually care about.
        return self.get_cleansed_multivaluedict(request, value)

    def get_traceback_frame_variables(self, request, tb_frame):
        """
        Return the frame's local variables as (name, value) pairs, with the
        values of variables marked as sensitive replaced with stars
        (*********).
        """
        wrapper_name = 'sensitive_variables_wrapper'

        # Walk up the frame's callers looking for the wrapper installed by
        # the sensitive_variables decorator, and take note of the variable
        # names it declares.
        sensitive_variables = None
        frame = tb_frame.f_back
        while frame is not None:
            if frame.f_code.co_name == wrapper_name and wrapper_name in frame.f_locals:
                sensitive_variables = getattr(
                    frame.f_locals[wrapper_name], 'sensitive_variables', None
                )
                break
            frame = frame.f_back

        if not (self.is_active(request) and sensitive_variables):
            # Nothing declared sensitive (or the filter is inactive): only
            # cleanse any MultiValueDicts among the frame variables.
            cleansed = {
                name: self.cleanse_special_types(request, value)
                for name, value in tb_frame.f_locals.items()
            }
        elif sensitive_variables == '__ALL__':
            # Every variable is sensitive.
            cleansed = dict.fromkeys(tb_frame.f_locals, self.cleansed_substitute)
        else:
            # Only the named variables are sensitive.
            cleansed = {
                name: (
                    self.cleansed_substitute
                    if name in sensitive_variables
                    else self.cleanse_special_types(request, value)
                )
                for name, value in tb_frame.f_locals.items()
            }

        if tb_frame.f_code.co_name == wrapper_name and wrapper_name in tb_frame.f_locals:
            # This is the decorator's own frame: for good measure, obfuscate
            # the decorated function's arguments too, in case the variables
            # bound to them were meant to be hidden in the decorated
            # function's frame.
            cleansed['func_args'] = self.cleansed_substitute
            cleansed['func_kwargs'] = self.cleansed_substitute

        return cleansed.items()
from django.conf import settings from django.core import signals, signing from django.core.exceptions import DisallowedRedirect from django.core.serializers.json import DjangoJSONEncoder from django.http.cookie import SimpleCookie from django.utils import timezone from django.utils.datastructures import ( CaseInsensitiveMapping, _destruct_iterable_mapping_values, ) from django.utils.encoding import iri_to_uri from django.utils.http import http_date from django.utils.regex_helper import _lazy_re_compile _charset_from_content_type_re = _lazy_re_compile( r';\s*charset=(?P<charset>[^\s;]+)', re.I) class ResponseHeaders(CaseInsensitiveMapping): def __init__(self, data): """ Populate the initial data using __setitem__ to ensure values are correctly encoded. """ if not isinstance(data, Mapping): data = {k: v for k, v in _destruct_iterable_mapping_values(data)} self._store = {} for header, value in data.items(): self[header] = value def _convert_to_charset(self, value, charset, mime_encode=False):
class SelectDateWidget(Widget):
    """
    A widget that splits date input into three <select> boxes.

    This also serves as an example of a Widget that has more than one HTML
    element and hence implements value_from_datadict.
    """
    none_value = ('', '---')
    month_field = '%s_month'
    day_field = '%s_day'
    year_field = '%s_year'
    template_name = 'django/forms/widgets/select_date.html'
    input_type = 'select'
    select_widget = Select
    # Matches pseudo-ISO dates such as '2017-0-23' (a zero stands for an
    # unselected part).
    date_re = _lazy_re_compile(r'(\d{4}|0)-(\d\d?)-(\d\d?)$')

    def __init__(self, attrs=None, years=None, months=None, empty_label=None):
        """
        `years` and `months` override the choices offered by the year and
        month boxes. `empty_label` is either a single label used for all
        three boxes or a list/tuple of three labels (year, month, day).

        Raise ValueError if `empty_label` is a list/tuple that doesn't have
        exactly three elements.
        """
        self.attrs = attrs or {}

        # Optional list or tuple of years to use in the "year" select box.
        if years:
            self.years = years
        else:
            this_year = datetime.date.today().year
            self.years = range(this_year, this_year + 10)

        # Optional dict of months to use in the "month" select box.
        if months:
            self.months = months
        else:
            self.months = MONTHS

        # Optional string, list, or tuple to use as empty_label.
        if isinstance(empty_label, (list, tuple)):
            if len(empty_label) != 3:
                raise ValueError('empty_label list/tuple must have 3 elements.')

            self.year_none_value = ('', empty_label[0])
            self.month_none_value = ('', empty_label[1])
            self.day_none_value = ('', empty_label[2])
        else:
            if empty_label is not None:
                self.none_value = ('', empty_label)

            self.year_none_value = self.none_value
            self.month_none_value = self.none_value
            self.day_none_value = self.none_value

    def get_context(self, name, value, attrs):
        """
        Build the template context, adding one select subwidget per date
        part, ordered according to the active DATE_FORMAT.
        """
        context = super().get_context(name, value, attrs)
        date_context = {}
        year_choices = [(i, str(i)) for i in self.years]
        if not self.is_required:
            year_choices.insert(0, self.year_none_value)
        year_name = self.year_field % name
        date_context['year'] = self.select_widget(attrs, choices=year_choices).get_context(
            name=year_name,
            value=context['widget']['value']['year'],
            attrs={**context['widget']['attrs'], 'id': 'id_%s' % year_name},
        )
        month_choices = list(self.months.items())
        if not self.is_required:
            month_choices.insert(0, self.month_none_value)
        month_name = self.month_field % name
        date_context['month'] = self.select_widget(attrs, choices=month_choices).get_context(
            name=month_name,
            value=context['widget']['value']['month'],
            attrs={**context['widget']['attrs'], 'id': 'id_%s' % month_name},
        )
        day_choices = [(i, i) for i in range(1, 32)]
        if not self.is_required:
            day_choices.insert(0, self.day_none_value)
        day_name = self.day_field % name
        date_context['day'] = self.select_widget(attrs, choices=day_choices).get_context(
            name=day_name,
            value=context['widget']['value']['day'],
            attrs={**context['widget']['attrs'], 'id': 'id_%s' % day_name},
        )
        subwidgets = []
        for field in self._parse_date_fmt():
            subwidgets.append(date_context[field]['widget'])
        context['widget']['subwidgets'] = subwidgets
        return context

    def format_value(self, value):
        """
        Return a dict containing the year, month, and day of the current value.
        Use dict instead of a datetime to allow invalid dates such as February
        31 to display correctly.
        """
        year, month, day = None, None, None
        if isinstance(value, (datetime.date, datetime.datetime)):
            year, month, day = value.year, value.month, value.day
        elif isinstance(value, str):
            match = self.date_re.match(value)
            if match:
                # Convert any zeros in the date to empty strings to match the
                # empty option value.
                year, month, day = [int(val) or '' for val in match.groups()]
            elif settings.USE_L10N:
                input_format = get_format('DATE_INPUT_FORMATS')[0]
                try:
                    d = datetime.datetime.strptime(value, input_format)
                except ValueError:
                    pass
                else:
                    year, month, day = d.year, d.month, d.day
        return {'year': year, 'month': month, 'day': day}

    @staticmethod
    def _parse_date_fmt():
        """
        Yield 'year', 'month' or 'day' for each corresponding format
        character in the active DATE_FORMAT, skipping backslash-escaped
        characters.
        """
        fmt = get_format('DATE_FORMAT')
        escaped = False
        for char in fmt:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char in 'Yy':
                yield 'year'
            elif char in 'bEFMmNn':
                yield 'month'
            elif char in 'dj':
                yield 'day'

    def id_for_label(self, id_):
        """
        Return the id of the first select box, as ordered by DATE_FORMAT,
        falling back to the month box if the format names no date part.
        """
        for first_select in self._parse_date_fmt():
            return '%s_%s' % (id_, first_select)
        return '%s_month' % id_

    def value_from_datadict(self, data, files, name):
        """
        Combine the submitted year, month and day into a single value.

        Return None when all three parts are empty, a string formatted with
        the first DATE_INPUT_FORMATS entry when they form a valid date, and a
        pseudo-ISO string that later fails validation when they don't. Fall
        back to `data.get(name)` if any part is missing from `data`.
        """
        y = data.get(self.year_field % name)
        m = data.get(self.month_field % name)
        d = data.get(self.day_field % name)
        if y == m == d == '':
            return None
        if y is not None and m is not None and d is not None:
            input_format = get_format('DATE_INPUT_FORMATS')[0]
            try:
                date_value = datetime.date(int(y), int(m), int(d))
            except ValueError:
                # Return pseudo-ISO dates with zeros for any unselected values,
                # e.g. '2017-0-23'.
                return '%s-%s-%s' % (y or 0, m or 0, d or 0)
            except OverflowError:
                # datetime.date() raises OverflowError rather than ValueError
                # for integers too large for a C int. Return a value that
                # fails validation instead of crashing on such input.
                return '0-0-0'
            date_value = datetime_safe.new_date(date_value)
            return date_value.strftime(input_format)
        return data.get(name)

    def value_omitted_from_data(self, data, files, name):
        """Return True if none of the year/month/day keys is in `data`."""
        return not any(
            ('{}_{}'.format(name, interval) in data)
            for interval in ('year', 'month', 'day')
        )
inliner.document.settings.link_base, text.lower(), )), **options) return [node], [] if docutils_is_available: docutils.parsers.rst.roles.register_canonical_role('cmsreference', default_reference_role) for name, urlbase in ROLES.items(): create_reference_role(name, urlbase) # Match the beginning of a named or unnamed group. named_group_matcher = _lazy_re_compile(r'\(\?P(<\w+>)') unnamed_group_matcher = _lazy_re_compile(r'\(') def replace_named_groups(pattern): r""" Find named groups in `pattern` and replace them with the group name. E.g., 1. ^(?P<a>\w+)/b/(\w+)$ ==> ^<a>/b/(\w+)$ 2. ^(?P<a>\w+)/b/(?P<c>\w+)/$ ==> ^<a>/b/<c>/$ 3. ^(?P<a>\w+)/b/(\w+) ==> ^<a>/b/(\w+) 4. ^(?P<a>\w+)/b/(?P<c>\w+) ==> ^<a>/b/<c> """ named_group_indices = [(m.start(0), m.end(0), m.group(1)) for m in named_group_matcher.finditer(pattern)] # Tuples of (named capture group pattern, group name). group_pattern_and_name = []
stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, cwd=repo_dir, universal_newlines=True, ) timestamp = git_log.stdout tz = datetime.timezone.utc try: timestamp = datetime.datetime.fromtimestamp(int(timestamp), tz=tz) except ValueError: return None return timestamp.strftime('%Y%m%d%H%M%S') version_component_re = _lazy_re_compile(r'(\d+|[a-z]+|\.)') def get_version_tuple(version): """ Return a tuple of version numbers (e.g. (1, 2, 3)) from the version string (e.g. '1.2.3'). """ version_numbers = [] for item in version_component_re.split(version): if item and item != '.': try: component = int(item) except ValueError: break else:
import re

from django.utils.regex_helper import _lazy_re_compile

# Regular expressions for recognizing HEXEWKB, WKT, and JSON input. A
# prophylactic measure to prevent potentially malicious input from reaching
# the underlying C library. Not a substitute for good web security
# programming practices.

# A non-empty string made up only of hexadecimal digits (case-insensitive).
hex_regex = _lazy_re_compile(r'^[0-9A-F]+$', re.I)
# An optional "SRID=<integer>;" prefix (captured as "srid") followed by the
# WKT itself (captured as "wkt"): a geometry type keyword (captured as
# "type") and a run of characters restricted to the class below.
wkt_regex = _lazy_re_compile(
    r'^(SRID=(?P<srid>\-?\d+);)?'
    r'(?P<wkt>'
    r'(?P<type>POINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|'
    r'MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)'
    r'[ACEGIMLONPSRUTYZ\d,\.\-\+\(\) ]+)$',
    re.I
)
# Text that, ignoring surrounding whitespace, starts with "{" and ends with
# "}". DOTALL lets the body span multiple lines.
json_regex = _lazy_re_compile(r'^(\s+)?\{.*}(\s+)?$', re.DOTALL)
import warnings from io import StringIO from django.template.base import Lexer, TokenType from django.utils.regex_helper import _lazy_re_compile from . import TranslatorCommentWarning, trim_whitespace TRANSLATOR_COMMENT_MARK = "Translators" dot_re = _lazy_re_compile(r"\S") def blankout(src, char): """ Change every non-whitespace character to the given char. Used in the templatize function. """ return dot_re.sub(char, src) context_re = _lazy_re_compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""") inline_re = _lazy_re_compile( # Match the trans/translate 'some text' part. r"""^\s*trans(?:late)?\s+((?:"[^"]*?")|(?:'[^']*?'))""" # Match and ignore optional filters r"""(?:\s*\|\s*[^\s:]+(?::(?:[^\s'":]+|(?:"[^"]*?")|(?:'[^']*?')))?)*""" # Match the optional context part r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*""" ) block_re = _lazy_re_compile(
"""Compare two HTML documents.""" from html.parser import HTMLParser from django.utils.regex_helper import _lazy_re_compile # ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 # SPACE. # https://infra.spec.whatwg.org/#ascii-whitespace ASCII_WHITESPACE = _lazy_re_compile(r'[\t\n\f\r ]+') def normalize_whitespace(string): return ASCII_WHITESPACE.sub(' ', string) class Element: def __init__(self, name, attributes): self.name = name self.attributes = sorted(attributes) self.children = [] def append(self, element): if isinstance(element, str): element = normalize_whitespace(element) if self.children and isinstance(self.children[-1], str): self.children[-1] += element self.children[-1] = normalize_whitespace(self.children[-1]) return elif self.children: # removing last children if it is only whitespace
VARIABLE_TAG_END = '}}' COMMENT_TAG_START = '{#' COMMENT_TAG_END = '#}' TRANSLATOR_COMMENT_MARK = 'Translators' SINGLE_BRACE_START = '{' SINGLE_BRACE_END = '}' # what to report as the origin for templates that come from non-loader sources # (e.g. strings) UNKNOWN_SOURCE = '<unknown source>' # match a variable or block tag and capture the entire tag, including start/end # delimiters tag_re = (_lazy_re_compile( '(%s.*?%s|%s.*?%s|%s.*?%s)' % (re.escape(BLOCK_TAG_START), re.escape(BLOCK_TAG_END), re.escape(VARIABLE_TAG_START), re.escape(VARIABLE_TAG_END), re.escape(COMMENT_TAG_START), re.escape(COMMENT_TAG_END)))) logger = logging.getLogger('django.template') class TokenType(Enum): TEXT = 0 VAR = 1 BLOCK = 2 COMMENT = 3 class VariableDoesNotExist(Exception): def __init__(self, msg, params=()):
from collections import namedtuple import sqlparse from django.db import DatabaseError from django.db.backends.base.introspection import BaseDatabaseIntrospection from django.db.backends.base.introspection import FieldInfo as BaseFieldInfo from django.db.backends.base.introspection import TableInfo from django.db.models import Index from django.utils.regex_helper import _lazy_re_compile FieldInfo = namedtuple("FieldInfo", BaseFieldInfo._fields + ("pk", "has_json_constraint")) field_size_re = _lazy_re_compile(r"^\s*(?:var)?char\s*\(\s*(\d+)\s*\)\s*$") def get_field_size(name): """Extract the size number from a "varchar(11)" type name""" m = field_size_re.search(name) return int(m[1]) if m else None # This light wrapper "fakes" a dictionary interface, because some SQLite data # types include variables in them -- e.g. "varchar(30)" -- and can't be matched # as a simple dictionary lookup. class FlexibleFieldLookupDict: # Maps SQL types to Django Field types. Some of the SQL types have multiple # entries here because SQLite allows for anything and doesn't normalize the # field type; it uses whatever was given. base_data_types_reverse = {
class DatabaseOperations(BaseDatabaseOperations): # Oracle uses NUMBER(5), NUMBER(11), and NUMBER(19) for integer fields. # SmallIntegerField uses NUMBER(11) instead of NUMBER(5), which is used by # SmallAutoField, to preserve backward compatibility. integer_field_ranges = { "SmallIntegerField": (-99999999999, 99999999999), "IntegerField": (-99999999999, 99999999999), "BigIntegerField": (-9999999999999999999, 9999999999999999999), "PositiveBigIntegerField": (0, 9999999999999999999), "PositiveSmallIntegerField": (0, 99999999999), "PositiveIntegerField": (0, 99999999999), "SmallAutoField": (-99999, 99999), "AutoField": (-99999999999, 99999999999), "BigAutoField": (-9999999999999999999, 9999999999999999999), } set_operators = { **BaseDatabaseOperations.set_operators, "difference": "MINUS" } # TODO: colorize this SQL code with style.SQL_KEYWORD(), etc. _sequence_reset_sql = """ DECLARE table_value integer; seq_value integer; seq_name user_tab_identity_cols.sequence_name%%TYPE; BEGIN BEGIN SELECT sequence_name INTO seq_name FROM user_tab_identity_cols WHERE table_name = '%(table_name)s' AND column_name = '%(column_name)s'; EXCEPTION WHEN NO_DATA_FOUND THEN seq_name := '%(no_autofield_sequence_name)s'; END; SELECT NVL(MAX(%(column)s), 0) INTO table_value FROM %(table)s; SELECT NVL(last_number - cache_size, 0) INTO seq_value FROM user_sequences WHERE sequence_name = seq_name; WHILE table_value > seq_value LOOP EXECUTE IMMEDIATE 'SELECT "'||seq_name||'".nextval FROM DUAL' INTO seq_value; END LOOP; END; /""" # Oracle doesn't support string without precision; use the max string size. 
cast_char_field_without_max_length = "NVARCHAR2(2000)" cast_data_types = { "AutoField": "NUMBER(11)", "BigAutoField": "NUMBER(19)", "SmallAutoField": "NUMBER(5)", "TextField": cast_char_field_without_max_length, } def cache_key_culling_sql(self): cache_key = self.quote_name("cache_key") return ( f"SELECT {cache_key} " f"FROM %s " f"ORDER BY {cache_key} OFFSET %%s ROWS FETCH FIRST 1 ROWS ONLY") # EXTRACT format cannot be passed in parameters. _extract_format_re = _lazy_re_compile(r"[A-Z_]+") def date_extract_sql(self, lookup_type, sql, params): extract_sql = f"TO_CHAR({sql}, %s)" extract_param = None if lookup_type == "week_day": # TO_CHAR(field, 'D') returns an integer from 1-7, where 1=Sunday. extract_param = "D" elif lookup_type == "iso_week_day": extract_sql = f"TO_CHAR({sql} - 1, %s)" extract_param = "D" elif lookup_type == "week": # IW = ISO week number extract_param = "IW" elif lookup_type == "quarter": extract_param = "Q" elif lookup_type == "iso_year": extract_param = "IYYY" else: lookup_type = lookup_type.upper() if not self._extract_format_re.fullmatch(lookup_type): raise ValueError(f"Invalid loookup type: {lookup_type!r}") # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/EXTRACT-datetime.html return f"EXTRACT({lookup_type} FROM {sql})", params return extract_sql, (*params, extract_param) def date_trunc_sql(self, lookup_type, sql, params, tzname=None): sql, params = self._convert_field_to_tz(sql, params, tzname) # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ROUND-and-TRUNC-Date-Functions.html trunc_param = None if lookup_type in ("year", "month"): trunc_param = lookup_type.upper() elif lookup_type == "quarter": trunc_param = "Q" elif lookup_type == "week": trunc_param = "IW" else: return f"TRUNC({sql})", params return f"TRUNC({sql}, %s)", (*params, trunc_param) # Oracle crashes with "ORA-03113: end-of-file on communication channel" # if the time zone name is passed in parameter. Use interpolation instead. 
# https://groups.google.com/forum/#!msg/django-developers/zwQju7hbG78/9l934yelwfsJ # This regexp matches all time zone names from the zoneinfo database. _tzname_re = _lazy_re_compile(r"^[\w/:+-]+$") def _prepare_tzname_delta(self, tzname): tzname, sign, offset = split_tzname_delta(tzname) return f"{sign}{offset}" if offset else tzname def _convert_field_to_tz(self, sql, params, tzname): if not (settings.USE_TZ and tzname): return sql, params if not self._tzname_re.match(tzname): raise ValueError("Invalid time zone name: %s" % tzname) # Convert from connection timezone to the local time, returning # TIMESTAMP WITH TIME ZONE and cast it back to TIMESTAMP to strip the # TIME ZONE details. if self.connection.timezone_name != tzname: from_timezone_name = self.connection.timezone_name to_timezone_name = self._prepare_tzname_delta(tzname) return ( f"CAST((FROM_TZ({sql}, '{from_timezone_name}') AT TIME ZONE " f"'{to_timezone_name}') AS TIMESTAMP)", params, ) return sql, params def datetime_cast_date_sql(self, sql, params, tzname): sql, params = self._convert_field_to_tz(sql, params, tzname) return f"TRUNC({sql})", params def datetime_cast_time_sql(self, sql, params, tzname): # Since `TimeField` values are stored as TIMESTAMP change to the # default date and convert the field to the specified timezone. 
sql, params = self._convert_field_to_tz(sql, params, tzname) convert_datetime_sql = ( f"TO_TIMESTAMP(CONCAT('1900-01-01 ', TO_CHAR({sql}, 'HH24:MI:SS.FF')), " f"'YYYY-MM-DD HH24:MI:SS.FF')") return ( f"CASE WHEN {sql} IS NOT NULL THEN {convert_datetime_sql} ELSE NULL END", (*params, *params), ) def datetime_extract_sql(self, lookup_type, sql, params, tzname): sql, params = self._convert_field_to_tz(sql, params, tzname) return self.date_extract_sql(lookup_type, sql, params) def datetime_trunc_sql(self, lookup_type, sql, params, tzname): sql, params = self._convert_field_to_tz(sql, params, tzname) # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ROUND-and-TRUNC-Date-Functions.html trunc_param = None if lookup_type in ("year", "month"): trunc_param = lookup_type.upper() elif lookup_type == "quarter": trunc_param = "Q" elif lookup_type == "week": trunc_param = "IW" elif lookup_type == "hour": trunc_param = "HH24" elif lookup_type == "minute": trunc_param = "MI" elif lookup_type == "day": return f"TRUNC({sql})", params else: # Cast to DATE removes sub-second precision. return f"CAST({sql} AS DATE)", params return f"TRUNC({sql}, %s)", (*params, trunc_param) def time_trunc_sql(self, lookup_type, sql, params, tzname=None): # The implementation is similar to `datetime_trunc_sql` as both # `DateTimeField` and `TimeField` are stored as TIMESTAMP where # the date part of the later is ignored. sql, params = self._convert_field_to_tz(sql, params, tzname) trunc_param = None if lookup_type == "hour": trunc_param = "HH24" elif lookup_type == "minute": trunc_param = "MI" elif lookup_type == "second": # Cast to DATE removes sub-second precision. 
return f"CAST({sql} AS DATE)", params return f"TRUNC({sql}, %s)", (*params, trunc_param) def get_db_converters(self, expression): converters = super().get_db_converters(expression) internal_type = expression.output_field.get_internal_type() if internal_type in ["JSONField", "TextField"]: converters.append(self.convert_textfield_value) elif internal_type == "BinaryField": converters.append(self.convert_binaryfield_value) elif internal_type == "BooleanField": converters.append(self.convert_booleanfield_value) elif internal_type == "DateTimeField": if settings.USE_TZ: converters.append(self.convert_datetimefield_value) elif internal_type == "DateField": converters.append(self.convert_datefield_value) elif internal_type == "TimeField": converters.append(self.convert_timefield_value) elif internal_type == "UUIDField": converters.append(self.convert_uuidfield_value) # Oracle stores empty strings as null. If the field accepts the empty # string, undo this to adhere to the Django convention of using # the empty string instead of null. if expression.output_field.empty_strings_allowed: converters.append(self.convert_empty_bytes if internal_type == "BinaryField" else self.convert_empty_string) return converters def convert_textfield_value(self, value, expression, connection): if isinstance(value, Database.LOB): value = value.read() return value def convert_binaryfield_value(self, value, expression, connection): if isinstance(value, Database.LOB): value = force_bytes(value.read()) return value def convert_booleanfield_value(self, value, expression, connection): if value in (0, 1): value = bool(value) return value # cx_Oracle always returns datetime.datetime objects for # DATE and TIMESTAMP columns, but Django wants to see a # python datetime.date, .time, or .datetime. 
def convert_datetimefield_value(self, value, expression, connection):
    """Make non-None values aware in the connection's time zone."""
    if value is not None:
        value = timezone.make_aware(value, self.connection.timezone)
    return value


def convert_datefield_value(self, value, expression, connection):
    """Reduce Database.Timestamp values to their date part."""
    if isinstance(value, Database.Timestamp):
        value = value.date()
    return value


def convert_timefield_value(self, value, expression, connection):
    """Reduce Database.Timestamp values to their time part."""
    if isinstance(value, Database.Timestamp):
        value = value.time()
    return value


def convert_uuidfield_value(self, value, expression, connection):
    """Wrap non-None values in uuid.UUID."""
    if value is not None:
        value = uuid.UUID(value)
    return value


@staticmethod
def convert_empty_string(value, expression, connection):
    """Replace None with the empty string."""
    return "" if value is None else value


@staticmethod
def convert_empty_bytes(value, expression, connection):
    """Replace None with empty bytes."""
    return b"" if value is None else value


def deferrable_sql(self):
    """Return the SQL suffix for a deferrable constraint."""
    return " DEFERRABLE INITIALLY DEFERRED"


def fetch_returned_insert_columns(self, cursor, returning_params):
    """
    Return a tuple holding the first returned value of each parameter in
    `returning_params`. Raise DatabaseError if a parameter's value is an
    empty list.
    """
    columns = []
    for param in returning_params:
        value = param.get_value()
        if value == []:
            raise DatabaseError(
                "The database did not return a new row id. Probably "
                '"ORA-1403: no data found" was raised internally but was '
                "hidden by the Oracle OCI library (see "
                "https://code.djangoproject.com/ticket/28859).")
        columns.append(value[0])
    return tuple(columns)


def field_cast_sql(self, db_type, internal_type):
    """
    Return the placeholder template for a column: types ending in "LOB"
    (except for JSONField) are wrapped in DBMS_LOB.SUBSTR().
    """
    if db_type and db_type.endswith(
            "LOB") and internal_type != "JSONField":
        return "DBMS_LOB.SUBSTR(%s)"
    else:
        return "%s"


def no_limit_value(self):
    """Return None."""
    return None


def limit_offset_sql(self, low_mark, high_mark):
    """
    Build the "OFFSET ... ROWS" / "FETCH FIRST ... ROWS ONLY" clause from the
    slice marks; a falsy offset or fetch count omits the respective part.
    """
    fetch, offset = self._get_limit_offset_params(low_mark, high_mark)
    return " ".join(sql for sql in (
        ("OFFSET %d ROWS" % offset) if offset else None,
        ("FETCH FIRST %d ROWS ONLY" % fetch) if fetch else None,
    ) if sql)


def last_executed_query(self, cursor, sql, params):
    """
    Return the cursor's last statement with `params` substituted textually
    for their placeholders.
    """
    # https://cx-oracle.readthedocs.io/en/latest/api_manual/cursor.html#Cursor.statement
    # The DB API definition does not define this attribute.
    statement = cursor.statement
    # Unlike Psycopg's `query` and MySQLdb`'s `_executed`, cx_Oracle's
    # `statement` doesn't contain the query parameters. Substitute
    # parameters manually.
    if isinstance(params, (tuple, list)):
        # Replace from the highest index down so that ":arg1" cannot
        # clobber the prefix of ":arg10".
        for i, param in enumerate(reversed(params), start=1):
            param_num = len(params) - i
            statement = statement.replace(
                ":arg%d" % param_num, force_str(param, errors="replace"))
    elif isinstance(params, dict):
        # Longest keys first, so a key that is a prefix of another key is
        # substituted after the longer one.
        for key in sorted(params, key=len, reverse=True):
            statement = statement.replace(
                ":%s" % key, force_str(params[key], errors="replace"))
    return statement


def last_insert_id(self, cursor, table_name, pk_name):
    """Return the current value of the sequence backing `table_name`'s key."""
    sq_name = self._get_sequence_name(cursor, strip_quotes(table_name), pk_name)
    cursor.execute('"%s".currval' % sq_name)
    return cursor.fetchone()[0]


def lookup_cast(self, lookup_type, internal_type=None):
    """
    Return the template wrapped around the left-hand side of a lookup:
    UPPER() for case-insensitive lookups, DBMS_LOB.SUBSTR() for exact
    JSONField lookups, and a bare placeholder otherwise.
    """
    if lookup_type in ("iexact", "icontains", "istartswith", "iendswith"):
        return "UPPER(%s)"
    if internal_type == "JSONField" and lookup_type == "exact":
        return "DBMS_LOB.SUBSTR(%s)"
    return "%s"


def max_in_list_size(self):
    """Return 1000."""
    return 1000


def max_name_length(self):
    """Return 30, the length passed to truncate_name() by quote_name()."""
    return 30


def pk_default_value(self):
    """Return the SQL literal "NULL"."""
    return "NULL"


def prep_for_iexact_query(self, x):
    """Return `x` unchanged."""
    return x


def process_clob(self, value):
    """Return the content read from `value`, or "" when it is None."""
    if value is None:
        return ""
    return value.read()


def quote_name(self, name):
    """
    Return `name` truncated, double-quoted (unless already quoted),
    %-escaped and uppercased.
    """
    # SQL92 requires delimited (quoted) names to be case-sensitive. When
    # not quoted, Oracle has case-insensitive behavior for identifiers, but
    # always defaults to uppercase.
    # We simplify things by making Oracle identifiers always uppercase.
    if not name.startswith('"') and not name.endswith('"'):
        name = '"%s"' % truncate_name(name, self.max_name_length())
    # Oracle puts the query text into a (query % args) construct, so % signs
    # in names need to be escaped. The '%%' will be collapsed back to '%' at
    # that stage so we aren't really making the name longer here.
    name = name.replace("%", "%%")
    return name.upper()


def regex_lookup(self, lookup_type):
    """
    Return a REGEXP_LIKE template using match option 'c' for "regex" and
    'i' for any other lookup type.
    """
    if lookup_type == "regex":
        match_option = "'c'"
    else:
        match_option = "'i'"
    return "REGEXP_LIKE(%%s, %%s, %s)" % match_option


def return_insert_columns(self, fields):
    """
    Return a ("RETURNING ... INTO ...", params) pair for `fields`, with one
    InsertVar per field, or ("", ()) when there are no fields.
    """
    if not fields:
        return "", ()
    field_names = []
    params = []
    for field in fields:
        field_names.append("%s.%s" % (
            self.quote_name(field.model._meta.db_table),
            self.quote_name(field.column),
        ))
        params.append(InsertVar(field))
    return "RETURNING %s INTO %s" % (
        ", ".join(field_names),
        ", ".join(["%s"] * len(params)),
    ), tuple(params)


def __foreign_key_constraints(self, table_name, recursive):
    """
    Fetch (table_name, constraint_name) rows for the foreign key constraints
    related to `table_name`; with `recursive`, a hierarchical query is used
    instead of the single-table one.
    """
    with self.connection.cursor() as cursor:
        if recursive:
            cursor.execute(
                """ SELECT user_tables.table_name, rcons.constraint_name FROM user_tables JOIN user_constraints cons ON (user_tables.table_name = cons.table_name AND cons.constraint_type = ANY('P', 'U')) LEFT JOIN user_constraints rcons ON (user_tables.table_name = rcons.table_name AND rcons.constraint_type = 'R') START WITH user_tables.table_name = UPPER(%s) CONNECT BY NOCYCLE PRIOR cons.constraint_name = rcons.r_constraint_name GROUP BY user_tables.table_name, rcons.constraint_name HAVING user_tables.table_name != UPPER(%s) ORDER BY MAX(level) DESC """,
                (table_name, table_name),
            )
        else:
            cursor.execute(
                """ SELECT cons.table_name, cons.constraint_name FROM user_constraints cons WHERE cons.constraint_type = 'R' AND cons.table_name = UPPER(%s) """,
                (table_name, ),
            )
        return cursor.fetchall()


@cached_property
def _foreign_key_constraints(self):
    """Return an LRU-cached wrapper around __foreign_key_constraints()."""
    # 512 is large enough to fit the ~330 tables (as of this writing) in
    # Django's test suite.
    return lru_cache(maxsize=512)(self.__foreign_key_constraints)


def sql_flush(self, style, tables, *, reset_sequences=False, allow_cascade=False):
    """
    Return the list of SQL statements that empty `tables`: disable the
    collected foreign key constraints, truncate, re-enable the constraints,
    and optionally reset the sequences of the truncated tables.
    """
    if not tables:
        return []
    truncated_tables = {table.upper() for table in tables}
    constraints = set()
    # Oracle's TRUNCATE CASCADE only works with ON DELETE CASCADE foreign
    # keys which Django doesn't define. Emulate the PostgreSQL behavior
    # which truncates all dependent tables by manually retrieving all
    # foreign key constraints and resolving dependencies.
    for table in tables:
        for foreign_table, constraint in self._foreign_key_constraints(
                table, recursive=allow_cascade):
            if allow_cascade:
                truncated_tables.add(foreign_table)
            constraints.add((foreign_table, constraint))
    # Statement order matters: DISABLE constraints, TRUNCATE, then ENABLE.
    sql = ([
        "%s %s %s %s %s %s %s %s;" % (
            style.SQL_KEYWORD("ALTER"),
            style.SQL_KEYWORD("TABLE"),
            style.SQL_FIELD(self.quote_name(table)),
            style.SQL_KEYWORD("DISABLE"),
            style.SQL_KEYWORD("CONSTRAINT"),
            style.SQL_FIELD(self.quote_name(constraint)),
            style.SQL_KEYWORD("KEEP"),
            style.SQL_KEYWORD("INDEX"),
        ) for table, constraint in constraints
    ] + [
        "%s %s %s;" % (
            style.SQL_KEYWORD("TRUNCATE"),
            style.SQL_KEYWORD("TABLE"),
            style.SQL_FIELD(self.quote_name(table)),
        ) for table in truncated_tables
    ] + [
        "%s %s %s %s %s %s;" % (
            style.SQL_KEYWORD("ALTER"),
            style.SQL_KEYWORD("TABLE"),
            style.SQL_FIELD(self.quote_name(table)),
            style.SQL_KEYWORD("ENABLE"),
            style.SQL_KEYWORD("CONSTRAINT"),
            style.SQL_FIELD(self.quote_name(constraint)),
        ) for table, constraint in constraints
    ])
    if reset_sequences:
        sequences = [
            sequence
            for sequence in self.connection.introspection.sequence_list()
            if sequence["table"].upper() in truncated_tables
        ]
        # Since we've just deleted all the rows, running our sequence ALTER
        # code will reset the sequence to 0.
        sql.extend(self.sequence_reset_by_name_sql(style, sequences))
    return sql


def sequence_reset_by_name_sql(self, style, sequences):
    """
    Return one rendered `_sequence_reset_sql` statement per entry of
    `sequences` (dicts with "table" and "column" keys; a falsy column
    falls back to "id").
    """
    sql = []
    for sequence_info in sequences:
        no_autofield_sequence_name = self._get_no_autofield_sequence_name(
            sequence_info["table"])
        table = self.quote_name(sequence_info["table"])
        column = self.quote_name(sequence_info["column"] or "id")
        query = self._sequence_reset_sql % {
            "no_autofield_sequence_name": no_autofield_sequence_name,
            "table": table,
            "column": column,
            "table_name": strip_quotes(table),
            "column_name": strip_quotes(column),
        }
        sql.append(query)
    return sql


def sequence_reset_sql(self, style, model_list):
    """
    Return one rendered `_sequence_reset_sql` statement for the first
    AutoField found among each model's local fields.
    """
    output = []
    query = self._sequence_reset_sql
    for model in model_list:
        for f in model._meta.local_fields:
            if isinstance(f, AutoField):
                no_autofield_sequence_name = self._get_no_autofield_sequence_name(
                    model._meta.db_table)
                table = self.quote_name(model._meta.db_table)
                column = self.quote_name(f.column)
                output.append(
                    query % {
                        "no_autofield_sequence_name": no_autofield_sequence_name,
                        "table": table,
                        "column": column,
                        "table_name": strip_quotes(table),
                        "column_name": strip_quotes(column),
                    })
                # Only one AutoField is allowed per model, so don't
                # continue to loop
                break
    return output


def start_transaction_sql(self):
    """Return an empty string."""
    return ""


def tablespace_sql(self, tablespace, inline=False):
    """
    Return the TABLESPACE clause for `tablespace`; the inline form is
    prefixed with "USING INDEX".
    """
    if inline:
        return "USING INDEX TABLESPACE %s" % self.quote_name(tablespace)
    else:
        return "TABLESPACE %s" % self.quote_name(tablespace)


def adapt_datefield_value(self, value):
    """
    Transform a date value to an object compatible with what is expected
    by the backend driver for date columns.
    The default implementation transforms the date to text, but that is not
    necessary for Oracle.
    """
    return value


def adapt_datetimefield_value(self, value):
    """
    Transform a datetime value to an object compatible with what is expected
    by the backend driver for datetime columns.

    If a naive datetime is passed, assume that it is in UTC. Normally Django
    models.DateTimeField makes sure that if USE_TZ is True the passed
    datetime is timezone aware.

    Raise ValueError for an aware datetime when USE_TZ is False.
    """
    if value is None:
        return None
    # Expression values are adapted by the database.
    if hasattr(value, "resolve_expression"):
        return value
    # cx_Oracle doesn't support tz-aware datetimes
    if timezone.is_aware(value):
        if settings.USE_TZ:
            value = timezone.make_naive(value, self.connection.timezone)
        else:
            raise ValueError(
                "Oracle backend does not support timezone-aware datetimes when "
                "USE_TZ is False.")
    return Oracle_datetime.from_datetime(value)


def adapt_timefield_value(self, value):
    """
    Transform a time value for the driver: strings are parsed as
    "%H:%M:%S", time objects become an Oracle_datetime dated 1900-01-01.
    Raise ValueError for timezone-aware times.
    """
    if value is None:
        return None
    # Expression values are adapted by the database.
    if hasattr(value, "resolve_expression"):
        return value
    if isinstance(value, str):
        return datetime.datetime.strptime(value, "%H:%M:%S")
    # Oracle doesn't support tz-aware times
    if timezone.is_aware(value):
        raise ValueError(
            "Oracle backend does not support timezone-aware times.")
    return Oracle_datetime(1900, 1, 1, value.hour, value.minute,
                           value.second, value.microsecond)


def adapt_decimalfield_value(self, value, max_digits=None, decimal_places=None):
    """Return `value` unchanged."""
    return value


def combine_expression(self, connector, sub_expressions):
    """
    Render the two `sub_expressions` joined by `connector`, translating the
    modulo, bitwise, shift and power connectors to function-based SQL.
    Raise NotSupportedError for bitwise XOR ("#"); defer every other
    connector to the parent class.
    """
    lhs, rhs = sub_expressions
    if connector == "%%":
        return "MOD(%s)" % ",".join(sub_expressions)
    elif connector == "&":
        return "BITAND(%s)" % ",".join(sub_expressions)
    elif connector == "|":
        # OR is expressed through BITAND: (~lhs & rhs) + lhs.
        return "BITAND(-%(lhs)s-1,%(rhs)s)+%(lhs)s" % {
            "lhs": lhs,
            "rhs": rhs
        }
    elif connector == "<<":
        return "(%(lhs)s * POWER(2, %(rhs)s))" % {"lhs": lhs, "rhs": rhs}
    elif connector == ">>":
        return "FLOOR(%(lhs)s / POWER(2, %(rhs)s))" % {
            "lhs": lhs,
            "rhs": rhs
        }
    elif connector == "^":
        return "POWER(%s)" % ",".join(sub_expressions)
    elif connector == "#":
        raise NotSupportedError("Bitwise XOR is not supported in Oracle.")
    return super().combine_expression(connector, sub_expressions)


def _get_no_autofield_sequence_name(self, table):
    """
    Manually created sequence name to keep backward compatibility for
    AutoFields that aren't Oracle identity columns.
    """
    # Reserve three characters of the maximum name length for "_SQ".
    name_length = self.max_name_length() - 3
    return "%s_SQ" % truncate_name(strip_quotes(table), name_length).upper()


def _get_sequence_name(self, cursor, table, pk_name):
    """
    Return the identity-column sequence name recorded for `table`/`pk_name`,
    falling back to the manually created sequence name when no row is found.
    """
    cursor.execute(
        """ SELECT sequence_name FROM user_tab_identity_cols WHERE table_name = UPPER(%s) AND column_name = UPPER(%s)""",
        [table, pk_name],
    )
    row = cursor.fetchone()
    return self._get_no_autofield_sequence_name(
        table) if row is None else row[0]


def bulk_insert_sql(self, fields, placeholder_rows):
    """
    Return a "SELECT * FROM (...)" statement whose subquery is one
    "SELECT ... FROM DUAL" per placeholder row, joined with UNION ALL.
    """
    query = []
    for row in placeholder_rows:
        select = []
        for i, placeholder in enumerate(row):
            # A model without any fields has fields=[None].
            if fields[i]:
                internal_type = getattr(fields[i], "target_field",
                                        fields[i]).get_internal_type()
                placeholder = (
                    BulkInsertMapper.types.get(internal_type, "%s") %
                    placeholder)
            # Add columns aliases to the first select to avoid "ORA-00918:
            # column ambiguously defined" when two or more columns in the
            # first select have the same value.
            if not query:
                placeholder = "%s col_%s" % (placeholder, i)
            select.append(placeholder)
        query.append("SELECT %s FROM DUAL" % ", ".join(select))
    # Bulk insert to tables with Oracle identity columns causes Oracle to
    # add sequence.nextval to it. Sequence.nextval cannot be used with the
    # UNION operator. To prevent incorrect SQL, move UNION to a subquery.
    return "SELECT * FROM (%s)" % " UNION ALL ".join(query)


def subtract_temporals(self, internal_type, lhs, rhs):
    """
    Return a (sql, params) pair for `lhs - rhs`. DateField operands are
    handled here with NUMTODSINTERVAL; other types defer to the parent class.
    """
    if internal_type == "DateField":
        lhs_sql, lhs_params = lhs
        rhs_sql, rhs_params = rhs
        params = (*lhs_params, *rhs_params)
        return (
            "NUMTODSINTERVAL(TO_NUMBER(%s - %s), 'DAY')" % (lhs_sql, rhs_sql),
            params,
        )
    return super().subtract_temporals(internal_type, lhs, rhs)


def bulk_batch_size(self, fields, objs):
    """Oracle restricts the number of parameters in a query."""
    if fields:
        return self.connection.features.max_query_params // len(fields)
    return len(objs)


def conditional_expression_supported_in_where_clause(self, expression):
    """
    Oracle supports only EXISTS(...) or filters in the WHERE clause, others
    must be compared with True.
    """
    if isinstance(expression, (Exists, Lookup, WhereNode)):
        return True
    if isinstance(expression, ExpressionWrapper) and expression.conditional:
        # Decide based on the wrapped expression.
        return self.conditional_expression_supported_in_where_clause(
            expression.expression)
    if isinstance(expression, RawSQL) and expression.conditional:
        return True
    return False
from django.core.exceptions import FieldDoesNotExist
from django.db import models, router
from django.db.models.constants import LOOKUP_SEP
from django.db.models.deletion import Collector
from django.forms.utils import pretty_name
from django.urls import NoReverseMatch, reverse
from django.utils import formats, timezone
from django.utils.html import format_html
from django.utils.regex_helper import _lazy_re_compile
from django.utils.text import capfirst
from django.utils.translation import ngettext, override as translation_override

# Maps the integer code of each special character to its escape: an
# underscore followed by the two-digit uppercase hex code.
QUOTE_MAP = {i: '_%02X' % i for i in b'":/_#?;@&=+$,"[]<>%\n\\'}
# Inverse mapping: escape sequence -> original character.
UNQUOTE_MAP = {v: chr(k) for k, v in QUOTE_MAP.items()}
# Matches any of the escape sequences listed in UNQUOTE_MAP.
UNQUOTE_RE = _lazy_re_compile('_(?:%s)' % '|'.join([x[1:] for x in UNQUOTE_MAP]))


class FieldIsAForeignKeyColumnName(Exception):
    """A field is a foreign key attname, i.e. <FK>_id."""
    pass


def lookup_needs_distinct(opts, lookup_path):
    """
    Return True if 'distinct()' should be used to query the given lookup path.
    """
    lookup_fields = lookup_path.split(LOOKUP_SEP)
    # Go through the fields (following all relations) and look for an m2m.
    for field_name in lookup_fields:
        if field_name == 'pk':
from django.utils.itercompat import is_iterable
from django.utils.regex_helper import _lazy_re_compile

__all__ = (
    'AsyncClient', 'AsyncRequestFactory', 'Client', 'RedirectCycleError',
    'RequestFactory', 'encode_file', 'encode_multipart',
)

BOUNDARY = 'BoUnDaRyStRiNg'
MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY
# Captures the charset parameter value of a content type string.
CONTENT_TYPE_RE = _lazy_re_compile(r'.*; charset=([\w\d-]+);?')
# Structured suffix spec: https://tools.ietf.org/html/rfc6838#section-4.2.8
JSON_CONTENT_TYPE_RE = _lazy_re_compile(r'^application\/(.+\+)?json')


class RedirectCycleError(Exception):
    """The test client has been asked to follow a redirect loop."""
    def __init__(self, message, last_response):
        super().__init__(message)
        # Keep the offending response and its redirect chain on the
        # exception for inspection by the caller.
        self.last_response = last_response
        self.redirect_chain = last_response.redirect_chain


class FakePayload:
    """
    A wrapper around BytesIO that restricts what can be read since data from
import django
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.files.temp import NamedTemporaryFile
from django.core.management.base import BaseCommand, CommandError
from django.core.management.utils import (
    find_command, handle_extensions, is_ignored_path, popen_wrapper,
)
from django.utils.encoding import DEFAULT_LOCALE_ENCODING
from django.utils.functional import cached_property
from django.utils.jslex import prepare_js_for_gettext
from django.utils.regex_helper import _lazy_re_compile
from django.utils.text import get_text_list
from django.utils.translation import templatize

# Captures a quoted "Plural-Forms..." header line as the `value` group.
plural_forms_re = _lazy_re_compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL)
STATUS_OK = 0
# Unique sentinel object.
NO_LOCALE_DIR = object()


def check_programs(*programs):
    """
    Raise CommandError naming the first of `programs` for which
    find_command() returns None.
    """
    for program in programs:
        if find_command(program) is None:
            raise CommandError(
                "Can't find %s. Make sure you have GNU gettext tools 0.15 or "
                "newer installed." % program
            )


@total_ordering
class TranslatableFile:
from django.http.multipartparser import MultiPartParser, MultiPartParserError
from django.utils.datastructures import (
    CaseInsensitiveMapping, ImmutableList, MultiValueDict,
)
from django.utils.deprecation import RemovedInDjango40Warning
from django.utils.encoding import escape_uri_path, iri_to_uri
from django.utils.functional import cached_property
from django.utils.http import is_same_domain, limited_parse_qsl
from django.utils.regex_helper import _lazy_re_compile

from .multipartparser import parse_header

# Unique sentinel object.
RAISE_ERROR = object()
# Matches a lowercase host name or a bracketed address, optionally followed
# by ":<digits>".
host_validation_re = _lazy_re_compile(
    r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")


class UnreadablePostError(OSError):
    # Distinct OSError subclass; adds no behavior of its own.
    pass


class RawPostDataException(Exception):
    """
    You cannot access raw_post_data from a request that has
    multipart/* POST data if it has been accessed via POST,
    FILES, etc.
    """
    pass
# NOTE(review): the statements up to `integer_validator` are the tail of a
# method whose beginning precedes this fragment; they are kept verbatim.
potential_ip = host_match[1]
try:
    validate_ipv6_address(potential_ip)
except ValidationError:
    raise ValidationError(self.message, code=self.code)
# The maximum length of a full host name is 253 characters per RFC 1034
# section 3.1. It's defined to be 255 bytes or less, but this includes
# one byte for the length of the name and one byte for the trailing dot
# that's used to indicate absolute names in DNS.
if len(urlsplit(value).netloc) > 253:
    raise ValidationError(self.message, code=self.code)


# Validator accepting an optional leading minus sign followed by digits.
integer_validator = RegexValidator(
    _lazy_re_compile(r'^-?\d+\Z'),
    message=_('Enter a valid integer.'),
    code='invalid',
)


def validate_integer(value):
    """Run `integer_validator` on `value` and return its result."""
    return integer_validator(value)


@deconstructible
class EmailValidator:
    message = _('Enter a valid email address.')
    code = 'invalid'
    user_regex = _lazy_re_compile(
        r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"  # dot-atom
from io import BytesIO

from django.conf import settings
from django.core import signals
from django.core.handlers import base
from django.http import HttpRequest, QueryDict, parse_cookie
from django.urls import set_script_prefix
from django.utils.encoding import repercent_broken_unicode
from django.utils.functional import cached_property
from django.utils.regex_helper import _lazy_re_compile

# Matches a run of one or more slashes in a bytes path.
_slashes_re = _lazy_re_compile(rb"/+")


class LimitedStream:
    """Wrap another stream to disallow reading it past a number of bytes."""
    def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
        self.stream = stream
        # Number of bytes that may still be read from `stream`.
        self.remaining = limit
        self.buffer = b""
        self.buf_size = buf_size

    def _read_limited(self, size=None):
        """
        Read up to `size` bytes from the wrapped stream, never more than
        the remaining budget, and reduce the budget by the amount read.
        """
        # A missing or oversized request is capped at what is left.
        if size is None or size > self.remaining:
            size = self.remaining
        if size == 0:
            return b""
        result = self.stream.read(size)
        self.remaining -= len(result)
        return result
class EmailValidator:
    """
    Validate that a value looks like an email address.

    The value is split at the last "@". The local part must match
    `user_regex`; the domain part must be in `domain_whitelist`, match
    `domain_regex`, or be a bracketed IPv4/IPv6 literal. A domain that fails
    is retried once after punycode (IDN) conversion.

    Calling an instance returns None on success and raises ValidationError
    (with the instance's `message` and `code`) on failure.
    """
    message = _('Enter a valid email address.')
    code = 'invalid'
    user_regex = _lazy_re_compile(
        r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"  # dot-atom
        r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)',  # quoted-string
        re.IGNORECASE)
    domain_regex = _lazy_re_compile(
        # max length for domain name labels is 63 characters per RFC 1034
        r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z',
        re.IGNORECASE)
    literal_regex = _lazy_re_compile(
        # literal form, ipv4 or ipv6 address (SMTP 4.1.3)
        # The hex range is A-F (case-insensitive). The former "A-f" range
        # also spanned the ASCII punctuation between "Z" and "a"
        # ([ \ ] ^ _ `), which can never be part of an address literal.
        r'\[([A-F0-9:.]+)\]\Z',
        re.IGNORECASE)
    domain_whitelist = ['localhost']

    def __init__(self, message=None, code=None, whitelist=None):
        """Override the class-level message, code or whitelist when given."""
        if message is not None:
            self.message = message
        if code is not None:
            self.code = code
        if whitelist is not None:
            self.domain_whitelist = whitelist

    def __call__(self, value):
        """Raise ValidationError unless `value` is an acceptable address."""
        if not value or '@' not in value:
            raise ValidationError(self.message, code=self.code)

        # Split at the LAST "@": a quoted local part may contain "@".
        user_part, domain_part = value.rsplit('@', 1)

        if not self.user_regex.match(user_part):
            raise ValidationError(self.message, code=self.code)

        if (domain_part not in self.domain_whitelist and
                not self.validate_domain_part(domain_part)):
            # Try for possible IDN domain-part
            try:
                domain_part = punycode(domain_part)
            except UnicodeError:
                pass
            else:
                if self.validate_domain_part(domain_part):
                    return
            raise ValidationError(self.message, code=self.code)

    def validate_domain_part(self, domain_part):
        """
        Return True if `domain_part` is a syntactically valid domain name or
        a bracketed literal holding a valid IPv4/IPv6 address.
        """
        if self.domain_regex.match(domain_part):
            return True

        literal_match = self.literal_regex.match(domain_part)
        if literal_match:
            ip_address = literal_match[1]
            try:
                validate_ipv46_address(ip_address)
            except ValidationError:
                pass
            else:
                return True
        return False

    def __eq__(self, other):
        """Validators are equal when whitelist, message and code all match."""
        return (
            isinstance(other, EmailValidator) and
            (self.domain_whitelist == other.domain_whitelist) and
            (self.message == other.message) and
            (self.code == other.code)
        )
from io import BytesIO

from django.conf import settings
from django.core import signals
from django.core.handlers import base
from django.http import HttpRequest, QueryDict, parse_cookie
from django.urls import set_script_prefix
from django.utils.encoding import repercent_broken_unicode
from django.utils.functional import cached_property
from django.utils.regex_helper import _lazy_re_compile

# Matches a run of one or more slashes in a bytes path.
_slashes_re = _lazy_re_compile(br'/+')


class LimitedStream:
    """Wrap another stream to disallow reading it past a number of bytes."""
    def __init__(self, stream, limit):
        self.stream = stream
        # Number of bytes that may still be read from `stream`.
        self.remaining = limit
        self.buffer = b''

    def _read_limited(self, size=None):
        """
        Read up to `size` bytes from the wrapped stream, never more than
        the remaining budget, and reduce the budget by the amount read.
        """
        # A missing or oversized request is capped at what is left.
        if size is None or size > self.remaining:
            size = self.remaining
        if size == 0:
            return b''
        result = self.stream.read(size)
        self.remaining -= len(result)
        return result

    def read(self, size=None):
class URLValidator(RegexValidator):
    """
    Validate that a value is a URL with one of the allowed schemes.

    Validation steps, in order: type and unsafe-character checks, scheme
    check, full-URL regex (retried once with a punycode-encoded netloc for
    IDN hosts), IPv6 literal validation, and a host name length limit.

    Calling an instance returns None on success and raises ValidationError
    (with the instance's `message` and `code`) on failure.
    """
    ul = '\u00a1-\uffff'  # Unicode letters range (must not be a raw string).

    # IP patterns
    ipv4_re = r'(?:0|25[0-5]|2[0-4]\d|1\d?\d?|[1-9]\d?)(?:\.(?:0|25[0-5]|2[0-4]\d|1\d?\d?|[1-9]\d?)){3}'
    ipv6_re = r'\[[0-9a-f:.]+\]'  # (simple regex, validated later)

    # Host patterns
    hostname_re = r'[a-z' + ul + r'0-9](?:[a-z' + ul + r'0-9-]{0,61}[a-z' + ul + r'0-9])?'
    # Max length for domain name labels is 63 characters per RFC 1034 sec. 3.1
    domain_re = r'(?:\.(?!-)[a-z' + ul + r'0-9-]{1,63}(?<!-))*'
    tld_re = (
        r'\.'                                # dot
        r'(?!-)'                             # can't start with a dash
        r'(?:[a-z' + ul + '-]{2,63}'         # domain label
        r'|xn--[a-z0-9]{1,59})'              # or punycode label
        r'(?<!-)'                            # can't end with a dash
        r'\.?'                               # may have a trailing dot
    )
    host_re = '(' + hostname_re + domain_re + tld_re + '|localhost)'

    regex = _lazy_re_compile(
        r'^(?:[a-z0-9.+-]*)://'  # scheme is validated separately
        r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?'  # user:pass authentication
        r'(?:' + ipv4_re + '|' + ipv6_re + '|' + host_re + ')'
        r'(?::\d{2,5})?'  # port
        r'(?:[/?#][^\s]*)?'  # resource path
        r'\Z', re.IGNORECASE)
    message = _('Enter a valid URL.')
    schemes = ['http', 'https', 'ftp', 'ftps']
    unsafe_chars = frozenset('\t\r\n')

    def __init__(self, schemes=None, **kwargs):
        """Optionally replace the class-level list of allowed schemes."""
        super().__init__(**kwargs)
        if schemes is not None:
            self.schemes = schemes

    def __call__(self, value):
        """Raise ValidationError unless `value` is an acceptable URL."""
        if not isinstance(value, str):
            raise ValidationError(self.message, code=self.code)
        if self.unsafe_chars.intersection(value):
            raise ValidationError(self.message, code=self.code)
        # Check if the scheme is valid.
        scheme = value.split('://')[0].lower()
        if scheme not in self.schemes:
            raise ValidationError(self.message, code=self.code)

        # Split the URL once, up front. urlsplit() raises ValueError for
        # malformed input (for example, "Invalid IPv6 URL"); report that as
        # a validation failure on every path instead of letting it escape.
        try:
            splitted_url = urlsplit(value)
        except ValueError:
            raise ValidationError(self.message, code=self.code)

        # Then check full URL
        try:
            super().__call__(value)
        except ValidationError as e:
            # Trivial case failed. Try for possible IDN domain
            if value:
                scheme, netloc, path, query, fragment = splitted_url
                try:
                    netloc = punycode(netloc)  # IDN -> ACE
                except UnicodeError:  # invalid domain part
                    raise e
                url = urlunsplit((scheme, netloc, path, query, fragment))
                super().__call__(url)
            else:
                raise
        else:
            # Now verify IPv6 in the netloc part
            host_match = re.search(r'^\[(.+)\](?::\d{2,5})?$', splitted_url.netloc)
            if host_match:
                potential_ip = host_match[1]
                try:
                    validate_ipv6_address(potential_ip)
                except ValidationError:
                    raise ValidationError(self.message, code=self.code)

        # The maximum length of a full host name is 253 characters per RFC 1034
        # section 3.1. It's defined to be 255 bytes or less, but this includes
        # one byte for the length of the name and one byte for the trailing dot
        # that's used to indicate absolute names in DNS.
        # Measure the host name only: the netloc also carries the optional
        # "user:password@" prefix and ":port" suffix, which are not part of
        # the DNS name.
        if len(splitted_url.hostname or '') > 253:
            raise ValidationError(self.message, code=self.code)
def _compile(self, regex):
    """
    Compile and return the given regular expression.

    Raise ImproperlyConfigured, chained to the original re.error, when the
    pattern is invalid.
    """
    try:
        return re.compile(regex)
    except re.error as e:
        raise ImproperlyConfigured(
            '"%s" is not a valid regular expression: %s' % (regex, e)
        ) from e


def __str__(self):
    """Return the string form of the stored `_regex`."""
    return str(self._regex)


# Matches "<parameter>" or "<converter:parameter>" components of a route.
_PATH_PARAMETER_COMPONENT_RE = _lazy_re_compile(
    r'<(?:(?P<converter>[^>:]+):)?(?P<parameter>[^>]+)>'
)


def _route_to_regex(route, is_endpoint=False):
    """
    Convert a path pattern into a regular expression. Return the regular
    expression and a dictionary mapping the capture names to the converters.
    For example, 'foo/<int:pk>' returns '^foo\\/(?P<pk>[0-9]+)'
    and {'pk': <django.urls.converters.IntConverter>}.
    """
    if not set(route).isdisjoint(string.whitespace):
        raise ImproperlyConfigured("URL route '%s' cannot contain whitespace." % route)
    original_route = route
    parts = ['^']
    converters = {}
scheme_chars, unquote, ) from urllib.parse import urlencode as original_urlencode from urllib.parse import uses_params from django.utils.datastructures import MultiValueDict from django.utils.regex_helper import _lazy_re_compile # based on RFC 7232, Appendix C ETAG_MATCH = _lazy_re_compile( r""" \A( # start of string and capture group (?:W/)? # optional weak indicator " # opening quote [^"]* # any sequence of non-quote characters " # end quote )\Z # end of string and capture group """, re.X, ) MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split() __D = r"(?P<day>[0-9]{2})" __D2 = r"(?P<day>[ 0-9][0-9])" __M = r"(?P<mon>\w{3})" __Y = r"(?P<year>[0-9]{4})" __Y2 = r"(?P<year>[0-9]{2})" __T = r"(?P<hour>[0-9]{2}):(?P<min>[0-9]{2}):(?P<sec>[0-9]{2})" RFC1123_DATE = _lazy_re_compile(r"^\w{3}, %s %s %s %s GMT$" % (__D, __M, __Y, __T)) RFC850_DATE = _lazy_re_compile(r"^\w{6,9}, %s-%s-%s %s GMT$" % (__D, __M, __Y2, __T))
""" import calendar import datetime from email.utils import format_datetime as format_datetime_rfc5322 from django.utils.dates import ( MONTHS, MONTHS_3, MONTHS_ALT, MONTHS_AP, WEEKDAYS, WEEKDAYS_ABBR, ) from django.utils.regex_helper import _lazy_re_compile from django.utils.timezone import ( _datetime_ambiguous_or_imaginary, get_default_timezone, is_naive, make_aware, ) from django.utils.translation import gettext as _ re_formatchars = _lazy_re_compile(r'(?<!\\)([aAbcdDeEfFgGhHiIjlLmMnNoOPrsStTUuwWyYzZ])') re_escaped = _lazy_re_compile(r'\\(.)') class Formatter: def format(self, formatstr): pieces = [] for i, piece in enumerate(re_formatchars.split(str(formatstr))): if i % 2: if type(self.data) is datetime.date and hasattr(TimeFormat, piece): raise TypeError( "The format for date objects may not contain " "time-related format specifiers (found '%s')." % piece ) pieces.append(str(getattr(self, piece)())) elif piece:
# NOTE(review): this `return` is the tail of a method whose beginning
# precedes this fragment; it is kept verbatim.
return True


def _start_transaction_under_autocommit(self):
    """
    Start a transaction explicitly in autocommit mode.

    Staying in autocommit mode works around a bug of sqlite3 that breaks
    savepoints when autocommit is disabled.
    """
    self.cursor().execute("BEGIN")


def is_in_memory_db(self):
    """Ask `self.creation` whether the configured NAME is an in-memory db."""
    return self.creation.is_in_memory_db(self.settings_dict['NAME'])


# Matches "%s" unless it is preceded by another "%".
FORMAT_QMARK_REGEX = _lazy_re_compile(r'(?<!%)%s')


class SQLiteCursorWrapper(Database.Cursor):
    """
    Django uses "format" style placeholders, but pysqlite2 uses "qmark" style.
    This fixes it -- but note that if you want to use a literal "%s" in a query,
    you'll need to use "%%s".
    """
    def execute(self, query, params=None):
        # Without params the query is executed as is; otherwise it is passed
        # through convert_query() first.
        if params is None:
            return Database.Cursor.execute(self, query)
        query = self.convert_query(query)
        return Database.Cursor.execute(self, query, params)

    def executemany(self, query, param_list):
# Translations are cached in a dictionary for every language. # The active translations are stored by threadid to make them thread local. _translations = {} _active = Local() # The default translation is based on the settings file. _default = None # magic gettext number to separate context from message CONTEXT_SEPARATOR = "\x04" # Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9 # and RFC 3066, section 2.1 accept_language_re = _lazy_re_compile(r''' ([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*" (?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8" (?:\s*,\s*|$) # Multiple accepts per header. ''', re.VERBOSE) language_code_re = _lazy_re_compile( r'^[a-z]{1,8}(?:-[a-z0-9]{1,8})*(?:@[a-z0-9]{1,20})?$', re.IGNORECASE ) language_code_prefix_re = _lazy_re_compile(r'^/(\w+([@-]\w+)?)(/|$)') @receiver(setting_changed) def reset_cache(**kwargs): """ Reset global state when LANGUAGES setting has been changed, as some
from django.utils.cache import patch_vary_headers from django.utils.deprecation import MiddlewareMixin from django.utils.regex_helper import _lazy_re_compile from django.utils.text import compress_sequence, compress_string re_accepts_gzip = _lazy_re_compile(r'\bgzip\b') class GZipMiddleware(MiddlewareMixin): """ Compress content if the browser allows gzip compression. Set the Vary header accordingly, so that caches will base their storage on the Accept-Encoding header. """ def process_response(self, request, response): # It's not worth attempting to compress really short responses. if not response.streaming and len(response.content) < 200: return response # Avoid gzipping if we've already got a content-encoding. if response.has_header('Content-Encoding'): return response patch_vary_headers(response, ('Accept-Encoding',)) ae = request.META.get('HTTP_ACCEPT_ENCODING', '') if not re_accepts_gzip.search(ae): return response if response.streaming: # Delete the `Content-Length` header for streaming content, because