def is_cookie_value(s):
    '''Check whether ``s`` is a cookie-value.

    Grammar: cookie-value = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )

    Two patterns are built from ``araq.CONST_STR['ckocts']``: one for the
    bare form and one for the double-quoted form.  Returns True when
    ``araq._real_dollar`` yields a truthy value for either match.
    '''
    octets = araq.CONST_STR['ckocts']

    # Bare form: zero or more cookie-octets.
    bare = araq._creat_regex(octets, prefix="^[", suffix="]*$")
    bare_match = bare.search(s)

    # Quoted form: the same octets wrapped in double quotes.
    quoted = araq._creat_regex(octets, prefix="^\"[", suffix="]*\"$")
    quoted_match = quoted.search(s)

    bare_ok = bool(araq._real_dollar(s, bare_match))
    quoted_ok = bool(araq._real_dollar(s, quoted_match))
    return bare_ok or quoted_ok
def is_hms_time(s):
    '''Check whether ``s`` is an hms-time.

    Grammar: hms-time = time-field ":" time-field ":" time-field
    where each time-field is one or two digits.

    Returns the truthiness of ``araq._real_dollar(s, match)`` as a bool.
    '''
    match = re.search("^[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}$", s)
    return bool(araq._real_dollar(s, match))
def is_date_token_list(s):
    '''Check whether ``s`` is a date-token-list.

    Grammar: date-token-list = date-token *( 1*delimiter date-token )

    The pattern is one run of non-delimiter characters followed by any
    number of (delimiter run, non-delimiter run) pairs.  The character
    sets come from ``araq.CONST_STR['ndels']`` and ``araq.CONST_STR['dels']``
    and are escaped before being placed inside character classes.

    Returns the truthiness of ``araq._real_dollar(s, match)`` as a bool.
    '''
    non_delims = re.escape(araq.CONST_STR['ndels'])
    delims = re.escape(araq.CONST_STR['dels'])
    regex_str = (
        "^[" + non_delims + "]+"
        + "([" + delims + "]+" + "[" + non_delims + "]+)*$"
    )
    # The pattern string was previously built but never compiled, so the
    # following search referenced an undefined name.
    regex = re.compile(regex_str)
    m = regex.search(s)
    return bool(araq._real_dollar(s, m))
def is_domain_value(s, **kwargs):
    '''Check whether ``s`` is a domain-value.

    domain-value = <subdomain>; defined in [RFC1034], Section 3.5
        <subdomain>   ::= <label> | <subdomain> "." <label>
        <label>       ::= <letter> [ [ <ldh-str> ] <let-dig> ]
        <ldh-str>     ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
        <let-dig-hyp> ::= <let-dig> | "-"
        <let-dig>     ::= <letter> | <digit>
        <letter>      ::= any one of the 52 alphabetic characters A through Z
                          in upper case and a through z in lower case
        <digit>       ::= any one of the ten digits 0 through 9
    enhanced by [RFC1123], Section 2.1

    Keyword arguments:
        mode: 'strict' validates ``s`` as given; any other value (default
              'loose') first passes ``s`` through
              ``remove_domain_leading_dot``.

    Returns True only when every dot-separated label matches the label
    grammar, judged by ``araq._real_dollar`` on each label's match.
    '''
    mode = kwargs.get('mode', 'loose')
    if mode != 'strict':
        s = remove_domain_leading_dot(s)

    # The optional tail is written with "?" rather than "*": a sequence of
    # (ldh* let-dig) chunks is itself a single (ldh* let-dig) chunk, so the
    # accepted language is unchanged, but the nested "*" form backtracks
    # exponentially on labels such as "a" + "b" * 40 + "-".  The group
    # layout is kept as it was.
    label_re = re.compile(r"^[a-zA-Z](([0-9a-zA-Z\-])*[0-9a-zA-Z])?$")

    # all() stops at the first label that fails, like the original loop.
    return all(
        bool(araq._real_dollar(label, label_re.search(label)))
        for label in s.split(".")
    )
def is_delimiter(c):
    '''Check whether ``c`` is a single delimiter character.

    Grammar: delimiter = %x09 / %x20-2F / %x3B-40 / %x5B-60 / %x7B-7E

    The character set is taken from ``araq.CONST_STR['dels']`` and wrapped
    as a one-character class by ``araq._creat_regex``.
    '''
    pattern = araq._creat_regex(
        araq.CONST_STR['dels'], prefix="^[", suffix="]$"
    )
    found = pattern.search(c)
    return bool(araq._real_dollar(c, found))
def is_non_digit(c):
    '''Check whether ``c`` is a single non-digit character.

    Grammar: non-digit = %x00-2F / %x3A-FF

    The character set is taken from ``araq.CONST_STR['ndigits']`` and
    wrapped as a one-character class by ``araq._creat_regex``.
    '''
    pattern = araq._creat_regex(
        araq.CONST_STR['ndigits'], prefix="^[", suffix="]$"
    )
    found = pattern.search(c)
    return bool(araq._real_dollar(c, found))
def is_extension_av(s):
    '''Check whether ``s`` is an extension-av.

    Grammar: <any CHAR except CTLs or ";">

    The excluded set is ``araq.CONST_STR['ctls']`` plus ";" and is used as
    a negated character class that must match one or more characters.
    '''
    excluded = araq.CONST_STR['ctls'] + ";"
    pattern = araq._creat_regex(excluded, prefix="^[^", suffix="]+$")
    found = pattern.search(s)
    return bool(araq._real_dollar(s, found))
def is_date_token(s):
    '''Check whether ``s`` is a date-token.

    Grammar: date-token = 1*non-delimiter

    The non-delimiter set comes from ``araq.CONST_STR['ndels']`` and must
    match one or more characters.
    '''
    pattern = araq._creat_regex(
        araq.CONST_STR['ndels'], prefix="^[", suffix="]+$"
    )
    found = pattern.search(s)
    return bool(araq._real_dollar(s, found))
def is_non_delimiter(c):
    '''Check whether ``c`` is a single non-delimiter character.

    Grammar: non-delimiter = %x00-08 / %x0A-1F / DIGIT / ":" / ALPHA / %x7F-FF

    The character set is taken from ``araq.CONST_STR['ndels']`` and wrapped
    as a one-character class by ``araq._creat_regex``.
    '''
    pattern = araq._creat_regex(
        araq.CONST_STR['ndels'], prefix="^[", suffix="]$"
    )
    found = pattern.search(c)
    return bool(araq._real_dollar(c, found))
def is_max_age_value(s):
    '''Check whether ``s`` is the value part of a Max-Age attribute.

    Grammar: "Max-Age=" non-zero-digit *DIGIT  (only the digits are
    checked here: a non-zero digit followed by any number of digits).

    In practice, both expires-av and max-age-av are limited to dates
    representable by the user agent.
    '''
    found = re.search("^[1-9][0-9]*$", s)
    return bool(araq._real_dollar(s, found))
def is_token(s):
    '''Check whether ``s`` is a token.

    Grammar: 1*<any CHAR except CTLs or separators>

    The excluded set is ``araq.CONST_STR['ctls']`` followed by
    ``araq.CONST_STR['sps']``, used as a negated character class that must
    match one or more characters.
    '''
    excluded = araq.CONST_STR['ctls'] + araq.CONST_STR['sps']
    pattern = araq._creat_regex(excluded, prefix="^[^", suffix="]+$")
    found = pattern.search(s)
    return bool(araq._real_dollar(s, found))
def is_cookie_octet(c):
    '''Check whether ``c`` is a single cookie-octet.

    Grammar: %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
    i.e. US-ASCII characters excluding CTLs, whitespace, DQUOTE, comma,
    semicolon, and backslash.

    The character set is taken from ``araq.CONST_STR['ckocts']`` and
    wrapped as a one-character class by ``araq._creat_regex``.

    Returns the truthiness of ``araq._real_dollar(c, match)`` as a bool.
    '''
    regex = araq._creat_regex(
        araq.CONST_STR['ckocts'], prefix="^[", suffix="]$"
    )
    m = regex.search(c)
    # Pass the parameter ``c``; the previous code referenced an undefined
    # name ``s`` here.
    return bool(araq._real_dollar(c, m))
def is_time(s):
    '''Check whether ``s`` is a time.

    Grammar: time = hms-time ( non-digit *OCTET )

    The pattern is an hms-time, then at most one character from
    ``araq.CONST_STR['ndigits']``, then any number of characters from
    ``araq.CONST_STR['octs']``.  The original author noted that the purpose
    of the ( non-digit *OCTET ) tail was unclear to them.
    '''
    hms_part = "^([0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2})"
    optional_non_digit = "[" + re.escape(araq.CONST_STR['ndigits']) + "]{0,1}"
    trailing_octets = "[" + re.escape(araq.CONST_STR['octs']) + "]*$"
    pattern = re.compile(hms_part + optional_non_digit + trailing_octets)
    found = pattern.search(s)
    return bool(araq._real_dollar(s, found))
def is_maxage_av(s):
    '''Check whether ``s`` is a complete max-age-av ("Max-Age=<digits>").

    The attribute-name case-insensitively matches the string "Max-Age".
    Grammar: "Max-Age=" non-zero-digit *DIGIT

    In practice, both expires-av and max-age-av are limited to dates
    representable by the user agent.

    Returns False when the "max-age=" prefix is missing; otherwise the
    truthiness of ``araq._real_dollar`` on the digits match, as a bool.
    '''
    name = "max-age="
    # Compare case-insensitively, as the grammar above requires.
    if s[:len(name)].lower() != name:
        return False
    # The value starts right after the 8-character prefix.  Slicing from
    # index 9 dropped the first digit, so "max-age=5" was rejected and
    # "max-age=05" was accepted.
    nums = s[len(name):]
    m = re.search("^[1-9][0-9]*$", nums)
    return bool(araq._real_dollar(nums, m))
def is_day_of_month(s, **kwargs):
    '''Check whether ``s`` is a day-of-month.

    Grammar: day-of-month = 1*2DIGIT ( non-digit *OCTET )

    Only the 1*2DIGIT part is enforced: the ( non-digit *OCTET ) tail is
    deliberately not accepted (the original author left that variant
    disabled, noting its purpose was unclear).  ``kwargs`` is accepted but
    not used.
    '''
    found = re.search("^[0-9]{1,2}$", s)
    return bool(araq._real_dollar(s, found))
def is_year(s):
    '''Check whether ``s`` is a year.

    Grammar: year = 2*4DIGIT ( non-digit *OCTET )

    Only exactly four digits are accepted.  The grammar's 2*4DIGIT range
    and ( non-digit *OCTET ) tail are deliberately not supported, to avoid
    conflict with day-of-month.
    '''
    found = re.search("^([0-9]{4})$", s)
    return bool(araq._real_dollar(s, found))
def is_month(s, **kwargs):
    '''Check whether ``s`` starts with a month name.

    Grammar: ( "jan" / "feb" / "mar" / "apr" / "may" / "jun" / "jul" /
               "aug" / "sep" / "oct" / "nov" / "dec" ) *OCTET

    Keyword arguments:
        mode: 'loose' (the default) lower-cases ``s`` first, making the
              match case-insensitive; any other value matches ``s`` as is.

    The trailing *OCTET set comes from ``araq.CONST_STR['octs']``.
    '''
    if kwargs.get('mode', 'loose') == 'loose':
        s = str.lower(s)
    month_names = "^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)"
    trailing_octets = "[" + re.escape(araq.CONST_STR['octs']) + "]*$"
    pattern = re.compile(month_names + trailing_octets)
    found = pattern.search(s)
    return bool(araq._real_dollar(s, found))
def _time_fmt_patterns():
    '''Return the (format-name, compiled pattern) pairs probed by detect_time_fmt.'''
    month = '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)'
    weekday = '(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)'
    wkday = '(Mon|Tue|Wed|Thu|Fri|Sat|Sun)'
    d2 = '[0-9]{2}'
    d4 = '[0-9]{4}'
    hms = '[0-9]{2}:[0-9]{2}:[0-9]{2}'
    tzoffset = r'[\+\-][0-9]{4}'
    # asctime pads a one-digit day with an extra space ("Nov  6"), as in
    # date3 = month SP ( 2DIGIT | ( SP 1DIGIT )).  The single-space form
    # ("Nov 6") that was accepted before is still accepted.
    asctime_day = '(( [0-9]{2})|( {1,2}[0-9]{1}))'

    bodies = (
        # Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
        ('rfc1123',
         wkday + ', ' + d2 + ' ' + month + ' ' + d4 + ' ' + hms + ' GMT'),
        # Wed, 09 Feb 1994 22:23:32 -- no timezone
        ('rfc1123_notz',
         wkday + ', ' + d2 + ' ' + month + ' ' + d4 + ' ' + hms),
        # 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
        ('rfc1123_nowkday',
         d2 + ' ' + month + ' ' + d4 + ' ' + hms + ' GMT'),
        # Wed, 09 Feb 1994 22:23:32 +0000 -- numeric timezone offset
        ('rfc1123_tzoffset',
         wkday + ', ' + d2 + ' ' + month + ' ' + d4 + ' ' + hms + ' '
         + tzoffset),
        # Wed, 09-Feb-1994 22:23:32 GMT -- hyphenated date
        ('rfc1123_hypen',
         wkday + ', ' + d2 + '-' + month + '-' + d4 + ' ' + hms + ' GMT'),
        # Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
        ('rfc850',
         weekday + ', ' + d2 + '-' + month + '-' + d2 + ' ' + hms + ' GMT'),
        # Tue, 08-Feb-94 14:15:29 GMT -- rfc850 with abbreviated weekday
        ('rfc850_a',
         wkday + ', ' + d2 + '-' + month + '-' + d2 + ' ' + hms + ' GMT'),
        # Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
        ('rfc850_broken',
         weekday + ', ' + d2 + '-' + month + '-' + d4 + ' ' + hms + ' GMT'),
        # 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP no weekday
        ('rfc850_broken_nowkday',
         d2 + '-' + month + '-' + d4 + ' ' + hms + ' GMT'),
        # 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
        ('rfc850_nowkday',
         d2 + '-' + month + '-' + d2 + ' ' + hms + ' GMT'),
        # Wed Feb  9 22:23:32 1994 -- ANSI C asctime() format
        ('asctime',
         wkday + ' ' + month + asctime_day + ' ' + hms + ' ' + d4),
        # 1994-02-03 14:15:29 -0100 -- ISO 8601 format
        ('iso8601',
         d4 + r'\-' + d2 + r'\-' + d2 + ' ' + hms + ' ' + tzoffset),
    )
    return tuple(
        (name, re.compile('^' + body + '$')) for name, body in bodies
    )


def detect_time_fmt(date_value, **kwargs):
    '''Identify which known date layout ``date_value`` is written in.

    ####################HTTP-date###############
    # HTTP-date    = rfc1123-date | rfc850-date | asctime-date
    # rfc1123-date = wkday "," SP date1 SP time SP "GMT"
    # rfc850-date  = weekday "," SP date2 SP time SP "GMT"
    # asctime-date = wkday SP date3 SP time SP 4DIGIT
    # date1        = 2DIGIT SP month SP 4DIGIT
    #                ; day month year (e.g., 02 Jun 1982)
    # date2        = 2DIGIT "-" month "-" 2DIGIT
    #                ; day-month-year (e.g., 02-Jun-82)
    # date3        = month SP ( 2DIGIT | ( SP 1DIGIT ))
    #                ; month day (e.g., Jun  2)
    # time         = 2DIGIT ":" 2DIGIT ":" 2DIGIT
    #                ; 00:00:00 - 23:59:59
    # wkday        = "Mon" | "Tue" | "Wed"
    #              | "Thu" | "Fri" | "Sat" | "Sun"
    # weekday      = "Monday" | "Tuesday" | "Wednesday"
    #              | "Thursday" | "Friday" | "Saturday" | "Sunday"

    Keyword arguments:
        mode: 'strict' (the default) additionally passes every regex match
              through ``araq._real_dollar``; any other value accepts a
              plain ``search`` hit.

    Returns one of 'rfc1123', 'rfc1123_notz', 'rfc1123_nowkday',
    'rfc1123_tzoffset', 'rfc1123_hypen', 'rfc850', 'rfc850_a',
    'rfc850_broken', 'rfc850_broken_nowkday', 'rfc850_nowkday', 'asctime',
    'iso8601', or None when no layout matches.  ISO 8601 input used to be
    reported as 'asctime'; it now gets its own name.
    '''
    strict = kwargs.get('mode', 'strict') == 'strict'
    # Every pattern is anchored at both ends and no two layouts can match
    # the same text, so one probing order serves both modes.
    for name, pattern in _time_fmt_patterns():
        m = pattern.search(date_value)
        if strict:
            # Hand _real_dollar the match object, as every other caller in
            # this file does; the compiled pattern was passed before.
            # NOTE(review): confirm against araq._real_dollar's contract.
            hit = araq._real_dollar(date_value, m)
        else:
            hit = m
        if hit:
            return name
    return None
def is_time_field(s):
    '''Check whether ``s`` is a time-field.

    Grammar: time-field = 1*2DIGIT

    Returns the truthiness of ``araq._real_dollar(s, match)`` as a bool.
    '''
    found = re.search("^[0-9]{1,2}$", s)
    return bool(araq._real_dollar(s, found))