def urls(start, target):
    if validate_url(start) and validate_url(target) and 'wikipedia' in start and 'wikipedia' in target:
        start_url = parse.urlparse(start).path[1:]
        target_url = parse.urlparse(target).path[1:]
        return start_url, target_url
    else:
        print('invalid urls')
        exit()
def post(self):
    self.check_xsrf_cookie()
    upstream_server = self.get_argument("upstream_server", "")
    password = self.get_argument("password", "")
    http_username = self.get_argument("http_username", "")
    http_password = self.get_argument("http_password", "")
    if upstream_server and not validate_url(upstream_server):
        self.set_flash_message("error", "Url is not valid.")
        self.redirect("/__manage")
    else:
        self.api_data.upstream_server = upstream_server
        self.api_data.http_username = http_username
        if password:
            password_hash = yield self.application.pool.submit(
                bcrypt.hashpw, password, bcrypt.gensalt())
            self.api_data.password = password_hash
        if not http_username:
            self.api_data.http_password = ""
        elif http_password:
            http_password_hash = yield self.application.pool.submit(
                crypt, http_password, gencryptsalt())
            self.api_data.http_password = http_password_hash
        self.api_data.save()
        self.set_flash_message("success",
                               "Settings have been successfully saved.")
        self.redirect("/__manage/settings")
def validate(url: str) -> bool:
    """Check whether the provided string is a URL."""
    validation: Union[bool, ValidationFailure] = validate_url(url)
    if type(validation) is bool:
        return validation
    return False
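# Note: this wrapper exists because validators.url returns True on success
# but a falsy ValidationFailure object (not the bool False) on failure.
# A minimal sketch of the same idea, assuming validate_url is validators.url
# (behavior may differ in newer versions of the validators package):
import validators

print(validators.url("https://example.com"))  # True
print(bool(validators.url("not a url")))      # False: ValidationFailure is falsy

def is_valid(url: str) -> bool:
    # equivalent truthiness-based reduction of the wrapper above
    return validators.url(url) is True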
def is_url(url):
    # A handwritten regex could be imprecise, so validate_url is used instead:
    # regex = re.compile(
    #     r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))")
    if validate_url(url):
        return True
    return False
def index_view(request):
    if request.method == 'POST':
        long_url = request.POST.get('original_url', '')
        # get_or_create is used to avoid duplicate rows in the database
        if not validate_url(long_url):
            return HttpResponse("Invalid URL received",
                                status=status.HTTP_400_BAD_REQUEST)
        url = UrlModel.objects.get_or_create(original_url=long_url)
        short_url = SITE_NAME + encode(url[0].id)
        return HttpResponse(short_url, status=status.HTTP_200_OK)
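# The view above relies on an encode() helper that is not shown. A common
# choice for URL shorteners is base-62 encoding of the database row id; the
# sketch below is a hypothetical stand-in (alphabet order and function name
# are assumptions, the real encode() may differ):
import string

BASE62_ALPHABET = string.digits + string.ascii_lowercase + string.ascii_uppercase

def encode(number: int) -> str:
    if number == 0:
        return BASE62_ALPHABET[0]
    digits = []
    while number > 0:
        number, remainder = divmod(number, 62)
        digits.append(BASE62_ALPHABET[remainder])
    return ''.join(reversed(digits))

# encode(125) == '21' with this alphabet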
def get_aws_function(text):
    """
    Requests an open API to get the URL of the AWS function that produces audio.

    :param text: text to speech
    """
    try:
        response = get(
            "https://nextup.com/ivona/php/nextup-polly"
            "/CreateSpeech/CreateSpeechGet3.php",
            allow_redirects=True,
            params={
                "voice": "Maxim",
                "language": "ru-RU",
                "text": text
            })
        response.raise_for_status()
        validate_url(response.text)
        return response.text
    except Exception as e:
        raise ApiRequestError("API call error!", e)
def validate_project(project, is_edit=False):
    """
    Validates important bits of a project database entry

    :param project: The json from the request, containing the data for a project.
    :param is_edit: If we are updating the database, we need to know, so we
                    don't check for dupes on urls.
    :return: A response with the error or None if the project is valid.
    """
    # Validate length
    if not len(project['name']) and \
            not len(project['blogurl']) and \
            not len(project['output']):
        return make_response(jsonify(status='too-short'), 400)

    # Validate project name
    if not re.match('^\\w+$', project['name']):
        return make_response(jsonify(status='invalid-project-name'), 400)

    # Check if project name already exists
    name_check = db.session.query(
        TProjects.name).filter(TProjects.name == project['name']).first()
    if name_check and not is_edit:
        return make_response(jsonify(status='project-exists'), 400)

    # Validate existing url only if we are not editing
    url_exists = False
    if not is_edit:
        url_exists = db.session.query(TProjects.blogurl).filter(
            TProjects.blogurl == project['blogurl']).first()
    if not validate_url(project['blogurl']) or url_exists:
        return make_response(jsonify(status='invalid-blog-url'), 400)

    # Validate output path
    output = Path(project['output']).absolute()
    # The second check is needed, since javascript is passing an empty string
    # instead of null. For some reason, this makes SQLAlchemy accept the data
    # and commit it to the db without exception. This check prevents this
    # issue from happening.
    if not output.exists() or len(project['output'].strip()) == 0:
        return make_response(jsonify(status='invalid-path-output'), 400)

    # Validate cache path, if caching is enabled
    if project['gravatar_cache']:
        cache = Path(project['gravatar_cache_dir']).absolute()
        if not cache.exists() or len(project['gravatar_cache_dir'].strip()) == 0:
            return make_response(jsonify(status='invalid-path-cache'), 400)

    return None
def find_extern_links(content, domain, suffix, url):
    found = False
    number_extern_links = 0

    try:
        soup = bs4.BeautifulSoup(content.lower(), 'lxml')
    except Exception as e:
        log(action_logging_enum=ERROR,
            logging_text="Couldn't load content of website for external content check.")
        # bail out early: soup would be unbound below otherwise
        return found, number_extern_links

    for a_tag in soup.findAll("a"):
        href = a_tag.attrs.get("href")

        if href == "" or href is None:
            # empty href tag
            continue

        try:
            if not validate_url(href):
                if validate_url(urljoin(url, href)):
                    href = urljoin(url, href)
                else:
                    continue

            components = get_url_components(href)
            extracted_domain = components[3]
            extracted_suffix = components[4]

            if extracted_domain != domain or extracted_suffix != suffix:
                number_extern_links += 1
                found = True
        except Exception as e:
            continue

    return found, number_extern_links
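# A quick usage sketch for find_extern_links, assuming get_url_components
# returns tldextract-style parts at indices 3 (domain) and 4 (suffix) as the
# indexing above implies; the sample HTML is hypothetical:
sample_html = '<a href="/about">About</a><a href="https://other.example.net/">Out</a>'

found, count = find_extern_links(sample_html, domain="example", suffix="com",
                                 url="https://www.example.com/")
# Expected under these assumptions: found is True and count == 1, since the
# relative link resolves back to example.com while example.net is extern.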
def parseLinks(self, url):
    # Appends to CRAWLQUEUE the valid links on the page that URL links to.
    try:
        links = BeautifulSoup(get(url).text, 'lxml').findAll("a", href=True)
        for l in links:
            if l['href'].split('/')[0] == '':
                # Prepends host to path to make internal links valid.
                link = ''.join([url, l['href']])
            else:
                link = l['href']
            link = link.rsplit('?', 1)[0]  # Removes query strings from URL.
            if validate_url(link) and link not in self.crawlQueue \
                    and link not in self.visited:
                self.crawlQueue.append(link)
    except Exception:
        pass
def add_link(url, url_type, login, custom_short_url=None):
    '''adds link in database'''
    if url_type not in url_types:
        url_type = 'public'
    if url[:4] != 'http':
        url = 'http://' + url
    if not validate_url(url):
        return jsonify({"msg": "Bad url"}), 400
    hashed = None  # stays None if the insert fails before a hash is computed
    try:
        last_id = db.request_insert_three('Urls', 'url, url_type, user_id',
                                          url, url_type, login)
        hashed = getHash(last_id[0][0])
        db.request_update('Urls', 'short_url', hashed, 'id', last_id[0][0])
        if custom_short_url is not None:
            db.request_update('Urls', 'custom_short_url', custom_short_url,
                              'id', last_id[0][0])
    except sqlite3.IntegrityError:
        custom_short_url = None
    # TODO: rework this to return the short link in the jsonify JSON response
    return jsonify(short_url=hashed, custom_short_url=custom_short_url), 200
def do_content_search(content):
    soup = bs4.BeautifulSoup(content.lower(), 'html.parser')

    if soup.find('a', href=True):
        for link in soup.find_all('a', href=True):
            if any(link.text == token.lower() for token in login_token_list):
                login_url = str(link['href']).strip()

                if login_url.startswith('/') and url.endswith('/'):
                    login_url = url + login_url.replace('/', '', 1)

                if login_url.startswith('./'):
                    if url.endswith('/'):
                        login_url = url + login_url.replace('./', '', 1)
                    else:
                        login_url = url + login_url.replace('.', '', 1)

                if validate_url(login_url):
                    log(action_logging_enum=INFO,
                        logging_text="Login page found by content analysis.")
                    return login_url, True

    return url, False
def do_redirect():
    url = request.args.get('url', '')
    if not validate_url(url):
        return render_template('invalid-url.html', url=url)
    return redirect(url)
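# Hypothetical wiring for the handler above; the snippet does not show the
# app or route name, so both are assumptions:
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/go', 'do_redirect', do_redirect)

with app.test_client() as client:
    resp = client.get('/go?url=https://example.com')
    assert resp.status_code == 302  # a valid URL is redirected
    # An invalid ?url= renders invalid-url.html instead of redirecting,
    # which requires that template to exist in the templates/ folder.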
def test_validate_url_nok_malformed_http():
    assert validators.validate_url('http/:/asdf.com/') is False


def test_validate_url_nok_dot_trailing_slash():
    assert validators.validate_url('http://../') is False
def extract_features_from_website(url, label, predict):
    """Extract all features from a website; if predict is set to True, a pandas DataFrame is created."""
    try:
        global brand_list
        global phishy_list
        global login_list
        global tld_list

        # save original url for object instance
        url_orig = url

        # get different components of url
        components = get_url_components(url)
        fqdn = components[0]
        scheme = components[1]
        subdomain = components[2]
        domain = components[3]
        suffix = components[4]
        port = components[5]
        path = components[6]
        query = components[7]
        fragment = components[8]

        netloc = fqdn
        url_no_prot = url
        if scheme:
            netloc = scheme + "://" + fqdn
            if port:
                netloc = netloc + ":" + port
            url_no_prot = url.replace(scheme + "://", "", 1)

        # check for redirects of url
        resp_url, num_redirects, protocol, content = get_redirects(url)

        # try again if no connection could be established
        if content == -1:
            time.sleep(3)
            resp_url, num_redirects, protocol, content = get_redirects(url)
            if content == -1:
                return None

        # get content for homepage
        hp_url, hp_num_redirects, hp_protocol, hp_content = get_redirects(
            "{}://www.{}.{}".format(scheme, domain, suffix))
        if hp_content == -1:
            time.sleep(3)
            hp_url, hp_num_redirects, hp_protocol, hp_content = get_redirects(
                "{}://www.{}.{}".format(scheme, domain, suffix))

        # read content in parser
        if not hp_content == -1:
            hp_soup = bs4.BeautifulSoup(hp_content.lower(), 'html.parser')
        soup = bs4.BeautifulSoup(content.lower(), 'html.parser')
        url = resp_url

        # number of redirects done by website
        if num_redirects > 0:
            bool_redirect_website = True
        else:
            bool_redirect_website = False

        # check if website has favicon
        bool_favicon_website = False
        try:
            icon = favicon.get(url, timeout=3)
            bool_favicon_website = True
        except Exception as e:
            bool_favicon_website = False

        # website has links pointing to extern content /
        # number of links pointing to extern content
        bool_content_extern_website, int_links_extern_website = find_extern_links(
            content.lower(), domain, suffix, url)

        # check for custom status bar
        bool_custom_statusbar_website = bool(
            str(content).lower().replace(" ", "").__contains__("window.status="))

        # custom right click
        bool_disable_rightclick_website = False
        if str(content).replace(" ", "").lower().__contains__("document.oncontextmenu="):
            bool_disable_rightclick_website = True

        res = soup.findAll("body")
        if res:
            for element in res:
                try:
                    right_click_arg = element['oncontextmenu']
                    if str(right_click_arg) == "return false":
                        bool_disable_rightclick_website = True
                except Exception as e:
                    continue

        # has pop up window
        bool_popup_website = False
        hidden_count = 0
        res = soup.findAll("div")
        if res:
            for tag in res:
                try:
                    arg = tag['class']
                    if "popup" in arg:
                        bool_popup_website = True
                except Exception as e:
                    pass
                try:
                    arg = tag['style']
                    arg = str(arg).replace(" ", "")
                    if arg.__contains__("display:none") or arg.__contains__("visibility:hidden"):
                        hidden_count += 1
                except Exception as e:
                    continue

        # has iframe
        bool_iframe_website = False
        res = soup.findAll("iframe")
        if res:
            bool_iframe_website = True

        # has action tag > custom 2. feature - is action extern?
        bool_action_website = False
        bool_action_extern_website = False
        # has form with post method
        bool_form_post_website = False
        res = soup.findAll("form")
        if res:
            for element in res:
                try:
                    if element["action"]:
                        bool_action_website = True
                        action_url = element["action"]
                        if validate_url(action_url) or validate_url(urljoin(netloc, action_url)):
                            if validate_url(urljoin(netloc, action_url)):
                                action_url = urljoin(netloc, action_url)
                            extracted_action_url = get_url_components(action_url)
                            domain_action_url = extracted_action_url[3]
                            suffix_action_url = extracted_action_url[4]
                            if suffix != suffix_action_url or domain != domain_action_url:
                                bool_action_extern_website = True
                                break
                    if element["method"] == "post":
                        bool_form_post_website = True
                except Exception as e:
                    continue

        # has phishy tokens in visible content
        int_phishy_tokens_website = 0
        for text in soup.stripped_strings:
            int_phishy_tokens_website += sum(1 for word in phishy_list
                                             if text.__contains__(word))

        # has input tag
        bool_input_website = False
        if get_element_count("input", soup) > 0:
            bool_input_website = True

        # find meta description
        res = soup.find('meta', attrs={'name': 'og:description'})
        if not res:
            res = soup.find('meta', attrs={'property': 'description'})
        if not res:
            res = soup.find('meta', attrs={'name': 'description'})

        if not hp_content == -1:
            hp_res = hp_soup.find('meta', attrs={'name': 'og:description'})
            if not hp_res:
                hp_res = hp_soup.find('meta', attrs={'property': 'description'})
            if not hp_res:
                hp_res = hp_soup.find('meta', attrs={'name': 'description'})

        float_description_sim_website = 0
        if hp_content == -1:
            float_description_sim_website = -1

        if not hp_content == -1:
            if res and hp_res:
                try:
                    hp_desc = hp_res['content']
                    desc = res['content']
                    # compute similarity of description from home and login page
                    float_description_sim_website = string_similarity(desc, hp_desc)
                except Exception:
                    pass

        # bond status of login page and homepage
        bool_bond_status_website = False
        # most frequent domain is extern > true/false
        bool_freq_domain_extern_website = False

        res = soup.findAll("a")
        domain_list = []
        link_list = []
        href_count = 0
        redirect_object_list = []
        if res:
            for a_tag in res:
                try:
                    href = a_tag.attrs.get("href")
                    href_count += 1
                    if validate_url(href) or validate_url(urljoin(netloc, href)):
                        if validate_url(urljoin(netloc, href)):
                            href = urljoin(netloc, href)
                        if href == hp_url:
                            bool_bond_status_website = True
                        components_href = get_url_components(href)
                        domain_href = components_href[3]
                        suffix_href = components_href[4]
                        if is_IP(domain):
                            continue
                        link_list.append(href)
                        domain_list.append("{},{}".format(domain_href, suffix_href))
                except Exception as e:
                    continue

        link_list = list(set(link_list))
        link_list = link_list[:10]

        if not hp_content == -1:
            try:
                redirect_object_list = get_redirects_list(link_list)
            except Exception as e:
                log(action_logging_enum=ERROR, logging_text=str(e))

        if redirect_object_list:
            for redirect_object in redirect_object_list:
                if not bool_bond_status_website and not hp_content == -1:
                    try:
                        website_sim = html_similarity.similarity(
                            str(hp_content).lower(),
                            str(redirect_object.content).lower(), k=0.3)
                        if website_sim == 1:
                            bool_bond_status_website = True
                    except Exception:
                        continue

        if domain_list:
            occure_count = Counter(domain_list)
            most_freq = occure_count.most_common(1)[0][0]
            most_frq_domain, most_freq_suffix = most_freq.split(",", 1)
            if not str(most_frq_domain) == domain or not str(suffix) == most_freq_suffix:
                bool_freq_domain_extern_website = True

        # jaccard similarity between homepage and login page
        float_login_home_website = 0
        if not hp_content == -1:
            try:
                float_login_home_website = html_similarity.similarity(
                    str(content).lower(), str(hp_content).lower(), k=0.3)
            except Exception:
                pass

        # website has copyright /
        # similarity of copyright text on login page and home page
        bool_copyright_website = False
        float_copyright_sim_website = 0  # defined even if the homepage fetch failed
        copy = ""
        hp_copy = ""
        if not hp_content == -1:
            for text in soup.stripped_strings:
                if '©' in text:
                    copy = re.sub(r'\s+', ' ', text)
                    bool_copyright_website = True
            for text in hp_soup.stripped_strings:
                if '©' in text:
                    hp_copy = re.sub(r'\s+', ' ', text)
            if copy and hp_copy:
                float_copyright_sim_website = string_similarity(copy, hp_copy)

        # similarity of title on login page and home page
        float_title_sim_website = 0
        if not hp_content == -1:
            try:
                title = soup.title.text
                hp_title = hp_soup.title.text
                float_title_sim_website = string_similarity(title, hp_title)
            except Exception:
                float_title_sim_website = 0

        # unique links / all links on page
        float_unique_links_website = 0
        if link_list:
            float_unique_links_website = len(list(set(link_list))) / len(link_list)

        # lexical analysis for all links on website
        bool_link_analysis_website = True
        # dataframe = pd.DataFrame()
        # try:
        #     redirect_object = RedirectEntry(url=url, redirects=num_redirects,
        #                                     content=content, protocol=protocol)
        #     dataframe = pd.DataFrame(extract_features_from_URL(
        #         redirect_object, "Predict", brand_list=brand_list,
        #         tld_list=tld_list, phishy_list=phishy_list, predict=True))
        # except Exception as e:
        #     pass
        # if not dataframe.empty:
        #     try:
        #         df = pd.DataFrame(dataframe.iloc[0]).transpose()
        #         prediction = predict_url(df)
        #         if int(prediction) == 0:
        #             bool_link_analysis_website = False
        #     except Exception:
        #         pass

        # number of input elements /
        # find form accompanied by labels with login words
        int_input_website = 0
        bool_input_login_website = False
        form = soup.find("form")
        try:
            if form:
                inputs = form.find_all("input")
                if inputs:
                    int_input_website = len(inputs)
                    for inp in inputs:
                        try:
                            if inp["type"] == "hidden":
                                hidden_count += 1
                        except Exception:
                            continue
                label_tags = form.findAll("label")
                if label_tags:
                    for label_entry in label_tags:
                        if any(str(label_entry.text).__contains__(word)
                               for word in login_list):
                            bool_input_login_website = True
        except Exception:
            pass

        # website has button
        bool_button_website = False
        button_count = get_element_count("button", soup)
        if button_count > 0:
            bool_button_website = True

        # website has meta information
        bool_meta_website = False
        if soup.find("meta"):
            bool_meta_website = True

        # has hidden elements
        bool_hidden_element_website = False
        if hidden_count > 0:
            bool_hidden_element_website = True

        # number of option tags
        int_option_website = get_element_count("option", soup)

        # number of select tags
        int_select_website = get_element_count("select", soup)

        # number of th tags
        int_th_website = get_element_count("th", soup)

        # number of tr tags
        int_tr_website = get_element_count("tr", soup)

        # number of table tags
        int_table_website = get_element_count("table", soup)

        # number of hrefs in a tags
        int_href_website = href_count

        # number of list item tags
        int_li_website = get_element_count("li", soup)

        # number of unordered list tags
        int_ul_website = get_element_count("ul", soup)

        # number of ordered list tags
        int_ol_website = get_element_count("ol", soup)

        # number of div tags
        int_div_website = get_element_count("div", soup)

        # number of span tags
        int_span_website = get_element_count("span", soup)

        # number of article tags
        int_article_website = get_element_count("article", soup)

        # number of p tags
        int_p_website = get_element_count("p", soup)

        # number of checkbox tags
        int_checkbox_website = get_element_count("input", soup, "type", "checkbox")

        # number of buttons
        int_button_website = button_count

        # number of images
        int_image_website = get_element_count("img", soup)

        if predict == False:
            entry = FeatureEntryContent(
                bool_redirect_website=bool_redirect_website,
                bool_favicon_website=bool_favicon_website,
                bool_content_extern_website=bool_content_extern_website,
                int_links_extern_website=int_links_extern_website,
                bool_custom_statusbar_website=bool_custom_statusbar_website,
                bool_disable_rightclick_website=bool_disable_rightclick_website,
                bool_popup_website=bool_popup_website,
                bool_iframe_website=bool_iframe_website,
                bool_action_website=bool_action_website,
                bool_action_extern_website=bool_action_extern_website,
                bool_form_post_website=bool_form_post_website,
                int_phishy_tokens_website=int_phishy_tokens_website,
                bool_input_website=bool_input_website,
                float_description_sim_website=float_description_sim_website,
                bool_bond_status_website=bool_bond_status_website,
                bool_freq_domain_extern_website=bool_freq_domain_extern_website,
                float_login_home_website=float_login_home_website,
                bool_copyright_website=bool_copyright_website,
                float_copyright_sim_website=float_copyright_sim_website,
                float_title_sim_website=float_title_sim_website,
                float_unique_links_website=float_unique_links_website,
                # bool_link_analysis_website=bool_link_analysis_website,
                int_input_website=int_input_website,
                bool_input_login_website=bool_input_login_website,
                bool_button_website=bool_button_website,
                bool_meta_website=bool_meta_website,
                bool_hidden_element_website=bool_hidden_element_website,
                int_option_website=int_option_website,
                int_select_website=int_select_website,
                int_th_website=int_th_website,
                int_tr_website=int_tr_website,
                int_table_website=int_table_website,
                int_href_website=int_href_website,
                int_li_website=int_li_website,
                int_ul_website=int_ul_website,
                int_ol_website=int_ol_website,
                int_div_website=int_div_website,
                int_span_website=int_span_website,
                int_article_website=int_article_website,
                int_p_website=int_p_website,
                int_checkbox_website=int_checkbox_website,
                int_button_website=int_button_website,
                int_image_website=int_image_website,
                label=label,
                url=url_orig,
                final_url=url)

            log(action_logging_enum=INFO,
                logging_text="Processed datapoint. {}".format(url))
            return entry

        elif predict:
            data = {
                "ID": [0],
                "Has Redirect": [bool_redirect_website],
                "Has Favicon": [bool_favicon_website],
                "Has Extern Content": [bool_content_extern_website],
                "Number Extern Links": [int_links_extern_website],
                "Has Custom StatusBar": [bool_custom_statusbar_website],
                "Has Disabled RightClick": [bool_disable_rightclick_website],
                "Has PopUp": [bool_popup_website],
                "Has iFrame": [bool_iframe_website],
                "Has Action": [bool_action_website],
                "Has Extern Action": [bool_action_extern_website],
                "Has Form with POST": [bool_form_post_website],
                "Number PhishyTokens": [int_phishy_tokens_website],
                "Has Input": [bool_input_website],
                "Ratio Description Sim": [float_description_sim_website],
                "Has Bond Status": [bool_bond_status_website],
                "Has Freq Domain Extern": [bool_freq_domain_extern_website],
                "Ratio Similarity": [float_login_home_website],
                "Has Copyright": [bool_copyright_website],
                "Ratio Copyright Sim": [float_copyright_sim_website],
                "Ratio Title Sim": [float_title_sim_website],
                "Ratio Unique Links": [float_unique_links_website],
                "Number Inputs": [int_input_website],
                "Has Input for Login": [bool_input_login_website],
                "Has Button": [bool_button_website],
                "Has Meta": [bool_meta_website],
                "Has Hidden Element": [bool_hidden_element_website],
                "Number Option": [int_option_website],
                "Number Select": [int_select_website],
                "Number TH": [int_th_website],
                "Number TR": [int_tr_website],
                "Number Table": [int_table_website],
                "Number HREF": [int_href_website],
                "Number LI": [int_li_website],
                "Number UL": [int_ul_website],
                "Number OL": [int_ol_website],
                "Number DIV": [int_div_website],
                "Number Span": [int_span_website],
                "Number Article": [int_article_website],
                "Number Paragr": [int_p_website],
                "Number Checkbox": [int_checkbox_website],
                "Number Button": [int_button_website],
                "Number Image": [int_image_website],
                "Label": [label],
                "URL": [url_orig],
                "Final URL": [url]
            }
            columns = list(CONTENT_FEATURE_LIST_COLUMN_NAMES)
            df = pd.DataFrame(data, columns=columns)
            return df

    except Exception as e:
        log(action_logging_enum=WARNING, logging_text=str(e))
        log(action_logging_enum=WARNING, logging_text=str(e.__traceback__))
        exc_type, exc_obj, tb = sys.exc_info()
        f = tb.tb_frame
        lineno = tb.tb_lineno
        filename = f.f_code.co_filename
        linecache.checkcache(filename)
        line = linecache.getline(filename, lineno, f.f_globals)
        log(ERROR, 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(
            filename, lineno, line.strip(), exc_obj))
        log(action_logging_enum=WARNING,
            logging_text="Could not extract content features for {}".format(url))
        log(action_logging_enum=INFO,
            logging_text="Failed datapoint. {}".format(url))
        return None
def test_validate_url_nok_double_question():
    assert validators.validate_url('http://??') is False


def test_validate_url_nok_space():
    assert validators.validate_url(
        'http://foo.bar?q=Spaces should be encoded') is False
            url = argv.pop(i + 1)
        except Exception:
            pass
    if arg == "-l":
        try:
            limit = int(argv.pop(i + 1))
        except ValueError:
            print("Error: the LIMIT amount you provided wasn't an integer.")
            exit()
    if arg == "-o":
        try:
            out_file = argv.pop(i + 1)
        except Exception:
            pass

# If URL is valid, begin crawling.
if url:
    if validate_url(url):
        WebCrawler(url, limit, out_file)
    else:
        print("Error: the URL you provided is not valid "
              "(make sure you include 'http://' or 'https://').")
else:
    print("Error: you must provide a valid URL (string) following the '-u' flag.")
def test_validate_url_nok_double_slash_a():
    assert validators.validate_url('//a') is False


def test_validate_url_nok_malformed_https():
    assert validators.validate_url('https/:/fdsa.com') is False


def test_validate_url_nok_rdar():
    assert validators.validate_url('rdar://1234') is False


def test_validate_url_nok_domain_only():
    assert validators.validate_url('foo.com') is False


def test_validate_url_nok_proto_triple_slash_a():
    assert validators.validate_url('http:///a') is False


def test_validate_url_nok_triple_slash():
    assert validators.validate_url('///') is False
def get_files(self, arguments, key):
    list_target_paths = list()
    list_final_target_paths = list()
    str_kustomization_path = str()

    try:
        # test if the key to configure is even defined in input helmizer config
        list_kustomization_children = self.helmizer_config["kustomize"][key].get(list)

        str_kustomization_directory = str()
        try:
            str_kustomization_directory = self.helmizer_config["helmizer"][
                "kustomization-directory"].get(str)
        except NotFoundError:
            str_kustomization_directory = "."

        str_kustomization_path = path.dirname(
            path.abspath(
                path.normpath(
                    path.join(
                        arguments.helmizer_config,
                        str_kustomization_directory,
                    ))))

        if len(list_kustomization_children) > 0:
            for target_path in list_kustomization_children:
                str_child_path = path.abspath(
                    path.join(str_kustomization_path, target_path))

                # walk directory
                if path.isdir(str_child_path):
                    for (dirpath, _, filenames) in walk(str_child_path):
                        for filename in filenames:
                            list_target_paths.append(path.join(dirpath, filename))

                # file
                elif path.isfile(str_child_path):
                    list_target_paths.append(str_child_path)

                # url
                elif validate_url(str_child_path):
                    list_target_paths.append(str_child_path)

            # convert absolute paths into paths relative to the kustomization directory
            for final_target_path in list_target_paths:
                list_final_target_paths.append(
                    path.relpath(final_target_path, str_kustomization_path))

            # remove any ignored files
            try:
                for ignore in self.helmizer_config["helmizer"]["ignore"].get(list):
                    logging.debug(f"Removing ignored file from final list: {ignore}")
                    list_final_target_paths.remove(ignore)
            except ValueError:
                pass
            except NotFoundError:
                pass

        return list_final_target_paths

    except NotFoundError:
        logging.debug(f"key not found: {key}")
    except KeyError:
        logging.debug(f"key not found: {key}")
    except TypeError:
        pass
def get_files(self: object, arguments: object, key: str) -> list:
    target_paths: list = list()
    final_target_paths: list = list()
    kustomization_path: str = str()

    try:
        # test if the key to configure is even defined in input helmizer config
        kustomization_children: list = self.helmizer_config["kustomize"][key].get(list)

        kustomization_directory: str = str()
        try:
            kustomization_directory = self.helmizer_config["helmizer"][
                "kustomization-directory"].get(str)
        except NotFoundError:
            kustomization_directory = "."

        kustomization_path = path.dirname(
            path.abspath(
                path.normpath(
                    path.join(
                        arguments.helmizer_config,
                        kustomization_directory,
                    ))))

        if len(kustomization_children) > 0:
            # each target path
            for target_path in kustomization_children:
                child_path: str = path.abspath(
                    path.join(kustomization_path, target_path))

                # walk directory
                if path.isdir(child_path):
                    for (dirpath, _, filenames) in walk(child_path):
                        for filename in filenames:
                            target_paths.append(path.join(dirpath, filename))

                # file
                elif path.isfile(child_path):
                    target_paths.append(child_path)

                # url
                elif validate_url(child_path):
                    target_paths.append(child_path)

            # remove any ignored files
            try:
                for ignore in self.helmizer_config["helmizer"]["ignore"].get(list):
                    ignore_abspath: str = path.abspath(
                        path.join(kustomization_path, ignore))

                    # walk directory to remove multiple files
                    if path.isdir(ignore_abspath):
                        for (dirpath, _, filenames) in walk(ignore_abspath):
                            for filename in filenames:
                                file_path: str = path.join(dirpath, filename)
                                logging.debug(
                                    f"Removing ignored file from final list: {file_path}")
                                target_paths.remove(file_path)

                    # remove just one file
                    else:
                        logging.debug(
                            "Removing ignored file from final list: "
                            f"{path.join(kustomization_path, ignore)}")
                        target_paths.remove(path.join(kustomization_path, ignore))
            except ValueError:
                pass
            except NotFoundError:
                pass

        # convert absolute paths into paths relative to the kustomization directory
        for final_target_path in target_paths:
            final_target_paths.append(
                path.relpath(final_target_path, kustomization_path))

        return final_target_paths

    except (NotFoundError, KeyError, TypeError):
        logging.debug(f"key not found: {key}")
        return final_target_paths
def url(self, value):
    validate_url(value)
    self._url = value
def test_validate_url_nok_double_question_trailing_slash():
    assert validators.validate_url('http://??/') is False


def test_validate_url_nok_h():
    assert validators.validate_url('h://test') is False


def test_validate_url_nok_double_hash():
    assert validators.validate_url('http://##') is False
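# Taken together, the negative tests above pin down what validate_url must
# reject: missing or malformed schemes, non-http(s) schemes, bare domains,
# empty or '?'/'#'-only hosts, and unencoded spaces. A minimal
# standard-library sketch that satisfies these cases; an illustration,
# not the project's actual implementation:
import re
from urllib.parse import urlparse

_HOST_RE = re.compile(r'^[A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?'
                      r'(\.[A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?)*$')

def validate_url(url: str) -> bool:
    parsed = urlparse(url)
    if parsed.scheme not in ('http', 'https'):  # rejects 'h://', 'rdar://', 'foo.com', '//a'
        return False
    host = parsed.netloc.split(':')[0]
    if not host or not _HOST_RE.match(host):    # rejects 'http://??', 'http:///a', 'http://../'
        return False
    if ' ' in url:                              # rejects unencoded spaces in the query
        return False
    return True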