def validate_git_url(url):
    """Validate *url* as a git repository URL.

    Delegates the syntactic check to ``url_validator`` and converts a
    ``ValidationFailure`` into a ``BadRequest`` carrying a JSON-style
    error payload.
    """
    try:
        url_validator(url, True)  # basic syntactic check only
    except ValidationFailure:
        payload = {'error': "The repository url '%s' is not valid." % url}
        raise BadRequest('Bad Request', 400, payload)
def dispatch_request(self) -> jsonify:
    """Shorten the submitted URL, reusing an existing code when possible.

    Responds 400 for an invalid URL, 201 when a new mapping is created,
    and 200 when the URL already had a code. On a code collision with a
    different URL the starting index is bumped and generation retried.
    """
    url = request.values.get('url')
    if not url_validator(url):
        return jsonify({'error': 'Not a valid URL.'}), 400  # Bad request.
    status = 200  # OK.
    start = 0
    while True:
        code = generate_code(url, start)
        data = {'url': url, 'code': code}
        existing = db.session.query(Url).filter_by(code=code).first()
        if existing is None:
            # Fresh code: persist the mapping.
            db.session.add(Url(**data))
            db.session.commit()
            status = 201  # Created.
            break
        if existing.url == url:
            # Same URL already mapped to this code; reuse it.
            break
        # Edge case: code taken by a different URL — retry with next index.
        start += 1
    return jsonify(data), status
def get_product_url(self, item_soup_object):
    """Build and return the absolute product URL.

    item_soup_object: BeautifulSoup tag wrapping a single product card.
    Stores the result on ``self.product_url`` and returns it as ``str``
    when it passes validation; otherwise returns ``None``.
    """
    href = item_soup_object.find("a", class_="title-link")["href"]
    self.product_url = base_url + str(href)
    if url_validator(self.product_url):
        return self.product_url
def bucket_url(bucket, acceleration=False):
    """Convert an S3 bucket name into its bucket URL.

    :param bucket: Bucket name
    :param acceleration: Prefer the transfer-acceleration endpoint when
        the accelerated URL validates
    :return: Bucket URL string
    """
    plain = 'https://{bucket}.s3.amazonaws.com'.format(bucket=bucket)
    accelerated = 'https://{bucket}.s3-accelerate.amazonaws.com'.format(
        bucket=bucket)
    # NOTE: this is a syntactic URL check, evaluated unconditionally,
    # exactly as the condition order requires.
    if url_validator(accelerated) and acceleration:
        return accelerated
    return plain
def search_sites_yandex(result_html):
    """Extract result URLs from a Yandex search results page.

    Builds a BeautifulSoup tree, collects every ``<a tabindex="2">`` tag
    and keeps only hrefs that pass URL validation.

    @return: list of found urls
    """
    soup = BeautifulSoup(result_html, "html.parser")
    anchors = soup.find_all("a", tabindex=2)
    return [tag["href"] for tag in anchors if url_validator(tag["href"])]
def create_sector_page_url(sector_url, page_number):
    """Compose the URL of a single page inside a sector gallery.

    Example: https://5karmanov.ru/cat/aksessuary-muzhskie?&page=5

    sector_url: str, url of a sector (see sectors_search_url in settings.py)
    page_number: int, serial number of the page (from get_total_num_pages)
    Returns the page URL, or None (after logging) when validation fails.
    """
    page_url = "{}?&page={}".format(sector_url, page_number)
    if not url_validator(page_url):
        logging.exception(
            "Ошибка формирования ссылки страницы галереи товаров секции")
        return None
    return page_url
def reconfigure():
    """Recreate the configuration file from scratch.

    Truncates the config file, writes the section header, then prompts
    repeatedly until a valid webhook URL is entered.
    """
    with open(filepath, "w+") as cfg:
        cfg.write("[CONFIGURATION]\n")
        while True:
            webhook = input("Enter the webhook: ")
            if not url_validator(webhook):
                print("Invalid URL")
                continue
            cfg.write("webhook = {}\n".format(webhook))
            break
def _get_formed_url(self, url):
    """Join ``self.endpoint`` and *url* with exactly one slash between them.

    Returns the joined URL; raises ``ValueError`` when the result does
    not pass validation.
    """
    base_has_slash = self.endpoint.endswith('/')
    path_has_slash = url.startswith('/')
    if base_has_slash and path_has_slash:
        # Both sides contribute a slash: drop the trailing one.
        formed_url = self.endpoint[:-1] + url
    elif base_has_slash or path_has_slash:
        # Exactly one slash present: plain concatenation is correct.
        formed_url = self.endpoint + url
    else:
        # Neither side has a slash: insert one.
        formed_url = self.endpoint + '/' + url
    if not url_validator(formed_url):
        raise ValueError(f'Url "{formed_url}" is not valid!')
    return formed_url
def _form_url(self, endpoint):
    """Join ``self.base_url`` and *endpoint*, normalising the slash between them.

    Returns the joined URL; raises ``ValueError`` when it fails validation.
    """
    left, right = self.base_url, endpoint
    if left.endswith('/') and right.startswith('/'):
        formed_url = left[:-1] + right  # collapse the double slash
    elif left.endswith('/') or right.startswith('/'):
        formed_url = left + right       # exactly one slash already present
    else:
        formed_url = left + '/' + right  # neither side has a slash
    if not url_validator(formed_url):
        raise ValueError(f'Url "{formed_url}" is not valid!')
    return formed_url
def recursive_search(url, total_url_qt):
    """Expand a seed URL list by scraping links from each seed page.

    While the number of collected URLs is below *total_url_qt*, fetch
    each seed page, parse it with BeautifulSoup and append every valid
    ``<a href>`` found.

    url: list of seed URLs.
    total_url_qt: desired total number of URLs.
    @return: list of found urls (seeds first, then discovered links)

    Fixes over the previous version: the inner loop no longer rebinds the
    ``url`` parameter (variable shadowing corrupted the iteration), and
    each seed page is fetched at most once instead of being re-downloaded
    in a ``while`` loop until the quota was met, which also filled the
    result with duplicate links.
    """
    search_result = url[:]
    print("Запускаю рекрусивный поиск")
    for seed in url:
        if len(search_result) >= total_url_qt:
            break
        result_html = get_html(seed)
        if not result_html:
            continue
        soup_results = BeautifulSoup(result_html, "html.parser")
        for a_tag in soup_results.find_all("a", href=True):
            if len(search_result) >= total_url_qt:
                break
            href = a_tag["href"]
            if url_validator(href):
                search_result.append(href)
    return search_result
def convert(self, value: str, param, context) -> str:
    """Validate *value* as a URL for a CLI parameter.

    Arguments:
        value (str): The candidate URL.

    Returns:
        str: The value, coerced to ``str``, when it validates; otherwise
        ``self.fail`` is invoked with an error message.
    """
    # Coerce defensively to str before validating.
    text = f"{value}"
    if not url_validator(text):
        self.fail(f'Could not validate "{value!r}" as a URL')
    return text
def is_valid_url(self, url=None):
    """Return the validator's verdict for *url* (default: ``self._url``).

    Any failure — validation or otherwise — is logged and reported as
    ``False`` instead of propagating.
    """
    try:
        # Rebinding stays inside the try so a failing self._url lookup
        # is still caught by the broad handler below.
        url = url or self._url
        return url_validator(url)
    except ValidationFailure:
        app_logger.info("{} : Skipping since not a valid url".format(url))
        return False
    except Exception as exc:
        app_logger.error("{} : Skipping as {}".format(url, exc))
        return False
def search_sites_by_google(result_html):
    """Extract external result URLs from a Google search results page.

    Finds every ``<div class="kCrYT">``, takes its first ``<a>`` href,
    drops the tracking query (everything from the first ``&``) and the
    leading ``/url?q=`` redirect prefix, keeping only hrefs that pass
    URL validation.

    @return: de-duplicated list of found urls

    Fix: the previous version called ``url.strip("/url?=q")``, which
    strips any of those *characters* from both ends of the string and
    could truncate valid URLs (e.g. ones ending in 'q', '=', or '/');
    the redirect prefix is now removed explicitly.
    """
    soup_results = BeautifulSoup(result_html, "html.parser")
    html_found = soup_results.find_all("div", class_="kCrYT")
    list_urls_found = []
    prefix = "/url?q="
    for div_tag in html_found:
        a_tag_found = div_tag.find('a')
        if not a_tag_found:
            continue
        try:
            url_cleaned = a_tag_found['href'].partition("&")[0]
            if url_cleaned.startswith(prefix):
                url_cleaned = url_cleaned[len(prefix):]
            if url_validator(url_cleaned):
                list_urls_found.append(url_cleaned)
        except AttributeError:
            # Non-string href; skip this entry, as before.
            continue
    return list(set(list_urls_found))
def get_diff_url(self, organization, repository, base_sha, head_sha):
    """Build the github URL that compares two commits.

    Arguments:
        organization (str): Organization name as it appears in github.
        repository (str): The organization's repository name.
        base_sha (str): The base commit's SHA.
        head_sha (str): SHA to compare against the base.

    Returns:
        A string containing the URL.

    Raises:
        InvalidUrlException: If the basic validator rejects the URL.
    """
    calculated_url = 'https://github.com/{}/{}/compare/{}...{}'.format(
        organization, repository, base_sha, head_sha)
    if url_validator(calculated_url):
        return calculated_url
    raise InvalidUrlException(calculated_url)
jp_tz = timezone(timedelta(hours=+9), 'JST') # with open(f"{__file__}.output", mode='a') as o_f: def put_db(uid, title, url, report, error): print(report) cur.execute(f"insert into {TABLE_NAME} VALUES (:uid, :title, :url, :report, :error, :updated)", (uid, title, url, report, error, datetime.now(jp_tz)) ) conn.commit() MAX_REPEAT = 9999 for repeat, uid in enumerate(params.record_cache): if repeat >= MAX_REPEAT: break rec = api.get_record(params, uid) title = rec.title login_url = rec.login_url msg = "invalid Login URL" try: if login_url and not url_validator(login_url): put_db(uid, title, login_url, msg, 1) continue except validators.ValidationFailure: put_db(uid, title, login_url, msg, 1) continue try: login_url_parsed = urlparse(rec.login_url) except ValueError: #(MatchError, IndexError): msg = "parse error in Login URL" put_db(uid, title, login_url, msg, 2) continue # logger.debug( f"Login URL ({rec.login_url}) error at record uid: {record_uid}" ) else: # title = rec.title net_loc = login_url_parsed.netloc.split(':')[0]
def decorated_function(*args, **kwargs):
    """Pull the target URL from the request, validate it, and stash it in
    the session before invoking the wrapped view.

    Aborts with 422 when the URL is invalid or longer than 10000 chars.
    The URL comes from the ``url`` form field, falling back to the raw
    request body decoded as UTF-8.
    """
    url_data = request.form.get("url", request.get_data().decode("utf-8"))
    too_long = len(url_data) > 10000
    if not url_validator(url_data) or too_long:
        abort(422)
    session["url"] = url_data
    return f(*args, **kwargs)
def url_validator(self, key, url):
    # SQLAlchemy-style attribute validator: accept the value only when the
    # global url_validator check passes. Underscores are swapped for
    # hyphens before checking — presumably because the validator rejects
    # '_' in hostnames; TODO confirm against the validator's rules.
    # NOTE(review): `assert` is stripped under `python -O`, so this
    # validation silently disappears in optimized runs.
    assert url_validator(url.replace('_', '-'))
    return url
def main():
    """Parse CLI arguments, compute the enrollment time, and run the ASVZ
    enrollment bot.

    Fixes over the previous version:
    - the source was garbled by secret-redaction (``******`` swallowed the
      close of ``getpass.getpass`` and the following ``logging.info(``);
      both calls are restored here;
    - the lesson-date URL used ``start_time.day + 1`` with the unchanged
      month/year, which produces invalid dates at month ends; the date is
      now advanced with ``timedelta`` so boundaries roll over correctly.
    """
    logging.debug("Parsing arguments")
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-org", "--organisation",
        required=True,
        choices=list(ORGANISATIONS.keys()),
        help="Name of your organisation.",
    )
    parser.add_argument("-u", "--username", required=True, type=str,
                        help="Organisation username")
    parser.add_argument(
        "-w", "--weekday",
        required=True,
        choices=list(WEEKDAYS.keys()),
        help="Day of the week of the lesson",
    )
    parser.add_argument(
        "-s", "--starttime",
        required=True,
        type=validate_starttime,
        help="Time when the lesson starts e.g. '19:15'",
    )
    parser.add_argument(
        "-f", "--facility",
        required=True,
        type=str,
        help=
        "Facility where the lesson takes place e.g. 'Sport Center Polyterrasse'",
    )
    parser.add_argument(
        "sportfahrplan_nr",
        type=int,
        help=
        "number at the end of link to particular sport on ASVZ Sportfahrplan, e.g. 45743 in https://asvz.ch/426-sportfahrplan?f[0]=sport:45743 for volleyball.",
    )
    args = parser.parse_args()
    logging.debug("Parsed arguments")

    current_time = datetime.today()
    start_time = datetime(
        current_time.year,
        current_time.month,
        current_time.day,
        args.starttime.hour,
        args.starttime.minute,
    )
    # Special case if one starts the script max 24h before the enrollment,
    # e.g. enrollment at Monday 20:00, script started on Sunday 21:00.
    if current_time > start_time:
        start_time += timedelta(days=1)
        logging.info(
            "The enrollement for today is already over. Assuming you wanted to enroll tomorrow."
        )

    # The lesson takes place one day after enrollment opens; use timedelta
    # so month/year boundaries roll over correctly.
    lesson_time = start_time + timedelta(days=1)
    url = "{}{}&date={}-{:02d}-{:02d}%20{}:{}".format(
        BASE_URL,
        args.sportfahrplan_nr,
        lesson_time.year,
        lesson_time.month,
        lesson_time.day,
        args.starttime.hour,
        args.starttime.minute,
    )
    if not url_validator(url):
        logging.error("Invalid url specified: '{}'".format(url))
        exit(1)

    password = getpass.getpass("Organisation password:")
    logging.info(
        "Summary:\n\tOrganisation: {}\n\tUsername: {}\n\tPassword: {}\n\tWeekday: {}\n\tEnrollment time: {}\n\tFacility: {}\n\tSportfahrplan: {}"
        .format(
            args.organisation,
            args.username,
            "*" * len(password),  # never log the real password
            args.weekday,
            start_time,
            args.facility,
            url,
        ))
    logging.info(
        "Downloading chromedriver for installed version of Chrome/Chromium")
    load_chromedriver()
    logging.info("Script started")
    wait_until(start_time)
    asvz_enroll(
        args.organisation,
        args.username,
        password,
        args.weekday,
        start_time,
        args.facility,
        url,
    )
    logging.info("Script successfully finished")
def main():
    """Parse CLI arguments, validate them, and schedule the ASVZ enrollment.

    Fix over the previous version: the enrollment-time-difference check
    used ``not (x >= 1) and not (24 >= x)``, which can never be true (a
    value cannot be both below 1 and above 24), so invalid differences
    were silently accepted. The check is now ``not (1 <= x <= 24)``.
    """
    logging.debug("Parsing arguments")
    # parse args
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--username", help="ETHZ username i.e. nethz")
    parser.add_argument("-p", "--password", help="ETHZ password")
    parser.add_argument(
        "-w", "--weekday",
        help="Day of the week of the lesson i.e. 0-6 for Monday-Sunday",
    )
    parser.add_argument("-t", "--time", help="Time when the lesson starts e.g. '19:15'")
    parser.add_argument(
        "-e", "--enrollemnt_time_difference",
        help=
        "number of hours in between start of enrollment and start of the event"
    )
    parser.add_argument(
        "-f", "--facility",
        help=
        "Facility where the lesson takes place e.g. 'Sport Center Polyterrasse'",
    )
    parser.add_argument(
        "sportfahrplan",
        help=
        "link to particular sport on ASVZ Sportfahrplan, e.g. https://asvz.ch/426-sportfahrplan?f[0]=sport:45743 for volleyball. Make sure there starts only one lesson for that particular time at that particular location (i.e. use ASVZ filters).",
    )
    args = parser.parse_args()
    logging.debug("Parsed arguments")

    # validate args: map the weekday digit to its German name.
    weekday_by_digit = {
        "0": "Montag",
        "1": "Dienstag",
        "2": "Mittwoch",
        "3": "Donnerstag",
        "4": "Freitag",
        "5": "Samstag",
        "6": "Sonntag",
    }
    if args.weekday not in weekday_by_digit:
        logging.error("invalid weekday specified")
        exit(1)
    weekday = weekday_by_digit[args.weekday]
    weekday_nr = int(args.weekday)

    try:
        start_time = datetime.strptime(args.time, TIMEFORMAT)
    except ValueError:
        logging.error("invalid time specified")
        exit(1)

    enrollemnt_time_difference = int(args.enrollemnt_time_difference)
    # BUG FIX: require the difference to lie in [1, 24]; the old
    # `and`-combined check was unsatisfiable and rejected nothing.
    if not (1 <= enrollemnt_time_difference <= 24):
        logging.error("invalid enrollment time specified")
        exit(1)

    if not url_validator(args.sportfahrplan):
        logging.error("invalid url specified")
        exit(1)

    # constructing enrollment date and time: next occurrence of the
    # requested weekday at the requested clock time.
    current_time = datetime.today()
    start_time = datetime(
        current_time.year,
        current_time.month,
        current_time.day,
        start_time.hour,
        start_time.minute,
    )
    while start_time.weekday() != weekday_nr:
        start_time += timedelta(days=1)

    # constructing link with time filter; format specs zero-pad each
    # component, producing the same string as the old manual padding.
    sportfahrplan_mit_zeitfilter = (
        args.sportfahrplan
        + "&date={:d}-{:02d}-{:02d}%20{:02d}:{:02d}".format(
            start_time.year,
            start_time.month,
            start_time.day,
            start_time.hour,
            start_time.minute,
        )
    )

    # constructing enrollment start time
    enrollment_start_time = start_time - timedelta(
        hours=enrollemnt_time_difference)

    logging.info("Script started")
    waiting_fct(enrollment_start_time)
    asvz_enroll(
        args.username,
        args.password,
        weekday,
        args.facility,
        start_time,
        sportfahrplan_mit_zeitfilter,
    )
    logging.info("Script successfully finished")
print("Attempting to setup will delete everything and start fresh.") res = input("Continue? (Y/N) [Default N] ") if res.lower() == "y": delete_configuration() setup() elif res.lower() == "n" or res.lower() == "": print("Exiting") exit() else: print("Could not read input. Exiting") exit() # Editing / Changing a Webhook elif args.edit: if url_validator(args.edit[1]): if list_all() is None or args.edit[0] in list_all(): cli.change_url(args.edit[0], args.edit[1]) else: print("Did not find \"{}\" in database, would you like to enter it?".format(args.edit[0])) res = input("Enter? (Y/N) [Default N] ") if res.lower() == "y" or res.lower() == "": cli.add_names(args.edit[0], args.edit[1]) else: print("Exiting") exit() else: print("Invalid webhook URL") # Adding a Webhook elif args.add: if url_validator(args.add[1]):