def main(): # Handle command line inputs parser = argparse.ArgumentParser(description="Crawls webpages for URLs") parser.add_argument('-w', type=int, help='Number of processes (default: 2 * cpu_count()).') parser.add_argument('-l', dest='domain', action='store_true', help='If set crawls only domain specific URLs.') parser.add_argument('url', help='URL to crawl.') parser.add_argument('-v', help='Enable verbose.', dest='verbose', action='store_true') parser.add_argument( '-r', help= 'Enable robots.txt url blocking and throttling. Superseedes -w and forces workers to 1.', dest='robot', action='store_true') parser.add_argument('-sd', help='Single depth url crawl.', dest='single', action='store_true') parser.add_argument('-s', help='Silent. Superseedes -v and disables logging.', dest='silent', action='store_true') parser.set_defaults(limit=False, robot=False, domain=False, single=False, silent=False) args = parser.parse_args() logger_to_stdout() if args.verbose: set_verbose() if args.silent: set_silent() if args.w: # TODO: do proper conversion check for the workers input crawl(args.url, workers=args.w, limit_to_domain=args.domain, robot=args.robot, single=args.single) else: crawl(args.url, limit_to_domain=args.domain, robot=args.robot, single=args.single)
def main():
    # Handle command line inputs
    p = argparse.ArgumentParser(
        description="Reads logs in the Apache Combined Log Format and the "
                    "Common Log Format.")
    p.add_argument('-f', help='Tail the log file.', dest='tail',
                   action='store_true')
    p.add_argument('-l', help='Path to logfile.', dest='file')
    p.add_argument('-v', help='Enable verbose.', dest='verbose',
                   action='store_true')
    p.add_argument('-p', help='Arguments for the plugins, split on spaces.',
                   dest='plugs', nargs='+')
    p.add_argument('-s', help='Silent. Supersedes -v and disables logging.',
                   dest='silent', action='store_true')
    p.set_defaults(verbose=False, silent=False, file='/tmp/access.log',
                   tail=False, plugs=[])
    args = p.parse_args()

    logger_to_stdout()
    if args.verbose:
        set_verbose()
    if args.silent:
        set_silent()

    manager = PluginManager()
    manager.load_plugins(args.plugs)
    # `parser` here is presumably the log-parsing module imported at file
    # scope, distinct from the argparse object `p` above.
    parser.tail(args.file, manager, tail=args.tail)
def run_all(config_file, pkgName, git, patch, skip_setup,
            read_cache, project, suite_dir, test_cases,
            base_dir, output_dir, verbose, virttype, debug,
            debugcase, re_run, commands):
    """
    Main process of DTS, it will run all test suites in the config file.
    """

    global requested_tests
    global result
    global excel_report
    global json_report
    global stats_report
    global log_handler
    global check_case_inst

    # save global variable
    serializer = Serializer()

    # load check/support case lists
    check_case_inst = CheckCase()

    # prepare the output folder
    if output_dir == '':
        output_dir = settings.FOLDERS['Output']

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # add external library
    exec_file = os.path.realpath(__file__)
    extra_libs_path = exec_file.replace('framework/dts.py', '') + 'extra_libs'
    sys.path.insert(1, extra_libs_path)

    # add python module search path
    sys.path.append(suite_dir)

    # enable debug mode
    if debug is True:
        settings.save_global_setting(settings.DEBUG_SETTING, 'yes')
    if debugcase is True:
        settings.save_global_setting(settings.DEBUG_CASE_SETTING, 'yes')

    # init log_handler handler
    if verbose is True:
        logger.set_verbose()

    if re_run < 0:
        re_run = 0

    logger.log_dir = output_dir
    log_handler = getLogger('dts')
    log_handler.config_execution('dts')

    # run designated test case
    requested_tests = test_cases

    # Read config file
    dts_cfg_folder = settings.load_global_setting(settings.DTS_CFG_FOLDER)
    if dts_cfg_folder != '':
        config_file = dts_cfg_folder + os.sep + config_file

    config = ConfigParser.SafeConfigParser()
    load_cfg = config.read(config_file)
    if len(load_cfg) == 0:
        raise ConfigParseException(config_file)

    # parse commands
    dts_commands = dts_parse_commands(commands)

    os.environ["TERM"] = "dumb"

    # change rst output folder
    rst.path2Result = output_dir

    # report objects
    excel_report = ExcelReporter(output_dir + '/test_results.xls')
    json_report = JSONReporter(output_dir + '/test_results.json')
    stats_report = StatsReporter(output_dir + '/statistics.txt')
    result = Result()

    crbInsts = []
    crbs_conf = CrbsConf()
    crbs = crbs_conf.load_crbs_config()

    # for all Execution sections
    for section in config.sections():
        dts_parse_param(config, section)

        # verify if the delimiter is good if the lists are vertical
        duts, targets, test_suites = dts_parse_config(config, section)
        for dut in duts:
            log_handler.info("\nDUT " + dut)

        # look up in crbs - to find the matching IP
        for dut in duts:
            for crb in crbs:
                if crb['section'] == dut:
                    crbInsts.append(crb)
                    break

        # only run on the dut in known crbs
        if len(crbInsts) == 0:
            log_handler.error(" SKIP UNKNOWN CRB")
            continue

        result.dut = duts[0]

        # init global lock
        create_parallel_locks(len(duts))

        # init dut, tester crb
        duts, tester = dts_crbs_init(crbInsts, skip_setup, read_cache,
                                     project, base_dir, serializer, virttype)
        tester.set_re_run(re_run)

        # register exit action
        atexit.register(quit_execution, duts, tester)

        check_case_inst.check_dut(duts[0])

        # Run DUT prerequisites
        if dts_run_prerequisties(duts, tester, pkgName, patch, dts_commands,
                                 serializer) is False:
            dts_crbs_exit(duts, tester)
            continue

        dts_run_target(duts, tester, targets, test_suites)

        dts_crbs_exit(duts, tester)

    save_all_results()
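# A minimal sketch of the execution config the loop above consumes. The
# section and key names here are assumptions inferred from how
# dts_parse_config() is called and from the "vertical lists" comment; a real
# DTS execution.cfg may differ. It also shows why the delimiter check matters:
# vertical, comma-delimited values need blank entries stripped after split.
import ConfigParser          # Python 2 module, matching run_all() above
from StringIO import StringIO

SAMPLE_EXECUTION_CFG = """
[Execution1]
crbs=192.168.1.1
targets=
    x86_64-native-linuxapp-gcc
test_suites=
    hello_world,
    shutdown_api
"""

cfg = ConfigParser.SafeConfigParser()
cfg.readfp(StringIO(SAMPLE_EXECUTION_CFG))
for sec in cfg.sections():
    # continuation lines arrive joined with '\n'; split on ',' and drop blanks
    suites = [s.strip()
              for s in cfg.get(sec, 'test_suites').split(',') if s.strip()]
    print(sec, suites)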
def main():
    parser = argparse.ArgumentParser(
        description="Scrape blinkist.com and generate pretty output")
    parser.add_argument(
        "--language",
        choices={"en", "de"},
        default="en",
        help="The language to scrape books in - either 'en' for English or 'de' for German",
    )
    parser.add_argument(
        "--match-language",
        action="store_true",
        default=False,
        help="Skip scraping books if not in the requested language (not all books are available in German)",
    )

    def check_cooldown(value):
        if int(value) < 1:
            raise argparse.ArgumentTypeError("Can't be smaller than 1")
        return int(value)

    parser.add_argument(
        "--cooldown",
        type=check_cooldown,
        default=1,
        help="Seconds to wait between scraping books and downloading audio files. Can't be smaller than 1",
    )
    parser.add_argument(
        "--headless",
        action="store_true",
        default=False,
        help="Start the automated web browser in headless mode. Works only if you have already logged in once",
    )
    parser.add_argument(
        "--audio",
        action="store_true",
        default=False,
        help="Download the audio blinks for each book",
    )
    parser.add_argument(
        "--concat-audio",
        action="store_true",
        default=False,
        help="Concatenate the audio blinks into a single file and tag it. Requires ffmpeg",
    )
    parser.add_argument(
        "--keep-noncat",
        action="store_true",
        default=False,
        help="Keep the individual blink audio files, instead of deleting them (works with '--concat-audio' only)",
    )
    parser.add_argument(
        "--no-scrape",
        action="store_true",
        default=False,
        help="Don't scrape the website, only process existing json files in the dump folder. Do not provide email or password with this option.",
    )
    parser.add_argument(
        "--book",
        default=False,
        help="Scrape this book only; takes the Blinkist url for the book "
             "(e.g. https://www.blinkist.com/en/books/... or https://www.blinkist.com/en/nc/reader/...)",
    )
    parser.add_argument(
        "--daily-book",
        action="store_true",
        default=False,
        help="Scrape the free daily book only.",
    )
    parser.add_argument(
        "--books",
        default=False,
        help="Scrape a list of books; takes a txt file with the list of Blinkist urls for the books "
             "(e.g. https://www.blinkist.com/en/books/... or https://www.blinkist.com/en/nc/reader/...)",
    )
    parser.add_argument(
        "--book-category",
        default="Uncategorized",
        help="When scraping a single book, categorize it under this category (works with '--book' only)",
    )
    parser.add_argument(
        "--categories",
        type=str,
        nargs="+",
        default="",
        help=("Only the categories whose label contains at least one string given here will be scraped. "
              "Case-insensitive; use spaces to separate categories. "
              "(e.g. '--categories entrep market' will only scrape books under 'Entrepreneurship' and 'Marketing & Sales')"),
    )
    parser.add_argument(
        "--ignore-categories",
        type=str,
        nargs="+",
        default="",
        help=("If a category label contains any of the strings given here, books under that category will not be scraped. "
              "Case-insensitive; use spaces to separate categories. "
              "(e.g. '--ignore-categories entrep market' will skip scraping of 'Entrepreneurship' and 'Marketing & Sales')"),
    )
    # NOTE: with action="store_true" and default=True, the two flags below are
    # effectively always on; passing them on the command line has no effect.
    parser.add_argument(
        "--create-html",
        action="store_true",
        default=True,
        help="Generate a formatted html document for the book",
    )
    parser.add_argument(
        "--create-epub",
        action="store_true",
        default=True,
        help="Generate a formatted epub document for the book",
    )
    parser.add_argument(
        "--create-pdf",
        action="store_true",
        default=False,
        help="Generate a formatted pdf document for the book. Requires wkhtmltopdf",
    )
    parser.add_argument(
        "--save-cover",
        action="store_true",
        default=False,
        help="Save a copy of the Blink cover artwork in the folder",
    )
    parser.add_argument(
        "--embed-cover-art",
        action="store_true",
        default=False,
        help="Embed the Blink cover artwork into the concatenated audio file (works with '--concat-audio' only)",
    )
    parser.add_argument(
        "--chromedriver",
        help="Path to a specific chromedriver executable instead of the built-in one",
    )
    parser.add_argument("-v", "--verbose",
                        action="store_true",
                        help="Increases logging verbosity")
    if "--no-scrape" not in sys.argv:
        parser.add_argument(
            "email", help="The email to log into your premium Blinkist account")
        parser.add_argument(
            "password", help="The password to log into your premium Blinkist account")

    args = parser.parse_args()

    # set up logger verbosity
    logger.set_verbose(log, args.verbose)

    def generate_book_outputs(book_json, cover_img=False):
        if args.create_html:
            generator.generate_book_html(book_json, cover_img)
        if args.create_epub:
            generator.generate_book_epub(book_json)
        if args.create_pdf:
            generator.generate_book_pdf(book_json, cover_img)

    def scrape_book(driver, processed_books, book_url, category, match_language):
        book_json, dump_exists = scraper.scrape_book_data(
            driver, book_url, category=category, match_language=match_language)
        if book_json:
            cover_img_file = False
            cover_tmp_file = False
            if args.audio:
                audio_files = scraped_audio_exists(book_json)
                if not audio_files:
                    audio_files = scraper.scrape_book_audio(
                        driver, book_json, args.language)
                if audio_files and args.concat_audio:
                    if isinstance(audio_files, list):
                        if args.embed_cover_art:
                            cover_tmp_file = scraper.download_book_cover_image(
                                book_json, filename="_cover.jpg",
                                alt_file="cover.jpg")
                        generator.combine_audio(book_json, audio_files,
                                                args.keep_noncat, cover_tmp_file)
            if args.save_cover:
                cover_img_file = scraper.download_book_cover_image(
                    book_json, filename="cover.jpg", alt_file="_cover.jpg")
                generate_book_outputs(book_json, cover_img=cover_img_file)
            else:
                generate_book_outputs(book_json)
            if cover_tmp_file:
                if os.path.exists(cover_tmp_file):
                    log.debug(f"Deleting {cover_tmp_file}")
                    os.remove(cover_tmp_file)
                else:
                    log.debug(f'Could not find "{cover_tmp_file}"')
            processed_books.append(book_url)
        return dump_exists

    def finish(start_time, processed_books, driver=None):
        if driver:
            driver.close()
        elapsed_time = time.time() - start_time
        formatted_time = "{:02d}:{:02d}:{:02d}".format(
            int(elapsed_time // 3600),
            int(elapsed_time % 3600 // 60),
            int(elapsed_time % 60),
        )
        total_books = len(processed_books)
        log.info(
            f"Processed {total_books} book{'s' if total_books != 1 else ''} "
            f"in {formatted_time}"
        )

    # start scraping
    log.info("Starting scrape run...")
    processed_books = []
    start_time = time.time()

    if args.no_scrape:
        # if the --no-scrape argument is passed, just process the
        # existing json dump files
        for file in glob.glob(os.path.join(os.getcwd(), "dump", "*.json")):
            generate_book_outputs(file)
            processed_books.append(file)
        finish(start_time, processed_books)
    else:
        match_language = args.language if args.match_language else ""
        # if no login cookies were found, don't start a headless browser
        # so that the user can solve recaptcha and log in
        start_headless = args.headless
        if not scraper.has_login_cookies():
            start_headless = False
        # add uBlock (if the conditions are right)
        use_ublock = not (args.book or args.headless)
        driver = scraper.initialize_driver(
            headless=start_headless,
            with_ublock=use_ublock,
            chromedriver_path=args.chromedriver,
        )

        is_logged_in = scraper.login(driver, args.language,
                                     args.email, args.password)
        if is_logged_in:
            if args.book or args.daily_book:
                # scrape single book
                book_url = (args.book
                            if not args.daily_book
                            else scraper.get_daily_book_url(driver, args.language))
                scrape_book(
                    driver,
                    processed_books,
                    book_url,
                    category={"label": args.book_category},
                    match_language=match_language,
                )
            elif args.books:
                # scrape list of books
                with open(args.books, "r") as books_urls:
                    for book_url in books_urls.readlines():
                        dump_exists = scrape_book(
                            driver,
                            processed_books,
                            book_url.strip(),
                            category={"label": args.book_category},
                            match_language=match_language,
                        )
                        if not dump_exists:
                            time.sleep(args.cooldown)
            else:
                # scrape all books / categories
                all_books = scraper.get_all_books(driver, args.language)
                categories = scraper.get_categories(
                    driver,
                    args.language,
                    specified_categories=args.categories,
                    ignored_categories=args.ignore_categories,
                )
                for category in categories:
                    books_urls = scraper.get_all_books_for_categories(
                        driver, category)
                    for book_url in books_urls:
                        dump_exists = scrape_book(
                            driver,
                            processed_books,
                            book_url,
                            category=category,
                            match_language=match_language,
                        )
                        # if we processed the book from an existing dump,
                        # no scraping was involved, so no need to cool down
                        if not dump_exists:
                            time.sleep(args.cooldown)
                uncategorized_books = [
                    x for x in all_books if x not in processed_books
                ]
                log.info(
                    f"Scraping {len(uncategorized_books)} remaining "
                    "uncategorized books..."
                )
                for book_url in uncategorized_books:
                    dump_exists = scrape_book(
                        driver,
                        processed_books,
                        book_url,
                        category={"label": "Uncategorized"},
                        match_language=match_language,
                    )
                    if not dump_exists:
                        time.sleep(args.cooldown)
        else:
            log.error("Unable to log in to Blinkist")
        finish(start_time, processed_books, driver)
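# Example invocations, shown only to illustrate the argument surface defined
# above (the script name, email, and password here are hypothetical):
#
#   python blinkistscraper.py --language en --audio --concat-audio \
#       user@example.com hunter2
#   python blinkistscraper.py --no-scrape --create-pdf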
def parse_arguments():
    """ parse command line arguments """
    parser = argparse.ArgumentParser()
    parser.add_argument('filename',
                        help='image file that will be decorated')
    parser.add_argument('-c', '--compress',
                        default=config.DEFAULT_COMPRESS_LEVEL,
                        type=int,
                        help=('set compress level of output image. '
                              '(0 to 5, 0: highest compress, 5: no compress) '
                              '(default={})'.format(config.DEFAULT_COMPRESS_LEVEL)))
    parser.add_argument('-d', '--density',
                        default=config.DEFAULT_DENSITY_LEVEL,
                        type=int,
                        help=('set particles density. '
                              '(0 to 5, 0: most sparse, 5: most dense) '
                              '(default={})'.format(config.DEFAULT_DENSITY_LEVEL)))
    parser.add_argument('-f', '--frames',
                        default=config.DEFAULT_N_FRAMES,
                        type=int,
                        help=('set output image\'s number of frames '
                              '(default={})'.format(config.DEFAULT_N_FRAMES)))
    parser.add_argument('-p', '--particle',
                        default=config.DEFAULT_PARTICLE_TYPE,
                        help=('set type of particle '
                              '(types: SNOW) '
                              '(default={})'.format(config.DEFAULT_PARTICLE_TYPE)))
    parser.add_argument('-P', '--palette',
                        default=config.DEFAULT_COLOR_PALETTE,
                        help=('set color palette '
                              '(types: BRIGHT, DAWN, PINK, WHITE) '
                              '(default={})'.format(config.DEFAULT_COLOR_PALETTE)))
    parser.add_argument('-s', '--speed',
                        default=config.DEFAULT_SPEED_LEVEL,
                        type=int,
                        help=('set particles speed. '
                              '(0 to 5, 0: slowest, 5: fastest) '
                              '(default={})'.format(config.DEFAULT_SPEED_LEVEL)))
    parser.add_argument('-S', '--size',
                        default=config.DEFAULT_SIZE_LEVEL,
                        type=int,
                        help=('set size of particle '
                              '(0 to 5, 0: smallest, 5: largest) '
                              '(default={})'.format(config.DEFAULT_SIZE_LEVEL)))
    parser.add_argument('-v', '--verbose',
                        default=False,
                        const=True,
                        action='store_const',
                        help='print intermediate logs')
    parser.add_argument('-w', '--webp',
                        default='GIF',
                        const='WEBP',
                        dest='format',
                        action='store_const',
                        help='change output image format from gif to webp')
    parser.add_argument('-o', '--output',
                        default=config.DEFAULT_OUTPUT_NAME,
                        help=('set output file name '
                              '(default={})'.format(config.DEFAULT_OUTPUT_NAME)))
    parser.add_argument('--custom_speed',
                        default=None,
                        help=('set custom particle speed '
                              '(format: (x_min, x_max, y_min, y_max))'))
    parser.add_argument('--custom_size',
                        default=None,
                        help=('set custom particle size '
                              '(format: (min, max))'))
    # not implemented
    parser.add_argument('--not_continuous',
                        default=False,
                        const=True,
                        action='store_const',
                        help='make the output image non-continuous')
    args = parser.parse_args()

    if args.verbose:
        logger.set_verbose()

    settings = {
        'filename': args.filename,
        'frame_size': config.FRAME_SIZES[args.compress],
        'continuous': not args.not_continuous,
        'density': (config.PARTICLE_NUMBERS[args.density]
                    if args.not_continuous
                    else config.LANE_NUMBERS[args.density]),
        'n_frames': args.frames,
        'type': args.particle.upper(),
        'palette': args.palette.upper(),
        'speed': config.SPEED_LEVELS[args.speed],
        'size': config.SIZE_LEVELS[args.size],
        'format': args.format,
        'outputname': (args.output
                       if '.' in args.output
                       else args.output + '.' + args.format.lower()),
    }
    if args.custom_speed:
        settings['speed'] = literal_eval(args.custom_speed)
    if args.custom_size:
        settings['size'] = literal_eval(args.custom_size)

    return settings
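# A minimal sketch (function name hypothetical, not part of the original code)
# of how the `--custom_speed` string is expected to parse: ast.literal_eval
# turns the documented "(x_min, x_max, y_min, y_max)" form into a tuple, and a
# shape check guards against malformed input before it reaches the renderer.
# The code above calls literal_eval bare, implying `from ast import
# literal_eval` at module scope.
from ast import literal_eval

def parse_custom_speed(raw):
    speed = literal_eval(raw)  # e.g. "(1, 3, -2, 2)" -> (1, 3, -2, 2)
    if not (isinstance(speed, tuple) and len(speed) == 4):
        raise ValueError(
            'expected (x_min, x_max, y_min, y_max), got {!r}'.format(raw))
    return speed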
def run_all(config_file, pkgName, git, patch, skip_setup,
            read_cache, project, suite_dir, test_cases,
            base_dir, output_dir, verbose, debug):
    """
    Main process of DTS, it will run all test suites in the config file.
    """

    global config
    global serializer
    global nic
    global requested_tests
    global result
    global excel_report
    global stats
    global log_handler
    global debug_mode

    # prepare the output folder
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # add python module search path
    for folder in FOLDERS.values():
        sys.path.append(folder)
    sys.path.append(suite_dir)

    # enable debug mode
    if debug is True:
        debug_mode = True

    # init log_handler handler
    if verbose is True:
        logger.set_verbose()

    logger.log_dir = output_dir
    log_handler = getLogger('dts')
    log_handler.config_execution('dts')

    # run designated test case
    requested_tests = test_cases

    # Read config file
    config = ConfigParser.SafeConfigParser()
    config.read(config_file)

    # register exit action
    atexit.register(close_crb_sessions)

    os.environ["TERM"] = "dumb"

    serializer = Serializer()

    # excel report and statistics file
    result = Result()
    rst.path2Result = output_dir
    excel_report = ExcelReporter(output_dir + '/test_results.xls')
    stats = StatsReporter(output_dir + '/statistics.txt')

    # for all Execution sections
    for section in config.sections():
        dts_parse_param(section)

        # verify if the delimiter is good if the lists are vertical
        dutIP, targets, test_suites, nics = dts_parse_config(section)
        log_handler.info("\nDUT " + dutIP)

        # look up in crbs - to find the matching IP
        crbInst = None
        for crb in crbs:
            if crb['IP'] == dutIP:
                crbInst = crb
                break

        # only run on the dut in known crbs
        if crbInst is None:
            log_handler.error(" SKIP UNKNOWN CRB")
            continue

        result.dut = dutIP

        # init dut, tester crb
        dts_crbs_init(crbInst, skip_setup, read_cache, project, base_dir, nics)

        # Run DUT prerequisites
        if dts_run_prerequisties(pkgName, patch) is False:
            dts_crbs_exit()
            continue

        dts_run_target(crbInst, targets, test_suites, nics)

        dts_crbs_exit()

    save_all_results()
def run_all(config_file, pkgName, patch, force_setup,
            read_cache, project, suite_dir, test_cases,
            base_dir, output_dir, verbose, virttype, debug,
            debugcase, re_run, commands, pktgen, test_configs):
    """
    Main process of DTS, it will run all test suites in the config file.
    """

    global requested_tests
    global result
    global excel_report
    global json_report
    global stats_report
    global log_handler
    global check_case_inst

    # save global variable
    serializer = Serializer()

    # load check/support case lists
    check_case_inst = CheckCase()

    # prepare the output folder
    if output_dir == '':
        output_dir = settings.FOLDERS['Output']

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # add python module search path
    sys.path.append(suite_dir)

    # enable debug mode
    if debug is True:
        settings.save_global_setting(settings.DEBUG_SETTING, 'yes')
    if debugcase is True:
        settings.save_global_setting(settings.DEBUG_CASE_SETTING, 'yes')

    # init log_handler handler
    if verbose is True:
        logger.set_verbose()

    if re_run < 0:
        re_run = 0

    logger.log_dir = output_dir
    log_handler = getLogger('dts')
    log_handler.config_execution('dts')

    # run designated test case
    requested_tests = test_cases

    # Read config file
    dts_cfg_folder = settings.load_global_setting(settings.DTS_CFG_FOLDER)
    if dts_cfg_folder != '':
        config_file = dts_cfg_folder + os.sep + config_file

    config = ConfigParser.SafeConfigParser()
    load_cfg = config.read(config_file)
    if len(load_cfg) == 0:
        raise ConfigParseException(config_file)

    # parse commands
    dts_commands = dts_parse_commands(commands)

    os.environ["TERM"] = "dumb"

    # change rst output folder
    rst.path2Result = output_dir

    # report objects
    excel_report = ExcelReporter(output_dir + '/test_results.xls')
    json_report = JSONReporter(output_dir + '/test_results.json')
    stats_report = StatsReporter(output_dir + '/statistics.txt')
    result = Result()

    crbInsts = []
    crbs_conf = CrbsConf()
    crbs = crbs_conf.load_crbs_config()

    # for all Execution sections
    for section in config.sections():
        # Skip configuration sections
        if section in ['DPDK', 'Pktgen', 'Tester_DPDK', 'Tester_Pktgen',
                       'latency', 'reset']:
            continue

        dts_parse_param(config, section)

        # verify if the delimiter is good if the lists are vertical
        duts, targets, test_suites = dts_parse_config(config, section)

        # look up in crbs - to find the matching IP
        for dut in duts:
            for crb in crbs:
                if crb['section'] == dut:
                    crbInsts.append(crb)
                    break

        # only run on the dut in known crbs
        if len(crbInsts) == 0:
            log_handler.error(" SKIP UNKNOWN CRB")
            continue

        result.dut = duts[0]

        # init dut, tester crb
        duts, testers = dts_crbs_init(crbInsts, read_cache, project, base_dir,
                                      serializer, virttype, test_configs)
        for tester in testers:
            tester.set_re_run(re_run)

        # register exit action
        atexit.register(quit_execution, duts, testers)

        check_case_inst.change_dut(duts[0])

        test_configs["force_setup"] = force_setup
        # Check if set-up is installed on all CRBs:
        if force_setup is False:
            setup_ready = True
            dut_dpdk_repo = parse_repo(dict(config.items("DPDK")))
            dut_pktgen_repo = parse_repo(dict(config.items("Pktgen")))
            for dut in duts:
                setup_ready = setup_ready and dut.check_setup(
                    dut_dpdk_repo, dut_pktgen_repo,
                    test_configs["skip_target_env_setup"])
            tester_dpdk_repo = (parse_repo(dict(config.items("Tester_DPDK")))
                                if "Tester_DPDK" in config.sections()
                                else dut_dpdk_repo)
            tester_pktgen_repo = (parse_repo(dict(config.items("Tester_Pktgen")))
                                  if "Tester_Pktgen" in config.sections()
                                  else dut_pktgen_repo)
            for tester in testers:
                setup_ready = setup_ready and tester.check_setup(
                    tester_dpdk_repo, tester_pktgen_repo,
                    test_configs["skip_target_env_setup"])
        else:
            setup_ready = False

        show_speedup_options_messages(read_cache, setup_ready,
                                      test_configs["try_reuse_pcaps"],
                                      test_cases)
        for tester in testers:
            tester.set_speedup_options(read_cache, setup_ready)
        for dut in duts:
            dut.set_speedup_options(read_cache, setup_ready)

        # Clone DPDK and Pktgen repos and apply patches
        if not setup_ready:
            prepare_repos(config, pkgName, pktgen)

        # Run DUT prerequisites
        if dts_run_prerequisties(duts, testers, pkgName, patch, dts_commands,
                                 serializer, pktgen, test_configs) is False:
            dts_crbs_exit(duts, testers)
            continue

        dts_run_target(duts, testers, targets, test_suites, test_configs)

        dts_crbs_exit(duts, testers)

    save_all_results()