def parse_arguments():
    """Parse command-line options for the search/delete tool.

    Returns:
        argparse.Namespace: parsed arguments with attributes
        ``cfgfile``, ``string``, ``delete`` and ``always_commit``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--cfg", dest="cfgfile",
                        help="Configuration file", required=True)
    parser.add_argument("-s", "--searchstringst", dest="string",
                        help="String to search for", required=True)
    parser.add_argument('-d', '--delete', action='store_true',
                        help="Flag to delete records")
    parser.add_argument('-a', '--always_commit', action='store_true',
                        help="Flag to commit directly")
    # -c and -s are declared required=True, so argparse itself exits with a
    # usage error before parse_args() returns if either is missing.  The old
    # post-parse "is None" check was unreachable dead code and is removed.
    return parser.parse_args()
def process_arguments():
    """Parse command-line options and build the crawl work lists.

    Returns:
        tuple: ``(articles, categories, excluded_articles,
        excluded_categories, depth, outfilename)`` — the first four are
        lists of title strings (articles/categories deduplicated),
        ``depth`` is an int and ``outfilename`` is the output CSV path.
    """
    parser = argparse.ArgumentParser(description='Retrieve revision information for Wikipedia article(s).')
    parser.add_argument('-c', '--category', metavar='category_title', dest='category',
                        help='The name of a Wikipedia category (e.g. Category:2009_earthquakes).')
    parser.add_argument('-a', '--article', metavar='article_title', dest='article',
                        help='The name of a Wikipedia article (e.g. 2009_Bhutan_earthquake).')
    parser.add_argument('-i', '--input', metavar='input_filename', dest='infilename',
                        help='Name of input file a list of articles and categories, one per line.')
    # type=int so a depth given on the command line matches the integer
    # default (previously CLI values arrived as strings).
    parser.add_argument('-d', '--depth', metavar='depth', dest='depth', default=0, type=int,
                        help='The crawling depth for the given category, integer >= 0. Default is 0.')
    parser.add_argument('-xc', metavar='excluded_categories', dest='excluded_categories',
                        help='A list of categories to exclude from the results, separated by commas (e.g. Category:a,Category:b).')
    parser.add_argument('-xa', metavar='excluded_articles', dest='excluded_articles',
                        help='A list of articles to exclude from the results, separated by commas (e.g. article1,article2).')
    parser.add_argument('-xf', metavar='exclusions_filename', dest='exclusions_filename',
                        help='Name of file containing list of articles and/or categories, one per line, to exclude from the results.')
    parser.add_argument('-o', '--output', metavar='output_filename', dest='outfilename', required=True,
                        help='Name of output CSV file. *REQUIRED*')
    args = parser.parse_args()

    if not (args.infilename or args.article or args.category):
        parser.exit(status=-1,
                    message='At least one form of input (article, category, or infile) is needed!\n')

    def _read_titles(path, category_list, article_list):
        # Append each stripped line of `path` to category_list or
        # article_list depending on the 'Category:' prefix.  Text mode:
        # the old 'rb' mode produced bytes objects that cannot be
        # compared against the str prefix in Python 3.
        with open(path, 'r') as title_file:
            for title in title_file:
                if title.startswith('Category:'):
                    category_list.append(title.rstrip())
                else:
                    article_list.append(title.rstrip())

    excluded_articles = args.excluded_articles.split(',') if args.excluded_articles else []
    excluded_categories = args.excluded_categories.split(',') if args.excluded_categories else []
    if args.exclusions_filename:
        _read_titles(args.exclusions_filename, excluded_categories, excluded_articles)

    articles = []
    categories = []
    if args.article:
        articles.append(args.article)
    if args.category:
        categories.append(args.category)
    if args.infilename:
        _read_titles(args.infilename, categories, articles)

    articles = list(set(articles))
    categories = list(set(categories))
    # Bug fix: the old return statement referenced the undefined names
    # `depth` and `outfilename` (NameError at runtime); use the parsed
    # argument values instead.
    return (articles, categories, excluded_articles, excluded_categories,
            args.depth, args.outfilename)
def check_path(path, parser):
    """Checks the file path existence, type and permissions.

    Prints a diagnostic and terminates via ``parser.exit(1)`` on the
    first failed check; returns None when the path is a readable file.

    Args:
        path: filesystem path to validate.
        parser: argparse.ArgumentParser used to terminate the program.
    """
    # Bug fix: the old calls were print("...%s", path) — the comma passed
    # the path as a second print() argument instead of %-formatting it,
    # so the literal "%s" appeared in the output.
    if not os.path.exists(path):
        print("File does not exist:\n%s" % path)
        parser.exit(1)
    if not os.path.isfile(path):
        print("File is a directory:\n%s" % path)
        parser.exit(1)
    if not os.access(path, os.R_OK):
        print("File does not have read permissions:\n%s" % path)
        parser.exit(1)
def main():
    """Command-line entry point for the price tool.

    Defines three sub-commands -- CMD_LATEST, CMD_HISTORY and CMD_LIST --
    stores the parsed options in the shared ``config.args``, then either
    prices an asset (latest/history) or lists assets (list).  All error
    paths terminate through ``parser.exit``.
    """
    colorama.init()
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version',
                        action='version',
                        version='%s v%s' % (parser.prog, __version__))
    # required= for sub-parsers only exists from Python 3.7 onwards; on
    # older versions a missing sub-command is simply left as command=None.
    if sys.version_info[:2] >= (3, 7):
        subparsers = parser.add_subparsers(dest='command', required=True)
    else:
        subparsers = parser.add_subparsers(dest='command')
    # --- "latest" sub-command: current price of an asset ----------------
    parser_latest = subparsers.add_parser(CMD_LATEST,
                                          help="get the latest price of an asset",
                                          description="Get the latest [asset] price (in GBP). "
                                                      "If no data source [-ds] is given, "
                                                      "the same data source(s) as "
                                                      "'bittytax' are used.")
    parser_latest.add_argument('asset',
                               type=str,
                               nargs=1,
                               help="symbol of cryptoasset or fiat currency "
                                    "(i.e. BTC/LTC/ETH or EUR/USD)")
    parser_latest.add_argument('quantity',
                               type=validate_quantity,
                               nargs='?',
                               help="quantity to price (optional)")
    parser_latest.add_argument('targetasset',
                               type=str.upper,
                               nargs='?',
                               default=config.CCY,
                               help="convert into specificed target asset (optional, default=%s)" % config.CCY)
    parser_latest.add_argument('-ds',
                               choices=datasource_choices(upper=True) + ['ALL'],
                               metavar='{' + ', '.join(datasource_choices()) + '} or ALL',
                               dest='datasource',
                               type=str.upper,
                               help="specify the data source to use, or all")
    parser_latest.add_argument('-d', '--debug',
                               action='store_true',
                               help="enable debug logging")
    # --- "history" sub-command: price at a given date -------------------
    parser_history = subparsers.add_parser(CMD_HISTORY,
                                           help="get the historical price of an asset",
                                           description="Get the historic [asset] price (in GBP) "
                                                       "for the [date] specified. "
                                                       "If no data source [-ds] is given, "
                                                       "the same data source(s) as "
                                                       "'bittytax' are used.")
    # NOTE(review): 'latest' uses type=str for the asset while 'history'
    # uses type=str.upper -- confirm whether the inconsistency is intended.
    parser_history.add_argument('asset',
                                type=str.upper,
                                nargs=1,
                                help="symbol of cryptoasset or fiat currency "
                                     "(i.e. BTC/LTC/ETH or EUR/USD)")
    parser_history.add_argument('date',
                                type=validate_date,
                                nargs=1,
                                help="date (YYYY-MM-DD or DD/MM/YYYY)")
    parser_history.add_argument('quantity',
                                type=validate_quantity,
                                nargs='?',
                                help="quantity to price (optional)")
    parser_history.add_argument('targetasset',
                                type=str.upper,
                                nargs='?',
                                default=config.CCY,
                                help="convert into specificed target asset (optional, default=%s)" % config.CCY)
    parser_history.add_argument('-ds',
                                choices=datasource_choices(upper=True) + ['ALL'],
                                metavar='{' + ', '.join(datasource_choices()) + '} or ALL',
                                dest='datasource',
                                type=str.upper,
                                help="specify the data source to use, or all")
    parser_history.add_argument('-nc', '--nocache',
                                action='store_true',
                                help="bypass data cache")
    parser_history.add_argument('-d', '--debug',
                                action='store_true',
                                help="enable debug logging")
    # --- "list" sub-command: enumerate/filter known assets --------------
    parser_list = subparsers.add_parser(CMD_LIST,
                                        help="list all assets",
                                        description='List all assets, or filter by [asset].')
    parser_list.add_argument('asset',
                             type=str,
                             nargs='?',
                             help="symbol of cryptoasset or fiat currency "
                                  "(i.e. BTC/LTC/ETH or EUR/USD)")
    parser_list.add_argument('-s',
                             type=str,
                             nargs='+',
                             metavar='STRING',
                             dest='search',
                             help="search assets using STRING")
    parser_list.add_argument('-ds',
                             choices=datasource_choices(upper=True) + ['ALL'],
                             metavar='{' + ', '.join(datasource_choices()) + '} or ALL',
                             dest='datasource',
                             type=str.upper,
                             help="specify the data source to use, or all")
    parser_list.add_argument('-d', '--debug',
                             action='store_true',
                             help="enable debug logging")
    # Options are stored globally so the rest of the package can read
    # them through config.args.
    config.args = parser.parse_args()
    if config.args.debug:
        print("%s%s v%s" % (Fore.YELLOW, parser.prog, __version__))
        print("%spython: v%s" % (Fore.GREEN, platform.python_version()))
        print("%ssystem: %s, release: %s" % (Fore.GREEN, platform.system(), platform.release()))
        config.output_config()
    if config.args.command in (CMD_LATEST, CMD_HISTORY):
        symbol = config.args.asset[0]
        target_symbol = config.args.targetasset
        # asset flags whether the symbol was recognised at all; price
        # flags whether a usable price was produced.
        asset = price = False
        try:
            if config.args.datasource:
                # An explicit data source (or ALL) was requested.
                if config.args.command == CMD_HISTORY:
                    assets = AssetData().get_historic_price_ds(symbol, config.args.date[0],
                                                               config.args.datasource)
                else:
                    assets = AssetData().get_latest_price_ds(symbol, config.args.datasource)
                btc = None
                for asset in assets:
                    if not asset['price']:
                        continue
                    output_ds_price(asset)
                    if asset['quote'] == 'BTC':
                        # Price is quoted in BTC: fetch the BTC price once
                        # (lazily) and convert through it.
                        if btc is None:
                            if config.args.command == CMD_HISTORY:
                                btc = get_historic_btc_price(config.args.date[0])
                            else:
                                btc = get_latest_btc_price()
                        if btc['price'] is not None:
                            price_ccy = btc['price'] * asset['price']
                            output_ds_price(btc)
                            price = True
                    else:
                        price_ccy = asset['price']
                        price = True
                    # NOTE(review): if the first priced asset is BTC-quoted
                    # and the BTC price is unavailable, price_ccy is
                    # referenced before assignment here -- confirm.
                    output_price(symbol, price_ccy)
                if not assets:
                    asset = False
            else:
                # No explicit data source: use the same lookup chain as
                # the main tool.
                value_asset = ValueAsset(price_tool=True)
                if config.args.command == CMD_HISTORY:
                    price_ccy, name, _ = value_asset.get_historical_price(symbol,
                                                                          config.args.date[0],
                                                                          target_symbol)
                else:
                    price_ccy, name, _ = value_asset.get_latest_price(symbol, target_symbol)
                if price_ccy is not None:
                    output_price(symbol, price_ccy, target_symbol)
                    price = True
                if name is not None:
                    asset = True
        except DataSourceError as e:
            parser.exit("%sERROR%s %s" % (Back.RED+Fore.BLACK, Back.RESET+Fore.RED, e))
        if not asset:
            parser.exit("%sWARNING%s Prices for %s are not supported" % (
                Back.YELLOW+Fore.BLACK, Back.RESET+Fore.YELLOW, symbol))
        if not price:
            if config.args.command == CMD_HISTORY:
                parser.exit("%sWARNING%s Price for %s on %s is not available" % (
                    Back.YELLOW+Fore.BLACK, Back.RESET+Fore.YELLOW, symbol,
                    config.args.date[0].strftime('%Y-%m-%d')))
            else:
                parser.exit("%sWARNING%s Current price for %s is not available" % (
                    Back.YELLOW+Fore.BLACK, Back.RESET+Fore.YELLOW, symbol))
    elif config.args.command == CMD_LIST:
        symbol = config.args.asset
        try:
            assets = AssetData().get_assets(symbol, config.args.datasource, config.args.search)
        except DataSourceError as e:
            parser.exit("%sERROR%s %s" % (Back.RED+Fore.BLACK, Back.RESET+Fore.RED, e))
        if symbol and not assets:
            parser.exit("%sWARNING%s Asset %s not found" % (
                Back.YELLOW+Fore.BLACK, Back.RESET+Fore.YELLOW, symbol))
        if config.args.search and not assets:
            parser.exit("No results found")
        output_assets(assets)
def usage() -> argparse.Namespace:
    """Parse the options provided on the command line.

    Returns:
        argparse.Namespace: The parameters provided on the command line.
    """
    # add_help=False: help is handled manually below so the positional
    # "settings" argument can be registered before the help is printed.
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h', '--help',
                        action='store_true',
                        help='show this help message and exit')
    group = parser.add_argument_group("General", "Simulation general settings")
    group.add_argument("--first-date",
                       help="The first date to be processed. "
                            "Default to the current date",
                       type=datetime_type,
                       default=np.datetime64("now"))
    group.add_argument("--last-date",
                       help="The last date to be processed. "
                            "Default to the last date allowing to cover an entire "
                            "cycle.",
                       type=datetime_type)
    group = parser.add_argument_group("Execution", "Runtime parameters options ")
    group.add_argument("--debug",
                       action="store_true",
                       help="Put swot simulator in debug mode")
    group.add_argument("--log",
                       metavar='PATH',
                       help="Path to the logbook to use",
                       type=argparse.FileType("w"))
    group.add_argument("--scheduler-file",
                       help="Path to a file with scheduler information to "
                            "launch swot simulator on a cluster. By "
                            "default, use a local cluster.",
                       metavar='PATH',
                       type=argparse.FileType("r"))
    group = parser.add_argument_group("LocalCluster", "Dask local cluster option")
    group.add_argument("--n-workers",
                       help="Number of workers to start (Default to 1)",
                       type=int,
                       metavar='N',
                       default=1)
    group.add_argument("--processes",
                       help="Whether to use processes (True) or threads "
                            "(False). Defaults to False",
                       action="store_true")
    group.add_argument("--threads-per-worker",
                       help="Number of threads per each worker. "
                            "(Default to 1)",
                       type=int,
                       metavar='N',
                       default=1)
    group = parser.add_argument_group("Configuration")
    group.add_argument("--template",
                       help="Writes the default configuration of the "
                           "simulator into the file and ends the program.",
                       metavar="PATH",
                       type=argparse.FileType("w"))
    namespace = argparse.Namespace()
    # The *private* _parse_known_args is used deliberately: unlike the
    # public parse_known_args it does not copy action defaults into the
    # namespace, so the `"name" in namespace` membership tests below are
    # True only for options actually supplied on the command line.
    # NOTE(review): this relies on a CPython implementation detail and can
    # break on interpreter upgrades -- confirm against supported versions.
    namespace, _ = parser._parse_known_args(sys.argv[1:], namespace)

    def add_settings(parser):
        """Added the argument defining the settings of the simulator."""
        parser.add_argument("settings",
                            type=argparse.FileType('r'),
                            help="Path to the parameters file")

    # Displays help and ends the program.
    if "help" in namespace:
        add_settings(parser)
        parser.print_help()
        parser.exit(0)

    # Checking exclusive options.
    if "scheduler_file" in namespace:
        # The LocalCluster options make no sense when a distributed
        # scheduler file is supplied.
        for item in ["n_workers", "processes", "threads_per_worker"]:
            if item in namespace:
                item = item.replace("_", "-")
                raise RuntimeError(
                    f"--{item}: not allowed with argument --scheduler-file")

    # Write the template configuration file and ends the program
    if "template" in namespace:
        namespace.template.write(settings.template())
        sys.stdout.write(f"""
The template has been written in the file: {namespace.template.name!r}.
""")
        parser.exit(0)

    # The partial analysis of the command line arguments is finished, the last
    # argument is added and parsed one last time.
    add_settings(parser)
    return parser.parse_args()
def main():
    """Main entry point for the script.

    Reads a server list file ("hostname user password" per line), SSHes to
    each server, pulls its HTTP access log, and prints the top-N source IP
    addresses with their hit rates within the last --timediff hours.
    """
    parser = argparse.ArgumentParser(description='Determines the top 10 most common source IP addresses, and their hit'
                                                 ' rates, for a fleet of 1000 web servers within the last hour')
    parser.add_argument('server_list', help='Path to the text file containing list of servers')
    # Bug fix: help text was missing its closing parenthesis.
    parser.add_argument('-N', default=10, type=int,
                        help='Outputs N number of most common IP Addresses (Default: 10)')
    parser.add_argument('-L', '--logpath', default='/var/log/httpd-access-log', type=str,
                        help='Change the log file location on server, common for all '
                             '(Default: /var/log/httpd-access-log)')
    parser.add_argument('-T', '--timediff', default=1, type=int,
                        help='Change the time difference when looking for common IPs, in hours (Default: 1 hour)')
    args = parser.parse_args()
    # Makes sure the server list file is valid
    check_path(args.server_list, parser)
    # Dictionaries for IP Address and Hit counts
    ip_dict = collections.defaultdict(int)
    hit_success = collections.defaultdict(int)
    with open(args.server_list, "rb") as servers:
        for server in servers:
            # The program expects a valid format for listing servers:
            # "hostname user password" on each line.
            hostname, user, passwd = server.split()
            # Starts the SSH Client; host key verification is skipped and
            # unknown host keys are accepted automatically.
            client = paramiko.SSHClient()
            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            client.connect(hostname.decode('UTF-8'), username=user.decode('UTF-8'),
                           password=passwd.decode('UTF-8'))
            # Resource fix: always close the SSH connection, even if reading
            # or parsing the remote log raises (the old code leaked one open
            # connection per server).
            try:
                # Streams the remote log file via stdout of `cat`.
                _, data, _ = client.exec_command("cat {}".format(args.logpath))
                # Parses each log line and accumulates IP address/hit counts.
                for line in data:
                    log = line.strip("\n")
                    ip_address, date_time, status_code = parse_log(log)
                    if check_time(date_time, args.timediff):
                        ip_dict[ip_address] += 1
                        if status_code == "200":
                            hit_success[ip_address] += 1
            finally:
                client.close()
    # An ascending list of IP address occurrences
    ip_list = sorted(ip_dict.items(), key=operator.itemgetter(1))
    if ip_list:
        print("IP Address Hit Rate")
        for _ in range(args.N):
            # Gets the last element that has the highest occurrence
            try:
                top_ip, total_hits = ip_list.pop()
            except IndexError:
                break
            # Hit Rate = # of successful connections/total connection attempts
            hit_rate = (hit_success[top_ip] / total_hits) * 100
            print("{0} ---- {1:.2f}%".format(top_ip, hit_rate))
    else:
        print("No results found.")
    parser.exit(0)
def main():
    """Command-line entry point for the subreddit scraper.

    Exactly one of --posts / --subs / --config selects the mode; work is
    optionally fanned out over a multiprocessing Pool of --workers.
    """
    import textwrap
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""\
            Scrapes subreddits and puts their content in a plain text file.
            Use with --posts to download posts, --subs to download subreddits,
            and --config to make custom Pushshift API calls.
            """),
    )
    # The three modes are mutually exclusive and one must be chosen.
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument(
        "--posts",
        dest="posts_file",
        type=str,
        default="",
        help="A file containing the list of posts to download, one per line.",
    )
    mode_group.add_argument(
        "--subs",
        dest="subs_file",
        type=str,
        # required=False,
        default="",
        help=
        "A file containing the list of subreddits to download, one per line.",
    )
    mode_group.add_argument(
        "--config",
        dest="config_file",
        type=str,
        # required=False,
        default="",
        help=
        "A file containing the arguments for the Pushshift APIs. See config.default.txt for a sample config file.",
    )
    parser.add_argument(
        "--start",
        dest="start_date",
        type=str,
        # required=True,
        help="The date to start parsing from, in YYYY-MM-DD format",
    )
    parser.add_argument(
        "--end",
        dest="end_date",
        type=str,
        # required=True,
        help="The final date of the parsing, in YYYY-MM-DD format",
    )
    parser.add_argument(
        "--output",
        dest="output_folder",
        type=str,
        required=True,
        help="The output folder",
    )
    parser.add_argument(
        "--blacklist",
        dest="blacklist_file",
        type=str,
        required=False,
        default="",
        help="A file containing the lines to skip.",
    )
    parser.add_argument(
        "--workers",
        dest="num_workers",
        type=int,
        required=False,
        default=1,
        help="Number of parallel workers",
    )
    # With no arguments at all, show the usage text instead of a terse
    # argparse error.
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()
    args = parser.parse_args()
    # A date window is mandatory for the modes that query by time range,
    # and both dates must be ISO YYYY-MM-DD.
    if args.config_file or args.subs_file:
        if not (args.start_date and args.end_date):
            parser.error(
                "Start date and end date are required in --config or --subs mode."
            )
        pattern = re.compile("^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$")
        if not (pattern.match(args.start_date) and pattern.match(args.end_date)):
            parser.error("Invalid date format.")
    check_output_directory(args.output_folder)
    try:
        reddit = do_reddit_login()
    except ImportError:
        parser.error(
            "Failed to load configuration. Did you create reddit_config.py?")
    if args.subs_file:
        # --subs mode: one task per subreddit.
        subs = load_list_from_file(args.subs_file)
        blacklist = load_blacklist(
            args.blacklist_file) if args.blacklist_file else []
        if args.num_workers > 1:
            with Pool(args.num_workers) as p:
                p.map(
                    process_subs,
                    [(
                        reddit,
                        sub,
                        args.start_date,
                        args.end_date,
                        args.output_folder,
                        blacklist,
                    ) for sub in subs],
                )
        else:
            for sub in subs:
                process_subs((
                    reddit,
                    sub,
                    args.start_date,
                    args.end_date,
                    args.output_folder,
                    blacklist,
                ))
    elif args.posts_file:
        # --posts mode: one task per post.
        posts = load_list_from_file(args.posts_file)
        blacklist = load_blacklist(
            args.blacklist_file) if args.blacklist_file else []
        if args.num_workers > 1:
            with Pool(args.num_workers) as p:
                p.map(
                    process_posts,
                    [(reddit, post, blacklist, args.output_folder)
                     for post in posts],
                )
        else:
            for post in posts:
                process_posts((reddit, post, blacklist, args.output_folder))
    else:
        # --config mode: the date range is split into one sub-range per
        # worker and each worker processes its slice.
        blacklist = load_blacklist(
            args.blacklist_file) if args.blacklist_file else []
        config = load_config(args.config_file) if args.config_file else {}
        if args.num_workers > 1:
            with Pool(args.num_workers) as p:
                p.map(
                    process_all,
                    [(
                        reddit,
                        start_split,
                        end_split,
                        args.output_folder,
                        config,
                        blacklist,
                    ) for (start_split, end_split) in make_splits(
                        args.start_date, args.end_date, args.num_workers)],
                )
        else:
            start_ts, end_ts = make_splits(args.start_date, args.end_date, 1)[0]
            process_all((reddit, start_ts, end_ts, args.output_folder, config,
                         blacklist))
    print("Done!")
    # NOTE(review): this is the builtin exit(), not sys.exit() -- works in
    # scripts but confirm it is intended for non-interactive use.
    exit(0)