def update_dependencies(args):
    """Run the optional ``update_dependencies`` hook of every scan module.

    Loads the configuration named by ``args.config``, iterates over all
    configured scan modules, and for each module that defines an
    ``update_dependencies`` hook, calls it with that module's configured
    options. Finally prints which modules were updated, or a notice that
    nothing was updated.
    """
    cfg = load_config(args.config)
    modules = load_modules(cfg['SCAN_MODULES'])
    updated_names = []
    for module in modules.values():
        # Announce each module as we visit it, whether or not it updates.
        print(module.name)
        if not hasattr(module, 'update_dependencies'):
            continue
        module_options = cfg['SCAN_MODULE_OPTIONS'].get(module.name, {})
        module.update_dependencies(module_options)
        updated_names.append(module.name)
    print(' '.join(updated_names) if updated_names else 'Nothing to update.')
def print_master_config(args):
    """Print a ``SCAN_MODULES`` configuration snippet to stdout.

    Builds the dependency graph of all configured scan modules, assigns
    each module the index of its topological level, and prints the result
    as a copy-pasteable Python assignment preceded by explanatory comments.
    """
    cfg = load_config(args.config)
    modules = load_modules(cfg['SCAN_MODULES'])
    # Map each module name to the set of modules it depends on.
    dep_graph = {mod.name: set(mod.dependencies) for mod in modules.values()}
    # toposort() yields groups of mutually independent modules; every module
    # in the same group gets the same order index.
    topology = {
        name: level
        for level, group in enumerate(toposort(dep_graph))
        for name in group
    }
    lines = [
        '# Scan modules with topological dependency order index.',
        '# Run the following to obtain this configuration value:',
        '# privacyscanner print_master_config --config yourconfig.py',
        'SCAN_MODULES = {}'.format(pprint.pformat(topology, indent=4)),
    ]
    print('\n'.join(lines))
def __init__(self, worker_id, ppid, db_dsn, scan_module_list,
             scan_module_options, max_tries, max_executions, write_pipe,
             stop_event, ack_event, raven_dsn):
    """Initialize worker state, signal bookkeeping and the job queue."""
    # Identity and process bookkeeping.
    self._id = worker_id
    self._pid = os.getpid()
    self._ppid = ppid
    self._max_executions = max_executions
    # IPC primitives shared with the parent process.
    self._write_pipe = write_pipe
    self._stop_event = stop_event
    self._ack_event = ack_event
    # Previous signal handlers; default until real handlers are installed.
    self._old_sigterm = signal.SIG_DFL
    self._old_sigint = signal.SIG_DFL
    # Optional Sentry/Raven error reporting — only when the raven package
    # is importable AND a DSN was configured.
    self._raven_client = (raven.Client(raven_dsn)
                          if has_raven and raven_dsn else None)
    self._job_queue = JobQueue(db_dsn, load_modules(scan_module_list),
                               scan_module_options, max_tries)
def update_dependencies(args):
    """Invoke the ``update_dependencies`` hook of every scan module that has one.

    Loads the configuration named by ``args.config``, walks all configured
    scan modules, runs each module's parameterless ``update_dependencies``
    hook (logging a line per updated module), and prints a final summary.
    """
    cfg = load_config(args.config)
    modules = load_modules(cfg['SCAN_MODULES'], cfg['SCAN_MODULE_OPTIONS'])
    handler = ScanStreamHandler()
    updated_names = []
    for module in modules.values():
        # A fresh per-module logger so messages are tagged with its name.
        log = logging.Logger(module.name)
        log.addHandler(handler)
        if not hasattr(module, 'update_dependencies'):
            continue
        log.info('Updating dependencies')
        module.update_dependencies()
        updated_names.append(module.name)
    if updated_names:
        print('\nUpdated dependencies of: ' + ' '.join(updated_names))
    else:
        print('\nNothing to update.')
def scan_site(args):
    """Scan a single site from the command line and write results to disk.

    Validates the site URL, prepares a results directory (optionally seeded
    from an imported result JSON), orders the requested scan modules
    topologically by their dependencies, then runs each module in its own
    temporary working directory with per-module logging. Results are
    re-written to ``results.json`` after every module so partial progress
    survives a crash. Exits with status 1 if any module ultimately failed.
    """
    config = load_config(args.config)
    _require_dependencies(config)
    site_parsed = urlparse(args.site)
    if site_parsed.scheme not in ('http', 'https'):
        raise CommandError('Invalid site: {}'.format(args.site))
    results_dir = args.results
    if results_dir is None:
        # Default directory name: slugified host plus a short hash of the
        # full URL so distinct URLs on the same host do not collide.
        results_dir = slugify(site_parsed.netloc) + '_'
        results_dir += hashlib.sha512(args.site.encode()).hexdigest()[:10]
    results_dir = Path(results_dir).resolve()
    try:
        results_dir.mkdir(exist_ok=True)
    except IOError as e:
        raise CommandError(
            'Could not create results directory: {}'.format(e)) from e
    result_file = results_dir / 'results.json'
    result_json = {'site_url': args.site}
    if args.import_results:
        # Seed the result set from a previously exported JSON file.
        try:
            with open(args.import_results) as f:
                import_json = json.load(f)
        except IOError as e:
            raise CommandError(
                'Could not open result JSON: {}'.format(e)) from e
        except ValueError as e:
            # json.JSONDecodeError is a subclass of ValueError.
            raise CommandError(
                'Could not parse result JSON: {}'.format(e)) from e
        else:
            result_json.update(import_json)
    try:
        with result_file.open('w') as f:
            json.dump(result_json, f, indent=2)
            f.write('\n')
    except IOError as e:
        raise CommandError('Could not write result JSON: {}'.format(e)) from e
    scan_modules = load_modules(config['SCAN_MODULES'],
                                config['SCAN_MODULE_OPTIONS'])
    scan_module_names = args.scan_modules
    if scan_module_names is None:
        scan_module_names = scan_modules.keys()
    # Order scan_module_names by dependency topologically
    dependencies = {}
    for scan_module_name in scan_module_names:
        mod = scan_modules[scan_module_name]
        dependencies[mod.name] = set(mod.dependencies)
    # toposort_flatten pulls in dependencies not explicitly requested.
    scan_module_names = toposort_flatten(dependencies)
    if args.skip_dependencies:
        # Keep only the modules the user explicitly asked for.
        # NOTE(review): if --skip-dependencies is given without an explicit
        # module list, args.scan_modules is None and `in None` raises
        # TypeError — presumably the CLI forbids that combination; confirm.
        scan_module_names = [
            scan_module_name
            for scan_module_name in scan_module_names
            if scan_module_name in args.scan_modules
        ]
    has_error = False
    result = Result(result_json, DirectoryFileHandler(results_dir))
    stream_handler = ScanStreamHandler()
    logs_dir = results_dir / 'logs'
    logs_dir.mkdir(exist_ok=True)
    lock_dir = config['STORAGE_PATH'] / 'locks'
    lock_dir.mkdir(exist_ok=True)
    # The queue holds (module name, tries so far, earliest start time);
    # reversed so pop() takes modules in topological order.
    scan_queue = [
        QueueEntry(mod_name, 0, None)
        for mod_name in scan_module_names
    ]
    scan_queue.reverse()
    while scan_queue:
        scan_module_name, num_try, not_before = scan_queue.pop()
        if not_before is not None:
            # Busy-wait (0.5 s granularity) until the rescheduled time.
            # noinspection PyTypeChecker
            while datetime.utcnow() < not_before:
                time.sleep(0.5)
        mod = scan_modules[scan_module_name]
        num_try += 1
        log_filename = (logs_dir / (mod.name + '.log'))
        # NOTE(review): a fresh file handler per attempt is never closed
        # explicitly; presumably released on logger GC — confirm.
        file_handler = ScanFileHandler(str(log_filename))
        logger = logging.Logger(mod.name)
        logger.addHandler(stream_handler)
        logger.addHandler(file_handler)
        # Run each module from a throwaway working directory so stray
        # files it creates do not pollute the results directory.
        with tempfile.TemporaryDirectory() as temp_dir:
            old_cwd = os.getcwd()
            os.chdir(temp_dir)
            logger.info('Starting %s', mod.name)
            try:
                with NumericLock(lock_dir) as worker_id:
                    scan_meta = ScanMeta(worker_id=worker_id,
                                         num_tries=num_try)
                    mod.logger = logger
                    mod.scan_site(result, scan_meta)
            except RetryScan:
                # Module asked to be retried; requeue until MAX_TRIES.
                if num_try <= config['MAX_TRIES']:
                    scan_queue.append(
                        QueueEntry(scan_module_name, num_try, not_before))
                    logger.info('Scan module `%s` will be retried', mod.name)
                else:
                    has_error = True
            except RescheduleLater as e:
                # Requeue with a delay; does not count against MAX_TRIES here.
                scan_queue.append(
                    QueueEntry(scan_module_name, num_try, e.not_before))
            except Exception:
                # Unexpected failure: retry up to MAX_TRIES but still mark
                # the overall run as failed.
                if num_try <= config['MAX_TRIES']:
                    scan_queue.append(
                        QueueEntry(scan_module_name, num_try, not_before))
                has_error = True
                logger.exception('Scan module `%s` failed.', mod.name)
            finally:
                os.chdir(old_cwd)
                # Flush results after every module so partial progress
                # is persisted even if a later module crashes the process.
                with result_file.open('w') as f:
                    json.dump(result.get_results(), f, indent=2,
                              sort_keys=True)
                    f.write('\n')
                logger.info('Finished %s', mod.name)
    pprint.pprint(result.get_results())
    if has_error:
        sys.exit(1)