예제 #1
0
def update_dependencies(args):
    """Run the ``update_dependencies`` hook of every configured scan module.

    Prints each module's name as it is visited, then a summary line with
    the names of all modules that were updated (or a notice if none were).
    """
    config = load_config(args.config)
    scan_modules = load_modules(config['SCAN_MODULES'])
    updated = []
    for scan_module in scan_modules.values():
        print(scan_module.name)
        if not hasattr(scan_module, 'update_dependencies'):
            continue
        options = config['SCAN_MODULE_OPTIONS'].get(scan_module.name, {})
        scan_module.update_dependencies(options)
        updated.append(scan_module.name)
    print(' '.join(updated) if updated else 'Nothing to update.')
예제 #2
0
def print_master_config(args):
    """Print a ``SCAN_MODULES`` config snippet with topological order indices.

    Each scan module is mapped to the index of its level in a topological
    sort of the inter-module dependency graph, so modules can be run in a
    dependency-respecting order.
    """
    config = load_config(args.config)
    scan_modules = load_modules(config['SCAN_MODULES'])
    dependencies = {
        module.name: set(module.dependencies)
        for module in scan_modules.values()
    }
    modules_topology = {}
    # toposort yields sets of mutually independent modules, level by level.
    for index, module_list in enumerate(toposort(dependencies)):
        for module_name in module_list:
            modules_topology[module_name] = index
    lines = [
        '# Scan modules with topological dependency order index.',
        '# Run the following to obtain this configuration value:',
        '# privacyscanner print_master_config --config yourconfig.py',
        'SCAN_MODULES = {}'.format(pprint.pformat(modules_topology, indent=4)),
    ]
    print('\n'.join(lines))
예제 #3
0
 def __init__(self, worker_id, ppid, db_dsn, scan_module_list,
              scan_module_options, max_tries, max_executions, write_pipe,
              stop_event, ack_event, raven_dsn):
     """Initialize a scan worker.

     Stores the worker's identity and IPC/synchronization primitives and
     builds the job queue the worker will consume.

     worker_id -- numeric id of this worker (stored as self._id)
     ppid -- parent process id, presumably used to detect parent death;
             TODO confirm against the rest of the class
     db_dsn -- database DSN handed to JobQueue
     scan_module_list -- module spec passed through load_modules()
     scan_module_options -- per-module options handed to JobQueue
     max_tries -- retry limit handed to JobQueue (not stored on self)
     max_executions -- execution limit stored for later use
     write_pipe -- pipe end for messaging the parent
     stop_event, ack_event -- events used to coordinate shutdown
     raven_dsn -- optional Sentry DSN; a raven client is only created
                  when the raven package is available
     """
     self._id = worker_id
     # Record our own pid alongside the parent's pid.
     self._pid = os.getpid()
     self._ppid = ppid
     self._max_executions = max_executions
     self._write_pipe = write_pipe
     self._stop_event = stop_event
     self._ack_event = ack_event
     # Previous signal dispositions; initialized to the defaults so they
     # can be restored later even if no handler was installed yet.
     self._old_sigterm = signal.SIG_DFL
     self._old_sigint = signal.SIG_DFL
     self._raven_client = None
     # Error reporting is best-effort: only enabled when raven is importable
     # and a DSN was configured.
     if has_raven and raven_dsn:
         self._raven_client = raven.Client(raven_dsn)
     self._job_queue = JobQueue(db_dsn, load_modules(scan_module_list),
                                scan_module_options, max_tries)
예제 #4
0
def update_dependencies(args):
    """Run the ``update_dependencies`` hook of every configured scan module.

    Logs progress per module via a ScanStreamHandler and prints a summary
    of updated module names (or a notice when no module had the hook).
    """
    config = load_config(args.config)
    scan_modules = load_modules(config['SCAN_MODULES'],
                                config['SCAN_MODULE_OPTIONS'])
    updated = []
    stream_handler = ScanStreamHandler()
    for scan_module in scan_modules.values():
        # Skip modules without the hook before building a logger: the
        # original constructed a Logger and attached a handler for every
        # module, even though it was never used for the skipped ones.
        if not hasattr(scan_module, 'update_dependencies'):
            continue
        # logging.Logger is instantiated directly (not via getLogger), so
        # repeated invocations do not accumulate handlers on a shared
        # registry entry.
        logger = logging.Logger(scan_module.name)
        logger.addHandler(stream_handler)
        logger.info('Updating dependencies')
        scan_module.update_dependencies()
        updated.append(scan_module.name)
    if updated:
        print('\nUpdated dependencies of: ' + ' '.join(updated))
    else:
        print('\nNothing to update.')
예제 #5
0
def scan_site(args):
    """Scan a single site with the configured scan modules.

    Validates the site URL, prepares a results directory (optionally seeded
    from an imported result JSON), orders the requested scan modules
    topologically by their dependencies, and runs them one by one with
    retry/reschedule support. Results are flushed to ``results.json`` after
    every module. Exits with status 1 if any module ultimately failed.

    Raises CommandError for an invalid site URL or results-directory /
    result-JSON I/O problems.
    """
    config = load_config(args.config)
    _require_dependencies(config)

    site_parsed = urlparse(args.site)
    if site_parsed.scheme not in ('http', 'https'):
        raise CommandError('Invalid site: {}'.format(args.site))

    results_dir = args.results
    if results_dir is None:
        # Default directory name: slugified host plus a short URL hash to
        # disambiguate different URLs on the same host.
        results_dir = slugify(site_parsed.netloc) + '_'
        results_dir += hashlib.sha512(args.site.encode()).hexdigest()[:10]
    results_dir = Path(results_dir).resolve()
    try:
        results_dir.mkdir(exist_ok=True)
    except IOError as e:
        raise CommandError(
            'Could not create results directory: {}'.format(e)) from e

    result_file = results_dir / 'results.json'
    result_json = {'site_url': args.site}
    if args.import_results:
        try:
            with open(args.import_results) as f:
                import_json = json.load(f)
        except IOError as e:
            raise CommandError(
                'Could not open result JSON: {}'.format(e)) from e
        except ValueError as e:
            raise CommandError(
                'Could not parse result JSON: {}'.format(e)) from e
        else:
            result_json.update(import_json)
    try:
        with result_file.open('w') as f:
            json.dump(result_json, f, indent=2)
            f.write('\n')
    except IOError as e:
        raise CommandError('Could not write result JSON: {}'.format(e)) from e

    scan_modules = load_modules(config['SCAN_MODULES'],
                                config['SCAN_MODULE_OPTIONS'])
    scan_module_names = args.scan_modules

    if scan_module_names is None:
        scan_module_names = scan_modules.keys()

    # Order scan_module_names by dependency topologically
    dependencies = {}
    for scan_module_name in scan_module_names:
        mod = scan_modules[scan_module_name]
        dependencies[mod.name] = set(mod.dependencies)
    scan_module_names = toposort_flatten(dependencies)

    # BUGFIX: only filter when an explicit module list was given. The
    # original tested membership in args.scan_modules unconditionally,
    # raising TypeError (`in None`) when --skip-dependencies was used
    # without an explicit module selection.
    if args.skip_dependencies and args.scan_modules is not None:
        scan_module_names = [
            scan_module_name for scan_module_name in scan_module_names
            if scan_module_name in args.scan_modules
        ]

    has_error = False
    result = Result(result_json, DirectoryFileHandler(results_dir))
    stream_handler = ScanStreamHandler()
    logs_dir = results_dir / 'logs'
    logs_dir.mkdir(exist_ok=True)
    lock_dir = config['STORAGE_PATH'] / 'locks'
    lock_dir.mkdir(exist_ok=True)
    # The queue is consumed from the right (pop); reverse so modules run
    # in topological order.
    scan_queue = [
        QueueEntry(mod_name, 0, None) for mod_name in scan_module_names
    ]
    scan_queue.reverse()
    while scan_queue:
        scan_module_name, num_try, not_before = scan_queue.pop()
        if not_before is not None:
            # Busy-wait until the scheduled time; not_before is naive and
            # compared against utcnow(), so it is presumably UTC.
            # noinspection PyTypeChecker
            while datetime.utcnow() < not_before:
                time.sleep(0.5)
        mod = scan_modules[scan_module_name]
        num_try += 1
        log_filename = (logs_dir / (mod.name + '.log'))
        file_handler = ScanFileHandler(str(log_filename))
        # Direct Logger instantiation avoids accumulating handlers on a
        # global registry entry across modules.
        logger = logging.Logger(mod.name)
        logger.addHandler(stream_handler)
        logger.addHandler(file_handler)
        with tempfile.TemporaryDirectory() as temp_dir:
            # Each module runs with a scratch cwd that is cleaned up after.
            old_cwd = os.getcwd()
            os.chdir(temp_dir)
            logger.info('Starting %s', mod.name)
            try:
                with NumericLock(lock_dir) as worker_id:
                    scan_meta = ScanMeta(worker_id=worker_id,
                                         num_tries=num_try)
                    mod.logger = logger
                    mod.scan_site(result, scan_meta)
            except RetryScan:
                if num_try <= config['MAX_TRIES']:
                    scan_queue.append(
                        QueueEntry(scan_module_name, num_try, not_before))
                    logger.info('Scan module `%s` will be retried', mod.name)
                else:
                    has_error = True
            except RescheduleLater as e:
                # Not a failure: the module asked to run again later.
                scan_queue.append(
                    QueueEntry(scan_module_name, num_try, e.not_before))
            except Exception:
                if num_try <= config['MAX_TRIES']:
                    scan_queue.append(
                        QueueEntry(scan_module_name, num_try, not_before))
                has_error = True
                logger.exception('Scan module `%s` failed.', mod.name)
            finally:
                os.chdir(old_cwd)
                # Persist intermediate results after every module so a crash
                # loses at most one module's output.
                with result_file.open('w') as f:
                    json.dump(result.get_results(),
                              f,
                              indent=2,
                              sort_keys=True)
                    f.write('\n')
            # NOTE(review): this logs "Finished" even when the module was
            # re-queued for retry above.
            logger.info('Finished %s', mod.name)
    pprint.pprint(result.get_results())
    if has_error:
        sys.exit(1)