Exemple #1
0
    def run(self):
        """Work through queued scan jobs until stopped or the budget is spent.

        Every iteration fetches one job without blocking (sleeping for one
        second when the queue returns ``None``), runs the job's scan module
        inside a fresh temporary working directory and reports the outcome to
        both the job queue and the master process.  Only jobs that were
        actually processed count against ``_max_executions``.

        The loop ends when ``_max_executions`` reaches zero, when the current
        parent pid no longer equals ``_ppid``, or when ``_stop_event`` is set.
        Afterwards ``kill_everything`` is invoked with this worker's pid.
        """

        def report_failed():
            # Tell the queue first, then the master, that the job failed.
            self._job_queue.report_failure()
            self._notify_master('job_failed', (datetime.today(), ))

        def report_finished(scan_result):
            # Hand the collected updates to the queue, then notify the master.
            self._job_queue.report_result(scan_result.get_updates())
            self._notify_master('job_finished', (datetime.today(), ))

        while self._max_executions > 0:
            # Stop if the master died or if it asked us to stop.
            if self._ppid != os.getppid() or self._stop_event.is_set():
                break

            job = self._job_queue.get_job_nowait()
            if job is None:
                # Nothing to do right now; poll again in a second.
                time.sleep(1)
                continue

            self._notify_master(
                'job_started',
                (job.scan_id, job.scan_module.name, datetime.today(),
                 job.num_tries))

            result = Result(job.current_result, NoOpFileHandler())

            # A dedicated logger per job: one handler writes to the worker's
            # pipe, the other is the scan stream handler.
            scan_logger = logging.Logger(job.scan_module.name)
            pipe_handler = WorkerWritePipeHandler(self._pid, self._write_pipe)
            scan_logger.addHandler(pipe_handler)
            scan_logger.addHandler(ScanStreamHandler())

            meta = ScanMeta(worker_id=self._id, num_tries=job.num_tries)

            with tempfile.TemporaryDirectory() as scratch_dir:
                previous_cwd = os.getcwd()
                os.chdir(scratch_dir)
                try:
                    job.scan_module.scan_site(
                        result, scan_logger, job.options, meta)
                except RetryScan:
                    report_failed()
                except RescheduleLater as reschedule:
                    # Re-queue for later, but still deliver what was
                    # collected so far.
                    self._job_queue.reschedule(reschedule.not_before)
                    report_finished(result)
                except Exception:
                    scan_logger.exception('Scan module `{}` failed.'.format(
                        job.scan_module.name))
                    report_failed()
                    if self._raven_client:
                        # Must stay inside the handler: no exception object
                        # is passed explicitly to the client.
                        self._raven_client.captureException(
                            tags={
                                'scan_id': job.scan_id,
                                'scan_module_name': job.scan_module.name
                            },
                            extra={'result': result.get_results()})
                else:
                    report_finished(result)
                finally:
                    # Leave the temporary directory before it is removed and
                    # clean up child processes of this worker.
                    os.chdir(previous_cwd)
                    kill_everything(self._pid, only_children=True)

            self._max_executions -= 1

        kill_everything(self._pid)
Exemple #2
0
def update_dependencies(args):
    """Update the dependencies of every scan module that supports it.

    Loads the configuration named by ``args.config``, loads the configured
    scan modules and calls ``update_dependencies()`` on each module that
    defines such an attribute.  A summary of the updated module names (or a
    note that nothing was updated) is printed at the end.
    """
    config = load_config(args.config)
    modules = load_modules(config['SCAN_MODULES'],
                           config['SCAN_MODULE_OPTIONS'])

    # One stream handler is shared by all per-module loggers.
    shared_handler = ScanStreamHandler()
    updated_names = []
    for module in modules.values():
        module_logger = logging.Logger(module.name)
        module_logger.addHandler(shared_handler)
        if not hasattr(module, 'update_dependencies'):
            continue
        module_logger.info('Updating dependencies')
        module.update_dependencies()
        updated_names.append(module.name)

    if not updated_names:
        print('\nNothing to update.')
        return
    print('\nUpdated dependencies of: ' + ' '.join(updated_names))
Exemple #3
0
def scan_site(args):
    """Run scan modules against a single site and store the results on disk.

    The results directory (``args.results``, or a name built from the
    slugified host plus the first ten hex digits of the SHA-512 of the URL)
    receives a ``results.json`` file and a ``logs`` directory containing one
    log file per scan module.  ``results.json`` is rewritten after every scan
    attempt.  The final results are pretty-printed to stdout.

    Args:
        args: Parsed command line arguments.  The attributes read here are
            ``config``, ``site``, ``results``, ``import_results``,
            ``scan_modules`` and ``skip_dependencies``.

    Raises:
        CommandError: If the site URL scheme is not http(s), the results
            directory cannot be created, or the result JSON cannot be
            opened, parsed or initially written.
        SystemExit: With status 1 if a scan module raised an unexpected
            exception or exhausted its retries.
    """
    config = load_config(args.config)
    _require_dependencies(config)

    site_parsed = urlparse(args.site)
    if site_parsed.scheme not in ('http', 'https'):
        raise CommandError('Invalid site: {}'.format(args.site))

    results_dir = args.results
    if results_dir is None:
        # Derive a directory name that is readable and unique per URL.
        results_dir = slugify(site_parsed.netloc) + '_'
        results_dir += hashlib.sha512(args.site.encode()).hexdigest()[:10]
    results_dir = Path(results_dir).resolve()
    try:
        results_dir.mkdir(exist_ok=True)
    except IOError as e:
        raise CommandError(
            'Could not create results directory: {}'.format(e)) from e

    result_file = results_dir / 'results.json'
    result_json = {'site_url': args.site}
    if args.import_results:
        # Seed the result with previously exported data.
        try:
            with open(args.import_results) as f:
                import_json = json.load(f)
        except IOError as e:
            raise CommandError(
                'Could not open result JSON: {}'.format(e)) from e
        except ValueError as e:
            raise CommandError(
                'Could not parse result JSON: {}'.format(e)) from e
        else:
            result_json.update(import_json)
    try:
        with result_file.open('w') as f:
            json.dump(result_json, f, indent=2)
            f.write('\n')
    except IOError as e:
        raise CommandError('Could not write result JSON: {}'.format(e)) from e

    scan_modules = load_modules(config['SCAN_MODULES'],
                                config['SCAN_MODULE_OPTIONS'])
    scan_module_names = args.scan_modules

    # ``None`` means no explicit selection was made: run every module.
    explicitly_requested = None
    if scan_module_names is None:
        scan_module_names = scan_modules.keys()
    else:
        explicitly_requested = set(scan_module_names)

    # Order scan_module_names by dependency topologically
    dependencies = {}
    for scan_module_name in scan_module_names:
        mod = scan_modules[scan_module_name]
        dependencies[mod.name] = set(mod.dependencies)
    scan_module_names = toposort_flatten(dependencies)

    # Drop modules that were only pulled in as dependencies.  Without an
    # explicit selection every module was requested, so there is nothing to
    # filter (and testing membership in ``None`` would raise TypeError).
    if args.skip_dependencies and explicitly_requested is not None:
        scan_module_names = [
            scan_module_name for scan_module_name in scan_module_names
            if scan_module_name in explicitly_requested
        ]

    has_error = False
    result = Result(result_json, DirectoryFileHandler(results_dir))
    stream_handler = ScanStreamHandler()
    logs_dir = results_dir / 'logs'
    logs_dir.mkdir(exist_ok=True)
    lock_dir = config['STORAGE_PATH'] / 'locks'
    lock_dir.mkdir(exist_ok=True)

    # The queue is used as a stack: reversing lets ``pop()`` yield modules in
    # topological order, and a re-queued module is the next one to run.
    scan_queue = [
        QueueEntry(mod_name, 0, None) for mod_name in scan_module_names
    ]
    scan_queue.reverse()
    while scan_queue:
        scan_module_name, num_try, not_before = scan_queue.pop()
        if not_before is not None:
            # Wait until the rescheduled module is allowed to run again.
            # noinspection PyTypeChecker
            while datetime.utcnow() < not_before:
                time.sleep(0.5)
        mod = scan_modules[scan_module_name]
        num_try += 1
        log_filename = (logs_dir / (mod.name + '.log'))
        file_handler = ScanFileHandler(str(log_filename))
        logger = logging.Logger(mod.name)
        logger.addHandler(stream_handler)
        logger.addHandler(file_handler)
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                old_cwd = os.getcwd()
                os.chdir(temp_dir)
                logger.info('Starting %s', mod.name)
                try:
                    with NumericLock(lock_dir) as worker_id:
                        scan_meta = ScanMeta(worker_id=worker_id,
                                             num_tries=num_try)
                        mod.logger = logger
                        mod.scan_site(result, scan_meta)
                except RetryScan:
                    # num_try was already incremented for this attempt.
                    if num_try <= config['MAX_TRIES']:
                        scan_queue.append(
                            QueueEntry(scan_module_name, num_try,
                                       not_before))
                        logger.info('Scan module `%s` will be retried',
                                    mod.name)
                    else:
                        has_error = True
                except RescheduleLater as e:
                    scan_queue.append(
                        QueueEntry(scan_module_name, num_try, e.not_before))
                except Exception:
                    if num_try <= config['MAX_TRIES']:
                        scan_queue.append(
                            QueueEntry(scan_module_name, num_try,
                                       not_before))
                    # An unexpected failure marks the run as failed even if
                    # the module is retried.
                    has_error = True
                    logger.exception('Scan module `%s` failed.', mod.name)
                finally:
                    # Leave the temporary directory before it is removed and
                    # persist whatever has been collected so far.
                    os.chdir(old_cwd)
                    with result_file.open('w') as f:
                        json.dump(result.get_results(),
                                  f,
                                  indent=2,
                                  sort_keys=True)
                        f.write('\n')
            logger.info('Finished %s', mod.name)
        finally:
            # A new file handler is created for every attempt; close it so
            # log files are not left open for the rest of the run.
            # NOTE(review): assumes ScanFileHandler is a logging.Handler
            # subclass and therefore provides close() - confirm.
            file_handler.close()
    pprint.pprint(result.get_results())
    if has_error:
        sys.exit(1)