Ejemplo n.º 1
0
 def formfield_for_choice_field(self, db_field, request, **kwargs):
     """Limit the choices offered for the ``scanner`` field.

     For the field named "scanner", the choices are replaced by a blank
     entry followed by the SCANNERS entries whose key is CUSTOMS or YARA.
     Every other field is handed to the parent implementation untouched.
     """
     if db_field.name == "scanner":
         allowed_scanners = [CUSTOMS, YARA]
         # Blank entry first, then the allowed scanners in SCANNERS order.
         kwargs['choices'] = (("", "---------"), ) + tuple(
             (scanner_id, scanner_label)
             for scanner_id, scanner_label in SCANNERS.items()
             if scanner_id in allowed_scanners
         )
     return super().formfield_for_choice_field(db_field, request, **kwargs)
Ejemplo n.º 2
0
def run_scanner(results, upload_pk, scanner, api_url, api_key):
    """
    Ask a remote scanner to analyse a FileUpload and persist its answer.

    The scanner is called with a JSON POST request carrying the API key and
    an authenticated download URL for the upload. Once the upload has been
    loaded, any failure is counted and logged but not re-raised.

    - `results` are the validation results passed in the validation chain.
      They are only read (to check `is_webextension`) and returned, which is
      why they must be the first argument of this validation task.
    - `upload_pk` is the FileUpload ID.
    - `scanner` is the key of the scanner in `SCANNERS`.
    - `api_url` / `api_key` are the scanner endpoint and its credential.
    """
    name = SCANNERS.get(scanner)
    log.info('Starting scanner "%s" task for FileUpload %s.', name,
             upload_pk)

    is_webextension = results['metadata']['is_webextension']
    if not is_webextension:
        log.info(
            'Not running scanner "%s" for FileUpload %s, it is not a '
            'webextension.', name, upload_pk)
        return results

    file_upload = FileUpload.objects.get(pk=upload_pk)

    try:
        file_is_missing = not os.path.exists(file_upload.path)
        if file_is_missing:
            raise ValueError(
                'File "{}" does not exist.'.format(file_upload.path))

        record = ScannerResult(upload=file_upload, scanner=scanner)

        # Only the remote call (and building its payload) is timed.
        with statsd.timer('devhub.{}'.format(name)):
            payload = {
                'api_key': api_key,
                'download_url': file_upload.get_authenticated_download_url(),
            }
            response = requests.post(
                url=api_url,
                json=payload,
                timeout=settings.SCANNER_TIMEOUT,
            )

        try:
            body = response.json()
        except ValueError:
            # Surface the response body when JSON decoding has failed, so it
            # ends up in the exception log below.
            raise ValueError(response.text)

        request_failed = response.status_code != 200 or 'error' in body
        if request_failed:
            raise ValueError(body)

        record.results = body
        record.save()

        statsd.incr('devhub.{}.success'.format(name))
        log.info('Ending scanner "%s" task for FileUpload %s.', name,
                 upload_pk)
    except Exception:
        statsd.incr('devhub.{}.failure'.format(name))
        # The exception is logged but deliberately swallowed to avoid
        # perturbing the submission flow.
        log.exception('Error in scanner "%s" task for FileUpload %s.',
                      name, upload_pk)

    return results
Ejemplo n.º 3
0
class ScannerResult(ModelBase):
    """Raw output of one scanner, attached to an upload and/or a version.

    On save, the rule names found in the raw results are looked up among the
    `ScannerRule` objects of the same scanner; `has_matches` and
    `matched_rules` are derived from that lookup.
    """

    # The scanned upload; set to NULL (not cascaded) when it is deleted.
    upload = models.ForeignKey(
        FileUpload,
        related_name='scanners_results',
        on_delete=models.SET_NULL,
        null=True,
    )
    # Store the "raw" results of a scanner.
    results = JSONField(default=[])
    # Key of the scanner in SCANNERS.
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    # Version the result belongs to; results are deleted with the version.
    version = models.ForeignKey(
        'versions.Version',
        related_name='scanners_results',
        on_delete=models.CASCADE,
        null=True,
    )
    # Recomputed on every save() from the rules found in the database.
    has_matches = models.NullBooleanField()
    matched_rules = models.ManyToManyField(
        'ScannerRule', through='ScannerMatch'
    )

    class Meta:
        db_table = 'scanners_results'
        constraints = [
            models.UniqueConstraint(
                fields=('upload', 'scanner', 'version'),
                name='scanners_results_upload_id_scanner_'
                'version_id_ad9eb8a6_uniq',
            )
        ]
        indexes = [models.Index(fields=('has_matches', ))]

    def add_yara_result(self, rule, tags=None, meta=None):
        """Append one Yara result (rule, tags, meta) to the raw results."""
        entry = {
            'rule': rule,
            'tags': tags or [],
            'meta': meta or {}
        }
        self.results.append(entry)

    def extract_rule_names(self):
        """Return the (matched) rule names found in the raw results.

        Yara results yield the sorted, de-duplicated `rule` values; customs
        results yield their `matchedRules` entry when present. Any other
        case yields an empty list.
        """
        if self.scanner == YARA:
            unique_names = set()
            for entry in self.results:
                unique_names.add(entry['rule'])
            return sorted(unique_names)

        has_customs_rules = (
            self.scanner == CUSTOMS and 'matchedRules' in self.results
        )
        if has_customs_rules:
            return self.results['matchedRules']

        # We do not have support for the remaining scanners (yet).
        return []

    def save(self, *args, **kwargs):
        """Save the result, then bind it to the rules found in its results."""
        rule_names = self.extract_rule_names()
        known_rules = ScannerRule.objects.filter(
            scanner=self.scanner, name__in=rule_names)
        self.has_matches = bool(known_rules)
        # The instance has to be saved before rules can be attached to it...
        super().save(*args, **kwargs)
        # ...then each rule is added to the relation.
        for rule in known_rules:
            self.matched_rules.add(rule)
Ejemplo n.º 4
0
def run_scanner(results, upload_pk, scanner, api_url, api_key):
    """
    Run a scanner on a FileUpload and store the results, with match metrics.

    The remote call is delegated to `_run_scanner_for_url`. After saving,
    one counter is incremented for the scanner and one per matched rule when
    the result has matches. Failures are counted and logged; they are
    re-raised unless the `ignore-exceptions-in-scanner-tasks` waffle switch
    is active.

    - `results` are the validation results passed in the validation chain.
      This task is a validation task, which is why it must receive (and
      return) the validation results as first argument.
    - `upload_pk` is the FileUpload ID.
    - `scanner` is the key of the scanner in `SCANNERS`.
    - `api_url` / `api_key` are forwarded to `_run_scanner_for_url`.
    """
    name = SCANNERS.get(scanner)
    log.info(
        'Starting scanner "%s" task for FileUpload %s.', name, upload_pk
    )

    is_webextension = results['metadata']['is_webextension']
    if not is_webextension:
        log.info(
            'Not running scanner "%s" for FileUpload %s, it is not a webextension.',
            name,
            upload_pk,
        )
        return results

    file_upload = FileUpload.objects.get(pk=upload_pk)

    try:
        file_is_missing = not os.path.exists(file_upload.path)
        if file_is_missing:
            raise ValueError(
                'File "{}" does not exist.'.format(file_upload.path))

        record = ScannerResult(upload=file_upload, scanner=scanner)

        # Only the remote call is timed, not the database write.
        with statsd.timer('devhub.{}'.format(name)):
            _run_scanner_for_url(
                record,
                file_upload.get_authenticated_download_url(),
                scanner,
                api_url,
                api_key,
            )

        record.save()

        if record.has_matches:
            statsd.incr('devhub.{}.has_matches'.format(name))
            for rule in record.matched_rules.all():
                statsd.incr(
                    'devhub.{}.rule.{}.match'.format(name, rule.id))

        statsd.incr('devhub.{}.success'.format(name))
        log.info(
            'Ending scanner "%s" task for FileUpload %s.', name, upload_pk
        )
    except Exception as exc:
        statsd.incr('devhub.{}.failure'.format(name))
        log.exception(
            'Error in scanner "%s" task for FileUpload %s.', name, upload_pk
        )
        # The switch decides whether a scanner failure breaks the chain.
        if waffle.switch_is_active('ignore-exceptions-in-scanner-tasks'):
            return results
        raise exc

    return results
Ejemplo n.º 5
0
class AbstractScannerRule(ModelBase):
    """Abstract model for a named rule of a scanner and its action.

    Rules are unique per (name, scanner). Yara rules are additionally
    validated in `clean()`: they need a definition that names the rule,
    contains a single rule and compiles.
    """

    name = models.CharField(
        max_length=200,
        help_text=_('This is the exact name of the rule used by a scanner.'),
    )
    # Key of the scanner in SCANNERS.
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    # Key of the action in ACTIONS.
    action = models.PositiveSmallIntegerField(
        choices=ACTIONS.items(), default=NO_ACTION
    )
    is_active = models.BooleanField(
        default=True,
        help_text=_(
            'When unchecked, the scanner results will not be bound to this '
            'rule and the action will not be executed.'
        ),
    )
    # Source of the rule; mandatory for yara rules (see clean_yara()).
    definition = models.TextField(null=True, blank=True)

    class Meta(ModelBase.Meta):
        abstract = True
        unique_together = ('name', 'scanner')

    def __str__(self):
        return self.name

    def clean(self):
        """Run the scanner-specific validation (yara only)."""
        if self.scanner != YARA:
            return
        self.clean_yara()

    def clean_yara(self):
        """Validate the definition of a yara rule.

        Raises a `ValidationError` keyed on `definition` when the definition
        is empty, does not contain `rule <name>`, holds more than one rule,
        or cannot be compiled by yara.
        """
        source = self.definition

        if not source:
            raise ValidationError(
                {'definition': _('Yara rules should have a definition')})

        expected_header = 'rule {}'.format(self.name)
        if expected_header not in source:
            raise ValidationError({
                'definition':
                _('The name of the rule in the definition should match '
                  'the name of the scanner rule')
            })

        rule_headers = re.findall(r'rule\s+.+?\s+{', source)
        if len(rule_headers) > 1:
            raise ValidationError({
                'definition':
                _('Only one Yara rule is allowed in the definition')
            })

        try:
            yara.compile(source=source)
        except yara.SyntaxError as syntaxError:
            # Syntax errors are reported back with yara's own message.
            message = _('The definition is not valid: %(error)s') % {
                'error': syntaxError,
            }
            raise ValidationError({'definition': message})
        except Exception:
            # Any other compilation failure gets a generic message.
            raise ValidationError({
                'definition':
                _('An error occurred when compiling the definition')
            })
Ejemplo n.º 6
0
    def get(self, request, format=None):
        """Validate the `label` and `scanner` query parameters, then delegate.

        Raises `ParseError` when `label` is given but is neither LABEL_BAD
        nor LABEL_GOOD, or when `scanner` is given but is not one of the
        SCANNERS values.
        """
        params = self.request.query_params

        label = params.get('label', None)
        label_is_valid = label is None or label in (LABEL_BAD, LABEL_GOOD)
        if not label_is_valid:
            raise ParseError("invalid value for label")

        scanner = params.get('scanner', None)
        scanner_is_valid = scanner is None or scanner in SCANNERS.values()
        if not scanner_is_valid:
            raise ParseError("invalid value for scanner")

        return super().get(request, format)
Ejemplo n.º 7
0
def run_scanner(upload_pk, scanner, api_url, api_key):
    """
    Run a scanner on a FileUpload via RPC and store the results.

    The scanner is called with a JSON POST request carrying the API key and
    an authenticated download URL for the upload. The decoded response is
    stored in a `ScannersResult` only when the scanner answered with HTTP
    200 and the body has no `error` entry.

    Nothing is returned. Once the upload has been loaded, failures are
    counted and logged but never re-raised.

    - `upload_pk` is the FileUpload ID.
    - `scanner` is the key of the scanner in `SCANNERS`.
    - `api_url` / `api_key` are the scanner endpoint and its credential.
    """
    scanner_name = SCANNERS.get(scanner)
    log.info('Starting scanner "%s" task for FileUpload %s.', scanner_name,
             upload_pk)

    upload = FileUpload.objects.get(pk=upload_pk)

    if not upload.path.endswith('.xpi'):
        log.info('Not running scanner "%s" for FileUpload %s, it is not a xpi '
                 'file.', scanner_name, upload_pk)
        return

    try:
        if not os.path.exists(upload.path):
            raise ValueError('File "{}" does not exist.'.format(upload.path))

        result = ScannersResult()
        result.upload = upload
        result.scanner = scanner

        # Only the remote call (and building its payload) is timed.
        with statsd.timer('devhub.{}'.format(scanner_name)):
            json_payload = {
                'api_key': api_key,
                'download_url': upload.get_authenticated_download_url(),
            }
            response = requests.post(url=api_url,
                                     json=json_payload,
                                     timeout=settings.SCANNER_TIMEOUT)

        try:
            results = response.json()
        except ValueError:
            # Log the response body when JSON decoding has failed.
            raise ValueError(response.text)

        # A non-200 answer is a failure even when its JSON body carries no
        # "error" entry; do not persist it as if it were a scanner result.
        if response.status_code != 200 or 'error' in results:
            raise ValueError(results)

        result.results = results
        result.save()

        statsd.incr('devhub.{}.success'.format(scanner_name))
        log.info('Ending scanner "%s" task for FileUpload %s.', scanner_name,
                 upload_pk)
    except Exception:
        statsd.incr('devhub.{}.failure'.format(scanner_name))
        # We log the exception but we do not raise to avoid perturbing the
        # submission flow.
        log.exception('Error in scanner "%s" task for FileUpload %s.',
                      scanner_name, upload_pk)
Ejemplo n.º 8
0
    def get(self, request, format=None):
        """Validate the query parameters of the results API, then delegate.

        Raises `Http404` while the `enable-scanner-results-api` waffle switch
        is off, and `ParseError` when `label` or `scanner` is given with a
        value that is not accepted.
        """
        api_enabled = waffle.switch_is_active('enable-scanner-results-api')
        if not api_enabled:
            raise Http404

        params = self.request.query_params

        label = params.get('label', None)
        label_is_valid = label is None or label in (LABEL_BAD, LABEL_GOOD)
        if not label_is_valid:
            raise ParseError("invalid value for label")

        scanner = params.get('scanner', None)
        scanner_is_valid = scanner is None or scanner in SCANNERS.values()
        if not scanner_is_valid:
            raise ParseError("invalid value for scanner")

        return super().get(request, format)
Ejemplo n.º 9
0
class ScannerRule(ModelBase):
    """A named rule of a scanner and the action attached to it.

    Rules are unique per (name, scanner) and displayed by their name.
    """

    name = models.CharField(
        max_length=200,
        help_text=_('This is the exact name of the rule used by a scanner.'),
    )
    # Key of the scanner in SCANNERS.
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    # Key of the action in ACTIONS; defaults to NO_ACTION.
    action = models.PositiveSmallIntegerField(
        choices=ACTIONS.items(),
        default=NO_ACTION,
    )
    is_active = models.BooleanField(default=True)

    class Meta:
        db_table = 'scanners_rules'
        unique_together = ('name', 'scanner')

    def __str__(self):
        rule_name = self.name
        return rule_name
Ejemplo n.º 10
0
    def get_queryset(self):
        """Return the labelled scanner results, newest primary key first.

        Only results bound to a version are considered. "Bad" results are
        true positives or results whose version has a blocklist
        added/edited activity log. "Good" results have a
        confirm-auto-approved or approve-version activity log not authored
        by `settings.TASK_USER_ID`, and are not "bad". Each result is
        annotated with its `label`.

        The `scanner` query parameter (a SCANNERS value) narrows the results
        to that scanner. The `label` query parameter selects one of the two
        sets; without it both are returned, and any other value yields an
        empty queryset.
        """
        params = self.request.query_params
        label = params.get('label', None)

        # Map the scanner name given in the query string back to its key.
        requested_scanner = params.get('scanner')
        scanner = None
        for key, scanner_name in SCANNERS.items():
            if scanner_name == requested_scanner:
                scanner = key
                break

        good_results = ScannerResult.objects.exclude(version=None)
        bad_results = ScannerResult.objects.exclude(version=None)

        if scanner:
            good_results = good_results.filter(scanner=scanner)
            bad_results = bad_results.filter(scanner=scanner)

        blocklist_actions = (
            amo.LOG.BLOCKLIST_BLOCK_ADDED.id,
            amo.LOG.BLOCKLIST_BLOCK_EDITED.id,
        )
        bad_filters = Q(state=TRUE_POSITIVE) | Q(
            version__versionlog__activity_log__action__in=blocklist_actions)

        approval_actions = (
            amo.LOG.CONFIRM_AUTO_APPROVED.id,
            amo.LOG.APPROVE_VERSION.id,
        )
        good_filters = Q(
            version__versionlog__activity_log__action__in=approval_actions
        ) & ~Q(
            version__versionlog__activity_log__user_id=settings.TASK_USER_ID
        )

        good_results = (
            good_results
            .filter(good_filters)
            .exclude(bad_filters)
            .distinct()
            .annotate(label=Value(LABEL_GOOD, output_field=CharField()))
            .all()
        )
        bad_results = (
            bad_results
            .filter(bad_filters)
            .distinct()
            .annotate(label=Value(LABEL_BAD, output_field=CharField()))
            .all()
        )

        if not label:
            queryset = good_results.union(bad_results)
        elif label == LABEL_GOOD:
            queryset = good_results
        elif label == LABEL_BAD:
            queryset = bad_results
        else:
            queryset = ScannerResult.objects.none()

        return queryset.order_by('-pk')
Ejemplo n.º 11
0
class ScannerResult(ModelBase):
    """Result of one scanner, with the list of rules that matched.

    Matches are recorded through `add_match()`; `has_matches` is set there,
    or derived from `matches` on save when it is still unset.
    """

    # The scanned upload; set to NULL (not cascaded) when it is deleted.
    upload = models.ForeignKey(
        FileUpload,
        related_name='scanners_results',
        on_delete=models.SET_NULL,
        null=True,
    )
    # Store the "raw" results of a scanner (optionally).
    results = JSONField(default=None)
    # Key of the scanner in SCANNERS.
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    # Version the result belongs to; results are deleted with the version.
    version = models.ForeignKey(
        'versions.Version',
        related_name='scanners_results',
        on_delete=models.CASCADE,
        null=True,
    )
    # One {'rule', 'tags', 'meta'} entry per match (see add_match()).
    matches = JSONField(default=[])
    has_matches = models.NullBooleanField()

    class Meta:
        db_table = 'scanners_results'
        constraints = [
            models.UniqueConstraint(
                fields=('upload', 'scanner', 'version'),
                name='scanners_results_upload_id_scanner_'
                'version_id_ad9eb8a6_uniq',
            ),
        ]
        indexes = [
            models.Index(fields=('has_matches', )),
        ]

    def add_match(self, rule, tags=None, meta=None):
        """Record a matched rule and flag the result as having matches."""
        match = {
            'rule': rule,
            'tags': tags or [],
            'meta': meta or {},
        }
        self.matches.append(match)
        self.has_matches = True

    def save(self, *args, **kwargs):
        """Save the result, filling `has_matches` when it is still unset."""
        still_unset = self.has_matches is None
        if still_unset:
            self.has_matches = bool(self.matches)
        super().save(*args, **kwargs)

    @property
    def matched_rules(self):
        """Sorted, de-duplicated names of the rules found in `matches`."""
        rule_names = set()
        for match in self.matches:
            rule_names.add(match['rule'])
        return sorted(rule_names)
Ejemplo n.º 12
0
class ScannersResult(ModelBase):
    """Results of one scanner, attached to an upload and/or a version.

    The (upload, scanner, version) combination is unique.
    """

    # The scanned upload; set to NULL (not cascaded) when it is deleted.
    upload = models.ForeignKey(
        FileUpload,
        related_name='scanners_results',
        on_delete=models.SET_NULL,
        null=True,
    )
    # Payload returned by the scanner.
    results = JSONField(default={})
    # Key of the scanner in SCANNERS.
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    # Version the result belongs to; results are deleted with the version.
    version = models.ForeignKey(
        'versions.Version',
        related_name='scanners_results',
        on_delete=models.CASCADE,
        null=True,
    )

    class Meta:
        db_table = 'scanners_results'
        constraints = [
            models.UniqueConstraint(
                fields=('upload', 'scanner', 'version'),
                name='scanners_results_upload_id_scanner_'
                'version_id_ad9eb8a6_uniq',
            ),
        ]
Ejemplo n.º 13
0
 def test_scanner_choices(self):
     """The `scanner` field of the model offers exactly the SCANNERS items."""
     scanner_field = self.model._meta.get_field('scanner')
     expected_choices = SCANNERS.items()
     assert scanner_field.choices == expected_choices
Ejemplo n.º 14
0
 def get_scanner_name(self):
     """Look up this object's `scanner` in SCANNERS and return the entry."""
     scanner_id = self.scanner
     return SCANNERS.get(scanner_id)
Ejemplo n.º 15
0
class AbstractScannerResult(ModelBase):
    """Abstract model holding the raw results of one scanner for a version.

    The class relies on a `matched_rules` relation that is not declared
    here (presumably provided by concrete subclasses). On save, the rule
    names found in the raw results are looked up among the active rules of
    the same scanner; `has_matches` and `matched_rules` are derived from
    that lookup.
    """

    # Store the "raw" results of a scanner.
    results = JSONField(default=[])
    # Key of the scanner in SCANNERS.
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    # Recomputed on every save() from the rules found in the database.
    has_matches = models.NullBooleanField()
    # Key of the state in RESULT_STATES; UNKNOWN until feedback is given
    # (see can_report_feedback() / can_revert_feedback()).
    state = models.PositiveSmallIntegerField(
        choices=RESULT_STATES.items(), null=True, blank=True, default=UNKNOWN
    )
    version = models.ForeignKey(
        'versions.Version',
        related_name="%(class)ss",
        on_delete=models.CASCADE,
        null=True,
    )

    class Meta(ModelBase.Meta):
        abstract = True
        indexes = [
            models.Index(fields=('has_matches',)),
            models.Index(fields=('state',)),
        ]

    def add_yara_result(self, rule, tags=None, meta=None):
        """This method is used to store a Yara result."""
        self.results.append(
            {'rule': rule, 'tags': tags or [], 'meta': meta or {}}
        )

    def extract_rule_names(self):
        """This method parses the raw results and returns the (matched) rule
        names. Not all scanners have rules that necessarily match."""
        if self.scanner == YARA:
            return sorted({result['rule'] for result in self.results})
        if self.scanner == CUSTOMS and 'matchedRules' in self.results:
            return self.results['matchedRules']
        # We do not have support for the remaining scanners (yet).
        return []

    def save(self, *args, **kwargs):
        """Save the result, then bind it to the active rules it matched."""
        rule_model = self._meta.get_field('matched_rules').related_model
        matched_rules = rule_model.objects.filter(
            scanner=self.scanner,
            name__in=self.extract_rule_names(),
            # See: https://github.com/mozilla/addons-server/issues/13143
            is_active=True,
        )
        self.has_matches = bool(matched_rules)
        # Save the instance first...
        super().save(*args, **kwargs)
        # ...then add the associated rules.
        for scanner_rule in matched_rules:
            self.matched_rules.add(scanner_rule)

    def get_scanner_name(self):
        """Return the SCANNERS entry for this result's scanner."""
        return SCANNERS.get(self.scanner)

    def get_pretty_results(self):
        """Return the raw results as JSON indented by two spaces."""
        return json.dumps(self.results, indent=2)

    def get_files_by_matched_rules(self):
        """Group the file names found in the raw results by rule.

        Returns a `defaultdict(list)` mapping a rule to file names:

        - yara: every result contributes its `meta['filename']` ('???' when
          absent) under its `rule`;
        - customs: every file of `scanMap` is listed under each rule whose
          data has a truthy `RULE_HAS_MATCHED`.

        Other scanners yield an empty mapping.
        """
        res = defaultdict(list)
        # Scanner ids are plain integers: compare them by value. Identity
        # (`is`) only works by accident of CPython's small-integer cache.
        if self.scanner == YARA:
            for item in self.results:
                res[item['rule']].append(item['meta'].get('filename', '???'))
        elif self.scanner == CUSTOMS:
            scanMap = self.results.get('scanMap', {})
            for filename, rules in scanMap.items():
                for ruleId, data in rules.items():
                    if data.get('RULE_HAS_MATCHED', False):
                        res[ruleId].append(filename)
        return res

    def can_report_feedback(self):
        """Tell whether feedback can be given: the result has matches, its
        state is still UNKNOWN and its scanner is not WAT."""
        return (
            self.has_matches and self.state == UNKNOWN and self.scanner != WAT
        )

    def can_revert_feedback(self):
        """Tell whether feedback can be reverted: the result has matches,
        its state is no longer UNKNOWN and its scanner is not WAT."""
        return (
            self.has_matches and self.state != UNKNOWN and self.scanner != WAT
        )

    def get_git_repository(self):
        """Return the git repository setting of the scanner (customs and
        yara only), or None for any other scanner."""
        return {
            CUSTOMS: settings.CUSTOMS_GIT_REPOSITORY,
            YARA: settings.YARA_GIT_REPOSITORY,
        }.get(self.scanner)

    @classmethod
    def run_action(cls, version):
        """Try to find and execute an action for a given version, based on the
        scanner results and associated rules.

        If an action is found, it is run synchronously from this method, not in
        a task.

        Among the active rules bound to a result of this version, the one
        with the highest `action` value is used. Raises `Exception` when that
        action is not in ACTIONS or has no implementation.
        """
        log.info('Checking rules and actions for version %s.', version.pk)

        rule_model = cls.matched_rules.rel.model
        result_query_name = cls._meta.get_field(
            'matched_rules').related_query_name()

        rule = (
            rule_model.objects.filter(**{
                f'{result_query_name}__version': version, 'is_active': True,
            })
            .order_by(
                # The `-` sign means descending order.
                '-action'
            )
            .first()
        )

        if not rule:
            log.info('No action to execute for version %s.', version.pk)
            return

        action_id = rule.action
        action_name = ACTIONS.get(action_id, None)

        if not action_name:
            raise Exception("invalid action %s" % action_id)

        # Maps an action id to the function implementing it.
        ACTION_FUNCTIONS = {
            NO_ACTION: _no_action,
            FLAG_FOR_HUMAN_REVIEW: _flag_for_human_review,
            DELAY_AUTO_APPROVAL: _delay_auto_approval,
            DELAY_AUTO_APPROVAL_INDEFINITELY: (
                _delay_auto_approval_indefinitely),
        }

        action_function = ACTION_FUNCTIONS.get(action_id, None)

        if not action_function:
            raise Exception("no implementation for action %s" % action_id)

        # We have a valid action to execute, so let's do it!
        log.info(
            'Starting action "%s" for version %s.', action_name, version.pk)
        action_function(version)
        log.info('Ending action "%s" for version %s.', action_name, version.pk)
Ejemplo n.º 16
0
class AbstractScannerRule(ModelBase):
    """Abstract model for a named rule of a scanner and its action.

    Rules are unique per (name, scanner). Yara rules are additionally
    validated in `clean()`: they need a definition that names the rule,
    contains a single rule and compiles with the injected externals.
    """

    name = models.CharField(
        max_length=200,
        help_text=_('This is the exact name of the rule used by a scanner.'),
    )
    # Key of the scanner in SCANNERS.
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    # Key of the action in ACTIONS.
    action = models.PositiveSmallIntegerField(
        choices=ACTIONS.items(), default=NO_ACTION
    )
    is_active = models.BooleanField(
        default=True,
        help_text=_(
            'When unchecked, the scanner results will not be bound to this '
            'rule and the action will not be executed.'
        ),
    )
    # Source of the rule; mandatory for yara rules (see clean_yara()).
    definition = models.TextField(null=True, blank=True)

    class Meta(ModelBase.Meta):
        abstract = True
        unique_together = ('name', 'scanner')

    @classmethod
    def get_yara_externals(cls):
        """
        Return a dict with the various external variables we inject in every
        yara rule automatically and their default values (all False).
        """
        external_names = (
            'is_json_file',
            'is_manifest_file',
            'is_locale_file',
        )
        return dict.fromkeys(external_names, False)

    def __str__(self):
        return self.name

    def clean(self):
        """Run the scanner-specific validation (yara only)."""
        if self.scanner != YARA:
            return
        self.clean_yara()

    def clean_yara(self):
        """Validate the definition of a yara rule.

        Raises a `ValidationError` keyed on `definition` when the definition
        is empty, does not contain `rule <name>`, holds more than one rule,
        or cannot be compiled by yara.
        """
        source = self.definition

        if not source:
            raise ValidationError(
                {'definition': _('Yara rules should have a definition')})

        expected_header = f'rule {self.name}'
        if expected_header not in source:
            raise ValidationError({
                'definition':
                _('The name of the rule in the definition should match '
                  'the name of the scanner rule')
            })

        rule_headers = re.findall(r'rule\s+.+?\s+{', source)
        if len(rule_headers) > 1:
            raise ValidationError({
                'definition':
                _('Only one Yara rule is allowed in the definition')
            })

        try:
            # Compile with the same externals that are injected in every rule.
            yara.compile(source=source,
                         externals=self.get_yara_externals())
        except yara.SyntaxError as syntaxError:
            # Syntax errors are reported back with yara's own message.
            message = _('The definition is not valid: %(error)s') % {
                'error': syntaxError
            }
            raise ValidationError({'definition': message})
        except Exception:
            # Any other compilation failure gets a generic message.
            raise ValidationError({
                'definition':
                _('An error occurred when compiling the definition')
            })