Example 1
class FileSchema(Schema):
    """The file schema.

    This is the schema for the file document stored within the mongo database.
    """

    # Reusable validator: rejects empty strings.
    not_blank = marshmallow.validate.Length(min=1, error='Field cannot be blank')

    # Mongo ObjectId; accepted on load only, never dumped back to clients.
    _id = fields.ObjectId(load_only=True)
    # NOTE(review): `required=True` combined with `missing=` is contradictory
    # in recent marshmallow releases — confirm the marshmallow version in use.
    file_type = fields.Enum(required=True, type=enums.FileType, missing=enums.FileType.FILE)

    name = fields.Str(required=True, validate=not_blank)

    sha256_digest = fields.Str()

    description = fields.Str()
    tags = fields.Str()

    # File metadata — presumably libmagic output, MIME type and size in
    # bytes; confirm against whatever populates these fields.
    magic = fields.Str()
    mime = fields.Str()
    size = fields.Int()

    # ISO-8601 style timestamp with microseconds.
    timestamp = fields.DateTime("%Y-%m-%dT%H:%M:%S.%f")

    submission_type = fields.Str(validate=not_blank, default="unknown")

    # Parent/child relationships — presumably keyed by digest; verify against
    # callers. NOTE(review): `default={}` is a single shared mutable dict —
    # safe only if it is never mutated in place.
    parents = fields.Dict(values=fields.List(fields.Str(validate=not_blank)), keys=fields.Str(validate=not_blank), default={})
    children = fields.Dict(values=fields.List(fields.Str(validate=not_blank)), keys=fields.Str(validate=not_blank), default={})
Example 2
class CommandSchema(Schema):
    """The command schema.

    This is the base schema for the command document stored within the mongo
    database.

    Note:
        Scales are allowed to embed additional information into this document
        but it will be ignored.
    """

    # Mongo ObjectIds; accepted on load only, never dumped back to clients.
    _id = fields.ObjectId(load_only=True)
    _output_id = fields.ObjectId(load_only=True, missing=None)  # GridFS

    # Identity of a command run: target file digest, scale (plugin) name and
    # the command name within that scale.
    sha256_digest = fields.Str(required=True)
    scale = fields.Str(required=True)
    command = fields.Str(required=True)

    # NOTE(review): `default={}`/`missing={}` share one mutable dict across
    # all documents — safe only if it is never mutated in place.
    args = fields.Dict(default={}, missing={})
    asynchronous = fields.Boolean(default=False)
    timeout = fields.Int(default=600)  # presumably seconds — confirm

    format = fields.Str(type=enums.Format, missing=enums.Format.JSON)
    # Command output is dump-only; it is materialised from GridFS elsewhere.
    output = fields.Raw(dump_only=True, default=None, missing=None)
    status = fields.Str(type=enums.Status, missing=enums.Status.PENDING, default=enums.Status.PENDING)

    # Lifecycle timestamps, ISO-8601 style with microseconds.
    timestamp = fields.DateTime("%Y-%m-%dT%H:%M:%S.%f")
    start_time = fields.DateTime("%Y-%m-%dT%H:%M:%S.%f")
    end_time = fields.DateTime("%Y-%m-%dT%H:%M:%S.%f")
Example 3
    class CommandsSchema(schema.Schema):
        """Extends `Schema`.

        Defines the valid schema for post request.
        """
        # NOTE(review): `default={}`/`missing={}` share one mutable dict —
        # safe only if it is never mutated in place.
        args = fields.Dict(required=False, default={}, missing={})
        command = fields.Str(required=True)
        format = fields.Str(type=enums.Format, missing=enums.Format.JSON)
        # A list of digests — presumably the command is applied to each
        # listed sample; confirm against the handler that consumes this.
        sha256_digests = fields.List(fields.Str(), required=True)
        scale = fields.Str(required=True)
        timeout = fields.Int(required=False)
Example 4
class StoreHandler(snake_handler.SnakeHandler):
    """Extends `SnakeHandler`."""
    @tornadoparser.use_args({
        # filter[field]: str
        'file_type':
        fields.Enum(type=enums.FileType, required=False, missing=None),
        'from':
        fields.Int(required=False, missing=0),
        'limit':
        fields.Int(required=False, missing=10),
        'operator':
        fields.Str(required=False, missing='and'),
        'order':
        fields.Int(required=False, missing=-1),
        'sort':
        fields.Str(required=False, missing=None)
    })
    async def get(self, data):
        """List stored samples, paginated and optionally filtered.

        Args:
            data (dict): Parsed query arguments (see `use_args` above).
        """
        documents = []
        # Build a mongo filter from the raw request arguments; combined with
        # the optional file_type restriction under `$and`.
        filter_ = self.create_filter(self.request.arguments, data['operator'])
        if filter_:
            filter_ = {'$and': [filter_]}
            if data['file_type']:
                filter_['$and'] += [{'file_type': data['file_type']}]
        elif data['file_type']:
            filter_ = {'file_type': data['file_type']}
        # NOTE: With async (motor) there is no count() on cursor so we have to work around that
        total = await db.async_file_collection.db.files.count_documents(
            filter_ if filter_ else {})
        cursor = db.async_file_collection.select_all(filter_, data['order'],
                                                     data['sort'],
                                                     data['limit'],
                                                     data['from'])
        while await cursor.fetch_next:
            documents += [cursor.next_object()]

        # Round-trip through the schema to validate and normalise the output.
        documents = schema.FileSchema(many=True).dump(
            schema.FileSchema(many=True).load(documents))
        self.jsonify({'samples': documents, 'total': total})
        self.finish()
Example 5
class FilesHandler(snake_handler.SnakeHandler):
    """Extends `SnakeHandler`."""
    @tornadoparser.use_args({
        'limit':
        fields.Str(required=False),
        'operator':
        fields.Str(required=False, missing='and'),
        'order':
        fields.Int(required=False, missing=-1),
        'sort':
        fields.Str(required=False)
    })
    async def get(self, data):
        """List file documents (file_type FILE), optionally limited.

        Args:
            data (dict): Parsed query arguments (see `use_args` above).
        """
        documents = []
        # `sort`/`limit` have no `missing=` so they may be absent from data.
        sort = data.get('sort')
        # Hoist the limit parse out of the fetch loop (it used to be
        # re-parsed for every fetched document); None means "no limit".
        limit = int(data['limit']) if 'limit' in data else None
        # Always restrict to FILE documents, `$and`-ed with any user filter.
        filter_ = self.create_filter(self.request.arguments, data['operator'])
        if filter_:
            filter_ = {'$and': [{'file_type': enums.FileType.FILE}, filter_]}
        else:
            filter_ = {'file_type': enums.FileType.FILE}
        cursor = db.async_file_collection.select_all(filter_, data['order'],
                                                     sort)
        fetched = 0
        while await cursor.fetch_next:
            if limit is not None:
                if fetched >= limit:
                    break
                fetched += 1
            documents += [cursor.next_object()]

        # Round-trip through the schema to validate and normalise the output.
        documents = schema.FileSchema(many=True).dump(
            schema.FileSchema(many=True).load(documents))
        self.jsonify({'files': documents})
        self.finish()
Example 6
class Commands(scale.Commands):
    """Commands for the `strings` scale."""

    def check(self):
        """Ensure the 'strings' binary is available on PATH."""
        strings = shutil.which('strings')
        if not strings:
            raise error.CommandWarning("Binary 'strings' not found")
        return

    @scale.command({
        'info': 'This function will return strings found within the file'
    })
    def all_strings(self, args, file, opts):
        """Run `strings` on the sample and return the lines as a list."""
        return str(subprocess.check_output(["strings", file.file_path]), encoding="utf-8").split('\n')

    @staticmethod
    def all_strings_plaintext(json):
        """Plaintext formatter for `all_strings` output."""
        return '\n'.join(json)

    @scale.command({
        'args': {
            'min_length': fields.Int(default=5)
        },
        'info': 'This function will return interesting strings found within the file'
    })
    def interesting(self, args, file, opts):
        """Return strings matching any 'interesting' regex, each annotated
        with the rule names that matched.

        A rule only counts when the matched text is longer than the
        `min_length` argument.
        """
        strings = str(subprocess.check_output(["strings", file.file_path]), encoding="utf-8").split('\n')
        min_length = args['min_length']
        # Table-driven replacement for ten duplicated search blocks; the
        # order is preserved so the rule list in the output is unchanged.
        simple_checks = (
            ('IPV4_REGEX', regex.IPV4_REGEX),
            ('IPV6_REGEX', regex.IPV6_REGEX),
            ('EMAIL_REGEX', regex.EMAIL_REGEX),
            ('URL_REGEX', regex.URL_REGEX),
            ('DOMAIN_REGEX', regex.DOMAIN_REGEX),
            ('WINDOWS_PATH_REGEX', regex.WINDOWS_PATH_REGEX),
            ('MAC_REGEX', regex.MAC_REGEX),
            ('DATE1_REGEX', regex.DATE1_REGEX),
            ('DATE2_REGEX', regex.DATE2_REGEX),
            ('DATE3_REGEX', regex.DATE3_REGEX),
        )
        output = []
        for string in strings:
            rules = []
            for rule_name, pattern in simple_checks:
                match = pattern.search(string)
                if match and len(match.group()) > min_length:
                    rules += [rule_name]

            # Unix paths need extra validation: quoted paths are accepted
            # verbatim; paths containing special characters are only valid
            # when every special character is backslash-escaped.
            match = regex.UNIX_PATH_REGEX.search(string)
            if match:
                valid_path = False
                match_str = match.group()
                # NOTE(review): a too-short unix path match skips the whole
                # string, discarding any rules matched above — kept as-is
                # because it is the original behaviour, but it looks
                # unintentional.
                if len(match_str) <= min_length:
                    continue
                if ((match_str.startswith("'") and match_str.endswith("'")) or (match_str.startswith('"') and match_str.endswith('"'))):
                    valid_path = True
                elif any(char in SPECIAL_CHARS for char in match_str):
                    valid_path = True
                    for i in SPECIAL_CHARS:
                        if i in match_str:
                            index = match_str.index(i)
                            if index > 0 and match_str[index - 1] != "\\":
                                valid_path = False
                else:
                    valid_path = True
                if valid_path:
                    rules += ['UNIX_PATH_REGEX']

            if rules:
                output += ['{} ({})'.format(string, ', '.join(rules))]
        return output

    @staticmethod
    def interesting_plaintext(json):
        """Plaintext formatter for `interesting` output."""
        return '\n'.join(json)
Example 7
class Interface(scale.Interface):
    """Pull/push interface between Snake and a Cuckoo sandbox instance."""

    def check(self):
        """Fail early when the Cuckoo API endpoint is not configured."""
        if CUCKOO_API is None or CUCKOO_API == '':
            raise error.InterfaceError(
                "config variable 'cuckoo_api' has not been set")

    def _sample_id(self, file):
        """Resolve the Cuckoo sample id for `file` via its sha256 digest.

        Extracted helper: this lookup was duplicated in `info` and `reports`.

        Raises:
            error.InterfaceError: Cuckoo could not be reached.
            error.InterfaceWarning: the file is unknown to Cuckoo.
        """
        try:
            j = requests.get(CUCKOO_API + '/files/view/sha256/' +
                             file.sha256_digest,
                             verify=VERIFY).json()
        except requests.exceptions.RequestException:
            raise error.InterfaceError("failed to connect to Cuckoo")
        if 'sample' not in j:
            raise error.InterfaceWarning(
                "file has never been submitted to Cuckoo")
        return j['sample']['id']

    @scale.pull({'info': 'summary of scores for the sample'})
    def info(self, args, file, opts):
        """Collect the score of every finished Cuckoo task for the sample."""
        s_id = self._sample_id(file)
        r = requests.get(CUCKOO_API + '/tasks/list', verify=VERIFY)
        if not r.status_code == requests.codes.ok:  # pylint: disable=no-member
            return "No reports, sample must be pending/running", "pending"
        j = r.json()
        output = []
        for t in j['tasks']:
            if t['sample_id'] == s_id:
                r = requests.get(CUCKOO_API + '/tasks/report/' + str(t['id']),
                                 verify=VERIFY)
                if r.status_code == requests.codes.ok:  # pylint: disable=no-member
                    j = r.json()
                    output += [{
                        'score': j['info']['score'],
                        'name': j['info']['machine']['name']
                    }]
        if not output:
            # BUG FIX: this warning used to be `return`ed rather than raised,
            # handing callers an exception instance as data; every other
            # warning in this class is raised.
            raise error.InterfaceWarning("no information available!")
        return {'info': output}

    def info_markdown(self, json):
        """Render `info` output as a markdown table, colouring high scores."""
        output = md.table_header(('Machine', 'Score'))
        for j in json['info']:
            score = j['score']
            if score > 5:
                s = "%red " + str(score) + " %"
            elif score > 3:
                s = "%yellow " + str(score) + " %"
            else:
                s = str(score)
            output += md.table_row((j['name'], s))
        return output

    @scale.pull({
        'args': {
            'id': fields.Str(required=True)
        },
        'info': 'view report summary'
    })
    def report(self, args, file, opts):
        """Fetch one Cuckoo task report and reduce it to a summary dict."""
        # TODO: Hash match!
        try:
            r = requests.get(CUCKOO_API + '/tasks/report/' + args['id'],
                             verify=VERIFY)
        except requests.exceptions.RequestException:
            raise error.InterfaceError("failed to connect to Cuckoo")
        if not r.status_code == requests.codes.ok:  # pylint: disable=no-member
            return "No task for given id"
        j = r.json()
        output = {
            'score':
            j['info']['score'],
            'platform':
            j['info']['platform'],
            'analysis': {
                'category': j['info']['category'],
                'started': j['info']['started'],
                'ended': j['info']['ended'],
                'duration': j['info']['duration']
            },
            'machine': {
                'name': j['info']['machine']['name'],
                'manager': j['info']['machine']['manager']
            },
            'signatures': [{
                'severity': x['severity'],
                'description': x['description']
            } for x in j['signatures']]
        }
        return output

    def report_markdown(self, json):
        """Render `report` output as markdown, colouring signature severity."""
        output = md.h4('General')
        output += md.paragraph(md.bold('Score: ') + str(json['score']))
        output += md.cr()
        output += md.paragraph(md.bold('Platform: ') + json['platform'])
        output += md.h4('Analysis')
        output += md.table_header(('Category', 'Started', 'Ended', 'Duration'))
        output += md.table_row(
            (json['analysis']['category'], str(json['analysis']['started']),
             str(json['analysis']['ended']),
             str(json['analysis']['duration'])))
        output += md.h4('Machines')
        output += md.table_header(('Name', 'Manager'))
        output += md.table_row(
            (json['machine']['name'], json['machine']['manager']))
        output += md.h4('Signatures')
        output += md.table_header(('Severity', 'Description'))
        for s in json['signatures']:
            if s['severity'] > 2:
                output += md.table_row(
                    ('%red ' + str(s['severity']) + ' %', s['description']))
            elif s['severity'] > 1:
                output += md.table_row(
                    ('%orange ' + str(s['severity']) + ' %', s['description']))
            else:
                output += md.table_row(
                    ('%blue ' + str(s['severity']) + ' %', s['description']))
        return output

    @scale.pull({'info': 'view reports for sample'})
    def reports(self, args, file, opts):
        """List every Cuckoo task for the sample (id, url, time, status)."""
        s_id = self._sample_id(file)
        r = requests.get(CUCKOO_API + '/tasks/list', verify=VERIFY)
        if not r.status_code == requests.codes.ok:  # pylint: disable=no-member
            return "No reports, sample must be pending/running", "pending"
        j = r.json()
        output = {'reports': []}
        for t in j['tasks']:
            if t['sample_id'] == s_id:
                output['reports'] += [{
                    'id':
                    str(t['id']),
                    'url':
                    config.scale_configs['cuckoo']['cuckoo_url'] +
                    str(t['id']),
                    'timestamp':
                    str(t['added_on']),
                    'status':
                    str(t['status'])
                }]
        return output

    def reports_markdown(self, json):
        """Render `reports` output as a markdown table."""
        output = md.table_header(('ID', 'URL', 'Timestamp', 'Status'))
        for r in json['reports']:
            output += md.table_row(
                (r['id'], r['url'], r['timestamp'], r['status']))
        return output

    @scale.push({
        'args': {
            'machine': fields.Str(required=False),
            'priority': fields.Int(required=False),
            'timeout': fields.Int(required=False)
        },
        'info': 'submit sample to cuckoo'
    })
    def submit(self, args, file, opts):
        """Upload the sample to Cuckoo and return the create-task response."""
        document = db.file_collection.select(file.sha256_digest)
        with open(file.file_path, "rb") as f:
            try:
                r = requests.post(CUCKOO_API + '/tasks/create/file',
                                  files={"file": (document['name'], f)},
                                  verify=VERIFY)
            except requests.exceptions.RequestException:
                raise error.InterfaceError("failed to connect to Cuckoo")

        if not r.status_code == requests.codes.ok:  # pylint: disable=no-member
            raise error.InterfaceError('failed to submit sample to Cuckoo')

        j = r.json()

        if not j["task_id"]:
            raise error.InterfaceError('failed to submit sample to Cuckoo')

        return j
Example 8
class CommandHandler(snake_handler.SnakeHandler):
    """Extends `SnakeHandler`."""
    @tornadoparser.use_args({
        # 'args': fields.Dict(required=False, default={}, missing={}),
        'command':
        fields.Str(required=True),
        'format':
        fields.Str(type=enums.Format, missing=enums.Format.JSON),
        'output':
        fields.Bool(required=False, default=True, missing=True),
        'scale':
        fields.Str(required=True),
        'sha256_digest':
        fields.Str(required=True)
    })
    async def get(self, data):
        """Fetch the stored result of a previously queued command.

        Args:
            data (dict): Parsed query arguments (see `use_args` above).
        """
        # NOTE: Tornado/Marshmallow does not like Dict in args, will have to parse manually
        # TODO: Use marshmallow validation
        if 'args' in self.request.arguments and self.request.arguments['args']:
            data['args'] = json.loads(self.request.arguments['args'][0])
        else:
            data['args'] = {}
        document = await db.async_command_collection.select(
            data['sha256_digest'], data['scale'], data['command'],
            data['args'])
        if not document:
            self.write_warning("no output for given data", 404, data)
            self.finish()
            return

        # A stored error status is surfaced to the caller as a warning.
        if document['status'] == enums.Status.ERROR:
            self.write_warning("%s" % document['output'], 404, data)
            self.finish()
            return

        document = schema.CommandSchema().load(document)
        # The raw output lives in GridFS, referenced by _output_id.
        output = None
        if document['_output_id']:
            output = await db.async_command_output_collection.get(
                document['_output_id'])
        try:
            scale = scale_manager.get_scale(data['scale'])
            commands = scale_manager.get_component(
                scale, enums.ScaleComponent.COMMANDS)
            # Only format/attach the output when the caller asked for it.
            if data['output']:
                document['output'] = commands.snake.format(
                    data['format'], document['command'], output)
            document['format'] = data['format']
        except (SnakeError, TypeError) as err:
            self.write_warning("%s" % err, 404, data)
            self.finish()
            return

        document = schema.CommandSchema().dump(document)
        self.jsonify({'command': document})
        self.finish()

    @tornadoparser.use_args({
        'args':
        fields.Dict(required=False, default={}, missing={}),
        'asynchronous':
        fields.Bool(required=False),
        'command':
        fields.Str(required=True),
        'format':
        fields.Str(type=enums.Format, missing=enums.Format.JSON),
        'scale':
        fields.Str(required=True),
        'sha256_digest':
        fields.Str(required=True),
        'timeout':
        fields.Int(required=False)
    })
    async def post(self, data):
        """Validate and queue a new command against a stored sample.

        Args:
            data (dict): Parsed body arguments (see `use_args` above).
        """
        # Check that there is a file for this hash
        document = await db.async_file_collection.select(data['sha256_digest'])
        if not document:
            self.write_warning("no sample for given data", 404, data)
            self.finish()
            return

        # Check scale support
        try:
            scale = scale_manager.get_scale(data['scale'],
                                            document['file_type'])
            commands = scale_manager.get_component(
                scale, enums.ScaleComponent.COMMANDS)
            cmd = commands.snake.command(data['command'])
        except SnakeError as err:
            self.write_warning("%s" % err, 404, data)
            self.finish()
            return

        # Validate arguments as to not waste users time, yes this is also done on execution
        result, args = validate_args(cmd, data['args'])
        if not result:
            self.write_warning(args, 422, data)
            self.finish()
            return
        data['args'] = args

        # Queue command
        try:
            document = await route_support.queue_command(data)
        except SnakeError as err:
            self.write_warning("%s" % err, 500, data)
            self.finish()
            return

        document = schema.CommandSchema().load(document)
        # The raw output lives in GridFS, referenced by _output_id.
        output = None
        if document['_output_id']:
            output = await db.async_command_output_collection.get(
                document['_output_id'])
        try:
            document['output'] = commands.snake.format(data['format'],
                                                       document['command'],
                                                       output)
            document['format'] = data['format']
        except SnakeError as err:
            self.write_warning("%s" % err, 404, data)
            self.finish()
            return

        # Dump and finish
        document = schema.CommandSchema().dump(document)
        self.jsonify({"command": document})
        self.finish()