Example #1
0
    def initialize(self):
        """Extend `initialize`.

        Resets the per-request parsing state and then inspects the request
        headers to decide how the body is going to be handled. Streaming to
        disk is only prepared for `multipart/form-data` requests, all other
        supported requests are buffered in memory.

        Raises:
            SnakeError: When a multipart request carries no boundary.
        """
        self.bytes_read = 0
        self.content_length = 0
        self.content_type = ''
        self.data = bytes()
        self.error = None
        self.stream = None

        headers = self.request.headers

        # Encoded bodies are not handled, log it and leave the defaults alone
        if headers and 'Content-Encoding' in headers:
            gen_log.warning("Unsupported Content-Encoding: %s",
                            headers['Content-Encoding'])
            return

        # Without a Content-Type there is nothing to work out
        if not (headers and 'Content-Type' in headers):
            return

        if 'Content-Length' in headers:
            self.content_length = int(headers['Content-Length'])
        else:
            self.content_length = 0
        self.content_type = headers['Content-Type']

        if self.content_type.startswith("application/x-www-form-urlencoded"):
            return

        if not self.content_type.startswith("multipart/form-data"):
            # Remember the problem, it is raised once data starts arriving
            self.error = error.SnakeError('Unsupported Content-Type: %s' %
                                          self.content_type)
            return

        # A multipart/form-data POST has its file content streamed to disk to
        # prevent excessive RAM usage. The overall data size still needs to be
        # watched or someone could still use too much RAM!
        self.stream = self.Stream()
        boundary = None
        for parameter in self.content_type.split(";"):
            key, _, value = parameter.strip().partition("=")
            if key == "boundary" and value:
                boundary = bytes(value, 'utf8')
        if not boundary:
            raise error.SnakeError('Content boundary not found')

        # The boundary may be quoted, the quotes are not part of it
        if boundary.startswith(b'"') and boundary.endswith(b'"'):
            boundary = boundary[1:-1]
        self.stream.boundary = boundary

        cache_dir = path.abspath(path.expanduser(snake_config['cache_dir']))
        self.stream.working_dir = tempfile.TemporaryDirectory(dir=cache_dir)
Example #2
0
async def unzip_file_python(file_path,
                            file_name,
                            output_dir,
                            protected=False,
                            password=None):
    """Unzip file using ZipFile.

    Uses ZipFile to extract a file from a zip into a given directory. It will
    handle password protected folders and if no password is presented then it
    will loop through a list of passwords stored in the snake configuration.

    Note:
        Only zips with a single file are supported.

    Args:
        file_path (str): The path to the zipped file.
        file_name (str): The name of the file to extract from the zip.
        output_dir (str): The directory to extract the file to.
        protected (bool, optional): Is the zip password protected. Defaults to False.
        password (str, optional): The password for the zip. Defaults to None.

    Returns:
        str: The path of the extracted file.

    Raises:
        RuntimeError: For any error that is not related to a Bad Password.
        SnakeError: When extraction of the file has failed.
    """
    # The context manager closes the archive on every exit path, previously
    # the handle was left open until garbage collection
    with zipfile.ZipFile(file_path) as zip_file:
        if not protected:
            return zip_file.extract(file_name, output_dir, None)

        # A supplied password is the only candidate, otherwise fall back to
        # the passwords listed in the snake configuration
        if password:
            candidates = [password]
        else:
            candidates = snake_config['zip_passwords']

        for passwd in candidates:
            try:
                new_path = zip_file.extract(file_name, output_dir,
                                            bytes(passwd, 'utf-8'))
            except RuntimeError as err:
                # Only a wrong password is tolerated, anything else is fatal
                if 'Bad password' not in str(err):
                    raise
                continue
            if new_path:
                return new_path

    raise error.SnakeError('ZipError: incorrect password')
Example #3
0
def test_snake_error():
    """
    Test the class SnakeError

    Checks that a message is mandatory and that the optional status code and
    payload default to None and are stored as given.
    """
    with pytest.raises(TypeError):
        error.SnakeError()  # pylint: disable=no-value-for-parameter

    err = error.SnakeError('hello')
    assert 'hello' in err.message
    assert err.status_code is None
    assert err.payload is None

    # NOTE: Values are compared with `==`, identity checks against int/str
    # literals only pass by accident of constant sharing
    err = error.SnakeError('hello', 500)
    assert 'hello' in err.message
    assert err.status_code == 500
    assert err.payload is None

    err = error.SnakeError('hello', 500, 'extra')
    assert 'hello' in err.message
    assert err.status_code == 500
    assert err.payload == 'extra'
Example #4
0
async def unzip_file(file_path, password=None):
    """Unzip a file.

    Unzips a file using unzip or ZipFile. For speed reasons if unzip is
    installed it will be used in favour of the ZipFile library. It will extract
    the file to the same directory as that of the zip folder.

    Note: The zip file must contain only one file.

    Args:
        file_path (str): The zip file to unzip.
        password (str): The password for the zip. Defaults to None.

    Returns:
        str: The path to the extracted file.

    Raises:
        SnakeError: When the zip file contains more than one file.
                    When the extraction fails.
    """
    # Only the listing is needed here, so close the archive straight away
    # rather than leaving the handle open for the garbage collector
    with zipfile.ZipFile(file_path) as zip_file:
        info_list = zip_file.infolist()
    if len(info_list) != 1:
        raise error.SnakeError(
            'ZipError: only one file is allowed in the container')
    entry = info_list[0]
    working_dir = os.path.dirname(file_path)
    # Bit 0 of the general purpose flags marks an encrypted entry
    protected = entry.flag_bits & 0x1
    # NOTE: ZipFile is slow so we outsource to unzip if installed
    if shutil.which('unzip'):
        new_path = await unzip_file_unix(file_path, entry.filename,
                                         working_dir, protected, password)
    else:
        new_path = await unzip_file_python(file_path, entry.filename,
                                           working_dir, protected, password)
    if not new_path:
        raise error.SnakeError('ZipError: failed to extract file')
    return new_path
Example #5
0
async def store_file(sha256_digest, file_path, file_type, data):
    """Store a file to disk.

    Moves the file into file storage and, once that has succeeded, inserts
    its metadata into the database. Autoruns are executed for the file before
    the stored document is handed back.

    Args:
        sha256_digest (str): The hash of the file to store.
        file_path (str): The location of the file to move into the store.
        file_type (:obj:`FileType`): The type of the file being stored.
        data (dict): The metadata for the file, this is updated in place and
            dumped through `FileSchema`.

    Returns:
        The document selected back from the file collection after the insert.

    Raises:
        SnakeError: When the file cannot be stored on disk.
                    When the metadata cannot be inserted into the database.
    """
    # Move the file into the 'filedb'
    storage = utils.FileStorage()
    storage.create(file_path, sha256_digest)
    stored_on_disk = storage.save(move=True)
    if not stored_on_disk:
        raise error.SnakeError("Failed to store file on disk")

    # Build the metadata record and add it to the database
    data.update(storage.to_dict())
    data['name'] = strip_extensions(data['name'])
    data['timestamp'] = datetime.utcnow()
    record = schema.FileSchema().dump(data)
    record['file_type'] = file_type  # load_only=True
    inserted = await db.async_file_collection.insert(record)
    if not inserted:
        # Do not leave an orphaned file behind
        storage.delete()
        raise error.SnakeError("Failed to insert document")
    stored_document = await db.async_file_collection.select(
        storage.sha256_digest)

    # Run any autoruns, if allowed
    await execute_autoruns(sha256_digest, file_type, storage.mime)

    return stored_document
Example #6
0
 def extract(self, args, file, opts):
     """Extract embedded files from a sample with binwalk and submit them.

     binwalk is run against the sample with a temporary directory inside the
     configured cache directory as its output location. Every file found in
     the first output directory is submitted as a child of the sample.

     Args:
         args: Not used by this method.
         file: The sample to extract from, its `file_path` and
             `sha256_digest` are read.
         opts: Not used by this method.

     Returns:
         list: The cleaned file documents of the submitted samples, empty
         when binwalk produced no output.

     Raises:
         CommandError: When binwalk exits with a non-zero status.
         SnakeError: When the sample's metadata cannot be found.
     """
     samples = []
     cache_dir = path.abspath(
         path.expanduser(config.snake_config['cache_dir']))
     with tempfile.TemporaryDirectory(dir=cache_dir) as temp_dir:
         # Extract the samples
         proc = subprocess.run(
             [self.binwalk_path, file.file_path, '-e', '-C', temp_dir],
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
         # NOTE: A CompletedProcess is always truthy, so the exit status has
         # to be inspected to spot a failure
         if proc.returncode != 0:
             raise error.CommandError(
                 "failed to successfully extract from sample")
         # Get file name
         document = db.file_collection.select(file.sha256_digest)
         if not document:
             raise error.SnakeError("failed to get sample's metadata")
         # There will be one output directory containing files with the offsets as names
         contents = os.listdir(temp_dir)
         if not contents:
             return []
         directory = path.join(temp_dir, contents[0])
         for offset_name in os.listdir(directory):
             file_path = path.join(directory, offset_name)
             name = '{}.{}'.format(document['name'], offset_name)
             file_schema = schema.FileSchema().load({
                 'name':
                 name,
                 'description':
                 'extracted with binwalk'
             })
             new_file = fs.FileStorage()
             new_file.create(file_path)
             new_document = submitter.submit(file_schema,
                                             enums.FileType.FILE, new_file,
                                             file, NAME)
             new_document = schema.FileSchema().dump(
                 schema.FileSchema().load(
                     new_document))  # Required to clean the above
             samples.append(new_document)
     return samples
Example #7
0
    def load_scale_config(self, scale_name):
        """Load a scale configuration from file

        This loads the scale configuration files based on the scale name
        passed. It will load the base config along with the etc configuration
        if present. The etc configuration is only consulted when the scale
        ships a base config, and its values override those of the base.

        Args:
            scale_name (str): The name of the scale to load the configuration
            for.

        Raises:
            SnakeError: When the external configuration file fails to load.
        """
        self.scale_configs[scale_name] = {}
        # Load base if we need one
        config_path = pkg_resources.resource_filename(
            "snake.scales.{}".format(scale_name), "{}.conf".format(scale_name))
        if not path.exists(config_path):
            return

        with open(config_path, 'rb') as stream:
            base_config = yaml.safe_load(stream)
        if base_config is None:  # The config file is empty this is fine
            base_config = {}
        self.scale_configs[scale_name].update(base_config)

        # Try and load from etc config
        etc_conf = path.join(
            path.abspath(path.expanduser(constants.ETC_DIR)), "scales",
            "{}.conf".format(scale_name))
        if not path.exists(etc_conf):
            return

        try:
            with open(etc_conf, 'rb') as stream:
                etc_config = yaml.safe_load(stream)
            if etc_config is None:  # The config file is empty this is fine
                etc_config = {}
            self.scale_configs[scale_name].update(etc_config)
        except Exception as err:
            # Keep the original exception attached for debugging
            raise error.SnakeError(
                'failed to load config: {}: {} - {}'.format(
                    etc_conf, err.__class__, err)) from err
Example #8
0
    def binary_carver(self, args, file, opts):
        """Carve a binary out of a sample and submit it as a new file.

        The carving is done by `r2_bin_carver` into a temporary directory
        inside the configured cache directory. The carved file can be patched
        on request and is then submitted as a child of the sample.

        Args:
            args (dict): Reads the `offset`, `size`, `magic_bytes` and `patch`
                entries.
            file: The sample to carve from, its `file_path` and
                `sha256_digest` are read.
            opts: Not used by this method.

        Returns:
            The cleaned file document of the submitted binary.

        Raises:
            CommandError: When the carving or the patching fails.
            SnakeError: When the sample's metadata cannot be found.
        """
        sample = {}
        cache_dir = path.abspath(
            path.expanduser(config.snake_config['cache_dir']))
        with tempfile.TemporaryDirectory(dir=cache_dir) as scratch_dir:
            # Try and carve
            carved_path = r2_bin_carver.carve(file.file_path, scratch_dir,
                                              args['offset'], args['size'],
                                              args['magic_bytes'])
            if not carved_path:
                raise error.CommandError('failed to carve binary')
            if args['patch'] and not r2_bin_carver.patch(carved_path):
                raise error.CommandError(
                    'failed to patch binary, not a valid pe file')

            # The parent's name is the base of the new file's name
            parent_document = db.file_collection.select(file.sha256_digest)
            if not parent_document:
                raise error.SnakeError("failed to get sample's metadata")

            # Create schema and save
            metadata = {
                'name': '{}.{}'.format(parent_document['name'],
                                       args['offset']),
                'description':
                'extracted with radare2 script r2_bin_carver.py',
            }
            file_schema = schema.FileSchema().load(metadata)
            carved_file = fs.FileStorage()
            carved_file.create(carved_path)
            submitted = submitter.submit(file_schema, enums.FileType.FILE,
                                         carved_file, file, NAME)
            # A load followed by a dump is required to clean the above
            reloaded = schema.FileSchema().load(submitted)
            sample = schema.FileSchema().dump(reloaded)

        return sample
Example #9
0
    def data_received(self, chunk):  # pylint: disable=too-many-branches, too-many-statements
        """Consume one chunk of the request body.

        Without a stream the chunk is simply appended to `self.data`. With a
        stream (multipart requests) the chunk is walked by a small state
        machine kept in `self.stream.state`:

        * 0 - looking for the start of a part (`--` followed by the boundary)
        * 1 - collecting the part's header up to the blank line
        * 2 - the part is a file, its content is appended to a file in the
          stream's working directory and only the path of that file is added
          to `self.data`
        * 3 - any other part, its content is added to `self.data`

        Once `self.bytes_read` reaches `self.content_length` the collected
        data is parsed and the body arguments are merged into the request's
        arguments.

        Args:
            chunk (bytes): The next piece of the request body.

        Raises:
            SnakeError: When an error was stored on the handler beforehand.
                        When more than 100MB has been collected in memory.
        """
        # Report a problem that was stored earlier on the handler
        if self.error:
            raise self.error  # pylint: disable=raising-bad-type

        self.bytes_read += len(chunk)

        if len(
                self.data
        ) > 104857600:  # Ensure that no one is trying to fill RAM, 100MB
            raise error.SnakeError('Content-Length too large (truncated)')

        if self.stream:  # Cache files to disk
            # Prepend what was held back from the previous chunk
            chunk = self.stream.tail + chunk
            chunk_len = len(chunk)
            i = 0
            while i < chunk_len:
                if self.stream.state == 0:  # Find start of header
                    soh = chunk.find(b'--' + self.stream.boundary, i)
                    if soh != -1:
                        # +4 covers the leading '--' and the two bytes that
                        # follow the boundary - presumably the '\r\n' (or the
                        # closing '--'), TODO confirm
                        self.data += chunk[soh:soh +
                                           len(self.stream.boundary) + 4]
                        i = soh + len(self.stream.boundary) + 4
                        self.stream.state = 1
                        continue
                elif self.stream.state == 1:  # Find end of header
                    eoh = chunk.find(b'\r\n\r\n', i)
                    if eoh != -1:
                        self.stream.header += chunk[i:eoh + 4]
                        i = eoh + 4
                        if b'filename=' in self.stream.header:  # We have a file
                            self.stream.state = 2
                        else:
                            self.stream.state = 3
                        # The completed header always ends up in the data
                        self.data += self.stream.header
                        self.stream.header = bytes()
                        continue
                elif self.stream.state == 2:  # Handle file based content
                    soh = chunk.find(b'--' + self.stream.boundary, i)
                    if soh != -1:
                        # Files are named after the running file count
                        f_path = path.join(self.stream.working_dir.name,
                                           str(self.stream.file_count))
                        with open(f_path, 'a+b') as f:
                            f.write(chunk[i:soh -
                                          2])  # -2 drops the extra '\r\n'
                        # The path stands in for the file's content
                        self.data += bytes(f_path + '\r\n', 'utf-8')
                        self.stream.file_count += 1
                        # Leave the boundary in place for state 0 to consume
                        i = soh
                        self.stream.state = 0
                        continue
                elif self.stream.state == 3:  # Handle all other content
                    soh = chunk.find(b'--' + self.stream.boundary, i)
                    if soh != -1:
                        self.data += chunk[i:soh]
                        # Leave the boundary in place for state 0 to consume
                        i = soh
                        self.stream.state = 0
                        continue

                # Handle the overlapping tail
                # Reached only when the current state found no marker in the
                # rest of the chunk. The last TAIL_SIZE bytes are held back -
                # presumably so a marker split over two chunks can still be
                # found - and the rest goes to the current state's sink.
                # NOTE(review): if a transition above leaves i == chunk_len
                # the loop ends without refreshing self.stream.tail - confirm
                # the old tail cannot be prepended a second time.
                if i + TAIL_SIZE < chunk_len:
                    if self.stream.state == 2:
                        f_path = path.join(self.stream.working_dir.name,
                                           str(self.stream.file_count))
                        with open(f_path, 'a+b') as f:
                            f.write(chunk[i:chunk_len - TAIL_SIZE])
                    elif self.stream.state == 1:
                        self.stream.header += chunk[i:chunk_len - TAIL_SIZE]
                    else:
                        self.data += chunk[i:chunk_len - TAIL_SIZE]
                    self.stream.tail = chunk[chunk_len - TAIL_SIZE:]
                    i += chunk_len  # Ends the while loop
                else:
                    # Too little left to split, hold all of it back
                    self.stream.tail = chunk[i:]
                    i += chunk_len  # Ends the while loop
        else:  # Otherwise be normal
            self.data += chunk

        if self.bytes_read >= self.content_length:  # Finished, parse the new content
            httputil.parse_body_arguments(self.content_type,
                                          self.data,
                                          self.request.body_arguments,
                                          self.request.files,
                                          headers=None)
            # Merge the body arguments into the request's arguments
            for k, v in self.request.body_arguments.items():
                self.request.arguments.setdefault(k, []).extend(v)
Example #10
0
 def __init__(self, *args, **kwargs):  # pylint: disable=unused-argument
     """Refuse to be constructed.

     Any positional or keyword arguments are accepted and ignored.

     Raises:
         SnakeError: Always.
     """
     failure = error.SnakeError('error')
     raise failure
Example #11
0
async def queue_command(data):
    """Queue commands for execution

    The command is recorded in the database and handed to the celery workers.
    A command that is already running is returned as is and not queued a
    second time. Synchronous commands are waited for, and a failed worker is
    recorded against the command before the failure is raised.

    Note:
        The returned command schema will reflect the status of the queued
        command.

    Args:
        data (:obj:`CommandSchema`): The command to queue for execution.

    Returns:
        :obj:`CommandSchema`: The command schema with updates

    Raises:
        SnakeError: When a synchronous command's task was not successful.
    """
    # The latest execution always wins, thus we replace the current one in the db
    existing = await db.async_command_collection.select(
        data['sha256_digest'], data['scale'], data['command'], data['args'])
    if existing:
        # Leave a running command alone
        if 'status' in existing and existing['status'] == enums.Status.RUNNING:
            return schema.CommandSchema().dump(
                schema.CommandSchema().load(existing))

        stale_output_id = (existing['_output_id']
                           if '_output_id' in existing else None)
        data['timestamp'] = datetime.utcnow()
        data = schema.CommandSchema().dump(data)
        await db.async_command_collection.replace(data['sha256_digest'],
                                                  data['scale'],
                                                  data['command'],
                                                  data['args'], data)
        # NOTE: We delete after the replace to try and prevent concurrent
        # reads to a file while it is being deleted
        if stale_output_id:
            await db.async_command_output_collection.delete(stale_output_id)
    else:
        # Save the command, this will be in a pending state
        data['timestamp'] = datetime.utcnow()
        data = schema.CommandSchema().dump(data)
        await db.async_command_collection.insert(data)

    data = schema.CommandSchema().load(data)
    # Both modes dispatch the task identically, only the waiting differs
    fire_and_forget = data['asynchronous'] is True
    task = celery.execute_command.apply_async(
        args=[data],
        time_limit=data['timeout'] + 30,
        soft_time_limit=data['timeout'])

    if not fire_and_forget:
        result = await celery.wait_for_task(task)
        if not task.successful():
            # Record the failure against the command before raising
            failed = await db.async_command_collection.select(
                data['sha256_digest'], data['scale'], data['command'],
                data['args'])
            previous_output_id = (failed['_output_id']
                                  if '_output_id' in failed else None)
            failed['_output_id'] = \
                await db.async_command_output_collection.put(
                    failed['command'],
                    b"{'error': 'worker failed please check log'}")
            failed['status'] = enums.Status.FAILED
            await db.async_command_collection.update(failed['sha256_digest'],
                                                     failed['scale'],
                                                     failed['command'],
                                                     data['args'], failed)
            if previous_output_id:
                await db.async_command_output_collection.delete(
                    previous_output_id)
            raise error.SnakeError(result)

    return await db.async_command_collection.select(data['sha256_digest'],
                                                    data['scale'],
                                                    data['command'],
                                                    data['args'])
Example #12
0
async def unzip_file_unix(file_path,
                          file_name,
                          output_dir,
                          protected=False,
                          password=None):  # pylint: disable=too-many-branches
    """Unzip file using unzip.

    Runs the unzip binary to extract a file from a zip into a given
    directory. For a protected zip the supplied password is used, and when
    none is supplied each password from the snake configuration is tried in
    turn until one works.

    Note:
        Only zips with a single file are supported.

    Args:
        file_path (str): The path to the zipped file.
        file_name (str): The name of the file to extract from the zip.
        output_dir (str): The directory to extract the file to.
        protected (bool, optional): Is the zip password protected. Defaults to False.
        password (str, optional): The password for the zip. Defaults to None.

    Returns:
        str: The path of the extracted file.

    Raises:
        SnakeError: When extraction of the file has failed.
    """

    async def _run_unzip(password_args):
        """Run unzip once and return `(extracted_path, error_text)`."""
        proc = await asyncio.create_subprocess_exec(
            *[
                "unzip", *password_args, "-j", file_path, file_name, "-d",
                output_dir
            ],
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE)
        _stdout, stderr = await proc.communicate()
        if proc.returncode:
            return None, str(stderr, encoding='utf-8')
        # NOTE: We flatten dirs so we must strip dirs from file_name if present
        return os.path.join(output_dir, file_name.split('/')[-1]), None

    # Work out which unzip invocations are to be attempted, in order
    if not protected:
        attempts = [[]]
    elif password:
        attempts = [["-P", bytes(password, "utf-8")]]
    else:
        # Lazy, so each configured password is only encoded when it is tried
        attempts = (["-P", bytes(passwd, "utf-8")]
                    for passwd in snake_config['zip_passwords'])

    err = ''
    new_path = None
    for password_args in attempts:
        extracted, failure = await _run_unzip(password_args)
        if failure is None:
            new_path = extracted
        else:
            err = failure
        if new_path:
            break

    if not new_path:
        if 'incorrect password' in err:
            message = 'ZipError: incorrect password'
        else:
            message = 'ZipError: {}'.format(err)
        raise error.SnakeError(message)

    return new_path
Example #13
0
def submit(file_schema, file_type, file, parent, scale_name):  # pylint: disable=too-many-branches
    """Submit a new file to Snake.

    This is used generally by the command component of scales to submit a new
    file into snake. The file is stored and inserted if it is not already
    known, and the parent/child relationship between the two files is then
    recorded on both documents.

    Args:
        file_schema (dict): The metadata for the new file, validated through
            `FileSchema`.
        file_type: The type recorded on a newly inserted file document.
        file (:obj:`FileStorage`): The file to submit.
        parent (:obj:`FileStorage`): The file that the new file came from.
        scale_name (str): The name of the submitting scale, recorded as the
            submission type `scale:<scale_name>`.

    Returns:
        The document for `file` as selected from the file collection.

    Raises:
        TypeError: When `file_schema`, `file` or `parent` is of the wrong type.
        SnakeError: When the file cannot be saved to disk.
                    When no document exists for the file after submission.
    """

    # We need to be safe here so instance check the above
    if not isinstance(file_schema, dict):
        raise TypeError("file_schema must be of type dict")
    if not isinstance(file, fs.FileStorage):
        raise TypeError("file must be of type FileStorage")
    if not isinstance(parent, fs.FileStorage):
        raise TypeError("parent must be of type FileStorage")

    # If the hashes are the same, just stop
    if file.sha256_digest == parent.sha256_digest:
        return db.file_collection.select(file.sha256_digest)

    # Create submission type
    submission_type = 'scale:{}'.format(scale_name)

    # Check if the file to submit is already in Snake, if not lets add it
    document = db.file_collection.select(file.sha256_digest)
    if not document:
        # Validate
        data = schema.FileSchema().dump(schema.FileSchema().load(file_schema))
        # Save the file
        if not file.save(move=True):
            raise error.SnakeError("could not save new file to disk for hash {}".format(file.sha256_digest))
        data.update(file.to_dict())
        # NOTE: Don't set the parent we will do this later, so blank them out
        # if the scale tried to be smart
        data['children'] = {}
        data['parents'] = {}
        data['submission_type'] = submission_type
        data['timestamp'] = datetime.utcnow()
        data = schema.FileSchema().dump(data)
        data['file_type'] = file_type  # load_only=True
        # Save
        db.file_collection.insert(data)

    # Update the parent child relationships
    document = db.file_collection.select(file.sha256_digest)
    if document:
        # HACK: This is needed to get submission_type of parent
        # NOTE(review): `p` is subscripted below without a check, so a parent
        # that is missing from the database would fail here before the
        # "parent has been deleted" handling further down - confirm intent
        p = db.file_collection.select(parent.sha256_digest)

        # Check if the parent and type already exist
        if 'parents' not in document:
            document['parents'] = {}
        if parent.sha256_digest in document['parents']:
            if submission_type in document['parents'][parent.sha256_digest]:
                return document
            else:
                document['parents'][parent.sha256_digest] += [p["submission_type"]]
        else:
            document['parents'][parent.sha256_digest] = [p["submission_type"]]
        # Validate
        document = schema.FileSchema().dump(schema.FileSchema().load(document))
        # Update
        db.file_collection.update(file.sha256_digest, document)

        # Update the parents children
        document = db.file_collection.select(parent.sha256_digest)
        if not document:  # Parent does not exist, it has been deleted, don't update it
            return db.file_collection.select(file.sha256_digest)
        if 'children' not in document:
            document['children'] = {}
        if file.sha256_digest in document['children']:
            if submission_type in document['children'][file.sha256_digest]:
                return db.file_collection.select(file.sha256_digest)
            else:
                document['children'][file.sha256_digest] += [submission_type]
        else:
            document['children'][file.sha256_digest] = [submission_type]
        # Validate
        document = schema.FileSchema().dump(schema.FileSchema().load(document))
        # Update
        db.file_collection.update(parent.sha256_digest, document)
    else:
        raise error.SnakeError("could not submit new file for hash {}".format(file.sha256_digest))

    return db.file_collection.select(file.sha256_digest)