Example 1
def test_bad_cert():
    """Make sure that the client detects that the test cert is self signed."""
    with mocks.Server() as server:
        try:
            assemblyline_client.get_client(server.address)
            assert False
        except assemblyline_client.ClientError as ce:
            assert 'CERTIFICATE_VERIFY_FAILED' in str(ce) or \
                'certificate verify failed' in str(ce)
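A note on the verify argument used throughout these examples: it is handed to the underlying HTTPS layer, so a deployment with a self-signed certificate can either disable verification (as the mock-server tests on this page do) or point verify at the server's certificate, the way the --server-crt option in Example 9 does. A minimal sketch, with placeholder host and path:

# Sketch only: the host and certificate path are placeholders.
import assemblyline_client

# Option 1: skip certificate validation (test environments only).
client = assemblyline_client.get_client('https://localhost:443', verify=False)

# Option 2: validate against a known certificate or CA bundle instead.
client = assemblyline_client.get_client('https://localhost:443',
                                        verify='/path/to/server.crt')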
Example 2
def test_encrypt_apikey_auth():
    """Send an encryped apikey and decrypt it."""
    with mocks.Server() as server:
        assemblyline_client.get_client(server.address,
                                       verify=False,
                                       apikey=('username', 'ANAPIKEY'))
        assert len(server.logins) == 1
        assert server.logins[0]['user'] == 'username'
        assert server.logins[0]['apikey'] != 'ANAPIKEY'
        assert server.private_key.decrypt(
            b64decode(server.logins[0]['apikey']), 'ERROR') == b'ANAPIKEY'
Example 3
def test_encrypt_password_auth():
    """Send an encryped password and decrypt it."""
    with mocks.Server() as server:
        assemblyline_client.get_client(server.address,
                                       verify=False,
                                       auth=('username', 'password'))
        assert len(server.logins) == 1
        assert server.logins[0]['user'] == 'username'
        assert server.logins[0]['password'] != 'password'
        assert server.private_key.decrypt(
            b64decode(server.logins[0]['password']), 'ERROR') == b'password'
Example 4
def main(url: str, username: str, apikey: str, min_score: int,
         incident_num: int):
    """
    Example:
    python3 results-analyzer.py --url="https://<domain-of-Assemblyline-instance>" --username="******" --apikey="<api-key-name>:<key>" --incident_num=123
    """
    # Phase 1: Parameter validation
    try:
        validate_parameters(url)
    except Exception as e:
        # If there are any exceptions raised at this point, bail!
        print(e)
        log.error(e)
        return

    # Phase 2: Create the Assemblyline Client
    al_client = get_client(url, apikey=(username, apikey))

    # Phase 3: Open important files and read their contents
    report_file = open(REPORT_FILE, "a")
    report_file.write("FilePath,SHA256,Score,URL,Errors\n")
    # Phase 4: Get submission details for each ingest_id
    log.debug(
        f"Searching for the submission for incident number {incident_num}")
    submission_res = al_client.search.stream.submission(
        f"params.description:'Incident Number\: {incident_num}' AND max_score:>={min_score}"
    )
    for submission in submission_res:

        # Phase 5: Wait until the submission has completed
        state = submission["state"]
        while state != "completed":
            msg = f"{submission['sid']} is still in the state:{state}. Sleeping for 2 seconds and trying again."
            print(msg)
            log.debug(msg)
            sleep(2)
            specific_submission_res = al_client.submission.full(
                submission["sid"])
            state = specific_submission_res["state"]

        # Deep dive into the submission to get the files
        full_sub = al_client.submission.full(submission["sid"])
        for file in full_sub["files"]:

            # Report accordingly.
            msg = f"{full_sub['metadata']['filename']},{file['sha256']},{full_sub['max_score']},{url}/submission/report/{submission['sid']},{full_sub['errors']}\n"
            print(msg)
            log.debug(msg)
            report_file.write(msg)

    msg = "All done!"
    print(msg)
    log.debug(msg)
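The validate_parameters helper is referenced here but not shown on this page; a minimal sketch of what such a check might look like, assuming it only needs to reject malformed URLs (the real helper may do more):

# Hypothetical sketch of the validate_parameters helper used above.
from urllib.parse import urlparse

def validate_parameters(url: str) -> None:
    # Reject anything that is not an http(s) URL with a host component.
    parsed = urlparse(url)
    if parsed.scheme not in ('http', 'https') or not parsed.netloc:
        raise ValueError(f'Invalid Assemblyline URL: {url}')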
Example 5
    def client(datastore):
        user = datastore.user.get('admin')
        random_pass = get_random_password(length=48)
        key_name = "key_%s" % get_random_id().lower()
        user.apikeys[key_name] = {
            "password": bcrypt.hash(random_pass),
            "acl": ["R", "W"]
        }
        datastore.user.save('admin', user)
        api_key = "%s:%s" % (key_name, random_pass)

        c = get_client(UI_HOST, apikey=('admin', api_key), verify=False)
        return c
Example 6
    def run(self):
        if self.data_type not in ['file']:
            self.notSupported()

        al_client = get_client(self.assemblyline_server,
                               apikey=(self.assemblyline_user,
                                       self.assemblyline_key),
                               verify=self.assemblyline_verifyssl)
        al_client.submit(path='/path/to/my/file.txt')

        time.sleep(self.getParam("config.delay", 60))

        self.report({'data': self.getData(), 'input': self._input})
Example 7
    def do_source_update(self, service: Service) -> None:
        self.log.info(f"Connecting to Assemblyline API: {UI_SERVER}...")
        run_time = time.time()
        username = self.ensure_service_account()
        with temporary_api_key(self.datastore, username) as api_key:
            with tempfile.TemporaryDirectory() as update_dir:
                al_client = get_client(UI_SERVER, apikey=(username, api_key), verify=False)
                old_update_time = self.get_source_update_time()

                self.log.info("Connected!")

                # Parse updater configuration
                previous_hashes: dict[str, dict[str, str]] = self.get_source_extra()
                sources: dict[str, UpdateSource] = {_s['name']: _s for _s in service.update_config.sources}
                files_sha256: dict[str, dict[str, str]] = {}

                # Go through each source and download file
                for source_name, source_obj in sources.items():
                    source = source_obj.as_primitives()
                    uri: str = source['uri']
                    default_classification = source.get('default_classification', classification.UNRESTRICTED)
                    try:
                        # Pull sources from external locations (method depends on the URL)
                        files = git_clone_repo(source, old_update_time, self.default_pattern, self.log, update_dir) \
                            if uri.endswith('.git') else url_download(source, old_update_time, self.log, update_dir)

                        # Add to collection of sources for caching purposes
                        self.log.info(f"Found new {self.updater_type} rule files to process for {source_name}!")
                        validated_files = list()
                        for file, sha256 in files:
                            files_sha256.setdefault(source_name, {})
                            if previous_hashes.get(source_name, {}).get(file, None) != sha256 and self.is_valid(file):
                                files_sha256[source_name][file] = sha256
                                validated_files.append((file, sha256))

                        # Import into Assemblyline
                        self.import_update(validated_files, al_client, source_name, default_classification)

                    except SkipSource:
                        # This source hasn't changed, no need to re-import into Assemblyline
                        self.log.info(f'No new {self.updater_type} rule files to process for {source_name}')
                        if source_name in previous_hashes:
                            files_sha256[source_name] = previous_hashes[source_name]
                        continue

        self.set_source_update_time(run_time)
        self.set_source_extra(files_sha256)
        self.set_active_config_hash(self.config_hash(service))
        self.local_update_flag.set()
Example 8
def test_noauth_submit(mocker):
    """Submit a file and ensure that the same file is unpacked."""
    with mocks.Server() as server:

        client = assemblyline_client.get_client(server.address, verify=False)
        submits = server.submits

        # Submit a file with contents
        client.submit(path='readme.txt', contents=b'abc123')
        assert len(submits) == 1
        assert b64decode(submits[0]['binary']) == b'abc123'
        assert submits[0]['name'] == 'readme.txt'
        submits.pop()

        # Submit a file from a file
        mocker.patch('os.path.exists', return_value=True)
        mocker.patch('assemblyline_client.v3_client.open',
                     mock.mock_open(read_data=b'abc123'),
                     create=True)
        client.submit(path='readme.txt')
        assert len(submits) == 1
        assert b64decode(submits[0]['binary']) == b'abc123'
        assert submits[0]['name'] == 'readme.txt'
        submits.pop()
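Examples 4 and 8 together show the common submit-then-poll pattern: submit a file, wait for the submission to reach the completed state, then pull the full record. A condensed sketch, assuming a reachable server and placeholder credentials:

# Sketch only: the URL, API key, and file path are placeholders.
from time import sleep
from assemblyline_client import get_client

client = get_client('https://localhost:443', apikey=('admin', 'devkey:admin'), verify=False)

# Submit a local file; the returned record includes the submission id ('sid').
submission = client.submit(path='/tmp/sample.bin')

# Poll until the submission completes, as Example 4 does, then fetch everything.
while client.submission.full(submission['sid'])['state'] != 'completed':
    sleep(2)
full = client.submission.full(submission['sid'])
print(full['max_score'])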
Example 9
def _main(arguments):
    global al_result_to_text

    signal(SIGINT, SIG_DFL)
    if sys.platform.startswith("linux"):
        from signal import SIGPIPE
        signal(SIGPIPE, SIG_DFL)

    user = None
    pw = None
    cert = None
    apikey = None
    transport = "https"
    host = "localhost"
    port = 443
    kw = {}
    verify = True

    config = ConfigParser()
    config.read([expanduser("~/.al/submit.cfg")])
    for section in config.sections():
        if section == "auth":
            if 'user' in config.options('auth'):
                user = config.get('auth', 'user')
            if 'password' in config.options('auth'):
                pw = config.get('auth', 'password')
            if 'cert' in config.options('auth'):
                cert = config.get('auth', 'cert')
            if 'apikey' in config.options('auth'):
                apikey = config.get('auth', 'apikey')
            if 'insecure' in config.options('auth'):
                verify = config.get('auth',
                                    'insecure').lower() not in ['true', 'yes']
        elif section == "server":
            if 'transport' in config.options('server'):
                transport = config.get('server', 'transport')
            if 'host' in config.options('server'):
                host = config.get('server', 'host')
            if 'port' in config.options('server'):
                port = config.get('server', 'port')
            if 'cert' in config.options('server'):
                verify = config.get('server', 'cert')

    server = "%s://%s:%s" % (transport, host, port)

    # parse the command line args
    from argparse import ArgumentParser
    parser = ArgumentParser(description=description_string)
    parser.add_argument('files', metavar='file/dir', nargs='+')
    parser.add_argument('-v', '--version', action='version', version=__version__)
    parser.add_argument('-q', '--quiet', action='store_true',
                        help='Runs in quiet mode')
    parser.add_argument('-a', '--async', dest='async_command', action='store_true',
                        help='Run in asynchronous mode (uses ingest API).')
    parser.add_argument('-n', '--no-output', action='store_true',
                        help='Only works in conjunction with -a. Ingests the file '
                             'and does not wait for the output.')
    parser.add_argument('-i', '--insecure', action='store_true', default=not verify,
                        help='Skip server cert validation. '
                             'DEFAULT: insecure in auth section of ~/.al/submit.cfg')
    parser.add_argument('-t', '--text', action='store_true',
                        help='Dumps results as text instead of json.')
    parser.add_argument('-d', '--run-dynamic', action='store_true',
                        help='Adds Dynamic Analysis to the list of services to run.')
    parser.add_argument('-u', '--user', default=user, metavar='"user"',
                        help='username to be used to connect to AL. '
                             'DEFAULT: user in auth section of ~/.al/submit.cfg')
    parser.add_argument('-p', '--password', default=pw, metavar='"MYPASSWORD"',
                        help='password of the user. '
                             'DEFAULT: password in auth section of ~/.al/submit.cfg')
    parser.add_argument('-o', '--output-file', metavar='"/home/user/output.txt"',
                        help='File to write the results to. DEFAULT: stdout')
    parser.add_argument('-s', '--server', default=server, metavar='"https://localhost:443"',
                        help='Server to connect to. DEFAULT: transport://host:port '
                             'in server section of ~/.al/submit.cfg')
    parser.add_argument('-c', '--cert', default=cert, metavar='"/path/to/pki.pem"',
                        help='Client cert used to connect to server. '
                             'DEFAULT: cert in auth section of ~/.al/submit.cfg')
    parser.add_argument('-k', '--apikey', default=apikey, metavar='"MY_RANDOM_API_KEY"',
                        help='apikey to use for the user to login. '
                             'DEFAULT: apikey in auth section of ~/.al/submit.cfg')
    parser.add_argument('-j', '--json-params', metavar='"{ ... }"',
                        help='A JSON dictionary of submission parameters.')
    parser.add_argument('-m', '--metadata', metavar='"{ ... }"',
                        help='A JSON dictionary of submission metadata.')
    parser.add_argument('--srv-spec', metavar='"{ ... }"',
                        help='A JSON dictionary of service specific parameters.')
    parser.add_argument('--server-crt', metavar='"/path/to/server.crt"',
                        help='DEFAULT: cert in server section of ~/.al/submit.cfg')

    params = parser.parse_args(arguments)

    args = params.files
    verbose = not params.quiet
    async_command = params.async_command
    no_output = params.no_output
    json_output = not params.text
    dynamic = params.run_dynamic
    user = params.user
    cert = params.cert
    pw = params.password
    apikey = params.apikey

    if params.insecure:
        verify = False
    elif params.server_crt:
        verify = params.server_crt

    if not cert and not user:
        sys.stderr.write("This server requires authentication...\n")
        sys.exit(1)

    if user and not pw and not apikey:
        if verbose:
            sys.stderr.write(
                "You specified a username without a password. What is your password?\n")
        pw = getpass()

    output = params.output_file

    if output:
        # Check that the output file can be created before doing any work
        f = None
        try:
            f = open(output, "ab")
        except Exception:  # pylint: disable=W0702
            sys.stderr.write("!!ERROR!! Output file cannot be created (%s)\n" % output)
        finally:
            try:
                f.close()
            except Exception:  # pylint: disable=W0702
                pass

    server = params.server

    if not server:
        sys.stderr.write(
            "!!ERROR!! No server specified, -s option is mandatory.\n\n%s" %
            parser.format_help())
        return -1

    if params.metadata:
        kw['metadata'] = json.loads(params.metadata)

    if params.json_params:
        kw["params"] = json.loads(params.json_params)

    if params.srv_spec:
        kw.setdefault("params", {})
        kw["params"]["service_spec"] = json.loads(params.srv_spec)

    auth = None
    api_auth = None
    if user and apikey:
        api_auth = (user, apikey)
    elif user and pw:
        auth = (user, pw)

    options = {
        'verbose': verbose,
        'json_output': json_output,
    }

    read_from_pipe = False
    if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
        try:
            if select.select([sys.stdin], [], [], 0.0)[0]:
                read_from_pipe = True
        except io.UnsupportedOperation:
            # stdin has probably been replaced with a non-file python object
            # this is fine.
            pass

    if len(args) == 0 and not read_from_pipe:
        sys.stdout.write("%s\n" % parser.format_help())
        return 0

    try:
        client = get_client(server,
                            apikey=api_auth,
                            auth=auth,
                            cert=cert,
                            verify=verify)
        if isinstance(client, Client4):
            from assemblyline_client.v4_client.common.submit_utils import al_result_to_text
        else:
            from assemblyline_client.v3_client.utils import al_result_to_text
    except ClientError as e:
        if e.status_code == 401:
            sys.stderr.write(
                "!!ERROR!! Authentication to the server failed.\n")
        elif e.status_code == 495:
            sys.stderr.write(
                "!!ERROR!! Invalid SSL connection to the server:\n\t%s\n" % e)
        else:
            raise
        return 1

    if dynamic:
        p = client.user.submission_params("__CURRENT__")
        if "Dynamic Analysis" not in p['services']['selected']:
            p['services']['selected'].append("Dynamic Analysis")

        if 'params' in kw:
            p.update(kw['params'])

        kw['params'] = p

    if async_command and not no_output:
        kw['nq'] = "al_submit_%s" % get_random_id()

    # sanity check path
    if len(args) == 0 and read_from_pipe:
        while True:
            line = sys.stdin.readline()
            if not line:
                break

            line = line.strip()
            if line == '-':
                line = '/dev/stdin'

            if async_command:
                kw.setdefault('metadata', {})
                kw['metadata']['al_submit_id'] = get_id_from_path(line)
                send_async(client, line, verbose=verbose, **kw)
            else:
                send(client, line, output, options, **kw)
    else:
        ret_val = 0
        file_list = []

        for arg in args:
            if arg == '-':
                file_list.append('/dev/stdin')
            elif not exists(arg):
                sys.stderr.write("!!ERROR!! %s => File does not exist.\n" %
                                 arg)
                ret_val = 1
            elif isdir(arg):
                for root, _, fname_list in walk(arg):
                    for fname in fname_list:
                        file_list.append(join(root, fname))
            else:
                file_list.append(arg)

        queued_files = [get_id_from_path(f) for f in file_list]
        output_thread = None
        if async_command and not no_output:
            output_thread = start_result_thread(client, queued_files, output,
                                                options, **kw)

        for input_file in file_list:
            if async_command:
                kw.setdefault('metadata', {})
                kw['metadata']['al_submit_id'] = get_id_from_path(input_file)
                if not send_async(client, input_file, verbose=verbose, **kw):
                    with ASYNC_LOCK:
                        queued_files.remove(get_id_from_path(input_file))
                    if verbose:
                        sys.stderr.write(
                            "\tWARNING: Could not send file %s.\n" %
                            input_file)
                    ret_val = 1
            else:
                if not send(client, input_file, output, options, **kw):
                    ret_val = 1

        if output_thread:
            output_thread.join()

        if ret_val != 0 and len(file_list) > 1:
            if verbose:
                sys.stderr.write(
                    "\n** WARNING: al_submit encountered some "
                    "errors while processing multiple files. **\n")

        return ret_val
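For reference, a ~/.al/submit.cfg covering every key that _main reads above would look like the following; all keys are optional and the values are placeholders:

# Hypothetical ~/.al/submit.cfg; keys mirror exactly what _main parses.
[auth]
user = admin
password = MYPASSWORD
apikey = MY_RANDOM_API_KEY
cert = /path/to/pki.pem
insecure = false

[server]
transport = https
host = localhost
port = 443
cert = /path/to/server.crt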
Example 10
    def do_local_update(self) -> None:
        old_update_time = self.get_local_update_time()
        if not os.path.exists(UPDATER_DIR):
            os.makedirs(UPDATER_DIR)

        _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR)
        if self._service.update_config.generates_signatures:
            output_directory = tempfile.mkdtemp(prefix="update_dir_", dir=UPDATER_DIR)

            self.log.info("Setup service account.")
            username = self.ensure_service_account()
            self.log.info("Create temporary API key.")
            with temporary_api_key(self.datastore, username) as api_key:
                self.log.info(f"Connecting to Assemblyline API: {UI_SERVER}")
                al_client = get_client(UI_SERVER, apikey=(username, api_key), verify=False)

                # Check if new signatures have been added
                self.log.info("Check for new signatures.")
                if al_client.signature.update_available(
                        since=epoch_to_iso(old_update_time) or '', sig_type=self.updater_type)['update_available']:
                    self.log.info("An update is available for download from the datastore")

                    self.log.debug(f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}")

                    extracted_zip = False
                    attempt = 0

                    # Sometimes a valid zip file isn't returned, which affects the service's use of the signature source. Retry a few times.
                    while not extracted_zip and attempt < 5:
                        temp_zip_file = os.path.join(output_directory, 'temp.zip')
                        al_client.signature.download(
                            output=temp_zip_file,
                            query=f"type:{self.updater_type} AND (status:NOISY OR status:DEPLOYED)")

                        self.log.debug(f"Downloading update to {temp_zip_file}")
                        if os.path.exists(temp_zip_file) and os.path.getsize(temp_zip_file) > 0:
                            self.log.debug(f"File type ({os.path.getsize(temp_zip_file)}B): {zip_ident(temp_zip_file, 'unknown')}")
                            try:
                                with ZipFile(temp_zip_file, 'r') as zip_f:
                                    zip_f.extractall(output_directory)
                                    extracted_zip = True
                                    self.log.info("Zip extracted.")
                            except BadZipFile:
                                attempt += 1
                                self.log.warning(f"[{attempt}/5] Bad zip. Trying again after 30s...")
                                time.sleep(30)
                            except Exception as e:
                                self.log.error(f'Problem while extracting signatures to disk: {e}')
                                break

                            os.remove(temp_zip_file)

                    if extracted_zip:
                        self.log.info("New ruleset successfully downloaded and ready to use")
                        self.serve_directory(output_directory, time_keeper)
                    else:
                        self.log.error("Signatures aren't saved to disk.")
                        shutil.rmtree(output_directory, ignore_errors=True)
                        if os.path.exists(time_keeper):
                            os.unlink(time_keeper)
                else:
                    self.log.info("No signature updates available.")
                    shutil.rmtree(output_directory, ignore_errors=True)
                    if os.path.exists(time_keeper):
                        os.unlink(time_keeper)
        else:
            output_directory = self.prepare_output_directory()
            self.serve_directory(output_directory, time_keeper)
Example 11
def test_noauth():
    """The test server should let us login with no authentication."""
    with mocks.Server() as server:
        assemblyline_client.get_client(server.address, verify=False)
        assert len(server.logins) == 1
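Between them, the examples on this page exercise every way get_client is authenticated; side by side (hosts and credentials are placeholders):

# Sketch only: placeholder host and credentials.
from assemblyline_client import get_client

client = get_client('https://localhost:443', verify=False)                     # no auth (test server)
client = get_client('https://localhost:443', auth=('user', 'password'))        # username/password
client = get_client('https://localhost:443', apikey=('user', 'key_name:key'))  # API key
client = get_client('https://localhost:443', cert='/path/to/pki.pem')          # client certificate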
Example 12
import jwt
import os

from assemblyline_client import get_client
from flask import Flask, render_template, request, redirect

TOKEN = None
AL_HOST = os.environ.get("AL_HOST", "127.0.0.1.nip.io")
AL_USER = os.environ.get("AL_USER", "admin")
AL_APIKEY = os.environ.get("AL_APIKEY", "devkey:admin")
OBO_HOST = os.environ.get("OBO_HOST", "127.0.0.1.nip.io")
CLIENT = get_client(f"https://{AL_HOST}", apikey=(AL_USER, AL_APIKEY), verify=False)

##########################
# App settings
app = Flask("obo_test")
app.config.update(
    SESSION_COOKIE_SECURE=False,
    SECRET_KEY="Not so secret is it?!",
    PREFERRED_URL_SCHEME='https'
)


@app.route("/token/", methods=["GET"])
def get_token():
    global TOKEN
    TOKEN = request.values.get("token")
    return redirect("/")

Example 13

def sigma_update() -> None:
    """
    Download every file from the sources listed in the update configuration file.
    """
    # noinspection PyBroadException
    try:
        # Load updater configuration
        update_config = {}
        if UPDATE_CONFIGURATION_PATH and os.path.exists(UPDATE_CONFIGURATION_PATH):
            with open(UPDATE_CONFIGURATION_PATH, 'r') as yml_fh:
                update_config = yaml.safe_load(yml_fh)
        else:
            LOGGER.error(f"Update configuration file doesn't exist: {UPDATE_CONFIGURATION_PATH}")
            exit()

        # Exit if no update sources given
        if 'sources' not in update_config or not update_config['sources']:
            LOGGER.error("Update configuration does not contain any sources to update from")
            exit()

        # Initialise al_client
        server = update_config['ui_server']
        user = update_config['api_user']
        api_key = update_config['api_key']
        LOGGER.info(f"Connecting to Assemblyline API: {server}...")
        al_client = get_client(server, apikey=(user, api_key), verify=False)
        LOGGER.info(f"Connected!")

        # Parse updater configuration
        previous_update = update_config.get('previous_update', None)
        previous_hash = json.loads(update_config.get('previous_hash', None) or "{}")
        sources = {source['name']: source for source in update_config['sources']}
        files_sha256 = {}
        source_default_classification = {}

        # Go through each source and download file
        for source_name, source in sources.items():
            uri: str = source['uri']
            source_default_classification[source_name] = source.get(
                'default_classification', classification.UNRESTRICTED)

            if uri.endswith('.git'):
                files = git_clone_repo(source, previous_update=previous_update)
                for file, sha256 in files:
                    files_sha256.setdefault(source_name, {})
                    if previous_hash.get(source_name, {}).get(file) != sha256:
                        try:
                            if val_file(file):
                                files_sha256[source_name][file] = sha256
                            else:
                                LOGGER.warning(f"{file} was not imported due to failed validation")
                        except UnsupportedFeature as e:
                            LOGGER.warning(f"{file} | {e}")
            else:
                files = url_download(source, previous_update=previous_update)
                for file, sha256 in files:
                    files_sha256.setdefault(source_name, {})
                    if previous_hash.get(source_name, {}).get(file) != sha256:
                        if val_file(file):
                            files_sha256[source_name][file] = sha256
                        else:
                            LOGGER.warning(f"{file} was not imported due to failed validation")

        if files_sha256:
            LOGGER.info("Found new Sigma rule files to process!")
            sigma_importer = SigmaImporter(al_client, logger=LOGGER)
            for source, source_val in files_sha256.items():
                total_imported = 0
                default_classification = source_default_classification[source]
                for file in source_val.keys():
                    try:
                        total_imported += sigma_importer.import_file(
                            file, source, default_classification=default_classification)
                    except ValueError:
                        LOGGER.warning(f"{file} failed to import due to a Sigma error")
                    except ComposerError:
                        LOGGER.warning(f"{file} failed to import due to a YAML-parsing error")
                LOGGER.info(f"{total_imported} signatures were imported for source {source}")
        else:
            LOGGER.info('No new Sigma rule files to process')

        # Check if new signatures have been added to the datastore
        if al_client.signature.update_available(
                since=previous_update or '', sig_type='sigma')['update_available']:
            LOGGER.info("An update is available for download from the datastore")

            if not os.path.exists(UPDATE_OUTPUT_PATH):
                os.makedirs(UPDATE_OUTPUT_PATH)

            temp_zip_file = os.path.join(UPDATE_OUTPUT_PATH, 'temp.zip')
            al_client.signature.download(
                output=temp_zip_file,
                query="type:sigma AND (status:NOISY OR status:DEPLOYED)")
            LOGGER.info(f"Signatures downloaded to {UPDATE_OUTPUT_PATH}: {os.listdir(UPDATE_OUTPUT_PATH)}")

            if os.path.exists(temp_zip_file):
                with ZipFile(temp_zip_file, 'r') as zip_f:
                    zip_f.extractall(UPDATE_OUTPUT_PATH)

                os.remove(temp_zip_file)

            # Create the response yaml
            with open(os.path.join(UPDATE_OUTPUT_PATH, 'response.yaml'), 'w') as yml_fh:
                yaml.safe_dump(dict(hash=json.dumps(files_sha256)), yml_fh)

            LOGGER.info("New ruleset successfully downloaded and ready to use")

        LOGGER.info("Sigma updater completed successfully")
    except Exception:
        LOGGER.exception("Updater ended with an exception!")
Example 14

def main(url: str, username: str, apikey: str, ttl: int, classification: str,
         service_selection: str, is_test: bool, path: str, fresh: bool,
         incident_num: int, retries: int):
    """
    Example:
    python3 file-submitter.py --url="https://<domain-of-Assemblyline-instance>" --username="******" --apikey="<api-key-name>:<key>" --classification="<classification>" --service_selection="<service-name>,<service-name>" --path "/path/to/compromised/directory" --incident_num=123
    """
    # Phase 1: Parameter validation
    try:
        service_selection = validate_parameters(url, service_selection)
    except Exception as e:
        # If there are any exceptions raised at this point, bail!
        print(e)
        log.error(e)
        return

    # Phase 2: Setting the parameters
    settings = {
        "ttl": ttl,
        "ignore_cache": True,
        "description": f"Incident Number: {incident_num}",
        "classification": classification,
        "services": {
            "selected": service_selection
        }
    }

    # Phase 3: Test mode
    if is_test:
        msg = f"The Assemblyline ingest settings you would use are: {settings}"
        print(msg)
        log.debug(msg)
        return

    # Phase 4: Initialize key variables
    hash_table = []
    number_of_files_ingested = 0
    if fresh and os.path.exists(HASH_FILE):
        os.remove(HASH_FILE)

    # Phase 5: Script Resumption Logic
    # If the script somehow crashed or stopped prematurely, then the text file containing
    # the hashes which have been ingested to Assemblyline will still exist on the host.
    # Therefore, we will check if that file exists, and if so, then we will grab the last
    # hash that has been ingested to Assemblyline and use that as our starting point for
    # the current run.
    resume_ingestion_sha = None
    skip = False
    if os.path.exists(HASH_FILE):
        # This grabs the last hash in the file.
        resume_ingestion_sha = check_output(["tail", "-1", HASH_FILE]).decode().strip("\n")
        # This adds the most recent hash that has been ingested to the hash table, so that
        # we do not re-ingest it during this run.
        if resume_ingestion_sha:
            hash_table.append(resume_ingestion_sha)
            skip = True

    # Create file handlers for the two information files we need.
    hash_file = open(HASH_FILE, "a+")
    skipped_file = open(SKIPPED_FILE, "a+")

    # Phase 6: Create the Assemblyline Client
    al_client = get_client(url, apikey=(username, apikey))

    # Phase 7: Recursively go through every file in the provided folder and its sub-folders.
    for root, dir_names, file_names in os.walk(path):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)

            # Reset the retry counter for each file so one bad file cannot
            # exhaust the retry budget for every file that follows it.
            retry_count = 0

            # Retry up until x number of retries
            while retry_count < retries:
                # Wrap everything in a try-catch so we become invincible
                try:
                    file_size = os.path.getsize(file_path)

                    # If the file is not within the file size bounds, we can't upload it.
                    # Break (not continue) so the retry loop moves on to the next file.
                    if file_size > MAX_FILE_SIZE:
                        msg = f"{file_path} is too big. Size: {file_size} > {MAX_FILE_SIZE}."
                        print(msg)
                        log.debug(msg)
                        break
                    elif file_size < MIN_FILE_SIZE:
                        msg = f"{file_path} is too small. Size: {file_size} < {MIN_FILE_SIZE}."
                        print(msg)
                        log.debug(msg)
                        break

                    # Phase 8: Ingestion Logic

                    # Create a sha256 hash using the file contents.
                    with open(file_path, "rb") as fh:
                        sha = get_id_from_data(fh.read())

                    # We only care about files that occur after the last sha in the hash file
                    if resume_ingestion_sha and resume_ingestion_sha == sha:
                        skip = False

                    # Until we reach the file matching the last submitted sha, break out of
                    # the retry loop so we keep looking at the next file.
                    if skip:
                        break

                    # If the file is already in the hash table, don't ingest it again
                    if sha in hash_table:
                        break
                    else:
                        hash_table.append(sha)

                    # Phase 9: Ingestion and logging everything

                    # Pre-ingestion logging
                    pre_ingestion_message = f"{file_path} ({sha}) is about to be ingested."
                    print(pre_ingestion_message)
                    log.debug(pre_ingestion_message)

                    # Actual ingestion
                    resp = al_client.ingest(path=file_path, fname=file_name, params=settings, metadata={"filename": file_path})

                    # Documenting the hash and the ingest_id into the text files
                    number_of_files_ingested += 1
                    hash_file.write(f"{sha}\n")
                    ingest_id = resp['ingest_id']

                    # Post ingestion logging
                    post_ingestion_message = f"{file_path} ({sha}) has been ingested with ingest_id {ingest_id}."
                    print(post_ingestion_message)
                    log.debug(post_ingestion_message)

                    # Success, now break!
                    break
                except Exception as e:
                    print(e)
                    log.error(e)

                    # Logic for skipping files based on number of retries
                    retry_count += 1
                    if retry_count >= retries:
                        msg = f"{file_path} was skipped due to {e}."
                        print(msg)
                        skipped_file.write(msg)
                    else:
                        sleep(5)

    msg = "All done!"
    print(msg)
    log.debug(msg)
Example 15

def yara_update(updater_type, update_config_path, update_output_path,
                download_directory, externals, cur_logger) -> None:
    """
    Download every file from the sources listed in the update configuration file.
    """
    # noinspection PyBroadException
    try:
        # Load updater configuration
        update_config = {}
        if update_config_path and os.path.exists(update_config_path):
            with open(update_config_path, 'r') as yml_fh:
                update_config = yaml.safe_load(yml_fh)
        else:
            cur_logger.error(f"Update configuration file doesn't exist: {update_config_path}")
            exit()

        # Exit if no update sources given
        if 'sources' not in update_config or not update_config['sources']:
            cur_logger.error("Update configuration does not contain any sources to update from")
            exit()

        # Initialise al_client
        server = update_config['ui_server']
        user = update_config['api_user']
        api_key = update_config['api_key']
        cur_logger.info(f"Connecting to Assemblyline API: {server}...")
        al_client = get_client(server, apikey=(user, api_key), verify=False)
        cur_logger.info(f"Connected!")

        # Parse updater configuration
        previous_update = update_config.get('previous_update', None)
        previous_hash = json.loads(update_config.get('previous_hash', None) or "{}")
        sources = {source['name']: source for source in update_config['sources']}
        files_sha256 = {}
        files_default_classification = {}

        # Create working directory
        updater_working_dir = os.path.join(tempfile.gettempdir(), 'updater_working_dir')
        if os.path.exists(updater_working_dir):
            shutil.rmtree(updater_working_dir)
        os.makedirs(updater_working_dir)

        # Go through each source and download file
        for source_name, source in sources.items():
            os.makedirs(os.path.join(updater_working_dir, source_name))
            # 1. Download signatures
            cur_logger.info(f"Downloading files from: {source['uri']}")
            uri: str = source['uri']

            if uri.endswith('.git'):
                files = git_clone_repo(download_directory, source, cur_logger, previous_update=previous_update)
            else:
                files = [url_download(download_directory, source, cur_logger, previous_update=previous_update)]

            processed_files = set()

            # 2. Aggregate files
            file_name = os.path.join(updater_working_dir, f"{source_name}.yar")
            mode = "w"
            for file in files:
                # File has already been processed before, skip it to avoid duplication of rules
                if file in processed_files:
                    continue

                cur_logger.info(f"Processing file: {file}")

                file_dirname = os.path.dirname(file)
                processed_files.add(os.path.normpath(file))
                with open(file, 'r') as f:
                    f_lines = f.readlines()

                temp_lines = []
                for i, f_line in enumerate(f_lines):
                    if f_line.startswith("include"):
                        lines, processed_files = replace_include(f_line, file_dirname, processed_files, cur_logger)
                        temp_lines.extend(lines)
                    else:
                        temp_lines.append(f_line)

                # guess the type of files that we have in the current file
                guessed_category = guess_category(file)
                parser = Plyara()
                signatures = parser.parse_string("\n".join(temp_lines))

                # Ignore "cuckoo" rules
                if "cuckoo" in parser.imports:
                    parser.imports.remove("cuckoo")

                # Guess category
                if guessed_category:
                    for s in signatures:
                        if 'metadata' not in s:
                            s['metadata'] = []

                        # Do not override category with guessed category if it already exists
                        if any('category' in meta for meta in s['metadata']):
                            continue

                        s['metadata'].append({'category': guessed_category})
                        s['metadata'].append({guessed_category: s.get('rule_name')})

                # Save all rules from source into single file
                with open(file_name, mode) as f:
                    for s in signatures:
                        # Fix imports and remove cuckoo
                        s['imports'] = utils.detect_imports(s)
                        if "cuckoo" not in s['imports']:
                            f.write(utils.rebuild_yara_rule(s))

                if mode == "w":
                    mode = "a"

            # Check if the file is the same as the last run
            if os.path.exists(file_name):
                cache_name = os.path.basename(file_name)
                sha256 = get_sha256_for_file(file_name)
                if sha256 != previous_hash.get(cache_name, None):
                    files_sha256[cache_name] = sha256
                    files_default_classification[cache_name] = source.get('default_classification',
                                                                          classification.UNRESTRICTED)
                else:
                    cur_logger.info(f'File {cache_name} has not changed since last run. Skipping it...')

        if files_sha256:
            cur_logger.info(f"Found new {updater_type.upper()} rules files to process!")

            yara_importer = YaraImporter(updater_type, al_client, logger=cur_logger)

            # Validating and importing the different signatures
            for base_file in files_sha256:
                cur_logger.info(f"Validating output file: {base_file}")
                cur_file = os.path.join(updater_working_dir, base_file)
                source_name = os.path.splitext(os.path.basename(cur_file))[0]
                default_classification = files_default_classification.get(base_file, classification.UNRESTRICTED)

                _compile_rules(cur_file, externals, cur_logger)
                yara_importer.import_file(cur_file, source_name, default_classification=default_classification)
        else:
            cur_logger.info(f'No new {updater_type.upper()} rule files to process...')

        # Check if new signatures have been added
        if al_client.signature.update_available(since=previous_update or '', sig_type=updater_type)['update_available']:
            cur_logger.info("An update is available for download from the datastore")

            if not os.path.exists(update_output_path):
                os.makedirs(update_output_path)

            temp_zip_file = os.path.join(update_output_path, 'temp.zip')
            al_client.signature.download(output=temp_zip_file,
                                         query=f"type:{updater_type} AND (status:NOISY OR status:DEPLOYED)")

            if os.path.exists(temp_zip_file):
                with ZipFile(temp_zip_file, 'r') as zip_f:
                    zip_f.extractall(update_output_path)

                os.remove(temp_zip_file)

            # Create the response yaml
            with open(os.path.join(update_output_path, 'response.yaml'), 'w') as yml_fh:
                yaml.safe_dump(dict(hash=json.dumps(files_sha256)), yml_fh)

            cur_logger.info(f"New ruleset successfully downloaded and ready to use")

        cur_logger.info(f"{updater_type.upper()} updater completed successfully")
    except Exception:
        cur_logger.exception("Updater ended with an exception!")