예제 #1
0
def _merge_safe_hashes(new, old):
    """Merge a new safe hash record into an existing one of the same type.

    Both records are mutated in place; the merged *old* record is returned.
    Classifications are raised to the maximum of everything seen, hashes and
    file names are unioned, and sources are merged by name.

    :param new: Incoming safe hash record (dict)
    :param old: Existing safe hash record (dict) the new data is merged into
    :return: The updated *old* record
    :raises InvalidSafehash: On hash type mismatch, source type conflict,
        missing sources, or any malformed data encountered during the merge
    """
    try:
        # Check if hash types match
        if new['type'] != old['type']:
            raise InvalidSafehash(
                f"Safe hash type mismatch: {new['type']} != {old['type']}")

        # Use max classification
        old['classification'] = CLASSIFICATION.max_classification(
            old['classification'], new['classification'])

        # Update updated time
        old['updated'] = now_as_iso()

        # Update hashes
        old['hashes'].update(new['hashes'])

        # Update type specific info
        if old['type'] == 'file':
            old.setdefault('file', {})
            # pop() removes 'name' from new['file'] so the update() below
            # does not clobber the merged name list
            new_names = new.get('file', {}).pop('name', [])
            if 'name' in old['file']:
                for name in new_names:
                    if name not in old['file']['name']:
                        old['file']['name'].append(name)
            elif new_names:
                old['file']['name'] = new_names
            old['file'].update(new.get('file', {}))
        elif old['type'] == 'tag':
            old['tag'] = new['tag']

        # Merge sources
        src_map = {x['name']: x for x in new['sources']}
        if not src_map:
            raise InvalidSafehash("No valid source found")

        old_src_map = {x['name']: x for x in old['sources']}
        for name, src in src_map.items():
            src_cl = src.get('classification', None)
            if src_cl:
                old['classification'] = CLASSIFICATION.max_classification(
                    old['classification'], src_cl)

            if name not in old_src_map:
                old_src_map[name] = src
            else:
                old_src = old_src_map[name]
                if old_src['type'] != src['type']:
                    raise InvalidSafehash(
                        f"Source {name} has a type conflict: {old_src['type']} != {src['type']}"
                    )

                for reason in src['reason']:
                    if reason not in old_src['reason']:
                        old_src['reason'].append(reason)
        # Materialize to a list: a dict_values view is not JSON serializable
        # and cannot be saved back to the datastore as-is
        old['sources'] = list(old_src_map.values())
        return old
    except Exception as e:
        # Chain the original exception so the real cause stays in tracebacks
        raise InvalidSafehash(f"Invalid data provided: {str(e)}") from e
예제 #2
0
def filter_sections(sections, user_classification, min_classification):
    """Drop result sections the user cannot see and normalize the rest.

    Sections the user can access are raised to at least *min_classification*,
    JSON-formatted bodies are parsed, and tags are converted to list form.
    If any section was removed, a warning section is prepended.

    :param sections: List of result section dicts
    :param user_classification: Classification of the requesting user
    :param min_classification: Minimum classification to apply to sections
    :return: (max classification seen, filtered section list) tuple
    """
    max_classification = min_classification

    # Filter out sections the user does not have access to
    temp_sections = [
        s for s in sections if CLASSIFICATION.is_accessible(
            user_classification, s['classification'])
    ]
    final_sections = []
    for section in temp_sections:
        # TODO: Depth analysis should be done before returning sections
        try:
            # Recalculate max classification using the currently accessible sections
            section['classification'] = CLASSIFICATION.max_classification(
                section['classification'], min_classification)
            max_classification = CLASSIFICATION.max_classification(
                section['classification'], max_classification)
        except InvalidClassification:
            # Skip sections whose classification cannot be merged
            continue

        if section['body_format'] in [
                "GRAPH_DATA", "URL", "JSON", "KEY_VALUE"
        ] and isinstance(section['body'], str):
            # Loading JSON formatted sections
            try:
                section['body'] = json.loads(section['body'])
            except ValueError:
                # Leave the body as a raw string if it is not valid JSON
                pass

        # Changing tags to a list
        section['tags'] = tag_dict_to_list(section['tags'])
        final_sections.append(section)

    # Telling the user a section was hidden
    if len(sections) != len(final_sections):
        hidden_section = dict(
            body="One of the sections produced by the service has been removed because you do not have enough "
                 "privileges to see its results. \n\nContact system administrators for more information.",
            title_text="WARNING: Service sections have been sanitized",
            depth=0,
            classification=CLASSIFICATION.UNRESTRICTED,
            # Empty list to match the tag_dict_to_list() output shape of the
            # other sections returned by this function
            tags=[],
            heuristic=None,
            body_format="TEXT")
        final_sections.insert(0, hidden_section)

    return max_classification, final_sections
예제 #3
0
def format_result(user_classification,
                  r,
                  min_classification,
                  build_hierarchy=False):
    """Prepare a single result document for display to a given user.

    Returns None when the user cannot even see the minimum classification.
    Otherwise strips out inaccessible sections and files, raises the result
    classification to at least the minimum, and optionally attaches a
    section hierarchy.

    :param user_classification: Classification of the requesting user
    :param r: Result document (dict), mutated in place
    :param min_classification: Minimum classification to enforce
    :param build_hierarchy: If True, compute r['section_hierarchy']
    :return: The formatted result or None if not accessible
    """
    # Bail out early if the user cannot see anything at this level
    if not CLASSIFICATION.is_accessible(user_classification,
                                        min_classification):
        return None

    # Remove sections the user cannot access; survivors are bumped to at
    # least the minimum classification
    max_classification, r['result']['sections'] = filter_sections(
        r['result']['sections'], user_classification, min_classification)

    # Keep only the supplementary/extracted files the user may see
    for file_type in ('supplementary', 'extracted'):
        visible_files = []
        for entry in r['response'][file_type]:
            if CLASSIFICATION.is_accessible(user_classification,
                                            entry['classification']):
                visible_files.append(entry)
        r['response'][file_type] = visible_files

    # Result classification: at least min, at most what remains viewable
    r['classification'] = CLASSIFICATION.max_classification(
        max_classification, min_classification)

    if build_hierarchy:
        try:
            hierarchy, _ = build_heirarchy_rec(r['result']['sections'])
            r['section_hierarchy'] = hierarchy['children']
        except InvalidSectionList:
            LOGGER.warning(
                f"Could not generate section hierarchy for {r['response']['service_name']} "
                f"service. Will use old display method.")

    return r
예제 #4
0
def filter_sections(sections, user_classification, min_classification):
    """Drop result sections the user cannot see and normalize the rest.

    Accessible sections are raised to at least *min_classification* and run
    through fix_section_data(). If any section was removed, a warning
    section is prepended.

    :param sections: List of result section dicts
    :param user_classification: Classification of the requesting user
    :param min_classification: Minimum classification to apply to sections
    :return: (max classification seen, filtered section list) tuple
    """
    max_classification = min_classification

    # Filter out sections the user does not have access to
    temp_sections = [
        s for s in sections if CLASSIFICATION.is_accessible(
            user_classification, s['classification'])
    ]
    final_sections = []
    for section in temp_sections:
        try:
            # Recalculate max classification using the currently accessible sections
            section['classification'] = CLASSIFICATION.max_classification(
                section['classification'], min_classification)
            max_classification = CLASSIFICATION.max_classification(
                section['classification'], max_classification)
        except InvalidClassification:
            # Skip sections whose classification cannot be merged
            continue

        final_sections.append(fix_section_data(section))

    # Telling the user a section was hidden
    if len(sections) != len(final_sections):
        hidden_section = dict(
            body="One of the sections produced by the service has been removed because you do not have enough "
                 "privileges to see its results. \n\nContact system administrators for more information.",
            title_text="WARNING: Service sections have been sanitized",
            depth=0,
            classification=CLASSIFICATION.UNRESTRICTED,
            tags=[],
            heuristic=None,
            body_format="TEXT")
        final_sections.insert(0, hidden_section)

    return max_classification, final_sections
예제 #5
0
    def get_user_classification(self, group_dn_list):
        """
        Compute a user's classification from the groups they belong to.

        Starts from UNRESTRICTED and folds in the classification configured
        for every group DN that has an entry in the classification mappings.

        NB: This is not fully implemented at this point

        :param group_dn_list: list of DNs the user is a member of
        :return: the resulting maximum classification
        """
        mappings = self.classification_mappings
        result = CLASSIFICATION.UNRESTRICTED

        for dn in group_dn_list:
            if dn in mappings:
                result = CLASSIFICATION.max_classification(result, mappings[dn])

        return result
예제 #6
0
def get_file_results(sha256, **kwargs):
    """
    Get all the file results of a specific file.

    Variables:
    sha256         => A resource locator for the file (SHA256)

    Arguments:
    None

    Data Block:
    None

    API call example:
    /api/v4/file/result/123456...654321/

    Result example:
    {"file_info": {},            # File info Block
     "results": {},              # Full result list
     "errors": {},               # Full error list
     "parents": {},              # List of possible parents
     "childrens": {},            # List of children files
     "tags": {},                 # List tags generated
     "metadata": {},             # Metadata facets results
     "file_viewer_only": True }  # UI switch to disable features
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "This file does not exists", 404)

    if user and Classification.is_accessible(user['classification'],
                                             file_obj['classification']):
        # Running maximum of all classifications folded into the output
        max_c12n = file_obj['classification']
        output = {
            "file_info": file_obj,
            "results": [],
            "tags": {},
            "attack_matrix": {},
            'heuristics': {},
            "signatures": set()
        }

        # Fan out the independent datastore lookups; leaving the 'with'
        # block waits for all submitted futures to finish
        with concurrent.futures.ThreadPoolExecutor(4) as executor:
            res_ac = executor.submit(list_file_active_keys, sha256,
                                     user["access_control"])
            res_parents = executor.submit(list_file_parents, sha256,
                                          user["access_control"])
            res_children = executor.submit(list_file_childrens, sha256,
                                           user["access_control"])
            res_meta = executor.submit(STORAGE.get_file_submission_meta,
                                       sha256, config.ui.statistics.submission,
                                       user["access_control"])

        active_keys, alternates = res_ac.result()
        output['parents'] = res_parents.result()
        output['childrens'] = res_children.result()
        output['metadata'] = res_meta.result()

        output['results'] = []
        output['alternates'] = {}
        res = STORAGE.result.multiget(active_keys,
                                      as_dictionary=False,
                                      as_obj=False)
        # NOTE(review): the loop body rebinds 'res' while iterating over it.
        # This works because the for statement holds its own iterator, but
        # renaming one of the two variables would be clearer.
        for r in res:
            res = format_result(user['classification'],
                                r,
                                file_obj['classification'],
                                build_hierarchy=True)
            if res:
                max_c12n = Classification.max_classification(
                    max_c12n, res['classification'])
                output['results'].append(res)

        # Group alternate results by service name; the service version is
        # derived from the result key (presumably "<sha>.<svc>.<version>..."
        # with '_' standing in for '.' — confirm against the key format)
        for i in alternates:
            if i['response']['service_name'] not in output["alternates"]:
                output["alternates"][i['response']['service_name']] = []
            i['response']['service_version'] = i['id'].split(".",
                                                             3)[2].replace(
                                                                 "_", ".")
            output["alternates"][i['response']['service_name']].append(i)

        output['errors'] = []
        output['file_viewer_only'] = True

        # Collect heuristics, ATT&CK entries, signatures and tags from every
        # visible result, walking highest-scoring sections first
        done_heuristics = set()
        for res in output['results']:
            sorted_sections = sorted(res.get('result', {}).get('sections', []),
                                     key=lambda i: i['heuristic']['score']
                                     if i['heuristic'] is not None else 0,
                                     reverse=True)
            for sec in sorted_sections:
                h_type = "info"

                if sec.get('heuristic', False):
                    # Map the heuristic score to a verdict bucket:
                    # <0 safe, <300 info, <1000 suspicious, else malicious
                    if sec['heuristic']['score'] < 0:
                        h_type = "safe"
                    elif sec['heuristic']['score'] < 300:
                        h_type = "info"
                    elif sec['heuristic']['score'] < 1000:
                        h_type = "suspicious"
                    else:
                        h_type = "malicious"

                    # Record each heuristic only once, under the verdict of
                    # the first (highest-scoring) section that raised it
                    if sec['heuristic']['heur_id'] not in done_heuristics:
                        item = (sec['heuristic']['heur_id'],
                                sec['heuristic']['name'])
                        output['heuristics'].setdefault(h_type, [])
                        output['heuristics'][h_type].append(item)
                        done_heuristics.add(sec['heuristic']['heur_id'])

                    # Process Attack matrix
                    for attack in sec['heuristic'].get('attack', []):
                        attack_id = attack['attack_id']
                        for cat in attack['categories']:
                            output['attack_matrix'].setdefault(cat, [])
                            item = (attack_id, attack['pattern'], h_type)
                            if item not in output['attack_matrix'][cat]:
                                output['attack_matrix'][cat].append(item)

                    # Process Signatures
                    for signature in sec['heuristic'].get('signature', []):
                        sig = (signature['name'], h_type,
                               signature.get('safe', False))
                        if sig not in output['signatures']:
                            output['signatures'].add(sig)

                # Process tags
                for t in sec['tags']:
                    output["tags"].setdefault(t['type'], [])
                    t_item = (t['value'], h_type, t['safelisted'])
                    if t_item not in output["tags"][t['type']]:
                        output["tags"][t['type']].append(t_item)

        # Sets are not JSON serializable
        output['signatures'] = list(output['signatures'])

        output['file_info']['classification'] = max_c12n
        return make_api_response(output)
    else:
        return make_api_response({}, "You are not allowed to view this file",
                                 403)
예제 #7
0
def download_file(sha256, **kwargs):
    """
    Download the file using the default encoding method. This api
    will force the browser in download mode.

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments (optional):
    encoding     => Type of encoding use for the resulting file
    name         => Name of the file to download
    sid          => Submission ID where the file is from
    password     => Password to use for the zip encoding

    Data Block:
    None

    API call example:
    /api/v4/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.",
                                 404)

    if user and Classification.is_accessible(user['classification'],
                                             file_obj['classification']):
        params = load_user_settings(user)

        # Sanitize the requested download name
        name = request.args.get('name', sha256) or sha256
        name = os.path.basename(name)
        name = safe_str(name)

        sid = request.args.get('sid', None) or None
        submission = {}
        file_metadata = {}
        if sid is not None:
            submission = STORAGE.submission.get(sid, as_obj=False)
            if submission is None:
                submission = {}

            # Only pull submission metadata when the submission exists and
            # the user may see it (an empty dict has no 'classification' key
            # and previously raised an uncaught KeyError here)
            if submission and Classification.is_accessible(
                    user['classification'], submission['classification']):
                file_metadata.update(unflatten(submission['metadata']))

        if Classification.enforce:
            submission_classification = submission.get(
                'classification', file_obj['classification'])
            file_metadata['classification'] = Classification.max_classification(
                submission_classification, file_obj['classification'])

        encoding = request.args.get('encoding', params['download_encoding'])
        password = request.args.get('password', params['default_zip_password'])

        if encoding not in FILE_DOWNLOAD_ENCODINGS:
            return make_api_response(
                {},
                f"{encoding.upper()} is not in the valid encoding types: {FILE_DOWNLOAD_ENCODINGS}",
                403)

        if encoding == "raw" and not ALLOW_RAW_DOWNLOADS:
            return make_api_response(
                {}, "RAW file download has been disabled by administrators.",
                403)

        if encoding == "zip":
            if not ALLOW_ZIP_DOWNLOADS:
                return make_api_response(
                    {},
                    "PROTECTED file download has been disabled by administrators.",
                    403)
            elif not password:
                return make_api_response(
                    {}, "No password given or retrieved from user's settings.",
                    403)

        download_dir = None
        target_path = None

        # Create a temporary download location
        if encoding == 'zip':
            download_dir = tempfile.mkdtemp()
            download_path = os.path.join(download_dir, name)
        else:
            # mkstemp returns an open OS-level file descriptor: close it
            # right away so it does not leak (FILESTORE.download re-opens
            # the path itself)
            fd, download_path = tempfile.mkstemp()
            os.close(fd)

        try:
            downloaded_from = FILESTORE.download(sha256, download_path)

            if not downloaded_from:
                return make_api_response(
                    {}, "The file was not found in the system.", 404)

            # Encode file
            if encoding == 'raw':
                target_path = download_path
            elif encoding == 'zip':
                name += '.zip'
                target_path = os.path.join(download_dir, name)
                # Argument list with shell=False: the user-supplied password
                # and file name are never interpreted by a shell
                subprocess.run([
                    'zip', '-j', '--password', password, target_path,
                    download_path
                ],
                               capture_output=True)
            else:
                target_path, name = encode_file(download_path, name,
                                                file_metadata)

            return stream_file_response(open(target_path, 'rb'), name,
                                        os.path.getsize(target_path))

        finally:
            # Cleanup. For 'raw', target_path == download_path; the
            # os.path.exists() guards make the double unlink attempt safe.
            if target_path:
                if os.path.exists(target_path):
                    os.unlink(target_path)
            if download_path:
                if os.path.exists(download_path):
                    os.unlink(download_path)
            if download_dir:
                if os.path.exists(download_dir):
                    os.rmdir(download_dir)
    else:
        return make_api_response({},
                                 "You are not allowed to download this file.",
                                 403)
예제 #8
0
def ingest_single_file(**kwargs):
    """
    Ingest a single file, sha256 or URL in the system

        Note 1:
            If you are submitting a sha256 or a URL, you must use the application/json encoding and one of
            sha256 or url parameters must be included in the data block.

        Note 2:
            If you are submitting a file directly, you have to use multipart/form-data encoding this
            was done to reduce the memory footprint and speedup file transfers
             ** Read documentation of mime multipart standard if your library does not support it**

            The multipart/form-data for sending binary has two parts:
                - The first part contains a JSON dump of the optional params and uses the name 'json'
                - The last part contains the file binary, uses the name 'bin' and includes a filename

        Note 3:
            The ingest API uses the user's default settings to submit files to the system
            unless these settings are overridden in the 'params' field. Although, there are
            exceptions to that rule. Fields deep_scan, ignore_filtering, ignore_cache are
            reset to False because they lead to dangerous behavior in the system.

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
     //REQUIRED VALUES: One of the following
     "sha256": "1234...CDEF"         # SHA256 hash of the file
     "url": "http://...",            # Url to fetch the file from

     //OPTIONAL VALUES
     "name": "file.exe",             # Name of the file

     "metadata": {                   # Submission Metadata
         "key": val,                    # Key/Value pair for metadata parameters
         },

     "params": {                     # Submission parameters
         "key": val,                    # Key/Value pair for params that differ from the user's defaults
         },                                 # DEFAULT: /api/v3/user/submission_params/<user>/

     "generate_alert": False,        # Generate an alert in our alerting system or not
     "notification_queue": None,     # Name of the notification queue
     "notification_threshold": None, # Threshold for notification
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"      <-- JSON data blob part (only previous optional values valid)

    {"params": {"ignore_cache": true}, "generate_alert": true}
    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--             <-- End of HTTP transmission

    Result example:
    { "ingest_id": <ID OF THE INGESTED FILE> }
    """
    user = kwargs['user']
    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())
    extracted_path = original_file = None
    try:
        # Get data block and binary blob
        if 'multipart/form-data' in request.content_type:
            if 'json' in request.values:
                data = json.loads(request.values['json'])
            else:
                data = {}
            binary = request.files['bin']
            name = data.get("name", binary.filename)
            sha256 = None
            url = None
        elif 'application/json' in request.content_type:
            data = request.json
            binary = None
            sha256 = data.get('sha256', None)
            url = data.get('url', None)
            # Guard the basename call: url may be None when only a name or
            # sha256 was provided (os.path.basename(None) raises TypeError)
            name = data.get("name", None) or sha256 or (
                os.path.basename(url) if url else None)
        else:
            return make_api_response({}, "Invalid content type", 400)

        if not data:
            return make_api_response({}, "Missing data block", 400)

        # Get notification queue parameters
        notification_queue = data.get('notification_queue', None)
        notification_threshold = data.get('notification_threshold', None)
        if not isinstance(notification_threshold,
                          int) and notification_threshold:
            return make_api_response(
                {}, "notification_threshold should be an int", 400)

        # Get file name
        if not name:
            return make_api_response({}, "Filename missing", 400)

        name = safe_str(os.path.basename(name))
        if not name:
            return make_api_response({}, "Invalid filename", 400)

        # Create the working directory for this ingestion
        os.makedirs(out_dir, exist_ok=True)
        original_file = out_file = os.path.join(out_dir, name)

        # Prepare variables
        extra_meta = {}
        fileinfo = None
        do_upload = True
        al_meta = {}

        # Load default user params
        s_params = ui_to_submission_params(load_user_settings(user))

        # Reset dangerous user settings to safe values
        s_params.update({
            'deep_scan': False,
            "priority": 150,
            "ignore_cache": False,
            "ignore_dynamic_recursion_prevention": False,
            "ignore_filtering": False,
            "type": "INGEST"
        })

        # Apply provided params
        s_params.update(data.get("params", {}))

        # Load file
        if not binary:
            if sha256:
                fileinfo = STORAGE.file.get_if_exists(
                    sha256,
                    as_obj=False,
                    archive_access=config.datastore.ilm.update_archive)
                if FILESTORE.exists(sha256):
                    if fileinfo:
                        if not Classification.is_accessible(
                                user['classification'],
                                fileinfo['classification']):
                            # Deliberately vague: do not reveal that the file
                            # exists to a user who cannot access it
                            return make_api_response(
                                {}, "SHA256 does not exist in our datastore",
                                404)
                        else:
                            # File's classification must be applied at a minimum
                            s_params['classification'] = \
                                Classification.max_classification(
                                    s_params['classification'],
                                    fileinfo['classification'])
                    else:
                        # File is in storage and the DB no need to upload anymore
                        do_upload = False
                    # File exists in the filestore and the user has appropriate file access
                    FILESTORE.download(sha256, out_file)
                else:
                    return make_api_response(
                        {}, "SHA256 does not exist in our datastore", 404)
            else:
                if url:
                    if not config.ui.allow_url_submissions:
                        return make_api_response(
                            {}, "URL submissions are disabled in this system",
                            400)

                    try:
                        safe_download(url, out_file)
                        extra_meta['submitted_url'] = url
                    except FileTooBigException:
                        return make_api_response({},
                                                 "File too big to be scanned.",
                                                 400)
                    except InvalidUrlException:
                        return make_api_response({},
                                                 "Url provided is invalid.",
                                                 400)
                    except ForbiddenLocation:
                        return make_api_response(
                            {}, "Hostname in this URL cannot be resolved.",
                            400)
                else:
                    return make_api_response(
                        {},
                        "Missing file to scan. No binary, sha256 or url provided.",
                        400)
        else:
            binary.save(out_file)

        if do_upload and os.path.getsize(out_file) == 0:
            return make_api_response({},
                                     err="File empty. Ingestion failed",
                                     status_code=400)

        # Apply group params if not specified
        if 'groups' not in s_params:
            s_params['groups'] = user['groups']

        # Get generate alert parameter
        generate_alert = data.get('generate_alert',
                                  s_params.get('generate_alert', False))
        if not isinstance(generate_alert, bool):
            return make_api_response({}, "generate_alert should be a boolean",
                                     400)

        # Override final parameters
        s_params.update({
            'generate_alert': generate_alert,
            'max_extracted': config.core.ingester.default_max_extracted,
            'max_supplementary': config.core.ingester.default_max_supplementary,
            'priority': min(s_params.get("priority", 150),
                            config.ui.ingest_max_priority),
            'submitter': user['uname']
        })

        # Enforce maximum DTL
        if config.submission.max_dtl > 0:
            ttl = int(s_params['ttl'])
            s_params['ttl'] = min(ttl, config.submission.max_dtl) if ttl \
                else config.submission.max_dtl

        # No need to re-calculate fileinfo if we have it already
        if not fileinfo:
            # Calculate file digest
            fileinfo = IDENTIFY.fileinfo(out_file)

            # Validate file size
            if fileinfo['size'] > MAX_SIZE and not s_params.get(
                    'ignore_size', False):
                msg = f"File too large ({fileinfo['size']} > {MAX_SIZE}). Ingestion failed"
                return make_api_response({}, err=msg, status_code=413)
            elif fileinfo['size'] == 0:
                return make_api_response({},
                                         err="File empty. Ingestion failed",
                                         status_code=400)

            # Decode cart if needed
            extracted_path, fileinfo, al_meta = decode_file(
                out_file, fileinfo, IDENTIFY)
            if extracted_path:
                out_file = extracted_path

        # Alter filename and classification based on CaRT output
        meta_classification = al_meta.pop('classification',
                                          s_params['classification'])
        if meta_classification != s_params['classification']:
            try:
                s_params['classification'] = Classification.max_classification(
                    meta_classification, s_params['classification'])
            except InvalidClassification as ic:
                return make_api_response(
                    {},
                    "The classification found inside the cart file cannot be merged with "
                    f"the classification the file was submitted as: {str(ic)}",
                    400)
        name = al_meta.pop('name', name)

        # Validate ingest classification
        if not Classification.is_accessible(user['classification'],
                                            s_params['classification']):
            return make_api_response(
                {}, "You cannot start a submission with higher "
                "classification than you're allowed to see", 400)

        # Freshen file object
        expiry = now_as_iso(s_params['ttl'] * 24 * 60 *
                            60) if s_params.get('ttl', None) else None
        STORAGE.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry,
                                     s_params['classification'])

        # Save the file to the filestore if needs be
        # also no need to test if exist before upload because it already does that
        if do_upload:
            FILESTORE.upload(out_file, fileinfo['sha256'], location='far')

        # Setup notification queue if needed
        if notification_queue:
            notification_params = {
                "queue": notification_queue,
                "threshold": notification_threshold
            }
        else:
            notification_params = {}

        # Load metadata, setup some default values if they are missing and append the cart metadata
        ingest_id = get_random_id()
        metadata = flatten(data.get("metadata", {}))
        metadata['ingest_id'] = ingest_id
        metadata['type'] = s_params['type']
        metadata.update(al_meta)
        if 'ts' not in metadata:
            metadata['ts'] = now_as_iso()
        metadata.update(extra_meta)

        # Set description if it does not exist
        s_params['description'] = s_params['description'] or \
            f"[{s_params['type']}] Inspection of file: {name}"

        # Create submission object
        try:
            submission_obj = Submission({
                "sid": ingest_id,
                "files": [{
                    'name': name,
                    'sha256': fileinfo['sha256'],
                    'size': fileinfo['size']
                }],
                "notification": notification_params,
                "metadata": metadata,
                "params": s_params
            })
        except (ValueError, KeyError) as e:
            return make_api_response({}, err=str(e), status_code=400)

        # Send submission object for processing
        ingest.push(submission_obj.as_primitives())
        submission_received(submission_obj)

        return make_api_response({"ingest_id": ingest_id})

    finally:
        # Cleanup files on disk (best effort: never mask the real response)
        try:
            if original_file and os.path.exists(original_file):
                os.unlink(original_file)
        except Exception:
            pass

        try:
            if extracted_path and os.path.exists(extracted_path):
                os.unlink(extracted_path)
        except Exception:
            pass

        try:
            if os.path.exists(out_dir):
                shutil.rmtree(out_dir, ignore_errors=True)
        except Exception:
            pass
예제 #9
0
def submit(**kwargs):
    """
    Submit a single file, sha256 or url for analysis

        Note 1:
            If you are submitting a sha256 or a URL, you must use the application/json encoding and one of
            sha256 or url parameters must be included in the data block.

        Note 2:
            If you are submitting a file directly, you have to use multipart/form-data encoding; this
            was done to reduce the memory footprint and speedup file transfers
             ** Read documentation of mime multipart standard if your library does not support it**

            The multipart/form-data for sending binary has two parts:
                - The first part contains a JSON dump of the optional params and uses the name 'json'
                - The last part contains the file binary, uses the name 'bin' and includes a filename

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
      // REQUIRED: One of the two following
      "sha256": "123...DEF",      # SHA256 hash of the file already in the datastore
      "url": "http://...",        # Url to fetch the file from

      // OPTIONAL VALUES
      "name": "file.exe",         # Name of the file to scan otherwise the sha256 or base file of the url

      "metadata": {               # Submission metadata
        "key": val,                 # Key/Value pair metadata values
      },

      "params": {                 # Submission parameters
        "key": val,                 # Key/Value pair for params that differ from the defaults
      },                            # Default params can be fetched at /api/v3/user/submission_params/<user>/
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"      <-- JSON data blob part (only previous optional values valid)

    {"metadata": {"hello": "world"}}
    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--             <-- End of HTTP transmission


    Result example:
    <Submission message object as a json dictionary>
    """
    user = kwargs['user']
    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())
    # Pre-declare so the cleanup code in `finally` never relies on an unbound
    # local when an early return or exception happens before the path is built.
    out_file = None

    # Refuse the submission outright if the user is over quota
    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    submit_result = None
    try:
        # Get data block and binary blob
        content_type = request.content_type or ''
        if 'multipart/form-data' in content_type:
            if 'json' in request.values:
                data = json.loads(request.values['json'])
            else:
                data = {}
            binary = request.files['bin']
            name = data.get("name", binary.filename)
            sha256 = None
            url = None
        elif 'application/json' in content_type:
            data = request.json
            binary = None
            sha256 = data.get('sha256', None)
            url = data.get('url', None)
            # BUGFIX: os.path.basename(None) raises TypeError. Only fall back to the
            # url's basename when a url was actually provided, so a request missing
            # name/sha256/url yields the proper "Filename missing" 400 below instead
            # of an unhandled 500.
            name = data.get("name", None) or sha256 or (os.path.basename(url) if url else None)
        else:
            return make_api_response({}, "Invalid content type", 400)

        if data is None:
            return make_api_response({}, "Missing data block", 400)

        if not name:
            return make_api_response({}, "Filename missing", 400)

        # Strip any path component and sanitize the filename
        name = safe_str(os.path.basename(name))
        if not name:
            return make_api_response({}, "Invalid filename", 400)

        # Create task object
        if "ui_params" in data:
            s_params = ui_to_submission_params(data['ui_params'])
        else:
            s_params = ui_to_submission_params(load_user_settings(user))

        s_params.update(data.get("params", {}))
        if 'groups' not in s_params:
            s_params['groups'] = user['groups']

        s_params['quota_item'] = True
        s_params['submitter'] = user['uname']
        if not s_params['description']:
            s_params['description'] = "Inspection of file: %s" % name

        # Enforce maximum DTL
        if config.submission.max_dtl > 0:
            s_params['ttl'] = min(int(
                s_params['ttl']), config.submission.max_dtl) if int(
                    s_params['ttl']) else config.submission.max_dtl

        if not Classification.is_accessible(user['classification'],
                                            s_params['classification']):
            return make_api_response(
                {}, "You cannot start a scan with higher "
                "classification then you're allowed to see", 400)

        # Prepare the output directory
        try:
            os.makedirs(out_dir)
        except Exception:
            pass
        out_file = os.path.join(out_dir, name)

        # Get the output file
        extra_meta = {}
        if not binary:
            if sha256:
                fileinfo = STORAGE.file.get_if_exists(
                    sha256,
                    as_obj=False,
                    archive_access=config.datastore.ilm.update_archive)
                if FILESTORE.exists(sha256):
                    if fileinfo:
                        if not Classification.is_accessible(
                                user['classification'],
                                fileinfo['classification']):
                            # Deliberately the same message as "not found" so we do not
                            # leak the existence of files the user cannot access
                            return make_api_response(
                                {}, "SHA256 does not exist in our datastore",
                                404)
                        else:
                            # File's classification must be applied at a minimum
                            s_params[
                                'classification'] = Classification.max_classification(
                                    s_params['classification'],
                                    fileinfo['classification'])

                    # File exists in the filestore and the user has appropriate file access
                    FILESTORE.download(sha256, out_file)
                else:
                    return make_api_response(
                        {}, "SHA256 does not exist in our datastore", 404)
            else:
                if url:
                    if not config.ui.allow_url_submissions:
                        return make_api_response(
                            {}, "URL submissions are disabled in this system",
                            400)

                    try:
                        safe_download(url, out_file)
                        extra_meta['submitted_url'] = url
                    except FileTooBigException:
                        return make_api_response({},
                                                 "File too big to be scanned.",
                                                 400)
                    except InvalidUrlException:
                        return make_api_response({},
                                                 "Url provided is invalid.",
                                                 400)
                    except ForbiddenLocation:
                        return make_api_response(
                            {}, "Hostname in this URL cannot be resolved.",
                            400)
                else:
                    return make_api_response(
                        {},
                        "Missing file to scan. No binary, sha256 or url provided.",
                        400)
        else:
            # Binary upload: spool it to disk for the submission client
            with open(out_file, "wb") as my_file:
                my_file.write(binary.read())

        try:
            metadata = flatten(data.get('metadata', {}))
            metadata.update(extra_meta)

            # "files" is left empty on purpose: the actual file is handed over
            # below via the local_files argument of SubmissionClient.submit
            submission_obj = Submission({
                "files": [],
                "metadata": metadata,
                "params": s_params
            })
        except (ValueError, KeyError) as e:
            return make_api_response("", err=str(e), status_code=400)

        # Submit the task to the system
        try:
            submit_result = SubmissionClient(datastore=STORAGE, filestore=FILESTORE, config=config, identify=IDENTIFY)\
                .submit(submission_obj, local_files=[out_file])
            submission_received(submission_obj)
        except SubmissionException as e:
            return make_api_response("", err=str(e), status_code=400)

        return make_api_response(submit_result.as_primitives())

    finally:
        # Release the quota item only on failure; a successful submission keeps
        # it until the submission completes elsewhere in the system.
        if submit_result is None:
            decrement_submission_quota(user)

        # Best-effort cleanup of the spooled file and its temporary directory
        try:
            if out_file:
                os.unlink(out_file)
        except Exception:
            pass

        try:
            shutil.rmtree(out_dir, ignore_errors=True)
        except Exception:
            pass
def get_report(submission_id, **kwargs):
    """
    Create a report for a submission based on its ID.

    Variables:
    submission_id   ->   ID of the submission to create the report for

    Arguments:
    None

    Data Block:
    None

    Result example:
    { <THE REPORT> }
    """
    user = kwargs['user']
    submission = STORAGE.submission.get(submission_id, as_obj=False)
    if submission is None:
        return make_api_response("", "Submission ID %s does not exists." % submission_id, 404)

    # Accumulate as a set so important files are deduplicated for free;
    # converted back to a list before the response is serialized.
    submission['important_files'] = set()
    submission['report_filtered'] = False

    if user and Classification.is_accessible(user['classification'], submission['classification']):
        if submission['state'] != 'completed':
            return make_api_response("", f"It is too early to generate the report. "
                                         f"Submission ID {submission_id} is incomplete.", 425)

        tree = STORAGE.get_or_create_file_tree(submission, config.submission.max_extraction_depth,
                                               cl_engine=Classification, user_classification=user['classification'])
        submission['file_tree'] = tree['tree']
        submission['classification'] = Classification.max_classification(submission['classification'],
                                                                         tree['classification'])
        if tree['filtered']:
            submission['report_filtered'] = True

        # BUGFIX: default to an empty list so a submission record without an
        # 'errors' key does not crash the comprehension below with a TypeError.
        errors = submission.pop('errors', None) or []
        # Keep the second dot-delimited component of each error key
        # (presumably the service name — TODO confirm key format)
        submission['params']['services']['errors'] = list(set([x.split('.')[1] for x in errors]))

        def recurse_get_names(data):
            # Map each sha256 in the file tree to every name it was seen under
            output = {}
            for key, val in data.items():
                output.setdefault(key, [])

                for res_name in val['name']:
                    output[key].append(res_name)

                children = recurse_get_names(val['children'])
                for c_key, c_names in children.items():
                    output.setdefault(c_key, [])
                    output[c_key].extend(c_names)

            return output

        name_map = recurse_get_names(tree['tree'])

        summary = get_or_create_summary(submission_id, submission.pop('results', []), user['classification'],
                                        submission['state'] == "completed")
        # Safelisted tags are excluded from the report
        tags = [t for t in summary['tags'] if not t['safelisted']]

        attack_matrix = summary['attack_matrix']
        heuristics = summary['heuristics']
        submission['classification'] = Classification.max_classification(submission['classification'],
                                                                         summary['classification'])
        if summary['filtered']:
            submission['report_filtered'] = True

        if summary['partial']:
            submission['report_partial'] = True

        submission['heuristic_sections'] = cleanup_heuristic_sections(summary['heuristic_sections'])
        submission['heuristic_name_map'] = summary['heuristic_name_map']
        submission['attack_matrix'] = {}
        submission['heuristics'] = {}
        submission['tags'] = {}

        # Process attack matrix
        for item in attack_matrix:
            # Summary keys are prefixed with the file's sha256 (64 hex chars)
            sha256 = item['key'][:64]

            for cat in item['categories']:

                submission['attack_matrix'].setdefault(cat, {})
                submission['attack_matrix'][cat].setdefault(item['name'], {'h_type': item['h_type'], 'files': []})
                for name in name_map.get(sha256, [sha256]):
                    if (name, sha256) not in submission['attack_matrix'][cat][item['name']]['files']:
                        submission['attack_matrix'][cat][item['name']]['files'].append((name, sha256))
                    submission['important_files'].add(sha256)

        # Process heuristics
        for h_type, items in heuristics.items():
            submission['heuristics'].setdefault(h_type, {})
            for item in items:
                sha256 = item['key'][:64]
                submission['heuristics'][h_type].setdefault(item['name'], [])
                for name in name_map.get(sha256, [sha256]):
                    if (name, sha256) not in submission['heuristics'][h_type][item['name']]:
                        submission['heuristics'][h_type][item['name']].append((name, sha256))
                    submission['important_files'].add(sha256)

        # Process tags
        for t in tags:
            summary_type = None

            if t["type"] in config.submission.tag_types.behavior:
                summary_type = 'behaviors'
            elif t["type"] in config.submission.tag_types.attribution:
                summary_type = 'attributions'
            elif t["type"] in config.submission.tag_types.ioc:
                summary_type = 'indicators_of_compromise'

            if t['value'] == "" or summary_type is None:
                continue

            sha256 = t["key"][:64]

            # Tags
            submission['tags'].setdefault(summary_type, {})
            submission['tags'][summary_type].setdefault(t['type'], {})
            submission['tags'][summary_type][t['type']].setdefault(t['value'], {'h_type': t['h_type'], 'files': []})
            # Keep the most severe h_type seen for this tag value
            if HEUR_RANK_MAP[submission['tags'][summary_type][t['type']][t['value']]['h_type']] < \
                    HEUR_RANK_MAP[t['h_type']]:
                submission['tags'][summary_type][t['type']][t['value']]['h_type'] = t['h_type']

            for name in name_map.get(sha256, [sha256]):
                if (name, sha256) not in submission['tags'][summary_type][t['type']][t['value']]['files']:
                    submission['tags'][summary_type][t['type']][t['value']]['files'].append((name, sha256))
                submission['important_files'].add(sha256)

        submitted_sha256 = submission['files'][0]['sha256']
        submission["file_info"] = STORAGE.file.get(submitted_sha256, as_obj=False)
        # The root submitted file is implied; do not list it as "important"
        if submitted_sha256 in submission['important_files']:
            submission['important_files'].remove(submitted_sha256)

        # Sets are not JSON serializable; convert before responding
        submission['important_files'] = list(submission['important_files'])

        return make_api_response(submission)
    else:
        return make_api_response("", "You are not allowed to view the data of this submission", 403)
def get_file_submission_results(sid, sha256, **kwargs):
    """
    Get the all the results and errors of a specific file
    for a specific Submission ID

    Variables:
    sid         => Submission ID to get the result for
    sha256         => Resource locator to get the result for

    Arguments (POST only):
    extra_result_keys   =>  List of extra result keys to get
    extra_error_keys    =>  List of extra error keys to get

    Data Block:
    None

    Result example:
    {"errors": [],    # List of error blocks
     "file_info": {}, # File information block (md5, ...)
     "results": [],   # List of result blocks
     "tags": [] }     # List of generated tags
    """
    user = kwargs['user']

    # Check if submission exist
    data = STORAGE.submission.get(sid, as_obj=False)
    if data is None:
        return make_api_response("", "Submission ID %s does not exists." % sid, 404)

    # Both the submission and (below) the file classification must be accessible
    if data and user and Classification.is_accessible(user['classification'], data['classification']):
        # Prepare output
        # "signatures" starts as a set for deduplication and is converted to a
        # list before the response is returned.
        output = {
            "file_info": {},
            "results": [],
            "tags": {},
            "errors": [],
            "attack_matrix": {},
            'heuristics': {},
            "signatures": set()
        }

        # Extra keys - This is a live mode optimisation
        res_keys = data.get("results", [])
        err_keys = data.get("errors", [])

        # While a submission is still running, a POST may supply additional
        # result/error keys that are not yet saved on the submission record
        if request.method == "POST" and data['state'] != "completed":
            try:
                req_data = request.json
                extra_rkeys = req_data.get("extra_result_keys", [])
                extra_ekeys = req_data.get("extra_error_keys", [])
                # Load keys
                res_keys.extend(extra_rkeys)
                err_keys.extend(extra_ekeys)
            except BadRequest:
                pass

        # Deduplicate in case extra keys overlap the stored ones
        res_keys = list(set(res_keys))
        err_keys = list(set(err_keys))

        # Get File, results and errors
        temp_file = STORAGE.file.get(sha256, as_obj=False)
        if not temp_file:
            output['file_info']['sha256'] = sha256
            output['signatures'] = list(output['signatures'])
            output['missing'] = True
            return make_api_response(output, "The file you are trying to view is missing from the system", 404)
        if not Classification.is_accessible(user['classification'], temp_file['classification']):
            return make_api_response("", "You are not allowed to view the data of this file", 403)
        output['file_info'] = temp_file
        # Track the highest classification seen across the file and its results
        max_c12n = output['file_info']['classification']

        # Result/error keys are prefixed with the file's sha256, so startswith
        # filters this submission's keys down to the requested file only
        temp_results = list(STORAGE.get_multiple_results([x for x in res_keys if x.startswith(sha256)],
                                                         cl_engine=Classification, as_obj=False).values())
        results = []
        for r in temp_results:
            # format_result may return a falsy value when the user cannot see it
            r = format_result(user['classification'], r, temp_file['classification'], build_hierarchy=True)
            if r:
                max_c12n = Classification.max_classification(max_c12n, r['classification'])
                results.append(r)
        output['results'] = results

        try:
            output['errors'] = STORAGE.error.multiget([x for x in err_keys if x.startswith(sha256)],
                                                      as_obj=False, as_dictionary=False)
        except MultiKeyError as e:
            # Best effort: keep whatever errors could be fetched
            LOGGER.warning(f"Trying to get multiple errors but some are missing: {str(e.keys)}")
            output['errors'] = e.partial_output

        output['metadata'] = STORAGE.get_file_submission_meta(sha256, config.ui.statistics.submission,
                                                              user["access_control"])

        # Walk sections highest-score first so each heuristic is recorded once
        # with its most severe occurrence
        done_heuristics = set()
        for res in output['results']:
            sorted_sections = sorted(res.get('result', {}).get('sections', []),
                                     key=lambda i: i['heuristic']['score'] if i['heuristic'] is not None else 0,
                                     reverse=True)
            for sec in sorted_sections:
                h_type = "info"
                if sec.get('heuristic', False):
                    # Get the heuristics data
                    # Score thresholds: <0 safe, <300 info, <1000 suspicious, else malicious
                    if sec['heuristic']['score'] < 0:
                        h_type = "safe"
                    elif sec['heuristic']['score'] < 300:
                        h_type = "info"
                    elif sec['heuristic']['score'] < 1000:
                        h_type = "suspicious"
                    else:
                        h_type = "malicious"

                    if sec['heuristic']['heur_id'] not in done_heuristics:
                        item = (sec['heuristic']['heur_id'], sec['heuristic']['name'])
                        output['heuristics'].setdefault(h_type, [])
                        output['heuristics'][h_type].append(item)
                        done_heuristics.add(sec['heuristic']['heur_id'])

                    # Process Attack matrix
                    for attack in sec['heuristic'].get('attack', []):
                        attack_id = attack['attack_id']
                        for cat in attack['categories']:
                            output['attack_matrix'].setdefault(cat, [])
                            item = (attack_id, attack['pattern'], h_type)
                            if item not in output['attack_matrix'][cat]:
                                output['attack_matrix'][cat].append(item)

                    # Process Signatures
                    for signature in sec['heuristic'].get('signature', []):
                        sig = (signature['name'], h_type, signature.get('safe', False))
                        if sig not in output['signatures']:
                            output['signatures'].add(sig)

                # Process tags
                # NOTE: h_type here is the current section's severity ("info" when
                # the section has no heuristic); tags keep the most severe seen.
                for t in sec['tags']:
                    output["tags"].setdefault(t['type'], {})
                    current_htype = output["tags"][t['type']].get(t['value'], None)
                    if not current_htype:
                        output["tags"][t['type']][t['value']] = (h_type, t['safelisted'])
                    else:
                        if current_htype == 'malicious' or h_type == 'malicious':
                            output["tags"][t['type']][t['value']] = ('malicious', t['safelisted'])
                        elif current_htype == 'suspicious' or h_type == 'suspicious':
                            output["tags"][t['type']][t['value']] = ('suspicious', t['safelisted'])
                        else:
                            output["tags"][t['type']][t['value']] = ('info', t['safelisted'])

        # Flatten tag maps into (value, h_type, safelisted) triples
        for t_type in output["tags"]:
            output["tags"][t_type] = [(k, v[0], v[1]) for k, v in output['tags'][t_type].items()]

        # Sets are not JSON serializable
        output['signatures'] = list(output['signatures'])

        output['file_info']['classification'] = max_c12n
        return make_api_response(output)
    else:
        return make_api_response("", "You are not allowed to view the data of this submission", 403)
def get_full_results(sid, **kwargs):
    """
    Get the full results for a given Submission ID. The difference
    between this and the get results API is that this one gets the
    actual values of the result and error keys instead of listing
    the keys.

    Variables:
    sid         => Submission ID to get the full results for

    Arguments:
    None

    Data Block:
    None

    Result example:
    {"classification": "UNRESTRICTED"   # Access control for the submission
     "error_count": 0,                  # Number of errors in this submission
     "errors": [],                      # List of error blocks (see Get Service Error)
     "file_count": 4,                   # Number of files in this submission
     "files": [                         # List of submitted files
       ["FNAME", "sha256"], ...],              # Each file = List of name/sha256
     "file_infos": {                    # Dictionary of file info blocks
       "234...235": <<FILE_INFO>>,          # File in block
       ...},                                # Keyed by file's sha256
     "file_tree": {                     # File tree of the submission
       "333...7a3": {                       # File tree item
        "children": {},                         # Recursive children of file tree item
        "name": ["file.exe",...]                # List of possible names for the file
        "score": 0                              # Score of the file
       },, ...},                            # Keyed by file's sha256
     "missing_error_keys": [],          # Errors that could not be fetched from the datastore
     "missing_result_keys": [],         # Results that could not be fetched from the datastore
     "results": [],                     # List of Results Blocks (see Get Service Result)
     "services": {                      # Service Block
       "selected": ["mcafee"],              # List of selected services
       "params": {},                        # Service specific parameters
       "excluded": []                       # List of excluded services
       },
     "state": "completed",              # State of the submission
     "submission": {                    # Submission Block
       "profile": true,                     # Should keep stats about execution?
       "description": "",                   # Submission description
       "ttl": 30,                           # Submission days to live
       "ignore_filtering": false,           # Ignore filtering services?
       "priority": 1000,                    # Submission priority, higher = faster
       "ignore_cache": true,                # Force reprocess even is result exist?
       "groups": ["group", ...],            # List of groups with access
       "sid": "ab9...956",                  # Submission ID
       "submitter": "user",                 # Uname of the submitter
       "max_score": 1422, },                # Score of highest scoring file
     "times": {                         # Timing block
       "completed": "2014-...",             # Completed time
       "submitted": "2014-..."              # Submitted time
       }
    }
    """
    # All three fetch helpers below retry missing keys with an exponential
    # backoff of 2**(retry-7) seconds (sub-second until the 8th attempt).
    max_retry = 10

    def get_results(keys):
        """Fetch and format result blocks for the given keys.

        NOTE(review): closes over `data` (for file_infos) and `user`, so it must
        be called after data['file_infos'] is populated — see the call order below.
        """
        out = {}
        res = {}
        retry = 0
        while keys and retry < max_retry:
            if retry:
                time.sleep(2 ** (retry - 7))
            res.update(STORAGE.get_multiple_results(keys, Classification, as_obj=False))
            # Only keep retrying keys that are still missing
            keys = [x for x in keys if x not in res]
            retry += 1

        results = {}
        for k, v in res.items():
            # Result keys are prefixed with the file's sha256 (64 hex chars)
            file_info = data['file_infos'].get(k[:64], None)
            if file_info:
                # format_result may return a falsy value when the user cannot see it
                v = format_result(user['classification'], v, file_info['classification'])
                if v:
                    results[k] = v

        out["results"] = results
        out["missing_result_keys"] = keys

        return out

    def get_errors(keys):
        """Fetch error blocks for the given keys, tracking any that stay missing."""
        out = {}
        err = {}
        missing = []
        retry = 0
        while keys and retry < max_retry:
            if retry:
                time.sleep(2 ** (retry - 7))
            try:
                err.update(STORAGE.error.multiget(keys, as_obj=False))
            except MultiKeyError as e:
                # Keep the partial output and remember which keys were missing
                LOGGER.warning(f"Trying to get multiple errors but some are missing: {str(e.keys)}")
                err.update(e.partial_output)
                missing.extend(e.keys)
            keys = [x for x in keys if x not in err and x not in missing]
            retry += 1

        out["errors"] = err
        out["missing_error_keys"] = keys + missing

        return out

    def get_file_infos(keys):
        """Fetch file info blocks; returns (infos_by_sha256, missing_keys)."""
        infos = {}
        missing = []
        retry = 0
        while keys and retry < max_retry:
            if retry:
                time.sleep(2 ** (retry - 7))
            try:
                infos.update(STORAGE.file.multiget(keys, as_obj=False))
            except MultiKeyError as e:
                LOGGER.warning(f"Trying to get multiple files but some are missing: {str(e.keys)}")
                infos.update(e.partial_output)
                missing.extend(e.keys)
            keys = [x for x in keys if x not in infos and x not in missing]
            retry += 1

        return infos, missing

    def recursive_flatten_tree(tree):
        """Collect every unique sha256 key appearing anywhere in the file tree."""
        sha256s = []

        for key, val in tree.items():
            sha256s.extend(recursive_flatten_tree(val.get('children', {})))
            if key not in sha256s:
                sha256s.append(key)

        return list(set(sha256s))

    user = kwargs['user']
    data = STORAGE.submission.get(sid, as_obj=False)
    if data is None:
        return make_api_response("", "Submission ID %s does not exists." % sid, 404)

    if data and user and Classification.is_accessible(user['classification'], data['classification']):
        res_keys = data.get("results", [])
        err_keys = data.get("errors", [])

        data['file_tree'] = STORAGE.get_or_create_file_tree(data, config.submission.max_extraction_depth,
                                                            cl_engine=Classification,
                                                            user_classification=user['classification'])['tree']
        # file_infos must be populated before get_results (closure dependency)
        data['file_infos'], data['missing_file_keys'] = get_file_infos(recursive_flatten_tree(data['file_tree']))
        data.update(get_results(res_keys))
        data.update(get_errors(err_keys))

        # Raise the submission classification to cover everything returned
        for r in data['results'].values():
            data['classification'] = Classification.max_classification(data['classification'], r['classification'])

        for f in data['file_infos'].values():
            data['classification'] = Classification.max_classification(data['classification'], f['classification'])

        return make_api_response(data)
    else:
        return make_api_response("", "You are not allowed to view the data of this submission", 403)
# Example #13 (scraped sample marker; vote count: 0)
def add_or_update_hash(**kwargs):
    """
    Add a hash in the safelist if it does not exist or update its list of sources if it does

    Arguments:
    None

    Data Block:
    {
     "classification": "TLP:W",    # Classification of the safe hash (Computed for the mix of sources) - Optional
     "enabled": true,              # Is the safe hash enabled or not
     "file": {                     # Information about the file  - Only used in file mode
       "name": ["file.txt"],           # Possible names for the file
       "size": 12345,                  # Size of the file
       "type": "document/text"},       # Type of the file
     "hashes": {                   # Information about the safe hash - At least one hash required
       "md5": "123...321",             # MD5 hash of the safe hash
       "sha1": "1234...4321",          # SHA1 hash of the safe hash
       "sha256": "12345....54321"},    # SHA256 of the safe hash
     "sources": [                  # List of sources for why the file is safelisted, dedupped on name - Required
       {"classification": "TLP:W",     # Classification of the source (default: TLP:W) - Optional
        "name": "NSRL",                # Name of external source or user who safelisted it - Required
        "reason": [                    # List of reasons why the source is safelisted - Required
          "Found as test.txt on default windows 10 CD",
          "Found as install.txt on default windows XP CD"
        ],
        "type": "external"},           # Type or source (external or user) - Required
       {"classification": "TLP:W",
        "name": "admin",
        "reason": ["We've seen this file many times and it leads to False positives"],
        "type": "user"}
     ],
     "signature": {               # Signature information  - Only used in signature mode
       "name": "Avira.Eicar",         # Name of signature
     },
     "tag": {                     # Tag information  - Only used in tag mode
         "type": "network.url",        # Type of tag
         "value": "google.ca"          # Value of the tag
     },
     "type": "tag"                # Type of safelist hash (tag, signature or file)
    }

    Result example:
    {
     "success": true,         # Was the hash successfully added
     "op": "add"              # Was it added to the system or updated
    }
    """
    # Load data
    data = request.json
    user = kwargs['user']

    # Set defaults
    data.setdefault('classification', CLASSIFICATION.UNRESTRICTED)
    data.setdefault('hashes', {})

    # Reject a missing or unknown type up front: the original data['type']
    # lookup raised KeyError (HTTP 500) when the client omitted the field.
    hash_type = data.get('type')
    if hash_type not in ('tag', 'signature', 'file'):
        return make_api_response(None, "Invalid safelist hash type", 400)

    if hash_type == 'tag':
        # Tags are keyed on a synthetic "<type>: <value>" string; derive all
        # three digests from it so the strongest one can serve as the doc ID.
        tag_data = data.get('tag', None)
        if tag_data is None or 'type' not in tag_data or 'value' not in tag_data:
            return make_api_response(None, "Tag data not found", 400)

        hashed_value = f"{tag_data['type']}: {tag_data['value']}".encode(
            'utf8')
        data['hashes']['md5'] = hashlib.md5(hashed_value).hexdigest()
        data['hashes']['sha1'] = hashlib.sha1(hashed_value).hexdigest()
        data['hashes']['sha256'] = hashlib.sha256(hashed_value).hexdigest()
        # Tag mode carries no file/signature sub-documents
        data.pop('file', None)
        data.pop('signature', None)

    elif hash_type == 'signature':
        # Signatures are keyed on a synthetic "signature: <name>" string.
        sig_data = data.get('signature', None)
        if sig_data is None or 'name' not in sig_data:
            return make_api_response(None, "Signature data not found", 400)

        hashed_value = f"signature: {sig_data['name']}".encode('utf8')
        data['hashes']['md5'] = hashlib.md5(hashed_value).hexdigest()
        data['hashes']['sha1'] = hashlib.sha1(hashed_value).hexdigest()
        data['hashes']['sha256'] = hashlib.sha256(hashed_value).hexdigest()
        data.pop('tag', None)
        data.pop('file', None)

    else:  # hash_type == 'file': hashes must be supplied by the client
        data.pop('tag', None)
        data.pop('signature', None)
        data.setdefault('file', {})

    data['added'] = data['updated'] = now_as_iso()

    # Find the best hash to use for the key (strongest available wins)
    qhash = data['hashes'].get(
        'sha256', data['hashes'].get('sha1', data['hashes'].get('md5', None)))
    # Make sure we have at least one hash to key the document on
    if not qhash:
        return make_api_response(None, "No valid hash found", 400)

    # At least one source is required; mirror the error _merge_safe_hashes
    # raises on the update path instead of crashing with KeyError (HTTP 500)
    if not data.get('sources'):
        return make_api_response(None, "No valid source found", 400)

    # Validate sources
    src_map = {}
    for src in data['sources']:
        if src['type'] == 'user':
            # Users may only vouch for a hash under their own name
            if src['name'] != user['uname']:
                return make_api_response(
                    {},
                    f"You cannot add a source for another user. {src['name']} != {user['uname']}",
                    400)
        else:
            # Only signature importers may register external sources
            if 'signature_importer' not in user['type']:
                return make_api_response(
                    {},
                    "You do not have sufficient priviledges to add an external source.",
                    403)

        # The hash classification is at least as restrictive as its sources'
        src_cl = src.get('classification', None)
        if src_cl:
            data['classification'] = CLASSIFICATION.max_classification(
                data['classification'], src_cl)

        src_map[src['name']] = src

    # Hold a lock so concurrent writers to the same hash merge rather than clobber
    with Lock(f'add_or_update-safelist-{qhash}', 30):
        old = STORAGE.safelist.get_if_exists(qhash, as_obj=False)
        if old:
            try:
                # Merge the new sources/hashes into the existing document
                STORAGE.safelist.save(qhash, _merge_safe_hashes(data, old))
                return make_api_response({'success': True, "op": "update"})
            except InvalidSafehash as e:
                return make_api_response({}, str(e), 400)
        else:
            try:
                # BUG FIX: materialize the dict view into a list so the saved
                # document contains a serializable value, not a dict_values view
                data['sources'] = list(src_map.values())
                STORAGE.safelist.save(qhash, data)
                return make_api_response({'success': True, "op": "add"})
            except Exception as e:
                return make_api_response({},
                                         f"Invalid data provided: {str(e)}",
                                         400)