Example #1
0
def decode_file(original_path, fileinfo):
    """Unpack ``original_path`` into a temporary file when it is a CaRT file.

    The first 256 bytes of the file are handed to ``is_cart``. When that check
    is truthy, the whole file is streamed through ``unpack_stream`` into a file
    created with ``tempfile.mkstemp`` and the extracted file is re-identified
    with ``identify.fileinfo``.

    Args:
        original_path: Path of the file to inspect.
        fileinfo: File information mapping for ``original_path``. If unpacking
            fails, its ``'type'`` entry is set to ``'corrupted/cart'``.

    Returns:
        A ``(extracted_path, fileinfo, hdr)`` tuple. ``extracted_path`` is the
        path of the temporary file holding the unpacked content, or ``None``
        when nothing was extracted. ``fileinfo`` describes the extracted file
        on success and is the (possibly updated) input mapping otherwise.
        ``hdr`` is the header returned by ``unpack_stream`` or ``{}``.
    """
    extracted_path = None
    hdr = {}
    with open(original_path, 'rb') as original_file:
        if is_cart(original_file.read(256)):
            # The probe above consumed the start of the stream; rewind so the
            # unpacker sees the complete file.
            original_file.seek(0)

            extracted_fd, extracted_path = tempfile.mkstemp()
            extracted_file = os.fdopen(extracted_fd, 'wb')

            cart_extracted = False
            try:
                hdr, _ = unpack_stream(original_file, extracted_file)
                cart_extracted = True

            except Exception:
                hdr = {}
                fileinfo['type'] = 'corrupted/cart'

            finally:
                extracted_file.close()

            if cart_extracted:
                fileinfo = identify.fileinfo(extracted_path)
            else:
                # The caller only learns about the temporary file through the
                # returned path, so a failed extraction must remove it here or
                # the partial output would be leaked.
                try:
                    os.unlink(extracted_path)
                except OSError:
                    pass
                extracted_path = None

    return extracted_path, fileinfo, hdr
Example #2
0
def main():
    """Scan one file with one service, driven by the command line.

    Expects exactly two command-line arguments: the service (resolved with
    ``class_by_name`` when the value contains a ``.``, otherwise with
    ``service_by_name``) and the path of the file to scan.

    Exits with status 1 after printing the usage when the argument count is
    wrong, and with status 3 when the input path is not a regular file.
    """
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    name = sys.argv[1]
    svc_class = class_by_name(name) if '.' in name else service_by_name(name)

    filename = sys.argv[2]
    if not os.path.isfile(filename):
        print('Invalid input file: %s' % filename)
        sys.exit(3)

    fi = fileinfo(filename)

    sha256 = fi['sha256']
    # The transport expects the filename to be the sha256.
    # Create a symlink (a copy on Windows) if required.
    created_link = False
    if filename != sha256:
        try:
            if platform.system() == 'Windows':
                import shutil
                shutil.copyfile(filename, sha256)
            else:
                os.symlink(filename, sha256)
        except Exception as ex:  #pylint: disable=W0703
            print('exception trying to link file: %s' % str(ex))
        else:
            # Only remember the link when it was really created; otherwise the
            # cleanup below would fail or remove a file we do not own.
            created_link = True

    try:
        scan_file(svc_class, **fi)
    finally:
        # Remove the link even when the scan raises.
        if created_link:
            os.unlink(sha256)
Example #3
0
def create_service_task(sample):
    """Build the ServiceTask used to process the sample ``/tmp/<sample>``.

    Only the identification fields the task needs are kept from the
    ``fileinfo`` output; every other task value is a fixed constant.
    """
    wanted_keys = ("magic", "md5", "mime", "sha1", "sha256", "size", "type")
    identified = fileinfo(f"/tmp/{sample}")
    task_fileinfo = {key: value for key, value in identified.items() if key in wanted_keys}

    task = {
        "sid": 1,
        "metadata": {},
        "deep_scan": False,
        "service_name": "Not Important",
        "service_config": {
            "extract_body_text": False,
            "save_emlparser_output": False,
        },
        "fileinfo": task_fileinfo,
        "filename": sample,
        "min_classification": "TLP:WHITE",
        "max_files": 501,
        "ttl": 3600,
    }
    return ServiceTask(task)
    def find_scripts_and_exes(apktool_out_dir: str, result: Result):
        """Report shell scripts, Linux executables and APKs found in an apktool output tree.

        Every file below ``apktool_out_dir`` except ``*.smali`` files is
        identified with ``fileinfo``. For each non-empty category a
        ``ResultSection`` is attached to ``result`` listing at most the first
        20 paths (sorted), followed by a count of the remaining ones.

        Args:
            apktool_out_dir: Root directory of the apktool output.
            result: Parent result the sections are attached to.
        """
        scripts = []
        executables = []
        apks = []

        # We are gonna do the full apktool output dir here but in case we want to do less,
        # you can edit the test_path list
        test_paths = [apktool_out_dir]
        for path in test_paths:
            for root, _, files in os.walk(path):
                for f in files:
                    if f.endswith(".smali"):
                        continue
                    cur_file = os.path.join(root, f)
                    file_type = fileinfo(cur_file)['type']
                    reported_path = cur_file.replace(apktool_out_dir, '')

                    if "code/sh" in file_type:
                        scripts.append(reported_path)
                    elif "executable/linux" in file_type:
                        executables.append(reported_path)
                    elif "android/apk" in file_type:
                        # Embedded APKs have their own section and heuristic;
                        # they must not be mixed into the executables list.
                        apks.append(reported_path)

        # (section title, heuristic id, collected paths)
        categories = (
            ("Shell script(s) found inside APK", 1, scripts),
            ("Executable(s) found inside APK", 2, executables),
            ("Other APKs where found inside the APK", 19, apks),
        )
        for title, heur_id, found in categories:
            if not found:
                continue
            section = ResultSection(title, parent=result, heuristic=Heuristic(heur_id))
            for entry in sorted(found)[:20]:
                section.add_line(entry)
            if len(found) > 20:
                section.add_line(f"and {len(found) - 20} more...")
Example #5
0
    def _ready_file(self, local_path: str, expiry, classification, cleanup,
                    sha256=None, upload=False) -> Tuple[str, int, dict]:
        """Take a file from local storage and prepare it for submission.

        The file is identified, optionally checked against an expected hash,
        decoded with ``decode_file`` and registered in the datastore. When the
        decoder extracted a contained file, that file is uploaded to the
        filestore and is the one registered; otherwise the original file is
        uploaded only when ``upload`` is true.

        Args:
            local_path: Path of the file on local storage.
            expiry: Expiry passed to ``save_or_freshen_file``.
            classification: Classification used when the decoded metadata
                does not provide one.
            cleanup: When true, ``local_path`` is deleted before returning.
            sha256: Optional expected SHA256 of the file at ``local_path``.
            upload: Upload the original file when nothing was extracted.

        Returns:
            ``(sha256, size, al_meta)`` of the file that was registered.

        Raises:
            SubmissionException: The file is empty.
            CorruptedFileStoreException: ``sha256`` was given and does not
                match the calculated hash.
        """
        extracted_path = None
        try:
            # Analyze the file and make sure the file table is up to date
            fileinfo = identify.fileinfo(local_path)

            if fileinfo['size'] == 0:
                raise SubmissionException("File empty. Submission failed")

            if sha256 is not None and fileinfo['sha256'] != sha256:
                raise CorruptedFileStoreException(f"SHA256 mismatch between received and calculated "
                                                  f"sha256. {sha256} != {fileinfo['sha256']}")

            # Check if there is an integrated decode process for this file
            # eg. files that are packaged, and the contained file (not the package
            # that local_path points to) should be passed into the system.
            extracted_path, fileinfo, al_meta = decode_file(local_path, fileinfo)
            al_meta['classification'] = al_meta.get('classification', classification)

            if extracted_path:
                # Upload the extracted content. local_path is deliberately NOT
                # rebound so the cleanup below still removes the original
                # package when requested.
                self.filestore.upload(extracted_path, fileinfo['sha256'])
            elif upload:
                self.filestore.upload(local_path, fileinfo['sha256'])

            self.datastore.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry,
                                                al_meta['classification'], redis=self.redis)
            return fileinfo['sha256'], fileinfo['size'], al_meta

        finally:
            # If we extracted anything delete it
            if extracted_path and os.path.exists(extracted_path):
                os.unlink(extracted_path)

            # Delete the file we were handed when the caller asked for it
            if local_path and cleanup and os.path.exists(local_path):
                os.unlink(local_path)
Example #6
0
    def identify(cls, transport, storage, sha256, **kw):
        """Identify a file and record the result in ``storage``.

        The file is read from the transport's local path when available, then
        from ``kw['path']`` when that path exists, and is otherwise downloaded
        to a temporary file that is removed afterwards.

        Args:
            transport: File transport providing ``local_path``, ``exists``
                and ``download``.
            storage: Datastore providing ``save_or_freshen_file``.
            sha256: SHA256 of the file to identify.
            **kw: Must contain ``classification``; ``ttl`` and
                ``__expiry_ts__`` are (re)computed and stored back into it.

        Returns:
            The file information mapping, or ``None`` when the transport
            reports that the file does not exist.
        """
        assert_valid_sha256(sha256)

        classification = kw['classification']

        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        # By the time identify is called, either the file was in our cache
        # and we freshed its ttl or the client has successfully transfered
        # the file to us.
        local_path = transport.local_path(sha256)
        if not local_path:
            path = kw.get("path", None)
            if path and os.path.exists(path):
                local_path = path

        if not transport.exists(sha256):
            log.warning('File specified is not on server: %s %s.',
                        sha256, str(transport))
            return None

        temporary_path = fileinfo = None
        try:
            if not local_path:
                # NOTE(review): tempfile.mktemp is deprecated because the name
                # can be claimed by another process before it is used. It is
                # kept because whether transport.download accepts an existing
                # destination (as tempfile.mkstemp would create) is not
                # visible here - confirm and switch if it does.
                temporary_path = tempfile.mktemp(prefix="submission.identify")
                transport.download(sha256, temporary_path)
                local_path = temporary_path

            fileinfo = identify.fileinfo(local_path)

            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)
        finally:
            if temporary_path:
                # Best-effort cleanup: ignore filesystem errors only, so that
                # interrupts and exit requests are not swallowed.
                try:
                    os.unlink(temporary_path)
                except OSError:
                    pass

        return fileinfo
Example #7
0
def ready_body(core, body=None):
    """Store a salted JSON document as a file and register it in the datastore.

    A random ``salt`` entry is combined with ``body`` (entries in ``body``
    take precedence), serialised to JSON bytes, written to ``core.filestore``
    under its SHA256 and registered through ``core.ds``.

    Returns:
        ``(sha256_hexdigest, size_in_bytes)`` of the stored document.
    """
    document = {'salt': get_random_id()}
    document.update(body or {})
    payload = json.dumps(document).encode()

    digest = hashlib.sha256(payload).hexdigest()
    core.filestore.put(digest, payload)

    # identify.fileinfo works on a path, so spill the payload to disk briefly.
    with NamedTemporaryFile() as scratch:
        scratch.write(payload)
        scratch.flush()
        info = identify.fileinfo(scratch.name)
        core.ds.save_or_freshen_file(digest, info, now_as_iso(500), 'U', redis=core.redis)

    return digest, len(payload)
Example #8
0
def decode_file(original_path, fileinfo):
    """Unpack a neutered (CaRT) file into a temporary file when applicable.

    When ``fileinfo['tag']`` is one of ``NEUTERED_FORMAT`` the file is
    streamed through ``cart.unpack_stream`` into a file created with
    ``tempfile.mkstemp`` and the extracted file is re-identified.

    Args:
        original_path: Path of the file to decode.
        fileinfo: File information mapping for ``original_path``.

    Returns:
        ``(extracted_path, original_name, fileinfo, al_meta)``. Without
        decoding this is ``(None, None, <input fileinfo>, {})``. After
        decoding, ``original_name`` is the header's ``name`` (falling back to
        the base name of ``original_path``), ``fileinfo`` describes the
        extracted file with ``'path'`` set to that name, and ``al_meta`` is
        the header's ``al.meta`` mapping (``{}`` when absent).

    Raises:
        Any exception raised while unpacking or identifying; the temporary
        file is removed before the exception propagates.
    """
    extracted_path = None
    original_name = None
    al_meta = {}
    if fileinfo['tag'] in NEUTERED_FORMAT:
        from cart import unpack_stream

        extracted_fd, extracted_path = tempfile.mkstemp()
        try:
            # Both handles are closed by the with-statement even on failure.
            # The source is opened in binary mode: the container is binary
            # data and must not go through text-mode translation.
            with os.fdopen(extracted_fd, 'wb') as extracted_file, \
                    open(original_path, 'rb') as original_file:
                hdr, _ = unpack_stream(original_file, extracted_file)

            original_name = hdr.get('name', os.path.basename(original_path))
            al_meta = hdr.get("al", {}).get("meta", {})

            fileinfo = identify.fileinfo(extracted_path)
            if original_name:
                fileinfo['path'] = original_name
        except Exception:
            # Nobody else knows about the temporary file yet; do not leak it.
            try:
                os.unlink(extracted_path)
            except OSError:
                pass
            raise

    return extracted_path, original_name, fileinfo, al_meta
Example #9
0
def test_identify():
    """Check that a freshly packed CaRT file is detected and hashed correctly."""
    # Setup test data
    aaaa = f"{'A' * 10000}".encode()
    sha256 = hashlib.sha256(aaaa).hexdigest()

    # Prep temp file. mkstemp returns an open descriptor; close it right away
    # since the file is reopened by path below, otherwise it is leaked.
    input_fd, input_path = tempfile.mkstemp()
    os.close(input_fd)
    output_path = f"{input_path}.cart"

    try:
        # Write temp file
        with open(input_path, 'wb') as oh:
            oh.write(aaaa)

        # Create a cart file
        with open(output_path, 'wb') as oh:
            with open(input_path, 'rb') as ih:
                pack_stream(ih, oh, {'name': 'test_identify.a'})

        # Validate the cart file created
        meta = get_metadata_only(output_path)
        assert meta.get("sha256", None) == sha256

        # Validate identify file detection
        info = fileinfo(output_path)
        assert info.get("type", None) == "archive/cart"

        # Validate identify hashing against the external sha256sum tool; the
        # first 64 characters of its output are the hex digest.
        output_sha256 = subprocess.check_output(['sha256sum',
                                                 output_path])[:64].decode()
        assert info.get("sha256", None) == output_sha256
    finally:
        # Cleanup output file
        if os.path.exists(output_path):
            os.unlink(output_path)

        # Cleanup input file
        if os.path.exists(input_path):
            os.unlink(input_path)
    def try_run(self):
        """Run the configured service once against ``FILE_PATH``.

        Steps, in order: load the service module and manifest, identify the
        input file (un-CaRTing it first when it is a CaRT archive), place the
        file to process in the system temp directory under its SHA256, build
        a ``ServiceTask``, let the service handle it, move the produced files
        and ``result.json`` into ``<input file>_<service name>`` next to the
        input file, and validate the generated result.

        Returns early (after logging) when ``FILE_PATH`` is not a file.

        Raises:
            Exception: No result JSON was produced by the service. Errors
                from loading the service module are re-raised as well.
        """
        try:
            self.service_class = load_module_by_path(SERVICE_PATH)
        except ValueError:
            raise
        except Exception:
            LOG.error("Could not find service in path. Check your environment variables.")
            raise

        self.load_service_manifest()

        if not os.path.isfile(FILE_PATH):
            LOG.info(f"File not found: {FILE_PATH}")
            return

        self.file_dir = os.path.dirname(FILE_PATH)

        # Get filename and working dir
        file_name = os.path.basename(FILE_PATH)
        working_dir = os.path.join(self.file_dir, f'{os.path.basename(FILE_PATH)}_{SERVICE_NAME.lower()}')

        # Start service
        self.service.start_service()

        # Identify the file
        file_info = identify.fileinfo(FILE_PATH)
        if file_info['type'] == "archive/cart":
            # This is a CART file, uncart it and recreate the file info object
            original_temp = os.path.join(tempfile.gettempdir(), file_info['sha256'])
            with open(FILE_PATH, 'rb') as ifile, open(original_temp, 'wb') as ofile:
                unpack_stream(ifile, ofile)

            # The un-CaRTed content has a different hash than the archive, so
            # it is renamed to the SHA256 of the content itself.
            file_info = identify.fileinfo(original_temp)
            target_file = os.path.join(tempfile.gettempdir(), file_info['sha256'])
            shutil.move(original_temp, target_file)
            LOG.info(f"File was a CaRT archive, it was un-CaRTed to {target_file} for processing")

        else:
            # It not a cart, move the file to the right place to be processed
            target_file = os.path.join(tempfile.gettempdir(), file_info['sha256'])
            shutil.copyfile(FILE_PATH, target_file)

        # Create service processing task
        service_task = ServiceTask(dict(
            sid=get_random_id(),
            metadata={},
            service_name=SERVICE_NAME,
            service_config=self.submission_params,
            fileinfo=dict(
                magic=file_info['magic'],
                md5=file_info['md5'],
                mime=file_info['mime'],
                sha1=file_info['sha1'],
                sha256=file_info['sha256'],
                size=file_info['size'],
                type=file_info['type'],
            ),
            filename=file_name,
            min_classification=forge.get_classification().UNRESTRICTED,
            max_files=501,  # TODO: get the actual value
            ttl=3600,
        ))

        LOG.info(f"Starting task with SID: {service_task.sid}")

        # Set the working directory to a directory with same parent as input file
        # (any output of a previous run is discarded first)
        if os.path.isdir(working_dir):
            shutil.rmtree(working_dir)
        if not os.path.isdir(working_dir):
            os.makedirs(os.path.join(working_dir, 'working_directory'))

        self.service.handle_task(service_task)

        # Move the result.json and extracted/supplementary files to the working directory
        source = os.path.join(tempfile.gettempdir(), 'working_directory')
        if not os.path.exists(source):
            os.makedirs(source)

        files = os.listdir(source)
        for f in files:
            shutil.move(os.path.join(source, f), os.path.join(working_dir, 'working_directory'))

        # Cleanup files from the original directory created by the service base
        shutil.rmtree(source)

        result_json = os.path.join(tempfile.gettempdir(),
                                   f'{service_task.sid}_{service_task.fileinfo.sha256}_result.json')

        if not os.path.exists(result_json):
            raise Exception("A service error occured and no result json was found.")

        # Validate the generated result
        with open(result_json, 'r') as fh:
            try:
                result = json.load(fh)
                result.pop('temp_submission_data', None)
                for file in result['response']['extracted'] + result['response']['supplementary']:
                    file.pop('path', None)

                # Load heuristics
                heuristics = get_heuristics()

                # Transform heuristics and calculate score
                total_score = 0
                for section in result['result']['sections']:
                    if section['heuristic']:
                        heur_id = section['heuristic']['heur_id']

                        try:
                            section['heuristic'] = service_heuristic_to_result_heuristic(section['heuristic'],
                                                                                         heuristics)
                            total_score += section['heuristic']['score']
                        except InvalidHeuristicException:
                            # An invalid heuristic is dropped from the section
                            section['heuristic'] = None
                        else:
                            # Only a successfully converted heuristic can be
                            # given a name; indexing the None set above would
                            # raise TypeError and reject the whole result.
                            section['heuristic']['name'] = heuristics[heur_id]['name']
                result['result']['score'] = total_score

                # Add timestamps for creation, archive and expiry
                result['created'] = now_as_iso()
                result['archive_ts'] = now_as_iso(1 * 24 * 60 * 60)
                result['expiry_ts'] = now_as_iso(service_task.ttl * 24 * 60 * 60)

                result = Result(result)

                # Print the result on console if in debug mode
                if args.debug:
                    # Emit the banner; building the string alone has no effect
                    LOG.debug(f"{SERVICE_NAME.upper()}-RESULT".center(60, '-'))
                    for line in pprint.pformat(result.result.as_primitives()).split('\n'):
                        LOG.debug(line)
            except Exception as e:
                # Validation problems are reported but do not abort the run
                LOG.error(f"Invalid result created: {str(e)}")

        LOG.info(f"Cleaning up file used for temporary processing: {target_file}")
        os.unlink(target_file)

        LOG.info(f"Moving {result_json} to the working directory: {working_dir}/result.json")
        shutil.move(result_json, os.path.join(working_dir, 'result.json'))

        LOG.info(f"Successfully completed task. Output directory: {working_dir}")
Example #11
0
def ingest_single_file(**kwargs):
    """
    Ingest a single file, sha256 or URL in the system

        Note 1:
            If you are submitting a sha256 or a URL, you must use the application/json encoding and one of
            sha256 or url parameters must be included in the data block.

        Note 2:
            If you are submitting a file directly, you have to use multipart/form-data encoding this
            was done to reduce the memory footprint and speedup file transfers
             ** Read documentation of mime multipart standard if your library does not support it**

            The multipart/form-data for sending binary has two parts:
                - The first part contains a JSON dump of the optional params and uses the name 'json'
                - The last part contains the file binary, uses the name 'bin' and includes a filename

        Note 3:
            The ingest API uses the user's default settings to submit files to the system
            unless these settings are overridden in the 'params' field. Although, there are
            exceptions to that rule. Fields deep_scan, ignore_filtering, ignore_cache are
            reset to False because they lead to dangerous behavior in the system.

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
     //REQUIRED VALUES: One of the following
     "sha256": "1234...CDEF"         # SHA256 hash of the file
     "url": "http://...",            # Url to fetch the file from

     //OPTIONAL VALUES
     "name": "file.exe",             # Name of the file

     "metadata": {                   # Submission Metadata
         "key": val,                    # Key/Value pair for metadata parameters
         },

     "params": {                     # Submission parameters
         "key": val,                    # Key/Value pair for params that differ from the user's defaults
         },                                 # DEFAULT: /api/v3/user/submission_params/<user>/

     "generate_alert": False,        # Generate an alert in our alerting system or not
     "notification_queue": None,     # Name of the notification queue
     "notification_threshold": None, # Threshold for notification
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"      <-- JSON data blob part (only previous optional values valid)

    {"params": {"ignore_cache": true}, "generate_alert": true}
    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--             <-- End of HTTP transmission

    Result example:
    { "ingest_id": <ID OF THE INGESTED FILE> }
    """
    user = kwargs['user']
    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())
    extracted_path = original_file = None
    with forge.get_filestore() as f_transport:
        try:
            # A request without a Content-Type header must end up in the
            # "Invalid content type" branch instead of failing on `in None`.
            content_type = request.content_type or ''

            # Get data block and binary blob
            if 'multipart/form-data' in content_type:
                if 'json' in request.values:
                    data = json.loads(request.values['json'])
                else:
                    data = {}
                binary = request.files['bin']
                name = data.get("name", binary.filename)
                sha256 = None
                url = None
            elif 'application/json' in content_type:
                data = request.json
                if not isinstance(data, dict):
                    # A null or non-object JSON body has no usable fields;
                    # let the "Missing data block" check below reject it.
                    data = {}
                binary = None
                sha256 = data.get('sha256', None)
                url = data.get('url', None)
                # Only derive a name from the URL when one was supplied:
                # os.path.basename(None) raises TypeError.
                name = data.get("name", None) or sha256 or (
                    os.path.basename(url) if url else None)
            else:
                return make_api_response({}, "Invalid content type", 400)

            if not data:
                return make_api_response({}, "Missing data block", 400)

            # Get notification queue parameters
            notification_queue = data.get('notification_queue', None)
            notification_threshold = data.get('notification_threshold', None)
            if not isinstance(notification_threshold,
                              int) and notification_threshold:
                return make_api_response(
                    {}, "notification_threshold should be and int", 400)

            # Get generate alert parameter
            generate_alert = data.get('generate_alert', False)
            if not isinstance(generate_alert, bool):
                return make_api_response({},
                                         "generate_alert should be a boolean",
                                         400)

            # Get file name
            if not name:
                return make_api_response({}, "Filename missing", 400)

            # Keep only the final path component so the file always lands
            # directly inside out_dir.
            name = os.path.basename(name)
            if not name:
                return make_api_response({}, "Invalid filename", 400)

            try:
                os.makedirs(out_dir)
            except Exception:
                pass
            original_file = out_file = os.path.join(out_dir, name)

            # Load file
            extra_meta = {}
            if not binary:
                if sha256:
                    if f_transport.exists(sha256):
                        f_transport.download(sha256, out_file)
                    else:
                        return make_api_response(
                            {}, "SHA256 does not exist in our datastore", 404)
                else:
                    if url:
                        if not config.ui.allow_url_submissions:
                            return make_api_response(
                                {},
                                "URL submissions are disabled in this system",
                                400)

                        try:
                            safe_download(url, out_file)
                            extra_meta['submitted_url'] = url
                        except FileTooBigException:
                            return make_api_response(
                                {}, "File too big to be scanned.", 400)
                        except InvalidUrlException:
                            return make_api_response(
                                {}, "Url provided is invalid.", 400)
                        except ForbiddenLocation:
                            return make_api_response(
                                {}, "Hostname in this URL cannot be resolved.",
                                400)
                    else:
                        return make_api_response(
                            {},
                            "Missing file to scan. No binary, sha256 or url provided.",
                            400)
            else:
                with open(out_file, "wb") as my_file:
                    my_file.write(binary.read())

            # Load default user params
            s_params = ui_to_submission_params(
                STORAGE.user_settings.get(user['uname'], as_obj=False))
            if not s_params:
                s_params = get_default_user_settings(user)

            # Reset dangerous user settings to safe values
            s_params.update({
                'deep_scan': False,
                "priority": 150,
                "ignore_cache": False,
                "ignore_dynamic_recursion_prevention": False,
                "ignore_filtering": False,
                "type": "INGEST"
            })

            # Apply provided params
            s_params.update(data.get("params", {}))

            # Override final parameters
            s_params.update({
                'generate_alert':
                generate_alert,
                'max_extracted':
                config.core.ingester.default_max_extracted,
                'max_supplementary':
                config.core.ingester.default_max_supplementary,
                'priority':
                min(s_params.get("priority", 150),
                    config.ui.ingest_max_priority),
                'submitter':
                user['uname']
            })

            # Calculate file digest
            fileinfo = identify.fileinfo(out_file)

            # Validate file size
            if fileinfo['size'] > MAX_SIZE and not s_params.get(
                    'ignore_size', False):
                msg = f"File too large ({fileinfo['size']} > {MAX_SIZE}). Ingestion failed"
                return make_api_response("", err=msg, status_code=400)
            elif fileinfo['size'] == 0:
                return make_api_response("",
                                         err="File empty. Ingestion failed",
                                         status_code=400)

            # Decode cart if needed
            extracted_path, fileinfo, al_meta = decode_file(out_file, fileinfo)
            if extracted_path:
                out_file = extracted_path

            # Save the file to the filestore if needs be
            sha256 = fileinfo['sha256']
            if not f_transport.exists(sha256):
                f_transport.upload(out_file, sha256, location='far')

            # Freshen file object
            expiry = now_as_iso(s_params['ttl'] * 24 * 60 *
                                60) if s_params.get('ttl', None) else None
            STORAGE.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry,
                                         s_params['classification'])

            # Setup notification queue if needed
            if notification_queue:
                notification_params = {
                    "queue": notification_queue,
                    "threshold": notification_threshold
                }
            else:
                notification_params = {}

            # Load metadata, setup some default values if they are missing and append the cart metadata
            ingest_id = get_random_id()
            metadata = flatten(data.get("metadata", {}))
            metadata['ingest_id'] = ingest_id
            metadata['type'] = s_params['type']
            name = al_meta.pop('name', name)
            metadata.update(al_meta)
            if 'ts' not in metadata:
                metadata['ts'] = now_as_iso()
            metadata.update(extra_meta)

            # Set description if it does not exists
            s_params['description'] = s_params[
                'description'] or f"[{s_params['type']}] Inspection of file: {name}"

            # Create submission object
            try:
                submission_obj = Submission({
                    "sid":
                    ingest_id,
                    "files": [{
                        'name': name,
                        'sha256': sha256,
                        'size': fileinfo['size']
                    }],
                    "notification":
                    notification_params,
                    "metadata":
                    metadata,
                    "params":
                    s_params
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            # Send submission object for processing
            ingest.push(submission_obj.as_primitives())
            return make_api_response({"ingest_id": ingest_id})

        finally:
            # Cleanup files on disk (best effort, runs on every exit path)
            try:
                if original_file and os.path.exists(original_file):
                    os.unlink(original_file)
            except Exception:
                pass

            try:
                if extracted_path and os.path.exists(extracted_path):
                    os.unlink(extracted_path)
            except Exception:
                pass

            try:
                if os.path.exists(out_dir):
                    shutil.rmtree(out_dir, ignore_errors=True)
            except Exception:
                pass
Example #12
0
    def submit_multi(cls, storage, transport, files, **kw):
        """ Submit all files into one submission

            submit_multi can be used when all the files are already present in the
            file storage.

            files is an array of (name, sha256) tuples

            Any kw are passed to the Task created to dispatch this submission.

            Returns a copy of the raw data of the first dispatched task.

            Raises SubmissionException when no file is given, when a file is
            not present on the transport, or when a file is larger than the
            configured maximum and 'ignore_size' is not set.
        """
        sid = str(uuid.uuid4())
        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        submissions = []
        dispatch_request = None
        # Generate static fileinfo data for each file.
        for name, sha256 in files:
            # Per-file scratch paths; reset on every iteration so the cleanup
            # below never acts on a path left over from a previous file.
            temporary_path = None
            massaged_path = None
            source_path = None

            local_path = transport.local_path(sha256)

            if not transport.exists(sha256):
                raise SubmissionException('File specified is not on server: %s %s.' % (sha256, str(transport)))

            try:
                if not local_path:
                    # NOTE(review): tempfile.mktemp is deprecated (the name can
                    # be claimed before it is used); kept because it is not
                    # visible here whether transport.download accepts an
                    # existing destination file.
                    temporary_path = tempfile.mktemp(prefix="submission.submit_multi")
                    transport.download(sha256, temporary_path)
                    local_path = temporary_path

                fileinfo = identify.fileinfo(local_path)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

                decode_file = forge.get_decode_file()
                source_path = local_path
                massaged_path, new_name, fileinfo, al_meta = \
                    decode_file(local_path, fileinfo)

                if massaged_path:
                    name = new_name
                    local_path = massaged_path
                    sha256 = fileinfo['sha256']

                    if not transport.exists(sha256):
                        transport.put(local_path, sha256)
                    storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

                ignore_size = kw.get('ignore_size', False)
                max_size = config.submissions.max.size
                if fileinfo['size'] > max_size and not ignore_size:
                    msg = "File too large (%d > %d). Submission failed" % (fileinfo['size'], max_size)
                    raise SubmissionException(msg)

                # We'll just merge the mandatory arguments, fileinfo, and any
                # optional kw and pass those all on to the dispatch callback.
                task_args = fileinfo
                task_args['priority'] = 0  # Just a default.
                task_args.update(kw)
                task_args['srl'] = sha256
                task_args['original_filename'] = name
                task_args['sid'] = sid
                task_args['path'] = name

                if 'metadata' in task_args:
                    task_args['metadata'].update(al_meta)
                else:
                    task_args['metadata'] = al_meta

                dispatch_request = Task.create(**task_args)
                submissions.append(dispatch_request)
            finally:
                stale_paths = [temporary_path]
                # The decoder writes its output to a new file; remove it once
                # it has been stored. A decoder handing back its own input is
                # left alone, that path is not ours to delete.
                if massaged_path and massaged_path != source_path:
                    stale_paths.append(massaged_path)

                for stale_path in stale_paths:
                    if stale_path:
                        # Best-effort cleanup: ignore filesystem errors only.
                        try:
                            os.unlink(stale_path)
                        except OSError:
                            pass

        if dispatch_request is None:
            # Nothing was dispatched, so there is no submission to record.
            raise SubmissionException('No files were provided. Submission failed')

        storage.create_submission(
            dispatch_request.sid,
            dispatch_request.as_submission_record(),
            files)

        dispatch_queue = forge.get_dispatch_queue()
        for submission in submissions:
            dispatch_queue.submit(submission)

        log.debug("Submission complete. Dispatched: %s", dispatch_request)
        return submissions[0].raw.copy()
Example #13
0
    def submit_inline(cls, storage, transport, file_paths, **kw):
        """ Submit local samples to the submission service.

            submit_inline can be used when the sample to submit is already
            local to the submission service. It does the presubmit, filestore
            upload and submit.

            Any kw are passed to the Task created to dispatch this submission.
            kw must contain 'classification'; 'ignore_size' may be set to skip
            the maximum size check.

            Returns a copy of the first dispatched task's raw data in which
            'fileinfo' is replaced by the list of every submitted file's
            fileinfo (in submission order).

            Raises SubmissionException if no file was given or if a file is
            larger than config.submissions.max.size (unless ignore_size).
        """
        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        decode_file = forge.get_decode_file()

        submissions = []
        file_tuples = []
        # Names the files were submitted under, kept so the result can be
        # built without iterating file_paths a second time.
        submitted_names = []
        dispatch_request = None
        # Generate static fileinfo data for each file.
        for file_path in file_paths:

            submitted_name = file_name = os.path.basename(file_path)
            fileinfo = identify.fileinfo(file_path)

            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission Failed" % \
                      (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            temp_path, original_name, fileinfo, al_meta = \
                decode_file(file_path, fileinfo)

            # From here on a decoded temporary file may exist; make sure it
            # is removed even if the datastore or the filestore fails.
            try:
                if temp_path:
                    file_path = temp_path
                    if not original_name:
                        original_name = os.path.splitext(file_name)[0]
                    file_name = original_name

                sha256 = fileinfo['sha256']

                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

                file_tuples.append((file_name, sha256))

                if not transport.exists(sha256):
                    log.debug('File not on remote filestore. Uploading %s', sha256)
                    transport.put(file_path, sha256, location='near')
            finally:
                if temp_path:
                    try:
                        os.remove(temp_path)
                    except OSError:
                        # Best effort, like the cleanup in the other submit
                        # entry points: a stale temp file must not fail (or
                        # mask the real failure of) the submission.
                        log.warning('Could not remove temporary file %s', temp_path)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            task_args = fileinfo
            task_args['priority'] = 0  # Just a default.
            task_args.update(kw)
            task_args['srl'] = sha256
            task_args['original_filename'] = file_name
            task_args['path'] = file_name

            if 'metadata' in task_args:
                # Work on a copy: the metadata dict comes from the caller and
                # is shared by every file of this submission, so updating it
                # in place would leak one file's metadata into the others.
                metadata = dict(task_args['metadata'])
                metadata.update(al_meta)
                task_args['metadata'] = metadata
            else:
                task_args['metadata'] = al_meta

            dispatch_request = Task.create(**task_args)
            submissions.append(dispatch_request)
            submitted_names.append(submitted_name)

        if not submissions:
            # Without at least one task there is no sid to record.
            raise SubmissionException("No files to submit. Submission Failed")

        storage.create_submission(
            dispatch_request.sid,
            dispatch_request.as_submission_record(),
            file_tuples)

        dispatch_queue = forge.get_dispatch_queue()
        for submission in submissions:
            dispatch_queue.submit(submission)

        log.debug("Submission complete. Dispatched: %s", dispatch_request)

        # Ugly - fighting with task to give UI something that makes sense.
        # NOTE: the loop variable must not be called 'result', otherwise the
        # copy taken here is thrown away and the last task's live raw data is
        # returned (and modified) instead.
        result = submissions[0].raw.copy()
        fileinfos = []
        for submitted_name, submission in zip(submitted_names, submissions):
            finfo = submission.raw['fileinfo']
            finfo['original_filename'] = submitted_name
            finfo['path'] = submitted_name
            fileinfos.append(finfo)
        result['fileinfo'] = fileinfos
        return result
Example #14
0
    def submit(cls, transport, storage, sha256, path, priority, submitter, **kw):
        """ Execute a submit.

        The file identified by sha256 must already be on the filestore. It is
        fetched locally if needed, identified, run through the configured
        decode_file hook (the decoded file, if any, replaces the original and
        is uploaded), then a Task is created and sent to the dispatch queue.

        Any kw are passed along in the dispatched request. kw must contain
        'classification'; 'ignore_size' may be set to skip the maximum size
        check.

        Returns the raw data of the dispatched task.

        Raises SubmissionException if the file is not on the filestore or is
        too large, and CorruptedFileStoreException if the stored content does
        not hash to sha256.
        """
        # Function-scope import: only needed for the temporary directory
        # cleanup below.
        import shutil

        assert_valid_sha256(sha256)
        queue = forge.get_dispatch_queue()

        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        # By the time submit is called, either the file was in our cache
        # and we freshed its ttl or the client has successfully transfered
        # the file to us.
        local_path = transport.local_path(sha256)

        if not transport.exists(sha256):
            raise SubmissionException('File specified is not on server: %s %s.' % (sha256, str(transport)))

        root_sha256 = sha256
        temporary_dir = massaged_path = None
        try:
            if not local_path:
                # tempfile.mktemp only returns a name, which leaves a window
                # for another process to create that path first. Download
                # into a freshly created private directory instead; the
                # target path itself still does not exist beforehand.
                temporary_dir = tempfile.mkdtemp(prefix="submission.submit")
                local_path = os.path.join(temporary_dir, sha256)
                transport.download(sha256, local_path)

            fileinfo = identify.fileinfo(local_path)
            if fileinfo['sha256'] != sha256:
                raise CorruptedFileStoreException('SHA256 mismatch between received '
                                                  'and calculated sha256. %s != %s' % (sha256, fileinfo['sha256']))
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            decode_file = forge.get_decode_file()
            massaged_path, _, fileinfo, al_meta = decode_file(local_path, fileinfo)

            if massaged_path:
                # The decoded file becomes the file that is analysed;
                # root_sha256 keeps pointing at what was originally submitted.
                local_path = massaged_path
                sha256 = fileinfo['sha256']

                transport.put(local_path, sha256)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            ignore_size = kw.get('ignore_size', False)
            max_size = config.submissions.max.size
            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission failed" % (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            task_args = fileinfo
            task_args.update(kw)
            task_args.update({
                'original_selected': kw.get('selected', []),
                'root_sha256': root_sha256,
                'srl': sha256,
                'sha256': sha256,
                'priority': priority,
                'submitter': submitter,
                'path': safe_str(path)})

            if 'metadata' in task_args:
                task_args['metadata'].update(al_meta)
            else:
                task_args['metadata'] = al_meta

            submit_task = Task.create(**task_args)
            if submit_task.is_initial():
                storage.create_submission(
                    submit_task.sid,
                    submit_task.as_submission_record(),
                    [(os.path.basename(path), submit_task.srl)])
            log.debug("Submission complete. Dispatching: %s", submit_task)

            queue.send(submit_task, shards=SHARDS)

            return submit_task.raw
        finally:
            # Cleanup is best effort: a leftover temporary file must neither
            # fail the submission nor hide the exception being propagated.
            if massaged_path:
                try:
                    os.unlink(massaged_path)
                except OSError:
                    pass

            if temporary_dir:
                shutil.rmtree(temporary_dir, ignore_errors=True)
Example #15
0
def sample(request):
    """Stage a test sample under /tmp, named after its sha256.

    The sample file name is taken from ``request.param`` and looked up in
    ``tests/samples``. The sha256 is yielded; once the generator is resumed
    the staged copy is removed again.
    """
    source = os.path.join("tests", "samples", request.param)
    digest = fileinfo(source)["sha256"]
    staged = os.path.join("/tmp", digest)

    shutil.copy(source, staged)
    yield digest
    os.remove(staged)