Example 1
    def evaluate(self, image_obj, context):
        match_filter = self.regex_name.value()

        if match_filter:
            match_encoded = ensure_str(base64.b64encode(ensure_bytes(match_filter)))
        else:
            return

        for thefile, regexps in list(context.data.get('content_regexp', {}).items()):
            thefile = ensure_str(thefile)
            if not regexps:
                continue
            for regexp in regexps.keys():
                decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
                try:
                    regexp_name, theregexp = decoded_regexp.split("=", 1)
                except ValueError:
                    regexp_name = None
                    theregexp = decoded_regexp

                if not match_filter:
                    self._fire(msg='File content analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
                elif theregexp == match_filter or regexp == match_encoded:
                    self._fire(msg='File content analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
                elif regexp_name and regexp_name == match_filter:
                    self._fire(msg='File content analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
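The regexp keys in context.data are base64-encoded 'NAME=pattern' strings; a self-contained sketch of the decode/split convention used above (the name and pattern are made up):

import base64

regexp = base64.b64encode(b'PASSWD_FILE=.*passwd.*').decode('utf-8')
decoded_regexp = base64.b64decode(regexp.encode('utf-8')).decode('utf-8')
regexp_name, theregexp = decoded_regexp.split('=', 1)
assert (regexp_name, theregexp) == ('PASSWD_FILE', '.*passwd.*')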
Example 2
    def render_GET(self, request):
        try:
            versions = localconfig.get_versions()
        except Exception:
            versions = {}

        request.responseHeaders.addRawHeader(b"Content-Type", b"application/json")

        ret = {
            'service': {
                'version': versions.get('service_version', None),
            },
            'api': {
            },
            'db': {
                'schema_version': versions.get('db_version', None),
            }
        }

        try:
            response = utils.ensure_bytes(json.dumps(ret))
        except Exception:
            response = utils.ensure_bytes(json.dumps({}))

        return response
Example 3
    def evaluate(self, image_obj, context):
        match_filter = self.secret_contentregexp.value(default_if_none=[])
        if match_filter:
            matches = [ensure_str(base64.b64encode(ensure_bytes(x))) for x in match_filter]
            matches_decoded = match_filter
        else:
            matches = []
            matches_decoded = []

        for thefile, regexps in list(context.data.get('secret_content_regexp', {}).items()):
            thefile = ensure_str(thefile)
            if not regexps:
                continue
            for regexp in regexps.keys():
                decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
                try:
                    regexp_name, theregexp = decoded_regexp.split("=", 1)
                except ValueError:
                    regexp_name = None
                    theregexp = decoded_regexp

                if not matches:
                    self._fire(msg='Secret search analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
                elif regexp in matches or theregexp in matches_decoded:
                    self._fire(msg='Secret search analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
                elif regexp_name and regexp_name in matches_decoded:
                    self._fire(msg='Secret search analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
Example 4
    def evaluate(self, image_obj, context):
        match_decoded = self.regex_name.value()

        if match_decoded:
            match_encoded = ensure_str(base64.b64encode(ensure_bytes(match_decoded)))

        for thefile, regexps in list(context.data.get("content_regexp", {}).items()):
            thefile = ensure_str(thefile)
            if not regexps:
                continue
            for regexp in regexps.keys():
                found = False
                decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
                try:
                    regexp_name, theregexp = decoded_regexp.split("=", 1)
                except ValueError:
                    regexp_name = None
                    theregexp = decoded_regexp

                if not match_decoded:
                    found = True
                elif theregexp == match_decoded or regexp == match_encoded:
                    found = True
                elif regexp_name and regexp_name == match_decoded:
                    found = True

                if found:
                    self._fire(
                        msg="File content analyzer found regexp match in container: file={} regexp={}".format(
                            thefile, decoded_regexp
                        )
                    )
Example 5
    def evaluate(self, image_obj, context):
        match_filter = self.secret_contentregexp.value(default_if_none=[])
        name_filter = self.name_regexps.value()
        name_re = re.compile(name_filter) if name_filter else None
        match_type = self.match_type.value(default_if_none="found")

        if match_filter:
            matches = [ensure_str(base64.b64encode(ensure_bytes(x))) for x in match_filter]
            matches_decoded = match_filter
        else:
            matches = []
            matches_decoded = []

        onefound = False
        for thefile, regexps in list(
                context.data.get("secret_content_regexp", {}).items()):
            thefile = ensure_str(thefile)

            if not regexps:
                continue

            if regexps and (not name_re or name_re.match(thefile)):
                for regexp in list(regexps.keys()):
                    found = False
                    decoded_regexp = ensure_str(
                        base64.b64decode(ensure_bytes(regexp)))

                    try:
                        regexp_name, theregexp = decoded_regexp.split("=", 1)
                    except ValueError:
                        regexp_name = None
                        theregexp = decoded_regexp

                    if not matches:
                        found = onefound = True
                    elif regexp in matches or theregexp in matches_decoded:
                        found = onefound = True
                    elif regexp_name and regexp_name in matches_decoded:
                        found = onefound = True

                    if found and match_type == "found":
                        self._fire(
                            msg="Secret content search analyzer found regexp match in container: file={} regexp={}".format(
                                thefile, decoded_regexp))

        if not onefound and match_type == "notfound":
            f_filter = name_filter
            if not f_filter:
                f_filter = "*"

            m_filter = match_filter
            if not m_filter:
                m_filter = "all"
            self._fire(
                msg="Secret content search analyzer did not find regexp match in container: filename_regex={} content_regex_name={}".format(
                    f_filter, m_filter))
Example 6
def load_keys(config: dict):
    """
    Based on the passed configuration, load a dict mapping the key name to bytes of the key

    :param config: dict with specific keys to find keys, paths
    :return:
    """

    keys = {}

    if config:
        if config.get('private_key_path'):
            priv_keypath = config.get('private_key_path')
            try:
                with open(priv_keypath, 'rb') as pem_fp:
                    keys['private'] = pem_fp.read()

            except IOError as e:
                raise Exception('Could not load private key file from path: {}. Error: {}'.format(priv_keypath, e))

        if config.get('public_key_path'):
            pub_keypath = config.get('public_key_path')
            try:
                with open(pub_keypath, 'rb') as crt_fp:
                    keys['public'] = crt_fp.read()
            except IOError as e:
                raise Exception('Could not load public key file from path: {}. Error: {}'.format(pub_keypath, e))

        elif config.get('secret'):
            keys['secret'] = ensure_bytes(config.get('secret'))

    return keys
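For reference, a minimal usage sketch of load_keys; the paths and secret below are hypothetical placeholders, not values from the source:

# Loading key material from PEM files (hypothetical paths):
keys = load_keys({
    'private_key_path': '/etc/anchore/keys/private.pem',
    'public_key_path': '/etc/anchore/keys/public.pem',
})

# With no key paths configured, a shared secret is returned as bytes instead:
keys = load_keys({'secret': 'not-a-real-secret'})
assert keys == {'secret': b'not-a-real-secret'}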
Example 7
    def _execute(self):
        # if image record already exists, exit.
        with session_scope() as session:
            if db_catalog_image.get(self.image_digest, self.account, session):
                logger.info('Image archive restore found existing image records already. Aborting restore.')
                raise ImageConflict('Conflict: Image already exists in system. No restore possible')

        dest_obj_mgr = object_store.get_manager()

        # Load the archive manifest
        m = self.fileobj.read()

        if m:
            tf = tempfile.NamedTemporaryFile(prefix='analysis_archive_{}'.format(self.image_digest), dir=localconfig.get_config()['tmp_dir'], delete=False)
            try:
                tf.write(ensure_bytes(m))
                tf.close()

                # Load the archive from the temp file
                with ImageArchive.for_reading(tf.name) as img_archive:

                    logger.debug('Using manifest: {}'.format(img_archive.manifest))

                    self.restore_artifacts(img_archive, dest_obj_mgr)
                    self.restore_records(img_archive.manifest)
                    self._reload_policy_engine(img_archive.manifest)
            finally:
                os.remove(tf.name)

        else:
            raise Exception('No archive manifest found in archive record. Cannot restore')        
Example 8
    def get_by_uri(self, uri):
        try:
            container, key = self._parse_uri(uri)
            if container != self.container_name:
                logger.warn(
                    "Container mismatch between content_uri and configured cotnainer name: {} in db record, but {} in config".format(
                        container, self.container_name
                    )
                )

            resp = self.client.download(
                container=container, objects=[key], options={"out_file": "-"}
            )
            for obj in resp:
                if "contents" in obj and obj["action"] == "download_object":
                    content = b"".join([x for x in obj["contents"]])
                    ret = utils.ensure_bytes(content)
                    return ret
                elif obj["action"] == "download_object" and not obj["success"]:
                    raise ObjectKeyNotFoundError(
                        bucket="", key="", userId="", caused_by=None
                    )
                raise Exception(
                    "Unexpected operation/action from swift: {}".format(obj["action"])
                )
        except SwiftError as e:
            raise ObjectStorageDriverError(cause=e)
Example 9
def create_archive(bucket, archiveid, bodycontent):
    httpcode = 500
    try:
        accountName = ApiRequestContextProxy.namespace()
        archive_sys = archive.get_manager()

        try:
            jsonbytes = anchore_utils.ensure_bytes(json.dumps(bodycontent))
            my_svc = ApiRequestContextProxy.get_service()
            if my_svc is not None:
                resource_url = (my_svc.service_record["base_url"] + "/" +
                                my_svc.service_record["version"] +
                                "/archive/" + bucket + "/" + archiveid)
            else:
                resource_url = "N/A"

            rc = archive_sys.put(accountName, bucket, archiveid, jsonbytes)
            return_object = resource_url
            httpcode = 200
        except Exception as err:
            httpcode = 500
            raise err

    except Exception as err:
        return_object = anchore_engine.common.helpers.make_response_error(
            err, in_httpcode=httpcode)

    return return_object, httpcode
Example 10
    def get_by_uri(self, uri):
        try:
            container, key = self._parse_uri(uri)
            if container != self.container_name:
                logger.warn(
                'Container mismatch between content_uri and configured container name: {} in db record, but {} in config'
                    .format(container, self.container_name))

            resp = self.client.download(container=container,
                                        objects=[key],
                                        options={'out_file': '-'})
            for obj in resp:
                if 'contents' in obj and obj['action'] == 'download_object':
                    content = b''.join([x for x in obj['contents']])
                    ret = utils.ensure_bytes(content)
                    return ret
                elif obj['action'] == 'download_object' and not obj['success']:
                    raise ObjectKeyNotFoundError(bucket='',
                                                 key='',
                                                 userId='',
                                                 caused_by=None)
                raise Exception(
                    'Unexpected operation/action from swift: {}'.format(
                        obj['action']))
        except SwiftError as e:
            raise ObjectStorageDriverError(cause=e)
Example 11
def create_object(bucket, archiveid, bodycontent):
    httpcode = 500
    try:
        account_name = ApiRequestContextProxy.namespace()
        obj_mgr = anchore_engine.subsys.object_store.manager.get_manager()

        jsonbytes = anchore_utils.ensure_bytes(json.dumps(bodycontent))
        rc = obj_mgr.put(account_name, bucket, archiveid, jsonbytes)

        my_svc = ApiRequestContextProxy.get_service()
        if my_svc is not None:
            resource_url = (my_svc.service_record['base_url'] + "/" +
                            my_svc.service_record['version'] +
                            "/archive/" + bucket + "/" + archiveid)
        else:
            resource_url = "N/A"

        return_object = resource_url
        httpcode = 200

    except Exception as err:
        return_object = anchore_engine.common.helpers.make_response_error(
            err, in_httpcode=httpcode)

    return return_object, httpcode
Example 12
def _safe_base64_encode(data_provider):
    try:
        return utils.ensure_str(
            base64.encodebytes(utils.ensure_bytes(data_provider())))
    except Exception as err:
        logger.warn("could not base64 encode content - exception: %s", err)
    return ""
Example 13
def load_keys(config: dict):
    """
    Based on the passed configuration, load a dict mapping the key name to bytes of the key

    :param config: dict with specific keys to find keys, paths
    :return:
    """

    keys = {}

    if config:
        if config.get('private_key_path'):
            priv_keypath = config.get('private_key_path')

            with open(priv_keypath, 'rb') as pem_fp:
                keys['private'] = pem_fp.read()

        # TODO add public x509 cert support to get the key from (DER, PEM formats)
        if config.get('public_key_path'):
            pub_keypath = config.get('public_key_path')
            with open(pub_keypath, 'rb') as crt_fp:
                keys['public'] = crt_fp.read()

        elif config.get('secret'):
            keys['secret'] = ensure_bytes(config.get('secret'))

    return keys
Example 14
    def load_retrieved_files(self, analysis_report, image_obj):
        """
        Loads the analyzer retrieved files from the image, saves them in the db

        :param retrieve_files_json:
        :param image_obj:
        :return:
        """
        log.info('Loading retrieved files')
        retrieve_files_json = analysis_report.get('retrieve_files')
        if not retrieve_files_json:
            return []

        matches = retrieve_files_json.get('file_content.all', {}).get('base', {})
        records = []

        for filename, match_string in list(matches.items()):
            match = AnalysisArtifact()
            match.image_user_id = image_obj.user_id
            match.image_id = image_obj.id
            match.analyzer_id = 'retrieve_files'
            match.analyzer_type = 'base'
            match.analyzer_artifact = 'file_content.all'
            match.artifact_key = filename
            try:
                match.binary_value = base64.b64decode(ensure_bytes(match_string))
            except Exception:
                log.exception('Could not b64 decode the file content for {}'.format(filename))
                raise
            records.append(match)

        return records
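The retrieve_files matches map a filename to base64-encoded file content; a self-contained illustration of the decode step performed above (the filename and content are made up):

import base64

matches = {'/etc/passwd': base64.b64encode(b'root:x:0:0:root:/root:/bin/bash').decode('utf-8')}
for filename, match_string in matches.items():
    binary_value = base64.b64decode(match_string.encode('utf-8'))
    assert binary_value == b'root:x:0:0:root:/root:/bin/bash'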
Example 15
    def _fetch_group_data(
            self,
            group: GroupDownloadOperationConfiguration) -> typing.Iterable:
        """
        Execute the download and write the data into the local repo location for the group

        :param group:
        :return: generator for the record count of each chunk as it is downloaded
        """

        get_next = True
        next_token = None
        chunk_number = 0
        count = 0
        since = group.parameters.since if not self.fetch_all else None

        while get_next:
            logger.info(
                'Downloading page {} of feed data for feed group: {}/{}'.
                format(chunk_number, group.feed, group.group))
            group_data = self.service_client.get_feed_group_data(
                group.feed, group.group, since=since, next_token=next_token)
            get_next = bool(group_data.next_token)
            next_token = group_data.next_token
            count += group_data.record_count
            if group_data.data is not None:
                self.local_repo.write_data(group.feed, group.group,
                                           chunk_number,
                                           ensure_bytes(group_data.data))
            chunk_number += 1
            yield group_data.record_count

        logger.info(
            'Completed data download for feed group: {}/{}. Total pages: {}'.format(
                group.feed, group.group, chunk_number))
Example 16
    def get_paged_feed_group_data(self,
                                  feed,
                                  group,
                                  since=None,
                                  next_token=None):
        if isinstance(since, datetime.datetime):
            since = since.isoformat()

        files = []
        group_path = os.path.join(self.src_path, feed, group)
        if next_token:
            next_token = ensure_str(
                base64.decodebytes(ensure_bytes(next_token)))
        data = []
        size = 0
        token = None

        back_boundary = since
        forward_boundary = (
            self.newest_allowed.isoformat() if self.newest_allowed else None)
        logger.debug(
            'Getting data for {}/{} with back boundary {} and forward boundary {}'
            .format(feed, group, back_boundary, forward_boundary))
        for datafile_name in sorted(os.listdir(group_path)):
            if (not back_boundary or (datafile_name >= back_boundary)) and (
                    not forward_boundary or
                (forward_boundary and datafile_name <= forward_boundary)) and (
                    not next_token or datafile_name >= next_token):
                logger.debug('Using data file {}'.format(datafile_name))
                fpath = os.path.join(group_path, datafile_name)
                s = os.stat(fpath)
                if size + s.st_size > self.max_content_size:
                    token = datafile_name
                    break
                else:
                    size += s.st_size
                    with open(fpath) as f:
                        content = json.load(f)
                        data += content
            else:
                logger.debug('Data file {} outside of bounds, skipping'.format(
                    datafile_name))
                continue

        return data, (ensure_str(base64.encodebytes(ensure_bytes(token)))
                      if token else None)
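The pagination token above is just the boundary datafile name passed through base64; a standalone sketch of the round trip (encode_token and decode_token are illustrative helpers, not names from the source):

import base64

def encode_token(datafile_name: str) -> str:
    return base64.encodebytes(datafile_name.encode('utf-8')).decode('utf-8')

def decode_token(token: str) -> str:
    return base64.decodebytes(token.encode('utf-8')).decode('utf-8')

token = encode_token('2021-01-15T00:00:00')
assert decode_token(token) == '2021-01-15T00:00:00'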
Example 17
    def _do_compress(self, data):
        """
        Handle data compression based on global config. Returns the data to use as payload, compressed as necessary
        based on config.

        :param data:
        :return:
        """
        if self.config[COMPRESSION_SECTION_KEY][COMPRESSION_ENABLED_KEY] is True and self.primary_client.__supports_compressed_data__ and len(data) > \
                self.config[COMPRESSION_SECTION_KEY][COMPRESSION_MIN_SIZE_KEY] * 1024:
            is_compressed = True
            final_payload = utils.ensure_bytes(zlib.compress(utils.ensure_bytes(data), COMPRESSION_LEVEL))
        else:
            is_compressed = False
            final_payload = utils.ensure_bytes(data)

        return final_payload, is_compressed
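An illustrative round trip for the compression path above, using plain zlib; COMPRESSION_LEVEL here is an assumed constant, not a value read from the real config:

import zlib

COMPRESSION_LEVEL = 6  # assumed level for this sketch
data = b'{"document": "example payload"}' * 100

compressed = zlib.compress(data, COMPRESSION_LEVEL)
assert zlib.decompress(compressed) == data
assert len(compressed) < len(data)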
Example 18
 def _decode(self, raw_result):
     is_b64 = bool(raw_result.get('b64_encoded', False))
     data = raw_result.get('jsondata')
     if data and is_b64:
         return base64.decodebytes(utils.ensure_bytes(data))
     elif data is None:
         return b''
     else:
         return data
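A self-contained sketch of the three record shapes handled above; decode_record is an illustrative standalone rewrite of the method, not a name from the source:

import base64

def decode_record(raw_result):
    is_b64 = bool(raw_result.get('b64_encoded', False))
    data = raw_result.get('jsondata')
    if data and is_b64:
        return base64.decodebytes(data.encode('utf-8'))
    elif data is None:
        return b''
    return data

encoded = {'b64_encoded': True,
           'jsondata': base64.encodebytes(b'{"k": "v"}').decode('utf-8')}
assert decode_record(encoded) == b'{"k": "v"}'
assert decode_record({'jsondata': None}) == b''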
Example 19
    def load_javas(self, analysis_json, containing_image):
        pkgs_json = analysis_json.get('package_list', {}).get('pkgs.java',
                                                              {}).get('base')
        if not pkgs_json:
            return []

        pkgs = []
        for path, pkg_str in list(pkgs_json.items()):
            pkg_json = json.loads(pkg_str)

            n = ImagePackage()

            # primary keys
            # TODO - some java names have a version in it, need to clean that up
            n.name = pkg_json.get('name')
            n.pkg_type = 'java'
            n.arch = 'N/A'
            n.pkg_path = path
            version = None
            versions_json = {}
            for k in [
                    'maven-version', 'implementation-version',
                    'specification-version'
            ]:
                if not version and pkg_json.get(k, 'N/A') != 'N/A':
                    version = pkg_json.get(k)
                versions_json[k] = pkg_json.get(k, 'N/A')
            if version:
                n.version = version
            else:
                n.version = 'N/A'

            n.image_user_id = containing_image.user_id
            n.image_id = containing_image.id

            # other non-PK values
            n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.distro_name = 'java'
            n.distro_version = 'N/A'
            n.like_distro = 'java'
            n.fullversion = n.version

            m = pkg_json.get('metadata') or {}
            m['java_versions'] = versions_json
            n.metadata_json = m

            fullname = n.name
            pomprops = n.get_pom_properties()
            if pomprops:
                fullname = "{}:{}".format(pomprops.get('groupId'),
                                          pomprops.get('artifactId'))

            n.normalized_src_pkg = fullname
            n.src_pkg = fullname
            pkgs.append(n)

        return pkgs
Example 20
def refresh_ecr_credentials(registry, access_key_id, secret_access_key):
    localconfig = anchore_engine.configuration.localconfig.get_config()

    try:
        account_id, region = parse_registry_url(registry)

        # aws: assume role on the ec2 instance
        if access_key_id == 'awsauto' or secret_access_key == 'awsauto':
            if localconfig.get('allow_awsecr_iam_auto'):
                access_key_id = secret_access_key = None
                client = boto3.client('ecr',
                                      aws_access_key_id=access_key_id,
                                      aws_secret_access_key=secret_access_key,
                                      region_name=region)
            else:
                raise Exception(
                    "registry is set to 'awsauto', but system is not configured to allow (allow_awsecr_iam_auto: False)"
                )

        # aws: assume cross account roles
        elif access_key_id == '_iam_role':
            try:
                sts = boto3.client('sts')
                session = sts.assume_role(RoleArn=secret_access_key,
                                          RoleSessionName=str(int(
                                              time.time())))
                access_key_id = session['Credentials']['AccessKeyId']
                secret_access_key = session['Credentials']['SecretAccessKey']
                session_token = session['Credentials']['SessionToken']
                client = boto3.client('ecr',
                                      aws_access_key_id=access_key_id,
                                      aws_secret_access_key=secret_access_key,
                                      aws_session_token=session_token,
                                      region_name=region)
            except Exception as err:
                raise err
        # aws: provide key & secret
        else:
            client = boto3.client('ecr',
                                  aws_access_key_id=access_key_id,
                                  aws_secret_access_key=secret_access_key,
                                  region_name=region)
        r = client.get_authorization_token(registryIds=[account_id])
        ecr_data = r['authorizationData'][0]
    except Exception as err:
        logger.warn("failure to get/refresh ECR credential - exception: " +
                    str(err))
        raise err

    ret = {}
    ret['authorizationToken'] = utils.ensure_str(
        base64.decodebytes(utils.ensure_bytes(ecr_data['authorizationToken'])))
    ret['expiresAt'] = int(ecr_data['expiresAt'].strftime('%s'))

    return ret
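Once decoded, an ECR authorization token is a 'user:password' string (the username is 'AWS' for ECR); a sketch of splitting it for a registry login, with a made-up token value:

decoded_token = 'AWS:not-a-real-token'
username, password = decoded_token.split(':', 1)
assert username == 'AWS'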
Example 21
 def get_by_uri(self, uri):
     bucket, key = self._parse_uri(uri)
     try:
         resp = self.s3_client.get_object(Bucket=bucket, Key=key)
         content = resp['Body'].read()
         ret = utils.ensure_bytes(content)
         return ret

     except Exception as e:
         raise e
Example 22
def feed_json_file_array():
    files = []
    for i in range(3):
        f = tempfile.NamedTemporaryFile("w+b", prefix="file-{}-".format(i))
        b = ensure_bytes(json.dumps(sequence_data[i]))
        f.write(b)
        f.seek(0)
        files.append(f)

    return files
Example 23
 def _trigger_id(scanner, file, signature):
     """
     Trigger id is a string encoded from the scanner name, signature, and md5 hash of the file path (to keep size within reasonable bounds)
     :param scanner:
     :param file:
     :param signature:
     :return:
     """
     return '{}+{}+{}'.format(
         scanner, signature,
         ensure_str(hashlib.md5(ensure_bytes(file)).hexdigest()))
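A quick self-contained illustration of the resulting id format; the scanner, path, and signature values are made up:

import hashlib

scanner, path, signature = 'clamav', '/usr/bin/suspicious', 'Unix.Trojan.Example-1'
trigger_id = '{}+{}+{}'.format(
    scanner, signature, hashlib.md5(path.encode('utf-8')).hexdigest())
# e.g. 'clamav+Unix.Trojan.Example-1+<32-char md5 hex>' -- the path component has a fixed length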
Example 24
def test_archive_basic():
    path = "testarc.tar.gz"
    with archiver.ImageArchive(path, mode="w") as arc:
        arc.image_digest = "sha256:1234567890abcdef"
        arc.account = "testaccount"
        arc.add_artifact(
            "analysis",
            archiver.ObjectStoreLocation(bucket="somebucket", key="somekey"),
            data=ensure_bytes(json.dumps({"somedata": "somekey"})),
            metadata={"size": 0},
        )

    with archiver.ImageArchive(path, mode="r") as arc:
        print(arc.manifest.artifacts)
        assert len(arc.manifest.artifacts) == 1
        s = arc.extract_artifact("analysis")
        print(s)
        assert s == ensure_bytes(json.dumps({"somedata": "somekey"}))

    os.remove(path)
Example 25
    def load_npms(self, analysis_json, containing_image):
        handled_pkgtypes = ['pkgs.npms']
        npms_json = analysis_json.get('package_list', {}).get('pkgs.npms', {}).get('base')
        if not npms_json:
            return [], handled_pkgtypes

        npms = []
        image_packages = []
        for path, npm_str in list(npms_json.items()):
            npm_json = json.loads(npm_str)

            # TODO: remove this usage of ImageNpm, which is deprecated
            n = ImageNpm()
            n.path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.path = path
            n.name = npm_json.get('name')
            n.src_pkg = npm_json.get('src_pkg')
            n.origins_json = npm_json.get('origins')
            n.licenses_json = npm_json.get('lics')
            n.latest = npm_json.get('latest')
            n.versions_json = npm_json.get('versions')
            n.image_user_id = containing_image.user_id
            n.image_id = containing_image.id
            #npms.append(n)

            np = ImagePackage()
            # primary keys
            np.name = n.name
            if n.versions_json:
                version = n.versions_json[0]
            else:
                version = "N/A"
            np.version = version
            np.pkg_type = 'npm'
            np.arch = 'N/A'
            np.image_user_id = n.image_user_id
            np.image_id = n.image_id
            np.pkg_path = n.path
            # other
            np.pkg_path_hash = n.path_hash
            np.distro_name = 'npm'
            np.distro_version = 'N/A'
            np.like_distro = 'npm'
            np.fullversion = np.version
            np.license = ' '.join(n.licenses_json)
            np.origin = ' '.join(n.origins_json)
            #np.metadata_json = pkg_json.get('metadata')
            fullname = np.name
            np.normalized_src_pkg = fullname
            np.src_pkg = fullname
            image_packages.append(np)

        return image_packages, handled_pkgtypes
Example 26
def test_archive_basic():
    path = 'testarc.tar.gz'
    with archiver.ImageArchive(path, mode='w') as arc:
        arc.image_digest = 'sha256:1234567890abcdef'
        arc.account = 'testaccount'
        arc.add_artifact('analysis',
                         archiver.ObjectStoreLocation(bucket='somebucket',
                                                      key='somekey'),
                         data=ensure_bytes(json.dumps({'somedata':
                                                       'somekey'})),
                         metadata={'size': 0})

    with archiver.ImageArchive(path, mode='r') as arc:
        print(arc.manifest.artifacts)
        assert len(arc.manifest.artifacts) == 1
        s = arc.extract_artifact('analysis')
        print(s)
        assert s == ensure_bytes(json.dumps({'somedata': 'somekey'}))

    os.remove(path)
Example 27
    def get_by_uri(self, uri):
        if not self.initialized:
            raise Exception("archive not initialized")

        try:
            path = self._parse_uri(uri)
            content = self._load_content(path)
            ret = utils.ensure_bytes(content)
            return ret
        except Exception as e:
            raise ObjectKeyNotFoundError(userId='', bucket='', key='', caused_by=e)
Example 28
    def get_feed_group_data(self, feed, group, since=None, next_token=None):
        """
        Extended implementation of the parent type's function that limits how fresh the returned data may be. Returns
        all records between the 'since' date and the 'newest_allowed' date; if newest_allowed is None there is
        no forward limit.

        :param feed:
        :param group:
        :param since:
        :param next_token:
        :return:
        """
        if isinstance(since, datetime.datetime):
            since = since.isoformat()

        group_path = os.path.join(self.src_path, feed, group)
        data = []

        back_boundary = since
        forward_boundary = (
            self.newest_allowed.isoformat() if self.newest_allowed else None
        )
        logger.debug(
            "Getting data for {}/{} with back boundary {} and forward boundary {}".format(
                feed, group, back_boundary, forward_boundary
            )
        )
        for datafile_name in sorted(os.listdir(group_path)):
            if (not back_boundary or (datafile_name >= back_boundary)) and (
                not forward_boundary
                or (forward_boundary and datafile_name <= forward_boundary)
            ):
                logger.debug("Using data file {}".format(datafile_name))
                fpath = os.path.join(group_path, datafile_name)
                with open(fpath) as f:
                    content = json.load(f)
                    if isinstance(content, dict):
                        data.extend(content["data"])
                    else:
                        data.extend(content)
            else:
                logger.debug(
                    "Data file {} outside of bounds, skipping".format(datafile_name)
                )
                continue

        # Make it look like a single chunk of data from the API (e.g. next_token and data keys)
        data = {"data": data, "next_token": ""}
        outdata = ensure_bytes(json.dumps(data))

        return GroupData(
            data=outdata, next_token=None, since=since, record_count=len(data["data"])
        )
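The boundary checks above rely on datafile names that sort lexicographically like ISO-8601 timestamps; a quick self-contained illustration with made-up names:

files = ['2021-01-01T00:00:00', '2021-02-01T00:00:00', '2021-03-01T00:00:00']
since = '2021-01-15T00:00:00'
newest_allowed = '2021-02-15T00:00:00'
selected = [f for f in sorted(files)
            if f >= since and f <= newest_allowed]
assert selected == ['2021-02-01T00:00:00']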
Example 29
    def get_by_uri(self, uri: str) -> bytes:
        userId, bucket, key = self._parse_uri(uri)

        try:
            with db.session_scope() as dbsession:
                result = db_archivedocument.get(userId, bucket, key, session=dbsession)
            if result:
                return utils.ensure_bytes(self._decode(result))
            else:
                raise ObjectKeyNotFoundError(userId, bucket, key, caused_by=None)
        except Exception as err:
            logger.debug("cannot get data: exception - " + str(err))
            raise err
Example 30
    def put_document(self, userId, bucket, archiveId, data):
        """
        Write a json document to the object store

        :param userId:
        :param bucket:
        :param archiveId:
        :param data: a json serializable object (string, dict, list, etc)
        :return: str url for retrieval
        """
        payload = json.dumps({"document": data})

        return self.put(userId, bucket, archiveId, utils.ensure_bytes(payload))
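A self-contained sketch of the envelope put_document writes; only the JSON wrapping is shown, since the put call itself needs a live object store:

import json

data = {'policies': [], 'whitelists': []}
payload = json.dumps({"document": data})
assert payload == '{"document": {"policies": [], "whitelists": []}}'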