예제 #1
0
    def post(self):
        """Create a migration-upload job and announce it over SNS.

        :rtype: flask.Response
        :return: 400 when the payload or its 'file' entry is missing,
            500 when the SNS publish fails, otherwise a JSON
            notification payload with status 200.
        """
        data = request.json
        if not data or not data.get('file'):
            return Response(status=400)

        data['user_id'] = session['auth_user'].get('id', -1)

        default_name = 'Migration Upload {0}'.format(
            arrow.now().format('YYYY-MM-DD HH:mm'))
        name = data.get('name', default_name)

        job = Job.save({'name': name, 'status': 'pending', 'message': data})

        registry = Registry()
        topic_arn = registry.get('topics').get('topic').get('migrationupload')
        conn = boto.sns.connect_to_region(registry.get('region').get('region'))

        try:
            # The message body is the job uuid; workers look the job up.
            conn.publish(topic_arn, str(job.uuid), name)
        except BotoServerError as e:
            log.error('Cannot publish Job=%s to Topic=%s', job.uuid, topic_arn)
            log.exception(e)
            return Response(status=500)
        except AttributeError as e:
            log.error('Cannot publish Job=%s to Topic=%s "%s"', job.uuid, topic_arn, str(e))
            return Response(status=500)

        payload = {
            'notify_msg': {
                'title': 'Job Added',
                'message': 'Migration job has been added. Upload will commence shortly.',
                'type': 'success'
            }}
        return Response(response=json.dumps(payload),
                        content_type='application/json', status=200)
예제 #2
0
    def get(self):
        """Serve one file of a multipage document.

        Resolves the request path to a key inside the document's S3
        prefix, caching fetched files under /tmp/multipage. Bare
        document URLs are redirected to the trailing-slash form so
        relative links resolve.

        :rtype: flask.Response
        :return: the file contents with a guessed mimetype.
        """
        registry = Registry()

        if request.path == '/{0}'.format(self._document['document']['url']):
            return redirect('{0}/'.format(request.path))

        (_, key_name) = request.path.split('/{0}'.format(self._document['document']['url']))
        if not key_name or '/' == key_name:
            key_name = '/index.html'  # todo must have a default start page for multipage

        tmp_dir = os.path.abspath(os.path.join('/tmp/multipage', self._document['document']['uuid']))
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)

        file_path = os.path.abspath(os.path.join(tmp_dir, key_name[1:]))
        # Security fix: request.path is untrusted input; reject paths
        # that resolve outside the per-document cache directory
        # (e.g. '..' segments would previously escape /tmp/multipage).
        if not file_path.startswith(tmp_dir + os.sep):
            return Response(status=404)

        if os.path.exists(file_path):
            with open(file_path, 'r') as content:
                contents = content.read()
        else:
            key_name = '{0}{1}'.format(self._document['document']['uuid'], key_name)
            contents = S3.get_string(registry.get('files').get('bucket_name'), key_name)

            dir_name = os.path.dirname(file_path)
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)

            with open(file_path, 'w') as write:
                write.write(contents)

        mimetype = mimetypes.guess_type(file_path)[0]
        return Response(response=contents, status=200, mimetype=mimetype)
예제 #3
0
    def get(self):
        """Serve one file of a multipage document.

        Resolves the request path to a key inside the document's S3
        prefix, caching fetched files under /tmp/multipage. Bare
        document URLs are redirected to the trailing-slash form so
        relative links resolve.

        :rtype: flask.Response
        :return: the file contents with a guessed mimetype.
        """
        registry = Registry()

        if request.path == '/{0}'.format(self._document['document']['url']):
            return redirect('{0}/'.format(request.path))

        (_, key_name) = request.path.split('/{0}'.format(
            self._document['document']['url']))
        if not key_name or '/' == key_name:
            key_name = '/index.html'  # todo must have a default start page for multipage

        tmp_dir = os.path.abspath(
            os.path.join('/tmp/multipage', self._document['document']['uuid']))
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)

        file_path = os.path.abspath(os.path.join(tmp_dir, key_name[1:]))
        # Security fix: request.path is untrusted input; reject paths
        # that resolve outside the per-document cache directory
        # (e.g. '..' segments would previously escape /tmp/multipage).
        if not file_path.startswith(tmp_dir + os.sep):
            return Response(status=404)

        if os.path.exists(file_path):
            with open(file_path, 'r') as content:
                contents = content.read()
        else:
            key_name = '{0}{1}'.format(self._document['document']['uuid'],
                                       key_name)
            contents = S3.get_string(
                registry.get('files').get('bucket_name'), key_name)

            dir_name = os.path.dirname(file_path)
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)

            with open(file_path, 'w') as write:
                write.write(contents)

        mimetype = mimetypes.guess_type(file_path)[0]
        return Response(response=contents, status=200, mimetype=mimetype)
예제 #4
0
def test_registry_region_cached(caplog):
    """Registry reads the region from the local cache file when present."""
    region_name = 'test-region'
    caplog.setLevel(logging.DEBUG)
    fake_open = mock_open(read_data='{"region": "%s"}' % (region_name, ))
    with patch('hermes_cms.core.registry.open', fake_open, create=True):
        assert Registry().get('region').get('region') == region_name
예제 #5
0
class MultipageJob(Job):
    """Worker job that unpacks a multipage document's zip archive from the
    storage bucket and uploads each entry to the files bucket under the
    document's uuid."""

    def __init__(self):
        # Bind the process-wide SQLObject connection using the database
        # URL held in the Registry.
        self.registry = Registry(log=log)
        database_url = str(self.registry.get('database').get('database'))
        sqlhub.processConnection = connectionForURI(database_url)

    def do_work(self, message=None):
        """Process a single multipage queue message.

        :type message: boto.sqs.message.Message | None
        :param message: queue message whose body's 'Message' field carries
            the job uuid; no-op when None.
        :return:
        :raises InvalidJobError: when no job matches the uuid.
        :raises FatalJobError: when the job's document no longer exists.
        """

        if not message:
            return

        conn = boto.connect_s3()
        bucket = conn.get_bucket(self.registry.get('files').get('bucket_name'))
        contents = json.loads(message.get_body())

        # The message body's 'Message' field holds the job uuid.
        job_id = str(contents['Message'])
        job = JobDB.selectBy(uuid=job_id).getOne(None)
        if not job:
            log.error('Cannot find job %s', job_id)
            raise InvalidJobError('Invalid Job ID: {0}'.format(job_id))

        job.set(status='running')
        message = job.message

        document = Document.selectBy(uuid=job.message['document']).getOne(None)
        if not document:
            message['reason'] = 'No Document exists'
            job.set(status='failed', message=message)
            raise FatalJobError('No Document Exists')

        record = Document.get_document(document)

        # Pull the uploaded zip archive out of the storage bucket.
        fp = StringIO(
            S3.get_string(
                self.registry.get('storage').get('bucket_name'),
                record['file']['key']))
        with zipfile.ZipFile(fp, 'r') as zip_handle:
            for name in zip_handle.namelist():
                # Skip directory entries; only files are uploaded.
                if name.endswith('/'):
                    continue
                key_name = '{0}/{1}'.format(document.uuid, name)
                key = Key(bucket=bucket, name=key_name)
                key.content_type = mimetypes.guess_type(name)[0]
                key.set_contents_from_string(zip_handle.read(name))
                log.info('Uploaded %s', key_name)

        job.set(status='complete')
        # Optionally apply document changes requested at job creation time.
        if job.message.get('on_complete', {}).get('alter'):
            document.set(**job.message['on_complete']['alter'])

        log.info('Setting job=%s to complete', job_id)
예제 #6
0
    def post(self):
        """Create a migration-download job and announce it over SNS.

        For specific parents

        {
            "documents": [{
                "parent_id": "uuid"
            }],
            "all_documents": false
        }

        {
            "documents": [],
            "all_documents": true
        }

        :rtype: flask.Response
        :return: A flask Response object
        """
        data = request.json

        if not data:
            return Response(status=400)

        # Exactly one of 'document' / 'all_documents' must be supplied.
        # NOTE(review): the docstring examples use the key "documents"
        # but this check reads "document" — confirm which key is correct.
        if not ((data.get('document') and not data.get('all_documents')) or
                (not data.get('document') and data.get('all_documents'))):
            return Response(status=400)

        name = data.get('name', 'Migration Download {0}'.format(arrow.now().format('YYYY-MM-DD HH:mm')))

        job = Job.save({
            'name': name,
            'status': 'pending',
            'message': data
        })

        topic_arn = Registry().get('topics').get('topic').get('migrationdownload')
        conn = boto.sns.connect_to_region(Registry().get('region').get('region'))

        try:
            # The message body is the job uuid; workers look the job up.
            conn.publish(topic_arn, str(job.uuid), name)
        except BotoServerError as e:
            log.error('Cannot publish Job=%s to Topic=%s', job.uuid, topic_arn)
            log.exception(e)
            return Response(status=500)
        except AttributeError as e:
            # Presumably raised when the registry lookups above returned
            # None — TODO confirm.
            log.error('Cannot publish Job=%s to Topic=%s "%s"', job.uuid, topic_arn, str(e))
            return Response(status=500)

        return Response(response=json.dumps({
            'notify_msg': {
                'title': 'Job Added',
                'message': 'Migration job has been added. Download will be ready shortly.',
                'type': 'success'
            }}), content_type='application/json', status=200)
예제 #7
0
    def __init__(self):
        """Bind the process database connection and open the storage and
        files S3 buckets named in the Registry."""
        self.registry = Registry(log=log)
        sqlhub.processConnection = connectionForURI(
            str(self.registry.get('database').get('database')))

        storage_conn = boto.connect_s3()
        files_conn = boto.connect_s3()

        self.bucket = storage_conn.get_bucket(
            self.registry.get('storage').get('bucket_name'))
        self.files_bucket = files_conn.get_bucket(
            self.registry.get('files').get('bucket_name'))
예제 #8
0
class MultipageJob(Job):
    """Worker job that unpacks a multipage document's zip archive from the
    storage bucket and uploads each entry to the files bucket under the
    document's uuid."""

    def __init__(self):
        # Bind the process-wide SQLObject connection using the database
        # URL held in the Registry.
        self.registry = Registry(log=log)
        database_url = str(self.registry.get('database').get('database'))
        sqlhub.processConnection = connectionForURI(database_url)

    def do_work(self, message=None):
        """Process a single multipage queue message.

        :type message: boto.sqs.message.Message | None
        :param message: queue message whose body's 'Message' field carries
            the job uuid; no-op when None.
        :return:
        :raises InvalidJobError: when no job matches the uuid.
        :raises FatalJobError: when the job's document no longer exists.
        """

        if not message:
            return

        conn = boto.connect_s3()
        bucket = conn.get_bucket(self.registry.get('files').get('bucket_name'))
        contents = json.loads(message.get_body())

        # The message body's 'Message' field holds the job uuid.
        job_id = str(contents['Message'])
        job = JobDB.selectBy(uuid=job_id).getOne(None)
        if not job:
            log.error('Cannot find job %s', job_id)
            raise InvalidJobError('Invalid Job ID: {0}'.format(job_id))

        job.set(status='running')
        message = job.message

        document = Document.selectBy(uuid=job.message['document']).getOne(None)
        if not document:
            message['reason'] = 'No Document exists'
            job.set(status='failed', message=message)
            raise FatalJobError('No Document Exists')

        record = Document.get_document(document)

        # Pull the uploaded zip archive out of the storage bucket.
        fp = StringIO(S3.get_string(self.registry.get('storage').get('bucket_name'), record['file']['key']))
        with zipfile.ZipFile(fp, 'r') as zip_handle:
            for name in zip_handle.namelist():
                # Skip directory entries; only files are uploaded.
                if name.endswith('/'):
                    continue
                key_name = '{0}/{1}'.format(document.uuid, name)
                key = Key(bucket=bucket, name=key_name)
                key.content_type = mimetypes.guess_type(name)[0]
                key.set_contents_from_string(zip_handle.read(name))
                log.info('Uploaded %s', key_name)

        job.set(status='complete')
        # Optionally apply document changes requested at job creation time.
        if job.message.get('on_complete', {}).get('alter'):
            document.set(**job.message['on_complete']['alter'])

        log.info('Setting job=%s to complete', job_id)
예제 #9
0
파일: admin.py 프로젝트: pmcilwaine/hermes
def sign_upload_url():
    """Build a signed S3 POST form for uploading into the storage bucket.

    :rtype: flask.Response
    :return: 201 with the signed form (plus a 'file' entry naming the
        bucket and generated key) as JSON.
    """
    registry = Registry()
    bucket = registry.get('storage')['bucket_name']

    region = registry.get('region').get('region')
    signed_form = S3.generate_form(bucket, region=region)

    key_values = [field['value'] for field in signed_form['fields']
                  if field['name'] == 'key']
    signed_form['file'] = {
        'bucket': bucket,
        'key': key_values.pop()
    }

    return Response(response=json.dumps(signed_form),
                    content_type='application/json', status=201)
예제 #10
0
def test_registry_region():
    """Registry falls back to S3 when no local cache entry exists."""
    region_value = 'my region'
    bucket_name = 'my-bucket'

    bucket = boto.connect_s3().create_bucket(bucket_name)
    bucket.new_key('region').set_contents_from_string(
        '{"region": "%s"}' % (region_value, ))

    with patch('hermes_cms.core.registry.Registry._bucket_name',
               new_callable=PropertyMock) as mock_bucket_name, \
            patch.object(Registry, '_write_key'), \
            patch.object(Registry, '_read_cache') as mock_read_cache:
        mock_bucket_name.return_value = bucket_name
        mock_read_cache.return_value = None
        assert Registry().get('region').get('region') == region_value
예제 #11
0
파일: runner.py 프로젝트: pmcilwaine/hermes
    def run(self):
        """Run the scheduler loop for this service's configured job.

        Fetches the job configuration from the Registry, instantiates
        the configured job class and runs its do_action on a blocking
        interval scheduler.
        """
        setup_logging()
        log = logging.getLogger('hermes_cms.service.runner')

        while True:
            try:
                config = Registry().get(self.config_file)
            # pylint: disable=broad-except
            except Exception as e:
                log.exception(e)
                # Bug fix: previously fell through after a failed fetch
                # and used `config` while unbound (NameError) or stale;
                # retry the registry lookup instead.
                continue

            module_name = config['jobs'][self.name]['module_name']
            class_name = config['jobs'][self.name]['class_name']

            mod = __import__(module_name, fromlist=[class_name])
            service_class = getattr(mod, class_name)

            job_class = service_class(self.name, self.region, config)

            seconds = int(config['jobs'][self.name]['frequency'])

            scheduler = BlockingScheduler()
            scheduler.add_job(job_class.do_action,
                              IntervalTrigger(seconds=seconds))
            log.info('Starting Scheduled job %s', self.name)
            scheduler.start()
예제 #12
0
파일: runner.py 프로젝트: pmcilwaine/hermes
def main():
    """Entry point for the service-runner daemon.

    Parses the action/job/config arguments, waits until the Registry is
    reachable, then hands the requested action to the daemon runner.

    :return: process exit code (always 0)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('action',
                        choices=['start', 'stop', 'restart'],
                        help='Start service action')
    parser.add_argument('-j', '--job', required=True)
    parser.add_argument('-c', '--config', required=True)
    args = parser.parse_args()

    # The daemon runner parses sys.argv itself; leave it only the action.
    sys.argv = (sys.argv[0], args.action)

    # Poll until the registry is readable — it may not be available yet
    # when the service starts at boot.
    region = None
    while not region:
        try:
            region = Registry().get('region').get('region')
        except (TypeError, KeyError, S3ResponseError):
            time.sleep(5)

    app = DaemonApplication(args.job, region, args.config)
    daemon_runner = ServiceRunner(app)
    try:
        daemon_runner.do_action()
    except (daemon.runner.DaemonRunnerStartFailureError,
            daemon.runner.DaemonRunnerStopFailureError):
        # Start/stop failures are intentionally swallowed; the exit code
        # stays 0 either way.
        pass

    return 0
예제 #13
0
def test_registry_region():
    """Registry falls back to S3 when no local cache entry exists."""
    expected = 'my region'
    bucket_name = 'my-bucket'

    # Seed the (mocked) S3 bucket with a 'region' config key.
    s3 = boto.connect_s3()
    bucket = s3.create_bucket(bucket_name)
    key = bucket.new_key('region')
    key.set_contents_from_string('{"region": "%s"}' % (expected, ))

    with patch('hermes_cms.core.registry.Registry._bucket_name',
               new_callable=PropertyMock) as mock_bucket_name:
        with patch.object(Registry, '_write_key'):
            mock_bucket_name.return_value = bucket_name
            # Force a cache miss so Registry must read from S3.
            with patch.object(Registry, '_read_cache') as mock_read_cache:
                mock_read_cache.return_value = None
                registry = Registry()
                assert registry.get('region').get('region') == expected
예제 #14
0
    def post(self):
        """Validate and save a document, then run its admin helper.

        With 'validate' in the query string the payload is only
        validated and echoed back; nothing is saved.

        :rtype: flask.Response
        :return: 400 with field errors on validation failure, otherwise
            200 with either the echoed payload or a notification.
        """
        document_data = request.json
        validation = DocumentValidation(data=document_data)
        if not validation.validate():
            return Response(response=json.dumps(
                {'fields': validation.errors()}),
                            status=400,
                            content_type='application/json')

        if 'validate' in request.args:
            return Response(response=json.dumps(document_data),
                            status=200,
                            content_type='application/json')

        # todo we should use Auth class to get this
        document_data['document']['user_id'] = session['auth_user'].get(
            'id', -1)
        document = DocumentDB.save(document_data)

        # Document types may declare an admin helper in the Registry;
        # run it after the save.
        document_type = document_data['document']['type']
        helper_class = Registry().get('document').get(document_type, {}).get(
            'admin_helper', {})
        if helper_class:
            common.load_class(helper_class.get('document_module'),
                              helper_class.get('document_class'),
                              document).do_work()

        return Response(response=json.dumps({
            'notify_msg': {
                'title':
                'Document Modified'
                if document_data.get('id') else 'Document Added',
                'message':
                '{0} has been {1}'.format(
                    str(document.name).strip(),
                    'modified' if document_data.get('id') else 'added'),
                'type':
                'success'
            }
        }),
                        status=200,
                        content_type='application/json')
예제 #15
0
    def do_work(self):
        """Create a pending multipage job for this document and notify
        workers over SNS.

        A published document is unpublished while the job runs and
        scheduled (via the job's on_complete.alter payload) to be
        re-published when the job finishes.
        """
        log = logging.getLogger('hermes_cms.helpers.multipage')

        updated_record = {}
        alter_record = {}
        if self.document.published:
            # Re-publish once the job completes ...
            alter_record.update({
                'published': True
            })
            # ... but unpublish while it is in flight.
            updated_record.update({
                'published': False
            })
            log.debug('Document: %s removing published status', str(self.document.uuid))

        # create job (name is capped to fit the job name column)
        name = '{0} multipage job'.format(self.document.name)[0:254]
        job = Job.save({
            'name': name,
            'status': 'pending',
            'message': {
                'document': str(self.document.uuid),
                'on_complete': {
                    'alter': alter_record
                }
            }
        })

        log.info('Created Job for Document %s as JobID=%s', self.document.uuid, job.uuid)

        # push this to sns job topic
        topic_arn = Registry().get('topics').get('topic').get('multipage')
        conn = boto.sns.connect_to_region(Registry().get('region').get('region'))
        try:
            conn.publish(topic_arn, str(job.uuid), 'Multipage Subject')
        except BotoServerError as e:
            # Best effort: a publish failure is logged, not raised.
            log.error('Cannot publish Job=%s to Topic=%s', str(job.uuid), topic_arn)
            log.exception(e)

        if updated_record:
            self.document.set(**updated_record)
예제 #16
0
    def put(self, document_id=None):
        """Re-save an existing document and run its admin helper.

        :param document_id: uuid of the document to update
        :rtype: flask.Response
        :return: 404 when the document is unknown, otherwise 200 with
            the saved document data as JSON.
        """
        document = DocumentDB.selectBy(uuid=document_id).getOne(None)
        if not document:
            return Response(status=404)

        document_data = DocumentDB.get_document(document)
        document_data['id'] = document.id

        document_data['document']['user_id'] = session['auth_user'].get('id', -1)
        document = DocumentDB.save(document_data)

        # Document types may declare an admin helper in the Registry;
        # run it after the save.
        # Bug fix: removed a leftover debug `print helper_class` statement.
        document_type = document_data['document']['type']
        helper_class = Registry().get('document').get(document_type, {}).get('admin_helper', {})
        if helper_class:
            common.load_class(
                helper_class.get('document_module'),
                helper_class.get('document_class'),
                document
            ).do_work()

        return Response(response=json.dumps(document_data), status=200)
예제 #17
0
    def post(self):
        """Create a migration-upload job and announce it over SNS.

        :rtype: flask.Response
        :return: 400 when the payload or its 'file' entry is missing,
            500 when the SNS publish fails, otherwise a JSON
            notification payload with status 200.
        """
        data = request.json
        if not data or not data.get('file'):
            return Response(status=400)

        data['user_id'] = session['auth_user'].get('id', -1)

        name = data.get(
            'name', 'Migration Upload {0}'.format(
                arrow.now().format('YYYY-MM-DD HH:mm')))

        job = Job.save({'name': name, 'status': 'pending', 'message': data})

        registry = Registry()
        topic_arn = registry.get('topics').get('topic').get('migrationupload')
        conn = boto.sns.connect_to_region(registry.get('region').get('region'))

        try:
            # The message body is the job uuid; workers look the job up.
            conn.publish(topic_arn, str(job.uuid), name)
        except BotoServerError as e:
            log.error('Cannot publish Job=%s to Topic=%s', job.uuid, topic_arn)
            log.exception(e)
            return Response(status=500)
        except AttributeError as e:
            # Presumably raised when the registry lookups above returned
            # None — TODO confirm.
            log.error('Cannot publish Job=%s to Topic=%s "%s"', job.uuid,
                      topic_arn, str(e))
            return Response(status=500)

        return Response(response=json.dumps({
            'notify_msg': {
                'title': 'Job Added',
                'message':
                'Migration job has been added. Upload will commence shortly.',
                'type': 'success'
            }
        }),
                        content_type='application/json',
                        status=200)
예제 #18
0
파일: admin.py 프로젝트: pmcilwaine/hermes
def url_rules():
    """Register the Registry-configured admin URL rules on the blueprint.

    Each rule entry names a view class; rules may carry either a list of
    'urls' or a single top-level 'url'/'methods' pair.
    """
    for admin_rule in Registry().get('admin_rules').get('rules'):
        module = common.load_module_class(admin_rule['module_name'],
                                          admin_rule['class_name'])
        view = module.as_view(str(admin_rule['name']))

        url_entries = admin_rule.get('urls')
        if not url_entries:
            # Fall back to the single-URL form of the rule definition.
            url_entries = [{
                'url': admin_rule['url'],
                'methods': admin_rule['methods']
            }]

        for entry in url_entries:
            route.add_url_rule(entry['url'], view_func=view,
                               methods=entry['methods'])
예제 #19
0
def create_app(app_name='hermes_cms', config_obj=None, blueprints=None):
    """Create and configure the Flask application.

    :type app_name: str
    :param app_name: name passed to the Flask constructor
    :type config_obj: object|None
    :param config_obj: optional configuration object for the app
    :type blueprints: list|None
    :param blueprints: blueprint definitions; defaults to the Registry's
        'blueprint' configuration
    :return: a configured flask.Flask application
    """
    app = Flask(app_name)

    if config_obj:
        app.config.from_object(config_obj)
    else:
        # todo this needs to be in Configuration Registry
        app.secret_key = 'testing-key'

    blueprints = blueprints or Registry().get('blueprint').get('blueprint')

    for blueprint in blueprints:
        module = __import__(blueprint['name'], fromlist=blueprint['from'])
        route = getattr(module, blueprint['from'])
        # Blueprints may expose a url_rules() hook to register views.
        if hasattr(module, 'url_rules'):
            module.url_rules()

        app.register_blueprint(route, **blueprint.get('kwargs', {}))

    def error_handler(error):
        """Log any unhandled exception and return a JSON 500 notification."""
        log.exception(str(error))
        return Response(response=json.dumps({
            'notify_msg': {
                'title': 'Server Error',
                'message': 'An internal server error occurred.',
                # Bug fix: this notification previously carried type
                # 'success' even though it reports a server error.
                'type': 'error'
            }
        }), content_type='application/json', status=500)

    app.register_error_handler(Exception, error_handler)
    app.before_request_funcs.setdefault(None, []).append(db_connect)
    app.after_request_funcs.setdefault(None, []).append(db_close)

    return app
예제 #20
0
class RegistryResolver(object):
    """Resolves scheme-prefixed path strings into values stored in the
    configuration Registry."""

    def __init__(self):
        self.registry = Registry()

    # pylint: disable=no-self-use
    def _get_dict(self, dict_src, path):
        """Walk *dict_src* along the dot-separated *path*.

        @param dict_src The dictionary source to use to find keys in
        @param path a path to find keys within object
        @return parsed dictionary
        """
        node = dict_src
        for key in path.split('.'):
            node = node[key]

        return node

    def _resolve_string(self, value):
        """Resolve *value* against the registry when it carries a URL
        scheme; scheme-less strings are returned unchanged.

        :type value: basestring
        :param value: A string to be parsed from a registry.
        :return:
        """
        parsed = urlparse(value)
        if not parsed.scheme:
            return value

        stripped = parsed.path.lstrip('/')
        (bucket, path) = stripped.split('.', 1)
        return self._get_dict(self.registry.get(bucket), path)

    def resolver(self, value):
        """Resolve *value* when it is a string; other types yield None.

        :param value:
        :return:
        """
        if isinstance(value, basestring):
            return self._resolve_string(value)
예제 #21
0
class RegistryResolver(object):
    """Resolves scheme-prefixed path strings into values stored in the
    configuration Registry."""

    def __init__(self):
        self.registry = Registry()

    # pylint: disable=no-self-use
    def _get_dict(self, dict_src, path):
        """Walk *dict_src* along the dot-separated *path*.

        @param dict_src The dictionary source to use to find keys in
        @param path a path to find keys within object
        @return parsed dictionary
        """
        dict_dest = dict_src
        for item in path.split('.'):
            dict_dest = dict_dest[item]

        return dict_dest

    def _resolve_string(self, value):
        """Resolve *value* against the registry when it carries a URL
        scheme; scheme-less strings are returned unchanged.

        :type value: basestring
        :param value: A string to be parsed from a registry.
        :return:
        """
        parsed_path = urlparse(value)
        if not parsed_path.scheme:
            return value

        # First dot-separated segment names the registry entry; the rest
        # is a path within it.
        full_path = parsed_path.path.lstrip('/')
        (bucket, path) = full_path.split('.', 1)
        resolved_config = self.registry.get(bucket)
        return self._get_dict(resolved_config, path)

    def resolver(self, value):
        """Resolve *value* when it is a string; other types yield None.

        :param value:
        :return:
        """
        if isinstance(value, basestring):
            return self._resolve_string(value)
예제 #22
0
def db_connect():
    """Attach a per-thread SQLObject connection for the current request.

    Uses the app's DATABASE config when set; otherwise falls back to the
    database URL held in the Registry.
    """
    url = current_app.config.get('DATABASE') or str(
        Registry().get('database').get('database'))
    sqlhub.threadConnection = connectionForURI(url)
예제 #23
0
 def __init__(self):
     """Bind the process-wide SQLObject connection from Registry config."""
     self.registry = Registry(log=log)
     database_url = str(self.registry.get('database').get('database'))
     sqlhub.processConnection = connectionForURI(database_url)
예제 #24
0
 def __init__(self):
     """Create the configuration Registry used by this instance."""
     self.registry = Registry()
예제 #25
0
class MigrationUploadJob(Job):
    """Worker job that imports documents (and their files) from a
    migration zip archive stored in the storage bucket."""

    def __init__(self):
        # Bind the process-wide SQLObject connection and open both the
        # storage and files buckets named in the Registry.
        self.registry = Registry(log=log)
        database_url = str(self.registry.get('database').get('database'))
        sqlhub.processConnection = connectionForURI(database_url)

        conn = boto.connect_s3()
        file_conn = boto.connect_s3()

        self.bucket = conn.get_bucket(
            self.registry.get('storage').get('bucket_name'))
        self.files_bucket = file_conn.get_bucket(
            self.registry.get('files').get('bucket_name'))

    @staticmethod
    def _get_manifest(handle):
        """Read and parse the archive's 'manifest' entry.

        :type handle: zipfile.ZipFile
        :param handle:
        :return: parsed manifest dict
        :raises Exception
        """
        return json.loads(handle.read('manifest'))

    @staticmethod
    def _validate_manifest(documents):
        """Check that every document with a parent either references a
        parent already seen in the manifest or one that exists in the
        database. Returns False on the first unresolvable parent."""
        lookup = {}

        for document in documents:
            # Documents without parent info anchor the lookup table.
            if not (document.get('parent_uuid')
                    and document.get('parent_url')):
                lookup[document['uuid']] = document['url']
                continue

            if not lookup.get(document['parent_uuid']):
                parent_document = Document.selectBy(
                    url=document['parent_url']).getOne(None)
                if not parent_document:
                    return False

                lookup[document['uuid']] = document['url']

        return True

    @staticmethod
    def _get_document_from_archive(uuid, handle):
        """Read and parse the document record stored in the archive
        under its uuid.

        :param uuid:
        :type handle: zipfile.ZipFile
        :param handle:
        :return:
        """
        return json.loads(handle.read(uuid))

    # pylint: disable=no-self-use
    def _update_from_parent(self, contents, parent_url):
        """Point *contents* at the newest document with *parent_url* as
        its parent, copying its id and path."""
        document = Document.selectBy(url=parent_url,
                                     orderBy=DESC(Document.q.created),
                                     limit=1).getOne(None)
        contents['document']['parent'] = document.id
        contents['document']['path'] = document.path

    # pylint: disable=no-self-use
    def _save_document(self, user_id, contents):
        """Persist the document record carried in *contents*.

        :param user_id:
        :param contents:
        :return: the document's original creation timestamp
        :rtype: arrow.Arrow
        """
        created = arrow.get(contents['document']['created'])
        contents['document']['created'] = created.datetime
        contents['document']['user_id'] = user_id
        document = Document(**contents['document'])

        # NOTE(review): the recomputed path is written back into
        # *contents* only, not onto the saved Document row — confirm
        # this is intentional.
        path = '{0}{1}/'.format(document.path, document.id)
        contents['document']['created'] = str(created)
        contents['document']['path'] = path

        return created

    def _upload_file(self, contents, handle):
        """Copy the document's single file payload out of the archive
        into the storage bucket.

        :param contents:
        :type handle: zipfile.ZipFile
        :param handle:
        :return:
        """
        bucket_name = self.registry.get('storage').get('bucket_name')
        filename = contents['file']['key']
        contents['bucket'] = bucket_name

        key = Key(self.bucket, filename)
        key.set_contents_from_string(handle.read('file/{0}'.format(filename)))

    def _upload_multipage(self, contents, handle):
        """Copy the document's multipage files out of the archive into
        the files bucket under the document's uuid.

        :type contents: dict
        :param contents:
        :type handle: zipfile.ZipFile
        :param handle:
        :return:
        """
        for item in handle.namelist():
            part = 'files/{0}/'.format(contents['document']['uuid'])
            if item.startswith(part):
                filename = item.split(part).pop()
                key = Key(
                    self.files_bucket,
                    '{0}/{1}'.format(contents['document']['uuid'], filename))
                key.content_type = mimetypes.guess_type(item)[0]
                key.set_contents_from_string(handle.read(item))

    def do_work(self, message=None):
        """Process one migration-upload queue message.

        ZipFile stored in storage. read from Job Message

        {
            "archive": "path/in/s3"
        }

        # documents order matter

        Manifest file structure
        {
            'documents': [
                {
                    'uuid': 'some-uuid',
                    'url': 'some-url',
                    'parent_url': 'some-parent-url',
                    'parent_uuid': 'some-parent-uuid'
                },
                ...
            ],
            'full': bool
        }

        :param message: queue message whose body's 'Message' field
            carries the job uuid; no-op when None.
        :return:
        :raises InvalidJobError: when the job, archive or manifest
            cannot be found or validated.
        """
        if not message:
            return

        contents = json.loads(message.get_body())

        # The message body's 'Message' field holds the job uuid.
        job_id = str(contents['Message'])
        job = JobDB.selectBy(uuid=job_id).getOne(None)
        if not job:
            log.error('Cannot find job %s', job_id)
            raise InvalidJobError('Invalid Job ID: {0}'.format(job_id))

        job.set(status='running')
        message = job.message

        # get archive file
        archive_key = self.bucket.get_key(job.message['file']['key'])
        if not archive_key:
            message['reason'] = 'Cannot find the archive in the S3 bucket.'
            job.set(status='failed', message=message)
            raise InvalidJobError('Cannot find archive in S3 bucket.')

        fp = StringIO(archive_key.get_contents_as_string())

        handle = zipfile.ZipFile(fp,
                                 mode='r',
                                 compression=zipfile.ZIP_DEFLATED)
        try:
            manifest_content = MigrationUploadJob._get_manifest(handle)
        except Exception:
            message['reason'] = 'Unable to retrieve manifest in archive'
            job.set(status='failed', message=message)
            raise InvalidJobError('Unable to retrieve manifest')

        if not MigrationUploadJob._validate_manifest(
                manifest_content['documents']):
            message['reason'] = 'Manifest found is not valid'
            job.set(status='failed', message=message)
            raise InvalidJobError('Manifest is not valid')

        # Import documents in manifest order (parents before children).
        for document in manifest_content['documents']:
            contents = MigrationUploadJob._get_document_from_archive(
                document['uuid'], handle)
            if document.get('parent_uuid') and document.get('parent_url'):
                self._update_from_parent(contents, document['parent_url'])

            created = self._save_document(job.message['user_id'], contents)
            if contents.get('file') and contents['document']['type'] == 'File':
                self._upload_file(contents, handle)
            elif contents.get(
                    'file') and contents['document']['type'] == 'MultiPage':
                self._upload_multipage(contents, handle)

            # Store the document record under its date-partitioned key.
            key_name = '{0}/{1}/{2}/{3}'.format(created.day, created.month,
                                                created.year,
                                                contents['document']['uuid'])
            key = Key(self.bucket, key_name)
            key.set_contents_from_string(json.dumps(contents))

        job.set(status='complete')
        log.info('Setting job=%s to complete', job_id)
예제 #26
0
class MigrationDownloadJob(Job):
    """Export published documents and their files into a ZIP archive.

    The archive is written back to the storage bucket under the job's UUID
    and contains one JSON entry per document (named by document UUID), the
    raw file contents for ``File``/``MultiPage`` documents, and a
    ``manifest`` entry describing everything that was exported.
    """

    def __init__(self):
        self.registry = Registry(log=log)
        database_url = str(self.registry.get('database').get('database'))
        sqlhub.processConnection = connectionForURI(database_url)

        conn = boto.connect_s3()
        file_conn = boto.connect_s3()

        self.bucket = conn.get_bucket(self.registry.get('storage').get('bucket_name'))
        self.files_bucket = file_conn.get_bucket(self.registry.get('files').get('bucket_name'))

    # pylint: disable=no-self-use
    def _get_document_query(self, documents):
        """Build an ``IN`` condition matching the given parents and all descendants.

        :type documents: list[dict] | None
        :param documents: items of the form ``{"parent_id": "<uuid>"}``
        :return: SQLObject ``IN`` condition over ``Document.q.uuid``
        """
        uuids = []
        for item in documents or []:
            document = Document.selectBy(uuid=item['parent_id']).getOne(None)
            if document is None:
                # Unknown parent UUID: skip this entry rather than crash the
                # whole job with an AttributeError on None.path.
                log.warning('Cannot find document for parent_id=%s', item['parent_id'])
                continue
            # A path-prefix LIKE selects the parent together with every descendant.
            for doc in Document.select(LIKE(Document.q.path, '{0}%'.format(document.path))):
                uuids.append(doc.uuid)

        return IN(Document.q.uuid, uuids)

    def _handle_document(self, document, zip_handle):
        """Write one document's JSON (and any file contents) into the archive.

        :type document: hermes_cms.db.document.Document
        :param document: document row being exported
        :type zip_handle: zipfile.ZipFile
        :param zip_handle: open archive to write into
        :return:
        """
        # Documents are stored in S3 under day/month/year/uuid.
        key_name = '{0}/{1}/{2}/{3}'.format(document.created.day, document.created.month,
                                            document.created.year, document.uuid)

        contents = self.bucket.get_key(key_name).get_contents_as_string()
        json_content = json.loads(contents)
        # The document JSON lives in the archive under its bare UUID.
        zip_handle.writestr(document.uuid, contents)

        if 'file' in json_content and document.type == 'File':
            file_contents = self.bucket.get_key(json_content['file']['key']).get_contents_as_string()
            zip_handle.writestr(json_content['file']['key'], file_contents)

        if 'MultiPage' == document.type:
            # Multi-page documents keep per-page files under files/<uuid>/... in the archive.
            for item in self.files_bucket.list(document.uuid):
                zip_handle.writestr('files/{0}'.format(item.name), item.get_contents_as_string())

    # pylint: disable=no-self-use
    def _get_document_parent_url(self, parent):
        """Return the most recently created Document row with the given id.

        :param parent: parent document id (falsy when the document is a root)
        :return: the parent document, or ``None`` when there is no parent
        :rtype: hermes_cms.db.document.Document | None
        """
        if not parent:
            return None

        return Document.select(Document.q.id == parent, orderBy=DESC(Document.q.created), limit=1).getOne(None)

    def do_work(self, message=None):
        """Process one SQS download-job message.

        Job message formats:

        {
            "documents": [{
                "parent_id": "uuid"
            }],
            "all_documents": false
        }

        {
            "documents": [],
            "all_documents": true
        }

        Archive layout: each document JSON is stored under its uuid as the
        filename; files are stored under their full S3 key name.

        Manifest file structure
        {
            'documents': [
                {
                    'uuid': 'some-uuid',
                    'url': 'some-url',
                    'parent_url': 'some-parent-url',
                    'parent_uuid': 'some-parent-uuid'
                },
                ...
            ],
            'full': bool
        }

        :type message: boto.sqs.message.Message | None
        :param message: SQS message whose body carries the Job UUID
        :raises InvalidJobError: when the job id in the message is unknown
        :return:
        """

        if not message:
            return

        contents = json.loads(message.get_body())

        job_id = str(contents['Message'])
        job = JobDB.selectBy(uuid=job_id).getOne(None)
        if not job:
            log.error('Cannot find job %s', job_id)
            raise InvalidJobError('Invalid Job ID: {0}'.format(job_id))

        job.set(status='running')

        # SQLObject builds SQL from these comparisons; '== False/True' is intentional.
        and_ops = [Document.q.archived == False, Document.q.published == True]
        if not job.message.get('all_documents'):
            # The documented message key is 'documents'; fall back to the
            # legacy singular 'document' key for backward compatibility.
            selected = job.message.get('documents', job.message.get('document')) or []
            and_ops.append(self._get_document_query(selected))

        manifest = {
            'documents': [],
            'full': job.message.get('all_documents', False)
        }

        zip_contents = StringIO()
        zip_handle = zipfile.ZipFile(zip_contents, 'w', compression=zipfile.ZIP_DEFLATED)
        for document in Document.query(Document.all(), where=AND(*and_ops)):
            parent_document = self._get_document_parent_url(document.parent)
            manifest['documents'].append({
                'uuid': document.uuid,
                'url': document.url,
                'parent_url': None if not parent_document else parent_document.url,
                'parent_uuid': None if not parent_document else parent_document.uuid
            })

            self._handle_document(document, zip_handle)
            log.info('Adding document uuid=%s to zip archive', str(document.uuid))

        zip_handle.writestr('manifest', json.dumps(manifest))
        zip_handle.close()

        zip_key = Key(self.bucket, job_id)
        zip_key.content_type = 'application/zip'
        zip_key.set_contents_from_string(zip_contents.getvalue())
        log.info("Created ZIP for Job '%s'", str(job_id))

        # Record where the finished archive can be downloaded from.
        message = job.message
        message['download'] = {
            'bucket': self.bucket.name,
            'key': job_id
        }
        job.set(status='complete', message=message)
        log.info('Setting job=%s to complete', job_id)
예제 #27
0
 def __init__(self):
     """Load the registry and open the SQLObject process connection."""
     self.registry = Registry(log=log)
     sqlhub.processConnection = connectionForURI(
         str(self.registry.get('database').get('database')))
예제 #28
0
def test_registry_region_cached(caplog):
    """Registry.get('region') returns the region parsed from the registry file."""
    region_name = 'test-region'
    caplog.setLevel(logging.DEBUG)
    fake_file = mock_open(read_data='{"region": "%s"}' % (region_name, ))
    with patch('hermes_cms.core.registry.open', fake_file, create=True):
        reg = Registry()
        assert reg.get('region').get('region') == region_name
예제 #29
0
 def __init__(self):
     # Load the shared configuration registry used by this handler.
     self.registry = Registry()
예제 #30
0
class MigrationUploadJob(Job):
    """Import documents from a ZIP archive (produced by a migration download)
    into the CMS database and S3 buckets."""

    def __init__(self):
        # Registry supplies configuration; SQLObject connection is process-wide.
        self.registry = Registry(log=log)
        database_url = str(self.registry.get('database').get('database'))
        sqlhub.processConnection = connectionForURI(database_url)

        conn = boto.connect_s3()
        file_conn = boto.connect_s3()

        # Documents/archives live in the storage bucket; multi-page assets in the files bucket.
        self.bucket = conn.get_bucket(self.registry.get('storage').get('bucket_name'))
        self.files_bucket = file_conn.get_bucket(self.registry.get('files').get('bucket_name'))

    @staticmethod
    def _get_manifest(handle):
        """Read and parse the ``manifest`` JSON entry from the archive.

        :type handle: zipfile.ZipFile
        :param handle: open archive to read from
        :return: parsed manifest dict
        :raises Exception: when the entry is missing or is not valid JSON
        """
        return json.loads(handle.read('manifest'))

    @staticmethod
    def _validate_manifest(documents):
        """Check that every child document's parent is either earlier in the
        manifest or already present in the database.

        :param documents: manifest ``documents`` list (order matters: parents first)
        :return: True when all parents can be resolved, False otherwise
        """
        # lookup maps uuid -> url for documents seen so far in the manifest.
        lookup = {}

        for document in documents:
            if not (document.get('parent_uuid') and document.get('parent_url')):
                # Root document: record it so its children validate via lookup.
                lookup[document['uuid']] = document['url']
                continue

            if not lookup.get(document['parent_uuid']):
                # Parent not in this manifest; it must already exist in the DB.
                parent_document = Document.selectBy(url=document['parent_url']).getOne(None)
                if not parent_document:
                    return False

                lookup[document['uuid']] = document['url']

        return True

    @staticmethod
    def _get_document_from_archive(uuid, handle):
        """Read and parse one document's JSON entry from the archive.

        :param uuid: document UUID, which is also the archive entry name
        :type handle: zipfile.ZipFile
        :param handle: open archive to read from
        :return: parsed document contents dict
        """
        return json.loads(handle.read(uuid))

    # pylint: disable=no-self-use
    def _update_from_parent(self, contents, parent_url):
        """Point ``contents`` at the newest local document matching ``parent_url``.

        NOTE(review): assumes a document with ``parent_url`` exists locally
        (validated earlier by _validate_manifest); ``document`` being None
        here would raise AttributeError — confirm that is intended.
        """
        document = Document.selectBy(url=parent_url, orderBy=DESC(Document.q.created), limit=1).getOne(None)
        contents['document']['parent'] = document.id
        contents['document']['path'] = document.path

    # pylint: disable=no-self-use
    def _save_document(self, user_id, contents):
        """Create the Document DB row from the archive contents.

        :param user_id: id of the user who started the migration job
        :param contents: document contents dict (mutated in place)
        :return: the document's original creation timestamp
        :rtype: arrow.Arrow
        """
        created = arrow.get(contents['document']['created'])
        contents['document']['created'] = created.datetime
        contents['document']['user_id'] = user_id
        document = Document(**contents['document'])

        # Extend the materialised path with the new row's id.
        # NOTE(review): the extended path is written back into `contents`
        # only — the Document row itself is never updated with it; verify
        # whether `document.path = path` was intended here.
        path = '{0}{1}/'.format(document.path, document.id)
        contents['document']['created'] = str(created)
        contents['document']['path'] = path

        return created

    def _upload_file(self, contents, handle):
        """Copy a File document's binary payload from the archive into S3.

        :param contents: document contents dict (mutated: records the bucket)
        :type handle: zipfile.ZipFile
        :param handle: open archive to read from
        :return:
        """
        bucket_name = self.registry.get('storage').get('bucket_name')
        filename = contents['file']['key']
        contents['bucket'] = bucket_name

        # NOTE(review): reads the payload from 'file/<key>' but the sibling
        # download job writes it at '<key>' — confirm the archive layouts match.
        key = Key(self.bucket, filename)
        key.set_contents_from_string(handle.read('file/{0}'.format(filename)))

    def _upload_multipage(self, contents, handle):
        """Copy a MultiPage document's per-page files into the files bucket.

        :type contents: dict
        :param contents: document contents dict (provides the document uuid)
        :type handle: zipfile.ZipFile
        :param handle: open archive to read from
        :return:
        """
        for item in handle.namelist():
            # Pages are stored under files/<uuid>/<relative-filename>.
            part = 'files/{0}/'.format(contents['document']['uuid'])
            if item.startswith(part):
                filename = item.split(part).pop()
                key = Key(self.files_bucket, '{0}/{1}'.format(contents['document']['uuid'], filename))
                key.content_type = mimetypes.guess_type(item)[0]
                key.set_contents_from_string(handle.read(item))

    def do_work(self, message=None):
        """Process one SQS upload-job message.

        ZipFile stored in storage. read from Job Message

        {
            "archive": "path/in/s3"
        }

        # documents order matter

        Manifest file structure
        {
            'documents': [
                {
                    'uuid': 'some-uuid',
                    'url': 'some-url',
                    'parent_url': 'some-parent-url',
                    'parent_uuid': 'some-parent-uuid'
                },
                ...
            ],
            'full': bool
        }

        :type message: boto.sqs.message.Message | None
        :param message: SQS message whose body carries the Job UUID
        :raises InvalidJobError: unknown job id, missing archive, or bad manifest
        :return:
        """
        if not message:
            return

        contents = json.loads(message.get_body())

        job_id = str(contents['Message'])
        job = JobDB.selectBy(uuid=job_id).getOne(None)
        if not job:
            log.error('Cannot find job %s', job_id)
            raise InvalidJobError('Invalid Job ID: {0}'.format(job_id))

        job.set(status='running')
        # NOTE(review): rebinds the `message` parameter to the job's stored
        # message dict; the SQS message is not used past this point.
        message = job.message

        # get archive file
        archive_key = self.bucket.get_key(job.message['file']['key'])
        if not archive_key:
            message['reason'] = 'Cannot find the archive in the S3 bucket.'
            job.set(status='failed', message=message)
            raise InvalidJobError('Cannot find archive in S3 bucket.')

        # Buffer the whole archive in memory so zipfile can seek.
        fp = StringIO(archive_key.get_contents_as_string())

        handle = zipfile.ZipFile(fp, mode='r', compression=zipfile.ZIP_DEFLATED)
        try:
            manifest_content = MigrationUploadJob._get_manifest(handle)
        except Exception:
            message['reason'] = 'Unable to retrieve manifest in archive'
            job.set(status='failed', message=message)
            raise InvalidJobError('Unable to retrieve manifest')

        if not MigrationUploadJob._validate_manifest(manifest_content['documents']):
            message['reason'] = 'Manifest found is not valid'
            job.set(status='failed', message=message)
            raise InvalidJobError('Manifest is not valid')

        # Manifest order matters: parents must be imported before children.
        for document in manifest_content['documents']:
            contents = MigrationUploadJob._get_document_from_archive(document['uuid'], handle)
            if document.get('parent_uuid') and document.get('parent_url'):
                self._update_from_parent(contents, document['parent_url'])

            created = self._save_document(job.message['user_id'], contents)
            if contents.get('file') and contents['document']['type'] == 'File':
                self._upload_file(contents, handle)
            elif contents.get('file') and contents['document']['type'] == 'MultiPage':
                self._upload_multipage(contents, handle)

            # Store the document JSON in S3 under day/month/year/uuid.
            key_name = '{0}/{1}/{2}/{3}'.format(created.day, created.month, created.year, contents['document']['uuid'])
            key = Key(self.bucket, key_name)
            key.set_contents_from_string(json.dumps(contents))

        job.set(status='complete')
        log.info('Setting job=%s to complete', job_id)