def post(self):
    data = request.json
    if not data or not data.get('file'):
        return Response(status=400)

    data['user_id'] = session['auth_user'].get('id', -1)
    name = data.get('name', 'Migration Upload {0}'.format(
        arrow.now().format('YYYY-MM-DD HH:mm')))
    job = Job.save({
        'name': name,
        'status': 'pending',
        'message': data
    })

    registry = Registry()
    topic_arn = registry.get('topics').get('topic').get('migrationupload')
    conn = boto.sns.connect_to_region(registry.get('region').get('region'))
    try:
        conn.publish(topic_arn, str(job.uuid), name)
    except BotoServerError as e:
        log.error('Cannot publish Job=%s to Topic=%s', job.uuid, topic_arn)
        log.exception(e)
        return Response(status=500)
    except AttributeError as e:
        log.error('Cannot publish Job=%s to Topic=%s "%s"', job.uuid, topic_arn, str(e))
        return Response(status=500)

    return Response(response=json.dumps({
        'notify_msg': {
            'title': 'Job Added',
            'message': 'Migration job has been added. Upload will commence shortly.',
            'type': 'success'
        }
    }), content_type='application/json', status=200)
def get(self):
    registry = Registry()
    if request.path == '/{0}'.format(self._document['document']['url']):
        return redirect('{0}/'.format(request.path))

    (_, key_name) = request.path.split('/{0}'.format(self._document['document']['url']))
    if not key_name or '/' == key_name:
        key_name = '/index.html'  # todo must have a default start page for multipage

    tmp_dir = os.path.abspath(os.path.join('/tmp/multipage', self._document['document']['uuid']))
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    file_path = os.path.abspath(os.path.join(tmp_dir, key_name[1:]))
    if os.path.exists(file_path):
        with open(file_path, 'r') as content:
            contents = content.read()
    else:
        key_name = '{0}{1}'.format(self._document['document']['uuid'], key_name)
        contents = S3.get_string(registry.get('files').get('bucket_name'), key_name)

        dir_name = os.path.dirname(file_path)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)

        with open(file_path, 'w') as write:
            write.write(contents)

    mimetype = mimetypes.guess_type(file_path)[0]
    return Response(response=contents, status=200, mimetype=mimetype)
def test_registry_region_cached(caplog):
    expected = 'test-region'
    caplog.setLevel(logging.DEBUG)

    with patch('hermes_cms.core.registry.open',
               mock_open(read_data='{"region": "%s"}' % (expected, )),
               create=True):
        registry = Registry()
        assert registry.get('region').get('region') == expected
class MultipageJob(Job):

    def __init__(self):
        self.registry = Registry(log=log)
        database_url = str(self.registry.get('database').get('database'))
        sqlhub.processConnection = connectionForURI(database_url)

    def do_work(self, message=None):
        """

        :type message: boto.sqs.message.Message | None
        :param message:
        :return:
        """
        if not message:
            return

        conn = boto.connect_s3()
        bucket = conn.get_bucket(self.registry.get('files').get('bucket_name'))

        contents = json.loads(message.get_body())
        job_id = str(contents['Message'])
        job = JobDB.selectBy(uuid=job_id).getOne(None)
        if not job:
            log.error('Cannot find job %s', job_id)
            raise InvalidJobError('Invalid Job ID: {0}'.format(job_id))

        job.set(status='running')
        message = job.message

        document = Document.selectBy(uuid=job.message['document']).getOne(None)
        if not document:
            message['reason'] = 'No Document exists'
            job.set(status='failed', message=message)
            raise FatalJobError('No Document Exists')

        record = Document.get_document(document)
        fp = StringIO(S3.get_string(self.registry.get('storage').get('bucket_name'),
                                    record['file']['key']))

        with zipfile.ZipFile(fp, 'r') as zip_handle:
            for name in zip_handle.namelist():
                if name.endswith('/'):
                    continue

                key_name = '{0}/{1}'.format(document.uuid, name)
                key = Key(bucket=bucket, name=key_name)
                key.content_type = mimetypes.guess_type(name)[0]
                key.set_contents_from_string(zip_handle.read(name))
                log.info('Uploaded %s', key_name)

        job.set(status='complete')
        if job.message.get('on_complete', {}).get('alter'):
            document.set(**job.message['on_complete']['alter'])

        log.info('Setting job=%s to complete', job_id)
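A minimal sketch of the SQS body that `do_work` above unpacks; only the `Message` field (the job uuid, as published via SNS) is actually read by the code, and the remaining envelope keys are standard SNS notification fields assumed here for illustration.

# Hypothetical SQS body as delivered by an SNS subscription (illustrative only).
# do_work() relies solely on 'Message' carrying the Job uuid.
example_body = json.dumps({
    'Type': 'Notification',
    'MessageId': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',  # made-up id
    'Subject': 'Multipage Subject',
    'Message': 'job-uuid-goes-here'
})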
def post(self):
    """
    For specific parents:

    {
        "document": [{
            "parent_id": "uuid"
        }],
        "all_documents": false
    }

    {
        "document": [],
        "all_documents": true
    }

    :rtype: flask.Response
    :return: A flask Response object
    """
    data = request.json
    if not data:
        return Response(status=400)

    # exactly one of 'document' and 'all_documents' must be supplied
    if not ((data.get('document') and not data.get('all_documents')) or
            (not data.get('document') and data.get('all_documents'))):
        return Response(status=400)

    name = data.get('name', 'Migration Download {0}'.format(
        arrow.now().format('YYYY-MM-DD HH:mm')))
    job = Job.save({
        'name': name,
        'status': 'pending',
        'message': data
    })

    topic_arn = Registry().get('topics').get('topic').get('migrationdownload')
    conn = boto.sns.connect_to_region(Registry().get('region').get('region'))
    try:
        conn.publish(topic_arn, str(job.uuid), name)
    except BotoServerError as e:
        log.error('Cannot publish Job=%s to Topic=%s', job.uuid, topic_arn)
        log.exception(e)
        return Response(status=500)
    except AttributeError as e:
        log.error('Cannot publish Job=%s to Topic=%s "%s"', job.uuid, topic_arn, str(e))
        return Response(status=500)

    return Response(response=json.dumps({
        'notify_msg': {
            'title': 'Job Added',
            'message': 'Migration job has been added. Download will be ready shortly.',
            'type': 'success'
        }
    }), content_type='application/json', status=200)
def __init__(self):
    self.registry = Registry(log=log)
    database_url = str(self.registry.get('database').get('database'))
    sqlhub.processConnection = connectionForURI(database_url)

    conn = boto.connect_s3()
    file_conn = boto.connect_s3()
    self.bucket = conn.get_bucket(self.registry.get('storage').get('bucket_name'))
    self.files_bucket = file_conn.get_bucket(self.registry.get('files').get('bucket_name'))
def sign_upload_url():
    registry = Registry()
    bucket = registry.get('storage')['bucket_name']
    signed_form = S3.generate_form(bucket, region=registry.get('region').get('region'))
    signed_form['file'] = {
        'bucket': bucket,
        'key': [item['value'] for item in signed_form['fields']
                if item['name'] == 'key'].pop()
    }

    return Response(response=json.dumps(signed_form),
                    content_type='application/json', status=201)
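A sketch of how a client might consume the signed form above. The `fields` list and the added `file` entry are guaranteed by the handler; the exact entries `S3.generate_form` puts in `fields` (key, policy, signature, ...), the `requests` dependency, and the S3 endpoint format are assumptions.

# Illustrative client-side use of the signed form (not part of the codebase).
import requests  # assumed available in the client

form = json.loads(sign_upload_url_response.get_data())  # hypothetical Response from the view
fields = dict((item['name'], item['value']) for item in form['fields'])
# Standard S3 browser-POST upload using the signed fields.
requests.post('https://{0}.s3.amazonaws.com/'.format(form['file']['bucket']),
              data=fields,
              files={'file': open('upload.zip', 'rb')})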
def test_registry_region():
    expected = 'my region'
    bucket_name = 'my-bucket'

    s3 = boto.connect_s3()
    bucket = s3.create_bucket(bucket_name)
    key = bucket.new_key('region')
    key.set_contents_from_string('{"region": "%s"}' % (expected, ))

    with patch('hermes_cms.core.registry.Registry._bucket_name',
               new_callable=PropertyMock) as mock_bucket_name:
        with patch.object(Registry, '_write_key'):
            mock_bucket_name.return_value = bucket_name
            with patch.object(Registry, '_read_cache') as mock_read_cache:
                mock_read_cache.return_value = None
                registry = Registry()
                assert registry.get('region').get('region') == expected
def run(self):
    setup_logging()
    log = logging.getLogger('hermes_cms.service.runner')

    while True:
        try:
            config = Registry().get(self.config_file)
        # pylint: disable=broad-except
        except Exception as e:
            log.exception(e)
            # without a config there is nothing to schedule; retry the fetch
            continue

        module_name = config['jobs'][self.name]['module_name']
        class_name = config['jobs'][self.name]['class_name']
        mod = __import__(module_name, fromlist=[class_name])
        service_class = getattr(mod, class_name)
        job_class = service_class(self.name, self.region, config)

        seconds = int(config['jobs'][self.name]['frequency'])

        scheduler = BlockingScheduler()
        scheduler.add_job(job_class.do_action, IntervalTrigger(seconds=seconds))
        log.info('Starting Scheduled job %s', self.name)
        scheduler.start()
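A sketch of the registry document `run()` expects. Only the keys actually looked up above (`jobs`, `module_name`, `class_name`, `frequency`) are certain; the concrete values are made up for illustration.

# Hypothetical config document matching the lookups in run() above.
example_config = {
    'jobs': {
        'multipage': {
            'module_name': 'hermes_cms.service.multipage',  # assumed module path
            'class_name': 'MultipageJob',
            'frequency': '30'  # seconds between do_action runs
        }
    }
}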
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('action', choices=['start', 'stop', 'restart'],
                        help='Start service action')
    parser.add_argument('-j', '--job', required=True)
    parser.add_argument('-c', '--config', required=True)
    args = parser.parse_args()
    sys.argv = (sys.argv[0], args.action)

    region = None
    while not region:
        try:
            region = Registry().get('region').get('region')
        except (TypeError, KeyError, S3ResponseError):
            time.sleep(5)

    app = DaemonApplication(args.job, region, args.config)
    daemon_runner = ServiceRunner(app)
    try:
        daemon_runner.do_action()
    except (daemon.runner.DaemonRunnerStartFailureError,
            daemon.runner.DaemonRunnerStopFailureError):
        pass

    return 0
def post(self):
    document_data = request.json
    validation = DocumentValidation(data=document_data)
    if not validation.validate():
        return Response(response=json.dumps({'fields': validation.errors()}),
                        status=400, content_type='application/json')

    if 'validate' in request.args:
        return Response(response=json.dumps(document_data), status=200,
                        content_type='application/json')

    # todo we should use Auth class to get this
    document_data['document']['user_id'] = session['auth_user'].get('id', -1)

    document = DocumentDB.save(document_data)

    document_type = document_data['document']['type']
    helper_class = Registry().get('document').get(document_type, {}).get('admin_helper', {})
    if helper_class:
        common.load_class(helper_class.get('document_module'),
                          helper_class.get('document_class'),
                          document).do_work()

    return Response(response=json.dumps({
        'notify_msg': {
            'title': 'Document Modified' if document_data.get('id') else 'Document Added',
            'message': '{0} has been {1}'.format(
                str(document.name).strip(),
                'modified' if document_data.get('id') else 'added'),
            'type': 'success'
        }
    }), status=200, content_type='application/json')
def do_work(self):
    log = logging.getLogger('hermes_cms.helpers.multipage')

    updated_record = {}
    alter_record = {}
    if self.document.published:
        alter_record.update({
            'published': True
        })
        updated_record.update({
            'published': False
        })
        log.debug('Document: %s removing published status', str(self.document.uuid))

    # create job
    name = '{0} multipage job'.format(self.document.name)[0:254]
    job = Job.save({
        'name': name,
        'status': 'pending',
        'message': {
            'document': str(self.document.uuid),
            'on_complete': {
                'alter': alter_record
            }
        }
    })
    log.info('Created Job for Document %s as JobID=%s', self.document.uuid, job.uuid)

    # push this to sns job topic
    topic_arn = Registry().get('topics').get('topic').get('multipage')
    conn = boto.sns.connect_to_region(Registry().get('region').get('region'))
    try:
        conn.publish(topic_arn, str(job.uuid), 'Multipage Subject')
    except BotoServerError as e:
        log.error('Cannot publish Job=%s to Topic=%s', str(job.uuid), topic_arn)
        log.exception(e)

    if updated_record:
        self.document.set(**updated_record)
def put(self, document_id=None):
    document = DocumentDB.selectBy(uuid=document_id).getOne(None)
    if not document:
        return Response(status=404)

    document_data = DocumentDB.get_document(document)
    document_data['id'] = document.id
    document_data['document']['user_id'] = session['auth_user'].get('id', -1)
    document = DocumentDB.save(document_data)

    document_type = document_data['document']['type']
    helper_class = Registry().get('document').get(document_type, {}).get('admin_helper', {})
    if helper_class:
        common.load_class(
            helper_class.get('document_module'),
            helper_class.get('document_class'),
            document
        ).do_work()

    return Response(response=json.dumps(document_data), status=200)
def url_rules():
    rules = Registry().get('admin_rules').get('rules')
    for admin_rule in rules:
        module = common.load_module_class(admin_rule['module_name'], admin_rule['class_name'])
        view = module.as_view(str(admin_rule['name']))

        _url_rules = admin_rule.get('urls')
        if not _url_rules:
            _url_rules = [{
                'url': admin_rule['url'],
                'methods': admin_rule['methods']
            }]

        for rule in _url_rules:
            route.add_url_rule(rule['url'], view_func=view, methods=rule['methods'])
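A sketch of an `admin_rules` registry entry that `url_rules()` above would accept. The key names mirror the lookups in the code; the module, class, and URL values are hypothetical.

# Hypothetical 'admin_rules' registry document (illustrative only).
example_admin_rules = {
    'rules': [
        {
            'name': 'document',
            'module_name': 'hermes_cms.views.admin',  # assumed module path
            'class_name': 'DocumentView',             # assumed view class
            # either a single url/methods pair ...
            'url': '/admin/document',
            'methods': ['GET', 'POST']
            # ... or an optional 'urls' list of {'url': ..., 'methods': ...} dicts
        }
    ]
}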
def create_app(app_name='hermes_cms', config_obj=None, blueprints=None):
    """

    :type app_name: str
    :param app_name:
    :type config_obj: object|None
    :param config_obj:
    :type blueprints: list|None
    :param blueprints:
    :return:
    """
    app = Flask(app_name)
    if config_obj:
        app.config.from_object(config_obj)
    else:
        # todo this needs to be in Configuration Registry
        app.secret_key = 'testing-key'

    blueprints = blueprints or Registry().get('blueprint').get('blueprint')
    for blueprint in blueprints:
        module = __import__(blueprint['name'], fromlist=blueprint['from'])
        route = getattr(module, blueprint['from'])
        if hasattr(module, 'url_rules'):
            module.url_rules()
        app.register_blueprint(route, **blueprint.get('kwargs', {}))

    def error_handler(error):
        log.exception(str(error))
        return Response(response=json.dumps({
            'notify_msg': {
                'title': 'Server Error',
                'message': 'An internal server error occurred.',
                'type': 'error'
            }
        }), content_type='application/json', status=500)

    app.register_error_handler(Exception, error_handler)

    app.before_request_funcs.setdefault(None, []).append(db_connect)
    app.after_request_funcs.setdefault(None, []).append(db_close)

    return app
class RegistryResolver(object):

    def __init__(self):
        self.registry = Registry()

    # pylint: disable=no-self-use
    def _get_dict(self, dict_src, path):
        """
        @param dict_src The dictionary source to use to find keys in
        @param path a path to find keys within object
        @return parsed dictionary
        """
        dict_dest = dict_src
        for item in path.split('.'):
            dict_dest = dict_dest[item]

        return dict_dest

    def _resolve_string(self, value):
        """
        :type value: basestring
        :param value: A string to be parsed from a registry.
        :return:
        """
        parsed_path = urlparse(value)
        if not parsed_path.scheme:
            return value

        full_path = parsed_path.path.lstrip('/')
        (bucket, path) = full_path.split('.', 1)
        resolved_config = self.registry.get(bucket)
        return self._get_dict(resolved_config, path)

    def resolver(self, value):
        """
        :param value:
        :return:
        """
        if isinstance(value, basestring):
            return self._resolve_string(value)

        # non-string values pass through unchanged
        return value
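Illustrative use of the resolver above. Per `_resolve_string`, a value with a URL scheme is read as `<scheme>:///<registry-doc>.<dotted.path>`; the `registry` scheme name and the registry contents here are assumptions.

resolver = RegistryResolver()

resolver.resolver('plain string')                     # no scheme -> returned as-is
resolver.resolver('registry:///storage.bucket_name')  # -> Registry().get('storage')['bucket_name']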
def db_connect():
    database_url = current_app.config.get('DATABASE')
    if not database_url:
        database_url = str(Registry().get('database').get('database'))

    sqlhub.threadConnection = connectionForURI(database_url)
def __init__(self):
    self.registry = Registry(log=log)
    database_url = str(self.registry.get('database').get('database'))
    sqlhub.processConnection = connectionForURI(database_url)
def __init__(self):
    self.registry = Registry()
class MigrationUploadJob(Job):

    def __init__(self):
        self.registry = Registry(log=log)
        database_url = str(self.registry.get('database').get('database'))
        sqlhub.processConnection = connectionForURI(database_url)

        conn = boto.connect_s3()
        file_conn = boto.connect_s3()
        self.bucket = conn.get_bucket(self.registry.get('storage').get('bucket_name'))
        self.files_bucket = file_conn.get_bucket(self.registry.get('files').get('bucket_name'))

    @staticmethod
    def _get_manifest(handle):
        """

        :type handle: zipfile.ZipFile
        :param handle:
        :return:
        :raises Exception
        """
        return json.loads(handle.read('manifest'))

    @staticmethod
    def _validate_manifest(documents):
        lookup = {}
        for document in documents:
            if not (document.get('parent_uuid') and document.get('parent_url')):
                lookup[document['uuid']] = document['url']
                continue

            if not lookup.get(document['parent_uuid']):
                parent_document = Document.selectBy(url=document['parent_url']).getOne(None)
                if not parent_document:
                    return False

            lookup[document['uuid']] = document['url']

        return True

    @staticmethod
    def _get_document_from_archive(uuid, handle):
        """

        :param uuid:
        :type handle: zipfile.ZipFile
        :param handle:
        :return:
        """
        return json.loads(handle.read(uuid))

    # pylint: disable=no-self-use
    def _update_from_parent(self, contents, parent_url):
        document = Document.selectBy(url=parent_url,
                                     orderBy=DESC(Document.q.created),
                                     limit=1).getOne(None)
        contents['document']['parent'] = document.id
        contents['document']['path'] = document.path

    # pylint: disable=no-self-use
    def _save_document(self, user_id, contents):
        """

        :param user_id:
        :param contents:
        :return:
        :rtype: arrow.Arrow
        """
        created = arrow.get(contents['document']['created'])
        contents['document']['created'] = created.datetime
        contents['document']['user_id'] = user_id
        document = Document(**contents['document'])

        path = '{0}{1}/'.format(document.path, document.id)
        contents['document']['created'] = str(created)
        contents['document']['path'] = path
        return created

    def _upload_file(self, contents, handle):
        """

        :param contents:
        :type handle: zipfile.ZipFile
        :param handle:
        :return:
        """
        bucket_name = self.registry.get('storage').get('bucket_name')
        filename = contents['file']['key']
        contents['bucket'] = bucket_name

        key = Key(self.bucket, filename)
        key.set_contents_from_string(handle.read('file/{0}'.format(filename)))

    def _upload_multipage(self, contents, handle):
        """

        :type contents: dict
        :param contents:
        :type handle: zipfile.ZipFile
        :param handle:
        :return:
        """
        for item in handle.namelist():
            part = 'files/{0}/'.format(contents['document']['uuid'])
            if item.startswith(part):
                filename = item.split(part).pop()
                key = Key(self.files_bucket,
                          '{0}/{1}'.format(contents['document']['uuid'], filename))
                key.content_type = mimetypes.guess_type(item)[0]
                key.set_contents_from_string(handle.read(item))

    def do_work(self, message=None):
        """
        ZipFile stored in storage. Read from Job Message:

        {
            "archive": "path/in/s3"
        }

        # document order matters
        Manifest file structure:
        {
            'documents': [
                {
                    'uuid': 'some-uuid',
                    'url': 'some-url',
                    'parent_url': 'some-parent-url',
                    'parent_uuid': 'some-parent-uuid'
                },
                ...
            ],
            'full': bool
        }

        :param message:
        :return:
        """
        if not message:
            return

        contents = json.loads(message.get_body())
        job_id = str(contents['Message'])
        job = JobDB.selectBy(uuid=job_id).getOne(None)
        if not job:
            log.error('Cannot find job %s', job_id)
            raise InvalidJobError('Invalid Job ID: {0}'.format(job_id))

        job.set(status='running')
        message = job.message

        # get archive file
        archive_key = self.bucket.get_key(job.message['file']['key'])
        if not archive_key:
            message['reason'] = 'Cannot find the archive in the S3 bucket.'
            job.set(status='failed', message=message)
            raise InvalidJobError('Cannot find archive in S3 bucket.')

        fp = StringIO(archive_key.get_contents_as_string())
        handle = zipfile.ZipFile(fp, mode='r', compression=zipfile.ZIP_DEFLATED)

        try:
            manifest_content = MigrationUploadJob._get_manifest(handle)
        except Exception:
            message['reason'] = 'Unable to retrieve manifest in archive'
            job.set(status='failed', message=message)
            raise InvalidJobError('Unable to retrieve manifest')

        if not MigrationUploadJob._validate_manifest(manifest_content['documents']):
            message['reason'] = 'Manifest found is not valid'
            job.set(status='failed', message=message)
            raise InvalidJobError('Manifest is not valid')

        for document in manifest_content['documents']:
            contents = MigrationUploadJob._get_document_from_archive(document['uuid'], handle)
            if document.get('parent_uuid') and document.get('parent_url'):
                self._update_from_parent(contents, document['parent_url'])

            created = self._save_document(job.message['user_id'], contents)

            if contents.get('file') and contents['document']['type'] == 'File':
                self._upload_file(contents, handle)
            elif contents.get('file') and contents['document']['type'] == 'MultiPage':
                self._upload_multipage(contents, handle)

            key_name = '{0}/{1}/{2}/{3}'.format(created.day, created.month, created.year,
                                                contents['document']['uuid'])
            key = Key(self.bucket, key_name)
            key.set_contents_from_string(json.dumps(contents))

        job.set(status='complete')
        log.info('Setting job=%s to complete', job_id)
class MigrationDownloadJob(Job):

    def __init__(self):
        self.registry = Registry(log=log)
        database_url = str(self.registry.get('database').get('database'))
        sqlhub.processConnection = connectionForURI(database_url)

        conn = boto.connect_s3()
        file_conn = boto.connect_s3()
        self.bucket = conn.get_bucket(self.registry.get('storage').get('bucket_name'))
        self.files_bucket = file_conn.get_bucket(self.registry.get('files').get('bucket_name'))

    # pylint: disable=no-self-use
    def _get_document_query(self, documents):
        uuids = []
        for item in documents:
            document = Document.selectBy(uuid=item['parent_id']).getOne(None)
            for doc in Document.select(LIKE(Document.q.path, '{0}%'.format(document.path))):
                uuids.append(doc.uuid)

        return IN(Document.q.uuid, uuids)

    def _handle_document(self, document, zip_handle):
        """

        :type document: hermes_cms.db.document.Document
        :param document:
        :type zip_handle: zipfile.ZipFile
        :param zip_handle:
        :return:
        """
        key_name = '{0}/{1}/{2}/{3}'.format(document.created.day, document.created.month,
                                            document.created.year, document.uuid)
        contents = self.bucket.get_key(key_name).get_contents_as_string()
        json_content = json.loads(contents)
        zip_handle.writestr(document.uuid, contents)

        if 'file' in json_content and document.type == 'File':
            file_contents = self.bucket.get_key(json_content['file']['key']).get_contents_as_string()
            zip_handle.writestr(json_content['file']['key'], file_contents)

        if 'MultiPage' == document.type:
            for item in self.files_bucket.list(document.uuid):
                zip_handle.writestr('files/{0}'.format(item.name), item.get_contents_as_string())

    # pylint: disable=no-self-use
    def _get_document_parent_url(self, parent):
        """

        :param parent:
        :return:
        :rtype: hermes_cms.db.document.Document | None
        """
        if not parent:
            return None

        return Document.select(Document.q.id == parent,
                               orderBy=DESC(Document.q.created),
                               limit=1).getOne(None)

    def do_work(self, message=None):
        """
        Job message structure:

        {
            "document": [{
                "parent_id": "uuid"
            }],
            "all_documents": false
        }

        {
            "document": [],
            "all_documents": true
        }

        Each document is written with its uuid as filename:
        {
            "document": {},
            "file": {},
        }

        Files use their full key name.

        Manifest file structure:
        {
            'documents': [
                {
                    'uuid': 'some-uuid',
                    'url': 'some-url',
                    'parent_url': 'some-parent-url',
                    'parent_uuid': 'some-parent-uuid'
                },
                ...
            ],
            'full': bool
        }

        :type message: boto.sqs.message.Message | None
        :param message:
        :return:
        """
        if not message:
            return

        contents = json.loads(message.get_body())
        job_id = str(contents['Message'])
        job = JobDB.selectBy(uuid=job_id).getOne(None)
        if not job:
            log.error('Cannot find job %s', job_id)
            raise InvalidJobError('Invalid Job ID: {0}'.format(job_id))

        job.set(status='running')

        and_ops = [Document.q.archived == False,
                   Document.q.published == True]
        if not job.message.get('all_documents'):
            and_ops.append(self._get_document_query(job.message.get('document')))

        manifest = {
            'documents': [],
            'full': job.message.get('all_documents', False)
        }

        zip_contents = StringIO()
        zip_handle = zipfile.ZipFile(zip_contents, 'w', compression=zipfile.ZIP_DEFLATED)
        for document in Document.query(Document.all(), where=AND(*and_ops)):
            parent_document = self._get_document_parent_url(document.parent)
            manifest['documents'].append({
                'uuid': document.uuid,
                'url': document.url,
                'parent_url': None if not parent_document else parent_document.url,
                'parent_uuid': None if not parent_document else parent_document.uuid
            })
            self._handle_document(document, zip_handle)
            log.info('Adding document uuid=%s to zip archive', str(document.uuid))

        zip_handle.writestr('manifest', json.dumps(manifest))
        zip_handle.close()

        zip_key = Key(self.bucket, job_id)
        zip_key.content_type = 'application/zip'
        zip_key.set_contents_from_string(zip_contents.getvalue())
        log.info("Created ZIP for Job '%s'", str(job_id))

        message = job.message
        message['download'] = {
            'bucket': self.bucket.name,
            'key': job_id
        }
        job.set(status='complete', message=message)
        log.info('Setting job=%s to complete', job_id)