def register_output_contents(coll_scope, coll_name, contents, request_id=None, workload_id=None,
                             relation_type=CollectionRelationType.Output, session=None):
    """
    Register contents of a collection found by scope, name and request/workload.

    The collection is looked up through the transform ids returned for the
    request id / workload id; exactly one collection must match. Every entry
    of ``contents`` must already exist in that collection. The fields of each
    entry other than scope/name/min_id/max_id are passed, together with the
    stored ``content_id``, to ``orm_contents.update_contents``.

    :param coll_scope: scope of the collection.
    :param coll_name: name of the collection.
    :param contents: list of contents [{'scope': <scope>, 'name': <name>, 'min_id': min_id, 'max_id': max_id,
                                        'status': <status>, 'path': <path>}].
    :param request_id: the request id.
    :param workload_id: The workload_id of the request.
    :param relation_type: The relation type between the collection and the transform.
    :param session: The database session in use.

    :raises WrongParameterException: If the number of matched collections is not one
                                     or a content is not found in the collection.
    """
    transform_ids = orm_transforms.get_transform_ids(request_id=request_id,
                                                     workload_id=workload_id,
                                                     session=session)
    if transform_ids:
        collections = orm_collections.get_collections(scope=coll_scope, name=coll_name,
                                                      transform_id=transform_ids,
                                                      relation_type=relation_type,
                                                      session=session)
    else:
        collections = []

    coll_def = "request_id=%s, workload_id=%s, coll_scope=%s, coll_name=%s, relation_type: %s" % (
        request_id, workload_id, coll_scope, coll_name, relation_type)

    if len(collections) != 1:
        # Separate the count from the collection description so the message stays readable.
        msg = "There should be only one collection matched. However there are %s collections: %s" % (
            len(collections), coll_def)
        raise exceptions.WrongParameterException(msg)

    coll_id = collections[0]['coll_id']

    # These fields only identify the content; they are not part of the update itself.
    lookup_keys = ('scope', 'name', 'min_id', 'max_id')
    parameters = []
    for content in contents:
        ex_content = orm_contents.get_content(coll_id=coll_id, scope=content['scope'],
                                              name=content['name'], min_id=content['min_id'],
                                              max_id=content['max_id'], session=session)
        if not ex_content:
            content_def = "scope: %s, name: %s, min_id: %s, max_id: %s" % (
                content['scope'], content['name'], content['min_id'], content['max_id'])
            msg = "No matched content in collection(%s) with content(%s)" % (coll_def, content_def)
            raise exceptions.WrongParameterException(msg)

        # Build a fresh dictionary instead of deleting keys from the caller's object.
        parameter = {key: value for key, value in content.items() if key not in lookup_keys}
        parameter['content_id'] = ex_content['content_id']
        parameters.append(parameter)

    orm_contents.update_contents(parameters, session=session)
def get_request_ids_by_workload_id(workload_id, session=None):
    """
    Get the ids of all requests that have the given workload id.

    :param workload_id: The workload_id of the request.
    :param session: The database session in use.

    :raises WrongParameterException: If workload_id is None.
    :raises NoObject: If the query raises NoResultFound.

    :returns: list of request ids; empty if no request matches.
    """
    if workload_id is None:
        # The exception must be raised; returning it would hand callers an
        # exception object in place of a list of ids.
        raise exceptions.WrongParameterException("workload_id should not be None")

    try:
        query = session.query(models.Request.request_id)\
                       .with_hint(models.Request, "INDEX(REQUESTS REQUESTS_SCOPE_NAME_IDX)", 'oracle')\
                       .filter(models.Request.workload_id == workload_id)
        # Each row holds a single column, the request id.
        return [row[0] for row in query.all()]
    except sqlalchemy.orm.exc.NoResultFound as error:
        raise exceptions.NoObject('request with workload_id:%s cannot be found: %s' % (workload_id, error))
def get_match_contents(coll_scope, coll_name, scope, name, min_id=None, max_id=None, request_id=None,
                       workload_id=None, relation_type=None, only_return_best_match=False, session=None):
    """
    Get matched contents with collection scope, collection name, scope, name, min_id, max_id,
    request id, workload id and only_return_best_match.

    NOTE(review): workload_id is only validated here; just request_id is
    forwarded to the collection lookup.

    :param coll_scope: scope of the collection.
    :param coll_name: name of the collection.
    :param scope: scope of the content.
    :param name: name of the content.
    :param min_id: min_id of the content.
    :param max_id: max_id of the content.
    :param request_id: the request id.
    :param workload_id: The workload_id of the request.
    :param relation_type: The relation type between the collection and the transform.
    :param only_return_best_match: only return best matched content if it's true.
    :param session: The database session in use.

    :raises WrongParameterException: If both request_id and workload_id are None,
                                     or coll_scope or coll_name is None.

    :returns: list of contents. With only_return_best_match, at most one content:
              the one with the smallest (max_id - min_id) range.
    """
    if (request_id is None and workload_id is None) or coll_scope is None or coll_name is None:
        msg = "Only one of (request_id, workload_id) can be None. All other parameters should not be None: "
        msg += "request_id=%s, workload_id=%s, coll_scope=%s, coll_name=%s" % (
            request_id, workload_id, coll_scope, coll_name)
        raise exceptions.WrongParameterException(msg)

    coll_id = orm_collections.get_collection_id_by_scope_name(coll_scope, coll_name, request_id,
                                                              relation_type, session=session)
    contents = orm_contents.get_match_contents(coll_id=coll_id, scope=scope, name=name,
                                               min_id=min_id, max_id=max_id, session=session)

    # Nothing to narrow down for zero or one match; in particular an empty
    # result stays an empty list instead of becoming [None].
    if not only_return_best_match or len(contents) <= 1:
        return contents

    # min() keeps the first of several equally narrow contents.
    best_match = min(contents, key=lambda row: row['max_id'] - row['min_id'])
    return [best_match]
def get_request_id_by_workload_id(workload_id, session=None):
    """
    Get request id or raise a NoObject exception.

    Only the first row returned by the query is used.

    :param workload_id: The workload_id of the request.
    :param session: The database session in use.

    :raises WrongParameterException: If workload_id is None.
    :raises NoObject: If no request is found.

    :returns: Request id.
    """
    if workload_id is None:
        # The exception must be raised; returning it would hand callers an
        # exception object in place of a request id.
        raise exceptions.WrongParameterException("workload_id should not be None")

    try:
        select = "select request_id from atlas_idds.requests where workload_id=:workload_id"
        result = session.execute(text(select), {'workload_id': workload_id})
        row = result.fetchone()
        if not row:
            raise exceptions.NoObject('request with workload_id:%s cannot be found.' % (workload_id))
        return row[0]
    except sqlalchemy.orm.exc.NoResultFound as error:
        raise exceptions.NoObject('request with workload_id:%s cannot be found: %s' % (workload_id, error))
def get_workprogresses_by_status(status, period=None, locking=False, bulk_size=None, to_json=False, session=None):
    """
    Retrieve workprogresses in the given status(es) whose next poll time has passed.

    :param status: a status or a list of statuses of the workprogress data.
    :param period: if set, only select workprogresses whose updated_at is older than this many seconds.
    :param locking: if true, only select workprogresses whose locking is Idle.
    :param bulk_size: Size limitation per retrieve.
    :param to_json: whether to return json format.
    :param session: The database session in use.

    :raises WrongParameterException: If status is None.
    :raises NoObject: If the query raises NoResultFound.

    :returns: list of Workprogress dictionaries.
    """
    try:
        if status is None:
            raise exceptions.WrongParameterException("status should not be None")

        status = status if isinstance(status, (list, tuple)) else [status]
        if len(status) == 1:
            # NOTE(review): a single status is duplicated before being used in
            # the IN clause; the reason is not visible here.
            status = [status[0]] * 2

        workprogress = models.Workprogress
        query = session.query(workprogress)
        query = query.with_hint(workprogress, "INDEX(WORKPROGRESSES WORKPROGRESS_STATUS_PRIO_IDX)", 'oracle')
        query = query.filter(workprogress.status.in_(status))
        query = query.filter(workprogress.next_poll_at < datetime.datetime.utcnow())

        if period is not None:
            updated_before = datetime.datetime.utcnow() - datetime.timedelta(seconds=period)
            query = query.filter(workprogress.updated_at < updated_before)
        if locking:
            query = query.filter(workprogress.locking == WorkprogressLocking.Idle)

        query = query.order_by(asc(workprogress.updated_at))
        query = query.order_by(desc(workprogress.priority))
        if bulk_size:
            query = query.limit(bulk_size)

        rows = query.all() or []
        return [row.to_dict_json() if to_json else row.to_dict() for row in rows]
    except sqlalchemy.orm.exc.NoResultFound as error:
        raise exceptions.NoObject('No workprogresses with status: %s, period: %s, locking: %s, %s' %
                                  (status, period, locking, error))
def update_request_with_transforms(request_id, parameters, transforms_to_add, transforms_to_extend, session=None):
    """
    Update a request together with its new and existing transforms.

    For every entry of ``transforms_to_add`` a transform is added, followed by
    its input, log and output collections (in that order). Every entry of
    ``transforms_to_extend`` updates an existing transform. Finally the
    request itself is updated.

    :param request_id: the request id.
    :param parameters: A dictionary of parameters to update on the request.
    :param transforms_to_add: list of transform dictionaries. Each must hold a non-empty 'collections'
                              dictionary with the keys 'input_collections', 'log_collections' and
                              'output_collections'; the other items are passed to add_transform.
    :param transforms_to_extend: list of transform dictionaries. Each must hold 'transform_id';
                                 the other items are the parameters to update.
    :param session: The database session in use.

    :raises WrongParameterException: If a transform to add has no collections.

    :returns: The result of orm_requests.update_request.
    """
    for transform in transforms_to_add:
        collections = transform.get('collections')
        if not collections:
            msg = "Transform must have collections, such as input collection, output collection and log collection"
            raise exceptions.WrongParameterException(msg)

        # Pass a filtered copy instead of deleting 'collections' from the
        # caller's dictionary, so the same input can be processed again.
        transform_args = {key: value for key, value in transform.items() if key != 'collections'}
        transform_id = orm_transforms.add_transform(**transform_args, session=session)

        # The workload id is the same for all output collections of this
        # transform; a missing or None transform_metadata means no workload id.
        transform_metadata = transform.get('transform_metadata') or {}
        workload_id = transform_metadata['workload_id'] if 'workload_id' in transform_metadata else None

        input_coll_ids = []
        for collection in collections['input_collections']:
            collection['transform_id'] = transform_id
            input_coll_ids.append(orm_collections.add_collection(**collection, session=session))

        log_coll_ids = []
        for collection in collections['log_collections']:
            collection['transform_id'] = transform_id
            log_coll_ids.append(orm_collections.add_collection(**collection, session=session))

        # Output collections are added last because their metadata refers to
        # the ids of the input and log collections.
        for collection in collections['output_collections']:
            collection['transform_id'] = transform_id
            collection['coll_metadata'] = {'transform_id': transform_id,
                                           'workload_id': workload_id,
                                           'input_collections': input_coll_ids,
                                           'log_collections': log_coll_ids}
            orm_collections.add_collection(**collection, session=session)

    for transform in transforms_to_extend:
        transform_id = transform['transform_id']
        # Everything except the id is a parameter to update.
        transform_parameters = {key: value for key, value in transform.items() if key != 'transform_id'}
        orm_transforms.update_transform(transform_id, parameters=transform_parameters, session=session)

    return orm_requests.update_request(request_id, parameters, session=session)
def get_match_contents(coll_scope, coll_name, scope, name, min_id=None, max_id=None, request_id=None,
                       workload_id=None, relation_type=None, only_return_best_match=False,
                       to_json=False, session=None):
    """
    Get matched contents with collection scope, collection name, scope, name, min_id, max_id,
    request id, workload id and only_return_best_match.

    The collection is looked up through the transform ids returned for the
    request id / workload id; exactly one collection must match.

    :param coll_scope: scope of the collection.
    :param coll_name: name of the collection.
    :param scope: scope of the content.
    :param name: name of the content.
    :param min_id: min_id of the content.
    :param max_id: max_id of the content.
    :param request_id: the request id.
    :param workload_id: The workload_id of the request.
    :param relation_type: The relation type between the collection and the transform.
    :param only_return_best_match: only return best matched content if it's true.
    :param to_json: forwarded to orm_contents.get_match_contents.
    :param session: The database session in use.

    :raises WrongParameterException: If the number of matched collections is not one.

    :returns: list of contents. With only_return_best_match, at most one content:
              the one with the smallest (max_id - min_id) range.
    """
    transform_ids = orm_transforms.get_transform_ids(request_id=request_id,
                                                     workload_id=workload_id,
                                                     session=session)
    if transform_ids:
        collections = orm_collections.get_collections(scope=coll_scope, name=coll_name,
                                                      transform_id=transform_ids,
                                                      relation_type=relation_type,
                                                      session=session)
    else:
        collections = []

    if len(collections) != 1:
        coll_def = "request_id=%s, workload_id=%s, coll_scope=%s, coll_name=%s, relation_type: %s" % (
            request_id, workload_id, coll_scope, coll_name, relation_type)
        # Separate the count from the collection description so the message stays readable.
        msg = "There should be only one collection matched. However there are %s collections: %s" % (
            len(collections), coll_def)
        raise exceptions.WrongParameterException(msg)

    coll_id = collections[0]['coll_id']

    contents = orm_contents.get_match_contents(coll_id=coll_id, scope=scope, name=name,
                                               min_id=min_id, max_id=max_id,
                                               to_json=to_json, session=session)

    # Nothing to narrow down for zero or one match; in particular an empty
    # result stays an empty list instead of becoming [None].
    if not only_return_best_match or len(contents) <= 1:
        return contents

    # min() keeps the first of several equally narrow contents.
    best_match = min(contents, key=lambda row: row['max_id'] - row['min_id'])
    return [best_match]
def get_contents(coll_scope=None, coll_name=None, request_id=None, workload_id=None, relation_type=None,
                 session=None):
    """
    Collect the contents of the collections of a request or workload.

    :param coll_scope: scope of the collection.
    :param coll_name: name of the collection.
    :param request_id: the request id.
    :param workload_id: The workload_id of the request.
    :param relation_type: The relation type between the collection and transform: input, output, logs and etc.
                          When None, collections of every relation type are included.
    :param session: The database session in use.

    :raises WrongParameterException: If both request_id and workload_id are None.

    :returns: nested dict {request_id: {transform_id: {'<scope>:<name>': {'collection': ...,
              'relation_type': ..., 'contents': ...}}}}
    """
    if request_id is None and workload_id is None:
        raise exceptions.WrongParameterException("Either request_id or workload_id should not be None")

    grouped_collections = get_collections(scope=coll_scope, name=coll_name, request_id=request_id,
                                          workload_id=workload_id, session=session)

    # Compare on the plain value, whether an enum member or a raw value was given.
    if isinstance(relation_type, CollectionRelationType):
        wanted_type = relation_type.value
    else:
        wanted_type = relation_type

    rets = {}
    for req_id, transforms in grouped_collections.items():
        request_entry = rets[req_id] = {}
        for transform_id, collections in transforms.items():
            transform_entry = request_entry[transform_id] = {}
            for collection in collections:
                if wanted_type is not None and collection['relation_type'].value != wanted_type:
                    continue

                scope_name = '%s:%s' % (collection['scope'], collection['name'])
                coll_id = collection['coll_id']
                coll_relation_type = collection['relation_type']
                contents = orm_contents.get_contents(coll_id=coll_id, session=session)
                transform_entry[scope_name] = {'collection': collection,
                                               'relation_type': coll_relation_type,
                                               'contents': contents}
    return rets
def add_transform(transform_type, transform_tag=None, priority=0, status=TransformStatus.New,
                  locking=TransformLocking.Idle, retries=0, expired_at=None, transform_metadata=None,
                  request_id=None, collections=None, session=None):
    """
    Add a transform together with its collections.

    :param transform_type: Transform type.
    :param transform_tag: Transform tag.
    :param priority: priority.
    :param status: Transform status.
    :param locking: Transform locking.
    :param retries: The number of retries.
    :param expired_at: The datetime when it expires.
    :param transform_metadata: The metadata as json.
    :param request_id: The request id the transform is added for.
    :param collections: non-empty list of collection dictionaries; each one gets the new
                        transform id and is passed to orm_collections.add_collection.
    :param session: The database session in use.

    :raises WrongParameterException: If collections is None or empty.
    :raises DuplicatedObject: If a transform with the same name exists.
    :raises DatabaseException: If there is a database error.

    :returns: transform id.
    """
    if collections is None or len(collections) == 0:
        msg = "Transform must have collections, such as input collection, output collection and log collection"
        raise exceptions.WrongParameterException(msg)

    transform_id = orm_transforms.add_transform(transform_type=transform_type, transform_tag=transform_tag,
                                                priority=priority, status=status, locking=locking,
                                                retries=retries, expired_at=expired_at,
                                                transform_metadata=transform_metadata,
                                                request_id=request_id, session=session)
    for collection in collections:
        collection['transform_id'] = transform_id
        orm_collections.add_collection(**collection, session=session)

    # Hand the new id back to the caller, as the documented contract promises.
    return transform_id
def get_collection_id_by_scope_name(scope, name, transform_id=None, relation_type=None, session=None):
    """
    Get collection id by scope, name, transform id or raise a NoObject exception.

    Only the first matching row is used.

    :param scope: collection scope.
    :param name: collection name, should not be wildcards.
    :param transform_id: The transform id related to this collection.
    :param relation_type: The relation type between this collection and the transform: Input, Output and Log.
                          When None, the relation type is not part of the filter.
    :param session: The database session in use.

    :raises WrongParameterException: If transform_id is None.
    :raises NoObject: If no collection is found.

    :returns: The collection id.
    """
    if transform_id is None:
        raise exceptions.WrongParameterException("transform_id should not be None")

    # Select the id column by name so the result does not depend on the
    # column order of the table.
    select = "select coll_id from atlas_idds.collections " \
             "where scope=:scope and name=:name and transform_id=:transform_id"
    params = {'scope': scope, 'name': name, 'transform_id': transform_id}

    if relation_type is not None:
        if isinstance(relation_type, CollectionRelationType):
            relation_type = relation_type.value
        select += " and relation_type=:relation_type"
        params['relation_type'] = relation_type

    try:
        row = session.execute(text(select), params).fetchone()
        if row is None:
            raise sqlalchemy.orm.exc.NoResultFound()
        return row[0]
    except sqlalchemy.orm.exc.NoResultFound as error:
        raise exceptions.NoObject('No collection with scope(%s), name(%s), transform_id(%s): %s' %
                                  (scope, name, transform_id, error))
def get_requests_by_status_type(status, request_type=None, time_period=None, locking=False, bulk_size=None,
                                to_json=False, session=None):
    """
    Retrieve requests in the given status(es) whose next poll time has passed.

    :param status: a status or a list of statuses of the request data.
    :param request_type: The type of the request data.
    :param time_period: if set, only select requests whose updated_at is older than this many seconds.
    :param locking: if true, only select requests whose locking is Idle.
    :param bulk_size: Size limitation per retrieve.
    :param to_json: return json format.
    :param session: The database session in use.

    :raises WrongParameterException: If status is None.
    :raises NoObject: If the query raises NoResultFound.

    :returns: list of Request dictionaries.
    """
    try:
        if status is None:
            raise exceptions.WrongParameterException("status should not be None")

        status = status if isinstance(status, (list, tuple)) else [status]
        if len(status) == 1:
            # NOTE(review): a single status is duplicated before being used in
            # the IN clause; the reason is not visible here.
            status = [status[0]] * 2

        request = models.Request
        query = session.query(request)
        query = query.with_hint(request, "INDEX(REQUESTS REQUESTS_SCOPE_NAME_IDX)", 'oracle')
        query = query.filter(request.status.in_(status))
        query = query.filter(request.next_poll_at < datetime.datetime.utcnow())

        if request_type is not None:
            query = query.filter(request.request_type == request_type)
        if time_period is not None:
            updated_before = datetime.datetime.utcnow() - datetime.timedelta(seconds=time_period)
            query = query.filter(request.updated_at < updated_before)
        if locking:
            query = query.filter(request.locking == RequestLocking.Idle)

        query = query.order_by(asc(request.updated_at))
        query = query.order_by(desc(request.priority))
        if bulk_size:
            query = query.limit(bulk_size)

        rows = query.all() or []
        return [row.to_dict_json() if to_json else row.to_dict() for row in rows]
    except sqlalchemy.orm.exc.NoResultFound as error:
        raise exceptions.NoObject('No requests with status: %s, request_type: %s, time_period: %s, locking: %s, %s' %
                                  (status, request_type, time_period, locking, error))
def get_collections_by_status(status, relation_type=CollectionRelationType.Input, time_period=None,
                              locking=False, bulk_size=None, session=None):
    """
    Get collections by status, relation_type and time_period or raise a NoObject exception.

    :param status: The collection status, or a list of statuses.
    :param relation_type: The relation_type of the collection to the transform.
                          When None, the relation type is not part of the filter.
    :param time_period: time period in seconds since last update.
    :param locking: if true, only select collections whose locking is Idle.
    :param bulk_size: Size limitation per retrieve.
    :param session: The database session in use.

    :raises WrongParameterException: If status is None.
    :raises NoObject: If the query raises NoResultFound.

    :returns: list of Collections as dictionaries, with enum columns converted to
              their enum types and coll_metadata decoded from json.
    """
    try:
        if status is None:
            raise exceptions.WrongParameterException("status should not be None")
        if not isinstance(status, (list, tuple)):
            status = [status]
        # The raw query compares plain values, not enum members.
        status = [st.value if isinstance(st, CollectionStatus) else st for st in status]

        select = """select * from atlas_idds.collections where status in :status"""
        params = {'status': status}

        if relation_type is not None:
            if isinstance(relation_type, CollectionRelationType):
                relation_type = relation_type.value
            select = select + " and relation_type=:relation_type"
            params['relation_type'] = relation_type
        if time_period is not None:
            select = select + " and updated_at < :updated_at"
            params['updated_at'] = datetime.datetime.utcnow() - datetime.timedelta(seconds=time_period)
        if locking:
            select = select + " and locking=:locking"
            params['locking'] = CollectionLocking.Idle.value
        if bulk_size:
            # Bind the limit instead of formatting it into the statement text.
            # NOTE(review): in Oracle, rownum is evaluated before "order by", so
            # this limits first and sorts afterwards; confirm that is intended.
            select = select + " and rownum < :bulk_size + 1 order by coll_id asc"
            params['bulk_size'] = bulk_size

        stmt = text(select).bindparams(bindparam('status', expanding=True))
        result = session.execute(stmt, params)

        enum_columns = (('coll_type', CollectionType),
                        ('relation_type', CollectionRelationType),
                        ('status', CollectionStatus),
                        ('locking', CollectionLocking))
        ret = []
        for row in result.fetchall():
            collection = row2dict(row)
            for column, enum_type in enum_columns:
                if collection[column] is not None:
                    collection[column] = enum_type(collection[column])
            if collection['coll_metadata']:
                collection['coll_metadata'] = json.loads(collection['coll_metadata'])
            ret.append(collection)
        return ret
    except sqlalchemy.orm.exc.NoResultFound as error:
        raise exceptions.NoObject('No collections with status(%s), relation_type(%s), time_period(%s): %s' %
                                  (status, relation_type, time_period, error))
def register_output_contents(coll_scope, coll_name, contents, request_id=None, workload_id=None,
                             relation_type=CollectionRelationType.Output, session=None):
    """
    Register contents with collection scope, collection name, request id, workload id and contents.

    Every content must provide non-None 'scope', 'name', 'min_id', 'max_id' and
    'status'; a content whose status is Available must also provide 'path'.

    :param coll_scope: scope of the collection.
    :param coll_name: name of the collection.
    :param contents: list of contents [{'scope': <scope>, 'name': <name>, 'min_id': min_id, 'max_id': max_id,
                                        'status': <status>, 'path': <path>}].
    :param request_id: the request id.
    :param workload_id: The workload_id of the request.
    :param relation_type: The relation type between the collection and the transform.
    :param session: The database session in use.

    :raises WrongParameterException: If both request_id and workload_id are None, coll_scope or
                                     coll_name is None, or a content misses a required field.
    """
    if (request_id is None and workload_id is None) or coll_scope is None or coll_name is None:
        msg = "Only one of (request_id, workload_id) can be None. All other parameters should not be None: "
        msg += "request_id=%s, workload_id=%s, coll_scope=%s, coll_name=%s" % (
            request_id, workload_id, coll_scope, coll_name)
        raise exceptions.WrongParameterException(msg)

    if request_id is None and workload_id is not None:
        request_id = orm_requests.get_request_id(request_id, workload_id, session=session)

    coll_id = orm_collections.get_collection_id_by_scope_name(coll_scope, coll_name, request_id,
                                                              relation_type, session=session)

    base_keys = ['scope', 'name', 'min_id', 'max_id', 'status']
    parameters = []
    for content in contents:
        status = content.get('status')
        if status is None:
            raise exceptions.WrongParameterException(
                "Content status is required and should not be None: %s" % content)

        # Accept both the enum member and its raw value for Available.
        if status in [ContentStatus.Available, ContentStatus.Available.value]:
            content_keys = base_keys + ['path']
        else:
            content_keys = base_keys

        parameter = {}
        for key in content_keys:
            # A missing key is reported the same way as an explicit None,
            # instead of surfacing as a bare KeyError.
            if content.get(key) is None:
                raise exceptions.WrongParameterException("Content %s should not be None" % key)
            parameter[key] = content[key]

        if isinstance(parameter['status'], ContentStatus):
            parameter['status'] = parameter['status'].value
        parameter['coll_id'] = coll_id
        parameters.append(parameter)

    orm_contents.update_contents(parameters, session=session)
def get_requests_by_status_type(status, request_type=None, time_period=None, locking=False, bulk_size=None,
                                session=None):
    """
    Get requests by status with a raw SQL query.

    :param status: a status or a list of statuses of the request data.
    :param request_type: The type of the request data.
    :param time_period: if set, only select requests whose updated_at is older than this many seconds.
    :param locking: if true, only select requests whose locking is Idle.
    :param bulk_size: Size limitation per retrieve.
    :param session: The database session in use.

    :raises WrongParameterException: If status is None.
    :raises NoObject: If the query raises NoResultFound.

    :returns: list of requests, each converted with convert_request_to_dict.
    """
    try:
        if status is None:
            raise exceptions.WrongParameterException("status should not be None")
        if not isinstance(status, (list, tuple)):
            status = [status]
        # The raw query compares plain values, not enum members.
        status = [st.value if isinstance(st, RequestStatus) else st for st in status]

        req_select = """select request_id, scope, name, requester, request_type, transform_tag, priority,
                        status, locking, workload_id, created_at, updated_at, accessed_at, expired_at,
                        errors, request_metadata, processing_metadata
                        from atlas_idds.requests where status in :status"""
        req_params = {'status': status}

        if request_type is not None:
            req_select = req_select + " and request_type=:request_type"
            req_params['request_type'] = request_type
        if time_period is not None:
            req_select = req_select + " and updated_at < :updated_at"
            req_params['updated_at'] = datetime.datetime.utcnow() - datetime.timedelta(seconds=time_period)
        if locking:
            req_select = req_select + " and locking=:locking"
            req_params['locking'] = RequestLocking.Idle.value

        if bulk_size:
            # Bind the limit instead of formatting it into the statement text.
            # NOTE(review): in Oracle, rownum is evaluated before "order by", so
            # this limits first and sorts afterwards; confirm that is intended.
            req_select = req_select + " and rownum < :bulk_size + 1 order by priority desc, request_id asc"
            req_params['bulk_size'] = bulk_size
        else:
            req_select = req_select + " order by priority desc"

        req_stmt = text(req_select).bindparams(bindparam('status', expanding=True))
        result = session.execute(req_stmt, req_params)
        return [convert_request_to_dict(req) for req in result.fetchall()]
    except sqlalchemy.orm.exc.NoResultFound as error:
        raise exceptions.NoObject('No requests with status: %s, request_type: %s, time_period: %s, locking: %s, %s' %
                                  (status, request_type, time_period, locking, error))
def get_contents(scope=None, name=None, coll_id=None, status=None, session=None):
    """
    Get contents filtered by any combination of (scope and name), coll_id and status.

    The scope/name filter is only applied when both are given; the name is
    matched as a substring (``like '%name%'``). At least one of the three
    filters must be provided.

    :param scope: The scope of the content data.
    :param name: The name of the content data.
    :param coll_id: Collection id.
    :param status: a content status or a list of content statuses.
    :param session: The database session in use.

    :raises WrongParameterException: If none of (scope and name), coll_id and status is provided.
    :raises NoObject: If the query raises NoResultFound.

    :returns: list of contents as dictionaries, with content_type and status converted
              to their enum types and content_metadata decoded from json.
    """
    try:
        if status is not None:
            if not isinstance(status, (tuple, list)):
                status = [status]
            # The raw query compares plain values, not enum members.
            status = [st.value if isinstance(st, ContentStatus) else st for st in status]

        # Collect the where-conditions and their bind values side by side, so
        # every filter combination goes through one statement builder.
        conditions = []
        params = {}
        if coll_id:
            conditions.append("coll_id=:coll_id")
            params['coll_id'] = coll_id
        if scope and name:
            conditions.append("scope=:scope")
            conditions.append("name like :name")
            params['scope'] = scope
            params['name'] = '%' + name + '%'
        if status is not None:
            conditions.append("status in :status")
            params['status'] = status

        if not conditions:
            raise exceptions.WrongParameterException(
                "Both (scope:%s and name:%s) and coll_id:%s status:%s are not fully provided" %
                (scope, name, coll_id, status))

        stmt = text("select * from atlas_idds.contents where " + " and ".join(conditions))
        if status is not None:
            # The status list has to be expanded into the IN clause.
            stmt = stmt.bindparams(bindparam('status', expanding=True))
        result = session.execute(stmt, params)

        rets = []
        for row in result.fetchall():
            content = row2dict(row)
            if content['content_type'] is not None:
                content['content_type'] = ContentType(content['content_type'])
            if content['status'] is not None:
                content['status'] = ContentStatus(content['status'])
            if content['content_metadata']:
                content['content_metadata'] = json.loads(content['content_metadata'])
            rets.append(content)
        return rets
    except sqlalchemy.orm.exc.NoResultFound as error:
        raise exceptions.NoObject('No record can be found with (scope=%s, name=%s, coll_id=%s): %s' %
                                  (scope, name, coll_id, error))