def search_datasets(tags=None, version=None, ontologies=None):
    """
    Search datasets matching any of the supplied filters.

    :param tags: list of strings; a dataset matches if it carries at
        least one of them
    :param version: substring matched against Dataset.version via
        SQL LIKE ('%version%')
    :param ontologies: list of ontology term ids; a dataset matches if
        its internal ontology map contains at least one
    :return: (list of dumped datasets, 200) on success,
        (error dict, 500) on a database failure
    """
    db_session = get_session()
    try:
        datasets = db_session.query(Dataset)
        if version:
            datasets = datasets.filter(
                Dataset.version.like('%' + version + '%'))
        if tags:
            # return any dataset that matches at least one tag
            datasets = datasets.filter(
                or_(*[Dataset.tags.contains(tag) for tag in tags]))
        if ontologies:
            # match against the internal ontology term map
            datasets = datasets.filter(
                or_(*[Dataset.ontologies_internal.contains(term)
                      for term in ontologies]))
    except ORMException as e:
        err = _report_search_failed('dataset', e)
        return err, 500
    return [dump(x) for x in datasets], 200
def delete_dataset_by_id(dataset_id):
    """
    Delete the dataset with the given id.

    Current thoughts are that delete should only be a CLI accessible
    command rather than API.

    :param dataset_id: UUID of the dataset to remove
    :return: (None, 204) on successful delete,
        (error dict, 404) if the dataset does not exist,
        (error dict, 500) on a database failure
    """
    db_session = get_session()
    try:
        specified_dataset = db_session.query(Dataset) \
            .get(dataset_id)
    except ORMException as e:
        err = _report_search_failed('call', e, dataset_id=str(dataset_id))
        return err, 500
    if not specified_dataset:
        err = dict(message="Dataset not found: " + str(dataset_id), code=404)
        return err, 404
    try:
        # delete the row we already fetched instead of re-querying it
        db_session.delete(specified_dataset)
        db_session.commit()
    except ORMException as e:
        # keep the session usable after a failed delete
        db_session.rollback()
        err = _report_update_failed('dataset', e, dataset_id=str(dataset_id))
        return err, 500
    return None, 204
def get_versions():
    """
    List every release version recorded in the change log.

    :return: (list of version strings, 200) on success,
        (error dict, 500) on a database failure
    """
    db_session = get_session()
    try:
        rows = db_session.query(ChangeLog.version)
    except ORMException as e:
        return _report_search_failed('versions', e), 500
    return [row.version for row in rows], 200
def get_change_log(version):
    """
    Fetch the change log entry for one release version.

    :param version: required release version (primary key of ChangeLog)
    :return: (dumped change log, 200) on success,
        (error dict, 404) if no entry exists,
        (error dict, 500) on a database failure
    """
    db_session = get_session()
    try:
        entry = db_session.query(ChangeLog).get(version)
    except ORMException as e:
        return _report_search_failed('change log', e), 500
    if not entry:
        return dict(message="Change log not found", code=404), 404
    return dump(entry), 200
def get_dataset_by_id(dataset_id):
    """
    Fetch a single dataset by its id.

    :param dataset_id: UUID string identifying the dataset
    :return: (dumped dataset, 200) on success,
        (error dict, 404) if the id is malformed or unknown,
        (error dict, 500) on a database failure
    """
    db_session = get_session()
    try:
        validate_uuid_string('id', dataset_id)
        specified_dataset = db_session.query(Dataset) \
            .get(dataset_id)
    except IdentifierFormatError as e:
        err = dict(message=str(e), code=404)
        return err, 404
    except ORMException as e:
        # consistent with the other handlers: DB failures become a 500
        err = _report_search_failed('dataset', e, dataset_id=str(dataset_id))
        return err, 500
    if not specified_dataset:
        err = dict(message="Dataset not found: " + str(dataset_id), code=404)
        return err, 404
    return dump(specified_dataset), 200
def post_change_log(body):
    """
    Create a new change log following the changeLog schema in
    datasets.yaml.

    :param body: dict matching the changeLog schema; 'created' is
        stamped here with the current UTC time
    :return: (body, 201) on success,
        (error dict, 400) if the body does not map onto ChangeLog,
        (error dict, 405) if the version already exists,
        (error dict, 500) on a database failure
    """
    db_session = get_session()
    change_version = body.get('version')
    body['created'] = datetime.datetime.utcnow()
    try:
        orm_changelog = ChangeLog(**body)
    except TypeError as e:
        err = _report_conversion_error('changelog', e, **body)
        return err, 400
    try:
        db_session.add(orm_changelog)
        db_session.commit()
    except exc.IntegrityError:
        db_session.rollback()
        err = _report_object_exists('changelog: ' + body['version'], **body)
        return err, 405
    except ORMException as e:
        # keep the session usable after a failed write (matches post_dataset)
        db_session.rollback()
        err = _report_write_error('changelog', e, **body)
        return err, 500
    logger().info(
        struct_log(action='post_change_log',
                   status='created',
                   change_version=change_version,
                   **body))
    return body, 201
def search_dataset_ontologies():
    """
    Return a sorted list of all ontology term ids currently used by
    any dataset.

    :return: (sorted list of term id strings, 200) on success,
        (error dict, 500) on a database failure
    """
    db_session = get_session()
    try:
        with_ontologies = db_session.query(Dataset) \
            .filter(Dataset.ontologies != [])
        # collect unique term ids across every dataset's ontology list
        term_ids = set()
        for dataset in with_ontologies:
            for ontology in dump(dataset)['ontologies']:
                term_ids.add(ontology['id'])
        terms = sorted(term_ids)
    except ORMException as e:
        return _report_search_failed('dataset', e), 500
    return terms, 200
def post_dataset(body):
    """
    Create a new dataset following the dataset_ingest schema defined
    in datasets.yaml.

    The ontologies_internal property is used when looking up current
    ontologies but is stripped from the body before it is returned.

    :param body: dict matching the dataset_ingest schema; 'id',
        'version' and 'created' are filled in here when absent
    :return: (body, 201) on success,
        (error dict, 400) on DUO validation or conversion failure,
        (error dict, 405) if the id already exists,
        (error dict, 500) on a database failure
    """
    db_session = get_session()
    if not body.get('id'):
        # str() keeps the generated id JSON-serializable in the response
        iid = str(uuid.uuid1())
        body['id'] = iid
    else:
        iid = body['id']
    if not body.get('version'):
        body['version'] = Version
    body['created'] = datetime.datetime.utcnow()
    mapped = []
    if body.get('ontologies'):
        # Ontology objects should be
        # {'id': ontology_name, 'terms': [{'id': 'some code'}]}
        mapped = {
            ontology['id']: ontology['terms']
            for ontology in body['ontologies']
        }
        if 'duo' in mapped:
            validator = OntologyValidator(ont=ont, input_json=mapped)
            valid, invalids = validator.validate_duo()
            if not valid:
                err = dict(message="DUO Validation Errors encountered: "
                           + str(invalids), code=400)
                return err, 400
            # enrich each validated DUO term with its ontology overview
            duo_terms = json.loads(validator.get_duo_list())
            duos = []
            for term in duo_terms:
                overview = OntologyParser(ont, term["id"]).get_overview()
                duos.append({**term, **overview})
            body['ontologies'] = duos
    body['ontologies_internal'] = mapped
    try:
        orm_dataset = Dataset(**body)
    except TypeError as e:
        err = _report_conversion_error('dataset', e, **body)
        return err, 400
    try:
        db_session.add(orm_dataset)
        db_session.commit()
    except exc.IntegrityError:
        db_session.rollback()
        # str() guards against a non-string id in the message
        err = _report_object_exists('dataset: ' + str(body['id']), **body)
        return err, 405
    except ORMException as e:
        db_session.rollback()
        err = _report_write_error('dataset', e, **body)
        return err, 500
    body.pop('ontologies_internal')
    return body, 201