def get_dataset_logo():
    """
    Gets dataset logos that are statically stored in the portal

    TODO: This should not be static, it should be a function of
    the dataset in the database

    Args:
        dataset_id: the unique identifier of the dataset

    Returns:
        the content of the logo image file
    """
    dataset_id = request.args.get('id', '')
    dataset = Dataset.query.filter_by(dataset_id=dataset_id).first()
    if dataset is None:
        return 'Not Found', 404

    datasetrootdir = os.path.join(
        current_app.config['DATA_PATH'],
        'conp-dataset',
        dataset.fspath
    )

    logopath = DATSDataset(datasetrootdir).LogoFilepath
    with open(logopath, 'rb') as logofile:
        return logofile.read()
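
# Usage sketch (assumption-laden): exercising the logo route through Flask's
# test client. The '/dataset_logo' path matches the thumbnailURL built
# elsewhere in this module; 'create_app' is an assumed application-factory
# name, not shown in this file.
def fetch_logo_sketch(dataset_id):
    app = create_app()  # hypothetical factory; adjust to the project's entry point
    with app.test_client() as client:
        response = client.get('/dataset_logo?id={}'.format(dataset_id))
        # 404 for an unknown id, raw image bytes otherwise
        return response.status_code, response.data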
def test_datsdataset_jsonld(new_dataset):
    """
    Check that the JSON-LD snippet for Google Dataset Search is correct
    """
    assert new_dataset.dataset_id == "8de99b0e-5f94-11e9-9e05-52545e9add8e"
    assert new_dataset.fspath == './test/test_dataset'

    dats_dataset = DATSDataset(datasetpath=new_dataset.fspath)
    assert type(dats_dataset.schema_org_metadata) is dict
    assert dats_dataset.version == "1.0"
    assert dats_dataset.schema_org_metadata["@context"] == "https://schema.org/"
    assert dats_dataset.schema_org_metadata["@type"] == "Dataset"
    assert dats_dataset.schema_org_metadata["name"] == \
        "Multicenter Single Subject Human MRI Phantom"
    assert dats_dataset.schema_org_metadata["description"] == \
        "Human Brain phantom scans, Multiple MRI"\
        " scans of a single human phantom over 11"\
        " years, T1 weighted images and others on"\
        " 13 scanner in 6 sites accross North America."\
        " The data are available in minc format."
    assert dats_dataset.schema_org_metadata["version"] == "1.0"
    assert type(dats_dataset.schema_org_metadata["license"]) is list
    assert dats_dataset.schema_org_metadata["license"][0]["@type"] == "CreativeWork"
    assert dats_dataset.schema_org_metadata["license"][0]["name"] == "CC BY-ND"
    assert type(dats_dataset.schema_org_metadata["keywords"]) is list
    assert type(dats_dataset.schema_org_metadata["creator"]) is list
    assert dats_dataset.schema_org_metadata["creator"][0]["@type"] == "Organization"
    assert dats_dataset.schema_org_metadata["creator"][0]["name"] == \
        "McGill Center for Integrative Neuroscience"
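
# Companion sketch: how a schema.org snippet like the one verified above is
# typically published for Google Dataset Search -- serialized as JSON inside a
# <script type="application/ld+json"> tag on the dataset page. The helper
# name is hypothetical; only the shape of schema_org_metadata comes from the
# test above.
import json

def jsonld_script_tag_sketch(dats_dataset):
    metadata = dats_dataset.schema_org_metadata
    if metadata is None:
        return ''
    return ('<script type="application/ld+json">'
            + json.dumps(metadata)
            + '</script>')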
def download_metadata():
    """
    Download Metadata Route

    Route to allow downloading the metadata for a dataset

    Args:
        dataset (REQ ARG): the dataset

    Returns:
        Response with the zipped metadata for the browser to download

    Raises:
        HTML error if this fails
    """
    dataset_id = request.args.get('dataset', '')
    dataset = Dataset.query.filter_by(dataset_id=dataset_id).first()
    if dataset is None:
        return 'Not Found', 404

    datasetrootdir = os.path.join(
        current_app.config['DATA_PATH'],
        'conp-dataset',
        dataset.fspath
    )
    datspath = DATSDataset(datasetrootdir).DatsFilepath
    return send_from_directory(
        os.path.dirname(datspath),
        os.path.basename(datspath),
        as_attachment=True,
        attachment_filename=dataset.name.replace(' ', '_') + '.dats.json',
        mimetype='application/json'
    )
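
# Usage sketch with the requests library. Only the view function is shown
# above, so the '/download_metadata' path and host are assumptions; the
# parameter name 'dataset' comes from request.args.get('dataset', '').
import requests

def download_dats_sketch(base_url, dataset_id, out_path):
    resp = requests.get(base_url + '/download_metadata',
                        params={'dataset': dataset_id})
    resp.raise_for_status()
    with open(out_path, 'wb') as f:
        f.write(resp.content)  # the attached <name>.dats.json payload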
def dataset_info():
    """
    Dataset Route

    Route to get the page for one dataset

    Args:
        id (REQ ARG): the id of the dataset to display

    Returns:
        rendered dataset.html for the dataset
    """
    dataset_id = request.args.get('id')

    # Query dataset
    d = Dataset.query.filter_by(dataset_id=dataset_id).first()
    datsdataset = DATSDataset(d.fspath)

    authorized = current_user.is_authenticated

    dataset = {
        "authorized": authorized,
        "id": d.dataset_id,
        "title": d.name.replace("'", ""),
        "isPrivate": d.is_private,
        "thumbnailURL": "/dataset_logo?id={}".format(d.dataset_id),
        "imagePath": "static/img/",
        "downloadPath": d.dataset_id,
        "URL": 'raw_data_url',
        "downloads": "0",
        "views": "0",
        "likes": "0",
        "dateAdded": str(d.date_created.date()),
        "dateUpdated": str(d.date_updated.date()),
        "size": datsdataset.size,
        "files": datsdataset.fileCount,
        "subjects": datsdataset.subjectCount,
        "format": datsdataset.formats,
        "modalities": datsdataset.modalities,
        "sources": datsdataset.sources,
        "conpStatus": datsdataset.conpStatus
    }

    metadata = get_dataset_metadata_information(d)

    return render_template(
        'dataset.html',
        title='CONP | Dataset',
        data=dataset,
        metadata=metadata,
        user=current_user
    )
def test_jsonld_creator_person(new_dataset):
    """
    Change DATS data: add a creator person; remove creators
    """
    assert new_dataset.fspath == './test/test_dataset'

    dats_dataset = DATSDataset(datasetpath=new_dataset.fspath)
    assert type(dats_dataset.schema_org_metadata) is dict

    person_creator = {
        "firstName": "Jane",
        "lastName": "Doe",
        "email": "*****@*****.**",
        "affiliations": [
            {"name": "University"},
            {"name": "University2"},
            {"name": "University3"},
            {"name": "University4"}
        ]
    }

    # change creators type to Person
    dats_dataset.descriptor["creators"] = [person_creator]
    assert dats_dataset.schema_org_metadata["creator"][0]["@type"] == "Person"
    assert dats_dataset.schema_org_metadata["creator"][0]["givenName"] == "Jane"
    assert dats_dataset.schema_org_metadata["creator"][0]["name"] == "Jane Doe"
    assert type(
        dats_dataset.schema_org_metadata["creator"][0]["affiliation"]) is list
    for org in dats_dataset.schema_org_metadata["creator"][0]["affiliation"]:
        assert org["@type"] == "Organization"
        assert org["name"] is not None

    # remove first name in person
    del person_creator["firstName"]
    assert dats_dataset.schema_org_metadata["creator"][0]["name"] == \
        "Name is not provided"

    # break the descriptor: the resulting TypeError is expected to be
    # swallowed, with schema_org_metadata falling back to None
    dats_dataset.descriptor["creators"] = None
    assert dats_dataset.schema_org_metadata is None
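
# Illustrative sketch, not the portal's implementation: the DATS-person to
# schema.org-creator mapping that the test above exercises. The function name
# is hypothetical; the fallback behavior is inferred from the assertions
# (a missing firstName yields the literal "Name is not provided", even when
# lastName is present).
def person_to_schema_org_creator_sketch(person):
    if person.get("firstName") and person.get("lastName"):
        name = person["firstName"] + " " + person["lastName"]
    else:
        name = "Name is not provided"
    creator = {
        "@type": "Person",
        "name": name,
        "affiliation": [
            {"@type": "Organization", "name": a.get("name")}
            for a in person.get("affiliations", [])
        ],
    }
    if person.get("firstName"):
        creator["givenName"] = person["firstName"]
    return creator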
def get_dataset_metadata_information(dataset):
    """
    Returns the dataset's metadata

    Args:
        dataset: the Dataset model object

    Returns:
        payload containing the dataset's metadata
    """
    datsdataset = DATSDataset(dataset.fspath)

    # check for child datasets (see the query sketch after this function)
    child_datasets = []
    dataset_ancestries = DatasetAncestry.query.all()
    for da in dataset_ancestries:
        if da.parent_dataset_id == dataset.dataset_id:
            name = da.child_dataset_id[9:]
            child_dataset = {
                "child_dataset_id": da.child_dataset_id,
                "name": name
            }
            child_datasets.append(child_dataset)

    return {
        "schema_org_metadata": datsdataset.schema_org_metadata,
        "creators": datsdataset.creators,
        "description": datsdataset.description,
        "contact": datsdataset.contacts,
        "version": datsdataset.version,
        "licenses": datsdataset.licenses,
        "sources": datsdataset.sources,
        "keywords": datsdataset.keywords,
        "parentDatasets": datsdataset.parentDatasetId,
        "primaryPublications": datsdataset.primaryPublications,
        "childDatasets": child_datasets,
        "dimensions": datsdataset.dimensions,
        "producedBy": datsdataset.producedBy,
        "isAbout": datsdataset.isAbout,
        "acknowledges": datsdataset.acknowledges,
        "spatialCoverage": datsdataset.spatialCoverage,
        "dates": datsdataset.dates,
        "remoteUrl": dataset.remoteUrl,
    }
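
# Design note, as a sketch: the loop above fetches every DatasetAncestry row
# and filters in Python. The same lookup can be pushed to the database with
# filter_by on the column already compared above (standard Flask-SQLAlchemy;
# the helper name is hypothetical).
def get_child_datasets_sketch(dataset):
    rows = DatasetAncestry.query.filter_by(
        parent_dataset_id=dataset.dataset_id).all()
    return [{
        "child_dataset_id": da.child_dataset_id,
        "name": da.child_dataset_id[9:]
    } for da in rows]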
def get_dataset_readme(dataset_id):
    dataset = Dataset.query.filter_by(dataset_id=dataset_id).first()
    if dataset is None:
        return 'Dataset Not Found', 404

    datsdataset = DATSDataset(dataset.fspath)
    readmeFilepath = datsdataset.ReadmeFilepath
    # check existence up front; the original f.mode check could never fail
    if not os.path.isfile(readmeFilepath):
        return 'Readme Not Found', 404

    with open(readmeFilepath, 'r') as f:
        readme = f.read()

    content = github.render_content(readme)
    return content
def get_dataset_metadata_information(dataset):
    """
    Returns the dataset's metadata

    Args:
        dataset: the Dataset model object

    Returns:
        payload containing the dataset's metadata
    """
    datsdataset = DATSDataset(dataset.fspath)
    return {
        "authors": datsdataset.authors,
        "description": datsdataset.description,
        "contact": datsdataset.contacts,
        "version": datsdataset.version,
        "licenses": datsdataset.licenses
    }
def get_dataset_metadata_information(dataset):
    """
    Returns the dataset's metadata

    Args:
        dataset: the Dataset model object

    Returns:
        payload containing the dataset's metadata
    """
    datsdataset = DATSDataset(dataset.fspath)

    # check for child datasets
    childDatasets = []
    datasetAncestries = DatasetAncestry.query.all()
    for da in datasetAncestries:
        if da.parent_dataset_id == dataset.dataset_id:
            name = da.child_dataset_id[9:]
            childDataset = {
                "child_dataset_id": da.child_dataset_id,
                "name": name
            }
            childDatasets.append(childDataset)

    return {
        "schema_org_metadata": datsdataset.schema_org_metadata,
        "authors": datsdataset.authors,
        "description": datsdataset.description,
        "contact": datsdataset.contacts,
        "version": datsdataset.version,
        "licenses": datsdataset.licenses,
        "sources": datsdataset.sources,
        "parentDatasets": datsdataset.parentDatasetId,
        "primaryPublications": datsdataset.primaryPublications,
        "childDatasets": childDatasets
    }
def dataset_search():
    """
    Dataset Search Route

    This route executes a dataset search

    Args:
        search is the search term in the GET Request

    Returns:
        JSON containing the matching datasets
    """
    datasets = []

    authorized = current_user.is_authenticated

    if request.args.get('id'):
        # Query datasets
        datasets = Dataset.query.filter_by(
            dataset_id=request.args.get('id')).all()
    else:
        # Query datasets
        datasets = Dataset.query.order_by(Dataset.id).all()

    # Element input for payload
    elements = []

    # Build dataset response
    for d in datasets:
        try:
            datsdataset = DATSDataset(d.fspath)
        except Exception:
            # If the DATS file can't be loaded, skip this dataset.
            # There should be an error message in logs/update_datsets.log
            continue

        # If a search term exists, filter results here
        if request.args.get('search'):
            searchTerm = request.args.get('search')
            with open(datsdataset.DatsFilepath, 'r') as dats:
                match = False
                for line in dats.readlines():
                    if searchTerm.lower() in line.lower():
                        match = True
                        break
            if not match:
                continue

        dataset = {
            "authorized": authorized,
            "id": d.dataset_id,
            "title": d.name.replace("'", ""),
            "isPrivate": d.is_private,
            "thumbnailURL": "/dataset_logo?id={}".format(d.dataset_id),
            "downloadPath": d.dataset_id,
            "URL": '?',
            "dateAdded": str(d.date_created.date()),
            "dateUpdated": str(d.date_updated.date()),
            "size": datsdataset.size,
            "files": datsdataset.fileCount,
            "subjects": datsdataset.subjectCount,
            "format": datsdataset.formats,
            "modalities": datsdataset.modalities,
            "sources": datsdataset.sources,
            "conpStatus": datsdataset.conpStatus,
            "authorizations": datsdataset.authorizations,
            "principalInvestigators": datsdataset.principalInvestigators,
            "primaryPublications": datsdataset.primaryPublications,
            "logoFilepath": datsdataset.LogoFilepath,
            "status": datsdataset.status,
        }
        elements.append(dataset)

    modalities = []
    for e in elements:
        if e['modalities'] is None:
            continue
        for m in e['modalities'].split(", "):
            modalities.append(m.lower())
    modalities = list(set(modalities))

    formats = []
    # by default, formats should be represented in upper case
    # except for NIfTI, bigWig and RNA-Seq
    for e in elements:
        if e['format'] is None:
            continue
        for m in e['format'].split(", "):
            formatted_string = re.sub(r'\.', '', m)
            if formatted_string.lower() in ['nifti', 'nii', 'niigz']:
                formats.append('NIfTI')
            elif formatted_string.lower() in ['gifti', 'gii']:
                formats.append('GIfTI')
            elif formatted_string.lower() == 'bigwig':
                formats.append('bigWig')
            elif formatted_string.lower() == 'rna-seq':
                formats.append('RNA-Seq')
            else:
                formats.append(formatted_string.upper())
    formats = sorted(list(set(formats)))

    queryAll = bool(request.args.get('elements') == 'all')

    if not queryAll:
        if request.args.get('modalities'):
            filterModalities = request.args.get('modalities').split(",")
            elements = list(
                filter(lambda e: e['modalities'] is not None, elements))
            elements = list(
                filter(
                    lambda e: all(item in (m.lower()
                                           for m in e['modalities'].split(", "))
                                  for item in filterModalities),
                    elements))
        if request.args.get('formats'):
            filterFormats = request.args.get('formats').split(",")
            elements = list(
                filter(lambda e: e['format'] is not None, elements))
            elements = list(
                filter(
                    lambda e: all(item.lower() in (f.lower()
                                                   for f in e['format'].split(", "))
                                  for item in filterFormats),
                    elements))

        cursor = None
        limit = None
        if request.args.get('max_per_page') != 'All':
            delta = int(request.args.get('max_per_page', 10)) * \
                (int(request.args.get('page', 1)) - 1)
            # clamp the cursor to a non-negative offset
            cursor = max(int(request.args.get('cursor') or 0), 0) + delta
            limit = int(request.args.get('limit') or 10)

        sort_key = request.args.get('sortKey') or "conpStatus"

        paginated = elements
        if sort_key == "conpStatus":
            order = {'conp': 0, 'canadian': 1, 'external': 2}
            # unknown statuses sort last; len(order) keeps the key orderable
            paginated.sort(key=lambda o: (o[sort_key].lower() not in order,
                                          order.get(o[sort_key].lower(),
                                                    len(order))))
        elif sort_key == "title":
            paginated.sort(key=lambda o: o[sort_key].lower())
        elif sort_key == "sizeDes" or sort_key == "sizeAsc":

            def getAbsoluteSize(e):
                if not e["size"]:
                    return 0.0
                units = ["KB", "MB", "GB", "TB"]
                unitScales = [1000, 1000**2, 1000**3, 1000**4]
                size = e["size"].split(" ")
                # coerce to float so an unknown unit cannot break the sort
                absoluteSize = float(size[0])
                if size[1] in units:
                    absoluteSize = float(size[0]) * \
                        unitScales[units.index(size[1])]
                return absoluteSize

            reverse = (sort_key == 'sizeDes')
            paginated.sort(key=lambda o: getAbsoluteSize(o), reverse=reverse)
        elif sort_key == "filesDes" or sort_key == "filesAsc":

            def getNumberOfFiles(e):
                if not e["files"]:
                    return 0
                return int(e["files"])

            reverse = (sort_key == 'filesDes')
            paginated.sort(key=lambda o: getNumberOfFiles(o), reverse=reverse)
        elif sort_key == "subjectsDes" or sort_key == "subjectsAsc":

            def getNumberOfSubjects(e):
                if not e["subjects"]:
                    return 0
                return int(e["subjects"])

            reverse = (sort_key == 'subjectsDes')
            paginated.sort(key=lambda o: getNumberOfSubjects(o),
                           reverse=reverse)
        elif sort_key == "dateAddedDesc" or sort_key == "dateAddedAsc":
            reverse = (sort_key == 'dateAddedAsc')
            paginated.sort(key=lambda o: (o["dateAdded"] is None,
                                          o["dateAdded"]),
                           reverse=reverse)
        elif sort_key == "dateUpdatedDesc" or sort_key == "dateUpdatedAsc":
            reverse = (sort_key == 'dateUpdatedAsc')
            paginated.sort(key=lambda o: (o["dateUpdated"] is None,
                                          o["dateUpdated"]),
                           reverse=reverse)
        else:
            paginated.sort(key=lambda o: (o[sort_key] is None, o[sort_key]))

        if cursor is not None and limit is not None:
            paginated = paginated[cursor:(cursor + limit)]
    else:
        paginated = elements

    # Construct payload
    payload = {
        "authorized": authorized,
        "total": len(elements),
        "sortKeys": [{
            "key": "conpStatus",
            "label": "Origin"
        }, {
            "key": "title",
            "label": "Dataset Name"
        }, {
            "key": "dateAddedAsc",
            "label": "Date Added (Newest First)"
        }, {
            "key": "dateAddedDesc",
            "label": "Date Added (Oldest First)"
        }, {
            "key": "dateUpdatedAsc",
            "label": "Date Updated (Newest First)"
        }, {
            "key": "dateUpdatedDesc",
            "label": "Date Updated (Oldest First)"
        }, {
            "key": "sizeDes",
            "label": "Disk Space Usage (Largest First)"
        }, {
            "key": "sizeAsc",
            "label": "Disk Space Usage (Smallest First)"
        }, {
            "key": "filesDes",
            "label": "Number of Files (Largest First)"
        }, {
            "key": "filesAsc",
            "label": "Number of Files (Smallest First)"
        }, {
            "key": "subjectsDes",
            "label": "Number of Subjects (Largest First)"
        }, {
            "key": "subjectsAsc",
            "label": "Number of Subjects (Smallest First)"
        }],
        "filterKeys": [{
            "key": "modalities",
            "values": modalities
        }, {
            "key": "formats",
            "values": formats
        }],
        "elements": paginated
    }

    return json.dumps(payload)
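
# Client-side usage sketch for the search route above. The '/dataset-search'
# path is a placeholder (the route decorator is not shown); the parameter
# names come from the request.args reads in the function.
import requests

def search_datasets_sketch(base_url):
    params = {
        'search': 'phantom',
        'modalities': 'mri',
        'sortKey': 'title',
        'page': 1,
        'max_per_page': 10,
    }
    resp = requests.get(base_url + '/dataset-search', params=params)
    resp.raise_for_status()
    payload = resp.json()
    return payload['total'], [e['title'] for e in payload['elements']]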
def dataset_info():
    """
    Dataset Route

    Route to get the page for one dataset

    Args:
        id (REQ ARG): the id of the dataset to display

    Returns:
        rendered dataset.html for the dataset
    """
    dataset_id = request.args.get('id')

    # Query dataset
    d = Dataset.query.filter_by(dataset_id=dataset_id).first()
    datsdataset = DATSDataset(d.fspath)

    authorized = current_user.is_authenticated

    dataset = {
        "authorized": authorized,
        "name": datsdataset.name,
        "id": d.dataset_id,
        "title": d.name.replace("'", ""),
        "isPrivate": d.is_private,
        "thumbnailURL": "/dataset_logo?id={}".format(d.dataset_id),
        "imagePath": "static/img/",
        "downloadPath": d.dataset_id,
        "URL": 'raw_data_url',
        "downloads": "0",
        "views": "0",
        "likes": "0",
        "dateAdded": str(d.date_created.date()),
        "dateUpdated": str(d.date_updated.date()),
        "size": datsdataset.size,
        "files": datsdataset.fileCount,
        "subjects": datsdataset.subjectCount,
        "format": datsdataset.formats,
        "modalities": datsdataset.modalities,
        "sources": datsdataset.sources,
        "conpStatus": datsdataset.conpStatus,
        "authorizations": datsdataset.authorizations,
        "principalInvestigators": datsdataset.principalInvestigators,
        "primaryPublications": datsdataset.primaryPublications,
        "logoFilepath": datsdataset.LogoFilepath,
        "status": datsdataset.status,
    }

    metadata = get_dataset_metadata_information(d)
    readme = get_dataset_readme(d.dataset_id)

    if dataset["status"] == "Working":
        color = "success"
    elif dataset["status"] == "Unknown":
        color = "lightgrey"
    else:
        color = "critical"

    ciBadgeUrl = "https://img.shields.io/badge/circleci-" + \
        dataset["status"] + "-" + color + "?style=flat-square&logo=circleci"

    return render_template('dataset.html',
                           title='CONP | Dataset',
                           data=dataset,
                           metadata=metadata,
                           readme=readme,
                           ciBadgeUrl=ciBadgeUrl,
                           user=current_user)
def dataset_info():
    """
    Dataset Route

    Route to get the page for one dataset

    Args:
        id (REQ ARG): the id of the dataset to display

    Returns:
        rendered dataset.html for the dataset
    """
    dataset_id = request.args.get('id')

    # Query dataset
    d = Dataset.query.filter_by(dataset_id=dataset_id).first()
    datsdataset = DATSDataset(d.fspath)

    authorized = current_user.is_authenticated

    with open(
            os.path.join(os.getcwd(),
                         "app/static/datasets/dataset-cbrain-ids.json"),
            "r") as f:
        cbrain_dataset_ids = json.load(f)

    datasetTitle = d.name.replace("'", "")
    if datasetTitle in cbrain_dataset_ids:
        dataset_cbrain_id = cbrain_dataset_ids[datasetTitle]
    else:
        dataset_cbrain_id = ""

    dataset = {
        "authorized": authorized,
        "name": datsdataset.name,
        "id": d.dataset_id,
        "title": d.name.replace("'", ""),
        "remoteUrl": d.remoteUrl,
        "isPrivate": d.is_private,
        "thumbnailURL": "/dataset_logo?id={}".format(d.dataset_id),
        "imagePath": "static/img/",
        "downloadPath": d.dataset_id,
        "URL": 'raw_data_url',
        "downloads": "0",
        "views": "0",
        "likes": "0",
        "dateAdded": str(d.date_created.date()),
        "dateUpdated": str(d.date_updated.date()),
        "creators": datsdataset.creators,
        "origin": datsdataset.origin,
        "size": datsdataset.size,
        "files": datsdataset.fileCount,
        "subjects": datsdataset.subjectCount,
        "formats": datsdataset.formats,
        "modalities": datsdataset.modalities,
        "licenses": datsdataset.licenses,
        "version": datsdataset.version,
        "sources": datsdataset.sources,
        "conpStatus": datsdataset.conpStatus,
        "authorizations": datsdataset.authorizations,
        "principalInvestigators": datsdataset.principalInvestigators,
        "primaryPublications": datsdataset.primaryPublications,
        "logoFilepath": datsdataset.LogoFilepath,
        "status": datsdataset.status,
        "cbrain_id": dataset_cbrain_id,
    }

    metadata = get_dataset_metadata_information(d)
    readme = get_dataset_readme(d.dataset_id)

    if dataset["status"] == "Working":
        color = "success"
    elif dataset["status"] == "Unknown":
        color = "lightgrey"
    else:
        color = "critical"

    ci_badge_url = "https://img.shields.io/badge/circleci-" + \
        dataset["status"] + "-" + color + "?style=flat-square&logo=circleci"

    try:
        zipped = DatasetCache(current_app).getZipLocation(d)
    except IOError:
        zipped = None
    showDownloadButton = zipped is not None
    zipLocation = '/data/{0}'.format(os.path.basename(zipped or ''))

    return render_template('dataset.html',
                           title='CONP | Dataset',
                           data=dataset,
                           metadata=metadata,
                           readme=readme,
                           showDownloadButton=showDownloadButton,
                           zipLocation=zipLocation,
                           ciBadgeUrl=ci_badge_url,
                           user=current_user)
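
# Defensive sketch: building the shields.io badge URL with explicit escaping
# rather than raw concatenation. shields.io treats '-' as a field separator
# (literal dashes are doubled) and quote() percent-encodes spaces; the
# statuses above ("Working"/"Unknown") are safe either way, so this is an
# alternative, not the code in use.
from urllib.parse import quote

def ci_badge_url_sketch(status, color):
    safe_status = quote(status.replace('-', '--'), safe='')
    return ("https://img.shields.io/badge/circleci-" + safe_status + "-"
            + color + "?style=flat-square&logo=circleci")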
def dataset_search():
    """
    Dataset Search Route

    This route executes a dataset search

    Args:
        search is the search term in the GET Request

    Returns:
        JSON containing the matching datasets
    """
    datasets = []

    authorized = current_user.is_authenticated

    if request.args.get('search'):
        term = '%' + request.args.get('search') + '%'
        # Query datasets
        datasets = Dataset.query.filter(
            or_(
                func.lower(Dataset.name).like(func.lower(term)),
                func.lower(Dataset.description).like(func.lower(term))))
    elif request.args.get('id'):
        # Query datasets
        datasets = Dataset.query.filter_by(
            dataset_id=request.args.get('id')).all()
    else:
        # Query datasets
        datasets = Dataset.query.order_by(Dataset.id).all()

    # Element input for payload
    elements = []

    # Build dataset response
    for d in datasets:
        try:
            datsdataset = DATSDataset(d.fspath)
        except Exception:
            # If the DATS file can't be loaded, skip this dataset.
            # There should be an error message in logs/update_datsets.log
            continue

        dataset = {
            "authorized": authorized,
            "id": d.dataset_id,
            "title": d.name.replace("'", ""),
            "isPrivate": d.is_private,
            "thumbnailURL": "/dataset_logo?id={}".format(d.dataset_id),
            "downloadPath": d.dataset_id,
            "URL": '?',
            "dateAdded": str(d.date_created.date()),
            "dateUpdated": str(d.date_updated.date()),
            "size": datsdataset.size,
            "files": datsdataset.fileCount,
            "subjects": datsdataset.subjectCount,
            "format": datsdataset.formats,
            "modalities": datsdataset.modalities,
            "sources": datsdataset.sources,
            "conpStatus": datsdataset.conpStatus,
            "authorizations": datsdataset.authorizations
        }
        elements.append(dataset)

    queryAll = bool(request.args.get('elements') == 'all')

    if not queryAll:
        if request.args.get('modalities'):
            filterModalities = request.args.get('modalities').split(",")
            elements = list(
                filter(lambda e: e['modalities'] is not None, elements))
            elements = list(
                filter(
                    lambda e: all(item in e['modalities'].lower()
                                  for item in filterModalities),
                    elements))
        if request.args.get('formats'):
            # split on commas; iterating the raw string would filter
            # against single characters
            filterFormats = request.args.get('formats').split(",")
            elements = list(
                filter(lambda e: e['format'] is not None, elements))
            elements = list(
                filter(
                    lambda e: all(item in e['format'].lower()
                                  for item in filterFormats),
                    elements))

        delta = int(request.args.get('max_per_page', 10)) * \
            (int(request.args.get('page', 1)) - 1)
        # clamp the cursor to a non-negative offset
        cursor = max(int(request.args.get('cursor') or 0), 0) + delta
        limit = max(min(int(request.args.get('limit') or 10), 10), 0)
        sort_key = request.args.get('sortKey') or "conpStatus"

        paginated = elements
        if sort_key == "conpStatus":
            order = {'conp': 0, 'canadian': 1, 'external': 2}
            paginated.sort(key=lambda o: order[o[sort_key].lower()])
        elif sort_key == "title":
            paginated.sort(key=lambda o: o[sort_key].lower())
        elif sort_key == "size":

            def getAbsoluteSize(e):
                if not e["size"]:
                    return 0.0
                units = ["KB", "MB", "GB", "TB"]
                unitScales = [1000, 1000**2, 1000**3, 1000**4]
                size = e["size"].split(" ")
                # coerce to float so an unknown unit cannot break the sort
                absoluteSize = float(size[0])
                if size[1] in units:
                    absoluteSize = float(size[0]) * unitScales[units.index(
                        size[1])]
                return absoluteSize

            paginated.sort(key=lambda o: getAbsoluteSize(o), reverse=True)
        else:
            paginated.sort(key=lambda o: (o[sort_key] is None, o[sort_key]))

        paginated = paginated[cursor:(cursor + limit)]
    else:
        paginated = elements

    # Construct payload
    payload = {
        "authorized": authorized,
        "total": len(elements),
        "sortKeys": [{
            "key": "conpStatus",
            "label": "Origin"
        }, {
            "key": "title",
            "label": "Title"
        }, {
            "key": "dateAdded",
            "label": "Date Added"
        }, {
            "key": "dateUpdated",
            "label": "Date Updated"
        }, {
            "key": "size",
            "label": "Size"
        }, {
            "key": "files",
            "label": "Files"
        }, {
            "key": "subjects",
            "label": "Subjects"
        }],
        "elements": paginated
    }

    return json.dumps(payload)
def dataset_search():
    """
    Dataset Search Route

    This route executes a dataset search

    Args:
        search is the search term in the GET Request

    Returns:
        JSON containing the matching datasets
    """
    datasets = []

    authorized = current_user.is_authenticated

    if request.args.get('search'):
        term = '%' + request.args.get('search') + '%'
        # Query datasets
        datasets = Dataset.query.filter(
            or_(func.lower(Dataset.name).like(func.lower(term)),
                func.lower(Dataset.description).like(func.lower(term)))
        )
    elif request.args.get('id'):
        # Query datasets
        datasets = Dataset.query.filter_by(
            dataset_id=request.args.get('id')).all()
    else:
        # Query datasets
        datasets = Dataset.query.order_by(Dataset.id).all()

    # Element input for payload
    elements = []

    # Build dataset response
    for d in datasets:
        try:
            datsdataset = DATSDataset(d.fspath)
        except Exception:
            # If the DATS file can't be loaded, skip this dataset.
            # There should be an error message in logs/update_datsets.log
            continue

        dataset = {
            "authorized": authorized,
            "id": d.dataset_id,
            "title": d.name.replace("'", ""),
            "isPrivate": d.is_private,
            "thumbnailURL": "/dataset_logo?id={}".format(d.dataset_id),
            "downloadPath": d.dataset_id,
            "URL": '?',
            "dateAdded": str(d.date_created.date()),
            "dateUpdated": str(d.date_updated.date()),
            "size": datsdataset.size,
            "files": datsdataset.fileCount,
            "subjects": datsdataset.subjectCount,
            "format": datsdataset.formats,
            "modalities": datsdataset.modalities,
            "sources": datsdataset.sources,
            "conpStatus": datsdataset.conpStatus
        }
        elements.append(dataset)

    queryAll = bool(request.args.get('elements') == 'all')

    if not queryAll:
        delta = int(request.args.get('max_per_page', 10)) * \
            (int(request.args.get('page', 1)) - 1)
        # clamp the cursor to a non-negative offset
        cursor = max(int(request.args.get('cursor') or 0), 0) + delta
        limit = max(min(int(request.args.get('limit') or 10), 10), 0)
        sort_key = request.args.get('sortKey') or "conpStatus"

        paginated = elements
        paginated.sort(key=lambda o: (o[sort_key] is None, o[sort_key]))
        paginated = paginated[cursor:(cursor + limit)]
    else:
        paginated = elements

    # Construct payload
    payload = {
        "authorized": authorized,
        "total": len(elements),
        "sortKeys": [
            {"key": "conpStatus", "label": "Origin"},
            {"key": "title", "label": "Title"},
            {"key": "dateAdded", "label": "Date Added"},
            {"key": "dateUpdated", "label": "Date Updated"},
            {"key": "size", "label": "Size"},
            {"key": "files", "label": "Files"},
            {"key": "subjects", "label": "Subjects"},
            {"key": "format", "label": "Format"},
            {"key": "modalities", "label": "Modalities"},
            {"key": "sources", "label": "Sources"}
        ],
        "elements": paginated
    }

    return json.dumps(payload)
def dataset_search():
    """
    Dataset Search Route

    This route executes a dataset search

    Args:
        search is the search term in the GET Request

    Returns:
        JSON containing the matching datasets
    """
    datasets = []

    authorized = current_user.is_authenticated

    if request.args.get('id'):
        # Query datasets
        datasets = Dataset.query.filter_by(
            dataset_id=request.args.get('id')).all()
    else:
        # Query datasets
        datasets = Dataset.query.order_by(Dataset.id).all()

    # Element input for payload
    elements = []

    with open(
            os.path.join(os.getcwd(),
                         "app/static/datasets/dataset-cbrain-ids.json"),
            "r") as f:
        cbrain_dataset_ids = json.load(f)

    # Get the number of views and downloads of datasets
    views = json.loads(datasets_views())
    downloads = json.loads(datasets_downloads())

    # Build dataset response
    for d in datasets:
        try:
            datsdataset = DATSDataset(d.fspath)
        except Exception:
            # If the DATS file can't be loaded, skip this dataset.
            # There should be an error message in logs/update_datsets.log
            continue

        # If a search term exists, filter results here
        if request.args.get('search'):
            search_term = request.args.get('search')
            with open(datsdataset.DatsFilepath, 'r') as dats:
                match = False
                for line in dats.readlines():
                    if search_term.lower() in line.lower():
                        match = True
                        break
            if not match:
                continue

        datasetTitle = d.name.replace("'", "")
        if datasetTitle in cbrain_dataset_ids:
            dataset_cbrain_id = cbrain_dataset_ids[datasetTitle]
        else:
            dataset_cbrain_id = ""

        views_nb = [
            v["nb_hits"] for v in views if v["dataset_id"] == d.dataset_id
        ]
        download_id = os.path.basename(d.fspath) + "_version"
        downloads_nb = [
            e["nb_hits"] for e in downloads
            if e["dataset_id"].startswith(download_id)
        ]

        ark_id_row = ArkId.query.filter_by(dataset_id=d.dataset_id).first()

        try:
            zipped = DatasetCache(current_app).getZipLocation(d)
        except IOError:
            zipped = None
        show_download_button = zipped is not None
        zip_location = '/data/{0}'.format(os.path.basename(zipped or ''))

        dataset = {
            "authorized": authorized,
            "ark_id": 'https://n2t.net/' + ark_id_row.ark_id,
            "id": d.dataset_id,
            "title": d.name.replace("'", "\'"),
            "remoteUrl": d.remoteUrl,
            "isPrivate": d.is_private,
            "thumbnailURL": "/dataset_logo?id={}".format(d.dataset_id),
            "downloadPath": d.dataset_id,
            "URL": '?',
            "downloads": downloads_nb,
            "views": views_nb,
            "dateAdded": str(d.date_added_to_portal.date())
            if d.date_added_to_portal else None,
            "dateUpdated": str(d.date_updated.date()),
            "creators": datsdataset.creators,
            "origin": datsdataset.origin,
            "size": datsdataset.size,
            "files": datsdataset.fileCount,
            "subjects": datsdataset.subjectCount,
            "formats": datsdataset.formats,
            "modalities": datsdataset.modalities,
            "licenses": datsdataset.licenses,
            "version": datsdataset.version,
            "sources": datsdataset.sources,
            "conpStatus": datsdataset.conpStatus,
            "authorizations": datsdataset.authorizations,
            "principalInvestigators": datsdataset.principalInvestigators,
            "primaryPublications": datsdataset.primaryPublications,
            "logoFilepath": datsdataset.LogoFilepath,
            "status": datsdataset.status,
            "cbrain_id": dataset_cbrain_id,
            "showDownloadButton": show_download_button,
            "zipLocation": zip_location
        }
        elements.append(dataset)

    modalities = []
    for e in elements:
        if e['modalities'] is None or e['modalities'] == '':
            continue
        for m in e['modalities']:
            modalities.append(m.lower())
    modalities = sorted(list(set(modalities)))

    formats = []
    # by default, formats should be represented in upper case
    # except for NIfTI, bigWig and RNA-Seq
    for e in elements:
        if e['formats'] is None or e['formats'] == []:
            continue
        for m in e['formats']:
            if m.lower() in ['nifti', 'nii', 'niigz']:
                formats.append('NIfTI')
            elif m.lower() in ['gifti', 'gii']:
                formats.append('GIfTI')
            elif m.lower() == 'bigwig':
                formats.append('bigWig')
            elif m.lower() == 'rna-seq':
                formats.append('RNA-Seq')
            else:
                formats.append(m.upper())
    formats = sorted(list(set(formats)), key=str.casefold)

    authorizations = ['Yes', 'No']

    query_all = bool(request.args.get('elements') == 'all')

    if not query_all:
        if request.args.get('modalities'):
            filter_modalities = request.args.get('modalities').split(",")
            elements = list(
                filter(lambda e: e['modalities'] is not None, elements))
            elements = list(
                filter(
                    lambda e: all(item in (m.lower()
                                           for m in e['modalities'])
                                  for item in filter_modalities),
                    elements))
        if request.args.get('formats'):
            filter_formats = request.args.get('formats').split(",")
            elements = list(
                filter(lambda e: e['formats'] is not None, elements))
            elements = list(
                filter(
                    lambda e: all(item.lower() in (f.lower()
                                                   for f in e['formats'])
                                  for item in filter_formats),
                    elements))
        if request.args.get('authorizations'):
            filter_auth = request.args.get('authorizations').split(',')
            elements = list(
                filter(lambda e: e['authorizations'] is not None, elements))
            for item in filter_auth:
                if item == "Yes":
                    elements = list(
                        filter(
                            lambda e: e['authorizations'] in
                            ['private', 'registered'], elements))
                if item == "No":
                    elements = list(
                        filter(
                            lambda e: e['authorizations'] not in
                            ['private', 'registered'], elements))
        if request.args.get('cbrain'):
            elements = list(filter(lambda e: e['cbrain_id'] != '', elements))

        cursor = None
        limit = None
        if request.args.get('max_per_page') != 'All':
            delta = int(request.args.get('max_per_page', 10)) * \
                (int(request.args.get('page', 1)) - 1)
            # clamp the cursor to a non-negative offset
            cursor = max(int(request.args.get('cursor') or 0), 0) + delta
            limit = int(request.args.get('limit') or 10)

        sort_key = request.args.get('sortKey') or "conpStatus"

        paginated = elements
        if sort_key == "conpStatus":
            order = {'conp': 0, 'canadian': 1, 'external': 2}
            # unknown statuses sort last; len(order) keeps the key orderable
            paginated.sort(key=lambda o: (o[sort_key].lower() not in order,
                                          order.get(o[sort_key].lower(),
                                                    len(order))))
        elif sort_key == "title":
            paginated.sort(key=lambda o: o[sort_key].lower())
        elif sort_key == "sizeDes" or sort_key == "sizeAsc":

            def get_absolute_size(o):
                if not o["size"]:
                    return 0.0
                units = ["KB", "MB", "GB", "TB"]
                unit_scales = [1000, 1000**2, 1000**3, 1000**4]
                size = o["size"].split(" ")
                # coerce to float so an unknown unit cannot break the sort
                absolute_size = float(size[0])
                if size[1] in units:
                    absolute_size = float(size[0]) * \
                        unit_scales[units.index(size[1])]
                return absolute_size

            reverse = (sort_key == 'sizeDes')
            paginated.sort(key=lambda o: get_absolute_size(o),
                           reverse=reverse)
        elif sort_key == "filesDes" or sort_key == "filesAsc":

            def get_number_of_files(o):
                if not o["files"]:
                    return 0
                return int(o["files"])

            reverse = (sort_key == 'filesDes')
            paginated.sort(key=lambda o: get_number_of_files(o),
                           reverse=reverse)
        elif sort_key == "subjectsDes" or sort_key == "subjectsAsc":

            def get_number_of_subjects(o):
                if not o["subjects"]:
                    return 0
                return int(o["subjects"])

            reverse = (sort_key == 'subjectsDes')
            paginated.sort(key=lambda o: get_number_of_subjects(o),
                           reverse=reverse)
        elif sort_key == "dateAddedDesc" or sort_key == "dateAddedAsc":
            reverse = (sort_key == 'dateAddedAsc')
            paginated.sort(key=lambda o: (o["dateAdded"] is None,
                                          o["dateAdded"]),
                           reverse=reverse)
        elif sort_key == "dateUpdatedDesc" or sort_key == "dateUpdatedAsc":
            reverse = (sort_key == 'dateUpdatedAsc')
            paginated.sort(key=lambda o: (o["dateUpdated"] is None,
                                          o["dateUpdated"]),
                           reverse=reverse)
        elif sort_key == "viewsDes" or sort_key == "viewsAsc":
            reverse = (sort_key == "viewsDes")
            paginated.sort(key=lambda o: (o["views"] is None, o["views"]),
                           reverse=reverse)
        elif sort_key == "downloadsDes" or sort_key == "downloadsAsc":
            reverse = (sort_key == "downloadsDes")
            paginated.sort(key=lambda o: (o["downloads"] is None,
                                          o["downloads"]),
                           reverse=reverse)
        else:
            paginated.sort(key=lambda o: (o[sort_key] is None, o[sort_key]))

        if cursor is not None and limit is not None:
            paginated = paginated[cursor:(cursor + limit)]
    else:
        paginated = elements

    # Construct payload
    payload = {
        "authorized": authorized,
        "total": len(elements),
        "sortKeys": [{
            "key": "conpStatus",
            "label": "Origin"
        }, {
            "key": "title",
            "label": "Dataset Name"
        }, {
            "key": "dateAddedAsc",
            "label": "Date Added (Newest First)"
        }, {
            "key": "dateAddedDesc",
            "label": "Date Added (Oldest First)"
        }, {
            "key": "dateUpdatedAsc",
            "label": "Date Updated (Newest First)"
        }, {
            "key": "dateUpdatedDesc",
            "label": "Date Updated (Oldest First)"
        }, {
            "key": "sizeDes",
            "label": "Disk Space Usage (Largest First)"
        }, {
            "key": "sizeAsc",
            "label": "Disk Space Usage (Smallest First)"
        }, {
            "key": "filesDes",
            "label": "Number of Files (Largest First)"
        }, {
            "key": "filesAsc",
            "label": "Number of Files (Smallest First)"
        }, {
            "key": "subjectsDes",
            "label": "Number of Subjects (Largest First)"
        }, {
            "key": "subjectsAsc",
            "label": "Number of Subjects (Smallest First)"
        }, {
            "key": "viewsDes",
            "label": "Number of Views (Largest First)"
        }, {
            "key": "viewsAsc",
            "label": "Number of Views (Smallest First)"
        }, {
            "key": "downloadsDes",
            "label": "Number of Direct Downloads (Largest First)"
        }, {
            "key": "downloadsAsc",
            "label": "Number of Direct Downloads (Smallest First)"
        }],
        "filterKeys": [{
            "key": "modalities",
            "values": modalities
        }, {
            "key": "formats",
            "values": formats
        }, {
            "key": "authorizations",
            "values": authorizations
        }],
        "elements": paginated
    }

    return json.dumps(payload)