def initilize_publication(publication, status='publishing', revision=None, revision_text=None, revision_titles=None):
    """Initialize a publication document and persist it to Elasticsearch.

    Sets bookkeeping fields on the publication dict (project id, status,
    version, normalized licenses), fills in creation/revision metadata, then
    creates or updates the corresponding ``IndexedPublication`` document.

    Args:
        publication (dict): Publication metadata; must contain
            ``project.value.projectId``.
        status (str): Status to record on the publication.
        revision (int, optional): Revision number when re-publishing; when
            set, the original creation date is copied from the base document.
        revision_text (str, optional): Description of the revision.
        revision_titles (list, optional): Titles affected by the revision.

    Returns:
        IndexedPublication: The saved Elasticsearch document.
    """
    publication['projectId'] = publication['project']['value']['projectId']
    publication['status'] = status
    publication['version'] = 2
    # Normalize licensing: move the incoming value(s) under 'licenses' and
    # blank the legacy singular 'license' key.
    publication['licenses'] = publication.pop('license', [])
    publication['license'] = ''

    es_client = new_es_client()
    if revision:
        # Revisions keep the creation date of the base (unrevised) document.
        base_pub = IndexedPublication.from_id(publication['projectId'], revision=None, using=es_client)
        publication['created'] = base_pub['created']
        publication['revision'] = revision
        publication['revisionDate'] = datetime.datetime.now().isoformat()
        publication['revisionText'] = revision_text
        if revision_titles:
            publication['revisionTitles'] = revision_titles
    elif 'created' not in publication:
        publication['created'] = datetime.datetime.now().isoformat()

    try:
        pub = IndexedPublication.from_id(publication['projectId'], revision=revision, using=es_client)
        pub.update(using=es_client, **publication)
    except DocumentNotFound:
        pub = IndexedPublication(project_id=publication['projectId'], **publication)
    # Single save persists both the updated and the newly created document.
    # (Previously the not-found branch saved the document twice, issuing a
    # redundant Elasticsearch write.)
    pub.save(using=es_client)

    # Refresh index so that search works in subsequent pipeline operations.
    IndexedPublication._index.refresh(using=es_client)
    return pub
def save_publication(publication, status='publishing'):
    """Save a publication document to Elasticsearch.

    Stamps the publication dict with its project id, creation time, status,
    version, and normalized licenses, then creates or updates the matching
    ``IndexedPublication``.

    Args:
        publication (dict): Publication metadata; must contain
            ``project.value.projectId``.
        status (str): Status to record on the publication.

    Returns:
        IndexedPublication: The saved Elasticsearch document.
    """
    publication['projectId'] = publication['project']['value']['projectId']
    publication['created'] = datetime.datetime.now().isoformat()
    publication['status'] = status
    publication['version'] = 2
    # Normalize licensing: move the incoming value(s) under 'licenses' and
    # blank the legacy singular 'license' key.
    publication['licenses'] = publication.pop('license', [])
    publication['license'] = ''

    try:
        pub = IndexedPublication.from_id(publication['projectId'])
        pub.update(**publication)
    except DocumentNotFound:
        pub = IndexedPublication(project_id=publication['projectId'], **publication)
    # Single save persists both paths. (Previously the not-found branch saved
    # the document twice, issuing a redundant Elasticsearch write.)
    pub.save()
    return pub
def generate_manifest_other(project_id, version=None):
    """Build the file manifest for an 'other'-type publication archive.

    Looks up the published document to get its project UUID, then lists every
    file under the publication's archive directory with its SHA-1 checksum.

    Args:
        project_id (str): Project ID to look up (e.g. PRJ-1234).
        version (int, optional): Revision number; appended to the archive
            directory name as ``<project_id>v<version>`` when present.

    Returns:
        list: One dict per archived file with keys ``parent_entity``,
        ``corral_path``, and ``checksum``.
    """
    pub_doc = IndexedPublication.from_id(project_id, revision=version)
    project_uuid = pub_doc.project.uuid

    # Versioned publications live in a suffixed directory on disk.
    if version:
        project_id = '{}v{}'.format(project_id, str(version))
    archive_path = os.path.join(PUBLICATIONS_MOUNT_ROOT, project_id)

    return [
        {
            'parent_entity': project_uuid,
            'corral_path': child_path,
            'checksum': get_sha1_hash(child_path)
        }
        for child_path in get_child_paths(archive_path)
    ]
def to_dataset_json(self):
    """
    Serialize project to json for google dataset search
    https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/BMNJPS

    Returns:
        dict: schema.org Dataset JSON-LD for this project. DOI fields and the
        distribution block are filled in only when the project has DOIs.
    """
    # Skeleton Dataset record; author/creator placeholders are replaced below.
    dataset_json = {
        "@context": "http://schema.org",
        "@type": "Dataset",
        "@id": "",
        "identifier": "",
        "logo": "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
        "name": self.title,
        "creator": [{
            "name": "",
            "affiliation": "",
            "@id": "",
            "identifier": ""
        }],
        "author": [{
            "name": "",
            "affiliation": "",
            "@id": "",
            "identifier": ""
        }],
        "datePublished": self.created,
        "dateModified": self.to_body_dict()['lastUpdated'],
        "description": self.description,
        "keywords": self.keywords.split(','),
        "license": {
            "@type": "Dataset",
            "text": ""
        },
        "publisher": {
            "@type": "Organization",
            "name": "Designsafe-CI"
        },
        "provider": {
            "@type": "Organization",
            "name": "Designsafe-CI"
        },
        "includedInDataCatalog": {
            "@type": "Organization",
            "name": "Designsafe-CI",
            "url": "https://designsafe-ci.org"
        },
    }
    if self.dois:
        dataset_json["distribution"] = {
            "@type": "DataDownload",
            "name": self.to_body_dict()['value']['projectId'] + "_archive.zip",
            "fileFormat": "application/zip",
            "contentSize": "",
            "@id": "",
            "identifier": ""
        }
        dataset_json['@id'] = self.dois[0]
        dataset_json['identifier'] = self.dois[0]
    else:
        # No DOI: log related entities for diagnostics only.
        related_ents = self.related_entities()
        logger.debug(related_ents)

    # Prefer the explicit author ordering when present; otherwise fall back
    # to PI followed by co-PIs.
    if getattr(self, 'team_order', False):
        authors = sorted(self.team_order, key=lambda x: x['order'])
    else:
        authors = [{
            'name': username
        } for username in [self.pi] + self.co_pis]
    dataset_json['creator'] = generate_creators(authors)
    dataset_json['author'] = generate_creators(authors)

    try:
        pub = IndexedPublication.from_id(self.project_id)
        dataset_json['license'] = pub.licenses.works
    except (DocumentNotFound, AttributeError):
        pass
    try:
        pub = IndexedPublicationLegacy.from_id(self.project_id)
        dataset_json['license'] = pub.licenses.works
    except (DocumentNotFound, AttributeError):
        # Legacy documents may also lack a 'licenses' attribute; catching
        # AttributeError here matches the handler above (previously only
        # DocumentNotFound was caught, so a legacy doc without licenses
        # raised out of this method).
        pass
    return dataset_json
def walk_experimental(project_id, version=None):
    """
    Walk an experimental project and reconstruct parent/child relationships

    Params
    ------
    project_id: Project ID to look up (e.g. PRJ-1234)
    version: optional revision number of the publication to walk

    Returns
    -------
    dict: dict in form {'uuid-ex-1':
                        {'children': ['title of child 1', ...],
                        'parent': 'title of parent',
                        'container_path': 'path/relative/to/fcroot',
                        'fedora_mapping': {}}}
    """
    from urllib import parse
    doc = IndexedPublication.from_id(project_id, revision=version)
    relation_map = []

    project_meta = format_metadata_for_fedora(project_id, version=version)
    if version:
        # Versioned publications are addressed as '<project_id>v<version>'.
        project_id = '{}v{}'.format(project_id, str(version))
    license = project_meta.get('license', None)
    full_author_list = []
    project_map = {
        'uuid': doc.project.uuid,
        'container_path': project_id,
        'fedora_mapping': {
            **project_meta,
            'generated': [],
            'license': None
        },
        'fileObjs': []
    }

    experiments_list = doc.experimentsList
    for expt in experiments_list:
        # Each experiment becomes a container directly under the project;
        # titles are URL-quoted to form safe container path segments.
        expt_container_path = "{}/{}".format(project_id,
                                             parse.quote(expt.value.title))
        print('experiment ' + expt.value.title)
        exp_doi = expt.doi
        project_map['fedora_mapping']['generated'].append(
            'Experiment: {}'.format(exp_doi))
        experiment_map = {
            'uuid': expt.uuid,
            'container_path': expt_container_path,
            'fedora_mapping': {
                **format_experiment(expt),
                'license': license,
                'wasGeneratedBy': project_id,
                'generated': []
            },
            'fileObjs': expt.fileObjs
        }
        # Accumulate authors across all experiments; deduped at the end.
        full_author_list += experiment_map['fedora_mapping']['creator']

        # Reports associated with this experiment.
        reports = filter(lambda report: expt.uuid in report.value.experiments,
                         getattr(doc, 'reportsList', []))
        for report in reports:
            report_container_path = "{}/{}".format(
                expt_container_path, parse.quote(report.value.title))
            print('\treport ' + report.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Report: {}'.format(report.value.title))
            report_map = {
                'uuid': report.uuid,
                'fileObjs': report.fileObjs,
                'container_path': report_container_path,
                'fedora_mapping': {
                    **format_report(report),
                    'wasGeneratedBy': 'Experiment: {}'.format(exp_doi)
                }
            }
            relation_map.append(report_map)

        # Analyses associated with this experiment.
        analysis_list = filter(
            lambda analysis: expt.uuid in analysis.value.experiments,
            getattr(doc, 'analysisList', []))
        for analysis in analysis_list:
            analysis_container_path = "{}/{}".format(
                expt_container_path, parse.quote(analysis.value.title))
            print('\tanalysis ' + analysis.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Analysis: {}'.format(analysis.value.title))
            analysis_map = {
                'uuid': analysis.uuid,
                'fileObjs': analysis.fileObjs,
                'container_path': analysis_container_path,
                'fedora_mapping': {
                    **format_analysis(analysis),
                    'wasGeneratedBy': 'Experiment: {}'.format(exp_doi)
                }
            }
            relation_map.append(analysis_map)

        # Model configurations nest under the experiment; sensors nest under
        # each model config, and events nest under each sensor list.
        model_configs = filter(
            lambda model_config: expt.uuid in model_config.value.experiments,
            getattr(doc, 'modelConfigs', []))
        for mc in model_configs:
            configs_container_path = "{}/{}".format(
                expt_container_path, parse.quote(mc.value.title))
            print('\tmodel config ' + mc.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Model Configuration: {}'.format(mc.value.title))
            mc_map = {
                'uuid': mc.uuid,
                'fileObjs': mc.fileObjs,
                'container_path': configs_container_path,
                'fedora_mapping': {
                    **format_model_config(mc),
                    'wasGeneratedBy': exp_doi
                }
            }

            # Sensor lists tied to BOTH this model config and this experiment.
            sensor_lists = filter(
                lambda sensor_list: mc.uuid in sensor_list.value.modelConfigs
                and expt.uuid in sensor_list.associationIds,
                getattr(doc, 'sensorLists', []))
            for sl in sensor_lists:
                sl_container_path = "{}/{}".format(configs_container_path,
                                                   parse.quote(sl.value.title))
                print('\t\tsensor list ' + sl.value.title)
                experiment_map['fedora_mapping']['generated'].append(
                    'Sensor: {}'.format(sl.value.title))
                sl_map = {
                    'uuid': sl.uuid,
                    'fileObjs': sl.fileObjs,
                    'container_path': sl_container_path,
                    'fedora_mapping': {
                        **format_sensor_info(sl),
                        'wasGeneratedBy': 'Experiment: {}'.format(exp_doi),
                        'wasDerivedFrom': 'Model Configuration: {}'.format(mc.value.title),
                        'influenced': []
                    }
                }

                # Events tied to this sensor list, experiment, and model config.
                events = filter(
                    lambda event: sl.uuid in event.value.sensorLists
                    and expt.uuid in event.associationIds
                    and mc.uuid in event.associationIds,
                    getattr(doc, 'eventsList', []))
                for event in events:
                    evt_container_path = "{}/{}".format(
                        sl_container_path, parse.quote(event.value.title))
                    print('\t\t\tevent ' + event.value.title)
                    sl_map['fedora_mapping']['influenced'].append(
                        'Event: {}'.format(event.value.title))
                    experiment_map['fedora_mapping']['generated'].append(
                        'Event: {}'.format(event.value.title))
                    event_map = {
                        'uuid': event.uuid,
                        'fileObjs': event.fileObjs,
                        'container_path': evt_container_path,
                        'fedora_mapping': {
                            **format_event(event),
                            'wasGeneratedBy': 'Experiment: {}'.format(exp_doi),
                            'wasDerivedFrom': 'Model Configuration: {}'.format(mc.value.title),
                            'wasInfluencedBy': 'Sensor: {}'.format(sl.value.title)
                        }
                    }
                    relation_map.append(event_map)
                relation_map.append(sl_map)
            relation_map.append(mc_map)
        relation_map.append(experiment_map)

    # Deduplicate authors gathered from every experiment.
    project_map['fedora_mapping']['creator'] = list(set(full_author_list))
    relation_map.append(project_map)
    # Reverse so parents precede children in the returned ordering.
    return relation_map[::-1]
def format_metadata_for_fedora(project_id, version=None):
    """
    Format a publication's metadata so that it can be ingested into Fedora.

    Args:
        project_id (str): Project ID to look up (e.g. PRJ-1234).
        version (int, optional): Revision of the publication to format.

    Returns:
        dict: Dublin-Core-style metadata (title, creator, identifier, etc.)
        ready for Fedora ingest.
    """
    doc = IndexedPublication.from_id(project_id, revision=version)
    pub_meta = doc.project.value

    # Author list: "Last, First" in team order when available, otherwise
    # fall back to looking up the PI's name.
    author_list = []
    try:
        ordered_team = sorted(pub_meta.teamOrder, key=lambda member: member.order)
        author_list = list(
            map(lambda member: "{}, {}".format(member.lname, member.fname),
                ordered_team))
    except AttributeError:
        author_list = [_get_user_by_username(doc, pub_meta.pi)]

    # Award name/number pairs become flat contributor entries.
    award_numbers = getattr(pub_meta, 'awardNumbers', [])
    contributors = []
    for award in award_numbers:
        contributors.append(award['name'] or None)
        contributors.append(award['number'] or None)

    identifiers = [
        pub_meta.projectId, 'https://www.designsafe-ci.org/'
        'data/browser/public/designsafe.storage.published/{}'.format(
            pub_meta.projectId), doc.project.uuid
    ]
    identifiers += getattr(pub_meta, 'dois', [])
    identifiers += [getattr(doc.project, 'doi', None)]

    project_type = pub_meta.projectType
    if project_type == 'other':
        # Fix: the original had a trailing comma here, which made
        # project_type a 1-tuple (('other',)) instead of a string.
        project_type = getattr(pub_meta, 'dataType', "other")

    fc_meta = {
        'title': pub_meta.title,
        'entity': 'Project',
        'description': pub_meta.description,
        'identifier': identifiers,
        'subject': pub_meta.keywords.split(', '),
        'creator': author_list,
        'issued': doc.project.created.isoformat(),
        'contributor': contributors,
        'type': project_type,
        'publisher': 'Designsafe',
    }

    licenses = getattr(doc, 'licenses', None)
    if licenses:
        fc_meta['license'] = list(licenses.to_dict().values())

    associated_projects = getattr(pub_meta, 'associatedProjects', None)
    if associated_projects:
        references = list(
            map(lambda assoc: assoc['title'], associated_projects))
        # 'href' may be absent on some associations; omit relations entirely
        # in that case rather than failing.
        try:
            relation = list(
                map(lambda assoc: assoc['href'], associated_projects))
        except KeyError:
            relation = []

        fc_meta['references'] = references
        fc_meta['relation'] = relation

    return fc_meta
def to_dataset_json(self, **kwargs):
    """
    Serialize project to json for google dataset search
    https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/BMNJPS

    Returns:
        dict: schema.org Dataset JSON-LD. When the project has DOIs, a
        'distribution' copy of the record is embedded; otherwise related
        entities with DOIs are emitted as 'relatedIdentifier_<i>' records.
    """
    # Skeleton Dataset record; author/creator placeholders are replaced below.
    dataset_json = {
        "@context": "http://schema.org",
        "@type": "Dataset",
        "@id": "",
        "identifier": "",
        "logo": "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
        "name": self.title,
        "creator": [{
            "name": "",
            "affiliation": "",
            "@id": "",
            "identifier": ""
        }],
        "author": [{
            "name": "",
            "affiliation": "",
            "@id": "",
            "identifier": ""
        }],
        "datePublished": self.created,
        "dateModified": self.to_body_dict()['lastUpdated'],
        "description": self.description,
        "keywords": self.keywords.split(','),
        "license": {
            "@type": "CreativeWork",
            "license": "",
            "url": ""
        },
        "publisher": {
            "@type": "Organization",
            "name": "Designsafe-CI",
            "url": "https://designsafe-ci.org"
        },
        "provider": {
            "@type": "Organization",
            "name": "Designsafe-CI"
        },
        "includedInDataCatalog": {
            "@type": "DataCatalog",
            "name": "Designsafe-CI",
            "url": "https://designsafe-ci.org"
        },
    }
    # Prefer explicit team ordering when present; otherwise PI then co-PIs.
    if getattr(self, 'team_order', False):
        authors = sorted(self.team_order, key=lambda x: x['order'])
    else:
        authors = [{
            'name': username
        } for username in [self.pi] + self.co_pis]
    dataset_json['creator'] = generate_creators(authors)
    dataset_json['author'] = generate_creators(authors)
    # Best-effort: pull the license URL from the indexed publication.
    try:
        pub = IndexedPublication.from_id(self.project_id)
        license_info = generate_licenses(pub)
        dataset_json['license'] = license_info[0]["url"]
    except (DocumentNotFound, AttributeError):
        pass

    if self.dois:
        dataset_json['@id'] = self.dois[0]
        dataset_json['identifier'] = self.dois[0]
        # Embed a full copy of the record as the distribution entry.
        dataset_json["distribution"] = {
            "@context": "http://schema.org",
            "@type": "Dataset",
            "@id": "",
            "identifier": "",
            "logo": "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
            "name": self.title,
            "creator": [{
                "name": "",
                "affiliation": "",
                "@id": "",
                "identifier": ""
            }],
            "author": [{
                "name": "",
                "affiliation": "",
                "@id": "",
                "identifier": ""
            }],
            "datePublished": self.created,
            "dateModified": self.to_body_dict()['lastUpdated'],
            "description": self.description,
            "keywords": self.keywords.split(','),
            "license": {
                "@type": "CreativeWork",
                "license": "",
                "url": ""
            },
            "publisher": {
                "@type": "Organization",
                "name": "Designsafe-CI",
                "url": "https://designsafe-ci.org"
            },
            "provider": {
                "@type": "Organization",
                "name": "Designsafe-CI"
            },
            "includedInDataCatalog": {
                "@type": "DataCatalog",
                "name": "Designsafe-CI",
                "url": "https://designsafe-ci.org"
            },
        }
    else:
        # No project-level DOI: emit each DOI-bearing related entity as its
        # own 'relatedIdentifier_<i>' record.
        related_ents = self.related_entities()
        for i in range(len(related_ents)):
            if hasattr(related_ents[i], 'dois') and related_ents[i].dois:
                dataset_json['relatedIdentifier_' + str(i)] = {
                    "@context": "http://schema.org",
                    "@type": "Dataset",
                    "@id": "",
                    "identifier": "",
                    "logo": "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
                    "name": related_ents[i].title,
                    "creator": [{
                        "name": "",
                        "affiliation": "",
                        "@id": "",
                        "identifier": ""
                    }],
                    "author": [{
                        "name": "",
                        "affiliation": "",
                        "@id": "",
                        "identifier": ""
                    }],
                    "datePublished": related_ents[i].created,
                    "dateModified": related_ents[i].to_body_dict()['lastUpdated'],
                    "description": related_ents[i].description,
                    "license": {
                        "@type": "CreativeWork",
                        "license": "",
                        "url": ""
                    },
                    "publisher": {
                        "@type": "Organization",
                        "name": "Designsafe-CI"
                    },
                    "provider": {
                        "@type": "Organization",
                        "name": "Designsafe-CI"
                    },
                    "includedInDataCatalog": {
                        "@type": "DataCatalog",
                        "name": "Designsafe-CI",
                        "url": "https://designsafe-ci.org"
                    },
                }
                dataset_json['relatedIdentifier_' +
                             str(i)]['@id'] = related_ents[i].dois[0]
                dataset_json[
                    'relatedIdentifier_' +
                    str(i)]['identifier'] = related_ents[i].dois[0]
                if getattr(related_ents[i], 'team_order', False):
                    authors = sorted(related_ents[i].team_order,
                                     key=lambda x: x['order'])
                else:
                    # NOTE(review): falls back to the PARENT project's
                    # PI/co-PIs, not the related entity's — confirm intended.
                    authors = [{
                        'name': username
                    } for username in [self.pi] + self.co_pis]
                dataset_json['relatedIdentifier_' +
                             str(i)]['creator'] = generate_creators(
                                 authors)
                dataset_json['relatedIdentifier_' +
                             str(i)]['author'] = generate_creators(authors)
                # NOTE(review): this assignment cannot raise DocumentNotFound
                # or AttributeError ('license' is always set above), so the
                # try/except is effectively dead code.
                try:
                    dataset_json[
                        'relatedIdentifier_' +
                        str(i)]['license'] = dataset_json['license']
                except (DocumentNotFound, AttributeError):
                    pass
    return dataset_json