Example #1
0
def initilize_publication(publication,
                          status='publishing',
                          revision=None,
                          revision_text=None,
                          revision_titles=None):
    """Initialize (create or update) a publication document in Elasticsearch.

    Note: the misspelled name is retained because external callers use it.

    Params
    ------
    publication: dict of publication metadata; mutated in place.
    status: workflow status to stamp on the publication.
    revision: revision number when publishing a new revision, else None.
    revision_text: free-text description of the revision changes.
    revision_titles: optional per-entity title overrides for the revision.

    Returns
    -------
    IndexedPublication: the saved Elasticsearch document.
    """
    publication['projectId'] = publication['project']['value']['projectId']
    publication['status'] = status
    publication['version'] = 2
    # The singular 'license' key is superseded by the 'licenses' list; keep
    # the old key present but empty for index-mapping compatibility.
    publication['licenses'] = publication.pop('license', [])
    publication['license'] = ''
    es_client = new_es_client()
    if revision:
        # Carry the creation date over from the base (unrevised) publication.
        base_pub = IndexedPublication.from_id(publication['projectId'],
                                              revision=None,
                                              using=es_client)
        publication['created'] = base_pub['created']
        publication['revision'] = revision
        publication['revisionDate'] = datetime.datetime.now().isoformat()
        publication['revisionText'] = revision_text
        if revision_titles:
            publication['revisionTitles'] = revision_titles
    elif 'created' not in publication:
        publication['created'] = datetime.datetime.now().isoformat()
    try:
        pub = IndexedPublication.from_id(publication['projectId'],
                                         revision=revision,
                                         using=es_client)
        pub.update(using=es_client, **publication)
    except DocumentNotFound:
        pub = IndexedPublication(project_id=publication['projectId'],
                                 **publication)
    # Single save covers both branches; the original code saved twice on the
    # DocumentNotFound path, issuing a redundant write.
    pub.save(using=es_client)

    # Refresh index so that search works in subsequent pipeline operations.
    IndexedPublication._index.refresh(using=es_client)
    return pub
Example #2
0
def save_publication(publication, status='publishing'):
    """Create or update a publication document in Elasticsearch.

    Params
    ------
    publication: dict of publication metadata; mutated in place.
    status: workflow status to stamp on the publication.

    Returns
    -------
    IndexedPublication: the saved Elasticsearch document.
    """
    publication['projectId'] = publication['project']['value']['projectId']
    publication['created'] = datetime.datetime.now().isoformat()
    publication['status'] = status
    publication['version'] = 2
    # The singular 'license' key is superseded by the 'licenses' list; keep
    # the old key present but empty for index-mapping compatibility.
    publication['licenses'] = publication.pop('license', [])
    publication['license'] = ''
    try:
        pub = IndexedPublication.from_id(publication['projectId'])
        pub.update(**publication)
    except DocumentNotFound:
        pub = IndexedPublication(project_id=publication['projectId'],
                                 **publication)
    # Single save covers both branches; the original code saved twice on the
    # DocumentNotFound path, issuing a redundant write.
    pub.save()
    return pub
def generate_manifest_other(project_id, version=None):
    """Build a checksum manifest for every file under an Other-type publication.

    Params
    ------
    project_id: Project ID to look up (e.g. PRJ-1234)
    version: optional revision number; appended to the archive directory name.

    Returns
    -------
    list of dicts, one per file, each with the parent entity uuid, the
    on-disk (corral) path, and the file's SHA-1 checksum.
    """
    doc = IndexedPublication.from_id(project_id, revision=version)
    parent_uuid = doc.project.uuid

    # Versioned publications live in a "<PRJ-ID>v<N>" directory.
    if version:
        project_id = '{}v{}'.format(project_id, str(version))
    archive_path = os.path.join(PUBLICATIONS_MOUNT_ROOT, project_id)

    return [{
        'parent_entity': parent_uuid,
        'corral_path': child_path,
        'checksum': get_sha1_hash(child_path)
    } for child_path in get_child_paths(archive_path)]
Example #4
0
    def to_dataset_json(self):
        """
        Serialize project to json for google dataset search
        https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/BMNJPS

        Returns a schema.org Dataset dict. The '@id'/'identifier' fields and
        the 'distribution' entry are populated only when the project has a DOI.
        """

        dataset_json = {
            "@context": "http://schema.org",
            "@type": "Dataset",
            "@id": "",
            "identifier": "",
            "logo":
            "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
            "name": self.title,
            "creator": [{
                "name": "",
                "affiliation": "",
                "@id": "",
                "identifier": ""
            }],
            "author": [{
                "name": "",
                "affiliation": "",
                "@id": "",
                "identifier": ""
            }],
            "datePublished": self.created,
            "dateModified": self.to_body_dict()['lastUpdated'],
            "description": self.description,
            "keywords": self.keywords.split(','),
            "license": {
                "@type": "Dataset",
                "text": ""
            },
            "publisher": {
                "@type": "Organization",
                "name": "Designsafe-CI"
            },
            "provider": {
                "@type": "Organization",
                "name": "Designsafe-CI"
            },
            "includedInDataCatalog": {
                "@type": "Organization",
                "name": "Designsafe-CI",
                "url": "https://designsafe-ci.org"
            },
        }

        # The original evaluated `if self.dois:` twice back to back; the two
        # branches are merged here with identical behavior.
        if self.dois:
            dataset_json["distribution"] = {
                "@type": "DataDownload",
                "name":
                self.to_body_dict()['value']['projectId'] + "_archive.zip",
                "fileFormat": "application/zip",
                "contentSize": "",
                "@id": "",
                "identifier": ""
            }
            dataset_json['@id'] = self.dois[0]
            dataset_json['identifier'] = self.dois[0]
        else:
            # No DOI minted yet: log related entities for debugging only.
            related_ents = self.related_entities()
            logger.debug(related_ents)

        if getattr(self, 'team_order', False):
            authors = sorted(self.team_order, key=lambda x: x['order'])
        else:
            authors = [{
                'name': username
            } for username in [self.pi] + self.co_pis]
        dataset_json['creator'] = generate_creators(authors)
        dataset_json['author'] = generate_creators(authors)

        # Prefer the license from the current index, then fall back to (and
        # possibly overwrite with) the legacy index if that lookup succeeds.
        try:
            pub = IndexedPublication.from_id(self.project_id)
            dataset_json['license'] = pub.licenses.works
        except (DocumentNotFound, AttributeError):
            pass
        try:
            pub = IndexedPublicationLegacy.from_id(self.project_id)
            dataset_json['license'] = pub.licenses.works
        except DocumentNotFound:
            pass

        return dataset_json
def walk_experimental(project_id, version=None):
    """
    Walk an experimental project and reconstruct parent/child relationships

    Params
    ------
    project_id: Project ID to look up (e.g. PRJ-1234)
    version: optional revision number of the publication to walk

    Returns
    -------
    list: entity maps ordered parent-first (project, then experiments, then
          their children). Each map has the form
          {'uuid': ...,
           'container_path': 'path/relative/to/fcroot',
           'fedora_mapping': {...},
           'fileObjs': [...]}
    """
    from urllib import parse
    doc = IndexedPublication.from_id(project_id, revision=version)
    relation_map = []

    project_meta = format_metadata_for_fedora(project_id, version=version)
    if version:
        # Versioned publications are rooted at "<PRJ-ID>v<N>".
        project_id = '{}v{}'.format(project_id, str(version))
    license = project_meta.get('license', None)
    full_author_list = []
    project_map = {
        'uuid': doc.project.uuid,
        'container_path': project_id,
        'fedora_mapping': {
            **project_meta, 'generated': [],
            'license': None
        },
        'fileObjs': []
    }

    experiments_list = doc.experimentsList
    for expt in experiments_list:
        # Do stuff with experiment.
        # Titles are URL-quoted to produce safe container path segments.
        expt_container_path = "{}/{}".format(project_id,
                                             parse.quote(expt.value.title))
        print('experiment ' + expt.value.title)
        exp_doi = expt.doi
        project_map['fedora_mapping']['generated'].append(
            'Experiment: {}'.format(exp_doi))

        experiment_map = {
            'uuid': expt.uuid,
            'container_path': expt_container_path,
            'fedora_mapping': {
                **format_experiment(expt), 'license': license,
                'wasGeneratedBy': project_id,
                'generated': []
            },
            'fileObjs': expt.fileObjs
        }

        full_author_list += experiment_map['fedora_mapping']['creator']

        # Child entities are associated to an experiment via uuid membership
        # in their own 'experiments' lists; missing attribute lists default
        # to empty so partial publications walk cleanly.
        reports = filter(lambda report: expt.uuid in report.value.experiments,
                         getattr(doc, 'reportsList', []))
        for report in reports:
            # Do stuff with report.
            report_container_path = "{}/{}".format(
                expt_container_path, parse.quote(report.value.title))
            print('\treport ' + report.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Report: {}'.format(report.value.title))

            report_map = {
                'uuid': report.uuid,
                'fileObjs': report.fileObjs,
                'container_path': report_container_path,
                'fedora_mapping': {
                    **format_report(report), 'wasGeneratedBy':
                    'Experiment: {}'.format(exp_doi)
                }
            }
            relation_map.append(report_map)

        analysis_list = filter(
            lambda analysis: expt.uuid in analysis.value.experiments,
            getattr(doc, 'analysisList', []))
        for analysis in analysis_list:
            # Do stuff with analysis.
            analysis_container_path = "{}/{}".format(
                expt_container_path, parse.quote(analysis.value.title))
            print('\tanalysis ' + analysis.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Analysis: {}'.format(analysis.value.title))

            analysis_map = {
                'uuid': analysis.uuid,
                'fileObjs': analysis.fileObjs,
                'container_path': analysis_container_path,
                'fedora_mapping': {
                    **format_analysis(analysis), 'wasGeneratedBy':
                    'Experiment: {}'.format(exp_doi)
                }
            }
            relation_map.append(analysis_map)

        model_configs = filter(
            lambda model_config: expt.uuid in model_config.value.experiments,
            getattr(doc, 'modelConfigs', []))
        for mc in model_configs:
            # Do stuff with model config.
            configs_container_path = "{}/{}".format(
                expt_container_path, parse.quote(mc.value.title))
            print('\tmodel config ' + mc.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Model Configuration: {}'.format(mc.value.title))

            mc_map = {
                'uuid': mc.uuid,
                'fileObjs': mc.fileObjs,
                'container_path': configs_container_path,
                'fedora_mapping': {
                    **format_model_config(mc), 'wasGeneratedBy': exp_doi
                }
            }

            # Sensor lists nest under the model config they reference AND
            # must also be associated with the current experiment.
            sensor_lists = filter(
                lambda sensor_list: mc.uuid in sensor_list.value.modelConfigs
                and expt.uuid in sensor_list.associationIds,
                getattr(doc, 'sensorLists', []))
            for sl in sensor_lists:
                # Do stuff with sensor list.
                sl_container_path = "{}/{}".format(configs_container_path,
                                                   parse.quote(sl.value.title))
                print('\t\tsensor list ' + sl.value.title)
                experiment_map['fedora_mapping']['generated'].append(
                    'Sensor: {}'.format(sl.value.title))

                sl_map = {
                    'uuid': sl.uuid,
                    'fileObjs': sl.fileObjs,
                    'container_path': sl_container_path,
                    'fedora_mapping': {
                        **format_sensor_info(sl), 'wasGeneratedBy':
                        'Experiment: {}'.format(exp_doi),
                        'wasDerivedFrom':
                        'Model Configuration: {}'.format(mc.value.title),
                        'influenced': []
                    }
                }

                # Events must reference this sensor list and be associated
                # with both the experiment and the model config above.
                events = filter(
                    lambda event: sl.uuid in event.value.sensorLists and expt.
                    uuid in event.associationIds and mc.uuid in event.
                    associationIds, getattr(doc, 'eventsList', []))
                for event in events:
                    # Do stuff with events.
                    evt_container_path = "{}/{}".format(
                        sl_container_path, parse.quote(event.value.title))
                    print('\t\t\tevent ' + event.value.title)
                    sl_map['fedora_mapping']['influenced'].append(
                        'Event: {}'.format(event.value.title))
                    experiment_map['fedora_mapping']['generated'].append(
                        'Event: {}'.format(event.value.title))

                    event_map = {
                        'uuid': event.uuid,
                        'fileObjs': event.fileObjs,
                        'container_path': evt_container_path,
                        'fedora_mapping': {
                            **format_event(event), 'wasGeneratedBy':
                            'Experiment: {}'.format(exp_doi),
                            'wasDerivedFrom':
                            'Model Configuration: {}'.format(mc.value.title),
                            'wasInfluencedBy':
                            'Sensor: {}'.format(sl.value.title)
                        }
                    }
                    relation_map.append(event_map)
                # Children are appended before their parents; the final
                # reversal puts parents first.
                relation_map.append(sl_map)
            relation_map.append(mc_map)
        relation_map.append(experiment_map)
    # De-duplicate authors collected across all experiments.
    project_map['fedora_mapping']['creator'] = list(set(full_author_list))
    relation_map.append(project_map)

    return relation_map[::-1]
def format_metadata_for_fedora(project_id, version=None):
    """
    Format a publication's metadata so that it can be ingested into Fedora.

    Params
    ------
    project_id: Project ID to look up (e.g. PRJ-1234)
    version: optional revision number of the publication.

    Returns
    -------
    dict: Dublin-Core-style field mapping (title, creator, subject, ...).
    """
    doc = IndexedPublication.from_id(project_id, revision=version)
    pub_meta = doc.project.value

    author_list = []
    try:
        ordered_team = sorted(pub_meta.teamOrder,
                              key=lambda member: member.order)
        author_list = list(
            map(lambda member: "{}, {}".format(member.lname, member.fname),
                ordered_team))
    except AttributeError:
        # Older projects have no teamOrder; fall back to the PI alone.
        author_list = [_get_user_by_username(doc, pub_meta.pi)]

    award_numbers = getattr(pub_meta, 'awardNumbers', [])
    contributors = []
    for award in award_numbers:
        contributors.append(award['name'] or None)
        contributors.append(award['number'] or None)

    identifiers = [
        pub_meta.projectId, 'https://www.designsafe-ci.org/'
        'data/browser/public/designsafe.storage.published/{}'.format(
            pub_meta.projectId), doc.project.uuid
    ]
    identifiers += getattr(pub_meta, 'dois', [])
    identifiers += [getattr(doc.project, 'doi', None)]

    project_type = pub_meta.projectType
    if project_type == 'other':
        # Bug fix: the original line ended with a stray trailing comma, which
        # silently wrapped the value in a 1-tuple instead of a string.
        project_type = getattr(pub_meta, 'dataType', "other")

    fc_meta = {
        'title': pub_meta.title,
        'entity': 'Project',
        'description': pub_meta.description,
        'identifier': identifiers,
        'subject': pub_meta.keywords.split(', '),
        'creator': author_list,
        'issued': doc.project.created.isoformat(),
        'contributor': contributors,
        'type': project_type,
        'publisher': 'Designsafe',
    }

    licenses = getattr(doc, 'licenses', None)
    if licenses:
        fc_meta['license'] = list(licenses.to_dict().values())

    associated_projects = getattr(pub_meta, 'associatedProjects', None)
    if associated_projects:
        references = list(
            map(lambda assoc: assoc['title'], associated_projects))
        try:
            relation = list(
                map(lambda assoc: assoc['href'], associated_projects))
        except KeyError:
            relation = []

        fc_meta['references'] = references
        fc_meta['relation'] = relation

    return fc_meta
Example #7
0
    def to_dataset_json(self, **kwargs):
        """
        Serialize project to json for google dataset search
        https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/BMNJPS

        Returns a schema.org Dataset dict. When the project has a DOI, a
        'distribution' entry is included; otherwise related entities with
        DOIs are emitted under 'relatedIdentifier_<i>' keys.
        """
        dataset_json = {
            "@context": "http://schema.org",
            "@type": "Dataset",
            "@id": "",
            "identifier": "",
            "logo":
            "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
            "name": self.title,
            "creator": [{
                "name": "",
                "affiliation": "",
                "@id": "",
                "identifier": ""
            }],
            "author": [{
                "name": "",
                "affiliation": "",
                "@id": "",
                "identifier": ""
            }],
            "datePublished": self.created,
            "dateModified": self.to_body_dict()['lastUpdated'],
            "description": self.description,
            "keywords": self.keywords.split(','),
            "license": {
                "@type": "CreativeWork",
                "license": "",
                "url": ""
            },
            "publisher": {
                "@type": "Organization",
                "name": "Designsafe-CI",
                "url": "https://designsafe-ci.org"
            },
            "provider": {
                "@type": "Organization",
                "name": "Designsafe-CI"
            },
            "includedInDataCatalog": {
                "@type": "DataCatalog",
                "name": "Designsafe-CI",
                "url": "https://designsafe-ci.org"
            },
        }

        if getattr(self, 'team_order', False):
            authors = sorted(self.team_order, key=lambda x: x['order'])
        else:
            authors = [{
                'name': username
            } for username in [self.pi] + self.co_pis]
        dataset_json['creator'] = generate_creators(authors)
        dataset_json['author'] = generate_creators(authors)
        try:
            pub = IndexedPublication.from_id(self.project_id)
            license_info = generate_licenses(pub)
            dataset_json['license'] = license_info[0]["url"]
        except (DocumentNotFound, AttributeError):
            pass

        if self.dois:
            dataset_json['@id'] = self.dois[0]
            dataset_json['identifier'] = self.dois[0]

            dataset_json["distribution"] = {
                "@context": "http://schema.org",
                "@type": "Dataset",
                "@id": "",
                "identifier": "",
                "logo":
                "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
                "name": self.title,
                "creator": [{
                    "name": "",
                    "affiliation": "",
                    "@id": "",
                    "identifier": ""
                }],
                "author": [{
                    "name": "",
                    "affiliation": "",
                    "@id": "",
                    "identifier": ""
                }],
                "datePublished": self.created,
                "dateModified": self.to_body_dict()['lastUpdated'],
                "description": self.description,
                "keywords": self.keywords.split(','),
                "license": {
                    "@type": "CreativeWork",
                    "license": "",
                    "url": ""
                },
                "publisher": {
                    "@type": "Organization",
                    "name": "Designsafe-CI",
                    "url": "https://designsafe-ci.org"
                },
                "provider": {
                    "@type": "Organization",
                    "name": "Designsafe-CI"
                },
                "includedInDataCatalog": {
                    "@type": "DataCatalog",
                    "name": "Designsafe-CI",
                    "url": "https://designsafe-ci.org"
                },
            }

        else:
            related_ents = self.related_entities()

            for i, ent in enumerate(related_ents):
                # Only related entities that themselves carry a DOI are
                # emitted.
                if hasattr(ent, 'dois') and ent.dois:
                    key = 'relatedIdentifier_' + str(i)
                    dataset_json[key] = {
                        "@context": "http://schema.org",
                        "@type": "Dataset",
                        "@id": "",
                        "identifier": "",
                        "logo":
                        "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
                        "name": ent.title,
                        "creator": [{
                            "name": "",
                            "affiliation": "",
                            "@id": "",
                            "identifier": ""
                        }],
                        "author": [{
                            "name": "",
                            "affiliation": "",
                            "@id": "",
                            "identifier": ""
                        }],
                        "datePublished": ent.created,
                        "dateModified": ent.to_body_dict()['lastUpdated'],
                        "description": ent.description,
                        "license": {
                            "@type": "CreativeWork",
                            "license": "",
                            "url": ""
                        },
                        "publisher": {
                            "@type": "Organization",
                            "name": "Designsafe-CI"
                        },
                        "provider": {
                            "@type": "Organization",
                            "name": "Designsafe-CI"
                        },
                        "includedInDataCatalog": {
                            "@type": "DataCatalog",
                            "name": "Designsafe-CI",
                            "url": "https://designsafe-ci.org"
                        },
                    }
                    dataset_json[key]['@id'] = ent.dois[0]
                    dataset_json[key]['identifier'] = ent.dois[0]

                    if getattr(ent, 'team_order', False):
                        authors = sorted(ent.team_order,
                                         key=lambda x: x['order'])
                    else:
                        # NOTE(review): this fallback uses self.pi/self.co_pis
                        # rather than the related entity's own members —
                        # confirm this is intended.
                        authors = [{
                            'name': username
                        } for username in [self.pi] + self.co_pis]
                    dataset_json[key]['creator'] = generate_creators(authors)
                    dataset_json[key]['author'] = generate_creators(authors)
                    # Propagate the project-level license. The original
                    # wrapped this plain dict assignment in a try/except for
                    # DocumentNotFound/AttributeError, neither of which dict
                    # subscripting can raise — that dead handler is removed.
                    dataset_json[key]['license'] = dataset_json['license']

        return dataset_json