def publish_resource(project_id, entity_uuid=None):
    """Publish a resource.

    Retrieves a project and/or an entity and set any saved DOIs as published.
    If no DOIs are saved in the specified project or entity it will fail
    silently. We need to specify the project id because this function also
    changes the status of the locally saved publication to `"published"`
    that way it shows up in the published listing.

    :param str project_id: Project Id to publish.
    :param str entity_uuid: Entity uuid to publish.
    """
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    entity = None
    if entity_uuid:
        entity = mgr.get_entity_by_uuid(entity_uuid)
    responses = []
    # Flip every DOI saved on the project to "published" in DataCite.
    for doi in prj.dois:
        res = DataciteManager.publish_doi(doi)
        responses.append(res)
    # If an entity was requested and found, publish its DOIs as well.
    if entity:
        for doi in entity.dois:
            res = DataciteManager.publish_doi(doi)
            responses.append(res)
    # Mark the locally indexed publication as published so it appears
    # in the published listing.
    pub = BaseESPublication(project_id=project_id)
    pub.update(status='published')
    for res in responses:
        LOG.info("DOI published: %(doi)s", {"doi": res['data']['id']})
    return responses
def get_context_data(self, **kwargs):
    """Update context data to add publication.

    Loads the publication document for the requested project id and fills
    the template context with citation metadata, authors, and an
    ``angular_init`` JSON blob describing the auth state.
    """
    context = super(DataDepotPublishedView, self).get_context_data(**kwargs)
    logger.info('Get context Data')
    pub = BaseESPublication(project_id=kwargs['project_id'].strip('/'))
    logger.debug('pub: %s', pub.to_dict())
    context['projectId'] = pub.projectId
    context['citation_title'] = pub.project.value.title
    context['citation_date'] = pub.created
    context['doi'] = pub.project.doi
    context['keywords'] = pub.project.value.keywords.split(',')
    # Fix: give the inner getattr a default so a user record without a
    # `profile` field yields an empty institution instead of raising
    # AttributeError (matches the newer variant of this view).
    context['authors'] = [{
        'full_name': '{last_name}, {first_name}'.format(
            last_name=user['last_name'],
            first_name=user['first_name']
        ),
        'institution': getattr(getattr(user, 'profile', ''), 'institution', '')
    } for user in getattr(pub, 'users', [])]
    context['publication'] = pub
    context['description'] = pub.project.value.description
    if self.request.user.is_authenticated:
        context['angular_init'] = json.dumps({
            'authenticated': True,
        })
    else:
        context['angular_init'] = json.dumps({
            'authenticated': False,
        })
    return context
def save_to_fedora(self, project_id, revision=None):
    """Celery task: mark a publication as published and ingest it into Fedora.

    "Other" and "experimental" projects are handed to dedicated ingest
    helpers; every other project type is uploaded file-by-file to the
    legacy Fedora REST endpoint.

    :param str project_id: Project id to ingest.
    :param int revision: Optional revision number of the publication.
    """
    import requests
    import magic
    from designsafe.libs.elasticsearch.docs.publications import BaseESPublication
    try:
        es_client = new_es_client()
        pub = BaseESPublication(project_id=project_id, revision=revision,
                                using=es_client)
        pub.update(status='published', using=es_client)

        # Newer project types have dedicated ingest pipelines.
        if pub.project.value.projectType == 'other':
            from designsafe.libs.fedora.fedora_operations import ingest_project
            ingest_project(project_id, version=revision)
            return
        if pub.project.value.projectType == 'experimental':
            from designsafe.libs.fedora.fedora_operations import ingest_project_experimental
            ingest_project_experimental(project_id, version=revision)
            return

        _root = os.path.join('/corral-repl/tacc/NHERI/published', project_id)
        fedora_base = 'http://fedoraweb01.tacc.utexas.edu:8080/fcrepo/rest/publications_01'
        # Create the base container if it does not exist (404) or was
        # tombstoned (410).
        res = requests.get(fedora_base)
        if res.status_code == 404 or res.status_code == 410:
            requests.put(fedora_base)

        fedora_project_base = ''.join([fedora_base, '/', project_id])
        res = requests.get(fedora_project_base)
        if res.status_code == 404 or res.status_code == 410:
            requests.put(fedora_project_base)

        headers = {'Content-Type': 'text/plain'}
        # Walk the published directory, mirroring files and directories
        # into the Fedora project container.
        for root, dirs, files in os.walk(_root):
            for name in files:
                mime = magic.Magic(mime=True)
                headers['Content-Type'] = mime.from_file(
                    os.path.join(root, name))
                full_path = os.path.join(root, name)
                _path = full_path.replace(_root, '', 1)
                # Square brackets are not valid in Fedora resource paths.
                _path = _path.replace('[', '-')
                _path = _path.replace(']', '-')
                url = ''.join([fedora_project_base, urllib.parse.quote(_path)])
                with open(os.path.join(root, name), 'rb') as _file:
                    requests.put(url, data=_file, headers=headers)
            for name in dirs:
                full_path = os.path.join(root, name)
                _path = full_path.replace(_root, '', 1)
                url = ''.join([fedora_project_base, _path])
                requests.put(url)
    except Exception as exc:
        # Fix: the original format string contained a raw line break inside
        # a single-quoted literal, which is a SyntaxError; collapsed to one
        # valid %-style format string.
        logger.error('Proj Id: %s. %s', project_id, exc)
        raise self.retry(exc=exc)
def freeze_project_and_entity_metadata(project_id, entity_uuids=None):
    """Freeze project and entity metadata.

    Given a project id and an entity uuid (should be a main entity) this
    function retrieves all metadata related to these entities and stores it
    into Elasticsearch as
    :class:`~designafe.libs.elasticsearch.docs.publications.BaseESPublication`

    :param str project_id: Project id.
    :param list of entity_uuid strings: Entity uuids.
    """
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    pub_doc = BaseESPublication(project_id=project_id)
    publication = pub_doc.to_dict()

    if entity_uuids:
        # Clear any existing entities in the publication; the field name is
        # derived from the first entity's type.
        entity = mgr.get_entity_by_uuid(entity_uuids[0])
        pub_entities_field_name = FIELD_MAP[entity.name]
        publication[pub_entities_field_name] = []

        for ent_uuid in entity_uuids:
            entity = mgr.get_entity_by_uuid(ent_uuid)
            if entity:
                # Fix: only build the body dict after the lookup is known to
                # have succeeded. The original called entity.to_body_dict()
                # before the `if entity:` guard, defeating the None check.
                entity_json = entity.to_body_dict()
                pub_entities_field_name = FIELD_MAP[entity.name]
                publication['authors'] = entity_json['value']['authors'][:]
                entity_json['authors'] = []
                _populate_entities_in_publication(entity, publication)
                _transform_authors(entity_json, publication)
                # The most recently minted DOI is the canonical one.
                if entity_json['value']['dois']:
                    entity_json['doi'] = entity_json['value']['dois'][-1]
                _delete_unused_fields(entity_json)
                publication[pub_entities_field_name].append(entity_json)

    prj_json = prj.to_body_dict()
    _delete_unused_fields(prj_json)
    # Normalize legacy awardNumber (scalar) into awardNumbers (list).
    award_number = publication.get('project', {}).get('value', {}).pop(
        'awardNumber', []) or []
    if not isinstance(award_number, list):
        award_number = []
    prj_json['value']['awardNumbers'] = award_number
    prj_json['value'].pop('awardNumber', None)
    if publication.get('project'):
        publication['project'].update(prj_json)
    else:
        publication['project'] = prj_json
    pub_doc.update(**publication)
    return pub_doc
def get_context_data(self, **kwargs):
    """Update context data to add publication.

    Supports both newer publications (DOIs stored in ``project.value.dois``)
    and older ones (a single ``project.doi``), and the three historical
    author-listing layouts (``users``, ``authors``, ``teamOrder``).
    """
    context = super(DataDepotPublishedView, self).get_context_data(**kwargs)
    logger.info('Get context Data')
    pub = BaseESPublication(project_id=kwargs['project_id'].strip('/'))
    logger.debug('pub: %s', pub.to_dict())
    context['projectId'] = pub.projectId
    context['citation_title'] = pub.project.value.title
    context['citation_date'] = pub.created
    # Fix: PEP 8 — compare to None with `is not None`, not `!= None`.
    if pub.project.value.to_dict().get('dois') is not None:
        # This is for newer publications
        context['doi'] = pub.project.value.dois[0]
    elif pub.project.to_dict().get('doi') is not None:
        # This is for older publications
        context['doi'] = pub.project.doi
    context['keywords'] = pub.project.value.keywords.split(',')
    if 'users' in pub.to_dict():
        context['authors'] = [{
            'full_name': '{last_name}, {first_name}'.format(
                last_name=user['last_name'],
                first_name=user['first_name']
            ),
            'institution': getattr(getattr(user, 'profile', ''), 'institution', '')
        } for user in getattr(pub, 'users', [])]
    elif 'authors' in pub.to_dict():
        context['authors'] = [{
            'full_name': '{last_name}, {first_name}'.format(
                last_name=author['lname'],
                first_name=author['fname']
            ),
            'institution': getattr(author, 'inst', '')
        } for author in getattr(pub, 'authors', [])]
    else:
        context['authors'] = [{
            'full_name': '{last_name}, {first_name}'.format(
                last_name=author['lname'],
                first_name=author['fname']
            ),
            'institution': getattr(author, 'inst', '')
        } for author in getattr(pub.project.value, 'teamOrder', [])]
    context['publication'] = pub
    context['description'] = pub.project.value.description
    context['experiments'] = getattr(pub, 'experimentsList', [])
    context['missions'] = getattr(pub, 'missions', [])
    context['reports'] = getattr(pub, 'reports', [])
    context['simulations'] = getattr(pub, 'simulations', [])
    context['hybrid_simulations'] = getattr(pub, 'hybrid_simulations', [])
    proj = ProjectsManager(service_account()).get_project_by_id(pub.projectId)
    context['dc_json'] = json.dumps(proj.to_dataset_json())
    if self.request.user.is_authenticated:
        context['angular_init'] = json.dumps({
            'authenticated': True,
        })
    else:
        context['angular_init'] = json.dumps({
            'authenticated': False,
        })
    return context
def zip_publication_files(self, project_id):
    """Celery task: build the downloadable archive for a publication.

    Loads the indexed publication and delegates to its ``archive`` method;
    on any failure the error is logged and the task is retried.
    """
    from designsafe.libs.elasticsearch.docs.publications import BaseESPublication
    try:
        publication = BaseESPublication(project_id=project_id)
        publication.archive()
    except Exception as err:
        logger.error('Zip Proj Id: %s. %s', project_id, err, exc_info=True)
        raise self.retry(exc=err)
def get(self, request, project_id):
    """Return the publication document as JSON, or a 404 payload."""
    publication = BaseESPublication(project_id=project_id)
    # A document without a `project` field means nothing was indexed
    # under this id.
    if publication is None or not hasattr(publication, 'project'):
        return JsonResponse({
            'status': 404,
            'message': 'Not found'
        }, status=404)
    return JsonResponse(publication.to_dict())
def zip_publication_files(self, project_id): from designsafe.libs.elasticsearch.docs.publications import BaseESPublication # Only create archive in prod if getattr(settings, 'DESIGNSAFE_ENVIRONMENT', 'dev') != 'default': return try: pub = BaseESPublication(project_id=project_id) pub.archive() except Exception as exc: logger.error('Zip Proj Id: %s. %s', project_id, exc, exc_info=True) raise self.retry(exc=exc)
def save_publication(self, publication, status='publishing'):  # pylint: disable=no-self-use
    """Persist a publication document to Elasticsearch.

    Stamps the payload with its project id, creation time, status, and
    schema version, migrates the single ``license`` field into the
    ``licenses`` list, then saves and returns the indexed document.
    """
    project_id = publication['project']['value']['projectId']
    # v2 documents store licenses as a list; blank out the legacy field.
    licenses = publication.pop('license', [])
    publication.update({
        'projectId': project_id,
        'created': datetime.datetime.now().isoformat(),
        'status': status,
        'version': 2,
        'licenses': licenses,
        'license': '',
    })
    doc = BaseESPublication(project_id=project_id, **publication)
    doc.save()
    return doc
def publish_resource(project_id, entity_uuids=None, publish_dois=False, revision=None):
    """Publish a resource.

    Retrieves a project and/or an entity and set any saved DOIs as published.
    If no DOIs are saved in the specified project or entity it will fail
    silently. We need to specify the project id because this function also
    changes the status of the locally saved publication to `"published"`
    that way it shows up in the published listing.

    If publish_dois is False Datacite will keep the newly created DOIs in
    "DRAFT" status, and not "PUBLISHED". A DOI on DataCite can only be
    deleted if it is in "DRAFT" status. Once a DOI is set to "PUBLISHED"
    or "RESERVED" it can't be deleted.

    :param str project_id: Project Id to publish.
    :param list entity_uuids: list of str Entity uuids to publish.
    :param int revision: Revision number to publish.
    """
    es_client = new_es_client()
    # If revision number passed, set status to "published" for specified
    # revision and set status to "revised" for old versions
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    responses = []
    if publish_dois:
        # Publish each requested entity's DOIs, then the project's own.
        if entity_uuids:
            for ent_uuid in entity_uuids:
                entity = None
                if ent_uuid:
                    entity = mgr.get_entity_by_uuid(ent_uuid)
                if entity:
                    for doi in entity.dois:
                        res = DataciteManager.publish_doi(doi)
                        responses.append(res)
        for doi in prj.dois:
            res = DataciteManager.publish_doi(doi)
            responses.append(res)
    # Flip the local publication document to "published" and make the
    # change visible to searches immediately.
    pub = BaseESPublication(project_id=project_id, revision=revision,
                            using=es_client)
    pub.update(status='published', using=es_client)
    IndexedPublication._index.refresh(using=es_client)
    if revision:
        # Revising a publication sets the status of the previous document
        # to 'archived'
        last_revision = revision - 1 if revision > 2 else 0
        archived_pub = BaseESPublication(project_id=project_id,
                                         revision=last_revision)
        archived_pub.update(status='archived')
    for res in responses:
        logger.info(
            "DOI published: %(doi)s",
            {"doi": res['data']['id']}
        )
    return responses
def listing(self, system=None, file_path=None, offset=0, limit=100, **kwargs):
    """Perform the search and output in a serializable format.

    :param str system: Storage system id echoed back in the result.
    :param str file_path: Path filter passed through to the query builder.
    :param int offset: Pagination offset.
    :param int limit: Page size.
    """
    query = self.construct_query(system, file_path, **kwargs)
    listing_search = Search()
    listing_search = listing_search.filter(query).sort('_index')
    listing_search = listing_search.extra(from_=offset, size=limit)
    res = listing_search.execute()
    children = []
    for hit in res:
        try:
            # Modern publications carry a projectId attribute; legacy
            # (NEES-era) hits do not and raise AttributeError.
            getattr(hit, 'projectId')
            children.append(BaseESPublication(**hit.to_dict()).to_file())
        except AttributeError:
            children.append(
                BaseESPublicationLegacy(**hit.to_dict()).to_file())
    # Shape the page as a synthetic "$SEARCH" directory listing.
    result = {
        'trail': [{'name': '$SEARCH', 'path': '/$SEARCH'}],
        'name': '$SEARCH',
        'path': '/',
        'system': system,
        'type': 'dir',
        'children': children,
        'permissions': 'READ'
    }
    return result
def listing(self, system=None, file_path=None, offset=0, limit=100, **kwargs):
    """Run the publication search and wrap each hit for serialization."""
    search = Search().filter(self.construct_query(**kwargs)).sort(
        '_index',
        {'project._exact': {'order': 'asc', 'unmapped_type': 'keyword'}},
        {'created': {'order': 'desc', 'unmapped_type': 'long'}}
    ).extra(from_=offset, size=limit)
    hits = search.execute()
    children = []
    for hit in hits:
        try:
            # Modern publications expose projectId; legacy hits raise
            # AttributeError and fall through to the legacy wrapper.
            getattr(hit, 'projectId')
            children.append(BaseESPublication(**hit.to_dict()).to_file())
        except AttributeError:
            children.append(BaseESPublicationLegacy(**hit.to_dict()).to_file())
    return {
        'trail': [{'name': '$SEARCH', 'path': '/$SEARCH'}],
        'name': '$SEARCH',
        'path': '/',
        'system': system,
        'type': 'dir',
        'children': children,
        'permissions': 'READ'
    }
def publish_resource(project_id, entity_uuids=None, publish_dois=False):
    """Publish a resource.

    Retrieves a project and/or an entity and set any saved DOIs as published.
    If no DOIs are saved in the specified project or entity it will fail
    silently. We need to specify the project id because this function also
    changes the status of the locally saved publication to `"published"`
    that way it shows up in the published listing.

    If publish_dois is False Datacite will keep the newly created DOIs in
    "DRAFT" status, but they will not be set to "PUBLISHED". A DOI on
    DataCite can only be deleted if it is in "DRAFT" status. Once a DOI is
    set to "PUBLISHED" or "RESERVED" it can't be deleted.

    :param str project_id: Project Id to publish.
    :param list entity_uuids: list of str Entity uuids to publish.
    """
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    responses = []
    if publish_dois:
        # Fix: guard against entity_uuids=None (the documented default); the
        # original iterated it unconditionally, raising TypeError whenever a
        # project-only publication was published with DOIs.
        if entity_uuids:
            for ent_uuid in entity_uuids:
                entity = None
                if ent_uuid:
                    entity = mgr.get_entity_by_uuid(ent_uuid)
                if entity:
                    for doi in entity.dois:
                        res = DataciteManager.publish_doi(doi)
                        responses.append(res)
        for doi in prj.dois:
            res = DataciteManager.publish_doi(doi)
            responses.append(res)
    # Mark the locally indexed publication as published so it shows up
    # in the published listing.
    pub = BaseESPublication(project_id=project_id)
    pub.update(status='published')
    for res in responses:
        LOG.info(
            "DOI published: %(doi)s",
            {"doi": res['data']['id']}
        )
    return responses
def amend_publication(project_id, amendments=None, authors=None, revision=None):
    """Amend a Publication

    Update Amendable fields on a publication and the corrosponding DataCite
    records. These changes do not produce a new version of a publication,
    but they do allow for limited changes to a published project. This is
    currently configured to support "Other" publications only.

    :param str project_id: Project uuid to amend
    :param dict amendments: Optional mapping of entity uuid -> amended
        entity body dict.
    :param authors: Optional amended author listings.
    :param int revision: Revision number to amend
    """
    es_client = new_es_client()
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    pub = BaseESPublication(project_id=project_id, revision=revision,
                            using=es_client)
    prj_dict = prj.to_body_dict()
    pub_dict = pub.to_dict()
    _delete_unused_fields(prj_dict)

    if pub.project.value.projectType != 'other':
        pub_entity_uuids = pub.entities()
        for uuid in pub_entity_uuids:
            # Fix: guard against amendments=None (the documented default);
            # `uuid in None` raised TypeError in the original.
            if amendments and uuid in amendments:
                entity = amendments[uuid]
            else:
                entity = mgr.get_entity_by_uuid(uuid)
                entity = entity.to_body_dict()
            _delete_unused_fields(entity)
            for pub_ent in pub_dict[FIELD_MAP[entity['name']]]:
                if pub_ent['uuid'] == entity['uuid']:
                    for key in entity['value']:
                        # Entities carrying DOIs are main entities; they use
                        # a different unamendable-field list than subentities.
                        ent_type = 'entity' if 'dois' in entity['value'] else 'subentity'
                        if key not in UNAMENDABLE_FIELDS[ent_type]:
                            pub_ent['value'][key] = entity['value'][key]
                    if 'authors' in entity['value']:
                        # NOTE(review): assumes `authors` has an entry for
                        # every amended main entity uuid — confirm callers
                        # always supply it for non-"other" projects.
                        pub_ent['value']['authors'] = authors[entity['uuid']]
                        _set_authors(pub_ent, pub_dict)

    # weird key swap for old issues with awardnumber(s)
    award_number = prj.award_number or []
    if not isinstance(award_number, list):
        award_number = []
    prj_dict['value']['awardNumbers'] = award_number
    prj_dict['value'].pop('awardNumber', None)

    for key in prj_dict['value']:
        if key not in UNAMENDABLE_FIELDS['project']:
            pub_dict['project']['value'][key] = prj_dict['value'][key]
    if authors and prj_dict['value']['projectType'] == 'other':
        pub_dict['project']['value']['teamOrder'] = authors

    pub.update(**pub_dict)
    IndexedPublication._index.refresh(using=es_client)
    return pub
def get(self, request, project_id, revision=None):
    """
    Get a publication. If a revision is not supplied, return the "Original"
    publication. Include the latest version if it is not being queried.
    """
    es_client = new_es_client()
    pub = BaseESPublication(project_id=project_id, revision=revision,
                            using=es_client)
    latest_revision = IndexedPublication.max_revision(
        project_id=project_id, using=es_client)
    latest_pub_dict = None
    # Attach the newest revision's document unless it is the one requested.
    if latest_revision > 0 and latest_revision != revision:
        latest_pub = BaseESPublication(project_id=project_id,
                                       revision=latest_revision,
                                       using=es_client)
        if latest_pub is not None and hasattr(latest_pub, 'project'):
            latest_pub_dict = latest_pub.to_dict()
    if pub is not None and hasattr(pub, 'project'):
        pub_dict = pub.to_dict()
        # "Other" projects are excluded from listing metrics.
        if pub_dict['project']['value']['projectType'] != 'other':
            metrics.info('Data Depot', extra={
                'user': request.user.username,
                'sessionId': getattr(request.session, 'session_key', ''),
                'operation': 'listing',
                'agent': request.META.get('HTTP_USER_AGENT'),
                'ip': get_client_ip(request),
                'info': {
                    'api': 'agave',
                    'systemId': 'designsafe.storage.published',
                    'filePath': project_id,
                    'query': {}
                }
            })
        if latest_pub_dict:
            pub_dict['latestRevision'] = latest_pub_dict
        return JsonResponse(pub_dict)
    else:
        return JsonResponse({
            'status': 404,
            'message': 'Not found'
        }, status=404)
def listing(self, system=None, file_path=None, offset=0, limit=100, **kwargs):
    """Perform the search and output in a serializable format.

    Only a projection of each publication (project value, created date,
    project id, users, system) is fetched from the index.
    """
    query = self.construct_query(system, file_path, **kwargs)
    listing_search = Search()
    listing_search = listing_search.filter(query).sort(
        '_index',
        {'created': {'order': 'desc', 'unmapped_type': 'long'}})
    # Restrict the response to the fields the file wrapper needs.
    listing_search = listing_search.extra(
        from_=offset, size=limit).source(includes=[
            'project.value', 'created', 'projectId', 'users', 'system'
        ])
    res = listing_search.execute()
    children = []
    for hit in res:
        hit_to_file = BaseESPublication.hit_to_file(hit)
        children.append(hit_to_file)
    # Shape the page as a synthetic "$SEARCH" directory listing.
    result = {
        'trail': [{'name': '$SEARCH', 'path': '/$SEARCH'}],
        'name': '$SEARCH',
        'path': '/',
        'system': system,
        'type': 'dir',
        'children': children,
        'permissions': 'READ'
    }
    return result
def freeze_project_and_entity_metadata(project_id, entity_uuids=None):
    """Freeze project and entity metadata.

    Given a project id and an entity uuid (should be a main entity) this
    function retrieves all metadata related to these entities and stores it
    into Elasticsearch as
    :class:`~designafe.libs.elasticsearch.docs.publications.BaseESPublication`

    When publishing for the first time or publishing over an existing
    publication. We will clear any existing entities (if any) from the
    published metadata. We'll use entity_uuids (the entities getting DOIs)
    to rebuild the rest of the publication. These entities usually do not
    have files associated to them (except published reports/documents).

    :param str project_id: Project id.
    :param list of entity_uuid strings: Entity uuids.
    """
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    pub_doc = BaseESPublication(project_id=project_id)
    publication = pub_doc.to_dict()

    if entity_uuids:
        # clear any existing sub entities in publication and keep updated
        # fileObjs
        fields_to_clear = []
        entities_with_files = []
        for key in list(FIELD_MAP.keys()):
            if FIELD_MAP[key] in list(publication.keys()):
                fields_to_clear.append(FIELD_MAP[key])
        fields_to_clear = set(fields_to_clear)
        for field in fields_to_clear:
            for ent in publication[field]:
                # Remember entities that already carry published fileObjs so
                # they can be restored onto the rebuilt entity bodies below.
                if 'fileObjs' in ent:
                    entities_with_files.append(ent)
                if ent['uuid'] in entity_uuids:
                    publication[field] = []

        for ent_uuid in entity_uuids:
            entity = None
            entity = mgr.get_entity_by_uuid(ent_uuid)
            if entity:
                entity_json = entity.to_body_dict()
                pub_entities_field_name = FIELD_MAP[entity.name]
                # Carry over previously published fileObjs for this entity.
                for e in entities_with_files:
                    if e['uuid'] == entity_json['uuid']:
                        entity_json['fileObjs'] = e['fileObjs']
                publication['authors'] = list(entity_json['value']['authors'])
                entity_json['authors'] = []
                _populate_entities_in_publication(entity, publication)
                _transform_authors(entity_json, publication)
                # The most recently minted DOI is the canonical one.
                if entity_json['value']['dois']:
                    entity_json['doi'] = entity_json['value']['dois'][-1]
                _delete_unused_fields(entity_json)
                publication[pub_entities_field_name].append(entity_json)

    prj_json = prj.to_body_dict()
    _delete_unused_fields(prj_json)
    # Normalize legacy awardNumber (scalar) into awardNumbers (list).
    award_number = publication.get('project', {}).get('value', {}).pop(
        'awardNumber', []
    ) or []
    if not isinstance(award_number, list):
        award_number = []
    prj_json['value']['awardNumbers'] = award_number
    prj_json['value'].pop('awardNumber', None)
    if publication.get('project'):
        publication['project'].update(prj_json)
    else:
        publication['project'] = prj_json
    pub_doc.update(**publication)
    return pub_doc
def fix_file_tags(project_id):
    """Repair file-tag UUIDs on a publication.

    Tags created against project-system files still reference the project
    file's UUID; this rewrites each tag's ``fileUuid`` to the corresponding
    file on the ``designsafe.storage.published`` system.

    :param str project_id: Project id whose publication tags get fixed.
    """
    pub = BaseESPublication(project_id=project_id)
    pub_dict = pub.to_dict()
    entities_to_check = list(
        set(pub_dict.keys()).intersection(list(FIELD_MAP.values())))
    entities_to_check.append('project')

    def check_complete_tags(tags):
        # True when every tag already records a relative file path.
        for tag in tags:
            if 'path' not in tag:
                return False
        return True

    def fix_tags_path(entity):
        # Tags carry paths: look each path up on the published system and
        # point the tag at the published file's uuid.
        for tag in entity['value']['fileTags']:
            try:
                pub_file = BaseFileResource.listing(
                    service_account(),
                    system="designsafe.storage.published",
                    path="{}{}".format(project_id, tag['path'])
                )
                tag['fileUuid'] = pub_file.uuid
            except Exception as err:
                LOG.info('error: {}'.format(err))
                continue

    def fix_tags_no_path(entity):
        # Tags lack paths: walk the source files and match tags by the
        # project-system file uuid, then swap in the published uuid.
        if entity['name'] == 'designsafe.project':
            proj_other = BaseFileResource.listing(
                service_account(),
                system="project-{}".format(entity['uuid']),
                path="")
            for child in proj_other.children:
                try:
                    pub_file = BaseFileResource.listing(
                        service_account(),
                        system="designsafe.storage.published",
                        path="{}{}".format(project_id, child.path))
                    proj_file = BaseFileResource.listing(
                        service_account(),
                        system="project-{}".format(entity['uuid']),
                        path=child.path)
                    for tag in entity['value']['fileTags']:
                        if tag['fileUuid'] == proj_file.uuid:
                            tag['fileUuid'] = pub_file.uuid
                except Exception as err:
                    LOG.info('error: {}'.format(err))
                    continue
        else:
            for fobj in entity['fileObjs']:
                try:
                    pub_file = BaseFileResource.listing(
                        service_account(),
                        system="designsafe.storage.published",
                        path="{}{}".format(project_id, fobj['path']))
                    proj_file = BaseFileResource.listing(
                        service_account(),
                        system="project-{}".format(
                            pub_dict['project']['uuid']),
                        path=fobj['path'])
                    for tag in entity['value']['fileTags']:
                        if tag['fileUuid'] == proj_file.uuid:
                            tag['fileUuid'] = pub_file.uuid
                except Exception as err:
                    LOG.info('error: {}'.format(err))
                    continue

    for entname in entities_to_check:
        # Entity fields are lists of entities; 'project' is a single dict.
        if type(pub_dict[entname]) == list:
            for entity in pub_dict[entname]:
                if 'value' in entity and 'fileTags' in entity['value'] and check_complete_tags(entity['value']['fileTags']):
                    fix_tags_path(entity)
                elif 'value' in entity and 'fileTags' in entity['value']:
                    fix_tags_no_path(entity)
        else:
            if 'value' in pub_dict[entname] and 'fileTags' in pub_dict[entname]['value'] and check_complete_tags(pub_dict[entname]['value']['fileTags']):
                fix_tags_path(pub_dict[entname])
            elif 'value' in pub_dict[entname] and 'fileTags' in pub_dict[entname]['value']:
                fix_tags_no_path(pub_dict[entname])
    pub.update(**pub_dict)
def copy_publication_files_to_corral(self, project_id, revision=None, selected_files=None):
    """
    Takes a project ID and copies project files to a published directory.

    :param str project_id: Project ID
    :param int revision: The revision number of the publication
    :param list of selected_files strings: Only provided if project type ==
        other.
    """
    es_client = new_es_client()
    publication = BaseESPublication(project_id=project_id, revision=revision,
                                    using=es_client)
    filepaths = publication.related_file_paths()
    if not len(filepaths) and selected_files:
        # Project is "Other" so we just copy the selected files
        filepaths = [
            file_path.strip('/')
            for file_path in selected_files
            if (file_path != '.Trash')
        ]

    # De-duplicate and sort so parent paths are visited before children.
    filepaths = list(set(filepaths))
    filepaths = sorted(filepaths)
    base_path = ''.join(['/', publication.projectId])

    # Temporarily open the published root for writing.
    os.chmod('/corral-repl/tacc/NHERI/published', 0o755)
    prefix_dest = '/corral-repl/tacc/NHERI/published/{}'.format(project_id)
    if revision:
        prefix_dest += 'v{}'.format(revision)
    if not os.path.isdir(prefix_dest):
        os.mkdir(prefix_dest)

    prefix_src = '/corral-repl/tacc/NHERI/projects/{}'.format(
        publication.project['uuid'])
    for filepath in filepaths:
        local_src_path = '{}/{}'.format(prefix_src, filepath)
        local_dst_path = '{}/{}'.format(prefix_dest, filepath)
        logger.info('Trying to copy: %s to %s', local_src_path,
                    local_dst_path)
        if os.path.isdir(local_src_path):
            try:
                #os.mkdir(local_dst_path)
                if not os.path.isdir(os.path.dirname(local_dst_path)):
                    os.makedirs(os.path.dirname(local_dst_path))
                shutil.copytree(local_src_path, local_dst_path)
                # Lock the copied tree: read-only dirs (555) and files (444).
                for root, dirs, files in os.walk(local_dst_path):
                    for d in dirs:
                        os.chmod(os.path.join(root, d), 0o555)
                    for f in files:
                        os.chmod(os.path.join(root, f), 0o444)
                os.chmod(local_dst_path, 0o555)
            except OSError as exc:
                logger.info(exc)
            except IOError as exc:
                logger.info(exc)
        else:
            try:
                if not os.path.isdir(os.path.dirname(local_dst_path)):
                    os.makedirs(os.path.dirname(local_dst_path))
                for root, dirs, files in os.walk(
                        os.path.dirname(local_dst_path)):
                    for d in dirs:
                        os.chmod(os.path.join(root, d), 0o555)
                    for f in files:
                        os.chmod(os.path.join(root, f), 0o444)
                shutil.copy(local_src_path, local_dst_path)
                os.chmod(local_dst_path, 0o444)
            except OSError as exc:
                logger.info(exc)
            except IOError as exc:
                logger.info(exc)

    # Re-seal the publication directory and root.
    os.chmod(prefix_dest, 0o555)
    os.chmod('/corral-repl/tacc/NHERI/published', 0o555)

    # Ingest into Fedora and re-index the published files asynchronously.
    save_to_fedora.apply_async(args=[project_id, revision])
    index_path = '/' + project_id
    if revision:
        index_path += 'v{}'.format(revision)
    agave_indexer.apply_async(kwargs={
        'username': '******',
        'systemId': 'designsafe.storage.published',
        'filePath': index_path,
        'recurse': True
    }, queue='indexing')
def check_published_files(project_id, revision=None, selected_files=None):
    """Verify the on-disk published files match the publication metadata.

    Compares the expected file paths against what exists under the
    published directory; emails the dev project admins when files are
    missing or folders are empty.

    :param str project_id: Project id to check.
    :param int revision: Optional publication revision number.
    :param list selected_files: Expected paths for type "other" projects.
    """
    #get list of files that should be in the publication
    es_client = new_es_client()
    publication = BaseESPublication(project_id=project_id, revision=revision,
                                    using=es_client)
    if selected_files:
        #it's type other, use this for comparison
        filepaths = selected_files
    else:
        filepaths = publication.related_file_paths()

    #empty dirs
    missing_files = []
    existing_files = []
    empty_folders = []

    #strip leading forward slash from file paths
    updated_filepaths = [
        file_path.strip('/')
        for file_path in filepaths
        if (file_path != '.Trash')
    ]

    pub_directory = '/corral-repl/tacc/NHERI/published/{}'.format(project_id)
    if revision:
        pub_directory += 'v{}'.format(revision)

    #navigate through publication files paths and
    #compare to the previous list of files
    for pub_file in updated_filepaths:
        file_to_check = os.path.join(pub_directory, pub_file)
        try:
            if os.path.isfile(file_to_check):
                existing_files.append(pub_file)
            elif os.path.isdir(file_to_check):
                #check directory for items in it
                dir_list = os.listdir(file_to_check)
                if dir_list != []:
                    existing_files.append(pub_file)
                else:
                    empty_folders.append(pub_file)
            else:
                missing_files.append(pub_file)
        except OSError as exc:
            logger.info(exc)

    #send email if there are files/folders missing/empty
    if (missing_files or empty_folders):
        #log for potential later queries
        logger.info("check_published_files missing files: " + project_id +
                    " " + str(missing_files))
        logger.info("check_published_files empty folders: " + project_id +
                    " " + str(empty_folders))
        #send email to dev admins
        service = get_service_account_client()
        prj_admins = settings.DEV_PROJECT_ADMINS_EMAIL
        for admin in prj_admins:
            email_body = """
                <p>Hello,</p>
                <p>
                    The following project has been published with either
                    missing files/folders or empty folders:
                    <br>
                    <b>{prjID} - revision {revision}</b>
                    <br>
                    Path to publication files: {pubFiles}
                </p>
                <p>
                    These are the missing files/folders for this publication:
                    <br>
                    {missingFiles}
                </p>
                <p>
                    These are the empty folders for this publication:
                    <br>
                    {emptyFolders}
                </p>
                This is a programmatically generated message. Do NOT reply
                to this message.
                """.format(pubFiles=pub_directory,
                           prjID=project_id,
                           missingFiles=missing_files,
                           emptyFolders=empty_folders,
                           revision=revision)
            send_mail(
                "DesignSafe Alert: Published Project has missing files/folders",
                email_body,
                settings.DEFAULT_FROM_EMAIL,
                [admin],
                html_message=email_body)
def draft_publication(
        project_id,
        main_entity_uuids=None,
        project_doi=None,
        main_entity_doi=None,
        upsert_project_doi=False,
        upsert_main_entity_doi=True,
        revision=None,
        revised_authors=None
):
    """Reserve a publication.

    A publication is reserved by creating a DOI through Datacite. For some
    of the projects a DOI is only created for the main entity e.g. Mission
    or Simulation. For some other projects we also (or only) get a DOI for
    the project.

    - If :param:`project_doi` and/or :param:`main_entity_doi` values are
      given then those dois will be updated (or created if they don't exist
      in datacite).
    - If :param:`upsert_project_doi` and/or :param:`upsert_main_entity_doi`
      are set to `True` then any saved DOIs will be updated (even if
      there's multiple unless a specific DOI is given). If there are no
      saved DOIs then a new DOI will be created. Meaning, it will act as
      update or insert.
    - If :param:`project_id` is given **but** :param:`main_entity_uuids` is
      ``None`` then a project DOI will be created or updated.

    .. warning:: This funciton only creates a *Draft* DOI and not a public
        one.
    .. warning:: An entity *might* have multiple DOIs, if this is the case
        and :param:`upsert_project_doi` or :param:`upsert_main_entity_doi`
        are set to True then *all* saved dois will be updated.
    .. note:: In theory a single resource *should not* have multiple DOIs
        but we don't know how this will change in the future, hence, we are
        supporting multiple DOIs.
    .. note:: If no :param:`main_entity_uuids` is given then a project DOI
        will be created.

    :param str project_id: Project Id
    :param list main_entity_uuids: uuid strings of main entities.
    :param str project_doi: Custom doi for project.
    :param str main_entity_doi: Custom doi for main entity.
    :param bool upsert_project_doi: Update or insert project doi.
    :param bool upsert_main_entity_doi: Update or insert main entity doi.
    """
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    responses = []

    if main_entity_uuids:
        ### Draft Entity DOI(s) ###
        pub = BaseESPublication(project_id=project_id, revision=revision)
        for ent_uuid in main_entity_uuids:
            entity = mgr.get_entity_by_uuid(ent_uuid)
            if entity:
                if revision:
                    # Versioned landing pages live under "<id>v<rev>".
                    entity_url = ENTITY_TARGET_BASE.format(
                        project_id='{}v{}'.format(project_id, revision),
                        entity_uuid=ent_uuid
                    )
                    # Revisions keep the originally published title/authors.
                    original_entities = getattr(pub, FIELD_MAP[entity.name])
                    pub_ent = next(ent for ent in original_entities
                                   if ent.uuid == ent_uuid)
                    entity.title = pub_ent.value.title
                    entity.authors = revised_authors[ent_uuid]
                else:
                    entity_url = ENTITY_TARGET_BASE.format(
                        project_id=project_id,
                        entity_uuid=ent_uuid
                    )
                ent_datacite_json = entity.to_datacite_json()
                ent_datacite_json['url'] = entity_url
                # ent_datacite_json['version'] = str(revision)
                # omitting version number per Maria
                if upsert_main_entity_doi and main_entity_doi:
                    # A specific DOI was requested: upsert exactly that one.
                    me_res = DataciteManager.create_or_update_doi(
                        ent_datacite_json,
                        main_entity_doi
                    )
                    entity.dois += [main_entity_doi]
                    entity.dois = list(set(entity.dois))
                    entity.save(service_account())
                    responses.append(me_res)
                elif upsert_main_entity_doi and entity.dois:
                    # Update every DOI already saved on the entity.
                    for doi in entity.dois:
                        me_res = DataciteManager.create_or_update_doi(
                            ent_datacite_json,
                            doi
                        )
                        responses.append(me_res)
                elif upsert_main_entity_doi and not entity.dois:
                    # No DOI yet: mint a new draft and save it on the entity.
                    me_res = DataciteManager.create_or_update_doi(
                        ent_datacite_json
                    )
                    entity.dois += [me_res['data']['id']]
                    entity.save(service_account())
                    responses.append(me_res)
    else:
        ### Draft Project DOI ###
        upsert_project_doi = True
        if revision:
            # Versions should not update certain fields
            # Add version number to DataCite info
            prj_url = TARGET_BASE.format(
                project_id='{}v{}'.format(project_id, revision))
            pub = BaseESPublication(project_id=project_id, revision=revision)
            prj.title = pub.project.value.title
            prj.team_order = pub.project.value.teamOrder
            if revised_authors:
                prj.team_order = revised_authors
            prj_datacite_json = prj.to_datacite_json()
            prj_datacite_json['url'] = prj_url
            prj_datacite_json['version'] = str(revision)
            # append links to previous versions in DOI...
            relatedIdentifiers = []
            for ver in range(1, revision):
                # Revision 1 is the original (no "v" suffix in its URL).
                id = '{}v{}'.format(project_id, ver) if ver != 1 else project_id
                relatedIdentifiers.append(
                    {
                        'relatedIdentifierType': 'URL',
                        'relationType': 'IsNewVersionOf',
                        'relatedIdentifier': TARGET_BASE.format(project_id=id),
                    }
                )
            prj_datacite_json['relatedIdentifiers'] = relatedIdentifiers
        else:
            # format project for publication
            prj_url = TARGET_BASE.format(project_id=project_id)
            prj_datacite_json = prj.to_datacite_json()
            prj_datacite_json['url'] = prj_url
        if upsert_project_doi and project_doi:
            prj_res = DataciteManager.create_or_update_doi(
                prj_datacite_json,
                project_doi
            )
            prj.dois += [project_doi]
            prj.dois = list(set(prj.dois))
            prj.save(service_account())
            responses.append(prj_res)
        elif upsert_project_doi and prj.dois:
            for doi in prj.dois:
                prj_res = DataciteManager.create_or_update_doi(
                    prj_datacite_json,
                    doi
                )
                responses.append(prj_res)
        elif upsert_project_doi and not prj.dois:
            prj_res = DataciteManager.create_or_update_doi(prj_datacite_json)
            prj.dois += [prj_res['data']['id']]
            prj.save(service_account())
            responses.append(prj_res)

    for res in responses:
        logger.info(
            "DOI created or updated: %(doi)s",
            {"doi": res['data']['id']}
        )
    return responses
def archive(project_id, revision=None):
    """Archive published files and metadata for a publication.

    Copies and compresses all of the published files for ``project_id``
    into a zip archive, and also writes a formatted JSON document of the
    published metadata alongside it.

    Note: The metadata file will only be used until the Fedora system is
    set up again.

    :param str project_id: Project Id of the publication to archive.
    :param revision: Optional revision number; when set, the archive name
        is suffixed with ``v<revision>``.
    """
    es_client = new_es_client()
    pub = BaseESPublication(project_id=project_id, revision=revision, using=es_client)

    if revision:
        archive_prefix = '{}v{}'.format(pub.projectId, revision)
    else:
        archive_prefix = pub.projectId

    archive_name = '{}_archive.zip'.format(archive_prefix)
    metadata_name = '{}_metadata.json'.format(archive_prefix)
    pub_dir = settings.DESIGNSAFE_PUBLISHED_PATH
    arc_dir = os.path.join(pub_dir, 'archives/')
    archive_path = os.path.join(arc_dir, archive_name)
    metadata_path = os.path.join(arc_dir, metadata_name)

    def set_perms(path, octal, subdir=None):
        """Chmod *path* (and every entry under *subdir*, if given) to *octal*.

        On any failure the tree is locked back to read-only (0o555) so a
        partial permission change never leaves the publication writable.
        """
        try:
            os.chmod(path, octal)
            if subdir:
                if not os.path.isdir(subdir):
                    raise Exception('subdirectory does not exist!')
                for root, dirs, files in os.walk(subdir):
                    os.chmod(root, octal)
                    for d in dirs:
                        os.chmod(os.path.join(root, d), octal)
                    for f in files:
                        os.chmod(os.path.join(root, f), octal)
        except Exception:
            logger.exception("Failed to set permissions for {}".format(path))
            os.chmod(path, 0o555)

    # compress published files into a zip archive
    def create_archive():
        arc_source = os.path.join(pub_dir, archive_prefix)
        try:
            logger.debug("Creating archive for {}".format(archive_prefix))
            # context manager guarantees the zip handle is closed even if
            # a write fails partway through (the original leaked it)
            with zipfile.ZipFile(archive_path, mode='w', allowZip64=True) as zf:
                for dirs, _, files in os.walk(arc_source):
                    for f in files:
                        if f == archive_name:
                            continue  # never include the archive in itself
                        zf.write(os.path.join(dirs, f),
                                 os.path.join(dirs.replace(pub_dir, ''), f))
                zf.write(metadata_path, metadata_name)
        except Exception:
            logger.exception("Archive creation failed for {}".format(arc_source))
        finally:
            # always lock the publication tree back to read-only
            set_perms(pub_dir, 0o555, arc_source)
            set_perms(arc_dir, 0o555)

    # create formatted metadata for user download
    def create_metadata():
        mgr = ProjectsManager(service_account())
        pub_dict = pub._wrapped.to_dict()
        meta_dict = {}
        # maps a projectType to the key under which its entities are
        # stored in the publication document
        entity_type_map = {
            'experimental': 'experimentsList',
            'simulation': 'simulations',
            'hybrid_simulation': 'hybrid_simulations',
            'field_recon': 'missions',  # TODO: this should support 'reports' as well (aka Documents)
        }
        project_uuid = pub_dict['project']['uuid']
        try:
            logger.debug("Creating metadata for {}".format(archive_prefix))
            if pub_dict['project']['value']['projectType'] in entity_type_map:
                ent_type = entity_type_map[pub_dict['project']['value']['projectType']]
                entity_uuids = []
                if ent_type in pub_dict.keys():
                    entity_uuids = [x['uuid'] for x in pub_dict[ent_type]]
                meta_dict = mgr.get_entity_by_uuid(project_uuid).to_datacite_json()
                meta_dict['published_resources'] = []
                meta_dict['url'] = TARGET_BASE.format(project_id=pub_dict['project_id'])
                for uuid in entity_uuids:
                    entity = mgr.get_entity_by_uuid(uuid)
                    ent_json = entity.to_datacite_json()
                    ent_json['doi'] = entity.dois[0]
                    ent_json['url'] = ENTITY_TARGET_BASE.format(
                        project_id=pub_dict['project_id'],
                        entity_uuid=uuid
                    )
                    meta_dict['published_resources'].append(ent_json)
            else:
                # project types without sub-entities publish the project
                # itself as the only resource
                project = mgr.get_entity_by_uuid(project_uuid)
                meta_dict = project.to_datacite_json()
                meta_dict['doi'] = project.dois[0]
                meta_dict['url'] = TARGET_BASE.format(project_id=pub_dict['project_id'])
            with open(metadata_path, 'w') as meta_file:
                json.dump(meta_dict, meta_file)
        except Exception:
            # was a bare ``except:`` — narrowed so SystemExit and
            # KeyboardInterrupt are no longer swallowed
            logger.exception("Failed to create metadata!")

    try:
        # unlock, write metadata + archive, then create_archive re-locks
        set_perms(pub_dir, 0o755, os.path.join(pub_dir, archive_prefix))
        set_perms(arc_dir, 0o755)
        create_metadata()
        create_archive()
    except Exception:
        logger.exception('Failed to archive publication!')
def copy_publication_files_to_corral(self, project_id):
    """Copy a publication's files into the public corral directory.

    Replicates every file path referenced by the publication from the
    project's corral directory into the published directory, locks the
    copied tree read-only, then queues the Fedora save and indexing
    tasks.

    :param str project_id: Project Id of the publication to copy.
    """
    # Only copy published files while in prod
    if getattr(settings, 'DESIGNSAFE_ENVIRONMENT', 'dev') != 'default':
        return

    from designsafe.libs.elasticsearch.docs.publications import BaseESPublication
    import shutil

    publication = BaseESPublication(project_id=project_id)
    filepaths = publication.related_file_paths()
    if not filepaths:
        # No related paths recorded on the publication: fall back to
        # listing everything at the root of the project storage system,
        # excluding '.' and 'Trash'.
        res = get_service_account_client().files.list(
            systemId='project-{project_uuid}'.format(
                project_uuid=publication.project.uuid),
            filePath='/')
        filepaths = [
            _file.path.strip('/')
            for _file in res
            if (_file.name != '.' and _file.name != 'Trash')
        ]

    # de-duplicate and copy in a deterministic order
    filepaths = sorted(set(filepaths))

    def lock_tree(tree_root):
        # Walk *tree_root*, making directories read/execute-only (0o555)
        # and files read-only (0o444).
        for root, dirs, files in os.walk(tree_root):
            for d in dirs:
                os.chmod(os.path.join(root, d), 0o555)
            for f in files:
                os.chmod(os.path.join(root, f), 0o444)

    # temporarily open the published root so we can create the target dir
    os.chmod('/corral-repl/tacc/NHERI/published', 0o755)
    prefix_dest = '/corral-repl/tacc/NHERI/published/{}'.format(project_id)
    if not os.path.isdir(prefix_dest):
        os.mkdir(prefix_dest)

    prefix_src = '/corral-repl/tacc/NHERI/projects/{}'.format(
        publication.project['uuid'])
    for filepath in filepaths:
        local_src_path = '{}/{}'.format(prefix_src, filepath)
        local_dst_path = '{}/{}'.format(prefix_dest, filepath)
        logger.info('Trying to copy: %s to %s', local_src_path, local_dst_path)
        if os.path.isdir(local_src_path):
            try:
                if not os.path.isdir(os.path.dirname(local_dst_path)):
                    os.makedirs(os.path.dirname(local_dst_path))
                shutil.copytree(local_src_path, local_dst_path)
                lock_tree(local_dst_path)
                os.chmod(local_dst_path, 0o555)
            # IOError is an alias of OSError on Python 3; the original's
            # separate ``except IOError`` arm was unreachable
            except (OSError, IOError) as exc:
                # best-effort copy: log and continue with the next path
                logger.info(exc)
        else:
            try:
                if not os.path.isdir(os.path.dirname(local_dst_path)):
                    os.makedirs(os.path.dirname(local_dst_path))
                # lock everything already present before adding the file,
                # matching the original ordering
                lock_tree(os.path.dirname(local_dst_path))
                shutil.copy(local_src_path, local_dst_path)
                os.chmod(local_dst_path, 0o444)
            except (OSError, IOError) as exc:
                logger.info(exc)

    # re-lock the destination and the published root
    os.chmod(prefix_dest, 0o555)
    os.chmod('/corral-repl/tacc/NHERI/published', 0o555)

    save_to_fedora.apply_async(args=[project_id])
    agave_indexer.apply_async(kwargs={
        'username': '******',
        'systemId': 'designsafe.storage.published',
        'filePath': '/' + project_id,
        'recurse': True
    }, queue='indexing')