def subscribe_mailchimp(list_name, user_id):
    user = User.load(user_id)
    m = get_mailchimp_api()
    list_id = get_list_id_from_name(list_name=list_name)

    if user.mailchimp_mailing_lists is None:
        user.mailchimp_mailing_lists = {}

    try:
        m.lists.subscribe(
            id=list_id,
            email={'email': user.username},
            merge_vars={
                'fname': user.given_name,
                'lname': user.family_name,
            },
            double_optin=False,
            update_existing=True,
        )
    except mailchimp.ValidationError as error:
        sentry.log_exception()
        sentry.log_message(error.message)
        user.mailchimp_mailing_lists[list_name] = False
    else:
        user.mailchimp_mailing_lists[list_name] = True
    finally:
        user.save()
def client():
    global CLIENT
    if CLIENT is None:
        try:
            CLIENT = Elasticsearch(
                settings.ELASTIC_URI,
                request_timeout=settings.ELASTIC_TIMEOUT,
                retry_on_timeout=True,
                **settings.ELASTIC_KWARGS
            )
            logging.getLogger('elasticsearch').setLevel(logging.WARN)
            logging.getLogger('elasticsearch.trace').setLevel(logging.WARN)
            logging.getLogger('urllib3').setLevel(logging.WARN)
            logging.getLogger('requests').setLevel(logging.WARN)
            CLIENT.cluster.health(wait_for_status='yellow')
        except ConnectionError:
            message = (
                'The SEARCH_ENGINE setting is set to "elastic", but there '
                'was a problem starting the elasticsearch interface. Is '
                'elasticsearch running?'
            )
            if settings.SENTRY_DSN:
                try:
                    sentry.log_exception()
                    sentry.log_message(message)
                except AssertionError:  # App has not yet been initialized
                    logger.exception(message)
            else:
                logger.error(message)
            exit(1)
    return CLIENT
def get_sub_folders(self, library_id, folder_id=None, **kwargs):
    """
    Returns serialized folders underneath a specific library/group - these are
    the lower tiers of folders in Zotero.

    If no folder_id is specified, all folders in the group library are
    returned as a flat list. If a folder_id is specified, only the subfolders
    within that folder are returned.
    """
    try:
        sub_folders = self.api._get_folders(library_id=library_id, folder_id=folder_id)
    except zotero_errors.ResourceNotFound:
        raise HTTPError(404)
    except zotero_errors.UserNotAuthorised:
        raise HTTPError(403)
    except zotero_errors.HTTPError:
        sentry.log_exception()
        sentry.log_message('Unexpected Zotero Error when fetching folders.')
        raise HTTPError(500)

    serialized = []
    for folder in sub_folders:
        data = folder['data']
        path = folder['library']['id'] if folder['library']['type'] == 'group' else 'personal'
        serialized.append(self.serialize_folder('folder', data['key'], data['name'], path, data['parentCollection']))
    if folder_id:
        return serialized
    else:
        all_documents = self.serialize_folder('folder', 'ROOT', 'All Documents', library_id, '__', None)
        return [all_documents] + serialized
def send_digest(grouped_digests):
    """ Send digest emails and remove digests for sent messages in a callback.
    :param grouped_digests: digest notification messages from the past 24 hours grouped by user
    :return:
    """
    for group in grouped_digests:
        user = User.load(group['user_id'])
        if not user:
            sentry.log_exception()
            sentry.log_message("A user with this username does not exist.")
            return

        info = group['info']
        digest_notification_ids = [message['_id'] for message in info]
        sorted_messages = group_messages_by_node(info)

        if sorted_messages:
            logger.info('Sending email digest to user {0!r}'.format(user))
            mails.send_mail(
                to_addr=user.username,
                mimetype='html',
                mail=mails.DIGEST,
                name=user.fullname,
                message=sorted_messages,
                callback=remove_sent_digest_notifications.si(
                    digest_notification_ids=digest_notification_ids
                )
            )
def update_or_create_preprint_identifiers(preprint):
    status = 'public' if preprint.verified_publishable else 'unavailable'
    try:
        preprint.request_identifier_update(category='doi', status=status)
    except HTTPError as err:
        sentry.log_exception()
        sentry.log_message(err.args[0])
def sync_data_from_mailchimp(**kwargs):
    """Endpoint that the mailchimp webhook sends its data to"""
    key = request.args.get('key')
    if key == settings.MAILCHIMP_WEBHOOK_SECRET_KEY:
        r = request
        action = r.values['type']
        list_name = mailchimp_utils.get_list_name_from_id(
            list_id=r.values['data[list_id]']
        )
        username = r.values['data[email]']

        try:
            user = User.find_one(Q('username', 'eq', username))
        except NoResultsFound:
            sentry.log_exception()
            sentry.log_message('A user with this username does not exist.')
            raise HTTPError(404, data=dict(
                message_short='User not found',
                message_long='A user with this username does not exist'
            ))

        if action == 'unsubscribe':
            user.mailchimp_mailing_lists[list_name] = False
            user.save()
        elif action == 'subscribe':
            user.mailchimp_mailing_lists[list_name] = True
            user.save()
    else:
        # TODO: get tests to pass with sentry logging
        # sentry.log_exception()
        # sentry.log_message("Unauthorized request to the OSF.")
        raise HTTPError(http.UNAUTHORIZED)
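# A minimal sketch (assumed, not from the source) of the form-encoded body the
# mailchimp webhook POSTs to the endpoint above. The values are hypothetical,
# but the keys are exactly the ones the view reads; the shared secret arrives
# separately as the `key` query parameter checked against
# settings.MAILCHIMP_WEBHOOK_SECRET_KEY.
example_webhook_payload = {
    'type': 'unsubscribe',                # read as r.values['type']
    'data[list_id]': 'a1b2c3d4e5',        # hypothetical list id
    'data[email]': '[email protected]',  # hypothetical subscriber email
}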
def handle_internal_error(initiator=None, provider=None, message=None, dry_run=True):
    """Log errors that happened due to an unexpected bug and send emails to the
    uploader (if available) about failures. The product owner (if available) is
    informed as well with more details. Emails are not sent during a dry run.
    """
    if not message:
        message = 'Registration bulk upload failure'
    logger.error(message)
    sentry.log_message(message)

    if not dry_run:
        if initiator:
            mails.send_mail(
                to_addr=initiator.username,
                mail=mails.REGISTRATION_BULK_UPLOAD_UNEXPECTED_FAILURE,
                fullname=initiator.fullname,
                osf_support_email=settings.OSF_SUPPORT_EMAIL,
            )
        inform_product_of_errors(initiator=initiator, provider=provider, message=message)
def on_preprint_updated(preprint_id, update_share=True):
    # WARNING: Only perform Read-Only operations in an asynchronous task, until Repeatable Read/Serializable
    # transactions are implemented in View and Task application layers.
    from osf.models import PreprintService
    preprint = PreprintService.load(preprint_id)

    if preprint.node:
        status = 'public' if preprint.node.is_public else 'unavailable'
        try:
            update_ezid_metadata_on_change(preprint, status=status)
        except HTTPError as err:
            sentry.log_exception()
            sentry.log_message(err.args[0])

    if settings.SHARE_URL and update_share:
        if not preprint.provider.access_token:
            raise ValueError('No access_token for {}. Unable to send {} to SHARE.'.format(preprint.provider, preprint))
        resp = requests.post(
            '{}api/v2/normalizeddata/'.format(settings.SHARE_URL),
            json={
                'data': {
                    'type': 'NormalizedData',
                    'attributes': {
                        'tasks': [],
                        'raw': None,
                        'data': {'@graph': format_preprint(preprint)}
                    }
                }
            },
            headers={
                'Authorization': 'Bearer {}'.format(preprint.provider.access_token),
                'Content-Type': 'application/vnd.api+json'
            }
        )
        logger.debug(resp.content)
        resp.raise_for_status()
def get_folders(self, show_root=False, **kwargs):
    if self.has_auth:
        try:
            folders = self.api._get_folders()
            serialized_root_folder = {
                'name': 'All Documents',
                'provider_list_id': None,
                'id': 'ROOT',
                'parent_list_id': '__',
                'kind': 'folder',
                'addon': 'mendeley'
            }
            serialized_folders = [{
                'addon': 'mendeley',
                'kind': 'folder',
                'id': folder.json['id'],
                'name': folder.json['name'],
                'path': folder.json.get('parent_id', '/'),
                'parent_list_id': folder.json.get('parent_id', None),
                'provider_list_id': folder.json['id']
            } for folder in folders]
            if show_root:
                serialized_folders.insert(0, serialized_root_folder)
            return serialized_folders
        except MendeleyApiException as error:
            sentry.log_exception()
            sentry.log_message('Unexpected Mendeley Error when fetching folders.')
            raise HTTPError(error.status)
    else:
        raise exceptions.InvalidAuthError()
def wrapped(*args, **kwargs):
    try:
        return func(*args, **kwargs)
    except exceptions.MalformedQueryError:
        raise HTTPError(http.BAD_REQUEST, data={
            'message_short': 'Bad search query',
            'message_long': language.SEARCH_QUERY_HELP,
        })
    except exceptions.SearchUnavailableError:
        raise HTTPError(http.SERVICE_UNAVAILABLE, data={
            'message_short': 'Search unavailable',
            'message_long': ('Our search service is currently unavailable, if the issue persists, '
                             'please report it to <a href="mailto:[email protected]">[email protected]</a>.'),
        })
    except exceptions.SearchException:
        # Interim fix for issue where ES fails with 500 in some settings - ensure the
        # exception is still logged until it can be better debugged. See OSF-4538
        sentry.log_exception()
        sentry.log_message('Elasticsearch returned an unexpected error response')
        # TODO: Add a test; may need to mock out the error response due to inability to reproduce error code locally
        raise HTTPError(http.BAD_REQUEST, data={
            'message_short': 'Could not perform search query',
            'message_long': language.SEARCH_QUERY_HELP,
        })
def wrapped(*args, **kwargs):
    if client() is not None:
        try:
            return func(*args, **kwargs)
        except ConnectionError as e:
            raise exceptions.SearchUnavailableError(str(e))
        except NotFoundError as e:
            raise exceptions.IndexNotFoundError(e.error)
        except RequestError as e:
            if e.error == 'search_phase_execution_exception':
                raise exceptions.MalformedQueryError('Failed to parse query')
            if 'ParseException' in e.error:  # ES 1.5
                raise exceptions.MalformedQueryError(e.error)
            if type(e.error) == dict:  # ES 2.0
                try:
                    root_cause = e.error['root_cause'][0]
                    if root_cause['type'] == 'query_parsing_exception':
                        raise exceptions.MalformedQueryError(root_cause['reason'])
                except (AttributeError, KeyError):
                    pass
            raise exceptions.SearchException(e.error)
        except TransportError as e:
            # Catch and wrap generic uncaught ES error codes.
            # TODO: Improve fix for https://openscience.atlassian.net/browse/OSF-4538
            raise exceptions.SearchException(e.error)
    sentry.log_message('Elastic search action failed. Is elasticsearch running?')
    raise exceptions.SearchUnavailableError('Failed to connect to elasticsearch')
def update_or_create_preprint_identifiers(preprint):
    status = 'public' if preprint.verified_publishable and not preprint.is_retracted else 'unavailable'
    try:
        preprint.request_identifier_update(category='doi', status=status)
    except HTTPError as err:
        sentry.log_exception()
        sentry.log_message(err.args[0])
def subscribe_mailchimp(list_name, user_id):
    user = OSFUser.load(user_id)
    m = get_mailchimp_api()
    list_id = get_list_id_from_name(list_name=list_name)

    if user.mailchimp_mailing_lists is None:
        user.mailchimp_mailing_lists = {}

    try:
        m.lists.subscribe(
            id=list_id,
            email={'email': user.username},
            merge_vars={
                'fname': user.given_name,
                'lname': user.family_name,
            },
            double_optin=False,
            update_existing=True,
        )
    except (mailchimp.ValidationError, mailchimp.ListInvalidBounceMemberError) as error:
        sentry.log_exception()
        sentry.log_message(error)
        user.mailchimp_mailing_lists[list_name] = False
    else:
        user.mailchimp_mailing_lists[list_name] = True
    finally:
        user.save()
def sync_data_from_mailchimp(**kwargs):
    """Endpoint that the mailchimp webhook sends its data to"""
    key = request.args.get('key')
    if key == settings.MAILCHIMP_WEBHOOK_SECRET_KEY:
        r = request
        action = r.values['type']
        list_name = mailchimp_utils.get_list_name_from_id(list_id=r.values['data[list_id]'])
        username = r.values['data[email]']

        try:
            user = OSFUser.objects.get(username=username)
        except OSFUser.DoesNotExist:
            sentry.log_exception()
            sentry.log_message('A user with this username does not exist.')
            raise HTTPError(404, data=dict(
                message_short='User not found',
                message_long='A user with this username does not exist'
            ))

        if action == 'unsubscribe':
            user.mailchimp_mailing_lists[list_name] = False
            user.save()
        elif action == 'subscribe':
            user.mailchimp_mailing_lists[list_name] = True
            user.save()
    else:
        # TODO: get tests to pass with sentry logging
        # sentry.log_exception()
        # sentry.log_message("Unauthorized request to the OSF.")
        raise HTTPError(http.UNAUTHORIZED)
def _send_with_sendgrid(from_addr, to_addr, subject, message, mimetype='html',
                        categories=None, attachment_name=None, attachment_content=None, client=None):
    if (settings.SENDGRID_WHITELIST_MODE and to_addr in settings.SENDGRID_EMAIL_WHITELIST) \
            or settings.SENDGRID_WHITELIST_MODE is False:
        client = client or sendgrid.SendGridClient(settings.SENDGRID_API_KEY)
        mail = sendgrid.Mail()
        mail.set_from(from_addr)
        mail.add_to(to_addr)
        mail.set_subject(subject)
        if mimetype == 'html':
            mail.set_html(message)
        if categories:
            mail.set_categories(categories)
        if attachment_name and attachment_content:
            mail.add_attachment_stream(attachment_name, attachment_content)
        status, msg = client.send(mail)
        if status >= 400:
            sentry.log_message(
                '{} error response from sendgrid.'.format(status) +
                'from_addr: {}\n'.format(from_addr) +
                'to_addr: {}\n'.format(to_addr) +
                'subject: {}\n'.format(subject) +
                'mimetype: {}\n'.format(mimetype) +
                'message: {}\n'.format(message[:30]) +
                'categories: {}\n'.format(categories) +
                'attachment_name: {}\n'.format(attachment_name)
            )
        return status < 400
    else:
        sentry.log_message(
            'SENDGRID_WHITELIST_MODE is True. Failed to send emails to non-whitelisted recipient {}.'.format(to_addr)
        )
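# A hedged sketch of exercising _send_with_sendgrid without touching the
# network, via the `client` parameter the function already accepts. The stub
# below is hypothetical test scaffolding, not part of the source; it only
# needs a send(mail) method returning a (status, message) pair.
class _StubSendGridClient(object):
    def send(self, mail):
        return 200, 'OK'  # pretend sendgrid accepted the mail

# Expected to return True, since 200 < 400:
# _send_with_sendgrid('[email protected]', '[email protected]', 'Hi', '<p>Hi</p>',
#                     client=_StubSendGridClient())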
def log_errors(self, obj, obj_id, error):
    if not self.errors:
        script_utils.add_file_logger(logger, __file__)
    self.errors += 1
    logger.info('Error on {}, {}:'.format(obj, obj_id))
    logger.exception(error)
    if self.errors == 1000:
        sentry.log_message('ERROR: generate_sitemap stopped execution after reaching 1000 errors. See logs for details.')
        raise Exception('Too many errors generating sitemap.')
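# A plausible call site for log_errors inside the sitemap generation loop.
# This is a sketch; the generate_for_node helper and loop variables are
# assumptions for illustration, not from the source:
# try:
#     self.generate_for_node(node)
# except Exception as e:
#     self.log_errors(node, node._id, e)
#     continue  # keep generating; log_errors raises only after 1000 failures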
def write_summary_data(filename, summary_data, remote_base_folder):
    header_row = summary_data.keys()
    summary_row = summary_data.values()
    file_path = '{}{}'.format(TEMP_FOLDER, filename)
    old_remote = requests.get(
        url=remote_base_folder['files'],
        headers={'Accept': 'application/vnd.api+json;version={}'.format(DEFAULT_API_VERSION)},
        auth=bearer_token_auth(DS_METRICS_OSF_TOKEN),
        params={'filter[name]': filename},
    ).json()
    try:
        logger.debug('json: {}'.format(old_remote))
        if old_remote[u'meta'][u'total'] > 1:
            sentry.log_message(
                'Too many files that look like {} - this may cause problems for data storage usage summaries'.format(
                    remote_base_folder['files']
                )
            )
        old_remote_data = old_remote['data'][0]
        upload = old_remote_data['links']['upload']
        params = {'kind': 'file'}

        header_skipped = False
        with open(file_path, 'w') as new_file:
            writer = csv.writer(new_file, delimiter=',', lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerow(header_row)
            with requests.get(
                url=upload,  # Yes, upload is correct here.
                headers={'Accept': 'application/vnd.api+json;version={}'.format(DEFAULT_API_VERSION)},
                auth=bearer_token_auth(DS_METRICS_OSF_TOKEN),
                stream=True,
            ) as old_file:
                lines = []
                for line in old_file.iter_lines():
                    if isinstance(line, bytes):
                        line = line.decode()
                    lines.append(line)
                reader = csv.reader(lines, delimiter=',', lineterminator='\n')
                for row in reader:
                    if header_skipped:
                        writer.writerow(row)
                    header_skipped = True
            writer.writerow(summary_row)
    except IndexError:
        upload = remote_base_folder['upload']
        params = {
            'kind': 'file',
            'name': filename,
        }
        with open(file_path, 'w') as new_file:
            writer = csv.writer(new_file, delimiter=',', lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerow(header_row)
            writer.writerow(summary_row)

    upload_to_storage(file_path=file_path, upload_url=upload, params=params)
def ship_to_s3(self, name, path):
    data = open(path, 'rb')
    try:
        self.s3.Bucket(settings.SITEMAP_AWS_BUCKET).put_object(Key='sitemaps/{}'.format(name), Body=data)
    except Exception as e:
        logger.info('Error sending data to s3 via boto3')
        logger.exception(e)
        sentry.log_message('ERROR: Sitemaps could not be uploaded to s3, see `generate_sitemap` logs')
    data.close()
def migrate_responses(resources, resource_name, dry_run=False, rows='all'):
    """
    DRY method to be used to migrate both DraftRegistration.registration_responses
    and Registration.registration_responses.
    """
    progress_bar = None
    if rows == 'all':
        logger.info('Migrating all {}.'.format(resource_name))
    else:
        resources = resources[:rows]
        logger.info('Migrating up to {} {}.'.format(rows, resource_name))
        progress_bar = tqdm(total=rows)

    successes_to_save = []
    errors_to_save = []
    for resource in resources:
        try:
            schema = get_registration_schema(resource)
            resource.registration_responses = flatten_registration_metadata(
                schema,
                get_nested_responses(resource, schema._id),
            )
            resource.registration_responses_migrated = True
            successes_to_save.append(resource)
        except SchemaBlockConversionError as e:
            resource.registration_responses_migrated = False
            errors_to_save.append(resource)
            logger.error('Unexpected/invalid nested data in resource: {} with error {}'.format(resource, e))
        if progress_bar:
            progress_bar.update()
    if progress_bar:
        progress_bar.close()

    success_count = len(successes_to_save)
    error_count = len(errors_to_save)
    total_count = success_count + error_count
    if total_count == 0:
        logger.info('No {} left to migrate.'.format(resource_name))
        return total_count

    logger.info('Successfully migrated {} out of {} {}.'.format(success_count, total_count, resource_name))
    if error_count:
        logger.warn('Encountered errors on {} out of {} {}.'.format(error_count, total_count, resource_name))
        if not success_count:
            sentry.log_message('`migrate_registration_responses` has only errors left ({} errors)'.format(error_count))

    if dry_run:
        logger.info('DRY RUN; discarding changes.')
    else:
        logger.info('Saving changes...')
        bulk_update(successes_to_save, update_fields=['registration_responses', 'registration_responses_migrated'])
        bulk_update(errors_to_save, update_fields=['registration_responses_migrated'])

    return total_count
def update_or_create_preprint_identifiers(preprint):
    status = 'public' if preprint.verified_publishable else 'unavailable'
    if preprint.is_published and not preprint.get_identifier('doi'):
        request_identifiers(preprint)
    else:
        try:
            update_doi_metadata_on_change(preprint._id, status=status)
        except HTTPError as err:
            sentry.log_exception()
            sentry.log_message(err.args[0])
def reactivate(self):
    """Reactivate an inactive institution and update OSF search without
    sending out emails.
    """
    if self.deactivated:
        self.deactivated = None
        self.save()
    else:
        message = f'Action rejected - reactivating an active institution [{self._id}].'
        logger.warning(message)
        sentry.log_message(message)
def print_cas_log(msg, level):
    if settings.CAS_LOG_LEVEL > level.value:
        return
    if level == LogLevel.ERROR:
        logger.error(msg)
        sentry.log_message(msg)
    elif level == LogLevel.DEBUG:
        logger.debug(msg)
    elif level == LogLevel.INFO:
        logger.info(msg)
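# print_cas_log assumes a LogLevel enum whose values order DEBUG < INFO < ERROR,
# so the CAS_LOG_LEVEL threshold comparison works. A minimal sketch of such an
# enum (an assumption - the real definition lives elsewhere in the codebase):
import enum

class LogLevel(enum.Enum):
    DEBUG = 0
    INFO = 1
    ERROR = 2

# With settings.CAS_LOG_LEVEL = 1, DEBUG messages are dropped, while
# print_cas_log('login ok', LogLevel.INFO) reaches logger.info and
# print_cas_log('login failed', LogLevel.ERROR) also goes to sentry.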
def update_or_create_preprint_identifiers(preprint):
    status = 'public' if preprint.verified_publishable else 'unavailable'
    if preprint.is_published and not preprint.get_identifier('doi'):
        get_and_set_preprint_identifiers(preprint)
    else:
        try:
            update_ezid_metadata_on_change(preprint._id, status=status)
        except HTTPError as err:
            sentry.log_exception()
            sentry.log_message(err.args[0])
def run_sql(statement, check_statement, page_size):
    table = statement.split(' ')[1]
    logger.info('Populating deleted column in table {}'.format(table))
    with connection.cursor() as cursor:
        cursor.execute(statement.format(LIMIT_CLAUSE), [page_size])
        rows = cursor.fetchall()
        if not rows:
            with connection.cursor() as cursor:
                cursor.execute(check_statement, [page_size])
                sentry.log_message('Deleted field in {} table is populated'.format(table))
def resolve_guid(guid, suffix=None):
    """Resolve GUID to corresponding URL and return result of appropriate
    view function. This effectively yields a redirect without changing the
    displayed URL of the page.

    :param guid: GUID value (not the object)
    :param suffix: String to append to GUID route
    :return: Werkzeug response
    """
    # Get prefix; handles API routes
    prefix = request.path.split(guid)[0].rstrip('/')

    # Look up GUID
    guid_object = Guid.load(guid)
    if guid_object:
        # verify that the object is a GuidStoredObject descendant. If a model
        # was once a descendant but that relationship has changed, it's
        # possible to have referents that are instances of classes that don't
        # have a redirect_mode attribute or otherwise don't behave as
        # expected.
        if not isinstance(guid_object.referent, GuidStoredObject):
            sentry.log_message(
                'Guid `{}` resolved to non-guid object'.format(guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        mode = referent.redirect_mode
        if mode is None:
            raise HTTPError(http.NOT_FOUND)
        url = referent.deep_url if mode == 'proxy' else referent.url
        url = _build_guid_url(url, prefix, suffix)
        # Always redirect API URLs; URL should identify endpoint being called
        if prefix or mode == 'redirect':
            if request.query_string:
                url += '?' + request.query_string
            return redirect(url)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), prefix, suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
def run_statements(statement, page_size, table):
    logger.info('Populating deleted column in table {}'.format(table))
    with connection.cursor() as cursor:
        cursor.execute(statement.format(table, table, LIMIT_CLAUSE), [page_size])
        rows = cursor.fetchall()
        if rows:
            cursor.execute(CHECK_POPULATED.format(table), [page_size])
            remaining_rows = cursor.fetchall()
            if not remaining_rows:
                sentry.log_message('Deleted field in {} table is populated'.format(table))
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in
    the routing table, and return the return value of the view function
    without changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    """
    try:
        # Look up
        guid_object = Guid.load(guid)
    except KeyError as e:
        if e.message == 'osfstorageguidfile':  # Used when an old detached OsfStorageGuidFile object is accessed
            raise HTTPError(http.NOT_FOUND)
        else:
            raise e
    if guid_object:
        # verify that the object implements a GuidStoredObject-like interface. If a model
        # was once GuidStoredObject-like but that relationship has changed, it's
        # possible to have referents that are instances of classes that don't
        # have a deep_url attribute or otherwise don't behave as
        # expected.
        if not hasattr(guid_object.referent, 'deep_url'):
            sentry.log_message(
                'Guid `{}` resolved to an object with no deep_url'.format(guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        if not referent.deep_url:
            raise HTTPError(http.NOT_FOUND)
        if isinstance(referent, PreprintService):
            return send_from_directory(
                os.path.abspath(os.path.join(os.getcwd(), EXTERNAL_EMBER_APPS['preprints']['path'])),
                'index.html'
            )
        url = _build_guid_url(urllib.unquote(referent.deep_url), suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
def deactivate(self):
    """Deactivate an active institution, update OSF search and send emails to
    all affiliated users.
    """
    if not self.deactivated:
        self.deactivated = timezone.now()
        self.save()
        # Django managers aren't used when querying on related models. Thus, we can query
        # affiliated users and send notification emails after the institution has been deactivated.
        self._send_deactivation_email()
    else:
        message = f'Action rejected - deactivating an inactive institution [{self._id}].'
        logger.warning(message)
        sentry.log_message(message)
def migrate_registration_responses(dry_run=False, rows=5000):
    script_start_time = datetime.datetime.now()
    logger.info('Script started time: {}'.format(script_start_time))

    draft_count = migrate_draft_registrations(dry_run, rows)
    registration_count = migrate_registrations(dry_run, rows)

    if draft_count == 0 and registration_count == 0:
        logger.info('Migration complete! No more drafts or registrations need migrating.')
        sentry.log_message('`migrate_registration_responses` command found nothing to migrate!')

    script_finish_time = datetime.datetime.now()
    logger.info('Script finished time: {}'.format(script_finish_time))
    logger.info('Run time {}'.format(script_finish_time - script_start_time))
def wrapped(*args, **kwargs):
    if es is not None:
        try:
            return func(*args, **kwargs)
        except ConnectionError:
            raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
        except NotFoundError as e:
            raise exceptions.IndexNotFoundError(e.error)
        except RequestError as e:
            if 'ParseException' in e.error:
                raise exceptions.MalformedQueryError(e.error)
            raise exceptions.SearchException(e.error)
    sentry.log_message('Elastic search action failed. Is elasticsearch running?')
    raise exceptions.SearchUnavailableError('Failed to connect to elasticsearch')
def on_preprint_updated(preprint_id, update_share=True, share_type=None, old_subjects=None):
    # WARNING: Only perform Read-Only operations in an asynchronous task, until Repeatable Read/Serializable
    # transactions are implemented in View and Task application layers.
    from osf.models import PreprintService
    preprint = PreprintService.load(preprint_id)
    if old_subjects is None:
        old_subjects = []

    if preprint.node:
        status = 'public' if preprint.verified_publishable else 'unavailable'
        try:
            update_ezid_metadata_on_change(preprint._id, status=status)
        except HTTPError as err:
            sentry.log_exception()
            sentry.log_message(err.args[0])

    if update_share:
        update_preprint_share(preprint, old_subjects, share_type)
def update(self, revision, data, user=None, save=True):
    """Using revision and data, update all data pertaining to self

    :param str or None revision: The revision that data points to
    :param dict data: Metadata received from waterbutler
    :returns: FileVersion
    """
    self.name = data['name']
    self.materialized_path = data['materialized']

    version = FileVersion(identifier=revision)
    version.update_metadata(data, save=False)

    # Transform here so it can be sorted on later
    if data['modified'] is not None and data['modified'] != '':
        data['modified'] = parse_date(
            data['modified'],
            ignoretz=True,
            default=timezone.now()  # Just in case nothing can be parsed
        )

    # if revision is None then version is the latest version
    # Don't save the latest information
    if revision is not None:
        version.save()
        # Adds version to the list of file versions - using custom through table
        self.add_version(version)
    for entry in self.history:
        # Some entries might have an undefined modified field
        if data['modified'] is not None and entry['modified'] is not None and data['modified'] < entry['modified']:
            sentry.log_message('update() receives metadata older than the newest entry in file history.')
        if ('etag' in entry and 'etag' in data) and (entry['etag'] == data['etag']):
            break
    else:
        self.history.append(data)

    # Finally update last touched
    self.last_touched = timezone.now()

    if save:
        self.save()
    return version
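# A hedged example of the waterbutler metadata dict that update() consumes.
# Only the keys the method actually touches are shown; the values are
# hypothetical, and 'modified' may also be None or '' per the guards above.
example_waterbutler_metadata = {
    'name': 'results.csv',
    'materialized': '/analysis/results.csv',
    'modified': '2017-03-01T14:25:00',  # parsed with parse_date(..., ignoretz=True)
    'etag': 'abc123',                   # used to detect duplicate history entries
}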
def get_top_level_folders(self, **kwargs):
    """
    Returns serialized group libraries - your personal library along with any
    group libraries. This is the top tier of "folders" in Zotero.

    You can use kwargs to refine what data is returned - how to limit the
    number of group libraries, whether to return the personal library
    alongside group libraries, or whether to append the total library count.
    """
    # These kwargs are passed in from ZoteroViews > library_list
    limit = kwargs.get('limit', None)
    start = kwargs.get('start', None)
    return_count = kwargs.get('return_count', False)
    append_personal = kwargs.get('append_personal', True)

    try:
        # Fetch group libraries
        libraries = self.api._fetch_libraries(limit=limit, start=start)
    except zotero_errors.ResourceNotFound:
        raise HTTPError(404)
    except zotero_errors.UserNotAuthorised:
        raise HTTPError(403)
    except zotero_errors.HTTPError:
        sentry.log_exception()
        sentry.log_message('Unexpected Zotero Error when fetching group libraries.')
        raise HTTPError(500)

    # Serialize libraries
    serialized = []
    for library in libraries[:-1]:
        data = library['data']
        serialized.append(self.serialize_folder('library', data['id'], data['name'], str(data['id'])))
    if return_count:
        # Return total number of libraries as last item in list
        serialized.append(libraries[-1])
    if append_personal:
        # Append personal library as option alongside group libraries
        serialized.insert(0, self.serialize_folder('library', 'personal', 'My Library', 'personal'))
    return serialized
def create_primary_collection_for_provider(sender, instance, created, **kwargs):
    if created:
        Collection = apps.get_model('osf.Collection')
        user = getattr(instance, '_creator', None)  # Temp attr set in admin view
        if user:
            c = Collection(
                title='{}\'s Collection'.format(instance.name),
                creator=user,
                provider=instance,
                is_promoted=True,
                is_public=True
            )
            c.save()
            instance.primary_collection = c
            instance.save()
        else:
            # A user is required for Collections / Groups
            sentry.log_message('Unable to create primary_collection for {}Provider {}'.format(
                instance.readable_type.capitalize(), instance.name
            ))
def wrapped(*args, **kwargs):
    if es is not None:
        try:
            return func(*args, **kwargs)
        except ConnectionError:
            raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
        except NotFoundError as e:
            raise exceptions.IndexNotFoundError(e.error)
        except RequestError as e:
            if 'ParseException' in e.error:
                raise exceptions.MalformedQueryError(e.error)
            raise exceptions.SearchException(e.error)
        except TransportError as e:
            # Catch and wrap generic uncaught ES error codes.
            # TODO: Improve fix for https://openscience.atlassian.net/browse/OSF-4538
            raise exceptions.SearchException(e.error)
    sentry.log_message('Elastic search action failed. Is elasticsearch running?')
    raise exceptions.SearchUnavailableError('Failed to connect to elasticsearch')
def migrate_page_counters(dry_run=False, rows=10000, reverse=False):
    script_start_time = datetime.datetime.now()
    logger.info('Script started time: {}'.format(script_start_time))

    sql_query = REVERSE_SQL_LIMITED if reverse else FORWARD_SQL_LIMITED
    logger.info('SQL Query: {}'.format(sql_query))

    with connection.cursor() as cursor:
        if not dry_run:
            cursor.execute(sql_query, [rows])
        if not reverse:
            cursor.execute(COUNT_SQL)
            number_of_entries_left = cursor.fetchone()[0]
            logger.info('Entries left: {}'.format(number_of_entries_left))
            if number_of_entries_left == 0:
                sentry.log_message('Migrate pagecounter data complete')

    script_finish_time = datetime.datetime.now()
    logger.info('Script finished time: {}'.format(script_finish_time))
    logger.info('Run time {}'.format(script_finish_time - script_start_time))
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in
    the routing table, and return the return value of the view function
    without changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    """
    # Look up GUID
    guid_object = Guid.load(guid)
    if guid_object:
        # verify that the object is a GuidStoredObject descendant. If a model
        # was once a descendant but that relationship has changed, it's
        # possible to have referents that are instances of classes that don't
        # have a redirect_mode attribute or otherwise don't behave as
        # expected.
        if not isinstance(guid_object.referent, GuidStoredObject):
            sentry.log_message(
                'Guid `{}` resolved to non-guid object'.format(guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        if not referent.deep_url:
            raise HTTPError(http.NOT_FOUND)
        url = _build_guid_url(referent.deep_url, suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in
    the routing table, and return the return value of the view function
    without changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    """
    # Look up GUID
    guid_object = Guid.load(guid)
    if guid_object:
        # verify that the object implements a GuidStoredObject-like interface. If a model
        # was once GuidStoredObject-like but that relationship has changed, it's
        # possible to have referents that are instances of classes that don't
        # have a deep_url attribute or otherwise don't behave as
        # expected.
        if not hasattr(guid_object.referent, 'deep_url'):
            sentry.log_message(
                'Guid `{}` resolved to an object with no deep_url'.format(guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        if not referent.deep_url:
            raise HTTPError(http.NOT_FOUND)
        url = _build_guid_url(urllib.unquote(referent.deep_url), suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
def update(self, revision, data, user=None, save=True):
    """Using revision and data, update all data pertaining to self

    :param str or None revision: The revision that data points to
    :param dict data: Metadata received from waterbutler
    :returns: FileVersion
    """
    self.name = data['name']
    self.materialized_path = data['materialized']

    version = FileVersion(identifier=revision)
    version.update_metadata(data, save=False)

    # Transform here so it can be sorted on later
    if data['modified'] is not None and data['modified'] != '':
        data['modified'] = parse_date(
            data['modified'],
            ignoretz=True,
            default=timezone.now()  # Just in case nothing can be parsed
        )

    # if revision is None then version is the latest version
    # Don't save the latest information
    if revision is not None:
        version.save()
        self.versions.add(version)
    for entry in self.history:
        # Some entries might have an undefined modified field
        if data['modified'] is not None and entry['modified'] is not None and data['modified'] < entry['modified']:
            sentry.log_message('update() receives metadata older than the newest entry in file history.')
        if ('etag' in entry and 'etag' in data) and (entry['etag'] == data['etag']):
            break
    else:
        self.history.append(data)

    # Finally update last touched
    self.last_touched = timezone.now()

    if save:
        self.save()
    return version
def has_permission(self, request, view):
    if request.method != 'POST':
        raise exceptions.MethodNotAllowed(method=request.method)
    data = request.data
    if not data:
        raise exceptions.ParseError('Request body is empty')
    if not settings.MAILGUN_API_KEY:
        return False
    signature = hmac.new(
        key=settings.MAILGUN_API_KEY,
        msg='{}{}'.format(
            data['timestamp'],
            data['token'],
        ),
        digestmod=hashlib.sha256,
    ).hexdigest()
    if 'signature' not in data:
        error_message = 'Signature required in request body'
        sentry.log_message(error_message)
        raise exceptions.ParseError(error_message)
    if not hmac.compare_digest(unicode(signature), unicode(data['signature'])):
        raise exceptions.ParseError('Invalid signature')
    return True
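# A sketch of computing the signature mailgun is expected to send, mirroring
# the HMAC-SHA256 construction in has_permission above (str keys/messages as
# in the Python 2 code above; the key and values here are hypothetical):
import hashlib
import hmac

api_key = 'key-hypothetical'
timestamp = '1529006854'
token = 'a8ce0edb2dd8301dee6c2405235584e45aa91d1e'
expected_signature = hmac.new(
    key=api_key,
    msg='{}{}'.format(timestamp, token),
    digestmod=hashlib.sha256,
).hexdigest()
# A request whose data['signature'] equals expected_signature passes the check.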
def authenticate(self, request):
    """
    Handle CAS institution authentication request.

    The JWT `data` payload is expected in the following structure:
    {
        "provider": {
            "idp": "",
            "id": "",
            "user": {
                "username": "",
                "fullname": "",
                "familyName": "",
                "givenName": "",
                "middleNames": "",
                "suffix": ""
            }
        }
    }

    :param request: the POST request
    :return: user, None if authentication succeeds
    :raises: AuthenticationFailed if authentication fails
    """
    try:
        payload = jwt.decode(
            jwe.decrypt(request.body, settings.JWE_SECRET),
            settings.JWT_SECRET,
            options={'verify_exp': False},
            algorithm='HS256',
        )
    except (jwt.InvalidTokenError, TypeError):
        raise AuthenticationFailed

    data = json.loads(payload['data'])
    provider = data['provider']

    institution = Institution.load(provider['id'])
    if not institution:
        raise AuthenticationFailed('Invalid institution id specified "{}"'.format(provider['id']))

    username = provider['user'].get('username')
    fullname = provider['user'].get('fullname')
    given_name = provider['user'].get('givenName')
    family_name = provider['user'].get('familyName')
    middle_names = provider['user'].get('middleNames')
    suffix = provider['user'].get('suffix')

    # use given name and family name to build full name if not provided
    if given_name and family_name and not fullname:
        fullname = given_name + ' ' + family_name

    # institution must provide `fullname`, otherwise we fail the authentication and inform sentry
    if not fullname:
        message = 'Institution login failed: fullname required' \
                  ' for user {} from institution {}'.format(username, provider['id'])
        sentry.log_message(message)
        raise AuthenticationFailed(message)

    # `get_or_create_user()` guesses names from fullname
    # replace the guessed ones if the names are provided from the authentication
    user, created = get_or_create_user(fullname, username, reset_password=False)
    if created:
        if given_name:
            user.given_name = given_name
        if family_name:
            user.family_name = family_name
        if middle_names:
            user.middle_names = middle_names
        if suffix:
            user.suffix = suffix
        user.update_date_last_login()
        # Relying on front-end validation until `accepted_tos` is added to the JWT payload
        user.accepted_terms_of_service = timezone.now()

        # save and register user
        user.save()
        user.register(username)

        # send confirmation email
        send_mail(
            to_addr=user.username,
            mail=WELCOME_OSF4I,
            mimetype='html',
            user=user,
            domain=DOMAIN,
            osf_support_email=OSF_SUPPORT_EMAIL,
            storage_flag_is_active=waffle.flag_is_active(request, features.STORAGE_I18N),
        )

    if not user.is_affiliated_with_institution(institution):
        user.affiliated_institutions.add(institution)
        user.save()

    return user, None
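# A hedged sketch of building the kind of request body authenticate() expects:
# a JWT signed with JWT_SECRET, wrapped in JWE with JWE_SECRET, carrying the
# institution payload as a JSON string under 'data'. This mirrors the
# decrypt/decode calls above; the provider and user values are hypothetical.
login_data = json.dumps({
    'provider': {
        'idp': 'https://idp.example.edu/shibboleth',
        'id': 'example-institution',
        'user': {
            'username': '[email protected]',
            'fullname': 'Ada Lovelace',
            'familyName': 'Lovelace',
            'givenName': 'Ada',
            'middleNames': '',
            'suffix': '',
        },
    },
})
request_body = jwe.encrypt(
    jwt.encode({'data': login_data}, settings.JWT_SECRET, algorithm='HS256'),
    settings.JWE_SECRET,
)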
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in
    the routing table, and return the return value of the view function
    without changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    """
    try:
        # Look up
        guid_object = Guid.load(guid)
    except KeyError as e:
        if e.message == 'osfstorageguidfile':  # Used when an old detached OsfStorageGuidFile object is accessed
            raise HTTPError(http.NOT_FOUND)
        else:
            raise e
    if guid_object:
        # verify that the object implements a GuidStoredObject-like interface. If a model
        # was once GuidStoredObject-like but that relationship has changed, it's
        # possible to have referents that are instances of classes that don't
        # have a deep_url attribute or otherwise don't behave as
        # expected.
        if not hasattr(guid_object.referent, 'deep_url'):
            sentry.log_message(
                'Guid resolved to an object with no deep_url', dict(guid=guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        if not referent.deep_url:
            raise HTTPError(http.NOT_FOUND)

        # Handle file `/download` shortcut with supported types.
        if suffix and suffix.rstrip('/').lower() == 'download':
            file_referent = None
            if isinstance(referent, PreprintService) and referent.primary_file:
                if not referent.is_published:
                    # TODO: Ideally, permissions wouldn't be checked here.
                    # This is necessary to prevent a logical inconsistency with
                    # the routing scheme - if a preprint is not published, only
                    # admins and moderators should be able to know it exists.
                    auth = Auth.from_kwargs(request.args.to_dict(), {})
                    group_helper = GroupHelper(referent.provider)
                    admin_group = group_helper.get_group('admin')
                    mod_group = group_helper.get_group('moderator')
                    # Check if user isn't a nonetype or that the user has admin/moderator permissions
                    if auth.user is None or not (
                        referent.node.has_permission(auth.user, permissions.ADMIN) or
                        (mod_group.user_set.all() | admin_group.user_set.all()).filter(id=auth.user.id).exists()
                    ):
                        raise HTTPError(http.NOT_FOUND)
                file_referent = referent.primary_file
            elif isinstance(referent, BaseFileNode) and referent.is_file:
                file_referent = referent

            if file_referent:
                # Extend `request.args` adding `action=download`.
                request.args = request.args.copy()
                request.args.update({'action': 'download'})
                # Do not include the `download` suffix in the url rebuild.
                url = _build_guid_url(urllib.unquote(file_referent.deep_url))
                return proxy_url(url)

        # Handle Ember Applications
        if isinstance(referent, PreprintService):
            if referent.provider.domain_redirect_enabled:
                # This route should always be intercepted by nginx for the branded domain,
                # w/ the exception of `<guid>/download` handled above.
                return redirect(referent.absolute_url, http.MOVED_PERMANENTLY)

            if PROXY_EMBER_APPS:
                resp = requests.get(EXTERNAL_EMBER_APPS['preprints']['server'], stream=True, timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                return Response(stream_with_context(resp.iter_content()), resp.status_code)

            return send_from_directory(preprints_dir, 'index.html')

        if isinstance(referent, BaseFileNode) and referent.is_file and referent.node.is_quickfiles:
            if referent.is_deleted:
                raise HTTPError(http.GONE)
            if PROXY_EMBER_APPS:
                resp = requests.get(EXTERNAL_EMBER_APPS['ember_osf_web']['server'], stream=True, timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                return Response(stream_with_context(resp.iter_content()), resp.status_code)
            return send_from_directory(ember_osf_web_dir, 'index.html')

        if isinstance(referent, Node) and not referent.is_registration and suffix:
            page = suffix.strip('/').split('/')[0]
            flag_name = 'ember_project_{}_page'.format(page)
            request.user = _get_current_user() or MockUser()
            if waffle.flag_is_active(request, flag_name):
                use_ember_app()

        url = _build_guid_url(urllib.unquote(referent.deep_url), suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
INDEX = settings.ELASTIC_INDEX

try:
    es = Elasticsearch(
        settings.ELASTIC_URI,
        request_timeout=settings.ELASTIC_TIMEOUT
    )
    logging.getLogger('elasticsearch').setLevel(logging.WARN)
    logging.getLogger('elasticsearch.trace').setLevel(logging.WARN)
    logging.getLogger('urllib3').setLevel(logging.WARN)
    logging.getLogger('requests').setLevel(logging.WARN)
    es.cluster.health(wait_for_status='yellow')
except ConnectionError as e:
    sentry.log_exception()
    sentry.log_message("The SEARCH_ENGINE setting is set to 'elastic', but there "
                       "was a problem starting the elasticsearch interface. Is "
                       "elasticsearch running?")
    es = None


def requires_search(func):
    def wrapped(*args, **kwargs):
        if es is not None:
            try:
                return func(*args, **kwargs)
            except ConnectionError:
                raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
            except NotFoundError as e:
                raise exceptions.IndexNotFoundError(e.error)
            except RequestError as e:
                if 'ParseException' in e.error:
                    raise exceptions.MalformedQueryError(e.error)
                raise exceptions.SearchException(e.error)
        sentry.log_message('Elastic search action failed. Is elasticsearch running?')
        raise exceptions.SearchUnavailableError('Failed to connect to elasticsearch')
    return wrapped
def configure_subscription(auth):
    user = auth.user
    json_data = request.get_json()
    target_id = json_data.get('id')
    event = json_data.get('event')
    notification_type = json_data.get('notification_type')
    path = json_data.get('path')
    provider = json_data.get('provider')

    if not event or (notification_type not in NOTIFICATION_TYPES and notification_type != 'adopt_parent'):
        raise HTTPError(http.BAD_REQUEST, data=dict(
            message_long='Must provide an event and notification type for subscription.'
        ))

    node = AbstractNode.load(target_id)
    if 'file_updated' in event and path is not None and provider is not None:
        wb_path = path.lstrip('/')
        event = wb_path + '_file_updated'
    event_id = utils.to_subscription_key(target_id, event)

    if not node:
        # if target_id is not a node it currently must be the current user
        if not target_id == user._id:
            sentry.log_message(
                '{!r} attempted to subscribe to either a bad '
                'id or non-node non-self id, {}'.format(user, target_id)
            )
            raise HTTPError(http.NOT_FOUND)

        if notification_type == 'adopt_parent':
            sentry.log_message(
                '{!r} attempted to adopt_parent of a none node id, {}'.format(user, target_id)
            )
            raise HTTPError(http.BAD_REQUEST)
        owner = user
    else:
        if not node.has_permission(user, 'read'):
            sentry.log_message('{!r} attempted to subscribe to private node, {}'.format(user, target_id))
            raise HTTPError(http.FORBIDDEN)

        if notification_type != 'adopt_parent':
            owner = node
        else:
            if 'file_updated' in event and len(event) > len('file_updated'):
                pass
            else:
                parent = node.parent_node
                if not parent:
                    sentry.log_message(
                        '{!r} attempted to adopt_parent of '
                        'the parentless project, {!r}'.format(user, node)
                    )
                    raise HTTPError(http.BAD_REQUEST)
            # If adopt_parent make sure that this subscription is None for the current User
            subscription = NotificationSubscription.load(event_id)
            if not subscription:
                return {}  # We're done here
            subscription.remove_user_from_subscription(user)
            return {}

    subscription = NotificationSubscription.load(event_id)
    if not subscription:
        subscription = NotificationSubscription(_id=event_id, owner=owner, event_name=event)
        subscription.save()

    if node and node._id not in user.notifications_configured:
        user.notifications_configured[node._id] = True
        user.save()

    subscription.add_user_to_subscription(user, notification_type)
    subscription.save()

    return {'message': 'Successfully subscribed to {} list on {}'.format(notification_type, event_id)}
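# utils.to_subscription_key presumably joins the target id and event name into
# the `<target_id>_<event>` ids that NotificationSubscription is loaded by
# above. A sketch of that assumption (the helper's real body is not shown in
# the source):
# to_subscription_key('abc12', 'comments')          -> 'abc12_comments'
# to_subscription_key('abc12', 'xyz_file_updated')  -> 'abc12_xyz_file_updated'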
try:
    es = Elasticsearch(
        settings.ELASTIC_URI,
        request_timeout=settings.ELASTIC_TIMEOUT
    )
    logging.getLogger('elasticsearch').setLevel(logging.WARN)
    logging.getLogger('elasticsearch.trace').setLevel(logging.WARN)
    logging.getLogger('urllib3').setLevel(logging.WARN)
    logging.getLogger('requests').setLevel(logging.WARN)
    es.cluster.health(wait_for_status='yellow')
except ConnectionError as e:
    message = (
        'The SEARCH_ENGINE setting is set to "elastic", but there '
        'was a problem starting the elasticsearch interface. Is '
        'elasticsearch running?'
    )
    if settings.SENTRY_DSN:
        try:
            sentry.log_exception()
            sentry.log_message(message)
        except AssertionError:  # App has not yet been initialized
            logger.exception(message)
    else:
        logger.error(message)
    exit(1)


def requires_search(func):
    def wrapped(*args, **kwargs):
        if es is not None:
            try:
                return func(*args, **kwargs)
            except ConnectionError:
                raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
            except NotFoundError as e:
                raise exceptions.IndexNotFoundError(e.error)
            except RequestError as e:
                if 'ParseException' in e.error:
                    raise exceptions.MalformedQueryError(e.error)
                raise exceptions.SearchException(e.error)
        sentry.log_message('Elastic search action failed. Is elasticsearch running?')
        raise exceptions.SearchUnavailableError('Failed to connect to elasticsearch')
    return wrapped
def get_auth(auth, **kwargs):
    cas_resp = None
    if not auth.user:
        # Central Authentication Server OAuth Bearer Token
        authorization = request.headers.get('Authorization')
        if authorization and authorization.startswith('Bearer '):
            client = cas.get_client()
            try:
                access_token = cas.parse_auth_header(authorization)
                cas_resp = client.profile(access_token)
            except cas.CasError as err:
                sentry.log_exception()
                # NOTE: We assume that the request is an AJAX request
                return json_renderer(err)
            if cas_resp.authenticated:
                auth.user = OSFUser.load(cas_resp.user)

    try:
        data = jwt.decode(
            jwe.decrypt(request.args.get('payload', '').encode('utf-8'), WATERBUTLER_JWE_KEY),
            settings.WATERBUTLER_JWT_SECRET,
            options={'require_exp': True},
            algorithm=settings.WATERBUTLER_JWT_ALGORITHM
        )['data']
    except (jwt.InvalidTokenError, KeyError) as err:
        sentry.log_message(str(err))
        raise HTTPError(httplib.FORBIDDEN)

    if not auth.user:
        auth.user = OSFUser.from_cookie(data.get('cookie', ''))

    try:
        action = data['action']
        node_id = data['nid']
        provider_name = data['provider']
    except KeyError:
        raise HTTPError(httplib.BAD_REQUEST)

    node = AbstractNode.load(node_id) or Preprint.load(node_id)
    if not node:
        raise HTTPError(httplib.NOT_FOUND)

    check_access(node, auth, action, cas_resp)
    provider_settings = None
    if hasattr(node, 'get_addon'):
        provider_settings = node.get_addon(provider_name)
        if not provider_settings:
            raise HTTPError(httplib.BAD_REQUEST)

    try:
        path = data.get('path')
        version = data.get('version')
        credentials = None
        waterbutler_settings = None
        fileversion = None
        if provider_name == 'osfstorage':
            if path and version:
                # check to see if this is a file or a folder
                filenode = OsfStorageFileNode.load(path.strip('/'))
                if filenode and filenode.is_file:
                    try:
                        fileversion = FileVersion.objects.filter(
                            basefilenode___id=path.strip('/'),
                            identifier=version
                        ).select_related('region').get()
                    except FileVersion.DoesNotExist:
                        raise HTTPError(httplib.BAD_REQUEST)
            # path and no version, use most recent version
            elif path:
                filenode = OsfStorageFileNode.load(path.strip('/'))
                if filenode and filenode.is_file:
                    fileversion = FileVersion.objects.filter(
                        basefilenode=filenode
                    ).select_related('region').order_by('-created').first()
            if fileversion:
                region = fileversion.region
                credentials = region.waterbutler_credentials
                waterbutler_settings = fileversion.serialize_waterbutler_settings(
                    node_id=provider_settings.owner._id if provider_settings else node._id,
                    root_id=provider_settings.root_node._id if provider_settings else node.root_folder._id,
                )
        # If they haven't been set by version region, use the NodeSettings region
        if not (credentials and waterbutler_settings):
            credentials = node.serialize_waterbutler_credentials(provider_name)
            waterbutler_settings = node.serialize_waterbutler_settings(provider_name)
    except exceptions.AddonError:
        log_exception()
        raise HTTPError(httplib.BAD_REQUEST)

    # TODO: Add a signal here?
    if waffle.switch_is_active(features.ELASTICSEARCH_METRICS):
        user = auth.user
        if isinstance(node, Preprint) and not node.is_contributor(user):
            metric_class = get_metric_class_for_action(action)
            if metric_class:
                try:
                    metric_class.record_for_preprint(
                        preprint=node,
                        user=user,
                        version=fileversion.identifier if fileversion else None,
                        path=path
                    )
                except es_exceptions.ConnectionError:
                    log_exception()

    return {'payload': jwe.encrypt(jwt.encode({
        'exp': timezone.now() + datetime.timedelta(seconds=settings.WATERBUTLER_JWT_EXPIRATION),
        'data': {
            'auth': make_auth(auth.user),  # A waterbutler auth dict not an Auth object
            'credentials': credentials,
            'settings': waterbutler_settings,
            'callback_url': node.api_url_for(
                ('create_waterbutler_log' if not getattr(node, 'is_registration', False) else 'registration_callbacks'),
                _absolute=True,
                _internal=True
            )
        }
    }, settings.WATERBUTLER_JWT_SECRET, algorithm=settings.WATERBUTLER_JWT_ALGORITHM), WATERBUTLER_JWE_KEY)}
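# A hedged sketch of how waterbutler (or a test) would unwrap the payload that
# get_auth returns, inverting the encrypt/encode calls above with the same
# shared key, secret, and algorithm:
# decoded = jwt.decode(
#     jwe.decrypt(response['payload'], WATERBUTLER_JWE_KEY),
#     settings.WATERBUTLER_JWT_SECRET,
#     options={'require_exp': True},
#     algorithm=settings.WATERBUTLER_JWT_ALGORITHM,
# )['data']
# decoded then holds the 'auth', 'credentials', 'settings', and 'callback_url' keys.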