Esempio n. 1
0
def subscribe_mailchimp(list_name, user_id):
    """Subscribe a user to the named mailchimp list and record the outcome.

    :param str list_name: name of the mailchimp mailing list
    :param user_id: key used to load the User
    """
    user = User.load(user_id)
    m = get_mailchimp_api()
    list_id = get_list_id_from_name(list_name=list_name)

    # Lazily initialize the per-user subscription map.
    if user.mailchimp_mailing_lists is None:
        user.mailchimp_mailing_lists = {}

    try:
        m.lists.subscribe(
            id=list_id,
            email={'email': user.username},
            merge_vars={
                'fname': user.given_name,
                'lname': user.family_name,
            },
            double_optin=False,
            update_existing=True,
        )

    except mailchimp.ValidationError as error:
        sentry.log_exception()
        # BUG FIX: exceptions have no `.message` attribute on Python 3;
        # use str(error) to obtain the message text.
        sentry.log_message(str(error))
        user.mailchimp_mailing_lists[list_name] = False
    else:
        user.mailchimp_mailing_lists[list_name] = True
    finally:
        # Persist the subscription flag whether or not the call succeeded.
        user.save()
Esempio n. 2
0
def client():
    """Return the module-level Elasticsearch client, building it on first use.

    Exits the process when elasticsearch cannot be reached.
    """
    global CLIENT
    if CLIENT is not None:
        return CLIENT
    try:
        CLIENT = Elasticsearch(settings.ELASTIC_URI,
                               request_timeout=settings.ELASTIC_TIMEOUT,
                               retry_on_timeout=True,
                               **settings.ELASTIC_KWARGS)
        # Quiet down chatty third-party loggers.
        for noisy in ('elasticsearch', 'elasticsearch.trace', 'urllib3', 'requests'):
            logging.getLogger(noisy).setLevel(logging.WARN)
        # Block until the cluster reports at least 'yellow' so we fail fast.
        CLIENT.cluster.health(wait_for_status='yellow')
    except ConnectionError:
        message = (
            'The SEARCH_ENGINE setting is set to "elastic", but there '
            'was a problem starting the elasticsearch interface. Is '
            'elasticsearch running?')
        if settings.SENTRY_DSN:
            try:
                sentry.log_exception()
                sentry.log_message(message)
            except AssertionError:  # App has not yet been initialized
                logger.exception(message)
        else:
            logger.error(message)
        exit(1)
    return CLIENT
Esempio n. 3
0
    def get_sub_folders(self, library_id, folder_id=None, **kwargs):
        """
        Return serialized folders below a specific Zotero library/group.

        Without a folder_id, every folder of the group library is returned flat;
        with a folder_id, only that folder's subfolders are returned.
        """
        try:
            sub_folders = self.api._get_folders(library_id=library_id, folder_id=folder_id)
        except zotero_errors.ResourceNotFound:
            raise HTTPError(404)
        except zotero_errors.UserNotAuthorised:
            raise HTTPError(403)
        except zotero_errors.HTTPError:
            sentry.log_exception()
            sentry.log_message('Unexpected Zotero Error when fetching folders.')
            raise HTTPError(500)

        def _serialize(entry):
            # Group libraries are keyed by their library id; anything else is 'personal'.
            contents = entry['data']
            library = entry['library']
            location = library['id'] if library['type'] == 'group' else 'personal'
            return self.serialize_folder('folder', contents['key'], contents['name'], location, contents['parentCollection'])

        serialized = [_serialize(entry) for entry in sub_folders]

        if folder_id:
            return serialized
        # At the top level, prepend a synthetic 'All Documents' root entry.
        all_documents = self.serialize_folder('folder', 'ROOT', 'All Documents', library_id, '__', None)
        return [all_documents] + serialized
Esempio n. 4
0
def send_digest(grouped_digests):
    """ Send digest emails and remove digests for sent messages in a callback.
    :param grouped_digests: digest notification messages from the past 24 hours grouped by user
    :return:
    """
    for group in grouped_digests:
        user = User.load(group['user_id'])
        if not user:
            sentry.log_exception()
            sentry.log_message("A user with this username does not exist.")
            # BUG FIX: was `return`, which aborted the entire digest run when a
            # single user failed to load; skip just this group instead.
            continue

        info = group['info']
        digest_notification_ids = [message['_id'] for message in info]
        sorted_messages = group_messages_by_node(info)

        if sorted_messages:
            logger.info('Sending email digest to user {0!r}'.format(user))
            # The callback deletes the digest rows once the mail has been sent.
            mails.send_mail(
                to_addr=user.username,
                mimetype='html',
                mail=mails.DIGEST,
                name=user.fullname,
                message=sorted_messages,
                callback=remove_sent_digest_notifications.si(
                    digest_notification_ids=digest_notification_ids))
Esempio n. 5
0
def update_or_create_preprint_identifiers(preprint):
    """Request a DOI status update for *preprint*, logging any HTTP failure."""
    if preprint.verified_publishable:
        status = 'public'
    else:
        status = 'unavailable'
    try:
        preprint.request_identifier_update(category='doi', status=status)
    except HTTPError as err:
        # Record the failure in sentry; never propagate it to the caller.
        sentry.log_exception()
        sentry.log_message(err.args[0])
Esempio n. 6
0
def sync_data_from_mailchimp(**kwargs):
    """Endpoint that the mailchimp webhook sends its data to"""
    # Reject requests that do not carry the shared webhook secret.
    if request.args.get('key') != settings.MAILCHIMP_WEBHOOK_SECRET_KEY:
        # TODO: get tests to pass with sentry logging
        # sentry.log_exception()
        # sentry.log_message("Unauthorized request to the OSF.")
        raise HTTPError(http.UNAUTHORIZED)

    action = request.values['type']
    list_name = mailchimp_utils.get_list_name_from_id(
        list_id=request.values['data[list_id]'])
    username = request.values['data[email]']

    try:
        user = User.find_one(Q('username', 'eq', username))
    except NoResultsFound:
        sentry.log_exception()
        sentry.log_message("A user with this username does not exist.")
        raise HTTPError(
            404,
            data=dict(
                message_short='User not found',
                message_long='A user with this username does not exist'))

    # Both actions toggle the same flag; anything else is ignored.
    if action in ('subscribe', 'unsubscribe'):
        user.mailchimp_mailing_lists[list_name] = (action == 'subscribe')
        user.save()
Esempio n. 7
0
def handle_internal_error(initiator=None,
                          provider=None,
                          message=None,
                          dry_run=True):
    """Log errors that happened due to unexpected bug and send emails the uploader (if available)
    about failures. Product owner (if available) is informed as well with more details. Emails are
    not sent during dry run.
    """
    message = message or 'Registration bulk upload failure'
    logger.error(message)
    sentry.log_message(message)

    # During a dry run we only log; no emails go out.
    if dry_run:
        return
    if initiator:
        mails.send_mail(
            to_addr=initiator.username,
            mail=mails.REGISTRATION_BULK_UPLOAD_UNEXPECTED_FAILURE,
            fullname=initiator.fullname,
            osf_support_email=settings.OSF_SUPPORT_EMAIL,
        )
    inform_product_of_errors(initiator=initiator,
                             provider=provider,
                             message=message)
Esempio n. 8
0
def on_preprint_updated(preprint_id, update_share=True):
    """Propagate a preprint update: refresh its EZID DOI metadata and notify SHARE.

    :param preprint_id: id used to load the PreprintService
    :param bool update_share: when False, skip the SHARE notification
    """
    # WARNING: Only perform Read-Only operations in an asynchronous task, until Repeatable Read/Serializable
    # transactions are implemented in View and Task application layers.
    from osf.models import PreprintService
    preprint = PreprintService.load(preprint_id)

    # Keep the DOI status in sync with the node's visibility.
    if preprint.node:
        status = 'public' if preprint.node.is_public else 'unavailable'
        try:
            update_ezid_metadata_on_change(preprint, status=status)
        except HTTPError as err:
            # Best effort: log the failure but continue so SHARE still gets notified.
            sentry.log_exception()
            sentry.log_message(err.args[0])

    # Push the updated preprint metadata to SHARE, if configured.
    if settings.SHARE_URL and update_share:
        if not preprint.provider.access_token:
            raise ValueError('No access_token for {}. Unable to send {} to SHARE.'.format(preprint.provider, preprint))
        # JSON:API-style payload expected by the SHARE normalizeddata endpoint.
        resp = requests.post('{}api/v2/normalizeddata/'.format(settings.SHARE_URL), json={
            'data': {
                'type': 'NormalizedData',
                'attributes': {
                    'tasks': [],
                    'raw': None,
                    'data': {'@graph': format_preprint(preprint)}
                }
            }
        }, headers={'Authorization': 'Bearer {}'.format(preprint.provider.access_token), 'Content-Type': 'application/vnd.api+json'})
        logger.debug(resp.content)
        resp.raise_for_status()
Esempio n. 9
0
 def get_folders(self, show_root=False, **kwargs):
     """Return Mendeley folders serialized for the folder picker.

     :param bool show_root: prepend a synthetic 'All Documents' root entry
     :raises HTTPError: when the Mendeley API call fails
     :raises InvalidAuthError: when the addon has no credentials
     """
     if not self.has_auth:
         raise exceptions.InvalidAuthError()
     try:
         folders = self.api._get_folders()
         serialized_folders = [{
             'addon': 'mendeley',
             'kind': 'folder',
             'id': entry.json['id'],
             'name': entry.json['name'],
             'path': entry.json.get('parent_id', '/'),
             'parent_list_id': entry.json.get('parent_id', None),
             'provider_list_id': entry.json['id'],
         } for entry in folders]
         if show_root:
             # Synthetic root so the picker can show everything at once.
             serialized_folders.insert(0, {
                 'name': 'All Documents',
                 'provider_list_id': None,
                 'id': 'ROOT',
                 'parent_list_id': '__',
                 'kind': 'folder',
                 'addon': 'mendeley',
             })
         return serialized_folders
     except MendeleyApiException as error:
         sentry.log_exception()
         sentry.log_message('Unexpected Mendeley Error when fetching folders.')
         raise HTTPError(error.status)
Esempio n. 10
0
 def wrapped(*args, **kwargs):
     """Invoke *func*, translating search-layer exceptions into HTTPErrors."""
     try:
         return func(*args, **kwargs)
     except exceptions.MalformedQueryError:
         # The query itself was invalid -> client error.
         raise HTTPError(http.BAD_REQUEST, data={
             'message_short': 'Bad search query',
             'message_long': language.SEARCH_QUERY_HELP,
         })
     except exceptions.SearchUnavailableError:
         raise HTTPError(http.SERVICE_UNAVAILABLE, data={
             'message_short': 'Search unavailable',
             'message_long': (
                 'Our search service is currently unavailable, if the issue persists, '
                 'please report it to <a href="mailto:[email protected]">[email protected]</a>.'
             ),
         })
     except exceptions.SearchException:
         # Interim fix for issue where ES fails with 500 in some settings- ensure exception is still logged until it can be better debugged. See OSF-4538
         sentry.log_exception()
         sentry.log_message('Elasticsearch returned an unexpected error response')
         # TODO: Add a test; may need to mock out the error response due to inability to reproduce error code locally
         raise HTTPError(http.BAD_REQUEST, data={
             'message_short': 'Could not perform search query',
             'message_long': language.SEARCH_QUERY_HELP,
         })
Esempio n. 11
0
    def wrapped(*args, **kwargs):
        """Run *func* against elasticsearch, translating low-level ES errors
        into the application's search exception hierarchy.
        """
        if client() is not None:
            try:
                return func(*args, **kwargs)
            except ConnectionError as e:
                raise exceptions.SearchUnavailableError(str(e))
            except NotFoundError as e:
                raise exceptions.IndexNotFoundError(e.error)
            except RequestError as e:
                if e.error == 'search_phase_execution_exception':
                    raise exceptions.MalformedQueryError('Failed to parse query')
                if 'ParseException' in e.error:  # ES 1.5
                    raise exceptions.MalformedQueryError(e.error)
                # IDIOM FIX: use isinstance() rather than comparing type() directly.
                if isinstance(e.error, dict):  # ES 2.0
                    try:
                        root_cause = e.error['root_cause'][0]
                        if root_cause['type'] == 'query_parsing_exception':
                            raise exceptions.MalformedQueryError(root_cause['reason'])
                    except (AttributeError, KeyError):
                        pass
                raise exceptions.SearchException(e.error)
            except TransportError as e:
                # Catch and wrap generic uncaught ES error codes. TODO: Improve fix for https://openscience.atlassian.net/browse/OSF-4538
                raise exceptions.SearchException(e.error)

        sentry.log_message('Elastic search action failed. Is elasticsearch running?')
        raise exceptions.SearchUnavailableError('Failed to connect to elasticsearch')
Esempio n. 12
0
def sync_data_from_mailchimp(**kwargs):
    """Endpoint that the mailchimp webhook sends its data to"""
    # Bail out early on requests lacking the shared webhook secret.
    if request.args.get("key") != settings.MAILCHIMP_WEBHOOK_SECRET_KEY:
        # TODO: get tests to pass with sentry logging
        # sentry.log_exception()
        # sentry.log_message("Unauthorized request to the OSF.")
        raise HTTPError(http.UNAUTHORIZED)

    values = request.values
    action = values["type"]
    list_name = mailchimp_utils.get_list_name_from_id(list_id=values["data[list_id]"])
    username = values["data[email]"]

    try:
        user = User.find_one(Q("username", "eq", username))
    except NoResultsFound:
        sentry.log_exception()
        sentry.log_message("A user with this username does not exist.")
        raise HTTPError(
            404, data=dict(message_short="User not found", message_long="A user with this username does not exist")
        )

    if action == "unsubscribe":
        user.mailchimp_mailing_lists[list_name] = False
        user.save()
    elif action == "subscribe":
        user.mailchimp_mailing_lists[list_name] = True
        user.save()
Esempio n. 13
0
def client():
    """Lazily construct, verify, and cache the shared Elasticsearch client."""
    global CLIENT
    if CLIENT is not None:
        return CLIENT
    try:
        CLIENT = Elasticsearch(
            settings.ELASTIC_URI,
            request_timeout=settings.ELASTIC_TIMEOUT,
            retry_on_timeout=True,
            **settings.ELASTIC_KWARGS
        )
        # Reduce noise from the HTTP/transport libraries.
        for name in ('elasticsearch', 'elasticsearch.trace', 'urllib3', 'requests'):
            logging.getLogger(name).setLevel(logging.WARN)
        # Wait for the cluster so misconfiguration surfaces at startup.
        CLIENT.cluster.health(wait_for_status='yellow')
    except ConnectionError:
        msg = (
            'The SEARCH_ENGINE setting is set to "elastic", but there '
            'was a problem starting the elasticsearch interface. Is '
            'elasticsearch running?'
        )
        if settings.SENTRY_DSN:
            try:
                sentry.log_exception()
                sentry.log_message(msg)
            except AssertionError:  # App has not yet been initialized
                logger.exception(msg)
        else:
            logger.error(msg)
        exit(1)
    return CLIENT
Esempio n. 14
0
def update_or_create_preprint_identifiers(preprint):
    """Sync the preprint's DOI status with its publishable/retracted state."""
    publishable = preprint.verified_publishable and not preprint.is_retracted
    status = 'public' if publishable else 'unavailable'
    try:
        preprint.request_identifier_update(category='doi', status=status)
    except HTTPError as err:
        # Log to sentry; identifier updates are best-effort.
        sentry.log_exception()
        sentry.log_message(err.args[0])
def subscribe_mailchimp(list_name, user_id):
    """Subscribe an OSFUser to the named mailchimp list and persist the result.

    :param str list_name: name of the mailchimp mailing list
    :param user_id: key used to load the OSFUser
    """
    user = OSFUser.load(user_id)
    m = get_mailchimp_api()
    list_id = get_list_id_from_name(list_name=list_name)

    # Lazily initialize the per-user subscription map.
    if user.mailchimp_mailing_lists is None:
        user.mailchimp_mailing_lists = {}

    try:
        m.lists.subscribe(
            id=list_id,
            email={'email': user.username},
            merge_vars={
                'fname': user.given_name,
                'lname': user.family_name,
            },
            double_optin=False,
            update_existing=True,
        )

    except (mailchimp.ValidationError, mailchimp.ListInvalidBounceMemberError) as error:
        sentry.log_exception()
        # BUG FIX: log_message expects message text; pass str(error) rather
        # than the exception object itself.
        sentry.log_message(str(error))
        user.mailchimp_mailing_lists[list_name] = False
    else:
        user.mailchimp_mailing_lists[list_name] = True
    finally:
        # Persist the subscription flag whether or not the call succeeded.
        user.save()
Esempio n. 16
0
def send_digest(grouped_digests):
    """ Send digest emails and remove digests for sent messages in a callback.
    :param grouped_digests: digest notification messages from the past 24 hours grouped by user
    :return:
    """
    for group in grouped_digests:
        user = User.load(group['user_id'])
        if not user:
            sentry.log_exception()
            sentry.log_message("A user with this username does not exist.")
            # BUG FIX: was `return`, which stopped the whole digest run on the
            # first missing user; skip only this group and keep going.
            continue

        info = group['info']
        digest_notification_ids = [message['_id'] for message in info]
        sorted_messages = group_messages_by_node(info)

        if sorted_messages:
            logger.info('Sending email digest to user {0!r}'.format(user))
            # The callback removes the digest rows once the mail has been sent.
            mails.send_mail(
                to_addr=user.username,
                mimetype='html',
                mail=mails.DIGEST,
                name=user.fullname,
                message=sorted_messages,
                callback=remove_sent_digest_notifications.si(
                    digest_notification_ids=digest_notification_ids
                )
            )
Esempio n. 17
0
def sync_data_from_mailchimp(**kwargs):
    """Endpoint that the mailchimp webhook sends its data to"""
    # Reject webhook calls that lack the shared secret.
    if request.args.get('key') != settings.MAILCHIMP_WEBHOOK_SECRET_KEY:
        # TODO: get tests to pass with sentry logging
        # sentry.log_exception()
        # sentry.log_message("Unauthorized request to the OSF.")
        raise HTTPError(http.UNAUTHORIZED)

    action = request.values['type']
    list_name = mailchimp_utils.get_list_name_from_id(list_id=request.values['data[list_id]'])
    username = request.values['data[email]']

    try:
        user = OSFUser.objects.get(username=username)
    except OSFUser.DoesNotExist:
        sentry.log_exception()
        sentry.log_message('A user with this username does not exist.')
        raise HTTPError(404, data=dict(message_short='User not found',
                                    message_long='A user with this username does not exist'))

    # Both actions simply toggle the subscription flag.
    if action in ('subscribe', 'unsubscribe'):
        user.mailchimp_mailing_lists[list_name] = (action == 'subscribe')
        user.save()
Esempio n. 18
0
    def wrapped(*args, **kwargs):
        """Run *func* against elasticsearch, mapping low-level ES errors onto
        the application's search exception hierarchy.
        """
        if client() is not None:
            try:
                return func(*args, **kwargs)
            except ConnectionError as e:
                raise exceptions.SearchUnavailableError(str(e))
            except NotFoundError as e:
                raise exceptions.IndexNotFoundError(e.error)
            except RequestError as e:
                if e.error == 'search_phase_execution_exception':
                    raise exceptions.MalformedQueryError(
                        'Failed to parse query')
                if 'ParseException' in e.error:  # ES 1.5
                    raise exceptions.MalformedQueryError(e.error)
                # IDIOM FIX: prefer isinstance() over a direct type() comparison.
                if isinstance(e.error, dict):  # ES 2.0
                    try:
                        root_cause = e.error['root_cause'][0]
                        if root_cause['type'] == 'query_parsing_exception':
                            raise exceptions.MalformedQueryError(
                                root_cause['reason'])
                    except (AttributeError, KeyError):
                        pass
                raise exceptions.SearchException(e.error)
            except TransportError as e:
                # Catch and wrap generic uncaught ES error codes. TODO: Improve fix for https://openscience.atlassian.net/browse/OSF-4538
                raise exceptions.SearchException(e.error)

        sentry.log_message(
            'Elastic search action failed. Is elasticsearch running?')
        raise exceptions.SearchUnavailableError(
            'Failed to connect to elasticsearch')
Esempio n. 19
0
def _send_with_sendgrid(from_addr, to_addr, subject, message, mimetype='html', categories=None, attachment_name=None, attachment_content=None, client=None):
    """Send an email through SendGrid, honoring the recipient whitelist.

    :return: True when SendGrid accepted the mail (status < 400), False otherwise.
    """
    if (settings.SENDGRID_WHITELIST_MODE and to_addr in settings.SENDGRID_EMAIL_WHITELIST) or settings.SENDGRID_WHITELIST_MODE is False:
        client = client or sendgrid.SendGridClient(settings.SENDGRID_API_KEY)
        mail = sendgrid.Mail()
        mail.set_from(from_addr)
        mail.add_to(to_addr)
        mail.set_subject(subject)
        if mimetype == 'html':
            mail.set_html(message)

        if categories:
            mail.set_categories(categories)
        if attachment_name and attachment_content:
            mail.add_attachment_stream(attachment_name, attachment_content)

        status, msg = client.send(mail)
        if status >= 400:
            # BUG FIX: the first fragment was missing its trailing newline,
            # fusing "...sendgrid." and "from_addr:" into one line.
            sentry.log_message(
                '{} error response from sendgrid.\n'.format(status) +
                'from_addr:  {}\n'.format(from_addr) +
                'to_addr:  {}\n'.format(to_addr) +
                'subject:  {}\n'.format(subject) +
                'mimetype:  {}\n'.format(mimetype) +
                'message:  {}\n'.format(message[:30]) +
                'categories:  {}\n'.format(categories) +
                'attachment_name:  {}\n'.format(attachment_name)
            )
        return status < 400
    else:
        sentry.log_message(
            'SENDGRID_WHITELIST_MODE is True. Failed to send emails to non-whitelisted recipient {}.'.format(to_addr)
        )
        # BUG FIX: return an explicit False instead of implicitly returning
        # None, so callers always receive a bool.
        return False
Esempio n. 20
0
 def log_errors(self, obj, obj_id, error):
     """Record one failed object; abort the run after 1000 accumulated errors."""
     if not self.errors:
         # First error seen: also mirror log output to a file.
         script_utils.add_file_logger(logger, __file__)
     self.errors += 1
     logger.info('Error on {}, {}:'.format(obj, obj_id))
     logger.exception(error)
     if self.errors == 1000:
         sentry.log_message('ERROR: generate_sitemap stopped execution after reaching 1000 errors. See logs for details.')
         raise Exception('Too many errors generating sitemap.')
Esempio n. 21
0
def write_summary_data(filename, summary_data, remote_base_folder):
    """Append one summary row to the remote CSV named *filename*.

    Downloads the existing remote file (if any), rewrites it locally with the
    new row appended, then uploads the result. If no remote file exists yet
    (signalled by an IndexError on the empty 'data' list), a fresh CSV with
    just the header and the new row is created instead.

    :param str filename: name of the CSV file in the remote folder
    :param dict summary_data: column name -> value for the new row
    :param dict remote_base_folder: must provide 'files' and 'upload' URLs
    """
    header_row = summary_data.keys()
    summary_row = summary_data.values()
    file_path = '{}{}'.format(TEMP_FOLDER, filename)
    # Look up the existing remote file by name.
    old_remote = requests.get(
        url=remote_base_folder['files'],
        headers={'Accept': 'application/vnd.api+json;version={}'.format(DEFAULT_API_VERSION)},
        auth=bearer_token_auth(DS_METRICS_OSF_TOKEN),
        params={'filter[name]': filename},
    ).json()
    try:
        logger.debug('json: {}'.format(old_remote))
        if old_remote[u'meta'][u'total'] > 1:
            # More than one match is ambiguous; warn but proceed with the first.
            sentry.log_message(
                'Too many files that look like {} - this may cause problems for data storage usage summaries'.format(
                    remote_base_folder['files']
                )
            )
        # Raises IndexError when 'data' is empty -> handled below as "no remote file yet".
        old_remote_data = old_remote['data'][0]
        upload = old_remote_data['links']['upload']
        params = {'kind': 'file'}

        header_skipped = False
        with open(file_path, 'w') as new_file:
            writer = csv.writer(new_file, delimiter=',', lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerow(header_row)
            # Stream the existing remote contents and copy all of its data rows.
            with requests.get(
                    url=upload,  # Yes, upload is correct here.
                    headers={'Accept': 'application/vnd.api+json;version={}'.format(DEFAULT_API_VERSION)},
                    auth=bearer_token_auth(DS_METRICS_OSF_TOKEN),
                    stream=True,
            ) as old_file:
                lines = []
                for line in old_file.iter_lines():
                    if isinstance(line, bytes):
                        line = line.decode()
                    lines.append(line)

                reader = csv.reader(lines, delimiter=',', lineterminator='\n')
                for row in reader:
                    # Skip the remote file's own header; we already wrote ours.
                    if header_skipped:
                        writer.writerow(row)
                    header_skipped = True
            writer.writerow(summary_row)

    except IndexError:
        # No remote file exists yet: create a brand-new CSV locally.
        upload = remote_base_folder['upload']
        params = {
            'kind': 'file',
            'name': filename,
        }
        with open(file_path, 'w') as new_file:
            writer = csv.writer(new_file, delimiter=',', lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerow(header_row)
            writer.writerow(summary_row)

    upload_to_storage(file_path=file_path, upload_url=upload, params=params)
Esempio n. 22
0
 def ship_to_s3(self, name, path):
     """Upload the sitemap file at *path* to S3 under sitemaps/<name>.

     Failures are logged (and reported to sentry) but never raised.
     """
     # IDIOM FIX: a context manager guarantees the file is closed even when
     # put_object raises something that is not an Exception subclass.
     with open(path, 'rb') as data:
         try:
             self.s3.Bucket(settings.SITEMAP_AWS_BUCKET).put_object(Key='sitemaps/{}'.format(name), Body=data)
         except Exception as e:
             logger.info('Error sending data to s3 via boto3')
             logger.exception(e)
             sentry.log_message('ERROR: Sitemaps could not be uploaded to s3, see `generate_sitemap` logs')
Esempio n. 23
0
 def log_errors(self, obj, obj_id, error):
     """Log one per-object failure; raise after 1000 errors to stop the run."""
     if not self.errors:
         # Attach a file handler the first time an error occurs.
         script_utils.add_file_logger(logger, __file__)
     self.errors += 1
     logger.info('Error on {}, {}:'.format(obj, obj_id))
     logger.exception(error)
     if self.errors == 1000:
         sentry.log_message('ERROR: generate_sitemap stopped execution after reaching 1000 errors. See logs for details.')
         raise Exception('Too many errors generating sitemap.')
def migrate_responses(resources, resource_name, dry_run=False, rows='all'):
    """
    DRY method to be used to migrate both DraftRegistration.registration_responses
    and Registration.registration_responses.

    :param resources: sequence/queryset of resources to migrate
    :param str resource_name: human-readable name used in log messages
    :param bool dry_run: when True, compute everything but discard changes
    :param rows: 'all' or an int cap on how many resources to process
    :return: number of resources processed (successes + errors)
    """
    progress_bar = None
    if rows == 'all':
        logger.info('Migrating all {}.'.format(resource_name))
    else:
        resources = resources[:rows]
        logger.info('Migrating up to {} {}.'.format(rows, resource_name))
        progress_bar = tqdm(total=rows)

    successes_to_save = []
    errors_to_save = []
    for resource in resources:
        try:
            schema = get_registration_schema(resource)
            resource.registration_responses = flatten_registration_metadata(
                schema,
                get_nested_responses(resource, schema._id),
            )
            resource.registration_responses_migrated = True
            successes_to_save.append(resource)
        except SchemaBlockConversionError as e:
            # Flag the resource so it can be inspected/retried later.
            resource.registration_responses_migrated = False
            errors_to_save.append(resource)
            logger.error('Unexpected/invalid nested data in resource: {} with error {}'.format(resource, e))
        if progress_bar:
            progress_bar.update()

    if progress_bar:
        progress_bar.close()

    success_count = len(successes_to_save)
    error_count = len(errors_to_save)
    total_count = success_count + error_count

    if total_count == 0:
        logger.info('No {} left to migrate.'.format(resource_name))
        return total_count

    logger.info('Successfully migrated {} out of {} {}.'.format(success_count, total_count, resource_name))
    if error_count:
        # FIX: Logger.warn is a deprecated alias for Logger.warning.
        logger.warning('Encountered errors on {} out of {} {}.'.format(error_count, total_count, resource_name))
        if not success_count:
            sentry.log_message('`migrate_registration_responses` has only errors left ({} errors)'.format(error_count))

    if dry_run:
        logger.info('DRY RUN; discarding changes.')
    else:
        logger.info('Saving changes...')
        bulk_update(successes_to_save, update_fields=['registration_responses', 'registration_responses_migrated'])
        bulk_update(errors_to_save, update_fields=['registration_responses_migrated'])

    return total_count
Esempio n. 25
0
def update_or_create_preprint_identifiers(preprint):
    """Create a DOI for newly published preprints; otherwise sync DOI status."""
    status = 'public' if preprint.verified_publishable else 'unavailable'
    if preprint.is_published and not preprint.get_identifier('doi'):
        # First publication without a DOI: mint one.
        request_identifiers(preprint)
        return
    try:
        update_doi_metadata_on_change(preprint._id, status=status)
    except HTTPError as err:
        # Best-effort: report the failure to sentry without propagating.
        sentry.log_exception()
        sentry.log_message(err.args[0])
Esempio n. 26
0
 def reactivate(self):
     """Reactivate an inactive institution and update OSF search without sending out emails.
     """
     if not self.deactivated:
         # Guard clause: reactivating an already-active institution is rejected.
         message = f'Action rejected - reactivating an active institution [{self._id}].'
         logger.warning(message)
         sentry.log_message(message)
         return
     self.deactivated = None
     self.save()
Esempio n. 27
0
def print_cas_log(msg, level):
    """Forward a CAS message to the app logger, honoring CAS_LOG_LEVEL.

    ERROR-level messages are additionally reported to sentry.
    """
    if settings.CAS_LOG_LEVEL > level.value:
        return
    dispatch = {
        LogLevel.ERROR: logger.error,
        LogLevel.DEBUG: logger.debug,
        LogLevel.INFO: logger.info,
    }
    handler = dispatch.get(level)
    if handler is not None:
        handler(msg)
    if level == LogLevel.ERROR:
        sentry.log_message(msg)
Esempio n. 28
0
def update_or_create_preprint_identifiers(preprint):
    """Assign identifiers to newly published preprints; otherwise sync DOI status."""
    status = 'public' if preprint.verified_publishable else 'unavailable'
    if preprint.is_published and not preprint.get_identifier('doi'):
        # Newly published without a DOI: create identifiers from scratch.
        get_and_set_preprint_identifiers(preprint)
        return
    try:
        update_ezid_metadata_on_change(preprint._id, status=status)
    except HTTPError as err:
        # Report but do not re-raise; identifier sync is best-effort.
        sentry.log_exception()
        sentry.log_message(err.args[0])
def run_sql(statement, check_statement, page_size):
    """Run one page of the deleted-column population SQL.

    :param str statement: UPDATE statement with a {} placeholder for LIMIT_CLAUSE;
        its second whitespace-separated token is assumed to be the table name
    :param str check_statement: query run once the UPDATE affects no more rows
    :param int page_size: bound parameter limiting rows touched per call
    """
    table = statement.split(' ')[1]
    logger.info('Populating deleted column in table {}'.format(table))
    with connection.cursor() as cursor:
        cursor.execute(statement.format(LIMIT_CLAUSE), [page_size])
        rows = cursor.fetchall()
        # No rows returned means the backfill has finished for this table.
        if not rows:
            # NOTE(review): this inner cursor shadows the outer one; it appears
            # intentional (fresh cursor for the check query) — confirm.
            with connection.cursor() as cursor:
                cursor.execute(check_statement, [page_size])
                sentry.log_message(
                    'Deleted field in {} table is populated'.format(table))
Esempio n. 30
0
def resolve_guid(guid, suffix=None):
    """Resolve GUID to corresponding URL and return result of appropriate
    view function. This effectively yields a redirect without changing the
    displayed URL of the page.

    :param guid: GUID value (not the object)
    :param suffix: String to append to GUID route
    :return: Werkzeug response

    """
    # Get prefix; handles API routes
    prefix = request.path.split(guid)[0].rstrip('/')
    # Look up GUID
    guid_object = Guid.load(guid)
    if guid_object:

        # verify that the object is a GuidStoredObject descendant. If a model
        #   was once a descendant but that relationship has changed, it's
        #   possible to have referents that are instances of classes that don't
        #   have a redirect_mode attribute or otherwise don't behave as
        #   expected.
        if not isinstance(guid_object.referent, GuidStoredObject):
            sentry.log_message(
                'Guid `{}` resolved to non-guid object'.format(guid)
            )

            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        mode = referent.redirect_mode
        # A None redirect_mode means the referent is not web-addressable.
        if mode is None:
            raise HTTPError(http.NOT_FOUND)
        # 'proxy' mode serves the deep URL in place; otherwise use the public URL.
        url = referent.deep_url if mode == 'proxy' else referent.url
        url = _build_guid_url(url, prefix, suffix)
        # Always redirect API URLs; URL should identify endpoint being called
        if prefix or mode == 'redirect':
            # Preserve the original query string on redirects.
            if request.query_string:
                url += '?' + request.query_string
            return redirect(url)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(
                guid.lower(), prefix, suffix
            )
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
def run_statements(statement, page_size, table):
    """Execute a paged SQL statement against ``table`` and report completion.

    :param str statement: SQL template; formatted with the table name (twice)
        and the LIMIT clause before execution
    :param int page_size: number of rows handled per execution
    :param str table: target table name
    """
    logger.info('Populating deleted column in table {}'.format(table))
    with connection.cursor() as cursor:
        cursor.execute(statement.format(table, table, LIMIT_CLAUSE),
                       [page_size])
        if cursor.fetchall():
            # The paged statement touched rows; check whether any remain
            # unpopulated and notify Sentry once the table is done.
            cursor.execute(CHECK_POPULATED.format(table), [page_size])
            if not cursor.fetchall():
                sentry.log_message(
                    'Deleted field in {} table is populated'.format(table))
Esempio n. 32
0
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in the
    routing table, and return the return value of the view function without
    changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    :raises: HTTPError(404) when the GUID (in any casing) cannot be resolved
        to an object with a usable ``deep_url``
    """
    try:
        # Look up
        guid_object = Guid.load(guid)
    except KeyError as e:
        # NOTE(review): ``e.message`` is Python 2 only (``e.args[0]`` is the
        # portable spelling) — confirm before any Python 3 migration.
        if e.message == 'osfstorageguidfile':  # Used when an old detached OsfStorageGuidFile object is accessed
            raise HTTPError(http.NOT_FOUND)
        else:
            raise e
    if guid_object:

        # verify that the object implements a GuidStoredObject-like interface. If a model
        #   was once GuidStoredObject-like but that relationship has changed, it's
        #   possible to have referents that are instances of classes that don't
        #   have a deep_url attribute or otherwise don't behave as
        #   expected.
        if not hasattr(guid_object.referent, 'deep_url'):
            sentry.log_message(
                'Guid `{}` resolved to an object with no deep_url'.format(guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        # A referent with an empty deep_url cannot be proxied.
        if not referent.deep_url:
            raise HTTPError(http.NOT_FOUND)
        # Preprints are served by the external Ember app rather than proxied.
        if isinstance(referent, PreprintService):
            return send_from_directory(
                os.path.abspath(os.path.join(os.getcwd(), EXTERNAL_EMBER_APPS['preprints']['path'])),
                'index.html'
            )
        # deep_url may be percent-encoded; unquote before building the route.
        url = _build_guid_url(urllib.unquote(referent.deep_url), suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
Esempio n. 33
0
def resolve_guid(guid, suffix=None):
    """Resolve ``guid`` to its referent and proxy the matching view function
    without changing the displayed URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    """
    try:
        found = Guid.load(guid)
    except KeyError as e:
        # Raised when an old detached OsfStorageGuidFile object is accessed
        if e.message != 'osfstorageguidfile':
            raise e
        raise HTTPError(http.NOT_FOUND)

    if not found:
        # Retry with a lower-cased GUID and redirect when that form exists
        if Guid.load(guid.lower()):
            return redirect(_build_guid_url(guid.lower(), suffix))
        raise HTTPError(http.NOT_FOUND)

    # Referents whose model is no longer GuidStoredObject-like may lack a
    # deep_url attribute entirely; treat those as unresolvable.
    if not hasattr(found.referent, 'deep_url'):
        sentry.log_message(
            'Guid `{}` resolved to an object with no deep_url'.format(guid)
        )
        raise HTTPError(http.NOT_FOUND)

    referent = found.referent
    if referent is None:
        logger.error('Referent of GUID {0} not found'.format(guid))
        raise HTTPError(http.NOT_FOUND)
    if not referent.deep_url:
        raise HTTPError(http.NOT_FOUND)

    # Preprints are rendered by the external Ember application
    if isinstance(referent, PreprintService):
        return send_from_directory(
            os.path.abspath(os.path.join(os.getcwd(), EXTERNAL_EMBER_APPS['preprints']['path'])),
            'index.html'
        )

    return proxy_url(_build_guid_url(urllib.unquote(referent.deep_url), suffix))
Esempio n. 34
0
 def deactivate(self):
     """Deactivate an active institution, update OSF search and send emails to all affiliated users.
     """
     if not self.deactivated:
         # Record the deactivation timestamp and persist it first, so users
         # are only notified about an institution already marked inactive.
         self.deactivated = timezone.now()
         self.save()
         # Django mangers aren't used when querying on related models. Thus, we can query
         # affiliated users and send notification emails after the institution has been deactivated.
         self._send_deactivation_email()
     else:
         # Deactivating twice is a no-op; log and surface the misuse to Sentry.
         message = f'Action rejected - deactivating an inactive institution [{self._id}].'
         logger.warning(message)
         sentry.log_message(message)
Esempio n. 35
0
def resolve_guid(guid, suffix=None):
    """Resolve GUID to corresponding URL and return result of appropriate
    view function. This effectively yields a redirect without changing the
    displayed URL of the page.

    :param guid: GUID value (not the object)
    :param suffix: String to append to GUID route
    :return: Werkzeug response

    """
    # Get prefix; handles API routes
    prefix = request.path.split(guid)[0].rstrip('/')

    # Look up GUID
    guid_object = Guid.load(guid)
    if guid_object:

        # verify that the object is a GuidStoredObject descendant. If a model
        #   was once a descendant but that relationship has changed, it's
        #   possible to have referents that are instances of classes that don't
        #   have a redirect_mode attribute or otherwise don't behave as
        #   expected.
        if not isinstance(guid_object.referent, GuidStoredObject):
            sentry.log_message(
                'Guid `{}` resolved to non-guid object'.format(guid))

            raise HTTPError(http.NOT_FOUND)

        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        # mode selects proxy vs. redirect behavior; None is treated as
        # unresolvable (404).
        mode = referent.redirect_mode
        if mode is None:
            raise HTTPError(http.NOT_FOUND)
        url = referent.deep_url if mode == 'proxy' else referent.url
        url = _build_guid_url(url, prefix, suffix)
        # Always redirect API URLs; URL should identify endpoint being called
        if prefix or mode == 'redirect':
            # Preserve the original query string on the redirect target
            if request.query_string:
                url += '?' + request.query_string
            return redirect(url)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(_build_guid_url(guid.lower(), prefix, suffix))

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
def migrate_registration_responses(dry_run=False, rows=5000):
    """Migrate registration responses for drafts and registrations in batches.

    :param bool dry_run: forwarded to the per-model migration helpers
    :param int rows: batch size forwarded to the per-model migration helpers
    """
    started = datetime.datetime.now()
    logger.info('Script started time: {}'.format(started))

    drafts_migrated = migrate_draft_registrations(dry_run, rows)
    registrations_migrated = migrate_registrations(dry_run, rows)

    # Both helpers reporting zero means the migration has fully converged.
    if drafts_migrated == 0 and registrations_migrated == 0:
        logger.info('Migration complete! No more drafts or registrations need migrating.')
        sentry.log_message('`migrate_registration_responses` command found nothing to migrate!')

    finished = datetime.datetime.now()
    logger.info('Script finished time: {}'.format(finished))
    logger.info('Run time {}'.format(finished - started))
Esempio n. 37
0
    def wrapped(*args, **kwargs):
        """Invoke ``func``, translating low-level Elasticsearch errors into
        application-level search exceptions.
        """
        # No configured client: report to Sentry and fail as unavailable.
        if es is None:
            sentry.log_message('Elastic search action failed. Is elasticsearch running?')
            raise exceptions.SearchUnavailableError("Failed to connect to elasticsearch")
        try:
            return func(*args, **kwargs)
        except ConnectionError:
            raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
        except NotFoundError as e:
            raise exceptions.IndexNotFoundError(e.error)
        except RequestError as e:
            # 'ParseException' in the error text marks a malformed query
            if 'ParseException' in e.error:
                raise exceptions.MalformedQueryError(e.error)
            raise exceptions.SearchException(e.error)
Esempio n. 38
0
def on_preprint_updated(preprint_id, update_share=True, share_type=None, old_subjects=None):
    """Sync a preprint's external metadata (EZID, SHARE) after an update.

    :param str preprint_id: id of the PreprintService to refresh
    :param bool update_share: when True, also push the update to SHARE
    :param share_type: forwarded to ``update_preprint_share``
    :param list old_subjects: prior subject ids; defaults to an empty list
    """
    # WARNING: Only perform Read-Only operations in an asynchronous task, until Repeatable Read/Serializable
    # transactions are implemented in View and Task application layers.
    from osf.models import PreprintService

    if old_subjects is None:
        old_subjects = []
    preprint = PreprintService.load(preprint_id)
    if preprint.node:
        new_status = 'public' if preprint.verified_publishable else 'unavailable'
        try:
            update_ezid_metadata_on_change(preprint._id, status=new_status)
        except HTTPError as err:
            # Best effort: EZID failures are reported to Sentry, not re-raised.
            sentry.log_exception()
            sentry.log_message(err.args[0])
    if update_share:
        update_preprint_share(preprint, old_subjects, share_type)
Esempio n. 39
0
def on_preprint_updated(preprint_id, update_share=True, share_type=None, old_subjects=None):
    """Sync a preprint's external metadata (EZID, SHARE) after an update.

    :param str preprint_id: id of the PreprintService to refresh
    :param bool update_share: when True, also push the update to SHARE
    :param share_type: forwarded to ``update_preprint_share``
    :param list old_subjects: prior subject ids; defaults to an empty list
    """
    # WARNING: Only perform Read-Only operations in an asynchronous task, until Repeatable Read/Serializable
    # transactions are implemented in View and Task application layers.
    from osf.models import PreprintService
    preprint = PreprintService.load(preprint_id)
    if old_subjects is None:
        old_subjects = []
    if preprint.node:
        status = 'public' if preprint.verified_publishable else 'unavailable'
        try:
            update_ezid_metadata_on_change(preprint._id, status=status)
        except HTTPError as err:
            # Best effort: EZID failures are reported to Sentry, not re-raised.
            sentry.log_exception()
            sentry.log_message(err.args[0])
    if update_share:
        update_preprint_share(preprint, old_subjects, share_type)
Esempio n. 40
0
    def update(self, revision, data, user=None, save=True):
        """Using revision and data update all data pertaining to self.

        :param str or None revision: The revision that data points to; None
            means ``data`` describes the (unsaved) latest version
        :param dict data: Metadata received from waterbutler
        :param user: unused here; kept for interface compatibility
        :param bool save: when True, persist self after updating
        :returns: FileVersion
        """
        self.name = data['name']
        self.materialized_path = data['materialized']

        version = FileVersion(identifier=revision)
        version.update_metadata(data, save=False)

        # Transform here so it can be sorted on later
        if data['modified'] is not None and data['modified'] != '':
            data['modified'] = parse_date(
                data['modified'],
                ignoretz=True,
                default=timezone.now()  # Just in case nothing can be parsed
            )

        # if revision is none then version is the latest version
        # Dont save the latest information
        if revision is not None:
            version.save()
            # Adds version to the list of file versions - using custom through table
            self.add_version(version)
        # for/else: the `else` runs only when no matching etag was found
        # (i.e. the loop completed without `break`), appending new history.
        for entry in self.history:
            # Some entry might have an undefined modified field
            if data['modified'] is not None and entry[
                    'modified'] is not None and data['modified'] < entry[
                        'modified']:
                sentry.log_message(
                    'update() receives metatdata older than the newest entry in file history.'
                )
            if ('etag' in entry and 'etag' in data) and (entry['etag']
                                                         == data['etag']):
                break
        else:
            self.history.append(data)

        # Finally update last touched
        self.last_touched = timezone.now()

        if save:
            self.save()
        return version
Esempio n. 41
0
    def get_top_level_folders(self, **kwargs):
        """
        Returns serialized group libraries - your personal library along with any group libraries.

        This is the top-tier of "folders" in Zotero.

        You can use kwargs to refine what data is returned -  how to limit the number of group libraries,
        whether to return the personal library alongside group_libraries, or append the total library count.
        """
        # These kwargs are passed in from ZoteroViews > library_list
        limit = kwargs.get('limit', None)
        start = kwargs.get('start', None)
        return_count = kwargs.get('return_count', False)
        append_personal = kwargs.get('append_personal', True)
        try:
            # Fetch group libraries
            libraries = self.api._fetch_libraries(limit=limit, start=start)
        except zotero_errors.ResourceNotFound:
            raise HTTPError(404)
        except zotero_errors.UserNotAuthorised:
            raise HTTPError(403)
        except zotero_errors.HTTPError:
            sentry.log_exception()
            sentry.log_message(
                'Unexpected Zotero Error when fetching group libraries.')
            raise HTTPError(500)

        # Serialize every library except the trailing count entry
        serialized = [
            self.serialize_folder(
                'library',
                library['data']['id'],
                library['data']['name'],
                str(library['data']['id']),
            )
            for library in libraries[:-1]
        ]

        if return_count:
            # Return total number of libraries as last item in list
            serialized.append(libraries[-1])

        if append_personal:
            # Append personal library as option alongside group libraries
            serialized.insert(
                0,
                self.serialize_folder('library', 'personal', 'My Library',
                                      'personal'))
        return serialized
Esempio n. 42
0
def create_primary_collection_for_provider(sender, instance, created, **kwargs):
    """Signal handler (post_save-style signature): give a newly created
    provider its promoted, public primary collection.

    :param sender: signal sender (the provider model class)
    :param instance: the provider instance that was saved
    :param bool created: True only on the initial save
    """
    if not created:
        return
    Collection = apps.get_model('osf.Collection')
    user = getattr(instance, '_creator', None)  # Temp attr set in admin view
    if not user:
        # A user is required for Collections / Groups
        sentry.log_message('Unable to create primary_collection for {}Provider {}'.format(instance.readable_type.capitalize(), instance.name))
        return
    collection = Collection(
        title='{}\'s Collection'.format(instance.name),
        creator=user,
        provider=instance,
        is_promoted=True,
        is_public=True
    )
    collection.save()
    instance.primary_collection = collection
    instance.save()
Esempio n. 43
0
def create_primary_collection_for_provider(sender, instance, created, **kwargs):
    """Signal handler (post_save-style signature): give a newly created
    provider its promoted, public primary collection.

    :param sender: signal sender (the provider model class)
    :param instance: the provider instance that was saved
    :param bool created: True only on the initial save
    """
    if created:
        Collection = apps.get_model('osf.Collection')
        user = getattr(instance, '_creator', None)  # Temp attr set in admin view
        if user:
            c = Collection(
                title='{}\'s Collection'.format(instance.name),
                creator=user,
                provider=instance,
                is_promoted=True,
                is_public=True
            )
            c.save()
            instance.primary_collection = c
            instance.save()
        else:
            # A user is required for Collections / Groups
            sentry.log_message('Unable to create primary_collection for {}Provider {}'.format(instance.readable_type.capitalize(), instance.name))
Esempio n. 44
0
    def wrapped(*args, **kwargs):
        """Invoke ``func``, translating low-level Elasticsearch errors into
        application-level search exceptions.
        """
        if es is not None:
            try:
                return func(*args, **kwargs)
            except ConnectionError:
                raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
            except NotFoundError as e:
                raise exceptions.IndexNotFoundError(e.error)
            except RequestError as e:
                # 'ParseException' in the error text marks a malformed query
                if 'ParseException' in e.error:
                    raise exceptions.MalformedQueryError(e.error)
                raise exceptions.SearchException(e.error)
            except TransportError as e:
                # Catch and wrap generic uncaught ES error codes. TODO: Improve fix for https://openscience.atlassian.net/browse/OSF-4538
                raise exceptions.SearchException(e.error)

        # No configured client: report to Sentry and fail as unavailable.
        sentry.log_message('Elastic search action failed. Is elasticsearch running?')
        raise exceptions.SearchUnavailableError('Failed to connect to elasticsearch')
Esempio n. 45
0
    def wrapped(*args, **kwargs):
        """Invoke ``func``, translating low-level Elasticsearch errors into
        application-level search exceptions.
        """
        # No configured client: report to Sentry and fail as unavailable.
        if es is None:
            sentry.log_message('Elastic search action failed. Is elasticsearch running?')
            raise exceptions.SearchUnavailableError("Failed to connect to elasticsearch")
        try:
            return func(*args, **kwargs)
        except ConnectionError:
            raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
        except NotFoundError as e:
            raise exceptions.IndexNotFoundError(e.error)
        except RequestError as e:
            # 'ParseException' in the error text marks a malformed query
            if 'ParseException' in e.error:
                raise exceptions.MalformedQueryError(e.error)
            raise exceptions.SearchException(e.error)
        except TransportError as e:
            # Catch and wrap generic uncaught ES error codes. TODO: Improve fix for https://openscience.atlassian.net/browse/OSF-4538
            raise exceptions.SearchException(e.error)
Esempio n. 46
0
def migrate_page_counters(dry_run=False, rows=10000, reverse=False):
    """Run one batch of the pagecounter migration (or its reversal).

    :param bool dry_run: when True, skip executing the migration statement
    :param int rows: batch size bound to the LIMIT of the migration SQL
    :param bool reverse: when True, run the reverse migration instead
    """
    started = datetime.datetime.now()
    logger.info('Script started time: {}'.format(started))

    sql_query = REVERSE_SQL_LIMITED if reverse else FORWARD_SQL_LIMITED
    logger.info('SQL Query: {}'.format(sql_query))

    with connection.cursor() as cursor:
        if not dry_run:
            cursor.execute(sql_query, [rows])
        if not reverse:
            # Forward runs also report progress; zero remaining rows means
            # the migration is complete, which is surfaced to Sentry.
            cursor.execute(COUNT_SQL)
            remaining = cursor.fetchone()[0]
            logger.info('Entries left: {}'.format(remaining))
            if remaining == 0:
                sentry.log_message('Migrate pagecounter data complete')

    finished = datetime.datetime.now()
    logger.info('Script finished time: {}'.format(finished))
    logger.info('Run time {}'.format(finished - started))
Esempio n. 47
0
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in the
    routing table, and return the return value of the view function without
    changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    :raises: HTTPError(404) when the GUID cannot be resolved to a
        GuidStoredObject with a usable ``deep_url``
    """
    # Look up GUID
    guid_object = Guid.load(guid)
    if guid_object:

        # verify that the object is a GuidStoredObject descendant. If a model
        #   was once a descendant but that relationship has changed, it's
        #   possible to have referents that are instances of classes that don't
        #   have a redirect_mode attribute or otherwise don't behave as
        #   expected.
        if not isinstance(guid_object.referent, GuidStoredObject):
            sentry.log_message(
                'Guid `{}` resolved to non-guid object'.format(guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        # A referent with an empty deep_url cannot be proxied.
        if not referent.deep_url:
            raise HTTPError(http.NOT_FOUND)
        url = _build_guid_url(referent.deep_url, suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
Esempio n. 48
0
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in the
    routing table, and return the return value of the view function without
    changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    :raises: HTTPError(404) when the GUID (in any casing) cannot be resolved
        to an object with a usable ``deep_url``
    """
    # Look up GUID
    guid_object = Guid.load(guid)
    if guid_object:

        # verify that the object implements a GuidStoredObject-like interface. If a model
        #   was once GuidStoredObject-like but that relationship has changed, it's
        #   possible to have referents that are instances of classes that don't
        #   have a deep_url attribute or otherwise don't behave as
        #   expected.
        if not hasattr(guid_object.referent, 'deep_url'):
            sentry.log_message(
                'Guid `{}` resolved to an object with no deep_url'.format(guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        # A referent with an empty deep_url cannot be proxied.
        if not referent.deep_url:
            raise HTTPError(http.NOT_FOUND)
        # deep_url may be percent-encoded; unquote before building the route.
        url = _build_guid_url(urllib.unquote(referent.deep_url), suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
Esempio n. 49
0
    def get_top_level_folders(self, **kwargs):
        """
        Returns serialized group libraries - your personal library along with any group libraries.

        This is the top-tier of "folders" in Zotero.

        You can use kwargs to refine what data is returned -  how to limit the number of group libraries,
        whether to return the personal library alongside group_libraries, or append the total library count.
        """
        # These kwargs are passed in from ZoteroViews > library_list
        limit = kwargs.get('limit', None)
        start = kwargs.get('start', None)
        return_count = kwargs.get('return_count', False)
        append_personal = kwargs.get('append_personal', True)
        try:
            # Fetch group libraries
            libraries = self.api._fetch_libraries(limit=limit, start=start)
        except zotero_errors.ResourceNotFound:
            raise HTTPError(404)
        except zotero_errors.UserNotAuthorised:
            raise HTTPError(403)
        except zotero_errors.HTTPError:
            # Any other Zotero HTTP failure is unexpected; report to Sentry.
            sentry.log_exception()
            sentry.log_message('Unexpected Zotero Error when fetching group libraries.')
            raise HTTPError(500)

        # Serialize libraries
        # NOTE(review): libraries[:-1] skips the last element, which appears
        # to be a count entry rather than a library (see return_count below).
        serialized = []
        for library in libraries[:-1]:
            data = library['data']
            serialized.append(self.serialize_folder('library', data['id'], data['name'], str(data['id'])))

        if return_count:
            # Return total number of libraries as last item in list
            serialized.append(libraries[-1])

        if append_personal:
            # Append personal library as option alongside group libraries
            serialized.insert(0, self.serialize_folder('library', 'personal', 'My Library', 'personal'))
        return serialized
Esempio n. 50
0
    def update(self, revision, data, user=None, save=True):
        """Using revision and data update all data pertaining to self.

        :param str or None revision: The revision that data points to; None
            means ``data`` describes the (unsaved) latest version
        :param dict data: Metadata received from waterbutler
        :param user: unused here; kept for interface compatibility
        :param bool save: when True, persist self after updating
        :returns: FileVersion
        """
        self.name = data['name']
        self.materialized_path = data['materialized']

        version = FileVersion(identifier=revision)
        version.update_metadata(data, save=False)

        # Transform here so it can be sorted on later
        if data['modified'] is not None and data['modified'] != '':
            data['modified'] = parse_date(
                data['modified'],
                ignoretz=True,
                default=timezone.now()  # Just in case nothing can be parsed
            )

        # if revision is none then version is the latest version
        # Dont save the latest information
        if revision is not None:
            version.save()
            self.versions.add(version)
        # for/else: the `else` runs only when no matching etag was found
        # (i.e. the loop completed without `break`), appending new history.
        for entry in self.history:
            # Some entry might have an undefined modified field
            if data['modified'] is not None and entry['modified'] is not None and data['modified'] < entry['modified']:
                sentry.log_message('update() receives metatdata older than the newest entry in file history.')
            if ('etag' in entry and 'etag' in data) and (entry['etag'] == data['etag']):
                break
        else:
            self.history.append(data)

        # Finally update last touched
        self.last_touched = timezone.now()

        if save:
            self.save()
        return version
Esempio n. 51
0
 def has_permission(self, request, view):
     """Verify that a POST request carries a valid Mailgun webhook signature.

     The signature is the hex HMAC-SHA256 of ``timestamp + token`` keyed by
     the Mailgun API key; comparison uses ``hmac.compare_digest`` to resist
     timing attacks.

     :param request: the incoming DRF request
     :param view: the view being accessed (unused)
     :return: True when the signature is valid; False when no API key is
         configured
     :raises exceptions.MethodNotAllowed: for any method other than POST
     :raises exceptions.ParseError: for an empty body, a missing signature,
         or a signature mismatch
     """
     if request.method != 'POST':
         raise exceptions.MethodNotAllowed(method=request.method)
     data = request.data
     if not data:
         raise exceptions.ParseError('Request body is empty')
     if not settings.MAILGUN_API_KEY:
         return False
     # Check for the signature before doing the HMAC work: previously the
     # digest was computed first, so a request missing `timestamp`/`token`
     # died with an uncaught KeyError (500) instead of this ParseError.
     if 'signature' not in data:
         error_message = 'Signature required in request body'
         sentry.log_message(error_message)
         raise exceptions.ParseError(error_message)
     signature = hmac.new(
         key=settings.MAILGUN_API_KEY,
         msg='{}{}'.format(
             data['timestamp'],
             data['token'],
         ),
         digestmod=hashlib.sha256,
     ).hexdigest()
     if not hmac.compare_digest(unicode(signature), unicode(data['signature'])):
         raise exceptions.ParseError('Invalid signature')
     return True
Esempio n. 52
0
 def wrapped(*args, **kwargs):
     """Invoke ``func`` and convert search-layer exceptions into HTTPErrors
     with user-facing messages.
     """
     try:
         return func(*args, **kwargs)
     except exceptions.MalformedQueryError:
         raise HTTPError(http.BAD_REQUEST, data={
             'message_short': 'Bad search query',
             'message_long': language.SEARCH_QUERY_HELP,
         })
     except exceptions.SearchUnavailableError:
         raise HTTPError(http.SERVICE_UNAVAILABLE, data={
             'message_short': 'Search unavailable',
             'message_long': ('Our search service is currently unavailable, if the issue persists, '
                              'please report it to <a href="mailto:[email protected]">[email protected]</a>.'),
         })
     except exceptions.SearchException:
         # Interim fix for issue where ES fails with 500 in some settings- ensure exception is still logged until it can be better debugged. See OSF-4538
         sentry.log_exception()
         sentry.log_message('Elasticsearch returned an unexpected error response')
         # TODO: Add a test; may need to mock out the error response due to inability to reproduce error code locally
         raise HTTPError(http.BAD_REQUEST, data={
             'message_short': 'Could not perform search query',
             'message_long': language.SEARCH_QUERY_HELP,
         })
    def authenticate(self, request):
        """
        Handle CAS institution authentication request.

        The JWT `data` payload is expected in the following structure:
        {
            "provider": {
                "idp":  "",
                "id":   "",
                "user": {
                    "username":     "",
                    "fullname":     "",
                    "familyName":   "",
                    "givenName":    "",
                    "middleNames":  "",
                    "suffix":       "",
                }
            }
        }

        :param request: the POST request
        :return: user, None if authentication succeed
        :raises: AuthenticationFailed if authentication fails
        """

        # The body is JWE-encrypted and JWT-signed; any decode failure is
        # treated as a failed authentication rather than surfaced raw.
        try:
            payload = jwt.decode(
                jwe.decrypt(request.body, settings.JWE_SECRET),
                settings.JWT_SECRET,
                options={'verify_exp': False},
                algorithm='HS256',
            )
        except (jwt.InvalidTokenError, TypeError):
            raise AuthenticationFailed

        data = json.loads(payload['data'])
        provider = data['provider']

        institution = Institution.load(provider['id'])
        if not institution:
            raise AuthenticationFailed('Invalid institution id specified "{}"'.format(provider['id']))

        username = provider['user'].get('username')
        fullname = provider['user'].get('fullname')
        given_name = provider['user'].get('givenName')
        family_name = provider['user'].get('familyName')
        middle_names = provider['user'].get('middleNames')
        suffix = provider['user'].get('suffix')

        # use given name and family name to build full name if not provided
        if given_name and family_name and not fullname:
            fullname = given_name + ' ' + family_name

        # institution must provide `fullname`, otherwise we fail the authentication and inform sentry
        if not fullname:
            message = 'Institution login failed: fullname required' \
                      ' for user {} from institution {}'.format(username, provider['id'])
            sentry.log_message(message)
            raise AuthenticationFailed(message)

        # `get_or_create_user()` guesses names from fullname
        # replace the guessed ones if the names are provided from the authentication
        user, created = get_or_create_user(fullname, username, reset_password=False)
        if created:
            if given_name:
                user.given_name = given_name
            if family_name:
                user.family_name = family_name
            if middle_names:
                user.middle_names = middle_names
            if suffix:
                user.suffix = suffix
            user.update_date_last_login()

            # Relying on front-end validation until `accepted_tos` is added to the JWT payload
            user.accepted_terms_of_service = timezone.now()

            # save and register user
            user.save()
            user.register(username)

            # send confirmation email
            send_mail(
                to_addr=user.username,
                mail=WELCOME_OSF4I,
                mimetype='html',
                user=user,
                domain=DOMAIN,
                osf_support_email=OSF_SUPPORT_EMAIL,
                storage_flag_is_active=waffle.flag_is_active(request, features.STORAGE_I18N),
            )

        # Ensure the authenticated user is affiliated with the institution,
        # for both newly created and pre-existing accounts.
        if not user.is_affiliated_with_institution(institution):
            user.affiliated_institutions.add(institution)
            user.save()

        return user, None
Esempio n. 54
0
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in the
    routing table, and return the return value of the view function without
    changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function (or a redirect/Ember response)
    :raises: HTTPError(404) when the GUID cannot be resolved to a routable
        referent; HTTPError(410) for deleted quickfiles.
    """
    try:
        # Look up
        guid_object = Guid.load(guid)
    except KeyError as e:
        # NOTE(review): `e.message` is a Python 2 idiom; under Python 3 this
        # attribute access would raise AttributeError.
        if e.message == 'osfstorageguidfile':  # Used when an old detached OsfStorageGuidFile object is accessed
            raise HTTPError(http.NOT_FOUND)
        else:
            raise e
    if guid_object:
        # verify that the object implements a GuidStoredObject-like interface. If a model
        #   was once GuidStoredObject-like but that relationship has changed, it's
        #   possible to have referents that are instances of classes that don't
        #   have a deep_url attribute or otherwise don't behave as
        #   expected.
        if not hasattr(guid_object.referent, 'deep_url'):
            sentry.log_message(
                'Guid resolved to an object with no deep_url', dict(guid=guid)
            )
            raise HTTPError(http.NOT_FOUND)
        referent = guid_object.referent
        # NOTE(review): this branch appears unreachable — if referent were None,
        # hasattr(None, 'deep_url') above is False and we would already have 404'd.
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http.NOT_FOUND)
        if not referent.deep_url:
            raise HTTPError(http.NOT_FOUND)

        # Handle file `/download` shortcut with supported types.
        if suffix and suffix.rstrip('/').lower() == 'download':
            file_referent = None
            if isinstance(referent, PreprintService) and referent.primary_file:
                if not referent.is_published:
                    # TODO: Ideally, permissions wouldn't be checked here.
                    # This is necessary to prevent a logical inconsistency with
                    # the routing scheme - if a preprint is not published, only
                    # admins and moderators should be able to know it exists.
                    auth = Auth.from_kwargs(request.args.to_dict(), {})
                    group_helper = GroupHelper(referent.provider)
                    admin_group = group_helper.get_group('admin')
                    mod_group = group_helper.get_group('moderator')
                    # Check if user isn't a nonetype or that the user has admin/moderator permissions
                    if auth.user is None or not (referent.node.has_permission(auth.user, permissions.ADMIN) or (mod_group.user_set.all() | admin_group.user_set.all()).filter(id=auth.user.id).exists()):
                        raise HTTPError(http.NOT_FOUND)
                file_referent = referent.primary_file
            elif isinstance(referent, BaseFileNode) and referent.is_file:
                file_referent = referent

            if file_referent:
                # Extend `request.args` adding `action=download`.
                request.args = request.args.copy()
                request.args.update({'action': 'download'})
                # Do not include the `download` suffix in the url rebuild.
                url = _build_guid_url(urllib.unquote(file_referent.deep_url))
                return proxy_url(url)

        # Handle Ember Applications
        if isinstance(referent, PreprintService):
            if referent.provider.domain_redirect_enabled:
                # This route should always be intercepted by nginx for the branded domain,
                # w/ the exception of `<guid>/download` handled above.
                return redirect(referent.absolute_url, http.MOVED_PERMANENTLY)

            if PROXY_EMBER_APPS:
                # Stream the preprints Ember app through this process (dev-style proxy).
                resp = requests.get(EXTERNAL_EMBER_APPS['preprints']['server'], stream=True, timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                return Response(stream_with_context(resp.iter_content()), resp.status_code)

            return send_from_directory(preprints_dir, 'index.html')

        # Quickfiles files are served by the ember-osf-web app.
        if isinstance(referent, BaseFileNode) and referent.is_file and referent.node.is_quickfiles:
            if referent.is_deleted:
                raise HTTPError(http.GONE)
            if PROXY_EMBER_APPS:
                resp = requests.get(EXTERNAL_EMBER_APPS['ember_osf_web']['server'], stream=True, timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                return Response(stream_with_context(resp.iter_content()), resp.status_code)

            return send_from_directory(ember_osf_web_dir, 'index.html')

        # Per-page Ember rollout for project pages, gated by waffle flags
        # named 'ember_project_<page>_page'.
        if isinstance(referent, Node) and not referent.is_registration and suffix:
            page = suffix.strip('/').split('/')[0]
            flag_name = 'ember_project_{}_page'.format(page)
            request.user = _get_current_user() or MockUser()

            if waffle.flag_is_active(request, flag_name):
                use_ember_app()

        # Default: proxy to the legacy view for the referent's deep URL.
        url = _build_guid_url(urllib.unquote(referent.deep_url), suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(
            _build_guid_url(guid.lower(), suffix)
        )

    # GUID not found
    raise HTTPError(http.NOT_FOUND)
Esempio n. 55
0
INDEX = settings.ELASTIC_INDEX

try:
    es = Elasticsearch(
        settings.ELASTIC_URI,
        request_timeout=settings.ELASTIC_TIMEOUT
    )
    # Third-party HTTP/ES client logging is noisy at INFO; cap it at WARN.
    for _noisy_logger in ('elasticsearch', 'elasticsearch.trace', 'urllib3', 'requests'):
        logging.getLogger(_noisy_logger).setLevel(logging.WARN)
    # Fail fast at import time if the cluster is unreachable or unhealthy.
    es.cluster.health(wait_for_status='yellow')
except ConnectionError:
    # Search becomes a no-op (`es = None`); report the failure to sentry.
    sentry.log_exception()
    sentry.log_message("The SEARCH_ENGINE setting is set to 'elastic', but there "
            "was a problem starting the elasticsearch interface. Is "
            "elasticsearch running?")
    es = None


def requires_search(func):
    def wrapped(*args, **kwargs):
        if es is not None:
            try:
                return func(*args, **kwargs)
            except ConnectionError:
                raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
            except NotFoundError as e:
                raise exceptions.IndexNotFoundError(e.error)
            except RequestError as e:
                if 'ParseException' in e.error:
Esempio n. 56
0
def configure_subscription(auth):
    """Create, update, or remove a notification subscription for the requester.

    Reads subscription parameters from the JSON request body: ``id`` (target
    node id or the user's own id), ``event``, ``notification_type``, and
    optionally ``path``/``provider`` for file events.

    :param Auth auth: the authenticated requester
    :return: dict with a confirmation message; an empty dict when an
        adopt_parent request only removes an existing subscription
    :raises: HTTPError on bad input, unknown target, or missing permission
    """
    user = auth.user
    payload = request.get_json()
    target_id = payload.get('id')
    event = payload.get('event')
    notification_type = payload.get('notification_type')
    path = payload.get('path')
    provider = payload.get('provider')

    # An event is mandatory, and the notification type must be a known type
    # or the special 'adopt_parent' marker.
    type_is_valid = notification_type in NOTIFICATION_TYPES or notification_type == 'adopt_parent'
    if not (event and type_is_valid):
        raise HTTPError(http.BAD_REQUEST, data=dict(
            message_long='Must provide an event and notification type for subscription.')
        )

    node = AbstractNode.load(target_id)
    # File events are keyed per-file as '<waterbutler path>_file_updated'.
    if 'file_updated' in event and path is not None and provider is not None:
        event = path.lstrip('/') + '_file_updated'
    event_id = utils.to_subscription_key(target_id, event)

    if node:
        if not node.has_permission(user, 'read'):
            sentry.log_message('{!r} attempted to subscribe to private node, {}'.format(user, target_id))
            raise HTTPError(http.FORBIDDEN)

        if notification_type == 'adopt_parent':
            # File-specific events never adopt from a parent, so the parent
            # check is skipped for them; plain events require a parent node.
            file_specific = 'file_updated' in event and len(event) > len('file_updated')
            if not file_specific:
                parent = node.parent_node
                if not parent:
                    sentry.log_message(
                        '{!r} attempted to adopt_parent of '
                        'the parentless project, {!r}'.format(user, node)
                    )
                    raise HTTPError(http.BAD_REQUEST)

            # Adopting the parent means dropping any direct subscription the
            # user holds for this event.
            existing = NotificationSubscription.load(event_id)
            if existing:
                existing.remove_user_from_subscription(user)
            return {}

        owner = node
    else:
        # A non-node target must be the requesting user themselves.
        if target_id != user._id:
            sentry.log_message(
                '{!r} attempted to subscribe to either a bad '
                'id or non-node non-self id, {}'.format(user, target_id)
            )
            raise HTTPError(http.NOT_FOUND)

        if notification_type == 'adopt_parent':
            sentry.log_message(
                '{!r} attempted to adopt_parent of a none node id, {}'.format(user, target_id)
            )
            raise HTTPError(http.BAD_REQUEST)

        owner = user

    subscription = NotificationSubscription.load(event_id)
    if not subscription:
        subscription = NotificationSubscription(_id=event_id, owner=owner, event_name=event)
        subscription.save()

    # Remember that this node has had its notifications configured by the user.
    if node and node._id not in user.notifications_configured:
        user.notifications_configured[node._id] = True
        user.save()

    subscription.add_user_to_subscription(user, notification_type)

    subscription.save()

    return {'message': 'Successfully subscribed to {} list on {}'.format(notification_type, event_id)}
Esempio n. 57
0
    )
    logging.getLogger('elasticsearch').setLevel(logging.WARN)
    logging.getLogger('elasticsearch.trace').setLevel(logging.WARN)
    logging.getLogger('urllib3').setLevel(logging.WARN)
    logging.getLogger('requests').setLevel(logging.WARN)
    es.cluster.health(wait_for_status='yellow')
except ConnectionError as e:
    message = (
        'The SEARCH_ENGINE setting is set to "elastic", but there '
        'was a problem starting the elasticsearch interface. Is '
        'elasticsearch running?'
    )
    if settings.SENTRY_DSN:
        try:
            sentry.log_exception()
            sentry.log_message(message)
        except AssertionError:  # App has not yet been initialized
            logger.exception(message)
    else:
        logger.error(message)
    exit(1)


def requires_search(func):
    def wrapped(*args, **kwargs):
        if es is not None:
            try:
                return func(*args, **kwargs)
            except ConnectionError:
                raise exceptions.SearchUnavailableError('Could not connect to elasticsearch')
            except NotFoundError as e:
Esempio n. 58
0
def get_auth(auth, **kwargs):
    """Issue a signed waterbutler auth payload for a file action on a node/preprint.

    Authenticates the caller (CAS Bearer token first, then the OSF cookie
    embedded in the payload), decrypts/verifies the JWE+JWT ``payload`` query
    argument, checks access for the requested action, resolves storage
    credentials and settings, and returns them wrapped in a fresh JWT encrypted
    with the waterbutler JWE key.

    :param Auth auth: current auth context (may be unauthenticated on entry)
    :return: dict with a single ``payload`` key (JWE-encrypted JWT)
    :raises: HTTPError(403) for invalid/expired payloads, 400 for missing
        fields or addon errors, 404 when the target cannot be loaded
    """
    cas_resp = None
    if not auth.user:
        # Central Authentication Server OAuth Bearer Token
        authorization = request.headers.get('Authorization')
        if authorization and authorization.startswith('Bearer '):
            client = cas.get_client()
            try:
                access_token = cas.parse_auth_header(authorization)
                cas_resp = client.profile(access_token)
            except cas.CasError as err:
                sentry.log_exception()
                # NOTE: We assume that the request is an AJAX request
                return json_renderer(err)
            if cas_resp.authenticated:
                auth.user = OSFUser.load(cas_resp.user)

    try:
        # The payload is JWE-encrypted and JWT-signed; `require_exp` rejects
        # tokens without an expiry claim.
        data = jwt.decode(
            jwe.decrypt(request.args.get('payload', '').encode('utf-8'), WATERBUTLER_JWE_KEY),
            settings.WATERBUTLER_JWT_SECRET,
            options={'require_exp': True},
            algorithm=settings.WATERBUTLER_JWT_ALGORITHM
        )['data']
    except (jwt.InvalidTokenError, KeyError) as err:
        sentry.log_message(str(err))
        raise HTTPError(httplib.FORBIDDEN)

    # Fall back to the OSF session cookie carried inside the payload.
    if not auth.user:
        auth.user = OSFUser.from_cookie(data.get('cookie', ''))

    try:
        action = data['action']
        node_id = data['nid']
        provider_name = data['provider']
    except KeyError:
        raise HTTPError(httplib.BAD_REQUEST)

    # The target may be a project/component or a preprint.
    node = AbstractNode.load(node_id) or Preprint.load(node_id)
    if not node:
        raise HTTPError(httplib.NOT_FOUND)

    check_access(node, auth, action, cas_resp)
    provider_settings = None
    # Preprints have no addons; only nodes expose `get_addon`.
    if hasattr(node, 'get_addon'):
        provider_settings = node.get_addon(provider_name)
        if not provider_settings:
            raise HTTPError(httplib.BAD_REQUEST)

    try:
        path = data.get('path')
        version = data.get('version')
        credentials = None
        waterbutler_settings = None
        fileversion = None
        # For osfstorage, credentials come from the storage region of the
        # specific file version when one can be resolved.
        if provider_name == 'osfstorage':
            if path and version:
                # check to see if this is a file or a folder
                filenode = OsfStorageFileNode.load(path.strip('/'))
                if filenode and filenode.is_file:
                    try:
                        fileversion = FileVersion.objects.filter(
                            basefilenode___id=path.strip('/'),
                            identifier=version
                        ).select_related('region').get()
                    except FileVersion.DoesNotExist:
                        raise HTTPError(httplib.BAD_REQUEST)
            # path and no version, use most recent version
            elif path:
                filenode = OsfStorageFileNode.load(path.strip('/'))
                if filenode and filenode.is_file:
                    fileversion = FileVersion.objects.filter(
                        basefilenode=filenode
                    ).select_related('region').order_by('-created').first()
            if fileversion:
                region = fileversion.region
                credentials = region.waterbutler_credentials
                waterbutler_settings = fileversion.serialize_waterbutler_settings(
                    node_id=provider_settings.owner._id if provider_settings else node._id,
                    root_id=provider_settings.root_node._id if provider_settings else node.root_folder._id,
                )
        # If they haven't been set by version region, use the NodeSettings region
        if not (credentials and waterbutler_settings):
            credentials = node.serialize_waterbutler_credentials(provider_name)
            waterbutler_settings = node.serialize_waterbutler_settings(provider_name)
    except exceptions.AddonError:
        # NOTE(review): bare `log_exception()` here vs `sentry.log_exception()`
        # above — presumably the same helper imported directly; verify imports.
        log_exception()
        raise HTTPError(httplib.BAD_REQUEST)

    # TODO: Add a signal here?
    # Record preprint view/download metrics for non-contributors only.
    if waffle.switch_is_active(features.ELASTICSEARCH_METRICS):
        user = auth.user
        if isinstance(node, Preprint) and not node.is_contributor(user):
            metric_class = get_metric_class_for_action(action)
            if metric_class:
                try:
                    metric_class.record_for_preprint(
                        preprint=node,
                        user=user,
                        version=fileversion.identifier if fileversion else None,
                        path=path
                    )
                except es_exceptions.ConnectionError:
                    # Metrics are best-effort: log and continue serving auth.
                    log_exception()

    return {'payload': jwe.encrypt(jwt.encode({
        'exp': timezone.now() + datetime.timedelta(seconds=settings.WATERBUTLER_JWT_EXPIRATION),
        'data': {
            'auth': make_auth(auth.user),  # A waterbutler auth dict not an Auth object
            'credentials': credentials,
            'settings': waterbutler_settings,
            'callback_url': node.api_url_for(
                ('create_waterbutler_log' if not getattr(node, 'is_registration', False) else 'registration_callbacks'),
                _absolute=True,
                _internal=True
            )
        }
    }, settings.WATERBUTLER_JWT_SECRET, algorithm=settings.WATERBUTLER_JWT_ALGORITHM), WATERBUTLER_JWE_KEY)}