Esempio n. 1
0
  def mail_me(self, resp):
    """Sends a notification email about a failed request via util.email_me.

    Nothing is sent when the response contains one of a fixed set of
    substrings that identify known, uninteresting failures.

    Args:
      resp: string, the response text to scan and include in the email body
    """
    # substrings that identify failures we already know about
    known_failures = (
      'Deadline exceeded while waiting for HTTP response',
      'urlfetch.Fetch() took too long',
      # https://github.com/snarfed/bridgy/issues/161
      '"resp": "invalid_input"',
      # https://github.com/snarfed/bridgy/issues/175
      'bX-2i87au',
      # https://github.com/snarfed/bridgy/issues/177
      "Invalid argument, 'thread': Unable to find thread",
      # expected for partially set up tumblr accounts
      "we haven't found your Disqus account",
      # Twitter duplicate publish attempt
      'Status is a duplicate.',
    )
    if any(marker in resp for marker in known_failures):
      return

    # subject is "<handler class> <entity type> <entity status>", or
    # "<handler class> failed" when there's no entity
    if self.entity:
      outcome = '%s %s' % (self.entity.type, self.entity.status)
    else:
      outcome = 'failed'
    subject = '%s %s' % (self.__class__.__name__, outcome)
    body = 'Request:\n%s\n\nResponse:\n%s' % (self.request.params.items(), resp)

    if self.source:
      body = 'Source: %s\n\n%s' % (self.source.bridgy_url(self), body)
      subject += ': %s' % self.source.label()

    util.email_me(subject=subject, body=body)
Esempio n. 2
0
  def mail_me(self, resp):
    """Sends a notification email about a failed request via util.email_me.

    Responses containing any of the known-failure substrings below are
    skipped without sending anything.

    Args:
      resp: string, the response text to scan and include in the email body
    """
    skip_if_present = (
      'Deadline exceeded while waiting for HTTP response',
      'urlfetch.Fetch() took too long',
      # WordPress Jetpack bug
      # https://github.com/snarfed/bridgy/issues/161
      '"resp": "invalid_input"',
      # Blogger known bug
      # https://github.com/snarfed/bridgy/issues/175
      'bX-2i87au',
      # Tumblr: transient Disqus error looking up thread
      # https://github.com/snarfed/bridgy/issues/177
      "Invalid argument, 'thread': Unable to find thread",
      # expected for partially set up tumblr accounts
      "we haven't found your Disqus account",
      # Twitter duplicate publish attempts
      'Status is a duplicate.',
      'You have already favorited this status.',
      # WordPress duplicate comment: an HTTP 409 Conflict whose JSON body has
      # "error": "comment_duplicate"
      'comment_duplicate',
    )
    for needle in skip_if_present:
      if needle in resp:
        # known failure, don't email about it
        return

    what = ('%s %s' % (self.entity.type, self.entity.status)
            if self.entity else 'failed')
    subject = '%s %s' % (self.__class__.__name__, what)
    body = 'Request:\n%s\n\nResponse:\n%s' % (self.request.params.items(), resp)

    if self.source:
      # prepend a link to the source and name it in the subject
      body = 'Source: %s\n\n%s' % (self.source.bridgy_url(self), body)
      subject += ': %s' % self.source.label()

    util.email_me(subject=subject, body=body)
Esempio n. 3
0
    def mail_me(self, resp):
        """Sends a notification email about a failed request via util.email_me.

        Returns early, without emailing, when the response matches a known
        failure.

        Args:
          resp: string, the response text to scan and include in the email body
        """
        # substrings that mark failures we don't want email about
        ignorable = (
            'Deadline exceeded while waiting for HTTP response',
            'urlfetch.Fetch() took too long',
            # WordPress Jetpack bug
            # https://github.com/snarfed/bridgy/issues/161
            '"resp": "invalid_input"',
            # Blogger known bug
            # https://github.com/snarfed/bridgy/issues/175
            'bX-2i87au',
            # Tumblr: transient Disqus error looking up thread
            # https://github.com/snarfed/bridgy/issues/177
            "Invalid argument, 'thread': Unable to find thread",
            # expected for partially set up tumblr accounts
            "we haven't found your Disqus account",
            # Twitter duplicate publish attempts
            'Status is a duplicate.',
            'You have already favorited this status.',
            # WordPress duplicate comment (HTTP 409 Conflict with
            # "error": "comment_duplicate" in the JSON body)
            'comment_duplicate',
        )
        if any(text in resp for text in ignorable):
            return

        if self.entity:
            detail = '%s %s' % (self.entity.type, self.entity.status)
        else:
            detail = 'failed'
        subject = '%s %s' % (self.__class__.__name__, detail)

        request_params = self.request.params.items()
        body = 'Request:\n%s\n\nResponse:\n%s' % (request_params, resp)

        if self.source:
            body = 'Source: %s\n\n%s' % (self.source.bridgy_url(self), body)
            subject += ': %s' % self.source.label()

        util.email_me(subject=subject, body=body)
Esempio n. 4
0
  def post(self, *path_args):
    """Runs a poll task for the source named in the request parameters.

    Reads the 'source_key' and 'last_polled' request parameters. The task is
    dropped if the source is missing, disabled, or doesn't have the 'listen'
    feature, or if 'last_polled' doesn't match the source's own last_polled
    timestamp. Otherwise the source is marked as polling and self.poll() runs.

    Exceptions from self.poll() are handled as follows:
      * HTTP 401 or models.DisableSource: the source is marked disabled and
        the task finishes normally; beta users trigger an email.
      * rate limit codes: rate_limited is set and the task finishes normally.
      * HTTP 5xx, HTTP 400 from Flickr, or a connection failure: the request
        is aborted with util.ERROR_HTTP_RETURN_CODE.
      * anything else is re-raised.

    Args:
      path_args: unused positional URL path arguments
    """
    logging.debug('Params: %s', self.request.params)

    key = self.request.params['source_key']
    source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
      logging.error('Source not found or disabled. Dropping task.')
      return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    last_polled = self.request.params['last_polled']
    if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
      logging.warning('duplicate poll task! deferring to the other task.')
      return

    logging.info('Last poll: %s', self._last_poll_url(source))

    # mark this source as polling
    source.updates = {
      'poll_status': 'polling',
      'last_poll_attempt': util.now_fn(),
      'rate_limited': False,
    }
    source = models.Source.put_updates(source)

    # NOTE(review): the updates collected below are not saved anywhere in this
    # block; presumably the caller or code not shown persists them. confirm.
    source.updates = {}
    try:
      self.poll(source)
    except Exception as e:
      source.updates['poll_status'] = 'error'
      code, body = util.interpret_http_exception(e)
      if code == '401' or isinstance(e, models.DisableSource):
        # the user deauthorized the bridgy app, so disable this source.
        # let the task complete successfully so that it's not retried.
        logging.warning('Disabling source due to: %s', e, exc_info=True)
        source.updates.update({
          'status': 'disabled',
          'poll_status': 'ok',
        })
        body = '%s\nLast poll: %s' % (source.bridgy_url(self),
                                      self._last_poll_url(source))
        if source.is_beta_user():
          util.email_me(subject='Bridgy: disabled %s' % source.label(), body=body)

      elif code in util.HTTP_RATE_LIMIT_CODES:
        logging.info('Rate limited. Marking as error and finishing. %s', e)
        source.updates['rate_limited'] = True
      # floor division so that the 5xx check holds under true division too
      elif ((code and int(code) // 100 == 5) or
            (code == '400' and isinstance(source, flickr.Flickr)) or
            util.is_connection_failure(e)):
        logging.error('API call failed. Marking as error and finishing. %s: %s\n%s',
                      code, body, e)
        self.abort(util.ERROR_HTTP_RETURN_CODE)
      else:
        raise
Esempio n. 5
0
  def post(self, *path_args):
    """Runs a poll task for the source named in the request parameters.

    Reads the 'source_key' and 'last_polled' request parameters. The task is
    dropped if the source is missing, disabled, or doesn't have the 'listen'
    feature, or if 'last_polled' doesn't match the source's own last_polled
    timestamp. Otherwise the source is marked as polling and self.poll() runs.

    Exceptions from self.poll() are handled as follows:
      * an HTTP code in source.DISABLE_HTTP_CODES, or models.DisableSource:
        the source is marked disabled and the task finishes normally; beta
        users trigger an email.
      * an HTTP code in source.RATE_LIMIT_HTTP_CODES: rate_limited is set and
        the task finishes normally.
      * HTTP 5xx, HTTP 400 from Flickr, or a connection failure: the request
        is aborted with util.ERROR_HTTP_RETURN_CODE.
      * anything else is re-raised.

    Args:
      path_args: unused positional URL path arguments
    """
    logging.debug('Params: %s', self.request.params)

    key = self.request.params['source_key']
    source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
      logging.error('Source not found or disabled. Dropping task.')
      return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    last_polled = self.request.params['last_polled']
    if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
      logging.warning('duplicate poll task! deferring to the other task.')
      return

    logging.info('Last poll: %s', self._last_poll_url(source))

    # mark this source as polling
    source.updates = {
      'poll_status': 'polling',
      'last_poll_attempt': util.now_fn(),
      'rate_limited': False,
    }
    source = models.Source.put_updates(source)

    # NOTE(review): the updates collected below are not saved anywhere in this
    # block; presumably the caller or code not shown persists them. confirm.
    source.updates = {}
    try:
      self.poll(source)
    except Exception as e:
      source.updates['poll_status'] = 'error'
      code, body = util.interpret_http_exception(e)
      if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
        # the user deauthorized the bridgy app, so disable this source.
        # let the task complete successfully so that it's not retried.
        logging.warning('Disabling source due to: %s', e, exc_info=True)
        source.updates.update({
          'status': 'disabled',
          'poll_status': 'ok',
        })
        body = '%s\nLast poll: %s' % (source.bridgy_url(self),
                                      self._last_poll_url(source))
        if source.is_beta_user():
          util.email_me(subject='Bridgy: disabled %s' % source.label(), body=body)

      elif code in source.RATE_LIMIT_HTTP_CODES:
        logging.info('Rate limited. Marking as error and finishing. %s', e)
        source.updates['rate_limited'] = True
      # floor division so that the 5xx check holds under true division too
      elif ((code and int(code) // 100 == 5) or
            (code == '400' and isinstance(source, flickr.Flickr)) or
            util.is_connection_failure(e)):
        logging.error('API call failed. Marking as error and finishing. %s: %s\n%s',
                      code, body, e)
        self.abort(util.ERROR_HTTP_RETURN_CODE)
      else:
        raise
Esempio n. 6
0
  def mail_me(self, resp):
    """Sends a notification email about a failed request via util.email_me.

    The subject is the handler class name followed by the entity's type and
    status, or 'failed' when there is no entity. The body holds the request
    parameters and the response. If a source is set, its URL is prepended to
    the body and its label is appended to the subject.

    Args:
      resp: the response text to include in the email body
    """
    if self.entity:
      outcome = '%s %s' % (self.entity.type, self.entity.status)
    else:
      outcome = 'failed'

    subject_parts = ['%s %s' % (self.__class__.__name__, outcome)]
    body = 'Request:\n%s\n\nResponse:\n%s' % (self.request.params.items(), resp)

    src = self.source
    if src:
      body = 'Source: %s\n\n%s' % (src.bridgy_url(self), body)
      subject_parts.append(': %s' % src.label())

    util.email_me(subject=''.join(subject_parts), body=body)
Esempio n. 7
0
    def mail_me(self, resp):
        """Sends a notification email about a failed request via util.email_me.

        Nothing is sent when the response contains one of the known-failure
        substrings listed below.

        Args:
          resp: string, the response text to scan and include in the email body
        """
        # substrings that identify failures we already know about
        known_failures = (
            'Deadline exceeded while waiting for HTTP response',
            'urlfetch.Fetch() took too long',
            # WordPress Jetpack bugs
            # https://github.com/snarfed/bridgy/issues/161
            '"resp": "invalid_input"',
            # https://github.com/snarfed/bridgy/issues/750
            '"error": "jetpack_verification_failed"',
            # Blogger known bug
            # https://github.com/snarfed/bridgy/issues/175
            'bX-2i87au',
            # Tumblr: transient Disqus error looking up thread
            # https://github.com/snarfed/bridgy/issues/177
            "Invalid argument, 'thread': Unable to find thread",
            # expected for partially set up tumblr accounts
            "we haven't found your Disqus account",
            # Twitter 5MB image file size limit
            '"message":"Image file size must be',
            # Twitter media file number limits
            'Tweet with media must have exactly 1 gif or video',
            # Facebook image type/size req'ts
            'Missing or invalid image file',
            "Your photos couldn't be uploaded. Photos should be less than 4 MB",
            # Twitter duplicate publish attempts
            'Status is a duplicate.',
            'You have already favorited this status.',
            'You have already retweeted this',
            # Facebook duplicate publish attempts
            'This status update is identical to the last one you posted.',
            # WordPress duplicate comment (HTTP 409 Conflict with
            # "error": "comment_duplicate" in the JSON body)
            'comment_duplicate',
        )
        if any(marker in resp for marker in known_failures):
            return

        if self.entity:
            outcome = '%s %s' % (self.entity.type, self.entity.status)
        else:
            outcome = 'failed'
        subject = '%s %s' % (self.__class__.__name__, outcome)

        request_params = self.request.params.items()
        body = 'Request:\n%s\n\nResponse:\n%s' % (request_params, resp)

        if self.source:
            body = 'Source: %s\n\n%s' % (self.source.bridgy_url(self), body)
            subject += ': %s' % self.source.label()

        util.email_me(subject=subject, body=body)
Esempio n. 8
0
  def mail_me(self, resp):
    """Sends a notification email about a failed request via util.email_me.

    Responses containing any of the known-failure substrings below are
    skipped without sending anything.

    Args:
      resp: string, the response text to scan and include in the email body
    """
    skip_if_present = (
      'Deadline exceeded while waiting for HTTP response',
      'urlfetch.Fetch() took too long',
      # WordPress Jetpack bugs
      # https://github.com/snarfed/bridgy/issues/161
      '"resp": "invalid_input"',
      # https://github.com/snarfed/bridgy/issues/750
      '"error": "jetpack_verification_failed"',
      # Blogger known bug
      # https://github.com/snarfed/bridgy/issues/175
      'bX-2i87au',
      # Tumblr: transient Disqus error looking up thread
      # https://github.com/snarfed/bridgy/issues/177
      "Invalid argument, 'thread': Unable to find thread",
      # expected for partially set up tumblr accounts
      "we haven't found your Disqus account",
      # Twitter 5MB image file size limit
      '"message":"Image file size must be',
      # Twitter media file number limits
      'Tweet with media must have exactly 1 gif or video',
      # Facebook image type/size req'ts
      'Missing or invalid image file',
      "Your photos couldn't be uploaded. Photos should be less than 4 MB",
      # Twitter duplicate publish attempts
      'Status is a duplicate.',
      'You have already favorited this status.',
      'You have already retweeted this',
      # Facebook duplicate publish attempts
      'This status update is identical to the last one you posted.',
      # WordPress duplicate comment: an HTTP 409 Conflict whose JSON body has
      # "error": "comment_duplicate"
      'comment_duplicate',
    )
    for needle in skip_if_present:
      if needle in resp:
        # known failure, don't email about it
        return

    what = ('%s %s' % (self.entity.type, self.entity.status)
            if self.entity else 'failed')
    subject = '%s %s' % (self.__class__.__name__, what)
    body = 'Request:\n%s\n\nResponse:\n%s' % (self.request.params.items(), resp)

    if self.source:
      # prepend a link to the source and name it in the subject
      body = 'Source: %s\n\n%s' % (self.source.bridgy_url(self), body)
      subject += ': %s' % self.source.label()

    util.email_me(subject=subject, body=body)
Esempio n. 9
0
    def new(handler, auth_entity=None, **kwargs):
        """Creates and returns a :class:`GitHub` for the logged in user.

        Also sends a (temporary) notification email containing the user's
        profile JSON, pretty-printed.

        Args:
          handler: the current :class:`webapp2.RequestHandler`
          auth_entity: :class:`oauth_dropins.github.GitHubAuth`. Defaults to
            None, but its attributes are read unconditionally, so callers
            must provide it.
          kwargs: property values

        Returns:
          a new :class:`GitHub` whose id and auth_entity come from
          auth_entity's key and whose name, picture and url come from the
          user's actor object
        """
        user = json.loads(auth_entity.user_json)
        gr_source = gr_github.GitHub(access_token=auth_entity.access_token())
        actor = gr_source.user_to_actor(user)

        # temporary!
        # dump the parsed object, not the raw user_json string: re-encoding a
        # string yields a single escaped string literal and ignores indent.
        util.email_me(subject='New Bridgy GitHub user!',
                      body=json.dumps(user, indent=2))

        return GitHub(id=auth_entity.key.id(),
                      auth_entity=auth_entity.key,
                      name=actor.get('displayName'),
                      picture=actor.get('image', {}).get('url'),
                      url=actor.get('url'),
                      **kwargs)
Esempio n. 10
0
  def create_new(cls, handler, **kwargs):
    """Creates and saves a new Source and adds a poll task for it.

    If the new source has no domain URLs, they're taken from the web site in
    the auth entity's profile, when one is provided. For the 'publish'
    feature, an unusable profile web site sets an error in handler.messages
    and nothing is created.

    Args:
      handler: the current RequestHandler
      **kwargs: passed to new()

    Returns:
      the saved source, or None if new() returned None or the profile's web
      site was unusable for publish
    """
    source = cls.new(handler, **kwargs)
    if source is None:
      return None

    if source.features:
      feature = source.features[0]
    else:
      feature = 'listen'

    auth_entity = kwargs.get('auth_entity')
    if (not source.domain_urls and auth_entity and
        hasattr(auth_entity, 'user_json')):
      # extract domain from the URL set on the user's profile, if any
      url, domain, ok = source._url_and_domain(auth_entity)
      if ok:
        if not source.domain_urls:
          source.domain_urls = [url]
        if not source.domains:
          source.domains = [domain]
      elif feature == 'publish':
        # publish needs a working web site; tell the user what's wrong
        silo_name = cls.AS_CLASS.NAME
        if not url:
          error = ('Your %s profile is missing the website field. '
                   'Please add it and try again!' % silo_name)
        elif not domain:
          error = ('Could not parse the web site in your %s profile: '
                   '%s\n Please update it and try again!' % (silo_name, url))
        else:
          error = ("Could not connect to the web site in your %s profile: "
                   "%s\n Please update it and try again!" % (silo_name, url))
        handler.messages = {error}
        return None

    # check if this source already exists
    existing = source.key.get()
    verb = 'Updated' if existing else 'Added'
    if existing:
      # merge some fields
      preserved = ('created', 'last_hfeed_fetch', 'last_poll_attempt',
                   'last_polled', 'last_syndication_url',
                   'last_webmention_sent', 'superfeedr_secret')
      source.features = set(source.features + existing.features)
      source.populate(**existing.to_dict(include=preserved))

    if source.domain_urls:
      link = 'http://indiewebify.me/send-webmentions/?url=' + source.domain_urls[0]
    else:
      link = 'http://indiewebify.me/#send-webmentions'

    # feature-specific hint appended to the blurb
    next_steps = {
      'listen': "Refresh to see what we've found!",
      'publish': 'Try previewing a post from your web site!',
      'webmention': '<a href="%s">Try a webmention!</a>' % link,
    }
    blurb = '%s %s. %s' % (verb, source.label(), next_steps.get(feature, ''))
    logging.info('%s %s', blurb, source.bridgy_url(handler))
    if not existing:
      util.email_me(subject=blurb, body=source.bridgy_url(handler))

    source.verify()
    if source.verified():
      handler.messages = {blurb}

    if 'webmention' in source.features:
      superfeedr.subscribe(source, handler)

    # TODO: ugh, *all* of this should be transactional
    source.put()

    if 'listen' in source.features:
      util.add_poll_task(source)

    return source
Esempio n. 11
0
  def create_new(cls, handler, user_url=None, **kwargs):
    """Creates and saves a new Source and adds a poll task for it.

    If the new source has no domain URLs, they're looked up from the auth
    entity, when one is provided. For the 'publish' feature, finding no URLs
    or no domains sets an error in handler.messages and nothing is created.

    Args:
      handler: the current RequestHandler
      user_url: a string, optional. if provided, supersedes other urls when
        determining the author_url
      **kwargs: passed to new()

    Returns:
      the saved source, or None if new() returned None or no valid web site
      was found for publish
    """
    source = cls.new(handler, **kwargs)
    if source is None:
      return None

    if source.features:
      feature = source.features[0]
    else:
      feature = 'listen'

    auth_entity = kwargs.get('auth_entity')
    # defer to the source if it already set domain_urls
    if (not source.domain_urls and auth_entity and
        hasattr(auth_entity, 'user_json')):
      urls, domains = source._urls_and_domains(auth_entity, user_url)
      source.domain_urls, source.domains = urls, domains
      logging.debug('URLs/domains: %s %s', source.domain_urls, source.domains)
      missing_site = not source.domain_urls or not source.domains
      if feature == 'publish' and missing_site:
        handler.messages = {'No valid web sites found in your %s profile. '
                            'Please update it and try again!' % cls.AS_CLASS.NAME}
        return None

    # check if this source already exists
    existing = source.key.get()
    verb = 'Updated' if existing else 'Added'
    if existing:
      # merge some fields
      preserved = ('created', 'last_hfeed_fetch', 'last_poll_attempt',
                   'last_polled', 'last_syndication_url',
                   'last_webmention_sent', 'superfeedr_secret')
      source.features = set(source.features + existing.features)
      source.populate(**existing.to_dict(include=preserved))

    if source.domain_urls:
      link = 'http://indiewebify.me/send-webmentions/?url=' + source.get_author_url()
    else:
      link = 'http://indiewebify.me/#send-webmentions'

    # feature-specific hint appended to the blurb
    next_steps = {
      'listen': "Refresh to see what we've found!",
      'publish': 'Try previewing a post from your web site!',
      'webmention': '<a href="%s">Try a webmention!</a>' % link,
    }
    blurb = '%s %s. %s' % (verb, source.label(), next_steps.get(feature, ''))
    logging.info('%s %s', blurb, source.bridgy_url(handler))
    if not existing:
      util.email_me(subject=blurb, body=source.bridgy_url(handler))

    source.verify()
    if source.verified():
      handler.messages = {blurb}

    if 'webmention' in source.features:
      superfeedr.subscribe(source, handler)

    # TODO: ugh, *all* of this should be transactional
    source.put()

    if 'listen' in source.features:
      util.add_poll_task(source)

    return source
Esempio n. 12
0
    def _run(self):
        """Validates a publish request, then previews or creates the post.

        Steps, in order:
          1. validate the target URL (must be <domain>/publish/<source name>)
          2. resolve and validate the source URL
          3. look up the source entity for that URL's domain
          4. fetch and parse the source page's microformats2
          5. walk the mf2 items breadth first, trying each publishable one
             until one is published or aborts

        Most failures are reported by returning self.error(...).

        Returns CreationResult on success, None otherwise.
        (NOTE(review): the error paths return whatever self.error() returns;
        presumably None - confirm.)
        """
        logging.info('Params: %s', self.request.params.items())
        assert self.PREVIEW in (True, False)

        # parse and validate target URL
        try:
            parsed = urllib.parse.urlparse(self.target_url())
        except BaseException:
            return self.error('Could not parse target URL %s' %
                              self.target_url())

        # the last path segment names the source class, e.g. /publish/twitter
        domain = parsed.netloc
        path_parts = parsed.path.rsplit('/', 1)
        source_cls = SOURCE_NAMES.get(path_parts[-1])
        if (domain not in util.DOMAINS or len(path_parts) != 2
                or path_parts[0] != '/publish' or not source_cls):
            return self.error(
                'Target must be brid.gy/publish/{flickr,github,mastodon,twitter}'
            )
        elif source_cls == Instagram:
            return self.error('Sorry, %s is not supported.' %
                              source_cls.GR_CLASS.NAME)

        # resolve source URL
        url, domain, ok = util.get_webmention_target(
            self.source_url(), replace_test_domains=False)
        # show nice error message if they're trying to publish a silo post
        if domain in SOURCE_DOMAINS:
            return self.error(
                "Looks like that's a %s URL. Try one from your web site instead!"
                % SOURCE_DOMAINS[domain].GR_CLASS.NAME)
        elif not ok:
            return self.error('Unsupported source URL %s' % url)
        elif not domain:
            return self.error('Could not parse source URL %s' % url)

        # look up source by domain
        self.source = self._find_source(source_cls, url, domain)
        if not self.source:
            return  # _find_source rendered the error

        content_param = 'bridgy_%s_content' % self.source.SHORT_NAME
        if content_param in self.request.params:
            return self.error('The %s parameter is not supported' %
                              content_param)

        # show nice error message if they're trying to publish their home page.
        # both the resolved URL and the raw source URL are compared against
        # each of the source's domain URLs, ignoring leading/trailing slashes.
        for domain_url in self.source.domain_urls:
            domain_url_parts = urllib.parse.urlparse(domain_url)
            for source_url in url, self.source_url():
                parts = urllib.parse.urlparse(source_url)
                if (parts.netloc == domain_url_parts.netloc
                        and parts.path.strip('/')
                        == domain_url_parts.path.strip('/')
                        and not parts.query):
                    return self.error(
                        "Looks like that's your home page. Try one of your posts instead!"
                    )

        # done with the sanity checks, ready to fetch the source url. create the
        # Publish entity so we can store the result.
        self.entity = self.get_or_add_publish_entity(url)
        try:
            resp = self.fetch_mf2(url, raise_errors=True)
        except BaseException as e:
            status, body = util.interpret_http_exception(e)
            # HTTP 410 Gone is handed off to self.delete()
            if status == '410':
                return self.delete(url)
            return self.error('Could not fetch source URL %s' % url)

        if not resp:
            return
        self.fetched, mf2 = resp

        # refuse to publish a page again if its Publish entity is already
        # complete, unless that was just a preview, this request is a preview,
        # or we're running in debug mode.
        if (self.entity.status == 'complete' and self.entity.type != 'preview'
                and not self.PREVIEW and not appengine_config.DEBUG):
            return self.error(
                "Sorry, you've already published that page, and Bridgy Publish doesn't support updating existing posts. Details: https://github.com/snarfed/bridgy/issues/84",
                extra_json={'original': self.entity.published})

        # find rel-shortlink, if any
        # http://microformats.org/wiki/rel-shortlink
        # https://github.com/snarfed/bridgy/issues/173
        shortlinks = mf2['rels'].get('shortlink')
        if shortlinks:
            self.shortlink = urllib.parse.urljoin(url, shortlinks[0])

        # loop through each item and its children and try to preview/create it. if
        # it fails, try the next one. break after the first one that works.
        result = None
        # types of the items we saw but couldn't publish, for the error message
        types = set()
        queue = collections.deque(mf2.get('items', []))
        while queue:
            item = queue.popleft()
            # NOTE(review): raises TypeError if an item has no 'type' key;
            # presumably parsed mf2 items always have one - confirm.
            item_types = set(item.get('type'))
            if 'h-feed' in item_types and 'h-entry' not in item_types:
                # a feed isn't publishable itself; descend into its children
                queue.extend(item.get('children', []))
                continue
            elif not item_types & PUBLISHABLE_TYPES:
                types = types.union(item_types)
                continue

            try:
                result = self.attempt_single_item(item)
                if self.entity.published:
                    break
                if result.abort:
                    if result.error_plain:
                        self.error(result.error_plain,
                                   html=result.error_html,
                                   data=item)
                    return
                # try the next item
                for embedded in ('rsvp', 'invitee', 'repost', 'repost-of',
                                 'like', 'like-of', 'in-reply-to'):
                    if embedded in item.get('properties', []):
                        item_types.add(embedded)
                logging.info(
                    'Object type(s) %s not supported; error=%s; trying next.',
                    item_types, result.error_plain)
                types = types.union(item_types)
                queue.extend(item.get('children', []))
            # NOTE(review): BaseException also covers KeyboardInterrupt and
            # SystemExit; those are only re-raised below when
            # interpret_http_exception returns no code for them.
            except BaseException as e:
                code, body = util.interpret_http_exception(e)
                if code in self.source.DISABLE_HTTP_CODES or isinstance(
                        e, models.DisableSource):
                    # the user deauthorized the bridgy app, or the token expired, so
                    # disable this source.
                    logging.warning('Disabling source due to: %s' % e,
                                    exc_info=True)
                    self.source.status = 'disabled'
                    self.source.put()
                    # TODO: eventually drop this to just if source.is_beta_user(). leaving
                    # for everyone right now for initial monitoring.
                    util.email_me(subject='Bridgy Publish: disabled %s' %
                                  self.source.label(),
                                  body=body)
                # these exception types are reported as HTTP 400. anything else
                # without an HTTP code is unexpected, so re-raise it.
                if isinstance(
                        e,
                    (NotImplementedError, ValueError, urllib.error.URLError)):
                    code = '400'
                elif not code:
                    raise
                msg = 'Error: %s %s' % (body or '', e)
                # mail is requested for every status except the ones listed here
                return self.error(msg,
                                  status=code,
                                  mail=code
                                  not in ('400', '404', '502', '503', '504'))

        if not self.entity.published:  # tried all the items
            # h-entry and h-note are left out of the unsupported types message
            types.discard('h-entry')
            types.discard('h-note')
            if types:
                msg = (
                    "%s doesn't support type(s) %s, or no content was found." %
                    (source_cls.GR_CLASS.NAME, ' + '.join(types)))
            else:
                msg = 'Could not find content in <a href="http://microformats.org/wiki/h-entry">h-entry</a> or any other element!'
            return self.error(msg, data=mf2)

        # write results to datastore
        self.entity.status = 'complete'
        self.entity.put()
        return result
Esempio n. 13
0
    def receive(self, email):
        """Handles an incoming email and stores it as a Facebook response.

        The recipient address is the last segment of the request path, and its
        user part (before the '@') is used to look up the
        FacebookEmailAccount. The email's HTML bodies are always stored in a
        FacebookEmail keyed by message id, even when no account is found.

        Writes a 404 response if no account matches the address, and a 400
        response if none of the HTML bodies can be converted to an object.
        Otherwise a Response is saved for the converted object and linked
        from the FacebookEmail.

        Args:
          email: the incoming message. Must provide original (the raw message
            headers) and bodies(); sender, to, cc and subject are optional.
        """
        addr = self.request.path.split('/')[-1]
        message_id = email.original.get('message-id').strip('<>')
        sender = getattr(email, 'sender', None)
        to = getattr(email, 'to', None)
        cc = getattr(email, 'cc', None)
        subject = getattr(email, 'subject', None)
        logging.info('Received %s from %s to %s (%s) cc %s: %s', message_id,
                     sender, to, addr, cc, subject)

        user = addr.split('@')[0]
        source = FacebookEmailAccount.query(
            FacebookEmailAccount.email_user == user).get()
        logging.info('Source for %s is %s', user, source)

        util.email_me(subject='New email from %s: %s' % (sender, subject),
                      body='Source: %s' %
                      (source.bridgy_url(self) if source else None))

        # store the email before the source check below, so that emails for
        # unknown addresses are still recorded
        htmls = [body.decode() for _, body in email.bodies('text/html')]
        fbe = FacebookEmail.get_or_insert(
            message_id, source=source.key if source else None, htmls=htmls)
        logging.info('FacebookEmail created %s: %s', fbe.created,
                     fbe.key.urlsafe())

        if not source:
            self.response.status_code = 404
            self.response.write(
                'No Facebook email user found with address %s' % addr)
            return

        # use the first HTML body that converts to an object
        for html in htmls:
            obj = gr_facebook.Facebook.email_to_object(html)
            if obj:
                break
        else:
            self.response.status_code = 400
            self.response.write('No HTML body could be parsed')
            return
        logging.info('Converted to AS1: %s', json.dumps(obj, indent=2))

        base_obj = source.gr_source.base_object(obj)
        # note that this ignores the id query param (the post's user id) and uses
        # the source object's user id instead.
        base_obj['url'] = source.canonicalize_url(base_obj['url'])
        # also note that base_obj['id'] is not a tag URI, it's the raw Facebook post
        # id, eg '104790764108207'. we don't use it from activities_json much,
        # though, just in PropagateResponse.source_url(), which handles this fine.

        original_post_discovery.refetch(source)
        targets, mentions = original_post_discovery.discover(source,
                                                             base_obj,
                                                             fetch_hfeed=False)
        logging.info('Got targets %s mentions %s', targets, mentions)

        resp = Response(id=obj['id'],
                        source=source.key,
                        type=Response.get_type(obj),
                        response_json=json.dumps(obj),
                        activities_json=[json.dumps(base_obj)],
                        unsent=targets)
        resp.get_or_save(source, restart=True)

        fbe.response = resp.key
        fbe.put()
Esempio n. 14
0
  def _run(self):
    """Returns CreationResult on success, None otherwise."""
    logging.info('Params: %s', self.request.params.items())
    assert self.PREVIEW in (True, False)

    # parse and validate target URL
    try:
      parsed = urlparse.urlparse(self.target_url())
    except BaseException:
      return self.error('Could not parse target URL %s' % self.target_url())

    domain = parsed.netloc
    path_parts = parsed.path.rsplit('/', 1)
    source_cls = SOURCE_NAMES.get(path_parts[-1])
    if (domain not in ('brid.gy', 'www.brid.gy', 'localhost:8080') or
        len(path_parts) != 2 or path_parts[0] != '/publish' or not source_cls):
      return self.error(
        'Target must be brid.gy/publish/{facebook,flickr,github,twitter}')
    elif source_cls == Instagram:
      return self.error('Sorry, %s is not supported.' %
                        source_cls.GR_CLASS.NAME)

    # resolve source URL
    url, domain, ok = util.get_webmention_target(
      self.source_url(), replace_test_domains=False)
    # show nice error message if they're trying to publish a silo post
    if domain in SOURCE_DOMAINS:
      return self.error(
        "Looks like that's a %s URL. Try one from your web site instead!" %
        SOURCE_DOMAINS[domain].GR_CLASS.NAME)
    elif not ok:
      return self.error('Unsupported source URL %s' % url)
    elif not domain:
      return self.error('Could not parse source URL %s' % url)

    # look up source by domain
    self.source = self._find_source(source_cls, url, domain)
    if not self.source:
      return  # _find_source rendered the error

    content_param = 'bridgy_%s_content' % self.source.SHORT_NAME
    if content_param in self.request.params:
      return self.error('The %s parameter is not supported' % content_param)

    # show nice error message if they're trying to publish their home page
    for domain_url in self.source.domain_urls:
      domain_url_parts = urlparse.urlparse(domain_url)
      for source_url in url, self.source_url():
        parts = urlparse.urlparse(source_url)
        if (parts.netloc == domain_url_parts.netloc and
            parts.path.strip('/') == domain_url_parts.path.strip('/') and
            not parts.query):
          return self.error(
            "Looks like that's your home page. Try one of your posts instead!")

    # done with the sanity checks, ready to fetch the source url. create the
    # Publish entity so we can store the result.
    self.entity = self.get_or_add_publish_entity(url)
    try:
      resp = self.fetch_mf2(url, raise_errors=True)
    except BaseException as e:
      status, body = util.interpret_http_exception(e)
      if status == '410':
        return self.delete(url)
      return self.error('Could not fetch source URL %s' % url)

    if not resp:
      return
    self.fetched, data = resp

    # create the Publish entity so we can store the result.
    if (self.entity.status == 'complete' and self.entity.type != 'preview' and
        not self.PREVIEW and not appengine_config.DEBUG):
      return self.error("Sorry, you've already published that page, and Bridgy Publish doesn't yet support updating or deleting existing posts. Details: https://github.com/snarfed/bridgy/issues/84")

    # find rel-shortlink, if any
    # http://microformats.org/wiki/rel-shortlink
    # https://github.com/snarfed/bridgy/issues/173
    soup = util.beautifulsoup_parse(self.fetched.text)
    shortlinks = (soup.find_all('link', rel='shortlink') +
                  soup.find_all('a', rel='shortlink') +
                  soup.find_all('a', class_='shortlink'))
    if shortlinks:
      self.shortlink = shortlinks[0]['href']

    # loop through each item and its children and try to preview/create it. if
    # it fails, try the next one. break after the first one that works.
    result = None
    types = set()
    queue = collections.deque(data.get('items', []))
    while queue:
      item = queue.popleft()
      item_types = set(item.get('type'))
      if 'h-feed' in item_types and 'h-entry' not in item_types:
        queue.extend(item.get('children', []))
        continue
      elif not item_types & PUBLISHABLE_TYPES:
        types = types.union(item_types)
        continue

      try:
        result = self.attempt_single_item(item)
        if self.entity.published:
          break
        if result.abort:
          if result.error_plain:
            self.error(result.error_plain, html=result.error_html, data=item)
          return
        # try the next item
        for embedded in ('rsvp', 'invitee', 'repost', 'repost-of', 'like',
                         'like-of', 'in-reply-to'):
          if embedded in item.get('properties', []):
            item_types.add(embedded)
        logging.info(
          'Object type(s) %s not supported; error=%s; trying next.',
          item_types, result.error_plain)
        types = types.union(item_types)
        queue.extend(item.get('children', []))
      except BaseException, e:
        code, body = util.interpret_http_exception(e)
        if code in self.source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
          # the user deauthorized the bridgy app, or the token expired, so
          # disable this source.
          logging.warning('Disabling source due to: %s' % e, exc_info=True)
          self.source.status = 'disabled'
          self.source.put()
          # TODO: eventually drop this to just if source.is_beta_user(). leaving
          # for everyone right now for initial monitoring.
          util.email_me(subject='Bridgy Publish: disabled %s' % self.source.label(),
                        body=body)
        if isinstance(e, (NotImplementedError, ValueError, urllib2.URLError)):
          code = '400'
        elif not code:
          raise
        msg = 'Error: %s %s' % (body or '', e)
        return self.error(msg, status=code, mail=code not in ('400', '404', '502', '503', '504'))