Example #1
0
    def post(self):
        """Queues discovery for one user-supplied URL, then redirects.

        Reads the required ``url`` request parameter and handles three cases:

        * the URL is on the silo's domain: its post id is extracted and a
          discover task is added (typed ``'event'`` for ``/events/`` paths)
        * the URL is on one of the source's own domains: it is run through
          original post discovery and a discover task is added for every
          syndication link returned
        * anything else: nothing is queued

        The outcome message is added to ``self.messages`` and the request is
        redirected to ``source.bridgy_url(self)``.
        """
        source = self.load_source()

        # validate URL, find silo post
        url = util.get_required_param(self, 'url')
        domain = util.domain_from_link(url)
        path = urllib.parse.urlparse(url).path
        silo = source.GR_CLASS
        msg = 'Discovering now. Refresh in a minute to see the results!'

        if domain == silo.DOMAIN:
            post_id = silo.post_id(url)
            if not post_id:
                msg = "Sorry, that doesn't look like a %s post URL." % silo.NAME
            else:
                task_type = 'event' if path.startswith('/events/') else None
                util.add_discover_task(source, post_id, type=task_type)

        elif util.domain_or_parent_in(domain, source.domains):
            synd_links = original_post_discovery.process_entry(
                source, url, {}, False, [])
            if not synd_links:
                msg = 'Failed to fetch %s or find a %s syndication link.' % (
                    util.pretty_link(url), silo.NAME)
            else:
                for synd_link in synd_links:
                    util.add_discover_task(source, silo.post_id(synd_link))
                # record when a syndication URL was last seen for this source
                source.updates = {'last_syndication_url': util.now_fn()}
                models.Source.put_updates(source)

        else:
            msg = 'Please enter a URL on either your web site or %s.' % silo.NAME

        self.messages.add(msg)
        self.redirect(source.bridgy_url(self))
Example #2
0
  def post(self):
    """Queues discovery for one user-supplied URL, then redirects.

    Request parameters:
      source_key: URL-safe ndb key of the source entity
      url: a silo post URL, or a post URL on one of the source's web sites

    Aborts with HTTP 400 if source_key can't be decoded or doesn't resolve to
    an entity. Otherwise adds an outcome message to self.messages and
    redirects to source.bridgy_url(self).
    """
    # load source
    try:
      source = ndb.Key(urlsafe=util.get_required_param(self, 'source_key')).get()
      if not source:
        self.abort(400, 'Source key not found')
    except ProtocolBufferDecodeError:
      logging.exception('Bad value for source_key')
      self.abort(400, 'Bad value for source_key')

    # validate URL, find silo post
    url = util.get_required_param(self, 'url')
    domain = util.domain_from_link(url)
    msg = 'Discovering now. Refresh in a minute to see the results!'

    if domain == source.GR_CLASS.DOMAIN:
      post_id = source.GR_CLASS.post_id(url)
      if post_id:
        util.add_discover_task(source, post_id)
      else:
        # no post id could be extracted, so don't enqueue a task for it and
        # don't tell the user that discovery is running
        msg = "Sorry, that doesn't look like a %s post URL." % source.GR_CLASS.NAME
    elif util.domain_or_parent_in(domain, source.domains):
      synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
      if synd_links:
        for link in synd_links:
          util.add_discover_task(source, source.GR_CLASS.post_id(link))
      else:
        msg = 'Failed to fetch %s or find a %s syndication link.' % (
          util.pretty_link(url), source.GR_CLASS.NAME)
    else:
      msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME

    self.messages.add(msg)
    self.redirect(source.bridgy_url(self))
Example #3
0
  def find_mention_item(self, items):
    """Finds the first mf2 item that refers to the target.

    An item matches when one of its in-reply-to, like, like-of, repost, or
    repost-of URLs is a target or, failing that, when its content text
    contains a target. An item that doesn't match has its children searched
    before the next item is tried.

    Side effects: on a match, sets self.entity.type and may set or replace the
    matching item's content.html or content.value.

    Args:
      items: sequence of mf2 item dicts

    Returns:
      mf2 item dict or None
    """
    response_props = ('in-reply-to', 'like', 'like-of', 'repost', 'repost-of')
    entity_types = {
      'in-reply-to': 'comment',
      'like-of': 'like',
      'repost-of': 'repost',
    }
    default_text = {
      'comment': 'replied to this.',
      'like': 'liked this.',
      'repost': 'reposted this.',
    }

    for item in items:
      props = item.setdefault('properties', {})

      # only the first content element is looked at
      content = props.setdefault('content', [{}])[0]
      text = content.get('html') or content.get('value')

      # first choice: the target is in one of the response properties
      mention_type = None
      for prop in response_props:
        candidates = [urlparse.urldefrag(u)[0] for u in
                      microformats2.get_string_urls(props.get(prop, []))]
        if self.any_target_in(candidates):
          mention_type = prop
          break

      # second choice: the target is mentioned somewhere in the content
      if mention_type is None and text and self.any_target_in(text):
        mention_type = 'post'
        url = first_value(props, 'url') or self.source_url
        name = first_value(props, 'name') or first_value(props, 'summary')
        text = content['html'] = ('mentioned this in %s.' %
                                  util.pretty_link(url, text=name, max_length=280))

      if not mention_type:
        # check children in case this is eg an h-feed
        found = self.find_mention_item(item.get('children', []))
        if found:
          return found
        continue

      # found the target!
      rsvp = first_value(props, 'rsvp')
      if rsvp:
        self.entity.type = 'rsvp'
        if not text:
          content['value'] = 'RSVPed %s.' % rsvp
      else:
        self.entity.type = entity_types.get(mention_type, mention_type)
        if not text:
          content['value'] = default_text[self.entity.type]
      return item

    return None
Example #4
0
  def fetch_mf2(self, url):
    """Fetches a URL and extracts its mf2 data.

    Side effects: sets self.entity.html once the fetch succeeds (if
    self.entity is set), calls self.error() on errors.

    Pages that look like Tumblr's markup (div#content > div.post > div.copy)
    are rewritten to use microformats2 class names before parsing, and only
    the rewritten post is parsed.

    Args:
      url: string

    Returns:
      (requests.Response, mf2 data dict) on success. On failure, whatever
      self.error() returns (presumably None; confirm against self.error).
    """
    try:
      fetched = util.requests_get(url)
      fetched.raise_for_status()
    except BaseException as e:
      # NOTE(review): BaseException also covers KeyboardInterrupt and
      # SystemExit. Presumably deliberate; confirm before narrowing.
      util.interpret_http_exception(e)  # log exception
      return self.error('Could not fetch source URL %s' % url)

    if self.entity:
      self.entity.html = fetched.text

    # .text is decoded unicode string, .content is raw bytes. if the HTTP
    # headers didn't specify a charset, pass raw bytes to BeautifulSoup so it
    # can look for a <meta> tag with a charset and decode.
    text = (fetched.text if 'charset' in fetched.headers.get('content-type', '')
            else fetched.content)
    doc = BeautifulSoup(text)

    # special case tumblr's markup: div#content > div.post > div.copy
    # convert to mf2.
    contents = doc.find_all(id='content')
    if contents:
      # only the first element with id="content" is considered
      post = contents[0].find_next(class_='post')
      if post:
        # the class attributes are replaced, not appended to
        post['class'] = 'h-entry'
        copy = post.find_next(class_='copy')
        if copy:
          copy['class'] = 'e-content'
        photo = post.find_next(class_='photo-wrapper')
        if photo:
          img = photo.find_next('img')
          if img:
            img['class'] = 'u-photo'
        # from here on doc is the serialized post only, not the whole page
        doc = unicode(post)

    # parse microformats, convert to ActivityStreams
    data = parser.Parser(doc=doc, url=fetched.url).to_dict()
    logging.debug('Parsed microformats2: %s', json.dumps(data, indent=2))
    items = data.get('items', [])
    # an empty first item counts as no data too
    if not items or not items[0]:
      return self.error('No microformats2 data found in ' + fetched.url,
                        data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

    return fetched, data
Example #5
0
  def post(self):
    """Queues discovery for one user-supplied URL, then redirects.

    Reads the required url request parameter. A URL on the silo's domain is
    queued by its post id (typed 'event' for /events/ paths). A URL on one of
    the source's own domains is run through original post discovery and every
    syndication link returned is queued. Any other URL queues nothing.

    The outcome message is added to self.messages and the request is
    redirected to source.bridgy_url(self).
    """
    source = self.load_source()

    # validate URL, find silo post
    url = util.get_required_param(self, 'url')
    domain = util.domain_from_link(url)
    path = urlparse.urlparse(url).path
    silo = source.GR_CLASS
    msg = 'Discovering now. Refresh in a minute to see the results!'

    if domain == silo.DOMAIN:
      post_id = silo.post_id(url)
      if not post_id:
        msg = "Sorry, that doesn't look like a %s post URL." % silo.NAME
      else:
        task_type = 'event' if path.startswith('/events/') else None
        util.add_discover_task(source, post_id, type=task_type)

    elif util.domain_or_parent_in(domain, source.domains):
      synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
      if not synd_links:
        msg = 'Failed to fetch %s or find a %s syndication link.' % (
          util.pretty_link(url), silo.NAME)
      else:
        for synd_link in synd_links:
          util.add_discover_task(source, silo.post_id(synd_link))
        # record when a syndication URL was last seen for this source
        source.updates = {'last_syndication_url': util.now_fn()}
        models.Source.put_updates(source)

    else:
      msg = 'Please enter a URL on either your web site or %s.' % silo.NAME

    self.messages.add(msg)
    self.redirect(source.bridgy_url(self))
Example #6
0
    def template_vars(self):
        """Collects recently updated entities with failed or unsent URLs.

        Walks entities from most to least recently updated and stops once
        ``NUM_ENTITIES`` have been collected or an entity was last updated
        more than an hour ago. Entities that are complete, or that have
        neither error nor unsent URLs, are skipped. Each collected entity gets
        ``links``, ``response`` and ``activities`` attributes for rendering.

        Returns:
          dict with a single ``'responses'`` key holding the sorted entities
        """
        max_age = datetime.timedelta(hours=1)
        entities = []

        def sort_key(entity):
            return (entity.source, entity.activities, entity.response)

        for cls in (Response, ):  # BlogPost
            for e in cls.query().order(-cls.updated):
                if len(entities) >= self.NUM_ENTITIES:
                    break
                if e.updated < datetime.datetime.now() - max_age:
                    break
                if not (e.error or e.unsent) or e.status == 'complete':
                    continue

                e.links = [
                    util.pretty_link(u, new_tab=True)
                    for u in e.error + e.failed
                ]
                if e.key.kind() != 'Response':
                    e.response = {'content': '[BlogPost]'}
                    e.activities = [{'url': e.key.id()}]
                else:
                    e.response = json.loads(e.response_json)
                    e.activities = [json.loads(a) for a in e.activities_json]

                entities.append(e)

        entities.sort(key=sort_key)
        return {'responses': entities}
Example #7
0
    def find_mention_item(self, data):
        """Finds the first top-level mf2 item that refers to the target.

        An item matches when one of its in-reply-to, like, like-of, repost,
        or repost-of URLs is a target or, failing that, when its content text
        contains a target.

        Side effects: on a match, sets self.entity.type and may set or
        replace the matching item's content.html or content.value.

        Args:
          data: mf2 data dict

        Returns:
          mf2 item dict or None
        """
        response_props = ('in-reply-to', 'like', 'like-of', 'repost',
                          'repost-of')
        entity_types = {
            'in-reply-to': 'comment',
            'like-of': 'like',
            'repost-of': 'repost',
        }
        default_text = {
            'comment': 'replied to this.',
            'like': 'liked this.',
            'repost': 'reposted this.',
        }

        for item in data.get('items', []):
            props = item.setdefault('properties', {})

            # only the first content element is looked at
            content = props.setdefault('content', [{}])[0]
            text = content.get('html') or content.get('value')

            # first choice: the target is in one of the response properties
            mention_type = None
            for prop in response_props:
                candidates = [
                    urlparse.urldefrag(u)[0]
                    for u in microformats2.get_string_urls(props.get(prop, []))
                ]
                if self.any_target_in(candidates):
                    mention_type = prop
                    break

            if mention_type is None:
                # second choice: the target is mentioned in the content
                if not (text and self.any_target_in(text)):
                    continue
                mention_type = 'post'
                url = first_value(props, 'url') or self.source_url
                name = first_value(props, 'name') or first_value(
                    props, 'summary')
                text = content['html'] = (
                    'mentioned this in %s.' %
                    util.pretty_link(url, text=name, max_length=280))

            # found the target!
            rsvp = first_value(props, 'rsvp')
            if rsvp:
                self.entity.type = 'rsvp'
                if not text:
                    content['value'] = 'RSVPed %s.' % rsvp
            else:
                self.entity.type = entity_types.get(mention_type, mention_type)
                if not text:
                    content['value'] = default_text[self.entity.type]
            return item

        return None
Example #8
0
    def process_webmention_links(self, e):
        """Generates pretty HTML for the links in a :class:`Webmentions` entity.

        Links are grouped under the headings 'Failed', 'Sending', 'Sent' and a
        "no webmention support" heading. A URL that is also in ``e.error`` is
        left out of 'Sending', and one that is in ``e.error`` or ``e.unsent``
        is left out of 'Sent'.

        Args:
          e: :class:`Webmentions` subclass (:class:`Response` or
            :class:`BlogPost`)

        Returns:
          the heading => set of link HTML dict, passed through
          :func:`util.trim_nulls`
        """
        def link(url, glyph):
            return util.pretty_link(
                url,
                glyphicon=glyph,
                attrs={'class': 'original-post u-bridgy-target'},
                new_tab=True)

        failed = {link(url, 'exclamation-sign') for url in e.error + e.failed}
        sending = {
            link(url, 'transfer') for url in e.unsent if url not in e.error
        }
        sent = {
            link(url, None) for url in e.sent
            if url not in (e.error + e.unsent)
        }
        skipped = {link(url, None) for url in e.skipped}

        return util.trim_nulls({
            'Failed': failed,
            'Sending': sending,
            'Sent': sent,
            'No <a href="http://indiewebify.me/#send-webmentions">webmention</a> '
            'support': skipped,
        })
Example #9
0
  def find_mention_item(self, data):
    """Finds the first top-level mf2 item that refers to the target.

    An item matches when one of its in-reply-to, like, like-of, repost, or
    repost-of URLs is a target or, failing that, when its content text
    contains a target.

    Side effects: on a match, sets self.entity.type and may set or replace the
    matching item's content.html or content.value.

    Args:
      data: mf2 data dict

    Returns:
      mf2 item dict or None
    """
    response_props = ('in-reply-to', 'like', 'like-of', 'repost', 'repost-of')
    entity_types = {
      'in-reply-to': 'comment',
      'like-of': 'like',
      'repost-of': 'repost',
    }
    default_text = {
      'comment': 'replied to this.',
      'like': 'liked this.',
      'repost': 'reposted this.',
    }

    for item in data.get('items', []):
      props = item.setdefault('properties', {})

      # only the first content element is looked at
      content = props.setdefault('content', [{}])[0]
      text = content.get('html') or content.get('value')

      # first choice: the target is in one of the response properties
      mention_type = None
      for prop in response_props:
        candidates = [urlparse.urldefrag(u)[0] for u in
                      microformats2.get_string_urls(props.get(prop, []))]
        if self.any_target_in(candidates):
          mention_type = prop
          break

      if mention_type is None:
        # second choice: the target is mentioned somewhere in the content
        if not (text and self.any_target_in(text)):
          continue
        mention_type = 'post'
        url = first_value(props, 'url') or self.source_url
        name = first_value(props, 'name') or first_value(props, 'summary')
        text = content['html'] = ('mentioned this in %s.' %
                                  util.pretty_link(url, text=name))

      # found the target!
      rsvp = first_value(props, 'rsvp')
      if rsvp:
        self.entity.type = 'rsvp'
        if not text:
          content['value'] = 'RSVPed %s.' % rsvp
      else:
        self.entity.type = entity_types.get(mention_type, mention_type)
        if not text:
          content['value'] = default_text[self.entity.type]
      return item

    return None
Example #10
0
  def fetch_mf2(self, url):
    """Fetches a URL and extracts its mf2 data.

    Side effects: sets self.entity.html once the fetch succeeds (if
    self.entity is set), calls self.error() on errors.

    Pages that look like Tumblr's markup (div#content > div.post > div.copy)
    are rewritten to use microformats2 class names before parsing, and only
    the rewritten post is parsed.

    Args:
      url: string

    Returns:
      (requests.Response, mf2 data dict) on success. On failure, whatever
      self.error() returns (presumably None; confirm against self.error).
    """
    try:
      fetched = requests.get(url, timeout=HTTP_TIMEOUT)
      fetched.raise_for_status()
    except BaseException:
      # NOTE(review): BaseException also covers KeyboardInterrupt and
      # SystemExit, and the exception itself is not logged here.
      return self.error('Could not fetch source URL %s' % url)

    if self.entity:
      self.entity.html = fetched.text

    doc = BeautifulSoup(fetched.text)

    # special case tumblr's markup: div#content > div.post > div.copy
    # convert to mf2.
    contents = doc.find_all(id='content')
    if contents:
      # only the first element with id="content" is considered
      post = contents[0].find_next(class_='post')
      if post:
        # the class attributes are replaced, not appended to
        post['class'] = 'h-entry'
        copy = post.find_next(class_='copy')
        if copy:
          copy['class'] = 'e-content'
        photo = post.find_next(class_='photo-wrapper')
        if photo:
          img = photo.find_next('img')
          if img:
            img['class'] = 'u-photo'
        # from here on doc is the serialized post only, not the whole page
        doc = unicode(post)

    # parse microformats, convert to ActivityStreams
    data = parser.Parser(doc=doc, url=fetched.url).to_dict()
    logging.debug('Parsed microformats2: %s', pprint.pformat(data))
    items = data.get('items', [])
    # an empty first item counts as no data too
    if not items or not items[0]:
      return self.error('No microformats2 data found in ' + fetched.url,
                        data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

    return fetched, data
Example #11
0
    def _find_source(self, source_cls, url, domain):
        """Returns the source that should publish a post URL, or None if not found.

        Among the sources registered for ``domain`` that are not disabled and
        have the publish feature, picks the one with the longest
        ``domain_urls`` entry that is a prefix of ``url`` (scheme, case and
        surrounding slashes ignored). Calls :meth:`error` when no source
        qualifies.

        Args:
          source_cls: :class:`models.Source` subclass for this silo
          url: string
          domain: string, url's domain

        Returns: :class:`models.Source`
        """
        domain = domain.lower()
        sources = source_cls.query().filter(
            source_cls.domains == domain).fetch(100)
        if not sources:
            self.error(
                "Could not find <b>%(type)s</b> account for <b>%(domain)s</b>. Check that your %(type)s profile has %(domain)s in its <em>web site</em> or <em>link</em> field, then try signing up again."
                % {
                    'type': source_cls.GR_CLASS.NAME,
                    'domain': domain
                })
            return

        current_url = ''
        sources_ready = []
        best_match = None
        for source in sources:
            logging.info('Source: %s , features %s, status %s, poll status %s',
                         source.bridgy_url(self), source.features,
                         source.status, source.poll_status)
            if source.status != 'disabled' and 'publish' in source.features:
                # use a source that has a domain_url matching the url provided,
                # including path. find the source with the closest match.
                sources_ready.append(source)
                schemeless_url = util.schemeless(url.lower()).strip('/')
                for domain_url in source.domain_urls:
                    schemeless_domain_url = util.schemeless(
                        domain_url.lower()).strip('/')
                    if (schemeless_url.startswith(schemeless_domain_url)
                            and len(domain_url) > len(current_url)):
                        current_url = domain_url
                        best_match = source

        if best_match:
            return best_match
        elif sources_ready:
            # argument order must follow the placeholders: the post link
            # first, then the host URL for each of the two hrefs
            self.error(
                'No account found that matches %s. Check that <a href="%s/about#profile-link">the web site URL is in your silo profile</a>, then <a href="%s/">sign up again</a>.'
                % (util.pretty_link(url), self.request.host_url,
                   self.request.host_url))
        else:
            self.error(
                'Publish is not enabled for your account. <a href="%s/">Try signing up!</a>'
                % self.request.host_url)
Example #12
0
    def finish(self, auth_entity, state=None):
        """Finishes the Medium OAuth flow.

        Without an auth entity, or when the user has no publications, hands
        straight off to ``maybe_add_or_delete_source``. Otherwise renders
        ``choose_blog.html`` listing the user's profile followed by their
        publications.

        Side effects: stores the fetched publications JSON on ``auth_entity``
        and saves it.

        Args:
          auth_entity: auth entity for the user, or a falsy value (presumably
            when the user declined; confirm against the caller)
          state: passed through to ``maybe_add_or_delete_source`` and to the
            template
        """
        if not auth_entity:
            self.maybe_add_or_delete_source(Medium, auth_entity, state)
            return

        user = json_loads(auth_entity.user_json)['data']
        username = user['username']
        # normalize to the @-prefixed form; this is what the guard checks for
        if not username.startswith('@'):
            username = '@' + username

        # fetch publications this user contributes or subscribes to.
        # (sadly medium's API doesn't tell us the difference unless we fetch each
        # pub's metadata separately.)
        # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
        auth_entity.publications_json = auth_entity.get(
            oauth_medium.API_BASE + 'users/%s/publications' % user['id']).text
        auth_entity.put()
        pubs = json_loads(auth_entity.publications_json).get('data')
        if not pubs:
            self.maybe_add_or_delete_source(Medium,
                                            auth_entity,
                                            state,
                                            id=username)
            return

        # add user profile to start of pubs list
        user['id'] = username
        pubs.insert(0, user)

        template_vars = {
            'action': '/medium/add',
            'state': state,
            'auth_entity_key': auth_entity.key.urlsafe(),
            # entries without an id are left out
            'blogs': [{
                'id': p['id'],
                'title': p.get('name', ''),
                'url': p.get('url', ''),
                'pretty_url': util.pretty_link(str(p.get('url', ''))),
                'image': p.get('imageUrl', ''),
            } for p in pubs if p.get('id')],
        }
        logging.info('Rendering choose_blog.html with %s', template_vars)
        self.response.headers['Content-Type'] = 'text/html'
        self.response.out.write(
            JINJA_ENV.get_template('choose_blog.html').render(**template_vars))
Example #13
0
    def post(self):
        """Adds or deletes one of a source's web site URLs, then redirects.

        Exactly one of the ``add`` and ``delete`` request parameters must be
        set; otherwise the request is aborted with HTTP 400. Deleting a URL
        that isn't in ``source.domain_urls`` also aborts with HTTP 400.

        ``source.domains`` is kept in step with ``source.domain_urls``: a
        domain is stored once no matter how many URLs share it, and is only
        removed when the last URL on it is deleted.
        """
        source = self.load_source()
        redirect_url = '%s?%s' % (self.request.path,
                                  urllib.parse.urlencode({
                                      'source_key':
                                      source.key.urlsafe().decode(),
                                  }))

        add = self.request.get('add')
        delete = self.request.get('delete')
        if (add and delete) or (not add and not delete):
            self.abort(400,
                       'Either add or delete param (but not both) required')

        link = util.pretty_link(add or delete)

        if add:
            resolved = Source.resolve_profile_url(add)
            if resolved:
                if resolved in source.domain_urls:
                    self.messages.add('%s already exists.' % link)
                else:
                    source.domain_urls.append(resolved)
                    domain = util.domain_from_link(resolved)
                    # several URLs can share a domain; storing it twice would
                    # leave a stale copy behind once they are all deleted
                    if domain not in source.domains:
                        source.domains.append(domain)
                    source.put()
                    self.messages.add('Added %s.' % link)
            else:
                self.messages.add(
                    "%s doesn't look like your web site. Try again?" % link)

        else:
            assert delete
            try:
                source.domain_urls.remove(delete)
            except ValueError:
                self.abort(
                    400, "%s not found in %s's current web sites" %
                    (delete, source.label()))
            domain = util.domain_from_link(delete)
            remaining = {
                util.domain_from_link(url) for url in source.domain_urls
            }
            if domain not in remaining:
                # remove every copy, and tolerate the domain already being
                # absent instead of raising ValueError
                while domain in source.domains:
                    source.domains.remove(domain)
            source.put()
            self.messages.add('Removed %s.' % link)

        self.redirect(redirect_url)
Example #14
0
  def process_webmention_links(self, e):
    """Generates pretty HTML for the links in a BlogWebmention entity.

    Links are grouped under the headings 'Failed', 'Sending', 'Sent' and a
    "no webmention support" heading. A URL that is also in e.error is left out
    of 'Sending', and one that is in e.error or e.unsent is left out of 'Sent'.

    Args:
      e: BlogWebmention subclass (Response or BlogPost)

    Returns:
      the heading => set of link HTML dict, passed through util.trim_nulls
    """
    def link(url, glyph):
      return util.pretty_link(
        url, glyphicon=glyph, attrs={'class': 'original-post u-bridgy-target'},
        new_tab=True)

    failed = {link(url, 'exclamation-sign') for url in e.error + e.failed}
    sending = {link(url, 'transfer') for url in e.unsent if url not in e.error}
    sent = {link(url, None) for url in e.sent
            if url not in (e.error + e.unsent)}
    skipped = {link(url, None) for url in e.skipped}

    return util.trim_nulls({
        'Failed': failed,
        'Sending': sending,
        'Sent': sent,
        'No <a href="http://indiewebify.me/#send-webmentions">webmention</a> '
        'support': skipped,
        })
Example #15
0
  def _find_source(self, source_cls, url, domain):
    """Picks the source that should publish a post URL.

    Among the sources registered for domain that are not disabled and have the
    publish feature, picks the one with the longest domain_urls entry that is
    a prefix of url (scheme, case and surrounding slashes ignored). Calls
    self.error() when no source qualifies.

    Args:
      source_cls: :class:`models.Source` subclass for this silo
      url: string
      domain: string, url's domain

    Returns: :class:`models.Source`, or None if not found
    """
    domain = domain.lower()
    sources = source_cls.query().filter(source_cls.domains == domain).fetch(100)
    if not sources:
      self.error("Could not find <b>%(type)s</b> account for <b>%(domain)s</b>. Check that your %(type)s profile has %(domain)s in its <em>web site</em> or <em>link</em> field, then try signing up again." %
        {'type': source_cls.GR_CLASS.NAME, 'domain': domain})
      return

    longest_url = ''
    best_match = None
    sources_ready = []
    for source in sources:
      logging.info('Source: %s , features %s, status %s, poll status %s',
                   source.bridgy_url(self), source.features, source.status,
                   source.poll_status)
      if source.status == 'disabled' or 'publish' not in source.features:
        continue

      # this source can publish. see whether one of its domain_urls matches
      # the url provided, including path, more closely than the best so far.
      sources_ready.append(source)
      schemeless_url = util.schemeless(url.lower()).strip('/')
      for domain_url in source.domain_urls:
        candidate = util.schemeless(domain_url.lower()).strip('/')
        if not schemeless_url.startswith(candidate):
          continue
        if len(domain_url) > len(longest_url):
          longest_url = domain_url
          best_match = source

    if best_match:
      return best_match

    if sources_ready:
      self.error(
        'No account found that matches %s. Check that <a href="/about#profile-link">the web site URL is in your silo profile</a>, then <a href="/">sign up again</a>.' %
        util.pretty_link(url))
    else:
      self.error('Publish is not enabled for your account. <a href="/">Try signing up!</a>')
Example #16
0
  def finish(self, auth_entity, state=None):
    """Finishes the Medium OAuth flow.

    Without an auth entity, or when the user has no publications, hands
    straight off to maybe_add_or_delete_source. Otherwise renders
    choose_blog.html listing the user's profile followed by their
    publications.

    Side effects: stores the fetched publications JSON on auth_entity and
    saves it.

    Args:
      auth_entity: auth entity for the user, or a falsy value (presumably when
        the user declined; confirm against the caller)
      state: passed through to maybe_add_or_delete_source and to the template
    """
    if not auth_entity:
      self.maybe_add_or_delete_source(Medium, auth_entity, state)
      return

    user = json.loads(auth_entity.user_json)['data']
    username = user['username']
    # normalize to the @-prefixed form; this is what the guard checks for
    if not username.startswith('@'):
      username = '@' + username

    # fetch publications this user contributes or subscribes to.
    # (sadly medium's API doesn't tell us the difference unless we fetch each
    # pub's metadata separately.)
    # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
    auth_entity.publications_json = auth_entity.get(
      oauth_medium.API_BASE + 'users/%s/publications' % user['id']).text
    auth_entity.put()
    pubs = json.loads(auth_entity.publications_json).get('data')
    if not pubs:
      self.maybe_add_or_delete_source(Medium, auth_entity, state,
                                      id=username)
      return

    # add user profile to start of pubs list
    user['id'] = username
    pubs.insert(0, user)

    template_vars = {
      'action': '/medium/add',
      'state': state,
      'auth_entity_key': auth_entity.key.urlsafe(),
      # entries without an id are left out
      'blogs': [{
        'id': p['id'],
        'title': p.get('name', ''),
        'url': p.get('url', ''),
        'pretty_url': util.pretty_link(str(p.get('url', ''))),
        'image': p.get('imageUrl', ''),
      } for p in pubs if p.get('id')],
    }
    logging.info('Rendering choose_blog.html with %s', template_vars)
    self.response.headers['Content-Type'] = 'text/html'
    self.response.out.write(
      JINJA_ENV.get_template('choose_blog.html').render(**template_vars))
Example #17
0
def edit_websites_post():
    """Adds or deletes one of a source's web site URLs, then redirects.

    Exactly one of the ``add`` and ``delete`` request values must be set;
    otherwise ``error()`` is called. Deleting a URL that isn't in
    ``source.domain_urls`` also calls ``error()``.

    ``source.domains`` is kept in step with ``source.domain_urls``: a domain
    is stored once no matter how many URLs share it, and is only removed when
    the last URL on it is deleted.

    Returns:
      a redirect back to the request path with the source's key as the
      ``source_key`` query parameter
    """
    source = util.load_source()
    redirect_url = f'{request.path}?{urllib.parse.urlencode({"source_key": source.key.urlsafe().decode()})}'

    add = request.values.get('add')
    delete = request.values.get('delete')
    if (add and delete) or (not add and not delete):
        error('Either add or delete param (but not both) required')

    link = util.pretty_link(add or delete)

    if add:
        resolved = Source.resolve_profile_url(add)
        if resolved:
            if resolved in source.domain_urls:
                flash(f'{link} already exists.')
            else:
                source.domain_urls.append(resolved)
                domain = util.domain_from_link(resolved)
                # several URLs can share a domain; storing it twice would
                # leave a stale copy behind once they are all deleted
                if domain not in source.domains:
                    source.domains.append(domain)
                source.put()
                flash(f'Added {link}.')
        else:
            flash(f"{link} doesn't look like your web site. Try again?")

    else:
        assert delete
        try:
            source.domain_urls.remove(delete)
        except ValueError:
            error(
                f"{delete} not found in {source.label()}'s current web sites")
        domain = util.domain_from_link(delete)
        remaining = {
            util.domain_from_link(url)
            for url in source.domain_urls
        }
        if domain not in remaining:
            # remove every copy, and tolerate the domain already being absent
            # instead of raising ValueError
            while domain in source.domains:
                source.domains.remove(domain)
        source.put()
        flash(f'Removed {link}.')

    return redirect(redirect_url)
Example #18
0
  def template_vars(self):
    """Collects recently updated responses that have error URLs.

    Walks responses from most to least recently updated and stops once
    NUM_RESPONSES have been collected or a response was last updated more than
    an hour ago. Responses that are complete or have no error URLs are
    skipped. Each collected response gets links, response and activities
    attributes for rendering.

    Returns:
      dict with a single 'responses' key holding the sorted responses
    """
    max_age = datetime.timedelta(hours=1)
    responses = []

    def sort_key(resp):
      return (resp.source, resp.activities, resp.response)

    # Find the most recently propagated responses with error URLs
    for r in Response.query().order(-Response.updated):
      if len(responses) >= self.NUM_RESPONSES:
        break
      if r.updated < datetime.datetime.now() - max_age:
        break
      if not r.error or r.status == 'complete':
        continue

      r.links = [util.pretty_link(u, new_tab=True) for u in r.error + r.failed]
      r.response = json.loads(r.response_json)
      r.activities = [json.loads(a) for a in r.activities_json]
      responses.append(r)

    responses.sort(key=sort_key)
    return {'responses': responses}
Example #19
0
  def post(self):
    """Adds or deletes one of a source's web site URLs, then redirects.

    Exactly one of the add and delete request parameters must be set;
    otherwise the request is aborted with HTTP 400. Deleting a URL that isn't
    in source.domain_urls also aborts with HTTP 400.

    source.domains is kept in step with source.domain_urls: a domain is stored
    once no matter how many URLs share it, and is only removed when the last
    URL on it is deleted.
    """
    source = self.load_source()
    redirect_url = '%s?%s' % (self.request.path, urllib.urlencode({
      'source_key': source.key.urlsafe(),
    }))

    add = self.request.get('add')
    delete = self.request.get('delete')
    if (add and delete) or (not add and not delete):
      self.abort(400, 'Either add or delete param (but not both) required')

    link = util.pretty_link(add or delete)

    if add:
      resolved = Source.resolve_profile_url(add)
      if resolved:
        if resolved in source.domain_urls:
          self.messages.add('%s already exists.' % link)
        else:
          source.domain_urls.append(resolved)
          domain = util.domain_from_link(resolved)
          # several URLs can share a domain; storing it twice would leave a
          # stale copy behind once they are all deleted
          if domain not in source.domains:
            source.domains.append(domain)
          source.put()
          self.messages.add('Added %s.' % link)
      else:
        self.messages.add("%s doesn't look like your web site. Try again?" % link)

    else:
      assert delete
      try:
        source.domain_urls.remove(delete)
      except ValueError:
        self.abort(400, "%s not found in %s's current web sites" % (
                          delete, source.label()))
      domain = util.domain_from_link(delete)
      if domain not in set(util.domain_from_link(url) for url in source.domain_urls):
        # remove every copy, and tolerate the domain already being absent
        # instead of raising ValueError
        while domain in source.domains:
          source.domains.remove(domain)
      source.put()
      self.messages.add('Removed %s.' % link)

    self.redirect(redirect_url)
Example #20
0
  def get_site_info(cls, handler, auth_entity):
    """Fetches the site info from the API.

    If the API responds with a 403 saying that API calls to this blog have
    been disabled, adds a message to the handler telling the user to enable
    the Jetpack JSON API and redirects to the front page. Any other HTTP error
    is re-raised.

    Args:
      handler: the current RequestHandler
      auth_entity: oauth_dropins.wordpress.WordPressAuth

    Returns: site info dict, or None if API calls are disabled for this blog
    """
    try:
      return cls.urlopen(auth_entity, API_SITE_URL % auth_entity.blog_id)
    except urllib2.HTTPError as e:
      code, body = interpret_http_exception(e)
      # code is compared as a string, not an int
      if (code == '403' and '"API calls to this blog have been disabled."' in body):
        handler.messages.add(
          'You need to <a href="http://jetpack.me/support/json-api/">enable '
          'the Jetpack JSON API</a> in %s\'s WordPress admin console.' %
          util.pretty_link(auth_entity.blog_url))
        handler.redirect('/')
        return None
      raise
Example #21
0
    def get_site_info(cls, handler, auth_entity):
        """Fetches the site info from the API.

        If the API responds with a 403 saying that API calls to this blog
        have been disabled, adds a message to the handler telling the user to
        enable the Jetpack JSON API and redirects to the front page. Any other
        HTTP error is re-raised.

        Args:
          handler: the current RequestHandler
          auth_entity: oauth_dropins.wordpress.WordPressAuth

        Returns: site info dict, or None if API calls are disabled for this blog
        """
        try:
            return cls.urlopen(auth_entity, API_SITE_URL % auth_entity.blog_id)
        except urllib2.HTTPError as e:
            code, body = util.interpret_http_exception(e)
            # code is compared as a string, not an int
            if (code == '403' and
                    '"API calls to this blog have been disabled."' in body):
                handler.messages.add(
                    'You need to <a href="http://jetpack.me/support/json-api/">enable '
                    'the Jetpack JSON API</a> in %s\'s WordPress admin console.'
                    % util.pretty_link(auth_entity.blog_url))
                handler.redirect('/')
                return None
            raise
Example #22
0
  def template_vars(self):
    """Collects recently updated entities with failed or unsent URLs.

    Walks entities from most to least recently updated and stops once
    NUM_ENTITIES have been collected or an entity was last updated more than
    an hour ago. Entities that are complete, or that have neither error nor
    unsent URLs, are skipped. Each collected entity gets links, response and
    activities attributes for rendering.

    Returns:
      dict with a single 'responses' key holding the sorted entities
    """
    max_age = datetime.timedelta(hours=1)
    entities = []

    def sort_key(entity):
      return (entity.source, entity.activities, entity.response)

    for cls in (Response,):  # BlogPost
      for e in cls.query().order(-cls.updated):
        if len(entities) >= self.NUM_ENTITIES:
          break
        if e.updated < datetime.datetime.now() - max_age:
          break
        if not (e.error or e.unsent) or e.status == 'complete':
          continue

        e.links = [util.pretty_link(u, new_tab=True) for u in e.error + e.failed]
        if e.key.kind() != 'Response':
          e.response = {'content': '[BlogPost]'}
          e.activities = [{'url': e.key.id()}]
        else:
          e.response = json.loads(e.response_json)
          e.activities = [json.loads(a) for a in e.activities_json]

        entities.append(e)

    entities.sort(key=sort_key)
    return {'responses': entities}
Example #23
0
def responses():
  """Renders the admin page of recently attempted responses with error URLs.

  Walks entities newest-first by ``updated``. Stops after collecting
  NUM_ENTITIES of them, or upon reaching one that was last updated more than
  an hour ago. Entities that have neither error nor unsent targets, or whose
  status is 'complete', are skipped.
  """
  window = datetime.timedelta(hours=1)
  recent = []

  for model in (Response,):  # BlogPost
    for entity in model.query().order(-model.updated):
      if len(recent) >= NUM_ENTITIES:
        break
      if entity.updated < util.now_fn() - window:
        break

      nothing_pending = not (entity.error or entity.unsent)
      if nothing_pending or entity.status == 'complete':
        continue

      entity.links = [util.pretty_link(url, new_tab=True)
                      for url in entity.error + entity.failed]

      if entity.key.kind() != 'Response':
        # placeholder display data for non-Response entities
        entity.response = {'content': '[BlogPost]'}
        entity.activities = [{'url': entity.key.id()}]
      else:
        entity.response = json_loads(entity.response_json)
        entity.activities = [json_loads(activity)
                             for activity in entity.activities_json]

      recent.append(entity)

  return render_template('admin_responses.html', responses=recent, logs=logs)
Example #24
0
 def link(url, g):
     """Wraps util.pretty_link() with the original-post link settings.

     Args:
       url: URL to link to
       g: passed through as pretty_link's ``glyphicon`` argument

     Returns: whatever util.pretty_link() returns for these arguments
     """
     link_attrs = {'class': 'original-post u-bridgy-target'}
     return util.pretty_link(url, glyphicon=g, attrs=link_attrs, new_tab=True)
Example #25
0
    def fetch_mf2(self, url, require_mf2=True, raise_errors=False):
        """Fetches a URL and extracts its mf2 data.

    Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
    on errors.

    Args:
      url: string
      require_mf2: boolean, whether to return error if no mf2 are found
      raise_errors: boolean, whether to let error exceptions propagate up or
        handle them

    Returns:
      (:class:`requests.Response`, mf2 data dict) on success, None on failure
    """
        try:
            resp = util.requests_get(url)
            resp.raise_for_status()
        except BaseException as e:
            if raise_errors:
                raise
            util.interpret_http_exception(e)  # log exception
            return self.error('Could not fetch source URL %s' % url)

        if self.entity:
            self.entity.html = resp.text

        # parse microformats
        soup = util.parse_html(resp)
        mf2 = util.parse_mf2(soup, resp.url)

        # special case tumblr's markup: div#content > div.post > div.copy
        # convert to mf2 and re-parse
        if not mf2.get('items'):
            contents = soup.find_all(id='content')
            if contents:
                post = contents[0].find_next(class_='post')
                if post:
                    post['class'] = 'h-entry'
                    copy = post.find_next(class_='copy')
                    if copy:
                        copy['class'] = 'e-content'
                    photo = post.find_next(class_='photo-wrapper')
                    if photo:
                        img = photo.find_next('img')
                        if img:
                            img['class'] = 'u-photo'
                    # TODO: i should be able to pass post or contents[0] to mf2py instead
                    # here, but it returns no items. mf2py bug?
                    doc = str(post)
                    mf2 = util.parse_mf2(doc, resp.url)

        logging.debug('Parsed microformats2: %s', json_dumps(mf2, indent=2))
        items = mf2.get('items', [])
        if require_mf2 and (not items or not items[0]):
            return self.error('No microformats2 data found in ' + resp.url,
                              data=mf2,
                              html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (resp.url, util.pretty_link(resp.url)))

        return resp, mf2
Example #26
0
  def template_vars(self):
    """Builds the template variables for a user (source) page.

    Starts from the superclass's template vars and adds the source plus a few
    constants. If there is no source, returns just those. Otherwise, depending
    on self.source.features, adds:

    * blog webmention promo flags (when 'webmention' is not a feature)
    * 'responses', paging links and 'next_poll' (when 'listen' is a feature)
    * 'publishes' (when 'publish' is a feature)
    * 'blogposts' and 'webmentions' (when 'webmention' is a feature)

    Reads the responses_before and responses_after request params for paging
    and calls self.abort(400, ...) if either can't be parsed or both are given.

    Returns: dict of template variables
    """
    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'EPOCH': util.EPOCH,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
        })
    if not self.source:
      return vars

    if isinstance(self.source, instagram.Instagram):
      auth = self.source.auth_entity
      # prefer the IndieAuth entity's id, fall back to the first domain URL
      vars['indieauth_me'] = (
        auth.id if isinstance(auth, indieauth.IndieAuth)
        else self.source.domain_urls[0] if self.source.domain_urls
        else None)

    # Blog webmention promos
    if 'webmention' not in self.source.features:
      if self.source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
        vars[self.source.SHORT_NAME + '_promo'] = True
      else:
        # promo only if no existing source of that type has this domain
        for domain in self.source.domains:
          if ('.blogspot.' in domain and  # Blogger uses country TLDs
              not Blogger.query(Blogger.domains == domain).get()):
            vars['blogger_promo'] = True
          elif (domain.endswith('tumblr.com') and
                not Tumblr.query(Tumblr.domains == domain).get()):
            vars['tumblr_promo'] = True
          elif (domain.endswith('wordpress.com') and
                not WordPress.query(WordPress.domains == domain).get()):
            vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
      vars['responses'] = []
      query = Response.query().filter(Response.source == self.source.key)

      # if there's a paging param (responses_before or responses_after), update
      # query with it
      def get_paging_param(param):
        # returns the parsed param value, or None if the param is empty
        val = self.request.get(param)
        try:
          return util.parse_iso8601(val) if val else None
        except:
          msg = "Couldn't parse %s %r as ISO8601" % (param, val)
          logging.exception(msg)
          self.abort(400, msg)

      before = get_paging_param('responses_before')
      after = get_paging_param('responses_after')
      if before and after:
        self.abort(400, "can't handle both responses_before and responses_after")
      elif after:
        # ascending order here; results are re-sorted newest first below
        query = query.filter(Response.updated > after).order(Response.updated)
      elif before:
        query = query.filter(Response.updated < before).order(-Response.updated)
      else:
        query = query.order(-Response.updated)

      query_iter = query.iter()
      for i, r in enumerate(query_iter):
        r.response = json.loads(r.response_json)
        r.activities = [json.loads(a) for a in r.activities_json]

        # skip responses where the response or any of its activities is not
        # public
        if (not self.source.is_activity_public(r.response) or
            not all(self.source.is_activity_public(a) for a in r.activities)):
          continue
        elif r.type == 'post':
          r.activities = []

        r.actor = r.response.get('author') or r.response.get('actor', {})

        # fall back to the inner object's content when there's none at the
        # top level
        for a in r.activities + [r.response]:
          if not a.get('content'):
            a['content'] = a.get('object', {}).get('content')

        # still no content: synthesize a phrase from the response type or verb
        if not r.response.get('content'):
          phrases = {
            'like': 'liked this',
            'repost': 'reposted this',
            'rsvp-yes': 'is attending',
            'rsvp-no': 'is not attending',
            'rsvp-maybe': 'might attend',
            'rsvp-interested': 'is interested',
            'invite': 'is invited',
          }
          r.response['content'] = '%s %s.' % (
            r.actor.get('displayName') or '',
            phrases.get(r.type) or phrases.get(r.response.get('verb')))

        # convert image URL to https if we're serving over SSL
        image_url = r.actor.setdefault('image', {}).get('url')
        if image_url:
          r.actor['image']['url'] = util.update_scheme(image_url, self)

        # generate original post links
        r.links = self.process_webmention_links(r)
        r.original_links = [util.pretty_link(url, new_tab=True)
                            for url in r.original_posts]

        vars['responses'].append(r)
        # stop at 10 displayed responses, or once i exceeds 200 query results
        if len(vars['responses']) >= 10 or i > 200:
          break

      vars['responses'].sort(key=lambda r: r.updated, reverse=True)

      # calculate new paging param(s)
      new_after = (
        before if before else
        vars['responses'][0].updated if
          vars['responses'] and query_iter.probably_has_next() and (before or after)
        else None)
      if new_after:
        vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                         new_after.isoformat())

      new_before = (
        after if after else
        vars['responses'][-1].updated if
          vars['responses'] and query_iter.probably_has_next()
        else None)
      if new_before:
        vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                         new_before.isoformat())

      vars['next_poll'] = max(
        self.source.last_poll_attempt + self.source.poll_period(),
        # lower bound is 90 seconds from now
        util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in self.source.features:
      publishes = Publish.query().filter(Publish.source == self.source.key)\
                                 .order(-Publish.updated)\
                                 .fetch(10)
      for p in publishes:
        # the parent key's id is used as the page URL to link to
        p.pretty_page = util.pretty_link(
          p.key.parent().id().decode('utf-8'),
          attrs={'class': 'original-post u-url u-name'},
          new_tab=True)

      vars['publishes'] = publishes

    if 'webmention' in self.source.features:
      # Blog posts
      blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                  .order(-BlogPost.created)\
                                  .fetch(10)
      for b in blogposts:
        b.links = self.process_webmention_links(b)
        try:
          text = b.feed_item.get('title')
        except ValueError:
          # no usable title; pretty_link gets text=None
          text = None
        b.pretty_url = util.pretty_link(
          b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
          max_length=40, new_tab=True)

      # Blog webmentions
      webmentions = BlogWebmention.query()\
          .filter(BlogWebmention.source == self.source.key)\
          .order(-BlogWebmention.updated)\
          .fetch(10)
      for w in webmentions:
        w.pretty_source = util.pretty_link(
          w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
        try:
          target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                              self.source.domains)
        except BaseException:
          # best effort: any failure here is treated as "not our domain"
          target_is_source = False
        w.pretty_target = util.pretty_link(
          w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
          keep_host=target_is_source)

      vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars
Example #27
0
def user(site, id):
    """View for a user page.

    Looks up the source for the given site and id and renders its
    ``<SHORT_NAME>_user.html`` template. Depending on source.features, the
    template vars include blog webmention promo flags, recent responses with
    paging links, publishes, blog posts, and blog webmentions.

    Reads the responses_before and responses_after request params for paging
    and calls error() if either can't be parsed or both are given.

    Args:
      site: key into models.sources that selects the source class
      id: passed to cls.lookup(); if that finds nothing, also matched against
        the domains, inferred_username, name, and username properties

    Returns:
      the rendered user page; a 301 redirect to the source's bridgy_path() if
      id only matched one of the alternate properties; or the rendered
      user_not_found.html template with status 404 if the site is unknown or
      the source is missing or has no features
    """
    cls = models.sources.get(site)
    if not cls:
        return render_template('user_not_found.html'), 404

    source = cls.lookup(id)

    if not source:
        key = cls.query(
            ndb.OR(*[
                ndb.GenericProperty(prop) == id
                for prop in ('domains', 'inferred_username', 'name',
                             'username')
            ])).get(keys_only=True)
        if key:
            return redirect(cls(key=key).bridgy_path(), code=301)

    if not source or not source.features:
        return render_template('user_not_found.html'), 404

    source.verify()
    source = util.preprocess_source(source)

    vars = {
        'source': source,
        'logs': logs,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
    }

    # Blog webmention promos
    if 'webmention' not in source.features:
        if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
            vars[source.SHORT_NAME + '_promo'] = True
        else:
            # promo only if no existing source of that type has this domain
            for domain in source.domains:
                if ('.blogspot.' in domain and  # Blogger uses country TLDs
                        not Blogger.query(Blogger.domains == domain).get()):
                    vars['blogger_promo'] = True
                elif (util.domain_or_parent_in(domain, ['tumblr.com'])
                      and not Tumblr.query(Tumblr.domains == domain).get()):
                    vars['tumblr_promo'] = True
                # second arg is a list of domains, same as the tumblr check
                # above, not a bare string
                elif (util.domain_or_parent_in(domain, ['wordpress.com']) and
                      not WordPress.query(WordPress.domains == domain).get()):
                    vars['wordpress_promo'] = True

    # Responses
    if 'listen' in source.features or 'email' in source.features:
        vars['responses'] = []
        query = Response.query().filter(Response.source == source.key)

        # if there's a paging param (responses_before or responses_after), update
        # query with it
        def get_paging_param(param):
            val = request.values.get(param)
            try:
                # spaces are turned back into '+' before parsing
                return util.parse_iso8601(val.replace(' ',
                                                      '+')) if val else None
            except BaseException:
                error(f"Couldn't parse {param}, {val!r} as ISO8601")

        before = get_paging_param('responses_before')
        after = get_paging_param('responses_after')
        if before and after:
            error("can't handle both responses_before and responses_after")
        elif after:
            # ascending order here; results are re-sorted newest first below
            query = query.filter(Response.updated > after).order(
                Response.updated)
        elif before:
            query = query.filter(
                Response.updated < before).order(-Response.updated)
        else:
            query = query.order(-Response.updated)

        query_iter = query.iter()
        for i, r in enumerate(query_iter):
            r.response = json_loads(r.response_json)
            r.activities = [json_loads(a) for a in r.activities_json]

            # skip responses where the response or any of its activities is
            # not public
            if (not source.is_activity_public(r.response) or not all(
                    source.is_activity_public(a) for a in r.activities)):
                continue
            elif r.type == 'post':
                r.activities = []

            # for invites, the actor shown is the response's object
            verb = r.response.get('verb')
            r.actor = (r.response.get('object')
                       if verb == 'invite' else r.response.get('author')
                       or r.response.get('actor')) or {}

            # fall back to the inner object's content, displayName, or name
            # when there's no content at the top level
            activity_content = ''
            for a in r.activities + [r.response]:
                if not a.get('content'):
                    obj = a.get('object', {})
                    a['content'] = activity_content = (
                        obj.get('content') or obj.get('displayName') or
                        # historical, from a Reddit bug fixed in granary@4f9df7c
                        obj.get('name') or '')

            response_content = r.response.get('content')
            phrases = {
                'like': 'liked this',
                'repost': 'reposted this',
                'rsvp-yes': 'is attending',
                'rsvp-no': 'is not attending',
                'rsvp-maybe': 'might attend',
                'rsvp-interested': 'is interested',
                'invite': 'is invited',
            }
            phrase = phrases.get(r.type) or phrases.get(verb)
            # reposts only get the phrase when the fallback activity content
            # starts with the response's own content
            if phrase and (r.type != 'repost'
                           or activity_content.startswith(response_content)):
                r.response[
                    'content'] = f'{r.actor.get("displayName") or ""} {phrase}.'

            # convert image URL to https if we're serving over SSL
            image_url = r.actor.setdefault('image', {}).get('url')
            if image_url:
                r.actor['image']['url'] = util.update_scheme(
                    image_url, request)

            # generate original post links
            r.links = process_webmention_links(r)
            r.original_links = [
                util.pretty_link(url, new_tab=True) for url in r.original_posts
            ]

            vars['responses'].append(r)
            # stop at 10 displayed responses, or once i exceeds 200 query
            # results
            if len(vars['responses']) >= 10 or i > 200:
                break

        vars['responses'].sort(key=lambda r: r.updated, reverse=True)

        # calculate new paging param(s)
        new_after = (before if before else vars['responses'][0].updated if
                     vars['responses'] and query_iter.probably_has_next() and
                     (before or after) else None)
        if new_after:
            vars[
                'responses_after_link'] = f'?responses_after={new_after.isoformat()}#responses'

        new_before = (after if after else
                      vars['responses'][-1].updated if vars['responses']
                      and query_iter.probably_has_next() else None)
        if new_before:
            vars[
                'responses_before_link'] = f'?responses_before={new_before.isoformat()}#responses'

        vars['next_poll'] = max(
            source.last_poll_attempt + source.poll_period(),
            # lower bound is 90 seconds from now
            util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in source.features:
        publishes = Publish.query().filter(Publish.source == source.key)\
                                   .order(-Publish.updated)\
                                   .fetch(10)
        for p in publishes:
            # the parent key's id is used as the page URL to link to
            p.pretty_page = util.pretty_link(
                p.key.parent().id(),
                attrs={'class': 'original-post u-url u-name'},
                new_tab=True)

        vars['publishes'] = publishes

    if 'webmention' in source.features:
        # Blog posts
        blogposts = BlogPost.query().filter(BlogPost.source == source.key)\
                                    .order(-BlogPost.created)\
                                    .fetch(10)
        for b in blogposts:
            b.links = process_webmention_links(b)
            try:
                text = b.feed_item.get('title')
            except ValueError:
                # no usable title; pretty_link gets text=None
                text = None
            b.pretty_url = util.pretty_link(
                b.key.id(),
                text=text,
                attrs={'class': 'original-post u-url u-name'},
                max_length=40,
                new_tab=True)

        # Blog webmentions
        webmentions = BlogWebmention.query()\
            .filter(BlogWebmention.source == source.key)\
            .order(-BlogWebmention.updated)\
            .fetch(10)
        for w in webmentions:
            w.pretty_source = util.pretty_link(
                w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
            try:
                target_is_source = (urllib.parse.urlparse(
                    w.target_url()).netloc in source.domains)
            except BaseException:
                # best effort: any failure here is treated as "not our domain"
                target_is_source = False
            w.pretty_target = util.pretty_link(
                w.target_url(),
                attrs={'class': 'original-post'},
                new_tab=True,
                keep_host=target_is_source)

        vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return render_template(f'{source.SHORT_NAME}_user.html', **vars)
Example #28
0
  def template_vars(self):
    """Builds the template variables for a user (source) page.

    Returns an empty dict if there is no source. Otherwise starts from the
    superclass's template vars and, depending on self.source.features, adds:

    * blog webmention promo flags (when 'webmention' is not a feature)
    * 'responses' (when 'listen' is a feature)
    * 'publishes' (when 'publish' is a feature)
    * 'blogposts' and 'webmentions' (when 'webmention' is a feature)

    Returns: dict of template variables
    """
    if not self.source:
      return {}

    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'epoch': util.EPOCH,
        })

    # Blog webmention promos
    if 'webmention' not in self.source.features:
      if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
        vars[self.source.SHORT_NAME + '_promo'] = True
      else:
        # promo only if no existing source of that type has this domain
        for domain in self.source.domains:
          if ('.blogspot.' in domain and  # Blogger uses country TLDs
              not Blogger.query(Blogger.domains == domain).get()):
            vars['blogger_promo'] = True
          elif (domain.endswith('tumblr.com') and
                not Tumblr.query(Tumblr.domains == domain).get()):
            vars['tumblr_promo'] = True
          elif (domain.endswith('wordpress.com') and
                not WordPress.query(WordPress.domains == domain).get()):
            vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
      vars['responses'] = []
      # newest first
      for i, r in enumerate(Response.query()
                              .filter(Response.source == self.source.key)\
                              .order(-Response.updated)):
        r.response = json.loads(r.response_json)
        if r.activity_json:  # handle old entities
          r.activities_json.append(r.activity_json)
        r.activities = [json.loads(a) for a in r.activities_json]

        # skip responses where the response or any of its activities is not
        # public
        if (not gr_source.Source.is_public(r.response) or
            not all(gr_source.Source.is_public(a) for a in r.activities)):
          continue

        r.actor = r.response.get('author') or r.response.get('actor', {})
        # no content: synthesize a phrase from the response type or verb
        if not r.response.get('content'):
          phrases = {
            'like': 'liked this',
            'repost': 'reposted this',
            'rsvp-yes': 'is attending',
            'rsvp-no': 'is not attending',
            'rsvp-maybe': 'might attend',
            'invite': 'is invited',
          }
          r.response['content'] = '%s %s.' % (
            r.actor.get('displayName') or '',
            phrases.get(r.type) or phrases.get(r.response.get('verb')))

        # convert image URL to https if we're serving over SSL
        image_url = r.actor.setdefault('image', {}).get('url')
        if image_url:
          r.actor['image']['url'] = util.update_scheme(image_url, self)

        # generate original post links
        r.links = self.process_webmention_links(r)

        vars['responses'].append(r)
        # stop at 10 displayed responses, or once i exceeds 200 query results
        if len(vars['responses']) >= 10 or i > 200:
          break

    # Publishes
    if 'publish' in self.source.features:
      publishes = Publish.query().filter(Publish.source == self.source.key)\
                                 .order(-Publish.updated)\
                                 .fetch(10)
      for p in publishes:
        # the parent key's id is used as the page URL to link to
        p.pretty_page = util.pretty_link(
          p.key.parent().id(), a_class='original-post', new_tab=True)

      vars['publishes'] = publishes

    if 'webmention' in self.source.features:
      # Blog posts
      blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                  .order(-BlogPost.created)\
                                  .fetch(10)
      for b in blogposts:
        b.links = self.process_webmention_links(b)
        try:
          text = b.feed_item.get('title')
        except ValueError:
          # no usable title; pretty_link gets text=None
          text = None
        b.pretty_url = util.pretty_link(b.key.id(), text=text,
                                        a_class='original-post', max_length=40,
                                        new_tab=True)

      # Blog webmentions
      webmentions = BlogWebmention.query()\
          .filter(BlogWebmention.source == self.source.key)\
          .order(-BlogWebmention.updated)\
          .fetch(10)
      for w in webmentions:
        w.pretty_source = util.pretty_link(w.source_url(), a_class='original-post',
                                           new_tab=True)
        try:
          target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                              self.source.domains)
        except BaseException:
          # best effort: any failure here is treated as "not our domain"
          target_is_source = False
        w.pretty_target = util.pretty_link(w.target_url(), a_class='original-post',
                                           new_tab=True, keep_host=target_is_source)

      vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars
Example #29
0
  def template_vars(self):
    """Builds the template variables for a user (source) page.

    Starts from the superclass's template vars and adds the source plus a few
    constants. If there is no source, returns just those. Otherwise, depending
    on self.source.features, adds:

    * blog webmention promo flags (when 'webmention' is not a feature)
    * 'responses', paging links and 'next_poll' (when 'listen' is a feature)
    * 'publishes' (when 'publish' is a feature)
    * 'blogposts' and 'webmentions' (when 'webmention' is a feature)

    Reads the responses_before and responses_after request params for paging
    and calls self.abort(400, ...) if either can't be parsed or both are given.

    Returns: dict of template variables
    """
    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'EPOCH': util.EPOCH,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
        })
    if not self.source:
      return vars

    if isinstance(self.source, instagram.Instagram):
      auth = self.source.auth_entity
      # prefer the IndieAuth entity's id, fall back to the first domain URL
      vars['indieauth_me'] = (
        auth.id if isinstance(auth, indieauth.IndieAuth)
        else self.source.domain_urls[0] if self.source.domain_urls
        else None)

    # Blog webmention promos
    if 'webmention' not in self.source.features:
      if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
        vars[self.source.SHORT_NAME + '_promo'] = True
      else:
        # promo only if no existing source of that type has this domain
        for domain in self.source.domains:
          if ('.blogspot.' in domain and  # Blogger uses country TLDs
              not Blogger.query(Blogger.domains == domain).get()):
            vars['blogger_promo'] = True
          elif (domain.endswith('tumblr.com') and
                not Tumblr.query(Tumblr.domains == domain).get()):
            vars['tumblr_promo'] = True
          elif (domain.endswith('wordpress.com') and
                not WordPress.query(WordPress.domains == domain).get()):
            vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
      vars['responses'] = []
      query = Response.query().filter(Response.source == self.source.key)

      # if there's a paging param (responses_before or responses_after), update
      # query with it
      def get_paging_param(param):
        # returns the parsed param value, or None if the param is empty
        val = self.request.get(param)
        try:
          return util.parse_iso8601(val) if val else None
        except:
          msg = "Couldn't parse %s %r as ISO8601" % (param, val)
          logging.exception(msg)
          self.abort(400, msg)

      before = get_paging_param('responses_before')
      after = get_paging_param('responses_after')
      if before and after:
        self.abort(400, "can't handle both responses_before and responses_after")
      elif after:
        # ascending order here; results are re-sorted newest first below
        query = query.filter(Response.updated > after).order(Response.updated)
      elif before:
        query = query.filter(Response.updated < before).order(-Response.updated)
      else:
        query = query.order(-Response.updated)

      query_iter = query.iter()
      for i, r in enumerate(query_iter):
        r.response = json.loads(r.response_json)
        r.activities = [json.loads(a) for a in r.activities_json]

        # skip responses where the response or any of its activities is not
        # public
        if (not self.source.is_activity_public(r.response) or
            not all(self.source.is_activity_public(a) for a in r.activities)):
          continue
        elif r.type == 'post':
          r.activities = []

        r.actor = r.response.get('author') or r.response.get('actor', {})

        # fall back to the inner object's content when there's none at the
        # top level
        for a in r.activities + [r.response]:
          if not a.get('content'):
            a['content'] = a.get('object', {}).get('content')

        # still no content: synthesize a phrase from the response type or verb
        if not r.response.get('content'):
          phrases = {
            'like': 'liked this',
            'repost': 'reposted this',
            'rsvp-yes': 'is attending',
            'rsvp-no': 'is not attending',
            'rsvp-maybe': 'might attend',
            'rsvp-interested': 'is interested',
            'invite': 'is invited',
          }
          r.response['content'] = '%s %s.' % (
            r.actor.get('displayName') or '',
            phrases.get(r.type) or phrases.get(r.response.get('verb')))

        # convert image URL to https if we're serving over SSL
        image_url = r.actor.setdefault('image', {}).get('url')
        if image_url:
          r.actor['image']['url'] = util.update_scheme(image_url, self)

        # generate original post links
        r.links = self.process_webmention_links(r)
        r.original_links = [util.pretty_link(url, new_tab=True)
                            for url in r.original_posts]

        vars['responses'].append(r)
        # stop at 10 displayed responses, or once i exceeds 200 query results
        if len(vars['responses']) >= 10 or i > 200:
          break

      vars['responses'].sort(key=lambda r: r.updated, reverse=True)

      # calculate new paging param(s)
      new_after = (
        before if before else
        vars['responses'][0].updated if
          vars['responses'] and query_iter.probably_has_next() and (before or after)
        else None)
      if new_after:
        vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                         new_after.isoformat())

      new_before = (
        after if after else
        vars['responses'][-1].updated if
          vars['responses'] and query_iter.probably_has_next()
        else None)
      if new_before:
        vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                         new_before.isoformat())

      vars['next_poll'] = max(
        self.source.last_poll_attempt + self.source.poll_period(),
        # lower bound is 90 seconds from now
        util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in self.source.features:
      publishes = Publish.query().filter(Publish.source == self.source.key)\
                                 .order(-Publish.updated)\
                                 .fetch(10)
      for p in publishes:
        # the parent key's id is used as the page URL to link to
        p.pretty_page = util.pretty_link(
          p.key.parent().id(), attrs={'class': 'original-post u-url u-name'},
          new_tab=True)

      vars['publishes'] = publishes

    if 'webmention' in self.source.features:
      # Blog posts
      blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                  .order(-BlogPost.created)\
                                  .fetch(10)
      for b in blogposts:
        b.links = self.process_webmention_links(b)
        try:
          text = b.feed_item.get('title')
        except ValueError:
          # no usable title; pretty_link gets text=None
          text = None
        b.pretty_url = util.pretty_link(
          b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
          max_length=40, new_tab=True)

      # Blog webmentions
      webmentions = BlogWebmention.query()\
          .filter(BlogWebmention.source == self.source.key)\
          .order(-BlogWebmention.updated)\
          .fetch(10)
      for w in webmentions:
        w.pretty_source = util.pretty_link(
          w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
        try:
          target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                              self.source.domains)
        except BaseException:
          # best effort: any failure here is treated as "not our domain"
          target_is_source = False
        w.pretty_target = util.pretty_link(
          w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
          keep_host=target_is_source)

      vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars
Example #30
0
    def fetch_mf2(self, url, require_mf2=True, raise_errors=False):
        """Fetches a URL and extracts its mf2 data.

    Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
    on errors.

    Args:
      url: string
      require_mf2: boolean, whether to return error if no mf2 are found
      raise_errors: boolean, whether to let error exceptions propagate up or
        handle them

    Returns:
      (:class:`requests.Response`, mf2 data dict) on success, None on failure
    """
        try:
            fetched = util.requests_get(url)
            fetched.raise_for_status()
        except BaseException as e:
            if raise_errors:
                raise
            util.interpret_http_exception(e)  # log exception
            return self.error('Could not fetch source URL %s' % url)

        if self.entity:
            self.entity.html = fetched.text

        # .text is decoded unicode string, .content is raw bytes. if the HTTP
        # headers didn't specify a charset, pass raw bytes to BeautifulSoup so it
        # can look for a <meta> tag with a charset and decode.
        text = (fetched.text if 'charset' in fetched.headers.get(
            'content-type', '') else fetched.content)
        doc = util.beautifulsoup_parse(text)

        # parse microformats
        data = util.mf2py_parse(doc, fetched.url)

        # special case tumblr's markup: div#content > div.post > div.copy
        # convert to mf2 and re-parse
        if not data.get('items'):
            contents = doc.find_all(id='content')
            if contents:
                post = contents[0].find_next(class_='post')
                if post:
                    post['class'] = 'h-entry'
                    copy = post.find_next(class_='copy')
                    if copy:
                        copy['class'] = 'e-content'
                    photo = post.find_next(class_='photo-wrapper')
                    if photo:
                        img = photo.find_next('img')
                        if img:
                            img['class'] = 'u-photo'
                    doc = unicode(post)
                    data = util.mf2py_parse(doc, fetched.url)

        logging.debug('Parsed microformats2: %s', json.dumps(data, indent=2))
        items = data.get('items', [])
        if require_mf2 and (not items or not items[0]):
            return self.error('No microformats2 data found in ' + fetched.url,
                              data=data,
                              html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

        return fetched, data