Beispiel #1
0
    def out_rss(self, filename):
        """Write this object's items to ``filename`` as an RSS feed.

        Channel metadata is read from ``self.url``, ``self.title``,
        ``self.webMaster``, ``self.categoryName`` and ``self.categoryType``.
        One feed entry is added per element of ``self.items``; each element
        is read as a mapping with the keys ``link``, ``tipo``, ``pubDate``,
        ``title``, ``description``, ``numero`` and ``anno``.

        Parameters
        ----------
        filename
            Destination passed to ``FeedGenerator.rss_file``.
        """
        # NOTE: the body is indented with spaces only; mixing a
        # space-indented ``def`` with a tab-indented body is rejected by
        # Python 3 with a TabError.
        fg = FeedGenerator()
        fg.register_extension('albopop', AlbopopExtension, AlbopopEntryExtension)

        # Channel-level metadata.
        fg.id(self.url)
        fg.title(self.title)
        fg.description(self.title)  # the title doubles as the description
        fg.author({'name': 'alboPOP', 'email': ''})
        fg.link(href=self.url)
        fg.pubDate(formatdate())
        fg.webMaster(self.webMaster)
        fg.docs('https://github.com/mfortini/alboPOP_saga')
        fg.language('it')

        # Channel-level elements provided by the 'albopop' extension.
        fg.albopop.categoryName(self.categoryName)
        fg.albopop.categoryType(self.categoryType)

        for item in self.items:
            fe = fg.add_entry()
            fe.id(item['link'])
            fe.category(term=item['tipo'])
            fe.pubdate(item['pubDate'])
            fe.link(href=item['link'])
            fe.title(item['title'])
            fe.description(item['description'])
            # The category UID is built as "<numero>/<anno>".
            fe.albopop.categoryUID(str(item['numero']) + '/' + str(item['anno']))

        fg.rss_file(filename)
def create_feed():
    """Build a sample RSS feed with one entry and return it serialized.

    The feed registers a custom ``book`` extension and the return value is
    whatever ``FeedGenerator.rss_str(pretty=True)`` produces.
    """
    # Object that accumulates the feed data.
    feed = FeedGenerator()

    # Register the custom namespace together with the extension classes
    # that implement it for the feed and for its entries.
    feed.register_extension(
        'book',
        extension_class_feed=BookFeedExtension,
        extension_class_entry=BookEntryExtension,
    )

    # Channel-level elements: <title>, <link> (content is given through the
    # href argument) and <description>.
    feed.title("위키북스의 도서 목록")
    feed.link(href="http://example.com")
    feed.description("설명을 입력했다고 가정합니다.")

    # A single <item> with its <title> and <link>.
    entry = feed.add_entry()
    entry.title("파이썬을 이용한 머신러닝, 딥러닝 실전 앱 개발")
    entry.link(href="http://example.com")

    # <description> deliberately contains markup so escaping can be checked.
    entry_description = (
        '<a href="http://example.com">이스케이프 처리 확인 전용 링크</a>'
        "설명을 입력했다고 가정합니다."
    )
    entry.description(entry_description)

    # Element from the custom namespace, set through the extension's
    # ``publisher`` setter; the value is passed as a dictionary.
    publisher_info = {'name': "위키북스", 'id': "1"}
    entry.book.publisher(publisher_info)

    # Convert the feed to RSS (pretty=True applies indentation).
    return feed.rss_str(pretty=True)
Beispiel #3
0
    def _get_feed(cls,
                  query: Optional[ClassicAPIQuery] = None) -> FeedGenerator:
        """Create a feed generator pre-populated with feed-level metadata.

        Parameters
        ----------
        query
            Optional classic API query. When given (and truthy), the feed
            title, id and link are derived from it; otherwise a generic
            title and id are used.

        Returns
        -------
        FeedGenerator
            Generator with the ``opensearch`` and ``arxiv`` extensions
            registered and the ``updated`` timestamp set.
        """
        feed = FeedGenerator()
        feed.generator("")
        feed.register_extension("opensearch", OpenSearchExtension)
        feed.register_extension(
            "arxiv", ArXivExtension, ArXivEntryExtension, rss=False
        )

        if not query:
            # TODO: Discuss better defaults
            feed.title("arXiv Search Results")
            feed.id("https://arxiv.org/")
        else:
            query_string = (
                "" if query.phrase is None
                else phrase_to_query_string(query.phrase)
            )
            id_list = ",".join(query.id_list) if query.id_list else ""

            feed.title(f"arXiv Query: {query.to_query_string()}")

            # From perl documentation of the old site:
            # search_id is calculated by taking SHA-1 digest of the query
            # string. Digest is in bytes form and it's 20 bytes long. Then
            # it's base64 encoded, but perl's version returns only 27
            # characters - it omits the `=` sign at the end.
            raw_query = query.to_query_string().encode("utf-8")
            digest = hashlib.sha1(raw_query).digest()
            search_id = base64.b64encode(digest).decode("utf-8")[:-1]

            # The feed id is the query endpoint URL with "/query" swapped
            # for the search id.
            endpoint_url = url_for("classic_api.query")
            feed.id(cls._fix_url(endpoint_url.replace("/query", f"/{search_id}")))

            # Link pointing at the query itself.
            query_url = url_for(
                "classic_api.query",
                search_query=query_string,
                start=query.page_start,
                max_results=query.size,
                id_list=id_list,
            )
            feed.link({
                "href": cls._fix_url(query_url),
                "type": "application/atom+xml",
            })

        feed.updated(to_utc(datetime.utcnow()))
        return feed
def build_feed_generator():
    """Return a ``FeedGenerator`` set up for the WELT premium product feed.

    The ``catalogue`` extension is registered for both the feed and its
    entries, and the title, description and link (``WELT_URL``) are set.
    No entries are added here.
    """
    generator = FeedGenerator()

    # Custom 'catalogue' namespace for feed-level and entry-level elements.
    generator.register_extension(
        'catalogue',
        extension_class_feed=CatalogueExtension,
        extension_class_entry=CatalogueEntryExtension,
    )

    # Feed-level metadata.
    generator.title('WELT Product Feed Premium Items')
    generator.description('WELT premium articles from today.')
    generator.link(href=WELT_URL)

    return generator
def create_feed():
    """Build a sample RSS feed with one entry and return it serialized.

    The feed registers a custom ``book`` extension and the return value is
    whatever ``FeedGenerator.rss_str(pretty=True)`` produces.
    """
    # Container for the feed data.
    feed = FeedGenerator()

    # Register the custom namespace and the extension classes that
    # implement it for the feed and for its entries.
    feed.register_extension(
        'book',
        extension_class_feed=BookFeedExtension,
        extension_class_entry=BookEntryExtension,
    )

    # Channel-level elements: <title>, <link> (content is given through
    # href) and <description>.
    feed.title("芥川龍之介の新着作品")
    feed.link(href="http://www.aozora.gr.jp/index_pages/person879.html")
    feed.description("青空文庫に追加された芥川龍之介の新着作品のフィード")

    # Add one <item> with its <title> and <link>.
    entry = feed.add_entry()
    entry.title("羅生門")
    entry.link(href="http://www.aozora.gr.jp/cards/000879/card128.html")

    # <description> of the item; it contains an HTML anchor.
    entry_description = (
        '<a href="http://www.aozora.gr.jp/index_pages/person879.html">芥川</a>の5作目の短編小説。'
        "次の作品『今昔物語集』巻二十九「羅城門登上層見死人盗人語第十八」"
        "に題材を取り、人間のエゴイズムについて"
        "作者自身の解釈を加えたものである。"
    )
    entry.description(entry_description)

    # Element from the custom namespace, set through the extension's
    # ``writer`` setter; the value is passed as a dictionary.
    writer_info = {'name': "芥川 竜之介", 'id': "879"}
    entry.book.writer(writer_info)

    # Convert the feed data to RSS (pretty=True formats the output).
    return feed.rss_str(pretty=True)
Beispiel #6
0
    def get_xml(self: Serializer, response: Response) -> Tuple[str, int]:
        """
        Serialize the provided response data into Atom, version 1.0.

        Parameters
        ----------
        response : Response
            The search response data to be serialized.

        Returns
        -------
        data : str
            The serialized XML results.
        status
            The HTTP status code for the operation.

        Notes
        -----
        The feed title is taken from the first hit, so ``response.hits``
        must contain at least one element; an empty response raises on the
        ``response.hits[0]`` lookup.

        """
        fg = FeedGenerator()
        fg.register_extension("arxiv",
                              ArxivExtension,
                              ArxivEntryExtension,
                              rss=False)
        fg.id("http://arxiv.org/rss/version=atom_1.0")
        # NOTE(review): fails for an empty response - confirm callers never
        # pass one, or decide on a default title.
        archive = response.hits[0]["primary_classification"]["archive"]
        fg.title(archive["id"] + " updates on arXiv.org")
        fg.link(href='http://arxiv.org/rss/version=atom_1.0',
                rel='self',
                type='application/atom+xml')
        fg.updated(datetime.utcnow().replace(tzinfo=utc))

        # TODO - Try to remove generator element?  This doesn't work - code ignores "None"
        # fg.generator(None)
        # TODO - We don't currently set "subtitle", but could do it like this
        # fg.subtitle(
        #     f"{archive['name']} ({archive['id']}) updates on the arXiv.org e-print archive")

        # Add each search result "hit" to the feed
        for hit in response:
            entry = fg.add_entry()
            entry.id("http://arxiv.org/abs/" + hit['id'])
            entry.title(hit['title'])
            entry.summary(hit['abstract'])
            entry.published(hit['submitted_date'])
            entry.updated(hit['updated_date'])

            entry.link({
                "href": url_for("abs_by_id", paper_id=hit['id']),
                "type": "text/html"
            })
            pdf_link = {
                "title": "pdf",
                "rel": "related",
                "type": "application/pdf"
            }
            pdf_link["href"] = url_for("pdf_by_id", paper_id=hit['id'])
            entry.link(pdf_link)

            # Add categories: the primary one first, then every secondary.
            prim_cat = hit['primary_classification'].to_dict()['category']
            categories = [prim_cat]
            for secondary in hit['secondary_classification']:
                categories += [secondary['category'].to_dict()]
            for cat in categories:
                label = cat['name'] + " (" + cat['id'] + ")"
                category = {
                    "term": cat['id'],
                    "scheme": "http://arxiv.org/schemas/atom",
                    "label": label
                }
                entry.category(category)

            # Add arXiv-specific element "comment" only when there is
            # non-blank text to emit (the check used to be inverted, so the
            # element was written only for blank values).
            comments = hit['comments']
            if comments and comments.strip():
                entry.arxiv.comment(comments)

            # Add arXiv-specific element "journal_ref", same rule as above.
            journal_ref = hit['journal_ref']
            if journal_ref and journal_ref.strip():
                entry.arxiv.journal_ref(journal_ref)

            # Add arXiv-specific element "primary_category"
            label = prim_cat['name'] + " (" + prim_cat['id'] + ")"
            category = {
                "term": prim_cat['id'],
                "scheme": "http://arxiv.org/schemas/atom",
                "label": label
            }
            entry.arxiv.primary_category(category)

            # Add arXiv-specific element "doi"
            if hit['doi']:
                entry.arxiv.doi(hit['doi'])

            # Add each author
            for author in hit['authors']:
                author_list = {"name": author['full_name']}
                entry.author(author_list)
                # TODO - How can arxiv-specific affiliation elements be added to authors?

        data = fg.atom_str(pretty=True)
        status_code = status.HTTP_200_OK
        return data, status_code
Beispiel #7
0
    def get(self, mc, db, pkey):
        """Return the activity feed identified by ``pkey`` as an RSS document.

        The feed data comes from ``FeedService.get_feed_activities(db, mc,
        cfg, pkey)``; every activity becomes one RSS item, with optional
        media thumbnail and geo coordinates. The response content type is
        set to ``application/rss+xml; charset=utf-8``.

        Parameters
        ----------
        mc, db
            Handles forwarded unchanged to ``FeedService.get_feed_activities``.
        pkey
            Feed key; also used to build the feed's self URL.

        Returns
        -------
        The output of ``rss_str(pretty=True)`` for the generated channel.

        Aborts with 404 when the service raises ``FeedNotFoundException``
        and with 410 when it raises ``UserIdNotFoundException``.
        """
        # Matches anything that looks like a markup tag; compiled once
        # instead of on every activity.
        tag_pattern = re.compile('<[^>]*>')

        def check_encoding(string):
            # Coerce to unicode only when the value is not unicode already.
            # (The previous ``string is not unicode`` compared the value with
            # the type object itself and was therefore always true.)
            data = string
            if not isinstance(string, unicode):
                data = unicode(string)

            # Normalize, then emit pure ASCII with non-ASCII characters as
            # XML character references.
            return ud.normalize('NFKD', data).encode('ascii', 'xmlcharrefreplace')

        def clean_text(parser, text):
            # Decode HTML entities, drop tags, then escape the result.
            text = parser.unescape(text)
            text = tag_pattern.sub('', text)
            return escape(text)

        try:
            # host URL
            urlparts = request.urlparts
            host_url = '%s://%s/feeds/%s' % (urlparts.scheme, urlparts.netloc, pkey)

            # get feed data
            cfg = self._app.config
            obj = FeedService.get_feed_activities(db, mc, cfg, pkey)
            activities = obj['activities']
            user_id = obj['user_id']
            profile_url = 'https://plus.google.com/' + user_id

            # main element
            channel = FeedGenerator()
            channel.title('Plus Channel feed')
            channel.description('Google+ List of Activities for %s' % obj['name'])
            channel.generator('Plus Channel %s' % cfg.get('main.version'))
            channel.id(profile_url)
            channel.link(href=host_url, rel='self')
            channel.docs('')
            if 'photo_url' in obj and obj['photo_url'] is not None:
                photo_size = str(cfg.get('feed.photo_size.database'))
                channel.image(url=obj['photo_url'],
                              title='Plus Channel feed',
                              link=profile_url,
                              width=photo_size,
                              height=photo_size)

            # additional namespaces
            channel.register_extension('media', MediaExtension, MediaEntryExtension)
            channel.register_extension('geo', GeoExtension, GeoEntryExtension)

            # compose items
            h = HTMLParser.HTMLParser()
            for activity in activities:

                title = activity['title']
                content = activity['content']
                url = activity['url']

                # check content: content that merely repeats the title is dropped
                if content is None or content == title:
                    content = ''

                # check title
                if title is None:
                    title = 'notitle'

                # reformat strings
                title = clean_text(h, title)
                content = clean_text(h, content)

                # log activity
                logging.debug('--- activity ---')
                logging.debug(title)
                logging.debug(content)
                logging.debug(url)
                logging.debug('----------------')

                # create item
                item = channel.add_entry()
                item.title(check_encoding(title))
                item.pubdate(activity['datePublished'])

                # process content
                c_content = check_encoding(content)
                item.description(c_content)
                item.content(content=c_content, type='CDATA')

                # check image presence
                if 'imageUrl' in activity and activity['imageUrl'] != '':
                    item.media.media_thumbnail_url(activity['imageUrl'])

                    # check size
                    if 'imageWidth' in activity and 'imageHeight' in activity:
                        item.media.media_thumbnail_width(activity['imageWidth'])
                        item.media.media_thumbnail_height(activity['imageHeight'])

                # check coordinates
                if activity['hasCoordinates']:
                    item.geo.geo_lat(activity['latitude'])
                    item.geo.geo_long(activity['longitude'])

                # check link: only add one when the activity has a URL. The
                # earlier fallback re-read the same activity['url'] value, so
                # a missing URL still reached escape() as None.
                if url:
                    item.link(href=escape(url), rel='alternate')
                item.guid(escape(activity['id']))

            # return created feed
            response.set_header('content-type', 'application/rss+xml; charset=utf-8')
            return channel.rss_str(pretty=True)

        except FeedService.FeedNotFoundException:
            abort(404)

        except FeedService.UserIdNotFoundException:
            abort(410)