Esempio n. 1
0
def process_page(html, url):
    """Run a page through one site-specific fix-up and the generic passes.

    At most one ``fix_*`` helper is applied, selected by the prefix of
    ``url``.  Afterwards every page goes through ``remove_self_targets``,
    ``add_base``, ``add_base_target`` and ``add_page_css`` (each of which
    hands back a new ``(html, url)`` pair) and finally through
    ``remove_script_tags``, which hands back only the HTML.

    Returns the resulting ``(html, url)`` tuple.
    """
    log.debug("Called process_page on %s" % url)

    youtube_prefixes = (
        "http://youtube.com/watch",
        "http://www.youtube.com/watch",
    )
    techcrunch_article = (
        "http://www.techcrunch.com/2008/07/03/"
        "flowgram-reinvents-the-screencast-1000-beta-invites"
    )

    # Site specific hacks.  The branches are mutually exclusive and tested
    # in this order; the helpers are only looked up when their branch runs.
    if url.startswith(youtube_prefixes):
        log.debug("Calling fix_youtube")
        html, url = fix_youtube(html, url)
    elif url.startswith("http://maps.google.com"):
        log.debug("Calling fix_google_maps")
        html, url = fix_google_maps(html, url)
    elif url.startswith("http://gallery.mac.com/"):
        log.debug("Calling fix_mac_gallery")
        html, url = fix_mac_gallery(html, url)
    elif url.startswith(techcrunch_article):
        # This branch does not log before calling its helper.
        html, url = fix_techcrunch_flowgram_article(html, url)
    # NOTE: branches for the flowgram.com / dev.flowgram.com homepage
    # (fix_flowgram) and for http://www.flowgram.com/fg/ URLs
    # (fix_flowgram_widget_fg) are disabled.

    # Generic passes, applied to every page in this fixed order.
    generic_passes = (
        remove_self_targets,
        add_base,
        add_base_target,
        add_page_css,
    )
    for rewrite in generic_passes:
        html, url = rewrite(html, url)

    # remove_script_tags yields just the HTML, so the URL is left as-is.
    html = remove_script_tags(html, url)

    return html, url
Esempio n. 2
0
def import_rss(flowgram, rss_url, type, options):
    """Import a feed into ``flowgram`` as one or more pages.

    Args:
        flowgram: Target object.  Its ``title`` and ``description`` are
            filled from the feed when they are empty (or the title is
            "untitled"), and it is saved if either changed.
        rss_url: URL of the feed.  A ``feed://`` prefix is rewritten to
            ``http://`` before fetching.
        type: Import mode (the name shadows the builtin but is part of the
            existing signature).  ``"linkedarticles"`` creates one page per
            item pointing at the item's link, ``"articlesummaries"`` creates
            one rendered summary page per item, and ``"singlesummarypage"``
            creates a single page rendered from all items.  Any other value
            creates no pages.
        options: Mapping with optional keys ``"max_results"`` (truthy value
            used to slice the item list) and ``"enc"`` (response encoding,
            default ``"json"``).

    Returns:
        Whatever ``data_response.create(enc, "ok", pages)`` returns, where
        ``pages`` is the list of ``encode.page.to_dict`` results.

    Raises:
        Http404: If the parse result carries no HTTP status, the status is
            404, or no pages were created.
    """
    if rss_url.startswith("feed://"):
        rss_url = "http://" + rss_url[7:]

    parsed = feedparser.parse(rss_url)

    # feedparser reports the HTTP status as an integer, so the previous
    # comparison against the string "404" never matched.  Comparing the
    # string form accepts both spellings.
    status = parsed.get("status")
    if status is None or str(status) == "404":
        raise Http404

    # The feed format comes from feedparser's "version" field ("atom10",
    # "rss20", ...).  Testing whether parsed["feed"] was non-empty treated
    # every feed with metadata as Atom and dropped RSS channel descriptions.
    feed = parsed.get("feed") or {}
    is_atom = (parsed.get("version") or "").startswith("atom")

    channelTitle = feed.get("title", "")
    if is_atom:
        # Atom feeds keep an empty description, as before.
        channelDescription = ""
    else:
        # .get() avoids the AttributeError that attribute access raised
        # when the channel had no title/description.
        channelDescription = feed.get("description", "")

    flowgram_changed = False
    if not flowgram.title or flowgram.title.lower() == "untitled":
        flowgram.title = channelTitle
        flowgram_changed = True
    if not flowgram.description:
        flowgram.description = channelDescription
        flowgram_changed = True

    if flowgram_changed:
        flowgram.save()

    pages = []

    items = parsed["items"]
    # A missing "max_results" key means "no limit" rather than a KeyError.
    max_results = options.get("max_results")
    if max_results:
        items = items[:max_results]

    if type in ("linkedarticles", "articlesummaries"):
        for item in items:
            # Missing fields fall back to empty strings.
            title = item.get("title", "")
            link = item.get("link", "")
            description = item.get("description", "")

            if type == "linkedarticles":
                page = Page.objects.create(
                    flowgram=flowgram,
                    owner=flowgram.owner,
                    title=title,
                    source_url=link,
                    position=controller.get_next_position(flowgram),
                )

                controller.add_default_time(page)

                AddPageRequest.objects.create(flowgram=flowgram, url=link, page=page)

                pages.append(encode.page.to_dict(page))
            else:
                # "articlesummaries": create and save the page, then render
                # the item into it.
                page = Page.objects.create(title=title, source_url=link)
                page.save()

                context = Context({"title": title, "url": link, "description": remove_script_tags(description)})
                template = loader.get_template("importers/rss.html")

                html = "%s" % template.render(context)
                page = controller.create_page_to_flowgram(flowgram, page, html)

                pages.append(encode.page.to_dict(page))
    elif type == "singlesummarypage":
        # Create and save the page:
        page = Page.objects.create(title=channelTitle, source_url=rss_url)
        page.save()

        context = Context({"title": channelTitle, "items": items})
        template = loader.get_template("importers/rss_singlesummarypage.html")

        html = "%s" % template.render(context)
        page = controller.create_page_to_flowgram(flowgram, page, html)

        pages.append(encode.page.to_dict(page))

    # Nothing imported (empty feed or unrecognised import type).
    if not pages:
        raise Http404
    return data_response.create(options.get("enc", "json"), "ok", pages)