コード例 #1
0
ファイル: scrapers_run.py プロジェクト: fedecarles/ASMD
#!/usr/bin/python
# -*- coding: utf-8 -*-

from scrapers import Scraper, removeJunk

# Feed table: each entry pairs the name of a Scraper method with the RSS URL
# that is passed to it. Entries are processed top to bottom.
FEEDS = (
    ("scrapeInfobae",
     "http://cdn01.ib.infobae.com/adjuntos/162/rss/politica.xml"),
    ("scrapeLaNacion",
     "http://contenidos.lanacion.com.ar/herramientas/rss-categoria_id=30"),
    ("scrapeClarin",
     "http://www.clarin.com/rss/politica/"),
    ("scrapePagina12",
     "http://www.pagina12.com.ar/diario/rss/ultimas_noticias.xml"),
    ("scrapePerfil",
     "http://www.perfil.com/rss/politica.xml"),
    ("scrapeInfonews",
     "http://www.infonews.com/rss/politica.xml"),
    ("scrapeMendozaOnline",
     "http://www.mdzol.com/files/rss/politica.xml"),
    ("scrapeTelam",
     "http://www.telam.com.ar/rss2/politica.xml"),
    ("scrapeLosAndes",
     "http://losandes.com.ar/rss/politica"),
    ("scrapeLaVoz",
     "http://www.lavoz.com.ar/taxonomy/term/4/1/feed"),
)

sc = Scraper()
for method_name, feed_url in FEEDS:
    # Look the method up right before calling it, so lookups and calls
    # happen in the same interleaved order as one explicit call per feed.
    getattr(sc, method_name)(feed_url)

# Runs once, after every feed above has been handed to its scraper.
removeJunk()
コード例 #2
0
ファイル: __init__.py プロジェクト: ssokolow/fanfic2ebook
def main():
    """Command-line entry point: download each story URL and post-process it.

    Steps, in order:

    1. Build the option parser and parse ``sys.argv``.
    2. With ``--list_supported``, print the registered scrapers and
       personalities and exit.
    3. With no URL arguments, print the help text and exit.
    4. Instantiate the personality named by ``--personality`` (falling back
       to the program name) and copy its ``opts`` over the parsed options.
    5. For every URL: pick a scraper via ``Scraper.get``, download the
       story, run the personality's ``postproc`` hook, then run every
       ``--postproc`` command with ``%(name)s`` placeholders filled in from
       the downloaded story.

    A URL whose download raises is reported and skipped; the remaining URLs
    are still processed.
    """
    from optparse import OptionParser, OptionGroup

    descr = ("A simple tool for archiving fanfiction for offline reading " +
             "and converting said archives into ready-to-read eBooks for " +
             "pocket reading devices.")

    epilog = ("As an alternative to explicitly specifying a personality, " +
              "this command will alter its behaviour if called by the " +
              "following names: " +
              ', '.join(sorted(Personality.personalities)))

    parser = OptionParser(version="%%prog v%s" % __version__,
        usage="%prog [options] <url> ...", description=descr, epilog=epilog)
    # Note the trailing space after "file": without it the two halves of the
    # help text run together as "filewith".
    parser.add_option('-b', '--bundle', action="store_true", dest="bundle",
        default=False, help="Also bundle the entire story into a single file " +
                            "with chapter headings and a table of contents.")
    parser.add_option('-t', '--target', action="store", dest="target", metavar="DIR",
        default=os.getcwd(), help="Specify a target directory other than the current working directory.")
    parser.add_option('--list_supported', action="store_true", dest="list_supported",
        default=False, help="List installed scrapers and personalities.")
    parser.add_option('-P', '--personality', action="store", dest="persona", metavar="NAME",
        default=None, help="Set the personality the conversion will operate under. See --list_supported.")

    # Disabled pre-processing option group, kept for reference:
    #pre_group = OptionGroup(parser, "Pre-Processing Options")
    #pre_group.add_option('--strip-accents', action="store_true", dest="strip_accents",
    #    default=False, help="Remove diacritics for compatibility with readers with " +
    #    "limited fonts and no internal fallback mechanism. (eg. Sony PRS-505)")

    pp_group = OptionGroup(parser, "Post-Processing Options")
    pp_group.add_option('-p', '--postproc', action="append", dest="postproc", metavar="CMD",
        default=[], help="Call the specified post-processor after each retrieval " +
                         "completes. Can be used multiple times. Implies --bundle.")
    pp_group.add_option('-e', '--final_ext', action="store", dest="final_ext", metavar="EXT",
        default='.out', help="Set the extension to be used in the output filename " +
                           "available to post-processor templates.")
    parser.add_option_group(pp_group)

    opts, args = parser.parse_args()
    cmd = parser.get_prog_name()

    # Every print below passes exactly one string in parentheses, which
    # behaves the same as a Python 2 print statement and a Python 3 call.
    if opts.list_supported:
        names = sorted(Scraper.scrapers[x].site_name for x in Scraper.scrapers)
        print("Scrapers:\n\t" + '\n\t'.join(names))
        print("")  # blank separator line
        print("Personalities:\n\t" + '\n\t'.join(sorted(Personality.personalities)))
        parser.exit()

    if not args:
        parser.print_help()
        parser.exit()

    # An explicit --personality wins; otherwise the name the program was
    # invoked under selects the personality.
    persona = Personality.get(opts.persona or cmd)()
    for option in persona.opts:
        setattr(opts, option, persona.opts[option])

    # Post-processors operate on the bundled file, so force bundling on.
    if opts.postproc:
        opts.bundle = True

    for url_arg in args:
        scraper = Scraper.get(url_arg)(opts.target, opts.bundle, opts.final_ext)
        try:
            downloaded_story = scraper.download_fic(url_arg)
        except Exception as err:
            # Best-effort: report the failure (including its cause, shown via
            # repr so non-ASCII messages cannot break the report) and move on
            # to the next URL.
            print("Failed to retrieve story %s: %r" % (url_arg, err))
            print("TODO: Handle this properly")
            continue

        persona.postproc(downloaded_story)

        if opts.postproc:
            # Values available to post-processor templates as %(key)s.
            inputs = {
                'appname'   : "%s v%s" % (__appname__, __version__),
                'author'    : downloaded_story.author,
                'bundle'    : downloaded_story.path,
                'category'  : downloaded_story.category,
                'coverfile' : downloaded_story.cover,
                'outfile'   : downloaded_story.final_path,
                'site_name' : downloaded_story.site_name,
                'title'     : downloaded_story.title
            }

            for pp_cmdline in opts.postproc:
                # Split first and substitute afterwards, so a substituted
                # value containing spaces stays a single argument.
                cmdlist = pp_cmdline.split()
                if not cmdlist:
                    # An empty or whitespace-only -p value names no program.
                    continue
                print("Calling post-processor: %s" % cmdlist[0])
                subprocess.call([r % inputs for r in cmdlist])