Code Example #1
File: notchkd.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Output the list of not checked pages."""
    session = Session()
    links = session.query(Link).filter(Link.yanked != None).order_by(Link.url)
    links = links.options(joinedload(Link.linkproblems))
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
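
All of the generate(crawler) functions in these examples are webcheck report plugins and lean on the same module-level scaffolding: a SQLAlchemy Session factory and Link model, a render() helper, and per-plugin __title__ / __outputfile__ constants. A minimal sketch of what such a plugin module is assumed to declare follows; the import paths mirror webcheck's layout but, like the example values, are assumptions rather than verbatim source:

import datetime

from sqlalchemy.orm import joinedload
from sqlalchemy.sql.functions import char_length

from webcheck import config            # assumed: crawler configuration values
from webcheck.db import Session, Link  # assumed: session factory and Link model
from webcheck.output import render     # assumed: template rendering helper

__title__ = 'not checked'              # hypothetical report title
__outputfile__ = 'notchkd.html'        # hypothetical output file name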
Code Example #2
File: urllist.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Output a sorted list of URLs."""
    session = Session()
    links = session.query(Link).order_by(Link.url)
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
Code Example #3
File: badlinks.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Present the list of bad links."""
    session = Session()
    links = session.query(Link).filter(Link.linkproblems.any())
    links = links.order_by(Link.url).options(joinedload(Link.linkproblems))
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
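
Link.linkproblems.any() compiles to an SQL EXISTS subquery over the related problem rows, and joinedload(Link.linkproblems) eager-loads those rows in the same SELECT so the template can list each problem without issuing extra queries. A rough sketch of the one-to-many mapping this relies on; the table and column names here are illustrative, not the actual webcheck schema:

from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class Link(Base):
    __tablename__ = 'links'
    id = Column(Integer, primary_key=True)
    url = Column(String)
    # one Link has many LinkProblem rows; .any() and joinedload() act on this
    linkproblems = relationship('LinkProblem', backref='link')

class LinkProblem(Base):
    __tablename__ = 'linkproblems'
    id = Column(Integer, primary_key=True)
    link_id = Column(Integer, ForeignKey('links.id'))
    message = Column(String)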
Code Example #4
File: notitles.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Output the list of pages without a title."""
    session = Session()
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = links.filter((char_length(Link.title) == 0)
                         | (Link.title == None)).order_by(Link.url)
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
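
The | operator here builds an SQL OR of the two conditions, and Link.title == None is the SQLAlchemy spelling that compiles to IS NULL (it cannot be replaced with Python's "is None"). The same filter can be written more explicitly with or_() and .is_(); this is a sketch, not taken from webcheck:

from sqlalchemy import or_
from sqlalchemy.sql.functions import char_length

# Equivalent to the bitwise-| / == None filter above: title empty or NULL.
links = links.filter(or_(char_length(Link.title) == 0,
                         Link.title.is_(None))).order_by(Link.url)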
Code Example #5
File: external.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Generate the list of external links."""
    session = Session()
    links = session.query(Link).filter(Link.is_internal != True).order_by(Link.url)
    links = links.options(joinedload(Link.linkproblems))
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
Code Example #6
File: badlinks.py  Project: arthurdejong/webcheck
def generate(crawler):
    """Present the list of bad links."""
    session = Session()
    links = session.query(Link).filter(Link.linkproblems.any())
    links = links.order_by(Link.url).options(joinedload(Link.linkproblems))
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
Code Example #7
File: notchkd.py  Project: arthurdejong/webcheck
def generate(crawler):
    """Output the list of not checked pages."""
    session = Session()
    links = session.query(Link).filter(Link.yanked != None).order_by(Link.url)
    links = links.options(joinedload(Link.linkproblems))
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
Code Example #8
File: old.py  Project: arthurdejong/webcheck
def generate(crawler):
    """Output the list of outdated pages to the specified file descriptor."""
    session = Session()
    oldtime = datetime.datetime.now() - datetime.timedelta(days=config.REPORT_WHATSOLD_URL_AGE)
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = links.filter(Link.mtime < oldtime).order_by(Link.mtime)
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links, now=datetime.datetime.now())
    session.close()
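
The cutoff is computed once: any page whose mtime is older than now minus REPORT_WHATSOLD_URL_AGE days is reported, oldest first. A quick illustration of that arithmetic with an assumed 30-day setting (the real value comes from webcheck's config):

import datetime

# Assuming REPORT_WHATSOLD_URL_AGE = 30 purely for illustration.
now = datetime.datetime(2024, 1, 31, 12, 0)
oldtime = now - datetime.timedelta(days=30)
print(oldtime)  # 2024-01-01 12:00:00 -- pages with an earlier mtime count as old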
Code Example #9
File: new.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Output the list of recently modified pages."""
    session = Session()
    newtime = datetime.datetime.now() - datetime.timedelta(days=config.REPORT_WHATSNEW_URL_AGE)
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = links.filter(Link.mtime > newtime).order_by(Link.mtime.desc())
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links, now=datetime.datetime.now())
    session.close()
Code Example #10
File: notitles.py  Project: arthurdejong/webcheck
def generate(crawler):
    """Output the list of pages without a title."""
    session = Session()
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = links.filter((char_length(Link.title) == 0) |
                         (Link.title == None)).order_by(Link.url)
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
Code Example #11
File: sitemap.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Output the sitemap."""
    session = Session()
    links = [
        session.query(Link).filter_by(url=url).first()
        for url in crawler.base_urls
    ]
    links = explore(links)
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
Code Example #12
File: size.py  Project: arthurdejong/webcheck
def generate(crawler):
    """Output the list of large pages."""
    session = Session()
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = [x for x in links
             if get_size(x) >= config.REPORT_SLOW_URL_SIZE * 1024]
    links.sort(lambda a, b: cmp(b.total_size, a.total_size))
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
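
links.sort(lambda a, b: cmp(b.total_size, a.total_size)) is Python 2 only: cmp() and comparison-function arguments to sort() were removed in Python 3. Under Python 3 the same largest-first ordering would use a key function; a sketch, assuming total_size is a plain number on each link:

# Python 3 equivalent of the cmp()-based sort: largest total_size first.
links.sort(key=lambda x: x.total_size, reverse=True)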
Code Example #13
File: images.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Generate a list of image URLs that were found."""
    session = Session()
    # get non-page links that have an image/* mimetype
    links = session.query(Link)
    links = links.filter((Link.is_page != True) | (Link.is_page == None))
    links = links.filter(Link.mimetype.startswith('image/'))
    links = links.order_by(Link.url)
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
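
On a mapped column, .startswith('image/') is SQLAlchemy's column operator, so the prefix test runs in SQL (as a LIKE 'image/%' style comparison) rather than calling Python's str.startswith. An equivalent, more explicit spelling as a sketch:

# Same filter written with an explicit LIKE pattern.
links = links.filter(Link.mimetype.like('image/%'))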
Code Example #14
def generate(crawler):
    """Output the list of large pages."""
    session = Session()
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = [
        x for x in links if get_size(x) >= config.REPORT_SLOW_URL_SIZE * 1024
    ]
    links.sort(lambda a, b: cmp(b.total_size, a.total_size))
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
Code Example #15
File: old.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Output the list of outdated pages to the specified file descriptor."""
    session = Session()
    oldtime = datetime.datetime.now() - datetime.timedelta(
        days=config.REPORT_WHATSOLD_URL_AGE)
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = links.filter(Link.mtime < oldtime).order_by(Link.mtime)
    render(__outputfile__,
           crawler=crawler,
           title=__title__,
           links=links,
           now=datetime.datetime.now())
    session.close()
Code Example #16
File: problems.py  Project: arthurdejong/webcheck
def generate(crawler):
    """Output the overview of problems per author."""
    session = Session()
    # make a list of problems per author
    problem_db = collections.defaultdict(list)
    # get internal links with page problems
    links = session.query(Link).filter_by(is_internal=True)
    links = links.filter(Link.pageproblems.any()).order_by(Link.url)
    for link in links:
        author = link.author.strip() if link.author else u'Unknown'
        problem_db[author].append(link)
    # get a sorted list of authors
    authors = problem_db.keys()
    authors.sort()
    authors = [(x, problem_db[x]) for x in authors]
    render(__outputfile__, crawler=crawler, title=__title__,
           authors=authors, mk_id=mk_id)
    session.close()
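
problem_db.keys() followed by .sort() only works on Python 2, where keys() returns a list; in Python 3 it returns a view with no sort() method. The Python 3 way to build the same sorted (author, links) pairs, as a sketch:

# Python 3 equivalent: sorted() iterates the dict's keys directly.
authors = [(author, problem_db[author]) for author in sorted(problem_db)]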
Code Example #17
File: problems.py  Project: rms-abelov/webcheck
def generate(crawler):
    """Output the overview of problems per author."""
    session = Session()
    # make a list of problems per author
    problem_db = collections.defaultdict(list)
    # get internal links with page problems
    links = session.query(Link).filter_by(is_internal=True)
    links = links.filter(Link.pageproblems.any()).order_by(Link.url)
    for link in links:
        author = link.author.strip() if link.author else u'Unknown'
        problem_db[author].append(link)
    # get a sorted list of authors
    authors = problem_db.keys()
    authors.sort()
    authors = [(x, problem_db[x]) for x in authors]
    render(__outputfile__,
           crawler=crawler,
           title=__title__,
           authors=authors,
           mk_id=mk_id)
    session.close()
Code Example #18
def generate(crawler):
    """Output a list of modules, it's authors and the webcheck version."""
    session = Session()
    render(__outputfile__, crawler=crawler, title=__title__,
           numlinks=session.query(Link).count())
    session.close()