Example #1
def run(conf, env, type):

    global tasks

    pool = Threadpool(multiprocessing.cpu_count())
    while tasks.get(type):
        pool.add_task(partial(tasks[type].pop(), conf, env))

    pool.wait_completion()
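The Threadpool these examples rely on is not itself part of the listing; judging only from the calls made here (and from the wait= keyword passed in the last example), its interface is a constructor taking a worker count, add_task(func, *args) to queue work, and a blocking wait_completion(). A minimal queue-based sketch of that interface, written for Python 3 and making no claim about the original implementation:

import threading
from queue import Queue


class Threadpool:
    """Minimal stand-in: N daemon workers draining a shared task queue."""

    def __init__(self, num_threads, wait=True):
        # `wait` is accepted because the last example passes it, but its exact
        # semantics are not visible in this listing, so it is ignored here.
        self.tasks = Queue()
        for _ in range(num_threads):
            threading.Thread(target=self._worker, daemon=True).start()

    def _worker(self):
        while True:
            func, args = self.tasks.get()
            try:
                func(*args)
            finally:
                self.tasks.task_done()

    def add_task(self, func, *args):
        self.tasks.put((func, args))

    def wait_completion(self):
        self.tasks.join()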
Example #2
def validate(paths, jobs):
    """Validates a list of urls using up to N threads.

    :param paths: a list of HTML files where we search for a-href's
    :param jobs: number of threads used to send I/O requests"""

    ahref = re.compile(r'<a [^>]*href="([^"]+)"[^>]*>.*?</a>')
    visited, urls = set(), collections.defaultdict(list)

    def check(url, path):
        """A HEAD request to URL.  If HEAD is not allowed, we try GET."""

        try:
            get(url, timeout=10)
        except HTTPError as e:
            if e.code == 405:
                try:
                    get(url, path, 'GET', True)
                except URLError as e:
                    print '  ' + yellow(e.reason), url
                    print white('  -- ' + path)
            else:
                print '  ' + red(e.code), url
                print white('  -- ' + path)
        except URLError as e:
            print '  ' + yellow(e.reason), url
            print white('  -- ' + path)

    # -- validation
    for path in paths:

        with io.open(path, 'r') as fp:
            data = fp.read()

        for match in ahref.finditer(data):
            a = match.group(1)
            if a.startswith(('http://', 'https://')):
                if a not in visited:
                    visited.add(a)
                    urls[path].append(a)

    print
    print "Trying", blue(len(visited)), "links..."
    print

    pool = Threadpool(jobs)
    for path in urls:
        for url in urls[path]:
            pool.add_task(check, *[unescape(url), path])

    try:
        pool.wait_completion()
    except KeyboardInterrupt:
        sys.exit(1)
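The get() helper used by check() is also outside this listing; the except clauses only reveal that it issues a HEAD request, falls back to GET when asked, and raises urllib-style HTTPError/URLError. A plausible stand-in, stated purely as an assumption so the example can run in isolation:

from urllib.error import HTTPError, URLError  # the exceptions handled in check()
from urllib.request import Request, urlopen


def get(url, path=None, method='HEAD', follow=False, timeout=10):
    # path and follow mirror the positional call get(url, path, 'GET', True)
    # seen above; this sketch accepts them but only url, method and timeout matter.
    urlopen(Request(url, method=method), timeout=timeout)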
Example #3
def run(conf, env, options):
    """Subcommand: ping -- notify external resources via Pingback etc."""

    commands.initialize(conf, env)
    entrylist = [entry for entry in readers.load(conf)[0] if not entry.draft]

    if options.file:
        try:
            entrylist = [filter(lambda e: e.filename == options.file, entrylist)[0]]
        except IndexError:
            raise AcrylamidException("no such post!")

    if options.service == 'twitter':

        if twitter is None:
            raise AcrylamidException("'twitter' egg not found")

        for entry in entrylist if options.all else entrylist[:options.max or 1]:
            tweet(entry, conf, options.dryrun)

        return

    # XXX we should search for actual hrefs not random grepping, but this
    # requires access to the cache at non-runtime which is unfortunately
    # not possible yet.

    patterns = [
        r'(?<=\n)\[.*?\]:\s?(https?://.+)$',  # referenced markdown
        r'\[[^\]]+\]\((https?://[^\)]+)\)',  # inline markdown
        r'(?<=\n)\.\.\s+[^:]+:\s+(https?://.+)$',  # referenced docutils
        r'`[^<]+ <(https?://[^>]+)>`_',  # inline docutils
    ]

    pool = Threadpool(options.jobs)
    ping = lambda src, dest: pingback(helpers.joinurl(conf['www_root'], src), dest, options.dryrun)

    for entry in entrylist if options.all else entrylist[:options.max or 1]:

        for href in sum([re.findall(pat, entry.source, re.M) for pat in patterns], []):
            pool.add_task(ping, *[entry.permalink, href])

        try:
            pool.wait_completion()
        except KeyboardInterrupt:
            sys.exit(1)
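The four patterns above pull URLs out of referenced and inline links in Markdown and reStructuredText sources. A small self-contained illustration of what they extract (the sample text and URLs are invented):

import re

patterns = [
    r'(?<=\n)\[.*?\]:\s?(https?://.+)$',       # referenced markdown
    r'\[[^\]]+\]\((https?://[^\)]+)\)',        # inline markdown
    r'(?<=\n)\.\.\s+[^:]+:\s+(https?://.+)$',  # referenced docutils
    r'`[^<]+ <(https?://[^>]+)>`_',            # inline docutils
]

source = ("Read [the docs](https://example.org/docs), see also `the spec "
          "<https://example.org/spec>`_ and the footnote below.\n"
          "[1]: https://example.org/footnote\n")

print(sum([re.findall(pat, source, re.M) for pat in patterns], []))
# ['https://example.org/footnote', 'https://example.org/docs', 'https://example.org/spec']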
Example #4
def validate(paths, jobs):
    """Validates a list of urls using up to N threads.

    :param paths: a list of HTML files where we search for a-href's
    :param jobs: number of threads used to send I/O requests"""

    ahref = re.compile(r'<a [^>]*href="([^"]+)"[^>]*>.*?</a>')
    visited, urls = set(), collections.defaultdict(list)

    def check(url, path):
        """A HEAD request to URL.  If HEAD is not allowed, we try GET."""

        try:
            get(url, timeout=10)
        except HTTPError as e:
            if e.code == 405:
                try:
                    get(url, path, 'GET', True)
                except URLError as e:
                    print('  ' + yellow(e.reason), url)
                    print(white('  -- ' + path))
            else:
                print('  ' + red(e.code), url)
                print(white('  -- ' + path))
        except URLError as e:
            print('  ' + yellow(e.reason), url)
            print(white('  -- ' + path))

    # -- validation
    for path in paths:

        with io.open(path, 'r', encoding='utf-8') as fp:
            data = fp.read()

        for match in ahref.finditer(data):
            a = match.group(1)
            if a.startswith(('http://', 'https://')):
                if a not in visited:
                    visited.add(a)
                    urls[path].append(a)

    print()
    print("Trying", blue(len(visited)), "links...")
    print()

    pool = Threadpool(jobs)
    for path in urls:
        for url in urls[path]:
            pool.add_task(check, *[unescape(url), path])

    try:
        pool.wait_completion()
    except KeyboardInterrupt:
        sys.exit(1)
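For completeness, a call site for validate() might look like the following; the glob pattern and thread count are illustrative, not taken from the original project:

import glob

validate(glob.glob('output/**/*.html', recursive=True), jobs=10)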
Example #5
def run(conf, env, options):
    """Subcommand: ping -- notify external resources via Pingback etc."""

    commands.initialize(conf, env)
    entrylist = [entry for entry in readers.load(conf)[0] if not entry.draft]

    if options.file:
        try:
            entrylist = [
                filter(lambda e: e.filename == options.file, entrylist)[0]
            ]
        except IndexError:
            raise AcrylamidException("no such post!")

    if options.service == 'twitter':

        if twitter is None:
            raise AcrylamidException("'twitter' egg not found")

        for entry in entrylist if options.all else entrylist[:options.max or 1]:
            tweet(entry, conf, options.dryrun)

        return

    # XXX we should search for actual hrefs not random grepping, but this
    # requires access to the cache at non-runtime which is unfortunately
    # not possible yet.

    patterns = [
        r'(?<=\n)\[.*?\]:\s?(https?://.+)$',  # referenced markdown
        r'\[[^\]]+\]\((https?://[^\)]+)\)',  # inline markdown
        r'(?<=\n)\.\.\s+[^:]+:\s+(https?://.+)$',  # referenced docutils
        r'`[^<]+ <(https?://[^>]+)>`_',  # inline docutils
    ]

    pool = Threadpool(options.jobs)
    ping = lambda src, dest: pingback(helpers.joinurl(conf['www_root'], src),
                                      dest, options.dryrun)

    for entry in entrylist if options.all else entrylist[:options.max or 1]:

        for href in sum([re.findall(pat, entry.source, re.M) for pat in patterns], []):
            pool.add_task(ping, *[entry.permalink, href])

        try:
            pool.wait_completion()
        except KeyboardInterrupt:
            sys.exit(1)
Example #6
def initialize(conf, env):

    global pool

    hooks, blocks = conf.get('hooks', {}), not conf.get('hooks_mt', True)
    pool = Threadpool(1 if blocks else multiprocessing.cpu_count(),
                      wait=blocks)

    force = env.options.force
    normalize = lambda path: path.replace(conf['output_dir'], '')

    for pattern, action in iteritems(hooks):
        if isinstance(action, (types.FunctionType, string_types)):
            event.register(
                callback=partial(simple, pool, pattern, normalize, action),
                to=['create', 'update'] if not force else event.events)
        else:
            event.register(callback=partial(advanced, pool, pattern, force,
                                            normalize, *action),
                           to=event.events)

    discover([conf.get('HOOKS_DIR', 'hooks/')], lambda x: x)
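The shape of the hooks mapping is only implied by the isinstance check above: a pattern maps either to a callable or a command string (dispatched to simple), or to something else, typically a tuple, whose items are unpacked into advanced. Purely as an illustration of that dispatch (the hook semantics themselves live in simple and advanced, which are not shown), a configuration might look like this; every value is hypothetical:

conf = {
    'hooks': {
        # callable or string -> handled by `simple`
        r'\.html$': lambda *args: None,             # hypothetical callable hook
        r'\.css$': 'yuicompressor %1 -o %1',        # hypothetical command string
        # anything else (here a tuple) -> unpacked into `advanced` via *action
        r'\.jpg$': ('jpegoptim --strip-all %1',),   # hypothetical advanced hook
    },
    'hooks_mt': False,  # one blocking worker instead of one thread per CPU core
}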