Exemple #1
0
from jsb.lib.examples import examples
from jsb.plugs.common.tinyurl import get_tinyurl
from jsb.lib.persistconfig import PersistConfig

## basic imports

import re
import urlparse
import xmlrpclib
import socket
import logging

## defines

# Persistent store for this plugin, created under the name 'urlinfo' with an
# empty dict as second argument.
# NOTE(review): PlugPersist is not imported in the visible lines -- confirm the
# import exists elsewhere in the file.
cfg = PlugPersist('urlinfo', {})
# Plugin configuration object; "showpictures" is registered with the value 1
# (presumably its default -- verify against PersistConfig.define).
plugcfg = PersistConfig()
plugcfg.define("showpictures", 1)

## sanitize function


def sanitize(text):
    """ Keep only the URL-looking words of `text`.

    The input is stripped, runs of two or more whitespace characters are
    replaced by a single space, and the result is split on single spaces.
    A word is kept when it is at least 5 characters long and contains
    'www.' or 'http' anywhere in it.

    NOTE(review): this excerpt stops right after `tmp2` is initialised; the
    rest of the function, including its return value, is not visible here.
    """
    text = text.strip()
    # collapse each run of 2+ whitespace characters into one space so the
    # split below does not yield empty words for those runs
    text = re.sub('\s\s+', ' ', text)
    # tmp accumulates the kept words, each followed by a single space
    tmp = ''
    for i in text.split(' '):
        # words shorter than 5 characters are never treated as urls
        if len(i) >= 5:
            # substring test: the marker may appear anywhere in the word
            if i.find('www.') != -1 or i.find('http') != -1: tmp += i + ' '
    # drop the trailing space left by the last appended word
    tmp = tmp.strip()
    # second accumulator; its use lies beyond the visible part of the function
    tmp2 = ''
Exemple #2
0
from jsb.lib.persistconfig import PersistConfig
from jsb.lib.plugins import plugs as plugins

## basic imports

import urllib
import urllib2
import urlparse
import copy
import re
import socket

## defines

# Persistent store for this plugin, created under the name 'snarf.cfg'.
# NOTE(review): PlugPersist is not imported in the visible lines -- confirm the
# import exists elsewhere in the file.
cfg = PlugPersist('snarf.cfg')
# Plugin configuration object; 'allow' is registered with a list of three MIME
# types (presumably the content types the plugin accepts -- verify at the
# place where 'allow' is read).
pcfg = PersistConfig()
pcfg.define('allow', ['text/plain', 'text/html', 'application/xml'])

# Captures the text between <title> and </title>. Non-greedy, so it stops at
# the first closing tag; re.I makes the tags case-insensitive and re.S lets
# the captured title span several lines.
re_html_title = re.compile(u'<title>(.*?)</title>', re.I | re.M | re.S)

# Captures an http:// or https:// url up to the next whitespace character.
re_url_match = re.compile(u'((?:http|https)://\S+)')

re_html_valid = {
    'result':
    re.compile('(Failed validation, \d+ errors?|Passed validation)',
               re.I | re.M),
    'modified':
    re.compile('<th>Modified:</th><td colspan="2">([^<]+)</td>', re.I | re.M),
    'server':
    re.compile('<th>Server:</th><td colspan="2">([^<]+)</td>', re.I | re.M),
    'size':