Example #1
0
def main():
    """Spawn MAX_CON client connections and aggregate their events.

    A single Task is built over a shared dict accumulator; each
    connection contributes events '0'..'5' and, once every gathered
    event has been satisfied, the task presumably spawns COMPLETE
    (handled by `done`) — verify against Task.gather's contract.
    """
    # Shared accumulator: event name -> list of (connection label, payload).
    task = Task(dict())
    set_up_server()

  
    for ind in xrange(MAX_CON):
        sock = socket(AF_INET, SOCK_STREAM)
        con = Spin(sock)
        Client(con)

        # Handlers are registered before connect_ex so no event is missed.
        xmap(con, CONNECT, on_connect)
        xmap(con, CONNECT_ERR, on_connect_err)
        con.connect_ex(('localhost', PORT))

        # index=ind binds the loop variable at definition time; without
        # the default argument every closure would see the final ind.
        def job(data, event, args, index=ind):
            base = data.setdefault(event, list())
            base.append(('Con %s' % index, args[1]))
            return True

        # Returning True marks a gathered (con, event) pair as satisfied.
        # CONNECT_ERR is gathered too so a failed connection cannot stall
        # the COMPLETE event.
        task.gather(con, 
                         ('0', job),
                         ('1', job),
                         ('2', job),
                         ('3', job),
                         ('4', job),
                         ('5', job),
                         (CONNECT_ERR, lambda *args: True)) 



    xmap(task, COMPLETE, done)
Example #2
0
def scan(addr, min, max):
    """Probe every port in [min, max) on addr; die once all probes resolve.

    Note: `min`/`max` shadow the builtins but are part of the public
    signature and therefore kept.
    """
    task = Task()
    for port in range(min, max):
        # Either CONNECT or CONNECT_ERR counts the probe as finished.
        task.add(create_connection(addr, port), CONNECT, CONNECT_ERR)

    task.start()
    xmap(task, DONE, lambda task: die())
Example #3
0
def scan(addr, min, max):
    """Probe every port in [min, max) on addr; die once all probes resolve.

    Note: `min`/`max` shadow the builtins but are part of the public
    signature and therefore kept.
    """
    task = Task()
    for port in range(min, max):
        # Either CONNECT or CONNECT_ERR counts the probe as finished.
        task.add(create_connection(addr, port), CONNECT, CONNECT_ERR)

    task.start()
    task.add_map(DONE, lambda task: die())
Example #4
0
def on_connect(con):
    """CONNECT handler: install stream handlers on `con` and gather
    MAX_REQUEST numbered events into a COMPLETE-terminated task.
    """
    Stdin(con)
    Stdout(con)
    Shrug(con)
    xmap(con, FOUND, lambda con, data: spawn(con, data, data))
    # Alternative FOUND handler kept for debugging output.
    #xmap(con, FOUND, lambda con, data: sys.stdout.write('%s\n' % data))
    print 'connected'

    # Shared accumulator: event name -> list of payloads.
    task = Task(dict())

    def job(data, event, args):
        # Append the event payload; True marks the gathered pair as done.
        base = data.setdefault(event, list())
        base.append(args[1])
        return True

    # Events are named '0'..str(MAX_REQUEST - 1); once all are seen the
    # task presumably spawns COMPLETE, which is handled by `done`.
    for ind in xrange(MAX_REQUEST):
        task.gather(con, ('%s' % ind, job))

    xmap(task, COMPLETE, done)
Example #5
0
def on_connect(con):
    """CONNECT handler: install stream handlers on `con` and gather
    MAX_REQUEST numbered events into a COMPLETE-terminated task.
    """
    Stdin(con)
    Stdout(con)
    Shrug(con)
    xmap(con, FOUND, lambda con, data: spawn(con, data, data))
    # Alternative FOUND handler kept for debugging output.
    #xmap(con, FOUND, lambda con, data: sys.stdout.write('%s\n' % data))
    print 'connected' 

    # Shared accumulator: event name -> list of payloads.
    task = Task(dict())
    
    def job(data, event, args):
        # Append the event payload; True marks the gathered pair as done.
        base = data.setdefault(event, list())
        base.append(args[1])
        return True

    # Events are named '0'..str(MAX_REQUEST - 1); once all are seen the
    # task presumably spawns COMPLETE, which is handled by `done`.
    for ind in xrange(MAX_REQUEST):
        task.gather(con, ('%s' % ind, job)) 

    xmap(task, COMPLETE, done)
Example #6
0
class Miner(list):
    """Base crawler: fetch `url`, decode the response body and hand it
    to the `build_dom` hook. Subclasses implement `run` with their own
    extraction rules.
    """

    # Shared across all instances so the program can track every
    # outstanding request with one Task.
    task    = Task()
    task.add_map(DONE, lambda task: die())
    task.start()

    def __init__(self, url, pool=None,
                 headers=HEADERS, method='get', payload=None, auth=()):
        """
        url     -- the resource to fetch.
        pool    -- optional pool this miner belongs to.
        headers -- HTTP headers to send.
        method  -- 'get' uses Fetcher, anything else uses Poster.
        payload -- POST payload; defaults to a fresh empty dict.
        auth    -- (user, password) pair.
        """
        self.pool      = pool
        self.url       = url
        self.urlparser = urlparse(url)
        self.headers   = headers
        self.method    = method
        # A literal {} default would be shared by every instance that
        # omits payload; build a fresh dict per instance instead.
        self.payload   = {} if payload is None else payload
        self.auth      = auth
        self.encoding  = 'utf-8'
        self.response  = None

        # super(list, self) would resolve PAST list in the MRO and skip
        # list.__init__ entirely; initialize through Miner's MRO.
        super(Miner, self).__init__()
        self.expand()

    def expand(self):
        """Create the connection. No exception is raised; failures are
        printed so one bad url does not kill the crawl.
        """
        try:
            self.create_connection()
        except Exception as excpt:
            print(excpt)

    def setup(self, response):
        """Record the response, pick its charset and build the DOM."""
        data = response.fd.read()

        # Reset the fd so it can be reread later.
        response.fd.seek(0)

        # Renamed from `type` so the builtin is not shadowed.
        ctype = response.headers.get('content-type',
        'text/html; charset=%s' % self.encoding)

        params = cgi.parse_header(ctype)

        # Sets the encoding for later usage in self.geturl for example.
        # A server-sent content-type may omit charset; keep the current
        # encoding rather than raising KeyError in that case.
        self.encoding = params[1].get('charset', self.encoding)
        self.response = response
        data          = data.decode(self.encoding, 'ignore')
        self.build_dom(data)

    def build_dom(self, data):
        """Hook: parse the decoded payload. Subclasses override."""
        pass

    def create_connection(self):
        """Dispatch on HTTP method: Fetcher for 'get', Poster otherwise."""
        if self.method == 'get':
            return Fetcher(self)
        return Poster(self)

    def geturl(self, reference):
        """Resolve `reference` against this miner's scheme and host.

        Relative references must be joined with the base url because
        websnake's get method inserts the host header itself and some
        web servers would not accept a partial url as a valid header.
        Absolute references are returned unchanged.
        """
        urlparser = urlparse(reference)
        if urlparser.scheme:
            return reference
        return urljoin('%s://%s' % (self.urlparser.scheme,
                                    self.urlparser.hostname), reference)

    def next(self, reference):
        """Retarget the miner at `reference` and fetch it."""
        self.url       = self.geturl(reference)
        self.urlparser = urlparse(self.url)
        self.expand()

    def run(self, dom):
        """
        Implement your rules here.
        """

        pass
Example #7
0
    con.dump("GET / HTTP/1.1\r\n")
    con.dump("Host: %s\r\n" % url)
    con.dump("Connection: TE, close\r\n")
    con.dump("User-Agent: UntwistedDownload/1.0\r\n\r\n")


def on_close(con, err, url):
    """CLOSE handler: dump everything accumulated on `con` into a file
    named after `url`. `err` is the close condition and is ignored.
    """
    body = con.accumulator.data
    with open(url, "w") as output:
        output.write(body)


def done(task, data):
    # COMPLETE handler: raise Kill to tear down untwisted's event loop.
    # `data` is the task's accumulator dict and is ignored here.
    raise Kill


# Shared task: COMPLETE fires (running `done`, which kills the loop)
# once every gathered (connection, event) pair below is satisfied.
task = Task(dict())
xmap(task, COMPLETE, done)

# Gather callback; returning True marks the pair as done.
job = lambda data, event, args: True

# Each `ind` is a host name: it is dialed on port 80 and also passed
# through xmap as the extra argument that on_connect/on_close receive.
for ind in urls:
    con = Spin()
    Client(con)
    con.connect_ex((ind, 80))
    xmap(con, CONNECT, on_connect, ind)
    xmap(con, CLOSE, lambda con, err: lose(con))
    xmap(con, CLOSE, on_close, ind)
    xmap(con, CONNECT_ERR, lambda con, err: lose(con))
    # Either a clean close or a connect error releases this connection.
    task.gather(con, (CLOSE, job), (CONNECT_ERR, job))

core.gear.mainloop()
Example #8
0
    con.dump('GET / HTTP/1.1\r\n')
    con.dump('Host: %s\r\n' % url)
    con.dump('Connection: TE, close\r\n')
    con.dump('User-Agent: UntwistedDownload/1.0\r\n\r\n')


def on_close(con, err, url):
    """CLOSE handler: dump everything accumulated on `con` into a file
    named after `url`. `err` is the close condition and is ignored.
    """
    body = con.accumulator.data
    with open(url, 'w') as output:
        output.write(body)


def done(task, data):
    # COMPLETE handler: raise Kill to tear down untwisted's event loop.
    # `data` is the task's accumulator dict and is ignored here.
    raise Kill


# Shared task: COMPLETE fires (running `done`, which kills the loop)
# once every gathered (connection, event) pair below is satisfied.
task = Task(dict())
xmap(task, COMPLETE, done)

# Gather callback; returning True marks the pair as done.
job = lambda data, event, args: True

# Each `ind` is a host name: it is dialed on port 80 and also passed
# through xmap as the extra argument that on_connect/on_close receive.
for ind in urls:
    con = Spin()
    Client(con)
    con.connect_ex((ind, 80))
    xmap(con, CONNECT, on_connect, ind)
    xmap(con, CLOSE, lambda con, err: lose(con))
    xmap(con, CLOSE, on_close, ind)
    xmap(con, CONNECT_ERR, lambda con, err: lose(con))
    # Either a clean close or a connect error releases this connection.
    task.gather(con, (CLOSE, job), (CONNECT_ERR, job))

core.gear.mainloop()
Example #9
0
class Miner(list):
    """Base crawler: fetch a url, decode the response body and hand it
    to the `build_dom` hook. Subclasses implement `run` with their own
    extraction rules.

    A single class-level Task is shared by every Miner instance; the
    process dies once all outstanding requests are DONE.
    """

    task = Task()
    task.add_map(DONE, lambda task: die())
    task.start()

    def __init__(self,
                 url,
                 headers=default_headers,
                 args=None,
                 method='get',
                 payload=None,
                 auth=None,
                 attempts=5):
        """
        Resource
            Param: url

        Headers to be sent.
            Param: headers

        Url query (defaults to a fresh empty dict).
            Param: args

        The HTTP method.
            Param: method

        The payload data in case of method is 'post'.
            Param: payload

        Authentication user/pass.
            Param: auth

        The number of times a given url should be tried
        in case of corrupted response.
            Param: attempts.
        """
        self.url = url
        self.auth = auth
        # A literal {} default would be shared by every instance that
        # omits args; build a fresh dict per instance instead.
        self.args = {} if args is None else args

        self.encoding = 'utf-8'
        self.response = None
        self.headers = headers
        self.payload = payload
        self.method = method
        self.attempts = attempts
        self.urlparser = urlparse(url)

        # super(list, self) would resolve PAST list in the MRO and skip
        # list.__init__ entirely; initialize through Miner's MRO.
        super(Miner, self).__init__()

        self.next(self.url)

    def setup(self, response):
        """Record the response, pick its charset and build the DOM."""
        # Read then reset the fd so it can be reread later.
        data = response.fd.read()
        response.fd.seek(0)

        # Renamed from `type` so the builtin is not shadowed.
        ctype = response.headers.get('content-type',
                                     'text/html; charset=%s' % self.encoding)

        # Sets the encoding for later usage in self.geturl for example.
        # A server-sent content-type may omit charset; keep the current
        # encoding rather than raising KeyError in that case.
        params = cgi.parse_header(ctype)
        self.encoding = params[1].get('charset', self.encoding)
        self.response = response

        data = data.decode(self.encoding, 'ignore')
        self.build_dom(data)

    def build_dom(self, data):
        """Hook: parse the decoded payload. Subclasses override."""
        pass

    def handle_success(self, request, response):
        """Mapped to the '200' event: process a successful response."""
        self.setup(response)

    def fetcher(self):
        """Issue a GET request for self.url and track it on the task."""
        request = Get(self.url,
                      headers=self.headers,
                      auth=self.auth,
                      attempts=self.attempts)

        self.task.add(request, ResponseHandle.ERROR, ResponseHandle.DONE)
        request.add_map('200', self.handle_success)
        return request

    def poster(self):
        """Issue a POST request for self.url and track it on the task."""
        request = Post(self.url,
                       headers=self.headers,
                       payload=self.payload,
                       auth=self.auth,
                       attempts=self.attempts)

        self.task.add(request, ResponseHandle.ERROR, ResponseHandle.DONE)
        request.add_map('200', self.handle_success)
        return request

    def geturl(self, reference):
        """Resolve `reference` against this miner's scheme and host;
        absolute references are returned unchanged.
        """
        urlparser = urlparse(reference)
        if not urlparser.scheme:
            return urljoin(
                '%s://%s' % (self.urlparser.scheme, self.urlparser.hostname),
                reference)
        return reference

    def next(self, reference):
        """Retarget the miner at `reference` and issue the request."""
        self.url = self.geturl(reference)
        self.urlparser = urlparse(self.url)

        if self.method == 'get':
            return self.fetcher()
        return self.poster()

    def run(self, dom):
        """
        Implement your rules here.
        """

        pass
Example #10
0
from untwisted.core import die
from untwisted import core
from untwisted.task import Task
from untwisted.job import Job, DONE
import time


def sum(x, y):
    # NOTE(review): shadows the builtin `sum`; renaming it would also
    # require updating the Job(sum, ...) call below.
    # The sleep simulates a slow computation before returning x + y.
    time.sleep(3)
    return x + y


def show(job, result):
    """DONE handler: display the value computed by the finished job."""
    print(result)


# Task tracking every Job below; it dies once all of them are DONE.
task = Task()
# Tell the task it can start trigging events.
task.start()

# Fan out 100 jobs; each runs sum(ind, 1000) (presumably off the main
# loop -- see untwisted.job) and reports its result through `show`.
for ind in range(100):
    job = Job(sum, ind, 1000)
    job.add_map(DONE, show)
    task.add(job, DONE)

# Once every job added above is DONE, stop the event loop.
task.add_map(DONE, lambda task: die())
core.gear.mainloop()
Example #11
0
class Download(object):
    """Fetch http://<addr>/<rsc>, follow Location redirects, and save
    the final payload to a file named after the host.

    A single class-level Task is shared by every Download instance so
    COMPLETE only fires after all transfers (including redirects
    spawned along the way) have finished.
    """

    # Shared accumulator task; the dict is the data handed to `job`.
    task = Task(dict())
    # Returning True tells the task the gathered event is satisfied.
    job = lambda self, data, event, args: True

    def __init__(self, addr, rsc):
        """Dial `addr` on port 80 and wire up the event handlers."""
        self.addr = addr
        self.rsc = rsc

        con = Spin()
        Client(con)
        con.connect_ex((addr, 80))

        xmap(con, CONNECT, self.on_connect)
        xmap(con, CLOSE, lambda con, err: lose(con))
        # The event CLOSE is bound to self.on_close BEFORE we call
        # self.task.gather on con. It needs to be so: otherwise the
        # last Download instance to finish could spawn CLOSE and fire
        # COMPLETE too early, before on_close had checked whether the
        # headers carry a Location key (i.e. a redirect to follow).
        xmap(con, CLOSE, self.on_close)

        xmap(con, CONNECT_ERR, lambda con, err: lose(con))

        self.task.gather(con, (CLOSE, self.job),
                         (CONNECT_ERR, self.job))

    def on_connect(self, con):
        """Install stream handlers and send the HTTP request."""
        Stdin(con)
        Stdout(con)
        Accumulator(con)

        con.dump(HTTP_HEADER % (self.rsc, self.addr))

    def on_close(self, con, err):
        """On disconnect: follow a redirect if present, else save."""
        data = con.accumulator.data

        try:
            # The server might send only the http header, in which case
            # this split raises and we fall through to saving the data.
            header, data = data.split('\r\n\r\n', 1)
            # Lower-case everything so 'location' indexes reliably.
            header = header.lower()
            # Build a dict out of the raw http header.
            header = findall(REG_HEADER, header)
            header = dict(header)
            # If the document moved elsewhere, follow the link.
            addr = header['location']
            _, addr = addr.split('//')
            addr, rsc = addr.split('/', 1)
            # Download the document from its new address.
            Download(addr, rsc)
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are not swallowed; any ordinary failure (no Location key,
        # header-only response, malformed url) still means best effort:
        # save whatever we have in hand.
        except Exception:
            with open(self.addr, 'w') as fd:
                fd.write(data)