Ejemplo n.º 1
0
class TestRecordConstructor(object):
    """Constructor checks for a two-field record class made by define_record.

    The record class is created once as a class attribute and exposed to the
    tests as ``self.cls``.
    """

    Point2D = define_record('Point2D', ['x', 'y'])

    def __init__(self):
        # Short alias used by every test below.
        self.cls = self.Point2D

    def test_module_name(self):
        """The generated class reports this module as its ``__module__``."""
        assert self.Point2D.__module__ == __name__

    @raises(TypeError)
    def test_invalid_slot(self):
        """Passing the undeclared keyword ``z`` (and no ``y``) raises TypeError."""
        self.cls(x=1, z=2)

    @raises(TypeError)
    def test_no_x(self):
        """Omitting ``x`` raises TypeError."""
        self.cls(y=2)

    @raises(TypeError)
    def test_no_y(self):
        """Omitting ``y`` raises TypeError."""
        self.cls(x=1)

    @raises(TypeError)
    def test_no_args(self):
        """Calling the constructor with no arguments raises TypeError."""
        self.cls()

    def test_no_kwargs(self):
        """Positional arguments are assigned to ``x`` then ``y``."""
        point = self.cls(1, 2)
        assert point.x == 1
        assert point.y == 2
Ejemplo n.º 2
0
class TestRecordOperations(object):
    """Checks of repr/str, iteration, equality, indexing, length and pickling
    on instances of a two-field record class made by define_record.

    Two fixtures are built in ``__init__``: ``self.a`` (x=1, y=2) and
    ``self.b`` (x=100, y=200).
    """

    Point2D = define_record('Point2D', ['x', 'y'])

    def __init__(self):
        self.cls = self.Point2D
        self.a = self.cls(x=1, y=2)
        self.b = self.cls(x=100, y=200)

    def test_repr(self):
        """repr() shows the class name followed by field=value pairs."""
        assert repr(self.a) == 'Point2D(x=1, y=2)'

    def test_str(self):
        """str() gives the same text as repr()."""
        assert str(self.a) == repr(self.a)

    def test_iter(self):
        """Iterating a record yields x then y."""
        assert list(iter(self.a)) == [self.a.x, self.a.y]

    def test_equality(self):
        """== and != compare records by field values."""
        c = self.cls(x=self.a.x, y=self.a.y)
        # Compare against True/False explicitly so that a comparison returning
        # some other object (e.g. NotImplemented) is caught.
        assert (self.a == c) == True
        assert (self.a == self.b) == False
        assert (self.a != c) == False
        assert (self.a != self.b) == True

    def test_index(self):
        """Indexing works like a 2-item sequence, including negative indexes."""
        assert_raises(IndexError, lambda: self.a[2])
        assert_raises(IndexError, lambda: self.a[-3])

        assert self.a[0] == self.a.x == 1 == self.a[-2]
        assert self.a[1] == self.a.y == 2 == self.a[-1]

    def test_len(self):
        """len() equals the number of fields."""
        assert len(self.a) == 2
        assert len(self.b) == 2

    @raises(pickle.PicklingError)
    def test_pickle_notglobal(self):
        """Test that if the class doesn't share a global name,
        pickling fails."""

        # Precondition: no module-level global carries the class's name.
        # (hasattr(__name__, ...) would inspect the module-name *string* and
        # could never fail, so the module namespace is checked directly.)
        assert self.a.__class__.__name__ not in globals()

        s = pickle.dumps(self.a)
        pickle.loads(s)

    def test_pickle(self):
        """A record round-trips through pickle once its class is published
        as a module-level global under its own name."""
        global Point2D

        # Precondition: the name is not already a module global.
        assert 'Point2D' not in globals()

        Point2D = self.cls
        try:
            s = pickle.dumps(self.a)
            c = pickle.loads(s)
            assert self.a == c
        finally:
            # Remove the temporary global so other tests see a clean module.
            del Point2D
Ejemplo n.º 3
0
import random
from PersistentQueue import define_record, RecordFIFO, b64

if __name__ == '__main__':
    # Script entry point: take a random sample of 100000 lines from the file
    # "urls", split each one with URL.get_parts, and put the resulting parts
    # into a RecordFIFO opened on "url_parts".
    import URL

    UrlParts = define_record("UrlParts", "scheme hostname port relurl")
    f = RecordFIFO(UrlParts, (str, str, str, b64), "url_parts")

    try:
        # Read via a context manager so the input file is closed promptly.
        with open("urls") as url_file:
            lines = url_file.readlines()

        # NOTE: random.sample raises ValueError if the file holds fewer than
        # 100000 lines.
        for line in random.sample(lines, 100000):
            line = line.strip()
            try:
                parts = URL.get_parts(line)
            except URL.BadFormat as exc:
                # Report the malformed URL and move on to the next line.
                print(exc)
                continue
            f.put(*parts)
    finally:
        # Close the FIFO even if sampling or parsing fails part-way through.
        f.close()
Ejemplo n.º 4
0
 def setUp(self):
     """Build a 'Point2D' record class with fields x and y; keep it as self.cls."""
     field_names = ['x', 'y']
     self.cls = define_record('Point2D', field_names)
Ejemplo n.º 5
0
import shutil
from time import time
import itertools

from PersistentQueue import b64, Static, JSON, RecordFIFO, define_record


def rmdir(dir):
    """Recursively delete the directory tree at *dir*.

    A path that does not exist is silently ignored; any other
    EnvironmentError raised by ``shutil.rmtree`` is re-raised unchanged.

    dir -- path of the directory tree to remove.
    """
    # Imported locally because the errno module is needed for the ENOENT
    # comparison below and is not among this file's visible imports.
    import errno

    try:
        shutil.rmtree(dir)
    except EnvironmentError as e:
        # Only "no such file or directory" is an acceptable failure.
        if e.errno != errno.ENOENT:
            raise


# Record class with seven named fields, created through define_record.
MyRec = define_record(
    "MyRec",
    (
        "next",
        "score",
        "depth",
        "data",
        "hostkey",
        "foo",
        "dog",
    ))


def get_fifo(test_dir):
    """Return a RecordFIFO of MyRec records configured for testing.

    test_dir is passed through as the third positional argument of
    RecordFIFO (presumably the queue's storage location -- confirm against
    PersistentQueue).
    """
    # Seven entries, one per MyRec field; presumably matched to the fields
    # by position -- confirm against RecordFIFO.
    field_template = (
        int,
        float,
        int,
        b64,
        Static("http://www.wikipedia.com"),
        Static(None),
        JSON,
    )
    # Defaults are supplied for only four of the seven fields.
    field_defaults = {
        "next": 0,
        "score": 0,
        "data": "did we survive b64ing?",
        "dog": {}
    }
    return RecordFIFO(MyRec, field_template, test_dir,
                      defaults=field_defaults)
Ejemplo n.º 6
0
import copy
import gzip
import Queue
import logging
import operator
import traceback
import robotparser
import multiprocessing
from time import time, sleep
from PersistentQueue import define_record, JSON

from analyzer_chain import Analyzer, AnalyzerChain, GetLinks, LogInfo, SpeedDiagnostics, FetchInfo
from process import Process, multi_syslog
import url

# Record class with six named fields, created through define_record.
HostRecord = define_record(
    "HostRecord",
    ["next", "start", "bytes", "hits", "hostname", "data"])

# Six entries, one per HostRecord field; presumably the per-field
# types/serializers in field order -- confirm against PersistentQueue.
HostRecord_template = (
    int,
    int,
    int,
    int,
    str,
    JSON,
)

class CrawlStateAnalyzer(Analyzer):
    name = "CrawlStateAnalyzer"

    def __init__(self, inQ, outQ, linkQ=None, **kwargs):
        """Initialize the parent class and remember the optional link queue.

        inQ, outQ and any extra keyword arguments are forwarded unchanged to
        the parent initializer via super(); linkQ (default None) is stored as
        ``self.linkQ``.
        """
        super(CrawlStateAnalyzer, self).__init__(inQ, outQ, **kwargs)
        # Stored after the parent initializer has run.
        self.linkQ = linkQ

    def analyze(self, yzable):
        """
        Puts FetchInfo & HostRecord objects into csm_inQ.
        
        Get rid of data here.
        """