class TestRecordConstructor(object):
    """Constructor behaviour of a two-field record class built by define_record."""

    # Record class under test; created once when the class body is executed.
    Point2D = define_record('Point2D', ['x', 'y'])

    def __init__(self):
        # Alias used by the individual tests below.
        self.cls = self.Point2D

    def test_module_name(self):
        # The generated class must report this module as its home module.
        record_module = self.Point2D.__module__
        assert record_module == __name__

    @raises(TypeError)
    def test_invalid_slot(self):
        # 'z' is not one of the declared fields.
        bad_kwargs = {'x': 1, 'z': 2}
        self.cls(**bad_kwargs)

    @raises(TypeError)
    def test_no_x(self):
        # Field 'x' is omitted.
        self.cls(y=2)

    @raises(TypeError)
    def test_no_y(self):
        # Field 'y' is omitted.
        self.cls(x=1)

    @raises(TypeError)
    def test_no_args(self):
        # No field values at all.
        self.cls()

    def test_no_kwargs(self):
        # Positional arguments are assigned to the fields in declaration order.
        point = self.cls(1, 2)
        assert point.x == 1 and point.y == 2
class TestRecordOperations(object):
    """Behaviour of record instances: repr, iteration, equality, indexing,
    length and pickling."""

    # Record class under test. It is deliberately a class attribute rather
    # than a module-level name so the pickle tests control whether a global
    # called 'Point2D' exists.
    Point2D = define_record('Point2D', ['x', 'y'])

    def __init__(self):
        self.cls = self.Point2D
        self.a = self.cls(x=1, y=2)
        self.b = self.cls(x=100, y=200)

    def test_repr(self):
        assert repr(self.a) == 'Point2D(x=1, y=2)'

    def test_str(self):
        assert str(self.a) == repr(self.a)

    def test_iter(self):
        # Iteration yields the field values in declaration order.
        assert list(iter(self.a)) == [self.a.x, self.a.y]

    def test_equality(self):
        # Exercise both == and != for an equal pair and an unequal pair.
        c = self.cls(x=self.a.x, y=self.a.y)
        assert (self.a == c) == True
        assert (self.a == self.b) == False
        assert (self.a != c) == False
        assert (self.a != self.b) == True

    def test_index(self):
        # Out-of-range indices on either side must raise IndexError.
        assert_raises(IndexError, lambda: self.a[2])
        assert_raises(IndexError, lambda: self.a[-3])
        # Valid positive and negative indices map onto the named fields.
        assert self.a[0] == self.a.x == 1 == self.a[-2]
        assert self.a[1] == self.a.y == 2 == self.a[-1]

    def test_len(self):
        assert len(self.a) == 2
        assert len(self.b) == 2

    @raises(pickle.PicklingError)
    def test_pickle_notglobal(self):
        """ Test that if the class doesn't share a global name, pickling fails."""
        # Precondition: the class must not be reachable as a module global.
        # The previous check, hasattr(__name__, ...), inspected the module
        # *name string* and therefore could never fail.
        assert self.a.__class__.__name__ not in globals()
        s = pickle.dumps(self.a)
        pickle.loads(s)

    def test_pickle(self):
        # Round-trip through pickle once the class is published as a module
        # global under its own name; the global is removed again afterwards
        # so other tests still see it as absent.
        global Point2D
        # Precondition: no such global yet (checked on the module namespace,
        # not on the module-name string as before).
        assert 'Point2D' not in globals()
        Point2D = self.cls
        try:
            s = pickle.dumps(self.a)
            c = pickle.loads(s)
            assert self.a == c
        finally:
            del Point2D
import random

from PersistentQueue import define_record, RecordFIFO, b64

if __name__ == '__main__':
    # Script: load a random sample of URLs from the file "urls", split each
    # into parts with URL.get_parts and push the parts into a RecordFIFO
    # created with the name "url_parts".
    import URL

    UrlParts = define_record("UrlParts", "scheme hostname port relurl")

    # Read all lines first and close the file promptly instead of leaking
    # the handle returned by open().
    with open("urls") as url_file:
        lines = url_file.readlines()

    f = RecordFIFO(UrlParts, (str, str, str, b64), "url_parts")
    try:
        # NOTE: random.sample raises ValueError when the file holds fewer
        # than 100000 lines.
        for line in random.sample(lines, 100000):
            line = line.strip()
            try:
                parts = URL.get_parts(line)
            except URL.BadFormat as exc:
                # Report the malformed URL and carry on with the next one.
                print(exc)
                continue
            f.put(*parts)
    finally:
        # Always close the FIFO, even if a put() or parse step blows up.
        f.close()
def setUp(self):
    """Create a two-field ``Point2D`` record class and store it on ``self.cls``."""
    field_names = ['x', 'y']
    self.cls = define_record('Point2D', field_names)
import errno
import shutil
from time import time
import itertools

from PersistentQueue import b64, Static, JSON, RecordFIFO, define_record


def rmdir(dir):
    """Recursively delete the directory tree *dir*.

    A missing directory (ENOENT) is silently ignored; any other
    EnvironmentError raised by shutil.rmtree is propagated to the caller.
    """
    try:
        shutil.rmtree(dir)
    except EnvironmentError as e:
        # Only "no such file or directory" is an acceptable failure.
        if e.errno != errno.ENOENT:
            raise


# Seven-field record type used by the FIFO built in get_fifo().
MyRec = define_record(
    "MyRec",
    ("next", "score", "depth", "data", "hostkey", "foo", "dog"))


def get_fifo(test_dir):
    """Return a RecordFIFO of MyRec records for use in tests.

    *test_dir* is passed as the third positional argument of RecordFIFO
    (presumably the on-disk location of the queue; confirm against
    RecordFIFO).
    """
    # One template entry per MyRec field, given positionally.
    template = (
        int,
        float,
        int,
        b64,
        Static("http://www.wikipedia.com"),
        Static(None),
        JSON,
    )
    return RecordFIFO(
        MyRec,
        template,
        test_dir,
        defaults={
            "next": 0,
            "score": 0,
            "data": "did we survive b64ing?",
            "dog": {},
        })
import copy import gzip import Queue import logging import operator import traceback import robotparser import multiprocessing from time import time, sleep from PersistentQueue import define_record, JSON from analyzer_chain import Analyzer, AnalyzerChain, GetLinks, LogInfo, SpeedDiagnostics, FetchInfo from process import Process, multi_syslog import url HostRecord = define_record("HostRecord", ["next", "start", "bytes", "hits", "hostname", "data"]) HostRecord_template = (int, int, int, int, str, JSON) class CrawlStateAnalyzer(Analyzer): name = "CrawlStateAnalyzer" def __init__(self, inQ, outQ, linkQ=None, **kwargs): super(CrawlStateAnalyzer, self).__init__(inQ, outQ, **kwargs) self.linkQ = linkQ def analyze(self, yzable): """ Puts FetchInfo & HostRecord objects into csm_inQ. Get rid of data here. """