def test_samples(self):
    """Samples endpoint: empty job, two fixed rows, then a randomized batch.

    Pushes running jobs and verifies that written sample rows read back
    exactly and in insertion order, including a fill large enough to span
    multiple write batches.
    """
    # A freshly pushed job has no samples.
    j1 = self.project.push_job(self.spidername, state='running')
    self.assertEqual(list(j1.samples.list()), [])
    # Simple fill: two known rows round-trip in order.
    ts = millitime()
    j1.samples.write([ts, 1, 2, 3])
    j1.samples.write([ts + 1, 5, 9, 4])
    j1.samples.flush()
    o = list(j1.samples.list())
    self.assertEqual(len(o), 2)
    self.assertEqual(o[0], [ts, 1, 2, 3])
    self.assertEqual(o[1], [ts + 1, 5, 9, 4])
    # Random fill sized to exceed at least one full write batch.
    j2 = self.project.push_job(self.spidername, state='running')
    samples = []
    ts = millitime()
    count = int(j2.samples.batch_size * (random() + randint(1, 5)))
    for _ in xrange(count):
        # Bug fix: advance by randint(1, ...) rather than randint(0, ...)
        # so timestamps are strictly increasing — a zero increment could
        # produce duplicate timestamps and an undefined readback order.
        ts += randint(1, 2**16)
        row = [ts] + list(randint(0, 2**16)
                          for _ in xrange(randint(0, 100)))
        samples.append(row)
        j2.samples.write(row)
    j2.samples.flush()
    o = list(j2.samples.list())
    self.assertEqual(len(o), count)
    for r1, r2 in zip(samples, o):
        self.assertEqual(r1, r2)
def test_samples(self):
    """Exercise job sample storage: empty listing, fixed rows, random batch."""
    # A job that has just been pushed holds no samples yet.
    job = self.project.push_job(self.spidername, state='running')
    self.assertEqual(list(job.samples.list()), [])
    # Two hand-written rows must come back verbatim, in insertion order.
    base = millitime()
    first = [base, 1, 2, 3]
    second = [base + 1, 5, 9, 4]
    job.samples.write(first)
    job.samples.write(second)
    job.samples.flush()
    stored = list(job.samples.list())
    self.assertEqual(len(stored), 2)
    self.assertEqual(stored[0], first)
    self.assertEqual(stored[1], second)
    # Randomized fill, sized past batch_size so several batches are written.
    job2 = self.project.push_job(self.spidername, state='running')
    written = []
    stamp = millitime()
    total = int(job2.samples.batch_size * (random() + randint(1, 5)))
    for _ in xrange(total):
        stamp += randint(1, 2**16)  # keep timestamps strictly increasing
        width = randint(0, 100)
        row = [stamp] + [randint(0, 2**16) for _ in xrange(width)]
        written.append(row)
        job2.samples.write(row)
    job2.samples.flush()
    readback = list(job2.samples.list())
    self.assertEqual(len(readback), total)
    for expected, got in zip(written, readback):
        self.assertEqual(expected, got)
def test_requests(self):
    """Request-tree storage: a parent, two children (one with a
    fingerprint), and an exact in-order readback."""
    ts = millitime()
    job = self.project.push_job(self.spidername, state='running')
    # Top parent request.
    r1 = job.requests.add(url='http://test.com/', status=200, method='GET',
                          rs=1337, duration=5, parent=None, ts=ts)
    # First child (returned id unused; only the parent link matters).
    job.requests.add(url='http://test.com/2', status=400, method='POST',
                     rs=0, duration=1, parent=r1, ts=ts + 1)
    # Another child with fingerprint set.
    job.requests.add(url='http://test.com/3', status=400, method='PUT',
                     rs=0, duration=1, parent=r1, ts=ts + 2, fp='1234')
    job.close_writers()
    rr = job.requests.list()
    # Use the next() builtin (Python 2.6+/3.x) instead of the py2-only
    # rr.next(), consistent with the other requests tests in this suite.
    self.assertEqual(next(rr),
                     {u'status': 200, u'rs': 1337,
                      u'url': u'http://test.com/', u'time': ts,
                      u'duration': 5, u'method': u'GET'})
    self.assertEqual(next(rr),
                     {u'status': 400, u'parent': 0, u'rs': 0,
                      u'url': u'http://test.com/2', u'time': ts + 1,
                      u'duration': 1, u'method': u'POST'})
    self.assertEqual(next(rr),
                     {u'status': 400, u'fp': u'1234', u'parent': 0,
                      u'rs': 0, u'url': u'http://test.com/3',
                      u'time': ts + 2, u'duration': 1, u'method': u'PUT'})
    self.assertRaises(StopIteration, next, rr)
def _run_scraper(self, jobkey, jobauth, close_reason=None):
    """Simulate a scraper writing MAGICN items/logs/samples/requests.

    Uses job-level auth only — no global or project auth is available to
    the scraper side. Raises *close_reason* if it is an Exception;
    otherwise stores it in the job metadata when set.
    """
    methods = "GET PUT POST DELETE HEAD OPTIONS TRACE CONNECT".split()
    client = HubstorageClient(endpoint=self.endpoint)
    with closing(client) as scraperclient:
        job = scraperclient.get_job(jobkey, auth=jobauth)
        for n in xrange(self.MAGICN):
            item_id = job.items.write({"uuid": n})
            # One line per log level, each tagged with the loop index.
            for writer, text in ((job.logs.debug, "log debug %s"),
                                 (job.logs.info, "log info %s"),
                                 (job.logs.warn, "log warn %s"),
                                 (job.logs.error, "log error %s")):
                writer(text % n, idx=n)
            sample_id = job.samples.write([n, n, n])
            request_id = job.requests.add(
                url="http://test.com/%d" % n,
                status=random.randint(100, 1000),
                method=random.choice(methods),
                rs=random.randint(0, 100000),
                duration=random.randint(0, 1000),
                parent=random.randrange(0, n + 1) if n > 10 else None,
                ts=millitime() + random.randint(100, 100000),
            )
            # Writers hand back sequential ids matching the loop index.
            self.assertEqual(item_id, n)
            self.assertEqual(sample_id, n)
            self.assertEqual(request_id, n)
        if isinstance(close_reason, Exception):
            raise close_reason
        if close_reason:
            job.metadata["close_reason"] = close_reason
            job.metadata.save()
def _run_scraper(self, jobkey, jobauth, close_reason=None):
    """Drive a fake scraper against *jobkey* using job-level auth only.

    Writes MAGICN items, log lines (all four levels), samples, and
    requests, asserting sequential ids. If *close_reason* is an
    Exception it is raised; a truthy non-exception value is saved into
    the job metadata instead.
    """
    verbs = 'GET PUT POST DELETE HEAD OPTIONS TRACE CONNECT'.split()
    # A dedicated client per scraper run; no global/project credentials.
    with closing(HubstorageClient(endpoint=self.endpoint)) as scraperclient:
        job = scraperclient.get_job(jobkey, auth=jobauth)
        for i in xrange(self.MAGICN):
            item_id = job.items.write({'uuid': i})
            job.logs.debug('log debug %s' % i, idx=i)
            job.logs.info('log info %s' % i, idx=i)
            job.logs.warn('log warn %s' % i, idx=i)
            job.logs.error('log error %s' % i, idx=i)
            sample_id = job.samples.write([i, i, i])
            request_id = job.requests.add(
                url='http://test.com/%d' % i,
                status=random.randint(100, 1000),
                method=random.choice(verbs),
                rs=random.randint(0, 100000),
                duration=random.randint(0, 1000),
                parent=random.randrange(0, i + 1) if i > 10 else None,
                ts=millitime() + random.randint(100, 100000),
            )
            self.assertEqual(item_id, i)
            self.assertEqual(sample_id, i)
            self.assertEqual(request_id, i)
        if isinstance(close_reason, Exception):
            raise close_reason
        if close_reason:
            job.metadata['close_reason'] = close_reason
            job.metadata.save()
def _run_scraper(self, jobkey, jobauth, close_reason=None):
    """Simulate a scraper run against *jobkey* via self.scraperclient.

    Writes MAGICN items/logs/samples/requests with job-level auth.
    Raises *close_reason* if it is an Exception; otherwise a truthy
    value is stored as the job's close_reason metadata.

    Bug fix: the client is now closed in a ``finally`` block, so any
    unexpected exception inside the loop no longer leaks the open
    client (previously it was closed only on the explicit
    *close_reason* raise or on normal completion).
    """
    httpmethods = 'GET PUT POST DELETE HEAD OPTIONS TRACE CONNECT'.split()
    job = self.scraperclient.get_job(jobkey, auth=jobauth)
    try:
        for idx in xrange(self.MAGICN):
            iid = job.items.write({'uuid': idx})
            job.logs.debug('log debug %s' % idx, idx=idx)
            job.logs.info('log info %s' % idx, idx=idx)
            job.logs.warn('log warn %s' % idx, idx=idx)
            job.logs.error('log error %s' % idx, idx=idx)
            sid = job.samples.write([idx, idx, idx])
            rid = job.requests.add(
                url='http://test.com/%d' % idx,
                status=random.randint(100, 1000),
                method=random.choice(httpmethods),
                rs=random.randint(0, 100000),
                duration=random.randint(0, 1000),
                parent=random.randrange(0, idx + 1) if idx > 10 else None,
                ts=millitime() + random.randint(100, 100000),
            )
            # All writers return sequential ids equal to the loop index.
            self.assertEqual(iid, idx)
            self.assertEqual(sid, idx)
            self.assertEqual(rid, idx)
        if isinstance(close_reason, Exception):
            raise close_reason
        if close_reason:
            job.metadata['close_reason'] = close_reason
            job.metadata.save()
    finally:
        # Always release the client, even on unexpected failures.
        self.scraperclient.close()
    # Only drop the attribute on a clean run, matching prior behavior.
    del self.scraperclient
def test_botgroup(self):
    """A job queued under a botgroup starts only for that botgroup."""
    # Restrict the project to botgroup 'foo'.
    self.project.settings.update(botgroups=['foo'], created=millitime())
    self.project.settings.save()
    client = self.hsclient
    queued = client.push_job(self.project.projectid, self.spidername)
    # No botgroup given: the restricted job must not be handed out.
    started = client.start_job()
    self.assertEqual(started, None,
                     'got %s, pushed job was %s' % (started, queued))
    # Wrong botgroup: still nothing to start.
    started = client.start_job(botgroup='bar')
    self.assertEqual(started, None,
                     'got %s, pushed job was %s' % (started, queued))
    # Matching botgroup picks up exactly the queued job.
    started = client.start_job(botgroup='foo')
    self.assertEqual(started.key, queued.key)
def test_settings(self):
    """Project settings round-trip through save, liveget and expire."""
    project = self.hsclient.get_project(self.projectid)
    # Clean-slate check, tolerating botgroups injected by the testsuite.
    current = dict(project.settings)
    current.pop("botgroups", None)
    self.assertEqual(current, {})
    created = millitime()
    project.settings["created"] = created
    project.settings["botgroups"] = ["g1"]
    project.settings.save()
    # liveget must reflect the values just saved.
    self.assertEqual(project.settings.liveget("created"), created)
    self.assertEqual(project.settings.liveget("botgroups"), ["g1"])
    # After expiring the cached copy, the full dict still matches.
    project.settings.expire()
    self.assertEqual(dict(project.settings),
                     {"created": created, "botgroups": ["g1"]})
def test_settings(self):
    """Settings save/liveget/expire round-trip (two-botgroup variant).

    Bug fix: the initial clean-slate assertion now ignores a
    pre-existing 'botgroups' key, which the testsuite environment may
    set on the project — the other settings tests in this suite apply
    the same filtering.
    """
    project = self.hsclient.get_project(self.projectid)
    settings = dict(project.settings)
    settings.pop('botgroups', None)  # ignore testsuite botgroups
    self.assertEqual(settings, {})
    project.settings['created'] = created = millitime()
    project.settings['botgroups'] = ['g1', 'g2']
    project.settings.save()
    # Saved values must be visible via liveget.
    self.assertEqual(project.settings.liveget('created'), created)
    self.assertEqual(project.settings.liveget('botgroups'), ['g1', 'g2'])
    # Expire the local copy; the reloaded dict must match what was saved.
    project.settings.expire()
    self.assertEqual(dict(project.settings), {
        'created': created,
        'botgroups': ['g1', 'g2'],
    })
def test_settings(self):
    """Create, persist and reload project settings."""
    project = self.hsclient.get_project(self.projectid)
    initial = dict(project.settings)
    initial.pop('botgroups', None)  # testsuite may pre-set botgroups
    self.assertEqual(initial, {})
    created = millitime()
    project.settings['created'] = created
    project.settings['botgroups'] = ['g1']
    project.settings.save()
    # Both saved keys must be retrievable via liveget.
    for key, expected in (('created', created), ('botgroups', ['g1'])):
        self.assertEqual(project.settings.liveget(key), expected)
    # Expiring the cached settings and re-reading yields the same dict.
    project.settings.expire()
    self.assertEqual(dict(project.settings),
                     {'created': created, 'botgroups': ['g1']})
def test_requests(self):
    """Stored requests (parent links, fingerprint) read back in order."""
    started = millitime()
    job = self.project.push_job(self.spidername, state="running")
    # Root of the request tree.
    root = job.requests.add(url="http://test.com/", status=200,
                            method="GET", rs=1337, duration=5,
                            parent=None, ts=started)
    # Two children of the root; the second carries a fingerprint.
    job.requests.add(url="http://test.com/2", status=400, method="POST",
                     rs=0, duration=1, parent=root, ts=started + 1)
    job.requests.add(url="http://test.com/3", status=400, method="PUT",
                     rs=0, duration=1, parent=root, ts=started + 2,
                     fp="1234")
    job.requests.close()
    rr = job.requests.list()
    expected = [
        {u"status": 200, u"rs": 1337, u"url": u"http://test.com/",
         u"time": started, u"duration": 5, u"method": u"GET"},
        {u"status": 400, u"parent": 0, u"rs": 0,
         u"url": u"http://test.com/2", u"time": started + 1,
         u"duration": 1, u"method": u"POST"},
        {u"status": 400, u"fp": u"1234", u"parent": 0, u"rs": 0,
         u"url": u"http://test.com/3", u"time": started + 2,
         u"duration": 1, u"method": u"PUT"},
    ]
    for want in expected:
        self.assertEqual(next(rr), want)
    self.assertRaises(StopIteration, next, rr)
def test_requests(self):
    """Store a small request tree and verify the exact readback."""
    ts = millitime()
    job = self.project.push_job(self.spidername, state='running')
    # Top parent.
    r1 = job.requests.add(url='http://test.com/', status=200, method='GET',
                          rs=1337, duration=5, parent=None, ts=ts)
    # First child (returned id unused; only the parent link matters).
    job.requests.add(url='http://test.com/2', status=400, method='POST',
                     rs=0, duration=1, parent=r1, ts=ts + 1)
    # Another child with fingerprint set.
    job.requests.add(url='http://test.com/3', status=400, method='PUT',
                     rs=0, duration=1, parent=r1, ts=ts + 2, fp='1234')
    job.requests.close()
    rr = job.requests.list()
    # Use the next() builtin (Python 2.6+/3.x) instead of the py2-only
    # rr.next(), consistent with the other requests test in this suite.
    self.assertEqual(
        next(rr), {
            u'status': 200,
            u'rs': 1337,
            u'url': u'http://test.com/',
            u'time': ts,
            u'duration': 5,
            u'method': u'GET'
        })
    self.assertEqual(
        next(rr), {
            u'status': 400,
            u'parent': 0,
            u'rs': 0,
            u'url': u'http://test.com/2',
            u'time': ts + 1,
            u'duration': 1,
            u'method': u'POST'
        })
    self.assertEqual(
        next(rr), {
            u'status': 400,
            u'fp': u'1234',
            u'parent': 0,
            u'rs': 0,
            u'url': u'http://test.com/3',
            u'time': ts + 2,
            u'duration': 1,
            u'method': u'PUT'
        })
    self.assertRaises(StopIteration, next, rr)