def test_site_samples(self):
    """Test parse_html against samples taken from real cases.

    Each line of the gzipped samples file is decoded as one JSON document
    (with ``_decode_element`` as the object hook) and passed to
    ``self._test_sample``.
    """
    samples_file = GzipFile(os.path.join(path, SAMPLES_FILE), "r")
    try:
        samples = [json.loads(line, object_hook=_decode_element)
                   for line in samples_file.readlines()]
    finally:
        # Release the handle even when a line fails to decode.
        samples_file.close()
    for sample in samples:
        self._test_sample(sample)
def run(self, args, opts):
    """Upload an egg of the project to a deploy target.

    Modes, checked in this order:

    * ``opts.list_targets``: print the name and URL of every target
      returned by ``_get_targets()`` and return.
    * ``opts.list_projects``: fetch ``listprojects.json`` from that target,
      print the entries of its ``'projects'`` list one per line and return.
    * otherwise: resolve target, project and version, open ``opts.egg`` or
      build a new egg with ``_build_egg()``, and pass it to ``_upload_egg``.

    Raises:
        UsageError: when ``setuptools`` cannot be imported.
    """
    try:
        # Imported only to check availability; the name is not used here.
        import setuptools
    except ImportError:
        raise UsageError("setuptools not installed")

    if opts.list_targets:
        for name, target in _get_targets().items():
            # Single-argument parenthesised form prints the same text under
            # the Python 2 print statement.
            print("%-20s %s" % (name, target['url']))
        return

    if opts.list_projects:
        target = _get_target(opts.list_projects)
        req = urllib2.Request(_url(target, 'listprojects.json'))
        _add_auth_header(req, target)
        f = urllib2.urlopen(req)
        try:
            projects = json.loads(f.read())['projects']
        finally:
            # Close the HTTP response even when the payload is malformed.
            f.close()
        print(os.linesep.join(projects))
        return

    target_name = _get_target_name(args)
    target = _get_target(target_name)
    project = _get_project(target, opts)
    version = _get_version(target, opts)
    tmpdir = None
    if opts.egg:
        _log("Using egg: %s" % opts.egg)
        egg = open(opts.egg, 'rb')
    else:
        _log("Building egg of %s-%s" % (project, version))
        egg, tmpdir = _build_egg()
    try:
        _upload_egg(target, egg, project, version)
    finally:
        # Clean up on failure too, so a failed upload leaves neither an
        # open file handle nor a temporary build directory behind.
        egg.close()
        if tmpdir:
            shutil.rmtree(tmpdir)
def _list(self):
    """Return the JSON-decoded bodies of the messages read from the queue.

    Messages are read one at a time from ``self.queue`` (each read uses a
    visibility timeout of 100) until a read returns a falsy value; the
    decoded bodies are returned in the order they were read.
    """
    decoded = []
    while True:
        message = self.queue.read(visibility_timeout=100)
        if not message:
            break
        decoded.append(json.loads(message.get_body()))
    return decoded
def test_two_items(self):
    """Export the same item twice and expect a two-element JSON list.

    The text collected in ``self.output`` must decode to a list holding
    the dict form of ``self.i`` twice.
    """
    exporter = self.ie
    exporter.start_exporting()
    for _ in range(2):
        exporter.export_item(self.i)
    exporter.finish_exporting()
    decoded = json.loads(self.output.getvalue())
    expected = [dict(self.i), dict(self.i)]
    self.assertEqual(decoded, expected)
def test_jsonrpc_client_call_request(self):
    """Check the request that jsonrpc_client_call sends for positional args.

    The call is routed through a ``urllib_mock`` instance; the captured
    request body must be JSON carrying an ``id`` plus the expected
    ``jsonrpc``, ``method`` and ``params`` members, sent to the given URL.
    """
    fake_urllib = urllib_mock(1)
    jsonrpc_client_call('url', 'test', 'one', 2, _urllib=fake_urllib)
    sent = json.loads(fake_urllib.request)
    assert 'id' in sent
    self.assertEqual(fake_urllib.url, 'url')
    expected_members = (
        ('jsonrpc', '2.0'),
        ('method', 'test'),
        ('params', ['one', 2]),
    )
    for key, expected in expected_members:
        self.assertEqual(sent[key], expected)
def _append_next_from_sqs(self):
    """Take at most one message from the SQS queue and append its spider.

    The message body is JSON; its ``'name'`` entry is the spider name and
    the remaining entries are forwarded as keyword arguments to
    ``self.append_spider_name``. Does nothing when no message is returned.
    """
    queue = self._get_sqs_queue()
    received = queue.get_messages(
        1, visibility_timeout=self.visibility_timeout)
    if not received:
        return
    message = received[0]
    # The message is deleted before its body is fetched and decoded.
    message.delete()
    spider_kwargs = json.loads(message.get_body())
    spider_name = spider_kwargs.pop('name')
    self.append_spider_name(spider_name, **spider_kwargs)
def result(self):
    """Decode ``self.body`` as a JSON-RPC response and return its result.

    Returns:
        The value stored under ``'result'`` when that key is present.

    Raises:
        JsonRpcError: when the response has an ``'error'`` member; built
            from its ``code``, ``message`` and ``data`` (``None`` when the
            server omitted ``data``).
        ValueError: when the response has neither ``'result'`` nor
            ``'error'``.
    """
    response = json.loads(self.body)
    if 'result' in response:
        return response['result']
    if 'error' in response:
        error = response['error']
        # JSON-RPC 2.0 allows the error object to omit "data", so read it
        # leniently instead of failing with an unrelated KeyError.
        raise JsonRpcError(error['code'], error['message'], error.get('data'))
    msg = "JSON-RPC response must contain 'result' or 'error': %s" % response
    raise ValueError(msg)
def test_site_samples(self):
    """Test parse_html against samples taken from real cases.

    Samples are numbered file pairs ``<prefix>_<n>.html`` and
    ``<prefix>_<n>.json`` starting at 0; iteration stops at the first
    missing ``.json`` file. Each pair is passed to ``self._test_sample``
    together with its index.
    """
    def read_unicode(filename):
        # Read the whole file as unicode, always releasing the handle.
        handle = open(filename, "rb")
        try:
            return str_to_unicode(handle.read())
        finally:
            handle.close()

    count = 0
    fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
    while os.path.exists(fname):
        source = read_unicode("%s_%d.html" % (SAMPLES_FILE_PREFIX, count))
        parsed = json.loads(read_unicode(fname), object_hook=_decode_element)
        self._test_sample(source, parsed, count)
        count += 1
        fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
def jsonrpc_client_call(url, method, *args, **kwargs):
    """Execute a JSON-RPC call on the given url.

    Positional arguments are sent as ``params`` when any are given;
    otherwise the keyword arguments are sent. The ``_urllib`` keyword is
    consumed here (not sent) and replaces the ``urllib`` module used to
    perform the request.

    Returns:
        The ``'result'`` member of the decoded response.

    Raises:
        JsonRpcError: when the response has an ``'error'`` member; built
            from its ``code``, ``message`` and ``data`` (``None`` when the
            server omitted ``data``).
        ValueError: when the response has neither ``'result'`` nor
            ``'error'``.
    """
    _urllib = kwargs.pop('_urllib', urllib)
    req = {'jsonrpc': '2.0', 'method': method, 'params': args or kwargs, 'id': 1}
    res = json.loads(_urllib.urlopen(url, json.dumps(req)).read())
    if 'result' in res:
        return res['result']
    if 'error' in res:
        er = res['error']
        # JSON-RPC 2.0 allows the error object to omit "data", so read it
        # leniently instead of failing with an unrelated KeyError.
        raise JsonRpcError(er['code'], er['message'], er.get('data'))
    msg = "JSON-RPC response must contain 'result' or 'error': %s" % res
    raise ValueError(msg)
def add_sample(source):
    """
    Method for adding samples to test samples file
    (use from console)

    Loads the samples already stored in the gzipped samples file (one JSON
    document per line), appends a new one made of ``source`` and the list
    produced by ``parse_html(source)``, and rewrites the whole file.
    """
    samples_path = os.path.join(path, SAMPLES_FILE)
    samples = []
    # Test the same location that is read and rewritten below. Checking the
    # bare file name is relative to the working directory and would drop
    # every stored sample when run from another directory.
    if os.path.exists(samples_path):
        existing = GzipFile(samples_path, "r")
        try:
            samples = [json.loads(line) for line in existing.readlines()]
        finally:
            existing.close()

    new_sample = {"source": source}
    new_sample["parsed"] = list(parse_html(source))
    samples.append(new_sample)

    # Serialise everything before opening for writing: "wb" truncates the
    # file, so a serialisation error afterwards would lose stored samples.
    lines = [json.dumps(sample, default=_encode_element) + "\n"
             for sample in samples]
    samples_file = GzipFile(samples_path, "wb")
    try:
        for line in lines:
            samples_file.write(line)
    finally:
        samples_file.close()
def test_site_pages(self):
    """
    Tests from real pages. More reliable and easy to build for more
    complicated structures

    Each line of the gzipped samples file is a JSON document with an
    ``"annotated"`` page source and the ``"annotations"`` expected from
    parsing it. Every slot of every parsed annotation is compared with the
    expected entry, and no expected entry may be left over.
    """
    # Gzip data is binary, so read it in binary mode and close the handle
    # as soon as the bytes are in memory.
    samples_file = open(os.path.join(path, "samples_pageparsing.json.gz"), "rb")
    try:
        compressed = samples_file.read()
    finally:
        samples_file.close()
    samples = [json.loads(line)
               for line in GzipFile(fileobj=StringIO(compressed)).readlines()]

    for sample in samples:
        source = sample["annotated"]
        annotations = sample["annotations"]
        template = HtmlPage(body=str_to_unicode(source))
        parser = TemplatePageParser(TokenDict())
        parser.feed(template)
        for annotation in parser.annotations:
            # Expected annotations are consumed in order, one per parsed one.
            test_annotation = annotations.pop(0)
            for s in annotation.__slots__:
                if s == "tag_attributes":
                    for pair in getattr(annotation, s):
                        self.assertEqual(list(pair), test_annotation[s].pop(0))
                else:
                    self.assertEqual(getattr(annotation, s), test_annotation[s])
        # Every expected annotation must have been matched.
        self.assertEqual(annotations, [])
def test_site_pages(self):
    """
    Tests from real pages. More reliable and easy to build for more
    complicated structures

    Samples are numbered file pairs ``samples_pageparsing_<n>.html`` and
    ``samples_pageparsing_<n>.json`` starting at 0; iteration stops at the
    first missing ``.json`` file. Every slot of every parsed annotation is
    compared with the expected entry, and no expected entry may be left
    over.
    """
    def read_unicode(filename):
        # Read the whole file as unicode, always releasing the handle.
        handle = open(filename, "rb")
        try:
            return str_to_unicode(handle.read())
        finally:
            handle.close()

    SAMPLES_FILE_PREFIX = os.path.join(path, "samples/samples_pageparsing")
    count = 0
    fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
    while os.path.exists(fname):
        source = read_unicode("%s_%d.html" % (SAMPLES_FILE_PREFIX, count))
        annotations = json.loads(read_unicode(fname))
        template = HtmlPage(body=source)
        parser = TemplatePageParser(TokenDict())
        parser.feed(template)
        for annotation in parser.annotations:
            # Expected annotations are consumed in order, one per parsed one.
            test_annotation = annotations.pop(0)
            for s in annotation.__slots__:
                if s == "tag_attributes":
                    for pair in getattr(annotation, s):
                        self.assertEqual(list(pair), test_annotation[s].pop(0))
                else:
                    self.assertEqual(getattr(annotation, s), test_annotation[s])
        # Every expected annotation must have been matched.
        self.assertEqual(annotations, [])
        count += 1
        fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
def decode(self, text):
    """Parse ``text`` as JSON and return the resulting Python object."""
    decoded = json.loads(text)
    return decoded
def json_get(opts, path):
    """Fetch the web-service URL for ``path`` and return its decoded JSON.

    The URL is built by ``get_wsurl(opts, path)``; the response body is
    read in full and parsed with ``json.loads``.
    """
    url = get_wsurl(opts, path)
    response = urllib.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        # Release the connection even when the body is not valid JSON.
        response.close()
def _read_template_annotation(html_tag):
    """Return the decoded ``data-scrapy-annotate`` attribute of a tag.

    Returns ``None`` when the tag has no such attribute; otherwise the
    attribute value is parsed as JSON and the resulting object returned.
    """
    template_attr = html_tag.attributes.get('data-scrapy-annotate')
    if template_attr is None:
        return None
    # Double quotes inside an HTML attribute value arrive escaped as
    # &quot;. The previous replace('"', '"') was a no-op, so escaped
    # values could not be parsed as JSON.
    unescaped = template_attr.replace('&quot;', '"')
    return json.loads(unescaped)
def _check_output(self):
    """Assert that the collected output is a JSON list holding one item.

    The text in ``self.output`` is stripped of surrounding whitespace,
    decoded as JSON and compared with ``[dict(self.i)]``.
    """
    raw = self.output.getvalue().strip()
    decoded = json.loads(raw)
    expected = [dict(self.i)]
    self.assertEqual(decoded, expected)
def _pop(self):
    """Take at most one message from the queue and return its decoded body.

    Returns the JSON-decoded body of the first message returned by
    ``self.queue.get_messages``, or ``None`` when no message is returned.
    """
    received = self.queue.get_messages(
        1, visibility_timeout=self.visibility_timeout)
    if not received:
        return None
    message = received[0]
    # The message is deleted before its body is fetched and decoded.
    message.delete()
    return json.loads(message.get_body())