def test_concat(self):
    """Check that objects dumped back-to-back into one stream load back in order."""
    self._oso(self.test_objects)

    # Serialize every object into a single shared buffer, one after another.
    stream = StringIO()
    for original in self.test_objects:
        dump(original, stream)
    stream.seek(0)

    # Decode until the loader signals that the stream is exhausted.
    decoded = []
    while True:
        try:
            item = load(stream)
        except EOFError:
            break
        decoded.append(item)

    assert decoded == self.test_objects
def fetch_url(self, url):
    """Return the stored HTTP response for ``url`` as a dictionary.

    The corpus file for ``url`` is located with ``self.get_file_name`` and
    decoded with ``cbor``. The returned dictionary contains:

    url: the requested url
    content: the stored raw content; None if the url is not in the corpus,
        "" if the record carries no raw content
    http_code: the stored status code as an int; 404 if the url is not in
        the corpus
    content_type: value of the stored Content-Type header, or None
    size: size in bytes of the corpus file on disk; 0 if the url is not in
        the corpus
    is_redirected: the stored redirection flag; False when absent
    final_url: the stored final url; None when absent

    :param url: the url to be fetched
    :return: a dictionary describing the response for the given url
    """
    file_name = self.get_file_name(url)
    if file_name is None:
        # Unknown url: answer like a 404 with no payload.
        return {
            "url": url,
            "content": None,
            "http_code": 404,
            "headers": None,
            "size": 0,
            "content_type": None,
            "is_redirected": False,
            "final_url": None,
        }

    # Use a context manager so the file handle is closed even if decoding
    # fails (the handle used to be left open).
    with open(file_name, "rb") as corpus_file:
        data_dict = cbor.load(corpus_file)

    def value_of(key, default):
        # Fields are stored as {b'value': ...}; fall back to the default
        # when either the field or its b'value' entry is missing.
        if key in data_dict and b'value' in data_dict[key]:
            return data_dict[key][b'value']
        return default

    def get_content_type(data):
        # Return the first Content-Type header value, or None if there is none.
        if b'http_headers' not in data:
            return None
        for header in data[b"http_headers"][b'value']:
            if header[b'k'][b'value'] == b'Content-Type':
                return str(header[b'v'][b'value'])
        return None

    # NOTE(review): unlike the 404 answer, this dictionary has no "headers"
    # key; kept as-is so existing callers see the same shape.
    return {
        "url": url,
        "content": value_of(b'raw_content', ""),
        "http_code": int(data_dict[b"http_code"][b'value']),
        "content_type": get_content_type(data_dict),
        "size": os.stat(file_name).st_size,
        "is_redirected": value_of(b'is_redirected', False),
        "final_url": value_of(b'final_url', None),
    }
def sum_lists(a, b):
    """Add ``b`` into ``a`` element by element, in place.

    Elements of ``a`` that are lists are descended into recursively, paired
    with the element of ``b`` at the same index; every other element is
    updated with ``+=``. ``a`` is mutated and nothing is returned.
    """
    for idx, item in enumerate(a):
        if not isinstance(item, list):
            a[idx] += b[idx]
        else:
            sum_lists(item, b[idx])


from cbor import cbor

# Merge the files named on the command line (from the third argument on).
# The first file becomes the accumulator; later files are folded into it.
out = None
for fname in sys.argv[2:]:
    print(fname)
    with open(fname, 'rb') as f:
        hf = cbor.load(f)

    if out is None:
        # Nothing accumulated yet: adopt the first file wholesale.
        out = hf
        continue

    # Add up the counters stored under 'N'.
    for n in ('entries', 'events', 'count'):
        out['N'][n] += hf['N'][n]

    # Files can only be merged when their axes and bins match exactly.
    if hf["axes"] != out["axes"]:
        raise ValueError("incompatible axes definitions in " + fname)
    if hf["bins"] != out["bins"]:
        raise ValueError("incompatible bins definitions in " + fname)

    for hname, h in hf["hists"].items():
        # print(hname)
        hout = out["hists"][hname]
def fetch_url(self, url):
    """
    This method, using the given url, should find the corresponding file in the corpus and return a dictionary
    representing the response to the given url. The dictionary contains the following keys:

    url: the requested url to be downloaded
    content: the content of the downloaded url in binary format. None if url does not exist in the corpus,
        "" if the corpus record carries no raw content
    size: the size in bytes of the corpus file on disk. 0 if url does not exist in the corpus
    content_type: Content-Type from the response http headers. None if the url does not exist in the corpus
        or content-type wasn't provided
    http_code: the response http status code. 404 if the url does not exist in the corpus
    is_redirected: a boolean indicating if redirection has happened to get the final response
    final_url: the final url after all of the redirections. None if the record does not store one

    :param url: the url to be fetched
    :return: a dictionary containing the http response for the given url
    """
    file_name = self.get_file_name(url)
    if file_name is None:
        # The url is not part of the corpus: report it as a 404.
        return {
            "url": url,
            "content": None,
            "http_code": 404,
            "headers": None,
            "size": 0,
            "content_type": None,
            "is_redirected": False,
            "final_url": None,
        }

    # Open through a context manager so the handle is always closed; it was
    # previously left open after decoding.
    with open(file_name, "rb") as corpus_file:
        data_dict = cbor.load(corpus_file)

    def value_of(key, default):
        # Each field is wrapped as {b'value': ...}; use the default when the
        # field or its b'value' entry is absent.
        if key in data_dict and b'value' in data_dict[key]:
            return data_dict[key][b'value']
        return default

    def get_content_type(data):
        # First Content-Type header wins; None when no such header is stored.
        if b'http_headers' not in data:
            return None
        for header in data[b"http_headers"][b'value']:
            if header[b'k'][b'value'] == b'Content-Type':
                return str(header[b'v'][b'value'])
        return None

    # NOTE(review): the 404 answer carries a "headers" key that this one
    # does not; left unchanged to keep the returned shape stable.
    return {
        "url": url,
        "content": value_of(b'raw_content', ""),
        "http_code": int(data_dict[b"http_code"][b'value']),
        "content_type": get_content_type(data_dict),
        "size": os.stat(file_name).st_size,
        "is_redirected": value_of(b'is_redirected', False),
        "final_url": value_of(b'final_url', None),
    }