Esempio n. 1
0
    def test_concat(self):
        """Check that objects dumped back-to-back can be loaded back one by one.

        Every object in ``self.test_objects`` is written to one shared stream;
        the stream is then rewound and decoded until ``load`` raises
        ``EOFError``. The decoded sequence must equal the input sequence.
        """
        # NOTE(review): `_oso` is defined outside this view; it is invoked
        # first with the same objects, exactly as before.
        self._oso(self.test_objects)

        stream = StringIO()
        for item in self.test_objects:
            dump(item, stream)
        stream.seek(0)

        decoded = []
        while True:
            try:
                decoded.append(load(stream))
            except EOFError:
                # End of stream: every concatenated object has been read.
                break
        assert decoded == self.test_objects
Esempio n. 2
0
    def fetch_url(self, url):
        """Return the stored response for ``url`` as a dictionary.

        The corpus file for ``url`` is located with ``self.get_file_name``.
        When there is none, a 404 placeholder is returned. Otherwise the file
        is decoded with ``cbor.load`` and the result is built from it.

        Keys of the returned dictionary:

        url: the requested url.
        content: ``raw_content`` value of the record; ``""`` when the record
            has no such value, ``None`` when the url is not in the corpus.
        http_code: ``http_code`` value of the record as an int; 404 when the
            url is not in the corpus.
        content_type: value of the ``Content-Type`` header as text, or
            ``None`` when the record has no headers or no such header.
        size: size in bytes of the corpus file on disk; 0 when the url is not
            in the corpus.
        is_redirected: ``is_redirected`` value of the record, else ``False``.
        final_url: ``final_url`` value of the record, else ``None``.
        headers: ``None``; only present when the url is not in the corpus.

        :param url: the url to be fetched
        :return: a dictionary describing the response for the given url
        """
        file_name = self.get_file_name(url)
        if file_name is None:
            # The url is not part of the corpus: report it as a 404.
            return {
                "url": url,
                "content": None,
                "http_code": 404,
                "headers": None,
                "size": 0,
                "content_type": None,
                "is_redirected": False,
                "final_url": None
            }

        # Use a context manager so the file handle is closed as soon as the
        # record is decoded instead of being left to the garbage collector.
        with open(file_name, "rb") as corpus_file:
            data_dict = cbor.load(corpus_file)

        def get_value(key, default):
            # data_dict[key][b'value'] when both levels exist, else default.
            if key in data_dict and b'value' in data_dict[key]:
                return data_dict[key][b'value']
            return default

        def get_content_type(data):
            # Scan the stored header list for Content-Type.
            if b'http_headers' not in data:
                return None

            for header in data[b"http_headers"][b'value']:
                if header[b'k'][b'value'] == b'Content-Type':
                    value = header[b'v'][b'value']
                    if isinstance(value, bytes):
                        # str() on bytes would yield "b'...'" on Python 3,
                        # so decode to get the plain header text.
                        return value.decode("utf-8", errors="replace")
                    return str(value)
            return None

        return {
            "url": url,
            "content": get_value(b'raw_content', ""),
            "http_code": int(data_dict[b"http_code"][b'value']),
            "content_type": get_content_type(data_dict),
            "size": os.stat(file_name).st_size,
            "is_redirected": get_value(b'is_redirected', False),
            "final_url": get_value(b'final_url', None)
        }
Esempio n. 3
0
def sum_lists(a, b):
    """Add ``b`` into ``a`` element by element, in place.

    Positions of ``a`` that hold a list are processed recursively together
    with the entry of ``b`` at the same index; every other position is
    updated with ``+=``. ``a`` is modified, ``b`` is only read, and nothing
    is returned.
    """
    for index, current in enumerate(a):
        if isinstance(current, list):
            sum_lists(current, b[index])
            continue
        a[index] += b[index]


from cbor import cbor

# Merge the CBOR files named in sys.argv[2:] into one accumulator, `out`.
# NOTE(review): the keys used below ("N", "axes", "bins", "hists") suggest
# each file holds a set of histograms - confirm against the file producer.
out = None
for fname in sys.argv[2:]:
    print(fname)
    with open(fname, 'rb') as f:
        hf = cbor.load(f)
    # The first decoded file becomes the accumulator itself.
    if out is None:
        out = hf
        continue

    # Add this file's counters to the running totals.
    for n in ('entries', 'events', 'count'):
        out['N'][n] += hf['N'][n]

    # Refuse to merge files whose "axes" or "bins" entries differ from the
    # accumulator's.
    # NOTE(review): the counters above are already updated when these checks
    # run, so `out` is partially modified if one of them raises.
    if hf["axes"] != out["axes"]:
        raise ValueError("incompatible axes definitions in " + fname)
    if hf["bins"] != out["bins"]:
        raise ValueError("incompatible bins definitions in " + fname)

    for hname, h in hf["hists"].items():
        # print(hname)
        # Look up the accumulator entry with the same name as `h`.
        # NOTE(review): the rest of this loop body is not visible here;
        # presumably `h` is combined into `hout` - confirm.
        hout = out["hists"][hname]
Esempio n. 4
0
    def fetch_url(self, url):
        """
        This method, using the given url, finds the corresponding file in the corpus and returns a dictionary
        representing the response to the given url. The dictionary contains the following keys:

        url: the requested url to be downloaded
        content: the raw content stored for the url. "" if the record has no raw content, None if the url does not
            exist in the corpus
        size: the size in bytes of the corpus file on disk. 0 if the url does not exist in the corpus
        content_type: Content-Type from the stored http headers, as text. None if the url does not exist in the
            corpus or content-type wasn't provided
        http_code: the response http status code as an int. 404 if the url does not exist in the corpus
        is_redirected: the redirection flag stored in the record. False if the record has none or the url does not
            exist in the corpus
        final_url: the final url stored in the record. None if the record has none or the url does not exist in
            the corpus
        headers: None. Only present when the url does not exist in the corpus

        :param url: the url to be fetched
        :return: a dictionary containing the http response for the given url
        """

        file_name = self.get_file_name(url)
        if file_name is None:
            # The url is not part of the corpus: report it as a 404.
            url_data = {
                "url": url,
                "content": None,
                "http_code": 404,
                "headers": None,
                "size": 0,
                "content_type": None,
                "is_redirected": False,
                "final_url": None
            }
        else:
            # Read through a context manager so the file handle is closed
            # right after decoding rather than leaked.
            with open(file_name, "rb") as corpus_file:
                data_dict = cbor.load(corpus_file)

            def get_value(key, default):
                # data_dict[key][b'value'] when both levels exist, else default.
                if key in data_dict and b'value' in data_dict[key]:
                    return data_dict[key][b'value']
                return default

            def get_content_type(data):
                # Scan the stored header list for Content-Type.
                if b'http_headers' not in data:
                    return None

                hlist = data[b"http_headers"][b'value']
                for header in hlist:
                    if header[b'k'][b'value'] == b'Content-Type':
                        value = header[b'v'][b'value']
                        if isinstance(value, bytes):
                            # str() on bytes would yield "b'...'" on
                            # Python 3, so decode to get the header text.
                            return value.decode("utf-8", errors="replace")
                        return str(value)
                return None

            url_data = {
                "url": url,
                "content": get_value(b'raw_content', ""),
                "http_code": int(data_dict[b"http_code"][b'value']),
                "content_type": get_content_type(data_dict),
                "size": os.stat(file_name).st_size,
                "is_redirected": get_value(b'is_redirected', False),
                "final_url": get_value(b'final_url', None)
            }

        return url_data