Ejemplo n.º 1
0
def test_more_host_zero_path():
    """Host limits of 5 and 7 with zero path segments give the first four sample sub-URIs."""
    for host_limit in (5, 7):
        produced = generate_suburis(sample[0]["surt"],
                                    max_host_segments=host_limit,
                                    max_path_segments=0)
        assert produced == sample[0]["suburis"][0:4]
Ejemplo n.º 2
0
def test_more_host_equal_path():
    """Host limits of 5 and 7 with a path limit of 3 give the full sample sub-URI list."""
    for host_limit in (5, 7):
        produced = generate_suburis(sample[0]["surt"],
                                    max_host_segments=host_limit,
                                    max_path_segments=3)
        assert produced == sample[0]["suburis"]
Ejemplo n.º 3
0
def test_more_host_middle_path():
    """Host limits of 5 and 7 with a path limit of 2 give the first six sample sub-URIs."""
    for host_limit in (5, 7):
        produced = generate_suburis(sample[0]["surt"],
                                    max_host_segments=host_limit,
                                    max_path_segments=2)
        assert produced == sample[0]["suburis"][0:6]
Ejemplo n.º 4
0
def test_zero_host_more_path():
    """A host limit of 0 gives an empty list for path limits of 4 and 6."""
    for path_limit in (4, 6):
        produced = generate_suburis(sample[0]["surt"],
                                    max_host_segments=0,
                                    max_path_segments=path_limit)
        assert produced == []
Ejemplo n.º 5
0
def test_one_host_more_path():
    """A host limit of 1 gives only the first sample sub-URI for path limits of 4 and 6."""
    for path_limit in (4, 6):
        produced = generate_suburis(sample[0]["surt"],
                                    max_host_segments=1,
                                    max_path_segments=path_limit)
        assert produced == sample[0]["suburis"][0:1]
Ejemplo n.º 6
0
def test_explicit_none_param():
    """Passing None for either or both limits gives the full sample sub-URI list."""
    none_limit_kwargs = (
        {"max_host_segments": None},
        {"max_path_segments": None},
        {"max_host_segments": None, "max_path_segments": None},
    )
    for limits in none_limit_kwargs:
        produced = generate_suburis(sample[0]["surt"], **limits)
        assert produced == sample[0]["suburis"]
Ejemplo n.º 7
0
def test_non_int_param():
    """Passing the string "all" for either or both limits gives the full sample sub-URI list."""
    string_limit_kwargs = (
        {"max_host_segments": "all"},
        {"max_path_segments": "all"},
        {"max_host_segments": "all", "max_path_segments": "all"},
    )
    for limits in string_limit_kwargs:
        produced = generate_suburis(sample[0]["surt"], **limits)
        assert produced == sample[0]["suburis"]
Ejemplo n.º 8
0
 def _update_ds(self, entry):
     """Update the "suburi", "time" and "mediatype" records for one CDX entry.

     Every sub-URI generated from ``entry.surt`` (limited by this object's
     ``max_host_segments`` / ``max_path_segments``) is passed to
     ``_update_record`` under "suburi"; the first six characters of
     ``entry.time`` and ``entry.mime`` follow under "time" and "mediatype".
     """
     host_limit = self.max_host_segments
     path_limit = self.max_path_segments
     for suburi in generate_suburis(entry.surt,
                                    max_host_segments=host_limit,
                                    max_path_segments=path_limit):
         self._update_record("suburi", suburi, entry.surt)
     # Only the first six characters of the timestamp are recorded
     # (presumably YYYYMM -- confirm against the CDX time format).
     time_prefix = entry.time[0:6]
     self._update_record("time", time_prefix, entry.surt)
     self._update_record("mediatype", entry.mime, entry.surt)
Ejemplo n.º 9
0
 def _update_ds(self, entry):
     """Feed one CDX entry into the "suburi", "time" and "mediatype" records.

     Sub-URIs come from ``generate_suburis(entry.surt, ...)`` using this
     object's segment limits; each one, then the first six characters of
     ``entry.time``, then ``entry.mime`` is handed to ``_update_record``
     together with ``entry.surt``.
     """
     limits = {
         "max_host_segments": self.max_host_segments,
         "max_path_segments": self.max_path_segments,
     }
     for each_suburi in generate_suburis(entry.surt, **limits):
         self._update_record("suburi", each_suburi, entry.surt)
     self._update_record("time", entry.time[0:6], entry.surt)
     self._update_record("mediatype", entry.mime, entry.surt)
 def _update_ds(self, count, entry):
     """Update the "suburi" record for every sub-URI of one entry.

     ``entry`` is converted with ``surt`` and expanded with
     ``generate_suburis`` using this object's segment limits; each resulting
     sub-URI is passed to ``_update_record("suburi", s, count)``.

     Processing is best-effort: any ordinary error is reported on stdout and
     swallowed so that one bad entry does not stop the caller.
     """
     try:
         suburis = generate_suburis(surt(entry), max_host_segments=self.max_host_segments, max_path_segments=self.max_path_segments)
         for s in suburis:
             self._update_record("suburi", s, count)
         #self._update_record("time", entry.time[0:6], entry.surt)
         #self._update_record("mediatype", entry.mime, entry.surt)
     except Exception:
         # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt
         # and SystemExit propagate.  ``format`` is used instead of ``+`` so
         # that a non-string entry cannot raise again inside the handler.
         print("Something went wrong while processing {0}".format(entry))
 def _update_ds(self, count, entry):
     """Update the "suburi" record for every sub-URI of one entry.

     ``entry`` is converted with ``surt`` and expanded with
     ``generate_suburis`` using this object's segment limits; each resulting
     sub-URI is passed to ``_update_record("suburi", s, count)``.

     Processing is best-effort: any ordinary error is reported on stdout and
     swallowed so that one bad entry does not stop the caller.
     """
     try:
         suburis = generate_suburis(
             surt(entry),
             max_host_segments=self.max_host_segments,
             max_path_segments=self.max_path_segments)
         for s in suburis:
             self._update_record("suburi", s, count)
         #self._update_record("time", entry.time[0:6], entry.surt)
         #self._update_record("mediatype", entry.mime, entry.surt)
     except Exception:
         # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt
         # and SystemExit propagate.  ``format`` is used instead of ``+`` so
         # that a non-string entry cannot raise again inside the handler.
         print("Something went wrong while processing {0}".format(entry))
def generate_all_suburis(host, path):
    """Write the sub-URIs of every input entry to a single ``.suburi`` file.

    Each file named in ``sys.argv[1:]`` is read line by line. A line is split
    on whitespace into a count and an entry; the entry is converted with
    ``surt`` and expanded with ``generate_suburis``. The resulting sub-URIs
    are written one per line to ``<collection>-H<host>P<path>.suburi`` inside
    ``opdir``.

    Args:
        host: Passed as ``max_host_segments``; also used in the file name.
        path: Passed as ``max_path_segments``; also used in the file name.

    Lines that cannot be processed are reported on stdout and skipped.
    """
    print("Generating Sub-URIs of {0} with Host: {1}, Path: {2}".format(collection, host, path))
    filename = "{0}-H{1}P{2}.suburi".format(collection, host, path)
    # The ``with`` block closes the output file even if an input file cannot
    # be opened or another unexpected error escapes the loop.
    with open(os.path.join(opdir, filename), "w") as opf:
        for extr in sys.argv[1:]:
            with open(extr) as f:
                for line in f:
                    try:
                        # Splitting is inside the guard so a blank or malformed
                        # line is reported like any other bad entry instead of
                        # aborting the whole run.
                        _count, entry = line.split()
                        suburis = generate_suburis(surt(entry), max_host_segments=host, max_path_segments=path)
                        opf.write("\n".join(suburis) + "\n")
                    except Exception:
                        # Not a bare ``except``: Ctrl-C and SystemExit must
                        # still stop the script.
                        print("Something went wrong while processing " + line)
def test_explicit_none_param():
    """Passing None for either or both limits gives the full sample sub-URI list."""
    record = sample[0]
    host_only = generate_suburis(record["surt"], max_host_segments=None)
    assert host_only == record["suburis"]
    path_only = generate_suburis(record["surt"], max_path_segments=None)
    assert path_only == record["suburis"]
    both = generate_suburis(record["surt"],
                            max_host_segments=None,
                            max_path_segments=None)
    assert both == record["suburis"]
def test_non_int_param():
    """Passing the string "all" for either or both limits gives the full sample sub-URI list."""
    record = sample[0]
    host_only = generate_suburis(record["surt"], max_host_segments="all")
    assert host_only == record["suburis"]
    path_only = generate_suburis(record["surt"], max_path_segments="all")
    assert path_only == record["suburis"]
    both = generate_suburis(record["surt"],
                            max_host_segments="all",
                            max_path_segments="all")
    assert both == record["suburis"]
Ejemplo n.º 15
0
def test_all_host_all_path():
    """With no limits, truncated SURTs give matching prefixes of the sample sub-URIs.

    Each case pairs the number of trailing characters cut from the sample
    SURT with the number of trailing sub-URIs expected to be missing.
    """
    assert generate_suburis(sample[0]["surt"]) == sample[0]["suburis"]

    # (slice end applied to the SURT, slice end applied to the expected list);
    # ``None`` keeps the whole expected list.
    plain_cuts = (
        (-7, None),
        (-8, None),
        (-16, -1),
        (-17, -1),
        (-19, -2),
        (-20, -2),
        (-22, -3),
    )
    for surt_end, expected_end in plain_cuts:
        produced = generate_suburis(sample[0]["surt"][:surt_end])
        assert produced == sample[0]["suburis"][:expected_end]

    # These cuts have ")/" appended to the truncated SURT before the call.
    reclosed_cuts = ((-28, -4), (-35, -5), (-43, -6))
    for surt_end, expected_end in reclosed_cuts:
        produced = generate_suburis(sample[0]["surt"][:surt_end] + ")/")
        assert produced == sample[0]["suburis"][:expected_end]
def test_middle_host_all_path():
    """A host limit of 2 with no path limit gives the first two sample sub-URIs."""
    produced = generate_suburis(sample[0]["surt"], max_host_segments=2)
    expected = sample[0]["suburis"][0:2]
    assert produced == expected
def test_all_host_one_path():
    """A path limit of 1 with no host limit gives the first five sample sub-URIs."""
    produced = generate_suburis(sample[0]["surt"], max_path_segments=1)
    expected = sample[0]["suburis"][0:5]
    assert produced == expected
Ejemplo n.º 18
0
def test_one_host_all_path():
    """A host limit of 1 with no path limit gives only the first sample sub-URI."""
    produced = generate_suburis(sample[0]["surt"], max_host_segments=1)
    expected = sample[0]["suburis"][0:1]
    assert produced == expected
Ejemplo n.º 19
0
def test_zero_host_all_path():
    """A host limit of 0 with no path limit gives an empty list."""
    produced = generate_suburis(sample[0]["surt"], max_host_segments=0)
    assert produced == []
def test_more_host_equal_path():
    """Host limits of 5 and 7 with a path limit of 3 give the full sample sub-URI list."""
    for host_limit in (5, 7):
        produced = generate_suburis(
            sample[0]["surt"],
            max_host_segments=host_limit,
            max_path_segments=3,
        )
        assert produced == sample[0]["suburis"]
def test_one_host_equal_path():
    """A host limit of 1 with a path limit of 3 gives only the first sample sub-URI."""
    produced = generate_suburis(
        sample[0]["surt"],
        max_host_segments=1,
        max_path_segments=3,
    )
    expected = sample[0]["suburis"][0:1]
    assert produced == expected
def test_equal_host_all_path():
    """A host limit of 4 with no path limit gives the full sample sub-URI list."""
    produced = generate_suburis(sample[0]["surt"], max_host_segments=4)
    expected = sample[0]["suburis"]
    assert produced == expected
def test_equal_host_middle_path():
    """A host limit of 4 with a path limit of 2 gives the first six sample sub-URIs."""
    produced = generate_suburis(
        sample[0]["surt"],
        max_host_segments=4,
        max_path_segments=2,
    )
    expected = sample[0]["suburis"][0:6]
    assert produced == expected
def test_equal_host_zero_path():
    """A host limit of 4 with a path limit of 0 gives the first four sample sub-URIs."""
    produced = generate_suburis(
        sample[0]["surt"],
        max_host_segments=4,
        max_path_segments=0,
    )
    expected = sample[0]["suburis"][0:4]
    assert produced == expected
Ejemplo n.º 25
0
def test_paren_in_path():
    """The second sample (index 1) expands to its recorded sub-URI list."""
    produced = generate_suburis(sample[1]["surt"])
    expected = sample[1]["suburis"]
    assert produced == expected
def test_zero_host_all_path():
    """With max_host_segments=0 and no path limit the result is an empty list."""
    no_host_result = generate_suburis(sample[0]["surt"], max_host_segments=0)
    assert no_host_result == []
def test_all_host_all_path():
    """With no limits, truncated SURTs give matching prefixes of the sample sub-URIs."""
    base_surt = sample[0]["surt"]
    all_suburis = sample[0]["suburis"]
    # Each pair is (SURT passed to generate_suburis, expected sub-URI list).
    cases = [
        (base_surt, all_suburis),
        (base_surt[:-7], all_suburis),
        (base_surt[:-8], all_suburis),
        (base_surt[:-16], all_suburis[:-1]),
        (base_surt[:-17], all_suburis[:-1]),
        (base_surt[:-19], all_suburis[:-2]),
        (base_surt[:-20], all_suburis[:-2]),
        (base_surt[:-22], all_suburis[:-3]),
        # The remaining inputs have ")/" appended after truncation.
        (base_surt[:-28] + ")/", all_suburis[:-4]),
        (base_surt[:-35] + ")/", all_suburis[:-5]),
        (base_surt[:-43] + ")/", all_suburis[:-6]),
    ]
    for surt_input, expected in cases:
        assert generate_suburis(surt_input) == expected
def test_zero_host_more_path():
    """A host limit of 0 gives an empty list for path limits of 4 and 6."""
    for path_limit in (4, 6):
        produced = generate_suburis(
            sample[0]["surt"],
            max_host_segments=0,
            max_path_segments=path_limit,
        )
        assert produced == []
def test_all_host_more_path():
    """Path limits of 4 and 6 with no host limit give the full sample sub-URI list."""
    for path_limit in (4, 6):
        produced = generate_suburis(sample[0]["surt"], max_path_segments=path_limit)
        assert produced == sample[0]["suburis"]
def test_paren_in_path():
    """generate_suburis on sample[1]'s SURT gives sample[1]'s sub-URI list."""
    paren_sample = sample[1]
    assert generate_suburis(paren_sample["surt"]) == paren_sample["suburis"]
Ejemplo n.º 31
0
def test_middle_host_zero_path():
    """A host limit of 2 with a path limit of 0 gives the first two sample sub-URIs."""
    produced = generate_suburis(sample[0]["surt"],
                                max_host_segments=2,
                                max_path_segments=0)
    expected = sample[0]["suburis"][0:2]
    assert produced == expected
Ejemplo n.º 32
0
def test_more_host_all_path():
    """Host limits of 5 and 7 with no path limit give the full sample sub-URI list."""
    for host_limit in (5, 7):
        produced = generate_suburis(sample[0]["surt"],
                                    max_host_segments=host_limit)
        assert produced == sample[0]["suburis"]
Ejemplo n.º 33
0
def test_equal_host_one_path():
    """A host limit of 4 with a path limit of 1 gives the first five sample sub-URIs."""
    produced = generate_suburis(sample[0]["surt"],
                                max_host_segments=4,
                                max_path_segments=1)
    expected = sample[0]["suburis"][0:5]
    assert produced == expected
def test_middle_host_more_path():
    """A host limit of 2 gives the first two sample sub-URIs for path limits of 4 and 6."""
    for path_limit in (4, 6):
        produced = generate_suburis(
            sample[0]["surt"],
            max_host_segments=2,
            max_path_segments=path_limit,
        )
        assert produced == sample[0]["suburis"][0:2]
Ejemplo n.º 35
0
def test_equal_host_all_path():
    """With max_host_segments=4 and no path limit the full sample sub-URI list is returned."""
    first_sample = sample[0]
    produced = generate_suburis(first_sample["surt"], max_host_segments=4)
    assert produced == first_sample["suburis"]
def test_more_host_zero_path():
    """Host limits of 5 and 7 with zero path segments give the first four sample sub-URIs."""
    for host_limit in (5, 7):
        produced = generate_suburis(
            sample[0]["surt"],
            max_host_segments=host_limit,
            max_path_segments=0,
        )
        assert produced == sample[0]["suburis"][0:4]