def __create_tree(self):
    """Build a small sample tree and the schema that describes it.

    The tree hangs off a fresh ``StandardNode`` sentinel: a ``"root"`` child
    with four ``"issueN"`` children, three of which have ``"articleN-M"``
    children.  The schema is a nested ``(name, state, children)`` tuple that
    is asserted against ``root`` with ``subtrees_equal`` before returning.

    Returns:
        A ``(sentinel, schema)`` tuple.
    """
    sentinel = StandardNode()
    root = sentinel.add_child("root", NodeState.VISITED)

    i1 = root.add_child("issue1", NodeState.CLOSED)
    i1.add_child("article1-1", NodeState.CLOSED)
    i1.add_child("article1-2", NodeState.CLOSED)

    # NOTE(review): "issue2" is created as VISITED here, while the schema
    # below lists it as CLOSED.  Left untouched because the behavior of
    # add_child/subtrees_equal is not visible from here -- TODO confirm.
    i2 = root.add_child("issue2", NodeState.VISITED)
    i2.add_child("article2-1", NodeState.OPEN)
    i2.add_child("article2-2", NodeState.CLOSED)
    i2.add_child("article2-3", NodeState.ERROR)

    root.add_child("issue3", NodeState.PROCESSING)

    i4 = root.add_child("issue4", NodeState.ERROR)
    i4.add_child("article4-1", NodeState.PROCESSING)

    t = ("root", NodeState.VISITED, [
        ("issue1", NodeState.CLOSED, [
            ("article1-1", NodeState.CLOSED, []),
            ("article1-2", NodeState.CLOSED, []),
        ]),
        ("issue2", NodeState.CLOSED, [
            ("article2-1", NodeState.OPEN, []),
            ("article2-2", NodeState.CLOSED, []),
            ("article2-3", NodeState.ERROR, []),
        ]),
        ("issue3", NodeState.PROCESSING, []),
        ("issue4", NodeState.ERROR, [
            ("article4-1", NodeState.PROCESSING, []),
        ]),
    ])

    # assertTrue replaces the deprecated unittest alias assert_.
    self.assertTrue(subtrees_equal(t, root))
    return (sentinel, t)
def __create_tree(self):
    """Create a sample node tree plus the schema tuple describing it.

    A ``"root"`` node is added under a new ``StandardNode`` sentinel and is
    populated with ``"issue1"`` .. ``"issue4"`` and their article children.
    The schema, a nested ``(name, state, children)`` tuple, is asserted
    against ``root`` with ``subtrees_equal`` before being returned.

    Returns:
        A ``(sentinel, schema)`` tuple.
    """
    sentinel = StandardNode()
    root = sentinel.add_child("root", NodeState.VISITED)

    i1 = root.add_child("issue1", NodeState.CLOSED)
    i1.add_child("article1-1", NodeState.CLOSED)
    i1.add_child("article1-2", NodeState.CLOSED)

    # NOTE(review): the node is added as VISITED but issue2_schema below
    # says CLOSED; kept as-is since subtrees_equal/add_child semantics are
    # not visible from this method -- TODO confirm which one is intended.
    i2 = root.add_child("issue2", NodeState.VISITED)
    i2.add_child("article2-1", NodeState.OPEN)
    i2.add_child("article2-2", NodeState.CLOSED)
    i2.add_child("article2-3", NodeState.ERROR)

    root.add_child("issue3", NodeState.PROCESSING)

    i4 = root.add_child("issue4", NodeState.ERROR)
    i4.add_child("article4-1", NodeState.PROCESSING)

    # Expected shape of the tree, one sub-schema per issue.
    issue1_schema = ("issue1", NodeState.CLOSED, [
        ("article1-1", NodeState.CLOSED, []),
        ("article1-2", NodeState.CLOSED, []),
    ])
    issue2_schema = ("issue2", NodeState.CLOSED, [
        ("article2-1", NodeState.OPEN, []),
        ("article2-2", NodeState.CLOSED, []),
        ("article2-3", NodeState.ERROR, []),
    ])
    issue3_schema = ("issue3", NodeState.PROCESSING, [])
    issue4_schema = ("issue4", NodeState.ERROR, [
        ("article4-1", NodeState.PROCESSING, []),
    ])
    t = ("root", NodeState.VISITED,
         [issue1_schema, issue2_schema, issue3_schema, issue4_schema])

    # assertTrue is the supported spelling of the deprecated assert_ alias.
    self.assertTrue(subtrees_equal(t, root))
    return (sentinel, t)
def test_basic_write_and_read(self):
    """Check that a tree survives an XML write/read round trip.

    The sample tree is written with ``XMLTreeWriter``, read back with
    ``XMLTreeReader`` into a fresh sentinel and compared against the schema.
    The re-read tree is then written again, and both serialized strings
    must be equal.
    """
    (sentinel, schema_root) = self.__create_tree()

    # First pass: serialize the original tree.
    out = StringIO.StringIO()
    writer = XMLTreeWriter(out)
    writer.write(sentinel)
    out_str = out.getvalue()

    # Parse the serialized form into a brand new sentinel.
    in_ = StringIO.StringIO(out_str)
    reader = XMLTreeReader(in_)
    new_sentinel = StandardNode()
    reader.read(new_sentinel)
    # assertTrue replaces the deprecated unittest alias assert_.
    self.assertTrue(
        subtrees_equal(schema_root, new_sentinel.get_child("root")))

    # Second pass: serializing the re-read tree must give the same text.
    out2 = StringIO.StringIO()
    writer2 = XMLTreeWriter(out2)
    writer2.write(new_sentinel)
    out2_str = out2.getvalue()
    self.assertEqual(out_str, out2_str)
def test_basic_write_and_read(self):
    """Round-trip the sample tree through the XML writer and reader.

    Steps: serialize the tree from ``__create_tree``, read the result into
    a new ``StandardNode`` sentinel, compare its ``"root"`` child with the
    schema, then serialize the new tree and require identical output.
    """
    (sentinel, schema_root) = self.__create_tree()

    def serialize(node):
        # Write ``node`` with XMLTreeWriter and return the produced text.
        buf = StringIO.StringIO()
        XMLTreeWriter(buf).write(node)
        return buf.getvalue()

    def deserialize(text):
        # Read ``text`` with XMLTreeReader into a fresh sentinel node.
        fresh = StandardNode()
        XMLTreeReader(StringIO.StringIO(text)).read(fresh)
        return fresh

    out_str = serialize(sentinel)
    new_sentinel = deserialize(out_str)

    # assertTrue is the supported spelling of the deprecated assert_ alias.
    self.assertTrue(
        subtrees_equal(schema_root, new_sentinel.get_child("root")))

    # Writing the re-read tree must reproduce the first serialization.
    out2_str = serialize(new_sentinel)
    self.assertEqual(out_str, out2_str)
def run(self):
    """Parse the arguments, run the multithreaded crawler and print a summary.

    The crawler is built from the navigators created for the parsed
    arguments, a fresh ``StandardNode`` sentinel, the daily schedule, the
    log and state file paths, the save period and the logging level.  After
    ``run()`` returns, the navigators creator's ``on_exit`` is called and
    the tree summary for the ``"root"`` child is printed.
    """
    args = self.__parse()
    thread_count = args.threads
    log_level = self.__get_logging_level(args)
    log_path = args.log_file
    daily_schedule = self.__get_schedule(args.daily_schedule)
    navigators = self.__navigators_creator.create(args, thread_count)

    sentinel = StandardNode()
    crawler = MultithreadedCrawler(
        navigators,
        sentinel,
        daily_schedule,
        log_path,
        args.state_file,
        self.__save_period,
        log_level,
    )

    banner = (
        "Starting activity with {} threads, "
        "activity daily schedule: {}"
    ).format(thread_count, args.daily_schedule)
    # A single parenthesized argument prints exactly like the bare
    # print statement.
    print(banner)

    crawler.run()

    root = sentinel.get_child("root")
    self.__navigators_creator.on_exit()
    print("Done.\n")
    summary = self.__get_tree_summary(root, args.state_file, log_path)
    print(summary)
def __init__(self, parent=None, name="sentinel", state=NodeState.OPEN):
    """Initialize the node and reset its processing counter.

    Args:
        parent: Forwarded to ``StandardNode.__init__``; defaults to ``None``.
        name: Forwarded to ``StandardNode.__init__``; defaults to
            ``"sentinel"``.
        state: Forwarded to ``StandardNode.__init__``; defaults to
            ``NodeState.OPEN``.
    """
    StandardNode.__init__(
        self,
        parent=parent,
        name=name,
        state=state,
    )
    # Counter starts at zero for every newly created node.
    self.processed_times = 0