예제 #1
0
def recover_num_items_stored(logfile):
    """Re-open the state store recorded in ``logfile`` in recovery mode.

    Args:
        logfile: log from which the ``STATE_STORE_PATH`` and
            ``CHECKPOINT_FREQUENCY`` parameters are read; it is passed
            straight through to ``read_param_from_log``.

    Returns:
        Whatever ``PriorityQueue.initialize`` returns when invoked with
        ``do_recovery=True`` -- presumably the number of items recovered,
        going by this function's name (TODO: confirm against ``initialize``).
    """
    store_path = read_param_from_log(logfile, param_name="STATE_STORE_PATH")
    # NOTE(review): the original author flagged that the logs produced so far
    # may not record this parameter -- confirm before relying on it.
    freq = read_param_from_log(logfile, param_name="CHECKPOINT_FREQUENCY")

    queue = PriorityQueue("sqlite", store_name=store_path)
    # No seed nodes are supplied: the queue contents come from recovery.
    recovery_options = {
        "seed_nodes": [],
        "do_recovery": True,
        "checkpoint_freq": freq,
    }
    return queue.initialize(**recovery_options)
예제 #2
0
 def __init__(self, network, store_type="gml"):
     """Set up a crawler for ``network`` with its buffer and visit queue.

     Args:
       network: the network object to be crawled; its ``label`` attribute
                is used to name the backing stores.
       store_type: the type of storage. Valid values are "gml",
                   "basic_shelve" or "couchdb".
     """
     self.network = network

     # Resolve the storage backend once and share it between both stores.
     backend_cls = GenericStore.get_store_class(store_type)

     # Buffer that holds the crawled network data.
     self.gbuffer = GraphBuffer(self.network.label, backend_cls)

     # Priority queue that keeps track of the nodes still to visit.
     state_store_name = self.network.label + "_state"
     self.pqueue = PriorityQueue(backend_cls, store_name=state_store_name)
 def setUp(self):
     """Create the fixture: a queue named "testpq" and sample pairs."""
     self.pqueue = PriorityQueue(store_class=STORE_CLASS, store_name="testpq")
     # (number, subject) pairs, deliberately not in sorted order.
     self.test_data = [
         (10, "Phy"),
         (12, "Maths"),
         (30, "Chem"),
         (25, "Bio"),
         (41, "English"),
     ]