def recover_num_items_stored(logfile):
    """Recover the persisted crawl state described in *logfile*.

    Reads the state-store path and checkpoint frequency parameters back
    out of the run log, rebuilds the sqlite-backed priority queue at that
    path, and re-initializes it in recovery mode.

    Args:
        logfile: path to (or handle of) the run log to read parameters from.

    Returns:
        Whatever ``PriorityQueue.initialize`` returns when run with
        ``do_recovery=True`` (per the function name, presumably the number
        of items recovered — TODO confirm against PriorityQueue).
    """
    store_path = read_param_from_log(logfile, param_name="STATE_STORE_PATH")
    # Ideally this too would be fetched from the log; the logs produced by
    # the runs we have so far do not record it in a usable way.
    freq = read_param_from_log(
        logfile, param_name="CHECKPOINT_FREQUENCY"
    )
    # NOTE(review): here the first argument is the literal string "sqlite",
    # while other call sites pass a store *class* — verify PriorityQueue
    # accepts both forms.
    queue = PriorityQueue("sqlite", store_name=store_path)
    return queue.initialize(seed_nodes=[], do_recovery=True, checkpoint_freq=freq)
def __init__(self, network, store_type="gml"):
    """Initialize a network crawler.

    Args:
        network: the network object to be crawled.
        store_type: the storage backend type. Valid values are "gml",
            "basic_shelve" or "couchdb".

    Returns:
        An initialized crawler object.
    """
    self.network = network
    backend = GenericStore.get_store_class(store_type)
    # Buffer that accumulates crawled network data before it is persisted.
    self.gbuffer = GraphBuffer(self.network.label, backend)
    # In-memory priority queue tracking which nodes remain to be visited;
    # its backing store is named after the network's label.
    self.pqueue = PriorityQueue(backend, store_name=self.network.label + "_state")
def setUp(self):
    """Create a fresh priority queue and fixture data before each test."""
    self.pqueue = PriorityQueue(store_class=STORE_CLASS, store_name="testpq")
    # (priority, payload) pairs shared by the test cases.
    self.test_data = [
        (10, "Phy"),
        (12, "Maths"),
        (30, "Chem"),
        (25, "Bio"),
        (41, "English"),
    ]