def read_edges_file(self):
        context = etree.iterparse(self.edges_filename, events=('end',), tag="edge")
        edges_iter = utils.fast_iter(context, self.handle_edges)
        count_dict = {}
        flist = []
        prev_sender = None
        curr_sender = None
        new_friends = []
        node_data = GoodreadsDataPreparser.NodeData(None, GoodreadsDataPreparser.interaction_types)
        counter = 0
        max_core_nodes_index = self.node_counter-1 #assuming only core nodes were fetched before this
        for (sender_id, receiver_id), _ in edges_iter:
            curr_sender = sender_id
            # if this is a core user, makes sure that only edges of core users are read
            if  sender_id in self.node_id_map and self.node_id_map[sender_id] <=max_core_nodes_index:
                if receiver_id not in self.node_id_map:
                    nnode = self.create_network_node(self.node_counter, should_have_friends=False, should_have_interactions=True, node_data=node_data)
                    new_friends.append((self.node_counter, nnode))
                    self.node_id_map[receiver_id] = self.node_counter
                    self.node_counter += 1

                if prev_sender is not None and curr_sender != prev_sender:
                    self.nodes[self.node_id_map[prev_sender]].store_friends(flist)
                    flist = []
                new_edge = GoodreadsDataPreparser.EdgeData(self.node_id_map[receiver_id])
                flist.append(new_edge)
                prev_sender = curr_sender
                counter += 1
        if prev_sender in self.node_id_map and self.node_id_map[prev_sender] <=max_core_nodes_index:
            self.nodes[self.node_id_map[prev_sender]].store_friends(flist)
        
        # assigning unique incremented index to each new node
        for uid, node_obj in new_friends:
            self.nodes.insert(uid, node_obj)
        print "All edges stored", counter
    def read_items_file(self):
        context = etree.iterparse(self.items_filename, events=('end',), tag="item")
        items_iter = utils.fast_iter(context, self.handle_items)

        self.items.insert(0, None)
        for itemid, itemdata in items_iter:
            self.items.insert(itemid, itemdata)
        print "all items stored", len(self.items)
        self.total_num_items = len(self.items)
        return self.items
Example #3
0
    def read_items_file(self):
        context = etree.iterparse(self.items_filename,
                                  events=('end', ),
                                  tag="item")
        items_iter = utils.fast_iter(context, self.handle_items)

        self.items.insert(0, None)
        for itemid, itemdata in items_iter:
            self.items.insert(itemid, itemdata)
        print "all items stored", len(self.items)
        self.total_num_items = len(self.items)
        return self.items
    def read_nodes_file(self):
        context = etree.iterparse(self.nodes_filename, events=('end',), tag="user")
        nodes_iter = utils.fast_iter(context, self.handle_nodes)

        self.nodes.insert(self.node_counter, None)
        self.node_counter += 1 # to offset for a 1-based node id and zero-based list index
        for uid, node_data in nodes_iter:
            #assert self.node_counter == uid
            nnode = self.create_network_node(self.node_counter, should_have_friends=True, should_have_interactions=True, node_data=node_data)
            self.nodes.insert(self.node_counter, nnode)
            self.node_id_map[uid] = self.node_counter
            self.node_counter += 1
        print "All core nodes stored", len(self.nodes)-1
        return self.nodes
Example #5
0
    def read_edges_file(self):
        context = etree.iterparse(self.edges_filename,
                                  events=('end', ),
                                  tag="edge")
        edges_iter = utils.fast_iter(context, self.handle_edges)
        count_dict = {}
        flist = []
        prev_sender = None
        curr_sender = None
        new_friends = []
        node_data = GoodreadsDataPreparser.NodeData(
            None, GoodreadsDataPreparser.interaction_types)
        counter = 0
        max_core_nodes_index = self.node_counter - 1  #assuming only core nodes were fetched before this
        for (sender_id, receiver_id), _ in edges_iter:
            curr_sender = sender_id
            # if this is a core user, makes sure that only edges of core users are read
            if sender_id in self.node_id_map and self.node_id_map[
                    sender_id] <= max_core_nodes_index:
                if receiver_id not in self.node_id_map:
                    nnode = self.create_network_node(
                        self.node_counter,
                        should_have_friends=False,
                        should_have_interactions=True,
                        node_data=node_data)
                    new_friends.append((self.node_counter, nnode))
                    self.node_id_map[receiver_id] = self.node_counter
                    self.node_counter += 1

                if prev_sender is not None and curr_sender != prev_sender:
                    self.nodes[self.node_id_map[prev_sender]].store_friends(
                        flist)
                    flist = []
                new_edge = GoodreadsDataPreparser.EdgeData(
                    self.node_id_map[receiver_id])
                flist.append(new_edge)
                prev_sender = curr_sender
                counter += 1
        if prev_sender in self.node_id_map and self.node_id_map[
                prev_sender] <= max_core_nodes_index:
            self.nodes[self.node_id_map[prev_sender]].store_friends(flist)

        # assigning unique incremented index to each new node
        for uid, node_obj in new_friends:
            self.nodes.insert(uid, node_obj)
        print "All edges stored", counter
Example #6
0
    def read_nodes_file(self):
        context = etree.iterparse(self.nodes_filename,
                                  events=('end', ),
                                  tag="user")
        nodes_iter = utils.fast_iter(context, self.handle_nodes)

        self.nodes.insert(self.node_counter, None)
        self.node_counter += 1  # to offset for a 1-based node id and zero-based list index
        for uid, node_data in nodes_iter:
            #assert self.node_counter == uid
            nnode = self.create_network_node(self.node_counter,
                                             should_have_friends=True,
                                             should_have_interactions=True,
                                             node_data=node_data)
            self.nodes.insert(self.node_counter, nnode)
            self.node_id_map[uid] = self.node_counter
            self.node_counter += 1
        print "All core nodes stored", len(self.nodes) - 1
        return self.nodes