def sample_from_vertex_ids(self, vertices: np.ndarray, args: argparse.Namespace) -> Tuple['Graph', Sample]:
    """Build a sample and its subgraph from an explicit set of vertex IDs.

    Parameters
    ----------
    vertices : np.ndarray
        the IDs of the vertices that make up the sample
    args : Namespace
        the parsed command-line arguments (not read by this method)

    Returns
    -------
    subgraph : Graph
        the subgraph created from the given vertices
    sample : Sample
        the sample object built from the given vertices
    """
    num_sampled = len(vertices)

    # No random selection takes place: the state is seeded directly with the requested IDs.
    state = SampleState(num_sampled)
    state.sample_idx = vertices

    sample_object = Sample(
        state,
        self.out_neighbors,
        self.in_neighbors,
        self.true_block_assignment,
    )
    subgraph = Graph(
        sample_object.out_neighbors,
        sample_object.in_neighbors,
        num_sampled,
        sample_object.num_edges,
        sample_object.true_block_assignment,
    )
    return subgraph, sample_object
def random_node_neighbor_sample(
        num_vertices: int,
        old_out_neighbors: List[np.ndarray],
        old_in_neighbors: List[np.ndarray],
        old_true_block_assignment: np.ndarray,
        prev_state: RandomNodeNeighborSampleState,
        args: 'argparse.Namespace') -> 'Sample':
    """Random node neighbor sampling.

    A batch of not-yet-sampled vertices is drawn uniformly at random. Each drawn
    vertex is added to the sample together with all of its out neighbors, until
    the sample reaches its target size.

    Parameters
    ----------
    num_vertices : int
        the number of vertices in the graph being sampled
    old_out_neighbors : List[np.ndarray]
        per-vertex out-neighbor rows; element 0 of each row is used as the neighbor's vertex ID
    old_in_neighbors : List[np.ndarray]
        per-vertex in-neighbor rows (only forwarded to the Sample constructor)
    old_true_block_assignment : np.ndarray
        forwarded unchanged to the Sample constructor
    prev_state : RandomNodeNeighborSampleState
        the state of the previous sampling iteration
    args : Namespace
        the parsed command-line arguments; sample_size and sample_iterations are read here

    Returns
    -------
    sample : Sample
        the sample built from the selected vertices
    """
    state = SampleState.create_sample_state(
        num_vertices, prev_state, args)  # type: RandomNodeNeighborSampleState
    batch_size = int(
        (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    print("Sampling {} vertices from graph".format(batch_size))

    # Seeds are drawn only from vertices that are not part of the sample yet.
    unsampled = np.setdiff1d(np.asarray(range(num_vertices)), state.sample_idx)
    seeds = np.random.choice(unsampled, batch_size, replace=False)

    # The target is cumulative: previously sampled vertices plus this batch.
    target_size = batch_size + len(state.sample_idx)

    for seed in seeds:
        # The seed itself goes first, followed by each of its out neighbors.
        candidates = [seed]
        candidates.extend(row[0] for row in old_out_neighbors[seed])
        for candidate in candidates:
            if state.sampled_marker[candidate]:
                continue
            state.index_set.append(candidate)
            state.sampled_marker[candidate] = True
        if len(state.index_set) >= target_size:
            break

    # A seed's neighborhood may overshoot the target, so the sample is truncated.
    state.sample_idx = np.asarray(state.index_set[:target_size])
    return Sample(state, old_out_neighbors, old_in_neighbors,
                  old_true_block_assignment)
def sample(self, args: argparse.Namespace, prev_state: SampleState = None) -> Tuple['Graph', Sample]:
    """Sample a set of vertices from this graph and build the matching subgraph.

    Parameters
    ----------
    args : Namespace
        the parsed command-line arguments; sample_size and sample_iterations are read here
    prev_state : SampleState
        the state passed on to Sample.create_sample; when None, a new
        SampleState is created from the per-iteration sample size

    Returns
    -------
    subgraph : Graph
        the subgraph created from the sampled Graph vertices
    sample : Sample
        the sample object containing the vertex and block mappings
    """
    # sample_size is a percentage of the graph, split evenly across the sampling iterations.
    sample_size = int((self.num_nodes * (args.sample_size / 100)) / args.sample_iterations)
    state = SampleState(sample_size) if prev_state is None else prev_state

    sample_object = Sample.create_sample(
        self.num_nodes,
        self.out_neighbors,
        self.in_neighbors,
        self.true_block_assignment,
        args,
        state,
    )
    subgraph = Graph(
        sample_object.out_neighbors,
        sample_object.in_neighbors,
        sample_object.sample_num,
        sample_object.num_edges,
        sample_object.true_block_assignment,
    )
    return subgraph, sample_object
def sample(self, graph: Graph, args: argparse.Namespace, prev_state: SampleState = None) -> Tuple[Graph, Sample]:
    """Sample a set of vertices from the graph held in self.full_graph.

    Parameters
    ----------
    graph : Graph
        accepted for interface compatibility; the body samples from
        self.full_graph and never reads this argument
    args : Namespace
        the parsed command-line arguments; sample_size and sample_iterations are read here
    prev_state : SampleState
        the state passed on to Sample.create_sample; when None, a new
        SampleState is created from the per-iteration sample size

    Returns
    -------
    sampled_graph : Graph
        the graph attribute of the created sample
    sample : Sample
        the sample object returned by Sample.create_sample
    """
    # sample_size is a percentage of the graph, split evenly across the sampling iterations.
    total_vertices = self.full_graph.num_vertices()
    sample_size = int((total_vertices * (args.sample_size / 100)) / args.sample_iterations)
    state = SampleState(sample_size) if prev_state is None else prev_state

    sample_object = Sample.create_sample(
        self.full_graph, self.true_block_assignment, args, state)
    return sample_object.graph, sample_object
def expansion_snowball_sample(
        num_vertices: int,
        old_out_neighbors: List[np.ndarray],
        old_in_neighbors: List[np.ndarray],
        old_true_block_assignment: np.ndarray,
        prev_state: UniformRandomSampleState,
        args: 'argparse.Namespace') -> 'Sample':
    """Expansion snowball sampling.

    At every iteration, the vertex with the highest contribution count is added
    to the sample. When there are no candidate neighbors left, or no candidate
    contributes anything, a random vertex outside the index set is added instead.

    Parameters
    ----------
    num_vertices : int
        the number of vertices in the graph being sampled
    old_out_neighbors : List[np.ndarray]
        per-vertex out-neighbor arrays; column 0 is used as the neighbor's vertex ID
    old_in_neighbors : List[np.ndarray]
        per-vertex in-neighbor arrays; column 0 is used as the neighbor's vertex ID
    old_true_block_assignment : np.ndarray
        forwarded unchanged to the Sample constructor
    prev_state : UniformRandomSampleState
        the state of the previous sampling iteration
        NOTE(review): the created state is treated as an ExpansionSnowballSampleState;
        the annotation on this parameter looks inconsistent - confirm.
    args : Namespace
        the parsed command-line arguments; sample_size and sample_iterations are read here

    Returns
    -------
    sample : Sample
        the sample built from the selected vertices
    """
    state = SampleState.create_sample_state(
        num_vertices, prev_state, args)  # type: ExpansionSnowballSampleState
    batch_size = int(
        (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    print("Sampling {} vertices from graph".format(batch_size))
    # The target is cumulative: previously sampled vertices plus this batch.
    target_size = batch_size + len(state.sample_idx)

    def _register_unflagged(candidates):
        """Pass every candidate that is not yet flagged as a neighbor to Sample._add_neighbor."""
        for candidate in candidates:
            if state.neighbors_flag[candidate]:
                continue
            Sample._add_neighbor(candidate, state.contribution,
                                 state.index_flag, state.neighbors_flag,
                                 old_out_neighbors[candidate][:, 0],
                                 old_in_neighbors[candidate][:, 0],
                                 state.neighbors)

    if not state.neighbors:
        # Set up the initial contribution counts and flag the start vertex's out neighbors.
        start_neighbors = old_out_neighbors[state.start][:, 0]
        state.neighbors = list(start_neighbors)
        for candidate in start_neighbors:
            state.neighbors_flag[candidate] = True
            # Contribution = out neighbors that are neither sampled nor flagged as neighbors.
            unseen = sum(
                1 for reached in old_out_neighbors[candidate][:, 0]
                if not state.index_flag[reached] and not state.neighbors_flag[reached])
            state.contribution[candidate] += unseen

    while True:
        current_size = len(state.index_set)
        # Stop once the index set holds a non-zero multiple of the target size.
        if current_size != 0 and current_size % target_size == 0:
            break

        if not state.neighbors or max(state.contribution) == 0:
            # Nothing useful to expand to: pick a random vertex outside the index set.
            # NOTE(review): index_flag is not set for this vertex, unlike the branch below - confirm intent.
            remaining = list(set(range(num_vertices)) - set(state.index_set))
            picked = np.random.choice(remaining)
            state.index_set.append(picked)
            _register_unflagged(old_out_neighbors[picked][:, 0])
        else:
            # Greedy step: take the candidate with the largest contribution count.
            picked = np.argmax(state.contribution)
            state.index_set.append(picked)
            state.index_flag[picked] = True
            state.neighbors.remove(picked)
            state.contribution[picked] = 0
            _register_unflagged(old_in_neighbors[picked][:, 0])

    state.sample_idx = np.asarray(state.index_set)
    return Sample(state, old_out_neighbors, old_in_neighbors,
                  old_true_block_assignment)
def forest_fire_sample(
        num_vertices: int,
        old_out_neighbors: List[np.ndarray],
        old_in_neighbors: List[np.ndarray],
        old_true_block_assignment: np.ndarray,
        prev_state: ForestFireSampleState,
        args: 'argparse.Namespace') -> 'Sample':
    """Forest-fire sampling with forward probability = 0.7.

    Every vertex on the current fire front is added to the sample. For each of
    them, a geometrically distributed number (p = 0.7) of out-edges is burnt,
    and the not-yet-burnt endpoints of those edges form the next fire front.
    When the fire dies out, or every vertex is burnt, it restarts from a
    random vertex.

    Parameters
    ----------
    num_vertices : int
        the number of vertices in the graph being sampled
    old_out_neighbors : List[np.ndarray]
        per-vertex out-neighbor rows; element 0 of each row is used as the neighbor's vertex ID
    old_in_neighbors : List[np.ndarray]
        per-vertex in-neighbor rows (only forwarded to the Sample constructor)
    old_true_block_assignment : np.ndarray
        forwarded unchanged to the Sample constructor
    prev_state : ForestFireSampleState
        the state of the previous sampling iteration
    args : Namespace
        the parsed command-line arguments; sample_size and sample_iterations are read here

    Returns
    -------
    sample : Sample
        the sample built from the first sample_num selected vertices
    """
    state = SampleState.create_sample_state(
        num_vertices, prev_state, args)  # type: ForestFireSampleState
    sample_num = int(
        (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    print("Sampling {} vertices from graph".format(sample_num))
    # The target is cumulative: previously sampled vertices plus this batch.
    sample_num += len(state.sample_idx)

    # A whole fire front is processed per pass, so the index set can grow by more
    # than one vertex and step over the target. The loop therefore stops as soon
    # as the target is reached or exceeded; waiting for an exact multiple of
    # sample_num could keep burning long after enough vertices were collected,
    # or never terminate. The overshoot is trimmed when sample_idx is assigned.
    while len(state.index_set) < sample_num:
        for vertex in state.current_fire_front:
            # add vertex to index set
            if not state.sampled_marker[vertex]:
                state.sampled_marker[vertex] = True
                state.burnt_marker[vertex] = True
                state.index_set.append(vertex)

            # select edges to burn
            num_to_choose = np.random.geometric(0.7)
            out_neighbors = old_out_neighbors[vertex]
            num_out = len(out_neighbors)
            if num_out < 1:  # no outgoing neighbors: nothing to burn
                continue
            # Cannot burn more edges than the vertex has.
            num_to_choose = min(num_to_choose, num_out)

            chosen = np.zeros(num_out, dtype=bool)
            chosen[np.random.choice(np.arange(num_out), num_to_choose, replace=False)] = True
            for index, is_chosen in enumerate(chosen):
                neighbor = out_neighbors[index][0]
                # if chosen and not burnt yet, add to next frontier
                if is_chosen and not state.burnt_marker[neighbor]:
                    state.next_fire_front.append(neighbor)
                state.burnt_marker[neighbor] = True  # mark all neighbors as visited

        if np.sum(state.burnt_marker) == num_vertices:
            # all vertices are burnt: clear the burnt markers and restart the fire
            state.burnt_marker = [False] * num_vertices
            state.current_fire_front = [np.random.randint(num_vertices)]
            state.next_fire_front = list()
            continue

        if len(state.next_fire_front) == 0:
            # the fire burnt out: reignite at a random vertex
            state.current_fire_front = [np.random.randint(num_vertices)]
        else:
            state.current_fire_front = copy(state.next_fire_front)
            state.next_fire_front = list()

    state.sample_idx = np.asarray(state.index_set[:sample_num])
    return Sample(state, old_out_neighbors, old_in_neighbors,
                  old_true_block_assignment)
def random_walk_sample(
        num_vertices: int,
        old_out_neighbors: List[np.ndarray],
        old_in_neighbors: List[np.ndarray],
        old_true_block_assignment: np.ndarray,
        prev_state: RandomWalkSampleState,
        args: 'argparse.Namespace') -> 'Sample':
    """Random walk sampling.

    Walks along out-edges from a random start vertex, adding every newly
    visited vertex to the sample. At each step the walk jumps back to the start
    vertex with probability 0.15, and it moves to a new random start vertex
    when it has taken sample_num steps since the last such restart.

    Parameters
    ----------
    num_vertices : int
        the number of vertices in the graph being sampled
    old_out_neighbors : List[np.ndarray]
        per-vertex out-neighbor arrays; column 0 is used as the neighbor's vertex ID
    old_in_neighbors : List[np.ndarray]
        per-vertex in-neighbor arrays (only forwarded to the Sample constructor)
    old_true_block_assignment : np.ndarray
        forwarded unchanged to the Sample constructor
    prev_state : RandomWalkSampleState
        the state of the previous sampling iteration
    args : Namespace
        the parsed command-line arguments; sample_size and sample_iterations are read here

    Returns
    -------
    sample : Sample
        the sample built from the visited vertices
    """
    state = SampleState.create_sample_state(
        num_vertices, prev_state, args)  # type: RandomWalkSampleState
    sample_num = int(
        (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    print("Sampling {} vertices from graph".format(sample_num))
    # The target is cumulative: previously sampled vertices plus this batch.
    sample_num += len(state.sample_idx)

    def _random_vertex():
        """Draw a start vertex uniformly from the whole graph."""
        # The range must be num_vertices: drawing from range(sample_num) would
        # only ever start walks at the lowest-numbered vertex IDs.
        return np.random.randint(num_vertices)

    num_tries = 0
    start = _random_vertex()  # start with a random vertex
    vertex = start

    while len(state.index_set) == 0 or len(state.index_set) % sample_num != 0:
        num_tries += 1
        if not state.sampled_marker[vertex]:
            state.index_set.append(vertex)
            state.sampled_marker[vertex] = True

        if num_tries % sample_num == 0:
            # If the number of tries is large, restart from a new random vertex
            start = _random_vertex()
            vertex = start
            num_tries = 0
        elif np.random.random() < 0.15:
            # With a probability of 0.15, restart at the original vertex
            vertex = start
        elif len(old_out_neighbors[vertex]) > 0:
            # If the vertex has out neighbors, go to one of them
            vertex = np.random.choice(old_out_neighbors[vertex][:, 0])
        else:
            # Dead end: restart from the original vertex, replacing it first
            # if it has no out neighbors either
            if len(old_out_neighbors[start]) == 0:
                start = _random_vertex()
            vertex = start

    state.sample_idx = np.asarray(state.index_set)
    return Sample(state, old_out_neighbors, old_in_neighbors,
                  old_true_block_assignment)
def uniform_random_sample(
        num_vertices: int,
        old_out_neighbors: List[np.ndarray],
        old_in_neighbors: List[np.ndarray],
        old_true_block_assignment: np.ndarray,
        prev_state: UniformRandomSampleState,
        args: 'argparse.Namespace') -> 'Sample':
    """Uniform random sampling.

    Draws the next batch of vertices uniformly at random, without replacement,
    from the vertices that are not in the sample yet.

    Parameters
    ----------
    num_vertices : int
        the number of vertices in the graph being sampled
    old_out_neighbors : List[np.ndarray]
        forwarded unchanged to the Sample constructor
    old_in_neighbors : List[np.ndarray]
        forwarded unchanged to the Sample constructor
    old_true_block_assignment : np.ndarray
        forwarded unchanged to the Sample constructor
    prev_state : UniformRandomSampleState
        the state of the previous sampling iteration
    args : Namespace
        the parsed command-line arguments; sample_size and sample_iterations are read here

    Returns
    -------
    sample : Sample
        the sample built from the previously and newly selected vertices
    """
    state = SampleState.create_sample_state(num_vertices, prev_state, args)
    batch_size = int(
        (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    print("Sampling {} vertices from graph".format(batch_size))

    # Only vertices that have not been sampled before are eligible.
    every_vertex = np.asarray(range(num_vertices))
    unsampled = np.setdiff1d(every_vertex, state.sample_idx)
    newly_sampled = np.random.choice(unsampled, batch_size, replace=False)

    # axis=None flattens both arrays before joining them.
    state.sample_idx = np.concatenate((state.sample_idx, newly_sampled), axis=None)
    return Sample(state, old_out_neighbors, old_in_neighbors,
                  old_true_block_assignment)
def degree_weighted_sample(
        num_vertices: int,
        old_out_neighbors: List[np.ndarray],
        old_in_neighbors: List[np.ndarray],
        old_true_block_assignment: np.ndarray,
        prev_state: DegreeWeightedSampleState,
        args: 'argparse.Namespace') -> 'Sample':
    """Degree-weighted sampling.

    The probability of picking a vertex is proportional to its degree, counted
    as the number of its out-neighbor entries plus the number of its
    in-neighbor entries.

    Parameters
    ----------
    num_vertices : int
        the number of vertices in the graph being sampled
    old_out_neighbors : List[np.ndarray]
        per-vertex out-neighbor arrays; their lengths contribute to the degree
    old_in_neighbors : List[np.ndarray]
        per-vertex in-neighbor arrays; their lengths contribute to the degree
    old_true_block_assignment : np.ndarray
        forwarded unchanged to the Sample constructor
    prev_state : DegreeWeightedSampleState
        the state of the previous sampling iteration
    args : Namespace
        the parsed command-line arguments; sample_size and sample_iterations are read here

    Returns
    -------
    sample : Sample
        the sample built from the previously and newly selected vertices
    """
    state = SampleState.create_sample_state(num_vertices, prev_state, args)
    batch_size = int(
        (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    print("Sampling {} vertices from graph".format(batch_size))

    out_degrees = [len(neighbors) for neighbors in old_out_neighbors]
    in_degrees = [len(neighbors) for neighbors in old_in_neighbors]
    vertex_degrees = np.add(out_degrees, in_degrees)

    # A zero weight keeps already-sampled vertices from being drawn again.
    vertex_degrees[state.sample_idx] = 0
    pick_probability = vertex_degrees / np.sum(vertex_degrees)

    newly_sampled = np.random.choice(
        num_vertices, batch_size, replace=False, p=pick_probability)
    state.sample_idx = np.concatenate((state.sample_idx, newly_sampled))
    return Sample(state, old_out_neighbors, old_in_neighbors,
                  old_true_block_assignment)
def update(self):
    """Print a status message and hand a new SampleState to the state machine.

    The SampleState is constructed with self._state_machine and then passed to
    self._state_machine.replace().
    """
    print('Initial State update')
    machine = self._state_machine
    machine.replace(SampleState(machine))