def proceed_with_history_path(path_dataset_history, edges):
    # NOTE: `directory` is expected to be defined at module level; it is not a
    # parameter of this function.
    with open(path_dataset_history + '/event_log', 'r', encoding='utf-8') as event_log_file:
        num_events = sum(1 for _ in event_log_file)
    if num_events > 0:
        event_log = pd.read_csv(path_dataset_history + '/event_log', header=None)
        d = Data()
        d.load_data_data_frame(event_log, edges)
        # Persist the Data object and the contagion id mapping.
        data_obj_file_name = path_dataset_history + '/data_obj.pickle'
        os.makedirs(os.path.dirname(data_obj_file_name), exist_ok=True)
        with open(data_obj_file_name, 'wb') as f:
            pickle.dump(d, f)
        contagion_dict_file_name = path_dataset_history + '/contagion_dict.pickle'
        os.makedirs(os.path.dirname(contagion_dict_file_name), exist_ok=True)
        with open(contagion_dict_file_name, 'wb') as contagion_dict_file:
            pickle.dump(d.contagion_id_dict, contagion_dict_file)
        # Estimate and persist the contagion correlation matrix.
        cc = ContagionCorrelation()
        cc.estimate(d)
        contagion_file_name = path_dataset_history + '/contagion.pickle'
        os.makedirs(os.path.dirname(contagion_file_name), exist_ok=True)
        with open(contagion_file_name, 'wb') as contagion_file:
            pickle.dump(cc.matrix, contagion_file)
        # Estimate and persist the adjacency matrix.
        a = Adjacency()
        a.estimate(d)
        adjacency_file_name = path_dataset_history + '/adjacency.pickle'
        os.makedirs(os.path.dirname(adjacency_file_name), exist_ok=True)
        with open(adjacency_file_name, 'wb') as adjacency_file:
            pickle.dump(a.matrix, adjacency_file)
        with open(directory + 'estimated_cc+a', 'a+', encoding='utf-8') as handle:
            handle.write(path_dataset_history + '\n')
    else:
        with open(directory + 'not_estimated_c+aa', 'a+', encoding='utf-8') as file:
            file.write(path_dataset_history + '\n')

def proceed_with_history(history_length, directory, dataset, edges):
    history_dir = directory + dataset + '/history_' + str(history_length)
    with open(history_dir + '/event_log', 'r', encoding='utf-8') as event_log_file:
        num_events = sum(1 for _ in event_log_file)
    if num_events > 0:
        event_log = pd.read_csv(history_dir + '/event_log', header=None)
        # Skip histories with too many distinct contagions to keep estimation tractable.
        if len(event_log.iloc[:, 2].unique()) <= 2500:
            d = Data()
            d.load_data_data_frame(event_log, copy(edges))
            # Estimate and persist the contagion correlation matrix.
            cc = ContagionCorrelation()
            cc.estimate(d)
            contagion_file_name = history_dir + '/contagion.pickle'
            os.makedirs(os.path.dirname(contagion_file_name), exist_ok=True)
            with open(contagion_file_name, 'wb') as contagion_file:
                pickle.dump(cc.matrix, contagion_file)
            # Estimate and persist the adjacency matrix.
            a = Adjacency()
            a.estimate(d)
            adjacency_file_name = history_dir + '/adjacency.pickle'
            os.makedirs(os.path.dirname(adjacency_file_name), exist_ok=True)
            with open(adjacency_file_name, 'wb') as adjacency_file:
                pickle.dump(a.matrix, adjacency_file)
        else:
            with open(directory + 'not_estimated', 'a+', encoding='utf-8') as file:
                file.write(dataset + '/history_' + str(history_length) + '\n')
    else:
        with open(directory + 'not_estimated', 'a+', encoding='utf-8') as file:
            file.write(dataset + '/history_' + str(history_length) + '\n')
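
# Example usage (a minimal sketch; the 'results/' directory, the 'digg'
# dataset name and the history lengths below are illustrative assumptions,
# not part of this module):
#
#     edges = pd.read_csv('results/digg/edges', header=None)
#     for history_length in (7, 14, 30):
#         proceed_with_history(history_length, 'results/', 'digg', edges)
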
def __init__(self):
    self.contagion_correlation = ContagionCorrelation()
    self.adjacency = Adjacency()
    self.thresholds = Threshold()

class MultiContagionDynamicLinearThresholdModelWithForgetting(
        BaseMultiContagionDiffusionModel):
    """
    The base class for multi-contagion diffusion of information,
    MultiContagionDynamicLinearThresholdModel.

    A MultiContagionDynamicLinearThresholdModel stores all the model
    parameters required to perform prediction of a multi-contagious
    diffusion process.

    Attributes
    ----------
    contagion_correlation : ContagionCorrelation
        Stores the contagion correlation matrix of contagions in the event log.
    adjacency : Adjacency
        Stores the adjacency matrix of the underlying social network.
    thresholds : Threshold
        Stores the dynamic thresholds of all users in the form of a matrix.
        Entries for a specific user are equal across all columns.
    state_matrix_ : SingleIterResult
        Stores the current state of the network in the sense of users' activity
        in particular contagions.
    activity_index_vector_ : numpy.array
        Stores the current number of activations performed by each user.
    """

    def __init__(self):
        self.contagion_correlation = ContagionCorrelation()
        self.adjacency = Adjacency()
        self.thresholds = ThresholdWithForgetting()

    def fit(self, data: Data, **kwargs):
        """
        Fit the Multi-Contagion Dynamic Threshold model's parameters according
        to ``data``.

        The method runs the parameter-specific estimation procedures.

        Parameters
        ----------
        data : Data
            :class:`Data` object according to which parameters should be fitted.
        **kwargs
            Arbitrary keyword arguments.
        """
        if (self.contagion_correlation.matrix is None) and (
                self.adjacency.matrix is None) and (self.thresholds.matrix is None):
            self.estimate_contagion_correlation_matrix(data)
            print('ContagionCorrelation')
            self.estimate_adjacency_matrix(data)
            print('Adjacency')
            self.estimate_threshold_matrix(data,
                                           adjacency=self.adjacency,
                                           correlation=self.contagion_correlation,
                                           **kwargs)
            print('Threshold')
            self.fill_state_matrix(data)
            print('State')
        else:
            raise NameError(
                'Can not estimate parameters when any of them is already assigned')

    def fit_only_thresholds_states(self, data: Data, **kwargs):
        if (self.contagion_correlation.matrix is not None) and (
                self.adjacency.matrix is not None):
            self.estimate_threshold_matrix(data,
                                           adjacency=self.adjacency,
                                           correlation=self.contagion_correlation,
                                           **kwargs)
            # print('Threshold')
            self.fill_state_matrix(data)
            # print('State')
        else:
            raise NameError(
                'Can not estimate threshold - contagion correlation matrix or '
                'adjacency matrix not assigned')

    def fill_state_matrix(self, data):
        # TODO state_matrix_.matrix -> sparse
        self.state_matrix_ = StateMatrix()
        self.state_matrix_.num_contagions = data.num_contagions
        self.state_matrix_.num_users = data.num_users
        self.state_matrix_.matrix = np.zeros(
            (self.state_matrix_.num_users, self.state_matrix_.num_contagions),
            dtype=float)
        # Hard-coded observation window (Unix timestamps) split into 24h steps.
        start_time = 1332565200
        end_time = 1335416399
        duration_24h_in_sec = 60 * 60 * 24
        time_grid = np.arange(start_time + duration_24h_in_sec,
                              end_time + duration_24h_in_sec,
                              duration_24h_in_sec)
        max_time = data.event_log[Data.time_stamp].max()
        for time in time_grid:
            if time <= max_time:
                # Mark users active in contagions observed up to `time`, then
                # decay the previous state by half (forgetting) and cap at 1.
                # zeros_like (not empty_like) so that unobserved entries stay 0.
                temp_mat = np.zeros_like(self.state_matrix_.matrix, dtype=bool)
                for index, row in data.event_log[
                        data.event_log[Data.time_stamp] <= time].iterrows():
                    temp_mat[row[Data.user]][row[Data.contagion_id]] = 1.
                self.state_matrix_.matrix = self.state_matrix_.matrix / 2 + temp_mat
                self.state_matrix_.matrix[self.state_matrix_.matrix > 1] = 1
        # for index, row in data.event_log.iterrows():
        #     self.state_matrix_.matrix[row[Data.user]][row[Data.contagion_id]] = 1.
        self.activity_index_vector_ = np.sum(
            (self.state_matrix_.matrix > 0).astype(bool), axis=1)

    def estimate_contagion_correlation_matrix(self, data):
        self.contagion_correlation.estimate(data)

    def estimate_adjacency_matrix(self, data: Data, **kwargs):
        self.adjacency.estimate(data, **kwargs)

    def estimate_threshold_matrix(self, data: Data, adjacency, correlation, **kwargs):
        self.thresholds.estimate(data,
                                 adjacency=adjacency,
                                 correlation=correlation,
                                 **kwargs)

    def to_pickle(self, directory):
        # TODO directory + ... -> fileName
        with open(directory + 'MultiContagionDynamicLinearThresholdModel.p', 'wb') as file:
            pickle.dump(self, file)

    @staticmethod
    def from_pickle(directory):
        # TODO directory + ... -> fileName
        with open(directory + 'MultiContagionDynamicLinearThresholdModel.p', 'rb') as file:
            return pickle.load(file)

    def predict(self, num_iterations: int) -> Results:
        # TODO "method" rst
        # TODO replace num_activation by proper test
        """
        Parameters
        ----------
        num_iterations : int
            Discrete number of prediction iteration steps to perform by the
            :meth:`predict` method.

        Returns
        -------
        Results
            Object containing results from all prediction iterations.
        """
        global num_activations
        num_activations = 0
        result = Results()
        self.adjacency.transposed()
        for _ in range(num_iterations):
            result.add_result(self.__single_iteration())
        # print(num_activations)
        return result

    def __single_iteration(self) -> SingleIterResult:
        # TODO SingleIterResult -> new special class
        influence_matrix = self.__influence_matrix()
        activation_matrix = self.__activation_matrix(influence_matrix)
        self.__activation_procedure(activation_matrix)
        return self.state_matrix_

    def __activation_procedure(self, activation_matrix):
        # TODO delete num_activations
        global num_activations
        activation_candidates = self.__find_activation_candidates(activation_matrix)
        for user in self.__users_above_threshold(activation_candidates):
            # Contagions for this user in which the threshold has been exceeded.
            contagions_above_threshold = self.__contagions_above_threshold(
                activation_candidates, user)
            # active_contagions = self.active_contagions(user)  # contagions in which user is already active
            # contagions_above_threshold_not_active = self.__contagions_above_threshold_not_active(
            #     active_contagions, contagions_above_threshold)  # remove active_contagions from contagions_above_threshold
            # Check whether the candidate contagions are not negatively correlated.
            if self.__check_negative_contagion_correlation(contagions_above_threshold):
                self.__activation(contagions_above_threshold, user)
                self.__increase_activity_index(user)
                # num_activations += 1
                self.__update_threshold(user)

    def __check_negative_contagion_correlation(self, contagions_above_threshold_not_active):
        # TODO review of correctness of condition
        return (not np.any(self.contagion_correlation.matrix[
            contagions_above_threshold_not_active[:, None],
            contagions_above_threshold_not_active] < 0)) and (
                not contagions_above_threshold_not_active.size == 0)

    def __update_threshold(self, user):
        # TODO assign vector in one line
        # print(self.thresholds.initial_matrix[user][0], self.activity_index_vector_[user])
        for contagion in range(self.state_matrix_.num_contagions):  # temporary solution
            # Update theta: the threshold grows with the user's activation count.
            self.thresholds.matrix[user][contagion] = 1 - math.pow(
                1 - self.thresholds.initial_matrix[user][contagion],
                self.activity_index_vector_[user] + 1)

    def __increase_activity_index(self, user):
        # Increase the activation counter of `user` (Y[user] += 1).
        self.activity_index_vector_[user] += 1

    def __activation(self, contagions_above_threshold_not_active, user):
        # Activate `user` in the contagions from `contagions_above_threshold_not_active`,
        # decay the previous state by half (forgetting) and cap entries at 1.
        # zeros_like (not empty_like) so that untouched entries stay 0.
        new_activations_matrix = np.zeros_like(self.state_matrix_.matrix)
        new_activations_matrix[user][contagions_above_threshold_not_active] = 1.
        self.state_matrix_.matrix = self.state_matrix_.matrix / 2 + new_activations_matrix
        self.state_matrix_.matrix[self.state_matrix_.matrix > 1] = 1

    def __contagions_above_threshold_not_active(self, active_contagions,
                                                contagions_above_threshold):
        return np.setdiff1d(contagions_above_threshold, active_contagions)

    def active_contagions(self, user):
        return np.where(self.state_matrix_.matrix[user][:])[0]

    def __contagions_above_threshold(self, activation_candidates, user):
        return np.where(activation_candidates[user, :])[0]

    def __users_above_threshold(self, activation_candidates):
        return np.unique(np.where(activation_candidates[:, :])[0])

    def __find_activation_candidates(self, activation_matrix):
        return np.greater_equal(activation_matrix, self.thresholds.matrix)

    def __activation_matrix(self, influence_matrix):
        return influence_matrix.dot(
            self.contagion_correlation.matrix
        ) / self.contagion_correlation.num_contagions_

    def __influence_matrix(self):
        return self.adjacency.matrix_transposed_.dot(self.state_matrix_.matrix)

    def assign_contagions_correlation_matrix(self, matrix):
        # TODO check if matrix is consistent with model
        self.contagion_correlation.assign_matrix(matrix)

    def assign_adjacency_matrix(self, matrix):
        # TODO check if matrix is consistent with model
        self.adjacency.assign_matrix(matrix)

    def assign_thresholds_matrix(self, matrix):
        # TODO check if matrix is consistent with model
        self.thresholds.assign_matrix(matrix)

    def assign_state_matrix(self, state_matrix):
        # TODO Implement this method
        pass

    def assign_activity_index_vector(self, activity_index_vector):
        # TODO Implement this method
        pass
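
# Example usage of the model (a minimal sketch; the event log and edges paths
# as well as the number of iterations are illustrative assumptions):
#
#     event_log = pd.read_csv('results/digg/history_30/event_log', header=None)
#     edges = pd.read_csv('results/digg/edges', header=None)
#     data = Data()
#     data.load_data_data_frame(event_log, edges)
#     model = MultiContagionDynamicLinearThresholdModelWithForgetting()
#     model.fit(data)
#     results = model.predict(num_iterations=10)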