Example #1
0
def proceed_with_history_path(path_dataset_history, edges):
    """Estimate and pickle the model inputs for one history directory.

    Reads ``<path_dataset_history>/event_log``. When the log contains at
    least one line, a ``Data`` object is built from it and ``edges``, the
    contagion correlation and adjacency are estimated, and four pickles are
    written next to the log: ``data_obj.pickle``, ``contagion_dict.pickle``,
    ``contagion.pickle`` and ``adjacency.pickle``. The processed path is
    then appended to the ``estimated_cc+a`` bookkeeping file. An empty log
    is recorded in ``not_estimated_c+aa`` instead and nothing is estimated.

    NOTE: both bookkeeping files are resolved against a module-level
    ``directory`` name; it is not a parameter of this function.

    Parameters
    ----------
    path_dataset_history : str
        Directory holding the ``event_log`` file; outputs are written here.
    edges
        Passed through unchanged to ``Data.load_data_data_frame``.
    """
    event_log_path = path_dataset_history + '/event_log'

    # Only "is there at least one line?" matters; the context manager closes
    # the handle that the original generator expression left open.
    with open(event_log_path, 'r', encoding='utf-8') as log_file:
        has_events = next(log_file, None) is not None

    if not has_events:
        with open(directory + 'not_estimated_c+aa', 'a+',
                  encoding='utf-8') as file:
            file.write(path_dataset_history + '\n')
        return

    def _dump_pickle(file_name, obj):
        # Write ``obj`` to ``<path_dataset_history>/<file_name>``.
        target = path_dataset_history + '/' + file_name
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, 'wb') as pickle_file:
            pickle.dump(obj, pickle_file)

    event_log = pd.read_csv(event_log_path, header=None)
    d = Data()
    d.load_data_data_frame(event_log, edges)
    # The data object is pickled before the estimators run, as before.
    _dump_pickle('data_obj.pickle', d)
    _dump_pickle('contagion_dict.pickle', d.contagion_id_dict)

    cc = ContagionCorrelation()
    cc.estimate(d)
    _dump_pickle('contagion.pickle', cc.matrix)

    a = Adjacency()
    a.estimate(d)
    _dump_pickle('adjacency.pickle', a.matrix)

    with open(directory + 'estimated_cc+a', 'a+',
              encoding='utf-8') as handle:
        handle.write(path_dataset_history + '\n')
Example #2
0
def proceed_with_history(history_length, directory, dataset, edges):
    """Estimate and pickle correlation and adjacency for one history length.

    Works on ``<directory><dataset>/history_<history_length>/event_log``.
    Estimation runs only when the log is non-empty and its third column
    (index 2) holds at most 2500 distinct values; ``contagion.pickle`` and
    ``adjacency.pickle`` are then written next to the log. Otherwise the
    line ``<dataset>/history_<history_length>`` is appended to
    ``<directory>not_estimated``.

    Parameters
    ----------
    history_length
        Converted with ``str`` to form the ``history_<n>`` directory name.
    directory : str
        Root directory; concatenated as-is, so it must end with a path
        separator.
    dataset : str
        Name of the dataset sub-directory.
    edges
        A shallow copy is handed to ``Data.load_data_data_frame``.
    """
    max_unique_values = 2500
    history_name = dataset + '/history_' + str(history_length)
    # ``history_dir`` replaces the original local named ``dir``, which
    # shadowed the builtin.
    history_dir = directory + history_name
    event_log_path = history_dir + '/event_log'

    def _record_not_estimated():
        # Append this history to the shared "not estimated" list.
        with open(directory + 'not_estimated', 'a+',
                  encoding='utf-8') as file:
            file.write(history_name + '\n')

    # Close the handle deterministically instead of leaking it from a
    # generator expression; one line is enough to know the log is non-empty.
    with open(event_log_path, 'r', encoding='utf-8') as log_file:
        has_events = next(log_file, None) is not None
    if not has_events:
        _record_not_estimated()
        return

    event_log = pd.read_csv(event_log_path, header=None)
    if len(event_log.iloc[:, 2].unique()) > max_unique_values:
        _record_not_estimated()
        return

    d = Data()
    d.load_data_data_frame(event_log, copy(edges))

    cc = ContagionCorrelation()
    cc.estimate(d)
    contagion_file_name = history_dir + '/contagion.pickle'
    os.makedirs(os.path.dirname(contagion_file_name), exist_ok=True)
    with open(contagion_file_name, 'wb') as contagion_file:
        pickle.dump(cc.matrix, contagion_file)

    a = Adjacency()
    a.estimate(d)
    adjacency_file_name = history_dir + '/adjacency.pickle'
    os.makedirs(os.path.dirname(adjacency_file_name), exist_ok=True)
    with open(adjacency_file_name, 'wb') as adjacency_file:
        pickle.dump(a.matrix, adjacency_file)
    def __init__(self):
        """Create the parameter containers of the model.

        Instantiates a fresh ``ContagionCorrelation``, ``Adjacency`` and
        ``Threshold`` object; nothing else is initialised here.
        """

        self.contagion_correlation = ContagionCorrelation()
        self.adjacency = Adjacency()
        self.thresholds = Threshold()
class MultiContagionDynamicLinearThresholdModelWithForgetting(
        BaseMultiContagionDiffusionModel):
    """
    Multi-contagion dynamic linear threshold model of information diffusion, with forgetting.

    A MultiContagionDynamicLinearThresholdModelWithForgetting stores all the model parameters required to perform
    prediction of a multi-contagion diffusion process.

    Attributes
    ----------
    contagion_correlation : ContagionCorrelation
        Stores the contagion correlation matrix of contagions in event log.
    adjacency : Adjacency
        Stores the adjacency matrix of the underlying social network.
    thresholds : Threshold
        Stores dynamic threshold of all users in the form of a matrix. Entries for specific user are equal
        across all columns.
    state_matrix_ : SingleIterResult
        Stores the current state of the network in the sense of users activity in particular contagions.
    activity_index_vector_ : numpy.array
        Stores the current number of activations performed by each user.
    """
    def __init__(self):
        """Create the parameter containers of the model.

        Instantiates a fresh ``ContagionCorrelation``, ``Adjacency`` and
        ``ThresholdWithForgetting`` object. ``state_matrix_`` and
        ``activity_index_vector_`` are not created here; they are set by
        ``fill_state_matrix``.
        """

        self.contagion_correlation = ContagionCorrelation()
        self.adjacency = Adjacency()
        self.thresholds = ThresholdWithForgetting()

    def fit(self, data: Data, **kwargs):
        """
        Fit Multi-Contagion Dynamic Threshold models parameters according to :name:`data`. Method evaluates parameters specific
        estimation procedures.

        Parameters
        ----------
        data : Data
            :class:`Data` object according to which parameters should be fitted.
        **kwargs
            Arbitrary keyword arguments.
        """
        if (self.contagion_correlation.matrix is
                None) and (self.adjacency.matrix is
                           None) and (self.thresholds.matrix is None):
            self.estimate_contagion_correlation_matrix(data)
            print('ContagionCorrelation')
            self.estimate_adjacency_matrix(data)
            print('Adjacency')
            self.estimate_threshold_matrix(
                data,
                adjacency=self.adjacency,
                correlation=self.contagion_correlation,
                **kwargs)
            print('Threshold')
            self.fill_state_matrix(data)
            print('State')
        else:
            raise NameError(
                'Can not estimate parameters when any of them is already assigned'
            )

    def fit_only_thresholds_states(self, data: Data, **kwargs):
        if (self.contagion_correlation.matrix
                is not None) and (self.adjacency.matrix is not None):
            self.estimate_threshold_matrix(
                data,
                adjacency=self.adjacency,
                correlation=self.contagion_correlation,
                **kwargs)
            # print('Threshold')
            self.fill_state_matrix(data)
            # print('State')
        else:
            raise NameError(
                'Can not estimate threshold - contagion correlation matrix or adjacency matrix not assigned'
            )

    def fill_state_matrix(self, data, start_time=1332565200,
                          end_time=1335416399,
                          time_step=60 * 60 * 24):
        """
        Build ``state_matrix_`` and ``activity_index_vector_`` from the event log.

        The log is replayed on a regular grid of times
        ``start_time + time_step, start_time + 2 * time_step, ...`` that stops
        before ``end_time + time_step``. For every grid time that is not later
        than the newest event, the current state is halved, each
        (user, contagion) pair with an event at or before that time is added
        as 1, and the result is capped at 1. ``activity_index_vector_`` then
        holds, per user, the number of contagions with a positive state.

        Parameters
        ----------
        data : Data
            Source of ``num_users``, ``num_contagions`` and ``event_log``.
        start_time, end_time : int, optional
            Bounds of the replay grid, in the units of the event-log time
            stamps. The defaults are the values that used to be hard-coded.
        time_step : int, optional
            Grid spacing; the default is 24 hours expressed in seconds.

        Notes
        -----
        NOTE(review): every step re-adds *all* events up to the grid time, so
        each logged pair is pushed back to 1 at every step and the halving
        never shows in the final matrix. If only the events of the latest
        window were meant to be added, the selection below needs a lower
        bound - confirm the intended semantics.
        """
        # TODO state_matrix_.matrix -> sparse
        self.state_matrix_ = StateMatrix()
        self.state_matrix_.num_contagions = data.num_contagions
        self.state_matrix_.num_users = data.num_users
        shape = (self.state_matrix_.num_users,
                 self.state_matrix_.num_contagions)
        state = np.zeros(shape, dtype=float)

        time_grid = np.arange(start_time + time_step, end_time + time_step,
                              time_step)
        event_log = data.event_log
        time_stamps = event_log[Data.time_stamp]
        max_time = time_stamps.max()

        for grid_time in time_grid:
            if grid_time <= max_time:
                seen = event_log[time_stamps <= grid_time]
                # Start from zeros: ``np.empty_like`` returns uninitialised
                # memory, which used to leak arbitrary values into the state.
                active = np.zeros(shape, dtype=bool)
                users = np.asarray(seen[Data.user], dtype=np.intp)
                contagions = np.asarray(seen[Data.contagion_id],
                                        dtype=np.intp)
                # One indexed assignment replaces the row-by-row loop.
                active[users, contagions] = True
                state = np.minimum(state / 2 + active, 1)

        self.state_matrix_.matrix = state
        self.activity_index_vector_ = np.sum(state > 0, axis=1)

    def estimate_contagion_correlation_matrix(self, data):
        """Estimate the contagion correlation parameter from ``data``.

        Delegates to ``self.contagion_correlation.estimate(data)``.
        """
        self.contagion_correlation.estimate(data)

    def estimate_adjacency_matrix(self, data: Data, **kwargs):
        """Estimate the adjacency parameter from ``data``.

        Delegates to ``self.adjacency.estimate``; ``**kwargs`` are forwarded
        unchanged.
        """
        self.adjacency.estimate(data, **kwargs)

    def estimate_threshold_matrix(self, data: Data, adjacency, correlation,
                                  **kwargs):
        """Estimate the threshold parameter from ``data``.

        Delegates to ``self.thresholds.estimate``, passing ``adjacency`` and
        ``correlation`` by keyword and forwarding ``**kwargs`` unchanged.
        Callers in this class pass ``self.adjacency`` and
        ``self.contagion_correlation``.
        """
        self.thresholds.estimate(data,
                                 adjacency=adjacency,
                                 correlation=correlation,
                                 **kwargs)

    def to_pickle(self, directory):
        # TODO directory + ... -> fileName
        pickle.dump(
            self,
            open(directory + 'MultiContagionDynamicLinearThresholdModel.p',
                 'wb'))

    @staticmethod
    def from_pickle(directory):
        # TODO directory + ... -> fileName
        return pickle.load(
            open(directory + 'MultiContagionDynamicLinearThresholdModel.p',
                 'rb'))

    def predict(self, num_iterations: int) -> Results:
        """
        Run the diffusion for a fixed number of discrete steps.

        The module-level ``num_activations`` counter is reset to 0 and the
        adjacency object is asked to prepare its transposed matrix before
        the first step.

        Parameters
        ----------
        num_iterations : int
            Number of prediction steps to perform.

        Returns
        -------
        Results
            Object that received the outcome of every step through
            ``add_result``.

        """
        # TODO "method" rst
        # TODO replace num_activation by proper test
        global num_activations
        num_activations = 0

        result = Results()
        self.adjacency.transposed()
        for _ in range(num_iterations):
            state_after_step = self.__single_iteration()
            result.add_result(state_after_step)
        return result

    def __single_iteration(self) -> SingleIterResult:
        # TODO SingleIterResult -> new special class
        influence_matrix = self.__influence_matrix()
        activation_matrix = self.__activation_matrix(influence_matrix)
        self.__activation_procedure(activation_matrix)
        return self.state_matrix_

    def __activation_procedure(self, activation_matrix):
        # TODO delete num_activations
        global num_activations
        activation_candidates = self.__find_activation_candidates(
            activation_matrix)
        for user in self.__users_above_threshold(activation_candidates):
            contagions_above_threshold = self.__contagions_above_threshold(
                activation_candidates, user
            )  # contagions for user in which threshold has been exceeded
            # active_contagions = self.active_contagions(user)  # contagions in which user is already active
            # contagions_above_threshold_not_active = self.__contagions_above_threshold_not_active(active_contagions,contagions_above_threshold)  # delete active_contagions from contagions_above_threshold
            if self.__check_negative_contagion_correlation(
                    contagions_above_threshold):  # check weather
                # candidates are not negatively correlated
                self.__activation(contagions_above_threshold, user)
                self.__increase_activity_index(user)
                # num_activations += 1
                self.__update_threshold(user)

    def __check_negative_contagion_correlation(
            self, contagions_above_threshold_not_active):
        # TODO review of correctness of condition
        return (not np.any(self.contagion_correlation.matrix[
            contagions_above_threshold_not_active[:, None],
            contagions_above_threshold_not_active] < 0)) and (
                not contagions_above_threshold_not_active.size == 0)

    def __update_threshold(self, user):
        # TODO assign vector in one line
        # print(self.thresholds.initial_matrix[user][0],self.activity_index_vector_[user])
        for contagion in range(
                self.state_matrix_.num_contagions):  # temporary solution
            self.thresholds.matrix[user][contagion] = 1 - math.pow(
                1 - self.thresholds.initial_matrix[user][contagion],
                self.activity_index_vector_[user] + 1)  # aktualizacja thety

    def __increase_activity_index(self, user):
        """Increment the entry of ``user`` in ``activity_index_vector_`` by one."""
        self.activity_index_vector_[
            user] += 1  # Y[user] += 1: increment the activation counter of user

    def __activation(self, contagions_above_threshold_not_active, user):
        new_activations_matrix = np.empty_like(self.state_matrix_.matrix)
        new_activations_matrix[user][
            contagions_above_threshold_not_active] = 1.  # aktywacja uzytkownika user w tagach z listy contagions_above_threshold
        self.state_matrix_.matrix = self.state_matrix_.matrix / 2 + new_activations_matrix
        self.state_matrix_.matrix[self.state_matrix_.matrix > 1] = 1

    def __contagions_above_threshold_not_active(self, active_contagions,
                                                contagions_above_threshold):
        """Return the contagions above threshold that are not already active.

        The result is ``np.setdiff1d(contagions_above_threshold,
        active_contagions)``. The only call site visible in this class is
        commented out in ``__activation_procedure``.
        """
        return np.setdiff1d(contagions_above_threshold, active_contagions)

    def active_contagions(self, user):
        return np.where(self.state_matrix_.matrix[user][:])[0]

    def __contagions_above_threshold(self, activation_candidates, user):
        return np.where(activation_candidates[user, :])[0]

    def __users_above_threshold(self, activation_candidates):
        return np.unique(np.where(activation_candidates[:, :])[0])

    def __find_activation_candidates(self, activation_matrix):
        """Mark the entries of ``activation_matrix`` that reach the threshold.

        Returns the element-wise result of
        ``activation_matrix >= self.thresholds.matrix``.
        """
        return np.greater_equal(activation_matrix, self.thresholds.matrix)

    def __activation_matrix(self, influence_matrix):
        """Combine the influence matrix with the contagion correlations.

        Returns ``influence_matrix`` multiplied (``dot``) by the contagion
        correlation matrix, divided by
        ``self.contagion_correlation.num_contagions_``.
        """
        return influence_matrix.dot(
            self.contagion_correlation.matrix
        ) / self.contagion_correlation.num_contagions_

    def __influence_matrix(self):
        """Return the transposed adjacency matrix multiplied by the state matrix.

        Reads ``self.adjacency.matrix_transposed_``; ``predict`` calls
        ``self.adjacency.transposed()`` before the first step, presumably to
        populate it - confirm against ``Adjacency``.
        """
        return self.adjacency.matrix_transposed_.dot(self.state_matrix_.matrix)

    def assign_contagions_correlation_matrix(self, matrix):
        """Assign a ready-made contagion correlation matrix.

        Delegates to ``self.contagion_correlation.assign_matrix(matrix)``;
        no consistency check against the model is performed here.
        """
        # TODO check if matrix is consistent with model
        self.contagion_correlation.assign_matrix(matrix)

    def assign_adjacency_matrix(self, matrix):
        """Assign a ready-made adjacency matrix.

        Delegates to ``self.adjacency.assign_matrix(matrix)``; no consistency
        check against the model is performed here.
        """
        # TODO check if matrix is consistent with model
        self.adjacency.assign_matrix(matrix)

    def assign_thresholds_matrix(self, matrix):
        """Assign a ready-made thresholds matrix.

        Delegates to ``self.thresholds.assign_matrix(matrix)``; no consistency
        check against the model is performed here.
        """
        # TODO check if matrix is consistent with model
        self.thresholds.assign_matrix(matrix)

    def assign_state_matrix(self, state_matrix):
        """Placeholder: not implemented yet.

        Currently a no-op; ``state_matrix`` is ignored and the model is left
        unchanged.
        """
        # TODO Implement this method
        pass

    def assign_activity_index_vector(self, activity_index_vector):
        """Placeholder: not implemented yet.

        Currently a no-op; ``activity_index_vector`` is ignored and the model
        is left unchanged.
        """
        # TODO Implement this method
        pass