def random_assign(self, event: Event, target: Event, p: float) -> None:
    """Overwrite a target column on a random share of the rows matching ``event``.

    Both events are first normalised with ``convert``. The rows of
    ``self.df`` whose columns ``event.keys()`` equal ``event.values()`` are
    selected, ``round(len(rows) * p)`` of their index labels are drawn
    without replacement, and on those rows the column named by the first
    key of ``target`` is set to the first value of ``target``.

    Nothing happens when no row matches ``event`` or when the target column
    does not exist in ``self.df``.

    Args:
        event: Column/value pairs identifying the candidate rows.
        target: Column/value pair to write; only its first entry is used.
        p: Fraction of the matching rows to overwrite.

    Raises:
        ValueError: Propagated from ``random.sample`` when the rounded
            sample size is negative or larger than the number of matching
            rows (i.e. ``p`` outside ``[0, 1]``).
        IndexError: If ``target`` has no entries.
    """
    convert(event)
    convert(target)

    keys = list(event.keys())
    values = tuple(event.values())
    # Group by a bare label (not a one-element list) when there is a single
    # key, so that get_group() accepts a scalar on every pandas version.
    by = keys[0] if len(keys) == 1 else keys
    name = values[0] if len(values) == 1 else values

    grouped = self.df.groupby(by)
    try:
        index = grouped.get_group(name).index
    except KeyError:
        # No row matches the event: nothing to assign.
        return

    target_index = random.sample(list(index), int(round(len(index) * p)))

    # list(...) so this works whether keys()/values() return lists or views.
    column = list(target.keys())[0]
    value = list(target.values())[0]
    if column not in self.df.columns:
        # Unknown target column: stay a silent no-op instead of letting
        # .loc create a brand-new column.
        return

    # DataFrame.ix was removed in pandas 1.0; rows are index labels and the
    # column is addressed by name, so .loc is the direct replacement.
    self.df.loc[target_index, column] = value
def convert(event: Event) -> None:
    """Re-key integer entries of ``event.dict`` by name, in place.

    For every key returned by ``event.keys()`` that is an ``int``, the name
    is looked up as ``event.name[key].name`` and the value stored in
    ``event.dict`` under the integer key is moved to that name. Keys that
    are not integers are left untouched.

    Args:
        event: Object exposing ``keys()``, a ``dict`` mapping and an
            indexable ``name`` whose items have a ``name`` attribute.
    """
    # Iterate over a snapshot: the loop body removes and inserts entries in
    # event.dict, which is unsafe if keys() is a live view of that mapping
    # (presumably it is -- confirm against the Event class).
    for key in list(event.keys()):
        if isinstance(key, int):
            name = event.name[key].name
            event.dict[name] = event.dict.pop(key)
def count(self, event: Event, condition: Event, relationship: str) -> float:
    """Return the share of rows matching ``condition`` whose two event columns agree or differ.

    Both events are first normalised with ``convert``. The rows of
    ``self.df`` whose columns ``condition.keys()`` equal
    ``condition.values()`` form the group. Within that group the columns
    named by the first two keys of ``event`` are compared row by row.

    Args:
        event: Supplies the two column names to compare (its first two keys).
        condition: Column/value pairs selecting the group of rows.
        relationship: ``'equal'`` counts rows where the two columns are
            equal; any other value counts rows where they differ.

    Returns:
        Number of counted rows divided by the group size, or ``0`` when the
        group does not exist, a compared column is missing, or ``event``
        has fewer than two keys.
    """
    convert(event)
    convert(condition)

    keys = list(condition.keys())
    values = tuple(condition.values())
    # Group by a bare label (not a one-element list) when there is a single
    # key, so that get_group() accepts a scalar on every pandas version.
    by = keys[0] if len(keys) == 1 else keys
    name = values[0] if len(values) == 1 else values

    grouped = self.df.groupby(by)
    try:
        group = grouped.get_group(name)
        event_keys = list(event.keys())
        left = group[event_keys[0]]
        right = group[event_keys[1]]
    except (KeyError, IndexError):
        # KeyError: no such group or column; IndexError: fewer than two keys.
        return 0

    mask = (left == right) if relationship == 'equal' else (left != right)
    return len(group[mask]) / len(group)
def get_marginal_prob(self, event: Event) -> int:
    """Return how many rows of ``self.df`` match ``event``.

    The event is first normalised with ``convert``; a row matches when its
    columns ``event.keys()`` equal ``event.values()``.

    NOTE(review): despite the name, the value returned is a raw row count,
    not a normalised probability -- callers presumably divide by the total
    themselves; confirm before changing.

    Args:
        event: Column/value pairs identifying the rows to count.

    Returns:
        The number of matching rows, or ``0`` when no row matches.
    """
    convert(event)

    keys = list(event.keys())
    values = tuple(event.values())
    # Group by a bare label (not a one-element list) when there is a single
    # key, so that get_group() accepts a scalar on every pandas version.
    by = keys[0] if len(keys) == 1 else keys
    name = values[0] if len(values) == 1 else values

    grouped = self.df.groupby(by)
    try:
        return len(grouped.get_group(name))
    except KeyError:
        # No row carries this combination of values.
        return 0