def divide_new_elements(self, messages, unlearn, original=None):
    """Unlearn or relearn the given messages through the ``h`` helper module.

    The instance's ``train_y``, ``train_x``, ``pol_y`` and ``pol_x`` are
    bundled into one list (in that order) and handed to the helper.

    Param:
        messages: indices of the emails to learn/unlearn.
        unlearn: truthy -> ``h.unlearn(data, messages)``;
            falsy -> ``h.relearn(data, original, messages)``.
        original: forwarded to ``h.relearn`` as its second argument; not
            used when unlearning.
            NOTE(review): presumably the pristine counterparts of the four
            data sets - confirm against the helper.

    Returns nothing; the helper's return value is discarded.
    """
    data_sets = [self.train_y, self.train_x, self.pol_y, self.pol_x]

    if not unlearn:
        h.relearn(data_sets, original, messages)
        return

    h.unlearn(data_sets, messages)
def learn(self, cluster):
    """Learns a cluster from the ActiveUnlearner.

    Steps, in order:
      1. If ``len(cluster.ham) + len(cluster.spam)`` does not equal
         ``cluster.size``, call ``cluster.divide()`` to refresh the sets.
      2. Print whether ``train_y`` / ``train_x`` equal ``o_train_y`` /
         ``o_train_x`` before relearning.
      3. Call ``h.relearn`` with the current data sets, the ``o_*`` data
         sets and ``cluster.cluster_set``.
         NOTE(review): the helper is assumed to update the current data
         sets in place - confirm against its implementation.
      4. Print the same equality checks after relearning and, when
         ``train_x`` still differs from ``o_train_x``, print the cluster
         set together with every index at which the two disagree.

    Param:
        cluster: object exposing ``ham``, ``spam``, ``size``,
            ``cluster_set`` and ``divide()``.

    Returns nothing; all results are reported on stdout.
    """

    def _report(label, value):
        # A single pre-formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("%s %s" % (label, value))

    if len(cluster.ham) + len(cluster.spam) != cluster.size:
        print("\nUpdating cluster ham and spam sets...\n")
        cluster.divide()

    _report("Is relearned train_y same as o_train_y before relearning?",
            self.train_y == self.o_train_y)
    _report("Is relearned train_x same as o_train_x before relearning? ",
            self.train_x == self.o_train_x)

    current_sets = [self.train_y, self.train_x, self.pol_y, self.pol_x]
    original_sets = [self.o_train_y, self.o_train_x,
                     self.o_pol_y, self.o_pol_x]
    h.relearn(current_sets, original_sets, cluster.cluster_set)

    _report("Is relearned train_y same as o_train_y after relearning?",
            self.train_y == self.o_train_y)
    _report("Is relearned train_x same as o_train_x after relearning?",
            self.train_x == self.o_train_x)

    if self.train_x != self.o_train_x:
        # Log the indices where the two disagree. Only the common prefix
        # is compared element by element, so a length mismatch can no
        # longer raise IndexError; positions that exist in just one of the
        # two sequences are reported as disparities as well.
        new_len = len(self.train_x)
        old_len = len(self.o_train_x)
        diff = [
            index
            for index, (new_item, old_item)
            in enumerate(zip(self.train_x, self.o_train_x))
            if new_item != old_item
        ]
        diff.extend(range(min(new_len, old_len), max(new_len, old_len)))

        _report("Cluster set indicies: ", cluster.cluster_set)
        _report("Disparities after relearning: ", diff)