def load_mt(in_dir, data_name):
    """Reload a saved mixture of Chow-Liu trees from '<in_dir><data_name>.npz'."""
    infile = in_dir + data_name + '.npz'
    # The component dicts are stored as an object array, so newer NumPy
    # versions need allow_pickle=True to read them back.
    reload_dict = np.load(infile, allow_pickle=True)

    reload_mix_clt = MIXTURE_CLT()
    reload_mix_clt.mixture_weight = reload_dict['weights']
    reload_mix_clt.n_components = reload_mix_clt.mixture_weight.shape[0]

    reload_clt_component = reload_dict['clt_component']
    for i in range(reload_mix_clt.n_components):
        clt_c = CLT()
        curr_component = reload_clt_component[i]
        clt_c.xyprob = curr_component['xyprob']
        clt_c.xprob = curr_component['xprob']
        clt_c.topo_order = curr_component['topo_order']
        clt_c.parents = curr_component['parents']
        clt_c.log_cond_cpt = curr_component['log_cond_cpt']
        clt_c.cond_cpt = np.exp(clt_c.log_cond_cpt)
        reload_mix_clt.clt_list.append(clt_c)

    return reload_mix_clt
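
# A minimal sketch of the writer that load_mt above assumes. The name
# `save_mt` and the exact npz layout are hypothetical, inferred from the
# fields the loader reads; the original module may persist these fields
# differently.
def save_mt(out_dir, data_name, mix_clt):
    components = []
    for clt in mix_clt.clt_list:
        components.append({'xyprob': clt.xyprob,
                           'xprob': clt.xprob,
                           'topo_order': clt.topo_order,
                           'parents': clt.parents,
                           'log_cond_cpt': clt.log_cond_cpt})
    # The dicts are stored as an object array, which is why the loader
    # needs allow_pickle=True.
    np.savez(out_dir + data_name + '.npz',
             weights=mix_clt.mixture_weight,
             clt_component=np.array(components, dtype=object))
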
def learn_structure_weight(self, dataset, weights, ids, smooth):
    """Recursively learn a cutset-network structure from weighted data.

    Returns a CLT leaf once the data or variable count gets too small or the
    maximum depth is reached; otherwise returns
    [variable, original_id, p0, p1, left_subtree, right_subtree].
    """
    curr_depth = self.nvariables - dataset.shape[1]

    # Base case: too few records or variables, or maximum depth reached.
    if (dataset.shape[0] < self.min_rec or dataset.shape[1] < self.min_var
            or curr_depth >= self.depth):
        clt = CLT()
        clt.learnStructure(dataset)
        # Zero out the pairwise and marginal tables to save memory.
        clt.xyprob = np.zeros((1, 1, 2, 2))
        clt.xprob = np.zeros((1, 2))
        return clt

    # Weighted sufficient statistics with Laplace smoothing.
    self.xycounts = Util.compute_weighted_xycounts(dataset, weights) + smooth
    self.xcounts = Util.compute_weighted_xcounts(dataset, weights) + 2.0 * smooth

    # Split on the variable with the highest total mutual information.
    edgemat = Util.compute_edge_weights(self.xycounts, self.xcounts)
    np.fill_diagonal(edgemat, 0)
    scores = np.sum(edgemat, axis=0)
    variable = np.argmax(scores)

    # Partition the records by the value of the chosen variable.
    index1 = np.where(dataset[:, variable] == 1)[0]
    index0 = np.where(dataset[:, variable] == 0)[0]
    new_dataset = np.delete(dataset, variable, axis=1)

    new_dataset1 = new_dataset[index1]
    new_weights1 = weights[index1]
    p1 = np.sum(new_weights1) + smooth

    new_dataset0 = new_dataset[index0]
    new_weights0 = weights[index0]
    p0 = np.sum(new_weights0) + smooth

    # Normalize the branch probabilities.
    p0 = p0 / (p0 + p1)
    p1 = 1.0 - p0

    new_ids = np.delete(ids, variable, 0)
    return [variable, ids[variable], p0, p1,
            self.learn_structure_weight(new_dataset0, new_weights0, new_ids, smooth),
            self.learn_structure_weight(new_dataset1, new_weights1, new_ids, smooth)]
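
# Hypothetical usage sketch (not part of the original module): learning a
# weighted cutset-network structure on binary data with uniform instance
# weights. `cnet` stands in for an instance of the surrounding class, with
# nvariables, min_rec, min_var and depth already set.
def _example_learn_structure_weight(cnet, dataset):
    weights = np.full(dataset.shape[0], 1.0)   # uniform instance weights
    ids = np.arange(dataset.shape[1])
    # Returns either a CLT leaf or a list of the form
    # [variable, original_id, p0, p1, left_subtree, right_subtree].
    return cnet.learn_structure_weight(dataset, weights, ids, smooth=1.0)
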
def learnStructureHelper(self, tum, dataset, ids, lamda, beta_function,
                         evid_list, data_ind, next_id=-1, next_weights=np.zeros(2)):
    curr_depth = self.nvariables - ids.shape[0]

    # Select the data slice that matches the evidence accumulated so far.
    if len(evid_list) == 0:   # the first call: use the full dataset
        sub_dataset = dataset
    else:
        if data_ind.shape[0] == 0:
            sub_dataset = np.array([])
        else:
            sub_dataset = dataset[data_ind, :][:, ids]

    # Mixing coefficient between the data estimates and the TUM estimates.
    alpha = utilM.updata_coef(sub_dataset.shape[0], dataset.shape[0],
                              lamda, beta_function)

    if next_id == -1:
        # Marginals from the TUM part, conditioned on the evidence.
        p_xy, p_x = tum.inference_jt(evid_list, ids)

        if alpha > 0:
            # Empirical marginals from the data part, with Laplace correction.
            xycounts = Util.compute_xycounts(sub_dataset) + 1
            xcounts = Util.compute_xcounts(sub_dataset) + 2
            p_xy_d = Util.normalize2d(xycounts)
            p_x_d = Util.normalize1d(xcounts)
            # Convex blend of the data and TUM estimates.
            p_xy = alpha * p_xy_d + (1 - alpha) * p_xy
            p_x = alpha * p_x_d + (1 - alpha) * p_x

        # Mutual information scores for all pairs of variables; reset
        # self mutual information on the diagonal to 0.
        edgemat = Util.compute_MI_prob(p_xy, p_x)
        np.fill_diagonal(edgemat, 0)

        # Split on the variable with the highest total mutual information.
        scores = np.sum(edgemat, axis=0)
        variable = np.argmax(scores)
        variable_id = ids[variable]   # the index in the original file
        p1 = p_x[variable, 1]
        p0 = p_x[variable, 0]
        evid_list.append(np.array([variable_id, -1]))  # -1: value not determined yet

        if curr_depth >= self.depth:
            # Maximum depth reached: build a Chow-Liu tree leaf.
            clt_leaf = CLT()
            clt_leaf.learnStructure_MI(edgemat)
            clt_leaf.xyprob = p_xy
            clt_leaf.xprob = p_x
            clt_leaf.get_log_cond_cpt()
            # Zero out the pairwise table to save memory.
            clt_leaf.xyprob = np.zeros((1, 1, 2, 2))
            # Bookkeeping needed to resume learning from this leaf later.
            save_info = {}
            save_info['ids'] = ids
            save_info['next_id'] = variable_id
            save_info['next_weights'] = np.array([p0, p1])
            save_info['evid_list'] = evid_list
            save_info['data_ind'] = data_ind
            clt_leaf.save_info = save_info
            return clt_leaf
    else:
        # Resuming from a saved leaf: the split variable is already chosen.
        variable_id = next_id
        p0 = next_weights[0]
        p1 = next_weights[1]
        variable = np.where(ids == variable_id)[0][0]

    # Branch the evidence on the chosen variable.
    evid_list_0 = copy.deepcopy(evid_list)
    evid_list_1 = copy.deepcopy(evid_list)
    evid_list_0[-1][1] = 0
    evid_list_1[-1][1] = 1

    if alpha > 0:
        new_data_ind0 = data_ind[np.where(sub_dataset[:, variable] == 0)[0]]
        new_data_ind1 = data_ind[np.where(sub_dataset[:, variable] == 1)[0]]
    else:
        new_data_ind0 = np.array([])
        new_data_ind1 = np.array([])

    new_ids = np.delete(ids, variable)
    return [variable, variable_id, p0, p1,
            self.learnStructureHelper(tum, dataset, new_ids, lamda, beta_function,
                                      evid_list_0, new_data_ind0),
            self.learnStructureHelper(tum, dataset, new_ids, lamda, beta_function,
                                      evid_list_1, new_data_ind1)]
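
# Hypothetical sketch (not part of the original module) isolating the convex
# blend used in learnStructureHelper: marginals inferred from the TUM are
# mixed with Laplace-smoothed empirical marginals from the matching data
# slice, weighted by the coefficient alpha.
def _example_blend_marginals(p_xy_tum, p_x_tum, sub_dataset, alpha):
    xycounts = Util.compute_xycounts(sub_dataset) + 1   # Laplace correction
    xcounts = Util.compute_xcounts(sub_dataset) + 2
    p_xy = alpha * Util.normalize2d(xycounts) + (1.0 - alpha) * p_xy_tum
    p_x = alpha * Util.normalize1d(xcounts) + (1.0 - alpha) * p_x_tum
    return p_xy, p_x
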