def assign_primaries2(primaries, clusts, data):
    """
    For each EM primary, assign the closest cluster that matches its batch.
    data should contain groups of voxels.
    This version does not filter out compton clusters first.
    """
    primaries = primaries.cpu()
    data = data.cpu()

    labels = get_cluster_label(data, clusts)
    batches = get_cluster_batch(data, clusts)

    assn = []
    for primary in primaries:
        # Get the list of indices that match the batch
        # (label matching is disabled in this variant)
        pbatch = primary[-2]
        pselection = batches == pbatch
        pinds = np.where(pselection)[0]  # indices to compare against
        if len(pinds) < 1:
            continue

        scores = score_clusters_primary(clusts[pinds], data, labels[pinds], primary)
        ind = np.argmin(scores)
        assn.append(pinds[ind])
    return assn
def forward(self, data):
    """
    Input:
        data[0]: (Nx5) cluster tensor with rows (x, y, z, batch_id, cluster_id)
    Output:
        dictionary, with
            'node_pred': torch.tensor with node prediction weights
    """
    # Get the device
    cluster_label = data[0]
    device = cluster_label.device

    # Find the indices of points that belong to the same EM clusters
    clusts = form_clusters_new(cluster_label)

    # If requested, remove clusters below a certain size threshold
    if self.remove_compton:
        selection = np.where(filter_compton(clusts, self.compton_thresh))[0]
        if not len(selection):
            return self.default_return(device)
        clusts = clusts[selection]

    # Get the cluster id of each processed cluster
    clust_ids = get_cluster_label(cluster_label, clusts)

    # Get the batch id of each cluster
    batch_ids = get_cluster_batch(cluster_label, clusts)

    # Form a complete graph (should add options for other structures, TODO)
    edge_index = complete_graph(batch_ids, device=device)
    if not edge_index.shape[0]:
        return self.default_return(device)

    # Obtain vertex features
    x = cluster_vtx_features(cluster_label, clusts, device=device)

    # Obtain edge features
    e = cluster_edge_features(cluster_label, clusts, edge_index, device=device)

    # Convert the batch IDs to a torch tensor to pass to the model
    xbatch = torch.tensor(batch_ids).to(device)

    # Pass through the model, get the output
    out = self.node_predictor(x, edge_index, e, xbatch)

    return {
        **out,
        'clust_ids': [torch.tensor(clust_ids)],
        'batch_ids': [torch.tensor(batch_ids)],
        'edge_index': [edge_index]
    }
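# Illustration of the input layout expected above. The helper below is a
# minimal sketch of what complete_graph presumably produces, assuming it
# connects every pair of clusters that share a batch id; it is not the
# repository's implementation.
import torch

def complete_graph_sketch(batch_ids, device=None):
    """Hypothetical: connect all pairs of clusters within the same batch."""
    edges = [[i, j] for i in range(len(batch_ids))
                    for j in range(i + 1, len(batch_ids))
                    if batch_ids[i] == batch_ids[j]]
    return torch.tensor(edges, dtype=torch.long, device=device).t()

# Toy (Nx5) cluster tensor with rows (x, y, z, batch_id, cluster_id):
# two clusters in batch 0 and one in batch 1
cluster_label = torch.tensor([
    [0., 0., 0., 0., 0.],
    [1., 0., 0., 0., 0.],
    [5., 5., 5., 0., 1.],
    [9., 9., 9., 1., 0.],
])
batch_ids = [0, 0, 1]  # one entry per cluster
print(complete_graph_sketch(batch_ids))  # tensor([[0], [1]]): single edge 0-1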
def assign_primaries(primaries, clusts, data, use_labels=False, max_dist=None, compton_thresh=0):
    """
    For each EM primary, assign the closest cluster that matches its batch
    (and, if use_labels is True, its group label).
    data should contain groups of voxels.
    """
    primaries = primaries.cpu().detach().numpy()
    data = data.cpu().detach().numpy()

    # First remove compton-like clusters from the list
    selection = filter_compton(clusts, compton_thresh)  # non-compton looking clusters
    selinds = np.where(selection)[0]  # selected indices
    cs2 = clusts[selinds]

    # If everything looks compton, say there are no primaries
    if len(cs2) < 1:
        return []

    if use_labels:
        labels = get_cluster_label(data, cs2)
    batches = get_cluster_batch(data, cs2)

    assn = []
    for primary in primaries:
        # Get the list of indices that match the batch (and label)
        pbatch = primary[-2]
        if use_labels:
            plabel = primary[-1]
            pselection = np.logical_and(labels == plabel, batches == pbatch)
        else:
            pselection = batches == pbatch
        pinds = np.where(pselection)[0]  # indices to compare against
        if len(pinds) < 1:
            continue

        scores = score_clusters_primary(cs2[pinds], data, primary)
        ind = np.argmin(scores)
        if max_dist and scores[ind] > max_dist:
            continue
        assn.append(selinds[pinds[ind]])

    # Assignments may not be unique
    assn = np.unique(assn)
    return assn
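# score_clusters_primary is defined elsewhere in the repository. Below is a
# minimal sketch consistent with how it is used above (np.argmin picks the
# best cluster, and scores are compared against max_dist), under the
# assumption that a cluster's score is the minimum Euclidean distance from
# the primary point to its voxels. The helper name and score definition are
# hypothetical.
import numpy as np

def score_clusters_primary_sketch(clusts, data, primary):
    """Hypothetical: min distance from the primary (x, y, z) to each cluster."""
    point = primary[:3]
    return np.array([np.min(np.linalg.norm(data[c][:, :3] - point, axis=1))
                     for c in clusts])

# Toy usage: two clusters in batch 0, primary at the origin
data = np.array([[0., 0., 1., 0., 0.],   # rows: (x, y, z, batch_id, cluster_id)
                 [0., 0., 2., 0., 0.],
                 [7., 7., 7., 0., 1.]])
clusts = [np.array([0, 1]), np.array([2])]
primary = np.array([0., 0., 0., 0., 0.])  # (x, y, z, batch_id, label)
print(score_clusters_primary_sketch(clusts, data, primary))  # [1.0, ~12.12]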
def assign_primaries_unique(primaries, clusts, data, use_labels=False):
    """
    For each EM primary, assign the closest cluster that matches its batch
    (and, if use_labels is True, its group label). Each cluster is assigned
    to at most one primary: on a conflict, the primary with the lower score
    keeps the cluster.
    data should contain groups of voxels.
    """
    # Compton-like clusters are not filtered out in this variant
    cs2 = clusts

    # If there are no clusters, say there are no primaries
    if len(cs2) < 1:
        return []

    labels = get_cluster_label(data, cs2)
    batches = get_cluster_batch(data, cs2)

    assn = -1 * np.ones(len(primaries))
    assn_scores = -1 * np.ones(len(primaries))
    for i in range(len(primaries)):
        primary = primaries[i]
        # Get the list of indices that match the batch (and label)
        pbatch = primary[-2]
        if use_labels:
            plabel = primary[-1]
            pselection = np.logical_and(labels == plabel, batches == pbatch)
        else:
            pselection = batches == pbatch
        pinds = np.where(pselection)[0]  # indices to compare against
        if len(pinds) < 1:
            continue

        scores = score_clusters_primary(cs2[pinds], data, primary)
        ind = np.argmin(scores)
        pind = pinds[ind]
        score = scores[ind]

        # If this cluster is already assigned, the lower score keeps it
        already_assigned = np.where(assn == pind)[0]
        if len(already_assigned) > 0:
            current_low = assn_scores[already_assigned][0]
            if score < current_low:
                assn_scores[already_assigned] = -1.0
                assn[already_assigned] = -1.0
            else:
                continue
        assn_scores[i] = score
        assn[i] = pind
    return assn
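# Worked example of the uniqueness rule above: if two primaries pick the same
# cluster, the lower score keeps it and the earlier assignment is reset to -1.
# Suppose primary 0 matches cluster 3 with score 4.0, then primary 1 also
# matches cluster 3 with score 2.5:
#
#   after primary 0:  assn = [ 3, -1],  assn_scores = [ 4.0, -1.0]
#   after primary 1:  assn = [-1,  3],  assn_scores = [-1.0,  2.5]
#
# Primary 0 is left unassigned (-1) rather than keeping the worse match.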
def assign_primaries3(primaries, clusts, data):
    """
    For each EM primary, assign the closest cluster that matches its batch
    and group label; append -1 when no cluster matches.
    data should contain groups of voxels.
    """
    # Compton-like clusters are not filtered out in this variant
    cs2 = clusts

    # If there are no clusters, say there are no primaries
    if len(cs2) < 1:
        return []

    labels = get_cluster_label(data, cs2)
    batches = get_cluster_batch(data, cs2)

    assn = []
    for primary in primaries:
        # Get the list of indices that match the label and batch
        pbatch = primary[-2]
        plabel = primary[-1]
        pselection = np.logical_and(labels == plabel, batches == pbatch)
        pinds = np.where(pselection)[0]  # indices to compare against
        if len(pinds) < 1:
            assn.append(-1)
            continue

        scores = score_clusters_primary(cs2[pinds], data, labels[pinds], primary)
        ind = np.argmin(scores)
        assn.append(pinds[ind])
    return assn
def forward(self, out, clusters, groups, primary):
    """
    out:
        array output from the DataParallel gather function
        out[0] - n_gpus tensors of edge indices
        out[1] - n_gpus tensors of predicted edge weights from the model forward
        out[2] - n_gpus arrays of group ids for each cluster
        out[3] - n_gpus numbers of iterations
    data:
        clusters - n_gpus Nx5 tensors of (x, y, z, batch_id, cluster_id)
        groups   - n_gpus Nx5 tensors of (x, y, z, batch_id, group_id)
        primary  - n_gpus tensors of (x, y, z) coordinates of the EM primary origins
    """
    total_loss, total_acc, total_primary_fdr, total_primary_acc, total_iter = 0., 0., 0., 0., 0
    ngpus = len(clusters)
    for i in range(ngpus):
        data0 = clusters[i]
        data1 = groups[i]
        data2 = primary[i]

        clusts = form_clusters_new(data0)

        # Remove compton clusters; if no cluster passes the cut,
        # add a zero loss and move on to the next GPU
        if self.remove_compton:
            selection = filter_compton(clusts)  # non-compton looking clusters
            if not len(selection):
                edge_pred = out[1][i]
                total_loss += self.lossfn(edge_pred, edge_pred)
                total_acc += 1.
                continue
            clusts = clusts[selection]

        # Process group data
        data_grp = data1

        # Form the primary/secondary bipartite graph
        primaries = assign_primaries(data2, clusts, data0)
        batch = get_cluster_batch(data0, clusts)
        group = get_cluster_label(data_grp, clusts)

        # Compare the estimated primaries against the labeled ones
        primaries_true = assign_primaries(data2, clusts, data1, use_labels=True)
        primary_fdr, primary_tdr, primary_acc = analyze_primaries(primaries, primaries_true)
        total_primary_fdr += primary_fdr
        total_primary_acc += primary_acc

        niter = out[3][i][0]  # number of iterations
        total_iter += niter
        # Loop over the iterations and add the loss at each one
        for j in range(niter):
            # Determine the true assignments
            edge_index = out[0][i][j]
            edge_assn = edge_assignment(edge_index, batch, group, cuda=True)
            edge_pred = out[1][i][j]

            edge_assn = edge_assn.view(-1)
            edge_pred = edge_pred.view(-1)
            if self.balance:
                edge_assn, edge_pred = self.balance_classes(edge_assn, edge_pred)
            total_loss += self.lossfn(edge_pred, edge_assn)

        # Compute the accuracy of the secondary-to-primary matching
        total_acc += torch.tensor(
            secondary_matching_vox_efficiency2(out[2][i], group, primaries, clusts))

    return {
        'primary_fdr': total_primary_fdr / ngpus,
        'primary_acc': total_primary_acc / ngpus,
        'accuracy': total_acc / ngpus,
        'loss': total_loss / ngpus,
        'n_iter': total_iter
    }
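# self.balance_classes is not shown in this section. A minimal sketch,
# assuming it applies the same reweighting as the inline version in the
# bipartite loss further below (each class scaled by the other class's
# frequency so 0/1 labels contribute equally to the loss); the function
# name and signature here are hypothetical.
import torch

def balance_classes_sketch(edge_assn, edge_pred):
    ind0 = edge_assn == 0
    ind1 = edge_assn == 1
    n0 = torch.sum(ind0).float()
    n1 = torch.sum(ind1).float()
    w0 = n1 / (n0 + n1)   # weight for class 0
    w1 = n0 / (n0 + n1)   # weight for class 1
    edge_assn = edge_assn.clone().float()
    edge_pred = edge_pred.clone()
    edge_assn[ind0] *= w0
    edge_assn[ind1] *= w1
    edge_pred[ind0] *= w0
    edge_pred[ind1] *= w1
    return edge_assn, edge_pred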
def forward(self, out, clusters, groups, primary):
    """
    out:
        array output from the DataParallel gather function
        out[0] - n_gpus tensors of edge indices
        out[1] - n_gpus tensors of predicted edge weights from the model forward
        out[2] - n_gpus arrays of group ids for each cluster
        out[3] - n_gpus numbers of iterations
    data:
        clusters - n_gpus Nx5 tensors of (x, y, z, batch_id, cluster_id)
        groups   - n_gpus Nx5 tensors of (x, y, z, batch_id, group_id)
        primary  - n_gpus tensors of (x, y, z) coordinates of the EM primary origins
    """
    total_loss, total_acc, total_primary_fdr, total_primary_acc, total_iter = 0., 0., 0., 0., 0
    total_ari, total_ami, total_sbd, total_pur, total_eff = 0., 0., 0., 0., 0.
    ngpus = len(clusters)
    for i in range(ngpus):
        data0 = clusters[i]
        data1 = groups[i]
        data2 = primary[i]

        clusts = form_clusters_new(data0)

        # Remove compton clusters; if no cluster passes the cut,
        # add a zero loss and move on to the next GPU
        if self.remove_compton:
            selection = filter_compton(clusts)  # non-compton looking clusters
            if not len(selection):
                edge_pred = out[1][i][0]
                total_loss += self.lossfn(edge_pred, edge_pred)
                total_acc += 1.
                continue
            clusts = clusts[selection]

        # Process group data
        data_grp = data1

        # Form the primary/secondary bipartite graph
        primaries = assign_primaries(data2, clusts, data0)
        batch = get_cluster_batch(data0, clusts)
        group = get_cluster_label(data_grp, clusts)

        # Compare the estimated primaries against the labeled ones
        primaries_true = assign_primaries(data2, clusts, data1, use_labels=True)
        primary_fdr, primary_tdr, primary_acc = analyze_primaries(primaries, primaries_true)
        total_primary_fdr += primary_fdr
        total_primary_acc += primary_acc

        niter = out[3][i][0]  # number of iterations
        total_iter += niter
        # Loop over the iterations and add the loss at each one
        for j in range(niter):
            # Determine the true assignments
            edge_index = out[0][i][j]
            edge_assn = edge_assignment(edge_index, batch, group, cuda=True, dtype=torch.long)
            # Get the edge predictions (2 channels)
            edge_pred = out[1][i][j]
            edge_assn = edge_assn.view(-1)
            total_loss += self.lossfn(edge_pred, edge_assn)

        # Compute the accuracy of the secondary-to-primary matching
        total_acc += secondary_matching_vox_efficiency2(out[2][i], group, primaries, clusts)

        # Compute the clustering metrics
        ari, ami, sbd, pur, eff = DBSCAN_cluster_metrics2(
            out[2][i].cpu().numpy(), clusts, group)
        total_ari += ari
        total_ami += ami
        total_sbd += sbd
        total_pur += pur
        total_eff += eff

    return {
        'primary_fdr': total_primary_fdr / ngpus,
        'primary_acc': total_primary_acc / ngpus,
        'ARI': total_ari / ngpus,
        'AMI': total_ami / ngpus,
        'SBD': total_sbd / ngpus,
        'purity': total_pur / ngpus,
        'efficiency': total_eff / ngpus,
        'accuracy': total_acc / ngpus,
        'loss': total_loss / ngpus,
        'n_iter': total_iter
    }
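# DBSCAN_cluster_metrics2 is defined elsewhere in the repository. A rough
# sketch of the kind of quantities it returns, assuming ARI/AMI come from
# scikit-learn and purity/efficiency from the contingency matrix of predicted
# vs. true group ids (SBD, the symmetric best dice, is omitted for brevity);
# the function name and exact definitions here are assumptions.
import numpy as np
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score
from sklearn.metrics.cluster import contingency_matrix

def cluster_metrics_sketch(pred_ids, true_ids):
    ari = adjusted_rand_score(true_ids, pred_ids)
    ami = adjusted_mutual_info_score(true_ids, pred_ids)
    cm = contingency_matrix(true_ids, pred_ids)
    pur = np.sum(np.max(cm, axis=0)) / np.sum(cm)  # purity of predicted clusters
    eff = np.sum(np.max(cm, axis=1)) / np.sum(cm)  # efficiency w.r.t. true clusters
    return ari, ami, pur, eff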
def forward(self, out, data0, data1):
    """
    out:
        dictionary output from the GNN model
        keys:
            'edge_pred': predicted edge weights from the model forward
    data:
        data0 - DBSCAN data
        data1 - groups data
    """
    edge_pred = out[0][0]
    data0 = data0[0]
    data1 = data1[0]
    device = data0.device

    # First decide what the true edges should be:
    # form the graph, then pass it through the GNN
    clusts = form_clusters_new(data0)

    # Remove compton clusters; if no cluster passes the cut, return
    if self.remove_compton:
        selection = filter_compton(clusts, self.compton_thresh)  # non-compton looking clusters
        if not len(selection):
            total_loss = self.lossfn(edge_pred, edge_pred)
            return {'accuracy': 1., 'loss': total_loss}
        clusts = clusts[selection]

    # Process group data
    data_grp = data1

    # Form the graph
    batch = get_cluster_batch(data0, clusts)
    edge_index = complete_graph(batch, device=device)
    if not edge_index.shape[0]:
        total_loss = self.lossfn(edge_pred, edge_pred)
        return {'accuracy': 0., 'loss': total_loss}
    group = get_cluster_label(data_grp, clusts)

    # Determine the true assignments
    edge_assn = edge_assignment(edge_index, batch, group, device=device, dtype=torch.long)
    edge_assn = edge_assn.view(-1)

    # Total loss on the batch
    total_loss = self.lossfn(edge_pred, edge_assn)

    # Compute the assigned clusters from the edge predictions
    fe = edge_pred[1, :] - edge_pred[0, :]
    cs = assign_clusters_UF(edge_index, fe, len(clusts), thresh=0.0)

    ari, ami, sbd, pur, eff = DBSCAN_cluster_metrics2(cs, clusts, group)

    edge_ct = edge_index.shape[1]

    return {
        'ARI': ari,
        'AMI': ami,
        'SBD': sbd,
        'purity': pur,
        'efficiency': eff,
        'accuracy': ari,
        'loss': total_loss,
        'edge_count': edge_ct
    }
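# assign_clusters_UF is defined elsewhere in the repository. A minimal
# union-find sketch of the assumed behavior: merge the two endpoints of every
# edge whose score fe (the "on" channel minus the "off" channel of the
# 2-channel edge prediction) exceeds thresh, then return one group id per
# cluster. The helper name is hypothetical.
import numpy as np

def assign_clusters_uf_sketch(edge_index, fe, n, thresh=0.0):
    parent = list(range(n))
    def find(a):
        while parent[a] != a:
            parent[a] = parent[parent[a]]  # path compression
            a = parent[a]
        return a
    # Union the endpoints of every edge selected by the threshold
    for k in range(edge_index.shape[1]):
        if fe[k] > thresh:
            i, j = int(edge_index[0, k]), int(edge_index[1, k])
            parent[find(i)] = find(j)
    # Return the root of each cluster as its group id
    return np.array([find(i) for i in range(n)])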
def forward(self, edge_pred, data0, data1, data2):
    """
    edge_pred: predicted edge weights from the model forward
    data:
        data0 - 5 types data
        data1 - groups data
        data2 - primary data
    """
    data0 = data0[0]
    data1 = data1[0]
    data2 = data2[0]

    # First decide what the true edges should be:
    # form the graph, then pass it through the GNN
    clusts = form_clusters_new(data0)
    # (Track-like particles, types 0 and 1, could also be removed here.)

    # Remove compton clusters; if no cluster passes the cut, return
    selection = filter_compton(clusts)  # non-compton looking clusters
    if not len(selection):
        total_loss = self.lossfn(edge_pred, edge_pred)
        return {'accuracy': 1., 'loss_seg': total_loss}
    clusts = clusts[selection]

    # Process group data
    data_grp = data1

    # Form the primary/secondary bipartite graph
    primaries = assign_primaries(data2, clusts, data0)
    batch = get_cluster_batch(data0, clusts)
    edge_index = primary_bipartite_incidence(batch, primaries)
    group = get_cluster_label(data_grp, clusts)

    primaries_true = assign_primaries(data2, clusts, data1, use_labels=True)
    print("primaries (est): ", primaries)
    print("primaries (true): ", primaries_true)

    # Determine the true assignments
    edge_assn = edge_assignment(edge_index, batch, group, cuda=True)
    edge_assn = edge_assn.view(-1)
    edge_pred = edge_pred.view(-1)

    if self.balance:
        # Weight the edges so that the 0/1 labels appear equally often
        ind0 = edge_assn == 0
        ind1 = edge_assn == 1
        # Number of edges in each class
        n0 = torch.sum(ind0).float()
        n1 = torch.sum(ind1).float()
        print("n0 = ", n0, " n1 = ", n1)
        # Weights to balance the classes
        w0 = n1 / (n0 + n1)
        w1 = n0 / (n0 + n1)
        print("w0 = ", w0, " w1 = ", w1)
        edge_assn[ind0] = w0 * edge_assn[ind0]
        edge_assn[ind1] = w1 * edge_assn[ind1]
        edge_pred = edge_pred.clone()
        edge_pred[ind0] = w0 * edge_pred[ind0]
        edge_pred[ind1] = w1 * edge_pred[ind1]

    total_loss = self.lossfn(edge_pred, edge_assn)

    # Compute the accuracy of the assignment;
    # multiply by the batch size to be accurate
    total_acc = (np.max(batch) + 1) * torch.tensor(
        secondary_matching_vox_efficiency(edge_index, edge_assn, edge_pred,
                                          primaries, clusts, len(clusts)))

    return {'accuracy': total_acc, 'loss_seg': total_loss}
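# Worked example of the balancing weights above: with n0 = 90 background
# edges and n1 = 10 signal edges,
#
#   w0 = n1 / (n0 + n1) = 0.1
#   w1 = n0 / (n0 + n1) = 0.9
#
# so each class contributes the same total weight: n0 * w0 = n1 * w1 = 9.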