def p345():
    A = [[7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583],
         [627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583, 639, 913],
         [447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423, 159, 743],
         [217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462, 350],
         [960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172, 812, 350],
         [870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425, 329, 803],
         [973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165, 992, 326],
         [322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601, 95, 973],
         [445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448, 848],
         [414, 456, 310, 312, 798, 104, 566, 520, 302, 248, 694, 976, 430, 392, 198],
         [184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284, 760, 390],
         [821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192, 156, 574],
         [34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329, 699],
         [815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251, 631, 107],
         [813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302, 35, 805]]

    # Munkres minimizes total cost; negate the matrix to turn the
    # maximum-sum assignment into a minimization problem.
    for r in range(len(A)):
        for c in range(len(A[0])):
            A[r][c] *= -1

    from munkres import Munkres
    m = Munkres()
    indexes = m.compute(A)
    total = 0
    for row, column in indexes:
        value = A[row][column]
        total += value
    return -total
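# A quick cross-check of the same maximization trick (not part of the original
# snippet; assumes scipy is available): scipy.optimize.linear_sum_assignment
# supports maximization directly, so the manual negation above can be avoided.
import numpy as np
from scipy.optimize import linear_sum_assignment

def p345_scipy(A):
    A = np.asarray(A)
    rows, cols = linear_sum_assignment(A, maximize=True)
    return int(A[rows, cols].sum())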
def Update(self, detections):
    # Create tracks for every detection if none exist yet
    if len(self.tracks) == 0:
        for i in range(len(detections)):
            track = Track(detections[i], self.trackIdCount)
            self.trackIdCount += 1
            self.tracks.append(track)

    N = len(self.tracks)
    M = len(detections)

    # Build the cost matrix (Euclidean distance between each track's
    # prediction and each detection), then apply the Hungarian algorithm
    cost = np.zeros(shape=(N, M))
    for i in range(N):
        for j in range(M):
            try:
                diff = self.tracks[i].prediction - detections[j]
                distance = np.sqrt(diff[0][0] * diff[0][0] +
                                   diff[1][0] * diff[1][0])
                cost[i][j] = distance
            except Exception:
                pass
    cost = 0.5 * cost

    assignment = [-1] * N
    # scipy's linear_sum_assignment is used instead of Munkres().compute(cost)
    # since it handles rectangular numpy cost matrices directly
    row_ind, col_ind = linear_sum_assignment(cost)
    for i in range(len(row_ind)):
        assignment[row_ind[i]] = col_ind[i]

    # Un-assign tracks whose best match is farther than the gate
    un_assigned_tracks = []
    for i in range(len(assignment)):
        if assignment[i] != -1:
            if cost[i][assignment[i]] > self.dist_thresh:
                assignment[i] = -1
                un_assigned_tracks.append(i)
        else:
            self.tracks[i].skipped_frames += 1

    # Delete tracks undetected for too long; iterate in reverse so earlier
    # deletions do not shift the remaining indices
    del_tracks = [i for i in range(len(self.tracks))
                  if self.tracks[i].skipped_frames > self.max_frames_to_skip]
    for id in sorted(del_tracks, reverse=True):
        if id < len(self.tracks):
            del self.tracks[id]
            del assignment[id]
        else:
            print("Error in deleting tracks")

    # Start new tracks for detections with no association
    un_assigned_detects = [i for i in range(len(detections))
                           if i not in assignment]
    for i in un_assigned_detects:
        track = Track(detections[i], self.trackIdCount)
        self.trackIdCount += 1
        self.tracks.append(track)

    # Update the Kalman filter state of every track
    for i in range(len(assignment)):
        self.tracks[i].KF.predict()
        if assignment[i] != -1:
            self.tracks[i].skipped_frames = 0
            self.tracks[i].prediction = self.tracks[i].KF.correct(
                detections[assignment[i]], 1)
        else:
            self.tracks[i].prediction = self.tracks[i].KF.correct(
                np.array([[0], [0]]), 0)
        # Keep only the last max_trace_length trace entries
        if len(self.tracks[i].trace) > self.max_trace_length:
            del self.tracks[i].trace[:len(self.tracks[i].trace) -
                                     self.max_trace_length]
        self.tracks[i].trace.append(self.tracks[i].prediction)
        self.tracks[i].KF.lastResult = self.tracks[i].prediction
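# Hedged sketch (not from the original tracker): the pairwise-distance loop
# above can be vectorized, assuming every prediction and detection is a 2x1
# numpy array holding [[x], [y]].
import numpy as np
from scipy.spatial.distance import cdist

def pairwise_cost(predictions, detections):
    preds = np.hstack(predictions).T  # shape (N, 2)
    dets = np.hstack(detections).T    # shape (M, 2)
    return 0.5 * cdist(preds, dets)   # same 0.5 scaling as above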
def kruskal_align(U, V, permute_U=False, permute_V=False):
    """Aligns two KTensors and returns a similarity score.

    Parameters
    ----------
    U : KTensor
        First kruskal tensor to align.
    V : KTensor
        Second kruskal tensor to align.
    permute_U : bool
        If True, modifies 'U' to align the KTensors (default is False).
    permute_V : bool
        If True, modifies 'V' to align the KTensors (default is False).

    Notes
    -----
    If `permute_U` and `permute_V` are both set to True, then the factors
    are ordered from most to least similar. If only one is True then the
    factors on the modified KTensor are re-ordered to match the factors
    in the un-aligned KTensor.

    Returns
    -------
    similarity : float
        Similarity score between zero and one.
    """
    # Compute similarity matrices.
    unrm = [f / np.linalg.norm(f, axis=0) for f in U.factors]
    vnrm = [f / np.linalg.norm(f, axis=0) for f in V.factors]
    sim_matrices = [np.dot(u.T, v) for u, v in zip(unrm, vnrm)]
    cost = 1 - np.mean(np.abs(sim_matrices), axis=0)

    # Solve matching problem via Hungarian algorithm.
    indices = Munkres().compute(cost.copy())
    prmU, prmV = zip(*indices)

    # Compute mean factor similarity given the optimal matching.
    similarity = np.mean(1 - cost[prmU, prmV])

    # If U and V are of different ranks, identify unmatched factors.
    unmatched_U = list(set(range(U.rank)) - set(prmU))
    unmatched_V = list(set(range(V.rank)) - set(prmV))

    # If permuting both U and V, order factors from most to least similar.
    if permute_U and permute_V:
        idx = np.argsort(cost[prmU, prmV])

    # If permute_U is False, then order the factors such that the ordering
    # for U is unchanged.
    elif permute_V:
        idx = np.argsort(prmU)

    # If permute_V is False, then order the factors such that the ordering
    # for V is unchanged.
    elif permute_U:
        idx = np.argsort(prmV)

    # If permute_U and permute_V are both False, then we are done and can
    # simply return the similarity.
    else:
        return similarity

    # Re-order the factor permutations.
    prmU = [prmU[i] for i in idx]
    prmV = [prmV[i] for i in idx]

    # Permute the factors.
    if permute_U:
        U.permute(prmU)
    if permute_V:
        V.permute(prmV)

    # Flip the signs of factors.
    flips = np.sign([F[prmU, prmV] for F in sim_matrices])
    flips[0] *= np.prod(flips, axis=0)  # always flip an even number of factors

    if permute_U:
        for i, f in enumerate(flips):
            U.factors[i] *= f
    elif permute_V:
        for i, f in enumerate(flips):
            V.factors[i] *= f

    # Return the similarity score.
    return similarity
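# Small demonstration of the sign-fixing step above (illustrative values, not
# from the original module): negating the first mode's flip wherever a
# factor's product of flips is -1 guarantees an even number of sign flips per
# factor, leaving the reconstructed tensor unchanged.
import numpy as np

flips = np.array([[-1., 1.],
                  [1., 1.],
                  [-1., -1.]])  # modes x factors
flips[0] *= np.prod(flips, axis=0)
assert np.all(np.prod(flips, axis=0) == 1)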
def compute3rdPartyMetrics(self):
    """
    Computes the metrics defined in
    - Stiefelhagen 2008: Evaluating Multiple Object Tracking Performance:
      The CLEAR MOT Metrics (MOTA, MOTAL, MOTP)
    - Nevatia 2008: Global Data Association for Multi-Object Tracking
      Using Network Flows (MT/PT/ML)
    """

    # construct Munkres object for Hungarian Method association
    hm = Munkres()
    max_cost = 1e9

    # go through all frames and associate ground truth and tracker results;
    # groundtruth and tracker contain lists for every single frame containing
    # lists of KITTI format detections
    fr, ids = 0, 0
    for seq_idx in range(len(self.groundtruth)):
        seq_gt = self.groundtruth[seq_idx]
        seq_dc = self.dcareas[seq_idx]  # don't care areas
        seq_tracker = self.tracker[seq_idx]
        seq_trajectories = defaultdict(list)
        seq_ignored = defaultdict(list)

        # statistics over the current sequence, check the corresponding
        # variable comments in __init__ to get their meaning
        seqtp = 0
        seqitp = 0
        seqfn = 0
        seqifn = 0
        seqfp = 0
        seqigt = 0
        seqitr = 0

        last_ids = [[], []]

        n_gts = 0
        n_trs = 0

        for f in range(len(seq_gt)):
            g = seq_gt[f]
            dc = seq_dc[f]
            t = seq_tracker[f]

            # counting total number of ground truth and tracker objects
            self.n_gt += len(g)
            self.n_tr += len(t)
            n_gts += len(g)
            n_trs += len(t)

            # use hungarian method to associate, using boxoverlap 0..1 as cost
            # build cost matrix
            cost_matrix = []
            this_ids = [[], []]
            for gg in g:
                # save current ids
                this_ids[0].append(gg.track_id)
                this_ids[1].append(-1)
                gg.tracker = -1
                gg.id_switch = 0
                gg.fragmentation = 0
                cost_row = []
                for tt in t:
                    # overlap == 1 is cost == 0
                    c = 1 - self.boxoverlap(gg, tt)
                    # gating for boxoverlap
                    if c <= self.min_overlap:
                        cost_row.append(c)
                    else:
                        cost_row.append(max_cost)  # = 1e9
                cost_matrix.append(cost_row)
                # all ground truth trajectories are initially not associated;
                # extend groundtruth trajectories lists (merge lists)
                seq_trajectories[gg.track_id].append(-1)
                seq_ignored[gg.track_id].append(False)

            if len(g) == 0:
                cost_matrix = [[]]
            # associate
            association_matrix = hm.compute(cost_matrix)

            # tmp variables for sanity checks and MODP computation
            tmptp = 0
            tmpfp = 0
            tmpfn = 0
            tmpc = 0  # this will sum up the overlaps for all true positives
            tmpcs = [0] * len(g)  # this will save the overlaps for all true
            # positives; the reason is that some true positives might be
            # ignored later such that the corresponding overlaps can
            # be subtracted from tmpc for MODP computation

            # mapping for tracker ids and ground truth ids
            for row, col in association_matrix:
                # apply gating on boxoverlap
                c = cost_matrix[row][col]
                if c < max_cost:
                    g[row].tracker = t[col].track_id
                    this_ids[1][row] = t[col].track_id
                    t[col].valid = True
                    g[row].distance = c
                    self.total_cost += 1 - c
                    tmpc += 1 - c
                    tmpcs[row] = 1 - c
                    seq_trajectories[g[row].track_id][-1] = t[col].track_id

                    # true positives are only valid associations
                    self.tp += 1
                    tmptp += 1
                else:
                    g[row].tracker = -1
                    self.fn += 1
                    tmpfn += 1

            # associate tracker and DontCare areas;
            # ignore tracker in neighboring classes
            nignoredtracker = 0  # number of ignored tracker detections
            ignoredtrackers = dict()  # will associate the track_id with -1
            # if it is not ignored and 1 if it is ignored;
            # this is used to avoid double counting ignored
            # cases, see the next loop

            for tt in t:
                ignoredtrackers[tt.track_id] = -1
                # ignore detection if it belongs to a neighboring class or is
                # smaller or equal to the minimum height
                tt_height = abs(tt.y1 - tt.y2)
                if ((self.cls == "car" and tt.obj_type == "van")
                        or (self.cls == "pedestrian"
                            and tt.obj_type == "person_sitting")
                        or tt_height <= self.min_height) and not tt.valid:
                    nignoredtracker += 1
                    tt.ignored = True
                    ignoredtrackers[tt.track_id] = 1
                    continue
                for d in dc:
                    overlap = self.boxoverlap(tt, d, "a")
                    if overlap > 0.5 and not tt.valid:
                        tt.ignored = True
                        nignoredtracker += 1
                        ignoredtrackers[tt.track_id] = 1
                        break

            # check for ignored FN/TP (truncation or neighboring object class)
            ignoredfn = 0  # the number of ignored false negatives
            nignoredtp = 0  # the number of ignored true positives
            nignoredpairs = 0  # the number of ignored pairs, i.e. a true
            # positive which is ignored but where the associated tracker
            # detection has already been ignored

            gi = 0
            for gg in g:
                if gg.tracker < 0:
                    if gg.occlusion > self.max_occlusion \
                            or gg.truncation > self.max_truncation \
                            or (self.cls == "car" and gg.obj_type == "van") \
                            or (self.cls == "pedestrian"
                                and gg.obj_type == "person_sitting"):
                        seq_ignored[gg.track_id][-1] = True
                        gg.ignored = True
                        ignoredfn += 1
                elif gg.tracker >= 0:
                    if gg.occlusion > self.max_occlusion \
                            or gg.truncation > self.max_truncation \
                            or (self.cls == "car" and gg.obj_type == "van") \
                            or (self.cls == "pedestrian"
                                and gg.obj_type == "person_sitting"):
                        seq_ignored[gg.track_id][-1] = True
                        gg.ignored = True
                        nignoredtp += 1

                        # if the associated tracker detection is already
                        # ignored, we want to avoid double counting ignored
                        # detections
                        if ignoredtrackers[gg.tracker] > 0:
                            nignoredpairs += 1

                        # for computing MODP, the overlaps from ignored
                        # detections are subtracted
                        tmpc -= tmpcs[gi]
                gi += 1

            # the below might be confusing, check the comments in __init__
            # to see what the individual statistics represent

            # correct TP by number of ignored TP due to truncation;
            # ignored TP are shown as tracked in visualization
            tmptp -= nignoredtp

            # count the number of ignored true positives
            self.itp += nignoredtp

            # adjust the number of ground truth objects considered
            self.n_gt -= (ignoredfn + nignoredtp)

            # count the number of ignored ground truth objects
            self.n_igt += ignoredfn + nignoredtp

            # count the number of ignored tracker objects
            self.n_itr += nignoredtracker

            # count the number of ignored pairs, i.e. associated tracker and
            # ground truth objects that are both ignored
            self.n_igttr += nignoredpairs

            # false negatives = associated gt bboxes exceeding association
            # threshold + non-associated gt bboxes
            tmpfn += len(g) - len(association_matrix) - ignoredfn
            self.fn += len(g) - len(association_matrix) - ignoredfn
            self.ifn += ignoredfn

            # false positives = tracker bboxes - associated tracker bboxes
            # mismatches (mme_t)
            tmpfp += len(t) - tmptp - nignoredtracker - nignoredtp + nignoredpairs
            self.fp += len(t) - tmptp - nignoredtracker - nignoredtp + nignoredpairs
            # tmpfp = len(t) - tmptp - nignoredtp  # == len(t) - (tp - ignoredtp) - ignoredtp
            # self.fp += len(t) - tmptp - nignoredtp

            # update sequence data
            seqtp += tmptp
            seqitp += nignoredtp
            seqfp += tmpfp
            seqfn += tmpfn
            seqifn += ignoredfn
            seqigt += ignoredfn + nignoredtp
            seqitr += nignoredtracker

            # sanity checks
            # - the number of true positives minus ignored true positives
            #   should be greater or equal to 0
            # - the number of false negatives should be greater or equal to 0
            # - the number of false positives needs to be greater or equal to 0,
            #   otherwise ignored detections might be counted double
            # - the number of counted true positives (plus ignored ones)
            #   and the number of counted false negatives (plus ignored ones)
            #   should match the total number of ground truth objects
            # - the number of counted true positives (plus ignored ones)
            #   and the number of counted false positives
            #   plus the number of ignored tracker detections should
            #   match the total number of tracker detections; note that
            #   nignoredpairs is subtracted here to avoid double counting
            #   of ignored detections in nignoredtp and nignoredtracker
            if tmptp < 0:
                print(tmptp, nignoredtp)
                raise NameError("Something went wrong! TP is negative")
            if tmpfn < 0:
                print(tmpfn, len(g), len(association_matrix), ignoredfn,
                      nignoredpairs)
                raise NameError("Something went wrong! FN is negative")
            if tmpfp < 0:
                print(tmpfp, len(t), tmptp, nignoredtracker, nignoredtp,
                      nignoredpairs)
                raise NameError("Something went wrong! FP is negative")
            if tmptp + tmpfn != len(g) - ignoredfn - nignoredtp:
                print("seqidx", seq_idx)
                print("frame ", f)
                print("TP    ", tmptp)
                print("FN    ", tmpfn)
                print("FP    ", tmpfp)
                print("nGT   ", len(g))
                print("nAss  ", len(association_matrix))
                print("ign GT", ignoredfn)
                print("ign TP", nignoredtp)
                raise NameError("Something went wrong! nGroundtruth is not TP+FN")
            if tmptp + tmpfp + nignoredtp + nignoredtracker - nignoredpairs != len(t):
                print(seq_idx, f, len(t), tmptp, tmpfp)
                print(len(association_matrix), association_matrix)
                raise NameError("Something went wrong! nTracker is not TP+FP")

            # check for id switches or fragmentations
            for i, tt in enumerate(this_ids[0]):
                if tt in last_ids[0]:
                    idx = last_ids[0].index(tt)
                    tid = this_ids[1][i]
                    lid = last_ids[1][idx]
                    if tid != lid and lid != -1 and tid != -1:
                        if g[i].truncation < self.max_truncation:
                            g[i].id_switch = 1
                            ids += 1
                    if tid != lid and lid != -1:
                        if g[i].truncation < self.max_truncation:
                            g[i].fragmentation = 1
                            fr += 1

            # save current index
            last_ids = this_ids

            # compute MODP_t
            MODP_t = 1
            if tmptp != 0:
                MODP_t = tmpc / float(tmptp)
            self.MODP_t.append(MODP_t)

        # remove empty lists for current gt trajectories
        self.gt_trajectories[seq_idx] = seq_trajectories
        self.ign_trajectories[seq_idx] = seq_ignored

        # gather statistics for "per sequence" statistics
        self.n_gts.append(n_gts)
        self.n_trs.append(n_trs)
        self.tps.append(seqtp)
        self.itps.append(seqitp)
        self.fps.append(seqfp)
        self.fns.append(seqfn)
        self.ifns.append(seqifn)
        self.n_igts.append(seqigt)
        self.n_itrs.append(seqitr)

    # compute MT/PT/ML, fragments, idswitches for all groundtruth trajectories
    n_ignored_tr_total = 0
    for seq_idx, (seq_trajectories, seq_ignored) in enumerate(
            zip(self.gt_trajectories, self.ign_trajectories)):
        if len(seq_trajectories) == 0:
            continue
        tmpMT, tmpML, tmpPT, tmpId_switches, tmpFragments = [0] * 5
        n_ignored_tr = 0
        for g, ign_g in zip(seq_trajectories.values(), seq_ignored.values()):
            # all frames of this gt trajectory are ignored
            if all(ign_g):
                n_ignored_tr += 1
                n_ignored_tr_total += 1
                continue
            # all frames of this gt trajectory are not assigned to any detections
            if all([this == -1 for this in g]):
                tmpML += 1
                self.ML += 1
                continue
            # compute tracked frames in trajectory
            last_id = g[0]
            # first detection (necessary to be in gt_trajectories) is always tracked
            tracked = 1 if g[0] >= 0 else 0
            lgt = 0 if ign_g[0] else 1
            for f in range(1, len(g)):
                if ign_g[f]:
                    last_id = -1
                    continue
                lgt += 1
                if last_id != g[f] and last_id != -1 and g[f] != -1 \
                        and g[f - 1] != -1:
                    tmpId_switches += 1
                    self.id_switches += 1
                if f < len(g) - 1 and g[f - 1] != g[f] and last_id != -1 \
                        and g[f] != -1 and g[f + 1] != -1:
                    tmpFragments += 1
                    self.fragments += 1
                if g[f] != -1:
                    tracked += 1
                    last_id = g[f]
            # handle last frame; tracked state is handled in for loop (g[f] != -1)
            if len(g) > 1 and g[f - 1] != g[f] and last_id != -1 \
                    and g[f] != -1 and not ign_g[f]:
                tmpFragments += 1
                self.fragments += 1

            # compute MT/PT/ML
            tracking_ratio = tracked / float(len(g) - sum(ign_g))
            if tracking_ratio > 0.8:
                tmpMT += 1
                self.MT += 1
            elif tracking_ratio < 0.2:
                tmpML += 1
                self.ML += 1
            else:  # 0.2 <= tracking_ratio <= 0.8
                tmpPT += 1
                self.PT += 1

    if (self.n_gt_trajectories - n_ignored_tr_total) == 0:
        self.MT = 0.
        self.PT = 0.
        self.ML = 0.
    else:
        self.MT /= float(self.n_gt_trajectories - n_ignored_tr_total)
        self.PT /= float(self.n_gt_trajectories - n_ignored_tr_total)
        self.ML /= float(self.n_gt_trajectories - n_ignored_tr_total)

    # precision/recall etc.
    if (self.fp + self.tp) == 0 or (self.tp + self.fn) == 0:
        self.recall = 0.
        self.precision = 0.
    else:
        self.recall = self.tp / float(self.tp + self.fn)
        self.precision = self.tp / float(self.fp + self.tp)
    if (self.recall + self.precision) == 0:
        self.F1 = 0.
    else:
        self.F1 = 2. * (self.precision * self.recall) / (
            self.precision + self.recall)
    if sum(self.n_frames) == 0:
        self.FAR = "n/a"
    else:
        self.FAR = self.fp / float(sum(self.n_frames))

    # compute CLEARMOT
    if self.n_gt == 0:
        self.MOTA = -float("inf")
        self.MODA = -float("inf")
    else:
        self.MOTA = 1 - (self.fn + self.fp + self.id_switches) / float(self.n_gt)
        self.MODA = 1 - (self.fn + self.fp) / float(self.n_gt)
    if self.tp == 0:
        self.MOTP = float("inf")
    else:
        self.MOTP = self.total_cost / float(self.tp)
    if self.n_gt != 0:
        if self.id_switches == 0:
            self.MOTAL = 1 - (self.fn + self.fp +
                              self.id_switches) / float(self.n_gt)
        else:
            self.MOTAL = 1 - (self.fn + self.fp +
                              math.log10(self.id_switches)) / float(self.n_gt)
    else:
        self.MOTAL = -float("inf")
    if sum(self.n_frames) == 0:
        self.MODP = "n/a"
    else:
        self.MODP = sum(self.MODP_t) / float(sum(self.n_frames))
    return True
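# Worked example of the CLEAR MOT formulas above (illustrative numbers only):
# with n_gt = 100, fn = 10, fp = 5 and id_switches = 2,
#   MOTA = 1 - (10 + 5 + 2) / 100 = 0.83
# and since total_cost accumulates the box overlap (1 - c) of every TP,
# total_cost = 68 over tp = 85 matches gives
#   MOTP = 68 / 85 = 0.8  (average overlap, higher is better).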
import copy
import math
import os

from munkres import Munkres

from coalib.collecting.Collectors import collect_dirs
from bears.c_languages.codeclone_detection.CountVector import CountVector

# Instantiate globally since this class is holding stateless public methods.
munkres = Munkres()


def exclude_function(count_matrix):
    """
    Determines heuristically whether or not it makes sense for clone
    detection to take this function into account.

    Applied heuristics:
     * Functions with only count vectors with a sum of all unweighted
       elements of lower than 10 are very likely only declarations or
       empty and to be ignored. (Constants are not taken into account.)

    :param count_matrix: A dictionary with count vectors representing all
                         variables for a function.
    :return:             True if the function is useless for evaluation.
    """
    var_list = [cv for cv in count_matrix.values()
                if cv.category is CountVector.Category.reference]
def update(self, boxes):
    """
    Update predictions, associate them with detections, add new objects
    when there is no association, and delete dead objects.
    :param boxes: detected boxes
    :return: None
    """
    non_asociated = [i for i in range(len(boxes))]
    pred = None
    # Update prediction for each tracked object
    for key in self.filters:
        pred = self.filters[key].predict()
        # Update bounding box according to the predicted xc, yc and the
        # measured size
        new_xc = pred[0]
        new_yc = pred[1]
        xc = float(self.objects[key][0] + self.objects[key][2]) / 2
        yc = float(self.objects[key][1] + self.objects[key][3]) / 2
        half_w = self.objects[key][2] - xc
        half_h = self.objects[key][3] - yc
        # Update object boxes
        self.objects[key] = [
            int(new_xc - half_w), int(new_yc - half_h),
            int(new_xc + half_w), int(new_yc + half_h)
        ]

    if len(boxes) != 0 and len(self.objects) != 0:
        m = Munkres()
        # Get distances between detections and predictions to match
        distances = self.get_distances(boxes)
        # assert len(self.objects) == len(self.objects_time) == len(self.objects_life) == len(self.filters), "Bug on code"
        # munkres seems to have a bug when the matrix is a numpy array of
        # shape (2, 1), so convert it to a list first
        indices = m.compute((-distances).tolist())
        # print("Valid: ", indices)
        # For each possible match, filter out impossible matches
        for i, j in indices:
            # Check for valid associations
            if distances[i, j] > self.iou_thres:
                key = list(self.objects.keys())[j]
                self.objects_life[key] = self.max_life
                xc = float(boxes[i][0] + boxes[i][2]) / 2
                yc = float(boxes[i][1] + boxes[i][3]) / 2
                meas_centers = np.array([xc, yc], np.float32)
                # If there is a match, feed the measurement back to the
                # Kalman filter
                self.filters[key].correct(meas_centers)
                # Update the width; this should be predicted, but size is
                # not part of the Kalman filter state, so assume the update
                # is fine and compute the half-size of the measured
                # bounding box
                half_w = boxes[i][2] - xc
                half_h = boxes[i][3] - yc
                # Keep the predicted center
                xc = float(self.objects[key][0] + self.objects[key][2]) / 2
                yc = float(self.objects[key][1] + self.objects[key][3]) / 2
                self.objects[key] = [
                    int(xc - half_w), int(yc - half_h),
                    int(xc + half_w), int(yc + half_h)
                ]
                # Mark the association as valid by removing it from the
                # non-associated detections
                non_asociated.remove(i)

    # Add non-associated detections as new objects
    for i in non_asociated:
        # print(boxes[i])
        self.new_object(boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3])

    keys_to_del = []
    for key in self.objects_life:
        # If a tracked object has gone undetected for the last iterations
        # (no life left), delete it
        if self.objects_life[key] <= 0:
            keys_to_del.append(key)
        # Otherwise decrement its life
        else:
            self.objects_life[key] = self.objects_life[key] - 1
    # Delete pending associations
    for key in keys_to_del:
        self.remove_object(key)
def match(conf_file, french_ans, exchange_ans, remaining_fr, remaining_ex,
          matchings_file, verbose):
    """Creates an optimal matching between french students and exchange
    students, using criteria defined in the config file"""
    config = json.load(conf_file)
    criterias = config['criterias']

    # import data
    fr = pd.read_csv(french_ans)
    ex = pd.read_csv(exchange_ans)
    frAnswers = prepare(fr, doClone=True,
                        cloneLabel=config['two_buddies_question_label'])
    exAnswers = prepare(ex)

    # Compute the costs
    costMatrix = [[float('inf')] * len(exAnswers.index)
                  for _ in range(len(frAnswers.index))]
    for fridx, fr_row in frAnswers.iterrows():
        for exidx, ex_row in exAnswers.iterrows():
            # Compute a list of compatibility scores
            # Scores are normalized between 0 and 1
            costMatrix[fridx][exidx] = totalCost(criterias, fr_row, ex_row)

    # Solve the assignment problem using munkres
    m = Munkres()
    assignments = m.compute(costMatrix)

    # Print results
    if verbose > 0:
        for fridx, exidx in assignments:
            print(frAnswers['Q1'][fridx], frAnswers['Q2'][fridx], '<3',
                  exAnswers['Q1'][exidx], exAnswers['Q2'][exidx],
                  compatPercentage(costMatrix[fridx][exidx]))

    # Output remaining unmatched people
    frMatched, exMatched = zip(*assignments)
    frAnswers[~frAnswers.index.isin(frMatched)].to_csv(remaining_fr)
    exAnswers[~exAnswers.index.isin(exMatched)].to_csv(remaining_ex)

    # Output matchings
    matchings = pd.DataFrame(columns=["frFName", "frLName", "frEMail",
                                      "exFName", "exLName", "exEMail",
                                      "language"])
    for fridx, exidx in assignments:
        fQ, lQ = config["fluentQ"], config["learningQ"]
        fnQ, lnQ, emQ = config["firstNameQ"], config["lastNameQ"], config["eMailQ"]
        frRow = frAnswers.iloc[fridx]
        exRow = exAnswers.iloc[exidx]
        # TODO:
        commonLangs = set(frRow[fQ].split(',')).intersection(
            set(exRow[fQ].split(',')))
        # Pick the first configured mail language both buddies speak,
        # defaulting to English
        chosenLang = "English"
        for lang in config["mails"]:
            if lang in commonLangs:
                chosenLang = lang
                break
        d = {"frFName": frRow[fnQ], "frLName": frRow[lnQ],
             "frEMail": frRow[emQ],
             "exFName": exRow[fnQ], "exLName": exRow[lnQ],
             "exEMail": exRow[emQ],
             "language": chosenLang,
             "compatibility": compatPercentage(costMatrix[fridx][exidx])}
        matchings = matchings.append(d, ignore_index=True)
    matchings.to_csv("./output/matchings.csv")
def py_max_match(scores):
    # `scores` is a weighted bipartite graph over (row, col): the connection
    # strength between each row and each column.
    m = Munkres()
    # Negate the scores because compute() minimizes total cost, while we
    # want the maximum-weight matching.
    tmp = m.compute(-scores)
    tmp = np.array(tmp).astype(np.int32)
    return tmp
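# Usage sketch (an assumption, not part of the original): the maximum-weight
# matching of this 2x2 score matrix pairs row 0 with col 0 and row 1 with
# col 1. Note that some munkres versions only accept plain lists, so
# (-scores).tolist() may be needed (compare the .tolist() workaround in the
# tracker update above).
import numpy as np

scores = np.array([[0.9, 0.1],
                   [0.2, 0.8]])
print(py_max_match(scores))  # [[0 0] [1 1]]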
def cluster_distance(A, B, nbhood):
    # key is the name of the GC, values is a list of specificity names
    clusterA = BGCs[A]  # dictionary where keys are specificities, and values
    # are lists of specificity labels that map to a specific sequence in
    # the DMS variable
    clusterB = BGCs[B]

    specificities_A = set(clusterA.keys())
    specificities_B = set(clusterB.keys())
    intersect = specificities_A.intersection(specificities_B)

    # JACCARD INDEX
    Jaccard = len(intersect) / float(
        len(specificities_A.union(specificities_B)))

    # DDS: the difference in specificities' sequences between clusters
    # S: max occurrence of each specificity
    DDS, S = 0, 0
    SumDistance = 0
    pair = ""

    # elements in either set but not in both: union - intersect
    not_intersect = specificities_A ^ specificities_B

    # sum as many copies of each unshared specificity as there are
    for unshared_specificity in not_intersect:
        dom_set = []
        try:
            dom_set = clusterA[unshared_specificity]
        except KeyError:
            dom_set = clusterB[unshared_specificity]
        DDS += len(dom_set)
        S += len(dom_set)

    # compare sequence identity of shared specificities
    for shared_specificity in intersect:
        seta = clusterA[shared_specificity]
        setb = clusterB[shared_specificity]
        if len(seta + setb) == 2:
            # The specificity occurs only once in both clusters
            pair = tuple(sorted([seta[0], setb[0]]))
            try:
                SumDistance = 1 - DMS[shared_specificity][pair][0]
                # print 'SumDistance1', SumDistance
            except KeyError:
                print "KeyError on", pair
                print(shared_specificity)
                sys.exit()
            S += max(len(seta), len(setb))
            DDS += SumDistance
        else:
            # The specificity occurs more than once in both clusters
            # accumulated_distance = 0
            DistanceMatrix = [[1 for col in range(len(setb))]
                              for row in range(len(seta))]
            # print DistanceMatrix
            for domsa in range(len(seta)):
                for domsb in range(len(setb)):
                    pair = tuple(sorted([seta[domsa], setb[domsb]]))
                    try:
                        Similarity = DMS[shared_specificity][pair][0]
                        # print Similarity
                    except KeyError:
                        print "KeyError on (case 2)", pair
                        print(shared_specificity)
                    seq_dist = 1 - Similarity
                    DistanceMatrix[domsa][domsb] = seq_dist

            # Only use the best scoring pairs
            Hungarian = Munkres()
            BestIndexes = Hungarian.compute(DistanceMatrix)
            accumulated_distance = sum(
                [DistanceMatrix[bi[0]][bi[1]] for bi in BestIndexes])
            # difference in abundance + sequence distance
            SumDistance = abs(len(seta) - len(setb)) + accumulated_distance
            S += max(len(seta), len(setb))
            DDS += SumDistance

    DDS /= float(S)
    DDS = 1 - DDS  # transform into similarity

    # calculate the Goodman-Kruskal gamma index
    A_pseudo_seq = list(cluster_seq[A])
    B_pseudo_seq = list(cluster_seq[B])
    Ar = [item for item in A_pseudo_seq]
    Ar.reverse()
    GK = max([
        calculate_GK(A_pseudo_seq, B_pseudo_seq, nbhood=nbhood),
        calculate_GK(Ar, B_pseudo_seq, nbhood=nbhood)
    ])

    Distance = 1 - (Jaccardw * Jaccard) - (DDSw * DDS) - (GKw * GK)
    Similarity_score = (Jaccardw * Jaccard) + (DDSw * DDS) + (GKw * GK)
    # Similarity_score = 1 - DDS
    if Distance < 0:
        print "negative distance", Distance, "DDS", DDS, pair
        print "Probably a rounding issue"
        print "Distance is set to 0 for these clusters"
        Distance = 0

    print A, B, Jaccard, GK, DDS
    return Similarity_score
        # ─── DEBUGGING ──────────────────────────────────────────────
        self.DEBUG = NiseConfig._DEBUG()
        # ─── ALGORITHM ──────────────────────────────────────────────
        self.ALG = NiseConfig._ALG()
        self.PATH = NiseConfig._PATH()
        self.TEST = NiseConfig._TEST()
        self.MODEL = NiseConfig._MODEL()


cfg = NiseConfig()
nise_cfg = get_edcfg_from_nisecfg(cfg)
nise_args = get_nise_arg_parser()
update_nise_config(nise_cfg, nise_args)
suffix, suffix_with_range = set_path_from_nise_cfg(nise_cfg)
print('SUFFIX', suffix_with_range)
nise_logger = get_logger()
update_nise_logger(nise_logger, nise_cfg, suffix)

mkrs = Munkres()
nise_cfg_pack = {
    'cfg': nise_cfg,
    'logger': nise_logger
}
def py_max_match(scores):
    # Backup if you have trouble getting munkres-tensorflow compiled
    # (much slower)
    from munkres import Munkres
    m = Munkres()
    tmp = m.compute(-scores)
    return np.array(tmp).astype(np.int32)
def getHungary(vehicles, tracks):
    hungarianMatrix = getMatrix(vehicles, tracks)
    # Keep an untouched copy, since munkres can modify the matrix it is
    # given (compare the explicit copy in the f-score snippet below)
    mat = np.copy(hungarianMatrix)
    m = Munkres()
    indexes = m.compute(hungarianMatrix)
    return mat, indexes
def evaluateFrame(self, X, Y, X_labels=None, Y_labels=None):
    """Compute the OSPA metric between two sets of points."""
    # check for empty sets
    if numpy.size(X) == 0 and numpy.size(Y) == 0:
        return (0, 0, 0, 0)
    elif numpy.size(X) == 0 or numpy.size(Y) == 0:
        return (self.c, 0, self.c, 0)

    # we assume that Y is the larger set
    m = numpy.size(X, 0)
    n = numpy.size(Y, 0)
    switched = False
    if m > n:
        X, Y = Y, X
        m, n = n, m
        switched = True

    dists = self.calculateCostMatrix(X, Y)
    # Copy cost matrix for the munkres module
    munkres_matrix = numpy.copy(dists)

    # Only run munkres on a non-empty matrix
    if len(munkres_matrix) > 0:
        munkres = Munkres()
        indices = munkres.compute(munkres_matrix)
    else:
        indices = []

    # compute the OSPA metric
    total = 0
    total_loc = 0
    for [i, j] in indices:
        total_loc += dists[i][j]**self.p

    # calculate cardinalization error
    err_cn = (float((self.c)**(self.p) * (n - m)) / n)**(1 / float(self.p))

    # calculate localization error
    err_loc = (float(total_loc) / n)**(1 / float(self.p))

    # Not contained in the version from github; implemented acc. to the paper
    # "A Metric for Performance Evaluation of Multi-Target Tracking Algorithms"
    # Implementation of labeling error
    # Penalizes ID switches from frame to frame
    new_assignments = []
    for [i, j] in indices:
        if switched:
            i, j = j, i
        new_assignments.append((X_labels[i], Y_labels[j]))
    wrong_labels = len(self.old_assignments) - len(
        set(new_assignments) & set(self.old_assignments))
    rospy.loginfo(wrong_labels)
    err_label = (float((float(self.a)**float(self.p)) / n) *
                 wrong_labels)**(1 / float(self.p))

    # store current assignments of labels to track
    self.old_assignments = new_assignments

    # ospa_err = (float(total_loc + (n-m)*self.c**self.p) / n)**(1/float(self.p))
    ospa_err = (float(total_loc + self.a * wrong_labels +
                      (n - m) * self.c**self.p) / n)**(1 / float(self.p))
    ospa_tuple = (ospa_err, err_loc, err_cn, err_label)

    rospy.loginfo(ospa_tuple)

    return ospa_tuple
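# Worked example of the OSPA terms above (illustrative numbers with p = 1,
# c = 10, a = 1): for m = 2 tracks matched against n = 3 ground truths with
# total_loc = 4 and one wrong label,
#   err_cn  = 10 * (3 - 2) / 3 ~= 3.33    (cardinality error)
#   err_loc = 4 / 3 ~= 1.33               (localization error)
#   ospa    = (4 + 1*1 + 1*10) / 3 = 5.0  (combined, with the label penalty)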
def kmeans_acc_ari_ami(X, L):
    """
    Run k-means on X and calculate clustering accuracy, ARI and AMI against
    the true labels L. Requires scikit-learn installed.

    # Arguments
        X: features, numpy.array with shape `(n_samples, n_features)`
        L: true labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy in [0, 1], adjusted Rand index, adjusted mutual information
    """
    n_clusters = len(np.unique(L))
    kmeans = KMeans(n_clusters=n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(X)

    y_pred = y_pred.astype(np.int64)
    y_true = L.astype(np.int64)
    assert y_pred.size == y_true.size

    y_pred = y_pred.reshape((1, -1))
    y_true = y_true.reshape((1, -1))

    # D = max(y_pred.max(), L.max()) + 1
    # w = np.zeros((D, D), dtype=np.int64)
    # for i in range(y_pred.size):
    #     w[y_pred[i], L[i]] += 1
    # # from sklearn.utils.linear_assignment_ import linear_assignment
    # from scipy.optimize import linear_sum_assignment
    # row_ind, col_ind = linear_sum_assignment(w.max() - w)
    # return sum([w[i, j] for i in row_ind for j in col_ind]) * 1.0 / y_pred.size

    if len(np.unique(y_pred)) == len(np.unique(y_true)):
        C = len(np.unique(y_true))

        # Cost matrix: entry (i, j) counts how many samples of predicted
        # cluster i do NOT belong to true class j
        cost_m = np.zeros((C, C), dtype=float)
        for i in np.arange(0, C):
            a = np.where(y_pred == i)
            # print(a.shape)
            a = a[1]
            l = len(a)
            for j in np.arange(0, C):
                yj = np.ones((1, l)).reshape(1, l)
                yj = j * yj
                cost_m[i, j] = np.count_nonzero(yj - y_true[0, a])

        # Hungarian algorithm finds the cluster -> class relabeling that
        # minimizes the total mismatch
        mk = Munkres()
        best_map = mk.compute(cost_m)

        (_, h) = y_pred.shape
        for i in np.arange(0, h):
            c = y_pred[0, i]
            v = best_map[c]
            v = v[1]
            y_pred[0, i] = v

        acc = 1 - (np.count_nonzero(y_pred - y_true) / h)
    else:
        acc = 0

    # print(y_pred.shape)
    y_pred = y_pred[0]
    y_true = y_true[0]
    ari, ami = adjusted_rand_score(y_true, y_pred), adjusted_mutual_info_score(
        y_true, y_pred)
    return acc, ari, ami
def __init__(self):
    self.training_set = []
    self.testing_set = []
    self.m = Munkres()
def execute(environment):
    # print "-------------------------------------------------"
    # print "Planning for objects..."
    Manager._create_object_plan(environment)

    allocation_list = []
    space = environment['workspace']
    object_list = environment['object_list']
    robots = environment['robot_dict']
    total_object = 0
    total_robot = 0
    allocator = Munkres()

    # While not all objects are transported
    while True:
        # Create the list with objects that are not yet fully transported
        work_list = [obj for obj in object_list
                     if len(obj['plan']['segments']) != 0]

        # When all objects have been transported, finish
        if len(work_list) == 0:
            break

        # print "-------------------------------------------------"
        # print "Planning for robots..."

        # Create the allocation matrix
        # Square, for the Hungarian Method
        shape = (max(len(work_list), len(robots)),) * 2
        cost_matrix = np.full(shape, ManagerHungarianMulti.inf, dtype=np.int)

        robot_id_list = robots.keys()
        robot_plan = {}

        # For each robot available
        for robot_index, robot_id in enumerate(robot_id_list):
            # print "# robot", robot_index
            robot = robots[robot_id]
            robot_plan[robot_id] = {}

            # For each object to be transported
            for obj_index, obj in enumerate(work_list):
                # print "### object", obj['idx']
                robot_plan[robot_id][obj['idx']] = {}

                # Get first segment
                segment = obj['plan']['segments'][0]
                if segment.graph['type'] is not robot.robot_type:
                    continue

                # If there is another object in the segment,
                # it is not possible to be executed
                in_place_objects = [
                    item for item in object_list
                    if (item['idx'] is not obj['idx'])
                    and (hash(item['current_pos']) in segment)
                ]
                if len(in_place_objects) != 0:
                    continue

                segment_start = segment.node[segment.graph['start_id']]
                segment_end = segment.node[segment.graph['end_id']]

                # Prepare movement
                space.temp_obstacle_list = [
                    item['current_pos'] for item in object_list
                ]
                robot_start = robot.copy()
                robot_end = robot.copy()
                if robot.robot_type == Robot.TYPES.aerial:
                    robot_end.position = Planner.create_robot_position(
                        segment_start, -1).position
                else:
                    robot_end.position = Planner.create_robot_position(
                        segment_start, 2).position
                try:
                    prepare_plan = Manager._execute_planning({
                        'start_position': robot_start,
                        'end_position': robot_end,
                        'workspace': space,
                        'robot_type': robot.robot_type
                    })
                except Exception, e:
                    print "Object: ", obj['idx'] + 1
                    print "Impossible prepare plan"
                    continue
                space.temp_obstacle_list = []
                robot_plan[robot_id][obj['idx']]['prepare_plan'] = prepare_plan

                # Move movement
                space.temp_obstacle_list = [
                    item['current_pos'] for item in object_list
                    if item['idx'] is not obj['idx']
                ]
                robot_start.position = robot_end.position
                robot_end = robot.copy()
                robot_end.position = Planner.create_robot_position(
                    segment_end).position
                try:
                    moviment_plan = Manager._execute_planning({
                        'start_position': robot_start,
                        'end_position': robot_end,
                        'workspace': space,
                        'robot_type': robot.robot_type
                    })
                except Exception, e:
                    print "Object: ", obj['idx'] + 1
                    print "Impossible movement plan"
                    continue
                space.temp_obstacle_list = []
                robot_plan[robot_id][obj['idx']]['moviment_plan'] = moviment_plan

                # Total cost to move this object with the current robot
                total_cost = obj['plan']['total'] - len(segment) + len(prepare_plan)

                # Populate the cost matrix
                cost_matrix[robot_index][obj_index] = total_cost
print(inames)
print("CNAMES:")
print(cnames)

print("########################")
print("MUNKRES OPTIMIZATION")
print("########################")

targetlist = list(inames)
baselist = list(cnames) + list(inames)
cost_matrix = gen_cost_matrix(targetlist, baselist, instance, concept)

print(len(cost_matrix), len(cost_matrix[0]))
for row in cost_matrix:
    print("\t".join(["%0.2f" % v for v in row]))

mun = Munkres()
s = mun.compute(cost_matrix)
print(s)
# s = linear_sum_assignment(cost_matrix)
# mun_sol = {targetlist[ti]: baselist[s[1][i]] for i, ti in enumerate(s[0])}
mun_sol = {
    targetlist[row]: (baselist[len(cnames) + row]
                      if col > len(cnames) else baselist[col])
    for row, col in s
}
print("Munkres solution:")
pprint(mun_sol)
print("Munkres cost:")
print(mapping_cost(frozenset(mun_sol.items()), instance, concept))
def run(costs):
    m = Munkres()
    idx = np.array(m.compute(costs), dtype=int)
    return costs[idx[:, 0], idx[:, 1]].sum()
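# Usage sketch (an assumption, not from the original): the cost-minimal
# assignment of this 2x2 matrix picks costs[0][1] + costs[1][0] = 1 + 2 = 3.
import numpy as np

costs = np.array([[4., 1.],
                  [2., 8.]])
print(run(costs))  # 3.0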
def py_max_match(scores):
    m = Munkres()
    tmp = m.compute(-scores)
    tmp = np.array(tmp).astype(np.int32)
    return tmp
def allocate(self):
    self.populate_adj_scores(self.adjudicators)

    # Sort voting adjudicators in descending order by score
    voting = [a for a in self.adjudicators
              if a._hungarian_score >= self.min_voting_score and not a.trainee]
    random.shuffle(voting)
    voting.sort(key=lambda a: a._hungarian_score, reverse=True)

    n_debates = len(self.debates)
    if self.no_panellists:
        voting = voting[:n_debates]
    n_voting = len(voting)

    if self.no_trainees:
        trainees = []
    else:
        trainees = [a for a in self.adjudicators if a not in voting]
        trainees.sort(key=lambda a: a._hungarian_score, reverse=True)

    # Divide debates into solo-chaired debates and panel debates
    debates_sorted = sorted(self.debates,
                            key=lambda d: (-d.importance, d.room_rank))

    # Figure out how many judges per room, prioritising the most important
    judges_per_room_floor = n_voting // n_debates
    n_bigger_panels = n_voting % n_debates
    judges_per_room = [judges_per_room_floor + 1] * n_bigger_panels + \
        [judges_per_room_floor] * (n_debates - n_bigger_panels)

    logger.info("There are %d debates, %d voting adjudicators and %d trainees",
                len(debates_sorted), len(voting), len(trainees))
    if n_voting < n_debates:
        logger.warning("There are %d debates but only %d voting adjudicators",
                       n_debates, n_voting)

    # Allocate voting
    m = Munkres()

    logger.info("costing voting adjudicators")
    cost_matrix = []
    for debate, njudges in zip(debates_sorted, judges_per_room):
        for i in range(njudges):
            row = [self.calc_cost(debate, adj, adjustment=-i)
                   for adj in voting]
            cost_matrix.append(row)

    logger.info("optimizing voting adjudicators (matrix size: %d positions "
                "by %d adjudicators)", len(cost_matrix), len(cost_matrix[0]))
    indexes = m.compute(cost_matrix)
    indexes.sort()
    total_cost = sum(cost_matrix[i][j] for i, j in indexes)
    logger.info('total cost for %d debates: %f', n_debates, total_cost)

    # transfer the indices to the debates
    alloc = []
    for debate, njudges in zip(debates_sorted, judges_per_room):
        aa = AdjudicatorAllocation(debate)
        panel_indices = indexes[0:njudges]
        panel = [voting[c] for r, c in panel_indices]
        panel.sort(key=lambda a: a._hungarian_score, reverse=True)
        try:
            aa.chair = panel.pop(0)
        except IndexError:
            aa.chair = None
        aa.panellists = panel
        alloc.append(aa)
        del indexes[0:njudges]
        logger.info("allocating to %s: %s (c), %s", aa.debate, aa.chair,
                    ", ".join([str(p) for p in aa.panellists]))

    # Allocate trainees, one per debate
    if len(trainees) > 0 and len(debates_sorted) > 0:
        allocation_by_debate = {aa.debate: aa for aa in alloc}

        logger.info("costing trainees")
        cost_matrix = []
        for debate in debates_sorted:
            chair = allocation_by_debate[debate].chair
            row = [self.calc_cost(debate, adj, adjustment=-2.0, chair=chair)
                   for adj in trainees]
            cost_matrix.append(row)

        logger.info("optimizing trainees (matrix size: %d positions by "
                    "%d trainees)", len(cost_matrix), len(cost_matrix[0]))
        indexes = m.compute(cost_matrix)
        total_cost = sum(cost_matrix[i][j] for i, j in indexes)
        logger.info('total cost for %d trainees: %f', len(trainees), total_cost)

        result = ((debates_sorted[i], trainees[j]) for i, j in indexes
                  if i < len(debates_sorted))
        for debate, trainee in result:
            allocation_by_debate[debate].trainees.append(trainee)
            logger.info("allocating to %s: %s (t)", debate, trainee)

    return alloc
def allocate(self):
    from debate.models import AdjudicatorAllocation

    # remove trainees
    self.adjudicators = filter(lambda a: a.score > self.MIN_SCORE,
                               self.adjudicators)

    # sort adjudicators and debates in descending score/importance
    self.adjudicators_sorted = list(self.adjudicators)
    self.adjudicators_sorted.sort(key=lambda a: a.score, reverse=True)
    self.debates_sorted = list(self.debates)
    self.debates_sorted.sort(key=lambda a: a.importance, reverse=True)

    n_adjudicators = len(self.adjudicators)
    n_debates = len(self.debates)
    n_solos = n_debates - (n_adjudicators - n_debates) / 2

    # get adjudicators that can adjudicate solo
    chairs = self.adjudicators_sorted[:n_solos]
    #chairs = [a for a in self.adjudicators_sorted if a.score >
    #          self.CHAIR_CUTOFF]

    # get debates that will be judged by solo adjudicators
    chair_debates = self.debates_sorted[:len(chairs)]

    panel_debates = self.debates_sorted[len(chairs):]
    panellists = [a for a in self.adjudicators_sorted if a not in chairs]

    assert len(panel_debates) * 3 <= len(panellists)

    print "costing chairs"
    n = len(chairs)
    cost_matrix = [[0] * n for i in range(n)]
    for i, debate in enumerate(chair_debates):
        for j, adj in enumerate(chairs):
            cost_matrix[i][j] = self.calc_cost(debate, adj)

    print "optimizing"
    m = Munkres()
    indexes = m.compute(cost_matrix)

    total_cost = 0
    for r, c in indexes:
        total_cost += cost_matrix[r][c]
    print 'total cost for solos', total_cost
    print 'number of solo debates', n

    result = ((chair_debates[i], chairs[j]) for i, j in indexes
              if i < len(chair_debates))
    alloc = [AdjudicatorAllocation(d, c) for d, c in result]
    print [(a.debate, a.chair) for a in alloc]

    # do panels
    n = len(panel_debates)
    npan = len(panellists)

    if npan:
        print "costing panellists"

        # matrix is square, dummy debates have cost 0
        cost_matrix = [[0] * npan for i in range(npan)]
        for i, debate in enumerate(panel_debates):
            for j in range(3):
                # for the top half of these debates, the final panellist
                # can be of lower quality than the other 2
                if i < npan / 2 and j == 2:
                    adjustment = -1.0
                else:
                    adjustment = 0
                for k, adj in enumerate(panellists):
                    cost_matrix[3 * i + j][k] = self.calc_cost(debate, adj,
                                                               adjustment)

        print "optimizing"
        indexes = m.compute(cost_matrix)

        cost = 0
        for r, c in indexes:
            cost += cost_matrix[r][c]
        print 'total cost for panellists', cost

        # transfer the indices to the debates
        # the debate corresponding to row r is floor(r/3) (i.e. r // 3)
        p = [[] for i in range(n)]
        for r, c in indexes[:n * 3]:
            p[r // 3].append(panellists[c])

        # create the corresponding adjudicator allocations, making sure
        # that the chair is the highest-ranked adjudicator in the panel
        for i, d in enumerate(panel_debates):
            a = AdjudicatorAllocation(d)
            p[i].sort(key=lambda a: a.score, reverse=True)
            a.chair = p[i].pop(0)
            a.panel = p[i]
            alloc.append(a)

    print [(a.debate, a.chair, a.panel) for a in alloc[len(chairs):]]
    return alloc
def matching_eigenvectors_of_modes(number_of_modes, eigenvectors_1,
                                   eigenvectors_2):
    r"""
    Matches the eigenvectors of two crystal structures and returns the new
    order of the modes and the weight to match each mode.

    Parameters
    ----------
    number_of_modes: int
        Number of vibrational modes in the crystal lattice
    eigenvectors_1: List[float]
        Eigenvectors of the reference crystal structure in the form of a
        3N x 3N matrix, where N is the number of atoms in the crystal
        lattice.
    eigenvectors_2: List[float]
        Eigenvectors of the crystal structure that needs the modes
        reorganized, in the form of a 3N x 3N matrix, where N is the number
        of atoms in the crystal lattice.

    Returns
    -------
    z: List[int]
        List to match the order of the eigenvectors_2 with eigenvectors_1.
    weight: List[float]
        Weight describing how well the reordered eigenvectors matched.
        A weight of 0 indicates a perfect match.
    """
    # Using the Munkres matching package
    m = Munkres()

    # Matrix to save the weights for matching all of the vibrational modes
    weight = np.zeros((number_of_modes - 3, number_of_modes - 3))

    # Cycling through all modes to apply a weight of how well they match
    for i in range(3, number_of_modes):
        # Using 1 - cos(theta) between the two eigenvectors as the weight
        diff = np.dot(eigenvectors_1[i], eigenvectors_2[i]) \
            / (np.linalg.norm(eigenvectors_1[i]) *
               np.linalg.norm(eigenvectors_2[i]))
        if np.absolute(diff) > 0.95:
            # If cos(theta) is close to 1 when comparing the eigenvectors in
            # their given order, they are considered well matched and all
            # other comparisons are set with a much higher weight
            weight[i - 3] = 10000000.
            weight[i - 3, i - 3] = 1. - diff
        else:
            # Otherwise the weight compared to all other eigenvectors is
            # computed
            for j in range(3, number_of_modes):
                # Check whether the match is better if the direction of the
                # eigenvector is flipped (eigenvectors represent vibrations,
                # so the opposite direction is also valid)
                hold_weight = np.zeros(4)
                hold_weight[0] = 1 - np.dot(-1 * eigenvectors_1[i], eigenvectors_2[j]) \
                    / (np.linalg.norm(-1 * eigenvectors_1[i]) * np.linalg.norm(eigenvectors_2[j]))
                hold_weight[1] = 1 - np.dot(eigenvectors_1[i], -1 * eigenvectors_2[j]) \
                    / (np.linalg.norm(eigenvectors_1[i]) * np.linalg.norm(-1 * eigenvectors_2[j]))
                hold_weight[2] = 1 - np.dot(eigenvectors_1[i], eigenvectors_2[j]) \
                    / (np.linalg.norm(eigenvectors_1[i]) * np.linalg.norm(eigenvectors_2[j]))
                hold_weight[3] = 1 - np.dot(-1 * eigenvectors_1[i], -1 * eigenvectors_2[j]) \
                    / (np.linalg.norm(-1 * eigenvectors_1[i]) * np.linalg.norm(-1 * eigenvectors_2[j]))
                # The weight matching the two eigenvectors is the minimum
                # value computed
                weight[i - 3, j - 3] = min(hold_weight)

    # Using the Hungarian algorithm to match wavenumbers
    Wgt = m.compute(weight)
    x, y = zip(*Wgt)
    z = np.column_stack((x, y))
    z = z + 3
    return z, weight[z[:, 0] - 3, z[:, 1] - 3]
def perform_alignment(ref_instances, sys_instances, kernel, maximize=True):
    disallowed = {}
    max_sim = 0
    sim_matrix, component_matrix = [], []
    start = time.time_ns()

    report_matrix_stats = False  # whether to report the stats
    report_matrix_start = False  # writes a line before the matrix processing begins
    report_matrix_print_threshold = 10000  # matrix size that triggers output

    if report_matrix_stats and report_matrix_start and \
            len(ref_instances) * len(sys_instances) > report_matrix_print_threshold:
        print("[Info] StartBigAlignment: {} x {} = {}".format(
            len(ref_instances), len(sys_instances),
            len(ref_instances) * len(sys_instances)))

    for s_i, s in enumerate(sys_instances):
        sim_row = []
        comp_row = []
        for r_i, r in enumerate(ref_instances):
            sim, comp = kernel(r, s)
            sim_row.append(sim)
            comp_row.append(comp)
            if sim == DISALLOWED:
                disallowed[(s_i, r_i)] = True
            else:
                if sim > max_sim:
                    max_sim = sim
        sim_matrix.append(sim_row)
        component_matrix.append(comp_row)

    # Munkres minimizes cost, so map similarities to costs;
    # DISALLOWED pairs get the largest cost
    if maximize:
        def _mapper(sim):
            return max_sim + 1 if sim == DISALLOWED else (max_sim + 1) - sim
    else:
        def _mapper(sim):
            return max_sim + 1 if sim == DISALLOWED else sim

    matrix = make_cost_matrix(sim_matrix, _mapper)

    correct_detects, false_alarms, missed_detects = [], [], []
    unmapped_sys = set(range(0, len(sys_instances)))
    unmapped_ref = set(range(0, len(ref_instances)))
    if len(matrix) > 0:
        for s_i, r_i in Munkres().compute(matrix):
            if disallowed.get((s_i, r_i), False):
                continue

            unmapped_sys.remove(s_i)
            unmapped_ref.remove(r_i)
            try:
                correct_detects.append(
                    AlignmentRecord(ref_instances[r_i], sys_instances[s_i],
                                    sim_matrix[s_i][r_i],
                                    component_matrix[s_i][r_i],
                                    ref_instances[r_i].localization,
                                    sys_instances[s_i].localization,
                                    list(sys_instances[s_i].localization)[0]))
            except:
                correct_detects.append(
                    AlignmentRecord(ref_instances[r_i], sys_instances[s_i],
                                    sim_matrix[s_i][r_i],
                                    component_matrix[s_i][r_i],
                                    None, None, None))

    for r_i in unmapped_ref:
        try:
            missed_detects.append(
                AlignmentRecord(ref_instances[r_i], None, None, None,
                                ref_instances[r_i].localization, None,
                                list(ref_instances[r_i].localization)[0]))
        except:
            missed_detects.append(
                AlignmentRecord(ref_instances[r_i], None, None, None,
                                None, None, None))

    for s_i in unmapped_sys:
        try:
            false_alarms.append(
                AlignmentRecord(None, sys_instances[s_i], None, None, None,
                                sys_instances[s_i].localization,
                                list(sys_instances[s_i].localization)[0]))
        except:
            false_alarms.append(
                AlignmentRecord(None, sys_instances[s_i], None, None,
                                None, None, None))

    if report_matrix_stats and \
            len(ref_instances) * len(sys_instances) > report_matrix_print_threshold:
        print("[Info] BigAlignmentMatrixTime: {} x {} = {}, {} sec.".format(
            len(ref_instances), len(sys_instances),
            len(ref_instances) * len(sys_instances),
            (time.time_ns() - start) / 1000000000))

    return (correct_detects, missed_detects, false_alarms)
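# Minimal sketch of the similarity-to-cost conversion used above (the 2x2
# matrix and max_sim value are made up): higher similarity becomes lower
# cost, and DISALLOWED pairs get the largest cost, max_sim + 1.
from munkres import DISALLOWED, make_cost_matrix

max_sim = 0.9
sim = [[0.9, 0.2],
       [DISALLOWED, 0.5]]
cost = make_cost_matrix(
    sim, lambda s: max_sim + 1 if s == DISALLOWED else (max_sim + 1) - s)
# cost is approximately [[1.0, 1.7], [1.9, 1.4]]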
def get_order(params, tile_struct, tilesegmentlists, exposurelist, observatory,
              config_struct):
    '''
    tile_struct: dictionary. key -> struct info.
    tilesegmentlists: list of lists. Segments for each tile in tile_struct
        that are available for observation.
    exposurelist: list of segments that the telescope is supposed to be
        working. Consecutive segments from the start to the end, with each
        segment size being the exposure time.
    Returns a list of tile indices in the order of observation.
    '''
    keys = tile_struct.keys()

    exposureids_tiles = {}
    first_exposure = np.inf * np.ones((len(keys),))
    last_exposure = -np.inf * np.ones((len(keys),))
    tileprobs = np.zeros((len(keys),))
    tilenexps = np.zeros((len(keys),))
    tileexptime = np.zeros((len(keys),))
    tileexpdur = np.zeros((len(keys),))
    tilefilts = {}
    tileavailable = np.zeros((len(keys),))
    tileavailable_tiles = {}
    keynames = []

    nexps = 0
    for jj, key in enumerate(keys):
        tileprobs[jj] = tile_struct[key]["prob"]
        tilenexps[jj] = tile_struct[key]["nexposures"]
        try:
            tileexpdur[jj] = tile_struct[key]["exposureTime"]
        except:
            try:
                tileexpdur[jj] = tile_struct[key]["exposureTime"][0]
            except:
                tileexpdur[jj] = 0.0
        tilefilts[key] = copy.deepcopy(tile_struct[key]["filt"])
        tileavailable_tiles[jj] = []
        keynames.append(key)

        nexps = nexps + tile_struct[key]["nexposures"]

    if "dec_constraint" in config_struct:
        dec_constraint = config_struct["dec_constraint"].split(",")
        dec_min = float(dec_constraint[0])
        dec_max = float(dec_constraint[1])

    for ii in range(len(exposurelist)):
        exposureids_tiles[ii] = {}
        exposureids = []
        probs = []
        ras, decs = [], []
        for jj, key in enumerate(keys):
            tilesegmentlist = tilesegmentlists[jj]
            if tile_struct[key]["prob"] == 0:
                continue
            if "dec_constraint" in config_struct:
                if (tile_struct[key]["dec"] < dec_min) or \
                        (tile_struct[key]["dec"] > dec_max):
                    continue
            if "epochs" in tile_struct[key]:
                if params["doMindifFilt"]:
                    # take into account filter for mindiff
                    idx = np.where(
                        np.asarray(tile_struct[key]["epochs_filters"]) ==
                        params["filters"][0])[0]
                    if np.any(np.abs(exposurelist[ii][0] -
                                     tile_struct[key]["epochs"][idx, 2]) <
                              params["mindiff"] / 86400.0):
                        continue
                elif np.any(np.abs(exposurelist[ii][0] -
                                   tile_struct[key]["epochs"][:, 2]) <
                            params["mindiff"] / 86400.0):
                    continue
            if tilesegmentlist.intersects_segment(exposurelist[ii]):
                exposureids.append(key)
                probs.append(tile_struct[key]["prob"])
                ras.append(tile_struct[key]["ra"])
                decs.append(tile_struct[key]["dec"])

                first_exposure[jj] = np.min([first_exposure[jj], ii])
                last_exposure[jj] = np.max([last_exposure[jj], ii])
                tileavailable_tiles[jj].append(ii)
                tileavailable[jj] = tileavailable[jj] + 1

        # in every exposure, the tiles available for observation
        exposureids_tiles[ii]["exposureids"] = exposureids  # list of tile ids
        exposureids_tiles[ii]["probs"] = probs  # the corresponding probs
        exposureids_tiles[ii]["ras"] = ras
        exposureids_tiles[ii]["decs"] = decs

    exposureids = []
    probs = []
    ras, decs = [], []
    for ii, key in enumerate(keys):
        # tile_struct[key]["nexposures"]: the number of exposures assigned
        # to this tile
        for jj in range(tile_struct[key]["nexposures"]):
            # list of tile ids for every exposure it is allocated to observe
            exposureids.append(key)
            probs.append(tile_struct[key]["prob"])
            ras.append(tile_struct[key]["ra"])
            decs.append(tile_struct[key]["dec"])

    idxs = -1 * np.ones((len(exposureids_tiles.keys()),))
    filts = ['n'] * len(exposureids_tiles.keys())

    if nexps == 0:
        return idxs, filts

    if params["scheduleType"] == "airmass_weighted":

        # first step is to sort the array in order of descending probability
        indsort = np.argsort(-np.array(probs))
        probs = np.array(probs)[indsort]
        ras = np.array(ras)[indsort]
        decs = np.array(decs)[indsort]
        exposureids = np.array(exposureids)[indsort]

        tilematrix = np.zeros((len(exposurelist), len(ras)))
        probmatrix = np.zeros((len(exposurelist), len(ras)))

        for ii in np.arange(len(exposurelist)):

            # first, create an array of airmass-weighted probabilities
            t = Time(exposurelist[ii][0], format='mjd')
            altaz = get_altaz_tiles(ras, decs, observatory, t)
            alts = altaz.alt.degree
            horizon = config_struct["horizon"]
            horizon_mask = alts <= horizon
            airmass = 1 / np.cos((90. - alts) * np.pi / 180.)
            below_horizon_mask = horizon_mask * 10.**100
            airmass = airmass + below_horizon_mask
            airmass_weight = 10**(0.4 * 0.1 * (airmass - 1))
            tilematrix[ii, :] = np.array(probs / airmass_weight)
            probmatrix[ii, :] = np.array(probs * (True ^ horizon_mask))

    dt = (exposurelist[1][0] - exposurelist[0][0]) * 86400

    if params["scheduleType"] == "greedy":
        for ii in np.arange(len(exposurelist)):
            if idxs[ii] > 0:
                continue

            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime <
                params["mindiff"] / 86400.0)[0]
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            # find_tile finds the tile that covers the largest probability,
            # restricted by availability of tile and timeallocation
            idx2, exposureids, probs = find_tile(
                exposureids_tiles[ii], exposureids, probs,
                exptimecheckkeys=exptimecheckkeys)
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                num = int(np.ceil(tileexpdur[idx] / dt))
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    for jj in range(num):
                        try:
                            filts[ii + jj] = filt
                        except:
                            pass
                for jj in range(num):
                    try:
                        idxs[ii + jj] = idx2
                    except:
                        pass
            else:
                idxs[ii] = idx2

            if not exposureids:
                break

    elif params["scheduleType"] == "greedy_slew":
        current_ra, current_dec = np.nan, np.nan
        for ii in np.arange(len(exposurelist)):
            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime <
                params["mindiff"] / 86400.0)[0]
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            # find_tile finds the tile that covers the largest probability,
            # restricted by availability of tile and timeallocation
            idx2, exposureids, probs = find_tile(
                exposureids_tiles[ii], exposureids, probs,
                exptimecheckkeys=exptimecheckkeys,
                current_ra=current_ra, current_dec=current_dec,
                slew_rate=config_struct['slew_rate'],
                readout=config_struct['readout'])
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                num = int(np.ceil(tileexpdur[idx] / dt))
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    for jj in range(num):
                        try:
                            filts[ii + jj] = filt
                        except:
                            pass
                for jj in range(num):
                    try:
                        idxs[ii + jj] = idx2
                    except:
                        pass
                current_ra = tile_struct[idx2]["ra"]
                current_dec = tile_struct[idx2]["dec"]
            else:
                idxs[ii] = idx2

            if not exposureids:
                break

    elif params["scheduleType"] == "sear":
        # for ii in np.arange(len(exposurelist)):
        iis = np.arange(len(exposurelist)).tolist()
        while len(iis) > 0:
            ii = iis[0]
            mask = np.where((ii == last_exposure) & (tilenexps > 0))[0]
            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime <
                params["mindiff"] / 86400.0)[0]
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            if len(mask) > 0:
                idxsort = mask[np.argsort(tileprobs[mask])]
                idx2, exposureids, probs = find_tile(
                    exposureids_tiles[ii], exposureids, probs, idxs=idxsort,
                    exptimecheckkeys=exptimecheckkeys)
                last_exposure[mask] = last_exposure[mask] + 1
            else:
                idx2, exposureids, probs = find_tile(
                    exposureids_tiles[ii], exposureids, probs,
                    exptimecheckkeys=exptimecheckkeys)
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    filts[ii] = filt
            idxs[ii] = idx2
            iis.pop(0)

            if not exposureids:
                break

    elif params["scheduleType"] == "weighted":
        for ii in np.arange(len(exposurelist)):
            jj = exposureids_tiles[ii]["exposureids"]
            weights = tileprobs[jj] * tilenexps[jj] / tileavailable[jj]
            weights[~np.isfinite(weights)] = 0.0

            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime <
                params["mindiff"] / 86400.0)[0]
            weights[exptimecheck] = 0.0

            if np.any(weights >= 0):
                idxmax = np.argmax(weights)
                idx2 = jj[idxmax]
                if idx2 in keynames:
                    idx = keynames.index(idx2)
                    tilenexps[idx] = tilenexps[idx] - 1
                    tileexptime[idx] = exposurelist[ii][0]
                    if len(tilefilts[idx2]) > 0:
                        filt = tilefilts[idx2].pop(0)
                        filts[ii] = filt
                idxs[ii] = idx2
            tileavailable[jj] = tileavailable[jj] - 1

    elif params["scheduleType"] == "airmass_weighted":

        # then use the Hungarian algorithm (munkres) to schedule high
        # probability tiles at low airmass
        tilematrix_mask = tilematrix > 10**(-10)

        if tilematrix_mask.any():
            print("Calculating Hungarian solution...")
            total_cost = 0
            cost_matrix = make_cost_matrix(tilematrix)
            m = Munkres()
            optimal_points = m.compute(cost_matrix)
            print("Hungarian solution calculated...")
            max_no_observ = min(tilematrix.shape)
            for jj in range(max_no_observ):
                idx0, idx1 = optimal_points[jj]
                # idx0 indexes over the time windows, idx1 indexes over the
                # probabilities; idx2 gets the exposure id of the tile, used
                # to assign tileexptime and tilenexps
                try:
                    idx2 = exposureids[idx1]
                    pamw = tilematrix[idx0][idx1]
                    total_cost += pamw
                    if len(tilefilts[idx2]) > 0:
                        filt = tilefilts[idx2].pop(0)
                        filts[idx0] = filt
                except:
                    continue

                idxs[idx0] = idx2
        else:
            print("The localization is not visible from the site.")

    else:
        raise ValueError(
            "Scheduling options are greedy/sear/weighted/airmass_weighted, "
            "or with _slew.")

    return idxs, filts
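# Worked example of the airmass weighting above (illustrative): at airmass 2
# the penalty is 10**(0.4 * 0.1 * (2 - 1)) ~= 1.096, so a tile's probability
# is divided by roughly 1.1, while tiles below the horizon pick up the
# 10**100 term and end up with effectively zero weight.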
def synaptic_partners_fscore(rec_annotations, gt_annotations, gt_segmentation,
                             matching_threshold=400, all_stats=False):
    """Compute the f-score of the found synaptic partners.

    Parameters
    ----------
    rec_annotations: Annotations, containing found synaptic partners
    gt_annotations: Annotations, containing ground truth synaptic partners
    gt_segmentation: Volume, ground truth neuron segmentation
    matching_threshold: float, world units
        Euclidean distance threshold to consider two annotations a
        potential match. Annotations that are `matching_threshold` or more
        units apart from each other are not considered as potential matches.
    all_stats: boolean, optional
        Whether to also return precision, recall, FP, FN, and matches as a
        6-tuple with f-score.

    Returns
    -------
    fscore: float
        The f-score of the found synaptic partners.
    precision: float, optional
    recall: float, optional
    fp: int, optional
    fn: int, optional
    filtered_matches: list of tuples, optional
        The indices of the matches with matching costs.
    """
    # get cost matrix
    costs = cost_matrix(rec_annotations, gt_annotations, gt_segmentation,
                        matching_threshold)

    # match using the Hungarian method
    print("Finding cost-minimal matches...")
    munkres = Munkres()
    # have to copy, because munkres modifies the cost matrix in place
    matches = munkres.compute(costs.copy())

    filtered_matches = [(i, j, costs[i][j]) for (i, j) in matches
                        if costs[i][j] <= matching_threshold]
    print(str(len(filtered_matches)) + " matches found")

    # unmatched in rec = FP
    fp = len(rec_annotations.pre_post_partners) - len(filtered_matches)
    # unmatched in gt = FN
    fn = len(gt_annotations.pre_post_partners) - len(filtered_matches)
    # all ground truth elements - FN = TP
    tp = len(gt_annotations.pre_post_partners) - fn

    precision = float(tp) / (tp + fp)
    recall = float(tp) / (tp + fn)
    fscore = 2.0 * precision * recall / (precision + recall)

    if all_stats:
        return (fscore, precision, recall, fp, fn, filtered_matches)
    return fscore
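# Toy numbers walking through the FP/FN bookkeeping above (values are made
# up for illustration, not taken from any dataset):
n_rec, n_gt, n_matched = 10, 8, 6   # found partners, GT partners, matches
fp = n_rec - n_matched              # 4: found partners with no GT match
fn = n_gt - n_matched               # 2: GT partners that were missed
tp = n_gt - fn                      # 6: equal to the number of matches
precision = tp / float(tp + fp)     # 0.6
recall = tp / float(tp + fn)        # 0.75
fscore = 2.0 * precision * recall / (precision + recall)  # ~0.667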
def __init__(self, lr=1e-3, batchs=8, cuda=True):
    '''
    :param tt: train_test
    :param tag: 1 - evaluation on testing data, 0 - without evaluation on testing data
    :param lr: learning rate
    :param batchs: batch size
    :param cuda: whether to run on the GPU
    '''
    # all the tensors should set 'volatile' to True, and False when
    # updating the network
    self.hungarian = Munkres()
    self.device = torch.device("cuda" if cuda else "cpu")
    self.nEpochs = 999
    self.lr = lr
    self.batchsize = batchs
    self.numWorker = 4
    self.show_process = 0  # interaction
    self.step_input = 1

    print(' Preparing the model...')
    self.resetU()
    self.Uphi = uphi().to(self.device)
    self.Ephi = ephi().to(self.device)
    # criterion_s is assumed to be a module-level flag selecting the loss
    self.criterion = nn.MSELoss() if criterion_s else nn.CrossEntropyLoss()
    self.criterion = self.criterion.to(self.device)
    self.criterion_v = nn.MSELoss().to(self.device)
    self.optimizer = optim.Adam(
        [{'params': self.Uphi.parameters()},
         {'params': self.Ephi.parameters()}], lr=lr)

    # seqs = [2, 4, 5, 9, 10, 11, 13]
    # lengths = [600, 1050, 837, 525, 654, 900, 750]
    seqs = [2, 4, 5, 10]
    lengths = [600, 1050, 837, 654]

    for i in range(len(seqs)):
        self.writer = SummaryWriter()
        # print(' Loading Data...')
        seq = seqs[i]
        self.seq_index = seq
        start = time.time()

        sequence_dir = '../MOT/MOT16/train/MOT16-%02d' % seq
        # t_dir is assumed to be a module-level output directory
        self.outName = t_dir + 'result_%02d.txt' % seq
        out = open(self.outName, 'w')
        out.close()

        self.train_set = DatasetFromFolder(sequence_dir, self.outName)
        self.train_test = lengths[i]
        self.tag = 0
        self.loss_threhold = 0.03
        self.update()

        print(' Logging...')
        t_data = time.time() - start
        self.log(t_data)
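# Note on the optimizer above: passing a list of dicts to optim.Adam creates
# parameter groups, each of which can carry its own settings. A minimal
# illustration with throwaway modules (u_net/e_net are placeholders, not the
# uphi/ephi networks above):
import torch.nn as nn
import torch.optim as optim

u_net, e_net = nn.Linear(4, 4), nn.Linear(4, 4)
opt = optim.Adam([
    {'params': u_net.parameters()},
    {'params': e_net.parameters(), 'lr': 1e-4},  # group-specific override
], lr=1e-3)  # default for groups that do not override it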
def train(self, dataset, learning_rate, batch_size, display_step=100,
          num_epoch=100, labeled=False):
    losses = []
    LS = []
    reg_arr = []
    precision_arr = []
    recall_arr = []
    Spectral_Kmeans_acc_arr = []
    self.display_step = display_step
    self.batch_size = batch_size
    print("num_samples : {}".format(dataset.num_samples))

    for epoch in range(num_epoch):
        avg_loss = 0.
        avg_score = 0.
        reg_loss = 0.

        # Loop over all batches
        amount_batchs = dataset.get_amuont_batchs(self.batch_size)
        for i in range(amount_batchs):
            if labeled:
                batch_xs, batch_ys = dataset.next_batch(self.batch_size)
            else:
                batch_xs = dataset.next_batch(self.batch_size)
            _, loss, laplacian_score, reg_fs = self.sess.run(
                [self.train_step, self.loss, self.laplacian_score,
                 self.reg_gates],
                feed_dict={self.X: batch_xs,
                           self.learning_rate: learning_rate})
            avg_loss += loss / amount_batchs
            avg_score += laplacian_score / amount_batchs
            reg_loss += reg_fs / amount_batchs

        alpha_p = self.get_prob_alpha()
        precision = np.sum(alpha_p[:2]) / np.sum(alpha_p[:])
        recall = np.sum(alpha_p[:2]) / 2

        losses.append(avg_loss)
        LS.append(avg_score)
        reg_arr.append(reg_loss)
        recall_arr.append(recall)
        precision_arr.append(precision)

        if (epoch + 1) % self.display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1),
                  "loss=", "{:.9f}".format(avg_loss),
                  "score=", "{:.9f}".format(avg_score),
                  "reg=", "{:.9f}".format(reg_fs))
            if labeled:
                indices = np.where(alpha_p > 0)[0]
                XS = batch_xs[:, indices]
                if XS.size == 0:
                    XS = np.zeros((batch_xs.shape[0], 1))
                    Spectral_Kmeans_acc_arr.append(
                        cluster_acc(batch_ys, batch_ys * 0))
                else:
                    Dist = squareform(pdist(XS))
                    fac = 5 / np.max(
                        np.min(Dist + np.eye(XS.shape[0]) * 1e5, axis=1))
                    clustering = SpectralClustering(
                        n_clusters=2, affinity='rbf', gamma=fac,
                        assign_labels="discretize", random_state=0).fit(XS)
                    netConfusion = ms.confusion_matrix(
                        batch_ys, clustering.labels_, labels=None)
                    netCostMat = calcCostMatrix(netConfusion, 2)
                    m = Munkres()
                    indexes = m.compute(netCostMat)
                    clusterLabels = getClusterLabelsFromIndexes(indexes)
                    netSolClassVectorOrdered = clusterLabels[
                        clustering.labels_].astype('int')
                    accuracy_ls = np.mean(
                        netSolClassVectorOrdered == np.array(batch_ys))
                    Spectral_Kmeans_acc_arr.append(accuracy_ls)

    print("Optimization Finished!")
    return (LS, losses, reg_arr, precision_arr, recall_arr,
            Spectral_Kmeans_acc_arr)
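# A self-contained sketch of the label-alignment idea used above: build a
# confusion matrix between true and predicted cluster labels, turn it into a
# cost so the Hungarian algorithm (a minimizer) maximizes agreement, then
# remap the predicted labels. calcCostMatrix and getClusterLabelsFromIndexes
# are not shown in this file; the helper below is assumed to do the
# equivalent.
import numpy as np
from sklearn.metrics import confusion_matrix
from munkres import Munkres


def hungarian_cluster_acc(y_true, y_pred, n_clusters):
    conf = confusion_matrix(y_true, y_pred, labels=list(range(n_clusters)))
    # each correctly co-assigned pair is "profit"; shift to a nonnegative cost
    cost = (conf.max() - conf).tolist()
    # rows index true labels, columns predicted labels
    mapping = {col: row for row, col in Munkres().compute(cost)}
    y_remapped = np.array([mapping[p] for p in y_pred])
    return np.mean(y_remapped == np.asarray(y_true))


# e.g. perfectly swapped labels still score 1.0:
# hungarian_cluster_acc([0, 0, 1, 1], [1, 1, 0, 0], 2)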
def plotTradeRatios(mp, fa, objLabels, preconditioner=None, numToSample=75,
                    pixPerSide=10):
    if preconditioner is None:
        tr = mp.tradeRatios
    else:
        tr = preconditioner(mp.tradeRatios)
    if numToSample is None:
        samples = mp.paretoSamples
    else:
        dataSize = mp.paretoSamples.shape[0]
        smplIndxsWithReplacement = np.random.randint(
            0, dataSize, size=min(dataSize, numToSample))
        samples = mp.paretoSamples[smplIndxsWithReplacement, :]

    U, S, V = np.linalg.svd(samples)
    locs2d = np.dot(samples, V)[:, :2]
    ranges = np.ptp(locs2d, axis=0)
    # jitter the projected locations slightly so no two coincide
    locs2dNoised = locs2d + np.random.random(
        locs2d.shape) * ranges[np.newaxis, :] * 1 / 1000
    rangesNoised = np.ptp(locs2dNoised, axis=0)
    # gradient = fa.reconstructDerivative(locations=mp.projectToPlaneCoor(samples))
    gridLocs = (np.mgrid[0:pixPerSide, 0:pixPerSide] +
                0.5) / pixPerSide * rangesNoised[:, np.newaxis, np.newaxis]
    gridIndxs = np.mgrid[0:pixPerSide, 0:pixPerSide]
    flatLocs = np.array([arr.flatten() for arr in gridLocs]).T
    flatIndxs = np.array([arr.flatten() for arr in gridIndxs]).T
    dists = spst.distance.cdist(
        flatLocs - np.mean(flatLocs, axis=0)[np.newaxis, :],
        locs2dNoised - np.mean(locs2dNoised, axis=0)[np.newaxis, :])

    accum = [[None, ] * len(tr) for cnt in range(len(tr))]
    for k in range(len(accum)):
        accum[k][k] = np.ones((pixPerSide, pixPerSide))

    print('beginning matching with ' + str(dists.shape) + ' sized matrix')
    if dists.shape[0] <= dists.shape[1]:
        # number of drawn pixels <= number of elements in the sample;
        # sparse representation: [:, 0] is the pixel index,
        # [:, 1] the matching sample index
        matches = np.array(Munkres().compute(dists))
        print('finished matching')
        for i, j in it.combinations(range(len(tr)), 2):
            imageMat = np.ones((pixPerSide, pixPerSide)) * tr[i, j]
            # imageMat[flatIndxs[:, 0], flatIndxs[:, 1]] = gradient[matches[:, 0], i] / gradient[matches[:, 1], j]
            # set to the values of the tradeoffs at the elements of the samples
            imageMat[flatIndxs[matches[:, 0], 0],
                     flatIndxs[matches[:, 0], 1]] = np.squeeze(
                         tradeoffCalc(mp, fa, samples[matches[:, 1], :], i, j))
            accum[i][j] = imageMat
            accum[j][i] = 1 / imageMat
    else:
        # [:, 0] is the sample index, [:, 1] the pixel index
        matches = np.array(Munkres().compute(dists.T))
        # check matching
        # plt.plot(flatLocs[:, 0], flatLocs[:, 1], '.', samples[:, 0], samples[:, 1], 'o')
        # plt.plot(np.vstack((samples[matches[:, 0], 0], flatLocs[matches[:, 1], 0])),
        #          np.vstack((samples[matches[:, 0], 1], flatLocs[matches[:, 1], 1])))
        # plt.show()
        print('finished matching')
        for i, j in it.combinations(range(len(tr)), 2):
            imageMat = np.ones((pixPerSide, pixPerSide)) * tr[i, j]
            # imageMat[matches[:, 0], matches[:, 1]] = gradient[flatIndxs[:, 0], i] / gradient[flatIndxs[:, 1], j]
            # set to the values of the tradeoffs at the elements of the samples
            imageMat[flatIndxs[matches[:, 1], 0],
                     flatIndxs[matches[:, 1], 1]] = np.squeeze(
                         tradeoffCalc(mp, fa, samples[matches[:, 0], :], i, j))
            accum[i][j] = imageMat
            accum[j][i] = 1 / imageMat

    # reorderArr = np.argsort(np.mean(tr, axis=0))
    reorderArr = np.arange(len(tr))
    objLabels_reorder = list(map(lambda i: objLabels[i], range(len(objLabels))))
    accumReorder = [[accum[l][k] for k in reorderArr] for l in reorderArr]
    toPlot = np.vstack(tuple(map(np.hstack, map(tuple, accumReorder))))
    toPlot = np.log(np.abs(toPlot))
    plt.imshow(toPlot, cmap=globalCmap, interpolation='nearest')
    plt.colorbar()
    plt.xticks(
        range(pixPerSide // 2, pixPerSide * len(objLabels_reorder),
              pixPerSide), objLabels_reorder)
    plt.yticks(
        range(pixPerSide // 2, pixPerSide * len(objLabels_reorder),
              pixPerSide), objLabels_reorder)
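# Hedged sketch of the pixel-to-sample assignment above: with a rectangular
# distance matrix, scipy's linear_sum_assignment handles the non-square case
# directly, which is what the dists-vs-dists.T branching works around for
# Munkres. Coordinates below are random toy data.
import numpy as np
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist

pixels = np.random.rand(9, 2)     # e.g. a 3x3 grid of pixel centres
samples = np.random.rand(20, 2)   # more samples than pixels

dists = cdist(pixels, samples)
rows, cols = linear_sum_assignment(dists)  # one distinct sample per pixel
# rows[k] is a pixel index and cols[k] its matched sample index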
def generate_matrix(hsvPalette1, hsvPalette2):
    len1 = len(hsvPalette1)
    len2 = len(hsvPalette2)
    matrix = []
    # pad the cost matrix to len1 x len1 with a large sentinel cost so it
    # stays square when the palettes differ in length; note this only
    # handles the len1 >= len2 case
    if len1 >= len2:
        for i in range(0, len1):
            matrix.append([])
        for j in range(0, len1):
            for k in range(0, len1):
                matrix[j].append(1000000)
        for i in range(0, len1):
            for j in range(0, len2):
                matrix[j][i] = int(round(
                    color_distance(hsvPalette1[i], hsvPalette2[j]) *
                    1000)) + 1
    return matrix


matrix = generate_matrix(c8, c6)
print(matrix)

m = Munkres()
indexes = m.compute(matrix)
print_matrix(matrix, msg='Lowest cost through this matrix:')
total = 0
for row, column in indexes:
    value = matrix[row][column]
    total += value
    print('(%d, %d) -> %d' % (row, column, value))
print('total cost: %d' % total)
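# color_distance is not defined in the snippet above. A hypothetical
# stand-in for experimentation is a plain Euclidean distance in HSV space
# (an assumption, not the original implementation, which may well account
# for the circular hue axis):
import math


def color_distance(hsv1, hsv2):
    # hsv1/hsv2: (h, s, v) tuples; naive straight-line distance
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(hsv1, hsv2)))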
def main(args):
    gt_file = './armstrong-jan2018.json'
    # gt_file = './armstrong-setcore-20171115.json'
    # gt_file = '/Users/ashokdeb/Desktop/test/effect-forecasting-models/deb/warning_ARIMA_p_7_d_1_q_7_armstrong_endpoint-malware_2017-06-30.json'
    # warning_dir = './cause-effect/warnings/'
    warning_dir = './score_warnings/'
    mth = args.mth
    external_signal_name = args.ext.split('.')[0]
    method = args.method
    event_type = args.evt

    if mth == "July":
        start_date = datetime.date(2017, 7, 1)
        end_date = datetime.date(2017, 7, 31)
    elif mth == "August":
        start_date = datetime.date(2017, 8, 1)
        end_date = datetime.date(2017, 8, 31)
    elif mth == "September":
        start_date = datetime.date(2017, 9, 1)
        end_date = datetime.date(2017, 9, 30)
    elif mth == "October":
        start_date = datetime.date(2017, 10, 1)
        end_date = datetime.date(2017, 10, 31)
    elif mth == "November":
        start_date = datetime.date(2017, 11, 1)
        end_date = datetime.date(2017, 11, 30)
    elif mth == "December":
        start_date = datetime.date(2017, 12, 1)
        end_date = datetime.date(2017, 12, 31)
    elif mth == "January":
        start_date = datetime.date(2018, 1, 1)
        end_date = datetime.date(2018, 1, 31)

    # need to adjust
    # start_date = datetime.date(2017, 7, 1)
    # end_date = datetime.date(2017, 7, 31)
    # start_date = datetime.date(2017, 8, 1)
    # end_date = datetime.date(2017, 8, 31)
    # start_date = datetime.date(2017, 9, 1)
    # end_date = datetime.date(2017, 9, 30)

    target_org = args.target
    # target_org = 'knox'

    # Potentially filter by event type:
    # None -> score for ALL event types; otherwise, restrict to
    # 'endpoint-malware', 'malicious-email', or 'malicious-destination'
    # event_type = None

    # First, parse the input data
    warnings = load_warnings(warning_dir, target_org, event_type, start_date,
                             end_date, external_signal_name, method)
    events = load_gt(gt_file, target_org, event_type, start_date, end_date)
    print("# warnings = %d" % len(warnings))
    print("# events = %d" % len(events))

    # Now perform matching on warnings + events. To do this, first score
    # every possible warning-event pair. The official pair_objects.py
    # depends heavily on Python classes *not* provided to us by the
    # government, so we recreate it here.
    M = Metrics()
    matching_matrix = np.zeros((len(events), len(warnings)))
    matching_dict = dict()
    for e_idx in range(len(events)):
        for w_idx in range(len(warnings)):
            # Check if we meet the base criteria threshold
            if warnings[w_idx].event_type == "endpoint-malware":
                date_th = 0.875
            elif warnings[w_idx].event_type == "malicious-email":
                date_th = 1.375
            elif warnings[w_idx].event_type == "malicious-destination":
                date_th = 1.625
            else:
                raise Exception("Unknown event_type: %s" %
                                warnings[w_idx].event_type)
            if M.base_criteria(events[e_idx], warnings[w_idx], thr2=date_th):
                pair = Pair.build(warnings[w_idx], events[e_idx],
                                  'fake-performer', 'fake-provider')
                # negate quality: Munkres minimizes cost
                matching_matrix[e_idx, w_idx] = -pair.quality
                matching_dict["%d,%d" % (e_idx, w_idx)] = pair

    # Now do Hungarian matching
    munk = Munkres()
    pairings = munk.compute(matching_matrix.tolist())
    valid_pairings = list(
        filter(lambda p: matching_matrix[p[0], p[1]] != 0, pairings))

    nMatched = len(valid_pairings)
    nUnmatchedGT = len(events) - nMatched
    nUnmatchedW = len(warnings) - nMatched

    avg_qs = 0
    for e_idx, w_idx in valid_pairings:
        # pair = matching_dict["%d,%d" % (e_idx, w_idx)]
        # avg_qs += pair.quality
        avg_qs += matching_matrix[e_idx, w_idx]
    if len(valid_pairings) != 0:
        avg_qs /= len(valid_pairings)
        avg_qs = -1 * avg_qs
        recall = nMatched / (nMatched + nUnmatchedGT)
        precision = nMatched / (nMatched + nUnmatchedW)
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        avg_qs = recall = precision = f1 = 0

    print("Precision = %0.2f%%" % (100 * precision))
    print("Recall = %0.2f%%" % (100 * recall))
    print("Average Quality Score = %0.2f" % avg_qs)

    new_results = [
        target_org, event_type, mth, len(events), len(warnings),
        external_signal_name,
        np.around(100 * precision, decimals=2),
        np.around(100 * recall, decimals=2),
        np.around(100 * f1, decimals=2),
        np.around(avg_qs, decimals=2)
    ]
    with open('./output/results.csv', 'a') as f:
        writer = csv.writer(f)
        writer.writerow(new_results)
    return
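# A note on the valid_pairings filter above: Munkres always returns a
# complete assignment, so event-warning pairs that never met the base
# criteria (cost 0 in matching_matrix) still appear in `pairings` and must
# be filtered out afterwards. Toy illustration:
from munkres import Munkres

cost = [[-0.9, 0.0],
        [0.0, 0.0]]  # only pair (0, 0) met the criteria
pairings = Munkres().compute(cost)                        # [(0, 0), (1, 1)]
valid = [p for p in pairings if cost[p[0]][p[1]] != 0]    # [(0, 0)]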