Example #1
def p345():
    """Solve Project Euler problem 345 ("Matrix Sum").

    Chooses exactly one entry from every row and every column of the 15x15
    matrix below so that the sum of the chosen entries is maximal, and
    returns that sum.

    The Hungarian algorithm implemented by ``munkres`` minimises cost, so it
    is run on a negated copy of the matrix; the total is then read from the
    original (positive) values.

    NOTE(review): the matrix literal had lost its row separators and the
    last entry of rows 4, 9 and 13 (350, 848 and 699 below).  They were
    restored from the published problem statement -- please confirm.

    Returns:
        int: the maximal matrix sum.
    """
    # Kept as a function-level import so that importing this module does not
    # require the third-party ``munkres`` package.
    from munkres import Munkres

    matrix = [
        [7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583],
        [627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583, 639, 913],
        [447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423, 159, 743],
        [217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462, 350],
        [960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172, 812, 350],
        [870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425, 329, 803],
        [973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165, 992, 326],
        [322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601, 95, 973],
        [445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448, 848],
        [414, 456, 310, 312, 798, 104, 566, 520, 302, 248, 694, 976, 430, 392, 198],
        [184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284, 760, 390],
        [821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192, 156, 574],
        [34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329, 699],
        [815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251, 631, 107],
        [813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302, 35, 805],
    ]

    # Minimising the negated values is the same as maximising the originals.
    negated = [[-value for value in row] for row in matrix]
    chosen = Munkres().compute(negated)
    return sum(matrix[row][column] for row, column in chosen)
Example #2
    def Update(self, detections):
        """Match detections to tracks and update every track's state.

        Steps, in order:

        1. If there are no tracks yet, start one track per detection.
        2. Build the (tracks x detections) cost matrix: half the Euclidean
           distance between a track's ``prediction`` and a detection.
        3. Solve the assignment with ``linear_sum_assignment`` and reject
           pairs whose cost exceeds ``self.dist_thresh``; tracks that got no
           detection from the solver have ``skipped_frames`` incremented.
        4. Remove tracks whose ``skipped_frames`` exceeds
           ``self.max_frames_to_skip``.
        5. Start a new track for every detection left unassigned.
        6. Run the Kalman correction of each pre-existing track, trim its
           ``trace`` to ``self.max_trace_length`` and store the result in
           ``KF.lastResult``.

        Args:
            detections: indexable collection of detections.  Each detection
                is subtracted from a track prediction and the difference is
                read at ``[0][0]`` and ``[1][0]``.
                # assumes 2x1 column vectors (x, y) -- TODO confirm

        NOTE(review): nothing in this method appends to ``trace``; confirm
        whether that happens elsewhere or whether a statement was lost.
        """
        # Bootstrap: with no tracks yet, every detection starts its own track.
        if len(self.tracks) == 0:
            for detection in detections:
                self.tracks.append(Track(detection, self.trackIdCount))
                self.trackIdCount += 1

        # Cost matrix between predicted and detected positions.
        n_tracks = len(self.tracks)
        n_detections = len(detections)
        cost = np.zeros(shape=(n_tracks, n_detections))
        for i, track in enumerate(self.tracks):
            for j in range(n_detections):
                diff = track.prediction - detections[j]
                cost[i][j] = np.sqrt(diff[0][0] * diff[0][0] +
                                     diff[1][0] * diff[1][0])
        cost = 0.5 * cost

        # assignment[i] is the detection index given to track i, or -1.
        assignment = [-1] * n_tracks
        row_ind, col_ind = linear_sum_assignment(cost)
        for row, col in zip(row_ind, col_ind):
            assignment[row] = col

        for i, det_idx in enumerate(assignment):
            if det_idx != -1:
                # A match that is too far away is treated as no match.
                if cost[i][det_idx] > self.dist_thresh:
                    assignment[i] = -1
            else:
                self.tracks[i].skipped_frames += 1

        # Remove tracks that have gone undetected for too long.  Deleting
        # from the highest index down keeps the remaining indices valid;
        # deleting in ascending order would shift them and hit wrong tracks.
        stale = [i for i, track in enumerate(self.tracks)
                 if track.skipped_frames > self.max_frames_to_skip]
        for index in reversed(stale):
            del self.tracks[index]
            del assignment[index]

        # Every detection that no surviving track claimed starts a new track.
        claimed = {int(det_idx) for det_idx in assignment if det_idx != -1}
        for j in range(n_detections):
            if j not in claimed:
                self.tracks.append(Track(detections[j], self.trackIdCount))
                self.trackIdCount += 1

        # Update the Kalman state of the tracks that took part in the
        # assignment (tracks created just above are not in ``assignment``).
        for i, det_idx in enumerate(assignment):
            track = self.tracks[i]
            if det_idx != -1:
                track.skipped_frames = 0
                track.prediction = track.KF.correct(detections[det_idx], 1)
            else:
                # No measurement: correct with a dummy value and flag 0.
                track.prediction = track.KF.correct(np.array([[0], [0]]), 0)

            # Drop the oldest trace entries beyond the allowed length.
            excess = len(track.trace) - self.max_trace_length
            for _ in range(max(excess, 0)):
                del track.trace[0]

            track.KF.lastResult = track.prediction
Example #3
def kruskal_align(U, V, permute_U=False, permute_V=False):
    """Aligns two KTensors and returns a similarity score.

    Parameters
    ----------
    U : KTensor
        First kruskal tensor to align.
    V : KTensor
        Second kruskal tensor to align.
    permute_U : bool
        If True, modifies 'U' to align the KTensors (default is False).
    permute_V : bool
        If True, modifies 'V' to align the KTensors (default is False).

    Notes
    -----
    If both `permute_U` and `permute_V` are set to True, then the
    factors are ordered from most to least similar. If only one is
    True then the factors on the modified KTensor are re-ordered to
    match the factors in the un-aligned KTensor.

    Returns
    -------
    similarity : float
        Similarity score between zero and one.
    """

    # Compute similarity matrices: one (rank_U x rank_V) matrix per mode,
    # holding the dot products of the column-normalised factors.
    unrm = [f / np.linalg.norm(f, axis=0) for f in U.factors]
    vnrm = [f / np.linalg.norm(f, axis=0) for f in V.factors]
    sim_matrices = [np.dot(u.T, v) for u, v in zip(unrm, vnrm)]
    cost = 1 - np.mean(np.abs(sim_matrices), axis=0)

    # Solve matching problem via Hungarian algorithm.
    indices = Munkres().compute(cost.copy())
    prmU, prmV = zip(*indices)

    # Compute mean factor similarity given the optimal matching.
    similarity = np.mean(1 - cost[prmU, prmV])

    # If permuting both U and V, order factors from most to least similar.
    if permute_U and permute_V:
        idx = np.argsort(cost[prmU, prmV])

    # If permute_U is False, then order the factors such that the ordering
    # for U is unchanged.
    elif permute_V:
        idx = np.argsort(prmU)

    # If permute_V is False, then order the factors such that the ordering
    # for V is unchanged.
    elif permute_U:
        idx = np.argsort(prmV)

    # If permute_U and permute_V are both False, then we are done and can
    # simply return the similarity.
    else:
        return similarity

    # Re-order the factor permutations.
    prmU = [prmU[i] for i in idx]
    prmV = [prmV[i] for i in idx]

    # Permute the factors.
    # NOTE(review): assumes KTensor exposes ``permute(idx)`` that reorders
    # the factor columns in place -- the call is not visible in this copy of
    # the function; confirm against the KTensor class.
    if permute_U:
        U.permute(prmU)
    if permute_V:
        V.permute(prmV)

    # Flip the signs of factors so matched columns point the same way.
    flips = np.sign([F[prmU, prmV] for F in sim_matrices])
    flips[0] *= np.prod(flips, axis=0)  # always flip an even number of factors

    # Flipping one of the two tensors is enough to make the signs agree.
    if permute_U:
        for i, f in enumerate(flips):
            U.factors[i] *= f

    elif permute_V:
        for i, f in enumerate(flips):
            V.factors[i] *= f

    # Return the similarity score
    return similarity
Example #4
    def compute3rdPartyMetrics(self):
        """Associate ground truth with tracker output and compute the metrics.

        Computes the metrics defined in
            - Stiefelhagen 2008: Evaluating Multiple Object Tracking
              Performance: The CLEAR MOT Metrics
              (MOTA, MOTAL, MOTP)
            - Nevatia 2008: Global Data Association for Multi-Object Tracking
              Using Network Flows
              (MT/PT/ML)

        For every frame the ground-truth boxes are matched to the tracker
        boxes with the Hungarian method, using ``1 - boxoverlap`` as cost and
        ``self.min_overlap`` as gate.  True/false positives and false
        negatives are accumulated on ``self`` (with separate counters for
        ignored objects), the per-trajectory association history is stored in
        ``self.gt_trajectories`` / ``self.ign_trajectories``, and the
        aggregate scores (MT/PT/ML, recall, precision, F1, FAR, MOTA, MODA,
        MOTP, MOTAL, MODP) are written to attributes of ``self``.

        Returns:
            bool: always ``True``.

        Raises:
            NameError: if a per-frame sanity check on the TP/FN/FP
                bookkeeping fails.
        """
        # construct Munkres object for Hungarian Method association
        hm = Munkres()
        max_cost = 1e9

        # go through all frames and associate ground truth and tracker results
        # groundtruth and tracker contain lists for every single frame
        # containing lists of KITTI format detections
        for seq_idx in range(len(self.groundtruth)):
            seq_gt = self.groundtruth[seq_idx]
            seq_dc = self.dcareas[seq_idx]  # don't care areas
            seq_tracker = self.tracker[seq_idx]
            seq_trajectories = defaultdict(list)
            seq_ignored = defaultdict(list)

            # statistics over the current sequence, check the corresponding
            # variable comments in __init__ to get their meaning
            # NOTE(review): the statements that stored these per-sequence
            # totals (after the frame loop) are missing from this copy of the
            # method; the totals are still accumulated so they can be wired
            # back in once the target attributes are confirmed.
            seqtp = 0
            seqitp = 0
            seqfn = 0
            seqifn = 0
            seqfp = 0
            seqigt = 0
            seqitr = 0

            last_ids = [[], []]

            n_gts = 0
            n_trs = 0

            for f in range(len(seq_gt)):
                g = seq_gt[f]
                dc = seq_dc[f]

                t = seq_tracker[f]
                # counting total number of ground truth and tracker objects
                self.n_gt += len(g)
                self.n_tr += len(t)

                n_gts += len(g)
                n_trs += len(t)

                # use hungarian method to associate, using boxoverlap 0..1 as cost
                # build cost matrix
                cost_matrix = []
                # this_ids[0]: ground-truth ids of this frame,
                # this_ids[1]: tracker id associated with each (or -1)
                this_ids = [[], []]
                for gg in g:
                    # save current ids
                    this_ids[0].append(gg.track_id)
                    this_ids[1].append(-1)
                    gg.tracker = -1
                    gg.id_switch = 0
                    gg.fragmentation = 0
                    cost_row = []
                    for tt in t:
                        # overlap == 1 is cost ==0
                        c = 1 - self.boxoverlap(gg, tt)
                        # gating for boxoverlap
                        if c <= self.min_overlap:
                            cost_row.append(c)
                        else:
                            cost_row.append(max_cost)  # = 1e9
                    cost_matrix.append(cost_row)
                    # all ground truth trajectories are initially not associated
                    # extend groundtruth trajectories lists (merge lists)
                    seq_trajectories[gg.track_id].append(-1)
                    seq_ignored[gg.track_id].append(False)

                # ``==`` rather than ``is``: identity of ints is an
                # implementation detail and must not be used for comparison
                if len(g) == 0:
                    cost_matrix = [[]]
                # associate
                association_matrix = hm.compute(cost_matrix)

                # tmp variables for sanity checks and MODP computation
                tmptp = 0
                tmpfp = 0
                tmpfn = 0
                tmpc = 0  # this will sum up the overlaps for all true positives
                # this will save the overlaps for all true positives;
                # the reason is that some true positives might be ignored
                # later such that the corresponding overlaps can
                # be subtracted from tmpc for MODP computation
                tmpcs = [0] * len(g)

                # mapping for tracker ids and ground truth ids
                for row, col in association_matrix:
                    # apply gating on boxoverlap
                    c = cost_matrix[row][col]
                    if c < max_cost:
                        g[row].tracker = t[col].track_id
                        this_ids[1][row] = t[col].track_id
                        t[col].valid = True
                        g[row].distance = c
                        self.total_cost += 1 - c
                        tmpc += 1 - c
                        tmpcs[row] = 1 - c
                        seq_trajectories[g[row].track_id][-1] = t[col].track_id

                        # true positives are only valid associations
                        self.tp += 1
                        tmptp += 1
                    else:
                        # the pair was only formed because of the gate value
                        g[row].tracker = -1
                        self.fn += 1
                        tmpfn += 1

                # associate tracker and DontCare areas
                # ignore tracker in neighboring classes
                nignoredtracker = 0  # number of ignored tracker detections
                # will associate the track_id with -1 if it is not ignored
                # and 1 if it is ignored; this is used to avoid double
                # counting ignored cases, see the next loop
                ignoredtrackers = dict()

                for tt in t:
                    ignoredtrackers[tt.track_id] = -1
                    # ignore detection if it belongs to a neighboring class or is
                    # smaller or equal to the minimum height

                    tt_height = abs(tt.y1 - tt.y2)
                    if ((self.cls == "car" and tt.obj_type == "van") or
                        (self.cls == "pedestrian"
                         and tt.obj_type == "person_sitting")
                            or tt_height <= self.min_height) and not tt.valid:
                        nignoredtracker += 1
                        tt.ignored = True
                        ignoredtrackers[tt.track_id] = 1
                        # already counted; do not count it again below
                        continue
                    for d in dc:
                        overlap = self.boxoverlap(tt, d, "a")
                        if overlap > 0.5 and not tt.valid:
                            tt.ignored = True
                            nignoredtracker += 1
                            ignoredtrackers[tt.track_id] = 1
                            # one DontCare hit is enough; count it once
                            break

                # check for ignored FN/TP (truncation or neighboring object class)
                ignoredfn = 0  # the number of ignored false negatives
                nignoredtp = 0  # the number of ignored true positives
                # the number of ignored pairs, i.e. a true positive which is
                # ignored but where the associated tracker detection has
                # already been ignored
                nignoredpairs = 0

                for gi, gg in enumerate(g):
                    out_of_scope = (
                        gg.occlusion > self.max_occlusion
                        or gg.truncation > self.max_truncation
                        or (self.cls == "car" and gg.obj_type == "van")
                        or (self.cls == "pedestrian"
                            and gg.obj_type == "person_sitting"))
                    if not out_of_scope:
                        continue

                    seq_ignored[gg.track_id][-1] = True
                    gg.ignored = True
                    if gg.tracker < 0:
                        ignoredfn += 1
                    else:
                        nignoredtp += 1

                        # if the associated tracker detection is already ignored,
                        # we want to avoid double counting ignored detections
                        if ignoredtrackers[gg.tracker] > 0:
                            nignoredpairs += 1

                        # for computing MODP, the overlaps from ignored detections
                        # are subtracted
                        tmpc -= tmpcs[gi]

                # the below might be confusing, check the comments in __init__
                # to see what the individual statistics represent

                # correct TP by number of ignored TP due to truncation
                # ignored TP are shown as tracked in visualization
                tmptp -= nignoredtp

                # count the number of ignored true positives
                self.itp += nignoredtp

                # adjust the number of ground truth objects considered
                self.n_gt -= (ignoredfn + nignoredtp)

                # count the number of ignored ground truth objects
                self.n_igt += ignoredfn + nignoredtp

                # count the number of ignored tracker objects
                self.n_itr += nignoredtracker

                # count the number of ignored pairs, i.e. associated tracker and
                # ground truth objects that are both ignored
                self.n_igttr += nignoredpairs

                # false negatives = associated gt bboxes exceeding association
                # threshold + non-associated gt bboxes
                unassociated_fn = len(g) - len(association_matrix) - ignoredfn
                tmpfn += unassociated_fn
                self.fn += unassociated_fn
                self.ifn += ignoredfn

                # false positives = tracker bboxes - associated tracker bboxes
                # mismatches (mme_t)
                frame_fp = (len(t) - tmptp - nignoredtracker - nignoredtp +
                            nignoredpairs)
                tmpfp += frame_fp
                self.fp += frame_fp

                # update sequence data
                seqtp += tmptp
                seqitp += nignoredtp
                seqfp += tmpfp
                seqfn += tmpfn
                seqifn += ignoredfn
                seqigt += ignoredfn + nignoredtp
                seqitr += nignoredtracker

                # sanity checks
                # - the number of true positives minus ignored true positives
                #   should be greater or equal to 0
                # - the number of false negatives should be greater or equal to 0
                # - the number of false positives needs to be greater or equal to 0
                #   otherwise ignored detections might be counted double
                # - the number of counted true positives (plus ignored ones)
                #   and the number of counted false negatives (plus ignored ones)
                #   should match the total number of ground truth objects
                # - the number of counted true positives (plus ignored ones)
                #   and the number of counted false positives
                #   plus the number of ignored tracker detections should
                #   match the total number of tracker detections; note that
                #   nignoredpairs is subtracted here to avoid double counting
                #   of ignored detections in nignoredtp and nignoredtracker
                if tmptp < 0:
                    print(tmptp, nignoredtp)
                    raise NameError("Something went wrong! TP is negative")
                if tmpfn < 0:
                    # NOTE(review): the tail of this print call was missing;
                    # nignoredpairs is an assumed last argument.
                    print(tmpfn, len(g), len(association_matrix), ignoredfn,
                          nignoredpairs)
                    raise NameError("Something went wrong! FN is negative")
                if tmpfp < 0:
                    # NOTE(review): same as above for the last argument.
                    print(tmpfp, len(t), tmptp, nignoredtracker, nignoredtp,
                          nignoredpairs)
                    raise NameError("Something went wrong! FP is negative")
                # value comparison (``!=``); ``is not`` on ints would raise
                # spuriously once the counts leave the small-int cache
                if tmptp + tmpfn != len(g) - ignoredfn - nignoredtp:
                    print("seqidx", seq_idx)
                    print("frame ", f)
                    print("TP    ", tmptp)
                    print("FN    ", tmpfn)
                    print("FP    ", tmpfp)
                    print("nGT   ", len(g))
                    print("nAss  ", len(association_matrix))
                    print("ign GT", ignoredfn)
                    print("ign TP", nignoredtp)
                    raise NameError(
                        "Something went wrong! nGroundtruth is not TP+FN")
                if (tmptp + tmpfp + nignoredtp + nignoredtracker -
                        nignoredpairs != len(t)):
                    print(seq_idx, f, len(t), tmptp, tmpfp)
                    print(len(association_matrix), association_matrix)
                    raise NameError(
                        "Something went wrong! nTracker is not TP+FP")

                # check for id switches or fragmentations
                for i, tt in enumerate(this_ids[0]):
                    if tt in last_ids[0]:
                        idx = last_ids[0].index(tt)
                        tid = this_ids[1][i]
                        lid = last_ids[1][idx]
                        if tid != lid and lid != -1 and tid != -1:
                            if g[i].truncation < self.max_truncation:
                                g[i].id_switch = 1
                        if tid != lid and lid != -1:
                            if g[i].truncation < self.max_truncation:
                                g[i].fragmentation = 1

                # save current index
                last_ids = this_ids
                # compute MODP_t (mean overlap of this frame's true positives)
                MODP_t = 1
                if tmptp != 0:
                    MODP_t = tmpc / float(tmptp)
                self.MODP_t.append(MODP_t)

            # remove empty lists for current gt trajectories
            self.gt_trajectories[seq_idx] = seq_trajectories
            self.ign_trajectories[seq_idx] = seq_ignored

            # gather statistics for "per sequence" statistics.
            # NOTE(review): see the note at the top of the sequence loop --
            # the statements that belonged here are not present in this copy.

        # compute MT/PT/ML, fragments, idswitches for all groundtruth trajectories
        n_ignored_tr_total = 0
        for seq_idx, (seq_trajectories, seq_ignored) in enumerate(
                zip(self.gt_trajectories, self.ign_trajectories)):
            if len(seq_trajectories) == 0:
                continue
            for g, ign_g in zip(seq_trajectories.values(),
                                seq_ignored.values()):
                # all frames of this gt trajectory are ignored
                if all(ign_g):
                    n_ignored_tr_total += 1
                    continue
                # all frames of this gt trajectory are not assigned to any detections
                if all(this == -1 for this in g):
                    self.ML += 1
                    continue
                # compute tracked frames in trajectory
                last_id = g[0]
                # first detection (necessary to be in gt_trajectories) is always tracked
                tracked = 1 if g[0] >= 0 else 0
                for f in range(1, len(g)):
                    if ign_g[f]:
                        # an ignored frame breaks the chain of associations
                        last_id = -1
                        continue
                    if last_id != g[f] and last_id != -1 and g[f] != -1 and g[
                            f - 1] != -1:
                        self.id_switches += 1
                    if f < len(g) - 1 and g[f - 1] != g[
                            f] and last_id != -1 and g[f] != -1 and g[f +
                                                                      1] != -1:
                        self.fragments += 1
                    if g[f] != -1:
                        tracked += 1
                        last_id = g[f]
                # handle last frame; tracked state is handled in for loop (g[f]!=-1)
                f = len(g) - 1
                if len(g) > 1 and g[f - 1] != g[f] and last_id != -1 and g[
                        f] != -1 and not ign_g[f]:
                    self.fragments += 1

                # compute MT/PT/ML
                tracking_ratio = tracked / float(len(g) - sum(ign_g))
                if tracking_ratio > 0.8:
                    self.MT += 1
                elif tracking_ratio < 0.2:
                    self.ML += 1
                else:  # 0.2 <= tracking_ratio <= 0.8
                    self.PT += 1

        # turn the MT/PT/ML counts into fractions of the considered trajectories
        n_considered_tr = self.n_gt_trajectories - n_ignored_tr_total
        if n_considered_tr == 0:
            self.MT = 0.
            self.PT = 0.
            self.ML = 0.
        else:
            self.MT /= float(n_considered_tr)
            self.PT /= float(n_considered_tr)
            self.ML /= float(n_considered_tr)

        # precision/recall etc.
        if (self.fp + self.tp) == 0 or (self.tp + self.fn) == 0:
            self.recall = 0.
            self.precision = 0.
        else:
            self.recall = self.tp / float(self.tp + self.fn)
            self.precision = self.tp / float(self.fp + self.tp)
        if (self.recall + self.precision) == 0:
            self.F1 = 0.
        else:
            self.F1 = 2. * (self.precision * self.recall) / (self.precision +
                                                             self.recall)
        if sum(self.n_frames) == 0:
            self.FAR = "n/a"
        else:
            self.FAR = self.fp / float(sum(self.n_frames))

        # compute CLEARMOT
        if self.n_gt == 0:
            self.MOTA = -float("inf")
            self.MODA = -float("inf")
        else:
            self.MOTA = 1 - (self.fn + self.fp + self.id_switches) / float(
                self.n_gt)
            self.MODA = 1 - (self.fn + self.fp) / float(self.n_gt)
        if self.tp == 0:
            self.MOTP = float("inf")
        else:
            self.MOTP = self.total_cost / float(self.tp)
        if self.n_gt != 0:
            if self.id_switches == 0:
                # log10(0) is undefined, so use the plain count (zero)
                self.MOTAL = 1 - (self.fn + self.fp +
                                  self.id_switches) / float(self.n_gt)
            else:
                self.MOTAL = 1 - (self.fn + self.fp + math.log10(
                    self.id_switches)) / float(self.n_gt)
        else:
            self.MOTAL = -float("inf")
        if sum(self.n_frames) == 0:
            self.MODP = "n/a"
        else:
            self.MODP = sum(self.MODP_t) / float(sum(self.n_frames))
        return True
import copy
import math
import os

from munkres import Munkres

from coalib.collecting.Collectors import collect_dirs
from bears.c_languages.codeclone_detection.CountVector import CountVector

# Instantiate globally since this class is holding stateless public methods.
munkres = Munkres()

# NOTE(review): this definition is damaged in this copy of the file.  The
# docstring delimiters around the description below are missing, the list
# comprehension is never closed, and whatever followed it (including the
# return statement) is not present, so the block does not parse as written.
# Restore it from the upstream source before relying on it.
def exclude_function(count_matrix):
    Determines heuristically whether or not it makes sense for clone
    detection to take this function into account.

    Applied heuristics:
     * Functions with only count vectors with a sum of all unweighted elements
       of lower then 10 are very likely only declarations or empty and to be
       ignored. (Constants are not taken into account.)

    :param count_matrix: A dictionary with count vectors representing all
                         variables for a function.
    :return:             True if the function is useless for evaluation.
    # Keep only the count vectors whose category is ``reference``.
    var_list = [
        cv for cv in count_matrix.values()
        if cv.category is CountVector.Category.reference
    # NOTE(review): this method is damaged in this copy of the file.  The
    # docstring delimiters are missing, both ``self.objects[key] = [`` list
    # literals are never closed, and several branch/loop bodies are absent
    # (the Kalman feedback step, the removal from ``non_asociated``, the
    # ``if`` under "no life" and the final deletion loop), so it does not
    # parse as written.  Restore it from the upstream source.
    def update(self, boxes):
        Update predictions, associate with detections and add new objects if have no association
        Also delete dead objects
        :param: Detected boxes
        :return: None
        # Indices of the detections that have not been matched to an object.
        non_asociated = [i for i in range(len(boxes))]

        pred = None

        # Update prediction for each tracked object
        for key in self.filters:
            pred = self.filters[key].predict()

            # Update bounding box according to xc, yc predicted and size measured
            new_xc = pred[0]
            new_yc = pred[1]

            # Centre of the currently stored box (boxes are indexed 0..3)
            # assumes the layout is [x1, y1, x2, y2] -- TODO confirm
            xc = float(self.objects[key][0] + self.objects[key][2]) / 2
            yc = float(self.objects[key][1] + self.objects[key][3]) / 2

            half_w = self.objects[key][2] - xc
            half_h = self.objects[key][3] - yc

            # Update object boxes
            self.objects[key] = [
                int(new_xc - half_w),
                int(new_yc - half_h),
                int(new_xc + half_w),
                int(new_yc + half_h)

        if len(boxes) != 0 and len(self.objects) != 0:
            m = Munkres()

            # Get distances between detections and predictions to match
            distances = self.get_distances(boxes)

            # assert len(self.objects) == len(self.objects_time) == len(self.objects_life) == len(self.filters), "Bug on code"

            # It seems to have a bug when matrix is a numpy array of shape (2,1), by that I convert to list
            # The values are negated before the minimising solver is called;
            # presumably ``distances`` holds a similarity such as IoU (see the
            # ``iou_thres`` comparison below) -- TODO confirm.
            indices = m.compute((-distances).tolist())
            #             print("Valid: ", indices)

            # For each match possible match, filter impossible matches
            for i, j in indices:

                # Check for valid associations
                if distances[i, j] > self.iou_thres:
                    #                     print("match", i, j)
                    key = list(self.objects.keys())[j]
                    self.objects_life[key] = self.max_life

                    xc = float(boxes[i][0] + boxes[i][2]) / 2
                    yc = float(boxes[i][1] + boxes[i][3]) / 2

                    meas_centers = np.array([xc, yc], np.float32)

                    # If there is a match, feedback to kalman filter

                    # Update the width; this should be predicted, but the Kalman filter does not model it
                    # Pretend the update is correct
                    # Compute the (half) width of the measured bounding box
                    half_w = boxes[i][2] - xc
                    half_h = boxes[i][3] - yc

                    # Keep the predicted centre
                    xc = float(self.objects[key][0] + self.objects[key][2]) / 2
                    yc = float(self.objects[key][1] + self.objects[key][3]) / 2

                    self.objects[key] = [
                        int(xc - half_w),
                        int(yc - half_h),
                        int(xc + half_w),
                        int(yc + half_h)

                    # Mark association as valid deleting to non associated detections

        # Add non associated values as new objects
        for i in non_asociated:
            # print(boxes[i])
            self.new_object(boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3])

        keys_to_del = []
        for key in self.objects_life:
            # If a tracked object was dissapeared for last iterations (no life), delete it
            if self.objects_life[key] <= 0:

            # Else decrement life
                self.objects_life[key] = self.objects_life[key] - 1

        # Delete pending asociations
        for key in keys_to_del:
# NOTE(review): this function is damaged in this copy of the file.  The
# verbose ``print(`` call and the ``pd.DataFrame(columns=[`` call are never
# closed, and the code that writes the unmatched people and the matchings
# (``remaining_fr``, ``remaining_ex`` and ``matchings_file`` are never used
# below) is not present, so the block does not parse as written.
def match(conf_file, french_ans, exchange_ans, remaining_fr, remaining_ex, matchings_file, verbose):
    """Creates an optimal matching between french students and exchange students,\
    using criterias defined in the config file"""

    config = json.load(conf_file)
    criterias = config['criterias']

    # import data
    fr = pd.read_csv(french_ans)
    ex = pd.read_csv(exchange_ans)

    frAnswers = prepare(fr, doClone=True, cloneLabel=config['two_buddies_question_label'])
    exAnswers = prepare(ex)

    # Compute the costs
    # Every cell starts at infinity and is then overwritten with the cost of
    # pairing that french row with that exchange row.
    # NOTE(review): the matrix is indexed with the labels yielded by
    # ``iterrows()``; this presumably relies on ``prepare`` returning frames
    # with a 0..n-1 index -- confirm.
    costMatrix = [[float('inf')] * len(exAnswers.index) for _ in range(len(frAnswers.index))]
    for fridx, fr_row in frAnswers.iterrows():
        for exidx, ex_row in exAnswers.iterrows():
            # Compute a list of compatibility scores
            # Scores are normalized between 0 and 1
            costMatrix[fridx][exidx] = totalCost(criterias, fr_row, ex_row)

    # Solve the assignment problem using munkres
    m = Munkres()
    assignments = m.compute(costMatrix)

    # Print results
    if verbose > 0:
        for fridx, exidx in assignments:
            print(frAnswers['Q1'][fridx], frAnswers['Q2'][fridx],
                exAnswers['Q1'][exidx], exAnswers['Q2'][exidx],

    # Output remaining unmatched people
    frMatched, exMatched = zip(*assignments)

    # Outputing matchings
    matchings = pd.DataFrame(columns=["frFName",
    for fridx, exidx in assignments:
        fQ, lQ = config["fluentQ"], config["learningQ"]
        fnQ, lnQ, emQ = config["firstNameQ"], config["lastNameQ"], config["eMailQ"]
        frRow = frAnswers.iloc[fridx]
        exRow = exAnswers.iloc[exidx]
        # TODO:
        # Languages both people listed under the "fluent" question.
        commonLangs = set(frRow[fQ].split(',')).intersection(set(exRow[fQ].split(',')))
        # NOTE(review): as written, ``chosenLang`` is always overwritten with
        # "English" after the loop; a ``break``/``else`` pair looks to be
        # missing here -- confirm against the upstream source.
        for lang in config["mails"]:
            if lang in commonLangs:
                chosenLang = lang
            chosenLang = "English"
        d = {"frFName": frRow[fnQ],
            "frLName": frRow[lnQ],
            "frEMail": frRow[emQ],
            "exFName": exRow[fnQ],
            "exLName": exRow[lnQ],
            "exEMail": exRow[emQ],
            "language": chosenLang,
            "compatibility": compatPercentage(costMatrix[fridx][exidx])}
        # NOTE(review): ``DataFrame.append`` was removed in pandas 2.0;
        # collect the dicts and build the frame once when this is repaired.
        matchings = matchings.append(d, ignore_index=True)

Example #8
def py_max_match(scores):
    """Return the maximum-weight matching of a weighted bipartite graph.

    ``scores`` is the (row, col) weight matrix: the connection strength
    between each row and each column.  The result is the array of matched
    (row, col) index pairs, cast to ``int32``.
    """
    # Munkres.compute looks for the lowest total cost, whereas the strongest
    # total connection is wanted here, hence the negated scores.
    solver = Munkres()
    pairs = solver.compute(-scores)
    return np.array(pairs).astype(np.int32)
def cluster_distance(A, B, nbhood):
    # Score how alike clusters A and B are by combining three indices:
    #   Jaccard - overlap of the two clusters' specificity sets,
    #   DDS     - sequence-level similarity of their specificities,
    #   GK      - Goodman-Kruskal gamma computed from cluster_seq.
    # Reads the module-level names BGCs, DMS, cluster_seq, Jaccardw, DDSw and
    # GKw. Despite the function name, the value returned is Similarity_score;
    # Distance is only computed for the diagnostic prints near the end.
    # NOTE(review): several statements in this copy look truncated (unclosed
    # parentheses, `except` clauses without a `try:`); restore them from the
    # original source before running this function.

    # key is name of GC, values is list of specificity names
    clusterA = BGCs[A]  # dictionary where keys are specificities, and values
    # are lists of specificity labels that map to a specific sequence in
    # the DMS variable
    clusterB = BGCs[B]

    specificities_A = set(clusterA.keys())
    specificities_B = set(clusterB.keys())
    intersect = specificities_A.intersection(specificities_B)

    # NOTE(review): the denominator of the Jaccard index is missing here.
    Jaccard = len(intersect) / float(

    # DDS: the difference in specificities' sequences between clusters
    # S: max occurrence of each specificity (used to normalise DDS)

    DDS, S = 0, 0
    SumDistance = 0
    pair = ""

    # elements in either set but not in both: union - intersect
    not_intersect = specificities_A ^ specificities_B

    # sum as many copies of the unshared specificity there are, for each
    for unshared_specificity in not_intersect:
        dom_set = []
        # The specificity lives in exactly one cluster: cluster A is looked up
        # and the KeyError handler falls back to cluster B.
        # NOTE(review): the `try:` line that opens this handler is missing.
            dom_set = clusterA[unshared_specificity]
        except KeyError:
            dom_set = clusterB[unshared_specificity]

        # Every unshared copy adds 1 to both the distance and the normaliser.
        DDS += len(dom_set)
        S += len(dom_set)

    # compare sequence identity of shared specificities
    for shared_specificity in intersect:
        seta = clusterA[shared_specificity]
        setb = clusterB[shared_specificity]

        if len(seta +
               setb) == 2:  #The specificity occurs only once in both clusters
            # DMS is keyed by the sorted pair of labels; element [0] of the
            # entry is used as the similarity.
            pair = tuple(sorted([seta[0], setb[0]]))

            # NOTE(review): the `try:` line that opens this handler is missing.
                SumDistance = 1 - DMS[shared_specificity][pair][0]
                # print 'SumDistance1', SumDistance

            except KeyError:
                # On a missing pair SumDistance keeps whatever value it had
                # from a previous iteration (or its initial 0).
                print "KeyError on", pair

            S += max(len(seta), len(setb))
            DDS += SumDistance

        else:  #The specificity occurs more than once in both clusters
            # accumulated_distance = 0

            # Start from the maximum distance (1) for every pair and fill in
            # the real distances below.
            DistanceMatrix = [[1 for col in range(len(setb))]
                              for row in range(len(seta))]
            # print DistanceMatrix
            for domsa in range(len(seta)):
                for domsb in range(len(setb)):
                    pair = tuple(sorted([seta[domsa], setb[domsb]]))
                    # NOTE(review): the `try:` line that opens this handler is
                    # missing.
                        Similarity = DMS[shared_specificity][pair][0]
                        # print Similarity
                    except KeyError:
                        # Similarity is left unchanged on a missing pair, so
                        # the value from an earlier pair is reused below.
                        print "KeyError on (case 2)", pair

                    seq_dist = 1 - Similarity
                    DistanceMatrix[domsa][domsb] = seq_dist

            #Only use the best scoring pairs
            Hungarian = Munkres()
            BestIndexes = Hungarian.compute(DistanceMatrix)
            accumulated_distance = sum(
                [DistanceMatrix[bi[0]][bi[1]] for bi in BestIndexes])
            SumDistance = (abs(len(seta) - len(setb)) + accumulated_distance
                           )  #diff in abundance + sequence distance

            S += max(len(seta), len(setb))
            DDS += SumDistance

    # Normalise the accumulated distance by S.
    # NOTE(review): S is 0 when both clusters have no specificities, which
    # would raise ZeroDivisionError here.
    DDS /= float(S)
    DDS = 1 - DDS  #transform into similarity

    #  calculate the Goodman-Kruskal gamma index
    A_pseudo_seq = list(cluster_seq[A])
    B_pseudo_seq = list(cluster_seq[B])
    # NOTE(review): as written Ar is a plain copy of A_pseudo_seq, so both
    # calculate_GK calls below receive the same data - presumably a reversed
    # copy was intended; confirm against the original source.
    Ar = [item for item in A_pseudo_seq]
    # NOTE(review): the closing `])` of this max(...) call is missing.
    GK = max([
        calculate_GK(A_pseudo_seq, B_pseudo_seq, nbhood=nbhood),
        calculate_GK(Ar, B_pseudo_seq, nbhood=nbhood)

    # Weighted combination of the three indices; Distance is its complement.
    Distance = 1 - (Jaccardw * Jaccard) - (DDSw * DDS) - (GKw * GK)
    Similarity_score = (Jaccardw * Jaccard) + (DDSw * DDS) + (GKw * GK)
    # Similarity_score = 1 - DDS
    if Distance < 0:
        # Clamp negative distances; this only affects the local Distance
        # variable, not the returned Similarity_score.
        print "negative distance", Distance, "DDS", DDS, pair
        print "Probably a rounding issue"
        print "Distance is set to 0 for these clusters"
        Distance = 0
    print A, B, Jaccard, GK, DDS
    return Similarity_score
Example #10
        # ─── DEBUGGING ──────────────────────────────────────────────────────────────────
        self.DEBUG = NiseConfig._DEBUG()
        # ─── ALGORITHM ──────────────────────────────────────────────────────────────────
        self.ALG = NiseConfig._ALG()
        self.PATH = NiseConfig._PATH()
        self.TEST = NiseConfig._TEST()
        self.MODEL = NiseConfig._MODEL()

# Module-level setup: build the NISE configuration, apply the parsed
# command-line arguments to it, derive the output suffixes and configure the
# logger. All helpers called here are defined outside this excerpt.
cfg = NiseConfig()
nise_cfg = get_edcfg_from_nisecfg(cfg)
nise_args = get_nise_arg_parser()
update_nise_config(nise_cfg, nise_args)

suffix, suffix_with_range = set_path_from_nise_cfg(nise_cfg)
print('SUFFIX', suffix_with_range)
nise_logger = get_logger()
update_nise_logger(nise_logger, nise_cfg, suffix)

# Munkres solver instance created once at import time.
mkrs = Munkres()
# Bundle of the configuration and logger under the keys 'cfg' and 'logger'.
# NOTE(review): the closing brace of this dict literal is missing in this copy.
nise_cfg_pack = {
    'cfg': nise_cfg,
    'logger': nise_logger
Example #11
def py_max_match(scores):
    """Solve the maximum-score assignment with the pure-Python Munkres solver.

    Backup for when munkres-tensorflow cannot be compiled (much slower).
    The scores are negated before being handed to ``Munkres.compute`` and
    the resulting (row, col) pairs are returned as an ``int32`` array.
    """
    from munkres import Munkres

    assignment = Munkres().compute(-scores)
    as_array = np.array(assignment)
    return as_array.astype(np.int32)
Example #12
def getHungary(vehicles, tracks):
    """Match vehicles to tracks with the Hungarian algorithm.

    The matrix comes from ``getMatrix(vehicles, tracks)``. A copy is taken
    before the matrix is handed to the solver.

    Returns:
        ``(matrix_copy, indexes)`` where ``indexes`` is whatever
        ``Munkres.compute`` returns for the matrix.
    """
    cost_matrix = getMatrix(vehicles, tracks)
    untouched = np.copy(cost_matrix)
    solver = Munkres()
    return untouched, solver.compute(cost_matrix)
Example #13
    def evaluateFrame(self, X, Y, X_labels=None, Y_labels=None):
        """Compute the OSPA metric between two sets of points.

        Args:
            X, Y: point sets; the number of points is taken from axis 0.
            X_labels, Y_labels: label sequences indexed by point position.
                They are indexed for every matched pair, so both must be
                supplied whenever X and Y are both non-empty.

        Returns:
            Tuple ``(ospa_err, err_loc, err_cn, err_label)``. When both sets
            are empty this is ``(0, 0, 0, 0)``; when exactly one is empty it
            is ``(self.c, 0, self.c, 0)``.

        Side effects:
            Replaces ``self.old_assignments`` with the label pairs matched in
            this frame (the early returns for empty sets leave it untouched).
        """
        # check for empty sets
        if numpy.size(X) == 0 and numpy.size(Y) == 0:
            return (0, 0, 0, 0)
        elif numpy.size(X) == 0 or numpy.size(Y) == 0:
            return (self.c, 0, self.c, 0)

        # we assume that Y is the larger set; swap if it is not and remember
        # the swap so that labels can be looked up in the caller's order later
        m = numpy.size(X, 0)
        n = numpy.size(Y, 0)
        switched = False
        if m > n:
            X, Y = Y, X
            m, n = n, m
            switched = True

        dists = self.calculateCostMatrix(X, Y)
        # Copy cost matrix for munkres module
        munkres_matrix = numpy.copy(dists)
        # Only run munkres on non-empty matrix. The fallback must live in an
        # `else` branch: assigning `indices = []` unconditionally would throw
        # away the solver's result and zero out every error term below.
        if len(munkres_matrix) > 0:
            munkres = Munkres()
            indices = munkres.compute(munkres_matrix)
        else:
            indices = []

        # compute the OSPA metric
        total_loc = 0
        for [i, j] in indices:
            total_loc += dists[i][j]**self.p
        # calculate cardinalization error
        err_cn = (float((self.c)**(self.p) * (n - m)) / n)**(1 / float(self.p))
        # calculate localization error
        err_loc = (float(total_loc) / n)**(1 / float(self.p))

        # Not contained in version from github implemented
        # acc. to paper "A Metric for Performance Evaluation of Multi-Target Tracking Algorithms"
        # Implementation of labeling error
        # Penalizes ID switches from frame to frame
        new_assignments = []
        for [i, j] in indices:
            if switched:
                # indices refer to the swapped sets; map them back so that
                # i indexes X_labels and j indexes Y_labels
                i, j = j, i
            new_assignments.append((X_labels[i], Y_labels[j]))
        # A previous pairing counts as wrong when it does not reappear now.
        wrong_labels = len(self.old_assignments) - len(
            set(new_assignments) & set(self.old_assignments))
        err_label = (float((float(self.a)**float(self.p)) / n) *
                     wrong_labels)**(1 / float(self.p))
        # store current assignments of labels to track
        self.old_assignments = new_assignments
        # NOTE(review): the label term here uses self.a, while err_label above
        # uses self.a ** self.p - confirm which one is intended.
        ospa_err = (float(total_loc + self.a * wrong_labels +
                          (n - m) * self.c**self.p) / n)**(1 / float(self.p))
        ospa_tuple = (ospa_err, err_loc, err_cn, err_label)

        return ospa_tuple
Example #14
def kmeans_acc_ari_ami(X, L):
    """Cluster ``X`` with k-means and score the clustering against ``L``.

    Requires scikit-learn.

    # Arguments
        X: samples, passed unchanged to ``KMeans.fit_predict``
        L: true labels, numpy.array with one entry per sample; the number of
            distinct values sets the number of clusters
    # Return
        ``(acc, ari, ami)``: clustering accuracy in [0, 1] after the best
        one-to-one relabelling of clusters (0 when k-means produced a
        different number of distinct labels than ``L`` has), the adjusted
        Rand index and the adjusted mutual information.
    """
    n_clusters = len(np.unique(L))
    kmeans = KMeans(n_clusters=n_clusters, n_init=20)

    y_pred = kmeans.fit_predict(X).astype(np.int64)
    y_true = L.astype(np.int64)
    assert y_pred.size == y_true.size

    # Work on row vectors of shape (1, n_samples).
    y_pred = y_pred.reshape((1, -1))
    y_true = y_true.reshape((1, -1))

    if len(np.unique(y_pred)) == len(np.unique(y_true)):
        # NOTE(review): the relabelling below assumes labels are the integers
        # 0..C-1 in both y_pred and y_true - confirm for the callers' data.
        C = len(np.unique(y_true))

        # cost_m[i, j] = number of samples in predicted cluster i whose true
        # label is NOT j, i.e. the errors made by relabelling cluster i as j.
        cost_m = np.zeros((C, C), dtype=float)
        for i in np.arange(0, C):
            members = np.where(y_pred == i)[1]
            for j in np.arange(0, C):
                cost_m[i, j] = np.count_nonzero(y_true[0, members] != j)

        # Minimum-error one-to-one relabelling; entry c of best_map is the
        # (row, col) pair for predicted cluster c.
        best_map = Munkres().compute(cost_m)
        relabel = np.array([col for _, col in best_map], dtype=np.int64)
        y_pred = relabel[y_pred]

        h = y_pred.shape[1]
        acc = 1 - (np.count_nonzero(y_pred - y_true) / float(h))
    else:
        # No one-to-one relabelling exists when the label counts differ.
        acc = 0

    y_pred = y_pred[0]
    y_true = y_true[0]
    ari, ami = adjusted_rand_score(y_true, y_pred), adjusted_mutual_info_score(
        y_true, y_pred)

    return acc, ari, ami
Example #15
 def __init__(self):
     """Start with empty training/testing sets and a Munkres solver."""
     self.training_set, self.testing_set = [], []
     self.m = Munkres()
Example #16
    def execute(environment):
        # Plan which robot moves which object. For every (robot, object) pair
        # the first pending segment of the object's plan is costed and the
        # result written into a square cost matrix.
        # `environment` is a dict read through the keys 'workspace',
        # 'object_list' and 'robot_dict'.
        # NOTE(review): this copy is truncated in many places (unclosed
        # brackets, `if` statements with no body, `except` without `try:`),
        # and the part that consumes `allocator`/`cost_matrix` is not visible.
        # Restore from the original source before running.

        # print "-------------------------------------------------"
        # print "Planning for objects..."


        allocation_list = []

        space = environment['workspace']
        object_list = environment['object_list']
        robots = environment['robot_dict']

        total_object = 0
        total_robot = 0

        allocator = Munkres()

        # While not all objects are transported
        while True:
            # Create the list with objects that there are not yet
            # fully transported
            # NOTE(review): the closing `]` of this comprehension is missing.
            work_list = [
                obj for obj in object_list if len(obj['plan']['segments']) != 0

            # With all objects were transported, finish
            # NOTE(review): the body of this `if` is missing.
            if len(work_list) == 0:

            # print "-------------------------------------------------"
            # print "Planning for robots..."

            # Create the allocation matrix
            # Square, for the Hungarian Method
            shape = (max(len(work_list), len(robots)), ) * 2
            # NOTE(review): the fill value and closing `)` are missing.
            cost_matrix = np.full(shape,

            robot_id_list = robots.keys()
            # robot_plan[robot_id][object idx] collects the plans computed
            # for that robot/object pair.
            robot_plan = {}

            # For each robot available
            for robot_index, robot_id in enumerate(robot_id_list):

                # print "# robot", robot_index
                robot = robots[robot_id]
                robot_plan[robot_id] = {}

                # For each object to be transported
                for obj_index, obj in enumerate(work_list):
                    # print "### object", obj['idx']

                    robot_plan[robot_id][obj['idx']] = {}

                    # Get first segment
                    segment = obj['plan']['segments'][0]

                    # NOTE(review): `is not` compares identity, not equality;
                    # the body of this `if` is also missing.
                    if segment.graph['type'] is not robot.robot_type:

                    # If there is another object in the segment,
                    # its no possible to be executed
                    # NOTE(review): the closing `]` is missing, and
                    # `is not` on idx values is an identity comparison.
                    in_place_objects = [
                        item for item in object_list
                        if (item['idx'] is not obj['idx']) and (
                            hash(item['current_pos']) in segment)

                    # NOTE(review): the body of this `if` is missing.
                    if len(in_place_objects) != 0:

                    segment_start = segment.node[segment.graph['start_id']]
                    segment_end = segment.node[segment.graph['end_id']]

                    # Prepare Movement

                    # Positions of objects are registered on the workspace as
                    # temporary obstacles while planning, and cleared after.
                    # NOTE(review): the closing `]` is missing.
                    space.temp_obstacle_list = [
                        item['current_pos'] for item in object_list

                    robot_start = robot.copy()

                    robot_end = robot.copy()

                    # NOTE(review): both assignments below sit in the same
                    # branch, so the second overwrites the first - an `else:`
                    # line appears to be missing between them.
                    if robot.robot_type == Robot.TYPES.aerial:
                        robot_end.position = Planner.create_robot_position(
                            segment_start, -1).position
                        robot_end.position = Planner.create_robot_position(
                            segment_start, 2).position

                    # NOTE(review): the `try:` line and the arguments of this
                    # call are missing.
                        prepare_plan = Manager._execute_planning({
                    except Exception, e:
                        print "Object: ", obj['idx'] + 1
                        print "Impossible prepare plan"

                    space.temp_obstacle_list = []

                    # NOTE(review): the start of this statement is missing.
                        obj['idx']]['prepare_plan'] = prepare_plan

                    # Move Movement

                    # Same obstacle registration, this time excluding the
                    # object being moved.
                    # NOTE(review): the closing `]` is missing.
                    space.temp_obstacle_list = [
                        item['current_pos'] for item in object_list
                        if item['idx'] is not obj['idx']

                    # The move starts where the prepare step ended.
                    robot_start.position = robot_end.position
                    robot_end = robot.copy()
                    # NOTE(review): the arguments of this call are missing.
                    robot_end.position = Planner.create_robot_position(

                    # NOTE(review): the `try:` line and the arguments of this
                    # call are missing.
                        moviment_plan = Manager._execute_planning({
                    except Exception, e:
                        print "Object: ", obj['idx'] + 1
                        print "Impossible movement plan"

                    space.temp_obstacle_list = []

                    # NOTE(review): the start of this statement is missing.
                        obj['idx']]['moviment_plan'] = moviment_plan

                    # Total cost to movement this object
                    # by the current robot
                    # NOTE(review): the rest of this expression is missing.
                    total_cost = obj['plan']['total'] - len(segment) + len(

                    # Populate the cost matrix
                    cost_matrix[robot_index][obj_index] = total_cost
Example #17

    targetlist = list(inames)
    baselist = list(cnames) + list(inames)
    cost_matrix = gen_cost_matrix(targetlist, baselist, instance, concept)
    print(len(cost_matrix), len(cost_matrix[0]))

    for row in cost_matrix:
        print("\t".join(["%0.2f" % v for v in row]))

    mun = Munkres()
    s = mun.compute(cost_matrix)
    #s = linear_sum_assignment(cost_matrix)
    #mun_sol = {targetlist[ti]: baselist[s[1][i]] for i, ti in enumerate(s[0])}
    mun_sol = {
        baselist[len(cnames) + row] if col > len(cnames) else baselist[col]
        for row, col in s
    print("Munkres solution:")
    print("Munkres cost:")

    print(mapping_cost(frozenset(mun_sol.items()), instance, concept))
 def run(costs):
     """Return the total cost of the assignment Munkres computes for ``costs``.

     ``costs`` is indexed with integer row/column arrays, so it must support
     NumPy-style fancy indexing.
     """
     solver = Munkres()
     pairs = np.array(solver.compute(costs), dtype=int)
     rows, cols = pairs[:, 0], pairs[:, 1]
     return costs[rows, cols].sum()
def py_max_match(scores):
    """Return the Munkres assignment for the negated ``scores`` matrix.

    The (row, col) pairs produced by ``Munkres.compute(-scores)`` are
    returned as an ``int32`` array.
    """
    solver = Munkres()
    matched = solver.compute(-scores)
    return np.array(matched).astype(np.int32)
    def allocate(self):
        # Allocate adjudicators to debates with the Hungarian algorithm:
        # first the voting adjudicators (one cost-matrix row per panel seat),
        # then the trainees (one cost-matrix row per debate).
        # Reads self.adjudicators, self.debates, self.min_voting_score,
        # self.no_panellists and self.no_trainees; returns the list `alloc`.
        # NOTE(review): this copy is truncated in several places (missing
        # `]`, `else:`, `try:` and `logger.info(` lines, and the statements
        # that append to `cost_matrix` and `alloc` are not visible). Restore
        # from the original source before running.

        # Sort voting adjudicators in descending order by score
        # NOTE(review): the closing `]` of this comprehension is missing.
        voting = [
            a for a in self.adjudicators
            if a._hungarian_score >= self.min_voting_score and not a.trainee
        voting.sort(key=lambda a: a._hungarian_score, reverse=True)

        n_debates = len(self.debates)
        if self.no_panellists:
            # Without panellists only the top n_debates adjudicators vote.
            voting = voting[:n_debates]
        n_voting = len(voting)

        # NOTE(review): an `else:` line appears to be missing after the first
        # assignment; as shown, `trainees = []` is immediately overwritten.
        if self.no_trainees:
            trainees = []
            trainees = [a for a in self.adjudicators if a not in voting]
            trainees.sort(key=lambda a: a._hungarian_score, reverse=True)

        # Divide debates into solo-chaired debates and panel debates
        debates_sorted = sorted(self.debates,
                                key=lambda d: (-d.importance, d.room_rank))

        # Figure out how many judges per room, prioritising the most important
        judges_per_room_floor = n_voting // n_debates
        n_bigger_panels = n_voting % n_debates
        # NOTE(review): the element of the second list is missing, so as shown
        # only the n_bigger_panels larger panels end up in the list.
        judges_per_room = [judges_per_room_floor + 1] * n_bigger_panels + [
        ] * (n_debates - n_bigger_panels)

        # NOTE(review): the logging calls that own the next argument lines are
        # missing, as is the body of the `if`.
            "There are %d debates, %d voting adjudicators and %d trainees",
            len(debates_sorted), len(voting), len(trainees))
        if n_voting < n_debates:
                "There are %d debates but only %d voting adjudicators",
                n_debates, n_voting)

        # Allocate voting
        m = Munkres()

        logger.info("costing voting adjudicators")
        cost_matrix = []
        for debate, njudges in zip(debates_sorted, judges_per_room):
            for i in range(njudges):
                # One row per seat on the panel; seat i is costed with
                # adjustment=-i.
                # NOTE(review): the closing `]` and the append of `row` to
                # `cost_matrix` are missing.
                row = [
                    self.calc_cost(debate, adj, adjustment=-i)
                    for adj in voting

        # NOTE(review): the logging call that owns these arguments is missing.
            "optimizing voting adjudicators (matrix size: %d positions by %d adjudicators)",
            len(cost_matrix), len(cost_matrix[0]))
        indexes = m.compute(cost_matrix)
        total_cost = sum(cost_matrix[i][j] for i, j in indexes)
        logger.info('total cost for %d debates: %f', n_debates, total_cost)

        # transfer the indices to the debates
        alloc = []
        for debate, njudges in zip(debates_sorted, judges_per_room):
            aa = AdjudicatorAllocation(debate)
            # The first njudges pairs are taken for this debate and removed
            # from `indexes` below; this assumes the pairs come back in row
            # order, matching the order the rows were built in.
            panel_indices = indexes[0:njudges]
            panel = [voting[c] for r, c in panel_indices]
            panel.sort(key=lambda a: a._hungarian_score, reverse=True)
            # The highest-scoring panel member becomes chair; an empty panel
            # leaves the debate without a chair.
            # NOTE(review): the `try:` line that opens this handler is missing.
                aa.chair = panel.pop(0)
            except IndexError:
                aa.chair = None
            aa.panellists = panel
            del indexes[0:njudges]

            logger.info("allocating to %s: %s (c), %s", aa.debate, aa.chair,
                        ", ".join([str(p) for p in aa.panellists]))

        # Allocate trainees, one per debate
        if len(trainees) > 0 and len(debates_sorted) > 0:
            allocation_by_debate = {aa.debate: aa for aa in alloc}

            logger.info("costing trainees")
            cost_matrix = []
            for debate in debates_sorted:
                chair = allocation_by_debate[debate].chair
                # NOTE(review): the closing `]` and the append of `row` to
                # `cost_matrix` are missing.
                row = [
                    self.calc_cost(debate, adj, adjustment=-2.0, chair=chair)
                    for adj in trainees

            # NOTE(review): the logging call that owns these arguments is
            # missing.
                "optimizing trainees (matrix size: %d positions by %d trainees)",
                len(cost_matrix), len(cost_matrix[0]))
            indexes = m.compute(cost_matrix)
            total_cost = sum(cost_matrix[i][j] for i, j in indexes)
            # NOTE(review): the final argument and closing `)` are missing.
            logger.info('total cost for %d trainees: %f', len(trainees),

            # Keep only pairs whose row index refers to a real debate.
            result = ((debates_sorted[i], trainees[j]) for i, j in indexes
                      if i < len(debates_sorted))
            for debate, trainee in result:
                logger.info("allocating to %s: %s (t)", debate, trainee)

        return alloc
Example #21
    def allocate(self):
        # Allocate adjudicators to debates in two Hungarian-algorithm passes:
        # solo chairs for the most important debates, then three-person
        # panels for the remaining debates. Returns the list `alloc` of
        # AdjudicatorAllocation objects.
        # Python 2 code (print statements, list-returning filter()).
        # NOTE(review): this copy is truncated (unclosed filter( call, a
        # missing `else:`, and the statement that appends panel allocations
        # to `alloc` is not visible). Restore from the original source before
        # running.
        from debate.models import AdjudicatorAllocation

        # remove trainees
        # NOTE(review): the second argument and closing `)` are missing.
        self.adjudicators = filter(lambda a: a.score > self.MIN_SCORE,

        # sort adjudicators and debates in descending score/importance
        self.adjudicators_sorted = list(self.adjudicators)
        self.adjudicators_sorted.sort(key=lambda a: a.score, reverse=True)
        self.debates_sorted = list(self.debates)
        self.debates_sorted.sort(key=lambda a: a.importance, reverse=True)

        n_adjudicators = len(self.adjudicators)
        n_debates = len(self.debates)

        # Adjudicators beyond one per debate are paired up to turn solo
        # debates into panels. Relies on Python 2 integer division: under
        # Python 3 this would be a float and the slice below would fail.
        n_solos = n_debates - (n_adjudicators - n_debates) / 2

        # get adjudicators that can adjudicate solo
        chairs = self.adjudicators_sorted[:n_solos]
        #chairs = [a for a in self.adjudicators_sorted if a.score >
        #          self.CHAIR_CUTOFF]

        # get debates that will be judged by solo adjudicators
        chair_debates = self.debates_sorted[:len(chairs)]

        panel_debates = self.debates_sorted[len(chairs):]
        panellists = [a for a in self.adjudicators_sorted if a not in chairs]

        # Every panel debate needs three panellists.
        assert len(panel_debates) * 3 <= len(panellists)

        print "costing chairs"

        n = len(chairs)

        # Square matrix: row i is a solo debate, column j is a chair.
        cost_matrix = [[0] * n for i in range(n)]

        for i, debate in enumerate(chair_debates):
            for j, adj in enumerate(chairs):
                cost_matrix[i][j] = self.calc_cost(debate, adj)

        print "optimizing"

        m = Munkres()
        indexes = m.compute(cost_matrix)

        total_cost = 0
        for r, c in indexes:
            total_cost += cost_matrix[r][c]

        print 'total cost for solos', total_cost
        print 'number of solo debates', n

        # Keep only pairs whose row index refers to a real debate.
        result = ((chair_debates[i], chairs[j]) for i, j in indexes
                  if i < len(chair_debates))
        alloc = [AdjudicatorAllocation(d, c) for d, c in result]

        print[(a.debate, a.chair) for a in alloc]

        # do panels
        n = len(panel_debates)

        npan = len(panellists)

        if npan:
            print "costing panellists"

            # matrix is square, dummy debates have cost 0
            cost_matrix = [[0] * npan for i in range(npan)]
            for i, debate in enumerate(panel_debates):
                # Rows 3*i .. 3*i+2 are the three seats of panel debate i.
                for j in range(3):

                    # for the top half of these debates, the final panellist
                    # can be of lower quality than the other 2
                    # NOTE(review): an `else:` line appears to be missing; as
                    # shown, adjustment is always overwritten with 0.
                    if i < npan / 2 and j == 2:
                        adjustment = -1.0
                        adjustment = 0

                    for k, adj in enumerate(panellists):
                        cost_matrix[3 * i + j][k] = self.calc_cost(
                            debate, adj, adjustment)

            print "optimizing"

            indexes = m.compute(cost_matrix)

            cost = 0
            for r, c in indexes:
                cost += cost_matrix[r][c]

            print 'total cost for panellists', cost

            # transfer the indices to the debates
            # the debate corresponding to row r is floor(r/3) (i.e. r // 3)
            # Only the first n * 3 pairs are used; this assumes the pairs
            # come back in row order so that the remainder are dummy rows.
            p = [[] for i in range(n)]
            for r, c in indexes[:n * 3]:
                p[r // 3].append(panellists[c])

            # create the corresponding adjudicator allocations, making sure
            # that the chair is the highest-ranked adjudicator in the panel
            for i, d in enumerate(panel_debates):
                a = AdjudicatorAllocation(d)
                p[i].sort(key=lambda a: a.score, reverse=True)
                a.chair = p[i].pop(0)
                a.panel = p[i]
                # NOTE(review): `a` is never added to `alloc` in this copy, so
                # the slice printed below would be empty.

        print[(a.debate, a.chair, a.panel) for a in alloc[len(chairs):]]

        return alloc
def matching_eigenvectors_of_modes(number_of_modes, eigenvectors_1, eigenvectors_2):
    """
    Matches the eigenvectors of two crystal structures and returns the new order of the modes and the weight of each
    match.

    Parameters
    ----------
    number_of_modes: int
        Number of vibrational modes in the crystal lattice
    eigenvectors_1: List[float]
        Eigenvectors of the reference crystal structure in the form of a 3N x 3N matrix, where N is the number of atoms
        in the crystal lattice.
    eigenvectors_2: List[float]
        Eigenvectors of the crystal structure that needs the modes reorganized in the form of a 3N x 3N matrix, where N
        is the number of atoms in the crystal lattice.

    Returns
    -------
    z: numpy array of int, one row per matched mode
        Column 0 is the mode index in eigenvectors_1 and column 1 the matched mode index in eigenvectors_2. The first
        three modes are excluded from the matching, so indices start at 3.
    weight: numpy array of float
        Weight describing how well each matched pair of eigenvectors agrees. A weight of 0 indicates a perfect match.
    """
    # Using the Munkres matching package
    m = Munkres()

    # Setting a matrix to save the weights to match all of the vibrational modes
    weight = np.zeros((number_of_modes - 3, number_of_modes - 3))

    # Cycling through all modes to apply a weight of how well they match
    for i in range(3, number_of_modes):
        reference = eigenvectors_1[i]
        reference_norm = np.linalg.norm(reference)

        # Cos(Theta) between the two eigenvectors that currently share index i
        diff = np.dot(reference, eigenvectors_2[i]) \
               / (reference_norm * np.linalg.norm(eigenvectors_2[i]))

        if np.absolute(diff) > 0.95:
            # If Cos(Theta) is close to 1 for the direct order comparison of the eigenvectors then they are well
            #    matched and all other comparisons are set with a much higher weight
            # NOTE(review): the stored weight uses the signed value, so an anti-parallel match (diff < -0.95) gets a
            #    weight near 2 instead of near 0 - confirm whether 1 - |diff| was intended.
            weight[i - 3] = 10000000.
            weight[i - 3, i - 3] = 1. - diff
        else:
            # Otherwise the weight compared to all other eigenvectors are computed
            for j in range(3, number_of_modes):
                candidate = eigenvectors_2[j]
                cos_theta = np.dot(reference, candidate) / (reference_norm * np.linalg.norm(candidate))

                # Eigenvectors represent vibrations, so the opposite direction is equally valid. Taking the best of
                #    1 - Cos(Theta) over every sign flip of either vector is the same as 1 - |Cos(Theta)|.
                weight[i - 3, j - 3] = 1 - np.absolute(cos_theta)

    # Using the Hungarian algorithm to match wavenumbers
    Wgt = m.compute(weight)
    x, y = zip(*Wgt)
    z = np.column_stack((x, y))
    # Shift back to absolute mode indices (the first three modes were skipped)
    z = z + 3
    return z, weight[z[:, 0] - 3, z[:, 1] - 3]
Example #23
def perform_alignment(ref_instances, sys_instances, kernel, maximize=True):
    # Align system instances to reference instances: `kernel(r, s)` scores
    # every (reference, system) pair, the scores are turned into a cost
    # matrix and Munkres picks the assignment.
    # Returns the tuple (correct_detects, missed_detects, false_alarms).
    # With maximize=True similarities are converted to costs so that the
    # solver maximises similarity; otherwise they are used as costs directly.
    # Pairs for which the kernel returns DISALLOWED get the highest cost.
    # NOTE(review): this copy is truncated (the statements that append to
    # sim_matrix/component_matrix and to the three result lists, several
    # `else:` lines and the bookkeeping of unmapped_sys/unmapped_ref are not
    # visible). Restore from the original source before running.
    disallowed = {}
    max_sim = 0
    sim_matrix, component_matrix = [], []
    start = time.time_ns()
    report_matrix_stats = False  ### Boolean to report the stats
    report_matrix_start = False  ### Writes a line before the matrix processing begins
    report_matrix_print_threshold = 10000  ### Controls the size of the matrix to trigger output

    if report_matrix_stats and report_matrix_start and len(
            ref_instances) * len(sys_instances) > report_matrix_print_threshold:
        print("[Info] StartBigAlignment: {} x {} = {}".format(
            len(ref_instances), len(sys_instances),
            len(ref_instances) * len(sys_instances)))

    # Rows are system instances, columns are reference instances.
    for s_i, s in enumerate(sys_instances):
        sim_row = []
        comp_row = []
        for r_i, r in enumerate(ref_instances):
            sim, comp = kernel(r, s)


            # Remember disallowed pairs by (system index, reference index).
            # NOTE(review): an `else:` line appears to be missing before the
            # max_sim update, and sim_row/comp_row are never filled here.
            if sim == DISALLOWED:
                disallowed[(s_i, r_i)] = True
                if sim > max_sim: max_sim = sim


    # NOTE(review): an `else:` line appears to be missing between the two
    # _mapper definitions; as shown, the second one always wins.
    if maximize:

        def _mapper(sim):
            return max_sim + 1 if sim == DISALLOWED else (max_sim + 1) - sim

        def _mapper(sim):
            return max_sim + 1 if sim == DISALLOWED else sim

    matrix = make_cost_matrix(sim_matrix, _mapper)

    correct_detects, false_alarms, missed_detects = [], [], []
    unmapped_sys = set(range(0, len(sys_instances)))
    unmapped_ref = set(range(0, len(ref_instances)))
    if len(matrix) > 0:
        for s_i, r_i in Munkres().compute(matrix):
            # NOTE(review): the bodies of this `if`/`else` are missing; only
            # fragments of the AdjudicationRecord-style calls remain.
            if disallowed.get((s_i, r_i), False):

                    AlignmentRecord(ref_instances[r_i], sys_instances[s_i],
                    AlignmentRecord(ref_instances[r_i], sys_instances[s_i],
                                    component_matrix[s_i][r_i], None, None,
    # References left without a system instance.
    # NOTE(review): the statements wrapping these records are missing.
    for r_i in unmapped_ref:
                AlignmentRecord(ref_instances[r_i], None, None, None,
                                ref_instances[r_i].localization, None,
                AlignmentRecord(ref_instances[r_i], None, None, None, None,
                                None, None))

    # System instances left without a reference.
    # NOTE(review): the statements wrapping these records are missing.
    for s_i in unmapped_sys:
                AlignmentRecord(None, sys_instances[s_i], None, None, None,
                AlignmentRecord(None, sys_instances[s_i], None, None, None,
                                None, None))

    if report_matrix_stats and len(ref_instances) * len(
            sys_instances) > report_matrix_print_threshold:
        # Elapsed time is reported in seconds (time_ns() delta / 1e9).
        print("[Info] BigAlignmentMatixTime: {} x {} = {}, {} sec.".format(
            len(ref_instances), len(sys_instances),
            len(ref_instances) * len(sys_instances),
            (time.time_ns() - start) / 1000000000))

    return (correct_detects, missed_detects, false_alarms)
Example #24
# Assign a tile id and a filter to every exposure slot, using the strategy
# named by params["scheduleType"]; returns the pair (idxs, filts).
#
# NOTE(review): this listing looks truncated -- the signature is never closed,
# the docstring text has lost its quotes, and a number of statements are
# missing their continuation, branch-header or body lines. The comments below
# describe only the code that is actually visible here.
def get_order(params, tile_struct, tilesegmentlists, exposurelist, observatory,
    tile_struct: dictionary. key -> struct info. 
    tilesegmentlists: list of lists. Segments for each tile in tile_struct 
        that are available for observation.
    exposurelist: list of segments that the telescope is supposed to be working.
        consecutive segments from the start to the end, with each segment size
        being the exposure time.
    Returns a list of tile indices in the order of observation.
    keys = tile_struct.keys()

    # Per-tile bookkeeping arrays, indexed by the tile's position in `keys`.
    # first/last_exposure start at +inf/-inf so the min/max updates in the
    # exposure loop below always replace them on the first hit.
    exposureids_tiles = {}
    first_exposure = np.inf * np.ones((len(keys), ))
    last_exposure = -np.inf * np.ones((len(keys), ))
    tileprobs = np.zeros((len(keys), ))
    tilenexps = np.zeros((len(keys), ))
    tileexptime = np.zeros((len(keys), ))
    tileexpdur = np.zeros((len(keys), ))
    tilefilts = {}
    tileavailable = np.zeros((len(keys), ))
    tileavailable_tiles = {}
    keynames = []

    # nexps accumulates the total number of exposures requested over all tiles.
    nexps = 0
    for jj, key in enumerate(keys):
        tileprobs[jj] = tile_struct[key]["prob"]
        tilenexps[jj] = tile_struct[key]["nexposures"]
        # NOTE(review): the three assignments below sit at different indents;
        # presumably they are the bodies of a conditional whose tests are
        # missing from this listing -- confirm against the full source.
            tileexpdur[jj] = tile_struct[key]["exposureTime"]
                tileexpdur[jj] = tile_struct[key]["exposureTime"][0]
                tileexpdur[jj] = 0.0

        # Deep copy so that popping filters later does not mutate tile_struct.
        tilefilts[key] = copy.deepcopy(tile_struct[key]["filt"])
        tileavailable_tiles[jj] = []

        nexps = nexps + tile_struct[key]["nexposures"]

    # Optional declination window, given as a "min,max" comma-separated string.
    # NOTE(review): config_struct is not among the visible parameters.
    if "dec_constraint" in config_struct:
        dec_constraint = config_struct["dec_constraint"].split(",")
        dec_min = float(dec_constraint[0])
        dec_max = float(dec_constraint[1])

    # For every exposure slot, work out which tiles could be observed in it.
    for ii in range(len(exposurelist)):
        exposureids_tiles[ii] = {}
        exposureids = []
        probs = []
        ras, decs = [], []
        for jj, key in enumerate(keys):
            tilesegmentlist = tilesegmentlists[jj]
            # Tiles carrying zero probability are never candidates.
            if tile_struct[key]["prob"] == 0: continue
            # The bodies of the next few tests are missing from this listing;
            # presumably each one skips the tile -- confirm.
            if "dec_constraint" in config_struct:
                if (tile_struct[key]["dec"] <
                        dec_min) or (tile_struct[key]["dec"] > dec_max):
            # Compares the exposure start with column 2 of the tile's "epochs"
            # array against params["mindiff"] / 86400.0 (presumably seconds
            # converted to days -- confirm).
            if "epochs" in tile_struct[key]:
                if params["doMindifFilt"]:
                    #take into account filter for mindiff
                    idx = np.where(
                        np.asarray(tile_struct[key]["epochs_filters"]) ==
                    if np.any(
                            np.abs(exposurelist[ii][0] -
                                   tile_struct[key]["epochs"][idx, 2]) <
                            params["mindiff"] / 86400.0):
                elif np.any(
                        np.abs(exposurelist[ii][0] -
                               tile_struct[key]["epochs"][:, 2]) <
                        params["mindiff"] / 86400.0):
            # The tile is available when its segment list overlaps the slot:
            # track the first/last slot index and count the available slots.
            if tilesegmentlist.intersects_segment(exposurelist[ii]):

                first_exposure[jj] = np.min([first_exposure[jj], ii])
                last_exposure[jj] = np.max([last_exposure[jj], ii])
                tileavailable[jj] = tileavailable[jj] + 1
        # in every exposure, the tiles available for observation
        exposureids_tiles[ii]["exposureids"] = exposureids  # list of tile ids
        exposureids_tiles[ii]["probs"] = probs  # the corresponding probs
        exposureids_tiles[ii]["ras"] = ras
        exposureids_tiles[ii]["decs"] = decs

    # The per-slot lists are reset here and rebuilt per allocated exposure
    # (the statements that fill them are missing from this listing).
    exposureids = []
    probs = []
    ras, decs = [], []
    for ii, key in enumerate(keys):
        # tile_struct[key]["nexposures"]: the number of exposures assigned to this tile
        for jj in range(tile_struct[key]["nexposures"]):
            )  # list of tile ids for every exposure it is allocated to observe

    # Result placeholders: -1 means "no tile", 'n' means "no filter".
    idxs = -1 * np.ones((len(exposureids_tiles.keys()), ))
    filts = ['n'] * len(exposureids_tiles.keys())

    # Nothing requested: return the untouched placeholders.
    if nexps == 0:
        return idxs, filts

    if params["scheduleType"] == "airmass_weighted":

        # # first step is to sort the array in order of descending probability
        indsort = np.argsort(-np.array(probs))
        probs = np.array(probs)[indsort]
        ras = np.array(ras)[indsort]
        decs = np.array(decs)[indsort]
        exposureids = np.array(exposureids)[indsort]

        # Rows are exposure slots, columns are the sorted exposure entries.
        tilematrix = np.zeros((len(exposurelist), len(ras)))
        probmatrix = np.zeros((len(exposurelist), len(ras)))

        for ii in np.arange(len(exposurelist)):

            # first, create an array of airmass-weighted probabilities
            # The slot start is interpreted as an MJD timestamp.
            t = Time(exposurelist[ii][0], format='mjd')
            altaz = get_altaz_tiles(ras, decs, observatory, t)
            alts = altaz.alt.degree
            horizon = config_struct["horizon"]
            horizon_mask = alts <= horizon
            # airmass = 1 / cos(zenith angle), with zenith angle = 90 - altitude.
            airmass = 1 / np.cos((90. - alts) * np.pi / 180.)
            # Entries at or below the horizon get an enormous airmass so their
            # weighted probability collapses to (effectively) zero.
            below_horizon_mask = horizon_mask * 10.**100
            airmass = airmass + below_horizon_mask
            airmass_weight = 10**(0.4 * 0.1 * (airmass - 1))
            tilematrix[ii, :] = np.array(probs / airmass_weight)
            # `True ^ horizon_mask` is the logical NOT of the mask.
            probmatrix[ii, :] = np.array(probs * (True ^ horizon_mask))

    # Spacing between the starts of the first two slots, times 86400
    # (presumably days -> seconds -- confirm).
    # NOTE(review): indexes exposurelist[1], so at least two slots are needed.
    dt = (exposurelist[1][0] - exposurelist[0][0]) * 86400
    if params["scheduleType"] == "greedy":
        for ii in np.arange(len(exposurelist)):
            # Skip slots that already hold a positive tile id.
            if idxs[ii] > 0: continue

            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime < params["mindiff"] /
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            # find_tile finds the tile that covers the largest probability
            # restricted by availability of tile and timeallocation
            idx2, exposureids, probs = find_tile(
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]

                # Number of consecutive slots one exposure of this tile spans.
                num = int(np.ceil(tileexpdur[idx] / dt))
                # NOTE(review): tilenexps is decremented a second time here in
                # the visible code; this may be an artifact of missing branch
                # lines -- confirm against the full source.
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    for jj in range(num):
                            filts[ii + jj] = filt
                for jj in range(num):
                        idxs[ii + jj] = idx2
                idxs[ii] = idx2

            # Stop once find_tile reports no remaining exposure ids.
            if not exposureids: break
    elif params["scheduleType"] == "greedy_slew":
        # Same flow as "greedy", additionally remembering the pointing of the
        # last scheduled tile in current_ra / current_dec.
        current_ra, current_dec = np.nan, np.nan
        for ii in np.arange(len(exposurelist)):
            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime < params["mindiff"] /
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            # find_tile finds the tile that covers the largest probability
            # restricted by availability of tile and timeallocation
            idx2, exposureids, probs = find_tile(
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]

                num = int(np.ceil(tileexpdur[idx] / dt))
                # NOTE(review): second decrement of tilenexps, as in "greedy".
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    for jj in range(num):
                            filts[ii + jj] = filt
                for jj in range(num):
                        idxs[ii + jj] = idx2
                current_ra = tile_struct[idx2]["ra"]
                current_dec = tile_struct[idx2]["dec"]
                idxs[ii] = idx2

            if not exposureids: break
    elif params["scheduleType"] == "sear":
        #for ii in np.arange(len(exposurelist)):
        iis = np.arange(len(exposurelist)).tolist()
        while len(iis) > 0:
            ii = iis[0]
            # Tiles whose last available slot is this one and which still have
            # exposures left to schedule.
            mask = np.where((ii == last_exposure) & (tilenexps > 0))[0]

            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime < params["mindiff"] /
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            if len(mask) > 0:
                # Order the expiring tiles by ascending probability.
                idxsort = mask[np.argsort(tileprobs[mask])]
                idx2, exposureids, probs = find_tile(
                last_exposure[mask] = last_exposure[mask] + 1
                idx2, exposureids, probs = find_tile(
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    filts[ii] = filt
            idxs[ii] = idx2

            if not exposureids: break

    elif params["scheduleType"] == "weighted":
        for ii in np.arange(len(exposurelist)):
            jj = exposureids_tiles[ii]["exposureids"]
            # Weight = probability * remaining exposures / remaining
            # availability; non-finite results (e.g. division by zero) are
            # zeroed out.
            weights = tileprobs[jj] * tilenexps[jj] / tileavailable[jj]
            weights[~np.isfinite(weights)] = 0.0

            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime < params["mindiff"] /
            weights[exptimecheck] = 0.0

            if np.any(weights >= 0):
                idxmax = np.argmax(weights)
                idx2 = jj[idxmax]
                if idx2 in keynames:
                    idx = keynames.index(idx2)
                    tilenexps[idx] = tilenexps[idx] - 1
                    tileexptime[idx] = exposurelist[ii][0]
                    if len(tilefilts[idx2]) > 0:
                        filt = tilefilts[idx2].pop(0)
                        filts[ii] = filt
                idxs[ii] = idx2
            # This slot has passed, so each candidate has one fewer slot left.
            tileavailable[jj] = tileavailable[jj] - 1

    # Same test as the "airmass_weighted" block above; the matrices built there
    # are consumed here.
    elif params["scheduleType"] == "airmass_weighted":
        # then use the Hungarian algorithm (munkres) to schedule high prob tiles at low airmass
        tilematrix_mask = tilematrix > 10**(-10)

        if tilematrix_mask.any():
            print("Calculating Hungarian solution...")
            total_cost = 0
            cost_matrix = make_cost_matrix(tilematrix)
            m = Munkres()
            optimal_points = m.compute(cost_matrix)
            print("Hungarian solution calculated...")
            max_no_observ = min(tilematrix.shape)
            for jj in range(max_no_observ):
                idx0, idx1 = optimal_points[jj]
                # idx0 indexes over the time windows, idx1 indexes over the probabilities
                # idx2 gets the exposure id of the tile, used to assign tileexptime and tilenexps
                # NOTE(review): the condition guarding the indented lines below
                # is missing from this listing.
                    idx2 = exposureids[idx1]
                    pamw = tilematrix[idx0][idx1]
                    total_cost += pamw
                    if len(tilefilts[idx2]) > 0:
                        filt = tilefilts[idx2].pop(0)
                        filts[idx0] = filt

                idxs[idx0] = idx2

            # Presumably the `else` branch of `if tilematrix_mask.any()`; the
            # `else:` line is missing from this listing.
            print("The localization is not visible from the site.")

        # Presumably the final `else` for an unrecognised scheduleType.
        raise ValueError(
            "Scheduling options are greedy/sear/weighted/airmass_weighted, or with _slew."

    return idxs, filts
# NOTE(review): the listing this was restored from had lost the tail of the
# signature, the docstring quotes, the end of the cost_matrix(...) call and the
# final `else:`. The parameter list follows the documented parameters; the
# defaults (400 / False) are assumptions -- confirm against the callers.
def synaptic_partners_fscore(rec_annotations,
                             gt_annotations,
                             gt_segmentation,
                             matching_threshold=400,
                             all_stats=False):
    """Compute the f-score of the found synaptic partners.

    Parameters
    ----------

    rec_annotations: Annotations, containing found synaptic partners

    gt_annotations: Annotations, containing ground truth synaptic partners

    gt_segmentation: Volume, ground truth neuron segmentation

    matching_threshold: float, world units
        Euclidean distance threshold to consider two annotations a potential
        match. Annotations that are `matching_threshold` or more units apart
        from each other are not considered as potential matches.

    all_stats: boolean, optional
        Whether to also return precision, recall, FP, FN, and matches as a
        6-tuple with f-score

    Returns
    -------

    fscore: float
        The f-score of the found synaptic partners.
    precision: float, optional
    recall: float, optional
    fp: int, optional
    fn: int, optional
    filtered_matches: list of tuples, optional
        The indices of the matches with matching costs.
    """

    # get cost matrix
    costs = cost_matrix(rec_annotations, gt_annotations, gt_segmentation,
                        matching_threshold)

    # match using Hungarian method
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Finding cost-minimal matches...")
    munkres = Munkres()
    # have to copy, because munkres changes the cost matrix...
    matches = munkres.compute(costs.copy())

    # Keep only the assignments whose cost is within the threshold.
    filtered_matches = [(i, j, costs[i][j]) for (i, j) in matches
                        if costs[i][j] <= matching_threshold]
    print(str(len(filtered_matches)) + " matches found")

    # unmatched in rec = FP
    fp = len(rec_annotations.pre_post_partners) - len(filtered_matches)

    # unmatched in gt = FN
    fn = len(gt_annotations.pre_post_partners) - len(filtered_matches)

    # all ground truth elements - FN = TP
    tp = len(gt_annotations.pre_post_partners) - fn

    # Guard every ratio: with no reconstructed partners, no ground-truth
    # partners, or no matches at all, the denominators are zero.
    precision = float(tp) / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = float(tp) / (tp + fn) if (tp + fn) > 0 else 0.0
    if (precision + recall) > 0:
        fscore = 2.0 * precision * recall / (precision + recall)
    else:
        fscore = 0.0

    if all_stats:
        return (fscore, precision, recall, fp, fn, filtered_matches)
    return fscore
Example #26
    # Set up the Hungarian solver, device, hyper-parameters, the two networks
    # (Uphi / Ephi), losses and optimizer, then walk over a fixed list of
    # MOT16 training sequences.
    #
    # NOTE(review): this listing looks truncated -- the docstring has lost its
    # quotes, the optim.Adam(...) call is never closed and the body of the
    # sequence loop ends abruptly. It is Python 2 code (`print` statements,
    # `xrange`). Comments describe only what is visible.
    def __init__(self, lr=1e-3, batchs=8, cuda=True):
        :param tt: train_test
        :param tag: 1 - evaluation on testing data, 0 - without evaluation on testing data
        :param lr:
        :param batchs:
        :param cuda:
        # all the tensor should set the 'volatile' as True, and False when update the network
        self.hungarian = Munkres()
        # Run on the GPU only when the `cuda` flag is set.
        self.device = torch.device("cuda" if cuda else "cpu")
        self.nEpochs = 999
        self.lr = lr
        self.batchsize = batchs
        self.numWorker = 4

        self.show_process = 0  # interaction
        self.step_input = 1

        print '     Preparing the model...'

        self.Uphi = uphi().to(self.device)
        self.Ephi = ephi().to(self.device)

        # `criterion_s` is not defined in this block (presumably a module-level
        # flag): MSE loss when it is truthy, cross-entropy otherwise.
        self.criterion = nn.MSELoss() if criterion_s else nn.CrossEntropyLoss()
        self.criterion = self.criterion.to(self.device)

        self.criterion_v = nn.MSELoss().to(self.device)

        # One Adam optimizer with a parameter group per network.
        self.optimizer = optim.Adam([{
            'params': self.Uphi.parameters()
        }, {
            'params': self.Ephi.parameters()

        # seqs = [2, 4, 5, 9, 10, 11, 13]
        # lengths = [600, 1050, 837, 525, 654, 900, 750]
        # seqs[i] is a MOT16 sequence number; lengths[i] is stored as
        # self.train_test for that sequence.
        seqs = [2, 4, 5, 10]
        lengths = [600, 1050, 837, 654]

        for i in xrange(len(seqs)):
            # A fresh SummaryWriter is created for every sequence.
            self.writer = SummaryWriter()
            # print '     Loading Data...'
            seq = seqs[i]
            self.seq_index = seq
            start = time.time()
            sequence_dir = '../MOT/MOT16/train/MOT16-%02d' % seq

            # `t_dir` is not defined in this block (presumably module-level).
            self.outName = t_dir + 'result_%02d.txt' % seq
            # NOTE(review): this handle is not closed in the visible code.
            out = open(self.outName, 'w')

            self.train_set = DatasetFromFolder(sequence_dir, self.outName)

            self.train_test = lengths[i]
            self.tag = 0
            self.loss_threhold = 0.03

            print '     Logging...'
            # Seconds elapsed since `start` was taken above.
            t_data = time.time() - start
Example #27
    # Run the training loop for `num_epoch` epochs and return the metric
    # history lists (LS, losses, reg_arr, precision_arr, recall_arr,
    # Spectral_Kmeans_acc_arr).
    #
    # NOTE(review): this listing looks truncated -- the signature is never
    # closed, and several `else:` headers, call tails and the statements that
    # append to the history lists are missing. Comments describe only what is
    # visible.
    def train(self,
        # History lists; these are what the method returns.
        losses = []
        LS = []
        reg_arr = []
        precision_arr = []
        recall_arr = []
        Spectral_Kmeans_acc_arr = []
        self.display_step = display_step
        self.batch_size = batch_size
        print("num_samples : {}".format(dataset.num_samples))
        for epoch in range(num_epoch):
            # Per-epoch running averages over the batches.
            avg_loss = 0.
            avg_score = 0.
            reg_loss = 0.

            # Loop over all batches
            amount_batchs = dataset.get_amuont_batchs(self.batch_size)
            for i in range(amount_batchs):
                # With labels the dataset yields (xs, ys); the second
                # assignment is presumably the missing `else:` branch.
                if labeled:
                    batch_xs, batch_ys = dataset.next_batch(self.batch_size)
                    batch_xs = dataset.next_batch(self.batch_size)
                # One optimisation step; also fetches the loss, the Laplacian
                # score and the gate regulariser for this batch.
                _, loss, laplacian_score, reg_fs = self.sess.run([self.train_step, self.loss, self.laplacian_score, self.reg_gates], \
                                                          feed_dict={self.X: batch_xs, self.learning_rate:learning_rate})

                avg_loss += loss / amount_batchs
                avg_score += laplacian_score / amount_batchs
                reg_loss += reg_fs / amount_batchs

            # precision: share of the total alpha mass held by the first two
            # entries; recall: that same mass divided by 2. Presumably the
            # first two features are the known-relevant ones -- confirm.
            alpha_p = self.get_prob_alpha()
            precision = np.sum(alpha_p[:2]) / np.sum(alpha_p[:])
            recall = np.sum(alpha_p[:2]) / 2

            if (epoch + 1) % self.display_step == 0:
                # NOTE(review): "reg=" prints reg_fs (the last batch's value),
                # not the epoch average accumulated in reg_loss -- confirm
                # which one is intended.
                print("Epoch:", '%04d' % (epoch+1), "loss=", "{:.9f}".format(avg_loss), "score=", "{:.9f}".format(avg_score)\
                      ,"reg=", "{:.9f}".format(reg_fs) )

                if labeled:

                    # Keep only the feature columns whose alpha is positive
                    # (uses the last batch of the epoch).
                    indices = np.where(alpha_p > 0)[0]
                    XS = batch_xs[:, indices]
                    if XS.size == 0:
                        # No feature selected: fall back to one all-zero column.
                        XS = np.zeros((batch_xs.shape[0], 1))
                            cluster_acc(batch_ys, batch_ys * 0))


                        # Pairwise distances; the large value added on the
                        # diagonal keeps each row's minimum from being the
                        # zero self-distance, so `fac` is 5 divided by the
                        # largest nearest-neighbour distance.
                        Dist = squareform(pdist(XS))
                        fac = 5 / np.max(
                            np.min(Dist + np.eye(XS.shape[0]) * 1e5, axis=1))
                        clustering = SpectralClustering(

                        # Hungarian matching on a cost matrix derived from the
                        # confusion matrix, used to reorder the cluster labels
                        # before they are compared with batch_ys.
                        netConfusion = ms.confusion_matrix(batch_ys,
                        netCostMat = calcCostMatrix(netConfusion, 2)
                        m = Munkres()
                        indexes = m.compute(netCostMat)
                        clusterLabels = getClusterLabelsFromIndexes(indexes)
                        netSolClassVectorOrdered = clusterLabels[
                        accuracy_ls = np.mean(
                            netSolClassVectorOrdered == np.array(batch_ys))

        print("Optimization Finished!")

        return LS, losses, reg_arr, precision_arr, recall_arr, Spectral_Kmeans_acc_arr
Example #28
# Draw a grid of images, one per ordered pair of objectives, showing the log of
# the absolute trade ratio; pixels are matched to Pareto samples with the
# Hungarian algorithm.
#
# NOTE(review): this listing looks truncated -- the signature is never closed
# and several `else:` headers and call heads/tails are missing. Comments
# describe only what is visible.
def plotTradeRatios(mp,
    # Use the raw trade ratios, or (presumably in the missing `else:` branch)
    # the preconditioned ones.
    if preconditioner is None:
        tr = mp.tradeRatios
        tr = preconditioner(mp.tradeRatios)

    # Use every Pareto sample, or (presumably in the missing `else:` branch) a
    # random subset drawn with replacement.
    if numToSample is None:
        samples = mp.paretoSamples
        dataSize = mp.paretoSamples.shape[0]
        smplIndxsWithReplacement = np.random.randint(0,
        samples = mp.paretoSamples[smplIndxsWithReplacement, :]
    # Reduce the samples to two coordinates via the SVD factors.
    # NOTE(review): np.linalg.svd returns the right factor already transposed
    # (Vh); confirm that multiplying by V rather than V.T is intended.
    U, S, V = np.linalg.svd(samples)
    locs2d = np.dot(samples, V)[:, :2]
    ranges = np.ptp(locs2d, axis=0)
    # Jitter each coordinate by up to 1/1000 of its range.
    locs2dNoised = locs2d + np.random.random(
        locs2d.shape) * ranges[np.newaxis, :] * 1 / 1000
    rangesNoised = np.ptp(locs2dNoised, axis=0)
    # gradient=fa.reconstructDerivative(locations=mp.projectToPlaneCoor(samples))

    # Pixel-centre coordinates, scaled to the extent of the jittered samples.
    gridLocs = (np.mgrid[0:pixPerSide, 0:pixPerSide] +
                0.5) / pixPerSide * (rangesNoised[:, np.newaxis, np.newaxis])
    gridIndxs = np.mgrid[0:pixPerSide, 0:pixPerSide]
    flatLocs = np.array([arr.flatten() for arr in gridLocs]).T
    flatIndxs = np.array([arr.flatten() for arr in gridIndxs]).T
    # Pixel-to-sample distances after centring both point sets on their means.
    dists = spst.distance.cdist(
        flatLocs - np.mean(flatLocs, axis=0)[np.newaxis, :],
        locs2dNoised - np.mean(locs2dNoised, axis=0)[np.newaxis, :])

    # len(tr) x len(tr) grid of images (the initial cell value is missing from
    # this listing); the diagonal cells are all-ones images.
    accum = [[
    ] * len(tr) for cnt in range(len(tr))]
    for k in range(len(accum)):
        accum[k][k] = np.ones((pixPerSide, pixPerSide))
    print('beginning matching with ' + str(dists.shape) + ' sized matrix')
    if dists.shape[0] <= dists.shape[
            1]:  # number of drawn pixels < number of elements in the sample
        matches = np.array(
        )  # sparse representation. [:,0] is the pixel index, [:,1] is matching sample index
        print('finished matching')
        for i, j in it.combinations(range(len(tr)), 2):
            # Start from the global trade ratio, then overwrite the matched
            # pixels with the value computed at their matched sample.
            imageMat = np.ones((pixPerSide, pixPerSide)) * tr[i, j]
            # imageMat[flatIndxs[:,0],flatIndxs[:,1]]=gradient[matches[:,0],i]/gradient[matches[:,1],j]# set to the values of the tradeoffs at the elements of the samples.
            imageMat[flatIndxs[matches[:, 0], 0],
                     flatIndxs[matches[:, 0], 1]] = np.squeeze(
                         tradeoffCalc(mp, fa, samples[matches[:, 1], :], i, j))
            # The (j, i) cell holds the reciprocal ratio.
            accum[i][j] = imageMat
            accum[j][i] = 1 / imageMat
        # Presumably the missing `else:` branch: more pixels than samples, so
        # the matching runs on the transposed distance matrix.
        matches = np.array(Munkres().compute(
            dists.T))  # [:,0] is the sample index, [:,1] is the pixel index

        # check matching
        # plt.plot(flatLocs[:,0],flatLocs[:,1],'.',samples[:,0],samples[:,1],'o')
        # plt.plot(np.vstack((samples[matches[:,0],0],flatLocs[matches[:,1],0])),np.vstack((samples[matches[:,0],1],flatLocs[matches[:,1],1])))
        # plt.show()

        print('finished matching')
        for i, j in it.combinations(range(len(tr)), 2):
            imageMat = np.ones((pixPerSide, pixPerSide)) * tr[i, j]
            # imageMat[matches[:,0],matches[:,1]]=gradient[flatIndxs[:,0],i]/gradient[flatIndxs[:,1],j]# set to the values of the tradeoffs at the elements of the samples.
            imageMat[flatIndxs[matches[:, 1], 0],
                     flatIndxs[matches[:, 1], 1]] = np.squeeze(
                         tradeoffCalc(mp, fa, samples[matches[:, 0], :], i, j))
            accum[i][j] = imageMat
            accum[j][i] = 1 / imageMat
    # reorderArr=np.argsort(np.mean(tr,axis=0))
    # Identity ordering (the sorted alternative is commented out above).
    reorderArr = np.arange(len(tr))
    objLabels_reorder = list(map(lambda i: objLabels[i],
    accumReorder = [[accum[l][k] for k in reorderArr] for l in reorderArr]
    # Tile the per-pair images into one large image.
    toPlot = np.vstack(tuple(map(np.hstack, map(tuple, accumReorder))))

    # Plot on a log scale; the absolute value is taken before the logarithm.
    toPlot = np.log(np.abs(toPlot))

    plt.imshow(toPlot, cmap=globalCmap, interpolation='nearest')
    # The heads of the next two calls are missing (presumably the x/y tick
    # setters): one label per block, placed at the centre of each block.
        range(pixPerSide // 2, pixPerSide * len(objLabels_reorder),
              pixPerSide), objLabels_reorder)
        range(pixPerSide // 2, pixPerSide * len(objLabels_reorder),
              pixPerSide), objLabels_reorder)
Example #29
    # Body of the cost-matrix builder for two HSV palettes (presumably the
    # `generate_matrix` called by the script below -- its `def` line is missing
    # from this listing, as are the loop bodies and the `else:` header).
    len1 = len(hsvPalette1)
    len2 = len(hsvPalette2)
    matrix = []
    if len1 >= len2:
        # NOTE(review): the bodies of the next three loops are missing;
        # presumably they allocate `matrix` before it is indexed -- confirm.
        for i in range(0, len1):
        for j in range(0, len1):
            for k in range(0, len1):
        # Each cell is an integer derived from color_distance(...) * 1000,
        # plus 1 (the parentheses are unbalanced here, so an inner call was
        # lost from this listing).
        for i in range(0, len1):
            for j in range(0, len2):
                matrix[j][i] = int(
                        color_distance(hsvPalette1[i], hsvPalette2[j]) *
                        1000)) + 1
    return matrix

# Build the palette cost matrix, solve the assignment problem with the
# Hungarian algorithm and print each chosen pairing plus the total cost.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a syntax error on Python 3.
matrix = generate_matrix(c8, c6)
print(matrix)
m = Munkres()
indexes = m.compute(matrix)

print_matrix(matrix, msg='Lowest cost through this matrix:')
total = 0
for row, column in indexes:
    value = matrix[row][column]
    total += value
    print('(%d, %d) -> %d' % (row, column, value))
print('total cost: %d' % total)
Example #30
def main(args):
    gt_file = './armstrong-jan2018.json'
    #gt_file = './armstrong-setcore-20171115.json'
    #gt_file = '/Users/ashokdeb/Desktop/test/effect-forecasting-models/deb/warning_ARIMA_p_7_d_1_q_7_armstrong_endpoint-malware_2017-06-30.json'
    # warning_dir = './cause-effect/warnings/'
    warning_dir = './score_warnings/'

    mth = args.mth
    external_signal_name = args.ext.split('.')[0]
    method = args.method
    event_type = args.evt

    if mth == "July":
        start_date = datetime.date(2017, 7, 1)
        end_date = datetime.date(2017, 7, 31)
    elif mth == "August":
        start_date = datetime.date(2017, 8, 1)
        end_date = datetime.date(2017, 8, 31)
    elif mth == "September":
        start_date = datetime.date(2017, 9, 1)
        end_date = datetime.date(2017, 9, 30)
    elif mth == "October":
        start_date = datetime.date(2017, 10, 1)
        end_date = datetime.date(2017, 10, 31)
    elif mth == "November":
        start_date = datetime.date(2017, 11, 1)
        end_date = datetime.date(2017, 11, 30)
    elif mth == "December":
        start_date = datetime.date(2017, 12, 1)
        end_date = datetime.date(2017, 12, 31)
    elif mth == "January":
        start_date = datetime.date(2018, 1, 1)
        end_date = datetime.date(2018, 1, 31)

    #need to adjust
    #start_date = datetime.date(2017, 7, 1)
    #end_date = datetime.date(2017, 7, 31)
    #start_date = datetime.date(2017, 8, 1)
    #end_date = datetime.date(2017, 8, 31)
    #start_date = datetime.date(2017, 9, 1)
    #end_date = datetime.date(2017, 9, 30)

    target_org = args.target
    #target_org = 'knox'

    # Potentially filter by event type
    #   None -> Score for ALL event types
    #   Otherwise, restrict to 'endpoint-malware', 'malicious-email', or 'malicious-destination'
    #event_type = None

    # First need to parse input data
    warnings = load_warnings(warning_dir, target_org, event_type, start_date,
                             end_date, external_signal_name, method)
    events = load_gt(gt_file, target_org, event_type, start_date, end_date)

    print("# warnings = %d" % len(warnings))
    print("# events = %d" % len(events))

    # Now need to perform matching on warnings + events
    # To do this first have to score every possible warning-event pair
    # The official pair_objects.py is heavily dependent on python classes *not* provided to us by govt, so we recreate here
    M = Metrics()
    matching_matrix = np.zeros((len(events), len(warnings)))
    matching_dict = dict()
    for e_idx in range(len(events)):
        for w_idx in range(len(warnings)):
            # Check if we meet base criteria threshold
            if warnings[w_idx].event_type == "endpoint-malware":
                date_th = 0.875
            elif warnings[w_idx].event_type == "malicious-email":
                date_th = 1.375
            elif warnings[w_idx].event_type == "malicious-destination":
                date_th = 1.625
                raise Exception("Unknown event_type: %s" %

            if M.base_criteria(events[e_idx], warnings[w_idx], thr2=date_th):
                pair = Pair.build(warnings[w_idx], events[e_idx],
                                  'fake-performer', 'fake-provider')
                matching_matrix[e_idx, w_idx] = -pair.quality
                matching_dict["%d,%d" % (e_idx, w_idx)] = pair

    # Now do Hungarian matching
    munk = Munkres()
    pairings = munk.compute(matching_matrix.tolist())
    valid_pairings = list(
        filter(lambda p: matching_matrix[p[0], p[1]] != 0, pairings))

    nMatched = len(valid_pairings)
    nUnmatchedGT = len(events) - nMatched
    nUnmatchedW = len(warnings) - nMatched

    avg_qs = 0
    for e_idx, w_idx in valid_pairings:
        # pair = matching_dict["%d,%d" % (e_idx, w_idx)]
        # avg_qs += pair.quality
        avg_qs += matching_matrix[e_idx, w_idx]

    if len(valid_pairings) != 0:
        avg_qs /= len(valid_pairings)
        avg_qs = -1 * avg_qs
        recall = nMatched / (nMatched + nUnmatchedGT)
        precision = nMatched / (nMatched + nUnmatchedW)
        f1 = (2 * precision * recall) / (precision + recall)
        avg_qs = recall = precision = f1 = 0

    print("Precision = %0.2f%%" % (100 * precision))
    print("Recall = %0.2f%%" % (100 * recall))
    print("Average Quality Score = %0.2f" % avg_qs)

    new_results = [
        target_org, event_type, mth,
        len(warnings), external_signal_name,
        np.around(100 * precision, decimals=2),
        np.around(100 * recall, decimals=2),
        np.around(100 * f1, decimals=2),
        np.around(avg_qs, decimals=2)

    with open('./output/results.csv', 'a') as f:
        writer = csv.writer(f)