Example #1
def p345():
    A = [[
        7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583
    ],
         [
             627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583,
             639, 913
         ],
         [
             447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423,
             159, 743
         ],
         [
             217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462,
             350
         ],
         [
             960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172,
             812, 350
         ],
         [
             870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425,
             329, 803
         ],
         [
             973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165,
             992, 326
         ],
         [
             322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601,
             95, 973
         ],
         [
             445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448,
             848
         ],
         [
             414, 456, 310, 312, 798, 104, 566, 520, 302, 248, 694, 976, 430,
             392, 198
         ],
         [
             184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284,
             760, 390
         ],
         [
             821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192,
             156, 574
         ],
         [
             34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329,
             699
         ],
         [
             815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251,
             631, 107
         ],
         [
             813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302,
             35, 805
         ]]

    # Negate every entry so that the minimum-cost assignment found by Munkres
    # corresponds to the maximum-sum assignment of the original matrix.
    for r in range(len(A)):
        for c in range(len(A[0])):
            A[r][c] *= -1

    from munkres import Munkres

    m = Munkres()
    indexes = m.compute(A)
    total = 0
    for row, column in indexes:
        value = A[row][column]
        total += value
    return -total
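# A minimal alternative sketch for the same maximization trick: SciPy's
# linear_sum_assignment (SciPy >= 1.4, assumed available) accepts maximize=True
# directly, so the manual negation used above is not needed.
import numpy as np
from scipy.optimize import linear_sum_assignment

def max_assignment_sum(A):
    A = np.asarray(A)
    rows, cols = linear_sum_assignment(A, maximize=True)
    return int(A[rows, cols].sum())

print(max_assignment_sum([[7, 53], [627, 343]]))   # 680 for this cut-down 2x2 example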
Example #2
    def Update(self, detections):

        if (len(self.tracks) == 0):
            for i in range(len(detections)):
                track = Track(detections[i], self.trackIdCount)
                self.trackIdCount += 1
                self.tracks.append(track)

        N = len(self.tracks)
        M = len(detections)
        # Finding Cost then applying Hungarian Algorithm
        cost = np.zeros(shape=(N, M))
        for i in range(len(self.tracks)):
            for j in range(len(detections)):
                try:
                    diff = self.tracks[i].prediction - detections[j]
                    distance = np.sqrt(diff[0][0] * diff[0][0] +
                                       diff[1][0] * diff[1][0])
                    cost[i][j] = distance
                except Exception:
                    # skip pairs whose distance cannot be computed
                    pass

        cost = (0.5) * cost

        assignment = [-1] * N
        # Note: the Munkres instance below is unused; the actual assignment is
        # computed with scipy's linear_sum_assignment.
        hungarian = Munkres()

        #index = hungarian.compute(cost)
        row_ind, col_ind = linear_sum_assignment(cost)

        for i in range(len(row_ind)):
            assignment[row_ind[i]] = col_ind[i]

        un_assigned_tracks = []
        for i in range(len(assignment)):
            if (assignment[i] != -1):
                # reject associations whose cost exceeds the distance threshold
                if (cost[i][assignment[i]] > self.dist_thresh):
                    assignment[i] = -1
                    un_assigned_tracks.append(i)
            else:
                self.tracks[i].skipped_frames += 1

        del_tracks = []
        for i in range(len(self.tracks)):
            if (self.tracks[i].skipped_frames > self.max_frames_to_skip):
                del_tracks.append(i)
        if len(del_tracks) > 0:
            # delete from the highest index down so that earlier deletions do not
            # shift the indices of entries still to be deleted
            for id in sorted(del_tracks, reverse=True):
                if id < len(self.tracks):
                    del self.tracks[id]
                    del assignment[id]
                else:
                    print("Error in deleting tracks")

        un_assigned_detects = []
        for i in range(len(detections)):
            if i not in assignment:
                un_assigned_detects.append(i)

        if (len(un_assigned_detects) != 0):
            for i in range(len(un_assigned_detects)):
                track = Track(detections[un_assigned_detects[i]],
                              self.trackIdCount)
                self.trackIdCount += 1
                self.tracks.append(track)

        for i in range(len(assignment)):
            self.tracks[i].KF.predict()

            if (assignment[i] != -1):
                self.tracks[i].skipped_frames = 0
                self.tracks[i].prediction = self.tracks[i].KF.correct(
                    detections[assignment[i]], 1)
            else:
                self.tracks[i].prediction = self.tracks[i].KF.correct(
                    np.array([[0], [0]]), 0)

            if (len(self.tracks[i].trace) > self.max_trace_length):
                for j in range(
                        len(self.tracks[i].trace) - self.max_trace_length):
                    del self.tracks[i].trace[j]

            self.tracks[i].trace.append(self.tracks[i].prediction)
            self.tracks[i].KF.lastResult = self.tracks[i].prediction
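# A minimal sketch of the context Example #2 assumes: numpy, munkres and scipy
# imports plus a Track class. The Track/KalmanFilter stubs below are hypothetical
# stand-ins, not the original project's API.
import numpy as np
from munkres import Munkres                       # instantiated but unused above
from scipy.optimize import linear_sum_assignment  # performs the actual assignment


class KalmanFilter:
    """Hypothetical no-op filter exposing the interface Update() calls."""

    def __init__(self):
        self.lastResult = np.zeros((2, 1))

    def predict(self):
        return self.lastResult

    def correct(self, measurement, flag):
        self.lastResult = np.asarray(measurement, dtype=float).reshape(2, 1)
        return self.lastResult


class Track:
    """Hypothetical track object with the attributes Update() touches."""

    def __init__(self, detection, track_id):
        self.track_id = track_id
        self.prediction = np.asarray(detection, dtype=float).reshape(2, 1)
        self.skipped_frames = 0
        self.trace = []
        self.KF = KalmanFilter()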
Example #3
def kruskal_align(U, V, permute_U=False, permute_V=False):
    """Aligns two KTensors and returns a similarity score.

    Parameters
    ----------
    U : KTensor
        First kruskal tensor to align.
    V : KTensor
        Second kruskal tensor to align.
    permute_U : bool
        If True, modifies 'U' to align the KTensors (default is False).
    permute_V : bool
        If True, modifies 'V' to align the KTensors (default is False).

    Notes
    -----
    If `permute_U` and `permute_V` are both set to True, then the
    factors are ordered from most to least similar. If only one is
    True, then the factors of the modified KTensor are re-ordered to
    match the factors of the unmodified KTensor.

    Returns
    -------
    similarity : float
        Similarity score between zero and one.
    """

    # Compute similarity matrices.
    unrm = [f / np.linalg.norm(f, axis=0) for f in U.factors]
    vnrm = [f / np.linalg.norm(f, axis=0) for f in V.factors]
    sim_matrices = [np.dot(u.T, v) for u, v in zip(unrm, vnrm)]
    cost = 1 - np.mean(np.abs(sim_matrices), axis=0)

    # Solve matching problem via Hungarian algorithm.
    indices = Munkres().compute(cost.copy())
    prmU, prmV = zip(*indices)

    # Compute mean factor similarity given the optimal matching.
    similarity = np.mean(1 - cost[prmU, prmV])

    # If U and V are of different ranks, identify unmatched factors.
    unmatched_U = list(set(range(U.rank)) - set(prmU))
    unmatched_V = list(set(range(V.rank)) - set(prmV))

    # If permuting both U and V, order factors from most to least similar.
    if permute_U and permute_V:
        idx = np.argsort(cost[prmU, prmV])

    # If permute_U is False, then order the factors such that the ordering
    # for U is unchanged.
    elif permute_V:
        idx = np.argsort(prmU)

    # If permute_V is False, then order the factors such that the ordering
    # for V is unchanged.
    elif permute_U:
        idx = np.argsort(prmV)

    # If permute_U and permute_V are both False, then we are done and can
    # simply return the similarity.
    else:
        return similarity

    # Re-order the factor permutations.
    prmU = [prmU[i] for i in idx]
    prmV = [prmV[i] for i in idx]

    # Permute the factors.
    if permute_U:
        U.permute(prmU)
    if permute_V:
        V.permute(prmV)

    # Flip the signs of factors.
    flips = np.sign([F[prmU, prmV] for F in sim_matrices])
    flips[0] *= np.prod(flips, axis=0)  # always flip an even number of factors

    if permute_U:
        for i, f in enumerate(flips):
            U.factors[i] *= f

    elif permute_V:
        for i, f in enumerate(flips):
            V.factors[i] *= f

    # Return the similarity score
    return similarity
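# A self-contained mini-demonstration of the matching step used in kruskal_align:
# build a cost matrix of 1 - |cosine similarity| between two random factor
# matrices and let Munkres pick the best column pairing. Shapes and values here
# are arbitrary; a KTensor object is not needed for this sketch.
import numpy as np
from munkres import Munkres

u = np.random.rand(10, 3)
v = np.random.rand(10, 3)
u /= np.linalg.norm(u, axis=0)
v /= np.linalg.norm(v, axis=0)
cost = 1 - np.abs(u.T @ v)
pairs = Munkres().compute(cost.tolist())
print("matched factor pairs:", pairs)
print("mean similarity:", np.mean([1 - cost[i, j] for i, j in pairs]))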
Example #4
    def compute3rdPartyMetrics(self):
        """
            Computes the metrics defined in
                - Stiefelhagen 2008: Evaluating Multiple Object Tracking Performance: The CLEAR MOT Metrics
                  MOTA, MOTAL, MOTP
                - Nevatia 2008: Global Data Association for Multi-Object Tracking Using Network Flows
                  MT/PT/ML
        """

        # construct Munkres object for Hungarian Method association
        hm = Munkres()
        max_cost = 1e9

        # go through all frames and associate ground truth and tracker results
        # groundtruth and tracker contain lists for every single frame containing lists of KITTI format detections
        fr, ids = 0, 0
        for seq_idx in range(len(self.groundtruth)):
            seq_gt = self.groundtruth[seq_idx]
            seq_dc = self.dcareas[seq_idx]  # don't care areas
            seq_tracker = self.tracker[seq_idx]
            seq_trajectories = defaultdict(list)
            seq_ignored = defaultdict(list)

            # statistics over the current sequence, check the corresponding
            # variable comments in __init__ to get their meaning
            seqtp = 0
            seqitp = 0
            seqfn = 0
            seqifn = 0
            seqfp = 0
            seqigt = 0
            seqitr = 0

            last_ids = [[], []]

            n_gts = 0
            n_trs = 0

            for f in range(len(seq_gt)):
                g = seq_gt[f]
                dc = seq_dc[f]

                t = seq_tracker[f]
                # counting total number of ground truth and tracker objects
                self.n_gt += len(g)
                self.n_tr += len(t)

                n_gts += len(g)
                n_trs += len(t)

                # use hungarian method to associate, using boxoverlap 0..1 as cost
                # build cost matrix
                cost_matrix = []
                this_ids = [[], []]
                for gg in g:
                    # save current ids
                    this_ids[0].append(gg.track_id)
                    this_ids[1].append(-1)
                    gg.tracker = -1
                    gg.id_switch = 0
                    gg.fragmentation = 0
                    cost_row = []
                    for tt in t:
                        # overlap == 1 means cost == 0
                        c = 1 - self.boxoverlap(gg, tt)
                        # gating for boxoverlap
                        if c <= self.min_overlap:
                            cost_row.append(c)
                        else:
                            cost_row.append(max_cost)  # = 1e9
                    cost_matrix.append(cost_row)
                    # all ground truth trajectories are initially not associated
                    # extend groundtruth trajectories lists (merge lists)
                    seq_trajectories[gg.track_id].append(-1)
                    seq_ignored[gg.track_id].append(False)

                if len(g) == 0:
                    cost_matrix = [[]]
                # associate
                association_matrix = hm.compute(cost_matrix)

                # tmp variables for sanity checks and MODP computation
                tmptp = 0
                tmpfp = 0
                tmpfn = 0
                tmpc = 0  # this will sum up the overlaps for all true positives
                tmpcs = [0] * len(
                    g)  # this will save the overlaps for all true positives
                # the reason is that some true positives might be ignored
                # later, such that the corresponding overlaps can
                # be subtracted from tmpc for the MODP computation

                # mapping for tracker ids and ground truth ids
                for row, col in association_matrix:
                    # apply gating on boxoverlap
                    c = cost_matrix[row][col]
                    if c < max_cost:
                        g[row].tracker = t[col].track_id
                        this_ids[1][row] = t[col].track_id
                        t[col].valid = True
                        g[row].distance = c
                        self.total_cost += 1 - c
                        tmpc += 1 - c
                        tmpcs[row] = 1 - c
                        seq_trajectories[g[row].track_id][-1] = t[col].track_id

                        # true positives are only valid associations
                        self.tp += 1
                        tmptp += 1
                    else:
                        g[row].tracker = -1
                        self.fn += 1
                        tmpfn += 1

                # associate tracker and DontCare areas
                # ignore tracker in neighboring classes
                nignoredtracker = 0  # number of ignored tracker detections
                ignoredtrackers = dict()  # will associate the track_id with -1
                # if it is not ignored and 1 if it is
                # ignored;
                # this is used to avoid double counting ignored
                # cases, see the next loop

                for tt in t:
                    ignoredtrackers[tt.track_id] = -1
                    # ignore detection if it belongs to a neighboring class or is
                    # smaller or equal to the minimum height

                    tt_height = abs(tt.y1 - tt.y2)
                    if ((self.cls == "car" and tt.obj_type == "van") or
                        (self.cls == "pedestrian"
                         and tt.obj_type == "person_sitting")
                            or tt_height <= self.min_height) and not tt.valid:
                        nignoredtracker += 1
                        tt.ignored = True
                        ignoredtrackers[tt.track_id] = 1
                        continue
                    for d in dc:
                        overlap = self.boxoverlap(tt, d, "a")
                        if overlap > 0.5 and not tt.valid:
                            tt.ignored = True
                            nignoredtracker += 1
                            ignoredtrackers[tt.track_id] = 1
                            break

                # check for ignored FN/TP (truncation or neighboring object class)
                ignoredfn = 0  # the number of ignored false negatives
                nignoredtp = 0  # the number of ignored true positives
                nignoredpairs = 0  # the number of ignored pairs, i.e. a true positive
                # which is ignored but where the associated tracker
                # detection has already been ignored

                gi = 0
                for gg in g:
                    if gg.tracker < 0:
                        if gg.occlusion>self.max_occlusion or gg.truncation>self.max_truncation\
                                or (self.cls=="car" and gg.obj_type=="van") or (self.cls=="pedestrian" and gg.obj_type=="person_sitting"):
                            seq_ignored[gg.track_id][-1] = True
                            gg.ignored = True
                            ignoredfn += 1

                    elif gg.tracker >= 0:
                        if gg.occlusion>self.max_occlusion or gg.truncation>self.max_truncation\
                                or (self.cls=="car" and gg.obj_type=="van") or (self.cls=="pedestrian" and gg.obj_type=="person_sitting"):

                            seq_ignored[gg.track_id][-1] = True
                            gg.ignored = True
                            nignoredtp += 1

                            # if the associated tracker detection is already ignored,
                            # we want to avoid double counting ignored detections
                            if ignoredtrackers[gg.tracker] > 0:
                                nignoredpairs += 1

                            # for computing MODP, the overlaps from ignored detections
                            # are subtracted
                            tmpc -= tmpcs[gi]
                    gi += 1

                # the code below might be confusing; check the comments in __init__
                # to see what the individual statistics represent

                # correct TP by number of ignored TP due to truncation
                # ignored TP are shown as tracked in visualization
                tmptp -= nignoredtp

                # count the number of ignored true positives
                self.itp += nignoredtp

                # adjust the number of ground truth objects considered
                self.n_gt -= (ignoredfn + nignoredtp)

                # count the number of ignored ground truth objects
                self.n_igt += ignoredfn + nignoredtp

                # count the number of ignored tracker objects
                self.n_itr += nignoredtracker

                # count the number of ignored pairs, i.e. associated tracker and
                # ground truth objects that are both ignored
                self.n_igttr += nignoredpairs

                # false negatives = associated gt bboxes exceeding association threshold + non-associated gt bboxes
                #
                tmpfn += len(g) - len(association_matrix) - ignoredfn
                self.fn += len(g) - len(association_matrix) - ignoredfn
                self.ifn += ignoredfn

                # false positives = tracker bboxes - associated tracker bboxes
                # mismatches (mme_t)
                tmpfp += len(
                    t) - tmptp - nignoredtracker - nignoredtp + nignoredpairs
                self.fp += len(
                    t) - tmptp - nignoredtracker - nignoredtp + nignoredpairs
                #tmpfp   = len(t) - tmptp - nignoredtp # == len(t) - (tp - ignoredtp) - ignoredtp
                #self.fp += len(t) - tmptp - nignoredtp

                # update sequence data
                seqtp += tmptp
                seqitp += nignoredtp
                seqfp += tmpfp
                seqfn += tmpfn
                seqifn += ignoredfn
                seqigt += ignoredfn + nignoredtp
                seqitr += nignoredtracker

                # sanity checks
                # - the number of true positives minus ignored true positives
                #   should be greater or equal to 0
                # - the number of false negatives should be greater or equal to 0
                # - the number of false positives needs to be greater or equal to 0
                #   otherwise ignored detections might be counted double
                # - the number of counted true positives (plus ignored ones)
                #   and the number of counted false negatives (plus ignored ones)
                #   should match the total number of ground truth objects
                # - the number of counted true positives (plus ignored ones)
                #   and the number of counted false positives
                #   plus the number of ignored tracker detections should
                #   match the total number of tracker detections; note that
                #   nignoredpairs is subtracted here to avoid double counting
                #   of ignored detections in nignoredtp and nignoredtracker
                if tmptp < 0:
                    print(tmptp, nignoredtp)
                    raise NameError("Something went wrong! TP is negative")
                if tmpfn < 0:
                    print(tmpfn, len(g), len(association_matrix), ignoredfn,
                          nignoredpairs)
                    raise NameError("Something went wrong! FN is negative")
                if tmpfp < 0:
                    print(tmpfp, len(t), tmptp, nignoredtracker, nignoredtp,
                          nignoredpairs)
                    raise NameError("Something went wrong! FP is negative")
                if tmptp + tmpfn != len(g) - ignoredfn - nignoredtp:
                    print("seqidx", seq_idx)
                    print("frame ", f)
                    print("TP    ", tmptp)
                    print("FN    ", tmpfn)
                    print("FP    ", tmpfp)
                    print("nGT   ", len(g))
                    print("nAss  ", len(association_matrix))
                    print("ign GT", ignoredfn)
                    print("ign TP", nignoredtp)
                    raise NameError(
                        "Something went wrong! nGroundtruth is not TP+FN")
                if tmptp + tmpfp + nignoredtp + nignoredtracker - nignoredpairs != len(
                        t):
                    print(seq_idx, f, len(t), tmptp, tmpfp)
                    print(len(association_matrix), association_matrix)
                    raise NameError(
                        "Something went wrong! nTracker is not TP+FP")

                # check for id switches or fragmentations
                for i, tt in enumerate(this_ids[0]):
                    if tt in last_ids[0]:
                        idx = last_ids[0].index(tt)
                        tid = this_ids[1][i]
                        lid = last_ids[1][idx]
                        if tid != lid and lid != -1 and tid != -1:
                            if g[i].truncation < self.max_truncation:
                                g[i].id_switch = 1
                                ids += 1
                        if tid != lid and lid != -1:
                            if g[i].truncation < self.max_truncation:
                                g[i].fragmentation = 1
                                fr += 1

                # save current index
                last_ids = this_ids
                # compute MODP_t
                MODP_t = 1
                if tmptp != 0:
                    MODP_t = tmpc / float(tmptp)
                self.MODP_t.append(MODP_t)

            # remove empty lists for current gt trajectories
            self.gt_trajectories[seq_idx] = seq_trajectories
            self.ign_trajectories[seq_idx] = seq_ignored

            # gather per-sequence statistics
            self.n_gts.append(n_gts)
            self.n_trs.append(n_trs)
            self.tps.append(seqtp)
            self.itps.append(seqitp)
            self.fps.append(seqfp)
            self.fns.append(seqfn)
            self.ifns.append(seqifn)
            self.n_igts.append(seqigt)
            self.n_itrs.append(seqitr)

        # compute MT/PT/ML, fragments, idswitches for all groundtruth trajectories
        n_ignored_tr_total = 0
        for seq_idx, (seq_trajectories, seq_ignored) in enumerate(
                zip(self.gt_trajectories, self.ign_trajectories)):
            if len(seq_trajectories) == 0:
                continue
            tmpMT, tmpML, tmpPT, tmpId_switches, tmpFragments = [0] * 5
            n_ignored_tr = 0
            for g, ign_g in zip(seq_trajectories.values(),
                                seq_ignored.values()):
                # all frames of this gt trajectory are ignored
                if all(ign_g):
                    n_ignored_tr += 1
                    n_ignored_tr_total += 1
                    continue
                # all frames of this gt trajectory are not assigned to any detections
                if all([this == -1 for this in g]):
                    tmpML += 1
                    self.ML += 1
                    continue
                # compute tracked frames in trajectory
                last_id = g[0]
                # first detection (necessary to be in gt_trajectories) is always tracked
                tracked = 1 if g[0] >= 0 else 0
                lgt = 0 if ign_g[0] else 1
                for f in range(1, len(g)):
                    if ign_g[f]:
                        last_id = -1
                        continue
                    lgt += 1
                    if last_id != g[f] and last_id != -1 and g[f] != -1 and g[
                            f - 1] != -1:
                        tmpId_switches += 1
                        self.id_switches += 1
                    if f < len(g) - 1 and g[f - 1] != g[
                            f] and last_id != -1 and g[f] != -1 and g[f +
                                                                      1] != -1:
                        tmpFragments += 1
                        self.fragments += 1
                    if g[f] != -1:
                        tracked += 1
                        last_id = g[f]
                # handle last frame; tracked state is handled in for loop (g[f]!=-1)
                if len(g) > 1 and g[f - 1] != g[f] and last_id != -1 and g[
                        f] != -1 and not ign_g[f]:
                    tmpFragments += 1
                    self.fragments += 1

                # compute MT/PT/ML
                tracking_ratio = tracked / float(len(g) - sum(ign_g))
                if tracking_ratio > 0.8:
                    tmpMT += 1
                    self.MT += 1
                elif tracking_ratio < 0.2:
                    tmpML += 1
                    self.ML += 1
                else:  # 0.2 <= tracking_ratio <= 0.8
                    tmpPT += 1
                    self.PT += 1

        if (self.n_gt_trajectories - n_ignored_tr_total) == 0:
            self.MT = 0.
            self.PT = 0.
            self.ML = 0.
        else:
            self.MT /= float(self.n_gt_trajectories - n_ignored_tr_total)
            self.PT /= float(self.n_gt_trajectories - n_ignored_tr_total)
            self.ML /= float(self.n_gt_trajectories - n_ignored_tr_total)

        # precision/recall etc.
        if (self.fp + self.tp) == 0 or (self.tp + self.fn) == 0:
            self.recall = 0.
            self.precision = 0.
        else:
            self.recall = self.tp / float(self.tp + self.fn)
            self.precision = self.tp / float(self.fp + self.tp)
        if (self.recall + self.precision) == 0:
            self.F1 = 0.
        else:
            self.F1 = 2. * (self.precision * self.recall) / (self.precision +
                                                             self.recall)
        if sum(self.n_frames) == 0:
            self.FAR = "n/a"
        else:
            self.FAR = self.fp / float(sum(self.n_frames))

        # compute CLEARMOT
        if self.n_gt == 0:
            self.MOTA = -float("inf")
            self.MODA = -float("inf")
        else:
            self.MOTA = 1 - (self.fn + self.fp + self.id_switches) / float(
                self.n_gt)
            self.MODA = 1 - (self.fn + self.fp) / float(self.n_gt)
        if self.tp == 0:
            self.MOTP = float("inf")
        else:
            self.MOTP = self.total_cost / float(self.tp)
        if self.n_gt != 0:
            if self.id_switches == 0:
                self.MOTAL = 1 - (self.fn + self.fp +
                                  self.id_switches) / float(self.n_gt)
            else:
                self.MOTAL = 1 - (self.fn + self.fp + math.log10(
                    self.id_switches)) / float(self.n_gt)
        else:
            self.MOTAL = -float("inf")
        if sum(self.n_frames) == 0:
            self.MODP = "n/a"
        else:
            self.MODP = sum(self.MODP_t) / float(sum(self.n_frames))
        return True
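# The tail of compute3rdPartyMetrics applies the CLEAR MOT definitions; restated
# below as a tiny standalone helper over the same accumulated counts (assuming
# n_gt > 0 and tp > 0, i.e. the non-degenerate branches above).
def clear_mot(fn, fp, id_switches, n_gt, total_cost, tp):
    mota = 1.0 - (fn + fp + id_switches) / float(n_gt)   # multi-object tracking accuracy
    moda = 1.0 - (fn + fp) / float(n_gt)                  # detection accuracy (no ID term)
    motp = total_cost / float(tp)                         # mean overlap of the true positives
    return mota, moda, motp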
import copy
import math
import os

from munkres import Munkres

from coalib.collecting.Collectors import collect_dirs
from bears.c_languages.codeclone_detection.CountVector import CountVector

# Instantiate globally since this class is holding stateless public methods.
munkres = Munkres()


def exclude_function(count_matrix):
    """
    Determines heuristically whether or not it makes sense for clone
    detection to take this function into account.

    Applied heuristics:
     * Functions whose count vectors all have a sum of unweighted elements
       lower than 10 are very likely mere declarations or empty, and are
       ignored. (Constants are not taken into account.)

    :param count_matrix: A dictionary with count vectors representing all
                         variables for a function.
    :return:             True if the function is useless for evaluation.
    """
    var_list = [
        cv for cv in count_matrix.values()
        if cv.category is CountVector.Category.reference
    ]
    def update(self, boxes):
        """
        Update predictions, associate with detections and add new objects if have no association
        Also delete dead objects
        :param: Detected boxes
        :return: None
        """
        non_asociated = [i for i in range(len(boxes))]

        pred = None

        # Update prediction for each tracked object
        for key in self.filters:
            pred = self.filters[key].predict()

            # Update bounding box according to xc, yc predicted and size measured
            new_xc = pred[0]
            new_yc = pred[1]

            xc = float(self.objects[key][0] + self.objects[key][2]) / 2
            yc = float(self.objects[key][1] + self.objects[key][3]) / 2

            half_w = self.objects[key][2] - xc
            half_h = self.objects[key][3] - yc

            # Update object boxes
            self.objects[key] = [
                int(new_xc - half_w),
                int(new_yc - half_h),
                int(new_xc + half_w),
                int(new_yc + half_h)
            ]

        if len(boxes) != 0 and len(self.objects) != 0:
            m = Munkres()

            # Get distances between detections and predictions to match
            distances = self.get_distances(boxes)

            # assert len(self.objects) == len(self.objects_time) == len(self.objects_life) == len(self.filters), "Bug on code"

            # munkres seems to have a bug when the matrix is a numpy array of shape (2, 1), so convert it to a list
            indices = m.compute((-distances).tolist())
            #             print("Valid: ", indices)

            # For each possible match, filter out impossible matches
            for i, j in indices:

                # Check for valid associations
                if distances[i, j] > self.iou_thres:
                    #                     print("accepted", i, j)
                    key = list(self.objects.keys())[j]
                    self.objects_life[key] = self.max_life

                    xc = float(boxes[i][0] + boxes[i][2]) / 2
                    yc = float(boxes[i][1] + boxes[i][3]) / 2

                    meas_centers = np.array([xc, yc], np.float32)

                    # If there is a match, feed the measurement back to the Kalman filter
                    self.filters[key].correct(meas_centers)

                    # Update the width; this should also be predicted, but the Kalman
                    # filter does not model it, so assume the measured size is correct.
                    # Compute the half-size of the measured bounding box
                    half_w = boxes[i][2] - xc
                    half_h = boxes[i][3] - yc

                    # Keep the predicted center
                    xc = float(self.objects[key][0] + self.objects[key][2]) / 2
                    yc = float(self.objects[key][1] + self.objects[key][3]) / 2

                    self.objects[key] = [
                        int(xc - half_w),
                        int(yc - half_h),
                        int(xc + half_w),
                        int(yc + half_h)
                    ]

                    # Mark the association as handled by removing it from the non-associated detections
                    non_asociated.remove(i)

        # Add non associated values as new objects
        for i in non_asociated:
            # print(boxes[i])
            self.new_object(boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3])

        keys_to_del = []
        for key in self.objects_life:
            # If a tracked object has disappeared for the last iterations (no life left), delete it
            if self.objects_life[key] <= 0:
                keys_to_del.append(key)

            # Else decrement life
            else:
                self.objects_life[key] = self.objects_life[key] - 1

        # Delete pending associations
        for key in keys_to_del:
            self.remove_object(key)
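# In the update() above, "distances" is used like a similarity matrix: it is
# negated before Munkres (which minimizes) and then compared against an IoU
# threshold. A hypothetical get_distances matching that usage, computing pairwise
# IoU between detected boxes and tracked boxes ([x1, y1, x2, y2] format); the
# names below are illustrative assumptions, not the original class's code.
import numpy as np

def iou(a, b):
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    union = ((a[2] - a[0]) * (a[3] - a[1]) +
             (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / float(union) if union > 0 else 0.0

def get_distances(boxes, tracked_boxes):
    return np.array([[iou(det, trk) for trk in tracked_boxes] for det in boxes])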
def match(conf_file, french_ans, exchange_ans, remaining_fr, remaining_ex, matchings_file, verbose):
    """Creates an optimal matching between french students and exchange students,\
    using criterias defined in the config file"""

    config = json.load(conf_file)
    criterias = config['criterias']

    # import data
    fr = pd.read_csv(french_ans)
    ex = pd.read_csv(exchange_ans)

    frAnswers = prepare(fr, doClone=True, cloneLabel=config['two_buddies_question_label'])
    exAnswers = prepare(ex)

    # Compute the costs 
    costMatrix = [[float('inf')] * len(exAnswers.index) for _ in range(len(frAnswers.index))]
    for fridx, fr_row in frAnswers.iterrows():
        for exidx, ex_row in exAnswers.iterrows():
            # Compute a list of compatibility scores
            # Scores are normalized between 0 and 1
            costMatrix[fridx][exidx] = totalCost(criterias, fr_row, ex_row)

    # Solve the assignment problem using munkres
    m = Munkres()
    assignments = m.compute(costMatrix)

    # Print results
    if verbose > 0:
        for fridx, exidx in assignments:
            print(frAnswers['Q1'][fridx], frAnswers['Q2'][fridx],
                '<3',
                exAnswers['Q1'][exidx], exAnswers['Q2'][exidx],
                compatPercentage(costMatrix[fridx][exidx]))


    # Output remaining unmatched people
    frMatched, exMatched = zip(*assignments)
    frAnswers[~frAnswers.index.isin(frMatched)].to_csv(remaining_fr)
    exAnswers[~exAnswers.index.isin(exMatched)].to_csv(remaining_ex)

    # Outputting matchings
    matchings = pd.DataFrame(columns=["frFName",
                                      "frLName",
                                      "frEMail",
                                      "exFName",
                                      "exLName",
                                      "exEMail",
                                      "language"])
    for fridx, exidx in assignments:
        fQ, lQ = config["fluentQ"], config["learningQ"]
        fnQ, lnQ, emQ = config["firstNameQ"], config["lastNameQ"], config["eMailQ"]
        frRow = frAnswers.iloc[fridx]
        exRow = exAnswers.iloc[exidx]
        # TODO:
        commonLangs = set(frRow[fQ].split(',')).intersection(set(exRow[fQ].split(',')))
        for lang in config["mails"]:
            if lang in commonLangs:
                chosenLang = lang
                break
        else:
            # no break: no common language among the configured ones, fall back to English
            chosenLang = "English"
        d = {"frFName": frRow[fnQ],
            "frLName": frRow[lnQ],
            "frEMail": frRow[emQ],
            "exFName": exRow[fnQ],
            "exLName": exRow[lnQ],
            "exEMail": exRow[emQ],
            "language": chosenLang,
            "compatibility": compatPercentage(costMatrix[fridx][exidx])}
        matchings = matchings.append(d, ignore_index=True)

    matchings.to_csv("./output/matchings.csv")
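# Note on the shape of costMatrix above: it has one row per French student and one
# column per exchange student, so it need not be square; the munkres package pads
# rectangular matrices internally and the smaller side is matched fully. A tiny
# self-contained check of that behaviour:
from munkres import Munkres
print(Munkres().compute([[4, 1, 3],
                         [2, 0, 5]]))   # a 2x3 matrix yields exactly two (row, col) pairs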
Example #8
def py_max_match(scores):  # scores encodes a weighted bipartite graph over (row, col): the connection strength between row and col
    m = Munkres()
    tmp = m.compute(-scores)  # negate scores because compute() finds the minimum-cost assignment; we want the maximum matching
    tmp = np.array(tmp).astype(np.int32)
    return tmp
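# Tiny usage sketch for py_max_match (assuming numpy and munkres are importable
# and that the installed munkres accepts array rows, as the function itself
# relies on): maximizing the total score should pick the diagonal here.
import numpy as np
scores = np.array([[0.9, 0.1],
                   [0.2, 0.8]])
print(py_max_match(scores))   # expected pairs: (0, 0) and (1, 1)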
def cluster_distance(A, B, nbhood):
    #key is name of GC, values is list of specificity names
    clusterA = BGCs[A]  # dictionary where keys are specificities, and values
    # are lists of specificity labels that map to a specific sequence in
    # the DMS variable
    clusterB = BGCs[B]

    specificities_A = set(clusterA.keys())
    specificities_B = set(clusterB.keys())
    intersect = specificities_A.intersection(specificities_B)

    # JACCARD INDEX
    Jaccard = len(intersect) / float(
        len(specificities_A.union(specificities_B)))

    #DDS: The difference in specificities' sequences between clusters
    #S: Max occurrence of each specificity

    DDS, S = 0, 0
    SumDistance = 0
    pair = ""

    # elements in either set but not in both: union - intersect
    not_intersect = specificities_A ^ specificities_B

    # for each unshared specificity, add as many copies as there are
    for unshared_specificity in not_intersect:
        dom_set = []
        try:
            dom_set = clusterA[unshared_specificity]
        except KeyError:
            dom_set = clusterB[unshared_specificity]

        DDS += len(dom_set)
        S += len(dom_set)

    # compare sequence identity of shared specificities
    for shared_specificity in intersect:
        seta = clusterA[shared_specificity]
        setb = clusterB[shared_specificity]

        if len(seta +
               setb) == 2:  #The specificity occurs only once in both clusters
            pair = tuple(sorted([seta[0], setb[0]]))

            try:
                SumDistance = 1 - DMS[shared_specificity][pair][0]
                # print 'SumDistance1', SumDistance

            except KeyError:
                print("KeyError on", pair)
                print(shared_specificity)
                sys.exit()

            S += max(len(seta), len(setb))
            DDS += SumDistance

        else:  #The specificity occurs more than once in both clusters
            # accumulated_distance = 0

            DistanceMatrix = [[1 for col in range(len(setb))]
                              for row in range(len(seta))]
            # print DistanceMatrix
            for domsa in range(len(seta)):
                for domsb in range(len(setb)):
                    pair = tuple(sorted([seta[domsa], setb[domsb]]))
                    try:
                        Similarity = DMS[shared_specificity][pair][0]
                        # print Similarity
                    except KeyError:
                        print("KeyError on (case 2)", pair)
                        print(shared_specificity)
                        Similarity = 0  # avoid reusing a stale/undefined value for missing pairs

                    seq_dist = 1 - Similarity
                    DistanceMatrix[domsa][domsb] = seq_dist

            #Only use the best scoring pairs
            Hungarian = Munkres()
            BestIndexes = Hungarian.compute(DistanceMatrix)
            accumulated_distance = sum(
                [DistanceMatrix[bi[0]][bi[1]] for bi in BestIndexes])
            SumDistance = (abs(len(seta) - len(setb)) + accumulated_distance
                           )  #diff in abundance + sequence distance

            S += max(len(seta), len(setb))
            DDS += SumDistance

    DDS /= float(S)
    DDS = 1 - DDS  #transform into similarity

    #  calculate the Goodman-Kruskal gamma index
    A_pseudo_seq = list(cluster_seq[A])
    B_pseudo_seq = list(cluster_seq[B])
    Ar = [item for item in A_pseudo_seq]
    Ar.reverse()
    GK = max([
        calculate_GK(A_pseudo_seq, B_pseudo_seq, nbhood=nbhood),
        calculate_GK(Ar, B_pseudo_seq, nbhood=nbhood)
    ])

    Distance = 1 - (Jaccardw * Jaccard) - (DDSw * DDS) - (GKw * GK)
    Similarity_score = (Jaccardw * Jaccard) + (DDSw * DDS) + (GKw * GK)
    # Similarity_score = 1 - DDS
    if Distance < 0:
        print "negative distance", Distance, "DDS", DDS, pair
        print "Probably a rounding issue"
        print "Distance is set to 0 for these clusters"
        Distance = 0
    print A, B, Jaccard, GK, DDS
    return Similarity_score
Example #10
        
        # ─── DEBUGGING ──────────────────────────────────────────────────────────────────
        self.DEBUG = NiseConfig._DEBUG()
        
        # ─── ALGORITHM ──────────────────────────────────────────────────────────────────
        
        self.ALG = NiseConfig._ALG()
        
        self.PATH = NiseConfig._PATH()
        
        self.TEST = NiseConfig._TEST()
        
        self.MODEL = NiseConfig._MODEL()


cfg = NiseConfig()
nise_cfg = get_edcfg_from_nisecfg(cfg)
nise_args = get_nise_arg_parser()
update_nise_config(nise_cfg, nise_args)

suffix, suffix_with_range = set_path_from_nise_cfg(nise_cfg)
print('SUFFIX', suffix_with_range)
nise_logger = get_logger()
update_nise_logger(nise_logger, nise_cfg, suffix)

mkrs = Munkres()
nise_cfg_pack = {
    'cfg': nise_cfg,
    'logger': nise_logger
}
Example #11
def py_max_match(scores):
    # Backup if you have trouble getting munkres-tensorflow compiled (much slower)
    import numpy as np
    from munkres import Munkres
    m = Munkres()
    tmp = m.compute(-scores)
    return np.array(tmp).astype(np.int32)
Example #12
def getHungary(vehicles, tracks):
    hungarianMatrix = getMatrix(vehicles, tracks)
    mat = np.copy(hungarianMatrix)
    m = Munkres()
    indexes = m.compute(hungarianMatrix)
    return mat, indexes
Example #13
    def evaluateFrame(self, X, Y, X_labels=None, Y_labels=None):
        """ Compute the OSPA metric between two sets of points. """

        # check for empty sets
        if numpy.size(X) == 0 and numpy.size(Y) == 0:
            return (0, 0, 0, 0)
        elif numpy.size(X) == 0 or numpy.size(Y) == 0:
            return (self.c, 0, self.c, 0)

        # we assume that Y is the larger set
        m = numpy.size(X, 0)
        n = numpy.size(Y, 0)
        switched = False
        if m > n:
            X, Y = Y, X
            m, n = n, m
            switched = True

        dists = self.calculateCostMatrix(X, Y)
        # Copy cost matrix for munkres module
        munkres_matrix = numpy.copy(dists)
        # Only run munkres on non-empty matrix
        if len(munkres_matrix) > 0:
            munkres = Munkres()
            indices = munkres.compute(munkres_matrix)
        else:
            indices = []

        # compute the OSPA metric
        total = 0
        total_loc = 0
        for [i, j] in indices:
            total_loc += dists[i][j]**self.p
        # calculate cardinalization error
        err_cn = (float((self.c)**(self.p) * (n - m)) / n)**(1 / float(self.p))
        # calculate localization error
        err_loc = (float(total_loc) / n)**(1 / float(self.p))

        # Not contained in the version from GitHub; implemented according to the
        # paper "A Metric for Performance Evaluation of Multi-Target Tracking Algorithms":
        # a labeling error term that penalizes ID switches from frame to frame
        new_assignments = []
        for [i, j] in indices:
            if (switched):
                i, j = j, i
            new_assignments.append((X_labels[i], Y_labels[j]))
        wrong_labels = len(self.old_assignments) - len(
            set(new_assignments) & set(self.old_assignments))
        rospy.loginfo(wrong_labels)
        err_label = (float((float(self.a)**float(self.p)) / n) *
                     wrong_labels)**(1 / float(self.p))
        # store current assignments of labels to track
        self.old_assignments = new_assignments
        #ospa_err = ( float(total_loc + (n-m)*self.c**self.p) / n)**(1/float(self.p))
        ospa_err = (float(total_loc + self.a * wrong_labels +
                          (n - m) * self.c**self.p) / n)**(1 / float(self.p))
        ospa_tuple = (ospa_err, err_loc, err_cn, err_label)

        rospy.loginfo(ospa_tuple)

        return ospa_tuple
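# For reference, the base OSPA distance that evaluateFrame builds on (Schuhmacher
# et al.), restated as a standalone helper for m <= n. The per-pair distances are
# cut off at c in the paper's definition (whether calculateCostMatrix already does
# that is not visible in this snippet); the label-error extension above adds the
# a * wrong_labels term on top of this.
def base_ospa(matched_dists, n, c, p):
    loc = sum(min(c, d) ** p for d in matched_dists)
    m = len(matched_dists)
    return ((loc + (c ** p) * (n - m)) / float(n)) ** (1.0 / p)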
Example #14
def kmeans_acc_ari_ami(X, L):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    n_clusters = len(np.unique(L))
    kmeans = KMeans(n_clusters=n_clusters, n_init=20)

    y_pred = kmeans.fit_predict(X)
    y_pred = y_pred.astype(np.int64)
    y_true = L.astype(np.int64)
    assert y_pred.size == y_true.size

    y_pred = y_pred.reshape((1, -1))
    y_true = y_true.reshape((1, -1))

    # D = max(y_pred.max(), L.max()) + 1
    # w = np.zeros((D, D), dtype=np.int64)
    # for i in range(y_pred.size):
    #     w[y_pred[i], L[i]] += 1
    # # from sklearn.utils.linear_assignment_ import linear_assignment
    # from scipy.optimize import linear_sum_assignment
    # row_ind, col_ind = linear_sum_assignment(w.max() - w)
    #
    # return sum([w[i, j] for i in row_ind for j in col_ind]) * 1.0 / y_pred.size

    if len(np.unique(y_pred)) == len(np.unique(y_true)):
        C = len(np.unique(y_true))

        cost_m = np.zeros((C, C), dtype=float)
        for i in np.arange(0, C):
            a = np.where(y_pred == i)
            # print(a.shape)
            a = a[1]
            l = len(a)
            for j in np.arange(0, C):
                yj = np.ones((1, l)).reshape(1, l)
                yj = j * yj
                cost_m[i, j] = np.count_nonzero(yj - y_true[0, a])

        mk = Munkres()
        best_map = mk.compute(cost_m)

        (_, h) = y_pred.shape
        for i in np.arange(0, h):
            c = y_pred[0, i]
            v = best_map[c]
            v = v[1]
            y_pred[0, i] = v

        acc = 1 - (np.count_nonzero(y_pred - y_true) / h)

    else:
        acc = 0
    # print(y_pred.shape)
    y_pred = y_pred[0]
    y_true = y_true[0]
    ari, ami = adjusted_rand_score(y_true, y_pred), adjusted_mutual_info_score(
        y_true, y_pred)

    return acc, ari, ami
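# The commented-out block inside kmeans_acc_ari_ami sketches a shorter route via
# scipy's linear_sum_assignment; a corrected, self-contained version of that idea
# (note that summing w[i, j] over all (i, j) combinations, as in the comment,
# would over-count -- the matched pairs have to be zipped):
import numpy as np
from scipy.optimize import linear_sum_assignment

def clustering_accuracy(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=np.int64)
    y_pred = np.asarray(y_pred, dtype=np.int64)
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for t, p in zip(y_true, y_pred):
        w[p, t] += 1                      # confusion counts: predicted cluster vs true label
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() / float(y_true.size)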
Example #15
 def __init__(self):
     self.training_set = []
     self.testing_set = []
     self.m = Munkres()
Example #16
    def execute(environment):
        # print "-------------------------------------------------"
        # print "Planning for objects..."

        Manager._create_object_plan(environment)

        allocation_list = []

        space = environment['workspace']
        object_list = environment['object_list']
        robots = environment['robot_dict']

        total_object = 0
        total_robot = 0

        allocator = Munkres()

        # While not all objects are transported
        while True:
            # Create the list with objects that there are not yet
            # fully transported
            work_list = [
                obj for obj in object_list if len(obj['plan']['segments']) != 0
            ]

            # With all objects were transported, finish
            if len(work_list) == 0:
                break

            # print "-------------------------------------------------"
            # print "Planning for robots..."

            # Create the allocation matrix
            # Square, for the Hungarian Method
            shape = (max(len(work_list), len(robots)), ) * 2
            cost_matrix = np.full(shape,
                                  ManagerHungarianMulti.inf,
                                  dtype=int)  # np.int is deprecated in recent numpy

            robot_id_list = robots.keys()
            robot_plan = {}

            # For each robot available
            for robot_index, robot_id in enumerate(robot_id_list):

                # print "# robot", robot_index
                robot = robots[robot_id]
                robot_plan[robot_id] = {}

                # For each object to be transported
                for obj_index, obj in enumerate(work_list):
                    # print "### object", obj['idx']

                    robot_plan[robot_id][obj['idx']] = {}

                    # Get first segment
                    segment = obj['plan']['segments'][0]

                    if segment.graph['type'] != robot.robot_type:
                        continue

                    # If there is another object in the segment,
                    # its no possible to be executed
                    in_place_objects = [
                        item for item in object_list
                        if (item['idx'] != obj['idx']) and (
                            hash(item['current_pos']) in segment)
                    ]

                    if len(in_place_objects) != 0:
                        continue

                    segment_start = segment.node[segment.graph['start_id']]
                    segment_end = segment.node[segment.graph['end_id']]

                    # Prepare Movement

                    space.temp_obstacle_list = [
                        item['current_pos'] for item in object_list
                    ]

                    robot_start = robot.copy()

                    robot_end = robot.copy()

                    if robot.robot_type == Robot.TYPES.aerial:
                        robot_end.position = Planner.create_robot_position(
                            segment_start, -1).position
                    else:
                        robot_end.position = Planner.create_robot_position(
                            segment_start, 2).position

                    try:
                        prepare_plan = Manager._execute_planning({
                            'start_position':
                            robot_start,
                            'end_position':
                            robot_end,
                            'workspace':
                            space,
                            'robot_type':
                            robot.robot_type
                        })
                    except Exception:
                        print("Object: ", obj['idx'] + 1)
                        print("Impossible prepare plan")
                        continue

                    space.temp_obstacle_list = []

                    robot_plan[robot_id][
                        obj['idx']]['prepare_plan'] = prepare_plan

                    # Move Movement

                    space.temp_obstacle_list = [
                        item['current_pos'] for item in object_list
                        if item['idx'] != obj['idx']
                    ]

                    robot_start.position = robot_end.position
                    robot_end = robot.copy()
                    robot_end.position = Planner.create_robot_position(
                        segment_end).position

                    try:
                        moviment_plan = Manager._execute_planning({
                            'start_position':
                            robot_start,
                            'end_position':
                            robot_end,
                            'workspace':
                            space,
                            'robot_type':
                            robot.robot_type
                        })
                    except Exception:
                        print("Object: ", obj['idx'] + 1)
                        print("Impossible movement plan")
                        continue

                    space.temp_obstacle_list = []

                    robot_plan[robot_id][
                        obj['idx']]['moviment_plan'] = moviment_plan

                    # Total cost to movement this object
                    # by the current robot
                    total_cost = obj['plan']['total'] - len(segment) + len(
                        prepare_plan)

                    # Populate the cost matrix
                    cost_matrix[robot_index][obj_index] = total_cost
Example #17
    print(inames)
    print("CNAMES:")
    print(cnames)

    print("########################")
    print("MUNKRES OPTIMIZATION")
    print("########################")
    targetlist = list(inames)
    baselist = list(cnames) + list(inames)
    cost_matrix = gen_cost_matrix(targetlist, baselist, instance, concept)
    print(len(cost_matrix), len(cost_matrix[0]))

    for row in cost_matrix:
        print("\t".join(["%0.2f" % v for v in row]))

    mun = Munkres()
    s = mun.compute(cost_matrix)
    print(s)
    #s = linear_sum_assignment(cost_matrix)
    #mun_sol = {targetlist[ti]: baselist[s[1][i]] for i, ti in enumerate(s[0])}
    mun_sol = {
        targetlist[row]:
        baselist[len(cnames) + row] if col > len(cnames) else baselist[col]
        for row, col in s
    }
    print("Munkres solution:")
    pprint(mun_sol)
    print("Munkres cost:")

    print(mapping_cost(frozenset(mun_sol.items()), instance, concept))
 def run(costs):
     m = Munkres()
     idx = np.array(m.compute(costs), dtype=int)
     return costs[idx[:, 0], idx[:, 1]].sum()
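# Quick usage sketch for run(): the optimal assignment of this 3x3 cost matrix is
# its diagonal, so the returned sum should be 6.0 (assumes run() is available at
# module scope and that numpy and munkres are imported, as run() itself requires).
import numpy as np
costs = np.array([[1.0, 9.0, 9.0],
                  [9.0, 2.0, 9.0],
                  [9.0, 9.0, 3.0]])
print(run(costs))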
def py_max_match(scores):
    m = Munkres()
    tmp = m.compute(-scores)
    tmp = np.array(tmp).astype(np.int32)
    return tmp
    def allocate(self):
        self.populate_adj_scores(self.adjudicators)

        # Sort voting adjudicators in descending order by score
        voting = [
            a for a in self.adjudicators
            if a._hungarian_score >= self.min_voting_score and not a.trainee
        ]
        random.shuffle(voting)
        voting.sort(key=lambda a: a._hungarian_score, reverse=True)

        n_debates = len(self.debates)
        if self.no_panellists:
            voting = voting[:n_debates]
        n_voting = len(voting)

        if self.no_trainees:
            trainees = []
        else:
            trainees = [a for a in self.adjudicators if a not in voting]
            trainees.sort(key=lambda a: a._hungarian_score, reverse=True)

        # Divide debates into solo-chaired debates and panel debates
        debates_sorted = sorted(self.debates,
                                key=lambda d: (-d.importance, d.room_rank))

        # Figure out how many judges per room, prioritising the most important
        judges_per_room_floor = n_voting // n_debates
        n_bigger_panels = n_voting % n_debates
        judges_per_room = [judges_per_room_floor + 1] * n_bigger_panels + [
            judges_per_room_floor
        ] * (n_debates - n_bigger_panels)

        logger.info(
            "There are %d debates, %d voting adjudicators and %d trainees",
            len(debates_sorted), len(voting), len(trainees))
        if n_voting < n_debates:
            logger.warning(
                "There are %d debates but only %d voting adjudicators",
                n_debates, n_voting)

        # Allocate voting
        m = Munkres()

        logger.info("costing voting adjudicators")
        cost_matrix = []
        for debate, njudges in zip(debates_sorted, judges_per_room):
            for i in range(njudges):
                row = [
                    self.calc_cost(debate, adj, adjustment=-i)
                    for adj in voting
                ]
                cost_matrix.append(row)

        logger.info(
            "optimizing voting adjudicators (matrix size: %d positions by %d adjudicators)",
            len(cost_matrix), len(cost_matrix[0]))
        indexes = m.compute(cost_matrix)
        indexes.sort()
        total_cost = sum(cost_matrix[i][j] for i, j in indexes)
        logger.info('total cost for %d debates: %f', n_debates, total_cost)

        # transfer the indices to the debates
        alloc = []
        for debate, njudges in zip(debates_sorted, judges_per_room):
            aa = AdjudicatorAllocation(debate)
            panel_indices = indexes[0:njudges]
            panel = [voting[c] for r, c in panel_indices]
            panel.sort(key=lambda a: a._hungarian_score, reverse=True)
            try:
                aa.chair = panel.pop(0)
            except IndexError:
                aa.chair = None
            aa.panellists = panel
            alloc.append(aa)
            del indexes[0:njudges]

            logger.info("allocating to %s: %s (c), %s", aa.debate, aa.chair,
                        ", ".join([str(p) for p in aa.panellists]))

        # Allocate trainees, one per debate
        if len(trainees) > 0 and len(debates_sorted) > 0:
            allocation_by_debate = {aa.debate: aa for aa in alloc}

            logger.info("costing trainees")
            cost_matrix = []
            for debate in debates_sorted:
                chair = allocation_by_debate[debate].chair
                row = [
                    self.calc_cost(debate, adj, adjustment=-2.0, chair=chair)
                    for adj in trainees
                ]
                cost_matrix.append(row)

            logger.info(
                "optimizing trainees (matrix size: %d positions by %d trainees)",
                len(cost_matrix), len(cost_matrix[0]))
            indexes = m.compute(cost_matrix)
            total_cost = sum(cost_matrix[i][j] for i, j in indexes)
            logger.info('total cost for %d trainees: %f', len(trainees),
                        total_cost)

            result = ((debates_sorted[i], trainees[j]) for i, j in indexes
                      if i < len(debates_sorted))
            for debate, trainee in result:
                allocation_by_debate[debate].trainees.append(trainee)
                logger.info("allocating to %s: %s (t)", debate, trainee)

        return alloc
Example #21
    def allocate(self):
        from debate.models import AdjudicatorAllocation

        # remove trainees
        self.adjudicators = [a for a in self.adjudicators
                             if a.score > self.MIN_SCORE]

        # sort adjudicators and debates in descending score/importance
        self.adjudicators_sorted = list(self.adjudicators)
        self.adjudicators_sorted.sort(key=lambda a: a.score, reverse=True)
        self.debates_sorted = list(self.debates)
        self.debates_sorted.sort(key=lambda a: a.importance, reverse=True)

        n_adjudicators = len(self.adjudicators)
        n_debates = len(self.debates)

        n_solos = n_debates - (n_adjudicators - n_debates) / 2

        # get adjudicators that can adjudicate solo
        chairs = self.adjudicators_sorted[:n_solos]
        #chairs = [a for a in self.adjudicators_sorted if a.score >
        #          self.CHAIR_CUTOFF]

        # get debates that will be judged by solo adjudicators
        chair_debates = self.debates_sorted[:len(chairs)]

        panel_debates = self.debates_sorted[len(chairs):]
        panellists = [a for a in self.adjudicators_sorted if a not in chairs]

        assert len(panel_debates) * 3 <= len(panellists)

        print "costing chairs"

        n = len(chairs)

        cost_matrix = [[0] * n for i in range(n)]

        for i, debate in enumerate(chair_debates):
            for j, adj in enumerate(chairs):
                cost_matrix[i][j] = self.calc_cost(debate, adj)

        print "optimizing"

        m = Munkres()
        indexes = m.compute(cost_matrix)

        total_cost = 0
        for r, c in indexes:
            total_cost += cost_matrix[r][c]

        print 'total cost for solos', total_cost
        print 'number of solo debates', n

        result = ((chair_debates[i], chairs[j]) for i, j in indexes
                  if i < len(chair_debates))
        alloc = [AdjudicatorAllocation(d, c) for d, c in result]

        print[(a.debate, a.chair) for a in alloc]

        # do panels
        n = len(panel_debates)

        npan = len(panellists)

        if npan:
            print "costing panellists"

            # matrix is square, dummy debates have cost 0
            cost_matrix = [[0] * npan for i in range(npan)]
            for i, debate in enumerate(panel_debates):
                for j in range(3):

                    # for the top half of these debates, the final panellist
                    # can be of lower quality than the other 2
                    if i < npan / 2 and j == 2:
                        adjustment = -1.0
                    else:
                        adjustment = 0

                    for k, adj in enumerate(panellists):
                        cost_matrix[3 * i + j][k] = self.calc_cost(
                            debate, adj, adjustment)

            print "optimizing"

            indexes = m.compute(cost_matrix)

            cost = 0
            for r, c in indexes:
                cost += cost_matrix[r][c]

            print 'total cost for panellists', cost

            # transfer the indices to the debates
            # the debate corresponding to row r is floor(r/3) (i.e. r // 3)
            p = [[] for i in range(n)]
            for r, c in indexes[:n * 3]:
                p[r // 3].append(panellists[c])

            # create the corresponding adjudicator allocations, making sure
            # that the chair is the highest-ranked adjudicator in the panel
            for i, d in enumerate(panel_debates):
                a = AdjudicatorAllocation(d)
                p[i].sort(key=lambda a: a.score, reverse=True)
                a.chair = p[i].pop(0)
                a.panel = p[i]
                alloc.append(a)

        print[(a.debate, a.chair, a.panel) for a in alloc[len(chairs):]]

        return alloc
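Example #21 squares its panellist matrix by appending dummy debates whose cost is zero, then ignores the rows that belong to dummies. A small sketch of that padding trick (toy numbers only; recent munkres releases can also pad rectangular matrices themselves):

from munkres import Munkres

real_costs = [
    [7, 2, 5, 9],   # debate 0 against four panellists
    [4, 8, 3, 6],   # debate 1
]
n_rows, n_cols = len(real_costs), len(real_costs[0])

# pad with zero-cost dummy rows so the matrix is square
cost_matrix = [row[:] for row in real_costs]
cost_matrix += [[0] * n_cols for _ in range(n_cols - n_rows)]

assignments = Munkres().compute(cost_matrix)
real_assignments = [(r, c) for r, c in assignments if r < n_rows]
print(real_assignments)   # [(0, 1), (1, 2)] for these toy costs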
def matching_eigenvectors_of_modes(number_of_modes, eigenvectors_1,
                                   eigenvectors_2):
    r"""
    Matches the eigenvectors of two crystal structures and returns new order of the modes and the weight to match each
    mode.

    Parameters
    ----------
    number_of_modes: int
        Number of vibrational modes in the crystal lattice
    eigenvectors_1: List[float]
        Eigenvectors of the reference crystal structure in the form of a 3N x 3N matrix, where N is the number of atoms
        in the crystal lattice.
    eigenvectors_2: List[float]
        Eigenvectors of the crystal structure that needs the modes reorganized in the form of a 3N x 3N matrix, where N
        is the number of atoms in the crystal lattice.

    Returns
    -------
    z: List[int]
        Index pairs that reorder eigenvectors_2 to match eigenvectors_1.
    weight: List[float]
        Weight describing how well the reordered eigenvectors matched. A weight of 0 indicates a perfect match.

    """
    # Using the Munkres matching package
    m = Munkres()

    # Setting a matrix to save the weights to match all of the vibrational modes
    weight = np.zeros((number_of_modes - 3, number_of_modes - 3))

    # Cycling through all modes to apply a weight of how well they match
    for i in range(3, number_of_modes):
        # The matching weight is 1 - cos(theta); first compute cos(theta) between the two eigenvectors in the same position
        diff = np.dot(eigenvectors_1[i], eigenvectors_2[i]) \
               / (np.linalg.norm(eigenvectors_1[i]) * np.linalg.norm(eigenvectors_2[i]))

        if np.absolute(diff) > 0.95:
            # If |cos(theta)| is close to 1 for the same-index comparison, the eigenvectors are already well matched,
            #    so every other comparison for this mode is given a much higher weight
            weight[i - 3] = 10000000.
            weight[i - 3, i - 3] = 1. - diff
        else:
            # Otherwise compute the weight against every other eigenvector
            for j in range(3, number_of_modes):
                # Check whether the match improves when the direction of either eigenvector is flipped
                #    (eigenvectors describe vibrations, so the opposite direction is equally valid)
                hold_weight = np.zeros(4)
                hold_weight[0] = 1 - np.dot(-1 * eigenvectors_1[i], eigenvectors_2[j]) \
                                 / (np.linalg.norm(-1 * eigenvectors_1[i]) * np.linalg.norm(eigenvectors_2[j]))
                hold_weight[1] = 1 - np.dot(eigenvectors_1[i], -1 * eigenvectors_2[j]) \
                                 / (np.linalg.norm(eigenvectors_1[i]) * np.linalg.norm(-1 * eigenvectors_2[j]))
                hold_weight[2] = 1 - np.dot(eigenvectors_1[i], eigenvectors_2[j]) \
                                 / (np.linalg.norm(eigenvectors_1[i]) * np.linalg.norm(eigenvectors_2[j]))
                hold_weight[3] = 1 - np.dot(-1 * eigenvectors_1[i], -1 * eigenvectors_2[j]) \
                                 / (np.linalg.norm(-1 * eigenvectors_1[i])*np.linalg.norm(-1 * eigenvectors_2[j]))

                # The weight matching the two eigenvectors is the minimum value computed
                weight[i - 3, j - 3] = min(hold_weight)

    # Using the Hungarian algorithm to match wavenumbers
    Wgt = m.compute(weight)
    x, y = zip(*Wgt)
    z = np.column_stack((x, y))
    z = z + 3
    return z, weight[z[:, 0] - 3, z[:, 1] - 3]
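The routine above scores each pair of eigenvectors with 1 - cos(theta), taking the better of the two sign conventions, and lets Munkres pick the overall best reordering. A toy sketch of the same weighting on two small 2-D vector sets:

import numpy as np
from munkres import Munkres

# the second set is shuffled and sign-flipped relative to the first
vecs_1 = np.array([[1.0, 0.0], [0.0, 1.0]])
vecs_2 = np.array([[0.0, -1.0], [1.0, 0.1]])

weight = np.zeros((len(vecs_1), len(vecs_2)))
for i, v1 in enumerate(vecs_1):
    for j, v2 in enumerate(vecs_2):
        cos_theta = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
        # the sign of an eigenvector is arbitrary, so keep the best of the +/- orientations
        weight[i, j] = 1.0 - abs(cos_theta)

order = Munkres().compute(weight.tolist())
print(order)   # [(0, 1), (1, 0)]: vecs_2 must be reordered (one vector is also flipped)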
Example #23
0
def perform_alignment(ref_instances, sys_instances, kernel, maximize=True):
    disallowed = {}
    max_sim = 0
    sim_matrix, component_matrix = [], []
    start = time.time_ns()
    report_matrix_stats = False  ### Boolean to report the stats
    report_matrix_start = False  ### Writes a line before the matrix processing begins
    report_matrix_print_threshold = 10000  ### Controls the size of the matrix to trigger output

    if report_matrix_stats and report_matrix_start and len(
            ref_instances) * len(sys_instances) > report_matrix_print_threshold:
        print("[Info] StartBigAlignment: {} x {} = {}".format(
            len(ref_instances), len(sys_instances),
            len(ref_instances) * len(sys_instances)))

    for s_i, s in enumerate(sys_instances):
        sim_row = []
        comp_row = []
        for r_i, r in enumerate(ref_instances):
            sim, comp = kernel(r, s)

            sim_row.append(sim)
            comp_row.append(comp)

            if sim == DISALLOWED:
                disallowed[(s_i, r_i)] = True
            else:
                if sim > max_sim: max_sim = sim

        sim_matrix.append(sim_row)
        component_matrix.append(comp_row)

    if maximize:

        def _mapper(sim):
            return max_sim + 1 if sim == DISALLOWED else (max_sim + 1) - sim
    else:

        def _mapper(sim):
            return max_sim + 1 if sim == DISALLOWED else sim

    matrix = make_cost_matrix(sim_matrix, _mapper)

    correct_detects, false_alarms, missed_detects = [], [], []
    unmapped_sys = set(range(0, len(sys_instances)))
    unmapped_ref = set(range(0, len(ref_instances)))
    if len(matrix) > 0:
        for s_i, r_i in Munkres().compute(matrix):
            if disallowed.get((s_i, r_i), False):
                continue

            unmapped_sys.remove(s_i)
            unmapped_ref.remove(r_i)
            try:
                correct_detects.append(
                    AlignmentRecord(ref_instances[r_i], sys_instances[s_i],
                                    sim_matrix[s_i][r_i],
                                    component_matrix[s_i][r_i],
                                    ref_instances[r_i].localization,
                                    sys_instances[s_i].localization,
                                    list(sys_instances[s_i].localization)[0]))
            except:
                correct_detects.append(
                    AlignmentRecord(ref_instances[r_i], sys_instances[s_i],
                                    sim_matrix[s_i][r_i],
                                    component_matrix[s_i][r_i], None, None,
                                    None))
    for r_i in unmapped_ref:
        try:
            missed_detects.append(
                AlignmentRecord(ref_instances[r_i], None, None, None,
                                ref_instances[r_i].localization, None,
                                list(ref_instances[r_i].localization)[0]))
        except:
            missed_detects.append(
                AlignmentRecord(ref_instances[r_i], None, None, None, None,
                                None, None))

    for s_i in unmapped_sys:
        try:
            false_alarms.append(
                AlignmentRecord(None, sys_instances[s_i], None, None, None,
                                sys_instances[s_i].localization,
                                list(sys_instances[s_i].localization)[0]))
        except:
            false_alarms.append(
                AlignmentRecord(None, sys_instances[s_i], None, None, None,
                                None, None))

    if report_matrix_stats and len(ref_instances) * len(
            sys_instances) > report_matrix_print_threshold:
        print("[Info] BigAlignmentMatixTime: {} x {} = {}, {} sec.".format(
            len(ref_instances), len(sys_instances),
            len(ref_instances) * len(sys_instances),
            (time.time_ns() - start) / 1000000000))

    return (correct_detects, missed_detects, false_alarms)
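perform_alignment converts its similarity matrix into a cost matrix with make_cost_matrix and filters disallowed pairs out of the result afterwards. A compact alternative sketch, assuming a munkres release that ships the DISALLOWED sentinel so forbidden pairs are excluded inside the solver itself:

from munkres import Munkres, make_cost_matrix, DISALLOWED

# similarity matrix (higher is better); DISALLOWED marks forbidden pairings
sim_matrix = [
    [5, DISALLOWED, 2],
    [1, 4, DISALLOWED],
    [DISALLOWED, 2, 3],
]
max_sim = max(s for row in sim_matrix for s in row if s is not DISALLOWED)

# invert similarities into costs, keeping forbidden pairs forbidden
cost_matrix = make_cost_matrix(
    sim_matrix,
    lambda s: DISALLOWED if s is DISALLOWED else max_sim - s)

for r, c in Munkres().compute(cost_matrix):
    print(r, c, sim_matrix[r][c])   # (0, 0, 5), (1, 1, 4), (2, 2, 3)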
Example #24
0
def get_order(params, tile_struct, tilesegmentlists, exposurelist, observatory,
              config_struct):
    '''
    tile_struct: dictionary. key -> struct info. 
    tilesegmentlists: list of lists. Segments for each tile in tile_struct 
        that are available for observation.
    exposurelist: list of segments during which the telescope is scheduled to observe:
        consecutive segments from start to end, each with a length equal to the
        exposure time.
    Returns a list of tile indices in the order of observation.
    '''
    keys = tile_struct.keys()

    exposureids_tiles = {}
    first_exposure = np.inf * np.ones((len(keys), ))
    last_exposure = -np.inf * np.ones((len(keys), ))
    tileprobs = np.zeros((len(keys), ))
    tilenexps = np.zeros((len(keys), ))
    tileexptime = np.zeros((len(keys), ))
    tileexpdur = np.zeros((len(keys), ))
    tilefilts = {}
    tileavailable = np.zeros((len(keys), ))
    tileavailable_tiles = {}
    keynames = []

    nexps = 0
    for jj, key in enumerate(keys):
        tileprobs[jj] = tile_struct[key]["prob"]
        tilenexps[jj] = tile_struct[key]["nexposures"]
        try:
            tileexpdur[jj] = tile_struct[key]["exposureTime"]
        except:
            try:
                tileexpdur[jj] = tile_struct[key]["exposureTime"][0]
            except:
                tileexpdur[jj] = 0.0

        tilefilts[key] = copy.deepcopy(tile_struct[key]["filt"])
        tileavailable_tiles[jj] = []
        keynames.append(key)

        nexps = nexps + tile_struct[key]["nexposures"]

    if "dec_constraint" in config_struct:
        dec_constraint = config_struct["dec_constraint"].split(",")
        dec_min = float(dec_constraint[0])
        dec_max = float(dec_constraint[1])

    for ii in range(len(exposurelist)):
        exposureids_tiles[ii] = {}
        exposureids = []
        probs = []
        ras, decs = [], []
        for jj, key in enumerate(keys):
            tilesegmentlist = tilesegmentlists[jj]
            if tile_struct[key]["prob"] == 0: continue
            if "dec_constraint" in config_struct:
                if (tile_struct[key]["dec"] <
                        dec_min) or (tile_struct[key]["dec"] > dec_max):
                    continue
            if "epochs" in tile_struct[key]:
                if params["doMindifFilt"]:
                    #take into account filter for mindiff
                    idx = np.where(
                        np.asarray(tile_struct[key]["epochs_filters"]) ==
                        params["filters"][0])[0]
                    if np.any(
                            np.abs(exposurelist[ii][0] -
                                   tile_struct[key]["epochs"][idx, 2]) <
                            params["mindiff"] / 86400.0):
                        continue
                elif np.any(
                        np.abs(exposurelist[ii][0] -
                               tile_struct[key]["epochs"][:, 2]) <
                        params["mindiff"] / 86400.0):
                    continue
            if tilesegmentlist.intersects_segment(exposurelist[ii]):
                exposureids.append(key)
                probs.append(tile_struct[key]["prob"])
                ras.append(tile_struct[key]["ra"])
                decs.append(tile_struct[key]["dec"])

                first_exposure[jj] = np.min([first_exposure[jj], ii])
                last_exposure[jj] = np.max([last_exposure[jj], ii])
                tileavailable_tiles[jj].append(ii)
                tileavailable[jj] = tileavailable[jj] + 1
        # in every exposure, the tiles available for observation
        exposureids_tiles[ii]["exposureids"] = exposureids  # list of tile ids
        exposureids_tiles[ii]["probs"] = probs  # the corresponding probs
        exposureids_tiles[ii]["ras"] = ras
        exposureids_tiles[ii]["decs"] = decs

    exposureids = []
    probs = []
    ras, decs = [], []
    for ii, key in enumerate(keys):
        # tile_struct[key]["nexposures"]: the number of exposures assigned to this tile
        for jj in range(tile_struct[key]["nexposures"]):
            exposureids.append(
                key
            )  # list of tile ids for every exposure it is allocated to observe
            probs.append(tile_struct[key]["prob"])
            ras.append(tile_struct[key]["ra"])
            decs.append(tile_struct[key]["dec"])

    idxs = -1 * np.ones((len(exposureids_tiles.keys()), ))
    filts = ['n'] * len(exposureids_tiles.keys())

    if nexps == 0:
        return idxs, filts

    if params["scheduleType"] == "airmass_weighted":

        # # first step is to sort the array in order of descending probability
        indsort = np.argsort(-np.array(probs))
        probs = np.array(probs)[indsort]
        ras = np.array(ras)[indsort]
        decs = np.array(decs)[indsort]
        exposureids = np.array(exposureids)[indsort]

        tilematrix = np.zeros((len(exposurelist), len(ras)))
        probmatrix = np.zeros((len(exposurelist), len(ras)))

        for ii in np.arange(len(exposurelist)):

            # first, create an array of airmass-weighted probabilities
            t = Time(exposurelist[ii][0], format='mjd')
            altaz = get_altaz_tiles(ras, decs, observatory, t)
            alts = altaz.alt.degree
            horizon = config_struct["horizon"]
            horizon_mask = alts <= horizon
            airmass = 1 / np.cos((90. - alts) * np.pi / 180.)
            below_horizon_mask = horizon_mask * 10.**100
            airmass = airmass + below_horizon_mask
            airmass_weight = 10**(0.4 * 0.1 * (airmass - 1))
            tilematrix[ii, :] = np.array(probs / airmass_weight)
            probmatrix[ii, :] = np.array(probs * (True ^ horizon_mask))

    dt = (exposurelist[1][0] - exposurelist[0][0]) * 86400
    if params["scheduleType"] == "greedy":
        for ii in np.arange(len(exposurelist)):
            if idxs[ii] > 0: continue

            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime < params["mindiff"] /
                86400.0)[0]
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            # find_tile finds the tile that covers the largest probability,
            # restricted by tile availability and time allocation
            idx2, exposureids, probs = find_tile(
                exposureids_tiles[ii],
                exposureids,
                probs,
                exptimecheckkeys=exptimecheckkeys)
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]

                num = int(np.ceil(tileexpdur[idx] / dt))
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    for jj in range(num):
                        try:
                            filts[ii + jj] = filt
                        except:
                            pass
                for jj in range(num):
                    try:
                        idxs[ii + jj] = idx2
                    except:
                        pass
            else:
                idxs[ii] = idx2

            if not exposureids: break
    elif params["scheduleType"] == "greedy_slew":
        current_ra, current_dec = np.nan, np.nan
        for ii in np.arange(len(exposurelist)):
            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime < params["mindiff"] /
                86400.0)[0]
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            # find_tile finds the tile that covers the largest probability,
            # restricted by tile availability and time allocation
            idx2, exposureids, probs = find_tile(
                exposureids_tiles[ii],
                exposureids,
                probs,
                exptimecheckkeys=exptimecheckkeys,
                current_ra=current_ra,
                current_dec=current_dec,
                slew_rate=config_struct['slew_rate'],
                readout=config_struct['readout'])
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]

                num = int(np.ceil(tileexpdur[idx] / dt))
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    for jj in range(num):
                        try:
                            filts[ii + jj] = filt
                        except:
                            pass
                for jj in range(num):
                    try:
                        idxs[ii + jj] = idx2
                    except:
                        pass
                current_ra = tile_struct[idx2]["ra"]
                current_dec = tile_struct[idx2]["dec"]
            else:
                idxs[ii] = idx2

            if not exposureids: break
    elif params["scheduleType"] == "sear":
        #for ii in np.arange(len(exposurelist)):
        iis = np.arange(len(exposurelist)).tolist()
        while len(iis) > 0:
            ii = iis[0]
            mask = np.where((ii == last_exposure) & (tilenexps > 0))[0]

            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime < params["mindiff"] /
                86400.0)[0]
            exptimecheckkeys = [keynames[x] for x in exptimecheck]

            if len(mask) > 0:
                idxsort = mask[np.argsort(tileprobs[mask])]
                idx2, exposureids, probs = find_tile(
                    exposureids_tiles[ii],
                    exposureids,
                    probs,
                    idxs=idxsort,
                    exptimecheckkeys=exptimecheckkeys)
                last_exposure[mask] = last_exposure[mask] + 1
            else:
                idx2, exposureids, probs = find_tile(
                    exposureids_tiles[ii],
                    exposureids,
                    probs,
                    exptimecheckkeys=exptimecheckkeys)
            if idx2 in keynames:
                idx = keynames.index(idx2)
                tilenexps[idx] = tilenexps[idx] - 1
                tileexptime[idx] = exposurelist[ii][0]
                if len(tilefilts[idx2]) > 0:
                    filt = tilefilts[idx2].pop(0)
                    filts[ii] = filt
            idxs[ii] = idx2
            iis.pop(0)

            if not exposureids: break

    elif params["scheduleType"] == "weighted":
        for ii in np.arange(len(exposurelist)):
            jj = exposureids_tiles[ii]["exposureids"]
            weights = tileprobs[jj] * tilenexps[jj] / tileavailable[jj]
            weights[~np.isfinite(weights)] = 0.0

            exptimecheck = np.where(
                exposurelist[ii][0] - tileexptime < params["mindiff"] /
                86400.0)[0]
            weights[exptimecheck] = 0.0

            if np.any(weights >= 0):
                idxmax = np.argmax(weights)
                idx2 = jj[idxmax]
                if idx2 in keynames:
                    idx = keynames.index(idx2)
                    tilenexps[idx] = tilenexps[idx] - 1
                    tileexptime[idx] = exposurelist[ii][0]
                    if len(tilefilts[idx2]) > 0:
                        filt = tilefilts[idx2].pop(0)
                        filts[ii] = filt
                idxs[ii] = idx2
            tileavailable[jj] = tileavailable[jj] - 1

    elif params["scheduleType"] == "airmass_weighted":
        # then use the Hungarian algorithm (munkres) to schedule high prob tiles at low airmass
        tilematrix_mask = tilematrix > 10**(-10)

        if tilematrix_mask.any():
            print("Calculating Hungarian solution...")
            total_cost = 0
            cost_matrix = make_cost_matrix(tilematrix)
            m = Munkres()
            optimal_points = m.compute(cost_matrix)
            print("Hungarian solution calculated...")
            max_no_observ = min(tilematrix.shape)
            for jj in range(max_no_observ):
                idx0, idx1 = optimal_points[jj]
                # idx0 indexes over the time windows, idx1 indexes over the probabilities
                # idx2 gets the exposure id of the tile, used to assign tileexptime and tilenexps
                try:
                    idx2 = exposureids[idx1]
                    pamw = tilematrix[idx0][idx1]
                    total_cost += pamw
                    if len(tilefilts[idx2]) > 0:
                        filt = tilefilts[idx2].pop(0)
                        filts[idx0] = filt
                except:
                    continue

                idxs[idx0] = idx2

        else:
            print("The localization is not visible from the site.")

    else:
        raise ValueError(
            "Scheduling options are greedy/sear/weighted/airmass_weighted, or with _slew."
        )

    return idxs, filts
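In the airmass_weighted branch above, each exposure slot is assigned at most one tile by minimizing a cost derived from the probability/airmass reward matrix. A toy sketch of that step; the example calls a make_cost_matrix helper whose inversion is not shown here, so the plain max-minus-value inversion below is an assumption:

import numpy as np
from munkres import Munkres

# rows are exposure slots, columns are tiles; entries are airmass-weighted probabilities
tilematrix = np.array([
    [0.30, 0.10, 0.05],
    [0.20, 0.25, 0.05],
    [0.05, 0.10, 0.40],
])
cost = (tilematrix.max() - tilematrix).tolist()   # maximizing the reward == minimizing this cost

schedule = {slot: tile for slot, tile in Munkres().compute(cost)}
print(schedule)   # {0: 0, 1: 1, 2: 2} for this toy matrix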
def synaptic_partners_fscore(rec_annotations,
                             gt_annotations,
                             gt_segmentation,
                             matching_threshold=400,
                             all_stats=False):
    """Compute the f-score of the found synaptic partners.

    Parameters
    ----------

    rec_annotations: Annotations, containing found synaptic partners

    gt_annotations: Annotations, containing ground truth synaptic partners

    gt_segmentation: Volume, ground truth neuron segmentation

    matching_threshold: float, world units
        Euclidean distance threshold to consider two annotations a potential
        match. Annotations that are `matching_threshold` or more units apart
        from each other are not considered as potential matches.

    all_stats: boolean, optional
        Whether to also return precision, recall, FP, FN, and matches as a 6-tuple with f-score

    Returns
    -------

    fscore: float
        The f-score of the found synaptic partners.
    precision: float, optional
    recall: float, optional
    fp: int, optional
    fn: int, optional
    filtered_matches: list of tuples, optional
        The indices of the matches with matching costs.
    """

    # get cost matrix
    costs = cost_matrix(rec_annotations, gt_annotations, gt_segmentation,
                        matching_threshold)

    # match using Hungarian method
    print "Finding cost-minimal matches..."
    munkres = Munkres()
    matches = munkres.compute(costs.copy(
    ))  # have to copy, because munkres changes the cost matrix...

    filtered_matches = [(i, j, costs[i][j]) for (i, j) in matches
                        if costs[i][j] <= matching_threshold]
    print str(len(filtered_matches)) + " matches found"

    # unmatched in rec = FP
    fp = len(rec_annotations.pre_post_partners) - len(filtered_matches)

    # unmatched in gt = FN
    fn = len(gt_annotations.pre_post_partners) - len(filtered_matches)

    # all ground truth elements - FN = TP
    tp = len(gt_annotations.pre_post_partners) - fn

    precision = float(tp) / (tp + fp)
    recall = float(tp) / (tp + fn)
    fscore = 2.0 * precision * recall / (precision + recall)

    if all_stats:
        return (fscore, precision, recall, fp, fn, filtered_matches)
    else:
        return fscore
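The precision/recall/f-score bookkeeping at the end reduces to a few counts; a worked toy calculation of the same arithmetic:

# toy numbers only
n_rec, n_gt = 10, 8      # found and ground-truth partner pairs
n_matched = 6            # pairs that survive the matching_threshold filter

fp = n_rec - n_matched   # 4 unmatched reconstructed pairs
fn = n_gt - n_matched    # 2 unmatched ground-truth pairs
tp = n_gt - fn           # 6

precision = tp / (tp + fp)                                 # 0.6
recall = tp / (tp + fn)                                    # 0.75
fscore = 2.0 * precision * recall / (precision + recall)   # ~0.667
print(precision, recall, fscore)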
Example #26
0
    def __init__(self, lr=1e-3, batchs=8, cuda=True):
        '''
        :param tt: train_test
        :param tag: 1 - evaluation on testing data, 0 - without evaluation on testing data
        :param lr:
        :param batchs:
        :param cuda:
        '''
        # all tensors should be created with 'volatile' set to True, and switched to False when the network is updated
        self.hungarian = Munkres()
        self.device = torch.device("cuda" if cuda else "cpu")
        self.nEpochs = 999
        self.lr = lr
        self.batchsize = batchs
        self.numWorker = 4

        self.show_process = 0  # interaction
        self.step_input = 1

        print '     Preparing the model...'
        self.resetU()

        self.Uphi = uphi().to(self.device)
        self.Ephi = ephi().to(self.device)

        self.criterion = nn.MSELoss() if criterion_s else nn.CrossEntropyLoss()
        self.criterion = self.criterion.to(self.device)

        self.criterion_v = nn.MSELoss().to(self.device)

        self.optimizer = optim.Adam([{
            'params': self.Uphi.parameters()
        }, {
            'params': self.Ephi.parameters()
        }],
                                    lr=lr)

        # seqs = [2, 4, 5, 9, 10, 11, 13]
        # lengths = [600, 1050, 837, 525, 654, 900, 750]
        seqs = [2, 4, 5, 10]
        lengths = [600, 1050, 837, 654]

        for i in xrange(len(seqs)):
            self.writer = SummaryWriter()
            # print '     Loading Data...'
            seq = seqs[i]
            self.seq_index = seq
            start = time.time()
            sequence_dir = '../MOT/MOT16/train/MOT16-%02d' % seq

            self.outName = t_dir + 'result_%02d.txt' % seq
            out = open(self.outName, 'w')
            out.close()

            self.train_set = DatasetFromFolder(sequence_dir, self.outName)

            self.train_test = lengths[i]
            self.tag = 0
            self.loss_threhold = 0.03
            self.update()

            print '     Logging...'
            t_data = time.time() - start
            self.log(t_data)
Example #27
0
    def train(self,
              dataset,
              learning_rate,
              batch_size,
              display_step=100,
              num_epoch=100,
              labeled=False):
        losses = []
        LS = []
        reg_arr = []
        precision_arr = []
        recall_arr = []
        Spectral_Kmeans_acc_arr = []
        self.display_step = display_step
        self.batch_size = batch_size
        print("num_samples : {}".format(dataset.num_samples))
        for epoch in range(num_epoch):
            avg_loss = 0.
            avg_score = 0.
            reg_loss = 0.

            # Loop over all batches
            amount_batchs = dataset.get_amuont_batchs(self.batch_size)
            for i in range(amount_batchs):
                if labeled:
                    batch_xs, batch_ys = dataset.next_batch(self.batch_size)
                else:
                    batch_xs = dataset.next_batch(self.batch_size)
                _, loss, laplacian_score, reg_fs = self.sess.run([self.train_step, self.loss, self.laplacian_score, self.reg_gates], \
                                                          feed_dict={self.X: batch_xs, self.learning_rate:learning_rate})

                avg_loss += loss / amount_batchs
                avg_score += laplacian_score / amount_batchs
                reg_loss += reg_fs / amount_batchs

            alpha_p = self.get_prob_alpha()
            precision = np.sum(alpha_p[:2]) / np.sum(alpha_p[:])
            recall = np.sum(alpha_p[:2]) / 2
            losses.append(avg_loss)
            LS.append(avg_score)
            reg_arr.append(reg_loss)
            recall_arr.append(recall)
            precision_arr.append(precision)

            if (epoch + 1) % self.display_step == 0:
                print("Epoch:", '%04d' % (epoch+1), "loss=", "{:.9f}".format(avg_loss), "score=", "{:.9f}".format(avg_score)\
                      ,"reg=", "{:.9f}".format(reg_fs) )

                if labeled:

                    indices = np.where(alpha_p > 0)[0]
                    XS = batch_xs[:, indices]
                    if XS.size == 0:
                        XS = np.zeros((batch_xs.shape[0], 1))
                        Spectral_Kmeans_acc_arr.append(
                            cluster_acc(batch_ys, batch_ys * 0))

                    else:

                        Dist = squareform(pdist(XS))
                        fac = 5 / np.max(
                            np.min(Dist + np.eye(XS.shape[0]) * 1e5, axis=1))
                        clustering = SpectralClustering(
                            n_clusters=2,
                            affinity='rbf',
                            gamma=fac,
                            assign_labels="discretize",
                            random_state=0).fit(XS)

                        netConfusion = ms.confusion_matrix(batch_ys,
                                                           clustering.labels_,
                                                           labels=None)
                        netCostMat = calcCostMatrix(netConfusion, 2)
                        m = Munkres()
                        indexes = m.compute(netCostMat)
                        clusterLabels = getClusterLabelsFromIndexes(indexes)
                        netSolClassVectorOrdered = clusterLabels[
                            clustering.labels_].astype('int')
                        accuracy_ls = np.mean(
                            netSolClassVectorOrdered == np.array(batch_ys))
                        Spectral_Kmeans_acc_arr.append(accuracy_ls)

        print("Optimization Finished!")

        return LS, losses, reg_arr, precision_arr, recall_arr, Spectral_Kmeans_acc_arr
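Example #27 measures clustering accuracy by aligning cluster labels with the true labels through a Hungarian match on the confusion matrix; its calcCostMatrix and getClusterLabelsFromIndexes helpers are not shown here. A self-contained sketch of the standard recipe, assuming scikit-learn is available:

import numpy as np
from munkres import Munkres
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 0, 1, 1, 1, 0])
y_pred = np.array([1, 1, 0, 0, 0, 1])   # same clusters, opposite label names

conf = confusion_matrix(y_true, y_pred)   # rows: true labels, columns: cluster labels
cost = (conf.max() - conf).tolist()       # maximizing diagonal mass == minimizing this cost
mapping = {cluster: true for true, cluster in Munkres().compute(cost)}

aligned = np.array([mapping[p] for p in y_pred])
print(np.mean(aligned == y_true))   # 1.0: the clustering is perfect up to label names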
Example #28
0
def plotTradeRatios(mp,
                    fa,
                    objLabels,
                    preconditioner=None,
                    numToSample=75,
                    pixPerSide=10):
    if preconditioner is None:
        tr = mp.tradeRatios
    else:
        tr = preconditioner(mp.tradeRatios)

    if numToSample is None:
        samples = mp.paretoSamples
    else:
        dataSize = mp.paretoSamples.shape[0]
        smplIndxsWithReplacement = np.random.randint(0,
                                                     dataSize,
                                                     size=min(
                                                         dataSize,
                                                         numToSample))
        samples = mp.paretoSamples[smplIndxsWithReplacement, :]
    U, S, V = np.linalg.svd(samples)
    locs2d = np.dot(samples, V)[:, :2]
    ranges = np.ptp(locs2d, axis=0)
    locs2dNoised = locs2d + np.random.random(
        locs2d.shape) * ranges[np.newaxis, :] * 1 / 1000
    rangesNoised = np.ptp(locs2dNoised, axis=0)
    # gradient=fa.reconstructDerivative(locations=mp.projectToPlaneCoor(samples))

    gridLocs = (np.mgrid[0:pixPerSide, 0:pixPerSide] +
                0.5) / pixPerSide * (rangesNoised[:, np.newaxis, np.newaxis])
    gridIndxs = np.mgrid[0:pixPerSide, 0:pixPerSide]
    flatLocs = np.array([arr.flatten() for arr in gridLocs]).T
    flatIndxs = np.array([arr.flatten() for arr in gridIndxs]).T
    dists = spst.distance.cdist(
        flatLocs - np.mean(flatLocs, axis=0)[np.newaxis, :],
        locs2dNoised - np.mean(locs2dNoised, axis=0)[np.newaxis, :])

    accum = [[
        None,
    ] * len(tr) for cnt in range(len(tr))]
    for k in range(len(accum)):
        accum[k][k] = np.ones((pixPerSide, pixPerSide))
    print('beginning matching with ' + str(dists.shape) + ' sized matrix')
    if dists.shape[0] <= dists.shape[
            1]:  # number of drawn pixels < number of elements in the sample
        matches = np.array(
            Munkres().compute(dists)
        )  # sparse representation. [:,0] is the pixel index, [:,1] is the matching sample index
        print('finished matching')
        for i, j in it.combinations(range(len(tr)), 2):
            imageMat = np.ones((pixPerSide, pixPerSide)) * tr[i, j]
            # imageMat[flatIndxs[:,0],flatIndxs[:,1]]=gradient[matches[:,0],i]/gradient[matches[:,1],j]# set to the values of the tradeoffs at the elements of the samples.
            imageMat[flatIndxs[matches[:, 0], 0],
                     flatIndxs[matches[:, 0], 1]] = np.squeeze(
                         tradeoffCalc(mp, fa, samples[matches[:, 1], :], i, j))
            accum[i][j] = imageMat
            accum[j][i] = 1 / imageMat
    else:
        matches = np.array(Munkres().compute(
            dists.T))  # [:,0] is the sample index, [:,1] is the pixel index

        # check matching
        # plt.plot(flatLocs[:,0],flatLocs[:,1],'.',samples[:,0],samples[:,1],'o')
        # plt.plot(np.vstack((samples[matches[:,0],0],flatLocs[matches[:,1],0])),np.vstack((samples[matches[:,0],1],flatLocs[matches[:,1],1])))
        # plt.show()

        print('finished matching')
        for i, j in it.combinations(range(len(tr)), 2):
            imageMat = np.ones((pixPerSide, pixPerSide)) * tr[i, j]
            # imageMat[matches[:,0],matches[:,1]]=gradient[flatIndxs[:,0],i]/gradient[flatIndxs[:,1],j]# set to the values of the tradeoffs at the elements of the samples.
            imageMat[flatIndxs[matches[:, 1], 0],
                     flatIndxs[matches[:, 1], 1]] = np.squeeze(
                         tradeoffCalc(mp, fa, samples[matches[:, 0], :], i, j))
            accum[i][j] = imageMat
            accum[j][i] = 1 / imageMat
    # reorderArr=np.argsort(np.mean(tr,axis=0))
    reorderArr = np.arange(len(tr))
    objLabels_reorder = list(map(lambda i: objLabels[i],
                                 range(len(objLabels))))
    accumReorder = [[accum[l][k] for k in reorderArr] for l in reorderArr]
    toPlot = np.vstack(tuple(map(np.hstack, map(tuple, accumReorder))))

    toPlot = np.log(np.abs(toPlot))

    plt.imshow(toPlot, cmap=globalCmap, interpolation='nearest')
    plt.colorbar()
    plt.xticks(
        range(pixPerSide // 2, pixPerSide * len(objLabels_reorder),
              pixPerSide), objLabels_reorder)
    plt.yticks(
        range(pixPerSide // 2, pixPerSide * len(objLabels_reorder),
              pixPerSide), objLabels_reorder)
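plotTradeRatios matches drawn pixels to Pareto samples through a pixel-to-sample distance matrix, transposing it so the smaller dimension indexes the rows before calling Munkres, and reading the pairs off in the corresponding order. A minimal sketch of that orientation handling on random toy points:

import numpy as np
from scipy.spatial import distance
from munkres import Munkres

pixels = np.random.rand(6, 2)    # more pixels...
samples = np.random.rand(4, 2)   # ...than samples

dists = distance.cdist(pixels, samples)
if dists.shape[0] <= dists.shape[1]:
    matches = Munkres().compute(dists.tolist())      # pairs are (pixel, sample)
else:
    matches = Munkres().compute(dists.T.tolist())    # pairs are (sample, pixel)
print(np.array(matches).shape)   # (4, 2): every sample gets a distinct pixel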
Example #29
0
def generate_matrix(hsvPalette1, hsvPalette2):
    len1 = len(hsvPalette1)
    len2 = len(hsvPalette2)
    matrix = []
    if len1 >= len2:
        for i in range(0, len1):
            matrix.append([])
        for j in range(0, len1):
            for k in range(0, len1):
                matrix[j].append(1000000)
        for i in range(0, len1):
            for j in range(0, len2):
                matrix[j][i] = int(
                    round(
                        color_distance(hsvPalette1[i], hsvPalette2[j]) *
                        1000)) + 1
    return matrix


matrix = generate_matrix(c8, c6)
print matrix
m = Munkres()
indexes = m.compute(matrix)

print_matrix(matrix, msg='Lowest cost through this matrix:')
total = 0
for row, column in indexes:
    value = matrix[row][column]
    total += value
    print '(%d, %d) -> %d' % (row, column, value)
print 'total cost: %d' % total
Example #30
0
def main(args):
    gt_file = './armstrong-jan2018.json'
    #gt_file = './armstrong-setcore-20171115.json'
    #gt_file = '/Users/ashokdeb/Desktop/test/effect-forecasting-models/deb/warning_ARIMA_p_7_d_1_q_7_armstrong_endpoint-malware_2017-06-30.json'
    # warning_dir = './cause-effect/warnings/'
    warning_dir = './score_warnings/'

    mth = args.mth
    external_signal_name = args.ext.split('.')[0]
    method = args.method
    event_type = args.evt

    if mth == "July":
        start_date = datetime.date(2017, 7, 1)
        end_date = datetime.date(2017, 7, 31)
    elif mth == "August":
        start_date = datetime.date(2017, 8, 1)
        end_date = datetime.date(2017, 8, 31)
    elif mth == "September":
        start_date = datetime.date(2017, 9, 1)
        end_date = datetime.date(2017, 9, 30)
    elif mth == "October":
        start_date = datetime.date(2017, 10, 1)
        end_date = datetime.date(2017, 10, 31)
    elif mth == "November":
        start_date = datetime.date(2017, 11, 1)
        end_date = datetime.date(2017, 11, 30)
    elif mth == "December":
        start_date = datetime.date(2017, 12, 1)
        end_date = datetime.date(2017, 12, 31)
    elif mth == "January":
        start_date = datetime.date(2018, 1, 1)
        end_date = datetime.date(2018, 1, 31)

    #need to adjust
    #start_date = datetime.date(2017, 7, 1)
    #end_date = datetime.date(2017, 7, 31)
    #start_date = datetime.date(2017, 8, 1)
    #end_date = datetime.date(2017, 8, 31)
    #start_date = datetime.date(2017, 9, 1)
    #end_date = datetime.date(2017, 9, 30)

    target_org = args.target
    #target_org = 'knox'

    # Potentially filter by event type
    #   None -> Score for ALL event types
    #   Otherwise, restrict to 'endpoint-malware', 'malicious-email', or 'malicious-destination'
    #event_type = None

    # First need to parse input data
    warnings = load_warnings(warning_dir, target_org, event_type, start_date,
                             end_date, external_signal_name, method)
    events = load_gt(gt_file, target_org, event_type, start_date, end_date)

    print("# warnings = %d" % len(warnings))
    print("# events = %d" % len(events))

    # Now need to perform matching on warnings + events
    # To do this first have to score every possible warning-event pair
    # The official pair_objects.py is heavily dependent on python classes *not* provided to us by govt, so we recreate here
    M = Metrics()
    matching_matrix = np.zeros((len(events), len(warnings)))
    matching_dict = dict()
    for e_idx in range(len(events)):
        for w_idx in range(len(warnings)):
            # Check if we meet base criteria threshold
            if warnings[w_idx].event_type == "endpoint-malware":
                date_th = 0.875
            elif warnings[w_idx].event_type == "malicious-email":
                date_th = 1.375
            elif warnings[w_idx].event_type == "malicious-destination":
                date_th = 1.625
            else:
                raise Exception("Unknown event_type: %s" %
                                warnings[w_idx].event_type)

            if M.base_criteria(events[e_idx], warnings[w_idx], thr2=date_th):
                pair = Pair.build(warnings[w_idx], events[e_idx],
                                  'fake-performer', 'fake-provider')
                matching_matrix[e_idx, w_idx] = -pair.quality
                matching_dict["%d,%d" % (e_idx, w_idx)] = pair

    # Now do Hungarian matching
    munk = Munkres()
    pairings = munk.compute(matching_matrix.tolist())
    valid_pairings = list(
        filter(lambda p: matching_matrix[p[0], p[1]] != 0, pairings))

    nMatched = len(valid_pairings)
    nUnmatchedGT = len(events) - nMatched
    nUnmatchedW = len(warnings) - nMatched

    avg_qs = 0
    for e_idx, w_idx in valid_pairings:
        # pair = matching_dict["%d,%d" % (e_idx, w_idx)]
        # avg_qs += pair.quality
        avg_qs += matching_matrix[e_idx, w_idx]

    if len(valid_pairings) != 0:
        avg_qs /= len(valid_pairings)
        avg_qs = -1 * avg_qs
        recall = nMatched / (nMatched + nUnmatchedGT)
        precision = nMatched / (nMatched + nUnmatchedW)
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        avg_qs = recall = precision = f1 = 0

    print("Precision = %0.2f%%" % (100 * precision))
    print("Recall = %0.2f%%" % (100 * recall))
    print("Average Quality Score = %0.2f" % avg_qs)

    new_results = [
        target_org, event_type, mth,
        len(events),
        len(warnings), external_signal_name,
        np.around(100 * precision, decimals=2),
        np.around(100 * recall, decimals=2),
        np.around(100 * f1, decimals=2),
        np.around(avg_qs, decimals=2)
    ]

    with open('./output/results.csv', 'a') as f:
        writer = csv.writer(f)
        writer.writerow(new_results)
    return
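The matcher in main maximizes total pair quality by storing negated qualities and letting Munkres minimize, then drops assignments whose entry stayed at zero (pairs that never met the base criteria). A toy sketch of that pattern, assuming a munkres release that accepts rectangular matrices:

import numpy as np
from munkres import Munkres

# toy quality scores for (event, warning) pairs; 0 means the pair never met the base criteria
quality = np.array([
    [0.9, 0.0, 0.4],
    [0.0, 0.7, 0.0],
])
matching_matrix = -quality   # minimizing negated quality maximizes total quality

pairings = Munkres().compute(matching_matrix.tolist())
valid_pairings = [(e, w) for e, w in pairings if matching_matrix[e, w] != 0]
print(valid_pairings)   # [(0, 0), (1, 1)]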