Beispiel #1
0
 def __call__(self, params, update=True):
     """Compare ``params`` with the previously stored parameters and report convergence.

     On the first call (nothing stored yet) no comparison is made: the
     parameters are stored when ``update`` is true and ``False`` is returned.
     On later calls the summed absolute difference to the stored parameters
     (``dist``) is computed, the delta history kept on ``self`` is shifted by
     one step, a one-line progress report is written to stdout, and the
     convergence verdict is returned.

     Args:
         params: Current parameters. Must support subtraction,
             ``.cpu().numpy()`` and ``len()`` (presumably a 1-D torch
             tensor -- confirm against the caller).
         update: When true, remember ``params`` for the next call.

     Returns:
         A truthy value when ``dist <= self._tol * len(params)``, or when the
         current delta differs from the delta of two calls ago by less than
         half of its difference to the previous call's delta (and that
         two-step difference is non-zero). ``False`` on the first call.
     """
     previous = self._prev_params
     self._prev_params2 = previous
     if previous is None:
         # Nothing to compare against yet.
         if update:
             self._prev_params = nanguardt(params)
         return False

     delta = (previous - params).cpu().numpy()
     mags = numpy.abs(delta)
     dist = nanguard(numpy.sum(mags))

     # Shift the delta history one step back before recording the new delta.
     self._last_delta2 = self._last_delta
     self._last_delta = self.delta
     self.delta = delta
     self.dist = dist

     def step_distance(older_delta):
         # Summed absolute change between an earlier delta and the current
         # one; 0 when that earlier delta has not been recorded yet.
         if older_delta is None:
             return 0
         return nanguard(numpy.sum(numpy.abs(older_delta - delta)))

     delta_2step = step_distance(self._last_delta2)
     delta_1step = step_distance(self._last_delta)

     largest = numpy.argsort(delta)
     if update:
         self._prev_params = params

     spaces = " " * 6
     # NOTE(review): this slice takes the last four entries of the ascending
     # argsort of the *signed* delta, largest first -- confirm that four
     # entries ranked by signed value (not magnitude) is what is wanted.
     top_indices = largest[:-5:-1]
     ups = ', '.join(f'{idx}={delta[idx]:0.3f}' for idx in top_indices)
     mag_std = numpy.std(mags)
     report = f"converged dist: {dist:0.4f}. /param: {dist/len(params):0.3e}. two step dist: {delta_2step:0.3f}, 1step: {delta_1step:0.3f}, mag std: {mag_std}. top updates: {ups}{spaces}\n"
     sys.stdout.write(report)
     sys.stdout.flush()

     within_tolerance = dist <= self._tol * len(params)
     return within_tolerance or (
         delta_2step < delta_1step / 2 and delta_2step > 0)
def from_history(self, history):
    """Replay a list of stored history entries through ``self.update``.

    Entries without an ``"items"`` key are ignored. For entries carrying a
    ``"parent"`` key, ``item["hash"]`` must equal the SHA-256 hex digest of
    ``"/<parent>/<name>"``; otherwise a bare ``Exception`` is raised.

    Each remaining entry gets ``item["dur"] = viewend - viewstart``. An entry
    whose duration is below ``opts.minview`` is discarded, and the first such
    entry in a consecutive run also discards the most recently kept entry.
    The surviving entries are then passed, in order, to
    ``self.update(item, initial=True)``.

    Args:
        history: Iterable of dict-like history entries; entries are mutated
            (``"dur"`` is written).
    """
    from web.util import nanguard
    from web import opts
    retained = []
    in_short_run = False
    for idx, item in enumerate(history):
        if "items" not in item:
            continue
        if "parent" in item:
            path = "/" + item["parent"] + "/" + item["name"]
            recorded = item["hash"]
            # `ue` is the encode error handler, defined outside this block.
            expected = hashlib.sha256(path.encode("utf-8", ue)).hexdigest()
            if recorded != expected:
                raise Exception()
        view_end = nanguard(item.get("viewend", 0),
                            f"from_history#{idx}.viewend")
        view_start = nanguard(item.get("viewstart", 0),
                              f"from_history#{idx}.viewstart")
        item["dur"] = view_end - view_start
        if nanguard(item.get("dur", 0)) < opts.minview:
            # Only the first short view of a run removes the preceding entry.
            if retained and not in_short_run:
                retained.pop()
            in_short_run = True
            continue
        in_short_run = False
        retained.append(item)
    for item in retained:
        self.update(item, initial=True)
Beispiel #3
0
def calc_next_index(self,
                    h,
                    vals,
                    weights,
                    dists_out,
                    debug=False,
                    force=False,
                    existing_val=None):
    """Decide where in the sorted model item ``h`` should be compared next.

    The soft bounds of ``vals`` (``self.softmin`` of each side) are clamped to
    ``[-self.max(), self.max()]`` and converted to indices with
    ``self.getidx``. The width of that clamped interval is stored in
    ``dists_out[h]``.

    Args:
        h: Key under which the interval width is stored in ``dists_out``.
        vals: Pair ``(lower_side_values, upper_side_values)``.
        weights: Accepted for interface compatibility; not used here.
        dists_out: Mapping that receives ``high - low`` for ``h``.
        debug: Accepted for interface compatibility; not used here.
        force: When true, never report the item as settled.
        existing_val: Value to use as the midpoint instead of the centre of
            the clamped interval.

    Returns:
        ``(index, info)``. ``index`` is ``None`` when the item is considered
        settled (enough comparisons seen, index interval narrower than the
        required precision, and not ``force``); otherwise it is the model
        index of the midpoint. ``info`` is a dict of the intermediate values.
    """
    from web.util import nanguard
    from web import opts
    model_low = self.min()
    model_high = self.max()
    soft_low = self.softmin(vals[0], inv=True)
    soft_high = self.softmin(vals[1])
    low = max(soft_low, -model_high)
    high = min(soft_high, model_high)
    dists_out[h] = high - low

    widx = self.getidx(low)
    lidx = self.getidx(high)
    mid = (low + high) / 2 if existing_val is None else existing_val
    midx = self.getidx(mid)

    # Relative position of the upper bound inside the model's value range.
    pos = nanguard((high - model_low) / (model_high - model_low))
    prec = opts.precision_func(pos)
    delta = nanguard(len(self.model) / prec) * 2

    lower_count = len(vals[0])
    upper_count = len(vals[1])
    seen_enough = (lower_count > opts.min_clean_wins
                   or lower_count + upper_count > opts.min_clean_compares)
    index_span = max(0, lidx - widx)
    finished_enough = index_span < delta
    is_goat = widx >= nanguard(len(self.model) - opts.goat_window)

    info = {
        "finished_enough": finished_enough,
        "seen_enough": seen_enough,
        "is_goat": is_goat,
        "wlen": lower_count,
        "llen": upper_count,
        "prec": prec,
        "midx": midx,
        "lidx": lidx,
        "widx": widx,
        "mid": mid,
        "pos": pos,
        "delta": delta,
        "adelta": index_span,
        "high": high,
        "low": low,
    }

    has_both_sides = lower_count > 0 and (is_goat or upper_count > 1)
    settled = (has_both_sides and seen_enough and finished_enough
               and not force)
    if settled:
        return None, info
    return midx, info
Beispiel #4
0
def calculate_dists(self, stats, comparisons, h):
    """Collect the model values of the items ``h`` has been compared with.

    Args:
        stats: Statistics object providing ``too_close`` and used by
            ``self.is_dropped``.
        comparisons: Mapping ``other_hash -> (wins0, wins1)`` for ``h``
            (presumably ``wins0`` counts the comparisons ``h`` won -- confirm
            against the code that fills ``stats.comparisons``).
        h: Hash of the item the comparisons belong to.

    Returns:
        ``(dists, weights)``. Each is a pair of lists; index 0 receives an
        entry for every opponent with ``wins0 > wins1`` and index 1 for every
        opponent with ``wins0 < wins1``. ``dists`` holds the opponent's model
        value, ``weights`` holds ``(majority_share - 0.5) * 2``. Both sides
        start with a sentinel entry (``-50`` / ``50`` with weight ``1``).

    Opponents without a model value, dropped opponents, records with no
    counted comparisons, ambiguous records (win ratio strictly between
    ``1 - opts.ambiguity_threshold`` and ``opts.ambiguity_threshold``) and
    exact ties are skipped.
    """
    from web.util import as_pair, nanguard
    from web import opts
    dists = ([-50], [50])
    weights = ([1], [1])
    for other_hash, wins in comparisons.items():
        other_val = self.getval(other_hash)
        if other_val is None:
            continue
        if self.is_dropped(stats, other_hash):
            continue
        pair = as_pair(h, other_hash)
        if pair in stats.too_close:
            # "Too close" votes are added to both sides of the record.
            prior_total = sum(wins)
            closeness = opts.too_close_boost * stats.too_close[pair]
            wins = tuple(
                nanguard(x + prior_total + closeness) for x in wins)
        total = wins[0] + wins[1]
        if not total:
            # An empty record is a tie with no evidence; skip it instead of
            # dividing by zero below.
            continue
        win_ratio = wins[0] / total
        if 1 - opts.ambiguity_threshold < win_ratio < opts.ambiguity_threshold:
            continue
        # Floor the denominator so that vanishingly small totals cannot
        # produce huge weights.
        denom = max(1e-10, total)
        if wins[0] > wins[1]:
            dists[0].append(other_val)
            weights[0].append((wins[0] / denom - 0.5) * 2)
        elif wins[0] < wins[1]:
            dists[1].append(other_val)
            weights[1].append((wins[1] / denom - 0.5) * 2)
    return dists, weights
Beispiel #5
0
def softmin(self, directional_distances, inv=False):
    """Return an exponentially weighted average of ``directional_distances``.

    Each value ``v`` is weighted by ``opts.softmin_falloff_per_unit ** -v``,
    capped at 2,000,000, and the weighted mean is returned. With ``inv`` the
    values are negated before weighting and the result is negated back.

    Args:
        directional_distances: Sequence of numbers.
        inv: Apply the weighting to the negated values.

    Returns:
        The weighted mean, or ``100`` when the input is empty (regardless of
        ``inv``).
    """
    from web.util import nanguard
    from web import opts
    if len(directional_distances) == 0:
        return 100
    values = nanguard(numpy.array(directional_distances))
    if inv:
        values = -values
    raw_weights = opts.softmin_falloff_per_unit**-values
    capped_weights = nanguard(numpy.minimum(raw_weights, 2000000))
    weighted_total = numpy.sum(capped_weights * values)
    weight_total = numpy.sum(capped_weights)
    result = nanguard(weighted_total / weight_total)
    return -result if inv else result
Beispiel #6
0
def getprob(self, item1, item2):
    """Return the model's choice probabilities for ``item1`` versus ``item2``.

    Args:
        item1: An item hash, or a dict (or dict subclass) with a ``"hash"``
            key.
        item2: Same as ``item1``.

    Returns:
        ``(p1, p2)`` as computed by ``choix.probabilities`` on the two model
        ids, or ``(0.5, 0.5)`` when the model is empty or either id lies
        beyond the current model.
    """
    import choix
    from web.util import nanguard

    def model_id(item):
        # isinstance (rather than an exact type comparison) so that dict
        # subclasses are unwrapped to their hash as well.
        key = item["hash"] if isinstance(item, dict) else item
        return self.getid(key)

    a = model_id(item1)
    b = model_id(item2)
    model_size = len(self.model)
    if not model_size or a >= model_size or b >= model_size:
        # No fitted parameter for at least one item: no preference known.
        return 0.5, 0.5
    ra, rb = choix.probabilities([a, b], self.model)
    return nanguard(ra, "ra"), nanguard(rb, "rb")
Beispiel #7
0
def prepare_pairs(self, stats):
    """Turn ``stats.pair_wins`` into a list of weighted directed pairs.

    For every usable pair the share of wins of ``pair[0]`` is computed and up
    to two tuples are emitted: ``(id0, id1, share)`` and
    ``(id1, id0, 1 - share)``; a tuple with zero weight is omitted.

    A pair is skipped when it is listed in ``stats.incomparable_pairs``, when
    ``self.is_dropped`` returns a value greater than 1 for either item, when
    either item has no entry in ``self.ids``, or when its (boosted) win
    counts sum to zero.

    Args:
        stats: Statistics object providing ``pair_wins``,
            ``incomparable_pairs`` and ``too_close``.

    Returns:
        List of ``(first_id, second_id, weight)`` tuples.
    """
    from web.util import nanguard
    from web import opts
    pairs = []
    for pair, rel_wins in stats.pair_wins.items():
        if pair in stats.incomparable_pairs:
            continue
        if self.is_dropped(stats, pair[0]) > 1 or self.is_dropped(
                stats, pair[1]) > 1:
            continue
        if not (pair[0] in self.ids and pair[1] in self.ids):
            continue
        if pair in stats.too_close:
            # "Too close" votes are added to both sides of the record.
            prior_total = sum(rel_wins)
            closeness = opts.too_close_boost * stats.too_close[pair]
            rel_wins = tuple(
                [nanguard(x + prior_total + closeness) for x in rel_wins])
        if not sum(rel_wins):
            continue
        ratio = nanguard(rel_wins[0] / (rel_wins[0] + rel_wins[1]))
        id0 = self.ids[pair[0]]
        id1 = self.ids[pair[1]]
        directed = ((id0, id1, ratio), (id1, id0, 1 - ratio))
        for first_id, second_id, weight in directed:
            if weight:
                pairs.append((first_id, second_id, nanguard(weight)))
    return pairs
Beispiel #8
0
def build_sim_data(self, stats):
    """Build edge lists and edge-ratio targets from ``stats.triplet_diffs``.

    Triplets containing an item for which ``self.is_dropped`` is truthy are
    ignored. The remaining keys are mapped to ids with ``self.getid_sim``.

    Returns:
        ``(edges, edge_ratios, targ)`` where

        * ``edges`` lists every 2-combination of ids of each kept triplet;
        * ``edge_ratios`` holds, for every pair of edges within a triplet,
          ``((edge_a, edge_b), sum_a / (sum_a + sum_b), sum_a + sum_b)``,
          an edge's sum being the sum of the values of its two vertices;
        * ``targ`` is the ratio column of ``edge_ratios``.
    """
    from web.util import nanguard
    triplets = []
    for key, vals in stats.triplet_diffs.items():
        if any(self.is_dropped(stats, x) for x in key):
            continue
        triplets.append((tuple(self.getid_sim(x) for x in key), vals))

    edges = []
    for ids, _ in triplets:
        edges.extend(itertools.combinations(ids, 2))

    edge_ratios = []
    for ids, vals in triplets:
        # One record per edge of the triplet: (vertex ids, summed value, ...).
        triplet_edges = [
            ((vert1[0], vert2[0]), nanguard(vert1[1] + vert2[1]), vert1,
             vert2)
            for vert1, vert2 in itertools.combinations(zip(ids, vals), 2)
        ]
        for edge1, edge2 in itertools.combinations(triplet_edges, 2):
            ratio = nanguard(edge1[1] / (edge1[1] + edge2[1]))
            combined = nanguard(edge1[1] + edge2[1])
            edge_ratios.append(((edge1[0], edge2[0]), ratio, combined))

    targ = [nanguard(ratio) for _, ratio, _ in edge_ratios]
    return edges, edge_ratios, targ
Beispiel #9
0
def extend_model(self, stats):
    """Append initial values to ``self.model`` for items it does not cover yet.

    ``self.getid`` is first called for both members of every pair in
    ``stats.pair_wins`` (the return value is discarded; presumably this
    registers unseen items -- confirm in ``getid``). If the model is
    non-empty and ``self.all_items`` is longer than it, each uncovered item
    gets the ``"mid"`` value reported by ``self.calc_next_index`` for its
    comparison bounds, and those values are concatenated onto ``self.model``.

    Raises:
        Exception: If the number of computed values differs from the number
            of uncovered items.
    """
    from web.util import nanguard
    for pair, _ in stats.pair_wins.items():
        self.getid(pair[0])
        self.getid(pair[1])

    covered = len(self.model)
    if not covered or len(self.all_items) <= covered:
        return
    newlen = len(self.all_items) - covered

    def initial_value(h):
        # Midpoint of the bounds implied by the item's recorded comparisons.
        dists, weights = self.calculate_dists(stats,
                                              stats.comparisons.get(h, {}), h)
        info = self.calc_next_index(h, dists, weights, {})[1]
        return nanguard(info["mid"])

    newvals = [initial_value(h) for h in self.all_items[covered:]]
    if len(newvals) != newlen:
        raise Exception()
    self.model = numpy.concatenate((self.model, newvals))
Beispiel #10
0
def calculate_nearest_neighborhood(self,
                                   stats,
                                   hashes_to_debug,
                                   extra=False,
                                   save=True):
    """Rebuild per-item value bounds, the searching pool and the inversion list.

    For every usable pair in ``stats.pair_wins`` the majority winner's lower
    bound list receives the loser's model value and the loser's upper bound
    list receives the winner's model value. ``self.calc_next_index`` is then
    run for every non-dropped item; an item enters the searching pool when
    that call returns an index (or the item has no recorded wins) and the
    item is in ``self.bh2``. Summary counts are printed, followed by a
    detailed table for each hash in ``hashes_to_debug``.

    Args:
        stats: Statistics object providing ``pair_wins``,
            ``incomparable_pairs``, ``too_close``, ``win_counts`` and
            ``loss_counts``.
        hashes_to_debug: Hashes for which the debug table is printed.
        extra: Accepted for interface compatibility; not used here.
        save: When true, ``self.searching_pool``, ``self.inversions`` and
            ``self.distances`` are bound to the dicts filled by this call.

    Returns:
        None.
    """
    from web import opts
    # nanguard is needed for the too_close boost below; import it locally
    # like the sibling methods do.
    from web.util import nanguard, timing
    with timing("calculate_nearest_neighborhood", 0.1):
        # Every item starts with one sentinel bound (weight 1) on each side.
        distances = {h: ([-50], [50]) for h in self.all_items}
        weights = {h: ([1], [1]) for h in self.all_items}
        sp = {}
        if save: self.searching_pool = sp

        iv = {}
        if save: self.inversions = iv
        inversions = {}
        inversion_pool = set()
        dists = {}
        if save:
            self.distances = dists
        for pair, rel_wins in stats.pair_wins.items():
            if self.is_dropped(stats, pair[0]) or self.is_dropped(
                    stats, pair[1]):
                continue
            if pair in stats.incomparable_pairs: continue
            if pair in stats.too_close:
                # "Too close" votes are added to both sides of the record.
                rel_wins = tuple([
                    nanguard(x + sum(rel_wins) +
                             opts.too_close_boost * stats.too_close[pair])
                    for x in rel_wins
                ])
            count = rel_wins[0] + rel_wins[1]
            if not count:
                # No recorded evidence for this pair; skip it rather than
                # dividing by zero (prepare_pairs skips such records too).
                continue
            win_ratio = (rel_wins[0]) / count
            win, loss = pair[0], pair[1]
            if win_ratio < 0.5:
                # Orient the record so that `win` is the majority winner.
                rel_wins = rel_wins[::-1]
                win, loss = pair[1], pair[0]
                win_ratio = 1 - win_ratio
            win_prob, inverted, details = self.check_inversion(stats, pair)
            if win_prob < 0.5:
                win_inversions, _ = inversions.setdefault(win, ([], []))
                _, loss_inversions = inversions.setdefault(loss, ([], []))
                win_inversions.append((pair, win_ratio, win_prob, count))
                loss_inversions.append((pair, win_ratio, win_prob, count))
            if inverted:
                inversion_pool.add(pair[0])
                inversion_pool.add(pair[1])
                iv[pair] = inverted
            # Majority share with one pseudo-win added per side, mapped from
            # [0.5, 1] to [0, 1].
            decayed_ratio = (((rel_wins[0] + 1) /
                              (rel_wins[0] + rel_wins[1] + 2)) - 0.5) * 2

            if win_ratio < opts.ambiguity_threshold:
                # don't include ambiguous comparisons when tallying distances
                # should reduce risk of getting in tangles
                continue
            wval = self.getval(win)
            lval = self.getval(loss)
            if wval is None or lval is None: continue
            distances[win][0].append(lval)
            distances[loss][1].append(wval)
            weights[win][0].append(decayed_ratio)
            weights[loss][1].append(decayed_ratio)
        modelmin = self.min()
        modelmax = self.max()
        for h, vals in distances.items():
            if self.is_dropped(stats, h): continue
            nextidx, _ = self.calc_next_index(h,
                                              vals,
                                              weights.get(h),
                                              dists,
                                              existing_val=self.getval(h))
            if (nextidx is not None
                    or not stats.win_counts.get(h)) and h in self.bh2:
                sp[h] = True
        print(
            "len(sp)",
            len(sp),
            "len(distances)",
            len(distances),
            len(self.bh2),
            len([x for x in self.all_items if not stats.win_counts.get(x)]),
            len([
                x for x in self.all_items if not stats.win_counts.get(x)
                and not self.is_dropped(stats, x)
            ]),
        )

        # Always-on debug dump for the requested hashes.
        # NOTE(review): a hash missing from self.all_items raises KeyError
        # below, and one without a model value makes `val` None -- confirm
        # callers only pass known, modelled hashes.
        if True:
            for h in hashes_to_debug:
                hidx = self.sorted_ids.get(h, -1)
                in_pool = h in sp
                in_inv_pool = h in inversion_pool
                item_dists = distances[h]
                win_inversions, loss_inversions = inversions.get(h, ([], []))
                wdist, ldist = self.weighted_softmin(
                    *item_dists[0]), self.weighted_softmin(*item_dists[1])
                val = self.getval(h)
                pos = (val - modelmin) / (modelmax - modelmin)
                delta = int(
                    len(self.model) / (opts.min_target_precision +
                                       (pos**opts.target_precision_curve) *
                                       opts.target_precision_top))
                lc = stats.loss_counts.get(h, 0)
                wc = stats.win_counts.get(h, 0)
                rows = []

                low = val - wdist
                high = val + ldist
                widx2, lidx2 = self.getidx(low), self.getidx(high)

                widx, lidx = max(0, self.getidx(val - wdist)), min(
                    len(self.model) - 1, self.getidx(val + ldist))
                midx = (widx + lidx) // 2
                mval = self.sorted_model[midx]
                mdist = mval - val

                low = max(low, -modelmax)
                high = min(high, modelmax)
                vval = (low + high) / 2
                vdist = vval - val
                vidx = self.getidx(vval)

                # Immediate neighbours in sorted order, clamped to the model.
                waidx = max(0, hidx - 1)
                laidx = min(len(self.model) - 1, hidx + 1)
                la_h = self.sorted_hashes[laidx]
                wa_h = self.sorted_hashes[waidx]
                la_val = self.getval(la_h)
                wa_val = self.getval(wa_h)

                # Items `delta` positions away on either side.
                wtidx = hidx - delta
                ltidx = hidx + delta
                wthresh_h = self.sorted_hashes[max(0, wtidx)]
                lthresh_h = self.sorted_hashes[min(ltidx,
                                                   len(self.sorted_hashes) -
                                                   1)]
                wthresh_val = self.getval(wthresh_h)
                lthresh_val = self.getval(lthresh_h)
                print()
                print(f"wc={wc:2d}")
                print(f"lc={lc:2d}")
                print(f"lc+wc={lc+wc:2d}")
                print(f"in_pool={in_pool}")
                print(f"in_inv_pool={in_inv_pool}")
                # Row layout: (value distance, indent step, label, value,
                # index, index distance, description).
                for pair, win_ratio, win_prob, count in win_inversions:
                    other = [x for x in pair if x != h][0]
                    other_idx = self.sorted_ids[other]
                    other_val = self.getval(other)
                    rows.append((
                        other_val - val, 9, "iw", other_val, other_idx,
                        other_idx - hidx,
                        f"unexpected win; win ratio: {win_ratio} ({count} samples), expected win prob: {win_prob}"
                    ))
                for pair, loss_ratio, loss_prob, count in loss_inversions:
                    other = [x for x in pair if x != h][0]
                    other_idx = self.sorted_ids[other]
                    other_val = self.getval(other)
                    rows.append((
                        other_val - val, 3, "il", other_val, other_idx,
                        other_idx - hidx,
                        f"unexpected loss; loss ratio: {loss_ratio} ({count} samples), expected loss prob: {loss_prob}"
                    ))
                for windist in item_dists[0]:
                    if windist == 9 or windist < 0: continue
                    rows.append(
                        (-windist, 0, "win", val - windist,
                         self.getidx(val - windist),
                         self.getidx(val - windist) - hidx, f"expected win"))
                for lossdist in item_dists[1]:
                    if lossdist == 9 or lossdist < 0: continue
                    rows.append(
                        (lossdist, 12, "los", val + lossdist,
                         self.getidx(val + lossdist),
                         self.getidx(val + lossdist) - hidx, f"expected loss"))
                rows.append((modelmin - val, 1, "W", modelmin, 0, 0 - hidx,
                             "model boundary low"))
                rows.append((-wdist, 1, "w", val - wdist, widx2, widx2 - hidx,
                             "win boundary"))
                rows.append(
                    (wthresh_val - val, 2, "wt", wthresh_val, wtidx,
                     wtidx - hidx, "search precision threshold, win side"))
                rows.append((vdist, 4, "v", vval, vidx, vidx - hidx,
                             "midpoint in value space"))
                rows.append((mdist, 5, "m", mval, midx, midx - hidx,
                             "midpoint in index space"))
                rows.append((wa_val - val, 6, "wa", wa_val, waidx,
                             waidx - hidx, "prev neighbor"))
                rows.append((0, 7, "", val, hidx, 0, "item"))
                rows.append((la_val - val, 8, "la", la_val, laidx,
                             laidx - hidx, "next neighbor"))
                rows.append(
                    (lthresh_val - val, 10, "lt", lthresh_val, ltidx,
                     ltidx - hidx, "search precision threshold, loss side"))
                rows.append((ldist, 11, "l", val + ldist, lidx2, lidx2 - hidx,
                             "loss boundary"))
                rows.append(
                    (modelmax - val, 11, "L", modelmax, len(self.model) - 1,
                     (len(self.model) - 1) - hidx, "model boundary high"))
                # Print the rows ordered by model index, then value distance.
                for dist, step, label, val, idx, idxdist, desc in sorted(
                        rows, key=lambda x: (x[4], x[0])):
                    prefix = label.rjust(step * 2).ljust(24)
                    label = label.rjust(3)
                    print(
                        f" {prefix} | {label}dist={dist:7.4f} {label}val={val:7.4f} {label}idx={idx:5d} {label}idxdist={idxdist:5d} {desc}"
                    )
        print(f"searching_pool: {len(sp)}, inversions: {len(iv)}")
Beispiel #11
0
def getval(self, h):
    """Return the model value stored for hash ``h``.

    Returns:
        ``self.model[self.getid(h)]`` passed through ``nanguard``, or
        ``None`` when that id lies beyond the end of ``self.model``.
    """
    from web.util import nanguard
    index = self.getid(h)
    if index < len(self.model):
        return nanguard(self.model[index])
    return None
Beispiel #12
0
def getidx(self, val):
    """Return the position at which ``val`` would be inserted into ``self.sorted_model``.

    Uses ``numpy.searchsorted`` with its default (left) side, so
    ``self.sorted_model`` is assumed to be sorted in ascending order.
    """
    from web.util import nanguard
    position = numpy.searchsorted(self.sorted_model, val)
    return nanguard(position)
def update(self, item, initial=False):
    """Apply one recorded comparison event to the statistics held on ``self``.

    The event is weighted by two factors: ``decay``, derived from the event's
    age via ``opts.comparison_decay_func``, and ``mag_decay``, derived from
    the view duration and clamped between ``opts.min_mag`` and
    ``opts.initial_mag``. Depending on the event's content it updates
    ``self.dislike``, ``self.too_close``, ``self.incomparable_pairs``, or
    calls ``self.record_win`` / ``self.record_similar``.

    Args:
        item: Dict describing the event. ``"items"`` is required; ``"dur"``
            is (over)written here. Three payload shapes are handled: a
            ``"similarity"`` dict, a non-dict ``"preference"`` (1-based
            index of the preferred item, default 1), or a ``"preference"``
            dict with a 1-based ``"prefer"`` entry.
        initial: When true, the per-event diagnostic line is not printed.

    Returns:
        None. Returns early when the view was too short (unless
        ``item["fast"]`` is truthy) or when any item was marked as disliked.
    """
    from web.util import nanguard, as_pair, sigmoid
    from web import opts
    # Age of the event in seconds; "viewend" is divided by 1000 first, so it
    # is presumably an epoch timestamp in milliseconds -- TODO confirm.
    age = time.time() - (nanguard(item.get("viewend", 0), "update.viewend") /
                         1000)
    decay = nanguard(opts.comparison_decay_func(age))
    item["dur"] = nanguard(
        item.get("viewend", 0) - item.get("viewstart", 0), "update.dur")
    # initial_mag divided by the duration in seconds (divisor at least 1),
    # never above initial_mag and never below min_mag.
    mag_decay = nanguard(
        max(
            min(opts.initial_mag,
                opts.initial_mag / max(1, (item["dur"] / 1000))),
            opts.min_mag))
    if nanguard(item.get("dur", 0)) < opts.minview and not item.get("fast"):
        print("\033[31mskipped due to too-low view duration", item, "\033[m")
        return
    if not initial:
        print(
            f"\033[38mvd: {item.get('dur',0)}, mag_decay: {mag_decay}, age: {age}, time_decay: {decay}\033[m"
        )
    pair = as_pair(*item["items"])
    # Decode the payload into `info`, `winner` (0-based index or None) and
    # the similarity selectors.
    if "similarity" in item:
        info = item["similarity"]
        winner = None
        least_similar = info.get("least_similar", None)
        most_similar = info.get("most_similar", None)
        if type(most_similar) == list:
            # A list of most-similar indices is converted into the single
            # remaining index out of {0, 1, 2}, treated as least similar.
            least_similar = list(set(range(3)) - set(most_similar))[0]
            most_similar = None
    elif type(item.get("preference", None)) != dict:
        info = {}
        winner = nanguard(item.get("preference", 1) - 1)
        least_similar = None
    else:
        info = item.get("preference", {})
        winner = nanguard(info.get("prefer", 1) - 1)
        least_similar = None

    too_close = info.get("too_close", False)
    incomparable = info.get("incomparable", False)
    dislike = info.get("dislike", None)
    # NOTE(review): `strong` is read but not used anywhere below.
    strong = info.get("strong", None)

    # Events tagged as inversion checks get a larger magnitude, scaled
    # further by the number of wins already recorded for the pair.
    if type(item.get("info")) == dict and item["info"].get("t") == [
            "inversions", "inversions"
    ]:
        mag_decay = mag_decay * opts.inversion_compare_boost + sum(
            self.pair_wins.get(
                pair, [0, 0])) * opts.inversion_compare_relboost * mag_decay
    if not dislike:
        dislike = [0] * len(item["items"])
    # Set or clear the dislike flag of each item according to this event.
    for f, dis in zip(item["items"], dislike):
        if dis:
            self.dislike[f["hash"]] = True
            #print("dislike",f)
        elif f["hash"] in self.dislike:
            del self.dislike[f["hash"]]
            #print("undislike",f)
    # An event that marks anything as disliked records nothing else.
    if any(dislike): return

    if too_close:
        self.too_close[pair] = self.too_close.get(pair, 0) + nanguard(
            2 * decay * (1 - sigmoid(mag_decay)))
        #self.record_win(*item["items"])
        #self.record_win(*item["items"][::-1])
    elif incomparable:
        pair = as_pair(*item["items"], strip=True)
        self.incomparable_pairs[pair] = self.incomparable_pairs.get(
            pair, 0) + nanguard(decay / mag_decay)
    elif winner is not None:
        winning = item["items"][winner]
        losing = item["items"][1 - winner]
        self.record_win(winning, losing, nanguard(decay), nanguard(mag_decay))
    elif least_similar is not None or most_similar is not None:
        sim = least_similar if least_similar is not None else most_similar
        assert 0 <= sim <= 2
        # s3 is the item singled out by `sim`; s1 and s2 are the other two.
        s1, s2 = [x for i, x in enumerate(item["items"]) if i != sim]
        s3 = item["items"][sim]
        self.record_similar(s1,
                            s2,
                            s3,
                            nanguard(decay),
                            nanguard(mag_decay),
                            invert=(most_similar is not None))