Example 1
def corrupt(batch, n):
    """
    Corrupts the negatives of a batch of triples (in place): for each triple,
    either the head or the tail (chosen at random) is replaced by a uniformly
    sampled entity.

    :param batch: (batch_size, nr_negatives, 3) tensor of triples
    :param n: nr of nodes in the graph
    """
    bs, ns, _ = batch.size()

    # new entities to insert
    corruptions = torch.randint(size=(bs * ns, ),
                                low=0,
                                high=n,
                                dtype=torch.long,
                                device=d(batch))

    # boolean mask for entries to corrupt
    mask = torch.bernoulli(
        torch.empty(size=(bs, ns, 1), dtype=torch.float,
                    device=d(batch)).fill_(0.5)).to(torch.bool)
    zeros = torch.zeros(size=(bs, ns, 1), dtype=torch.bool, device=d(batch))
    mask = torch.cat([mask, zeros, ~mask], dim=2)

    batch[mask] = corruptions
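
A minimal usage sketch, assuming `d(tensor)` is the device helper used throughout these examples (the stub below is hypothetical, for illustration only):

import torch

def d(tensor=None):
    # hypothetical stand-in for the d() helper assumed by corrupt():
    # the device of `tensor`, or the default device if none is given
    if tensor is None:
        return 'cuda' if torch.cuda.is_available() else 'cpu'
    return 'cuda' if tensor.is_cuda else 'cpu'

batch = torch.randint(0, 10, size=(4, 8, 3))  # 4 positives, 8 negatives each
corrupt(batch, n=10)  # in place: each triple now has a corrupted head or tail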
Example 2
 def run(self):
     while self.flag and self._flag:
         try:
             data, _ = self.__socket.recvfrom(65565)
         except Exception as err:
             util.d(err)
         else:
             self.__catalogue.touch(util.unpack(data))
Example 3
def sum(indices, values, size, row=True):
    """
    Sums the rows or columns of a sparse matrix, and redistributes the
    results back to the nonzero row/column entries.

    The arguments are interpreted as defining a sparse matrix. Any extra
    dimensions are treated as batch dimensions.

    :param indices: (..., k, 2) integer tensor of nonzero indices
    :param values: (..., k) tensor of the corresponding values
    :param size: the size of the (dense) matrix
    :param row: if True, sum the rows, otherwise sum the columns
    :return: a tensor with the same shape as `values`, where each entry is
        replaced by the sum of its row (or column)
    """

    assert len(indices.size()) == len(values.size()) + 1

    if len(indices.size()) == 2:
        # add batch dim
        indices = indices[None, :, :]
        values = values[None, :]
        bdims = None
    else:
        # fold up batch dim
        bdims = indices.size()[:-2]
        k, r = indices.size()[-2:]
        assert bdims == values.size()[:-1]
        assert values.size()[-1] == k

        indices = indices.view(-1, k, r)
        values = values.view(-1, k)

    b, k, r = indices.size()

    if row:
        ones = torch.ones((size[1], 1), device=d(indices))
    else:
        ones = torch.ones((size[0], 1), device=d(indices))
        # transpose the matrix by swapping the row and column indices
        indices = torch.cat([indices[:, :, 1:2], indices[:, :, 0:1]], dim=2)
        size = size[1], size[0]

    s, _ = ones.size()
    ones = ones[None, :, :].expand(b, s, 1).contiguous()

    sums = batchmm(indices, values, size, ones)  # row/column sums
    bindex = torch.arange(b, device=d(indices))[:, None].expand(b, indices.size(1))
    sums = sums[bindex, indices[:, :, 0], 0]

    if bdims is None:
        return sums.view(k)

    return sums.view(*bdims + (k, ))
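
For intuition, a dense equivalent of what `sum` computes for a single (unbatched) matrix with `row=True`, using only plain torch (this sketch assumes the index tuples are unique):

import torch

indices = torch.tensor([[0, 0], [0, 1], [1, 1]])  # k=3 nonzero entries of a 2x2 matrix
values = torch.tensor([1.0, 2.0, 3.0])

dense = torch.zeros(2, 2)
dense[indices[:, 0], indices[:, 1]] = values
rowsums = dense.sum(dim=1)              # [3., 3.]
redistributed = rowsums[indices[:, 0]]  # [3., 3., 3.]: one row sum per nonzero entry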
Example 4
def genDefaultConfig(params):
    c = {}
    for p in params:
        p = d(p)
        c[p.id] = p.default

    return c
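
Assuming `d` wraps a dict so that values are reachable as attributes (which is what `p.id` and `p.default` require), running this over the `params` list of the BASE_SERVER_CONFIG defined in a later example would yield:

# {'external_access': True, 'port': 8080, 'load_defaults': False,
#  'show_debug': False, 'mod_dirs': [], 'off_anim_time': 10}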
Example 5
 def getKeys(self):
     result = {}
     for k in self._map:
         v = self._map[k]
         if isinstance(v, str): v = ord(v)
         result[k] = abs(win32api.GetAsyncKeyState(v)) > 1
     return d(result)
Example 7
    def getKeys(self):
        bits = 0
        try:
            packet = SerialGamePad._generateHeader(CMDTYPE.GET_BTNS, 0)
            self._com.write(packet)
            resp = self._com.read(1)

            if len(resp) == 0:
                SerialGamePad._comError()
            elif ord(resp) != RETURN_CODES.SUCCESS:
                SerialGamePad._printError(ord(resp))
            resp = self._com.read(2)
            if len(resp) != 2:
                SerialGamePad._comError()

            bits = ord(resp[0]) + (ord(resp[1]) << 8)
        except IOError:
            log.logger.error("IO Error Communicating With Game Pad!")

        index = 0
        result = {}
        for m in self._map:
            result[m] = (bits & (1<<index) > 0)
            index += 1
        return d(result)
Example 10
def readServerConfig():
    data = readConfig("config", path=__home)
    base = paramsToDict(BASE_SERVER_CONFIG.params)
    if len(data.keys()) == 0:
        data = paramsToDict(BASE_SERVER_CONFIG.params)
    elif len(data.keys()) != len(base.keys()):
        data.upgrade(base)
    return d(data)
Example 12
 def cluster_adjacency_matrix(self, C, dmax=1.e10):
     n = len(C)
     a = scipy.sparse.lil_matrix((n, n))
     for i in range(n):
         for j in range(i + 1, n):
             d = util.d(C[i], C[j])
             if d < dmax:
                 a[j, i] = a[i, j] = d
     A = a.tocsr()
     return A.todense()
Example 13
    def plot(self, inputs, numpixels=5, ims=None):

        ims = inputs if ims is None else ims

        b, c, h, w = inputs.size()
        b, cims, hims, wims = ims.size()

        k = self.k

        # choose 5 random pixels, for which we'll plot the input pixels.
        choices = torch.randint(low=0, high=h * w, size=(numpixels, ))

        perrow = 5

        rows = int(math.ceil(b / perrow))

        means, sigmas, _ = self.hyper(inputs)

        inputs = inputs.data

        plt.figure(figsize=(perrow * 3, rows * 3))

        # scale up to image coordinates
        scale = torch.tensor((hims / h, wims / w), device=d(inputs))
        means = means * scale + (scale / 2)

        for current in range(b):

            # select subset of means, sigmas
            smeans = means[current, :, :, :, :].view(h * w, k, 2)
            ssigmas = sigmas[current, :, :, :].view(h * w, k, 2)

            color = (torch.arange(numpixels, dtype=torch.float)[:, None]
                     .expand(numpixels, k) / numpixels) * 2.0 - 1.0

            smeans = smeans[choices, :, :]
            ssigmas = ssigmas[choices, :]

            ax = plt.subplot(rows, perrow, current + 1)

            im = np.transpose(ims[current, :, :, :].cpu().numpy(), (1, 2, 0))
            im = np.squeeze(im)

            ax.imshow(im,
                      interpolation='nearest',
                      extent=(-0.5, wims - 0.5, -0.5, hims - 0.5),
                      cmap='gray_r')

            util.plot(smeans.reshape(1, -1, 2),
                      ssigmas.reshape(1, -1, 2),
                      color.reshape(1, -1),
                      axes=ax,
                      flip_y=hims,
                      tanh=False)

        plt.gcf()
Example 14
 def from_points(self, c1, c2, dmax=1.e10):
     self.init()
     ### NOTE assume len(c1) == len(c2)
     self.nnodes = n = len(c1)
     for i in range(n):
         x = c1[i]
         for j in range(n):
             y = c2[j]
             d = util.d(x, y)
             if (d < dmax):
                 self.edges.append([[i, j], d])
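
Here `util.d(x, y)` is used as a point-to-point distance, as in the cluster_adjacency_matrix example above; a minimal Euclidean stand-in, assuming that contract (hypothetical, for experimentation):

import numpy as np

def d(x, y):
    # hypothetical stand-in for util.d: Euclidean distance between two points
    return float(np.linalg.norm(np.asarray(x, dtype=float) - np.asarray(y, dtype=float)))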
Example 15
 def addKeyFunc(self, key, func, speed=1, hold=True):
     if not isinstance(key, list):
         key = [key]
     for k in key:
         self._keyfuncs[k] = d({
             "func": func,
             "speed": speed,
             "hold": hold,
             "last": False,
             "inter": False
         })
Example 17
 def reparameterize(self, mean_logvar):
     """
     Reparametrization trick.
     """
     self.mean = mean = mean_logvar[:, :, :self.z_dim]
     self.logvar = logvar = mean_logvar[:, :, self.z_dim:]
     if self.var:
         eps = torch.normal(torch.zeros_like(mean), std=1.).to(d())
     else:
         eps = 1.
     return eps * torch.exp(logvar * .5) + mean
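
The trick computes z = mean + exp(logvar / 2) * eps with eps ~ N(0, I), so that sampling stays differentiable with respect to mean and logvar; a self-contained sketch:

import torch

mean = torch.zeros(2, 3, requires_grad=True)
logvar = torch.zeros(2, 3, requires_grad=True)

eps = torch.randn_like(mean)              # eps ~ N(0, I)
z = eps * torch.exp(logvar * 0.5) + mean  # z ~ N(mean, exp(logvar))
z.sum().backward()                        # gradients reach mean and logvar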
Example 19
    def get_LSC_random(self, cluster_number, r):
        data_points = self.dataset.samples
        landmarks = self.get_landmarks_random(cluster_number)
        sparse_matrix = self.get_sparse_affinity_matrix(
            data_points, landmarks, r)
        row_sum = self.get_row_sum_vector(sparse_matrix)  # Zn = D^(-1/2)Z

        # This is Zn, it should be samples x landmark size
        final_z = row_sum * np.transpose(sparse_matrix)

        # Calculate the Singular Value Decomposition of the final_z
        V_t, E, U = np.linalg.svd(final_z, False)

        U_t = np.transpose(U)
        I = np.eye(cluster_number)
        E_minus1 = np.power(E, -1)
        E_minus1 = E_minus1 * I

        final_a = np.dot(E_minus1, np.dot(U, np.transpose(final_z)))
        result = np.transpose(final_a)
        d("Size " + str(len(final_a)) + " - " + str(len(final_a[0])))
        return result
Example 20
def add_inverse_and_self(triples, n, r):
    """
    Adds inverse relations and self loops to a tensor of triples.

    :param triples: (b, 3) tensor of triples
    :param n: nr of nodes in the graph
    :param r: nr of relations
    :return: (2 * b + n, 3) tensor: the original triples, followed by n self
        loops (relation 2 * r) and b inverse triples (relations shifted by r)
    """
    b, _ = triples.size()

    inv = torch.cat(
        [triples[:, 2, None], triples[:, 1, None] + r, triples[:, 0, None]],
        dim=1)

    assert inv.size() == (b, 3)

    all = torch.arange(n, device=d(triples))[:, None]
    id = torch.empty(size=(n, 1), device=d(triples),
                     dtype=torch.long).fill_(2 * r)
    slf = torch.cat([all, id, all], dim=1)

    assert slf.size() == (n, 3)

    return torch.cat([triples, slf, inv], dim=0)
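
A worked example of the resulting layout (assuming the `d` device helper from the earlier examples): with n=2 nodes and r=1 relation, a single triple produces two self loops with relation id 2*r and one inverse triple with its relation shifted by r.

import torch

triples = torch.tensor([[0, 0, 1]])
print(add_inverse_and_self(triples, n=2, r=1))
# tensor([[0, 0, 1],    original triple
#         [0, 2, 0],    self loops, relation id 2 * r = 2
#         [1, 2, 1],
#         [1, 1, 0]])   inverse triple, relation id 0 + r = 1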
Example 21
    def hyper(self, x):

        assert x.size()[1:] == self.in_size
        b, c, h, w = x.size()
        k = self.k

        # the coordinates of the current pixels in parameters space
        # - the index tuples are described relative to these
        hw = torch.tensor((h, w), device=d(x), dtype=torch.float)
        mids = self.coords[None, :, :, :].expand(
            b, 2, h, w) * (hw - 1)[None, :, None, None]
        mids = mids.permute(0, 2, 3, 1)
        if not self.modulo:
            mids = util.inv(mids, mx=hw[None, None, None, :])
        mids = mids[:, :, :, None, :].expand(b, h, w, k, 2)

        # add coords to channels
        if self.admode == 'none':
            params = self.params[None, None, None, :].expand(b, h, w, k * 3)
        else:
            if self.admode == 'full':
                coords = self.coords[None, :, :, :].expand(b, 2, h, w)
                x = torch.cat([x, coords], dim=1)
            elif self.admode == 'coords':
                x = self.coords[None, :, :, :].expand(b, 2, h, w)
            elif self.admode == 'inputs':
                pass
            else:
                raise Exception(
                    f'adaptivity mode {self.admode} not recognized')

            x = x.permute(0, 2, 3, 1)
            params = self.toparams(x)

        assert params.size() == (b, h, w, k * 3)  # k index tuples per output pixel

        means = params[:, :, :, :k * 2].view(b, h, w, k, 2)
        sigmas = params[:, :, :, k * 2:].view(b, h, w, k)
        values = self.mvalues[None, None, None, :].expand(b, h, w, k)

        means = mids + self.mmult * means

        s = (h, w)
        means = sparse.transform_means(
            means, s, method='modulo' if self.modulo else 'sigmoid')
        sigmas = sparse.transform_sigmas(
            sigmas, s, min_sigma=self.min_sigma) * self.sigma_scale

        return means, sigmas, values
Example 22
def calculate_bpb(arg, model, data_sub):

    with torch.no_grad():
        bits, tot = 0.0, 0
        batch = []  # buffer, every time it fills up, we run it through the model

        for current in tqdm.trange(data_sub.size(0)):

            fr = max(0, current - arg.context)
            to = current + 1

            context = data_sub[fr:to].to(torch.long)
            if context.size(0) < arg.context + 1:
                pad = torch.zeros(size=(arg.context + 1 - context.size(0), ),
                                  dtype=torch.long)
                context = torch.cat([pad, context], dim=0)

                assert context.size(0) == arg.context + 1

            if torch.cuda.is_available():
                context = context.cuda()

            batch.append(context[None, :])

            if len(batch) == arg.test_batchsize or current == data_sub.size(0) - 1:

                # batch is full, run it through the model
                b = len(batch)

                all = torch.cat(batch, dim=0)
                source = all[:, :-1]  # input
                target = all[:, -1]  # target values

                output = model(source)

                lnprobs = output[torch.arange(b, device=d()), -1, target]
                log2probs = lnprobs * LOG2E  # convert from nats to bits

                bits += -log2probs.sum()
                batch = []  # empty buffer

        bits_per_byte = bits / data_sub.size(0)

    return bits_per_byte
Example 23
def corrupt_one(batch, candidates, target):
    """
    Corrupts the negatives of a batch of triples (in place).

    Corrupts either only heads, only predicates or only tails, sampling
    replacements from a given list of candidates.

    :param batch: (batch_size, nr_negatives, 3) tensor of triples
    :param candidates: entities (or relations) to sample corruptions from
    :param target: 0 for head, 1 for predicate, 2 for tail
    """
    bs, ns, _ = batch.size()

    # new entities to insert
    #corruptions = torch.randint(size=(bs * ns,),low=0, high=n, dtype=torch.long, device=d(batch))
    corruptions = torch.tensor(random.choices(candidates, k=bs * ns),
                               dtype=torch.long,
                               device=d(batch)).view(bs, ns)

    batch[:, :, target] = corruptions
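
A usage sketch with a hypothetical candidate list (again assuming the `d` device helper from the earlier examples): corrupting only the tails of a batch.

import torch

batch = torch.randint(0, 10, size=(4, 8, 3))  # 4 positives, 8 negatives each
corrupt_one(batch, candidates=list(range(10)), target=2)  # in place, tails only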
Example 24
BASE_SERVER_CONFIG = d({
    "id": "server_config",
    "display": "server_config",
    "preconfig": False,
    "presets": [],
    "params": [{
        "id": "external_access",
        "label": "Allow External Access",
        "type": "bool",
        "default": True,
        "help": "On: Other computers on your network can access PixelWeb. Off: LocalHost access only."
    }, {
        "id": "port",
        "label": "Server Port",
        "type": "int",
        "default": 8080,
        "help": "Port to listen on."
    }, {
        "id": "load_defaults",
        "label": "Load Last Config on Start",
        "type": "bool",
        "default": False,
        "help": "Load last driver/controller configuration on application start."
    }, {
        "id": "show_debug",
        "label": "Show Debug in Console",
        "type": "bool",
        "default": False,
        "help": "Show BiblioPixel debug in server console (not in main UI)."
    }, {
        "id": "mod_dirs",
        "label": "Module Directories",
        "type": "str_multi",
        "default": [],
        "help": "Directories from which to load modules (animations, drivers, controllers, pre-configs).",
        "replace": {"\\": "/"}
    }, {
        "id": "off_anim_time",
        "label": "All Off Timeout",
        "type": "int",
        "default": 10,
        "min": 0,
        "max": 3600,
        "help": "Keep display off when not running an animation by actively turning all pixels off every X seconds. Set to 0 to disable."
    }]
})
Example 25
    def forward(self, batch):

        assert batch.size(-1) == 3

        n, r = self.n, self.r

        dims = batch.size()[:-1]
        batch = batch.reshape(-1, 3)
        batchl = batch.tolist()

        with torch.no_grad():

            if self.prune and self.depth > 0:
                # gather all triples that are relevant to the current batch
                triples = {tuple(t) for t in batchl}

                nds = set()
                for s, _, o in batchl:
                    nds.add(s)
                    nds.add(o)

                for _ in range(self.depth):
                    #-- gather all triples that are close enough to the batch triples to be relevant

                    inc_triples = set()
                    for node in nds:  # nb: don't shadow n, which is used below
                        inc_triples.update(self.lookup[node])

                    triples.update(inc_triples)

                    nds.update([s for (s, _, _) in inc_triples])
                    nds.update([o for (_, _, o) in inc_triples])

                triples = torch.tensor(list(triples),
                                       device=d(self.all_triples),
                                       dtype=torch.long)
                with torch.no_grad():
                    triples = add_inverse_and_self(triples, n, r)
            else:
                triples = self.all_triples_plus  # just use all triples

            if self.dropout is not None and self.training:
                # We drop out edges by actually removing the triples, to save on memory
                assert len(self.dropout) == 2

                keep, keepid = 1.0 - self.dropout[0], 1.0 - self.dropout[1]

                nt = triples.size(0) - n

                keep_ind = random.sample(range(nt), k=int(floor(keep * nt)))
                keepid_ind = random.sample(range(nt, nt + n),
                                           k=int(floor(keepid * n)))
                ind = keep_ind + keepid_ind

                triples = triples[ind, :]

        nodes = self.embeddings if self.layer0 is None else self.layer0(
            triples=triples)

        if self.layer1 is not None:
            nodes = self.layer1(triples=triples, nodes=nodes)

        if self.do is not None:
            nodes = self.do(nodes)
            relations = self.do(self.relations)
        else:
            relations = self.relations

        if self.biases:
            biases = (self.gbias, self.sbias, self.pbias, self.obias)
        else:
            biases = None

        scores = self.decoder(batch, nodes, relations, biases=biases)

        assert scores.size() == (util.prod(dims), )

        return scores.view(*dims)
Example 26
    def forward(self, triples, nodes=None):

        n, r = self.n, self.r
        rn = r * n

        ## Construct the graph

        # horizontally and vertically stacked versions of the adjacency graph
        # (the vertical is always necessary to normalize the adjacencies)

        if self.hor:
            hor_ind, hor_size = util.adj_triples_tensor(triples,
                                                        n,
                                                        r,
                                                        vertical=False)

        ver_ind, ver_size = util.adj_triples_tensor(triples,
                                                    n,
                                                    r,
                                                    vertical=True)

        rn, _ = ver_size

        # compute values of row-normalized adjacency matrices (same for hor and ver)
        vals = torch.ones(ver_ind.size(0),
                          dtype=torch.float,
                          device=d(triples))
        vals = vals / util.sum_sparse(ver_ind, vals, ver_size)

        if self.hor:
            self.adj = torch.sparse.FloatTensor(indices=hor_ind.t(),
                                                values=vals,
                                                size=hor_size)
        else:
            self.adj = torch.sparse.FloatTensor(indices=ver_ind.t(),
                                                values=vals,
                                                size=ver_size)

        if triples.is_cuda:
            self.adj = self.adj.to('cuda')

        ## Perform message passing
        assert (nodes is None) == (self.insize is None)

        h0 = n if self.insize is None else self.insize
        h1 = self.outsize

        if self.decomp is None:
            weights = self.weights

        elif self.decomp == 'basis':
            weights = torch.einsum('rb, bij -> rij', self.comps, self.bases)

        elif self.decomp == 'block':
            weights = util.block_diag(self.blocks)
            # TODO: multiply in block form (more efficient, but implementation differs per layer type)

        assert weights.size() == (r, h0, h1)

        if self.insize is None:
            # -- input is the identity matrix, just multiply the weights by the adjacencies
            out = torch.mm(self.adj, weights.view(r * h0, h1))

        elif self.hor:
            # -- input is high-dim and output is low dim, multiply h0 x weights first
            nodes = nodes[None, :, :].expand(r, n, h0)
            nw = torch.einsum('rni, rio -> rno', nodes, weights).contiguous()
            out = torch.mm(self.adj, nw.view(r * n, h1))

        else:
            # -- adj x h0 first, then weights
            out = torch.mm(self.adj, nodes)  # sparse mm
            out = out.view(r, n, h0)  # new dim for the relations
            out = torch.einsum('rio, rni -> no', weights, out)

        assert out.size() == (n, h1)

        return out + self.bias
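
A quick shape check of the basis decomposition used above: `comps` holds per-relation mixing coefficients that combine `b` shared basis matrices into one h0 x h1 weight matrix per relation.

import torch

r, b, h0, h1 = 4, 2, 3, 5
comps = torch.randn(r, b)       # per-relation coefficients
bases = torch.randn(b, h0, h1)  # shared basis matrices
weights = torch.einsum('rb, bij -> rij', comps, bases)
assert weights.size() == (r, h0, h1)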
Example 27
def compute_compression(model, data, context, batch_size):
    """
    Compute the _compression_ of a dataset under a model. That is, given a model, in how many bits could we represent
    the dataset. This requires us to turn a given probability distribution into a code for the outcomes.

    See [this video](https://youtu.be/mSneVjDvzNQ) for an explanation.

    :param model: A sequence-to-sequence model that takes as input a (sub)sequence of integers and produces a probability
    distribution on the output.
    :param data: A single list of integers representing the data.
    :return: The result of the computation in "bits per byte". That is, how many bits does the compressed representation
    spend on each byte (=ASCII character) of the raw data.
    """

    bits, tot = 0.0, 0
    batch = []
    # Buffer, every time it fills up, we run it through the model
    # --- For the sake of speed we want to process the data in batches. For each token in the data, we make a
    #     prediction based on all the `context` tokens before it. This means that for each subsequence in the batch, we
    #     need to shift the start/end indices ahead by one token.
    #
    #     After we pass the batch through the model, we look at only the probabilities predicted for the last token.

    for current in range(data.size(0)):

        fr = max(0, current - context)
        to = current + 1

        instance = data[fr:to].to(torch.long)  # the subsequence of the data to add to the batch
        if instance.size(0) < context + 1:
            pad = torch.zeros(size=(context + 1 - instance.size(0), ),
                              dtype=torch.long)
            instance = torch.cat([pad, instance], dim=0)
            # -- the first tokens don't have enough tokens preceding them, so we pad them to the right size.

            assert instance.size(0) == context + 1  # all instances should be `context` + 1 long

        if torch.cuda.is_available():
            instance = instance.cuda()

        batch.append(instance[None, :])
        # -- We add a singleton dimension to concatenate along later.

        if len(batch) == batch_size or current == data.size(0) - 1:
            # batch is full or we are at the last instance, run it through the model

            b = len(batch)

            all = torch.cat(batch, dim=0)
            inputs = all[:, :-1]  # input
            target = all[:, -1]  # target values

            output = model(inputs)

            lnprobs = output[torch.arange(b, device=d()), -1, target]
            log2probs = lnprobs * LOG2E
            # -- The model produces natural logarithms of probabilities, but we need base-2 logarithms of the
            #    probabilities, since these give us bits.

            bits += -log2probs.sum()  # add the bits for each character (the negative log_2 probabilities) to the running total
            batch = []  # clear the buffer

    return bits / data.size(0)  # bits-per-byte
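
As a sanity check, a model that assigns uniform probability to 256 tokens should compress to exactly 8 bits per byte (log2 of 256); a toy sketch, run on CPU, with LOG2E = log2(e) and d() returning the default device as the code above assumes:

import math
import torch

class Uniform(torch.nn.Module):
    # toy sequence model: uniform log-probabilities over 256 output tokens
    def forward(self, x):
        b, t = x.size()
        return torch.full((b, t, 256), -math.log(256.0))

data = torch.randint(0, 256, size=(1000,))
print(compute_compression(Uniform(), data, context=32, batch_size=64))  # ~8.0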
Example 28
def go(arg):

    global repeats
    repeats = arg.repeats

    tbdir = arg.tb_dir if arg.tb_dir is not None else os.path.join('./runs', get_slug(arg))[:250]
    tbw = SummaryWriter(log_dir=tbdir)

    dev = 'cuda' if torch.cuda.is_available() else 'cpu'

    test_mrrs = []

    train, val, test, (n2i, i2n), (r2i, i2r) = \
        embed.load(arg.name)

    # set of all triples (for filtering)
    alltriples = set()
    for s, p, o in torch.cat([train, val, test], dim=0):
        s, p, o = s.item(), p.item(), o.item()

        alltriples.add((s, p, o))

    truedicts = util.truedicts(alltriples)

    if arg.final:
        train, test = torch.cat([train, val], dim=0), test
    else:
        train, test = train, val

    subjects   = torch.tensor(list({s for s, _, _ in train}), dtype=torch.long, device=d())
    predicates = torch.tensor(list({p for _, p, _ in train}), dtype=torch.long, device=d())
    objects    = torch.tensor(list({o for _, _, o in train}), dtype=torch.long, device=d())
    ccandidates = (subjects, predicates, objects)

    print(len(i2n), 'nodes')
    print(len(i2r), 'relations')
    print(train.size(0), 'training triples')
    print(test.size(0), 'test triples')
    print(train.size(0) + test.size(0), 'total triples')

    for r in tqdm.trange(repeats) if repeats > 1 else range(repeats):

        """
        Define model
        """
        model = embed.LinkPredictor(
            triples=train, n=len(i2n), r=len(i2r), embedding=arg.emb, biases=arg.biases,
            edropout = arg.edo, rdropout=arg.rdo, decoder=arg.decoder, reciprocal=arg.reciprocal,
            init_method=arg.init_method, init_parms=arg.init_parms)

        if torch.cuda.is_available():
            prt('Using CUDA.')
            model.cuda()

        if arg.opt == 'adam':
            opt = torch.optim.Adam(model.parameters(), lr=arg.lr)
        elif arg.opt == 'adamw':
            opt = torch.optim.AdamW(model.parameters(), lr=arg.lr)
        elif arg.opt == 'adagrad':
            opt = torch.optim.Adagrad(model.parameters(), lr=arg.lr)
        elif arg.opt == 'sgd':
            opt = torch.optim.SGD(model.parameters(), lr=arg.lr, nesterov=True, momentum=arg.momentum)
        else:
            raise Exception(f'Optimizer {arg.opt} not recognized.')

        sched = torch.optim.lr_scheduler.ReduceLROnPlateau(patience=arg.patience, optimizer=opt, mode='max', factor=0.95, threshold=0.0001) \
            if arg.sched else None
        #-- defaults taken from libkge

        # nr of negatives sampled
        weight = torch.tensor([arg.nweight, 1.0], device=d()) if arg.nweight else None

        seen = 0
        for e in range(arg.epochs):

            seeni, sumloss = 0, 0.0
            tforward = tbackward = 0
            rforward = rbackward = 0
            tprep = tloss = 0
            tic()

            for fr in trange(0, train.size(0), arg.batch):
                to = min(train.size(0), fr + arg.batch)

                model.train(True)

                opt.zero_grad()

                positives = train[fr:to].to(d())

                for ctarget in [0, 1, 2]: # which part of the triple to corrupt
                    ng = arg.negative_rate[ctarget]

                    if ng > 0:

                        with torch.no_grad():
                            bs, _ = positives.size()

                            tic()
                            if arg.limit_negatives:
                                cand = ccandidates[ctarget]
                                mx = cand.size(0)
                                idx = torch.empty(bs, ng, dtype=torch.long, device=d()).random_(0, mx)
                                corruptions = cand[idx]
                            else:
                                mx = len(i2r) if ctarget == 1 else len(i2n)
                                corruptions = torch.empty(bs, ng, dtype=torch.long, device=d()).random_(0, mx)
                            tprep += toc()

                            s, p, o = positives[:, 0:1], positives[:, 1:2], positives[:, 2:3]
                            if ctarget == 0:
                                s = torch.cat([s, corruptions], dim=1)
                            if ctarget == 1:
                                p = torch.cat([p, corruptions], dim=1)
                            if ctarget == 2:
                                o = torch.cat([o, corruptions], dim=1)

                            # -- NB: two of the index vectors s, p o are now size (bs, 1) and the other is (bs, ng+1)
                            #    We will let the model broadcast these to give us a score tensor of (bs, ng+1)
                            #    In most cases we can optimize the decoder to broadcast late for better speed.

                            if arg.loss == 'bce':
                                labels = torch.cat([torch.ones(bs, 1, device=d()), torch.zeros(bs, ng, device=d())], dim=1)
                            elif arg.loss == 'ce':
                                labels = torch.zeros(bs, dtype=torch.long, device=d())
                                # -- CE loss treats the problem as a multiclass classification problem: for a positive triple,
                                #    together with its k corruptions, identify which is the true triple. This is always triple 0.
                                #    (It may seem like the model could easily cheat by always choosing triple 0, but the score
                                #    function is order equivariant, so it can't choose by ordering.)

                        recip = None if not arg.reciprocal else ('head' if ctarget == 0 else 'tail')
                        # -- We use the tail relations if the target is the relation (usually p-corruption is not used)

                        tic()
                        out = model(s, p, o, recip=recip)
                        tforward += toc()

                        assert out.size() == (bs, ng + 1), f'{out.size()=} {(bs, ng + 1)=}'

                        tic()
                        if arg.loss == 'bce':
                            loss = F.binary_cross_entropy_with_logits(out, labels, weight=weight, reduction=arg.lred)
                        elif arg.loss == 'ce':
                            loss = F.cross_entropy(out, labels, reduction=arg.lred)

                        assert not torch.isnan(loss), 'Loss has become NaN'

                        sumloss += float(loss.item())
                        seen += bs; seeni += bs
                        tloss += toc()

                        tic()
                        loss.backward()
                        tbackward += toc()
                        # No step yet, we accumulate the gradients over all corruptions.
                        # -- this causes problems with modules like batchnorm, so be careful when porting.

                tic()
                regloss = None
                if arg.reg_eweight is not None:
                    regloss = model.penalty(which='entities', p=arg.reg_exp, rweight=arg.reg_eweight)

                if arg.reg_rweight is not None:
                    rpen = model.penalty(which='relations', p=arg.reg_exp, rweight=arg.reg_rweight)
                    regloss = rpen if regloss is None else regloss + rpen
                    # -- accumulate, so that entity and relation penalties can both apply
                rforward += toc()

                tic()
                if regloss is not None:
                    sumloss += float(regloss.item())
                    regloss.backward()
                rbackward += toc()

                opt.step()

                tbw.add_scalar('biases/train_loss', float(loss.item()), seen)

            if e == 0:
                print(f'\n pred: forward {tforward:.4}, backward {tbackward:.4}')
                print(f'   reg: forward {rforward:.4}, backward {rbackward:.4}')
                print(f'           prep {tprep:.4}, loss {tloss:.4}')
                print(f' total: {toc():.4}')
                # -- NB: these numbers will not be accurate for GPU runs unless CUDA_LAUNCH_BLOCKING is set to 1

            # Evaluate
            if ((e+1) % arg.eval_int == 0) or e == arg.epochs - 1:

                with torch.no_grad():

                    model.train(False)

                    if arg.eval_size is None:
                        testsub = test
                    else:
                        testsub = test[random.sample(range(test.size(0)), k=arg.eval_size)]

                    mrr, hits, ranks = util.eval(
                        model=model, valset=testsub, truedicts=truedicts, n=len(i2n),
                        batch_size=arg.test_batch, verbose=True)

                    if arg.check_simple: # double-check using a separate, slower implementation
                        mrrs, hitss, rankss = util.eval_simple(
                            model=model, valset=testsub, alltriples=alltriples, n=len(i2n), verbose=True)

                        assert ranks == rankss
                        assert mrr == mrrs

                    print(f'epoch {e}: MRR {mrr:.4}\t hits@1 {hits[0]:.4}\t  hits@3 {hits[1]:.4}\t  hits@10 {hits[2]:.4}')

                    tbw.add_scalar('biases/mrr', mrr, e)
                    tbw.add_scalar('biases/h@1', hits[0], e)
                    tbw.add_scalar('biases/h@3', hits[1], e)
                    tbw.add_scalar('biases/h@10', hits[2], e)

                    if sched is not None:
                        sched.step(mrr) # reduce lr if mrr stalls

        test_mrrs.append(mrr)

    print('training finished.')

    temrrs = torch.tensor(test_mrrs)
    print(f'mean test MRR    {temrrs.mean():.3} ({temrrs.std():.3})  \t{test_mrrs}')
Example 30
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Project Euler 23: Find the sum of all the positive integers which cannot be written as the sum of two abundant numbers.
A perfect number is a number for which the sum of its proper divisors is exactly equal to the number. For example, the sum of the proper divisors of 28 would be 1 + 2 + 4 + 7 + 14 = 28, which means that 28 is a perfect number.

A number is called deficient if the sum of its proper divisors is less than the number, and abundant if this sum exceeds it.

As 12 is the smallest abundant number, 1 + 2 + 3 + 4 + 6 = 16, the smallest number that can be written as the sum of two abundant numbers is 24. By mathematical analysis, it can be shown that all integers greater than 28123 can be written as the sum of two abundant numbers. However, this upper limit cannot be reduced any further by analysis even though it is known that the greatest number that cannot be expressed as the sum of two abundant numbers is less than this limit.

Find the sum of all the positive integers which cannot be written as the sum of two abundant numbers.
"""

import util


limit = 20162  # 20161 is the greatest number not expressible as such a sum, so checking below this suffices
sum = 0
# it's a set, after all. sets are faster than lists for our needs.
abn = set()
for n in range(1, limit):
    if util.d(n) > n:
        abn.add(n)
    # if the difference of the number we're examining and every number in the set
    # is in the set, then the number is the sum of two abundant numbers.
    # otherwise, we must add it to our sum in question.
    if not any( (n-a in abn) for a in abn ):
        sum += n
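
The script relies on util.d(n) returning the sum of the proper divisors of n; a minimal sketch of that contract:

def d(n):
    # sum of the proper divisors of n (all divisors smaller than n)
    if n < 2:
        return 0
    total = 1  # 1 divides every n > 1
    i = 2
    while i * i <= n:
        if n % i == 0:
            total += i
            if i != n // i:  # avoid counting a square root twice
                total += n // i
        i += 1
    return total

# d(12) == 16 > 12, so 12 is abundant; d(28) == 28, so 28 is perfect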
Example 31
def go(arg):

    if arg.seed < 0:
        seed = random.randint(0, 1000000)
        print('random seed: ', seed)
        torch.manual_seed(seed)
    else:
        torch.manual_seed(arg.seed)

    tbw = SummaryWriter(log_dir=arg.tb_dir)  # Tensorboard logging

    # load the data (validation unless arg.final is true, then test)
    arg.data = here('data/enwik8.gz') if arg.data is None else arg.data

    data_train, data_val, data_test = enwik8(arg.data)
    data_train, data_test = (torch.cat([data_train, data_val], dim=0), data_test) \
                            if arg.final else (data_train, data_val)

    # create the model
    model = GTransformer(emb=arg.embedding_size,
                         heads=arg.num_heads,
                         depth=arg.depth,
                         seq_length=arg.context,
                         num_tokens=NUM_TOKENS)
    if torch.cuda.is_available():
        model.cuda()

    opt = torch.optim.Adam(lr=arg.lr, params=model.parameters())

    # training loop
    # - note: we don't loop over the data, instead we sample a batch of random subsequences each time.
    for i in tqdm.trange(arg.num_batches):

        # learning rate warmup
        # - we linearly increase the learning rate from 1e-10 to arg.lr over the first
        #   few thousand batches
        if arg.lr_warmup > 0 and i < arg.lr_warmup:
            lr = max((arg.lr / arg.lr_warmup) * i, 1e-10)
            for g in opt.param_groups:
                g['lr'] = lr  # note: assigning to opt.lr would not update the parameter groups

        opt.zero_grad()

        # sample a batch of random subsequences
        starts = torch.randint(size=(arg.batch_size, ),
                               low=0,
                               high=data_train.size(0) - arg.context - 1)
        seqs_source = [
            data_train[start:start + arg.context] for start in starts
        ]
        seqs_target = [
            data_train[start + 1:start + arg.context + 1] for start in starts
        ]
        source = torch.cat([s[None, :] for s in seqs_source],
                           dim=0).to(torch.long)
        target = torch.cat([s[None, :] for s in seqs_target],
                           dim=0).to(torch.long)
        # - target is the same sequence as source, except one character ahead

        if torch.cuda.is_available():
            source, target = source.cuda(), target.cuda()
        source, target = Variable(source), Variable(target)

        output = model(source)

        loss = F.nll_loss(output.transpose(2, 1), target, reduction='mean')
        tbw.add_scalar('transformer/train-loss',
                       float(loss.item()) * LOG2E, i * arg.batch_size)

        loss.backward()

        # clip gradients
        # - If the total gradient vector has a length > 1, we clip it back down to 1.
        if arg.gradient_clipping > 0.0:
            nn.utils.clip_grad_norm_(model.parameters(), arg.gradient_clipping)

        opt.step()

        # - validate every {arg.test_every} steps. First we compute the
        #   compression on the validation (or a subset)
        #   then we generate some random text to monitor progress
        if i != 0 and (i % arg.test_every == 0 or i == arg.num_batches - 1):

            upto = data_test.size(0) if i == arg.num_batches - 1 else arg.test_subset
            data_sub = data_test[:upto]

            with torch.no_grad():
                bits, tot = 0.0, 0
                batch = []  # buffer, every time it fills up, we run it through the model

                for current in range(data_sub.size(0)):

                    fr = max(0, current - arg.context)
                    to = current + 1

                    context = data_sub[fr:to].to(torch.long)
                    if context.size(0) < arg.context + 1:
                        pad = torch.zeros(size=(arg.context + 1 - context.size(0), ),
                                          dtype=torch.long)
                        context = torch.cat([pad, context], dim=0)

                        assert context.size(0) == arg.context + 1

                    if torch.cuda.is_available():
                        context = context.cuda()

                    batch.append(context[None, :])

                    if len(batch) == arg.test_batchsize or current == data_sub.size(0) - 1:

                        # batch is full, run it through the model
                        b = len(batch)

                        all = torch.cat(batch, dim=0)
                        source = all[:, :-1]  # input
                        target = all[:, -1]  # target values

                        output = model(source)

                        lnprobs = output[torch.arange(b, device=d()), -1, target]
                        log2probs = lnprobs * LOG2E  # convert from nats to bits

                        bits += -log2probs.sum()
                        batch = []  # empty buffer

                bits_per_byte = bits / data_sub.size(0)

                # print validation performance. 1 bit per byte is (currently) state of the art.
                print(f'epoch {i}: {bits_per_byte:.4} bits per byte')
                tbw.add_scalar(f'transformer/eval-loss', bits_per_byte,
                               i * arg.batch_size)

                # generate some random text
                GENSIZE = 600
                TEMP = 0.5
                seedfr = random.randint(0, data_test.size(0) - arg.context)
                input = data_test[seedfr:seedfr + arg.context].to(torch.long)

                if torch.cuda.is_available():
                    input = input.cuda()

                input = Variable(input)

                print('[', end='', flush=True)
                for c in input:
                    print(str(chr(c)), end='', flush=True)
                print(']', end='', flush=True)

                for _ in range(GENSIZE):
                    output = model(input[None, :])
                    c = sample(output[0, -1, :], TEMP)
                    print(str(chr(max(32, c))), end='', flush=True)

                    input = torch.cat([input[1:], c[None]], dim=0)

                print()
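
The generation loop assumes a sample(lnprobs, temperature) helper that draws a token from a vector of log-probabilities; a common implementation of that contract (a sketch, not necessarily the one used here):

import torch
import torch.nn.functional as F
import torch.distributions as dist

def sample(lnprobs, temperature=1.0):
    # sample a token index from a log-probability vector, with temperature
    if temperature == 0.0:
        return lnprobs.argmax()  # greedy decoding
    p = F.softmax(lnprobs / temperature, dim=0)
    return dist.Categorical(p).sample()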
Example 32
def go(arg):

    try:
        arg.bins = int(arg.bins)
    except ValueError:
        pass

    util.makedirs('./bias/')

    if not os.path.exists('./bias/cached.npz'):

        if arg.seed < 0:
            seed = random.randint(0, 1000000)
            print('random seed: ', seed)
            torch.manual_seed(seed)
        else:
            torch.manual_seed(arg.seed)

        tbw = SummaryWriter(log_dir=arg.tb_dir)
        tfms = transforms.Compose([transforms.ToTensor()])

        if (arg.task == 'mnist'):

            shape = (1, 28, 28)
            num_classes = 10

            data = arg.data + os.sep + arg.task

            if arg.final:
                train = torchvision.datasets.MNIST(root=data, train=True, download=True, transform=tfms)
                trainloader = torch.utils.data.DataLoader(train, batch_size=arg.batch_size, shuffle=True, num_workers=0)

                test = torchvision.datasets.MNIST(root=data, train=False, download=True, transform=ToTensor())
                testloader = torch.utils.data.DataLoader(test, batch_size=arg.batch_size, shuffle=False, num_workers=0)

            else:
                NUM_TRAIN = 45000
                NUM_VAL = 5000
                total = NUM_TRAIN + NUM_VAL

                train = torchvision.datasets.MNIST(root=data, train=True, download=True, transform=tfms)

                trainloader = DataLoader(train, batch_size=arg.batch, sampler=util.ChunkSampler(0, NUM_TRAIN, total))
                testloader = DataLoader(train, batch_size=arg.batch, sampler=util.ChunkSampler(NUM_TRAIN, NUM_VAL, total))

        elif (arg.task == 'cifar10'):

            shape = (3, 32, 32)
            num_classes = 10

            data = arg.data + os.sep + arg.task

            if arg.final:
                train = torchvision.datasets.CIFAR10(root=data, train=True, download=True, transform=tfms)
                trainloader = torch.utils.data.DataLoader(train, batch_size=arg.batch, shuffle=True, num_workers=2)
                test = torchvision.datasets.CIFAR10(root=data, train=False, download=True, transform=ToTensor())
                testloader = torch.utils.data.DataLoader(test, batch_size=arg.batch, shuffle=False, num_workers=2)

            else:
                NUM_TRAIN = 45000
                NUM_VAL = 5000
                total = NUM_TRAIN + NUM_VAL

                train = torchvision.datasets.CIFAR10(root=data, train=True, download=True, transform=tfms)

                trainloader = DataLoader(train, batch_size=arg.batch, sampler=util.ChunkSampler(0, NUM_TRAIN, total))
                testloader = DataLoader(train, batch_size=arg.batch,
                                        sampler=util.ChunkSampler(NUM_TRAIN, NUM_VAL, total))

        elif arg.task == 'ffhq':

            transform = ToTensor()
            shape = (3, 128, 128)

            trainset = torchvision.datasets.ImageFolder(root=arg.data+os.sep+'train',
                                                        transform=transform)
            trainloader = torch.utils.data.DataLoader(trainset, batch_size=arg.batch,
                                                      shuffle=True, num_workers=2)

            testset = torchvision.datasets.ImageFolder(root=arg.data+os.sep+'valid',
                                                       transform=transform)
            testloader = torch.utils.data.DataLoader(testset, batch_size=arg.batch,
                                                     shuffle=False, num_workers=2)

        else:
            raise Exception('Task {} not recognized'.format(arg.task))

        encoder = Encoder(shape, latent_size=arg.latent_size, depth=arg.depth)
        decoder = Decoder(shape, latent_size=arg.latent_size, depth=arg.depth)

        if arg.cuda:
            encoder.cuda()
            decoder.cuda()

        opt = torch.optim.Adam(params=list(encoder.parameters()) + list(decoder.parameters()), lr=arg.lr)

        nparms = num_params([encoder])
        print(f'{nparms} parameters in encoder.')

        seen = 0
        l = arg.latent_size
        ti = random.sample(range(nparms), arg.num_params) # random indices of parameters for which to test the gradient
        k = arg.k

        # Train for a fixed nr of instances (with the true gradient)
        for e in range(arg.epochs):
            print('epoch', e)

            for i, (inputs, _) in enumerate(trainloader):

                b, c, h, w = inputs.size()

                if arg.cuda:
                    inputs = inputs.cuda()

                # compute actual gradient
                opt.zero_grad()

                latent = encoder(inputs)
                latent = F.softmax(latent, dim=1)

                dinp = torch.eye(l, device=d(arg.cuda))[None, :, :].expand(b, l, l).reshape(b*l, l)
                dout = decoder(dinp)

                assert dout.size() == (b*l, c, h, w)

                target = inputs.detach()[:, None, :, :, :].expand(b, l, c, h, w).reshape(b*l, c, h, w)

                loss = F.binary_cross_entropy(dout, target, reduction='none')
                loss = loss.sum(dim=1).sum(dim=1).sum(dim=1).view(b, l)

                loss = (loss * latent).sum(dim=1).mean()

                loss.backward()

                true_gradient = gradient([encoder, decoder])
                true_gradient = true_gradient[ti]

                opt.step()

        inputs, _ = next(iter(trainloader))
        if arg.cuda:
            inputs = inputs.cuda()

        b, c, h, w = inputs.size()

        # compute true gradient
        opt.zero_grad()

        latent = encoder(inputs)
        latent = F.softmax(latent, dim=1)

        dinp = torch.eye(l, device=d(arg.cuda))[None, :, :].expand(b, l, l).reshape(b*l, l)
        dout = decoder(dinp)

        assert dout.size() == (b*l, c, h, w)

        target = inputs.detach()[:, None, :, :, :].expand(b, l, c, h, w).reshape(b*l, c, h, w)

        loss = F.binary_cross_entropy(dout, target, reduction='none')
        loss = loss.sum(dim=1).sum(dim=1).sum(dim=1).view(b, l)

        loss = (loss * latent).sum(dim=1).mean()

        loss.backward()

        true_gradient = gradient([encoder])
        true_gradient = true_gradient[ti]

        # - Estimate the bias for the uninformed sampler

        uste = torch.zeros((arg.samples, len(ti),), device=d(arg.cuda))

        # Unbiased, uninformed STE
        for s in trange(arg.samples):
            opt.zero_grad()

            ks = [random.sample(range(arg.latent_size), k) for _ in range(b)]
            ks = torch.tensor(ks, device=d(arg.cuda))

            latent = encoder(inputs)
            latent = torch.gather(latent, dim=1, index=ks); assert latent.size() == (b, k)
            latent = F.softmax(latent, dim=1)

            dinp = torch.zeros(size=(b*k, l), device=d(arg.cuda))
            dinp.scatter_(dim=1, index=ks.view(b*k, 1), value=1)
            dout = decoder(dinp)

            assert dout.size() == (b * k, c, h, w)

            target = inputs.detach()[:, None, :, :, :].expand(b, k, c, h, w).reshape(b * k, c, h, w)

            loss = F.binary_cross_entropy(dout, target, reduction='none')
            loss = loss.sum(dim=1).sum(dim=1).sum(dim=1).view(b, k)

            loss = (loss * latent).sum(dim=1).mean()

            loss.backward()

            samp_gradient = gradient([encoder])
            uste[s, :] = samp_gradient[ti]

            del loss

        iste = torch.zeros((arg.samples, len(ti),), device=d(arg.cuda))

        # Unbiased, informed STE
        # This behaves like the USTE, but ensures that the argmax is always included in the sample
        for s in trange(arg.samples):
            opt.zero_grad()

            latent = encoder(inputs)

            ks = [random.sample(range(arg.latent_size-1), k-1) for _ in range(b)]
            ks = torch.tensor(ks, device=d(arg.cuda))
            am = latent.argmax(dim=1, keepdim=True)
            ks[ks > am] += 1

            ks = torch.cat([am, ks], dim=1)

            latent = torch.gather(latent, dim=1, index=ks); assert latent.size() == (b, k)
            latent = F.softmax(latent, dim=1)

            dinp = torch.zeros(size=(b * k, l), device=d())
            dinp.scatter_(dim=1, index=ks.view(b * k, 1), value=1)
            dout = decoder(dinp)

            assert dout.size() == (b * k, c, h, w)

            target = inputs.detach()[:, None, :, :, :].expand(b, k, c, h, w).reshape(b * k, c, h, w)

            loss = F.binary_cross_entropy(dout, target, reduction='none')
            loss = loss.sum(dim=1).sum(dim=1).sum(dim=1).view(b, k)

            loss = (loss * latent).sum(dim=1).mean()

            loss.backward()

            samp_gradient = gradient([encoder])
            iste[s, :] = samp_gradient[ti]

            del loss

        # Biased (?) gumbel STE
        # STE with gumbel noise

        gste = torch.zeros((arg.samples, len(ti),), device=d(arg.cuda))

        for s in trange(arg.samples):
            for _ in range(k):
                opt.zero_grad()

                latent = encoder(inputs)

                gumbelize(latent, temperature=arg.gumbel)
                latent = F.softmax(latent, dim=1)

                ks = latent.argmax(dim=1, keepdim=True)

                dinp = torch.zeros(size=(b, l), device=d())
                dinp.scatter_(dim=1, index=ks, value=1)

                dinp = (dinp - latent).detach() + latent # straight-through trick
                dout = decoder(dinp)

                assert dout.size() == (b, c, h, w)

                target = inputs.detach()

                loss = F.binary_cross_entropy(dout, target, reduction='none')
                loss = loss.sum(dim=1).sum(dim=1).sum(dim=1).view(b)
                loss = loss.mean()

                loss.backward()

                samp_gradient = gradient([encoder])
                gste[s, :] += samp_gradient[ti]

                del loss

            gste[s, :] /= k

        # Classical STE
        # cste = torch.zeros((arg.samples, len(ti),), device=d(arg.cuda))
        #
        # for s in trange(arg.samples):
        #     opt.zero_grad()
        #
        #     latent = encoder(inputs)
        #
        #     # gumbelize(latent, temperature=arg.gumbel)
        #     dist = ds.Categorical(logits=latent)
        #     ks = dist.sample()[:, None]
        #
        #     dinp = torch.zeros(size=(b, l), device=d())
        #     dinp.scatter_(dim=1, index=ks, value=1)
        #
        #     dinp = (dinp - latent).detach() + latent # straight-through trick
        #     dout = decoder(dinp)
        #
        #     assert dout.size() == (b, c, h, w)
        #
        #     target = inputs.detach()
        #
        #     loss = F.binary_cross_entropy(dout, target, reduction='none')
        #     loss = loss.sum(dim=1).sum(dim=1).sum(dim=1).view(b)
        #     loss = loss.mean()
        #
        #     loss.backward()
        #
        #     samp_gradient = gradient([encoder])
        #     cste[s, :] = samp_gradient[ti]
        #
        #     del loss

        uste = uste.cpu().numpy()
        iste = iste.cpu().numpy()
        gste = gste.cpu().numpy()
        tgrd = true_gradient.cpu().numpy()

        np.savez_compressed('./bias/cached.npz', uste=uste, iste=iste, gste=gste, tgrd=tgrd)

    else:
        res = np.load('./bias/cached.npz')
        uste, iste, gste, tgrd = res['uste'], res['iste'], res['gste'], res['tgrd']

    ind = tgrd != 0.0
    print(tgrd.shape, ind)

    print(f'{ind.sum()} derivatives out of {ind.shape[0]} not equal to zero.')

    if not arg.skip:
        for nth, i in enumerate( np.arange(ind.shape[0])[ind][:5] ):

            plt.gcf().clear()

            unump = uste[:, i]
            inump = iste[:, i]
            gnump = gste[:, i]
            # cnump = cste[:, i].cpu().numpy()

            ulab = f'uninformed, var={unump.var():.4}'
            ilab = f'informed, var={inump.var():.4}'
            glab = f'Gumbel STE (t={arg.gumbel}) var={gnump.var():.4}'
            # clab = f'Classical STE var={cnump.var():.4}'

            plt.hist([unump, inump, gnump], color=['r', 'g', 'b'], label=[ulab, ilab, glab], bins=arg.bins)

            plt.axvline(x=tgrd[i], color='k', label='true gradient')
            plt.axvline(x=unump.mean(), color='r', ls='--')
            plt.axvline(x=inump.mean(), color='g', ls='-.')
            plt.axvline(x=gnump.mean(), color='b', ls=':')
            # plt.axvline(x=cnump.mean(), color='c')

            plt.title(f'estimates for parameter ... ({uste.shape[0]} samples)')

            plt.legend()
            util.basic()

            plt.savefig(f'./bias/histogram.{nth}.pdf')


    plt.gcf().clear()

    unump = uste[:, ind].mean(axis=0)
    inump = iste[:, ind].mean(axis=0)
    gnump = gste[:, ind].mean(axis=0)

    tnump = tgrd[ind]

    unump = np.abs(unump - tnump)
    inump = np.abs(inump - tnump)
    gnump = np.abs(gnump - tnump)

    ulab = f'uninformed, var={unump.var():.4}'
    ilab = f'informed, var={inump.var():.4}'
    glab = f'Gumbel STE (t={arg.gumbel}) var={gnump.var():.4}'
    # clab = f'Classical STE var={cnump.var():.4}'

    plt.hist([unump, inump, gnump], color=['r', 'g', 'b'], label=[ulab, ilab, glab], bins=arg.bins)

    plt.axvline(x=unump.mean(), color='r', ls='--')
    plt.axvline(x=inump.mean(), color='g', ls='-.')
    plt.axvline(x=gnump.mean(), color='b', ls=':')
    # plt.axvline(x=cnump.mean(), color='c')

    plt.title(f'Absolute error between true gradient and estimate \n over {ind.sum()} parameters with nonzero gradient.')

    plt.legend()
    util.basic()

    if arg.range is not None:
        plt.xlim(*arg.range)

    plt.savefig('./bias/histogram.all.pdf')
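
The core of both sampled estimators above is the straight-through trick on the line `dinp = (dinp - latent).detach() + latent`: the forward pass sees the hard one-hot vector, while gradients flow through the soft relaxation. A minimal standalone sketch isolating just that step (all names here are illustrative, not from the source):

import torch
import torch.nn.functional as F

# Straight-through estimator: forward with the hard argmax one-hot,
# backward through the softmax.
logits = torch.randn(4, 10, requires_grad=True)
soft = F.softmax(logits, dim=1)

hard = torch.zeros_like(soft)
hard.scatter_(dim=1, index=soft.argmax(dim=1, keepdim=True), value=1.0)

ste = (hard - soft).detach() + soft  # value == hard, gradient flows via soft

ste.sum().backward()
print(logits.grad.shape)  # (4, 10): nonzero gradients despite the hard argmax
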
Example n. 33
def go(arg):

    if arg.seed < 0:
        seed = random.randint(0, 1000000)
        print('random seed: ', seed)
        torch.manual_seed(seed)
    else:
        torch.manual_seed(arg.seed)

    tbw = SummaryWriter(log_dir=arg.tb_dir)  # Tensorboard logging

    arg.data = here('data/enwik8.gz') if arg.data is None else arg.data

    str_train, str_val, str_test = load_text(arg.data)
    str_train, str_test = (str_train + str_val, str_test) \
                            if arg.final else (str_train, str_val)

    # create the model
    model = GPT2Wrapper(iblocks=arg.iblocks)

    if torch.cuda.is_available():
        model.to('cuda')
        model.model.mod[0].to('cuda')

    # tokenize the data
    data_train, data_val, data_test = \
        torch.tensor(model.tokenizer.encode(str_train)), \
        torch.tensor(model.tokenizer.encode(str_val)), \
        torch.tensor(model.tokenizer.encode(str_test))

    opt = torch.optim.Adam(lr=arg.lr, params=model.parameters())
    # sch = torch.optim.lr_scheduler.LambdaLR(opt, lambda i: min(i / (arg.lr_warmup / arg.batch_size), 1.0))
    # -- linear learning rate warmup

    # training loop
    # -- note: we don't loop over the data, instead we sample a batch of random subsequences each time.
    for i in tqdm.trange(arg.num_batches):

        opt.zero_grad()

        # sample a batch of random subsequences
        starts = torch.randint(size=(arg.batch_size, ),
                               low=0,
                               high=data_train.size(0) - model.ctx - 1)
        seqs_source = [data_train[start:start + model.ctx] for start in starts]
        seqs_target = [
            data_train[start + 1:start + model.ctx + 1] for start in starts
        ]

        source = torch.cat([s[None, :] for s in seqs_source],
                           dim=0).to(torch.long)
        target = torch.cat([s[None, :] for s in seqs_target],
                           dim=0).to(torch.long)
        # -- target is the same sequence as source, except one character ahead

        if torch.cuda.is_available():
            source, target = source.to('cuda'), target.to('cuda')

        output = model(source)

        loss = F.cross_entropy(output.transpose(2, 1),
                               target,
                               reduction='mean')
        tbw.add_scalar('podcasts/train-loss',
                       float(loss.item()) * LOG2E, i * arg.batch_size)

        loss.backward()

        # clip gradients
        # - if the norm of the total gradient vector exceeds arg.gradient_clipping, scale it back down to that value
        if arg.gradient_clipping > 0.0:
            nn.utils.clip_grad_norm_(model.parameters(), arg.gradient_clipping)

        opt.step()
        # sch.step()

        model.clear()

        # - every arg.print_every steps we generate some random text to
        #   monitor progress; every arg.test_every steps (further down) we
        #   compute the compression on the validation set (or a subset)
        if i != 0 and (i % arg.print_every == 0 or i == arg.num_batches - 1):

            with torch.no_grad():

                # generate and print some random text
                seedfr = random.randint(
                    0,
                    data_test.size(0) - arg.print_seed_size)
                input = data_test[seedfr:seedfr + arg.print_seed_size].to(
                    torch.long)

                if torch.cuda.is_available():
                    input = input.cuda()

                # print the seed
                strinput = model.tokenizer.decode(input)
                print(f'[{strinput}]', end='')

                outseq = []
                for _ in range(arg.print_size):
                    output = model(input[None, :])
                    c = sample(output[0, -1, :], arg.sampling_temp)
                    outseq.append(c[None])

                    input = torch.cat([input[1:], c[None]], dim=0)

                outseq = torch.cat(outseq, dim=0)
                outseq = model.tokenizer.decode(outseq)

                print(outseq)

        # val
        if i != 0 and (i % arg.test_every == 0 or i == arg.num_batches - 1):

            with torch.no_grad():

                upto = data_test.size(0) if i == arg.num_batches - 1 else arg.test_subset
                data_sub = data_test[:upto]

                bits, tot = 0.0, 0
                batch = []  # buffer; every time it fills up, we run it through the model

                for current in range(data_sub.size(0)):

                    fr = max(0, current - model.ctx)
                    to = current + 1

                    context = data_sub[fr:to].to(torch.long)
                    if context.size(0) < model.ctx + 1:
                        pad = torch.zeros(size=(model.ctx + 1 -
                                                context.size(0), ),
                                          dtype=torch.long)
                        context = torch.cat([pad, context], dim=0)

                        assert context.size(0) == model.ctx + 1

                    if torch.cuda.is_available():
                        context = context.cuda()

                    batch.append(context[None, :])

                    if len(batch) == arg.test_batchsize or current == data_sub.size(0) - 1:

                        # batch is full, run it through the model
                        b = len(batch)

                        all = torch.cat(batch, dim=0)
                        source = all[:, :-1]  # input
                        target = all[:, -1]  # target values

                        output = model(source)

                        lnprobs = output[torch.arange(b, device=d()), -1,
                                         target]
                        log2probs = lnprobs * LOG2E  # convert from nats to bits

                        bits += -log2probs.sum()
                        batch = []  # empty buffer

                bits_per_byte = bits / data_sub.size(0)

                # print validation performance. 0.92 bit per byte is (currently) state of the art.
                print(f'batch {i}: {bits_per_byte:.4} bits per byte')
                tbw.add_scalar('podcasts/eval-loss', bits_per_byte,
                               i * arg.batch_size)
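
The validation loop converts natural-log likelihoods to base 2 with the constant LOG2E = log2(e), since log2(p) = ln(p) * log2(e). A quick standalone check of that conversion (the 0.65 figure is made up for illustration):

import math

LOG2E = math.log2(math.e)

nats_per_token = 0.65                    # hypothetical cross-entropy in nats
bits_per_token = nats_per_token * LOG2E
print(f'{bits_per_token:.4} bits per token')   # ~0.9378
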
Example n. 34
    def forward(self, x):

        assert x.size()[1:] == self.in_size

        b, c, h, w = x.size()
        k = self.k
        s = (h, w)

        means, sigmas, mvalues = self.hyper(x)

        # This is a bit confusing, but k is the chunk dimension here. This is because the sparse operation
        # only selects the k separate input pixels, it does not sum/merge them.
        # In other words, we add a separate tuple dimension.
        means = means[:, :, :, :, None, :]
        sigmas = sigmas[:, :, :, :, None, :]
        mvalues = mvalues[:, :, :, :, None]

        if self.smp:
            # sample integer indices and values
            indices = sparse.ngenerate(means,
                                       self.gadditional,
                                       self.radditional,
                                       rng=s,
                                       relative_range=self.region,
                                       cuda=x.is_cuda)

            vs = (4 + self.radditional + self.gadditional)
            assert indices.size() == (
                b, h, w, k, vs, 2), f'{indices.size()}, {(b, h, w, k, vs, 2)}'

            indices = indices.view(b, h, w, k, vs, 2)
            indfl = indices.float()

            # Mask for duplicate indices
            dups = util.nduplicates(indices).to(torch.bool)

            # compute (unnormalized) densities under the given MVNs (proportions)
            props = sparse.densities(indfl, means,
                                     sigmas).clone()  # (b, h, w, k, vs, 1)
            assert props.size() == (b, h, w, k, vs, 1)

            props[dups, :] = 0
            props = props / props.sum(
                dim=4, keepdim=True
            )  # normalize over all points of a given index tuple

            # weight the values by the proportions
            weights = mvalues[:, :, :, :, None, :].expand_as(props)
            # - add a dim for the MVNs

            weights = props * weights
            weights = weights.sum(dim=5)  # - sum out the MVNs

            assert indices.size() == (b, h, w, k, vs, 2)
            assert weights.size() == (b, h, w, k, vs)

        else:
            vs = 1
            indices = means.floor().to(torch.long).detach()

        l = h * w * k * vs
        indices = indices.view(b * l, 2)

        br = torch.arange(b, device=d(x), dtype=torch.long)[:, None].expand(
            b, l).contiguous().view(-1)
        features = x[br, :, indices[:, 0], indices[:, 1]]
        assert features.size() == (b * l, c)

        if self.smp:
            features = features.view(b, h, w, k, vs, c)
            features = features * weights[:, :, :, :, :, None]
            features = features.sum(dim=4)
        else:
            features = features.view(b, h, w, k, c)

        # features now contains the selected input pixels (or weighted sum thereover): k inputs per output pixel
        assert features.size() == (
            b, h, w, k,
            c), f'Was {features.size()}, expected {(b, h, w, k, c)}.'

        features = features.view(b, h, w, k * c)

        return self.unify(features).permute(0, 3, 1, 2)  # (b, c_out, h, w)
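
The feature gather above relies on PyTorch advanced indexing with a repeated batch index: each of the l sampled (row, col) positions needs its own copy of the batch index so all three index tensors align. A small standalone sketch of that pattern (sizes are illustrative):

import torch

b, c, h, w = 2, 3, 5, 5
l = 4                                         # positions per batch element

x = torch.randn(b, c, h, w)
idx = torch.randint(0, h, size=(b * l, 2))    # (row, col) pairs

# repeat each batch index l times: 0,0,0,0,1,1,1,1,...
br = torch.arange(b)[:, None].expand(b, l).reshape(-1)

feats = x[br, :, idx[:, 0], idx[:, 1]]        # (b * l, c)
print(feats.view(b, l, c).shape)
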
Example n. 35
from util import d

ans = 0

for x in range(1, 10000):
    if d(d(x)) == x and d(x) != x:
        ans += x

print(ans)
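
Here `d` is evidently not the device helper of the other snippets: for this sum over amicable pairs (Project Euler 21) it must be the sum of proper divisors. A possible definition, consistent with the snippet but assumed rather than taken from util:

def d(n):
    # sum of proper divisors of n (assumed implementation, not the actual util.d)
    if n <= 1:
        return 0
    total = 1                   # 1 divides every n > 1
    i = 2
    while i * i <= n:
        if n % i == 0:
            total += i
            if i != n // i:     # avoid counting a square root twice
                total += n // i
        i += 1
    return total
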
Example n. 36
    def forward(self, triples, depth=2):

        assert triples.size(-1) == 3

        n, r = self.n, self.r

        dims = triples.size()[:-1]
        triples = triples.reshape(-1, 3)

        b, _ = triples.size()
        batch = Batch(triples=triples,
                      graph=self.graph,
                      inv_graph=self.inv_graph)

        # Sample
        if depth > 0:
            batch = self.sample0(batch)
        if depth > 1:
            batch = self.sample1(batch)

        # extract batch node embeddings

        bind = batch.indices()
        nodes = self.embeddings[flatten(bind), :]

        if self.dropout is not None:
            nodes = self.dropout(nodes)

        # Message passing
        if depth > 0:

            # compute the edge weights
            dtriples = torch.tensor(list(batch.edges()),
                                    device=d(),
                                    dtype=torch.long)
            btriples = torch.tensor(batch.batch_triples(),
                                    device=d(),
                                    dtype=torch.long)

            # adjacency matrix indices
            # -- repeats R times, vertically
            bn = batch.num_nodes()

            fr = btriples[:, 0] + bn * btriples[:, 1]
            to = btriples[:, 2]

            indices = torch.cat([fr[:, None], to[:, None]], dim=1)

            si, pi, oi = dtriples[:, 0], dtriples[:, 1], dtriples[:, 2]
            semb = self.embeddings[si, :]
            pemb = self.relations[pi, :]
            oemb = self.embeddings[oi, :]

            # compute the score (bilinear dot product)
            semb = self.tokeys(semb)
            oemb = self.toqueries(oemb)

            dots = (semb * pemb * oemb).sum(dim=1)

            values = torch.ones((indices.size(0), ),
                                device=d(),
                                dtype=torch.float)
            # values = (dots).abs()
            values = values / util.sum_sparse(indices, values, (r * bn, bn))

            # values *= ACTIVATION(dots)  # F.softplus(dots)

            nodes = nodes + self.rgcn0(nodes, indices, values)

            if depth > 1:
                nodes = nodes + self.rgcn1(nodes, indices, values)

        _, tind = batch.target_indices(bind)
        # -- indices of the target nodes in the list `bind`

        subjects, objects = [t[0] for t in tind], [t[1] for t in tind]

        assert len(subjects) == len(objects) == triples.size(0)
        # print(nodes.size())

        # extract embeddings for target nodes
        try:
            s = nodes[subjects, :]
            o = nodes[objects, :]
            p = self.relations[triples[:, 1], :]
        except Exception as e:
            print(triples.size())
            print(batch.size())
            print(nodes.size())
            print(len(batch.indices()))
            print(batch.entities)

            raise

        scores = self.decoder(s, p, o)

        assert scores.size() == (util.prod(dims), )

        return scores.view(*dims)
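
The "bilinear dot product" decoder referenced above scores a triple as the sum of elementwise products of subject, relation and object embeddings (DistMult-style). A minimal standalone sketch (sizes illustrative):

import torch

e = 16                        # embedding dimension
s = torch.randn(8, e)         # subject embeddings, one row per triple
p = torch.randn(8, e)         # relation embeddings
o = torch.randn(8, e)         # object embeddings

scores = (s * p * o).sum(dim=1)   # (8,): one score per triple
print(scores.shape)
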
Example n. 37
    def forward(self, batch: Batch):
        """

        :param batch:
        :return:
        """

        # select some candidates.
        if self.multi:
            raise Exception()
            # with Pool(self.cpus_available) as pool:
            #     cflats = pool.starmap(self.inner, [(i, batch) for i in range(batch.size())])

        else:
            cflats = []
            for bi in range(batch.size()):

                if self.csample is not None:
                    # Sample a list of candidates using the pre-computed scores
                    cflat = wrs_gen(
                        batch.gen_inc_edges(bi),
                        weight_function=lambda edge: self.globals[edge],
                        k=self.csample)
                else:
                    cflat = list(batch.gen_inc_edges(bi))

                cflats.append(cflat)

        # pad the candidates with zero triples
        lens = [len(x) for x in cflats]
        mx = max(lens)
        cflats = [x + [(0, 0, 0)] * (mx - ln) for x, ln in zip(cflats, lens)]

        with torch.no_grad():
            #- compute the attention weights

            all = torch.tensor(cflats, device=d(), dtype=torch.long)

            assert all.size() == (batch.size(), mx, 3)

            semb = self.nodes[all[:, :, 0]]
            pemb = self.relations[all[:, :, 1]]
            oemb = self.nodes[all[:, :, 2]]

            # compute the score (bilinear dot product)
            semb = self.tokeys(semb)
            oemb = self.toqueries(oemb)

            dots = (semb * pemb * oemb).sum(dim=2)  # + sb + pb + ob + gb
            # dots = ACTIVATION(dots)

            # keys in the style of weighted reservoir sampling: log(u) / score,
            # sorted descending, gives a sample without replacement weighted
            # by the (assumed positive) scores
            u = torch.rand(*dots.size(), device=d(dots))
            weights = u.log() / dots

            weights, indices = torch.sort(weights, dim=1, descending=True)

            indices = indices.tolist()

        # remove any indices that point into the padding
        indices = [[i for i in ind if i < ln]
                   for ind, ln in zip(indices, lens)]

        # pick the first k
        indices = [ind[:self.ksample] for ind in indices]

        sampled = [[cflats[i][j] for j in ind]
                   for i, ind in enumerate(indices)]

        for bi, samp in enumerate(sampled):
            batch.add_edges(samp, bi)

        return batch
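
The sort on u.log() / dots implements weighted sampling without replacement with exponential keys: for positive weights w, -log(u) / w is exponentially distributed with rate w, so taking the k largest values of log(u) / w favours items with large w. A standalone sketch, assuming positive scores:

import torch

w = torch.tensor([1.0, 5.0, 2.0, 0.5, 10.0])  # positive weights
k = 2

u = torch.rand_like(w)
keys = u.log() / w                  # all negative; larger w -> closer to 0
topk = keys.sort(descending=True).indices[:k]
print(topk)                         # indices of k items, without replacement
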
Example n. 38
    def forward(self, nodes=None):

        n, r = self.n, self.r
        rn = r * n

        ## Perform message passing
        assert (nodes is None) == (self.insize is None)

        h0 = n if self.insize is None else self.insize
        h1 = self.outsize

        if self.decomp is None:
            weights = self.weights

        elif self.decomp == 'basis':
            weights = torch.einsum('rb, bij -> rij', self.comps, self.bases)

        elif self.decomp == 'block':
            weights = util.block_diag(self.blocks)
            # TODO: multiply in block form (more efficient, but implementation differs per layer type)

        assert weights.size() == (r, h0, h1)

        if self.edo is not None and self.training:
            # apply edge dropout

            p, pid = self.edo

            nt = self.indices.size(0) - n

            mask = torch.bernoulli(
                torch.empty(size=(nt, ),
                            dtype=torch.float,
                            device=d(self.bias)).fill_(1.0 - p))
            maskid = torch.bernoulli(
                torch.empty(size=(n, ), dtype=torch.float,
                            device=d(self.bias)).fill_(1.0 - pid))

            vals = torch.cat([mask, maskid], dim=0)

        else:
            vals = torch.ones(self.indices.size(0),
                              dtype=torch.float,
                              device=d(self.bias))

        # Row- or column normalize the values of the adjacency matrix
        vals = vals / util.sum_sparse(
            self.indices, vals, self.adjsize, row=not self.hor)

        adj = torch.sparse.FloatTensor(indices=self.indices.t(),
                                       values=vals,
                                       size=self.adjsize)
        if self.bias.is_cuda:
            adj = adj.to('cuda')

        if self.insize is None:
            # -- input is the identity matrix, just multiply the weights by the adjacencies
            out = torch.mm(adj, weights.view(r * h0, h1))

        elif self.hor:
            # -- input is high-dim and output is low dim, multiply h0 x weights first
            nodes = nodes[None, :, :].expand(r, n, h0)
            nw = torch.einsum('rni, rio -> rno', nodes, weights).contiguous()
            out = torch.mm(adj, nw.view(r * n, h1))

        else:
            # -- adj x h0 first, then weights
            out = torch.mm(adj, nodes)  # sparse mm
            out = out.view(r, n, h0)  # new dim for the relations
            out = torch.einsum('rio, rni -> no', weights, out)

        assert out.size() == (n, h1)

        return out + self.bias
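
In the 'basis' branch above, the r relation-specific weight matrices are mixed from a small set of shared bases, so the parameter count scales with the number of bases rather than the number of relations. A standalone sketch of that decomposition (sizes illustrative):

import torch

r, nbases, h0, h1 = 12, 4, 8, 8

comps = torch.randn(r, nbases)        # per-relation mixing coefficients
bases = torch.randn(nbases, h0, h1)   # shared basis matrices

weights = torch.einsum('rb, bij -> rij', comps, bases)
assert weights.size() == (r, h0, h1)
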
Example n. 39
def sample_gumbel(shape, eps=1e-20, cuda=False):
    U = torch.rand(shape, device=d(cuda))
    return -torch.log(-torch.log(U + eps) + eps)
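
Noise like this is typically used via the Gumbel-max trick: adding it to unnormalized logits and taking the argmax yields an exact sample from the corresponding categorical distribution. A standalone sketch (not taken from the source):

import torch

def sample_gumbel(shape, eps=1e-20):
    U = torch.rand(shape)
    return -torch.log(-torch.log(U + eps) + eps)

logits = torch.tensor([1.0, 2.0, 0.5])
sample = (logits + sample_gumbel(logits.shape)).argmax()
print(sample)   # index drawn with probability softmax(logits)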