Code Example #1
File: training.py Project: nachtsky1077/ungol
    def _epoch_validate(self, epoch: int) -> float:
        self.t.model.eval()

        batches = self.loader.batch_count
        done = ts(log.info, 'validation took {delta}s')

        # ---

        it = enumerate(self.loader.gen())
        gen = tqdm(it, desc='validation', unit=' batches', total=batches)

        losses = []
        for i, x in gen:
            if i >= batches:
                log.error(f'skipping unexpected batch {i}')
                break

            self._loss = self.validate(epoch, x)
            self._stats.ping(ums.Kind.valid, self, epoch, i)
            losses.append(self._loss)

        # ---

        self._valid_losses.append(np.array(losses).mean())
        done()
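
Note: every example on this page relies on the same timing helper `ts`, whose definition is not shown here. From the call sites, `ts(write, template)` apparently records a start time and returns a closure that, once invoked, formats the elapsed seconds (plus any extra keyword arguments such as `pid`) into the template and passes the result to the writer. A minimal sketch under that assumption (parameter names and the rounding are guesses, not taken from the project):

import time


def ts(write, template: str):
    # `write` is any callable accepting a string (log.info, print, ...);
    # `template` may reference {delta} and arbitrary keyword arguments
    start = time.time()

    def done(**kwargs):
        delta = round(time.time() - start, 3)
        write(template.format(delta=delta, **kwargs))

    return done

With this sketch, `done = ts(log.info, 'validation took {delta}s')` starts the clock and the trailing `done()` emits the formatted message.
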
Code Example #2
    def _exchange_buffer(self):
        done = ts(log.info, 'moving noise samples to GPU took {delta}s')
        self.buf.data[:] = torch.from_numpy(self._sampler.samples)
        done()

        self.buf.position = 0
        self.buf.reused = 0
Code Example #3
    def _write_buf(f_name: str, arr: np.ndarray):
        with open(f_name, mode='wb') as fd:
            log.info(f'[{os.getpid()}] writing "{f_name}" for later use')
            done = ts(log.info, '[{pid}] writing took {delta}s')

            arr.tofile(fd)

            done(pid=os.getpid())
Code Example #4
    def _read_buf(f_name: str, buf: mp.Array):
        with open(f_name, mode='rb') as fd:
            log.info(f'[{os.getpid()}] loading cached "{f_name}"')
            done = ts(log.info, '[{pid}] loading took {delta}s')

            arr = np.fromfile(fd, dtype=np.float32)
            AsyncSampler._atobuf(buf, arr)

            done(pid=os.getpid())
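
Note: `AsyncSampler._atobuf` is not part of the excerpt. Since code example #6 reads the shared buffer back with `np.frombuffer(self.buf.get_obj(), dtype=np.float32)`, the helper presumably does the inverse and copies a numpy array into the shared `mp.Array`. A hedged sketch (the lock handling and size checks of the real method are unknown):

import multiprocessing as mp

import numpy as np


def _atobuf(buf: mp.Array, arr: np.ndarray):
    # view the shared ctypes storage as float32 and copy
    # the samples over in place
    view = np.frombuffer(buf.get_obj(), dtype=np.float32)
    view[:] = arr
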
Code Example #5
File: embed.py Project: nachtsky1077/ungol
    def _ram_gen_chunks(self):
        if (yield from self._ram_from_cache()):
            return

        gen = self._batcher(self.ram_chunk_size, len(self.embed))
        for lower, upper in gen:

            msg = 'moving chunk {lower}-{upper} to RAM took {delta}s'
            done = ucu.ts(log.info, msg)
            chunk = self._ram_load_chunk(lower, upper)
            done(lower=lower, upper=upper)
            yield chunk
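
Note: the `(yield from self._ram_from_cache())` line does double duty. It forwards any cached chunks to the caller and, at the same time, evaluates to `_ram_from_cache`'s return value, which signals whether the cache was usable. A stripped-down illustration of that idiom (all names here are made up):

def _from_cache():
    cached = ['chunk-a', 'chunk-b']  # pretend cache content
    if not cached:
        return False                 # cache miss
    yield from cached                # forward cached chunks
    return True                      # cache hit


def gen_chunks():
    if (yield from _from_cache()):
        return                       # everything came from the cache
    yield 'freshly-computed-chunk'   # otherwise compute from scratch


print(list(gen_chunks()))            # ['chunk-a', 'chunk-b']
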
Code Example #6
    def samples(self) -> np.ndarray:
        done = ts(log.info, 'joining on sampler processes took {delta}s')

        try:
            self._retrieve_from_queue()
            self._join_processes()

        except AttributeError:
            raise Exception('no _create_spare() invoked')

        done()

        samples = np.frombuffer(self.buf.get_obj(), dtype=np.float32)[:]
        self._create_spare()
        return samples
Code Example #7
File: stats.py Project: nachtsky1077/ungol
    def receive(self, up: Update, buf: np.ndarray):
        log.info('[%d] writing %s codes', os.getpid(), up.kind.name)
        done = ts(log.info, '[{pid}] writing codes took {delta}s')

        # the sender works with fixed buffer sizes, thus
        # the data must be cropped at some point:
        data = buf.reshape((-1, self.M, self.K))[:self.N]

        if up.kind == Kind.train:
            grp = self.grp_train.create_group(str(up.epoch))
            self.update_train(grp, up, data)

        elif up.kind == Kind.valid:
            grp = self.grp_valid.create_group(str(up.epoch))
            self.update_valid(grp, up, data)

        done(pid=os.getpid())
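
Note: the crop handles the fixed-size transport: the flat buffer is reshaped into one (M, K) code block per sample and everything beyond the N real samples is dropped. A toy illustration with placeholder dimensions:

import numpy as np

M, K, N = 4, 8, 100             # illustrative values only
buf = np.zeros(128 * M * K)     # the sender always fills a whole buffer

data = buf.reshape((-1, M, K))[:N]
assert data.shape == (N, M, K)  # the padding rows are gone
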
Code Example #8
File: analyze.py Project: nachtsky1077/ungol
def _compile_common_nn(agg):
    """

    see _gen_common_nn(...)

    """
    done = ts(print, 'concatenation took {delta}s')
    for k in tuple(agg.keys()):

        assert len(agg[k][0]) == len(agg[k][1])
        print('compiling k-{} from {} units'.format(k, len(agg[k][0])))

        chunk_size = int(1e4)
        total = len(agg[k][0]) // chunk_size
        assert len(agg[k][0]) % chunk_size == 0, len(agg[k][0])

        # reverse list for nice pop() usage:
        # must pop to free memory (cannot change data structure
        # while iterating...)

        agg1 = agg[k][0]
        agg2 = agg[k][1]
        agg1.reverse()
        agg2.reverse()

        # compile chunks

        comp = [[], []]

        print()
        for chunk in tqdm(range(total)):
            amount = min(len(agg1), chunk_size)

            agg1_chunk = [agg1.pop() for _ in range(amount)]
            agg2_chunk = [agg2.pop() for _ in range(amount)]

            comp[0] = [pd.concat(comp[0] + agg1_chunk, axis=1)]
            comp[1] = [pd.concat(comp[1] + agg2_chunk, axis=1)]

        print()
        yield k, (comp[0][0], comp[1][0])

    done()
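
Note: the pop-and-concat idiom above exists purely to bound peak memory: the lists are reversed so `pop()` is cheap, each popped chunk becomes collectable as soon as it is merged, and the running result is kept as a one-element list so it can be concatenated together with the next chunk. The same pattern in isolation (function and variable names are illustrative):

import pandas as pd


def concat_in_chunks(frames: list, chunk_size: int) -> pd.DataFrame:
    # consume `frames` destructively so merged chunks can be freed
    frames.reverse()
    acc = []
    while frames:
        amount = min(len(frames), chunk_size)
        chunk = [frames.pop() for _ in range(amount)]
        acc = [pd.concat(acc + chunk, axis=1)]
    return acc[0]
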
Code Example #9
File: analyze.py Project: nachtsky1077/ungol
def _aggregate_common_nn(n_ref, n_cmp, ranges) -> '{k: ([], [])}':  # noqa
    """

    see _gen_common_nn(...)

    """
    done = ts(print, 'iteration took {delta}s')
    total = len(n_ref.vocabulary) * len(ranges)
    gen = enumerate(_gen_isect_dataframes(n_ref, n_cmp, ranges))

    agg = {k: ([], []) for k in ranges}
    for i, (k, pd_ref, pd_cmp) in tqdm(gen, total=total):
        ls_ref, ls_cmp = agg[k]
        ls_ref.append(pd_ref)
        ls_cmp.append(pd_cmp)

    print()
    done()

    return agg
Code Example #10
def _run(info: str, cmd: str, setup: str):
    done = ts(info)
    res = _timeit(cmd, setup)
    done()

    return info, res
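
Note: two things in this last example are not visible in the excerpt. First, `ts` is called with a single argument, unlike the two-argument calls above, so the helper presumably falls back to a default writer. Second, `_timeit` is undefined here; given the `(cmd, setup)` pair it is most likely a thin wrapper around the standard library's timeit module. A hedged sketch (the repetition count is an assumption):

import timeit


def _timeit(cmd: str, setup: str, number: int = 1000) -> float:
    # run `setup` once, then time `number` executions of `cmd`
    return timeit.timeit(stmt=cmd, setup=setup, number=number)
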