Example #1
File: main.py Project: romaad/msae
import os
import configparser


def train(configPath, name):
    useGpu = os.environ.get('GNUMPY_USE_GPU', 'auto')
    if useGpu == "no":
        mode = "cpu"
    else:
        mode = "gpu"

    print('========================================================')
    print('train %s' % name)
    print('the program is on %s' % mode)
    print('========================================================')

    config = configparser.ConfigParser(
        interpolation=configparser.ExtendedInterpolation())
    config.read(configPath)
    model_name = config.get(name, 'model')
    if model_name == "ae":
        from ae import AE
        model = AE(config, name)
    elif model_name == "lae":
        from lae import LAE
        model = LAE(config, name)
    elif model_name == "pae":
        from pae import PAE
        model = PAE(config, name)
    elif model_name == "sae":
        from sae import SAE
        model = SAE(config, name)
    elif model_name == "msae":
        from msae import MSAE
        model = MSAE(config, name)
    else:
        raise ValueError('unknown model: %s' % model_name)

    model.train()
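For reference, train() only needs an INI file that configparser can read with ExtendedInterpolation, holding a [name] section whose 'model' option picks the class. A minimal sketch (the 'msae1' section and file names are hypothetical):

# minimal sketch of a config that train() would accept; the section
# name 'msae1' and the file name are hypothetical
with open('demo.ini', 'w') as f:
    f.write('[msae1]\nmodel = msae\n')
train('demo.ini', 'msae1')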
Example #2
def main():
    # rdat, Tnr (a trainer) and SAE come from the surrounding project
    d = rdat()
    x, u = d['__x'], d['__u']
    t, d = [], x.shape[-1] * 8   # start at 8x the input width
    for i in range(10):
        t.append(Tnr(SAE.from_dim([x.shape[-1], d]), x, u=u))
        d //= 2                  # halve the hidden width, keep it an int
    return t
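For a sense of the sweep above: with a 100-column input, d starts at 800 and halves on every pass, so the ten trainers see the following hidden widths (a quick check, assuming integer division):

width = 100 * 8   # x.shape[-1] == 100, chosen for illustration
print([width // 2 ** i for i in range(10)])
# -> [800, 400, 200, 100, 50, 25, 12, 6, 3, 1]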
Example #3
def createsae(self, prefix, saeName):
    if self.config.has_option(self.name, saeName):
        saepath = self.readField(self.config, self.name, saeName)
        sae = self.loadModel(self.config, saepath)
        reset = self.readField(self.config, self.name, "reset_hyperparam")
        if reset != "False":
            for ae in sae.ae[1:]:
                ae.resetHyperParam(self.config, reset)
        return sae
    else:
        return SAE(self.config, self.name, prefix=prefix)
Example #4
def test_trainer():
    import numpy as np
    import hlp
    hlp.set_seed(120)

    import os.path as pt
    x = np.load(pt.expandvars('$AZ_SP1/lh001F1.npz'))['vtx']['tck']
    x = np.asarray(x, dtype='<f4')
    x = hlp.rescale01(x)
    d = x.shape[1]

    from sae import SAE
    m = SAE.from_dim([d, d // 2, d // 4])  # integer layer widths
    ## t = Trainer(m.z, src = x, xpt = x, lrt = 0.01)
    return x, m
Example #5
import os
import numpy as np


def rdat(fdr="../../raw/H08_20", seed=None):
    # pick data file
    np.random.seed(seed)
    fnm = np.random.choice(os.listdir(fdr))
    dat = np.load(os.path.join(fdr, fnm))
    gmx = dat["gmx"].astype("f")

    # fix MAF > .5
    __i = np.where(gmx.sum((0, 1)) > gmx.shape[0])[0]
    gmx[:, :, __i] = 1 - gmx[:, :, __i]
    __x = gmx.reshape(gmx.shape[0], -1)

    # set up the neural network
    from sae import SAE

    # from exb import Ods
    dim = __x.shape[-1]
    # dim = [dim] + [int(dim/2**_) for _ in range(-2, 32) if 2**_ <= dim]
    dim = [dim, dim * 2]
    nnt = SAE.from_dim(dim)
    # nnt[-1].shp = S(1.0, 'Shp')

    dat = {"__x": __x, "nnt": nnt, "gmx": gmx}
    return dat
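The "fix MAF > .5" step flips any genotype column whose allele count exceeds the sample count, so every column ends up coding the minor allele. A toy run of the same two lines (the array values are made up for illustration):

import numpy as np

# 3 samples x 2 haplotypes x 2 variants; variant 1 has allele count 5 > 3
gmx = np.array([[[0, 1], [0, 1]],
                [[0, 1], [1, 1]],
                [[1, 0], [0, 1]]], dtype='f')
__i = np.where(gmx.sum((0, 1)) > gmx.shape[0])[0]
gmx[:, :, __i] = 1 - gmx[:, :, __i]
print(gmx.sum((0, 1)))  # -> [2. 1.]; both counts now <= sample count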
Example #6
def __init__(self):
    super(TrafficPrediction, self).__init__()

    self.action_space = spaces.Box(low=-0.05, high=0.05, shape=(257,), dtype=np.float32)
    self.observation_space = spaces.Tuple([
        spaces.Box(low=0., high=1., shape=(6, 257), dtype=np.float32),
        spaces.Box(low=0., high=1., shape=(257,), dtype=np.float32)])  # (last_state_point,)
    self.delta = 1.
    self.state = None

    self.predictor = SAE()  # alternatives: SEKNN(), KNN(), or self.load_sae()
    self.pointer = 0
    self.load_sae()

    self.link = 257
    self.predstep = self.predictor.dp.predstep
    self.maxv = np.asarray(self.predictor.dp.maxv)
    self.valiX, self.valiY, self.valiY_nofilt = self.predictor.dp.get_data(data_type='vali')
    self.testX, self.testY, self.testY_nofilt = self.predictor.dp.get_data(data_type='test')

    self.set_predY = []
    self.set_predY_ = []
    self.set_realY = []
Example #7
def main(fnm='../../raw/W09/1004', **kwd):
    """ the fine-tune procedure for Stacked Autoencoder(SAE).

    -- fnm: pathname to the input, supposingly the saved progress after the
    pre-training. If {fnm} points to a directory, a file is randomly chosen
    from it.

    ** ae1: depth of the sub SA.
    """
    new_lrt = kwd.pop('lrt', None)  # new learning rate
    new_hte = kwd.pop('hte', None)  # new halting error

    # randomly pick data file if {fnm} is a directory and no record
    # exists in the saved progress:
    if pt.isdir(fnm):
        fnm = pt.join(fnm, np.random.choice(os.listdir(fnm)))
    kwd.update(fnm=fnm)

    # load data from {fnm}; parameters already in {kwd} take precedence.
    kwd.update((k, v) for k, v in lpz(fnm).items() if k not in kwd)

    # check saved progress and overwrite options:
    sav = kwd.get('sav', '.')
    if pt.isdir(sav):
        sav = pt.join(sav, pt.basename(fnm).split('.')[0])
    if pt.exists(sav + '.pgz'):
        print(sav, ": exists,")
        ovr = kwd.pop('ovr', 0)  # overwrite?

        if ovr == 0 or ovr > 2:  # do not overwrite the progress
            print(" skipped.")
            return kwd
    else:
        ovr = 2

    # resume progress, use network stored in {sav}.
    if ovr == 1:
        kwd.pop('cvw', None)  # use saved networks for CV
        kwd.pop('cvl', None)  # use saved CV LRT
        kwd.pop('cvh', None)  # use saved CV halting state
        kwd.pop('cve', None)  # use saved CV halting error
        kwd.pop('lrt', None)  # use saved learning rate for training
        kwd.pop('nwk', None)  # use saved network for training

        # remaining options in {kwd} take precedence over {sav}.
        sdt = lpz(sav)
        sdt.update(kwd)
        kwd = sdt
        print("continue training.")
    else:  # restart the training
        kwd.pop('lrt', None)    # do not use archived NT LRT
        kwd.pop('cvl', None)    # do not use archived CV LRT
        kwd.pop('cve', None)    # do not use archived CV errors
        kwd.pop('cvh', None)    # do not use archived CV halting state
        print("restart training.")

    # <-- __x, w, npt, ptn, ... do it.
    gmx = kwd['gmx']
    nsb = gmx.shape[0]                     # sample size
    xmx = gmx.reshape(nsb, -1).astype('f')  # training data
    ngv = xmx.shape[-1]                     # feature size
    mdp = kwd.pop('wdp', 16)                # maximum network depth
    # learning rate
    lrt = new_lrt if new_lrt else kwd.pop('lrt', 1e-4)
    dim = [ngv//2**_ for _ in range(mdp) if 2**_ <= ngv]

    # cross-validation networks
    cvk = kwd.get('cvk', 2)                    # K
    cvm = kwd.get('cvm', cv_msk(xmx, cvk))     # mask
    cvh = kwd.pop('cvh', [None] * cvk)         # halting
    cvl = kwd.pop('cvl', [lrt] * cvk)          # learning rates
    cvw = kwd.pop('cvw', [None] * cvk)         # slots for CV networks
    cve = kwd.pop('cve', np.ndarray((cvk, 2)))  # error

    # tune the network: (1) CV
    for i, m in enumerate(cvm):
        msg = 'CV: {:02d}/{:02d}'.format(i + 1, cvk)
        if cvh[i]:
            msg = msg + ' halted.'
            print(msg)
            continue

        print(msg)
        if cvw[i] is None:
            cvw[i] = SAE.from_dim(dim, s='relu', **kwd)
            cvw[i][-1].s = 'sigmoid'
        # suggest no layer-wise treatment (relu)
        gdy = kwd.get('gdy', False)
        kwd = ftn_sae(cvw[i], xmx[~m], xmx[m], gdy=gdy, lrt=cvl[i], **kwd)

        # collect the output
        ftn = kwd.pop('ftn')
        cvl[i] = ftn.lrt.get_value()  # CV learning rate
        cve[i, 0] = ftn.terr()        # CV training error
        cve[i, 1] = ftn.verr()        # CV validation error
        cvh[i] = ftn.hlt              # CV halting?
    # update
    kwd.update(cvk=cvk, cvm=cvm, cvh=cvh, cvl=cvl, cve=cve, cvw=cvw)

    # (2) normal training
    # force training to continue until a new halting error is reached?
    if new_hte:
        for _ in ('hte', 'hof', 'eot', 'eov'):
            kwd.pop(_, None)
        hte = new_hte
    else:
        # mean CV training error as halting error
        hte = kwd.pop('hte', cve[:, 0].mean())
        
    # NT happens only once every CV fold has halted.
    if all(cvh) and 'hof' not in kwd:
        # create normal network if necessary
        nwk = kwd.pop('nwk', None)
        if nwk is None:
            nwk = SAE.from_dim(dim, s='relu', **kwd)
            nwk[-1].s = 'sigmoid'
        # suggest no layer-wise treatment (relu)
        gdy = kwd.get('gdy', False)

        print('NT: HTE = {}'.format(hte))
        kwd = ftn_sae(nwk, xmx, xmx, gdy=gdy, lrt=lrt, hte=hte, **kwd)
        ftn = kwd.pop('ftn')
        lrt = ftn.lrt.get_value()  # learning rate

        # update
        kwd.update(nwk=nwk, lrt=lrt, hte=hte)

        # when NT halts, save the high-order features
        if ftn.hlt:
            kwd['hof'] = nwk.ec(xmx).eval()
            kwd['eot'] = ftn.terr()
            kwd['eov'] = ftn.verr()
            print('NT: halted.')
    elif all(cvh) and 'hof' in kwd:
        print('NT: halted.')
    else:
        print('NT: Not Ready.')  # not ready for NT

    # save
    if sav:
        print("write to: ", sav)
        spz(sav, kwd)

    kwd = {k: v for k, v in kwd.items() if v is not None}
    return kwd
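cv_msk is called above but never shown in this listing. A minimal sketch of such a helper, assuming it returns K boolean row masks, one per held-out fold (the signature and fold assignment are guesses):

import numpy as np

def cv_msk(x, k, seed=None):
    # hypothetical stand-in: give every sample a fold id, then emit one
    # boolean mask per fold marking its held-out rows
    fold = np.random.RandomState(seed).permutation(x.shape[0]) % k
    return [fold == i for i in range(k)]

With boolean masks, xmx[~m] picks the training rows and xmx[m] the held-out rows, matching the ftn_sae call above; the original xmx[-m] spelling relied on old NumPy treating unary minus on booleans as negation, which current versions reject.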
Example #8
def test():
    from sae import SAE
    dm = [100, 200, 300]
    sa1 = SAE.from_dim(dm)
    sa2 = SAE.from_dim(dm)
    return sa1, sa2
Example #9
def work(tsk, ftr=['slc', 'tck'], eph=100, ovr=0):
    ## load data
    dst, src, wms = tsk['dst'], tsk['src'], tsk['wms']
    dat = np.load(pt.join(src, wms + '.npz'))
    sbj, vtx = dat['sbj'].tolist(), dat['vtx'][ftr]
    del dat

    ## save binary: SDA, vertices, encodings and subjects
    fo = pt.join(dst, wms + '.pgz')
    if pt.isfile(fo):
        if ovr < 2:
            print("update:", fo); sys.stdout.flush()
            for k, v in rut_hlp.load_pgz(fo).items():
                tsk[k] = v
        else:
            print("overwrite:", fo); sys.stdout.flush()

    ## quality check
    for fn in ftr:
        fv = vtx[fn]
        if np.count_nonzero(fv) / float(fv.size) > 0.9:
            continue
        print("xt: 0s exceed 10% in {}/{}['{}']".format(src, wms, fn))
        sys.stdout.flush()
        return

    ## get or create network and encode dictionary
    nnt = tsk.setdefault('nnt', {})
    enc = tsk.setdefault('enc', {})

    dim = tsk['dim']

    ## train each feature separately for now (fn: feature name)
    print('wm surface: ', wms); sys.stdout.flush()
    for fn in ftr:
        ## source data
        fv = hlp.rescale01(vtx[fn])
        enc[fn, 0] = fv                   # encode level 0 (raw data)

        print('feature: ', fn)
        ## pre-train:
        if (fn, 'stk') in nnt:
            if ovr == 0:
                print("skip: {}.{}".format(fn, 'stk'))
                continue
            elif ovr == 1:
                print("more: {}.{}".format(fn, 'stk'))
            else:
                nnt[fn, 'stk'] = SAE.from_dim(dim)  # ovr > 1: rebuild
        else:
            nnt[fn, 'stk'] = SAE.from_dim(dim)
            
        stk = nnt[fn, 'stk']
        pre_train(stk, fv, rate=0.01, epoch=eph * len(dim))
        sys.stdout.flush()
        
        ## fine-tune networks of various depths
        ec = 1
        for di in range(1, len(dim)):
            nt = stk.sub(di)
            fine_tune(nt, fv, rate=0.01, epoch=eph)
            sys.stdout.flush()

            ## encode the feature; exclude super-encodings, since later
            ## analyses are only interested in compressed dimensionality
            if nt.ec.dim[-1] < fv.shape[1]:
                enc[fn, ec] = nt.ec(fv).eval()
                ec += 1
        
    ## save python data
    rut_hlp.save_pgz(fo, tsk)
    print('saved:', fo); sys.stdout.flush()

    ## append encoding to R data and save
    __enc2rds__(tsk)
    print "xt: success"
Example #10
from sae import SAE
import torch

# pre-serialized tensors for training and evaluation
training_set = torch.load('./training_set.pkl')
test_set = torch.load('./test_set.pkl')

# build an SAE over 3787 inputs (encoder_input/decoder_input widths
# follow the project's own API)
sae = SAE(3787, encoder_input=40, decoder_input=40)
sae.add_hiden_layer(20)  # (sic) the project's own spelling of "hidden"
sae.add_dropout(0.2)
sae.add_hiden_layer(40)
sae.compile(optimizer='adam')
sae.fit(training_set, 5)             # presumably 5 training epochs
sae.perform(training_set, test_set)  # evaluate on both splits
torch.save(sae, 'model.pkl')
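Because the whole model object is pickled via torch.save, reloading it later for evaluation is symmetric:

# reload the serialized model and repeat the evaluation step
sae = torch.load('model.pkl')
sae.perform(training_set, test_set)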