Example #1
def run_boston_housing_DistilledSGLD():
    X, Y, X_test, Y_test, X_mean, X_std, Y_mean, Y_std = load_boston_housing()
    print(X.shape, Y.shape, X_test.shape, Y_test.shape)
    minibatch_size = 1
    teacher_noise_precision = 1.25
    teacher_net = get_boston_housing_sym(True, teacher_noise_precision)
    student_net = get_boston_housing_sym(False)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                           'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev())}
    #                   'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_grad_f = lambda student_outputs, teacher_pred: \
        regression_student_grad(student_outputs, teacher_pred, teacher_noise_precision)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      X_mean=X_mean, X_std=X_std, Y_mean=Y_mean, Y_std=Y_std,
                      total_iter_num=5000000,
                      teacher_initializer=teacher_initializer,
                      student_initializer=student_initializer,
                      teacher_learning_rate=2E-7, student_learning_rate=1E-2,
                      student_optimizing_algorithm='sgd',
                      teacher_lr_scheduler=mx.lr_scheduler.FactorScheduler(80000, 0.5, 1E-7),
                      student_lr_scheduler=mx.lr_scheduler.FactorScheduler(step=5000, factor=0.8,
                                                                           stop_factor_lr=1E-6),
                      student_grad_f=student_grad_f,
                      teacher_prior_precision=2.5, student_prior_precision=0.001,
                      perturb_deviation=0.05, minibatch_size=minibatch_size, task='boston',
                      dev=dev())
Example #2
def run_toy_DistilledSGLD(gpu_id=None):
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0
    teacher_net = get_toy_sym(True, teacher_noise_precision)
    student_net = get_toy_sym(False)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
                           'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id))}

    teacher_initializer = mx.init.Uniform(0.07)
    student_initializer = mx.init.Uniform(0.07)
    student_grad_f = lambda student_outputs, teacher_pred: \
        regression_student_grad(student_outputs, teacher_pred, teacher_noise_precision)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=80000,
                      teacher_initializer=teacher_initializer,
                      student_initializer=student_initializer,
                      teacher_learning_rate=1E-4, student_learning_rate=0.01,
                      # teacher_lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
                      student_lr_scheduler=mx.lr_scheduler.FactorScheduler(8000, 0.8),
                      student_grad_f=student_grad_f,
                      teacher_prior_precision=0.1, student_prior_precision=0.001,
                      perturb_deviation=0.1, minibatch_size=minibatch_size, task='regression',
                      dev=dev(gpu_id))
Example #3
def run_toy_SGLD(gpu_id=None):
    """Run SGLD on toy dataset"""
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = 1
    teacher_noise_precision = 1.0 / 9.0
    net = get_toy_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
    initializer = mx.init.Uniform(0.07)
    exe, params, _ = SGLD(sym=net,
                          data_inputs=data_inputs,
                          X=X,
                          Y=Y,
                          X_test=X_test,
                          Y_test=Y_test,
                          total_iter_num=50000,
                          initializer=initializer,
                          learning_rate=1E-4,
                          # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
                          prior_precision=0.1,
                          burn_in_iter_num=1000,
                          thin_interval=10,
                          task='regression',
                          minibatch_size=minibatch_size,
                          dev=dev(gpu_id))  # disable=unbalanced-tuple-unpacking
Example #4
        def _get_or_reshape(name, shared_data_arrays, arg_shape, arg_type, context, logger):
            """Internal helper to get a memory block or re-use by re-shaping"""
            if name in shared_data_arrays:
                arg_arr = shared_data_arrays[name]

                if np.prod(arg_arr.shape) >= np.prod(arg_shape):
                    # nice, we can directly re-use this data blob
                    assert arg_arr.dtype == arg_type
                    arg_arr = arg_arr.reshape(arg_shape)
                else:
                    logger.warning(('bucketing: data "%s" has a shape %s' % (name, arg_shape)) +
                                   (', which is larger than already allocated ') +
                                   ('shape %s' % (arg_arr.shape,)) +
                                   ('. Need to re-allocate. Consider putting ') +
                                   ('default_bucket_key to') +
                                   (' be the bucket taking the largest input for better ') +
                                   ('memory sharing.'))
                    arg_arr = nd.zeros(arg_shape, context, dtype=arg_type)

                    # replace existing shared array because the new one is bigger
                    shared_data_arrays[name] = arg_arr
            else:
                arg_arr = nd.zeros(arg_shape, context, dtype=arg_type)
                shared_data_arrays[name] = arg_arr

            return arg_arr
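
Taken on its own, the helper's behaviour is easiest to see with a throwaway shared_data_arrays dict. A minimal sketch, assuming the definition above is dedented to module level so it can be called directly:

import logging
import numpy as np
import mxnet as mx

shared = {}
ctx = mx.cpu()
# first request: nothing is cached under 'data', so a new (4, 8) float32 block is allocated
a = _get_or_reshape('data', shared, (4, 8), np.float32, ctx, logging)
# second, smaller request: the cached block is big enough and is re-used via reshape
b = _get_or_reshape('data', shared, (2, 8), np.float32, ctx, logging)
print(a.shape, b.shape, shared['data'].shape)   # (4, 8) (2, 8) (4, 8)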
Example #5
def main(args):
  ctx = mx.gpu(args.gpu)
  args.ctx_num = 1
  prop = face_image.load_property(args.data)
  image_size = prop.image_size
  print('image_size', image_size)
  vec = args.model.split(',')
  prefix = vec[0]
  epoch = int(vec[1])
  print('loading',prefix, epoch)
  sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
  arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
  all_layers = sym.get_internals()
  sym = all_layers['fc1_output']
  #model = mx.mod.Module.load(prefix, epoch, context = ctx)
  model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
  #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
  model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))])
  model.set_params(arg_params, aux_params)
  path_imgrec = os.path.join(args.data, 'train.rec')
  path_imgidx = os.path.join(args.data, 'train.idx')
  imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type
  s = imgrec.read_idx(0)
  header, _ = mx.recordio.unpack(s)
  assert header.flag>0
  print('header0 label', header.label)
  header0 = (int(header.label[0]), int(header.label[1]))
  #assert(header.flag==1)
  imgidx = range(1, int(header.label[0]))
  stat = []
  count = 0
  data = nd.zeros( (1 ,3, image_size[0], image_size[1]) )
  label = nd.zeros( (1,) )
  for idx in imgidx:
    if len(stat)%100==0:
      print('processing', len(stat))
    s = imgrec.read_idx(idx)
    header, img = mx.recordio.unpack(s)
    img = mx.image.imdecode(img)
    img = nd.transpose(img, axes=(2, 0, 1))
    data[0][:] = img
    #input_blob = np.expand_dims(img.asnumpy(), axis=0)
    #arg_params["data"] = mx.nd.array(input_blob, ctx)
    #arg_params["softmax_label"] = mx.nd.empty((1,), ctx)
    time_now = datetime.datetime.now()
    #exe = sym.bind(ctx, arg_params ,args_grad=None, grad_req="null", aux_states=aux_params)
    #exe.forward(is_train=False)
    #_embedding = exe.outputs[0].asnumpy().flatten()
    #db = mx.io.DataBatch(data=(data,), label=(label,))
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()[0].asnumpy()
    time_now2 = datetime.datetime.now()
    diff = time_now2 - time_now
    stat.append(diff.total_seconds())
    if len(stat)==args.param1:
      break
  stat = stat[10:]
  print('avg infer time', np.mean(stat))
Example #6
def train(input_variable, target_variable, encoder, decoder, teacher_forcing_ratio,
          encoder_optimizer, decoder_optimizer, criterion, max_length, ctx):
    with autograd.record():
        loss = F.zeros((1,), ctx=ctx)

        encoder_hidden = encoder.initHidden(ctx)

        input_length = input_variable.shape[0]
        target_length = target_variable.shape[0]

        encoder_outputs, encoder_hidden = encoder(
                input_variable.expand_dims(0), encoder_hidden)

        if input_length < max_length:
            encoder_outputs = F.concat(encoder_outputs.flatten(),
                F.zeros((max_length - input_length, encoder.hidden_size), ctx=ctx), dim=0)
        else:
            encoder_outputs = encoder_outputs.flatten()



        decoder_input = F.array([SOS_token], ctx=ctx)

        decoder_hidden = encoder_hidden

        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

        if use_teacher_forcing:
            # Teacher forcing: Feed the target as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)

                loss = F.add(loss, criterion(decoder_output, target_variable[di]))
                print(criterion(decoder_output, target_variable[di]))
                decoder_input = target_variable[di]  # Teacher forcing

        else:
            # Without teacher forcing: use its own predictions as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topi = decoder_output.argmax(axis=1)

                decoder_input = F.array([topi.asscalar()], ctx=ctx)

                loss = F.add(loss, criterion(decoder_output, target_variable[di]))

                if topi.asscalar() == EOS_token:
                    break

        loss.backward()

    encoder_optimizer.step(1)
    decoder_optimizer.step(1)

    return loss.asscalar()/target_length
Example #7
def weights_init(layers):
    for layer in layers:
        classname = layer.__class__.__name__
        if hasattr(layer, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
            layer.weight.set_data(nd.random.normal(0.0,0.02,shape=layer.weight.data().shape))
            if hasattr(layer, 'bias') and layer.bias is not None:
                layer.bias.set_data(nd.zeros(layer.bias.data().shape))
        elif classname.find('BatchNorm') != -1:
            layer.gamma.set_data(nd.random.normal(1.0, 0.02,shape=layer.gamma.data().shape))
            layer.beta.set_data(nd.zeros(layer.beta.data().shape))
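
For context, a DCGAN-style initializer like this is normally applied to a freshly built Gluon network after one forward pass has fixed the parameter shapes. A rough sketch, where the small Sequential below is only a hypothetical stand-in for a generator or discriminator:

import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Conv2D(8, kernel_size=3, padding=1),
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.Dense(1))
net.initialize()
net(nd.zeros((1, 3, 8, 8)))   # one dummy forward so deferred parameters get allocated
weights_init(net)             # Conv/BatchNorm children are re-drawn from N(0, 0.02) / N(1, 0.02)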
Example #8
def get_params():
    W_xh = nd.random_normal(scale=std, shape=(input_dim, hidden_dim), ctx=ctx)
    W_hh = nd.random_normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=ctx)
    b_h = nd.zeros(hidden_dim, ctx=ctx)

    W_hy = nd.random_normal(scale=std, shape=(hidden_dim, output_dim), ctx=ctx)
    b_y = nd.zeros(output_dim, ctx=ctx)

    params = [W_xh, W_hh, b_h, W_hy, b_y]
    for param in params:
        param.attach_grad()
    return params
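
These are the parameters of a plain tanh RNN. A minimal sketch of the forward step they are meant to drive, with concrete sizes standing in for the input_dim / hidden_dim / output_dim / std / ctx globals assumed above:

import mxnet as mx
from mxnet import nd

input_dim, hidden_dim, output_dim, std, ctx = 4, 8, 3, 0.01, mx.cpu()

W_xh = nd.random_normal(scale=std, shape=(input_dim, hidden_dim), ctx=ctx)
W_hh = nd.random_normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=ctx)
b_h = nd.zeros(hidden_dim, ctx=ctx)
W_hy = nd.random_normal(scale=std, shape=(hidden_dim, output_dim), ctx=ctx)
b_y = nd.zeros(output_dim, ctx=ctx)

def rnn_step(X, h):
    # h_t = tanh(X W_xh + h_{t-1} W_hh + b_h),  y_t = h_t W_hy + b_y
    h = nd.tanh(nd.dot(X, W_xh) + nd.dot(h, W_hh) + b_h)
    return nd.dot(h, W_hy) + b_y, h

X = nd.zeros((2, input_dim), ctx=ctx)    # dummy minibatch of 2
h = nd.zeros((2, hidden_dim), ctx=ctx)   # zero initial hidden state
y, h = rnn_step(X, h)
print(y.shape, h.shape)                  # (2, 3) (2, 8)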
Example #9
def run_toy_HMC(gpu_id=None):
    X, Y, X_test, Y_test = load_toy()
    minibatch_size = Y.shape[0]
    noise_precision = 1 / 9.0
    net = get_toy_sym(True, noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
    initializer = mx.init.Uniform(0.07)
    sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      sample_num=300000, initializer=initializer, prior_precision=1.0,
                      learning_rate=1E-3, L=10, dev=dev(gpu_id))
Example #10
def get_parameters():
    W_xh = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hh = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_h = nd.zeros(config.hidden_dim)

    W_hy = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.output_dim))
    b_y = nd.zeros(config.output_dim)

    parameters = [W_xh, W_hh, b_h, W_hy, b_y]
    for parameter in parameters:
        parameter.attach_grad()

    return parameters
Example #11
def run_mnist_SGD(training_num=50000, gpu_id=None):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, exe_params, _ = SGD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y,
                             X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0, minibatch_size=100)
Example #12
 def reset_c2c(self):
   self.select_triplets()
   for identity,v in self.id2range.iteritems():
     _list = range(*v)
     ocontents = []
   
     for idx in _list:
       s = imgrec.read_idx(idx)
       ocontents.append(s)
     embeddings = None
     #print(len(ocontents))
     ba = 0
     while True:
       bb = min(ba+args.batch_size, len(ocontents))
       if ba>=bb:
         break
       _batch_size = bb-ba
       _batch_size2 = max(_batch_size, args.ctx_num)
       data = nd.zeros( (_batch_size2,3, image_size[0], image_size[1]) )
       label = nd.zeros( (_batch_size2,) )
       count = bb-ba
       ii=0
       for i in xrange(ba, bb):
         header, img = mx.recordio.unpack(ocontents[i])
         img = mx.image.imdecode(img)
         img = nd.transpose(img, axes=(2, 0, 1))
         data[ii][:] = img
         label[ii][:] = header.label
         ii+=1
       while ii<_batch_size2:
         data[ii][:] = data[0][:]
         label[ii][:] = label[0][:]
         ii+=1
       db = mx.io.DataBatch(data=(data,), label=(label,))
       self.mx_model.forward(db, is_train=False)
       net_out = self.mx_model.get_outputs()
       net_out = net_out[0].asnumpy()
       if embeddings is None:
         embeddings = np.zeros( (len(ocontents), net_out.shape[1]))
       embeddings[ba:bb,:] = net_out[0:_batch_size,:]
       ba = bb
     embeddings = sklearn.preprocessing.normalize(embeddings)
     embedding = np.mean(embeddings, axis=0, keepdims=True)
     embedding = sklearn.preprocessing.normalize(embedding)
     sims = np.dot(embeddings, embedding).flatten()
     assert len(sims)==len(_list)
     for i in xrange(len(_list)):
       _idx = _list[i]
       self.idx2cos[_idx] = sims[i]
Example #13
def run_mnist_SGLD(training_num=50000):
    X, Y, X_test, Y_test = load_mnist(training_num)
    minibatch_size = 100
    net = get_mnist_sym()
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                            X_test=X_test, Y_test=Y_test,
                            total_iter_num=1000000,
                            initializer=initializer,
                            learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
                            thin_interval=100, burn_in_iter_num=1000)
Example #14
def run_boston_housing_SGLD():
    X, Y, X_test, Y_test = load_boston_housing()
    minibatch_size = 1
    teacher_noise_precision = 1.25
    net = get_boston_housing_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = BiasXavier(factor_type="in", magnitude=2.34)
    exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                            X_test=X_test, Y_test=Y_test,
                            total_iter_num=1000000,
                            initializer=initializer,
                            learning_rate=5E-10, prior_precision=1.0, minibatch_size=minibatch_size,
                            thin_interval=100, burn_in_iter_num=1000, task='boston')
Example #15
def orthonormal_VanillaLSTMBuilder(lstm_layers, input_dims, lstm_hiddens, dropout_x=0., dropout_h=0., debug=False):
    """Build a standard LSTM cell, with variational dropout,
    with weights initialized to be orthonormal (https://arxiv.org/abs/1312.6120)

    Parameters
    ----------
    lstm_layers : int
        Currently only supports one layer
    input_dims : int
        word vector dimensions
    lstm_hiddens : int
        hidden size
    dropout_x : float
        dropout on inputs, not used in this implementation, see `biLSTM` below
    dropout_h : float
        dropout on hidden states
    debug : bool
        set to True to skip orthonormal initialization

    Returns
    -------
    lstm_cell : VariationalDropoutCell
        An LSTM cell
    """
    assert lstm_layers == 1, 'only accept one layer lstm'
    W = orthonormal_initializer(lstm_hiddens, lstm_hiddens + input_dims, debug)
    W_h, W_x = W[:, :lstm_hiddens], W[:, lstm_hiddens:]
    b = nd.zeros((4 * lstm_hiddens,))
    b[lstm_hiddens:2 * lstm_hiddens] = -1.0
    lstm_cell = rnn.LSTMCell(input_size=input_dims, hidden_size=lstm_hiddens,
                             i2h_weight_initializer=mx.init.Constant(np.concatenate([W_x] * 4, 0)),
                             h2h_weight_initializer=mx.init.Constant(np.concatenate([W_h] * 4, 0)),
                             h2h_bias_initializer=mx.init.Constant(b))
    wrapper = VariationalDropoutCell(lstm_cell, drop_states=dropout_h)
    return wrapper
Example #16
def init_params():
    w = nd.random_normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    params = [w, b]
    for param in params:
        param.attach_grad()  # automatic differentiation requires allocating gradients for these parameters
    return params
Example #17
def try_gpu():
    try:
        ctx = mx.gpu()
        _ = nd.zeros((1,), ctx=ctx)
    except:
        ctx = mx.cpu()
    return ctx
Example #18
def get_feature(name, vid, args):
  global feature_cache
  key = (name,vid)
  if key in feature_cache:
    return feature_cache[key]

  input_dir = os.path.join(args.image_dir, name, str(vid))
  data = nd.zeros( (1 ,3, image_size[0], image_size[1]) )
  F = []
  for img in os.listdir(input_dir):
    img = os.path.join(input_dir, img)
    img = cv2.imread(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, (2,0,1))
    data[0][:] = img
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()[0].asnumpy().flatten()
    F.append(net_out)
  F = np.array(F)
  F = sklearn.preprocessing.normalize(F)
  feature = np.mean(F, axis=0, keepdims=True)
  feature = sklearn.preprocessing.normalize(feature).flatten()

  feature_cache[key] = feature
  return feature
Example #19
def calc_sum(matA, matB):
    height,width = matA.shape
    matC = nd.zeros( matA.shape, ctx=matA.context)
    for y in range(height):
        for x in range(width):
            matC[y,x] = matA[y,x] + matB[y,x]
    return matC
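
The nested loops above add one element at a time (it reads like a pure-NDArray reference for the pointer-based variant in Example #21); broadcasting gives the same result in one call. A quick sketch, assuming the calc_sum above is in scope:

import mxnet as mx
from mxnet import nd

matA = nd.random.uniform(shape=(4, 5), ctx=mx.cpu())
matB = nd.random.uniform(shape=(4, 5), ctx=mx.cpu())

matC = matA + matB                                   # vectorized elementwise sum
diff = nd.sum(nd.abs(matC - calc_sum(matA, matB)))   # compare against the loop version
print(diff.asscalar())                               # expected ~0.0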
Example #20
def try_gpu():
    """If GPU is available, return mx.gpu(0); else return mx.cpu()"""
    try:
        ctx = mx.gpu()
        _ = nd.zeros((1,), ctx=ctx)
    except:
        ctx = mx.cpu()
    return ctx
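
Typical usage is to grab a context once and pass it to every allocation:

import mxnet as mx
from mxnet import nd

ctx = try_gpu()                 # mx.gpu() if usable, otherwise mx.cpu()
x = nd.zeros((2, 2), ctx=ctx)
print(x.context)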
Example #21
 def calc_sum(self,matA, matB):
     height,width = matA.shape
     ptrA = self.get_pointer(matA)
     ptrB = self.get_pointer(matB)
     matC = nd.zeros( matA.shape, ctx = matA.context)
     ptrC = self.get_pointer(matC)
     self.fun_calc_sum(ptrA, ptrB, ptrC, width, height)
     return matC
Example #22
 def gan_loss(input,target_is_real):
     if target_is_real:
         target = nd.ones(input.shape,ctx=input.context)
     else:
         target = nd.zeros(input.shape, ctx=input.context)
     #mse loss for lsgan
     e = ((input - target) ** 2).mean(axis=0, exclude=True)
     return e
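
A short sketch of how an LSGAN-style loss like this is combined into a discriminator loss; the scores below are random stand-ins for netD outputs, and gan_loss is repeated verbatim so the snippet runs on its own:

import mxnet as mx
from mxnet import nd

def gan_loss(input, target_is_real):
    if target_is_real:
        target = nd.ones(input.shape, ctx=input.context)
    else:
        target = nd.zeros(input.shape, ctx=input.context)
    # mse loss for lsgan
    return ((input - target) ** 2).mean(axis=0, exclude=True)

real_score = nd.random.uniform(shape=(8, 1))   # stand-in for netD(real_batch)
fake_score = nd.random.uniform(shape=(8, 1))   # stand-in for netD(fake_batch)
errD = 0.5 * (gan_loss(real_score, True) + gan_loss(fake_score, False))
print(errD.mean().asscalar())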
Example #23
def get_embedding(args, imgrec, id, image_size, model):
  s = imgrec.read_idx(id)
  header, _ = mx.recordio.unpack(s)
  ocontents = []
  for idx in xrange(int(header.label[0]), int(header.label[1])):
    s = imgrec.read_idx(idx)
    ocontents.append(s)
  embeddings = None
  #print(len(ocontents))
  ba = 0
  while True:
    bb = min(ba+args.batch_size, len(ocontents))
    if ba>=bb:
      break
    _batch_size = bb-ba
    _batch_size2 = max(_batch_size, args.ctx_num)
    data = nd.zeros( (_batch_size2,3, image_size[0], image_size[1]) )
    label = nd.zeros( (_batch_size2,) )
    count = bb-ba
    ii=0
    for i in xrange(ba, bb):
      header, img = mx.recordio.unpack(ocontents[i])
      img = mx.image.imdecode(img)
      img = nd.transpose(img, axes=(2, 0, 1))
      data[ii][:] = img
      label[ii][:] = header.label
      ii+=1
    while ii<_batch_size2:
      data[ii][:] = data[0][:]
      label[ii][:] = label[0][:]
      ii+=1
    #db = mx.io.DataBatch(data=(data,), label=(label,))
    db = mx.io.DataBatch(data=(data,))
    model.forward(db, is_train=False)
    net_out = model.get_outputs()
    net_out = net_out[0].asnumpy()
    if embeddings is None:
      embeddings = np.zeros( (len(ocontents), net_out.shape[1]))
    embeddings[ba:bb,:] = net_out[0:_batch_size,:]
    ba = bb
  embeddings = sklearn.preprocessing.normalize(embeddings)
  embedding = np.mean(embeddings, axis=0, keepdims=True)
  embedding = sklearn.preprocessing.normalize(embedding).flatten()
  return embedding
Example #24
 def transform_mnist(data, label):
     # transform a batch of examples
     if resize:
         n = data.shape[0]
         new_data = nd.zeros((n, resize, resize, data.shape[3]))
         for i in range(n):
             new_data[i] = image.imresize(data[i], resize, resize)
         data = new_data
     # change data from batch x height x width x channel to batch x channel x height x width
     return nd.transpose(data.astype('float32'), (0,3,1,2))/255, label.astype('float32')
Example #25
def run_mnist_DistilledSGLD(num_training=50000, gpu_id=None):
    """Run DistilledSGLD on mnist dataset"""
    X, Y, X_test, Y_test = load_mnist(num_training)
    minibatch_size = 100
    if num_training >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1::]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(gpu_id))}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev(gpu_id))
Example #26
def run_boston_housing_SGD():
    X, Y, X_test, Y_test, X_mean, X_std, Y_mean, Y_std = load_boston_housing()
    minibatch_size = 1
    teacher_noise_precision = 1.25
    net = get_boston_housing_sym(True, teacher_noise_precision)
    data_shape = (minibatch_size,) + X.shape[1::]
    print(data_shape)
    data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
    initializer = BiasXavier(factor_type="in", magnitude=1)
    # initializer = mx.init.Normal(sigma=0.01)
    exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
                             X_test=X_test, Y_test=Y_test,
                             X_mean=X_mean, X_std=X_std,
                             Y_mean=Y_mean, Y_std=Y_std,
                             total_iter_num=2000000,
                             initializer=initializer,
#                             lr_scheduler=mx.lr_scheduler.FactorScheduler(80000, 0.5),
                             lr=1E-6, prior_precision=1,
                             minibatch_size=minibatch_size,
                             task="boston")
Example #27
def predict_rnn(rnn, prefix, num_chars, params, hidden_dim, ctx, idx_to_char,
                char_to_idx, get_inputs, is_lstm=False):

    prefix = prefix.lower()
    state_h = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
    if is_lstm:
        state_c = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
    #pdb.set_trace()
    output = [char_to_idx[prefix[0]]]
    for i in range(num_chars + len(prefix)):
        X = nd.array([output[-1]], ctx=ctx)
        if is_lstm:
            Y, state_h, state_c = rnn(get_inputs(X), state_h, state_c, *params)
        else:
            Y, state_h = rnn(get_inputs(X), state_h, *params)
        if i < len(prefix)-1:
            next_input = char_to_idx[prefix[i+1]]
        else:
            next_input = int(Y[0].argmax(axis=1).asscalar())
        output.append(next_input)
    return ''.join([idx_to_char[i] for i in output])
Example #28
def test_token_embedding_manual_extension(initializeidxtovecbyextending,
                                          tmpdir):
    if not initializeidxtovecbyextending:
        # Load a TokenEmbedding with idx_to_vec already initialized
        embed_root = str(tmpdir)
        embed_name = 'my_embed'
        elem_delim = '\t'
        pretrain_file = 'my_pretrain_file.txt'
        _mk_my_pretrain_file(
            os.path.join(embed_root, embed_name), elem_delim, pretrain_file)
        pretrain_file_path = os.path.join(embed_root, embed_name,
                                          pretrain_file)
        TokEmb = functools.partial(nlp.embedding.TokenEmbedding.from_file,
                                   pretrain_file_path, elem_delim,
                                   allow_extend=True)
    else:
        TokEmb = functools.partial(
            nlp.embedding.token_embedding.TokenEmbedding, allow_extend=True)

    # Uninitialized token_embedding._idx_to_vec based
    token_embedding = TokEmb()
    token_embedding['hello'] = nd.zeros(shape=(1, 5))
    assert np.all(np.isclose(0, token_embedding['hello'].asnumpy()))

    token_embedding = TokEmb()
    token_embedding['hello'] = nd.zeros(shape=(5, ))
    assert np.all(np.isclose(0, token_embedding['hello'].asnumpy()))

    token_embedding = TokEmb()
    token_embedding[['hello', 'world']] = nd.zeros(shape=(2, 5))
    assert np.all(np.isclose(0, token_embedding['hello'].asnumpy()))
    assert np.all(np.isclose(0, token_embedding['world'].asnumpy()))

    with pytest.raises(AssertionError):
        token_embedding = TokEmb()
        token_embedding[['hello', 'world']] = nd.zeros(shape=(1, 5))

    with pytest.raises(AssertionError):
        token_embedding = TokEmb()
        token_embedding[['hello', 'world']] = nd.zeros(shape=(5, ))
Example #29
    def create_state(self, index, weight):
        """Create additional optimizer state such as momentum.

        Parameters
        ----------
        weight : NDArray
            The weight data

        """
        if self.momentum == 0.0:
            return None
        else:
            return zeros(weight.shape, weight.context, dtype=weight.dtype)
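
The returned zeros array is the per-weight momentum buffer. A rough illustration of how such a state participates in a classic momentum-SGD step (a textbook sketch, not the optimizer's actual update() code):

import mxnet as mx
from mxnet import nd

weight = nd.random_normal(shape=(3,))
grad = nd.ones((3,))
state = nd.zeros(weight.shape, weight.context, dtype=weight.dtype)  # as create_state returns

lr, momentum = 0.1, 0.9
state[:] = momentum * state + grad   # accumulate velocity
weight[:] = weight - lr * state      # take the momentum step
print(weight)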
Example #30
def parse_groundtruth_for_target(labels, box_per_cell, xywh):
    B,H,W,A,_ = xywh.shape
    _,maxObjNum,_ = labels.shape
    #pdb.set_trace()
    boxMask = nd.zeros( (B,H,W,A,1), ctx = xywh.context )
    boxCls = nd.ones_like(boxMask, ctx = xywh.context) * (-1) #default negative label
    boxObject = nd.zeros((B,H,W,A,1),ctx = xywh.context)
    boxXYWH = nd.zeros((B,H,W,A,4), ctx = xywh.context)
    for b in range(B):
        label  = labels[b].asnumpy()
        validLabel = label[np.where(label[:,1] >-0.5)[0],:]
        #pdb.set_trace()
        np.random.shuffle(validLabel)
        for l in validLabel:
            cls,x0,y0,x1,y1 = l
            w = x1 - x0
            h = y1 - y0
            #find best box for this object
            indx,indy = int(x0*W), int(y0*H) #position
            pws, phs = xywh[b,indy, indx, :, -2], xywh[b,indy,indx,:,-1]
            ious = []
            pws = pws.asnumpy()
            phs = phs.asnumpy()
            pws, phs = [1,1],[1,1]
            
            for pw, ph in zip(pws,phs):
                intersect = np.minimum(pw,w*W) * np.minimum(ph,h*H)
                ious.append(  intersect / (pw * ph + w * h - intersect) )
            #pdb.set_trace()
            bestbox = int(np.argmax(ious))
            boxMask[b,indy,indx,bestbox,:] = 1.0
            boxCls[b,indy,indx,bestbox,:] = cls
            boxObject[b,indy,indx,bestbox,:] = 1.0 # ious[bestbox]
            tx = x0 * W - indx
            ty = y0 * H - indy
            tw,th = math.sqrt(w),  math.sqrt(h) #predict sqrt(w) sqrt(h)
            #pdb.set_trace()
            boxXYWH[b,indy,indx,bestbox,:] = nd.array([tx,ty,tw,th])
    return boxMask, boxCls, boxObject,boxXYWH
Example #31
    def forward(self, input_vec, loss=None, training=True):
        # print('************* ' + str(input_vec.shape[1]) + ' *************')
        # print('############# ' + str(input_vec.shape) + ' #############')
        assert input_vec.shape[1] == self.input_dimension

        # get inputs for every slot(including global)
        inputs = {}
        for slot in self.slots:
            inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
        input_global = []
        for seg in self.global_dimension:
            input_global.append(input_vec[:, seg[0]:seg[1]])
        inputs['global'] = nd.concat(*input_global, dim=1)

        layer = []
        # inputs -> first_hidden_layer
        if (not self.sort_input_vec) and self.state_feature != 'dip':
            layer.append([])
            for slot in self.slots:
                layer[0].append(self.input_trans[slot](inputs[slot]))
            layer[0].append(self.input_trans['global'](inputs['global']))
        elif self.state_feature == 'dip':
            sorted_inputs = []
            for slot in self.slots:
                sorted_inputs.append(inputs[slot])
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))
        elif self.sort_input_vec:
            sorted_inputs = []
            for slot in self.slots:
                tmp = inputs[slot][:, :-2].sort(is_ascend=False)
                if tmp.shape[1] < 20:
                    tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1)
                else:
                    tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
                sorted_inputs.append(nd.concat(tmp, inputs[slot][:, -2:], dim=1))
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))

        # hidden_layers
        for i in range(self.hidden_layers - 1):
            if self.recurrent_mode is False:
                # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))'
                layer.append(self.ma_trans[i](layer[i], loss))
            else:
                layer.append(self.ma_trans(layer[i], loss))

        if self.share_last_layer is False:
            # dropout of last hidden layer
            for j in range(len(self.slots)):
                layer[-1][j] = self.local_out_drop_op.forward(layer[-1][j])
            layer[-1][-1] = self.global_out_drop_op.forward(layer[-1][-1])

            # last_hidden_layer -> outputs
            outputs = []
            slotv_probs = []
            slotqs = []
            slot_probs = []
            top_decision = []
            for i in range(len(self.slots) + 1):
                if self.use_dueling is False:
                    outputs.append(self.output_trans[i](layer[-1][i]))
                else:
                    if i < len(self.slots):
                        cur_slotv_prob = self.output_trans_local_valueP.forward(layer[-1][i], training=training)
                        cur_slotv_prob = nd.softmax(cur_slotv_prob)
                    else:
                        cur_slotv_prob = self.output_trans_global_valueP.forward(layer[-1][i], training=training)
                        cur_slotv_prob = nd.softmax(cur_slotv_prob)

                    if self.dueling_share_last:
                        if i < len(self.slots):
                            cur_slotq = self.output_trans_local_slotQ.forward(layer[-1][i], training=training)
                            cur_slot_prob = self.output_trans_local_slotP.forward(layer[-1][i], training=training).reshape(-1,1)
                            cur_slotv_prob = cur_slotv_prob*cur_slot_prob
                            # cur_slot_prob = nd.softmax(cur_slot_prob)
                            if self.shared_last_layer_use_bias:
                                cur_slotq = cur_slotq + nd.slice(self.value_bias_local.data(), begin=(i, ), end=(i + 1, ))
                        else:
                            cur_slotq = self.output_trans_global_slotQ.forward(layer[-1][i], training=training)
                            cur_slot_prob = self.output_trans_global_slotP.forward(layer[-1][i], training=training).reshape(-1,1)
                            cur_slotv_prob = cur_slotv_prob*cur_slot_prob
                            # cur_slot_prob = nd.softmax(cur_slot_prob)

                        top_decision.append(cur_slot_prob)
                    else:
                        cur_slotq = self.output_trans_value[i](layer[-1][i])

                    slotv_probs.append(cur_slotv_prob)
                    slot_probs.append(cur_slot_prob)
                    slotqs.append(cur_slotq)

            # batch_slotv_probs_list = []
            # slot_prob_softmax = nd.softmax(nd.concat(*slot_probs, dim=1))
            # slot_prob_split = nd.split(slot_prob_softmax, axis=1, num_outputs=len(self.slots)+1)
            # assert len(slotv_probs) == len(self.slots)+1
            # for i in range(len(slotv_probs)):
            #     tmp = slot_prob_split[i].reshape(-1,1)*slotv_probs[i]
            #     batch_slotv_probs_list.append(tmp)
            batch_slot_prob = nd.softmax(nd.concat(*slot_probs, dim=1))
            batch_slot_slotq = nd.concat(*slotqs, dim=1)
            batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))
            batch_top_decision = nd.softmax(nd.concat(*top_decision,dim=1))

            # print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
            # print(batch_slotv_prob)
            # print(batch_slot_prob.shape)
            # print(batch_slot_slotq.shape)
            # print(batch_slotv_prob.shape)

            prob = batch_slotv_prob
            value = nd.max(batch_slot_slotq, axis=1)
            top_decision = batch_top_decision

            # CTname = threading.currentThread().getName()
            # print(CTname+' top decision is : ')
            # print(top_decision)

        return prob, value, top_decision
Example #32
def train():
    """training"""
    image_pool = ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)

    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)

    # define a summary writer that logs data and flushes to the file every 5 seconds
    sw = SummaryWriter(logdir='%s' % dir_out_sw, flush_secs=5, verbose=False)
    global_step = 0

    for epoch in range(epochs):
        if epoch == 0:
            netG.hybridize()
            netD.hybridize()
        #     sw.add_graph(netG)
        #     sw.add_graph(netD)

        tic = time.time()
        btic = time.time()
        train_data.reset()
        val_data.reset()
        iter = 0
        for local_step, batch in enumerate(train_data):
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            tmp = mx.nd.concat(batch.data[0],
                               batch.data[1],
                               batch.data[2],
                               dim=1)
            tmp = augmenter(tmp,
                            patch_size=128,
                            offset=offset,
                            aug_type=1,
                            aug_methods=aug_methods,
                            random_crop=False)
            real_in = tmp[:, :1].as_in_context(ctx)
            real_out = tmp[:, 1:2].as_in_context(ctx)
            m = tmp[:, 2:3].as_in_context(ctx)  # mask

            fake_out = netG(real_in) * m

            # loss weight based on mask, applied on L1 loss
            if no_loss_weights:
                loss_weight = m
            else:
                loss_weight = m.asnumpy()
                loss_weight[loss_weight == 0] = .1
                loss_weight = mx.nd.array(loss_weight, ctx=m.context)

            fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([
                    fake_label,
                ], [
                    output,
                ])

                # Train with real image
                real_concat = nd.concat(real_in, real_out, dim=1)
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([
                    real_label,
                ], [
                    output,
                ])

            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_out = netG(real_in)
                fake_concat = nd.concat(real_in, fake_out, dim=1)
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(output, real_label) + loss_2nd(
                    real_out, fake_out, loss_weight) * lambda1
                errG.backward()

            trainerG.step(batch.data[0].shape[0])

            sw.add_scalar(tag='loss',
                          value=('d_loss', errD.mean().asscalar()),
                          global_step=global_step)
            sw.add_scalar(tag='loss',
                          value=('g_loss', errG.mean().asscalar()),
                          global_step=global_step)
            global_step += 1

            if epoch + local_step == 0:
                sw.add_graph((netG))
                img_in_list, img_out_list, m_val = val_data.next().data
                m_val = m_val.as_in_context(ctx)
                sw.add_image('first_minibatch_train_real', norm3(real_out))
                sw.add_image('first_minibatch_val_real',
                             norm3(img_out_list.as_in_context(ctx)))
                netG.export('%snetG' % dir_out_checkpoints)
            if local_step == 0:
                # Log the first batch of images of each epoch (training)
                sw.add_image('first_minibatch_train_fake',
                             norm3(fake_out * m) * m, epoch)
                sw.add_image(
                    'first_minibatch_val_fake',
                    norm3(netG(img_in_list.as_in_context(ctx)) * m_val) *
                    m_val, epoch)
                # norm3(netG(img_in_list.as_in_context(ctx)) * m_val.as_in_context(ctx)), epoch)

            if (iter + 1) % 10 == 0:
                name, acc = metric.get()

                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                logging.info(
                    'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc,
                       iter, epoch))

            iter += 1
            btic = time.time()

        sw.add_scalar(tag='binary_training_acc',
                      value=('acc', acc),
                      global_step=epoch)

        name, acc = metric.get()
        metric.reset()

        fake_val = netG(val_data.data[0][1].as_in_context(ctx))
        loss_val = loss_2nd(val_data.data[1][1].as_in_context(ctx), fake_val,
                            val_data.data[2][1].as_in_context(ctx)) * lambda1
        sw.add_scalar(tag='loss_val',
                      value=('g_loss', loss_val.mean().asscalar()),
                      global_step=epoch)

        if (epoch % check_point_interval == 0) | (epoch == epochs - 1):
            netD.save_params('%snetD-%04d' % (dir_out_checkpoints, epoch))
            netG.save_params('%snetG-%04d' % (dir_out_checkpoints, epoch))

        logging.info('\nbinary training acc at epoch %d: %s=%f' %
                     (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

    sw.export_scalars('scalar_dict.json')
    sw.close()
Example #33
    def forward(self, inputs, loss=None, training=True, commtype='average', topo='FC'):
        assert len(inputs) == self.slots + 1

        local_drop_vec = nd.ones_like(inputs[0])
        local_drop_vec = self.local_dropout_op(local_drop_vec)
        for i in range(self.slots):
            inputs[i] = inputs[i] * local_drop_vec
        inputs[-1] = self.global_dropout_op(inputs[-1])

        if topo == 'FC':
            comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        elif topo == 'FUC':
            comm_rate = nd.zeros(shape=(self.slots + 1, self.slots + 1))
        elif topo == 'Master':
            comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
            for i in range(self.slots):
                for j in range(self.slots):
                    comm_rate[i][j] = 0

        if self.use_comm and self.topo_learning_mode:
            proba = nd.sigmoid(self.topo.data())

            if random.random() < 1e-2:
                print('---------------------------------------------')
                print(proba.asnumpy())
                print('---------------------------------------------')

            u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
            comm_rate = nd.sigmoid(10. * (
                    nd.log(proba) - nd.log(1. - proba) +
                    nd.log(u_vec) - nd.log(1. - u_vec)
            ))
            if loss is not None:
                loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

        results = []
        for i in range(self.slots):
            results.append(self.local_share_trans.forward(inputs[i], training=training))
        results.append(self.global_trans.forward(inputs[-1], training=training))

        if commtype == 'average':
            for i in range(self.slots):
                tmp = nd.zeros_like(results[i])
                norm = nd.zeros_like(comm_rate[0][0])
                for j in range(self.slots):
                    if i != j:
                        tmp = tmp + self.local2local_share_comm.forward(nd.concat(inputs[j], dim=1),
                                                                        training=training) * comm_rate[j][i]
                        norm = norm + comm_rate[j][i]
                # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                tmp = tmp + self.global2local_comm.forward(nd.concat(inputs[-1], dim=1), training=training) * \
                      comm_rate[-1][i]
                norm = norm + comm_rate[-1][i]
                if nd.sum(norm) > 1e-5:
                    results[i] = results[i] + tmp / norm

            tmp = nd.zeros_like(results[-1])
            norm = nd.zeros_like(comm_rate[0][0])
            for j in range(self.slots):
                tmp = tmp + self.local2global_comm.forward(nd.concat(inputs[j], dim=1), training=training) * \
                      comm_rate[j][-1]
                norm = norm + comm_rate[j][-1]
            if nd.sum(norm) > 1e-5:
                results[-1] = results[-1] + tmp / norm

        elif commtype == 'maxpooling':
            for i in range(self.slots):
                tmp = []
                for j in range(self.slots):
                    if j != i:
                        tmp.append(self.local2local_share_comm.forward(inputs[j], training=training))
                tmp.append(self.global2local_comm.forward(inputs[-1], training=training))

                for k in range(len(tmp)):
                    tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

                tmp = nd.concat(*tmp, dim=1)
                maxcomm = nd.max(tmp, axis=1)
                results[i] = results[i] + maxcomm

            tmp = []
            for i in range(self.slots):
                tmp.append(self.local2global_comm.forward(inputs[i], training=training))
            for k in range(len(tmp)):
                tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

            tmp = nd.concat(*tmp, dim=1)
            maxcomm = nd.max(tmp, axis=1)
            results[-1] = results[-1] + maxcomm

        return results
Example #34
    def _bind_ith_exec(self, i, data_shapes, label_shapes, shared_group):
        """Internal utility function to bind the i-th executor.
        """
        shared_exec = None if shared_group is None else shared_group.execs[i]
        context = self.contexts[i]
        shared_data_arrays = self.shared_data_arrays[i]

        input_shapes = dict(data_shapes)
        if label_shapes is not None:
            input_shapes.update(dict(label_shapes))

        arg_shapes, _, aux_shapes = self.symbol.infer_shape(**input_shapes)
        assert arg_shapes is not None, "shape inference failed"

        input_types = {x.name: x.dtype for x in data_shapes}
        if label_shapes is not None:
            input_types.update({x.name: x.dtype for x in label_shapes})
        arg_types, _, aux_types = self.symbol.infer_type(**input_types)
        assert arg_types is not None, "type inference failed"

        arg_arrays = []
        grad_arrays = {} if self.for_training else None

        def _get_or_reshape(name, shared_data_arrays, arg_shape, arg_type, context, logger):
            """Internal helper to get a memory block or re-use by re-shaping"""
            if name in shared_data_arrays:
                arg_arr = shared_data_arrays[name]

                if np.prod(arg_arr.shape) >= np.prod(arg_shape):
                    # nice, we can directly re-use this data blob
                    assert arg_arr.dtype == arg_type
                    arg_arr = arg_arr.reshape(arg_shape)
                else:
                    logger.warning(('bucketing: data "%s" has a shape %s' % (name, arg_shape)) +
                                   (', which is larger than already allocated ') +
                                   ('shape %s' % (arg_arr.shape,)) +
                                   ('. Need to re-allocate. Consider putting ') +
                                   ('default_bucket_key to') +
                                   (' be the bucket taking the largest input for better ') +
                                   ('memory sharing.'))
                    arg_arr = nd.zeros(arg_shape, context, dtype=arg_type)

                    # replace existing shared array because the new one is bigger
                    shared_data_arrays[name] = arg_arr
            else:
                arg_arr = nd.zeros(arg_shape, context, dtype=arg_type)
                shared_data_arrays[name] = arg_arr

            return arg_arr

        # create or borrow arguments and gradients
        for j in range(len(self.arg_names)):
            name = self.arg_names[j]
            if name in self.param_names: # model parameters
                if shared_exec is None:
                    arg_arr = nd.zeros(arg_shapes[j], context, dtype=arg_types[j])
                    if self.grad_req[name] != 'null':
                        grad_arr = nd.zeros(arg_shapes[j], context, dtype=arg_types[j])
                        grad_arrays[name] = grad_arr
                else:
                    arg_arr = shared_exec.arg_dict[name]
                    assert arg_arr.shape == arg_shapes[j]
                    assert arg_arr.dtype == arg_types[j]
                    if self.grad_req[name] != 'null':
                        grad_arrays[name] = shared_exec.grad_dict[name]
            else: # data, label, or states
                arg_arr = _get_or_reshape(name, shared_data_arrays, arg_shapes[j], arg_types[j],
                                          context, self.logger)

                # data might also need grad if inputs_need_grad is True
                if self.grad_req[name] != 'null':
                    grad_arrays[name] = _get_or_reshape('grad of ' + name, shared_data_arrays,
                                                        arg_shapes[j], arg_types[j], context,
                                                        self.logger)

            arg_arrays.append(arg_arr)

        # create or borrow aux variables
        if shared_exec is None:
            aux_arrays = [nd.zeros(s, context, dtype=t) for s, t in zip(aux_shapes, aux_types)]
        else:
            for j, arr in enumerate(shared_exec.aux_arrays):
                assert aux_shapes[j] == arr.shape
                assert aux_types[j] == arr.dtype
            aux_arrays = shared_exec.aux_arrays[:]

        executor = self.symbol.bind(ctx=context, args=arg_arrays,
                                    args_grad=grad_arrays, aux_states=aux_arrays,
                                    grad_req=self.grad_req, shared_exec=shared_exec)
        # Get the total bytes allocated for this executor
        return executor
Example #35
def penalty_l2(params):
    penalty = nd.zeros(shape=1)
    for param in params:
        penalty = penalty + nd.sum(param**2)
    return penalty
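
A small sketch of how such an L2 penalty is typically folded into a training loss under autograd, assuming the penalty_l2 above is in scope; the linear-regression pieces are hypothetical:

import mxnet as mx
from mxnet import nd, autograd

w = nd.random_normal(shape=(3, 1))
b = nd.zeros((1,))
for param in (w, b):
    param.attach_grad()

X = nd.random_normal(shape=(5, 3))
y = nd.zeros((5, 1))
weight_decay = 1e-3                  # hypothetical regularization strength

with autograd.record():
    loss = nd.mean((nd.dot(X, w) + b - y) ** 2) + weight_decay * penalty_l2([w, b])
loss.backward()
print(w.grad)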
Example #36
    def forward(self, input_vec, loss=None):
        assert input_vec.shape[1] == self.input_dimension

        # get inputs for every slot(including global)
        inputs = {}
        for slot in self.slots:
            inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
        input_global = []
        for seg in self.global_dimension:
            input_global.append(input_vec[:, seg[0]:seg[1]])
        inputs['global'] = nd.concat(*input_global, dim=1)

        layer = []
        # inputs -> first_hidden_layer
        if (not self.sort_input_vec) and self.state_feature != 'dip':
            layer.append([])
            for slot in self.slots:
                layer[0].append(self.input_trans[slot](inputs[slot]))
            layer[0].append(self.input_trans['global'](inputs['global']))
        elif self.state_feature == 'dip':
            sorted_inputs = []
            for slot in self.slots:
                sorted_inputs.append(inputs[slot])
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans(sorted_inputs, loss))
        elif self.sort_input_vec:
            sorted_inputs = []
            for slot in self.slots:
                tmp = inputs[slot][:, :-2].sort(is_ascend=False)
                if tmp.shape[1] < 20:
                    tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1)
                else:
                    tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
                sorted_inputs.append(nd.concat(tmp, inputs[slot][:, -2:], dim=1))
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans(sorted_inputs, loss))

        # hidden_layers
        for i in range(self.hidden_layers - 1):
            if self.recurrent_mode is False:
                # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))'
                layer.append(self.ma_trans[i](layer[i], loss))
            else:
                layer.append(self.ma_trans(layer[i], loss))

        if self.share_last_layer is False:
            # dropout of last hidden layer
            for j in range(len(self.slots)):
                layer[-1][j] = self.local_out_drop_op(layer[-1][j])
            layer[-1][-1] = self.global_out_drop_op(layer[-1][-1])

            # last_hidden_layer -> outputs
            outputs = []
            for i in range(len(self.slots) + 1):
                if self.use_dueling is False:
                    outputs.append(self.output_trans[i](layer[-1][i]))
                else:
                    if i < len(self.slots):
                        tmp_adv = self.output_trans_local_advantage(sorted_inputs[i])
                    else:
                        tmp_adv = self.output_trans_global_advantage(sorted_inputs[-1])
                    if self.dueling_share_last:
                        if i < len(self.slots):
                            cur_value = self.output_trans_local_value(layer[-1][i])
                            if self.shared_last_layer_use_bias:
                                cur_value = cur_value + nd.slice(self.value_bias_local.data(), begin=(i, ), end=(i + 1, ))
                        else:
                            cur_value = self.output_trans_global_value(layer[-1][i])
                    else:
                        cur_value = self.output_trans_value[i](layer[-1][i])
                    outputs.append(
                        cur_value +
                        tmp_adv - tmp_adv.mean(axis=1).reshape(
                            (tmp_adv.shape[0], 1)).broadcast_axes(axis=1, size=tmp_adv.shape[1]))
        else:
            outputs = []
            for i in range(len(self.slots)):
                output_i = self.output_trans_local(layer[-1][i])
                if self.shared_last_layer_use_bias:
                    output_i = output_i + self.output_trans_local_biases[i].data()
                outputs.append(output_i)
            outputs.append(self.output_trans_global(layer[-1][-1]))
        return nd.concat(*outputs, dim=1)
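The dueling branch above combines a state-value head and an advantage head as Q = V + (A - mean(A)). A standalone sketch of that aggregation with made-up shapes (an illustration, not part of the original class):

from mxnet import ndarray as nd

value = nd.array([[1.0]])            # V(s), shape (batch, 1)
adv = nd.array([[0.5, -0.5, 1.0]])   # A(s, a), shape (batch, num_actions)
# centre the advantages, then broadcast-add the state value
centered_adv = adv - adv.mean(axis=1, keepdims=True).broadcast_axes(axis=1, size=adv.shape[1])
q = nd.broadcast_add(value, centered_adv)
print(q)  # Q(s, a), shape (batch, num_actions)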
Example #37
0
def GRU(epoch=100, batch_size=100, save_period=100, load_period=100, learning_rate=0.1, ctx=mx.gpu(0)):

    train_data , test_data = FashionMNIST(batch_size)

    #network parameter
    time_step = 28
    num_inputs = 28
    num_hidden = 200
    num_outputs = 10

    path = "weights/FashionMNIST_GRUweights-{}".format(load_period)

    if os.path.exists(path):

        print("loading weights")
        [wxz, wxr, wxh, whz, whr, whh, bz, br, bh, why, by] = nd.load(path)  # weights load
        wxz = wxz.as_in_context(ctx)
        wxr = wxr.as_in_context(ctx)
        wxh = wxh.as_in_context(ctx)

        whz = whz.as_in_context(ctx)
        whr = whr.as_in_context(ctx)
        whh = whh.as_in_context(ctx)

        bz = bz.as_in_context(ctx)
        br = br.as_in_context(ctx)
        bh = bh.as_in_context(ctx)

        why = why.as_in_context(ctx)
        by = by.as_in_context(ctx)
        params = [wxz , wxr , wxh , whz, whr, whh, bz, br, bh, why , by]

    else:
        print("initializing weights")

        with ctx:
            wxz = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_inputs))
            wxr = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_inputs))
            wxh = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_inputs))

            whz = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_hidden))
            whr = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_hidden))
            whh = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_hidden))

            bz = nd.random.normal(loc=0,scale=0.01,shape=(num_hidden,))
            br = nd.random.normal(loc=0,scale=0.01,shape=(num_hidden,))
            bh = nd.random.normal(loc=0,scale=0.01,shape=(num_hidden,))

            why = nd.random.normal(loc=0,scale=0.1,shape=(num_outputs , num_hidden))
            by = nd.random.normal(loc=0,scale=0.1,shape=(num_outputs,))

        params = [wxz , wxr , wxh , whz, whr, whh, bz, br, bh, why , by]

    # attach gradient!!!
    for param in params:
        param.attach_grad()

    # GRU cell: gated recurrent unit unrolled over the time steps
    def GRU_Cell(input, state):
        for x in input:
            z_t = nd.Activation(nd.FullyConnected(data=x,weight=wxz,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=state,weight=whz,no_bias=True,num_hidden=num_hidden)+bz,act_type="sigmoid")
            r_t = nd.Activation(nd.FullyConnected(data=x,weight=wxr,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=state,weight=whr,no_bias=True,num_hidden=num_hidden)+br,act_type="sigmoid")
            g_t = nd.Activation(nd.FullyConnected(data=x,weight=wxh,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=r_t*state,weight=whh,no_bias=True,num_hidden=num_hidden)+bh,act_type="tanh")

            state = nd.multiply(z_t,state) + nd.multiply(1-z_t,g_t)

        output = nd.FullyConnected(data=state, weight=why, bias=by, num_hidden=num_outputs)
        output = nd.softmax(data=output)
        return output, state

    def cross_entropy(output, label):
        return - nd.sum(label * nd.log(output), axis=0 , exclude=True)

    #Adam optimizer
    state=[]
    optimizer=mx.optimizer.Adam(rescale_grad=1,learning_rate=learning_rate)

    for param in params:
        state.append(optimizer.create_state(0,param))

    for i in tqdm(range(1,epoch+1,1)):

        for data,label in train_data:

            states = nd.zeros(shape=(data.shape[0], num_hidden), ctx=ctx)
            data = data.as_in_context(ctx)
            data = data.reshape(shape=(-1,time_step,num_inputs))
            data=nd.transpose(data=data,axes=(1,0,2))
            label = label.as_in_context(ctx)
            label = nd.one_hot(label , num_outputs)

            with autograd.record():
                outputs, states = GRU_Cell(data, states)
                loss = cross_entropy(outputs,label) # (batch_size,)
            loss.backward()

            cost = nd.mean(loss).asscalar()
            for j,param in enumerate(params):
                optimizer.update(0,param,param.grad,state[j])

        test_accuracy = evaluate_accuracy(test_data, time_step, num_inputs, num_hidden, GRU_Cell, ctx)
        print(" epoch : {} , last batch cost : {}".format(i,cost))
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

        #weight_save
        if i % save_period==0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            nd.save("weights/FashionMNIST_GRUweights-{}".format(i),params)

    test_accuracy = evaluate_accuracy(test_data, time_step, num_inputs, num_hidden, GRU_Cell, ctx)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))
    return "optimization completed"
    def interclass_reset(self):
        self.seq2 = []
        self.oseq2 = []
        while len(self.seq2) < self.seq_min_size:
            self.time_reset()
            embeddings = None
            bag_size = self.interclass_bag_size  # 3600
            batch_size2 = self.batch_size2  # 200
            # data = np.zeros( (bag_size,)+self.data_shape )
            # label = np.zeros( (bag_size,) )
            tag = []
            # idx = np.zeros( (bag_size,) )
            #print('eval %d images..' % bag_size, self.interclass_oseq_cur)  # 3600 0 first time
            #print('interclass time stat', self.times)
            if self.interclass_oseq_cur + bag_size > len(self.oseq2):
                self.interclass_oseq_reset()
                print('eval %d images..' % bag_size, self.interclass_oseq_cur)
            self.times[0] += self.time_elapsed()
            self.time_reset()
            # print(data.shape)
            data = nd.zeros(self.provide_data2[0][1])
            label = nd.zeros(self.provide_label2[0][1])
            ba = 0

            all_layers = self.mx_model.symbol.get_internals()
            if self.model_t is None:
                symbol_t = all_layers['blockgrad0_output']
                self.model_t = mx.mod.Module(symbol=symbol_t,
                                             context=self.ctx,
                                             label_names=None)
                self.model_t.bind(data_shapes=self.provide_data2)
                arg_t, aux_t = self.mx_model.get_params()
                self.model_t.set_params(arg_t, aux_t)
            else:
                arg_t, aux_t = self.mx_model.get_params()
                self.model_t.set_params(arg_t, aux_t)

            while True:
                bb = min(ba + batch_size2, bag_size)
                if ba >= bb:
                    break
                # _batch = self.data_iter.next()
                # _data = _batch.data[0].asnumpy()
                # print(_data.shape)
                # _label = _batch.label[0].asnumpy()
                # data[ba:bb,:,:,:] = _data
                # label[ba:bb] = _label
                for i in xrange(ba, bb):
                    _idx = self.oseq2[i + self.interclass_oseq_cur]
                    s = self.imgrec2.read_idx(_idx)
                    header, img = recordio.unpack(s)
                    img = self.imdecode(img)
                    data[i - ba][:] = self.postprocess_data(img)
                    #label[i-ba][:] = header.label
                    #print('header.label', header.label)
                    #print('header.label', header.label.shape)
                    #tag.append((int(header.label), _idx))
                    #print('header.label',header.label)
                    label0 = header.label
                    if not isinstance(label0, numbers.Number):
                        label0 = label0[0]
                    #print('label0', label0)
                    label[i - ba][:] = label0
                    tag.append((int(label0), _idx))
                    # idx[i] = _idx
                #print('tag:' ,tag)
                #print(data,label)

                #db = mx.io.DataBatch(data=(data,), label=(label,))
                #self.mx_model.forward(db, is_train=False)
                #net_out = self.mx_model.get_outputs()

                #print("self.mx_model",self.mx_model)

                db = mx.io.DataBatch(data=(data, ), label=(label, ))
                self.model_t.forward(db, is_train=False)
                net_out = self.model_t.get_outputs()

                #print('eval for selecting interclasses',ba,bb)
                #print(net_out)
                #print(len(net_out))
                #print(net_out[0].asnumpy())
                net_out = net_out[0].asnumpy()
                #print(len(net_out))
                #print('net_out', net_out.shape)
                if embeddings is None:
                    embeddings = np.zeros((bag_size, net_out.shape[1]))
                #print ("net_out.shape: ", net_out.shape)
                #print("ba,bb: ", ba,bb)
                embeddings[ba:bb, :] = net_out
                ba = bb
            assert len(tag) == bag_size
            self.interclass_oseq_cur += bag_size
            #print("embeddings: ",embeddings)
            embeddings = sklearn.preprocessing.normalize(embeddings)
            self.times[1] += self.time_elapsed()
            self.time_reset()
            nrof_images_per_class = [1]
            for i in xrange(1, bag_size):
                if tag[i][0] == tag[i - 1][0]:
                    nrof_images_per_class[-1] += 1
                else:
                    nrof_images_per_class.append(1)

            id_sel = self.pick_interclass(embeddings, nrof_images_per_class,
                                          self.batchsize_id)  # shape=(T,3)
            #print('found interclass', id_sel) #2
            if self.images_per_identity == 1:
                for j in xrange(self.batchsize_id // 3):
                    idsel_0 = tag[id_sel[j] * self.images_per_identity][1]
                    self.seq2.append(idsel_0)
            else:
                for j in xrange(self.batchsize_id // 3):
                    idsel_0 = tag[id_sel[j] * self.images_per_identity][1]
                    self.seq2.append(idsel_0)
                    idsel_0 = tag[id_sel[j] * self.images_per_identity + 1][1]
                    self.seq2.append(idsel_0)
                    idsel_0 = tag[id_sel[j] * self.images_per_identity + 2][1]
                    self.seq2.append(idsel_0)
            self.times[2] += self.time_elapsed()
#encoding:utf-8
import sys
sys.path.append('..')
import utils
batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)
from mxnet import ndarray as nd
num_inputs = 28 * 28
num_outputs = 10

num_hidden1 = 256
num_hidden2 = 256
weight_scale = .01

W1 = nd.random_normal(shape=(num_inputs, num_hidden1), scale=weight_scale)
b1 = nd.zeros(num_hidden1)

W2 = nd.random_normal(shape=(num_hidden1, num_hidden2), scale=weight_scale)
b2 = nd.zeros(num_hidden2)

W3 = nd.random_normal(shape=(num_hidden2, num_outputs), scale=weight_scale)
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]

for param in params:
    param.attach_grad()


def dropout(X, drop_probability):
    keep_probability = 1 - drop_probability
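The dropout helper above is truncated right after keep_probability. A minimal sketch of the usual inverted-dropout completion, reusing the nd import above (an assumption about how the fragment continues, not the verbatim original):

def dropout(X, drop_probability):
    keep_probability = 1 - drop_probability
    assert 0 <= keep_probability <= 1
    if keep_probability == 0:
        # dropping everything: return an all-zero array of the same shape
        return X.zeros_like()
    # sample a binary keep-mask and rescale so the expected activation is unchanged
    mask = nd.random.uniform(0, 1.0, X.shape) < keep_probability
    return mask * X / keep_probability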
Example #40
0
    def reset(self):
        """Resets the iterator to the beginning of the data."""
        if self.first_reset == 1:
            print("first reset")
            #all_layers = self.mx_model.symbol.get_internals()
            # print('all_layers: ',all_layers)
            if self.model_t is None:
                vec = self.mx_pretrained.split(',')
                assert len(vec) > 1
                prefix = vec[0]
                epoch = int(vec[1])
                print('loading', prefix, epoch)
                sym, arg_params, aux_params = mx.model.load_checkpoint(
                    prefix, epoch)
                all_layers = sym.get_internals()
                print('all_layers:', all_layers)
                sym = all_layers['blockgrad1_output']
                self.model_t = mx.mod.Module(symbol=sym, context=self.ctx)
                self.model_t.bind(data_shapes=self.provide_data_mining,
                                  label_shapes=self.provide_label_mining)
                self.model_t.set_params(arg_params, aux_params)
            ba = 0
            tag = []
            data = nd.zeros(self.provide_data_mining[0][1])
            label = nd.zeros(self.provide_label_mining[0][1])
            outfilew = os.path.join(self.bin_dir,
                                    "%d_noiselist.txt" % (self.save))
            with open(outfilew, 'w') as fp:
                while True:
                    bb = min(ba + self.batch_size_mining, len(self.oseq))
                    print("start bb,ba", ba, bb)
                    if ba >= bb:
                        break
                    for i in xrange(ba, bb):
                        _idx = self.oseq[i]
                        s = self.imgrec.read_idx(_idx)
                        header, img = recordio.unpack(s)
                        img = self.imdecode(img)
                        data[i - ba][:] = self.postprocess_data(img)

                        label0 = header.label
                        if not isinstance(label0, numbers.Number):
                            label0 = label0[0]
                        # print('label0', label0)
                        label[i - ba][:] = label0
                        tag.append((int(label0), _idx))

                    db = mx.io.DataBatch(data=(data, ), label=(label, ))
                    self.model_t.forward(db, is_train=False)
                    net_out = self.model_t.get_outputs()
                    net_P = mx.nd.softmax(net_out[0], axis=1)
                    net_P = net_P.asnumpy()
                    for ii in range(bb - ba):
                        #print('label:',label[ii])
                        #print('tag:',tag[ii][0])
                        P = net_P[ii]
                        #print(P)
                        #print(max(P))
                        if max(P) < self.threshold:
                            line = '%d %d %s %s\n' % (tag[ii][0], tag[ii][1],
                                                      max(P), P[tag[ii][0]])
                            fp.write(line)
                        else:
                            self.seq.append(tag[ii][1])
                    tag = []
                    ba = bb
            self.save += 1
            print("Initialize done: ", len(self.oseq), len(self.seq),
                  len(self.oseq) - len(self.seq))
            self.first_reset += 1
        else:
            print('call reset()')
            self.cur = 0
            if self.shuffle:
                random.shuffle(self.seq)
            self.first_reset += 1
def data_iter():
    idx = list(range(num_example))
    random.shuffle(idx)
    for i in range(0, num_example, batch_size):
        j = nd.array(idx[i:min(i + batch_size, num_example)])
        yield nd.take(X, j), nd.take(Y, j)


for data, label in data_iter():  # data: feature batch; label: ground-truth values
    print(data, label)
    break

# 3. Initialize the model parameters
w = nd.random_normal(shape=(num_inputs, 1))
b = nd.zeros(1)
params = [w, b]

for param in params:
    param.attach_grad()


# 4. Define the model
def net(X):
    return nd.dot(X, w) + b


print(net(data))


# 5. Loss function
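The fragment stops right after the loss-function heading; a minimal squared-loss definition in the same scratch style would be (an assumption, not the missing original lines):

def square_loss(yhat, y):
    # reshape y so the element-wise subtraction does not broadcast accidentally
    return (yhat - y.reshape(yhat.shape)) ** 2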
Example #42
0
def bind(modQ,
         data_shapes,
         label_shapes=None,
         for_training=True,
         inputs_need_grad=False,
         force_rebind=False,
         shared_module=None,
         grad_req='write'):
    if force_rebind:
        modQ._reset_bind()

    if modQ.binded:
        modQ.logger.warning('Already binded, ignoring bind()')
        return

    modQ.for_training = for_training
    modQ.inputs_need_grad = inputs_need_grad
    modQ.binded = True
    modQ._grad_req = grad_req

    if not for_training:
        assert not inputs_need_grad
    else:
        pass
        # this is not True, as some module might not contain a loss function
        # that consumes the labels
        # assert label_shapes is not None

    modQ._data_shapes, modQ._label_shapes = _parse_data_desc(
        modQ.data_names, modQ.label_names, data_shapes, label_shapes)

    if shared_module is not None:
        assert isinstance(shared_module, Module) and \
               shared_module.binded and shared_module.params_initialized
        shared_group = shared_module._exec_group
    else:
        shared_group = None

    modQ._exec_group = DataParallelExecutorGroup(
        modQ._symbol,
        modQ._context,
        modQ._work_load_list,
        modQ._data_shapes,
        modQ._label_shapes,
        modQ._param_names,
        for_training,
        inputs_need_grad,
        shared_group,
        logger=modQ.logger,
        fixed_param_names=modQ._fixed_param_names,
        grad_req=grad_req,
        state_names=modQ._state_names)
    modQ._total_exec_bytes = modQ._exec_group._total_exec_bytes
    if shared_module is not None:
        modQ.params_initialized = True
        modQ._arg_params = shared_module._arg_params
        modQ._aux_params = shared_module._aux_params
    elif modQ.params_initialized:
        # if the parameters are already initialized, we are re-binding
        # so automatically copy the already initialized params
        modQ._exec_group.set_params(modQ._arg_params, modQ._aux_params)
    else:
        assert modQ._arg_params is None and modQ._aux_params is None
        param_arrays = [
            nd.zeros(x[0].shape, dtype=x[0].dtype, ctx=x[0][0].context)
            for x in modQ._exec_group.param_arrays
        ]
        modQ._arg_params = {
            name: arr
            for name, arr in zip(modQ._param_names, param_arrays)
        }

        aux_arrays = [
            nd.zeros(x[0].shape, dtype=x[0].dtype, ctx=x[0][0].context)
            for x in modQ._exec_group.aux_arrays
        ]
        modQ._aux_params = {
            name: arr
            for name, arr in zip(modQ._aux_names, aux_arrays)
        }

    if shared_module is not None and shared_module.optimizer_initialized:
        modQ.borrow_optimizer(shared_module)
Example #43
0
def main():
    parser = argparse.ArgumentParser(description='Script to test the trained network on a game.')
    parser.add_argument('-r', '--rom', required=False, type=str,
                        default=os.path.join('arena', 'games', 'roms', 'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-v', '--visualization', required=False, type=int, default=0,
                        help='Visualize the runs.')
    parser.add_argument('--lr', required=False, type=float, default=0.01,
                        help='Learning rate of the AdaGrad optimizer')
    parser.add_argument('--eps', required=False, type=float, default=0.01,
                        help='Eps of the AdaGrad optimizer')
    parser.add_argument('--clip-gradient', required=False, type=float, default=None,
                        help='Clip threshold of the AdaGrad optimizer')
    parser.add_argument('--double-q', required=False, type=bool, default=False,
                        help='Use Double DQN')
    parser.add_argument('--wd', required=False, type=float, default=0.0,
                        help='Weight of the L2 Regularizer')
    parser.add_argument('-c', '--ctx', required=False, type=str, default='gpu',
                        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument('-d', '--dir-path', required=False, type=str, default='',
                        help='Saving directory of model files.')
    parser.add_argument('--start-eps', required=False, type=float, default=1.0,
                        help='Eps of the epsilon-greedy policy at the beginning')
    parser.add_argument('--replay-start-size', required=False, type=int, default=50000,
                        help='The step that the training starts')
    parser.add_argument('--kvstore-update-period', required=False, type=int, default=1,
                        help='The period that the worker updates the parameters from the sever')
    parser.add_argument('--kv-type', required=False, type=str, default=None,
                        help='type of kvstore, default will not use kvstore, could also be dist_async')
    args, unknown = parser.parse_known_args()
    if args.dir_path == '':
        rom_name = os.path.splitext(os.path.basename(args.rom))[0]
        args.dir_path = 'dqn-%s' % rom_name
    ctx = re.findall(r'([a-z]+)(\d*)', args.ctx)
    ctx = [(device, int(num)) if len(num) > 0 else (device, 0) for device, num in ctx]
    replay_start_size = args.replay_start_size
    max_start_nullops = 30
    replay_memory_size = 1000000
    history_length = 4
    rows = 84
    cols = 84
    q_ctx = mx.Context(*ctx[0])

    game = AtariGame(rom_path=args.rom, resize_mode='scale', replay_start_size=replay_start_size,
                     resized_rows=rows, resized_cols=cols, max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size, display_screen=args.visualization,
                     history_length=history_length)

    ##RUN NATURE
    freeze_interval = 10000
    epoch_num = 200
    steps_per_epoch = 250000
    update_interval = 4
    discount = 0.99

    eps_start = args.start_eps
    eps_min = 0.1
    eps_decay = (eps_start - 0.1) / 1000000
    eps_curr = eps_start
    freeze_interval /= update_interval
    minibatch_size = 32
    action_num = len(game.action_set)

    data_shapes = {'data': (minibatch_size, history_length) + (rows, cols),
                   'dqn_action': (minibatch_size,), 'dqn_reward': (minibatch_size,)}
    #optimizer = mx.optimizer.create(name='sgd', learning_rate=args.lr,wd=args.wd)
    optimizer = mx.optimizer.Nop()
    dqn_output_op = DQNOutputNpyOp()
    dqn_sym = dqn_sym_nature(action_num, dqn_output_op)
    qnet = Base(data_shapes=data_shapes, sym=dqn_sym, name='QNet',
                  initializer=DQNInitializer(factor_type="in"),
                  ctx=q_ctx)
    target_qnet = qnet.copy(name="TargetQNet", ctx=q_ctx)
    # Test arrays for kvstore push/pull benchmarking
    testShape = (1,1686180*100)
    testParam = nd.ones(testShape,ctx=q_ctx)
    testGrad = nd.zeros(testShape,ctx=q_ctx)

    # Create kvstore

    if args.kv_type != None:
        kvType = args.kv_type
        kvStore = kvstore.create(kvType)
        #Initialize kvstore
        for idx,v in enumerate(qnet.params.values()):
            kvStore.init(idx,v);
        # Set optimizer on kvstore
        kvStore.set_optimizer(optimizer)
        kvstore_update_period = args.kvstore_update_period
    else:
        updater = mx.optimizer.get_updater(optimizer)

    # if args.kv_type != None:
    #     kvType = args.kv_type
    #     kvStore = kvstore.create(kvType)
    #     kvStore.init(0,testParam)
    #     testOptimizer = mx.optimizer.Nop()
    #     kvStore.set_optimizer(testOptimizer)
    #     kvstore_update_period = args.kvstore_update_period


    qnet.print_stat()
    target_qnet.print_stat()
    # Begin Playing Game
    training_steps = 0
    total_steps = 0
    while(1):
        time_before_wait = time.time()

        # kvStore.push(0,testGrad,priority=0)
        # kvStore.pull(0,testParam,priority=0)
        # testParam.wait_to_read()

        for paramIndex in range(len(qnet.params)):#range(6):#
            k=qnet.params.keys()[paramIndex]
            kvStore.push(paramIndex,qnet.params_grad[k],priority=-paramIndex)
            kvStore.pull(paramIndex,qnet.params[k],priority=-paramIndex)

        for v in qnet.params.values():
            v.wait_to_read()
        logging.info("wait time %f" %(time.time()-time_before_wait))

    for epoch in xrange(epoch_num):
        # Run Epoch
        steps_left = steps_per_epoch
        episode = 0
        epoch_reward = 0
        start = time.time()
        game.start()
        while steps_left > 0:
            # Running New Episode
            episode += 1
            episode_loss = 0.0
            episode_q_value = 0.0
            episode_update_step = 0
            episode_action_step = 0
            time_episode_start = time.time()
            game.begin_episode(steps_left)
            while not game.episode_terminate:
                # 1. We need to choose a new action based on the current game status
                if game.state_enabled and game.replay_memory.sample_enabled:
                    do_exploration = (npy_rng.rand() < eps_curr)
                    eps_curr = max(eps_curr - eps_decay, eps_min)
                    if do_exploration:
                        action = npy_rng.randint(action_num)
                    else:
                        # TODO Here we can in fact play multiple gaming instances simultaneously and make actions for each
                        # We can simply stack the current_state() of gaming instances and give prediction for all of them
                        # We need to wait after calling calc_score(.), which makes the program slow
                        # TODO Profiling the speed of this part!
                        current_state = game.current_state()
                        state = nd.array(current_state.reshape((1,) + current_state.shape),
                                         ctx=q_ctx) / float(255.0)
                        qval_npy = qnet.forward(batch_size=1, data=state)[0].asnumpy()
                        action = numpy.argmax(qval_npy)
                        episode_q_value += qval_npy[0, action]
                        episode_action_step += 1
                else:
                    action = npy_rng.randint(action_num)

                # 2. Play the game for a single mega-step (Inside the game, the action may be repeated for several times)
                game.play(action)
                total_steps += 1

                # 3. Update our Q network if we can start sampling from the replay memory
                #    Also, we update every `update_interval`
                if total_steps % update_interval == 0 and game.replay_memory.sample_enabled:
                    # 3.1 Draw sample from the replay_memory
                    training_steps += 1
                    episode_update_step += 1
                    states, actions, rewards, next_states, terminate_flags \
                        = game.replay_memory.sample(batch_size=minibatch_size)
                    states = nd.array(states, ctx=q_ctx) / float(255.0)
                    next_states = nd.array(next_states, ctx=q_ctx) / float(255.0)
                    actions = nd.array(actions, ctx=q_ctx)
                    rewards = nd.array(rewards, ctx=q_ctx)
                    terminate_flags = nd.array(terminate_flags, ctx=q_ctx)

                    # 3.2 Use the target network to compute the scores and
                    #     get the corresponding target rewards
                    if not args.double_q:
                        target_qval = target_qnet.forward(batch_size=minibatch_size,
                                                         data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(target_qval))\
                                           * (1.0 - terminate_flags) * discount
                    else:
                        target_qval = target_qnet.forward(batch_size=minibatch_size,
                                                         data=next_states)[0]
                        qval = qnet.forward(batch_size=minibatch_size, data=next_states)[0]

                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(qval))\
                                           * (1.0 - terminate_flags) * discount
                    outputs = qnet.forward(batch_size=minibatch_size,is_train=True, data=states,
                                              dqn_action=actions,
                                              dqn_reward=target_rewards)
                    qnet.backward(batch_size=minibatch_size)
                    nd.waitall()
                    time_before_update = time.time()

                    if args.kv_type != None:
                        if total_steps % kvstore_update_period == 0:
                            update_to_kvstore(kvStore,qnet.params,qnet.params_grad)
                    else:
                        qnet.update(updater=updater)
                    logging.info("update time %f" %(time.time()-time_before_update))
                    time_before_wait = time.time()
                    nd.waitall()
                    logging.info("wait time %f" %(time.time()-time_before_wait))

                    '''nd.waitall()
                    time_before_wait = time.time()
                    kvStore.push(0,testGrad,priority=0)
                    kvStore.pull(0,testParam,priority=0)
                    nd.waitall()
                    logging.info("wait time %f" %(time.time()-time_before_wait))'''
                    # 3.3 Calculate Loss
                    diff = nd.abs(nd.choose_element_0index(outputs[0], actions) - target_rewards)
                    quadratic_part = nd.clip(diff, -1, 1)
                    loss = (0.5 * nd.sum(nd.square(quadratic_part)) + nd.sum(diff - quadratic_part)).asscalar()
                    episode_loss += loss

                    # 3.3 Update the target network every freeze_interval
                    # (We can do annealing instead of hard copy)
                    if training_steps % freeze_interval == 0:
                        qnet.copy_params_to(target_qnet)
            steps_left -= game.episode_step
            time_episode_end = time.time()
            # Update the statistics
            epoch_reward += game.episode_reward
            info_str = "Epoch:%d, Episode:%d, Steps Left:%d/%d, Reward:%f, fps:%f, Exploration:%f" \
                        % (epoch, episode, steps_left, steps_per_epoch, game.episode_reward,
                           game.episode_step / (time_episode_end - time_episode_start), eps_curr)
            if episode_update_step > 0:
                info_str += ", Avg Loss:%f/%d" % (episode_loss / episode_update_step,
                                                  episode_update_step)
            if episode_action_step > 0:
                info_str += ", Avg Q Value:%f/%d" % (episode_q_value / episode_action_step,
                                                  episode_action_step)
            logging.info(info_str)
        end = time.time()
        fps = steps_per_epoch / (end - start)
        qnet.save_params(dir_path=args.dir_path, epoch=epoch)
        logging.info("Epoch:%d, FPS:%f, Avg Reward: %f/%d"
                     % (epoch, fps, epoch_reward / float(episode), episode))
    def bind(self,
             data_shapes,
             label_shapes=None,
             for_training=True,
             inputs_need_grad=False,
             force_rebind=False,
             shared_module=None,
             grad_req='write'):
        """Binds the symbols to construct executors. This is necessary before one
        can perform computation with the module.

        Parameters
        ----------
        data_shapes : list of (str, tuple)
            Typically is ``data_iter.provide_data``.
        label_shapes : list of (str, tuple)
            Typically is ``data_iter.provide_label``.
        for_training : bool
            Default is ``True``. Whether the executors should be bound for training.
        inputs_need_grad : bool
            Default is ``False``. Whether the gradients to the input data need to be computed.
            Typically this is not needed. But this might be needed when implementing composition
            of modules.
        force_rebind : bool
            Default is ``False``. This function does nothing if the executors are already
            bound. But with this ``True``, the executors will be forced to rebind.
        shared_module : Module
            Default is ``None``. This is used in bucketing. When not ``None``, the shared module
            essentially corresponds to a different bucket -- a module with different symbol
            but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
        """
        # force rebinding is typically used when one wants to switch from
        # the training phase to the prediction phase.
        if force_rebind:
            self._reset_bind()

        if self.binded:
            self.logger.warning('Already bound, ignoring bind()')
            return

        self.for_training = for_training
        self.inputs_need_grad = inputs_need_grad
        self.binded = True
        self._grad_req = grad_req

        if not for_training:
            assert not inputs_need_grad
        else:
            pass
            # this is not True, as some module might not contain a loss function
            # that consumes the labels
            # assert label_shapes is not None

        self._data_shapes, self._label_shapes = _parse_data_desc(
            self.data_names, self.label_names, data_shapes, label_shapes)

        if shared_module is not None:
            assert isinstance(shared_module, Module) and \
                    shared_module.binded and shared_module.params_initialized
            shared_group = shared_module._exec_group
            assert len(shared_group.execs) >= len(self._context)
        else:
            shared_group = None

        self._exec_group = DataParallelExecutorGroup(
            self._symbol,
            self._context,
            self._work_load_list,
            self._data_shapes,
            self._label_shapes,
            self._param_names,
            for_training,
            inputs_need_grad,
            shared_group,
            logger=self.logger,
            fixed_param_names=self._fixed_param_names,
            grad_req=grad_req,
            group2ctxs=self._group2ctxs,
            state_names=self._state_names)
        self._total_exec_bytes = self._exec_group._total_exec_bytes
        if shared_module is not None:
            self.params_initialized = True
            self._arg_params = shared_module._arg_params
            self._aux_params = shared_module._aux_params
        elif self.params_initialized:
            # if the parameters are already initialized, we are re-binding
            # so automatically copy the already initialized params
            self._exec_group.set_params(self._arg_params, self._aux_params)
        else:
            assert self._arg_params is None and self._aux_params is None
            param_arrays = [
                zeros(shape=x[0].shape, dtype=x[0].dtype, stype=x[0].stype)
                for x in self._exec_group.param_arrays
            ]
            self._arg_params = {
                name: arr
                for name, arr in zip(self._param_names, param_arrays)
            }

            aux_arrays = [
                zeros(x[0].shape, dtype=x[0].dtype)
                for x in self._exec_group.aux_arrays
            ]
            self._aux_params = {
                name: arr
                for name, arr in zip(self._aux_names, aux_arrays)
            }

        if shared_module is not None and shared_module.optimizer_initialized:
            self.borrow_optimizer(shared_module)
Example #45
0
 def get_vector(self, i, cnt):
     i = int(i)
     vec = nd.zeros((1, cnt))
     vec[0][i] = 1
     return vec
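For reference, get_vector above just builds a 1 x cnt one-hot row; a standalone check of the same construction with made-up values i=3, cnt=5:

from mxnet import ndarray as nd

vec = nd.zeros((1, 5))
vec[0][3] = 1
print(vec)  # [[0. 0. 0. 1. 0.]]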
Example #46
0
import time
import os, sys

import mxnet as mx
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import random
from data_utils import *

mx.random.seed(1)
random.seed(1)

try:
    ctx = mx.gpu()
    _ = nd.zeros((1, ), ctx=ctx)
except:
    ctx = mx.cpu()
print('CPU or GPU? : ', ctx)


# normalize to zero mean and unit variance, which makes the training process easier
def Normolise(data):
    data_array = np.array(data)
    data_array_shape = data_array.shape[0]
    return pd.DataFrame(
        (data_array -
         np.mean(data_array, axis=1).reshape(data_array_shape, -1)) /
        np.std(data_array, axis=1).reshape(data_array_shape, -1),
        index=data.index)
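A small sanity check of Normolise above (assuming pandas and numpy are available as pd and np, as the function itself expects): every row of the result should have zero mean and unit variance.

import numpy as np
import pandas as pd

df = pd.DataFrame([[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]])
normalized = Normolise(df)
print(normalized.mean(axis=1))         # ~0 for each row
print(normalized.std(axis=1, ddof=0))  # 1.0 for each row (population std, matching np.std)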
Example #47
0
def exchange_rate_model(epoch=1000,
                        time_step=28,
                        day=7,
                        normalization_factor=100,
                        save_period=1000,
                        load_period=1000,
                        learning_rate=0.001,
                        ctx=mx.gpu(0)):
    ''' 28 time x 1 day '''
    #network parameter
    normalization_factor = normalization_factor
    time_step = time_step  # 28  step
    day = day  # 1 day
    num_hidden = 300

    training, test = JPY_to_KRW(time_step, day, normalization_factor)

    path = "weights/GRUCell_weights-{}.params".format(load_period)
    model = GRUCell(num_hidden, day)
    model.hybridize()

    # weight initialization
    if os.path.exists(path):
        print("loading weights")
        model.load_params(filename=path, ctx=ctx)  # weights load
    else:
        print("initializing weights")
        model.collect_params().initialize(mx.init.Normal(sigma=0.01),
                                          ctx=ctx)  # weights initialization

    trainer = gluon.Trainer(model.collect_params(), "rmsprop",
                            {"learning_rate": learning_rate})

    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in training:
            states = [nd.zeros(shape=(1, num_hidden), ctx=ctx)]
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            data = data.reshape(shape=(-1, time_step, day))
            data = nd.transpose(data=data, axes=(1, 0, 2))

            loss = 0
            with autograd.record():
                for j in range(time_step):
                    outputs, states = model(data[j], states)
                    loss = loss + gluon.loss.L2Loss()(
                        outputs, label[j].reshape(shape=outputs.shape))
            loss.backward()
            trainer.step(batch_size=1)
        cost = nd.mean(loss).asscalar()
        print(" epoch : {} , last batch cost : {}".format(i, cost))

        #weight_save
        if i % save_period == 0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            model.save_params("weights/GRUCell_weights-{}.params".format(i))

    prediction(test, time_step, day, normalization_factor, num_hidden, model,
               ctx)
Example #48
0
    def generate_learned_samples(self):
        '''
        Draw and generate data.

        Returns:
            `Tuple` data. The shape is ...
            - `mxnet.ndarray` of observed data points in training.
            - `mxnet.ndarray` of supervised data in training.
            - `mxnet.ndarray` of observed data points in test.
            - `mxnet.ndarray` of supervised data in test.
        '''
        for _ in range(self.iter_n):
            training_batch_arr, test_batch_arr = None, None
            training_label_arr, test_label_arr = None, None
            for batch_size in range(self.batch_size):
                dir_key = np.random.randint(low=0, high=len(self.__training_file_path_list))

                training_one_hot_arr = nd.zeros((1, len(self.__training_file_path_list)), ctx=self.__ctx)
                training_one_hot_arr[0, dir_key] = 1

                training_file_path_list = self.__split_at_intervals(
                    self.__training_file_path_list[dir_key], 
                    start_pos=0, 
                    seq_interval=self.__at_intervals
                )

                training_data_arr, test_data_arr = None, None
                training_file_key = np.random.randint(
                    low=0,
                    high=len(training_file_path_list) - self.__seq_len
                )

                test_dir_key = np.random.randint(low=0, high=len(self.__test_file_path_list))
                test_one_hot_arr = nd.zeros((1, len(self.__test_file_path_list)), ctx=self.__ctx)
                test_one_hot_arr[0, test_dir_key] = 1

                test_file_path_list = self.__split_at_intervals(
                    self.__test_file_path_list[test_dir_key], 
                    start_pos=0, 
                    seq_interval=self.__at_intervals
                )

                test_file_key = np.random.randint(
                    low=0,
                    high=len(test_file_path_list) - self.__seq_len
                )
                for seq in range(self.__seq_len):
                    seq_training_batch_arr = self.__image_extractor.extract(
                        path=training_file_path_list[training_file_key+seq],
                    )
                    seq_training_batch_arr = self.pre_normalize(seq_training_batch_arr)
                    seq_training_batch_arr = nd.expand_dims(seq_training_batch_arr, axis=0)
                    seq_test_batch_arr = self.__image_extractor.extract(
                        path=test_file_path_list[test_file_key+seq],
                    )
                    seq_test_batch_arr = self.pre_normalize(seq_test_batch_arr)
                    seq_test_batch_arr = nd.expand_dims(seq_test_batch_arr, axis=0)

                    if training_data_arr is not None:
                        training_data_arr = nd.concat(training_data_arr, seq_training_batch_arr, dim=0)
                    else:
                        training_data_arr = seq_training_batch_arr
                    
                    if test_data_arr is not None:
                        test_data_arr = nd.concat(test_data_arr, seq_test_batch_arr, dim=0)
                    else:
                        test_data_arr = seq_test_batch_arr

                training_data_arr = nd.expand_dims(training_data_arr, axis=0)
                test_data_arr = nd.expand_dims(test_data_arr, axis=0)

                if training_batch_arr is not None:
                    training_batch_arr = nd.concat(training_batch_arr, training_data_arr, dim=0)
                else:
                    training_batch_arr = training_data_arr

                if test_batch_arr is not None:
                    test_batch_arr = nd.concat(test_batch_arr, test_data_arr, dim=0)
                else:
                    test_batch_arr = test_data_arr

                if training_label_arr is not None:
                    training_label_arr = nd.concat(training_label_arr, training_one_hot_arr, dim=0)
                else:
                    training_label_arr = training_one_hot_arr

                if test_label_arr is not None:
                    test_label_arr = nd.concat(test_label_arr, test_one_hot_arr, dim=0)
                else:
                    test_label_arr = test_one_hot_arr

            if self.__noiseable_data is not None:
                training_batch_arr = self.__noiseable_data.noise(training_batch_arr)

            yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr
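A hypothetical way to drive the generator above, assuming an already constructed iterator instance named feeder:

# each iteration yields a training batch, its one-hot labels, and a matching test batch
for train_x, train_y, test_x, test_y in feeder.generate_learned_samples():
    print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)
    break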
Example #49
0
# trainer for the generator and the discriminator
trainerG = gluon.Trainer(netG.collect_params(), 'adam', {
    'learning_rate': lr,
    'beta1': beta1
})
trainerD = gluon.Trainer(netD.collect_params(), 'adam', {
    'learning_rate': lr,
    'beta1': beta1
})

from datetime import datetime
import time
import logging

real_label = nd.ones((batch_size, ), ctx=ctx)
fake_label = nd.zeros((batch_size, ), ctx=ctx)


def facc(label, pred):
    pred = pred.ravel()
    label = label.ravel()
    return ((pred > 0.5) == label).mean()
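A quick numeric check of facc above with made-up predictions:

import numpy as np

label = np.array([1.0, 0.0, 1.0])
pred = np.array([0.9, 0.2, 0.4])
print(facc(label, pred))  # two of three thresholded predictions match the labels -> ~0.667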


metric = mx.metric.CustomMetric(facc)

stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
logging.basicConfig(level=logging.DEBUG)

for epoch in range(epochs):
    tic = time.time()
    def select_triplets(self):
      self.seq = []
      while len(self.seq)<self.seq_min_size:
        self.time_reset()
        embeddings = None
        bag_size = self.triplet_bag_size
        batch_size = self.batch_size
        #data = np.zeros( (bag_size,)+self.data_shape )
        #label = np.zeros( (bag_size,) )
        tag = []
        #idx = np.zeros( (bag_size,) )
        print('eval %d images..'%bag_size, self.triplet_cur)
        print('triplet time stat', self.times)
        if self.triplet_cur+bag_size>len(self.triplet_seq):
          self.triplet_reset()
          #bag_size = min(bag_size, len(self.triplet_seq))
          print('eval %d images..'%bag_size, self.triplet_cur)
        self.times[0] += self.time_elapsed()
        self.time_reset()
        #print(data.shape)
        data = nd.zeros( self.provide_data[0][1] )
        label = None
        if self.provide_label is not None:
          label = nd.zeros( self.provide_label[0][1] )
        ba = 0
        while True:
          bb = min(ba+batch_size, bag_size)
          if ba>=bb:
            break
          _count = bb-ba
          #data = nd.zeros( (_count,)+self.data_shape )
          #_batch = self.data_iter.next()
          #_data = _batch.data[0].asnumpy()
          #print(_data.shape)
          #_label = _batch.label[0].asnumpy()
          #data[ba:bb,:,:,:] = _data
          #label[ba:bb] = _label
          for i in range(ba, bb):
            #print(ba, bb, self.triplet_cur, i, len(self.triplet_seq))
            _idx = self.triplet_seq[i+self.triplet_cur]
            s = self.imgrec.read_idx(_idx)
            header, img = recordio.unpack(s)
            img = self.imdecode(img)
            data[i-ba][:] = self.postprocess_data(img)
            _label = header.label
            if not isinstance(_label, numbers.Number):
              _label = _label[0]
            if label is not None:
              label[i-ba][:] = _label
            tag.append( ( int(_label), _idx) )
            #idx[i] = _idx

          db = mx.io.DataBatch(data=(data,))
          self.mx_model.forward(db, is_train=False)
          net_out = self.mx_model.get_outputs()
          #print('eval for selecting triplets',ba,bb)
          #print(net_out)
          #print(len(net_out))
          #print(net_out[0].asnumpy())
          net_out = net_out[0].asnumpy()
          #print(net_out)
          #print('net_out', net_out.shape)
          if embeddings is None:
            embeddings = np.zeros( (bag_size, net_out.shape[1]))
          embeddings[ba:bb,:] = net_out
          ba = bb
        assert len(tag)==bag_size
        self.triplet_cur+=bag_size
        embeddings = sklearn.preprocessing.normalize(embeddings)
        self.times[1] += self.time_elapsed()
        self.time_reset()
        nrof_images_per_class = [1]
        for i in range(1, bag_size):
          if tag[i][0]==tag[i-1][0]:
            nrof_images_per_class[-1]+=1
          else:
            nrof_images_per_class.append(1)
          
        triplets = self.pick_triplets(embeddings, nrof_images_per_class) # shape=(T,3)
        print('found triplets', len(triplets))
        ba = 0
        while True:
          bb = ba+self.per_batch_size//3
          if bb>len(triplets):
            break
          _triplets = triplets[ba:bb]
          for i in range(3):
            for triplet in _triplets:
              _pos = triplet[i]
              _idx = tag[_pos][1]
              self.seq.append(_idx)
          ba = bb
        self.times[2] += self.time_elapsed()
Example #51
0
def zeros(shape, dtype, ctx):
    return nd.zeros(shape, dtype=dtype, ctx=ctx)
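For illustration, the zeros wrapper above can be called like this (mx.cpu() is just an example context):

import mxnet as mx

buf = zeros((2, 3), dtype='float32', ctx=mx.cpu())
print(buf.shape)  # (2, 3)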
 def hard_mining_reset(self):
   #import faiss
   from annoy import AnnoyIndex
   data = nd.zeros( self.provide_data[0][1] )
   label = nd.zeros( self.provide_label[0][1] )
   #label = np.zeros( self.provide_label[0][1] )
   X = None
   ba = 0
   batch_num = 0
   while ba<len(self.oseq):
     batch_num+=1
     if batch_num%10==0:
       print('loading batch',batch_num, ba)
     bb = min(ba+self.batch_size, len(self.oseq))
     _count = bb-ba
     for i in range(_count):
       idx = self.oseq[i+ba]
       s = self.imgrec.read_idx(idx)
       header, img = recordio.unpack(s)
       img = self.imdecode(img)
       data[i][:] = self.postprocess_data(img)
       label[i][:] = header.label
     db = mx.io.DataBatch(data=(data,self.data_extra), label=(label,))
     self.mx_model.forward(db, is_train=False)
     net_out = self.mx_model.get_outputs()
     embedding = net_out[0].asnumpy()
     nembedding = sklearn.preprocessing.normalize(embedding)
     if _count<self.batch_size:
       nembedding = nembedding[0:_count,:]
     if X is None:
       X = np.zeros( (len(self.id2range), nembedding.shape[1]), dtype=np.float32 )
     nplabel = label.asnumpy()
     for i in range(_count):
       ilabel = int(nplabel[i])
       #print(ilabel, ilabel.__class__)
       X[ilabel] += nembedding[i]
     ba = bb
   X = sklearn.preprocessing.normalize(X)
   d = X.shape[1]
   t = AnnoyIndex(d, metric='euclidean')
   for i in range(X.shape[0]):
     t.add_item(i, X[i])
   print('start to build index')
   t.build(20)
   print(X.shape)
   k = self.per_identities
   self.seq = []
   for i in range(X.shape[0]):
     nnlist = t.get_nns_by_item(i, k)
     assert nnlist[0]==i
     for _label in nnlist:
       assert _label<len(self.id2range)
       _id = self.header0[0]+_label
       v = self.id2range[_id]
       _list = range(*v)
       if len(_list)<self.images_per_identity:
         random.shuffle(_list)
       else:
         _list = np.random.choice(_list, self.images_per_identity, replace=False)
       for i in range(self.images_per_identity):
         _idx = _list[i%len(_list)]
         self.seq.append(_idx)
    def hybrid_forward(self, F, X):
        # (batch_size, num_channel_prev, h, w, dim_vector)
        # -->(batch_size,num_capsule_prev,1,1,dim_vector)
        X = X.reshape((0, -1, 1, 1, 0))

        self.num_capsules_prev = X.shape[1]
        self.batch_size = X.shape[0]
        # (batch_size,num_capsule_prev,out_channels,1,dim_vector)
        X_tile = nd.tile(X, reps=(1, 1, self.out_channels, 1, 1))

        if self.routing_weight_initial:
            self.routing_weight = nd.random_normal(
                shape=(1, self.num_capsules_prev, self.out_channels,
                       self.dim_input_vector, self.dim_vector),
                name='routing_weight').as_in_context(mx.gpu(0))
            self.routing_weight_initial = False
        # (batch_size,num_capsule_prev,out_channels,dim_input_vector,dim_vector)
        # (64, 1152, 10, 8, 16)
        W_tile = nd.tile(self.routing_weight,
                         reps=(self.batch_size, 1, 1, 1, 1))
        linear_combination_3d = nd.batch_dot(
            X_tile.reshape((-1, X_tile.shape[-2], X_tile.shape[-1])),
            W_tile.reshape((-1, W_tile.shape[-2], W_tile.shape[-1])))
        # (64, 1152, 10, 1, 16)
        linear_combination = linear_combination_3d.reshape(
            (self.batch_size, self.num_capsules_prev, self.out_channels, 1,
             self.dim_vector))

        # b_ij (1, 1152, 10, 1, 1)
        priors = nd.zeros((1, self.num_capsules_prev, self.out_channels, 1, 1))

        ############################################################################
        ##                                Rounting                                ##
        ############################################################################
        for iter_index in range(self.num_routing_iter):
            # NOTE: RoutingAlgorithm-line 4
            # b_ij (1, 1152, 10, 1, 1)
            softmax_prior = nd.softmax(priors,
                                       axis=2)  # on num_capsule dimension
            # NOTE: RoutingAlgorithm-line 5
            # (64, 1152, 10, 1, 16)
            # output = torch.mul(softmax_prior, linear_combination)
            output = softmax_prior * linear_combination

            # (64, 1, 10, 1, 16)
            output_sum = output.sum(axis=1, keepdims=True)  # s_J

            # NOTE: RoutingAlgorithm-line 6
            # (64, 1, 10, 1, 16)
            output_squashed = self.squash(output_sum)  # v_J

            # NOTE: RoutingAlgorithm-line 7
            # (64, 1152, 10, 1, 16)
            output_tile = nd.tile(output_squashed,
                                  reps=(1, self.num_capsules_prev, 1, 1, 1))
            # (64, 1152, 10, 1, 16) x (64, 1152, 10, 1, 16) (transpose on last two axis)
            # ==> (64, 1152, 10, 1, 1)
            U_times_v = nd.batch_dot(linear_combination.reshape(
                (-1, 1, self.dim_vector)),
                                     output_tile.reshape(
                                         (-1, 1, self.dim_vector)),
                                     transpose_b=True)
            U_times_v = U_times_v.reshape(
                (self.batch_size, self.num_capsules_prev, self.out_channels, 1,
                 1))

            priors = priors + U_times_v.sum(axis=0).expand_dims(axis=0)

        return output_squashed  # v_J
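The routing loop above relies on self.squash, which is not defined in this fragment. A minimal sketch of the standard capsule squashing nonlinearity it presumably implements, assuming the same nd import as the class (an assumption, not the author's exact code):

def squash(s, axis=-1, eps=1e-9):
    # squash(s) = (|s|^2 / (1 + |s|^2)) * s / |s|, applied along `axis`
    squared_norm = nd.sum(nd.square(s), axis=axis, keepdims=True)
    scale = squared_norm / (1.0 + squared_norm) / nd.sqrt(squared_norm + eps)
    return nd.broadcast_mul(scale, s)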
Example #54
0
def train(pool_size, epochs, train_data, val_data,  ctx, netEn, netDe,  netD, netD2, trainerEn, trainerDe, trainerD, trainerD2, lambda1, batch_size, expname,  append=True, useAE = False):
    tp_file = open(expname + "_trainloss.txt", "w")  
    tp_file.close()  
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf,  opt.ngf, opt.append)
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    image_pool = imagePool.ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.CustomMetric(facc)
    metricMSE = mx.metric.MSE()
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    acc2_rec = []
    loss_rec_D2 = []
    loss_rec_G2 = []
    lr = 0.002
    #mu = nd.random_normal(loc=0, scale=1, shape=(batch_size/2,64,1,1), ctx=ctx) 
    mu = nd.random.uniform(low= -1, high=1, shape=(batch_size/2,64,1,1),ctx=ctx)
    #mu =  nd.zeros((batch_size/2,64,1,1),ctx=ctx)
    sigma = nd.ones((64,1,1),ctx=ctx)
    mu.attach_grad()
    sigma.attach_grad()    
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    for epoch in range(epochs):

        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        #print('learning rate : '+str(trainerD.learning_rate ))
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            fake_latent= netEn(real_in)
            #real_latent = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx)
            real_latent = nd.multiply(nd.power(sigma,2),nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx))
            #nd.random.uniform( low=-1, high=1, shape=fake_latent.shape, ctx=ctx)
            fake_out = netDe(fake_latent)
            fake_concat =  nd.concat(real_in, fake_out, dim=1) if append else  fake_out
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                output2 = netD2(fake_latent)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                fake_latent_label = nd.zeros(output2.shape, ctx=ctx)
                noiseshape = (fake_latent.shape[0]/2,fake_latent.shape[1],fake_latent.shape[2],fake_latent.shape[3])
                eps2 = nd.multiply(nd.power(sigma,2),nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx))
                #eps2 = nd.random_normal(loc=0, scale=sigma.asscalar(), shape=fake_latent.shape, ctx=ctx) #
                #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx)
                rec_output = netD(netDe(eps2))
                errD_fake = GAN_loss(rec_output, fake_label)
                errD_fake2 = GAN_loss(output, fake_label)
                errD2_fake = GAN_loss(output2, fake_latent_label)
                metric.update([fake_label, ], [output, ])
                metric2.update([fake_latent_label, ], [output2, ])
                real_concat =  nd.concat(real_in, real_out, dim=1) if append else  real_out
                output = netD(real_concat)
                output2 = netD2(real_latent)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label =  nd.ones(output2.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD2_real =  GAN_loss(output2, real_latent_label)
                #errD = (errD_real + 0.5*(errD_fake+errD_fake2)) * 0.5
                errD = (errD_real + errD_fake) * 0.5
                errD2 = (errD2_real + errD2_fake) * 0.5
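                # Combine the image-space (netD) and latent-space (netD2) discriminator losses and backpropagate them together.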
                totalerrD = errD + errD2
                totalerrD.backward()
                #errD2.backward()
                metric.update([real_label, ], [output, ])
                metric2.update([real_latent_label, ], [output2, ])
            trainerD.step(batch.data[0].shape[0])
            trainerD2.step(batch.data[0].shape[0])
            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                sh = fake_latent.shape
                eps2 = nd.multiply(nd.power(sigma,2), nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx))
                #eps2 = nd.random_normal(loc=0, scale=sigma.asscalar(), shape=fake_latent.shape, ctx=ctx)
                #eps = nd.random.uniform(low=-1, high=1, shape=noiseshape, ctx=ctx)
                rec_output = netD(netDe(eps2))
                fake_latent= (netEn(real_in))
                output2 = netD2(fake_latent)
                fake_out = netDe(fake_latent)
                fake_concat =  nd.concat(real_in, fake_out, dim=1) if append else  fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errG2 = GAN_loss(rec_output, real_label)
                errR = L1_loss(real_out, fake_out) * lambda1
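                # Total encoder/decoder loss: latent-space adversarial term (weighted by 10), adversarial term on images
                # decoded from noise, the weighted reconstruction error, and a penalty on sigma^2.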
                errG = 10.0*GAN_loss(output2, real_latent_label) + errG2 + errR + nd.mean(nd.power(sigma, 2))
                errG.backward()
            if epoch > 50:
                sigma -= lr / sigma.shape[0] * sigma.grad
                print(sigma)
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
            loss_rec_G2.append(nd.mean(errG2).asscalar())
            loss_rec_G.append(nd.mean(errG).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar())
            loss_rec_D.append(nd.mean(errD).asscalar())
            loss_rec_R.append(nd.mean(errR).asscalar())
            loss_rec_D2.append(nd.mean(errD2).asscalar())
            _, acc2 = metric2.get()
            name, acc = metric.get()
            acc_rec.append(acc)
            acc2_rec.append(acc2)

            # Print log information every ten batches
            if iter % 10 == 0:
                _, acc2 = metric2.get()
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                #print(errD)
                logging.info('discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, binary training acc = %f, D2 acc = %f, reconstruction error = %f at iter %d epoch %d'
                             % (nd.mean(errD).asscalar(), nd.mean(errD2).asscalar(),
                                nd.mean(errG - errG2 - errR).asscalar(), nd.mean(errG2).asscalar(), acc, acc2, nd.mean(errR).asscalar(), iter, epoch))
            iter = iter + 1
            btic = time.time()
        name, acc = metric.get()
        _, acc2 = metric2.get()
        tp_file = open(expname + "_trainloss.txt", "a")
        tp_file.write(str(nd.mean(errG2).asscalar()) + " " + str(
            nd.mean(errG).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) + " " + str(
            nd.mean(errD).asscalar()) + " " + str(nd.mean(errD2).asscalar()) + " " + str(nd.mean(errR).asscalar()) + " " + str(acc) + " " + str(acc2) + "\n")
        tp_file.close()
        metric.reset()
        metric2.reset()
        train_data.reset()

        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        if epoch % 10 == 0:  # and epoch > 0:
            text_file = open(expname + "_validtest.txt", "a")
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_D.params"
            netD.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_D2.params"
            netD2.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_En.params"
            netEn.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_De.params"
            netDe.save_params(filename)
            fake_img1 = nd.concat(real_in[0],real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1],real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2],real_out[2], fake_out[2], dim=1)
            fake_img4 = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:
                
                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)
                fake_latent= netEn(real_in)
                y = netDe(fake_latent)
                fake_out = y
                metricMSE.update([fake_out, ], [real_out, ])
            _, acc2 = metricMSE.get()
            text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2)))
            metricMSE.reset()
            images = netDe(eps2)
            fake_img1T = nd.concat(images[0],images[1], images[2], dim=1)
            fake_img2T = nd.concat(images[3],images[4], images[5], dim=1)
            fake_img3T = nd.concat(images[6],images[7], images[8], dim=1)
            fake_img = nd.concat(fake_img1T,fake_img2T, fake_img3T,dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_fakes_'+str(epoch)+'.png')
            text_file.close()

            # Do 10 iterations of sampler update
            fake_img1T = nd.concat(real_in[0],real_out[0], fake_out[0], dim=1)
            fake_img2T = nd.concat(real_in[1],real_out[1], fake_out[1], dim=1)
            fake_img3T = nd.concat(real_in[2],real_out[2], fake_out[2], dim=1)
            #fake_img4T = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            fake_img = nd.concat(fake_img1,fake_img2, fake_img3,fake_img1T,fake_img2T, fake_img3T,dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_'+str(epoch)+'.png')
            '''if epoch > 100:
	      for ep2 in range(10):
	    	with autograd.record():
                	#eps = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) #
			eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx)
			eps2 = nd.random_normal(loc=0, scale=0.02, shape=noiseshape, ctx=ctx)
                	eps2 = nd.tanh(eps2*sigma+mu)
                	eps2 = nd.concat(eps,eps2,dim=0)
			rec_output = netD(netDe(eps2))
			fake_label = nd.zeros(rec_output.shape, ctx=ctx)
                	errGS = GAN_loss(rec_output, fake_label)
    			errGS.backward()
		mu -= lr / mu.shape[0] * mu.grad
		sigma -= lr / sigma.shape[0] * sigma.grad
	    	print('mu ' + str(mu[0,0,0,0].asnumpy())+ '  sigma '+ str(sigma[0,0,0,0].asnumpy()))
	    '''
            images = netDe(eps2)
            fake_img1T = nd.concat(images[0],images[1], images[2], dim=1)
            fake_img2T = nd.concat(images[3],images[4], images[5], dim=1)
            fake_img3T = nd.concat(images[6],images[7], images[8], dim=1)
            fake_img = nd.concat(fake_img1T,fake_img2T, fake_img3T,dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_fakespost_'+str(epoch)+'.png')
    return([loss_rec_D,loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2, acc2_rec])
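
A minimal usage sketch (not part of the original example), assuming the networks, trainers, data iterators, and other arguments above have already been constructed and that matplotlib is available, showing how the history returned by train could be plotted:

import matplotlib.pyplot as plt

# train returns [loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2, acc2_rec]
history = train(pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, netD2,
                trainerEn, trainerDe, trainerD, trainerD2, lambda1, batch_size, expname)
loss_D, loss_G, loss_R, acc, loss_D2, loss_G2, acc2 = history
plt.figure()
plt.plot(loss_D, label='D loss')
plt.plot(loss_D2, label='D2 loss')
plt.plot(loss_G, label='G loss')
plt.plot(loss_R, label='reconstruction loss')
plt.legend()
plt.xlabel('iteration')
plt.ylabel('loss')
plt.savefig('outputs/' + expname + '_losscurves.png')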
Example #55
0
    def build_graph(self):
        import mxnet as mx
        from mxnet import ndarray as nd
        from mxnet import gluon, autograd
        import dgl

        user_ids = list(self.users.index)
        product_ids = list(self.products.index)
        user_ids_invmap = {id_: i for i, id_ in enumerate(user_ids)}
        product_ids_invmap = {id_: i for i, id_ in enumerate(product_ids)}
        self.user_ids = user_ids
        self.product_ids = product_ids
        self.user_ids_invmap = user_ids_invmap
        self.product_ids_invmap = product_ids_invmap

        g = dgl.DGLGraph(multigraph=True)
        g.add_nodes(len(user_ids) + len(product_ids))

        # node type
        node_type = nd.zeros(g.number_of_nodes(), dtype='float32')
        node_type[:len(user_ids)] = 1
        g.ndata['type'] = node_type
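        # The first len(user_ids) nodes are user nodes (type 1); the remaining nodes are product nodes (type 0).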

        # user features
        print('Adding user features...')
        for user_column in self.users.columns:
            udata = nd.zeros(g.number_of_nodes(), dtype='int64')
            # 0 for padding
            udata[:len(user_ids)] = \
                    nd.from_numpy(self.users[user_column].cat.codes.values.astype('int64') + 1)
            g.ndata[user_column] = udata

        # product genre
        print('Adding product features...')
        product_genres = nd.from_numpy(
            self.products[self.genres].values.copy().astype('float32'))
        g.ndata['genre'] = nd.zeros((g.number_of_nodes(), len(self.genres)))
        g.ndata['genre'][len(user_ids):len(user_ids) +
                         len(product_ids)] = product_genres

        # product year
        if 'year' in self.products.columns:
            g.ndata['year'] = nd.zeros(g.number_of_nodes(), dtype='int64')
            # 0 for padding
            g.ndata['year'][len(user_ids):len(user_ids) + len(product_ids)] = \
                    nd.from_numpy(self.products['year'].cat.codes.values.astype('int64') + 1)
        '''
        # product title
        print('Parsing title...')
        nlp = stanfordnlp.Pipeline(use_gpu=False, processors='tokenize,lemma')
        vocab = set()
        title_words = []
        for t in tqdm.tqdm(self.products['title'].values):
            doc = nlp(t)
            words = set()
            for s in doc.sentences:
                words.update(w.lemma.lower() for w in s.words
                             if not re.fullmatch(r'['+string.punctuation+']+', w.lemma))
            vocab.update(words)
            title_words.append(words)
        vocab = list(vocab)
        vocab_invmap = {w: i for i, w in enumerate(vocab)}
        # bag-of-words
        g.ndata['title'] = nd.zeros((g.number_of_nodes(), len(vocab)))
        for i, tw in enumerate(tqdm.tqdm(title_words)):
            g.ndata['title'][len(user_ids) + i, [vocab_invmap[w] for w in tw]] = 1
        self.vocab = vocab
        self.vocab_invmap = vocab_invmap
        '''

        rating_user_vertices = [
            user_ids_invmap[id_] for id_ in self.ratings['user_id'].values
        ]
        rating_product_vertices = [
            product_ids_invmap[id_] + len(user_ids)
            for id_ in self.ratings['product_id'].values
        ]
        self.rating_user_vertices = rating_user_vertices
        self.rating_product_vertices = rating_product_vertices

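        # Add the rating edges in both directions; 'inv' marks the direction (0: user->product, 1: product->user)
        # and 'rating' carries the numeric score.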
        g.add_edges(rating_user_vertices,
                    rating_product_vertices,
                    data={
                        'inv':
                        nd.zeros(self.ratings.shape[0], dtype='int32'),
                        'rating':
                        nd.from_numpy(
                            self.ratings['rating'].values.astype('float32'))
                    })
        g.add_edges(rating_product_vertices,
                    rating_user_vertices,
                    data={
                        'inv':
                        nd.ones(self.ratings.shape[0], dtype='int32'),
                        'rating':
                        nd.from_numpy(
                            self.ratings['rating'].values.astype('float32'))
                    })
        self.g = g
        g.readonly()
Example #56
0
def main(opt):
    ctx = mx.gpu() if opt.use_gpu else mx.cpu()
    testclasspaths = []
    testclasslabels = []
    print('loading test files')
    filename = '_testlist.txt'
    with open(opt.dataset + "_" + opt.expname + filename, 'r') as f:
        for line in f:
            testclasspaths.append(line.split(' ')[0])
            if int(line.split(' ')[1]) == -1:
                testclasslabels.append(0)
            else:
                testclasslabels.append(1)
    neworder = range(len(testclasslabels))
    neworder = shuffle(neworder)

    c = list(zip(testclasslabels, testclasspaths))
    print('shuffling')
    random.shuffle(c)

    #testclasslabels, testclasspaths = zip(*c)
    #testclasslabels = testclasslabels[1:5000]
    #testclasspaths = testclasspaths[1:5000]
    ltnt = 512
    print('loading pictures')
    test_data = load_image.load_test_images(testclasspaths, testclasslabels,
                                            opt.batch_size, opt.img_wd,
                                            opt.img_ht, ctx, opt.noisevar)
    print('picture loading done')
    netEn, netDe, netD, netD2, netDS = set_network(opt.depth, ctx, 0, 0,
                                                   opt.ndf, opt.ngf,
                                                   opt.append)
    netEn.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                      '_En.params',
                      ctx=ctx)
    netDe.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                      '_De.params',
                      ctx=ctx)
    netD.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                     '_D.params',
                     ctx=ctx)
    netD2.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                      '_D2.params',
                      ctx=ctx)
    netDS.load_params('checkpoints/' + opt.expname + '_' + str(opt.epochs) +
                      '_SD.params',
                      ctx=ctx)
    print('Model loading done')
    lbllist = []
    scorelist1 = []
    scorelist2 = []
    scorelist3 = []
    scorelist4 = []
    test_data.reset()
    count = 0
    for batch in (test_data):
        count += 1
        print(str(count))  #, end="\r")
        real_in = batch.data[0].as_in_context(ctx)
        real_out = batch.data[1].as_in_context(ctx)
        lbls = batch.label[0].as_in_context(ctx)
        code = netEn((real_out))
        code = code + nd.random.normal(
            loc=0, scale=0.002, shape=code.shape, ctx=ctx)
        outnn = (netDe(code))
        out_concat = nd.concat(real_out, outnn, dim=1) if opt.append else outnn
        output4 = nd.mean((netD(out_concat)), (1, 3, 2)).asnumpy()
        code = netEn(real_in)
        #code=codet+nd.random.normal(loc=0, scale=0.0000001, shape=code.shape,ctx=ctx)
        #code2=codet+nd.random.normal(loc=0, scale=0.000001, shape=code.shape,ctx=ctx)
        #eq_code = heq(code.asnumpy(),2)
        #code = nd.array(eq_code, ctx=ctx)
        out = netDe(code)
        #out2 = netDe(code2)
        out_concat = nd.concat(real_in, out, dim=1) if opt.append else out
        output = netD(out_concat)  #Denoised image
        output3 = nd.mean((out - real_out)**2,
                          (1, 3, 2)).asnumpy()  #denoised-real
        output = nd.mean(output, (1, 3, 2)).asnumpy()
        out_concat = nd.concat(real_out, real_out,
                               dim=1) if opt.append else real_out

        output2 = netDS(netDe(code))  #Image with no noise
        output2 = nd.mean(output2, (1, 3, 2)).asnumpy()
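        # Four anomaly scores per image:
        #   output  - netD score on the reconstruction of the noisy input
        #   output2 - netDS score on the same reconstruction
        #   output3 - pixel-wise MSE between that reconstruction and the clean target
        #   output4 - netD score on the reconstruction of the clean target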
        lbllist = lbllist + list(lbls.asnumpy())
        scorelist1 = scorelist1 + list(output)
        scorelist2 = scorelist2 + list(output2)
        scorelist3 = scorelist3 + list(output3)
        scorelist4 = scorelist4 + list(output4)

        fake_img1 = nd.concat(real_in[0], real_out[0], out[0], outnn[0], dim=1)
        fake_img2 = nd.concat(real_in[1], real_out[1], out[1], outnn[1], dim=1)
        fake_img3 = nd.concat(real_in[2], real_out[2], out[2], outnn[2], dim=1)
        fake_img4 = nd.concat(real_in[3], real_out[3], out[3], outnn[3], dim=1)
        fake_img = nd.concat(fake_img1, fake_img2, fake_img3, fake_img4, dim=2)
        #print(np.shape(fake_img))
        visual.visualize(fake_img)
        plt.savefig('outputs/T_' + opt.expname + '_' + str(count) + '.png')
    if not opt.isvalidation:

        fpr, tpr, _ = roc_curve(lbllist, scorelist1, pos_label=1)
        roc_auc1 = auc(fpr, tpr)
        fpr, tpr, _ = roc_curve(lbllist, scorelist2, pos_label=1)
        roc_auc2 = auc(fpr, tpr)
        fpr, tpr, _ = roc_curve(lbllist, scorelist3, pos_label=1)
        roc_auc3 = auc(fpr, tpr)
        fpr, tpr, _ = roc_curve(lbllist, scorelist4, pos_label=1)
        roc_auc4 = auc(fpr, tpr)
        plt.gcf().clear()
        plt.clf()
        sns.set(color_codes=True)
        posscores = [
            scorelist3[i] for i, v in enumerate(lbllist) if int(v) == 1
        ]
        negscores = [
            scorelist3[i] for i, v in enumerate(lbllist) if int(v) == 0
        ]
        #sns.distplot(posscores, hist=False, label="Known Classes" ,rug=True)
        sns.kdeplot(posscores, label="Known Classes")
        sns.kdeplot(negscores, label="Unknown Classes")
        #plt.hold()
        #sns.distplot(negscores, hist=False, label = "Unknown Classes", rug=True);
        plt.legend()
        plt.savefig('outputs/matdist_' + opt.expname + '_.png')

        plt.gcf().clear()
        inputT = nd.zeros((ltnt, ltnt, 1, 1), ctx=ctx)
        for i in range(0, ltnt):
            inputT[i, i, :, :] = -1
        out = netDe(inputT)
        count = 0
        for i in range(int(math.ceil(math.sqrt(ltnt)))):
            for j in range(int(math.ceil(math.sqrt(ltnt)))):
                if count < ltnt:
                    plt.subplot(math.ceil(math.sqrt(ltnt)),
                                math.ceil(math.sqrt(ltnt)), count + 1)
                    plt.imshow(
                        ((out[count].asnumpy().transpose(1, 2, 0) + 1.0) *
                         127.5).astype(np.uint8))
                    plt.axis('off')
                count += 1
        plt.savefig('outputs/atoms_' + opt.expname + '_.png')
        plt.gcf().clear()
        plt.clf()
        return ([roc_auc1, roc_auc2, roc_auc3, roc_auc4])
    else:
        return ([0, 0, 0, 0])
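    # Note: everything below is unreachable, since both branches above return.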
    fakecode = nd.random_normal(loc=0,
                                scale=1,
                                shape=(16, 4096, 1, 1),
                                ctx=ctx)
    out = netDe(fakecode)
    fake_img1 = nd.concat(out[0], out[1], out[2], out[3], dim=1)
    fake_img2 = nd.concat(out[7], out[6], out[5], out[4], dim=1)
    fake_img3 = nd.concat(out[8], out[9], out[10], out[11], dim=1)
    fake_img4 = nd.concat(out[15], out[14], out[13], out[12], dim=1)
    fake_img = nd.concat(fake_img1, fake_img2, fake_img3, fake_img4, dim=2)
    #print(np.shape(fake_img))
    visual.visualize(fake_img)
    plt.savefig('outputs/fakes_' + opt.expname + '_.png')
Example #57
0
    def set_ctx(self):
        # Prefer a GPU context; fall back to CPU if the test allocation fails.
        try:
            self.__ctx = mx.gpu()
            _ = nd.zeros(shape=(1, ), ctx=self.__ctx)
        except Exception:
            self.__ctx = mx.cpu()
Example #58
0
    idx = list(range(num_examples))
    # Shuffle the index order
    random.shuffle(idx)
    for i in range(0, num_examples, batch_size):
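        # j holds a random batch of indices; nd.take gathers the corresponding rows of X and y.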
        j = nd.array(idx[i:min(i + batch_size, num_examples)])
        yield nd.take(X, j), nd.take(y, j)


# for data, label in data_iter():
#     print(data, label)
#     break

# Initialize the parameters
w = nd.random_normal(shape=(num_inputs, 1))
b = nd.zeros((1,))
params = [w, b]

# Attach gradients to the parameters
for param in params:
    param.attach_grad()


# Define the model
def net(X):
    return nd.dot(X, w) + b


# Squared loss function
def square_loss(yhat, y):
Example #59
0
def train():
    image_pool = ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)

    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)

    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)

            fake_out = netG(real_in)
            fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([fake_label, ], [output, ])

                # Train with real image
                real_concat = nd.concat(real_in, real_out, dim=1)
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([real_label, ], [output, ])

            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_out = netG(real_in)
                fake_concat = nd.concat(real_in, fake_out, dim=1)
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(
                    output, real_label) + L1_loss(real_out, fake_out) * lambda1
                errG.backward()

            trainerG.step(batch.data[0].shape[0])

            # Print log information every ten batches
            if iter % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                logging.info(
                    'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc,
                       iter, epoch))
            iter = iter + 1
            btic = time.time()

        name, acc = metric.get()
        metric.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' %
                     (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

        # Visualize one generated image for each epoch
        fake_img = fake_out[0]
        visualize(fake_img)
Example #60
0
from chapter1 import c1_utils
from mxnet import ndarray as nd
from mxnet import gluon
from mxnet import autograd as autograd

batch_size = 256
train_data, test_data = c1_utils.load_data_fashion_mnist(batch_size)

num_inputs = 28 * 28
num_outputs = 10

num_hidden = 784
weight_scale = .05

W1 = nd.random_normal(shape=(num_inputs, num_hidden), scale=weight_scale)
b1 = nd.zeros(num_hidden)

W2 = nd.random_normal(shape=(num_hidden, num_outputs), scale=weight_scale)
b2 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2]

for param in params:
    param.attach_grad()


def relu(X):
    return nd.maximum(X, 0)


def net(X):