Example #1
def ApplyActivation(self):
    state = self.state
    if self.activation == deepnet_pb2.Hyperparams.LOGISTIC:
        cm.sigmoid(state)
    elif self.activation == deepnet_pb2.Hyperparams.TANH:
        cm.tanh(state)
    elif self.activation == deepnet_pb2.Hyperparams.RECTIFIED_LINEAR:
        # ReLU: build a 0/1 mask of positive entries, then multiply elementwise.
        state.greater_than(0, target=self.temp)
        state.mult(self.temp)
    elif self.activation == deepnet_pb2.Hyperparams.RECTIFIED_LINEAR_SMOOTH:
        # Softplus: log(1 + exp(x)).
        cm.log_1_plus_exp(state)
    elif self.activation == deepnet_pb2.Hyperparams.LINEAR:
        pass
    elif self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
        # Column-wise softmax, shifted by the per-column max for stability.
        state.max(axis=0, target=self.temp)
        state.add_row_mult(self.temp, -1)
        cm.exp(state)
        state.sum(axis=0, target=self.temp)
        self.temp.reciprocal()
        state.mult_by_row(self.temp)
    elif self.activation == deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
        # Softmax rescaled so each column sums to its word count self.NN.
        state.max(axis=0, target=self.temp)
        state.add_row_mult(self.temp, -1)
        cm.exp(state)
        state.sum(axis=0, target=self.temp)
        self.NN.divide(self.temp, target=self.temp)
        state.mult_by_row(self.temp)
    else:
        raise Exception('Unknown activation')
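
In cudamat, the axis=0 reductions above run over rows, producing one value per column, so the SOFTMAX branch computes a max-shifted softmax over each column of state (units x cases). A minimal NumPy sketch of the same computation, for reference:

import numpy as np

def softmax_columns(state):
    # Mirrors the SOFTMAX branch: subtract the per-column max for numerical
    # stability, exponentiate, and normalize each column to sum to 1.
    shifted = state - state.max(axis=0, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=0, keepdims=True)

The REPLICATED_SOFTMAX branch is the same computation except that each column is rescaled by self.NN, the word count of that document.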
Example #2
File: ais.py Project: ANB2/deepnet
def ExactZ_binary_binary(model):
  """Compute the exact log partition function of a small binary-binary RBM."""
  assert len(model.layer) == 2, 'Only implemented for RBMs.'
  input_layer = model.layer[0]
  hidden_layer = model.layer[1]
  edge = model.edge[0]
  w = edge.params['weight']
  a = hidden_layer.params['bias']
  b = input_layer.params['bias']
  numvis, numhid = w.shape
  batchsize = 2**numvis  # one column per visible configuration
  input_layer.AllocateBatchsizeDependentMemory(batchsize)
  hidden_layer.AllocateBatchsizeDependentMemory(batchsize)
  all_inputs = GetAll(numvis)
  w_ais = cm.CUDAMatrix(np.zeros((1, batchsize)))
  input_layer.sample.overwrite(all_inputs)
  # Unnormalized log probability of each configuration v:
  # sum_j log(1 + exp(w_j'v + a_j)) + b'v.
  cm.dot(w.T, input_layer.sample, target=hidden_layer.state)
  hidden_layer.state.add_col_vec(a)
  cm.log_1_plus_exp(hidden_layer.state)
  w_ais.add_sums(hidden_layer.state, axis=0)
  w_ais.add_dot(b.T, input_layer.sample)
  # Stable log-sum-exp over all 2**numvis configurations.
  offset = float(w_ais.asarray().max())
  w_ais.subtract(offset)
  cm.exp(w_ais)
  z = offset + np.log(w_ais.asarray().sum())
  return z
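
GetAll is defined elsewhere in ais.py and does not appear on this page. From the call site it must return all 2**numvis binary visible configurations, one per column, so that the log-sum-exp above ranges over every v in log Z = log sum_v exp(b'v + sum_j log(1 + exp(w_j'v + a_j))). A plausible NumPy sketch, assuming that numvis x 2**numvis layout:

import numpy as np

def GetAll(numvis):
    # Every binary vector of length numvis, one configuration per column.
    bits = (np.arange(2**numvis)[:, None] >> np.arange(numvis)) & 1
    return bits.T.astype(np.float32)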
Example #3
File: ais.py Project: ANB2/deepnet
def AISBinaryRbm(model, schedule):
  """Estimate the log partition function of a binary-binary RBM with AIS."""
  cm.CUDAMatrix.init_random(seed=int(time.time()))
  assert len(model.layer) == 2, 'Only implemented for RBMs.'
  steps = len(schedule)
  input_layer = model.layer[0]
  hidden_layer = model.layer[1]
  edge = model.edge[0]
  batchsize = model.t_op.batchsize
  w = edge.params['weight']
  a = hidden_layer.params['bias']
  b = input_layer.params['bias']
  numvis, numhid = w.shape

  # Initialize the chains to uniform random visible states
  # (sigmoid(0) = 0.5, then sample).
  input_layer.state.assign(0)
  input_layer.ApplyActivation()
  input_layer.Sample()
  w_ais = cm.CUDAMatrix(np.zeros((1, batchsize)))  # log importance weights
  # Run AIS.
  for i in range(1, steps):
    cm.dot(w.T, input_layer.sample, target=hidden_layer.state)
    hidden_layer.state.add_col_vec(a)

    # Accumulate log p_i(v) - log p_{i-1}(v) at the current samples.
    hidden_layer.state.mult(schedule[i-1], target=hidden_layer.temp)
    hidden_layer.state.mult(schedule[i])
    cm.log_1_plus_exp(hidden_layer.state, target=hidden_layer.deriv)
    cm.log_1_plus_exp(hidden_layer.temp)
    hidden_layer.deriv.subtract(hidden_layer.temp)
    w_ais.add_sums(hidden_layer.deriv, axis=0)
    w_ais.add_dot(b.T, input_layer.sample, mult=schedule[i]-schedule[i-1])

    # Gibbs transition at inverse temperature schedule[i].
    hidden_layer.ApplyActivation()
    hidden_layer.Sample()
    cm.dot(w, hidden_layer.sample, target=input_layer.state)
    input_layer.state.add_col_vec(b)
    input_layer.state.mult(schedule[i])
    input_layer.ApplyActivation()
    input_layer.Sample()
  # The base (schedule = 0) model is uniform over all 2**(numvis + numhid) states.
  z = LogMeanExp(w_ais.asarray()) + numvis * np.log(2) + numhid * np.log(2)
  return z
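
LogMeanExp is another ais.py helper not shown on this page. Consistent with its use on the 1 x batchsize array of log importance weights, a numerically stable sketch would be:

import numpy as np

def LogMeanExp(x):
    # Stable log(mean(exp(x))): shift by the max before exponentiating.
    offset = x.max()
    return offset + np.log(np.mean(np.exp(x - offset)))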
Example #4
File: ais.py Project: ANB2/deepnet
def AISReplicatedSoftmax(model, D, num_chains, display=False):
  # Annealing schedule: finer steps as the inverse temperature approaches 1.
  schedule = np.concatenate((
    np.arange(0.0, 0.7, 0.001),   # 700 steps
    np.arange(0.7, 0.9, 0.0001),  # 2000 steps
    np.arange(0.9, 1.0, 0.00002)  # 5000 steps
    ))
  cm.CUDAMatrix.init_random(seed=0)

  assert len(model.layer) == 2, 'Only implemented for RBMs.'
  steps = len(schedule)
  input_layer = model.layer[0]
  hidden_layer = model.layer[1]
  edge = model.edge[0]
  batchsize = num_chains
  w = edge.params['weight']
  a = hidden_layer.params['bias']
  b = input_layer.params['bias']
  numvis, numhid = w.shape
  f = 0.1  # scale of the visible biases in the base-rate model
  input_layer.AllocateBatchsizeDependentMemory(num_chains)
  hidden_layer.AllocateBatchsizeDependentMemory(num_chains)

  # Initialize the chains with samples from the base model.
  input_layer.state.assign(0)
  input_layer.NN.assign(D)  # D words per document (replicated softmax)
  input_layer.state.add_col_mult(b, f)
  SampleEnergySoftmax(input_layer, D)
  w_ais = cm.CUDAMatrix(np.zeros((1, batchsize)))  # log importance weights

  w_variance = []
  x_axis = []
  if display:
    Display(w_ais, hidden_layer.state, input_layer.state, w_variance, x_axis)

  # Run AIS.
  for i in range(steps-1):
    sys.stdout.write('\r%d' % (i+1))
    sys.stdout.flush()
    cm.dot(w.T, input_layer.sample, target=hidden_layer.state)
    hidden_layer.state.add_col_mult(a, D)

    # Accumulate log p_{i+1}(v) - log p_i(v) at the current samples.
    hidden_layer.state.mult(schedule[i], target=hidden_layer.temp)
    hidden_layer.state.mult(schedule[i+1])
    cm.log_1_plus_exp(hidden_layer.state, target=hidden_layer.deriv)
    cm.log_1_plus_exp(hidden_layer.temp)
    hidden_layer.deriv.subtract(hidden_layer.temp)
    w_ais.add_sums(hidden_layer.deriv, axis=0)
    w_ais.add_dot(b.T, input_layer.sample, mult=(1-f)*(schedule[i+1]-schedule[i]))

    # Gibbs transition at inverse temperature schedule[i+1].
    hidden_layer.ApplyActivation()
    hidden_layer.Sample()
    cm.dot(w, hidden_layer.sample, target=input_layer.state)
    input_layer.state.add_col_vec(b)
    input_layer.state.mult(schedule[i+1])
    input_layer.state.add_col_mult(b, f*(1-schedule[i+1]))
    SampleEnergySoftmax(input_layer, D)
    if display and (i % 100 == 0 or i == steps - 2):
      w_variance.append(w_ais.asarray().var())
      x_axis.append(i)
      Display(w_ais, hidden_layer.state, input_layer.sample, w_variance, x_axis)
  sys.stdout.write('\n')
  # Base model: D * LogSumExp(f * b) for the visibles plus numhid * log(2) for the hiddens.
  z = LogMeanExp(w_ais.asarray()) + D * LogSumExp(f * b.asarray()) + numhid * np.log(2)
  return z
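
LogSumExp, used above for the base-rate term D * LogSumExp(f * b), is likewise defined elsewhere in ais.py; the standard numerically stable form is:

import numpy as np

def LogSumExp(x):
    # Stable log(sum(exp(x))).
    offset = x.max()
    return offset + np.log(np.sum(np.exp(x - offset)))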
Example #5
def rbmPredict(m, X):
    """Use a trained RBM to predict class labels for the rows of X."""
    nClass = m.labels.size
    numCase = X.shape[0]

    # The per-class free-energy score F is computed on the GPU below; a NumPy
    # reference implementation of the same computation follows this example.
    cm.cublas_init()

    # Copy data and model parameters to the GPU.
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
    weightLabel = cm.CUDAMatrix(cm.reformat(m.weightLabel))
    biasLabel = cm.CUDAMatrix(cm.reformat(m.biasLabel))

    F = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    temp = cm.CUDAMatrix(np.zeros((numCase, nClass)))

    for index in range(nClass):
        # One-hot indicator for the candidate class.
        temp.assign(0)
        temp.set_col_slice(index, index + 1, 1)

        # tt = log(1 + exp(X weight + onehot weightLabel + biasH)).
        tt = cm.dot(data, weight)
        tt.add_dot(temp, weightLabel)
        tt.add_row_vec(biasH)
        cm.log_1_plus_exp(tt, target=tt, exact=True)

        # Score for this class: label bias plus the sum over hidden units.
        Fcol = cm.sum(tt, axis=1)
        Fcol.add_mult(temp.get_col_slice(index, index + 1),
                      biasLabel.numpy_array[0, index])
        F.set_col_slice(index, index + 1, Fcol)

        tt.free_device_memory()
        Fcol.free_device_memory()

    F.copy_to_host()
    # Pick the class whose score matches the per-row maximum.
    [x, y] = np.where(np.abs(F.numpy_array - np.max(F.numpy_array, axis=1, keepdims=True)) < 1e-5)

    # Free device memory; F.numpy_array remains valid on the host.
    data.free_device_memory()
    weight.free_device_memory()
    biasH.free_device_memory()
    biasLabel.free_device_memory()
    weightLabel.free_device_memory()
    F.free_device_memory()
    temp.free_device_memory()

    cm.shutdown()

    result = np.zeros(y.shape)
    for index in range(y.size):
        result[index] = m.labels[y[index]]

    return [result, F.numpy_array]
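
For checking the GPU path, here is a NumPy reference of the per-class score that rbmPredict computes, a sketch under the shape conventions implied above (X is cases x visible, m.weight is visible x hidden, m.weightLabel is nClass x hidden, and m.biasH and m.biasLabel are row vectors):

import numpy as np

def rbm_free_energy_scores(m, X):
    # F[:, k] = biasLabel[0, k] + sum_j log(1 + exp((X @ weight + weightLabel[k] + biasH)_j))
    nClass = m.labels.size
    F = np.zeros((X.shape[0], nClass))
    hidden_input = X @ m.weight + m.biasH  # shared across candidate classes
    for k in range(nClass):
        tt = np.logaddexp(0, hidden_input + m.weightLabel[k])  # stable log(1 + exp)
        F[:, k] = m.biasLabel[0, k] + tt.sum(axis=1)
    return F

The predicted label is then m.labels[np.argmax(F, axis=1)], which also sidesteps the tie handling of the np.where test above.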