import sys
import numpy as np
import theano.tensor as T

import data_nextchar

# loadNet, getInputLayer, allUnitsLayers, inputLayer, layer, sigmoid, softmax,
# negative_log_likelihood, updateFunction, testFunction, dumpNetworkStructure,
# allLayerActivationFunctions, r3 and f3 are helpers defined elsewhere in this
# project.


def reportLargeWeights(filename):
    """Load a saved net and print, per unit of the first layer, the weights
    whose magnitude is close to the layer's extremes."""
    errorL = loadNet(filename)
    input = getInputLayer(errorL)
    output = errorL.below
    llast = output.below
    units = allUnitsLayers(errorL)
    first = units[0]
    print(first)
    wt = np.transpose(first.weights.get_value())
    bias = first.bias.get_value()
    print("wt shape", wt.shape)
    wmax = np.max(wt)  # renamed from `max`/`min` to avoid shadowing builtins
    wmin = np.min(wt)
    print("max", wmax, "min", wmin)
    fmax = 0.4
    fmin = 0.4
    # A weight counts as "extreme" if it exceeds 40% of the most positive
    # weight or falls below 40% of the most negative one.
    extreme = (wt > fmax * wmax) | (wt < fmin * wmin)
    nc = data_nextchar.numChars()
    for u in range(wt.shape[0]):
        print("\nunit", u, "bias", bias[u])
        for i in np.argwhere(extreme[u, :]):
            c = i[0]
            nchar = c // nc  # position of the character in the context window
            ch = c % nc      # character id within that one-hot block
            print(nchar, data_nextchar.charAt(ch), wt[u, c])
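# The thresholding above keeps only weights beyond a fixed fraction of the
# layer's most positive and most negative values. Below is a minimal
# self-contained sketch of that mask on a plain NumPy matrix; the name
# `extreme_weight_mask` and the `frac` parameter are illustrative, not part
# of this project.
def extreme_weight_mask(wt, frac=0.4):
    """Boolean mask of the entries beyond `frac` of the matrix extremes."""
    return (wt > frac * wt.max()) | (wt < frac * wt.min())

# Example (illustrative):
#   wt = np.random.default_rng(0).normal(size=(4, 10))
#   for u, i in np.argwhere(extreme_weight_mask(wt)):
#       print("unit", u, "input", i, "weight", wt[u, i])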
def learn_logic(filename, saveWeights=False):
    """Train a next-character predictor: build a fresh net when filename is
    None, otherwise resume from a saved one."""
    data_nextchar.init()
    context_size = 3
    if filename is None:
        rng = np.random.RandomState(123)
        input = inputLayer(data_nextchar.input_width(context_size))
        nori1 = layer(input, 400, rng)
        # nori1o = nexp(nori1)
        # nori1o = relu(nori1)
        nori1o = sigmoid(nori1)
        # nori2 = layer(nori1o, 400, rng)
        # # nori2o = nexp(nori2)
        # # nori2o = relu(nori2)
        # nori2o = sigmoid(nori2)
        # nori3 = layer(nori2o, 400, rng)
        # # nori3o = nexp(nori3)
        # nori3o = sigmoid(nori3)
        # fullInput = appendNegated(input)
        # and1 = andLayer(fullInput, rng)
        # red1 = layer(and1, 120, rng)
        # red1o = sigmoid(red1)
        # red1on = appendNegated(red1o)
        # and2 = andLayer(red1on, rng)
        # red2 = layer(and2, 80, rng)
        # red2o = sigmoid(red2)
        # andLast = andLayer(red1o, rng)
        llast = layer(nori1o, data_nextchar.numChars(), rng)
        output = softmax(llast)
        target_ = T.ivector("target")
        errorL = negative_log_likelihood(output, target_)
    else:
        errorL = loadNet(filename)
        input = getInputLayer(errorL)
        output = errorL.below
        llast = output.below
    target = errorL.target
    error = errorL.output()
    nll = negative_log_likelihood(output, target).output()
    lr = T.scalar("lr")
    # setDropoutToAllUnits(llast.below, 0.5, None)
    dumpNetworkStructure(errorL)
    trainFunc = updateFunction(input.output(), target, error,
                               errorL.all_layers_with_params(), lr)
    # setDropoutToAllUnits(llast.below, None, 0.5)
    testFunc = testFunction(input.output(), target, nll, output,
                            allLayerActivationFunctions(errorL))
    minibatchSize = 100
    testingMinibatchSize = 10000
    (testInputs, testOutputs, itxts, otxts) = data_nextchar.prepareMinibatch(
        testingMinibatchSize, context_size, False)
    trainingErrors = []
    t = 0
    while True:
        (inputs, outputs, tritxts, trotxts) = data_nextchar.prepareMinibatch(
            minibatchSize, context_size, True)
        err = trainFunc(inputs.astype(np.float32), outputs.astype(np.int32), 0.01)
        trainingErrors.append(err)
        # print(t, ' training:', r3(err))
        if t < 10 or (t < 100 and t % 10 == 0) or t % 50 == 0:
            trainingErr = np.mean(trainingErrors)
            trainingErrors = []
            tres = testFunc(testInputs.astype(np.float32),
                            testOutputs.astype(np.int32))
            err = tres[0]
            relus = tres[2:]  # per-layer activations returned by testFunc
            # print('num >0  ', ', '.join([f3((r > 0).sum() / r.size) for r in relus]))
            # print('num >0.3', ', '.join([f3((r > .3).sum() / r.size) for r in relus]))
            # print('num >1  ', ', '.join([f3((r > 1).sum() / r.size) for r in relus]))
            # lo = tres[2]
            # print(lo[0, :])
            # print(lo[1, :])
            # print(lo[2, :])
            sm = tres[1]
            smo = np.argsort(sm, axis=1)[:, ::-1]  # predictions, best first
            # print(smo[1:3])
            # hits[o] counts how often the true character is the o-th ranked
            # prediction, so hits[0] is the top-1 hit count.
            hits = [0] * smo.shape[1]
            for v in range(smo.shape[0]):
                for o in range(smo.shape[1]):
                    if smo[v, o] == testOutputs[v]:
                        hits[o] += 1
                        break  # each class occurs once per row; was `continue`
            print(t, "testing:", r3(err), "training:", r3(trainingErr), hits)
            sys.stdout.flush()
            # for m in range(50):
            #     print(titxt[m], totxt[m],
            #           [charidToChar(smo[m, p]) + " {:9.7f}".format(sm[m, smo[m, p]])
            #            for p in range(5)])
        # if saveWeights and ((t < 1000 and t % 100 == 0) or t % 1000 == 0):
        #     filename = 'netm.json'
        #     with open(filename, 'w') as f:
        #         f.write(json.dumps(errorL.dump()))
        #     print('wrote', filename)
        t += 1
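# The `hits` bookkeeping in learn_logic is a rank histogram: hits[o] counts
# test examples whose true class is the model's o-th ranked prediction, so
# hits[0] is top-1 accuracy. Below is a vectorized sketch of the same
# computation; the name `rank_histogram` is illustrative, not part of this
# project.
def rank_histogram(probs, targets):
    """hits[o] = number of rows whose target is the o-th most probable class."""
    order = np.argsort(probs, axis=1)[:, ::-1]            # best guess first
    ranks = np.argmax(order == targets[:, None], axis=1)  # rank of true class
    return np.bincount(ranks, minlength=probs.shape[1])

# Example (illustrative):
#   probs = np.array([[0.1, 0.6, 0.2, 0.1],
#                     [0.4, 0.3, 0.2, 0.1]])
#   rank_histogram(probs, np.array([1, 2]))  # -> array([1, 0, 1, 0])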