gradient_checker = ModelGradientChecker(
    CostMinimizationObjective(
        cost=cost_function,
        data_provider=validation_data_provider,
        regularizer=regularizer))

n_epochs = 1
n_batches = train_data_provider.batches_per_epoch * n_epochs

time_start = time.time()

costs = []
prev_weights = tweet_model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    # Every 10 batches, evaluate on a held-out validation batch.
    if batch_index % 10 == 0:
        X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
        Y_hat = tweet_model.fprop(X_valid, meta=meta_valid)

        # The softmax outputs should sum to one for every example.
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        # This is really slow:
        # grad_check = gradient_checker.check(tweet_model)
        grad_check = "skipped"

        acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))

        print "B: {}, A: {}, C: {}, Prop1: {}, Param size: {}, g: {}".format(
            batch_index,
            acc,
            costs[-1],
            np.argmax(Y_hat, axis=1).mean(),
            np.mean(np.abs(tweet_model.pack())),
            grad_check)
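# The gradient check above is disabled because checking every parameter by
# finite differences is slow: each parameter needs two extra cost evaluations.
# The sketch below illustrates the idea on a toy cost; numerical_gradient and
# toy_cost are hypothetical helpers written for this illustration only, not
# part of the library used above.
import numpy as np


def numerical_gradient(f, w, eps=1e-6):
    # Central differences: perturb each parameter in turn and measure the
    # change in the cost.
    grad = np.zeros_like(w)
    for i in xrange(w.size):
        w_plus, w_minus = w.copy(), w.copy()
        w_plus[i] += eps
        w_minus[i] -= eps
        grad[i] = (f(w_plus) - f(w_minus)) / (2 * eps)
    return grad


def toy_cost(w):
    # Quadratic cost with a known analytic gradient of 2 * w.
    return np.sum(w ** 2)


w = np.random.randn(5)
# The discrepancy should be tiny (around 1e-8 or smaller).
print np.max(np.abs(numerical_gradient(toy_cost, w) - 2 * w))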
def optimize_and_save(model, alphabet, n_batches, data_file_name,
                      chars_or_words, result_file_name):
    print result_file_name

    with gzip.open(data_file_name) as data_file:
        data = json.loads(data_file.read())
    X, Y = map(list, zip(*data))

    # shuffle
    combined = zip(X, Y)
    random.shuffle(combined)
    X, Y = map(list, zip(*combined))

    # map labels to something useful
    Y = [[":)", ":("].index(y) for y in Y]

    if chars_or_words == 'chars':
        X = [list(x) for x in X]
    elif chars_or_words == 'words':
        # replace unknowns with an unknown character
        tokenizer = WordPunctTokenizer()
        new_X = []
        for x in X:
            new_X.append(
                [w if w in alphabet else 'UNKNOWN'
                 for w in tokenizer.tokenize(x)])
        X = new_X
    else:
        raise ValueError("I don't know what that means :(")

    # Hold out the last 500 examples for validation.
    train_data_provider = LabelledSequenceMinibatchProvider(
        X=X[:-500],
        Y=Y[:-500],
        batch_size=50,
        padding='PADDING')

    validation_data_provider = LabelledSequenceMinibatchProvider(
        X=X[-500:],
        Y=Y[-500:],
        batch_size=500,
        padding='PADDING')

    cost_function = CrossEntropy()

    objective = CostMinimizationObjective(
        cost=cost_function,
        data_provider=train_data_provider)

    update_rule = AdaGrad(
        gamma=0.05,
        model_template=model)

    regularizer = L2Regularizer(lamb=1e-4)

    optimizer = SGD(
        model=model,
        objective=objective,
        update_rule=update_rule,
        regularizer=regularizer)

    print model

    monitor_info = []
    iteration_info = []
    for batch_index, info in enumerate(optimizer):
        iteration_info.append(info)

        # Every 10 batches, record accuracy and prediction balance on the
        # validation batch.
        if batch_index % 10 == 0:
            X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
            Y_hat = model.fprop(X_valid, meta=meta_valid)
            assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

            acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
            prop_1 = np.argmax(Y_hat, axis=1).mean()

            monitor_info.append({
                'batch_index': batch_index,
                'acc': acc,
                'prop_1': prop_1,
            })

            print "B: {}, A: {}, C: {}, Prop1: {}, Param size: {}".format(
                batch_index, acc, info['cost'], prop_1,
                np.mean(np.abs(model.pack())))

        if batch_index == n_batches - 1:
            break

    result = {
        'model': model,
        'iteration_info': iteration_info,
        'monitor_info': monitor_info,
    }

    with open(result_file_name, 'wb') as result_file:
        pickle.dump(result, result_file, protocol=-1)
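# For reference, the AdaGrad rule used above scales each parameter's step by
# the inverse square root of its accumulated squared gradients, so frequently
# updated parameters take smaller steps over time.  This is a minimal sketch
# of that idea on a flat parameter vector; it is not the library's AdaGrad
# class, and the eps stabilizer is an assumption of the sketch.
import numpy as np


def adagrad_step(w, grad, history, gamma=0.05, eps=1e-8):
    # Accumulate squared gradients, then divide the base step gamma by the
    # per-parameter root of that history.
    history = history + grad ** 2
    w = w - gamma * grad / (np.sqrt(history) + eps)
    return w, history


w, history = np.zeros(3), np.zeros(3)
w, history = adagrad_step(w, np.array([1.0, 0.1, -0.5]), history)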
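# A hypothetical invocation of optimize_and_save.  The model, alphabet and
# file names below are placeholders: the function expects a gzipped JSON list
# of (text, label) pairs where every label is ":)" or ":(", and it pickles the
# trained model together with its monitoring history to result_file_name.
optimize_and_save(
    model=tweet_model,
    alphabet=alphabet,
    n_batches=1000,
    data_file_name='tweets.json.gz',
    chars_or_words='words',
    result_file_name='tweet_model_result.pkl')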