def read_from_file(modelName, noDataset=False, debugDataset=False, simpleLoading=None):
    """Load a saved model (and optionally its dataset) from ./saved_models.

    Prompts on stdin until a file with the given name can be opened.
    Optional overrides: simpleLoading sets settings['simple_data_loading'];
    debugDataset caps settings['max_dataset_size'] at 1000.
    Returns (dataset, rnn, settings).
    """
    # Prompt repeatedly until we can open a model file with the given name.
    model_file = None
    while model_file is None:
        try:
            model_file = open('./saved_models/' + modelName, 'r')
        except IOError:
            modelName = raw_input(
                "This model does not exist! Please provide the name of the model you want to inspect:\n"
            )

    savedVars, settings = load_from_pickle(model_file)
    print(settings)

    # Apply caller-supplied overrides to the stored settings before
    # reconstructing the models from them.
    if simpleLoading is not None:
        settings['simple_data_loading'] = simpleLoading
    if debugDataset:
        settings['max_dataset_size'] = 1000

    # NOTE(review): 'noDataset' is forwarded as the 'dataset' keyword here,
    # while other call sites in this project pass a 'noDataset' keyword to
    # constructModels — confirm this is the intended parameter name.
    dataset, rnn = constructModels(settings, None, None, dataset=noDataset)

    # Restore the trained parameter values into the freshly built model.
    rnn.loadVars(savedVars)
    model_file.close()
    return dataset, rnn, settings
def read_from_file(modelName):
    """Load a saved model and its dataset from ./saved_models.

    Prompts on stdin until a file with the given name can be opened.
    Returns (dataset, rnn, settings).
    """
    # Keep trying to get a right filename
    while (True):
        try:
            f = open('./saved_models/' + modelName, 'r')
            break
        except IOError:
            modelName = raw_input(
                "This model does not exist! Please provide the name of the model you want to inspect:\n"
            )

    # FIX: the original never closed the file handle; ensure it is closed
    # once the pickle payload has been read, even if unpickling fails.
    try:
        savedVars, settings = load_from_pickle(f)
    finally:
        f.close()

    print(settings)

    # Rebuild the models from the stored settings, then restore the
    # trained parameter values into the fresh model.
    dataset, rnn = constructModels(settings, None, None)
    rnn.loadVars(savedVars)
    return dataset, rnn, settings
# Print parameters printF(str(parameters), experimentId, currentIteration) # Warn for unusual parameters if (parameters['max_dataset_size'] is not False): printF("WARNING! RUNNING WITH LIMIT ON DATASET SIZE!", experimentId, currentIteration) if (not using_gpu()): printF("WARNING! RUNNING WITHOUT GPU USAGE!", experimentId, currentIteration) # Set simple loading processor processor = processSampleFindX # Construct models dataset, model = constructModels(parameters, 0, {}) # Load pretrained only_cause_expression = 1 model if (parameters['load_cause_expression_1'] is not False): loadedVars, _ = load_from_pickle_with_filename( "./saved_models/" + parameters['load_cause_expression_1']) if (model.loadPartialDataDimVars(dict(loadedVars), 0, model.data_dim)): printF("Loaded pretrained model (expression 1) successfully!", experimentId, currentIteration) else: raise ValueError( "Loading pretrained model failed: wrong variables supplied!" ) # Train on all datasets in succession
# Ask for seed if running random baseline seed = 0 if (parameters['random_baseline']): seed = int( raw_input( "Please provide an integer seed for the random number generation: " )) # Warn for unusual parameters if (parameters['max_training_size'] is not False): print("WARNING! RUNNING WITH LIMIT ON TRAINING SIZE!") if (not using_gpu()): print("WARNING! RUNNING WITHOUT GPU USAGE!") # Construct models datasets, rnn = constructModels(parameters, seed, verboseOutputter) ### From here the experiment should be the same every time # Start experiment clock start = time.clock() # Train on all datasets in succession train(rnn, datasets, parameters, name, start, saveModels=saveModels, targets=not parameters['single_digit'], verboseOutputter=verboseOutputter)
from tools.file import load_from_pickle_with_filename
from tools.model import constructModels
import theano

if __name__ == '__main__':
    # Export a saved model's weights as float32 values into a companion
    # '.floats' file alongside the original '.model' file.
    theano.config.floatX = 'float32'
    name = sys.argv[1]
    filepath = "./saved_models/%s.model" % name
    if (os.path.isfile(filepath)):
        modelName = name
        result = load_from_pickle_with_filename(filepath)
        # load_from_pickle_with_filename signals failure by returning False.
        if (result is not False):
            savedVars, settings = result
            dataset, rnn = constructModels(settings, 0, None)
            modelSet = rnn.loadVars(dict(savedVars))
            if (modelSet):
                modelInfo = settings
                # Collect every shared variable's value, downcast to float32,
                # keyed by variable name (sorted for a deterministic order).
                floats = {}
                for key in sorted(rnn.vars.keys()):
                    floats[key] = rnn.vars[key].get_value().astype('float32')
                # The second line of the .model file holds the settings
                # header; copy it verbatim into the .floats file.
                f_model = open(filepath)
                _ = f_model.readline()
                settingsLine = f_model.readline()
                f_model.close()
                f = open('./saved_models/%s.floats' % name, 'wb')
                f.writelines(['###\n', settingsLine])
                # NOTE(review): 'f' is not closed within this visible chunk —
                # confirm the file continues with f.close() (or refactor to
                # a with-block).
                pickle.dump(floats.items(), f)
def testExists(self):
    # Verify that the prefix-trie expression storage agrees with a brute-force
    # nearest-expression search: (1) every known expression is found exactly,
    # (2) after random single-symbol mutations, the trie's get_closest matches
    # an exhaustive string_difference scan over all valid completions.
    params = [
        '--finish_subsystems', 'True', '--only_cause_expression', '1',
        '--dataset', '../data/subsystems_shallow_simple_topcause',
        "--sample_testing_size", "10000", "--n_max_digits", "17",
        "--intervention_base_offset", "0", "--intervention_range", "17",
        "--nesterov_optimizer", "True", "--decoder", "True",
        "--learning_rate", "0.005", "--hidden_dim", "256"
    ]
    params = processCommandLineArguments(params)
    datasets, _ = constructModels(params, 1234, {})
    dataset = datasets[0]
    storage = dataset.expressionsByPrefix
    # Known-present expressions to probe the storage with.
    expressions = [
        "(3-9)*(0-3)=18", "(4-7)+(6*5)=27", "(0/6)+(2*8)=16",
        "(1-4)+(3+6)=6", "(6+0)+(0-1)=5"
    ]
    for i, expr in enumerate(expressions):
        # Direct existence lookup must succeed for stored expressions.
        self.assertEqual(
            storage.exists(expr), True,
            "(exists) Failing exists lookup for sample %d" % i)
        # Branch-based lookup: descend by a 4-char prefix, then ask the
        # branch for the closest completion of the remainder.
        _, _, _, _, branch = storage.get(expr[:4], alsoGetStructure=True)
        closest, _, _, _ = branch.get_closest(expr[4:])
        self.assertNotEqual(
            closest, False,
            "(exists) Branch-based lookup failed with False for sample %d: %s"
            % (i, closest))
        self.assertEqual(
            closest, expr,
            "(exists) Failing branch-based lookup for sample %d: %s"
            % (i, closest))

        # Apply mutations and test if both methods get the same new label
        for n in range(20):
            # Replace one random symbol of the expression with a random
            # symbol from the dataset's alphabet.
            intervention_location = np.random.randint(0, len(expr))
            new_symbol = np.random.randint(dataset.data_dim)
            new_expression = expr[:intervention_location] + dataset.findSymbol[
                new_symbol] + expr[intervention_location + 1:]
            print("Old: %s\tNew: %s" % (expr, new_expression))
            _, _, valids, _, branch = storage.get(
                new_expression[:intervention_location + 1],
                alsoGetStructure=True)
            # Only compare the two methods when the mutated expression is
            # not itself stored but the prefix has other valid completions.
            if (new_expression not in valids and len(valids) > 0):
                # Old method: compare all
                profiler.start('old')
                nearest = -1
                nearest_score = 100000
                for j, nexpr in enumerate(valids):
                    score = string_difference(new_expression, nexpr)
                    if (score < nearest_score):
                        nearest = j
                        nearest_score = score
                closest_old = valids[nearest]
                profiler.stop('old')
                profiler.start('new')
                # New method:
                closest_new, _, _, _ = branch.get_closest(
                    new_expression[intervention_location + 1:])
                profiler.stop('new')
                # Mismatches are only reported, not asserted — the assertion
                # below was deliberately disabled (ties can differ validly).
                if (closest_old != closest_new):
                    print(
                        "(exists) Intervened closest do not match for sample %d: loc %d / orig %s / int %s / old %s / new %s"
                        % (i, intervention_location, expr, new_expression,
                           closest_old, closest_new))
                # self.assertEqual(closest_old, closest_new,
                #                  "(exists) Intervened closest do not match for sample %d: loc %d / orig %s / int %s / old %s / new %s" %
                #                  (i, intervention_location, expr, new_expression, closest_old, closest_new));
    profiler.profile()
# Process parameters parameters = processCommandLineArguments(sys.argv[1:]) # Specific settings - default name is time of experiment name = parameters['output_name'] + time.strftime("_%d-%m-%Y_%H-%M-%S") saveModels = True # Warn for unusual parameters if (parameters['max_training_size'] is not False): print("WARNING! RUNNING WITH LIMIT ON TRAINING SIZE!") if (not using_gpu()): print("WARNING! RUNNING WITHOUT GPU USAGE!") # Construct models _, model = constructModels(parameters, 0, {}, noDataset=True) # Load data dataset_data = load_data(parameters) # Train on all datasets in succession # Print settings headers to raw results file print("# " + str(parameters)) # Compute batching variables repetition_size = len(dataset_data) if (parameters['max_training_size'] is not False): repetition_size = min(parameters['max_training_size'], repetition_size) next_testing_threshold = parameters['test_interval'] * repetition_size for r in range(parameters['repetitions']):
if (not using_gpu()): print("WARNING! RUNNING WITHOUT GPU USAGE!"); # Check for valid subbatch size if (parameters['minibatch_size'] % parameters['subbatch_size'] != 0): raise ValueError("Subbatch size is not compatible with minibatch size: m.size = %d, s.size = %d" % (parameters['minibatch_size'], parameters['subbatch_size'])); # Check for valid intervention ranges if (parameters['intervention_base_offset'] <= 0): raise ValueError("Invalid intervention base offset: is %d, must be at least 1." % parameters['intervention_base_offset']); # Construct models dataset, _ = constructModels(parameters, 0, {}, noModel=True); actual_data_dim = dataset.data_dim; if (parameters['only_cause_expression'] is False): actual_data_dim *= 2; model = Autoencoder(actual_data_dim, parameters['hidden_dim'], parameters['minibatch_size'], parameters['n_max_digits'], parameters['learning_rate'], dataset.GO_symbol_index, dataset.EOS_symbol_index, parameters['only_cause_expression']); # Train on all datasets in succession # Print settings headers to raw results file print("# " + str(parameters)); # Compute batching variables repetition_size = dataset.lengths[dataset.TRAIN]; if (parameters['max_training_size'] is not False): repetition_size = min(parameters['max_training_size'],repetition_size); next_testing_threshold = parameters['test_interval'] * repetition_size;