def build_model(data_set, data_split, no_interactions, negative, max_snps, cross_validation, output_dir):
    """
    Builds a model using logistic regression and an elastic net penalty

    :param data_set: The feature data set
    :param data_split: The percentage of data to use for testing the model
    :param no_interactions: If True interactions will not be included in the model
    :param negative: The negative phenotype label
    :param max_snps: The maximum number of SNPs for the model to include
    :param cross_validation: Cross-validation setting, forwarded unchanged to common.build_model
    :param output_dir: The directory to write the model to
    """
    # Candidate l1_ratio values 0.0, 0.05, ..., 0.95 (1.0 is excluded).
    # Each value is derived from its index rather than by repeatedly adding the
    # step: accumulating 0.05 in floating point drifts
    # (0.1 + 0.05 == 0.15000000000000002), which pollutes the grid and makes the
    # number of entries depend on rounding error at the upper bound.
    step_size = 0.05
    n_steps = int(round(1 / step_size))
    l1_ratios = [i / float(n_steps) for i in range(n_steps)]

    param_grid = {'l1_ratio': l1_ratios}
    model_eval = {'roc': get_roc_probs, 'features': save_features}

    # NOTE(review): recent scikit-learn releases spell this loss "log_loss";
    # confirm against the pinned scikit-learn version before upgrading.
    classifier = SGDClassifier(loss="log", penalty="elasticnet", random_state=1,
                               n_jobs=-1, max_iter=1000, tol=1e-3)

    common.build_model(
        data_set,
        data_split,
        no_interactions,
        negative,
        classifier,
        cross_validation,
        max_snps,
        output_dir,
        param_grid,
        model_eval
    )
def build_model(data_set, data_split, no_interactions, negative, max_snps, cross_validation, output_dir):
    """
    Sets up a decision tree classifier together with a hyper-parameter grid
    and hands both to common.build_model.

    :param data_set: Forwarded to common.build_model
    :param data_split: Forwarded to common.build_model
    :param no_interactions: Not read by this function; True is always passed in its place
    :param negative: Forwarded to common.build_model
    :param max_snps: Forwarded to common.build_model
    :param cross_validation: Forwarded to common.build_model
    :param output_dir: Forwarded to common.build_model
    """
    evaluators = {'features': save_features}
    classifier = tree.DecisionTreeClassifier(random_state=1)

    search_space = {
        "criterion": ["gini", "entropy"],
        # Floats are fractions: ceil(min_samples_split * n_samples) is the
        # minimum number of samples for each split.
        "min_samples_split": [.01, .015, .02, .025],
        # int or None. None allows a full tree.
        "max_depth": [None, 4, 5],
        # Floats are fractions: ceil(min_samples_leaf * n_samples) is the
        # minimum number of samples for each leaf.
        # Berry and Linoff: .0025 to .01
        "min_samples_leaf": [.0025, .005, .01, .015],
        # Floats are fractions: int(max_features * n_features) features are
        # considered at each split. 'auto' means sqrt(n_features); None means
        # n_features. "A Complete Tutorial on Tree Based Modeling from Scratch
        # (in R & Python)" suggests 30 to 40%.
        "max_features": [
            .3,
            .4,
            .5,
        ]
    }

    common.build_model(
        data_set,
        data_split,
        True,
        negative,
        classifier,
        cross_validation,
        max_snps,
        output_dir,
        search_space,
        evaluators
    )
def build_model(dataset, data_split, no_interactions, negative, max_snps, cross_validation, output_dir):
    """
    Sets up a random forest classifier with a single fixed parameter
    combination and hands both to common.build_model.

    :param dataset: Forwarded to common.build_model
    :param data_split: Forwarded to common.build_model
    :param no_interactions: Not read by this function; True is always passed in its place
    :param negative: Forwarded to common.build_model
    :param max_snps: Forwarded to common.build_model
    :param cross_validation: Forwarded to common.build_model
    :param output_dir: Forwarded to common.build_model
    """
    # Wider grid for testing combinations of parameters. It is kept for
    # reference only; this function does not pass it on.
    exploratory_grid = {
        "criterion": ["gini", "entropy"],
        # Floats are fractions: ceil(min_samples_split * n_samples) is the
        # minimum number of samples for each split.
        "min_samples_split": [.01, .015, .02, .025],
        # int or None. None allows a full tree.
        "max_depth": [None, 4, 5],
        # Floats are fractions: ceil(min_samples_leaf * n_samples) is the
        # minimum number of samples for each leaf.
        # Berry and Linoff: .0025 to .01
        "min_samples_leaf": [.0025, .005, .01, .015],
        # Floats are fractions: int(max_features * n_features) features are
        # considered at each split. 'auto' means sqrt(n_features); None means
        # n_features. "A Complete Tutorial on Tree Based Modeling from Scratch
        # (in R & Python)" suggests 30 to 40%.
        "max_features": ["sqrt", .3, .4, .5],
        "n_estimators": [500, 1000, 3000]
    }

    # The one combination that is actually handed to common.build_model.
    fixed_grid = {
        'n_estimators': [1000],
        'criterion': ['gini'],
        'max_features': ['sqrt'],
        'max_depth': [None],
        'min_samples_leaf': [0.0025],
        'min_samples_split': [0.01]
    }

    evaluators = {'features': save_features}
    forest = RandomForestClassifier(n_jobs=-1)

    common.build_model(
        dataset,
        data_split,
        True,
        negative,
        forest,
        cross_validation,
        max_snps,
        output_dir,
        param_grid=fixed_grid,
        model_eval=evaluators
    )
params["max_inter_stimuli_interval_timestep"])
# NOTE(review): the line above is the tail of a call that starts before this
# excerpt; it is kept verbatim.

# Count stimuli each neuron should emit
neuron_stimuli_counts = [len(n) for n in neuron_stimuli_times]

# Report elapsed stimulus generation time; the perf_counter difference is
# scaled by 1000 for the millisecond printout.
stim_gen_end_time = perf_counter()
print("Stimulus generation time: %fms" % ((stim_gen_end_time - stim_gen_start_time) * 1000.0))

# ----------------------------------------------------------------------------
# Network creation
# ----------------------------------------------------------------------------
# Assert that duration is a multiple of record time
assert (params["duration_timestep"] % params["record_time_timestep"]) == 0

# Build base model
# build_model returns five values, unpacked here as the model followed by four
# population/connection handles (presumably E, I, E->E and E->I; verify against
# build_model).
model, e_pop, i_pop, e_e_pop, e_i_pop = build_model("izhikevich_pavlovian_gpu_stim", params, reward_timesteps)

# Current source parameters
curr_source_params = {"n": 6.5, "stimMagnitude": params["stimuli_current"]}

# Calculate start and end indices of stimuli to be injected by each current source
# The first params["num_excitatory"] counts go to the excitatory source, the
# remaining counts to the inhibitory one.
start_exc_stimuli, end_exc_stimuli = get_start_end_stim(neuron_stimuli_counts[:params["num_excitatory"]])
start_inh_stimuli, end_inh_stimuli = get_start_end_stim(neuron_stimuli_counts[params["num_excitatory"]:])

# Current source initial state
exc_curr_source_init = {"startStim": start_exc_stimuli, "endStim": end_exc_stimuli}
inh_curr_source_init = {"startStim": start_inh_stimuli, "endStim": end_inh_stimuli}

# Add background current sources
# "ECurr" is attached to the population named "E" using stim_noise_model.
e_curr_pop = model.add_current_source("ECurr", stim_noise_model, "E", curr_source_params, exc_curr_source_init)
# Evaluates the model on one randomly chosen spectrogram twice: once with
# freshly built (untrained) weights and once after loading the latest
# checkpoint, printing the accuracy each time.
#
# Usage: python <script> <spectrogram_dir> <checkpoint_dir>
if len(sys.argv) < 3:
    print("Usage: python " + sys.argv[0] + " <spectrogram_dir> <checkpoint_dir>")
    sys.exit(-1)

spectroDir = sys.argv[1]
spectroDirPath = Path(spectroDir)
if not spectroDirPath.exists():
    print("Could not find directory " + spectroDir)
    sys.exit(-1)

checkpointDir = sys.argv[2]
checkpointDirPath = Path(checkpointDir)
if not checkpointDirPath.exists():
    # Name the checkpoint directory here; this message previously reported
    # the spectrogram directory, which is the wrong path.
    print("Could not find directory " + checkpointDir)
    sys.exit(-1)

model = common.build_model()
model.summary()

spectroFiles = list(spectroDirPath.glob('*.npy'))
if not spectroFiles:
    # Without this guard spectroFiles[0] below raises IndexError.
    print("Could not find any .npy spectrogram files in " + spectroDir)
    sys.exit(-1)

# Shuffle so that the first entry is a random pick from the directory.
random.shuffle(spectroFiles)
S, batchedDataset = common.build_dataset_from_spectrogram(spectroFiles[0].resolve())

# Baseline: accuracy before any checkpoint weights are loaded.
loss, acc = model.evaluate(batchedDataset.repeat(), steps=100)
print("Untrained model, accuracy: {:5.2f}%".format(100*acc))

# tf.train.latest_checkpoint returns None when the directory holds no
# checkpoint; stop with a clear message instead of failing inside load_weights.
latestCheckpoint = tf.train.latest_checkpoint(str(common.get_latest_checkpoint_dir(checkpointDirPath)))
if latestCheckpoint is None:
    print("Could not find a checkpoint in " + checkpointDir)
    sys.exit(-1)
model.load_weights(latestCheckpoint)

loss, acc = model.evaluate(batchedDataset.repeat(), steps=100)
print("Trained model, accuracy: {:5.2f}%".format(100*acc))
seed_S = np.load(seed_file_str) # Now the fun part: Use the seed data to feed the first part of the RNN and let the model feedback into itself # to generate enough data to make a song of the specified song length # Restore the most recent deepnickelback model print("Loading model...") # Let's change the batch size of the model to be just 1 model_version = dnb_constants.DEEP_NICKELBACK_VERSION batch_size = 1 seq_len = 1 stateful = True model = common.build_model(batch_size=batch_size, checkpoint_base_dir=checkpt_base_dirpath, version=model_version, stateful=stateful, compile=False) model.build(tf.TensorShape([batch_size, None, dnb_constants.NUM_MEL_CHANNELS])) model.summary() print("Generating terrible music...") sr = 22050 S = common.generate_terrible_mel_spectrogram(model, seed_S, sr, song_duration_s, stateful) if "s" in model_version: # De-standardize the spectrogram file with open(data_funcs.get_std_spec_filepath(seed_filepath), 'r') as spec_file: mean = ast.literal_eval(spec_file.readline())
hidden_units = results.hidden_units
epochs = results.epochs
gpu = results.gpu

# Coerce the settings to numeric types. Going through float() first lets the
# integer settings accept float-formatted input such as "512.0" as well.
learning_rate = float(learning_rate)
hidden_units = int(float(hidden_units))
epochs = int(float(epochs))

# exist_ok=True creates the directory only when needed and avoids the
# check-then-create race of testing os.path.exists() beforehand.
os.makedirs(save_dir, exist_ok=True)

# Query CUDA availability once and reuse the answer for the warning, the
# status message and the device selection.
cuda_available = torch.cuda.is_available()

if gpu and not cuda_available:
    print("There is no a gpu device available")

trainloader, validloader, testloader, class_to_idx = load_data(data_directory)

model = build_model(arch, hidden_units)

message_cuda = "cuda is available" if cuda_available else "cuda is not available"
print(message_cuda)

# NOTE(review): the device is chosen from availability alone; the `gpu` flag
# only triggers the warning above. Confirm whether the flag should also be
# able to force CPU training.
device = torch.device("cuda" if cuda_available else "cpu")

criterion = nn.NLLLoss()
# Only train the classifier parameters
optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

# We move our model to cuda, if this is available.
model.to(device)
# Generate the deepnickelback network from a given directory of # mel spectrographs of each of their terrible songs spectroFiles = list(spectroDirPath.glob('*.npy')) print("The following spectrogram files were found:") for sf in spectroFiles: print(str(sf)) #LOG_DIR = "./logs/training" #file_writer = tf.contrib.summary.create_file_writer(LOG_DIR) #tf.summary.trace_on(graph=True, profiler=True) # Create and/or load our model with the most recent checkpoint CURR_BATCH_SIZE = common.BATCH_SIZE model = common.build_model(batch_size=CURR_BATCH_SIZE, checkpoint_base_dir=checkpoint_base_dirpath, stateful=False) model.summary() model.reset_states() checkpoint_prefix = str( checkpoint_dirpath.joinpath("dnb_v" + dnb_constants.DEEP_NICKELBACK_VERSION + "_epoch{epoch:04d}_loss{loss:.8f}") ) #str(checkpoint_dirpath.joinpath(dnb_constants.FULL_MODEL_FILE_NAME).resolve()) checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=checkpoint_prefix, save_weights_only=True, monitor='loss', save_best_only=True, period=1,