Example 1
def build_model(data_set, data_split, no_interactions, negative, max_snps,
                cross_validation, output_dir):
    """
    Builds a model using logistic regression and an elastic net penalty
    :param data_set: The feature data set
    :param data_split: The percentage of data to use for testing the model
    :param no_interactions: If True interactions will not be included in the model
    :param negative: The negative phenotype label
    :param max_snps: The maximum number of SNPs for the model to include
    :param output_dir: The directory to write the model to
    """
    # Build the grid of elastic net mixing ratios: 0.0, 0.05, ..., 0.95
    l1_ratio = 0.0
    l1_ratios = []
    step_size = 0.05
    while l1_ratio < 1:
        l1_ratios.append(round(l1_ratio, 2))
        l1_ratio += step_size

    param_grid = {'l1_ratio': l1_ratios}

    model_eval = {'roc': get_roc_probs, 'features': save_features}

    common.build_model(
        data_set, data_split, no_interactions, negative,
        SGDClassifier(loss="log",  # logistic loss (renamed "log_loss" in scikit-learn >= 1.1)
                      penalty="elasticnet",
                      random_state=1,
                      n_jobs=-1,
                      max_iter=1000,
                      tol=1e-3), cross_validation, max_snps, output_dir,
        param_grid, model_eval)
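For context, here is a minimal sketch of how a grid like this is typically consumed. It assumes common.build_model wraps scikit-learn's GridSearchCV (that helper is not shown in this listing), so the scoring choice and the X_train / y_train names are placeholders only.

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV

# Hypothetical sketch: grid search over the elastic net mixing ratio for a
# logistic-loss SGD classifier.
estimator = SGDClassifier(loss="log", penalty="elasticnet",
                          random_state=1, max_iter=1000, tol=1e-3)
search = GridSearchCV(estimator,
                      param_grid={"l1_ratio": [round(i * 0.05, 2) for i in range(20)]},
                      cv=5, n_jobs=-1)
# search.fit(X_train, y_train) would then pick the best trade-off between
# L1 (sparsity) and L2 (ridge) regularisation.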
Example 2
def build_model(data_set, data_split, no_interactions, negative, max_snps,
                cross_validation, output_dir):
    param_grid = {
        "criterion": ["gini", "entropy"],
        # If min_samples_split is a float it is treated as a fraction:
        # ceil(min_samples_split * n_samples) is the minimum number of samples
        # required to split a node (see the worked example below).
        "min_samples_split": [.01, .015, .02, .025],
        "max_depth": [None, 4, 5],  # int or None; None allows a full tree
        # If min_samples_leaf is a float it is treated as a fraction:
        # ceil(min_samples_leaf * n_samples) is the minimum number of samples
        # required at each leaf.
        "min_samples_leaf": [.0025, .005, .01,
                             .015],  # Berry and Linoff suggest .0025 to .01
        # If max_features is a float it is treated as a fraction:
        # int(max_features * n_features) features are considered at each split.
        # 'auto' means max_features = sqrt(n_features); None means all features.
        "max_features": [
            .3,
            .4,
            .5,
        ]  # "A Complete Tutorial on Tree Based Modeling from Scratch (in R & Python)" suggests 30 to 40%
    }

    model_eval = {'features': save_features}

    # no_interactions is hard-coded to True here, so interaction terms are
    # always excluded from the decision tree model.
    common.build_model(data_set, data_split, True, negative,
                       tree.DecisionTreeClassifier(random_state=1),
                       cross_validation, max_snps, output_dir, param_grid,
                       model_eval)
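The fractional values in the grid only acquire concrete meaning once the data set size is known. A quick worked example, assuming a hypothetical training set of 2,000 samples and 500 features:

import math

# Hypothetical worked example: how fractional grid values translate into
# absolute thresholds (n_samples = 2000 and n_features = 500 are made up).
n_samples, n_features = 2000, 500
min_samples_split = math.ceil(0.01 * n_samples)   # ceil(0.01 * 2000)   = 20 samples to split a node
min_samples_leaf = math.ceil(0.0025 * n_samples)  # ceil(0.0025 * 2000) = 5 samples per leaf
max_features = int(0.3 * n_features)              # int(0.3 * 500)      = 150 candidate features per split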
Example 3
def build_model(dataset, data_split, no_interactions, negative, max_snps,
                cross_validation, output_dir):
    model_eval = {'features': save_features}

    default_grid = {
        'n_estimators': [1000],
        'criterion': ['gini'],
        'max_features': ['sqrt'],
        'max_depth': [None],
        'min_samples_leaf': [0.0025],
        'min_samples_split': [0.01]
    }

    # For testing combinations of parameters
    param_grid = {
        "criterion": ["gini", "entropy"],
        # If min_samples_split is a float it is treated as a fraction:
        # ceil(min_samples_split * n_samples) is the minimum number of samples
        # required to split a node.
        "min_samples_split": [.01, .015, .02, .025],
        "max_depth": [None, 4, 5],  # int or None; None allows a full tree
        # If min_samples_leaf is a float it is treated as a fraction:
        # ceil(min_samples_leaf * n_samples) is the minimum number of samples
        # required at each leaf.
        "min_samples_leaf": [.0025, .005, .01,
                             .015],  # Berry and Linoff suggest .0025 to .01
        # If max_features is a float it is treated as a fraction:
        # int(max_features * n_features) features are considered at each split.
        # 'sqrt' (or the legacy 'auto') means max_features = sqrt(n_features); None means all features.
        "max_features": ["sqrt", .3, .4, .5],
        # "A Complete Tutorial on Tree Based Modeling from Scratch (in R & Python)" suggests 30 to 40%
        "n_estimators": [500, 1000, 3000]
    }

    common.build_model(dataset,
                       data_split,
                       True,
                       negative,
                       RandomForestClassifier(n_jobs=-1),
                       cross_validation,
                       max_snps,
                       output_dir,
                       param_grid=default_grid,
                       model_eval=model_eval)
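Note that only default_grid, a single fixed parameter combination, is actually passed to common.build_model; the larger param_grid defined above it is the full sweep marked "for testing combinations of parameters". Running that sweep would simply mean swapping the keyword argument, for example:

# Hypothetical: run the full random forest sweep instead of the fixed defaults
common.build_model(dataset, data_split, True, negative,
                   RandomForestClassifier(n_jobs=-1),
                   cross_validation, max_snps, output_dir,
                   param_grid=param_grid, model_eval=model_eval)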
Example 4
                                                params["max_inter_stimuli_interval_timestep"])

# Count stimuli each neuron should emit
neuron_stimuli_counts = [len(n) for n in neuron_stimuli_times]

stim_gen_end_time = perf_counter()
print("Stimulus generation time: %fms" % ((stim_gen_end_time - stim_gen_start_time) * 1000.0))

# ----------------------------------------------------------------------------
# Network creation
# ----------------------------------------------------------------------------
# Assert that duration is a multiple of record time
assert (params["duration_timestep"] % params["record_time_timestep"]) == 0

# Build base model
model, e_pop, i_pop, e_e_pop, e_i_pop = build_model("izhikevich_pavlovian_gpu_stim", 
                                                    params, reward_timesteps)

# Current source parameters
curr_source_params = {"n": 6.5, "stimMagnitude": params["stimuli_current"]}

# Calculate start and end indices of stimuli to be injected by each current source
start_exc_stimuli, end_exc_stimuli = get_start_end_stim(neuron_stimuli_counts[:params["num_excitatory"]])
start_inh_stimuli, end_inh_stimuli = get_start_end_stim(neuron_stimuli_counts[params["num_excitatory"]:])

# Current source initial state
exc_curr_source_init = {"startStim": start_exc_stimuli, "endStim": end_exc_stimuli}
inh_curr_source_init = {"startStim": start_inh_stimuli, "endStim": end_inh_stimuli}

# Add background current sources
e_curr_pop = model.add_current_source("ECurr", stim_noise_model, "E", 
                                      curr_source_params, exc_curr_source_init)
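get_start_end_stim itself does not appear in this listing; a plausible sketch, assuming it converts per-neuron stimulus counts into contiguous [start, end) offsets into a flattened array of stimulus times via a cumulative sum:

import numpy as np

# Hypothetical sketch of get_start_end_stim: per-neuron stimulus counts ->
# [start, end) offsets into a flattened array of stimulus times.
def get_start_end_stim(stim_counts):
    end_stimuli = np.cumsum(stim_counts)
    start_stimuli = np.concatenate(([0], end_stimuli[:-1]))
    return start_stimuli, end_stimuli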
Example 5
if len(sys.argv) < 3:
  print("Usage: python " + sys.argv[0] + " <spectrogram_dir> <checkpoint_dir>")
  exit(-1)

spectroDir = sys.argv[1]
spectroDirPath = Path(spectroDir)
if not spectroDirPath.exists():
  print("Could not find directory " + spectroDir)
  exit(-1)

checkpointDir = sys.argv[2]
checkpointDirPath = Path(checkpointDir)
if not checkpointDirPath.exists():
  print("Could not find directory " + spectroDir)
  exit(-1)

model = common.build_model()
model.summary()

spectroFiles = list(spectroDirPath.glob('*.npy'))
random.shuffle(spectroFiles)
S, batchedDataset = common.build_dataset_from_spectrogram(spectroFiles[0].resolve())

loss, acc = model.evaluate(batchedDataset.repeat(), steps=100)
print("Untrained model, accuracy: {:5.2f}%".format(100*acc))

model.load_weights(tf.train.latest_checkpoint(str(common.get_latest_checkpoint_dir(checkpointDirPath))))
loss, acc = model.evaluate(batchedDataset.repeat(), steps=100)
print("Trained model, accuracy: {:5.2f}%".format(100*acc))
Example 6
seed_S = np.load(seed_file_str)

# Now the fun part: use the seed data to feed the first part of the RNN and let the model feed back
# into itself to generate enough data for a song of the specified length

# Restore the most recent deepnickelback model
print("Loading model...")
# Let's change the batch size of the model to be just 1
model_version = dnb_constants.DEEP_NICKELBACK_VERSION
batch_size = 1
seq_len = 1
stateful = True

model = common.build_model(batch_size=batch_size,
                           checkpoint_base_dir=checkpt_base_dirpath,
                           version=model_version,
                           stateful=stateful,
                           compile=False)
model.build(tf.TensorShape([batch_size, None, dnb_constants.NUM_MEL_CHANNELS]))
model.summary()

print("Generating terrible music...")
sr = 22050
S = common.generate_terrible_mel_spectrogram(model, seed_S, sr,
                                             song_duration_s, stateful)

if "s" in model_version:
    # De-standardize the spectrogram file
    with open(data_funcs.get_std_spec_filepath(seed_filepath),
              'r') as spec_file:
        mean = ast.literal_eval(spec_file.readline())
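The snippet ends part-way through reading the standardization statistics. Presumably (an assumption, since the rest is not shown here) a standard deviation is stored alongside the mean, and the generated spectrogram is de-standardized by inverting (S - mean) / std:

import numpy as np

# Hypothetical de-standardization step, assuming S was standardized
# element-wise as (S - mean) / std before training.
def destandardize(S, mean, std):
    return np.asarray(S) * std + mean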
Example 7
hidden_units = results.hidden_units
epochs = results.epochs
gpu = results.gpu
learning_rate = float(learning_rate)
hidden_units = int(float(hidden_units))
epochs = int(float(epochs))

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

if gpu and not torch.cuda.is_available():
    print("There is no a gpu device available")

trainloader, validloader, testloader, class_to_idx = load_data(data_directory)

model = build_model(arch, hidden_units)

message_cuda = "cuda is available" if torch.cuda.is_available() else "cuda is not available"

print(message_cuda)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.NLLLoss()

# Only train the classifier parameters
optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

# Move the model to CUDA if it is available.
model.to(device)
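For context, a minimal sketch (not part of the original listing) of how these objects are typically wired into a training loop, assuming trainloader yields (inputs, labels) batches and the classifier ends in LogSoftmax so its outputs match NLLLoss:

# Hypothetical training loop using the objects set up above
# (model, criterion, optimizer, device, trainloader).
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        log_probs = model(inputs)            # assumed to be log-probabilities
        loss = criterion(log_probs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print("Epoch {}/{} - training loss: {:.4f}".format(
        epoch + 1, epochs, running_loss / len(trainloader)))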
Example 8
# Generate the deepnickelback network from a given directory of
# mel spectrograms of each of their terrible songs
spectroFiles = list(spectroDirPath.glob('*.npy'))
print("The following spectrogram files were found:")
for sf in spectroFiles:
    print(str(sf))

#LOG_DIR = "./logs/training"
#file_writer = tf.contrib.summary.create_file_writer(LOG_DIR)
#tf.summary.trace_on(graph=True, profiler=True)

# Create and/or load our model with the most recent checkpoint
CURR_BATCH_SIZE = common.BATCH_SIZE
model = common.build_model(batch_size=CURR_BATCH_SIZE,
                           checkpoint_base_dir=checkpoint_base_dirpath,
                           stateful=False)
model.summary()
model.reset_states()

checkpoint_prefix = str(
    checkpoint_dirpath.joinpath("dnb_v" +
                                dnb_constants.DEEP_NICKELBACK_VERSION +
                                "_epoch{epoch:04d}_loss{loss:.8f}")
)  #str(checkpoint_dirpath.joinpath(dnb_constants.FULL_MODEL_FILE_NAME).resolve())
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    monitor='loss',
    save_best_only=True,
    period=1,