Example #1
def test_scale_bad():
    args = argparse.Namespace()
    args.env = "mockenv.json"
    args.instance = "testjcs"
    args.scale = "scale"
    args.hosts = "testjcs-wls-1,testjcs-wls-2"
    args.shape = "VM.Standard2.1"
    args.email = True
    args.verbose = False
    _, response = scale(args)
    assert requests.codes.BAD == response.status_code
Example #2
eval_namelist = list(eval_csv['audio_filename'])
eval_data = np.load(eval_data_path)
frames,bins = eval_data[0].shape

train_data_path = os.path.expanduser('~/log_mel.npy')
val_data_path = os.path.expanduser('~/log_mel.npy')
train_data = np.load(train_data_path)
val_data = np.load(val_data_path)
all_data = np.concatenate((train_data,val_data),axis=0)
(mean_train, std_train) = calculate_scalar_of_tensor(np.concatenate(all_data,axis=0))

###-----------------------------------
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
saver = tf.train.import_meta_graph(os.path.join(model_path,'model-41.meta'))
saver.restore(sess,tf.train.latest_checkpoint(model_path))

graph = tf.get_default_graph()
x = graph.get_tensor_by_name("x:0")
is_training = graph.get_tensor_by_name("is_training:0")
sigmoid = graph.get_tensor_by_name("sigmoid:0")  # if net == CNN9_gated, use "sigmoid_8:0" instead
pre = []
for eval_data_batch in get_val_batch(eval_data, batch_size):
    eval_data_batch = scale(eval_data_batch, mean_train, std_train)
    eval_data_batch = eval_data_batch.reshape(-1, frames, bins, 1)
    sigmoid_prediction = sess.run(sigmoid,
                                  feed_dict={x: eval_data_batch, is_training: False})
    pre.extend(sigmoid_prediction)
write_pre_csv(eval_namelist,pre,'coarse',submission_path,fine_labels,coarse_labels)
sess.close()
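Neither scale nor calculate_scalar_of_tensor is defined in this excerpt; a minimal sketch, assuming per-frequency-bin standardization over the concatenated log-mel frames, could look like this:

import numpy as np

def calculate_scalar_of_tensor(x):
    # x has shape (n_frames, n_bins) after np.concatenate(..., axis=0);
    # compute one mean and one standard deviation per frequency bin.
    return np.mean(x, axis=0), np.std(x, axis=0)

def scale(x, mean, std):
    # Standardize log-mel frames; the small epsilon guards against
    # division by zero in bins that are constant across the training set.
    return (x - mean) / (std + 1e-8)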
Example #3
def train(annotation_path,
          taxonomy_path,
          train_feature_dir,
          val_feature_dir,
          output_dir,
          load_checkpoint,
          load_checkpoint_path,
          exp_id,
          label_mode,
          batch_size=32,
          n_epochs=100,
          kernel_size=3,
          layer_depth=[64, 128, 256, 512],
          chs=1,
          max_ckpt=20,
          lr=1e-3,
          hidden_layer_size=256,
          snapshot=5,
          num_hidden_layers=1,
          standardize=True,
          timestamp=None):
    """
    Train and evaluate a MIL MLP model.
    Parameters
    ----------
    annotation_path
    emb_dir
    output_dir
    label_mode
    batch_size
    num_epochs
    patience
    learning_rate
    hidden_layer_size
    l2_reg
    standardize
    timestamp
    random_state

    Returns
    -------
    """

    # Load annotations and taxonomy
    print("* Loading dataset.")
    annotation_data = pd.read_csv(annotation_path).sort_values(
        'audio_filename')
    with open(taxonomy_path, 'r') as f:
        taxonomy = yaml.load(f, Loader=yaml.Loader)

    annotation_data_trunc = annotation_data[[
        'audio_filename', 'latitude', 'longitude', 'week', 'day', 'hour'
    ]].drop_duplicates()
    file_list = annotation_data_trunc['audio_filename'].to_list()
    latitude_list = annotation_data_trunc['latitude'].to_list()
    longitude_list = annotation_data_trunc['longitude'].to_list()
    week_list = annotation_data_trunc['week'].to_list()
    day_list = annotation_data_trunc['day'].to_list()
    hour_list = annotation_data_trunc['hour'].to_list()

    full_fine_target_labels = [
        "{}-{}_{}".format(coarse_id, fine_id, fine_label)
        for coarse_id, fine_dict in taxonomy['fine'].items()
        for fine_id, fine_label in fine_dict.items()
    ]
    fine_target_labels = [
        x for x in full_fine_target_labels
        if x.split('_')[0].split('-')[1] != 'X'
    ]
    coarse_target_labels = [
        "_".join([str(k), v]) for k, v in taxonomy['coarse'].items()
    ]

    print("* Preparing training data.")

    # For fine, we include incomplete labels in targets for computing the loss
    fine_target_list = get_file_targets(annotation_data,
                                        full_fine_target_labels)
    coarse_target_list = get_file_targets(annotation_data,
                                          coarse_target_labels)
    train_file_idxs, valid_file_idxs = get_subset_split(annotation_data)

    if label_mode == "fine":
        target_list = fine_target_list
        labels = fine_target_labels
        num_classes = len(labels)
        y_true_num = len(full_fine_target_labels)
    elif label_mode == "coarse":
        target_list = coarse_target_list
        labels = coarse_target_labels
        num_classes = len(labels)
        y_true_num = num_classes
    else:
        raise ValueError("Invalid label mode: {}".format(label_mode))

    X_train_meta, y_train, X_valid_meta, y_valid_meta, scaler \
        = prepare_data(train_file_idxs, valid_file_idxs,
                       latitude_list, longitude_list,
                       week_list, day_list, hour_list,
                       target_list, standardize=standardize)

    print('X_train meta shape', X_train_meta.shape)
    print('y_train shape', y_train.shape)
    print('X_valid_meta shape', X_valid_meta.shape)
    print('y_valid shape', y_valid_meta.shape)

    meta_dims = X_train_meta.shape[2]

    X_train = load_train_data(file_list, train_file_idxs, train_feature_dir)
    X_valid = load_train_data(file_list, valid_file_idxs, val_feature_dir)
    _, frames, bins = X_train.shape
    print('X_train shape', X_train.shape)
    print('X_valid shape', X_valid.shape)

    (mean_train,
     std_train) = calculate_scalar_of_tensor(np.concatenate(X_train, axis=0))

    model = CNN9_Res_train(kernel_size, layer_depth, num_classes,
                           hidden_layer_size)

    if not timestamp:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    model_path = os.path.join(output_dir, 'exp' + exp_id)
    # Make sure the experiment directory (and the checkpoint subdirectory used
    # below) exists before writing the scaler and model checkpoints.
    os.makedirs(os.path.join(model_path, 'checkpoint'), exist_ok=True)

    if scaler is not None:
        scaler_path = os.path.join(model_path, 'stdizer.pkl')
        with open(scaler_path, 'wb') as f:
            pk.dump(scaler, f)

    if label_mode == "fine":
        full_coarse_to_fine_terminal_idxs = np.cumsum(
            [len(fine_dict) for fine_dict in taxonomy['fine'].values()])
        incomplete_fine_subidxs = [
            len(fine_dict) - 1 if 'X' in fine_dict else None
            for fine_dict in taxonomy['fine'].values()
        ]
        coarse_to_fine_end_idxs = np.cumsum([
            len(fine_dict) - 1 if 'X' in fine_dict else len(fine_dict)
            for fine_dict in taxonomy['fine'].values()
        ])

        # Create loss function that only adds loss for fine labels for which
        # we don't have any incomplete labels
        def masked_loss(y_true, y_pred):
            loss = None
            for coarse_idx in range(len(full_coarse_to_fine_terminal_idxs)):
                true_terminal_idx = full_coarse_to_fine_terminal_idxs[
                    coarse_idx]
                true_incomplete_subidx = incomplete_fine_subidxs[coarse_idx]
                pred_end_idx = coarse_to_fine_end_idxs[coarse_idx]

                if coarse_idx != 0:
                    true_start_idx = full_coarse_to_fine_terminal_idxs[
                        coarse_idx - 1]
                    pred_start_idx = coarse_to_fine_end_idxs[coarse_idx - 1]
                else:
                    true_start_idx = 0
                    pred_start_idx = 0

                if true_incomplete_subidx is None:
                    true_end_idx = true_terminal_idx

                    sub_true = y_true[:, true_start_idx:true_end_idx]
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx]

                else:
                    # Don't include incomplete label
                    true_end_idx = true_terminal_idx - 1
                    true_incomplete_idx = true_incomplete_subidx + true_start_idx
                    assert true_end_idx - true_start_idx == pred_end_idx - pred_start_idx
                    assert true_incomplete_idx == true_end_idx

                    # 1 if not incomplete, 0 if incomplete
                    mask = K.expand_dims(1 - y_true[:, true_incomplete_idx])

                    # Mask the target and predictions. If the mask is 0,
                    # all entries will be 0 and the BCE will be 0.
                    # This has the effect of masking the BCE for each fine
                    # label within a coarse label if an incomplete label exists
                    sub_true = y_true[:, true_start_idx:true_end_idx] * mask
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx] * mask

                if loss is not None:
                    loss += K.sum(K.binary_crossentropy(sub_true, sub_pred))
                else:
                    loss = K.sum(K.binary_crossentropy(sub_true, sub_pred))

            return loss

        loss_func = masked_loss
    else:

        def unmasked_loss(y_true, y_pred):

            loss = None
            loss = K.sum(K.binary_crossentropy(y_true, y_pred))
            return loss

        loss_func = unmasked_loss

    ###     placeholder
    x = tf.placeholder(tf.float32, shape=[None, frames, bins, chs], name='x')
    meta_x = tf.placeholder(tf.float32, shape=[None, meta_dims], name='meta_x')
    y = tf.placeholder(tf.float32, shape=[None, y_true_num], name='y')
    is_training = tf.placeholder(tf.bool, shape=None, name='is_training')

    ###     net output
    output = model.forward(input_tensor=x,
                           input_meta=meta_x,
                           is_training=is_training)
    sigmoid_output = tf.nn.sigmoid(output, name='sigmoid_output')
    loss = loss_func(y, sigmoid_output)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    learning_rate = tf.Variable(float(lr), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)
    with tf.control_dependencies(update_ops):
        #        train_op = tf.train.MomentumOptimizer(learning_rate=lr,momentum=momentum).minimize(loss)
        train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)

    ###     start session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    saver = tf.train.Saver(max_to_keep=max_ckpt)
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    if load_checkpoint:
        saver.restore(sess, load_checkpoint_path)

    ###     tensorboard summary

    train_summary_dir = os.path.join(model_path, 'summaries', 'train')
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    loss_all = tf.placeholder(tf.float32, shape=None, name='loss_all')

    tf.add_to_collection("loss", loss_all)

    loss_summary = tf.summary.scalar('loss', loss_all)

    val_summary_dir = os.path.join(model_path, 'summaries', 'val')
    val_micro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_auprc'), sess.graph)
    val_macro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'macro_auprc'), sess.graph)
    val_val_micro_F1score_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_F1score'), sess.graph)
    val_summary = tf.placeholder(tf.float32, shape=None, name='val_summary')
    tf.add_to_collection("val_summary", val_summary)
    val_summary_op = tf.summary.scalar('val_summary', val_summary)

    ###     train loop
    print("* Training model.")
    class_auprc_dict = {}
    for epoch in range(n_epochs):
        train_loss = 0
        n_batch = 0
        for X_train_batch, X_meta_batch, y_train_batch in gen_train_batch(
                X_train, X_train_meta, y_train, batch_size):

            X_meta_batch = X_meta_batch.reshape(-1, meta_dims)
            X_train_batch = scale(X_train_batch, mean_train, std_train)
            X_train_batch = X_train_batch.reshape(-1, frames, bins, chs)
            _, train_loss_batch = sess.run(
                [train_op, loss],
                feed_dict={
                    x: X_train_batch,
                    meta_x: X_meta_batch,
                    y: y_train_batch,
                    is_training: True
                })
            train_loss += train_loss_batch
            n_batch += 1
        train_loss = train_loss / n_batch
        # loss_summary already serializes the scalar, so there is no need to
        # build a new merged summary op on every epoch.
        train_summaries = sess.run(loss_summary,
                                   feed_dict={loss_all: train_loss})
        train_summary_writer.add_summary(train_summaries, epoch)

        print("step %d" % (epoch))
        print("   train loss: %f" % (train_loss))

        pre = []
        if ((epoch + 1) % snapshot == 0
                and epoch > 0) or epoch == n_epochs - 1:
            sess.run(learning_rate_decay_op)

            for val_data_batch, val_meta_batch in gen_val_batch(
                    X_valid, X_valid_meta, batch_size):

                val_meta_batch = val_meta_batch.reshape(-1, meta_dims)
                val_data_batch = scale(val_data_batch, mean_train, std_train)
                val_data_batch = val_data_batch.reshape(-1, frames, bins, chs)
                prediction = sess.run(sigmoid_output,
                                      feed_dict={
                                          x: val_data_batch,
                                          meta_x: val_meta_batch,
                                          is_training: False
                                      })
                pre.extend(prediction)
            # print(len(pre))
            generate_output_file(pre, valid_file_idxs, model_path, file_list,
                                 label_mode, taxonomy)
            submission_path = os.path.join(model_path, "output.csv")
            df_dict = metrics.evaluate(prediction_path=submission_path,
                                       annotation_path=annotation_path,
                                       yaml_path=taxonomy_path,
                                       mode=label_mode)
            val_micro_auprc, eval_df = metrics.micro_averaged_auprc(
                df_dict, return_df=True)
            val_macro_auprc, class_auprc = metrics.macro_averaged_auprc(
                df_dict, return_classwise=True)
            thresh_idx_05 = (eval_df['threshold'] >= 0.5).nonzero()[0][0]
            val_micro_F1score = eval_df['F'][thresh_idx_05]

            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_micro_auprc})
            val_micro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_macro_auprc})
            val_macro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(
                val_summary_op, feed_dict={val_summary: val_micro_F1score})
            val_val_micro_F1score_summary_writer.add_summary(
                val_summaries, epoch)
            class_auprc_dict['class_auprc_' + str(epoch)] = class_auprc
            print('official')
            print('micro', val_micro_auprc)
            print('micro_F1', val_micro_F1score)
            print('macro', val_macro_auprc)

            print('-----save:{}-{}'.format(
                os.path.join(model_path, 'checkpoint', 'model'), epoch))
            saver.save(sess,
                       os.path.join(model_path, 'checkpoint', 'model'),
                       global_step=epoch)

            np.save(os.path.join(model_path, 'class_auprc_dict.npy'),
                    class_auprc_dict)
    sess.close()
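A minimal usage sketch of this train() function; every path and the experiment id below are hypothetical placeholders, not values taken from the code above:

# Hedged usage sketch; all paths and the experiment id are hypothetical.
train(annotation_path='annotations.csv',
      taxonomy_path='taxonomy.yaml',
      train_feature_dir='features/train',
      val_feature_dir='features/validate',
      output_dir='experiments',
      load_checkpoint=False,
      load_checkpoint_path=None,
      exp_id='01',
      label_mode='coarse',
      batch_size=32,
      n_epochs=100)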
Example #4
n_proxy_bins = functions.num_bins(opts.proxy_min, opts.proxy_max, opts.proxy_bin)

hist_sum = np.zeros(n_mass_bins)
hist_sum_matrix = np.zeros((n_proxy_bins, n_mass_bins))

for cluster in clusters:

    #hist_sum += functions.scale(cluster.hist)
    hist_sum += cluster.hist

    proxy_bin = n_proxy_bins - 1 - \
      functions.find_bin(np.log10(cluster.proxy), opts.proxy_min, opts.proxy_bin)

    for i in range(n_proxy_bins):
        if proxy_bin == i and opts.z_min <= cluster.z < opts.z_max:
            hist_sum_matrix[proxy_bin] += functions.scale(cluster.hist)
                   
for i in range(n_proxy_bins):
    hist_sum_matrix[i] = functions.scale(hist_sum_matrix[i])
        
# SAVE MATRIX TO FILE

output = np.transpose(np.vstack([hm_hist, hist_sum, (hist_sum / hm_hist[1])]))

file_name = opts.obs_mem_file + '.hist.txt'
np.savetxt(file_name, output, fmt='%.3f')
print('Data saved to:', file_name)

file_name = opts.obs_mem_file + '.matrix.txt'
output = np.fliplr(np.transpose(np.vstack([hist_sum_matrix,
                                           clusters[0].hist_x])))
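functions.scale is not part of this excerpt; a minimal sketch, assuming it rescales a 1-D histogram to a unit peak (an all-zero histogram is returned unchanged):

import numpy as np

def scale(hist):
    # Rescale a histogram so its maximum bin equals 1. This is an assumption
    # about what functions.scale does, based only on how it is called above.
    hist = np.asarray(hist, dtype=float)
    peak = hist.max()
    return hist / peak if peak > 0 else hist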
Example #5
    train_df = train_df[train_df[train_var] != test_set]
    test_df[train_var] = test_set

    """put the two DFs together to perform transformations, trimming, filling NANs if necessary etc."""        
    DF = pd.concat([train_df, test_df], ignore_index=False)
    DF['const'] = 1.0 #adding the bias node; in some situations it should be omitted
    print "size of concatenated DF",len(DF),"number of columns:", len(DF.columns)
    
    explanatory_vars = valid_variables(train_df,target_var)
    if 'const' in DF.columns:
        explanatory_vars += ['const']
    print "useful vars:",explanatory_vars

    scaled_DF = DF.copy()
    for col in explanatory_vars:
        scaled_DF[col] = functions.scale(DF,col)
    #scaled_DF.to_csv("scaledDF.csv")
    
    scaled_DF[target_var] = functions.scale(DF,target_var)
    
    """separate the two DFs AFTER all the variable manipulating work is done"""
    train_df = scaled_DF[scaled_DF[train_var] != test_set ] 
    test_df = scaled_DF[scaled_DF[train_var] == test_set]

    train_data = functions.make_numpy_matrix(train_df[train_df[train_var] != validation_set],explanatory_vars)
    train_target = np.array(train_df[target_var][train_df[train_var] != validation_set])#.reshape(train_data.shape[0],1)

    validation_data = functions.make_numpy_matrix(train_df[train_df[train_var] == validation_set],explanatory_vars)
    validation_target = np.array(train_df[target_var][train_df[train_var] == validation_set])#.reshape(validation_data.shape[0],1)
    
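functions.scale(DF, col) is not shown here; a minimal sketch, assuming it returns a z-scored copy of a single column (the scaling choice is an assumption, only the call signature is taken from the code above):

import pandas as pd

def scale(df, col):
    # Return a standardized copy of one DataFrame column; constant columns
    # are only centered to avoid division by zero.
    series = df[col].astype(float)
    std = series.std()
    centered = series - series.mean()
    return centered / std if std > 0 else centered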
Example #6
### Fourier Transformation

# 1) Convolution with isotropic Gaussian kernel
sigma_arr = [0, 1, 2, 3, 5, 10]
N = 11

fig, ax = plt.subplots(2, 3, figsize=(9, 6))
ax = ax.flatten()
for i in range(len(sigma_arr)):
    if i == 0:
        ax[i].imshow(I_trui, cm.gray)
        ax[i].axis('off')
        ax[i].set_title('original image')
    else:
        I_trui_gauss = fct.scale(I_trui, N, sigma_arr[i])
        ax[i].imshow(I_trui_gauss, cm.gray)
        ax[i].axis('off')
        ax[i].set_title(r'$\sigma$ = %i' % sigma_arr[i])

fig.savefig(image_path + '1_gauss_sigma.png')

# 3) derivative of an image using FFT
I_trui_derive_x = fct.derive(I_trui, 1, 0)
I_trui_derive_y = fct.derive(I_trui, 0, 1)
I_trui_derive = fct.derive(I_trui, 1, 1)
fig, ax = plt.subplots(1, 4, figsize=(12, 3))
ax[0].imshow(I_trui, cm.gray)
ax[0].axis('off')
ax[0].set_title('original image')
ax[1].imshow(I_trui_derive_x, cm.gray)
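fct.scale(I_trui, N, sigma) is only called in this excerpt; a minimal sketch, assuming it convolves the image with an N x N isotropic Gaussian kernel (the section header suggests the original may work in the Fourier domain; a spatial-domain version is shown for brevity):

import numpy as np
from scipy.signal import convolve2d

def scale(image, N, sigma):
    # Build an N x N isotropic Gaussian kernel (sigma > 0 assumed),
    # normalize it, and smooth the image with symmetric boundary handling.
    half = N // 2
    yy, xx = np.mgrid[-half:half + 1, -half:half + 1]
    kernel = np.exp(-(xx ** 2 + yy ** 2) / (2.0 * sigma ** 2))
    kernel /= kernel.sum()
    return convolve2d(image, kernel, mode='same', boundary='symm')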
Example #7
for col in data.columns:
    data[col] = f.normalize(data[col])

# In[7]:

split = pd.Timestamp('01-01-2015')

# In[8]:

train = data.loc[:split, ]
test = data.loc[split:, ]

# In[9]:

for col in data.columns:
    train.loc[:, col], test.loc[:, col] = f.scale(train.loc[:, col],
                                                  test.loc[:, col])

# In[34]:

x_train = train[:-1]
y_train = train.ma5.shift(-1)
y_train.dropna(inplace=True)

x_test = test[:-1]
y_test = test.ma5.shift(-1)
y_test.dropna(inplace=True)

# In[35]:

y_test
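f.scale is not included in this notebook excerpt; a minimal sketch, assuming it fits a scaler on the training column only and applies the same transform to the test column (MinMaxScaler is an assumption):

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def scale(train_col, test_col):
    # Fit on the training data only, then apply the same transform to the
    # test data so no information leaks from the test period.
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_col.to_frame()).ravel()
    test_scaled = scaler.transform(test_col.to_frame()).ravel()
    return (pd.Series(train_scaled, index=train_col.index),
            pd.Series(test_scaled, index=test_col.index))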
Example #8
    initial_state_S=OLD_initial_state_S,
    initial_state_A=OLD_initial_state_A,
    initial_state_len=OLD_initial_state_len,
    #function = ina,
    dt=1e-7,
    filename_abs=OLD_filename_abs,
    t=t,
    v=v,
    output_S=OLD_output_S,
    output_A=OLD_output_A,
    bounds=bounds,
    sample_weight=weight)
#data = pd.read_csv('../../data/training/2020_12_19_0035 I-V INa 11,65 pF.atf' ,delimiter= '\t', header=None, skiprows = 11)
#exp_data = np.concatenate([data[k] for k in range(1,21)])

x0 = scale(C.value.values[:-2], *bounds)
OLD_data = OLD_calculate_full_trace(x0, OLD_kwargs)

#with open(file_to_write, "w", newline='') as csv_file:
#    writer = csv.writer(csv_file, delimiter=',')
#    writer.writerow(('generation',*C[:-2].T.columns,'loss'))

result = 0
print('start')
with MPIPool() as pool:
    pool.workers_exit()
    #exit()
    result = scop.differential_evolution(
        OLD_loss,
        bounds=scale_bounds,
        args=(OLD_data, OLD_kwargs),
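The call scale(C.value.values[:-2], *bounds) above relies on a helper that is not shown; a minimal sketch, assuming bounds unpacks into per-parameter lower and upper arrays and that the helper maps parameter values onto the unit interval:

import numpy as np

def scale(values, lower, upper):
    # Map physical parameter values into [0, 1] relative to their bounds;
    # the inverse mapping would be lower + x * (upper - lower).
    values = np.asarray(values, dtype=float)
    lower = np.asarray(lower, dtype=float)
    upper = np.asarray(upper, dtype=float)
    return (values - lower) / (upper - lower)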
Example #9
                                  opts.proxy_bin)

hist_sum = np.zeros(n_mass_bins)
hist_sum_matrix = np.zeros((n_proxy_bins, n_mass_bins))

for cluster in clusters:

    #hist_sum += functions.scale(cluster.hist)
    hist_sum += cluster.hist

    proxy_bin = n_proxy_bins - 1 - \
      functions.find_bin(np.log10(cluster.proxy), opts.proxy_min, opts.proxy_bin)

    for i in range(n_proxy_bins):
        if proxy_bin == i and opts.z_min <= cluster.z < opts.z_max:
            hist_sum_matrix[proxy_bin] += functions.scale(cluster.hist)

for i in range(n_proxy_bins):
    hist_sum_matrix[i] = functions.scale(hist_sum_matrix[i])

# SAVE MATRIX TO FILE

output = np.transpose(np.vstack([hm_hist, hist_sum, (hist_sum / hm_hist[1])]))

file_name = opts.obs_mem_file + '.hist.txt'
np.savetxt(file_name, output, fmt='%.3f')
print('Data saved to:', file_name)

file_name = opts.obs_mem_file + '.matrix.txt'
output = np.fliplr(
    np.transpose(np.vstack([hist_sum_matrix, clusters[0].hist_x])))