def test_scale_bad():
    args = argparse.Namespace()
    args.env = "mockenv.json"
    args.instance = "testjcs"
    args.scale = "scale"
    args.hosts = "testjcs-wls-1,testjcs-wls-2"
    args.shape = "VM.Standard2.1"
    args.email = True
    args.verbose = False
    _, response = scale(args)
    assert requests.codes.BAD == response.status_code
eval_namelist = list(eval_csv['audio_filename'])
eval_data = np.load(eval_data_path)
frames, bins = eval_data[0].shape

train_data_path = '~/log_mel.npy'
val_data_path = '~/log_mel.npy'
train_data = np.load(train_data_path)
val_data = np.load(val_data_path)
all_data = np.concatenate((train_data, val_data), axis=0)
(mean_train, std_train) = calculate_scalar_of_tensor(np.concatenate(all_data, axis=0))

###-----------------------------------
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

saver = tf.train.import_meta_graph(os.path.join(model_path, 'model-41.meta'))
saver.restore(sess, tf.train.latest_checkpoint(model_path))
graph = tf.get_default_graph()
x = graph.get_tensor_by_name("x:0")
is_training = graph.get_tensor_by_name("is_training:0")
sigmoid = graph.get_tensor_by_name("sigmoid:0")  ## if net == CNN9_gated, "sigmoid:0" must be replaced by "sigmoid_8:0"

pre = []
for eval_data_batch in get_val_batch(eval_data, batch_size):
    eval_data_batch = scale(eval_data_batch, mean_train, std_train)
    eval_data_batch = eval_data_batch.reshape(-1, frames, bins, 1)
    sigmoid_prediction = sess.run(sigmoid,
                                  feed_dict={x: eval_data_batch, is_training: False})
    pre.extend(sigmoid_prediction)

write_pre_csv(eval_namelist, pre, 'coarse', submission_path, fine_labels, coarse_labels)
sess.close()
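# The snippet above relies on helpers that are not shown here. The sketch below
# is a hypothetical implementation, not the repository's actual code:
# calculate_scalar_of_tensor computes per-bin statistics over all frames,
# scale standardizes a batch with those training statistics, and get_val_batch
# yields fixed-size chunks of the evaluation data.
import numpy as np


def calculate_scalar_of_tensor(x):
    # Per-feature mean and standard deviation over the frame axis.
    return np.mean(x, axis=0), np.std(x, axis=0)


def scale(x, mean, std, eps=1e-8):
    # Standardize with training-set statistics; eps guards against zero std.
    return (x - mean) / (std + eps)


def get_val_batch(data, batch_size):
    # Yield the evaluation clips in order, batch_size at a time.
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]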
def train(annotation_path, taxonomy_path, train_feature_dir, val_feature_dir,
          output_dir, load_checkpoint, load_checkpoint_path, exp_id, label_mode,
          batch_size=32, n_epochs=100, kernel_size=3,
          layer_depth=[64, 128, 256, 512], chs=1, max_ckpt=20, lr=1e-3,
          hidden_layer_size=256, snapshot=5, num_hidden_layers=1,
          standardize=True, timestamp=None):
    """
    Train and evaluate a CNN9-Res model with metadata inputs.

    Parameters
    ----------
    annotation_path
    taxonomy_path
    train_feature_dir
    val_feature_dir
    output_dir
    load_checkpoint
    load_checkpoint_path
    exp_id
    label_mode
    batch_size
    n_epochs
    kernel_size
    layer_depth
    chs
    max_ckpt
    lr
    hidden_layer_size
    snapshot
    num_hidden_layers
    standardize
    timestamp

    Returns
    -------
    """
    # Load annotations and taxonomy
    print("* Loading dataset.")
    annotation_data = pd.read_csv(annotation_path).sort_values('audio_filename')
    with open(taxonomy_path, 'r') as f:
        taxonomy = yaml.load(f, Loader=yaml.Loader)

    annotation_data_trunc = annotation_data[[
        'audio_filename', 'latitude', 'longitude', 'week', 'day', 'hour'
    ]].drop_duplicates()
    file_list = annotation_data_trunc['audio_filename'].to_list()
    latitude_list = annotation_data_trunc['latitude'].to_list()
    longitude_list = annotation_data_trunc['longitude'].to_list()
    week_list = annotation_data_trunc['week'].to_list()
    day_list = annotation_data_trunc['day'].to_list()
    hour_list = annotation_data_trunc['hour'].to_list()

    full_fine_target_labels = [
        "{}-{}_{}".format(coarse_id, fine_id, fine_label)
        for coarse_id, fine_dict in taxonomy['fine'].items()
        for fine_id, fine_label in fine_dict.items()
    ]
    fine_target_labels = [
        x for x in full_fine_target_labels
        if x.split('_')[0].split('-')[1] != 'X'
    ]
    coarse_target_labels = [
        "_".join([str(k), v]) for k, v in taxonomy['coarse'].items()
    ]

    print("* Preparing training data.")

    # For fine, we include incomplete labels in targets for computing the loss
    fine_target_list = get_file_targets(annotation_data, full_fine_target_labels)
    coarse_target_list = get_file_targets(annotation_data, coarse_target_labels)
    train_file_idxs, valid_file_idxs = get_subset_split(annotation_data)

    if label_mode == "fine":
        target_list = fine_target_list
        labels = fine_target_labels
        num_classes = len(labels)
        y_true_num = len(full_fine_target_labels)
    elif label_mode == "coarse":
        target_list = coarse_target_list
        labels = coarse_target_labels
        num_classes = len(labels)
        y_true_num = num_classes
    else:
        raise ValueError("Invalid label mode: {}".format(label_mode))

    X_train_meta, y_train, X_valid_meta, y_valid_meta, scaler \
        = prepare_data(train_file_idxs, valid_file_idxs,
                       latitude_list, longitude_list,
                       week_list, day_list, hour_list,
                       target_list, standardize=standardize)

    print('X_train meta shape', X_train_meta.shape)
    print('y_train shape', y_train.shape)
    print('X_valid_meta shape', X_valid_meta.shape)
    print('y_valid shape', y_valid_meta.shape)

    meta_dims = X_train_meta.shape[2]

    X_train = load_train_data(file_list, train_file_idxs, train_feature_dir)
    X_valid = load_train_data(file_list, valid_file_idxs, val_feature_dir)
    _, frames, bins = X_train.shape
    print('X_train shape', X_train.shape)
    print('X_valid shape', X_valid.shape)

    (mean_train, std_train) = calculate_scalar_of_tensor(np.concatenate(X_train, axis=0))

    model = CNN9_Res_train(kernel_size, layer_depth, num_classes, hidden_layer_size)

    if not timestamp:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    model_path = os.path.join(output_dir, 'exp' + exp_id)

    if scaler is not None:
        scaler_path = os.path.join(model_path, 'stdizer.pkl')
        with open(scaler_path, 'wb') as f:
            pk.dump(scaler, f)

    if label_mode == "fine":
        full_coarse_to_fine_terminal_idxs = np.cumsum(
            [len(fine_dict) for fine_dict in taxonomy['fine'].values()])
        incomplete_fine_subidxs = [
            len(fine_dict) - 1 if 'X' in fine_dict else None
            for fine_dict in taxonomy['fine'].values()
        ]
        coarse_to_fine_end_idxs = np.cumsum([
            len(fine_dict) - 1 if 'X' in fine_dict else len(fine_dict)
            for fine_dict in taxonomy['fine'].values()
        ])

        # Create a loss function that only adds loss for fine labels for which
        # we don't have any incomplete labels
        def masked_loss(y_true, y_pred):
            loss = None
            for coarse_idx in range(len(full_coarse_to_fine_terminal_idxs)):
                true_terminal_idx = full_coarse_to_fine_terminal_idxs[coarse_idx]
                true_incomplete_subidx = incomplete_fine_subidxs[coarse_idx]
                pred_end_idx = coarse_to_fine_end_idxs[coarse_idx]

                if coarse_idx != 0:
                    true_start_idx = full_coarse_to_fine_terminal_idxs[coarse_idx - 1]
                    pred_start_idx = coarse_to_fine_end_idxs[coarse_idx - 1]
                else:
                    true_start_idx = 0
                    pred_start_idx = 0

                if true_incomplete_subidx is None:
                    true_end_idx = true_terminal_idx

                    sub_true = y_true[:, true_start_idx:true_end_idx]
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx]
                else:
                    # Don't include incomplete label
                    true_end_idx = true_terminal_idx - 1
                    true_incomplete_idx = true_incomplete_subidx + true_start_idx
                    assert true_end_idx - true_start_idx == pred_end_idx - pred_start_idx
                    assert true_incomplete_idx == true_end_idx

                    # 1 if not incomplete, 0 if incomplete
                    mask = K.expand_dims(1 - y_true[:, true_incomplete_idx])

                    # Mask the target and predictions. If the mask is 0,
                    # all entries will be 0 and the BCE will be 0.
                    # This has the effect of masking the BCE for each fine
                    # label within a coarse label if an incomplete label exists
                    sub_true = y_true[:, true_start_idx:true_end_idx] * mask
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx] * mask

                if loss is not None:
                    loss += K.sum(K.binary_crossentropy(sub_true, sub_pred))
                else:
                    loss = K.sum(K.binary_crossentropy(sub_true, sub_pred))

            return loss

        loss_func = masked_loss
    else:
        def unmasked_loss(y_true, y_pred):
            loss = K.sum(K.binary_crossentropy(y_true, y_pred))
            return loss

        loss_func = unmasked_loss

    ### placeholder
    x = tf.placeholder(tf.float32, shape=[None, frames, bins, chs], name='x')
    meta_x = tf.placeholder(tf.float32, shape=[None, meta_dims], name='meta_x')
    y = tf.placeholder(tf.float32, shape=[None, y_true_num], name='y')
    is_training = tf.placeholder(tf.bool, shape=None, name='is_training')

    ### net output
    output = model.forward(input_tensor=x, input_meta=meta_x, is_training=is_training)
    sigmoid_output = tf.nn.sigmoid(output, name='sigmoid_output')
    loss = loss_func(y, sigmoid_output)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    learning_rate = tf.Variable(float(lr), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)
    with tf.control_dependencies(update_ops):
        # train_op = tf.train.MomentumOptimizer(learning_rate=lr, momentum=momentum).minimize(loss)
        train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    ### start session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    saver = tf.train.Saver(max_to_keep=max_ckpt)
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    if load_checkpoint:
        saver.restore(sess, load_checkpoint_path)

    ### tensorboard summary
    train_summary_dir = os.path.join(model_path, 'summaries', 'train')
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
    loss_all = tf.placeholder(tf.float32, shape=None, name='loss_all')
    tf.add_to_collection("loss", loss_all)
    loss_summary = tf.summary.scalar('loss', loss_all)
    val_summary_dir = os.path.join(model_path, 'summaries', 'val')
    val_micro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_auprc'), sess.graph)
    val_macro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'macro_auprc'), sess.graph)
    val_val_micro_F1score_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_F1score'), sess.graph)
    val_summary = tf.placeholder(tf.float32, shape=None, name='val_summary')
    tf.add_to_collection("val_summary", val_summary)
    val_summary_op = tf.summary.scalar('val_summary', val_summary)

    ### train loop
    print("* Training model.")
    class_auprc_dict = {}
    for epoch in range(n_epochs):
        train_loss = 0
        n_batch = 0
        for X_train_batch, X_meta_batch, y_train_batch in gen_train_batch(
                X_train, X_train_meta, y_train, batch_size):

            X_meta_batch = X_meta_batch.reshape(-1, meta_dims)
            X_train_batch = scale(X_train_batch, mean_train, std_train)
            X_train_batch = X_train_batch.reshape(-1, frames, bins, chs)
            _, train_loss_batch = sess.run(
                [train_op, loss],
                feed_dict={
                    x: X_train_batch,
                    meta_x: X_meta_batch,
                    y: y_train_batch,
                    is_training: True
                })
            train_loss += train_loss_batch
            n_batch += 1

        train_loss = train_loss / n_batch
        train_summary_op = tf.summary.merge([loss_summary])
        train_summaries = sess.run(train_summary_op,
                                   feed_dict={loss_all: train_loss})
        train_summary_writer.add_summary(train_summaries, epoch)

        print("step %d" % (epoch))
        print(" train loss: %f" % (train_loss))

        pre = []
        if ((epoch + 1) % snapshot == 0 and epoch > 0) or epoch == n_epochs - 1:
            sess.run(learning_rate_decay_op)

            for val_data_batch, val_meta_batch in gen_val_batch(
                    X_valid, X_valid_meta, batch_size):

                val_meta_batch = val_meta_batch.reshape(-1, meta_dims)
                val_data_batch = scale(val_data_batch, mean_train, std_train)
                val_data_batch = val_data_batch.reshape(-1, frames, bins, chs)
                prediction = sess.run(sigmoid_output,
                                      feed_dict={
                                          x: val_data_batch,
                                          meta_x: val_meta_batch,
                                          is_training: False
                                      })
                pre.extend(prediction)
            # print(len(pre))

            generate_output_file(pre, valid_file_idxs, model_path, file_list,
                                 label_mode, taxonomy)
            submission_path = os.path.join(model_path, "output.csv")
            df_dict = metrics.evaluate(prediction_path=submission_path,
                                       annotation_path=annotation_path,
                                       yaml_path=taxonomy_path,
                                       mode=label_mode)

            val_micro_auprc, eval_df = metrics.micro_averaged_auprc(
                df_dict, return_df=True)
            val_macro_auprc, class_auprc = metrics.macro_averaged_auprc(
                df_dict, return_classwise=True)
            thresh_idx_05 = (eval_df['threshold'] >= 0.5).nonzero()[0][0]
            val_micro_F1score = eval_df['F'][thresh_idx_05]

            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_micro_auprc})
            val_micro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_macro_auprc})
            val_macro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_micro_F1score})
            val_val_micro_F1score_summary_writer.add_summary(val_summaries, epoch)

            class_auprc_dict['class_auprc_' + str(epoch)] = class_auprc
            print('official')
            print('micro', val_micro_auprc)
            print('micro_F1', val_micro_F1score)
            print('macro', val_macro_auprc)

            print('-----save:{}-{}'.format(
                os.path.join(model_path, 'checkpoint', 'model'), epoch))
            saver.save(sess,
                       os.path.join(model_path, 'checkpoint', 'model'),
                       global_step=epoch)
            np.save(os.path.join(model_path, 'class_auprc_dict.npy'),
                    class_auprc_dict)

    sess.close()
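# gen_train_batch and gen_val_batch are not defined in this excerpt. A minimal
# sketch of what they are assumed to do (hypothetical, aligned with how they
# are called above): shuffle the training set once per epoch and yield aligned
# mini-batches of features, metadata, and targets; iterate validation data in
# order without shuffling.
import numpy as np


def gen_train_batch(X, X_meta, y, batch_size):
    idxs = np.random.permutation(len(X))
    for start in range(0, len(idxs), batch_size):
        sel = idxs[start:start + batch_size]
        yield X[sel], X_meta[sel], y[sel]


def gen_val_batch(X, X_meta, batch_size):
    for start in range(0, len(X), batch_size):
        yield X[start:start + batch_size], X_meta[start:start + batch_size]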
n_proxy_bins = functions.num_bins(opts.proxy_min, opts.proxy_max, opts.proxy_bin)
hist_sum = np.zeros(n_mass_bins)
hist_sum_matrix = np.zeros((n_proxy_bins, n_mass_bins))

for cluster in clusters:
    # hist_sum += functions.scale(cluster.hist)
    hist_sum += cluster.hist
    proxy_bin = n_proxy_bins - 1 - \
        functions.find_bin(np.log10(cluster.proxy), opts.proxy_min, opts.proxy_bin)
    for i in range(n_proxy_bins):
        if proxy_bin == i and opts.z_min <= cluster.z < opts.z_max:
            hist_sum_matrix[proxy_bin] += functions.scale(cluster.hist)

for i in range(n_proxy_bins):
    hist_sum_matrix[i] = functions.scale(hist_sum_matrix[i])

# SAVE MATRIX TO FILE
output = np.transpose(np.vstack([hm_hist, hist_sum, (hist_sum / hm_hist[1])]))
file_name = opts.obs_mem_file + '.hist.txt'
np.savetxt(file_name, output, fmt='%.3f')
print 'Data saved to:', file_name

file_name = opts.obs_mem_file + '.matrix.txt'
output = np.fliplr(np.transpose(np.vstack([hist_sum_matrix, clusters[0].hist_x])))
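# functions.scale is used above to normalize individual cluster histograms and
# the rows of hist_sum_matrix. Its definition is not part of this excerpt; one
# plausible reading is normalization to a unit peak. This is a hypothetical
# stand-in; the real helper may normalize to unit area instead.
import numpy as np


def scale(hist):
    peak = np.max(hist)
    return hist / peak if peak > 0 else np.zeros_like(hist)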
train_df = train_df[train_df[train_var] != test_set]
test_df[train_var] = test_set

"""put the two DFs together to perform transformations, trimming,
filling NaNs if necessary, etc."""
DF = pd.concat([train_df, test_df], ignore_index=False)
DF['const'] = 1.0  # adding the bias node; in some situations it should be omitted
print "size of concatenated DF", len(DF), "number of columns:", len(DF.columns)

explanatory_vars = valid_variables(train_df, target_var)
if 'const' in DF.columns:
    explanatory_vars += ['const']
print "useful vars:", explanatory_vars

scaled_DF = DF.copy()
for col in explanatory_vars:
    scaled_DF[col] = functions.scale(DF, col)
# scaled_DF.to_csv("scaledDF.csv")
scaled_DF[target_var] = functions.scale(DF, target_var)

"""separate the two DFs AFTER all the variable-manipulating work is done"""
train_df = scaled_DF[scaled_DF[train_var] != test_set]
test_df = scaled_DF[scaled_DF[train_var] == test_set]

train_data = functions.make_numpy_matrix(
    train_df[train_df[train_var] != validation_set], explanatory_vars)
train_target = np.array(
    train_df[target_var][train_df[train_var] != validation_set])  # .reshape(train_data.shape[0], 1)
validation_data = functions.make_numpy_matrix(
    train_df[train_df[train_var] == validation_set], explanatory_vars)
validation_target = np.array(
    train_df[target_var][train_df[train_var] == validation_set])  # .reshape(validation_data.shape[0], 1)
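# functions.scale(DF, col) is applied column by column on the concatenated
# frame before the train/test split is restored. Its implementation is not
# shown; below is a hypothetical stand-in that z-scores a single column (the
# project's real helper may use min-max scaling or something else entirely).
def scale(df, col):
    values = df[col].astype(float)
    std = values.std()
    return (values - values.mean()) / std if std > 0 else values - values.mean()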
### Fourier Transformation

# 1) Convolution with isotropic Gaussian kernel
sigma_arr = [0, 1, 2, 3, 5, 10]
N = 11

fig, ax = plt.subplots(2, 3, figsize=(9, 6))
ax = ax.flatten()
for i in range(len(sigma_arr)):
    if i == 0:
        ax[i].imshow(I_trui, cm.gray)
        ax[i].axis('off')
        ax[i].set_title('original image')
    else:
        I_trui_gauss = fct.scale(I_trui, N, sigma_arr[i])
        ax[i].imshow(I_trui_gauss, cm.gray)
        ax[i].axis('off')
        ax[i].set_title(r'$\sigma$ = %i' % sigma_arr[i])
fig.savefig(image_path + '1_gauss_sigma.png')

# 3) derivative of an image using FFT
I_trui_derive_x = fct.derive(I_trui, 1, 0)
I_trui_derive_y = fct.derive(I_trui, 0, 1)
I_trui_derive = fct.derive(I_trui, 1, 1)

fig, ax = plt.subplots(1, 4, figsize=(12, 3))
ax[0].imshow(I_trui, cm.gray)
ax[0].axis('off')
ax[0].set_title('original image')
ax[1].imshow(I_trui_derive_x, cm.gray)
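# fct.scale(I_trui, N, sigma) is not defined in this excerpt. Given the section
# heading, it presumably blurs the image with an N x N isotropic Gaussian
# kernel applied in the Fourier domain. A hypothetical sketch under that
# assumption (the original fct module may implement this differently):
import numpy as np


def scale(image, N, sigma):
    half = N // 2
    yy, xx = np.mgrid[-half:half + 1, -half:half + 1]
    kernel = np.exp(-(xx ** 2 + yy ** 2) / (2.0 * sigma ** 2))
    kernel /= kernel.sum()
    # Embed the kernel in an image-sized array, centered at the origin so the
    # circular convolution does not shift the result.
    padded = np.zeros(image.shape, dtype=float)
    padded[:N, :N] = kernel
    padded = np.roll(np.roll(padded, -half, axis=0), -half, axis=1)
    return np.real(np.fft.ifft2(np.fft.fft2(image) * np.fft.fft2(padded)))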
for col in data.columns:
    data[col] = f.normalize(data[col])


# In[7]:

split = pd.Timestamp('01-01-2015')


# In[8]:

train = data.loc[:split, ]
test = data.loc[split:, ]


# In[9]:

for col in data.columns:
    train.loc[:, col], test.loc[:, col] = f.scale(train.loc[:, col], test.loc[:, col])


# In[34]:

x_train = train[:-1]
y_train = train.ma5.shift(-1)
y_train.dropna(inplace=True)

x_test = test[:-1]
y_test = test.ma5.shift(-1)
y_test.dropna(inplace=True)


# In[35]:

y_test
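# In[ ]:

# f.scale(train_col, test_col) is not shown in this notebook excerpt. Below is
# a hypothetical stand-in consistent with how it is called: fit min-max scaling
# on the training column only and apply the same transform to the test column,
# so no test-set statistics leak into training.
def scale(train_col, test_col):
    lo, hi = train_col.min(), train_col.max()
    span = (hi - lo) if hi > lo else 1.0
    return (train_col - lo) / span, (test_col - lo) / span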
        initial_state_S=OLD_initial_state_S,
        initial_state_A=OLD_initial_state_A,
        initial_state_len=OLD_initial_state_len,
        # function=ina,
        dt=1e-7,
        filename_abs=OLD_filename_abs,
        t=t,
        v=v,
        output_S=OLD_output_S,
        output_A=OLD_output_A,
        bounds=bounds,
        sample_weight=weight)

# data = pd.read_csv('../../data/training/2020_12_19_0035 I-V INa 11,65 pF.atf',
#                    delimiter='\t', header=None, skiprows=11)
# exp_data = np.concatenate([data[k] for k in range(1, 21)])

x0 = scale(C.value.values[:-2], *bounds)
OLD_data = OLD_calculate_full_trace(x0, OLD_kwargs)

# with open(file_to_write, "w", newline='') as csv_file:
#     writer = csv.writer(csv_file, delimiter=',')
#     writer.writerow(('generation', *C[:-2].T.columns, 'loss'))

result = 0
print('start')
with MPIPool() as pool:
    pool.workers_exit()
    # exit()
    result = scop.differential_evolution(
        OLD_loss,
        bounds=scale_bounds,
        args=(OLD_data, OLD_kwargs),
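# scale() above maps the physical parameter values in C into the normalized
# search space that differential_evolution explores via scale_bounds. A
# hypothetical sketch, assuming bounds is a (lower, upper) pair of arrays;
# the project's real helper may differ (e.g. log-scaling some parameters).
import numpy as np


def scale(values, lower, upper):
    # Map physical values into [0, 1] relative to their bounds.
    lower, upper = np.asarray(lower, float), np.asarray(upper, float)
    return (np.asarray(values, float) - lower) / (upper - lower)


def unscale(unit_values, lower, upper):
    # Inverse map from the unit cube back to physical parameter values.
    lower, upper = np.asarray(lower, float), np.asarray(upper, float)
    return lower + np.asarray(unit_values, float) * (upper - lower)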