def generate_dataset_region(work, truth_vcf, mode, filtered_candidates_vcf,
                            region, tumor_count_bed, normal_count_bed,
                            reference, matrix_width, matrix_base_pad,
                            min_ev_frac_per_col, min_cov, num_threads,
                            ensemble_bed, tsv_batch_size):
    """Delegate to generate_dataset() for a single region.

    Pure forwarding wrapper: every argument is passed through positionally,
    with ``None`` inserted between ``num_threads`` and ``ensemble_bed`` to
    fill the one parameter this wrapper does not expose.
    """
    forwarded = (
        work, truth_vcf, mode, filtered_candidates_vcf, region,
        tumor_count_bed, normal_count_bed, reference, matrix_width,
        matrix_base_pad, min_ev_frac_per_col, min_cov, num_threads,
        None,  # slot intentionally unset for per-region generation — TODO confirm its meaning in generate_dataset
        ensemble_bed, tsv_batch_size,
    )
    generate_dataset(*forwarded)
def backward():
    """Build and train the regression network with Adam, decayed LR and EMA.

    Relies on module-level names: ``tf``, ``generate_dataset``, ``forward``,
    ``REGULARIZER``, ``MOVING_AVERAGE_DECAY``, ``learning_rate_base``,
    ``learning_rate_step``, ``learning_rate_decay`` and ``STEPS``.
    Prints the total loss and current predictions every 100 steps.
    """
    x = tf.placeholder(tf.float64, shape=(None, 23))
    y_train = tf.placeholder(tf.float64, shape=(None, 1))
    X, Y, X_test, Y_test = generate_dataset.generate_dataset()
    y_pred = forward.forward(x, REGULARIZER)

    global_step = tf.Variable(0, trainable=False)
    # learning rate exponential decay
    learning_rate = tf.train.exponential_decay(
        learning_rate_base, global_step, learning_rate_step,
        learning_rate_decay, staircase=False)

    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())

    mse = tf.reduce_mean(tf.square(y_pred - y_train))
    loss = mse + tf.add_n(tf.get_collection('losses'))

    # choose AdamOptimizer
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')

    # BUG FIX: original read undefined name `y_pre` (NameError); use y_pred.
    # NOTE(review): MAE/accuracy are built but never fetched below — kept for
    # parity with the original; confirm whether they should be reported.
    MAE = tf.reduce_mean(abs(y_pred - y_train))
    accuracy = 1 / (1 + MAE)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # training steps
        for i in range(STEPS):
            # BUG FIX: run train_op (train_step coupled with ema_op) rather
            # than bare train_step — otherwise the EMA update never executes
            # and train_op is dead code.
            sess.run(train_op, feed_dict={x: X[0:20000], y_train: Y[0:20000]})
            if i % 100 == 0:
                total_loss = sess.run(loss, feed_dict={x: X, y_train: Y})
                print(total_loss)
                print(sess.run(y_pred, feed_dict={x: X, y_train: Y}))
# Toggle for a small hand-worked demo run (not consulted in this chunk).
illustrative_example = False
""" All the variable necessaries for generating the graph results """
# When True, collect per-model experimental results for graphing.
graph = True
threshold_interpretability = 1
# Display names of the candidate linear surrogate explainers.
linear_models_name = [
    'local surrogate', 'lime extending', 'lime regression',
    'lime not binarize', 'lime traditional'
]
# NOTE(review): the second assignment immediately overwrites the first, so
# only ['ls log reg', 'ls raw data'] is ever used — confirm this is intended.
interpretability_name = ['LS extend', 'APE', 'anchor']
interpretability_name = ['ls log reg', 'ls raw data']
if graph:
    # Result store sized by number of models x interpretability methods.
    experimental_informations = store_experimental_informations(
        len(models), len(interpretability_name), interpretability_name)
# NOTE(review): nesting below is reconstructed from a whitespace-mangled
# source — verify loop structure against the original file.
for dataset_name in dataset_names:
    models_name = []
    x, y, class_names, regression, multiclass, continuous_features, categorical_features, categorical_values, categorical_names = generate_dataset(
        dataset_name)
    for nb_model, model in enumerate(models):
        if graph:
            experimental_informations.initialize_per_models()
        model_name = type(model).__name__
        models_name.append(model_name)
        dataset, black_box, x_train, x_test, y_train, y_test, x_train_vectorize, x_test_vectorize, vectorizer = preparing_dataset(
            x, y, False, False, dataset_name, model)
        print("###", model_name, "training on", dataset_name, "dataset.")
        # Fit the black-box model, then report simple holdout accuracy.
        black_box = black_box.fit(x_train, y_train)
        print('### Accuracy:',
              sum(black_box.predict(x_test) == y_test) / len(y_test))
        cnt = 0
        # Explain up to max_instance_to_explain test instances
        # (loop body continues beyond this chunk).
        for instance_to_explain in x_test:
            if cnt == max_instance_to_explain:
                break
('query_in_title', SimpleTransform()), ('search_term_features', SimpleTransform()), ('seq_edit_in_brand', SimpleTransform()), ('seq_edit_in_product_description', SimpleTransform()), ('seq_edit_in_product_title', SimpleTransform()), ('tfidf_svd50_product_description_vector', SimpleTransform()), ('tfidf_svd50_product_title_vector', SimpleTransform()), ('tfidf_svd50_search_term_vector', SimpleTransform()), ('word_in_brand', SimpleTransform()), ('word_in_product_description', SimpleTransform()), ('word_in_product_title', SimpleTransform()), ('word_ratio_in_brand', SimpleTransform()), ('word_ratio_in_product_description', SimpleTransform()), ('word_ratio_in_product_title', SimpleTransform()), ('jaccard_coef_of_bigram_between_search_term_product_title', SimpleTransform()), ('jaccard_coef_of_trigram_between_product_title_product_description', SimpleTransform()), ('jaccard_coef_of_trigram_between_search_term_product_description', SimpleTransform()), ('jaccard_coef_of_trigram_between_search_term_product_title', SimpleTransform()), ('jaccard_coef_of_unigram_between_product_title_product_description', SimpleTransform()), ('jaccard_coef_of_unigram_between_search_term_product_description', SimpleTransform()), ('jaccard_coef_of_unigram_between_search_term_product_title', SimpleTransform()), ('len_of_brand', SimpleTransform()), ('len_of_product_description', SimpleTransform()), ('len_of_product_title', SimpleTransform()), ('len_of_search_term', SimpleTransform()), ('jaccard_coef_of_bigram_between_search_term_product_description', SimpleTransform()), ('ngram_match_brand', SimpleTransform()), ] generate_dataset(features, 'svd50x3_dist')
import importlib.util
import os
import sys

import matplotlib.pyplot as plt
# BUG FIX: `keras` is used below (optimizers/losses) but was never imported
# explicitly — it previously could only arrive via a star import. Make the
# dependency explicit.
import keras

# Local packages live in sibling directories; extend the path before import.
sys.path.insert(0, '../img-generator')
sys.path.insert(0, '../convNet')

import generate_dataset
import convNet
import getSplitDataGenerators
from dataFeeder import *
from myImage import *

# Generate the train/test image splits on disk.
# NOTE(review): backslash paths are Windows-only — consider os.path.join/pathlib.
generate_dataset.generate_dataset(N=5000, sigma=0.5,
                                  datapath="..\\SplitDataSet\\train")
generate_dataset.generate_dataset(N=5000, sigma=1,
                                  datapath="..\\SplitDataSet\\test")

# --- hyperparameters ---
img_x = 128  # reshaped the images, to reasonable size!!!!!!!
img_y = 128
batch = 30
numClasses = 5
numOfChannels = 3
inputShape = (img_x, img_y, numOfChannels)
optimizer = keras.optimizers.SGD(0.001)
loss = keras.losses.cosine_proximity
epochs = 10
from utilities import SimpleTransform
from generate_dataset import generate_dataset
import numpy as np

__author__ = 'abhinav'


def log_transform(x):
    """Return log(1 + x), element-wise for array input."""
    return np.log(x + 1)


if __name__ == '__main__':
    # Feature name/transform pairs fed into dataset generation.
    feature_specs = [
        ('mainFeatures_log10_normalized', SimpleTransform()),
        # ('keras_feature', SimpleTransform()),
    ]
    generate_dataset(feature_specs, 'main_normalized_log10')
import networkx as nx
import numpy as np
import builtins

from strategic_functions import new_edge, residual_patching
from iteration_functions import iterate, calculate_preds
from generate_dataset import generate_dataset
from generate_grid import make_graph
from plotting_functions import plot

# --- experiment configuration ---
data_size = 100
output_node = 1000  # this is also the max amount of nodes you can have
function_type = "Sinus"

X_train, X_test, y_train, y_test = generate_dataset(data_size, function_type)

iterations = 200
lr = 0.005
L1 = 0
L2 = 0  # 1e-5
n = int(X_train.shape[0] / 10)

# Shared globals consumed by the iteration/strategy modules.
builtins.activation_type = "SELU"  # Sigmoid, SELU, ELU, RELU
builtins.G = nx.Graph()

# BUG FIX: bare `except:` swallowed every exception (incl. KeyboardInterrupt
# and SystemExit). Only a 1-D array (IndexError) or a non-array without
# .shape (AttributeError) is the expected failure when probing the feature
# dimension, so catch exactly those.
try:
    input_dim = X_train.shape[1]
except (IndexError, AttributeError):
    input_dim = 1

hidden_nodes = make_graph(output_node, input_dim, hidden_nodes=5,
                          hidden_connections=2, steps_back=3,
                          output_connections=1)

# Earlier experiment kept for reference:
# value_matrix, predictions, total_errors = iterate(iterations, X_train, y_train, hidden_nodes, L1, L2, lr, output_node)
# new_edge(value_matrix, hidden_nodes, n, output_node)

# First pass: train and collect residuals.
value_matrix, residuals, total_errors = iterate(
    iterations, X_train, y_train, hidden_nodes, L1, L2, lr, output_node,
    residual=False)
# Presumably grows the graph guided by the residuals — confirm against
# strategic_functions — then retrain on the patched topology.
hidden_nodes = residual_patching(4, iterations * 5, residuals, output_node,
                                 input_dim, L1, L2, lr, hidden_nodes, X_train)
value_matrix, predictions, total_errors = iterate(
    iterations, X_train, y_train, hidden_nodes, L1, L2, lr, output_node,
    residual=False)
from utilities import SimpleTransform
from generate_dataset import generate_dataset
import numpy as np


def log_transform(x):
    """Return log(1 + x), element-wise for array input."""
    return np.log(x + 1)


if __name__ == '__main__':
    # Pass the raw feature through untouched.
    feature_list = [
        ('as_it_is', SimpleTransform()),
        # ('keras_feature', SimpleTransform()),
    ]
    generate_dataset(feature_list, 'as_it_is')