def load_pb(self, path_to_pb):
    if self._cpu_only:
        with tf.device('/cpu:{}'.format(self._cpu_number)):
            self._input_ph, self._output = load_graph(path_to_pb, ['input:0', 'output:0'])
            self._sess = tf.Session(config=self._config)
    else:
        with tf.device('/gpu:{}'.format(self._gpu_number)):
            self._input_ph, self._output = load_graph(path_to_pb, ['input:0', 'output:0'])
            self._sess = tf.Session(config=self._config)
    return self
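# The load_graph helper called above is not defined in this file. A minimal
# sketch of what load_pb assumes (a frozen-graph loader that imports the
# GraphDef and hands back the requested tensors); signatures vary across the
# snippets below, so return_elements is optional here:
def load_graph(path_to_pb, return_elements=None):
    with tf.gfile.GFile(path_to_pb, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # Import into the default graph; returns the named tensors when requested.
    return tf.import_graph_def(graph_def, return_elements=return_elements, name='')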
def save_states(q, gpu, target, limit, mem_ratio, model_dir, seed=0, chunksize=1000):
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu
    print('GPU {}'.format(gpu))
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_ratio)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        load_graph(os.path.join(model_dir, 'classify_image_graph_def.pb'))
        next_last_layer = sess.graph.get_tensor_by_name('pool_3:0')
        while True:
            source = q.get()
            if source == KILL:
                break
            images = glob.glob('{}/*'.format(source))
            random.seed(seed)
            random.shuffle(images)
            if limit > 0:
                images = images[:limit]
            t0 = time.time()
            h5name = os.path.join(target, '{}.h5'.format(os.path.basename(os.path.normpath(source))))
            with pd.HDFStore(h5name, mode='w', complevel=9, complib='blosc') as store:
                for chunk in chunks(images, chunksize):
                    states = []
                    for jpg in list(chunk):  # Iterate over a copy so removal from chunk is safe
                        try:
                            raw_data = gfile.FastGFile(jpg).read()
                            hidden_layer = sess.run(next_last_layer, {'DecodeJpeg/contents:0': raw_data})
                            hidden_layer = np.squeeze(hidden_layer)
                            states.append(hidden_layer)
                        except Exception:
                            chunk.remove(jpg)
                            print('Something went wrong when processing {}'.format(jpg))
                    X = np.vstack(states)
                    columns = ['f{}'.format(i) for i in range(X.shape[1])]
                    df = pd.DataFrame(data=X, index=chunk, columns=columns)
                    df.index.name = 'filename'
                    store.append('data', df)
            print('Time spent collecting {} states: {}'.format(len(images), time.time() - t0))
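# chunks is used above (and in later snippets) but not defined in this file.
# A minimal sketch that yields successive fixed-size slices of a sequence:
def chunks(items, size):
    for i in range(0, len(items), size):
        yield items[i:i + size]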
def main():
    start_time = time.time()
    args = utils.create_argument_parser()
    graph = utils.load_graph(args.dataset, args.w)
    graph_copy = deepcopy(graph)
    preprocess(graph)
    c = greedy_modularity_communities(graph)
    finish_time = time.time()
    print('\nDone in %.4f seconds.' % (finish_time - start_time))
    communities = dict()
    for i in range(len(c)):
        communities[i] = list(c[i])
    partition = create_partition(communities)
    utils.print_comm_info_to_display(communities)
    # utils.write_comm_info_to_file(partition)
    print('modularity_value =', modularity(graph_copy, communities))
    print('NMI =', NMI(args.output, partition))
    finish_time = time.time()
    print('\nDone in %.4f seconds.' % (finish_time - start_time))
def uploader():
    f = request.files['file']
    filename = secure_filename(f.filename)
    print(filename)
    image_name = os.path.join(app.config['UPLOAD_FOLDER'], str(filename))
    f.save(image_name)
    image = Image.open(image_name)
    image_resized = image.resize([299, 299], Image.ANTIALIAS)
    frozen_model_filename = './classify_image_graph_def.pb'
    graph = load_graph(frozen_model_filename)
    x = graph.get_tensor_by_name('prefix/DecodeJpeg/contents:0')
    y = graph.get_tensor_by_name('prefix/softmax:0')
    with tf.Session(graph=graph) as sess:
        image_data = tf.gfile.FastGFile(image_name, 'rb').read()
        prediction = sess.run(y, feed_dict={x: image_data})
    predictions = np.squeeze(prediction)
    node_lookup = NodeLookup()
    top_k = predictions.argsort()[-1:][::-1]
    for node_id in top_k:
        human_string = node_lookup.id_to_string(node_id)
    image_resized.save(image_name)
    return render_template('image.html', prediction=human_string.split(',')[0], image=str(filename))
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )
    img = Image.open(FLAGS.input_img)
    img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)
    if FLAGS.frozen_model:
        t0 = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - t0))
        # print(frozenGraph.inputs)
        # print(frozenGraph.outputs)
        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
        with tf.Session(graph=frozenGraph, config=config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    else:
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            model = yolo_v3.yolo_v3_spp
        else:
            model = yolo_v3.yolo_v3
        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size, FLAGS.data_format)
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))
        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() - t0))
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))
    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
    img.save(FLAGS.output_img)
def load_model(self, path_to_pb):
    with tf.device(self._device):
        self._input_ph, self._encoding, self._output = load_graph(
            path_to_pb, ['input:0', 'autoencoder/encoding:0', 'output/BiasAdd:0'])
        self._sess = tf.compat.v1.Session(config=self._config)
    return self
def main():
    graph = utils.load_graph()
    edge_distribution = get_edge_distribution(graph)
    utils.plot_heatmap(
        edge_distribution,
        'Row-Normalized Inter-Species Edge Weight Distribution Post Network Enhancement',
        'images/edge_distribution.png')
def main():
    start_time = time.time()
    args = utils.create_argument_parser()
    graph = utils.load_graph(args.dataset, args.w)
    graph_copy = deepcopy(graph)
    communities = community_search(graph)
    com_dict = {}
    for i in range(len(communities)):
        com_dict[i] = communities[i]
    utils.print_comm_info_to_display(com_dict)
    print('modularity_value =', modularity(graph, com_dict))
    com_dict2 = {}
    for k, v in com_dict.items():
        for node in v:
            com_dict2[node] = k
    print('NMI =', NMI(args.output, com_dict2))
    finish_time = time.time()
    print('\nDone in %.4f seconds.' % (finish_time - start_time))
def main(input_path, DEBUG):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )
    classes = load_coco_names(FLAGS.class_names)
    frozenGraph = load_graph(FLAGS.frozen_model)
    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
    boxes_list = []
    with tf.Session(graph=frozenGraph, config=config) as sess:
        for item in input_path:
            start = clock()
            FLAGS.input_img = item
            img = Image.open(FLAGS.input_img)
            img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            boxes_list.append(filtered_boxes)
            if DEBUG:
                draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
                print(filtered_boxes)
            print("Execution Time : {} / #Symbols : {} / Path : {}".format(
                clock() - start, len(filtered_boxes), item))
    tf.reset_default_graph()
    return boxes_list, classes, FLAGS.size
def main(args):
    if args.get('verbose', False):
        print(args)
    recovery_method_name = args["recovery_method"]
    recovery_params = args["recovery_params"]
    RecoveryMethodClass = getattr(recovery, recovery_method_name)
    graph = args.get("graph")
    if graph is None:
        graph = load_graph(args["graph_file"])
    samples = args.get("samples")
    if samples is None:
        samples = load_samples(args["samples_file"])
    recovery_method = RecoveryMethodClass(graph, samples, recovery_params)
    x = [graph.node[idx]['value'] for idx in sorted(graph.node)]
    x_hat = recovery_method.run()
    results = args.copy()
    results.update({"x_hat": x_hat, "nmse": nmse(x, x_hat)})
    results_file = args.get("results_file")
    if results_file is None:
        return results
    else:
        dump_results(results, results_file)
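# A minimal invocation sketch for main above; the recovery method name and
# the input file names are hypothetical placeholders, not part of this code:
if __name__ == '__main__':
    results = main({
        "recovery_method": "LeastSquares",  # hypothetical class in the recovery module
        "recovery_params": {},
        "graph_file": "graph.gpickle",      # hypothetical input files
        "samples_file": "samples.pickle",
        "verbose": True,
    })
    print(results["nmse"])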
def __init__(self, args):
    """
    Role2Vec machine constructor.
    :param args: Arguments object with the model hyperparameters.
    """
    self.args = args
    self.graph = load_graph(args.graph_input)
def test_correlator1_wd(self):
    """
    Check that matrices :math:`W` and :math:`D` of correlator1 correspond
    to the ones stated in the paper.
    """
    W_test = np.array([
        [0, 1, 2, 3, 4, 3, 2, 1],
        [0, 0, 1, 2, 3, 2, 1, 0],
        [0, 1, 0, 1, 2, 1, 0, 0],
        [0, 1, 2, 0, 1, 0, 0, 0],
        [0, 1, 2, 3, 0, 0, 0, 0],
        [0, 1, 2, 3, 4, 0, 0, 0],
        [0, 1, 2, 3, 4, 3, 0, 0],
        [0, 1, 2, 3, 4, 3, 2, 0]
    ])
    D_test = np.array([
        [0, 3, 6, 9, 12, 16, 13, 10],
        [10, 3, 6, 9, 12, 16, 13, 10],
        [17, 20, 3, 6, 9, 13, 10, 17],
        [24, 27, 30, 3, 6, 10, 17, 24],
        [24, 27, 30, 33, 3, 10, 17, 24],
        [21, 24, 27, 30, 33, 7, 14, 21],
        [14, 17, 20, 23, 26, 30, 7, 14],
        [7, 10, 13, 16, 19, 23, 20, 7]
    ])
    g = load_graph('../graphs/correlator1.dot')
    W, D = wd(g)
    W = wd2numpy_correlator(W)
    D = wd2numpy_correlator(D)
    self.assertTrue((W == W_test).all())
    self.assertTrue((D == D_test).all())
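# Background for the retiming tests in this file (Leiserson & Saxe): W(u, v)
# is the minimum number of registers on any path from u to v, and D(u, v) is
# the maximum total propagation delay over the paths achieving that minimum.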
def test_correlator2_opt1(self):
    """
    Check that *Algorithm OPT1* applied to correlator2 produces a clock period of 13.
    """
    g = load_graph('../graphs/correlator2.dot')
    gr = opt1(g)
    self.assertEqual(cp(gr), 13)
def test_correlator1_cp(self):
    """
    Check that the clock period of correlator1 is 24.
    """
    g = load_graph('../graphs/correlator1.dot')
    clock_period = cp(g)
    self.assertEqual(clock_period, 24)
def test_correlator2_cp(self):
    """
    Check that the clock period of correlator2 is 17.
    """
    g = load_graph('../graphs/correlator2.dot')
    clock_period = cp(g)
    self.assertEqual(clock_period, 17)
def _main():
    start_time = time.time()
    args = utils.create_argument_parser()
    graph = utils.load_graph(args.dataset, False)
    graph_copy = deepcopy(graph)
    communities = community_detect(graph)
    number_of_nodes = 0
    com_dict = {}
    for i in range(len(communities)):
        com_dict[i] = communities[i]
        number_of_nodes += len(communities[i])
    print(number_of_nodes, 'nodes have been analyzed.')
    utils.print_comm_info_to_display(com_dict)
    print('modularity_value =', modularity(graph_copy, com_dict))
    com_dict2 = {}
    for k, v in com_dict.items():
        for node in v:
            com_dict2[node] = k
    print('NMI =', NMI(args.output, com_dict2))
    finish_time = time.time()
    print('\nDone in %.4f seconds.' % (finish_time - start_time))
def effect_of_group_sizes(self):
    '''
    Generate the evaluation graphs for (ii): varying p_g_a.
    '''
    influenced_a_list = []
    influenced_b_list = []
    seeds_a_list = []
    seeds_b_list = []
    seed_list = [11223344, 11224433, 33112244, 22113344]
    for group_ratio in self.group_ratios:
        # group_ratio = 0.5  # 0.7
        # A loop here to run multiple times on the seeds in seed_list:
        # for seed in seed_list:
        filename = f'{self.filename}_{self.num_nodes}_{self.p_with}_{self.p_across}_{group_ratio}'
        # Read in the graph.
        G = ut.load_graph(filename, self.p_with, self.p_across, group_ratio, self.num_nodes)
        influenced, influenced_a, influenced_b, seeds_a, seeds_b = self.calculate_greedy(filename, G)
        stats = ut.graph_stats(G, print_stats=True)
        influenced_a_list.append(influenced_a)
        influenced_b_list.append(influenced_b)
        seeds_a_list.append(seeds_a)
        seeds_b_list.append(seeds_b)
    print(" ******* Finished group size analysis *******")
    return (influenced_a_list, influenced_b_list, seeds_a_list, seeds_b_list)
def load_data(feature_type='identity', embedding_file=None):
    # Load graph.
    graph = utils.load_graph()
    node_ids = list(range(len(graph.nodes)))
    # Choose node features from identity, adjacency matrix, or embeddings.
    if feature_type == 'identity':
        node_features = np.eye(len(graph.nodes))
    elif feature_type == 'adjacency':
        node_features = nx.to_numpy_matrix(graph, node_ids)
    elif feature_type == 'embedding':
        embedding_path = 'node2vec/embeddings/' + embedding_file
        embeddings = utils.load_embeddings(embedding_path)
        node_features = np.array([embeddings[nid] for nid in node_ids])
    # Extract graph info to create a torch geometric Data object.
    x = torch.tensor(node_features, dtype=torch.float)
    y = torch.tensor(get_labels(graph), dtype=torch.long)
    edge_index, edge_attr = get_edges(graph)
    data = Data(x=x, edge_index=edge_index, y=y)
    # Obtain train/val/test splits.
    get_masks(data)
    return data
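# A usage sketch for load_data above. The two-layer GCN is a hypothetical
# consumer, not part of this codebase, and it assumes get_masks sets a
# train_mask attribute on the Data object:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self, in_dim, num_classes, hidden=16):
        super().__init__()
        self.conv1 = GCNConv(in_dim, hidden)
        self.conv2 = GCNConv(hidden, num_classes)

    def forward(self, data):
        h = F.relu(self.conv1(data.x, data.edge_index))
        return self.conv2(h, data.edge_index)

data = load_data(feature_type='identity')
model = GCN(data.num_node_features, int(data.y.max()) + 1)
loss = F.cross_entropy(model(data)[data.train_mask], data.y[data.train_mask])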
def calc_experiment_params(args):
    G = utils.load_graph(args.graph)
    nodes_cluster = utils.load_labels(args.all_labels)
    known_labels = utils.load_labels(args.seed_set)
    holdout = utils.load_labels(args.holdout)
    node2features = None
    if args.features is not None:
        node2features = cPickle.load(open(args.features))
    special_params = {}
    if args.model == "norm_lp" or args.model == "feature_diffusion_norm_lp":
        special_params["M"] = label_propagation.get_graph_normalized_laplacian(G)
    if args.model == "lp" or args.model == "feature_diffusion_lp":
        special_params["M"] = label_propagation.get_graph_laplacian(G)
    num_classes = max([nodes_cluster[node_id] for node_id in nodes_cluster]) + 1
    n = max(G.nodes())
    cluster_distribution = defaultdict(int)
    cluster_count = defaultdict(int)
    for node_id in nodes_cluster:
        cluster_count[nodes_cluster[node_id]] += 1
    for cluster_id in cluster_count:
        cluster_distribution[cluster_id] = cluster_count[cluster_id] / float(len(nodes_cluster))
    parameters = []
    parameters.append(
        (G, num_classes, known_labels, cluster_distribution, holdout,
         nodes_cluster, node2features, n, args, special_params))
    return parameters
def __init__(self, args, train_dataset=None, dev_dataset=None, test_dataset=None):
    '''
    Initialize the trainer.
    :param args, train_dataset, dev_dataset, test_dataset
    '''
    self.args = args
    self.train_dataset = train_dataset
    self.dev_dataset = dev_dataset
    self.test_dataset = test_dataset
    self.label_lst = get_label(args)
    self.num_labels = len(self.label_lst)
    self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]
    self.bert_config = self.config_class.from_pretrained(
        args.model_name_or_path, num_labels=self.num_labels, finetuning_task=args.task)
    self.model = self.model_class(self.bert_config, args)
    self.graph = load_graph(args.graph_file)
    self.edge_feature = load_edge_feature(args.edge_feature_file)
    self.entity_feature = load_entity_feature(args.entity_feature_file)
    # self.entity2id = load_entity2id(args.entity2id_file)
    # GPU or CPU
    self.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    self.model.to(self.device)
def proceed(self):
    config = self.pipeline.version.config
    combinations_r = config['combinations_r']
    train_dir = config['train_dir']  # This can change :(
    graph = utils.load_graph('labeled-import-prune.pickle', config)
    exhibitions = graph.get_nodes()['Exhibition'].values()
    print(f'{"Index":<6s}{"Edges":<7s}{"Title"} {"ID"}')
    for index, e in enumerate(exhibitions):
        print(f'{index:<6}{len(e.edges):<7} {e.title} {e.id}')
        if e.degrees >= combinations_r:
            lines = combinations(list(e.edges), combinations_r)
        else:
            lines = [e.edges]
        file = os.path.join(train_dir, f'{e.id}.txt')
        with open(file, 'w') as f:
            for line in lines:
                f.write(" ".join(line))
                f.write("\n")
    self.pipeline.update()
def export_test(frozen_model_filename, is_2D):
    """Test the exported file with examples from data."""
    print("\n.......Testing %s ........... \n" % frozen_model_filename)
    # We use our "load_graph" function.
    graph = utils.load_graph(frozen_model_filename)
    # We can verify that we can access the list of operations in the graph.
    for op in graph.get_operations():
        print(op.name)
    # We access the input and output nodes.
    x = graph.get_tensor_by_name('prefix/input/x:0')
    y = graph.get_tensor_by_name('prefix/Model/output/y:0')
    data_gen, _ = loader.load_inference_data('processed_data/test', is_2D)
    inputs, labels = data_gen.next_batch(80)
    # We launch a Session to test the exported file.
    with tf.Session(graph=graph) as sess:
        for idx in list(np.random.randint(0, 80, 10)):
            # Note: we didn't initialize/restore anything, everything is stored in the graph_def.
            y_out = sess.run(y, feed_dict={x: [inputs[idx]]})
            print("Input label = {}, predicted label = {}".format(labels[idx], y_out[0]))
def test_correlator2_feas(self):
    """
    Check that 13 is a feasible clock period for correlator2 with *Algorithm FEAS*.
    """
    g = load_graph('../graphs/correlator2.dot')
    r = feas(g, 13)
    self.assertIsNotNone(r)
def test_correlator2_retimed_opt2_synchronous_circuit(self):
    """
    Check that the circuit retimed with *Algorithm OPT2* starting from
    correlator2 is actually a synchronous circuit.
    """
    g = load_graph('../graphs/correlator2.dot')
    gr = opt2(g)
    self.assertTrue(check_if_synchronous_circuit(gr))
def proceed(self):
    config = self.pipeline.version.config
    graph = utils.load_graph('labeled-import.pickle', config)
    artists = graph.get_nodes()['Artist'].values()
    # Prune artists without a category.
    graph.prune([a for a in artists if len(a.categories) < 1])
    utils.save_graph(graph, 'labeled-import-prune', config)
    self.pipeline.update()
def __init__(self, graph):
    super().__init__()
    if isinstance(graph, nx.Graph):
        self.graph = graph
    elif isinstance(graph, str):
        self.graph = load_graph(graph)
    else:
        raise ValueError("unexpected graph type: {}".format(type(graph)))
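# Usage sketch for the constructor above ("GraphModel" is a hypothetical name
# for the enclosing class): pass either an in-memory networkx graph or a path
# that load_graph understands.
#
#   model_a = GraphModel(nx.karate_club_graph())  # wrap an existing graph
#   model_b = GraphModel('data/edges.graphml')    # or load from a file path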
def __init__(self, args):
    """
    MUSAE and AE machine constructor.
    :param args: Arguments object with the model hyperparameters.
    """
    self.args = args
    self.log = dict()
    self.graph = load_graph(args.graph_input)
    self.features = load_features(args.features_input)
def main(args):
    """
    Orbital role feature extraction.
    :param args: Arguments object.
    """
    tab_printer(args)
    graph = load_graph(args.graph_input)
    model = MotifCounterMachine(graph, args)
    model.extract_features()
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )
    if FLAGS.frozen_model:
        model = load_graph(FLAGS.frozen_model)
    else:
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3
    scores = list()
    scores_wout_mispredictions = list()
    inference_time = list()
    if FLAGS.input_dir != '':
        for root, dirs, files in os.walk(FLAGS.input_dir, topdown=False):
            counter = 0
            for name in tqdm(files):
                if counter >= FLAGS.max_imgs:
                    break
                res, inf_time = get_score_from_image(os.path.join(root, name), gpu_options, config, model)
                if res > 0:
                    scores_wout_mispredictions.append(res)
                scores.append(res)
                inference_time.append(inf_time)
                counter += 1
    else:
        res, inf_time = get_score_from_image(FLAGS.input_img, gpu_options, config, model)
        if res > 0:
            scores_wout_mispredictions.append(res)
        scores.append(res)
        inference_time.append(inf_time)
    print("Average score across all images: " + str(np.mean(scores)))
    print("Max score across all images: " + str(np.max(scores)))
    print("Average inference time: " + str(np.mean(inference_time)) + "ms")
    print("Average score disregarding mis-predictions: " + str(np.mean(scores_wout_mispredictions)))
    print("Minimum score disregarding mis-predictions: " + str(np.min(scores_wout_mispredictions)))
def main(argv=None):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )
    classes = load_coco_names(FLAGS.class_names)
    t0 = time.time()
    frozenGraph = load_graph(FLAGS.frozen_model)
    print("Loaded graph in {:.2f}s".format(time.time() - t0))
    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
    with tf.Session(graph=frozenGraph, config=config) as sess:
        t0 = time.time()
        print(FLAGS.input_img)
        cap = cv2.VideoCapture(FLAGS.input_img)
        # cap = cv2.VideoCapture(0)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        videoWriter = cv2.VideoWriter(
            "output.mp4", cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
            fps, (int(width), int(height)))
        while cap.isOpened():
            ret, frame = cap.read()
            if ret:
                frame = cv2.flip(frame, 0)
                img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
                img_resized = img_resized.astype(np.float32)
                detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                print("Predictions found in {:.2f}s".format(time.time() - t0))
                draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
                fimg = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                cv2.imshow("show", fimg)
                videoWriter.write(fimg)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break
        cap.release()
        videoWriter.release()
def train_idec(dataset):
    model = SDCN(500, 500, 2000, 2000, 500, 500,
                 n_input=args.n_input,
                 n_z=args.n_z,
                 n_clusters=args.n_clusters,
                 v=1.0).to(device)
    print(model)
    optimizer = Adam(model.parameters(), lr=args.lr)
    # KNN graph
    adj = load_graph(args.name, args.k)
    adj = adj.cuda()
    # Initialize cluster parameters
    data = torch.Tensor(dataset.x).to(device)
    y = dataset.y
    with torch.no_grad():
        _, _, _, _, z = model.ae(data)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(z.data.cpu().numpy())
    y_pred_last = y_pred
    model.cluster_layer.data = torch.tensor(kmeans.cluster_centers_).to(device)
    eva(y, y_pred, 'pae')
    for epoch in range(200):
        if epoch % 1 == 0:  # update_interval
            _, tmp_q, pred, _ = model(data, adj)
            tmp_q = tmp_q.data
            p = target_distribution(tmp_q)
            res1 = tmp_q.cpu().numpy().argmax(1)      # Q
            res2 = pred.data.cpu().numpy().argmax(1)  # Z
            res3 = p.data.cpu().numpy().argmax(1)     # P
            eva(y, res1, str(epoch) + 'Q')
            eva(y, res2, str(epoch) + 'Z')
            eva(y, res3, str(epoch) + 'P')
        x_bar, q, pred, _ = model(data, adj)
        kl_loss = F.kl_div(q.log(), p, reduction='batchmean')
        ce_loss = F.kl_div(pred.log(), p, reduction='batchmean')
        re_loss = F.mse_loss(x_bar, data)
        loss = 0.1 * kl_loss + 0.01 * ce_loss + re_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
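# target_distribution is referenced above but not defined here. A sketch of
# the usual DEC-style sharpened target that SDCN-style trainers typically use:
def target_distribution(q):
    # p_ij = (q_ij^2 / f_j) / sum_j'(q_ij'^2 / f_j'), where f_j = sum_i q_ij
    # is the soft cluster frequency; squaring emphasizes confident assignments.
    weight = q ** 2 / q.sum(0)
    return (weight.t() / weight.sum(1)).t()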
def main():
    (parser, options, args) = parse_args()
    if options.input and options.output:
        g = load_graph(options.input)
        res = gen_secondary_structure(g, options)
        write_file(options.output, res)
    else:
        print('Specify input and output.')
        parser.print_help()
        exit(1)
def show_graph_question_4():
    computer_graph = load_graph(NETWORK_URL)
    num_nodes = len(computer_graph.keys())
    prob = 0.02
    random_graph = random_ugraph(prob, num_nodes)
    m_nodes = 3
    up_graph = UPA(num_nodes, m_nodes)
    cg_set = get_resilience_targeted(computer_graph)
    rg_set = get_resilience_targeted(random_graph)
    up_set = get_resilience_targeted(up_graph)
    xlabel("Nodes removed")
    ylabel("Size of largest connected component")
    plot(range(0, len(cg_set)), cg_set, '-b', label="computer graph")
    plot(range(0, len(rg_set)), rg_set, '-r', label="random graph, p=0.02")
    plot(range(0, len(up_set)), up_set, '-g', label="UPA graph, m=3")
    # plot(range(0, 1239), [0.75 * (1239 - x) for x in range(0, 1239)])
    legend(loc="upper right")
    show()
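# random_ugraph is assumed above; a sketch of an ER-style undirected generator
# in the adjacency-dictionary representation that computer_graph appears to
# use (an assumption based on the .keys() call above):
import random

def random_ugraph(prob, num_nodes):
    graph = {node: set() for node in range(num_nodes)}
    for i in range(num_nodes):
        for j in range(i + 1, num_nodes):
            if random.random() < prob:  # include each possible edge with probability prob
                graph[i].add(j)
                graph[j].add(i)
    return graph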
def learn(data_folder, experts, learning_rate=.001, train_ratio=.8,
          validation_ratio=.1, test_ratio=.1, save_every=10, batch_size=2048,
          hidden_size=1024, dropout=.5, epochs=500, print_every=1,
          model_dir='.', perceptron=False, mem_ratio=.95):
    assert train_ratio + validation_ratio + test_ratio == 1, 'Train/validation/test ratios must sum up to 1'
    data = read_data(data_folder, train_ratio, validation_ratio, test_ratio)
    model_name = ('transfer_classifier_moe_epochs_{}_batch_{}_ratios_{}_{}_{}_'
                  'learning_rate_{}'.format(epochs, batch_size, train_ratio,
                                            validation_ratio, test_ratio, learning_rate))
    if perceptron:
        model_name = '{}_perceptron.pb'.format(model_name)
    else:
        model_name = '{}_dropout_{}_hidden_size_{}.pb'.format(model_name, dropout, hidden_size)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_ratio)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        local_experts = {}
        for model in os.listdir(experts):
            print('Loading {}'.format(model))
            load_graph(os.path.join(experts, model))
            stripped = model[20:]
            h5 = stripped[:stripped.find('_')]  # MESSY.
            local_experts[h5] = sess.graph.get_tensor_by_name('{}output:0'.format(h5))
        # Augment each split with the expert outputs.
        data.train._X = np.hstack([
            data.train.X,
            np.vstack([flow(sess, local_experts, x) for x in chunks(data.train.X, batch_size)])
        ])
        data.validation._X = np.hstack([data.validation.X, flow(sess, local_experts, data.validation.X)])
        data.test._X = np.hstack([data.test.X, flow(sess, local_experts, data.test.X)])
        x = tf.placeholder('float', shape=[None, data.train.X_features], name='input')
        y_ = tf.placeholder('float', shape=[None, data.train.Y_features], name='target')
        if perceptron:
            W = weight_variable([data.train.X_features, data.train.Y_features], name='weights')
            b = bias_variable([data.train.Y_features], name='bias')
            logits = tf.matmul(x, W) + b
        else:
            W_in = weight_variable([data.train.X_features, hidden_size], name='weights_in')
            b_in = bias_variable([hidden_size], name='bias_in')
            hidden = tf.matmul(x, W_in) + b_in
            relu = tf.nn.relu(hidden)
            keep_prob = tf.placeholder_with_default([1.], shape=None)
            hidden_dropout = tf.nn.dropout(relu, keep_prob)
            W_out = weight_variable([hidden_size, data.train.Y_features], name='weights_out')
            b_out = bias_variable([data.train.Y_features], name='bias_out')
            logits = tf.matmul(hidden_dropout, W_out) + b_out
        y = tf.nn.softmax(logits, name='output')
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, y_)
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        sess.run(tf.initialize_all_variables())
        last_epoch = 0
        t_epoch = time.time()
        while data.train.epoch <= epochs:
            epoch = data.train.epoch
            batch_x, batch_y = data.train.next_batch(batch_size)
            t_start = time.time()
            feed_dict = ({x: batch_x, y_: batch_y} if perceptron
                         else {x: batch_x, y_: batch_y, keep_prob: dropout})
            train_step.run(feed_dict=feed_dict)
            t_end = time.time() - t_start
            if epoch > last_epoch:
                if epoch % print_every == 0:
                    train_accuracy = accuracy.eval(feed_dict={x: batch_x, y_: batch_y})
                    validation_accuracy = accuracy.eval(feed_dict={x: data.validation.X, y_: data.validation.Y})
                    print('Epoch {} train accuracy: {}, validation accuracy: {}. '
                          '{} states/sec, {} secs/epoch.'.format(
                              epoch, train_accuracy, validation_accuracy,
                              batch_size / t_end, time.time() - t_epoch))
                if epoch % save_every == 0 or epoch == epochs:
                    output_graph_def = graph_util.convert_variables_to_constants(
                        sess, sess.graph.as_graph_def(), ['input', 'output'])
                    with gfile.FastGFile(os.path.join(model_dir, model_name), 'wb') as f:
                        f.write(output_graph_def.SerializeToString())
                t_epoch = time.time()
                last_epoch = epoch
        print('Trained model saved to {}'.format(os.path.join(model_dir, model_name)))
        if test_ratio > 0:
            test_accuracy = accuracy.eval(feed_dict={x: data.test.X, y_: data.test.Y})
            print('Evaluation on testing data: {}'.format(test_accuracy))
def augment_images(q, gpu, target, limit, mem_ratio, model_dir):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    print('GPU {}'.format(gpu))
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_ratio)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        x = tf.placeholder('string')
        original = tf.image.decode_jpeg(x, channels=3)
        transformed = transformations(original)
        adjusted = random.choice(adjustments)(transformed)
        encoded = tf.image.encode_jpeg(adjusted)
        load_graph(os.path.join(model_dir, 'classify_image_graph_def.pb'))
        next_last_layer = sess.graph.get_tensor_by_name('pool_3:0')
        while True:
            h5 = q.get()
            if h5 == KILL:
                break
            data = pd.read_hdf(h5, 'data')
            images = data.index
            augmentation_repeat = max(limit / len(images) - 1, 0)
            rest = augmentation_repeat - int(augmentation_repeat)
            if augmentation_repeat:
                print('{}: {} images, each image will be augmented {} times to achieve goal of {}'
                      ' images in total'.format(h5, len(images), augmentation_repeat, limit))
            else:
                print('{}: {} images present. No augmentation to be done, copying over file.'.format(h5, len(images)))
                shutil.copy(h5, target)
                continue
            t0 = time.time()
            states = []
            for jpg in images:
                try:
                    raw_data = gfile.FastGFile(jpg).read()
                    iterations = int(augmentation_repeat + 1) if random.random() < rest else int(augmentation_repeat)
                    for i in range(iterations):
                        augmented = sess.run(encoded, feed_dict={x: raw_data})
                        hidden_layer = sess.run(next_last_layer, {'DecodeJpeg/contents:0': augmented})
                        hidden_layer = np.squeeze(hidden_layer)
                        states.append(hidden_layer)
                except Exception as e:
                    print('Something went wrong when augmenting {}: \n\t{}'.format(jpg, e))
            print('Time spent augmenting images in {}: {}'.format(h5, time.time() - t0))
            X = np.vstack(states)
            columns = ['f{}'.format(i) for i in range(X.shape[1])]
            df = pd.DataFrame(data=X, columns=columns)
            h5name = os.path.join(target, os.path.basename(h5))
            with pd.HDFStore(h5name, mode='w', complevel=9, complib='blosc') as store:
                store.append('data', df)
def evaluate(model, h5_files, top_k, categories=None, out_file='stats.h5'):
    h5_files = sorted(h5_files)
    plotly_data = []
    with tf.Session() as sess:
        print('Evaluating {}'.format(model))
        print('NOTE: ALL NUMBER TRIPLES ARE OF THE FORM (mean, median, standard deviation)')
        load_graph(model)
        transfer_predictor = sess.graph.get_tensor_by_name('output:0')
        all_accuracies = []
        all_top_k_accuracy = []
        all_top_level_accuracy = []
        stats = []
        with pd.HDFStore(out_file, mode='w', complevel=9, complib='blosc') as store:
            for target, h5 in enumerate(h5_files):
                data = pd.read_hdf(h5)
                category_i = os.path.basename(h5).replace('.h5', '')
                predictions = sess.run(transfer_predictor, {'input:0': data})
                top_level_accuracy = np.mean([
                    categories[category_i]['parent'] ==
                    categories[os.path.basename(h5_files[prediction]).replace('.h5', '')]['parent']
                    for prediction in np.argmax(predictions, axis=1)
                ])
                correct = np.argmax(predictions, axis=1) == target
                accuracy = np.mean(correct)
                top_k_accuracy = np.mean([
                    target in np.argsort(prediction)[-top_k:] for prediction in predictions
                ])
                correct_scores = np.max(predictions[correct], axis=1)
                correct_x = np.linspace(0, 1, num=len(correct_scores))
                correct_confidence = np.mean(correct_scores)
                correct_confidence_median = np.median(np.max(predictions[correct], axis=1))
                correct_confidence_std = np.std(np.max(predictions[correct], axis=1))
                category = categories[category_i]['name'] if categories else category_i
                sorted_correct = sorted(zip(correct_scores, data.index[correct]), key=lambda x: x[0])
                sorted_correct_scores, sorted_correct_paths = zip(*sorted_correct)
                df = pd.DataFrame(data=list(sorted_correct_scores), index=sorted_correct_paths, columns=['score'])
                df.index.name = 'filename'
                store.append('{}/correct'.format(category_i), df)
                wrong_scores = np.max(predictions[~correct], axis=1)
                wrong_categories_i = np.argmax(predictions[~correct], axis=1)
                wrong_x = np.linspace(0, 1, num=len(wrong_scores))
                wrong_confidence = np.mean(wrong_scores)
                wrong_confidence_median = np.median(np.max(predictions[~correct], axis=1))
                wrong_confidence_std = np.std(np.max(predictions[~correct], axis=1))
                wrong_categories = [os.path.basename(h5_files[i]).replace('.h5', '') for i in wrong_categories_i]
                sorted_wrong = sorted(zip(wrong_scores, data.index[~correct], wrong_categories), key=lambda x: x[0])
                sorted_wrong_scores, sorted_wrong_paths, sorted_wrong_categories = zip(*sorted_wrong)
                df = pd.DataFrame(data=list(zip(sorted_wrong_scores, sorted_wrong_categories)),
                                  index=sorted_wrong_paths, columns=['score', 'category'])
                df.index.name = 'filename'
                store.append('{}/wrong/out'.format(category_i), df)
                spread = defaultdict(list)
                for score, path, wrong_category in sorted_wrong:
                    spread[wrong_category].append((path, score))
                for wrong_category, X in spread.items():
                    paths, scores = zip(*X)
                    df = pd.DataFrame(data=list(zip(scores, [category_i] * len(paths))),
                                      index=paths, columns=['score', 'category'])
                    df.index.name = 'filename'
                    store.append('{}/wrong/in'.format(wrong_category), df, min_itemsize={'index': 50})
                # plotly_data.append(go.Scatter(
                #     x=wrong_x,
                #     y=sorted_wrong_scores,
                #     mode='markers',
                #     name=category,
                #     hoverinfo='name+y',
                #     text=[json.dumps({'path': path, 'prediction': prediction})
                #           for path, prediction in zip(sorted_wrong_paths, sorted_wrong_categories)]))
                print('Category {}, {} images.\n'
                      '\t accuracy: {} top level accuracy: {} top_{} accuracy: {} '
                      'correct confidence: {}, {}, {} wrong confidence: {}, {}, {} '
                      'diff: {}, {}, {}'.format(
                          category_i, len(data), pf(accuracy), pf(top_level_accuracy),
                          top_k, pf(top_k_accuracy),
                          pf(correct_confidence), pf(correct_confidence_median), pf(correct_confidence_std),
                          pf(wrong_confidence), pf(wrong_confidence_median), pf(wrong_confidence_std),
                          pf(correct_confidence - wrong_confidence),
                          pf(correct_confidence_median - wrong_confidence_median),
                          pf(correct_confidence_std - wrong_confidence_std)))
                all_accuracies.append(accuracy)
                all_top_k_accuracy.append(top_k_accuracy)
                all_top_level_accuracy.append(top_level_accuracy)
                stats.append([category, pf(accuracy), pf(top_k_accuracy), pf(correct_confidence),
                              wrong_categories, wrong_scores, data.index[~correct]])
    mean_accuracy = pf(np.mean(all_accuracies))
    top_k_accuracy = pf(np.mean(all_top_k_accuracy))
    top_level_accuracy = pf(np.mean(all_top_level_accuracy))
    print('Average accuracy across categories: {}'.format(mean_accuracy))
    print('Average top_{} accuracy across categories: {}'.format(top_k, top_k_accuracy))
    print('Average top level accuracy across categories: {}'.format(top_level_accuracy))
    tf.reset_default_graph()
    return plotly_data, mean_accuracy, top_k_accuracy, stats
def learn(train_states, test_states, model, learning_rate=0.0001, save_every=10,
          batch_size=2048, hidden_size=2048, dropout=0.5, epochs=500,
          print_every=1, model_dir=".", perceptron=False, mem_ratio=0.95):
    data = read_data(train_states, test_states)
    model_name = "trust_classifier_epochs_{}_batch_{}_learning_rate_{}".format(epochs, batch_size, learning_rate)
    if perceptron:
        model_name = "{}_perceptron.pb".format(model_name)
    else:
        model_name = "{}_dropout_{}_hidden_size_{}.pb".format(model_name, dropout, hidden_size)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_ratio)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        load_graph(model)
        transfer_predictor = sess.graph.get_tensor_by_name("output:0")
        # Run the pre-trained model over the training and test data; the new
        # target becomes whether its prediction matched the original label.
        data.train._X = np.vstack(
            [sess.run(transfer_predictor, {"input:0": chunk}) for chunk in chunks(data.train.X, 10)]
        )
        answer = tf.equal(tf.argmax(data.train.X, 1), tf.argmax(data.train.Y, 1))
        data.train._Y = tf.one_hot(tf.to_int32(answer), depth=2).eval()
        data.test._X = sess.run(transfer_predictor, {"input:0": data.test.X})
        answer = tf.equal(tf.argmax(data.test.X, 1), tf.argmax(data.test.Y, 1))
        data.test._Y = tf.one_hot(tf.to_int32(answer), depth=2).eval()
        x = tf.placeholder("float", shape=[None, data.train.X_features], name="input_b")
        y_ = tf.placeholder("float", shape=[None, data.train.Y_features], name="target")
        if perceptron:
            W = weight_variable([data.train.X_features, data.train.Y_features], name="weights")
            b = bias_variable([data.train.Y_features], name="bias")
            logits = tf.matmul(x, W) + b
        else:
            W_in = weight_variable([data.train.X_features, hidden_size], name="weights_in")
            b_in = bias_variable([hidden_size], name="bias_in")
            hidden = tf.matmul(x, W_in) + b_in
            relu = tf.nn.relu(hidden)
            keep_prob = tf.placeholder_with_default([1.0], shape=None)
            hidden_dropout = tf.nn.dropout(relu, keep_prob)
            W_out = weight_variable([hidden_size, data.train.Y_features], name="weights_out")
            b_out = bias_variable([data.train.Y_features], name="bias_out")
            logits = tf.matmul(hidden_dropout, W_out) + b_out
        # Loss & train
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, y_)
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
        # Evaluation
        y = tf.nn.softmax(logits, name="output_b")
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        sess.run(tf.initialize_all_variables())
        last_epoch = 0
        t_epoch = time.time()
        while data.train.epoch <= epochs:
            epoch = data.train.epoch
            batch_x, batch_y = data.train.next_batch(batch_size)
            t_start = time.time()
            feed_dict = ({x: batch_x, y_: batch_y} if perceptron
                         else {x: batch_x, y_: batch_y, keep_prob: dropout})
            train_step.run(feed_dict=feed_dict)
            t_end = time.time() - t_start
            if epoch > last_epoch:
                if epoch % print_every == 0:
                    train_accuracy_mean = accuracy.eval(feed_dict={x: batch_x, y_: batch_y})
                    validation_accuracy_mean = accuracy.eval(feed_dict={x: data.test.X, y_: data.test.Y})
                    print("Epoch {} train accuracy: {}, test accuracy: {}. "
                          "{} states/sec, {} secs/epoch.".format(
                              epoch, train_accuracy_mean, validation_accuracy_mean,
                              batch_size / t_end, time.time() - t_epoch))
                if epoch % save_every == 0 or epoch == epochs:
                    output_graph_def = graph_util.convert_variables_to_constants(
                        sess, sess.graph.as_graph_def(), ["input_b", "output_b"])
                    with gfile.FastGFile(os.path.join(model_dir, model_name), "wb") as f:
                        f.write(output_graph_def.SerializeToString())
                t_epoch = time.time()
                last_epoch = epoch
        print("Trained model saved to {}".format(os.path.join(model_dir, model_name)))