def run_program(args):
    """Predict top-10 items for every test user and log the evaluation message.

    For each user in the test labels a heuristic program is created from the
    knowledge-graph metapaths, the pickled raw paths and the stored path
    counts, executed, and its collected results are ranked by the mean of the
    last score entry.

    Args:
        args: Namespace providing ``dataset``, ``infer_path_data`` and
            ``sample_size`` (it is also forwarded to the model and executor
            constructors).
    """
    kg = utils.load_kg(args.dataset)
    kg_mask = KGMask(kg)
    train_labels = utils.load_labels(args.dataset, 'train')
    test_labels = utils.load_labels(args.dataset, 'test')
    # Training path frequencies.
    path_counts = utils.load_path_count(args.dataset)
    # Test paths with scores.
    with open(args.infer_path_data, 'rb') as path_file:
        raw_paths = pickle.load(path_file)

    symbolic_model = create_symbolic_model(args, kg, train=False)
    executor = MetaProgramExecutor(symbolic_model, kg_mask, args)

    pred_labels = {}
    progress = tqdm(total=len(test_labels))
    for uid in test_labels:
        program = create_heuristic_program(
            kg.metapaths, raw_paths[uid], path_counts[uid], args.sample_size)
        executor.execute(program, uid, train_labels[uid])

        # Pair the last node of each result with the mean of its last score.
        scored = []
        for result in executor.collect_results(program):
            scored.append((result[0][-1], np.mean(result[1][-1])))
        scored.sort(key=lambda pair: pair[1], reverse=True)

        pred_labels[uid] = [item for item, _ in scored[:10]]
        progress.update(1)

    logger.info(evaluate_with_insufficient_pred(pred_labels, test_labels))
def calc_experiment_params(args):
    """Load the graph, label sets and model-specific matrices for one experiment.

    Args:
        args: Namespace providing ``graph``, ``all_labels``, ``seed_set``,
            ``holdout``, ``features`` (path or ``None``) and ``model``.

    Returns:
        A single-element list holding the tuple
        ``(G, num_classes, known_labels, cluster_distribution, holdout,
        nodes_cluster, node2features, n, args, special_params)``.
    """
    G = utils.load_graph(args.graph)
    nodes_cluster = utils.load_labels(args.all_labels)
    known_labels = utils.load_labels(args.seed_set)
    holdout = utils.load_labels(args.holdout)

    node2features = None
    if args.features is not None:
        # NOTE(review): unpickling executes arbitrary code; only point
        # ``args.features`` at trusted files.
        # The open mode is left at its default, exactly as before; the
        # context manager only guarantees the handle gets closed.
        with open(args.features) as features_file:
            node2features = cPickle.load(features_file)

    # Both checks are evaluated independently, as in the original code.
    special_params = {}
    if args.model in ("norm_lp", "feature_diffusion_norm_lp"):
        special_params["M"] = label_propagation.get_graph_normalized_laplacian(G)
    if args.model in ("lp", 'feature_diffusion_lp'):
        special_params["M"] = label_propagation.get_graph_laplacian(G)

    num_classes = max(nodes_cluster[node_id] for node_id in nodes_cluster) + 1
    # Largest node identifier in the graph (not the number of nodes).
    n = max(G.nodes())

    cluster_count = defaultdict(int)
    for node_id in nodes_cluster:
        cluster_count[nodes_cluster[node_id]] += 1

    # Fraction of labelled nodes that fall into each cluster.
    cluster_distribution = defaultdict(int)
    total_labelled = float(len(nodes_cluster))
    for cluster_id in cluster_count:
        cluster_distribution[cluster_id] = cluster_count[cluster_id] / total_labelled

    return [
        (G, num_classes, known_labels, cluster_distribution, holdout,
         nodes_cluster, node2features, n, args, special_params)
    ]
def __init__(self, file_labels, audio_dir, max_length_sec, online=True, feats_fir=None, calc_flevel=None):
    """Set up a custom dataset that can be fed to a DataLoader.

    Args:
        file_labels (string): Path to the csv file with the labels.
        audio_dir (string): Path to the WAV utterances; the set name is
            appended to it by plain string concatenation.
        max_length_sec (int): Maximum length in seconds to keep from the
            utterances.
        online (boolean, optional): if True, features are computed on the
            fly; if False, the '.npy' feature files are listed from
            ``feats_fir``. Default: True.
        feats_fir (string, optional): Directory containing the feature
            files. Its base name is also used as the set name, so
            NOTE(review): despite the ``None`` default it looks required
            even when ``online`` is True - confirm with callers.
        calc_flevel (callable, optional): Calculation to apply on a sample,
            e.g. fbanks or MFCCs of the audio signals. Use when online=True.
    """
    # The set name is the last path component of the features directory.
    set_name = os.path.basename(feats_fir)

    self.labels = utils.load_labels(file_labels, set_name)
    self.list_wavs = utils.get_files_abspaths(
        path=audio_dir + set_name, file_type='.wav')

    self.name_set = set_name
    self.calc_flevel = calc_flevel
    self.online = online
    self.max_length_sec = max_length_sec

    if online:
        return
    # Offline mode: pre-computed features are read from disk.
    self.list_feature_files = utils.get_files_abspaths(
        path=feats_fir, file_type='.npy')
def main(args):
    """Plot M0 vs. NEPC distributions for every strongly separating feature.

    Each feature column is compared between the M0 and NEPC sets with
    ``ttest_ind``; when the p-value is below 1e-10 the result is printed and
    both distributions are saved to ``<args.dst>/f_<feature>.png``.

    Args:
        args: Namespace providing ``src`` (features), ``labsrc`` (labels)
            and ``dst`` (output directory).
    """
    feat, case_ids = load_features(args.src, zscore=True)
    lab = load_labels(args.labsrc)

    ((nepc_f, nepc_lab), (m0_f, m0_lab), (m0p_f, m0p_lab),
     (m1_f, m1_lab)) = split_sets(feat, lab)

    # Group names in the same order as the concatenation below (M0 first).
    # The second label used to be misspelled 'NPEC'.
    yvect = ['M0'] * m0_f.shape[0] + ['NEPC'] * nepc_f.shape[0]

    plt.figure()
    for f in feat.columns:
        m0_ = m0_f.loc[:, f]
        nepc_ = nepc_f.loc[:, f]
        tt = ttest_ind(m0_, nepc_)
        if tt.pvalue >= 1e-10:
            continue

        # Long-format frame kept for the (currently disabled) box plot.
        feature_data = pd.DataFrame({
            'group': yvect,
            'feature': np.concatenate([m0_, nepc_], axis=0),
        })
        print(f, tt)

        out = os.path.join(args.dst, 'f_{}.png'.format(f))
        plt.clf()
        # sns.boxplot(x='group', y='feature', data=feature_data)
        sns.distplot(m0_, label='M0')
        sns.distplot(nepc_, label='NEPC')
        plt.legend()
        plt.title('Feature {}'.format(f))
        plt.savefig(out, bbox_inches='tight')
def task_bitcoinalpha(args):
    """Train a GCN node classifier on the dataset named by ``args.dataset``.

    The adjacency/feature pair and the labels are read from the XAL data
    directory, the class count is taken as ``max(label) + 1`` and the
    adjacency is passed to training without normalisation.
    """
    xal_dir = "../Generate_XA_Data/XAL"
    adjacency, features = utils.load_XA(args.dataset, datadir=xal_dir)
    node_labels = utils.load_labels(args.dataset, datadir=xal_dir)

    num_classes = max(node_labels) + 1
    print("NUMBER OF CLASS IS: " + str(num_classes))

    input_dim = features.shape[1]
    print("Input dimension is: ", input_dim)

    classifier = models.GcnEncoderNode(
        input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    train_node_classifier.train(
        classifier, adjacency, features, node_labels, args,
        normalize_adjacency=False)
def bitcoin(args):
    """Explain every node whose adjacency column sum exceeds 2 and save the result.

    A ``GcnEncoderNode`` is rebuilt from the checkpoint returned by
    ``utils.load_ckpt``; the explanations are printed and written with
    ``np.save`` to the file name produced by ``utils.gen_filesave``.
    """
    xal_dir = "../Generate_XA_Data/XAL"
    A, X = utils.load_XA(args.dataset, datadir=xal_dir)
    L = utils.load_labels(args.dataset, datadir=xal_dir)

    num_classes = max(L) + 1
    input_dim = X.shape[1]
    ckpt = utils.load_ckpt(args)
    print("input dim: ", input_dim, "; num classes: ", num_classes)

    model = models.GcnEncoderNode(
        input_dim=input_dim,
        hidden_dim=args.hidden_dim,
        embedding_dim=args.output_dim,
        label_dim=num_classes,
        num_layers=args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    model.load_state_dict(ckpt["model_state"])

    explainer = pe.Node_Explainer(
        model, A, X, ckpt["save_data"]["pred"], args.num_gc_layers)

    # Column sums of the adjacency matrix; only nodes above 2 are explained.
    column_sums = np.sum(A, axis=0)
    node_to_explain = [idx for [idx] in np.argwhere(column_sums > 2)]

    explanations = explainer.explain_range(
        node_to_explain,
        num_samples=args.num_perturb_samples,
        top_node=args.top_node,
    )
    print(explanations)

    np.save(utils.gen_filesave(args), explanations)
def main(args):
    """Drop highly correlated features (threshold 0.8) and run t-SNE on the rest.

    Args:
        args: Namespace providing ``src`` (features) and ``labsrc`` (labels).
    """
    feat, case_ids = load_features(args.src)
    lab = load_labels(args.labsrc)

    reduced = drop_high_cor(feat, cor_thresh=0.8)
    print('Features after high cor drop')
    print(reduced.head())

    run_tsne(reduced, lab)
def render_gen(args):
    """Generator driving detection, filtering, the controller and the overlay.

    The first value yielded is the input image size of the initial engine.
    Afterwards each ``send((tensor, layout, command))`` yields the overlay
    for that frame, or ``None`` while the overlay is switched off.
    Command ``'o'`` toggles the overlay and ``'n'`` switches to the next
    engine; both take effect from the following frame.
    """
    fps_counter = utils.avg_fps_counter(30)

    engines, titles = utils.make_engines(args.model, DetectionEngine)
    assert utils.same_input_image_sizes(engines)
    engines = itertools.cycle(engines)
    engine = next(engines)

    labels = None
    if args.labels:
        labels = utils.load_labels(args.labels)

    filtered_labels = None
    if args.filter:
        filtered_labels = set(name.strip() for name in args.filter.split(','))

    get_color = make_get_color(args.color, labels)

    draw_overlay = True
    yield utils.input_image_size(engine)

    output = None
    while True:
        tensor, layout, command = (yield output)
        inference_rate = next(fps_counter)

        if not draw_overlay:
            output = None
        else:
            started = time.monotonic()
            # Changed to detect_with_input_tensor. Res is same
            # See https://coral.googlesource.com/edgetpuvision/+/refs/heads/4.14.98%5E%21/#F0
            raw_objs = engine.detect_with_input_tensor(
                tensor, threshold=args.threshold, top_k=args.top_k)
            inference_time = time.monotonic() - started

            converted = [convert(raw, labels) for raw in raw_objs]

            # Label filter first (only when both labels and a filter exist),
            # then the bounding-box area window.
            use_label_filter = bool(labels and filtered_labels)
            objs = []
            for det in converted:
                if use_label_filter and det.label not in filtered_labels:
                    continue
                if args.min_area <= det.bbox.area() <= args.max_area:
                    objs.append(det)

            if args.print:
                print_results(inference_rate, objs)

            autoturret_render_artifacts = controller.run(objs)
            output = overlay(titles[engine], objs, get_color, inference_time,
                             inference_rate, layout,
                             autoturret_render_artifacts)

        if command == 'o':
            draw_overlay = not draw_overlay
        elif command == 'n':
            engine = next(engines)
def main():
    """Download the MobileNet checkpoint and run the TFLite prediction on it."""
    multiplier, input_size = 1.0, 224
    modelname = "mobilenet_{}_{}_{}".format(VERSION, multiplier, input_size)

    labels = load_labels()
    download_checkpoint(multiplier, input_size)

    # The .tflite file name is fixed; only the directory depends on the
    # version/multiplier/size triple.
    model_dir = os.path.join(SAVEDIR, modelname)
    checkpoint = os.path.join(model_dir, "mobilenet_v2_1.0_224.tflite")
    predict_using_tflite(checkpoint, labels)
def load_globals():
    """Populate the module globals ``MODEL`` and ``FLOWER_SPECIES_NAMES``.

    Both are built from the YAML configuration at ``./../src/ModelConfig.yaml``.
    """
    global MODEL, FLOWER_SPECIES_NAMES

    # this initial process is just brought over from the sample_prediction.py
    with open("./../src/ModelConfig.yaml", "r") as config_file:
        model_config = yaml.safe_load(config_file)

    MODEL = load_model(config=model_config)
    FLOWER_SPECIES_NAMES = load_labels(config=model_config)
def main():
    """Parse the CLI arguments, build the data loaders and model, then train."""
    # args
    args = build_parser().parse_args()
    check_args(args)
    check_args_to_run(args)

    # data
    print('\n===== Starting preparing data =====\n')
    labels = load_labels()
    images = load_images(mode='train')
    # TODO: discuss
    train_images, val_images, train_labels, val_labels = train_test_split(
        images, labels, test_size=args.test_size)

    train_dataset = BengaliTrainDataset(
        images=train_images, labels=train_labels, size=args.image_size)
    val_dataset = BengaliTrainDataset(
        images=val_images, labels=val_labels, size=args.image_size)

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=os.cpu_count(),
    )
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=512,
        shuffle=False,
        num_workers=os.cpu_count(),
    )
    print('\n===== Completed preparing data =====')

    # model
    criterions = build_loss(args)
    base_cnn_model = BaseCNNModel(
        model_name=args.model_name,
        hidden_dim=args.hidden_dim,
        dropout=args.dropout,
        activation=args.activation,
    )
    optimizer = build_optimizer(args, base_cnn_model)
    scheduler = build_scheduler(args, optimizer)
    model = BengaliLightningModel(
        base_model=base_cnn_model,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        criterions=criterions,
        optimizer=optimizer,
        scheduler=scheduler,
    )

    # callbacks
    filepath = f'/home/jarvis1121/AI/Kaggle/Bengali/kaggle-Bengali/models/trial_{int(args.exp)}'
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        filepath=filepath, monitor='val_loss', verbose=1, mode='min')

    # train
    print('\n===== Starting training =====')
    trainer = pl.Trainer(
        max_epochs=args.epochs,
        gpus=args.gpus,
        early_stop_callback=False,
        checkpoint_callback=checkpoint_callback,
    )
    trainer.fit(model)
    print('\n===== End training =====')
def render_gen(self, args1):
    """Generator running detection and building the overlay for each frame.

    ``args1`` is not read; the options are parsed again from ``self.parser``.
    The first value yielded is the input image size of the initial engine.
    Afterwards each ``send((tensor, layout, command))`` yields the overlay
    for that frame, or ``None`` while the overlay is switched off.
    Command ``'o'`` toggles the overlay and ``'n'`` switches to the next
    engine; both take effect from the following frame.
    """
    fps_counter = utils.avg_fps_counter(30)
    args = self.parser.parse_args()

    engines, titles = utils.make_engines(args.model, DetectionEngine)
    assert utils.same_input_image_sizes(engines)
    engines = itertools.cycle(engines)
    engine = next(engines)

    labels = None
    if args.labels:
        labels = utils.load_labels(args.labels)

    filtered_labels = None
    if args.filter:
        filtered_labels = set(name.strip() for name in args.filter.split(','))

    get_color = make_get_color(args.color, labels)

    draw_overlay = True
    yield utils.input_image_size(engine)

    output = None
    while True:
        tensor, layout, command = (yield output)
        inference_rate = next(fps_counter)

        if not draw_overlay:
            output = None
        else:
            started = time.monotonic()
            raw_objs = engine.detect_with_input_tensor(
                tensor, threshold=args.threshold, top_k=args.top_k)
            inference_time = time.monotonic() - started

            converted = [convert(raw, labels) for raw in raw_objs]

            # Label filter first (only when both labels and a filter exist),
            # then the bounding-box area window.
            use_label_filter = bool(labels and filtered_labels)
            objs = []
            for det in converted:
                if use_label_filter and det.label not in filtered_labels:
                    continue
                if args.min_area <= det.bbox.area() <= args.max_area:
                    objs.append(det)

            if args.print:
                print_results(inference_rate, objs)

            output = overlay(titles[engine], objs, get_color, inference_time,
                             inference_rate, layout)

        if command == 'o':
            draw_overlay = not draw_overlay
        elif command == 'n':
            engine = next(engines)
def task_syn(args):
    """Explain nodes of a synthetic dataset with a checkpointed GCN and save them.

    When ``args.explain_node`` is ``None`` a dataset-specific range of nodes
    is explained (for a dataset without a preset the result stays an empty
    dict); otherwise only the requested node is explained. The result is
    printed and written with ``np.save`` to the name produced by
    ``utils.gen_filesave``.

    Args:
        args: Namespace providing ``dataset``, ``explain_node``,
            ``num_perturb_samples``, ``top_node`` and the model
            hyper-parameters.
    """
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    ckpt = utils.load_ckpt(args)
    print("input dim: ", input_dim, "; num classes: ", num_classes)

    model = models.GcnEncoderNode(
        input_dim=input_dim,
        hidden_dim=args.hidden_dim,
        embedding_dim=args.output_dim,
        label_dim=num_classes,
        num_layers=args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    model.load_state_dict(ckpt["model_state"])
    pred = ckpt["save_data"]["pred"]
    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)

    # dataset -> (node-id ranges to explain, extra explain_range keywords)
    range_presets = {
        'syn1': ([range(300, 700)], {}),
        'syn2': ([range(300, 700), range(1000, 1400)],
                 {'pred_threshold': 0.1}),
        'syn3': ([range(300, 1020)], {'pred_threshold': 0.05}),
        'syn4': ([range(511, 871)], {'pred_threshold': 0.1}),
        'syn5': ([range(511, 1231)], {'pred_threshold': 0.05}),
        'syn6': ([range(300, 700)], {}),
    }

    explanations = {}
    if args.explain_node is None:
        if args.dataset in range_presets:
            node_ranges, extra_kwargs = range_presets[args.dataset]
            nodes = [node for node_range in node_ranges for node in node_range]
            explanations = explainer.explain_range(
                nodes,
                num_samples=args.num_perturb_samples,
                top_node=args.top_node,
                **extra_kwargs
            )
    else:
        explanation = explainer.explain(
            args.explain_node,
            num_samples=args.num_perturb_samples,
            top_node=args.top_node,
        )
        print(explanation)
        explanations[args.explain_node] = explanation

    print(explanations)
    savename = utils.gen_filesave(args)
    np.save(savename, explanations)
def estimate_path_count(args):
    """Store, per training user, the average path count of every metapath.

    For each user the counts returned by ``kg.count_paths_with_target`` are
    summed over the user's training items for every metapath index and then
    divided by the number of those items.
    """
    kg = utils.load_kg(args.dataset)
    num_mp = len(kg.metapaths)
    train_labels = utils.load_labels(args.dataset, 'train')

    counts = {}
    progress = tqdm(total=len(train_labels))
    for uid in train_labels:
        pids = train_labels[uid]
        totals = np.zeros(num_mp)
        for pid in pids:
            for mpid in range(num_mp):
                totals[mpid] += kg.count_paths_with_target(mpid, uid, pid, 50)
        counts[uid] = totals / len(pids)
        progress.update(1)

    utils.save_path_count(args.dataset, counts)
def main(args):
    """Plot the feature-importance distribution and print the top ``args.n`` features.

    Args:
        args: Namespace providing ``src`` (tab-separated importance table
            without header), ``featsrc``, ``labelsrc`` and ``n``.
    """
    feat_importance = pd.read_csv(
        args.src, sep='\t', index_col=0, header=None)
    # Loaded as in the original flow; the values are not used further here.
    features, _ = load_features(args.featsrc, zscore=True)
    labels = load_labels(args.labelsrc)

    # Column 1 holds the importance values (column 0 became the index).
    feat_importance.sort_values(1, ascending=False, inplace=True)

    sns.distplot(feat_importance)
    plt.savefig('tile_feature_importance_dist.png', bbox_inches='tight')

    index_values = np.squeeze(feat_importance.index.values)
    importance_values = np.squeeze(feat_importance.values)
    sns.regplot(index_values, importance_values)

    feat_importance = feat_importance.iloc[:args.n, :]
    print('highest feature importance:')
    for name in feat_importance.index.values:
        print(name, feat_importance.loc[name].values)
def render_gen(args):
    """Generator running classification and building the overlay for each frame.

    The first value yielded is the input image size of the initial engine.
    Afterwards each ``send((tensor, layout, command))`` yields the overlay
    for that frame, or ``None`` while the overlay is switched off.
    Command ``'o'`` toggles the overlay and ``'n'`` switches to the next
    engine; both take effect from the following frame.
    """
    acc = accumulator(size=args.window, top_k=args.top_k)
    acc.send(None)  # Initialize.

    fps_counter = utils.avg_fps_counter(30)

    engines, titles = utils.make_engines(args.model, ClassificationEngine)
    assert utils.same_input_image_sizes(engines)
    engines = itertools.cycle(engines)
    engine = next(engines)

    labels = utils.load_labels(args.labels)
    draw_overlay = True

    yield utils.input_image_size(engine)

    output = None
    while True:
        tensor, layout, command = (yield output)
        inference_rate = next(fps_counter)

        if not draw_overlay:
            output = None
        else:
            started = time.monotonic()
            raw_results = engine.classify_with_input_tensor(
                tensor, threshold=args.threshold, top_k=args.top_k)
            inference_time = time.monotonic() - started

            # Replace class indices by their label before accumulating.
            named = [(labels[index], score) for index, score in raw_results]
            results = acc.send(named)

            if args.print:
                print_results(inference_rate, results)

            output = overlay(titles[engine], results, inference_time,
                             inference_rate, layout)

        if command == 'o':
            draw_overlay = not draw_overlay
        elif command == 'n':
            engine = next(engines)
def main():
    """Optionally preprocess the data, then run dev, train or test mode.

    The mode is chosen by the first set flag among ``args.dev``,
    ``args.train`` and ``args.test``. With an ensemble size of 1 the single
    ``model_path`` is used; otherwise ``model_path_1`` ... ``model_path_N``.
    """
    args = cmdparser()
    config = get_config(args.config)

    if args.preprocess:
        utils.preprocess(
            config['raw_path'],
            config['train_path'],
            config['dev_path'],
            config['label_path'],
            config['stop_word_path'],
            config['vocabulary_path'],
        )

    labels = utils.load_labels(config['label_path'])
    vocabulary = utils.load_vocabulary(config['vocabulary_path'])
    stop_words = utils.load_stop_words(config['stop_word_path'])

    def iter_model_paths():
        # Lazily yields the configured model path(s), so each config key is
        # only looked up when its model is about to be used.
        if int(config['ensemble_size']) == 1:
            yield config['model_path']
        else:
            for i in range(int(config['ensemble_size'])):
                yield config[f'model_path_{i+1}']

    if args.dev:
        train(config, vocabulary, labels, stop_words, save_path='', mode='dev')
    elif args.train:
        for model_path in iter_model_paths():
            train(config, vocabulary, labels, stop_words,
                  save_path=model_path, mode='train')
    elif args.test:
        test(config, vocabulary, labels, stop_words,
             save_path=list(iter_model_paths()))
def api_call():
    """Classify an uploaded image and return the two best predictions as JSON.

    The uploaded file is stored in the upload folder under its sanitised
    name, converted to a 299x299 input tensor and run through the session.
    The five best classes are printed; the two best are returned.

    Returns:
        A JSON response holding a one-element list with the keys
        ``Prediction1``, ``Confidence1``, ``Prediction2`` and ``Confidence2``.
    """
    f = request.files['file']
    filename = secure_filename(f.filename)
    # Build the destination once instead of re-joining it for every use.
    file_name = os.path.join(app.config['UPLOAD_FOLDER'], str(filename))
    f.save(file_name)

    # The image object itself is not used; opening it only checks that the
    # upload can be identified as an image. The context manager closes the
    # handle, which previously leaked on every request.
    with Image.open(file_name):
        pass
    #image_resized = image.resize([299,299], Image.ANTIALIAS)

    input_height = 299
    input_width = 299
    input_mean = 128
    input_std = 128
    t = read_tensor_from_image_file(file_name,
                                    input_height=input_height,
                                    input_width=input_width,
                                    input_mean=input_mean,
                                    input_std=input_std)

    results = sess.run(output_operation.outputs[0],
                       {input_operation.outputs[0]: t})
    results = np.squeeze(results)

    # Indices of the five highest scores, best first.
    top_k = results.argsort()[-5:][::-1]
    labels = load_labels(label_file)
    for i in top_k:
        print(labels[i], results[i])

    # Save images to their original data directory if probability of prediction > threshold --- for retraining
    #if results[top_k[0]]>0.70:
    #    shutil.copy(os.path.join(app.config['UPLOAD_FOLDER'],str(filename)), "/home/ubuntu/crop_classification_updated/data_dir/train_dir/{0}/{1}".format(data_dict[labels[top_k[0]]],filename))
    #    print ('Saving image to: /home/ubuntu/crop_classification_updated/data_dir/train_dir/{0}/{1}'.format(data_dict[labels[top_k[0]]],filename))

    result_list = [{
        'Prediction1': '%s' % (labels[top_k[0]]),
        'Confidence1': '%s' % (results[top_k[0]]),
        'Prediction2': '%s' % (labels[top_k[1]]),
        'Confidence2': '%s' % (results[top_k[1]])
    }]
    return jsonify(result_list)
def infer_paths(args):
    """Infer up to 20 paths per metapath for every training user and pickle them.

    The result maps ``uid -> {metapath index -> paths}`` and is written to
    ``args.infer_path_data``. Each user's own training items are passed to
    the model as excluded pids.
    """
    kg = utils.load_kg(args.dataset)
    model = create_symbolic_model(args, kg, train=False)
    train_labels = utils.load_labels(args.dataset, 'train')
    train_uids = list(train_labels.keys())
    kg_mask = KGMask(kg)

    predicts = {}
    progress = tqdm(total=len(train_uids))
    for uid in train_uids:
        per_metapath = predicts[uid] = {}
        for mpid in range(len(kg.metapaths)):
            per_metapath[mpid] = model.infer_with_path(
                kg.metapaths[mpid],
                uid,
                kg_mask,
                excluded_pids=train_labels[uid],
                topk_paths=20,
            )
        progress.update(1)

    with open(args.infer_path_data, 'wb') as out_file:
        pickle.dump(predicts, out_file)
def task_syn(args):
    """Train a GCN node classifier on the synthetic dataset ``args.dataset``.

    The adjacency/feature pair and the labels are read from the XAL data
    directory, the class count is taken as ``max(label) + 1`` and the
    adjacency is passed to training without normalisation.
    """
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1

    # Size the first layer from the loaded feature matrix. The value was
    # computed before but ``args.input_dim`` was passed instead, which can
    # disagree with the data actually fed to the model.
    input_dim = X.shape[1]

    model = models.GcnEncoderNode(
        input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    train_node_classifier.train(model, A, X, L, args, normalize_adjacency=False)
parser.add_argument("--labels", help="list of sample labels", required=True)
parser.add_argument("--gene-sets", help="list of curated gene sets")
parser.add_argument("--target", help="target class")
parser.add_argument("--set", help="gene set to run", type=str, default="HALLMARK_ALL")
parser.add_argument("--output-dir", help="Output directory", default=".")

args = parser.parse_args()

# load input data
print("loading input dataset...")

df = utils.load_dataframe(args.dataset)
df_samples = df.index
df_genes = df.columns

labels, classes = utils.load_labels(args.labels)

print("loaded input dataset (%s genes, %s samples)" % (df.shape[1], df.shape[0]))

# impute missing values with the global minimum of the dataframe
df.fillna(value=df.min().min(), inplace=True)

# determine target class: replace the class name by its index in `classes`;
# without --target the index -1 (i.e. the last class) is used
try:
    if args.target is None:
        args.target = -1
    else:
        args.target = classes.index(args.target)
except ValueError:
    # NOTE(review): execution continues with args.target still holding the
    # unknown class name - confirm the code that follows copes with that.
    print("error: class %s not found in dataset" % (args.target))
else:
    print("target class is: %s" % (classes[args.target]))
def get_input_fetures(features_file, label_file):
    """Return the feature array from ``features_file`` and the labels as uint8.

    Labels are loaded through ``utils.load_labels`` using the module-level
    ``data_root``.

    Returns:
        Tuple ``(X, y)`` with the loaded features and the label array.
    """
    raw_labels = utils.load_labels(label_file, data_root)
    features = np.load(features_file)
    return features, np.array(raw_labels, dtype=np.uint8)
def _bossu_csv_header():
    """Return the header row of the collected-results CSV."""
    metric_names = ["TP", "TN", "FP", "FN", "Accuracy",
                    "F1-Score (TP)", "F1-Score (TN)", "MCC"]
    header = ["file", "Total Frames", "EM Rain Frames", "Kalman Rain Frames",
              "EM %", "Kalman %"]
    header.extend(metric_names)
    header.extend("Kalman " + name for name in metric_names)
    return header


def _bossu_frame_cells(metrics):
    """Flatten one per-frame metrics dict into its eight CSV cells (header order)."""
    type_errors = metrics["Type Errors"][0]
    f1_scores = metrics["F1-score"][0]
    return [
        type_errors[0], type_errors[1], type_errors[2], type_errors[3],
        metrics["Accuracy"][0],
        f1_scores[0], f1_scores[1],
        metrics["MCC"][0],
    ]


def _bossu_percentage(part, total):
    """Return ``part / total * 100``, or NaN when ``total`` is zero."""
    if not total:
        return float("nan")
    return part / total * 100


def evaluateBossu(args):
    '''
    Evaluate detections from the Bossu rain detection algorithm.

    Saves:
        Plots of the different metrics
        CSV containing metrics for each input file and accumulated
        text file containing results and additional information

    Input:
        args:
            - labelFile: Path to the label file
            - inputFolder: Path to the folder containing the different detection csv files
            - outputFolder: Path to the folder where the output will be saved
            - filePlots: Whether to save plots for each input file

    Raises:
        ValueError: If a detection directory holds more than one settings file.
    '''

    label_file = args["labelFile"]
    main_path = args["inputFolder"]
    output_path = args["outputFolder"]
    plots_per_file = args["filePlots"]

    label_type = "Laser" if "laser" in label_file else "Mechanical"

    # Setup output paths
    main_output_path = os.path.join(
        output_path,
        "{}-{}-{}".format(os.path.basename(main_path), label_type, "Bossu"))
    os.makedirs(main_output_path, exist_ok=True)
    output_path = os.path.join(main_output_path, "results_collected.csv")

    # Set threshold values
    thresholds = list(np.linspace(0, 1, 101))

    label_dict = utils.load_labels(label_file)

    # Containers for the type errors and counters for different label types
    em_per_minute_counter = np.zeros((len(thresholds), 4))
    kalman_sampled_counter = np.zeros((len(thresholds), 4))
    em_per_frame_counter = np.zeros((1, 4))
    kalman_per_frame_counter = np.zeros((1, 4))
    label_total_per_minute = 0
    label_pos_per_minute = 0
    label_total_per_frame = 0
    label_pos_per_frame = 0

    with open(output_path, 'w', newline="") as csvWriteFile:
        writer = csv.writer(csvWriteFile, delimiter=";")
        writer.writerow(_bossu_csv_header())

        for dirs in os.listdir(main_path):
            dir_path = os.path.join(main_path, dirs)
            print("\n{}".format(dirs))

            dir_content = os.listdir(dir_path)
            settings = [s for s in dir_content if "setting" in s.lower()]
            if len(settings) > 1:
                raise ValueError(
                    "more than one settings file present in {}".format(
                        dir_path))

            for subdir in dir_content:
                # Only plain, non-.txt files are treated as detection CSVs.
                if os.path.isdir(os.path.join(dir_path, subdir)):
                    continue
                if os.path.splitext(subdir)[-1] == ".txt":
                    continue

                ###### LOAD LABELS ######
                filename = subdir.replace(".mkv", ".mp4")[:-12]
                filename = filename.replace("-brick", "")
                print(filename)

                dict_ind = label_dict[os.path.basename(filename)]
                # How many frames left of the starting minute e.g. 16:00:45, has 15 seconds left
                # This corresponds to 450 frames (30 FPS), and we assume we are halfway through the second, so 435 frame offset
                # These initial 435 frames are assigned to the label of 16:00:00, while the 436th label is assigned to 16:00:01
                offset = dict_ind["frameOffset"]
                FPM = dict_ind["FPM"]  # Frames per minute
                labels = dict_ind["labels"]  # List of labels per minute
                frameCount = dict_ind["frameCount"]

                ###### LOAD BOSSU OUTPUT ######
                # Load the supplied csv file
                csv_file = os.path.join(dir_path, subdir)
                rain_dataframe = pd.read_csv(csv_file, sep=";")

                ####### ANALYSE DATA #######
                start_frame = rain_dataframe[" Frame#"][0] - 1
                total_frames = len(rain_dataframe[" Frame#"])
                maxFrameStart = np.max(rain_dataframe[" Frame#"])
                print(
                    "Total frames in video: {}\nLargest frame analyzed: {}\nDifference: {}"
                    .format(frameCount, maxFrameStart,
                            frameCount - maxFrameStart))

                if frameCount != (total_frames + start_frame):
                    print(
                        "\tSize mismatch between labels {}, and data, {}. Skipping this one\n"
                        .format(frameCount, total_frames))
                    continue

                em_detected = rain_dataframe["EM Rain Detected"]
                kalman_detected = rain_dataframe["Kalman Rain Detected"]

                ## Raw EM Detections
                (em_per_frame, em_per_minute, em_per_frame_labels,
                 em_per_minute_labels) = analyze_Bossu_predictions(
                     em_detected, labels, offset, FPM, start_frame,
                     thresholds=thresholds)

                em_per_frame_counter += np.asarray(em_per_frame["Type Errors"])
                em_per_minute_counter += np.asarray(
                    em_per_minute["Type Errors"])

                if plots_per_file:
                    utils.make_metrics_plots(em_per_minute, thresholds,
                                             main_output_path,
                                             filename.replace(".mp4", ".pdf"))

                ## Kalman Detections
                (kalman_per_frame, kalman_per_minute, kalman_per_frame_labels,
                 kalman_per_minute_labels) = analyze_Bossu_predictions(
                     kalman_detected, labels, offset, FPM, start_frame,
                     thresholds=thresholds)

                kalman_per_frame_counter += np.asarray(
                    kalman_per_frame["Type Errors"])
                kalman_sampled_counter += np.asarray(
                    kalman_per_minute["Type Errors"])

                if plots_per_file:
                    utils.make_metrics_plots(
                        kalman_per_minute, thresholds, main_output_path,
                        filename.replace(".mp4", "_kalman.pdf"))

                row = [
                    filename,
                    total_frames,
                    np.sum(em_detected),
                    np.sum(kalman_detected),
                    np.sum(em_detected) / total_frames * 100,
                    np.sum(kalman_detected) / total_frames * 100,
                ]
                row.extend(_bossu_frame_cells(em_per_frame))
                row.extend(_bossu_frame_cells(kalman_per_frame))
                writer.writerow(row)

                assert (em_per_minute_labels == kalman_per_minute_labels).all(
                ), "The minute labels for EM and Kalman are not the same!"
                assert (em_per_frame_labels == kalman_per_frame_labels).all(
                ), "The frame labels for EM and Kalman are not the same!"

                label_total_per_minute += len(em_per_minute_labels)
                label_pos_per_minute += sum(em_per_minute_labels)
                label_total_per_frame += len(em_per_frame_labels)
                label_pos_per_frame += sum(em_per_frame_labels)

        # Calculate all metrics based on the accumulated type errors
        total_em_per_minute = calculate_classification_metrics_full_dataset(
            em_per_minute_counter, thresholds)
        total_em_per_frame = calculate_classification_metrics_full_dataset(
            em_per_frame_counter)
        total_kalman_per_minute = calculate_classification_metrics_full_dataset(
            kalman_sampled_counter, thresholds)
        total_kalman_per_frame = calculate_classification_metrics_full_dataset(
            kalman_per_frame_counter)

        # Make plots of the different metrics
        utils.make_metrics_plots(total_em_per_minute, thresholds,
                                 main_output_path, "overall_em.pdf")
        utils.make_metrics_plots(total_kalman_per_minute, thresholds,
                                 main_output_path, "overall_kalman.pdf")

        # Save results
        info_path = os.path.join(main_output_path, 'evaluation_information.txt')
        with open(info_path, 'w') as f:
            f.write("Metrics (EM) per frame: {}\n".format(total_em_per_frame))
            f.write("Metrics (Kalman) per frame: {}\n\n".format(
                total_kalman_per_frame))
            # The percentage helper avoids a ZeroDivisionError when no input
            # file was evaluated (all counters still zero).
            f.write("{} % Rain labels (Per minute)\n".format(
                _bossu_percentage(label_pos_per_minute,
                                  label_total_per_minute)))
            f.write("{} rainy out of {} (Per minute)\n\n".format(
                label_pos_per_minute, label_total_per_minute))
            f.write("{} % Rain labels (Per frame)\n".format(
                _bossu_percentage(label_pos_per_frame, label_total_per_frame)))
            f.write("{} rainy out of {} (Per frame)\n\n".format(
                label_pos_per_frame, label_total_per_frame))
            f.write("Label file used: {}\n".format(label_file))
            f.write("Method used: Bossu\n")
            f.write("Label type used: {}".format(label_type))

        # Summary row: the per-file columns are left blank.
        row = ["Total", "", "", "", "", ""]
        row.extend(_bossu_frame_cells(total_em_per_frame))
        row.extend(_bossu_frame_cells(total_kalman_per_frame))
        writer.writerow(row)
import pickle import sys import numpy as np import scipy.optimize sys.path.append("..") import utils import sac from utils import ASSERT_SIZE, ASSERT_NO_NAN MAX_PATCHES = 60000 images = utils.load_images("../data/train-images-idx3-ubyte") labels_ = utils.load_labels("../data/train-labels-idx1-ubyte") patches = images[:, 0:MAX_PATCHES] labels = labels_[0:MAX_PATCHES] # Note, this is the output from running mnist_train.py in the top level. print "Reading edge detector." fname = "data/numeral_sac.pickle" f = open(fname, "r") edge_detector_solution = pickle.load(f) options = sac.SparseAutoEncoderOptions(28 * 28, 196, output_dir = "output") edge_detector = sac.SparseAutoEncoder(options, patches) print "Computing edges." edges, identity = edge_detector.feed_forward(images[:, 0:MAX_PATCHES], edge_detector_solution.W1, edge_detector_solution.W2, edge_detector_solution.b1,
pp.fileOrder(valid, test) pp.resampleDatabase(sampling_rate) # Device Configuration device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Process Dataset trainList, trainLabel = list(), list() validList, validLabel = list(), list() testList, testLabel = list(), list() utils.generate_labels(pp.TRAIN_PATH) utils.generate_labels(pp.VALID_PATH) utils.generate_labels(pp.TEST_PATH) trainList, trainLabel = utils.load_labels(pp.TRAIN_PATH) validList, validLabel = utils.load_labels(pp.VALID_PATH) testList, testLabel = utils.load_labels(pp.TEST_PATH) class Signalset(Dataset): def __init__(self, dataList, dataLabel, path, inputDim): self.dataList = dataList self.dataLabel = dataLabel self.path = path self.inputDim = inputDim def __len__(self): return len(self.dataList) def __getitem__(self, idx):
import sys

# Script: load a pickled autoencoder solution named on the command line,
# dump its weight matrices as images, and rebuild a network plus the
# flattened parameter vector from the saved weights.

if len(sys.argv) != 2:
    # Parenthesised single-string form prints identically on Python 2 and 3.
    print("Usage: ./display_saved_network.py somefile.pickle")
    sys.exit(1)

fname = sys.argv[1]

# Pickle data is a byte stream: read it in binary mode, and let the context
# manager close the handle as soon as the object has been loaded.
# NOTE(review): unpickling can execute arbitrary code; only pass files that
# come from a trusted source.
with open(fname, "rb") as f:
    solution = pickle.load(f)

# Save the first-layer weights (with b1 added, then transposed) and the
# second-layer weights as figures.
utils.save_as_figure((solution.W1 + solution.b1).T, "loadedW1.png")
utils.save_as_figure(solution.W2, "loadedW2.png")

images = utils.load_images("data/train-images-idx3-ubyte")
labels = utils.load_labels("data/train-labels-idx1-ubyte")

# First 100 columns of the image matrix go to a preview figure.
utils.save_as_figure(images[:, 0:100], "output/input.png")

# First 10000 columns are handed to the autoencoder as its patches.
patches = images[:, 0:10000]

visible_size = 28 * 28
hidden_size = 196
options = sac.SparseAutoEncoderOptions(visible_size,
                                       hidden_size,
                                       output_dir="output",
                                       max_iterations=400)
network = sac.SparseAutoEncoder(options, patches)

# Pack the loaded weights and biases into a single parameter vector.
theta = network.flatten(solution.W1, solution.W2, solution.b1, solution.b2)
seed = int("".join(str(string.ascii_lowercase.index(x)) for x in "sugarbyte")) random.seed(seed) # img_list = set() # A_PS_labels = load_labels("data/raw/amazon/labels.csv", CC.LABELS.CATEGORICAL.CLOUD, to_dataframe=True) # img_list |= set(map(lambda x : os.path.join("data/raw/amazon/tif", x), A_PS_labels.index)) # # T_PS_labels = load_labels("data/raw/tropics/planetlabs/labels.csv", CC.LABELS.CATEGORICAL.CLOUD, to_dataframe=True) # img_list |= set(map(lambda x : os.path.join("data/raw/tropics/planetlabs/tif", x), T_PS_labels.index)) # # T_S2_labels = load_labels("data/raw/tropics/sentinel2/labels.csv", CC.LABELS.CATEGORICAL.CLOUD, to_dataframe=True) # img_list |= set(map(lambda x : os.path.join("data/raw/tropics/sentinel2/tif", x), T_S2_labels.index)) img_list = set() A_PS_labels = load_labels("data/raw/amazon/labels.csv", CC.LABELS.ALL, to_dataframe=True) img_list |= set( map(lambda x: os.path.join("data/raw/amazon/tif", x), A_PS_labels.index)) T_PS_labels = load_labels("data/raw/tropics/planetlabs/labels.csv", CC.LABELS.ALL, to_dataframe=True) img_list |= set( map(lambda x: os.path.join("data/raw/tropics/planetlabs/tif", x), T_PS_labels.index)) T_S2_labels = load_labels("data/raw/tropics/sentinel2/labels.csv", CC.LABELS.ALL, to_dataframe=True) img_list |= set(
# Remaining command-line options (the parser itself is created earlier).
parser.add_argument('--gene-sets', help='list of curated gene sets')
parser.add_argument('--set', help='specific gene set to run')
parser.add_argument(
    '--tsne',
    action='store_true',
    help='plot t-SNE of samples')
parser.add_argument(
    '--heatmap',
    action='store_true',
    help='plot heatmaps of sample perturbations')
parser.add_argument('--target', help='target class')
parser.add_argument('--output-dir', default='.', help='output directory')

args = parser.parse_args()

# Read the train and perturb dataframes, then their label files.
print('loading train/perturb data...')

df_train = utils.load_dataframe(args.train_data)
df_perturb = utils.load_dataframe(args.perturb_data)

# The class list returned for the training labels is passed into the second
# call; the class list that call returns is discarded.
y_train, classes = utils.load_labels(args.train_labels)
y_perturb, _ = utils.load_labels(args.perturb_labels, classes)

# Per the messages below, axis 1 counts genes and axis 0 counts samples.
print('loaded train data (%s genes, %s samples)'
      % (df_train.shape[1], df_train.shape[0]))
print('loaded perturb data (%s genes, %s samples)'
      % (df_perturb.shape[1], df_perturb.shape[0]))

# Fill missing entries in both frames, in place, with the smallest value
# found in the training frame.
min_value = df_train.min().min()
for frame in (df_train, df_perturb):
    frame.fillna(value=min_value, inplace=True)

# Run every class name through utils.sanitize.
classes = list(map(utils.sanitize, classes))

# determine target class
# ---- plot settings ----
users_limit = 150000
print_user_id = False
users_to_print = []  # fill user labels to print next to 2d point
fig_size = (20, 20)
text_size = 10

# ---- values read from the config mapping ----
labels_data_path = config["labels"]
umap_embedding_folder = config["umap_embedding_folder"]
distance = config["umap_distance"]
n_neighbors = config["umap_n_neighbors"]
min_dist = config["umap_min_dist"]

# NOTE(review): `labels` is only bound when print_user_id is True; any later
# use has to be guarded by the same flag.
if print_user_id:
    labels = load_labels(labels_data_path)

# One scatter figure per (n_neighbors, min_dist) combination.
for neigh in n_neighbors:
    for dist in min_dist:
        # Embedding CSV path is built from the folder and the three
        # UMAP settings.
        umap_embedding_path = "{}/embedding_umap_{}_{}_{}.csv".format(
            umap_embedding_folder, neigh, dist, distance)
        X_embedded_umap, Y_embedded_umap = import_embedding(
            umap_embedding_path, limit=users_limit)

        fig = plt.figure(facecolor='w', figsize=fig_size)
        plt.subplot(1, 1, 1)
        # NOTE(review): no `c=` values are passed, so `cmap` presumably has
        # no visible effect here -- confirm against matplotlib's scatter docs.
        plt.scatter(X_embedded_umap, Y_embedded_umap, cmap='hsv')

        title_text = (
            '2D embedding using UMAP Embedding n_neighbors={} min_dist={} distance={}\n'
            .format(neigh, dist, distance))
        plt.title(title_text)
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
type=str, required=False, default="cameras", help='Device use to collect data. It can be either "webcam" or "cameras"') args = parser.parse_args() # pass args exp = args.exp subject_id = args.subject label_id = args.label device = args.device # load camera info --> options.json op, cam = utils.load_options(device) # load experiment labels --> labels.json labels = utils.load_labels(exp) log = Logger(name="Capture") # get folder to store the collected data if exp == "emotions": folder = op.folder_emotions elif exp == "signals": folder = op.folder_signals elif exp == "gestures": folder = op.folder_gestures elif exp == "adl": folder = op.folder_adl elif exp == "falls": folder = op.folder_falls else:
nargs='+') parser.add_argument('--tsne-alphas', help='list of per-class alphas for t-SNE plot', type=float, nargs='+') args = parser.parse_args() # load input expression matrix emx = utils.load_dataframe(args.infile) print('Loaded %s %s' % (args.infile, str(emx.shape))) # load label file or generate empty labels if args.labels != None: labels = utils.load_labels(args.labels) else: labels = np.zeros(len(emx.index), dtype=str) print('Loaded %s %s' % ('labels', str(labels.shape))) # plot sample distributions if args.density != None: print('Plotting sample distributions...') plot_density(emx, args.density, xmax=args.density_xmax) # plot t-SNE of samples if args.tsne != None: print('Plotting 2-D t-SNE...')
target_labels[t_idx])) output_filepath = os.path.join( output_directory, 'hist_{}.png'.format(target_labels[t_idx])) print(output_filepath) plt.tight_layout() plt.savefig(output_filepath) plt.clf() if __name__ == '__main__': # labels stored in external csv files feature_labels_filepath = '../../data/datasets/features.csv' target_labels_filepath = '../../data/datasets/targets.csv' # load in labels feature_labels = utils.load_labels(feature_labels_filepath) target_labels = utils.load_labels(target_labels_filepath) ## the dataset filepaths to visualize along with labels # input_filepaths = [ # # '../../data/datasets/risk.h5', # '../../data/datasets/bootstrap/iter_4.h5', # # '../../data/datasets/march/risk_20_sec_3_timesteps.h5', # ] # dataset_labels = [ # # 'full', # 'boot', # # 'risk_5' # ] num_iters = 50