Example #1
def run_program(args):
    kg = utils.load_kg(args.dataset)
    kg_mask = KGMask(kg)

    train_labels = utils.load_labels(args.dataset, 'train')
    test_labels = utils.load_labels(args.dataset, 'test')
    path_counts = utils.load_path_count(args.dataset)  # Training path freq
    with open(args.infer_path_data, 'rb') as f:
        raw_paths = pickle.load(f)  # Test path with scores

    symbolic_model = create_symbolic_model(args, kg, train=False)
    program_exe = MetaProgramExecutor(symbolic_model, kg_mask, args)

    pred_labels = {}
    pbar = tqdm(total=len(test_labels))
    for uid in test_labels:
        program = create_heuristic_program(kg.metapaths, raw_paths[uid], path_counts[uid], args.sample_size)
        program_exe.execute(program, uid, train_labels[uid])
        paths = program_exe.collect_results(program)
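        # rank candidate items by the mean score of the last path node and keep the top 10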
        tmp = [(r[0][-1], np.mean(r[1][-1])) for r in paths]
        tmp = sorted(tmp, key=lambda x: x[1], reverse=True)[:10]
        pred_labels[uid] = [t[0] for t in tmp]
        pbar.update(1)

    msg = evaluate_with_insufficient_pred(pred_labels, test_labels)
    logger.info(msg)
Example #2
def calc_experiment_params(args):
    G = utils.load_graph(args.graph)
    nodes_cluster = utils.load_labels(args.all_labels)
    known_labels = utils.load_labels(args.seed_set)
    holdout = utils.load_labels(args.holdout)
    node2features = None
    if args.features is not None:
        with open(args.features, 'rb') as f:
            node2features = cPickle.load(f)
    special_params = {}

    if (args.model == "norm_lp" or args.model == "feature_diffusion_norm_lp"):
        special_params["M"] = label_propagation.get_graph_normalized_laplacian(
            G)
    if (args.model == "lp" or args.model == 'feature_diffusion_lp'):
        special_params["M"] = label_propagation.get_graph_laplacian(G)

    num_classes = max(nodes_cluster.values()) + 1
    n = max(G.nodes())
    cluster_distribution = defaultdict(int)
    cluster_count = defaultdict(int)

    for node_id in nodes_cluster:
        cluster_count[nodes_cluster[node_id]] += 1

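    # convert the raw counts into the fraction of labeled nodes per cluster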
    for cluster_id in cluster_count:
        cluster_distribution[cluster_id] = cluster_count[cluster_id] / float(
            len(nodes_cluster))

    parameters = []
    parameters.append(
        (G, num_classes, known_labels, cluster_distribution, holdout,
         nodes_cluster, node2features, n, args, special_params))
    return parameters
Example #3
 def __init__(self,
              file_labels,
              audio_dir,
              max_length_sec,
              online=True,
              feats_fir=None,
              calc_flevel=None):
     """ Class to load a custom Dataset. Can be used as an input for the DataLoader.
     Args:
         file_labels (string): Path to the csv file with the labels.
         audio_dir (string): Path to the WAV utterances.
         online (boolean, optional): if True, features are computed on the fly.
                                     if False, features are loaded from disk. Default: True
         feats_fir (string, optional): The directory containing the files of the features (use only if
                                     'online'=False). Default: None.
         calc_flevel (callable, optional): Optional calculation to be applied on a sample. E.g. compute fbanks
                                         or MFCCs of the audio signals. Use when online=True.
         max_length_sec (int): Maximum length in seconds to keep from the utterances.
     :return dictionary {
     """
     name_set = os.path.basename(feats_fir)
     self.labels = utils.load_labels(file_labels, name_set)
     self.list_wavs = utils.get_files_abspaths(path=os.path.join(audio_dir, name_set),
                                               file_type='.wav')
     self.name_set = name_set
     self.calc_flevel = calc_flevel
     self.online = online
     self.max_length_sec = max_length_sec
     if not online:
         self.list_feature_files = utils.get_files_abspaths(
             path=feats_fir, file_type='.npy')
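A minimal usage sketch for this class (assumptions: the class is named AudioDataset and the paths are placeholders; neither appears in the snippet above, which only names the DataLoader as the intended consumer):

from torch.utils.data import DataLoader

# Hypothetical wiring; AudioDataset is an assumed name for the class whose __init__ is shown above,
# and the file paths are placeholders.
train_set = AudioDataset(file_labels='labels.csv',
                         audio_dir='wavs/',
                         max_length_sec=8,
                         online=False,
                         feats_fir='features/train')
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)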
Example #4
def main(args):
  feat, case_ids = load_features(args.src, zscore=True)
  lab = load_labels(args.labsrc)

  ((nepc_f, nepc_lab), (m0_f, m0_lab), (m0p_f, m0p_lab), (m1_f, m1_lab)) = split_sets(feat, lab)

  yvect = ['M0']*m0_f.shape[0] + ['NEPC']*nepc_f.shape[0]
  ttests = []
  fig = plt.figure()
  for f in feat.columns:
    m0_ = m0_f.loc[:, f]
    nepc_ = nepc_f.loc[:, f]
    tt = ttest_ind(m0_, nepc_)
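    # only plot features with an extremely significant M0 vs NEPC difference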
    if tt.pvalue < 1e-10:
      feature_data = pd.DataFrame({'group': yvect, 
        'feature': np.concatenate([m0_, nepc_], axis=0)})
      print(f, tt)
      out = os.path.join(args.dst, 'f_{}.png'.format(f))
      plt.clf()
      # sns.boxplot(x='group', y='feature', data=feature_data)
      sns.distplot(m0_, label='M0')
      sns.distplot(nepc_, label='NEPC')
      plt.legend()
      plt.title('Feature {}'.format(f))
      plt.savefig(out, bbox_inches='tight')
Example #5
def task_bitcoinalpha(args):
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
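    # labels are assumed to be zero-indexed integers, so the class count is max(L) + 1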
    num_classes = max(L) + 1
    print("NUMBER OF CLASS IS: " + str(num_classes))
    input_dim = X.shape[1]

    print("Input dimension is: ", input_dim)

    model = models.GcnEncoderNode(
        input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )

    train_node_classifier.train(model,
                                A,
                                X,
                                L,
                                args,
                                normalize_adjacency=False)
Example #6
def bitcoin(args):
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    num_nodes = X.shape[0]
    ckpt = utils.load_ckpt(args)

    print("input dim: ", input_dim, "; num classes: ", num_classes)
    
    model = models.GcnEncoderNode(
            input_dim=input_dim,
            hidden_dim=args.hidden_dim,
            embedding_dim=args.output_dim,
            label_dim=num_classes,
            num_layers=args.num_gc_layers,
            bn=args.bn,
            args=args,
        )
    
    model.load_state_dict(ckpt["model_state"]) 
    pred = ckpt["save_data"]["pred"]
    
    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)
    
    node_to_explain = [i for [i] in np.argwhere(np.sum(A, axis=0) > 2)]

    explanations = explainer.explain_range(node_to_explain, num_samples=args.num_perturb_samples, top_node=args.top_node)

    print(explanations)

    savename = utils.gen_filesave(args)
    np.save(savename, explanations)
Example #7
def main(args):
    feat, case_ids = load_features(args.src)
    lab = load_labels(args.labsrc)

    feat = drop_high_cor(feat, cor_thresh=0.8)
    print('Features after high cor drop')
    print(feat.head())

    run_tsne(feat, lab)
Example #8
def render_gen(args):
    fps_counter = utils.avg_fps_counter(30)

    engines, titles = utils.make_engines(args.model, DetectionEngine)
    assert utils.same_input_image_sizes(engines)
    engines = itertools.cycle(engines)
    engine = next(engines)

    labels = utils.load_labels(args.labels) if args.labels else None
    filtered_labels = set(
        l.strip() for l in args.filter.split(',')) if args.filter else None
    get_color = make_get_color(args.color, labels)

    draw_overlay = True

    yield utils.input_image_size(engine)

    output = None
    while True:
        tensor, layout, command = (yield output)

        inference_rate = next(fps_counter)
        if draw_overlay:
            start = time.monotonic()
            # Changed to detect_with_input_tensor. Res is same
            # See https://coral.googlesource.com/edgetpuvision/+/refs/heads/4.14.98%5E%21/#F0
            objs = engine.detect_with_input_tensor(tensor,
                                                   threshold=args.threshold,
                                                   top_k=args.top_k)
            inference_time = time.monotonic() - start
            objs = [convert(obj, labels) for obj in objs]

            if labels and filtered_labels:
                objs = [obj for obj in objs if obj.label in filtered_labels]

            objs = [
                obj for obj in objs
                if args.min_area <= obj.bbox.area() <= args.max_area
            ]

            if args.print:
                print_results(inference_rate, objs)

            autoturret_render_artifacts = controller.run(objs)

            title = titles[engine]
            output = overlay(title, objs, get_color, inference_time,
                             inference_rate, layout,
                             autoturret_render_artifacts)
        else:
            output = None

        if command == 'o':
            draw_overlay = not draw_overlay
        elif command == 'n':
            engine = next(engines)
Example #9
def main():
    multiplier = 1.0
    input_size = 224
    modelname = "mobilenet_{}_{}_{}".format(VERSION, multiplier, input_size)
    labels = load_labels()

    download_checkpoint(multiplier, input_size)
    checkpoint = os.path.join(SAVEDIR, modelname,
                              "mobilenet_v2_1.0_224.tflite")
    predict_using_tflite(checkpoint, labels)
Example #10
def load_globals():
    # this initialization is carried over from sample_prediction.py
    with open("./../src/ModelConfig.yaml", "r") as f:
        model_config = yaml.safe_load(f)

    global MODEL
    global FLOWER_SPECIES_NAMES

    MODEL = load_model(config=model_config)
    FLOWER_SPECIES_NAMES = load_labels(config=model_config)
Example #11
def main():
    # args
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)
    check_args_to_run(args)

    # data
    print('\n===== Starting preparing data =====\n')

    labels = load_labels()
    images = load_images(mode='train')

    train_images, val_images, train_labels, val_labels = train_test_split(images, labels, test_size=args.test_size) # TODO: discuss

    train_dataset = BengaliTrainDataset(images=train_images, labels=train_labels, size=args.image_size)
    val_dataset = BengaliTrainDataset(images=val_images, labels=val_labels, size=args.image_size)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=os.cpu_count())
    val_dataloader = DataLoader(val_dataset, batch_size=512, shuffle=False, num_workers=os.cpu_count())

    print('\n===== Completed preparing data =====')

    # model
    criterions = build_loss(args)
    base_cnn_model = BaseCNNModel(model_name=args.model_name, 
                                  hidden_dim=args.hidden_dim, 
                                  dropout=args.dropout,
                                  activation=args.activation)

    optimizer = build_optimizer(args, base_cnn_model)
    scheduler = build_scheduler(args, optimizer)

    model = BengaliLightningModel(base_model=base_cnn_model,
                                  train_dataloader=train_dataloader,
                                  val_dataloader=val_dataloader,
                                  criterions=criterions,
                                  optimizer=optimizer,
                                  scheduler=scheduler)
    
    # callbacks
    filepath = f'/home/jarvis1121/AI/Kaggle/Bengali/kaggle-Bengali/models/trial_{int(args.exp)}'
    checkpoint_callback = pl.callbacks.ModelCheckpoint(filepath=filepath, monitor='val_loss',
                                                       verbose=1, mode='min')
    
    print('\n===== Starting training =====')
    
    # train
    trainer = pl.Trainer(max_epochs=args.epochs,
                         gpus=args.gpus,
                         early_stop_callback=False,
                         checkpoint_callback=checkpoint_callback)

    trainer.fit(model)

    print('\n===== End training =====')
Example #12
    def render_gen(self, args1):
        fps_counter = utils.avg_fps_counter(30)
        args = self.parser.parse_args()
        engines, titles = utils.make_engines(args.model, DetectionEngine)
        assert utils.same_input_image_sizes(engines)
        engines = itertools.cycle(engines)
        engine = next(engines)

        labels = utils.load_labels(args.labels) if args.labels else None
        filtered_labels = set(
            l.strip() for l in args.filter.split(',')) if args.filter else None
        get_color = make_get_color(args.color, labels)

        draw_overlay = True

        yield utils.input_image_size(engine)

        output = None
        while True:
            tensor, layout, command = (yield output)

            inference_rate = next(fps_counter)
            if draw_overlay:
                start = time.monotonic()
                objs = engine.detect_with_input_tensor(
                    tensor, threshold=args.threshold, top_k=args.top_k)
                inference_time = time.monotonic() - start
                objs = [convert(obj, labels) for obj in objs]

                if labels and filtered_labels:
                    objs = [
                        obj for obj in objs if obj.label in filtered_labels
                    ]

                objs = [
                    obj for obj in objs
                    if args.min_area <= obj.bbox.area() <= args.max_area
                ]

                if args.print:
                    print_results(inference_rate, objs)

                title = titles[engine]
                output = overlay(title, objs, get_color, inference_time,
                                 inference_rate, layout)
            else:
                output = None

            if command == 'o':
                draw_overlay = not draw_overlay
            elif command == 'n':
                engine = next(engines)
Example #13
def task_syn(args):
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    ckpt = utils.load_ckpt(args)

    print("input dim: ", input_dim, "; num classes: ", num_classes)
    
    model = models.GcnEncoderNode(
            input_dim=input_dim,
            hidden_dim=args.hidden_dim,
            embedding_dim=args.output_dim,
            label_dim=num_classes,
            num_layers=args.num_gc_layers,
            bn=args.bn,
            args=args,
        )
    
    model.load_state_dict(ckpt["model_state"]) 
    pred = ckpt["save_data"]["pred"]
    
    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)
    
    explanations = {}
    if args.explain_node is None:
        if args.dataset == 'syn1':
            explanations = explainer.explain_range(list(range(300, 700)), num_samples=args.num_perturb_samples, top_node=args.top_node)
        elif args.dataset == 'syn2':
            explanations = explainer.explain_range(list(range(300, 700)) + list(range(1000, 1400)), num_samples=args.num_perturb_samples, top_node=args.top_node, pred_threshold=0.1)
        elif args.dataset == 'syn3':
            explanations = explainer.explain_range(list(range(300, 1020)), num_samples=args.num_perturb_samples, top_node=args.top_node, pred_threshold=0.05)
        elif args.dataset == 'syn4':
            explanations = explainer.explain_range(list(range(511, 871)), num_samples=args.num_perturb_samples, top_node=args.top_node, pred_threshold=0.1)
        elif args.dataset == 'syn5':
            explanations = explainer.explain_range(list(range(511, 1231)), num_samples=args.num_perturb_samples, top_node=args.top_node, pred_threshold=0.05)
        elif args.dataset == 'syn6':
            explanations = explainer.explain_range(list(range(300, 700)), num_samples=args.num_perturb_samples, top_node=args.top_node)
    else:
        explanation = explainer.explain(args.explain_node, num_samples=args.num_perturb_samples, top_node=args.top_node)
        print(explanation)
        explanations[args.explain_node] = explanation

    print(explanations)

    savename = utils.gen_filesave(args)
    np.save(savename, explanations)
Example #14
def estimate_path_count(args):
    kg = utils.load_kg(args.dataset)
    num_mp = len(kg.metapaths)
    train_labels = utils.load_labels(args.dataset, 'train')
    counts = {}
    pbar = tqdm(total=len(train_labels))
    for uid in train_labels:
        counts[uid] = np.zeros(num_mp)
        for pid in train_labels[uid]:
            for mpid in range(num_mp):
                cnt = kg.count_paths_with_target(mpid, uid, pid, 50)
                counts[uid][mpid] += cnt
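        # average the metapath counts over this user's positive training items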
        counts[uid] = counts[uid] / len(train_labels[uid])
        pbar.update(1)
    utils.save_path_count(args.dataset, counts)
Example #15
def main(args):
  feat_importance = pd.read_csv(args.src, sep='\t', index_col=0, header=None)
  features, _ = load_features(args.featsrc, zscore=True)
  labels = load_labels(args.labelsrc)
  
  feat_importance.sort_values(1, ascending=False, inplace=True)
  sns.distplot(feat_importance)
  plt.savefig('tile_feature_importance_dist.png', bbox_inches='tight')

  sns.regplot(np.squeeze(feat_importance.index.values), np.squeeze(feat_importance.values))

  feat_importance = feat_importance.iloc[:args.n, :]
  print('highest feature importance:')
  for f in feat_importance.index.values:
    print(f, feat_importance.loc[f].values)
Example #16
    def render_gen(args):

        acc = accumulator(size=args.window, top_k=args.top_k)
        acc.send(None)  # Initialize.

        fps_counter = utils.avg_fps_counter(30)

        engines, titles = utils.make_engines(args.model, ClassificationEngine)
        assert utils.same_input_image_sizes(engines)
        engines = itertools.cycle(engines)
        engine = next(engines)

        labels = utils.load_labels(args.labels)
        draw_overlay = True

        yield utils.input_image_size(engine)

        output = None
        while True:
            tensor, layout, command = (yield output)

            inference_rate = next(fps_counter)
            if draw_overlay:
                start = time.monotonic()
                results = engine.classify_with_input_tensor(
                    tensor, threshold=args.threshold, top_k=args.top_k)
                inference_time = time.monotonic() - start

                results = [(labels[i], score) for i, score in results]
                results = acc.send(results)
                if args.print:
                    print_results(inference_rate, results)

                title = titles[engine]

                output = overlay(title, results, inference_time,
                                 inference_rate, layout)
            else:
                output = None

            if command == 'o':
                draw_overlay = not draw_overlay
            elif command == 'n':
                engine = next(engines)
Example #17
def main():
    args = cmdparser()
    config = get_config(args.config)
    if args.preprocess:
        utils.preprocess(config['raw_path'], config['train_path'],
                         config['dev_path'], config['label_path'],
                         config['stop_word_path'], config['vocabulary_path'])
    labels = utils.load_labels(config['label_path'])
    vocabulary = utils.load_vocabulary(config['vocabulary_path'])
    stop_words = utils.load_stop_words(config['stop_word_path'])

    if args.dev:
        train(config, vocabulary, labels, stop_words, save_path='', mode='dev')
    elif args.train:
        if int(config['ensemble_size']) == 1:
            train(config,
                  vocabulary,
                  labels,
                  stop_words,
                  save_path=config['model_path'],
                  mode='train')
        else:
            for i in range(int(config['ensemble_size'])):
                train(config,
                      vocabulary,
                      labels,
                      stop_words,
                      save_path=config[f'model_path_{i+1}'],
                      mode='train')
    elif args.test:
        if int(config['ensemble_size']) == 1:
            test(config,
                 vocabulary,
                 labels,
                 stop_words,
                 save_path=[config['model_path']])
        else:
            test_paths = [
                config[f'model_path_{i+1}']
                for i in range(int(config['ensemble_size']))
            ]
            test(config, vocabulary, labels, stop_words, save_path=test_paths)
Example #18
def api_call():
    f = request.files['file']
    filename = secure_filename(f.filename)
    f.save(os.path.join(app.config['UPLOAD_FOLDER'], str(filename)))
    image = Image.open(os.path.join(app.config['UPLOAD_FOLDER'],
                                    str(filename)))
    #image_resized = image.resize([299,299], Image.ANTIALIAS)

    file_name = (os.path.join(app.config['UPLOAD_FOLDER'], str(filename)))
    input_height = 299
    input_width = 299
    input_mean = 128
    input_std = 128

    t = read_tensor_from_image_file(file_name,
                                    input_height=input_height,
                                    input_width=input_width,
                                    input_mean=input_mean,
                                    input_std=input_std)

    results = sess.run(output_operation.outputs[0],
                       {input_operation.outputs[0]: t})
    results = np.squeeze(results)

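    # take the five highest-scoring classes, best first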
    top_k = results.argsort()[-5:][::-1]
    labels = load_labels(label_file)
    for i in top_k:
        print(labels[i], results[i])

    # Save images to their original data directory if probability of prediction > threshold --- for retraining
    #if results[top_k[0]]>0.70:
    #    shutil.copy(os.path.join(app.config['UPLOAD_FOLDER'],str(filename)), "/home/ubuntu/crop_classification_updated/data_dir/train_dir/{0}/{1}".format(data_dict[labels[top_k[0]]],filename))
    #    print ('Saving image to: /home/ubuntu/crop_classification_updated/data_dir/train_dir/{0}/{1}'.format(data_dict[labels[top_k[0]]],filename))
    result_list = [{
        'Prediction1': '%s' % (labels[top_k[0]]),
        'Confidence1': '%s' % (results[top_k[0]]),
        'Prediction2': '%s' % (labels[top_k[1]]),
        'Confidence2': '%s' % (results[top_k[1]])
    }]

    return jsonify(result_list)
Example #19
def infer_paths(args):
    kg = utils.load_kg(args.dataset)
    model = create_symbolic_model(args, kg, train=False)

    train_labels = utils.load_labels(args.dataset, 'train')
    train_uids = list(train_labels.keys())
    kg_mask = KGMask(kg)

    predicts = {}
    pbar = tqdm(total=len(train_uids))
    for uid in train_uids:
        predicts[uid] = {}
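        # for each metapath, keep the top-20 inferred paths, excluding items already in the training labels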
        for mpid in range(len(kg.metapaths)):
            metapath = kg.metapaths[mpid]
            paths = model.infer_with_path(metapath, uid, kg_mask,
                                          excluded_pids=train_labels[uid],
                                          topk_paths=20)
            predicts[uid][mpid] = paths
        pbar.update(1)
    with open(args.infer_path_data, 'wb') as f:
        pickle.dump(predicts, f)
Example #20
def task_syn(args):
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]

    model = models.GcnEncoderNode(
        input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )

    train_node_classifier.train(model,
                                A,
                                X,
                                L,
                                args,
                                normalize_adjacency=False)
Example #21
	parser.add_argument("--labels", help="list of sample labels", required=True)
	parser.add_argument("--gene-sets", help="list of curated gene sets")
	parser.add_argument("--target", help="target class")
	parser.add_argument("--set", help="gene set to run", type=str, default="HALLMARK_ALL")
	parser.add_argument("--output-dir", help="Output directory", default=".")

	args = parser.parse_args()

	# load input data
	print("loading input dataset...")

	df = utils.load_dataframe(args.dataset)
	df_samples = df.index
	df_genes = df.columns

	labels, classes = utils.load_labels(args.labels)

	print("loaded input dataset (%s genes, %s samples)" % (df.shape[1], df.shape[0]))

	# impute missing values
	df.fillna(value=df.min().min(), inplace=True)

	# determine target class
	try:
		if args.target == None:
			args.target = -1
		else:
			args.target = classes.index(args.target)
			print("target class is: %s" % (classes[args.target]))
	except ValueError:
		print("error: class %s not found in dataset" % (args.target))
Example #22
def get_input_fetures(features_file, label_file):
    labels = utils.load_labels(label_file, data_root)
    X = np.load(features_file)
    y = np.array(labels, dtype=np.uint8)

    return X, y
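A minimal sketch of calling this helper (the file names below are placeholders, and data_root is assumed to be defined at module level, as the snippet implies):

# Hypothetical call; the .npy file and label file paths are placeholders, not from the original code.
X_train, y_train = get_input_fetures('train_features.npy', 'train_labels.csv')
print(X_train.shape, y_train.shape)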
Example #23
def evaluateBossu(args):
    '''
    Evaluate detections from the Bossu rain detection algorithm.
    Saves:
        Plots of the different metrics
        CSV containing metrics for each input file and accumulated
        text file containing results and additional information

    Input:
        args:
            - labelFile: Path to the label file
            - inputFolder: Path to the folder containing the different detection csv files
            - outputFolder: Path to the folder where the output will be saved
            - filePlots: Whether to save plots for each input file
    '''

    label_file = args["labelFile"]
    main_path = args["inputFolder"]
    output_path = args["outputFolder"]
    plots_per_file = args["filePlots"]

    if "laser" in label_file:
        label_type = "Laser"
    else:
        label_type = "Mechanical"

    # Setup output paths
    main_output_path = os.path.join(
        output_path, "{}-{}-{}".format(os.path.basename(main_path), label_type,
                                       "Bossu"))
    if not os.path.exists(main_output_path):
        os.makedirs(main_output_path)

    output_path = os.path.join(main_output_path, "results_collected.csv")

    # Set threshold values
    thresholds = list(np.linspace(0, 1, 101))
    label_dict = utils.load_labels(label_file)

    # Containers for the type errors and counters for different label types
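    # per-minute counters have one row per threshold; the four columns hold TP, TN, FP and FN counts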
    em_per_minute_counter = np.zeros((101, 4))
    kalman_sampled_counter = np.zeros((101, 4))
    em_per_frame_counter = np.zeros((1, 4))
    kalman_per_frame_counter = np.zeros((1, 4))

    label_total_per_minute = 0
    label_pos_per_minute = 0
    label_total_per_frame = 0
    label_pos_per_frame = 0

    with open(output_path, 'w', newline="") as csvWriteFile:
        writer = csv.writer(csvWriteFile, delimiter=";")

        # Write the headers in the new csv file
        firstrow = [
            "file", "Total Frames", "EM Rain Frames", "Kalman Rain Frames",
            "EM %", "Kalman %",
            "TP", "TN", "FP", "FN",
            "Accuracy", "F1-Score (TP)", "F1-Score (TN)", "MCC",
            "Kalman TP", "Kalman TN", "Kalman FP", "Kalman FN",
            "Kalman Accuracy", "Kalman F1-Score (TP)", "Kalman F1-Score (TN)",
            "Kalman MCC",
        ]

        writer.writerow(firstrow)

        for dirs in os.listdir(main_path):

            dir_path = os.path.join(main_path, dirs)
            print("\n{}".format(dirs))

            dir_content = os.listdir(dir_path)

            settings = [s for s in dir_content if "setting" in s.lower()]
            if len(settings) > 1:
                raise ValueError(
                    "more than one settings file present in {}".format(
                        dir_path))

            for subdir in dir_content:
                if os.path.isdir(os.path.join(dir_path, subdir)):
                    continue
                if os.path.splitext(subdir)[-1] == ".txt":
                    continue

                ###### LOAD LABELS ######
                filename = subdir.replace(".mkv", ".mp4")[:-12]
                filename = filename.replace("-brick", "")

                print(filename)

                dict_ind = label_dict[os.path.basename(filename)]
                offset = dict_ind[
                    "frameOffset"]  # How many frames left of the starting minute e.g. 16:00:45, has 15 seconds left
                # This corresponds to 450 frames (30 FPS), and we assume we are halfway through the second, so 435 frame offset
                # These initial 435 frames are assigned to the label of 16:00:00, while the 436th label is assigned to 16:00:01
                FPM = dict_ind["FPM"]  # Frames per minute
                labels = dict_ind["labels"]  # List of labels per minute
                frameCount = dict_ind["frameCount"]

                ###### LOAD BOSSU OUTPUT ######
                # Load the supplied csv file
                csv_file = os.path.join(dir_path, subdir)
                rain_dataframe = pd.read_csv(csv_file, sep=";")

                ####### ANALYSE DATA #######
                start_frame = rain_dataframe[" Frame#"][0] - 1
                total_frames = len(rain_dataframe[" Frame#"])

                maxFrameStart = np.max(rain_dataframe[" Frame#"])
                print(
                    "Total frames in video:  {}\nLargest frame analyzed: {}\nDifference: {}"
                    .format(frameCount, maxFrameStart,
                            frameCount - maxFrameStart))
                if frameCount != (total_frames + start_frame):
                    print(
                        "\tSize mismatch between labels {}, and data, {}. Skipping this one\n"
                        .format(frameCount, total_frames))
                    continue

                em_detected = rain_dataframe["EM Rain Detected"]
                kalman_detected = rain_dataframe["Kalman Rain Detected"]

                ## Raw EM Detections
                em_per_frame, em_per_minute, em_per_frame_labels, em_per_minute_labels = analyze_Bossu_predictions(
                    em_detected,
                    labels,
                    offset,
                    FPM,
                    start_frame,
                    thresholds=thresholds)
                em_per_frame_counter += np.asarray(em_per_frame["Type Errors"])
                em_per_minute_counter += np.asarray(
                    em_per_minute["Type Errors"])
                if plots_per_file:
                    utils.make_metrics_plots(em_per_minute, thresholds,
                                             main_output_path,
                                             filename.replace(".mp4", ".pdf"))

                ## Kalman Detections
                kalman_per_frame, kalman_per_minute, kalman_per_frame_labels, kalman_per_minute_labels = analyze_Bossu_predictions(
                    kalman_detected,
                    labels,
                    offset,
                    FPM,
                    start_frame,
                    thresholds=thresholds)
                kalman_per_frame_counter += np.asarray(
                    kalman_per_frame["Type Errors"])
                kalman_sampled_counter += np.asarray(
                    kalman_per_minute["Type Errors"])
                if plots_per_file:
                    utils.make_metrics_plots(
                        kalman_per_minute, thresholds, main_output_path,
                        filename.replace(".mp4", "_kalman.pdf"))

                row = []
                row.append(filename)
                row.append(total_frames)
                row.append(np.sum(em_detected))
                row.append(np.sum(kalman_detected))
                row.append(np.sum(em_detected) / total_frames * 100)
                row.append(np.sum(kalman_detected) / total_frames * 100)
                row.append(em_per_frame["Type Errors"][0][0])
                row.append(em_per_frame["Type Errors"][0][1])
                row.append(em_per_frame["Type Errors"][0][2])
                row.append(em_per_frame["Type Errors"][0][3])
                row.append(em_per_frame["Accuracy"][0])
                row.append(em_per_frame["F1-score"][0][0])
                row.append(em_per_frame["F1-score"][0][1])
                row.append(em_per_frame["MCC"][0])
                row.append(kalman_per_frame["Type Errors"][0][0])
                row.append(kalman_per_frame["Type Errors"][0][1])
                row.append(kalman_per_frame["Type Errors"][0][2])
                row.append(kalman_per_frame["Type Errors"][0][3])
                row.append(kalman_per_frame["Accuracy"][0])
                row.append(kalman_per_frame["F1-score"][0][0])
                row.append(kalman_per_frame["F1-score"][0][1])
                row.append(kalman_per_frame["MCC"][0])
                writer.writerow(row)

                assert (em_per_minute_labels == kalman_per_minute_labels).all(
                ), "The minute labels for EM and Kalman are not the same!"
                assert (em_per_frame_labels == kalman_per_frame_labels).all(
                ), "The frame labels for EM and Kalman are not the same!"

                label_total_per_minute += len(em_per_minute_labels)
                label_pos_per_minute += sum(em_per_minute_labels)
                label_total_per_frame += len(em_per_frame_labels)
                label_pos_per_frame += sum(em_per_frame_labels)

        # Calculate all metrics based on the accumulated type errors
        total_em_per_minute = calculate_classification_metrics_full_dataset(
            em_per_minute_counter, thresholds)
        total_em_per_frame = calculate_classification_metrics_full_dataset(
            em_per_frame_counter)
        total_kalman_per_minute = calculate_classification_metrics_full_dataset(
            kalman_sampled_counter, thresholds)
        total_kalman_per_frame = calculate_classification_metrics_full_dataset(
            kalman_per_frame_counter)

        # Make plots of the different metrics
        utils.make_metrics_plots(total_em_per_minute, thresholds,
                                 main_output_path, "overall_em.pdf")
        utils.make_metrics_plots(total_kalman_per_minute, thresholds,
                                 main_output_path, "overall_kalman.pdf")

        # Save results
        with open(os.path.join(main_output_path, 'evaluation_information.txt'),
                  'w') as f:
            f.write("Metrics (EM) per frame: {}\n".format(total_em_per_frame))
            f.write("Metrics (Kalman) per frame: {}\n\n".format(
                total_kalman_per_frame))
            f.write("{} % Rain labels (Per minute)\n".format(
                label_pos_per_minute / label_total_per_minute * 100))
            f.write("{} rainy out of {} (Per minute)\n\n".format(
                label_pos_per_minute, label_total_per_minute))
            f.write("{} % Rain labels (Per frame)\n".format(
                label_pos_per_frame / label_total_per_frame * 100))
            f.write("{} rainy out of {} (Per frame)\n\n".format(
                label_pos_per_frame, label_total_per_frame))
            f.write("Label file used: {}\n".format(label_file))
            f.write("Method used: Bossu\n")
            f.write("Label type used: {}".format(label_type))

        row = []
        row.append("Total")
        row.append("")
        row.append("")
        row.append("")
        row.append("")
        row.append("")
        row.append(total_em_per_frame["Type Errors"][0][0])
        row.append(total_em_per_frame["Type Errors"][0][1])
        row.append(total_em_per_frame["Type Errors"][0][2])
        row.append(total_em_per_frame["Type Errors"][0][3])
        row.append(total_em_per_frame["Accuracy"][0])
        row.append(total_em_per_frame["F1-score"][0][0])
        row.append(total_em_per_frame["F1-score"][0][1])
        row.append(total_em_per_frame["MCC"][0])
        row.append(total_kalman_per_frame["Type Errors"][0][0])
        row.append(total_kalman_per_frame["Type Errors"][0][1])
        row.append(total_kalman_per_frame["Type Errors"][0][2])
        row.append(total_kalman_per_frame["Type Errors"][0][3])
        row.append(total_kalman_per_frame["Accuracy"][0])
        row.append(total_kalman_per_frame["F1-score"][0][0])
        row.append(total_kalman_per_frame["F1-score"][0][1])
        row.append(total_kalman_per_frame["MCC"][0])
        writer.writerow(row)
Example #24
import pickle
import sys
import numpy as np
import scipy.optimize
sys.path.append("..")
import utils
import sac
from utils import ASSERT_SIZE, ASSERT_NO_NAN

MAX_PATCHES = 60000
images = utils.load_images("../data/train-images-idx3-ubyte")
labels_ = utils.load_labels("../data/train-labels-idx1-ubyte")
patches = images[:, 0:MAX_PATCHES]
labels = labels_[0:MAX_PATCHES]

# Note, this is the output from running mnist_train.py in the top level.
print "Reading edge detector."
fname = "data/numeral_sac.pickle"
f = open(fname, "r")
edge_detector_solution = pickle.load(f)

options = sac.SparseAutoEncoderOptions(28 * 28,
                                       196,
                                       output_dir="output")

edge_detector = sac.SparseAutoEncoder(options, patches)
print "Computing edges."
edges, identity = edge_detector.feed_forward(images[:, 0:MAX_PATCHES],
                                             edge_detector_solution.W1,
                                             edge_detector_solution.W2,
                                             edge_detector_solution.b1,
Example #25
    pp.fileOrder(valid, test)
    pp.resampleDatabase(sampling_rate)

# Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Process Dataset
trainList, trainLabel = list(), list()
validList, validLabel = list(), list()
testList, testLabel = list(), list()

utils.generate_labels(pp.TRAIN_PATH)
utils.generate_labels(pp.VALID_PATH)
utils.generate_labels(pp.TEST_PATH)

trainList, trainLabel = utils.load_labels(pp.TRAIN_PATH)
validList, validLabel = utils.load_labels(pp.VALID_PATH)
testList, testLabel = utils.load_labels(pp.TEST_PATH)


class Signalset(Dataset):
    def __init__(self, dataList, dataLabel, path, inputDim):
        self.dataList = dataList
        self.dataLabel = dataLabel
        self.path = path
        self.inputDim = inputDim

    def __len__(self):
        return len(self.dataList)

    def __getitem__(self, idx):
Example #26
# imports needed by the rest of this snippet (the top of the original file is cut off)
import pickle
import sys

import sac
import utils

if len(sys.argv) != 2:
  print "Usage: ./display_saved_network.py somefile.pickle"
  sys.exit(1)

fname = sys.argv[1]
f = open(fname, "r")
solution = pickle.load(f)

utils.save_as_figure((solution.W1 + solution.b1).T, "loadedW1.png")
utils.save_as_figure(solution.W2, "loadedW2.png")


images = utils.load_images("data/train-images-idx3-ubyte")
labels = utils.load_labels("data/train-labels-idx1-ubyte")
utils.save_as_figure(images[:, 0:100], "output/input.png")

patches = images[:, 0:10000]
visible_size = 28*28
hidden_size = 196


options = sac.SparseAutoEncoderOptions(visible_size,
                                       hidden_size,
                                       output_dir="output",
                                       max_iterations = 400)

network = sac.SparseAutoEncoder(options, patches)

theta = network.flatten(solution.W1, solution.W2, solution.b1, solution.b2)
Example #27
seed = int("".join(str(string.ascii_lowercase.index(x)) for x in "sugarbyte"))
random.seed(seed)

# img_list = set()
# A_PS_labels = load_labels("data/raw/amazon/labels.csv", CC.LABELS.CATEGORICAL.CLOUD, to_dataframe=True)
# img_list |= set(map(lambda x : os.path.join("data/raw/amazon/tif", x), A_PS_labels.index))
#
# T_PS_labels = load_labels("data/raw/tropics/planetlabs/labels.csv", CC.LABELS.CATEGORICAL.CLOUD, to_dataframe=True)
# img_list |= set(map(lambda x : os.path.join("data/raw/tropics/planetlabs/tif", x), T_PS_labels.index))
#
# T_S2_labels = load_labels("data/raw/tropics/sentinel2/labels.csv", CC.LABELS.CATEGORICAL.CLOUD, to_dataframe=True)
# img_list |= set(map(lambda x : os.path.join("data/raw/tropics/sentinel2/tif", x), T_S2_labels.index))
img_list = set()
A_PS_labels = load_labels("data/raw/amazon/labels.csv",
                          CC.LABELS.ALL,
                          to_dataframe=True)
img_list |= set(
    map(lambda x: os.path.join("data/raw/amazon/tif", x), A_PS_labels.index))

T_PS_labels = load_labels("data/raw/tropics/planetlabs/labels.csv",
                          CC.LABELS.ALL,
                          to_dataframe=True)
img_list |= set(
    map(lambda x: os.path.join("data/raw/tropics/planetlabs/tif", x),
        T_PS_labels.index))

T_S2_labels = load_labels("data/raw/tropics/sentinel2/labels.csv",
                          CC.LABELS.ALL,
                          to_dataframe=True)
img_list |= set(
Example #28
    parser.add_argument('--gene-sets', help='list of curated gene sets')
    parser.add_argument('--set', help='specific gene set to run')
    parser.add_argument('--tsne', help='plot t-SNE of samples', action='store_true')
    parser.add_argument('--heatmap', help='plot heatmaps of sample perturbations', action='store_true')
    parser.add_argument('--target', help='target class')
    parser.add_argument('--output-dir', help='output directory', default='.')

    args = parser.parse_args()

    # load input data
    print('loading train/perturb data...')

    df_train = utils.load_dataframe(args.train_data)
    df_perturb = utils.load_dataframe(args.perturb_data)

    y_train, classes = utils.load_labels(args.train_labels)
    y_perturb, _ = utils.load_labels(args.perturb_labels, classes)

    print('loaded train data (%s genes, %s samples)' % (df_train.shape[1], df_train.shape[0]))
    print('loaded perturb data (%s genes, %s samples)' % (df_perturb.shape[1], df_perturb.shape[0]))

    # impute missing values
    min_value = df_train.min().min()

    df_train.fillna(value=min_value, inplace=True)
    df_perturb.fillna(value=min_value, inplace=True)

    # sanitize class names
    classes = [utils.sanitize(c) for c in classes]

    # determine target class
Example #29
users_limit = 150000
print_user_id = False
users_to_print = []  # fill user labels to print next to 2d point

fig_size = (20, 20)
text_size = 10

labels_data_path = config["labels"]
umap_embedding_folder = config["umap_embedding_folder"]
distance = config["umap_distance"]
n_neighbors = config["umap_n_neighbors"]
min_dist = config["umap_min_dist"]

if print_user_id:
    labels = load_labels(labels_data_path)

for neigh in n_neighbors:
    for dist in min_dist:
        umap_embedding_path = "{}/embedding_umap_{}_{}_{}.csv".format(
            umap_embedding_folder, neigh, dist, distance)
        X_embedded_umap, Y_embedded_umap = import_embedding(
            umap_embedding_path, limit=users_limit)
        fig = plt.figure(figsize=fig_size, facecolor='w')
        plt.subplot(1, 1, 1)
        plt.scatter(X_embedded_umap, Y_embedded_umap, cmap='hsv')
        plt.title(
            '2D embedding using UMAP Embedding n_neighbors={} min_dist={} distance={}\n'
            .format(neigh, dist, distance))
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
Example #30
    type=str,
    required=False,
    default="cameras",
    help='Device use to collect data. It can be either "webcam" or "cameras"')
args = parser.parse_args()

# pass args
exp = args.exp
subject_id = args.subject
label_id = args.label
device = args.device

# load camera info --> options.json
op, cam = utils.load_options(device)
# load experiment labels --> labels.json
labels = utils.load_labels(exp)

log = Logger(name="Capture")

# get folder to store the collected data
if exp == "emotions":
    folder = op.folder_emotions
elif exp == "signals":
    folder = op.folder_signals
elif exp == "gestures":
    folder = op.folder_gestures
elif exp == "adl":
    folder = op.folder_adl
elif exp == "falls":
    folder = op.folder_falls
else:
Example #31
                        nargs='+')
    parser.add_argument('--tsne-alphas',
                        help='list of per-class alphas for t-SNE plot',
                        type=float,
                        nargs='+')

    args = parser.parse_args()

    # load input expression matrix
    emx = utils.load_dataframe(args.infile)

    print('Loaded %s %s' % (args.infile, str(emx.shape)))

    # load label file or generate empty labels
    if args.labels is not None:
        labels = utils.load_labels(args.labels)
    else:
        labels = np.zeros(len(emx.index), dtype=str)

    print('Loaded %s %s' % ('labels', str(labels.shape)))

    # plot sample distributions
    if args.density is not None:
        print('Plotting sample distributions...')

        plot_density(emx, args.density, xmax=args.density_xmax)

    # plot t-SNE of samples
    if args.tsne is not None:
        print('Plotting 2-D t-SNE...')
Example #32
                                      target_labels[t_idx]))
        output_filepath = os.path.join(
            output_directory, 'hist_{}.png'.format(target_labels[t_idx]))
        print(output_filepath)
        plt.tight_layout()
        plt.savefig(output_filepath)
        plt.clf()


if __name__ == '__main__':
    # labels stored in external csv files
    feature_labels_filepath = '../../data/datasets/features.csv'
    target_labels_filepath = '../../data/datasets/targets.csv'

    # load in labels
    feature_labels = utils.load_labels(feature_labels_filepath)
    target_labels = utils.load_labels(target_labels_filepath)

    ## the dataset filepaths to visualize along with labels
    # input_filepaths = [
    #     # '../../data/datasets/risk.h5',
    #     '../../data/datasets/bootstrap/iter_4.h5',
    #     # '../../data/datasets/march/risk_20_sec_3_timesteps.h5',
    # ]
    # dataset_labels = [
    #     # 'full',
    #     'boot',
    #     # 'risk_5'
    # ]

    num_iters = 50