Example #1
def main(argv) :
    parser = argparse.ArgumentParser(description='Tool to classify data based on 1-NN of segment data')
    parser.add_argument('-i', '--infile', help='Input JSON Segment File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = parser.parse_args(argv[1:])
    segments_json = load_data(args.infile, 'segments', None, None, "Euclidean Segment Distances: ")
    if segments_json == None :
        print "Could not load --infile : %s" % (args.infile,)
        exit()

    segments = Segments.fromJSONDict(segments_json)

    if int(args.pool) != 1 :
        pool = multiprocessing.Pool(int(args.pool))
    else :
        pool = None

    esd = EuclideanDistances(segments.config, segments, pool=pool)
    esd.compute_distances()

    if (args.outfile == None) :
        args.outfile = EuclideanDistances.get_distances_filename(esd.config)

    print "Writing %s" % (args.outfile,)
    esd.config.status = "EuclideanDistances"
    save_data(args.outfile, esd.toJSONDict())
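
A minimal usage sketch, assuming the main() above lives in a script such as EuclideanDistances.py (the script name and file paths are hypothetical placeholders; only the -i/-o/-p flags come from the parser above):

# Hypothetical driver; segments.json and distances.json are placeholder paths.
if __name__ == "__main__":
    main(["EuclideanDistances.py", "-i", "segments.json", "-o", "distances.json", "-p", "4"])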
Example #2
def main(argv) :
    parser = argparse.ArgumentParser(description='Tool to classify data based on chaotic invariants of segment data')
    parser.add_argument('-i', '--infile', help='Input JSON Segment File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument('-e', '--epsilon', type=float, default=1.0, help='epsilon value used for generation of chaotic invariants')
    parser.add_argument('-p', '--pool', default=max(1,multiprocessing.cpu_count()-2), help='Threads of computation to use')
    args = parser.parse_args(argv[1:])
    segments_json = load_data(args.infile, 'segments', None, None, "Chaotic Invariant Features: ")
    if segments_json == None :
        print "Could not load --infile : %s" % (args.infile,)
        exit()

    segments = Segments.fromJSONDict(segments_json)
    if segments.config.segment_size != segments.config.window_size :
        print "%s ERROR ill formed input, segment_size != window_size in %s" % (argv[0], args.infile) 
        sys.exit(1)

    if args.epsilon != None :
        segments.config.invariant_epsilon = args.epsilon
    
    if int(args.pool) != 1 :
        pool = multiprocessing.Pool(int(args.pool))
    else :
        pool = None

    cid = ChaoticInvariantFeatures(segments.config, segments, epsilon=segments.config.invariant_epsilon, pool=pool)
    cid.compute_features()

    if (args.outfile == None) :
        args.outfile = ChaoticInvariantFeatures.get_features_filename(cid.config)

    print "Writing %s" % (args.outfile,)
    cid.config.status = "ChaoticInvariantFeatures"
    save_data(args.outfile, cid.toJSONDict())
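
A similar usage sketch (placeholder script name and paths); -e overrides the invariant_epsilon stored in the segment configuration before the features are computed:

# Hypothetical driver; with -o omitted the output name is derived from the config.
if __name__ == "__main__":
    main(["ChaoticInvariantFeatures.py", "-i", "segments.json", "-e", "0.5"])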
Example #3
def main(argv):
    parser = argparse.ArgumentParser(description='Post Processing tool for Segment Data')
    parser.add_argument('-i', '--infile')
    parser.add_argument('-o', '--outfile')
    parser.add_argument('-p', '--pool', type=int, default=max(1,multiprocessing.cpu_count()-2))  # at least one worker; min() here would never enable parallelism
    args = vars(parser.parse_args(argv[1:]))
    segments_json = load_data(args['infile'], 'segments', None, None, "ChaosPost: ")
    if segments_json == None :
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    print "input read"
    segments = Segments.fromJSONDict(segments_json)
    segments.config.post_process="ChaosPost"
    dimensions = len(segments.segments[0].windows[0]) / segments.config.window_size
    if args['pool'] == 1 :
        pool = None
    else :
        pool = multiprocessing.Pool(args['pool'])
        print "%d processes started" % args['pool']
    for index in range(dimensions) :
        config = copy(segments.config)
        config.data_index = segments.segments[0].data_index[index]
        post_processed = ChaosPost(config, segments.segments, dimensions=dimensions, index=index, pool=pool)
        if args['outfile'] == None:
            outfile = ChaosPost.get_segment_filename(config)
        else :
            outfile = args['outfile']
        print "Writing %s" % outfile
        post_processed.config.status = "ChaosPost"
        save_data(outfile, post_processed.toJSONDict())
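
A usage sketch for this post-processor (placeholder script name and path); because the loop above iterates over the data dimensions, omitting -o writes one output file per dimension via ChaosPost.get_segment_filename:

# Hypothetical driver; one JSON file is written per data dimension when -o is omitted.
if __name__ == "__main__":
    main(["ChaosPost.py", "-i", "segments.json", "-p", "2"])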
Example #4
def main(argv) :
    parser = argparse.ArgumentParser(description='Tool to generate a radial basis kernel from segmented data')
    parser.add_argument('-i', '--infile', help='Input JSON Segment file')
    parser.add_argument('-o', '--outfile', help='Output JSON RBF Kernel Matrix file')
    parser.add_argument('-g', '--kernel-gamma')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = vars(parser.parse_args(argv[1:]))
    sf_json = load_data(args['infile'], 'segments', None, None, "RBFKernel: ")
    if sf_json == None :
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    segments = Segments.fromJSONDict(sf_json)
    config = segments.config
    if (int(args['pool']) > 1) :
      pool = multiprocessing.Pool(int(args['pool']))
    else :
      pool = None
    if (args['kernel_gamma'] != None) :
        if args['kernel_gamma'] == 'cv' :
            config.kernel_gamma = 'cv'
        else :
            config.kernel_gamma = float(args['kernel_gamma'])
    rk = RBFKernel(config, segments, pool=pool)
    start = time.clock() 
    rk.compute_kernel()
    stop = time.clock()
    print "%f seconds" % (stop - start,)
    if args['outfile'] == None :
        outfile = RBFKernel.get_kernel_filename(config)
    else :
        outfile = args['outfile']
    print "RBFKernel: Writing %s" % (outfile,)
    rk.config.status = "RBFKernel"
    save_data(outfile, rk.toJSONDict())
Example #5
def main(argv) :
    parser = argparse.ArgumentParser(description='Tool to generate the scale space similarity between all pairs of persistence diagrams')
    parser.add_argument('-i', '--infile', help='Input JSON Persistence Diagram File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument('-k', '--kernel-scale', type=float, help='Kernel Scale to use for Scale Space Similarity')
    parser.add_argument('-d', '--persistence-degree', type=float, help='Persistence degree to consider when computing Scale Space Similarity')
    parser.add_argument('--kernel-file', help='translate from PersistenceKernel instead of redoing calculation')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = parser.parse_args(argv[1:])
    
    if args.kernel_file != None :
        from Datatypes.Kernel import Kernel
        kernel_json = load_data(args.kernel_file, 'kernel', None, None, "Scale Space Similarity: ")
        if kernel_json == None :
            print "Could not load --kernel-file : %s" % (args.kernel_file, )
            exit()
        kernel = Kernel.fromJSONDict(kernel_json)

        sss = Distances(kernel.config, 
                        [[Distance(mean=k) for k in row] for row in kernel.kernel_matrix],
                        kernel.segment_info)
        
        if (args.outfile == None) :
            args.outfile = ScaleSpaceSimilarity.get_distances_filename(sss.config)
        
        print "Writing %s" % (args.outfile, )
        sss.config.status = "ScaleSpaceSimilarity"
        save_data(args.outfile, sss.toJSONDict())

    else :
        persistences_json = load_data(args.infile, 'persistence_diagrams', None, None, "Scale Space Similarity: ")
        if persistences_json == None :
            print "Could not load --infile : %s" % (args.infile, )
            exit()
        persistences = PD.fromJSONDict(persistences_json)
        if args.kernel_scale == None :
            args.kernel_scale = float(persistences.config.kernel_scale)
        else :
            persistences.config.kernel_scale = args.kernel_scale

        if int(args.pool) != 1 :
            pool = multiprocessing.Pool(int(args.pool))
        else :
            pool = None

        sss = ScaleSpaceSimilarity(persistences.config, persistences, 
                                   persistences.config.kernel_scale, persistences.config.persistence_degree,
                                   pool=pool)
        
        sss.compute_distances()

        if (args.outfile == None) :
            args.outfile = ScaleSpaceSimilarity.get_distances_filename(sss.config)

        print "Writing %s" % (args.outfile, )
        sss.config.status = "ScaleSpaceSimilarity"
        save_data(args.outfile, sss.toJSONDict())
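
A sketch of the two entry paths this tool supports (placeholder script name and paths): --kernel-file wraps an existing PersistenceKernel matrix as Distances without recomputation, while -i computes the scale space similarities from the persistence diagrams:

# Hypothetical invocations of the main() above.
if __name__ == "__main__":
    main(["ScaleSpaceSimilarity.py", "--kernel-file", "persistence_kernel.json"])      # reuse an existing kernel matrix
    main(["ScaleSpaceSimilarity.py", "-i", "persistence_diagrams.json", "-k", "1.0"])  # compute from diagrams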
Example #6
def main(argv) :
    parser = argparse.ArgumentParser(description="Tool to generate Persistence Diagrams from segmented data")
    parser.add_argument('-i', '--infile', help='Input JSON Segment Data file')
    parser.add_argument('-o', '--outfile', help='Output JSON Persistence Diagram file')
    parser.add_argument('-m', '--max-simplices')
    parser.add_argument('-e', '--epsilon')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = vars(parser.parse_args(argv[1:]))

    if (not os.path.exists(args['infile'])) :
        print "Could not find %s for reading" % (args['infile'],)
        return

    if (int(args['pool']) > 1) :
      pool = multiprocessing.Pool(int(args['pool']))
    else :
      pool = None

    segments_json = load_data(args['infile'], 'segments', None, None, "PersistenceGenerator: ")
    if segments_json == None :
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    segments = Segments.fromJSONDict(segments_json)
    config = segments.config
    if (args['max_simplices'] != None) :
        config.max_simplices = int(args['max_simplices'])
        config.persistence_epsilon = None
    if (args['epsilon'] != None) :
        config.persistence_epsilon = float(args['epsilon'])
        config.max_simplices = None

    def identity(x) :
        return x

    if pool != None :
        persistences = pool.imap(process, itertools.product(segments.segments, [(config.max_simplices, config.persistence_epsilon)]))
    else :
        persistences = itertools.imap(process, itertools.product(segments.segments, [(config.max_simplices, config.persistence_epsilon)]))

    persistences = PDS(config, list(persistences))
    
    if args['outfile'] == None :
        outfile = PDS.get_persistence_diagrams_filename(config)
    else :
        outfile = args['outfile']
    print "PersistenceGenerator: Writing %s" % (outfile,)
    persistences.config.status = "PersistenceGenerator"
    save_data(outfile, persistences.toJSONDict())
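
The process worker mapped over the pool above is defined elsewhere in the module; from the itertools.product call, each work item it receives is a (segment, (max_simplices, persistence_epsilon)) tuple. A hedged sketch of a compatible worker signature follows; the body is a placeholder, not the original implementation:

def process(work_item):
    # Each item is (segment, (max_simplices, persistence_epsilon)), matching the product() call above.
    segment, (max_simplices, epsilon) = work_item
    # ... build a filtration for this segment and compute its persistence diagram here ...
    return None  # placeholder; the real worker returns a persistence diagram for the segment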
Example #7
def main(argv) :
    parser = argparse.ArgumentParser(description='Tool to evaluate the Landscape Distances between two Arrays of Persistence Diagrams')
    parser.add_argument('-a', '--infile-a', help="JSON Persistence Diagram file of the first set of Persistence Diagrams")
    parser.add_argument('-b', '--infile-b', help="JSON Persistence Diagram file of the second set of Persistence Diagrams")
    parser.add_argument('-o', '--outfile', help="JSON Output File")
    parser.add_argument('-d', '--degree', type=int, default=1, help="Persistence Degree to consider")
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = vars(parser.parse_args(argv[1:]))
    if (not os.path.exists(args['infile_a']) or \
        ((not (args['infile_a'] == args['infile_b'] or args['infile_b'] == None) and not os.path.exists(args['infile_b'])))) :
        parser.print_help()
        exit()
    pfa_json = load_data(args['infile_a'], 'persistence_diagrams', None, None, "LandscapeDistances: ")
    if pfa_json == None :
        print "Could not load --infile-a : %s" % (args['infile_a'],)
        exit()
    persistences_a = PD.fromJSONDict(pfa_json)
    if args['infile_a'] == args['infile_b'] or args['infile_b'] == None :
        persistences_b = persistences_a
    else :
        pfb_json = load_data(args['infile_b'], 'persistence_diagrams', None, None, "LandscapeDistances: ")
        if pfb_json == None :
            print "Could not load --infile-b : %s" % (args['infile_b'],)
            exit()
        persistences_b = PD.fromJSONDict(pfb_json)

    config = persistences_a.config
    if args['degree'] != None :
        config.persistence_degree = args['degree']

    if (int(args['pool']) > 1) :
      pool = multiprocessing.Pool(int(args['pool']))
    else :
      pool = None

    start = time.time()
    distances =  LandscapeDistances(config, persistences_a, persistences_b, pool=pool)
    dist = distances.compute_distances()
    end = time.time()
    print "Time elapsed %f" % (end - start)
    if not('outfile' in args) or args['outfile'] == None :
        outfile = LandscapeDistances.get_distances_filename(config)
    else :
        outfile = args['outfile']
    print "Writing %s" % (outfile,)
    distances.config.status = "LandscapeDistance"
    save_data(outfile, distances.toJSONDict())
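
A usage sketch (placeholder script name and path); omitting -b, or passing the same file for -a and -b, computes the full pairwise distance matrix within a single set of diagrams:

# Hypothetical driver; -b defaults to the -a file, giving an all-pairs distance matrix.
if __name__ == "__main__":
    main(["LandscapeDistances.py", "-a", "persistence_diagrams.json", "-d", "1", "-p", "4"])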
Example #8
def main(argv):
    parser = argparse.ArgumentParser(description='Post Processing tool for Segment Data')
    parser.add_argument('-i', '--infile')
    parser.add_argument('-o', '--outfile')
    args = vars(parser.parse_args(argv[1:]))
    segments_json = load_data(args['infile'], 'segments', None, None, "NormalizePost: ")
    if segments_json == None :
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    segments = Segments.fromJSONDict(segments_json)
    segments.config.post_process="NormalizePost"
    post_processed = NormalizePost(segments.config, segments.segments)
    if args['outfile'] == None:
        outfile = NormalizePost.get_segment_filename(segments.config)
    else :
        outfile = args['outfile']
    print "Writing %s" % outfile
    post_processed.config.status = "NormalizePost"
    save_data(outfile, post_processed.toJSONDict())
Example #9
def main(argv) :
    parser = argparse.ArgumentParser(description='Tool to generate a similarity kernel from persistence data')
    parser.add_argument('-i', '--infile', help='Input JSON Persistence Diagram file')
    parser.add_argument('-o', '--outfile', help='Output JSON Kernel Similarity Matrix file')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    parser.add_argument('-k', '--kernel-scale')
    parser.add_argument('-d', '--persistence-degree', help='Filter persistence to entries of this degree')
    args = vars(parser.parse_args(argv[1:]))

    persistences_json = load_data(args['infile'], 'persistence_diagrams', None, None, "PersistenceKernel: ")
    if persistences_json == None :
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    persistences = PD.fromJSONDict(persistences_json)
    config = persistences.config
    
    if (int(args['pool']) > 1) :
        pool = multiprocessing.Pool(int(args['pool']))
    else :
        pool = None

    if (args['kernel_scale'] != None) :
        config.kernel_scale = float(args['kernel_scale'])

    if (args['persistence_degree'] != None) :
        config.persistence_degree = args['persistence_degree']

    pk = PersistenceKernel(config, persistences,
                           kernel_fun=ScaleSpaceWrapper(float(config.kernel_scale)),
                           pool=pool)
    start = time.clock() 
    pk.compute_kernel()
    stop = time.clock()
    print "%f seconds" % (stop - start,)
    print "Is Positive Semidefinite? %s" %  (isPSD(numpy.matrix(pk.kernel_matrix),))

    if args['outfile'] == None :
        outfile = PersistenceKernel.get_kernel_filename(config)
    else :
        outfile = args['outfile'] 
    print "PersistenceKernel: Writing %s" % (outfile,)
    pk.config.status = "PersistenceKernel"
    save_data(outfile, pk.toJSONDict())
Example #10
def main(argv):
    parser = argparse.ArgumentParser(
        description='Tool to generate a radial basis kernel from segmented data'
    )
    parser.add_argument('-i', '--infile', help='Input JSON Segment file')
    parser.add_argument('-o',
                        '--outfile',
                        help='Output JSON RBF Kernel Matrix file')
    parser.add_argument('-g', '--kernel-gamma')
    parser.add_argument('-p',
                        '--pool',
                        default=multiprocessing.cpu_count(),
                        help='Threads of computation to use')
    args = vars(parser.parse_args(argv[1:]))
    sf_json = load_data(args['infile'], 'segments', None, None, "RBFKernel: ")
    if sf_json == None:
        print "Could not load --infile : %s" % (args['infile'], )
        exit()
    segments = Segments.fromJSONDict(sf_json)
    config = segments.config
    if (int(args['pool']) > 1):
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None
    if (args['kernel_gamma'] != None):
        if args['kernel_gamma'] == 'cv':
            config.kernel_gamma = 'cv'
        else:
            config.kernel_gamma = float(args['kernel_gamma'])
    rk = RBFKernel(config, segments, pool=pool)
    start = time.clock()
    rk.compute_kernel()
    stop = time.clock()
    print "%f seconds" % (stop - start, )
    if args['outfile'] == None:
        outfile = RBFKernel.get_kernel_filename(config)
    else:
        outfile = args['outfile']
    print "RBFKernel: Writing %s" % (outfile, )
    rk.config.status = "RBFKernel"
    save_data(outfile, rk.toJSONDict())
Example #11
def main(argv):
    parser = argparse.ArgumentParser(
        description='Post Processing tool for Segment Data')
    parser.add_argument('-i', '--infile')
    parser.add_argument('-o', '--outfile')
    args = vars(parser.parse_args(argv[1:]))
    segments_json = load_data(args['infile'], 'segments', None, None,
                              "NormalizePost: ")
    if segments_json == None:
        print "Could not load --infile : %s" % (args['infile'], )
        exit()
    segments = Segments.fromJSONDict(segments_json)
    segments.config.post_process = "NormalizePost"
    post_processed = NormalizePost(segments.config, segments.segments)
    if args['outfile'] == None:
        outfile = NormalizePost.get_segment_filename(segments.config)
    else:
        outfile = args['outfile']
    print "Writing %s" % outfile
    post_processed.config.status = "NormalizePost"
    save_data(outfile, post_processed.toJSONDict())
Example #12
def main(argv):
    parser = argparse.ArgumentParser(
        description='Tool to perform learning on pregenerated features')
    parser.add_argument('-i',
                        '--infile',
                        help='Input JSON Similarity Feature file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p',
                        '--pool',
                        default=multiprocessing.cpu_count(),
                        help='Threads of computation to use')
    parser.add_argument(
        '-c',
        '--learning-C',
        help=
        'C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('--timeout', type=int, default=3600)
    parser.add_argument('-t',
                        '--train-test-partitions',
                        help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))

    ff_json = load_data(args['infile'], 'features', None, None,
                        "FeatureLearning: ")
    if ff_json == None:
        print "Could not load Features from %s" % (args['infile'], )
        sys.exit(1)
    features = Features.fromJSONDict(ff_json)
    config = features.config
    segment_info = features.segment_info
    if (int(args['pool']) > 1):
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None

    if (args['learning_C'] != None):
        learning_C = parse_range(args['learning_C'], t=float)
    else:
        learning_C = config.learning_C
    if not isinstance(learning_C, list):
        learning_C = [learning_C]
    if (args['train_test_partitions'] != None):
        partitions_json = load_data(args['train_test_partitions'],
                                    'partitions', None, None,
                                    "FeatureLearning: ")
        if partitions_json == None:
            print "Could not load Train / Test Partitions from %s" % (
                args['train_test_partitions'], )
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        partitions = generate_partitions(config,
                                         segment_info,
                                         cv_iterations=5 if
                                         (len(learning_C) > 1) else 0)

    if len(learning_C) > 1 and len(partitions.cross_validation) > 0:
        num_cv = len(partitions.cross_validation)
        learning_wrap = LearningWrapper(features)
        if pool != None:
            results = pool.imap(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
            final_results = []
            try:
                while True:
                    result = results.next(
                        args['timeout'])  # timeout in case shogun died on us
                    final_results.append(result)
            except StopIteration:
                pass
            except multiprocessing.TimeoutError as e:
                pool.terminate()  # no 'self' in this function; shut down the worker pool directly
                traceback.print_exc()
                sys.exit(1)
            results = final_results
        else:
            results = map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        print len(results)
        for C in learning_C:
            correct = Learning(
                config, [_result for (_C, _result) in results if C == _C
                         ]).get_average_correct()
            if correct > max_correct:
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print "FeatureLearning: using C = %s, correct = %s" % (
            config.learning_C, max_correct)
    else:
        if isinstance(learning_C, list):
            config.learning_C = learning_C[0]
        else:
            config.learning_C = learning_C

    learning_wrap = LearningWrapper(features)

    if pool != None:
        results = pool.map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    else:
        results = map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C, result) in results])

    if args['outfile'] == None:
        learning_filename = FeatureLearning.get_learning_filename(config)
    else:
        learning_filename = args['outfile']

    correct = learning.get_average_correct()
    print "%s correct %2.2f%% error %2.2f%% classes %s" % (
        "FeatureLearning:", correct * 100.0, (1.0 - correct) * 100.0,
        len(set([s.max_label() for s in features.segment_info])))
    print "Writing %s" % (learning_filename, )
    learning.config.status = "FeatureLearning"
    save_data(learning_filename, learning.toJSONDict())
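
A usage sketch (placeholder script name and paths); the exact range syntax accepted by parse_range is an assumption here, but supplying several C values enables the cross-validation branch above, while a single value goes straight to evaluation with that C:

# Hypothetical driver; '0.1:10:10' stands in for whatever range syntax parse_range accepts.
if __name__ == "__main__":
    main(["FeatureLearning.py", "-i", "features.json", "-c", "0.1:10:10", "-t", "partitions.json", "-p", "8"])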
Example #13
if __name__ == "__main__" :
    parser = argparse.ArgumentParser("Tool to generate train / test splits for testing and cross validation")
    parser.add_argument("--segments", "-i")
    parser.add_argument("--outfile", "-o")
    parser.add_argument("--learning-split", "-s", type=float)
    parser.add_argument("--learning-iterations", "-I", type=int)
    parser.add_argument("--cv-iterations", "-v", default=5, type=int)
    parser.add_argument("--seed", "-S")
    args = parser.parse_args(sys.argv[1:])

    segments_json = load_data(args.segments, 'segments', None, None, sys.argv[0] + " : ")
    if segments_json == None :
        print "Could not load Segments from %s" % (args.segments,)
        sys.exit(1)
    segment_info = [SegmentInfo.fromJSONDict(s) for s in segments_json['segments']]
    config = Configuration.fromJSONDict(segments_json['config'])
    if args.learning_split != None :
        config.learning_split = args.learning_split
    if args.learning_iterations != None : 
        config.learning_iterations = args.learning_iterations
    
    output = generate_partitions(config, segment_info, cv_iterations=args.cv_iterations, seed=args.seed)
    
    if args.outfile == None :
        args.outfile = TrainTestPartitions.get_partition_filename(config)

    print "Writing %s" % (args.outfile,)
    save_data(args.outfile, output.toJSONDict())
    
Example #14
    # tail of the kernel-averaging helper: entry-wise average of the input kernel matrices
    kernel_matrix = [[numpy.average([k.kernel_matrix[i][j] for k in kernels]) for i in range(kernel_dimension)] for j in range(kernel_dimension)]
    return Kernel(config, kernel_matrix, segment_info)

if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Tool to take multiple Kernels and average them")
    parser.add_argument("--infile", "-i", nargs="+")
    parser.add_argument("--outfile", "-o")
    parser.add_argument("--ratio", "-r", type=float, default=0.5)
    parser.add_argument("--pool", "-p")
    
    args = parser.parse_args(sys.argv[1:])
    kernels_json = [load_data(infile, 'kernel', None, None, sys.argv[0]+": ") for infile in args.infile]
    kernels = [Kernel.fromJSONDict(kernel_json) for kernel_json in kernels_json]

    if len(kernels) == 2 :
        weights = [args.ratio, 1.0 - args.ratio]
    else :
        weights = None
    average_kernel = AverageKernel(kernels, weights)

    if args.outfile == None :
        args.outfile = get_filename(average_kernel.config, 
                                    ['max_simplices', 'persistence_epsilon', 
                                     'segment_filename', 'segment_stride', 'segment_size', 
                                     'window_size', 'window_stride', 
                                     'kernel_scale', 'kernel_gamma', 'invariant_epsilon', 
                                     'data_file', 'data_index', 'label_index', 'persistence_degree', 
                                     'data_type', 'post_process', 'post_process_arg'], "AverageKernel")
    print "Writing %s" % (args.outfile,)
    save_data(args.outfile, average_kernel.toJSONDict())
Example #15
def main(argv):
    parser = argparse.ArgumentParser(
        description=
        'Tool to evaluate the Bottleneck Distances between two Arrays of Persistence Diagrams'
    )
    parser.add_argument(
        '-a',
        '--infile-a',
        help=
        "JSON Persistence Diagram file of the first set of Persistence Diagrams"
    )
    parser.add_argument(
        '-b',
        '--infile-b',
        help=
        "JSON Persistence Diagram file of the second set of Persistence Diagrams"
    )
    parser.add_argument('-o', '--outfile', help="JSON Output File")
    parser.add_argument('-d',
                        '--degree',
                        type=int,
                        default=1,
                        help="Persistence Degree to consider")
    parser.add_argument('-p',
                        '--pool',
                        default=multiprocessing.cpu_count(),
                        help='Threads of computation to use')
    args = vars(parser.parse_args(argv[1:]))
    if (not os.path.exists(args['infile_a']) or \
        ((not (args['infile_a'] == args['infile_b'] or args['infile_b'] == None) and not os.path.exists(args['infile_b'])))) :
        parser.print_help()
        exit()
    pfa_json = load_data(args['infile_a'], 'persistence_diagrams', None, None,
                         "BottleneckDistances: ")
    if pfa_json == None:
        print "Could not load --infile-a : %s" % (args['infile_a'], )
        exit()
    persistences_a = PersistenceDiagrams.fromJSONDict(pfa_json)
    if args['infile_a'] == args['infile_b'] or args['infile_b'] == None:
        persistences_b = persistences_a
    else:
        pfb_json = load_data(args['infile_b'], 'persistence_diagrams', None,
                             None, "BottleneckDistances: ")
        if pfb_json == None:
            print "Could not load --infile-b : %s" % (args['infile_b'], )
            exit()
        persistences_b = PersistenceDiagrams.fromJSONDict(pfb_json)
    if (int(args['pool']) > 1):
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None
    start = time.time()
    distances = BottleneckDistances(persistences_a.config,
                                    persistences_a,
                                    persistences_b,
                                    pool=pool,
                                    degree=args['degree'])
    dist = distances.compute_distances()
    end = time.time()
    print "Time elapsed %f" % (end - start)
    if not ('outfile' in args) or args['outfile'] == None:
        outfile = BottleneckDistances.get_distances_filename(distances.config)
    else:
        outfile = args['outfile']
    print "Writing %s" % (outfile, )
    distances.config.status = "BottleneckDistance"
    save_data(outfile, distances.toJSONDict())
Example #16
def main(argv):
    parser = argparse.ArgumentParser(
        description=
        'Tool to generate the scale space similarity between all pairs of persistence diagrams'
    )
    parser.add_argument('-i',
                        '--infile',
                        help='Input JSON Persistence Diagram File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument('-k',
                        '--kernel-scale',
                        type=float,
                        help='Kernel Scale to use for Scale Space Similarity')
    parser.add_argument(
        '-d',
        '--persistence-degree',
        type=float,
        help=
        'Persistence degree to consider when computing Scale Space Similarity')
    parser.add_argument(
        '--kernel-file',
        help='translate from PersistenceKernel instead of redoing calculation')
    parser.add_argument('-p',
                        '--pool',
                        default=multiprocessing.cpu_count(),
                        help='Threads of computation to use')
    args = parser.parse_args(argv[1:])

    if args.kernel_file != None:
        from Datatypes.Kernel import Kernel
        kernel_json = load_data(args.kernel_file, 'kernel', None, None,
                                "Scale Space Similarity: ")
        if kernel_json == None:
            print "Could not load --kernel-file : %s" % (args.kernel_file, )
            exit()
        kernel = Kernel.fromJSONDict(kernel_json)

        sss = Distances(kernel.config, [[Distance(mean=k) for k in row]
                                        for row in kernel.kernel_matrix],
                        kernel.segment_info)

        if (args.outfile == None):
            args.outfile = ScaleSpaceSimilarity.get_distances_filename(
                sss.config)

        print "Writing %s" % (args.outfile, )
        sss.config.status = "ScaleSpaceSimilarity"
        save_data(args.outfile, sss.toJSONDict())

    else:
        persistences_json = load_data(args.infile, 'persistence_diagrams',
                                      None, None, "Scale Space Similarity: ")
        if persistences_json == None:
            print "Could not load --infile : %s" % (args.infile, )
            exit()
        persistences = PD.fromJSONDict(persistences_json)
        if args.kernel_scale == None:
            args.kernel_scale = float(persistences.config.kernel_scale)
        else:
            persistences.config.kernel_scale = args.kernel_scale

        if int(args.pool) != 1:
            pool = multiprocessing.Pool(int(args.pool))
        else:
            pool = None

        sss = ScaleSpaceSimilarity(persistences.config,
                                   persistences,
                                   persistences.config.kernel_scale,
                                   persistences.config.persistence_degree,
                                   pool=pool)

        sss.compute_distances()

        if (args.outfile == None):
            args.outfile = ScaleSpaceSimilarity.get_distances_filename(
                sss.config)

        print "Writing %s" % (args.outfile, )
        sss.config.status = "ScaleSpaceSimilarity"
        save_data(args.outfile, sss.toJSONDict())
Example #17
    parser.add_argument("--learning-split", "-s", type=float)
    parser.add_argument("--learning-iterations", "-I", type=int)
    parser.add_argument("--cv-iterations", "-v", default=5, type=int)
    parser.add_argument("--seed", "-S")
    args = parser.parse_args(sys.argv[1:])

    segments_json = load_data(args.segments, 'segments', None, None,
                              sys.argv[0] + " : ")
    if segments_json == None:
        print "Could not load Segments from %s" % (args.segments, )
        sys.exit(1)
    segment_info = [
        SegmentInfo.fromJSONDict(s) for s in segments_json['segments']
    ]
    config = Configuration.fromJSONDict(segments_json['config'])
    if args.learning_split != None:
        config.learning_split = args.learning_split
    if args.learning_iterations != None:
        config.learning_iterations = args.learning_iterations

    output = generate_partitions(config,
                                 segment_info,
                                 cv_iterations=args.cv_iterations,
                                 seed=args.seed)

    if args.outfile == None:
        args.outfile = TrainTestPartitions.get_partition_filename(config)

    print "Writing %s" % (args.outfile, )
    save_data(args.outfile, output.toJSONDict())
Example #18
def main(argv):
    parser = argparse.ArgumentParser(description='Tool to perform learning on pregenerated features')
    parser.add_argument('-i', '--infile', help='Input JSON Similarity Feature file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    parser.add_argument('-c', '--learning-C', help='C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('--timeout', type=int, default=3600)
    parser.add_argument('-t', '--train-test-partitions', help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))
    
    ff_json = load_data(args['infile'], 'features', None, None, "FeatureLearning: ")
    if ff_json == None :
        print "Could not load Features from %s" % (args['infile'],)
        sys.exit(1)
    features = Features.fromJSONDict(ff_json)
    config = features.config
    segment_info = features.segment_info
    if (int(args['pool']) > 1) :
      pool = multiprocessing.Pool(int(args['pool']))
    else :
      pool = None
    
    if (args['learning_C'] != None) :
        learning_C = parse_range(args['learning_C'], t=float)
    else :
        learning_C = config.learning_C
    if not isinstance(learning_C, list) :
        learning_C = [learning_C]
    if (args['train_test_partitions'] != None) :
        partitions_json = load_data(args['train_test_partitions'], 'partitions', None, None, "FeatureLearning: ")
        if partitions_json == None :
            print "Could not load Train / Test Partitions from %s" % (args['train_test_partitions'],)
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else :
        partitions = generate_partitions(config, segment_info, 
                                         cv_iterations=5 if (len(learning_C) > 1) else 0)

    if len(learning_C) > 1 and len(partitions.cross_validation) > 0 :
        num_cv = len(partitions.cross_validation)
        learning_wrap = LearningWrapper( features )
        if pool != None :
            results = pool.imap(learning_wrap, itertools.product(partitions.cross_validation, learning_C))
            final_results = []
            try:
                while True:
                    result = results.next(args['timeout']) # timeout in case shogun died on us
                    final_results.append(result)
            except StopIteration:
                pass
            except multiprocessing.TimeoutError as e:
                pool.terminate()  # no 'self' in this function; shut down the worker pool directly
                traceback.print_exc()
                sys.exit(1)
            results = final_results
        else :
            results = map(learning_wrap, itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        print len(results)
        for C in learning_C :
            correct = Learning(config, [_result for (_C, _result) in results if C == _C]).get_average_correct()
            if correct > max_correct :
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print "FeatureLearning: using C = %s, correct = %s" % (config.learning_C, max_correct)
    else :
        if isinstance(learning_C, list) :
            config.learning_C = learning_C[0]
        else :
            config.learning_C = learning_C

    learning_wrap = LearningWrapper( features )

    if pool != None :
        results = pool.map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C]))
    else :
        results = map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C,result) in results])

    if args['outfile'] == None :
        learning_filename = FeatureLearning.get_learning_filename(config)
    else :
        learning_filename = args['outfile']

    correct = learning.get_average_correct()
    print "%s correct %2.2f%% error %2.2f%% classes %s" % ("FeatureLearning:", correct * 100.0, (1.0 - correct)*100.0, 
                                                   len(set([s.max_label() for s in features.segment_info])))
    print "Writing %s" % (learning_filename, )
    learning.config.status = "FeatureLearning"
    save_data(learning_filename, learning.toJSONDict())
Example #19
def main(argv):
    parser = argparse.ArgumentParser(
        description=
        'Tool to classify data based on 1-NN matching based on the supplied distance matrix'
    )
    parser.add_argument('-i', '--infile', help='Input JSON Distance File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument(
        '-m',
        '--max-mode',
        action='store_true',
        help=
        'Use maximum "Distance" instead of minimum (for Similarity measures"')
    parser.add_argument('-t',
                        '--train-test-partitions',
                        help='Precomputed train / test partitions')
    parser.add_argument('-p', '--pool', default=1, type=int)
    args = parser.parse_args(argv[1:])
    distances_json = load_data(args.infile, 'distances', None, None,
                               "DistanceLearning: ")

    if distances_json == None:
        print "Could not load --infile : %s" % (args.infile, )
        sys.exit(1)

    distances = Distances.fromJSONDict(distances_json)

    if (args.train_test_partitions != None):
        partitions_json = load_data(args.train_test_partitions, 'partitions',
                                    None, None, "KernelLearning: ")
        if partitions_json == None:
            print "Could not load Train / Test Partitions from %s" % (
                args.train_test_partitions, )
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        partitions = generate_partitions(distances.config,
                                         distances.segment_info,
                                         cv_iterations=0)

    if (args.pool > 1):
        pool = multiprocessing.Pool(args.pool)
    else:
        pool = None

    learning_wrap = LearningWrapper(distances, args.max_mode)

    if pool != None:
        results = pool.map(learning_wrap, partitions.evaluation)
    else:
        results = map(learning_wrap, partitions.evaluation)

    learning = Learning(distances.config, list(results))

    if (args.outfile == None):
        args.outfile = DistanceLearning.get_learning_filename(learning.config)

    print "Writing %s, %2.2f%% correct" % (
        args.outfile, learning.get_average_correct() * 100.0)
    learning.config.status = "DistanceLearning"
    save_data(args.outfile, learning.toJSONDict())
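
A usage sketch (placeholder script name and paths); -m flips the 1-NN matching to prefer the largest matrix entry, which is what a similarity matrix, as opposed to a distance matrix, requires:

# Hypothetical driver; distances.json and learning.json are placeholder paths.
if __name__ == "__main__":
    main(["DistanceLearning.py", "-i", "distances.json", "-o", "learning.json", "-m", "-p", "4"])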