def main(argv):
    parser = argparse.ArgumentParser(description='Tool to classify data based on 1-NN of segment data')
    parser.add_argument('-i', '--infile', help='Input JSON Segment File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = parser.parse_args(argv[1:])
    segments_json = load_data(args.infile, 'segments', None, None, "Euclidean Segment Distances: ")
    if segments_json == None:
        print "Could not load --infile : %s" % (args.infile,)
        exit()
    segments = Segments.fromJSONDict(segments_json)
    if int(args.pool) != 1:
        pool = multiprocessing.Pool(int(args.pool))
    else:
        pool = None
    esd = EuclideanDistances(segments.config, segments, pool=pool)
    esd.compute_distances()
    if args.outfile == None:
        args.outfile = EuclideanDistances.get_distances_filename(esd.config)
    print "Writing %s" % (args.outfile,)
    esd.config.status = "EuclideanDistances"
    save_data(args.outfile, esd.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Tool to classify data based on chaotic invariants of segment data')
    parser.add_argument('-i', '--infile', help='Input JSON Segment File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument('-e', '--epsilon', type=float, default=1.0,
                        help='epsilon value used for generation of chaotic invariants')
    parser.add_argument('-p', '--pool', default=max(1, multiprocessing.cpu_count() - 2),
                        help='Threads of computation to use')
    args = parser.parse_args(argv[1:])
    segments_json = load_data(args.infile, 'segments', None, None, "Chaotic Invariant Features: ")
    if segments_json == None:
        print "Could not load --infile : %s" % (args.infile,)
        exit()
    segments = Segments.fromJSONDict(segments_json)
    if segments.config.segment_size != segments.config.window_size:
        print "%s ERROR ill formed input, segment_size != window_size in %s" % (argv[0], args.infile)
        sys.exit(1)
    if args.epsilon != None:
        segments.config.invariant_epsilon = args.epsilon
    if int(args.pool) != 1:
        pool = multiprocessing.Pool(int(args.pool))
    else:
        pool = None
    cid = ChaoticInvariantFeatures(segments.config, segments,
                                   epsilon=segments.config.invariant_epsilon, pool=pool)
    cid.compute_features()
    if args.outfile == None:
        args.outfile = ChaoticInvariantFeatures.get_features_filename(cid.config)
    print "Writing %s" % (args.outfile,)
    cid.config.status = "ChaoticInvariantFeatures"
    save_data(args.outfile, cid.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Post Processing tool for Segment Data')
    parser.add_argument('-i', '--infile')
    parser.add_argument('-o', '--outfile')
    # default to at least one worker process
    parser.add_argument('-p', '--pool', type=int, default=max(1, multiprocessing.cpu_count() - 2))
    args = vars(parser.parse_args(argv[1:]))
    segments_json = load_data(args['infile'], 'segments', None, None, "ChaosPost: ")
    if segments_json == None:
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    print "input read"
    segments = Segments.fromJSONDict(segments_json)
    segments.config.post_process = "ChaosPost"
    dimensions = len(segments.segments[0].windows[0]) / segments.config.window_size
    if args['pool'] == 1:
        pool = None
    else:
        pool = multiprocessing.Pool(args['pool'])
        print "%d processes started" % args['pool']
    # post-process each data dimension separately and write one output file per dimension
    for index in range(dimensions):
        config = copy(segments.config)
        config.data_index = segments.segments[0].data_index[index]
        post_processed = ChaosPost(config, segments.segments, dimensions=dimensions, index=index, pool=pool)
        if args['outfile'] == None:
            outfile = ChaosPost.get_segment_filename(config)
        else:
            outfile = args['outfile']
        print "Writing %s" % outfile
        post_processed.config.status = "ChaosPost"
        save_data(outfile, post_processed.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Tool to generate a radial basis kernel from segmented data')
    parser.add_argument('-i', '--infile', help='Input JSON Segment file')
    parser.add_argument('-o', '--outfile', help='Output JSON RBF Kernel Matrix file')
    parser.add_argument('-g', '--kernel-gamma')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = vars(parser.parse_args(argv[1:]))
    sf_json = load_data(args['infile'], 'segments', None, None, "RBFKernel: ")
    if sf_json == None:
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    segments = Segments.fromJSONDict(sf_json)
    config = segments.config
    if int(args['pool']) > 1:
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None
    if args['kernel_gamma'] != None:
        if args['kernel_gamma'] == 'cv':
            config.kernel_gamma = 'cv'
        else:
            config.kernel_gamma = float(args['kernel_gamma'])
    rk = RBFKernel(config, segments, pool=pool)
    start = time.clock()
    rk.compute_kernel()
    stop = time.clock()
    print "%f seconds" % (stop - start,)
    if args['outfile'] == None:
        outfile = RBFKernel.get_kernel_filename(config)
    else:
        outfile = args['outfile']
    print "RBFKernel: Writing %s" % (outfile,)
    rk.config.status = "RBFKernel"
    save_data(outfile, rk.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Tool to generate the scale space similarity between all pairs of persistence diagrams')
    parser.add_argument('-i', '--infile', help='Input JSON Persistence Diagram File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument('-k', '--kernel-scale', type=float, help='Kernel Scale to use for Scale Space Similarity')
    parser.add_argument('-d', '--persistence-degree', type=float,
                        help='Persistence degree to consider when computing Scale Space Similarity')
    parser.add_argument('--kernel-file', help='translate from PersistenceKernel instead of redoing calculation')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = parser.parse_args(argv[1:])
    if args.kernel_file != None:
        from Datatypes.Kernel import Kernel
        kernel_json = load_data(args.kernel_file, 'kernel', None, None, "Scale Space Similarity: ")
        if kernel_json == None:
            print "Could not load --kernel-file : %s" % (args.kernel_file,)
            exit()
        kernel = Kernel.fromJSONDict(kernel_json)
        sss = Distances(kernel.config,
                        [[Distance(mean=k) for k in row] for row in kernel.kernel_matrix],
                        kernel.segment_info)
        if args.outfile == None:
            args.outfile = ScaleSpaceSimilarity.get_distances_filename(sss.config)
        print "Writing %s" % (args.outfile,)
        sss.config.status = "ScaleSpaceSimilarity"
        save_data(args.outfile, sss.toJSONDict())
    else:
        persistences_json = load_data(args.infile, 'persistence_diagrams', None, None, "Scale Space Similarity: ")
        if persistences_json == None:
            print "Could not load --infile : %s" % (args.infile,)
            exit()
        persistences = PD.fromJSONDict(persistences_json)
        if args.kernel_scale == None:
            args.kernel_scale = float(persistences.config.kernel_scale)
        else:
            persistences.config.kernel_scale = args.kernel_scale
        if args.pool != 1:
            pool = multiprocessing.Pool(int(args.pool))
        else:
            pool = None
        sss = ScaleSpaceSimilarity(persistences.config, persistences,
                                   persistences.config.kernel_scale,
                                   persistences.config.persistence_degree, pool=pool)
        sss.compute_distances()
        if args.outfile == None:
            args.outfile = ScaleSpaceSimilarity.get_distances_filename(sss.config)
        print "Writing %s" % (args.outfile,)
        sss.config.status = "ScaleSpaceSimilarity"
        save_data(args.outfile, sss.toJSONDict())

def main(argv) : parser = argparse.ArgumentParser(description="Tool to generate Persistence Diagrams from segmented data") parser.add_argument('-i', '--infile', help='Input JSON Segment Data file') parser.add_argument('-o', '--outfile', help='Output JSON Persistence Diagram file') parser.add_argument('-m', '--max-simplices') parser.add_argument('-e', '--epsilon') parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use') args = vars(parser.parse_args(argv[1:])) if (not os.path.exists(args['infile'])) : print "Could not find %s for reading" % (args['infile'],) return if (int(args['pool']) > 1) : pool = multiprocessing.Pool(int(args['pool'])) else : pool = None segments_json = load_data(args['infile'], 'segments', None, None, "PersistenceGenerator: ") if segments_json == None : print "Could not load --infile : %s" % (args['infile'],) exit() segments = Segments.fromJSONDict(segments_json) config = segments.config if (args['max_simplices'] != None) : config.max_simplices = int(args['max_simplices']) config.persistence_epsilon = None if (args['epsilon'] != None) : config.persistence_epsilon = float(args['epsilon']) config.max_simplices = None def identity(x) : return x if pool != None : persistences = pool.imap(process, itertools.product(segments.segments, [(config.max_simplices, config.persistence_epsilon)])) else : persistences = itertools.imap(process, itertools.product(segments.segments, [(config.max_simplices, config.persistence_epsilon)])) persistences = PDS(config, list(persistences)) if args['outfile'] == None : outfile = PDS.get_persistence_diagrams_filename(config) else : outfile = args['outfile'] print "PersistenceGenerator: Writing %s" % (outfile,) persistences.config.status = "PersistenceGenerator" save_data(outfile, persistences.toJSONDict())
def main(argv):
    parser = argparse.ArgumentParser(description='Tool to evaluate the Landscape Distances between two Arrays of Persistence Diagrams')
    parser.add_argument('-a', '--infile-a', help="JSON Persistence Diagram file of the first set of Persistence Diagrams")
    parser.add_argument('-b', '--infile-b', help="JSON Persistence Diagram file of the second set of Persistence Diagrams")
    parser.add_argument('-o', '--outfile', help="JSON Output File")
    parser.add_argument('-d', '--degree', type=int, default=1, help="Persistence Degree to consider")
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = vars(parser.parse_args(argv[1:]))
    if (not os.path.exists(args['infile_a']) or
        (not (args['infile_a'] == args['infile_b'] or args['infile_b'] == None) and
         not os.path.exists(args['infile_b']))):
        parser.print_help()
        exit()
    pfa_json = load_data(args['infile_a'], 'persistence_diagrams', None, None, "LandscapeDistances: ")
    if pfa_json == None:
        print "Could not load --infile-a : %s" % (args['infile_a'],)
        exit()
    persistences_a = PD.fromJSONDict(pfa_json)
    if args['infile_a'] == args['infile_b'] or args['infile_b'] == None:
        persistences_b = persistences_a
    else:
        pfb_json = load_data(args['infile_b'], 'persistence_diagrams', None, None, "LandscapeDistances: ")
        if pfb_json == None:
            print "Could not load --infile-b : %s" % (args['infile_b'],)
            exit()
        persistences_b = PD.fromJSONDict(pfb_json)
    config = persistences_a.config
    if args['degree'] != None:
        config.persistence_degree = args['degree']
    if int(args['pool']) > 1:
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None
    start = time.time()
    distances = LandscapeDistances(config, persistences_a, persistences_b, pool=pool)
    dist = distances.compute_distances()
    end = time.time()
    print "Time elapsed %f" % (end - start)
    if not ('outfile' in args) or args['outfile'] == None:
        outfile = LandscapeDistances.get_distances_filename(config)
    else:
        outfile = args['outfile']
    print "Writing %s" % (outfile,)
    distances.config.status = "LandscapeDistance"
    save_data(outfile, distances.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Post Processing tool for Segment Data')
    parser.add_argument('-i', '--infile')
    parser.add_argument('-o', '--outfile')
    args = vars(parser.parse_args(argv[1:]))
    segments_json = load_data(args['infile'], 'segments', None, None, "NormalizePost: ")
    if segments_json == None:
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    segments = Segments.fromJSONDict(segments_json)
    segments.config.post_process = "NormalizePost"
    post_processed = NormalizePost(segments.config, segments.segments)
    if args['outfile'] == None:
        outfile = NormalizePost.get_segment_filename(segments.config)
    else:
        outfile = args['outfile']
    print "Writing %s" % outfile
    post_processed.config.status = "NormalizePost"
    save_data(outfile, post_processed.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Tool to generate a similarity kernel from persistence data')
    parser.add_argument('-i', '--infile', help='Input JSON Persistence Diagram file')
    parser.add_argument('-o', '--outfile', help='Output JSON Kernel Similarity Matrix file')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    parser.add_argument('-k', '--kernel-scale')
    parser.add_argument('-d', '--persistence-degree', help='Filter persistence to entries of this degree')
    args = vars(parser.parse_args(argv[1:]))
    persistences_json = load_data(args['infile'], 'persistence_diagrams', None, None, "PersistenceKernel: ")
    if persistences_json == None:
        print "Could not load --infile : %s" % (args['infile'],)
        exit()
    persistences = PD.fromJSONDict(persistences_json)
    config = persistences.config
    if int(args['pool']) > 1:
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None
    if args['kernel_scale'] != None:
        config.kernel_scale = float(args['kernel_scale'])
    if args['persistence_degree'] != None:
        config.persistence_degree = args['persistence_degree']
    pk = PersistenceKernel(config, persistences,
                           kernel_fun=ScaleSpaceWrapper(float(config.kernel_scale)), pool=pool)
    start = time.clock()
    pk.compute_kernel()
    stop = time.clock()
    print "%f seconds" % (stop - start,)
    print "Is Positive Semidefinite? %s" % (isPSD(numpy.matrix(pk.kernel_matrix)),)
    if args['outfile'] == None:
        outfile = PersistenceKernel.get_kernel_filename(config)
    else:
        outfile = args['outfile']
    print "PersistenceKernel: Writing %s" % (outfile,)
    pk.config.status = "PersistenceKernel"
    save_data(outfile, pk.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Tool to perform learning on pregenerated features')
    parser.add_argument('-i', '--infile', help='Input JSON Similarity Feature file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    parser.add_argument('-c', '--learning-C',
                        help='C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('--timeout', type=int, default=3600)
    parser.add_argument('-t', '--train-test-partitions', help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))
    ff_json = load_data(args['infile'], 'features', None, None, "FeatureLearning: ")
    if ff_json == None:
        print "Could not load Features from %s" % (args['infile'],)
        sys.exit(1)
    features = Features.fromJSONDict(ff_json)
    config = features.config
    segment_info = features.segment_info
    if int(args['pool']) > 1:
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None
    if args['learning_C'] != None:
        learning_C = parse_range(args['learning_C'], t=float)
    else:
        learning_C = config.learning_C
    if not isinstance(learning_C, list):
        learning_C = [learning_C]
    if args['train_test_partitions'] != None:
        partitions_json = load_data(args['train_test_partitions'], 'partitions', None, None, "FeatureLearning: ")
        if partitions_json == None:
            print "Could not load Train / Test Partitions from %s" % (args['train_test_partitions'],)
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        partitions = generate_partitions(config, segment_info,
                                         cv_iterations=5 if (len(learning_C) > 1) else 0)
    if len(learning_C) > 1 and len(partitions.cross_validation) > 0:
        # cross-validate over the supplied range of C values
        num_cv = len(partitions.cross_validation)
        learning_wrap = LearningWrapper(features)
        if pool != None:
            results = pool.imap(learning_wrap, itertools.product(partitions.cross_validation, learning_C))
            final_results = []
            try:
                while True:
                    result = results.next(args['timeout'])  # timeout in case shogun died on us
                    final_results.append(result)
            except StopIteration:
                pass
            except multiprocessing.TimeoutError:
                pool.terminate()
                traceback.print_exc()
                sys.exit(1)
            results = final_results
        else:
            results = map(learning_wrap, itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        print len(results)
        for C in learning_C:
            correct = Learning(config, [_result for (_C, _result) in results if C == _C]).get_average_correct()
            if correct > max_correct:
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print "FeatureLearning: using C = %s, correct = %s" % (config.learning_C, max_correct)
    else:
        if isinstance(learning_C, list):
            config.learning_C = learning_C[0]
        else:
            config.learning_C = learning_C
    # evaluate with the selected C value
    learning_wrap = LearningWrapper(features)
    if pool != None:
        results = pool.map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C]))
    else:
        results = map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C, result) in results])
    if args['outfile'] == None:
        learning_filename = FeatureLearning.get_learning_filename(config)
    else:
        learning_filename = args['outfile']
    correct = learning.get_average_correct()
    print "%s correct %2.2f%% error %2.2f%% classes %s" % ("FeatureLearning:", correct * 100.0, (1.0 - correct) * 100.0,
                                                           len(set([s.max_label() for s in features.segment_info])))
    print "Writing %s" % (learning_filename,)
    learning.config.status = "FeatureLearning"
    save_data(learning_filename, learning.toJSONDict())

if __name__ == "__main__":
    parser = argparse.ArgumentParser("Tool to generate train / test splits for testing and cross validation")
    parser.add_argument("--segments", "-i")
    parser.add_argument("--outfile", "-o")
    parser.add_argument("--learning-split", "-s", type=float)
    parser.add_argument("--learning-iterations", "-I", type=int)
    parser.add_argument("--cv-iterations", "-v", default=5, type=int)
    parser.add_argument("--seed", "-S")
    args = parser.parse_args(sys.argv[1:])
    segments_json = load_data(args.segments, 'segments', None, None, sys.argv[0] + " : ")
    if segments_json == None:
        print "Could not load Segments from %s" % (args.segments,)
        sys.exit(1)
    segment_info = [SegmentInfo.fromJSONDict(s) for s in segments_json['segments']]
    config = Configuration.fromJSONDict(segments_json['config'])
    if args.learning_split != None:
        config.learning_split = args.learning_split
    if args.learning_iterations != None:
        config.learning_iterations = args.learning_iterations
    output = generate_partitions(config, segment_info, cv_iterations=args.cv_iterations, seed=args.seed)
    if args.outfile == None:
        args.outfile = TrainTestPartitions.get_partition_filename(config)
    print "Writing %s" % (args.outfile,)
    save_data(args.outfile, output.toJSONDict())

    kernel_matrix = [[numpy.average([k.kernel_matrix[i][j] for k in kernels])
                      for i in range(kernel_dimension)]
                     for j in range(kernel_dimension)]
    return Kernel(config, kernel_matrix, segment_info)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Tool to take multiple Kernels and average them")
    parser.add_argument("--infile", "-i", nargs="+")
    parser.add_argument("--outfile", "-o")
    parser.add_argument("--ratio", "-r", type=float, default=0.5)
    parser.add_argument("--pool", "-p")
    args = parser.parse_args(sys.argv[1:])
    kernels_json = [load_data(infile, 'kernel', None, None, sys.argv[0] + ": ") for infile in args.infile]
    kernels = [Kernel.fromJSONDict(kernel_json) for kernel_json in kernels_json]
    if len(kernels) == 2:
        weights = [args.ratio, 1.0 - args.ratio]
    else:
        weights = None
    average_kernel = AverageKernel(kernels, weights)
    if args.outfile == None:
        args.outfile = get_filename(average_kernel.config,
                                    ['max_simplices', 'persistence_epsilon', 'segment_filename',
                                     'segment_stride', 'segment_size', 'window_size', 'window_stride',
                                     'kernel_scale', 'kernel_gamma', 'invariant_epsilon',
                                     'data_file', 'data_index', 'label_index', 'persistence_degree',
                                     'data_type', 'post_process', 'post_process_arg'],
                                    "AverageKernel")
    print "Writing %s" % (args.outfile,)
    save_data(args.outfile, average_kernel.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Tool to evaluate the Bottleneck Distances between two Arrays of Persistence Diagrams')
    parser.add_argument('-a', '--infile-a', help="JSON Persistence Diagram file of the first set of Persistence Diagrams")
    parser.add_argument('-b', '--infile-b', help="JSON Persistence Diagram file of the second set of Persistence Diagrams")
    parser.add_argument('-o', '--outfile', help="JSON Output File")
    parser.add_argument('-d', '--degree', type=int, default=1, help="Persistence Degree to consider")
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    args = vars(parser.parse_args(argv[1:]))
    if (not os.path.exists(args['infile_a']) or
        (not (args['infile_a'] == args['infile_b'] or args['infile_b'] == None) and
         not os.path.exists(args['infile_b']))):
        parser.print_help()
        exit()
    pfa_json = load_data(args['infile_a'], 'persistence_diagrams', None, None, "BottleneckDistances: ")
    if pfa_json == None:
        print "Could not load --infile-a : %s" % (args['infile_a'],)
        exit()
    persistences_a = PersistenceDiagrams.fromJSONDict(pfa_json)
    if args['infile_a'] == args['infile_b'] or args['infile_b'] == None:
        persistences_b = persistences_a
    else:
        pfb_json = load_data(args['infile_b'], 'persistence_diagrams', None, None, "BottleneckDistances: ")
        if pfb_json == None:
            print "Could not load --infile-b : %s" % (args['infile_b'],)
            exit()
        persistences_b = PersistenceDiagrams.fromJSONDict(pfb_json)
    if int(args['pool']) > 1:
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None
    start = time.time()
    distances = BottleneckDistances(persistences_a.config, persistences_a, persistences_b,
                                    pool=pool, degree=args['degree'])
    dist = distances.compute_distances()
    end = time.time()
    print "Time elapsed %f" % (end - start)
    if not ('outfile' in args) or args['outfile'] == None:
        outfile = BottleneckDistances.get_distances_filename(distances.config)
    else:
        outfile = args['outfile']
    print "Writing %s" % (outfile,)
    distances.config.status = "BottleneckDistance"
    save_data(outfile, distances.toJSONDict())

def main(argv):
    parser = argparse.ArgumentParser(description='Tool to classify data based on 1-NN matching using the supplied distance matrix')
    parser.add_argument('-i', '--infile', help='Input JSON Distance File')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning File')
    parser.add_argument('-m', '--max-mode', action='store_true',
                        help='Use maximum "Distance" instead of minimum (for Similarity measures)')
    parser.add_argument('-t', '--train-test-partitions', help='Precomputed train / test partitions')
    parser.add_argument('-p', '--pool', default=1, type=int)
    args = parser.parse_args(argv[1:])
    distances_json = load_data(args.infile, 'distances', None, None, "DistanceLearning: ")
    if distances_json == None:
        print "Could not load --infile : %s" % (args.infile,)
        sys.exit(1)
    distances = Distances.fromJSONDict(distances_json)
    if args.train_test_partitions != None:
        partitions_json = load_data(args.train_test_partitions, 'partitions', None, None, "DistanceLearning: ")
        if partitions_json == None:
            print "Could not load Train / Test Partitions from %s" % (args.train_test_partitions,)
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        partitions = generate_partitions(distances.config, distances.segment_info, cv_iterations=0)
    if args.pool > 1:
        pool = multiprocessing.Pool(args.pool)
    else:
        pool = None
    learning_wrap = LearningWrapper(distances, args.max_mode)
    if pool != None:
        results = pool.map(learning_wrap, partitions.evaluation)
    else:
        results = map(learning_wrap, partitions.evaluation)
    learning = Learning(distances.config, list(results))
    if args.outfile == None:
        args.outfile = DistanceLearning.get_learning_filename(learning.config)
    print "Writing %s, %2.2f%% correct" % (args.outfile, learning.get_average_correct() * 100.0)
    learning.config.status = "DistanceLearning"
    save_data(args.outfile, learning.toJSONDict())