def main(argv): current_dir = os.getcwd() parser = argparse.ArgumentParser( description="utility to plot distances as a 2-d plot") parser.add_argument("-b", "--bottleneck-distances") parser.add_argument("-w", "--wasserstein-distances") args = parser.parse_args(argv[1:]) print args.bottleneck_distances print args.wasserstein_distances distances = [ Distances.fromJSONDict( load_data(args.bottleneck_distances, "distances", None, None, argv[0] + ": ")), Distances.fromJSONDict( load_data(args.wasserstein_distances, "distances", None, None, argv[0] + ": ")) ] segment_info = distances[0].segment_info processes = [] try: display_thread = \ multiprocessing.Process(target=display, args=(distances,)) display_thread.start() processes.append(display_thread) display_thread.join() except KeyboardInterrupt: print "Caught cntl-c, shutting down..." exit(0)
def main(argv) : parser = argparse.ArgumentParser(description="General purpose cross validation tool") parser.add_argument("--kernel-module", "-K") parser.add_argument("--kernel-arg", "-k") parser.add_argument("--distances-module", "-D") parser.add_argument("--distances-arg", "-d") parser.add_argument("--learning-module", "-L") parser.add_argument("--learning-arg", "-l") parser.add_argument("--infile", "-i") parser.add_argument("--outfile", "-o") parser.add_argument("--train-test-partitions", "-t") parser.add_argument("--pool", "-p", type=int, default=max(1,multiprocessing.cpu_count()-2)) parser.add_argument("--timeout", type=int, default=0) args = parser.parse_args(argv[1:]) input_json = load_data(args.infile, "input", None, None, argv[0] + ":") partitions_json = load_data(args.train_test_partitions, "input", None, None, argv[0] + ":") partitions = TrainTestPartitions.fromJSONDict(partitions_json) if args.pool > 1 : pool = multiprocessing.Pool(args.pool) else : pool = None if args.kernel_arg != None : kernel_arg = parse_range(args.kernel_arg, t=float) else : kernel_arg = None if args.distances_arg != None : distances_arg = parse_range(args.distances_arg, t=float) else : distances_arg = None if args.learning_arg != None : learning_arg = parse_range(args.learning_arg, t=float) else : learning_arg = None print "Kernel %s distance %s learning %s" % (kernel_arg, distances_arg, learning_arg) cv = CrossValidation(input_json, config=Configuration.fromJSONDict(input_json['config']), kernel_module=args.kernel_module, kernel_arg=kernel_arg, distances_module=args.distances_module, distances_arg=distances_arg, learning_module=args.learning_module, learning_arg=learning_arg, partitions=partitions, pool=pool, timeout=args.timeout) cv.cross_validate() if args.outfile == None : args.outfile = CrossValidation.get_cross_validation_filename(cv.config) print "Writing %s" % args.outfile save_data(args.outfile, cv.toJSONDict())
def __init__(self, argv): wx.Frame.__init__(self,None,-1, 'Segment Size',size=(550,350)) parser = argparse.ArgumentParser(description="utility to plot a persistence diagram") parser.add_argument('file') self.args = vars(parser.parse_args(argv[1:])) self.file = self.args['file'] pf_json = load_data(self.file, 'persistence', None, None, None) if pf_json == None : print "Could not load persistence file : %s" % (self.args['file'],) exit() self.persistences = Persistences.fromJSONDict(pf_json) self.SetBackgroundColour(wx.NamedColour("WHITE")) self.figure = Figure() self.axes = self.figure.add_subplot(111) self.canvas = FigureCanvas(self, -1, self.figure) self.sizer = wx.BoxSizer(wx.VERTICAL) self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1, wx.LEFT | wx.TOP | wx.GROW) self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW) self.SetSizer(self.sizer) self.Fit() self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox) self.colors = ['black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet'] self.Bind(wx.EVT_PAINT, self.OnPaint) self.Bind(wx.EVT_KEY_UP, self.KeyEvent) self.index = 0 self.point_Refresh()
def main(argv) : parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help="Precomputed analysis") args = parser.parse_args(argv[1:]) data_json = load_data(args.input, None, None, None, argv[0]+ ": ") if data_json == None : print "Could not load --input : %s" % (args.input,) exit() params = [ [dict([('segment_start', d['segment_start']), ('segment_size', d['segment_size']), ('max_simplices', d['max_simplices']), ('runtime', d['runtime'])]) for d in sample_data[0]] for sample_data in data_json ] diagrams = [ [PersistenceDiagram.fromJSONDict(d['diagram']) for d in sample_data[0]] for sample_data in data_json ] bottleneck_distances = [ [[d['mean'] for d in row] for row in sample_data[1]] \ for sample_data in data_json ] wasserstein_distances = [ [[d['mean'] for d in row] for row in sample_data[2]] \ for sample_data in data_json ] try: app = App(0, params, diagrams, bottleneck_distances, wasserstein_distances) app.MainLoop() except KeyboardInterrupt: sys.exit(0)
def main(argv) : parser = argparse.ArgumentParser(description="utility to plot data and persistence diagrams. Also plots the 5 \ nearest neighbors to the selected segment") parser.add_argument('-p', '--persistences', help="Precomputed persistence diagram") parser.add_argument('-k', '--kNN', default=3, help="number of nearest neighbors to plot") args = parser.parse_args(argv[1:]) persistences_json = load_data(args.persistences, 'persistences', None, None, argv[0]) if persistences_json == None : print "Could not load --persistences : %s" % (args.persistences,) exit() persistences = PD.fromJSONDict(persistences_json) full_config = copy(persistences.config) full_config.window_size = -1 segments_module = importlib.import_module( 'persistence.' + persistences.config.data_type) segments_class = getattr(segments_module, persistences.config.data_type) full_data = segments_class(full_config) window_config = copy(persistences.config) windowed_data = segments_class(window_config) try: app = App(0, full_data, windowed_data, persistences, int(args.kNN)) app.MainLoop() except KeyboardInterrupt: sys.exit(0)
def main(argv): parser = argparse.ArgumentParser( description= "utility to plot data and persistence diagrams. Also plots the 5 \ nearest neighbors to the selected segment") parser.add_argument('-p', '--persistences', help="Precomputed persistence diagram") parser.add_argument('-k', '--kNN', default=3, help="number of nearest neighbors to plot") args = parser.parse_args(argv[1:]) persistences_json = load_data(args.persistences, 'persistences', None, None, argv[0]) if persistences_json == None: print "Could not load --persistences : %s" % (args.persistences, ) exit() persistences = PD.fromJSONDict(persistences_json) full_config = copy(persistences.config) full_config.window_size = -1 segments_module = importlib.import_module('persistence.' + persistences.config.data_type) segments_class = getattr(segments_module, persistences.config.data_type) full_data = segments_class(full_config) window_config = copy(persistences.config) windowed_data = segments_class(window_config) try: app = App(0, full_data, windowed_data, persistences, int(args.kNN)) app.MainLoop() except KeyboardInterrupt: sys.exit(0)
def main(argv) : parser = argparse.ArgumentParser(description="utility to plot \ the density of all persistence diagrams in a file.") parser.add_argument("-i", "--infile") parser.add_argument("-d", "--degree", type=int) parser.add_argument("-l", "--label", help="Show only persistence diagrams of a particular label") args = parser.parse_args(argv[1:]) if args.infile == None or not os.path.isfile(args.infile) : print "%s : --infile (%s) must specify file that exists" % \ (argv[0], args.infile) sys.exit(0) persistence_json = load_data(args.infile, "persistence_diagrams", None, None, argv[0] + " : ") if persistence_json == None : print "Could not load --infile : %s" % (args.persistence_a,) exit() persistence = PD.fromJSONDict(persistence_json) labels = list(set([d.segment_info.max_label() for d in persistence.diagrams])) labels.sort() if args.label != None : diagrams = [d for d in persistence.diagrams if d.segment_info.max_label() == args.label] persistence.diagrams = diagrams else : print "Labels : %s" % labels try: app = App(0, persistence, args.degree) app.MainLoop() except KeyboardInterrupt: sys.exit(0)
def main(argv): current_dir = os.getcwd() parser = argparse.ArgumentParser( description="utility to plot distance persistence diagram") parser.add_argument("-d", "--distances") args = parser.parse_args(argv[1:]) if args.distances != None: distances = Distances.fromJSONDict( load_data(args.distances, "distances", None, None, argv[0] + ": ")) segment_info = distances.segment_info else: print "You must supply a distances filename" sys.exit(1) processes = [] try: display_thread = \ multiprocessing.Process(target=display, args=([distances.distances[i][i+1] for i in range(len(distances.distances)-1)], segment_info)) display_thread.start() processes.append(display_thread) display_thread.join() except KeyboardInterrupt: print "Caught cntl-c, shutting down..." exit(0)
def __init__(self, argv):
    """Graph learning success rates across input files.

    Each FILE argument is a learning result; its fraction of correct test
    predictions is averaged across results and plotted, labelled by the
    learning algorithm inferred from the file name.
    """
    wx.Frame.__init__(self,None,-1, 'Segment Size',size=(550,350))
    parser = argparse.ArgumentParser(description="utility to graph success levels for learning over a single configuration parameter")
    parser.add_argument('--label', '-l')
    parser.add_argument('files', metavar='FILE', nargs='*')
    self.args = vars(parser.parse_args(argv[1:]))
    self.files = self.args['files']
    self.filedict = []
    # load in the data files
    for f in self.files :
        learning = Learning.fromJSONDict(load_data(f, 'learning', None, None, argv[0] + ": "))
        correct = []
        for result in learning.results :
            # Count test predictions that match their labels (Python 2
            # tuple-unpacking lambda over zipped labels/results).
            num_correct = reduce((lambda s, (t0, t1) : s + 1 if t0 == t1 else s),
                                 zip(result['test_labels'], result['test_results']), 0)
            correct.append(float(num_correct) / float(len(result['test_labels'])))
        print "file %s correct %0.2f%%" % (f, numpy.average(correct)*100.0)
        self.filedict.append(dict([('file', f),
                                   ('correct', numpy.average(correct)),
                                   ('config', learning.config)]))
        # Derive a human-readable label from the file name's algorithm marker.
        if "PersistenceKernelLearning" in f :
            self.filedict[-1]['label'] = "Persistence Kernel " + learning.config.data_index
            if learning.config.post_process != None :
                self.filedict[-1]['label'] = self.filedict[-1]['label'] + " " + learning.config.post_process
        elif "AverageKernelLearning" in f :
            self.filedict[-1]['label'] = "Average Kernel"
            if learning.config.post_process != None :
                self.filedict[-1]['label'] = self.filedict[-1]['label'] + " " + learning.config.post_process
        elif "ChaoticInvariantFeaturesLearning" in f :
            self.filedict[-1]['label'] = " Chaotic Invariant Features"
        elif "ScaleSpaceSimilarityLearning" in f :
            self.filedict[-1]['label'] = "Scale Space Similarity"
        elif "EuclideanDistancesLearning" in f :
            self.filedict[-1]['label'] = "Euclidean Distance"
        if (len(correct) > 1) :
            self.filedict[-1]['correct_std'] = numpy.std(correct)
            # NOTE(review): numpy.percentile expects q in [0, 100]; 0.75 and
            # 0.25 compute the 0.75th/0.25th percentiles, probably intended
            # as 75 and 25 (upper/lower quartiles) -- confirm.
            self.filedict[-1]['correct_top'] = numpy.percentile(correct, 0.75)
            self.filedict[-1]['correct_bot'] = numpy.percentile(correct, 0.25)
        else :
            # A single result has no spread to report.
            self.filedict[-1]['correct_std'] = 0.0
            self.filedict[-1]['correct_top'] = 0.0
            self.filedict[-1]['correct_bot'] = 0.0
    self.SetBackgroundColour(wx.NamedColour("WHITE"))
    # Matplotlib figure embedded in the wx frame.
    self.figure = Figure()
    self.axes = self.figure.add_subplot(211)
    self.canvas = FigureCanvas(self, -1, self.figure)
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1, wx.LEFT | wx.TOP | wx.GROW)
    self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
    self.SetSizer(self.sizer)
    self.title = self.figure.suptitle("")
    self.Fit()
    # Cache the blank background for fast redraws.
    self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
    # ColorBrewer-style palette, one color per plotted file.
    self.colors = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#b3de69',
                   '#fb9a99', '#e31a1c', '#fb8072', '#ff7f00', '#a65628',
                   '#fdb462', '#cab2d6']
    self.Bind(wx.EVT_PAINT, self.OnPaint)
    self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
    self.point_Refresh()
def __init__(self, argv): wx.Frame.__init__(self,None,-1, 'Segment Size',size=(550,350)) parser = argparse.ArgumentParser(description="utility to plot multiple persistence diagrams") parser.add_argument('files', nargs="*") self.args = vars(parser.parse_args(argv[1:])) self.files = self.args['files'] self.persistences = [] for f in self.files : pf_json = load_data(f, 'persistence', None, None, None) if pf_json == None : print "Could not load persistence file : %s" % (f,) sys.exit(1) self.persistences.append(PD.fromJSONDict(pf_json)) self.SetBackgroundColour(wx.NamedColour("WHITE")) self.displays = [] self.sizer = wx.BoxSizer(wx.VERTICAL) for f in self.files : self.displays.append(dict([('figure', Figure())])) self.displays[-1]['axes'] = self.displays[-1]['figure'].add_subplot(111) self.displays[-1]['canvas'] = FigureCanvas(self, -1, self.displays[-1]['figure']) self.sizer.Add(NavigationToolbar2Wx(self.displays[-1]['canvas']), 1, wx.LEFT | wx.TOP | wx.GROW) self.sizer.Add(self.displays[-1]['canvas'], 8, wx.LEFT | wx.TOP | wx.GROW) self.SetSizer(self.sizer) self.Fit() self.background = self.displays[0]['axes'].figure.canvas.copy_from_bbox(self.displays[0]['axes'].bbox) self.colors = ['red', 'yellow', 'orange', 'blue', 'green', 'violet', 'black'] self.Bind(wx.EVT_PAINT, self.OnPaint) self.Bind(wx.EVT_KEY_UP, self.KeyEvent) self.index = 0 self.point_Refresh()
def main(argv):
    """Plot persistence diagrams comparing full vs sparse rips filtrations.

    Reads every file matching <prefix>.NNNN (four digits) in sorted order.
    """
    parser = argparse.ArgumentParser(description="utility to plot \
persistence diagrams for examining full vs sparse rips filtration")
    parser.add_argument('-p', '--prefix',
                        help="data file prefix (e.g. foo.json to plot foo.json.0000 - foo.json.9999)")
    args = parser.parse_args(argv[1:])
    # Collect the numbered companion files in ascending order.
    filenames = sorted(glob.glob(args.prefix + ".[0-9][0-9][0-9][0-9]"))
    full_data = [load_data(name, None, None, None, argv[0] + " : ")
                 for name in filenames]
    try:
        app = App(0, full_data)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
def main(argv) : current_dir = os.getcwd() parser = argparse.ArgumentParser(description="utility to plot distances as a 2-d plot") parser.add_argument("-b", "--bottleneck-distances") parser.add_argument("-w", "--wasserstein-distances") args = parser.parse_args(argv[1:]) print args.bottleneck_distances print args.wasserstein_distances distances = [Distances.fromJSONDict(load_data(args.bottleneck_distances, "distances", None, None, argv[0]+": ")), Distances.fromJSONDict(load_data(args.wasserstein_distances, "distances", None, None, argv[0]+": "))] segment_info = distances[0].segment_info processes = [] try: display_thread = \ multiprocessing.Process(target=display, args=(distances,)) display_thread.start() processes.append(display_thread) display_thread.join() except KeyboardInterrupt: print "Caught cntl-c, shutting down..." exit(0)
def parse_configuration_files(files, verbose=True):
    """Expand configuration JSON files into a list of Configuration objects.

    Range-valued fields (window/segment size and stride, persistence_epsilon,
    max_simplices) are expanded via their cartesian product; all remaining
    fields are copied through when present.
    """
    if not isinstance(files, list):
        files = [files]

    def ranged(cfg, key, kind):
        # Parse a range specification for key when present, else None.
        return parse_range(str(cfg[key]), t=kind) if key in cfg.keys() else None

    def as_list(value):
        return value if isinstance(value, list) else [value]

    def get(cfg, key):
        return cfg[key] if key in cfg.keys() else None

    final_configs = []
    for f in files:
        configs = load_data(f, "Configurations", None, None,
                            (sys.argv[0] + ": ") if verbose else None)
        if configs == None:
            sys.exit(0)
        if isinstance(configs, dict):
            configs = [configs]
        for config in configs:
            combinations = itertools.product(
                as_list(ranged(config, 'window_size', int)),
                as_list(ranged(config, 'window_stride', int)),
                as_list(ranged(config, 'segment_size', int)),
                as_list(ranged(config, 'segment_stride', int)),
                as_list(ranged(config, 'persistence_epsilon', float)),
                as_list(ranged(config, 'max_simplices', int)))
            for (window_size, window_stride, segment_size, segment_stride,
                 persistence_epsilon, max_simplices) in combinations:
                final_configs.append(Configuration(
                    max_simplices=max_simplices,
                    persistence_epsilon=persistence_epsilon,
                    segment_stride=segment_stride,
                    segment_size=segment_size,
                    window_size=window_size,
                    window_stride=window_stride,
                    kernel_scale=get(config, 'kernel_scale'),
                    kernel_gamma=get(config, 'kernel_gamma'),
                    invariant_epsilon=get(config, 'invariant_epsilon'),
                    data_file=get(config, 'data_file'),
                    data_index=get(config, 'data_index'),
                    label_index=get(config, 'label_index'),
                    out_directory=get(config, 'out_directory'),
                    learning_split=get(config, 'learning_split'),
                    learning_iterations=get(config, 'learning_iterations'),
                    learning_C=get(config, 'learning_C'),
                    persistence_degree=get(config, 'persistence_degree'),
                    data_type=get(config, 'data_type'),
                    post_process=get(config, 'post_process'),
                    post_process_arg=get(config, 'post_process_arg')))
    return final_configs
def main(argv) : parser = argparse.ArgumentParser() parser.add_argument('-d', '--distances') args = parser.parse_args(argv[1:]) data_json = load_data(args.distances, "distances" , None, None, argv[0]+": ") distances = Distances.fromJSONDict(data_json) if data_json == None : print "Could not load --distances : %s" % (args.distances,) exit() try: app = App(0, distances) app.MainLoop() except KeyboardInterrupt: sys.exit(0)
def parse_configuration_files(files, verbose=True) :
    """Read configuration JSON files and return the cartesian-product
    expansion of their range-valued fields as Configuration objects."""
    if not isinstance(files, list) :
        files = [files]
    # Small pure helpers for optional / ranged fields.
    cond_parse_range = lambda cfg, key, kind: parse_range(str(cfg[key]), t=kind) if key in cfg.keys() else None
    cond_list = lambda value: value if isinstance(value, list) else [value]
    cond_get = lambda cfg, key: cfg[key] if key in cfg.keys() else None
    # (field name, parse type) pairs that may hold ranges.
    ranged_fields = (('window_size', int),
                     ('window_stride', int),
                     ('segment_size', int),
                     ('segment_stride', int),
                     ('persistence_epsilon', float),
                     ('max_simplices', int))
    final_configs = []
    for f in files :
        configs = load_data(f, "Configurations", None, None,
                            (sys.argv[0] + ": ") if verbose else None)
        if configs == None :
            sys.exit(0)
        if isinstance(configs, dict) :
            configs = [configs]
        for config in configs :
            expanded = [cond_list(cond_parse_range(config, key, kind))
                        for (key, kind) in ranged_fields]
            for (window_size, window_stride, segment_size, segment_stride,
                 persistence_epsilon, max_simplices) in itertools.product(*expanded) :
                final_configs.append(
                    Configuration(max_simplices=max_simplices,
                                  persistence_epsilon=persistence_epsilon,
                                  segment_stride=segment_stride,
                                  segment_size=segment_size,
                                  window_size=window_size,
                                  window_stride=window_stride,
                                  kernel_scale=cond_get(config, 'kernel_scale'),
                                  kernel_gamma=cond_get(config, 'kernel_gamma'),
                                  invariant_epsilon=cond_get(config, 'invariant_epsilon'),
                                  data_file=cond_get(config, 'data_file'),
                                  data_index=cond_get(config, 'data_index'),
                                  label_index=cond_get(config, 'label_index'),
                                  out_directory=cond_get(config, 'out_directory'),
                                  learning_split=cond_get(config, 'learning_split'),
                                  learning_iterations=cond_get(config, 'learning_iterations'),
                                  learning_C=cond_get(config, 'learning_C'),
                                  persistence_degree=cond_get(config, 'persistence_degree'),
                                  data_type=cond_get(config, 'data_type'),
                                  post_process=cond_get(config, 'post_process'),
                                  post_process_arg=cond_get(config, 'post_process_arg')))
    return final_configs
def __init__(self, argv): wx.Frame.__init__(self, None, -1, 'Segment Size', size=(550, 350)) parser = argparse.ArgumentParser( description="utility to plot multiple persistence diagrams") parser.add_argument('files', nargs="*") self.args = vars(parser.parse_args(argv[1:])) self.files = self.args['files'] self.persistences = [] for f in self.files: pf_json = load_data(f, 'persistence', None, None, None) if pf_json == None: print "Could not load persistence file : %s" % (f, ) sys.exit(1) self.persistences.append(PD.fromJSONDict(pf_json)) self.SetBackgroundColour(wx.NamedColour("WHITE")) self.displays = [] self.sizer = wx.BoxSizer(wx.VERTICAL) for f in self.files: self.displays.append(dict([('figure', Figure())])) self.displays[-1]['axes'] = self.displays[-1][ 'figure'].add_subplot(111) self.displays[-1]['canvas'] = FigureCanvas( self, -1, self.displays[-1]['figure']) self.sizer.Add(NavigationToolbar2Wx(self.displays[-1]['canvas']), 1, wx.LEFT | wx.TOP | wx.GROW) self.sizer.Add(self.displays[-1]['canvas'], 8, wx.LEFT | wx.TOP | wx.GROW) self.SetSizer(self.sizer) self.Fit() self.background = self.displays[0][ 'axes'].figure.canvas.copy_from_bbox(self.displays[0]['axes'].bbox) self.colors = [ 'red', 'yellow', 'orange', 'blue', 'green', 'violet', 'black' ] self.Bind(wx.EVT_PAINT, self.OnPaint) self.Bind(wx.EVT_KEY_UP, self.KeyEvent) self.index = 0 self.point_Refresh()
def main(argv): parser = argparse.ArgumentParser(description="utility to plot \ the density of all persistence diagrams in a file.") parser.add_argument("-i", "--infile") parser.add_argument("-d", "--degree", type=int) parser.add_argument( "-l", "--label", help="Show only persistence diagrams of a particular label") args = parser.parse_args(argv[1:]) if args.infile == None or not os.path.isfile(args.infile): print "%s : --infile (%s) must specify file that exists" % \ (argv[0], args.infile) sys.exit(0) persistence_json = load_data(args.infile, "persistence_diagrams", None, None, argv[0] + " : ") if persistence_json == None: print "Could not load --infile : %s" % (args.persistence_a, ) exit() persistence = PD.fromJSONDict(persistence_json) labels = list( set([d.segment_info.max_label() for d in persistence.diagrams])) labels.sort() if args.label != None: diagrams = [ d for d in persistence.diagrams if d.segment_info.max_label() == args.label ] persistence.diagrams = diagrams else: print "Labels : %s" % labels try: app = App(0, persistence, args.degree) app.MainLoop() except KeyboardInterrupt: sys.exit(0)
def main(argv) : current_dir = os.getcwd() parser = argparse.ArgumentParser(description="utility to plot distance persistence diagram") parser.add_argument("-d", "--distances") args = parser.parse_args(argv[1:]) if args.distances != None : distances = Distances.fromJSONDict(load_data(args.distances, "distances", None, None, argv[0]+": ")) segment_info = distances.segment_info else : print "You must supply a distances filename" sys.exit(1) processes = [] try: display_thread = \ multiprocessing.Process(target=display, args=([distances.distances[i][i+1] for i in range(len(distances.distances)-1)], segment_info)) display_thread.start() processes.append(display_thread) display_thread.join() except KeyboardInterrupt: print "Caught cntl-c, shutting down..." exit(0)
def __init__(self, argv):
    """Graph best learning success rates per dataset for the UCR 2015 data.

    Input files come either from a --directory tree (searched with find for
    *Learning.json*) or from explicit FILE arguments.  For each
    (dataset, learning type) pair the best average-correct score is kept;
    MKL results are stored as a per-kernel weighted list instead of a scalar.
    """
    wx.Frame.__init__(self, None, -1, 'UCR 2015 Learning Results',
                      size=(550, 350))
    parser = argparse.ArgumentParser(
        description=
        "utility to graph success levels for learning on the UCR Dataset")
    parser.add_argument(
        '-d',
        '--directory',
        help='Directory where the learning results are stored',
        required=False)
    parser.add_argument('files', metavar='FILE', nargs='*')
    self.args = vars(parser.parse_args(argv[1:]))
    # --directory wins over explicit files; require at least one of them.
    if self.args['directory'] != None and len(self.args['files']) != 0:
        print "Ignoring files after the directory argument"
    elif self.args['directory'] == None and len(self.args['files']) == 0:
        parser.print_help()
        sys.exit()
    if self.args['directory'] != None:
        if os.path.isdir(self.args['directory']):
            # Recursively collect every learning result under the directory.
            self.files = subprocess.check_output([
                "find", self.args['directory'], "-name", "*Learning.json*"
            ])
            self.files = [f for f in self.files.split("\n") if f != '']
        else:
            parser.print_help()
            sys.exit()
    else:
        self.files = self.args['files']

    def learning_type(text):
        # Derive a short algorithm label from the file name, e.g.
        # "...-win-5-PersistenceKernelLearning.json" -> "PersistenceKernel-5".
        fields = text.split('-')
        t = fields[-1].split('.')[0][:-len('Learning')]
        try:
            if t == 'PersistenceKernel' or t == 'ScaleSpaceSimilarity' or t == 'MultipleKernel' or t == 'AverageKernel':
                if 'ChaosPost' in fields:
                    w = '-Chaos'
                else:
                    # Suffix with the window size following the 'win' field.
                    w = '-' + fields[fields.index('win') + 1]
            else:
                w = ''
        except ValueError:
            # No 'win' field present in the name.
            w = ''
        return t + w

    self.learning_types = list(set([learning_type(f) for f in self.files]))
    self.learning_types.sort()
    # Dataset name is the parent directory of each result file.
    datasets = list(set([f.split('/')[-2] for f in self.files]))
    datasets.sort()
    # filedict[dataset][learning_type] -> best score seen (0 until loaded).
    self.filedict = dict([(s, dict([(t, 0) for t in self.learning_types]))
                          for s in datasets])
    # load in the data files
    for f in self.files:
        learning_t = learning_type(f)
        dataset = f.split('/')[-2]
        learning_json = load_data(f, "learning", None, None, None)
        learning = Learning.fromJSONDict(learning_json)
        best = self.filedict[dataset][learning_t]
        current = learning.get_average_correct()
        # Keep only the best score; list-valued entries (MKL) compare by sum.
        if (isinstance(best, list) and current > sum(best)) or (
                not isinstance(best, list) and current > best):
            if learning.results[0].mkl_weights != None:
                # Split the score across kernels by their MKL weights.
                self.filedict[dataset][learning_t] = [
                    current * w for w in learning.results[0].mkl_weights
                ]
            else:
                self.filedict[dataset][learning_t] = current
    # Print an aligned text summary of the best scores per dataset.
    keylen = max([len(key) for (key, val) in self.filedict.items()])
    # NOTE(review): 'format' shadows the builtin of the same name (local only).
    format = '%' + str(keylen) + 's %s'
    for (key, val) in self.filedict.items():
        vals = [
            ("%s %02.2f%%" %
             (k, v * 100.0 if not isinstance(v, list) else sum(v) * 100.0)) +
            ((" " + str(["%02.2f%%" % v_ for v_ in v]))
             if isinstance(v, list) else "") for (k, v) in val.items()
        ]
        vals.sort()
        print format % (key, vals)
    self.frame = 0
    self.SetBackgroundColour(wx.NamedColour("WHITE"))
    self.figure = Figure()
    self.axes = self.figure.add_subplot(121)
    # One x-axis slot per dataset, wide enough for all learning types.
    plot_keys = self.filedict.items()[0][1].keys()
    dataset_width = len(plot_keys) + 1.5
    self.axes.set_xticks([(0.5 + i) * dataset_width
                          for i in range(len(self.filedict.items()))])
    self.axes.set_xticklabels(
        [key for (key, value) in self.filedict.items()])
    self.axes.set_ylim(0.0, 1.0, auto=False)
    self.canvas = FigureCanvas(self, -1, self.figure)
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self.sizer.Add(self.canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
    self.SetSizer(self.sizer)
    self.title = self.figure.suptitle("UCR Learning")
    #self.Fit()
    self.background = self.axes.figure.canvas.copy_from_bbox(
        self.axes.bbox)
    self.colors = [
        'black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet'
    ]
    self.Bind(wx.EVT_PAINT, self.OnPaint)
    self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
    self.point_Refresh()
import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import SymLogNorm from persistence.Datatypes.JSONObject import load_data from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram from scipy import stats parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile") parser.add_argument("-d", "--degree", type=int) parser.add_argument("-p", "--pool", default=multiprocessing.cpu_count(), type=int) args = parser.parse_args(sys.argv[1:]) persistences = PersistenceDiagrams.fromJSONDict(load_data(args.infile, "persistences", None, None, sys.argv[0] + ": ")) labels = list(set([diagram.segment_info.max_label() for diagram in persistences.diagrams])) labels.sort() labels = dict([('1', 'Working at Computer'), ('2', 'Standing Up, Walking, Going Up\Down Stairs'), ('3', 'Standing'), ('4', 'Walking'), ('5', 'Going Up\Down Stairs'), ('6', 'Walking and Talking with Someone'), ('7', 'Talking while Standing')]) print "Labels: '%s'" % ("', '".join(labels.values()),) xmin = [] xmax = [] ymin = []
from persistence.Datatypes.JSONObject import load_data from persistence.Datatypes.Configuration import Configuration from persistence.CrossValidation import CrossValidation if __name__ == "__main__": parser = argparse.ArgumentParser( description= "Utility to generate kernels and learning results from a cross validation result" ) parser.add_argument("--pool", "-p", type=int) parser.add_argument("--input", "-i") parser.add_argument("--cross-validation", "-v") parser.add_argument("--train-test-partitions", "-t") args = parser.parse_args(sys.argv[1:]) print args.cross_validation cv_json = load_data(args.cross_validation, "cross validation", None, None, sys.argv[0] + ": ") cv = CrossValidation.fromJSONDict(cv_json) config = cv.config print config if cv.kernel_module != None: kernel_module = importlib.import_module("persistence." + cv.kernel_module) kernel_class = getattr(kernel_module, cv.kernel_module) scale_arg = kernel_class.get_scale_arg() kernel_filename = kernel_class.get_kernel_filename(config) kernel_command = [ "python", "-u", "-O", "-m", "persistence." + cv.kernel_module, "--" + scale_arg.replace("_", "-"), str(config[scale_arg]), "--infile", args.input ]
def __init__(self, argv):
    """Plot bottleneck distance averages of 'representative' segments.

    Segments whose learning correctness exceeds --threshold are grouped by
    label; for each label the segment with the smallest average distance to
    its label group is selected.  The figure is saved as a PDF in the
    configured output directory and the program exits.
    """
    wx.Frame.__init__(self,None,-1, 'Bottleneck Distance',size=(550,350))
    parser = argparse.ArgumentParser(description="Utility to plot Bottleneck Distance average of 'representative' segments for each label")
    parser.add_argument('-d','--distances')
    parser.add_argument('-l','--learning')
    parser.add_argument('-p','--partition')
    parser.add_argument('-t','--threshold', default=0.75, type=float)
    self.args = parser.parse_args(argv[1:])
    import traceback
    try :
        self.distances = \
            Distances.fromJSONDict(load_data(self.args.distances, 'distances', None, None, argv[0]+": "))
        self.learning = \
            Learning.fromJSONDict(load_data(self.args.learning, 'learning', None, None, argv[0]+": "))
        self.partitions = \
            TrainTestPartitions.fromJSONDict(load_data(self.args.partition, 'partition', None, None, argv[0]+": "))
    except :
        # Any parse/load failure is fatal; show the traceback for diagnosis.
        print "Could not parse input files: %s" % (traceback.format_exc(),)
        sys.exit(1)
    # Filter to only the segments that get above the threshold
    self.segments = []
    for i in range(len(self.distances.segment_info)) :
        c = segment_correctness(i, self.learning, self.partitions)
        # None means the segment was never tested; keep it as well.
        if c == None or c > self.args.threshold :
            self.segments.append((i, self.distances.segment_info[i].max_label()))
    # Zero-pad indices so lexicographic sort orders (label, index) pairs.
    # NOTE(review): math.log is natural log here; for a digit count the
    # intent was probably math.log10 -- confirm (padding is merely wider).
    sort_format = "0" * int(math.ceil(math.log(len(self.distances.segment_info))))
    self.segments.sort(key=lambda x: str(x[1]+(("%"+sort_format+"d") % x[0])))
    self.label_index = 0
    self.labels = list(set([x[1] for x in self.segments]))
    self.labels.sort()
    # Range of positions each label occupies in the sorted segment list.
    self.segment_minimums = dict([(l, min([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l])) \
                                  for l in self.labels])
    self.segment_maximums = dict([(l, max([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l])) \
                                  for l in self.labels])
    # Representative segment per label: minimal average distance to the label.
    self.segment_indices = dict([(l, min([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l], key=lambda x:average_distance(x,l,self.distances))) \
                                 for l in self.labels])
    self.SetBackgroundColour(wx.NamedColour("WHITE"))
    self.figure = Figure()
    self.axes = self.figure.add_subplot(111)
    self.canvas = FigureCanvas(self, -1, self.figure)
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1, wx.LEFT | wx.TOP | wx.GROW)
    self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
    self.SetSizer(self.sizer)
    self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
    self.colors = ['black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet']
    self.Bind(wx.EVT_PAINT, self.OnPaint)
    self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
    self.index = 0
    self.point_Refresh()
    self.Fit()
    # Batch mode: write the rendered figure to a PDF and exit immediately.
    self.figure.savefig(self.distances.config.out_directory + "/" + self.distances.config.out_directory.split('/')[-1] + '-win-' + str(self.distances.config.window_size) + '-best-distances.pdf')
    sys.exit(0)
import os
import sys
import math
import itertools
import multiprocessing
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram


def avg(l):
    """Arithmetic mean of a sequence (0.0 start keeps the sum a float)."""
    return sum(l, 0.0) / len(l)


def average_density(diagram):
    """Average distance from each degree-1 point to its 5 nearest neighbors.

    Returns 0.0 when the diagram has fewer than three degree-1 points.
    """
    points = [(p[0], p[1]) for p in diagram.points if p[2] == 1]
    if len(points) > 2:
        diagram_distances = []
        for (x0, y0) in points:
            # BUG FIX: the second squared term previously repeated (x0 - x1),
            # so y-coordinates were ignored; use the true euclidean distance.
            distances = [math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1))
                         for (x1, y1) in points]
            # BUG FIX: sort so distances[0] is the zero self-distance and
            # [1:6] really are the 5 nearest neighbors (the slice was taken
            # in arbitrary point order before).
            distances.sort()
            diagram_distances.append(avg(distances[1:6]))
        return avg(diagram_distances)
    else:
        return 0.0


if __name__ == "__main__":
    # Compute densities for each input file in parallel and save them
    # alongside the input as "<file>-density".
    pool = multiprocessing.Pool(multiprocessing.cpu_count() - 2)
    for f in sys.argv[1:]:
        pds = PersistenceDiagrams.fromJSONDict(
            load_data(f, None, None, None, sys.argv[0] + " : "))
        densities = pool.map(average_density, pds.diagrams)
        save_data(f + "-density", list(densities))
# Entry point for an interactive viewer that plots input data together with
# persistence diagrams, either precomputed (--persistences) or generated on
# demand from a windowed copy of the input segments.
def main(argv) :
    parser = argparse.ArgumentParser(description="utility to plot \
data and dynamically generated persistence diagrams. Using \
the persistence option uses precomputed persistence and ignores all \
the other options.")
    parser.add_argument('-i', '--infile', help="Data to read")
    parser.add_argument('-m', '--max-simplices', default=2000000, type=int,
                        help="Maximum number of simplices for persistence \
generation")
    parser.add_argument('-I', '--data-index', help="Index of data field for data types that require it")
    parser.add_argument('-L', '--label-index', type=int, help="Index of label field for data types that require it")
    parser.add_argument('-s', '--segment-size', type=int, help="Segment size for data types that require it")
    parser.add_argument('-S', '--segment-stride', type=int, help="Segment stride for data types that require it")
    parser.add_argument('-w', '--window-size', help="Window size for \
persistence generation. Integer is a literal window size, float \
between 0 and 1 is a fraction of the total Segment size")
    parser.add_argument('-p', '--persistences', help="Precomputed persistence diagram")
    parser.add_argument('-t', '--data-type', default="UCRSegments", help="Data type of the segments in infile")
    args = parser.parse_args(argv[1:])
    if args.persistences != None :
        # Precomputed path: reload both the full (window_size == -1) and the
        # windowed segment data described by the stored configuration.
        persistences_json = load_data(args.persistences, 'persistences', None, None, argv[0])
        if persistences_json == None :
            print "Could not load --persistences : %s" % (args.persistences,)
            exit()
        persistences = PD.fromJSONDict(persistences_json)
        full_config = copy(persistences.config)
        full_config.window_size = -1
        segments_module = importlib.import_module(
            'persistence.' + persistences.config.data_type)
        segments_class = getattr(segments_module, persistences.config.data_type)
        full_data = segments_class(full_config)
        window_config = copy(persistences.config)
        windowed_data = segments_class(window_config)
    else :
        # Dynamic path: build configurations from the command line flags.
        segments_module = importlib.import_module('persistence.' + args.data_type)
        segments_class = getattr(segments_module, args.data_type)
        full_config = Configuration.fromJSONDict(dict([
            ("data_type", args.data_type),
            ("data_file", args.infile),
            ("label_index", 0),
            ("max_simplices", args.max_simplices),
            ("window_size", -1),
            ("window_stride", 1)]))
        # A ':'-separated --infile names multiple data files.
        if full_config.data_file.find(":") != -1 :
            full_config.data_file = full_config.data_file.split(':')
        if args.segment_size != None :
            full_config.segment_size = args.segment_size
        if args.segment_stride != None :
            full_config.segment_stride = args.segment_stride
        if args.data_index != None :
            full_config.data_index = parse_index(args.data_index)[0]
        if args.label_index != None :
            full_config.label_index = args.label_index
        full_data = segments_class(full_config)
        # Fractional --window-size is a fraction of the full data's window
        # size; an integral value is used literally.
        window_size = float(args.window_size)
        if (window_size < 1.0) :
            window_size = int(window_size * full_data.config.window_size)
        else :
            window_size = int(args.window_size)
        window_config = Configuration.fromJSONDict(dict([
            ("data_type", args.data_type),
            ("data_file", full_config.data_file),
            ("label_index", 0),
            ("max_simplices", args.max_simplices),
            ("window_size", window_size),
            ("window_stride", 1)]))
        if args.segment_size != None :
            window_config.segment_size = args.segment_size
        if args.segment_stride != None :
            window_config.segment_stride = args.segment_stride
        if args.data_index != None :
            window_config.data_index = parse_index(args.data_index)[0]
        if args.label_index != None :
            window_config.label_index = args.label_index
        windowed_data = segments_class(window_config)
        # Diagrams start out empty and are filled in as the GUI runs.
        persistences = PD(windowed_data.config, [None for segment in windowed_data.segments])
    try:
        app = App(0, full_data, windowed_data, persistences)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
# Entry point: interactively display pairwise persistence distances.  A
# background process computes distances into shared memory while a second
# process draws them.
def main(argv):
    current_dir = os.getcwd()  # NOTE(review): unused in this function
    parser = argparse.ArgumentParser(
        description="utility to plot distance persistence diagram")
    parser.add_argument("-s", "--segments")
    parser.add_argument("-P", "--persistence-diagrams")
    parser.add_argument("-d", "--distances")
    parser.add_argument(
        "-t",
        "--distance-type",
        default="bottleneck",
        help="Distance Measure to use, Bottleneck or Wasserstein")
    parser.add_argument("-p",
                        "--pool",
                        default=max(2, multiprocessing.cpu_count() - 4),
                        help="Threads of computation to use",
                        type=int)
    parser.add_argument("-m", "--max-simplices", default=1000000, type=int)
    parser.add_argument("-e", "--epsilon", type=float)
    args = parser.parse_args(argv[1:])
    # Whichever of -s / -P / -d appears last wins for segment_info.
    # NOTE(review): if none of the three is supplied, segment_info is never
    # bound and the Array construction below raises NameError -- verify.
    if args.segments != None:
        segments = Segments.fromJSONDict(
            load_data(args.segments, "segments", None, None, argv[0] + ": "))
        segment_info = [SegmentInfo.fromJSONDict(s) for s in segments.segments]
    else:
        segments = None
    if args.persistence_diagrams != None:
        diagrams = PersistenceDiagrams.fromJSONDict(
            load_data(args.persistence_diagrams, "persistence diagrams", None,
                      None, argv[0] + ": "))
        segment_info = [d.segment_info for d in diagrams.diagrams]
    else:
        diagrams = None
    if args.distances != None:
        distances = Distances.fromJSONDict(
            load_data(args.distances, "distances", None, None, argv[0] + ": "))
        segment_info = distances.segment_info
    else:
        distances = None
    # Shared state between the compute and display processes: slot 0 is the
    # reference segment (distance 0.0), the rest start unset (-1.0).
    distance_array = multiprocessing.Array('d',
                                           [0.0] + [-1.0 for s in segment_info[1:]])
    segment_compare = multiprocessing.Value('i', 0)
    processes = []
    try:
        compute_thread = \
            multiprocessing.Process(target=compute,
                                    args=(args.distance_type, distance_array,
                                          segment_compare, args.pool-1,
                                          args.max_simplices, args.epsilon,
                                          segments, diagrams, distances))
        compute_thread.start()
        display_thread = \
            multiprocessing.Process(target=display,
                                    args=(args.distance_type, distance_array,
                                          segment_compare, segment_info))
        display_thread.start()
        processes.append(display_thread)
        display_thread.join()
        compute_thread.join()
    except KeyboardInterrupt:
        print "Caught cntl-c, shutting down..."
        exit(0)
#This program is distributed in the hope that it will be useful, #but WITHOUT ANY WARRANTY; without even the implied warranty of #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #GNU General Public License for more details. # #You should have received a copy of the GNU General Public License #along with this program. If not, see <http://www.gnu.org/licenses/>. import sys import numpy from persistence.Datatypes.JSONObject import load_data from persistence.Datatypes.Segments import Segments data = [] for arg in sys.argv[1:]: segments = Segments.fromJSONDict( load_data(arg, "segments", None, None, sys.argv[0] + ": ")) taus = [int(s.tau) for s in segments.segments] window_sizes = [len(s.windows[0]) for s in segments.segments] data.append([ arg[arg.find('-data-') + 6:arg.find('-seg-')], min(taus), max(taus), numpy.mean(taus), min(window_sizes), max(window_sizes), numpy.mean(window_sizes) ]) data.sort() print data
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram


def avg(l):
    """Arithmetic mean of a sequence (0.0 start forces float division)."""
    return sum(l, 0.0) / len(l)


def average_density(diagram):
    """Average, over every dimension-1 point of a persistence diagram, of
    the mean distance to that point's 5 nearest neighbours.

    Returns 0.0 when the diagram has fewer than 3 dimension-1 points.
    """
    points = [(p[0], p[1]) for p in diagram.points if p[2] == 1]
    if len(points) > 2:
        diagram_distances = []
        for (x0, y0) in points:
            # BUG FIX: the second squared term previously reused (x0 - x1),
            # so the y coordinate never entered the Euclidean distance.
            distances = [math.sqrt((x0 - x1) * (x0 - x1) +
                                   (y0 - y1) * (y0 - y1))
                         for (x1, y1) in points]
            # BUG FIX: sort ascending so [1:6] really is the 5 nearest
            # neighbours; index 0 is the point's zero distance to itself.
            distances.sort()
            diagram_distances.append(avg(distances[1:6]))
        return avg(diagram_distances)
    else:
        return 0.0


if __name__ == "__main__":
    # Compute densities for each input file and save "<file>-density".
    pool = multiprocessing.Pool(multiprocessing.cpu_count() - 2)
    for f in sys.argv[1:]:
        pds = PersistenceDiagrams.fromJSONDict(
            load_data(f, None, None, None, sys.argv[0] + " : "))
        densities = pool.map(average_density, pds.diagrams)
        save_data(f + "-density", list(densities))
# (Fragment of a file-conversion utility: the if/elif chain continues from
# code above this excerpt, mapping a filename-derived class to a datatype.)
elif "Distances" in file_class:
    data_class = "Distances"
elif "ScaleSpaceSimilarities" in file_class:
    file_class = "ScaleSpaceSimilarity"
    data_class = "Distances"
elif "Kernel" in file_class:
    data_class = "Kernel"
elif "CrossValidation" in file_class:
    data_class = file_class
module = importlib.import_module('persistence.' + file_class)
module_class = getattr(module, file_class.split('.')[-1])
# NOTE(review): this re-imports the file_class module; presumably the
# data_class module was intended on the second import -- verify.
module = importlib.import_module('persistence.' + file_class)
data_class = getattr(module, data_class.split('.')[-1])
in_json = load_data(args.infile, "JSONObject", None, None,
                    sys.argv[0] + " : ")
in_obj = data_class.fromJSONDict(in_json)
in_obj.config.out_directory = args.outdir
if status != None:
    in_obj.config.status = status
# Pick the output filename helper that matches the datatype.
if "Segments" in file_class or \
   "Post" in file_class :
    out_file = module_class.get_segment_filename(in_obj.config, gz=False)
elif "Features" in file_class:
    out_file = module_class.get_features_filename(in_obj.config, gz=False)
elif "PersistenceDiagrams" in file_class:
    out_file = module_class.get_persistence_diagrams_filename(
        in_obj.config, gz=False)
elif "Partition" in file_class:
    out_file = module_class.get_partition_filename(in_obj.config, gz=False)
elif "Learning" in file_class:
# Entry point: plot raw data next to persistence diagrams.  With
# --persistences a precomputed diagram file drives everything; otherwise
# full and windowed segment configurations are built from the flags.
def main(argv):
    parser = argparse.ArgumentParser(description="utility to plot \
data and dynamically generated persistence diagrams. Using \
the persistence option uses precomputed persistence and ignores all \
the other options.")
    parser.add_argument('-i', '--infile', help="Data to read")
    parser.add_argument('-m',
                        '--max-simplices',
                        default=2000000,
                        type=int,
                        help="Maximum number of simplices for persistence \
generation")
    parser.add_argument(
        '-I',
        '--data-index',
        help="Index of data field for data types that require it")
    parser.add_argument(
        '-L',
        '--label-index',
        type=int,
        help="Index of label field for data types that require it")
    parser.add_argument('-s',
                        '--segment-size',
                        type=int,
                        help="Segment size for data types that require it")
    parser.add_argument('-S',
                        '--segment-stride',
                        type=int,
                        help="Segment stride for data types that require it")
    parser.add_argument('-w', '--window-size', help="Window size for \
persistence generation. Integer is a literal window size, float \
between 0 and 1 is a fraction of the total Segment size")
    parser.add_argument('-p',
                        '--persistences',
                        help="Precomputed persistence diagram")
    parser.add_argument('-t',
                        '--data-type',
                        default="UCRSegments",
                        help="Data type of the segments in infile")
    args = parser.parse_args(argv[1:])
    if args.persistences != None:
        # Precomputed path: reuse the stored configuration for both the
        # full (window_size == -1) and the windowed segment data.
        persistences_json = load_data(args.persistences, 'persistences',
                                      None, None, argv[0])
        if persistences_json == None:
            print "Could not load --persistences : %s" % (args.persistences, )
            exit()
        persistences = PD.fromJSONDict(persistences_json)
        full_config = copy(persistences.config)
        full_config.window_size = -1
        segments_module = importlib.import_module(
            'persistence.' + persistences.config.data_type)
        segments_class = getattr(segments_module,
                                 persistences.config.data_type)
        full_data = segments_class(full_config)
        window_config = copy(persistences.config)
        windowed_data = segments_class(window_config)
    else:
        # Dynamic path: construct the configurations from the flags.
        segments_module = importlib.import_module('persistence.' +
                                                  args.data_type)
        segments_class = getattr(segments_module, args.data_type)
        full_config = Configuration.fromJSONDict(
            dict([("data_type", args.data_type), ("data_file", args.infile),
                  ("label_index", 0), ("max_simplices", args.max_simplices),
                  ("window_size", -1), ("window_stride", 1)]))
        # A ':'-separated --infile names multiple data files.
        if full_config.data_file.find(":") != -1:
            full_config.data_file = full_config.data_file.split(':')
        if args.segment_size != None:
            full_config.segment_size = args.segment_size
        if args.segment_stride != None:
            full_config.segment_stride = args.segment_stride
        if args.data_index != None:
            full_config.data_index = parse_index(args.data_index)[0]
        if args.label_index != None:
            full_config.label_index = args.label_index
        full_data = segments_class(full_config)
        # Fractional --window-size scales the full data's window size;
        # an integral value is used literally.
        window_size = float(args.window_size)
        if (window_size < 1.0):
            window_size = int(window_size * full_data.config.window_size)
        else:
            window_size = int(args.window_size)
        window_config = Configuration.fromJSONDict(
            dict([("data_type", args.data_type),
                  ("data_file", full_config.data_file), ("label_index", 0),
                  ("max_simplices", args.max_simplices),
                  ("window_size", window_size), ("window_stride", 1)]))
        if args.segment_size != None:
            window_config.segment_size = args.segment_size
        if args.segment_stride != None:
            window_config.segment_stride = args.segment_stride
        if args.data_index != None:
            window_config.data_index = parse_index(args.data_index)[0]
        if args.label_index != None:
            window_config.label_index = args.label_index
        windowed_data = segments_class(window_config)
        # Diagrams start out empty and are filled in as the GUI runs.
        persistences = PD(windowed_data.config,
                          [None for segment in windowed_data.segments])
    try:
        app = App(0, full_data, windowed_data, persistences)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
import argparse
import importlib
import subprocess
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes.Configuration import Configuration
from persistence.CrossValidation import CrossValidation

# Driver: regenerate the kernel / learning results for the configuration a
# CrossValidation run selected, by shelling out to the kernel module.
# (Fragment -- the command construction continues past this excerpt.)
# NOTE(review): sys.argv is used below but this excerpt shows no
# `import sys` -- confirm the full file imports it.
if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Utility to generate kernels and learning results from a cross validation result")
    parser.add_argument("--pool", "-p", type=int)
    parser.add_argument("--input", "-i")
    parser.add_argument("--cross-validation", "-v")
    parser.add_argument("--train-test-partitions", "-t")
    args = parser.parse_args(sys.argv[1:])
    print args.cross_validation
    cv_json = load_data(args.cross_validation, "cross validation", None, None, sys.argv[0]+": ")
    cv = CrossValidation.fromJSONDict(cv_json)
    config = cv.config
    print config
    if cv.kernel_module != None :
        # Resolve the kernel class and rebuild the command line that
        # reproduces its kernel with the cross-validated scale argument.
        kernel_module = importlib.import_module("persistence." + cv.kernel_module)
        kernel_class = getattr(kernel_module, cv.kernel_module)
        scale_arg = kernel_class.get_scale_arg()
        kernel_filename = kernel_class.get_kernel_filename(config)
        kernel_command = ["python", "-u", "-O", "-m", "persistence."+cv.kernel_module,
                         "--"+scale_arg.replace("_","-"), str(config[scale_arg]),
                         "--infile", args.input]
        if args.pool != None :
            kernel_command.extend(["--pool", str(args.pool)])
        else :
# #This program is distributed in the hope that it will be useful, #but WITHOUT ANY WARRANTY; without even the implied warranty of #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #GNU General Public License for more details. # #You should have received a copy of the GNU General Public License #along with this program. If not, see <http://www.gnu.org/licenses/>. import sys from persistence.Datatypes.JSONObject import load_data from persistence.Datatypes.Learning import Learning, LearningResult from persistence.Datatypes.TrainTestPartitions import TrainTestPartitions partitions_json = load_data(sys.argv[1], 'partition', None, None, sys.argv[0] + ": ") partitions = TrainTestPartitions.fromJSONDict(partitions_json) all_wrongs = [] for f in sys.argv[2:] : results_json = load_data(f, 'learning', None, None, sys.argv[0] + ": ") results = Learning.fromJSONDict(results_json) wrongs = [] for (result, partition) in zip(results.results, partitions.evaluation) : correct = [(l == r) for (l,r) in zip(result.test_labels, result.test_results)] wrong = [p for (c,p) in zip(correct, partition.test) if not c] wrong.sort() wrongs.append(wrong) all_wrongs.append(wrongs) for (a,b) in zip(all_wrongs[0], all_wrongs[1]) : if a == b :
import argparse
import importlib
import sys
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.Segments import Segments, Segment

# Extract a single dimension out of a multi-dimensional Segments file and
# re-window it, rewriting each segment in place.
parser = argparse.ArgumentParser(
    description=
    "Creates windowed segments of a single dimension for a multidimensioned dataset"
)
parser.add_argument("-i", "--infile")
parser.add_argument("-d", "--data-index", default=0, type=int)
parser.add_argument("-w", "--window-size", type=int)
parser.add_argument("-W", "--window-stride", default=1, type=int)
# BUG FIX: `sys` was used here without being imported, raising NameError.
args = parser.parse_args(sys.argv[1:])
segments = Segments.fromJSONDict(
    load_data(args.infile, "segments", None, None, sys.argv[0] + ": "))
orig_window_size = segments.config.window_size
segments.config.window_size = args.window_size
segments.config.window_stride = args.window_stride
# Samples of the several dimensions are interleaved inside each window.
dimensions = len(segments.segments[0].data_index)
segments.config.data_index = segments.segments[0].data_index[args.data_index]
for segment in segments.segments:
    # Stride through the original window, pulling out the requested
    # dimension's samples for each new window of args.window_size points.
    windows = [[
        segment.windows[0][(i + j) * dimensions + args.data_index]
        for j in range(args.window_size)
    ] for i in range(0, orig_window_size, args.window_stride)
               if ((i + args.window_size - 1) * dimensions + args.data_index)
               < len(segment.windows[0])]
    segment.data_index = segment.data_index[args.data_index]
    # BUG FIX: window_size was previously assigned args.window_stride,
    # leaving per-segment metadata inconsistent with the windows above.
    segment.window_size = args.window_size
    segment.windows = windows
          # (Fragment -- tail of a call, started above this excerpt, that
          # saves the full diagram and its runtime ahead of the sparse ones.)
          [dict([("full_diagram",full_pd.toJSONDict()),
                 ("runtime", full_runtime)])] + sparse_pds)
    return full_pd.toJSONDict()

# Driver: sweep max_simplices (or epsilon) values for sparse rips
# filtrations and compare against the full rips filtration.
if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description=('Creates Persistence Diagrams using sparse rips ' + \
                                                  'filtration with varying max_simplices to compare ' + \
                                                  'with full rips filtration'))
    parser.add_argument('-i', '--infile', help='Segments file to use as input data')
    parser.add_argument('-o', '--outfile', help='File to save output into')
    parser.add_argument('-m', '--max-simplices', help='range of max_simplices values to use')
    parser.add_argument('-e', '--epsilon', help='range of epsilon values to use')
    parser.add_argument('-p', '--pool')
    parser.add_argument('-n', '--number', help='limit to n persistence diagrams')
    args = parser.parse_args(sys.argv[1:])
    infile = Segments.fromJSONDict(load_data(args.infile, 'segments', None, None, sys.argv[0] + " : "))
    # Optionally truncate to the first n segments.
    if args.number != None :
        infile.segments = infile.segments[0:int(args.number)]
    # Exactly one of --max-simplices / --epsilon drives the sweep.
    if args.max_simplices != None :
        max_simplices = parse_range(args.max_simplices, t=float)
        epsilon = None
    else :
        epsilon = parse_range(args.epsilon, t=float)
        max_simplices = None
    if int(args.pool) > 1 :
        pool = multiprocessing.Pool(int(args.pool))
        mapped = pool.imap(segment_processing_callable(args.outfile, max_simplices, epsilon, len(infile.segments)),
# (Fragment -- argument definitions continue from code above this excerpt.)
parser.add_argument('--window-size', default=40, type=int)
parser.add_argument('--samples', default=10, type=int)
parser.add_argument('--sample-at', default=None)
parser.add_argument('--wasserstein', action='store_true')
parser.add_argument('--pool',
                    default=multiprocessing.cpu_count() - 2,
                    type=int)
args = parser.parse_args(sys.argv[1:])
# Single-threaded fallback uses the itertools module in place of a Pool
# -- presumably for its imap with the same call shape; verify at call site.
if args.pool > 1:
    pool = multiprocessing.Pool(args.pool)
else:
    pool = itertools
data = []
if args.segments != None:
    # Reuse the configuration stored in the Segments file, re-windowed.
    segments_json = load_data(args.segments, 'segments', None, None,
                              sys.argv[0] + ": ")
    segments = Segments.fromJSONDict(segments_json)
    args.type = segments.config.data_type
    config = segments.config
    config.window_size = args.window_size
    config.window_stride = 1
    for segment in segments.segments:
        # Split each segment's flat window back into per-sample points,
        # keeping the originating filename with every point.
        point_len = len(segment.windows[0]) / segment.segment_size
        this_data = [(segment.windows[0][i:i + point_len], segment.filename)
                     for i in range(0, len(segment.windows[0]), point_len)]
        data.extend(this_data)
else:
    # No Segments file: build a configuration from the flags instead.
    config = Configuration.fromJSONDict(
        dict([('data_file', args.infile), ('data_type', args.type),
              ('window_size', args.window_size), ('window_stride', 1),
# (Fragment -- argument definitions continue from code above this excerpt.)
parser.add_argument('--outfile')
parser.add_argument('--pool', default=multiprocessing.cpu_count(), type=int)
args = parser.parse_args(sys.argv[1:])
# Expand each --infile argument into every matching file under its
# directory, via `find <dir> -name <base>* -type f`.
infiles = []
for f in args.infile :
    (dirname,basename) = os.path.split(f)
    cmd = ["find", dirname, "-name", basename + "*", "-type", "f"]
    print " ".join(cmd)
    files = subprocess.check_output(cmd)
    files = files.split()
    infiles.extend(files)
infiles.sort()
print "\n".join(infiles)
# Concatenate the JSON payloads of every discovered file.
input_data = []
for f in infiles :
    input_json = load_data(f, '', None, None, "persistence_bottleneck_distance.py : ")
    input_data.extend(input_json)
# Drop degenerate entries, then order by start position, per-area simplex
# budget, and segment size.
input_data = [i for i in input_data if i['segment_start'] != i['segment_size']]
input_data.sort(key=lambda x: (x['segment_start'],x['max_simplices']/x['segment_size']/x['segment_size'], x['segment_size']))
# for entry in input_data :
#     print entry['segment_start'], entry['segment_size'], entry['max_simplices']
samples = list(set([d['segment_start'] for d in input_data]))
samples.sort()
# Single-threaded fallback substitutes the itertools module for a Pool.
if args.pool > 1 :
    pool = multiprocessing.Pool(args.pool)
else :
    pool = itertools
out_data = []
for s in samples :
import argparse
import numpy
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes import Learning

# Print summary statistics for Learning JSON files, either human readable
# or as CSV rows.  (Fragment -- ends inside the per-file loop.)
# NOTE(review): sys.argv and os.path are used below but this excerpt shows
# no sys/os imports -- confirm the full file imports them.
parser = argparse.ArgumentParser(
    description="utility to print stats about learning JSON")
parser.add_argument('--csv', action="store_true")
parser.add_argument('files', metavar='FILE', nargs='*')
args = vars(parser.parse_args(sys.argv[1:]))
if args['csv']:
    print "Data File,Learning Type,Data Index,Segment Size,Segment Stride,Window Size,Max Simplices,Kernel Scale,C,Correct,Std. Dev.,Top Quart,Bot Quart,Train,Test,Classes,Kernel Weights"
for f in args['files']:
    try:
        # In CSV mode suppress load_data's progress prefix.
        lf_json = load_data(f, 'learning', None, None,
                            'learning_stats: ' if not args['csv'] else None)
        if lf_json == None:
            print "Could not load file : %s" % (f, )
            exit()
        learning = Learning.Learning.fromJSONDict(lf_json)
        correct = []
        # Derive a short dataset name from the configured data file.
        if (isinstance(learning.config.data_file, list)):
            (filename, ext) = os.path.splitext(
                os.path.basename(learning.config.data_file[0]))
            import string
            filename = string.join(filename.split("_")[0:-1])
        elif (isinstance(learning.config.data_file, dict)):
            filename = 'placeholder'
        else:
            (filename, ext) = os.path.splitext(
# Frame that plots, per label, the bottleneck-distance average of the most
# "representative" segment, writes the plot to a PDF, and exits.
def __init__(self, argv):
    wx.Frame.__init__(self,
                      None,
                      -1,
                      'Bottleneck Distance',
                      size=(550, 350))
    parser = argparse.ArgumentParser(
        description=
        "Utility to plot Bottleneck Distance average of 'representative' segments for each label"
    )
    parser.add_argument('-d', '--distances')
    parser.add_argument('-l', '--learning')
    parser.add_argument('-p', '--partition')
    parser.add_argument('-t', '--threshold', default=0.75, type=float)
    self.args = parser.parse_args(argv[1:])
    import traceback
    try:
        self.distances = \
            Distances.fromJSONDict(load_data(self.args.distances, 'distances', None, None, argv[0]+": "))
        self.learning = \
            Learning.fromJSONDict(load_data(self.args.learning, 'learning', None, None, argv[0]+": "))
        self.partitions = \
            TrainTestPartitions.fromJSONDict(load_data(self.args.partition, 'partition', None, None, argv[0]+": "))
    except:
        print "Could not parse input files: %s" % (
            traceback.format_exc(), )
        sys.exit(1)
    # Filter to only the segments that get above the threshold
    # (c == None means the segment was never tested, so it is kept).
    self.segments = []
    for i in range(len(self.distances.segment_info)):
        c = segment_correctness(i, self.learning, self.partitions)
        if c == None or c > self.args.threshold:
            self.segments.append(
                (i, self.distances.segment_info[i].max_label()))
    # Sort by label, then by zero-padded segment index.
    # NOTE(review): math.log is the natural log, not log10, so the pad
    # width may not match the decimal digit count -- verify intent.
    sort_format = "0" * int(
        math.ceil(math.log(len(self.distances.segment_info))))
    self.segments.sort(
        key=lambda x: str(x[1] + (("%" + sort_format + "d") % x[0])))
    self.label_index = 0
    self.labels = list(set([x[1] for x in self.segments]))
    self.labels.sort()
    # Per-label first/last positions in self.segments, plus the segment
    # with the minimal average distance to its label ("representative").
    self.segment_minimums = dict([(l, min([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l])) \
                                  for l in self.labels])
    self.segment_maximums = dict([(l, max([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l])) \
                                  for l in self.labels])
    self.segment_indices = dict([(l, min([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l], key=lambda x:average_distance(x,l,self.distances))) \
                                 for l in self.labels])
    # wx / matplotlib plumbing: canvas, toolbar, sizer, event bindings.
    self.SetBackgroundColour(wx.NamedColour("WHITE"))
    self.figure = Figure()
    self.axes = self.figure.add_subplot(111)
    self.canvas = FigureCanvas(self, -1, self.figure)
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1,
                   wx.LEFT | wx.TOP | wx.GROW)
    self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
    self.SetSizer(self.sizer)
    self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
    self.colors = [
        'black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet'
    ]
    self.Bind(wx.EVT_PAINT, self.OnPaint)
    self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
    self.index = 0
    self.point_Refresh()
    self.Fit()
    # Render once, write the PDF next to the output directory name, quit.
    self.figure.savefig(
        self.distances.config.out_directory + "/" +
        self.distances.config.out_directory.split('/')[-1] + '-win-' +
        str(self.distances.config.window_size) + '-best-distances.pdf')
    sys.exit(0)
import sys
import argparse
import importlib
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.Distances import Distances
from persistence.Datatypes.Configuration import Configuration
from persistence.Datatypes.Segments import SegmentInfo

# Backfill per-segment metadata into a Distances file by re-reading the
# Segments file the distances were computed from.  (The excerpt ends
# inside the loop; presumably `d` is saved afterwards -- verify.)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Utility to add SegmentInfo data to a Distances file")
    parser.add_argument("--infile")
    parser.add_argument("--outfile")
    args = parser.parse_args(sys.argv[1:])
    in_json = load_data(args.infile, "distances", None, None,
                        sys.argv[0] + " : ")
    d = Distances.fromJSONDict(in_json)
    # Locate the segment file via the datatype's filename helper.
    module = importlib.import_module('persistence.' + d.config.data_type)
    module_class = getattr(module, d.config.data_type)
    segment_filename = module_class.get_segment_filename(d.config)
    seg_json = load_data(segment_filename, "segments", None, None,
                         sys.argv[0] + " : ")
    print segment_filename, len(seg_json['segments'])
    d.segment_info = []
    i = 0
    for segment in seg_json['segments']:
        d.segment_info.append(SegmentInfo.fromJSONDict(segment))
        i = i + 1
        # Progress indicator every 250 segments.
        if i % 250 == 0:
            print segment_filename, i
# (Fragment -- the plotting that uses `filedict` and `axes` continues
# past this excerpt.)
parser = argparse.ArgumentParser()
parser.add_argument("-l", "--learning", nargs="+")
parser.add_argument("-d", "--distance-10")
parser.add_argument("-e", "--distance-20")
parser.add_argument("-f", "--distance-30")
parser.add_argument("-p",
                    "--pool",
                    default=max(1,
                                multiprocessing.cpu_count() - 2),
                    type=int)
args = parser.parse_args(sys.argv[1:])
# Load every learning result plus three distance files (window sizes
# 10 / 20 / 30, going by the option names).
learning = [
    Learning.fromJSONDict(
        load_data(l, "learning", None, None, sys.argv[0] + ": "))
    for l in args.learning
]
distances = [
    Distances.fromJSONDict(
        load_data(args.distance_10, "distances", None, None,
                  sys.argv[0] + ": ")),
    Distances.fromJSONDict(
        load_data(args.distance_20, "distances", None, None,
                  sys.argv[0] + ": ")),
    Distances.fromJSONDict(
        load_data(args.distance_30, "distances", None, None,
                  sys.argv[0] + ": "))
]
filedict = []
# 2x2 grid of subplots for the comparison figure.
f, axes = plt.subplots(2, 2)
# Entry point: run SVM learning over a precomputed similarity kernel,
# optionally cross-validating the C parameter over supplied partitions.
def main(argv):
    parser = argparse.ArgumentParser(
        description='Tool to generate a similarity kernel from persistence data'
    )
    parser.add_argument('-i', '--infile', help='Input JSON Similarity Kernel file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p', '--pool',
                        default=multiprocessing.cpu_count(),
                        help='Threads of computation to use')
    parser.add_argument(
        '-c',
        '--learning-C',
        help=
        'C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('-t', '--train-test-partitions',
                        help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))
    kf_json = load_data(args['infile'], 'kernel', None, None,
                        "KernelLearning: ")
    if kf_json == None:
        print "Could not load Kernel from %s" % (args['infile'], )
        sys.exit(1)
    kernel = Kernel.fromJSONDict(kf_json)
    config = kernel.config
    segment_info = kernel.segment_info
    if (int(args['pool']) > 1):
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None
    # Normalise --learning-C into a list of candidate C values.
    # NOTE(review): structure reconstructed from token order; the elif
    # condition reads learning_C before any assignment -- verify against
    # the original file's indentation.
    if (args['learning_C'] != None):
        learning_C = parse_range(args['learning_C'], t=float)
        if not isinstance(learning_C, list):
            learning_C = [learning_C]
    elif not isinstance(learning_C, list):
        learning_C = [config.learning_C]
    else:
        learning_C = config.learning_C
    if (args['train_test_partitions'] != None):
        partitions_json = load_data(args['train_test_partitions'],
                                    'partitions', None, None,
                                    "KernelLearning: ")
        if partitions_json == None:
            print "Could not load Train / Test Partitions from %s" % (
                args['train_test_partitions'], )
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        # 5-fold cross validation only when more than one C is in play.
        partitions = generate_partitions(config, segment_info,
                                         cv_iterations=5 if
                                         (len(learning_C) > 1) else 0)
    if isinstance(learning_C, list) and len(learning_C) > 1 and len(
            partitions.cross_validation) > 0:
        # Cross-validate: score every (partition, C) pair and keep the C
        # with the best average accuracy.
        num_cv = len(partitions.cross_validation)
        learning_wrap = LearningWrapper(kernel)
        if pool != None:
            results = pool.map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        else:
            results = map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        results = list(results)
        print len(results)
        for C in learning_C:
            correct = Learning(
                config, [_result for (_C, _result) in results if C == _C
                         ]).get_average_correct()
            if correct > max_correct:
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print "KernelLearning: using C = %s, correct = %s" % (
            config.learning_C, max_correct)
    else:
        # No cross validation: take the supplied (or configured) C.
        if isinstance(learning_C, list):
            config.learning_C = learning_C[0]
        else:
            config.learning_C = learning_C
    # Final evaluation with the chosen C over the evaluation partitions.
    learning_wrap = LearningWrapper(kernel)
    if pool != None:
        results = pool.map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    else:
        results = map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C, result) in results])
    if args['outfile'] == None:
        learning_filename = KernelLearning.get_learning_filename(config)
    else:
        learning_filename = args['outfile']
    correct = learning.get_average_correct()
    print "%s correct %2.2f%% error %2.2f%% classes %s" % (
        "KernelLearning:", correct * 100.0, (1.0 - correct) * 100.0,
        len(set([s.max_label() for s in kernel.segment_info])))
    print "Writing %s" % (learning_filename, )
    learning.config.status = "KernelLearning"
    save_data(learning_filename, learning.toJSONDict())
# Frame that graphs learning success rates across a set of result files,
# one entry per file, labelled by the learning method in the filename.
def __init__(self, argv):
    wx.Frame.__init__(self, None, -1, 'Segment Size', size=(550, 350))
    parser = argparse.ArgumentParser(
        description=
        "utility to graph success levels for learning over a single configuration parameter"
    )
    parser.add_argument('--label', '-l')
    parser.add_argument('files', metavar='FILE', nargs='*')
    self.args = vars(parser.parse_args(argv[1:]))
    self.files = self.args['files']
    self.filedict = []
    # load in the data files
    for f in self.files:
        learning = Learning.fromJSONDict(
            load_data(f, 'learning', None, None, argv[0] + ": "))
        correct = []
        for result in learning.results:
            # Fraction of (label, result) pairs that match for this run.
            num_correct = reduce(
                (lambda s, (t0, t1): s + 1 if t0 == t1 else s),
                zip(result['test_labels'], result['test_results']), 0)
            correct.append(
                float(num_correct) / float(len(result['test_labels'])))
        print "file %s correct %0.2f%%" % (f,
                                           numpy.average(correct) * 100.0)
        self.filedict.append(
            dict([('file', f), ('correct', numpy.average(correct)),
                  ('config', learning.config)]))
        # Choose a display label from the learning type in the filename.
        if "PersistenceKernelLearning" in f:
            self.filedict[-1][
                'label'] = "Persistence Kernel " + learning.config.data_index
            if learning.config.post_process != None:
                self.filedict[-1]['label'] = self.filedict[-1][
                    'label'] + " " + learning.config.post_process
        elif "AverageKernelLearning" in f:
            self.filedict[-1]['label'] = "Average Kernel"
            if learning.config.post_process != None:
                self.filedict[-1]['label'] = self.filedict[-1][
                    'label'] + " " + learning.config.post_process
        elif "ChaoticInvariantFeaturesLearning" in f:
            self.filedict[-1]['label'] = " Chaotic Invariant Features"
        elif "ScaleSpaceSimilarityLearning" in f:
            self.filedict[-1]['label'] = "Scale Space Similarity"
        elif "EuclideanDistancesLearning" in f:
            self.filedict[-1]['label'] = "Euclidean Distance"
        # Spread statistics only make sense with more than one result.
        # NOTE(review): numpy.percentile takes percentiles on a 0-100
        # scale, so 0.75 / 0.25 look like they were meant to be 75 / 25
        # -- verify before relying on correct_top / correct_bot.
        if (len(correct) > 1):
            self.filedict[-1]['correct_std'] = numpy.std(correct)
            self.filedict[-1]['correct_top'] = numpy.percentile(
                correct, 0.75)
            self.filedict[-1]['correct_bot'] = numpy.percentile(
                correct, 0.25)
        else:
            self.filedict[-1]['correct_std'] = 0.0
            self.filedict[-1]['correct_top'] = 0.0
            self.filedict[-1]['correct_bot'] = 0.0
    # wx / matplotlib plumbing: canvas, toolbar, sizer, event bindings.
    self.SetBackgroundColour(wx.NamedColour("WHITE"))
    self.figure = Figure()
    self.axes = self.figure.add_subplot(211)
    self.canvas = FigureCanvas(self, -1, self.figure)
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1,
                   wx.LEFT | wx.TOP | wx.GROW)
    self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
    self.SetSizer(self.sizer)
    self.title = self.figure.suptitle("")
    self.Fit()
    self.background = self.axes.figure.canvas.copy_from_bbox(
        self.axes.bbox)
    self.colors = [
        '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#b3de69', '#fb9a99',
        '#e31a1c', '#fb8072', '#ff7f00', '#a65628', '#fdb462', '#cab2d6'
    ]
    self.Bind(wx.EVT_PAINT, self.OnPaint)
    self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
    self.point_Refresh()
def __init__(self, argv):
    # Frame summarizing the best learning accuracy per UCR dataset and
    # learning type, gathered either from a directory tree or explicit files.
    wx.Frame.__init__(self,None,-1, 'UCR 2015 Learning Results',size=(550,350))
    parser = argparse.ArgumentParser(description="utility to graph success levels for learning on the UCR Dataset")
    parser.add_argument('-d', '--directory', help='Directory where the learning results are stored', required=False)
    parser.add_argument('files', metavar='FILE', nargs='*')
    self.args = vars(parser.parse_args(argv[1:]))
    # --directory takes precedence over positional files; at least one is required.
    if self.args['directory'] != None and len(self.args['files']) != 0 :
        print "Ignoring files after the directory argument"
    elif self.args['directory'] == None and len(self.args['files']) == 0 :
        parser.print_help()
        sys.exit()
    if self.args['directory'] != None :
        if os.path.isdir(self.args['directory']) :
            # Collect every *Learning.json* result below the directory.
            self.files = subprocess.check_output(["find", self.args['directory'], "-name", "*Learning.json*"])
            self.files = [f for f in self.files.split("\n") if f != '']
        else :
            parser.print_help()
            sys.exit()
    else :
        self.files = self.args['files']
    def learning_type(text) :
        # Classify a result file by the trailing "<Type>Learning" component of
        # its name; kernel-based types are additionally tagged with either the
        # window size ("-<win>") or "-Chaos" when ChaosPost appears in the name.
        fields = text.split('-')
        t = fields[-1].split('.')[0][:-len('Learning')]
        try :
            if t == 'PersistenceKernel' or t == 'ScaleSpaceSimilarity' or t == 'MultipleKernel' or t == 'AverageKernel':
                if 'ChaosPost' in fields :
                    w = '-Chaos'
                else :
                    w = '-' + fields[fields.index('win')+1]
            else :
                w = ''
        except ValueError :
            # No 'win' field present in the name.
            w = ''
        return t + w
    self.learning_types = list(set([learning_type(f) for f in self.files]))
    self.learning_types.sort()
    # The dataset name is the parent directory of each result file.
    datasets = list(set([f.split('/')[-2] for f in self.files]))
    datasets.sort()
    # dataset -> { learning type -> best accuracy seen (0 until a file is read) }
    self.filedict = dict([(s, dict([(t,0) for t in self.learning_types])) for s in datasets])
    # load in the data files
    for f in self.files :
        learning_t = learning_type(f)
        dataset = f.split('/')[-2]
        learning_json = load_data(f, "learning", None, None, None)
        learning = Learning.fromJSONDict(learning_json)
        best = self.filedict[dataset][learning_t]
        current = learning.get_average_correct()
        # Keep the best result seen so far; multiple-kernel results are stored
        # as a list of weight-scaled accuracies whose sum is the total.
        if (isinstance(best, list) and current > sum(best)) or (not isinstance(best, list) and current > best) :
            if learning.results[0].mkl_weights != None :
                self.filedict[dataset][learning_t] = [current * w for w in learning.results[0].mkl_weights]
            else :
                self.filedict[dataset][learning_t] = current
    # Print an aligned text summary of the best result per dataset.
    keylen = max([len(key) for (key,val) in self.filedict.items() ])
    format = '%'+str(keylen)+'s %s'
    for (key, val) in self.filedict.items() :
        vals = [("%s %02.2f%%" % (k,v*100.0 if not isinstance(v, list) else sum(v) * 100.0)) +
                ((" " + str(["%02.2f%%" % v_ for v_ in v])) if isinstance(v,list) else "")
                for (k,v) in val.items()]
        vals.sort()
        print format % (key, vals)
    self.frame = 0
    # wx / matplotlib scaffolding: grouped bars, one group per dataset.
    self.SetBackgroundColour(wx.NamedColour("WHITE"))
    self.figure = Figure()
    self.axes = self.figure.add_subplot(121)
    plot_keys = self.filedict.items()[0][1].keys()
    # Width of one dataset group: one slot per learning type plus spacing.
    dataset_width = len(plot_keys) + 1.5
    self.axes.set_xticks([(0.5 + i) * dataset_width for i in range(len(self.filedict.items()))])
    self.axes.set_xticklabels([key for (key,value) in self.filedict.items()])
    self.axes.set_ylim(0.0,1.0, auto=False)
    self.canvas = FigureCanvas(self, -1, self.figure)
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self.sizer.Add(self.canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
    self.SetSizer(self.sizer)
    self.title = self.figure.suptitle("UCR Learning")
    #self.Fit()
    self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
    self.colors = ['black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet']
    self.Bind(wx.EVT_PAINT, self.OnPaint)
    self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
    self.point_Refresh()
#This program is distributed in the hope that it will be useful, #but WITHOUT ANY WARRANTY; without even the implied warranty of #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #GNU General Public License for more details. # #You should have received a copy of the GNU General Public License #along with this program. If not, see <http://www.gnu.org/licenses/>. import sys import numpy from persistence.Datatypes.JSONObject import load_data from persistence.Datatypes.Segments import Segments data = [] for arg in sys.argv[1:] : segments = Segments.fromJSONDict(load_data(arg, "segments", None, None, sys.argv[0] + ": ")) taus = [int(s.tau) for s in segments.segments] window_sizes = [len(s.windows[0]) for s in segments.segments] data.append([arg[arg.find('-data-')+6:arg.find('-seg-')], min(taus), max(taus), numpy.mean(taus),min(window_sizes), max(window_sizes), numpy.mean(window_sizes)]) data.sort() print data import matplotlib.pyplot as plt f = plt.figure() axes_tau = f.add_axes([0.1,0.3,0.35,0.6]) axes_tau.set_title("Time Delay") plots =[ axes_tau.bar(left=range(len(data)), height=[d[1] for d in data], bottom=0.0, width=0.8, color="#a8ddb5"), axes_tau.bar(left=range(len(data)), height=[d[2] - d[1] for d in data], bottom=[d[1] for d in data], width=0.8, color="#7bccc4"), axes_tau.bar(left=range(len(data)), height=[d[3] - d[2] for d in data], bottom=[d[2] for d in data], width=0.8, color="#4eb3d3") ]
import sys import argparse import importlib from persistence.Datatypes.JSONObject import load_data, save_data from persistence.Datatypes.Distances import Distances from persistence.Datatypes.Configuration import Configuration from persistence.Datatypes.Segments import SegmentInfo if __name__ == "__main__" : parser = argparse.ArgumentParser(description="Utility to add SegmentInfo data to a Distances file") parser.add_argument("--infile") parser.add_argument("--outfile") args = parser.parse_args(sys.argv[1:]) in_json = load_data(args.infile, "distances", None, None, sys.argv[0] + " : ") d = Distances.fromJSONDict(in_json) module = importlib.import_module('persistence.' + d.config.data_type) module_class = getattr(module, d.config.data_type) segment_filename = module_class.get_segment_filename(d.config) seg_json = load_data(segment_filename, "segments", None, None, sys.argv[0] + " : ") print segment_filename, len(seg_json['segments']) d.segment_info = [] i = 0 for segment in seg_json['segments'] : d.segment_info.append(SegmentInfo.fromJSONDict(segment)) i = i + 1 if i % 250 == 0: print segment_filename, i print "Writing %s" % (args.outfile,)
import sys
import argparse
import numpy
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes import Learning
# Script: print summary statistics for Learning JSON files, optionally as CSV.
parser = argparse.ArgumentParser(description="utility to print stats about learning JSON")
parser.add_argument('--csv', action="store_true")
parser.add_argument('files', metavar='FILE', nargs='*')
args = vars(parser.parse_args(sys.argv[1:]))
if args['csv'] :
    # CSV header matching the per-file rows printed later.
    print "Data File,Learning Type,Data Index,Segment Size,Segment Stride,Window Size,Max Simplices,Kernel Scale,C,Correct,Std. Dev.,Top Quart,Bot Quart,Train,Test,Classes,Kernel Weights"
for f in args['files']:
    # NOTE(review): this chunk is truncated — the 'except' matching this 'try'
    # and the remainder of the loop body lie outside the visible region.
    try:
        lf_json = load_data(f, 'learning', None, None, 'learning_stats: ' if not args['csv'] else None)
        if lf_json == None :
            print "Could not load file : %s" % (f,)
            exit()
        learning = Learning.Learning.fromJSONDict(lf_json)
        correct = []
        # NOTE(review): 'os' is used below but not imported in the header
        # above — presumably 'import os' exists elsewhere; verify.
        # Derive a base filename for display from the config's data_file,
        # which may be a list, a dict (placeholder), or a single path.
        if (isinstance(learning.config.data_file, list)):
            (filename, ext) = os.path.splitext(os.path.basename(learning.config.data_file[0]))
            import string
            filename = string.join(filename.split("_")[0:-1])
        elif (isinstance(learning.config.data_file, dict)) :
            filename = 'placeholder'
        else :
            (filename, ext) = os.path.splitext(os.path.basename(learning.config.data_file))
        # Learning type is the trailing "-<Type>.json" component of the filename.
        learning_type = (f.split('-')[-1]).split('.')[0]
    # NOTE(review): fragment — the opening of this if/elif chain and the
    # definitions of file_class, args, and status precede this chunk.
    data_class = "Distances"
elif "ScaleSpaceSimilarities" in file_class :
    file_class = "ScaleSpaceSimilarity"
    data_class = "Distances"
elif "Kernel" in file_class :
    data_class = "Kernel"
elif "CrossValidation" in file_class :
    data_class = file_class
# Resolve the processing module/class and the datatype class by name.
module = importlib.import_module('persistence.' + file_class)
module_class = getattr(module, file_class.split('.')[-1])
# NOTE(review): this import repeats the previous one; possibly the second was
# meant to import the data_class module instead — confirm against history.
module = importlib.import_module('persistence.' + file_class)
data_class = getattr(module, data_class.split('.')[-1])
in_json = load_data(args.infile, "JSONObject", None, None, sys.argv[0] + " : ")
in_obj = data_class.fromJSONDict(in_json)
in_obj.config.out_directory = args.outdir
if status != None :
    in_obj.config.status = status
# Pick the output-filename accessor matching the data type.
if "Segments" in file_class or \
   "Post" in file_class :
    out_file = module_class.get_segment_filename(in_obj.config, gz=False)
elif "Features" in file_class :
    out_file = module_class.get_features_filename(in_obj.config, gz=False)
elif "PersistenceDiagrams" in file_class :
    out_file = module_class.get_persistence_diagrams_filename(in_obj.config, gz=False)
elif "Partition" in file_class :
    out_file = module_class.get_partition_filename(in_obj.config, gz=False)
elif "Learning" in file_class :
    out_file = module_class.get_learning_filename(in_obj.config, gz=False)
def __init__(self, data_file, persistence_file, kernel_file):
    # Data-exploration frame: loads segments, persistence diagrams, and a
    # kernel, reconstructs the raw signal per segment, and lays out a main
    # plot plus four sub-plots for comparing two selected segments.
    wx.Frame.__init__(self,None,-1, 'Data Visualization',size=(550,350))
    self.segment_file = data_file
    segments_json = load_data(self.segment_file, 'segments', None, None, "explore_persistence: ")
    if segments_json == None:
        print "Could not load segment file : %s" % (self.segment_file,)
        exit()
    self.segments = Segments.fromJSONDict(segments_json)
    self.persistence_file = persistence_file
    persistence_json = load_data(self.persistence_file, 'persistence', None, None, "explore_persistence: ")
    if persistence_json == None :
        print "Could not load persistence file : %s" % (self.persistence_file,)
        exit()
    self.persistences = PD.fromJSONDict(persistence_json)
    self.kernel_file = kernel_file
    kernel_json = load_data(self.kernel_file, 'kernel', None, None, "explore_persistence: ")
    if kernel_json == None :
        print "Could not load kernel file : %s" % (self.kernel_file,)
        exit()
    self.kernel = Kernel.fromJSONDict(kernel_json)
    self.kernel_config = self.kernel.config
    self.spans = []
    self.similarities = []
    # Rebuild the raw sample stream for each segment from its windows.
    for segment in self.segments.segments :
        window_stride = segment.window_stride
        label = max_label(segment.labels)
        data = []
        # We need to account for data overlap in the windows, which is not useful for this visualization
        for window in segment.windows :
            data.extend(window[0:window_stride])
        # NOTE(review): indentation reconstructed — the tail of the last
        # window is assumed to be appended once, after the loop; confirm.
        data.extend(segment.windows[-1][window_stride:])
        self.spans.append((label, segment.segment_start, data))
    # Global min/max over all samples, for consistent plot scaling.
    self.mins = None
    self.maxs = None
    for (l, start, xs) in self.spans :
        for x in xs :
            if self.maxs == None or x > self.maxs :
                self.maxs = x
            if self.mins == None or x < self.mins :
                self.mins = x
    self.labels = set([span[0] for span in self.spans])
    self.index = 1
    # wx / matplotlib scaffolding: one main canvas plus four sub-canvases
    # (data and persistence for segments "a" and "b").
    self.SetBackgroundColour(wx.NamedColour("WHITE"))
    self.figure = Figure()
    self.axes = self.figure.add_subplot(111)
    self.canvas = FigureCanvas(self, -1, self.figure)
    self.title = self.figure.suptitle("Data for Column %s" % (self.index,))
    self.sub_figure_a = Figure()
    self.sub_axes_a = self.sub_figure_a.add_subplot(111)
    self.sub_canvas_a = FigureCanvas(self, -1, self.sub_figure_a)
    self.sub_title_a = self.sub_figure_a.suptitle("Data for Segment beginning at %s, label %s" % (" ", " "))
    self.sub_figure_ap = Figure()
    self.sub_axes_ap = self.sub_figure_ap.add_subplot(111)
    self.sub_canvas_ap = FigureCanvas(self, -1, self.sub_figure_ap)
    self.sub_title_ap = self.sub_figure_ap.suptitle("Persistence for Segment beginning at %s, label %s" % (" ", " "))
    self.sub_figure_b = Figure()
    self.sub_axes_b = self.sub_figure_b.add_subplot(111)
    self.sub_canvas_b = FigureCanvas(self, -1, self.sub_figure_b)
    self.sub_title_b = self.sub_figure_b.suptitle("Data for Segment beginning at %s, label %s" % (" ", " "))
    self.sub_figure_bp = Figure()
    self.sub_axes_bp = self.sub_figure_bp.add_subplot(111)
    self.sub_canvas_bp = FigureCanvas(self, -1, self.sub_figure_bp)
    self.sub_title_bp = self.sub_figure_bp.suptitle("Persistence for Segment beginning at %s, label %s" % (" ", " "))
    # Mouse interaction on the main canvas selects segments.
    self.click_cid_down = self.canvas.mpl_connect('button_press_event', self.mouseDown)
    self.click_cid_up = self.canvas.mpl_connect('button_release_event', self.mouseUp)
    self.click_cid_move = self.canvas.mpl_connect('motion_notify_event', self.mouseMove)
    self.sizer = wx.GridBagSizer(hgap=5, vgap=5)
    self.sizer.Add(NavigationToolbar2Wx(self.canvas), pos=(0,0), span=(1,2), flag=wx.EXPAND)
    self.sizer.AddGrowableCol(1,0)
    self.sizer.Add(self.canvas, pos=(1,0), span=(8,2), flag=wx.EXPAND)
    self.sizer.AddGrowableCol(9,0)
    self.sizer.Add(self.sub_canvas_a, pos=(9,0), span=(4,1), flag=wx.EXPAND)
    self.sizer.Add(self.sub_canvas_b, pos=(9,1), span=(4,1), flag=wx.EXPAND)
    self.sizer.AddGrowableCol(13,0)
    self.sizer.Add(self.sub_canvas_ap, pos=(13,0), span=(4,1), flag=wx.EXPAND)
    self.sizer.Add(self.sub_canvas_bp, pos=(13,1), span=(4,1), flag=wx.EXPAND)
    self.SetSizer(self.sizer)
    # Caption shows the total number of samples across all spans.
    self.caption = self.figure.text(0.15, 0.8, "%s Samples Read" % (\
        reduce((lambda x,y: x+y),[len(span[2]) for span in self.spans], 0)))
    self.caption.set_backgroundcolor('#ffffff')
    self.Fit()
    self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
    self.Bind(wx.EVT_PAINT, self.OnPaint)
    self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
    # Interaction state: click counter and the two selected segment indices.
    self.click = 0
    self.index_a = None
    self.index_b = None
    self.point_Refresh()
    self.state = (None, 0)
#You should have received a copy of the GNU General Public License #along with this program. If not, see <http://www.gnu.org/licenses/>. import sys import argparse import importlib from persistence.Datatypes.JSONObject import load_data, save_data from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram from persistence.Datatypes.Configuration import Configuration from persistence.Datatypes.Segments import SegmentInfo if __name__ == "__main__" : parser = argparse.ArgumentParser(description="Utility to add SegmentInfo data to a PersistenceDiagrams file") parser.add_argument("--infile") parser.add_argument("--outfile") args = parser.parse_args(sys.argv[1:]) in_json = load_data(args.infile, "persistence diagrams", None, None, sys.argv[0] + " : ") pd = PersistenceDiagrams.fromJSONDict(in_json) module = importlib.import_module('persistence.' + pd.config.data_type) module_class = getattr(module, pd.config.data_type) segment_filename = module_class.get_segment_filename(pd.config) seg_json = load_data(segment_filename, "segments", None, None, sys.argv[0] + " : ") for (diagram, segment) in zip(pd.diagrams, seg_json['segments']) : diagram.segment_info = SegmentInfo.fromJSONDict(segment) print "Writing %s" % (args.outfile,) save_data(args.outfile, pd.toJSONDict())
# NOTE(review): fragment — the argparse parser is created before this chunk,
# and the trailing Configuration dict literal is cut off mid-expression.
parser.add_argument('--max-segment-size', default=1000, type=int)
parser.add_argument('--segment-size-step', default=20, type=int)
parser.add_argument('--window-size', default=40, type=int)
parser.add_argument('--samples', default=10, type=int)
parser.add_argument('--sample-at', default=None)
parser.add_argument('--wasserstein', action='store_true')
parser.add_argument('--pool', default=multiprocessing.cpu_count()-2, type=int)
args = parser.parse_args(sys.argv[1:])
# Use a process pool when requested; otherwise itertools stands in for the
# pool's map-style interface.
if args.pool > 1 :
    pool = multiprocessing.Pool(args.pool)
else :
    pool = itertools
data = []
if args.segments != None :
    # Derive sampling points from an existing Segments file.
    segments_json = load_data(args.segments, 'segments', None, None, sys.argv[0] + ": ")
    segments = Segments.fromJSONDict(segments_json)
    args.type = segments.config.data_type
    config = segments.config
    config.window_size = args.window_size
    config.window_stride = 1
    for segment in segments.segments :
        # Split the first window into segment_size equal chunks, keeping the
        # source filename with each chunk.
        point_len = len(segment.windows[0]) / segment.segment_size
        this_data = [(segment.windows[0][i:i+point_len], segment.filename)
                     for i in range(0, len(segment.windows[0]), point_len)]
        data.extend(this_data)
else :
    # Build a fresh Configuration from command-line arguments.
    config = Configuration.fromJSONDict(dict([('data_file', args.infile),
                                              ('data_type', args.type),
                                              ('window_size', args.window_size),
                                              ('window_stride', 1),
def main(argv): parser = argparse.ArgumentParser(description='Tool to generate a similarity kernel from persistence data') parser.add_argument('-i', '--infile', help='Input JSON Similarity Kernel file') parser.add_argument('-o', '--outfile', help='Output JSON Learning file') parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use') parser.add_argument('-c', '--learning-C', help='C value for SVM. Specify a range for 1-dimensional cross-validation') parser.add_argument('-t', '--train-test-partitions', help='Precomputed train / test partitions') args = vars(parser.parse_args(argv[1:])) kf_json = load_data(args['infile'], 'kernel', None, None, "KernelLearning: ") if kf_json == None : print "Could not load Kernel from %s" % (args['infile'],) sys.exit(1) kernel = Kernel.fromJSONDict(kf_json) config = kernel.config segment_info = kernel.segment_info if (int(args['pool']) > 1) : pool = multiprocessing.Pool(int(args['pool'])) else : pool = None if (args['learning_C'] != None) : learning_C = parse_range(args['learning_C'], t=float) if not isinstance(learning_C,list) : learning_C = [learning_C] elif not isinstance(learning_C,list) : learning_C = [config.learning_C] else : learning_C = config.learning_C if (args['train_test_partitions'] != None) : partitions_json = load_data(args['train_test_partitions'], 'partitions', None, None, "KernelLearning: ") if partitions_json == None : print "Could not load Train / Test Partitions from %s" % (args['train_test_partitions'],) sys.exit(1) partitions = TrainTestPartitions.fromJSONDict(partitions_json) else : partitions = generate_partitions(config, segment_info, cv_iterations=5 if (len(learning_C) > 1) else 0) if isinstance(learning_C, list) and len(learning_C) > 1 and len(partitions.cross_validation) > 0 : num_cv = len(partitions.cross_validation) learning_wrap = LearningWrapper( kernel ) if pool != None : results = pool.map(learning_wrap, 
itertools.product(partitions.cross_validation, learning_C)) else : results = map(learning_wrap, itertools.product(partitions.cross_validation, learning_C)) max_correct = 0.0 best_C = learning_C[0] results = list(results) print len(results) for C in learning_C : correct = Learning(config, [_result for (_C, _result) in results if C == _C]).get_average_correct() if correct > max_correct : best_C = C max_correct = correct config.learning_C = best_C print "KernelLearning: using C = %s, correct = %s" % (config.learning_C, max_correct) else : if isinstance(learning_C, list) : config.learning_C = learning_C[0] else : config.learning_C = learning_C learning_wrap = LearningWrapper( kernel ) if pool != None : results = pool.map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C])) else : results = map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C])) learning = Learning(config, [result for (C,result) in results]) if args['outfile'] == None : learning_filename = KernelLearning.get_learning_filename(config) else : learning_filename = args['outfile'] correct = learning.get_average_correct() print "%s correct %2.2f%% error %2.2f%% classes %s" % ("KernelLearning:", correct * 100.0, (1.0 - correct)*100.0, len(set([s.max_label() for s in kernel.segment_info]))) print "Writing %s" % (learning_filename, ) learning.config.status = "KernelLearning" save_data(learning_filename, learning.toJSONDict())