Beispiel #1
0
def main(argv):
    current_dir = os.getcwd()
    parser = argparse.ArgumentParser(
        description="utility to plot distances as a 2-d plot")
    parser.add_argument("-b", "--bottleneck-distances")
    parser.add_argument("-w", "--wasserstein-distances")

    args = parser.parse_args(argv[1:])
    print args.bottleneck_distances
    print args.wasserstein_distances
    distances = [
        Distances.fromJSONDict(
            load_data(args.bottleneck_distances, "distances", None, None,
                      argv[0] + ": ")),
        Distances.fromJSONDict(
            load_data(args.wasserstein_distances, "distances", None, None,
                      argv[0] + ": "))
    ]
    segment_info = distances[0].segment_info
    processes = []
    try:
        display_thread = \
          multiprocessing.Process(target=display,
                                  args=(distances,))
        display_thread.start()
        processes.append(display_thread)
        display_thread.join()
    except KeyboardInterrupt:
        print "Caught cntl-c, shutting down..."
        exit(0)
Beispiel #2
0
def main(argv) :
    """Cross-validate kernel/distance/learning parameter combinations.

    Loads the input data and train/test partitions, expands any range
    specifications given via the --*-arg options, runs the cross
    validation (optionally on a process pool), and saves the result.
    """
    parser = argparse.ArgumentParser(description="General purpose cross validation tool")
    parser.add_argument("--kernel-module", "-K")
    parser.add_argument("--kernel-arg", "-k")
    parser.add_argument("--distances-module", "-D")
    parser.add_argument("--distances-arg", "-d")
    parser.add_argument("--learning-module", "-L")
    parser.add_argument("--learning-arg", "-l")
    parser.add_argument("--infile", "-i")
    parser.add_argument("--outfile", "-o")
    parser.add_argument("--train-test-partitions", "-t")
    # Default pool size: all cores minus two, but never less than one.
    parser.add_argument("--pool", "-p", type=int, default=max(1,multiprocessing.cpu_count()-2))
    parser.add_argument("--timeout", type=int, default=0)
    args = parser.parse_args(argv[1:])
    input_json = load_data(args.infile, "input", None, None, argv[0] + ":")
    partitions_json = load_data(args.train_test_partitions, "input", None, None, argv[0] + ":")
    partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    # Only spin up a worker pool when more than one process was requested.
    if args.pool > 1 :
        pool = multiprocessing.Pool(args.pool)
    else :
        pool = None
    
    # Each *-arg may be a range specification; parse_range expands it
    # (t=float selects the element type).
    if args.kernel_arg != None :
        kernel_arg = parse_range(args.kernel_arg, t=float)
    else :
        kernel_arg = None

    if args.distances_arg != None :
        distances_arg = parse_range(args.distances_arg, t=float)
    else :
        distances_arg = None

    if args.learning_arg != None :
        learning_arg = parse_range(args.learning_arg, t=float)
    else :
        learning_arg = None

    print "Kernel %s distance %s learning %s" % (kernel_arg, distances_arg, learning_arg)
    cv = CrossValidation(input_json, 
                         config=Configuration.fromJSONDict(input_json['config']),
                         kernel_module=args.kernel_module, 
                         kernel_arg=kernel_arg, 
                         distances_module=args.distances_module, 
                         distances_arg=distances_arg, 
                         learning_module=args.learning_module, 
                         learning_arg=learning_arg, 
                         partitions=partitions, 
                         pool=pool,
                         timeout=args.timeout)
    cv.cross_validate()
    
    # Fall back to a filename derived from the configuration when no
    # output file was named.
    if args.outfile == None :
        args.outfile = CrossValidation.get_cross_validation_filename(cv.config)
    
    print "Writing %s" % args.outfile
    save_data(args.outfile, cv.toJSONDict())
Beispiel #3
0
    def __init__(self, argv):
        """Build the wx frame, load the persistence file named on the
        command line, and wire up the matplotlib canvas and handlers."""
        wx.Frame.__init__(self,None,-1,
                         'Segment Size',size=(550,350))
        parser = argparse.ArgumentParser(description="utility to plot a persistence diagram")
        parser.add_argument('file')
        self.args = vars(parser.parse_args(argv[1:]))
        self.file = self.args['file']
        pf_json = load_data(self.file, 'persistence', None, None, None)
        if pf_json == None :
            print "Could not load persistence file : %s" % (self.args['file'],)
            exit()
        self.persistences = Persistences.fromJSONDict(pf_json)
        
        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        # A single figure/subplot hosts the diagram.
        self.figure = Figure()
        self.axes = self.figure.add_subplot(111)
        
        self.canvas = FigureCanvas(self, -1, self.figure)

        # Navigation toolbar above the canvas; both stretch with the window.
        self.sizer = wx.BoxSizer(wx.VERTICAL)
        self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1, wx.LEFT | wx.TOP | wx.GROW)
        self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
        self.SetSizer(self.sizer)
        self.Fit()
        # Snapshot of the empty axes, used for fast blitting on redraw.
        self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
        self.colors = ['black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet']
        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
        self.index = 0
        self.point_Refresh()
Beispiel #4
0
def main(argv) :
    """Load a precomputed analysis file and launch the plotting App.

    The input JSON is a list of per-sample entries; judging from the
    indexing below, each entry is [diagram records, bottleneck distance
    rows, wasserstein distance rows].
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help="Precomputed analysis")

    args = parser.parse_args(argv[1:])

    data_json = load_data(args.input, None, None, None, argv[0]+ ": ")
    if data_json == None :
        print "Could not load --input : %s" % (args.input,)
        exit()

    # Per-record run metadata (window placement, size, simplex cap, timing).
    params    = [ [dict([('segment_start', d['segment_start']),
                         ('segment_size', d['segment_size']),
                         ('max_simplices', d['max_simplices']),
                         ('runtime', d['runtime'])])
                   for d in sample_data[0]]
                  for sample_data in data_json ]

    # Deserialized persistence diagram for each record.
    diagrams  = [ [PersistenceDiagram.fromJSONDict(d['diagram']) for d in sample_data[0]] for sample_data in data_json ]

    # Mean pairwise distances: sample_data[1] bottleneck, [2] wasserstein.
    bottleneck_distances  = [ [[d['mean'] for d in row] for row in sample_data[1]] \
                              for sample_data in data_json ]
    wasserstein_distances = [ [[d['mean'] for d in row] for row in sample_data[2]] \
                              for sample_data in data_json ]
    try:
        app = App(0, params, diagrams, bottleneck_distances, wasserstein_distances)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
def main(argv) :
    parser = argparse.ArgumentParser(description="utility to plot data and persistence diagrams. Also plots the 5 \
    nearest neighbors to the selected segment")
    parser.add_argument('-p', '--persistences', help="Precomputed persistence diagram")
    parser.add_argument('-k', '--kNN', default=3, help="number of nearest neighbors to plot")
    args = parser.parse_args(argv[1:])

    persistences_json = load_data(args.persistences, 'persistences', None, None, argv[0])
    if persistences_json == None :
        print "Could not load --persistences : %s" % (args.persistences,)
        exit()
    persistences = PD.fromJSONDict(persistences_json)
    full_config = copy(persistences.config)
    full_config.window_size = -1
    segments_module = importlib.import_module( 'persistence.' + persistences.config.data_type)    
    segments_class = getattr(segments_module, persistences.config.data_type) 

    full_data = segments_class(full_config)
    window_config = copy(persistences.config)
    windowed_data = segments_class(window_config)

    try:
        app = App(0, full_data, windowed_data, persistences, int(args.kNN))
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
Beispiel #6
0
def main(argv):
    """Launch the data/persistence-diagram viewer with nearest neighbors.

    Loads the file given by --persistences, reconstructs the matching
    segment class from the configuration, and runs the interactive App.
    """
    parser = argparse.ArgumentParser(
        description=
        "utility to plot data and persistence diagrams. Also plots the 5 \
    nearest neighbors to the selected segment")
    parser.add_argument('-p',
                        '--persistences',
                        help="Precomputed persistence diagram")
    parser.add_argument('-k',
                        '--kNN',
                        default=3,
                        help="number of nearest neighbors to plot")
    args = parser.parse_args(argv[1:])

    persistences_json = load_data(args.persistences, 'persistences', None,
                                  None, argv[0])
    if persistences_json == None:
        print "Could not load --persistences : %s" % (args.persistences, )
        exit()
    persistences = PD.fromJSONDict(persistences_json)
    # window_size of -1 presumably selects the whole series -- TODO confirm.
    full_config = copy(persistences.config)
    full_config.window_size = -1
    # The configured data type names both the module and the class.
    segments_module = importlib.import_module('persistence.' +
                                              persistences.config.data_type)
    segments_class = getattr(segments_module, persistences.config.data_type)

    full_data = segments_class(full_config)
    window_config = copy(persistences.config)
    windowed_data = segments_class(window_config)

    try:
        app = App(0, full_data, windowed_data, persistences, int(args.kNN))
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
def main(argv) :
    parser = argparse.ArgumentParser(description="utility to plot \
    the density of all persistence diagrams in a file.")
    parser.add_argument("-i", "--infile")    
    parser.add_argument("-d", "--degree", type=int)
    parser.add_argument("-l", "--label", help="Show only persistence diagrams of a particular label")
    args = parser.parse_args(argv[1:])
    if args.infile == None or not os.path.isfile(args.infile) :
        print "%s : --infile (%s) must specify file that exists" % \
            (argv[0], args.infile)
        sys.exit(0)

    persistence_json = load_data(args.infile, "persistence_diagrams", None, None, argv[0] + " : ")
    if persistence_json == None :
        print "Could not load --infile : %s" % (args.persistence_a,)
        exit()
    persistence = PD.fromJSONDict(persistence_json)
    labels = list(set([d.segment_info.max_label() for d in persistence.diagrams]))
    labels.sort()
    if args.label != None :
        diagrams = [d for d in persistence.diagrams if d.segment_info.max_label() == args.label]
        persistence.diagrams = diagrams
    else :
        print "Labels : %s" % labels

    try:
        app = App(0, 
                  persistence, 
                  args.degree)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
Beispiel #8
0
def main(argv):
    """Plot successive-segment distances (d[i][i+1]) from a distances file."""
    current_dir = os.getcwd()
    parser = argparse.ArgumentParser(
        description="utility to plot distance persistence diagram")
    parser.add_argument("-d", "--distances")

    args = parser.parse_args(argv[1:])
    if args.distances != None:
        distances = Distances.fromJSONDict(
            load_data(args.distances, "distances", None, None, argv[0] + ": "))
        segment_info = distances.segment_info
    else:
        print "You must supply a distances filename"
        sys.exit(1)

    processes = []
    try:
        # Plot only each segment's distance to its successor, in a child
        # process so this one stays free to handle signals.
        display_thread = \
          multiprocessing.Process(target=display,
                                  args=([distances.distances[i][i+1] for i in range(len(distances.distances)-1)], segment_info))
        display_thread.start()
        processes.append(display_thread)
        display_thread.join()
    except KeyboardInterrupt:
        print "Caught cntl-c, shutting down..."
        exit(0)
    def __init__(self, argv):
        """Load learning result files, compute per-file accuracy, and build
        the wx/matplotlib UI for comparing success levels."""
        wx.Frame.__init__(self,None,-1,
                         'Segment Size',size=(550,350))
        parser = argparse.ArgumentParser(description="utility to graph success levels for learning over a single configuration parameter")
        parser.add_argument('--label', '-l')
        parser.add_argument('files', metavar='FILE', nargs='*')
        self.args = vars(parser.parse_args(argv[1:]))
        self.files = self.args['files']

        self.filedict = []
        # load in the data files
        for f in self.files :
            learning = Learning.fromJSONDict(load_data(f, 'learning', None, None, argv[0] + ": "))
            correct = []
            for result in learning.results : 
                # Count label/result pairs that agree (Python 2-only
                # tuple-unpacking lambda).
                num_correct = reduce((lambda s, (t0, t1) : s + 1 if t0 == t1 else s), 
                                     zip(result['test_labels'], result['test_results']), 0)
                correct.append(float(num_correct) / float(len(result['test_labels'])))
            print "file %s correct %0.2f%%" % (f, numpy.average(correct)*100.0)
            self.filedict.append(dict([('file', f), ('correct', numpy.average(correct)), ('config', learning.config)]))
            # Derive a human-readable label from the learning type embedded
            # in the filename.
            if "PersistenceKernelLearning" in f :
                self.filedict[-1]['label'] = "Persistence Kernel " + learning.config.data_index
                if learning.config.post_process != None :
                    self.filedict[-1]['label'] = self.filedict[-1]['label'] + " " + learning.config.post_process
            elif "AverageKernelLearning" in f :
                self.filedict[-1]['label'] = "Average Kernel"
                if learning.config.post_process != None :
                    self.filedict[-1]['label'] = self.filedict[-1]['label'] + " " + learning.config.post_process
            elif "ChaoticInvariantFeaturesLearning" in f :
                self.filedict[-1]['label'] = " Chaotic Invariant Features"
            elif "ScaleSpaceSimilarityLearning" in f :
                self.filedict[-1]['label'] = "Scale Space Similarity"
            elif "EuclideanDistancesLearning" in f :
                self.filedict[-1]['label'] = "Euclidean Distance"
            # Spread statistics only make sense with more than one result.
            if (len(correct) > 1) :
                self.filedict[-1]['correct_std'] = numpy.std(correct)
                # NOTE(review): numpy.percentile takes percentages (75/25);
                # 0.75/0.25 here look like they were meant as fractions --
                # confirm intended quartiles.
                self.filedict[-1]['correct_top'] = numpy.percentile(correct, 0.75)
                self.filedict[-1]['correct_bot'] = numpy.percentile(correct, 0.25)
            else :
                self.filedict[-1]['correct_std'] = 0.0
                self.filedict[-1]['correct_top'] = 0.0
                self.filedict[-1]['correct_bot'] = 0.0
        
        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        self.figure = Figure()
        self.axes = self.figure.add_subplot(211)
        
        self.canvas = FigureCanvas(self, -1, self.figure)

        self.sizer = wx.BoxSizer(wx.VERTICAL)
        self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1, wx.LEFT | wx.TOP | wx.GROW)
        self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
        self.SetSizer(self.sizer)
        self.title = self.figure.suptitle("")
        self.Fit()
        # Snapshot for fast blitting during redraws.
        self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
        self.colors = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#b3de69', '#fb9a99', '#e31a1c', '#fb8072', '#ff7f00', '#a65628', '#fdb462', '#cab2d6']
        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
        self.point_Refresh()
 def __init__(self, argv):
     """Load each persistence file named on the command line and build
     one figure/canvas pair per file inside a vertical sizer."""
     wx.Frame.__init__(self,None,-1,
                      'Segment Size',size=(550,350))
     parser = argparse.ArgumentParser(description="utility to plot multiple persistence diagrams")
     parser.add_argument('files', nargs="*")
     self.args = vars(parser.parse_args(argv[1:]))
     self.files = self.args['files']
     self.persistences = []
     for f in self.files :
         pf_json = load_data(f, 'persistence', None, None, None)
         if pf_json == None :
             print "Could not load persistence file : %s" % (f,)
             sys.exit(1)
         self.persistences.append(PD.fromJSONDict(pf_json))
     
     self.SetBackgroundColour(wx.NamedColour("WHITE"))
     self.displays = []
     self.sizer = wx.BoxSizer(wx.VERTICAL)
     # One display dict (figure, axes, canvas) per input file.
     for f in self.files :
         self.displays.append(dict([('figure', Figure())]))
         self.displays[-1]['axes'] = self.displays[-1]['figure'].add_subplot(111)
         self.displays[-1]['canvas'] = FigureCanvas(self, -1, self.displays[-1]['figure'])
         self.sizer.Add(NavigationToolbar2Wx(self.displays[-1]['canvas']), 1, wx.LEFT | wx.TOP | wx.GROW)
         self.sizer.Add(self.displays[-1]['canvas'], 8, wx.LEFT | wx.TOP | wx.GROW)
     self.SetSizer(self.sizer)
     self.Fit()
     # Blit snapshot taken from the first display's axes.
     self.background = self.displays[0]['axes'].figure.canvas.copy_from_bbox(self.displays[0]['axes'].bbox)
     self.colors = ['red', 'yellow', 'orange', 'blue', 'green', 'violet', 'black']
     self.Bind(wx.EVT_PAINT, self.OnPaint)
     self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
     self.index = 0
     self.point_Refresh()
Beispiel #11
0
def main(argv) :
    """Load a numbered series of data files (prefix.0000 ... prefix.9999)
    and run the full-vs-sparse rips filtration plotting app."""
    cli = argparse.ArgumentParser(description="utility to plot \
    persistence diagrams for examining full vs sparse rips filtration")
    cli.add_argument('-p', '--prefix', help="data file prefix (e.g. foo.json to plot foo.json.0000 - foo.json.9999)")
    options = cli.parse_args(argv[1:])
    # Match exactly four trailing digits, in sorted (numeric) order.
    matches = sorted(glob.glob(options.prefix + ".[0-9][0-9][0-9][0-9]"))
    error_prefix = argv[0] + " : "
    full_data = [load_data(name, None, None, None, error_prefix) for name in matches]
    try:
        App(0, full_data).MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
Beispiel #12
0
def main(argv) :
    """Load bottleneck and wasserstein distance files and plot them as a 2-d plot."""
    current_dir = os.getcwd()
    parser = argparse.ArgumentParser(description="utility to plot distances as a 2-d plot")
    parser.add_argument("-b", "--bottleneck-distances")
    parser.add_argument("-w", "--wasserstein-distances")

    args = parser.parse_args(argv[1:])
    print args.bottleneck_distances
    print args.wasserstein_distances
    # Index 0: bottleneck, index 1: wasserstein.
    distances = [Distances.fromJSONDict(load_data(args.bottleneck_distances, "distances", None, None, argv[0]+": ")),
                 Distances.fromJSONDict(load_data(args.wasserstein_distances, "distances", None, None, argv[0]+": "))]
    segment_info = distances[0].segment_info
    processes = []
    try:
        # Render in a child process so this one stays free to handle signals.
        display_thread = \
          multiprocessing.Process(target=display, 
                                  args=(distances,))
        display_thread.start()
        processes.append(display_thread)
        display_thread.join()
    except KeyboardInterrupt:
        print "Caught cntl-c, shutting down..."
        exit(0)
Beispiel #13
0
def parse_configuration_files(files, verbose=True):
    """Expand configuration JSON file(s) into a list of Configuration objects.

    Each file may hold one dict or a list of dicts; the windowing and
    segment parameters may be range specifications, and the cross product
    of all expanded ranges yields one Configuration per combination.

    files -- a filename or a list of filenames.
    verbose -- when True, prefix load errors with the program name.
    """
    if not isinstance(files, list):
        files = [files]

    final_configs = []
    for f in files:
        configs = load_data(f, "Configurations", None, None,
                            (sys.argv[0] + ": ") if verbose else None)
        if configs == None:
            sys.exit(0)
        if isinstance(configs, dict):
            configs = [configs]
        # Helpers tolerating missing keys: expand a range if present,
        # coerce scalars to one-element lists, and fetch-or-None.
        cond_parse_range = lambda x, y, t: parse_range(str(x[
            y]), t=t) if y in x.keys() else None
        cond_list = lambda x: x if isinstance(x, list) else [x]
        cond_get = lambda x, y: x[y] if y in x.keys() else None
        for config in configs:
            # One Configuration per combination of expanded parameters.
            for (window_size, window_stride, segment_size, segment_stride, persistence_epsilon, max_simplices) in \
                itertools.product(cond_list(cond_parse_range(config, 'window_size', int)),
                                  cond_list(cond_parse_range(config, 'window_stride', int)),
                                  cond_list(cond_parse_range(config, 'segment_size', int)),
                                  cond_list(cond_parse_range(config, 'segment_stride', int)),
                                  cond_list(cond_parse_range(config, 'persistence_epsilon', float)),
                                  cond_list(cond_parse_range(config, 'max_simplices', int))) :
                final_configs.append(
                    Configuration(
                        max_simplices=max_simplices,
                        persistence_epsilon=persistence_epsilon,
                        segment_stride=segment_stride,
                        segment_size=segment_size,
                        window_size=window_size,
                        window_stride=window_stride,
                        kernel_scale=cond_get(config, 'kernel_scale'),
                        kernel_gamma=cond_get(config, 'kernel_gamma'),
                        invariant_epsilon=cond_get(config,
                                                   'invariant_epsilon'),
                        data_file=cond_get(config, 'data_file'),
                        data_index=cond_get(config, 'data_index'),
                        label_index=cond_get(config, 'label_index'),
                        out_directory=cond_get(config, 'out_directory'),
                        learning_split=cond_get(config, 'learning_split'),
                        learning_iterations=cond_get(config,
                                                     'learning_iterations'),
                        learning_C=cond_get(config, 'learning_C'),
                        persistence_degree=cond_get(config,
                                                    'persistence_degree'),
                        data_type=cond_get(config, 'data_type'),
                        post_process=cond_get(config, 'post_process'),
                        post_process_arg=cond_get(config, 'post_process_arg')))
    return final_configs
Beispiel #14
0
def main(argv) :
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--distances')

    args = parser.parse_args(argv[1:])

    data_json = load_data(args.distances, "distances" , None, None, argv[0]+": ")
    distances = Distances.fromJSONDict(data_json)
    if data_json == None :
        print "Could not load --distances : %s" % (args.distances,)
        exit()

    try:
        app = App(0, distances)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
def parse_configuration_files(files, verbose=True) :
    """Expand configuration JSON file(s) into a list of Configuration objects.

    Each file may hold one dict or a list of dicts; the windowing and
    segment parameters may be range specifications, and the cross product
    of all expanded ranges yields one Configuration per combination.

    files -- a filename or a list of filenames.
    verbose -- when True, prefix load errors with the program name.
    """
    if not isinstance(files, list) :
        files = [files]

    # Helpers tolerating missing keys, hoisted out of the loop (idiom
    # fixes: `y in x` instead of `y in x.keys()`, `is None` comparisons,
    # named functions instead of assigned lambdas). Behavior unchanged.
    def cond_parse_range(x, y, t) :
        # Expand the range specification stored under key y, if present.
        return parse_range(str(x[y]), t=t) if y in x else None

    def cond_list(x) :
        # Normalize scalars to one-element lists for itertools.product.
        return x if isinstance(x, list) else [x]

    def cond_get(x, y) :
        # Fetch-or-None for optional configuration keys.
        return x[y] if y in x else None

    final_configs = []
    for f in files :
        configs = load_data(f, "Configurations", None, None, (sys.argv[0] + ": ") if verbose else None)
        if configs is None :
            sys.exit(0)
        if isinstance(configs, dict) :
            configs = [configs]
        for config in configs :
            # One Configuration per combination of expanded parameters.
            for (window_size, window_stride, segment_size, segment_stride, persistence_epsilon, max_simplices) in \
                itertools.product(cond_list(cond_parse_range(config, 'window_size', int)),
                                  cond_list(cond_parse_range(config, 'window_stride', int)),
                                  cond_list(cond_parse_range(config, 'segment_size', int)),
                                  cond_list(cond_parse_range(config, 'segment_stride', int)),
                                  cond_list(cond_parse_range(config, 'persistence_epsilon', float)),
                                  cond_list(cond_parse_range(config, 'max_simplices', int))) :
                final_configs.append(Configuration(max_simplices       = max_simplices, 
                                                   persistence_epsilon = persistence_epsilon, 
                                                   segment_stride      = segment_stride,
                                                   segment_size        = segment_size, 
                                                   window_size         = window_size, 
                                                   window_stride       = window_stride,
                                                   kernel_scale        = cond_get(config, 'kernel_scale'), 
                                                   kernel_gamma        = cond_get(config, 'kernel_gamma'), 
                                                   invariant_epsilon   = cond_get(config, 'invariant_epsilon'), 
                                                   data_file           = cond_get(config, 'data_file'), 
                                                   data_index          = cond_get(config, 'data_index'),
                                                   label_index         = cond_get(config, 'label_index'),
                                                   out_directory       = cond_get(config, 'out_directory'), 
                                                   learning_split      = cond_get(config, 'learning_split'),
                                                   learning_iterations = cond_get(config, 'learning_iterations'), 
                                                   learning_C          = cond_get(config, 'learning_C'), 
                                                   persistence_degree  = cond_get(config, 'persistence_degree'),
                                                   data_type           = cond_get(config, 'data_type'),
                                                   post_process        = cond_get(config, 'post_process'),
                                                   post_process_arg    = cond_get(config, 'post_process_arg')))
    return final_configs
    def __init__(self, argv):
        """Load each persistence file named on the command line and build
        one figure/canvas pair per file inside a vertical sizer."""
        wx.Frame.__init__(self, None, -1, 'Segment Size', size=(550, 350))
        parser = argparse.ArgumentParser(
            description="utility to plot multiple persistence diagrams")
        parser.add_argument('files', nargs="*")
        self.args = vars(parser.parse_args(argv[1:]))
        self.files = self.args['files']
        self.persistences = []
        for f in self.files:
            pf_json = load_data(f, 'persistence', None, None, None)
            if pf_json == None:
                print "Could not load persistence file : %s" % (f, )
                sys.exit(1)
            self.persistences.append(PD.fromJSONDict(pf_json))

        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        self.displays = []
        self.sizer = wx.BoxSizer(wx.VERTICAL)
        # One display dict (figure, axes, canvas) per input file.
        for f in self.files:
            self.displays.append(dict([('figure', Figure())]))
            self.displays[-1]['axes'] = self.displays[-1][
                'figure'].add_subplot(111)
            self.displays[-1]['canvas'] = FigureCanvas(
                self, -1, self.displays[-1]['figure'])
            self.sizer.Add(NavigationToolbar2Wx(self.displays[-1]['canvas']),
                           1, wx.LEFT | wx.TOP | wx.GROW)
            self.sizer.Add(self.displays[-1]['canvas'], 8,
                           wx.LEFT | wx.TOP | wx.GROW)
        self.SetSizer(self.sizer)
        self.Fit()
        # Blit snapshot taken from the first display's axes.
        self.background = self.displays[0][
            'axes'].figure.canvas.copy_from_bbox(self.displays[0]['axes'].bbox)
        self.colors = [
            'red', 'yellow', 'orange', 'blue', 'green', 'violet', 'black'
        ]
        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
        self.index = 0
        self.point_Refresh()
Beispiel #17
0
def main(argv):
    parser = argparse.ArgumentParser(description="utility to plot \
    the density of all persistence diagrams in a file.")
    parser.add_argument("-i", "--infile")
    parser.add_argument("-d", "--degree", type=int)
    parser.add_argument(
        "-l",
        "--label",
        help="Show only persistence diagrams of a particular label")
    args = parser.parse_args(argv[1:])
    if args.infile == None or not os.path.isfile(args.infile):
        print "%s : --infile (%s) must specify file that exists" % \
            (argv[0], args.infile)
        sys.exit(0)

    persistence_json = load_data(args.infile, "persistence_diagrams", None,
                                 None, argv[0] + " : ")
    if persistence_json == None:
        print "Could not load --infile : %s" % (args.persistence_a, )
        exit()
    persistence = PD.fromJSONDict(persistence_json)
    labels = list(
        set([d.segment_info.max_label() for d in persistence.diagrams]))
    labels.sort()
    if args.label != None:
        diagrams = [
            d for d in persistence.diagrams
            if d.segment_info.max_label() == args.label
        ]
        persistence.diagrams = diagrams
    else:
        print "Labels : %s" % labels

    try:
        app = App(0, persistence, args.degree)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
Beispiel #18
0
def main(argv) :
    """Plot successive-segment distances (d[i][i+1]) from a distances file."""
    current_dir = os.getcwd()
    parser = argparse.ArgumentParser(description="utility to plot distance persistence diagram")
    parser.add_argument("-d", "--distances")

    args = parser.parse_args(argv[1:])
    if args.distances != None :
        distances = Distances.fromJSONDict(load_data(args.distances, "distances", None, None, argv[0]+": "))
        segment_info = distances.segment_info
    else :
        print "You must supply a distances filename"
        sys.exit(1)

    processes = []
    try:
        # Plot only each segment's distance to its successor, in a child
        # process so this one stays free to handle signals.
        display_thread = \
          multiprocessing.Process(target=display, 
                                  args=([distances.distances[i][i+1] for i in range(len(distances.distances)-1)], segment_info))
        display_thread.start()
        processes.append(display_thread)
        display_thread.join()
    except KeyboardInterrupt:
        print "Caught cntl-c, shutting down..."
        exit(0)
Beispiel #19
0
    def __init__(self, argv):
        """Collect UCR-2015 learning result files, keep each dataset's best
        score per learning type, print a summary table, and build the
        wx/matplotlib chart UI."""
        wx.Frame.__init__(self,
                          None,
                          -1,
                          'UCR 2015 Learning Results',
                          size=(550, 350))
        parser = argparse.ArgumentParser(
            description=
            "utility to graph success levels for learning on the UCR Dataset")
        parser.add_argument(
            '-d',
            '--directory',
            help='Directory where the learning results are stored',
            required=False)
        parser.add_argument('files', metavar='FILE', nargs='*')
        self.args = vars(parser.parse_args(argv[1:]))
        # --directory wins over explicit files; giving neither is a usage error.
        if self.args['directory'] != None and len(self.args['files']) != 0:
            print "Ignoring files after the directory argument"
        elif self.args['directory'] == None and len(self.args['files']) == 0:
            parser.print_help()
            sys.exit()

        if self.args['directory'] != None:
            if os.path.isdir(self.args['directory']):
                # Recursively locate all *Learning.json* files via find(1).
                self.files = subprocess.check_output([
                    "find", self.args['directory'], "-name", "*Learning.json*"
                ])
                self.files = [f for f in self.files.split("\n") if f != '']
            else:
                parser.print_help()
                sys.exit()
        else:
            self.files = self.args['files']

        def learning_type(text):
            # Derive a learning-type key from a result filename: the last
            # '-'-separated field minus the 'Learning' suffix; for kernel
            # types append the field after 'win' (or '-Chaos').
            fields = text.split('-')
            t = fields[-1].split('.')[0][:-len('Learning')]
            try:
                if t == 'PersistenceKernel' or t == 'ScaleSpaceSimilarity' or t == 'MultipleKernel' or t == 'AverageKernel':
                    if 'ChaosPost' in fields:
                        w = '-Chaos'
                    else:
                        w = '-' + fields[fields.index('win') + 1]
                else:
                    w = ''
            except ValueError:
                # No 'win' field present.
                w = ''
            return t + w

        self.learning_types = list(set([learning_type(f) for f in self.files]))
        self.learning_types.sort()
        # Dataset name is the next-to-last path component.
        datasets = list(set([f.split('/')[-2] for f in self.files]))
        datasets.sort()

        # filedict[dataset][learning_type] -> best score so far (a list of
        # weighted scores when MKL weights are present, else a float).
        self.filedict = dict([(s, dict([(t, 0) for t in self.learning_types]))
                              for s in datasets])

        # load in the data files
        for f in self.files:
            learning_t = learning_type(f)
            dataset = f.split('/')[-2]
            learning_json = load_data(f, "learning", None, None, None)
            learning = Learning.fromJSONDict(learning_json)
            best = self.filedict[dataset][learning_t]
            current = learning.get_average_correct()
            # Keep the best result seen so far; list entries compare by sum.
            if (isinstance(best, list)
                    and current > sum(best)) or (not isinstance(best, list)
                                                 and current > best):
                if learning.results[0].mkl_weights != None:
                    self.filedict[dataset][learning_t] = [
                        current * w for w in learning.results[0].mkl_weights
                    ]
                else:
                    self.filedict[dataset][learning_t] = current

        # Print a per-dataset summary aligned on the longest dataset name.
        keylen = max([len(key) for (key, val) in self.filedict.items()])
        format = '%' + str(keylen) + 's %s'
        for (key, val) in self.filedict.items():
            vals = [
                ("%s %02.2f%%" %
                 (k, v * 100.0 if not isinstance(v, list) else sum(v) * 100.0))
                + ((" " + str(["%02.2f%%" % v_
                               for v_ in v])) if isinstance(v, list) else "")
                for (k, v) in val.items()
            ]
            vals.sort()
            print format % (key, vals)

        self.frame = 0
        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        self.figure = Figure()
        self.axes = self.figure.add_subplot(121)
        # Group bars per dataset, leaving a 1.5-bar gap between groups.
        plot_keys = self.filedict.items()[0][1].keys()
        dataset_width = len(plot_keys) + 1.5
        self.axes.set_xticks([(0.5 + i) * dataset_width
                              for i in range(len(self.filedict.items()))])
        self.axes.set_xticklabels(
            [key for (key, value) in self.filedict.items()])
        self.axes.set_ylim(0.0, 1.0, auto=False)

        self.canvas = FigureCanvas(self, -1, self.figure)

        self.sizer = wx.BoxSizer(wx.VERTICAL)
        self.sizer.Add(self.canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
        self.SetSizer(self.sizer)
        self.title = self.figure.suptitle("UCR Learning")
        #self.Fit()
        # Blit snapshot for fast redraws.
        self.background = self.axes.figure.canvas.copy_from_bbox(
            self.axes.bbox)
        self.colors = [
            'black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet'
        ]
        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
        self.point_Refresh()
Beispiel #20
0
# NOTE(review): this snippet is a fragment of a larger script -- it uses
# argparse, multiprocessing and sys, whose imports are not visible here;
# confirm they are imported earlier in the original file.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import SymLogNorm
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram
from scipy import stats

# Command-line options: the persistence-diagram input file, the diagram
# degree of interest, and the number of worker processes.
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--infile")
parser.add_argument("-d", "--degree", type=int)

parser.add_argument("-p", "--pool", default=multiprocessing.cpu_count(), type=int)
args = parser.parse_args(sys.argv[1:])

persistences = PersistenceDiagrams.fromJSONDict(load_data(args.infile, "persistences", None, None, sys.argv[0] + ": "))

# Collect the distinct segment labels present in the loaded diagrams.
labels = list(set([diagram.segment_info.max_label() for diagram in persistences.diagrams]))
labels.sort()

# NOTE(review): the dynamically computed ``labels`` list above is
# immediately discarded by this hard-coded label -> activity-name mapping;
# if the computed list was meant to be used, that is a latent bug -- confirm.
labels = dict([('1', 'Working at Computer'),
               ('2', 'Standing Up, Walking, Going Up\Down Stairs'),
               ('3', 'Standing'),
               ('4', 'Walking'),
               ('5', 'Going Up\Down Stairs'),
               ('6', 'Walking and Talking with Someone'),
               ('7', 'Talking while Standing')])
print "Labels: '%s'" % ("', '".join(labels.values()),)
# Accumulators for per-diagram plot extents (the fragment is cut off here).
xmin = []
xmax = []
ymin = []
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes.Configuration import Configuration
from persistence.CrossValidation import CrossValidation

if __name__ == "__main__":
    # Regenerate kernel (and, in the full script, learning) artifacts from a
    # stored cross-validation result.
    parser = argparse.ArgumentParser(
        description=
        "Utility to generate kernels and learning results from a cross validation result"
    )
    parser.add_argument("--pool", "-p", type=int)
    parser.add_argument("--input", "-i")
    parser.add_argument("--cross-validation", "-v")
    parser.add_argument("--train-test-partitions", "-t")
    args = parser.parse_args(sys.argv[1:])
    print args.cross_validation
    cv_json = load_data(args.cross_validation, "cross validation", None, None,
                        sys.argv[0] + ": ")
    cv = CrossValidation.fromJSONDict(cv_json)
    # The winning configuration chosen by the cross-validation run.
    config = cv.config
    print config

    if cv.kernel_module != None:
        # Resolve the kernel implementation named by the cross-validation
        # result (module and class share the same name by convention here).
        kernel_module = importlib.import_module("persistence." +
                                                cv.kernel_module)
        kernel_class = getattr(kernel_module, cv.kernel_module)
        scale_arg = kernel_class.get_scale_arg()
        kernel_filename = kernel_class.get_kernel_filename(config)
        # Command line to rebuild the kernel with the cross-validated scale
        # parameter.  NOTE(review): the command is assembled but not executed
        # in this fragment; the original script continues past this point.
        kernel_command = [
            "python", "-u", "-O", "-m", "persistence." + cv.kernel_module,
            "--" + scale_arg.replace("_", "-"),
            str(config[scale_arg]), "--infile", args.input
        ]
    def __init__(self, argv):
        """Build the 'best representative segment' distance-plot window.

        Loads distances, learning results and train/test partitions from the
        files named on the command line, picks one representative segment per
        label, renders the plot, saves it as a PDF, then exits the process.

        NOTE(review): this method calls sys.exit(0) on success, so the frame
        never enters an interactive loop -- confirm that is intentional.
        """
        wx.Frame.__init__(self,None,-1,
                         'Bottleneck Distance',size=(550,350))
        parser = argparse.ArgumentParser(description="Utility to plot Bottleneck Distance average of 'representative' segments for each label")
        parser.add_argument('-d','--distances')
        parser.add_argument('-l','--learning')
        parser.add_argument('-p','--partition')
        parser.add_argument('-t','--threshold', default=0.75, type=float)
        self.args = parser.parse_args(argv[1:])
        import traceback
        try :
            self.distances = \
              Distances.fromJSONDict(load_data(self.args.distances, 'distances', None, None, argv[0]+": "))
            self.learning = \
              Learning.fromJSONDict(load_data(self.args.learning, 'learning', None, None, argv[0]+": "))
            self.partitions = \
              TrainTestPartitions.fromJSONDict(load_data(self.args.partition, 'partition', None, None, argv[0]+": "))
        except :
            print "Could not parse input files: %s" % (traceback.format_exc(),)
            sys.exit(1)

        # Filter to only the segments that get above the threshold
        # (segments with no correctness score, c == None, are kept as well).
        self.segments = []

        for i in range(len(self.distances.segment_info)) :
            c = segment_correctness(i, self.learning, self.partitions)
            if c == None or c > self.args.threshold :
                self.segments.append((i, self.distances.segment_info[i].max_label()))
        # Zero-pad segment indices so the lexicographic sort below orders by
        # (label, index).  NOTE(review): math.log is the natural logarithm;
        # log10 was presumably intended for a decimal pad width -- confirm.
        sort_format = "0" * int(math.ceil(math.log(len(self.distances.segment_info))))
        self.segments.sort(key=lambda x: str(x[1]+(("%"+sort_format+"d") % x[0])))

        self.label_index = 0
        self.labels = list(set([x[1] for x in self.segments]))
        self.labels.sort()

        # First and last position of each label's run in the sorted list.
        self.segment_minimums = dict([(l, min([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l])) \
                                       for l in self.labels])
        self.segment_maximums = dict([(l, max([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l])) \
                                       for l in self.labels])

        # Representative segment per label: the one minimizing the average
        # distance (per average_distance) within that label.
        self.segment_indices = dict([(l, min([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l], key=lambda x:average_distance(x,l,self.distances))) \
                                       for l in self.labels])

        # Standard wx + matplotlib plumbing: figure, canvas, toolbar, sizer.
        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        self.figure = Figure()
        self.axes = self.figure.add_subplot(111)

        self.canvas = FigureCanvas(self, -1, self.figure)

        self.sizer = wx.BoxSizer(wx.VERTICAL)
        self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1, wx.LEFT | wx.TOP | wx.GROW)
        self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
        self.SetSizer(self.sizer)
        self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
        self.colors = ['black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet']
        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)

        self.index = 0
        self.point_Refresh()
        self.Fit()
        # Save the rendered plot into the configured output directory, named
        # after that directory and the window size, then terminate.
        self.figure.savefig(self.distances.config.out_directory + "/" + self.distances.config.out_directory.split('/')[-1] + '-win-' + str(self.distances.config.window_size) + '-best-distances.pdf')
        sys.exit(0)
import os
import sys
import math
import itertools
import multiprocessing

from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram
def avg(l) :
    """Arithmetic mean of the numbers in l (ZeroDivisionError if l is empty)."""
    total = sum(l, 0.0)
    return total / len(l)
def average_density(diagram) :
    """Estimate point density for the degree-1 points of a persistence diagram.

    For each degree-1 point, averages its Euclidean distance to the points at
    positions 1..5 of the (unsorted) pairwise-distance list, then returns the
    mean of those per-point averages.  Returns 0.0 when the diagram has fewer
    than three degree-1 points.

    Parameters:
        diagram: object with a ``points`` attribute whose entries are
            indexable triples ``(birth, death, degree)``.
    """
    points = [(p[0], p[1]) for p in diagram.points if p[2] == 1]
    if len(points) > 2 :
        diagram_distances = []
        for (x0, y0) in points :
            # BUG FIX: the original computed sqrt(2 * (x0 - x1)**2), using the
            # x difference twice and ignoring y entirely; use the actual
            # Euclidean distance between the two diagram points.
            distances = [math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1))
                         for (x1, y1) in points]
            # NOTE(review): ``distances`` is unsorted, so [1:6] skips the
            # self-distance only for the first point; if a 5-nearest-neighbour
            # average was intended, the list should be sorted first -- confirm.
            neighbours = distances[1:6]
            diagram_distances.append(sum(neighbours, 0.0) / len(neighbours))
        return sum(diagram_distances, 0.0) / len(diagram_distances)
    else :
        return 0.0



if __name__ == "__main__" :
    # One density computation per persistence-diagram file named on the
    # command line; leave a couple of cores free for the rest of the system.
    worker_pool = multiprocessing.Pool(multiprocessing.cpu_count() - 2)
    for filename in sys.argv[1:] :
        diagrams = PersistenceDiagrams.fromJSONDict(
            load_data(filename, None, None, None, sys.argv[0] + " : "))
        save_data(filename + "-density",
                  list(worker_pool.map(average_density, diagrams.diagrams)))
def main(argv) :
    """Interactive plot of segment data and persistence diagrams.

    Builds two views of the input data -- a "full" (unwindowed) one and a
    windowed one -- either from a precomputed persistence file (which makes
    every other option irrelevant) or from the command-line options, then
    runs the wx App with them.
    """
    parser = argparse.ArgumentParser(description="utility to plot \
    data and dynamically generated persistence diagrams. Using \
    the persistence option uses precomputed persistence and ignores all \
    the other options.")
    parser.add_argument('-i', '--infile', help="Data to read")
    parser.add_argument('-m', '--max-simplices', default=2000000,
                        type=int, help="Maximum number of simplices for persistence \
                        generation")
    parser.add_argument('-I', '--data-index', help="Index of data field for data types that require it")
    parser.add_argument('-L', '--label-index', type=int, help="Index of label field for data types that require it")
    parser.add_argument('-s', '--segment-size', type=int, help="Segment size for data types that require it")
    parser.add_argument('-S', '--segment-stride', type=int, help="Segment stride for data types that require it")
    parser.add_argument('-w', '--window-size', help="Window size for \
    persistence generation. Integer is a literal window size, float \
    between 0 and 1 is a fraction of the total Segment size")
    parser.add_argument('-p', '--persistences', help="Precomputed persistence diagram")
    parser.add_argument('-t', '--data-type', default="UCRSegments", help="Data type of the segments in infile")
    args = parser.parse_args(argv[1:])
    # Precomputed path: everything comes from the stored Configuration.
    if args.persistences != None :
        persistences_json = load_data(args.persistences, 'persistences', None, None, argv[0])
        if persistences_json == None :
            print "Could not load --persistences : %s" % (args.persistences,)
            exit()
        persistences = PD.fromJSONDict(persistences_json)
        # window_size == -1 marks the unwindowed "full" view of the data.
        full_config = copy(persistences.config)
        full_config.window_size = -1
        segments_module = importlib.import_module( 'persistence.' + persistences.config.data_type)
        segments_class = getattr(segments_module, persistences.config.data_type)

        full_data = segments_class(full_config)
        window_config = copy(persistences.config)
        windowed_data = segments_class(window_config)
    else :
        # Dynamic path: resolve the segment class named by --data-type
        # (module and class share the same name by convention here).
        segments_module = importlib.import_module( 'persistence.' + args.data_type)
        segments_class = getattr(segments_module, args.data_type)
        full_config = Configuration.fromJSONDict(dict([ ("data_type", args.data_type),
                                                        ("data_file", args.infile),
                                                        ("label_index", 0),
                                                        ("max_simplices", args.max_simplices),
                                                        ("window_size", -1),
                                                        ("window_stride", 1)]))
        # A ':'-separated --infile denotes multiple data files.
        if full_config.data_file.find(":") != -1 :
            full_config.data_file = full_config.data_file.split(':')
        # Command-line values, when given, override the defaults above.
        if args.segment_size != None :
            full_config.segment_size = args.segment_size
        if args.segment_stride != None :
            full_config.segment_stride = args.segment_stride
        if args.data_index != None :
            full_config.data_index = parse_index(args.data_index)[0]
        if args.label_index != None :
            full_config.label_index = args.label_index
        full_data = segments_class(full_config)
        # Fractional --window-size is a fraction of the full window size;
        # an integral value is taken literally.
        window_size = float(args.window_size)
        if (window_size < 1.0) :
            window_size = int(window_size * full_data.config.window_size)
        else :
            window_size = int(args.window_size)
        window_config =  Configuration.fromJSONDict(dict([ ("data_type", args.data_type),
                                                           ("data_file", full_config.data_file),
                                                           ("label_index", 0),
                                                           ("max_simplices", args.max_simplices),
                                                           ("window_size", window_size),
                                                           ("window_stride", 1)]))
        if args.segment_size != None :
            window_config.segment_size = args.segment_size
        if args.segment_stride != None :
            window_config.segment_stride = args.segment_stride
        if args.data_index != None :
            window_config.data_index = parse_index(args.data_index)[0]
        if args.label_index != None :
            window_config.label_index = args.label_index
        windowed_data = segments_class(window_config)
        # Placeholder diagrams: one None slot per segment, filled on demand.
        persistences = PD(windowed_data.config, [None for segment in windowed_data.segments])
    try:
        app = App(0, full_data, windowed_data, persistences)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
def main(argv):
    """Parse the command line, load whichever inputs were supplied
    (segments, persistence diagrams and/or distances), then run the
    distance computation and the display in two side-by-side processes."""
    current_dir = os.getcwd()
    parser = argparse.ArgumentParser(
        description="utility to plot distance persistence diagram")
    parser.add_argument("-s", "--segments")
    parser.add_argument("-P", "--persistence-diagrams")
    parser.add_argument("-d", "--distances")
    parser.add_argument("-t", "--distance-type", default="bottleneck",
                        help="Distance Measure to use, Bottleneck or Wasserstein")
    parser.add_argument("-p", "--pool",
                        default=max(2, multiprocessing.cpu_count() - 4),
                        help="Threads of computation to use", type=int)
    parser.add_argument("-m", "--max-simplices", default=1000000, type=int)
    parser.add_argument("-e", "--epsilon", type=float)
    args = parser.parse_args(argv[1:])

    # Load whichever input kinds were given; the last successful load wins
    # for segment_info (distances over diagrams over segments), matching the
    # original precedence.
    segments = None
    diagrams = None
    distances = None
    if args.segments is not None:
        segments = Segments.fromJSONDict(
            load_data(args.segments, "segments", None, None, argv[0] + ": "))
        segment_info = [SegmentInfo.fromJSONDict(s) for s in segments.segments]
    if args.persistence_diagrams is not None:
        diagrams = PersistenceDiagrams.fromJSONDict(
            load_data(args.persistence_diagrams, "persistence diagrams", None,
                      None, argv[0] + ": "))
        segment_info = [d.segment_info for d in diagrams.diagrams]
    if args.distances is not None:
        distances = Distances.fromJSONDict(
            load_data(args.distances, "distances", None, None, argv[0] + ": "))
        segment_info = distances.segment_info

    # Shared state between the two processes: slot 0 is the reference segment
    # (distance 0.0 to itself); every other slot starts at -1.0, meaning
    # "not yet computed".
    distance_array = multiprocessing.Array(
        'd', [0.0] + [-1.0] * (len(segment_info) - 1))
    segment_compare = multiprocessing.Value('i', 0)
    processes = []
    try:
        compute_thread = multiprocessing.Process(
            target=compute,
            args=(args.distance_type, distance_array, segment_compare,
                  args.pool - 1, args.max_simplices, args.epsilon, segments,
                  diagrams, distances))
        compute_thread.start()
        display_thread = multiprocessing.Process(
            target=display,
            args=(args.distance_type, distance_array, segment_compare,
                  segment_info))
        display_thread.start()
        processes.append(display_thread)
        display_thread.join()
        compute_thread.join()
    except KeyboardInterrupt:
        print("Caught cntl-c, shutting down...")
        exit(0)
Beispiel #26
0
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
import numpy
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes.Segments import Segments
# Summarise tau and window-size statistics for every segment file named on
# the command line, one row per file, sorted by the dataset name embedded in
# the filename (the text between '-data-' and '-seg-').
data = []
for path in sys.argv[1:]:
    loaded = Segments.fromJSONDict(
        load_data(path, "segments", None, None, sys.argv[0] + ": "))
    taus = [int(segment.tau) for segment in loaded.segments]
    window_sizes = [len(segment.windows[0]) for segment in loaded.segments]
    dataset_name = path[path.find('-data-') + 6:path.find('-seg-')]
    data.append([dataset_name,
                 min(taus), max(taus), numpy.mean(taus),
                 min(window_sizes), max(window_sizes),
                 numpy.mean(window_sizes)])

data.sort()
print(data)
Beispiel #27
0
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram


def avg(l):
    """Arithmetic mean of the numbers in l (ZeroDivisionError if l is empty)."""
    total = sum(l, 0.0)
    return total / len(l)


def average_density(diagram):
    """Estimate point density for the degree-1 points of a persistence diagram.

    For each degree-1 point, averages its Euclidean distance to the points at
    positions 1..5 of the (unsorted) pairwise-distance list, then returns the
    mean of those per-point averages.  Returns 0.0 when the diagram has fewer
    than three degree-1 points.

    Parameters:
        diagram: object with a ``points`` attribute whose entries are
            indexable triples ``(birth, death, degree)``.
    """
    points = [(p[0], p[1]) for p in diagram.points if p[2] == 1]
    if len(points) > 2:
        diagram_distances = []
        for (x0, y0) in points:
            # BUG FIX: the original computed sqrt(2 * (x0 - x1)**2), using the
            # x difference twice and ignoring y entirely; use the actual
            # Euclidean distance between the two diagram points.
            distances = [math.sqrt((x0 - x1) * (x0 - x1) +
                                   (y0 - y1) * (y0 - y1))
                         for (x1, y1) in points]
            # NOTE(review): ``distances`` is unsorted, so [1:6] skips the
            # self-distance only for the first point; if a 5-nearest-neighbour
            # average was intended, the list should be sorted first -- confirm.
            neighbours = distances[1:6]
            diagram_distances.append(sum(neighbours, 0.0) / len(neighbours))
        return sum(diagram_distances, 0.0) / len(diagram_distances)
    else:
        return 0.0


if __name__ == "__main__":
    # One density computation per persistence-diagram file named on the
    # command line; leave a couple of cores free for the rest of the system.
    worker_pool = multiprocessing.Pool(multiprocessing.cpu_count() - 2)
    for filename in sys.argv[1:]:
        diagrams = PersistenceDiagrams.fromJSONDict(
            load_data(filename, None, None, None, sys.argv[0] + " : "))
        save_data(filename + "-density",
                  list(worker_pool.map(average_density, diagrams.diagrams)))
Beispiel #28
0
    elif "Distances" in file_class:
        data_class = "Distances"
    elif "ScaleSpaceSimilarities" in file_class:
        file_class = "ScaleSpaceSimilarity"
        data_class = "Distances"
    elif "Kernel" in file_class:
        data_class = "Kernel"
    elif "CrossValidation" in file_class:
        data_class = file_class

    module = importlib.import_module('persistence.' + file_class)
    module_class = getattr(module, file_class.split('.')[-1])
    module = importlib.import_module('persistence.' + file_class)
    data_class = getattr(module, data_class.split('.')[-1])

    in_json = load_data(args.infile, "JSONObject", None, None,
                        sys.argv[0] + " : ")
    in_obj = data_class.fromJSONDict(in_json)
    in_obj.config.out_directory = args.outdir
    if status != None:
        in_obj.config.status = status
    if   "Segments" in file_class or \
         "Post" in file_class :
        out_file = module_class.get_segment_filename(in_obj.config, gz=False)
    elif "Features" in file_class:
        out_file = module_class.get_features_filename(in_obj.config, gz=False)
    elif "PersistenceDiagrams" in file_class:
        out_file = module_class.get_persistence_diagrams_filename(
            in_obj.config, gz=False)
    elif "Partition" in file_class:
        out_file = module_class.get_partition_filename(in_obj.config, gz=False)
    elif "Learning" in file_class:
def main(argv):
    """Interactive plot of segment data and persistence diagrams.

    Builds two views of the input -- a "full" (unwindowed) one and a
    windowed one -- either from a precomputed persistence file (which makes
    every other option irrelevant) or from the command-line options, then
    runs the wx App with them.
    """

    def apply_overrides(config):
        # Command-line values, when given, take precedence over whatever the
        # Configuration was constructed with.
        if args.segment_size is not None:
            config.segment_size = args.segment_size
        if args.segment_stride is not None:
            config.segment_stride = args.segment_stride
        if args.data_index is not None:
            config.data_index = parse_index(args.data_index)[0]
        if args.label_index is not None:
            config.label_index = args.label_index

    parser = argparse.ArgumentParser(description="utility to plot \
    data and dynamically generated persistence diagrams. Using \
    the persistence option uses precomputed persistence and ignores all \
    the other options.")
    parser.add_argument('-i', '--infile', help="Data to read")
    parser.add_argument('-m', '--max-simplices', default=2000000,
                        type=int, help="Maximum number of simplices for persistence \
                        generation")
    parser.add_argument('-I', '--data-index', help="Index of data field for data types that require it")
    parser.add_argument('-L', '--label-index', type=int, help="Index of label field for data types that require it")
    parser.add_argument('-s', '--segment-size', type=int, help="Segment size for data types that require it")
    parser.add_argument('-S', '--segment-stride', type=int, help="Segment stride for data types that require it")
    parser.add_argument('-w', '--window-size', help="Window size for \
    persistence generation. Integer is a literal window size, float \
    between 0 and 1 is a fraction of the total Segment size")
    parser.add_argument('-p', '--persistences', help="Precomputed persistence diagram")
    parser.add_argument('-t', '--data-type', default="UCRSegments", help="Data type of the segments in infile")
    args = parser.parse_args(argv[1:])

    if args.persistences is not None:
        # Precomputed path: everything comes from the stored Configuration
        # and the rest of the command-line options are ignored.
        persistences_json = load_data(args.persistences, 'persistences', None,
                                      None, argv[0])
        if persistences_json is None:
            print("Could not load --persistences : %s" % (args.persistences, ))
            exit()
        persistences = PD.fromJSONDict(persistences_json)
        data_type = persistences.config.data_type
        segments_class = getattr(
            importlib.import_module('persistence.' + data_type), data_type)
        # window_size == -1 marks the unwindowed "full" view of the data.
        full_config = copy(persistences.config)
        full_config.window_size = -1
        full_data = segments_class(full_config)
        windowed_data = segments_class(copy(persistences.config))
    else:
        # Dynamic path: resolve the segment class named by --data-type
        # (module and class share the same name by convention here).
        segments_class = getattr(
            importlib.import_module('persistence.' + args.data_type),
            args.data_type)
        full_config = Configuration.fromJSONDict({
            "data_type": args.data_type,
            "data_file": args.infile,
            "label_index": 0,
            "max_simplices": args.max_simplices,
            "window_size": -1,
            "window_stride": 1,
        })
        # A ':'-separated --infile denotes multiple data files.
        if full_config.data_file.find(":") != -1:
            full_config.data_file = full_config.data_file.split(':')
        apply_overrides(full_config)
        full_data = segments_class(full_config)
        # A fractional --window-size is interpreted relative to the full
        # configuration's window size; an integral one is taken literally.
        requested = float(args.window_size)
        if requested < 1.0:
            window_size = int(requested * full_data.config.window_size)
        else:
            window_size = int(args.window_size)
        window_config = Configuration.fromJSONDict({
            "data_type": args.data_type,
            "data_file": full_config.data_file,
            "label_index": 0,
            "max_simplices": args.max_simplices,
            "window_size": window_size,
            "window_stride": 1,
        })
        apply_overrides(window_config)
        windowed_data = segments_class(window_config)
        # Placeholder diagrams: one None slot per segment, filled on demand.
        persistences = PD(windowed_data.config,
                          [None for segment in windowed_data.segments])
    try:
        app = App(0, full_data, windowed_data, persistences)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
import argparse
import importlib
import subprocess
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes.Configuration import Configuration
from persistence.CrossValidation import CrossValidation

if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Utility to generate kernels and learning results from a cross validation result")
    parser.add_argument("--pool", "-p", type=int)
    parser.add_argument("--input", "-i")
    parser.add_argument("--cross-validation", "-v")
    parser.add_argument("--train-test-partitions", "-t")
    args = parser.parse_args(sys.argv[1:])
    print args.cross_validation
    cv_json = load_data(args.cross_validation, "cross validation", None, None, sys.argv[0]+": ")
    cv = CrossValidation.fromJSONDict(cv_json)
    config = cv.config
    print config

    if cv.kernel_module != None :
        kernel_module = importlib.import_module("persistence." + cv.kernel_module)
        kernel_class = getattr(kernel_module, cv.kernel_module)
        scale_arg = kernel_class.get_scale_arg()
        kernel_filename = kernel_class.get_kernel_filename(config)
        kernel_command = ["python", "-u", "-O", "-m", "persistence."+cv.kernel_module, 
                          "--"+scale_arg.replace("_","-"), str(config[scale_arg]),
                          "--infile", args.input]
        if args.pool != None :
            kernel_command.extend(["--pool", str(args.pool)])
        else :
Beispiel #31
0
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program.  If not, see <http://www.gnu.org/licenses/>.


import sys
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes.Learning import Learning, LearningResult
from persistence.Datatypes.TrainTestPartitions import TrainTestPartitions

partitions_json = load_data(sys.argv[1], 'partition', None, None, sys.argv[0] + ": ")
partitions = TrainTestPartitions.fromJSONDict(partitions_json)
all_wrongs = []
for f in sys.argv[2:] :
    results_json = load_data(f, 'learning', None, None, sys.argv[0] + ": ")
    results = Learning.fromJSONDict(results_json)
    wrongs = []
    for (result, partition) in zip(results.results, partitions.evaluation) :
        correct = [(l == r) for (l,r) in zip(result.test_labels, result.test_results)]
        wrong = [p for (c,p) in zip(correct, partition.test) if not c]
        wrong.sort()
        wrongs.append(wrong)
    all_wrongs.append(wrongs)

for (a,b) in zip(all_wrongs[0], all_wrongs[1]) :
    if a == b :
Beispiel #32
0
import argparse
import importlib
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.Segments import Segments, Segment

# Re-window a multi-dimensional Segments file down to a single data
# dimension: for every segment, extract windows of --window-size values of
# dimension --data-index, striding by --window-stride.
# NOTE(review): this fragment uses ``sys`` without a visible import --
# confirm it is imported earlier in the original file.
parser = argparse.ArgumentParser(
    description=
    "Creates windowed segments of a single dimension for a multidimensioned dataset"
)
parser.add_argument("-i", "--infile")
parser.add_argument("-d", "--data-index", default=0, type=int)
parser.add_argument("-w", "--window-size", type=int)
parser.add_argument("-W", "--window-stride", default=1, type=int)
args = parser.parse_args(sys.argv[1:])
segments = Segments.fromJSONDict(
    load_data(args.infile, "segments", None, None, sys.argv[0] + ": "))
# Remember the original window size before the config is rewritten to
# describe the new single-dimension windows.
orig_window_size = segments.config.window_size
segments.config.window_size = args.window_size
segments.config.window_stride = args.window_stride
dimensions = len(segments.segments[0].data_index)
segments.config.data_index = segments.segments[0].data_index[args.data_index]
for segment in segments.segments:
    # Values are interleaved by dimension, so element j of the chosen
    # dimension in window i lives at (i + j) * dimensions + data_index; the
    # guard drops windows that would run off the end of the flattened data.
    windows = [[
        segment.windows[0][(i + j) * dimensions + args.data_index]
        for j in range(args.window_size)
    ] for i in range(0, orig_window_size, args.window_stride)
               if ((i + args.window_size - 1) * dimensions +
                   args.data_index) < len(segment.windows[0])]
    segment.data_index = segment.data_index[args.data_index]
    # NOTE(review): window_size is assigned from args.window_stride here,
    # while the config above uses args.window_size -- this looks like a
    # copy/paste bug; confirm which value is intended.
    segment.window_size = args.window_stride
    segment.windows = windows
Beispiel #33
0
                  [dict([("full_diagram",full_pd.toJSONDict()), ("runtime", full_runtime)])] + sparse_pds)
        return full_pd.toJSONDict()

if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description=('Creates Persistence Diagrams using sparse rips ' + \
                                                  'filtration with varying max_simplices to compare ' + \
                                                  'with full rips filtration'))
    parser.add_argument('-i', '--infile', help='Segments file to use as input data')
    parser.add_argument('-o', '--outfile', help='File to save output into')
    parser.add_argument('-m', '--max-simplices', help='range of max_simplices values to use')
    parser.add_argument('-e', '--epsilon', help='range of epsilon values to use')
    parser.add_argument('-p', '--pool')
    parser.add_argument('-n', '--number', help='limit to n persistence diagrams')
    args = parser.parse_args(sys.argv[1:])

    infile = Segments.fromJSONDict(load_data(args.infile, 'segments', None, None, sys.argv[0] + " : "))

    if args.number != None :
        infile.segments = infile.segments[0:int(args.number)]

    if args.max_simplices != None :
        max_simplices = parse_range(args.max_simplices, t=float)
        epsilon = None
    else :
        epsilon = parse_range(args.epsilon, t=float)
        max_simplices = None

    if int(args.pool) > 1 :
        pool = multiprocessing.Pool(int(args.pool))
        
        mapped = pool.imap(segment_processing_callable(args.outfile, max_simplices, epsilon, len(infile.segments)),
Beispiel #34
0
    parser.add_argument('--window-size', default=40, type=int)
    parser.add_argument('--samples', default=10, type=int)
    parser.add_argument('--sample-at', default=None)
    parser.add_argument('--wasserstein', action='store_true')
    parser.add_argument('--pool',
                        default=multiprocessing.cpu_count() - 2,
                        type=int)
    args = parser.parse_args(sys.argv[1:])
    if args.pool > 1:
        pool = multiprocessing.Pool(args.pool)
    else:
        pool = itertools

    data = []
    if args.segments != None:
        segments_json = load_data(args.segments, 'segments', None, None,
                                  sys.argv[0] + ": ")
        segments = Segments.fromJSONDict(segments_json)
        args.type = segments.config.data_type
        config = segments.config
        config.window_size = args.window_size
        config.window_stride = 1
        for segment in segments.segments:
            point_len = len(segment.windows[0]) / segment.segment_size
            this_data = [(segment.windows[0][i:i + point_len],
                          segment.filename)
                         for i in range(0, len(segment.windows[0]), point_len)]
            data.extend(this_data)
    else:
        config = Configuration.fromJSONDict(
            dict([('data_file', args.infile), ('data_type', args.type),
                  ('window_size', args.window_size), ('window_stride', 1),
Beispiel #35
0
    parser.add_argument('--outfile')
    parser.add_argument('--pool', default=multiprocessing.cpu_count(), type=int)
    args = parser.parse_args(sys.argv[1:])
    infiles = []
    for f in args.infile :
        (dirname,basename) = os.path.split(f)
        cmd = ["find", dirname, "-name", basename + "*", "-type", "f"]
        print " ".join(cmd)
        files = subprocess.check_output(cmd)
        files = files.split()
        infiles.extend(files)
    infiles.sort()
    print "\n".join(infiles)
    input_data = []
    for f in infiles :
        input_json = load_data(f, '', None, None, "persistence_bottleneck_distance.py : ")
        input_data.extend(input_json)

    input_data = [i for i in input_data if i['segment_start'] != i['segment_size']]
    input_data.sort(key=lambda x: (x['segment_start'],x['max_simplices']/x['segment_size']/x['segment_size'], x['segment_size']))
    #    for entry in input_data :
    #        print entry['segment_start'], entry['segment_size'], entry['max_simplices']

    samples = list(set([d['segment_start'] for d in input_data]))
    samples.sort()
    if args.pool > 1 :
        pool = multiprocessing.Pool(args.pool)
    else :
        pool = itertools
    out_data = []
    for s in samples :
Beispiel #36
0
import argparse
import numpy

from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes import Learning

# Print summary statistics for Learning JSON files, optionally in CSV form.
parser = argparse.ArgumentParser(
    description="utility to print stats about learning JSON")
parser.add_argument('--csv', action="store_true")
parser.add_argument('files', metavar='FILE', nargs='*')
args = vars(parser.parse_args(sys.argv[1:]))
if args['csv']:
    # CSV header matching the per-file rows printed below.
    print "Data File,Learning Type,Data Index,Segment Size,Segment Stride,Window Size,Max Simplices,Kernel Scale,C,Correct,Std. Dev.,Top Quart,Bot Quart,Train,Test,Classes,Kernel Weights"
for f in args['files']:
    try:
        lf_json = load_data(f, 'learning', None, None,
                            'learning_stats: ' if not args['csv'] else None)
        if lf_json == None:
            print "Could not load file : %s" % (f, )
            exit()

        learning = Learning.Learning.fromJSONDict(lf_json)
        correct = []
        if (isinstance(learning.config.data_file, list)):
            (filename, ext) = os.path.splitext(
                os.path.basename(learning.config.data_file[0]))
            import string
            filename = string.join(filename.split("_")[0:-1])
        elif (isinstance(learning.config.data_file, dict)):
            filename = 'placeholder'
        else:
            (filename, ext) = os.path.splitext(
    def __init__(self, argv):
        """Build the 'best distances' plot window.

        Loads Distances, Learning, and TrainTestPartitions JSON files named
        on the command line, keeps only segments whose learning correctness
        is unknown or above a threshold, picks a representative segment per
        label, renders the plot, saves it as a PDF, and exits.

        :param argv: command-line argument vector (argv[0] prefixes the
            load_data progress messages).
        """
        wx.Frame.__init__(self,
                          None,
                          -1,
                          'Bottleneck Distance',
                          size=(550, 350))
        parser = argparse.ArgumentParser(
            description=
            "Utility to plot Bottleneck Distance average of 'representative' segments for each label"
        )
        parser.add_argument('-d', '--distances')
        parser.add_argument('-l', '--learning')
        parser.add_argument('-p', '--partition')
        # Minimum per-segment correctness required to keep a segment (0-1).
        parser.add_argument('-t', '--threshold', default=0.75, type=float)
        self.args = parser.parse_args(argv[1:])
        import traceback
        try:
            # Load the three input JSON files; any failure is fatal.
            self.distances = \
              Distances.fromJSONDict(load_data(self.args.distances, 'distances', None, None, argv[0]+": "))
            self.learning = \
              Learning.fromJSONDict(load_data(self.args.learning, 'learning', None, None, argv[0]+": "))
            self.partitions = \
              TrainTestPartitions.fromJSONDict(load_data(self.args.partition, 'partition', None, None, argv[0]+": "))
        except:
            print "Could not parse input files: %s" % (
                traceback.format_exc(), )
            sys.exit(1)

        # Filter to only the segments that get above the threshold

        self.segments = []

        # Keep (index, label) pairs for segments whose correctness is
        # unknown (None) or exceeds the threshold.
        for i in range(len(self.distances.segment_info)):
            c = segment_correctness(i, self.learning, self.partitions)
            if c == None or c > self.args.threshold:
                self.segments.append(
                    (i, self.distances.segment_info[i].max_label()))
        # Zero-pad the index so the lexicographic sort below groups by label
        # and then orders numerically within a label.
        # NOTE(review): math.log is base e, so the pad width overestimates
        # the decimal digit count; harmless since the width is consistent.
        sort_format = "0" * int(
            math.ceil(math.log(len(self.distances.segment_info))))
        self.segments.sort(
            key=lambda x: str(x[1] + (("%" + sort_format + "d") % x[0])))

        self.label_index = 0
        # Sorted list of distinct labels present after filtering.
        self.labels = list(set([x[1] for x in self.segments]))
        self.labels.sort()

        # First and last position of each label within the sorted list.
        self.segment_minimums = dict([(l, min([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l])) \
                                       for l in self.labels])
        self.segment_maximums = dict([(l, max([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l])) \
                                       for l in self.labels])

        # Representative position per label: the one minimizing the average
        # distance to that label (see average_distance).
        self.segment_indices = dict([(l, min([i for (i,x) in zip(range(len(self.segments)), self.segments) if x[1] == l], key=lambda x:average_distance(x,l,self.distances))) \
                                       for l in self.labels])

        # Standard wx / matplotlib scaffolding: canvas, toolbar, bindings.
        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        self.figure = Figure()
        self.axes = self.figure.add_subplot(111)

        self.canvas = FigureCanvas(self, -1, self.figure)

        self.sizer = wx.BoxSizer(wx.VERTICAL)
        self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1,
                       wx.LEFT | wx.TOP | wx.GROW)
        self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
        self.SetSizer(self.sizer)
        self.background = self.axes.figure.canvas.copy_from_bbox(
            self.axes.bbox)
        self.colors = [
            'black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet'
        ]
        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)

        self.index = 0
        self.point_Refresh()
        self.Fit()
        # Save "<out_dir>/<out_dir basename>-win-<window_size>-best-distances.pdf"
        # and terminate; this frame exists only to produce the PDF.
        self.figure.savefig(
            self.distances.config.out_directory + "/" +
            self.distances.config.out_directory.split('/')[-1] + '-win-' +
            str(self.distances.config.window_size) + '-best-distances.pdf')
        sys.exit(0)
Beispiel #38
0
import sys
import argparse
import importlib

from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.Distances import Distances
from persistence.Datatypes.Configuration import Configuration
from persistence.Datatypes.Segments import SegmentInfo

if __name__ == "__main__":
    # Attach SegmentInfo records to a Distances JSON file: load the
    # distances, locate the matching segments file via the configured data
    # type, and copy each segment's info onto the Distances object.
    parser = argparse.ArgumentParser(
        description="Utility to add SegmentInfo data to a Distances file")
    parser.add_argument("--infile")
    parser.add_argument("--outfile")
    args = parser.parse_args(sys.argv[1:])
    in_json = load_data(args.infile, "distances", None, None,
                        sys.argv[0] + " : ")
    d = Distances.fromJSONDict(in_json)
    # The data type names the persistence submodule whose class knows the
    # segment-file naming convention.
    module = importlib.import_module('persistence.' + d.config.data_type)
    module_class = getattr(module, d.config.data_type)
    segment_filename = module_class.get_segment_filename(d.config)
    seg_json = load_data(segment_filename, "segments", None, None,
                         sys.argv[0] + " : ")
    print segment_filename, len(seg_json['segments'])
    d.segment_info = []
    i = 0
    for segment in seg_json['segments']:
        d.segment_info.append(SegmentInfo.fromJSONDict(segment))
        i = i + 1
        # Progress indicator every 250 segments.
        if i % 250 == 0:
            print segment_filename, i
    # NOTE(review): args.outfile is parsed but never written in this
    # excerpt; a save_data call appears to be missing.
Beispiel #39
0
# Load learning results plus bottleneck distances computed at three window
# sizes (10, 20, 30) for a comparison figure.
parser = argparse.ArgumentParser()
parser.add_argument("-l", "--learning", nargs="+")
parser.add_argument("-d", "--distance-10")
parser.add_argument("-e", "--distance-20")
parser.add_argument("-f", "--distance-30")

# Worker pool size: all CPUs but two, and at least one.
parser.add_argument("-p",
                    "--pool",
                    default=max(1,
                                multiprocessing.cpu_count() - 2),
                    type=int)
args = parser.parse_args(sys.argv[1:])
# One Learning object per --learning file.
learning = [
    Learning.fromJSONDict(
        load_data(l, "learning", None, None, sys.argv[0] + ": "))
    for l in args.learning
]
# Distances for window sizes 10, 20 and 30, in that order.
distances = [
    Distances.fromJSONDict(
        load_data(args.distance_10, "distances", None, None,
                  sys.argv[0] + ": ")),
    Distances.fromJSONDict(
        load_data(args.distance_20, "distances", None, None,
                  sys.argv[0] + ": ")),
    Distances.fromJSONDict(
        load_data(args.distance_30, "distances", None, None,
                  sys.argv[0] + ": "))
]
filedict = []
# 2x2 grid of subplots for the comparison figure.
f, axes = plt.subplots(2, 2)
Beispiel #40
0
def main(argv):
    parser = argparse.ArgumentParser(
        description='Tool to generate a similarity kernel from persistence data'
    )
    parser.add_argument('-i',
                        '--infile',
                        help='Input JSON Similarity Kernel file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p',
                        '--pool',
                        default=multiprocessing.cpu_count(),
                        help='Threads of computation to use')
    parser.add_argument(
        '-c',
        '--learning-C',
        help=
        'C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('-t',
                        '--train-test-partitions',
                        help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))

    kf_json = load_data(args['infile'], 'kernel', None, None,
                        "KernelLearning: ")
    if kf_json == None:
        print "Could not load Kernel from %s" % (args['infile'], )
        sys.exit(1)
    kernel = Kernel.fromJSONDict(kf_json)
    config = kernel.config
    segment_info = kernel.segment_info
    if (int(args['pool']) > 1):
        pool = multiprocessing.Pool(int(args['pool']))
    else:
        pool = None

    if (args['learning_C'] != None):
        learning_C = parse_range(args['learning_C'], t=float)
        if not isinstance(learning_C, list):
            learning_C = [learning_C]
    elif not isinstance(learning_C, list):
        learning_C = [config.learning_C]
    else:
        learning_C = config.learning_C

    if (args['train_test_partitions'] != None):
        partitions_json = load_data(args['train_test_partitions'],
                                    'partitions', None, None,
                                    "KernelLearning: ")
        if partitions_json == None:
            print "Could not load Train / Test Partitions from %s" % (
                args['train_test_partitions'], )
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        partitions = generate_partitions(config,
                                         segment_info,
                                         cv_iterations=5 if
                                         (len(learning_C) > 1) else 0)

    if isinstance(learning_C, list) and len(learning_C) > 1 and len(
            partitions.cross_validation) > 0:
        num_cv = len(partitions.cross_validation)
        learning_wrap = LearningWrapper(kernel)
        if pool != None:
            results = pool.map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        else:
            results = map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        results = list(results)
        print len(results)
        for C in learning_C:
            correct = Learning(
                config, [_result for (_C, _result) in results if C == _C
                         ]).get_average_correct()
            if correct > max_correct:
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print "KernelLearning: using C = %s, correct = %s" % (
            config.learning_C, max_correct)
    else:
        if isinstance(learning_C, list):
            config.learning_C = learning_C[0]
        else:
            config.learning_C = learning_C

    learning_wrap = LearningWrapper(kernel)

    if pool != None:
        results = pool.map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    else:
        results = map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C, result) in results])

    if args['outfile'] == None:
        learning_filename = KernelLearning.get_learning_filename(config)
    else:
        learning_filename = args['outfile']

    correct = learning.get_average_correct()
    print "%s correct %2.2f%% error %2.2f%% classes %s" % (
        "KernelLearning:", correct * 100.0, (1.0 - correct) * 100.0,
        len(set([s.max_label() for s in kernel.segment_info])))
    print "Writing %s" % (learning_filename, )
    learning.config.status = "KernelLearning"
    save_data(learning_filename, learning.toJSONDict())
Beispiel #41
0
    def __init__(self, argv):
        wx.Frame.__init__(self, None, -1, 'Segment Size', size=(550, 350))
        parser = argparse.ArgumentParser(
            description=
            "utility to graph success levels for learning over a single configuration parameter"
        )
        parser.add_argument('--label', '-l')
        parser.add_argument('files', metavar='FILE', nargs='*')
        self.args = vars(parser.parse_args(argv[1:]))
        self.files = self.args['files']

        self.filedict = []
        # load in the data files
        for f in self.files:
            learning = Learning.fromJSONDict(
                load_data(f, 'learning', None, None, argv[0] + ": "))
            correct = []
            for result in learning.results:
                num_correct = reduce(
                    (lambda s, (t0, t1): s + 1 if t0 == t1 else s),
                    zip(result['test_labels'], result['test_results']), 0)
                correct.append(
                    float(num_correct) / float(len(result['test_labels'])))
            print "file %s correct %0.2f%%" % (f,
                                               numpy.average(correct) * 100.0)
            self.filedict.append(
                dict([('file', f), ('correct', numpy.average(correct)),
                      ('config', learning.config)]))
            if "PersistenceKernelLearning" in f:
                self.filedict[-1][
                    'label'] = "Persistence Kernel " + learning.config.data_index
                if learning.config.post_process != None:
                    self.filedict[-1]['label'] = self.filedict[-1][
                        'label'] + " " + learning.config.post_process
            elif "AverageKernelLearning" in f:
                self.filedict[-1]['label'] = "Average Kernel"
                if learning.config.post_process != None:
                    self.filedict[-1]['label'] = self.filedict[-1][
                        'label'] + " " + learning.config.post_process
            elif "ChaoticInvariantFeaturesLearning" in f:
                self.filedict[-1]['label'] = " Chaotic Invariant Features"
            elif "ScaleSpaceSimilarityLearning" in f:
                self.filedict[-1]['label'] = "Scale Space Similarity"
            elif "EuclideanDistancesLearning" in f:
                self.filedict[-1]['label'] = "Euclidean Distance"
            if (len(correct) > 1):
                self.filedict[-1]['correct_std'] = numpy.std(correct)
                self.filedict[-1]['correct_top'] = numpy.percentile(
                    correct, 0.75)
                self.filedict[-1]['correct_bot'] = numpy.percentile(
                    correct, 0.25)
            else:
                self.filedict[-1]['correct_std'] = 0.0
                self.filedict[-1]['correct_top'] = 0.0
                self.filedict[-1]['correct_bot'] = 0.0

        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        self.figure = Figure()
        self.axes = self.figure.add_subplot(211)

        self.canvas = FigureCanvas(self, -1, self.figure)

        self.sizer = wx.BoxSizer(wx.VERTICAL)
        self.sizer.Add(NavigationToolbar2Wx(self.canvas), 1,
                       wx.LEFT | wx.TOP | wx.GROW)
        self.sizer.Add(self.canvas, 8, wx.LEFT | wx.TOP | wx.GROW)
        self.SetSizer(self.sizer)
        self.title = self.figure.suptitle("")
        self.Fit()
        self.background = self.axes.figure.canvas.copy_from_bbox(
            self.axes.bbox)
        self.colors = [
            '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#b3de69', '#fb9a99',
            '#e31a1c', '#fb8072', '#ff7f00', '#a65628', '#fdb462', '#cab2d6'
        ]
        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
        self.point_Refresh()
Beispiel #42
0
    def __init__(self, argv):
        """Summarize and graph best learning results over the UCR datasets.

        Collects *Learning.json* files (from --directory or explicit file
        arguments), groups them by dataset and learning type, keeps the best
        average correctness per group, prints a per-dataset summary, and
        sets up the bar-chart window. Python 2 code (print statements,
        dict.items() indexing).

        :param argv: command-line argument vector.
        """
        wx.Frame.__init__(self,None,-1,
                         'UCR 2015 Learning Results',size=(550,350))
        parser = argparse.ArgumentParser(description="utility to graph success levels for learning on the UCR Dataset")
        parser.add_argument('-d', '--directory', help='Directory where the learning results are stored', required=False)
        parser.add_argument('files', metavar='FILE', nargs='*')
        self.args = vars(parser.parse_args(argv[1:]))
        # --directory and explicit files are mutually exclusive; when both
        # are given the directory wins and the files are ignored.
        if self.args['directory'] != None and len(self.args['files']) != 0 :
            print "Ignoring files after the directory argument"
        elif self.args['directory'] == None and len(self.args['files']) == 0 :
            parser.print_help()
            sys.exit()
        
        if self.args['directory'] != None :
            if os.path.isdir(self.args['directory']) :
                # Recursively collect all *Learning.json* files under the
                # given directory via find(1).
                self.files = subprocess.check_output(["find", self.args['directory'], "-name", "*Learning.json*"])
                self.files = [f for f in self.files.split("\n") if f != '']
            else :
                parser.print_help()
                sys.exit()
        else :
            self.files = self.args['files']
        # Derive a learning-type key from a result file name; kernel-based
        # types are suffixed with the window size (or "-Chaos" when the run
        # was chaos post-processed).
        def learning_type(text) :
            fields = text.split('-')
            t = fields[-1].split('.')[0][:-len('Learning')]
            try :
                if t == 'PersistenceKernel' or t == 'ScaleSpaceSimilarity' or t == 'MultipleKernel' or t == 'AverageKernel':
                    if 'ChaosPost' in fields :
                        w = '-Chaos'
                    else :
                        w = '-' + fields[fields.index('win')+1]
                else :
                    w = ''
            except ValueError :
                w = ''
            return t + w
        self.learning_types = list(set([learning_type(f) for f in self.files]))
        self.learning_types.sort()
        # The dataset name is the parent directory of each result file.
        datasets = list(set([f.split('/')[-2] for f in self.files]))
        datasets.sort()

        # dataset -> learning type -> best correctness seen so far (0 until
        # a file for that combination loads; a list of per-kernel values
        # when MKL weights are present).
        self.filedict = dict([(s, dict([(t,0) for t in self.learning_types])) for s in datasets])

        # load in the data files
        for f in self.files :
            learning_t = learning_type(f)
            dataset = f.split('/')[-2]
            learning_json = load_data(f, "learning", None, None, None)
            learning = Learning.fromJSONDict(learning_json)
            best = self.filedict[dataset][learning_t]
            current = learning.get_average_correct()
            # Keep only the best result per (dataset, learning type).
            if (isinstance(best, list) and current > sum(best)) or (not isinstance(best, list) and current > best) :
                if learning.results[0].mkl_weights != None :
                    # Split the correctness across the multiple-kernel weights.
                    self.filedict[dataset][learning_t] = [current * w for w in learning.results[0].mkl_weights]
                else :
                    self.filedict[dataset][learning_t] = current

        # Print an aligned per-dataset summary of every learning type.
        keylen = max([len(key) for (key,val) in self.filedict.items() ])
        format = '%'+str(keylen)+'s %s'
        for (key, val) in self.filedict.items() :
            vals = [("%s %02.2f%%" % (k,v*100.0 if not isinstance(v, list) else sum(v) * 100.0)) + ((" " + str(["%02.2f%%" % v_ for v_ in v])) if isinstance(v,list) else "") for (k,v) in val.items()]
            vals.sort()
            print format % (key, vals)

        self.frame = 0
        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        self.figure = Figure()
        self.axes = self.figure.add_subplot(121)
        # Space the x axis so each dataset gets one group of bars plus a gap.
        plot_keys = self.filedict.items()[0][1].keys()
        dataset_width = len(plot_keys) + 1.5
        self.axes.set_xticks([(0.5 + i) * dataset_width for i in range(len(self.filedict.items()))])
        self.axes.set_xticklabels([key for (key,value) in self.filedict.items()])
        self.axes.set_ylim(0.0,1.0, auto=False)
        
        self.canvas = FigureCanvas(self, -1, self.figure)

        self.sizer = wx.BoxSizer(wx.VERTICAL)
        self.sizer.Add(self.canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
        self.SetSizer(self.sizer)
        self.title = self.figure.suptitle("UCR Learning")
        #self.Fit()
        self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)
        self.colors = ['black', 'red', 'yellow', 'orange', 'blue', 'green', 'violet']
        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
        self.point_Refresh()
Beispiel #43
0
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program.  If not, see <http://www.gnu.org/licenses/>.


import sys
import numpy
from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes.Segments import Segments
# Summarize time-delay (tau) and window-size statistics for each Segments
# file named on the command line and plot them as stacked bars.
data = []
for arg in sys.argv[1:] :
    segments = Segments.fromJSONDict(load_data(arg, "segments", None, None, sys.argv[0] + ": "))
    taus = [int(s.tau) for s in segments.segments]
    window_sizes = [len(s.windows[0]) for s in segments.segments]

    # One row per file: name fragment between '-data-' and '-seg-', then
    # min / max / mean of tau followed by min / max / mean of window size.
    data.append([arg[arg.find('-data-')+6:arg.find('-seg-')], min(taus), max(taus), numpy.mean(taus),min(window_sizes), max(window_sizes), numpy.mean(window_sizes)])

data.sort()
print data
import matplotlib.pyplot as plt
f = plt.figure()
axes_tau = f.add_axes([0.1,0.3,0.35,0.6])
axes_tau.set_title("Time Delay")
# Stacked bands: min tau, then (max - min) on top of it, then (mean - max).
# NOTE(review): the third band's height is mean - max, which is negative
# whenever mean < max; the intended stacking may have been min/mean/max —
# confirm against the original figure.
plots =[
axes_tau.bar(left=range(len(data)), height=[d[1] for d in data], bottom=0.0, width=0.8, color="#a8ddb5"),
axes_tau.bar(left=range(len(data)), height=[d[2] - d[1] for d in data], bottom=[d[1] for d in data], width=0.8, color="#7bccc4"),
axes_tau.bar(left=range(len(data)), height=[d[3] - d[2] for d in data], bottom=[d[2] for d in data], width=0.8, color="#4eb3d3") ]
Beispiel #44
0
import sys
import argparse
import importlib

from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.Distances import Distances
from persistence.Datatypes.Configuration import Configuration
from persistence.Datatypes.Segments import SegmentInfo

if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Utility to add SegmentInfo data to a Distances file")
    parser.add_argument("--infile")
    parser.add_argument("--outfile")
    args = parser.parse_args(sys.argv[1:])
    in_json = load_data(args.infile, "distances", None, None, sys.argv[0] + " : ")
    d = Distances.fromJSONDict(in_json)
    module = importlib.import_module('persistence.' + d.config.data_type)
    module_class = getattr(module, d.config.data_type)
    segment_filename = module_class.get_segment_filename(d.config)
    seg_json = load_data(segment_filename, "segments", None, None, sys.argv[0] + " : ")
    print segment_filename, len(seg_json['segments'])
    d.segment_info = []
    i = 0
    for segment in seg_json['segments'] :
        d.segment_info.append(SegmentInfo.fromJSONDict(segment))
        i = i + 1
        if i % 250 == 0:
            print segment_filename, i

    print "Writing %s" % (args.outfile,)
Beispiel #45
0
import sys
import argparse
import numpy

from persistence.Datatypes.JSONObject import load_data
from persistence.Datatypes import Learning

# Print summary statistics for Learning JSON files, optionally in CSV form.
parser = argparse.ArgumentParser(description="utility to print stats about learning JSON")
parser.add_argument('--csv', action="store_true")
parser.add_argument('files', metavar='FILE', nargs='*')
args = vars(parser.parse_args(sys.argv[1:]))
if args['csv'] :
    # CSV header matching the per-file rows printed below.
    print "Data File,Learning Type,Data Index,Segment Size,Segment Stride,Window Size,Max Simplices,Kernel Scale,C,Correct,Std. Dev.,Top Quart,Bot Quart,Train,Test,Classes,Kernel Weights"
for f in args['files']:
    try:
        lf_json = load_data(f, 'learning', None, None, 'learning_stats: ' if not args['csv'] else None)
        if lf_json == None :
            print "Could not load file : %s" % (f,)
            exit()
        
        learning = Learning.Learning.fromJSONDict(lf_json)
        correct = []
        if (isinstance(learning.config.data_file, list)):
            (filename, ext) = os.path.splitext(os.path.basename(learning.config.data_file[0]))
            import string
            filename = string.join(filename.split("_")[0:-1])
        elif (isinstance(learning.config.data_file, dict)) :
            filename = 'placeholder'
        else :
            (filename, ext) = os.path.splitext(os.path.basename(learning.config.data_file))
        learning_type = (f.split('-')[-1]).split('.')[0]
Beispiel #46
0
        data_class = "Distances"
    elif "ScaleSpaceSimilarities" in file_class :
        file_class = "ScaleSpaceSimilarity"
        data_class = "Distances"
    elif "Kernel" in file_class :
        data_class = "Kernel"
    elif "CrossValidation" in file_class :
        data_class = file_class

    module = importlib.import_module('persistence.' + file_class)
    module_class = getattr(module, file_class.split('.')[-1])
    module = importlib.import_module('persistence.' + file_class)
    data_class = getattr(module, data_class.split('.')[-1])
        

    in_json = load_data(args.infile, "JSONObject", None, None, sys.argv[0] + " : ")
    in_obj = data_class.fromJSONDict(in_json)
    in_obj.config.out_directory = args.outdir
    if status != None :
        in_obj.config.status = status          
    if   "Segments" in file_class or \
         "Post" in file_class :
        out_file = module_class.get_segment_filename(in_obj.config, gz=False)
    elif "Features" in file_class :
        out_file = module_class.get_features_filename(in_obj.config, gz=False)
    elif "PersistenceDiagrams" in file_class :
        out_file = module_class.get_persistence_diagrams_filename(in_obj.config, gz=False)
    elif "Partition" in file_class :
        out_file = module_class.get_partition_filename(in_obj.config, gz=False)
    elif "Learning" in file_class :
        out_file = module_class.get_learning_filename(in_obj.config, gz=False)
    def __init__(self, data_file, persistence_file, kernel_file):
        """Build the persistence-exploration window.

        Loads segment, persistence-diagram, and kernel JSON files, flattens
        each segment's overlapping windows into one continuous span for
        plotting, and lays out a main data plot plus two pairs of
        per-segment data / persistence sub-plots with mouse and keyboard
        interaction.

        :param data_file: path of the Segments JSON file.
        :param persistence_file: path of the PersistenceDiagrams JSON file.
        :param kernel_file: path of the Kernel JSON file.
        """
        wx.Frame.__init__(self,None,-1,
                         'Data Visualization',size=(550,350))
        self.segment_file = data_file
        segments_json = load_data(self.segment_file, 'segments', None, None, "explore_persistence: ")
        if segments_json == None:
            print "Could not load segment file : %s" % (self.segment_file,)
            exit()
        self.segments = Segments.fromJSONDict(segments_json)

        self.persistence_file = persistence_file
        persistence_json = load_data(self.persistence_file, 'persistence', None, None, "explore_persistence: ")
        if persistence_json == None :
            print "Could not load persistence file : %s" % (self.persistence_file,)
            exit()
        self.persistences = PD.fromJSONDict(persistence_json)

        self.kernel_file = kernel_file
        kernel_json = load_data(self.kernel_file, 'kernel', None, None, "explore_persistence: ")
        if kernel_json == None :
            print "Could not load kernel file : %s" % (self.kernel_file,)
            exit()
        self.kernel = Kernel.fromJSONDict(kernel_json)

        self.kernel_config = self.kernel.config

        self.spans = []
        self.similarities = []

        # Flatten each segment's overlapping windows back into a single
        # continuous data span tagged with its label and start position.
        for segment in self.segments.segments :
            window_stride = segment.window_stride
            label = max_label(segment.labels)
            data = []
            # We need to account for data overlap in the windows, which is not useful for this visualization
            for window in segment.windows :
                data.extend(window[0:window_stride])
            data.extend(segment.windows[-1][window_stride:])
            self.spans.append((label, segment.segment_start, data))
        # Global min / max over all span values (used for axis scaling).
        self.mins = None
        self.maxs = None
        for  (l, start, xs) in self.spans :
            for x in xs :
                if self.maxs == None or x > self.maxs :
                    self.maxs = x
                if self.mins == None or x < self.mins :
                    self.mins = x
        
        self.labels = set([span[0] for span in self.spans])
        
        self.index = 1

        # Main figure plus two (data, persistence) sub-figure pairs.
        self.SetBackgroundColour(wx.NamedColour("WHITE"))
        self.figure = Figure()
        self.axes = self.figure.add_subplot(111)
        self.canvas = FigureCanvas(self, -1, self.figure)
        self.title = self.figure.suptitle("Data for Column %s" % (self.index,))

        self.sub_figure_a = Figure()
        self.sub_axes_a = self.sub_figure_a.add_subplot(111)
        self.sub_canvas_a = FigureCanvas(self, -1, self.sub_figure_a)
        self.sub_title_a = self.sub_figure_a.suptitle("Data for Segment beginning at %s, label %s" % (" ", " "))

        self.sub_figure_ap = Figure()
        self.sub_axes_ap = self.sub_figure_ap.add_subplot(111)
        self.sub_canvas_ap = FigureCanvas(self, -1, self.sub_figure_ap)
        self.sub_title_ap = self.sub_figure_ap.suptitle("Persistence for Segment beginning at %s, label %s" % (" ", " "))

        self.sub_figure_b = Figure()
        self.sub_axes_b = self.sub_figure_b.add_subplot(111)
        self.sub_canvas_b = FigureCanvas(self, -1, self.sub_figure_b)
        self.sub_title_b = self.sub_figure_b.suptitle("Data for Segment beginning at %s, label %s" % (" ", " "))

        self.sub_figure_bp = Figure()
        self.sub_axes_bp = self.sub_figure_bp.add_subplot(111)
        self.sub_canvas_bp = FigureCanvas(self, -1, self.sub_figure_bp)
        self.sub_title_bp = self.sub_figure_bp.suptitle("Persistence for Segment beginning at %s, label %s" % (" ", " "))

        # Mouse interaction on the main canvas only.
        self.click_cid_down = self.canvas.mpl_connect('button_press_event', self.mouseDown)
        self.click_cid_up = self.canvas.mpl_connect('button_release_event', self.mouseUp)
        self.click_cid_move = self.canvas.mpl_connect('motion_notify_event', self.mouseMove)
        # Grid layout: toolbar + main canvas on top, data sub-plots in the
        # middle row, persistence sub-plots on the bottom row.
        self.sizer = wx.GridBagSizer(hgap=5, vgap=5)
        self.sizer.Add(NavigationToolbar2Wx(self.canvas), pos=(0,0), span=(1,2), flag=wx.EXPAND)
        self.sizer.AddGrowableCol(1,0)
        self.sizer.Add(self.canvas, pos=(1,0), span=(8,2), flag=wx.EXPAND)
        self.sizer.AddGrowableCol(9,0)
        self.sizer.Add(self.sub_canvas_a, pos=(9,0), span=(4,1), flag=wx.EXPAND)
        self.sizer.Add(self.sub_canvas_b, pos=(9,1), span=(4,1), flag=wx.EXPAND)
        self.sizer.AddGrowableCol(13,0)
        self.sizer.Add(self.sub_canvas_ap, pos=(13,0), span=(4,1), flag=wx.EXPAND)
        self.sizer.Add(self.sub_canvas_bp, pos=(13,1), span=(4,1), flag=wx.EXPAND)
        
        self.SetSizer(self.sizer)

        # Caption showing the total number of samples across all spans.
        self.caption = self.figure.text(0.15, 0.8, "%s Samples Read" % (\
                                        reduce((lambda x,y: x+y),[len(span[2]) for span in self.spans], 0)))
        self.caption.set_backgroundcolor('#ffffff')
        self.Fit()
        self.background = self.axes.figure.canvas.copy_from_bbox(self.axes.bbox)

        self.Bind(wx.EVT_PAINT, self.OnPaint)
        self.Bind(wx.EVT_KEY_UP, self.KeyEvent)
        # Interaction state: click tracking and the two selected segments.
        self.click = 0
        self.index_a = None
        self.index_b = None
        self.point_Refresh()
        self.state = (None, 0)
Beispiel #48
0
#You should have received a copy of the GNU General Public License
#along with this program.  If not, see <http://www.gnu.org/licenses/>.


import sys
import argparse
import importlib

from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram
from persistence.Datatypes.Configuration import Configuration
from persistence.Datatypes.Segments import SegmentInfo

if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Utility to add SegmentInfo data to a PersistenceDiagrams file")
    parser.add_argument("--infile")
    parser.add_argument("--outfile")
    args = parser.parse_args(sys.argv[1:])
    in_json = load_data(args.infile, "persistence diagrams", None, None, sys.argv[0] + " : ")
    pd = PersistenceDiagrams.fromJSONDict(in_json)
    module = importlib.import_module('persistence.' + pd.config.data_type)
    module_class = getattr(module, pd.config.data_type)
    segment_filename = module_class.get_segment_filename(pd.config)
    seg_json = load_data(segment_filename, "segments", None, None, sys.argv[0] + " : ")
    
    for (diagram, segment) in zip(pd.diagrams, seg_json['segments']) :
        diagram.segment_info = SegmentInfo.fromJSONDict(segment)

    print "Writing %s" % (args.outfile,)
    save_data(args.outfile, pd.toJSONDict())
 parser.add_argument('--max-segment-size', default=1000, type=int)
 parser.add_argument('--segment-size-step', default=20, type=int)
 parser.add_argument('--window-size', default=40, type=int)
 parser.add_argument('--samples', default=10, type=int)
 parser.add_argument('--sample-at', default=None)
 parser.add_argument('--wasserstein', action='store_true')
 parser.add_argument('--pool', default=multiprocessing.cpu_count()-2, type=int)
 args = parser.parse_args(sys.argv[1:])
 if args.pool > 1 :
     pool = multiprocessing.Pool(args.pool)
 else :
     pool = itertools
     
 data = []
 if args.segments != None :
     segments_json = load_data(args.segments, 'segments', None, None, sys.argv[0] + ": ")
     segments = Segments.fromJSONDict(segments_json)
     args.type = segments.config.data_type
     config = segments.config
     config.window_size = args.window_size
     config.window_stride = 1
     for segment in segments.segments :
         point_len = len(segment.windows[0]) / segment.segment_size
         this_data = [(segment.windows[0][i:i+point_len], segment.filename)
                      for i in range(0, len(segment.windows[0]), point_len)]
         data.extend(this_data)
 else :
     config = Configuration.fromJSONDict(dict([('data_file', args.infile),
                                               ('data_type', args.type),
                                               ('window_size', args.window_size),
                                               ('window_stride', 1),
Beispiel #50
0
def main(argv):
    parser = argparse.ArgumentParser(description='Tool to generate a similarity kernel from persistence data')
    parser.add_argument('-i', '--infile', help='Input JSON Similarity Kernel file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), help='Threads of computation to use')
    parser.add_argument('-c', '--learning-C', help='C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('-t', '--train-test-partitions', help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))
    
    kf_json = load_data(args['infile'], 'kernel', None, None, "KernelLearning: ")
    if kf_json == None :
        print "Could not load Kernel from %s" % (args['infile'],)
        sys.exit(1)
    kernel = Kernel.fromJSONDict(kf_json)
    config = kernel.config
    segment_info = kernel.segment_info
    if (int(args['pool']) > 1) :
      pool = multiprocessing.Pool(int(args['pool']))
    else :
      pool = None
    
    if (args['learning_C'] != None) :
        learning_C = parse_range(args['learning_C'], t=float)
        if not isinstance(learning_C,list) :
            learning_C = [learning_C]
    elif not isinstance(learning_C,list) :
        learning_C = [config.learning_C]
    else :
        learning_C = config.learning_C

    if (args['train_test_partitions'] != None) :
        partitions_json = load_data(args['train_test_partitions'], 'partitions', None, None, "KernelLearning: ")
        if partitions_json == None :
            print "Could not load Train / Test Partitions from %s" % (args['train_test_partitions'],)
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else :
        partitions = generate_partitions(config, segment_info, 
                                         cv_iterations=5 if (len(learning_C) > 1) else 0)

    if isinstance(learning_C, list) and len(learning_C) > 1 and len(partitions.cross_validation) > 0 :
        num_cv = len(partitions.cross_validation)
        learning_wrap = LearningWrapper( kernel )
        if pool != None :
            results = pool.map(learning_wrap, itertools.product(partitions.cross_validation, learning_C))
        else :
            results = map(learning_wrap, itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        results = list(results)
        print len(results)
        for C in learning_C :
            correct = Learning(config, [_result for (_C, _result) in results if C == _C]).get_average_correct()
            if correct > max_correct :
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print "KernelLearning: using C = %s, correct = %s" % (config.learning_C, max_correct)
    else :
        if isinstance(learning_C, list) :
            config.learning_C = learning_C[0]
        else :
            config.learning_C = learning_C

    learning_wrap = LearningWrapper( kernel )

    if pool != None :
        results = pool.map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C]))
    else :
        results = map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C,result) in results])

    if args['outfile'] == None :
        learning_filename = KernelLearning.get_learning_filename(config)
    else :
        learning_filename = args['outfile']

    correct = learning.get_average_correct()
    print "%s correct %2.2f%% error %2.2f%% classes %s" % ("KernelLearning:", correct * 100.0, (1.0 - correct)*100.0, 
                                                   len(set([s.max_label() for s in kernel.segment_info])))
    print "Writing %s" % (learning_filename, )
    learning.config.status = "KernelLearning"
    save_data(learning_filename, learning.toJSONDict())