Ejemplo n.º 1
0
 def fromJSONDict(cls, json):
     """Build an instance of ``cls`` from its JSON dictionary form.

     ``json['config']`` is converted with ``Configuration.fromJSONDict``
     and passed positionally.  The 'segment_info', 'cross_validation' and
     'evaluation' entries are each run through ``cond_get_obj_list`` with
     their element class and passed by keyword.
     """
     config = Configuration.fromJSONDict(json['config'])
     segment_info = cond_get_obj_list(json, 'segment_info', SegmentInfo)
     cross_validation = cond_get_obj_list(json, 'cross_validation',
                                          TrainTestPartition)
     evaluation = cond_get_obj_list(json, 'evaluation', TrainTestPartition)
     return cls(config,
                segment_info=segment_info,
                cross_validation=cross_validation,
                evaluation=evaluation)
Ejemplo n.º 2
0
def main(argv) :
    """Command-line entry point of the cross validation tool.

    Parses ``argv`` (``argv[0]`` is the program name), loads the input data
    and the train/test partitions with ``load_data``, runs
    ``CrossValidation.cross_validate`` with the requested kernel /
    distances / learning modules and argument ranges, and stores
    ``cv.toJSONDict()`` with ``save_data``.

    When ``--outfile`` is not given the file name is taken from
    ``CrossValidation.get_cross_validation_filename(cv.config)``.

    Exits with status 1 if either input file yields ``None`` from
    ``load_data``.
    """
    import sys  # local import: only needed for the failure exits below

    parser = argparse.ArgumentParser(description="General purpose cross validation tool")
    parser.add_argument("--kernel-module", "-K")
    parser.add_argument("--kernel-arg", "-k")
    parser.add_argument("--distances-module", "-D")
    parser.add_argument("--distances-arg", "-d")
    parser.add_argument("--learning-module", "-L")
    parser.add_argument("--learning-arg", "-l")
    parser.add_argument("--infile", "-i")
    parser.add_argument("--outfile", "-o")
    parser.add_argument("--train-test-partitions", "-t")
    parser.add_argument("--pool", "-p", type=int, default=max(1,multiprocessing.cpu_count()-2))
    parser.add_argument("--timeout", type=int, default=0)
    args = parser.parse_args(argv[1:])

    # NOTE(review): load_data appears to return None when a file cannot be
    # read; stop with a clear message instead of failing further down with
    # a TypeError on input_json['config'].
    input_json = load_data(args.infile, "input", None, None, argv[0] + ":")
    if input_json is None :
        print("Could not load --infile : %s" % (args.infile,))
        sys.exit(1)
    partitions_json = load_data(args.train_test_partitions, "input", None, None, argv[0] + ":")
    if partitions_json is None :
        print("Could not load --train-test-partitions : %s" % (args.train_test_partitions,))
        sys.exit(1)
    partitions = TrainTestPartitions.fromJSONDict(partitions_json)

    # A pool is only worth creating when more than one worker is requested.
    pool = multiprocessing.Pool(args.pool) if args.pool > 1 else None

    def optional_range(text) :
        """Parse a range argument as floats; None when it was not given."""
        if text is None :
            return None
        return parse_range(text, t=float)

    kernel_arg = optional_range(args.kernel_arg)
    distances_arg = optional_range(args.distances_arg)
    learning_arg = optional_range(args.learning_arg)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Kernel %s distance %s learning %s" % (kernel_arg, distances_arg, learning_arg))
    cv = CrossValidation(input_json,
                         config=Configuration.fromJSONDict(input_json['config']),
                         kernel_module=args.kernel_module,
                         kernel_arg=kernel_arg,
                         distances_module=args.distances_module,
                         distances_arg=distances_arg,
                         learning_module=args.learning_module,
                         learning_arg=learning_arg,
                         partitions=partitions,
                         pool=pool,
                         timeout=args.timeout)
    cv.cross_validate()

    if args.outfile is None :
        args.outfile = CrossValidation.get_cross_validation_filename(cv.config)

    print("Writing %s" % args.outfile)
    save_data(args.outfile, cv.toJSONDict())
Ejemplo n.º 3
0
 def fromJSONDict(cls, json):
     """Build an instance of ``cls`` from its JSON dictionary form.

     The first positional argument is always ``None``.  ``json['config']``
     is converted with ``Configuration.fromJSONDict``; every other keyword
     is looked up in ``json`` with ``cond_get`` and passed on unchanged.
     """
     config = Configuration.fromJSONDict(json['config'])
     # Keyword name and JSON key are identical for all of these entries.
     optional_keys = ('kernel_module', 'kernel_arg',
                      'distances_module', 'distances_arg',
                      'learning_module', 'learning_arg',
                      'partitions')
     optional = dict((key, cond_get(json, key)) for key in optional_keys)
     return cls(None, config=config, **optional)
Ejemplo n.º 4
0
def parse_configuration_files(files, verbose=True):
    """Expand one or more configuration files into Configuration objects.

    Each file is read with ``load_data`` and may hold either a single
    configuration dictionary or a list of them.  The 'window_size',
    'window_stride', 'segment_size', 'segment_stride',
    'persistence_epsilon' and 'max_simplices' entries are passed through
    ``parse_range``; one ``Configuration`` is created for every combination
    of the resulting values.  The remaining recognised entries are copied
    unchanged, with ``None`` standing in for any entry that is absent.

    Args:
        files: a file name, or a list of file names.
        verbose: when true, ``sys.argv[0] + ": "`` is handed to
            ``load_data`` as its last argument, otherwise ``None``.

    Returns:
        A list of ``Configuration`` objects in file order, then
        configuration order, then product order.

    The process exits with status 0 as soon as ``load_data`` returns
    ``None`` for a file.
    """
    if not isinstance(files, list):
        files = [files]

    # Entries that may describe a range, with the element type handed to
    # parse_range.  The order here fixes the nesting order of the product.
    swept = (('window_size', int),
             ('window_stride', int),
             ('segment_size', int),
             ('segment_stride', int),
             ('persistence_epsilon', float),
             ('max_simplices', int))
    swept_keys = [key for key, _ in swept]

    # Entries that are forwarded to Configuration untouched.
    passthrough = ('kernel_scale', 'kernel_gamma', 'invariant_epsilon',
                   'data_file', 'data_index', 'label_index',
                   'out_directory', 'learning_split', 'learning_iterations',
                   'learning_C', 'persistence_degree', 'data_type',
                   'post_process', 'post_process_arg')

    def swept_values(config, key, t):
        """Return the list of values to iterate over for one swept entry."""
        if key not in config:
            # A missing entry still contributes one (None) value so the
            # product is not emptied.
            return [None]
        values = parse_range(str(config[key]), t=t)
        return values if isinstance(values, list) else [values]

    final_configs = []
    for f in files:
        configs = load_data(f, "Configurations", None, None,
                            (sys.argv[0] + ": ") if verbose else None)
        if configs is None:
            sys.exit(0)
        if isinstance(configs, dict):
            configs = [configs]
        for config in configs:
            fixed = dict((key, config.get(key)) for key in passthrough)
            axes = [swept_values(config, key, t) for key, t in swept]
            for combination in itertools.product(*axes):
                kwargs = dict(fixed)
                kwargs.update(zip(swept_keys, combination))
                final_configs.append(Configuration(**kwargs))
    return final_configs
Ejemplo n.º 5
0
 def fromJSONDict(cls, json):
     """Build an instance of ``cls`` from its JSON dictionary form.

     Positional arguments passed to ``cls``: the ``Configuration`` made
     from ``json['config']``, a list of rows where every element of
     ``json['distances']`` has been converted with
     ``Distance.fromJSONDict``, and the result of ``cond_get_obj_list``
     for 'segment_info'.
     """
     config = Configuration.fromJSONDict(json['config'])
     distances = []
     for row in json['distances']:
         distances.append([Distance.fromJSONDict(entry) for entry in row])
     segment_info = cond_get_obj_list(json, 'segment_info', SegmentInfo)
     return cls(config, distances, segment_info)
def main(argv) :
    """Command-line entry point of the data / persistence diagram plotter.

    With ``--persistences`` the precomputed diagrams are loaded and both
    the full and the windowed data sets are built from copies of the stored
    configuration (the full one with ``window_size`` set to -1).  Otherwise
    the configurations are assembled from the command-line options, which
    requires ``--infile`` and ``--window-size``.

    Finally ``App(0, full_data, windowed_data, persistences)`` is created
    and its ``MainLoop`` is run; a ``KeyboardInterrupt`` ends the program
    with status 0.

    Exits with status 1 when the persistence file cannot be loaded and via
    ``parser.error`` (status 2) on missing or malformed options.
    """
    parser = argparse.ArgumentParser(description="utility to plot \
    data and dynamically generated persistence diagrams. Using \
    the persistence option uses precomputed persistence and ignores all \
    the other options.")
    parser.add_argument('-i', '--infile', help="Data to read")
    parser.add_argument('-m', '--max-simplices', default=2000000,
                        type=int, help="Maximum number of simplices for persistence \
                        generation")
    parser.add_argument('-I', '--data-index', help="Index of data field for data types that require it")
    parser.add_argument('-L', '--label-index', type=int, help="Index of label field for data types that require it")
    parser.add_argument('-s', '--segment-size', type=int, help="Segment size for data types that require it")
    parser.add_argument('-S', '--segment-stride', type=int, help="Segment stride for data types that require it")
    parser.add_argument('-w', '--window-size', help="Window size for \
    persistence generation. Integer is a literal window size, float \
    between 0 and 1 is a fraction of the total Segment size")
    parser.add_argument('-p', '--persistences', help="Precomputed persistence diagram")
    parser.add_argument('-t', '--data-type', default="UCRSegments", help="Data type of the segments in infile")
    args = parser.parse_args(argv[1:])
    if args.persistences is not None :
        persistences_json = load_data(args.persistences, 'persistences', None, None, argv[0])
        if persistences_json is None :
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Could not load --persistences : %s" % (args.persistences,))
            # The site-provided exit() is meant for interactive sessions and
            # reported success; signal the failure to the caller instead.
            sys.exit(1)
        persistences = PD.fromJSONDict(persistences_json)
        full_config = copy(persistences.config)
        full_config.window_size = -1
        segments_module = importlib.import_module( 'persistence.' + persistences.config.data_type)
        segments_class = getattr(segments_module, persistences.config.data_type)

        full_data = segments_class(full_config)
        window_config = copy(persistences.config)
        windowed_data = segments_class(window_config)
    else :
        # Without these two options the code below would fail with an
        # AttributeError / TypeError on None.
        if args.infile is None or args.window_size is None :
            parser.error("--infile and --window-size are required unless --persistences is given")
        try :
            requested_window = float(args.window_size)
        except ValueError :
            parser.error("--window-size must be a number, got %r" % (args.window_size,))

        def make_config(data_file, window_size) :
            """Build a Configuration for the given data file(s) and window size."""
            return Configuration.fromJSONDict({"data_type": args.data_type,
                                               "data_file": data_file,
                                               "label_index": 0,
                                               "max_simplices": args.max_simplices,
                                               "window_size": window_size,
                                               "window_stride": 1})

        def apply_overrides(config) :
            """Copy the optional command-line overrides onto config."""
            if args.segment_size is not None :
                config.segment_size = args.segment_size
            if args.segment_stride is not None :
                config.segment_stride = args.segment_stride
            if args.data_index is not None :
                config.data_index = parse_index(args.data_index)[0]
            if args.label_index is not None :
                config.label_index = args.label_index

        segments_module = importlib.import_module( 'persistence.' + args.data_type)
        segments_class = getattr(segments_module, args.data_type)

        full_config = make_config(args.infile, -1)
        # Several input files may be given as one colon separated string.
        if full_config.data_file.find(":") != -1 :
            full_config.data_file = full_config.data_file.split(':')
        apply_overrides(full_config)
        full_data = segments_class(full_config)

        if requested_window < 1.0 :
            # Fractional value: scale by the window size of the full data.
            window_size = int(requested_window * full_data.config.window_size)
        else :
            # Convert the parsed float so that values such as "2.0" do not
            # raise ValueError the way int("2.0") would.
            window_size = int(requested_window)

        window_config = make_config(full_config.data_file, window_size)
        apply_overrides(window_config)
        windowed_data = segments_class(window_config)
        # One empty slot per segment.
        persistences = PD(windowed_data.config, [None for _ in windowed_data.segments])
    try:
        app = App(0, full_data, windowed_data, persistences)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
Ejemplo n.º 7
0
from persistence.PersistenceKernel import PersistenceKernel
from persistence.ScaleSpaceSimilarity import ScaleSpaceSimilarity
from persistence.DistanceLearning import DistanceLearning
from persistence.KernelLearning import KernelLearning
from persistence.RBFKernel import RBFKernel
from persistence.EuclideanDistances import EuclideanDistances

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=
        "Utility to run common variations of learning tasks on a configuration file, generating segments, distances, kernels, and learning results as appropriate"
    )
    parser.add_argument("--config")
    parser.add_argument("--pool", default=1, type=int)
    args = parser.parse_args(sys.argv[1:])
    config = Configuration.fromJSONDict(
        parse_args([sys.argv[0], "--config", args.config])[0])
    print config

    module = importlib.import_module('persistence.' + config.data_type)
    module_class = getattr(module, config.data_type)
    segment_filename = module_class.get_segment_filename(config)

    segments = module_class(config)
    print "Writing %s" % segment_filename
    save_data(segment_filename, segments.toJSONDict())

    #TrainTestSplit
    partition_command = [
        "python", "-u", "-O", "-m", "persistence.PartitionData", "--segments",
        segment_filename, "--learning-split",
        str(config.learning_split), "--learning-iterations",
Ejemplo n.º 8
0
 def fromJSONDict(cls, json):
     """Build an instance of ``cls`` from its JSON dictionary form.

     ``json['config']`` is converted with ``Configuration.fromJSONDict``,
     ``json['kernel_matrix']`` is passed through unchanged, and
     'segment_info' is run through ``cond_get_obj_list`` with
     ``SegmentInfo``.
     """
     config = Configuration.fromJSONDict(json['config'])
     kernel_matrix = json['kernel_matrix']
     segment_info = cond_get_obj_list(json, 'segment_info', SegmentInfo)
     return cls(config,
                kernel_matrix=kernel_matrix,
                segment_info=segment_info)
Ejemplo n.º 9
0
        segments_json = load_data(args.segments, 'segments', None, None,
                                  sys.argv[0] + ": ")
        segments = Segments.fromJSONDict(segments_json)
        args.type = segments.config.data_type
        config = segments.config
        config.window_size = args.window_size
        config.window_stride = 1
        for segment in segments.segments:
            point_len = len(segment.windows[0]) / segment.segment_size
            this_data = [(segment.windows[0][i:i + point_len],
                          segment.filename)
                         for i in range(0, len(segment.windows[0]), point_len)]
            data.extend(this_data)
    else:
        config = Configuration.fromJSONDict(
            dict([('data_file', args.infile), ('data_type', args.type),
                  ('window_size', args.window_size), ('window_stride', 1),
                  ('threads', args.pool), ('data_index', [1, 2, 3])]))
        for filename in args.infile:
            with open(filename, 'r') as data_file:
                if args.type == "BirdSoundsSegments":
                    this_data = [float(line.strip()) for line in data_file]
                else:
                    data_reader = csv.reader(data_file, delimiter=',')
                    this_data = [([line[i]
                                   for i in config.data_index], filename)
                                 for line in data_reader]
                    this_data = this_data[0:len(this_data)]
                data.extend(this_data)
            # labels = set([d[1] for d in data])

    segments_module = importlib.import_module('persistence.' + args.type)
    if args.segments != None :
        segments_json = load_data(args.segments, 'segments', None, None, sys.argv[0] + ": ")
        segments = Segments.fromJSONDict(segments_json)
        args.type = segments.config.data_type
        config = segments.config
        config.window_size = args.window_size
        config.window_stride = 1
        for segment in segments.segments :
            point_len = len(segment.windows[0]) / segment.segment_size
            this_data = [(segment.windows[0][i:i+point_len], segment.filename)
                         for i in range(0, len(segment.windows[0]), point_len)]
            data.extend(this_data)
    else :
        config = Configuration.fromJSONDict(dict([('data_file', args.infile),
                                                  ('data_type', args.type),
                                                  ('window_size', args.window_size),
                                                  ('window_stride', 1),
                                                  ('threads', args.pool),
                                                  ('data_index', [1,2,3])]))
        for filename in args.infile :
            with open(filename, 'r') as data_file :
                if args.type == "BirdSoundsSegments" :
                    this_data = [float(line.strip()) for line in data_file]
                else :
                    data_reader = csv.reader(data_file, delimiter=',')
                    this_data = [([line[i] for i in config.data_index], filename) for line in data_reader]
                    this_data = this_data[0:len(this_data)]
                data.extend(this_data)
            # labels = set([d[1] for d in data])

    segments_module = importlib.import_module( 'persistence.' + args.type )
    segments_class = getattr(segments_module, args.type)
Ejemplo n.º 11
0
def main(argv):
    """Command-line entry point of the data / persistence diagram plotter.

    With ``--persistences`` the precomputed diagrams are loaded and both
    the full and the windowed data sets are built from copies of the stored
    configuration (the full one with ``window_size`` set to -1).  Otherwise
    the configurations are assembled from the command-line options, which
    requires ``--infile`` and ``--window-size``.

    Finally ``App(0, full_data, windowed_data, persistences)`` is created
    and its ``MainLoop`` is run; a ``KeyboardInterrupt`` ends the program
    with status 0.

    Exits with status 1 when the persistence file cannot be loaded and via
    ``parser.error`` (status 2) on missing or malformed options.
    """
    parser = argparse.ArgumentParser(description="utility to plot \
    data and dynamically generated persistence diagrams. Using \
    the persistence option uses precomputed persistence and ignores all \
    the other options.")
    parser.add_argument('-i', '--infile', help="Data to read")
    parser.add_argument('-m',
                        '--max-simplices',
                        default=2000000,
                        type=int,
                        help="Maximum number of simplices for persistence \
                        generation")
    parser.add_argument(
        '-I',
        '--data-index',
        help="Index of data field for data types that require it")
    parser.add_argument(
        '-L',
        '--label-index',
        type=int,
        help="Index of label field for data types that require it")
    parser.add_argument('-s',
                        '--segment-size',
                        type=int,
                        help="Segment size for data types that require it")
    parser.add_argument('-S',
                        '--segment-stride',
                        type=int,
                        help="Segment stride for data types that require it")
    parser.add_argument('-w',
                        '--window-size',
                        help="Window size for \
    persistence generation. Integer is a literal window size, float \
    between 0 and 1 is a fraction of the total Segment size")
    parser.add_argument('-p',
                        '--persistences',
                        help="Precomputed persistence diagram")
    parser.add_argument('-t',
                        '--data-type',
                        default="UCRSegments",
                        help="Data type of the segments in infile")
    args = parser.parse_args(argv[1:])
    if args.persistences is not None:
        persistences_json = load_data(args.persistences, 'persistences', None,
                                      None, argv[0])
        if persistences_json is None:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Could not load --persistences : %s" % (args.persistences, ))
            # The site-provided exit() is meant for interactive sessions and
            # reported success; signal the failure to the caller instead.
            sys.exit(1)
        persistences = PD.fromJSONDict(persistences_json)
        full_config = copy(persistences.config)
        full_config.window_size = -1
        segments_module = importlib.import_module(
            'persistence.' + persistences.config.data_type)
        segments_class = getattr(segments_module,
                                 persistences.config.data_type)

        full_data = segments_class(full_config)
        window_config = copy(persistences.config)
        windowed_data = segments_class(window_config)
    else:
        # Without these two options the code below would fail with an
        # AttributeError / TypeError on None.
        if args.infile is None or args.window_size is None:
            parser.error("--infile and --window-size are required "
                         "unless --persistences is given")
        try:
            requested_window = float(args.window_size)
        except ValueError:
            parser.error("--window-size must be a number, got %r" %
                         (args.window_size, ))

        def make_config(data_file, window_size):
            """Build a Configuration for the given data file(s) and window size."""
            return Configuration.fromJSONDict({
                "data_type": args.data_type,
                "data_file": data_file,
                "label_index": 0,
                "max_simplices": args.max_simplices,
                "window_size": window_size,
                "window_stride": 1,
            })

        def apply_overrides(config):
            """Copy the optional command-line overrides onto config."""
            if args.segment_size is not None:
                config.segment_size = args.segment_size
            if args.segment_stride is not None:
                config.segment_stride = args.segment_stride
            if args.data_index is not None:
                config.data_index = parse_index(args.data_index)[0]
            if args.label_index is not None:
                config.label_index = args.label_index

        segments_module = importlib.import_module('persistence.' +
                                                  args.data_type)
        segments_class = getattr(segments_module, args.data_type)

        full_config = make_config(args.infile, -1)
        # Several input files may be given as one colon separated string.
        if full_config.data_file.find(":") != -1:
            full_config.data_file = full_config.data_file.split(':')
        apply_overrides(full_config)
        full_data = segments_class(full_config)

        if requested_window < 1.0:
            # Fractional value: scale by the window size of the full data.
            window_size = int(requested_window * full_data.config.window_size)
        else:
            # Convert the parsed float so that values such as "2.0" do not
            # raise ValueError the way int("2.0") would.
            window_size = int(requested_window)

        window_config = make_config(full_config.data_file, window_size)
        apply_overrides(window_config)
        windowed_data = segments_class(window_config)
        # One empty slot per segment.
        persistences = PD(windowed_data.config,
                          [None for _ in windowed_data.segments])
    try:
        app = App(0, full_data, windowed_data, persistences)
        app.MainLoop()
    except KeyboardInterrupt:
        sys.exit(0)
Ejemplo n.º 12
0
 def fromJSONDict(cls, json):
     """Build an instance of ``cls`` from its JSON dictionary form.

     Positional arguments passed to ``cls``: the ``Configuration`` made
     from ``json['config']``, the result of ``cond_get_obj_list`` for
     'results' with ``LearningResult``, and the result of ``cond_get`` for
     'kernel_files'.
     """
     config = Configuration.fromJSONDict(json['config'])
     results = cond_get_obj_list(json, 'results', LearningResult)
     kernel_files = cond_get(json, 'kernel_files')
     return cls(config, results, kernel_files)
Ejemplo n.º 13
0
 def fromJSONDict(cls, json):
     """Build an instance of ``cls`` from its JSON dictionary form.

     Positional arguments passed to ``cls``: the ``Configuration`` made
     from ``json['config']``, a new list holding the items of
     ``json['features']``, and the result of ``cond_get_obj_list`` for
     'segment_info' with ``SegmentInfo``.
     """
     config = Configuration.fromJSONDict(json['config'])
     # Copy so the instance does not share the list object held by json.
     features = list(json['features'])
     segment_info = cond_get_obj_list(json, 'segment_info', SegmentInfo)
     return cls(config, features, segment_info)
Ejemplo n.º 14
0
from persistence.Datatypes.Configuration import Configuration, parse_args, parse_range
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams
from persistence.Datatypes.TrainTestPartitions import TrainTestPartitions
from persistence.PersistenceKernel import PersistenceKernel
from persistence.ScaleSpaceSimilarity import ScaleSpaceSimilarity
from persistence.DistanceLearning import DistanceLearning
from persistence.KernelLearning import KernelLearning
from persistence.RBFKernel import RBFKernel
from persistence.EuclideanDistances import EuclideanDistances

if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Utility to run common variations of learning tasks on a configuration file, generating segments, distances, kernels, and learning results as appropriate")
    parser.add_argument("--config")
    parser.add_argument("--pool", default=1, type=int)
    args = parser.parse_args(sys.argv[1:])
    config = Configuration.fromJSONDict(parse_args([sys.argv[0], "--config", args.config])[0])
    print config

    module = importlib.import_module('persistence.' + config.data_type)
    module_class = getattr(module, config.data_type)
    segment_filename = module_class.get_segment_filename(config)

    segments = module_class(config)
    print "Writing %s" % segment_filename
    save_data(segment_filename, segments.toJSONDict())

    #TrainTestSplit
    partition_command = ["python", "-u", "-O", "-m", "persistence.PartitionData", 
                         "--segments", segment_filename, 
                         "--learning-split", str(config.learning_split), 
                         "--learning-iterations", str(config.learning_iterations), 
Ejemplo n.º 15
0
 def fromJSONDict(cls, json):
     """Build an instance of ``cls`` from its JSON dictionary form.

     ``json['config']`` becomes a ``Configuration`` (positional argument).
     'segment_info', 'cross_validation' and 'evaluation' are each run
     through ``cond_get_obj_list`` with their element class and passed by
     keyword under the same name.
     """
     config = Configuration.fromJSONDict(json['config'])
     segment_info = cond_get_obj_list(json, 'segment_info', SegmentInfo)
     cross_validation = cond_get_obj_list(json, 'cross_validation',
                                          TrainTestPartition)
     evaluation = cond_get_obj_list(json, 'evaluation', TrainTestPartition)
     return cls(config,
                segment_info=segment_info,
                cross_validation=cross_validation,
                evaluation=evaluation)
Ejemplo n.º 16
0
 def fromJSONDict(cls, json):
     """Build an instance of ``cls`` from its JSON dictionary form.

     Positional arguments passed to ``cls``: the ``Configuration`` made
     from ``json['config']`` and the result of ``cond_get_obj_list`` for
     'diagrams' with ``PersistenceDiagram``.
     """
     config = Configuration.fromJSONDict(json['config'])
     diagrams = cond_get_obj_list(json, 'diagrams', PersistenceDiagram)
     return cls(config, diagrams)