Example #1
def main(argv) :
    args = Configuration.parse_args(argv)
    config = list(Configuration.ArgsIter(args))
    
    if (len(config) > 1) :
        print "Unit testing currently supports only one configuration possibility"
    config = Configuration.Configuration.fromJSONDict(config[0])
    # Please don't put colons in your data_file names if you want this to work
    # We interpret "filea:fileb" as ["filea", "fileb"]
    segments_module = importlib.import_module('persistence.' + config['data_type'])
    segments_class = getattr(segments_module, config['data_type'])

    segments = segments_class(config)
    # generate all the segments to save to disk
    
    if 'outfile' not in args[0] or args[0]['outfile'] is None :
        outfile = segments_class.get_segment_filename(segments.config)
    else :
        outfile = args[0]['outfile']

    print "Writing %s" % outfile
    # Unless told otherwise, don't rewrite the output, because disk IO and gzip are slow
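    # Rewrite only if the config explicitly flags segments for reevaluation,
    # or if the output file does not exist yet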
    if (isinstance(segments.config.reevaluate, dict) and
        segments.config.reevaluate.get('segments', False)) or \
       (not os.path.isfile(outfile)) :
        
        save_data(outfile, segments.toJSONDict())
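
The import_module/getattr pair above resolves a class named after the configured data type at runtime; the same idiom recurs in several examples below. A minimal standalone sketch (the "Kitchen" data type is an illustrative assumption, not taken from the source):

import importlib

data_type = "Kitchen"                                  # stand-in for config['data_type']
module = importlib.import_module("persistence." + data_type)
segments_class = getattr(module, data_type)            # resolves the class persistence.Kitchen.Kitchen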
Example #2
def main(argv) :
    parser = argparse.ArgumentParser(description="General purpose cross validation tool")
    parser.add_argument("--kernel-module", "-K")
    parser.add_argument("--kernel-arg", "-k")
    parser.add_argument("--distances-module", "-D")
    parser.add_argument("--distances-arg", "-d")
    parser.add_argument("--learning-module", "-L")
    parser.add_argument("--learning-arg", "-l")
    parser.add_argument("--infile", "-i")
    parser.add_argument("--outfile", "-o")
    parser.add_argument("--train-test-partitions", "-t")
    parser.add_argument("--pool", "-p", type=int, default=max(1,multiprocessing.cpu_count()-2))
    parser.add_argument("--timeout", type=int, default=0)
    args = parser.parse_args(argv[1:])
    input_json = load_data(args.infile, "input", None, None, argv[0] + ":")
    partitions_json = load_data(args.train_test_partitions, "input", None, None, argv[0] + ":")
    partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    if args.pool > 1 :
        pool = multiprocessing.Pool(args.pool)
    else :
        pool = None
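    # pool=None signals downstream code (presumably CrossValidation itself)
    # to run serially instead of using multiprocessing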
    
    if args.kernel_arg is not None :
        kernel_arg = parse_range(args.kernel_arg, t=float)
    else :
        kernel_arg = None

    if args.distances_arg is not None :
        distances_arg = parse_range(args.distances_arg, t=float)
    else :
        distances_arg = None

    if args.learning_arg is not None :
        learning_arg = parse_range(args.learning_arg, t=float)
    else :
        learning_arg = None

    print "Kernel %s distance %s learning %s" % (kernel_arg, distances_arg, learning_arg)
    cv = CrossValidation(input_json, 
                         config=Configuration.fromJSONDict(input_json['config']),
                         kernel_module=args.kernel_module, 
                         kernel_arg=kernel_arg, 
                         distances_module=args.distances_module, 
                         distances_arg=distances_arg, 
                         learning_module=args.learning_module, 
                         learning_arg=learning_arg, 
                         partitions=partitions, 
                         pool=pool,
                         timeout=args.timeout)
    cv.cross_validate()
    
    if args.outfile is None :
        args.outfile = CrossValidation.get_cross_validation_filename(cv.config)
    
    print "Writing %s" % args.outfile
    save_data(args.outfile, cv.toJSONDict())
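
As a usage sketch, a run of the tool above might look like the following; the module path, file names, and argument values are illustrative assumptions, and the range syntax accepted by parse_range is not shown in this excerpt:

    python -m persistence.CrossValidation --infile input.json.gz \
        --train-test-partitions partitions.json.gz \
        --kernel-module persistence.Kernel --pool 4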
Example #3
import sys
import argparse
import importlib

from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram
from persistence.Datatypes.Configuration import Configuration
from persistence.Datatypes.Segments import SegmentInfo

if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Utility to add SegmentInfo data to a PersistenceDiagrams file")
    parser.add_argument("--infile")
    parser.add_argument("--outfile")
    args = parser.parse_args(sys.argv[1:])
    in_json = load_data(args.infile, "persistence diagrams", None, None, sys.argv[0] + " : ")
    pd = PersistenceDiagrams.fromJSONDict(in_json)
    module = importlib.import_module('persistence.' + pd.config.data_type)
    module_class = getattr(module, pd.config.data_type)
    segment_filename = module_class.get_segment_filename(pd.config)
    seg_json = load_data(segment_filename, "segments", None, None, sys.argv[0] + " : ")
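    # The diagrams and the segments they came from are assumed to be
    # index-aligned, so a plain zip() pairs each diagram with its segment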
    
    for (diagram, segment) in zip(pd.diagrams, seg_json['segments']) :
        diagram.segment_info = SegmentInfo.fromJSONDict(segment)

    print "Writing %s" % (args.outfile,)
    save_data(args.outfile, pd.toJSONDict())
Example #4
    if status is not None :
        in_obj.config.status = status
    if   "Segments" in file_class or \
         "Post" in file_class :
        out_file = module_class.get_segment_filename(in_obj.config, gz=False)
    elif "Features" in file_class :
        out_file = module_class.get_features_filename(in_obj.config, gz=False)
    elif "PersistenceDiagrams" in file_class :
        out_file = module_class.get_persistence_diagrams_filename(in_obj.config, gz=False)
    elif "Partition" in file_class :
        out_file = module_class.get_partition_filename(in_obj.config, gz=False)
    elif "Learning" in file_class :
        out_file = module_class.get_learning_filename(in_obj.config, gz=False)
    elif "Distances" in file_class or \
         "ScaleSpaceSimilarity" in file_class :
        out_file = module_class.get_distances_filename(in_obj.config, gz=False)
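    # Note: the "AverageKernel" test must precede the plain "Kernel" test below,
    # because the substring "Kernel" also matches "AverageKernel"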
    elif "AverageKernel" in file_class : 
        out_file = get_filename(in_obj.config, 
                                ['max_simplices', 'persistence_epsilon', 
                                 'segment_filename', 'segment_stride', 'segment_size', 
                                 'window_size', 'window_stride', 
                                 'kernel_scale', 'kernel_gamma', 'invariant_epsilon', 
                                 'data_file', 'data_index', 'label_index', 'persistence_degree', 
                                 'data_type', 'post_process', 'post_process_arg'], "AverageKernel")
    elif "Kernel" in file_class :
        out_file = module_class.get_kernel_filename(in_obj.config, gz=False)
    elif "CrossValidation" in file_class :
        out_file = module_class.get_cross_validation_filename(in_obj.config, gz=False)
    print "Writing %s" % (out_file,)
    save_data(out_file, in_obj.toJSONDict())
Example #5
        "Utility to run common variations of learning tasks on a configuration file, generating segments, distances, kernels, and learning results as appropriate"
    )
    parser.add_argument("--config")
    parser.add_argument("--pool", default=1, type=int)
    args = parser.parse_args(sys.argv[1:])
    config = Configuration.fromJSONDict(
        parse_args([sys.argv[0], "--config", args.config])[0])
    print config

    module = importlib.import_module('persistence.' + config.data_type)
    module_class = getattr(module, config.data_type)
    segment_filename = module_class.get_segment_filename(config)

    segments = module_class(config)
    print "Writing %s" % segment_filename
    save_data(segment_filename, segments.toJSONDict())

    #TrainTestSplit
    partition_command = [
        "python", "-u", "-O", "-m", "persistence.PartitionData", "--segments",
        segment_filename, "--learning-split",
        str(config.learning_split), "--learning-iterations",
        str(config.learning_iterations), "--cv-iterations", "5"
    ]
    subprocess.call(partition_command)
    partition_filename = TrainTestPartitions.get_partition_filename(config)

    #PersistenceDiagrams
    persistence_command = [
        "python", "-u", "-O", "-m", "persistence.PersistenceGenerator",
        "--pool",
Example #6
    samples = list(set([d['segment_start'] for d in output]))
    samples.sort()
    out_data = []
    for s in samples:
        sample_data = [d for d in output if d['segment_start'] == s]
        segment_sizes = list(set([x['segment_size'] for x in sample_data]))
        segment_sizes.sort()
        for size in segment_sizes:
            segment_data = [
                d for d in sample_data if d['segment_size'] == size
            ]
            w_distances = WassersteinDistances(
                None,
                PersistenceDiagrams(None, [
                    PersistenceDiagram.fromJSONDict(d['diagram'])
                    for d in segment_data
                ]),
                pool=None)
            distances = BottleneckDistances(
                None,
                PersistenceDiagrams(None, [
                    PersistenceDiagram.fromJSONDict(d['diagram'])
                    for d in segment_data
                ]),
                pool=None)
            out_data.append((segment_data, distances, w_distances))
    goal = len(out_data)
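    # imap() yields results in submission order as workers finish,
    # so the loop below can report progress incrementally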
    computed = pool.imap(map_func, out_data)
    done = []

    for (c, i) in itertools.izip(computed, range(goal)):
        print "%d of %d" % (i + 1, goal)
        done.append(c)
    save_data(args.outfile, done)
Example #7
import sys
import math
import multiprocessing

from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram


def avg(l):
    return sum(l, 0.0) / len(l)


def average_density(diagram):
    points = [(p[0], p[1]) for p in diagram.points if p[2] == 1]
    if len(points) > 2:
        diagram_distances = []
        for (x0, y0) in points:
            distances = map(
                lambda (x1, y1): math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) *
                                           (y0 - y1)), points)
            distances.sort()
            # distances[0] is the point's distance to itself; average the
            # five nearest neighbors
            diagram_distances.append(avg(distances[1:6]))
        return avg(diagram_distances)
    else:
        return 0.0


if __name__ == "__main__":
    pool = multiprocessing.Pool(multiprocessing.cpu_count() - 2)
    for f in sys.argv[1:]:
        pds = PersistenceDiagrams.fromJSONDict(
            load_data(f, None, None, None, sys.argv[0] + " : "))
        densities = pool.map(average_density, pds.diagrams)
        save_data(f + "-density", list(densities))

Example #8
    output.sort(key=lambda x: (x['segment_start'], x['max_simplices'] / x['segment_size'] / x['segment_size'], x['segment_size']))
    
    print "Distance Computation"

    samples = list(set([d['segment_start'] for d in output]))
    samples.sort()
    out_data = []
    for s in samples :
        sample_data = [d for d in output if d['segment_start'] == s]
        segment_sizes = list(set([x['segment_size'] for x in sample_data]))
        segment_sizes.sort()
        for size in segment_sizes :
            segment_data = [d for d in sample_data if d['segment_size'] == size]
            w_distances = WassersteinDistances(None,
                                               PersistenceDiagrams(None, [PersistenceDiagram.fromJSONDict(d['diagram']) for d in segment_data]),
                                               pool=None)
            distances = BottleneckDistances(None,
                                            PersistenceDiagrams(None, [PersistenceDiagram.fromJSONDict(d['diagram']) for d in segment_data]),
                                            pool=None)
            out_data.append((segment_data, distances, w_distances))
    goal = len(out_data)
    computed = pool.imap(map_func, out_data)
    done = []

    for (c, i) in itertools.izip(computed, range(goal)) :
        print "%d of %d" % (i + 1, goal)
        done.append(c)
    save_data(args.outfile, done)
Example #9
if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description="Utility to run common variations of learning tasks on a configuration file, generating segments, distances, kernels, and learning results as appropriate")
    parser.add_argument("--config")
    parser.add_argument("--pool", default=1, type=int)
    args = parser.parse_args(sys.argv[1:])
    config = Configuration.fromJSONDict(parse_args([sys.argv[0], "--config", args.config])[0])
    print config

    module = importlib.import_module('persistence.' + config.data_type)
    module_class = getattr(module, config.data_type)
    segment_filename = module_class.get_segment_filename(config)

    segments = module_class(config)
    print "Writing %s" % segment_filename
    save_data(segment_filename, segments.toJSONDict())

    #TrainTestSplit
    partition_command = ["python", "-u", "-O", "-m", "persistence.PartitionData", 
                         "--segments", segment_filename, 
                         "--learning-split", str(config.learning_split), 
                         "--learning-iterations", str(config.learning_iterations), 
                         "--cv-iterations", "5"]
    subprocess.call(partition_command)
    partition_filename = TrainTestPartitions.get_partition_filename(config)
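    # PartitionData writes the partition file as a side effect; its name is
    # recomputed here so later stages (elided in this excerpt) can locate it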

    #PersistenceDiagrams
    persistence_command = ["python", "-u", "-O", "-m", "persistence.PersistenceGenerator", 
                           "--pool", str(args.pool),
                           "--infile", segment_filename]
    subprocess.call(persistence_command)
Example #10
    "Creates windowed segments of a single dimension for a multidimensioned dataset"
)
parser.add_argument("-i", "--infile")
parser.add_argument("-d", "--data-index", default=0, type=int)
parser.add_argument("-w", "--window-size", type=int)
parser.add_argument("-W", "--window-stride", default=1, type=int)
args = parser.parse_args(sys.argv[1:])
segments = Segments.fromJSONDict(
    load_data(args.infile, "segments", None, None, sys.argv[0] + ": "))
orig_window_size = segments.config.window_size
segments.config.window_size = args.window_size
segments.config.window_stride = args.window_stride
dimensions = len(segments.segments[0].data_index)
segments.config.data_index = segments.segments[0].data_index[args.data_index]
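# Each window stores samples with dimensions interleaved, so element
# (i + j) * dimensions + data_index is dimension data_index of sample i + j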
for segment in segments.segments:
    windows = [[
        segment.windows[0][(i + j) * dimensions + args.data_index]
        for j in range(args.window_size)
    ] for i in range(0, orig_window_size, args.window_stride)
               if ((i + args.window_size - 1) * dimensions +
                   args.data_index) < len(segment.windows[0])]
    segment.data_index = segment.data_index[args.data_index]
    segment.window_size = args.window_size
    segment.windows = windows
segment_module = importlib.import_module("persistence." +
                                         segments.config.data_type)
segment_class = getattr(segment_module, segments.config.data_type)
segment_filename = segment_class.get_segment_filename(segments.config)
print "Writing " + segment_filename
save_data(segment_filename, segments.toJSONDict())
Example #11
class segment_processing_callable:
    def __init__(self, outfile, max_simplices, epsilon, num_segments) :
        self.outfile = outfile
        self.max_simplices = max_simplices
        self.epsilon = epsilon
        self.num_segments = num_segments

    def __call__(self, (segment, index)) :
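        # Compute a full rips persistence diagram for the segment, then a series
        # of sparse approximations, recording the distance from each sparse
        # diagram to the full one along with sparsity and runtime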
        print "Computing full rips filtration"
        start = time.clock()
        filtration = rips_filtration_generator(segment.windows, 2)
        persistence = PersistentHomology()
        full_persistence_diagram = persistence.compute_persistence_full(filtration, 2)
        full_runtime = time.clock() - start
        diagram_points = []
        for p in range(full_persistence_diagram.num_pairs()) :
            pair = full_persistence_diagram.get_pair(p)
            diagram_points.append([pair.birth_time(), pair.death_time(), pair.dim()])
        full_pd = PD(segment_start=segment.segment_start,
                     labels=segment.labels, learning=segment.learning,
                     filename=segment.filename, points=diagram_points)

        sparse_pds = []
        if self.max_simplices is not None :
            for m in self.max_simplices :
                print "Computing sparse rips filtration max_simplices %s" % (int(round(m)),)
                start = time.clock()
                filtration = sparse_rips_filtration_generator(segment.windows, int(round(m)), None, 2)
                persistence = PersistentHomology()
                sparse_persistence_diagram = persistence.compute_persistence_sparse(filtration, 2)
                sparsity = [filtration.get_simplex_sparsity(i) for i in [0,1,2]]
                sparse_runtime = time.clock() - start
                
                diagram_points = []
                for p in range(sparse_persistence_diagram.num_pairs()) :
                    pair = sparse_persistence_diagram.get_pair(p)
                    diagram_points.append([pair.birth_time(), pair.death_time(), pair.dim()])
                    
                sparse_pd = PD(segment_start=segment.segment_start,
                               labels=segment.labels, learning=segment.learning,
                               filename=segment.filename, points=diagram_points)
                
                bottleneck = bottleneck_distance(full_pd.points, sparse_pd.points, 1)
                w1 = wasserstein_distance(full_pd.points, sparse_pd.points, 1, 1)
                w2 = wasserstein_distance(full_pd.points, sparse_pd.points, 1, 2)
                print "Distances: Bottleneck %g Wasserstein L1 %g L2 %g" % (bottleneck, w1, w2)
                sparse_pds.append(dict([("diagram", sparse_pd.toJSONDict()),
                                        ("max_simplices", int(round(m))), 
                                        ("sparsity", sparsity),
                                        ("bottleneck_distance", bottleneck),
                                        ("wasserstein_l1", w1),
                                        ("wasserstein_l2", w2),
                                        ("runtime", sparse_runtime)]))
        else :
            for e in self.epsilon :
                print "Computing sparse rips filtration epsilon %s" % (e,)
                start = time.clock()
                filtration = sparse_rips_filtration_generator(segment.windows, None, e, 2)
                persistence = PersistentHomology()
                sparse_persistence_diagram = persistence.compute_persistence_sparse(filtration, 2)
                sparsity = [filtration.get_simplex_sparsity(i) for i in [0,1,2]]
                sparse_runtime = time.clock() - start                
                
                diagram_points = []
                for p in range(sparse_persistence_diagram.num_pairs()) :
                    pair = sparse_persistence_diagram.get_pair(p)
                    diagram_points.append([pair.birth_time(), pair.death_time(), pair.dim()])
                    
                sparse_pd = PD(segment_start=segment.segment_start,
                               labels=segment.labels, learning=segment.learning,
                               filename=segment.filename, points=diagram_points)
                
                bottleneck = bottleneck_distance(full_pd.points, sparse_pd.points, 1)
                w1 = wasserstein_distance(full_pd.points, sparse_pd.points, 1, 1)
                w2 = wasserstein_distance(full_pd.points, sparse_pd.points, 1, 2)
                print "Distances: Bottleneck %g Wasserstein L1 %g L2 %g" % (bottleneck, w1, w2)
                sparse_pds.append(dict([("diagram", sparse_pd.toJSONDict()),
                                        ("epsilon", e), 
                                        ("sparsity", sparsity),
                                        ("bottleneck_distance", bottleneck),
                                        ("wasserstein_l1", w1),
                                        ("wasserstein_l2", w2),
                                        ("runtime", sparse_runtime)]))
        
        print "Saving data for segment %04d of %d to %s " % (index, self.num_segments, "%s.%04d" % (self.outfile, index))
        save_data("%s.%04d" % (self.outfile, index), 
                  [dict([("full_diagram",full_pd.toJSONDict()), ("runtime", full_runtime)])] + sparse_pds)
        return full_pd.toJSONDict()
Example #12
def main(argv):
    parser = argparse.ArgumentParser(description='Tool to generate a similarity kernel from persistence data')
    parser.add_argument('-i', '--infile', help='Input JSON Similarity Kernel file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p', '--pool', default=multiprocessing.cpu_count(), type=int, help='Threads of computation to use')
    parser.add_argument('-c', '--learning-C', help='C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('-t', '--train-test-partitions', help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))
    
    kf_json = load_data(args['infile'], 'kernel', None, None, "KernelLearning: ")
    if kf_json is None :
        print "Could not load Kernel from %s" % (args['infile'],)
        sys.exit(1)
    kernel = Kernel.fromJSONDict(kf_json)
    config = kernel.config
    segment_info = kernel.segment_info
    if args['pool'] > 1 :
        pool = multiprocessing.Pool(args['pool'])
    else :
        pool = None
    
    if args['learning_C'] is not None :
        learning_C = parse_range(args['learning_C'], t=float)
        if not isinstance(learning_C, list) :
            learning_C = [learning_C]
    elif not isinstance(config.learning_C, list) :
        learning_C = [config.learning_C]
    else :
        learning_C = config.learning_C

    if args['train_test_partitions'] is not None :
        partitions_json = load_data(args['train_test_partitions'], 'partitions', None, None, "KernelLearning: ")
        if partitions_json is None :
            print "Could not load Train / Test Partitions from %s" % (args['train_test_partitions'],)
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else :
        partitions = generate_partitions(config, segment_info, 
                                         cv_iterations=5 if (len(learning_C) > 1) else 0)

    if isinstance(learning_C, list) and len(learning_C) > 1 and len(partitions.cross_validation) > 0 :
        num_cv = len(partitions.cross_validation)
        learning_wrap = LearningWrapper( kernel )
        if pool is not None :
            results = pool.map(learning_wrap, itertools.product(partitions.cross_validation, learning_C))
        else :
            results = map(learning_wrap, itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        results = list(results)
        print len(results)
        for C in learning_C :
            correct = Learning(config, [_result for (_C, _result) in results if C == _C]).get_average_correct()
            if correct > max_correct :
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print "KernelLearning: using C = %s, correct = %s" % (config.learning_C, max_correct)
    else :
        if isinstance(learning_C, list) :
            config.learning_C = learning_C[0]
        else :
            config.learning_C = learning_C

    learning_wrap = LearningWrapper( kernel )

    if pool is not None :
        results = pool.map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C]))
    else :
        results = map(learning_wrap, itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C,result) in results])

    if args['outfile'] is None :
        learning_filename = KernelLearning.get_learning_filename(config)
    else :
        learning_filename = args['outfile']

    correct = learning.get_average_correct()
    print "%s correct %2.2f%% error %2.2f%% classes %s" % ("KernelLearning:", correct * 100.0, (1.0 - correct)*100.0, 
                                                   len(set([s.max_label() for s in kernel.segment_info])))
    print "Writing %s" % (learning_filename, )
    learning.config.status = "KernelLearning"
    save_data(learning_filename, learning.toJSONDict())
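
The pool handling above follows an idiom that recurs throughout these scripts: build a multiprocessing.Pool only when more than one worker is requested, and otherwise fall back to the builtin map(). A minimal sketch (the square() worker is a hypothetical stand-in for LearningWrapper):

import multiprocessing

def square(x):
    return x * x

requested = 4
pool = multiprocessing.Pool(requested) if requested > 1 else None
mapper = pool.map if pool is not None else map
print mapper(square, range(10))   # same result either way; parallel when pooled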
Example #13
def main(argv):
    parser = argparse.ArgumentParser(
        description='Tool to generate a similarity kernel from persistence data'
    )
    parser.add_argument('-i',
                        '--infile',
                        help='Input JSON Similarity Kernel file')
    parser.add_argument('-o', '--outfile', help='Output JSON Learning file')
    parser.add_argument('-p',
                        '--pool',
                        default=multiprocessing.cpu_count(),
                        type=int,
                        help='Threads of computation to use')
    parser.add_argument(
        '-c',
        '--learning-C',
        help=
        'C value for SVM. Specify a range for 1-dimensional cross-validation')
    parser.add_argument('-t',
                        '--train-test-partitions',
                        help='Precomputed train / test partitions')
    args = vars(parser.parse_args(argv[1:]))

    kf_json = load_data(args['infile'], 'kernel', None, None,
                        "KernelLearning: ")
    if kf_json is None:
        print "Could not load Kernel from %s" % (args['infile'], )
        sys.exit(1)
    kernel = Kernel.fromJSONDict(kf_json)
    config = kernel.config
    segment_info = kernel.segment_info
    if args['pool'] > 1:
        pool = multiprocessing.Pool(args['pool'])
    else:
        pool = None

    if args['learning_C'] is not None:
        learning_C = parse_range(args['learning_C'], t=float)
        if not isinstance(learning_C, list):
            learning_C = [learning_C]
    elif not isinstance(config.learning_C, list):
        learning_C = [config.learning_C]
    else:
        learning_C = config.learning_C

    if args['train_test_partitions'] is not None:
        partitions_json = load_data(args['train_test_partitions'],
                                    'partitions', None, None,
                                    "KernelLearning: ")
        if partitions_json is None:
            print "Could not load Train / Test Partitions from %s" % (
                args['train_test_partitions'], )
            sys.exit(1)
        partitions = TrainTestPartitions.fromJSONDict(partitions_json)
    else:
        partitions = generate_partitions(config,
                                         segment_info,
                                         cv_iterations=5 if
                                         (len(learning_C) > 1) else 0)

    if isinstance(learning_C, list) and len(learning_C) > 1 and len(
            partitions.cross_validation) > 0:
        num_cv = len(partitions.cross_validation)
        learning_wrap = LearningWrapper(kernel)
        if pool is not None:
            results = pool.map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        else:
            results = map(
                learning_wrap,
                itertools.product(partitions.cross_validation, learning_C))
        max_correct = 0.0
        best_C = learning_C[0]
        results = list(results)
        print len(results)
        for C in learning_C:
            correct = Learning(
                config, [_result for (_C, _result) in results if C == _C
                         ]).get_average_correct()
            if correct > max_correct:
                best_C = C
                max_correct = correct
        config.learning_C = best_C
        print "KernelLearning: using C = %s, correct = %s" % (
            config.learning_C, max_correct)
    else:
        if isinstance(learning_C, list):
            config.learning_C = learning_C[0]
        else:
            config.learning_C = learning_C

    learning_wrap = LearningWrapper(kernel)

    if pool is not None:
        results = pool.map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    else:
        results = map(
            learning_wrap,
            itertools.product(partitions.evaluation, [config.learning_C]))
    learning = Learning(config, [result for (C, result) in results])

    if args['outfile'] is None:
        learning_filename = KernelLearning.get_learning_filename(config)
    else:
        learning_filename = args['outfile']

    correct = learning.get_average_correct()
    print "%s correct %2.2f%% error %2.2f%% classes %s" % (
        "KernelLearning:", correct * 100.0, (1.0 - correct) * 100.0,
        len(set([s.max_label() for s in kernel.segment_info])))
    print "Writing %s" % (learning_filename, )
    learning.config.status = "KernelLearning"
    save_data(learning_filename, learning.toJSONDict())
Example #14
def compute(distance_type,
            distance_array,
            segment_compare,
            pool,
            max_simplices,
            epsilon,
            segments=None,
            pds=None,
            ds=None):
    compute_pool = multiprocessing.Pool(pool)
    d_len = len(distance_array)
    d_rng = range(d_len)
    last = -1
    if pds is None and ds is None:
        persistence_diagrams = [None for x in segments.segments]
        print "Generating initial persistence diagram"
        persistence_diagrams[0] = PersistenceGenerator.process(
            (segments.segments[0], (max_simplices, epsilon)))
        diagram_generator = yieldPersistenceDiagramAndDistance(
            max_simplices, epsilon, persistence_diagrams[0], distance_type)
        results = compute_pool.imap(
            diagram_generator, itertools.izip(segments.segments[1:],
                                              d_rng[1:]))
        for (i, diagram, distance) in results:
            persistence_diagrams[i] = diagram
            distance_array[i] = distance

        config = segments.config
        config.max_simplices = max_simplices
        config.persistence_epsilon = epsilon
        diagrams = PersistenceDiagrams(config, persistence_diagrams)
        filename = PersistenceDiagrams.get_persistence_diagrams_filename(
            config)
        print "plot_persistence_distance.py: Writing %s" % (filename, )
        save_data(filename, diagrams.toJSONDict())
    elif pds is not None:
        persistence_diagrams = pds.diagrams
        config = pds.config

    distances = [[None for y in d_rng] for x in d_rng]
    if ds is None:
        print "Computing Distance Array"
        distance_generator = yieldDistance(distance_type)
        results = compute_pool.imap(
            distance_generator,
            itertools.product(itertools.izip(persistence_diagrams, d_rng),
                              itertools.izip(persistence_diagrams, d_rng)),
            max(1, d_len**2 / (10 * pool)))
        for (i, j, distance) in results:
            distances[i][j] = Distance(None, distance, None, None)
            if segment_compare.value != last:
                last = segment_compare.value
                for k in d_rng:
                    distance_array[k] = (distances[last][k].mean
                                         if distances[last][k] is not None else -1.0)

        if distance_type == 'bottleneck':
            filename = BottleneckDistances.get_distances_filename(config)
        elif distance_type == 'wasserstein':
            filename = WassersteinDistances.get_distances_filename(config)

        print "plot_persistence_distance.py: Writing %s" % (filename, )
        save_data(
            filename,
            Distances(config, distances,
                      [d.segment_info
                       for d in persistence_diagrams]).toJSONDict())
    else:
        for i in d_rng:
            for j in d_rng:
                distances[i][j] = ds.distances[i][j]
        last = -1
    compute_pool.close()
    compute_pool.join()
    last = segment_compare.value
    for k in d_rng:
        distance_array[k] = distances[last][k].mean
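    # Poll the shared segment_compare value; whenever the selected segment
    # changes, refresh the shared distance row (this loop never returns)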
    while True:
        if segment_compare.value != last:
            last = segment_compare.value
            for k in d_rng:
                distance_array[k] = distances[last][k].mean
        else:
            time.sleep(0.05)
Example #15
    if   "Segments" in file_class or \
         "Post" in file_class :
        out_file = module_class.get_segment_filename(in_obj.config, gz=False)
    elif "Features" in file_class:
        out_file = module_class.get_features_filename(in_obj.config, gz=False)
    elif "PersistenceDiagrams" in file_class:
        out_file = module_class.get_persistence_diagrams_filename(
            in_obj.config, gz=False)
    elif "Partition" in file_class:
        out_file = module_class.get_partition_filename(in_obj.config, gz=False)
    elif "Learning" in file_class:
        out_file = module_class.get_learning_filename(in_obj.config, gz=False)
    elif "Distances" in file_class or \
         "ScaleSpaceSimilarity" in file_class :
        out_file = module_class.get_distances_filename(in_obj.config, gz=False)
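    # Note: the "AverageKernel" test must precede the plain "Kernel" test below,
    # because the substring "Kernel" also matches "AverageKernel"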
    elif "AverageKernel" in file_class:
        out_file = get_filename(in_obj.config, [
            'max_simplices', 'persistence_epsilon', 'segment_filename',
            'segment_stride', 'segment_size', 'window_size', 'window_stride',
            'kernel_scale', 'kernel_gamma', 'invariant_epsilon', 'data_file',
            'data_index', 'label_index', 'persistence_degree', 'data_type',
            'post_process', 'post_process_arg'
        ], "AverageKernel")
    elif "Kernel" in file_class:
        out_file = module_class.get_kernel_filename(in_obj.config, gz=False)
    elif "CrossValidation" in file_class:
        out_file = module_class.get_cross_validation_filename(in_obj.config,
                                                              gz=False)
    print "Writing %s" % (out_file, )
    save_data(out_file, in_obj.toJSONDict())
Example #16
import os
import sys
import math
import itertools
import multiprocessing

from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.PersistenceDiagrams import PersistenceDiagrams, PersistenceDiagram

def avg(l) :
    return sum(l, 0.0) / len(l)


def average_density(diagram) :
    points = [(p[0], p[1]) for p in diagram.points if p[2] == 1]
    if len(points) > 2 :
        diagram_distances = []
        for (x0, y0) in points :
            distances = map(lambda (x1, y1) : math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1)), points)
            distances.sort()
            # distances[0] is the point's distance to itself; average the five nearest neighbors
            diagram_distances.append(avg(distances[1:6]))
        return avg(diagram_distances)
    else :
        return 0.0



if __name__ == "__main__" :
    pool = multiprocessing.Pool(multiprocessing.cpu_count() - 2)
    for f in sys.argv[1:] :
        pds = PersistenceDiagrams.fromJSONDict(load_data(f, None, None, None, sys.argv[0] + " : "))
        densities = pool.map(average_density, pds.diagrams)
        save_data(f + "-density", list(densities))
Example #17
import sys
import argparse
import importlib
from persistence.Datatypes.JSONObject import load_data, save_data
from persistence.Datatypes.Segments import Segments, Segment


parser = argparse.ArgumentParser(description="Creates windowed segments of a single dimension for a multidimensioned dataset")
parser.add_argument("-i","--infile")
parser.add_argument("-d","--data-index", default=0, type=int)
parser.add_argument("-w","--window-size", type=int)
parser.add_argument("-W","--window-stride", default=1, type=int)
args = parser.parse_args(sys.argv[1:])
segments = Segments.fromJSONDict(load_data(args.infile, "segments", None, None, sys.argv[0] + ": "))
orig_window_size = segments.config.window_size
segments.config.window_size = args.window_size
segments.config.window_stride = args.window_stride
dimensions = len(segments.segments[0].data_index)
segments.config.data_index = segments.segments[0].data_index[args.data_index]
for segment in segments.segments :
    windows = [[segment.windows[0][(i + j) * dimensions + args.data_index] for j in range(args.window_size)]
               for i in range(0, orig_window_size, args.window_stride) if ((i + args.window_size - 1) * dimensions + args.data_index) < len(segment.windows[0])]
    segment.data_index = segment.data_index[args.data_index]
    segment.window_size = args.window_size
    segment.windows = windows
segment_module = importlib.import_module("persistence." + segments.config.data_type)
segment_class = getattr(segment_module, segments.config.data_type)
segment_filename = segment_class.get_segment_filename(segments.config)
print "Writing " + segment_filename
save_data(segment_filename, segments.toJSONDict())
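
As a toy illustration of the re-slicing above (values invented for illustration): with dimensions = 2, data_index = 1, orig_window_size = 5, window_size = 3, and window_stride = 2, an interleaved window [x0, y0, x1, y1, x2, y2, x3, y3, x4, y4] is re-sliced into [[y0, y1, y2], [y2, y3, y4]].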