Example #1
def convert_symbols_to_raw(src, dst, model, level, tolevel=0):
    """
    convert a symbolic time series to a lower symbolic level or back to raw data
    """
    if level == "raw":
        # already raw data, nothing to convert
        return
    # map symbolic level names to their numeric codes
    if level == "symbol":
        level = 0
    if level == "rle":
        level = 1
    if level == "statecluster":
        level = 2
        # run an RLE pass over the input in place before converting
        prle = pr.RLEProcess()
        dat = ds.FileDataSource(src, src)
        dat.load()
        dat.data = prle.batch_process(dat.data)
        dat.save()
    if int(tolevel) == 0:
        # convert all the way down to raw values
        ctr = cv.ToRaw()
        ctr.convert(src, dst, model, int(level))
    else:
        # convert to a lower symbolic level
        cts = cv.ToSymbols()
        cts.convert(src, dst, model, int(level))
        if int(tolevel) == 2:
            # target level 2 (state clusters): run an RLE pass over the output as well
            prle = pr.RLEProcess()
            dat = ds.FileDataSource(dst, dst)
            dat.load()
            dat.data = prle.batch_process(dat.data)
            dat.save()
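For illustration, here is a minimal, self-contained sketch of the expansion step that a symbol-to-raw conversion performs conceptually: RLE triples (start, end, symbol) are expanded back to one value per sample through a lookup table. The function name, the table values, and the period parameter (seconds between samples) are assumptions of this sketch; it is not the actual cv.ToRaw implementation.

def expand_rle_to_raw(triples, lookup, period):
    """
    Expand RLE triples (start, end, symbol) back to one value per sample,
    using a lookup table that maps each symbol to a representative raw value.
    `period` is the assumed number of seconds between consecutive samples.
    """
    raw = []
    for start, end, symbol in triples:
        nsamples = int((end - start) / period)
        raw.extend([lookup[symbol]] * nsamples)
    return raw

# hypothetical data: two symbols covering 5 seconds, one sample per second
triples = [(0, 3, "A"), (3, 5, "B")]
lookup = {"A": 0.2, "B": 1.7}
print(expand_rle_to_raw(triples, lookup, 1))   # [0.2, 0.2, 0.2, 1.7, 1.7]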
Example #2
def get_distance(file0, file1, rate):
    dat0 = ds.FileDataSource(file0, None)
    dat1 = ds.FileDataSource(file1, None)
    dat0.load()
    dat1.load()
    gdt = fu.Get_Distance()
    dist = gdt.levenshtein(dat0.data, dat1.data, int(rate))
    print "total-distance (d) = %d"%(dist[0])
    print "total-time-length (l) = %d"%(dist[1])
    print "normalized-distance (d/l) = %f"%(dist[0]*1.0/dist[1])
Example #3
def find_states_spclust(inputfile, outputname, rate, dimensions, wgrid, wnbr, ememory, ethreshold, mindist):
    """
    batch process using spclust symbolization
    - input CSV file with triples (time start, time end, values)
    - base name for output files (can include a folder)
    - sampling rate of the input time series
    - number of dimensions of the input time series
    - Spclust grid size
    - Spclust count threshold
    - StateFinder fading factor
    - StateFinder prediction error threshold
    - StateFinder min distance for clustering segments (0-1)
    """
    call(["java", "-jar", "./Spclust/SpComputeModel.jar", inputfile,
          dimensions, wgrid, wnbr, outputname+"-model.spc"])
    call(["java", "-jar", "./Spclust/SpComputeSymbols.jar",
          outputname+"-model.spc", inputfile, outputname+"-symbol.csv"])
    nbclusters = int(open(outputname+"-model.spcn", 'r').readline())
    src = ds.FileDataSource(outputname+"-symbol.csv", outputname+"-statefinder.csv")
    rel = pr.RLEProcess()
    sem = pr.SegmentSparseProcess(rate, ethreshold, ememory)
    clu = pr.ClusterSparseProcess(mindist, nbclusters)
    src.load()
    # RLE-compress the Spclust symbol stream and keep an intermediate copy
    src.data = rel.batch_process(src.data)
    src.save_to(outputname+"-rle.csv")
    # segment the compressed stream, then cluster the segments into states
    segments = sem.batch_process(src.data)
    (src.data, lookup) = clu.batch_process(segments, src.data)
    src.save()
    lookups = {0: lt.SpclustSymbolLookupTable(outputname+"-model.spc"),
               1: lt.ExpandLookupTable(rate),
               2: lt.ClusterSparseLookupTable(lookup, rate)}
    with open(outputname+"-model.mdl", 'wb') as lkf:
        pickle.dump(lookups, lkf)
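The lookup tables are persisted with pickle, keyed by level codes that appear to match those used in convert_symbols_to_raw (0: symbol, 1: rle, 2: statecluster). A minimal round-trip sketch follows; the file name and the stand-in dictionary are placeholders, since the real entries are lt.* lookup objects.

import pickle

# stand-in dictionary; a real model written by find_states_spclust
# would be opened the same way ("output-model.mdl" is a placeholder name)
lookups = {0: "symbol lookup", 1: "expand lookup", 2: "cluster lookup"}
with open("output-model.mdl", "wb") as lkf:
    pickle.dump(lookups, lkf)
with open("output-model.mdl", "rb") as lkf:
    restored = pickle.load(lkf)
print(restored[2])   # "cluster lookup"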
Example #4
def convert_rle(src, dst):
    """
    apply RLE compression
    """
    dat = ds.FileDataSource(src, dst)
    prle = pr.RLEProcess()
    dat.load()
    dat.data = prle.batch_process(dat.data)
    dat.save()
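As a plain-Python illustration of the run-length encoding that pr.RLEProcess applies to a symbol stream, here is a self-contained sketch that merges consecutive rows carrying the same symbol. The (start, end, symbol) triples and the function name are made up for the example; this is not the actual RLEProcess code.

from itertools import groupby

def rle_compress(triples):
    """
    Merge consecutive rows with the same symbol into a single
    (start, end, symbol) triple covering the whole run.
    """
    out = []
    for symbol, run in groupby(triples, key=lambda row: row[2]):
        run = list(run)
        out.append((run[0][0], run[-1][1], symbol))
    return out

rows = [(0, 1, "A"), (1, 2, "A"), (2, 3, "B"), (3, 4, "B"), (4, 5, "A")]
print(rle_compress(rows))   # [(0, 2, 'A'), (2, 4, 'B'), (4, 5, 'A')]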
Example #5
def convert_median_filter(src, dst, win):
    """
    Apply median filtering to the time series
    """
    dat = ds.FileDataSource(src, dst)
    pmf = pr.MedianFilteringProcess(win)
    dat.load()
    dat.data = pmf.batch_process(dat.data)
    dat.save()
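A minimal sliding-window median filter in plain Python, to show the kind of smoothing pr.MedianFilteringProcess applies; it operates here on a bare list of values, and the boundary handling (truncated windows) and sample data are assumptions of this sketch.

from statistics import median

def median_filter(values, win):
    """
    Replace each sample by the median of a centered window of size `win`;
    the window is truncated at the series boundaries.
    """
    half = win // 2
    out = []
    for i in range(len(values)):
        window = values[max(0, i - half):i + half + 1]
        out.append(median(window))
    return out

# the two isolated spikes (9 and 8) are removed by a window of 3
print(median_filter([1, 9, 1, 1, 8, 1, 1], 3))   # [5.0, 1, 1, 1, 1, 1, 1.0]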
Example #6
def split_file_by(filename, folder, offset=0, duration=86400):
    """
    split the file for applying the forecasting algorithm
    """
    src = ds.FileDataSource(filename, None)
    cut = fu.PeriodicCutProcess(int(duration), int(offset))
    src.load()
    src.data = cut.batch_process(src.data)
    spl = fu.Splitter(src.data)
    spl.splitFiles(folder, int(duration), int(offset))
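A self-contained sketch of the periodic split idea: timestamped rows are grouped into consecutive windows of `duration` seconds, shifted by `offset` (86400 s is one day). The bucketing rule and the (timestamp, value) row format are assumptions of this sketch, not the fu.PeriodicCutProcess / fu.Splitter code, which additionally writes each window to its own file.

from collections import defaultdict

def split_by_period(rows, duration=86400, offset=0):
    """
    Group (timestamp, value) rows into consecutive windows of `duration`
    seconds, with window boundaries shifted by `offset` seconds.
    """
    buckets = defaultdict(list)
    for t, value in rows:
        buckets[(t - offset) // duration].append((t, value))
    return dict(buckets)

rows = [(10, 0.5), (86500, 0.7), (90000, 0.2)]
print(split_by_period(rows))   # {0: [(10, 0.5)], 1: [(86500, 0.7), (90000, 0.2)]}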
Example #7
def find_states(inputfile, outputname, rate, smethod, snbr, ememory, ethreshold, mindist):
    """
    batch process using standard symbolization
    - input CSV file with triples (time start, time end, value)
    - base name for output files (can include a folder)
    - sampling rate of the input time series
    - symbolization method (0: uniform, 1: median, 2: distinct median)
    - number of symbols to generate
    - StateFinder fading factor
    - StateFinder prediction error threshold
    - StateFinder min distance for clustering segments (0-1)
    """
    src = ds.FileDataSource(inputfile, outputname+"-statefinder.csv")
    src.load()
    # pick the symbolization method (default 0: uniform)
    enc = sbz.UniformSymbolizer()
    if smethod == "1":
        enc = sbz.MedianSymbolizer()
    if smethod == "2":
        enc = sbz.DistinctMedianSymbolizer()
    enc.load(src.data)
    # compute the separator values that delimit the snbr symbols
    (sep, mini, maxi) = enc.get_separators(int(snbr))
    sym = pr.SymbolizeProcess(1, sep)
    rel = pr.RLEProcess()
    sem = pr.SegmentSparseProcess(rate, ethreshold, ememory)
    clu = pr.ClusterSparseProcess(mindist, int(snbr)+1)

    # symbolize, RLE-compress, then segment and cluster into states,
    # saving the intermediate symbol and RLE files along the way
    src.data = sym.batch_process(src.data)
    src.save_to(outputname+"-symbol.csv")
    src.data = rel.batch_process(src.data)
    src.save_to(outputname+"-rle.csv")
    segments = sem.batch_process(src.data)
    (src.data, lookup) = clu.batch_process(segments, src.data)
    src.save()
    lookups = {0: lt.SymbolLookupTable(sep, mini, maxi),
               1: lt.ExpandLookupTable(rate),
               2: lt.ClusterSparseLookupTable(lookup, rate)}
    with open(outputname+"-model.mdl", 'wb') as lkf:
        pickle.dump(lookups, lkf)
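To make the difference between the symbolization methods concrete, here is a self-contained sketch of how uniform and median (quantile-based) separators can be derived from the value distribution: uniform separators split the value range evenly, while median-based ones adapt to where the samples actually fall. Function names and sample values are made up; this is an illustration only, not the sbz.UniformSymbolizer / sbz.MedianSymbolizer implementations.

def uniform_separators(values, nsymbols):
    """nsymbols-1 equally spaced thresholds between min and max."""
    lo, hi = min(values), max(values)
    step = (hi - lo) / nsymbols
    return [lo + step * i for i in range(1, nsymbols)]

def median_separators(values, nsymbols):
    """nsymbols-1 thresholds placed at equally spaced quantiles."""
    ordered = sorted(values)
    return [ordered[len(ordered) * i // nsymbols] for i in range(1, nsymbols)]

values = [0.1, 0.2, 0.2, 0.3, 5.0, 9.8, 9.9, 10.0]
print(uniform_separators(values, 4))   # thresholds at roughly 2.58, 5.05, 7.53
print(median_separators(values, 4))    # [0.2, 5.0, 9.9]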