def lookup(self, windowLength, wordLength, symbols, sequence, val):
    normMean = True
    key = (windowLength, wordLength, symbols)

    # Build and cache the SFA word lists for this parameter combination once.
    if key not in self.sfa:
        sfa = SFA("EQUI_DEPTH")
        sfa.fitWindowing(self.raw, windowLength, wordLength, symbols, normMean, True)
        self.sfa[key] = []
        for i in range(self.raw["Samples"]):
            sfa_sr = []
            wordList = sfa.transformWindowing(self.raw[i])
            for word in wordList:
                sfa_sr.append(self.sfaToDWord(word, symbols))
            self.sfa[key].append(sfa_sr)

    # Credit every window position whose SFA word contains the query sequence.
    for i in range(self.raw["Samples"]):
        for j in range(len(self.sfa[key][i])):
            if sequence in self.sfa[key][i][j]:
                self.acscores[i, j:(j + windowLength)] += val
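# Hedged usage sketch (not from the source): `lookup` above caches one SFA word
# list per (windowLength, wordLength, symbols) key and then adds `val` to every
# window position whose word contains `sequence`. The host object `searcher`,
# its `raw` samples dict, the `acscores` matrix, and all parameter values here
# are assumptions inferred from the attributes the method uses.
import numpy as np

def score_sequence(searcher, sequence, series_length, window_length=32,
                   word_length=8, symbols=4):
    searcher.sfa = {}                                                  # per-key word cache
    searcher.acscores = np.zeros((searcher.raw["Samples"], series_length))
    searcher.lookup(window_length, word_length, symbols, sequence, 1.0)
    return searcher.acscores.argmax(axis=1)  # best-matching offset per series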
def createWords(self, samples, index, bar=None): if self.signature[index] == None: self.signature[index] = SFA("INFORMATION_GAIN", True, False) self.signature[index].fitWindowing(samples, self.windowLengths[index], self.maxF, self.symbols, self.normMean, False) # self.signature[index].printBins() words = [] for i in range(samples["Samples"]): words.append(self.signature[index].transformWindowingInt( samples[i], self.maxF)) self.words[index] = words if bar != None: bar.update(index)
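# Hedged usage sketch (not from the source): createWords above accepts any
# progress object exposing update(i), e.g. a progressbar2 ProgressBar. The
# `weasel` host object and its `windowLengths` list are assumptions taken from
# the attributes the method uses.
import progressbar  # the progressbar2 package

def build_all_words(weasel, samples):
    with progressbar.ProgressBar(max_value=len(weasel.windowLengths)) as bar:
        for index in range(len(weasel.windowLengths)):
            weasel.createWords(samples, index, bar=bar)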
def createWords(self, samples, index, data):
    if self.signature[index] is None:
        # One SFA quantizer per dimension of the multivariate series.
        self.signature[index] = [None for _ in range(samples['Dimensions'])]
        for i in range(samples['Dimensions']):
            self.signature[index][i] = SFA(self.histogramType, self.lowerBounding,
                                           logger=self.logger, mftUseMaxOrMin=False)
            self.signature[index][i].mv_fitWindowing(samples, self.windowLengths[index],
                                                     self.maxF, self.alphabetSize,
                                                     self.normMean, self.lowerBounding, dim=i)
            self.signature[index][i].printBins(self.logger)

    words = []
    for m in range(samples["Samples"]):
        for n in range(samples["Dimensions"]):
            # Series shorter than the window cannot produce any words.
            if len(samples[m][n].data) >= self.windowLengths[index]:
                words.append(self.signature[index][n].transformWindowingInt(samples[m][n], self.maxF))
            else:
                words.append([])

    self.logger.Log("Generating %s Words for Norm=%s and Window=%s" %
                    (data, self.normMean, self.windowLengths[index]))
    self.words[index] = words
def createWords(self, samples, index, bar=None):
    if self.signature[index] is None:
        self.signature[index] = SFA(self.histogramType, False, self.lowerBounding, False)
        self.signature[index].mv_fitWindowing(samples, self.windowLengths[index], self.maxF,
                                              self.alphabetSize, self.normMean, False)

    words = []
    for m in range(samples["Samples"]):
        for n in range(samples["Dimensions"]):
            # Series shorter than the window cannot produce any words.
            if len(samples[m][n].data) >= self.windowLengths[index]:
                words.append(self.signature[index].transformWindowingInt(samples[m][n], self.maxF))
            else:
                words.append([])

    self.words[index] = words
    if bar is not None:
        bar.update(index)
import math

from src.transformation.SFA import *
# MFT and getDisjointSequences are assumed to come from sibling modules of this
# repo; they are used below but not defined in this snippet.


class SFASupervised():

    def __init__(self, histogram_type, lowerBounding=False, logger=None):
        self.initialized = False
        self.HistogramType = histogram_type
        self.lowerBounding = lowerBounding
        self.MUSE_Bool = False
        logger.Log(self.__dict__, level=0)
        self.logger = logger
        self.sfa = SFA(histogram_type, LB=self.lowerBounding, logger=self.logger)

    def fitWindowing(self, samples, windowSize, wordLength, symbols, normMean, lowerBounding):
        self.sfa.quantization = self.quantizationSupervised
        self.transformation = MFT(windowSize, normMean, lowerBounding, self.MUSE_Bool)

        # Split every sample into disjoint windows and re-index them as one flat sample set.
        sa = {}
        index = 0
        for i in range(samples["Samples"]):
            new_list = getDisjointSequences(samples[i], windowSize, normMean)
            for j in range(len(new_list)):
                sa[index] = new_list[j]
                index += 1
        sa["Samples"] = index
        self.fitTransformed(sa, wordLength, symbols, normMean)

    def fitTransformed(self, samples, wordLength, symbols, normMean):
        length = len(samples[0].data)
        transformedSignal = self.sfa.fitTransformDouble(samples, length, symbols, normMean)

        best = self.calcBestCoefficients(samples, transformedSignal)

        # Keep the indices of the top-scoring Fourier coefficients.
        self.bestValues = [0 for i in range(min(len(best), wordLength))]
        self.maxWordLength = 0
        for i in range(len(self.bestValues)):
            if best[i][1] != -math.inf:
                self.bestValues[i] = best[i][0]
                self.maxWordLength = max(best[i][0] + 1, self.maxWordLength)
        self.maxWordLength += self.maxWordLength % 2  # round up to an even word length
        self.sfa.maxWordLength = self.maxWordLength

        return self.sfa.transform(samples, transformedSignal)

    def calcBestCoefficients(self, samples, transformedSignal):
        # Group the transformed signals by class label.
        classes = {}
        for i in range(samples["Samples"]):
            if samples[i].label in classes.keys():
                classes[samples[i].label].append(transformedSignal[i])
            else:
                classes[samples[i].label] = [transformedSignal[i]]

        nSamples = len(transformedSignal)
        nClasses = len(classes.keys())
        length = len(transformedSignal[0])  # all transformed rows share one length

        f = self.getFoneway(length, classes, nSamples, nClasses)

        # Rank coefficients by F-ratio, highest first; the stable sort keeps
        # equal scores (including repeated -inf entries) in index order.
        return [[index, value] for index, value in
                sorted(enumerate(f), key=lambda kv: kv[1], reverse=True)]

    def getFoneway(self, length, classes, nSamples, nClasses):
        """Per-coefficient one-way ANOVA F-ratio across the class groups."""
        ss_alldata = [0. for i in range(length)]
        sums_args = {}
        keys_class = list(classes.keys())

        for key in keys_class:
            allTs = classes[key]
            sums = [0. for i in range(len(ss_alldata))]
            sums_args[key] = sums
            for ts in allTs:
                for i in range(len(ts)):
                    ss_alldata[i] += ts[i] * ts[i]
                    sums[i] += ts[i]

        square_of_sums_alldata = [0. for i in range(len(ss_alldata))]
        square_of_sums_args = {}
        for key in keys_class:
            sums = sums_args[key]
            for i in range(len(sums)):
                square_of_sums_alldata[i] += sums[i]

            squares = [0. for i in range(len(sums))]
            square_of_sums_args[key] = squares
            for i in range(len(sums)):
                squares[i] += sums[i] * sums[i]

        for i in range(len(square_of_sums_alldata)):
            square_of_sums_alldata[i] *= square_of_sums_alldata[i]

        sstot = [0. for i in range(len(ss_alldata))]
        for i in range(len(sstot)):
            sstot[i] = ss_alldata[i] - square_of_sums_alldata[i] / nSamples

        ssbn = [0. for i in range(len(ss_alldata))]  # sum of squares between classes
        sswn = [0. for i in range(len(ss_alldata))]  # sum of squares within classes

        for key in keys_class:
            sums = square_of_sums_args[key]
            n_samples_per_class = len(classes[key])
            for i in range(len(sums)):
                ssbn[i] += sums[i] / n_samples_per_class

        for i in range(len(square_of_sums_alldata)):
            ssbn[i] += -square_of_sums_alldata[i] / nSamples

        dfbn = nClasses - 1          # degrees of freedom between classes
        dfwn = nSamples - nClasses   # degrees of freedom within classes

        msb = [0. for i in range(len(ss_alldata))]  # mean square between classes
        msw = [0. for i in range(len(ss_alldata))]  # mean square within classes
        f = [0. for i in range(len(ss_alldata))]    # F-ratio

        for i in range(len(sswn)):
            sswn[i] = sstot[i] - ssbn[i]
            msb[i] = ssbn[i] / dfbn
            msw[i] = sswn[i] / dfwn
            f[i] = msb[i] / msw[i] if msw[i] != 0. else -math.inf

        return f

    def quantizationSupervised(self, one_approx):
        signal = [0 for _ in range(min(len(one_approx), len(self.bestValues)))]
        for a in range(len(signal)):
            i = self.bestValues[a]
            b = 0
            # Count the histogram bins whose boundary the value exceeds.
            for beta in range(self.sfa.bins.shape[1]):
                if one_approx[i] < self.sfa.bins.iloc[i, beta]:
                    break
                else:
                    b += 1
            signal[a] = b - 1
        return signal
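# Hedged usage sketch (not from the source): fitting SFASupervised on the
# dict-of-TimeSeries layout that uv_load produces elsewhere in this section.
# The stub logger and all parameter values are assumptions; __init__ requires
# any object with a Log(message, level=...) method.
from src.timeseries.TimeSeriesLoader import uv_load

class _StubLogger:
    def Log(self, message, level=0):
        print(message)

train, test = uv_load("Gun_Point")
sup = SFASupervised("INFORMATION_GAIN", logger=_StubLogger())
sup.fitWindowing(train, windowSize=32, wordLength=8, symbols=4,
                 normMean=True, lowerBounding=True)
# sup.bestValues now holds the indices of the most class-discriminative coefficients.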
import sys
import os
sys.path.append(os.getcwd()[:-5])

from src.timeseries.TimeSeriesLoader import load
from src.transformation.SFA import *

symbols = 8
wordLength = 16
windowLength = 32  # assumed value; the original snippet does not define it
normMean = False


def sfaToWord(word):
    word_string = ""
    for w in word:
        word_string += chr(w + 97)  # map symbol index to 'a', 'b', ...
    return word_string


def sfaToWordList(wordList):
    list_string = ""
    for word in wordList:
        list_string += sfaToWord(word)
        list_string += "; "
    return list_string


train, test, train_labels, test_labels = load("CBF", "\t")

sfa = SFA("EQUI_DEPTH")
sfa.fitWindowing(train, train_labels, windowLength, wordLength, symbols, normMean, True)
sfa.printBins()

for i in range(test.shape[0]):
    wordList = sfa.transformWindowing(test.iloc[i, :])
    print(str(i) + "-th transformed time series SFA word " + "\t" + sfaToWordList(wordList))
import sys
import os
sys.path.append(os.getcwd()[:-5])

from src.timeseries.TimeSeriesLoader import load
from src.transformation.SFA import *

symbols = 8
wordLength = 16
normMean = False


def sfaToWord(word):
    word_string = ""
    for w in word:
        word_string += chr(w + 97)  # map symbol index to 'a', 'b', ...
    return word_string


train, test, train_labels, test_labels = load("CBF", "\t")

sfa = SFA("EQUI_DEPTH")
sfa.fitTransform(train, train_labels, wordLength, symbols, normMean)
sfa.printBins()

for i in range(test.shape[0]):
    wordList = sfa.transform2(test.iloc[i, :], "null")
    print(str(i) + "-th transformed time series SFA word " + "\t" + sfaToWord(wordList))
from src.transformation.SFA import *


class BOSS():

    def __init__(self, maxF, maxS, windowLength, normMean):
        self.maxF = maxF
        self.symbols = maxS
        self.windowLength = windowLength
        self.normMean = normMean
        self.signature = None

    def createWords(self, samples):
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH")
            self.signature.fitWindowing(samples, self.windowLength, self.maxF,
                                        self.symbols, self.normMean, True)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            words_small = []
            for word in sfaWords:
                words_small.append(self.createWord(word, self.maxF, self.int2byte(self.symbols)))
            words.append(words_small)
        return words

    def createWord(self, numbers, maxF, bits):
        # Pack up to shortsPerLong symbols of `bits` bits each into one integer.
        shortsPerLong = int(round(60 / bits))
        to = min([len(numbers), maxF])

        b = 0
        s = 0
        shiftOffset = 1
        for i in range(s, (min(to, shortsPerLong + s))):
            shift = 1
            for j in range(bits):
                if (numbers[i] & shift) != 0:
                    b |= shiftOffset
                shiftOffset <<= 1
                shift <<= 1

        # Emulate the signed 32-bit overflow of the original Java implementation.
        limit = 2147483647
        total = 2147483647 + 2147483648
        while b > limit:
            b = b - total - 1
        return b

    def createBagOfPattern(self, words, samples, f):
        bagOfPatterns = []
        usedBits = self.int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1  # keep only the first f symbols of each word

        for j in range(len(words)):
            BOP = {}
            lastWord = -9223372036854775808  # Long.MIN_VALUE sentinel
            for offset in range(len(words[j])):
                word = words[j][offset] & mask
                # Numerosity reduction: skip immediate repeats of the same word.
                if word != lastWord:
                    if word in BOP.keys():
                        BOP[word] += 1
                    else:
                        BOP[word] = 1
                lastWord = word
            bagOfPatterns.append(BOP)
        return bagOfPatterns

    def int2byte(self, number):
        # Number of bits needed to represent `number` (integer log2).
        log = 0
        if (number & 0xffff0000) != 0:
            number >>= 16
            log = 16
        if number >= 256:
            number >>= 8
            log += 8
        if number >= 16:
            number >>= 4
            log += 4
        if number >= 4:
            number >>= 2
            log += 2
        return log + (number >> 1)

    def bag2dict(self, bag):
        bag_dict = []
        for word_list in bag:
            new_dict = {}
            for element in word_list:
                if element in new_dict.keys():
                    new_dict[element] += 1
                else:
                    new_dict[element] = 1
            bag_dict.append(new_dict)
        return bag_dict
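# Hedged usage sketch (not from the source): words and bags of patterns for the
# Gun_Point samples with the BOSS class above. uv_load and the sample-dict
# layout come from the other snippets in this section; the parameter values are
# illustrative only.
from src.timeseries.TimeSeriesLoader import uv_load

train, test = uv_load("Gun_Point")
boss = BOSS(maxF=16, maxS=4, windowLength=32, normMean=True)
words = boss.createWords(train)                    # one packed-word list per sample
bags = boss.createBagOfPattern(words, train, f=8)  # histograms over length-8 prefixes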
import math

from src.transformation.SFA import *
# BagOfBigrams and int2byte are assumed to be importable from sibling modules
# of this repo; they are used below but not defined in this snippet.


class BOSSVS():

    def __init__(self, maxF, maxS, windowLength, normMean, logger=None):
        self.maxF = maxF
        self.symbols = maxS
        self.windowLength = windowLength
        self.normMean = normMean
        self.signature = None
        logger.Log(self.__dict__, level=0)
        self.logger = logger

    def createWords(self, samples):
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH", logger=self.logger)
            self.signature.fitWindowing(samples, self.windowLength, self.maxF,
                                        self.symbols, self.normMean, True)
            self.signature.printBins(self.logger)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            words_small = []
            for word in sfaWords:
                words_small.append(self.createWord(word, self.maxF, int2byte(self.symbols)))
            words.append(words_small)
        return words

    def createWord(self, numbers, maxF, bits):
        # Pack up to shortsPerLong symbols of `bits` bits each into one integer.
        shortsPerLong = int(round(60 / bits))
        to = min([len(numbers), maxF])

        b = 0
        s = 0
        shiftOffset = 1
        for i in range(s, (min(to, shortsPerLong + s))):
            shift = 1
            for j in range(bits):
                if (numbers[i] & shift) != 0:
                    b |= shiftOffset
                shiftOffset <<= 1
                shift <<= 1

        # Emulate the signed 32-bit overflow of the original Java implementation.
        limit = 2147483647
        total = 2147483647 + 2147483648
        while b > limit:
            b = b - total - 1
        return b

    def createBagOfPattern(self, words, samples, f):
        bagOfPatterns = []
        usedBits = int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1  # keep only the first f symbols of each word

        for j in range(len(words)):
            BOP = BagOfBigrams(samples[j].label)
            lastWord = -9223372036854775808  # Long.MIN_VALUE sentinel
            for offset in range(len(words[j])):
                word = words[j][offset] & mask
                # Numerosity reduction: count a word only when it differs from its predecessor.
                if word != lastWord:
                    if word in BOP.bob.keys():
                        BOP.bob[word] += 1
                    else:
                        BOP.bob[word] = 1
                lastWord = word
            bagOfPatterns.append(BOP)
        return bagOfPatterns

    def createTfIdf(self, bagOfPatterns, sampleIndices, uniqueLabels, labels):
        # Accumulate per-class term frequencies.
        matrix = {}
        for label in uniqueLabels:
            matrix[label] = {}
        for j in sampleIndices:
            label = labels[j]
            for key, value in bagOfPatterns[j].bob.items():
                matrix[label][key] = matrix[label][key] + value if key in matrix[label].keys() else value

        # Count how many classes each word occurs in (its document frequency).
        wordInClassFreq = {}
        for key, value in matrix.items():
            for key2, value2 in matrix[key].items():
                wordInClassFreq[key2] = wordInClassFreq[key2] + 1 if key2 in wordInClassFreq.keys() else 1

        # Convert counts to tf-idf weights; words present in every class score 0.
        for key, value in matrix.items():
            tfIDFs = matrix[key]
            for key2, value2 in tfIDFs.items():
                wordCount = wordInClassFreq.get(key2)
                if value2 > 0 and len(uniqueLabels) != wordCount:
                    tfValue = 1. + math.log10(value2)
                    idfValue = math.log10(1. + len(uniqueLabels) / wordCount)
                    tfIDFs[key2] = tfValue * idfValue  # tf times idf, per the BOSS VS weighting
                else:
                    tfIDFs[key2] = 0.
            matrix[key] = tfIDFs

        return self.normalizeTfIdf(matrix)

    def normalizeTfIdf(self, classStatistics):
        # L2-normalize each class's tf-idf vector.
        for key, values in classStatistics.items():
            squareSum = 0.
            for key2, value2 in classStatistics[key].items():
                squareSum += value2 ** 2
            squareRoot = math.sqrt(squareSum)
            if squareRoot > 0:
                for key2, value2 in classStatistics[key].items():
                    classStatistics[key][key2] /= squareRoot
        return classStatistics
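# Hedged usage sketch (not from the source): per-class tf-idf vectors with the
# BOSSVS class above. The stub logger, uv_load, the label access pattern, and
# the parameter values are assumptions drawn from the surrounding snippets.
from src.timeseries.TimeSeriesLoader import uv_load

class _StubLogger:
    def Log(self, message, level=0):
        print(message)

train, test = uv_load("Gun_Point")
bossvs = BOSSVS(maxF=16, maxS=4, windowLength=32, normMean=True, logger=_StubLogger())
words = bossvs.createWords(train)
bags = bossvs.createBagOfPattern(words, train, f=8)
labels = [train[i].label for i in range(train["Samples"])]
tfidf = bossvs.createTfIdf(bags, range(train["Samples"]), set(labels), labels)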
from src.timeseries.TimeSeriesLoader import uv_load
from src.transformation.SFA import *

symbols = 8
wordLength = 16
normMean = False


def sfaToWord(word):
    word_string = ""
    alphabet = "abcdefghijklmnopqrstuv"
    for w in word:
        word_string += alphabet[w]
    return word_string


train, test = uv_load("Gun_Point")

sfa = SFA("EQUI_DEPTH")
sfa.fitTransform(train, wordLength, symbols, normMean)
sfa.printBins()

for i in range(test["Samples"]):
    wordList = sfa.transform2(test[i].data, "null")
    print(str(i) + "-th transformed time series SFA word " + "\t" + sfaToWord(wordList))
               (FIXED_PARAMETERS['dataset'], scoreShotgunEnsemble))

if FIXED_PARAMETERS['test'] == 'Shotgun':
    logger.Log("Test: Shotgun")
    from src.classification.ShotgunClassifier import *

    shotgun = ShotgunClassifier(FIXED_PARAMETERS, logger)
    scoreShotgun = shotgun.eval(train, test)[0]
    logger.Log("%s: %s" % (FIXED_PARAMETERS['dataset'], scoreShotgun))

##=========================================================================================
## SFA Word Tests
##=========================================================================================
if FIXED_PARAMETERS['test'] == 'SFAWordTest':
    logger.Log("Test: SFAWordTest")
    from src.transformation.SFA import *

    sfa = SFA(FIXED_PARAMETERS["histogram_type"], logger=logger)
    sfa.fitTransform(train, FIXED_PARAMETERS['wordLength'], FIXED_PARAMETERS['symbols'],
                     FIXED_PARAMETERS['normMean'])
    logger.Log(sfa.__dict__)

    for i in range(test["Samples"]):
        wordList = sfa.transform2(test[i].data, "null", str_return=True)
        logger.Log("%s-th transformed TEST time series SFA word \t %s " % (i, wordList))

if FIXED_PARAMETERS['test'] == 'SFAWordWindowingTest':
    logger.Log("Test: SFAWordWindowingTest")
    from src.transformation.SFA import *

    sfa = SFA(FIXED_PARAMETERS["histogram_type"], logger=logger)