def __init__(self, definition, connection, asset=None):
    """
    create the object
    :param definition: a json dict that contains the definition for the stats.
        (see examples in definitions dir). This constructor reads
        definition['groups'] (each entry needs 'name' and 'calculate', and may
        carry 'reset' and 'start date'), definition['name'] for the error
        message, and definition['asset'] when no asset is supplied.
    :param connection: the connection to use when the asset has to be created
    :param asset: the asset object, if none (or otherwise falsy), the object
        will be created from the definition
    :raises Exception: when two groups in the definition share the same name
    """
    self.asset = asset if asset else Sensor(definition['asset'], connection=connection)
    self.groups = []
    self.timers = []

    # Group names must be unique, otherwise we have an issue.
    seen_names = set()
    for group_def in definition['groups']:
        name = group_def['name']
        if name in seen_names:
            raise Exception(
                "duplicate groupname '{}' detected in {}".format(name, definition['name']))
        seen_names.add(name)

        has_reset = 'reset' in group_def
        reset = group_def['reset'] if has_reset else None
        start_date = group_def.get('start date')
        stat = Statistician(name, group_def['calculate'], reset, start_date, self.asset)
        self.groups.append(stat)

        # Only groups that declare a 'reset' get a timer, linked back to their statistician.
        if has_reset:
            timer = Timer(self.asset, name)
            self.timers.append(timer)
            timer.group = stat
def train(train_x, train_y):
    """Compute per-feature, per-label mean/variance data and store it for the classifier.

    For every feature column (``Constants.tot_features`` of them) the column
    is fetched from ``train_x``, partitioned by the labels in ``train_y``,
    reduced by ``Statistician.get_mean_variance_of_label_sorted_2d_array`` and
    passed through ``Statistician.handle_zero_variance_data``. The collected
    result is dumped via ``Database.dump_data``, stored on
    ``Naive_bayes.mean_variance_3d_array``, and finally
    ``Naive_bayes.calculate_class_probabilities(train_y)`` is called.
    """
    # mean_variance_3d_array[0]: corresponds to a particular feature, i.e. all row values of a col
    # mean_variance_3d_array[0][0]: all feature values belonging to label 0, for a particular col
    mean_variance_3d_array = []
    for col in range(Constants.tot_features):
        column_values = Database.get_feature_data(train_x, col)
        values_by_label = Database.partition_by_class_label(column_values, train_y)
        column_stats = Statistician.get_mean_variance_of_label_sorted_2d_array(values_by_label)
        mean_variance_3d_array.append(Statistician.handle_zero_variance_data(column_stats))

    dump_target = Constants.dataset_base + Constants.mean_variance_3d_array
    Database.dump_data(dump_target, mean_variance_3d_array)
    Naive_bayes.mean_variance_3d_array = mean_variance_3d_array
    Naive_bayes.calculate_class_probabilities(train_y)
def __init__(self, db, table, title="Data Party", author="Christian Battista", human=""):
    """Open ``output/<title with spaces as underscores>.Rnw`` for writing and emit the title block.

    :param db: passed to ``Statistician`` when no ``human`` is supplied
    :param table: accepted but not used by this constructor
    :param title: report title; also stored as ``self.name`` and used for the file name
    :param author: report author
    :param human: helper object stored as ``self.human``; when falsy a
        ``Statistician(db)`` is created instead
    """
    self.human = human if human else Statistician(db)

    self.title = title
    self.author = author
    self.name = title
    self.fname = self.name.replace(' ', '_')

    # The handle stays open on the instance; later writes go through self.f.
    rnw_path = "%s.Rnw" % os.path.join("output", self.fname)
    self.f = open(rnw_path, "w")

    self.lvl = 1
    self.WriteTitle()
def load(self):
    """Instantiate every service object and register it in ``Service.all_services``.

    Each service is keyed by the value of its own ``get_name()``. The
    ``DeviceTemplate`` instance is additionally stored as
    ``Service.template_service``.
    """
    # Service objects work as functions; they don't wrap the data, because the
    # data objects can be scattered.
    template = DeviceTemplate()
    Service.all_services[template.get_name()] = template
    Service.template_service = template

    for service_class in (Statistician, Delay, NN, Rule, DeviceMonitor):
        service = service_class()
        Service.all_services[service.get_name()] = service
def __init__(self, db, table, title="Data Party", author="Christian Battista", human=""):
    """Set up the report: pick the helper, derive the file name, open the file, write the title.

    :param db: handed to ``Statistician`` when ``human`` is falsy
    :param table: accepted but not used by this constructor
    :param title: report title (also kept as ``self.name``)
    :param author: report author
    :param human: helper object to use as ``self.human``; defaults to a new
        ``Statistician(db)`` when falsy
    """
    self.human = human if human else Statistician(db)

    self.title = title
    self.author = author
    self.name = title
    # File name is the title with spaces replaced by underscores.
    self.fname = self.name.replace(' ', '_')

    target = os.path.join("output", self.fname)
    # Kept open on purpose: the other methods write through self.f.
    self.f = open("%s.Rnw" % target, "w")

    self.lvl = 1
    self.WriteTitle()
def classify(sample):
    """Return the index of the class with the largest posterior value for ``sample``.

    Assumption: ``Naive_bayes.mean_variance_3d_array`` and
    ``Naive_bayes.class_probabilities`` are already present.

    For every class the likelihood is the product, over all features of the
    sample, of ``Statistician.gaussian_distribution(value, mean, variance)``,
    where mean and variance are read from
    ``Naive_bayes.mean_variance_3d_array[feature][label][0]`` and ``[1]``.
    Each likelihood is then multiplied by the matching entry of
    ``Naive_bayes.class_probabilities``; the first index holding the maximum
    is returned.
    """
    label_count = Constants.tot_labels

    # 1-D array; each value is the running product of the likelihoods for one class
    # (the RHS of Bayes formula), initialised with 1s.
    likelihoods = [1.0] * label_count

    # Given a sample, fold in the likelihood of each feature for each class.
    for feature_ind in range(len(sample)):
        feature_value = sample[feature_ind]
        for label_ind in range(label_count):
            stats = Naive_bayes.mean_variance_3d_array[feature_ind][label_ind]
            mean_ = stats[0]
            variance_ = stats[1]
            probability_ = Statistician.gaussian_distribution(feature_value, mean_, variance_)
            likelihoods[label_ind] = likelihoods[label_ind] * probability_

    # 1-D array; each value is the posterior probability for a particular class.
    posterior_probabilities = [
        likelihoods[label_ind] * Naive_bayes.class_probabilities[label_ind]
        for label_ind in range(len(Naive_bayes.class_probabilities))
    ]

    # Choose the class with the max posterior probability.
    return posterior_probabilities.index(max(posterior_probabilities))
class RnwMaker:
    """Incrementally writes a Sweave (.Rnw) report to ``output/<title>.Rnw``.

    The file is opened in the constructor and kept open on ``self.f`` until
    ``Close`` is called. ``self.human`` is a helper object whose
    ``translate``, ``interpret``, ``hypothesize`` and ``correlate`` methods
    are used to produce labels and statistics.
    """

    def __init__(self, db, table, title="Data Party", author="Christian Battista", human=""):
        """Open the report file and write the LaTeX preamble.

        :param db: passed to ``Statistician`` when no ``human`` is supplied
        :param table: accepted but not used by this class
        :param title: report title; also determines the output file name
        :param author: report author
        :param human: helper object; when falsy a ``Statistician(db)`` is created
        """
        self.human = human if human else Statistician(db)

        self.title = title
        self.author = author
        self.name = title
        self.fname = self.name.replace(' ', '_')

        path = os.path.join("output", self.fname)
        # Deliberately left open: every other method writes through self.f.
        self.f = open("%s.Rnw" % path, "w")

        self.lvl = 1
        self.WriteTitle()

    def _section_tag(self):
        """Return the LaTeX sectioning command for the current level (1 = section, 2 = subsection, ...)."""
        if self.lvl == 1:
            return "\\section"
        return "\\%ssection" % ("sub" * (self.lvl - 1))

    def _factor_list(self, factors):
        """Normalise ``factors`` (None/empty, a single name, or an iterable of names) to a list."""
        if not factors:
            return []
        if isinstance(factors, str):
            return [factors]
        return list(factors)

    def WriteTitle(self):
        """Write the document class, title, author and table of contents."""
        # Backslashes are escaped explicitly: "\u..." in a plain literal is a
        # syntax error on Python 3 and the other sequences are invalid escapes.
        output = "\\documentclass{article}\n\\usepackage[utf8x]{inputenc}\n"
        output += "\\title{%s}\n\\author{%s}\n" % (self.title, self.author)
        output += "\\begin{document}\n\\maketitle\n\\tableofcontents\n\\newpage\n"
        self.f.write(output)

    def AddText(self, text):
        """Append ``str(text)`` to the report."""
        self.f.write(str(text))

    def AddTextFile(self, textfile):
        """Append the full contents of the file at path ``textfile`` to the report."""
        with open(textfile, "r") as source:
            self.f.write(source.read())

    def ChangeLevel(self, lvl):
        """Set the sectioning level used by subsequent sections/analyses."""
        self.lvl = lvl

    def AddSection(self, factors, prefix="Effect of"):
        """Write a section heading labelled ``"<prefix> <translated factors>"``."""
        label = "%s %s" % (prefix, self.human.translate(factors))
        self.f.write("%s{%s}\n" % (self._section_tag(), label))

    def addFigure(self, figure, caption=""):
        """Return (without writing) a figure environment wrapping the R plotting code ``figure``."""
        output = (
            "\\begin{figure}\n\\begin{center}\n<<echo=false,fig=true>>==\n%s\n@\n"
            "\\end{center}\n\\caption{%s}\n\\end{figure}\n"
        ) % (figure, caption)
        return output

    def compareMeans(self, factors, measure, datFile, caption="", interpret=False):
        """Write an ANOVA of ``measure`` by ``factors`` plus a boxplot figure.

        :param factors: factor name or list of factor names
        :param measure: name of the dependent measure
        :param datFile: path of the CSV file loaded by the generated R code
        :param caption: figure caption; the helper's hypothesis text is appended
        :param interpret: when true, also include ``self.human.interpret(...)`` output
        """
        output, dfName, fName, tag = self.AddAnalysis(factors, measure, datFile)
        model = "%s~%s+Error(s_id/%s)" % (measure, fName, fName)
        output += "%s{%s}\n" % (tag, self.human.translate(measure).title())
        if interpret:
            output += self.human.interpret(factors, measure, model, datFile)
        caption += ". %s" % self.human.hypothesize(fName, measure)
        output += "<<>>==\n%sModel = aov(%s, data=%s)\nsummary(%sModel)\n" % (
            measure, model, dfName, measure)
        # NOTE(review): `factors` is interpolated as passed in; a list would be
        # rendered as its Python repr here - confirm callers pass a single name.
        output += "tapply(%s$%s, %s$%s, mean)\n" % (dfName, measure, dfName, factors)
        output += "tapply(%s$%s, %s$%s, sd)\n" % (dfName, measure, dfName, factors)
        output += "print(model.tables(%sModel,\"means\"),digits=4)\n" % (measure)
        output += "print(var(%s$%s),digits=4)\n" % (dfName, measure)
        output += "@\n"
        figure = "boxplot(%s~%s,data=%s, ylab=\"%s\", main=\"%s\")" % (
            measure, fName, dfName,
            self.human.translate(measure), self.human.translate(fName))
        output += self.addFigure(figure, caption)
        self.f.write(output)

    def correlate(self, factors, measure, datFile, caption=""):
        """Write the correlation of ``measure[0]`` and ``measure[1]`` with plot(s).

        Without factors a single scatter plot is produced; otherwise one
        conditioned scatterplot is produced per factor name.

        :param factors: None/empty, a factor name, or a list of factor names
        :param measure: sequence whose first two items are the measures to correlate
        :param datFile: path of the CSV file loaded by the generated R code
        :param caption: caption used for every figure
        """
        output, dfName, _fName, _tag = self.AddAnalysis(factors, measure, datFile)
        _sig, r, p = self.human.correlate(measure[0], measure[1], datFile)
        output += "\nr=%2.2f, p<%0.2f\n\n" % (r, p)

        # Iterate over the factor NAMES; the joined "a*b" string returned by
        # AddAnalysis must not be iterated (that would yield single characters).
        factor_names = [name for name in self._factor_list(factors) if name]
        if not factor_names:
            figure = "plot(%s$%s, %s$%s, xlab = \"%s\", ylab = \"%s\")" % (
                dfName, measure[0], dfName, measure[1],
                self.human.translate(measure[0]), self.human.translate(measure[1]))
            output += self.addFigure(figure, caption)
        else:
            for name in factor_names:
                output += self.addFigure(
                    "scatterplot(%s ~ %s | %s, data=%s)" % (measure[0], measure[1], name, dfName),
                    caption)
        self.f.write(output)

    def AddAnalysis(self, factors, measure, datFile):
        """Build the R chunk that loads ``datFile`` and derive the names used by an analysis.

        :param factors: None/empty, a factor name, or a list of factor names
        :param measure: a measure name, or a list/tuple of measure names
        :param datFile: path of the CSV file to read in the generated R code
        :returns: tuple ``(output, dfName, fName, tag)`` - the R chunk text, the
            data-frame name (factors and measures joined by "_"), the factor
            term (factors joined by "*") and the sectioning command for the
            current level
        """
        factor_names = self._factor_list(factors)
        tag = self._section_tag()

        output = "<<echo=false>>=\n"
        output += "library(car)\n"

        dfName = "_".join(factor_names).lstrip("_")
        fName = "*".join(factor_names).lstrip("*")

        if isinstance(measure, (list, tuple)):
            dfName += "".join("_%s" % meas for meas in measure)
        else:
            dfName += "_%s" % measure
        dfName = dfName.lstrip("_")

        output += "%s = read.table(\"%s\", header=TRUE, sep=\",\")\n@\n" % (dfName, datFile)
        return output, dfName, fName, tag

    def Close(self, execute=False):
        """Finish the document and close the file; optionally run Sweave and pdflatex.

        :param execute: when true, change into the ``output`` directory and run
            the R commands on the generated file
        """
        self.f.write("\\end{document}\n")
        self.f.close()

        if execute:
            # NOTE(review): this changes the process working directory and does
            # not restore it - kept as-is because callers may rely on it.
            os.chdir(os.path.join(os.getcwd(), "output"))
            os.system("R CMD Sweave %s.Rnw" % self.fname)
            os.system("R CMD pdflatex %s.tex" % self.fname)
        del self.f
class RnwMaker:
    """Incrementally writes a Sweave (.Rnw) report to ``output/<title>.Rnw``.

    The file is opened in the constructor and kept open on ``self.f`` until
    ``Close`` is called. ``self.human`` is a helper object whose
    ``translate``, ``interpret``, ``hypothesize`` and ``correlate`` methods
    are used to produce labels and statistics.
    """

    def __init__(self, db, table, title="Data Party", author="Christian Battista", human=""):
        """Open the report file and write the LaTeX preamble.

        :param db: passed to ``Statistician`` when no ``human`` is supplied
        :param table: accepted but not used by this class
        :param title: report title; also determines the output file name
        :param author: report author
        :param human: helper object; when falsy a ``Statistician(db)`` is created
        """
        self.human = human if human else Statistician(db)

        self.title = title
        self.author = author
        self.name = title
        self.fname = self.name.replace(' ', '_')

        path = os.path.join("output", self.fname)
        # Deliberately left open: every other method writes through self.f.
        self.f = open("%s.Rnw" % path, "w")

        self.lvl = 1
        self.WriteTitle()

    def _section_tag(self):
        """Return the LaTeX sectioning command for the current level (1 = section, 2 = subsection, ...)."""
        if self.lvl == 1:
            return "\\section"
        return "\\%ssection" % ("sub" * (self.lvl - 1))

    def _factor_list(self, factors):
        """Normalise ``factors`` (None/empty, a single name, or an iterable of names) to a list."""
        if not factors:
            return []
        if isinstance(factors, str):
            return [factors]
        return list(factors)

    def WriteTitle(self):
        """Write the document class, title, author and table of contents."""
        # Backslashes are escaped explicitly: "\u..." in a plain literal is a
        # syntax error on Python 3 and the other sequences are invalid escapes.
        output = "\\documentclass{article}\n\\usepackage[utf8x]{inputenc}\n"
        output += "\\title{%s}\n\\author{%s}\n" % (self.title, self.author)
        output += "\\begin{document}\n\\maketitle\n\\tableofcontents\n\\newpage\n"
        self.f.write(output)

    def AddText(self, text):
        """Append ``str(text)`` to the report."""
        self.f.write(str(text))

    def AddTextFile(self, textfile):
        """Append the full contents of the file at path ``textfile`` to the report."""
        with open(textfile, "r") as source:
            self.f.write(source.read())

    def ChangeLevel(self, lvl):
        """Set the sectioning level used by subsequent sections/analyses."""
        self.lvl = lvl

    def AddSection(self, factors, prefix="Effect of"):
        """Write a section heading labelled ``"<prefix> <translated factors>"``."""
        label = "%s %s" % (prefix, self.human.translate(factors))
        self.f.write("%s{%s}\n" % (self._section_tag(), label))

    def addFigure(self, figure, caption=""):
        """Return (without writing) a figure environment wrapping the R plotting code ``figure``."""
        output = (
            "\\begin{figure}\n\\begin{center}\n<<echo=false,fig=true>>==\n%s\n@\n"
            "\\end{center}\n\\caption{%s}\n\\end{figure}\n"
        ) % (figure, caption)
        return output

    def compareMeans(self, factors, measure, datFile, caption="", interpret=False):
        """Write an ANOVA of ``measure`` by ``factors`` plus a boxplot figure.

        :param factors: factor name or list of factor names
        :param measure: name of the dependent measure
        :param datFile: path of the CSV file loaded by the generated R code
        :param caption: figure caption; the helper's hypothesis text is appended
        :param interpret: when true, also include ``self.human.interpret(...)`` output
        """
        output, dfName, fName, tag = self.AddAnalysis(factors, measure, datFile)
        model = "%s~%s+Error(s_id/%s)" % (measure, fName, fName)
        output += "%s{%s}\n" % (tag, self.human.translate(measure).title())
        if interpret:
            output += self.human.interpret(factors, measure, model, datFile)
        caption += ". %s" % self.human.hypothesize(fName, measure)
        output += "<<>>==\n%sModel = aov(%s, data=%s)\nsummary(%sModel)\n" % (
            measure, model, dfName, measure)
        # NOTE(review): `factors` is interpolated as passed in; a list would be
        # rendered as its Python repr here - confirm callers pass a single name.
        output += "tapply(%s$%s, %s$%s, mean)\n" % (dfName, measure, dfName, factors)
        output += "tapply(%s$%s, %s$%s, sd)\n" % (dfName, measure, dfName, factors)
        output += "print(model.tables(%sModel,\"means\"),digits=4)\n" % (measure)
        output += "print(var(%s$%s),digits=4)\n" % (dfName, measure)
        output += "@\n"
        figure = "boxplot(%s~%s,data=%s, ylab=\"%s\", main=\"%s\")" % (
            measure, fName, dfName,
            self.human.translate(measure), self.human.translate(fName))
        output += self.addFigure(figure, caption)
        self.f.write(output)

    def correlate(self, factors, measure, datFile, caption=""):
        """Write the correlation of ``measure[0]`` and ``measure[1]`` with plot(s).

        Without factors a single scatter plot is produced; otherwise one
        conditioned scatterplot is produced per factor name.

        :param factors: None/empty, a factor name, or a list of factor names
        :param measure: sequence whose first two items are the measures to correlate
        :param datFile: path of the CSV file loaded by the generated R code
        :param caption: caption used for every figure
        """
        output, dfName, _fName, _tag = self.AddAnalysis(factors, measure, datFile)
        _sig, r, p = self.human.correlate(measure[0], measure[1], datFile)
        output += "\nr=%2.2f, p<%0.2f\n\n" % (r, p)

        # Iterate over the factor NAMES; the joined "a*b" string returned by
        # AddAnalysis must not be iterated (that would yield single characters).
        factor_names = [name for name in self._factor_list(factors) if name]
        if not factor_names:
            figure = "plot(%s$%s, %s$%s, xlab = \"%s\", ylab = \"%s\")" % (
                dfName, measure[0], dfName, measure[1],
                self.human.translate(measure[0]), self.human.translate(measure[1]))
            output += self.addFigure(figure, caption)
        else:
            for name in factor_names:
                output += self.addFigure(
                    "scatterplot(%s ~ %s | %s, data=%s)" % (measure[0], measure[1], name, dfName),
                    caption)
        self.f.write(output)

    def AddAnalysis(self, factors, measure, datFile):
        """Build the R chunk that loads ``datFile`` and derive the names used by an analysis.

        :param factors: None/empty, a factor name, or a list of factor names
        :param measure: a measure name, or a list/tuple of measure names
        :param datFile: path of the CSV file to read in the generated R code
        :returns: tuple ``(output, dfName, fName, tag)`` - the R chunk text, the
            data-frame name (factors and measures joined by "_"), the factor
            term (factors joined by "*") and the sectioning command for the
            current level
        """
        factor_names = self._factor_list(factors)
        tag = self._section_tag()

        output = "<<echo=false>>=\n"
        output += "library(car)\n"

        dfName = "_".join(factor_names).lstrip("_")
        fName = "*".join(factor_names).lstrip("*")

        if isinstance(measure, (list, tuple)):
            dfName += "".join("_%s" % meas for meas in measure)
        else:
            dfName += "_%s" % measure
        dfName = dfName.lstrip("_")

        output += "%s = read.table(\"%s\", header=TRUE, sep=\",\")\n@\n" % (dfName, datFile)
        return output, dfName, fName, tag

    def Close(self, execute=False):
        """Finish the document and close the file; optionally run Sweave and pdflatex.

        :param execute: when true, change into the ``output`` directory and run
            the R commands on the generated file
        """
        self.f.write("\\end{document}\n")
        self.f.close()

        if execute:
            # NOTE(review): this changes the process working directory and does
            # not restore it - kept as-is because callers may rely on it.
            os.chdir(os.path.join(os.getcwd(), "output"))
            os.system("R CMD Sweave %s.Rnw" % self.fname)
            os.system("R CMD pdflatex %s.tex" % self.fname)
        del self.f