def __init__(self, definition, connection, asset=None):
        """
        Build the statistics groups (and their reset timers) from a definition.

        :param definition: a json dict that contains the definition for the stats
            (see examples in definitions dir). Keys read here: 'asset', 'name'
            and 'groups'; per group: 'name', 'calculate' and the optional
            'reset' and 'start date'.
        :param connection: the connection handed to the Sensor that is created
            when no asset is supplied
        :param asset: the asset object; when falsy, a Sensor is created from
            definition['asset']
        :raises Exception: when two groups of the definition share a name
        """
        self.asset = asset if asset else Sensor(definition['asset'],
                                                connection=connection)
        self.groups = []
        self.timers = []
        # group names have to be unique, otherwise we have an issue.
        seen_names = set()
        for group in definition['groups']:
            name = group['name']
            if name in seen_names:
                message = "duplicate groupname '{}' detected in {}".format(
                    name, definition['name'])
                raise Exception(message)
            seen_names.add(name)

            has_reset = 'reset' in group
            stat = Statistician(name, group['calculate'],
                                group['reset'] if has_reset else None,
                                group.get('start date'), self.asset)
            self.groups.append(stat)
            if not has_reset:
                continue
            # a group with a 'reset' entry also gets a timer that points back
            # at its statistician
            timer = Timer(self.asset, name)
            self.timers.append(timer)
            timer.group = stat
Example #2
0
 def train(train_x, train_y):
     """
     Compute the per-feature statistics table and the class probabilities.

     For every column index below Constants.tot_features the column is fetched
     from train_x, partitioned by class label, reduced by Statistician and
     passed through Statistician.handle_zero_variance_data. The collected
     table is dumped via Database.dump_data, stored on
     Naive_bayes.mean_variance_3d_array, and finally
     Naive_bayes.calculate_class_probabilities(train_y) is called.

     :param train_x: training samples, forwarded to Database.get_feature_data
     :param train_y: training labels, used for partitioning and for the
         class probabilities
     """
     # mean_variance_3d_array[0]: corresponds to a particular feature i.e all row values of a col
     # mean_variance_3d_array[0][0]: all feature values belonging to label 0, for a particular col
     mean_variance_3d_array = []
     for col in range(Constants.tot_features):
         feature_data = Database.get_feature_data(train_x, col)
         label_sorted_2d_array = Database.partition_by_class_label(
             feature_data, train_y)
         mean_variance_2d_array = Statistician.get_mean_variance_of_label_sorted_2d_array(
             label_sorted_2d_array)
         mean_variance_2d_array = Statistician.handle_zero_variance_data(
             mean_variance_2d_array)
         mean_variance_3d_array.append(mean_variance_2d_array)
     Database.dump_data(
         Constants.dataset_base + Constants.mean_variance_3d_array,
         mean_variance_3d_array)
     Naive_bayes.mean_variance_3d_array = mean_variance_3d_array
     Naive_bayes.calculate_class_probabilities(train_y)
Example #3
0
    def __init__(self,
                 db,
                 table,
                 title="Data Party",
                 author="Christian Battista",
                 human=""):
        """
        Open output/<fname>.Rnw for writing and call self.WriteTitle().

        :param db: handed to Statistician() when no `human` is supplied
        :param table: not used by this constructor
        :param title: stored as self.title and self.name; with spaces replaced
            by underscores it becomes self.fname, the output file's base name
        :param author: stored as self.author
        :param human: object stored as self.human; when falsy, a
            Statistician(db) is created instead
        """
        self.human = human or Statistician(db)

        self.title = title
        self.author = author
        self.name = title
        self.fname = self.name.replace(' ', '_')

        # the handle is stored on self.f and is not closed here
        self.f = open(os.path.join("output", self.fname) + ".Rnw", "w")
        self.lvl = 1

        self.WriteTitle()
    def load(self):
        """Create one instance of every service and register it in Service.all_services under its get_name()."""
        # load all the service-objects. They work as functions, don't wrap the
        # data, cause the data objects can be scattered
        template = DeviceTemplate()
        Service.all_services[template.get_name()] = template
        # the device template is additionally exposed as Service.template_service
        Service.template_service = template

        # the remaining services are only registered by name, in this order
        for service_class in (Statistician, Delay, NN, Rule, DeviceMonitor):
            service = service_class()
            Service.all_services[service.get_name()] = service
Example #5
0
	def __init__(self, db, table, title="Data Party", author = "Christian Battista", human = ""):
		"""
		Open output/<fname>.Rnw for writing and call self.WriteTitle().

		:param db: handed to Statistician() when no `human` is supplied
		:param table: not used by this constructor
		:param title: stored as self.title and self.name; with spaces replaced
			by underscores it becomes self.fname, the output file's base name
		:param author: stored as self.author
		:param human: object stored as self.human; when falsy, a
			Statistician(db) is created instead
		"""
		self.human = human or Statistician(db)

		self.title = title
		self.author = author
		self.name = title
		self.fname = self.name.replace(' ', '_')

		# the handle is stored on self.f and is not closed here
		self.f = open(os.path.join("output", self.fname) + ".Rnw", "w")
		self.lvl = 1

		self.WriteTitle()
Example #6
0
 def classify(sample):
     """
     Return the index of the class with the highest posterior for `sample`.

     Assumption: Naive_bayes.mean_variance_3d_array and
     Naive_bayes.class_probabilities are already present.

     For every feature value of the sample and every label index below
     Constants.tot_labels, Statistician.gaussian_distribution is evaluated with
     the stored entries [feature][label][0] and [feature][label][1]; the
     results are multiplied per label, then weighted with the class
     probability of that label.

     :param sample: sequence of feature values, one per feature
     :return: integer index of the largest posterior; on ties the lowest
         index is returned
     """
     tot_labels = Constants.tot_labels
     # 1-D list; each value is the product of the likelihoods so far, for a
     # particular class (in the RHS of Bayes formula); starts out as all 1s
     # NOTE(review): a plain product of many small likelihoods may underflow
     # to 0.0 for samples with many features -- confirm this is acceptable
     liklihood_probabilities = [1.0] * tot_labels
     # Given a sample, calculate the likelihood of each feature
     for feature_ind, feature_value in enumerate(sample):
         per_label_stats = Naive_bayes.mean_variance_3d_array[feature_ind]
         # Given a feature, calculate the likelihood for each class
         for label_ind in range(tot_labels):
             mean_ = per_label_stats[label_ind][0]
             variance_ = per_label_stats[label_ind][1]
             probability_ = Statistician.gaussian_distribution(
                 feature_value, mean_, variance_)
             liklihood_probabilities[label_ind] = liklihood_probabilities[
                 label_ind] * probability_
     # 1-D list; each value is the posterior probability for a particular class
     posterior_probabilities = [
         liklihood_probabilities[label_ind] * class_probability
         for label_ind, class_probability in enumerate(
             Naive_bayes.class_probabilities)
     ]
     # choose a class with max posterior probability
     return posterior_probabilities.index(max(posterior_probabilities))
Example #7
0
class RnwMaker:
	"""
	Incrementally writes a Sweave report to output/<fname>.Rnw.

	The file is opened by the constructor and stays open until Close() is
	called. Wording for factors and measures comes from self.human (a
	Statistician unless another object is supplied); the methods used on it
	here are translate, interpret, hypothesize and correlate.
	"""

	def __init__(self, db, table, title="Data Party", author = "Christian Battista", human = ""):
		"""
		Open the output file and write the document preamble.

		:param db: handed to Statistician() when no `human` is given
		:param table: not used by this class; kept so existing callers keep working
		:param title: document title; with spaces replaced by underscores it
			is also the base name of the output file
		:param author: document author
		:param human: object stored as self.human; a Statistician(db) is
			created when this is falsy
		"""
		if human:
			self.human = human
		else:
			self.human = Statistician(db)

		self.title = title
		self.author = author
		self.name = title
		self.fname = self.name.replace(' ', '_')

		path = os.path.join("output", self.fname)

		# kept open on purpose: the other methods append to it and Close()
		# releases it
		self.f = open("%s.Rnw" % path, "w")
		self.lvl = 1

		self.WriteTitle()

	def _section_tag(self):
		"""Return the sectioning command for self.lvl: section for 1, otherwise (lvl - 1) times 'sub' before 'section'."""
		if self.lvl == 1:
			return "\\section"
		return "\\%ssection" % ("sub" * (self.lvl - 1))

	def WriteTitle(self):
		"""Write document class, title, author, \\begin{document}, title page and table of contents."""
		# backslashes are escaped explicitly; "\u..." and "\d..." are not
		# valid escape sequences in a normal string literal
		output = "\\documentclass{article}\n\\usepackage[utf8x]{inputenc}\n"
		output += "\\title{%s}\n\\author{%s}\n" % (self.title, self.author)
		output += "\\begin{document}\n\\maketitle\n\\tableofcontents\n\\newpage\n"
		self.f.write(output)

	def AddText(self, text):
		"""Append str(text) to the report."""
		self.f.write(str(text))

	def AddTextFile(self, textfile):
		"""
		Append the complete contents of a text file to the report.

		:param textfile: path of the file to copy in
		"""
		# read from the opened handle (not from the path string) and make
		# sure the handle is closed even when reading fails
		with open(textfile, "r") as source:
			lines = source.read()
		self.f.write(lines)

	def ChangeLevel(self, lvl):
		"""Set the sectioning level used by AddSection and AddAnalysis (1 = section, 2 = subsection, ...)."""
		self.lvl = lvl

	def AddSection(self, factors, prefix = "Effect of"):
		"""
		Write a heading "<prefix> <translated factors>" at the current level.

		:param factors: passed to self.human.translate() to build the label
		:param prefix: text placed in front of the translated factors
		"""
		tag = self._section_tag()
		label = "%s %s" % (prefix, self.human.translate(factors))

		self.f.write("%s{%s}\n" % (tag, label))

	def addFigure(self, figure, caption=""):
		"""
		Return (without writing it) a figure environment wrapping an R plot chunk.

		:param figure: R code that draws the figure
		:param caption: caption text
		:return: the text of the figure block
		"""
		output = "\\begin{figure}\n\\begin{center}\n<<echo=false,fig=true>>==\n%s\n@\n\\end{center}\n\\caption{%s}\n\\end{figure}\n" % (figure, caption)
		return output

	def compareMeans(self, factors, measure, datFile, caption="", interpret = False):
		"""
		Write an aov analysis of `measure` by `factors`, followed by a boxplot.

		:param factors: factor name or list of factor names
		:param measure: name of the dependent measure
		:param datFile: path of the comma separated data file R should read
		:param caption: figure caption; the text returned by
			self.human.hypothesize() is appended to it
		:param interpret: when true, the text returned by
			self.human.interpret() is inserted as well
		"""
		output, dfName, fName, tag = self.AddAnalysis(factors, measure, datFile)

		model = "%s~%s+Error(s_id/%s)" % (measure, fName, fName)
		output += "%s{%s}\n" % (tag, self.human.translate(measure).title())

		if interpret:
			output += self.human.interpret(factors, measure, model, datFile)

		caption += ".  %s" % self.human.hypothesize(fName, measure)

		output += "<<>>==\n%sModel = aov(%s, data=%s)\nsummary(%sModel)\n" % (measure, model, dfName, measure)
		# NOTE(review): `factors` is interpolated exactly as passed in, so a
		# list ends up as its Python repr in the two tapply() calls -- confirm
		# what is intended for more than one factor
		output += "tapply(%s$%s, %s$%s, mean)\n" % (dfName, measure, dfName, factors)
		output += "tapply(%s$%s, %s$%s, sd)\n" % (dfName, measure, dfName, factors)
		output += "print(model.tables(%sModel,\"means\"),digits=4)\n" % (measure)
		output += "print(var(%s$%s),digits=4)\n" % (dfName, measure)
		output += "@\n"

		figure = "boxplot(%s~%s,data=%s, ylab=\"%s\", main=\"%s\")" % (measure, fName, dfName, self.human.translate(measure), self.human.translate(fName))

		output += self.addFigure(figure, caption)

		self.f.write(output)

	def correlate(self, factors, measure, datFile, caption=""):
		"""
		Write the correlation of two measures plus one or more figures.

		Without factors a single plot of measure[0] against measure[1] is
		added; otherwise one scatterplot conditioned on each factor.

		:param factors: factor name or list of factor names; "" or an empty
			list means "no factors"
		:param measure: sequence holding the names of the two measures
		:param datFile: path of the comma separated data file R should read
		:param caption: caption used for every figure
		"""
		output, dfName, fName, tag = self.AddAnalysis(factors, measure, datFile)

		_sig, r, p = self.human.correlate(measure[0], measure[1], datFile)

		output += "\nr=%2.2f, p<%0.2f\n\n" % (r, p)

		# iterate over the factor NAMES; looping over the "*"-joined string
		# returned by AddAnalysis would yield one figure per character
		if isinstance(factors, str):
			factors = [factors]
		factor_names = [name for name in factors if name]

		if not factor_names:
			figure = "plot(%s$%s, %s$%s, xlab = \"%s\", ylab = \"%s\")" % (dfName, measure[0], dfName, measure[1], self.human.translate(measure[0]), self.human.translate(measure[1]))
			output += self.addFigure(figure, caption)
		else:
			for name in factor_names:
				output += self.addFigure("scatterplot(%s ~ %s | %s, data=%s)" % (measure[0], measure[1], name, dfName), caption)

		self.f.write(output)

	def AddAnalysis(self, factors, measure, datFile):
		"""
		Build (without writing it) the chunk that loads the data file into R.

		:param factors: factor name or list of factor names
		:param measure: measure name, or list/tuple of measure names
		:param datFile: path of the comma separated data file R should read
		:return: tuple (output, dfName, fName, tag): the R chunk text, the
			data frame name (factors and measures joined by "_"), the factors
			joined by "*", and the sectioning command for the current level
		"""
		if isinstance(factors, str):
			factors = [factors]

		tag = self._section_tag()

		# leading separators are stripped so empty factor names do not leave
		# a dangling "_" or "*" in front
		dfName = "_".join(factors).lstrip("_")
		fName = "*".join(factors).lstrip("*")

		if isinstance(measure, (list, tuple)):
			dfName += "".join("_%s" % meas for meas in measure)
		else:
			dfName += "_%s" % measure

		dfName = dfName.lstrip("_")

		output = "<<echo=false>>=\n"
		output += "library(car)\n"
		output += "%s = read.table(\"%s\", header=TRUE, sep=\",\")\n@\n" % (dfName, datFile)

		return output, dfName, fName, tag

	def Close(self, execute=False):
		"""
		Finish the document and close the file.

		:param execute: when true, change into the output directory and run
			Sweave and pdflatex on the written file through R CMD
		"""
		self.f.write("\\end{document}\n")
		self.f.close()
		if execute:
			# NOTE(review): the working directory is changed and not restored,
			# and self.fname goes into the shell command unquoted -- fine for
			# trusted titles only
			os.chdir(os.path.join(os.getcwd(), "output"))
			os.system("R CMD Sweave %s.Rnw" % self.fname)
			os.system("R CMD pdflatex %s.tex" % self.fname)
		del self.f
Example #8
0
class RnwMaker:
    """
    Incrementally writes a Sweave report to output/<fname>.Rnw.

    The file is opened by the constructor and stays open until Close() is
    called. Wording for factors and measures comes from self.human (a
    Statistician unless another object is supplied); the methods used on it
    here are translate, interpret, hypothesize and correlate.
    """

    def __init__(self,
                 db,
                 table,
                 title="Data Party",
                 author="Christian Battista",
                 human=""):
        """
        Open the output file and write the document preamble.

        :param db: handed to Statistician() when no `human` is given
        :param table: not used by this class; kept so existing callers keep working
        :param title: document title; with spaces replaced by underscores it
            is also the base name of the output file
        :param author: document author
        :param human: object stored as self.human; a Statistician(db) is
            created when this is falsy
        """
        if human:
            self.human = human
        else:
            self.human = Statistician(db)

        self.title = title
        self.author = author
        self.name = title
        self.fname = self.name.replace(' ', '_')

        path = os.path.join("output", self.fname)

        # kept open on purpose: the other methods append to it and Close()
        # releases it
        self.f = open("%s.Rnw" % path, "w")
        self.lvl = 1

        self.WriteTitle()

    def _section_tag(self):
        """Return the sectioning command for self.lvl: section for 1, otherwise (lvl - 1) times 'sub' before 'section'."""
        if self.lvl == 1:
            return "\\section"
        return "\\%ssection" % ("sub" * (self.lvl - 1))

    def WriteTitle(self):
        """Write document class, title, author, \\begin{document}, title page and table of contents."""
        # backslashes are escaped explicitly; "\u..." and "\d..." are not
        # valid escape sequences in a normal string literal
        output = "\\documentclass{article}\n\\usepackage[utf8x]{inputenc}\n"
        output += "\\title{%s}\n\\author{%s}\n" % (self.title, self.author)
        output += "\\begin{document}\n\\maketitle\n\\tableofcontents\n\\newpage\n"
        self.f.write(output)

    def AddText(self, text):
        """Append str(text) to the report."""
        self.f.write(str(text))

    def AddTextFile(self, textfile):
        """
        Append the complete contents of a text file to the report.

        :param textfile: path of the file to copy in
        """
        # read from the opened handle (not from the path string) and make
        # sure the handle is closed even when reading fails
        with open(textfile, "r") as source:
            lines = source.read()
        self.f.write(lines)

    def ChangeLevel(self, lvl):
        """Set the sectioning level used by AddSection and AddAnalysis (1 = section, 2 = subsection, ...)."""
        self.lvl = lvl

    def AddSection(self, factors, prefix="Effect of"):
        """
        Write a heading "<prefix> <translated factors>" at the current level.

        :param factors: passed to self.human.translate() to build the label
        :param prefix: text placed in front of the translated factors
        """
        tag = self._section_tag()
        label = "%s %s" % (prefix, self.human.translate(factors))

        self.f.write("%s{%s}\n" % (tag, label))

    def addFigure(self, figure, caption=""):
        """
        Return (without writing it) a figure environment wrapping an R plot chunk.

        :param figure: R code that draws the figure
        :param caption: caption text
        :return: the text of the figure block
        """
        output = "\\begin{figure}\n\\begin{center}\n<<echo=false,fig=true>>==\n%s\n@\n\\end{center}\n\\caption{%s}\n\\end{figure}\n" % (
            figure, caption)
        return output

    def compareMeans(self,
                     factors,
                     measure,
                     datFile,
                     caption="",
                     interpret=False):
        """
        Write an aov analysis of `measure` by `factors`, followed by a boxplot.

        :param factors: factor name or list of factor names
        :param measure: name of the dependent measure
        :param datFile: path of the comma separated data file R should read
        :param caption: figure caption; the text returned by
            self.human.hypothesize() is appended to it
        :param interpret: when true, the text returned by
            self.human.interpret() is inserted as well
        """
        output, dfName, fName, tag = self.AddAnalysis(factors, measure,
                                                      datFile)

        model = "%s~%s+Error(s_id/%s)" % (measure, fName, fName)
        output += "%s{%s}\n" % (tag, self.human.translate(measure).title())

        if interpret:
            output += self.human.interpret(factors, measure, model, datFile)

        caption += ".  %s" % self.human.hypothesize(fName, measure)

        output += "<<>>==\n%sModel = aov(%s, data=%s)\nsummary(%sModel)\n" % (
            measure, model, dfName, measure)
        # NOTE(review): `factors` is interpolated exactly as passed in, so a
        # list ends up as its Python repr in the two tapply() calls -- confirm
        # what is intended for more than one factor
        output += "tapply(%s$%s, %s$%s, mean)\n" % (dfName, measure, dfName,
                                                    factors)
        output += "tapply(%s$%s, %s$%s, sd)\n" % (dfName, measure, dfName,
                                                  factors)
        output += "print(model.tables(%sModel,\"means\"),digits=4)\n" % (
            measure)
        output += "print(var(%s$%s),digits=4)\n" % (dfName, measure)
        output += "@\n"

        figure = "boxplot(%s~%s,data=%s, ylab=\"%s\", main=\"%s\")" % (
            measure, fName, dfName, self.human.translate(measure),
            self.human.translate(fName))

        output += self.addFigure(figure, caption)

        self.f.write(output)

    def correlate(self, factors, measure, datFile, caption=""):
        """
        Write the correlation of two measures plus one or more figures.

        Without factors a single plot of measure[0] against measure[1] is
        added; otherwise one scatterplot conditioned on each factor.

        :param factors: factor name or list of factor names; "" or an empty
            list means "no factors"
        :param measure: sequence holding the names of the two measures
        :param datFile: path of the comma separated data file R should read
        :param caption: caption used for every figure
        """
        output, dfName, fName, tag = self.AddAnalysis(
            factors, measure, datFile)

        _sig, r, p = self.human.correlate(measure[0], measure[1], datFile)

        output += "\nr=%2.2f, p<%0.2f\n\n" % (r, p)

        # iterate over the factor NAMES; looping over the "*"-joined string
        # returned by AddAnalysis would yield one figure per character
        if isinstance(factors, str):
            factors = [factors]
        factor_names = [name for name in factors if name]

        if not factor_names:
            figure = "plot(%s$%s, %s$%s, xlab = \"%s\", ylab = \"%s\")" % (
                dfName, measure[0], dfName, measure[1],
                self.human.translate(
                    measure[0]), self.human.translate(measure[1]))
            output += self.addFigure(figure, caption)
        else:
            for name in factor_names:
                output += self.addFigure(
                    "scatterplot(%s ~ %s | %s, data=%s)" %
                    (measure[0], measure[1], name, dfName), caption)

        self.f.write(output)

    def AddAnalysis(self, factors, measure, datFile):
        """
        Build (without writing it) the chunk that loads the data file into R.

        :param factors: factor name or list of factor names
        :param measure: measure name, or list/tuple of measure names
        :param datFile: path of the comma separated data file R should read
        :return: tuple (output, dfName, fName, tag): the R chunk text, the
            data frame name (factors and measures joined by "_"), the factors
            joined by "*", and the sectioning command for the current level
        """
        if isinstance(factors, str):
            factors = [factors]

        tag = self._section_tag()

        # leading separators are stripped so empty factor names do not leave
        # a dangling "_" or "*" in front
        dfName = "_".join(factors).lstrip("_")
        fName = "*".join(factors).lstrip("*")

        if isinstance(measure, (list, tuple)):
            dfName += "".join("_%s" % meas for meas in measure)
        else:
            dfName += "_%s" % measure

        dfName = dfName.lstrip("_")

        output = "<<echo=false>>=\n"
        output += "library(car)\n"
        output += "%s = read.table(\"%s\", header=TRUE, sep=\",\")\n@\n" % (
            dfName, datFile)

        return output, dfName, fName, tag

    def Close(self, execute=False):
        """
        Finish the document and close the file.

        :param execute: when true, change into the output directory and run
            Sweave and pdflatex on the written file through R CMD
        """
        self.f.write("\\end{document}\n")
        self.f.close()
        if execute:
            # NOTE(review): the working directory is changed and not restored,
            # and self.fname goes into the shell command unquoted -- fine for
            # trusted titles only
            os.chdir(os.path.join(os.getcwd(), "output"))
            os.system("R CMD Sweave %s.Rnw" % self.fname)
            os.system("R CMD pdflatex %s.tex" % self.fname)
        del self.f