def MixtureOfGaussians():
    """Fit a two-component mixture of 2-D Gaussians and print the posteriors.

    A VectorGaussian prior is placed on each component mean, a Wishart prior
    on each component precision and a Dirichlet prior on the mixture weights.
    The 300 observations come from ``GenerateData``; the per-point component
    indicators ``z`` are initialised to random point masses before inference.
    The posteriors over the weights, means and precisions are printed.
    """
    # Define a range for the number of mixture components
    k = Range(2)

    # Mixture component means
    means = Variable.Array[Vector](k)
    means_k = Variable.VectorGaussianFromMeanAndPrecision(
        Vector.FromArray(0.0, 0.0),
        PositiveDefiniteMatrix.IdentityScaledBy(2, 0.01)).ForEach(k)
    means.set_Item(k, means_k)

    # Mixture component precisions
    precs = Variable.Array[PositiveDefiniteMatrix](k).Named("precs")
    precs_k = Variable.WishartFromShapeAndScale(
        100.0, PositiveDefiniteMatrix.IdentityScaledBy(2, 0.01)).ForEach(k)
    precs.set_Item(k, precs_k)

    # Mixture weights
    weights = Variable.Dirichlet(k, [1.0, 1.0]).Named("weights")

    # Create a variable array which will hold the data
    n = Range(300).Named("n")
    data = Variable.Array[Vector](n).Named("x")

    # Create latent indicator variable for each data point
    z = Variable.Array[Int32](n).Named("z")

    # The mixture of Gaussians model.  Each block is closed in a ``finally``
    # clause so that an exception raised while the model is being built does
    # not leave the block open for whatever model code runs afterwards.
    forEachBlock = Variable.ForEach(n)
    try:
        z.set_Item(n, Variable.Discrete(weights))
        switchBlock = Variable.Switch(z.get_Item(n))
        try:
            data.set_Item(
                n,
                Variable.VectorGaussianFromMeanAndPrecision(
                    means.get_Item(z.get_Item(n)),
                    precs.get_Item(z.get_Item(n))))
        finally:
            switchBlock.CloseBlock()
    finally:
        forEachBlock.CloseBlock()

    # Attach some generated data
    data.ObservedValue = GenerateData(n.SizeAsInt)

    # Initialise messages randomly to break symmetry
    component_count = k.SizeAsInt
    zInit = Variable.Array[Discrete](n).Named("zInit")
    zInit.ObservedValue = [
        Discrete.PointMass(Rand.Int(component_count), component_count)
        for _ in range(n.SizeAsInt)
    ]
    # The following does not work in pythonnet:
    # z.get_Item(n).InitialiseTo[Discrete].Overloads[Variable[Discrete]](zInit.get_Item(n))
    # so the InitialiseTo helper is called instead.
    InitialiseTo(z.get_Item(n), zInit.get_Item(n))

    # The inference
    engine = InferenceEngine()
    print("Dist over pi=%s" % engine.Infer(weights))
    print("Dist over means=\n%s" % engine.Infer(means))
    print("Dist over precs=\n%s" % engine.Infer(precs))
def BayesPointMachineExample():
    """Train a Bayes point machine on toy purchase data and score new cases.

    A zero-mean, identity-matrix ``VectorGaussian`` over three weights is
    used as the prior.  It is handed to the ``BayesPointMachine`` helper
    together with the training incomes, ages and observed labels, and the
    inferred weight posterior is printed.  That posterior then serves as the
    weight distribution for a second ``BayesPointMachine`` call over the test
    inputs, whose inferred outputs are printed as well.
    """
    train_incomes = [63, 16, 28, 55, 22, 20]
    train_ages = [38, 23, 40, 27, 18, 40]
    purchased = [True, False, True, True, False, False]

    # Neither Variable.Observed[bool](...) nor
    # Variable.Observed[bool].Overloads[Array[bool]](...) works in pythonnet,
    # so the VariableObserved helper is used instead.
    labels = VariableObserved(purchased)

    weight_prior = VectorGaussian(
        Vector.Zero(3), PositiveDefiniteMatrix.Identity(3))
    weights = Variable.Random[Vector](weight_prior)
    BayesPointMachine(train_incomes, train_ages, weights, labels)

    engine = InferenceEngine()
    weight_posterior = engine.Infer(weights)
    print("Dist over w=\n%s" % weight_posterior)

    # Score unseen cases with the learned weight posterior.
    test_incomes = [58, 18, 22]
    test_ages = [36, 24, 37]
    predictions = Variable.Array[bool](Range(len(test_ages)))
    learned_weights = Variable.Random[Vector](weight_posterior)
    BayesPointMachine(test_incomes, test_ages, learned_weights, predictions)
    print("output=\n%s" % engine.Infer(predictions))
def LearningAGaussianWithRanges():
    """Learn the mean and precision of a Gaussian from 100 sampled values.

    The values are drawn with ``Rand.Normal(0.0, 1.0)`` after restarting the
    Infer.NET random number generator with a fixed seed.  They are attached
    as the observed value of a variable array defined over a range, and the
    inferred posteriors of ``mean`` and ``precision`` are printed.
    """
    # Restart the Infer.NET random number generator with a fixed seed
    Rand.Restart(12347)
    samples = [Rand.Normal(0.0, 1.0) for _ in range(100)]

    # Priors on the unknown parameters
    mean = Variable.GaussianFromMeanAndVariance(0.0, 1.0).Named("mean")
    precision = Variable.GammaFromShapeAndScale(1.0, 1.0).Named("precision")

    # One Gaussian variable per sample, all sharing the same mean/precision
    sample_range = Range(len(samples)).Named("n")
    observations = Variable.Array[Double](sample_range)
    observations.set_Item(
        sample_range,
        Variable.GaussianFromMeanAndPrecision(mean, precision).ForEach(
            sample_range))
    observations.ObservedValue = samples

    engine = InferenceEngine()
    print("mean=%s" % engine.Infer(mean))
    print("precision=%s" % engine.Infer(precision))
def MotifFinder():
    """Infer a nucleobase motif, its presence and its position per sequence.

    Sequences are sampled with ``SampleMotifData`` after restarting the
    Infer.NET random number generator with a fixed seed.  A string model is
    then built in which every sequence either consists of background
    characters only, or of background + motif + background.  After inference
    the function prints the true and the inferred position frequency
    matrices, followed by a colour-coded comparison of the predicted and the
    ground-truth motif location for at most the first 30 sequences.
    """
    Rand.Restart(1337)

    SequenceCount = 50
    SequenceLength = 25
    MotifPresenceProbability = 0.8

    trueMotifNucleobaseDist = [
        NucleobaseDist(a=0.8, c=0.1, g=0.05, t=0.05),
        NucleobaseDist(a=0.0, c=0.9, g=0.05, t=0.05),
        NucleobaseDist(a=0.0, c=0.0, g=0.5, t=0.5),
        NucleobaseDist(a=0.25, c=0.25, g=0.25, t=0.25),
        NucleobaseDist(a=0.1, c=0.1, g=0.1, t=0.7),
        NucleobaseDist(a=0.0, c=0.0, g=0.9, t=0.1),
        NucleobaseDist(a=0.9, c=0.05, g=0.0, t=0.05),
        NucleobaseDist(a=0.5, c=0.5, g=0.0, t=0.0),
    ]
    backgroundNucleobaseDist = NucleobaseDist(a=0.25, c=0.25, g=0.25, t=0.25)

    sequenceData, motifPositionData = SampleMotifData(
        SequenceCount, SequenceLength, MotifPresenceProbability,
        trueMotifNucleobaseDist, backgroundNucleobaseDist)

    # Sanity checks on the sampled data for the seed used above.
    assert sequenceData[0] == "CTACTTCGAATTTACCCCTATATTT"
    assert len(sequenceData) == 50
    assert motifPositionData[:10] == [2, 15, -1, 0, 14, 5, -1, 5, 1, 9]
    assert len(motifPositionData) == 50

    # Char.MaxValue is the string '\uffff' on the Python side, so its code
    # point is written numerically (0xFFFF) to size the vector.
    motif_nucleobase_pseudo_counts = PiecewiseVector.Constant(0xFFFF + 1, 1e-6)
    # Cannot call managed PiecewiseVector object's indexer with ['A'], i.e.
    # cannot do motif_nucleobase_pseudo_counts['A'] = 2.0
    motif_nucleobase_pseudo_counts[ord('A')] = 2.0
    motif_nucleobase_pseudo_counts[ord('C')] = 2.0
    motif_nucleobase_pseudo_counts[ord('G')] = 2.0
    motif_nucleobase_pseudo_counts[ord('T')] = 2.0

    # Assume we know the true motif length.
    motifLength = len(trueMotifNucleobaseDist)
    motifCharsRange = Range(motifLength)
    motifNucleobaseProbs = Variable.Array[Vector](motifCharsRange)
    # Cannot do motifNucleobaseProbs[motifCharsRange] = Variable.Dirichlet...
    motifNucleobaseProbs.set_Item(
        motifCharsRange,
        Variable.Dirichlet(motif_nucleobase_pseudo_counts).ForEach(
            motifCharsRange))

    sequenceRange = Range(SequenceCount)
    sequences = Variable.Array[str](sequenceRange)

    motifPositions = Variable.Array[int](sequenceRange)
    motifPositions.set_Item(
        sequenceRange,
        Variable.DiscreteUniform(SequenceLength - motifLength + 1).ForEach(
            sequenceRange))

    motifPresence = Variable.Array[bool](sequenceRange)
    motifPresence.set_Item(
        sequenceRange,
        Variable.Bernoulli(MotifPresenceProbability).ForEach(sequenceRange))

    # Every block is closed in a ``finally`` clause so that an exception
    # raised while the model is being built does not leave the block open
    # for whatever model code runs afterwards.
    forEachBlock = Variable.ForEach(sequenceRange)
    try:
        ifVar = Variable.If(motifPresence.get_Item(sequenceRange))
        try:
            motifChars = Variable.Array[Char](motifCharsRange)
            motifChars.set_Item(
                motifCharsRange,
                Variable.Char(motifNucleobaseProbs.get_Item(motifCharsRange)))
            motif = Variable.StringFromArray(motifChars)

            # Named differently from the integer ``motifPos`` used in the
            # reporting loop below; this one is a model variable.
            motifPosVar = motifPositions.get_Item(sequenceRange)
            backgroundLengthRight = motifPosVar.op_Subtraction(
                SequenceLength - motifLength,
                motifPositions.get_Item(sequenceRange))
            backgroundLeft = VariableStringOfLength(
                motifPositions.get_Item(sequenceRange),
                backgroundNucleobaseDist)
            backgroundRight = VariableStringOfLength(
                backgroundLengthRight, backgroundNucleobaseDist)

            # sequence = left background + motif + right background
            added_vars = backgroundLeft.op_Addition(backgroundLeft, motif)
            added_vars = added_vars.op_Addition(added_vars, backgroundRight)
            sequences.set_Item(sequenceRange, added_vars)
        finally:
            ifVar.Dispose()

        ifNotVar = Variable.IfNot(motifPresence.get_Item(sequenceRange))
        try:
            sequences.set_Item(
                sequenceRange,
                VariableStringOfLength(SequenceLength,
                                       backgroundNucleobaseDist))
        finally:
            ifNotVar.Dispose()
    finally:
        forEachBlock.CloseBlock()

    sequences.ObservedValue = sequenceData

    engine = InferenceEngine()
    engine.NumberOfIterations = 30
    engine.Compiler.RecommendedQuality = QualityBand.Experimental

    motifNucleobaseProbsPosterior = engine.Infer[Array[Dirichlet]](
        motifNucleobaseProbs)
    motifPresencePosterior = engine.Infer[Array[Bernoulli]](motifPresence)
    motifPositionPosterior = engine.Infer[Array[Discrete]](motifPositions)

    # PrintMotifInferenceResults
    # The Distributions.DiscreteChar indexer is implemented, so dist[c] works.
    PrintPositionFrequencyMatrix(
        "\nTrue position frequency matrix:",
        trueMotifNucleobaseDist,
        lambda dist, c: dist[c])
    # The posterior is an array of Distributions.Dirichlet; the mean of each
    # is a PiecewiseVector.  The PiecewiseVector indexer is implemented, but
    # not for strings:
    #   TypeError: No method matches given arguments for get_Item:
    #   (<class 'str'>)
    # hence the ord(c).
    PrintPositionFrequencyMatrix(
        "\nInferred position frequency matrix mean:",
        motifNucleobaseProbsPosterior,
        lambda dist, c: dist.GetMean()[ord(c)])

    # Tried importing Console and ConsoleColor from System which works in
    # powershell but not in VS console.
    printc("\nPREDICTION ", ConsoleColor.Yellow)
    printc("GROUND TRUTH ", ConsoleColor.Red)
    printc("OVERLAP \n\n", ConsoleColor.Green)

    for i in range(min(SequenceCount, 30)):
        # -1 marks "no motif predicted" for this sequence.
        motifPos = (motifPositionPosterior[i].GetMode()
                    if motifPresencePosterior[i].GetProbTrue() > 0.5
                    else -1)
        inPrediction, inGroundTruth = False, False
        for j in range(SequenceLength):
            # Switch the flags on at the motif start and off again one
            # motif length later.
            if j == motifPos:
                inPrediction = True
            elif j == motifPos + motifLength:
                inPrediction = False
            if j == motifPositionData[i]:
                inGroundTruth = True
            elif j == motifPositionData[i] + motifLength:
                inGroundTruth = False

            color = Console.ForegroundColor
            if inPrediction and inGroundTruth:
                color = ConsoleColor.Green
            elif inPrediction:
                color = ConsoleColor.Yellow
            elif inGroundTruth:
                color = ConsoleColor.Red
            printc(sequenceData[i][j], color)

        print(f" P(has motif) = {motifPresencePosterior[i].GetProbTrue():.2f}", end="")
        if motifPos != -1:
            print(f" P(pos={motifPos}) = {motifPositionPosterior[i][motifPos]:.2f}", end="")
        print()