Exemple #1
0
def MixtureOfGaussians():
    """Learn a two-component mixture of 2-D Gaussians and print the posteriors.

    The model is defined over 300 observations supplied by ``GenerateData``.
    The per-point component assignments are initialised with random point
    masses, after which the inferred distributions over the mixture weights,
    the component means and the component precisions are printed.
    """
    # Range that indexes the mixture components.
    component = Range(2)

    # Component means: one vector per component, all sharing the same prior.
    means = Variable.Array[Vector](component)
    prior_mean = Vector.FromArray(0.0, 0.0)
    prior_precision = PositiveDefiniteMatrix.IdentityScaledBy(2, 0.01)
    mean_prior = Variable.VectorGaussianFromMeanAndPrecision(
        prior_mean, prior_precision)
    means.set_Item(component, mean_prior.ForEach(component))

    # Component precisions: one matrix per component, Wishart prior.
    precs = Variable.Array[PositiveDefiniteMatrix](component).Named("precs")
    wishart_scale = PositiveDefiniteMatrix.IdentityScaledBy(2, 0.01)
    precision_prior = Variable.WishartFromShapeAndScale(100.0, wishart_scale)
    precs.set_Item(component, precision_prior.ForEach(component))

    # Mixture weights.
    weights = Variable.Dirichlet(component, [1.0, 1.0]).Named("weights")

    # Observed data points, indexed by ``point``.
    point = Range(300).Named("n")
    data = Variable.Array[Vector](point).Named("x")

    # Latent component indicator for every data point.
    z = Variable.Array[Int32](point).Named("z")

    # The mixture model itself: for each point pick a component, then draw
    # the point from that component's Gaussian.  Blocks are closed innermost
    # first.
    point_block = Variable.ForEach(point)
    z.set_Item(point, Variable.Discrete(weights))
    component_block = Variable.Switch(z.get_Item(point))
    selected_mean = means.get_Item(z.get_Item(point))
    selected_precision = precs.get_Item(z.get_Item(point))
    data.set_Item(
        point,
        Variable.VectorGaussianFromMeanAndPrecision(
            selected_mean, selected_precision))
    component_block.CloseBlock()
    point_block.CloseBlock()

    # Attach the generated observations.
    data.ObservedValue = GenerateData(point.SizeAsInt)

    # Break the symmetry between components by starting every indicator at a
    # randomly chosen point mass.
    z_init = Variable.Array[Discrete](point).Named("zInit")
    component_count = component.SizeAsInt
    z_init.ObservedValue = [
        Discrete.PointMass(Rand.Int(component_count), component_count)
        for _ in range(point.SizeAsInt)
    ]
    # The generic ``InitialiseTo[Discrete]`` overload cannot be selected
    # directly through pythonnet, hence the ``InitialiseTo`` helper.
    InitialiseTo(z.get_Item(point), z_init.get_Item(point))

    # Run inference and report each posterior in turn.
    engine = InferenceEngine()
    weights_posterior = engine.Infer(weights)
    print("Dist over pi=%s" % weights_posterior)
    means_posterior = engine.Infer(means)
    print("Dist over means=\n%s" % means_posterior)
    precs_posterior = engine.Infer(precs)
    print("Dist over precs=\n%s" % precs_posterior)
Exemple #2
0
def BayesPointMachineExample():
    """Train a Bayes point machine on a tiny purchase data set and predict.

    The weight vector is given a 3-dimensional ``VectorGaussian`` prior, the
    model is attached through ``BayesPointMachine`` and the weight posterior
    is printed.  That posterior is then reused as the prior for a second
    model over three test customers, whose predicted labels are printed.
    """
    train_incomes = [63, 16, 28, 55, 22, 20]
    train_ages = [38, 23, 40, 27, 18, 40]
    train_labels = [True, False, True, True, False, False]

    # ``Variable.Observed[bool](...)`` (with or without an explicit
    # ``Overloads[Array[bool]]``) does not work in pythonnet, so the
    # ``VariableObserved`` helper is used instead.
    observed_labels = VariableObserved(train_labels)

    # Prior over the weight vector.
    prior_covariance = PositiveDefiniteMatrix.Identity(3)
    prior_mean = Vector.Zero(3)
    weight_prior = VectorGaussian(prior_mean, prior_covariance)
    weights = Variable.Random[Vector](weight_prior)
    BayesPointMachine(train_incomes, train_ages, weights, observed_labels)

    engine = InferenceEngine()
    weight_posterior = engine.Infer(weights)
    print("Dist over w=\n%s" % weight_posterior)

    # Predict on unseen customers using the learned weight posterior.
    test_incomes = [58, 18, 22]
    test_ages = [36, 24, 37]
    test_labels = Variable.Array[bool](Range(len(test_ages)))
    BayesPointMachine(
        test_incomes,
        test_ages,
        Variable.Random[Vector](weight_posterior),
        test_labels)
    prediction = engine.Infer(test_labels)
    print("output=\n%s" % prediction)
Exemple #3
0
def LearningAGaussianWithRanges():
    """Infer the mean and precision of a Gaussian from 100 sampled values.

    The Infer.NET random number generator is restarted with a fixed seed
    before the samples are drawn.  The samples are observed through a
    variable array defined over a range, and the posteriors over the mean
    and the precision are printed.
    """
    # Restart the Infer.NET random number generator with a fixed seed.
    Rand.Restart(12347)
    samples = [Rand.Normal(0.0, 1.0) for _ in range(100)]

    # Priors over the unknown parameters.
    mean = Variable.GaussianFromMeanAndVariance(0.0, 1.0).Named("mean")
    precision = Variable.GammaFromShapeAndScale(1.0, 1.0).Named("precision")

    # One Gaussian draw per sample, all sharing ``mean`` and ``precision``.
    sample_range = Range(len(samples)).Named("n")
    observations = Variable.Array[Double](sample_range)
    likelihood = Variable.GaussianFromMeanAndPrecision(mean, precision)
    observations.set_Item(sample_range, likelihood.ForEach(sample_range))
    observations.ObservedValue = samples

    engine = InferenceEngine()
    mean_posterior = engine.Infer(mean)
    print("mean=%s" % mean_posterior)
    precision_posterior = engine.Infer(precision)
    print("precision=%s" % precision_posterior)
Exemple #4
0
def MotifFinder():
    """Run the motif-finder example end to end and print the results.

    Steps, in order:

    1. Restart the Infer.NET random number generator with a fixed seed and
       sample synthetic sequences and motif positions via
       ``SampleMotifData`` (defined elsewhere), then sanity-check the sample
       against hard-coded expected values.
    2. Build a model in which every sequence either contains a motif
       (background + motif + background) or is pure background.
    3. Observe the sampled sequences and infer posteriors over the motif
       nucleobase probabilities, motif presence and motif position.
    4. Print the true and inferred position frequency matrices, followed by
       a colour-coded comparison of predicted versus true motif locations
       for at most the first 30 sequences.
    """
    Rand.Restart(1337)

    SequenceCount = 50
    SequenceLength = 25
    MotifPresenceProbability = 0.8

    # Ground-truth nucleobase distribution for each position of the motif.
    trueMotifNucleobaseDist = [
        NucleobaseDist(a=0.8, c=0.1, g=0.05, t=0.05),
        NucleobaseDist(a=0.0, c=0.9, g=0.05, t=0.05),
        NucleobaseDist(a=0.0, c=0.0, g=0.5, t=0.5),
        NucleobaseDist(a=0.25, c=0.25, g=0.25, t=0.25),
        NucleobaseDist(a=0.1, c=0.1, g=0.1, t=0.7),
        NucleobaseDist(a=0.0, c=0.0, g=0.9, t=0.1),
        NucleobaseDist(a=0.9, c=0.05, g=0.0, t=0.05),
        NucleobaseDist(a=0.5, c=0.5, g=0.0, t=0.0),
    ]

    # Uniform distribution used for every non-motif character.
    backgroundNucleobaseDist = NucleobaseDist(a=0.25, c=0.25, g=0.25, t=0.25)

    sequenceData, motifPositionData = SampleMotifData(SequenceCount, SequenceLength, 
                                                      MotifPresenceProbability, 
                                                      trueMotifNucleobaseDist, 
                                                      backgroundNucleobaseDist)

    # Sanity checks on the sampled data. The expected values are tied to the
    # seed passed to Rand.Restart above.
    assert(sequenceData[0] == "CTACTTCGAATTTACCCCTATATTT")
    # One sequence is expected per requested sequence.
    assert(len(sequenceData) == 50) 
    assert(motifPositionData[:10] ==[2, 15, -1, 0, 14, 5, -1, 5, 1, 9])
    assert(len(motifPositionData) == 50)
    # Char.MaxValue is a string '\uffff', so we convert the hex to decimal.
    # The vector has 0xFFFF + 1 entries, all initialised to 1e-6.
    motif_nucleobase_pseudo_counts = PiecewiseVector.Constant(int('ffff', 16) + 1, 1e-6)
    # Cannot call managed PiecewiseVector object's indexer with ['A'], i.e. cannot do
    # motif_nucleobase_pseudo_counts['A'] = 2.0
    # so the character code is used as the index instead.
    motif_nucleobase_pseudo_counts[ord('A')] = 2.0
    motif_nucleobase_pseudo_counts[ord('C')] = 2.0
    motif_nucleobase_pseudo_counts[ord('G')] = 2.0
    motif_nucleobase_pseudo_counts[ord('T')] = 2.0
   
    motifLength = len(trueMotifNucleobaseDist)  # Assume we know the true motif length.
    motifCharsRange = Range(motifLength)
    # One probability vector per motif position, each with a Dirichlet prior
    # built from the pseudo-counts above.
    motifNucleobaseProbs = Variable.Array[Vector](motifCharsRange)
    # Cannot do motifNucleobaseProbs[motifCharsRange] = Variable.Dirichlet...
    motifNucleobaseProbs.set_Item(motifCharsRange, Variable.Dirichlet(motif_nucleobase_pseudo_counts).ForEach(motifCharsRange))
    sequenceRange = Range(SequenceCount)
    sequences = Variable.Array[str](sequenceRange)

    # Motif start position per sequence; SequenceLength - motifLength + 1 is
    # the number of start positions at which the whole motif still fits.
    motifPositions = Variable.Array[int](sequenceRange)
    motifPositions.set_Item(sequenceRange, Variable.DiscreteUniform(SequenceLength - motifLength + 1).ForEach(sequenceRange))

    # Whether each sequence contains the motif at all.
    motifPresence = Variable.Array[bool](sequenceRange)
    motifPresence.set_Item(sequenceRange, Variable.Bernoulli(MotifPresenceProbability).ForEach(sequenceRange))

    # Per-sequence model. The statements between opening and closing a block
    # belong to that block, so their order matters.
    forEachBlock = Variable.ForEach(sequenceRange)
    # Branch 1: the sequence contains the motif.
    ifVar = Variable.If(motifPresence.get_Item(sequenceRange))

    # Sample the motif characters from the per-position probabilities and
    # join them into a string variable.
    motifChars = Variable.Array[Char](motifCharsRange)
    motifChars.set_Item(motifCharsRange, Variable.Char(motifNucleobaseProbs.get_Item(motifCharsRange)))
    motif = Variable.StringFromArray(motifChars)
    motifPos = motifPositions.get_Item(sequenceRange)

    # Right background length = (SequenceLength - motifLength) - motif position.
    # NOTE(review): the operator methods are invoked by name, presumably
    # because the overloaded operators are not usable from pythonnet here;
    # confirm before replacing them with `-` / `+`.
    backgroundLengthRight = motifPos.op_Subtraction(SequenceLength - motifLength, motifPositions.get_Item(sequenceRange))
    backgroundLeft = VariableStringOfLength(motifPositions.get_Item(sequenceRange), backgroundNucleobaseDist)
    backgroundRight = VariableStringOfLength(backgroundLengthRight, backgroundNucleobaseDist)
    # sequence = left background + motif + right background
    added_vars = backgroundLeft.op_Addition(backgroundLeft, motif)
    added_vars = added_vars.op_Addition(added_vars, backgroundRight)
    sequences.set_Item(sequenceRange, added_vars)

    # End of the "motif present" branch.
    ifVar.Dispose()

    # Branch 2: the sequence does not contain the motif.
    ifNotVar = Variable.IfNot(motifPresence.get_Item(sequenceRange))

    # The whole sequence is background.
    sequences.set_Item(sequenceRange, VariableStringOfLength(SequenceLength, backgroundNucleobaseDist))

    ifNotVar.Dispose()
    forEachBlock.CloseBlock()

    # Observe the sampled sequences and configure the inference engine.
    sequences.ObservedValue = sequenceData
    engine = InferenceEngine()
    engine.NumberOfIterations = 30  # number of inference iterations
    engine.Compiler.RecommendedQuality = QualityBand.Experimental

    # Explicitly typed Infer calls so the results come back as arrays of
    # distributions that can be indexed below.
    motifNucleobaseProbsPosterior = engine.Infer[Array[Dirichlet]](motifNucleobaseProbs)
    motifPresencePosterior = engine.Infer[Array[Bernoulli]](motifPresence)
    motifPositionPosterior = engine.Infer[Array[Discrete]](motifPositions)

    # PrintMotifInferenceResults
    PrintPositionFrequencyMatrix("\nTrue position frequency matrix:",
                                 trueMotifNucleobaseDist,
                                 lambda dist, c: dist[c])  # Distributions.DiscreteChar indexer is implemented.

    PrintPositionFrequencyMatrix("\nInferred position frequency matrix mean:",
                                 motifNucleobaseProbsPosterior, # Array of Distributions.Dirichlet; mean of each is a PiecewiseVector
                                 lambda dist, c: dist.GetMean()[ord(c)])  # PiecewiseVector indexer is implemented, but not for strings...
    # Indexing the PiecewiseVector with a str raises
    # "TypeError: No method matches given arguments for get_Item: (<class 'str'>)",
    # which is why ord(c) is used above.
    # Tried importing Console and ConsoleColor from System which works in powershell but not in VS console.

    # Legend for the colour coding used below.
    printc("\nPREDICTION   ", ConsoleColor.Yellow)
    printc("GROUND TRUTH    ", ConsoleColor.Red)
    printc("OVERLAP    \n\n", ConsoleColor.Green)
    for i in range(min(SequenceCount, 30)):
        # Predicted start position, or -1 when the motif is judged absent.
        # This rebinds the name motifPos, which earlier in this function
        # referred to a model variable.
        motifPos = motifPositionPosterior[i].GetMode() if motifPresencePosterior[i].GetProbTrue() > 0.5 else -1

        # Walk the sequence character by character, tracking whether the
        # current index lies inside the predicted and/or the true motif window.
        inPrediction, inGroundTruth = False, False
        for j in range(SequenceLength):
            if j == motifPos:
                inPrediction = True
            elif j == motifPos + motifLength:
                inPrediction = False
            if j == motifPositionData[i]:
                inGroundTruth = True
            elif j == motifPositionData[i] + motifLength:
                inGroundTruth = False

            # Green = both, yellow = prediction only, red = ground truth only,
            # otherwise the current console foreground colour.
            color = Console.ForegroundColor
            if (inPrediction and inGroundTruth):
                color = ConsoleColor.Green
            elif (inPrediction):
                color = ConsoleColor.Yellow
            elif inGroundTruth:
                color = ConsoleColor.Red
            printc(sequenceData[i][j], color)
        # Append the posterior probabilities to the same output line.
        print(f"    P(has motif) = {motifPresencePosterior[i].GetProbTrue():.2f}", end="");
        if (motifPos != -1):
            print(f"   P(pos={motifPos}) = {motifPositionPosterior[i][motifPos]:.2f}", end="");
        print()