def build(*clauses, **kwargs):
    """Build a Blea from a set of (condition, result) clauses.

    Positional arguments:
        clauses: (cond, result) pairs.  A pair whose cond is None is the
            'other' (else) clause, attached to the negation of the OR of
            all the other conditions; at most one such pair is allowed.
            Both elements of every other pair are passed through
            Lea.coerce.

    Keyword arguments:
        priorLea: optional (default None).  When given, the result of the
            else clause is computed from the value/weight pairs yielded by
            priorLea.getAlea().genVPs() instead of being supplied by the
            caller.  It cannot be combined with an 'other' clause, nor
            with a clause set whose OR-ed conditions are already true.

    Returns:
        Blea(...) built from one Ilea(resultLea, condLea) per clause,
        including the else clause when there is one.

    Raises:
        TypeError: a keyword argument other than priorLea was given.
        Lea.Error: more than one 'other' clause; both an 'other' clause
            and priorLea; conditions that are not pairwise disjoint;
            priorLea given for a complete clause set; a prior probability
            outside the reachable range; or an incomplete clause set with
            neither an 'other' clause nor priorLea.
    """
    # Reject unknown keywords up front: silently ignoring e.g. a misspelt
    # 'priorlea' would drop the prior and change the outcome of the call.
    # This mirrors the keyword-only signature build(*clauses, priorLea=None).
    unexpectedArgs = sorted(name for name in kwargs if name != 'priorLea')
    if unexpectedArgs:
        raise TypeError("build() got an unexpected keyword argument '%s'"
                        % unexpectedArgs[0])
    priorLea = kwargs.get('priorLea', None)

    # clauses with a None condition are 'other' clauses
    elseClauseResults = tuple(result for (cond, result) in clauses if cond is None)
    if len(elseClauseResults) > 1:
        raise Lea.Error("impossible to define more than one 'other' clause")
    if len(elseClauseResults) == 1:
        if priorLea is not None:
            raise Lea.Error("impossible to define together prior probabilities and 'other' clause")
        elseClauseResult = elseClauseResults[0]
    else:
        elseClauseResult = None

    normClauseLeas = tuple((Lea.coerce(cond), Lea.coerce(result))
                           for (cond, result) in clauses if cond is not None)
    condLeas = tuple(condLea for (condLea, resultLea) in normClauseLeas)

    # check that conditions are disjoint
    for (condLea1, condLea2) in genPairs(condLeas):
        if (condLea1 & condLea2).isFeasible():
            raise Lea.Error("clause conditions are not disjoint")

    # build the OR of all given conditions
    orCondsLea = Lea.reduce(or_, condLeas)
    isClauseSetComplete = orCondsLea.isTrue()

    if priorLea is not None:
        # prior distribution: determine elseClauseResult
        if isClauseSetComplete:
            # TODO check priorLea equivalent to self
            raise Lea.Error("forbidden to define prior probabilities for complete clause set")
        # NOTE(review): presumably the OR-condition is true with probability
        # pTrue/count (the error message below treats it so) -- confirm
        # against Lea._p.
        (pTrue, count) = orCondsLea._p(True)
        pFalse = count - pTrue
        priorAleaDict = dict(priorLea.getAlea().genVPs())
        priorAleaCount = sum(priorAleaDict.values())
        normAleaDict = dict(Mlea(*(resultLea for (condLea, resultLea) in normClauseLeas)).getAlea().genVPs())
        normAleaCount = sum(normAleaDict.values())
        valuesSet = frozenset(chain(priorAleaDict.keys(), normAleaDict.keys()))
        vps = []
        for value in valuesSet:
            priorP = priorAleaDict.get(value, 0)
            condP = normAleaDict.get(value, 0)
            # Integer weight of value in the else clause, i.e.
            #   priorP/priorAleaCount - (condP/normAleaCount)*(pTrue/count)
            # scaled by the common denominator
            #   count*normAleaCount*priorAleaCount
            p = priorP*count*normAleaCount - condP*pTrue*priorAleaCount
            if not (0 <= p <= pFalse*normAleaCount*priorAleaCount):
                # Infeasible : probability represented by p goes outside range from 0 to 1
                priorPFraction = ProbFraction(priorP, priorAleaCount)
                lowerPFraction = ProbFraction(condP*pTrue, count*normAleaCount)
                upperPFraction = ProbFraction(condP*pTrue + pFalse*normAleaCount, count*normAleaCount)
                raise Lea.Error("prior probability of '%s' is %s, outside the range [ %s , %s ]"
                                % (value, priorPFraction, lowerPFraction, upperPFraction))
            vps.append((value, p))
        elseClauseResult = Lea.fromValFreqs(*vps)
    elif elseClauseResult is None:
        # check that clause set is complete
        if not isClauseSetComplete:
            # TODO? : assume a uniform prior distribution ? ... which values ?
            raise Lea.Error("incomplete clause set requires 'other' clause or prior probabilities")

    if elseClauseResult is not None:
        elseCondLea = ~orCondsLea
        normClauseLeas += ((elseCondLea, Lea.coerce(elseClauseResult)),)
        # note that orCondsLea is NOT extended with orCondsLea |= elseCondLea
        # so, in case of else clause (and only in this case), orCondsLea is NOT certainly true

    return Blea(*(Ilea(resultLea, condLea) for (condLea, resultLea) in normClauseLeas))
# PyBossa is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with PyBossa. If not, see <http://www.gnu.org/licenses/>.
from lea import Lea
import pandas as pd
import ngram


def lower(s):
    """Return s.lower()."""
    return s.lower()


# Candidate transcriptions, each paired with a frequency.
task_runs = Lea.fromValFreqs(("hola mundo", 55),
                             ("HoLa mundos", 45),
                             ("algo horroroso", 10))

# Draw 30 values from the distribution and normalise them to lower case.
observation = task_runs.random(30)
a = [lower(w) for w in observation]

# describe() on a column of strings reports, under 'top', its most
# frequent value.
df = pd.DataFrame({'info': a})
desc = df.describe()
top_string = desc['info']['top']

# Parenthesised single-argument form: prints the same text on Python 2 and
# is valid syntax on Python 3 (the bare print statement is not).
print("The top transcribed word is: %s" % top_string)

# a is already lower-cased above, so it is indexed as is.
G = ngram.NGram(a)
from __future__ import division, print_function
from lea import Lea

# define coin: outcomes 'H' and 'T', each given frequency 1
coin = Lea.fromValFreqs(('H', 1), ('T', 1))
print('Coin Distribution', coin, sep='\n')

# define six-sided die: faces '1'..'6' (stored as strings), each given frequency 1
die6 = Lea.fromValFreqs(('1', 1), ('2', 1), ('3', 1), ('4', 1), ('5', 1), ('6', 1))
print('Six-sided Die Distribution', die6, sep='\n')

# define four-sided die: faces '1'..'4' (stored as strings), each given frequency 1
die4 = Lea.fromValFreqs(('1', 1), ('2', 1), ('3', 1), ('4', 1))
print('Four-sided Die Distribution',
from __future__ import division, print_function
from lea import Lea


def _report(title, notation, dist):
    """Print title, notation and dist.asPct(), each on its own line."""
    print(title, notation, dist.asPct(), sep='\n')


# P(C): 'yes' with frequency 1, 'no' with frequency 99
cancer = Lea.fromValFreqs(('yes', 1), ('no', 99))
_report('\nCancer Distribution', 'P(C)', cancer)

# P(M|C=yes): mammogram result when cancer == 'yes'
mamm_g_cancer = Lea.fromValFreqs(('pos', 80), ('neg', 20))
_report('\nProb for mammogram given cancer', 'P(M|C=yes)', mamm_g_cancer)

# P(M|C=no): mammogram result when cancer == 'no' (96 'pos' out of 1000)
mamm_g_no_cancer = Lea.fromValFreqs(('pos', 96), ('neg', 1000 - 96))
_report('\nProb for mammogram given NO cancer', 'P(M|C=no)', mamm_g_no_cancer)
def setTermsChoices(self, *termsChoices):
    """Set self.termsChoices to Lea.fromValFreqs(*termsChoices).

    termsChoices is forwarded unchanged to Lea.fromValFreqs; presumably
    each item is a (value, frequency) pair -- confirm against callers.
    """
    choicesLea = Lea.fromValFreqs(*termsChoices)
    self.termsChoices = choicesLea
from __future__ import division, print_function
from lea import Lea


def _report(title, notation, dist):
    """Print title, notation and dist.asPct(), each on its own line."""
    print(title, notation, dist.asPct(), sep='\n')


# P(C): 'yes' with frequency 1, 'no' with frequency 99
cancer = Lea.fromValFreqs(('yes', 1), ('no', 99))
_report('\nCancer Distribution', 'P(C)', cancer)

# P(M|C=yes): mammogram result when cancer == 'yes'
mamm_g_cancer = Lea.fromValFreqs(('pos', 80), ('neg', 20))
_report('\nProb for mammogram given cancer', 'P(M|C=yes)', mamm_g_cancer)

# P(M|C=no): mammogram result when cancer == 'no' (96 'pos' out of 1000)
mamm_g_no_cancer = Lea.fromValFreqs(('pos', 96), ('neg', 1000 - 96))
_report('\nProb for mammogram given NO cancer', 'P(M|C=no)', mamm_g_no_cancer)

# conditional probability table: one (condition, distribution) clause per
# value of cancer
mammograms = Lea.buildCPT(
    (cancer == 'yes', mamm_g_cancer),
    (cancer == 'no', mamm_g_no_cancer),
)
_report('\nMammograms', 'P(M)', mammograms)

# get joint probs for all events