def test_hypergeometric_range(self): # Test for ticket #921 assert_(np.all(random.hypergeometric(3, 18, 11, size=10) < 4)) assert_(np.all(random.hypergeometric(18, 3, 11, size=10) > 0)) # Test for ticket #5623 args = [ (2**20 - 2, 2**20 - 2, 2**20 - 2), # Check for 32-bit systems ] is_64bits = sys.maxsize > 2**32 if is_64bits and sys.platform != 'win32': # Check for 64-bit systems args.append((2**40 - 2, 2**40 - 2, 2**40 - 2)) for arg in args: assert_(random.hypergeometric(*arg) > 0)
def hypergeometric(self, ngood, nbad, nall):
    """Draw hypergeometric samples through ``r.hypergeometric``.

    Parameters:
        ngood: integer, >= 0.
        nbad: integer, >= 0.
        nall: integer, >= 1 and <= ngood + nbad.

    The three arguments are forwarded positionally, followed by
    ``self.size`` as the fourth argument; the result of the underlying
    call is returned unchanged.
    """
    sampler = r.hypergeometric
    return sampler(ngood, nbad, nall, self.size)
def hypothesisTest(self, seq1, seq2, totalSeq1, totalSeq2):
    """Permutation test on the difference between two proportions.

    The observed statistic is ``seq1 / totalSeq1 - seq2 / totalSeq2``.
    For each of 10000 replicates the ``seq1 + seq2`` positive sequences are
    randomly reassigned between the two samples (a hypergeometric draw of
    size ``totalSeq1``) and the same statistic is recomputed.

    Parameters:
        seq1, seq2: number of positive sequences in sample 1 / sample 2.
        totalSeq1, totalSeq2: total number of sequences in each sample.

    Returns:
        A tuple ``(one_sided_p, two_sided_p)``. The one-sided value is the
        smaller of the left-tail and right-tail fractions; the two-sided
        value is the fraction of replicates whose absolute difference is at
        least the absolute observed difference.
    """
    replicates = 10000

    # observed difference
    obsDiff = float(seq1) / totalSeq1 - float(seq2) / totalSeq2
    absObsDiff = abs(obsDiff)

    posSeq = seq1 + seq2
    negSeq = totalSeq1 + totalSeq2 - posSeq

    # Tally the tails while permuting, so no per-replicate list is kept.
    leftCount = 0
    rightCount = 0
    twoSidedCount = 0
    # ``range`` (not ``xrange``) keeps this runnable on Python 2 and 3.
    for _ in range(replicates):
        # randomly permute assignment of sequences
        c1 = hypergeometric(posSeq, negSeq, totalSeq1)
        c2 = posSeq - c1
        value = float(c1) / totalSeq1 - float(c2) / totalSeq2

        if value <= obsDiff:
            leftCount += 1
        if value >= obsDiff:
            rightCount += 1
        if abs(value) >= absObsDiff:
            twoSidedCount += 1

    oneSidedCount = min(leftCount, rightCount)
    return (float(oneSidedCount) / replicates,
            float(twoSidedCount) / replicates)
def hypothesisTest(self, seq1, seq2, totalSeq1, totalSeq2):
    """Permutation test on the difference between two proportions.

    The observed statistic is ``seq1 / totalSeq1 - seq2 / totalSeq2``.
    For each replicate the ``seq1 + seq2`` positive sequences are randomly
    reassigned between the two samples (a hypergeometric draw of size
    ``totalSeq1``) and the same statistic is recomputed. The number of
    replicates is read from ``self.preferences['Replicates']``.

    Parameters:
        seq1, seq2: number of positive sequences in sample 1 / sample 2.
        totalSeq1, totalSeq2: total number of sequences in each sample.

    Returns:
        A tuple ``(one_sided_p, two_sided_p, '')``. The one-sided value is
        the smaller of the left-tail and right-tail fractions; the
        two-sided value is the fraction of replicates whose absolute
        difference is at least the absolute observed difference. The third
        element is always an empty string.
    """
    replicates = self.preferences['Replicates']

    # observed difference
    obsDiff = float(seq1) / totalSeq1 - float(seq2) / totalSeq2
    absObsDiff = abs(obsDiff)

    posSeq = seq1 + seq2
    negSeq = totalSeq1 + totalSeq2 - posSeq

    # Tally the tails while permuting, so no per-replicate list is kept.
    leftCount = 0
    rightCount = 0
    twoSidedCount = 0
    # ``range`` (not ``xrange``) keeps this runnable on Python 2 and 3.
    for _ in range(replicates):
        # randomly permute assignment of sequences
        c1 = hypergeometric(posSeq, negSeq, totalSeq1)
        c2 = posSeq - c1
        value = float(c1) / totalSeq1 - float(c2) / totalSeq2

        if value <= obsDiff:
            leftCount += 1
        if value >= obsDiff:
            rightCount += 1
        if abs(value) >= absObsDiff:
            twoSidedCount += 1

    oneSidedCount = min(leftCount, rightCount)
    return (float(oneSidedCount) / replicates,
            float(twoSidedCount) / replicates,
            '')
def getCardDrawnProbabilities(self, tries=100000):
    """
    Estimates, for each card name, the probability of having drawn that
    card at least once by each turn from 0 up to 10.

    The estimate is a simulation: for every turn, ``tries`` hypergeometric
    samples are drawn and the fraction containing at least one copy of the
    card is recorded. Turn ``t`` uses ``t + 7`` cards drawn.

    Returns a dict mapping card name to a list of 11 probabilities, where
    index ``t`` of the list is the probability for turn ``t``.
    """
    cardCounts = defaultdict(int)
    for card in self._cards:
        cardCounts[card.name] += 1

    samples = len(self._cards)
    cardDrawnProbabilities = {}

    # Simulate once per distinct name: copies of the same card share the
    # same counts, so repeating the simulation for each copy is wasted work.
    for name, nGood in cardCounts.items():
        nBad = samples - nGood
        probabilities = []
        for turnNumber in range(0, 11):
            # The probability of getting AT LEAST one copy of the card
            # from the deck with a certain number of cards drawn is
            # estimated from the hypergeometric distribution.
            cardsDrawn = turnNumber + 7
            draws = hypergeometric(nGood, nBad, cardsDrawn, tries)
            probabilities.append((draws >= 1).sum() / float(tries))
        cardDrawnProbabilities[name] = probabilities

    return cardDrawnProbabilities
def generate_random_table(rowsums, colsums, dim=(2, 2)):
    """Draw a random 2x2 table from the given marginal sums.

    ``n11`` is a single hypergeometric draw with ``rowsums[0]`` good items,
    ``rowsums[1]`` bad items and ``colsums[0]`` items sampled. The other
    three cells are derived from it so that ``n11 + n12 == rowsums[0]``,
    ``n21 + n22 == rowsums[1]`` and ``n11 + n21 == colsums[0]``.

    Parameters:
        rowsums: indexable with at least two entries (indices 0 and 1 read).
        colsums: indexable; only index 0 is read.
        dim: accepted for compatibility but not used by the body. The
            default is an immutable tuple so it cannot be shared and
            mutated across calls.

    Returns:
        The tuple ``(n11, n12, n21, n22)``.
    """
    from numpy.random import hypergeometric

    n11 = hypergeometric(rowsums[0], rowsums[1], colsums[0], size=1)[0]
    n21 = colsums[0] - n11
    n12 = rowsums[0] - n11
    n22 = rowsums[1] - n21
    return n11, n12, n21, n22
def draw(self):
    """Build an ``ExpansionCards`` selection for the ADVENTURES expansion.

    Returns ``None`` when ``self.number_of_cards`` is 0. Otherwise samples
    ``self.number_of_cards`` kingdom cards, and a number of event cards
    given by a hypergeometric draw capped at 2; both selections are sorted.
    """
    if self.number_of_cards == 0:
        return None

    picked_kingdom = sorted(
        random.sample(self.kingdom_cards, self.number_of_cards))

    # NOTE(review): `event_cards` / `kingdom_cards` below are bare names,
    # not the `self.` attributes used for sampling -- confirm they are
    # meant to resolve from an enclosing scope.
    event_total = len(event_cards)
    event_count = min(
        hypergeometric(event_total,
                       len(kingdom_cards) + event_total,
                       self.number_of_cards),
        2)
    picked_events = sorted(random.sample(self.event_cards, event_count))

    return ExpansionCards(name=ADVENTURES,
                          kingdom_cards=picked_kingdom,
                          event_cards=picked_events)
def _rvs(self, M, n, N): return mtrand.hypergeometric(n, M - n, N, size=self._size)
def _rvs(self, M, n, N): return mtrand.hypergeometric(n, M-n, N, size=self._size)
all_doubleton_opportunities[idxs]).astype( numpy.int32) low_ngood = low_doubletons.astype(numpy.int32) low_nbad = (low_doubleton_opportunities - low_doubletons).astype( numpy.int32) low_p = low_doubletons.sum() * 1.0 / low_doubleton_opportunities.sum() all_ngood = all_doubletons[idxs].astype(numpy.int32) all_nbad = (all_doubleton_opportunities[idxs] - all_ngood).astype( numpy.int32) all_p = all_doubletons.sum() * 1.0 / all_doubleton_opportunities.sum() bootstrapped_low_ps.extend( hypergeometric(low_ngood, low_nbad, sample_sizes) * 1.0 / sample_sizes) bootstrapped_all_ps.extend( hypergeometric(all_ngood, all_nbad, sample_sizes) * 1.0 / sample_sizes) bootstrapped_fake_low_ps.extend( binomial(sample_sizes, low_p) * 1.0 / sample_sizes) bootstrapped_fake_all_ps.extend( binomial(sample_sizes, all_p) * 1.0 / sample_sizes) #xs, ns = stats_utils.calculate_unnormalized_survival_from_vector(bootstrapped_low_ps, min_x=0,max_x=2) #sharing_axis.step(xs,ns*1.0/ns[0],'r-',label='Low $d_S$ (matched)',zorder=3) #xs, ns = stats_utils.calculate_unnormalized_survival_from_vector(bootstrapped_all_ps, min_x=0,max_x=2) #sharing_axis.step(xs,ns*1.0/ns[0],'k-',label='All (matched)',zorder=2) #xs, ns = stats_utils.calculate_unnormalized_survival_from_vector(bootstrapped_fake_low_ps, min_x=0,max_x=1) #sharing_axis.step(xs,ns*1.0/ns[0],'r-',label='Low $d_S$ (pooled)',zorder=1,alpha=0.5)
#print(a) print("計算結果:",total1/10000) total2=total1/10000 plt.hist(total,bins=5) plt.axvline(x=0.2617,ymin=0,ymax=10,c='red') plt.axvline(x=total2,ymin=0,ymax=10,c='black') plt.show() print() print("-----------------超幾何分配") import numpy.random as nr all=[] total=0 # 這個問題機率是 0.3687 for i in range(1000): # 1000 次大實驗,共有 1000x100 個數據 s=nr.hypergeometric(20,15,5,100) # 總人數 :35,男:20、女:15,隨機抽五人,裏頭的男生數 #print('樣本為5個,5個裡面男生的數量:') #print(s) p=sum(s==3)/100 # 恰好男生數是 3 的組數再做平均 total+=p # 1000 次平均的總和 all.append(p) # 每次平均都存進 all,共 1000 次 print(total/1000) # 100000 個中恰好男生數是 3 的數據平均 x=np.arange(1000) plt.hist(all,bins=10) plt.axvline(x=total/1000,ymin=0,ymax=1,c='red') plt.show() print() print("-----------------常態分配") x=np.random.normal(5,1,8) # normal(平均值,標準差,size) y=np.random.normal(5,2,8)
# resampe everything at known rates idxs = choice(numpy.arange(0,len(all_doubletons)),size=len(low_doubletons)) sample_sizes = numpy.fmin(low_doubleton_opportunities, all_doubleton_opportunities[idxs]).astype(numpy.int32) low_ngood = low_doubletons.astype(numpy.int32) low_nbad = (low_doubleton_opportunities-low_doubletons).astype(numpy.int32) low_p = low_doubletons.sum()*1.0/low_doubleton_opportunities.sum() all_ngood = all_doubletons[idxs].astype(numpy.int32) all_nbad = (all_doubleton_opportunities[idxs] - all_ngood).astype(numpy.int32) all_p = all_doubletons.sum()*1.0/all_doubleton_opportunities.sum() bootstrapped_low_ps.extend( hypergeometric(low_ngood, low_nbad, sample_sizes)*1.0/sample_sizes ) bootstrapped_all_ps.extend( hypergeometric(all_ngood, all_nbad, sample_sizes)*1.0/sample_sizes ) bootstrapped_fake_low_ps.extend( binomial(sample_sizes, low_p)*1.0/sample_sizes ) bootstrapped_fake_all_ps.extend( binomial(sample_sizes, all_p)*1.0/sample_sizes ) #xs, ns = stats_utils.calculate_unnormalized_survival_from_vector(bootstrapped_low_ps, min_x=0,max_x=2) #sharing_axis.step(xs,ns*1.0/ns[0],'r-',label='Low $d_S$ (matched)',zorder=3) #xs, ns = stats_utils.calculate_unnormalized_survival_from_vector(bootstrapped_all_ps, min_x=0,max_x=2) #sharing_axis.step(xs,ns*1.0/ns[0],'k-',label='All (matched)',zorder=2) #xs, ns = stats_utils.calculate_unnormalized_survival_from_vector(bootstrapped_fake_low_ps, min_x=0,max_x=1) #sharing_axis.step(xs,ns*1.0/ns[0],'r-',label='Low $d_S$ (pooled)',zorder=1,alpha=0.5)
def hypergeometric(size, params):
    """Draw ``size`` hypergeometric samples described by ``params``.

    Parameters:
        size: output size forwarded to ``random.hypergeometric``.
        params: mapping with the keys ``'ngood'``, ``'nbad'`` and
            ``'nsample'``.

    Returns:
        Whatever ``random.hypergeometric`` returns for those arguments.

    Raises:
        SystemExit: if the underlying call raises ``ValueError``; the
            original exception is carried as the exit code/message.
    """
    try:
        return random.hypergeometric(params['ngood'], params['nbad'],
                                     params['nsample'], size)
    except ValueError as e:
        # Raise SystemExit directly: the ``exit`` helper is installed by the
        # ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
        raise SystemExit(e)