def bandwidth_generate(maxbandwith=125, maxrate=0.99, numofnode=16, turn=2):
    """Write shuffled Zipf-shaped bandwidth profiles to two text files.

    Node ``i`` (1-based) is assigned
    ``int(zipf.pmf(i, a) / zipf.pmf(1, a) * maxbandwith * maxrate)`` with the
    exponent ``a`` fixed at 1.0000001.  For each of ``turn`` rounds the values
    are shuffled in place and written as one space-separated line to
    ``bandwidth_used.txt``; the matching line of ``bandwidth_rest.txt`` holds
    ``maxbandwith - used`` for every node.

    Args:
        maxbandwith: Bandwidth budget per node (size in MB).
        maxrate: Fraction of ``maxbandwith`` given to the first node.
        numofnode: Number of nodes, i.e. values per output line.
        turn: Number of shuffled lines written to each file.
    """
    a = 1.0000001  # the exponent of zipf, a > 1
    top = zipf.pmf(1, a)
    listofoutput = [
        int(zipf.pmf(i, a) / top * maxbandwith * maxrate)
        for i in range(1, numofnode + 1)
    ]
    print(listofoutput)

    # Context managers guarantee both files are closed even if a write fails.
    with open("bandwidth_used.txt", "w") as fw_used, \
            open("bandwidth_rest.txt", "w") as fw_rest:
        for _ in range(turn):
            shuffle(listofoutput)  # random order for this round
            fw_used.write(
                " ".join("%d" % used for used in listofoutput) + "\n")
            fw_rest.write(
                " ".join("%d" % (maxbandwith - used)
                         for used in listofoutput) + "\n")
def generate_google_rates(k, n):
    """Build a ``k`` x ``n`` matrix of request rates with Zipf-shaped rows.

    Every row is the Zipf PMF (exponent 1.05) evaluated on a fresh random
    permutation of the ranks ``1..n``, normalised to sum to one and then
    scaled by the row's total rate.  The first ``slow_number`` (7) rows use
    the slow rate ``1 / 7.429076``; all remaining rows use the fast rate
    ``1 / 0.071648``.  The finished matrix is handed to ``log_rates`` and
    returned.

    Args:
        k: Number of rows; converted with ``int`` before use.
        n: Number of columns (files).

    Returns:
        ``numpy.ndarray`` of shape ``(k, n)``.
    """
    k = int(k)  # cast first so np.zeros receives an integer shape
    fast_rate = 1.0 / 0.071648
    slow_rate = 1.0 / 7.429076
    zipf_factor = 1.05
    slow_number = 7

    rates = np.zeros((k, n))
    # One loop over all rows: this also copes with k < slow_number, where
    # indexing rows 0..6 unconditionally would run past the matrix.
    for index_u in range(k):
        preference = np.random.permutation(n)
        row = zipf.pmf(preference + 1, zipf_factor)
        total_rate = slow_rate if index_u < slow_number else fast_rate
        rates[index_u, :] = row / sum(row) * total_rate  # normalize, scale
    log_rates(k, rates)
    return rates
def fileReadInZipf(fileNum, zipfFactor, times):
    """Sample ``times`` file indices from a truncated Zipf distribution.

    File index ``i`` (0-based) is given weight ``zipf.pmf(i + 1, zipfFactor)``
    and the weights are normalised over the ``fileNum`` files.  The Zipf
    support starts at 1, so the rank must be ``i + 1``: evaluating the PMF at
    0 yields probability 0 for file 0 and shifts every other file by one rank.

    Args:
        fileNum: Number of files; indices are drawn from ``[0, fileNum)``.
        zipfFactor: Zipf exponent (scipy requires it to be greater than 1).
        times: Number of samples to draw.

    Returns:
        ``numpy.ndarray`` of ``times`` integer file indices.
    """
    ranks = np.arange(1, fileNum + 1)
    p = zipf.pmf(ranks, zipfFactor)
    p = p / p.sum()
    return np.random.choice(np.arange(fileNum), size=times, p=p)
def test_zipfian_asymptotic(self): # test limiting case that zipfian(a, n) -> zipf(a) as n-> oo a = 6.5 N = 10000000 k = np.arange(1, 21) assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a)) assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a)) assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a)) assert_allclose(zipfian.stats(a, N, moments='msvk'), zipf.stats(a, moments='msvk'))
def generate_model_test_rates(file_number, zipf_factor):
    """Compute normalised Zipf request rates and pass them to ``log_rates``.

    File ``f`` receives ``zipf.pmf(rank + 1, zipf_factor)`` where ``rank`` is
    its position in a random permutation of ``0..file_number - 1``; the
    vector is then divided by its sum.

    Args:
        file_number: Number of files (length of the rate vector).
        zipf_factor: Zipf exponent forwarded to ``zipf.pmf``.

    Note:
        Nothing is returned; the rates are only handed to ``log_rates``.
    """
    ranking = np.random.permutation(file_number)
    rates = np.array(
        [zipf.pmf(rank + 1, zipf_factor) for rank in ranking], dtype=float)
    rates /= sum(rates)  # normalize
    log_rates(rates)
def __getZipfianDifference(self, a, sample_ss):
    """Return the squared error between sample counts and a Zipf profile.

    The item at 1-based position ``r`` of ``sample_ss`` is compared with the
    expected count ``total * zipf.pmf(r, a)``, where ``total`` is the sum of
    all observed counts; the squared differences are added up.

    Args:
        a: Zipf exponent forwarded to ``zipf.pmf``.
        sample_ss: Re-iterable sequence of entries whose element ``[1]`` is
            the observed count.

    Returns:
        The sum of squared differences (``0`` for an empty sample).
    """
    total = sum(entry[1] for entry in sample_ss)
    return sum(
        pow(1.0 * total * zipf.pmf(rank, a) - entry[1], 2)
        for rank, entry in enumerate(sample_ss, start=1)
    )
def popularity(fileNumber, zipfFactor):
    """Generate, shuffle and persist a Zipf popularity vector.

    The vector holds ``zipf.pmf(i, zipfFactor)`` for ``i = 1..fileNumber``,
    normalised to sum to one and shuffled in place.  One value per line is
    written to ``<tests_dir>/ec_test_files/popularity.txt``; ``tests_dir`` is
    a module-level name defined outside this function.

    Args:
        fileNumber: Number of files (length of the vector).
        zipfFactor: Zipf exponent forwarded to ``zipf.pmf``.

    Returns:
        The shuffled, normalised popularity values as a ``numpy.ndarray``.
    """
    # Evaluate the PMF on the whole rank range at once; this yields an
    # ndarray directly instead of relying on ``list /= numpy scalar``.
    weights = zipf.pmf(range(1, fileNumber + 1), zipfFactor)
    weights /= sum(weights)
    shuffle(weights)

    # ``with`` closes the file even when a write raises.
    with open(tests_dir + "/ec_test_files/popularity.txt", "w") as fw:
        for item in weights:
            fw.write(str(item) + '\n')
    return weights
def SPTestSetUp(fileSize, zipfFactor, flag):
    """Prepare the popularity profile, partition counts and local test file.

    Files are written below ``~/test_files``:

    * ``popularity.txt`` - shuffled, normalised Zipf popularity of the
      10 files, one value per line;
    * ``k.txt`` - per-file partition number
      ``int(popularity * 100 * SPFactor)`` clamped to ``[1, machineNumber]``;
    * ``test_local_file`` - a file of ``fileSize`` MB (10**6 bytes each),
      created by seeking to the last byte and writing a single NUL.

    Args:
        fileSize: Size of the local test file in MB.
        zipfFactor: Zipf exponent forwarded to ``zipf.pmf``.
        flag: When equal to 1, additionally run
            ``./bin/alluxio runSPPrepareFile`` and print the elapsed
            milliseconds.
    """
    # settings
    fileNumber = 10
    machineNumber = 30
    SPFactor = 6

    # generate popularity vector
    popularity = zipf.pmf(range(1, fileNumber + 1), zipfFactor)
    popularity /= sum(popularity)
    shuffle(popularity)

    tests_dir = os.path.expanduser('~')
    print("tests dir:" + tests_dir)
    out_dir = tests_dir + "/test_files"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Text mode plus ``with``: works on Python 2 and 3 and always closes.
    with open(out_dir + "/popularity.txt", "w") as fw:
        for item in popularity:
            fw.write("%s\n" % item)

    # calculate the partition number, in the range of [1, machineNumber]
    kVector = [
        max(min(int(share * 100 * SPFactor), machineNumber), 1)
        for share in popularity
    ]
    with open(out_dir + "/k.txt", "w") as fw:
        for k in kVector:
            fw.write("%s\n" % k)

    # create the file of given size
    with open(out_dir + "/test_local_file", "wb") as out:
        out.seek(int(fileSize * 1000 * 1000) - 1)
        out.write(b'\0')

    # write the files to Alluxio given the k-values profile
    # (the alluxio checkout must be the current working directory)
    if flag == 1:
        start = int(round(time.time() * 1000))  # in millisecond
        os.system('./bin/alluxio runSPPrepareFile')
        end = int(round(time.time() * 1000))
        print('Write %s files takes %s' % (fileNumber, end - start))
def make_zipf_plot(counts, tokens, title=None, savepath='./', save=False):
    """Draw a log-log rank/frequency plot with a Zipf reference curve.

    Adapted from
    https://finnaarupnielsen.wordpress.com/2013/10/22/zipf-plot-for-word-counts-in-brown-corpus/

    Args:
        counts: Per-token counts; any array-like, converted with
            ``np.asarray``.
        tokens: Token labels indexed like ``counts``.
        title: Plot title.  When ``None`` the plot is titled "Zipf plot" and
            ``'zipf_plot'`` is used as the file stem when saving.
        savepath: Prefix concatenated directly with ``title + '.png'``.
        save: When true, save the figure before showing it.
    """
    counts = np.asarray(counts)  # allow plain lists as well as arrays

    # get counts for x and y
    ranks = np.arange(1, len(counts) + 1)
    indices = np.argsort(-counts)
    # NOTE(review): the y axis label below says "Absolute" although the
    # plotted values are divided by the total count.
    normalized_frequencies = counts[indices] / counts.sum()

    # make plot
    plt.figure(figsize=(10, 10))
    plt.loglog(ranks, normalized_frequencies, marker=".")

    # add the expected Zipfian distribution from the equation
    # 1.07 is usually a good bet for the shape parameter
    plt.loglog(ranks, zipf.pmf(ranks, 1.07))

    # add labels for clarity
    plt.xlabel("Frequency rank of token")
    plt.ylabel("Absolute frequency of token")

    ax = plt.gca()  # get current axis
    ax.set_aspect('equal')  # make the plot square
    plt.grid(True)

    if title is not None:
        plt.title(title)
    else:
        title = 'zipf_plot'  # for saving figure
        plt.title("Zipf plot")

    # add text labels at ~10 log-spaced ranks; with fewer than two counts
    # log10(len(counts) - 1) is undefined, so labels are skipped.
    if len(counts) > 1:
        last_freq = None
        label_positions = np.logspace(
            -0.5, np.log10(len(counts) - 1), 10).astype(int)
        for i in label_positions:
            # ensure words don't overlap...make sure y-val is different
            if last_freq != normalized_frequencies[i]:
                plt.text(ranks[i],
                         normalized_frequencies[i],
                         " " + tokens[indices[i]],
                         verticalalignment="bottom",
                         horizontalalignment="left")
                last_freq = normalized_frequencies[i]

    if save:
        plt.savefig(savepath + title + '.png')
    plt.show()
def generate_rates(n, arrival_rate, factor):
    """Build a 15 x ``n`` rate matrix with three tiers of total arrival rate.

    Every row is the Zipf PMF (exponent 1.05) evaluated on a fresh random
    permutation of the ranks ``1..n``, normalised to sum to one and scaled by
    the tier's arrival rate.  Rows are split into thirds: the first third
    uses ``arrival_rate``, the second ``arrival_rate * factor`` and the last
    ``arrival_rate * factor ** 2``.  The matrix is handed to ``log_rates``
    and returned.

    Args:
        n: Number of columns (files).
        arrival_rate: Total rate of each row in the first tier.
        factor: Multiplier applied to the rate between consecutive tiers.

    Returns:
        ``numpy.ndarray`` of shape ``(15, n)``.
    """
    zipf_factor = 1.05
    k = 15
    rates = np.zeros((k, n))

    # Floor division keeps the tier boundaries integral on Python 3, where
    # ``k / 3`` is a float and cannot be used with range() or as an index.
    bounds = [0, k // 3, 2 * k // 3, k]
    for tier in range(3):
        for index_u in range(bounds[tier], bounds[tier + 1]):
            preference = np.random.permutation(n)
            row = zipf.pmf(preference + 1, zipf_factor)
            rates[index_u, :] = row / sum(row) * arrival_rate  # normalize
        if tier < 2:
            arrival_rate *= factor
    log_rates(k, rates)
    return rates
def prepare_objs(ioctx, reads_num, use_zipf, zipf_parm):
    """Build the list of ``{key, len}`` read targets from a pool's objects.

    With ``use_zipf`` the object at position ``i`` gets a Zipf share
    ``p_i`` (normalised over all objects) and ``int(p_i * reads_num) + 1``
    entries until ``reads_num`` is reached (later objects get none); the
    per-object entry counts are shuffled before being assigned to objects.
    Without it, objects are listed once in order - returning early, and
    unshuffled, as soon as ``reads_num`` entries exist - and otherwise the
    list is padded by cycling over it until it holds ``reads_num`` entries.
    Except for that early return the result is shuffled.

    Args:
        ioctx: Object providing ``require_ioctx_open()`` and
            ``list_objects()``; listed objects must expose ``key`` and
            ``stat()`` (presumably a rados ``Ioctx`` - confirm).
        reads_num: Number of read targets wanted.
        use_zipf: Whether to weight objects by a Zipf distribution.
        zipf_parm: Zipf exponent forwarded to ``zipf.pmf``.

    Returns:
        List of dicts with keys ``key`` and ``len`` (``len`` is the first
        element of ``obj.stat()``); empty when the pool has no objects.
    """
    ioctx.require_ioctx_open()
    cluster_objects = list(ioctx.list_objects())
    if not cluster_objects:
        # Nothing to read from: without this guard the Zipf branch fails
        # normalising an empty list and the padding loop divides by zero.
        return []

    objs = []
    count = 0
    if use_zipf:
        objs_p = zipf.pmf(range(1, len(cluster_objects) + 1), zipf_parm)
        objs_p /= sum(objs_p)
        objs_c = []
        for p in objs_p:
            c = int(p * reads_num) + 1 if count < reads_num else 0
            objs_c.append(c)
            count += c
        shuffle(objs_c)
        for obj, copies in zip(cluster_objects, objs_c):
            if copies:
                # stat the object once and reuse the result for every copy
                key = obj.key
                length = obj.stat()[0]
                objs.extend(
                    dict(key=key, len=length) for _ in range(copies))
    else:
        for obj in cluster_objects:
            objs.append(dict(key=obj.key, len=obj.stat()[0]))
            count += 1
            if count == reads_num:
                return objs
        # fewer objects than requested reads: cycle over what we have
        obj_num = count
        while count < reads_num:
            idx = count % obj_num
            objs.append(dict(key=objs[idx]['key'], len=objs[idx]['len']))
            count += 1
    shuffle(objs)
    return objs
def zipf_weights(length, q=0.7) -> list:
    """Return Zipf PMF values for ranks ``1..length`` as choice weights.

    The Zipf shape is ``1 / q`` (``1`` when ``q == 0``, which avoids the
    division by zero), so the caller's parameter is inverted before use.
    Rank 0 is never evaluated because the Zipf PMF is zero there.

    int, (float) -> [floats]

    NOTE(review): scipy documents ``zipf`` for shape ``a > 1``; ``q == 0``
    (shape 1) and ``q >= 1`` therefore presumably yield NaN weights -
    confirm what callers expect.
    """
    shape = 1 if q == 0 else 1 / q
    # Probability mass function to yield weights for weighted choice
    return [zipf.pmf(rank, shape) for rank in range(1, length + 1)]
def SPTestSetUp(fileSize, zipfFactor):
    """Write the popularity profile, k-values and a local test file.

    Files are written below ``~/test_files``:

    * ``popularity.txt`` - shuffled, normalised Zipf popularity of the
      single configured file, one value per line;
    * ``k.txt`` - the fixed partition numbers 1, 2, 3, 4, one per line;
    * ``test_local_file<fileSize>MB`` - a file of ``fileSize`` MB
      (10**6 bytes each), created by seeking to the last byte and writing
      a single NUL.

    Args:
        fileSize: Size of the local test file in MB.
        zipfFactor: Zipf exponent forwarded to ``zipf.pmf``.
    """
    # settings
    fileNumber = 1

    # generate popularity vector
    popularity = zipf.pmf(range(1, fileNumber + 1), zipfFactor)
    popularity /= sum(popularity)
    shuffle(popularity)

    tests_dir = os.path.expanduser('~')
    print("tests dir:" + tests_dir)
    out_dir = tests_dir + "/test_files"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Text mode plus ``with``: works on Python 2 and 3 and always closes.
    with open(out_dir + "/popularity.txt", "w") as fw:
        for item in popularity:
            fw.write("%s\n" % item)

    # The partition numbers are fixed here rather than derived from the
    # popularity vector.
    kVector = [1, 2, 3, 4]
    with open(out_dir + "/k.txt", "w") as fw:
        for k in kVector:
            fw.write("%d\n" % k)

    # create the file of given size
    with open(out_dir + "/test_local_file%dMB" % fileSize, "wb") as out:
        out.seek(int(fileSize * 1000 * 1000) - 1)
        out.write(b'\0')
# Fit each candidate abundance model to ``ab``.  For every model keep the
# fitted parameter(s), its PMF over ``x_values`` and its log-likelihood.

# log-series (untruncated: upper bound at infinity)
logser_p = md.logser_solver(ab)
logser_values = md.trunc_logser.pmf(
    x_values, logser_p, upper_bound=float("inf"))
lsll = md.logser_ll(ab, logser_p)

# lower-truncated negative binomial
nb_n, nb_p = md.nbinom_lower_trunc_solver(ab)
nb_values = md.nbinom_lower_trunc.pmf(x_values, nb_n, nb_p)
nbll = md.nbinom_lower_trunc_ll(ab, nb_n, nb_p)

# Poisson lognormal
pln_mu, pln_sigma = md.pln_solver(ab)
pln_values = md.pln.pmf(x_values, pln_mu, pln_sigma, lower_trunc=True)
plnll = md.pln_ll(ab, pln_mu, pln_sigma)

# Zipf
zipf_par = md.zipf_solver(ab)
zipf_values = zipf.pmf(x_values, zipf_par)
zll = md.zipf_ll(ab, zipf_par)

# Empirical relative frequency of every abundance value.
# NOTE(review): ``1 / len(ab)`` is 0 under Python 2 integer division unless
# true division is enabled elsewhere in the file - confirm.
ab_y = np.zeros(len(x_values) + 1)
for j in range(len(ab)):
    ab_y[ab[j]] += 1 / len(ab)

x_upper = min(50, max(x_values))
ax.set_xlim([0, x_upper])
plt.ylabel('frequency')
plt.xlabel('abundance')
plt.title(plot_labels[i])

# Width originally set at 12 when width was 50.
# This should be the same proportional width
width = 3 / x_upper * 50
def pmf(x, distribution):
    """Return ``zipf.pmf(x, distribution)``.

    ``distribution`` is passed as the Zipf shape parameter.
    """
    return zipf.pmf(x, distribution)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import zipf

# Ranks 1..1000 at which the Zipf PMF is evaluated.
x = np.arange(1, 1001)

# Shape 1.07 on log-log axes, then on linear axes.
reference_pmf = zipf.pmf(x, 1.07)
plt.loglog(x, reference_pmf)
plt.show()

plt.plot(x, reference_pmf)
plt.show()

# Compare several shape parameters in one log-log figure.
for i in (1.07, 2, 3):
    plt.loglog(x, zipf.pmf(x, i), label=str(i))
plt.legend()
plt.show()
# In[1]: from scipy.stats import zipf import numpy as np import matplotlib.pyplot as plt a = 2 k = 1 # x = np.arange(zipf.ppf(0.01, a), # zipf.ppf(0.99, a)) # rv = zipf(a) # prob = zipf.cdf(x, a) # np.allclose(x, zipf.ppf(prob, a)) r = zipf.rvs(a, size=10) pf = zipf.pmf(k, a, loc=0) # print(r) #print(pf) # pmf(k, a, loc=0) # la = 1-pf # In[2]: # np.random.choice(5, 3, p=[0.1, 0, 0.3, 0.6, 0]) from random import choices files = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] weights = [ 0.6079271018540265, 0.04356365534955261, 0.04356365534955261, 0.04356365534955261, 0.04356365534955261, 0.04356365534955261, 0.04356365534955261, 0.04356365534955261, 0.04356365534955261,
import matplotlib.pyplot as plt
from scipy.stats import zipf

# ``csv`` and ``records`` are defined outside this snippet.
lst = []  # (count, word) pairs in the order of ``records``
with open('/Users/lilucy/Desktop/zipfdata.csv', 'w') as csvfile:
    fieldnames = ['word', 'count']
    writer = csv.writer(csvfile)
    writer.writerow(fieldnames)
    # NOTE(review): only the header row is written; the data rows are
    # collected for plotting but never written to the CSV - confirm.
    for row in records:
        wordtokens = row[0].lower()
        # Counts must be numeric: lower-casing the text left them as
        # strings, so summing them below raised TypeError.
        count = int(row[1])
        lst.append((count, wordtokens))

# Observed counts as bars, one bar per word.
plt.bar([key for val, key in lst], [val for val, key in lst],
        color='limegreen')

# Expected counts under a Zipf law with the fitted exponent.
alpha = 1.37065874
total = sum(val for val, key in lst)
plt.plot(range(len(lst)),
         zipf.pmf(range(1, len(lst) + 1), alpha) * total,
         color='crimson', lw=3)
plt.ylabel("Frequency")
plt.xticks(rotation='vertical')
plt.tight_layout()
plt.show()
def generate_graph_data(self):
    """Compute the PMF curve of the selected parameter and plot it.

    The selected table row supplies the age group and parameter name, which
    index ``self.temporaryParametersDict`` for ``p1``, ``p2`` and the
    distribution type.  ``x`` spans ``np.arange(ppf(0.01), ppf(0.99))`` of
    the chosen scipy distribution and ``y`` is its PMF on ``x``; the result
    is passed to ``self.update_graph``.  For an unknown distribution type
    the empty ``{"x": [], "y": []}`` is plotted.

    Parameter mapping:
        * Binomial: ``(int(p1), p2 / 100)``
        * Neg. binomial: ``(p1, p2 / 100)``
        * Geometric, Laplacian, Logarithmic, Planck: ``(p1 / 100,)``
        * Poisson, Zipf (Zeta): ``(p1,)``
        * Uniform: ``randint`` on ``[p1 - p2 / 2, p1 + p2 / 2)``; ``p2`` is
          replaced by ``p1`` when the lower bound would be negative.

    For the single-parameter families a non-zero ``p2`` is reset to 0 in
    column 3 of the selected table row.  Any exception is logged via
    ``log.error`` instead of being propagated.
    """
    ageGroup = self.tableModel.data[self.selected_item_index.row()][0]
    parameter = self.tableModel.data[self.selected_item_index.row()][1]
    settings = self.temporaryParametersDict[ageGroup][parameter]
    p1 = settings["p1"]
    p2 = settings["p2"]
    distributionType = settings["distributionType"]
    xyDict = {"x": [], "y": []}

    try:
        # Single-parameter families; p2 is meaningless for them.
        percent_families = {
            'Geometric': geom,
            'Laplacian': dlaplace,
            'Logarithmic': logser,
            'Planck': planck,
        }
        raw_families = {'Poisson': poisson, 'Zipf (Zeta)': zipf}

        family = None
        shape_args = ()
        ignores_p2 = False
        if distributionType == 'Binomial':
            family, shape_args = binom, (int(p1), p2 / 100)
        elif distributionType == 'Neg. binomial':
            family, shape_args = nbinom, (p1, p2 / 100)
        elif distributionType in percent_families:
            family = percent_families[distributionType]
            shape_args = (p1 / 100,)
            ignores_p2 = True
        elif distributionType in raw_families:
            family = raw_families[distributionType]
            shape_args = (p1,)
            ignores_p2 = True
        elif distributionType == 'Uniform':
            if p1 - 0.5 * p2 < 0:
                p2 = p1
            lower = p1 - 0.5 * p2
            upper = p1 + 0.5 * p2
            family, shape_args = randint, (lower, upper)

        if family is not None:
            xyDict["x"] = np.arange(family.ppf(0.01, *shape_args),
                                    family.ppf(0.99, *shape_args))
            xyDict["y"] = family.pmf(xyDict["x"], *shape_args)
            if ignores_p2 and p2 != 0:
                # clear the unused second parameter in the table
                self.tableModel.setData(
                    self.selected_item_index.sibling(
                        self.selected_item_index.row(), 3), 0, Qt.EditRole)

        self.update_graph(xyDict)
    except Exception as E:
        log.error(E)
def ECTestSetUp(filesize, fileNumber):
    """Write the EC test profile files and run the Alluxio prepare step.

    Four files with one value per line are written below
    ``~/ec_test_files``:

    * ``popularity.txt`` - shuffled, normalised Zipf (exponent 1.5)
      popularity of the ``fileNumber`` files;
    * ``fileSize.txt`` - ``filesize`` converted to bytes (x 1024 x 1024),
      repeated for every file;
    * ``k.txt`` - the value 3 for every file;
    * ``n.txt`` - the value 1 for every file.

    Afterwards ``$ALLUXIO_HOME/bin/alluxio runECPrepareFile true`` is run
    through the shell and the elapsed milliseconds are printed.

    Args:
        filesize: Size of each file in MB.
        fileNumber: Number of files.
    """
    def write_lines(path, values):
        # One value per line; ``with`` closes the file even on error.
        with open(path, "w") as fw:
            for value in values:
                fw.write(str(value) + '\n')

    # settings
    zipfFactor = 1.5

    # generate popularity vector
    popularity = zipf.pmf(range(1, fileNumber + 1), zipfFactor)
    popularity /= sum(popularity)
    shuffle(popularity)

    tests_dir = os.path.expanduser('~')
    print("tests dir:" + tests_dir)
    out_dir = tests_dir + "/ec_test_files"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    write_lines(out_dir + "/popularity.txt", popularity)

    filesize = filesize * 1024 * 1024
    filesizes = [filesize] * fileNumber
    write_lines(out_dir + "/fileSize.txt", [int(size) for size in filesizes])

    kVector = [3] * fileNumber
    write_lines(out_dir + "/k.txt", kVector)

    nVector = [1] * fileNumber
    write_lines(out_dir + "/n.txt", nVector)

    # write the files to Alluxio given the k-values profile
    # (ALLUXIO_HOME must be set in the environment)
    start = int(round(time.time() * 1000))  # in millisecond
    os.system('$ALLUXIO_HOME/bin/alluxio runECPrepareFile true')
    end = int(round(time.time() * 1000))
    print('Write %s files takes %s' % (fileNumber, end - start))
def chisquare(observations, shape_file, min_prob, maxlength, dist):
    """Chi-square goodness of fit of observations for a set of shapes.

    For every shape returned by ``get_shape_values(shape_file, dist)`` the
    expected PMF over the lengths ``1..max_length`` is normalised, adjacent
    lengths are merged into bins until each bin's expected probability
    reaches ``min_prob``, the observations are binned the same way and
    ``stats.chisquare`` is applied.

    Args:
        observations: Observed positive integer lengths.
        shape_file: Passed through to ``get_shape_values``.
        min_prob: Minimal accumulated expected probability per bin.
        maxlength: Largest length considered; when falsy the maximum of
            ``observations`` is used.
        dist: ``"zipf"`` (one shape parameter) or ``"negbinom"``
            (shape is an ``(r, p)`` pair).

    Returns:
        ``pandas.DataFrame`` with one row per shape and the columns
        ``0shape``, ``1chisq``, ``2pvalue``, ``3n.observations``,
        ``4n.bins``, ``5n.expected < 5``, ``6n.observed < 5`` (plus
        ``0shape2`` for ``"negbinom"``).  ``1chisq`` and ``2pvalue`` are
        ``-1`` when the chi-square computation fails.
    """
    max_length = maxlength if maxlength else max(observations)

    # remove observations larger than the maximal length
    observed = [o for o in observations if o <= max_length]

    # get shape parameters
    shape_values = get_shape_values(shape_file, dist)

    # define results data frame
    results = {"0shape": [], "1chisq": [], "2pvalue": [],
               "3n.observations": [], "4n.bins": [],
               "5n.expected < 5": [], "6n.observed < 5": []}
    if dist == "negbinom":
        results["0shape2"] = []

    lengths = range(1, max_length + 1)
    for shape in shape_values:
        # calculate expected frequencies
        if dist == "zipf":
            expect_freq = zipf.pmf(lengths, shape)
        elif dist == "negbinom":
            r, p = shape
            expect_freq = nbinom.pmf(lengths, r, p)

        # Best effort: on failure the un-normalised frequencies are used.
        try:
            expect_freq = np.array(expect_freq) / sum(expect_freq)
        except Exception:
            print("shape caused zero-division: ", shape)

        # accumulate frequencies to a minimal probability of min_prob
        acc_freqs = [0]
        bins_lengths = [0]
        for freq in expect_freq:
            if acc_freqs[-1] < min_prob:
                acc_freqs[-1] += freq
                bins_lengths[-1] += 1
            else:
                acc_freqs.append(freq)
                bins_lengths.append(1)
        acc_expected = np.array(acc_freqs) * len(observed)

        # observed counts per length, dropping the count for length 0
        observed_hist = list(np.bincount(observed)[1:])

        # accumulate observations according to the accumulated frequencies
        start = 0
        acc_observed = []
        for length in bins_lengths:
            acc_observed.append(sum(observed_hist[start:start + length]))
            start += length

        # A failed test is recorded as -1/-1 rather than aborting the scan.
        try:
            chisq, pval = stats.chisquare(acc_observed, acc_expected)
        except Exception:
            chisq, pval = -1, -1

        # count how many bins are less than MIN_BIN in expected and observed
        less_obs = sum(value < MIN_BIN for value in acc_observed)
        less_exp = sum(value < MIN_BIN for value in acc_expected)

        if dist == "zipf":
            results["0shape"].append(shape)
        if dist == "negbinom":
            results["0shape"].append(r)
            results["0shape2"].append(p)
        results["1chisq"].append(chisq)
        results["2pvalue"].append(pval)
        results["3n.observations"].append(len(observed))
        results["4n.bins"].append(len(acc_expected))
        results["5n.expected < 5"].append(less_exp)
        results["6n.observed < 5"].append(less_obs)

    return pd.DataFrame(results)
from scipy.stats import zipf
import numpy as np

fileNumber = 100
zipfFactor = 1.05

# Normalised Zipf popularity of files 1..fileNumber (most popular first).
popularity = zipf.pmf(range(1, fileNumber + 1), zipfFactor)
popularity /= sum(popularity)
#popularity = popularity[::-1]

# Per file: a count of 300 * popularity clamped to [1, 30], and the size
# 100 / count.
# NOTE(review): under Python 2 ``100 / this_count`` is integer division,
# under Python 3 it is true division - confirm which is intended.
count = list()
size = list()
for pop in popularity:
    this_count = max(min(int(300 * pop), 30), 1)
    count.append(this_count)
    size.append(100 / this_count)

print(size)
# Abundance values 1..max(ab) + 1 at which every model PMF is evaluated.
x_values = np.array(range(1, max(ab) + 2))

# Fit each candidate abundance model to ``ab``.  For every model keep the
# fitted parameter(s), its PMF over ``x_values`` and its log-likelihood.

# log-series (untruncated: upper bound at infinity)
logser_p = md.logser_solver(ab)
logser_values = md.trunc_logser.pmf(
    x_values, logser_p, upper_bound=float("inf"))
lsll = md.logser_ll(ab, logser_p)

# lower-truncated negative binomial
nb_n, nb_p = md.nbinom_lower_trunc_solver(ab)
nb_values = md.nbinom_lower_trunc.pmf(x_values, nb_n, nb_p)
nbll = md.nbinom_lower_trunc_ll(ab, nb_n, nb_p)

# Poisson lognormal
pln_mu, pln_sigma = md.pln_solver(ab)
pln_values = md.pln.pmf(x_values, pln_mu, pln_sigma, lower_trunc=True)
plnll = md.pln_ll(ab, pln_mu, pln_sigma)

# Zipf
zipf_par = md.zipf_solver(ab)
zipf_values = zipf.pmf(x_values, zipf_par)
zll = md.zipf_ll(ab, zipf_par)

# Empirical relative frequency of every abundance value.
# NOTE(review): ``1/len(ab)`` is 0 under Python 2 integer division unless
# true division is enabled elsewhere in the file - confirm.
ab_y = np.zeros(len(x_values) + 1)
for j in range(len(ab)):
    ab_y[ab[j]] += 1/len(ab)

x_upper = min(50, max(x_values))
ax.set_xlim([0, x_upper])
plt.ylabel('frequency')
plt.xlabel('abundance')
plt.title(plot_labels[i])

# Width originally set at 12 when width was 50.
# This should be the same proportional width
width = 3 / x_upper * 50
import numpy as np  # required by np.arange below
from scipy.stats import zipf
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
a = 6.5
mean, var, skew, kurt = zipf.stats(a, moments='mvsk')

# Display the probability mass function (``pmf``):
x = np.arange(zipf.ppf(0.01, a),
              zipf.ppf(0.99, a))
ax.plot(x, zipf.pmf(x, a), 'bo', ms=8, label='zipf pmf')
ax.vlines(x, 0, zipf.pmf(x, a), colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:
rv = zipf(a)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)