コード例 #1
def generate_regular(size = 100, zipf_distribution = 2, rate = 1.0, seed = 0.0, output_filename = None):
    """Write a synthetic attack-free request workload to a CSV file.

    One row is written per request with the columns ``time``, ``id``, ``ip``,
    ``topic`` and ``attack``. IPs and IDs are read line by line from
    ``./workloads/ips.txt`` and ``./workloads/ids.txt``; topics are drawn from
    a Zipf distribution and rendered as ``'t<rank>'``.

    Args:
        size: number of requests (rows) to generate.
        zipf_distribution: exponent ``a`` passed to ``random.zipf``.
        rate: rate of the exponential inter-arrival draw. The draw is made for
            every request, but the timestamp written is ``10 * i``.
        seed: accepted for interface compatibility; not used by this function.
        output_filename: destination CSV path. When ``None`` a name is derived
            from ``size`` and ``zipf_distribution`` under ``./workloads/``.
    """
    if output_filename is None:
        output_filename = './workloads/regular_size' + str(size) + '_dist' + str(zipf_distribution) + '.csv'
    fieldnames = ['time', 'id', 'ip', 'topic', 'attack']
    # get ips/ids from ethereum repo; context managers guarantee the input
    # files are closed even if generation fails part-way.
    with open('./workloads/ips.txt', "r") as ip_file, open('./workloads/ids.txt', "r") as id_file:
        topics = random.zipf(a=zipf_distribution, size=size)  # one topic rank per request
        t_next_req = 0.0  # time of next request
        # newline='' is what the csv module expects for files it writes to.
        with open(output_filename, 'w', newline='') as output_file:
            dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
            dict_writer.writeheader()
            for i in range(size):
                # Kept so the `rand` generator advances once per request even
                # though the fixed 10*i spacing below is what gets written.
                t_next_req += rand.expovariate(rate)
                ip = ip_file.readline().rstrip()
                iD = id_file.readline().rstrip()
                if not ip or not iD:
                    # Best effort: warn and still emit the row with empty fields.
                    print("Not enough IPs/IDs in the files")
                record = {
                    # alternative timestamp: int(1000*t_next_req)
                    'time': int(10*i),
                    'id': iD,
                    'ip': ip,
                    'topic': 't' + str(topics[i]),
                    'attack': 0,
                }
                # Each record must actually reach the file; without this the
                # output CSV stays empty.
                dict_writer.writerow(record)
    print("Generated regular workload in", str(output_filename))
コード例 #2
ファイル: operators.py プロジェクト: CaptainPatate/mtools
    def __call__(self, options=None):
        """Return one Zipf sample shifted down by one.

        ``options`` is normalised with ``self._parse_options`` and its
        ``'alpha'`` entry is decoded with ``self._decode`` before being used
        as the distribution parameter.
        """
        parsed = self._parse_options(options)
        exponent = self._decode(parsed['alpha'])
        # subtract 1 from the drawn value before handing it back
        return zipf(exponent) - 1
コード例 #3
ファイル: operators.py プロジェクト: slathropacc/mtools
    def __call__(self, options=None):
        """Draw a single Zipf value and return it minus one.

        The ``'alpha'`` option (after ``self._parse_options`` and
        ``self._decode``) is the parameter handed to ``zipf``.
        """
        opts = self._parse_options(options)
        alpha_value = self._decode(opts['alpha'])
        sample = zipf(alpha_value)
        return sample - 1
コード例 #4
def randhouse(house):
    """Return a pseudo-random score for ``house`` as a float in 1.0..10.0.

    The score combines a Zipf-distributed pick, a bias that grows with the
    house index relative to ``len(houses)``, and small random offsets, then is
    wrapped with ``% 10`` and shifted by ``1.0``.
    """
    # Exponent varies in [1.01, 1.51) so the skew of the distribution changes
    # from call to call; larger exponents make one number too popular.
    exponent = 1.01 + random.random() / 2
    samples = zipf(a=exponent, size=1000)
    # Only ranks below 6 are eligible for the pick.
    eligible = samples[samples < 6]
    popularity = int(random.choice(eligible)) * 2 - random.randint(0, 1)
    # Shift popularity between houses so that some are more popular than others.
    house_bias = int(house / (len(houses) - 1) * 8)
    jitter = random.randint(-1, 1)
    return (10 - popularity - house_bias - jitter) % 10 + 1.0
コード例 #5
ファイル: network_setup.py プロジェクト: sisyga/covidnetwork
def get_companysizes(a, n, max=35000):
    """
    Draw the size of n companies from a Zipf distribution
    :param n: int, number of companies
    :param a: float, coefficient of zipf distribution
    :param max: float, maximum size, for Munich equal to BMW with roughly 35000 employees
        NOTE(review): ``max`` is not applied to the drawn sizes in this
        function - confirm whether samples above it should be capped or redrawn.
    :return: the ``n`` samples returned by ``npr.zipf(a, size=n)``
    """
    return npr.zipf(a, size=n)
コード例 #6
def generate_first_descriptions(dictionary):
    """Randomly generate the initial set of descriptions.

    Builds ``NUM_MUTANTS`` descriptions, each a list of ``DESCRIPTION_LEN``
    words. Every word is chosen by drawing a 1-based rank from
    ``rnd.zipf(ZIPF_LAW_CONST)`` and taking ``dictionary[rank - 1]``.

    :param dictionary: indexable collection of words to pick from
    :return: list of descriptions, each a list of words
    """
    first_descriptions = []

    for _ in range(NUM_MUTANTS):
        first_desc = []

        # randomly appending words from the dictionary
        for _ in range(DESCRIPTION_LEN):
            zipf_val = rnd.zipf(ZIPF_LAW_CONST)

            while zipf_val > len(dictionary):
                # just in case our zipf value is absurdly big: redraw until
                # the rank maps to an existing dictionary entry
                zipf_val = rnd.zipf(ZIPF_LAW_CONST)

            first_desc.append(dictionary[int(zipf_val) - 1])

        # Store the finished description; without this the function would
        # always return an empty list.
        first_descriptions.append(first_desc)

    return first_descriptions
コード例 #7
ファイル: Item.py プロジェクト: dbiir/MiDBench
 def setZipfian(self, para_a, sequence, size, scale, a_change):
     """Fill ``self.data`` with ``size`` Zipf-ranked picks from ``sequence``.

     ``sequence`` is first rescaled in place (each entry multiplied by
     ``scale``, rounded, and converted to int). Ranks are then drawn from
     ``zipf(para_a)``; draws outside ``1..len(sequence)`` are rejected and
     redrawn. ``a_change`` is accepted but not used here.
     """
     # Rescale the caller's sequence in place.
     for idx, value in enumerate(sequence):
         sequence[idx] = int(round(1.0 * value * scale))

     upper = len(sequence)
     picks = []
     while len(picks) < size:
         rank = zipf(para_a)
         if not (1 <= rank <= upper):
             continue  # rank has no matching entry; draw again
         picks.append(sequence[rank - 1])

     self.data = picks
     self.distribution = 'zipfian'
コード例 #8
def write_schedule_to(TCL):
    """Write the scheduling section of the script to the ``TCL`` stream.

    For every entry of ``list_of_apps``:
      * ``"zipf"`` apps get repeated ``send`` events whose volumes are Zipf
        draws (modulo the target volume) until the accumulated volume reaches
        the target held in ``app[4]``;
      * any other app is treated as on/off and only gets a ``start`` event.
    Event times are uniform in [0, 240); a ``finish`` event is scheduled at 300.0.
    """
    TCL.write("# scheduling\n")
    for app in list_of_apps:
        if app[1] != "zipf":
            # On/Off model : we just need to schedule the start
            start_at = npr.rand() * 240
            TCL.write("".join((
                "$ns at ", str(start_at), " \"$app", app[2], app[3],
                " start\"\n",
            )))
            continue

        target_volume = app[4]
        sent_so_far = 0
        while sent_so_far < target_volume:
            volume = npr.zipf(1.5) % target_volume
            sent_so_far += volume
            # random value between 0 and 4 minutes
            send_at = npr.rand() * 240
            TCL.write("".join((
                "$ns at ", str(send_at), " \"$app", app[2], app[3],
                " send ", str(volume), "\"\n",
            )))

    TCL.write("$ns at 300.0 \"finish\"\n")
コード例 #9
def get_random_accesses(amount):
    """Return the result of ``zipf(NumDocuments, amount)``.

    NOTE(review): ``NumDocuments`` is passed as the first argument of
    ``zipf`` - confirm that this matches the signature of the ``zipf`` in scope.
    """
    accesses = zipf(NumDocuments, amount)
    return accesses
コード例 #10
ファイル: utils.py プロジェクト: kunlegiwa/MANGO
def zipf(size, params):
        return random.zipf(params['a'], size)
    except ValueError as e:
コード例 #11
def generate_attack_topic(size = 100, zipf_distribution = 2, topic_to_attack = 't11', attacker_ip_num = 3, attacker_id_num=10, rate_normal = 1.0, rate_attack = 10.0, seed = 0.0, output_filename = None):
    """Build a workload that interleaves normal requests with attack requests.

    Two exponential arrival processes (``rate_normal`` and ``rate_attack``)
    are advanced side by side; whichever has the earlier next arrival decides
    whether the current request is normal (``attack = 0``) or an attack
    (``attack = 1``). Normal requests take their IP/ID from
    ``./workloads/ips.txt`` / ``./workloads/ids.txt`` and a Zipf-drawn topic;
    attack requests take them from ``attacker_ips`` / ``attacker_ids`` and
    always target ``topic_to_attack``.

    ``seed`` is accepted but not referenced in this function.

    NOTE(review): as visible here the function is incomplete - see the inline
    notes on the attacker lists, the empty loop body and the unwritten records.
    """
    if(output_filename == None):
        output_filename = './workloads/attack_topic_size' + str(size) + '_dist' + str(zipf_distribution) + '.csv'
    # get ips/ids from ethereum repo
    # NOTE(review): these two files are opened without a context manager and
    # are not closed anywhere in this function.
    ip_file = open('./workloads/ips.txt', "r")
    id_file = open('./workloads/ids.txt', "r")
    topics = random.zipf(a=zipf_distribution, size=size)  # one topic rank per request index

    attacker_ips = []
    for i in range(0, attacker_ip_num):
        # Spread the attacker addresses over 0..255, using the same number in
        # all four octets.
        num = int(255/attacker_ip_num * i)
        ip = str(num) + "." + str(num) + "."+ str(num) + "."+ str(num)
        # NOTE(review): `ip` is never appended to attacker_ips in the visible
        # code, so the list stays empty - confirm against the full source.
    attacker_ids = []
    for i in range(0, attacker_id_num):
        # NOTE(review): this loop has no body in the visible source (a syntax
        # error as written); the statements that fill attacker_ids are missing.
    print("attacker ips:", attacker_ips)
    print("attacker ids:", attacker_ids)

    t_next_normal_req = rand.expovariate(rate_normal)  # time of next normal request
    t_next_attack_req = rand.expovariate(rate_attack) # time of next attack request
    time = 0.0
    attack = 0
    with open(output_filename, 'w') as output_file:
        fieldnames = ['time', 'id', 'ip', 'topic', 'attack']
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        for i in range(0, size):
            # Pick whichever arrival process fires first. On an exact tie
            # neither branch runs, so `attack` and `time` keep their previous values.
            if t_next_normal_req < t_next_attack_req:
                attack = 0
                time = t_next_normal_req
            elif t_next_normal_req > t_next_attack_req:
                attack = 1
                time = t_next_attack_req

            record = {}
            if( attack == 0 ):
                ip = ip_file.readline().rstrip()
                iD = id_file.readline().rstrip()
                if(not ip or not iD):
                    # Only a warning: the request is still built with empty fields.
                    print("Not enough IPs/IDs in the files")
                topic = 't' + str(topics[i])
            else: # attack == 1
                # Cycle through the attacker identities by request index.
                ip = attacker_ips[i % attacker_ip_num]
                iD = attacker_ids[i % attacker_id_num]
                topic = topic_to_attack

            # The written timestamp is a fixed 10*i spacing, not `time`.
            record['time'] = int(10*i)
            record['id'] = iD
            record['ip'] =ip
            record['topic'] = topic
            record['attack'] = attack
            # NOTE(review): `record` is never handed to dict_writer in the
            # visible code, so nothing is written to the output file.
            # Advance whichever process produced this request.
            if time == t_next_normal_req:
                t_next_normal_req += rand.expovariate(rate_normal)
            if time == t_next_attack_req:
                t_next_attack_req += rand.expovariate(rate_attack)

    # NOTE(review): the message says "regular" although this is the attack workload.
    print("Generated regular workload in", str(output_filename))
コード例 #12
ファイル: lang.py プロジェクト: afcarl/nerv
def zipfgen():
    """Yield an endless stream of tokens keyed by ``zipf(2.0)`` draws.

    Each distinct draw is mapped, the first time it is seen, to the next
    item produced by ``_strseq()``; later occurrences of the same draw yield
    the same token again.
    """
    fresh_tokens = _strseq()
    # Missing keys are filled lazily with the next unused token.
    token_for_rank = defaultdict(lambda: next(fresh_tokens))
    while True:
        rank = zipf(2.0)
        yield token_for_rank[rank]
コード例 #13
def np_zipf_distribution():
    """Plot the values below 10 from 1000 Zipf(a=2) samples, without a KDE curve."""
    # The result of this 2x3 draw is not used; the call is kept because it
    # still advances the random generator before the plotted sample is drawn.
    random.zipf(a=2, size=(2, 3))
    samples = random.zipf(a=2, size=1000)
    below_ten = samples[samples < 10]
    sns.distplot(below_ten, kde=False)
コード例 #14
ファイル: _discrete_distns.py プロジェクト: danaon/scipy
 def _rvs(self, a):
     """Draw Zipf variates with parameter ``a``, shaped by ``self._size``."""
     shape = self._size
     return mtrand.zipf(a, size=shape)
コード例 #15
ファイル: _discrete_distns.py プロジェクト: sugiki/scipy
 def _rvs(self, a):
     """Return ``mtrand.zipf`` samples for parameter ``a`` with shape ``self._size``."""
     requested_size = self._size
     variates = mtrand.zipf(a, size=requested_size)
     return variates
コード例 #16
Zipf's law: in a collection, the nth most common term occurs about 1/n times as often as the most common term.
e.g. the 5th most common word in English occurs nearly 1/5 as often as the most used word.

# random.zipf takes two parameters:
#   a    - distribution parameter
#   size - the shape of the returned array

# Draw a sample from a Zipf distribution with distribution parameter 2
# and shape 2x3.

from numpy import random

x = random.zipf(a=2, size=(2, 3))


# Visualization of the Zipf distribution:
# sample 1000 points but plot only the ones with value < 10 for a more
# meaningful chart. Note that `x` is rebound here, replacing the 2x3 sample.

import matplotlib.pyplot as plt
import seaborn as sns

x = random.zipf(a=2, size=1000)
sns.distplot(x[x < 10], kde=False)
コード例 #17
ファイル: zipfnumber.py プロジェクト: alisheykhi/SocialPDA
            pattern_meas = re.compile(r"^(\d+)\s+(\d+)\s+([-]?\d+)$",
                                      re.VERBOSE | re.MULTILINE)
        if file_name.split(".")[0] == 'amazon':
            pattern_meas = re.compile(r"^(\d+)\s+(\d+)",
                                      re.VERBOSE | re.MULTILINE)
        for match in pattern_meas.finditer(text):
            nodes_list.append("%s" % int(match.group(1)))
            nodes_list.append("%s" % int(match.group(2)))

# Count how many times each node id appears in nodes_list.
for node in nodes_list:
    count = frequency.get(node, 0)
    frequency[node] = count + 1
node_ocurr = []
s = []
# Walk the nodes from most to least frequent, recording half of each count
# and a Zipf sample sized by that half count.
for key, value in reversed(sorted(frequency.items(), key=itemgetter(1))):
    node_ocurr.append([key, value / 2])
    s.append(zipf(2., value / 2))

a = 2
# NOTE(review): `s` is rebound here, so the per-node samples collected in the
# loop above are discarded - confirm this is intended.
s = zipf(a, 10)

# Rescale the samples so that the largest one maps to 5.
result = (s / float(max(s))) * 5

for i in result:
    # NOTE(review): `i` is an element of the rescaled (float) array, yet it is
    # used as an index into `s` - verify this is the intended lookup.
    print s[i]
    print '------'

print min(s), max(s)
print min(result), max(result)
コード例 #18
ファイル: estadistica.py プロジェクト: vtomasr5/distribuits
 def calculaTiempoSesion(self):
     rho = 2
     a = rand.zipf(rho)
     return a
コード例 #19
        print "  Payoffs: ",
        print "  Demes: ",
        print "Born: " + str(self.born)
        print "Rounds Alive: " + str(self.roundsAlive)
        print "Times observed: " + str(self.nObserved)
        print "Current Deme: " + str(self.currentDeme)
        print "Points Earned: " + str(self.pointsEarned)
        print "Number of Offspring: " + str(self.nOffspring)

# Initialize structures in model
fitness = [] # fitness landscape: one list of `nact` payoffs per iteration
for i in range(3):
    # Alternative payoff draws, left disabled:
    #    tmp = [round(2*random.expovariate(lambd)**2) for x in range(nact)]
    #    tmp = [round(2*random.lognormvariate(lgmu,lgsd)**2) for x in range(nact)]
    tmp = [round(2*npr.zipf(alpha)) for x in range(nact)]
    # Store the generated payoffs; otherwise `fitness` would remain empty.
    fitness.append(tmp)
aliveAgents = []
Agents = []


# Initialize stats
class statsDict:
    """Container of simulation counters, all starting at zero."""

    Name = "statsDict"

    def __init__(self):
        # Counters: agents currently alive, innovate moves, observe moves.
        self.aliveAgents = self.innovate = self.observe = 0
コード例 #20
 def zipfTime(self):
     i = random.zipf(1.3)
     return i
コード例 #21
ファイル: zipf_test.py プロジェクト: alisheykhi/SocialPDA
from numpy.random import zipf
from math import ceil

privacy_level = []
# NOTE(review): `a` is not defined anywhere in the visible part of this
# script - confirm where the distribution parameter comes from.
s = zipf(a, 50000)
# Rescale the 50000 samples so that the largest one maps to 5.
zipf_dis = (s/float(max(s)))*5

# Copy the rescaled values into a plain list (replacing the empty list above).
privacy_level = [x for x in zipf_dis]
for item in privacy_level:
    # Print each value rounded up to the next integer.
    print int(ceil(item))
print len(privacy_level)
コード例 #22
ファイル: lang.py プロジェクト: pdsujnow/nerv
def zipfgen():
    """Generate tokens forever, choosing each one by a ``zipf(2.0)`` draw.

    The first time a given draw value appears it is bound to the next item
    from ``_strseq()``; the same value always yields that token afterwards.
    """
    unused = _strseq()
    # Lazily assign the next unused token to every new draw value.
    assigned = defaultdict(lambda: next(unused))
    while True:
        draw = zipf(2.0)
        yield assigned[draw]
コード例 #23
 def create_zipf_firm(self):
     """Return ``{self.name: samples}`` with ``self.num_firms`` draws from ``zipf(a=4)``."""
     draws = zipf(a=4, size=self.num_firms)
     return {self.name: draws}
コード例 #24
# The Zipf distribution is used to sample data according to Zipf's law.
# Zipf's law: in a collection, the nth most common term occurs about 1/n
# times as often as the most common term.
# E.g. the 5th most common word in English occurs nearly 1/5 as often as the
# most used word.
# random.zipf takes two parameters:
#   a    - distribution parameter
#   size - shape of the returned array
from numpy import random
import matplotlib.pyplot as plt
import seaborn as sns

# Draw 1000 samples with a=2 and plot a histogram (no KDE curve) of those
# whose value is below 25.
arr1 = random.zipf(a=2, size=1000)
sns.distplot(arr1[arr1 < 25],
             kde=False)  # only the samples with value below 25 are plotted
コード例 #25
Zipf Distribution se related aur kuch baate:

1. Zipf's law ko probability me kuch aise dekha jata hai ki, yaha par kisi event ke hone ki frequency (f) hoti hai aur uska rank (r) hota hai.
2. Iss law ko American linguist George Kingsley Zipf (1902-1950) ne diya tha.
3. Unhone iss law ko English me kisi bhi word ke aane ki frequency ko janne ke liye diya tha, jo ki aaj bahut hi popular hai aur Machine Learning me bahut zyada useful hai.
4. Waise hi Zipf ne 1949 me issi law me ye bhi claim kiya tha ki, desh me maujud sab se bada sheher, dusre sheher se doguna bada hai aur 3rd wale se tiguna bada hai and so on. Lekin ye law kewal wahin par fit baitha, iska use language me ya kuch aur case me sahi fit nahi baithta hai.

Jada jankari ke liye:
1. https://www.sciencedirect.com/topics/computer-science/zipf-distribution
2. https://www.nngroup.com/articles/zipf-curves-and-website-popularity/
3. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4176592/
4. https://plus.maths.org/content/mystery-zipf

import numpy.random as r
import matplotlib.pyplot as plt
import seaborn as sns

# Draw 1000 samples from a Zipf distribution with parameter a=2.
zipf = r.zipf(a=2, size=(1000))
# Debugging aids, left disabled:
# print('\n',zipf)
# print('\n',zipf[zipf<10]);exit()
# hist=False: draw the density curve only, without histogram bars.
sns.distplot(zipf, hist=False)

plt.xlabel('Rank X')
# Axis label spelling corrected ('Frequencey' -> 'Frequency').
plt.ylabel('Frequency Y')
plt.title('Zipf Distribution')
# Optional axis limits, left disabled:
# plt.xlim(0,100)
# plt.ylim(0,100)

コード例 #26
ファイル: test_algorithms.py プロジェクト: trauzti/mimir
import unittest
import random
import sys

from numpy.random import zipf

import ARC, CLOCK, LRU, LFU, LRU3, LRU10
from cache import Cache
from common import Entry

# Zipf parameter used for the key distribution below.
key_alpha = 1.33
# Keys are drawn once at import time, NUMREQUESTS samples in total.
# NOTE(review): NUMREQUESTS is not defined in the visible part of this file.
keydistribution = zipf(key_alpha, NUMREQUESTS)

class TestAlgorithms(unittest.TestCase):
    def setUp(self):

    def test_algorithm(self, name=None):
        if name == None:
        self.cache = Cache(name, CACHESIZE)
        self.assertEqual(str(self.cache.cache), name)
        self.assertEqual(self.cache.get("trausti"), None)
        self.assertEqual(self.cache.put("trausti", 100), 1)
        self.assertEqual(self.cache.get("trausti"), 100)

        for j in xrange(2000):