Ejemplo n.º 1
0
def main(argv):
    """Build a histogram from the file named in ``argv[1]`` and print a sample.

    Args:
        argv: Argument list; the element at index 1 is passed to
            ``frequency.histogram``.  Presumably this is ``sys.argv``, with
            the program name at index 0 -- confirm against the caller.

    Raises:
        Exception: If ``argv`` has no element at index 1.
    """
    # argv[1] is read below, so fewer than two entries means no file was
    # given.  (Checking for an empty list let a one-element argv through,
    # which then failed with IndexError.)
    if len(argv) < 2:
        raise Exception("No file specified")

    h = frequency.histogram(argv[1])
    # Draw one item from the histogram and print it.
    print(sample(h))
def smpl_dstr(filename):
    """Return one word drawn at random, weighted by how often it occurs.

    A histogram is built from ``filename`` via ``frequency.histogram``.
    Every word is then repeated ``frequency.frequency(word, histogram)``
    times in a flat list, and one position of that list is picked uniformly
    at random.
    """
    histogram = frequency.histogram(filename)

    # Expand the histogram so that each word appears once per occurrence.
    pool = [
        word
        for word in histogram
        for _ in range(0, frequency.frequency(word, histogram))
    ]

    # randint is inclusive on both ends, hence the "- 1".
    position = random.randint(0, len(pool) - 1)
    return pool[position]
Ejemplo n.º 3
0
def gen_word():
    """Generate a 10-word sentence from ``./words.txt`` with a Markov chain.

    The lines of ``./words.txt`` are read and handed to ``MarkovChain``;
    the result of ``walk(10)`` on that chain is returned.

    Returns:
        Whatever ``MarkovChain.walk`` returns -- presumably the generated
        sentence as a string; confirm against the MarkovChain class.

    Raises:
        OSError: If ``./words.txt`` cannot be opened or read.
    """
    num_words = 10

    # The context manager closes the file even if reading fails; the handle
    # was previously left open.
    with open("./words.txt", "r") as my_file:
        lines = my_file.readlines()

    markovchain = MarkovChain(lines)
    return markovchain.walk(num_words)
Ejemplo n.º 4
0
def generate_random_word(source_text):
    """Pick one word from ``source_text`` with weight-proportional probability.

    ``histogram(source_text)`` is iterated as a sequence of entries where
    ``entry[0]`` is the word and ``entry[1]`` its weight.  ``total_weight``
    is assumed to return the sum of those weights -- confirm against its
    definition.

    Returns:
        The selected word, or None if no entry is selected (for example
        when the histogram is empty).
    """
    histogram_sample = histogram(source_text)
    total = int(total_weight(histogram_sample))

    # Draw from 1..total rather than 0..total.  randint is inclusive on both
    # ends, so starting at 0 gave the first entry one extra winning value and
    # skewed the distribution toward it.  A total below 1 keeps the old
    # lower bound so degenerate histograms behave as before.
    lower_bound = 1 if total >= 1 else 0
    remaining = random.randint(lower_bound, total)

    # Walk the entries, consuming weight until the draw is used up.
    for entry in histogram_sample:
        remaining -= entry[1]
        if remaining <= 0:
            return entry[0]

    return None
Ejemplo n.º 5
0
def median(inlist: List[float], numbins: int = 1000) -> float:
    """Return the median of *inlist*, estimated from a binned histogram.

    The values are binned with ``frequency.histogram`` over the range
    ``[min(inlist), max(inlist)]``.  The median is then linearly
    interpolated inside the first bin whose cumulative count reaches half
    of ``len(inlist)``.  More bins bring the computed value closer to the
    true median.  See G.W. Heiman's Basic Stats (1st Edition), or CRC
    Probability & Statistics.

    Args:
        inlist: Non-empty list of numbers.
        numbins: Number of histogram bins to use (default 1000).

    Returns:
        The interpolated median.

    Raises:
        ValueError: If *inlist* is empty (raised by ``min``) or if no bin
            reaches the 50th percentile.
    """
    (hist, smallest, binsize,
     extras) = frequency.histogram(inlist, numbins,
                                   [min(inlist), max(inlist)])  # make histog
    cumhist = support.cumsum(hist)  # make cumulative histogram
    half = len(inlist) / 2.0

    # Find the 1st(!) bin whose cumulative count holds the 50%ile score.
    for cfbin, cumfreq in enumerate(cumhist):
        if cumfreq >= half:
            break
    else:
        raise ValueError("no histogram bin reaches the 50th percentile")

    LRL = smallest + binsize * cfbin  # lower real limit of that bin
    # Count of scores below the bin.  For the first bin there are none;
    # indexing cumhist[-1] there would wrongly pick up the grand total.
    cfbelow = cumhist[cfbin - 1] if cfbin > 0 else 0
    freq = float(hist[cfbin])  # frequency IN the 50%ile bin
    return LRL + ((half - cfbelow) / freq) * binsize  # median formula


# NOTE(review): the body of this function was found as unreachable code after
# median's return statement.  The ``def`` line is reconstructed from the
# visible call ``weighted_select(hist)`` -- confirm against the original file.
def weighted_select(hist):
    """Return a key of *hist* chosen with probability proportional to its count.

    The draw is weighted by the counts stored in *hist*.  Drawing against
    ``len(hist)`` (the number of distinct keys) almost always returned the
    first key.

    dictionary -> key (None when *hist* is empty or its total count is not
    positive)
    """
    if not hist:
        return None
    words = list(hist)
    weights = [hist[word] for word in words]
    if sum(weights) <= 0:
        return None
    return random.choices(words, weights=weights)[0]


def test_probability(hist):
    """Estimate how often weighted_select returns each key of ``hist``.

    Runs weighted_select 10000 times, counts how many times each key comes
    back, and converts the counts to fractions of the total number of runs.
    A non-uniform result shows that the selection is weighted.

    dictionary -> dictionary
    """
    trials = 10000

    # Start every key at zero so keys that are never drawn still appear.
    tally = dict.fromkeys(hist.keys(), 0)
    for _ in range(trials):
        tally[weighted_select(hist)] += 1

    return {key: hits / trials for key, hits in tally.items()}

if __name__ == "__main__":
    # Treat all command-line arguments as one space-separated text, build its
    # histogram, and print the observed selection probabilities.
    hist = histogram(' '.join(sys.argv[1:]))
    print(test_probability(hist))
Ejemplo n.º 7
0
def main(argv):
    """Print 50 lines, each holding three samples from the corpus histogram.

    The histogram is built by ``frequency.histogram`` from ``argv[1]``; the
    three samples on a line are separated by two spaces.
    """
    corpus = frequency.histogram(argv[1])
    gap = "  "
    for _ in range(50):
        first = stochastic.sample(corpus)
        second = stochastic.sample(corpus)
        third = stochastic.sample(corpus)
        print(first + gap + second + gap + third)
Ejemplo n.º 8
0
    print(moment.describe(lf))

    print('\nFREQUENCY')
    print('freqtable:')
    print('itemfreq:')
    print(frequency.itemfreq(l))
    print(frequency.itemfreq(l))
    print('scoreatpercentile:', frequency.scoreatpercentile(l, 40),
          frequency.scoreatpercentile(lf, 40),
          frequency.scoreatpercentile(l, 40),
          frequency.scoreatpercentile(lf, 40))
    print('percentileofscore:', frequency.percentileofscore(l, 12),
          frequency.percentileofscore(lf, 12),
          frequency.percentileofscore(l, 12),
          frequency.percentileofscore(lf, 12))
    print('histogram:', frequency.histogram(l, 10, [0, max(l)]),
          frequency.histogram(l, 10, [0, max(l)]))
    print('cumfreq:')
    print(frequency.cumfreq(l))
    print(frequency.cumfreq(lf))
    print(frequency.cumfreq(l))
    print(frequency.cumfreq(lf))
    print('relfreq:')
    print(frequency.relfreq(l))
    print(frequency.relfreq(lf))
    print(frequency.relfreq(l))
    print(frequency.relfreq(lf))

    print('\nVARIATION')
    print('obrientransform:')