Code example #1
def generate_bootstrap_histograms(data, title):
    """
    Generate histograms for the bootstrapped values.

    Parameters
    ----------
    data: dict, ex. {
                        'expert1': [ 1, 2, 1, 0, 0.5 ],
                        'expert2': [ 4, 5.5, 6, 4, 5 ]
                    }
    title: string, a title describing the distribution.
    """
    for expert, values in data.iteritems():
        ex_name = "".join(char for char in expert if char not in ".,")
        filename = title + "-" + ex_name
        filename = filename.strip().lower().replace(" ", "-")
        utils.histogram(
            data=values,
            filename="charts/fantasypros/{}.png".format(filename),
            title="{} - {}".format(title, expert),
            figsize=(10, 5),
            titlesize=26,
            xsize=26,
            xlim=(-3, 3),
            small=True,
        )
        confidence = np.percentile(values, q=[2.5, 50, 97.5])
        lower, mid, upper = [round(i, 2) for i in sorted(confidence)]
        msg = "95% {}: {} +/- {} (Lower: {} Mid: {} Upper: {})"
        print(msg.format(expert, mid, (mid - lower), lower, mid, upper))
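
The percentile call is the statistical core of the snippet above (utils.histogram itself is project-specific and not shown). A self-contained sketch of just that step, using NumPy and made-up bootstrap replicates (the values below are illustrative, not from the project):

import numpy as np

# Hypothetical bootstrap replicates for one expert (illustrative values only).
values = np.array([1.0, 2.0, 1.0, 0.0, 0.5])

# 95% percentile interval plus the median, as in generate_bootstrap_histograms.
lower, mid, upper = np.percentile(values, q=[2.5, 50, 97.5])
print("95%: {} +/- {} (Lower: {} Mid: {} Upper: {})".format(
    round(mid, 2), round(mid - lower, 2),
    round(lower, 2), round(mid, 2), round(upper, 2)))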
Code example #2
def histogram_matching(img, ref, bins=256):
    assert img.shape == ref.shape

    result = img.copy()
    h, w = img.shape
    pixels = h * w

    # histogram
    hist_img = histogram(img)
    hist_ref = histogram(ref)
    # cumulative histogram
    cum_img = cumulative_histogram(hist_img)
    cum_ref = cumulative_histogram(hist_ref)
    # normalization
    prob_img = cum_img / pixels
    prob_ref = cum_ref / pixels

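    # For each input grey level a, scan the reference CDF from the top level
    # downwards to find the matching output level (standard histogram-matching lookup).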
    new_values = np.zeros(bins)
    for a in range(bins):
        j = bins - 1
        while True:
            new_values[a] = j
            j = j - 1

            if j < 0 or prob_img[a] >= prob_ref[j]:
                break

    for i in range(h):
        for j in range(w):
            a = img.item(i, j)
            b = new_values[a]
            result.itemset((i, j), b)

    return result
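
The snippet assumes histogram and cumulative_histogram helpers that are not shown. A minimal sketch of what they might look like for 8-bit grayscale images, using NumPy (an assumption, not the project's actual helpers):

import numpy as np

def histogram(img, bins=256):
    # Count of pixels at each intensity level 0..bins-1.
    hist, _ = np.histogram(img.ravel(), bins=bins, range=(0, bins))
    return hist

def cumulative_histogram(hist):
    # Running (unnormalized) cumulative sum of the histogram.
    return np.cumsum(hist)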
Code example #3
def generate_bootstrap_histograms(data, title):
    """
    Generate histograms for the bootstrapped values.

    Parameters
    ----------
    data: dict, ex. {
                        'expert1': [ 1, 2, 1, 0, 0.5 ],
                        'expert2': [ 4, 5.5, 6, 4, 5 ]
                    }
    title: string, a title describing the distribution.
    """
    for expert, values in data.iteritems():
        ex_name = ''.join(char for char in expert if char not in '.,')
        filename = title + '-' + ex_name
        filename = filename.strip().lower().replace(' ', '-')
        utils.histogram(
            data=values,
            filename='charts/fantasypros/{}.png'.format(filename),
            title='{} - {}'.format(title, expert),
            figsize=(10,5),
            titlesize=26,
            xsize=26,
            xlim=(-3, 3),
            small=True
        )
        confidence = np.percentile(values, q=[2.5, 50, 97.5])
        lower, mid, upper = [round(i, 2) for i in sorted(confidence)]
        msg = '95% {}: {} +/- {} (Lower: {} Mid: {} Upper: {})'
        print(msg.format(expert, mid, (mid-lower), lower, mid, upper))
Code example #4
File: codec.py Project: zbanach/koda
    def encode(self, source):
        """Koduje wejściowy ciąg danych przy pomocy wykładniczego kodu Golomba.

        Argumenty:
            source (List[int]): ciąg liczb naturalnych do zakodowania

        Zwraca:
            BitStream: strumień bitowy zawierający ciąg słów kodowych oraz opcjonalnie
                nagłówek (przy pośrednim trybie pracy kodera).
        """
        stream = BitStream()
        self._source = source
        self._hist = histogram(source)
        # Build the codebook and store it in the header (if indirect mode was selected)
        if not self._direct:
            self._codebook = self._make_codebook(stream)
        header_len = len(stream)
        # Encode the source data
        for word in source:
            self._encode_word(word, stream)
        # Compute the statistics
        self._stream_len = len(stream)
        self._stream_data_len = len(stream) - header_len
        self._stats = Statistics(self)
        return stream
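
The _encode_word and BitStream pieces are not shown here. For reference, an order-0 exponential-Golomb codeword for a natural number n is the binary form of n + 1 preceded by one zero bit per extra binary digit; a hypothetical string-based sketch (not the project's bit-level implementation):

def exp_golomb_bits(n):
    # Order-0 exponential-Golomb code: (len - 1) zero bits, then binary of n + 1.
    binary = bin(n + 1)[2:]
    return "0" * (len(binary) - 1) + binary

# 0 -> "1", 1 -> "010", 2 -> "011", 3 -> "00100"
assert exp_golomb_bits(0) == "1"
assert exp_golomb_bits(3) == "00100"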
Code example #5
def generate_error_histograms(df, column, title):
    """
    Generate actual error distributions for each expert.
    Plots the distribution of the given column.
    """
    for expert in df.EXPERT.unique().tolist():
        ex_name = "".join(char for char in expert if char not in ".,")
        filename = title + "-" + ex_name
        filename = filename.strip().lower().replace(" ", "-")
        utils.histogram(
            data=df[df.EXPERT == expert][column],
            filename="charts/fantasypros/{}.png".format(filename),
            title="{} - {}".format(title, expert),
            figsize=(10, 5),
            titlesize=26,
            xsize=26,
            xlim=(-40, 40),
            small=True,
        )
Code example #6
def generate_error_histograms(df, column, title):
    """
    Generate actual error distributions for each expert.
    Plots the distribution of the given column.
    """
    for expert in df.EXPERT.unique().tolist():
        ex_name = ''.join(char for char in expert if char not in '.,')
        filename = title + '-' + ex_name
        filename = filename.strip().lower().replace(' ', '-')
        utils.histogram(
            data=df[ df.EXPERT == expert ][column],
            filename='charts/fantasypros/{}.png'.format(filename),
            title='{} - {}'.format(title, expert),
            figsize=(10,5),
            titlesize=26,
            xsize=26,
            xlim=(-40, 40),
            small=True
        )
Code example #7
def frequency(data, column, n):
    counts = histogram(data[column].values)
    if len(counts) < n:
        n = len(counts)
    labels, x = unzip(counts)
    _, ax = plt.subplots(figsize=(10, 4))
    y = list(range(n))
    ax.barh(y, x[-n:])
    plt.yticks(y, tuple(labels[-n:]), fontsize=7)
    plt.tight_layout()
    plt.savefig("pngs/{}_frequency.png".format(column))
    plt.close()
    return counts, n
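
histogram and unzip are assumed helpers in this snippet; from how their results are used, histogram appears to return (label, count) pairs sorted by count. A hypothetical stand-in for both (an assumption, not the project's code):

from collections import Counter

def histogram(values):
    # (label, count) pairs, sorted so the most frequent labels come last.
    return sorted(Counter(values).items(), key=lambda pair: pair[1])

def unzip(pairs):
    # Split (label, count) pairs into two parallel tuples.
    labels, counts = zip(*pairs)
    return labels, counts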
Code example #8
ファイル: movierank.py プロジェクト: biern/movierank
def main():
    logging.basicConfig(level=logging.DEBUG)
    # Parser and args
    parser = create_parser()
    args = parser.parse_args()
    # Setup resources and dirs
    dest = open(args.out, 'w')
    res_dir = os.path.split(os.path.abspath(__file__))[0]
    template = open(os.path.join(res_dir, 'template.html'), 'r').read()
    output = HTMLOutput(dest, template)
    cache_dir = os.path.split(args.out)[0]
    # Use cache
    dbs = [FilmwebDatabase()]
    if not args.force:
        cache = load_cache(cache_dir, args.out)
        if cache:
            logging.info("using cache file")
            dbs = cache

    # Get movies
    movies = find_movies_info(args.dirs, dbs, output, '-rating')

    # Histogram?
    if args.histogram:
        path = os.path.join(cache_dir, '.movierank-histogram.png')
        histogram(movies, path)
        output.add_extra('histogram', path)

    # Finish
    store_cache(cache_dir, dbs, suffix=args.out)
    output.flush()

    # Run browser?
    if args.run:
        subprocess.Popen(["xdg-open", args.out],
                         stderr=subprocess.STDOUT,
                         stdout=subprocess.PIPE)
Code example #9
def main():
    args = parser.parse_args()

    with open(args.data, 'r') as f:
        data_serial = f.read()

    data_json = json.loads(data_serial)

    # tag => concatenated articles
    tagged_corpus_by_articles = defaultdict(lambda: [])

    for example in data_json:
        tag = re.sub('\s', '_', example['tag']).lower()
        tagged_corpus_by_articles[tag].append(example['content'])

    tagged_corpus = {
        tag: histogram(' '.join(articles).split())
        for tag, articles in tagged_corpus_by_articles.iteritems()
    }

    if not args.output:
        args.output = os.path.join(os.path.dirname(args.data), 'classifier')

    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    vocab = set()

    for tag, card in tagged_corpus.iteritems():
        filepath = os.path.join(args.output, tag)
        vocab |= set(card.keys())
        with open(filepath, 'w+') as f:
            for w, c in sorted(card.iteritems(),
                               key=lambda (w, c): c,
                               reverse=True):
                print >> f, "{} {}".format(c, w)
            print >> f

    with open(os.path.join(args.output, parameters.PRIORS_FILE), 'w+') as f:
        for tag, articles in tagged_corpus_by_articles.iteritems():
            print >> f, "{} {}".format(len(articles), tag)

    with open(os.path.join(args.output, parameters.VOCAB_FILE), 'w+') as f:
        for w in sorted(vocab):
            print >> f, w
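
This snippet is Python 2 throughout (iteritems, tuple-unpacking lambdas, print >> f). If it were ported, the tuple-unpacking sort key is the one construct with no direct Python 3 equivalent; an illustrative replacement with made-up counts:

import operator

# Python 3 equivalent of sorted(card.iteritems(), key=lambda (w, c): c, reverse=True)
word_counts = {"the": 12, "of": 7, "histogram": 3}
for w, c in sorted(word_counts.items(), key=operator.itemgetter(1), reverse=True):
    print("{} {}".format(c, w))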
Code example #10
ファイル: codec.py プロジェクト: zbanach/koda
    def __init__(self, codec=None):
        if codec:
            self._source_len = len(codec._source)
            self._entropy = entropy(codec._source)
            self._hist = codec._hist if codec._hist else histogram(codec._source)
            self._symbol_size = int(math.ceil(math.log(max(self._hist.keys()) or 1, 2)))
            self._cr = float(self._source_len) * self._symbol_size / codec._stream_len
            self._mean_code_len = float(codec._stream_data_len) / self._source_len
            self._source_size = self._symbol_size * self._source_len
            self._stream_size = codec._stream_len
        else:
            self._source_len = 0
            self._entropy = 0
            self._hist = {}
            self._cr = 0
            self._mean_code_len = 0
            self._symbol_size = 0
            self._source_size = 0
            self._stream_size = 0
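
entropy and histogram are not shown here. As a reminder of what the statistics mean, a toy sketch of the entropy and compression-ratio formulas used above (the histogram and stream length are made up):

import math

hist = {0: 5, 1: 3, 2: 2}                  # symbol -> count (illustrative)
total = float(sum(hist.values()))
# Shannon entropy in bits per symbol.
entropy = -sum((c / total) * math.log(c / total, 2) for c in hist.values())
# Compression ratio = (source symbols * bits per symbol) / encoded stream length.
symbol_size = 2                            # bits needed to represent symbols 0..2
stream_len = 16                            # hypothetical encoded length in bits
cr = total * symbol_size / stream_len
print(round(entropy, 3), cr)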
Code example #11
def main():
    args = parser.parse_args()

    with open(args.data, 'r') as f:
        data_serial = f.read()

    data_json = json.loads(data_serial)

    # tag => concatenated articles
    tagged_corpus_by_articles = defaultdict(lambda: [])

    for example in data_json:
        tag = re.sub('\s', '_', example['tag']).lower()
        tagged_corpus_by_articles[tag].append(example['content'])

    tagged_corpus = {tag: histogram(' '.join(articles).split()) for tag, articles in tagged_corpus_by_articles.iteritems()}

    if not args.output:
        args.output = os.path.join(os.path.dirname(args.data), 'classifier')

    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    vocab = set()

    for tag, card in tagged_corpus.iteritems():
        filepath = os.path.join(args.output, tag)
        vocab |= set(card.keys())
        with open(filepath, 'w+') as f:
            for w, c in sorted(card.iteritems(), key=lambda (w, c): c, reverse=True):
                print >> f, "{} {}".format(c, w)
            print >> f

    with open(os.path.join(args.output, parameters.PRIORS_FILE), 'w+') as f:
        for tag, articles in tagged_corpus_by_articles.iteritems():
            print >> f, "{} {}".format(len(articles), tag)

    with open(os.path.join(args.output, parameters.VOCAB_FILE), 'w+') as f:
        for w in sorted(vocab):
            print >> f, w
Code example #12
    def get_shrunk_channels(self, src):
        shrink = self.options["shrink"]
        n_orient = self.options["n_orient"]
        grd_smooth_rad = self.options["grd_smooth_rad"]
        grd_norm_rad = self.options["grd_norm_rad"]

        luv = rgb2luv(src)
        size = (luv.shape[0] / shrink, luv.shape[1] / shrink)
        channels = [resize(luv, size)]

        for scale in [1.0, 0.5]:
            img = resize(luv, (luv.shape[0] * scale, luv.shape[1] * scale))
            img = conv_tri(img, grd_smooth_rad)

            magnitude, orientation = gradient(img, grd_norm_rad)

            downscale = max(1, int(shrink * scale))
            hist = histogram(magnitude, orientation, downscale, n_orient)

            channels.append(resize(magnitude, size)[:, :, None])
            channels.append(resize(hist, size))

        channels = N.concatenate(channels, axis=2)

        reg_smooth_rad = self.options["reg_smooth_rad"] / float(shrink)
        ss_smooth_rad = self.options["ss_smooth_rad"] / float(shrink)

        if reg_smooth_rad > 1.0:
            reg_ch = conv_tri(channels, int(round(reg_smooth_rad)))
        else:
            reg_ch = conv_tri(channels, reg_smooth_rad)

        if ss_smooth_rad > 1.0:
            ss_ch = conv_tri(channels, int(round(ss_smooth_rad)))
        else:
            ss_ch = conv_tri(channels, ss_smooth_rad)

        return reg_ch, ss_ch
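
Here histogram(magnitude, orientation, downscale, n_orient) refers to a gradient-orientation histogram; the actual implementation is not shown. A rough sketch of the idea under those assumptions (binning gradient magnitude by orientation, ignoring the downscale step):

import numpy as np

def orientation_histogram(magnitude, orientation, n_orient=4):
    # Assign each pixel's orientation (assumed in [0, pi)) to one of n_orient bins
    # and accumulate its gradient magnitude into that bin's channel.
    bins = np.minimum((orientation / np.pi * n_orient).astype(int), n_orient - 1)
    channels = np.zeros(magnitude.shape + (n_orient,))
    for o in range(n_orient):
        channels[..., o] = magnitude * (bins == o)
    return channels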
Code example #13
    def train(self, data):
        self._processor.process_examples(data)

        articles_per_tag = defaultdict(lambda: [])
        for example in data:
            tag = self.normalize_tag_label(example['tag'])
            if tag in self.IGNORE_TAGS:
                continue
            articles_per_tag[tag].append(example['tokens'])

        self._ntokens_per_tag = {
            tag: histogram(token for article in articles for token in article)
            for tag, articles in articles_per_tag.iteritems()
        }
        self._ndocs_per_tag = {
            tag: len(articles)
            for tag, articles in articles_per_tag.iteritems()
        }
        self._ndocs = sum(self._ndocs_per_tag.values())
        self._vocab = set(t
                          for tag, tokens in self._ntokens_per_tag.iteritems()
                          for t in tokens.keys())
        self._tags = list(self._ntokens_per_tag.keys())
        self._weights = self._compute_weights()

        for tag, tokens in self._ntokens_per_tag.iteritems():
            total = sum(tokens.values())
            with open(
                    '/Users/bernardorufino/pastebin/classifier/{}.dat'.format(
                        tag), 'w') as f:
                for token, n in sorted(tokens.iteritems(),
                                       key=lambda (t, n): n,
                                       reverse=True):
                    f.write("{:<14} {:<5} {:<5.2f} {:<5.2f}\n".format(
                        token, n,
                        float(n) / total, self._weights[token]))
                f.write('\n')
Code example #14
    def train(self, data):
        self._processor.process_examples(data)

        articles_per_tag = defaultdict(lambda: [])
        for example in data:
            tag = self.normalize_tag_label(example['tag'])
            if tag in self.IGNORE_TAGS:
                continue
            articles_per_tag[tag].append(example['tokens'])

        self._ntokens_per_tag = {tag: histogram(token for article in articles for token in article)
                                 for tag, articles in articles_per_tag.iteritems()}
        self._ndocs_per_tag = {tag: len(articles) for tag, articles in articles_per_tag.iteritems()}
        self._ndocs = sum(self._ndocs_per_tag.values())
        self._vocab = set(t for tag, tokens in self._ntokens_per_tag.iteritems() for t in tokens.keys())
        self._tags = list(self._ntokens_per_tag.keys())
        self._weights = self._compute_weights()

        for tag, tokens in self._ntokens_per_tag.iteritems():
            total = sum(tokens.values())
            with open('/Users/bernardorufino/pastebin/classifier/{}.dat'.format(tag), 'w') as f:
                for token, n in sorted(tokens.iteritems(), key=lambda (t, n): n, reverse=True):
                    f.write("{:<14} {:<5} {:<5.2f} {:<5.2f}\n".format(token, n, float(n) / total, self._weights[token]))
                f.write('\n')
Code example #15
def dram_multiple_contours(img, contours, max_contours=10, approximate=False):
    # draw in blue the contours that were found
    image_entropy = img.copy()
    cv2.drawContours(img, contours, -1, 255, 3)

    # sort the contours by area, largest first
    c = sorted(contours, key=cv2.contourArea, reverse=True)

    # draw bounding boxes of the largest contours in green
    overlap_area = np.zeros((max_contours, 4))
    for i in range(max_contours):
        x, y, w, h = cv2.boundingRect(c[i])

        entropy_computed = (entropy(
            histogram(crop_image(image_entropy, (x, y, w, h)))))
        print(overlap_area)

        if entropy_computed > 7:
            if not overlap(overlap_area, (x, y, w, h), i):
                print(overlap(overlap_area, (x, y, w, h), i))
                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

                print(x, y, w, h)
                overlap_area[i, :] = x, y, w, h
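
crop_image and overlap are assumed helpers. Hypothetical stand-ins consistent with how they are called above (a crop by bounding rectangle, and an axis-aligned intersection test against the boxes stored so far):

def crop_image(img, rect):
    # Crop an (x, y, w, h) bounding rectangle out of the image.
    x, y, w, h = rect
    return img[y:y + h, x:x + w]

def overlap(areas, rect, upto):
    # True if rect intersects any of the first `upto` stored (x, y, w, h) boxes.
    x, y, w, h = rect
    for px, py, pw, ph in areas[:upto]:
        if x < px + pw and px < x + w and y < py + ph and py < y + h:
            return True
    return False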
Code example #16
    for C in C_list_log:
        clf = SVM.gaussian_kernel(label, data, 0.0, 80, 10**C)
        free_SV, free_SV_coef = SVM.free_SV(clf, 10**C)
        SV = SVM.get_SV(clf)
        SV_coef = SVM.get_dual_coef(clf)
        dis = SVM.cal_dis(SV, SV_coef[0], free_SV[0])
        dis_list.append(dis)
    utils.curve(C_list_log, dis_list, '14.png', 'log(C)', 'dis')
    
    # question 15
    gamma_list = [0, 1, 2, 3, 4]
    C = 0.1
    E_out_list = []
    for gamma in gamma_list:
        clf = SVM.gaussian_kernel(label, data, 0.0, 10**gamma, C)
        E_out_list.append(SVM.error_0_1(utils.which_binary(test_label, 0), test_data, clf))
    utils.curve(gamma_list, E_out_list, '15.png', 'log(gamma)', 'E_out')

    # question 16
    C = 0.1
    gamma_list = [-1, 0, 1, 2, 3]
    gamma_pick = [0, 0, 0, 0, 0]
    for i in xrange(100):
        val_label, val_data, train_label, train_data = utils.split_data(label, data, 1000)
        E_val_list = []
        for gamma in gamma_list:
            clf = SVM.gaussian_kernel(train_label, train_data, 0.0, 10**gamma, C)
            E_val_list.append(SVM.error_0_1(utils.which_binary(val_label, 0), val_data, clf))
        gamma_pick[E_val_list.index(max(E_val_list))] += 1
    utils.histogram(gamma_list, gamma_pick, '16.png', 'log(gamma)', '#selected')
Code example #17
    def processInputData(self, *args):
        image, label = super(GramHistoResizeModel,
                             self).processInputData(*args)
        return image, histogram(image), label
Code example #18
train_lh, train_prior = naive_bayes.naive_bayes(train_data)

#=====================================
# 2j. plot and predict the movies
#
movies = ['Finding Nemo', 'The Matrix', 'Gone with the Wind', 'Harry Potter and the Goblet of Fire', 'Avatar']
test_movies = findMovie(all_movies, movies)
for tm in test_movies:
    predicted_y = naive_bayes.predict(train_lh, train_prior, utils.bags(tm['summary']))
    minY, maxY = naive_bayes.findMinMaxY(predicted_y)
    x = []
    y = []
    for year in predicted_y:
        x.append(year)
        y.append(predicted_y[year]+abs(predicted_y[minY]))
    utils.histogram(x, y, 'Decade', 'Posterior Probability', tm['title']+' ('+str(tm['year'])+') Histogram of Posterior Probability for each decade')
    print tm['title']+' is done.', 'Predicted decade '+str(maxY), 'Real decade '+str(tm['year'])

#======================================
# 2k. Accuracy measurement
#
accuracy = 0
for d in test_data:
    predicted_y = naive_bayes.predict(train_lh, train_prior, utils.bags(d['summary']))
    minY, maxY = naive_bayes.findMinMaxY(predicted_y)
    if maxY == d['year']:
        accuracy += 1

accuracy /= float(len(test_data))
print 'The accuracy of the model on test data is ', accuracy
Code example #19
File: main.py Project: julianx/histograms
        3.03, 1.79, 0.78, 0.82, 0.00, 0.92, 0.69, 1.07, 2.26, 0.61, 0.62, 0.00, 1.10, 0.86, 1.17, 0.48, 1.09, 0.53,
        0.94, 0.63, 0.63, 0.86, 0.68, 0.63, 0.49, 0.44, 0.33, 0.28, 0.36, 0.99, 0.49, 0.53, 0.65, 0.49, 0.73, 0.48,
        0.40, 0.90, 0.80, 0.52, 0.67, 0.94, 0.89, 0.69, 0.62, 0.84, 0.29, 0.51, 0.75, 0.52, 0.99, 0.30, 0.36, 0.48,
        0.48, 0.31, 0.38, 0.33, 0.35, 0.50, 1.31, 0.34, 0.43, 0.52, 0.32, 0.56, 0.62, 0.56, 0.79, 0.30, 0.53, 0.36,
        0.47, 0.33, 0.50, 0.63, 0.65, 0.49, 0.42, 0.34, 0.45, 0.53, 5.17, 0.63, 0.61, 0.65, 0.39, 0.53, 0.73, 0.39,
        0.39, 0.29, 0.29, 0.28, 0.47, 0.36, 0.86, 0.53, 0.50, 0.29, 0.45, 0.49, 0.44, 0.25, 0.31, 0.40, 0.63, 0.26,
        0.71, 0.58, 0.57, 0.41, 0.53, 1.16, 0.32, 0.14, 0.15, 0.23, 0.10, 0.15, 1.20, 0.52
)

# %%
# Campaign 0 (Stage 1) Stamps
# Campaign 1 (Stage 2) Pages and stamps <<<
# Campaign 2 (Stage 3) Only pages
# Campaign 4 (Stage 5) Pages and stamps <<<

utils.histogram(data=stage2b, x_label='Time (mins)', y_label='Frequency',
                title='Labeling for Stage2 - Time per image - Reproduced results', bins='auto', counter=0)

utils.histogram(data=stage3b, x_label='Time (mins)', y_label='Frequency',
                title='Labeling for Stage3 - Time per image - Reproduced results', bins='auto', counter=0)

utils.histogram(data=stage4b, x_label='Time (mins)', y_label='Frequency',
                title='Labeling for Campaign4 - Time per image', bins='auto', counter=0)

# %%

utils.histogram(data=stage2_labels, x_label='Time (mins)', y_label='Frequency',
                title='Labeling for Stage2 - Time per label', bins='auto', counter=0)

utils.histogram(data=stage3_labels, x_label='Time (mins)', y_label='Frequency',
                title='Labeling for Stage3 - Time per label', bins='auto', counter=0)
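
utils.histogram here is a project-specific wrapper that is not shown. A minimal matplotlib equivalent with the same keyword arguments, offered only as an illustration of what such a wrapper might do:

import matplotlib.pyplot as plt

def histogram(data, x_label, y_label, title, bins='auto', counter=0):
    # Plot a frequency histogram and save it under a name derived from the title.
    plt.figure()
    plt.hist(data, bins=bins)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.savefig('{}_{}.png'.format(title.replace(' ', '_'), counter))
    plt.close()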
Code example #20
all_movies = list(
    parse_movies.load_all_movies(os.path.join(config.baseDir,
                                              config.data_file)))

#==============================================
# 2a. PMF of P(Y)
#==============================================
pmf, data_year = cal_pmf(all_movies)
n = len(data_year)
x = []
y = []
for year, amount in pmf.iteritems():
    x.append(year)
    y.append(float(amount) / float(n))
utils.histogram(x, y, 'Decade', 'PMF', 'PMF of P(Y)')
print 'PMF of P(Y) done'

#==============================================
# 2b. PMF of P(Y|X"radio">0)
#==============================================
pmf, data_year = cal_pmf(all_movies, 'radio')
n = len(data_year)
x = []
y = []
for year, amount in pmf.iteritems():
    x.append(year)
    y.append(float(amount) / float(n))
utils.histogram(x, y, 'Decade', 'PMF', 'PMF of P(Y|X"radio">0)')
print 'PMF of P(Y|X"radio">0) done'
Code example #21
'''
Created on 7 Mar 2017

Generates the histograms needed in the task
@author: jorge
'''
from utils import histogram
import networkx as nx
import matplotlib.pyplot as plt

path='A1-networks/'
files=['model/ER1000k8.net', 'model/SF_1000_g2.7.net', 'model/ws1000.net', 'real/airports_UW.net']
names=['ER1000k8', 'SF_1000_g2.7', 'ws1000', 'airports_UW']

for i in range(len(files)):
    G=nx.read_pajek(path+files[i])
    plt=histogram(G, log=True, norm=True, n=10)
    plt.title('Log histogram for '+names[i])
    plt.savefig('log_'+names[i]+'.png')
    plt.clf()
    plt=histogram(G, log=False, norm=True, n=10)
    plt.title('Normed histogram for '+ names[i])
    plt.savefig('norm_'+names[i]+'.png')
    plt.clf()
    plt=histogram(G, log=True, norm=True,cumu=-1, n=10)
    plt.title('Cumulative log histogram for '+names[i])
    plt.savefig('Cumu_log_'+names[i]+'.png')
    plt.clf()
    plt=histogram(G, log=False, norm=True,cumu=-1, n=10)
    plt.title('Cumulative normed histogram for '+ names[i])
    plt.savefig('Cumu_norm_'+names[i]+'.png')
    plt.clf()
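
The custom histogram(G, ...) helper imported from utils is not shown. For comparison, a plain degree histogram with networkx and matplotlib (an illustrative sketch, not the helper itself):

import networkx as nx
import matplotlib.pyplot as plt

G = nx.erdos_renyi_graph(1000, 8 / 999.0)   # stand-in for the ER1000k8 network
degree_counts = nx.degree_histogram(G)      # index = degree, value = frequency
plt.bar(range(len(degree_counts)), degree_counts)
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.title('Degree histogram (illustrative)')
plt.savefig('degree_histogram_example.png')
plt.clf()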
Code example #22
movies = [
    'Finding Nemo', 'The Matrix', 'Gone with the Wind',
    'Harry Potter and the Goblet of Fire', 'Avatar'
]
test_movies = findMovie(all_movies, movies)
for tm in test_movies:
    predicted_y = naive_bayes.predict(train_lh, train_prior,
                                      utils.bags(tm['summary']))
    minY, maxY = naive_bayes.findMinMaxY(predicted_y)
    x = []
    y = []
    for year in predicted_y:
        x.append(year)
        y.append(predicted_y[year] + abs(predicted_y[minY]))
    utils.histogram(
        x, y, 'Decade', 'Posterior Probability',
        tm['title'] + ' (' + str(tm['year']) +
        ') Histogram of Posterior Probability for each decade')
    print tm['title'] + ' is done.', 'Predicted decade ' + str(
        maxY), 'Real decade ' + str(tm['year'])

#======================================
# 2k. Accuracy measurement
#
accuracy = 0
for d in test_data:
    predicted_y = naive_bayes.predict(train_lh, train_prior,
                                      utils.bags(d['summary']))
    minY, maxY = naive_bayes.findMinMaxY(predicted_y)
    if maxY == d['year']:
        accuracy += 1