def getCategoriesNB():
    """Classify every news post with naive Bayes and return a text report.

    Loads the three pickled model dictionaries whose paths are exposed by
    ``naivebayes_classification`` (``str_dict_word_in_cat``,
    ``str_dict_cat_count`` and ``str_dict_priors``), asks
    ``test_classifications.get_NB_category`` for the category of each post
    returned by ``crawler.take_all_news_posts()`` and groups the posts by
    that category.

    The report lists the number of documents, one
    ``<category>\\t\\t<count>`` line per category, and then every category
    followed by the tab-indented titles of its posts.

    Exceptions are not propagated: the exception type and text are appended
    to whatever part of the report was already built.

    Returns:
        A ``Response`` with mimetype ``text/plain`` carrying the report.
    """
    response = ''
    feedback = ''
    try:
        newsPosts = crawler.take_all_news_posts()

        # NOTE(security): unpickling can execute arbitrary code. This assumes
        # the model files are trusted artifacts produced by this project --
        # confirm they can never be replaced with external data.
        # Pickle data is binary, hence 'rb'; ``with`` closes each file even
        # when loading fails.
        with open(naivebayes_classification.str_dict_word_in_cat, 'rb') as model_file:
            dict_words = Unpickler(model_file).load()

        with open(naivebayes_classification.str_dict_cat_count, 'rb') as model_file:
            dict_cats = Unpickler(model_file).load()

        with open(naivebayes_classification.str_dict_priors, 'rb') as model_file:
            dict_priors = Unpickler(model_file).load()

        # category -> list of posts assigned to it
        dict_results = {}
        for post in newsPosts:
            category = test_classifications.get_NB_category(
                post.words, dict_words, dict_cats, dict_priors)
            dict_results.setdefault(category, []).append(post)

        # The report is accumulated inside the ``try`` on purpose: a
        # formatting/encoding failure is then reported like any other error.
        response += 'number of documents: %d\n' % len(newsPosts)
        for cat in dict_results:
            response += '%s\t\t%d\n' % (cat, len(dict_results[cat]))

        for cat in dict_results:
            response += '%s\n' % cat
            for post in dict_results[cat]:
                response += '\t%s\n' % post.title
            response += '\n'

    except Exception as inst:
        feedback += 'Exception type: %s\n' % type(inst)
        # ``inst.message`` is deprecated and absent on many exceptions;
        # formatting the exception itself always works.
        feedback += 'Exception: %s\n' % (inst,)

    response += feedback
    return Response(response, mimetype='text/plain')
def getClusters():
    """Cluster all news posts, label each cluster by majority vote and save it.

    Steps, as performed by the code below:

    1. Fetch the posts with ``crawler.take_all_news_posts()`` and load the
       three pickled naive Bayes dictionaries whose paths are exposed by
       ``naivebayes_classification``.
    2. Run ``clustering.cluster_news`` and order the clusters by descending
       number of posts.
    3. For each cluster, classify every post with
       ``test_classifications.get_NB_category`` and take the most frequent
       category as the cluster category (``''`` for a cluster without posts).
    4. Build a ``NewsPostClient`` per post, wrap them in a ``Cluster`` and
       call its ``put()`` (presumably persisting it -- verify against the
       model definition).

    The response body is the per-cluster listing of post titles followed by
    the diagnostic log. Exceptions are not propagated: their type and text
    are appended to the log, and whatever was produced so far is returned.
    Clusters stored before the failure are not rolled back.

    Returns:
        A ``Response`` with mimetype ``text/plain``.
    """
    feedback = ''
    report = ''  # formerly named ``str``, which shadowed the builtin
    try:
        newsPosts = crawler.take_all_news_posts()

        # Utility dicts for majority voting with naive Bayes.
        # NOTE(security): unpickling can execute arbitrary code. This assumes
        # the model files are trusted artifacts produced by this project --
        # confirm they can never be replaced with external data.
        # Pickle data is binary, hence 'rb'; ``with`` closes each file even
        # when loading fails.
        with open(naivebayes_classification.str_dict_word_in_cat, 'rb') as model_file:
            dict_words = Unpickler(model_file).load()

        with open(naivebayes_classification.str_dict_cat_count, 'rb') as model_file:
            dict_cats = Unpickler(model_file).load()

        with open(naivebayes_classification.str_dict_priors, 'rb') as model_file:
            dict_priors = Unpickler(model_file).load()

        feedback += 'took the newsposts \n'

        clusters, innerfeedback = clustering.cluster_news(newsPosts)

        feedback += '%s\n' % innerfeedback
        feedback += 'done the clustering\n'
        feedback += 'num of clusters: %d\n' % len(clusters)

        # Largest clusters first; the sort is stable, so clusters of equal
        # size keep the order in which the clustering returned them.
        clusters = sorted(clusters, key=lambda cluster: len(cluster.posts),
                          reverse=True)

        for index, cluster in enumerate(clusters):
            feedback += 'getting posts from cluster\n'
            newsInCluster = cluster.posts
            feedback += 'got the posts from cluster\n'

            report += 'cluster %d\n' % index

            # Majority voting: count the predicted category of every post.
            votes_cat = {}
            for post in newsInCluster:
                report += ' \t %s\n' % post.title
                category = test_classifications.get_NB_category(
                    post.words, dict_words, dict_cats, dict_priors)
                votes_cat[category] = 1 + votes_cat.get(category, 0)

            if votes_cat:
                # On a tie ``max`` keeps the first category encountered.
                maxCat = max(votes_cat, key=votes_cat.get)
                maxVotes = votes_cat[maxCat]
            else:
                # Empty cluster: no votes were cast.
                maxCat = ''
                maxVotes = 0

            feedback += '^^^ CLUSTER CATEGORY: %s with maxVotes: %d\n' % (maxCat, maxVotes)

            listNews = []

            feedback += ' number of posts in cluster %d\n' % len(cluster.posts)
            for post in cluster.posts:
                feedback += 'trying to create NewsPostClient\n'
                feedback += 'title: %s \n' % post.title
                feedback += 'numWords: %d\n' % post.numWords
                feedback += 'url: %s\n' % post.url

                newNews = NewsPostClient(
                    url=post.url, host_page=post.host_page, title=post.title,
                    numWords=post.numWords, source_id=post.source_id,
                    source_url=post.source_url,
                    img_url=post.img_url, description=post.description)

                # Trailing newline added so this entry no longer runs into
                # the next log line.
                feedback += 'created NewsPostClient\n'
                listNews.append(newNews)
                feedback += 'appended newNews\n'

            newCluster = Cluster(category=maxCat, listNews=listNews)
            newCluster.put()

            report += '\n'

    except Exception as inst:
        feedback += 'Exception type: %s\n' % (type(inst))
        # ``inst.message`` is deprecated and absent on many exceptions;
        # formatting the exception itself always works.
        feedback += 'Exception: %s\n' % (inst,)

    # The log is appended exactly once; it used to be added both at the end
    # of the ``try`` body and here, duplicating it on every successful run.
    report += feedback
    return Response(report, mimetype='text/plain')