Ejemplo n.º 1
0
def getClustersWithCat():
    """Return a plain-text listing of the clusters stored for one category.

    Reads the ``category`` query parameter of the current request, fetches
    the ``Cluster`` entities whose ``category`` equals it and writes, for
    each one, its 1-based index, its category and the title of every item
    in its ``listNews``.

    Any exception is caught and reported in the response body (after
    whatever text had been produced before the failure) rather than
    propagated to the caller.

    Returns:
        Response: the listing, with mimetype ``text/plain``.
    """
    output = ''
    try:
        category = request.args.get('category')

        # Progress trace: it only reaches the client when the query below
        # raises, because the buffer is reset once the fetch has succeeded.
        output += 'got parameter category: %s\n' % category
        clusters = Cluster.query(Cluster.category == category).fetch()

        output = ''
        for index, cluster in enumerate(clusters, 1):
            output += 'cluster %d:\n' % index
            output += 'category: %s\n' % cluster.category

            for post in cluster.listNews:
                output += '\t%s\n' % post.title

            output += '\n'

    except Exception as inst:
        # Format the exception object itself: ``inst.message`` is deprecated,
        # absent on Python 3 and empty for multi-argument exceptions.
        output += 'Exception type: %s\n' % type(inst)
        output += 'Exception: %s\n' % inst

    return Response(output, mimetype='text/plain')
Ejemplo n.º 2
0
def getClustersServer():
    """Render the clusters of the requested category as plain text.

    The ``category`` query parameter selects the ``Cluster`` entities. The
    body starts with the category and the number of clusters found, followed
    by one numbered section per cluster (numbering starts at 1) listing the
    title and source URL of each item in ``listNews``.

    Returns:
        Response: the text, with mimetype ``text/plain``.
    """
    wanted_category = request.args.get('category')
    found = Cluster.query(Cluster.category == wanted_category).fetch()

    header = 'Got category %s\n' % wanted_category

    if found is None:
        # The header is dropped in this case; only the marker text is sent.
        body = 'None clusters :/'
    else:
        pieces = [header, 'Number of clusters %d\n' % len(found)]

        for position, cluster in enumerate(found, 1):
            pieces.append('Cluster %d\n' % position)
            pieces.extend(
                '\t%s\t%s\n' % (item.title, item.source_url)
                for item in cluster.listNews
            )

        body = ''.join(pieces)

    return Response(body, mimetype='text/plain')
Ejemplo n.º 3
0
def prepareClustering():
    """Delete every stored cluster, then run ``getClusters()``.

    All ``Cluster`` entities are removed by key before ``getClusters()`` is
    called; its return value is discarded. A debug message is logged before
    and after the work.

    Returns:
        str: always the empty string.
    """
    logging.debug('preparing for refreshing..')

    # Keys-only query: the entities themselves are not needed to delete them.
    stale_keys = Cluster.query().fetch(keys_only=True)
    ndb.delete_multi(stale_keys)

    getClusters()

    logging.debug('refreshed!')
    return ''
Ejemplo n.º 4
0
def getMYClusters():
    """Return the clusters of the requested category as a JSON document.

    The ``category`` query parameter selects the ``Cluster`` entities; each
    one contributes the value of its ``serialize()`` method to the list
    stored under the ``listClusters`` key.

    Returns:
        Response: the ASCII-escaped JSON text, sent with mimetype
        ``text/plain``.
    """
    wanted = request.args.get('category')
    matching = Cluster.query(Cluster.category == wanted).fetch()

    serialized = []
    for cluster in matching:
        serialized.append(cluster.serialize())

    payload = json.dumps({'listClusters': serialized}, ensure_ascii=True)

    return Response(payload, mimetype='text/plain')
Ejemplo n.º 5
0
def getFilteredClustersDebug():
    """Return a plain-text dump of a category's clusters, filtered by source.

    Query parameters:
        wantedSources: comma-separated integer source ids, e.g. ``1,2``.
            Blank entries (an empty value, a trailing comma) are ignored;
            a non-numeric entry still raises ``ValueError``.
        category: category of the ``Cluster`` entities to fetch.

    Only news whose ``source_id`` is among the wanted ids is kept, and a
    cluster left without news after filtering is omitted. The
    ``Number of clusters`` line reports the count before filtering.

    Returns:
        Response: the debug text, with mimetype ``text/plain``.
    """
    reqParam = request.args.get('wantedSources')

    # The value of the wantedSources parameter has the form id1,id2.
    # Convert the ids to integers, skipping blank tokens so that
    # "wantedSources=" or "1,2," does not crash on int('').
    listWanted = []
    if reqParam is not None:
        listWanted = [int(token) for token in reqParam.split(',')
                      if token.strip()]

    result = 'Wanted sources: %s\n' % listWanted

    category = request.args.get('category')

    clusters = Cluster.query(Cluster.category == category).fetch()

    # Set built once so each membership test below is a constant-time lookup.
    wanted_ids = set(listWanted)

    # Only clusters that still hold news after filtering are collected here
    # (news from sources the user does not want is dropped).
    clustersToReturn = []

    for c in clusters:
        kept = [post for post in c.listNews if post.source_id in wanted_ids]
        # A cluster with no news left after filtering is empty, so skip it.
        if kept:
            # The entity is not put() in this function, so only the
            # in-memory copy is changed.
            c.listNews = kept
            clustersToReturn.append(c)

    # debugging: total number of fetched clusters, before filtering
    result += 'Number of clusters %d\n' % len(clusters)

    for index, c in enumerate(clustersToReturn, 1):
        result += 'Cluster %d\n' % index
        for post in c.listNews:
            result += '\t%s\t%s\n' % (post.title, post.source_url)

    return Response(result, mimetype='text/plain')
Ejemplo n.º 6
0
    def get_clusters(self):
        """Build the clusters described in the JSON file ``self.filename``.

        The file is expected to hold an object with a ``clusters`` list. One
        ``Cluster`` is created per entry; it receives the dimensions returned
        by ``self.get_dimensions()`` and, when present and valid, the entry's
        ``type`` (matched against the names in ``Distribution``),
        ``cardinality`` and ``density`` (integers from 1 to 10).

        A missing, empty or invalid element is reported with ``print`` and
        the corresponding setter is simply not called.
        NOTE(review): the printed messages mention defaults ('gaussian', 5);
        this method does not set them, so presumably ``Cluster`` supplies
        them itself. Confirm against the ``Cluster`` class.

        Returns:
            list: the created clusters; empty when the file has no
            ``clusters`` element.

        Raises:
            ValueError: if ``cardinality`` or ``density`` cannot be
                converted with ``int()``.
        """
        clusters = []

        with open(self.filename) as source:
            data = json.load(source)

        # .get() instead of [] so that an absent key reaches the
        # "Couldn't find" report instead of raising KeyError.
        clusters_json = data.get("clusters")
        if not clusters_json:
            print("Couldn't find 'clusters' element in input file")
            return clusters

        for cluster_json in clusters_json:
            cluster = Cluster()
            cluster.set_dimensions(self.get_dimensions())

            type_name = cluster_json.get("type")
            if type_name:
                type_found = False
                for distribution in Distribution:
                    if distribution.name == type_name:
                        cluster.set_distribution(Distribution[distribution.name])
                        type_found = True
                if not type_found:
                    print("Couldn't map distribution type. Defaulting to 'gaussian'")
            else:
                print("Couldn't find 'type' element for cluster")

            # cardinality and density share the same validation rules.
            for key, setter in (("cardinality", cluster.set_cardinality),
                                ("density", cluster.set_density)):
                raw = cluster_json.get(key)
                if not raw:
                    print("Couldn't find '%s' element for cluster" % key)
                    continue

                value = int(raw)
                if 0 < value <= 10:
                    setter(value)
                else:
                    print("Invalid %s. Has to be within 1 to 10. Defaulting to 5." % key)

            clusters.append(cluster)

        return clusters
Ejemplo n.º 7
0
def getMYNews():
    """Fetch the stored news posts and answer with an empty body.

    Every ``NewsPost`` is converted into a ``NewsPostClient`` carrying its
    url, host page, title and description, and the clusters of the requested
    ``category`` are fetched as well, but neither result is written to the
    response.

    Returns:
        Response: an empty body with mimetype ``application/javascript``.
    """
    category = request.args.get('category')
    stored_posts = NewsPost.query().fetch()
    # Fetched but not used anywhere below.
    category_clusters = Cluster.query(Cluster.category == category).fetch()

    client_posts = [
        NewsPostClient(url=post.url, host_page=post.host_page,
                       title=post.title, description=post.description)
        for post in stored_posts
    ]

    # NOTE(review): client_posts is never serialized; the endpoint currently
    # always answers with an empty string.
    return Response('', mimetype='application/javascript')
Ejemplo n.º 8
0
def getFilteredClusters():
    """Return, as JSON, a category's clusters restricted to wanted sources.

    Query parameters:
        wantedSources: comma-separated integer source ids, e.g. ``1,2``.
            Blank entries (an empty value, a trailing comma) are ignored;
            a non-numeric entry still raises ``ValueError``.
        category: category of the ``Cluster`` entities to fetch.

    Only news whose ``source_id`` is among the wanted ids is kept, and a
    cluster left without news after filtering is omitted. Each remaining
    cluster contributes its ``serialize()`` value to the ``listClusters``
    list.

    Returns:
        Response: the ASCII-escaped JSON text, sent with mimetype
        ``text/plain``.
    """
    reqParam = request.args.get('wantedSources')

    # Skip blank tokens so that "wantedSources=" or "1,2," does not crash
    # on int(''); a set makes the membership tests below constant-time.
    wanted_ids = set()
    if reqParam is not None:
        wanted_ids = set(int(token) for token in reqParam.split(',')
                         if token.strip())

    category = request.args.get('category')

    clusters = Cluster.query(Cluster.category == category).fetch()

    clustersToReturn = []

    for c in clusters:
        kept = [post for post in c.listNews if post.source_id in wanted_ids]

        if kept:
            # The entity is not put() in this function, so only the
            # in-memory copy is changed.
            c.listNews = kept
            clustersToReturn.append(c)

    # TODO: sort the clusters by some parameter (maybe the cluster size).

    obj = {'listClusters': [c.serialize() for c in clustersToReturn]}

    result = json.dumps(obj, ensure_ascii=True)

    return Response(result, mimetype='text/plain')
Ejemplo n.º 9
0
def getClusters():
    """Cluster the crawled news posts, label each cluster and store it.

    Steps:
      1. Take all news posts from ``crawler`` and load the three pickled
         naive-Bayes dictionaries named in ``naivebayes_classification``.
      2. Run ``clustering.cluster_news`` and order the clusters by
         descending number of posts.
      3. For each cluster, classify every post with
         ``test_classifications.get_NB_category`` and take the category with
         the most votes (empty string when the cluster has no posts).
      4. Store a ``Cluster`` entity holding that category and one
         ``NewsPostClient`` per post.

    Any exception is caught; its type and text are appended to the debug
    trace instead of propagating.

    Returns:
        Response: ``text/plain`` body made of the cluster listing (clusters
        numbered from 0, one line per post title) followed by the debug
        trace, which is included exactly once.
    """
    feedback = ''
    listing = ''
    try:
        newsPosts = crawler.take_all_news_posts()

        # utility dicts for majority voting with naive bayes
        dict_words = _load_pickled_object(
            naivebayes_classification.str_dict_word_in_cat)
        dict_cats = _load_pickled_object(
            naivebayes_classification.str_dict_cat_count)
        dict_priors = _load_pickled_object(
            naivebayes_classification.str_dict_priors)

        feedback += 'took the newsposts \n'

        clusters, innerfeedback = clustering.cluster_news(newsPosts)

        feedback += '%s\n' % innerfeedback
        feedback += 'done the clustering\n'
        feedback += 'num of clusters: %d\n' % len(clusters)

        # Largest clusters first.
        clusters = sorted(clusters, key=lambda x: -len(x.posts))
        for i, c in enumerate(clusters):

            feedback += 'getting posts from cluster\n'
            newsInCluster = c.posts
            feedback += 'got the posts from cluster\n'

            listing += 'cluster %d\n' % i

            # Majority voting: every post votes for its predicted category.
            votes_cat = {}
            for post in newsInCluster:
                listing += ' \t %s\n' % post.title
                category = test_classifications.get_NB_category(
                    post.words, dict_words, dict_cats, dict_priors)
                votes_cat[category] = 1 + votes_cat.get(category, 0)

            # On a tie the first category in dict iteration order wins, as a
            # strict ">" scan over the dict would pick.
            maxCat, maxVotes = '', 0
            if votes_cat:
                maxCat = max(votes_cat, key=votes_cat.get)
                maxVotes = votes_cat[maxCat]

            feedback += '^^^ CLUSTER CATEGORY: %s with maxVotes: %d\n' % (maxCat, maxVotes)

            listNews = []

            feedback += ' number of posts in cluster %d\n' % len(c.posts)
            for post in c.posts:

                feedback += 'trying to create NewsPostClient\n'
                feedback += 'title: %s \n' % post.title
                feedback += 'numWords: %d\n' % post.numWords
                feedback += 'url: %s\n' % post.url

                newNews = NewsPostClient(url=post.url, host_page=post.host_page,
                                         title=post.title, numWords=post.numWords,
                                         source_id=post.source_id,
                                         source_url=post.source_url,
                                         img_url=post.img_url,
                                         description=post.description)

                feedback += 'created NewsPostClient'
                listNews.append(newNews)
                feedback += 'appended newNews\n'

            newCluster = Cluster(category=maxCat, listNews=listNews)
            newCluster.put()

            listing += '\n'

    except Exception as inst:
        # Format the exception object itself: ``inst.message`` is deprecated,
        # absent on Python 3 and empty for multi-argument exceptions.
        feedback += 'Exception type: %s\n' % (type(inst))
        feedback += 'Exception: %s\n' % (inst)

    # Appended once, on success and on failure alike; appending it inside the
    # try block as well would duplicate the whole trace on success.
    listing += feedback
    return Response(listing, mimetype='text/plain')


def _load_pickled_object(path):
    """Unpickle and return the object stored in the file at *path*.

    The file is closed even if unpickling raises.
    """
    # NOTE(review): unpickling can execute arbitrary code, so *path* must
    # point to a trusted file. The default open mode is kept on purpose;
    # confirm whether binary mode is needed before a Python 3 port.
    with open(path) as handle:
        return Unpickler(handle).load()
 def addCluster(self, r, x, y, v, theta, lambda0, color, is_point):
     """Create a ``Cluster`` from the arguments and append it to ``self.clusters_list``.

     All eight arguments are forwarded positionally, in the order given, to
     the ``Cluster`` constructor.
     """
     # Bind the append method first so the attribute lookup still happens
     # before the Cluster is constructed.
     register = self.clusters_list.append
     register(Cluster(r, x, y, v, theta, lambda0, color, is_point))