コード例 #1
0
def get_cluster_coords(location, price, ptype, psize):
    """Return the cluster coordinates for the given search criteria.

    Loads the 'clusters' dataset through ``get_data`` and forwards it,
    together with the four criteria, to ``cluster_coordinates``.

    Args:
        location: forwarded unchanged to ``cluster_coordinates``.
        price: forwarded unchanged to ``cluster_coordinates``.
        ptype: forwarded unchanged (presumably the property type -- confirm
            against ``cluster_coordinates``).
        psize: forwarded unchanged (presumably the property size -- confirm
            against ``cluster_coordinates``).

    Returns:
        Whatever ``cluster_coordinates`` returns for these inputs.
    """
    # Only the clusters dataset is needed here; it is passed as the last
    # positional argument.
    return cluster_coordinates(
        location, price, ptype, psize, get_data('clusters')
    )
コード例 #2
0
def get_cluster_ranking(location, price, ptype, psize, listing_id):
    """Return the ranking information computed by ``user_ranking``.

    Loads the 'clusters' dataset through ``get_data`` and forwards it,
    together with the search criteria and the listing id, to
    ``user_ranking``.

    Args:
        location: forwarded unchanged to ``user_ranking``.
        price: forwarded unchanged to ``user_ranking``.
        ptype: forwarded unchanged to ``user_ranking``.
        psize: forwarded unchanged to ``user_ranking``.
        listing_id: forwarded unchanged to ``user_ranking``.

    Returns:
        A 3-tuple holding the three values produced by ``user_ranking``,
        in the order it yields them (named rank, average and scores here).
    """
    cluster_data = get_data('clusters')

    # Unpack explicitly so a result that does not hold exactly three items
    # fails here rather than in the caller.
    rank, average, scores = user_ranking(
        location, price, ptype, psize, listing_id, cluster_data
    )
    return rank, average, scores
コード例 #3
0
def get_wordcloud(cluster_id):
    """Build a word cloud from the stored word-association counts of a cluster.

    The 'wordcount' dataset holds the most common two-word associations and
    their counts for each cluster label listed below, plus one set of counts
    computed across all clusters (stored under the label 'All'). A
    ``cluster_id`` that is not one of the listed labels falls back to the
    'All' counts.

    Args:
        cluster_id: cluster label to build the word cloud for.

    Returns:
        The ``WordCloud`` object returned by ``generate_from_frequencies``.
    """
    # Cluster labels that have their own word counts in the dataset.
    known_labels = (
        'L:Westminster_P:very_cheap_S:room',
        'L:Tower Hamlets_P:very_cheap_S:room',
        'L:Tower Hamlets_P:cheap_S:room',
        'L:Camden_P:very_cheap_S:room',
        'L:Lambeth_P:cheap_S:room',
        'L:Camden_P:cheap_S:room',
        'L:Southwark_P:cheap_S:room',
        'L:Hammersmith and Fulham_P:very_cheap_S:room',
        'L:Southwark_P:very_cheap_S:room',
        'L:Lambeth_P:very_cheap_S:room',
        'L:Westminster_P:cheap_S:small',
        'L:Hackney_P:cheap_S:room',
        'L:Islington_P:very_cheap_S:room',
        'L:Kensington and Chelsea_P:very_cheap_S:room',
        'L:Westminster_P:cheap_S:room',
        'L:Islington_P:cheap_S:room',
        'L:Westminster_P:very_expensive_S:large',
        'L:Hackney_P:very_cheap_S:room',
        'L:Westminster_P:expensive_S:large',
        'L:Westminster_P:average_S:small',
        'L:Wandsworth_P:cheap_S:room',
    )

    counts_table = get_data('wordcount')

    # Labels without dedicated counts use the counts computed across all
    # clusters.
    selected_label = cluster_id if cluster_id in known_labels else 'All'

    # Keep only the rows belonging to the selected label.
    label_rows = counts_table.loc[counts_table['cluster'] == selected_label]

    # The word cloud is fed a Series of counts indexed by the word pair.
    frequencies = pd.Series(
        label_rows['count'].to_list(),
        index=label_rows['quotes'].to_list(),
    )

    cloud = WordCloud(background_color="white", collocation_threshold=5)
    return cloud.generate_from_frequencies(frequencies)
コード例 #4
0
def listing_to_cluster(listing_id):
    """Return the cluster label recorded for a listing.

    Looks up the rows of the 'clusters' dataset whose 'listing_id' equals
    ``listing_id`` and returns the 'cluster' value of the first such row.

    Args:
        listing_id: value compared against the 'listing_id' column.

    Returns:
        The 'cluster' entry of the first matching row.

    Note:
        If no row matches, the positional lookup of the first row fails
        (presumably with ``IndexError``, assuming ``get_data`` returns a
        pandas DataFrame -- confirm).
    """
    cluster_table = get_data('clusters')

    is_match = cluster_table['listing_id'] == listing_id
    first_match = cluster_table[is_match].iloc[0]

    return first_match['cluster']
コード例 #5
0
ファイル: lib.py プロジェクト: sleepgopher/fivestar
 def __init__(self):
     """Load the datasets and the model, then build the cluster info.

     Sets ``self.listings``, ``self.clusters`` and ``self.model`` before
     calling ``self.build_cluster_info()``.
     """
     # get_data() with no argument -- presumably returns the listings
     # dataset by default; confirm against get_data.
     self.listings = get_data()
     self.clusters = get_data('clusters')
     # NOTE(review): self.model is whatever load_model() returns, not the
     # Model instance itself -- confirm load_model() returns the model.
     self.model = Model().load_model()
     # Called last; presumably relies on the attributes set above -- confirm.
     self.build_cluster_info()