예제 #1
0
def get_connector(textContent):
    '''
    Finish off a text fragment depending on its tension value.

    Input: sequence (text, tension) or (text, tension, has_descr);
           the third item is unpacked but never consulted
    Output: tuple (text, tension) where text is
            - wrapped in a random CLIMAX_CONNECTORS entry and a random
              CLIMAX_FINALISERS entry when tension equals '5.0'
            - suffixed with '. ' when it contains no '.' at all
            - unchanged otherwise
    '''
    if len(textContent) != 3:
        text, tension = textContent
    else:
        text, tension, _has_descr = textContent

    # climax: connector goes in front, finaliser goes behind
    if tension == '5.0':
        return (rand(CLIMAX_CONNECTORS) + text + rand(CLIMAX_FINALISERS),
                tension)

    # any other tension: only make sure the fragment has a full stop
    if '.' not in text:
        text = text + '. '
    return (text, tension)
예제 #2
0
def get_connector(textContent):
    '''
    Decorate a text fragment according to its tension value.

    Input: 2- or 3-item sequence: text, tension and an optional third
           field that is unpacked and then ignored
    Output: tuple (text, tension); for tension '5.0' the text gets a
            random CLIMAX_CONNECTORS prefix and CLIMAX_FINALISERS suffix,
            otherwise '. ' is appended if the text holds no '.'
    '''
    if len(textContent) == 3:
        text, tension, _ignored = textContent
    else:
        text, tension = textContent

    if tension != '5.0':
        # non-climax fragment: terminate it unless it already has a period
        if '.' not in text:
            text = text + '. '
        return (text, tension)

    # climax fragment: surround it with connector and finaliser
    text = rand(CLIMAX_CONNECTORS) + text + rand(CLIMAX_FINALISERS)
    return (text, tension)
예제 #3
0
def new_centroids(data,centroids,clusters,clusters_id,num_of_iterations,num_of_clusters,iteration_count):
    """Recompute the centroids and either report the result or iterate again.

    Each new centroid is the column-wise mean of the points in the
    corresponding entry of ``clusters``.  When the centroids did not move,
    or ``iteration_count`` has reached ``num_of_iterations``, the final
    assignment is scored and plotted; otherwise ``kmeans`` is called again
    with the new centroids.

    Args:
        data: 2-D array; column 1 is read as the ground-truth label (cast
            to int) and columns 2 onward are passed to ``helpers.pca``.
        centroids: centroids used for the current assignment.
        clusters: one collection of points per cluster.
        clusters_id: one collection of point ids per cluster, parallel to
            ``clusters``.
        num_of_iterations: iteration cap at which the run is finalised.
        num_of_clusters: cluster count forwarded to ``kmeans``.
        iteration_count: current iteration number.

    Returns:
        None.  Results are printed and plotted.
    """
    new_centroid = np.array(
        [np.sum(np.array(members), 0) / len(members) for members in clusters]
    )

    # Sum the *absolute* movement: signed differences can cancel out to 0
    # even though individual centroids moved, which would stop too early.
    movement = np.sum(np.abs(np.array(centroids) - new_centroid))

    # Use this function's own parameters for the cap and the cluster count
    # instead of relying on module-level names of a different spelling.
    if movement == 0 or iteration_count == num_of_iterations:
        print("Converged")

        # point id -> 1-based cluster label
        assignment = {}
        for label, member_ids in enumerate(clusters_id, start=1):
            for point_id in member_ids:
                assignment[point_id] = label

        # labels ordered by point id so they line up with the rows of data
        vals = [assignment[point_id] for point_id in sorted(assignment)]
        ground_truth = list(map(int,data[:,1]))

        print("Jaccard")
        ja = helpers.jaccard(ground_truth,vals)
        print(ja)

        print("Rand index")
        rd = helpers.rand(ground_truth,vals)
        print(rd)

        unique_predicted = list(set(vals))
        new_x = helpers.pca(data[:,2:])
        helpers.scatter(new_x[:,0],new_x[:,1],vals,unique_predicted,"K-means Algorithm","iyer.txt")
    else:
        kmeans(data,new_centroid,num_of_iterations,num_of_clusters,iteration_count)
예제 #4
0
def get_location_by_tension(tension):
    '''
    Pick one location whose "Tension" attribute matches the given value.

    Input: tension value handed to find_by_attribute for the lookup
    Output: a single entry chosen by rand from the matching LOCATIONS
    '''
    return rand(find_by_attribute(LOCATIONS, "Tension", tension))
예제 #5
0
def add_location_description(loc):
    '''
    Adds a short sentence after location identifier which describes
    the location.

    An ambience counts as suitable when the sum of its sentiment() values
    is less than 1 away from the sum of the location's 'Sentiment' values.
    One suitable ambience is picked with rand. If none is suitable, the
    closest one is used instead of retrying forever.

    Input: dict or String location
    Output: String sentence
    '''
    if isinstance(loc, str):
        loc = get_location_by_name(loc)
    ambience = loc['Ambience']
    target = sum(float(t) for t in loc['Sentiment'])

    # Score every ambience once: (distance to the location sentiment, text)
    scored = [(abs(target - sum(sentiment(amb))), amb) for amb in ambience]
    suitable = [amb for gap, amb in scored if gap < 1]

    if suitable:
        # presumably rand picks uniformly, which matches redrawing until a
        # suitable ambience turns up -- confirm against rand's definition
        amb = rand(suitable)
    else:
        # nothing within range: fall back to the nearest ambience so the
        # call always terminates (min raises ValueError if ambience is empty)
        amb = min(scored, key=lambda pair: pair[0])[1]
    return add_sentence(loc['Location'], amb, nutsness=NUTSNESS)
예제 #6
0
def add_location_props(loc):
    '''
    Returns partial sentence of location props.

    Input: dict or String location
    Output: String " amongst the X" with X a random prop (pluralized),
            or '' when the location has no props
    '''
    if isinstance(loc, str):
        loc = get_location_by_name(loc)
    props = loc['Props']
    # '', [''], [] and None all mean "no props"; never hand those to rand
    if not props or props == ['']:
        return ''
    prop = rand(props)
    return ' amongst the {}'.format(pluralize(prop))
예제 #7
0
def new_centroids(reducedSpace, centroids, clusters, clusters_id,
                  num_of_iterations, num_clusters, iteration_count):
    """Recompute the centroids and either report the result or iterate again.

    Each new centroid is the column-wise mean of the points in the
    corresponding entry of ``clusters``.  When the centroids did not move,
    or ``iteration_count`` has reached ``num_of_iterations``, the final
    assignment is printed, scored against the module-level ``Groundtruth``
    and plotted using a PCA of the module-level ``GeneExpressions``;
    otherwise ``kmeans`` is called again with the new centroids.

    Args:
        reducedSpace: data set forwarded unchanged to ``kmeans``.
        centroids: centroids used for the current assignment.
        clusters: one collection of points per cluster.
        clusters_id: one collection of point ids per cluster, parallel to
            ``clusters``.
        num_of_iterations: iteration cap at which the run is finalised.
        num_clusters: cluster count forwarded to ``kmeans``.
        iteration_count: current iteration number.

    Returns:
        None.  Results are printed and plotted.
    """
    new_centroid = np.array(
        [np.sum(np.array(members), 0) / len(members) for members in clusters]
    )

    # Sum the *absolute* movement: signed differences can cancel out to 0
    # even though individual centroids moved, which would stop too early.
    movement = np.sum(np.abs(np.array(centroids) - new_centroid))

    # The iteration cap comes from this function's own parameter rather
    # than from a module-level name of a different spelling.
    if movement == 0 or iteration_count == num_of_iterations:
        print("Converged")

        # point id -> 1-based cluster label
        assignment = {}
        for label, member_ids in enumerate(clusters_id, start=1):
            for point_id in member_ids:
                assignment[point_id] = label

        # labels ordered by point id
        vals = np.array([assignment[point_id]
                         for point_id in sorted(assignment)])
        print(vals)
        print(set(vals))

        print("Jaccard")
        ja = helpers.jaccard(Groundtruth, vals)
        print(ja)

        print("Rand index")
        rd = helpers.rand(Groundtruth, vals)
        print(rd)

        unique_predicted = list(set(vals))
        new_x = helpers.pca(GeneExpressions)
        helpers.scatter(new_x[:, 0], new_x[:, 1], vals, unique_predicted)

    else:
        kmeans(reducedSpace, new_centroid, num_of_iterations, num_clusters,
               iteration_count)
예제 #8
0
# 2. Declare any specific inputs to the program and call the algorithm
# Both DBSCAN parameters are read interactively and parsed as floats.
epsilon = float(input("Enter epsilon value: "))

min_pts = float(input("Enter min_pts value: "))

# 3. Perform DBSCAN
# X (and y below) are defined outside this fragment.
model = __dbscan.DBSCAN(X, epsilon, min_pts)
# presumably one predicted label per row of X -- confirm against DBSCAN.fit
predicted = model.fit()
# Report how many times each distinct predicted label occurs.
unique, counts = np.unique(predicted, return_counts=True)
print("Counts by cluster:")
for key, value in zip(unique, counts):
    print("{}: {}".format(key, value))

# 4. Find Rand index and Jaccard
# Both scores compare the labels in y with the predicted labels.

rand_score = helpers.rand(y, predicted)
jaccard_score = helpers.jaccard(y, predicted)
# Distinct predicted labels, passed to the scatter plot below.
unique_predicted = list(set(predicted))
print(predicted)
print(rand_score)
print(jaccard_score)

# Disabled alternative scoring calls, kept for reference:
# print(adjusted_rand_score(y, predicted))
# print(jaccard_similarity_score(y, predicted))

# 5. Visualize using PCA
# X is plotted as-is unless it has more than two columns, in which case
# it is first passed through helpers.pca.
new_X = X
if X.shape[1] > 2:
    new_X = helpers.pca(X)

helpers.scatter(new_X[:, 0], new_X[:, 1], predicted, unique_predicted,
예제 #9
0
def get_location_at_random():
    '''
    Pick one location among those whose "Preposition" attribute is "in".

    Output: a single entry chosen by rand from the matching LOCATIONS
    '''
    return rand(find_by_attribute(LOCATIONS, "Preposition", "in"))
예제 #10
0
def make_characters(n):
    '''
    Get n characters.

    Input: integer n, number of draws
    Output: list with the 'Character' field of n rand(NOC) picks
            (empty when n is 0 or negative)
    '''
    return [rand(NOC)['Character'] for _ in range(n)]
# Testing data
#distMatrix = [[0.00,0.71,5.66,3.61,4.24,3.20],[0.71,0.00,4.95,2.92,3.54,2.50],[5.66,4.95,0.00,2.24,1.41,2.50],[3.61,2.92,2.24,0.00,1.00,0.50],[4.24,3.54,1.41,1.00,0.00,1.12],[3.20,2.50,2.50,0.50,1.12,0.00]]
#rowsnumbers = [[0],[1],[2],[3],[4],[5]]
distMatrix = np.array(distMatrix)
#print(distMatrix)

# Keep updating the distance matrix until k groups remain or the matrix
# has fewer than two rows.
while (len(distMatrix) >= 2):
    if (len(rowsnumbers) == k):
        break
    distMatrix, rowsnumbers = updateDistMatrix(distMatrix, rowsnumbers)
print(rowsnumbers)

# Map each (1-based) row id to the number of the group it ended up in.
clusterassignments = {}

cluster = 1
for i in rowsnumbers:
    for j in i:
        clusterassignments[j + 1] = cluster
    cluster += 1

# sorted() returns a new list of keys and leaves the dict untouched, so the
# labels are read out explicitly in key order. This keeps the predicted and
# ground-truth lists aligned element by element before they are compared.
predicted = [clusterassignments[key] for key in sorted(clusterassignments)]
unique_predicted = list(set(predicted))
Ground = [Groundtruth[key] for key in sorted(Groundtruth)]
print(h.jaccard(Ground, predicted))
print(h.rand(Ground, predicted))

# Plot the first two columns of the PCA output, coloured by predicted label.
new_X = h.pca(GeneExpressions)
h.scatter(new_X[:, 0], new_X[:, 1], predicted, unique_predicted)