def get_connector(textContent):
    '''
    Attach a connector to a piece of story text according to its tension.

    Input: tuple (text, tension) or (text, tension, has_descr)
    Output: tuple (text, tension)
    '''
    if len(textContent) == 3:
        text, tension, _has_descr = textContent
    else:
        text, tension = textContent
    if tension == '5.0':
        # climax sentences get wrapped in dramatic connector/finaliser phrases
        text = '{}{}{}'.format(rand(CLIMAX_CONNECTORS), text, rand(CLIMAX_FINALISERS))
    elif '.' not in text:
        # otherwise just make sure the fragment ends like a sentence
        text += '. '
    return (text, tension)
def new_centroids(data, centroids, clusters, clusters_id, num_of_iterations, num_of_clusters, iteration_count):
    '''
    Recompute each cluster's centroid as the mean of its members, then either
    report clustering quality (on convergence or when the iteration limit is
    reached) or recurse into kmeans for another pass.

    Input:
        data: array whose column 1 holds integer ground-truth labels and
              columns 2+ hold the feature values (per the usage below)
        centroids: current centroid array
        clusters: list of per-cluster member-point lists
        clusters_id: list of per-cluster point-id lists
        num_of_iterations: iteration limit
        num_of_clusters: number of clusters
        iteration_count: current iteration number
    Output: None (prints metrics and shows a scatter plot, or recurses)
    '''
    # new centroid = mean of the cluster's member points
    new_centroid = []
    for members in clusters:
        vals = np.array(members)
        new_centroid.append(np.sum(vals, 0) / len(members))
    new_centroid = np.array(new_centroid)
    # BUG FIX: the original summed *signed* differences, which can cancel to
    # zero without actual convergence; use total absolute movement instead.
    sums = np.sum(np.abs(np.array(centroids) - np.array(new_centroid)))
    d = dict()
    # BUG FIX: the original compared against the global `iterations` instead
    # of the num_of_iterations parameter.
    if sums == 0 or iteration_count == num_of_iterations:
        print("Converged")
        # map each point id to its (1-based) cluster label
        for label, ids in enumerate(clusters_id, start=1):
            for point_id in ids:
                d[point_id] = label
        vals = [d[key] for key in sorted(d.keys())]
        ground_truth = list(map(int, data[:, 1]))
        print("Jaccard")
        ja = helpers.jaccard(ground_truth, vals)
        print(ja)
        print("Rand index")
        rd = helpers.rand(ground_truth, vals)
        print(rd)
        unique_predicted = list(set(vals))
        new_x = helpers.pca(data[:, 2:])
        helpers.scatter(new_x[:, 0], new_x[:, 1], vals, unique_predicted, "K-means Algorithm", "iyer.txt")
    else:
        # BUG FIX: forward the caller's parameters rather than the globals
        # `iterations` / `no_cluster`.
        # NOTE(review): iteration_count is passed through unchanged — confirm
        # that kmeans itself increments it.
        kmeans(data, new_centroid, num_of_iterations, num_of_clusters, iteration_count)
def get_location_by_tension(tension):
    '''
    Pick one location at random among those with the exact tension level.

    Input: integer tension
    Output: list location(s)
    '''
    matches = find_by_attribute(LOCATIONS, "Tension", tension)
    return rand(matches)
def add_location_description(loc):
    '''
    Adds a short sentence after the location identifier which describes the
    location. Let's hope this gives a more realistic feel to the text!

    Input: dict or String location
    Output: String sentence
    '''
    if type(loc) == str:
        loc = get_location_by_name(loc)
    ambience = loc['Ambience']
    tension = [float(t) for t in loc['Sentiment']]
    target = sum(tension)
    # Try to pick an ambience whose sentiment roughly matches the location's.
    # BUG FIX: the original `while True` never terminated when no ambience
    # satisfied the threshold; cap the attempts and fall back to the last
    # sample so the function always returns. (The original also drew one
    # dead sample before the loop; removed.)
    amb = rand(ambience)
    for _ in range(100):
        if abs(target - sum(sentiment(amb))) < 1:
            break
        amb = rand(ambience)
    return add_sentence(loc['Location'], amb, nutsness=NUTSNESS)
def add_location_props(loc):
    '''
    Returns partial sentence of location props.

    Input: dict or String location
    Output: String " amongst the X" with X a random prop, or '' when the
            location has no props
    '''
    if type(loc) == str:
        loc = get_location_by_name(loc)
    props = loc['Props']
    # guard clause: no props recorded for this location
    if props in ('', ['']):
        return ''
    return ' amongst the {}'.format(pluralize(rand(props)))
def new_centroids(reducedSpace, centroids, clusters, clusters_id, num_of_iterations, num_clusters, iteration_count):
    '''
    Recompute centroids as the mean of each cluster's members, then either
    report clustering quality (on convergence or iteration limit) or recurse
    into kmeans for another pass.

    NOTE(review): this module defines new_centroids twice; at import time the
    later definition shadows the earlier one — confirm which is intended.

    Input:
        reducedSpace: feature array passed through to kmeans
        centroids: current centroid array
        clusters: list of per-cluster member-point lists
        clusters_id: list of per-cluster point-id lists
        num_of_iterations: iteration limit
        num_clusters: number of clusters
        iteration_count: current iteration number
    Output: None (prints metrics and shows a scatter plot, or recurses)
    '''
    new_centroid = []
    for members in clusters:
        new_centroid.append(np.sum(np.array(members), 0) / len(members))
    new_centroid = np.array(new_centroid)
    # BUG FIX: the original summed *signed* differences, which can cancel to
    # zero without actual convergence; use total absolute movement instead.
    sums = np.sum(np.abs(np.array(centroids) - np.array(new_centroid)))
    d = dict()
    # BUG FIX: compare against the num_of_iterations parameter, not the
    # global `iterations`.
    if sums == 0 or iteration_count == num_of_iterations:
        print("Converged")
        # map each point id to its (1-based) cluster label
        for label, ids in enumerate(clusters_id, start=1):
            for point_id in ids:
                d[point_id] = label
        vals = np.array([d[key] for key in sorted(d.keys())])
        print(vals)
        print(set(vals))
        print("Jaccard")
        ja = helpers.jaccard(Groundtruth, vals)
        print(ja)
        print("Rand index")
        rd = helpers.rand(Groundtruth, vals)
        print(rd)
        unique_predicted = list(set(vals))
        new_x = helpers.pca(GeneExpressions)
        helpers.scatter(new_x[:, 0], new_x[:, 1], vals, unique_predicted)
    else:
        # BUG FIX: forward the caller's parameters rather than the globals.
        # NOTE(review): iteration_count is passed through unchanged — confirm
        # that kmeans itself increments it.
        kmeans(reducedSpace, new_centroid, num_of_iterations, num_clusters, iteration_count)
# 2. Declare any specific inputs to the program and call the algorithm epsilon = float(input("Enter epsilon value: ")) min_pts = float(input("Enter min_pts value: ")) # 3. Perform DBSCAN model = __dbscan.DBSCAN(X, epsilon, min_pts) predicted = model.fit() unique, counts = np.unique(predicted, return_counts=True) print("Counts by cluster:") for key, value in zip(unique, counts): print("{}: {}".format(key, value)) # 4. Find Rand index and Jaccard rand_score = helpers.rand(y, predicted) jaccard_score = helpers.jaccard(y, predicted) unique_predicted = list(set(predicted)) print(predicted) print(rand_score) print(jaccard_score) # print(adjusted_rand_score(y, predicted)) # print(jaccard_similarity_score(y, predicted)) # 5. Visualize using PCA new_X = X if X.shape[1] > 2: new_X = helpers.pca(X) helpers.scatter(new_X[:, 0], new_X[:, 1], predicted, unique_predicted,
def get_location_at_random():
    '''Return a random location whose "Preposition" attribute is "in".'''
    candidates = find_by_attribute(LOCATIONS, "Preposition", "in")
    return rand(candidates)
def make_characters(n):
    '''Get n characters.

    Input: integer n
    Output: list of n character names drawn at random from NOC
    '''
    return [rand(NOC)['Character'] for _ in range(n)]
# Testing data
#distMatrix = [[0.00,0.71,5.66,3.61,4.24,3.20],[0.71,0.00,4.95,2.92,3.54,2.50],[5.66,4.95,0.00,2.24,1.41,2.50],[3.61,2.92,2.24,0.00,1.00,0.50],[4.24,3.54,1.41,1.00,0.00,1.12],[3.20,2.50,2.50,0.50,1.12,0.00]]
#rowsnumbers = [[0],[1],[2],[3],[4],[5]]
distMatrix = np.array(distMatrix)
#print(distMatrix)
# Agglomeratively merge the two closest clusters until only k remain
while len(distMatrix) >= 2:
    if len(rowsnumbers) == k:
        break
    distMatrix, rowsnumbers = updateDistMatrix(distMatrix, rowsnumbers)
print(rowsnumbers)
# Map every data point id (1-based) to the cluster it ended up in
clusterassignments = {}
for cluster, rows in enumerate(rowsnumbers, start=1):
    for j in rows:
        clusterassignments[j + 1] = cluster
# BUG FIX: the original called sorted(...) and discarded the result (a
# no-op), then took .values() in dict-insertion order, so predicted[i] and
# Ground[i] did not refer to the same data point. Order both label lists by
# key so the Jaccard / Rand comparison is aligned.
predicted = [clusterassignments[key] for key in sorted(clusterassignments)]
Ground = [Groundtruth[key] for key in sorted(Groundtruth)]
unique_predicted = list(set(predicted))
print(h.jaccard(Ground, predicted))
print(h.rand(Ground, predicted))
# Visualize the clustering in 2D via PCA
new_X = h.pca(GeneExpressions)
h.scatter(new_X[:, 0], new_X[:, 1], predicted, unique_predicted)