def initialize_clusters(markers, distance_threshold, bearing_threshold):
    '''
    Create an initial set of clusters. Return a list of Cluster objects.

    markers is a list of PointWithID instances.

    We arbitrarily select a marker that has not yet been assigned to a
    cluster, and create a new cluster at the marker. Then, any markers that
    are within distance_threshold and bearing_threshold of the selected
    marker are added to the cluster.

    For example, suppose that we have created a cluster at (26, 0, 83), and
    the remaining seed points are at (0, 0, 90), (30, 0, 90), (50, 10, 0),
    and (50, 40, 0). Then, (30, 0, 90) is assigned to the existing cluster
    since the distance to the cluster is 4 while the bearing difference is 7,
    which are both within the thresholds. The other seed points become new
    clusters.

    You might find Index (from util.py) useful to query points that are near
    a specified location.
    '''
    clusters = []
    markers = markers[:]
    index = Index(markers)
    while markers:
        # Pick an arbitrary unassigned marker to seed a new cluster.
        random_marker = markers[random.randint(0, len(markers) - 1)]
        markers.remove(random_marker)
        # Gather markers within both thresholds of the seed. The seed itself
        # can come back from the index query, so exclude it here: it is added
        # to the cluster explicitly below.
        nearest_markers = index.nearby(random_marker, distance_threshold)
        nearest_markers = [
            marker for marker in nearest_markers
            if marker is not random_marker
            and marker.angle_to(random_marker) <= bearing_threshold
        ]
        cluster = Cluster(random_marker)
        cluster.add_member(random_marker)
        for marker in nearest_markers:
            cluster.add_member(marker)
        clusters.append(cluster)
        # Markers absorbed into this cluster are no longer available as seeds.
        markers = list(set(markers) - set(nearest_markers))
    return clusters
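# Usage sketch for the docstring example above, a minimal illustration only:
# the PointWithID(id, x, y, bearing) constructor and the threshold values are
# assumptions; the real signatures live elsewhere in this project.
seeds = [
    PointWithID(0, 26, 0, 83),
    PointWithID(1, 0, 0, 90),
    PointWithID(2, 30, 0, 90),   # 4 units and 7 degrees away from (26, 0, 83)
    PointWithID(3, 50, 10, 0),
    PointWithID(4, 50, 40, 0),
]
clusters = initialize_clusters(seeds, distance_threshold=10, bearing_threshold=15)
# Expect 4 clusters: the two points within both thresholds of each other end
# up together no matter which one is picked as the seed; the rest each seed
# their own cluster.
print(len(clusters))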
def loadjson(j): """Load from JSON dict. """ raws = j[jsonkey] v = j.get('v', 1) if v in {1, 1220}: # Original version or array version. skills = Index((i, Skill(i, raw)) for i, raw in items(raws)) else: # raise ValueError("%r has unknown skills version %s." % (fpath, v)) raise ValueError("Skill data has unknown skills version %s." % (v,)) return skills
def read_dungeons(raw, v):
    dungeons = Index()
    current = None
    for line in gh_csv(raw):
        if line[0] == 'd':
            # Dungeon header row: starts a new dungeon.
            current = Dungeon(line, v)
            dungeons[current.id] = current
        elif line[0] == 'f':
            # Floor row: appended to the most recently seen dungeon.
            current.append(Floor(line, v))
        elif line[0] == 'c':
            # Row type that is intentionally skipped.
            continue
        else:
            assert 0, "unexpected dungeon row: %r" % (line,)
    return dungeons
def loadjson(j, limit=INSANE_CARD_LIMIT):
    """Load from JSON dict. """
    raws = j['card']
    v = j['v']
    #^ We can use version to determine what the fields are.
    if v < 810:
        raise NotImplementedError(v)
    raws = [card[:] for card in raws]
    if v < 900:
        # Collab and inherits.
        for card in raws:
            card.extend((0, 0))
    if v < 920:
        # Index.
        for i, card in items(raws):
            card.insert(0, i)
    if v < 1220:
        # Weird furigana thing.
        for card in raws:
            card.append('')
    if v < 1230:
        # Limit break.
        for card in raws:
            card.append(0)
    if v < 1240:
        # Super awakening (prep).
        for card in raws:
            card.insert(-9, '')
    if v < 1520:
        # Voice.
        for card in raws:
            card.append(0)
    if v < 1600:
        # Orbskin.
        for card in raws:
            card.append(0)
    # 1800: HT 2019-10-11
    # 1800: NA 2019-10-~20
    if v < 1800:
        # Only used so far for Fagan Rai?
        for card in raws:
            card.append('')
    if v > 1800:
        warnings.warn("Unknown card_data version: %s" % v)
    # else:
    #     raise ValueError("%r has unknown card version %s." % (fpath, v))
    cards = Index((raw[0], BookCard(raw))
                  for raw in values(raws)
                  if not limit or raw[0] < limit)
    _register_evos(cards)
    _register_families(cards)
    return cards
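# A minimal, self-contained illustration (with a made-up toy schema, not the
# real card layout) of the cumulative migration pattern used above: each
# "if v < N" block upgrades rows older than version N, so a row only passes
# through the migrations it is missing and always ends up in the newest shape.
def _upgrade_toy_rows(raws, v):
    raws = [row[:] for row in raws]  # never mutate the caller's data
    if v < 2:   # hypothetical: version 2 added a numeric flag
        for row in raws:
            row.append(0)
    if v < 3:   # hypothetical: version 3 added a string suffix
        for row in raws:
            row.append('')
    return raws

# A version-1 row gains both fields; a version-2 row gains only the last one.
assert _upgrade_toy_rows([[1, 'a']], 1) == [[1, 'a', 0, '']]
assert _upgrade_toy_rows([[1, 'a', 5]], 2) == [[1, 'a', 5, '']]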
def loadjson(j): """Load from JSON dict. """ raw = j['enemy_skills'] v = j.get('v', 1) # version # v=1: dict-based # v=2: csv if v == 1: table = parse_1(raw) elif v == 2: table = parse_2(raw) else: warnings.warn("Enemy Skill JSON has unknown version %s." % (v)) table = parse_2(raw) eskills = Index((row[0], EnemySkill(row)) for row in table) return eskills
def loadjson(j, limit=INSANE_CARD_LIMIT):
    """Load from JSON dict. """
    raws = j['card']
    v = j['v']
    #^ We can use version to determine what the fields are.
    if v == 810:
        raise NotImplementedError('BookCard810')
    elif v == 900:
        cards = [BookCard900(i, raw) for i, raw in items(raws)]
        # cards = util.Bag(cards)
    elif v == 920:
        cards = Index((raw[0], BookCard920(raw))
                      for raw in values(raws)
                      if not limit or raw[0] < limit)
    else:
        raise ValueError("Card data has unknown card version %s." % (v,))
    _register_evos(cards)
    _register_families(cards)
    return cards
#!/usr/bin/env python2

from util import Index

UNK = "*unknown*"
NULL = "*null*"

STRING_INDEX = Index()
LAYOUT_INDEX = Index()
ANSWER_INDEX = Index()

STRING_INDEX.index(UNK)
LAYOUT_INDEX.index(UNK)
ANSWER_INDEX.index(UNK)

STRING_INDEX.index(NULL)
LAYOUT_INDEX.index(NULL)
ANSWER_INDEX.index(NULL)
from util import Index

WORD_INDEX = Index()
def kmeans(markers, initial_clusters, distance_threshold, movement_threshold):
    '''
    Run K-means algorithm on the clusters.

    markers is a list of PointWithID instances, while initial_clusters is
    what you returned in initialize_clusters above.

    On each iteration, we first recompute the set of clusters: for every
    cluster from the previous iteration, we initialize one cluster at the
    mean of the members of the old cluster. Then, we re-assign marker points
    to the new clusters.

    To re-assign points, distance_threshold is an upper bound for the
    distance that a marker can be from a cluster. You can use this to search
    for nearby clusters using the Index. However, you should use
    similarity_metric to determine the most similar cluster.

    We continue until the sum of the distances that clusters move on the
    recomputation step is less than movement_threshold.

    For example, if we have two clusters (0, 0) and (1, 1) with three points
    (0, 0), (1, 1), and (1.1, 1), then the first point is assigned to the
    first cluster while the other two are assigned to the second cluster. On
    the next recomputation step, the first cluster moves 0 units, and the
    second cluster moves 0.05 units (to the mean (1.05, 1)); if
    movement_threshold > 0.05, then we would terminate.
    '''

    def similarity_metric(cluster, marker):
        '''
        Returns a similarity metric between the specified cluster and marker.
        A lower value indicates greater similarity.
        '''
        distance = marker.distance_to(cluster)
        angle_difference = marker.angle_to(cluster)
        return distance + angle_difference

    def recompute_clusters(prev_clusters):
        '''
        Return a list of new clusters, and the sum of the distances that
        clusters moved.
        '''
        new_clusters = []
        total_distance_moved = 0
        for cluster in prev_clusters:
            # Each new cluster sits at the mean of the old cluster's members.
            new_cluster = Cluster(cluster.get_mean())
            new_clusters.append(new_cluster)
            total_distance_moved += new_cluster.center.distance_to(
                cluster.center)
        return new_clusters, total_distance_moved

    def assign_members(cluster_index, markers):
        '''
        Assign each marker point to the closest cluster.
        '''
        for marker in markers:
            # distance_threshold bounds how far a marker can be from its
            # cluster, so the candidate list is assumed to be non-empty.
            nearby_clusters = cluster_index.nearby(marker, distance_threshold)
            best_cluster = min(
                nearby_clusters,
                key=lambda cluster: similarity_metric(cluster, marker))
            best_cluster.add_member(marker)

    clusters = initial_clusters
    distance = float('inf')
    while distance >= movement_threshold:
        clusters, distance = recompute_clusters(clusters)
        index = Index(clusters)
        assign_members(index, markers)
        print('moved clusters distance={}'.format(distance))
    return clusters
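# Worked version of the docstring example above, using plain tuples in place
# of the project's Cluster/PointWithID types (an illustrative assumption).
# After assignment, the second cluster's members are (1, 1) and (1.1, 1).
members = [(1.0, 1.0), (1.1, 1.0)]
mean = (sum(x for x, _ in members) / len(members),
        sum(y for _, y in members) / len(members))          # -> (1.05, 1.0)
movement = ((mean[0] - 1.0) ** 2 + (mean[1] - 1.0) ** 2) ** 0.5
assert abs(movement - 0.05) < 1e-9
# Total movement this round is 0 + 0.05, so any movement_threshold greater
# than 0.05 ends the loop.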
#!/usr/bin/env python2

from util import Index

UNK = "*unknown*"
NULL = "*null*"

QUESTION_INDEX = Index()
MODULE_INDEX = Index()
MODULE_TYPE_INDEX = Index()
ANSWER_INDEX = Index()

UNK_ID = QUESTION_INDEX.index(UNK)
MODULE_INDEX.index(UNK)
ANSWER_INDEX.index(UNK)

NULL_ID = QUESTION_INDEX.index(NULL)
# MODULE_INDEX.index(NULL)
# ANSWER_INDEX.index(NULL)
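# Sketch (an assumption, not code from this repo) of how these module-level
# vocabularies are typically consumed. It relies only on what the lines above
# already show: util.Index.index(symbol) returns an id for the symbol, with
# the *unknown* and *null* sentinels registered first.
def encode_question(tokens):
    # Interns each token into QUESTION_INDEX and returns the id sequence;
    # mapping genuinely unseen test-time tokens to UNK_ID is left to the
    # real preprocessing code.
    return [QUESTION_INDEX.index(tok) for tok in tokens]

# e.g. encode_question("what color is the shape".split())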