def main():
    line_chart_example()
    bar_chart_example()
    pie_chart_example()
    histogram_example()

    values, counts = utils.get_frequencies(
        utils.msrp_table, utils.header.index("ModelYear"))  # parallel arrays
    print(values)
    print(counts)
    # solutions to bar and pie chart tasks
    bar_chart_example(values, counts, "model_year_bar_chart.pdf")
    pie_chart_example(values, counts, "model_year_pie_chart.pdf")

    year_names, year_groups = utils.group_by(
        utils.msrp_table, utils.header.index("ModelYear"))
    print(year_names)
    print(year_groups)

    box_plot_example()
    # solution to msrp grouped by model year box plot task
    print("operating on a longer table for testing msrp grouped by year")
    year_names, year_msrp_groups = utils.group_by(
        utils.msrp_table_long,
        utils.header.index("ModelYear"),
        include_only_column_index=utils.header.index("MSRP"))
    print(year_names)
    print(year_msrp_groups)
    mrsp_grouped_by_year_box_plot_example(year_msrp_groups, year_names)

    # discretization
    # converting a numeric (continuous) attribute to discrete (categorical)
    # we will implement equal width bin discretization
    values = sorted(np.random.choice(100, 20))  # 20 values in [0, 100)
    print(values)
    cutoffs = utils.compute_equal_widths_cutoffs(values, 5)
    print("cutoffs:", cutoffs)
    # compare to np.histogram()
    np_freqs, np_cutoffs = np.histogram(values, 5)
    print("np_cutoffs:", np_cutoffs[1:])

    # task: write a function to compute the frequencies for the
    # bins defined by cutoffs
    # check your work with numpy np_freqs
    freqs = utils.compute_bin_frequencies(values, cutoffs)
    print("freqs:", freqs)
    print("np_freqs:", np_freqs)

    # now we can plot our own histogram using cutoffs, freqs, and a bar chart
    # TODO: adjust bar widths to be width of bins
    bar_chart_example(cutoffs, freqs, "our_own_histogram_example.pdf")
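# A possible solution to the task above -- a sketch of what
# utils.compute_bin_frequencies(values, cutoffs) could look like, assuming
# cutoffs holds the right edge of each bin (matching the np_cutoffs[1:]
# comparison). Interior bins are half-open and the last bin is closed, so the
# counts line up with np.histogram().
def compute_bin_frequencies(values, cutoffs):
    freqs = [0 for _ in cutoffs]
    for value in values:
        for i, cutoff in enumerate(cutoffs):
            # a value on an interior edge falls into the higher bin, as in
            # np.histogram(); the last bin also includes its right edge
            if value < cutoff or i == len(cutoffs) - 1:
                freqs[i] += 1
                break
    return freqs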
def get_ranking_table(
    db: Session,
    year: int,
    ranking_system: Optional[RankingSystemEnum] = None,
    ranking_type: RankingTypeEnum = RankingTypeEnum["university ranking"],
    field: str = "All",
    subject: str = "All",
) -> Dict[RankingSystemEnum, List[Ranking]]:
    """Queries the ranking table for a given year, grouped by ranking system.

    Args:
        db (Session): SQLAlchemy session instance to connect to the DB
        year (int): The ranking year of publication
        ranking_system (Optional[RankingSystemEnum], optional): The ranking
            system. Defaults to None.
        ranking_type (RankingTypeEnum, optional): The ranking type. Defaults
            to RankingTypeEnum["university ranking"].
        field (str, optional): The ranking field. Defaults to "All".
        subject (str, optional): The ranking subject. Defaults to "All".

    Returns:
        Dict[RankingSystemEnum, List[Ranking]]: The ranking table results,
            grouped by ranking systems
    """
    filters = (
        Ranking.ranking_type == ranking_type,
        Ranking.year == year,
        Ranking.field == field,
        Ranking.subject == subject,
        Ranking.metric == "Rank",
    )
    if ranking_system:
        filters = (*filters, Ranking.ranking_system == ranking_system)
    rankings = db.query(Ranking).filter(*filters).order_by(Ranking.value).all()
    return group_by(rankings, ["ranking_system"])
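# A possible sketch of the group_by(items, fields) helper used above and in
# _aggregate_metrics() further down -- hypothetical, since the real helper is
# not shown. It buckets objects by the named attributes, keying on the bare
# value for a single field and on a tuple for several.
from collections import defaultdict


def group_by(items, fields):
    grouped = defaultdict(list)
    for item in items:
        key = tuple(getattr(item, field) for field in fields)
        grouped[key[0] if len(key) == 1 else key].append(item)
    return grouped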
def migrate_cards(cards, to, unwrap=False, no_event=False):
    g = Game.getgame()
    from .cards import VirtualCard
    groups = group_by(
        cards, lambda c: id(c) if c.is_card(VirtualCard) else id(c.resides_in))

    for l in groups:
        cl = l[0].resides_in
        for c in l:
            if unwrap and c.is_card(VirtualCard):
                # c.move_to(None)
                c.detach()  # resides_in should be consistent with normal cards
                migrate_cards(
                    c.associated_cards, to,
                    unwrap != migrate_cards.SINGLE_LAYER, no_event)
            else:
                c.move_to(to)
                if c.is_card(VirtualCard):
                    assert c.resides_in.owner
                    sp = c.resides_in.owner.special
                    migrate_cards(c.associated_cards, sp, False, no_event)

        if not no_event:
            act = g.action_stack[-1]
            # (action, cardlist, from, to)
            g.emit_event('card_migration', (act, l, cl, to))
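# group_by(cards, key) here is assumed to work like itertools.groupby,
# bundling consecutive cards that share a key (the id of the card itself for
# virtual cards, or of the list it resides in). This is an assumption -- the
# helper's definition is not shown. A minimal sketch:
from itertools import groupby


def group_by(seq, key):
    return [list(group) for _, group in groupby(seq, key)]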
def partition_mst(self):
    W = self._G.edge_properties['weights']
    nW = self._G.new_edge_property('double')
    self._G.edge_properties['negative_weights'] = nW
    nW.a = list(-W.get_array())
    T = graph_tool.topology.min_spanning_tree(self._G, nW)

    H = graph_tool.Graph(directed=False)
    for i, v in enumerate(T):
        if v == 1:
            e = graph_tool.util.find_edge(self._G, self._G.edge_index, int(i))[0]
            H.add_edge(e.source(), e.target())

    I = np.nonzero(T.a)
    K = np.squeeze(np.dstack((I, np.array(W.a)[I])))
    # Sort by second column.
    if K.size == 2:
        E = [K]
    else:
        E = K[K[:, 1].argsort()]

    P = []
    for q in E:
        e = graph_tool.util.find_edge(self._G, self._G.edge_index, int(q[0]))[0]
        e_h = H.edge(e.source(), e.target())
        H.remove_edge(e_h)
        C, h = graph_tool.topology.label_components(H)
        P.append([self._S[p] for p in utils.group_by(np.array(C.a))])
    return P
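# A hedged sketch of the utils.group_by(labels) helper assumed above
# (hypothetical -- the real helper is not shown): given the per-vertex
# component labels C.a, it yields the groups of vertex indices sharing each
# label, which self._S[p] then maps back to items via fancy indexing.
import numpy as np


def group_by(labels):
    return [np.flatnonzero(labels == value) for value in np.unique(labels)]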
def migrate_cards(cards, to, unwrap=False, no_event=False):
    g = Game.getgame()
    from .cards import VirtualCard
    groups = group_by(
        cards, lambda c: c if c.is_card(VirtualCard) else c.resides_in)

    for l in groups:
        cl = l[0].resides_in
        for c in l:
            if unwrap and c.is_card(VirtualCard):
                c.move_to(None)
                migrate_cards(
                    c.associated_cards, to,
                    unwrap != migrate_cards.SINGLE_LAYER, no_event)
            else:
                c.move_to(to)
                if c.is_card(VirtualCard):
                    assert c.resides_in.owner
                    sp = c.resides_in.owner.special
                    migrate_cards(c.associated_cards, sp, False, no_event)

        if not no_event:
            act = g.action_stack[-1]
            # (action, cardlist, from, to)
            g.emit_event('card_migration', (act, l, cl, to))
def optimize_bundles(bundles):
    bundle_by_size = group_by(bundles, keyfunc=lambda i: i.size)
    while bundle_by_size[3] and bundle_by_size[5]:
        bundle_by_size[4].extend([Bundle(size=4), Bundle(size=4)])
        bundle_by_size[3].pop()
        bundle_by_size[5].pop()
    return list(chain(*bundle_by_size.values()))
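# Illustrative behaviour, assuming Bundle is a value object with a size field
# and group_by returns a defaultdict(list) keyed by size (both assumptions --
# neither definition is shown here):
#   optimize_bundles([Bundle(size=3), Bundle(size=5)])
#   # -> [Bundle(size=4), Bundle(size=4)]
# i.e. every 3-bundle/5-bundle pair is rewritten as two 4-bundles.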
def fit(self, X, y):
    self.c = np.unique(y)
    self.X_c = group_by(X, y, self.c)  # rows of X partitioned by class label
    for key in self.X_c:
        X_c_i = np.array(self.X_c[key])
        # class prior, per-class mean vector, and per-class covariance matrix
        self.p_c[key] = np.shape(X_c_i)[0] / np.shape(X)[0]
        self.mean_c[key] = np.mean(X_c_i, axis=0)
        self.cov_matrix_c[key] = cov_matrix(X_c_i)
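# A plausible sketch of the group_by(X, y, classes) helper fit() relies on
# (hypothetical -- the real helper is not shown): it partitions the rows of X
# by their class label in y.
def group_by(X, y, classes):
    return {c: [x for x, label in zip(X, y) if label == c] for c in classes}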
def __init__(self, ds, ids, batch_size, num_chunks):
    self.ds = ds
    self.batch_size = batch_size
    self.ids = ids
    # keep only the groups of ids whose question is shared by more than one id
    self.groups = [
        x[1] for x in utl.group_by(self.ids,
                                   key=lambda x: self.ds.qa[x]['question'],
                                   value=lambda x: x).items()
        if len(x[1]) > 1
    ]
    self.cur_chunk = 0
    self.num_chunks = num_chunks
    self.chunk_size = len(self.groups) // num_chunks
    self.index = 0
def migrate_cards(cards, to, unwrap=False, is_bh=False, front=False,
                  trans=None):
    '''
    cards: cards to move around
    to: destination card list
    unwrap: drop all VirtualCard wrapping, preserve PhysicalCard only
    is_bh: indicates this operation is bottom half of a complete migration
           (pairing with detach_cards)
    front: Rotate migrated cards to front
           (if not, cards are appended to the back of CardList)
    trans: associated MigrateCardsTransaction
    '''
    if not trans:
        with MigrateCardsTransaction(Game.getgame().action_stack[-1]) as trans:
            migrate_cards(cards, to, unwrap, is_bh, front, trans)
            return not trans.cancelled

    if to.owner and to.owner.dead:
        # do not migrate cards to dead character
        trans.cancelled = True
        return

    from .cards import VirtualCard
    groups = group_by(
        cards, lambda c: id(c) if c.is_card(VirtualCard) else id(c.resides_in))

    DETACHED = migrate_cards.DETACHED
    UNWRAPPED = migrate_cards.UNWRAPPED
    detaching = to is DETACHED

    for l in groups:
        cl = l[0].resides_in

        if l[0].is_card(VirtualCard):
            assert len(l) == 1
            trans.move(l, cl, UNWRAPPED if unwrap else to, False, is_bh)
            l[0].unwrapped or migrate_cards(
                l[0].associated_cards,
                to if unwrap or detaching else to.owner.special,
                unwrap if type(unwrap) is bool else unwrap - 1,
                is_bh, front, trans)
        else:
            trans.move(l, cl, to, is_bh, front)
def select_attribute(instances, att_indexes, class_index):
    # track [lowest weighted entropy seen so far, index of that attribute]
    lowest = [100, 0]
    for i in att_indexes:
        _, groups = utils.group_by(instances, i)
        total = 0
        for group in groups:
            E = 0
            _, counts = utils.get_frequencies(group, class_index)
            for j in range(len(counts)):
                proportion = counts[j] / sum(counts)
                E += -(proportion * math.log(proportion, 2))
            # weight each partition's entropy by its relative size
            E *= len(group) / len(instances)
            total += E
        if total < lowest[0]:
            lowest[0] = total
            lowest[1] = i
    return lowest[1]
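# The quantity minimized by select_attribute() is E_new, the class entropy
# weighted across the partitions produced by splitting on attribute i:
#   E_partition = -sum_j (p_j * log2(p_j)),  p_j = count_j / |partition|
#   E_new = sum over partitions of (|partition| / |instances|) * E_partition
# For example, a partition of 8 instances split 6/2 between two class labels
# contributes (8/|instances|) * (-(6/8)*log2(6/8) - (2/8)*log2(2/8)).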
def stratify(table, header, class_label, k):
    '''
    parameters table and header are the data table and a list of the
        attributes in order, respectively
    parameter class_label is a string in the header representing the column
        to use as the class
    parameter k is the number of folds/partitions to make

    returns stratified_list, a list of tables
    '''
    # group table by class label; partitioned_table is a list of tables
    partitioned_table = utils.group_by(table, header, class_label)
    # for each partition, loop through rows and append to a different table
    # in stratified_list (also a list of tables)
    stratified_list = [[] for i in range(k)]
    for partition in partitioned_table:
        index = 0
        for row in partition:
            stratified_list[index].append(row)
            index = (index + 1) % k
    return stratified_list
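# For example, stratifying a table with classes A (4 rows) and B (2 rows)
# into k=2 folds deals each class out round-robin, so each fold gets 2 A-rows
# and 1 B-row and the original 2:1 class ratio is preserved in every fold.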
def find_directions(link, probes, get_distance=get_distance):
    by_sample_id = group_by('sampleID', probes)
    result = {}
    for (sample_id, probes) in by_sample_id:
        sorted_by_datetime = sorted(
            probes, key=lambda p: datetime_parser.parse(p.dateTime))
        if len(sorted_by_datetime) > 1:
            distance_from_first = find_distance_from_ref(
                link, first(sorted_by_datetime))
            distance_from_last = find_distance_from_ref(
                link, last(sorted_by_datetime))
            result[sample_id] = ('F'
                                 if distance_from_first < distance_from_last
                                 else 'T')
        else:
            result[sample_id] = '?'
    return result
def get_stratified_folds(table, k=10):
    '''
    Creates k folds of table which have an equal amount of each class,
        where the class is the value in the last column of each row
    Param table: A table to divide into folds
    Param k: The number of folds
    Returns: folds, a list of tables which have an equal amount of each class.
        Latter folds may have one fewer per class.
    '''
    # group the rows by the class label in the last column
    _, groups = utils.group_by(table, len(table[0]) - 1)
    folds = [[] for _ in range(k)]
    for group in groups:
        for i, instance in enumerate(group):
            folds[i % k].append(instance)
    return folds
def migrate_cards(cards, to, unwrap=False, is_bh=False, trans=None):
    '''
    cards: cards to move around
    to: destination card list
    unwrap: drop all VirtualCard wrapping, preserve PhysicalCard only
    is_bh: indicates this operation is bottom half of a complete migration
           (pairing with detach_cards)
    trans: associated MigrateCardsTransaction
    '''
    if not trans:
        with MigrateCardsTransaction(Game.getgame().action_stack[-1]) as trans:
            migrate_cards(cards, to, unwrap, is_bh, trans)
            return not trans.cancelled

    if to.owner and to.owner.dead:
        # do not migrate cards to dead character
        trans.cancelled = True
        return

    from .cards import VirtualCard
    groups = group_by(
        cards, lambda c: id(c) if c.is_card(VirtualCard) else id(c.resides_in))

    DETACHED = migrate_cards.DETACHED
    UNWRAPPED = migrate_cards.UNWRAPPED
    detaching = to is DETACHED

    for l in groups:
        cl = l[0].resides_in

        if l[0].is_card(VirtualCard):
            assert len(l) == 1
            trans.move(l, cl, UNWRAPPED if unwrap else to, is_bh)
            l[0].unwrapped or migrate_cards(
                l[0].associated_cards,
                to if unwrap or detaching else to.owner.special,
                unwrap if type(unwrap) is bool else unwrap - 1,
                is_bh, trans)
        else:
            trans.move(l, cl, to, is_bh)
def _aggregate_metrics(self, metrics: List[Ranking]) -> List[Ranking]:
    """Aggregates the metric values for different institutions.

    The function groups a list of rankings by year, ranking system, and
    metric type. For each group, it then calculates the mean value and
    assigns the result to a new Ranking object, which is in turn appended
    to the results list.

    Args:
        metrics (List[Ranking]): The list of ranking metrics to be aggregated

    Returns:
        List[Ranking]: The list of aggregated metrics
    """
    if self.entity_type == EntityTypeEnum["institution"]:
        return metrics

    fields = ["year", "ranking_system", "metric"]
    grouped_metrics = group_by(metrics, fields)
    result = []
    for group, metrics_in_group in grouped_metrics.items():
        values = [m.value for m in metrics_in_group if m.value]
        if not values:
            continue
        count = len(values) if self.remove_nulls else len(self.ids)
        mean = round(sum(values) / count, 2)
        kwargs = {
            **metrics_in_group[0].__dict__,
            **dict(zip(fields, group)),
            "value": mean,
        }
        aggregated_metric = self._create_metric(**kwargs)
        result.append(aggregated_metric)
    return result
def normalize(items):
    cache = group_by(items, keyfunc=lambda i: i.label)
    return tuple(
        CartItem(label=label,
                 quantity=sum(item.quantity for item in cache[label]))
        for label in unseen(item.label for item in items))
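# Illustrative usage, assuming CartItem(label, quantity) and that unseen()
# yields each label once in first-seen order (assumptions -- neither
# definition is shown here):
#   normalize([CartItem('a', 1), CartItem('b', 2), CartItem('a', 3)])
#   # -> (CartItem(label='a', quantity=4), CartItem(label='b', quantity=2))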
def __init__(self, keyword_array, weight=1):
    self.weight = weight
    self.keyword_links = LowerKeyDict({
        kw: set(flatten([item['linked_keywords'] for item in items], 1))
        for kw, items in group_by('keyword', keyword_array).items()
    })
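# A hedged sketch of the group_by(field, items) helper used here
# (hypothetical -- the real helper is not shown): it buckets dict records by
# the value of the given field.
from collections import defaultdict


def group_by(field, items):
    grouped = defaultdict(list)
    for item in items:
        grouped[item[field]].append(item)
    return grouped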
    if len(X) == len(Y) == 5 and len(set(X)) > 1 and len(set(Y)) > 1:
        rho = correlation.spearman_rho_tr(X, Y)
        spearmans['all'].append(rho)
        if u.mturk and not u.scholar:
            spearmans['mturk'].append(rho)
        if u.scholar and not u.mturk:
            spearmans['scholar'].append(rho)

write('intra-rater spearman correlations:')
for c in spearmans:
    write('\t%s: %.3f n=%d, dev=%.3f' % (
        c, utils.mean(spearmans[c]), len(spearmans[c]), utils.dev(spearmans[c])
    ))

for spec in utils.SPECIFICITIES:
    write('mean absolute errors for %s questions:' % spec)
    for c in utils.CONDITIONS:
        diffs = []
        for (pair_id, ratings) in s.get_ratings_by_condition(spec, c).items():
            for reratings in utils.group_by(ratings, lambda r: r.user).values():
                if len(reratings) >= 2:
                    diffs.append(abs(reratings[0].response - reratings[1].response))
        if diffs:
            write('\tcondition %s: mae=%.3f, n=%d, dev=%.3f' % (
                c, utils.mean(diffs), len(diffs), utils.dev(diffs)))

f.close()
        else:
            ratings[r.pair_id] = r.response

    if len(X) == len(Y) == 5 and len(set(X)) > 1 and len(set(Y)) > 1:
        rho = correlation.spearman_rho_tr(X, Y)
        spearmans['all'].append(rho)
        if u.mturk and not u.scholar:
            spearmans['mturk'].append(rho)
        if u.scholar and not u.mturk:
            spearmans['scholar'].append(rho)

write('intra-rater spearman correlations:')
for c in spearmans:
    write('\t%s: %.3f n=%d, dev=%.3f' % (c, utils.mean(
        spearmans[c]), len(spearmans[c]), utils.dev(spearmans[c])))

for spec in utils.SPECIFICITIES:
    write('mean absolute errors for %s questions:' % spec)
    for c in utils.CONDITIONS:
        diffs = []
        for (pair_id, ratings) in s.get_ratings_by_condition(spec, c).items():
            for reratings in utils.group_by(ratings, lambda r: r.user).values():
                if len(reratings) >= 2:
                    diffs.append(
                        abs(reratings[0].response - reratings[1].response))
        if diffs:
            write('\tcondition %s: mae=%.3f, n=%d, dev=%.3f' % (
                c, utils.mean(diffs), len(diffs), utils.dev(diffs)))

f.close()
def normalize(items):
    cache = group_by(items, keyfunc=lambda i: i.reference)
    return tuple(
        CartItem(reference=reference,
                 quantity=sum(item.quantity for item in cache[reference]))
        for reference in unseen(item.reference for item in items))
def sort_messages(all_msgs):
    channels = group_by(all_msgs, lambda msg: msg['channel']['name'])
    for chan in channels.values():
        chan.sort(key=lambda msg: float(msg['created_at']))
    return [msg for chan in channels.values() for msg in chan]
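# Illustrative usage, assuming group_by returns a dict of lists keyed by the
# lambda's result (an assumption -- group_by is not shown here):
#   sort_messages([
#       {'channel': {'name': 'general'}, 'created_at': '2.0'},
#       {'channel': {'name': 'random'}, 'created_at': '1.5'},
#       {'channel': {'name': 'general'}, 'created_at': '1.0'},
#   ])
#   # -> general messages (1.0 then 2.0) followed by the random message (1.5)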