Ejemplo n.º 1
0
def main():
    """Run the chart demos, then the grouping and discretization exercises.

    Takes no arguments and returns nothing. Intermediate results are printed
    and the chart helpers are called in a fixed order.
    """
    # Chart demos, each called with its default arguments.
    line_chart_example()
    bar_chart_example()
    pie_chart_example()
    histogram_example()

    # Frequencies of the ModelYear column, returned as two parallel arrays.
    years, year_counts = utils.get_frequencies(
        utils.msrp_table, utils.header.index("ModelYear"))
    print(years)
    print(year_counts)

    # Bar and pie chart tasks: plot the frequencies computed above.
    bar_chart_example(years, year_counts, "model_year_bar_chart.pdf")
    pie_chart_example(years, year_counts, "model_year_pie_chart.pdf")

    # Group the table by ModelYear.
    group_names, row_groups = utils.group_by(
        utils.msrp_table, utils.header.index("ModelYear"))
    print(group_names)
    print(row_groups)

    box_plot_example()

    # Box plot task: MSRP grouped by model year, using the longer table and
    # restricting the grouped output to the MSRP column.
    print("operating on a longer table for testing msrp grouped by year")
    group_names, msrp_groups = utils.group_by(
        utils.msrp_table_long,
        utils.header.index("ModelYear"),
        include_only_column_index=utils.header.index("MSRP"))
    print(group_names)
    print(msrp_groups)
    mrsp_grouped_by_year_box_plot_example(msrp_groups, group_names)

    # Discretization: turn a numeric (continuous) attribute into a discrete
    # (categorical) one using equal-width bins.
    samples = sorted(np.random.choice(100, 20))  # 20 values in [0, 100)
    print(samples)
    bin_cutoffs = utils.compute_equal_widths_cutoffs(samples, 5)
    print("cutoffs:", bin_cutoffs)

    # Cross-check both cutoffs and frequencies against np.histogram().
    numpy_freqs, numpy_edges = np.histogram(samples, 5)
    print("np_cutoffs:", numpy_edges[1:])
    bin_freqs = utils.compute_bin_frequencies(samples, bin_cutoffs)
    print("freqs:", bin_freqs)
    print("np_freqs:", numpy_freqs)

    # Plot our own histogram from the cutoffs and frequencies as a bar chart.
    # TODO: adjust bar widths to be width of bins
    bar_chart_example(bin_cutoffs, bin_freqs, "our_own_histogram_example.pdf")
Ejemplo n.º 2
0
def get_ranking_table(
    db: Session,
    year: int,
    ranking_system: Optional[RankingSystemEnum] = None,
    ranking_type: RankingTypeEnum = RankingTypeEnum["university ranking"],
    field: str = "All",
    subject: str = "All",
) -> Dict[RankingSystemEnum, List[Ranking]]:
    """Query the "Rank" metric rows for one year, grouped by ranking system.

    Args:
        db (Session): SQLAlchemy session used to run the query.
        year (int): The ranking year to match.
        ranking_system (Optional[RankingSystemEnum], optional): When truthy,
        only rows of this ranking system are returned. Defaults to None
        (no restriction on ranking system).
        ranking_type (RankingTypeEnum, optional): The ranking type to match.
        Defaults to RankingTypeEnum["university ranking"].
        field (str, optional): The ranking field to match. Defaults to "All".
        subject (str, optional): The ranking subject to match. Defaults to
        "All".

    Returns:
        Dict[RankingSystemEnum, List[Ranking]]: The matching rows, ordered
        by value in the query and then passed through ``group_by`` on
        "ranking_system".
    """
    conditions = [
        Ranking.ranking_type == ranking_type,
        Ranking.year == year,
        Ranking.field == field,
        Ranking.subject == subject,
        Ranking.metric == "Rank",
    ]
    # The ranking-system restriction is optional.
    if ranking_system:
        conditions.append(Ranking.ranking_system == ranking_system)

    query = db.query(Ranking).filter(*conditions)
    ordered_rows = query.order_by(Ranking.value).all()
    return group_by(ordered_rows, ["ranking_system"])
Ejemplo n.º 3
0
def migrate_cards(cards, to, unwrap=False, no_event=False):
    """Move ``cards`` to ``to`` group by group, emitting one event per group.

    Cards are grouped with ``group_by``: a VirtualCard is keyed by its own
    id, every other card by the id of the list it currently resides in.

    Args:
        cards: the cards to migrate.
        to: destination handed to ``move_to``.
        unwrap: when truthy, a VirtualCard is detached and its
            ``associated_cards`` are migrated to ``to`` in its place. The
            nested call receives ``unwrap != migrate_cards.SINGLE_LAYER``.
        no_event: when truthy, no 'card_migration' event is emitted; the
            flag is forwarded to nested calls.
    """
    game = Game.getgame()
    from .cards import VirtualCard

    def group_key(card):
        if card.is_card(VirtualCard):
            return id(card)
        return id(card.resides_in)

    for batch in group_by(cards, group_key):
        # Remember the source list before any card of the batch is moved.
        origin = batch[0].resides_in

        for card in batch:
            if unwrap and card.is_card(VirtualCard):
                # resides_in should be consistent with normal cards
                card.detach()
                migrate_cards(
                    card.associated_cards,
                    to,
                    unwrap != migrate_cards.SINGLE_LAYER,
                    no_event,
                )
                continue

            card.move_to(to)
            if not card.is_card(VirtualCard):
                continue

            # A VirtualCard that was moved as-is parks its underlying cards
            # in the special list of its new owner.
            assert card.resides_in.owner
            special = card.resides_in.owner.special
            migrate_cards(card.associated_cards, special, False, no_event)

        if no_event:
            continue

        current_action = game.action_stack[-1]
        # Payload layout: (action, cardlist, from, to)
        game.emit_event('card_migration', (current_action, batch, origin, to))
Ejemplo n.º 4
0
 def partition_mst(self):
     """Build a sequence of partitions by cutting spanning-tree edges one by one.

     A spanning tree of ``self._G`` is computed on the negated 'weights'
     edge property and its edges are copied into a separate undirected
     graph. Those edges are then removed in ascending order of their
     original weight; after every removal the connected components of the
     remaining graph are labelled, grouped and appended to the result.

     Side effect: registers a 'negative_weights' edge property on
     ``self._G``.

     Returns:
         list: one entry per removed tree edge. Each entry is a list
         obtained by indexing ``self._S`` with every group returned by
         ``utils.group_by`` for the component labels.
     """
     W = self._G.edge_properties['weights']
     nW = self._G.new_edge_property('double')
     self._G.edge_properties['negative_weights'] = nW
     # Negate the weights so that the minimum spanning tree over nW is a
     # maximum spanning tree with respect to the original weights.
     nW.a = list(-W.get_array())
     T = graph_tool.topology.min_spanning_tree(self._G, nW)
     # H receives a copy of every edge that T flags as part of the tree.
     H = graph_tool.Graph(directed=False)
     for i, v in enumerate(T):
         if v == 1:
             e = graph_tool.util.find_edge(self._G, self._G.edge_index,
                                           int(i))[0]
             H.add_edge(e.source(), e.target())
     # Pair each tree-edge index with its original (non-negated) weight.
     I = np.nonzero(T.a)
     K = np.squeeze(np.dstack((I, np.array(W.a)[I])))
     # Sort by second column.
     # With a single tree edge np.squeeze collapses K to a 1-D pair, which
     # cannot be column-indexed, so it is wrapped in a list instead.
     if K.size == 2:
         E = [K]
     else:
         E = K[K[:, 1].argsort()]
     P = []
     for q in E:
         # q[0] is the edge index in self._G; find the matching edge in H.
         e = graph_tool.util.find_edge(self._G, self._G.edge_index,
                                       int(q[0]))[0]
         e_h = H.edge(e.source(), e.target())
         H.remove_edge(e_h)
         # Only the component labels C are used; h is ignored here.
         C, h = graph_tool.topology.label_components(H)
         # presumably utils.group_by yields index groups per component label
         # and self._S supports indexing by such a group; verify against utils
         P.append([self._S[p] for p in utils.group_by(np.array(C.a))])
     return P
Ejemplo n.º 5
0
def migrate_cards(cards, to, unwrap=False, no_event=False):
    """Move ``cards`` to ``to`` group by group, emitting one event per group.

    Cards are grouped with ``group_by``: a VirtualCard is keyed by itself,
    every other card by the list it currently resides in.

    Args:
        cards: the cards to migrate.
        to: destination handed to ``move_to``.
        unwrap: when truthy, a VirtualCard is moved to ``None`` and its
            ``associated_cards`` are migrated to ``to`` in its place. The
            nested call receives ``unwrap != migrate_cards.SINGLE_LAYER``.
        no_event: when truthy, no 'card_migration' event is emitted; the
            flag is forwarded to nested calls.
    """
    game = Game.getgame()
    from .cards import VirtualCard

    def group_key(card):
        if card.is_card(VirtualCard):
            return card
        return card.resides_in

    for batch in group_by(cards, group_key):
        # Remember the source list before any card of the batch is moved.
        origin = batch[0].resides_in

        for card in batch:
            if unwrap and card.is_card(VirtualCard):
                card.move_to(None)
                migrate_cards(
                    card.associated_cards,
                    to,
                    unwrap != migrate_cards.SINGLE_LAYER,
                    no_event,
                )
                continue

            card.move_to(to)
            if not card.is_card(VirtualCard):
                continue

            # A VirtualCard that was moved as-is parks its underlying cards
            # in the special list of its new owner.
            assert card.resides_in.owner
            special = card.resides_in.owner.special
            migrate_cards(card.associated_cards, special, False, no_event)

        if no_event:
            continue

        current_action = game.action_stack[-1]
        # Payload layout: (action, cardlist, from, to)
        game.emit_event('card_migration', (current_action, batch, origin, to))
Ejemplo n.º 6
0
def optimize_bundles(bundles):
    """Trade each (size 3, size 5) pair of bundles for two size-4 bundles.

    Bundles are grouped by their ``size``. While both the size-3 and the
    size-5 groups are non-empty, one bundle is popped from each and two
    fresh ``Bundle(size=4)`` objects are added to the size-4 group.

    Returns:
        list: all bundles remaining in the grouping, flattened in the
        grouping's own value order.
    """
    by_size = group_by(bundles, keyfunc=lambda bundle: bundle.size)

    while by_size[3] and by_size[5]:
        by_size[4].extend([Bundle(size=4), Bundle(size=4)])
        by_size[3].pop()
        by_size[5].pop()

    remaining = chain.from_iterable(by_size.values())
    return list(remaining)
Ejemplo n.º 7
0
    def fit(self, X, y):
        """Compute per-class statistics from samples ``X`` and labels ``y``.

        Stores the unique labels in ``self.c`` and the grouping of ``X`` by
        label in ``self.X_c``. For every key of that grouping it fills in:

        * ``self.p_c[key]``: rows in the group divided by rows in ``X``;
        * ``self.mean_c[key]``: mean of the group along axis 0;
        * ``self.cov_matrix_c[key]``: ``cov_matrix`` of the group.
        """
        self.c = np.unique(y)
        self.X_c = group_by(X, y, self.c)

        for label in self.X_c:
            members = np.array(self.X_c[label])

            n_members = np.shape(members)[0]
            n_total = np.shape(X)[0]
            self.p_c[label] = n_members / n_total
            self.mean_c[label] = np.mean(members, axis=0)
            self.cov_matrix_c[label] = cov_matrix(members)
Ejemplo n.º 8
0
 def __init__(self, ds, ids, batch_size, num_chunks):
     """Group ``ids`` by question text and set up chunked iteration state.

     Args:
         ds: dataset object; ``ds.qa[id]['question']`` is read for each id.
         ids: the ids to group.
         batch_size: stored as-is on the instance.
         num_chunks: number of chunks the groups are divided into; used as
             the divisor when computing ``chunk_size``.
     """
     self.ds = ds
     self.batch_size = batch_size
     self.ids = ids

     def question_of(sample_id):
         return self.ds.qa[sample_id]['question']

     by_question = utl.group_by(self.ids,
                                key=question_of,
                                value=lambda sample_id: sample_id)
     # Keep only questions shared by more than one id.
     self.groups = [
         members for _, members in by_question.items() if len(members) > 1
     ]

     self.cur_chunk = 0
     self.num_chunks = num_chunks
     self.chunk_size = len(self.groups) // num_chunks
     self.index = 0
Ejemplo n.º 9
0
def migrate_cards(cards,
                  to,
                  unwrap=False,
                  is_bh=False,
                  front=False,
                  trans=None):
    '''
    Record the movement of ``cards`` to ``to`` on a migration transaction.

    cards: cards to move around
    to: destination card list
    unwrap: drop all VirtualCard wrapping, preserve PhysicalCard only;
        when it is not a bool it is decremented by one for each nested call
    is_bh: indicates this operation is bottom half of a complete migration (pairing with detach_cards)
    front: Rotate migrated cards to front (if not, cards are appended to the back of CardList)
    trans: associated MigrateCardsTransaction; when omitted a new one is
        opened around the whole migration

    Returns ``not trans.cancelled`` when called without ``trans``; nested
    calls (with ``trans`` supplied) return None.
    '''
    if not trans:
        # Outermost call: open a transaction and re-enter with it.
        with MigrateCardsTransaction(Game.getgame().action_stack[-1]) as trans:
            migrate_cards(cards, to, unwrap, is_bh, front, trans)
            return not trans.cancelled

    if to.owner and to.owner.dead:
        # do not migrate cards to dead character
        trans.cancelled = True
        return

    from .cards import VirtualCard

    def group_key(card):
        # A VirtualCard forms its own group; other cards are grouped by the
        # list they currently reside in.
        if card.is_card(VirtualCard):
            return id(card)
        return id(card.resides_in)

    groups = group_by(cards, group_key)

    DETACHED = migrate_cards.DETACHED
    UNWRAPPED = migrate_cards.UNWRAPPED
    detaching = to is DETACHED

    for group in groups:
        origin = group[0].resides_in

        if not group[0].is_card(VirtualCard):
            trans.move(group, origin, to, is_bh, front)
            continue

        assert len(group) == 1
        virtual = group[0]
        # NOTE(review): the last two arguments here are (False, is_bh) while
        # the non-virtual branch passes (is_bh, front) -- confirm against the
        # signature of MigrateCardsTransaction.move.
        trans.move(group, origin, UNWRAPPED if unwrap else to, False, is_bh)
        if virtual.unwrapped:
            continue

        # Underlying cards follow the VirtualCard: to the same destination
        # when unwrapping or detaching, otherwise to the owner's special list.
        inner_cards = virtual.associated_cards
        if unwrap or detaching:
            inner_to = to
        else:
            inner_to = to.owner.special
        inner_unwrap = unwrap if type(unwrap) is bool else unwrap - 1
        migrate_cards(inner_cards, inner_to, inner_unwrap, is_bh, front, trans)
Ejemplo n.º 10
0
def select_attribute(instances, att_indexes, class_index):
    """Return the attribute index whose split has the lowest weighted entropy.

    For every candidate attribute the instances are grouped by that
    attribute's value. The entropy (base 2) of the class distribution is
    computed for each group and weighted by the group's share of all
    instances; the weighted entropies are summed per attribute.

    Args:
        instances: table (list of rows) to split.
        att_indexes: iterable of candidate attribute (column) indexes.
        class_index: column index of the class label.

    Returns:
        The candidate index with the smallest summed weighted entropy. On
        ties the earliest candidate wins. If ``att_indexes`` is empty, 0 is
        returned.
    """
    best_index = 0
    # Start from +inf instead of a magic upper bound so that any finite
    # entropy is accepted.
    best_entropy = math.inf
    total_instances = len(instances)

    for att_index in att_indexes:
        _, groups = utils.group_by(instances, att_index)
        weighted_entropy = 0
        for group in groups:
            _, counts = utils.get_frequencies(group, class_index)
            # Hoisted: the group total does not change while iterating counts.
            group_total = sum(counts)
            entropy = 0
            for count in counts:
                proportion = count / group_total
                entropy += -(proportion * math.log(proportion, 2))
            weighted_entropy += entropy * (len(group) / total_instances)

        if weighted_entropy < best_entropy:
            best_entropy = weighted_entropy
            best_index = att_index

    return best_index
Ejemplo n.º 11
0
def stratify(table, header, class_label, k):
    '''
    Split table into k folds, dealing the rows of each class out in turn.

    parameters table and header are the data table and a list of the attributes in order, respectively
    parameter class_label is a string in the header representing the column to use as class
    parameter k is the number of folds/partitions to make
    returns a list of k tables; every class partition is dealt round-robin
    across them, restarting at the first fold for each partition
    '''
    # one table per class label
    class_partitions = utils.group_by(table, header, class_label)

    folds = [[] for _ in range(k)]
    for partition in class_partitions:
        fold_no = 0
        for row in partition:
            folds[fold_no].append(row)
            # advance to the next fold, wrapping after the last one
            fold_no += 1
            if fold_no == k:
                fold_no = 0

    return folds
def find_directions(link, probes, get_distance=get_distance):
    """Classify each sample's direction along ``link`` as 'F', 'T' or '?'.

    Probes are grouped by 'sampleID' and each group is sorted by its parsed
    ``dateTime``. For a group with more than one probe, the reference
    distance of the earliest probe is compared with that of the latest:
    'F' when the earliest one is strictly smaller, otherwise 'T'. A group
    with a single probe is marked '?'.

    Args:
        link: passed through to ``find_distance_from_ref``.
        probes: probe records exposing ``dateTime``.
        get_distance: accepted for interface compatibility; not used by
            this body.

    Returns:
        dict: sample id -> 'F', 'T' or '?'.
    """
    directions = {}

    for sample_id, sample_probes in group_by('sampleID', probes):
        chronological = sorted(
            sample_probes,
            key=lambda probe: datetime_parser.parse(probe.dateTime))

        if len(chronological) <= 1:
            directions[sample_id] = '?'
            continue

        start_distance = find_distance_from_ref(link, first(chronological))
        end_distance = find_distance_from_ref(link, last(chronological))
        if start_distance < end_distance:
            directions[sample_id] = 'F'
        else:
            directions[sample_id] = 'T'

    return directions
def get_stratified_folds(table, k=10):
    '''
        Creates k folds of table, dealing the instances of each class out
        round-robin so that every fold gets a near-equal share of each class.
        The class is taken from the last column of the table.
        Param table: A table (list of rows) to divide into folds
        Param k: The number of folds
        Returns: folds, a list of k tables. Latter folds may have one fewer
        instance per class. An empty table yields k empty folds.
    '''
    folds = [[] for _ in range(k)]

    # An empty table has no first row to take the class column from.
    if len(table) == 0:
        return folds

    class_index = len(table[0]) - 1
    _, groups = utils.group_by(table, class_index)

    for group in groups:
        for i, instance in enumerate(group):
            folds[i % k].append(instance)
    return folds
Ejemplo n.º 14
0
def migrate_cards(cards, to, unwrap=False, is_bh=False, trans=None):
    '''
    Record the movement of ``cards`` to ``to`` on a migration transaction.

    cards: cards to move around
    to: destination card list
    unwrap: drop all VirtualCard wrapping, preserve PhysicalCard only;
        when it is not a bool it is decremented by one for each nested call
    is_bh: indicates this operation is bottom half of a complete migration (pairing with detach_cards)
    trans: associated MigrateCardsTransaction; when omitted a new one is
        opened around the whole migration

    Returns ``not trans.cancelled`` when called without ``trans``; nested
    calls (with ``trans`` supplied) return None.
    '''
    if not trans:
        # Outermost call: open a transaction and re-enter with it.
        with MigrateCardsTransaction(Game.getgame().action_stack[-1]) as trans:
            migrate_cards(cards, to, unwrap, is_bh, trans)
            return not trans.cancelled

    if to.owner and to.owner.dead:
        # do not migrate cards to dead character
        trans.cancelled = True
        return

    from .cards import VirtualCard

    def group_key(card):
        # A VirtualCard forms its own group; other cards are grouped by the
        # list they currently reside in.
        if card.is_card(VirtualCard):
            return id(card)
        return id(card.resides_in)

    groups = group_by(cards, group_key)

    DETACHED = migrate_cards.DETACHED
    UNWRAPPED = migrate_cards.UNWRAPPED
    detaching = to is DETACHED

    for group in groups:
        origin = group[0].resides_in

        if not group[0].is_card(VirtualCard):
            trans.move(group, origin, to, is_bh)
            continue

        assert len(group) == 1
        virtual = group[0]
        trans.move(group, origin, UNWRAPPED if unwrap else to, is_bh)
        if virtual.unwrapped:
            continue

        # Underlying cards follow the VirtualCard: to the same destination
        # when unwrapping or detaching, otherwise to the owner's special list.
        inner_cards = virtual.associated_cards
        if unwrap or detaching:
            inner_to = to
        else:
            inner_to = to.owner.special
        inner_unwrap = unwrap if type(unwrap) is bool else unwrap - 1
        migrate_cards(inner_cards, inner_to, inner_unwrap, is_bh, trans)
Ejemplo n.º 15
0
    def _aggregate_metrics(self, metrics: List[Ranking]) -> List[Ranking]:
        """Aggregates the metric values for different institutions.

        For the "institution" entity type the metrics are returned
        unchanged. Otherwise the rankings are grouped by year, ranking
        system, and metric type. For each group the mean value is computed
        and assigned to a new object built by ``self._create_metric``,
        which is appended to the results list.

        Only ``None`` is treated as a missing value; a value of 0 is a
        legitimate data point and takes part in the mean. Groups without
        any non-missing value are skipped.

        The divisor of the mean is the number of non-missing values when
        ``self.remove_nulls`` is set, and ``len(self.ids)`` otherwise.

        Args:
            metrics (List[Ranking]): The list of ranking metrics to be
            aggregated

        Returns:
            List[Ranking]: The list of aggregated metrics
        """
        if self.entity_type == EntityTypeEnum["institution"]:
            return metrics

        fields = ["year", "ranking_system", "metric"]
        grouped_metrics = group_by(metrics, fields)
        result = []
        for group, metrics_in_group in grouped_metrics.items():
            # Compare against None explicitly: a plain truthiness test would
            # also discard 0 and skew the mean when nulls are removed.
            values = [
                m.value for m in metrics_in_group if m.value is not None
            ]
            if not values:
                continue

            count = len(values) if self.remove_nulls else len(self.ids)
            mean = round(sum(values) / count, 2)

            # Start from the first metric's attributes, then override the
            # grouping fields and the value with the aggregated ones.
            kwargs = {
                **metrics_in_group[0].__dict__,
                **dict(zip(fields, group)),
                "value": mean,
            }
            aggregated_metric = self._create_metric(**kwargs)
            result.append(aggregated_metric)

        return result
Ejemplo n.º 16
0
 def normalize(items):
     """Merge cart items that share a label into one item per label.

     Returns a tuple of ``CartItem`` objects whose quantity is the sum of
     the quantities of all input items with that label. Labels are taken in
     the order produced by ``unseen`` (presumably each label once, in
     first-seen order -- confirm against its definition).
     """
     by_label = group_by(items, keyfunc=lambda entry: entry.label)

     merged = []
     for label in unseen(entry.label for entry in items):
         total = sum(entry.quantity for entry in by_label[label])
         merged.append(CartItem(label=label, quantity=total))
     return tuple(merged)
Ejemplo n.º 17
0
 def __init__(self, keyword_array, weight=1):
     """Index the linked keywords of ``keyword_array`` by keyword.

     Args:
         keyword_array: records grouped on their 'keyword' entry; each
             record's 'linked_keywords' entry is read.
         weight: stored as-is on the instance. Defaults to 1.
     """
     self.weight = weight

     links_by_keyword = {}
     for keyword, entries in group_by('keyword', keyword_array).items():
         linked_lists = [entry['linked_keywords'] for entry in entries]
         # Flatten one level, then de-duplicate.
         links_by_keyword[keyword] = set(flatten(linked_lists, 1))

     self.keyword_links = LowerKeyDict(links_by_keyword)
Ejemplo n.º 18
0
        if len(X) == len(Y) == 5 and len(set(X)) > 1 and len(set(Y)) > 1:
            rho = correlation.spearman_rho_tr(X, Y)
            spearmans['all'].append(rho)
            if u.mturk and not u.scholar:
                spearmans['mturk'].append(rho)
            if u.scholar and not u.mturk:
                spearmans['scholar'].append(rho)

# Report section: for every key of ``spearmans`` write the mean, the number
# of collected correlations, and the deviation.
write('intra-rater spearman correlations:')
for c in spearmans:
    write('\t%s: %.3f n=%d, dev=%.3f' % (
            c,
            utils.mean(spearmans[c]),
            len(spearmans[c]),
            utils.dev(spearmans[c])
        ))

# Per specificity and condition: mean absolute difference between repeated
# ratings that the same user gave for the same pair.
for spec in utils.SPECIFICITIES:
    write('mean absolute errors for %s questions:' % spec)
    for c in utils.CONDITIONS:
        diffs = []
        for (pair_id, ratings) in s.get_ratings_by_condition(spec, c).items():
            # Group this pair's ratings by user to find re-ratings.
            for reratings in utils.group_by(ratings, lambda r: r.user).values():
                if len(reratings) >= 2:
                    # Only the first two ratings of a user are compared.
                    diffs.append(abs(reratings[0].response - reratings[1].response))
        # Conditions without any re-rated pair produce no output line.
        if diffs:
            write('\tcondition %s: mae=%.3f, n=%d, dev=%.3f' %
                ( c, utils.mean(diffs), len(diffs), utils.dev(diffs)))
# ``f`` is opened outside this excerpt; presumably the file ``write`` targets
# -- TODO confirm.
f.close()
Ejemplo n.º 19
0
                else:
                    ratings[r.pair_id] = r.response

        if len(X) == len(Y) == 5 and len(set(X)) > 1 and len(set(Y)) > 1:
            rho = correlation.spearman_rho_tr(X, Y)
            spearmans['all'].append(rho)
            if u.mturk and not u.scholar:
                spearmans['mturk'].append(rho)
            if u.scholar and not u.mturk:
                spearmans['scholar'].append(rho)

# Report section: for every key of ``spearmans`` write the mean, the number
# of collected correlations, and the deviation.
write('intra-rater spearman correlations:')
for c in spearmans:
    write('\t%s: %.3f n=%d, dev=%.3f' % (c, utils.mean(
        spearmans[c]), len(spearmans[c]), utils.dev(spearmans[c])))

# Per specificity and condition: mean absolute difference between repeated
# ratings that the same user gave for the same pair.
for spec in utils.SPECIFICITIES:
    write('mean absolute errors for %s questions:' % spec)
    for c in utils.CONDITIONS:
        diffs = []
        for (pair_id, ratings) in s.get_ratings_by_condition(spec, c).items():
            # Group this pair's ratings by user to find re-ratings.
            for reratings in utils.group_by(ratings,
                                            lambda r: r.user).values():
                if len(reratings) >= 2:
                    # Only the first two ratings of a user are compared.
                    diffs.append(
                        abs(reratings[0].response - reratings[1].response))
        # Conditions without any re-rated pair produce no output line.
        if diffs:
            write('\tcondition %s: mae=%.3f, n=%d, dev=%.3f' %
                  (c, utils.mean(diffs), len(diffs), utils.dev(diffs)))
# ``f`` is opened outside this excerpt; presumably the file ``write`` targets
# -- TODO confirm.
f.close()
Ejemplo n.º 20
0
 def normalize(items):
     """Merge cart items that share a reference into one item per reference.

     Returns a tuple of ``CartItem`` objects whose quantity is the sum of
     the quantities of all input items with that reference. References are
     taken in the order produced by ``unseen`` (presumably each reference
     once, in first-seen order -- confirm against its definition).
     """
     by_reference = group_by(items, keyfunc=lambda entry: entry.reference)

     merged = []
     for reference in unseen(entry.reference for entry in items):
         total = sum(entry.quantity for entry in by_reference[reference])
         merged.append(CartItem(reference=reference, quantity=total))
     return tuple(merged)
Ejemplo n.º 21
0
def sort_messages(all_msgs):
    """Return the messages channel by channel, each channel sorted by time.

    Messages are grouped by ``msg['channel']['name']``. Within a channel
    they are sorted in place by ``float(msg['created_at'])``, ascending.
    Channels appear in the grouping's own value order.
    """
    by_channel = group_by(all_msgs, lambda msg: msg['channel']['name'])

    ordered = []
    for channel_msgs in by_channel.values():
        channel_msgs.sort(key=lambda msg: float(msg['created_at']))
        ordered.extend(channel_msgs)
    return ordered