def display_cp(self) -> None:
     """Display two one-row tables summarising the current checkpoint.

     The first table holds run progress (name, iteration, loss, patience
     counters, remaining epochs, minimum loss). The second holds the
     validation / training / "trans" accuracy figures: a ``correct/total``
     string, the latest history entry and the recorded maximum, the latter
     two rounded to three decimals.
     """
     checkpoint = self.cp

     progress_row = {
         "name": self.name,
         "iteration": checkpoint["iteration"],
         "loss": self.new_loss,
         "patience": self.patience,
         "max_patience": checkpoint["max_patience"],
         "num_left": self.num_epochs - self.epoch,
         "min_loss": checkpoint["min_loss"],
     }
     display_df(pd.DataFrame.from_records([progress_row]))

     # The latest value of each metric is the last entry of its history list.
     accuracy_row = {
         "val_correct": f"{self.val_correct}/{self.val_total}",
         "val_acc": round(checkpoint["val_acc_history"][-1], 3),
         "max_val_acc": round(checkpoint["max_val_acc"], 3),
         "correct": f"{self.correct}/{self.total}",
         "acc": round(checkpoint["acc_history"][-1], 3),
         "max_acc": round(checkpoint["max_acc"], 3),
         "trans_correct": f"{self.trans_correct}/{self.trans_total}",
         "trans_acc": round(checkpoint["trans_acc_history"][-1], 3),
         "max_trans_acc": round(checkpoint["max_trans_acc"], 3),
     }
     display_df(pd.DataFrame.from_records([accuracy_row]))
def print_num_memes_in_folders():
    """Display, per name in MEMES_TO_USE, how many entries its folder holds.

    Each folder is looked up as ``MEMES_REPO + name + "/"``; the resulting
    table is sorted by the ``num`` column in ascending order.
    """
    folder_counts = []
    for name in MEMES_TO_USE:
        entries = os.listdir(MEMES_REPO + name + "/")
        folder_counts.append({"name": name, "num": len(entries)})
    table = pd.DataFrame.from_records(folder_counts)
    display_df(table.sort_values("num"))
def stonks_to_aws() -> None:
    """Upload every file listed in the "jit" stonk folder and show a tally.

    File stems are read from ``LOAD_STONK_REPO.format("jit")`` and each one
    is uploaded as ``<stem>.pt`` through ``upload_to_aws``. A truthy return
    value is counted as a success, anything else as a failure. After every
    upload the output is cleared and the running totals are displayed again.
    """
    jit_dir = LOAD_STONK_REPO.format("jit")
    names = []
    for filename in os.listdir(jit_dir):
        stem, _extension = os.path.splitext(filename)
        names.append(stem)

    stats: Dict[str, int] = {"num_names": len(names), "success": 0, "failed": 0}
    for name in names:
        if upload_to_aws(jit_dir + f"{name}.pt"):
            stats["success"] += 1
        else:
            stats["failed"] += 1
        # Redraw the single-row progress table in place of the previous one.
        clear_output()
        display_df(pd.DataFrame.from_records([stats]))
Exemple #4
0
 def histo(self):
     """Plot a histogram of per-name accuracy and show the 25 lowest rows.

     Accuracies come from ``self.smd["name_acc"]``. The histogram covers the
     range 0.7 to 1 with 120 bins. After plotting, the ``acc`` column is
     passed through ``round_3`` and the first 25 rows of the
     ascending-sorted table are displayed.
     """
     name_acc_pairs = list(self.smd["name_acc"].items())
     df = pd.DataFrame(name_acc_pairs, columns=["name", "acc"])
     df = df.sort_values(by="acc", ascending=True)

     plt.figure(figsize=(20, 6))
     plt.hist(df["acc"], range=(0.7, 1), bins=3 * 40)
     plt.grid()
     plt.title("Market Accuracy Histogram")
     plt.show()

     # Round only after plotting so the histogram uses the raw values.
     df["acc"] = df["acc"].apply(round_3)
     display_df(df.head(25))
def miss_match() -> None:
    """Display the Template rows whose name has no matching file in BLANKS_REPO.

    File names in ``BLANKS_REPO`` are stripped of their extension and the
    training database is queried for templates whose ``name`` is not among
    them.
    """
    local_names = []
    for filename in os.listdir(BLANKS_REPO):
        stem, _extension = os.path.splitext(filename)
        local_names.append(stem)

    # The casts only narrow types for the static checker.
    not_downloaded = cast(ClauseElement, ~Template.name.in_(local_names))
    query = training_db.query(Template).filter(not_downloaded)
    sql = cast(str, query.statement)
    display_df(pd.read_sql(sql, training_db.bind))
Exemple #6
0
def evaluated():
    """Display three one-row tables summarising labelled RedditMeme rows.

    All counts are taken from ``site_db``:

    * Table 1 - labelling progress: rows with an official stonk label, rows
      officially marked as not a template, rows with any official template
      label, and how many of the latter carry the current classifier version
      (``LOAD_MEME_CLF_VERSION``).
    * Table 2 - rows where ``meme_clf`` equals ``stonk_official``, split by
      the ``stonk`` flag (reported as true positives / false negatives), plus
      rows officially "not a template" that were still flagged as stonk.
    * Table 3 - rows where ``meme_clf`` differs from ``stonk_official``,
      split by the ``stonk`` flag.

    Every ratio is rounded to three decimals and reported as ``0`` when its
    denominator is zero, so an empty or partially labelled database no
    longer raises ``ZeroDivisionError``.
    """

    def count(*criteria):
        # Query.filter joins multiple criteria with AND.
        return site_db.query(RedditMeme).filter(*criteria).count()

    def ratio(numerator, denominator):
        # Same zero-denominator convention as print_stats: report 0.
        return round(numerator / denominator, 3) if denominator else 0

    # NOTE: `!= None`, `== True` and `== False` are deliberate here; these
    # are column expressions that build SQL, not Python truth tests, so they
    # must not be rewritten as `is not None` / `is True`.
    current_version = cast(
        ClauseElement, RedditMeme.version == LOAD_MEME_CLF_VERSION
    )
    template_labelled = cast(
        ClauseElement, RedditMeme.is_a_template_official != None
    )
    not_a_template = cast(
        ClauseElement, RedditMeme.is_a_template_official == False
    )
    stonk_labelled = cast(ClauseElement, RedditMeme.stonk_official != None)
    stonk_flagged = cast(ClauseElement, RedditMeme.stonk == True)
    stonk_unflagged = cast(ClauseElement, RedditMeme.stonk == False)
    clf_matches = cast(
        ClauseElement, RedditMeme.meme_clf == RedditMeme.stonk_official
    )
    clf_differs = cast(
        ClauseElement, RedditMeme.meme_clf != RedditMeme.stonk_official
    )

    num_filled = count(current_version, template_labelled)
    total_official = count(template_labelled)
    num_stonks = count(stonk_labelled)
    num_not_template = count(not_a_template)
    num_not_template_wrong = count(current_version, stonk_flagged, not_a_template)
    tp = count(clf_matches, stonk_flagged, stonk_labelled, current_version)
    fn = count(clf_matches, stonk_unflagged, stonk_labelled, current_version)
    meme_clf_wrong = count(clf_differs, stonk_labelled, current_version)
    meme_clf_wrong_stonk_right = count(
        clf_differs, stonk_unflagged, stonk_labelled, current_version
    )
    meme_clf_wrong_stonk_wrong = count(
        clf_differs, stonk_flagged, stonk_labelled, current_version
    )

    progress = {
        "num_stonks": num_stonks,
        "num_not_template": num_not_template,
        "total_official": total_official,
        "num_filled": num_filled,
        "percent_done": ratio(num_filled, total_official),
    }
    display_df(pd.DataFrame.from_records([progress]))

    agreement = {
        "num_not_template_wrong": num_not_template_wrong,
        "num_nt_wrong_percent": ratio(num_not_template_wrong, num_not_template),
        "true_positives": tp,
        "tp_percent": ratio(tp, num_stonks),
        "false_negatives": fn,
        "fn_percent": ratio(fn, num_stonks),
    }
    display_df(pd.DataFrame.from_records([agreement]))

    # Each percentage is the share of its own bucket among the rows the
    # classifier got wrong (previously both were computed from `fn`).
    disagreement = {
        "meme_clf_wrong": meme_clf_wrong,
        "wrong_right": meme_clf_wrong_stonk_right,
        "wr_percent": ratio(meme_clf_wrong_stonk_right, meme_clf_wrong),
        "wrong_wrong": meme_clf_wrong_stonk_wrong,
        "ww_percent": ratio(meme_clf_wrong_stonk_wrong, meme_clf_wrong),
    }
    display_df(pd.DataFrame.from_records([disagreement]))
Exemple #7
0
 def print_worst(self) -> None:
     """Display every (name, acc) pair from ``self.smd["name_acc"]``.

     Rows are sorted by ``acc`` in ascending order, so the lowest
     accuracies come first.
     """
     name_acc_pairs = list(self.smd["name_acc"].items())
     table = pd.DataFrame(name_acc_pairs, columns=["name", "acc"])
     display_df(table.sort_values("acc", ascending=True))
def print_stats(name: str = ""):
    """Display a one-row table of classification statistics from ``site_db``.

    Args:
        name: Optional ``meme_clf`` value. When non-empty, per-name counts
            (``num_posts``, ``correct_stonk``, ``wrong_stonk``) are queried
            for rows with that ``meme_clf``; otherwise they are reported
            as 0.

    The global counts cover all rows on ``LOAD_MEME_CLF_VERSION``, except
    ``memes_unclassified``, which counts rows whose ``version`` is NULL.
    Both correctness ratios fall back to 0 when there is nothing to divide
    by.
    """

    def tally(criterion):
        return site_db.query(RedditMeme).filter(criterion).count()

    # `== True` / `== False` / `== None` build SQL expressions on columns;
    # the casts are only there for the static checker.
    on_current_version = cast(
        ClauseElement, RedditMeme.version == LOAD_MEME_CLF_VERSION
    )
    stonk_right = cast(ClauseElement, RedditMeme.stonk_correct == True)
    stonk_wrong = cast(ClauseElement, RedditMeme.stonk_correct == False)

    if not name:
        correct_stonk = 0
        wrong_stonk = 0
        num_posts = 0
    else:
        classified_as_name = cast(ClauseElement, RedditMeme.meme_clf == name)
        correct_stonk = tally(
            and_(on_current_version, stonk_right, classified_as_name)
        )
        wrong_stonk = tally(
            and_(on_current_version, stonk_wrong, classified_as_name)
        )
        num_posts = tally(
            and_(
                on_current_version,
                classified_as_name,
                cast(ClauseElement, RedditMeme.stonk == True),
            )
        )

    correct_stonks = tally(and_(on_current_version, stonk_right))
    wrong_stonks = tally(and_(on_current_version, stonk_wrong))
    correct_meme_clf = tally(
        and_(
            on_current_version,
            cast(ClauseElement, RedditMeme.meme_clf_correct == True),
        )
    )
    wrong_meme_clf = tally(
        and_(
            on_current_version,
            cast(ClauseElement, RedditMeme.meme_clf_correct == False),
        )
    )
    memes_classified = tally(on_current_version)
    memes_unclassified = tally(cast(ClauseElement, RedditMeme.version == None))

    judged_for_name = correct_stonk + wrong_stonk
    if judged_for_name != 0:
        perc_correct = correct_stonk / judged_for_name
    else:
        perc_correct = 0

    judged_overall = correct_stonks + wrong_stonks
    if judged_overall != 0:
        percent_correct = correct_stonks / judged_overall
    else:
        percent_correct = 0

    summary = {
        "num_posts": num_posts,
        "correct_stonk": correct_stonk,
        "wrong_stonk": wrong_stonk,
        "perc_correct": perc_correct,
        "memes_classified": memes_classified,
        "memes_unclassified": memes_unclassified,
        "correct_stonks": correct_stonks,
        "wrong_stonks": wrong_stonks,
        "percent_correct": percent_correct,
        "correct_meme_clf": correct_meme_clf,
        "wrong_meme_clf": wrong_meme_clf,
    }
    display_df(pd.DataFrame.from_records([summary]))