def display_cp(self) -> None:
    """Display two one-row tables describing the current checkpoint.

    The first table shows the run name, iteration, latest loss, patience
    counters, remaining epochs (``num_epochs - epoch``) and minimum loss.
    The second shows, for each of the ``val_*``, un-prefixed and ``trans_*``
    groups, the ``correct/total`` string, the last entry of the accuracy
    history and the recorded maximum accuracy (both rounded to 3 places).
    """
    checkpoint = self.cp

    progress_row = {
        "name": self.name,
        "iteration": checkpoint["iteration"],
        "loss": self.new_loss,
        "patience": self.patience,
        "max_patience": checkpoint["max_patience"],
        "num_left": self.num_epochs - self.epoch,
        "min_loss": checkpoint["min_loss"],
    }
    display_df(pd.DataFrame.from_records([progress_row]))

    # Built only after the first table is shown, so a failure here (e.g. an
    # empty history list) still leaves the progress table visible.
    accuracy_row = {
        "val_correct": f"{self.val_correct}/{self.val_total}",
        "val_acc": round(checkpoint["val_acc_history"][-1], 3),
        "max_val_acc": round(checkpoint["max_val_acc"], 3),
        "correct": f"{self.correct}/{self.total}",
        "acc": round(checkpoint["acc_history"][-1], 3),
        "max_acc": round(checkpoint["max_acc"], 3),
        "trans_correct": f"{self.trans_correct}/{self.trans_total}",
        "trans_acc": round(checkpoint["trans_acc_history"][-1], 3),
        "max_trans_acc": round(checkpoint["max_trans_acc"], 3),
    }
    display_df(pd.DataFrame.from_records([accuracy_row]))
def print_num_memes_in_folders():
    """Display the number of directory entries per meme folder, ascending.

    For every name in ``MEMES_TO_USE`` the entries of
    ``MEMES_REPO + name + "/"`` are counted; the resulting name/num table is
    sorted by ``num`` and displayed.
    """
    rows = [
        {"name": folder, "num": len(os.listdir(MEMES_REPO + folder + "/"))}
        for folder in MEMES_TO_USE
    ]
    counts = pd.DataFrame.from_records(rows)
    display_df(counts.sort_values("num"))
def stonks_to_aws() -> None:
    """Upload every file stem found in the "jit" stonk directory as ``.pt``.

    Each directory entry is reduced to its stem, re-suffixed with ``.pt`` and
    handed to ``upload_to_aws``; a truthy return counts as a success, anything
    else as a failure. A one-row tally table is redrawn after each upload.
    """
    jit_dir = LOAD_STONK_REPO.format("jit")
    stems = [os.path.splitext(entry)[0] for entry in os.listdir(jit_dir)]

    stats: Dict[str, int] = {"num_names": len(stems), "success": 0, "failed": 0}
    for stem in stems:
        if upload_to_aws(jit_dir + f"{stem}.pt"):
            stats["success"] += 1
        else:
            stats["failed"] += 1
        # Replace the previous tally with the updated one.
        clear_output()
        display_df(pd.DataFrame.from_records([stats]))
def histo(self):
    """Plot a histogram of per-name accuracy and list the 25 lowest entries.

    Reads the name -> accuracy mapping at ``self.smd["name_acc"]``, draws a
    120-bin histogram over the range 0.7 to 1, then displays the first 25
    rows of the ascending-sorted table with ``round_3`` applied to ``acc``.
    """
    pairs = list(self.smd["name_acc"].items())
    acc_table = pd.DataFrame(pairs, columns=["name", "acc"])
    acc_table = acc_table.sort_values(by="acc", ascending=True)

    plt.figure(figsize=(20, 6))
    plt.hist(acc_table["acc"], range=(0.7, 1), bins=120)
    plt.grid()
    plt.title("Market Accuracy Histogram")
    plt.show()

    # Applied after plotting so the histogram uses the unmodified values.
    acc_table["acc"] = acc_table["acc"].apply(round_3)
    display_df(acc_table.head(25))
def miss_match() -> None:
    """Display ``Template`` rows whose name has no matching file in BLANKS_REPO.

    File stems (names without extension) found in ``BLANKS_REPO`` are treated
    as already downloaded; every ``Template`` whose ``name`` is not among them
    is read from ``training_db`` and displayed.
    """
    downloaded_blanks = [
        os.path.splitext(entry)[0] for entry in os.listdir(BLANKS_REPO)
    ]

    not_downloaded = cast(ClauseElement, ~Template.name.in_(downloaded_blanks))
    missing_query = training_db.query(Template).filter(not_downloaded)
    statement = cast(str, missing_query.statement)

    display_df(pd.read_sql(statement, training_db.bind))
def evaluated():
    """Display three one-row tables summarising officially labelled memes.

    Counts ``RedditMeme`` rows in ``site_db`` under various filters and shows:

    1. Labelling progress: rows with ``stonk_official`` set, rows with
       ``is_a_template_official == False``, rows with
       ``is_a_template_official`` set, how many of the latter are on
       ``LOAD_MEME_CLF_VERSION``, and that share.
    2. For the current version: non-templates that were still flagged
       ``stonk``, and rows where ``meme_clf`` equals ``stonk_official`` split
       by ``stonk`` True (``true_positives``) / False (``false_negatives``),
       each with its share.
    3. For the current version: rows where ``meme_clf`` differs from
       ``stonk_official``, split by ``stonk`` False (``wrong_right``) /
       True (``wrong_wrong``), each as a share of all such rows.

    Every share is rounded to 3 places and reported as 0 when its
    denominator is 0, instead of raising ``ZeroDivisionError``.
    """

    def count_where(*criteria) -> int:
        """Count RedditMeme rows that satisfy every given criterion."""
        clauses = [cast(ClauseElement, criterion) for criterion in criteria]
        return site_db.query(RedditMeme).filter(and_(*clauses)).count()

    def ratio(numerator: int, denominator: int) -> float:
        """Return numerator / denominator rounded to 3 places, 0 if empty."""
        if denominator == 0:
            return 0.0
        return round(numerator / denominator, 3)

    # SQLAlchemy overloads == / != to build SQL expressions, so comparisons
    # with None / True / False must stay as operators rather than `is`.
    on_current_version = RedditMeme.version == LOAD_MEME_CLF_VERSION
    template_labelled = RedditMeme.is_a_template_official != None  # noqa: E711
    not_a_template = RedditMeme.is_a_template_official == False  # noqa: E712
    stonk_labelled = RedditMeme.stonk_official != None  # noqa: E711
    stonk_flagged = RedditMeme.stonk == True  # noqa: E712
    stonk_unflagged = RedditMeme.stonk == False  # noqa: E712
    clf_matches_label = RedditMeme.meme_clf == RedditMeme.stonk_official
    clf_differs_from_label = RedditMeme.meme_clf != RedditMeme.stonk_official

    num_filled = count_where(on_current_version, template_labelled)
    total_official = count_where(template_labelled)
    num_stonks = count_where(stonk_labelled)
    num_not_template = count_where(not_a_template)
    num_not_template_wrong = count_where(
        on_current_version, stonk_flagged, not_a_template
    )
    tp = count_where(
        clf_matches_label, stonk_flagged, stonk_labelled, on_current_version
    )
    fn = count_where(
        clf_matches_label, stonk_unflagged, stonk_labelled, on_current_version
    )
    meme_clf_wrong = count_where(
        clf_differs_from_label, stonk_labelled, on_current_version
    )
    meme_clf_wrong_stonk_right = count_where(
        clf_differs_from_label, stonk_unflagged, stonk_labelled, on_current_version
    )
    meme_clf_wrong_stonk_wrong = count_where(
        clf_differs_from_label, stonk_flagged, stonk_labelled, on_current_version
    )

    display_df(
        pd.DataFrame.from_records(
            [
                dict(
                    num_stonks=num_stonks,
                    num_not_template=num_not_template,
                    total_official=total_official,
                    num_filled=num_filled,
                    percent_done=ratio(num_filled, total_official),
                )
            ]
        )
    )
    display_df(
        pd.DataFrame.from_records(
            [
                dict(
                    num_not_template_wrong=num_not_template_wrong,
                    num_nt_wrong_percent=ratio(
                        num_not_template_wrong, num_not_template
                    ),
                    true_positives=tp,
                    tp_percent=ratio(tp, num_stonks),
                    false_negatives=fn,
                    fn_percent=ratio(fn, num_stonks),
                )
            ]
        )
    )
    display_df(
        pd.DataFrame.from_records(
            [
                dict(
                    meme_clf_wrong=meme_clf_wrong,
                    wrong_right=meme_clf_wrong_stonk_right,
                    # Each share uses the count shown next to it; previously
                    # both were computed from `fn` by mistake.
                    wr_percent=ratio(meme_clf_wrong_stonk_right, meme_clf_wrong),
                    wrong_wrong=meme_clf_wrong_stonk_wrong,
                    ww_percent=ratio(meme_clf_wrong_stonk_wrong, meme_clf_wrong),
                )
            ]
        )
    )
def print_worst(self) -> None:
    """Display every name/accuracy pair, lowest accuracy first.

    The pairs come from the mapping at ``self.smd["name_acc"]``.
    """
    pairs = list(self.smd["name_acc"].items())
    acc_table = pd.DataFrame(pairs, columns=["name", "acc"])
    display_df(acc_table.sort_values("acc", ascending=True))
def print_stats(name: str = ""):
    """Display a one-row table of classification counts from ``site_db``.

    Args:
        name: Optional ``meme_clf`` value. When non-empty, the per-name
            columns (``num_posts``, ``correct_stonk``, ``wrong_stonk``,
            ``perc_correct``) are computed for rows with that ``meme_clf``;
            when empty they are all 0.

    The remaining columns are computed over all rows on
    ``LOAD_MEME_CLF_VERSION`` (plus ``memes_unclassified``, which counts rows
    whose ``version`` is NULL). Both ratios fall back to 0 when there are no
    correct/wrong rows to divide by.
    """

    def tally(*criteria) -> int:
        """Count RedditMeme rows that satisfy every given criterion."""
        clauses = [cast(ClauseElement, criterion) for criterion in criteria]
        return site_db.query(RedditMeme).filter(and_(*clauses)).count()

    # SQLAlchemy overloads == to build SQL expressions, so comparisons with
    # None / True / False must stay as `==` rather than `is`.
    on_current_version = RedditMeme.version == LOAD_MEME_CLF_VERSION

    if not name:
        correct_stonk = wrong_stonk = num_posts = 0
    else:
        correct_stonk = tally(
            on_current_version,
            RedditMeme.stonk_correct == True,  # noqa: E712
            RedditMeme.meme_clf == name,
        )
        wrong_stonk = tally(
            on_current_version,
            RedditMeme.stonk_correct == False,  # noqa: E712
            RedditMeme.meme_clf == name,
        )
        num_posts = tally(
            on_current_version,
            RedditMeme.meme_clf == name,
            RedditMeme.stonk == True,  # noqa: E712
        )

    correct_stonks = tally(
        on_current_version, RedditMeme.stonk_correct == True  # noqa: E712
    )
    wrong_stonks = tally(
        on_current_version, RedditMeme.stonk_correct == False  # noqa: E712
    )
    correct_meme_clf = tally(
        on_current_version, RedditMeme.meme_clf_correct == True  # noqa: E712
    )
    wrong_meme_clf = tally(
        on_current_version, RedditMeme.meme_clf_correct == False  # noqa: E712
    )
    memes_classified = tally(on_current_version)
    memes_unclassified = tally(RedditMeme.version == None)  # noqa: E711

    judged_for_name = correct_stonk + wrong_stonk
    if judged_for_name != 0:
        perc_correct = correct_stonk / judged_for_name
    else:
        perc_correct = 0

    judged_overall = correct_stonks + wrong_stonks
    if judged_overall != 0:
        percent_correct = correct_stonks / judged_overall
    else:
        percent_correct = 0

    summary_row = {
        "num_posts": num_posts,
        "correct_stonk": correct_stonk,
        "wrong_stonk": wrong_stonk,
        "perc_correct": perc_correct,
        "memes_classified": memes_classified,
        "memes_unclassified": memes_unclassified,
        "correct_stonks": correct_stonks,
        "wrong_stonks": wrong_stonks,
        "percent_correct": percent_correct,
        "correct_meme_clf": correct_meme_clf,
        "wrong_meme_clf": wrong_meme_clf,
    }
    display_df(pd.DataFrame.from_records([summary_row]))