def __init__(self, json_data_path: Union[str, Path, None] = None) -> None:
    """
    Load a Tinder data export and build the usage and messages DataFrames from it.

    The parsed json is stored whole as 'json_data'. Its "Usage" entry becomes 'usage_df', whose
    index is converted to datetimes. Its "Messages" entry becomes 'messages_df', indexed and
    sorted by an integer match ID.

    Args:
        json_data_path: string or Path, path (absolute or relative) to the json file with your
            Tinder data. The None default is only kept for signature compatibility, a path
            is required.

    Raises:
        TypeError: if no path is provided.
    """
    if json_data_path is None:
        # Path(None) would raise a TypeError anyway, this only makes the message explicit
        raise TypeError("json_data_path must be a str or Path to your Tinder json data, not None")
    self.json_file_path: Path = Path(json_data_path).absolute()

    with timeit(lambda spanned: logger.debug(f"Loaded json data in {spanned:.4f} seconds")):
        # JSON is UTF-8 by specification: do not depend on the platform's default encoding,
        # which would choke on emojis / accents in messages on non UTF-8 locales
        with self.json_file_path.open("r", encoding="utf-8") as f:
            self.json_data = json.load(f)

    with timeit(lambda spanned: logger.debug(f"Processed usage data in {spanned:.4f} seconds")):
        self.usage_df: pd.DataFrame = pd.DataFrame.from_dict(self.json_data["Usage"])
        self.usage_df.index = pd.to_datetime(self.usage_df.index)

    with timeit(
        lambda spanned: logger.debug(f"Processed message data in {spanned:.4f} seconds")
    ):
        self.messages_df: pd.DataFrame = pd.DataFrame(self.json_data["Messages"])
        # Keep the second space-separated token of each match_id as an integer
        # (presumably the values look like "Match 12" - confirm against an actual export)
        self.messages_df["match_id"] = self.messages_df["match_id"].apply(
            lambda x: int(x.split(" ")[1])
        )
        self.messages_df.set_index("match_id", inplace=True)
        self.messages_df.sort_index(inplace=True)
    logger.debug("Initialisation complete")
def output_usage_statistics(self) -> None:
    """
    Print out some statistics about your Tinder usage through the period you've used it.

    The period runs from the earliest to the latest date in the index of 'usage_df', both
    included. A day counts as a usage day when its 'app_opens' value is not 0.

    Returns:
        Nothing, will just print.
    """
    with timeit(
        lambda spanned: logger.debug(f"Gathered usage statistics in {spanned:.4f} seconds")
    ):
        app_opens: pd.Series = self.usage_df.app_opens
        creation_date: pd.Timestamp = app_opens.index.min()
        last_use_date: pd.Timestamp = app_opens.index.max()
        days_with_use: int = app_opens[app_opens != 0].size
        time_period = pd.to_timedelta(last_use_date - creation_date)
        # The difference between the two dates leaves one of them out. Count both ends,
        # otherwise the untouched days are off by one (and negative if opened every day)
        total_days = time_period.days + 1

    print("\n---- Usage Statistics ----")
    print(f"Account creation date: {creation_date}")
    print(f"Last usage date: {last_use_date}")
    print(f"Days spent with an active Tinder account: {total_days}")
    print(
        f"Opened on {days_with_use} unique days, and untouched on "
        f"{total_days - days_with_use} unique days."
    )
    print(f"Average app-open per usage-day: {app_opens[app_opens != 0].mean():.1f}\n")
def sankey_metrics(self) -> dict:
    """
    Gather, from 'usage_df' and 'messages_df', the metrics needed to build the Sankey diagram.

    General metrics are column totals of 'usage_df': passes, likes, swipes (passes + likes),
    matches, sent_messages and received_messages.

    Message metrics are counts of rows of 'messages_df', based on the length of the list held
    in the 'messages' column for each match (one dictionary per message sent to that match):
        contacted: rows with a non-empty list.
        replied: rows with 3 or more messages sent. Tinder does not provide the messages
            sent by your matches, so sending 3 or more is taken to mean they answered.
        short_conversations: rows with 4 or fewer messages sent.
        long_conversations: rows with 20 or more messages sent.

    Returns:
        A dictionary holding both the general and the message metrics.
    """
    with timeit(
        lambda spanned: logger.debug(
            f"Gathered Sankey general metrics in {spanned:.4f} seconds"
        )
    ):
        usage = self.usage_df
        total_passes = usage.swipes_passes.sum()
        total_likes = usage.swipes_likes.sum()
        general_metrics: dict = {
            "passes": total_passes,
            "likes": total_likes,
            "swipes": total_passes + total_likes,
            "matches": usage.matches.sum(),
            "sent_messages": usage.messages_sent.sum(),
            "received_messages": usage.messages_received.sum(),
        }

    with timeit(
        lambda spanned: logger.debug(
            f"Gathered Sankey message metrics in {spanned:.4f} seconds"
        )
    ):
        # Number of messages sent to each match, computed once for all thresholds
        sent_counts = [len(sent) for sent in self.messages_df.messages]
        message_metrics = {
            "contacted": len(sent_counts) - sent_counts.count(0),
            "replied": sum(1 for count in sent_counts if count >= 3),
            "short_conversations": sum(1 for count in sent_counts if count <= 4),
            "long_conversations": sum(1 for count in sent_counts if count >= 20),
        }

    return {**general_metrics, **message_metrics}
def output_message_statistics(self) -> None:
    """
    Print the total number of messages sent and received, as summed from 'usage_df'.

    Returns:
        Nothing, will just print.
    """
    with timeit(
        lambda spanned: logger.debug(f"Gathered message statistics in {spanned:.4f} seconds")
    ):
        usage = self.usage_df
        sent_total: int = usage.messages_sent.sum()
        received_total: int = usage.messages_received.sum()

    report_lines = [
        "\n---- Message Statistics ----",
        f"Sent a total of {sent_total} messages",
        f"Received a total of {received_total} messages\n",
    ]
    print("\n".join(report_lines))
def plot_swipes_weekday_relative_stats(
    self, figsize: Tuple[int, int] = (20, 12), showfig: bool = False, savefig: bool = False
) -> None:
    """
    Plot, for each day of the week, the percentage of the total number of swipes that were
    likes, passes and those that resulted in matches.

    Args:
        figsize: figure size
        showfig: if this is set to True, the figure will be shown. Defaults to False.
        savefig: if this is set to True, the figure will be saved as
            'plots/swipes_weekdays_relative_stats.png'. Defaults to False.

    Returns:
        Nothing, just plots.
    """
    logger.debug("Plotting swipes weekday relative insights")
    with timeit(
        lambda spanned: logger.debug(
            f"Gathered relative weekday swipe statistics in {spanned:.4f} seconds"
        )
    ):
        # 'DatetimeIndex.weekday_name' was removed in pandas 1.0, 'day_name()' replaces it.
        # Careful, index sorts alphabetically at this point
        vals_weekday = self.usage_df.groupby(self.usage_df.index.day_name()).sum()
        vals_weekday.fillna(0, inplace=True)

        # Getting index as categorical properly ordered weekdays, then sorting on it so the
        # days are plotted in the order of WEEK_DAYS rather than alphabetically
        weekday_categories = pd.api.types.CategoricalDtype(categories=WEEK_DAYS, ordered=True)
        vals_weekday.index = vals_weekday.index.astype(weekday_categories)
        vals_weekday.sort_index(inplace=True)

        vals_weekday["total_swipes"] = vals_weekday.swipes_likes + vals_weekday.swipes_passes
        vals_weekday["likes_ratio"] = (
            100 * vals_weekday.swipes_likes / vals_weekday.total_swipes
        )
        vals_weekday["matches_ratio"] = 100 * vals_weekday.matches / vals_weekday.total_swipes
        vals_weekday["passes_ratio"] = (
            100 * vals_weekday.swipes_passes / vals_weekday.total_swipes
        )
        # Weekdays without any swipe give 0 / 0, show those as 0
        vals_weekday.fillna(0, inplace=True)

    fig, axis1 = plt.subplots(figsize=figsize)

    axis1.plot(
        vals_weekday.index,
        vals_weekday.likes_ratio,
        color="blue",
        label="'Like' Swipes",
        lw=1.5,
    )
    axis1.plot(
        vals_weekday.index,
        vals_weekday.passes_ratio,
        color="red",
        label="'Pass' Swipes",
        lw=1.5,
    )
    axis1.plot(
        vals_weekday.index, vals_weekday.matches_ratio, color="green", label="Matches", lw=1.5
    )

    # Labels describe what is plotted: the x axis holds weekdays, not dates or months
    axis1.set_xlabel("Weekday")
    axis1.set_ylabel("Percentage of Total Swipes [%]")
    axis1.set_title("Weekday Ratio of Likes, Passes and Matches to Total Swipes")
    axis1.tick_params(axis="x", rotation=75)
    axis1.legend()
    fig.tight_layout()

    if showfig:
        logger.debug("Showing swipes weekday relative stats figure")
        plt.show()

    if savefig:
        logger.debug("Saving swipes weekday relative stats figure")
        fig.savefig("plots/swipes_weekdays_relative_stats.png", format="png", dpi=500)
        logger.success(
            "Saved swipes weekdays relative stats plot as "
            "'plots/swipes_weekdays_relative_stats.png'"
        )
def plot_swipes_weekday_stats(
    self, figsize: Tuple[int, int] = (20, 12), showfig: bool = False, savefig: bool = False
) -> None:
    """
    Compute the number of right and left swipes sent for each day of the week, then output
    it as a stacked barplot. Also plot the number of matches for each weekday, on a second
    y axis.

    Args:
        figsize: figure size
        showfig: if this is set to True, the figure will be shown. Defaults to False.
        savefig: if this is set to True, the figure will be saved as
            'plots/swipes_weekdays_stats.png'. Defaults to False.

    Returns:
        Nothing, just plots.
    """
    logger.debug("Plotting swipes weekday insights")
    with timeit(
        lambda spanned: logger.debug(
            f"Gathered weekday swipe statistics in {spanned:.4f} seconds"
        )
    ):
        # 'DatetimeIndex.weekday_name' was removed in pandas 1.0, 'day_name()' replaces it.
        # Careful, index sorts alphabetically at this point
        vals_weekday = self.usage_df.groupby(self.usage_df.index.day_name()).sum()
        vals_weekday.fillna(0, inplace=True)

        # Getting index as categorical properly ordered weekdays
        weekday_categories = pd.api.types.CategoricalDtype(categories=WEEK_DAYS, ordered=True)
        vals_weekday.index = vals_weekday.index.astype(weekday_categories)
        vals_weekday.sort_index(inplace=True)

    fig, axis1 = plt.subplots(figsize=figsize)
    axis2 = axis1.twinx()  # matches get their own y scale

    axis1.bar(
        vals_weekday.index,
        vals_weekday.swipes_likes,
        label="'Like' Swipes",
        color="blue",
        alpha=0.4,
    )
    # Passes are stacked on top of the likes
    axis1.bar(
        vals_weekday.index,
        vals_weekday.swipes_passes,
        bottom=vals_weekday.swipes_likes,
        label="'Pass' Swipes",
        color="red",
        alpha=0.4,
    )
    axis2.plot(vals_weekday.index, vals_weekday.matches, color="grey", lw=1.5)

    # The x axis holds weekdays, not dates
    axis1.set_xlabel("Weekday")
    axis1.set_ylabel("Number of Swipes")
    axis1.set_title("Swipes per Weekday")
    axis1.tick_params(axis="x", rotation=75)
    axis1.legend()
    axis2.set_ylabel("Number of Matches", color="grey")
    axis2.tick_params(axis="y", labelcolor="grey")
    axis2.grid(False)
    fig.tight_layout()

    if showfig:
        logger.debug("Showing swipes weekday stats figure")
        plt.show()

    if savefig:
        logger.debug("Saving swipes weekday stats figure")
        fig.savefig("plots/swipes_weekdays_stats.png", format="png", dpi=500)
        logger.success("Saved swipes weekdays stats plot as 'plots/swipes_weekdays_stats.png'")
def plot_swipes_monthly_stats(
    self, figsize: Tuple[int, int] = (20, 12), showfig: bool = False, savefig: bool = False
) -> None:
    """
    Compute the monthly left and right swipes, then output it as a stacked barplot. Also plot
    the monthly number of matches, on a second y axis.

    Args:
        figsize: figure size
        showfig: if this is set to True, the figure will be shown. Defaults to False.
        savefig: if this is set to True, the figure will be saved as
            'plots/swipes_monthly_stats.png'. Defaults to False.

    Returns:
        Nothing, just plots.
    """
    logger.debug("Plotting swipes monthly insights")
    with timeit(
        lambda spanned: logger.debug(
            f"Gathered monthly swipe statistics in {spanned:.4f} seconds"
        )
    ):
        # Sum every column of the usage data per month
        vals_monthly = self.usage_df.groupby(pd.Grouper(freq="M")).sum()
        vals_monthly.fillna(0, inplace=True)

    fig, axis1 = plt.subplots(figsize=figsize)
    axis2 = axis1.twinx()  # matches get their own y scale

    axis1.bar(
        vals_monthly.index,
        vals_monthly.swipes_likes,
        width=1,
        label="'Like' Swipes",
        color="blue",
        alpha=0.4,
    )
    # Passes are stacked on top of the likes. These bars are pass swipes, hence the label
    # "'Pass' Swipes" as in the weekday plots
    axis1.bar(
        vals_monthly.index,
        vals_monthly.swipes_passes,
        bottom=vals_monthly.swipes_likes,
        width=1,
        label="'Pass' Swipes",
        color="red",
        alpha=0.4,
    )
    axis2.plot(vals_monthly.index, vals_monthly.matches, color="grey", label="Matches", lw=1.5)

    axis1.set_xlabel("Date")
    axis1.set_ylabel("Number of Swipes")
    axis1.set_title("Swipes per Month")
    axis1.tick_params(axis="x", rotation=75)
    axis2.set_ylabel("Number of Matches", color="grey")
    axis2.tick_params(axis="y", labelcolor="grey")
    axis2.grid(False)
    # Figure-level legend so that the labels of both axes end up in a single box
    fig.legend(loc=2, bbox_to_anchor=(0, 1), bbox_transform=axis1.transAxes)
    fig.tight_layout()

    if showfig:
        logger.debug("Showing swipes monthly stats figure")
        plt.show()

    if savefig:
        logger.debug("Saving swipes monthly stats figure")
        fig.savefig("plots/swipes_monthly_stats.png", format="png", dpi=500)
        logger.success("Saved swipes monthly stats plot as 'plots/swipes_monthly_stats.png'")
def plot_messages_monthly_stats(
    self, figsize: Tuple[int, int] = (20, 12), showfig: bool = False, savefig: bool = False
) -> None:
    """
    Draw the number of messages sent and received each month as a stacked barplot, along
    with the received / sent percentage as a line on a second y axis.

    Args:
        figsize: figure size
        showfig: if this is set to True, the figure will be shown. Defaults to False.
        savefig: if this is set to True, the figure will be saved as
            'plots/messages_monthly_stats.png'. Defaults to False.

    Returns:
        Nothing, just plots.
    """
    logger.debug("Plotting messages monthly insights")
    with timeit(
        lambda spanned: logger.debug(
            f"Gathered monthly message statistics in {spanned:.4f} seconds"
        )
    ):
        monthly = self.usage_df.groupby(pd.Grouper(freq="M")).sum()
        monthly["ratio"] = monthly["messages_received"] * 100 / monthly["messages_sent"]
        monthly = monthly.fillna(0)

    months = monthly.index
    bar_style = {"width": 1, "alpha": 0.4}

    fig, bars_axis = plt.subplots(figsize=figsize)
    ratio_axis = bars_axis.twinx()

    bars_axis.bar(
        months, monthly["messages_sent"], label="Messages Sent", color="blue", **bar_style
    )
    # Received messages sit on top of the sent ones
    bars_axis.bar(
        months,
        monthly["messages_received"],
        bottom=monthly["messages_sent"],
        label="Messages Received",
        color="red",
        **bar_style,
    )
    ratio_axis.plot(months, monthly["ratio"], color="grey", lw=1.5)

    bars_axis.set_xlabel("Date")
    bars_axis.set_ylabel("Number of Messages")
    bars_axis.set_title("Messages per Month")
    bars_axis.tick_params(axis="x", rotation=75)
    bars_axis.legend()
    ratio_axis.set_ylabel("Received/Sent [%]", color="grey")
    ratio_axis.tick_params(axis="y", labelcolor="grey")
    ratio_axis.grid(False)
    fig.tight_layout()

    if showfig:
        logger.debug("Showing message monthly stats figure")
        plt.show()

    if savefig:
        logger.debug("Saving message monthly stats figure")
        fig.savefig("plots/messages_monthly_stats.png", format="png", dpi=500)
        logger.success("Saved message monthly stats plot as 'plots/messages_monthly_stats.png'")