def daily_correlation(self):
    """
    Determines the pmcc between days since account creation and usage +
    displays linegraph with line of best fit.

    The analysis covers every day from 2017-07-19 to 2020-04-27 inclusive.
    The correlation coefficient (rounded to 4 decimal places) is printed
    next to self.ylabel, then the per-day counts are passed to util.graph
    (with line_of_best_fit=True) and to util.graph_histogram_2.
    """
    from collections import Counter

    first_day = datetime.datetime(2017, 7, 19)
    last_day = datetime.datetime(2020, 4, 27)
    # One extra day so that last_day itself is part of the range.
    number_of_days = (last_day - first_day + datetime.timedelta(days=1)).days

    daily_data = self._data_time(slice(0, 1),
                                 "",
                                 slice(0, 10),
                                 return_ints=False)
    # Tally every date in a single pass instead of rescanning the whole
    # list once per day with list.count().
    entries_per_date = Counter(daily_data)

    days_since_account_creation = list(range(number_of_days))
    daily_activity = [
        entries_per_date[(first_day +
                          datetime.timedelta(days=offset)).strftime("%Y-%m-%d")]
        for offset in days_since_account_creation
    ]

    print(
        self.ylabel,
        round(
            np.corrcoef(days_since_account_creation, daily_activity)[0][1],
            4))
    util.graph((days_since_account_creation, daily_activity),
               "all time",
               "Day",
               ylabel=self.ylabel,
               unique=True,
               line_of_best_fit=True)
    util.graph_histogram_2(daily_activity, ylabel=self.ylabel)
def months(self, year):
    """
    Conducts analysis on months.

    Graphs and tabulates the values returned by self._data_time for the
    requested period.

    Parameters
    ----------
    year : string
        The data to match - can be YYYY or empty for all time.
    """
    month_data = self._data_time(slice(0, len(year)), year, slice(5, 7))

    # Anything that is not a four-character year is labelled "all time".
    period_label = year if len(year) == 4 else "all time"

    util.graph(month_data,
               period_label,
               "Month",
               xtick_min=1,
               xtick_max=13,
               ylabel=self.ylabel)
    util.table(month_data,
               print_latex=self.print_latex,
               print_lists=self.print_lists)
def graph_polarity_rating(l):
    """
    Hand `l` to graph() with the labels for the polarity vs. rating chart.

    `l` is forwarded unchanged as the first argument; the remaining
    arguments are the title, x-axis label, y-axis label and save filename.
    """
    graph(
        l,
        'Polarity of Review Text vs. Reviewer Rating',  # title
        'Polarity Score',  # x label
        'Rating (stars)',  # y label
        'output/polarity_vs_stars.png',  # save filename
    )
def graph_strlen_rating(l):
    """
    Hand `l` to graph() with the labels for the length vs. rating chart.

    `l` is forwarded unchanged as the first argument; the remaining
    arguments are the title, x-axis label, y-axis label and save filename.
    """
    graph(
        l,
        'Length of Review Text vs. Reviewer Rating',  # title
        'String Length',  # x label
        'Rating (stars)',  # y label
        'output/review_length_vs_rating.png',  # save filename
    )
def graph_day_polarity(l):
    """
    Hand `l` to graph() with the labels for the weekday vs. polarity chart.

    `l` is forwarded unchanged as the first argument; the remaining
    arguments are the title, x-axis label, y-axis label and save filename.
    """
    graph(
        l,
        "Day of Week vs. Polarity Score",  # title
        "Day of Week",  # x label
        "Polarity Score",  # y label
        "output/day_of_week_vs_polarity_score.png",  # save filename
    )
def graph_day_rating(l):
    """
    Hand `l` to graph() with the labels for the weekday vs. rating chart.

    `l` is forwarded unchanged as the first argument; the remaining
    arguments are the title, x-axis label, y-axis label and save filename.
    """
    graph(
        l,
        "Day of Week vs. Rating (stars)",  # title
        "Day of Week",  # x label
        "Rating (stars)",  # y label
        "output/day_of_week_vs_rating.png",  # save filename
    )
def days_range(self, start_date, finish_date):
    """
    Conducts analysis on all days between start_date and finish_date
    inclusive.

    Every calendar date in the range is looked up through self._data_time
    and the combined results are graphed and tabulated.

    Parameters
    ----------
    start_date : string
        The date to match - YYYY-MM-DD.
    finish_date : string
        The date to match - YYYY-MM-DD.
    """
    import calendar

    start_year = int(start_date[:4])
    finish_year = int(finish_date[:4])

    time_data = []
    for year in range(start_year, finish_year + 1):
        # Only the first and last year of the range are partial.
        first_month = int(start_date[5:7]) if year == start_year else 1
        last_month = int(finish_date[5:7]) if year == finish_year else 12

        for month in range(first_month, last_month + 1):
            year_month = str(year) + "-" + str(month).zfill(2)

            if year_month == start_date[0:7]:
                first_day = int(start_date[8:10])
            else:
                first_day = 1

            if year_month == finish_date[0:7]:
                last_day = int(finish_date[8:10])
            elif 1 <= month <= 12:
                # Stop at the real end of the month so that dates which do
                # not exist (e.g. 02-31) are never queried.
                last_day = calendar.monthrange(year, month)[1]
            else:
                # Malformed month taken from the input: keep the permissive
                # upper bound rather than raising.
                last_day = 31

            for day in range(first_day, last_day + 1):
                date = year_month + "-" + str(day).zfill(2)
                time_data += self._data_time(slice(0, 10),
                                             date,
                                             slice(0, 10),
                                             return_ints=False)

    util.graph(time_data,
               start_date + " to " + finish_date,
               "Day",
               ylabel=self.ylabel)
    print("Between", start_date, "and", finish_date, "inclusive:")
    util.table(time_data,
               print_latex=self.print_latex,
               print_lists=self.print_lists)
def years(self):
    """
    Conducts analysis on years.

    Graphs the values returned by self._data_time over the span
    self.min_year to self.max_year + 1, labelled "all time", and then
    tabulates the same data.
    """
    year_data = self._data_time(slice(10, 11), "T", slice(0, 4))

    graph_options = {
        "xtick_min": self.min_year,
        "xtick_max": self.max_year + 1,
        "ylabel": self.ylabel,
    }
    util.graph(year_data, "all time", "Year", **graph_options)

    util.table(year_data,
               print_latex=self.print_latex,
               print_lists=self.print_lists)
def days(self, year_month):
    """
    Conducts analysis on days.

    Produces a graph, a boxplot and a table from the values returned by
    self._data_time for the requested month.

    Parameters
    ----------
    year_month : string
        The data to match - YYYY-MM.
    """
    match_span = slice(0, 7)
    extract_span = slice(8, 10)
    day_data = self._data_time(match_span, year_month, extract_span)

    util.graph(day_data, year_month, "Day", ylabel=self.ylabel)
    util.graph_boxplot(day_data, year_month, "Day")
    util.table(day_data,
               print_latex=self.print_latex,
               print_lists=self.print_lists)
def breaks(self, date, min_break=datetime.timedelta(days=1)):
    """
    Conducts analysis on breaks I have taken from Instagram.

    A break is the gap between two consecutive timestamps. Each break
    strictly longer than min_break is recorded as the date on which it
    ended together with its length in whole hours, oldest first, and the
    result is graphed and tabulated.

    Parameters
    ----------
    date : string
        When should the analysis cover - empty string for all time.
    min_break : datetime.timedelta, optional
        What is the minimum break that should be included. The default is
        datetime.timedelta(days=1).
    """
    timestamps = self._data_time(slice(0, len(date)),
                                 date,
                                 slice(0, 19),
                                 return_ints=False)
    # sorted() leaves the list handed back by _data_time untouched.
    # Timestamps in this format sort chronologically as plain strings.
    moments = [
        datetime.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
        for stamp in sorted(timestamps)
    ]

    break_data = ([], [])
    for earlier, later in zip(moments, moments[1:]):
        break_length = later - earlier
        if break_length > min_break:
            break_data[0].append(later.strftime("%Y-%m-%d"))
            # Whole hours; leftover minutes and seconds are dropped.
            break_data[1].append(break_length.days * 24 +
                                 break_length.seconds // 3600)

    util.graph(break_data,
               util.date_to_time_period(date),
               "day",
               ylabel="hours break",
               unique=True)
    util.table(break_data,
               sort_by_likes=True,
               unique=True,
               print_latex=self.print_latex,
               print_lists=self.print_lists)