Example #1
0
    def daily_correlation(self):
        """
        Determines the pmcc between days since account creation and usage + displays linegraph with line of best fit.

        The analysed period is hard-coded: 2017-07-19 to 2020-04-27, both
        days included. Prints ``self.ylabel`` followed by the correlation
        coefficient rounded to 4 decimal places, then hands the per-day
        counts to ``util.graph`` and ``util.graph_histogram_2``.
        """
        from collections import Counter

        first_day = datetime.datetime(2017, 7, 19)
        last_day = datetime.datetime(2020, 4, 27)
        # The extra day makes the range inclusive of last_day.
        total_days = (last_day - first_day + datetime.timedelta(days=1)).days

        # NOTE(review): presumably a flat list of "YYYY-MM-DD" strings (the
        # first ten characters of each timestamp) - confirm in _data_time.
        daily_data = self._data_time(slice(0, 1),
                                     "",
                                     slice(0, 10),
                                     return_ints=False)

        # Tally each date once; calling daily_data.count() per day would
        # rescan the whole list for every day in the range.
        entries_per_date = Counter(daily_data)

        days_since_account_creation = list(range(total_days))
        daily_activity = [
            entries_per_date[(first_day + datetime.timedelta(days=offset)
                              ).strftime("%Y-%m-%d")]
            for offset in days_since_account_creation
        ]

        # corrcoef returns a 2x2 matrix; [0][1] is the x/y coefficient.
        print(
            self.ylabel,
            round(
                np.corrcoef(days_since_account_creation, daily_activity)[0][1],
                4))

        util.graph((days_since_account_creation, daily_activity),
                   "all time",
                   "Day",
                   ylabel=self.ylabel,
                   unique=True,
                   line_of_best_fit=True)

        util.graph_histogram_2(daily_activity, ylabel=self.ylabel)
Example #2
0
    def months(self, year):
        """
        Conducts analysis on months.

        Parameters
        ----------
        year : string
            The data to match - can be YYYY or empty for all time.

        """
        time_data = self._data_time(slice(0, len(year)), year, slice(5, 7))

        # Only a four-character year is used as the graph title; anything
        # else is labelled "all time".
        period = year if len(year) == 4 else "all time"
        util.graph(time_data,
                   period,
                   "Month",
                   xtick_min=1,
                   xtick_max=13,
                   ylabel=self.ylabel)
        #util.graph_boxplot(time_data,year,"Month")
        util.table(time_data,
                   print_latex=self.print_latex,
                   print_lists=self.print_lists)
def graph_polarity_rating(l):
    """Pass `l` to graph() with the polarity-vs-rating labels and output file."""
    graph(
        l,
        'Polarity of Review Text vs. Reviewer Rating',  # title
        'Polarity Score',  # x label
        'Rating (stars)',  # y label
        'output/polarity_vs_stars.png',  # save filename
    )
def graph_strlen_rating(l):
    """Pass `l` to graph() with the length-vs-rating labels and output file."""
    graph(
        l,
        'Length of Review Text vs. Reviewer Rating',  # title
        'String Length',  # x label
        'Rating (stars)',  # y label
        'output/review_length_vs_rating.png',  # save filename
    )
def graph_day_polarity(l):
    """Pass `l` to graph() with the day-vs-polarity labels and output file."""
    graph(
        l,
        "Day of Week vs. Polarity Score",  # title
        "Day of Week",  # x label
        "Polarity Score",  # y label
        "output/day_of_week_vs_polarity_score.png",  # save filename
    )
def graph_day_rating(l):
    """Pass `l` to graph() with the day-vs-rating labels and output file."""
    graph(
        l,
        "Day of Week vs. Rating (stars)",  # title
        "Day of Week",  # x label
        "Rating (stars)",  # y label
        "output/day_of_week_vs_rating.png",  # save filename
    )
Example #7
0
    def days_range(self, start_date, finish_date):
        """
        Conducts analysis on all days between start_date and finish_date inclusive.

        Each calendar day in the range is looked up separately through
        ``self._data_time`` and the results are concatenated in date order.
        If start_date is later than finish_date nothing is looked up.

        Parameters
        ----------
        start_date : string
            The date to match - YYYY-MM-DD. Expected to be a valid date.
        finish_date : string
            The date to match - YYYY-MM-DD. Expected to be a valid date.

        """
        import calendar

        start_year = int(start_date[:4])
        finish_year = int(finish_date[:4])

        time_data = []
        for year in range(start_year, finish_year + 1):
            # Only the boundary years are clipped to the requested months.
            start_month = int(start_date[5:7]) if year == start_year else 1
            finish_month = int(finish_date[5:7]) if year == finish_year else 12

            for month in range(start_month, finish_month + 1):
                year_month = f"{year}-{month:02d}"

                if year_month == start_date[0:7]:
                    start_day = int(start_date[8:10])
                else:
                    start_day = 1

                if year_month == finish_date[0:7]:
                    finish_day = int(finish_date[8:10])
                else:
                    # Stop at the real length of the month so impossible
                    # dates such as "2020-02-31" are never queried.
                    finish_day = calendar.monthrange(year, month)[1]

                for day in range(start_day, finish_day + 1):
                    date = f"{year_month}-{day:02d}"
                    time_data.extend(
                        self._data_time(slice(0, 10),
                                        date,
                                        slice(0, 10),
                                        return_ints=False))

        util.graph(time_data,
                   start_date + " to " + finish_date,
                   "Day",
                   ylabel=self.ylabel)
        #util.graph_boxplot(time_data,start_date+" to "+finish_date,"Day")
        print("Between", start_date, "and", finish_date, "inclusive:")
        util.table(time_data,
                   print_latex=self.print_latex,
                   print_lists=self.print_lists)
Example #8
0
 def years(self):
     """
     Conducts analysis on years.

     Graphs the per-year data over ``self.min_year``..``self.max_year`` and
     prints the matching table.
     """
     # NOTE(review): index 10 being "T" looks like the date/time separator
     # of an ISO timestamp, i.e. this selects every entry - confirm.
     yearly_data = self._data_time(slice(10, 11), "T", slice(0, 4))

     util.graph(
         yearly_data,
         "all time",
         "Year",
         xtick_min=self.min_year,
         xtick_max=self.max_year + 1,
         ylabel=self.ylabel,
     )
     util.table(
         yearly_data,
         print_latex=self.print_latex,
         print_lists=self.print_lists,
     )
Example #9
0
    def days(self, year_month):
        """
        Conducts analysis on days.

        Produces a graph, a box plot and a table for the selected month.

        Parameters
        ----------
        year_month : string
            The data to match - YYYY-MM.

        """
        # Match on the first seven characters, keep characters 8-9.
        daily_data = self._data_time(slice(0, 7), year_month, slice(8, 10))

        util.graph(daily_data, year_month, "Day", ylabel=self.ylabel)
        util.graph_boxplot(daily_data, year_month, "Day")
        util.table(
            daily_data,
            print_latex=self.print_latex,
            print_lists=self.print_lists,
        )
Example #10
0
    def breaks(self, date, min_break=datetime.timedelta(days=1)):
        """
        Conducts analysis on breaks I have taken from Instagram.

        A break is the gap between two consecutive timestamps. Only gaps
        strictly longer than ``min_break`` are reported. Each break is
        labelled with the date of the timestamp that ends it and measured in
        whole hours (rounded down). Breaks are reported in chronological order.

        Parameters
        ----------
        date : string
            When should the analysis cover - empty string for all time.
        min_break : datetime.timedelta, optional
            What is the minimum break that should be included. The default is datetime.timedelta(days=1).

        """
        stamps = self._data_time(slice(0, len(date)),
                                 date,
                                 slice(0, 19),
                                 return_ints=False)
        # sorted() leaves the list returned by _data_time untouched.
        moments = [
            datetime.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
            for stamp in sorted(stamps)
        ]

        break_days = []
        break_hours = []
        # Walk consecutive (earlier, later) pairs in chronological order.
        for earlier, later in zip(moments, moments[1:]):
            gap = later - earlier
            if gap > min_break:
                break_days.append(later.strftime("%Y-%m-%d"))
                break_hours.append(gap.days * 24 + gap.seconds // 3600)

        breaks = (break_days, break_hours)
        util.graph(breaks,
                   util.date_to_time_period(date),
                   "day",
                   ylabel="hours break",
                   unique=True)
        util.table(breaks,
                   sort_by_likes=True,
                   unique=True,
                   print_latex=self.print_latex,
                   print_lists=self.print_lists)