Beispiel #1
0
def timeofday(features_dict, bins):
    """Append time-of-day activity features (``bins`` values per streamer).

    Tries to preload a previously pickled result from
    ``./features/timeofday_{bins}bins.pickle``; if that fails, regenerates
    the feature from each streamer's daily schedule and pickles it.

    Args:
        features_dict: dict mapping streamer display names (plus the special
            key "columns") to lists; extended in place.
        bins: number of day-part bins to split the daily schedule into.

    Returns:
        The same ``features_dict``, extended in place.
    """
    try:
        with open(f"./features/timeofday_{bins}bins.pickle", "rb") as file:
            preload = pickle.load(file)
        print(f"Preloading timeofday {bins} bins feature")
        for key in preload:
            features_dict[key].extend(preload[key])
    except (OSError, EOFError, pickle.UnpicklingError):
        print(f"Generating timeofday {bins} bins feature")
        columns = [f"day_part{bin}" for bin in range(bins)]
        features_dict["columns"].extend(columns)
        # Mirror the generated columns/rows into a dict we can pickle.
        for_pickle = make_empty_dict()
        for_pickle["columns"].extend(columns)
        streamers = chatlog.get_display_name()
        for streamer in streamers:
            try:
                result, days = sch.daily_schedule(streamer, False)
                result_list = np.array(result.values).flatten()
            except Exception:
                print("Unexpected error:", sys.exc_info())
                freshday = sch.make_freshday()
                result_list = np.zeros(len(freshday))
                days = 0
            binsize = int(np.ceil(len(result_list) / bins))
            # Pad with NaN up to a multiple of binsize, then take the max of
            # each bin.  `(-size) % binsize` pads nothing when the length is
            # already a multiple of binsize; the old `binsize - size % binsize`
            # padded a whole extra all-NaN bin in that case, producing an
            # extra (NaN) feature value.
            pad = (-result_list.size) % binsize
            result_binned = np.nanmax(
                np.pad(
                    result_list.astype(float),
                    (0, pad),
                    mode="constant",
                    constant_values=np.nan,
                ).reshape(-1, binsize),
                axis=1,
            )
            # Number of sessions per day during each time-of-day bin; guard
            # the days == 0 fallback (schedule lookup failed) so we extend
            # zeros instead of 0/0 NaNs.
            result_norm = result_binned / days if days else result_binned
            features_dict[streamer].extend(result_norm)
            for_pickle[streamer].extend(result_norm)
        with open(f"./features/timeofday_{bins}bins.pickle", "wb") as file:
            pickle.dump(for_pickle, file)
    if False:  # write to sql (disabled)
        # NOTE(review): `preload` is unbound when the except branch ran
        # above, and the queries are built by f-string interpolation (SQL
        # injection risk) -- fix both before ever enabling this block.
        for streamer in preload:
            if streamer == "columns":
                continue
            query = f"SELECT id FROM streamer WHERE display_name='{streamer}'"
            postgres = tp.Postgres()
            streamer_id = np.array(postgres.rawselect(query))[0, 0]
            postgres.close()
            hourlyQ = ", ".join([str(i) for i in preload[streamer]])
            coltag = ["day_part%s" % idx for idx in range(24)]
            coltagQ = ", ".join(coltag)
            query = f"INSERT INTO hourly_proba (streamer_id, {coltagQ}) VALUES ({streamer_id}, {hourlyQ});"
            postgres = tp.Postgres()
            postgres.rawsql(query)
            postgres.close()

    return features_dict
Beispiel #2
0
def tarray_feature(features_dict, binsize=60):
    """Append an "excitement" feature derived from chat time arrays.

    For each streamer, concatenates the per-VOD chat timestamps into one
    running timeline, bins them into ``binsize``-second buckets, and counts
    buckets whose chat volume exceeds ``transformed_cut(median)``.  The count
    is normalized to excitements per hour.  Also writes a chat-frequency
    histogram to ./plots/chat_frequency.png.

    NOTE(review): the loop is hard-coded to the single streamer "Metaphor"
    (the full-streamer loop is commented out) -- looks like debug leftover.
    The final merge iterates all of ``features_dict``, so any key missing
    from ``excitement_dict`` will raise KeyError; confirm intent.

    Args:
        features_dict: dict of streamer -> feature list (plus "columns"),
            extended in place.
        binsize: bucket width in seconds (default 60).

    Returns:
        The same ``features_dict``.
    """
    excitement_dict = dict()
    excitement_dict["columns"] = ["excitement"]
    streamers = chatlog.get_display_name()
    #for streamer in streamers:
    for streamer in ["Metaphor"]:
        # Resolve the streamer's database id.
        # NOTE(review): queries are built by f-string interpolation -- SQL
        # injection risk if display names are untrusted.
        query = f"SELECT id FROM streamer WHERE display_name='{streamer}';"
        postgres = tp.Postgres()
        streamer_id = np.array(postgres.rawselect(query))[0, 0]
        postgres.close()
        query = f"SELECT timearray FROM chatlog WHERE streamer_id={streamer_id};"
        postgres = tp.Postgres()
        records = postgres.rawselect(query)
        postgres.close()
        # Concatenate every VOD's timestamps into one timeline by offsetting
        # each record by the last timestamp seen so far.
        concat_chat = [0]
        for record in records:
            concat_chat.extend(np.array(record[0]) + concat_chat[-1])
        binned_chat = np.array(concat_chat) // binsize
        binned_counter = np.bincount(binned_chat)
        bins = len(binned_counter)
        sort_counter = np.sort(binned_counter)
        median = np.median(sort_counter)
        # Hoist the cutoff out of the loop (it is loop-invariant) and count
        # buckets above it in one vectorized pass instead of a Python loop.
        cutoff = transformed_cut(median)
        excite_count = int(np.count_nonzero(sort_counter > cutoff))
        excitements_per_hour = excite_count / (bins * binsize) * 3600.
        excitement_dict[streamer] = [excitements_per_hour]
        # FIGURE: chat-frequency histogram with the excitement cutoff line.
        fig = plt.figure(figsize=(7, 4))
        ax = plt.axes([0.15, 0.15, 0.8, 0.8])
        ax.hist(binned_chat, bins=bins)
        ax.axhline(cutoff, color="red")
        ax.set_ylim([0, 100])
        ax.set_xlim([0, max(binned_chat)])
        ax.set_xlabel("Minutes in stream", fontsize=16)
        ax.set_ylabel("Lines of chat / minute", fontsize=16)
        plt.savefig("./plots/chat_frequency.png", dpi=300, transparent=False)
        # END FIGURE
    for key in features_dict:
        features_dict[key].extend(excitement_dict[key])
    return features_dict
Beispiel #3
0
def chat_frequency():
    """Parse chatlog files into per-message time arrays and insert them
    into the ``chatlog`` table.

    For each streamer, selects VODs that do not yet have a ``timearray``,
    reads each chatlog file line by line, converts the leading
    ``[H:MM:SS]`` (or ``[D day, H:MM:SS]``) timestamp to seconds since
    stream start, and inserts the resulting array into the database.

    NOTE(review): queries are built by f-string interpolation -- SQL
    injection risk if display names / paths are untrusted.
    """
    streamers = chatlog.get_display_name()
    for streamer in streamers:
        # Resolve the streamer's database id.
        query = f"SELECT id FROM streamer WHERE display_name='{streamer}';"
        postgres = tp.Postgres()
        streamer_id = np.array(postgres.rawselect(query))[0, 0]
        postgres.close()
        # VODs recorded after 2019-07-01 that still lack a timearray.
        #query = f"SELECT id, chatlog FROM vod WHERE streamer_id='{streamer_id}' AND created_at > '2019-07-01T00:00:00Z'"
        query = f"SELECT v.id, v.chatlog FROM vod v LEFT JOIN chatlog c ON v.id = c.vod_id WHERE v.streamer_id='{streamer_id}' AND v.created_at > '2019-07-01T00:00:00Z' AND timearray IS null;"
        postgres = tp.Postgres()
        logdirs = np.array(postgres.rawselect(query))
        postgres.close()
        # Strip each chatlog file for time stamps.
        for logdir in logdirs:
            vod_id = logdir[0]
            # Skip VODs whose chatlog path is unusable.
            if not check_logdir(logdir[1]):
                continue
            timearray = []
            with open(logdir[1], "r") as chat:
                for line in chat:
                    # Leading "[..]" token, e.g. "[1:02:03]" or
                    # "[1 day, 2:03:04]".
                    timestamp = line[1:].split("]")[0].split(":")
                    minute = int(timestamp[1])
                    second = int(timestamp[2])
                    try:
                        hour = int(timestamp[0])
                        tdelta = int(
                            datetime.timedelta(hours=hour,
                                               minutes=minute,
                                               seconds=second).total_seconds())
                    except ValueError:
                        # Hour field carries a day prefix ("N day, H"),
                        # which makes the plain int() above raise ValueError.
                        dayhour = timestamp[0].split(" day, ")
                        day = int(dayhour[0])
                        hour = int(dayhour[1])
                        tdelta = int(
                            datetime.timedelta(days=day,
                                               hours=hour,
                                               minutes=minute,
                                               seconds=second).total_seconds())

                    timearray.append(tdelta)
            # Postgres ARRAY literal needs at least one element.
            if len(timearray) == 0:
                timearray = [0]
            query = f"INSERT INTO chatlog (streamer_id, vod_id, timearray) VALUES ('{streamer_id}', '{vod_id}', ARRAY{timearray});"
            postgres = tp.Postgres()
            postgres.rawsql(query)
            postgres.close()
Beispiel #4
0
def timeofweek(features_dict, flag, units):
    """Append per-weekday median schedule features (Mon..Sun).

    Only ``flag == "median"`` is implemented; any other flag returns
    ``features_dict`` untouched.  Tries to preload the feature from
    ``./features/timeofweek_median.pickle``; otherwise generates it from
    each streamer's weekly schedule and pickles the result.  Values are
    stored in seconds and converted to ``units`` ("seconds", "minutes" or
    "hours") when extended into ``features_dict``.

    Returns:
        The same ``features_dict``, extended in place.
    """
    streamers = chatlog.get_display_name()
    divisors = {"minutes": 60.0, "hours": 3600.0}

    def _in_units(values):
        # Seconds are stored as-is; other known units are rescaled.  An
        # unknown unit appends nothing (matches the old if/elif chain).
        if units == "seconds":
            return values
        if units in divisors:
            return list(np.array(values) / divisors[units])
        return []

    if flag == "median":
        try:
            with open("./features/timeofweek_median.pickle", "rb") as file:
                preload = pickle.load(file)
            print("Preloading timeofweek feature")
            for key in preload:
                if key in ["columns"]:
                    features_dict[key].extend(preload[key])
                    continue
                features_dict[key].extend(_in_units(preload[key]))
        except (OSError, EOFError, pickle.UnpicklingError):
            # NOTE(review): a stray `raise Exception` used to sit at the top
            # of this branch, making everything below unreachable; removed so
            # the feature can actually be (re)generated.
            print("Generating timeofweek feature")
            columns = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
            features_dict["columns"].extend(columns)
            for_pickle = make_empty_dict()
            for_pickle["columns"].extend(columns)
            for streamer in streamers:
                try:
                    result = sch.weekly_schedule(streamer, False)
                except Exception:
                    result = {dayofweek: [] for dayofweek in range(7)}
                # Median session value (seconds) per weekday, 0 = Monday.
                median = [np.median(result[dayofweek])
                          for dayofweek in range(7)]
                features_dict[streamer].extend(_in_units(median))
                for_pickle[streamer].extend(median)
            with open("./features/timeofweek_median.pickle", "wb") as file:
                pickle.dump(for_pickle, file)
    return features_dict
Beispiel #5
0
def tarray_feature(features_dict, binsize=60):
    """Append an "excitement" feature derived from chat time arrays.

    Bins each streamer's concatenated chat timestamps into ``binsize``-second
    buckets and counts buckets whose chat volume exceeds
    ``transformed_cut(median)``, normalized to excitements per hour.  Also
    plots and saves a chat-frequency histogram.

    NOTE(review): duplicate definition -- a function of the same name exists
    earlier in this file; whichever is defined last wins at import time.
    """
    # make features out of timearray of chatlogs
    try:
        # NOTE(review): `asdf` is undefined, so this NameError deliberately
        # forces the except branch -- preloading is effectively disabled
        # (debug hack).  Remove this line to re-enable the pickle cache.
        a = asdf
        file = open(f"./features/excitement.pickle", "rb")
        excitement_dict = pickle.load(file)
        print(f"Preloading excitement feature")
    except:
        excitement_dict = dict()
        excitement_dict["columns"] = ["excitement"]
        streamers = chatlog.get_display_name()
        # NOTE(review): loop hard-coded to one streamer (debug leftover);
        # the full-streamer loop is commented out below.
        #for streamer in streamers:
        for streamer in ['Metaphor']:
            # get streamer_id
            # NOTE(review): queries built via f-string interpolation -- SQL
            # injection risk if display names are untrusted.
            query = f"SELECT id FROM streamer WHERE display_name='{streamer}';"
            postgres = tp.Postgres()
            streamer_id = np.array(postgres.rawselect(query))[0, 0]
            postgres.close()
            query = f"SELECT timearray FROM chatlog WHERE streamer_id={streamer_id};"
            postgres = tp.Postgres()
            records = postgres.rawselect(query)
            postgres.close()
            """
            for record in records[:1]:
                binned_chat = np.array(record[0])//binsize
                binned_counter = np.bincount(binned_chat)
                bins = len(binned_counter)
                fig = plt.figure(figsize=(7,4))
                ax = plt.axes([0.15, 0.15, 0.8, 0.8])
                ax.hist(binned_chat, bins=bins)
                plt.show()
            """
            # Concatenate every VOD's timestamps into one running timeline
            # by offsetting each record by the last timestamp seen so far.
            concat_chat = [0]
            for record in records:
                concat_chat.extend(np.array(record[0]) + concat_chat[-1])
            binned_chat = np.array(concat_chat) // binsize
            binned_counter = np.bincount(binned_chat)
            bins = len(binned_counter)
            sort_counter = np.sort(binned_counter)
            median = np.median(sort_counter)
            #print("median:", sort_counter[bins//2])
            #print("middle 90%:", sort_counter[int(bins*0.95)], sort_counter[int(bins*0.05)])
            # Count buckets whose chat volume exceeds the (loop-invariant)
            # cutoff derived from the median.
            excite_count = 0
            for event in sort_counter:
                if event > transformed_cut(median):
                    excite_count += 1
            #if sort_counter[bins//2] == 0:
            #    excite_count = 0
            #print("cutoff:", transformed_cut(median))
            #print(excite_count)
            # Normalize the raw count to events per hour of stream time.
            excitements_per_hour = excite_count / (bins * binsize) * 3600.
            excitement_dict[streamer] = [excitements_per_hour]
            # FIGURE
            fig = plt.figure(figsize=(7, 4))
            ax = plt.axes([0.15, 0.15, 0.8, 0.8])
            # NOTE(review): `bins` is rebound here from the bin count to the
            # histogram's bin-edge array; `max(bins)` below uses the edges.
            n, bins, patches = ax.hist(binned_chat, bins=bins, color="#6441A4")
            ax.axhline(transformed_cut(median), color="red")
            ax.set_xlim([0, max(bins)])
            ax.set_ylim([0, 100])
            ax.set_xlabel("Minutes in stream", fontsize=16)
            ax.set_ylabel("Lines of chat / minute", fontsize=16)
            plt.tick_params(axis="both", labelsize=16)
            plt.savefig(f"./features/chathistory.png",
                        dpi=300,
                        transparent=False)
            plt.show()
            # END FIGURE
            # NOTE(review): persistence of the computed feature is disabled.
            #file = open(f"./features/excitement.pickle", "wb")
            #pickle.dump(excitement_dict, file)
            #file.close()
    # NOTE(review): iterates ALL of features_dict -- raises KeyError for any
    # streamer not present in excitement_dict (only "Metaphor" is, above).
    for key in features_dict:
        features_dict[key].extend(excitement_dict[key])
    if False:  # push into database (disabled)
        for streamer in excitement_dict:
            if streamer == "columns":
                continue
            postgres = tp.Postgres()
            query = f"SELECT id FROM streamer WHERE display_name='{streamer}';"
            streamer_id = np.array(postgres.rawselect(query))[0, 0]
            postgres.close()
            excite = excitement_dict[streamer][0]
            query = f"INSERT INTO excitement (streamer_id, excite) VALUES ({streamer_id}, {excite});"
            postgres = tp.Postgres()
            postgres.rawsql(query)
            postgres.close()
    return features_dict
Beispiel #6
0
def make_empty_dict():
    """Build a blank feature container.

    Returns:
        dict with one empty list per streamer display name, plus an empty
        "columns" list for feature names (inserted last, preserving the
        original key order).
    """
    container = {}
    for name in chatlog.get_display_name():
        container[name] = []
    container["columns"] = []
    return container
Beispiel #7
0
def make_empty_df():
    """Return an empty DataFrame whose index holds the streamer display
    names, with the index named "display_name"."""
    names = chatlog.get_display_name()
    frame = pd.DataFrame(index=pd.Index(names, name="display_name"))
    return frame