Ejemplo n.º 1
0
    def __init__(self, filename, platform="WhatsApp"):
        """Create an empty chat bound to ``filename``.

        ``platform`` selects which loader is bound to ``self.open_file``:
        ``"WhatsApp"`` (the default) or ``"Facebook"``.  For any other value
        ``open_file`` is left unset.
        """
        # Source of the export and the platform it came from.
        self.platform = platform
        self.filename = filename

        # State that starts out empty.
        self.senders = []
        self.messages = []  # List of Messages objects
        self.raw_messages = []
        self.root = ''
        self.features = ChatFeatures()  # Chat Features object

        # Bind the platform-specific loader.
        if platform == "Facebook":
            self.open_file = self.open_file_facebook_json
        elif platform == "WhatsApp":
            self.open_file = self.open_file_whatsapp
Ejemplo n.º 2
0
    def __init__(self, filename, platform="WhatsApp"):
        """Create an empty chat bound to ``filename``.

        ``platform`` selects which loader is bound to ``self.open_file``:
        ``"WhatsApp"`` (the default) or ``"Facebook"``.  For any other value
        ``open_file`` is left unset.
        """
        # Source of the export and the platform it came from.
        self.platform = platform
        self.filename = filename

        # State that starts out empty.
        self.senders = []
        self.messages = []  # List of Messages objects
        self.raw_messages = []
        self.root = ''
        self.features = ChatFeatures()  # Chat Features object

        # Bind the platform-specific loader.
        if platform == "Facebook":
            self.open_file = self.open_file_facebook_json
        elif platform == "WhatsApp":
            self.open_file = self.open_file_whatsapp
Ejemplo n.º 3
0
class Chat():
    """One chat export together with the features computed from it.

    The instance stores the raw lines read from ``filename``, the parsed
    messages and senders, the "root" sender, and a ``ChatFeatures`` object
    that performs the actual computations.

    NOTE: every ``print`` below uses the single-argument parenthesised form,
    which behaves identically as a Python 2 print statement and as a
    Python 3 function call.
    """

    def __init__(self, filename, platform="WhatsApp"):
        """Create an empty chat bound to ``filename``.

        ``platform`` selects the loader bound to ``self.open_file``:
        ``"WhatsApp"`` (default) or ``"Facebook"``.  For any other value
        ``open_file`` is left unset and ``parse_messages`` does nothing.
        """
        self.filename     = filename
        self.platform     = platform
        self.raw_messages = []
        self.messages     = []     # List of Messages objects
        self.features     = ChatFeatures() # Chat Features object
        self.senders      = []
        self.root         = ''
        # Read by messages_pattern(); initialised here so that method no
        # longer fails with AttributeError when nobody assigned it.
        self.patterns     = []

        if platform == "WhatsApp":
            self.open_file = self.open_file_whatsapp
        elif platform == "Facebook":
            self.open_file = self.open_file_facebook_json

    def open_file_whatsapp(self):
        """Read ``self.filename`` and append its lines to ``raw_messages``.

        The file is decoded as UTF-8 (a leading BOM is dropped), split on
        ``"\\n"`` and every kept line is re-encoded to UTF-8 before being
        appended.
        """
        # ``with`` closes the handle even when read() raises.
        with codecs.open(self.filename, "r", "utf-8-sig") as arq:
            content = arq.read()
        for line in content.split("\n"):
            # Lines of exactly one character are dropped (presumably a lone
            # "\r" left over from CRLF files -- TODO confirm).  Empty lines
            # are kept.
            if len(line) != 1:
                self.raw_messages.append(line.encode("utf-8"))

    def open_file_facebook_json(self):
        """Load a JSON file and append the items under ``"data"``.

        Raises ``KeyError`` when the top-level object has no ``"data"`` key.
        """
        with codecs.open(self.filename, "r", "utf-8-sig") as arq:
            content = arq.read()
        dicts = json.loads(content)
        self.raw_messages.extend(dicts["data"])

    def parse_messages(self):
        """Parse ``raw_messages`` into ``senders`` and ``messages``.

        Uses the parser matching ``self.platform``; other platforms are
        silently ignored.
        """
        if self.platform == "WhatsApp":
            p = whatsapp.ParserWhatsapp(self.raw_messages)
            self.senders, self.messages = p.parse()
        elif self.platform == "Facebook":
            p = facebook.ParserFacebook(self.raw_messages)
            self.senders, self.messages = p.parse()

    def set_root(self, root):
        """Set the sender treated as the root of the conversation."""
        self.root = root

    def get_contact(self):
        """Return the first sender that is not the root.

        The original built ``set(self.root)``, i.e. a set of the *characters*
        of the root name, so the root was never actually excluded and the
        result depended on set ordering.  Senders are now scanned in order.

        Raises ``IndexError`` (as before) when no such sender exists.
        """
        for sender in self.senders:
            if sender != self.root:
                return sender
        raise IndexError("no sender other than root %r" % (self.root,))

    def response_time_and_burst(self, root=None):
        """Compute response-time and burst features.

        ``root``, when given, becomes the new ``self.root``.  Otherwise, if
        no root has been set yet (``''`` or ``None``) and senders are known,
        the first sender is used.  Returns whatever
        ``features.compute_response_time_and_burst`` returns.
        """
        if root is not None:
            # The argument used to be ignored entirely.
            self.root = root
        elif not self.root and self.senders:
            # The old ``self.root is None`` test never fired because the
            # default root is the empty string.
            self.root = self.senders[0]
        return self.features.compute_response_time_and_burst(self.messages, self.root, self.senders)

    def messages_per_weekday(self):
        """Delegate to ``features.compute_messages_per_weekday``."""
        return self.features.compute_messages_per_weekday(self.messages)

    def messages_per_shift(self):
        """Delegate to ``features.compute_messages_per_shift``."""
        return self.features.compute_messages_per_shift(self.messages)

    def messages_pattern(self):
        """Delegate to ``features.compute_messages_pattern`` with ``self.patterns``."""
        return self.features.compute_messages_pattern(self.messages, self.senders, self.patterns)

    def message_proportions(self):
        """Delegate to ``features.compute_message_proportions`` for root and contact."""
        return self.features.compute_message_proportions(self.messages, self.senders, self.root, self.get_contact())

    def most_used_words(self):
        """Delegate to ``features.compute_most_used_words`` with arguments 10 and 3."""
        return self.features.compute_most_used_words(self.messages, 10, 3)

    def all_features(self, **kargs):
        """Run every feature computation on ``self.features``.

        Optional keyword arguments and their defaults:
        ``burst_thrs`` (3), ``initiation_thrs`` (60*60*8),
        ``response_thrs`` (60*60*3), ``pattern_list`` ([]), ``top`` (10) and
        ``word_length_threshold`` (3).  The time thresholds are presumably
        in seconds -- TODO confirm against ChatFeatures.
        """
        burst_thrs = kargs.get("burst_thrs", 3)
        initiation_thrs = kargs.get("initiation_thrs", 60*60*8)
        response_thrs = kargs.get("response_thrs", 60*60*3)
        pattern_list = kargs.get("pattern_list", [])
        top = kargs.get("top", 10)
        word_length_threshold = kargs.get("word_length_threshold", 3)

        self.features.compute_response_time_and_burst(self.messages, self.root, self.senders, initiation_thrs, burst_thrs, response_thrs)
        self.features.compute_messages_per_weekday(self.messages)
        self.features.compute_messages_per_shift(self.messages)
        self.features.compute_messages_pattern(self.messages, self.senders, pattern_list)
        self.features.compute_message_proportions(self.messages, self.senders, self.root, self.get_contact())
        self.features.compute_most_used_words(self.messages, top, word_length_threshold)

    def print_features(self):
        """Print a human-readable report of the computed features."""
        feats = self.features
        contact = self.get_contact()

        # Report the configured root; fall back to the first sender only
        # when no root was set (the old code always printed senders[0]).
        root_label = self.root if self.root else self.senders[0]
        print("Root: %s" % (root_label))
        print("")

        print("Average root response time (s): %.2f" % (feats.compute_avg_root_response_time()))
        print("Average contact response time (s): %.2f" % (feats.compute_avg_contact_response_time()))
        print("Ratio: %.2f" % (feats.compute_response_time_ratio(self.root, contact)))
        print("")

        # NOTE(review): root burst output was already disabled in the original.
        # print "Number of root bursts: %d" % (self.features.compute_nbr_root_burst())
        # print "Average burst length: %.2f" % (self.features.compute_avg_root_burst())
        # print ""

        print("Number of contact bursts: %d" % (feats.compute_nbr_contact_burst()))
        # Format fixed: "%.2ff" printed a stray trailing "f".
        print("Average burst length: %.2f" % (feats.compute_avg_contact_burst()))
        print("Ratio: %.2f" % (feats.compute_bursts_ratio(self.root, contact)))
        print("")

        for s in self.senders:
            if s == self.root:
                print("Root initiations: %d" % (feats.initiations[s]))
            else:
                print("Contact initiations: %d" % (feats.initiations[s]))

        print("Root initiation ratio: %.2f" % (feats.compute_root_initation_ratio(self.root, contact)))
        print("")

        print("Proportions:")
        # list(...) keeps the indexing valid on Python 3 dict views too.
        pretty_print(feats.proportions, list(feats.proportions.keys())[0], 1)
        print("")
        print("Weekdays:")
        pretty_print(feats.weekday, "Weekday", 0)
        print("")
        print("Shifts:")
        pretty_print(feats.shifts, "Shifts", 0)
        print("")
        print("Patterns:")
        pretty_print(feats.patterns, "Patterns", 0)
        print("")
        print("Most used words:")
        # Iterate over a copy: removing from the list being iterated made
        # the loop skip the entry following every removed one.
        for muw in list(feats.most_used_words):
            try:
                print(muw[0])
            except UnicodeEncodeError:
                # Unprintable words are dropped from the stored list.
                feats.most_used_words.remove(muw)

    def save_features(self, output_name):
        """Write the computed features as JSON and pretty-print them.

        ``output_name`` gets a ``.json`` suffix when it lacks one.  The
        ``logs`` directory is created only for the default path
        ``./logs/basic_stats.json``.  If the file cannot be written an error
        message is printed and the process exits with status 1.
        """
        import os
        import pprint
        import sys

        contact = self.get_contact()
        output = {}
        output["root"] = self.root

        output["avg_response_time"] = {}
        for s in self.senders:
            if s == self.root:
                output["avg_response_time"][s] = self.features.compute_avg_root_response_time()
            else:
                output["avg_response_time"][s] = self.features.compute_avg_contact_response_time()
        output["avg_response_time"]["ratio"] = self.features.compute_response_time_ratio(self.root, contact)

        output["nbr_bursts"] = {}
        for s in self.senders:
            if s == self.root:
                output["nbr_bursts"][s] = self.features.compute_nbr_root_burst()
            else:
                output["nbr_bursts"][s] = self.features.compute_nbr_contact_burst()
        output["nbr_bursts"]["ratio"] = self.features.compute_bursts_ratio(self.root, contact)

        # NOTE(review): average-burst output was already disabled in the original.
        # output["avg_bursts"] = {}
        # for s in self.senders:
        #     if s == self.root:
        #         output["avg_bursts"][s] = self.features.compute_avg_root_burst()
        #     else:
        #         output["avg_bursts"][s] = self.features.compute_avg_contact_burst()

        # Copy before adding the ratio so the extra key does not leak into
        # self.features.initiations.
        output["initiations"] = dict(self.features.initiations)
        output["initiations"]["root_initiation_ratio"] = self.features.compute_root_initation_ratio(self.root, contact)
        output["proportions"] = self.features.proportions
        output["weekdays"] = self.features.weekday
        output["shifts"] = self.features.shifts
        output["patterns"] = self.features.patterns
        output["senders"] = self.senders
        output["muw"] = self.features.most_used_words
        output["outcome"] = self.features.generate_outcome(self.root, contact, 0) #TODO: make macros for outcome methodology

        # Serialise first so a failure here cannot leave a truncated file.
        payload = json.dumps(output)

        # if fallback to default path, make sure the hardcoded folder `logs` is present
        if output_name == "./logs/basic_stats.json" and not os.path.exists('logs'):
            os.makedirs('logs')

        if output_name.endswith(".json"):
            target = output_name
        else:
            target = output_name + ".json"
        try:
            # ``with`` closes the handle even if write() fails.
            with open(target, "w") as arq:
                arq.write(payload)
        # In case path (directory) mentioned by the user doesn't exist
        except IOError:
            print("\nI/O Error: Following path doesn't exist: %s \n" % (output_name,))
            sys.exit(1)
        pprint.pprint(output)
Ejemplo n.º 4
0
class Chat():
    """One chat export together with the features computed from it.

    The instance stores the raw lines read from ``filename``, the parsed
    messages and senders, the "root" sender, and a ``ChatFeatures`` object
    that performs the actual computations.

    NOTE: every ``print`` below uses the single-argument parenthesised form,
    which behaves identically as a Python 2 print statement and as a
    Python 3 function call.
    """

    def __init__(self, filename, platform="WhatsApp"):
        """Create an empty chat bound to ``filename``.

        ``platform`` selects the loader bound to ``self.open_file``:
        ``"WhatsApp"`` (default) or ``"Facebook"``.  For any other value
        ``open_file`` is left unset and ``parse_messages`` does nothing.
        """
        self.filename     = filename
        self.platform     = platform
        self.raw_messages = []
        self.messages     = []     # List of Messages objects
        self.features     = ChatFeatures() # Chat Features object
        self.senders      = []
        self.root         = ''
        # Read by messages_pattern(); initialised here so that method no
        # longer fails with AttributeError when nobody assigned it.
        self.patterns     = []

        if platform == "WhatsApp":
            self.open_file = self.open_file_whatsapp
        elif platform == "Facebook":
            self.open_file = self.open_file_facebook_json

    def open_file_whatsapp(self):
        """Read ``self.filename`` and append its lines to ``raw_messages``.

        The file is decoded as UTF-8 (a leading BOM is dropped), split on
        ``"\\n"`` and every kept line is re-encoded to UTF-8 before being
        appended.
        """
        # ``with`` closes the handle even when read() raises.
        with codecs.open(self.filename, "r", "utf-8-sig") as arq:
            content = arq.read()
        for line in content.split("\n"):
            # Lines of exactly one character are dropped (presumably a lone
            # "\r" left over from CRLF files -- TODO confirm).  Empty lines
            # are kept.
            if len(line) != 1:
                self.raw_messages.append(line.encode("utf-8"))

    def open_file_facebook_json(self):
        """Load a JSON file and append the items under ``"data"``.

        Raises ``KeyError`` when the top-level object has no ``"data"`` key.
        """
        with codecs.open(self.filename, "r", "utf-8-sig") as arq:
            content = arq.read()
        dicts = json.loads(content)
        self.raw_messages.extend(dicts["data"])

    def parse_messages(self):
        """Parse ``raw_messages`` into ``senders`` and ``messages``.

        Uses the parser matching ``self.platform``; other platforms are
        silently ignored.
        """
        if self.platform == "WhatsApp":
            p = whatsapp.ParserWhatsapp(self.raw_messages)
            self.senders, self.messages = p.parse()
        elif self.platform == "Facebook":
            p = facebook.ParserFacebook(self.raw_messages)
            self.senders, self.messages = p.parse()

    def set_root(self, root):
        """Set the sender treated as the root of the conversation."""
        self.root = root

    def get_contact(self):
        """Return the first sender that is not the root.

        The original built ``set(self.root)``, i.e. a set of the *characters*
        of the root name, so the root was never actually excluded and the
        result depended on set ordering.  Senders are now scanned in order.

        Raises ``IndexError`` (as before) when no such sender exists.
        """
        for sender in self.senders:
            if sender != self.root:
                return sender
        raise IndexError("no sender other than root %r" % (self.root,))

    def response_time_and_burst(self, root=None):
        """Compute response-time and burst features.

        ``root``, when given, becomes the new ``self.root``.  Otherwise, if
        no root has been set yet (``''`` or ``None``) and senders are known,
        the first sender is used.  Returns whatever
        ``features.compute_response_time_and_burst`` returns.
        """
        if root is not None:
            # The argument used to be ignored entirely.
            self.root = root
        elif not self.root and self.senders:
            # The old ``self.root is None`` test never fired because the
            # default root is the empty string.
            self.root = self.senders[0]
        return self.features.compute_response_time_and_burst(self.messages, self.root, self.senders)

    def messages_per_weekday(self):
        """Delegate to ``features.compute_messages_per_weekday``."""
        return self.features.compute_messages_per_weekday(self.messages)

    def messages_per_shift(self):
        """Delegate to ``features.compute_messages_per_shift``."""
        return self.features.compute_messages_per_shift(self.messages)

    def messages_pattern(self):
        """Delegate to ``features.compute_messages_pattern`` with ``self.patterns``."""
        return self.features.compute_messages_pattern(self.messages, self.senders, self.patterns)

    def message_proportions(self):
        """Delegate to ``features.compute_message_proportions`` for root and contact."""
        return self.features.compute_message_proportions(self.messages, self.senders, self.root, self.get_contact())

    def most_used_words(self):
        """Delegate to ``features.compute_most_used_words`` with arguments 10 and 3."""
        return self.features.compute_most_used_words(self.messages, 10, 3)

    def all_features(self, **kargs):
        """Run every feature computation on ``self.features``.

        Optional keyword arguments and their defaults:
        ``burst_thrs`` (3), ``initiation_thrs`` (60*60*8),
        ``response_thrs`` (60*60*3), ``pattern_list`` ([]), ``top`` (10) and
        ``word_length_threshold`` (3).  The time thresholds are presumably
        in seconds -- TODO confirm against ChatFeatures.
        """
        burst_thrs = kargs.get("burst_thrs", 3)
        initiation_thrs = kargs.get("initiation_thrs", 60*60*8)
        response_thrs = kargs.get("response_thrs", 60*60*3)
        pattern_list = kargs.get("pattern_list", [])
        top = kargs.get("top", 10)
        word_length_threshold = kargs.get("word_length_threshold", 3)

        self.features.compute_response_time_and_burst(self.messages, self.root, self.senders, initiation_thrs, burst_thrs, response_thrs)
        self.features.compute_messages_per_weekday(self.messages)
        self.features.compute_messages_per_shift(self.messages)
        self.features.compute_messages_pattern(self.messages, self.senders, pattern_list)
        self.features.compute_message_proportions(self.messages, self.senders, self.root, self.get_contact())
        self.features.compute_most_used_words(self.messages, top, word_length_threshold)

    def print_features(self):
        """Print a human-readable report of the computed features."""
        feats = self.features
        contact = self.get_contact()

        # Report the configured root; fall back to the first sender only
        # when no root was set (the old code always printed senders[0]).
        root_label = self.root if self.root else self.senders[0]
        print("Root: %s" % (root_label))
        print("")

        print("Average root response time (s): %.2f" % (feats.compute_avg_root_response_time()))
        print("Average contact response time (s): %.2f" % (feats.compute_avg_contact_response_time()))
        print("Ratio: %.2f" % (feats.compute_response_time_ratio(self.root, contact)))
        print("")

        # NOTE(review): root burst output was already disabled in the original.
        # print "Number of root bursts: %d" % (self.features.compute_nbr_root_burst())
        # print "Average burst length: %.2f" % (self.features.compute_avg_root_burst())
        # print ""

        print("Number of contact bursts: %d" % (feats.compute_nbr_contact_burst()))
        # Format fixed: "%.2ff" printed a stray trailing "f".
        print("Average burst length: %.2f" % (feats.compute_avg_contact_burst()))
        print("Ratio: %.2f" % (feats.compute_bursts_ratio(self.root, contact)))
        print("")

        for s in self.senders:
            if s == self.root:
                print("Root initiations: %d" % (feats.initiations[s]))
            else:
                print("Contact initiations: %d" % (feats.initiations[s]))

        print("Root initiation ratio: %.2f" % (feats.compute_root_initation_ratio(self.root, contact)))
        print("")

        print("Proportions:")
        # list(...) keeps the indexing valid on Python 3 dict views too.
        pretty_print(feats.proportions, list(feats.proportions.keys())[0], 1)
        print("")
        print("Weekdays:")
        pretty_print(feats.weekday, "Weekday", 0)
        print("")
        print("Shifts:")
        pretty_print(feats.shifts, "Shifts", 0)
        print("")
        print("Patterns:")
        pretty_print(feats.patterns, "Patterns", 0)
        print("")
        print("Most used words:")
        # Iterate over a copy: removing from the list being iterated made
        # the loop skip the entry following every removed one.
        for muw in list(feats.most_used_words):
            try:
                print(muw[0])
            except UnicodeEncodeError:
                # Unprintable words are dropped from the stored list.
                feats.most_used_words.remove(muw)

    def save_features(self, output_name):
        """Write the computed features as JSON and pretty-print them.

        ``output_name`` gets a ``.json`` suffix when it lacks one.  Errors
        raised while opening or writing the file propagate to the caller.
        """
        import pprint

        contact = self.get_contact()
        output = {}
        output["root"] = self.root

        output["avg_response_time"] = {}
        for s in self.senders:
            if s == self.root:
                output["avg_response_time"][s] = self.features.compute_avg_root_response_time()
            else:
                output["avg_response_time"][s] = self.features.compute_avg_contact_response_time()
        output["avg_response_time"]["ratio"] = self.features.compute_response_time_ratio(self.root, contact)

        output["nbr_bursts"] = {}
        for s in self.senders:
            if s == self.root:
                output["nbr_bursts"][s] = self.features.compute_nbr_root_burst()
            else:
                output["nbr_bursts"][s] = self.features.compute_nbr_contact_burst()
        output["nbr_bursts"]["ratio"] = self.features.compute_bursts_ratio(self.root, contact)

        # NOTE(review): average-burst output was already disabled in the original.
        # output["avg_bursts"] = {}
        # for s in self.senders:
        #     if s == self.root:
        #         output["avg_bursts"][s] = self.features.compute_avg_root_burst()
        #     else:
        #         output["avg_bursts"][s] = self.features.compute_avg_contact_burst()

        # Copy before adding the ratio so the extra key does not leak into
        # self.features.initiations.
        output["initiations"] = dict(self.features.initiations)
        output["initiations"]["root_initiation_ratio"] = self.features.compute_root_initation_ratio(self.root, contact)
        output["proportions"] = self.features.proportions
        output["weekdays"] = self.features.weekday
        output["shifts"] = self.features.shifts
        output["patterns"] = self.features.patterns
        output["senders"] = self.senders
        output["muw"] = self.features.most_used_words
        output["outcome"] = self.features.generate_outcome(self.root, contact, 0) #TODO: make macros for outcome methodology

        # Serialise first so a failure here cannot leave a truncated file.
        payload = json.dumps(output)

        if output_name.endswith(".json"):
            target = output_name
        else:
            target = output_name + ".json"
        # ``with`` closes the handle even if write() fails.
        with open(target, "w") as arq:
            arq.write(payload)
        pprint.pprint(output)