Beispiel #1
0
    def resolve_entities(self, inplace=True):
        """
        Replace sender names in ``self.data`` with their resolved entities.

        ``self.entities`` is read as a mapping from an entity to the names
        that belong to it. If it has not been set yet, it is built with
        ``process.resolve_sender_entities`` from ``self.activity`` (calling
        ``self.get_activity()`` first when no activity is stored).

        Each name is substituted by its entity via ``self.data.replace``.
        Afterwards ``self.activity`` is reset to ``None`` and
        ``self.get_activity()`` is called again.

        Returns ``self.data`` when *inplace* is true, otherwise the value
        returned by ``self.data.replace``.
        """
        if self.entities is None:
            if self.activity is None:
                self.get_activity()

            self.entities = process.resolve_sender_entities(self.activity)

        # Flatten the entity -> names mapping into (name, entity) pairs so
        # the two parallel lists handed to ``replace`` stay aligned.
        pairs = [
            (alias, entity)
            for entity, aliases in self.entities.items()
            for alias in aliases
        ]
        old_names = [alias for alias, _ in pairs]
        new_names = [entity for _, entity in pairs]

        replaced = self.data.replace(
            to_replace=old_names,
            value=new_names,
            inplace=inplace,
        )

        # Clear the stored activity and request it again after the
        # replacement.
        self.activity = None
        self.get_activity()

        return self.data if inplace else replaced
Beispiel #2
0
    def resolve_entities(self, inplace=True):
        """
        Substitute every known sender name in ``self.data`` by its entity.

        The entity -> names mapping is taken from ``self.entities``; when
        that is ``None`` it is first derived from ``self.activity`` with
        ``process.resolve_sender_entities`` (``self.get_activity()`` is
        called beforehand if ``self.activity`` is ``None``).

        After the replacement, ``self.activity`` is set to ``None`` and
        ``self.get_activity()`` is called once more.

        With *inplace* true the method returns ``self.data``; otherwise it
        returns whatever ``self.data.replace`` produced.
        """
        if self.entities is None:
            if self.activity is None:
                self.get_activity()
            self.entities = process.resolve_sender_entities(self.activity)

        # Two parallel lists: sources[i] is rewritten to targets[i].
        sources = []
        targets = []
        for entity, aliases in self.entities.items():
            already = len(sources)
            sources.extend(aliases)
            # Pad the targets with one copy of the entity per alias added.
            targets.extend([entity] * (len(sources) - already))

        result = self.data.replace(to_replace=sources,
                                   value=targets,
                                   inplace=inplace)

        # Clear the stored activity and request it again after the
        # replacement.
        self.activity = None
        self.get_activity()

        if not inplace:
            return result
        return self.data
Beispiel #3
0
def test_email_entity_resolution():
    """
    Smoke test for sender entity resolution on a sample mbox archive.

    Loads ``2001-November.txt`` from ``tests/data``, resolves sender
    entities from the unresolved activity matrix, and repartitions the
    activity matrix with them. The test passes as long as none of these
    steps raises.
    """
    name = "2001-November.txt"

    arx = archive.Archive(name, archive_dir="tests/data", mbox=True)

    e = process.resolve_sender_entities(arx.get_activity(resolved=False))

    # Only the absence of an exception matters here, so the repartitioned
    # frame is not bound to a name.
    utils.repartition_dataframe(arx.get_activity(), e)

    # Always true; reaching this line means nothing above raised.
    assert True, "email entity resolution crashed"
Beispiel #4
0
    def test_email_entity_resolution(self):
        """
        Smoke test: entity resolution on a sample mbox archive must not raise.

        Builds an Archive from ``tests/data/2001-November.txt``, resolves
        sender entities from its unresolved activity, then repartitions the
        activity returned by ``get_activity()`` with those entities.
        """
        mbox_name = "2001-November.txt"
        arx = archive.Archive(mbox_name, archive_dir="tests/data", mbox=True)

        raw_activity = arx.get_activity(resolved=False)
        entities = process.resolve_sender_entities(raw_activity)

        activity = arx.get_activity()
        utils.repartition_dataframe(activity, entities)

        # Always true; reaching this line means nothing above raised.
        self.assertTrue(True, msg="email entity resolution crashed")
Beispiel #5
0
    def get_activity(self, resolved=False):
        """
        Return the activity matrix of this Archive.

        The matrix is a DataFrame with one column per email sender and one
        row per ordinal date; each cell is the number of emails that sender
        sent on that date.

        The matrix is computed on first use and stored on ``self.activity``.
        If *resolved* is true, default entity resolution is run on the
        stored matrix, the entities are stored on ``self.entities``, and the
        repartitioned matrix is returned instead of the stored one.
        """
        if self.activity is None:
            self.activity = self.compute_activity(self)

        # Common case first: hand back the stored matrix untouched.
        if not resolved:
            return self.activity

        self.entities = process.resolve_sender_entities(self.activity)
        return utils.repartition_dataframe(self.activity, self.entities)
Beispiel #6
0
    def get_activity(self, resolved=False):
        """
        Get the activity matrix of an Archive.

        Columns of the returned DataFrame are the senders of emails, rows
        are indexed by ordinal date, and cells are the number of emails
        sent by each sender on each date.

        The matrix is computed once and kept on ``self.activity``. If
        *resolved* is true, default entity resolution is run on that
        matrix (its result is kept on ``self.entities``) and the
        repartitioned matrix is returned.
        """
        if self.activity is None:
            self.activity = self.compute_activity(self)

        if resolved:
            self.entities = process.resolve_sender_entities(self.activity)

        return (
            utils.repartition_dataframe(self.activity, self.entities)
            if resolved
            else self.activity
        )