コード例 #1
0
    def resolve_entities(self, inplace=True):
        """
        Replace every known sender alias in ``self.data`` with its entity.

        ``self.entities`` maps each entity to an iterable of names; if it has
        not been computed yet it is derived from the activity matrix via
        ``process.resolve_sender_entities`` (computing the activity first if
        that is missing too).

        After the replacement the cached activity is discarded and
        recomputed through ``get_activity()``.

        If *inplace* is true, ``self.data`` is modified and returned.
        Otherwise ``self.data`` is left untouched and the object returned by
        ``self.data.replace(...)`` is handed back.
        """
        # Lazily build the entity mapping the first time it is needed.
        if self.entities is None:
            if self.activity is None:
                self.get_activity()

            self.entities = process.resolve_sender_entities(self.activity)

        # Flatten {entity: [alias, ...]} into aligned (alias, entity) pairs,
        # then split them into the two parallel lists that replace() expects.
        alias_pairs = [
            (alias, entity)
            for entity, aliases in self.entities.items()
            for alias in aliases
        ]
        to_replace = [alias for alias, _ in alias_pairs]
        value = [entity for _, entity in alias_pairs]

        # NOTE(review): presumably self.data is a pandas DataFrame, in which
        # case replace() returns None when inplace is true -- confirm.
        replaced = self.data.replace(
            to_replace=to_replace,
            value=value,
            inplace=inplace,
        )

        # Drop the stale activity matrix and have it rebuilt.
        self.activity = None
        self.get_activity()

        return self.data if inplace else replaced
コード例 #2
0
ファイル: archive.py プロジェクト: Aryan-Barbarian/bigbang
    def resolve_entities(self, inplace=True):
        """
        Substitute sender aliases in ``self.data`` with their entity keys.

        The alias table is ``self.entities`` (entity -> iterable of names).
        When it is still ``None`` it is built with
        ``process.resolve_sender_entities`` from the activity matrix, which
        is itself computed on demand.

        The cached activity matrix is always reset and recomputed afterwards.

        Returns ``self.data`` when *inplace* is true; otherwise returns the
        result of the ``replace`` call and leaves ``self.data`` as it was.
        """
        if self.entities is None:
            # Entity resolution needs an activity matrix to work from.
            if self.activity is None:
                self.get_activity()

            self.entities = process.resolve_sender_entities(self.activity)

        aliases = []
        canonical = []

        for entity, names in self.entities.items():
            already = len(aliases)
            aliases.extend(names)
            # One copy of the entity per alias just added keeps both lists
            # aligned index-for-index.
            canonical.extend([entity] * (len(aliases) - already))

        result = self.data.replace(to_replace=aliases,
                                   value=canonical,
                                   inplace=inplace)

        # The old activity matrix no longer matches; clear and rebuild it.
        self.activity = None
        self.get_activity()

        if not inplace:
            return result

        return self.data
コード例 #3
0
ファイル: bigbang_tests.py プロジェクト: sbenthall/bigbang
def test_email_entity_resolution():
    """Smoke test: entity resolution and repartitioning run without raising."""
    mbox_name = "2001-November.txt"
    mail_archive = archive.Archive(mbox_name, archive_dir="tests/data", mbox=True)

    # Resolve entities from the unresolved activity matrix.
    unresolved_activity = mail_archive.get_activity(resolved=False)
    entities = process.resolve_sender_entities(unresolved_activity)

    # Repartition the activity by entity; the result itself is not
    # inspected -- reaching the assertion below is the whole check.
    activity = mail_archive.get_activity()
    utils.repartition_dataframe(activity, entities)

    assert True, "email entity resolution crashed"
コード例 #4
0
    def test_email_entity_resolution(self):
        """Smoke test: entity resolution and repartitioning do not raise."""
        mail_archive = archive.Archive(
            "2001-November.txt",
            archive_dir="tests/data",
            mbox=True,
        )

        # Entities are derived from the unresolved activity matrix.
        unresolved_activity = mail_archive.get_activity(resolved=False)
        entities = process.resolve_sender_entities(unresolved_activity)

        # Only completion matters here; the repartitioned frame is discarded.
        activity = mail_archive.get_activity()
        utils.repartition_dataframe(activity, entities)

        self.assertTrue(True, msg="email entity resolution crashed")
コード例 #5
0
    def get_activity(self, resolved=False):
        """
        Return the activity matrix of this Archive, computing it on first use.

        Columns of the returned DataFrame are the Senders of emails.
        Rows are indexed by ordinal date.
        Cells are the number of emails sent by each sender on each date.

        The matrix is cached on ``self.activity``. If *resolved* is true,
        default entity resolution is run on the cached matrix, the resulting
        mapping is stored on ``self.entities``, and the repartitioned matrix
        is returned instead of the cached one.
        """
        if self.activity is None:
            # NOTE(review): self is passed explicitly on top of the bound
            # receiver -- confirm compute_activity expects that argument.
            self.activity = self.compute_activity(self)

        if not resolved:
            return self.activity

        self.entities = process.resolve_sender_entities(self.activity)
        return utils.repartition_dataframe(self.activity, self.entities)
コード例 #6
0
ファイル: archive.py プロジェクト: Aryan-Barbarian/bigbang
    def get_activity(self, resolved=False):
        """
        Get the (lazily computed) activity matrix of an Archive.

        Columns of the returned DataFrame are the Senders of emails.
        Rows are indexed by ordinal date.
        Cells are the number of emails sent by each sender on each date.

        The unresolved matrix is kept on ``self.activity`` and reused by
        later calls. When *resolved* is true, entity resolution is run on
        it, ``self.entities`` is overwritten with the result, and the matrix
        repartitioned by those entities is returned.
        """
        activity_missing = self.activity is None
        if activity_missing:
            # NOTE(review): compute_activity receives self as an explicit
            # argument in addition to being called on self -- confirm intent.
            self.activity = self.compute_activity(self)

        if resolved:
            entities = process.resolve_sender_entities(self.activity)
            self.entities = entities
            return utils.repartition_dataframe(self.activity, self.entities)

        return self.activity