def resolve_entities(self, inplace=True):
    """Replace sender aliases in ``self.data`` with their resolved entity.

    ``self.entities`` maps each entity to the names that refer to it. When
    it has not been computed yet, it is derived from the activity matrix
    via ``process.resolve_sender_entities``. Every name is then replaced
    by its entity through ``self.data.replace``.

    The cached activity is always discarded and recomputed afterwards,
    regardless of *inplace*.

    If *inplace* is true (the default), ``self.data`` is modified and
    returned; otherwise the result of the ``replace`` call is returned.
    """
    if self.entities is None:
        # Entity resolution works on the activity matrix, so make sure it
        # has been computed first.
        if self.activity is None:
            self.get_activity()
        self.entities = process.resolve_sender_entities(self.activity)

    # Flatten the entity -> names mapping into (name, entity) pairs in one
    # pass so both replacement lists stay aligned.
    alias_pairs = [
        (alias, entity)
        for entity, aliases in self.entities.items()
        for alias in aliases
    ]
    to_replace = [alias for alias, _ in alias_pairs]
    value = [entity for _, entity in alias_pairs]

    # assumes self.data is a pandas DataFrame (the replace() keywords
    # match that API) -- TODO confirm
    data = self.data.replace(
        to_replace=to_replace, value=value, inplace=inplace
    )

    # clear and replace activity with resolved activity
    self.activity = None
    self.get_activity()

    return self.data if inplace else data
def resolve_entities(self, inplace=True):
    """Rewrite sender names in ``self.data`` to their resolved entities.

    Uses ``self.entities`` (entity -> names referring to it), computing it
    from the activity matrix with ``process.resolve_sender_entities`` when
    it is still ``None``. After the replacement, the cached activity is
    reset and recomputed through ``self.get_activity()``.

    Returns ``self.data`` when *inplace* is true, otherwise the value
    returned by ``self.data.replace``.
    """
    needs_entities = self.entities is None
    if needs_entities:
        if self.activity is None:
            # The activity matrix is the input to entity resolution.
            self.get_activity()
        self.entities = process.resolve_sender_entities(self.activity)

    to_replace = []
    value = []
    for entity, aliases in self.entities.items():
        # Materialise once so the names are iterated a single time and the
        # two lists grow in lockstep.
        alias_list = list(aliases)
        to_replace.extend(alias_list)
        value.extend([entity] * len(alias_list))

    # presumably a pandas-style replace(); verify against the type of
    # self.data
    replaced = self.data.replace(
        to_replace=to_replace,
        value=value,
        inplace=inplace,
    )

    # clear and replace activity with resolved activity
    self.activity = None
    self.get_activity()

    if not inplace:
        return replaced
    return self.data
def test_email_entity_resolution():
    """Smoke test: entity resolution on a fixture archive must not raise.

    Builds an ``Archive`` from the file under ``tests/data``, resolves
    sender entities from its unresolved activity, and repartitions the
    activity with them. The final assertion is unconditional; the test
    only fails if one of the calls above raises.
    """
    fixture_name = "2001-November.txt"
    arx = archive.Archive(fixture_name, archive_dir="tests/data", mbox=True)

    unresolved_activity = arx.get_activity(resolved=False)
    entities = process.resolve_sender_entities(unresolved_activity)

    # The result is not inspected; only successful completion matters.
    utils.repartition_dataframe(arx.get_activity(), entities)

    assert True, "email entity resolution crashed"
def test_email_entity_resolution(self):
    """Smoke test: entity resolution on a fixture archive must not raise.

    Loads the archive from ``tests/data``, resolves sender entities from
    the unresolved activity, and repartitions the activity by those
    entities. The closing assertion always passes, so a failure can only
    come from an exception raised by the calls before it.
    """
    fixture_name = "2001-November.txt"
    arx = archive.Archive(fixture_name, archive_dir="tests/data", mbox=True)

    unresolved_activity = arx.get_activity(resolved=False)
    entities = process.resolve_sender_entities(unresolved_activity)

    # Return value intentionally discarded; reaching the next line is the
    # success criterion.
    utils.repartition_dataframe(arx.get_activity(), entities)

    self.assertTrue(True, msg="email entity resolution crashed")
def get_activity(self, resolved=False):
    """Return the activity matrix of this Archive.

    The matrix is a DataFrame whose columns are the senders of emails and
    whose rows are indexed by ordinal date; each cell holds the number of
    emails that sender sent on that date. It is computed on first use and
    cached on ``self.activity``.

    If *resolved* is true, default entity resolution is run on the cached
    matrix: ``self.entities`` is overwritten with the result and the
    matrix repartitioned by those entities is returned instead. The
    repartitioned matrix itself is not cached.
    """
    if self.activity is None:
        # NOTE(review): self is passed explicitly to what looks like a
        # bound method -- confirm against compute_activity's signature.
        self.activity = self.compute_activity(self)

    if not resolved:
        return self.activity

    self.entities = process.resolve_sender_entities(self.activity)
    return utils.repartition_dataframe(self.activity, self.entities)
def get_activity(self, resolved=False):
    """Get the activity matrix of an Archive, computing it if needed.

    Columns of the returned DataFrame are the senders of emails, rows are
    indexed by ordinal date, and cells are the number of emails sent by
    each sender on each date. The unresolved matrix is cached on
    ``self.activity`` after the first call.

    When *resolved* is true, ``self.entities`` is recomputed from the
    cached matrix with ``process.resolve_sender_entities`` and the matrix
    repartitioned by those entities is returned; ``self.activity`` keeps
    the unresolved matrix.
    """
    if self.activity is None:
        # NOTE(review): the explicit ``self`` argument is unusual for a
        # method call -- verify it is what compute_activity expects.
        self.activity = self.compute_activity(self)

    result = self.activity
    if resolved:
        self.entities = process.resolve_sender_entities(self.activity)
        result = utils.repartition_dataframe(self.activity, self.entities)
    return result