def retrieve_element(self, element, config):
    """Wrap either one element or the aggregated results in an ``Etype.Any``.

    When ``self.is_aggregate()`` is false, the result is named after
    ``element.id`` and holds ``element.path`` as its only path.

    Otherwise the result is named after the final component of
    ``config["source"]`` and its paths are the second item of every entry
    in ``self.results``.
    """
    if not self.is_aggregate():
        return Etype.Any(element.id, paths=[element.path])

    source_dir = Path(config["source"])
    label = source_dir.name
    # Only index 1 of each result entry is used as a path here.
    aggregated_paths = [entry[1] for entry in self.results]
    return Etype.Any(label, paths=aggregated_paths)
def post_analyse(self, elements):
    """Add the tweets of every element to the graph and export it as XLSX.

    For each element, the JSON file at ``element.paths[0]`` is loaded. Its
    first entry is added to the graph on its own; every later entry is
    added with ``inreplyto`` set to that first entry.

    After all elements are processed the graph is written to
    ``TMP / "final.xlsx"``.

    Returns:
        ``Etype.Any("FINAL", <xlsx path>)``.
    """
    for element in elements:
        json_path = element.paths[0]
        with open(json_path) as handle:
            tweets = json.load(handle)

        # Indexing (not unpacking) is deliberate: the first entry is the
        # tweet every other entry in the file replies to.
        # NOTE(review): assumes each file holds a non-empty JSON list.
        initial_tweet = tweets[0]
        self.logger(f"Adding tweet {initial_tweet['id']} to graph...")
        self.add_to_graph(initial_tweet)

        replies = tweets[1:]
        for tweet in replies:
            self.logger(f"Adding reply {tweet['id']} to graph...")
            self.add_to_graph(tweet, inreplyto=initial_tweet)

    output_path = TMP / "final.xlsx"
    self.graph.to_xlsx(output_path)
    return Etype.Any("FINAL", output_path)
def post_analyse(self, _):
    """Add the tweets of every stored element to the graph and export it.

    The positional argument is ignored; the elements are read back through
    ``self.disk.read_elements([self.dest_q])`` instead.

    For each element, the JSON file at ``element.paths[0]`` is loaded. Its
    first entry is added to the graph on its own; every later entry is
    added with ``inreplyto`` set to that first entry.

    After all elements are processed the graph is written to
    ``TMP / "final.xlsx"``.

    Returns:
        ``Etype.Any("FINAL", <xlsx path>)``.
    """
    # TODO: a kind of hack... should maybe make available as a func, i.e. `self.get_analysed()`
    for element in self.disk.read_elements([self.dest_q]):
        json_path = element.paths[0]
        with open(json_path) as handle:
            tweets = json.load(handle)

        # Indexing (not unpacking) is deliberate: the first entry is the
        # tweet every other entry in the file replies to.
        # NOTE(review): assumes each file holds a non-empty JSON list.
        initial_tweet = tweets[0]
        self.logger(f"Adding tweet {initial_tweet['id']} to graph...")
        self.add_to_graph(initial_tweet)

        replies = tweets[1:]
        for tweet in replies:
            self.logger(f"Adding reply {tweet['id']} to graph...")
            self.add_to_graph(tweet, inreplyto=initial_tweet)

    output_path = TMP / "final.xlsx"
    self.graph.to_xlsx(output_path)
    return Etype.Any("FINAL", output_path)
def test_Any(base):
    """Check that ``Etype.Any`` exposes one entry in ``paths`` per path given."""
    # A single path in, a single path out.
    single = Etype.Any(base.id, [base.txt1])
    assert len(single.paths) == 1

    # Three paths taken from three different fixture attributes.
    several = Etype.Any(base.id, [base.txt1, base.md1, base.im3])
    assert len(several.paths) == 3