def retrieve_element(self, element, _):
    """Store one tweet's metadata locally and, if configured, its media.

    ``element.__dict__`` is dumped to ``tweet.json`` inside
    ``TMP / element.id``. Photos and video are then fetched into the same
    folder when the ``download_photos`` / ``download_videos`` config flags
    are present and truthy.

    Args:
        element: object exposing ``id``; ``photos`` (comma-separated URL
            string) is read when photo download is enabled, and ``video``
            (URL string) is used when present and video download is enabled.
        _: unused.

    Returns:
        The result of ``Etype.cast`` over all files in the element folder.
    """
    base = TMP / element.id
    base.mkdir(parents=True, exist_ok=True)

    with open(base / "tweet.json", "w+") as fp:
        json.dump(element.__dict__, fp)

    if "download_photos" in self.config and self.config.download_photos:
        # "".split(",") yields [""], so drop empty entries rather than
        # handing an empty URL to urlretrieve.
        photos = [url for url in element.photos.split(",") if url]
        for url in photos:
            fname = url.rsplit("/", 1)[-1]
            urlretrieve(url, base / fname)

        if photos:
            self.logger(f"{element.id} downloaded (with images).")
        else:
            # No early return here: a tweet without photos may still carry
            # a video, and the cleanup flag below must still be set.
            self.logger(f"{element.id} downloaded.")

    if "download_videos" in self.config and self.config.download_videos:
        if hasattr(element, "video") and element.video != "":
            fname = element.video.rsplit("/", 1)[-1]
            urlretrieve(element.video, base / fname)

    self.disk.delete_local_on_write = True
    return Etype.cast(element.id, files(base))
def read_elements(self, qs: List[str]) -> List[LocalElement]:
    """Gather the elements found under each queried folder into one flat list.

    Queries are handled in the order given, so the returned list keeps that
    order. Every subdirectory of a query's folder is cast via ``Etype.cast``
    and tagged with the query it came from (``.query``).
    """
    collected = []
    for query in qs:
        # TODO: cast elements properly and throw error if they don't conform
        for folder in subdirs(self.read_query(query)):
            local_el = Etype.cast(folder.name, files(folder))
            local_el.query = query
            collected.append(local_el)
    return collected
def retrieve_element(self, element, _) -> Union(Etype.Video, Etype.Json):
    """Fetch ``element.url`` through ``self.ydl`` and save its info as JSON.

    The value returned by ``extract_info`` is written to ``meta.json`` in
    ``TMP / element.id``; all files in that folder are then cast and returned.

    Raises:
        ElementShouldSkipError: when youtube_dl reports a ``DownloadError``.
    """
    # NOTE(review): `Union(...)` is called, not subscripted -- presumably a
    # project-level combinator rather than typing.Union; confirm.
    with self.ydl:
        try:
            info = self.ydl.extract_info(element.url)
            element_dir = TMP / element.id
            # The folder is not created here -- presumably extract_info has
            # already written into it; verify against the ydl options.
            with open(element_dir / "meta.json", "w+") as meta_file:
                json.dump(info, meta_file)
            self.logger(f"{element.id}: video and meta downloaded successfully.")
            self.disk.delete_local_on_write = True
            return Etype.cast(element.id, files(element_dir))
        except youtube_dl.utils.DownloadError:
            raise ElementShouldSkipError(
                f"Something went wrong downloading {element.id}. It may have been deleted."
            )
def retrieve_element(self, element, _):
    """Save an element's comment text and, when it has a URL, its file.

    The comment goes to ``<id>_comment.txt`` inside ``TMP / element.id``;
    a non-empty ``element.url`` is downloaded next to it under
    ``element.filename``. Returns the cast of all files in that folder.
    """
    target_dir = TMP / element.id
    target_dir.mkdir(parents=True, exist_ok=True)

    # Pull every field up front so a missing attribute fails before any
    # file is written.
    filename, el_id, text, link = (
        element.filename,
        element.id,
        element.comment,
        element.url,
    )

    with open(target_dir / f"{el_id}_comment.txt", "w+") as out:
        out.write(text)

    has_link = link != ""
    if has_link:
        urlretrieve(link, target_dir / filename)

    return Etype.cast(element.id, files(target_dir))
def analyse_element(self, element, config):
    """Split an element's video into frames in a fresh ``/tmp/<id>`` folder.

    Args:
        element: object exposing ``id`` and ``paths`` (path objects with a
            ``suffix``).
        config: mapping; ``config["fps"]`` is used as the frame rate when
            the key is present, otherwise 1.

    Returns:
        ``GLOSSED_FRAMES`` built from every file in the destination folder.

    Raises:
        IndexError: if none of ``element.paths`` has a suffix in
            ``VID_SUFFIXES``.
    """
    fps = int(config["fps"]) if "fps" in config else 1

    # Exact match: the previous `suffix in ".json"` was a substring test and
    # also accepted "", ".j" and ".js" (e.g. extension-less files).
    json_paths = [p for p in element.paths if p.suffix == ".json"]

    # Start from an empty destination so stale frames never leak through.
    dest = Path("/tmp") / element.id
    if dest.exists():
        rmtree(dest)
    dest.mkdir()

    # Metadata is carried over only when it is unambiguous (exactly one file).
    if len(json_paths) == 1:
        copyfile(json_paths[0], dest / "meta.json")

    videos = [p for p in element.paths if p.suffix in VID_SUFFIXES]
    video = videos[0]  # the first matching file is the one that gets split
    ffmpeg_frames(dest, video, fps)

    self.logger(f"Frames successfully created for element {element.id}.")
    self.disk.delete_local_on_write = True
    return GLOSSED_FRAMES(element.id, paths=files(dest))
def get_custom_etypes():
    """Import every module in the etypes folder and return their ``etype``.

    Module names are derived from the stem of each file that ``files``
    reports under ``/mtriage/src/lib/etypes``, prefixed with ``lib.etypes``.
    """
    package = "lib.etypes"
    etype_dir = Path("/mtriage/src/lib/etypes")

    # Resolve all module names before importing anything.
    module_names = [f"{package}.{source.stem}" for source in files(etype_dir)]

    etypes = []
    for dotted_name in module_names:
        etypes.append(import_module(dotted_name).etype)
    return etypes