def retrieve_element(self, element, _):
    """Store a tweet's metadata and, if configured, its media under TMP.

    The element's ``__dict__`` is dumped to ``TMP/<element.id>/tweet.json``.
    When ``self.config.download_photos`` is set, every non-empty URL in the
    comma-separated ``element.photos`` string is downloaded. When
    ``self.config.download_videos`` is set and the element has a non-empty
    ``video`` attribute, that URL is downloaded as well.

    Args:
        element: Object exposing ``id`` and ``photos`` (comma-separated URL
            string), and optionally ``video`` (URL string).
        _: Unused second positional argument.

    Returns:
        The result of ``Etype.cast(element.id, files(base))``.
    """
    base = TMP / element.id
    base.mkdir(parents=True, exist_ok=True)

    with open(base / "tweet.json", "w+") as fp:
        json.dump(element.__dict__, fp)

    # retrieve photos
    if "download_photos" in self.config and self.config.download_photos:
        # "".split(",") yields [""], so drop empty fragments rather than
        # passing an empty URL to urlretrieve.
        photos = [url for url in element.photos.split(",") if url]
        for url in photos:
            fname = url.rsplit("/", 1)[-1]
            urlretrieve(url, base / fname)

        if photos:
            self.logger(f"{element.id} downloaded (with images).")
        else:
            # No early return here: a tweet without photos may still carry a
            # video, and the disk flag below must be set on every path.
            self.logger(f"{element.id} downloaded.")

    # retrieve video
    if "download_videos" in self.config and self.config.download_videos:
        if hasattr(element, "video") and element.video != "":
            fname = element.video.rsplit("/", 1)[-1]
            urlretrieve(element.video, base / fname)

    # NOTE(review): presumably tells the disk layer to remove the local TMP
    # copy once it has been written to storage - confirm against the disk class.
    self.disk.delete_local_on_write = True
    return Etype.cast(element.id, files(base))
def read_elements(self, qs: List[str]) -> List[LocalElement]:
    """Collect the LocalElements found under each query in ``qs``.

    Every query is resolved with ``self.read_query``; each subdirectory of
    the resolved path is cast to an element and tagged with the query that
    produced it. The result is one flat list that follows the order of
    ``qs``, and within a query the order returned by ``subdirs``.
    """
    collected = []
    for query in qs:
        query_root = self.read_query(query)
        # TODO: cast elements properly and throw error if they don't conform
        for element_dir in subdirs(query_root):
            local_element = Etype.cast(element_dir.name, files(element_dir))
            local_element.query = query
            collected.append(local_element)
    return collected
def retrieve_element(self, element, _) -> Union(Etype.Video, Etype.Json):
    """Download a video via ``self.ydl`` and store its metadata as JSON.

    ``self.ydl.extract_info(element.url)`` is called inside the ``self.ydl``
    context, and its result is dumped to ``TMP/<element.id>/meta.json``.

    Args:
        element: Object exposing ``id`` and ``url``.
        _: Unused second positional argument.

    Returns:
        The result of ``Etype.cast(element.id, files(TMP / element.id))``.

    Raises:
        ElementShouldSkipError: If youtube_dl raises ``DownloadError`` while
            extracting the video; the original error is attached as the cause.
    """
    # NOTE(review): `Union(...)` in the signature is called, not subscripted -
    # presumably a project etype combinator rather than typing.Union; confirm.
    with self.ydl:
        # Only the extraction can raise DownloadError, so keep the try body
        # to that single call and chain the cause for easier debugging.
        try:
            result = self.ydl.extract_info(element.url)
        except youtube_dl.utils.DownloadError as err:
            raise ElementShouldSkipError(
                f"Something went wrong downloading {element.id}. It may have been deleted."
            ) from err

        # NOTE(review): the directory TMP/<id> is not created here; this
        # assumes extract_info has already written into it - confirm.
        meta = TMP / element.id / "meta.json"
        with open(meta, "w+") as fp:
            json.dump(result, fp)

        self.logger(f"{element.id}: video and meta downloaded successfully.")
        self.disk.delete_local_on_write = True
        return Etype.cast(element.id, files(TMP / element.id))
def retrieve_element(self, element, _):
    """Write an element's comment to disk and fetch its file, if it has one.

    The comment is written to ``TMP/<id>/<id>_comment.txt``. When
    ``element.url`` is not the empty string, the URL is downloaded to
    ``TMP/<id>/<element.filename>``. Returns
    ``Etype.cast(element.id, files(<that directory>))``.
    """
    target_dir = TMP / element.id
    target_dir.mkdir(parents=True, exist_ok=True)

    # Read every attribute up front so a missing one fails before any file
    # is written.
    filename, ident, comment_text, source_url = (
        element.filename,
        element.id,
        element.comment,
        element.url,
    )

    comment_path = target_dir / f"{ident}_comment.txt"
    with open(comment_path, "w+") as comment_file:
        comment_file.write(comment_text)

    has_attachment = source_url != ""
    if has_attachment:
        urlretrieve(source_url, target_dir / filename)

    return Etype.cast(element.id, files(target_dir))
def retrieve_element(self, row, config):
    """Cast a row straight to an element; nothing is downloaded here.

    Returns ``Etype.cast(row.id, row.path)``. ``config`` is not used.
    """
    element_id = row.id
    element_path = row.path
    return Etype.cast(element_id, element_path)