Ejemplo n.º 1
0
    def retrieve_element(self, element, _):
        """Write an element's metadata to disk and download its media if configured.

        The element's attributes are dumped to ``tweet.json`` inside a folder
        named after ``element.id`` under ``TMP``. Photos are fetched when the
        ``download_photos`` config flag is set, and the video when the
        ``download_videos`` flag is set. The two steps are independent: an
        element without photos still has its video downloaded.

        Args:
            element: object exposing ``id``, ``photos`` (comma-separated URLs,
                possibly empty) and optionally ``video`` (a URL string).
            _: unused.

        Returns:
            The result of ``Etype.cast`` over every file written to the folder.
        """
        base = TMP / element.id
        base.mkdir(parents=True, exist_ok=True)
        with open(base / "tweet.json", "w+") as fp:
            json.dump(element.__dict__, fp)

        # retrieve photos
        if "download_photos" in self.config and self.config.download_photos:
            # "".split(",") yields [""], so filter out empty entries instead of
            # testing the list length (which can never be zero).
            photos = [url for url in element.photos.split(",") if url]
            for url in photos:
                fname = url.rsplit("/", 1)[-1]
                urlretrieve(url, base / fname)

            if photos:
                self.logger(f"{element.id} downloaded (with images).")
            else:
                # No early return here: the video step below must still run.
                self.logger(f"{element.id} downloaded.")

        # retrieve video
        if "download_videos" in self.config and self.config.download_videos:
            if hasattr(element, "video") and element.video != "":
                fname = element.video.rsplit("/", 1)[-1]
                urlretrieve(element.video, base / fname)

        self.disk.delete_local_on_write = True
        return Etype.cast(element.id, files(base))
Ejemplo n.º 2
0
 def read_elements(self, qs: List[str]) -> List[LocalElement]:
     """Collect the LocalElements found under each queried folder into one flat list.

     Queries are resolved one at a time in the order given, so the returned list follows the order of `qs`.
     Every element is tagged with the query that produced it via its `query` attribute.
     """
     collected = []
     for query in qs:
         query_dir = self.read_query(query)
         # TODO: cast elements properly and throw error if they don't conform
         for element_dir in subdirs(query_dir):
             local_element = Etype.cast(element_dir.name, files(element_dir))
             local_element.query = query
             collected.append(local_element)
     return collected
Ejemplo n.º 3
0
 def retrieve_element(self, element, _) -> Union(Etype.Video, Etype.Json):
     """Fetch an element through youtube-dl and save the extracted info as `meta.json`.

     `self.ydl.extract_info` is called with the element's URL and its result is dumped as JSON to
     `TMP / element.id / "meta.json"`. That folder is not created here; presumably the downloader
     creates it when it writes the video (verify against the ydl configuration).

     Args:
         element: object exposing `id` and `url`.
         _: unused.

     Returns:
         The result of `Etype.cast` over the files found in the element's folder.

     Raises:
         ElementShouldSkipError: if youtube-dl reports a download error. The original
             `DownloadError` is attached as the cause.
     """
     with self.ydl:
         # Only the extraction can raise DownloadError, so keep the try body to that call.
         try:
             result = self.ydl.extract_info(element.url)
         except youtube_dl.utils.DownloadError as err:
             raise ElementShouldSkipError(
                 f"Something went wrong downloading {element.id}. It may have been deleted."
             ) from err

         meta = TMP / element.id / "meta.json"
         with open(meta, "w+") as fp:
             json.dump(result, fp)
         self.logger(
             f"{element.id}: video and meta downloaded successfully.")
         self.disk.delete_local_on_write = True
         return Etype.cast(element.id, files(TMP / element.id))
Ejemplo n.º 4
0
    def retrieve_element(self, element, _):
        """Store an element's comment as a text file and download its attachment, if any.

        The comment is written to ``<id>_comment.txt`` in a folder named after
        the element id under ``TMP``. When the element's URL is not the empty
        string, the resource is downloaded into the same folder under the
        element's filename.

        Returns:
            The result of ``Etype.cast`` over the files in the element's folder.
        """
        target_dir = TMP / element.id
        target_dir.mkdir(parents=True, exist_ok=True)

        # Read every attribute up front so a malformed element fails before
        # anything is written.
        filename, ident, comment_text, source_url = (
            element.filename,
            element.id,
            element.comment,
            element.url,
        )

        comment_path = target_dir / f"{ident}_comment.txt"
        with open(comment_path, "w+") as handle:
            handle.write(comment_text)

        if source_url != "":
            urlretrieve(source_url, target_dir / filename)

        return Etype.cast(element.id, files(target_dir))
Ejemplo n.º 5
0
    def analyse_element(self, element, config):
        """Extract frames from an element's video into a fresh folder under /tmp.

        The destination ``/tmp/<element.id>`` is wiped and recreated on every
        call. If the element has exactly one ``.json`` file, it is copied into
        the destination as ``meta.json``. Frames are then produced by
        ``ffmpeg_frames`` from the first path whose suffix is in
        ``VID_SUFFIXES``.

        Args:
            element: object exposing ``id`` and ``paths`` (path objects with
                a ``suffix``).
            config: mapping; an optional ``"fps"`` entry sets the frame rate
                passed to ``ffmpeg_frames`` (default 1).

        Returns:
            ``GLOSSED_FRAMES`` built from the element id and the files written
            to the destination folder.

        Raises:
            IndexError: if the element has no path with a video suffix.
        """
        fps = int(config["fps"]) if "fps" in config else 1
        # Compare for equality: `suffix in ".json"` is a substring test, which
        # also matches extension-less files ("") and suffixes such as ".js".
        jsons = [x for x in element.paths if x.suffix == ".json"]
        dest = Path("/tmp") / element.id
        if dest.exists():
            rmtree(dest)
        dest.mkdir()

        # Metadata is only carried over when it is unambiguous which file it is.
        if len(jsons) == 1:
            copyfile(jsons[0], dest / "meta.json")

        video = [x for x in element.paths if x.suffix in VID_SUFFIXES][0]
        ffmpeg_frames(dest, video, fps)

        self.logger(f"Frames successfully created for element {element.id}.")
        self.disk.delete_local_on_write = True
        return GLOSSED_FRAMES(element.id, paths=files(dest))
Ejemplo n.º 6
0
def get_custom_etypes():
    """Import every module in the custom etypes folder and return their `etype` attributes.

    Module names are taken from the stem of each entry that `files` returns
    for the folder, and are imported from the `lib.etypes` package. The
    result is a list with one `etype` per module, in the order `files`
    yields them.
    """
    package = "lib.etypes"
    etype_dir = Path("/mtriage/src/lib/etypes")

    # Resolve all module names first, then import them one by one.
    stems = [entry.stem for entry in files(etype_dir)]
    loaded = []
    for stem in stems:
        module = import_module(f"{package}.{stem}")
        loaded.append(module.etype)
    return loaded