コード例 #1
0
ファイル: core.py プロジェクト: Smoltbob/mtriage
 def retrieve_element(self, element, config):
     """Wrap the given element, or the whole result set, in an `Etype.Any`.

     When `self.is_aggregate()` is falsy, the returned etype is keyed by
     `element.id` and holds only `element.path`.  Otherwise it is keyed by
     the final path component of `config["source"]` and holds the second
     item of every entry in `self.results`.
     """
     if not self.is_aggregate():
         return Etype.Any(element.id, paths=[element.path])

     source_dir = Path(config["source"])
     return Etype.Any(
         source_dir.name,
         paths=[entry[1] for entry in self.results],
     )
コード例 #2
0
    def retrieve_element(self, element, _):
        """Write a tweet element to disk, optionally with its photos and video.

        The element's attributes are dumped to `tweet.json` inside
        `TMP / element.id`.  If `self.config.download_photos` is set, every
        non-empty URL in the comma-separated `element.photos` is fetched into
        the same folder; if `self.config.download_videos` is set and the
        element has a non-empty `video` URL, that is fetched too.

        An element without photos no longer returns early: the video download
        and the `delete_local_on_write` flag are applied to every element.

        Returns the result of `Etype.cast` over the files in the folder.
        """
        base = TMP / element.id
        base.mkdir(parents=True, exist_ok=True)
        with open(base / "tweet.json", "w+") as fp:
            json.dump(element.__dict__, fp)

        # retrieve photos
        if "download_photos" in self.config and self.config.download_photos:
            # `"".split(",")` yields `[""]`, so drop empty entries rather
            # than testing the list length.
            photos = [url for url in element.photos.split(",") if url]
            for url in photos:
                fname = url.rsplit("/", 1)[-1]
                urlretrieve(url, base / fname)

            if photos:
                self.logger(f"{element.id} downloaded (with images).")
            else:
                self.logger(f"{element.id} downloaded.")

        # retrieve video
        if "download_videos" in self.config and self.config.download_videos:
            video = getattr(element, "video", "")
            if video:
                fname = video.rsplit("/", 1)[-1]
                urlretrieve(video, base / fname)

        self.disk.delete_local_on_write = True
        return Etype.cast(element.id, files(base))
コード例 #3
0
def rank(elements: List,
         threshold=0.5,
         logger=print,
         element_id="__RANKING") -> Etype:
    """Rank elements per label by how many frames score above a threshold.

    Each element that has exactly one `.json` path is read; elements with
    zero or several are skipped.  For every label under the file's "labels"
    key, the element's rank is the number of frames whose score is strictly
    greater than `threshold`.  Per label, element ids are then ordered from
    highest to lowest rank and the mapping is written to
    `WK_DIR / "all" / "rankings.json"`.

    Args:
        elements: objects exposing `id` and `paths` (path-like with `suffix`).
        threshold: minimum score (exclusive) for a frame to count.
        logger: callable taking one message string.
        element_id: id given to the returned etype.

    Returns:
        `Etype.Json(element_id, <rankings file>)`.
    """
    ranking_data = {}

    for element in elements:
        # Exact suffix match: `suffix in ".json"` is a substring test that
        # would also accept suffix-less files ("") and ".js".
        jsons = [f for f in element.paths if f.suffix == ".json"]
        if len(jsons) != 1:
            continue

        with open(jsons[0], "r") as jsonf:
            data = json.load(jsonf)

        try:
            # TODO: this logic should be a custom etype built from a core etype class...
            # the core class can then include associated methods.
            for label, preds in data["labels"].items():
                frames, scores = preds["frames"], preds["scores"]
                label_rank = sum(
                    1 for idx, _ in enumerate(frames)
                    if scores[idx] > threshold
                )
                if label_rank > 4:
                    logger(f"label '{label}': rank {label_rank}")
                # gather all ranks in `ranking_data`
                ranking_data.setdefault(label, {})[element.id] = label_rank

            # No early return here: every element must be indexed before the
            # aggregate ranking below is computed and written.
            logger(f"Rankings indexed for {element.id}.")

        except Exception as e:
            # Best effort: a malformed file is reported and skipped.
            logger(f"Could not analyse {element.id}: {e}")

    ranking = {}
    for label, values in ranking_data.items():
        # Ascending sort followed by reverse (not `reverse=True`) keeps the
        # original ordering of elements with equal rank.
        ordered = sorted(values.items(), key=operator.itemgetter(1))
        ordered.reverse()
        ranking[label] = [el_id for el_id, _ in ordered]

    path = WK_DIR / "all"
    os.makedirs(path, exist_ok=True)
    file = path / "rankings.json"
    logger("All rankings aggregated, printed to all/rankings.json")

    with open(file, "w") as f:
        json.dump(ranking, f)

    return Etype.Json(element_id, file)
コード例 #4
0
def test_Image(base):
    """Check `Etype.Image` construction: rejections, single-path forms, filtering.

    Each assertion inspects the object created immediately before it, so the
    `im2` and filtered cases are actually exercised.
    """
    # shouldn't accept one txt
    with pytest.raises(EtypeCastError):
        Etype.Image(base.id, ["/tmp/notafile.txt"])

    # shouldn't accept an image that doesn't exist
    with pytest.raises(EtypeCastError):
        Etype.Image(base.id, ["/tmp/nonexistent_image.png"])

    # shouldn't be okay with 2 valid images
    with pytest.raises(EtypeCastError):
        Etype.Image(base.id, [base.im1, base.im2])

    # works with either single path or list
    im1 = Etype.Image(base.id, base.im1)
    assert len(im1.paths) == 1
    im1 = Etype.Image(base.id, [base.im1])
    assert len(im1.paths) == 1
    im2 = Etype.Image(base.id, base.im2)
    assert len(im2.paths) == 1

    # filters out invalid files
    im1_filtered = Etype.Image(base.id, [base.im1, base.txt1])
    assert len(im1_filtered.paths) == 1
    assert im1_filtered.paths[0] == base.im1
コード例 #5
0
ファイル: core.py プロジェクト: satishluintel/mtriage
 def post_analyse(self, _):
     """Write the aggregated ranking to `all/rankings.json` under `WK_DIR`.

     The ranking comes from `self.data_to_ranking()`.  The target folder is
     created when missing.  Returns an `Etype.Json` with the id "__RANKING"
     pointing at the written file.
     """
     ranking = self.data_to_ranking()

     out_dir = WK_DIR / "all"
     if not os.path.exists(out_dir):
         os.makedirs(out_dir)
     out_file = out_dir / "rankings.json"

     self.logger("All rankings aggregated, printed to all/rankings.json")
     with open(out_file, "w") as handle:
         json.dump(ranking, handle)

     return Etype.Json("__RANKING", out_file)
コード例 #6
0
 def read_elements(self, qs: List[str]) -> List[LocalElement]:
     """Collect the elements found under each query into one flat list.

     Every query is resolved with `self.read_query`; each subdirectory of the
     resolved path becomes one element, cast from the files it contains and
     tagged with the query it came from.  Results follow the order of `qs`.
     """
     collected = []
     for query in qs:
         query_dir = self.read_query(query)
         # TODO: cast elements properly and throw error if they don't conform
         for el_dir in subdirs(query_dir):
             local_el = Etype.cast(el_dir.name, files(el_dir))
             local_el.query = query
             collected.append(local_el)
     return collected
コード例 #7
0
ファイル: core.py プロジェクト: satishluintel/mtriage
 def retrieve_element(self, element, _) -> Union(Etype.Video, Etype.Json):
     """Fetch an element through `self.ydl` and store its metadata as JSON.

     The value returned by `self.ydl.extract_info(element.url)` is dumped to
     `meta.json` in `TMP / element.id`, and the files in that folder are cast
     to an etype.  A `youtube_dl.utils.DownloadError` is turned into an
     `ElementShouldSkipError`.
     """
     with self.ydl:
         try:
             info = self.ydl.extract_info(element.url)
             element_dir = TMP / element.id
             with open(element_dir / "meta.json", "w+") as meta_file:
                 json.dump(info, meta_file)
             self.logger(
                 f"{element.id}: video and meta downloaded successfully.")
             self.disk.delete_local_on_write = True
             return Etype.cast(element.id, files(element_dir))
         except youtube_dl.utils.DownloadError:
             raise ElementShouldSkipError(
                 f"Something went wrong downloading {element.id}. It may have been deleted."
             )
コード例 #8
0
    def analyse_element(self, element: Etype.Audio, config) -> Etype.Audio:
        """Convert the element's first path to another format with ffmpeg.

        The target extension is read from `config["output_ext"]` and the
        result is written to `/tmp/<element.id>.<output_ext>`, overwriting
        any existing file (`-y`).  ffmpeg's stdout and stderr are discarded.

        Returns an `Etype.Audio` pointing at the converted file.
        """
        output_ext = config["output_ext"]
        output = f"/tmp/{element.id}.{output_ext}"

        # TODO: error handling (ffmpeg's exit status is currently ignored)
        # The devnull handle is closed as soon as ffmpeg finishes, instead of
        # leaking one open file per analysed element.
        with open(os.devnull, "w") as fnull:
            call(
                ["ffmpeg", "-y", "-i", element.paths[0], output],
                stdout=fnull,
                stderr=STDOUT,
            )
        self.logger(
            f"Converted '{element.id}' from {element.paths[0].suffix} to .{output_ext}"
        )
        return Etype.Audio(element.id, paths=[output])
コード例 #9
0
ファイル: core.py プロジェクト: Smoltbob/mtriage
    def post_analyse(self, elements):
        """Add every element's tweets to the graph and export it as xlsx.

        The first path of each element is loaded as JSON.  Its first entry is
        added to the graph on its own; the remaining entries are added as
        replies to that first entry.  The graph is then written to
        `TMP / "final.xlsx"` and returned as an `Etype.Any` with id "FINAL".
        """
        for element in elements:
            with open(element.paths[0]) as handle:
                thread = json.load(handle)

            root = thread[0]
            self.logger(f"Adding tweet {root['id']} to graph...")
            self.add_to_graph(root)

            for reply in thread[1:]:
                self.logger(f"Adding reply {reply['id']} to graph...")
                self.add_to_graph(reply, inreplyto=root)

        out_path = TMP / "final.xlsx"
        self.graph.to_xlsx(out_path)
        return Etype.Any("FINAL", out_path)
コード例 #10
0
ファイル: core.py プロジェクト: jamesb93/mtriage
    def retrieve_element(self, element, _):
        """Save an element's comment and, when it has a URL, its file.

        The comment goes to `<id>_comment.txt` inside `TMP / element.id`.  If
        `element.url` is not the empty string, it is fetched into the same
        folder under `element.filename`.  Returns the result of `Etype.cast`
        over the files in that folder.
        """
        target_dir = TMP / element.id
        target_dir.mkdir(parents=True, exist_ok=True)

        comment_path = target_dir / f"{element.id}_comment.txt"
        with open(comment_path, "w+") as comment_file:
            comment_file.write(element.comment)

        if element.url != "":
            urlretrieve(element.url, target_dir / element.filename)

        return Etype.cast(element.id, files(target_dir))
コード例 #11
0
ファイル: core.py プロジェクト: satishluintel/mtriage
    def post_analyse(self, _):
        """Add every analysed element's tweets to the graph and export as xlsx.

        Elements are read back from disk via `self.dest_q`.  The first path of
        each is loaded as JSON; its first entry is added to the graph on its
        own and the remaining entries are added as replies to it.  The graph
        is then written to `TMP / "final.xlsx"` and returned as an `Etype.Any`
        with id "FINAL".
        """
        # TODO: a kind of hack... should maybe make available as a func, i.e. `self.get_analysed()`
        for element in self.disk.read_elements([self.dest_q]):
            with open(element.paths[0]) as handle:
                thread = json.load(handle)

            root = thread[0]
            self.logger(f"Adding tweet {root['id']} to graph...")
            self.add_to_graph(root)

            for reply in thread[1:]:
                self.logger(f"Adding reply {reply['id']} to graph...")
                self.add_to_graph(reply, inreplyto=root)

        out_path = TMP / "final.xlsx"
        self.graph.to_xlsx(out_path)
        return Etype.Any("FINAL", out_path)
コード例 #12
0
ファイル: cvjson.py プロジェクト: Smoltbob/mtriage
    def from_preds(element, get_preds):
        """Build a `preds.json` for an element from per-image predictions.

        Every path of `element` whose suffix is in `IMG_SFXS` is passed to
        `get_preds`, which must yield `(label, confidence)` pairs.  For each
        label, the frame number from `deduce_frame_no` and the confidence are
        appended to parallel "frames" and "scores" lists.  The result is
        merged with `prepare_json` of the element's first `.json` path and
        written to `TMP / element.id / "preds.json"`.

        Returns an `Etype.Json` pointing at the written file.

        Raises:
            IndexError: if the element has no path with a `.json` suffix.
        """
        imgs = [p for p in element.paths if p.suffix in IMG_SFXS]
        labels = {}
        for imp in imgs:
            frame_no, preds = deduce_frame_no(imp), get_preds(imp)
            for pred_label, pred_conf in preds:
                entry = labels.setdefault(pred_label, {"frames": [], "scores": []})
                entry["frames"].append(frame_no)
                entry["scores"].append(pred_conf)

        # Exact suffix match: `suffix in ".json"` is a substring test that
        # would also pick up suffix-less files ("") or ".js".
        meta = [p for p in element.paths if p.suffix == ".json"][0]
        out = {**prepare_json(meta), "labels": labels}
        base = TMP / element.id
        base.mkdir(parents=True, exist_ok=True)
        outp = base / "preds.json"

        with open(outp, "w") as fp:
            json.dump(out, fp)

        return Etype.Json(element.id, outp)
コード例 #13
0
def test_Any(base):
    """`Etype.Any` keeps every path it is given, whatever the file type."""
    single = Etype.Any(base.id, [base.txt1])
    assert len(single.paths) == 1

    mixed_inputs = [base.txt1, base.md1, base.im3]
    mixed = Etype.Any(base.id, mixed_inputs)
    assert len(mixed.paths) == 3
コード例 #14
0
 def get_in_etype(self):
     """Return the `Etype.Union` of `Etype.Image.array()` and `Etype.Json`."""
     image_array = Etype.Image.array()
     return Etype.Union(image_array, Etype.Json)
コード例 #15
0
 def get_in_etype(self):
     """Return the `Etype.Union` of `Etype.Json` and `Etype.Video`."""
     members = (Etype.Json, Etype.Video)
     return Etype.Union(*members)
コード例 #16
0
 def retrieve_element(self, row, config):
     """Cast `row.path` to an etype keyed by `row.id`; `config` is unused."""
     element_id, element_path = row.id, row.path
     return Etype.cast(element_id, element_path)
コード例 #17
0
 def get_out_etype(self):
     """Return the `Etype.Union` of `Etype.Video` and `Etype.Json`."""
     members = (Etype.Video, Etype.Json)
     return Etype.Union(*members)