Ejemplo n.º 1
0
def save_annotation(
    image_id: int,
    caption_id: int,
    original_sentence: str,
    translated_sentence: str,
    marker: str,
    folder: Path,
    timestamps: Tuple[datetime, datetime],
):
    """Persist one translation annotation to ``<folder>/<image_id>/<caption_id>.json``.

    If the target file already exists, its content is loaded and the new
    marker, translated sentence and timestamp pair are appended to the stored
    lists; the ``original_sentence`` already in the file is kept. Otherwise a
    fresh record holding single-element lists is written.

    NOTE(review): ``json`` is called here with a file path plus ``overwrite``
    and ``verbose`` keywords, so it is presumably a superjson-style wrapper
    rather than the standard-library module -- confirm against the imports.

    :param image_id: image identifier; used as the sub-directory name.
    :param caption_id: caption identifier; used as the JSON file stem.
    :param original_sentence: source sentence stored on a newly created record.
    :param translated_sentence: translation to store or append.
    :param marker: marker value to store or append.
    :param folder: root directory that holds the per-image sub-directories.
    :param timestamps: pair of datetimes to store or append.
    """
    record = Translation(
        markers=[marker],
        original_sentence=original_sentence,
        translated_sentences=[translated_sentence],
        timestamps=[timestamps],
    )
    target: Path = folder / str(image_id) / f"{caption_id}.json"

    if target.is_file():
        # An annotation file is already present: extend it instead of
        # replacing it with the single-entry record built above.
        record = Translation(**json.load(str(target), verbose=False))
        record["markers"].append(marker)
        record["translated_sentences"].append(translated_sentence)
        record["timestamps"].append(timestamps)

    # The per-image directory may not exist yet on the first save.
    target.parent.mkdir(parents=True, exist_ok=True)
    json.dump(
        record,
        str(target),
        indent=2,
        ensure_ascii=False,
        overwrite=True,
        verbose=False,
    )
    print(f"Saved img {image_id}, cap {caption_id}.")
Ejemplo n.º 2
0
 def _dump_thread_json(self, thread_data):
     """Write ``thread_data`` to ``self.json_path`` as indented, key-sorted JSON.

     Any existing file is overwritten; logging follows ``self.verbose``.

     :param thread_data: the object handed to ``json.dump``.
     """
     destination = str(self.json_path)
     dump_options = dict(
         indent=2,
         sort_keys=True,
         ensure_ascii=False,
         overwrite=True,
         verbose=self.verbose,
     )
     json.dump(thread_data, destination, **dump_options)
Ejemplo n.º 3
0
def dump_db(self, file, pretty=False, overwrite=False, verbose=True):
    """
    Serialize this :class:`mongomock.database.Database` into a local file.

    Only ``*.json`` and ``*.gz`` (compressed json) targets are supported.

    :param file: path of the output file.
    :param pretty: bool, write the json in pretty (human readable) format.
    :param overwrite: bool, allow replacing an existing file.
    :param verbose: bool, turn logging on.
    """
    # ``_dump`` converts the database into the structure that gets written;
    # the three flags are forwarded to ``json.dump`` unchanged.
    forwarded = dict(pretty=pretty, overwrite=overwrite, verbose=verbose)
    json.dump(_dump(self), file, **forwarded)
Ejemplo n.º 4
0
    def setup_class(cls):
        """Prepare the shared ``movie`` dataset fixture for this test class.

        Stores the dataset name, the sample rows and the parsed setting on
        ``cls``, removes the dataset's index directory if it exists, then
        writes the rows and the raw setting dict to the dataset's data and
        setting files.
        """
        name = "movie"
        rows = [
            {
                "movie_id": 1,
                "title": "The Shawshank Redemption",
                "description": "Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.",
                "genres": "Drama",
            },
            {
                "movie_id": 2,
                "title": "The Godfather",
                "description": "The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son.",
                "genres": "Crime, Drama",
            },
            {
                "movie_id": 3,
                "title": "The Godfather: Part II",
                "description": "The early life and career of Vito Corleone in 1920s New York City is portrayed, while his son, Michael, expands and tightens his grip on the family crime syndicate.",
                "genres": "Crime, Drama",
            },
        ]

        # One entry per column, each carrying its own indexing flags.
        column_specs = [
            {"name": "movie_id", "type_is_store": True},
            {
                "name": "title",
                "type_is_ngram": True,
                "ngram_minsize": 2,
                "ngram_maxsize": 10,
            },
            {"name": "description", "type_is_phrase": True},
            {
                "name": "genres",
                "type_is_keyword": True,
                "keyword_lowercase": True,
            },
        ]
        setting_dict = {
            "columns": column_specs,
            "title_field": "{title} ({genres})",
            "subtitle_field": "description",
            "arg_field": "movie_id",
            "autocomplete_field": "{title}",
            "icon_field": str(Path(ALFRED_FTS, "movie-icon.png").abspath),
        }
        parsed_setting = Setting.from_dict(setting_dict)

        cls.dataset_name = name
        cls.data = rows
        cls.setting = parsed_setting

        # This DataSet instance is only used to resolve the on-disk locations.
        dataset = DataSet(
            name=name,
            data=None,
            setting=Setting(skip_post_init=True),
        )
        data_file = dataset.get_data_file_path()
        setting_file = dataset.get_setting_file_path()
        index_dir = dataset.get_index_dir_path()

        # Drop any index directory left on disk before writing fresh files.
        if index_dir.exists():
            shutil.rmtree(index_dir.abspath)

        for payload, target in ((rows, data_file), (setting_dict, setting_file)):
            json.dump(
                payload,
                target.abspath,
                indent=4,
                sort_keys=True,
                ensure_ascii=False,
                overwrite=True,
                verbose=False,
            )
Ejemplo n.º 5
0
def save_snapshot(folder: Path, json_path: Path, overwrite=False):
    """Dump a name-to-path snapshot of everything under ``folder`` to JSON.

    The snapshot is a dict with two keys: ``"folders"`` maps each directory
    name found by a recursive walk to its path, and ``"files"`` does the same
    for regular files. Entries are keyed by bare name, so a later entry with
    the same name replaces an earlier one.

    NOTE(review): the dict values are ``Path`` objects; serialising them
    relies on the ``json`` library in use (it is called with a file path and
    an ``overwrite`` keyword, so it is not the standard-library module) --
    confirm it supports paths.

    :param folder: root directory to scan.
    :param json_path: file the snapshot is written to.
    :param overwrite: forwarded to ``json.dump``; allow replacing an
        existing file.
    """
    # Walk the ``folder`` argument itself; the previous code read the global
    # ``args.folder`` and silently ignored the parameter.
    # Filter directories explicitly: newer Python versions let a pattern
    # ending in ``**`` match files as well.
    folders = {entry.name: entry for entry in folder.rglob("**") if entry.is_dir()}
    files = {entry.name: entry for entry in folder.rglob("*") if entry.is_file()}
    snapshot = {"folders": folders, "files": files}
    json.dump(snapshot, str(json_path), indent=2, sort_keys=True, overwrite=overwrite)
Ejemplo n.º 6
0
 def set(self, json_path, value):
     """Update ``self.data`` via ``set_value`` and rewrite ``self.secret_file``.

     NOTE(review): ``set_value`` presumably stores ``value`` at the location
     addressed by ``json_path`` inside ``self.data`` -- confirm against its
     definition.

     :param json_path: location argument passed through to ``set_value``.
     :param value: value argument passed through to ``set_value``.
     """
     set_value(self.data, json_path, value)
     # Persist immediately after every change, replacing the previous file.
     persist_options = dict(
         pretty=True,
         ensure_ascii=False,
         overwrite=True,
         verbose=False,
     )
     json.dump(self.data, self.secret_file, **persist_options)
Ejemplo n.º 7
0
# -*- coding: utf-8 -*-

import pandas as pd
from superjson import json
from seedinvest_monitor.model import Startup

# Export the ``details`` of every scanned Startup twice: as a JSON file and
# as an Excel sheet whose columns are the sorted union of all detail keys.
json_data = []
df_data = []
df_columns = set()

for startup in Startup.scan(attributes_to_get=["id", "details"]):
    details = startup.details.as_dict()
    json_data.append(details)
    df_data.append(details)
    # Iterating the details mapping yields its keys, which are collected
    # here as candidate column names.
    df_columns.update(details)

# Sort the collected keys so the spreadsheet column order is stable.
df_columns = sorted(df_columns)

json.dump(json_data, "startups-data.json", pretty=True, overwrite=True)

df = pd.DataFrame(df_data, columns=df_columns)
df.to_excel("startups-data.xlsx", sheet_name="data", index=False)
Ejemplo n.º 8
0
from pathlib_mate import Path
from pandas_mate import transform
from attrs_mate import AttrsClass
from superjson import json

# Tab-separated input files that live next to this script.
PWD = Path(__file__).parent
FILE_CLASS_CODE = PWD / "class-code.txt"
FILE_SKILL_TAB_CODE = PWD / "skill-tab-code.txt"
FILE_SKILL_CODE = PWD / "skill-code.txt"

# NOTE(review): ``pd`` is used here but no pandas import is visible in this
# snippet -- confirm it is imported above.
# The files are read in the same order as the names on the left-hand side.
df_class_code, df_skill_tab_code, df_skill_code = (
    pd.read_csv(tsv_file.abspath, sep="\t")
    for tsv_file in (FILE_CLASS_CODE, FILE_SKILL_TAB_CODE, FILE_SKILL_CODE)
)


@attr.s
class FTSData(AttrsClass):
    """Bundle of column names, searchable field names and row data.

    NOTE(review): "FTS" presumably stands for full-text search -- confirm.
    """
    # Filled below with the column names of the skill-code table.
    columns = attr.ib()
    # Filled below with the names of the fields marked as searchable.
    searchable = attr.ib()
    # Filled below with the table content converted by ``transform``.
    data = attr.ib()


# Assemble the export payload from the skill-code table and write it out.
searchable = ["class", "skill_tab", "name_en", "name_cn", "name_abbr"]
columns = list(df_skill_code.columns)
# ``code`` is passed as the only entry of ``int_col``.
data = transform.to_dict_list_generic_type(df_skill_code, int_col=["code"])

fts_data = FTSData(
    columns=columns,
    searchable=searchable,
    data=data,
)
json.dump(fts_data.to_dict(), "d2skill.json", pretty=True)