def find_page(self, model, data):
        """
        Find a page by its URL and import its data.

        Data importing has to be done here because often the page can't
        be saved until the data is imported (i.e. null fields)
        """
        try:
            url = PurePosixPath(data.pop('url'))
            if not url.is_absolute():
                raise CommandError("Path %s must be absolute" % url)

        except KeyError:
            raise CommandError("Need `url' for page")

        try:
            page = model.objects.get(url_path=normalise(url))
            self.import_data(page, data)
            page.save()
            self.stdout.write("Updating existing page %s" % url)
        except model.DoesNotExist:
            try:
                # pylint:disable=no-member
                parent = Page.objects.get(url_path=normalise(url.parent))
            except Page.DoesNotExist:
                raise CommandError("Parent of %s doesn't exist" % url)

            page = model(slug=url.name)
            self.import_data(page, data)
            parent.add_child(instance=page)
            self.stdout.write("Creating new page %s" % url)

        return page
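The lookup above goes through a normalise() helper that is not shown in this excerpt; a minimal sketch, assuming Wagtail-style url_path values that always carry a trailing slash (the helper's name and behaviour are assumptions), could look like:

from pathlib import PurePosixPath

def normalise(url: PurePosixPath) -> str:
    # Hypothetical helper: Wagtail stores url_path with a trailing slash,
    # so "/home/about" has to be looked up as "/home/about/".
    text = str(url)
    return text if text.endswith('/') else text + '/'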
Example #2
    def handle_import(self, name, compilation, rule):
        """Implementation of the core Sass import mechanism, which just looks
        for files on disk.
        """
        # TODO this is all not terribly well-specified by Sass.  at worst,
        # it's unclear how far "upwards" we should be allowed to go.  but i'm
        # also a little fuzzy on e.g. how relative imports work from within a
        # file that's not actually in the search path.
        # TODO i think with the new origin semantics, i've made it possible to
        # import relative to the current file even if the current file isn't
        # anywhere in the search path.  is that right?
        path = PurePosixPath(name)

        search_exts = list(compilation.compiler.dynamic_extensions)
        if path.suffix and path.suffix in search_exts:
            basename = path.stem
        else:
            basename = path.name
        relative_to = path.parent
        search_path = []  # tuple of (origin, start_from)
        search_path.extend(
            (origin, relative_to)
            for origin in compilation.compiler.search_path
        )
        if relative_to.is_absolute():
            relative_to = PurePosixPath(*relative_to.parts[1:])
        elif rule.source_file.origin:
            # Search relative to the current file first, only if not doing an
            # absolute import
            search_path.append((
                rule.source_file.origin,
                rule.source_file.relpath.parent / relative_to,
            ))

        for prefix, suffix in product(('_', ''), search_exts):
            filename = prefix + basename + suffix
            for origin, relative_to in search_path:
                relpath = relative_to / filename
                # Lexically (ignoring symlinks!) eliminate .. from the part
                # of the path that exists within Sass-space.  pathlib
                # deliberately doesn't do this, but os.path does.
                relpath = PurePosixPath(os.path.normpath(str(relpath)))

                if rule.source_file.key == (origin, relpath):
                    # Avoid self-import
                    # TODO is this what ruby does?
                    continue

                path = origin / relpath
                if not path.exists():
                    continue

                # All good!
                # TODO if this file has already been imported, we'll do the
                # source preparation twice.  make it lazy.
                return SourceFile.read(origin, relpath)
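For reference, the product(('_', ''), search_exts) loop in handle_import above tries underscore-prefixed partials before plain names for every configured extension. A small standalone sketch with assumed extensions shows the candidate order:

from itertools import product

basename = 'colors'
search_exts = ['.scss', '.sass']  # assumed value of compiler.dynamic_extensions

candidates = [prefix + basename + suffix
              for prefix, suffix in product(('_', ''), search_exts)]
print(candidates)  # ['_colors.scss', '_colors.sass', 'colors.scss', 'colors.sass']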
def collect_files(template_dir, url_base):
    basepath = PurePosixPath(template_dir)
    baseurl = PurePosixPath(url_base)

    for dirname, _, files in os.walk(template_dir):
        rel_dirname = PurePosixPath(dirname).relative_to(template_dir)
        for filename in files:
            template_path = path.join(dirname, filename)
            url = baseurl.joinpath(rel_dirname, filename)
            with open(template_path, "r", encoding="utf8") as f:
                yield str(url), file_contents(f)
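A hedged usage sketch for collect_files(); file_contents() is not defined in this excerpt, so a trivial stand-in is assumed:

import os
from os import path
from pathlib import PurePosixPath

def file_contents(f):
    # Stand-in for the real helper: just return the file text.
    return f.read()

for url, body in collect_files('templates', '/static/templates'):
    print(url, len(body))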
Example #4
 def fetch_resource(self, resource_name):
     resource_table = self._resource_tables[resource_name]
     for destination, source_name in resource_table.items():
         source_subpath = PurePosixPath(source_name)
         if (
             source_subpath.is_absolute() or
             len(source_subpath.parts) > 1 or '..' in source_subpath.parts
         ):
             raise RuntimeError(source_name)
         source_path = self._resource_dir_path / source_name
         destination_subpath = PurePosixPath(destination)
         if (
             destination_subpath.is_absolute() or
             '..' in destination_subpath.parts
         ):
             raise RuntimeError(destination)
         destination_path: PosixPath = self.root / destination_subpath
         if not destination_path.parent.exists():
             destination_path.parent.mkdir(parents=True)
         if destination_path.is_dir():
             raise IsADirectoryError(str(destination))
         if destination_path.is_symlink():
             destination_path.unlink()
         shutil.copyfile(str(source_path), str(destination_path))
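The guard clauses above reject absolute paths and any '..' components before a name is joined onto a trusted root directory. The same check, pulled out on its own (the helper name is made up):

from pathlib import PurePosixPath

def is_safe_subpath(name: str) -> bool:
    # Mirrors the destination check in fetch_resource: no absolute paths,
    # no '..' components anywhere in the path.
    p = PurePosixPath(name)
    return not p.is_absolute() and '..' not in p.parts

assert is_safe_subpath('icons/logo.png')
assert not is_safe_subpath('/etc/passwd')
assert not is_safe_subpath('../outside.txt')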
Example #5
def systemd_escape_path(path: pathlib.PurePosixPath) -> str:
    """Escape a path for inclusion in a systemd unit name.

    See the 'systemd-escape --path' command for details.
    """
    if not path.is_absolute():
        raise ValueError("systemd_escape_path can only escape absolute paths")
    if ".." in path.parts:
        raise ValueError(
            "systemd_escape_path can only escape paths without '..' components"
        )
    stdout: bytes = subprocess.check_output(
        ["systemd-escape", "--path", "--", str(path)]
    )
    return stdout.decode("utf-8").rstrip("\n")
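A brief usage note: the function shells out to systemd-escape, so the binary must be on PATH; the output shown below is what `systemd-escape --path` should produce for this input, but treat it as illustrative:

import pathlib

escaped = systemd_escape_path(pathlib.PurePosixPath('/var/lib/my app'))
print(escaped)  # expected: var-lib-my\x20app  ('/' becomes '-', the space is hex-escaped)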
Example #6
 def as_source_path( self, *, suffix: Optional[str] = None
 ) -> PurePosixPath:
     source_path = PurePosixPath(*self.parts)
     if suffix is not None:
         source_path = source_path.with_suffix(suffix)
     return source_path
 def get_core_file_path(self) -> PurePosixPath:
     '''Return the relative path of the core output file that should be generated.'''
     name = self.get_name()
     return PurePosixPath(f'{name}.json')
Example #8
 def session_path(self) -> PurePath:
     return PurePosixPath(
         f'{self.storeCfg.root_path}/run/session_logs/{self.trial_id}/{self.user.email}/{self.session_id}'
     )
Example #9
 def __trial_video(self, catalog: str, video_id: str) -> PurePath:
     return PurePosixPath(
         f'{self.__trial_user_path(catalog)}_{video_id}.json')
Example #10
 def parse_name(cls, path, name_str):
     if name_str.startswith(':'):
         name_str = name_str[1:]
     return cls(path, PurePosixPath(name_str))
Example #11
 def results_path_out(self) -> PurePath:
     """path to store the results of trials once complete"""
     return PurePosixPath(f'{self.storeCfg.root_path}/results')
def test_paths_of_a_different_flavour():
    with pytest.raises(TypeError):
        PureS3Path('/bucket/key') < PurePosixPath('/bucket/key')

    with pytest.raises(TypeError):
        PureWindowsPath('/bucket/key') > PureS3Path('/bucket/key')
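The underlying pathlib behaviour can be checked on its own: ordering comparisons between paths of different flavours return NotImplemented on both sides, so Python raises TypeError.

from pathlib import PurePosixPath, PureWindowsPath

try:
    PurePosixPath('/bucket/key') < PureWindowsPath('/bucket/key')
except TypeError as exc:
    print('cannot order across flavours:', exc)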
Example #13
 def test(self):
     # ARRANGE #
     cases = [
         CaseWithFiles(
             'single file name',
             args.FilesCondition([
                 args.FileCondition('file-name'),
             ]), SymbolsArrEx.empty(),
             {PurePosixPath('file-name'): asrt.is_none}),
         CaseWithFiles(
             'two file names',
             args.FilesCondition([
                 args.FileCondition('fn1'),
                 args.FileCondition('fn2'),
             ]), SymbolsArrEx.empty(), {
                 PurePosixPath('fn1'): asrt.is_none,
                 PurePosixPath('fn2'): asrt.is_none,
             }),
         CaseWithFiles(
             'two files with the same names',
             args.FilesCondition([
                 args.FileCondition('fn'),
                 args.FileCondition('fn'),
             ]), SymbolsArrEx.empty(), {
                 PurePosixPath('fn'): asrt.is_none,
             }),
         CaseWithFiles(
             'some unique files, some repeated',
             args.FilesCondition([
                 args.FileCondition('fn1'),
                 args.FileCondition('fn2'),
                 args.FileCondition('fn1'),
             ]), SymbolsArrEx.empty(), {
                 PurePosixPath('fn1'): asrt.is_none,
                 PurePosixPath('fn2'): asrt.is_none,
             }),
         CaseWithFiles(
             'different symbols with identical value',
             args.FilesCondition([
                 args.FileCondition(SymbolWithReferenceSyntax('sym_ref1')),
                 args.FileCondition(SymbolWithReferenceSyntax('sym_ref2')),
             ]),
             SymbolsArrEx([
                 StringConstantSymbolContext('sym_ref1', 'fn'),
                 StringConstantSymbolContext('sym_ref2', 'fn'),
             ], [
                 is_sym_ref_to_string__w_all_indirect_refs_are_strings(
                     'sym_ref1'),
                 is_sym_ref_to_string__w_all_indirect_refs_are_strings(
                     'sym_ref2'),
             ]), {
                 PurePosixPath('fn'): asrt.is_none,
             }),
     ]
     for case in cases:
         with self.subTest(case.name):
             # ACT & ASSERT #
             CHECKER.check__w_source_variants(
                 self, case.source.as_arguments, None,
                 arrangement_wo_tcds(case.symbols.symbol_table),
                 Expectation(
                     ParseExpectation(symbol_references=case.symbols.
                                      expected_references_assertion),
                     ExecutionExpectation(),
                     prim_asrt__constant(
                         asrt_primitive.files_matches(case.expected))))
Example #14
 def _exists(self) -> bool:
     load_path = self._get_load_path()
     return self._s3.isfile(str(PurePosixPath(load_path)))
def get_series():
    test_series = [
        # Int Series
        pd.Series([1, 2, 3], name="int_series"),
        pd.Series(range(10), name="int_range"),
        pd.Series([1, 2, 3], name="Int64_int_series", dtype="Int64"),
        pd.Series([1, 2, 3, np.nan], name="Int64_int_nan_series", dtype="Int64"),
        pd.Series([1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0], name="int_series_boolean"),
        # Count
        pd.Series(np.array([1, 2, 3, 4], dtype=np.uint32), name="np_uint32"),
        pd.Series([1, 2, 3, 4], dtype="UInt32", name="pd_uint32"),
        # Categorical
        pd.Series([1, 2, 3], name="categorical_int_series", dtype="category"),
        pd.Series(
            pd.Categorical(
                ["A", "B", "C", "C", "B", "A"],
                categories=["A", "B", "C"],
                ordered=False,
            ),
            name="categorical_char",
        ),
        pd.Series([1.0, 2.0, 3.1], dtype="category", name="categorical_float_series"),
        pd.Series(
            ["Georgia", "Sam"], dtype="category", name="categorical_string_series"
        ),
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1)],
            name="categorical_complex_series",
            dtype="category",
        ),
        # Ordinal
        pd.Series(
            pd.Categorical(
                ["A", "B", "C", "C", "B", "A"], categories=["A", "B", "C"], ordered=True
            ),
            name="ordinal",
        ),
        # Float Series
        pd.Series([1.0, 2.1, 3.0], name="float_series"),
        pd.Series([1.0, 2.5, np.nan], name="float_nan_series"),
        pd.Series([1.0, 2.0, 3.0, 4.0], name="float_series2"),
        pd.Series(np.array([1.2, 2, 3, 4], dtype=np.float64), name="float_series3"),
        pd.Series([1, 2, 3.05, 4], dtype=np.float64, name="float_series4"),
        pd.Series([np.nan, 1.2], name="float_series5"),
        pd.Series([np.nan, 1.1], dtype=np.single, name="float_series6"),
        pd.Series([np.inf, np.NINF, np.PINF, 1000000.0, 5.5], name="float_with_inf"),
        pd.Series([np.inf, np.NINF, np.Infinity, np.PINF], name="inf_series"),
        pd.Series([1, 2, np.nan], name="int_nan_series"),
        # Nan Series
        pd.Series([np.nan], name="nan_series"),
        pd.Series([np.nan, np.nan, np.nan, np.nan], name="nan_series_2"),
        # String Series
        pd.Series(["Patty", "Valentine"], name="string_series"),
        pd.Series(["mack", "the", "finger"], name="string_unicode_series"),
        pd.Series(
            np.array(["upper", "hall"], dtype=np.unicode_),
            name="string_np_unicode_series",
        ),
        pd.Series(["1.0", "2.0", np.nan], name="string_num_nan"),
        pd.Series(["1,000.0", "2.1", np.nan], name="string_with_sep_num_nan"),
        pd.Series(["1.0", "2.0", "3.0"], name="string_num"),
        pd.Series(["1.0", "45.67", np.nan], name="string_flt_nan"),
        pd.Series(["1.0", "45.67", "3.5"], name="string_flt"),
        pd.Series(
            [
                "I was only robbing the register,",
                "I hope you understand",
                "One of us had better call up the cops",
                "In the hot New Jersey night",
                np.nan,
            ],
            name="string_str_nan",
        ),
        pd.Series(["True", "False", None], name="string_bool_nan"),
        pd.Series(range(20), name="int_str_range").astype("str"),
        pd.Series(
            [
                "http://www.cwi.nl:80/%7Eguido/Python.html",
                "https://github.com/dylan-profiling/hurricane",
            ],
            name="str_url",
        ),
        pd.Series(
            [r"C:\\home\\user\\file.txt", r"C:\\home\\user\\test2.txt"],
            name="path_series_windows_str",
        ),
        pd.Series(
            [r"/home/user/file.txt", r"/home/user/test2.txt"],
            name="path_series_linux_str",
        ),
        pd.Series(["0011", "12"], name="str_int_leading_zeros"),
        pd.Series(["0.0", "0.04", "0"], name="str_float_non_leading_zeros"),
        pd.Series(["0.0", "0.000", "0", "2"], name="str_int_zeros"),
        # Bool Series
        pd.Series([True, False], name="bool_series"),
        pd.Series([True, False, None], name="bool_nan_series"),
        pd.Series([True, False, None], name="nullable_bool_series", dtype=btype),
        pd.Series([True, False, False, True], name="bool_series2", dtype=bool),
        pd.Series([True, False, False, True], name="bool_series2", dtype=bool),
        pd.Series(np.array([1, 0, 0, 1], dtype=bool), name="bool_series3"),
        # Complex Series
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1)],
            name="complex_series",
        ),
        pd.Series(
            [
                complex(0, 0),
                complex(1, 2),
                complex(3, -1),
                complex(np.nan, np.nan),
            ],
            name="complex_series_nan",
        ),
        pd.Series(["(1+1j)", "(2+2j)", "(10+100j)"], name="str_complex"),
        pd.Series(["(1+1j)", "(2+2j)", "(10+100j)", "NaN"], name="str_complex_nan"),
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1), np.nan],
            name="complex_series_nan_2",
        ),
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1), np.nan],
            name="complex_series_py_nan",
        ),
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1)], name="complex_series_py"
        ),
        pd.Series(
            [
                complex(0, 0),
                complex(1, 0),
                complex(3, 0),
                complex(-1, 0),
            ],
            name="complex_series_float",
        ),
        # Datetime Series
        pd.Series(["1937-05-06", "20/4/2014"], name="string_date"),
        pd.Series(["1941-05-24", "13/10/2016"], name="timestamp_string_series"),
        pd.to_datetime(
            pd.Series(
                [datetime.datetime(2017, 3, 5, 12, 2), datetime.datetime(2019, 12, 4)],
                name="timestamp_series",
            )
        ),
        pd.to_datetime(
            pd.Series(
                [
                    datetime.datetime(2017, 3, 5),
                    datetime.datetime(2019, 12, 4, 3, 2, 0),
                    pd.NaT,
                ],
                name="timestamp_series_nat",
            )
        ),
        pd.to_datetime(
            pd.Series(
                [datetime.datetime(2017, 3, 5), datetime.datetime(2019, 12, 4), pd.NaT],
                name="date_series_nat",
            )
        ),
        pd.Series(
            pd.date_range(
                start="2013-05-18 12:00:01",
                periods=2,
                freq="H",
                tz="Europe/Brussels",
                name="timestamp_aware_series",
            )
        ),
        pd.to_datetime(
            pd.Series(
                [
                    datetime.date(2011, 1, 1),
                    datetime.date(2012, 1, 2),
                    datetime.date(2013, 1, 1),
                ],
                name="datetime",
            )
        ),
        # Date series
        pd.Series(
            [
                datetime.date(2011, 1, 1),
                datetime.date(2012, 1, 2),
                datetime.date(2013, 1, 1),
            ],
            name="date",
        ),
        # Time series
        pd.Series(
            [
                datetime.time(8, 43, 12),
                datetime.time(9, 43, 12),
                datetime.time(10, 43, 12),
            ],
            name="time",
        ),
        # http://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations
        # pd.to_datetime(
        #     pd.Series(
        #         [
        #             datetime.datetime(year=1, month=1, day=1, hour=8, minute=43, second=12),
        #             datetime.datetime(year=1, month=1, day=1, hour=9, minute=43, second=12),
        #             datetime.datetime(
        #                 year=1, month=1, day=1, hour=10, minute=43, second=12
        #             ),
        #         ],
        #         name="datetime_to_time",
        #     )
        # ),
        # Timedelta Series
        pd.Series([pd.Timedelta(days=i) for i in range(3)], name="timedelta_series"),
        pd.Series(
            [pd.Timedelta(days=i) for i in range(3)] + [pd.NaT],
            name="timedelta_series_nat",
        ),
        pd.Series(
            [
                pd.Timedelta("1 days 00:03:43"),
                pd.Timedelta("5 days 12:33:57"),
                pd.Timedelta("0 days 01:25:07"),
                pd.Timedelta("-2 days 13:46:56"),
                pd.Timedelta("1 days 23:49:25"),
            ],
            name="timedelta_negative",
        ),
        # Path Series
        pd.Series(
            [
                PurePosixPath("/home/user/file.txt"),
                PurePosixPath("/home/user/test2.txt"),
            ],
            name="path_series_linux",
        ),
        pd.Series(
            [
                PurePosixPath("/home/user/file.txt"),
                PurePosixPath("/home/user/test2.txt"),
                None,
            ],
            name="path_series_linux_missing",
        ),
        pd.Series(
            [
                PureWindowsPath("C:\\home\\user\\file.txt"),
                PureWindowsPath("C:\\home\\user\\test2.txt"),
            ],
            name="path_series_windows",
        ),
        # Url Series
        pd.Series(
            [
                urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
                urlparse("https://github.com/dylan-profiling/hurricane"),
            ],
            name="url_series",
        ),
        pd.Series(
            [
                urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
                urlparse("https://github.com/dylan-profiling/hurricane"),
                np.nan,
            ],
            name="url_nan_series",
        ),
        pd.Series(
            [
                urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
                urlparse("https://github.com/dylan-profiling/hurricane"),
                None,
            ],
            name="url_none_series",
        ),
        # UUID Series
        pd.Series(
            [
                uuid.UUID("0b8a22ca-80ad-4df5-85ac-fa49c44b7ede"),
                uuid.UUID("aaa381d6-8442-4f63-88c8-7c900e9a23c6"),
                uuid.UUID("00000000-0000-0000-0000-000000000000"),
            ],
            name="uuid_series",
        ),
        pd.Series(
            [
                uuid.UUID("0b8a22ca-80ad-4df5-85ac-fa49c44b7ede"),
                uuid.UUID("aaa381d6-8442-4f63-88c8-7c900e9a23c6"),
                uuid.UUID("00000000-0000-0000-0000-000000000000"),
                None,
            ],
            name="uuid_series_missing",
        ),
        pd.Series(
            [
                "0b8a22ca-80ad-4df5-85ac-fa49c44b7ede",
                "aaa381d6-8442-4f63-88c8-7c900e9a23c6",
                "00000000-0000-0000-0000-000000000000",
            ],
            name="uuid_series_str",
        ),
        # Object Series
        pd.Series([[1, ""], [2, "Rubin"], [3, "Carter"]], name="mixed_list[str,int]"),
        pd.Series(
            [{"why": "did you"}, {"bring him": "in for he"}, {"aint": "the guy"}],
            name="mixed_dict",
        ),
        pd.Series(
            [pd.to_datetime, pd.to_timedelta, pd.read_json, pd.to_pickle],
            name="callable",
        ),
        pd.Series([pd, np], name="module"),
        pd.Series(["1.1", "2"], name="textual_float"),
        pd.Series(["1.1", "2", "NAN"], name="textual_float_nan"),
        # Object (Mixed, https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.api.types.infer_dtype.html)
        pd.Series(["a", 1], name="mixed_integer"),
        pd.Series([True, False, np.nan], name="mixed"),
        pd.Series([[True], [False], [False]], name="mixed_list"),
        pd.Series([[1, ""], [2, "Rubin"], [3, "Carter"]], name="mixed_list[str,int]"),
        pd.Series(
            [{"why": "did you"}, {"bring him": "in for he"}, {"aint": "the guy"}],
            name="mixed_dict",
        ),
        # IP
        pd.Series([IPv4Address("127.0.0.1"), IPv4Address("127.0.0.1")], name="ip"),
        pd.Series(["127.0.0.1", "127.0.0.1"], name="ip_str"),
        # Empty
        pd.Series([], name="empty", dtype=np.float64),
        pd.Series([], name="empty_float", dtype=float),
        pd.Series([], name="empty_int64", dtype="Int64"),
        pd.Series([], name="empty_object", dtype="object"),
        pd.Series([], name="empty_bool", dtype=bool),
        # IP
        pd.Series([IPv4Address("127.0.0.1"), IPv4Address("127.0.0.1")], name="ip"),
        pd.Series(
            [IPv4Address("127.0.0.1"), None, IPv4Address("127.0.0.1")],
            name="ip_missing",
        ),
        pd.Series(
            [IPv6Address("0:0:0:0:0:0:0:1"), IPv4Address("127.0.0.1")],
            name="ip_mixed_v4andv6",
        ),
        pd.Series(["127.0.0.1", "127.0.0.1"], name="ip_str"),
        # File
        pd.Series(
            [
                pathlib.Path(os.path.join(base_path, "series.py")).absolute(),
                pathlib.Path(os.path.join(base_path, "__init__.py")).absolute(),
                pathlib.Path(os.path.join(base_path, "utils.py")).absolute(),
            ],
            name="file_test_py",
        ),
        pd.Series(
            [
                pathlib.Path(os.path.join(base_path, "..", "py.typed")).absolute(),
                pathlib.Path(
                    os.path.join(
                        base_path, "..", "visualisation", "circular_packing.html"
                    )
                ).absolute(),
                pathlib.Path(os.path.join(base_path, "series.py")).absolute(),
            ],
            name="file_mixed_ext",
        ),
        pd.Series(
            [
                pathlib.Path(os.path.join(base_path, "series.py")).absolute(),
                None,
                pathlib.Path(os.path.join(base_path, "__init__.py")).absolute(),
                None,
                pathlib.Path(os.path.join(base_path, "utils.py")).absolute(),
            ],
            name="file_test_py_missing",
        ),
        # Image
        pd.Series(
            [
                pathlib.Path(
                    os.path.join(
                        base_path,
                        "../visualisation/typesets/typeset_complete.png",
                    )
                ).absolute(),
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_standard.png",
                    )
                ).absolute(),
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_geometry.png",
                    )
                ).absolute(),
            ],
            name="image_png",
        ),
        pd.Series(
            [
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_complete.png",
                    )
                ).absolute(),
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_standard.png",
                    )
                ).absolute(),
                None,
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_geometry.png",
                    )
                ).absolute(),
                None,
            ],
            name="image_png_missing",
        ),
        # Email
        pd.Series(
            [FQDA("test", "example.com"), FQDA("info", "example.eu")],
            name="email_address",
        ),
        pd.Series(
            [FQDA("test", "example.com"), FQDA("info", "example.eu"), None],
            name="email_address_missing",
        ),
        pd.Series(["*****@*****.**", "*****@*****.**"], name="email_address_str"),
    ]

    if int(pd.__version__.split(".")[0]) >= 1:
        pandas_1_series = [
            pd.Series(
                ["Patty", "Valentine"], dtype="string", name="string_dtype_series"
            )
        ]
        test_series.extend(pandas_1_series)

    return test_series
Example #16
    def _load(self) -> pd.DataFrame:
        load_path = PurePosixPath(self._get_load_path())

        with self._s3.open(str(load_path), mode="rb") as s3_file:
            return pd.read_parquet(s3_file, **self._load_args)
Example #17
def upload(
    paths,
    girder_collection,
    girder_top_folder,
    local_top_path,
    girder_instance,
    existing,
    validation_,
    fake_data,
    develop_debug,
):
    # Ensure that we have all Folders created as well
    assert local_top_path, "--local-top-path must be specified for now"
    assert girder_collection, "--collection must be specified"

    if not girder_top_folder:
        # TODO: UI
        #  Most often it would be the same directory name as of the local top dir
        girder_top_folder = op.basename(local_top_path)
        if girder_top_folder in (op.pardir, op.curdir):
            girder_top_folder = op.basename(op.realpath(local_top_path))

    import multiprocessing
    from .. import girder
    from ..pynwb_utils import get_metadata
    from ..pynwb_utils import validate as pynwb_validate
    from ..pynwb_utils import ignore_benign_pynwb_warnings
    from ..support.generatorify import generator_from_callback
    from ..support.pyout import naturalsize
    from pathlib import Path, PurePosixPath

    ignore_benign_pynwb_warnings()  # so validate doesn't whine

    client = girder.authenticate(girder_instance)

    collection_rec = girder.ensure_collection(client, girder_collection)
    lgr.debug("Working with collection %s", collection_rec)

    local_top_path = Path(local_top_path)
    girder_top_folder = PurePosixPath(girder_top_folder)

    # We will keep a shared set of "being processed" paths so
    # we can limit the number of them until
    #   https://github.com/pyout/pyout/issues/87
    # is properly addressed
    process_paths = set()
    uploaded_paths = {}  # path: uploaded size

    def skip_file(msg):
        return {"status": "skipped", "message": msg}

    lock = multiprocessing.Lock()

    # TODO: we might want to always yield a full record so that no missing
    # field causes pyout to halt
    def process_path(path, relpath):
        try:
            try:
                stat = os.stat(path)
                yield {"size": stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            yield {"status": "checking girder"}

            girder_folder = girder_top_folder / relpath.parent

            while True:
                try:
                    lock.acquire(timeout=60)
                    # TODO: we need to make this all thread safe all the way
                    #       until uploading the file since multiple threads would
                    #       create multiple
                    folder_rec = girder.ensure_folder(
                        client, collection_rec, girder_collection, girder_folder
                    )

                    # Get (if already exists) or create an item
                    item_rec = client.createItem(
                        folder_rec["_id"], name=relpath.name, reuseExisting=True
                    )
                finally:
                    lock.release()

                file_recs = list(client.listFile(item_rec["_id"]))
                if len(file_recs) > 1:
                    raise NotImplementedError(
                        f"Item {item_rec} contains multiple files: {file_recs}"
                    )
                elif file_recs:  # there is a file already
                    if existing == "skip":
                        yield skip_file("exists already")
                        return
                    elif existing == "reupload":
                        yield {
                            "message": "exists - reuploading",
                            "status": "deleting old item",
                        }
                        # TODO: delete an item here
                        raise NotImplementedError("yarik did not find deleteItem API")
                        continue
                    else:
                        raise ValueError(existing)
                break  # no need to loop

            # we need to delete it first??? I do not see a method TODO
            if validation_ != "skip":
                yield {"status": "validating"}
                validation_errors = pynwb_validate(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation_ == "require":
                        yield skip_file("failed validation")
                        return
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            # Extract metadata before actual upload and skip if fails
            # TODO: allow non-nwb files to skip this step
            yield {"status": "extracting metadata"}
            try:
                metadata = get_metadata(path)
            except Exception as exc:
                yield skip_file("failed to extract metadata: %s" % str(exc))
                return

            yield {"status": "uploading"}
            # Upload file to an item
            # XXX TODO progress reporting back to pyout is actually tricky
            #     if possible to implement via callback since
            #     callback would need to yield somehow from the context here.
            #     yoh doesn't see how that could be done yet. In the worst
            #     case we would copy uploadFileToItem and _uploadContents
            #     and make them into generators to relay progress instead of
            #     via callback
            # https://stackoverflow.com/questions/9968592/turn-functions-with-a-callback-into-python-generators
            # has some solutions but all IMHO are a bit too complex

            for r in generator_from_callback(
                lambda c: client.uploadFileToItem(
                    item_rec["_id"], path, progressCallback=c
                )
            ):
                uploaded_paths[str(path)] = r["current"]
                yield {
                    "upload": 100.0
                    * ((r["current"] / r["total"]) if r["total"] else 1.0)
                }

            # Provide metadata for the item from the file; this could be done via
            #  a callback to be triggered upon successful upload, or we could
            #  just do it "manually"
            metadata_ = {}
            for k, v in metadata.items():
                if v in ("", None):
                    continue  # degenerate, why bother
                # XXX TODO: remove this -- it is only temporary, search should handle
                if isinstance(v, str):
                    metadata_[k] = v.lower()
                elif isinstance(v, datetime.datetime):
                    metadata_[k] = str(v)
            # we will add some fields which would help us with deciding to
            # reupload or not
            metadata_["uploaded_size"] = os.stat(str(path)).st_size
            metadata_["uploaded_mtime"] = os.stat(str(path)).st_mtime

            yield {"status": "uploading metadata"}
            client.addMetadataToItem(item_rec["_id"], metadata_)

            yield {"status": "done"}

        except Exception as exc:
            if develop_debug:
                raise
            yield {"status": "ERROR", "message": str(exc)}
        finally:
            process_paths.remove(str(path))

    # We will again use pyout to provide a neat table summarizing our progress
    # with upload etc
    import pyout
    from ..support import pyout as pyouts

    # for the upload speeds we need to provide a custom  aggregate
    t0 = time.time()

    def upload_agg(*ignored):
        dt = time.time() - t0
        total = sum(uploaded_paths.values())
        if not total:
            return ""
        speed = total / dt if dt else 0
        return "%s/s" % naturalsize(speed)

    pyout_style = pyouts.get_style(hide_if_missing=False)
    pyout_style["upload"]["aggregate"] = upload_agg

    rec_fields = ("path", "size", "errors", "upload", "status", "message")
    out = pyout.Tabular(style=pyout_style, columns=rec_fields)

    with out:
        for path in paths:
            while len(process_paths) >= 10:
                lgr.log(2, "Sleep waiting for some paths to finish processing")
                time.sleep(0.5)
            process_paths.add(path)

            rec = {"path": path}
            path = Path(path)
            try:
                relpath = path.relative_to(local_top_path)
                rec["path"] = str(relpath)
                if develop_debug:
                    # DEBUG: do serially
                    for v in process_path(path, relpath):
                        print(v)
                else:
                    rec[rec_fields[1:]] = process_path(path, relpath)
            except ValueError as exc:
                # typically if local_top_path is not the top path for the path
                rec["status"] = skip_file(exc)
            out(rec)
Example #18
def read_andotp_accounts(data_root):
    # Parse the preferences file to determine what kind of backups we can have AndOTP generate and where they will reside
    try:
        handle = adb_read_file(
            data_root /
            'org.shadowice.flocke.andotp/shared_prefs/org.shadowice.flocke.andotp_preferences.xml'
        )
    except FileNotFoundError:
        return

    preferences = ElementTree.parse(handle)

    try:
        backup_path = PurePosixPath(
            preferences.find('.//string[@name="pref_backup_directory"]').text)
    except AttributeError:
        backup_path = PurePosixPath('$EXTERNAL_STORAGE/andOTP')

    try:
        allowed_backup_broadcasts = [
            s.text for s in preferences.findall(
                './/set[@name="pref_backup_broadcasts"]/string')
        ]
    except AttributeError:
        allowed_backup_broadcasts = []

    try:
        initial_backup_files = set(adb_list_dir(backup_path))
    except FileNotFoundError:
        initial_backup_files = set()

    if 'encrypted' in allowed_backup_broadcasts:
        try:
            from Crypto.Cipher import AES
        except:
            logger.error(
                'Reading encrypted AndOTP backups requires PyCryptodome')
            return

        adb_fast_run(
            'am broadcast -a org.shadowice.flocke.andotp.broadcast.ENCRYPTED_BACKUP org.shadowice.flocke.andotp',
            prefix=b'am: ')
    elif 'plain' in allowed_backup_broadcasts:
        if not input(
                'Encrypted AndOTP backups are disabled. Are you sure you want to create a plaintext backup (y/N)? '
        ).lower().startswith('y'):
            logger.debug('Aborted AndOTP plaintext backup')
            return

        adb_fast_run(
            'am broadcast -a org.shadowice.flocke.andotp.broadcast.PLAIN_TEXT_BACKUP org.shadowice.flocke.andotp',
            prefix=b'am: ')
    else:
        logger.error(
            'No AndOTP backup broadcasts are setup. Please enable at least encrypted backups in the AndOTP settings.'
        )
        return

    backup_data = None
    backup_file = None

    # Find all newly-created backup files
    for i in range(10):
        try:
            time.sleep(0.1)
            new_backups = list(
                set(adb_list_dir(backup_path)) - initial_backup_files)

            if not new_backups:
                continue

            backup_file = new_backups[0]
            backup_data = adb_read_file(backup_file)
            break
        except FileNotFoundError:
            logger.warning(
                'Did not find any new backup files in %s (attempt %d)',
                backup_path, i + 1)
    else:
        logger.error(
            'Could not read the AndOTP backup file. Do you have a backup password set?'
        )
        return

    if 'encrypted' in allowed_backup_broadcasts:
        backup_password = getpass.getpass('Enter the AndOTP backup password: ')

        # NOTE: the statements that split the backup and derived the key were
        # redacted by the snippet site; the lines below are an approximate
        # reconstruction assuming the classic AndOTP layout of
        # 12-byte nonce + ciphertext + 16-byte GCM tag and a SHA-256 key.
        backup_contents = backup_data.read()
        nonce = backup_contents[:12]
        ciphertext = backup_contents[12:-16]
        tag = backup_contents[-16:]
        key = hashlib.sha256(backup_password.encode('utf-8')).digest()

        cipher = AES.new(key, AES.MODE_GCM, nonce=nonce)

        try:
            accounts_json = cipher.decrypt(ciphertext)
            cipher.verify(tag)
        except ValueError:
            logger.error(
                'Could not decrypt the AndOTP backup. Is your password correct?'
            )
            return
    else:
        accounts_json = backup_data.read()

        if backup_file.suffix == '.json':
            if input('Do you want to delete the plaintext backup (y/N)? '
                     ).lower().startswith('y'):
                adb_fast_run(f'su -c "rm {shlex.quote(str(backup_file))}"',
                             prefix=b'rm: ')

    for account in json.loads(accounts_json):
        if account['type'] == 'TOTP':
            yield TOTPAccount(account['label'],
                              account['secret'],
                              digits=account['digits'],
                              period=account['period'],
                              algorithm=account['algorithm'])
        elif account['type'] == 'HOTP':
            yield HOTPAccount(account['label'],
                              account['secret'],
                              digits=account['digits'],
                              counter=account['counter'],
                              algorithm=account['algorithm'])
        elif account['type'] == 'STEAM':
            yield SteamAccount(account['label'], account['secret'])
        else:
            logger.warning('Unknown AndOTP account type: %s', account['type'])
Example #19
    parser.add_argument('--no-freeotp',
                        action='store_true',
                        help='no FreeOTP codes')
    parser.add_argument('--no-google-authenticator',
                        action='store_true',
                        help='no Google Authenticator codes')
    parser.add_argument('--no-microsoft-authenticator',
                        action='store_true',
                        help='no Microsoft Authenticator codes')
    parser.add_argument('--no-steam-authenticator',
                        action='store_true',
                        help='no Steam Authenticator codes')

    parser.add_argument('--data',
                        type=PurePosixPath,
                        default=PurePosixPath('$ANDROID_DATA/data/'),
                        help='path to the app data folder')

    parser.add_argument(
        '--no-show-uri',
        action='store_true',
        help='disable printing the accounts as otpauth:// URIs')
    parser.add_argument(
        '--show-qr',
        action='store_true',
        help='displays the accounts as a local webpage with scannable QR codes'
    )

    parser.add_argument('--prepend-issuer',
                        action='store_true',
                        help='adds the issuer to the token name')
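Passing PurePosixPath as the argparse type above converts the argument string at parse time; a tiny standalone check of that pattern:

import argparse
from pathlib import PurePosixPath

parser = argparse.ArgumentParser()
parser.add_argument('--data', type=PurePosixPath,
                    default=PurePosixPath('$ANDROID_DATA/data/'))

args = parser.parse_args(['--data', '/data/data/'])
print(type(args.data).__name__, args.data)  # PurePosixPath /data/data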
Example #20
def split_path(path: str) -> List[str]:
    """
    Split a relative (not absolute) POSIX path into its segments.
    """
    pure = PurePosixPath(path)
    return [str(p.name) for p in list(reversed(pure.parents))[1:] + [pure]]
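Two quick checks of the behaviour described in the docstring:

assert split_path('a/b/c.txt') == ['a', 'b', 'c.txt']
assert split_path('single') == ['single']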
Example #21
    def _build_image(self, push: bool = True) -> tuple:
        """
        Build a Docker image using the docker python library.

        Args:
            - push (bool, optional): Whether or not to push the built Docker image; this
                requires the `registry_url` to be set

        Returns:
            - tuple: generated UUID strings `image_name`, `image_tag`

        Raises:
            - ValueError: if the image fails to build
            - InterruptedError: if either pushing or pulling the image fails
        """
        assert isinstance(self.image_name, str), "Image name must be provided"
        assert isinstance(self.image_tag, str), "An image tag must be provided"

        # Make temporary directory to hold serialized flow, healthcheck script, and dockerfile
        # note that if the user provides a custom dockerfile, we create the temporary directory
        # within the current working directory to preserve their build context
        with tempfile.TemporaryDirectory(
                dir="." if self.dockerfile else None) as tempdir:

            if sys.platform == "win32":
                # problem with docker and relative paths only on windows
                tempdir = os.path.abspath(tempdir)

            # Build the dockerfile
            if self.base_image and not self.local_image:
                self.pull_image()

            dockerfile_path = self.create_dockerfile_object(directory=tempdir)
            client = self._get_client()

            # Verify that a registry url has been provided for images that should be pushed
            if self.registry_url:
                full_name = str(
                    PurePosixPath(self.registry_url, self.image_name))
            elif push is True:
                warnings.warn(
                    "This Docker storage object has no `registry_url`, and "
                    "will not be pushed.",
                    UserWarning,
                )
                full_name = self.image_name
            else:
                full_name = self.image_name

            # Use the docker client to build the image
            self.logger.info("Building the flow's Docker storage...")
            output = client.build(
                path="." if self.dockerfile else tempdir,
                dockerfile=dockerfile_path,
                tag="{}:{}".format(full_name, self.image_tag),
                forcerm=True,
            )
            self._parse_generator_output(output)

            if len(client.images(name=full_name)) == 0:
                raise ValueError(
                    "Your docker image failed to build!  Your flow might have "
                    "failed one of its deployment health checks - please ensure "
                    "that all necessary files and dependencies have been included."
                )

            # Push the image if requested
            if push and self.registry_url:
                self.push_image(full_name, self.image_tag)

                # Remove the image locally after being pushed
                client.remove_image(image="{}:{}".format(
                    full_name, self.image_tag),
                                    force=True)

        return self.image_name, self.image_tag
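The registry-qualified name above is built by joining registry_url and image_name as a POSIX path, which avoids manual handling of the separator; a quick illustration with made-up values:

from pathlib import PurePosixPath

registry_url = 'registry.example.com/team'  # made-up registry
image_name = 'my-flow'                      # made-up image name

full_name = str(PurePosixPath(registry_url, image_name))
print(full_name)  # registry.example.com/team/my-flow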
Example #22
def test_pipes_advanced(cleanup_pipe):
    import d6tflow.pipes
    d6tflow.pipes.init(cfg['d6tpipe_pipe1'],
                       profile=cfg['d6tpipe_profile'],
                       local_pipe=True,
                       reset=True)
    assert 'Local' in d6tflow.pipes.get_pipe().__class__.__name__
    d6tflow.pipes.init(cfg['d6tpipe_pipe1'],
                       profile=cfg['d6tpipe_profile'],
                       reset=True)

    class Task1(d6tflow.tasks.TaskPqPandas):
        def run(self):
            self.save(df)

    t1 = Task1()
    pipe1 = t1.get_pipe()
    pipedir = pipe1.dirpath
    t1filepath = t1.output().path
    t1file = str(PurePosixPath(t1filepath.relative_to(pipedir)))

    d6tflow.preview(t1)
    assert d6tflow.run(t1)
    assert t1.complete()

    with fuckit:
        pipe1._pullpush_luigi([t1file], op='remove')

    assert pipe1.scan_remote(cached=False) == []
    assert t1.pull_preview() == []
    assert t1.push_preview() == [t1file]
    assert d6tflow.pipes.all_push_preview(t1) == {
        cfg['d6tpipe_pipe1']: [t1file]
    }
    assert d6tflow.pipes.all_push(t1) == {cfg['d6tpipe_pipe1']: [t1file]}

    class Task1(d6tflow.tasks.TaskPqPandas):
        external = True
        pipename = cfg['d6tpipe_pipe1']

    class Task2(d6tflow.tasks.TaskPqPandas):
        persist = ['df2', 'df4']

        def requires(self):
            return Task1()

        def run(self):
            df2fun(self)

    import importlib
    importlib.reload(d6tflow)
    importlib.reload(d6tflow.pipes)
    d6tflow.cache.pipes = {}
    d6tflow.pipes.init(cfg['d6tpipe_pipe2'],
                       profile=cfg['d6tpipe_profile2'],
                       reset=True)
    t1 = Task1()
    assert t1.get_pipename() == cfg['d6tpipe_pipe1']
    assert not t1.complete()
    assert t1.pull_preview() == [str(t1file)]
    assert d6tflow.pipes.all_pull_preview(t1) == {
        cfg['d6tpipe_pipe1']: [t1file]
    }
    assert t1.pull() == [str(t1file)]
    assert t1.complete()
    assert t1.output().load().equals(df)

    t2 = Task2()
    d6tflow.show([t2])
    assert d6tflow.run([t2])  # run as list

    pipe2 = t2.get_pipe()
    pipedir = t2.get_pipe().dirpath
    # assert False
    t2files = [
        str(PurePosixPath(p.path.relative_to(pipedir)))
        for p in t2.output().values()
    ]

    assert d6tflow.pipes.all_push_preview(t2) == {
        cfg['d6tpipe_pipe2']: t2files
    }

    # cleanup
    pipe1._pullpush_luigi([t1file], op='remove')
    assert pipe1.scan_remote(cached=False) == []
Example #23
def get_parent(path):
    if str(PurePosixPath(path).parent) == '.':
        return './'
    return format_path('/' + str(PurePosixPath(path).parent),
                       path_type='folder')
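The './' special case in get_parent() above exists because the parent of a bare filename is '.':

from pathlib import PurePosixPath

print(PurePosixPath('file.txt').parent)      # .
print(PurePosixPath('dir/file.txt').parent)  # dir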
    def __init__(
        self,
        filepath: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
        credentials: Dict[str, Any] = None,
        fs_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``TensorFlowModelDataset``.

        Args:
            filepath: Filepath in POSIX format to a TensorFlow model directory prefixed with a
                protocol like `s3://`. If prefix is not provided `file` protocol (local filesystem)
                will be used. The prefix should be any protocol supported by ``fsspec``.
                Note: `http(s)` doesn't support versioning.
            load_args: TensorFlow options for loading models.
                Here you can find all available arguments:
                https://www.tensorflow.org/api_docs/python/tf/keras/models/load_model
                All defaults are preserved.
            save_args: TensorFlow options for saving models.
                Here you can find all available arguments:
                https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model
                All defaults are preserved, except for "save_format", which is set to "tf".
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            credentials: Credentials required to get access to the underlying filesystem.
                E.g. for ``GCSFileSystem`` it should look like `{'token': None}`.
            fs_args: Extra arguments to pass into underlying filesystem class constructor
                (e.g. `{"project": "my-project"}` for ``GCSFileSystem``).
        """
        _fs_args = copy.deepcopy(fs_args) or {}
        _credentials = copy.deepcopy(credentials) or {}
        protocol, path = get_protocol_and_path(filepath, version)
        if protocol == "file":
            _fs_args.setdefault("auto_mkdir", True)

        self._protocol = protocol
        self._fs = fsspec.filesystem(self._protocol, **_credentials,
                                     **_fs_args)
        super().__init__(
            filepath=PurePosixPath(path),
            version=version,
            exists_function=self._fs.exists,
            glob_function=self._fs.glob,
        )

        self._tmp_prefix = "kedro_tensorflow_tmp"  # temp prefix pattern

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)

        if self._save_args.get("save_format") == "h5":
            self._tmpfile_callable = tempfile.NamedTemporaryFile  # type: Callable
        else:
            self._tmpfile_callable = tempfile.TemporaryDirectory
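A minimal usage sketch for a dataset built this way, assuming the surrounding class provides the usual Kedro load/save machinery (not shown in this excerpt); the bucket, key, and credentials below are placeholders:

dataset = TensorFlowModelDataset(
    filepath='s3://my-bucket/models/autoencoder',  # placeholder bucket/key
    load_args={'compile': False},                  # forwarded to tf.keras.models.load_model
    credentials={'key': '<aws-key>', 'secret': '<aws-secret>'},
)
model = dataset.load()  # delegates to the dataset's _load()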
Example #25
 def results_path_recfluence(self) -> PurePath:
     """path that recfluence stores its latest results"""
     return PurePosixPath('results')
    def __init__(
        self,
        filepath: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
        credentials: Dict[str, Any] = None,
        fs_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``GeoJSONDataSet`` pointing to a concrete GeoJSON file
        on a specific filesystem fsspec.

        Args:

            filepath: Filepath in POSIX format to a GeoJSON file prefixed with a protocol like
                `s3://`. If prefix is not provided `file` protocol (local filesystem) will be used.
                The prefix should be any protocol supported by ``fsspec``.
                Note: `http(s)` doesn't support versioning.
            load_args: GeoPandas options for loading GeoJSON files.
                Here you can find all available arguments:
                https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html
            save_args: GeoPandas options for saving geojson files.
                Here you can find all available arguments:
                https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.to_file.html
                The default ``driver`` save argument is 'GeoJSON'; all other defaults are preserved.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            credentials: Credentials required to access the underlying filesystem.
                E.g. for ``GCSFileSystem`` it would look like `{'token': None}`.
            fs_args: Extra arguments to pass into underlying filesystem class constructor
                (e.g. `{"project": "my-project"}` for ``GCSFileSystem``), as well as
                to pass to the filesystem's `open` method through nested keys
                `open_args_load` and `open_args_save`.
                Here you can find all available arguments for `open`:
                https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open
                All defaults are preserved, except `mode`, which is set to `wb` when saving.
        """
        _fs_args = copy.deepcopy(fs_args) or {}
        _fs_open_args_load = _fs_args.pop("open_args_load", {})
        _fs_open_args_save = _fs_args.pop("open_args_save", {})
        _credentials = copy.deepcopy(credentials) or {}
        protocol, path = get_protocol_and_path(filepath, version)
        self._protocol = protocol
        if protocol == "file":
            _fs_args.setdefault("auto_mkdir", True)

        self._fs = fsspec.filesystem(self._protocol, **_credentials,
                                     **_fs_args)

        super().__init__(
            filepath=PurePosixPath(path),
            version=version,
            exists_function=self._fs.exists,
            glob_function=self._fs.glob,
        )

        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)

        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)

        _fs_open_args_save.setdefault("mode", "wb")
        self._fs_open_args_load = _fs_open_args_load
        self._fs_open_args_save = _fs_open_args_save
Example #27
 def __trial_path(self, catalog: str) -> PurePath:
     return PurePosixPath(
         f'{self.storeCfg.root_path}/run/{catalog}/{self.trial_id}')
Example #28
def lambda_handler(event, context):
    print("Lambda or NAT IP Address:", instance_id)
    logger.info(json.dumps(event, default=str))

    for trigger_record in event['Records']:
        trigger_body = trigger_record['body']
        job = json.loads(trigger_body)
        logger.info(json.dumps(job, default=str))

        # Skip the test-access records that S3 writes to SQS automatically during initial setup
        if 'Event' in job:
            if job['Event'] == 's3:TestEvent':
                logger.info('Skip s3:TestEvent')
                continue

        # If the message came from S3 rather than from the job sender, convert it
        if 'Records' in job:  # messages from S3 carry 'Records'
            for One_record in job['Records']:
                if 's3' in One_record:
                    Src_bucket = One_record['s3']['bucket']['name']
                    Src_key = One_record['s3']['object']['key']
                    Src_key = urllib.parse.unquote_plus(Src_key)
                    Size = One_record['s3']['object']['size']
                    if "versionId" in One_record['s3']['object']:
                        versionId = One_record['s3']['object']['versionId']
                    else:
                        versionId = 'null'
                    Des_bucket, Des_prefix = Des_bucket_default, Des_prefix_default
                    Des_key = str(PurePosixPath(Des_prefix) / Src_key)
                    if Src_key[-1] == '/':  # handle empty-directory objects
                        Des_key += '/'
                    job = {
                        'Src_bucket': Src_bucket,
                        'Src_key': Src_key,
                        'Size': Size,
                        'Des_bucket': Des_bucket,
                        'Des_key': Des_key,
                        'versionId': versionId
                    }
        if 'Des_bucket' not in job:  # malformed message structure
            logger.warning(f'Wrong sqs job: {json.dumps(job, default=str)}')
            logger.warning('Try to handle next message')
            raise WrongRecordFormat
        if 'versionId' not in job:
            job['versionId'] = 'null'

        # TODO: partial failures within a multi-job batch are not handled yet, so the SQS batch size is kept at 1
        if job['Size'] > ResumableThreshold:
            upload_etag_full = step_function(
                job, table, s3_src_client, s3_des_client, instance_id,
                StorageClass, ChunkSize, MaxRetry, MaxThread, JobTimeout,
                ifVerifyMD5Twice, CleanUnfinishedUpload, UpdateVersionId,
                GetObjectWithVersionId)
        else:
            upload_etag_full = step_fn_small_file(job, table, s3_src_client,
                                                  s3_des_client, instance_id,
                                                  StorageClass, MaxRetry,
                                                  UpdateVersionId,
                                                  GetObjectWithVersionId)
        if upload_etag_full != "TIMEOUT" and upload_etag_full != "ERR":
            # Delete the SQS message only on a normal finish; keep it for TIMEOUT or ERR
            # Large files set MaxRetry to TIMEOUT when their thread exits; small files return MaxRetry
            # For small files that usually indicates a permissions problem rather than an oversized
            # download, so there is no point letting the next worker retry: delete the SQS message
            # directly, and DDB will not record a finished status
            # To let the SQS message reappear for small files too, so the next worker retries,
            # add upload_etag_full != "MaxRetry" to the check above
            continue
        else:
            raise TimeoutOrMaxRetry

    return {'statusCode': 200, 'body': 'Jobs completed'}
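The destination key above is built with `PurePosixPath` so the prefix and source key join with exactly one separator, with a special case restoring the trailing slash for empty "directory" objects. A small sketch of that behaviour (the prefix and keys are made up):

from pathlib import PurePosixPath

Des_prefix = "backup/2020"
for Src_key in ("photos/cat.jpg", "emptydir/"):
    Des_key = str(PurePosixPath(Des_prefix) / Src_key)
    if Src_key[-1] == '/':  # PurePosixPath drops the trailing slash, so restore it
        Des_key += '/'
    print(Des_key)
# backup/2020/photos/cat.jpg
# backup/2020/emptydir/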
Exemple #29
0
 def user_path(self) -> PurePath:
     return PurePosixPath(
         f'{self.storeCfg.root_path}/run/user/{self.user.email}')
Exemple #30
0
    def _ro_aggregates(self) -> List[Aggregate]:
        """Gather dictionary of files to be added to the manifest."""
        def guess_mediatype(
            rel_path: str,
        ) -> Tuple[Optional[str], Optional[Union[str, List[str]]]]:
            """Return the mediatypes."""
            media_types = {
                # Adapted from
                # https://w3id.org/bundle/2014-11-05/#media-types
                "txt": TEXT_PLAIN,
                "ttl": 'text/turtle; charset="UTF-8"',
                "rdf": "application/rdf+xml",
                "json": "application/json",
                "jsonld": "application/ld+json",
                "xml": "application/xml",
                ##
                "cwl": 'text/x+yaml; charset="UTF-8"',
                "provn": 'text/provenance-notation; charset="UTF-8"',
                "nt": "application/n-triples",
            }  # type: Dict[str, str]
            conforms_to = {
                "provn": "http://www.w3.org/TR/2013/REC-prov-n-20130430/",
                "cwl": "https://w3id.org/cwl/",
            }  # type: Dict[str, str]

            prov_conforms_to = {
                "provn": "http://www.w3.org/TR/2013/REC-prov-n-20130430/",
                "rdf": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
                "ttl": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
                "nt": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
                "jsonld": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
                "xml": "http://www.w3.org/TR/2013/NOTE-prov-xml-20130430/",
                "json": "http://www.w3.org/Submission/2013/SUBM-prov-json-20130424/",
            }  # type: Dict[str, str]

            extension = rel_path.rsplit(".",
                                        1)[-1].lower()  # type: Optional[str]
            if extension == rel_path:
                # No ".", no extension
                extension = None

            mediatype = None  # type: Optional[str]
            conformsTo = None  # type: Optional[Union[str, List[str]]]
            if extension in media_types:
                mediatype = media_types[extension]

            if extension in conforms_to:
                # TODO: Open CWL file to read its declared "cwlVersion", e.g.
                # cwlVersion = "v1.0"
                conformsTo = conforms_to[extension]

            if (rel_path.startswith(posix_path(PROVENANCE))
                    and extension in prov_conforms_to):
                if ".cwlprov" in rel_path:
                    # Our own!
                    conformsTo = [
                        prov_conforms_to[extension],
                        CWLPROV_VERSION,
                    ]
                else:
                    # Some other PROV
                    # TODO: Recognize ProvOne etc.
                    conformsTo = prov_conforms_to[extension]
            return (mediatype, conformsTo)

        aggregates = []  # type: List[Aggregate]
        for path in self.bagged_size.keys():

            temp_path = PurePosixPath(path)
            folder = temp_path.parent
            filename = temp_path.name

            # NOTE: Here we end up aggregating the abstract
            # data items by their sha1 hash, so that it matches
            # the entity() in the prov files.

            # TODO: Change to nih:sha-256; hashes
            #  https://tools.ietf.org/html/rfc6920#section-7
            aggregate_dict = {
                "uri": "urn:hash::sha1:" + filename,
                "bundledAs": {
                    # The arcp URI is a suitable ORE proxy; it is local to this Research Object.
                    # (as long as we don't also aggregate it by relative path!)
                    "uri": self.base_uri + path,
                    # relate it to the data/ path
                    "folder": "/%s/" % folder,
                    "filename": filename,
                },
            }  # type: Aggregate
            if path in self._file_provenance:
                # Made by workflow run, merge captured provenance
                bundledAs = aggregate_dict["bundledAs"]
                if bundledAs:
                    bundledAs.update(self._file_provenance[path])
                else:
                    aggregate_dict["bundledAs"] = cast(
                        Optional[Dict[str, Any]], self._file_provenance[path])
            else:
                # Probably made outside wf run, part of job object?
                pass
            if path in self._content_types:
                aggregate_dict["mediatype"] = self._content_types[path]

            aggregates.append(aggregate_dict)

        for path in self.tagfiles:
            if not (path.startswith(METADATA) or path.startswith(WORKFLOW)
                    or path.startswith(SNAPSHOT)):
                # probably a bagit file
                continue
            if path == str(PurePosixPath(METADATA) / "manifest.json"):
                # Should not really be there yet! But anyway, we won't
                # aggregate it.
                continue

            # These are local paths like metadata/provenance - but
            # we need to relativize them to our current directory,
            # as we are saved in metadata/manifest.json
            mediatype, conformsTo = guess_mediatype(path)
            rel_aggregates = {
                "uri": str(Path(os.pardir) / path),
                "mediatype": mediatype,
                "conformsTo": conformsTo,
            }  # type: Aggregate

            if path in self._file_provenance:
                # Propagate file provenance (e.g. timestamp)
                rel_aggregates.update(self._file_provenance[path])
            elif not path.startswith(SNAPSHOT):
                # make new timestamp?
                (
                    rel_aggregates["createdOn"],
                    rel_aggregates["createdBy"],
                ) = self._self_made()
            aggregates.append(rel_aggregates)
        aggregates.extend(self._external_aggregates)
        return aggregates
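The folder/filename split above relies on `PurePosixPath.parent` and `PurePosixPath.name`; a minimal sketch of what those yield for a typical bagged path (the path itself is made up):

from pathlib import PurePosixPath

temp_path = PurePosixPath("data/de/deadbeef1234")
print(temp_path.parent)           # data/de
print(temp_path.name)             # deadbeef1234
print("/%s/" % temp_path.parent)  # /data/de/ -- the "folder" value used in bundledAs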
Exemple #31
0
 def feed_json(self, scan_num: int) -> PurePath:
     return PurePosixPath(
         f'{self.__trial_user_path("feed")}.{scan_num}.json')
Exemple #32
0
 def get_mod_file_path(self, modid: str) -> PurePosixPath:
     '''Return the relative path of the expected mod output file that should be generated.'''
     name = self.get_name()
     mod_data = self.manager.arkman.getModData(modid)
     assert mod_data
     return PurePosixPath(f'{modid}-{mod_data["name"]}/{name}.json')
Exemple #33
0
 @classmethod
 def from_source_path(cls, source_path: PurePosixPath) -> 'RecordPath':
     if source_path.is_absolute():
         raise ValueError(source_path)
     return cls.from_parts(source_path.parts)
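For context, a relative `PurePosixPath` reports `is_absolute()` as False and exposes its components through `.parts`, which is exactly what the check and the `from_parts` call above rely on. A small sketch with made-up paths:

from pathlib import PurePosixPath

source_path = PurePosixPath("chapters/01/intro.tex")
print(source_path.is_absolute())  # False
print(source_path.parts)          # ('chapters', '01', 'intro.tex')
print(PurePosixPath("/etc/hosts").is_absolute())  # True -- would raise ValueError above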
Exemple #34
0
    def __init__(
        self,
        filepath: str,
        engine: str = "xlsxwriter",
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
        credentials: Dict[str, Any] = None,
        fs_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``ExcelDataSet`` pointing to a concrete Excel file
        on a specific filesystem.

        Args:
            filepath: Filepath in POSIX format to an Excel file prefixed with a protocol like
                `s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.
                The prefix should be any protocol supported by ``fsspec``.
                Note: `http(s)` doesn't support versioning.
            engine: The engine used to write to excel files. The default
                engine is 'xlsxwriter'.
            load_args: Pandas options for loading Excel files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html
                All defaults are preserved, except "engine", which is set to "xlrd".
            save_args: Pandas options for saving Excel files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html
                All defaults are preserved, except "index", which is set to False.
                If you would like to specify options for the `ExcelWriter`,
                you can include them under the "writer" key. Here you can
                find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            credentials: Credentials required to get access to the underlying filesystem.
                E.g. for ``GCSFileSystem`` it should look like `{"token": None}`.
            fs_args: Extra arguments to pass into underlying filesystem class constructor
                (e.g. `{"project": "my-project"}` for ``GCSFileSystem``), as well as
                to pass to the filesystem's `open` method through nested keys
                `open_args_load` and `open_args_save`.
                Here you can find all available arguments for `open`:
                https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open
                All defaults are preserved, except `mode`, which is set to `wb` when saving.
        """
        _fs_args = deepcopy(fs_args) or {}
        _fs_open_args_load = _fs_args.pop("open_args_load", {})
        _fs_open_args_save = _fs_args.pop("open_args_save", {})
        _credentials = deepcopy(credentials) or {}

        protocol, path = get_protocol_and_path(filepath, version)
        if protocol == "file":
            _fs_args.setdefault("auto_mkdir", True)

        self._protocol = protocol
        self._fs = fsspec.filesystem(self._protocol, **_credentials,
                                     **_fs_args)

        super().__init__(
            filepath=PurePosixPath(path),
            version=version,
            exists_function=self._fs.exists,
            glob_function=self._fs.glob,
        )

        # Handle default load arguments
        self._load_args = deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)

        # Handle default save arguments
        self._save_args = deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)
        self._writer_args = self._save_args.pop("writer", {"engine": engine})

        _fs_open_args_save.setdefault("mode", "wb")
        self._fs_open_args_load = _fs_open_args_load
        self._fs_open_args_save = _fs_open_args_save
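A hedged usage sketch for the constructor above, assuming this is the pandas ``ExcelDataSet`` shipped with Kedro; the import path and the S3 location below are assumptions, not taken from the snippet:

# Import path is an assumption (pre-0.19 Kedro layout); newer releases ship it in kedro-datasets.
from kedro.extras.datasets.pandas import ExcelDataSet

data_set = ExcelDataSet(
    filepath="s3://my-bucket/reports/summary.xlsx",  # made-up bucket and key
    save_args={
        "index": False,
        "writer": {"engine": "xlsxwriter"},  # popped into self._writer_args above
    },
    fs_args={"open_args_save": {"mode": "wb"}},
)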
Exemple #35
0
    def run(self, options, robot_class, **static_options):

        try:
            from robotpy_installer import installer
        except ImportError:
            raise ImportError(
                "You must have the robotpy-installer package installed to deploy code!"
            )

        from .. import config
        config.mode = 'upload'

        # run the test suite before uploading
        if not options.skip_tests:
            from .cli_test import PyFrcTest

            tester = PyFrcTest()

            retval = tester.run_test([],
                                     robot_class,
                                     options.builtin,
                                     ignore_missing_test=True)
            if retval != 0:
                print_err("ERROR: Your robot tests failed, aborting upload.")
                if not sys.stdin.isatty():
                    print_err(
                        "- Use --skip-tests if you want to upload anyways")
                    return retval

                print()
                if not yesno('- Upload anyways?'):
                    return retval

                if not yesno('- Are you sure? Your robot code may crash!'):
                    return retval

                print()
                print("WARNING: Uploading code against my better judgement...")

        # upload all files in the robot.py source directory
        robot_file = abspath(inspect.getfile(robot_class))
        robot_path = dirname(robot_file)
        robot_filename = basename(robot_file)
        cfg_filename = join(robot_path, '.deploy_cfg')

        if not options.nonstandard and robot_filename != 'robot.py':
            print_err(
                "ERROR: Your robot code must be in a file called robot.py (launched from %s)!"
                % robot_filename)
            print_err()
            print_err(
                "If you really want to do this, then specify the --nonstandard argument"
            )
            return 1

        # This probably should be configurable... oh well

        deploy_dir = PurePosixPath('/home/lvuser')
        py_deploy_subdir = 'py'
        py_new_deploy_subdir = 'py_new'
        py_deploy_dir = deploy_dir / py_deploy_subdir

        # note below: deployed_cmd appears that it only can be a single line

        # In 2015, there were stdout/stderr issues. In 2016, they seem to
        # have been fixed, but need to use -u for it to really work properly

        if options.debug:
            compileall_flags = ''
            deployed_cmd = 'env LD_LIBRARY_PATH=/usr/local/frc/lib/ /usr/local/bin/python3 -u %s/%s -v run' % (
                py_deploy_dir, robot_filename)
            deployed_cmd_fname = 'robotDebugCommand'
            extra_cmd = 'touch /tmp/frcdebug; chown lvuser:ni /tmp/frcdebug'
            bash_cmd = '/bin/bash -cex'
        else:
            compileall_flags = '-O'
            deployed_cmd = 'env LD_LIBRARY_PATH=/usr/local/frc/lib/ /usr/local/bin/python3 -u -O %s/%s run' % (
                py_deploy_dir, robot_filename)
            deployed_cmd_fname = 'robotCommand'
            extra_cmd = ''
            bash_cmd = '/bin/bash -ce'

        if options.in_place:
            replace_cmd = 'true'
            py_new_deploy_subdir = py_deploy_subdir
        else:
            replace_cmd = "[ -d %(py_deploy_dir)s ] && rm -rf %(py_deploy_dir)s; mv %(py_new_deploy_dir)s %(py_deploy_dir)s"

        py_new_deploy_dir = deploy_dir / py_new_deploy_subdir
        replace_cmd %= {
            "py_deploy_dir": py_deploy_dir,
            "py_new_deploy_dir": py_new_deploy_dir
        }

        check_version = '/usr/local/bin/python3 -c "exec(open(\\"$SITEPACKAGES/wpilib/version.py\\", \\"r\\").read(), globals()); print(\\"WPILib version on robot is \\" + __version__);exit(0) if __version__ == \\"%s\\" else exit(89)"' % wpilib.__version__
        if options.no_version_check:
            check_version = ''

        check_startup_dlls = '(if [ "$(grep ^StartupDLLs /etc/natinst/share/ni-rt.ini)" != "" ]; then exit 91; fi)'

        # This is a nasty bit of code now...
        sshcmd = inspect.cleandoc("""
            %(bash_cmd)s '[ -x /usr/local/bin/python3 ] || exit 87
            SITEPACKAGES=$(/usr/local/bin/python3 -c "import site; print(site.getsitepackages()[0])")
            [ -f $SITEPACKAGES/wpilib/version.py ] || exit 88
            %(check_version)s
            echo "%(deployed_cmd)s" > %(deploy_dir)s/%(deployed_cmd_fname)s
            %(extra_cmd)s
            %(check_startup_dlls)s
            '
        """)

        sshcmd %= locals()

        sshcmd = re.sub("\n+", ";", sshcmd)

        nc_thread = None

        try:
            controller = installer.ssh_from_cfg(cfg_filename,
                                                username='******',
                                                password='',
                                                hostname=options.robot,
                                                allow_mitm=True,
                                                no_resolve=options.no_resolve)

            try:
                # Housekeeping first
                logger.debug('SSH: %s', sshcmd)
                controller.ssh(sshcmd)
            except installer.SshExecError as e:
                doret = True
                if e.retval == 87:
                    print_err(
                        "ERROR: python3 was not found on the roboRIO: have you installed robotpy?"
                    )
                elif e.retval == 88:
                    print_err(
                        "ERROR: WPILib was not found on the roboRIO: have you installed robotpy?"
                    )
                elif e.retval == 89:
                    print_err("ERROR: expected WPILib version %s" %
                              wpilib.__version__)
                    print_err()
                    print_err("You should either:")
                    print_err(
                        "- If the robot version is older, upgrade the RobotPy on your robot"
                    )
                    print_err("- Otherwise, upgrade pyfrc on your computer")
                    print_err()
                    print_err(
                        "Alternatively, you can specify --no-version-check to skip this check"
                    )
                elif e.retval == 90:
                    print_err("ERROR: error running compileall")
                elif e.retval == 91:
                    # Not an error; ssh in as admin and fix the startup dlls (Saves 24M of RAM)
                    # -> https://github.com/wpilibsuite/EclipsePlugins/pull/154
                    logger.info("Fixing StartupDLLs to save RAM...")
                    controller.username = '******'
                    controller.ssh(
                        'sed -i -e "s/^StartupDLLs/;StartupDLLs/" /etc/natinst/share/ni-rt.ini'
                    )

                    controller.username = '******'
                    doret = False
                else:
                    print_err("ERROR: %s" % e)

                if doret:
                    return 1

            # Copy the files over, copy to a temporary directory first
            # -> this is inefficient, but it's easier in sftp
            tmp_dir = tempfile.mkdtemp()
            try:
                py_tmp_dir = join(tmp_dir, py_new_deploy_subdir)
                self._copy_to_tmpdir(py_tmp_dir, robot_path)
                controller.sftp(py_tmp_dir,
                                deploy_dir,
                                mkdir=not options.in_place)
            finally:
                shutil.rmtree(tmp_dir)

            # start the netconsole listener now if requested, *before* we
            # actually start the robot code, so we can see all messages
            if options.nc or options.nc_ds:
                from netconsole import run
                nc_event = threading.Event()
                nc_thread = threading.Thread(target=run,
                                             args=(controller.hostname, ),
                                             kwargs=dict(
                                                 connect_event=nc_event,
                                                 fakeds=options.nc_ds),
                                             daemon=True)
                nc_thread.start()
                nc_event.wait(5)
                logger.info("Netconsole is listening...")

            if not options.in_place:
                # Restart the robot code and we're done!
                sshcmd = "%(bash_cmd)s '" + \
                         '%(replace_cmd)s;' + \
                         '/usr/local/bin/python3 %(compileall_flags)s -m compileall -q -r 5 /home/lvuser/py;' + \
                         '. /etc/profile.d/natinst-path.sh; ' + \
                         'chown -R lvuser:ni %(py_deploy_dir)s; ' + \
                         'sync; ' + \
                         '/usr/local/frc/bin/frcKillRobot.sh -t -r || true' + \
                         "'"

                sshcmd %= {
                    'bash_cmd': bash_cmd,
                    'compileall_flags': compileall_flags,
                    'py_deploy_dir': py_deploy_dir,
                    'replace_cmd': replace_cmd,
                }

                logger.debug('SSH: %s', sshcmd)
                controller.ssh(sshcmd)

        except installer.Error as e:
            print_err("ERROR: %s" % e)
            return 1
        else:
            print("\nSUCCESS: Deploy was successful!")

        if nc_thread is not None:
            nc_thread.join()

        return 0
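Note that the remote locations above are built with `PurePosixPath`, so the deploy code always produces forward-slash paths for the roboRIO even when run from Windows. A tiny sketch of the difference (the Windows path is made up):

from pathlib import PurePosixPath, PureWindowsPath

deploy_dir = PurePosixPath('/home/lvuser')
print(deploy_dir / 'py')                   # /home/lvuser/py
print(PureWindowsPath('C:/robot') / 'py')  # C:\robot\py -- what a native Windows path would give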
Exemple #36
0

def test_convert_paths_raises_error_on_relative_project_path():
    path = Path("relative/path")
    with pytest.raises(ValueError) as excinfo:
        _convert_paths_to_absolute_posix(project_path=path, conf_dictionary={})

    assert (str(excinfo.value) ==
            f"project_path must be an absolute path. Received: {path}")


@pytest.mark.parametrize(
    "project_path,input_conf,expected",
    [
        (
            PurePosixPath("/tmp"),
            {
                "handler": {
                    "filename": "logs/info.log"
                }
            },
            {
                "handler": {
                    "filename": "/tmp/logs/info.log"
                }
            },
        ),
        (
            PurePosixPath("/User/kedro"),
            {
                "my_dataset": {
Exemple #37
0
 def relative_to(self, path):
     try:
         return PurePosixPath.relative_to(self, path)
     except ValueError:
         return self
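The override above turns the usual ValueError from `PurePosixPath.relative_to` into a "return the path unchanged" fallback. A minimal sketch using a hypothetical `LenientPath` subclass (the class name is made up):

from pathlib import PurePosixPath

class LenientPath(PurePosixPath):
    def relative_to(self, path):
        try:
            return PurePosixPath.relative_to(self, path)
        except ValueError:
            return self

print(LenientPath("/srv/data/a.txt").relative_to("/srv"))    # data/a.txt
print(LenientPath("/srv/data/a.txt").relative_to("/other"))  # /srv/data/a.txt (unchanged)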
Exemple #38
0
#!/usr/bin/python3 -tt

from rpmfluff import SimpleRpmBuild
from rpmfluff import YumRepoBuild
from pathlib import PurePosixPath
import os
import shutil
import subprocess

work_file = os.path.realpath(__file__)
work_dir = os.path.dirname(work_file)
file_base_name = PurePosixPath(work_file).stem
repo_dir = os.path.join(work_dir, file_base_name)
temp_dir = os.path.join(repo_dir, 'temp')

if not os.path.exists(repo_dir):
    os.makedirs(repo_dir)

if not os.path.exists(temp_dir):
    os.makedirs(temp_dir)

os.chdir(temp_dir)

pkgs = []
rpm = SimpleRpmBuild('TestA', '1.0.0', '1', ['noarch'])
rpm.add_requires('TestB')
rpm.add_group('Testgroup')
pkgs.append(rpm)
# Used for install remove tests if requirement TestB is handled properly.

rpm = SimpleRpmBuild('TestB', '1.0.0', '1', ['noarch'])