def find_page(self, model, data):
    """Find a page by its URL and import its data.

    Data importing has to be done here because often the page can't be
    saved until the data is imported (i.e. null fields).

    ``data`` must contain a ``url`` key (popped here); the remaining keys
    are handed to ``self.import_data``.  Raises ``CommandError`` when the
    url is missing, not absolute, or (for a new page) the parent page
    does not exist.  Returns the updated or newly created page.
    """
    try:
        url = PurePosixPath(data.pop('url'))
        if not url.is_absolute():
            raise CommandError("Path %s must be absolute" % url)
    except KeyError:
        raise CommandError("Need `url' for page")
    try:
        # Existing page: import data onto it and save in place.
        page = model.objects.get(url_path=normalise(url))
        self.import_data(page, data)
        page.save()
        self.stdout.write("Updating existing page %s" % url)
    except model.DoesNotExist:
        # New page: the parent must already exist so we can attach to it.
        try:
            # pylint:disable=no-member
            parent = Page.objects.get(url_path=normalise(url.parent))
        except Page.DoesNotExist:
            raise CommandError("Parent of %s doesn't exist" % url)
        page = model(slug=url.name)
        self.import_data(page, data)
        # add_child saves the page as part of inserting it into the tree.
        parent.add_child(instance=page)
        self.stdout.write("Creating new page %s" % url)
    return page
def handle_import(self, name, compilation, rule):
    """Implementation of the core Sass import mechanism, which just looks
    for files on disk.

    Returns a SourceFile for the first matching candidate, or None when
    nothing matches.
    """
    # TODO this is all not terribly well-specified by Sass.  at worst,
    # it's unclear how far "upwards" we should be allowed to go.  but i'm
    # also a little fuzzy on e.g. how relative imports work from within a
    # file that's not actually in the search path.
    # TODO i think with the new origin semantics, i've made it possible to
    # import relative to the current file even if the current file isn't
    # anywhere in the search path.  is that right?
    path = PurePosixPath(name)
    search_exts = list(compilation.compiler.dynamic_extensions)
    if path.suffix and path.suffix in search_exts:
        basename = path.stem
    else:
        basename = path.name
    relative_to = path.parent
    search_path = []  # tuple of (origin, start_from)
    # BUGFIX: this adjustment used to run *after* search_path had already
    # been extended, so the extended tuples captured the unadjusted
    # relative_to (absolute imports kept their leading '/'), and the
    # current-file entry was appended last despite the comment saying
    # "first".  Adjust relative_to before building the search path.
    if relative_to.is_absolute():
        # Strip the leading '/' so the path can be joined onto an origin.
        relative_to = PurePosixPath(*relative_to.parts[1:])
    elif rule.source_file.origin:
        # Search relative to the current file first, only if not doing an
        # absolute import
        search_path.append((
            rule.source_file.origin,
            rule.source_file.relpath.parent / relative_to,
        ))
    search_path.extend(
        (origin, relative_to)
        for origin in compilation.compiler.search_path
    )
    # Sass convention: underscore-prefixed partials take precedence.
    for prefix, suffix in product(('_', ''), search_exts):
        filename = prefix + basename + suffix
        for origin, relative_to in search_path:
            relpath = relative_to / filename
            # Lexically (ignoring symlinks!) eliminate .. from the part
            # of the path that exists within Sass-space.  pathlib
            # deliberately doesn't do this, but os.path does.
            relpath = PurePosixPath(os.path.normpath(str(relpath)))
            if rule.source_file.key == (origin, relpath):
                # Avoid self-import
                # TODO is this what ruby does?
                continue
            path = origin / relpath
            if not path.exists():
                continue
            # All good!
            # TODO if this file has already been imported, we'll do the
            # source preparation twice.  make it lazy.
            return SourceFile.read(origin, relpath)
def collect_files(template_dir, url_base):
    """Walk ``template_dir`` and yield ``(url, contents)`` pairs.

    The URL for each file is ``url_base`` joined with the file's path
    relative to ``template_dir``.  Files are opened as UTF-8 text and
    passed through ``file_contents``.
    """
    # (The previous version bound an unused `basepath` local here.)
    baseurl = PurePosixPath(url_base)
    for dirname, _, files in os.walk(template_dir):
        rel_dirname = PurePosixPath(dirname).relative_to(template_dir)
        for filename in files:
            template_path = path.join(dirname, filename)
            url = baseurl.joinpath(rel_dirname, filename)
            with open(template_path, "r", encoding="utf8") as f:
                yield str(url), file_contents(f)
def fetch_resource(self, resource_name):
    """Copy every file of the named resource into place under ``self.root``.

    ``self._resource_tables[resource_name]`` maps a destination subpath
    (relative to ``self.root``) to a source file name inside
    ``self._resource_dir_path``.  Both sides are validated as safe
    relative paths before anything touches the filesystem.
    """
    resource_table = self._resource_tables[resource_name]
    for destination, source_name in resource_table.items():
        source_subpath = PurePosixPath(source_name)
        # Source must be a bare file name: not absolute, a single
        # component, and no '..' traversal.
        if (
            source_subpath.is_absolute()
            or len(source_subpath.parts) > 1
            or '..' in source_subpath.parts
        ):
            raise RuntimeError(source_name)
        source_path = self._resource_dir_path / source_name
        destination_subpath = PurePosixPath(destination)
        # Destination may be nested, but must stay inside self.root.
        if (
            destination_subpath.is_absolute()
            or '..' in destination_subpath.parts
        ):
            raise RuntimeError(destination)
        destination_path: PosixPath = self.root / destination_subpath
        if not destination_path.parent.exists():
            destination_path.parent.mkdir(parents=True)
        if destination_path.is_dir():
            # Refuse to clobber a directory with a file.
            raise IsADirectoryError(str(destination))
        if destination_path.is_symlink():
            # Remove a stale symlink so copyfile writes a regular file.
            destination_path.unlink()
        shutil.copyfile(str(source_path), str(destination_path))
def systemd_escape_path(path: pathlib.PurePosixPath) -> str:
    """Return *path* escaped for inclusion in a systemd unit name.

    Delegates the actual escaping to the ``systemd-escape --path``
    command; see its man page for the escaping rules.
    """
    if not path.is_absolute():
        raise ValueError("systemd_escape_path can only escape absolute paths")
    if ".." in path.parts:
        raise ValueError(
            "systemd_escape_path can only escape paths without '..' components"
        )
    command = ["systemd-escape", "--path", "--", str(path)]
    output: bytes = subprocess.check_output(command)
    return output.decode("utf-8").rstrip("\n")
def as_source_path(
        self, *, suffix: Optional[str] = None
) -> PurePosixPath:
    """Return this object's parts joined into a PurePosixPath.

    When *suffix* is given, it replaces (or adds) the final component's
    suffix via ``with_suffix``.
    """
    result = PurePosixPath(*self.parts)
    return result if suffix is None else result.with_suffix(suffix)
def get_core_file_path(self) -> PurePosixPath:
    '''Return the relative path of the core output file that should be
    generated: ``<name>.json`` where ``<name>`` comes from get_name().'''
    core_name = self.get_name()
    return PurePosixPath(f'{core_name}.json')
def session_path(self) -> PurePath:
    """Path under the store root where this session's logs live:
    ``<root>/run/session_logs/<trial_id>/<user email>/<session_id>``."""
    root = self.storeCfg.root_path
    return PurePosixPath(
        f'{root}/run/session_logs/'
        f'{self.trial_id}/{self.user.email}/{self.session_id}'
    )
def __trial_video(self, catalog: str, video_id: str) -> PurePath:
    # Private helper: JSON metadata path for a single video, formed by
    # appending "_<video_id>.json" to the per-user trial path for
    # *catalog* (see __trial_user_path).
    return PurePosixPath(
        f'{self.__trial_user_path(catalog)}_{video_id}.json')
def parse_name(cls, path, name_str):
    """Build an instance from *path* and a name string.

    A single leading ':' on *name_str* is stripped before the name is
    wrapped in a PurePosixPath.
    """
    stripped = name_str[1:] if name_str.startswith(':') else name_str
    return cls(path, PurePosixPath(stripped))
def results_path_out(self) -> PurePath:
    """path to store the results of trials once complete"""
    root = self.storeCfg.root_path
    return PurePosixPath(f'{root}/results')
def test_paths_of_a_different_flavour():
    # Ordering comparisons between different path flavours (S3 vs POSIX
    # vs Windows) are undefined and must raise TypeError, matching how
    # pathlib treats PurePosixPath vs PureWindowsPath comparisons.
    with pytest.raises(TypeError):
        PureS3Path('/bucket/key') < PurePosixPath('/bucket/key')
    with pytest.raises(TypeError):
        PureWindowsPath('/bucket/key') > PureS3Path('/bucket/key')
def test(self):
    """Files-condition cases with duplicate and symbol-valued file names
    all resolve to a set of unique paths matched by the primitive."""
    # ARRANGE #
    cases = [
        CaseWithFiles(
            'single file name',
            args.FilesCondition([
                args.FileCondition('file-name'),
            ]),
            SymbolsArrEx.empty(),
            {PurePosixPath('file-name'): asrt.is_none}),
        CaseWithFiles(
            'two file names',
            args.FilesCondition([
                args.FileCondition('fn1'),
                args.FileCondition('fn2'),
            ]),
            SymbolsArrEx.empty(),
            {
                PurePosixPath('fn1'): asrt.is_none,
                PurePosixPath('fn2'): asrt.is_none,
            }),
        # Duplicated names collapse to a single expected path.
        CaseWithFiles(
            'two files with the same names',
            args.FilesCondition([
                args.FileCondition('fn'),
                args.FileCondition('fn'),
            ]),
            SymbolsArrEx.empty(),
            {
                PurePosixPath('fn'): asrt.is_none,
            }),
        CaseWithFiles(
            'some unique files, some repeated',
            args.FilesCondition([
                args.FileCondition('fn1'),
                args.FileCondition('fn2'),
                args.FileCondition('fn1'),
            ]),
            SymbolsArrEx.empty(),
            {
                PurePosixPath('fn1'): asrt.is_none,
                PurePosixPath('fn2'): asrt.is_none,
            }),
        # Two distinct symbols that both resolve to 'fn' must also
        # collapse to one path, while both references are reported.
        CaseWithFiles(
            'different symbols with identical value',
            args.FilesCondition([
                args.FileCondition(SymbolWithReferenceSyntax('sym_ref1')),
                args.FileCondition(SymbolWithReferenceSyntax('sym_ref2')),
            ]),
            SymbolsArrEx(
                [
                    StringConstantSymbolContext('sym_ref1', 'fn'),
                    StringConstantSymbolContext('sym_ref2', 'fn'),
                ],
                [
                    is_sym_ref_to_string__w_all_indirect_refs_are_strings(
                        'sym_ref1'),
                    is_sym_ref_to_string__w_all_indirect_refs_are_strings(
                        'sym_ref2'),
                ]),
            {
                PurePosixPath('fn'): asrt.is_none,
            }),
    ]
    for case in cases:
        with self.subTest(case.name):
            # ACT & ASSERT #
            CHECKER.check__w_source_variants(
                self,
                case.source.as_arguments,
                None,
                arrangement_wo_tcds(case.symbols.symbol_table),
                Expectation(
                    ParseExpectation(
                        symbol_references=case.symbols.expected_references_assertion),
                    ExecutionExpectation(),
                    prim_asrt__constant(
                        asrt_primitive.files_matches(case.expected))))
def _exists(self) -> bool:
    # True when an object exists at this dataset's load path on S3.
    # PurePosixPath round-trip normalises the path string before the
    # s3fs isfile check.
    load_path = self._get_load_path()
    return self._s3.isfile(str(PurePosixPath(load_path)))
def get_series():
    """Return a list of pandas Series fixtures covering a wide range of
    inferable types: numeric, categorical, string, bool, complex,
    datetime/timedelta, paths, URLs, UUIDs, IPs, files/images, emails
    and mixed objects.

    Depends on module-level ``base_path``, ``btype`` and ``FQDA``.
    Appends a pandas>=1.0 ``string`` dtype series when available.
    """
    test_series = [
        # Int Series
        pd.Series([1, 2, 3], name="int_series"),
        pd.Series(range(10), name="int_range"),
        pd.Series([1, 2, 3], name="Int64_int_series", dtype="Int64"),
        pd.Series([1, 2, 3, np.nan], name="Int64_int_nan_series", dtype="Int64"),
        pd.Series([1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0], name="int_series_boolean"),
        # Count
        pd.Series(np.array([1, 2, 3, 4], dtype=np.uint32), name="np_uint32"),
        pd.Series([1, 2, 3, 4], dtype="UInt32", name="pd_uint32"),
        # Categorical
        pd.Series([1, 2, 3], name="categorical_int_series", dtype="category"),
        pd.Series(
            pd.Categorical(
                ["A", "B", "C", "C", "B", "A"],
                categories=["A", "B", "C"],
                ordered=False,
            ),
            name="categorical_char",
        ),
        pd.Series([1.0, 2.0, 3.1], dtype="category", name="categorical_float_series"),
        pd.Series(
            ["Georgia", "Sam"], dtype="category", name="categorical_string_series"
        ),
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1)],
            name="categorical_complex_series",
            dtype="category",
        ),
        # Ordinal
        pd.Series(
            pd.Categorical(
                ["A", "B", "C", "C", "B", "A"], categories=["A", "B", "C"], ordered=True
            ),
            name="ordinal",
        ),
        # Float Series
        pd.Series([1.0, 2.1, 3.0], name="float_series"),
        pd.Series([1.0, 2.5, np.nan], name="float_nan_series"),
        pd.Series([1.0, 2.0, 3.0, 4.0], name="float_series2"),
        pd.Series(np.array([1.2, 2, 3, 4], dtype=np.float64), name="float_series3"),
        pd.Series([1, 2, 3.05, 4], dtype=np.float64, name="float_series4"),
        pd.Series([np.nan, 1.2], name="float_series5"),
        pd.Series([np.nan, 1.1], dtype=np.single, name="float_series6"),
        pd.Series([np.inf, np.NINF, np.PINF, 1000000.0, 5.5], name="float_with_inf"),
        pd.Series([np.inf, np.NINF, np.Infinity, np.PINF], name="inf_series"),
        pd.Series([1, 2, np.nan], name="int_nan_series"),
        # Nan Series
        pd.Series([np.nan], name="nan_series"),
        pd.Series([np.nan, np.nan, np.nan, np.nan], name="nan_series_2"),
        # String Series
        pd.Series(["Patty", "Valentine"], name="string_series"),
        pd.Series(["mack", "the", "finger"], name="string_unicode_series"),
        pd.Series(
            np.array(["upper", "hall"], dtype=np.unicode_),
            name="string_np_unicode_series",
        ),
        pd.Series(["1.0", "2.0", np.nan], name="string_num_nan"),
        pd.Series(["1,000.0", "2.1", np.nan], name="string_with_sep_num_nan"),
        pd.Series(["1.0", "2.0", "3.0"], name="string_num"),
        pd.Series(["1.0", "45.67", np.nan], name="string_flt_nan"),
        pd.Series(["1.0", "45.67", "3.5"], name="string_flt"),
        pd.Series(
            [
                "I was only robbing the register,",
                "I hope you understand",
                "One of us had better call up the cops",
                "In the hot New Jersey night",
                np.nan,
            ],
            name="string_str_nan",
        ),
        pd.Series(["True", "False", None], name="string_bool_nan"),
        pd.Series(range(20), name="int_str_range").astype("str"),
        pd.Series(
            [
                "http://www.cwi.nl:80/%7Eguido/Python.html",
                "https://github.com/dylan-profiling/hurricane",
            ],
            name="str_url",
        ),
        pd.Series(
            [r"C:\\home\\user\\file.txt", r"C:\\home\\user\\test2.txt"],
            name="path_series_windows_str",
        ),
        pd.Series(
            [r"/home/user/file.txt", r"/home/user/test2.txt"],
            name="path_series_linux_str",
        ),
        pd.Series(["0011", "12"], name="str_int_leading_zeros"),
        pd.Series(["0.0", "0.04", "0"], name="str_float_non_leading_zeros"),
        pd.Series(["0.0", "0.000", "0", "2"], name="str_int_zeros"),
        # Bool Series
        pd.Series([True, False], name="bool_series"),
        pd.Series([True, False, None], name="bool_nan_series"),
        pd.Series([True, False, None], name="nullable_bool_series", dtype=btype),
        pd.Series([True, False, False, True], name="bool_series2", dtype=bool),
        # NOTE(review): exact duplicate of the previous entry — kept as-is.
        pd.Series([True, False, False, True], name="bool_series2", dtype=bool),
        pd.Series(np.array([1, 0, 0, 1], dtype=bool), name="bool_series3"),
        # Complex Series
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1)],
            name="complex_series",
        ),
        pd.Series(
            [
                complex(0, 0),
                complex(1, 2),
                complex(3, -1),
                complex(np.nan, np.nan),
            ],
            name="complex_series_nan",
        ),
        pd.Series(["(1+1j)", "(2+2j)", "(10+100j)"], name="str_complex"),
        pd.Series(["(1+1j)", "(2+2j)", "(10+100j)", "NaN"], name="str_complex_nan"),
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1), np.nan],
            name="complex_series_nan_2",
        ),
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1), np.nan],
            name="complex_series_py_nan",
        ),
        pd.Series(
            [complex(0, 0), complex(1, 2), complex(3, -1)], name="complex_series_py"
        ),
        pd.Series(
            [
                complex(0, 0),
                complex(1, 0),
                complex(3, 0),
                complex(-1, 0),
            ],
            name="complex_series_float",
        ),
        # Datetime Series
        pd.Series(["1937-05-06", "20/4/2014"], name="string_date"),
        pd.Series(["1941-05-24", "13/10/2016"], name="timestamp_string_series"),
        pd.to_datetime(
            pd.Series(
                [datetime.datetime(2017, 3, 5, 12, 2), datetime.datetime(2019, 12, 4)],
                name="timestamp_series",
            )
        ),
        pd.to_datetime(
            pd.Series(
                [
                    datetime.datetime(2017, 3, 5),
                    datetime.datetime(2019, 12, 4, 3, 2, 0),
                    pd.NaT,
                ],
                name="timestamp_series_nat",
            )
        ),
        pd.to_datetime(
            pd.Series(
                [datetime.datetime(2017, 3, 5), datetime.datetime(2019, 12, 4), pd.NaT],
                name="date_series_nat",
            )
        ),
        pd.Series(
            pd.date_range(
                start="2013-05-18 12:00:01",
                periods=2,
                freq="H",
                tz="Europe/Brussels",
                name="timestamp_aware_series",
            )
        ),
        pd.to_datetime(
            pd.Series(
                [
                    datetime.date(2011, 1, 1),
                    datetime.date(2012, 1, 2),
                    datetime.date(2013, 1, 1),
                ],
                name="datetime",
            )
        ),
        # Date series
        pd.Series(
            [
                datetime.date(2011, 1, 1),
                datetime.date(2012, 1, 2),
                datetime.date(2013, 1, 1),
            ],
            name="date",
        ),
        # Time series
        pd.Series(
            [
                datetime.time(8, 43, 12),
                datetime.time(9, 43, 12),
                datetime.time(10, 43, 12),
            ],
            name="time",
        ),
        # http://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations
        # pd.to_datetime(
        #     pd.Series(
        #         [
        #             datetime.datetime(year=1, month=1, day=1, hour=8, minute=43, second=12),
        #             datetime.datetime(year=1, month=1, day=1, hour=9, minute=43, second=12),
        #             datetime.datetime(
        #                 year=1, month=1, day=1, hour=10, minute=43, second=12
        #             ),
        #         ],
        #         name="datetime_to_time",
        #     )
        # ),
        # Timedelta Series
        pd.Series([pd.Timedelta(days=i) for i in range(3)], name="timedelta_series"),
        pd.Series(
            [pd.Timedelta(days=i) for i in range(3)] + [pd.NaT],
            name="timedelta_series_nat",
        ),
        pd.Series(
            [
                pd.Timedelta("1 days 00:03:43"),
                pd.Timedelta("5 days 12:33:57"),
                pd.Timedelta("0 days 01:25:07"),
                pd.Timedelta("-2 days 13:46:56"),
                pd.Timedelta("1 days 23:49:25"),
            ],
            name="timedelta_negative",
        ),
        # Path Series
        pd.Series(
            [
                PurePosixPath("/home/user/file.txt"),
                PurePosixPath("/home/user/test2.txt"),
            ],
            name="path_series_linux",
        ),
        pd.Series(
            [
                PurePosixPath("/home/user/file.txt"),
                PurePosixPath("/home/user/test2.txt"),
                None,
            ],
            name="path_series_linux_missing",
        ),
        pd.Series(
            [
                PureWindowsPath("C:\\home\\user\\file.txt"),
                PureWindowsPath("C:\\home\\user\\test2.txt"),
            ],
            name="path_series_windows",
        ),
        # Url Series
        pd.Series(
            [
                urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
                urlparse("https://github.com/dylan-profiling/hurricane"),
            ],
            name="url_series",
        ),
        pd.Series(
            [
                urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
                urlparse("https://github.com/dylan-profiling/hurricane"),
                np.nan,
            ],
            name="url_nan_series",
        ),
        pd.Series(
            [
                urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
                urlparse("https://github.com/dylan-profiling/hurricane"),
                None,
            ],
            name="url_none_series",
        ),
        # UUID Series
        pd.Series(
            [
                uuid.UUID("0b8a22ca-80ad-4df5-85ac-fa49c44b7ede"),
                uuid.UUID("aaa381d6-8442-4f63-88c8-7c900e9a23c6"),
                uuid.UUID("00000000-0000-0000-0000-000000000000"),
            ],
            name="uuid_series",
        ),
        pd.Series(
            [
                uuid.UUID("0b8a22ca-80ad-4df5-85ac-fa49c44b7ede"),
                uuid.UUID("aaa381d6-8442-4f63-88c8-7c900e9a23c6"),
                uuid.UUID("00000000-0000-0000-0000-000000000000"),
                None,
            ],
            name="uuid_series_missing",
        ),
        pd.Series(
            [
                "0b8a22ca-80ad-4df5-85ac-fa49c44b7ede",
                "aaa381d6-8442-4f63-88c8-7c900e9a23c6",
                "00000000-0000-0000-0000-000000000000",
            ],
            name="uuid_series_str",
        ),
        # Object Series
        pd.Series([[1, ""], [2, "Rubin"], [3, "Carter"]], name="mixed_list[str,int]"),
        pd.Series(
            [{"why": "did you"}, {"bring him": "in for he"}, {"aint": "the guy"}],
            name="mixed_dict",
        ),
        pd.Series(
            [pd.to_datetime, pd.to_timedelta, pd.read_json, pd.to_pickle],
            name="callable",
        ),
        pd.Series([pd, np], name="module"),
        pd.Series(["1.1", "2"], name="textual_float"),
        pd.Series(["1.1", "2", "NAN"], name="textual_float_nan"),
        # Object (Mixed, https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.api.types.infer_dtype.html)
        pd.Series(["a", 1], name="mixed_integer"),
        pd.Series([True, False, np.nan], name="mixed"),
        pd.Series([[True], [False], [False]], name="mixed_list"),
        # NOTE(review): "mixed_list[str,int]" and "mixed_dict" repeat the
        # entries above — kept as-is.
        pd.Series([[1, ""], [2, "Rubin"], [3, "Carter"]], name="mixed_list[str,int]"),
        pd.Series(
            [{"why": "did you"}, {"bring him": "in for he"}, {"aint": "the guy"}],
            name="mixed_dict",
        ),
        # IP
        pd.Series([IPv4Address("127.0.0.1"), IPv4Address("127.0.0.1")], name="ip"),
        pd.Series(["127.0.0.1", "127.0.0.1"], name="ip_str"),
        # Empty
        pd.Series([], name="empty", dtype=np.float64),
        pd.Series([], name="empty_float", dtype=float),
        pd.Series([], name="empty_int64", dtype="Int64"),
        pd.Series([], name="empty_object", dtype="object"),
        pd.Series([], name="empty_bool", dtype=bool),
        # IP
        # NOTE(review): "ip" and "ip_str" repeat entries above — kept as-is.
        pd.Series([IPv4Address("127.0.0.1"), IPv4Address("127.0.0.1")], name="ip"),
        pd.Series(
            [IPv4Address("127.0.0.1"), None, IPv4Address("127.0.0.1")],
            name="ip_missing",
        ),
        pd.Series(
            [IPv6Address("0:0:0:0:0:0:0:1"), IPv4Address("127.0.0.1")],
            name="ip_mixed_v4andv6",
        ),
        pd.Series(["127.0.0.1", "127.0.0.1"], name="ip_str"),
        # File
        pd.Series(
            [
                pathlib.Path(os.path.join(base_path, "series.py")).absolute(),
                pathlib.Path(os.path.join(base_path, "__init__.py")).absolute(),
                pathlib.Path(os.path.join(base_path, "utils.py")).absolute(),
            ],
            name="file_test_py",
        ),
        pd.Series(
            [
                pathlib.Path(os.path.join(base_path, "..", "py.typed")).absolute(),
                pathlib.Path(
                    os.path.join(
                        base_path, "..", "visualisation", "circular_packing.html"
                    )
                ).absolute(),
                pathlib.Path(os.path.join(base_path, "series.py")).absolute(),
            ],
            name="file_mixed_ext",
        ),
        pd.Series(
            [
                pathlib.Path(os.path.join(base_path, "series.py")).absolute(),
                None,
                pathlib.Path(os.path.join(base_path, "__init__.py")).absolute(),
                None,
                pathlib.Path(os.path.join(base_path, "utils.py")).absolute(),
            ],
            name="file_test_py_missing",
        ),
        # Image
        pd.Series(
            [
                pathlib.Path(
                    os.path.join(
                        base_path,
                        "../visualisation/typesets/typeset_complete.png",
                    )
                ).absolute(),
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_standard.png",
                    )
                ).absolute(),
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_geometry.png",
                    )
                ).absolute(),
            ],
            name="image_png",
        ),
        pd.Series(
            [
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_complete.png",
                    )
                ).absolute(),
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_standard.png",
                    )
                ).absolute(),
                None,
                pathlib.Path(
                    os.path.join(
                        base_path,
                        r"../visualisation/typesets/typeset_geometry.png",
                    )
                ).absolute(),
                None,
            ],
            name="image_png_missing",
        ),
        # Email
        pd.Series(
            [FQDA("test", "example.com"), FQDA("info", "example.eu")],
            name="email_address",
        ),
        pd.Series(
            [FQDA("test", "example.com"), FQDA("info", "example.eu"), None],
            name="email_address_missing",
        ),
        pd.Series(["*****@*****.**", "*****@*****.**"], name="email_address_str"),
    ]
    # The dedicated "string" extension dtype only exists from pandas 1.0.
    if int(pd.__version__.split(".")[0]) >= 1:
        pandas_1_series = [
            pd.Series(
                ["Patty", "Valentine"], dtype="string", name="string_dtype_series"
            )
        ]
        test_series.extend(pandas_1_series)
    return test_series
def _load(self) -> pd.DataFrame:
    # Stream a parquet object from S3 at the dataset's load path and
    # parse it with any configured load kwargs.
    load_path = PurePosixPath(self._get_load_path())
    with self._s3.open(str(load_path), mode="rb") as s3_file:
        return pd.read_parquet(s3_file, **self._load_args)
def upload(
    paths,
    girder_collection,
    girder_top_folder,
    local_top_path,
    girder_instance,
    existing,
    validation_,
    fake_data,
    develop_debug,
):
    """Upload local files into a Girder collection, mirroring their layout
    under ``local_top_path`` into ``girder_top_folder``.

    Progress is rendered as a pyout table; each file is processed by a
    generator (``process_path``) that yields partial record updates.
    ``existing`` controls what happens when a file is already present
    ('skip' / 'reupload'); ``validation_`` controls NWB validation
    ('skip' / 'require' / other).
    NOTE(review): ``fake_data`` is accepted but never used in this body.
    """
    # Ensure that we have all Folders created as well
    assert local_top_path, "--local-top-path must be specified for now"
    assert girder_collection, "--collection must be specified"
    if not girder_top_folder:
        # TODO: UI
        # Most often it would be the same directory name as of the local top dir
        girder_top_folder = op.basename(local_top_path)
        if girder_top_folder in (op.pardir, op.curdir):
            girder_top_folder = op.basename(op.realpath(local_top_path))

    import multiprocessing

    from .. import girder
    from ..pynwb_utils import get_metadata
    from ..pynwb_utils import validate as pynwb_validate
    from ..pynwb_utils import ignore_benign_pynwb_warnings
    from ..support.generatorify import generator_from_callback
    from ..support.pyout import naturalsize
    from pathlib import Path, PurePosixPath

    ignore_benign_pynwb_warnings()  # so validate doesn't whine

    client = girder.authenticate(girder_instance)
    collection_rec = girder.ensure_collection(client, girder_collection)
    lgr.debug("Working with collection %s", collection_rec)

    local_top_path = Path(local_top_path)
    girder_top_folder = PurePosixPath(girder_top_folder)

    # We will keep a shared set of "being processed" paths so
    # we could limit the number of them until
    # https://github.com/pyout/pyout/issues/87
    # properly addressed
    process_paths = set()
    uploaded_paths = {}  # path: uploaded size

    def skip_file(msg):
        # Uniform "skipped" record for the pyout table.
        return {"status": "skipped", "message": msg}

    lock = multiprocessing.Lock()

    # TODO: we might want to always yield a full record so no field is not
    # provided to pyout to cause it to halt
    def process_path(path, relpath):
        try:
            try:
                stat = os.stat(path)
                yield {"size": stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            yield {"status": "checking girder"}
            girder_folder = girder_top_folder / relpath.parent
            while True:
                try:
                    lock.acquire(timeout=60)
                    # TODO: we need to make this all thread safe all the way
                    # until uploading the file since multiple threads would
                    # create multiple
                    folder_rec = girder.ensure_folder(
                        client, collection_rec, girder_collection, girder_folder
                    )
                    # Get (if already exists) or create an item
                    item_rec = client.createItem(
                        folder_rec["_id"], name=relpath.name, reuseExisting=True
                    )
                finally:
                    lock.release()

                file_recs = list(client.listFile(item_rec["_id"]))
                if len(file_recs) > 1:
                    raise NotImplementedError(
                        f"Item {item_rec} contains multiple files: {file_recs}"
                    )
                elif file_recs:  # there is a file already
                    if existing == "skip":
                        yield skip_file("exists already")
                        return
                    elif existing == "reupload":
                        yield {
                            "message": "exists - reuploading",
                            "status": "deleting old item",
                        }
                        # TODO: delete an item here
                        raise NotImplementedError("yarik did not find deleteItem API")
                        # NOTE(review): unreachable after the raise above.
                        continue
                    else:
                        raise ValueError(existing)
                break  # no need to loop
            # we need to delete it first??? I do not see a method TODO

            if validation_ != "skip":
                yield {"status": "validating"}
                validation_errors = pynwb_validate(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation_ == "require":
                        yield skip_file("failed validation")
                        return
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            # Extract metadata before actual upload and skip if fails
            # TODO: allow for for non-nwb files to skip this step
            yield {"status": "extracting metadata"}
            try:
                metadata = get_metadata(path)
            except Exception as exc:
                yield skip_file("failed to extract metadata: %s" % str(exc))
                return

            yield {"status": "uploading"}
            # Upload file to an item
            # XXX TODO progress reporting back to pyout is actually tricky
            #     if possible to implement via callback since
            #     callback would need to yield somehow from the context here.
            #     yoh doesn't see how that could be done yet. In the worst
            #     case we would copy uploadFileToItem and _uploadContents
            #     and make them into generators to relay progress instead of
            #     via callback
            # https://stackoverflow.com/questions/9968592/turn-functions-with-a-callback-into-python-generators
            # has some solutions but all IMHO are abit too complex
            for r in generator_from_callback(
                lambda c: client.uploadFileToItem(
                    item_rec["_id"], path, progressCallback=c
                )
            ):
                uploaded_paths[str(path)] = r["current"]
                yield {
                    "upload": 100.0
                    * ((r["current"] / r["total"]) if r["total"] else 1.0)
                }

            # Provide metadata for the item from the file, could be done via
            # a callback to be triggered upon successfull upload, or we could
            # just do it "manually"
            metadata_ = {}
            for k, v in metadata.items():
                if v in ("", None):
                    continue  # degenerate, why bother
                # XXX TODO: remove this -- it is only temporary, search should handle
                if isinstance(v, str):
                    metadata_[k] = v.lower()
                elif isinstance(v, datetime.datetime):
                    metadata_[k] = str(v)
            # we will add some fields which would help us with deciding to
            # reupload or not
            metadata_["uploaded_size"] = os.stat(str(path)).st_size
            metadata_["uploaded_mtime"] = os.stat(str(path)).st_mtime

            yield {"status": "uploading metadata"}
            client.addMetadataToItem(item_rec["_id"], metadata_)

            yield {"status": "done"}
        except Exception as exc:
            if develop_debug:
                raise
            yield {"status": "ERROR", "message": str(exc)}
        finally:
            process_paths.remove(str(path))

    # We will again use pyout to provide a neat table summarizing our progress
    # with upload etc
    import pyout

    from ..support import pyout as pyouts

    # for the upload speeds we need to provide a custom aggregate
    t0 = time.time()

    def upload_agg(*ignored):
        # Overall average upload speed since the table was opened.
        dt = time.time() - t0
        total = sum(uploaded_paths.values())
        if not total:
            return ""
        speed = total / dt if dt else 0
        return "%s/s" % naturalsize(speed)

    pyout_style = pyouts.get_style(hide_if_missing=False)
    pyout_style["upload"]["aggregate"] = upload_agg

    rec_fields = ("path", "size", "errors", "upload", "status", "message")
    out = pyout.Tabular(style=pyout_style, columns=rec_fields)
    with out:
        for path in paths:
            # Throttle: keep at most 10 paths in flight.
            while len(process_paths) >= 10:
                lgr.log(2, "Sleep waiting for some paths to finish processing")
                time.sleep(0.5)
            process_paths.add(path)
            rec = {"path": path}
            path = Path(path)
            try:
                relpath = path.relative_to(local_top_path)
                rec["path"] = str(relpath)
                if develop_debug:
                    # DEBUG: do serially
                    for v in process_path(path, relpath):
                        print(v)
                else:
                    # pyout consumes the generator asynchronously; a tuple
                    # key assigns several columns from one generator.
                    rec[rec_fields[1:]] = process_path(path, relpath)
            except ValueError as exc:
                # typically if local_top_path is not the top path for the path
                # NOTE(review): this assigns a dict (skip_file) into the
                # "status" column, unlike the plain strings used elsewhere.
                rec["status"] = skip_file(exc)
            out(rec)
def read_andotp_accounts(data_root):
    """Yield OTP accounts extracted from AndOTP on a connected device.

    Triggers an AndOTP backup broadcast (encrypted if enabled, otherwise
    plaintext after confirmation), polls the backup directory for the new
    file, decrypts it if needed, and yields TOTPAccount / HOTPAccount /
    SteamAccount objects parsed from the backup JSON.
    """
    # Parse the preferences file to determine what kind of backups we can
    # have AndOTP generate and where they will reside
    try:
        handle = adb_read_file(
            data_root /
            'org.shadowice.flocke.andotp/shared_prefs/org.shadowice.flocke.andotp_preferences.xml'
        )
    except FileNotFoundError:
        return
    preferences = ElementTree.parse(handle)

    try:
        backup_path = PurePosixPath(
            preferences.find('.//string[@name="pref_backup_directory"]').text)
    except AttributeError:
        # Preference missing: fall back to AndOTP's default location.
        backup_path = PurePosixPath('$EXTERNAL_STORAGE/andOTP')

    try:
        allowed_backup_broadcasts = [
            s.text for s in preferences.findall(
                './/set[@name="pref_backup_broadcasts"]/string')
        ]
    except AttributeError:
        allowed_backup_broadcasts = []

    # Snapshot the backup dir so we can spot the file our broadcast creates.
    try:
        initial_backup_files = set(adb_list_dir(backup_path))
    except FileNotFoundError:
        initial_backup_files = set()

    if 'encrypted' in allowed_backup_broadcasts:
        try:
            from Crypto.Cipher import AES
        except ImportError:  # was a bare except; only the import can fail here
            logger.error(
                'Reading encrypted AndOTP backups requires PyCryptodome')
            return
        adb_fast_run(
            'am broadcast -a org.shadowice.flocke.andotp.broadcast.ENCRYPTED_BACKUP org.shadowice.flocke.andotp',
            prefix=b'am: ')
    elif 'plain' in allowed_backup_broadcasts:
        if not input(
                'Encrypted AndOTP backups are disabled. Are you sure you want to create a plaintext backup (y/N)? '
        ).lower().startswith('y'):
            logger.debug('Aborted AndOTP plaintext backup')
            return
        adb_fast_run(
            'am broadcast -a org.shadowice.flocke.andotp.broadcast.PLAIN_TEXT_BACKUP org.shadowice.flocke.andotp',
            prefix=b'am: ')
    else:
        logger.error(
            'No AndOTP backup broadcasts are setup. Please enable at least encrypted backups in the AndOTP settings.'
        )
        return

    backup_data = None
    backup_file = None

    # Find all newly-created backup files
    for i in range(10):
        try:
            time.sleep(0.1)
            new_backups = list(
                set(adb_list_dir(backup_path)) - initial_backup_files)
            if not new_backups:
                continue
            backup_file = new_backups[0]
            backup_data = adb_read_file(backup_file)
            break
        except FileNotFoundError:
            logger.warning(
                'Did not find any new backup files in %s (attempt %d)',
                backup_path, i + 1)
    else:
        # for-else: all 10 attempts exhausted without a break.
        logger.error(
            'Could not read the AndOTP backup file. Do you have a backup password set?'
        )
        return

    if 'encrypted' in allowed_backup_broadcasts:
        import hashlib

        backup_password = getpass.getpass('Enter the AndOTP backup password: ')
        # NOTE(review): reconstructed from a scrubbed span.  AndOTP "old
        # format" encrypted backups are: 12-byte GCM nonce || ciphertext ||
        # 16-byte tag, with key = SHA-256(password) — verify against the
        # AndOTP backup documentation.
        raw = backup_data.read()
        nonce, ciphertext, tag = raw[:12], raw[12:-16], raw[-16:]
        key = hashlib.sha256(backup_password.encode('utf-8')).digest()
        cipher = AES.new(key, AES.MODE_GCM, nonce=nonce)
        try:
            accounts_json = cipher.decrypt(ciphertext)
            cipher.verify(tag)
        except ValueError:
            logger.error(
                'Could not decrypt the AndOTP backup. Is your password correct?'
            )
            return
    else:
        accounts_json = backup_data.read()
        if backup_file.suffix == '.json':
            # BUGFIX: the condition was inverted (`not input(...)`), which
            # deleted the backup precisely when the user declined.
            if input('Do you want to delete the plaintext backup (y/N)? '
                     ).lower().startswith('y'):
                adb_fast_run(f'su -c "rm {shlex.quote(str(backup_file))}"',
                             prefix=b'rm: ')

    for account in json.loads(accounts_json):
        if account['type'] == 'TOTP':
            yield TOTPAccount(account['label'], account['secret'],
                              digits=account['digits'],
                              period=account['period'],
                              algorithm=account['algorithm'])
        elif account['type'] == 'HOTP':
            yield HOTPAccount(account['label'], account['secret'],
                              digits=account['digits'],
                              counter=account['counter'],
                              algorithm=account['algorithm'])
        elif account['type'] == 'STEAM':
            yield SteamAccount(account['label'], account['secret'])
        else:
            logger.warning('Unknown AndOTP account type: %s', account['type'])
# CLI flags: per-app opt-outs, location of the device's app data folder,
# and output options for the extracted accounts.
parser.add_argument('--no-freeotp', action='store_true',
                    help='no FreeOTP codes')
parser.add_argument('--no-google-authenticator', action='store_true',
                    help='no Google Authenticator codes')
parser.add_argument('--no-microsoft-authenticator', action='store_true',
                    help='no Microsoft Authenticator codes')
parser.add_argument('--no-steam-authenticator', action='store_true',
                    help='no Steam Authenticator codes')
# $ANDROID_DATA is left unexpanded here — presumably expanded on the
# device side; PurePosixPath matches the device's path flavour.
parser.add_argument('--data', type=PurePosixPath,
                    default=PurePosixPath('$ANDROID_DATA/data/'),
                    help='path to the app data folder')
parser.add_argument(
    '--no-show-uri', action='store_true',
    help='disable printing the accounts as otpauth:// URIs')
parser.add_argument(
    '--show-qr', action='store_true',
    help='displays the accounts as a local webpage with scannable QR codes'
)
parser.add_argument('--prepend-issuer', action='store_true',
                    help='adds the issuer to the token name')
def split_path(path: str) -> List[str]:
    """
    Split a relative (not absolute) POSIX path into its segments.
    """
    pure = PurePosixPath(path)
    # parents runs child -> root; reversed and trimmed of the leading '.'
    # it lists every ancestor, to which the path itself is appended.
    lineage = list(reversed(pure.parents))[1:] + [pure]
    return [str(node.name) for node in lineage]
def _build_image(self, push: bool = True) -> tuple:
    """
    Build a Docker image using the docker python library.

    Args:
        - push (bool, optional): Whether or not to push the built Docker
            image, this requires the `registry_url` to be set

    Returns:
        - tuple: generated UUID strings `image_name`, `image_tag`

    Raises:
        - ValueError: if the image fails to build
        - InterruptedError: if either pushing or pulling the image fails
    """
    assert isinstance(self.image_name, str), "Image name must be provided"
    assert isinstance(self.image_tag, str), "An image tag must be provided"

    # Make temporary directory to hold serialized flow, healthcheck script, and dockerfile
    # note that if the user provides a custom dockerfile, we create the temporary directory
    # within the current working directory to preserve their build context
    with tempfile.TemporaryDirectory(
            dir="." if self.dockerfile else None) as tempdir:

        if sys.platform == "win32":
            # problem with docker and relative paths only on windows
            tempdir = os.path.abspath(tempdir)

        # Build the dockerfile
        if self.base_image and not self.local_image:
            self.pull_image()

        dockerfile_path = self.create_dockerfile_object(directory=tempdir)
        client = self._get_client()

        # Verify that a registry url has been provided for images that should be pushed
        if self.registry_url:
            # PurePosixPath joins registry host and image name with '/'
            full_name = str(
                PurePosixPath(self.registry_url, self.image_name))
        elif push is True:
            # No registry configured: warn but continue with a local-only
            # image name.
            warnings.warn(
                "This Docker storage object has no `registry_url`, and "
                "will not be pushed.",
                UserWarning,
            )
            full_name = self.image_name
        else:
            full_name = self.image_name

        # Use the docker client to build the image
        self.logger.info("Building the flow's Docker storage...")
        output = client.build(
            path="." if self.dockerfile else tempdir,
            dockerfile=dockerfile_path,
            tag="{}:{}".format(full_name, self.image_tag),
            forcerm=True,
        )
        self._parse_generator_output(output)

        # `client.build` streams output even on failure; the only reliable
        # success check is whether the tagged image now exists.
        if len(client.images(name=full_name)) == 0:
            raise ValueError(
                "Your docker image failed to build! "
                "Your flow might have "
                "failed one of its deployment health checks - please ensure "
                "that all necessary files and dependencies have been included."
            )

        # Push the image if requested
        if push and self.registry_url:
            self.push_image(full_name, self.image_tag)

            # Remove the image locally after being pushed
            client.remove_image(image="{}:{}".format(
                full_name, self.image_tag), force=True)

    return self.image_name, self.image_tag
def test_pipes_advanced(cleanup_pipe):
    """End-to-end test of d6tflow pipe push/pull across two pipes/profiles."""
    import d6tflow.pipes

    # A local_pipe init should yield a "Local" pipe implementation.
    d6tflow.pipes.init(cfg['d6tpipe_pipe1'],
                       profile=cfg['d6tpipe_profile'],
                       local_pipe=True,
                       reset=True)
    assert 'Local' in d6tflow.pipes.get_pipe().__class__.__name__
    # Re-init the same pipe as a regular (remote) pipe.
    d6tflow.pipes.init(cfg['d6tpipe_pipe1'],
                       profile=cfg['d6tpipe_profile'],
                       reset=True)

    class Task1(d6tflow.tasks.TaskPqPandas):
        def run(self):
            self.save(df)

    t1 = Task1()
    pipe1 = t1.get_pipe()
    pipedir = pipe1.dirpath
    t1filepath = t1.output().path
    # Remote file names are pipe-relative POSIX paths.
    t1file = str(PurePosixPath(t1filepath.relative_to(pipedir)))

    d6tflow.preview(t1)
    assert d6tflow.run(t1)
    assert t1.complete()
    # Best-effort cleanup of any leftover remote file from a prior run;
    # `fuckit` swallows any exception raised inside the block.
    with fuckit:
        pipe1._pullpush_luigi([t1file], op='remove')
    assert pipe1.scan_remote(cached=False) == []
    assert t1.pull_preview() == []
    assert t1.push_preview() == [t1file]
    assert d6tflow.pipes.all_push_preview(t1) == {
        cfg['d6tpipe_pipe1']: [t1file]
    }
    assert d6tflow.pipes.all_push(t1) == {cfg['d6tpipe_pipe1']: [t1file]}

    # Redefine Task1 as an external task pinned to pipe1, so it is pulled
    # rather than run under the second profile below.
    class Task1(d6tflow.tasks.TaskPqPandas):
        external = True
        pipename = cfg['d6tpipe_pipe1']

    class Task2(d6tflow.tasks.TaskPqPandas):
        persist = ['df2', 'df4']

        def requires(self):
            return Task1()

        def run(self):
            df2fun(self)

    # Reload to clear module-level pipe state before switching profiles.
    import importlib
    importlib.reload(d6tflow)
    importlib.reload(d6tflow.pipes)
    d6tflow.cache.pipes = {}
    d6tflow.pipes.init(cfg['d6tpipe_pipe2'],
                       profile=cfg['d6tpipe_profile2'],
                       reset=True)
    t1 = Task1()
    assert t1.get_pipename() == cfg['d6tpipe_pipe1']
    assert not t1.complete()
    assert t1.pull_preview() == [str(t1file)]
    assert d6tflow.pipes.all_pull_preview(t1) == {
        cfg['d6tpipe_pipe1']: [t1file]
    }
    # Pulling the external task's output makes it complete locally.
    assert t1.pull() == [str(t1file)]
    assert t1.complete()
    assert t1.output().load().equals(df)

    t2 = Task2()
    d6tflow.show([t2])
    assert d6tflow.run([t2])  # run as list
    pipe2 = t2.get_pipe()
    pipedir = t2.get_pipe().dirpath
    # assert False
    t2files = [
        str(PurePosixPath(p.path.relative_to(pipedir)))
        for p in t2.output().values()
    ]
    assert d6tflow.pipes.all_push_preview(t2) == {
        cfg['d6tpipe_pipe2']: t2files
    }

    # cleanup
    pipe1._pullpush_luigi([t1file], op='remove')
    assert pipe1.scan_remote(cached=False) == []
def get_parent(path):
    """Return the parent of *path*, formatted as a folder path.

    Returns the literal './' when the path has no parent directory;
    otherwise delegates to ``format_path`` with a leading '/' added.
    """
    # Compute the parent once instead of re-deriving it in each branch.
    parent = str(PurePosixPath(path).parent)
    if parent == '.':
        return './'
    return format_path('/' + parent, path_type='folder')
def __init__(
    self,
    filepath: str,
    load_args: Dict[str, Any] = None,
    save_args: Dict[str, Any] = None,
    version: Version = None,
    credentials: Dict[str, Any] = None,
    fs_args: Dict[str, Any] = None,
) -> None:
    """Creates a new instance of ``TensorFlowModelDataset``.

    Args:
        filepath: Filepath in POSIX format to a TensorFlow model directory prefixed with a
            protocol like `s3://`. If prefix is not provided `file` protocol (local filesystem)
            will be used. The prefix should be any protocol supported by ``fsspec``.
            Note: `http(s)` doesn't support versioning.
        load_args: TensorFlow options for loading models.
            Here you can find all available arguments:
            https://www.tensorflow.org/api_docs/python/tf/keras/models/load_model
            All defaults are preserved.
        save_args: TensorFlow options for saving models.
            Here you can find all available arguments:
            https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model
            All defaults are preserved, except for "save_format", which is set to "tf".
        version: If specified, should be an instance of ``kedro.io.core.Version``.
            If its ``load`` attribute is None, the latest version will be loaded. If
            its ``save`` attribute is None, save version will be autogenerated.
        credentials: Credentials required to get access to the underlying filesystem.
            E.g. for ``GCSFileSystem`` it should look like `{'token': None}`.
        fs_args: Extra arguments to pass into underlying filesystem class constructor
            (e.g. `{"project": "my-project"}` for ``GCSFileSystem``).
    """
    # Copy caller-supplied dicts so later mutation here can't leak out.
    _fs_args = copy.deepcopy(fs_args) or {}
    _credentials = copy.deepcopy(credentials) or {}
    protocol, path = get_protocol_and_path(filepath, version)
    if protocol == "file":
        # Local saves may target directories that don't exist yet.
        _fs_args.setdefault("auto_mkdir", True)

    self._protocol = protocol
    self._fs = fsspec.filesystem(self._protocol, **_credentials, **_fs_args)
    super().__init__(
        filepath=PurePosixPath(path),
        version=version,
        exists_function=self._fs.exists,
        glob_function=self._fs.glob,
    )
    self._tmp_prefix = "kedro_tensorflow_tmp"  # temp prefix pattern

    # Handle default load and save arguments
    self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
    if load_args is not None:
        self._load_args.update(load_args)
    self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
    if save_args is not None:
        self._save_args.update(save_args)

    # An "h5" model is a single file, so stage it through a temp file;
    # otherwise the TensorFlow SavedModel format is a directory tree.
    if self._save_args.get("save_format") == "h5":
        self._tmpfile_callable = tempfile.NamedTemporaryFile  # type: Callable
    else:
        self._tmpfile_callable = tempfile.TemporaryDirectory
def results_path_recfluence(self) -> PurePath:
    """Path under which recfluence stores its latest results."""
    results_dir = PurePosixPath('results')
    return results_dir
def __init__(
    self,
    filepath: str,
    load_args: Dict[str, Any] = None,
    save_args: Dict[str, Any] = None,
    version: Version = None,
    credentials: Dict[str, Any] = None,
    fs_args: Dict[str, Any] = None,
) -> None:
    """Creates a new instance of ``GeoJSONDataSet`` pointing to a concrete GeoJSON file
    on a specific filesystem fsspec.

    Args:
        filepath: Filepath in POSIX format to a GeoJSON file prefixed with a protocol like
            `s3://`. If prefix is not provided `file` protocol (local filesystem) will be used.
            The prefix should be any protocol supported by ``fsspec``.
            Note: `http(s)` doesn't support versioning.
        load_args: GeoPandas options for loading GeoJSON files.
            Here you can find all available arguments:
            https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html
        save_args: GeoPandas options for saving geojson files.
            Here you can find all available arguments:
            https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.to_file.html
            The default_save_arg driver is 'GeoJSON', all others preserved.
        version: If specified, should be an instance of ``kedro.io.core.Version``.
            If its ``load`` attribute is None, the latest version will be loaded. If
            its ``save`` attribute is None, save version will be autogenerated.
        credentials: credentials required to access the underlying filesystem.
            Eg. for ``GCFileSystem`` it would look like `{'token': None}`.
        fs_args: Extra arguments to pass into underlying filesystem class constructor
            (e.g. `{"project": "my-project"}` for ``GCSFileSystem``), as well as
            to pass to the filesystem's `open` method through nested keys
            `open_args_load` and `open_args_save`.
            Here you can find all available arguments for `open`:
            https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open
            All defaults are preserved, except `mode`, which is set to `wb` when saving.
    """
    # Copy caller-supplied dicts so later mutation here can't leak out;
    # split off the nested open() argument groups.
    _fs_args = copy.deepcopy(fs_args) or {}
    _fs_open_args_load = _fs_args.pop("open_args_load", {})
    _fs_open_args_save = _fs_args.pop("open_args_save", {})
    _credentials = copy.deepcopy(credentials) or {}
    protocol, path = get_protocol_and_path(filepath, version)
    self._protocol = protocol
    if protocol == "file":
        # Local saves may target directories that don't exist yet.
        _fs_args.setdefault("auto_mkdir", True)
    self._fs = fsspec.filesystem(self._protocol, **_credentials, **_fs_args)
    super().__init__(
        filepath=PurePosixPath(path),
        version=version,
        exists_function=self._fs.exists,
        glob_function=self._fs.glob,
    )
    self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
    if load_args is not None:
        self._load_args.update(load_args)
    self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
    if save_args is not None:
        self._save_args.update(save_args)
    # Saving is binary by default.
    _fs_open_args_save.setdefault("mode", "wb")
    self._fs_open_args_load = _fs_open_args_load
    self._fs_open_args_save = _fs_open_args_save
def __trial_path(self, catalog: str) -> PurePath:
    """Per-trial storage location for *catalog* under the store root."""
    location = f'{self.storeCfg.root_path}/run/{catalog}/{self.trial_id}'
    return PurePosixPath(location)
def lambda_handler(event, context):
    """Process SQS-delivered S3 copy jobs, dispatching each to the
    resumable (large-file) or single-shot (small-file) transfer path."""
    print("Lambda or NAT IP Address:", instance_id)
    logger.info(json.dumps(event, default=str))

    for trigger_record in event['Records']:
        trigger_body = trigger_record['body']
        job = json.loads(trigger_body)
        logger.info(json.dumps(job, default=str))

        # Skip the access-test record that S3 automatically writes to SQS
        # during initial bucket-notification setup.
        if 'Event' in job:
            if job['Event'] == 's3:TestEvent':
                logger.info('Skip s3:TestEvent')
                continue

        # If the message came from S3 rather than from the jobsender,
        # convert it into a job record. Messages from S3 carry 'Records'.
        if 'Records' in job:
            for One_record in job['Records']:
                if 's3' in One_record:
                    Src_bucket = One_record['s3']['bucket']['name']
                    Src_key = One_record['s3']['object']['key']
                    # S3 event keys are URL-encoded ('+' for spaces etc.).
                    Src_key = urllib.parse.unquote_plus(Src_key)
                    Size = One_record['s3']['object']['size']
                    if "versionId" in One_record['s3']['object']:
                        versionId = One_record['s3']['object']['versionId']
                    else:
                        versionId = 'null'
                    Des_bucket, Des_prefix = Des_bucket_default, Des_prefix_default
                    Des_key = str(PurePosixPath(Des_prefix) / Src_key)
                    # Handle "empty directory" objects (keys ending in '/').
                    if Src_key[-1] == '/':
                        Des_key += '/'
                    job = {
                        'Src_bucket': Src_bucket,
                        'Src_key': Src_key,
                        'Size': Size,
                        'Des_bucket': Des_bucket,
                        'Des_key': Des_key,
                        'versionId': versionId
                    }
        # Malformed message: neither an S3 event nor a jobsender job.
        if 'Des_bucket' not in job:
            logger.warning(f'Wrong sqs job: {json.dumps(job, default=str)}')
            logger.warning('Try to handle next message')
            raise WrongRecordFormat
        if 'versionId' not in job:
            job['versionId'] = 'null'

        # TODO: partial failure of a multi-job message is not handled, so
        # for now the SQS batch size is fixed to 1.
        if job['Size'] > ResumableThreshold:
            upload_etag_full = step_function(
                job, table, s3_src_client, s3_des_client, instance_id,
                StorageClass, ChunkSize, MaxRetry, MaxThread, JobTimeout,
                ifVerifyMD5Twice, CleanUnfinishedUpload, UpdateVersionId,
                GetObjectWithVersionId)
        else:
            upload_etag_full = step_fn_small_file(job, table, s3_src_client,
                                                  s3_des_client, instance_id,
                                                  StorageClass, MaxRetry,
                                                  UpdateVersionId,
                                                  GetObjectWithVersionId)
        if upload_etag_full != "TIMEOUT" and upload_etag_full != "ERR":
            # Keep the SQS message on TIMEOUT/ERR so it becomes visible
            # again; delete it (by finishing normally) on success.
            # Large files set TIMEOUT when the worker thread exits after
            # MaxRetry; small files return MaxRetry directly.
            # For small files the failure is presumably caused by
            # permissions rather than size, so there is no point letting
            # the next worker retry: the SQS message is consumed and DDB
            # records no completion status.
            # To let small files be retried via SQS redelivery as well,
            # add upload_etag_full != "MaxRetry" to the condition above.
            continue
        else:
            raise TimeoutOrMaxRetry

    return {'statusCode': 200, 'body': 'Jobs completed'}
def user_path(self) -> PurePath:
    """Run directory for the current user under the store root."""
    location = f'{self.storeCfg.root_path}/run/user/{self.user.email}'
    return PurePosixPath(location)
def _ro_aggregates(self) -> List[Aggregate]:
    """Gather dictionary of files to be added to the manifest."""

    def guess_mediatype(
        rel_path: str,
    ) -> Tuple[Optional[str], Optional[Union[str, List[str]]]]:
        """Return the mediatypes."""
        media_types = {
            # Adapted from
            # https://w3id.org/bundle/2014-11-05/#media-types
            "txt": TEXT_PLAIN,
            "ttl": 'text/turtle; charset="UTF-8"',
            "rdf": "application/rdf+xml",
            "json": "application/json",
            "jsonld": "application/ld+json",
            "xml": "application/xml",
            # NOTE(review): CWL has no IANA-registered media type; an
            # unofficial x+yaml type is used here.
            "cwl": 'text/x+yaml; charset="UTF-8"',
            "provn": 'text/provenance-notation; charset="UTF-8"',
            "nt": "application/n-triples",
        }  # type: Dict[str, str]
        conforms_to = {
            "provn": "http://www.w3.org/TR/2013/REC-prov-n-20130430/",
            "cwl": "https://w3id.org/cwl/",
        }  # type: Dict[str, str]

        prov_conforms_to = {
            "provn": "http://www.w3.org/TR/2013/REC-prov-n-20130430/",
            "rdf": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
            "ttl": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
            "nt": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
            "jsonld": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
            "xml": "http://www.w3.org/TR/2013/NOTE-prov-xml-20130430/",
            "json": "http://www.w3.org/Submission/2013/SUBM-prov-json-20130424/",
        }  # type: Dict[str, str]

        extension = rel_path.rsplit(".", 1)[-1].lower()  # type: Optional[str]
        if extension == rel_path:
            # No ".", no extension
            extension = None

        mediatype = None  # type: Optional[str]
        conformsTo = None  # type: Optional[Union[str, List[str]]]
        if extension in media_types:
            mediatype = media_types[extension]

        if extension in conforms_to:
            # TODO: Open CWL file to read its declared "cwlVersion", e.g.
            # cwlVersion = "v1.0"
            conformsTo = conforms_to[extension]

        if (rel_path.startswith(posix_path(PROVENANCE))
                and extension in prov_conforms_to):
            if ".cwlprov" in rel_path:
                # Our own!
                conformsTo = [
                    prov_conforms_to[extension],
                    CWLPROV_VERSION,
                ]
            else:
                # Some other PROV
                # TODO: Recognize ProvOne etc.
                conformsTo = prov_conforms_to[extension]
        return (mediatype, conformsTo)

    aggregates = []  # type: List[Aggregate]
    # First pass: payload (bagged) files, aggregated by content hash.
    for path in self.bagged_size.keys():
        temp_path = PurePosixPath(path)
        folder = temp_path.parent
        filename = temp_path.name

        # NOTE: Here we end up aggregating the abstract
        # data items by their sha1 hash, so that it matches
        # the entity() in the prov files.

        # TODO: Change to nih:sha-256; hashes
        # https://tools.ietf.org/html/rfc6920#section-7
        aggregate_dict = {
            "uri": "urn:hash::sha1:" + filename,
            "bundledAs": {
                # The arcp URI is suitable ORE proxy; local to this Research Object.
                # (as long as we don't also aggregate it by relative path!)
                "uri": self.base_uri + path,
                # relate it to the data/ path
                "folder": "/%s/" % folder,
                "filename": filename,
            },
        }  # type: Aggregate
        if path in self._file_provenance:
            # Made by workflow run, merge captured provenance
            bundledAs = aggregate_dict["bundledAs"]
            if bundledAs:
                bundledAs.update(self._file_provenance[path])
            else:
                aggregate_dict["bundledAs"] = cast(
                    Optional[Dict[str, Any]], self._file_provenance[path])
        else:
            # Probably made outside wf run, part of job object?
            pass
        if path in self._content_types:
            aggregate_dict["mediatype"] = self._content_types[path]
        aggregates.append(aggregate_dict)

    # Second pass: tag files (metadata/workflow/snapshot), aggregated by
    # relative path instead of hash.
    for path in self.tagfiles:
        if not (path.startswith(METADATA) or path.startswith(WORKFLOW)
                or path.startswith(SNAPSHOT)):
            # probably a bagit file
            continue
        if path == str(PurePosixPath(METADATA) / "manifest.json"):
            # Should not really be there yet! But anyway, we won't
            # aggregate it.
            continue

        # These are local paths like metadata/provenance - but
        # we need to relativize them for our current directory for
        # as we are saved in metadata/manifest.json
        mediatype, conformsTo = guess_mediatype(path)
        rel_aggregates = {
            "uri": str(Path(os.pardir) / path),
            "mediatype": mediatype,
            "conformsTo": conformsTo,
        }  # type: Aggregate

        if path in self._file_provenance:
            # Propagate file provenance (e.g. timestamp)
            rel_aggregates.update(self._file_provenance[path])
        elif not path.startswith(SNAPSHOT):
            # make new timestamp?
            (
                rel_aggregates["createdOn"],
                rel_aggregates["createdBy"],
            ) = self._self_made()
        aggregates.append(rel_aggregates)
    aggregates.extend(self._external_aggregates)
    return aggregates
def feed_json(self, scan_num: int) -> PurePath:
    """Path of the feed JSON file for scan number *scan_num*."""
    stem = self.__trial_user_path("feed")
    return PurePosixPath(f'{stem}.{scan_num}.json')
def get_mod_file_path(self, modid: str) -> PurePosixPath:
    '''Return the relative path of the expected mod output file that should be generated.'''
    stem = self.get_name()
    mod_data = self.manager.arkman.getModData(modid)
    assert mod_data
    # Layout: "<modid>-<mod name>/<exporter name>.json"
    folder = f'{modid}-{mod_data["name"]}'
    return PurePosixPath(folder) / f'{stem}.json'
def from_source_path(
    cls, source_path: PurePosixPath,
) -> 'RecordPath':
    """Construct a RecordPath from a relative source path.

    Raises:
        ValueError: if *source_path* is absolute.
    """
    if not source_path.is_absolute():
        return cls.from_parts(source_path.parts)
    raise ValueError(source_path)
def __init__(
    self,
    filepath: str,
    engine: str = "xlsxwriter",
    load_args: Dict[str, Any] = None,
    save_args: Dict[str, Any] = None,
    version: Version = None,
    credentials: Dict[str, Any] = None,
    fs_args: Dict[str, Any] = None,
) -> None:
    """Creates a new instance of ``ExcelDataSet`` pointing to a concrete Excel file
    on a specific filesystem.

    Args:
        filepath: Filepath in POSIX format to a Excel file prefixed with a protocol like
            `s3://`. If prefix is not provided, `file` protocol (local filesystem) will be
            used. The prefix should be any protocol supported by ``fsspec``.
            Note: `http(s)` doesn't support versioning.
        engine: The engine used to write to excel files. The default
            engine is 'xlsxwriter'.
        load_args: Pandas options for loading Excel files.
            Here you can find all available arguments:
            https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html
            All defaults are preserved, but "engine", which is set to "xlrd".
        save_args: Pandas options for saving Excel files.
            Here you can find all available arguments:
            https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html
            All defaults are preserved, but "index", which is set to False.
            If you would like to specify options for the `ExcelWriter`,
            you can include them under the "writer" key. Here you can
            find all available arguments:
            https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html
        version: If specified, should be an instance of
            ``kedro.io.core.Version``. If its ``load`` attribute is
            None, the latest version will be loaded. If its ``save``
            attribute is None, save version will be autogenerated.
        credentials: Credentials required to get access to the underlying filesystem.
            E.g. for ``GCSFileSystem`` it should look like `{"token": None}`.
        fs_args: Extra arguments to pass into underlying filesystem class constructor
            (e.g. `{"project": "my-project"}` for ``GCSFileSystem``), as well as
            to pass to the filesystem's `open` method through nested keys
            `open_args_load` and `open_args_save`.
            Here you can find all available arguments for `open`:
            https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open
            All defaults are preserved, except `mode`, which is set to `wb` when saving.
    """
    # Copy caller-supplied dicts so later mutation here can't leak out;
    # split off the nested open() argument groups.
    _fs_args = deepcopy(fs_args) or {}
    _fs_open_args_load = _fs_args.pop("open_args_load", {})
    _fs_open_args_save = _fs_args.pop("open_args_save", {})
    _credentials = deepcopy(credentials) or {}
    protocol, path = get_protocol_and_path(filepath, version)
    if protocol == "file":
        # Local saves may target directories that don't exist yet.
        _fs_args.setdefault("auto_mkdir", True)

    self._protocol = protocol
    self._fs = fsspec.filesystem(self._protocol, **_credentials, **_fs_args)
    super().__init__(
        filepath=PurePosixPath(path),
        version=version,
        exists_function=self._fs.exists,
        glob_function=self._fs.glob,
    )

    # Handle default load arguments
    self._load_args = deepcopy(self.DEFAULT_LOAD_ARGS)
    if load_args is not None:
        self._load_args.update(load_args)

    # Handle default save arguments
    self._save_args = deepcopy(self.DEFAULT_SAVE_ARGS)
    if save_args is not None:
        self._save_args.update(save_args)
    # "writer" options are passed to pd.ExcelWriter, not to to_excel().
    self._writer_args = self._save_args.pop("writer", {"engine": engine})

    _fs_open_args_save.setdefault("mode", "wb")
    self._fs_open_args_load = _fs_open_args_load
    self._fs_open_args_save = _fs_open_args_save
def run(self, options, robot_class, **static_options):
    """Deploy robot code to the roboRIO over SSH/SFTP.

    Runs the local test suite first (unless skipped), validates the
    RobotPy installation on the robot, uploads the source directory,
    and restarts the robot code. Returns 0 on success, nonzero on
    failure.
    """

    try:
        from robotpy_installer import installer
    except ImportError:
        raise ImportError(
            "You must have the robotpy-installer package installed to deploy code!"
        )

    from .. import config
    config.mode = 'upload'

    # run the test suite before uploading
    if not options.skip_tests:
        from .cli_test import PyFrcTest

        tester = PyFrcTest()
        retval = tester.run_test([], robot_class, options.builtin,
                                 ignore_missing_test=True)
        if retval != 0:
            print_err("ERROR: Your robot tests failed, aborting upload.")
            if not sys.stdin.isatty():
                # Non-interactive session: cannot prompt, so abort.
                print_err(
                    "- Use --skip-tests if you want to upload anyways")
                return retval

            print()
            if not yesno('- Upload anyways?'):
                return retval

            if not yesno('- Are you sure? Your robot code may crash!'):
                return retval

            print()
            print("WARNING: Uploading code against my better judgement...")

    # upload all files in the robot.py source directory
    robot_file = abspath(inspect.getfile(robot_class))
    robot_path = dirname(robot_file)
    robot_filename = basename(robot_file)
    cfg_filename = join(robot_path, '.deploy_cfg')

    if not options.nonstandard and robot_filename != 'robot.py':
        print_err(
            "ERROR: Your robot code must be in a file called robot.py (launched from %s)!"
            % robot_filename)
        print_err()
        print_err(
            "If you really want to do this, then specify the --nonstandard argument"
        )
        return 1

    # This probably should be configurable... oh well
    deploy_dir = PurePosixPath('/home/lvuser')
    py_deploy_subdir = 'py'
    py_new_deploy_subdir = 'py_new'
    py_deploy_dir = deploy_dir / py_deploy_subdir

    # note below: deployed_cmd appears that it only can be a single line
    # In 2015, there were stdout/stderr issues. In 2016, they seem to
    # have been fixed, but need to use -u for it to really work properly

    if options.debug:
        compileall_flags = ''
        deployed_cmd = 'env LD_LIBRARY_PATH=/usr/local/frc/lib/ /usr/local/bin/python3 -u %s/%s -v run' % (
            py_deploy_dir, robot_filename)
        deployed_cmd_fname = 'robotDebugCommand'
        extra_cmd = 'touch /tmp/frcdebug; chown lvuser:ni /tmp/frcdebug'
        bash_cmd = '/bin/bash -cex'
    else:
        compileall_flags = '-O'
        deployed_cmd = 'env LD_LIBRARY_PATH=/usr/local/frc/lib/ /usr/local/bin/python3 -u -O %s/%s run' % (
            py_deploy_dir, robot_filename)
        deployed_cmd_fname = 'robotCommand'
        extra_cmd = ''
        bash_cmd = '/bin/bash -ce'

    # In-place deploys overwrite the live directory directly; otherwise
    # upload to a staging directory and atomically swap it in.
    if options.in_place:
        replace_cmd = 'true'
        py_new_deploy_subdir = py_deploy_subdir
    else:
        replace_cmd = "[ -d %(py_deploy_dir)s ] && rm -rf %(py_deploy_dir)s; mv %(py_new_deploy_dir)s %(py_deploy_dir)s"

    py_new_deploy_dir = deploy_dir / py_new_deploy_subdir
    replace_cmd %= {
        "py_deploy_dir": py_deploy_dir,
        "py_new_deploy_dir": py_new_deploy_dir
    }

    # Remote one-liner that compares the robot's WPILib version with the
    # local one; exits 89 on mismatch (handled below).
    check_version = '/usr/local/bin/python3 -c "exec(open(\\"$SITEPACKAGES/wpilib/version.py\\", \\"r\\").read(), globals()); print(\\"WPILib version on robot is \\" + __version__);exit(0) if __version__ == \\"%s\\" else exit(89)"' % wpilib.__version__
    if options.no_version_check:
        check_version = ''

    # Exits 91 if StartupDLLs is still enabled (handled below).
    check_startup_dlls = '(if [ "$(grep ^StartupDLLs /etc/natinst/share/ni-rt.ini)" != "" ]; then exit 91; fi)'

    # This is a nasty bit of code now...
    # Exit codes 87/88 signal missing python3 / missing WPILib.
    sshcmd = inspect.cleandoc("""
        %(bash_cmd)s '[ -x /usr/local/bin/python3 ] || exit 87
        SITEPACKAGES=$(/usr/local/bin/python3 -c "import site; print(site.getsitepackages()[0])")
        [ -f $SITEPACKAGES/wpilib/version.py ] || exit 88
        %(check_version)s
        echo "%(deployed_cmd)s" > %(deploy_dir)s/%(deployed_cmd_fname)s
        %(extra_cmd)s
        %(check_startup_dlls)s
        '
    """)
    sshcmd %= locals()
    # The remote shell gets the whole script as one line.
    sshcmd = re.sub("\n+", ";", sshcmd)

    nc_thread = None

    try:
        controller = installer.ssh_from_cfg(cfg_filename,
                                            username='******',
                                            password='',
                                            hostname=options.robot,
                                            allow_mitm=True,
                                            no_resolve=options.no_resolve)

        try:
            # Housekeeping first
            logger.debug('SSH: %s', sshcmd)
            controller.ssh(sshcmd)
        except installer.SshExecError as e:
            # Translate the sentinel exit codes from the remote script
            # into user-facing diagnostics.
            doret = True
            if e.retval == 87:
                print_err(
                    "ERROR: python3 was not found on the roboRIO: have you installed robotpy?"
                )
            elif e.retval == 88:
                print_err(
                    "ERROR: WPILib was not found on the roboRIO: have you installed robotpy?"
                )
            elif e.retval == 89:
                print_err("ERROR: expected WPILib version %s" %
                          wpilib.__version__)
                print_err()
                print_err("You should either:")
                print_err(
                    "- If the robot version is older, upgrade the RobotPy on your robot"
                )
                print_err("- Otherwise, upgrade pyfrc on your computer")
                print_err()
                print_err(
                    "Alternatively, you can specify --no-version-check to skip this check"
                )
            elif e.retval == 90:
                print_err("ERROR: error running compileall")
            elif e.retval == 91:
                # Not an error; ssh in as admin and fix the startup dlls (Saves 24M of RAM)
                # -> https://github.com/wpilibsuite/EclipsePlugins/pull/154
                logger.info("Fixing StartupDLLs to save RAM...")
                controller.username = '******'
                controller.ssh(
                    'sed -i -e "s/^StartupDLLs/;StartupDLLs/" /etc/natinst/share/ni-rt.ini'
                )
                controller.username = '******'
                doret = False
            else:
                print_err("ERROR: %s" % e)
            if doret:
                return 1

        # Copy the files over, copy to a temporary directory first
        # -> this is inefficient, but it's easier in sftp
        tmp_dir = tempfile.mkdtemp()
        try:
            py_tmp_dir = join(tmp_dir, py_new_deploy_subdir)
            self._copy_to_tmpdir(py_tmp_dir, robot_path)
            controller.sftp(py_tmp_dir, deploy_dir,
                            mkdir=not options.in_place)
        finally:
            shutil.rmtree(tmp_dir)

        # start the netconsole listener now if requested, *before* we
        # actually start the robot code, so we can see all messages
        if options.nc or options.nc_ds:
            from netconsole import run
            nc_event = threading.Event()
            nc_thread = threading.Thread(target=run,
                                         args=(controller.hostname, ),
                                         kwargs=dict(
                                             connect_event=nc_event,
                                             fakeds=options.nc_ds),
                                         daemon=True)
            nc_thread.start()
            nc_event.wait(5)
            logger.info("Netconsole is listening...")

        if not options.in_place:
            # Restart the robot code and we're done!
            sshcmd = "%(bash_cmd)s '" + \
                '%(replace_cmd)s;' + \
                '/usr/local/bin/python3 %(compileall_flags)s -m compileall -q -r 5 /home/lvuser/py;' + \
                '. /etc/profile.d/natinst-path.sh; ' + \
                'chown -R lvuser:ni %(py_deploy_dir)s; ' + \
                'sync; ' + \
                '/usr/local/frc/bin/frcKillRobot.sh -t -r || true' + \
                "'"

            sshcmd %= {
                'bash_cmd': bash_cmd,
                'compileall_flags': compileall_flags,
                'py_deploy_dir': py_deploy_dir,
                'replace_cmd': replace_cmd,
            }

            logger.debug('SSH: %s', sshcmd)
            controller.ssh(sshcmd)

    except installer.Error as e:
        print_err("ERROR: %s" % e)
        return 1
    else:
        print("\nSUCCESS: Deploy was successful!")

    # Keep streaming netconsole output until the listener exits.
    if nc_thread is not None:
        nc_thread.join()

    return 0
def test_convert_paths_raises_error_on_relative_project_path(): path = Path("relative/path") with pytest.raises(ValueError) as excinfo: _convert_paths_to_absolute_posix(project_path=path, conf_dictionary={}) assert (str(excinfo.value) == f"project_path must be an absolute path. Received: {path}") @pytest.mark.parametrize( "project_path,input_conf,expected", [ ( PurePosixPath("/tmp"), { "handler": { "filename": "logs/info.log" } }, { "handler": { "filename": "/tmp/logs/info.log" } }, ), ( PurePosixPath("/User/kedro"), { "my_dataset": {
def relative_to(self, path):
    """Like ``PurePosixPath.relative_to``, but returns ``self`` unchanged
    when *path* is not a prefix of this path."""
    try:
        rebased = PurePosixPath.relative_to(self, path)
    except ValueError:
        # Not relative to *path* — fall back to the original path.
        return self
    return rebased
#!/usr/bin/python3 -tt from rpmfluff import SimpleRpmBuild from rpmfluff import YumRepoBuild from pathlib import PurePosixPath import os import shutil import subprocess work_file = os.path.realpath(__file__) work_dir = os.path.dirname(work_file) file_base_mane = PurePosixPath(work_file).stem repo_dir = os.path.join(work_dir, file_base_mane) temp_dir = os.path.join(repo_dir, 'temp') if not os.path.exists(repo_dir): os.makedirs(repo_dir) if not os.path.exists(temp_dir): os.makedirs(temp_dir) os.chdir(temp_dir) pkgs = [] rpm = SimpleRpmBuild('TestA', '1.0.0', '1', ['noarch']) rpm.add_requires('TestB') rpm.add_group('Testgroup') pkgs.append(rpm) # Used for install remove tests if requirement TestB is handled properly. rpm = SimpleRpmBuild('TestB', '1.0.0', '1', ['noarch'])