def extract_and_capitalize_headlines_from_corpus(corpus_dir, docids):
    """
    Iterate through the given files in `corpus_dir`, extract the headlines,
    capitalize them and yield them.

    Parameters
    ----------
    corpus_dir: string
    docids: list of string
        the documents to be processed

    Returns
    -------
    generator of (error, (docid, headlines)): (Exception, (str, list<list<str>>))
    """
    get_tokens = partial(map, partial(get_in, ["token"]))
    get_features = partial(get_in, ["features"])
    make_capitalized_title_new = lambda words: make_capitalized_title(title_words=words)

    for docid in docids:
        p = Path(corpus_dir) / Path(docid)
        auxil_p = p.with_suffix(".auxil")
        paf_p = p.with_suffix(".paf")
        if auxil_p.exists() and paf_p.exists():
            try:
                titles, _ = separate_title_from_body(str(auxil_p), str(paf_p))
            except Exception as e:
                yield (e, None)
                continue  # `titles` is undefined on error, so skip this document
            # pipeline:
            # -> get features
            # -> get tokens
            # -> capitalize headline
            yield (None,
                   (p.name,
                    list(map(compose(make_capitalized_title_new,
                                     get_tokens,
                                     get_features),
                             titles))))
def _check_files(filename, exts):
    """
    takes filename string and list of extensions, checks that they all exist
    and returns a Path
    """
    # TODO test whether IOError (deprecated) or OSError is better handled by Orange
    p = Path(filename)
    if p.suffix == ".dmt":
        for child in p.parent.iterdir():
            if child.suffix == ".dmt":
                continue
            elif child.stem.casefold() == p.stem.casefold():
                p = child
                break
            elif child.stem.casefold() == (p.stem + "_0000_0000").casefold():
                p = child.with_name(child.stem.split("_0000_0000")[0] + p.suffix)
                break
    for ext in exts:
        if ext == ".dmt":
            # Always lowercase
            ps = p.parent.joinpath(p.with_suffix(ext).name.lower())
        elif ext in [".drd", ".dmd"]:
            # Always has at least the _0000_0000 tile
            ps = p.parent.joinpath(p.stem + "_0000_0000" + ext)
        else:
            ps = p.with_suffix(ext)
        if not ps.is_file():
            raise OSError('File "{}" was not found.'.format(ps))
    return p
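# Hedged usage sketch for _check_files() above, assuming it is defined in the
# current module. The directory layout is hypothetical; it only illustrates
# the naming scheme the function expects: a "_0000_0000"-tiled ".drd" sibling
# next to the ".dmt" file.
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as d:
    base = Path(d, "scan")
    base.with_suffix(".dmt").touch()
    Path(d, "scan_0000_0000.drd").touch()
    p = _check_files(str(base.with_suffix(".dmt")), [".dmt", ".drd"])
    assert p == base.with_suffix(".dmt")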
def convert_nb(fname, dest_path='.'):
    "Convert a notebook `fname` to html file in `dest_path`."
    from .gen_notebooks import remove_undoc_cells, remove_code_cell_jupyter_widget_state_elem
    nb = read_nb(fname)
    nb['cells'] = remove_undoc_cells(nb['cells'])
    nb['cells'] = remove_code_cell_jupyter_widget_state_elem(nb['cells'])
    fname = Path(fname).absolute()
    dest_name = fname.with_suffix('.html').name
    meta = nb['metadata']
    meta_jekyll = meta['jekyll'] if 'jekyll' in meta else {'title': fname.with_suffix('').name}
    meta_jekyll['nb_path'] = f'{fname.parent.name}/{fname.name}'
    with open(f'{dest_path}/{dest_name}', 'w') as f:
        f.write(exporter.from_notebook_node(nb, resources=meta_jekyll)[0])
class CythonExtension(Extension):
    def __init__(
        self,
        name,
        cython_source,
        sources=None,
        output_dir=None,
        language_level=None,
        cplus=None,
        annotate=None,
        **kwargs
    ):
        self.cython_source = Path(cython_source)
        self.output_dir = Path(output_dir or "cythonized")
        self.language_level = language_level or 3
        self.cplus = cplus or False
        self.annotate = annotate or False
        self.output_file = self.output_dir.joinpath(
            self.cython_source.with_suffix(".cpp" if self.cplus else ".c")
        )
        sources = sources or []
        sources.append(str(self.output_file))
        super().__init__(name, sources, **kwargs)
def doplotsave(bigfn, data, rawind, clim, dohist, meanImg):
    if bigfn is None or data is None:
        return
    bigfn = Path(bigfn)

    if dohist:
        ax = figure().gca()
        hist(data.ravel(), bins=256, log=True)
        ax.set_title('histogram of {}'.format(bigfn))
        ax.set_ylabel('frequency of occurrence')
        ax.set_xlabel('data value')

    if meanImg:
        meanStack = data.mean(axis=0).astype(uint16)  # DO NOT use dtype= here, it messes up internal calculation!
        fg = figure(32)
        ax = fg.gca()
        if clim:
            hi = ax.imshow(meanStack, cmap='gray', origin='lower',
                           vmin=clim[0], vmax=clim[1], norm=LogNorm())
        else:
            hi = ax.imshow(meanStack, cmap='gray', origin='lower', norm=LogNorm())
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.set_title('mean of image frames')
        fg.colorbar(hi)

        # with_suffix() requires a leading dot, so build "<stem>_mean.png" with with_name()
        pngfn = bigfn.with_name(bigfn.stem + '_mean.png')
        print(f'writing mean PNG {pngfn}')
        fg.savefig(pngfn, dpi=150, bbox_inches='tight')
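# Why the PNG name above uses with_name() rather than with_suffix(): pathlib
# rejects a replacement suffix that does not start with a dot, so the earlier
# bigfn.with_suffix('_mean.png') would raise ValueError. The filename here is
# hypothetical.
from pathlib import Path

bigfn = Path("stack.h5")
try:
    bigfn.with_suffix('_mean.png')
except ValueError:
    pass  # invalid suffix: must begin with '.'
assert bigfn.with_name(bigfn.stem + '_mean.png') == Path("stack_mean.png")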
def download(self, destination: Path):
    logging.info("Downloading %s into %s", self.url, destination)
    destination.parent.mkdir(parents=True, exist_ok=True)
    tmpdestination = destination.with_suffix(".tmp")
    if tmpdestination.exists():
        logging.warning("Removing temporary directory %s", tmpdestination)
        shutil.rmtree(tmpdestination)

    file = self.dataset.downloadURL(self.url)

    logging.info("Unarchiving file")
    with tarfile.TarFile.open(file.path, mode="r:*") as tar:
        tar.extractall(tmpdestination)

    for ix, path in enumerate(tmpdestination.iterdir()):
        if ix > 1:
            break

    # Just one file/folder: move it
    if ix == 0 and path.is_dir():
        logging.info("Moving single directory into destination")
        shutil.move(path, destination)
        shutil.rmtree(tmpdestination)
    else:
        shutil.move(tmpdestination, destination)
def _save(self, name, content):
    name = Path(name)
    self._validate(name)
    name = name.with_suffix(name.suffix.lower())
    hashed_dir = Path(self._hash_dir(content))
    dest_name = hashed_dir.joinpath(name)
    return super()._save(str(dest_name), content)
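# Minimal check of the only pathlib trick _save() above relies on: lowering
# the suffix while leaving the stem untouched. The name is hypothetical.
from pathlib import Path

name = Path("Holiday.JPG")
assert name.with_suffix(name.suffix.lower()) == Path("Holiday.jpg")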
async def drop(cls, *, directory='migrations', verbose=False, connection=None):
    """Drops the database and migrations, if any.

    Parameters
    -----------
    directory: str
        The migrations directory.
    verbose: bool
        Whether to output some information to stdout.
    connection: Optional[asyncpg.Connection]
        The connection to use, if not provided will acquire one from
        the internal pool.
    """

    directory = Path(directory) / cls.__tablename__
    p = directory.with_suffix('.json')
    current = directory.with_name('current-' + p.name)
    if not p.exists() or not current.exists():
        raise RuntimeError('Could not find the appropriate data files.')

    try:
        p.unlink()
    except OSError:
        raise RuntimeError('Could not delete migration file')

    try:
        current.unlink()
    except OSError:
        raise RuntimeError('Could not delete current migration file')

    async with MaybeAcquire(connection, pool=cls._pool) as con:
        sql = 'DROP TABLE {0} CASCADE;'.format(cls.__tablename__)
        if verbose:
            print(sql)
        await con.execute(sql)
def dir2fn(ofn, ifn, suffix) -> Path:
    """
    ofn = filename or output directory, to create filename based on ifn
    ifn = input filename (don't overwrite!)
    suffix = desired file extension e.g. .h5
    """
    if not ofn:  # no output file desired
        return

    ofn = Path(ofn).expanduser()
    ifn = Path(ifn).expanduser()
    assert ifn.is_file()

    if ofn.suffix == suffix:  # must already be a filename
        pass
    else:  # must be a directory
        assert ofn.is_dir(), f'output directory {ofn} does not exist'
        ofn = ofn / ifn.with_suffix(suffix).name

    try:
        assert not ofn.samefile(ifn), f'do not overwrite input file! {ifn}'
    except FileNotFoundError:
        # a good thing: the output file doesn't exist, and hence it's not the input file
        pass

    return ofn
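# Hedged usage sketch for dir2fn() above, assuming it is defined in the
# current module: a directory argument derives the output name from the input
# file, while a full filename passes through unchanged. Paths are hypothetical.
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as d:
    ifn = Path(d, "capture.dat")
    ifn.touch()
    assert dir2fn(d, ifn, '.h5') == Path(d, "capture.h5")
    assert dir2fn(Path(d, "out.h5"), ifn, '.h5') == Path(d, "out.h5")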
def save_to_hdf(df, fname, output_subdir=None):
    """Save temporary HDF file in output folder for later concatenation.

    By default the product is stored in HOME/output.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to save
    fname : string
        The product filename that was used to create this dataframe to save.
        The saving filename for the HDF file will be auto-determined from that.
    output_subdir : str
        String to determine a subfolder inside HOME/output where this data
        should be stored instead of just HOME/output
    """
    path = Path(fname)
    newfname = path.with_suffix('.h5').name
    folderpath = HOME / 'output'
    if output_subdir:
        folderpath = folderpath / output_subdir
    path = folderpath / newfname
    df.to_hdf(str(path), 'df', format='t')
    return str(path)
def build_ebook(self, output_file, add_page_markers=True):
    if not self._files:
        self._files = self.import_links_page()

    root_node = self.get_content_tree(self.get_index_doc())

    output_file = Path(output_file)
    if output_file.suffix != '.txt':
        output_file = output_file.with_suffix('.txt')

    if add_page_markers:
        fp = PagedFileWriter(output_file)
    else:
        fp = output_file.open('w')

    # Write tree to file.
    with fp:
        fp.write('# %s\n\n' % self.title)
        fp.write('作者:%s\n\n' % self.author)

        stack = deque()
        stack.append((1, root_node))
        while stack:
            level, node = stack.pop()
            if node.title != 'root':
                hashes = '#' * level
                fp.write('%s %s\n\n' % (hashes, node.title))
                if node.content:
                    # Strip any leading and trailing newlines.
                    fp.write(node.content.strip('\n') + '\n\n')
            for child in reversed(node.children):
                stack.append((level + 1, child))
def main():
    session = create_session(database='CtyOD')
    engine = session.get_bind()
    CtyOD.metadata.drop_all(engine)
    CtyOD.metadata.create_all(engine)
    field_name = [c.name for c in CtyOD.__table__.columns]
    load_string = (
        "LOAD DATA LOCAL INFILE 'ck/{!s}'"
        " INTO TABLE ctyodp4"
        " FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\r\\n'"
        " IGNORE 1 LINES;"
    )
    base_url = "http://cta.ornl.gov/transnet/"
    for idx in range(1, 7):
        csv_file = Path('CtyODp4-{}.csv'.format(idx))
        archive = Path('ck') / csv_file
        if not archive.exists():
            url = base_url + str(csv_file.with_suffix(".zip"))
            request = Request(url)
            with urlopen(request) as io:
                archive = ZipFile(BytesIO(io.read()))
            archive.extractall()
        session.execute(load_string.format(csv_file))
        session.commit()
    session.close()
def extract_verb(args, file: Path):
    if args.ignore_extracted and file.with_suffix('.png').exists():
        return
    with file.open('rb') as input_file:
        header = input_file.read(6)
        if header != b'mrgd00':
            return
        logging.info('Extracting from ' + file.name)
        logging.debug('header: {0}'.format(header.decode('ASCII')))
        number_of_entries, = struct.unpack('<H', input_file.read(2))
        logging.debug('found {0} entries'.format(number_of_entries))
        if not number_of_entries:
            return
        entries_descriptors = []
        for i in range(number_of_entries):
            sector_offset, offset, sector_size_upper_boundary, size = struct.unpack('<HHHH', input_file.read(8))
            entries_descriptors.append(
                ArchiveEntry(sector_offset, offset, sector_size_upper_boundary, size, number_of_entries))
        if args.bin:
            extract_bin(file, input_file, entries_descriptors, args.notmzx)
        else:
            MzpFile(file, input_file, entries_descriptors)
def hdf2video(data, imgh5, outfn, clim):
    outfn = Path(outfn).expanduser()

    import cv2
    try:
        from cv2.cv import FOURCC as fourcc  # Windows needs "from cv2.cv"
    except ImportError:
        from cv2 import VideoWriter_fourcc as fourcc

    outfn = outfn.with_suffix('.ogv')
    cc4 = fourcc(*'THEO')
    # we use isColor=True because some codecs have trouble with grayscale
    hv = cv2.VideoWriter(str(outfn), cc4,
                         fps=33,
                         frameSize=data.shape[1:][::-1],  # frameSize needs col,row
                         isColor=True)  # right now we're only using grayscale
    if not hv.isOpened():
        raise TypeError('trouble starting video file')

    for d in data:
        # RAM usage explodes if scaling all at once on a GB class file
        # for d in bytescale(data, 1000, 4000):
        # for d in sixteen2eight(data, (1000, 4000)):
        hv.write(gray2rgb(bytescale(d, clim[0], clim[1])))

    hv.release()
def runtest(self):
    scss_file = Path(str(self.fspath))
    css_file = scss_file.with_suffix('.css')

    with css_file.open('rb') as fh:
        # Output is Unicode, so decode this here
        expected = fh.read().decode('utf8')

    scss.config.STATIC_ROOT = str(scss_file.parent / 'static')

    search_path = []
    include = scss_file.parent / 'include'
    if include.exists():
        search_path.append(include)
    search_path.append(scss_file.parent)

    actual = compile_file(
        scss_file,
        output_style='expanded',
        search_path=search_path,
        extensions=[
            CoreExtension,
            ExtraExtension,
            FontsExtension,
            CompassExtension,
        ],
    )

    # Normalize leading and trailing newlines
    actual = actual.strip('\n')
    expected = expected.strip('\n')

    assert expected == actual
def write_template_desc(video_file: str):
    video_path = Path(video_file)
    if not video_path.exists():
        print("File not found: {}".format(video_path))
        return
    desc_path = video_path.with_suffix(_DESCRIPTOR_SUFFIX)
    if desc_path.exists():
        print("Error. Desc file already exists: {}".format(desc_path))
        return

    json_str = r'''{
    "title": "{%TITLE%}",
    "site": "Unknown",
    "duration": "00:00:00",
    "resolution": 1920,
    "group": "Inbox",
    "actors": ["Unknown"],
    "timeStamps": [
        {"ts":"00:10:00", "name":"?"},
        {"ts":"00:20:00", "name":"?"}
    ]
}'''
    json_str = json_str.replace("{%TITLE%}", video_path.name)

    fp = desc_path.open(mode="w")
    try:
        fp.write(json_str)
    finally:
        fp.close()
def plotfilt(b: np.ndarray, fs: int, ofn: Path = None):
    if fs is None:
        fs = 1  # normalized frequency

    L = b.size

    fg, axs = subplots(2, 1, sharex=False)
    freq, response = signal.freqz(b)
    response_dB = 20 * np.log10(abs(response))
    if response_dB.max() > 0:
        logging.error('filter may be unstable')
    axs[0].plot(freq * fs / (2 * np.pi), response_dB)
    axs[0].set_title(f'filter response {L} taps')
    axs[0].set_ylim((-100, None))
    axs[0].set_ylabel('|H| [dB]')
    axs[0].set_xlabel('frequency [Hz]')

    t = np.arange(0, L / fs, 1 / fs)
    axs[1].plot(t, b)
    axs[1].set_xlabel('time [sec]')
    axs[1].set_title('impulse response')
    axs[1].set_ylabel('amplitude')
    axs[1].autoscale(True, tight=True)

    fg.tight_layout()

    if ofn:
        ofn = Path(ofn).expanduser()
        ofn = ofn.with_suffix('.png')
        print('writing', ofn)
        fg.savefig(str(ofn), dpi=100, bbox_inches='tight')
def sitemap(suffix='.md'):
    # ensure we are in the ROOT_DIR
    os.chdir(ROOT_DIR)

    multilang = ['en/', 'zh-cn/', 'zh-tw/']
    pages = []
    raw_bytes = check_output('scripts/gitls.sh')
    # ignore the last blank string
    raw_strs = raw_bytes.decode("utf-8").split('\n')[:-1]
    for raw_str in raw_strs:
        date, raw_f = raw_str.split(' ')
        for lang in multilang:
            if raw_f.startswith(lang) and raw_f.endswith(suffix):
                if raw_f == lang + 'SUMMARY.md':
                    continue
                p = Path(raw_f)
                # rename README to index
                if p.name == 'README.md':
                    p = p.with_name('index.md')
                p = p.with_suffix('.html')
                fn = p.as_posix().lower()
                page = {}
                page['lastmod'] = date
                page['url'] = fn
                pages.append(page)

    root_url = 'http://algorithm.yuanbin.me'
    templates = os.path.join(BASE_DIR, 'sitemap' + os.sep + 'templates')
    env = Environment(loader=FileSystemLoader(templates))
    template = env.get_template('sitemap.xml')
    sitemap_xml = template.render(root_url=root_url, pages=pages, freq='daily')
    sitemap_fn = os.path.join(ROOT_DIR, 'sitemap.xml')
    with open(sitemap_fn, 'w') as sf:
        sf.write(sitemap_xml)
def __init__(self):
    """
    Reads county shapefiles.

    `shapes` is a list of the (lat/lon)s making up the shapes.
    `info` is a list of info about each shape.
    """
    mm = basemap.Basemap()
    gis_file = Path(basemap.basemap_datadir) / 'UScounties'
    if not gis_file.with_suffix('.shp').exists():
        msg = ('Cannot find {}.shp\nYou can install it with '
               '`conda install -c conda-forge basemap-data-hires`').format
        raise IOError(msg(str(gis_file)))
    county_info = mm.readshapefile(
        str(gis_file), 'counties',
        default_encoding='latin-1',
        drawbounds=False, linewidth=0.1)
    self.shapes = mm.counties
    self.info = mm.counties_info
    self._build_shape_lookup()
    self.cmap = pyplot.get_cmap('rainbow')
def main(source, dest):
    """Rename a Git repository and update its remote accordingly."""
    basicConfig(level=DEBUG)
    try:
        repo = Repo(source)
    except OSError:
        logger.exception('Error:')
        exit(1)
    else:
        dest = Path(dest)
        try:
            dest = dest.with_suffix('.git')
        except ValueError:
            logger.exception('Error:')
            exit(1)
        logger.info('Using dest: %s', dest)
        remote = repo.remote()
        logger.debug('Old URL: %s', remote.url)
        origin = Path(remote.url)
        logger.debug('Parent: %s', origin.parent)
        new = origin.parent / dest
        logger.info('Using URL: %s', new)
        conf = remote.config_writer
        conf.set('url', str(new))
        conf.release()
        Path(source).rename(dest)
        exit(0)
def __init__(self, name: str, flag='r+b'):
    """Initialize a Save object.

    The name argument will automatically be preceded by the `home` module
    variable. It will also automatically be followed by the file
    extension '.sve'.

    :param name: the name of the file to save to
    :param flag: the flags to use when opening the file (see `open`)
    """
    path = Path(name)
    path = path.with_suffix('.sve')
    if not path.exists():
        self._existed = False
        file = path.open('w')
        file.close()
    else:
        self._existed = True
    self._path = path
    self._flag = flag
    self._file = path.open(flag, buffering=0)
    self._is_closed = False
    # set current offset for registering
    self._offset = 0
    self._binding = {}
    self._cache = {}
    # implement thread-safe locking
    self._lock = Lock()
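# Caveat for Save above: with_suffix('.sve') replaces an existing suffix
# rather than appending, so a dotted save name changes more than the
# docstring suggests. The names here are hypothetical.
from pathlib import Path

assert Path("slot.1").with_suffix('.sve') == Path("slot.sve")
assert Path("slot1").with_suffix('.sve') == Path("slot1.sve")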
def save_figure(fig, destination, extra_artist=None):
    from pathlib import Path
    name = Path(destination)
    if extra_artist is not None:
        extra_args = {"extra_artists": (extra_artist,), "bbox_inches": "tight"}
    else:
        extra_args = {}
    for extension in [".pdf", ".svgz", ".png"]:
        if extension != ".png":  # the suffixes carry the leading dot
            fig.savefig(str(name.with_suffix(extension)), **extra_args)
        else:
            fig.savefig(str(name.with_suffix(extension)), dpi=300, **extra_args)
def save_figure(fig: plt.Figure, destination: str, extra_artist: mpl.artist.Artist = None):
    name = Path(destination)
    if extra_artist is not None:
        extra_args = {"extra_artists": (extra_artist,), "bbox_inches": "tight"}
    else:
        extra_args = {}
    for extension in [".pdf", ".svgz", ".png"]:
        if extension != ".png":  # the suffixes carry the leading dot
            fig.savefig(str(name.with_suffix(extension)), **extra_args)
        else:
            fig.savefig(str(name.with_suffix(extension)), dpi=300, **extra_args)
def load_local_configuration(configurable):
    """.configure() *configurable* with local configuration."""
    # This is a bit ugly and should be something upstream invoke should be able to do by itself.
    for path in ['/etc/qabel', '~/.qabel', Path(__file__).with_name('qabel')]:
        path = Path(path).expanduser()
        for suffix in ['.yaml', '.py', '.json']:
            path_with_suffix = path.with_suffix(suffix)
            if try_load(path_with_suffix, configurable):
                print('Picked up extra configuration from', path_with_suffix)
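# The probe order used above, spelled out. The base paths come from the
# snippet; the expansion itself is just with_suffix() on a suffix-less path.
from pathlib import Path

base = Path('/etc/qabel')
assert [base.with_suffix(s) for s in ['.yaml', '.py', '.json']] == \
    [Path('/etc/qabel.yaml'), Path('/etc/qabel.py'), Path('/etc/qabel.json')]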
def save_report(directory, source_path, report):
    rel_path = Path(str(source_path).replace('../', ''))
    path = Path(directory).joinpath(rel_path.with_suffix('.html'))
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open('w') as fp:
        fp.write(report)
def _check_ex_path(self, ex):
    from pathlib import Path
    file = Path(ex)
    if not file.suffix:
        file = file.with_suffix('.py')
    file = (Path('./examples') / file).resolve()
    assert file.is_file(), ('Invalid example %s' % file)
    return file
def _parse_filename(cls, filename, ext=''):
    p = Path(filename)
    p = p.resolve()
    if ext == '':
        ext = p.suffix
    # ensure the extension starts with a dot
    ext = '.' + (ext.strip('.'))
    # if the extension is not part of the filename, append it
    if p.suffix != ext:
        p = p.with_suffix(ext)
    return p
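# Minimal check of the normalization performed by _parse_filename() above:
# a bare extension gains its leading dot before comparison. The filename is
# hypothetical (and resolve() is skipped here for brevity).
from pathlib import Path

ext = '.' + 'txt'.strip('.')
assert Path('data.csv').with_suffix(ext).name == 'data.txt'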
def plotTexStats(data, texfn, debugon, texChanged):
    texfn = Path(texfn).expanduser()
    imgfn = texfn.with_suffix('.png')

    daten = [dt.fromtimestamp(ts) for ts in data[:, 0]]
    # if debugon: print(daten)
    print(f"{texfn} first / last mod time {daten[0].strftime('%Y-%m-%dT%H:%M:%S')} / {daten[-1].strftime('%Y-%m-%dT%H:%M:%S')}")
    daten = md.date2num(daten)

    fg = plt.figure()
    ax1 = fg.gca()
    ax2 = ax1.twinx()
    ax1.set_xlabel("Date")
    ax1.set_ylabel('Word Count')
    ax2.set_ylabel('Equation, Figure, Table Count')

    ax1.plot(daten, data[:, 1], linestyle='-', marker='.', color='b', label='Words')
    ax2.plot(daten, data[:, 3], linestyle='-', marker='.', color='r', label='Equations')
    ax2.plot(daten, data[:, 2], linestyle='-', marker='.', color='g', label='Figures + Tables')

    xLo = data[0, 0] - 86400   # set lower xlim to be 1 day prior to earliest data (fixes one-data-point corner case)
    xHi = data[-1, 0] + 86400  # set upper xlim to be 1 day after latest data (fixes one-data-point corner case)
    ax1.set_xlim(dt.fromtimestamp(xLo), dt.fromtimestamp(xHi))
    if (xHi - xLo) < 3 * 86400:
        xFmt = '%Y-%m-%dT%H'
    else:
        xFmt = '%Y-%m-%d'
    ax1.xaxis.set_major_formatter(md.DateFormatter(xFmt))

    for tl in ax1.get_yticklabels():
        tl.set_color('b')
    for tl in ax2.get_yticklabels():
        tl.set_color('r')
    # ax1.legend()
    ax2.legend(loc=2)
    ax1.set_title("Dissertation Progress")
    fg.autofmt_xdate()

    if texChanged:
        if imgfn.is_file():  # an image file already exists; archive it under its mod time
            imgModTime = dt.fromtimestamp(imgfn.stat().st_mtime).strftime('%Y-%m-%dT%H-%M-%S')
            oldFN = imgfn.with_name(f'{imgfn.stem}-{imgModTime}.png')
            if debugon:
                print(f"Moving {imgfn} to {oldFN}")
            shutil.move(imgfn, oldFN)
        if debugon:
            print(f'saving updated figure {imgfn}')
        fg.savefig(str(imgfn), bbox_inches='tight')

    plt.show()
def render(self, filename_root=None, file=None):
    """Render the document repeatedly until the output no longer changes due
    to cross-references that need some iterations to converge."""
    self.error = False
    filename_root = Path(filename_root) if filename_root else None
    if filename_root and file is None:
        extension = self.backend_document.extension
        filename = filename_root.with_suffix(extension)
        file = filename.open('wb')
    elif file and filename_root is None:
        filename = getattr(file, 'name', None)
    else:
        raise ValueError("You need to specify either 'filename_root' or "
                         "'file'.")

    def has_converged(part_page_counts):
        """Return `True` if the last rendering iteration converged to a
        stable result.

        Practically, this tests whether the total number of pages and
        page references to document elements have changed since the
        previous rendering iteration."""
        nonlocal prev_number_of_pages, prev_page_references
        return (part_page_counts == prev_number_of_pages
                and self.page_references == prev_page_references)

    fake_container = FakeContainer(self)
    try:
        self.document_tree.build_document(fake_container)
        (prev_number_of_pages,
         prev_page_references) = self._load_cache(filename_root)
        self.part_page_counts = prev_number_of_pages
        self.prepare(fake_container)
        self.page_elements.clear()
        self.page_references = prev_page_references.copy()
        self.part_page_counts = self._render_pages()
        while not has_converged(self.part_page_counts):
            prev_number_of_pages = self.part_page_counts
            prev_page_references = self.page_references.copy()
            print('Not yet converged, rendering again...')
            del self.backend_document
            self.backend_document = self.backend.Document(self.CREATOR)
            self.part_page_counts = self._render_pages()
        self.create_outlines()
        if filename:
            self._save_cache(filename_root, self.part_page_counts,
                             self.page_references)
            self.style_log.write_log(filename_root)
            print('Writing output: {}'.format(filename))
        self.backend_document.write(file)
    finally:
        if filename_root:
            file.close()
    return not self.error
class QUBE(object):
    def __init__(self, fname):
        self.path = Path(fname)
        # file management
        self.file_id = self.path.stem
        self.label_fname = self.path.with_suffix(".LBL")
        self.data_fname = self.path.with_suffix(".DAT")
        # read the data
        self.data1D = (np.fromfile(str(self.data_fname), ">H")).astype(np.uint16)
        # label stuff
        self.label = pvl.load(str(self.label_fname))
        self.cubelabel = self.label["QUBE"]
        self.LINE_BIN = self.cubelabel["LINE_BIN"]
        self.BAND_BIN = self.cubelabel["BAND_BIN"]
        self.shape = tuple(self.cubelabel["CORE_ITEMS"])
        self.line_range = (
            self.cubelabel["UL_CORNER_LINE"],
            self.cubelabel["LR_CORNER_LINE"] + 1,  # + 1 for numpy slicing
        )
        self.band_range = (
            self.cubelabel["UL_CORNER_BAND"],
            self.cubelabel["LR_CORNER_BAND"] + 1,  # + 1 for numpy slicing
        )
        # reshape the data with info from the label
        self.data = self.data1D.reshape(self.shape, order="F")[
            slice(*self.band_range), slice(*self.line_range), :
        ]

    @property
    def n_integrations(self):
        return self.shape[-1]

    @property
    def waves(self):
        return np.linspace(self.wave_min, self.wave_max, self.shape[0])

    @property
    def xarray(self):
        return xr.DataArray(self.data)
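# The sibling-file convention QUBE above relies on, in isolation: label and
# data files share a stem and differ only in suffix. The stem is hypothetical.
from pathlib import Path

p = Path('C1634567890_1.QUB')
assert p.with_suffix('.LBL').name == 'C1634567890_1.LBL'
assert p.with_suffix('.DAT').name == 'C1634567890_1.DAT'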
def spoiler_path_for(save_spoiler: bool, output_file: Path) -> Optional[Path]:
    if save_spoiler:
        return output_file.with_suffix(f".{LayoutDescription.file_extension()}")
    else:
        return None
def img_name_to_int(img: Path):
    return int(img.with_suffix("").name)
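# Minimal check of img_name_to_int() above: with_suffix("") strips the last
# extension so only the numeric stem is parsed. The filename is hypothetical.
from pathlib import Path

assert int(Path("frames/0042.png").with_suffix("").name) == 42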
class Mesh:

    def __init__(self, file, hold_history=False, vs=None, faces=None, device='cpu', gfmm=True):
        if file is None:
            return
        self.filename = Path(file)
        self.vs = self.v_mask = self.edge_areas = None
        self.edges = self.gemm_edges = self.sides = None
        self.device = device
        if vs is not None and faces is not None:
            self.vs, self.faces = vs.cpu().numpy(), faces.cpu().numpy()
            self.scale, self.translations = 1.0, np.zeros(3,)
        else:
            self.vs, self.faces = load_obj(file)
            self.normalize_unit_bb()
        self.vs_in = copy.deepcopy(self.vs)
        self.v_mask = np.ones(len(self.vs), dtype=bool)
        self.build_gemm()
        self.history_data = None
        if hold_history:
            self.init_history()
        if gfmm:
            self.gfmm = self.build_gfmm()  # TODO get rid of this DS
        else:
            self.gfmm = None
        if type(self.vs) is np.ndarray:
            self.vs = torch.from_numpy(self.vs)
        if type(self.faces) is np.ndarray:
            self.faces = torch.from_numpy(self.faces)
        self.vs = self.vs.to(self.device)
        self.faces = self.faces.to(self.device).long()
        self.area, self.normals = self.face_areas_normals(self.vs, self.faces)

    def build_gemm(self):
        self.ve = [[] for _ in self.vs]
        self.vei = [[] for _ in self.vs]
        edge_nb = []
        sides = []
        edge2key = dict()
        edges = []
        edges_count = 0
        nb_count = []
        for face_id, face in enumerate(self.faces):
            faces_edges = []
            for i in range(3):
                cur_edge = (face[i], face[(i + 1) % 3])
                faces_edges.append(cur_edge)
            for idx, edge in enumerate(faces_edges):
                edge = tuple(sorted(list(edge)))
                faces_edges[idx] = edge
                if edge not in edge2key:
                    edge2key[edge] = edges_count
                    edges.append(list(edge))
                    edge_nb.append([-1, -1, -1, -1])
                    sides.append([-1, -1, -1, -1])
                    self.ve[edge[0]].append(edges_count)
                    self.ve[edge[1]].append(edges_count)
                    self.vei[edge[0]].append(0)
                    self.vei[edge[1]].append(1)
                    nb_count.append(0)
                    edges_count += 1
            for idx, edge in enumerate(faces_edges):
                edge_key = edge2key[edge]
                edge_nb[edge_key][nb_count[edge_key]] = edge2key[faces_edges[(idx + 1) % 3]]
                edge_nb[edge_key][nb_count[edge_key] + 1] = edge2key[faces_edges[(idx + 2) % 3]]
                nb_count[edge_key] += 2
            for idx, edge in enumerate(faces_edges):
                edge_key = edge2key[edge]
                sides[edge_key][nb_count[edge_key] - 2] = nb_count[edge2key[faces_edges[(idx + 1) % 3]]] - 1
                sides[edge_key][nb_count[edge_key] - 1] = nb_count[edge2key[faces_edges[(idx + 2) % 3]]] - 2
        self.edges = np.array(edges, dtype=np.int32)
        self.gemm_edges = np.array(edge_nb, dtype=np.int64)
        self.sides = np.array(sides, dtype=np.int64)
        self.edges_count = edges_count
        # lots of DS for loss
        self.nvs, self.nvsi, self.nvsin = [], [], []
        for i, e in enumerate(self.ve):
            self.nvs.append(len(e))
            self.nvsi.append(len(e) * [i])
            self.nvsin.append(list(range(len(e))))
        self.vei = torch.from_numpy(np.concatenate(np.array(self.vei)).ravel()).to(self.device).long()
        self.nvsi = torch.Tensor(np.concatenate(np.array(self.nvsi)).ravel()).to(self.device).long()
        self.nvsin = torch.from_numpy(np.concatenate(np.array(self.nvsin)).ravel()).to(self.device).long()
        ve_in = copy.deepcopy(self.ve)
        self.ve_in = torch.from_numpy(np.concatenate(np.array(ve_in)).ravel()).to(self.device).long()
        self.max_nvs = max(self.nvs)
        self.nvs = torch.Tensor(self.nvs).to(self.device).float()
        self.edge2key = edge2key

    def build_ef(self):
        edge_faces = dict()
        if type(self.faces) == torch.Tensor:
            faces = self.faces.cpu().numpy()
        else:
            faces = self.faces
        for face_id, face in enumerate(faces):
            for i in range(3):
                edge = tuple(sorted([face[i], face[(i + 1) % 3]]))
                if edge not in edge_faces:
                    edge_faces[edge] = []
                edge_faces[edge].append(face_id)
        for k in edge_faces.keys():
            if len(edge_faces[k]) < 2:
                edge_faces[k].append(edge_faces[k][0])
        return edge_faces

    def build_gfmm(self):
        edge_faces = self.build_ef()
        gfmm = []
        if type(self.faces) == torch.Tensor:
            faces = self.faces.cpu().numpy()
        else:
            faces = self.faces
        for face_id, face in enumerate(faces):
            neighbors = [face_id]
            for i in range(3):
                edge = tuple(sorted([face[i], face[(i + 1) % 3]]))
                neighbors.extend(list(set(edge_faces[edge]) - set([face_id])))
            gfmm.append(neighbors)
        return torch.Tensor(gfmm).long().to(self.device)

    def normalize_unit_bb(self):
        """
        normalizes to unit bounding box and translates to center
        """
        cache_norm_file = self.filename.with_suffix('.npz')
        if not cache_norm_file.exists():
            scale = max([self.vs[:, i].max() - self.vs[:, i].min() for i in range(3)])
            scaled_vs = self.vs / scale
            target_mins = [(scaled_vs[:, i].max() - scaled_vs[:, i].min()) / -2.0 for i in range(3)]
            translations = [(target_mins[i] - scaled_vs[:, i].min()) for i in range(3)]
            np.savez_compressed(cache_norm_file, scale=scale, translations=translations)
        # load from the cache
        cached_data = np.load(cache_norm_file, encoding='latin1', allow_pickle=True)
        self.scale, self.translations = cached_data['scale'], cached_data['translations']
        self.vs /= self.scale
        self.vs += self.translations[None, :]

    def discrete_project(self, pc: torch.Tensor, thres=0.9, cpu=False):
        with torch.no_grad():
            device = torch.device('cpu') if cpu else self.device
            pc = pc.double()
            if isinstance(self, Mesh):
                mid_points = self.vs[self.faces].mean(dim=1)
                normals = self.normals
            else:
                mid_points = self[:, :3]
                normals = self[:, 3:]
            pk12 = knn_points(mid_points[:, :3].unsqueeze(0), pc[:, :, :3], K=3).idx[0]
            pk21 = knn_points(pc[:, :, :3], mid_points[:, :3].unsqueeze(0), K=3).idx[0]
            loop = pk21[pk12].view(pk12.shape[0], -1)
            knn_mask = (loop == torch.arange(0, pk12.shape[0], device=self.device)[:, None]).sum(dim=1) > 0
            mid_points = mid_points.to(device)
            pc = pc[0].to(device)
            normals = normals.to(device)[~knn_mask, :]
            masked_mid_points = mid_points[~knn_mask, :]
            displacement = masked_mid_points[:, None, :] - pc[:, :3]
            torch.cuda.empty_cache()
            distance = displacement.norm(dim=-1)
            mask = (torch.abs(torch.sum((displacement / distance[:, :, None]) * normals[:, None, :],
                                        dim=-1)) > thres)
            if pc.shape[-1] == 6:
                pc_normals = pc[:, 3:]
                normals_correlation = torch.sum(normals[:, None, :] * pc_normals, dim=-1)
                mask = mask * (normals_correlation > 0)
            torch.cuda.empty_cache()
            distance[~mask] += float('inf')
            min, argmin = distance.min(dim=-1)
            pc_per_face_masked = pc[argmin, :].clone()
            pc_per_face_masked[min == float('inf'), :] = float('nan')
            pc_per_face = torch.zeros(mid_points.shape[0], 6).\
                type(pc_per_face_masked.dtype).to(pc_per_face_masked.device)
            pc_per_face[~knn_mask, :pc.shape[-1]] = pc_per_face_masked
            pc_per_face[knn_mask, :] = float('nan')
            # clean up
            del knn_mask
        return pc_per_face.to(self.device), (pc_per_face[:, 0] == pc_per_face[:, 0]).to(device)

    @staticmethod
    def face_areas_normals(vs, faces):
        if type(vs) is not torch.Tensor:
            vs = torch.from_numpy(vs)
        if type(faces) is not torch.Tensor:
            faces = torch.from_numpy(faces)
        face_normals = torch.cross(vs[faces[:, 1]] - vs[faces[:, 0]],
                                   vs[faces[:, 2]] - vs[faces[:, 1]])
        face_areas = torch.norm(face_normals, dim=1)
        face_normals = face_normals / face_areas[:, None]
        face_areas = 0.5 * face_areas
        return face_areas, face_normals

    def update_verts(self, verts):
        """
        update verts positions only, same connectivity
        :param verts: new verts
        """
        self.vs = verts

    def deep_copy(self):  # TODO see if can do this better
        new_mesh = Mesh(file=None)
        types = [np.ndarray, torch.Tensor, dict, list, str, int, bool, float]
        for attr in self.__dir__():
            if attr == '__dict__':
                continue
            val = getattr(self, attr)
            if type(val) == types[0]:
                new_mesh.__setattr__(attr, val.copy())
            elif type(val) == types[1]:
                new_mesh.__setattr__(attr, val.clone())
            elif type(val) in types[2:4]:
                new_mesh.__setattr__(attr, pickle.loads(pickle.dumps(val, -1)))
            elif type(val) in types[4:]:
                new_mesh.__setattr__(attr, val)
        return new_mesh

    def merge_vertices(self, edge_id):
        self.remove_edge(edge_id)
        edge = self.edges[edge_id]
        v_a = self.vs[edge[0]]
        v_b = self.vs[edge[1]]
        # update pA
        v_a.__iadd__(v_b)
        v_a.__itruediv__(2)
        self.v_mask[edge[1]] = False
        mask = self.edges == edge[1]
        self.ve[edge[0]].extend(self.ve[edge[1]])
        self.edges[mask] = edge[0]

    def remove_vertex(self, v):
        self.v_mask[v] = False

    def remove_edge(self, edge_id):
        vs = self.edges[edge_id]
        for v in vs:
            if edge_id not in self.ve[v]:
                print(self.ve[v])
                print(self.filename)
            self.ve[v].remove(edge_id)

    def clean(self, edges_mask, groups):
        edges_mask = edges_mask.astype(bool)
        torch_mask = torch.from_numpy(edges_mask.copy())
        self.gemm_edges = self.gemm_edges[edges_mask]
        self.edges = self.edges[edges_mask]
        self.sides = self.sides[edges_mask]
        new_ve = []
        edges_mask = np.concatenate([edges_mask, [False]])
        new_indices = np.zeros(edges_mask.shape[0], dtype=np.int32)
        new_indices[-1] = -1
        new_indices[edges_mask] = np.arange(0, np.ma.where(edges_mask)[0].shape[0])
        self.gemm_edges[:, :] = new_indices[self.gemm_edges[:, :]]
        for v_index, ve in enumerate(self.ve):
            update_ve = []
            # if self.v_mask[v_index]:
            for e in ve:
                update_ve.append(new_indices[e])
            new_ve.append(update_ve)
        self.ve = new_ve
        self.__clean_history(groups, torch_mask)

    def export(self, file):
        vs = self.vs.cpu().clone()
        vs -= self.translations[None, :]
        vs *= self.scale
        export(file, vs, self.faces)

    def init_history(self):
        self.history_data = {
            'groups': [],
            'gemm_edges': [self.gemm_edges.copy()],
            'occurrences': [],
            'edges_count': [self.edges_count],
        }

    def get_groups(self):
        return self.history_data['groups'].pop()

    def get_occurrences(self):
        return self.history_data['occurrences'].pop()

    def __clean_history(self, groups, pool_mask):
        if self.history_data is not None:
            self.history_data['occurrences'].append(groups.get_occurrences())
            self.history_data['groups'].append(groups.get_groups(pool_mask))
            self.history_data['gemm_edges'].append(self.gemm_edges.copy())
            self.history_data['edges_count'].append(self.edges_count)

    def unroll_gemm(self):
        self.history_data['gemm_edges'].pop()
        self.gemm_edges = self.history_data['gemm_edges'][-1]
        self.history_data['edges_count'].pop()
        self.edges_count = self.history_data['edges_count'][-1]

    @staticmethod
    def from_tensor(mesh, vs, faces, gfmm=True):
        mesh = Mesh(file=mesh.filename, vs=vs, faces=faces, device=mesh.device,
                    hold_history=True, gfmm=gfmm)
        return mesh

    def submesh(self, vs_index):
        return PartMesh.create_submesh(vs_index, self)
from pathlib import Path

# absolute path
Path("/home/luke")

# current folder
Path()

# relative path
path = Path("../classes/ecommerce/__init__.py")

# addition: the / operator joins paths
Path() / Path("ecommerce")
Path() / "ecommerce"

# user home path
Path.home()

path.exists()
path.is_file()
path.is_dir()

print(path.name)
print(path.stem)
print(path.suffix)
print(path.parent)

path = path.with_suffix(".txt")
print(path.absolute())
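# A few more with_suffix() behaviours worth keeping next to the notes above
# (standard pathlib semantics):
Path("archive.tar.gz").suffixes          # ['.tar', '.gz']
Path("archive.tar.gz").with_suffix("")   # removes only the last suffix -> archive.tar
Path("README").with_suffix(".md")        # adds a suffix when there is none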
def init_logger(**kwargs):
    args_str = ' '.join([f"{k}={v}" for (k, v) in kwargs.items()])

    formatter = logging.Formatter('%(asctime)s [%(levelname)-5s] %(message)s')

    # stream handler
    chdr = logging.StreamHandler()
    chdr.setLevel(logging.DEBUG)
    chdr.setFormatter(formatter)
    logger.addHandler(chdr)

    log_dir = kwargs.pop("log_dir", "./logs")
    rank = kwargs.pop("rank", None)

    # file handler
    if "log_file" in kwargs:
        log_file = kwargs.pop("log_file")
        log_path = Path(log_dir, log_file).resolve()
        if rank is not None:
            log_path = log_path.with_suffix(f".{rank}{log_path.suffix}")
        Path.mkdir(log_path.parent, parents=True, exist_ok=True)
        fhdr = logging.FileHandler(log_path)
        fhdr.setLevel(logging.DEBUG)
        fhdr.setFormatter(formatter)
        logger.addHandler(fhdr)
        logger.info(f"begins logging to file: {str(log_path)}")

    if "slack" in kwargs and kwargs["slack"]:
        try:
            env = str(Path(log_dir).name)
            if rank is not None:
                env += f"_rank{rank}"
            shdr = SlackClientHandler(env=env)
            shdr.setLevel(logging.INFO)
            shdr.setFormatter(formatter)
            logger.addHandler(shdr)
        except Exception:
            logger.error("failed to set up slack client")
            raise

    # prepare visdom
    logger.visdom = None
    if "visdom" in kwargs and kwargs["visdom"]:
        env = str(Path(log_dir).name)
        log_path = Path(log_dir, "visdom.log").resolve()
        visdom_host = kwargs.pop("visdom_host", "127.0.0.1")
        visdom_port = kwargs.pop("visdom_port", 8097)
        try:
            logger.visdom = VisdomLogger(host=visdom_host, port=visdom_port, env=env, log_path=log_path)
        except Exception:
            logger.error("failed to set up visdom")
            raise

    # prepare tensorboard
    logger.tensorboard = None
    if "tensorboard" in kwargs and kwargs["tensorboard"]:
        env = str(Path(log_dir, 'tensorboard').resolve())
        try:
            logger.tensorboard = TensorboardLogger(env)
        except Exception:
            logger.error("failed to set up tensorboard")

    # print version and args
    logger.info(f"PyTorch version: {torch.__version__}")
    logger.debug(f"command-line options: {' '.join(sys.argv)}")
    logger.info(f"args: {args_str}")
class Profile:
    def __init__(self, enabled=True, profile_file=None, **kwargs):
        # type: (bool, Union[Path, str], **Any) -> None
        if profile_file is None or isinstance(profile_file, Path):
            self._profile_file = profile_file
        else:
            self._profile_file = Path(profile_file)

        self._enabled = enabled
        self._kwargs = kwargs
        self._profile = None  # type: Optional[cProfile.Profile]

    def __enter__(self):
        # type: () -> Profile
        if self._enabled:
            cmk.utils.log.logger.info("Recording profile")
            self._profile = cProfile.Profile(**self._kwargs)
            self._profile.enable()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # type: (Optional[Type[BaseException]], Optional[BaseException], Optional[TracebackType]) -> None
        if not self._enabled:
            return

        if not self._profile:
            return

        self._profile.disable()

        if not self._profile_file:
            self._profile.print_stats()
            return

        self._write_profile()
        self._write_dump_script()

    def _write_profile(self):
        # type: () -> None
        if not self._profile:
            return
        self._profile.dump_stats(str(self._profile_file))
        cmk.utils.log.logger.info("Created profile file: %s", self._profile_file)

    def _write_dump_script(self):
        # type: () -> None
        if not self._profile_file:
            return
        script_path = self._profile_file.with_suffix(".py")
        with script_path.open("w", encoding="utf-8") as f:
            f.write(u"#!/usr/bin/env python\n"
                    "import pstats\n"
                    "stats = pstats.Stats(\"%s\")\n"
                    "stats.sort_stats('time').print_stats()\n" % self._profile_file)
        script_path.chmod(0o755)
        cmk.utils.log.logger.info("Created profile dump script: %s", script_path)
def path(self, destination: Path):
    return destination.with_suffix(".zip")
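# Caveat for the helper above: with_suffix() replaces everything after the
# last dot, so a dotted version name would lose its tail. Names hypothetical.
from pathlib import Path

assert Path("dataset-v1.2").with_suffix(".zip") == Path("dataset-v1.zip")
assert Path("dataset-v1_2").with_suffix(".zip") == Path("dataset-v1_2.zip")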
def download_raw_partial(self, url_cbin, url_ch, first_chunk=0, last_chunk=0):
    assert url_cbin.endswith('.cbin')
    assert url_ch.endswith('.ch')

    relpath = Path(url_cbin.replace(self._par.HTTP_DATA_SERVER, '.')).parents[0]
    target_dir = Path(self._get_cache_dir(None), relpath)
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    # First, download the .ch file.
    ch_local_path = Path(wc.http_download_file(
        url_ch,
        username=self._par.HTTP_DATA_SERVER_LOGIN,
        password=self._par.HTTP_DATA_SERVER_PWD,
        cache_dir=target_dir, clobber=True, offline=False, return_md5=False))
    ch_local_path = alfio.remove_uuid_file(ch_local_path)
    ch_local_path_renamed = ch_local_path.with_suffix('.chopped.ch')
    ch_local_path.rename(ch_local_path_renamed)
    assert ch_local_path_renamed.exists()
    ch_local_path = ch_local_path_renamed

    # Load the .ch file.
    with open(ch_local_path, 'r') as f:
        cmeta = json.load(f)

    # Get the first byte and number of bytes to download.
    total_n_samples = cmeta['chunk_bounds'][-1]

    i0 = cmeta['chunk_bounds'][first_chunk]
    cmeta['chunk_bounds'] = cmeta['chunk_bounds'][first_chunk:last_chunk + 2]
    cmeta['chunk_bounds'] = [_ - i0 for _ in cmeta['chunk_bounds']]
    assert len(cmeta['chunk_bounds']) >= 2
    assert cmeta['chunk_bounds'][0] == 0

    first_byte = cmeta['chunk_offsets'][first_chunk]
    cmeta['chunk_offsets'] = cmeta['chunk_offsets'][first_chunk:last_chunk + 2]
    cmeta['chunk_offsets'] = [_ - first_byte for _ in cmeta['chunk_offsets']]
    assert len(cmeta['chunk_offsets']) >= 2
    assert cmeta['chunk_offsets'][0] == 0
    n_bytes = cmeta['chunk_offsets'][-1]
    assert n_bytes > 0

    # Save the chopped chunk bounds and offsets.
    cmeta['sha1_compressed'] = None
    cmeta['sha1_uncompressed'] = None
    cmeta['chopped'] = True
    cmeta['chopped_first_sample'] = i0
    cmeta['chopped_total_samples'] = total_n_samples
    with open(ch_local_path, 'w') as f:
        json.dump(cmeta, f, indent=2, sort_keys=True)

    # Download the requested chunks.
    cbin_local_path = wc.http_download_file(
        url_cbin,
        username=self._par.HTTP_DATA_SERVER_LOGIN,
        password=self._par.HTTP_DATA_SERVER_PWD,
        cache_dir=target_dir, clobber=True, offline=False, return_md5=False,
        chunks=(first_byte, n_bytes))
    cbin_local_path = alfio.remove_uuid_file(cbin_local_path)
    cbin_local_path_renamed = cbin_local_path.with_suffix('.chopped.cbin')
    cbin_local_path.rename(cbin_local_path_renamed)
    assert cbin_local_path_renamed.exists()
    cbin_local_path = cbin_local_path_renamed

    import mtscomp
    reader = mtscomp.decompress(cbin_local_path, cmeta=ch_local_path)
    return reader
'''Parse an HTML table of codes, mapping each code to its
location name.
'''
import sys
from pathlib import Path
import json
import pprint
import lxml.html
from contextlib import suppress

file = Path(sys.argv[1])
with open(file, 'r') as f:
    html = '\n'.join(f.readlines())

codes = {}
root = lxml.html.document_fromstring(html)
for row in root.xpath('//table/tr/td/table/tr'):
    cells = row.xpath('./td')
    with suppress(IndexError):
        code = cells[1].text_content()
        name = cells[2].text_content()
        try:
            codes[code] = name
            # print(f'{code:>16}: {name}')
        except TypeError:
            pprint.pprint([code, name])

with open(file.with_suffix('.json'), 'w') as f:
    json.dump(codes, f)
class ConfigFile(ConfigParser):
    """A version of ConfigParser which can easily save itself.

    The config will track whether any values change, and only resave
    if modified.
    get_val, get_bool, and get_int are modified to return defaults instead
    of erroring.
    """
    has_changed: bool
    filename: Optional[Path]
    _writer: Optional[AtomicWriter]

    def __init__(
        self,
        filename: Optional[str],
        *,
        in_conf_folder: bool=True,
        auto_load: bool=True,
    ) -> None:
        """Initialise the config file.

        `filename` is the name of the config file, in the `root` directory.
        If `auto_load` is true, this file will immediately be read and parsed.
        If in_conf_folder is set, the folder is relative to the 'config/'
        folder in the BEE2 folder.
        """
        super().__init__()
        self.has_changed = False

        if filename is not None:
            if in_conf_folder:
                self.filename = utils.conf_location('config') / filename
            else:
                self.filename = Path(filename)

            self._writer = AtomicWriter(self.filename)
            self.has_changed = False

            if auto_load:
                self.load()
        else:
            self.filename = self._writer = None

    def load(self) -> None:
        """Load config options from disk."""
        if self.filename is None:
            return

        try:
            with open(self.filename, 'r') as conf:
                self.read_file(conf)
        # If we fail, just continue - we just use the default values
        except FileNotFoundError:
            LOGGER.warning(
                'Config "{}" not found! Using defaults...',
                self.filename,
            )
        except (IOError, ParsingError):
            LOGGER.warning(
                'Config "{}" cannot be read! Using defaults...',
                self.filename,
                exc_info=True,
            )
            try:
                self.filename.replace(self.filename.with_suffix('.err.cfg'))
            except IOError:
                pass

        # We're not different to the file on disk..
        self.has_changed = False

    def save(self) -> None:
        """Write our values out to disk."""
        LOGGER.info('Saving changes in config "{}"!', self.filename)
        if self.filename is None or self._writer is None:
            raise ValueError('No filename provided!')

        with self._writer as conf:
            self.write(conf)
        self.has_changed = False

    def save_check(self) -> None:
        """Check to see if we have different values, and save if needed."""
        if self.has_changed:
            self.save()

    def set_defaults(self, def_settings: Mapping[str, Mapping[str, Any]]) -> None:
        """Set the default values if the settings file has no values defined."""
        for sect, values in def_settings.items():
            if sect not in self:
                self[sect] = {}
            for key, default in values.items():
                if key not in self[sect]:
                    self[sect][key] = str(default)
        self.save_check()

    def get_val(self, section: str, value: str, default: str) -> str:
        """Get the value in the specified section.

        If either does not exist, set to the default and return it.
        """
        if section not in self:
            self[section] = {}
        if value in self[section]:
            return self[section][value]
        else:
            self.has_changed = True
            self[section][value] = default
            return default

    def __getitem__(self, section: str) -> SectionProxy:
        """Allows setting/getting config[section][value]."""
        try:
            return super().__getitem__(section)
        except KeyError:
            self[section] = {}
            return super().__getitem__(section)

    def getboolean(self, section: str, value: str, default: bool=False) -> bool:
        """Get the value in the specified section, coercing to a Boolean.

        If either does not exist, set to the default and return it.
        """
        if section not in self:
            self[section] = {}
        try:
            return super().getboolean(section, value)
        except (ValueError, NoOptionError):
            # Invalid boolean, or not found
            self.has_changed = True
            self[section][value] = str(int(default))
            return default

    get_bool = getboolean

    def getint(self, section: str, value: str, default: int=0) -> int:
        """Get the value in the specified section, coercing to an Integer.

        If either does not exist, set to the default and return it.
        """
        if section not in self:
            self[section] = {}
        try:
            return super().getint(section, value)
        except (ValueError, NoOptionError):
            self.has_changed = True
            self[section][value] = str(int(default))
            return default

    get_int = getint

    def add_section(self, section: str) -> None:
        self.has_changed = True
        super().add_section(section)

    def remove_section(self, section: str) -> bool:
        self.has_changed = True
        return super().remove_section(section)

    def set(self, section: str, option: str, value: str) -> None:
        orig_val = self.get(section, option, fallback=None)
        value = str(value)
        if orig_val is None or orig_val != value:
            self.has_changed = True
            super().set(section, option, value)

    add_section.__doc__ = ConfigParser.add_section.__doc__
    remove_section.__doc__ = ConfigParser.remove_section.__doc__
    set.__doc__ = ConfigParser.set.__doc__
    __version__ as sp_version,
    natural_time,
)

import asyncio


def coro(f: Any) -> Any:
    @wraps(f)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        return asyncio.run(f(*args, **kwargs))
    return wrapper


token_file = Path("~/.surepy.token").expanduser()
old_token_file = token_file.with_suffix(".old_token")

console = Console(width=120)

CONTEXT_SETTINGS: dict[str, Any] = dict(help_option_names=["--help"])

version_message = (
    f" [#ffffff]{sp_name}[/] 🐾 [#666666]v[#aaaaaa]{sp_version.replace('.', '[#ff1d5e].[/]')}"
)


def print_header() -> None:
    """print header to terminal"""
    print()
    console.print(version_message, justify="left")
    print()
def import_path(
    p: Union[str, "os.PathLike[str]"],
    *,
    mode: Union[str, ImportMode] = ImportMode.prepend,
    root: Path,
) -> ModuleType:
    """Import and return a module from the given path, which can be a file
    (a module) or a directory (a package).

    The import mechanism used is controlled by the `mode` parameter:

    * `mode == ImportMode.prepend`: the directory containing the module
      (or package, taking `__init__.py` files into account) will be put at
      the *start* of `sys.path` before being imported with `__import__`.

    * `mode == ImportMode.append`: same as `prepend`, but the directory will
      be appended to the end of `sys.path`, if not already in `sys.path`.

    * `mode == ImportMode.importlib`: uses more fine control mechanisms provided
      by `importlib` to import the module, which avoids having to use `__import__`
      and muck with `sys.path` at all. It effectively allows having same-named
      test modules in different places.

    :param root:
        Used as an anchor when mode == ImportMode.importlib to obtain
        a unique name for the module being imported so it can safely be
        stored into ``sys.modules``.

    :raises ImportPathMismatchError:
        If after importing the given `path` and the module `__file__`
        are different. Only raised in `prepend` and `append` modes.
    """
    mode = ImportMode(mode)

    path = Path(p)

    if not path.exists():
        raise ImportError(path)

    if mode is ImportMode.importlib:
        module_name = module_name_from_path(path, root)

        for meta_importer in sys.meta_path:
            spec = meta_importer.find_spec(module_name, [str(path.parent)])
            if spec is not None:
                break
        else:
            spec = importlib.util.spec_from_file_location(module_name, str(path))

        if spec is None:
            raise ImportError(f"Can't find module {module_name} at location {path}")
        mod = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = mod
        spec.loader.exec_module(mod)  # type: ignore[union-attr]
        insert_missing_modules(sys.modules, module_name)
        return mod

    pkg_path = resolve_package_path(path)
    if pkg_path is not None:
        pkg_root = pkg_path.parent
        names = list(path.with_suffix("").relative_to(pkg_root).parts)
        if names[-1] == "__init__":
            names.pop()
        module_name = ".".join(names)
    else:
        pkg_root = path.parent
        module_name = path.stem

    # Change sys.path permanently: restoring it at the end of this function would cause surprising
    # problems because of delayed imports: for example, a conftest.py file imported by this function
    # might have local imports, which would fail at runtime if we restored sys.path.
    if mode is ImportMode.append:
        if str(pkg_root) not in sys.path:
            sys.path.append(str(pkg_root))
    elif mode is ImportMode.prepend:
        if str(pkg_root) != sys.path[0]:
            sys.path.insert(0, str(pkg_root))
    else:
        assert_never(mode)

    importlib.import_module(module_name)

    mod = sys.modules[module_name]
    if path.name == "__init__.py":
        return mod

    ignore = os.environ.get("PY_IGNORE_IMPORTMISMATCH", "")
    if ignore != "1":
        module_file = mod.__file__
        if module_file is None:
            raise ImportPathMismatchError(module_name, module_file, path)

        if module_file.endswith((".pyc", ".pyo")):
            module_file = module_file[:-1]
        if module_file.endswith(os.path.sep + "__init__.py"):
            module_file = module_file[: -(len(os.path.sep + "__init__.py"))]

        try:
            is_same = _is_same(str(path), module_file)
        except FileNotFoundError:
            is_same = False

        if not is_same:
            raise ImportPathMismatchError(module_name, module_file, path)

    return mod
def download_file(srcfile, ChunkSize_default):
    logger.info(f'Start file: {srcfile["Key"]}')
    dir_and_key = Path(DesDir) / srcfile["Key"]
    if Path.exists(dir_and_key):
        if dir_and_key.stat().st_size == srcfile["Size"] or dir_and_key.is_dir():
            logger.info(f'Duplicated: {dir_and_key.as_uri()} same size, skipping to next file.')
            return

    # Create the directory structure for the file
    path = dir_and_key.parent
    if not Path.exists(path):
        create_dir(path)

    # If it is a subfolder, skip the download
    if srcfile["Key"][-1] == '/':
        Path.mkdir(dir_and_key)
        logger.info(f'Create empty subfolder: {dir_and_key.as_uri()}')
        return

    # Get the list of part numbers that were already downloaded
    partnumberList = []
    try:
        with sqlite3.connect('s3_download.db') as db:
            cursor = db.cursor()
            p_sql = cursor.execute(
                f"SELECT PARTNUMBER FROM S3P WHERE BUCKET='{SrcBucket}' AND KEY='{dir_and_key.as_uri()}'")
            db.commit()
            partnumberList = [d[0] for d in p_sql]
            logger.info(f'Got partnumberList {dir_and_key.as_uri()} - {json.dumps(partnumberList)}')
    except Exception as e:
        logger.error(f'Fail to select partnumber from DB. {str(e)}')

    # Get the index list, e.g. [0, 10, 20]
    indexList, ChunkSize_auto = split(srcfile, ChunkSize_default)

    # Perform the download
    s3tmp_name = dir_and_key.with_suffix('.s3tmp')
    if Path.exists(s3tmp_name):
        mode = 'r+b'
    else:
        # If there is no temp file, or it was deleted, create a new file and clear partnumberList
        mode = 'wb'
        partnumberList = []
    with open(s3tmp_name, mode) as wfile:
        download_part(indexList, partnumberList, srcfile, ChunkSize_auto, wfile)

    # Rename the .s3tmp file and clean up the partnumber database
    s3tmp_name.rename(dir_and_key)
    try:
        with sqlite3.connect('s3_download.db') as db:
            cursor = db.cursor()
            cursor.execute(
                f"DELETE FROM S3P WHERE BUCKET='{SrcBucket}' AND KEY='{dir_and_key.as_uri()}'")
            db.commit()
    except Exception as e:
        logger.warning(f'Fail to clean DB: {dir_and_key.as_uri()}. {str(e)}')

    logger.info(f'Finish: {srcfile["Key"]} TO {dir_and_key.as_uri()}')
    return
class Mesh: def __init__(self, file, hold_history=False, vs=None, faces=None, device='cpu', gfmm=True): if file is None: return self.filename = Path(file) self.vs = self.v_mask = self.edge_areas = None self.edges = self.gemm_edges = self.sides = None self.device = device if vs is not None and faces is not None: self.vs, self.faces = vs.cpu().numpy(), faces.cpu().numpy() self.scale, self.translations = 1.0, np.zeros(3,) else: self.vs, self.faces = load_obj(file) self.normalize_unit_bb() self.vs_in = copy.deepcopy(self.vs) self.v_mask = np.ones(len(self.vs), dtype=bool) self.build_gemm() self.history_data = None if hold_history: self.init_history() if gfmm: self.gfmm = self.build_gfmm() #TODO get rid of this DS else: self.gfmm = None if type(self.vs) is np.ndarray: self.vs = torch.from_numpy(self.vs) if type(self.faces) is np.ndarray: self.faces = torch.from_numpy(self.faces) self.vs = self.vs.to(self.device) self.faces = self.faces.to(self.device) def build_gemm(self): self.ve = [[] for _ in self.vs] self.vei = [[] for _ in self.vs] edge_nb = [] sides = [] edge2key = dict() edges = [] edges_count = 0 nb_count = [] for face_id, face in enumerate(self.faces): faces_edges = [] for i in range(3): cur_edge = (face[i], face[(i + 1) % 3]) faces_edges.append(cur_edge) for idx, edge in enumerate(faces_edges): edge = tuple(sorted(list(edge))) faces_edges[idx] = edge if edge not in edge2key: edge2key[edge] = edges_count edges.append(list(edge)) edge_nb.append([-1, -1, -1, -1]) sides.append([-1, -1, -1, -1]) self.ve[edge[0]].append(edges_count) self.ve[edge[1]].append(edges_count) self.vei[edge[0]].append(0) self.vei[edge[1]].append(1) nb_count.append(0) edges_count += 1 for idx, edge in enumerate(faces_edges): edge_key = edge2key[edge] edge_nb[edge_key][nb_count[edge_key]] = edge2key[faces_edges[(idx + 1) % 3]] edge_nb[edge_key][nb_count[edge_key] + 1] = edge2key[faces_edges[(idx + 2) % 3]] nb_count[edge_key] += 2 for idx, edge in enumerate(faces_edges): edge_key = edge2key[edge] sides[edge_key][nb_count[edge_key] - 2] = nb_count[edge2key[faces_edges[(idx + 1) % 3]]] - 1 sides[edge_key][nb_count[edge_key] - 1] = nb_count[edge2key[faces_edges[(idx + 2) % 3]]] - 2 self.edges = np.array(edges, dtype=np.int32) self.gemm_edges = np.array(edge_nb, dtype=np.int64) self.sides = np.array(sides, dtype=np.int64) self.edges_count = edges_count # lots of DS for loss self.nvs, self.nvsi, self.nvsin = [], [], [] for i, e in enumerate(self.ve): self.nvs.append(len(e)) self.nvsi.append(len(e) * [i]) self.nvsin.append(list(range(len(e)))) self.vei = torch.from_numpy(np.concatenate(np.array(self.vei)).ravel()).to(self.device).long() self.nvsi = torch.Tensor(np.concatenate(np.array(self.nvsi)).ravel()).to(self.device).long() self.nvsin = torch.from_numpy(np.concatenate(np.array(self.nvsin)).ravel()).to(self.device).long() ve_in = copy.deepcopy(self.ve) self.ve_in = torch.from_numpy(np.concatenate(np.array(ve_in)).ravel()).to(self.device).long() self.max_nvs = max(self.nvs) self.nvs = torch.Tensor(self.nvs).to(self.device).float() self.edge2key = edge2key def build_ef(self): edge_faces = dict() if type(self.faces) == torch.Tensor: faces = self.faces.cpu().numpy() else: faces = self.faces for face_id, face in enumerate(faces): for i in range(3): edge = tuple(sorted([face[i], face[(i + 1) % 3]])) if edge not in edge_faces: edge_faces[edge] = [] edge_faces[edge].append(face_id) for k in edge_faces.keys(): if len(edge_faces[k]) < 2: edge_faces[k].append(edge_faces[k][0]) return edge_faces def build_gfmm(self): edge_faces = 
self.build_ef() gfmm = [] if type(self.faces) == torch.Tensor: faces = self.faces.cpu().numpy() else: faces = self.faces for face_id, face in enumerate(faces): neighbors = [face_id] for i in range(3): edge = tuple(sorted([face[i], face[(i + 1) % 3]])) neighbors.extend(list(set(edge_faces[edge]) - set([face_id]))) gfmm.append(neighbors) return torch.Tensor(gfmm).long().to(self.device) def normalize_unit_bb(self): """ normalizes to unit bounding box and translates to center if no :param verts: new verts """ cache_norm_file = self.filename.with_suffix('.npz') if not cache_norm_file.exists(): scale = max([self.vs[:, i].max() - self.vs[:, i].min() for i in range(3)]) scaled_vs = self.vs / scale target_mins = [(scaled_vs[:, i].max() - scaled_vs[:, i].min()) / -2.0 for i in range(3)] translations = [(target_mins[i] - scaled_vs[:, i].min()) for i in range(3)] np.savez_compressed(cache_norm_file, scale=scale, translations=translations) # load from the cache cached_data = np.load(cache_norm_file, encoding='latin1', allow_pickle=True) self.scale, self.translations = cached_data['scale'], cached_data['translations'] self.vs /= self.scale self.vs += self.translations[None, :] def update_verts(self, verts): """ update verts positions only, same connectivity :param verts: new verts """ self.vs = verts def deep_copy(self): #TODO see if can do this better new_mesh = Mesh(file=None) types = [np.ndarray, torch.Tensor, dict, list, str, int, bool, float] for attr in self.__dir__(): if attr == '__dict__': continue val = getattr(self, attr) if type(val) == types[0]: new_mesh.__setattr__(attr, val.copy()) elif type(val) == types[1]: new_mesh.__setattr__(attr, val.clone()) elif type(val) in types[2:4]: new_mesh.__setattr__(attr, pickle.loads(pickle.dumps(val, -1))) elif type(val) in types[4:]: new_mesh.__setattr__(attr, val) return new_mesh def merge_vertices(self, edge_id): self.remove_edge(edge_id) edge = self.edges[edge_id] v_a = self.vs[edge[0]] v_b = self.vs[edge[1]] # update pA v_a.__iadd__(v_b) v_a.__itruediv__(2) self.v_mask[edge[1]] = False mask = self.edges == edge[1] self.ve[edge[0]].extend(self.ve[edge[1]]) self.edges[mask] = edge[0] def remove_vertex(self, v): self.v_mask[v] = False def remove_edge(self, edge_id): vs = self.edges[edge_id] for v in vs: if edge_id not in self.ve[v]: print(self.ve[v]) print(self.filename) self.ve[v].remove(edge_id) def clean(self, edges_mask, groups): edges_mask = edges_mask.astype(bool) torch_mask = torch.from_numpy(edges_mask.copy()) self.gemm_edges = self.gemm_edges[edges_mask] self.edges = self.edges[edges_mask] self.sides = self.sides[edges_mask] new_ve = [] edges_mask = np.concatenate([edges_mask, [False]]) new_indices = np.zeros(edges_mask.shape[0], dtype=np.int32) new_indices[-1] = -1 new_indices[edges_mask] = np.arange(0, np.ma.where(edges_mask)[0].shape[0]) self.gemm_edges[:, :] = new_indices[self.gemm_edges[:, :]] for v_index, ve in enumerate(self.ve): update_ve = [] # if self.v_mask[v_index]: for e in ve: update_ve.append(new_indices[e]) new_ve.append(update_ve) self.ve = new_ve self.__clean_history(groups, torch_mask) def export(self, file): vs = self.vs.cpu().clone() vs -= self.translations[None, :] vs *= self.scale export(file, vs, self.faces) def init_history(self): self.history_data = { 'groups': [], 'gemm_edges': [self.gemm_edges.copy()], 'occurrences': [], 'edges_count': [self.edges_count], } def get_groups(self): return self.history_data['groups'].pop() def get_occurrences(self): return self.history_data['occurrences'].pop() def __clean_history(self, 
groups, pool_mask): if self.history_data is not None: self.history_data['occurrences'].append(groups.get_occurrences()) self.history_data['groups'].append(groups.get_groups(pool_mask)) self.history_data['gemm_edges'].append(self.gemm_edges.copy()) self.history_data['edges_count'].append(self.edges_count) def unroll_gemm(self): self.history_data['gemm_edges'].pop() self.gemm_edges = self.history_data['gemm_edges'][-1] self.history_data['edges_count'].pop() self.edges_count = self.history_data['edges_count'][-1] @staticmethod def from_tensor(mesh, vs, faces, gfmm=True): mesh = Mesh(file=mesh.filename, vs=vs, faces=faces, device=mesh.device, hold_history=True, gfmm=gfmm) return mesh def submesh(self, vs_index): return PartMesh.create_submesh(vs_index, self)
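# A minimal sketch of the sidecar-cache idea behind normalize_unit_bb above:
# the normalization parameters are computed once and stored next to the mesh
# file via Path.with_suffix('.npz'). The function name, file layout, and the
# standalone signature are illustrative assumptions, not the Mesh class API.
import numpy as np
from pathlib import Path


def load_or_compute_norm(mesh_path: Path, vs: np.ndarray):
    cache = mesh_path.with_suffix('.npz')  # e.g. bunny.obj -> bunny.npz
    if not cache.exists():
        scale = max(vs[:, i].max() - vs[:, i].min() for i in range(3))
        scaled = vs / scale
        translations = np.array([
            (scaled[:, i].max() - scaled[:, i].min()) / -2.0 - scaled[:, i].min()
            for i in range(3)
        ])
        np.savez_compressed(cache, scale=scale, translations=translations)
    cached = np.load(cache, allow_pickle=True)
    return cached['scale'], cached['translations']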
class URL(urlpath.URL,urllib.parse._NetlocResultMixinStr, PurePath): ''' Derived from https://raw.githubusercontent.com/chrono-meter/urlpath/master/urlpath.py to provide more compatibility with pathlib.Path functionality ''' ''' modified new and init ''' def __new__(cls,*args,**kwargs): self = super(URL, cls).__new__(cls,*args) self.init(**kwargs) return self def __init__(self,*args,**kwargs): # remove any trailing '/' from args args = list(args) for i,arg in enumerate(args): arg = str(arg) while arg[-1] == '/': if len(arg) == 1: break arg = arg[:-1] args[i] = arg args = tuple(args) if not kwargs: kwargs = {} self.fourOhOne = False def init(self,**kwargs): self.__dict__.update(ginit(self,**kwargs)) if 'database' in self.__dict__ and type(self.database) == Database: # already have databse stored pass else: self.database = Database(self.db_file,\ **(fdict(self.__dict__.copy()))) def __del__(self): try: del self.database self.msg(f'clone: {url.is_clone}') except: pass def __exit__(self, exc_type, exc_value, traceback): '''cleanup''' try: del self.database except: pass tempfile.clean() def dedate(self): if '_cache_original' in self.__dict__: self.__dict__ = self._cache_original.copy() if '_cache_original' in self.__dict__: del self.__dict__['_cache_original'] def update(self,*args,**kwargs): '''update args in object''' if '_cache_original' not in self.__dict__: self._cache_original = self.__dict__.copy() # whetehr we specify full URL in update or not if ('full_url' in kwargs) and (kwargs['full_url'] == True): args = list(args) else: args = [str(self)] + list(args) url = super(URL, self).__new__(self,*args) url.is_clone = True url.__dict__ = fdict(self._cache_original.copy()) return url def check_path(self,ppp): ''' You can corrupt the database by having files where we expect directories so we need to clean these up ''' parts = list(ppp.parts) for i,part in enumerate(parts): this = Path(*(parts[:i+1])) if this.exists() and (not this.is_dir()): # warning path in expected directory self.msg('found non-directory term in path {str(this)}') try: self.msg('trying to correct') this.unlink() return True except: self.msg('failed to correct') return False return True def indb(self): # might be in database store_url = str(self) store_flag = 'data' ifile = self.get_name(self.database.get_from_db(store_flag,store_url)) if ifile: old = self.local_file self.local_file = Path(ifile) if self.local_file.exists() and self.local_file.suffix == '.store': return True if self.local_file.suffix != '.store': self.local_file = old return False return True return False def call_local(self): ''' sort out and return local_file This comes from the URL and local_dir and ends .store ''' if self.indb(): if callable(self.local): sys.msg(f"**unexpected method for self.local {self.local}") else: return self.local kwargs = fdict(self.__dict__.copy()) if 'local_dir' in kwargs and \ (kwargs['local_dir'] is not None) and \ len(kwargs['local_dir']) > 0: self.local_dir = list_resolve(kwargs['local_dir']) if (self.local_dir is None) or (len(self.local_dir) == 0): self.local_dir = list_resolve(self.db_dir) self.local_file = Path(self.local_dir[0],self.as_posix().split("://")[1]) #self.local_file = Path(self.local_dir[-1],str(self.with_scheme(''))[2:]).absolute() # replace ' ' self.local_file = Path(str(self.local_file).replace(' ','_')) suffix = self.local_file.suffix self.local_file = self.local_file.with_suffix(suffix + '.store') self.check_path(self.local_file.parent) self.local_file.parent.mkdir(parents=True,exist_ok=True) return 
self.local_file def get_read_file(self,filelist): filelist = name_resolve(filelist) readlist,writelist = list_info(filelist) filelist = np.array(filelist,dtype=np.object)[readlist] return (filelist.size and filelist[-1]) or None def get_write_file(self,filelist): filelist = name_resolve(filelist) readlist,writelist = list_info(filelist) filelist = np.array(filelist,dtype=np.object)[writelist] return (filelist.size and filelist[-1]) or None def get_readwrite_file(self,filelist): filelist = name_resolve(filelist) readlist,writelist = list_info(filelist) filelist = np.array(filelist,dtype=np.object)[np.logical_and(np.array(writelist),np.array(readlist))] return (filelist.size and filelist[-1]) or None def _local_file(self,mode="r"): '''get local file name''' if self.indb(): return self.local_file self.call_local() # clobber if not self.noclobber: local_file = self.get_write_file(self.local_file) # file name for writing elif mode == "r": local_file = self.get_read_file(self.local_file) if local_file and not local_file.exists(): self.msg("read file {local_file} doesnt exist") self.local_file = self.local_file[self.local_file != local_file] return self._local_file(mode="r") else: # file name for writing local_file = self.get_write_file(self.local_file) if local_file == None: return local_file # local_file is real if local_file.exists(): if local_file.is_dir(): try: local_file.rmdir() return None except: pass # delete the file if noclobber is False if not self.noclobber: try: self.msg(f"deleting existing file {local_file}") local_file.unlink() except: pass else: self.msg(f"keeping existing file {local_file}") return local_file def open(self,mode='r',buffering=-1, encoding=None, errors=None, newline=None): ''' Open the file pointed by this URL and return a file object, as the built-in open() function does. 
''' kwargs = {'mode':mode,'buffering':buffering,'encoding':encoding,\ 'errors':errors,'newline':newline} if self._isfile(): self.msg(f'{self} is not a URL: interpreting as Path') return Path(self).open(**kwargs) # check in database store_url = str(self) store_flag = 'data' binary = ('b' in mode) and ('t' not in mode) get_download,ifile,ofile = self._test_already_local() # get from ofile if ofile and Path(ofile).exists(): ofile = Path(ofile) if binary: data = io.BytesIO(ofile.read_bytes()) else: data = io.StringIO(ofile.read_text()) cache = {store_flag : { str(store_url) : str(ofile) }} self.database.set_db(cache) return data # get from ifile if ifile and Path(ifile).exists(): ifile = Path(ifile) if binary: data = io.BytesIO(ifile.read_bytes()) else: data = io.StringIO(ifile.read_text()) self.check_path(ifile.parent) ifile.parent.mkdir(parents=True,exist_ok=True) if ofile: ofile = Path(ofile) if binary: ofile.write_bytes(data) else: ofile.write_text(data) cache = {store_flag : { str(store_url) : str(ifile) }} self.database.set_db(cache) return data if 'r' in mode: self.msg(f"reading data from {self}") # read if binary: self.msg("open() binary stream") idata = self.read_bytes() data = io.BytesIO(idata) else: self.msg("open() text stream") idata = self.read_text() data = io.StringIO(idata) if ofile: try: ofile = Path(ofile) if binary: ofile.write_bytes(idata) else: ofile.write_text(idata) cache = {store_flag : { str(store_url) : str(ifile) }} self.database.set_db(cache) except: pass return data if ofile: return Path(ofile).open(**kwargs) def write_text(self,data, encoding=None, errors=None): '''Open the file in text mode, write to it, and close the file.''' kwargs = {'encoding':encoding} if self._isfile(): self.msg(f'{self} is not a URL: interpreting as Path') return Path(self).write_text(data) get_download,ifile,ofile = self._test_already_local() if ofile and Path(ofile).exists(): self.msg("file exists so not writing") return Path(ofile).stat().st_size if ofile: self.msg(f'opening output file {ofile}') return Path(ofile).write_text(data,**kwargs) def write_bytes(self,data): '''Open the file in bytes mode, write to it, and close the file.''' if self._isfile(): self.msg(f'{self} is not a URL: interpreting as Path') return Path(self).write_bytes(data) get_download,ifile,ofile = self._test_already_local() if ofile and Path(ofile).exists(): self.msg("file exists so not writing") return Path(ofile).stat().st_size if ofile: self.msg(f'opening output file {ofile}') return Path(ofile).write_bytes(data) def _get_login(self,head=True): u = self with requests.Session() as session: if u.username and u.password: session.auth = u.username,u.password else: uinfo = Cylog(u.anchor).login() if uinfo == (None,None): return None session.auth = uinfo[0].decode('utf-8'),uinfo[1].decode('utf-8') u.msg(f'logging in to {u.anchor}') try: r1 = session.request('get',u) if r1.status_code == 200: u.msg(f'data read from {u.anchor}') return r1 # try encoded login if head: r2 = session.head(r1.url) else: r2 = session.get(r1.url) if r2.status_code == 200: u.msg(f'data read from {u.anchor}') if type(r2) == requests.models.Response: return r2 except: u.msg(f'failure reading data from {u.anchor}') return None u.msg(f'failure reading data from {u.anchor}') return None def msg(self,*args): '''msg to self.stderr''' this = str(*args) try: # DONT REPEAT MESSAGES ... 
doesnt work as yet if this in self.store_msg: return self.store_msg.extend(this) except: self.store_msg = [this] try: if self.verbose or (self.log is not None): print('-->',*args,file=self.stderr) except: pass def get_name(self,ofile): if ofile == [] or ofile == {}: ofile = None if type(ofile) == list: ofile = ofile[0] if type(ofile) == dict: ofile = list(ofile.values())[0] return ofile def _test_already_local(self): # get local_filename we would use for output # delete it if not noclobber # dont greate dir if it doesnt exist # return False if already downloaded # check in database store_url = str(self) store_flag = 'data' ifile = self.get_name(self.database.get_from_db(store_flag,store_url)) if ifile is not None: ifile = Path(ifile) if not ifile.exists(): # otherwise incorrect db entry self.database.rm_from_db(store_flag,store_url) if not self.noclobber and ifile.exists(): # clobber self.msg(f'deleting local file {ifile}') ifile.unlink() ifile = None ofile = self.get_name(self._local_file("w")) if callable(ofile): print(f"ERROR in type of self.lcoal {ofile}: should be str or list") sys.exit(1) if ifile is None: return True,ifile,ofile if not ifile.exists(): return True,None,ofile # simple if no size check if (not self.size_check) and ifile.exists(): self.msg(f'local file {ifile} exists') #: no size check') # cache this in case we want to re-use it cache = {store_flag : { str(store_url) : str(ifile) }} self.database.set_db(cache) return False,ifile,ofile if self.size_check: lsize = ifile.stat().st_size rsize = self.stat().st_size if rsize < 0: # then its not available self.msg(f'not downloading file') # we might not want to download # cache this in case we want to re-use it cache = {store_flag : { str(store_url) : ifile }} self.database.set_db(cache) return False,ifile,ofile elif lsize == rsize: self.msg(f'local and remote file sizes equal {lsize}') self.msg(f'not downloading file') # we might not want to download # cache this in case we want to re-use it cache = {store_flag : { str(store_url) : ifile }} self.database.set_db(cache) return False,ifile,ofile self.msg(f'local and remote file sizes not equal {lsize}/{rsize} respectively') self.msg(f'so we need to download (or set size_check=False)') if not self.noclobber: if ifile and ifile.exists(): self.msg(f'deleting local ifile {local_file}') ifile.unlink() ifile = None if ofile and ofile.exists(): self.msg(f'deleting local ofile {local_file}') ofile.unlink() ofile = None return True,ifile,ofile def read_text(self, encoding=None, errors=None): '''Open the URL, read in text mode and return text.''' kwargs = {'encoding':encoding} u = self store_url = str(u) store_flag = 'data' if u._isfile(): self.msg(f'{u} is not a URL: interpreting as Path') return Path(u).read_text() get_download,ifile,ofile = self._test_already_local() text = None # get it from ofile if ofile and Path(ofile).exists(): text = Path(ofile).read_text(**kwargs) cache = {store_flag : { str(store_url) : str(ofile) }} self.database.set_db(cache) return text # get it from ifile if ifile and Path(ifile).exists(): self.msg(f'opening already downloaded file {ifile}') text = Path(ifile).read_text(**kwargs) if ofile: ofile = Path(ofile) ofile.write_text(text) cache = {store_flag : { str(store_url) : str(ofile) }} else: cache = {store_flag : { str(store_url) : str(ifile) }} self.database.set_db(cache) return text if text is not None: return text try: u.msg(f'trying {self}') text = u.get_text() if text and ofile: try: ofile = Path(ofile) self.check_path(ofile.parent) 
ofile.parent.mkdir(parents=True,exist_ok=True) ofile.write_text(text) cache = {store_flag : { str(store_url) : str(ofile) }} self.database.set_db(cache) return text except: pass if text: return text except: pass u.msg(f'getting login') r = u._get_login(head=False) if type(r) != requests.models.Response: return None if r.status_code == 200: u.msg(f'code {r.status_code}') text = r.text if ofile: ofile = Path(ofile) self.check_path(ofile.parent) ofile.parent.mkdir(parents=True,exist_ok=True) ofile.write_text(text) cache = {store_flag : { str(store_url) : str(ofile) }} self.database.set_db(cache) return text if type(r) == requests.models.Response: u.msg(f'code {r.status_code}') return r u.msg(f'failed to connect') return None def local(self,get_file=False): ''' local filename''' u = self get_download,ifile,ofile = u._test_already_local() for f in [ifile,ofile]: if f and get_file: if Path(f).exists(): return Path(f) else: # pull file self.read_bytes() return self.local(get_file=get_file) elif f: return Path(f) return None def exists(self): '''Whether this URL exists and can be accessed''' u = self store_url = str(u) store_flag = 'exists' ex = self.database.get_from_db(store_flag,store_url) if ex is not None: return ex ex = False get_download,ifile,ofile = u._test_already_local() if ofile and Path(ofile).exists(): ex = True cache = {store_flag : { str(store_url) : True }} if not ex: ex = self.ping() if ex: cache = {store_flag : { str(store_url) : True }} self.database.set_db(cache) return ex def stat(self, head=False): ''' Some of the functionality of stat for URLs Currently, only stat_result.st_size is used. ''' input = [0,0,0,0,0,0,self._st_size(head=head),0,0,0] stat_result = os.stat_result(input) return stat_result def _isfile(self): if self.scheme == '' or self.scheme == 'file': self.msg('we are a file ...') return True #self.msg('we are not a file ...') return False def _st_size(self, head=False): ''' retrieve the remote file size You should specify any required login/password with with_components(username=str,password=str) Returns: int if data available Or: -1 ''' u = self # check in database store_url = u store_flag = 'st_size' remote_size = self.database.get_from_db(store_flag,store_url) if remote_size is not None: return remote_size remote_size = -1 if u._isfile(): self.msg(f'{u} is not a URL: interpreting as Path') # not a URL u = Path(u) return u.stat().st_size try: u.msg(f'trying {u}') if head: r = u.head() else: r = u.get() if type(r) == requests.models.Response: if r.status_code == 200: u.msg(f'code 200') hdr = r.headers if "Content-Length" in hdr.keys(): remote_size = int(hdr["Content-Length"]) elif 'Transfer-Encoding' in hdr.keys() and hdr["Transfer-Encoding"] == 'chunked': u.msg(f'file is compressed, remote size not directly available') #self.msg(hdr) if remote_size > 0: # cache this in case we want to re-use it cache = {store_flag : { str(store_url) : remote_size }} self.database.set_db(cache) return(remote_size) # if r.status_code == 401: u.msg(f'code 401') self.fourOhOne = True if self.fourOhOne: # unauthorised # more complex session login and auth # e.g. 
needed for NASA Earthdata login u.msg(f'getting login') r = u._get_login(head=head) if r.status_code == 200: u.msg(f'code 200') hdr = r.headers if "Content-Length" in hdr: remote_size = int(hdr["Content-Length"]) if remote_size > 0: # cache this in case we want to re-use it cache = {store_flag : { str(store_url) : remote_size }} self.database.set_db(cache) return(remote_size) elif head == False: u.msg(f'code {r.status_code}') return remote_size # return it even if 0 return remote_size except: pass if head == False: u.msg(f'failed to connect') # give up remote_size = -2 # cache this in case we want to re-use it even if its -1 cache = {store_flag : { str(store_url) : remote_size }} self.database.set_db(cache) return remote_size u.msg(f'trying get') return u.st_size(head=False) def ping(self, head=True): ''' ping the URL data return True if response is 200 You should specify any required login/password with with_components(username=str,password=str) Returns: True if data available Or: False ''' u = self if u._isfile(): self.msg(f'{u} is not a URL: interpreting as Path') # not a URL u = Path(u) return u.exists() try: u.msg(f'trying {u}') if head: r = u.head() else: r = u.get() if type(r) == requests.models.Response: if r.status_code == 200: u.msg(f'code 200') return True if r.status_code == 401: u.msg(f'code 401') u.msg(f'trying another') # unauthorised # more complex session login and auth # e.g. needed for NASA Earthdata login u.msg(f'getting login') r = u._get_login(head=head) if r.status_code == 200: u.msg(f'code 200') return True elif head == False: u.msg(f'code {r.status_code}') return False except: pass if head == False: u.msg(f'failed to connect') return False u.msg(f'trying get') return u.ping(head=False) def read_bytes(self): ''' Open the URL data in bytes mode, read it and return the data This first tried self.get() but if the authorisation is more complex (e.g. when using NASA server) then a fuller 2-pass session is used. 
You should specify any required login/password with with_components(username=str,password=str) Returns: data from url Or: None : on failure requests.models.Response : on connection problem ''' if 'skipper' in self.__dict__: skipper = self.skipper else: skipper = False u = self store_url = str(u) store_flag = 'data' if u._isfile(): self.msg(f'{u} is not a URL: interpreting as Path') return Path(u).read_bytes() get_download,ifile,ofile = self._test_already_local() # get from ofile if ofile and Path(ofile).exists(): data = ofile.read_bytes() ofile = Path(ofile) cache = {store_flag : { str(store_url) : str(ofile) }} self.database.set_db(cache,write=True) return data # get from ifile if ifile and Path(ifile).exists(): ifile = Path(ifile) self.msg(f'opening already downloaded file {ifile}') data = ifile.read_bytes() if ofile: ofile = Path(ofile) self.check_path(ofile.parent) ofile.parent.mkdir(parents=True,exist_ok=True) ofile.write_bytes(data) cache = {store_flag : { str(store_url) : str(ofile) }} else: cache = {store_flag : { str(store_url) : str(ifile) }} self.database.set_db(cache,write=True) return data try: if not skipper: u.msg(f'trying {u}') r = u.get() if skipper or (type(r) == requests.models.Response): if (not skipper) and r.status_code == 200: u.msg(f'code {r.status_code}') data = r.content if ofile: ofile = Path(ofile) self.check_path(ofile.parent) ofile.parent.mkdir(parents=True,exist_ok=True) ofile.write_bytes(data) cache = {store_flag : { str(store_url) : str(ofile) }} self.database.set_db(cache,write=True) return data if skipper or (r.status_code == 401): if not skipper: u.msg(f'code {r.status_code}') u.msg(f'trying another') # unauthorised # more complex session login and auth # e.g. needed for NASA Earthdata login u.msg(f'getting login') r = u._get_login(head=False) if type(r) != requests.models.Response: return None if r.status_code == 200: u.msg(f'code {r.status_code}') data = r.content if ofile: ofile = Path(ofile) self.check_path(ofile.parent) ofile.parent.mkdir(parents=True,exist_ok=True) ofile.write_bytes(data) cache = {store_flag : { str(store_url) : str(ofile) }} self.database.set_db(cache,write=True) return data else: u.msg(f'code {r.status_code}') return r except: pass u.msg(f'failed to connect') return None def _convert_to_abs(self,ilist): # this is slow and may be not needed self.msg(f'parsing URLs from html file {len(ilist)} items') return [self.update(*[str(self),l.rstrip('/#')],**(fdict(self.__dict__.copy()))) for l in ilist ] def _filter(self,links,pattern,pre_filter=True): # pre-filter if pre_filter: links = np.array([str(l).rstrip('/#') for l in links]) matches = np.array([fnmatch.fnmatch(str(l), '*'+pattern) for l in links]) links = list(links[matches]) links = self._convert_to_abs(links) olist = [] try: p = self.done[pattern] except: try: self.done[pattern] = [] except: self.done = {pattern:[]} p = self.done[pattern] olist = [u for u in links if u not in p] self.done[pattern] = self.done[pattern] + olist return olist def has_wildness(self,uc): is_wild = np.logical_or(np.array(['*' in i for i in uc]), np.array(['?' in i for i in uc])) is_wild_2 = np.logical_or(np.array(['[' in i for i in uc]), np.array([']' in i for i in uc])) is_wild = np.logical_or(is_wild,is_wild_2) return is_wild def glob(self,pattern,pre_filter=True): ''' Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. The URL here then needs to return lxml html code. Positional arguments: patterm : to search for e.g. 
*/2021.*.01 only wildcards * and ? considered at present ''' u = self url = str(u) if url[-1] == '/': url = urls[:-1] url = self.update(url,pattern) # check in database store_url = url store_flag = 'query' olist = self.database.get_from_db(store_flag,store_url) if olist is not None: if type(olist) is list: return [self.update(o) for o in olist] return [self.update(olist)] # start at the top uc = np.array(url.parts) for i,w in enumerate(uc[1:]): if i == 0: base_list = [self.update(uc[0])] new_list = [] for b in base_list: # set to new item glob = self.update(b)._glob(w,pre_filter=pre_filter) # glob with the next item new_list = new_list + glob base_list = np.unique(np.array(new_list,dtype=np.object).flatten()) base_list = np.unique(np.array(base_list,dtype=np.object)) olist = list(np.array([self.update(i) for i in base_list]).flatten()) self.dedate() for l in olist: l.init(**(fdict(self.__dict__.copy()))) # cache this in case we want to re-use it cache = {store_flag : { str(store_url) : [str(i) for i in olist] }} self.database.set_db(cache) if type(olist) is list: return [self.update(o) for o in olist] return [self.update(olist)] def rglob(self, pattern,pre_filter=True): ''' Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. Positional arguments: patterm : to search for e.g. 2021.*.01 only wildcards * and ? considered at present ''' return self.glob(pattern,pre_filter=pre_filter) def flush(self): try: return self.database.set_db(self.database.database,write=True) except: return None def _glob(self, pattern,pre_filter=True): ''' Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. The URL here then needs to return lxml html code. ''' # take off training slash if pattern[-1] == '/': pattern = pattern[:-1] store_url = str(self.update(pattern)) store_flag = 'query' if not self.noclobber: # dont trust cache response = None else: response = self.database.get_from_db(store_flag,store_url) if response: self.msg(f'got response from database for {store_url}') self.msg(f'discovered {len(response)} files with pattern {pattern} in {str(self)}') return [self.update(str(f)) for f in response] try: html = self.read_text() links = np.array([mylink.attrs['href'] for mylink in BeautifulSoup(html,'lxml').find_all('a')]) links = np.array(self._filter(links,pattern,pre_filter=pre_filter)) matches = np.array([fnmatch.fnmatch(str(l), '*'+pattern) for l in links]) files = list(links[matches]) except: files = [] self.msg(f'discovered {len(files)} files with pattern {pattern} in {str(self)}') files = [str(i) for i in files] # cache this in db cache = {store_flag : { str(store_url) : files }} self.database.set_db(cache) return files
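# The '.store' naming in call_local() above relies on a with_suffix subtlety:
# with_suffix *replaces* the current suffix, so appending requires folding the
# old suffix into the new one. A short demonstration with a hypothetical path:
from pathlib import Path

p = Path('/cache/data/file.hdf')
appended = p.with_suffix(p.suffix + '.store')   # /cache/data/file.hdf.store
replaced = p.with_suffix('.store')              # /cache/data/file.store
assert appended.name == 'file.hdf.store'
assert replaced.name == 'file.store'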
def _open_atomic(path: pathlib.Path, mode="r"): """Open file with atomic file writing support. File reading is also adapted to atomic file writing (for example, the backup file is used when an atomic write failed previously.) TODO(suquark): race condition like two processes writing the same file is still not safe. This may not be an issue, because in our current implementation, we only need to guarantee the file is either fully written or not existing. Args: path: The file path. mode: Open mode same as "open()". Returns: File object. """ if "a" in mode or "+" in mode: raise ValueError("Atomic open does not support appending.") # backup file is hidden by default backup_path = path.with_name(f".{path.name}.backup") if "r" in mode: # read mode if _file_exists(path): f = open(path, mode) else: raise FileNotFoundError(path) try: yield f finally: f.close() elif "x" in mode: # create mode if path.exists(): raise FileExistsError(path) tmp_new_fn = path.with_suffix(f".{path.name}.{uuid.uuid4().hex}") if not tmp_new_fn.parent.exists(): tmp_new_fn.parent.mkdir(parents=True) f = open(tmp_new_fn, mode) write_ok = True try: yield f except Exception: write_ok = False raise finally: f.close() if write_ok: # "commit" file if writing succeeded tmp_new_fn.rename(path) else: # remove file if writing failed tmp_new_fn.unlink() elif "w" in mode: # overwrite mode # backup existing file if path.exists(): # remove an even older backup file if backup_path.exists(): backup_path.unlink() path.rename(backup_path) tmp_new_fn = path.with_suffix(f".{path.name}.{uuid.uuid4().hex}") if not tmp_new_fn.parent.exists(): tmp_new_fn.parent.mkdir(parents=True) f = open(tmp_new_fn, mode) write_ok = True try: yield f except Exception: write_ok = False raise finally: f.close() if write_ok: tmp_new_fn.rename(path) # cleanup the backup file if backup_path.exists(): backup_path.unlink() else: # remove file if writing failed tmp_new_fn.unlink() else: raise ValueError(f"Unknown file open mode {mode}.")
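# _open_atomic above is a generator that yields a file object, so in its home
# module it is presumably decorated with contextlib.contextmanager (the
# decorator is not shown in this excerpt). A self-contained sketch of the same
# write-to-temp-then-rename pattern, assuming os.replace provides an atomic
# rename within one filesystem:
import contextlib
import os
import uuid
from pathlib import Path


@contextlib.contextmanager
def atomic_write(path: Path, mode: str = "w"):
    # hidden temp file in the same directory, as in _open_atomic
    tmp = path.with_name(f".{path.name}.{uuid.uuid4().hex}")
    f = open(tmp, mode)
    try:
        yield f
    except Exception:
        f.close()
        tmp.unlink()       # writing failed: discard the partial file
        raise
    f.close()
    os.replace(tmp, path)  # writing succeeded: "commit" atomically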
def from_dir( cls, path: Path, plugins: Sequence[Plugin], ts_plugins: Sequence[Plugin], short_name: bool, ) -> "Scenario": """ Makes a Scenario out of the provided directory path. The directory must be a "scenario directory", which means that it must contain at least one HAR file or another scenario directory. Symbolic link loops are not checked but forbidden! There may exist a weight file <path>.weight. If so, its contents will be used as weight for the Scenario by calling weight_from_path. Errors are handled this way: 1. If path itself cannot be transformed into a scenario, raise SkippableScenarioError. 2. For each child of path, apply (1) but catch the exception and display a warning about skipping this child. (If all children are skipped, (1) applies to path itself.) Therefore: - If the directory contains weight files that don't match any HAR file or subdirectory, an error will be emitted as this is probably a mistake. - If the directory contains files or directory that cannot be converted into scenarios (e.g. non-JSON files or .git directories), a message is emitted and the file or subdirectory is skipped. :raise SkippableScenarioError: if the directory contains dangling weight files or no sub-scenarios. """ try: children = list(path.iterdir()) except OSError as err: raise SkippableScenarioError(path, err) weight_files: Set[Path] = { child for child in children if child.suffix == WEIGHT_FILE_SUFFIX } scenarios: List[Scenario] = [] for child in children: if child in weight_files: continue try: scenario = cls.from_path(child, plugins, ts_plugins=ts_plugins, short_name=True) except SkippableScenarioError as err: logging.warning( "while searching for HAR files, skipping %s: %s", child, err.reason) else: scenarios.append(scenario) cls._check_dangling_weights(path, scenarios, weight_files) if not scenarios: raise SkippableScenarioError(path, "no scenarios inside the directory") cls._check_name_collisions(path, scenarios) return Scenario( name=to_identifier( path.with_suffix("").name if short_name else str(path)), children=tuple(scenarios), origin=path, weight=cls.weight_from_path(path), )
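# Hedged sketch of the "<path>.weight" convention that from_dir's docstring
# describes: a sibling weight file, if present, provides the scenario weight.
# WEIGHT_FILE_SUFFIX and the fallback weight of 1.0 are assumptions here; the
# real weight_from_path may differ.
from pathlib import Path

WEIGHT_FILE_SUFFIX = ".weight"


def weight_from_path(path: Path) -> float:
    weight_file = path.parent / (path.name + WEIGHT_FILE_SUFFIX)
    if weight_file.exists():
        return float(weight_file.read_text().strip())
    return 1.0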
def test_ener( dp: "DeepPot", data: DeepmdData, system: str, numb_test: int, detail_file: Optional[str], has_atom_ener: bool, append_detail: bool = False, ) -> Tuple[List[np.ndarray], List[int]]: """Test energy type model. Parameters ---------- dp : DeepPot instance of deep potential data: DeepmdData data container object system : str system directory numb_test : int munber of tests to do detail_file : Optional[str] file where test details will be output has_atom_ener : bool whether per atom quantities should be computed append_detail : bool, optional if true append output detail file, by default False Returns ------- Tuple[List[np.ndarray], List[int]] arrays with results and their shapes """ data.add("energy", 1, atomic=False, must=False, high_prec=True) data.add("force", 3, atomic=True, must=False, high_prec=False) data.add("virial", 9, atomic=False, must=False, high_prec=False) if dp.has_efield: data.add("efield", 3, atomic=True, must=True, high_prec=False) if has_atom_ener: data.add("atom_ener", 1, atomic=True, must=True, high_prec=False) if dp.get_dim_fparam() > 0: data.add( "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False ) if dp.get_dim_aparam() > 0: data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False) test_data = data.get_test() natoms = len(test_data["type"][0]) nframes = test_data["box"].shape[0] numb_test = min(nframes, numb_test) coord = test_data["coord"][:numb_test].reshape([numb_test, -1]) box = test_data["box"][:numb_test] if dp.has_efield: efield = test_data["efield"][:numb_test].reshape([numb_test, -1]) else: efield = None if not data.pbc: box = None atype = test_data["type"][0] if dp.get_dim_fparam() > 0: fparam = test_data["fparam"][:numb_test] else: fparam = None if dp.get_dim_aparam() > 0: aparam = test_data["aparam"][:numb_test] else: aparam = None ret = dp.eval( coord, box, atype, fparam=fparam, aparam=aparam, atomic=has_atom_ener, efield=efield, ) energy = ret[0] force = ret[1] virial = ret[2] energy = energy.reshape([numb_test, 1]) force = force.reshape([numb_test, -1]) virial = virial.reshape([numb_test, 9]) if has_atom_ener: ae = ret[3] av = ret[4] ae = ae.reshape([numb_test, -1]) av = av.reshape([numb_test, -1]) rmse_e = rmse(energy - test_data["energy"][:numb_test].reshape([-1, 1])) rmse_f = rmse(force - test_data["force"][:numb_test]) rmse_v = rmse(virial - test_data["virial"][:numb_test]) rmse_ea = rmse_e / natoms rmse_va = rmse_v / natoms if has_atom_ener: rmse_ae = rmse( test_data["atom_ener"][:numb_test].reshape([-1]) - ae.reshape([-1]) ) # print ("# energies: %s" % energy) log.info(f"# number of test data : {numb_test:d} ") log.info(f"Energy RMSE : {rmse_e:e} eV") log.info(f"Energy RMSE/Natoms : {rmse_ea:e} eV") log.info(f"Force RMSE : {rmse_f:e} eV/A") log.info(f"Virial RMSE : {rmse_v:e} eV") log.info(f"Virial RMSE/Natoms : {rmse_va:e} eV") if has_atom_ener: log.info(f"Atomic ener RMSE : {rmse_ae:e} eV") if detail_file is not None: detail_path = Path(detail_file) pe = np.concatenate( ( np.reshape(test_data["energy"][:numb_test], [-1, 1]), np.reshape(energy, [-1, 1]), ), axis=1, ) save_txt_file( detail_path.with_suffix(".e.out"), pe, header="%s: data_e pred_e" % system, append=append_detail, ) pf = np.concatenate( ( np.reshape(test_data["force"][:numb_test], [-1, 3]), np.reshape(force, [-1, 3]), ), axis=1, ) save_txt_file( detail_path.with_suffix(".f.out"), pf, header="%s: data_fx data_fy data_fz pred_fx pred_fy pred_fz" % system, append=append_detail, ) pv = np.concatenate( ( 
np.reshape(test_data["virial"][:numb_test], [-1, 9]), np.reshape(virial, [-1, 9]), ), axis=1, ) save_txt_file( detail_path.with_suffix(".v.out"), pv, header=f"{system}: data_vxx data_vxy data_vxz data_vyx data_vyy " "data_vyz data_vzx data_vzy data_vzz pred_vxx pred_vxy pred_vxz pred_vyx " "pred_vyy pred_vyz pred_vzx pred_vzy pred_vzz", append=append_detail, ) return { "rmse_ea" : (rmse_ea, energy.size), "rmse_f" : (rmse_f, force.size), "rmse_va" : (rmse_va, virial.size), }
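# Two helpers that test_ener above leans on, reconstructed as minimal sketches:
# rmse is presumably a plain root-mean-square deviation, and the detail files
# pair ground truth with predictions column-wise under suffixes like ".e.out".
# save_txt_file in the original is assumed to be a thin np.savetxt wrapper.
import numpy as np
from pathlib import Path


def rmse(diff: np.ndarray) -> float:
    return np.sqrt(np.mean(np.square(diff)))


def write_detail(detail_path: Path, truth: np.ndarray, pred: np.ndarray,
                 suffix: str, header: str) -> None:
    paired = np.concatenate(
        (truth.reshape(len(truth), -1), pred.reshape(len(pred), -1)), axis=1)
    np.savetxt(detail_path.with_suffix(suffix), paired, header=header)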
def test_model(model, data_iterator, latest_model_path, num_columns: int = 2): model = model.eval().to(global_torch_device()) inputs, labels = next(data_iterator) inputs = inputs.to(global_torch_device()) labels = labels.to(global_torch_device()) with torch.no_grad(): pred = model(inputs) y_pred = pred.data.to("cpu").numpy() y_pred_max = numpy.argmax(y_pred, axis=-1) accuracy_w = accuracy_score(labels, y_pred_max) precision_a, recall_a, fscore_a, support_a = precision_recall_fscore_support( labels, y_pred_max) precision_w, recall_w, fscore_w, support_w = precision_recall_fscore_support( labels, y_pred_max, average="weighted") _, predicted = torch.max(pred, 1) truth_labels = labels.data.to("cpu").numpy() input_images_rgb = [ default_torch_retransform(x) for x in inputs.to(global_torch_device()) ] cell_width = (800 / num_columns) - 6 - 6 * 2 pyplot.plot(numpy.random.random((3, 3))) alphabet = string.ascii_lowercase class_names = numpy.array([*alphabet]) samples = len(y_pred) predictions = [[None for _ in range(num_columns)] for _ in range(samples // num_columns)] for i, a, b, c in zip(range(samples), input_images_rgb, y_pred_max, truth_labels): pyplot.imshow(a) if b == c: outcome = "tp" else: outcome = "fn" gd = ReportEntry( name=i, figure=plt_html(format="jpg", size=[cell_width, cell_width]), prediction=class_names[b], truth=class_names[c], outcome=outcome, explanation=None, ) predictions[i // num_columns][i % num_columns] = gd confusion_matrix_plot(y_pred_max, truth_labels, class_names) title = "Classification Report" model_name = latest_model_path confusion_matrix = plt_html(format="png", size=[800, 800]) accuracy = generate_math_html("\dfrac{tp+tn}{N}"), None, accuracy_w precision = generate_math_html( "\dfrac{tp}{tp+fp}"), precision_a, precision_w recall = generate_math_html("\dfrac{tp}{tp+fn}"), recall_a, recall_w f1_score = ( generate_math_html("2*\dfrac{precision*recall}{precision+recall}"), fscore_a, fscore_w, ) support = generate_math_html("N_{class_truth}"), support_a, support_w metrics = NOD.nod_of(accuracy, precision, f1_score, recall, support).as_flat_tuples() bundle = NOD.nod_of(title, model_name, confusion_matrix, metrics, predictions) file_name = Path(title.lower().replace(" ", "_")) generate_html(file_name.with_suffix(".html"), **bundle) generate_pdf(file_name.with_suffix(".html"), file_name.with_suffix(".pdf"))
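# The report file naming at the end of test_model, in isolation: one stem is
# derived from the title and fanned out to sibling artifacts via with_suffix.
from pathlib import Path

stem = Path("Classification Report".lower().replace(" ", "_"))
assert stem.with_suffix(".html") == Path("classification_report.html")
assert stem.with_suffix(".pdf") == Path("classification_report.pdf")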
def test_polar(
    dp: "DeepPolar",
    data: DeepmdData,
    numb_test: int,
    detail_file: Optional[str],
    *,
    atomic: bool,
) -> Dict[str, Tuple[np.ndarray, int]]:
    """Test polarizability type model.

    Parameters
    ----------
    dp : DeepPolar
        instance of deep polar model
    data : DeepmdData
        data container object
    numb_test : int
        number of tests to do
    detail_file : Optional[str]
        file where test details will be output
    atomic : bool
        whether to test the atomic (rather than global) polarizability

    Returns
    -------
    Dict[str, Tuple[np.ndarray, int]]
        dictionary of results and their sizes
    """
    data.add(
        "polarizability" if not atomic else "atomic_polarizability",
        9,
        atomic=atomic,
        must=True,
        high_prec=False,
        type_sel=dp.get_sel_type(),
    )
    test_data = data.get_test()
    polar, numb_test, atype = run_test(dp, test_data, numb_test)
    sel_type = dp.get_sel_type()
    sel_natoms = 0
    for ii in sel_type:
        sel_natoms += sum(atype == ii)
    # YWolfeee: do summation in global polar mode
    if not atomic:
        polar = np.sum(polar.reshape((polar.shape[0], -1, 9)), axis=1)
        rmse_f = rmse(polar - test_data["polarizability"][:numb_test])
        rmse_fs = rmse_f / np.sqrt(sel_natoms)
        rmse_fa = rmse_f / sel_natoms
    else:
        rmse_f = rmse(polar - test_data["atomic_polarizability"][:numb_test])
    log.info(f"# number of test data : {numb_test:d}")
    log.info(f"Polarizability RMSE       : {rmse_f:e}")
    if not atomic:
        log.info(f"Polarizability RMSE/sqrtN : {rmse_fs:e}")
        log.info(f"Polarizability RMSE/N     : {rmse_fa:e}")
    log.info("The unit of error is the same as the unit of provided label.")
    if detail_file is not None:
        detail_path = Path(detail_file)
        pe = np.concatenate(
            (
                np.reshape(test_data["polarizability"][:numb_test], [-1, 9]),
                np.reshape(polar, [-1, 9]),
            ),
            axis=1,
        )
        np.savetxt(
            detail_path.with_suffix(".out"),
            pe,
            header="data_pxx data_pxy data_pxz data_pyx data_pyy data_pyz data_pzx "
            "data_pzy data_pzz pred_pxx pred_pxy pred_pxz pred_pyx pred_pyy pred_pyz "
            "pred_pzx pred_pzy pred_pzz",
        )
    return {"rmse": (rmse_f, polar.size)}
class GitRepo: def __init__(self, path, remote_url=None, branch_name='master', name=None): self.path = Path(path) self.path_str = str(self.path) self.remote_url = remote_url self.branch_name = branch_name db_latest_key = '%s:%s:%s' % (self.path_str, remote_url or '', branch_name) self.db_latest_key = sha256(db_latest_key.encode()).hexdigest() self.repo_name = name or self.path.name def git(self, *args): """Run a git command against the current repo""" curdir = os.getcwd() try: os.chdir(self.path_str) output = check_output((GIT, ) + args, stderr=STDOUT) finally: os.chdir(curdir) return force_str(output.strip()) @property def current_hash(self): """The git revision ID (hash) of the current HEAD or None if no repo""" try: return self.git('rev-parse', 'HEAD') except (OSError, CalledProcessError): return None @property def current_commit_timestamp(self): """The UNIX timestamp of the latest commit""" try: return int(self.git('show', '-s', '--format=%ct', 'HEAD')) except (OSError, CalledProcessError, ValueError): return 0 @property def last_updated(self): if self.current_commit_timestamp: latest_datetime = datetime.fromtimestamp( self.current_commit_timestamp) return timeago.format(latest_datetime) return 'unknown' def diff(self, start_hash, end_hash): """Return a 2 tuple: (modified files, deleted files)""" diff_out = StringIO( self.git('diff', '--name-status', start_hash, end_hash)) return self._parse_git_status(diff_out) def modified_files(self): """Return a list of new or modified files according to git""" self.git('add', '.') status = StringIO(self.git('status', '--porcelain')) return self._parse_git_status(status) def _parse_git_status(self, lines): modified = set() removed = set() for line in lines: parts = line.split() # delete if parts[0] == 'D': removed.add(parts[1]) # rename elif parts[0][0] == 'R': removed.add(parts[1]) modified.add(parts[2]) # everything else else: # some types (like copy) have two file entries for part in parts[1:]: modified.add(part) return modified, removed def clone(self): """Clone the repo specified in the initial arguments""" if not self.remote_url: raise RuntimeError('remote_url required to clone') self.path.mkdir(parents=True, exist_ok=True) self.git('clone', '--depth', '1', '--branch', self.branch_name, self.remote_url, '.') def reclone(self): """Safely get a fresh clone of the repo""" if self.path.exists(): new_path = self.path.with_suffix(f'.{int(time())}') new_repo = GitRepo(new_path, self.remote_url, self.branch_name) new_repo.clone() # only remove the old after the new clone succeeds rmtree(self.path_str, ignore_errors=True) new_path.rename(self.path) else: self.clone() def pull(self): """Update the repo to the latest of the remote and branch Return the previous hash and the new hash.""" old_hash = self.current_hash self.git('fetch', '-f', self.remote_url, self.branch_name) self.git('checkout', '-f', 'FETCH_HEAD') return old_hash, self.current_hash def update(self): """Updates a repo, cloning if necessary. 
:return a tuple of lists of modified and deleted files if updated, None if cloned """ if self.path.is_dir(): if not self.path.joinpath('.git').is_dir(): rmtree(self.path_str, ignore_errors=True) self.clone() else: return self.pull() else: self.clone() return None, None def reset(self, new_head): self.git('reset', '--hard', new_head) def clean(self): self.git('clean', '-fd') def get_db_latest(self): try: return GitRepoState.objects.get( repo_id=self.db_latest_key).latest_ref except GitRepoState.DoesNotExist: return None def has_changes(self): return self.current_hash != self.get_db_latest() @property def clean_remote_url(self): repo_base = self.remote_url if repo_base.endswith('.git'): repo_base = repo_base[:-4] elif repo_base.endswith('/'): repo_base = repo_base[:-1] return repo_base def remote_url_auth(self, auth): url = self.clean_remote_url # remove https:// url = url[8:] return f'https://{auth}@{url}' def set_db_latest(self, latest_ref=None): latest_ref = latest_ref or self.current_hash rs, created = GitRepoState.objects.get_or_create( repo_id=self.db_latest_key, defaults={ 'latest_ref': latest_ref, 'repo_name': self.repo_name, 'repo_url': self.clean_remote_url, 'latest_ref_timestamp': self.current_commit_timestamp, }, ) if not created: rs.latest_ref = latest_ref rs.repo_name = self.repo_name rs.repo_url = self.clean_remote_url rs.latest_ref_timestamp = self.current_commit_timestamp rs.save()
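# The timestamped path in reclone() above exploits another with_suffix detail:
# on a final path component with no suffix it appends rather than replaces, so
# the checkout directory gains a unique sibling name. The repo location below
# is hypothetical.
from pathlib import Path
from time import time

repo_path = Path('/srv/checkouts/bedrock')        # no suffix on the last part
fresh = repo_path.with_suffix(f'.{int(time())}')  # e.g. .../bedrock.1700000000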
def test_dipole(
    dp: "DeepDipole",
    data: DeepmdData,
    numb_test: int,
    detail_file: Optional[str],
    atomic: bool,
) -> Dict[str, Tuple[np.ndarray, int]]:
    """Test dipole type model.

    Parameters
    ----------
    dp : DeepDipole
        instance of deep dipole model
    data : DeepmdData
        data container object
    numb_test : int
        number of tests to do
    detail_file : Optional[str]
        file where test details will be output
    atomic : bool
        whether atomic dipole is provided

    Returns
    -------
    Dict[str, Tuple[np.ndarray, int]]
        dictionary of results and their sizes
    """
    data.add(
        "dipole" if not atomic else "atomic_dipole",
        3,
        atomic=atomic,
        must=True,
        high_prec=False,
        type_sel=dp.get_sel_type(),
    )
    test_data = data.get_test()
    dipole, numb_test, atype = run_test(dp, test_data, numb_test)
    sel_type = dp.get_sel_type()
    sel_natoms = 0
    for ii in sel_type:
        sel_natoms += sum(atype == ii)
    # do summation in atom dimension
    if not atomic:
        dipole = np.sum(dipole.reshape((dipole.shape[0], -1, 3)), axis=1)
        rmse_f = rmse(dipole - test_data["dipole"][:numb_test])
        rmse_fs = rmse_f / np.sqrt(sel_natoms)
        rmse_fa = rmse_f / sel_natoms
    else:
        rmse_f = rmse(dipole - test_data["atomic_dipole"][:numb_test])
    log.info(f"# number of test data : {numb_test:d}")
    log.info(f"Dipole RMSE       : {rmse_f:e}")
    if not atomic:
        log.info(f"Dipole RMSE/sqrtN : {rmse_fs:e}")
        log.info(f"Dipole RMSE/N     : {rmse_fa:e}")
    log.info("The unit of error is the same as the unit of provided label.")
    if detail_file is not None:
        detail_path = Path(detail_file)
        pe = np.concatenate(
            (
                np.reshape(test_data["dipole"][:numb_test], [-1, 3]),
                np.reshape(dipole, [-1, 3]),
            ),
            axis=1,
        )
        np.savetxt(
            detail_path.with_suffix(".out"),
            pe,
            header="data_x data_y data_z pred_x pred_y pred_z",
        )
    return {"rmse": (rmse_f, dipole.size)}
def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): """Creates or loads a YOLOv5 model Arguments: name (str): model name 'yolov5s' or path 'path/to/best.pt' pretrained (bool): load pretrained weights into the model channels (int): number of input channels classes (int): number of model classes autoshape (bool): apply YOLOv5 .autoshape() wrapper to model verbose (bool): print all information to screen device (str, torch.device, None): device to use for model parameters Returns: YOLOv5 model """ from pathlib import Path from models.common import AutoShape, DetectMultiBackend from models.yolo import Model from utils.downloads import attempt_download from utils.general import LOGGER, check_requirements, intersect_dicts, logging from utils.torch_utils import select_device if not verbose: LOGGER.setLevel(logging.WARNING) check_requirements(exclude=('tensorboard', 'thop', 'opencv-python')) name = Path(name) path = name.with_suffix( '.pt') if name.suffix == '' else name # checkpoint path try: device = select_device(('0' if torch.cuda.is_available() else 'cpu' ) if device is None else device) if pretrained and channels == 3 and classes == 80: model = DetectMultiBackend( path, device=device) # download/load FP32 model # model = models.experimental.attempt_load(path, map_location=device) # download/load FP32 model else: cfg = list( (Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0] # model.yaml path model = Model(cfg, channels, classes) # create model if pretrained: ckpt = torch.load(attempt_download(path), map_location=device) # load csd = ckpt['model'].float().state_dict( ) # checkpoint state_dict as FP32 csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors']) # intersect model.load_state_dict(csd, strict=False) # load if len(ckpt['model'].names) == classes: model.names = ckpt[ 'model'].names # set class names attribute if autoshape: model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS return model.to(device) except Exception as e: help_url = 'https://github.com/ultralytics/yolov5/issues/36' s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.' raise Exception(s) from e
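# The checkpoint-resolution one-liner from _create, isolated: a bare model
# name gets '.pt' appended, while an explicit weights path passes through.
from pathlib import Path


def checkpoint_path(name: str) -> Path:
    p = Path(name)
    return p.with_suffix('.pt') if p.suffix == '' else p


assert checkpoint_path('yolov5s') == Path('yolov5s.pt')
assert checkpoint_path('runs/train/best.pt') == Path('runs/train/best.pt')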
def _load_v13(file_prefix: str, loc: bool, tracks: bool, segment_images: bool, flatfield: bool, filtered_file_prefix) -> Dict: """Load data to a dictionary (v013 format) Parameters ---------- file_prefix Prefix used for saving via :py:meth:`save`. loc Whether to load localization data. tracks Whether to load tracking data. cell_images Whether to load cell images. flatfield Whether to load flatfield corrections. filtered_file_prefix Prefix used for saving analyzed and filtered data in v013 format, i.e. prefix passed to the ``save()`` method in the Analysis notebook. Returns ------- Dictionary of loaded data and settings. """ infile = Path(f"{file_prefix}-v013") with infile.with_suffix(".yaml").open() as f: ret = io.yaml.safe_load(f) ret["data_dir"] = Path(ret.get("data_dir", "")) ret["localizations"] = {} ret["tracks"] = {} ret["segment_images"] = defaultdict(dict) ret["flatfield"] = {} all_src = {**ret["sources"], **ret["special_sources"]} do_load = [] if loc: do_load.append((ret["localizations"], "_loc")) if tracks: do_load.append((ret["tracks"], "_trc")) if len(do_load): with pd.HDFStore(infile.with_suffix(".h5"), "r") as s: for sink, suffix in do_load: keys = (k for k in s.keys() if k.endswith(suffix)) for k in keys: new_key = k[1:-len(suffix)] loaded = s[k] src = all_src[new_key] fname_map = pd.Series(src.keys(), index=src.values()) loaded.index = loaded.index.set_levels( fname_map[loaded.index.levels[0]], level=0) if suffix == "_trc": # Restore categorical exc_type. See comment in # `save` method for details. loaded = loaded.astype({ ("fret", "exc_type"): "category" }) sink[new_key] = loaded if segment_images: seg_img_file = infile.with_suffix(".cell_img.npz") # Map file names to dataset IDs fname_map = { fname: (did, fid) for did, src in all_src.items() for fid, fname in src.items() } try: with np.load(seg_img_file) as data: ci = dict(data) for k, v in ci.items(): k_split = k.split("\n") if len(k_split) == 1: new_k = k_split[0] else: new_k = tuple(k_split) did, fid = fname_map[new_k] ret["segment_images"][did][fid] = v except Exception as e: warnings.warn("Could not load segmentation images from file " f"\"{str(seg_img_file)}\" ({e}).") if flatfield: flatfield_glob = str(infile.with_suffix(".flat_*.npz")) key_re = re.compile(r"^\.flat_([\w\s-]+)") for p in Path().glob(flatfield_glob): m = key_re.match(p.suffixes[-2]) if m is None: warnings.warn("Could not load flatfield corrector from " f"{str(p)}.") else: ret["flatfield"][m.group(1)] = _flatfield.Corrector.load(p) return ret
import wave
from pathlib import Path


def main(iwavepath: Path, owavepath: Path) -> None:
    with wave.open(str(iwavepath), "r") as iwave:
        pre = ()
        post = ()
        with wave.open(str(owavepath.with_suffix(".wav")), "w") as owave:
            pass
def _load_v14(file_prefix: str, loc: bool, tracks: bool, segment_images: bool, flatfield: bool) -> Dict: """Load data to a dictionary (v014 format) Parameters ---------- file_prefix Prefix used for saving via :py:meth:`save`. loc Whether to load localization data. tracks Whether to load tracking data. segment_images Whether to load segmentation images. flatfield Whether to load flatfield corrections. Returns ------- Dictionary of loaded data and settings. """ infile = Path(f"{file_prefix}-v014") with infile.with_suffix(".yaml").open() as f: ret = io.yaml.safe_load(f) if "data_dir" in ret: ret["data_dir"] = Path(ret["data_dir"]) if loc: ret["localizations"] = {} ret["special_localizations"] = {} with pd.HDFStore(infile.with_suffix(".loc.h5"), "r") as s: for k in s.keys(): loaded = s.get(k) if k.startswith("/locs/"): ret["localizations"][k[6:]] = loaded elif k.startswith("/special_locs/"): ret["special_localizations"][k[14:]] = loaded if tracks: ret["tracks"] = {} ret["special_tracks"] = {} with pd.HDFStore(infile.with_suffix(".tracks.h5"), "r") as s: for k in s.keys(): # Restore categorical exc_type. See comment in # `save` method for details. loaded = s.get(k).astype({ ("fret", "exc_type"): "category" }) if k.startswith("/tracks/"): ret["tracks"][k[8:]] = loaded elif k.startswith("/special_tracks/"): ret["special_tracks"][k[16:]] = loaded if segment_images: ret["segment_images"] = defaultdict(dict) seg_img_file = infile.with_suffix(".seg_img.npz") try: with np.load(seg_img_file) as data: ci = dict(data) for k, v in ci.items(): split_idx = k.rfind("/") k1 = k[:split_idx] k2 = int(k[split_idx + 1:]) ret["segment_images"][k1][k2] = v except Exception as e: warnings.warn("Could not load segmentation images from file " f"\"{str(seg_img_file)}\" ({e}).") if flatfield: ret["flatfield"] = {} flatfield_glob = str(infile.with_suffix(".flat_*.npz")) key_re = re.compile(r"^\.flat_([\w\s-]+)") for p in Path().glob(flatfield_glob): m = key_re.match(p.suffixes[-2]) if m is None: warnings.warn("Could not load flatfield corrector from " f"{str(p)}.") else: ret["flatfield"][m.group(1)] = _flatfield.Corrector.load(p) return ret
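# The versioned save layout shared by _load_v13/_load_v14: a single prefix
# fans out into sibling files through with_suffix (which appends here because
# the prefix has no suffix of its own). The prefix value is hypothetical.
from pathlib import Path

infile = Path('tracking-v014')
meta = infile.with_suffix('.yaml')          # settings and sources
locs = infile.with_suffix('.loc.h5')        # localization data
tracks = infile.with_suffix('.tracks.h5')   # tracking data
seg = infile.with_suffix('.seg_img.npz')    # segmentation images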
print(type(Path('/usr/local/etc/mongod.conf').parent))
print(Path('/').joinpath('home', 'yangkai', 'zhihu'))
print(Path.exists(Path('~/lyanna').expanduser() / 'config.py'))
print(
    Path.exists(
        Path('~/Documents').expanduser() / 'code' / 'leetcode' / 'README.txt'))

p = Path('/Users/dongweiming/test.txt')
print(p.parent.parent)
print()
print(p.parents[0])
print(p.parents[1])
print(p.parents[2])
print(p.suffix, p.stem)
print(p.suffixes, p.stem)

Path('new.txt').touch()
p = Path('./test.txt')
p.write_text('456\n')
print(p.read_text())

p = Path('/home/gentoo/screenshot/abc.jpg')
print(p.with_suffix('.png'))
print(p.with_name(f'123{p.suffix}'))

Path('1/2/3').mkdir(parents=True, exist_ok=True)
print(Path('1').owner())
def save(self, file_prefix: str = "tracking", mode: str = "write"): """Save data to disk This will save attributes to disk. Parameters ---------- file_prefix Common file_prefix for all files written by this method. It will be suffixed by the output format version (v{version}) and file extensions corresponding to what is saved in each file. mode Use "write" to delete previously existing files (which contain localization and tracking data) and write a new ones. As a result, only the current data will end up in the file. Use "update" to add or modify data without deleting anything not present in this instance. """ outfile = Path(f"{file_prefix}-v014") data = self.__dict__.copy() file_mode = "w" if mode == "write" else "a" with warnings.catch_warnings(): import tables warnings.simplefilter("ignore", tables.NaturalNameWarning) with pd.HDFStore(outfile.with_suffix(".loc.h5"), file_mode) as s: for key, loc in data.pop("localizations", {}).items(): s.put(f"/locs/{key}", loc) for key, loc in data.pop("special_localizations", {}).items(): s.put(f"/special_locs/{key}", loc) with pd.HDFStore(outfile.with_suffix(".tracks.h5"), file_mode) as s: for key, trc in data.pop("tracks", {}).items(): # Categorical exc_type does not allow for storing in fixed # format while multiindex for both rows and columns does # not work with table format… s.put(f"/tracks/{key}", trc.astype({("fret", "exc_type"): str})) for key, trc in data.pop("special_tracks", {}).items(): s.put(f"/special_tracks/{key}", trc.astype({("fret", "exc_type"): str})) if mode == "write": old = {} seg = {} else: old = self.load(file_prefix, loc=False, tracks=False, segment_images="segment_images" in data, flatfield=False).__dict__.copy() seg = old.pop("segment_images", {}) if "segment_images" in data: for k, v in data.pop("segment_images").items(): for k2, v2 in v.items(): seg[f"{k}/{k2}"] = v2 np.savez_compressed(outfile.with_suffix(".seg_img.npz"), **seg) if "flatfield" in data: if mode == "write": ffiles = io.get_files(fr"^{outfile}\.flat_([\w\s-]+)\.npz$")[0] for f in ffiles: Path(f).unlink() for k, ff in data.pop("flatfield").items(): ff.save(outfile.with_suffix(f".flat_{k}.npz")) old.update(data) if "data_dir" in old: old["data_dir"] = str(old["data_dir"]) with outfile.with_suffix(".yaml").open("w") as f: io.yaml.safe_dump(old, f)
def get_path(self, compiler: str, env: environment.Environment) -> T.Optional[Path]: p = Path(self.path) canonical_compiler = compiler if ((compiler in ['clang-cl', 'intel-cl']) or (env.machines.host.is_windows() and compiler in {'pgi', 'dmd', 'ldc'})): canonical_compiler = 'msvc' has_pdb = False if self.language in {'c', 'cpp'}: has_pdb = canonical_compiler == 'msvc' elif self.language == 'd': # dmd's optlink does not genearte pdb iles has_pdb = env.coredata.compilers.host['d'].linker.id in { 'link', 'lld-link' } # Abort if the platform does not match matches = { 'msvc': canonical_compiler == 'msvc', 'gcc': canonical_compiler != 'msvc', 'cygwin': env.machines.host.is_cygwin(), '!cygwin': not env.machines.host.is_cygwin(), }.get(self.platform or '', True) if not matches: return None # Handle the different types if self.typ == 'file': return p elif self.typ == 'shared_lib': if env.machines.host.is_windows() or env.machines.host.is_cygwin(): # Windows only has foo.dll and foo-X.dll if len(self.version) > 1: return None if self.version: p = p.with_name('{}-{}'.format(p.name, self.version[0])) return p.with_suffix('.dll') p = p.with_name('lib{}'.format(p.name)) if env.machines.host.is_darwin(): # MacOS only has libfoo.dylib and libfoo.X.dylib if len(self.version) > 1: return None # pathlib.Path.with_suffix replaces, not appends suffix = '.dylib' if self.version: suffix = '.{}{}'.format(self.version[0], suffix) else: # pathlib.Path.with_suffix replaces, not appends suffix = '.so' if self.version: suffix = '{}.{}'.format(suffix, '.'.join(self.version)) return p.with_suffix(suffix) elif self.typ == 'exe': if env.machines.host.is_windows() or env.machines.host.is_cygwin(): return p.with_suffix('.exe') elif self.typ == 'pdb': if self.version: p = p.with_name('{}-{}'.format(p.name, self.version[0])) return p.with_suffix('.pdb') if has_pdb else None elif self.typ == 'implib' or self.typ == 'implibempty': if env.machines.host.is_windows() and canonical_compiler == 'msvc': # only MSVC doesn't generate empty implibs if self.typ == 'implibempty' and compiler == 'msvc': return None return p.parent / (re.sub(r'^lib', '', p.name) + '.lib') elif env.machines.host.is_windows() or env.machines.host.is_cygwin( ): return p.with_suffix('.dll.a') else: return None elif self.typ == 'expr': return Path( platform_fix_name(p.as_posix(), canonical_compiler, env)) else: raise RuntimeError('Invalid installed file type {}'.format( self.typ)) return p
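# get_path's recurring comment ("pathlib.Path.with_suffix replaces, not
# appends") demonstrated on a hypothetical versioned shared library: the
# version string must be folded into the suffix before the single call.
from pathlib import Path

p = Path('libfoo.so')
assert p.with_suffix('.1') == Path('libfoo.1')   # replaced, not appended
version = ['1', '2', '3']
assert p.with_suffix('.so.' + '.'.join(version)) == Path('libfoo.so.1.2.3')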
required=True)

if __name__ == '__main__':
    try:
        # Get raw string output and convert to Python dict
        process_output = subprocess.run(COMMAND,
                                        check=True,
                                        encoding='utf-8',
                                        stdout=subprocess.PIPE).stdout
        output_as_dict = json.loads(process_output)
        # Verify dict schema
        validate_with_humanized_errors(output_as_dict, JSON_SCHEMA)
        # Write data to a temp file, then atomically rewrite the IP ranges file
        temp_file_path = IP_RANGES_FILE.with_suffix('.tmp')
        with temp_file_path.open(mode='w') as temp_file:
            temp_file.write('\n'.join(i['network'] for i in output_as_dict))
        temp_file_path.rename(IP_RANGES_FILE)
    except subprocess.CalledProcessError as cpe:
        sys.exit('An error occurred while executing the bloxtool command.')
    except json.JSONDecodeError as jde:
        sys.exit('An error occurred parsing the bloxtool output as JSON.')
    except VoluptuousInvalid as vi:
        sys.exit(
            'The JSON data from bloxtool does not match the required schema.')