def _write_aspect_lex(parsed_data: Union[str, PathLike], generated_aspect_lex: dict, out_dir: PathLike):
    parsed_docs = _load_parsed_docs_from_dir(parsed_data)
    aspect_dict = {}
    max_examples = 20
    label = 'AS'
    for doc in parsed_docs.values():
        for sent_text, _ in doc.sent_iter():
            for term, lemma in generated_aspect_lex.items():
                # collect example sentences for the surface term and, separately, for its lemma
                if term in sent_text.lower():
                    _find_aspect_in_sentence(term, lemma, sent_text, aspect_dict, label,
                                             max_examples, False)
                if lemma != '' and lemma in sent_text.lower():
                    _find_aspect_in_sentence(term, lemma, sent_text, aspect_dict, label,
                                             max_examples, True)

    # write aspect lex to file
    header_row = ["Term", "Alias1", "Alias2", "Alias3"]
    for k in range(1, max_examples + 1):
        header_row.append("Example" + str(k))
    aspect_table = [header_row]

    for [term, lemma], sentences in aspect_dict.items():
        term_row = [term, lemma, '', '']
        for sent in sentences:
            term_row.append(sent)
        aspect_table.append(term_row)

    out_dir.mkdir(parents=True, exist_ok=True)
    out_file_path = out_dir / 'generated_aspect_lex.csv'
    _write_table(aspect_table, out_file_path)
    print('Aspect lexicon written to {}'.format(out_file_path))
def extract_download(
    url: str,
    directory: PathLike,
    filenames: list[str] = None,
    known_hash: str = None,
    overwrite: bool = False,
):
    if not isinstance(directory, Path):
        directory = Path(directory)

    if filenames is None:
        filenames = []

    if not directory.exists():
        directory.mkdir(parents=True, exist_ok=True)

    temporary_filename = directory / 'temp.tar.gz'
    logging.debug(f'downloading {url} -> {temporary_filename}')

    # download via pooch, which verifies the archive against `known_hash` if given
    temporary_filename = pooch.retrieve(url, known_hash=known_hash, fname=temporary_filename)

    logging.debug(f'extracting {temporary_filename} -> {directory}')
    with tarfile.open(temporary_filename) as local_file:
        if len(filenames) > 0:
            # extract only the requested members
            for filename in filenames:
                if filename in local_file.getnames():
                    path = directory / filename
                    if not path.exists() or overwrite:
                        if path.exists():
                            os.remove(path)
                        local_file.extract(filename, directory)
        else:
            local_file.extractall(directory)
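# Usage sketch for extract_download: the URL below is hypothetical, and `pooch`
# must be installed. Passing `filenames` extracts only those archive members;
# omitting it extracts everything.
extract_download(
    'https://example.com/archive.tar.gz',  # hypothetical URL
    directory='./downloads',
    filenames=['data/readme.txt'],  # extract a single member
    overwrite=False,  # keep any existing copy on disk
)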
def init_dir(adir: PathLike, exist_ok=False, parents=False, rmtree=False):
    adir = Path(adir)
    if adir.is_dir():
        if rmtree:
            shutil.rmtree(adir)
    adir.mkdir(exist_ok=exist_ok, parents=parents)
    return adir
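# Usage sketch for init_dir (hypothetical path): with rmtree=True an existing
# directory is wiped and recreated, giving a clean workspace; with the defaults,
# mkdir() raises FileExistsError if the directory already exists.
work_dir = init_dir('scratch/run-01', parents=True, rmtree=True)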
def plot_dataset(
    dataset: xr.Dataset,
    num_chains: int = 8,
    therm_frac: float = 0.,
    title: str = None,
    outdir: os.PathLike = None,
    subplots_kwargs: dict[str, Any] = None,
    plot_kwargs: dict[str, Any] = None,
    ext: str = 'png',
):
    plot_kwargs = {} if plot_kwargs is None else plot_kwargs
    subplots_kwargs = {} if subplots_kwargs is None else subplots_kwargs
    if outdir is None:
        tstamp = get_timestamp('%Y-%m-%d-%H%M%S')
        outdir = Path(os.getcwd()).joinpath('plots', f'plots-{tstamp}')
        outdir.mkdir(exist_ok=True, parents=True)

    for idx, (key, val) in enumerate(dataset.data_vars.items()):
        color = f'C{idx % 9}'  # cycle through the default matplotlib colors
        plot_kwargs['color'] = color

        fig, subfigs, ax = plot_metric(
            val=val.values,
            key=str(key),
            title=title,
            outdir=None,
            therm_frac=therm_frac,
            num_chains=num_chains,
            plot_kwargs=plot_kwargs,
            subplots_kwargs=subplots_kwargs,
        )
        if outdir is not None:
            outfile = Path(outdir).joinpath(f'{key}.{ext}')
            Path(outfile.parent).mkdir(exist_ok=True, parents=True)
            outfile = outfile.as_posix()

        if subfigs is not None:
            # hide the axes frame by matching the edge color to the face color
            # edgecolor = plt.rcParams['axes.edgecolor']
            plt.rcParams['axes.edgecolor'] = plt.rcParams['axes.facecolor']
            ax = subfigs[0].subplots(1, 1)
            # ax = fig[1].subplots(constrained_layout=True)
            cbar_kwargs = {
                # 'location': 'top',
                # 'orientation': 'horizontal',
            }
            im = val.plot(ax=ax, cbar_kwargs=cbar_kwargs)
            # ax.set_ylim(0, )
            im.colorbar.set_label(f'{key}')  # , labelpad=1.25)
            sns.despine(subfigs[0], top=True, right=True, left=True, bottom=True)
            if outdir is not None:
                print(f'Saving figure to: {outfile}')
                plt.savefig(outfile, dpi=400, bbox_inches='tight')
        else:
            fig.savefig(outfile, dpi=400, bbox_inches='tight')

    return dataset
def write_csvs(
    dirname: PathLike, adata: AnnData, skip_data: bool = True, sep: str = ','
):
    """See :meth:`~anndata.AnnData.write_csvs`."""
    dirname = Path(dirname)
    if dirname.suffix == '.csv':
        dirname = dirname.with_suffix('')
    logger.info("writing '.csv' files to %s", dirname)
    if not dirname.is_dir():
        dirname.mkdir(parents=True, exist_ok=True)
    dir_uns = dirname / 'uns'
    if not dir_uns.is_dir():
        dir_uns.mkdir(parents=True, exist_ok=True)
    d = dict(
        obs=adata._obs,
        var=adata._var,
        obsm=adata._obsm.to_df(),
        varm=adata._varm.to_df(),
    )
    if not skip_data:
        d['X'] = pd.DataFrame(
            adata._X.toarray() if issparse(adata._X) else adata._X
        )
    d_write = {**d, **adata._uns}
    not_yet_raised_sparse_warning = True
    for key, value in d_write.items():
        if issparse(value):
            if not_yet_raised_sparse_warning:
                warnings.warn(
                    'Omitting to write sparse annotation.', WriteWarning
                )
                not_yet_raised_sparse_warning = False
            continue
        # entries outside the core attributes go into the 'uns' subdirectory
        filename = dirname
        if key not in {'X', 'var', 'obs', 'obsm', 'varm'}:
            filename = dir_uns
        filename /= f'{key}.csv'
        df = value
        if not isinstance(value, pd.DataFrame):
            value = np.array(value)
            if np.ndim(value) == 0:
                value = value[None]
            try:
                df = pd.DataFrame(value)
            except Exception:
                warnings.warn(
                    f'Omitting to write {key!r} of type {type(value)}.',
                    WriteWarning,
                )
                continue
        df.to_csv(
            filename,
            sep=sep,
            header=key in {'obs', 'var', 'obsm', 'varm'},
            index=key in {'obs', 'var'},
        )
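# Usage sketch for write_csvs, assuming `anndata` and `numpy` are available.
# Writes obs.csv, var.csv, obsm.csv, varm.csv (plus uns/*.csv) under the target
# directory; skip_data=False also dumps the X matrix.
import anndata
import numpy as np

adata = anndata.AnnData(X=np.random.rand(4, 3))  # toy 4x3 matrix
write_csvs('exported_csvs', adata, skip_data=False)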
def download_mesh(url: str, directory: PathLike, overwrite: bool = False):
    if not isinstance(directory, Path):
        directory = Path(directory)
    if not directory.exists():
        directory.mkdir(parents=True, exist_ok=True)

    if not (directory / 'fort.14').exists() or overwrite:
        logging.info(f'downloading mesh files to {directory}')
        extract_download(url, directory, ['fort.13', 'fort.14'])

    return directory
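# Usage sketch for download_mesh (hypothetical URL), which delegates to
# extract_download above: the fort.13/fort.14 mesh files are only fetched when
# fort.14 is missing or overwrite=True, so repeated calls are cheap.
mesh_dir = download_mesh(
    'https://example.com/mesh.tar.gz',  # hypothetical tarball of mesh files
    directory='meshes/test_case',
)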
def train(
    self,
    episodes: int,
    validation: int = None,
    save_path: PathLike = None,
    gamma: float = 1.0,
    epsilon: float = 0.2,
) -> None:
    """
    :param episodes: number of episodes to play during training
    :param validation: run validation every `validation` episodes (None disables it)
    :param save_path: path to save models and logs
    :param gamma: γ - discount factor, used to balance immediate and future reward
    :param epsilon: ε - chance to pick a random move in the ε-greedy policy
    """
    if save_path:
        save_path = pathlib.Path(save_path) / (
            f'SIGMOID_24_negative_reward_gamma_{gamma}_epsilon_{epsilon}_q_learning'
        )
        save_path.mkdir(exist_ok=False, parents=False)

    for episode in tqdm(range(episodes)):
        if validation is not None and not (episode + 1) % validation:
            self.validate(path=save_path)
            if save_path:
                self.save(save_path, episode + 1)
            self.model.train()  # put the model back into training mode after validation

        players = (self.agent_cls(self.model), self.agent_cls(self.model))
        game = bg.Game(players=players)
        with self.e_greedy_get_action(episode, epsilon=epsilon):
            for agent, new_board, prev_board, move, available_moves in game.play_step_by_step():
                agent: agents.NNAgent
                pred_q = agent.estimate(board=prev_board)
                if new_board.status:
                    # terminal position: propagate the final reward to both players
                    reward = new_board.status
                    self.update(pred_q, torch.Tensor([reward]))
                    with prev_board.reverse() as reversed_board:
                        self.update(agent.estimate(board=reversed_board), torch.Tensor([-reward]))
                    break
                else:
                    estimated_moves = list(
                        agent.estimate_moves(available_moves=available_moves, board=prev_board)
                    )
                    agent_checkers, opp_checkers = prev_board.to_schema()
                    if estimated_moves:
                        max_q = np.max(estimated_moves)
                        new_q = gamma * max_q
                    else:
                        # penalize the agent when no move is available
                        new_q = torch.Tensor([-1])
                    self.update(pred_q, new_q)
def ensure_directory(directory: PathLike) -> Path:
    """
    ensure that a directory exists

    :param directory: directory path to ensure
    :returns: path to ensured directory
    """
    if not isinstance(directory, Path):
        directory = Path(directory)
    directory = directory.expanduser()
    if directory.is_file():
        directory = directory.parent
    if not directory.exists():
        directory.mkdir(parents=True, exist_ok=True)
    return directory
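# Usage sketch for ensure_directory (hypothetical path): `~` is expanded, and a
# path that points at an existing file resolves to its parent directory.
cache_dir = ensure_directory('~/.cache/myapp')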
def _download_file(
    fname: Union[AVAILABLE_MODELS, AVAILABLE_DATA],
    branch: str,
    save_dir: PathLike,
    type: Literal["model", "data"],
) -> Path:
    """Download a file to disk if it does not already exist.

    Args:
        fname: The file name.
        branch: Which branch of the unlockNN repository to download from.
        save_dir: The directory to check for already-downloaded models and
            in which to save newly downloaded models.
        type: The type of file.

    Returns:
        The path to the downloaded file/folder.

    """
    save_dir = Path(save_dir)
    if not save_dir.exists():
        save_dir.mkdir()

    specific_dir = save_dir / (
        f"{fname}-{branch}" + (".parquet" if type == "data" else "")
    )  # Add .parquet extension only if we're downloading data
    url = MODELS_URL if type == "model" else DATA_URL
    download_url = url.format(branch=branch, fname=fname)

    if not specific_dir.exists():
        r = requests.get(download_url)
        if type == "model":
            # models are shipped as tarballs; unpack into their own directory
            tar_f = tarfile.open(fileobj=BytesIO(r.content))
            tar_f.extractall(specific_dir)
            tar_f.close()
        else:
            specific_dir.write_bytes(r.content)

    return specific_dir
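# Usage sketch for _download_file, assuming the module-level MODELS_URL /
# DATA_URL templates are defined; "binary_e_form" is a hypothetical model name,
# not one confirmed by the source. Models arrive as tarballs and are unpacked;
# data arrives as a single .parquet file.
model_path = _download_file(
    fname="binary_e_form",  # hypothetical model name
    branch="master",
    save_dir="unlocknn_cache",
    type="model",
)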
def download_file(
    url: PathLike,
    filename: PathLike = None,
    directory: PathLike = None,
    show_progress: bool = True,
    silent: bool = False,
    timeout: int = 5,
) -> Path:
    """
    Download a file from a url and save it to the local filesystem. The file is saved to the
    current directory by default, or to `directory` if specified. If a filename is not given,
    the filename of the URL will be used.

    :param url: URL that points to the file to download
    :param filename: Name of the local file to save. Should point to the name of the file only,
                     not the full path. If None the filename from the url will be used
    :param directory: Directory to save the file to. Will be created if it doesn't exist.
                      If None the file will be saved to the current working directory
    :param show_progress: If True, show a TQDM progress bar
    :param silent: If True, do not print a message if the file already exists
    :param timeout: Number of seconds to wait for a server response before giving up
    :return: path to downloaded file
    """
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = [("User-agent", "Mozilla/5.0")]
        urllib.request.install_opener(opener)
        urlobject = urllib.request.urlopen(url, timeout=timeout)
        if filename is None:
            filename = urlobject.info().get_filename() or Path(urllib.parse.urlparse(url).path).name
    except urllib.error.URLError as error:
        if isinstance(error.reason, socket.timeout):
            raise Exception(
                "Connection timed out. If you access the internet through a proxy server, please "
                "make sure the proxy is set in the shell from where you launched Jupyter. If your "
                "internet connection is slow, you can call `download_file(url, timeout=30)` to "
                "wait for 30 seconds before raising this error."
            ) from None
        raise  # re-raise URL errors that are not timeouts
    except urllib.error.HTTPError as e:
        raise Exception(f"File downloading failed with error: {e.code} {e.msg}") from None
    filename = Path(filename)
    if len(filename.parts) > 1:
        raise ValueError(
            "`filename` should refer to the name of the file, excluding the directory. "
            "Use the `directory` parameter to specify a target directory for the downloaded file."
        )

    # create the directory if it does not exist, and add the directory to the filename
    if directory is not None:
        directory = Path(directory)
        directory.mkdir(parents=True, exist_ok=True)
        filename = directory / Path(filename)

    # download the file if it does not exist, or if it exists with an incorrect file size
    urlobject_size = int(urlobject.info().get("Content-Length", 0))
    if not filename.exists() or (os.stat(filename).st_size != urlobject_size):
        progress_callback = DownloadProgressBar(
            total=urlobject_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=str(filename),
            disable=not show_progress,
        )
        urllib.request.urlretrieve(url, filename, reporthook=progress_callback.update_to)
        if os.stat(filename).st_size >= urlobject_size:
            progress_callback.update(urlobject_size - progress_callback.n)
            progress_callback.refresh()
    else:
        if not silent:
            print(f"'{filename}' already exists.")
    return filename.resolve()
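# Usage sketch for download_file (hypothetical URL): the file is re-downloaded
# whenever the on-disk size does not match the server's Content-Length, so an
# interrupted download is repaired on the next call.
model_file = download_file(
    "https://example.com/model.onnx",  # hypothetical URL
    directory="model",
    show_progress=True,
)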
def make_path(p: PathLike) -> Path:
    p = Path(p)
    p.mkdir(exist_ok=True, parents=True)
    return p
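# Usage sketch for make_path (hypothetical path): nested directories are
# created as needed and the call is idempotent, so it is safe at the top of
# every run.
logs_dir = make_path('runs/logs')
assert logs_dir.is_dir()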