def extract_and_capitalize_headlines_from_corpus(corpus_dir, docids):
    """
    Iterate through all the files in `corpus_dir`,
    extract the headlines, capitalize them, and yield them.

    Parameters
    ----------
    corpus_dir: string

    docids: list of string
        the documents to be processed

    Yields
    ------
    (error, payload): (Exception or None, (str, list<list<str>>) or None)
        payload is (docid, capitalized headlines) when extraction succeeds
    """
    get_tokens = partial(map, partial(get_in, ["token"]))
    get_features = partial(get_in, ["features"])

    make_capitalized_title_new = lambda words: make_capitalized_title(title_words=words)

    for docid in docids:
        p = Path(corpus_dir) / Path(docid)
        auxil_p = p.with_suffix(".auxil")
        paf_p = p.with_suffix(".paf")
        if auxil_p.exists() and paf_p.exists():
            try:
                titles, _ = separate_title_from_body(str(auxil_p), str(paf_p))
            except Exception as e:
                yield (e, None)
                continue  # `titles` is undefined on failure, so skip this document
            # pipeline:
            # -> get features
            # -> get tokens
            # -> capitalize headline
            yield (None, (p.name, list(map(compose(make_capitalized_title_new, get_tokens, get_features), titles))))
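
The pipeline above leans on functools.partial plus toolz-style compose/get_in (the imports sit outside this snippet, so toolz is an assumption here). A minimal sketch of the same composition on a dummy title structure, with a stand-in for the project-specific make_capitalized_title:

from functools import partial
from toolz import compose, get_in

get_tokens = partial(map, partial(get_in, ["token"]))
get_features = partial(get_in, ["features"])

def capitalize(words):  # stand-in for make_capitalized_title
    return [w.capitalize() for w in words]

title = {"features": [{"token": "breaking"}, {"token": "news"}]}
print(compose(capitalize, get_tokens, get_features)(title))  # ['Breaking', 'News']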
Example #2
def _check_files(filename, exts):
    """
    takes filename string and list of extensions, checks that they all exist and
    returns a Path
    """
    #TODO test whether IOError (deprecated) or OSError is better handled by Orange
    p = Path(filename)
    if p.suffix == ".dmt":
        for child in p.parent.iterdir():
            if child.suffix == ".dmt":
                continue
            elif child.stem.casefold() == p.stem.casefold():
                p = child
                break
            elif child.stem.casefold() == (p.stem + "_0000_0000").casefold():
                p = child.with_name(child.stem.split("_0000_0000")[0] + p.suffix)
                break
    for ext in exts:
        if ext == ".dmt":
            # Always lowercase
            ps = p.parent.joinpath(p.with_suffix(ext).name.lower())
        elif ext in [".drd", ".dmd"]:
            # Always has at least _0000_0000 tile
            ps = p.parent.joinpath(p.stem + "_0000_0000" + ext)
        else:
            ps = p.with_suffix(ext)
        if not ps.is_file():
            raise OSError('File "{}" was not found.'.format(ps))
    return p
Example #3
def convert_nb(fname, dest_path='.'):
    "Convert a notebook `fname` to html file in `dest_path`."
    from .gen_notebooks import remove_undoc_cells, remove_code_cell_jupyter_widget_state_elem
    nb = read_nb(fname)
    nb['cells'] = remove_undoc_cells(nb['cells'])
    nb['cells'] = remove_code_cell_jupyter_widget_state_elem(nb['cells'])
    fname = Path(fname).absolute()
    dest_name = fname.with_suffix('.html').name
    meta = nb['metadata']
    meta_jekyll = meta['jekyll'] if 'jekyll' in meta else {'title': fname.with_suffix('').name}
    meta_jekyll['nb_path'] = f'{fname.parent.name}/{fname.name}'
    with open(f'{dest_path}/{dest_name}','w') as f:
        f.write(exporter.from_notebook_node(nb, resources=meta_jekyll)[0])
Example #4
class CythonExtension(Extension):

  def __init__(
    self,
    name,
    cython_source,
    sources = None,
    output_dir = None,
    language_level = None,
    cplus = None,
    annotate = None,
    **kwargs
  ):
    self.cython_source = Path(cython_source)
    self.output_dir = Path(output_dir or "cythonized")
    self.language_level = language_level or 3
    self.cplus = cplus or False
    self.annotate = annotate or False
    
    self.output_file = self.output_dir.joinpath(
      self.cython_source.with_suffix(".cpp" if self.cplus else ".c")
    )
    
    sources = sources or []
    sources.append(str(self.output_file))
    super().__init__(name, sources, **kwargs)
Example #5
def doplotsave(bigfn,data,rawind,clim,dohist,meanImg):
    if bigfn is None or data is None:
        return

    bigfn=Path(bigfn)

    if dohist:
        ax=figure().gca()
        hist(data.ravel(), bins=256,log=True)
        ax.set_title('histogram of {}'.format(bigfn))
        ax.set_ylabel('frequency of occurrence')
        ax.set_xlabel('data value')

    if meanImg:
        meanStack = data.mean(axis=0).astype(uint16) #DO NOT use dtype= here, it messes up internal calculation!
        fg = figure(32)
        ax = fg.gca()
        if clim:
            hi=ax.imshow(meanStack,cmap='gray',origin='lower', vmin=clim[0], vmax=clim[1],norm=LogNorm())
        else:
            hi=ax.imshow(meanStack,cmap='gray',origin='lower',norm=LogNorm())

        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.set_title('mean of image frames')
        fg.colorbar(hi)

        # with_suffix() requires a leading dot, so build the '_mean' name explicitly
        pngfn = bigfn.with_name(bigfn.stem + '_mean.png')
        print(f'writing mean PNG {pngfn}')
        fg.savefig(pngfn,dpi=150,bbox_inches='tight')
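
For reference on the fix above: Path.with_suffix() only accepts a suffix that starts with a dot, so it cannot append a "_mean" label in front of the extension; with_name() handles that case.

from pathlib import Path

p = Path("stack.h5")
print(p.with_name(p.stem + "_mean.png"))  # stack_mean.png
# p.with_suffix("_mean.png") would raise ValueError (invalid suffix)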
Example #6
    def download(self, destination: Path):
        logging.info("Downloading %s into %s", self.url, destination)

        destination.parent.mkdir(parents=True, exist_ok=True)
        tmpdestination = destination.with_suffix(".tmp")
        if tmpdestination.exists():
            logging.warning("Removing temporary directory %s", tmpdestination)
            shutil.rmtree(tmpdestination)

        file = self.dataset.downloadURL(self.url)
        
        logging.info("Unarchiving file")
        with tarfile.TarFile.open(file.path, mode="r:*") as tar:
            tar.extractall(tmpdestination)

        for ix, path in enumerate(tmpdestination.iterdir()):
            if ix > 1: break
        
        # Just one file/folder: move
        if ix == 0 and path.is_dir():
            logging.info("Moving single directory into destination")
            shutil.move(path, destination)
            shutil.rmtree(tmpdestination)
        else:
            shutil.move(tmpdestination, destination)
Example #7
    def _save(self, name, content):
        name = Path(name)
        self._validate(name)
        name = name.with_suffix(name.suffix.lower())
        hashed_dir = Path(self._hash_dir(content))
        dest_name = hashed_dir.joinpath(name)
        return super()._save(str(dest_name), content)
Example #8
    async def drop(cls, *, directory='migrations', verbose=False, connection=None):
        """Drops the database and migrations, if any.
        Parameters
        -----------
        directory: str
            The migrations directory.
        verbose: bool
            Whether to output some information to stdout.
        connection: Optional[asyncpg.Connection]
            The connection to use, if not provided will acquire one from
            the internal pool.
        """

        directory = Path(directory) / cls.__tablename__
        p = directory.with_suffix('.json')
        current = directory.with_name('current-' + p.name)

        if not p.exists() or not current.exists():
            raise RuntimeError('Could not find the appropriate data files.')

        try:
            p.unlink()
        except:
            raise RuntimeError('Could not delete migration file')

        try:
            current.unlink()
        except:
            raise RuntimeError('Could not delete current migration file')

        async with MaybeAcquire(connection, pool=cls._pool) as con:
            sql = 'DROP TABLE {0} CASCADE;'.format(cls.__tablename__)
            if verbose:
                print(sql)
            await con.execute(sql)
Example #9
def dir2fn(ofn,ifn,suffix) -> Path:
    """
    ofn = filename or output directory, to create filename based on ifn
    ifn = input filename (don't overwrite!)
    suffix = desired file extension e.g. .h5
    """
    if not ofn: # no output file desired
        return

    ofn = Path(ofn).expanduser()
    ifn = Path(ifn).expanduser()
    assert ifn.is_file()

    if ofn.suffix==suffix: #must already be a filename
        pass
    else: #must be a directory
        assert ofn.is_dir(), f'create directory {ofn}'
        ofn = ofn / ifn.with_suffix(suffix).name

    try:
        assert not ofn.samefile(ifn), f'do not overwrite input file! {ifn}'
    except FileNotFoundError: # a good thing, the output file doesn't exist and hence it's not the input file
        pass

    return ofn
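
A small usage sketch for dir2fn() above, run against a temporary directory so the is_file()/is_dir() assertions hold (paths in the comments are illustrative):

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as d:
    ifn = Path(d, "run1.dat")
    ifn.touch()
    outdir = Path(d, "out")
    outdir.mkdir()
    print(dir2fn(outdir, ifn, ".h5"))                # <d>/out/run1.h5
    print(dir2fn(outdir / "result.h5", ifn, ".h5"))  # <d>/out/result.h5, kept as-is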
Example #10
def save_to_hdf(df, fname, output_subdir=None):
    """Save temporary HDF file in output folder for later concatenation.

    By default the product is stored in HOME/output.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to save
    fname : string
        The product filename that was used to create this dataframe to save.
        The saving filename for the HDF file will be auto-determined from that.
    output_subdir : str
        String to determine a subfolder inside HOME/output where this data
        should be stored instead of just HOME/output

    """
    path = Path(fname)
    newfname = path.with_suffix('.h5').name
    folderpath = HOME / 'output'
    if output_subdir:
        folderpath = folderpath / output_subdir
    path = folderpath / newfname
    df.to_hdf(str(path), 'df', format='t')
    return str(path)
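
The filename mapping used by save_to_hdf() in isolation; HOME is a module-level Path in the original, so it is stubbed here for the sketch:

from pathlib import Path

HOME = Path("/home/user")          # stand-in for the module's HOME
fname = "EDR_123456_0001.IMG"      # hypothetical product filename
print(HOME / "output" / "ice" / Path(fname).with_suffix(".h5").name)
# /home/user/output/ice/EDR_123456_0001.h5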
Example #11
    def build_ebook(self, output_file, add_page_markers=True):
        if not self._files:
            self._files = self.import_links_page()
        root_node = self.get_content_tree(self.get_index_doc())

        output_file = Path(output_file)
        if output_file.suffix != '.txt':
            output_file = output_file.with_suffix('.txt')

        if add_page_markers:
            fp = PagedFileWriter(output_file)
        else:
            fp = output_file.open('w')

        # Write tree to file.
        with fp:
            fp.write('# %s\n\n' % self.title)
            fp.write('作者:%s\n\n' % self.author)

            stack = deque()
            stack.append((1, root_node))

            while stack:
                level, node = stack.pop()
                if node.title != 'root':
                    hashes = '#' * level
                    fp.write('%s %s\n\n' % (hashes, node.title))
                    if node.content:
                        # Strip any leading and trailing newlines.
                        fp.write(node.content.strip('\n') + '\n\n')

                for child in reversed(node.children):
                    stack.append((level+1, child))
Example #12
def main():

    session = create_session(database='CtyOD')
    engine = session.get_bind()
    CtyOD.metadata.drop_all(engine)
    CtyOD.metadata.create_all(engine)
    field_name = [c.name for c in CtyOD.__table__.columns]

    load_string = (
        "LOAD DATA LOCAL INFILE 'ck/{!s}'"
        " INTO TABLE ctyodp4"
        " FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\r\\n'"
        " IGNORE 1 LINES;"
        )

    base_url = "http://cta.ornl.gov/transnet/"
    for idx in range(1, 7):
        
        csv_file = Path('CtyODp4-{}.csv'.format(idx)) 
        archive = Path('ck') / csv_file

        if not archive.exists():
            url = base_url + str(csv_file.with_suffix(".zip"))
            request = Request(url)
            with urlopen(request) as io:
                archive = ZipFile(BytesIO(io.read()))
            archive.extractall()
            
        session.execute(load_string.format(csv_file))
        session.commit()

    session.close()
Example #13
def extract_verb(args, file: Path):
    if args.ignore_extracted and file.with_suffix('.png').exists():
        return

    with file.open('rb') as input_file:
        header = input_file.read(6)
        if header != b'mrgd00':
            return

        logging.info('Extracting from ' + file.name)
        logging.debug('header: {0}'.format(header.decode('ASCII')))

        number_of_entries, = struct.unpack('<H', input_file.read(2))
        logging.debug('found {0} entries'.format(number_of_entries))
        if not number_of_entries:
            return

        entries_descriptors = []
        for i in range(number_of_entries):
            sector_offset, offset, sector_size_upper_boundary, size = struct.unpack('<HHHH', input_file.read(8))
            entries_descriptors.append(
                ArchiveEntry(sector_offset, offset, sector_size_upper_boundary, size, number_of_entries))

        if args.bin:
            extract_bin(file, input_file, entries_descriptors, args.notmzx)
        else:
            MzpFile(file, input_file, entries_descriptors)
Example #14
def hdf2video(data,imgh5,outfn,clim):
    outfn = Path(outfn).expanduser()

    import cv2
    try:
        from cv2.cv import FOURCC as fourcc #Windows needs from cv2.cv
    except ImportError:
        from cv2 import VideoWriter_fourcc as fourcc

    outfn = outfn.with_suffix('.ogv')
    cc4 = fourcc(*'THEO')
    # we use isColor=True because some codecs have trouble with grayscale
    hv = cv2.VideoWriter(str(outfn),cc4, fps=33,
                         frameSize=data.shape[1:][::-1],  #frameSize needs col,row
                         isColor=True) #right now we're only using grayscale
    if not hv.isOpened():
        raise TypeError('trouble starting video file')

    for d in data:
        #RAM usage explodes if scaling all at once on GB class file
    #for d in bytescale(data,1000,4000):
    #for d in sixteen2eight(data,(1000,4000)):
        hv.write(gray2rgb(bytescale(d,clim[0],clim[1])))

    hv.release()
Example #15
    def runtest(self):
        scss_file = Path(str(self.fspath))
        css_file = scss_file.with_suffix('.css')

        with css_file.open('rb') as fh:
            # Output is Unicode, so decode this here
            expected = fh.read().decode('utf8')

        scss.config.STATIC_ROOT = str(scss_file.parent / 'static')

        search_path = []
        include = scss_file.parent / 'include'
        if include.exists():
            search_path.append(include)
        search_path.append(scss_file.parent)

        actual = compile_file(
            scss_file,
            output_style='expanded',
            search_path=search_path,
            extensions=[
                CoreExtension,
                ExtraExtension,
                FontsExtension,
                CompassExtension,
            ],
        )

        # Normalize leading and trailing newlines
        actual = actual.strip('\n')
        expected = expected.strip('\n')

        assert expected == actual
Example #16
def write_template_desc(video_file: str):
    video_path = Path(video_file)
    if not video_path.exists():
        print("File not found: {}".format(video_path))
        return

    desc_path = video_path.with_suffix(_DESCRIPTOR_SUFFIX)
    if desc_path.exists():
        print("Error. Desc file already exists: {}".format(desc_path))
        return

    json_str = r'''{
    "title": "{%TITLE%}",
    "site": "Unknown",
    "duration": "00:00:00",
    "resolution": 1920,
    "group": "Inbox",
    "actors": ["Unknown"],
    "timeStamps": [
        {"ts":"00:10:00", "name":"?"},
        {"ts":"00:20:00", "name":"?"}
    ]
}'''

    json_str = json_str.replace("{%TITLE%}",video_path.name)

    fp = desc_path.open(mode="w")
    try:
        fp.write(json_str)
    finally:
        fp.close()
Example #17
def plotfilt(b: np.ndarray, fs: int, ofn: Path = None):
    if fs is None:
        fs = 1  # normalized freq

    L = b.size

    fg, axs = subplots(2, 1, sharex=False)
    freq, response = signal.freqz(b)
    response_dB = 20*np.log10(abs(response))
    if response_dB.max() > 0:
        logging.error('filter may be unstable')

    axs[0].plot(freq*fs/(2*np.pi), response_dB)
    axs[0].set_title(f'filter response  {L} taps')
    axs[0].set_ylim((-100, None))
    axs[0].set_ylabel('|H| [db]')
    axs[0].set_xlabel('frequency [Hz]')

    t = np.arange(0, L/fs, 1/fs)
    axs[1].plot(t, b)
    axs[1].set_xlabel('time [sec]')
    axs[1].set_title('impulse response')
    axs[1].set_ylabel('amplitude')
    axs[1].autoscale(True, tight=True)

    fg.tight_layout()

    if ofn:
        ofn = Path(ofn).expanduser()
        ofn = ofn.with_suffix('.png')
        print('writing', ofn)
        fg.savefig(str(ofn), dpi=100, bbox_inches='tight')
Example #18
def sitemap(suffix='.md'):
    # ensure we are in the ROOT_DIR
    os.chdir(ROOT_DIR)
    multilang = ['en/', 'zh-cn/', 'zh-tw/']
    pages = []
    raw_bytes = check_output('scripts/gitls.sh')
    # ignore last blank string
    raw_strs = raw_bytes.decode("utf-8").split('\n')[:-1]
    for raw_str in raw_strs:
        date, raw_f = raw_str.split(' ')
        for lang in multilang:
            if raw_f.startswith(lang) and raw_f.endswith(suffix):
                if raw_f == lang + 'SUMMARY.md':
                    continue
                p = Path(raw_f)
                # rename README with index
                if p.name == 'README.md':
                    p = p.with_name('index.md')
                p = p.with_suffix('.html')
                fn = p.as_posix().lower()
                page = {}
                page['lastmod'] = date
                page['url'] = fn
                pages.append(page)
    root_url = 'http://algorithm.yuanbin.me'
    templates = os.path.join(BASE_DIR, 'sitemap' + os.sep + 'templates')
    env = Environment(loader=FileSystemLoader(templates))
    template = env.get_template('sitemap.xml')
    sitemap_xml = template.render(root_url=root_url, pages=pages, freq='daily')
    sitemap_fn = os.path.join(ROOT_DIR, 'sitemap.xml')
    with open(sitemap_fn, 'w') as sf:
        sf.write(sitemap_xml)
Example #19
    def __init__(self):
        """ Reads county shapefiles 

        Shapes is list of (lat/lons) making up the shapes.

        info is list of info about each shape.
        """

        mm = basemap.Basemap()

        gis_file = Path(basemap.basemap_datadir) / 'UScounties'
        if not gis_file.with_suffix('.shp').exists():
            msg = ('Cannot find {}.shp\nYou can install it with '
                    '`conda install -c conda-forge basemap-data-hires`').format
            raise IOError(msg(str(gis_file)))

        county_info = mm.readshapefile(
            str(gis_file), 'counties',
            default_encoding='latin-1', drawbounds=False, linewidth=0.1)

        self.shapes = mm.counties

        self.info = mm.counties_info

        self._build_shape_lookup()

        self.cmap = pyplot.get_cmap('rainbow')
Example #20
def main(source, dest):
    """Rename a Git repository and update its remote accordingly."""
    basicConfig(level=DEBUG)
    try:
        repo = Repo(source)
    except OSError as error:
        logger.exception('Error:')
        exit(1)
    else:
        dest = Path(dest)
        try:
            dest = dest.with_suffix('.git')
        except ValueError:
            logger.exception('Error:')
            exit(1)
        logger.info('Using dest: %s', dest)

        remote = repo.remote()
        logger.debug('Old URL: %s', remote.url)
        origin = Path(remote.url)
        logger.debug('Parent: %s', origin.parent)

        new = origin.parent / dest
        logger.info('Using URL: %s', new)

        conf = remote.config_writer
        conf.set('url', str(new))
        conf.release()

        Path(source).rename(dest)
        exit(0)
Example #21
    def __init__(self, name: str, flag='r+b'):
        """Initialize a Save object.

        The name argument will automatically be preceded by the `home` module variable. It will also automatically be
        followed by the file extension '.sve'.

        :param name: the name of the file to save to
        :param flag: the flags to use when opening the file (see `open`)

        """
        path = Path(name)
        path = path.with_suffix('.sve')
        if not path.exists():
            self._existed = False
            file = path.open('w')
            file.close()
        else:
            self._existed = True
        self._path = path
        self._flag = flag
        self._file = path.open(flag, buffering=0)
        self._is_closed = False

        # set current offset for registering
        self._offset = 0
        self._binding = {}
        self._cache = {}

        # implement thread safe locking
        self._lock = Lock()
Example #22
def save_figure(fig, destination, extra_artist=None):

    from pathlib import Path
    name = Path(destination)

    if extra_artist is not None:
        extra_args = {"extra_artists": (extra_artist, ),
                      "bbox_inches": "tight"}
    else:
        extra_args = {}

    for extension in [".pdf", ".svgz", ".png"]:
        if extension != ".png":
            fig.savefig(str(name.with_suffix(extension)), **extra_args)
        else:
            fig.savefig(str(name.with_suffix(extension)), dpi=300,
                        **extra_args)
Example #23
def save_figure(fig: plt.Figure, destination: str,
                extra_artist: mpl.artist.Artist=None):

    name = Path(destination)

    if extra_artist is not None:
        extra_args = {"extra_artists": (extra_artist, ),
                      "bbox_inches": "tight"}
    else:
        extra_args = {}

    for extension in [".pdf", ".svgz", ".png"]:
        if extension != ".png":
            fig.savefig(str(name.with_suffix(extension)), **extra_args)
        else:
            fig.savefig(str(name.with_suffix(extension)), dpi=300,
                        **extra_args)
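
Note on both save_figure() variants: with_suffix() replaces any extension already present on destination, so passing e.g. "plot.png" still yields plot.pdf / plot.svgz / plot.png rather than stacked suffixes:

from pathlib import Path

name = Path("figures/plot.png")
print([str(name.with_suffix(ext)) for ext in [".pdf", ".svgz", ".png"]])
# ['figures/plot.pdf', 'figures/plot.svgz', 'figures/plot.png']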
Example #24
def load_local_configuration(configurable):
    """.configure() *configurable* with local configuration."""
    # This is a bit ugly and should be something upstream invoke should be able to do by itself.
    for path in ['/etc/qabel', '~/.qabel', Path(__file__).with_name('qabel')]:
        path = Path(path).expanduser()
        for suffix in ['.yaml', '.py', '.json']:
            path_with_suffix = path.with_suffix(suffix)
            if try_load(path_with_suffix, configurable):
                print('Picked up extra configuration from', path_with_suffix)
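
The loop above relies on with_suffix() appending when the path has no extension, which is what happens for bare config paths such as /etc/qabel; a quick check:

from pathlib import Path

print(Path("/etc/qabel").with_suffix(".yaml"))  # /etc/qabel.yaml
print(Path("~/.qabel").with_suffix(".json"))    # ~/.qabel.json (dotfiles carry no suffix)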
Example #25
def save_report(directory, source_path, report):
    rel_path = Path(str(source_path).replace('../', ''))
    path = Path(directory).joinpath(rel_path.with_suffix('.html'))
    try:
        path.parent.mkdir(parents=True)
    except FileExistsError:
        pass
    with path.open('w') as fp:
        fp.write(report)
Example #26
    def _check_ex_path(self, ex):
        from pathlib import Path
        file = Path(ex)
        if not file.suffix:
            file = file.with_suffix('.py')
        file = (Path('./examples') / file).resolve()

        assert file.is_file(), ('Invalid example %s' % file)
        return file
Example #27
    def _parse_filename(cls, filename, ext=''):
        p = Path(filename)
        p = p.resolve()
        if ext == '':
            ext = p.suffix
        # If extension not part of filename
        ext = '.' + (ext.strip('.'))
        if p.suffix != ext:
            p = p.with_suffix(ext)
        return p
Example #28
def plotTexStats(data,texfn,debugon,texChanged):
    texfn = Path(texfn).expanduser()

    imgfn = texfn.with_suffix('.png')

    daten=[dt.fromtimestamp(ts) for ts in data[:,0]]
    #if debugon: print(daten)
    print(f"{texfn} first / last mod time {daten[0].strftime('%Y-%m-%dT%H:%M:%S')} / {daten[-1].strftime('%Y-%m-%dT%H:%M:%S')}")


    daten=md.date2num(daten)

    fg = plt.figure()
    ax1 = fg.gca()
    ax2 = ax1.twinx()


    ax1.set_xlabel("Date")
    ax1.set_ylabel('Word Count')
    ax2.set_ylabel('Equation, Figure, Table Count')

    ax1.plot(daten,data[:,1], linestyle='-', marker='.', color='b',label='Words')

    ax2.plot(daten,data[:,3], linestyle='-', marker='.', color='r',label='Equations')
    ax2.plot(daten,data[:,2], linestyle='-', marker='.', color='g', label='Figures + Tables')
    xLo = data[0,0]-86400 #set lower xlim to be 1 day prior to earliest data (fixes one data point corner case)
    xHi = data[-1,0]+86400 #set upper xlim to be 1 day after the latest data (fixes one data point corner case)
    ax1.set_xlim( dt.fromtimestamp(xLo), dt.fromtimestamp(xHi))

    if (xHi-xLo < 3*86400): xFmt = '%Y-%m-%dT%H'
    else: xFmt = '%Y-%m-%d'

    ax1.xaxis.set_major_formatter(md.DateFormatter(xFmt))

    for tl in ax1.get_yticklabels(): tl.set_color('b')

    for tl in ax2.get_yticklabels(): tl.set_color('r')

    #ax1.legend()
    ax2.legend(loc=2)
    ax1.set_title("Dissertation Progress")
    fg.autofmt_xdate()

    if texChanged:
        if imgfn.is_file(): #data file already exists
            imgModTime = dt.fromtimestamp(imgfn.stat().st_mtime).strftime('%Y-%m-%dT%H-%M-%S')
            oldFN = f'{imgfn}-{imgModTime}.png'
            if debugon:
                print(f"Moving {imgfn} to {oldFN}")
            shutil.move(imgfn,oldFN)

        if debugon:
            print(f'saving updated figure {imgfn}')
        fg.savefig(str(imgfn),bbox_inches='tight')
    plt.show()
Example #29
    def render(self, filename_root=None, file=None):
        """Render the document repeatedly until the output no longer changes due
        to cross-references that need some iterations to converge."""
        self.error = False
        filename_root = Path(filename_root) if filename_root else None
        if filename_root and file is None:
            extension = self.backend_document.extension
            filename = filename_root.with_suffix(extension)
            file = filename.open('wb')
        elif file and filename_root is None:
            filename = getattr(file, 'name', None)
        else:
            raise ValueError("You need to specify either 'filename_root' or "
                             "'file'.")

        def has_converged(part_page_counts):
            """Return `True` if the last rendering iteration converged to a
            stable result.

            Practically, this tests whether the total number of pages and page
            references to document elements have changed since the previous
            rendering iteration."""
            nonlocal prev_number_of_pages, prev_page_references
            return (part_page_counts == prev_number_of_pages and
                    self.page_references == prev_page_references)

        fake_container = FakeContainer(self)
        try:
            self.document_tree.build_document(fake_container)
            (prev_number_of_pages,
             prev_page_references) = self._load_cache(filename_root)

            self.part_page_counts = prev_number_of_pages
            self.prepare(fake_container)
            self.page_elements.clear()
            self.page_references = prev_page_references.copy()
            self.part_page_counts = self._render_pages()
            while not has_converged(self.part_page_counts):
                prev_number_of_pages = self.part_page_counts
                prev_page_references = self.page_references.copy()
                print('Not yet converged, rendering again...')
                del self.backend_document
                self.backend_document = self.backend.Document(self.CREATOR)
                self.part_page_counts = self._render_pages()
            self.create_outlines()
            if filename:
                self._save_cache(filename_root, self.part_page_counts,
                                 self.page_references)
                self.style_log.write_log(filename_root)
                print('Writing output: {}'.format(filename))
            self.backend_document.write(file)
        finally:
            if filename_root:
                file.close()
        return not self.error
Example #30
class QUBE(object):
    def __init__(self, fname):
        self.path = Path(fname)
        # file management
        self.file_id = self.path.stem
        self.label_fname = self.path.with_suffix(".LBL")
        self.data_fname = self.path.with_suffix(".DAT")

        # read the data
        self.data1D = (np.fromfile(str(self.data_fname), ">H")).astype(np.uint16)

        # label stuff
        self.label = pvl.load(str(self.label_fname))
        self.cubelabel = self.label["QUBE"]
        self.LINE_BIN = self.cubelabel["LINE_BIN"]
        self.BAND_BIN = self.cubelabel["BAND_BIN"]
        self.shape = tuple(self.cubelabel["CORE_ITEMS"])
        self.line_range = (
            self.cubelabel["UL_CORNER_LINE"],
            self.cubelabel["LR_CORNER_LINE"] + 1,  # for numpy slicing + 1
        )
        self.band_range = (
            self.cubelabel["UL_CORNER_BAND"],
            self.cubelabel["LR_CORNER_BAND"] + 1,  # for numpy slicing + 1
        )

        # reshape the data with infos from label
        self.data = self.data1D.reshape(self.shape, order="F")[
            slice(*self.band_range), slice(*self.line_range), :
        ]

    @property
    def n_integrations(self):
        return self.shape[-1]

    @property
    def waves(self):
        return np.linspace(self.wave_min, self.wave_max, self.shape[0])

    @property
    def xarray(self):
        return xr.DataArray(self.data)
Example #31
def spoiler_path_for(save_spoiler: bool, output_file: Path) -> Optional[Path]:
    if save_spoiler:
        return output_file.with_suffix(f".{LayoutDescription.file_extension()}")
    else:
        return None
Example #32
def img_name_to_int(img: Path):
    return int(img.with_suffix("").name)
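
with_suffix("") strips the (last) extension, which is what lets the frame number be parsed directly:

from pathlib import Path

print(int(Path("frames/000123.png").with_suffix("").name))  # 123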
Example #33
class Mesh:
    def __init__(self,
                 file,
                 hold_history=False,
                 vs=None,
                 faces=None,
                 device='cpu',
                 gfmm=True):
        if file is None:
            return
        self.filename = Path(file)
        self.vs = self.v_mask = self.edge_areas = None
        self.edges = self.gemm_edges = self.sides = None
        self.device = device
        if vs is not None and faces is not None:
            self.vs, self.faces = vs.cpu().numpy(), faces.cpu().numpy()
            self.scale, self.translations = 1.0, np.zeros(3, )
        else:
            self.vs, self.faces = load_obj(file)
            self.normalize_unit_bb()
        self.vs_in = copy.deepcopy(self.vs)
        self.v_mask = np.ones(len(self.vs), dtype=bool)
        self.build_gemm()
        self.history_data = None
        if hold_history:
            self.init_history()
        if gfmm:
            self.gfmm = self.build_gfmm()  #TODO get rid of this DS
        else:
            self.gfmm = None
        if type(self.vs) is np.ndarray:
            self.vs = torch.from_numpy(self.vs)
        if type(self.faces) is np.ndarray:
            self.faces = torch.from_numpy(self.faces)
        self.vs = self.vs.to(self.device)
        self.faces = self.faces.to(self.device).long()
        self.area, self.normals = self.face_areas_normals(self.vs, self.faces)

    def build_gemm(self):
        self.ve = [[] for _ in self.vs]
        self.vei = [[] for _ in self.vs]
        edge_nb = []
        sides = []
        edge2key = dict()
        edges = []
        edges_count = 0
        nb_count = []
        for face_id, face in enumerate(self.faces):
            faces_edges = []
            for i in range(3):
                cur_edge = (face[i], face[(i + 1) % 3])
                faces_edges.append(cur_edge)
            for idx, edge in enumerate(faces_edges):
                edge = tuple(sorted(list(edge)))
                faces_edges[idx] = edge
                if edge not in edge2key:
                    edge2key[edge] = edges_count
                    edges.append(list(edge))
                    edge_nb.append([-1, -1, -1, -1])
                    sides.append([-1, -1, -1, -1])
                    self.ve[edge[0]].append(edges_count)
                    self.ve[edge[1]].append(edges_count)
                    self.vei[edge[0]].append(0)
                    self.vei[edge[1]].append(1)
                    nb_count.append(0)
                    edges_count += 1
            for idx, edge in enumerate(faces_edges):
                edge_key = edge2key[edge]
                edge_nb[edge_key][nb_count[edge_key]] = edge2key[faces_edges[
                    (idx + 1) % 3]]
                edge_nb[edge_key][nb_count[edge_key] +
                                  1] = edge2key[faces_edges[(idx + 2) % 3]]
                nb_count[edge_key] += 2
            for idx, edge in enumerate(faces_edges):
                edge_key = edge2key[edge]
                sides[edge_key][nb_count[edge_key] -
                                2] = nb_count[edge2key[faces_edges[(idx + 1) %
                                                                   3]]] - 1
                sides[edge_key][nb_count[edge_key] -
                                1] = nb_count[edge2key[faces_edges[(idx + 2) %
                                                                   3]]] - 2
        self.edges = np.array(edges, dtype=np.int32)
        self.gemm_edges = np.array(edge_nb, dtype=np.int64)
        self.sides = np.array(sides, dtype=np.int64)
        self.edges_count = edges_count
        # lots of DS for loss
        self.nvs, self.nvsi, self.nvsin = [], [], []
        for i, e in enumerate(self.ve):
            self.nvs.append(len(e))
            self.nvsi.append(len(e) * [i])
            self.nvsin.append(list(range(len(e))))
        self.vei = torch.from_numpy(
            np.concatenate(np.array(self.vei)).ravel()).to(self.device).long()
        self.nvsi = torch.Tensor(np.concatenate(np.array(
            self.nvsi)).ravel()).to(self.device).long()
        self.nvsin = torch.from_numpy(
            np.concatenate(np.array(self.nvsin)).ravel()).to(
                self.device).long()
        ve_in = copy.deepcopy(self.ve)
        self.ve_in = torch.from_numpy(np.concatenate(
            np.array(ve_in)).ravel()).to(self.device).long()
        self.max_nvs = max(self.nvs)
        self.nvs = torch.Tensor(self.nvs).to(self.device).float()
        self.edge2key = edge2key

    def build_ef(self):
        edge_faces = dict()
        if type(self.faces) == torch.Tensor:
            faces = self.faces.cpu().numpy()
        else:
            faces = self.faces
        for face_id, face in enumerate(faces):
            for i in range(3):
                edge = tuple(sorted([face[i], face[(i + 1) % 3]]))
                if edge not in edge_faces:
                    edge_faces[edge] = []
                edge_faces[edge].append(face_id)
        for k in edge_faces.keys():
            if len(edge_faces[k]) < 2:
                edge_faces[k].append(edge_faces[k][0])
        return edge_faces

    def build_gfmm(self):
        edge_faces = self.build_ef()
        gfmm = []
        if type(self.faces) == torch.Tensor:
            faces = self.faces.cpu().numpy()
        else:
            faces = self.faces
        for face_id, face in enumerate(faces):
            neighbors = [face_id]
            for i in range(3):
                edge = tuple(sorted([face[i], face[(i + 1) % 3]]))
                neighbors.extend(list(set(edge_faces[edge]) - set([face_id])))
            gfmm.append(neighbors)
        return torch.Tensor(gfmm).long().to(self.device)

    def normalize_unit_bb(self):
        """
        normalizes to unit bounding box and translates to center;
        the scale/translations are cached in an .npz file next to the mesh
        if not already present
        """
        cache_norm_file = self.filename.with_suffix('.npz')
        if not cache_norm_file.exists():
            scale = max(
                [self.vs[:, i].max() - self.vs[:, i].min() for i in range(3)])
            scaled_vs = self.vs / scale
            target_mins = [
                (scaled_vs[:, i].max() - scaled_vs[:, i].min()) / -2.0
                for i in range(3)
            ]
            translations = [(target_mins[i] - scaled_vs[:, i].min())
                            for i in range(3)]
            np.savez_compressed(cache_norm_file,
                                scale=scale,
                                translations=translations)
        # load from the cache
        cached_data = np.load(cache_norm_file,
                              encoding='latin1',
                              allow_pickle=True)
        self.scale, self.translations = cached_data['scale'], cached_data[
            'translations']
        self.vs /= self.scale
        self.vs += self.translations[None, :]

    def discrete_project(self, pc: torch.Tensor, thres=0.9, cpu=False):
        with torch.no_grad():
            device = torch.device('cpu') if cpu else self.device
            pc = pc.double()
            if isinstance(self, Mesh):
                mid_points = self.vs[self.faces].mean(dim=1)
                normals = self.normals
            else:
                mid_points = self[:, :3]
                normals = self[:, 3:]
            pk12 = knn_points(mid_points[:, :3].unsqueeze(0),
                              pc[:, :, :3],
                              K=3).idx[0]
            pk21 = knn_points(pc[:, :, :3],
                              mid_points[:, :3].unsqueeze(0),
                              K=3).idx[0]
            loop = pk21[pk12].view(pk12.shape[0], -1)
            knn_mask = (loop == torch.arange(
                0, pk12.shape[0], device=self.device)[:, None]).sum(dim=1) > 0
            mid_points = mid_points.to(device)
            pc = pc[0].to(device)
            normals = normals.to(device)[~knn_mask, :]
            masked_mid_points = mid_points[~knn_mask, :]
            displacement = masked_mid_points[:, None, :] - pc[:, :3]
            torch.cuda.empty_cache()
            distance = displacement.norm(dim=-1)
            mask = (torch.abs(
                torch.sum((displacement / distance[:, :, None]) *
                          normals[:, None, :],
                          dim=-1)) > thres)
            if pc.shape[-1] == 6:
                pc_normals = pc[:, 3:]
                normals_correlation = torch.sum(normals[:, None, :] *
                                                pc_normals,
                                                dim=-1)
                mask = mask * (normals_correlation > 0)
            torch.cuda.empty_cache()
            distance[~mask] += float('inf')
            min, argmin = distance.min(dim=-1)

            pc_per_face_masked = pc[argmin, :].clone()
            pc_per_face_masked[min == float('inf'), :] = float('nan')
            pc_per_face = torch.zeros(mid_points.shape[0], 6).\
                type(pc_per_face_masked.dtype).to(pc_per_face_masked.device)
            pc_per_face[~knn_mask, :pc.shape[-1]] = pc_per_face_masked
            pc_per_face[knn_mask, :] = float('nan')

            # clean up
            del knn_mask
        return pc_per_face.to(
            self.device), (pc_per_face[:, 0] == pc_per_face[:, 0]).to(device)

    @staticmethod
    def face_areas_normals(vs, faces):
        if type(vs) is not torch.Tensor:
            vs = torch.from_numpy(vs)
        if type(faces) is not torch.Tensor:
            faces = torch.from_numpy(faces)
        face_normals = torch.cross(vs[faces[:, 1]] - vs[faces[:, 0]],
                                   vs[faces[:, 2]] - vs[faces[:, 1]])

        face_areas = torch.norm(face_normals, dim=1)
        face_normals = face_normals / face_areas[:, None]
        face_areas = 0.5 * face_areas
        return face_areas, face_normals

    def update_verts(self, verts):
        """
        update verts positions only, same connectivity
        :param verts: new verts
        """
        self.vs = verts

    def deep_copy(self):  #TODO see if can do this better
        new_mesh = Mesh(file=None)
        types = [np.ndarray, torch.Tensor, dict, list, str, int, bool, float]
        for attr in self.__dir__():
            if attr == '__dict__':
                continue

            val = getattr(self, attr)
            if type(val) == types[0]:
                new_mesh.__setattr__(attr, val.copy())
            elif type(val) == types[1]:
                new_mesh.__setattr__(attr, val.clone())
            elif type(val) in types[2:4]:
                new_mesh.__setattr__(attr, pickle.loads(pickle.dumps(val, -1)))
            elif type(val) in types[4:]:
                new_mesh.__setattr__(attr, val)

        return new_mesh

    def merge_vertices(self, edge_id):
        self.remove_edge(edge_id)
        edge = self.edges[edge_id]
        v_a = self.vs[edge[0]]
        v_b = self.vs[edge[1]]
        # update pA
        v_a.__iadd__(v_b)
        v_a.__itruediv__(2)
        self.v_mask[edge[1]] = False
        mask = self.edges == edge[1]
        self.ve[edge[0]].extend(self.ve[edge[1]])
        self.edges[mask] = edge[0]

    def remove_vertex(self, v):
        self.v_mask[v] = False

    def remove_edge(self, edge_id):
        vs = self.edges[edge_id]
        for v in vs:
            if edge_id not in self.ve[v]:
                print(self.ve[v])
                print(self.filename)
            self.ve[v].remove(edge_id)

    def clean(self, edges_mask, groups):
        edges_mask = edges_mask.astype(bool)
        torch_mask = torch.from_numpy(edges_mask.copy())
        self.gemm_edges = self.gemm_edges[edges_mask]
        self.edges = self.edges[edges_mask]
        self.sides = self.sides[edges_mask]
        new_ve = []
        edges_mask = np.concatenate([edges_mask, [False]])
        new_indices = np.zeros(edges_mask.shape[0], dtype=np.int32)
        new_indices[-1] = -1
        new_indices[edges_mask] = np.arange(
            0,
            np.ma.where(edges_mask)[0].shape[0])
        self.gemm_edges[:, :] = new_indices[self.gemm_edges[:, :]]
        for v_index, ve in enumerate(self.ve):
            update_ve = []
            # if self.v_mask[v_index]:
            for e in ve:
                update_ve.append(new_indices[e])
            new_ve.append(update_ve)
        self.ve = new_ve
        self.__clean_history(groups, torch_mask)

    def export(self, file):
        vs = self.vs.cpu().clone()
        vs -= self.translations[None, :]
        vs *= self.scale
        export(file, vs, self.faces)

    def init_history(self):
        self.history_data = {
            'groups': [],
            'gemm_edges': [self.gemm_edges.copy()],
            'occurrences': [],
            'edges_count': [self.edges_count],
        }

    def get_groups(self):
        return self.history_data['groups'].pop()

    def get_occurrences(self):
        return self.history_data['occurrences'].pop()

    def __clean_history(self, groups, pool_mask):
        if self.history_data is not None:
            self.history_data['occurrences'].append(groups.get_occurrences())
            self.history_data['groups'].append(groups.get_groups(pool_mask))
            self.history_data['gemm_edges'].append(self.gemm_edges.copy())
            self.history_data['edges_count'].append(self.edges_count)

    def unroll_gemm(self):
        self.history_data['gemm_edges'].pop()
        self.gemm_edges = self.history_data['gemm_edges'][-1]
        self.history_data['edges_count'].pop()
        self.edges_count = self.history_data['edges_count'][-1]

    @staticmethod
    def from_tensor(mesh, vs, faces, gfmm=True):
        mesh = Mesh(file=mesh.filename,
                    vs=vs,
                    faces=faces,
                    device=mesh.device,
                    hold_history=True,
                    gfmm=gfmm)
        return mesh

    def submesh(self, vs_index):
        return PartMesh.create_submesh(vs_index, self)
Example #34
# absolute path
Path("/home/luke")

# current folder
Path()

# relative
path = Path("../classes/ecommerce/__init__.py")

# joining paths with the / operator

Path() / Path("ecommerce")
Path() / "ecommerce"

# user home path
Path.home()

path.exists()
path.is_file()
path.is_dir()

print(path.name)
print(path.stem)
print(path.suffix)
print(path.parent)

path = path.with_suffix(".txt")

print(path.absolute())
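
A few with_suffix() edge cases worth adding to the tour above:

from pathlib import Path

print(Path("archive.tar.gz").with_suffix(""))  # archive.tar -- only the last suffix is removed
print(Path("notes").with_suffix(".md"))        # notes.md -- appends when there is no suffix
# Path("notes").with_suffix("md") raises ValueError: the new suffix must start with a dot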
Example #35
def init_logger(**kwargs):
    args_str = ' '.join([f"{k}={v}" for (k, v) in kwargs.items()])
    formatter = logging.Formatter('%(asctime)s [%(levelname)-5s] %(message)s')
    # stream handler
    chdr = logging.StreamHandler()
    chdr.setLevel(logging.DEBUG)
    chdr.setFormatter(formatter)
    logger.addHandler(chdr)

    log_dir = kwargs.pop("log_dir", "./logs")
    rank = kwargs.pop("rank", None)

    # file handler
    if "log_file" in kwargs:
        log_file = kwargs.pop("log_file")
        log_path = Path(log_dir, log_file).resolve()
        if rank is not None:
            log_path = log_path.with_suffix(f".{rank}{log_path.suffix}")
        Path.mkdir(log_path.parent, parents=True, exist_ok=True)
        fhdr = logging.FileHandler(log_path)
        fhdr.setLevel(logging.DEBUG)
        fhdr.setFormatter(formatter)
        logger.addHandler(fhdr)

        logger.info(f"begins logging to file: {str(log_path)}")

    if "slack" in kwargs and kwargs["slack"]:
        try:
            env = str(Path(log_dir).name)
            if rank is not None:
                env += f"_rank{rank}"
            shdr = SlackClientHandler(env=env)
            shdr.setLevel(logging.INFO)
            shdr.setFormatter(formatter)
            logger.addHandler(shdr)
        except:
            logger.error("error to setup slackclient")
            raise

    # prepare visdom
    logger.visdom = None
    if "visdom" in kwargs and kwargs["visdom"]:
        env = str(Path(log_dir).name)
        log_path = Path(log_dir, "visdom.log").resolve()
        visdom_host = kwargs.pop("visdom_host", "127.0.0.1")
        visdom_port = kwargs.pop("visdom_port", 8097)
        try:
            logger.visdom = VisdomLogger(host=visdom_host,
                                         port=visdom_port,
                                         env=env,
                                         log_path=log_path)
        except:
            logger.error("error to use visdom")
            raise

    # prepare tensorboard
    logger.tensorboard = None
    if "tensorboard" in kwargs and kwargs["tensorboard"]:
        env = str(Path(log_dir, 'tensorboard').resolve())
        try:
            logger.tensorboard = TensorboardLogger(env)
        except:
            logger.error("error to use tensorboard")

    # print version and args
    logger.info(f"PyTorch version: {torch.__version__}")
    logger.debug(f"command-line options: {' '.join(sys.argv)}")
    logger.info(f"args: {args_str}")
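
How the rank gets spliced into the log filename above: the original suffix is re-appended after the rank, so with rank 0 a train.log becomes train.0.log:

from pathlib import Path

log_path = Path("logs/train.log")
rank = 0
print(log_path.with_suffix(f".{rank}{log_path.suffix}"))  # logs/train.0.log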
Example #36
class Profile:
    def __init__(self, enabled=True, profile_file=None, **kwargs):
        # type: (bool, Union[Path, str], **Any) -> None

        if profile_file is None or isinstance(profile_file, Path):
            self._profile_file = profile_file
        else:
            self._profile_file = Path(profile_file)

        self._enabled = enabled
        self._kwargs = kwargs
        self._profile = None  # type: Optional[cProfile.Profile]

    def __enter__(self):
        # type: () -> Profile
        if self._enabled:
            cmk.utils.log.logger.info("Recording profile")
            self._profile = cProfile.Profile(**self._kwargs)
            self._profile.enable()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # type: (Optional[Type[BaseException]], Optional[BaseException], Optional[TracebackType]) -> None
        if not self._enabled:
            return

        if not self._profile:
            return

        self._profile.disable()

        if not self._profile_file:
            self._profile.print_stats()
            return

        self._write_profile()
        self._write_dump_script()

    def _write_profile(self):
        # type: () -> None
        if not self._profile:
            return
        self._profile.dump_stats(str(self._profile_file))
        cmk.utils.log.logger.info("Created profile file: %s",
                                  self._profile_file)

    def _write_dump_script(self):
        # type: () -> None
        if not self._profile_file:
            return

        script_path = self._profile_file.with_suffix(".py")
        with script_path.open("w", encoding="utf-8") as f:
            f.write(u"#!/usr/bin/env python\n"
                    "import pstats\n"
                    "stats = pstats.Stats(\"%s\")\n"
                    "stats.sort_stats('time').print_stats()\n" %
                    self._profile_file)
        script_path.chmod(0o755)
        cmk.utils.log.logger.info("Created profile dump script: %s",
                                  script_path)
Example #37
    def path(self, destination: Path):
        return destination.with_suffix(".zip")
Example #38
    def download_raw_partial(self, url_cbin, url_ch, first_chunk=0, last_chunk=0):
        assert url_cbin.endswith('.cbin')
        assert url_ch.endswith('.ch')

        relpath = Path(url_cbin.replace(self._par.HTTP_DATA_SERVER, '.')).parents[0]
        target_dir = Path(self._get_cache_dir(None), relpath)
        Path(target_dir).mkdir(parents=True, exist_ok=True)

        # First, download the .ch file.
        ch_local_path = Path(wc.http_download_file(
            url_ch,
            username=self._par.HTTP_DATA_SERVER_LOGIN,
            password=self._par.HTTP_DATA_SERVER_PWD,
            cache_dir=target_dir, clobber=True, offline=False, return_md5=False))
        ch_local_path = alfio.remove_uuid_file(ch_local_path)
        ch_local_path_renamed = ch_local_path.with_suffix('.chopped.ch')
        ch_local_path.rename(ch_local_path_renamed)
        assert ch_local_path_renamed.exists()
        ch_local_path = ch_local_path_renamed

        # Load the .ch file.
        with open(ch_local_path, 'r') as f:
            cmeta = json.load(f)

        # Get the first byte and number of bytes to download.
        total_n_samples = cmeta['chunk_bounds'][-1]
        i0 = cmeta['chunk_bounds'][first_chunk]
        cmeta['chunk_bounds'] = cmeta['chunk_bounds'][first_chunk:last_chunk + 2]
        cmeta['chunk_bounds'] = [_ - i0 for _ in cmeta['chunk_bounds']]
        assert len(cmeta['chunk_bounds']) >= 2
        assert cmeta['chunk_bounds'][0] == 0

        first_byte = cmeta['chunk_offsets'][first_chunk]
        cmeta['chunk_offsets'] = cmeta['chunk_offsets'][first_chunk:last_chunk + 2]
        cmeta['chunk_offsets'] = [_ - first_byte for _ in cmeta['chunk_offsets']]
        assert len(cmeta['chunk_offsets']) >= 2
        assert cmeta['chunk_offsets'][0] == 0
        n_bytes = cmeta['chunk_offsets'][-1]
        assert n_bytes > 0

        # Save the chopped chunk bounds and offsets.
        cmeta['sha1_compressed'] = None
        cmeta['sha1_uncompressed'] = None
        cmeta['chopped'] = True
        cmeta['chopped_first_sample'] = i0
        cmeta['chopped_total_samples'] = total_n_samples
        with open(ch_local_path, 'w') as f:
            json.dump(cmeta, f, indent=2, sort_keys=True)

        # Download the requested chunks
        cbin_local_path = wc.http_download_file(
            url_cbin,
            username=self._par.HTTP_DATA_SERVER_LOGIN,
            password=self._par.HTTP_DATA_SERVER_PWD,
            cache_dir=target_dir, clobber=True, offline=False, return_md5=False,
            chunks=(first_byte, n_bytes))
        cbin_local_path = alfio.remove_uuid_file(cbin_local_path)
        cbin_local_path_renamed = cbin_local_path.with_suffix('.chopped.cbin')
        cbin_local_path.rename(cbin_local_path_renamed)
        assert cbin_local_path_renamed.exists()
        cbin_local_path = cbin_local_path_renamed

        import mtscomp
        reader = mtscomp.decompress(cbin_local_path, cmeta=ch_local_path)
        return reader
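
Multi-part suffixes such as ".chopped.cbin" are accepted by with_suffix(), but the call is not idempotent, which is why it is applied only once above; chaining it twice would double the ".chopped" segment:

from pathlib import Path

p = Path("raw.cbin")
print(p.with_suffix(".chopped.cbin"))                               # raw.chopped.cbin
print(p.with_suffix(".chopped.cbin").with_suffix(".chopped.cbin"))  # raw.chopped.chopped.cbin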
Example #39
location name.
'''

import sys
from pathlib import Path
import json
import pprint
import lxml.html
from contextlib import suppress

file = Path(sys.argv[1])
with open(file, 'r') as f:
	html = '\n'.join(f.readlines())

codes = {}
root = lxml.html.document_fromstring(html)
for row in root.xpath('//table/tr/td/table/tr'):
	cells = row.xpath('./td')
	
	with suppress(IndexError):
		code = cells[1].text_content()
		name = cells[2].text_content()
		try:
			codes[code] = name
# 			print(f'{code:>16}: {name}')
		except TypeError:
			pprint.pprint([code, name])

with open(file.with_suffix('.json'), 'w') as f:
	json.dump(codes, f)
Example #40
class ConfigFile(ConfigParser):
    """A version of ConfigParser which can easily save itself.

    The config will track whether any values change, and only resave
    if modified.
    get_val, get_bool, and get_int are modified to return defaults instead
    of erroring.
    """
    has_changed: bool
    filename: Optional[Path]
    _writer: Optional[AtomicWriter]

    def __init__(
        self,
        filename: Optional[str],
        *,
        in_conf_folder: bool=True,
        auto_load: bool=True,
    ) -> None:
        """Initialise the config file.

        `filename` is the name of the config file, in the `root` directory.
        If `auto_load` is true, this file will immediately be read and parsed.
        If in_conf_folder is set, The folder is relative to the 'config/'
        folder in the BEE2 folder.
        """
        super().__init__()

        self.has_changed = False

        if filename is not None:
            if in_conf_folder:
                self.filename = utils.conf_location('config') / filename
            else:
                self.filename = Path(filename)

            self._writer = AtomicWriter(self.filename)
            self.has_changed = False

            if auto_load:
                self.load()
        else:
            self.filename = self._writer = None

    def load(self) -> None:
        """Load config options from disk."""
        if self.filename is None:
            return

        try:
            with open(self.filename, 'r') as conf:
                self.read_file(conf)
        # If we fail, just continue - we just use the default values
        except FileNotFoundError:
            LOGGER.warning(
                'Config "{}" not found! Using defaults...',
                self.filename,
            )
        except (IOError, ParsingError):
            LOGGER.warning(
                'Config "{}" cannot be read! Using defaults...',
                self.filename,
                exc_info=True,
            )
            try:
                self.filename.replace(self.filename.with_suffix('.err.cfg'))
            except IOError:
                pass

        # We're not different to the file on disk..
        self.has_changed = False

    def save(self) -> None:
        """Write our values out to disk."""
        LOGGER.info('Saving changes in config "{}"!', self.filename)
        if self.filename is None or self._writer is None:
            raise ValueError('No filename provided!')

        with self._writer as conf:
            self.write(conf)
        self.has_changed = False

    def save_check(self) -> None:
        """Check to see if we have different values, and save if needed."""
        if self.has_changed:
            self.save()

    def set_defaults(self, def_settings: Mapping[str, Mapping[str, Any]]) -> None:
        """Set the default values if the settings file has no values defined."""
        for sect, values in def_settings.items():
            if sect not in self:
                self[sect] = {}
            for key, default in values.items():
                if key not in self[sect]:
                    self[sect][key] = str(default)
        self.save_check()

    def get_val(self, section: str, value: str, default: str) -> str:
        """Get the value in the specified section.

        If either does not exist, set to the default and return it.
        """
        if section not in self:
            self[section] = {}
        if value in self[section]:
            return self[section][value]
        else:
            self.has_changed = True
            self[section][value] = default
            return default

    def __getitem__(self, section: str) -> SectionProxy:
        """Allows setting/getting config[section][value]."""
        try:
            return super().__getitem__(section)
        except KeyError:
            self[section] = {}
            return super().__getitem__(section)

    def getboolean(self, section: str, value: str, default: bool=False) -> bool:
        """Get the value in the specified section, coercing to a Boolean.

            If either does not exist, set to the default and return it.
            """
        if section not in self:
            self[section] = {}
        try:
            return super().getboolean(section, value)
        except (ValueError, NoOptionError):
            #  Invalid boolean, or not found
            self.has_changed = True
            self[section][value] = str(int(default))
            return default

    get_bool = getboolean

    def getint(self, section: str, value: str, default: int=0) -> int:
        """Get the value in the specified section, coercing to a Integer.

            If either does not exist, set to the default and return it.
            """
        if section not in self:
            self[section] = {}
        try:
            return super().getint(section, value)
        except (ValueError, NoOptionError):
            self.has_changed = True
            self[section][value] = str(int(default))
            return default

    get_int = getint

    def add_section(self, section: str) -> None:
        self.has_changed = True
        super().add_section(section)

    def remove_section(self, section: str) -> bool:
        self.has_changed = True
        return super().remove_section(section)

    def set(self, section: str, option: str, value: str) -> None:
        orig_val = self.get(section, option, fallback=None)
        value = str(value)
        if orig_val is None or orig_val != value:
            self.has_changed = True
            super().set(section, option, value)

    add_section.__doc__ = ConfigParser.add_section.__doc__
    remove_section.__doc__ = ConfigParser.remove_section.__doc__
    set.__doc__ = ConfigParser.set.__doc__
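

# A self-contained sketch (added for illustration) of the pattern the class above
# implements - seed defaults, read with a fallback, and write back only when
# something changed - using the plain stdlib ConfigParser; 'settings.cfg' is a
# made-up filename.
from configparser import ConfigParser

conf = ConfigParser()
conf.read('settings.cfg')                     # a missing file is silently ignored
has_changed = False

if not conf.has_section('General'):
    conf.add_section('General')
    has_changed = True
if not conf.has_option('General', 'lang'):
    conf.set('General', 'lang', 'en')         # fall back to the default value
    has_changed = True

lang = conf.get('General', 'lang')
if has_changed:
    with open('settings.cfg', 'w') as f:      # persist only if we had to change it
        conf.write(f)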
Example #41
0
from functools import wraps
from pathlib import Path
from typing import Any

from rich.console import Console

from surepy import (
    __name__ as sp_name,
    __version__ as sp_version,
    natural_time,
)
import asyncio


def coro(f: Any) -> Any:
    @wraps(f)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        return asyncio.run(f(*args, **kwargs))

    return wrapper


token_file = Path("~/.surepy.token").expanduser()
old_token_file = token_file.with_suffix(".old_token")

console = Console(width=120)

CONTEXT_SETTINGS: dict[str, Any] = dict(help_option_names=["--help"])

version_message = (
    f" [#ffffff]{sp_name}[/] 🐾 [#666666]v[#aaaaaa]{sp_version.replace('.', '[#ff1d5e].[/]')}"
)


def print_header() -> None:
    """print header to terminal"""
    print()
    console.print(version_message, justify="left")
    print()
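

# Usage sketch (added for illustration; `list_pets` is a made-up command, and click
# is assumed to be the CLI framework given CONTEXT_SETTINGS above). `coro` bridges
# click's synchronous command callbacks to async code: decorate the async callback
# and click can call it like a normal function, with asyncio.run() driving the loop.
import click


@click.command(context_settings=CONTEXT_SETTINGS)
@coro
async def list_pets() -> None:
    print_header()
    await asyncio.sleep(0)                # stand-in for an awaited surepy API call
    console.print("no pets found", justify="left")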
Example #42
0
def import_path(
    p: Union[str, "os.PathLike[str]"],
    *,
    mode: Union[str, ImportMode] = ImportMode.prepend,
    root: Path,
) -> ModuleType:
    """Import and return a module from the given path, which can be a file (a module) or
    a directory (a package).

    The import mechanism used is controlled by the `mode` parameter:

    * `mode == ImportMode.prepend`: the directory containing the module (or package, taking
      `__init__.py` files into account) will be put at the *start* of `sys.path` before
      being imported with `__import__`.

    * `mode == ImportMode.append`: same as `prepend`, but the directory will be appended
      to the end of `sys.path`, if not already in `sys.path`.

    * `mode == ImportMode.importlib`: uses more fine control mechanisms provided by `importlib`
      to import the module, which avoids having to use `__import__` and muck with `sys.path`
      at all. It effectively allows having same-named test modules in different places.

    :param root:
        Used as an anchor when mode == ImportMode.importlib to obtain
        a unique name for the module being imported so it can safely be stored
        into ``sys.modules``.

    :raises ImportPathMismatchError:
        If after importing the given `path` and the module `__file__`
        are different. Only raised in `prepend` and `append` modes.
    """
    mode = ImportMode(mode)

    path = Path(p)

    if not path.exists():
        raise ImportError(path)

    if mode is ImportMode.importlib:
        module_name = module_name_from_path(path, root)

        for meta_importer in sys.meta_path:
            spec = meta_importer.find_spec(module_name, [str(path.parent)])
            if spec is not None:
                break
        else:
            spec = importlib.util.spec_from_file_location(
                module_name, str(path))

        if spec is None:
            raise ImportError(
                f"Can't find module {module_name} at location {path}")
        mod = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = mod
        spec.loader.exec_module(mod)  # type: ignore[union-attr]
        insert_missing_modules(sys.modules, module_name)
        return mod

    pkg_path = resolve_package_path(path)
    if pkg_path is not None:
        pkg_root = pkg_path.parent
        names = list(path.with_suffix("").relative_to(pkg_root).parts)
        if names[-1] == "__init__":
            names.pop()
        module_name = ".".join(names)
    else:
        pkg_root = path.parent
        module_name = path.stem

    # Change sys.path permanently: restoring it at the end of this function would cause surprising
    # problems because of delayed imports: for example, a conftest.py file imported by this function
    # might have local imports, which would fail at runtime if we restored sys.path.
    if mode is ImportMode.append:
        if str(pkg_root) not in sys.path:
            sys.path.append(str(pkg_root))
    elif mode is ImportMode.prepend:
        if str(pkg_root) != sys.path[0]:
            sys.path.insert(0, str(pkg_root))
    else:
        assert_never(mode)

    importlib.import_module(module_name)

    mod = sys.modules[module_name]
    if path.name == "__init__.py":
        return mod

    ignore = os.environ.get("PY_IGNORE_IMPORTMISMATCH", "")
    if ignore != "1":
        module_file = mod.__file__
        if module_file is None:
            raise ImportPathMismatchError(module_name, module_file, path)

        if module_file.endswith((".pyc", ".pyo")):
            module_file = module_file[:-1]
        if module_file.endswith(os.path.sep + "__init__.py"):
            module_file = module_file[:-(len(os.path.sep + "__init__.py"))]

        try:
            is_same = _is_same(str(path), module_file)
        except FileNotFoundError:
            is_same = False

        if not is_same:
            raise ImportPathMismatchError(module_name, module_file, path)

    return mod
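

# Usage sketch (added for illustration; not part of the original source). With
# ImportMode.importlib each file gets a unique, root-anchored module name in
# sys.modules, so two files that are both called util.py can be imported side by side.
import tempfile

_root = Path(tempfile.mkdtemp())
for _sub in ("pkg_a", "pkg_b"):
    _d = _root / _sub
    _d.mkdir()
    (_d / "util.py").write_text(f"WHO = {_sub!r}\n")

mod_a = import_path(_root / "pkg_a" / "util.py", mode=ImportMode.importlib, root=_root)
mod_b = import_path(_root / "pkg_b" / "util.py", mode=ImportMode.importlib, root=_root)
assert mod_a.WHO == "pkg_a" and mod_b.WHO == "pkg_b"   # same filename, distinct modules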
Example #43
0
def download_file(srcfile, ChunkSize_default):
    logger.info(f'Start file: {srcfile["Key"]}')
    dir_and_key = Path(DesDir) / srcfile["Key"]
    if Path.exists(dir_and_key):
        if dir_and_key.stat().st_size == srcfile["Size"] or dir_and_key.is_dir(
        ):
            logger.info(
                f'Duplicated: {dir_and_key.as_uri()} same size, goto next file.'
            )
            return

    # Create the directory structure for the file
    path = dir_and_key.parent
    if not Path.exists(path):
        create_dir(path)

    # If the key is a subfolder, skip the download
    if srcfile["Key"][-1] == '/':
        Path.mkdir(dir_and_key)
        logger.info(f'Create empty subfolder: {dir_and_key.as_uri()}')
        return

    # Get the list of already-downloaded part numbers
    partnumberList = []
    try:
        with sqlite3.connect('s3_download.db') as db:
            cursor = db.cursor()
            p_sql = cursor.execute(
                f"SELECT PARTNUMBER FROM S3P WHERE BUCKET='{SrcBucket}' AND KEY='{dir_and_key.as_uri()}'"
            )
            db.commit()
            partnumberList = [d[0] for d in p_sql]
            logger.info(
                f'Got partnumberList {dir_and_key.as_uri()} - {json.dumps(partnumberList)}'
            )
    except Exception as e:
        logger.error(f'Fail to select partnumber from DB. {str(e)}')

    # Get the index list, e.g. [0, 10, 20]
    indexList, ChunkSize_auto = split(srcfile, ChunkSize_default)

    # Perform the download
    s3tmp_name = dir_and_key.with_suffix('.s3tmp')
    if Path.exists(s3tmp_name):
        mode = 'r+b'
    else:
        # If there is no temp file, or it was deleted, create a new file and clear partnumberList
        mode = 'wb'
        partnumberList = []
    with open(s3tmp_name, mode) as wfile:
        download_part(indexList, partnumberList, srcfile, ChunkSize_auto,
                      wfile)

    # Rename the temp file to its final name and clean up the partnumber database
    s3tmp_name.rename(dir_and_key)
    try:
        with sqlite3.connect('s3_download.db') as db:
            cursor = db.cursor()
            cursor.execute(
                f"DELETE FROM S3P WHERE BUCKET='{SrcBucket}' AND KEY='{dir_and_key.as_uri()}'"
            )
            db.commit()
    except Exception as e:
        logger.warning(f'Fail to clean DB: {dir_and_key.as_uri()}. {str(e)}')
    logger.info(f'Finish: {srcfile["Key"]} TO {dir_and_key.as_uri()}')
    return
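

# Illustration (added; not part of the original source). The resume logic above keeps
# the finished part numbers in SQLite and appends to a ".s3tmp" working file; a sketch
# of the bookkeeping, assuming part numbers are 1-based positions into the byte-offset
# indexList (that mapping is an assumption, the real one lives in download_part):
def remaining_parts(index_list, part_numbers_done):
    """Return (partnumber, byte_offset) pairs that still need to be fetched."""
    done = set(part_numbers_done)
    return [(n + 1, offset) for n, offset in enumerate(index_list) if (n + 1) not in done]

print(remaining_parts([0, 10, 20], [1]))   # -> [(2, 10), (3, 20)]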
Example #44
0
class Mesh:

    def __init__(self, file, hold_history=False, vs=None, faces=None, device='cpu', gfmm=True):
        if file is None:
            return
        self.filename = Path(file)
        self.vs = self.v_mask = self.edge_areas = None
        self.edges = self.gemm_edges = self.sides = None
        self.device = device
        if vs is not None and faces is not None:
            self.vs, self.faces = vs.cpu().numpy(), faces.cpu().numpy()
            self.scale, self.translations = 1.0, np.zeros(3,)
        else:
            self.vs, self.faces = load_obj(file)
            self.normalize_unit_bb()
        self.vs_in = copy.deepcopy(self.vs)
        self.v_mask = np.ones(len(self.vs), dtype=bool)
        self.build_gemm()
        self.history_data = None
        if hold_history:
            self.init_history()
        if gfmm:
            self.gfmm = self.build_gfmm() #TODO get rid of this DS
        else:
            self.gfmm = None
        if type(self.vs) is np.ndarray:
            self.vs = torch.from_numpy(self.vs)
        if type(self.faces) is np.ndarray:
            self.faces = torch.from_numpy(self.faces)
        self.vs = self.vs.to(self.device)
        self.faces = self.faces.to(self.device)

    def build_gemm(self):
        self.ve = [[] for _ in self.vs]
        self.vei = [[] for _ in self.vs]
        edge_nb = []
        sides = []
        edge2key = dict()
        edges = []
        edges_count = 0
        nb_count = []
        for face_id, face in enumerate(self.faces):
            faces_edges = []
            for i in range(3):
                cur_edge = (face[i], face[(i + 1) % 3])
                faces_edges.append(cur_edge)
            for idx, edge in enumerate(faces_edges):
                edge = tuple(sorted(list(edge)))
                faces_edges[idx] = edge
                if edge not in edge2key:
                    edge2key[edge] = edges_count
                    edges.append(list(edge))
                    edge_nb.append([-1, -1, -1, -1])
                    sides.append([-1, -1, -1, -1])
                    self.ve[edge[0]].append(edges_count)
                    self.ve[edge[1]].append(edges_count)
                    self.vei[edge[0]].append(0)
                    self.vei[edge[1]].append(1)
                    nb_count.append(0)
                    edges_count += 1
            for idx, edge in enumerate(faces_edges):
                edge_key = edge2key[edge]
                edge_nb[edge_key][nb_count[edge_key]] = edge2key[faces_edges[(idx + 1) % 3]]
                edge_nb[edge_key][nb_count[edge_key] + 1] = edge2key[faces_edges[(idx + 2) % 3]]
                nb_count[edge_key] += 2
            for idx, edge in enumerate(faces_edges):
                edge_key = edge2key[edge]
                sides[edge_key][nb_count[edge_key] - 2] = nb_count[edge2key[faces_edges[(idx + 1) % 3]]] - 1
                sides[edge_key][nb_count[edge_key] - 1] = nb_count[edge2key[faces_edges[(idx + 2) % 3]]] - 2
        self.edges = np.array(edges, dtype=np.int32)
        self.gemm_edges = np.array(edge_nb, dtype=np.int64)
        self.sides = np.array(sides, dtype=np.int64)
        self.edges_count = edges_count
        # lots of DS for loss
        self.nvs, self.nvsi, self.nvsin = [], [], []
        for i, e in enumerate(self.ve):
            self.nvs.append(len(e))
            self.nvsi.append(len(e) * [i])
            self.nvsin.append(list(range(len(e))))
        self.vei = torch.from_numpy(np.concatenate(np.array(self.vei)).ravel()).to(self.device).long()
        self.nvsi = torch.Tensor(np.concatenate(np.array(self.nvsi)).ravel()).to(self.device).long()
        self.nvsin = torch.from_numpy(np.concatenate(np.array(self.nvsin)).ravel()).to(self.device).long()
        ve_in = copy.deepcopy(self.ve)
        self.ve_in = torch.from_numpy(np.concatenate(np.array(ve_in)).ravel()).to(self.device).long()
        self.max_nvs = max(self.nvs)
        self.nvs = torch.Tensor(self.nvs).to(self.device).float()
        self.edge2key = edge2key

    def build_ef(self):
        edge_faces = dict()
        if type(self.faces) == torch.Tensor:
            faces = self.faces.cpu().numpy()
        else:
            faces = self.faces
        for face_id, face in enumerate(faces):
            for i in range(3):
                edge = tuple(sorted([face[i], face[(i + 1) % 3]]))
                if edge not in edge_faces:
                    edge_faces[edge] = []
                edge_faces[edge].append(face_id)
        for k in edge_faces.keys():
            if len(edge_faces[k]) < 2:
                edge_faces[k].append(edge_faces[k][0])
        return edge_faces

    def build_gfmm(self):
        edge_faces = self.build_ef()
        gfmm = []
        if type(self.faces) == torch.Tensor:
            faces = self.faces.cpu().numpy()
        else:
            faces = self.faces
        for face_id, face in enumerate(faces):
            neighbors = [face_id]
            for i in range(3):
                edge = tuple(sorted([face[i], face[(i + 1) % 3]]))
                neighbors.extend(list(set(edge_faces[edge]) - set([face_id])))
            gfmm.append(neighbors)
        return torch.Tensor(gfmm).long().to(self.device)

    def normalize_unit_bb(self):
        """
        normalizes to unit bounding box and translates to center
        if no
        :param verts: new verts
        """
        cache_norm_file = self.filename.with_suffix('.npz')
        if not cache_norm_file.exists():
            scale = max([self.vs[:, i].max() - self.vs[:, i].min() for i in range(3)])
            scaled_vs = self.vs / scale
            target_mins = [(scaled_vs[:, i].max() - scaled_vs[:, i].min()) / -2.0 for i in range(3)]
            translations = [(target_mins[i] - scaled_vs[:, i].min()) for i in range(3)]
            np.savez_compressed(cache_norm_file, scale=scale, translations=translations)
        # load from the cache
        cached_data = np.load(cache_norm_file, encoding='latin1', allow_pickle=True)
        self.scale, self.translations = cached_data['scale'], cached_data['translations']
        self.vs /= self.scale
        self.vs += self.translations[None, :]



    def update_verts(self, verts):
        """
        update verts positions only, same connectivity
        :param verts: new verts
        """
        self.vs = verts

    def deep_copy(self): #TODO see if can do this better
        new_mesh = Mesh(file=None)
        types = [np.ndarray, torch.Tensor,  dict, list, str, int, bool, float]
        for attr in self.__dir__():
            if attr == '__dict__':
                continue

            val = getattr(self, attr)
            if type(val) == types[0]:
                new_mesh.__setattr__(attr, val.copy())
            elif type(val) == types[1]:
                new_mesh.__setattr__(attr, val.clone())
            elif type(val) in types[2:4]:
                new_mesh.__setattr__(attr, pickle.loads(pickle.dumps(val, -1)))
            elif type(val) in types[4:]:
                new_mesh.__setattr__(attr, val)

        return new_mesh

    def merge_vertices(self, edge_id):
        self.remove_edge(edge_id)
        edge = self.edges[edge_id]
        v_a = self.vs[edge[0]]
        v_b = self.vs[edge[1]]
        # update pA
        v_a.__iadd__(v_b)
        v_a.__itruediv__(2)
        self.v_mask[edge[1]] = False
        mask = self.edges == edge[1]
        self.ve[edge[0]].extend(self.ve[edge[1]])
        self.edges[mask] = edge[0]

    def remove_vertex(self, v):
        self.v_mask[v] = False

    def remove_edge(self, edge_id):
        vs = self.edges[edge_id]
        for v in vs:
            if edge_id not in self.ve[v]:
                print(self.ve[v])
                print(self.filename)
            self.ve[v].remove(edge_id)

    def clean(self, edges_mask, groups):
        edges_mask = edges_mask.astype(bool)
        torch_mask = torch.from_numpy(edges_mask.copy())
        self.gemm_edges = self.gemm_edges[edges_mask]
        self.edges = self.edges[edges_mask]
        self.sides = self.sides[edges_mask]
        new_ve = []
        edges_mask = np.concatenate([edges_mask, [False]])
        new_indices = np.zeros(edges_mask.shape[0], dtype=np.int32)
        new_indices[-1] = -1
        new_indices[edges_mask] = np.arange(0, np.ma.where(edges_mask)[0].shape[0])
        self.gemm_edges[:, :] = new_indices[self.gemm_edges[:, :]]
        for v_index, ve in enumerate(self.ve):
            update_ve = []
            # if self.v_mask[v_index]:
            for e in ve:
                update_ve.append(new_indices[e])
            new_ve.append(update_ve)
        self.ve = new_ve
        self.__clean_history(groups, torch_mask)

    def export(self, file):
        vs = self.vs.cpu().clone()
        vs -= self.translations[None, :]
        vs *= self.scale
        export(file, vs, self.faces)

    def init_history(self):
        self.history_data = {
                               'groups': [],
                               'gemm_edges': [self.gemm_edges.copy()],
                               'occurrences': [],
                               'edges_count': [self.edges_count],
                              }

    def get_groups(self):
        return self.history_data['groups'].pop()

    def get_occurrences(self):
        return self.history_data['occurrences'].pop()
    
    def __clean_history(self, groups, pool_mask):
        if self.history_data is not None:
            self.history_data['occurrences'].append(groups.get_occurrences())
            self.history_data['groups'].append(groups.get_groups(pool_mask))
            self.history_data['gemm_edges'].append(self.gemm_edges.copy())
            self.history_data['edges_count'].append(self.edges_count)
    
    def unroll_gemm(self):
        self.history_data['gemm_edges'].pop()
        self.gemm_edges = self.history_data['gemm_edges'][-1]
        self.history_data['edges_count'].pop()
        self.edges_count = self.history_data['edges_count'][-1]

    @staticmethod
    def from_tensor(mesh, vs, faces, gfmm=True):
        mesh = Mesh(file=mesh.filename, vs=vs, faces=faces, device=mesh.device, hold_history=True, gfmm=gfmm)
        return mesh

    def submesh(self, vs_index):
        return PartMesh.create_submesh(vs_index, self)
Example #45
0
class URL(urlpath.URL,urllib.parse._NetlocResultMixinStr, PurePath):
  '''
  Derived from 
  https://raw.githubusercontent.com/chrono-meter/urlpath/master/urlpath.py

  to provide more compatibility with pathlib.Path functionality

  '''

  '''
  modified new and init
  '''
  def __new__(cls,*args,**kwargs):
      self = super(URL, cls).__new__(cls,*args) 
      self.init(**kwargs)
      return self

  def __init__(self,*args,**kwargs):
      # remove any trailing '/' from args
      args = list(args)
      for i,arg in enumerate(args):
        arg = str(arg)
        while arg[-1] == '/':
          if len(arg) == 1:
            break
          arg = arg[:-1]
        args[i] = arg
      args = tuple(args)
      if not kwargs:
        kwargs = {}
      self.fourOhOne = False

  def init(self,**kwargs):
      self.__dict__.update(ginit(self,**kwargs))
      if 'database' in self.__dict__ and type(self.database) == Database:
        # already have database stored
        pass
      else:
        self.database = Database(self.db_file,\
                          **(fdict(self.__dict__.copy())))

  def __del__(self):
      try:
        del self.database
        self.msg(f'clone: {self.is_clone}')
      except:
        pass

  def __exit__(self, exc_type, exc_value, traceback):
      '''cleanup'''
      try:
        del self.database
      except:
        pass
      tempfile.clean()

  def dedate(self):
    if '_cache_original' in  self.__dict__:
      self.__dict__ = self._cache_original.copy()
      if '_cache_original' in  self.__dict__:
        del self.__dict__['_cache_original']

  def update(self,*args,**kwargs):
    '''update args in object'''
    if '_cache_original' not in  self.__dict__:
      self._cache_original = self.__dict__.copy()

    # whether we specify the full URL in update or not

    if ('full_url' in kwargs) and (kwargs['full_url'] == True):
      args = list(args)
    else:
      args = [str(self)] + list(args)   
    url = super(URL, self).__new__(self,*args)
    url.is_clone = True
    url.__dict__ = fdict(self._cache_original.copy())
    return url

  def check_path(self,ppp):
    '''
    You can corrupt the database by having files where we expect directories
    so we need to clean these up
    '''
    parts = list(ppp.parts)
    for i,part in enumerate(parts):
      this = Path(*(parts[:i+1]))
      if this.exists() and (not this.is_dir()):
        # warn: found a file where a directory was expected
        self.msg(f'found non-directory term in path {str(this)}')
        try: 
          self.msg('trying to correct')
          this.unlink()
          return True
        except:
          self.msg('failed to correct')
          return False
    return True

  def indb(self):
    # might be in database
    store_url  = str(self)
    store_flag = 'data'
    ifile = self.get_name(self.database.get_from_db(store_flag,store_url))
    if ifile:
      old = self.local_file
      self.local_file = Path(ifile)
      if self.local_file.exists() and self.local_file.suffix == '.store':
        return True
      if self.local_file.suffix != '.store':
        self.local_file = old
        return False
      return True
    return False

  def call_local(self):
    '''
    sort out and return local_file

    This comes from the URL and local_dir
    and ends .store
    '''
    if self.indb():
      if callable(self.local):
        sys.msg(f"**unexpected method for self.local {self.local}")
      else:
        return self.local
    
    kwargs = fdict(self.__dict__.copy())
    if 'local_dir' in kwargs and \
        (kwargs['local_dir'] is not None) and \
        len(kwargs['local_dir']) > 0:
      self.local_dir = list_resolve(kwargs['local_dir'])

    if (self.local_dir is None) or (len(self.local_dir) == 0):
      self.local_dir = list_resolve(self.db_dir)
    self.local_file = Path(self.local_dir[0],self.as_posix().split("://")[1]) 
    #self.local_file = Path(self.local_dir[-1],str(self.with_scheme(''))[2:]).absolute()
    # replace ' '
    self.local_file = Path(str(self.local_file).replace(' ','_'))
    suffix = self.local_file.suffix
    self.local_file = self.local_file.with_suffix(suffix + '.store')
    self.check_path(self.local_file.parent)
    self.local_file.parent.mkdir(parents=True,exist_ok=True) 
    return self.local_file

  def get_read_file(self,filelist):
    filelist = name_resolve(filelist)
    readlist,writelist = list_info(filelist)
    filelist = np.array(filelist,dtype=np.object)[readlist]
    return (filelist.size and filelist[-1]) or None

  def get_write_file(self,filelist):
    filelist = name_resolve(filelist)
    readlist,writelist = list_info(filelist)
    filelist = np.array(filelist,dtype=np.object)[writelist]
    return (filelist.size and filelist[-1]) or None

  def get_readwrite_file(self,filelist):
    filelist = name_resolve(filelist)
    readlist,writelist = list_info(filelist)
    filelist = np.array(filelist,dtype=np.object)[np.logical_and(np.array(writelist),np.array(readlist))]
    return (filelist.size and filelist[-1]) or None

  def _local_file(self,mode="r"):
    '''get local file name'''
    if self.indb():
      return self.local_file
    self.call_local()
    # clobber
    if not self.noclobber:
      local_file  = self.get_write_file(self.local_file)
      # file name for writing
    elif mode == "r":
      local_file = self.get_read_file(self.local_file)
      if local_file and not local_file.exists():
        self.msg("read file {local_file} doesnt exist")
        self.local_file = self.local_file[self.local_file != local_file]
        return self._local_file(mode="r")
    else:
      # file name for writing
      local_file = self.get_write_file(self.local_file)

    if local_file == None:
      return local_file

    # local_file is real
    if local_file.exists():
      if local_file.is_dir():
        try:
          local_file.rmdir()
          return None
        except:
          pass

      # delete the file if noclobber is False
      if not self.noclobber:
        try:
          self.msg(f"deleting existing file {local_file}")
          local_file.unlink()
        except:
          pass
      else:
        self.msg(f"keeping existing file {local_file}")
      
    return local_file

  def open(self,mode='r',buffering=-1, encoding=None, errors=None, newline=None):
      '''
      Open the file pointed by this URL and return a file object, as
      the built-in open() function does.
      '''
      kwargs = {'mode':mode,'buffering':buffering,'encoding':encoding,\
                'errors':errors,'newline':newline}

      if self._isfile():
        self.msg(f'{self} is not a URL: interpreting as Path')
        return Path(self).open(**kwargs)

      # check in database
      store_url  = str(self)
      store_flag = 'data'

      binary = ('b' in mode) and ('t' not in mode) 

      get_download,ifile,ofile = self._test_already_local()

      # get from ofile
      if ofile and Path(ofile).exists():
        ofile = Path(ofile)
        if binary:
          data = io.BytesIO(ofile.read_bytes())
        else:
          data = io.StringIO(ofile.read_text())
        cache = {store_flag : { str(store_url) : str(ofile) }}
        self.database.set_db(cache)
        return data

      # get from ifile
      if ifile and Path(ifile).exists():
        ifile = Path(ifile)
        if binary:
          data = io.BytesIO(ifile.read_bytes())
        else:
          data = io.StringIO(ifile.read_text())
        self.check_path(ifile.parent)
        ifile.parent.mkdir(parents=True,exist_ok=True)
        if ofile:
          ofile = Path(ofile)
          if binary:
            ofile.write_bytes(data.getvalue())
          else:
            ofile.write_text(data.getvalue())
        cache = {store_flag : { str(store_url) : str(ifile) }}
        self.database.set_db(cache)
        return data

      if 'r' in mode:
        self.msg(f"reading data from {self}")
        # read 
        if binary:
          self.msg("open() binary stream")
          idata = self.read_bytes()
          data = io.BytesIO(idata)
        else:
          self.msg("open() text stream")
          idata = self.read_text()
          data = io.StringIO(idata)
        if ofile:
          try:
            ofile = Path(ofile)
            if binary:
              ofile.write_bytes(idata)
            else:
              ofile.write_text(idata)
            cache = {store_flag : { str(store_url) : str(ofile) }}
            self.database.set_db(cache)
          except:
            pass
        return data

      if ofile:
        return Path(ofile).open(**kwargs)

  def write_text(self,data, encoding=None, errors=None):
      '''Open the file in text mode, write to it, and close the file.'''
      kwargs = {'encoding':encoding}
      if self._isfile():
          self.msg(f'{self} is not a URL: interpreting as Path')
          return Path(self).write_text(data)

      get_download,ifile,ofile = self._test_already_local()

      if ofile and Path(ofile).exists():
         self.msg("file exists so not writing")
         return Path(ofile).stat().st_size

      if ofile:
        self.msg(f'opening output file {ofile}')
        return Path(ofile).write_text(data,**kwargs)

  def write_bytes(self,data):
      '''Open the file in bytes mode, write to it, and close the file.'''

      if self._isfile():
          self.msg(f'{self} is not a URL: interpreting as Path')
          return Path(self).write_bytes(data)

      get_download,ifile,ofile = self._test_already_local()

      if ofile and Path(ofile).exists():
         self.msg("file exists so not writing")
         return Path(ofile).stat().st_size

      if ofile:
        self.msg(f'opening output file {ofile}')
        return Path(ofile).write_bytes(data)

  def _get_login(self,head=True):
      u = self
      with requests.Session() as session:
        if u.username and u.password:
          session.auth = u.username,u.password
        else:
          uinfo = Cylog(u.anchor).login()
          if uinfo == (None,None):
            return None
          session.auth = uinfo[0].decode('utf-8'),uinfo[1].decode('utf-8')
          u.msg(f'logging in to {u.anchor}')
        try:
          r1 = session.request('get',u)
          if r1.status_code == 200:
            u.msg(f'data read from {u.anchor}')
            return r1
          # try encoded login
          if head:
            r2 = session.head(r1.url)
          else:
            r2 = session.get(r1.url)
          if r2.status_code == 200:
            u.msg(f'data read from {u.anchor}')
          if type(r2) == requests.models.Response:
            return r2
        except:
          u.msg(f'failure reading data from {u.anchor}')
          return None
      u.msg(f'failure reading data from {u.anchor}')
      return None

  def msg(self,*args):
    '''msg to self.stderr'''
    this = str(*args)
    try:
      # DON'T REPEAT MESSAGES
      if this in self.store_msg:
        return
      self.store_msg.append(this)
    except:
      self.store_msg = [this]
    try:
        if self.verbose or (self.log is not None):
            print('-->',*args,file=self.stderr)
    except:
        pass

  def get_name(self,ofile):
    if ofile == [] or ofile == {}:
      ofile = None
    if type(ofile) == list:
      ofile = ofile[0]
    if type(ofile) == dict:
      ofile = list(ofile.values())[0]
    return ofile

  def _test_already_local(self):
    # get local_filename we would use for output
    # delete it if not noclobber
    # don't create dir if it doesn't exist

    # return False if already downloaded

    # check in database
    store_url  = str(self)
    store_flag = 'data'

    ifile = self.get_name(self.database.get_from_db(store_flag,store_url))

    if ifile is not None:
      ifile = Path(ifile)
      if not ifile.exists():
        # otherwise incorrect db entry
        self.database.rm_from_db(store_flag,store_url)
      if not self.noclobber and ifile.exists():   
        # clobber
        self.msg(f'deleting local file {ifile}')
        ifile.unlink()
        ifile = None

    ofile = self.get_name(self._local_file("w"))
    if callable(ofile):
      print(f"ERROR in type of self.lcoal {ofile}: should be str or list")
      sys.exit(1)
    if ifile is None:
      return True,ifile,ofile

    if not ifile.exists():
      return True,None,ofile

    # simple if no size check
    if (not self.size_check) and ifile.exists():
      self.msg(f'local file {ifile} exists')  # no size check
      # cache this in case we want to re-use it
      cache = {store_flag : { str(store_url) : str(ifile) }}
      self.database.set_db(cache)
      return False,ifile,ofile

    if self.size_check:
      lsize = ifile.stat().st_size
      rsize = self.stat().st_size
      if rsize < 0:
        # then its not available
        self.msg(f'not downloading file')
        # we might not want to download

        # cache this in case we want to re-use it
        cache = {store_flag : { str(store_url) : str(ifile) }}
        self.database.set_db(cache)
        return False,ifile,ofile

      elif lsize == rsize:
        self.msg(f'local and remote file sizes equal {lsize}')
        self.msg(f'not downloading file')
        # we might not want to download
        # cache this in case we want to re-use it
        cache = {store_flag : { str(store_url) : str(ifile) }}
        self.database.set_db(cache)
        return False,ifile,ofile
      self.msg(f'local and remote file sizes not equal {lsize}/{rsize} respectively')
      self.msg(f'so we need to download (or set size_check=False)')
      if not self.noclobber:
        if ifile and ifile.exists():
          self.msg(f'deleting local ifile {ifile}')
          ifile.unlink()
          ifile = None
        if ofile and ofile.exists():
          self.msg(f'deleting local ofile {ofile}')
          ofile.unlink()
          ofile = None

    return True,ifile,ofile


  def read_text(self, encoding=None, errors=None):
    '''Open the URL, read in text mode and return text.'''  

    kwargs = {'encoding':encoding}
    u = self
    store_url  = str(u)
    store_flag = 'data'

    if u._isfile():
      self.msg(f'{u} is not a URL: interpreting as Path')
      return Path(u).read_text()

    get_download,ifile,ofile = self._test_already_local()

    text = None

    # get it from ofile
    if ofile and Path(ofile).exists():
      text = Path(ofile).read_text(**kwargs)
      cache = {store_flag : { str(store_url) : str(ofile) }}
      self.database.set_db(cache)
      return text

    # get it from ifile 
    if ifile and Path(ifile).exists():
      self.msg(f'opening already downloaded file {ifile}')
      text = Path(ifile).read_text(**kwargs)
      if ofile:
        ofile = Path(ofile)
        ofile.write_text(text)
        cache = {store_flag : { str(store_url) : str(ofile) }}
      else:
        cache = {store_flag : { str(store_url) : str(ifile) }}
      self.database.set_db(cache)
      return text

    if text is not None:
      return text

    try:
      u.msg(f'trying {self}')
      text = u.get_text()
      if text and ofile:
        try:
          ofile = Path(ofile)
          self.check_path(ofile.parent)
          ofile.parent.mkdir(parents=True,exist_ok=True)
          ofile.write_text(text)
          cache = {store_flag : { str(store_url) : str(ofile) }}
          self.database.set_db(cache)
          return text
        except:
          pass
      if text:
        return text
    except:
      pass

    u.msg(f'getting login')
    r = u._get_login(head=False)
    if type(r) != requests.models.Response:
      return None
    if r.status_code == 200:
      u.msg(f'code {r.status_code}')
      text = r.text
      if ofile:
         ofile = Path(ofile)
         self.check_path(ofile.parent)
         ofile.parent.mkdir(parents=True,exist_ok=True)
         ofile.write_text(text)
         cache = {store_flag : { str(store_url) : str(ofile) }}
         self.database.set_db(cache)
      return text

    if type(r) == requests.models.Response:
        u.msg(f'code {r.status_code}')
        return r
    u.msg(f'failed to connect')
    return None

  def local(self,get_file=False):
    ''' local filename'''
    u = self
    get_download,ifile,ofile = u._test_already_local()
    for f in [ifile,ofile]:
      if f and get_file:
        if  Path(f).exists():
          return Path(f)
        else:
          # pull file
          self.read_bytes()
          return self.local(get_file=get_file)
      elif f:
        return Path(f)
    return None

  def exists(self):
    '''Whether this URL exists and can be accessed'''

    u = self
    store_url  = str(u)
    store_flag = 'exists' 
 
    ex = self.database.get_from_db(store_flag,store_url)
    if ex is not None:
      return ex
 
    ex = False 
    get_download,ifile,ofile = u._test_already_local()
    if ofile and Path(ofile).exists():
      ex = True
      cache = {store_flag : { str(store_url) : True }}
    if not ex:
      ex = self.ping()
    if ex:
      cache = {store_flag : { str(store_url) : True }}
      self.database.set_db(cache)
      
    return ex

  def stat(self, head=False):
    '''
    Some of the functionality of stat for URLs

    Currently, only stat_result.st_size is used.
    '''
    input = [0,0,0,0,0,0,self._st_size(head=head),0,0,0]
    stat_result = os.stat_result(input)
    return stat_result

  def _isfile(self):
    if self.scheme == '' or self.scheme == 'file':
      self.msg('we are a file ...')
      return True
    #self.msg('we are not a file ...')
    return False

  def _st_size(self, head=False):
    '''
    retrieve the remote file size

    You should specify any required login/password with
    with_components(username=str,password=str)

    Returns:
      int if data available
    Or:
      -1
    '''
    u = self
    # check in database
    store_url  = u
    store_flag = 'st_size'
    remote_size = self.database.get_from_db(store_flag,store_url)
    if remote_size is not None:
      return remote_size

    remote_size = -1
    if u._isfile():
      self.msg(f'{u} is not a URL: interpreting as Path')
      # not a URL
      u = Path(u)
      return u.stat().st_size
    try:
      u.msg(f'trying {u}')
      if head:
        r = u.head()
      else:
        r = u.get()
      if type(r) == requests.models.Response:
        if r.status_code == 200:
          u.msg(f'code 200')
          hdr = r.headers
          if "Content-Length" in hdr.keys():
              remote_size = int(hdr["Content-Length"])
          elif 'Transfer-Encoding' in hdr.keys() and hdr["Transfer-Encoding"] == 'chunked':
              u.msg(f'file is compressed, remote size not directly available')
          #self.msg(hdr)
          if remote_size > 0:
            # cache this in case we want to re-use it
            cache = {store_flag : { str(store_url) : remote_size }}
            self.database.set_db(cache)
            return(remote_size)

        # 
        if r.status_code == 401:
          u.msg(f'code 401')
          self.fourOhOne = True

        if self.fourOhOne:
          # unauthorised
          # more complex session login and auth
          # e.g. needed for NASA Earthdata login
          u.msg(f'getting login')
          r = u._get_login(head=head)
          if r.status_code == 200:
            u.msg(f'code 200')
            hdr = r.headers
            if "Content-Length" in hdr:
              remote_size = int(hdr["Content-Length"])
            if remote_size > 0:
              # cache this in case we want to re-use it
              cache = {store_flag : { str(store_url) : remote_size }}
              self.database.set_db(cache)
              return(remote_size)
        elif head == False:
          u.msg(f'code {r.status_code}')
          return remote_size
        # return it even if 0
        return remote_size
    except:
      pass
    if head == False:
      u.msg(f'failed to connect')
      # give up
      remote_size = -2
      # cache this in case we want to re-use it even if its -1
      cache = {store_flag : { str(store_url) : remote_size }}
      self.database.set_db(cache)
      return remote_size
    u.msg(f'trying get')
    return u._st_size(head=False)

  def ping(self, head=True):
    '''
    ping the URL data return True if response is 200

    You should specify any required login/password with
    with_components(username=str,password=str)

    Returns:
      True if data available
    Or:
      False
    '''
    u = self
    if u._isfile():
      self.msg(f'{u} is not a URL: interpreting as Path')
      # not a URL
      u = Path(u)
      return u.exists()
    try:
      u.msg(f'trying {u}')
      if head:
        r = u.head()
      else:
        r = u.get()
      if type(r) == requests.models.Response:
        if r.status_code == 200:
          u.msg(f'code 200')
          return True
        if r.status_code == 401:
          u.msg(f'code 401')
          u.msg(f'trying another')
          # unauthorised
          # more complex session login and auth
          # e.g. needed for NASA Earthdata login
          u.msg(f'getting login')
          r = u._get_login(head=head)
          if r.status_code == 200:
            u.msg(f'code 200')
            return True
        elif head == False:
          u.msg(f'code {r.status_code}')
          return False
    except:
      pass
    if head == False:
      u.msg(f'failed to connect')
      return False
    u.msg(f'trying get')
    return u.ping(head=False)

  def read_bytes(self):
    '''
    Open the URL data in bytes mode, read it and return the data

    This first tried self.get() but if the authorisation is more complex
    (e.g. when using NASA server) then a fuller 2-pass session
    is used.

    You should specify any required login/password with 
    with_components(username=str,password=str) 

    Returns:
      data from url
    Or:
      None                     : on failure 
      requests.models.Response : on connection problem
    '''

    if 'skipper' in self.__dict__:
      skipper = self.skipper
    else:
      skipper = False

    u = self
    store_url  = str(u)
    store_flag = 'data'
    if u._isfile():
      self.msg(f'{u} is not a URL: interpreting as Path')
      return Path(u).read_bytes()

    get_download,ifile,ofile = self._test_already_local()

    # get from ofile
    if ofile and Path(ofile).exists():
      data = ofile.read_bytes()
      ofile = Path(ofile)
      cache = {store_flag : { str(store_url) : str(ofile) }}
      self.database.set_db(cache,write=True)
      return data

    # get from ifile
    if ifile and Path(ifile).exists():
      ifile = Path(ifile)
      self.msg(f'opening already downloaded file {ifile}')
      data = ifile.read_bytes()
      if ofile: 
        ofile = Path(ofile)
        self.check_path(ofile.parent)
        ofile.parent.mkdir(parents=True,exist_ok=True)
        ofile.write_bytes(data)
        cache = {store_flag : { str(store_url) : str(ofile) }}
      else:
        cache = {store_flag : { str(store_url) : str(ifile) }}
      self.database.set_db(cache,write=True)
      return data

    try:
      if not skipper: 
        u.msg(f'trying {u}')
        r = u.get()
        
      if skipper or (type(r) == requests.models.Response):
        if (not skipper) and r.status_code == 200:
          u.msg(f'code {r.status_code}')
          data = r.content
          if ofile:
            ofile = Path(ofile)
            self.check_path(ofile.parent)
            ofile.parent.mkdir(parents=True,exist_ok=True)
            ofile.write_bytes(data)
            cache = {store_flag : { str(store_url) : str(ofile) }}
            self.database.set_db(cache,write=True)
          return data
        if skipper or (r.status_code == 401):
          if not skipper:
            u.msg(f'code {r.status_code}')
            u.msg(f'trying another')
          # unauthorised
          # more complex session login and auth
          # e.g. needed for NASA Earthdata login
          u.msg(f'getting login')
          r = u._get_login(head=False)
          if type(r) != requests.models.Response:
            return None
          if r.status_code == 200:
            u.msg(f'code {r.status_code}')
            data = r.content
            if ofile:
              ofile = Path(ofile)
              self.check_path(ofile.parent)
              ofile.parent.mkdir(parents=True,exist_ok=True)
              ofile.write_bytes(data)
              cache = {store_flag : { str(store_url) : str(ofile) }}
              self.database.set_db(cache,write=True)
            return data
        else:
          u.msg(f'code {r.status_code}')
          return r
    except:
      pass

    u.msg(f'failed to connect')
    return None 

  def _convert_to_abs(self,ilist):
    # this is slow and may be not needed
    self.msg(f'parsing URLs from html file {len(ilist)} items')
    return [self.update(*[str(self),l.rstrip('/#')],**(fdict(self.__dict__.copy()))) for l in ilist ]

  def _filter(self,links,pattern,pre_filter=True):
    # pre-filter
    if pre_filter: 
      links = np.array([str(l).rstrip('/#') for l in links])
      matches = np.array([fnmatch.fnmatch(str(l), '*'+pattern) for l in links])
      links = list(links[matches])
    

    links = self._convert_to_abs(links)
    olist = []
    try:
      p = self.done[pattern]
    except:
      try:
        self.done[pattern] = []
        
      except:
        self.done = {pattern:[]}
    p = self.done[pattern]
    
    olist = [u for u in links if u not in p]    
    self.done[pattern] = self.done[pattern] + olist
    return olist

  def has_wildness(self,uc):
    is_wild   = np.logical_or(np.array(['*' in i for i in uc]),
                              np.array(['?' in i for i in uc]))
    is_wild_2 = np.logical_or(np.array(['[' in i for i in uc]),
                              np.array([']' in i for i in uc]))
    is_wild = np.logical_or(is_wild,is_wild_2)
    return is_wild

  def glob(self,pattern,pre_filter=True):
    '''
    Iterate over this subtree and yield all existing files (of any
    kind, including directories) matching the given relative pattern.

    The URL here then needs to return lxml html code.

    Positional arguments:
       pattern  : to search for e.g. */2021.*.01
                  only wildcards * and ? considered at present

    '''
    u = self
    url = str(u)
    if url[-1] == '/':
      url = url[:-1]
    url = self.update(url,pattern)
    # check in database
    store_url  = url
    store_flag = 'query' 
    olist = self.database.get_from_db(store_flag,store_url)
    if olist is not None:
      if type(olist) is list:
        return [self.update(o) for o in olist]
      return [self.update(olist)]

    # start at the top
    uc = np.array(url.parts)
    for i,w in enumerate(uc[1:]): 
      if i == 0:
        base_list = [self.update(uc[0])]
      new_list = []
      for b in base_list:
        # set to new item
        glob = self.update(b)._glob(w,pre_filter=pre_filter)
        
        # glob with the next item
        new_list = new_list + glob
      base_list = np.unique(np.array(new_list,dtype=np.object).flatten())

    base_list = np.unique(np.array(base_list,dtype=np.object))
 
    olist = list(np.array([self.update(i) for i in base_list]).flatten())
    self.dedate()

    for l in olist:
      l.init(**(fdict(self.__dict__.copy())))

    # cache this in case we want to re-use it
    cache = {store_flag : { str(store_url) : [str(i) for i in olist] }}
    self.database.set_db(cache)
    if type(olist) is list: 
      return [self.update(o) for o in olist]
    return [self.update(olist)]

  def rglob(self, pattern,pre_filter=True):
    '''
    Recursively yield all existing files (of any kind, including
    directories) matching the given relative pattern, anywhere in
    this subtree.

    Positional arguments:
       pattern  : to search for e.g. 2021.*.01
                  only wildcards * and ? considered at present


    '''
    return self.glob(pattern,pre_filter=pre_filter)

  def flush(self):
    try:
      return self.database.set_db(self.database.database,write=True)
    except:
      return None

  def _glob(self, pattern,pre_filter=True):
    '''
    Iterate over this subtree and yield all existing files (of any
    kind, including directories) matching the given relative pattern.

    The URL here then needs to return lxml html code.
    '''
    # take off trailing slash
    if pattern[-1] == '/':
      pattern = pattern[:-1]
    store_url  = str(self.update(pattern))
    store_flag = 'query'
    if not self.noclobber:
      # dont trust cache
      response = None
    else:
      response = self.database.get_from_db(store_flag,store_url)
    if response:
      self.msg(f'got response from database for {store_url}')
      self.msg(f'discovered {len(response)} files with pattern {pattern} in {str(self)}')
      return [self.update(str(f)) for f in response] 

    try:
      html = self.read_text()
      links = np.array([mylink.attrs['href'] for mylink in BeautifulSoup(html,'lxml').find_all('a')])
      links = np.array(self._filter(links,pattern,pre_filter=pre_filter))
      matches = np.array([fnmatch.fnmatch(str(l), '*'+pattern) for l in links]) 
      files = list(links[matches])
    except:
      files = []
    self.msg(f'discovered {len(files)} files with pattern {pattern} in {str(self)}')
   
    files = [str(i) for i in files]
    # cache this in db
    cache = {store_flag : { str(store_url) : files }}
    self.database.set_db(cache)
 
    return files 
Example #46
0
def _open_atomic(path: pathlib.Path, mode="r"):
    """Open file with atomic file writing support. File reading is also
    adapted to atomic file writing (for example, the backup file
    is used when an atomic write failed previously.)

    TODO(suquark): race condition like two processes writing the
    same file is still not safe. This may not be an issue, because
    in our current implementation, we only need to guarantee the
    file is either fully written or not existing.

    Args:
        path: The file path.
        mode: Open mode same as "open()".

    Returns:
        File object.
    """
    if "a" in mode or "+" in mode:
        raise ValueError("Atomic open does not support appending.")
    # backup file is hidden by default
    backup_path = path.with_name(f".{path.name}.backup")
    if "r" in mode:  # read mode
        if _file_exists(path):
            f = open(path, mode)
        else:
            raise FileNotFoundError(path)
        try:
            yield f
        finally:
            f.close()
    elif "x" in mode:  # create mode
        if path.exists():
            raise FileExistsError(path)
        tmp_new_fn = path.with_suffix(f".{path.name}.{uuid.uuid4().hex}")
        if not tmp_new_fn.parent.exists():
            tmp_new_fn.parent.mkdir(parents=True)
        f = open(tmp_new_fn, mode)
        write_ok = True
        try:
            yield f
        except Exception:
            write_ok = False
            raise
        finally:
            f.close()
            if write_ok:
                # "commit" file if writing succeeded
                tmp_new_fn.rename(path)
            else:
                # remove file if writing failed
                tmp_new_fn.unlink()
    elif "w" in mode:  # overwrite mode
        # backup existing file
        if path.exists():
            # remove an even older backup file
            if backup_path.exists():
                backup_path.unlink()
            path.rename(backup_path)
        tmp_new_fn = path.with_suffix(f".{path.name}.{uuid.uuid4().hex}")
        if not tmp_new_fn.parent.exists():
            tmp_new_fn.parent.mkdir(parents=True)
        f = open(tmp_new_fn, mode)
        write_ok = True
        try:
            yield f
        except Exception:
            write_ok = False
            raise
        finally:
            f.close()
            if write_ok:
                tmp_new_fn.rename(path)
                # cleanup the backup file
                if backup_path.exists():
                    backup_path.unlink()
            else:
                # remove file if writing failed
                tmp_new_fn.unlink()
    else:
        raise ValueError(f"Unknown file open mode {mode}.")
Example #47
0
    def from_dir(
        cls,
        path: Path,
        plugins: Sequence[Plugin],
        ts_plugins: Sequence[Plugin],
        short_name: bool,
    ) -> "Scenario":
        """
        Makes a Scenario out of the provided directory path.

        The directory must be a "scenario directory", which means that it must
        contain at least one HAR file or another scenario directory.
        Symbolic link loops are not checked but forbidden!

        There may exist a weight file <path>.weight. If so, its contents will
        be used as weight for the Scenario by calling weight_from_path.

        Errors are handled this way:
        1. If path itself cannot be transformed into a scenario,
           raise SkippableScenarioError.
        2. For each child of path, apply (1) but catch the exception and display
           a warning about skipping this child. (If all children are skipped, (1)
           applies to path itself.)

        Therefore:
        - If the directory contains weight files that don't match any HAR file or
          subdirectory, an error will be emitted as this is probably a mistake.
        - If the directory contains files or directories that cannot be converted
          into scenarios (e.g. non-JSON files or .git directories), a message
          is emitted and the file or subdirectory is skipped.

        :raise SkippableScenarioError: if the directory contains dangling weight
            files or no sub-scenarios.
        """
        try:
            children = list(path.iterdir())
        except OSError as err:
            raise SkippableScenarioError(path, err)

        weight_files: Set[Path] = {
            child
            for child in children if child.suffix == WEIGHT_FILE_SUFFIX
        }

        scenarios: List[Scenario] = []
        for child in children:
            if child in weight_files:
                continue
            try:
                scenario = cls.from_path(child,
                                         plugins,
                                         ts_plugins=ts_plugins,
                                         short_name=True)
            except SkippableScenarioError as err:
                logging.warning(
                    "while searching for HAR files, skipping %s: %s", child,
                    err.reason)
            else:
                scenarios.append(scenario)

        cls._check_dangling_weights(path, scenarios, weight_files)
        if not scenarios:
            raise SkippableScenarioError(path,
                                         "no scenarios inside the directory")
        cls._check_name_collisions(path, scenarios)

        return Scenario(
            name=to_identifier(
                path.with_suffix("").name if short_name else str(path)),
            children=tuple(scenarios),
            origin=path,
            weight=cls.weight_from_path(path),
        )
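

# Illustration (added; not part of the original source). A sketch of the "scenario
# directory" layout the docstring above describes, built with pathlib; the names are
# invented and WEIGHT_FILE_SUFFIX is assumed to be ".weight" as in "<path>.weight".
import tempfile
from pathlib import Path as _P

_root = _P(tempfile.mkdtemp()) / "scenarios"
(_root / "checkout").mkdir(parents=True)
(_root / "checkout" / "add_to_cart.har").write_text("{}")
(_root / "checkout" / "pay.har").write_text("{}")
(_root / "checkout.weight").write_text("3")    # weights the "checkout" sub-scenario
(_root / "browse.har").write_text("{}")
# Scenario.from_dir(_root, plugins=[], ts_plugins=[], short_name=False) would then
# treat "checkout" as a nested scenario and "browse.har" as a direct child; the call
# is left commented out because the plugin machinery is not shown in this excerpt.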
Example #48
0
def test_ener(
    dp: "DeepPot",
    data: DeepmdData,
    system: str,
    numb_test: int,
    detail_file: Optional[str],
    has_atom_ener: bool,
    append_detail: bool = False,
) -> Tuple[List[np.ndarray], List[int]]:
    """Test energy type model.

    Parameters
    ----------
    dp : DeepPot
        instance of deep potential
    data: DeepmdData
        data container object
    system : str
        system directory
    numb_test : int
        number of tests to do
    detail_file : Optional[str]
        file where test details will be output
    has_atom_ener : bool
        whether per atom quantities should be computed
    append_detail : bool, optional
        if true append output detail file, by default False

    Returns
    -------
    Tuple[List[np.ndarray], List[int]]
        arrays with results and their shapes
    """
    data.add("energy", 1, atomic=False, must=False, high_prec=True)
    data.add("force", 3, atomic=True, must=False, high_prec=False)
    data.add("virial", 9, atomic=False, must=False, high_prec=False)
    if dp.has_efield:
        data.add("efield", 3, atomic=True, must=True, high_prec=False)
    if has_atom_ener:
        data.add("atom_ener", 1, atomic=True, must=True, high_prec=False)
    if dp.get_dim_fparam() > 0:
        data.add(
            "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False
        )
    if dp.get_dim_aparam() > 0:
        data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False)

    test_data = data.get_test()
    natoms = len(test_data["type"][0])
    nframes = test_data["box"].shape[0]
    numb_test = min(nframes, numb_test)

    coord = test_data["coord"][:numb_test].reshape([numb_test, -1])
    box = test_data["box"][:numb_test]
    if dp.has_efield:
        efield = test_data["efield"][:numb_test].reshape([numb_test, -1])
    else:
        efield = None
    if not data.pbc:
        box = None
    atype = test_data["type"][0]
    if dp.get_dim_fparam() > 0:
        fparam = test_data["fparam"][:numb_test]
    else:
        fparam = None
    if dp.get_dim_aparam() > 0:
        aparam = test_data["aparam"][:numb_test]
    else:
        aparam = None

    ret = dp.eval(
        coord,
        box,
        atype,
        fparam=fparam,
        aparam=aparam,
        atomic=has_atom_ener,
        efield=efield,
    )
    energy = ret[0]
    force = ret[1]
    virial = ret[2]
    energy = energy.reshape([numb_test, 1])
    force = force.reshape([numb_test, -1])
    virial = virial.reshape([numb_test, 9])
    if has_atom_ener:
        ae = ret[3]
        av = ret[4]
        ae = ae.reshape([numb_test, -1])
        av = av.reshape([numb_test, -1])

    rmse_e = rmse(energy - test_data["energy"][:numb_test].reshape([-1, 1]))
    rmse_f = rmse(force - test_data["force"][:numb_test])
    rmse_v = rmse(virial - test_data["virial"][:numb_test])
    rmse_ea = rmse_e / natoms
    rmse_va = rmse_v / natoms
    if has_atom_ener:
        rmse_ae = rmse(
            test_data["atom_ener"][:numb_test].reshape([-1]) - ae.reshape([-1])
        )

    # print ("# energies: %s" % energy)
    log.info(f"# number of test data : {numb_test:d} ")
    log.info(f"Energy RMSE        : {rmse_e:e} eV")
    log.info(f"Energy RMSE/Natoms : {rmse_ea:e} eV")
    log.info(f"Force  RMSE        : {rmse_f:e} eV/A")
    log.info(f"Virial RMSE        : {rmse_v:e} eV")
    log.info(f"Virial RMSE/Natoms : {rmse_va:e} eV")
    if has_atom_ener:
        log.info(f"Atomic ener RMSE   : {rmse_ae:e} eV")

    if detail_file is not None:
        detail_path = Path(detail_file)

        pe = np.concatenate(
            (
                np.reshape(test_data["energy"][:numb_test], [-1, 1]),
                np.reshape(energy, [-1, 1]),
            ),
            axis=1,
        )
        save_txt_file(
            detail_path.with_suffix(".e.out"),
            pe,
            header="%s: data_e pred_e" % system,
            append=append_detail,
        )
        pf = np.concatenate(
            (
                np.reshape(test_data["force"][:numb_test], [-1, 3]),
                np.reshape(force, [-1, 3]),
            ),
            axis=1,
        )
        save_txt_file(
            detail_path.with_suffix(".f.out"),
            pf,
            header="%s: data_fx data_fy data_fz pred_fx pred_fy pred_fz" % system,
            append=append_detail,
        )
        pv = np.concatenate(
            (
                np.reshape(test_data["virial"][:numb_test], [-1, 9]),
                np.reshape(virial, [-1, 9]),
            ),
            axis=1,
        )
        save_txt_file(
            detail_path.with_suffix(".v.out"),
            pv,
            header=f"{system}: data_vxx data_vxy data_vxz data_vyx data_vyy "
            "data_vyz data_vzx data_vzy data_vzz pred_vxx pred_vxy pred_vxz pred_vyx "
            "pred_vyy pred_vyz pred_vzx pred_vzy pred_vzz",
            append=append_detail,
        )
    return {
        "rmse_ea": (rmse_ea, energy.size),
        "rmse_f": (rmse_f, force.size),
        "rmse_va": (rmse_va, virial.size),
    }
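The energy, polarizability and dipole test functions in these examples all call an `rmse` helper that is defined elsewhere in the module. A minimal sketch consistent with how it is used here (a single scalar root-mean-square over the whole difference array); this is an assumption, not the original helper:

import numpy as np


def rmse(diff: np.ndarray) -> float:
    """Root-mean-square of all elements of `diff` (sketch only)."""
    return np.sqrt(np.average(diff * diff))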
Example #49
0
def test_model(model, data_iterator, latest_model_path, num_columns: int = 2):
    model = model.eval().to(global_torch_device())

    inputs, labels = next(data_iterator)

    inputs = inputs.to(global_torch_device())
    labels = labels.to(global_torch_device())
    with torch.no_grad():
        pred = model(inputs)

    y_pred = pred.data.to("cpu").numpy()
    y_pred_max = numpy.argmax(y_pred, axis=-1)
    truth_labels = labels.data.to("cpu").numpy()

    # scikit-learn metrics expect CPU array-likes, so use the numpy copies
    accuracy_w = accuracy_score(truth_labels, y_pred_max)
    precision_a, recall_a, fscore_a, support_a = precision_recall_fscore_support(
        truth_labels, y_pred_max)
    precision_w, recall_w, fscore_w, support_w = precision_recall_fscore_support(
        truth_labels, y_pred_max, average="weighted")

    _, predicted = torch.max(pred, 1)

    input_images_rgb = [
        default_torch_retransform(x) for x in inputs.to(global_torch_device())
    ]

    cell_width = (800 / num_columns) - 6 - 6 * 2

    pyplot.plot(numpy.random.random((3, 3)))

    alphabet = string.ascii_lowercase
    class_names = numpy.array([*alphabet])

    samples = len(y_pred)
    num_rows = -(-samples // num_columns)  # ceiling division so a partial last row still fits
    predictions = [[None for _ in range(num_columns)]
                   for _ in range(num_rows)]
    for i, a, b, c in zip(range(samples), input_images_rgb, y_pred_max,
                          truth_labels):
        pyplot.imshow(a)
        if b == c:
            outcome = "tp"
        else:
            outcome = "fn"

        gd = ReportEntry(
            name=i,
            figure=plt_html(format="jpg", size=[cell_width, cell_width]),
            prediction=class_names[b],
            truth=class_names[c],
            outcome=outcome,
            explanation=None,
        )

        predictions[i // num_columns][i % num_columns] = gd

    confusion_matrix_plot(y_pred_max, truth_labels, class_names)

    title = "Classification Report"
    model_name = latest_model_path
    confusion_matrix = plt_html(format="png", size=[800, 800])

    accuracy = generate_math_html(r"\dfrac{tp+tn}{N}"), None, accuracy_w
    precision = generate_math_html(r"\dfrac{tp}{tp+fp}"), precision_a, precision_w
    recall = generate_math_html(r"\dfrac{tp}{tp+fn}"), recall_a, recall_w
    f1_score = (
        generate_math_html(r"2*\dfrac{precision*recall}{precision+recall}"),
        fscore_a,
        fscore_w,
    )
    support = generate_math_html("N_{class_truth}"), support_a, support_w
    metrics = NOD.nod_of(accuracy, precision, f1_score, recall,
                         support).as_flat_tuples()

    bundle = NOD.nod_of(title, model_name, confusion_matrix, metrics,
                        predictions)

    file_name = Path(title.lower().replace(" ", "_"))

    generate_html(file_name.with_suffix(".html"), **bundle)
    generate_pdf(file_name.with_suffix(".html"), file_name.with_suffix(".pdf"))
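The metric block above mixes per-class results with support-weighted averages from scikit-learn. For reference, a self-contained sketch of just that part; the arrays here are made-up placeholders for a three-class problem:

import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# hypothetical predictions and ground truth
y_true = np.array([0, 1, 2, 2, 1, 0])
y_hat = np.array([0, 2, 2, 2, 1, 0])

accuracy = accuracy_score(y_true, y_hat)
# average=None -> one value per class; average="weighted" -> support-weighted scalar
prec_a, rec_a, f1_a, support = precision_recall_fscore_support(y_true, y_hat, average=None)
prec_w, rec_w, f1_w, _ = precision_recall_fscore_support(y_true, y_hat, average="weighted")
print(accuracy, prec_a, prec_w)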
Example #50
0
def test_polar(
    dp: "DeepPolar",
    data: DeepmdData,
    numb_test: int,
    detail_file: Optional[str],
    *,
    atomic: bool,
) -> Dict[str, Tuple[float, int]]:
    """Test polarizability type model.

    Parameters
    ----------
    dp : DeepPolar
        instance of deep polarizability model
    data : DeepmdData
        data container object
    numb_test : int
        number of tests to do
    detail_file : Optional[str]
        file where test details will be output
    atomic : bool
        whether to test atomic (per-atom) polarizability instead of the
        global one

    Returns
    -------
    Dict[str, Tuple[float, int]]
        dictionary mapping metric names to (value, number of data points)
    """
    data.add(
        "polarizability" if not atomic else "atomic_polarizability",
        9,
        atomic=atomic,
        must=True,
        high_prec=False,
        type_sel=dp.get_sel_type(),
    )
    
    test_data = data.get_test()
    polar, numb_test, atype = run_test(dp, test_data, numb_test)

    sel_type = dp.get_sel_type()
    sel_natoms = 0
    for ii in sel_type:
        sel_natoms += sum(atype == ii)

    # YWolfeee: do summation in global polar mode
    if not atomic:
        polar = np.sum(polar.reshape((polar.shape[0], -1, 9)), axis=1)
        rmse_f = rmse(polar - test_data["polarizability"][:numb_test])
        rmse_fs = rmse_f / np.sqrt(sel_natoms)
        rmse_fa = rmse_f / sel_natoms
    else:
        rmse_f = rmse(polar - test_data["atomic_polarizability"][:numb_test])
    
    log.info(f"# number of test data : {numb_test:d} ")
    log.info(f"Polarizability  RMSE       : {rmse_f:e}")
    if not atomic:
        log.info(f"Polarizability  RMSE/sqrtN : {rmse_fs:e}")
        log.info(f"Polarizability  RMSE/N     : {rmse_fa:e}")
    log.info(f"The unit of error is the same as the unit of provided label.")

    if detail_file is not None:
        detail_path = Path(detail_file)

        pe = np.concatenate(
            (
                np.reshape(test_data["polarizability"][:numb_test], [-1, 9]),
                np.reshape(polar, [-1, 9]),
            ),
            axis=1,
        )
        np.savetxt(
            detail_path.with_suffix(".out"),
            pe,
            header="data_pxx data_pxy data_pxz data_pyx data_pyy data_pyz data_pzx "
            "data_pzy data_pzz pred_pxx pred_pxy pred_pxz pred_pyx pred_pyy pred_pyz "
            "pred_pzx pred_pzy pred_pzz",
        )
    return {
        "rmse": (rmse_f, polar.size),
    }
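Both test_polar and test_dipole call a run_test helper that is not shown in these examples. A plausible minimal version, inferred only from how its return values (predictions, clipped numb_test, atom types) are used here; the real helper lives elsewhere in the module and may differ:

def run_test(dp, test_data, numb_test):
    """Sketch of the evaluation helper assumed by test_polar/test_dipole."""
    nframes = test_data["box"].shape[0]
    numb_test = min(nframes, numb_test)
    coord = test_data["coord"][:numb_test].reshape([numb_test, -1])
    box = test_data["box"][:numb_test]
    atype = test_data["type"][0]
    prediction = dp.eval(coord, box, atype)
    return prediction.reshape([numb_test, -1]), numb_test, atype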
Example #51
0
class GitRepo:
    def __init__(self, path, remote_url=None, branch_name='master', name=None):
        self.path = Path(path)
        self.path_str = str(self.path)
        self.remote_url = remote_url
        self.branch_name = branch_name
        db_latest_key = '%s:%s:%s' % (self.path_str, remote_url
                                      or '', branch_name)
        self.db_latest_key = sha256(db_latest_key.encode()).hexdigest()
        self.repo_name = name or self.path.name

    def git(self, *args):
        """Run a git command against the current repo"""
        curdir = os.getcwd()
        try:
            os.chdir(self.path_str)
            output = check_output((GIT, ) + args, stderr=STDOUT)
        finally:
            os.chdir(curdir)

        return force_str(output.strip())

    @property
    def current_hash(self):
        """The git revision ID (hash) of the current HEAD or None if no repo"""
        try:
            return self.git('rev-parse', 'HEAD')
        except (OSError, CalledProcessError):
            return None

    @property
    def current_commit_timestamp(self):
        """The UNIX timestamp of the latest commit"""
        try:
            return int(self.git('show', '-s', '--format=%ct', 'HEAD'))
        except (OSError, CalledProcessError, ValueError):
            return 0

    @property
    def last_updated(self):
        if self.current_commit_timestamp:
            latest_datetime = datetime.fromtimestamp(
                self.current_commit_timestamp)
            return timeago.format(latest_datetime)

        return 'unknown'

    def diff(self, start_hash, end_hash):
        """Return a 2 tuple: (modified files, deleted files)"""
        diff_out = StringIO(
            self.git('diff', '--name-status', start_hash, end_hash))
        return self._parse_git_status(diff_out)

    def modified_files(self):
        """Return a list of new or modified files according to git"""
        self.git('add', '.')
        status = StringIO(self.git('status', '--porcelain'))
        return self._parse_git_status(status)

    def _parse_git_status(self, lines):
        modified = set()
        removed = set()
        for line in lines:
            parts = line.split()
            # delete
            if parts[0] == 'D':
                removed.add(parts[1])
            # rename
            elif parts[0][0] == 'R':
                removed.add(parts[1])
                modified.add(parts[2])
            # everything else
            else:
                # some types (like copy) have two file entries
                for part in parts[1:]:
                    modified.add(part)

        return modified, removed

    def clone(self):
        """Clone the repo specified in the initial arguments"""
        if not self.remote_url:
            raise RuntimeError('remote_url required to clone')

        self.path.mkdir(parents=True, exist_ok=True)
        self.git('clone', '--depth', '1', '--branch', self.branch_name,
                 self.remote_url, '.')

    def reclone(self):
        """Safely get a fresh clone of the repo"""
        if self.path.exists():
            new_path = self.path.with_suffix(f'.{int(time())}')
            new_repo = GitRepo(new_path, self.remote_url, self.branch_name)
            new_repo.clone()
            # only remove the old after the new clone succeeds
            rmtree(self.path_str, ignore_errors=True)
            new_path.rename(self.path)
        else:
            self.clone()

    def pull(self):
        """Update the repo to the latest of the remote and branch

        Return the previous hash and the new hash."""
        old_hash = self.current_hash
        self.git('fetch', '-f', self.remote_url, self.branch_name)
        self.git('checkout', '-f', 'FETCH_HEAD')
        return old_hash, self.current_hash

    def update(self):
        """Update the repo, cloning if necessary.

        :return a tuple of (old hash, new hash) if the repo was pulled,
                (None, None) if it was (re)cloned
        """
        if self.path.is_dir():
            if not self.path.joinpath('.git').is_dir():
                rmtree(self.path_str, ignore_errors=True)
                self.clone()
            else:
                return self.pull()
        else:
            self.clone()

        return None, None

    def reset(self, new_head):
        self.git('reset', '--hard', new_head)

    def clean(self):
        self.git('clean', '-fd')

    def get_db_latest(self):
        try:
            return GitRepoState.objects.get(
                repo_id=self.db_latest_key).latest_ref
        except GitRepoState.DoesNotExist:
            return None

    def has_changes(self):
        return self.current_hash != self.get_db_latest()

    @property
    def clean_remote_url(self):
        repo_base = self.remote_url
        if repo_base.endswith('.git'):
            repo_base = repo_base[:-4]
        elif repo_base.endswith('/'):
            repo_base = repo_base[:-1]

        return repo_base

    def remote_url_auth(self, auth):
        url = self.clean_remote_url
        # remove https://
        url = url[8:]
        return f'https://{auth}@{url}'

    def set_db_latest(self, latest_ref=None):
        latest_ref = latest_ref or self.current_hash
        rs, created = GitRepoState.objects.get_or_create(
            repo_id=self.db_latest_key,
            defaults={
                'latest_ref': latest_ref,
                'repo_name': self.repo_name,
                'repo_url': self.clean_remote_url,
                'latest_ref_timestamp': self.current_commit_timestamp,
            },
        )
        if not created:
            rs.latest_ref = latest_ref
            rs.repo_name = self.repo_name
            rs.repo_url = self.clean_remote_url
            rs.latest_ref_timestamp = self.current_commit_timestamp
            rs.save()
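A short usage sketch of the GitRepo helper above, assuming the git binary and the surrounding GitRepoState model are available; the path and URL are placeholders:

repo = GitRepo('/tmp/demo-checkout',                       # placeholder path
               remote_url='https://example.com/demo.git',  # placeholder URL
               branch_name='main')

old_hash, new_hash = repo.update()  # clones on first run, pulls afterwards
if repo.has_changes():
    print(f'{repo.repo_name} moved from {old_hash} to {new_hash}')
    repo.set_db_latest()            # persist the new HEAD hash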
Example #52
0
def test_dipole(
    dp: "DeepDipole",
    data: DeepmdData,
    numb_test: int,
    detail_file: Optional[str],
    atomic: bool,
) -> Dict[str, Tuple[float, int]]:
    """Test dipole type model.

    Parameters
    ----------
    dp : DeepDipole
        instance of deep dipole model
    data : DeepmdData
        data container object
    numb_test : int
        number of tests to do
    detail_file : Optional[str]
        file where test details will be output
    atomic : bool
        whether atomic dipole is provided

    Returns
    -------
    Dict[str, Tuple[float, int]]
        dictionary mapping metric names to (value, number of data points)
    """
    data.add(
        "dipole" if not atomic else "atomic_dipole",
        3,
        atomic=atomic,
        must=True,
        high_prec=False,
        type_sel=dp.get_sel_type(),
    )
    test_data = data.get_test()
    dipole, numb_test, atype = run_test(dp, test_data, numb_test)

    sel_type = dp.get_sel_type()
    sel_natoms = 0
    for ii in sel_type:
        sel_natoms += sum(atype == ii)
    
    # do summation in atom dimension
    if not atomic:
        dipole = np.sum(dipole.reshape((dipole.shape[0], -1, 3)), axis=1)
        rmse_f = rmse(dipole - test_data["dipole"][:numb_test])
        rmse_fs = rmse_f / np.sqrt(sel_natoms)
        rmse_fa = rmse_f / sel_natoms
    else:
        rmse_f = rmse(dipole - test_data["atomic_dipole"][:numb_test])
    
    log.info(f"# number of test data : {numb_test:d}")
    log.info(f"Dipole  RMSE       : {rmse_f:e}")
    if not atomic:
        log.info(f"Dipole  RMSE/sqrtN : {rmse_fs:e}")
        log.info(f"Dipole  RMSE/N     : {rmse_fa:e}")
    log.info(f"The unit of error is the same as the unit of provided label.")

    if detail_file is not None:
        detail_path = Path(detail_file)

        pe = np.concatenate(
            (
                np.reshape(test_data["dipole"][:numb_test], [-1, 3]),
                np.reshape(dipole, [-1, 3]),
            ),
            axis=1,
        )
        np.savetxt(
            detail_path.with_suffix(".out"),
            pe,
            header="data_x data_y data_z pred_x pred_y pred_z",
        )
    return {
        "rmse": (rmse_f, dipole.size),
    }
Example #53
0
def _create(name,
            pretrained=True,
            channels=3,
            classes=80,
            autoshape=True,
            verbose=True,
            device=None):
    """Creates or loads a YOLOv5 model

    Arguments:
        name (str): model name 'yolov5s' or path 'path/to/best.pt'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): apply YOLOv5 .autoshape() wrapper to model
        verbose (bool): print all information to screen
        device (str, torch.device, None): device to use for model parameters

    Returns:
        YOLOv5 model
    """
    from pathlib import Path

    from models.common import AutoShape, DetectMultiBackend
    from models.yolo import Model
    from utils.downloads import attempt_download
    from utils.general import LOGGER, check_requirements, intersect_dicts, logging
    from utils.torch_utils import select_device

    if not verbose:
        LOGGER.setLevel(logging.WARNING)

    check_requirements(exclude=('tensorboard', 'thop', 'opencv-python'))
    name = Path(name)
    path = name.with_suffix(
        '.pt') if name.suffix == '' else name  # checkpoint path
    try:
        device = select_device(('0' if torch.cuda.is_available() else 'cpu'
                                ) if device is None else device)

        if pretrained and channels == 3 and classes == 80:
            model = DetectMultiBackend(
                path, device=device)  # download/load FP32 model
            # model = models.experimental.attempt_load(path, map_location=device)  # download/load FP32 model
        else:
            cfg = list(
                (Path(__file__).parent /
                 'models').rglob(f'{path.stem}.yaml'))[0]  # model.yaml path
            model = Model(cfg, channels, classes)  # create model
            if pretrained:
                ckpt = torch.load(attempt_download(path),
                                  map_location=device)  # load
                csd = ckpt['model'].float().state_dict(
                )  # checkpoint state_dict as FP32
                csd = intersect_dicts(csd,
                                      model.state_dict(),
                                      exclude=['anchors'])  # intersect
                model.load_state_dict(csd, strict=False)  # load
                if len(ckpt['model'].names) == classes:
                    model.names = ckpt[
                        'model'].names  # set class names attribute
        if autoshape:
            model = AutoShape(model)  # for file/URI/PIL/cv2/np inputs and NMS
        return model.to(device)

    except Exception as e:
        help_url = 'https://github.com/ultralytics/yolov5/issues/36'
        s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.'
        raise Exception(s) from e
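A hedged usage sketch of the loader above, mirroring how YOLOv5's hub entry points are typically called; it assumes the surrounding repository layout, and the image URL is only an example:

# load a small pretrained model and run it on one image
model = _create('yolov5s', pretrained=True, autoshape=True)
results = model('https://ultralytics.com/images/zidane.jpg')  # example image
results.print()  # summary of detections per image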
Example #54
0
    def _load_v13(file_prefix: str, loc: bool, tracks: bool,
                  segment_images: bool, flatfield: bool,
                  filtered_file_prefix) -> Dict:
        """Load data to a dictionary (v013 format)

        Parameters
        ----------
        file_prefix
            Prefix used for saving via :py:meth:`save`.
        loc
            Whether to load localization data.
        tracks
            Whether to load tracking data.
        segment_images
            Whether to load segmentation images.
        flatfield
            Whether to load flatfield corrections.
        filtered_file_prefix
            Prefix used for saving analyzed and filtered data in v013 format,
            i.e. prefix passed to the ``save()`` method in the Analysis
            notebook.

        Returns
        -------
            Dictionary of loaded data and settings.
        """
        infile = Path(f"{file_prefix}-v013")
        with infile.with_suffix(".yaml").open() as f:
            ret = io.yaml.safe_load(f)

        ret["data_dir"] = Path(ret.get("data_dir", ""))
        ret["localizations"] = {}
        ret["tracks"] = {}
        ret["segment_images"] = defaultdict(dict)
        ret["flatfield"] = {}

        all_src = {**ret["sources"], **ret["special_sources"]}

        do_load = []
        if loc:
            do_load.append((ret["localizations"], "_loc"))
        if tracks:
            do_load.append((ret["tracks"], "_trc"))
        if len(do_load):
            with pd.HDFStore(infile.with_suffix(".h5"), "r") as s:
                for sink, suffix in do_load:
                    keys = (k for k in s.keys() if k.endswith(suffix))
                    for k in keys:
                        new_key = k[1:-len(suffix)]
                        loaded = s[k]
                        src = all_src[new_key]
                        fname_map = pd.Series(src.keys(), index=src.values())
                        loaded.index = loaded.index.set_levels(
                            fname_map[loaded.index.levels[0]], level=0)
                        if suffix == "_trc":
                            # Restore categorical exc_type. See comment in
                            # `save` method for details.
                            loaded = loaded.astype({
                                ("fret", "exc_type"):
                                "category"
                            })
                        sink[new_key] = loaded

        if segment_images:
            seg_img_file = infile.with_suffix(".cell_img.npz")
            # Map file names to dataset IDs
            fname_map = {
                fname: (did, fid)
                for did, src in all_src.items() for fid, fname in src.items()
            }
            try:
                with np.load(seg_img_file) as data:
                    ci = dict(data)
                for k, v in ci.items():
                    k_split = k.split("\n")
                    if len(k_split) == 1:
                        new_k = k_split[0]
                    else:
                        new_k = tuple(k_split)
                    did, fid = fname_map[new_k]
                    ret["segment_images"][did][fid] = v
            except Exception as e:
                warnings.warn("Could not load segmentation images from file "
                              f"\"{str(seg_img_file)}\" ({e}).")
        if flatfield:
            flatfield_glob = str(infile.with_suffix(".flat_*.npz"))
            key_re = re.compile(r"^\.flat_([\w\s-]+)")
            for p in Path().glob(flatfield_glob):
                m = key_re.match(p.suffixes[-2])
                if m is None:
                    warnings.warn("Could not load flatfield corrector from "
                                  f"{str(p)}.")
                else:
                    ret["flatfield"][m.group(1)] = _flatfield.Corrector.load(p)

        return ret
Example #55
0
import wave
from pathlib import Path


def main(iwavepath: Path, owavepath: Path) -> None:
    with wave.open(str(iwavepath), "r") as iwave:
        pre = ()
    post = ()
    with wave.open(str(owavepath.with_suffix(".wav")), "w") as owave:
        pass
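The snippet above is only a skeleton. A fuller sketch of what it appears to be heading toward (copying audio from the input file to the output path with the standard-library wave API); the function name copy_wav is hypothetical:

import wave
from pathlib import Path


def copy_wav(iwavepath: Path, owavepath: Path) -> None:
    # read parameters and raw frames from the input file
    with wave.open(str(iwavepath), "rb") as iwave:
        params = iwave.getparams()
        frames = iwave.readframes(iwave.getnframes())
    # write them unchanged to the output file
    with wave.open(str(owavepath.with_suffix(".wav")), "wb") as owave:
        owave.setparams(params)
        owave.writeframes(frames)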
Example #56
0
    def _load_v14(file_prefix: str, loc: bool, tracks: bool,
                  segment_images: bool, flatfield: bool) -> Dict:
        """Load data to a dictionary (v014 format)

        Parameters
        ----------
        file_prefix
            Prefix used for saving via :py:meth:`save`.
        loc
            Whether to load localization data.
        tracks
            Whether to load tracking data.
        segment_images
            Whether to load segmentation images.
        flatfield
            Whether to load flatfield corrections.

        Returns
        -------
            Dictionary of loaded data and settings.
        """
        infile = Path(f"{file_prefix}-v014")
        with infile.with_suffix(".yaml").open() as f:
            ret = io.yaml.safe_load(f)

        if "data_dir" in ret:
            ret["data_dir"] = Path(ret["data_dir"])

        if loc:
            ret["localizations"] = {}
            ret["special_localizations"] = {}
            with pd.HDFStore(infile.with_suffix(".loc.h5"), "r") as s:
                for k in s.keys():
                    loaded = s.get(k)
                    if k.startswith("/locs/"):
                        ret["localizations"][k[6:]] = loaded
                    elif k.startswith("/special_locs/"):
                        ret["special_localizations"][k[14:]] = loaded
        if tracks:
            ret["tracks"] = {}
            ret["special_tracks"] = {}
            with pd.HDFStore(infile.with_suffix(".tracks.h5"), "r") as s:
                for k in s.keys():
                    # Restore categorical exc_type. See comment in
                    # `save` method for details.
                    loaded = s.get(k).astype({
                        ("fret", "exc_type"): "category"
                    })
                    if k.startswith("/tracks/"):
                        ret["tracks"][k[8:]] = loaded
                    elif k.startswith("/special_tracks/"):
                        ret["special_tracks"][k[16:]] = loaded
        if segment_images:
            ret["segment_images"] = defaultdict(dict)
            seg_img_file = infile.with_suffix(".seg_img.npz")
            try:
                with np.load(seg_img_file) as data:
                    ci = dict(data)
                for k, v in ci.items():
                    split_idx = k.rfind("/")
                    k1 = k[:split_idx]
                    k2 = int(k[split_idx + 1:])
                    ret["segment_images"][k1][k2] = v
            except Exception as e:
                warnings.warn("Could not load segmentation images from file "
                              f"\"{str(seg_img_file)}\" ({e}).")
        if flatfield:
            ret["flatfield"] = {}
            flatfield_glob = str(infile.with_suffix(".flat_*.npz"))
            key_re = re.compile(r"^\.flat_([\w\s-]+)")
            for p in Path().glob(flatfield_glob):
                m = key_re.match(p.suffixes[-2])
                if m is None:
                    warnings.warn("Could not load flatfield corrector from "
                                  f"{str(p)}.")
                else:
                    ret["flatfield"][m.group(1)] = _flatfield.Corrector.load(p)

        return ret
Example #57
0
from pathlib import Path

print(type(Path('/usr/local/etc/mongod.conf').parent))

print(Path('/').joinpath('home', 'yangkai', 'zhihu'))
print(Path.exists(Path('~/lyanna').expanduser() / 'config.py'))
print(
    Path.exists(
        Path('~/Documents').expanduser() / 'code' / 'leetcode' / 'README.txt'))

p = Path('/Users/dongweiming/test.txt')
print(p.parent.parent)
print()
print(p.parents[0])
print(p.parents[1])
print(p.parents[2])

print(p.suffix, p.stem)
print(p.suffixes, p.stem)

Path('new.txt').touch()

p = Path('./test.txt')
p.write_text('456\n')
print(p.read_text())

p = Path('/home/gentoo/screenshot/abc.jpg')
print(p.with_suffix('.png'))
print(p.with_name(f'123{p.suffix}'))

Path('1/2/3').mkdir(parents=True, exist_ok=True)
print(Path('1').owner())
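One detail worth adding to the demo above: Path.with_suffix replaces only the last suffix, which matters for multi-dot names. A small sketch:

from pathlib import Path

p = Path('archive.tar.gz')
print(p.suffix)                 # '.gz'
print(p.suffixes)               # ['.tar', '.gz']
print(p.with_suffix('.bz2'))    # archive.tar.bz2  (only the last suffix is swapped)
print(p.with_suffix(''))        # archive.tar      (drops the last suffix)
print(p.with_name('data.csv'))  # data.csv         (replaces the whole file name)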
Example #58
0
    def save(self, file_prefix: str = "tracking", mode: str = "write"):
        """Save data to disk

        This will save attributes to disk.

        Parameters
        ----------
        file_prefix
            Common file_prefix for all files written by this method. It will be
            suffixed by the output format version (v{version}) and file
            extensions corresponding to what is saved in each file.
        mode
            Use "write" to delete previously existing files (which contain
            localization and tracking data) and write new ones. As a result,
            only the current data will end up in the file. Use "update" to
            add or modify data without deleting anything not present in this
            instance.
        """
        outfile = Path(f"{file_prefix}-v014")
        data = self.__dict__.copy()
        file_mode = "w" if mode == "write" else "a"

        with warnings.catch_warnings():
            import tables
            warnings.simplefilter("ignore", tables.NaturalNameWarning)

            with pd.HDFStore(outfile.with_suffix(".loc.h5"), file_mode) as s:
                for key, loc in data.pop("localizations", {}).items():
                    s.put(f"/locs/{key}", loc)
                for key, loc in data.pop("special_localizations", {}).items():
                    s.put(f"/special_locs/{key}", loc)
            with pd.HDFStore(outfile.with_suffix(".tracks.h5"),
                             file_mode) as s:
                for key, trc in data.pop("tracks", {}).items():
                    # Categorical exc_type does not allow for storing in fixed
                    # format while multiindex for both rows and columns does
                    # not work with table format…
                    s.put(f"/tracks/{key}",
                          trc.astype({("fret", "exc_type"): str}))
                for key, trc in data.pop("special_tracks", {}).items():
                    s.put(f"/special_tracks/{key}",
                          trc.astype({("fret", "exc_type"): str}))

        if mode == "write":
            old = {}
            seg = {}
        else:
            old = self.load(file_prefix,
                            loc=False,
                            tracks=False,
                            segment_images="segment_images" in data,
                            flatfield=False).__dict__.copy()
            seg = old.pop("segment_images", {})

        if "segment_images" in data:
            for k, v in data.pop("segment_images").items():
                for k2, v2 in v.items():
                    seg[f"{k}/{k2}"] = v2
            np.savez_compressed(outfile.with_suffix(".seg_img.npz"), **seg)
        if "flatfield" in data:
            if mode == "write":
                ffiles = io.get_files(fr"^{outfile}\.flat_([\w\s-]+)\.npz$")[0]
                for f in ffiles:
                    Path(f).unlink()
            for k, ff in data.pop("flatfield").items():
                ff.save(outfile.with_suffix(f".flat_{k}.npz"))
        old.update(data)
        if "data_dir" in old:
            old["data_dir"] = str(old["data_dir"])
        with outfile.with_suffix(".yaml").open("w") as f:
            io.yaml.safe_dump(old, f)
Example #59
0
    def get_path(self, compiler: str,
                 env: environment.Environment) -> T.Optional[Path]:
        p = Path(self.path)
        canonical_compiler = compiler
        if ((compiler in ['clang-cl', 'intel-cl'])
                or (env.machines.host.is_windows()
                    and compiler in {'pgi', 'dmd', 'ldc'})):
            canonical_compiler = 'msvc'

        has_pdb = False
        if self.language in {'c', 'cpp'}:
            has_pdb = canonical_compiler == 'msvc'
        elif self.language == 'd':
            # dmd's optlink does not generate pdb files
            has_pdb = env.coredata.compilers.host['d'].linker.id in {
                'link', 'lld-link'
            }

        # Abort if the platform does not match
        matches = {
            'msvc': canonical_compiler == 'msvc',
            'gcc': canonical_compiler != 'msvc',
            'cygwin': env.machines.host.is_cygwin(),
            '!cygwin': not env.machines.host.is_cygwin(),
        }.get(self.platform or '', True)
        if not matches:
            return None

        # Handle the different types
        if self.typ == 'file':
            return p
        elif self.typ == 'shared_lib':
            if env.machines.host.is_windows() or env.machines.host.is_cygwin():
                # Windows only has foo.dll and foo-X.dll
                if len(self.version) > 1:
                    return None
                if self.version:
                    p = p.with_name('{}-{}'.format(p.name, self.version[0]))
                return p.with_suffix('.dll')

            p = p.with_name('lib{}'.format(p.name))
            if env.machines.host.is_darwin():
                # MacOS only has libfoo.dylib and libfoo.X.dylib
                if len(self.version) > 1:
                    return None

                # pathlib.Path.with_suffix replaces, not appends
                suffix = '.dylib'
                if self.version:
                    suffix = '.{}{}'.format(self.version[0], suffix)
            else:
                # pathlib.Path.with_suffix replaces, not appends
                suffix = '.so'
                if self.version:
                    suffix = '{}.{}'.format(suffix, '.'.join(self.version))
            return p.with_suffix(suffix)
        elif self.typ == 'exe':
            if env.machines.host.is_windows() or env.machines.host.is_cygwin():
                return p.with_suffix('.exe')
        elif self.typ == 'pdb':
            if self.version:
                p = p.with_name('{}-{}'.format(p.name, self.version[0]))
            return p.with_suffix('.pdb') if has_pdb else None
        elif self.typ == 'implib' or self.typ == 'implibempty':
            if env.machines.host.is_windows() and canonical_compiler == 'msvc':
                # only MSVC doesn't generate empty implibs
                if self.typ == 'implibempty' and compiler == 'msvc':
                    return None
                return p.parent / (re.sub(r'^lib', '', p.name) + '.lib')
            elif env.machines.host.is_windows() or env.machines.host.is_cygwin(
            ):
                return p.with_suffix('.dll.a')
            else:
                return None
        elif self.typ == 'expr':
            return Path(
                platform_fix_name(p.as_posix(), canonical_compiler, env))
        else:
            raise RuntimeError('Invalid installed file type {}'.format(
                self.typ))

        return p

if __name__ == '__main__':
    try:
        # Get raw string output and convert to Python dict
        process_output = subprocess.run(COMMAND,
                                        check=True,
                                        encoding='utf-8',
                                        stdout=subprocess.PIPE).stdout
        output_as_dict = json.loads(process_output)

        # Verify dict schema
        validate_with_humanized_errors(output_as_dict, JSON_SCHEMA)

        # Write data to a temp file, atomically rewrite the IP ranges file
        temp_file_path = IP_RANGES_FILE.with_suffix('.tmp')
        with temp_file_path.open(mode='w') as temp_file:
            temp_file.write('\n'.join(i['network'] for i in output_as_dict))

        # Path.replace overwrites an existing target atomically (rename would fail on Windows)
        temp_file_path.replace(IP_RANGES_FILE)

    except subprocess.CalledProcessError as cpe:
        sys.exit('An error occurred while executing the bloxtool command.')

    except json.JSONDecodeError as jde:
        sys.exit('An error occurred parsing the bloxtool output as JSON.')

    except VoluptuousInvalid as vi:
        sys.exit(
            'The JSON data from bloxtool does not match the required schema.')
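If durability of the rewritten file matters, the write-then-replace step above can additionally flush and fsync before the swap. A minimal sketch, assuming a POSIX filesystem and that the temp file and the target live in the same directory:

import os
from pathlib import Path


def atomic_write(path: Path, text: str) -> None:
    """Write `text` to `path` durably: temp file + fsync + atomic replace."""
    tmp = path.with_suffix('.tmp')
    with tmp.open(mode='w') as fh:
        fh.write(text)
        fh.flush()
        os.fsync(fh.fileno())  # push the data to stable storage before the swap
    tmp.replace(path)          # atomic on POSIX; overwrites an existing target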