def main(args):
    """Gather individual ``.npy`` PDF arrays into one HDF storage file.

    Parameters
    ----------
    args
        Parsed command-line namespace.  The attributes read here are
        ``config`` (optional configuration passed to ``config.initialize``),
        ``args`` (input file names, expanded by ``getargswithstdin``) and
        ``output`` (storage file name; ``config.PDFSTORAGE`` is used when
        it is None).

    Zero-size input files are skipped.  One dot is printed per progress
    step, followed by a final newline.
    """
    from cifpdfsearch.cifpdf import HDFStorage
    from cifpdfsearch import config, normcodid
    from cifpdfsearch._utils import getargswithstdin

    if args.config:
        config.initialize(args.config)
    npyfiles = list(getargswithstdin(args.args))

    # Fall back to the configured storage path when no output was given.
    storage_name = args.output
    if storage_name is None:
        storage_name = config.PDFSTORAGE
    storage = HDFStorage(storage_name)
    # TODO - add check for existing config in HDFStorage
    storage.writeConfig(config.PDFCALCULATOR)
    rgrid = storage.rgrid

    # Spread the dots so that a full run prints about 81 of them.
    dot_interval = len(npyfiles) / 81.0
    next_dot_at = 0
    for position, path in enumerate(npyfiles):
        if not os.path.getsize(path):
            # Empty file - nothing to store.
            continue
        values = numpy.load(path)
        entry_id = normcodid(os.path.basename(path))
        storage.writePDF(entry_id, rgrid, values)
        if position < next_dot_at:
            continue
        print('.', end='', flush=True)
        next_dot_at += dot_interval
    print()
def codsearch_composition(composition, tolerance):
    """Generate normalized ids of 'cod' index documents matching a composition.

    The composition is split by ``composition_analysis`` into symbols and
    counts, and every symbol contributes one mandatory clause on the
    ``composition.<symbol>`` field.

    Parameters
    ----------
    composition
        Composition specification passed to ``composition_analysis``.
    tolerance
        When equal to 0 each count must match exactly (``term`` clause).
        Otherwise each count must fall within ``count - tolerance`` and
        ``count + tolerance``, bounds included (``range`` clause).

    Yields
    ------
    The result of ``normcodid`` applied to the ``_id`` of each hit.
    """
    from diffpy.pdfgetx.functs import composition_analysis
    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import scan

    client = Elasticsearch(ELASTICHOST)
    symbols, amounts = composition_analysis(composition)

    # Choose the clause builder once; all symbols use the same kind.
    if tolerance == 0:
        def build_clause(field, amount):
            return {'term': {field: amount}}
    else:
        def build_clause(field, amount):
            bounds = {
                "gte": amount - tolerance,
                "lte": amount + tolerance,
            }
            return {'range': {field: bounds}}

    required = [
        build_clause("composition." + symbol, amount)
        for symbol, amount in zip(symbols, amounts)
    ]
    body = {'query': {"bool": {"must": required}}}
    # Only document ids are needed, so the source payload is not fetched.
    hits = scan(client, query=body, index='cod', doc_type='cif',
                _source=False)
    for hit in hits:
        yield normcodid(hit['_id'])
def readPDF(self, codid):
    """Read the stored PDF values for one entry from the HDF file.

    Parameters
    ----------
    codid
        Entry identifier; it is normalized with ``normcodid`` before
        being substituted into the dataset path template.

    Returns
    -------
    tuple
        ``(self.rgrid, g)`` where ``g`` is the full content of the
        entry's dataset.
    """
    dataset_path = self._dspdfpath.format(normcodid(codid))
    with self._openhdf('r') as hfile:
        # Indexing with () reads the whole dataset while the file is open.
        values = hfile[dataset_path][()]
    return (self.rgrid, values)
def main(args):
    """Calculate PDFs for a list of CIF files and save them as ``.npy`` files.

    Parameters
    ----------
    args
        Parsed command-line namespace.  The attributes read here are
        ``config`` (optional configuration passed to ``config.initialize``),
        ``output`` (existing output directory), ``cifs`` (input names,
        expanded by ``getargswithstdin``) and ``force`` (recalculate even
        when the output file already exists).

    Raises
    ------
    ValueError
        When ``args.output`` is not a directory.

    Inputs are processed in random order.  An empty output file is created
    before each calculation.  If the calculation raises, the error is
    printed and the empty file is left in place.
    """
    from cifpdfsearch import config, cifpdf, normcodid
    from cifpdfsearch._utils import getargswithstdin
    from numpy.random import randint

    if args.config:
        config.initialize(args.config)
    if not os.path.isdir(args.output):
        raise ValueError("{} must be a directory".format(args.output))

    pending = list(getargswithstdin(args.cifs))
    pdfc = cifpdf.calculator.fromConfig(config.PDFCALCULATOR)
    while pending:
        # Draw the next input at random from those still pending.
        cif = pending.pop(randint(len(pending)))
        entry_id = normcodid(cif)
        target = os.path.join(args.output, 'cod{}.npy'.format(entry_id))
        already_present = not args.force and os.path.isfile(target)
        if already_present:
            continue
        # create the file
        with open(target, 'w'):
            pass
        try:
            _, values = calculate(pdfc, cif)
        except Exception as e:
            print('{}: {} {}'.format(cif, type(e).__name__, e))
        else:
            numpy.save(target, values.astype(numpy.float32))
def readPDF(self, codid):
    """Look up the PDF values for one entry in the in-memory tables.

    Parameters
    ----------
    codid
        Entry identifier.  A string is normalized with ``normcodid`` and
        converted to ``int``; any other value is used as the key directly.

    Returns
    -------
    tuple
        ``(self.rgrid, g)`` where ``g`` is ``self.gdata[row]`` and ``row``
        is taken from ``self.index`` for the given key.
    """
    key = int(normcodid(codid)) if isinstance(codid, str) else codid
    values = self.gdata[self.index[key]]
    return (self.rgrid, values)
def writePDF(self, codid, r, g):
    """Store PDF values for one entry in the HDF file.

    Parameters
    ----------
    codid
        Entry identifier; it is normalized with ``normcodid`` before
        being substituted into the dataset path template.
    r
        Grid of the PDF.  It must have the same shape as ``self.rgrid``
        and be ``allclose`` to it.
    g
        Values to write.  The dataset is required with ``g.shape`` and
        ``self.dtype``.

    Raises
    ------
    ValueError
        When `r` does not agree with ``self.rgrid``.
    """
    from numpy import allclose

    # Compare shapes first so allclose only sees same-shape arrays.
    grid_matches = r.shape == self.rgrid.shape and allclose(r, self.rgrid)
    if not grid_matches:
        raise ValueError(
            "r must equal the {} dataset".format(self._dsrgridpath))
    dataset_name = self._dspdfpath.format(normcodid(codid))
    with self._openhdf('a') as hfile:
        target = hfile.require_dataset(
            dataset_name, shape=g.shape, dtype=self.dtype)
        target[:] = g
def genidpdf_all_hdf(hfile):
    """Generate ``(id, values)`` pairs for every item in the 'pdfc' group.

    Parameters
    ----------
    hfile
        Open HDF file object (or any mapping) that has a ``'pdfc'`` entry
        supporting ``items()``.

    Yields
    ------
    tuple
        The item name passed through ``normcodid`` and the full content of
        the item, read by indexing it with ``()``.
    """
    for name, dataset in hfile['pdfc'].items():
        yield normcodid(name), dataset[()]