コード例 #1
0
ファイル: cifpdfsearch.py プロジェクト: pavoljuhas/ciflastic
def codsearch_composition(composition, tolerance):
    """
    Scan the 'cod' index for entries matching a chemical composition.

    Parameters
    ----------
    composition : str
        Chemical formula; it is split into symbols and counts with
        `composition_analysis` from diffpy.pdfgetx.
    tolerance : float
        Maximum allowed difference between a stored per-symbol count
        and the requested one.  A falsy value (0 or None) requests
        an exact match on every symbol.

    Yields
    ------
    The `_id` of every matching document after it is passed through
    `normcodid`.
    """
    from diffpy.pdfgetx.functs import composition_analysis
    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import scan
    es = Elasticsearch(ELASTICHOST)
    smbls, counts = composition_analysis(composition)
    # Use truthiness rather than `== 0` so that tolerance=None means an
    # exact match instead of failing on `c - None` in the range branch;
    # this is the same convention the `tol` argument of cifsearch follows.
    if not tolerance:
        clauses = [
            {'term': {"composition." + s: c}}
            for s, c in zip(smbls, counts)
        ]
    else:
        clauses = [
            {
                'range': {
                    "composition." + s: {
                        "gte": c - tolerance,
                        "lte": c + tolerance,
                    }
                }
            }
            for s, c in zip(smbls, counts)
        ]
    # Every symbol in the formula must satisfy its own clause.
    q = {"bool": {"must": clauses}}
    # Only the document ids are used below, so skip fetching the sources.
    gscan = scan(es,
                 query={'query': q},
                 index='cod',
                 doc_type='cif',
                 _source=False)
    for e in gscan:
        yield normcodid(e['_id'])
コード例 #2
0
def normalized_formula(formula):
    """
    Return the fraction of the total count contributed by each symbol.

    Parameters
    ----------
    formula : str
        Chemical formula; it is split into symbols and counts with
        `composition_analysis` from diffpy.pdfgetx.

    Returns
    -------
    dict
        Maps each symbol to the sum of its counts divided by the sum
        of all counts.  A symbol that occurs more than once in the
        parsed formula gets a single entry with the combined share.
    """
    from diffpy.pdfgetx.functs import composition_analysis
    smbls, counts = composition_analysis(formula)
    grand_total = sum(counts)
    # Start every symbol at zero so repeated symbols accumulate.
    per_symbol = {symbol: 0.0 for symbol in smbls}
    for symbol, count in zip(smbls, counts):
        per_symbol[symbol] = per_symbol[symbol] + count
    return {
        symbol: subtotal / grand_total
        for symbol, subtotal in per_symbol.items()
    }
コード例 #3
0
ファイル: cifpdfsearch.py プロジェクト: pavoljuhas/ciflastic
def cifsearch(q=None, composition=None, tol=None, fields=None, **kwargs):
    """
    Search the 'cod' index for CIF structures.

    Parameters
    ----------
    q : str, optional
        The string search query in Lucene syntax.
    query : dict, optional, keyword-only
        The search definition using the Query DSL.  It is sent as the
        request body unless `composition` is also given, in which case
        the composition query replaces it.
    composition : str, optional
        Chemical stoichiometry to be matched; it is split into symbols
        and counts with `composition_analysis` from diffpy.pdfgetx.
    tol : float, optional
        Maximum allowed difference from stoichiometry.  A falsy value
        (None or 0) requests an exact match on every symbol.
    fields : list or str, optional
        Names of CIF fields to be returned.  A string is split on
        commas and whitespace.
    kwargs : misc, optional
        Extra arguments passed to the `Elasticsearch.search` function.
        They are applied last and override the defaults set here.

    Returns
    -------
    dict or list
        When no `fields` are requested, the unmodified return value of
        `Elasticsearch.search`.  Otherwise a list with one item per hit:
        a tuple of the requested field values, or the bare value when
        exactly one field is requested.  A field absent from a hit
        is reported as None.
    """
    from diffpy.pdfgetx.functs import composition_analysis
    from elasticsearch import Elasticsearch
    es = Elasticsearch(ELASTICHOST)
    searchargs = {'q': q, 'index': 'cod'}
    if 'query' in kwargs:
        searchargs['body'] = kwargs.pop('query')
    if composition:
        smbls, counts = composition_analysis(composition)
        exact = not tol
        clauses = []
        for s, c in zip(smbls, counts):
            key = "composition." + s
            if exact:
                clause = {'term': {key: c}}
            else:
                clause = {'range': {key: {"gte": c - tol, "lte": c + tol}}}
            clauses.append(clause)
        # Every symbol in the formula must satisfy its own clause.
        searchargs['body'] = {'query': {"bool": {"must": clauses}}}
    searchargs.update(kwargs)
    if isinstance(fields, str):
        fields = fields.replace(',', ' ').split()
    if fields:
        searchargs['_source'] = fields
    res = es.search(**searchargs)
    if not fields:
        return res
    rows = [
        tuple(map(hit['_source'].get, fields))
        for hit in res['hits']['hits']
    ]
    if len(fields) == 1:
        # A single requested field is unwrapped from its 1-tuple.
        return [row[0] for row in rows]
    return rows