Example #1
0
class MatstractSearch:
    """Run search queries over the abstracts and materials collections.

    Free-text matching is delegated to ``ElasticConnection``; material
    filtering is expressed as an aggregation pipeline that is run through
    ``AtlasConnection``.
    """

    def __init__(self):
        """Open the database and text-search connections."""
        self._ac = AtlasConnection(db="production")
        self._ec = ElasticConnection()
        self.filters = []

    def search(self, text='', materials=(), max_results=1000):
        """Search by free text, by materials, or by both.

        Args:
            text: Free-text query passed to ``self._ec.query``. A falsy
                value skips the text search.
            materials: Materials handed to ``MaterialFilter``. A falsy value
                (``()``, ``None``) skips the material filter.
            max_results: Upper bound passed to the text query and used as
                the ``$limit`` of the material pipeline. It is raised to
                10000 whenever ``materials`` is supplied.

        Returns:
            For a text-only search, whatever ``get_documents_by_id`` returns
            for the ids found by the text query. Otherwise the result of
            aggregating the pipeline on the ``mats_`` collection (with
            neither ``text`` nor ``materials`` the pipeline is empty).
        """
        # Only widen the cap when a material filter is really in play. The
        # earlier ``materials is not None`` test was also true for the
        # default ``()``, which silently discarded the caller's max_results.
        if materials:
            max_results = 10000
        print("searching for {} and {}".format(text, materials))

        pipeline = []
        if materials:
            self.material_filter = MaterialFilter(materials)
            pipeline.extend(self.material_filter.conditions)
            pipeline.extend([
                # Join each material record with the abstracts sharing its
                # DOI and drop the records that have no abstract at all.
                {
                    "$lookup": {
                        "from": "abstracts",
                        "localField": "doi",
                        "foreignField": "doi",
                        "as": "abstracts"
                    }
                },
                {"$match": {"abstracts": {"$ne": []}}},
                {"$unwind": "$abstracts"},
                # Lift the joined abstract's fields to the top level.
                {
                    "$project": {
                        "_id": "$abstracts._id",
                        "doi": 1,
                        "abstract": "$abstracts.abstract",
                        "year": "$abstracts.year",
                        "authors": "$abstracts.authors",
                        "title": "$abstracts.title",
                        "journal": "$abstracts.journal",
                        "link": "$abstracts.link",
                        "chem_mentions": "$unique_mats"
                    }
                },
                # ``abstracts`` is not among the fields kept above, so this
                # exclusion looks redundant; kept to leave the pipeline
                # exactly as it was.
                {"$project": {"abstracts": 0}},
                # NOTE(review): the limit is applied before any document
                # filter conditions appended below - confirm that truncating
                # ahead of the id filter is intended.
                {"$limit": max_results},
            ])

        if text:
            ids = self._ec.query(text, max_results=max_results)
            self.document_filter = DocumentFilter(ids)
            if not materials:
                # Text-only search: no pipeline needed, fetch by id.
                return self._ac.get_documents_by_id(ids)
            pipeline.extend(self.document_filter.conditions)

        return self._ac.db.mats_.aggregate(pipeline)

    def more_like_this(self, text='', materials=(), max_results=100):
        """Return documents whose title or abstract resembles ``text``.

        Args:
            text: Reference text for the ``more_like_this`` query.
            materials: Accepted for signature compatibility; not used.
            max_results: Maximum number of hits requested (``size``).

        Returns:
            ``None`` when ``text`` is ``None`` or empty, otherwise the result
            of ``get_documents_by_id`` for the ids of the returned hits.
        """
        if text is None or text == '':
            return None

        query = {
            "query": {
                "more_like_this": {
                    "fields": ['title', 'abstract'],
                    "like": text
                }
            }
        }
        response = self._ec.search(index="tri_abstracts",
                                   body=query,
                                   size=max_results,
                                   request_timeout=60)
        hits = response["hits"]["hits"]
        ids = [ObjectId(hit["_id"]) for hit in hits]
        return self._ac.get_documents_by_id(ids)
Example #2
0
class MatstractSearch:
    """Entry point for every search query.

    Entity and material filters are turned into an aggregation pipeline run
    through ``AtlasConnection``; free-text matching goes through
    ``ElasticConnection``.
    """

    # Filter names accepted from callers.
    VALID_FILTERS = [
        "material",
        "property",
        "application",
        "descriptor",
        "characterization",
        "synthesis",
        "phase",
    ]
    # Maps a caller-facing filter name to the short code given to
    # ``SearchFilter`` as ``filter_type``.
    FILTER_DICT = {
        "material": "MAT",
        "property": "PRO",
        "application": "APL",
        "descriptor": "DSC",
        "characterization": "CMT",
        "synthesis": "SMT",
        "phase": "SPL",
    }

    def __init__(self, local=False):
        """Open the database and text-search connections.

        Args:
            local: Forwarded to ``AtlasConnection`` as its ``local`` argument.
        """
        self._ac = AtlasConnection(db="production", local=local)
        self._ec = ElasticConnection()
        self.filters = []

    def search(self,
               text=None,
               materials=None,
               max_results=1000,
               filters=None):
        """Search by entity filters or materials, optionally refined by text.

        Args:
            text: Free-text query; when truthy it is passed to
                ``self._ec.query`` together with the ids found so far.
            materials: Iterable of comma-separated material strings
                (``None`` entries are skipped). Only consulted when
                ``filters`` is falsy.
            max_results: Forwarded to the text query as ``max_results``.
            filters: Iterable of indexable entries whose item 0 is a key of
                ``FILTER_DICT`` and item 1 a comma-separated string of values
                (``None`` entries are skipped).

        Returns:
            Whatever ``self._ac.get_documents_by_id`` returns for the final
            ids (``None`` is passed when no filter and no text was applied).
        """
        print("searching for '{}' and {}".format(text, filters))

        # Stages shared by both filter kinds: join every matching record with
        # the abstracts of the same DOI, discard records without an abstract
        # and lift the abstract's fields to the top level.
        join_stages = [
            {
                "$lookup": {
                    "from": "abstracts",
                    "localField": "doi",
                    "foreignField": "doi",
                    "as": "abstracts"
                }
            },
            {"$match": {"abstracts": {"$ne": []}}},
            {"$unwind": "$abstracts"},
            {
                "$project": {
                    "_id": "$abstracts._id",
                    "doi": 1,
                    "abstract": "$abstracts.abstract",
                    "year": "$abstracts.year",
                    "authors": "$abstracts.authors",
                    "title": "$abstracts.title",
                    "journal": "$abstracts.journal",
                    "link": "$abstracts.link",
                    "chem_mentions": "$unique_mats"
                }
            },
            {"$project": {"abstracts": 0}},
        ]

        stages = []
        if filters:
            for spec in filters:
                if spec is None:
                    continue
                entity_filter = SearchFilter(
                    filter_type=self.FILTER_DICT[spec[0]],
                    values=spec[1].split(","))
                stages.extend(entity_filter.conditions)
            stages.extend(join_stages)
        elif materials:
            # Materials are ignored whenever filters were supplied.
            for names in materials:
                if names is None:
                    continue
                stages.extend(MaterialFilter(names.split(",")).conditions)
            stages.extend(join_stages)

        # ``None`` means "no filter was applied", which is distinct from an
        # empty list meaning "the filters matched nothing".
        matched_ids = (
            [str(doc["_id"]) for doc in self._ac.db.ne_071018.aggregate(stages)]
            if stages else None
        )

        # Skip the text query when the filters already ruled everything out.
        unrestricted = matched_ids is None
        if text and (unrestricted or matched_ids):
            matched_ids = self._ec.query(
                text, ids=matched_ids, max_results=max_results)

        return self._ac.get_documents_by_id(matched_ids)

    def more_like_this(self, text='', materials=(), max_results=100):
        """Return documents whose title or abstract resembles ``text``.

        Args:
            text: Reference text for the ``more_like_this`` query.
            materials: Accepted for signature compatibility; not used.
            max_results: Maximum number of hits requested (``size``).

        Returns:
            ``None`` when ``text`` is ``None`` or empty, otherwise the result
            of ``get_documents_by_id`` for the ids of the returned hits.
        """
        if text is None or text == '':
            return None

        similarity_query = {
            "query": {
                "more_like_this": {
                    "fields": ['title', 'abstract'],
                    "like": text
                }
            }
        }
        response = self._ec.search(index="tri_abstracts",
                                   body=similarity_query,
                                   size=max_results,
                                   request_timeout=60)
        object_ids = [ObjectId(hit["_id"]) for hit in response["hits"]["hits"]]
        return self._ac.get_documents_by_id(object_ids)