Example #1
0
 def __init__(self):
     """Create the AtlasConnection and ElasticConnection handles and an empty filter list."""
     # Atlas connection requested with db="production".
     self._ac = AtlasConnection(db="production")
     # Elastic connection built with its default arguments.
     self._ec = ElasticConnection()
     # Starts out as an empty list.
     self.filters = []
Example #2
0
class MatstractSearch:
    """The class running all search queries"""

    def __init__(self):
        """Create the AtlasConnection and ElasticConnection handles and an empty filter list."""
        self._ac = AtlasConnection(db="production")
        self._ec = ElasticConnection()
        self.filters = []

    def search(self, text='', materials=(), max_results=1000):
        """Search by free text, by materials, or by both.

        Args:
            text: Query handed to ``self._ec.query``; skipped when empty.
            materials: Value handed to ``MaterialFilter``; skipped when empty.
            max_results: Upper bound passed to the text query. When a material
                filter is used it is replaced by 10000, which is also the
                ``$limit`` of the aggregation.

        Returns:
            For a text-only search, the result of
            ``self._ac.get_documents_by_id``. Otherwise the result of running
            the assembled pipeline through ``self._ac.db.mats_.aggregate``
            (an empty pipeline when neither ``text`` nor ``materials`` is
            given).
        """
        # Widen the limit only when a material filter will really be applied.
        # A ``materials is not None`` test would always pass for the ``()``
        # default and silently discard the caller's ``max_results``.
        if materials:
            max_results = 10000
        print("searching for {} and {}".format(text, materials))
        pipeline = []
        if materials:
            self.material_filter = MaterialFilter(materials)
            pipeline.extend(self.material_filter.conditions)
            # Join every match to the "abstracts" collection on "doi".
            pipeline.append({
                "$lookup": {
                    "from": "abstracts",
                    "localField": "doi",
                    "foreignField": "doi",
                    "as": "abstracts"
                }
            })
            # Keep only rows for which the join found something.
            pipeline.append({"$match": {"abstracts": {"$ne": []}}})
            pipeline.append({"$unwind": "$abstracts"})
            # Re-key each row on the abstract's own _id and copy its fields up.
            pipeline.append({
                "$project": {
                    "_id": "$abstracts._id",
                    "doi": 1,
                    "abstract": "$abstracts.abstract",
                    "year": "$abstracts.year",
                    "authors": "$abstracts.authors",
                    "title": "$abstracts.title",
                    "journal": "$abstracts.journal",
                    "link": "$abstracts.link",
                    "chem_mentions": "$unique_mats"
                }
            })
            # NOTE(review): probably redundant after the projection above;
            # kept so the pipeline sent to the server is unchanged.
            pipeline.append({"$project": {"abstracts": 0}})
            # NOTE(review): the limit is applied before the text-id conditions
            # are appended below - confirm that ordering is intended.
            pipeline.append({"$limit": max_results})
        if text:
            ids = self._ec.query(text, max_results=max_results)
            self.document_filter = DocumentFilter(ids)
            if not materials:
                # Text-only search: the ids can be fetched directly.
                return self._ac.get_documents_by_id(ids)
            pipeline.extend(self.document_filter.conditions)
        return self._ac.db.mats_.aggregate(pipeline)

    def more_like_this(self, text='', materials=(), max_results=100):
        """Return documents similar to ``text``.

        Args:
            text: Used as the ``like`` value of a ``more_like_this`` query
                over the ``title`` and ``abstract`` fields.
            materials: Accepted but not used by this method.
            max_results: Passed as ``size`` to the search call.

        Returns:
            ``None`` when ``text`` is ``None`` or empty; otherwise the result
            of ``self._ac.get_documents_by_id`` for the ids of the hits.
        """
        if text is None or text == '':
            return None

        query = {
            "query": {
                "more_like_this": {
                    "fields": ['title', 'abstract'],
                    "like": text
                }
            }
        }
        hits = self._ec.search(index="tri_abstracts",
                               body=query,
                               size=max_results,
                               request_timeout=60)["hits"]["hits"]
        ids = [ObjectId(h["_id"]) for h in hits]
        return self._ac.get_documents_by_id(ids)
Example #3
0
import dash_html_components as html
import dash_core_components as dcc
import pandas as pd
from matstract.models.database import AtlasConnection, ElasticConnection
from matstract.extract import parsing
from matstract.models.search import MatstractSearch
import re

# Module-level handles, created once when this module is imported.
db = AtlasConnection(db="production").db  # `.db` attribute of the db="production" connection
client = ElasticConnection()  # Elastic connection built with its default arguments


def highlight_material(body, material):
    """Interleave ``body`` with an ``html.Mark`` at every occurrence of ``material``.

    Returns ``body`` unchanged when ``material`` is empty or does not occur in
    it. Otherwise returns a list in which the text pieces around each
    occurrence alternate with one shared ``html.Mark(material)`` component.
    """
    mark = html.Mark(material)
    if len(material) == 0 or material not in body:
        return body
    segments = body.split(material)
    # Every segment except the last is followed by the highlighted phrase.
    pieces = [item for segment in segments[:-1] for item in (segment, mark)]
    pieces.append(segments[-1])
    return pieces


def highlight_multiple_materials(body, materials):
    if len(materials) > 0 and any([material in body for material in materials]):
        newtext = []
        for material in materials:
            highlighted_phrase = html.Mark(material)
            if len(newtext) > 0:
Example #4
0
from dash.dependencies import Input, Output, State
from matstract.web.view import keyword_app
from matstract.models.database import AtlasConnection, ElasticConnection
from matstract.web.view.similar_app import random_abstract

# Module-level handles, created once when this module is imported.
db = AtlasConnection().db  # `.db` attribute of a default-constructed AtlasConnection
es = ElasticConnection()  # handed to keyword_app.get_themes by the themes callback


def bind(app):
    """Register the keyword and theme callbacks on ``app``."""
    # Fires on 'keyword-button' clicks; reads the 'keyword-material' value as
    # state and writes the children of 'keywords-extrated'.
    @app.callback(Output('keywords-extrated',
                         'children'), [Input('keyword-button', 'n_clicks')],
                  [State('keyword-material', 'value')])
    def keywords_table(n_clicks, text):
        # n_clicks only triggers the callback; its value is not used.
        if text is not None and text != '':
            return keyword_app.get_keywords(text)
        else:
            return ""

    # Fires on 'themes-random-abstract' clicks and writes the value of
    # 'themes-textarea' with whatever random_abstract() returns.
    @app.callback(Output('themes-textarea', 'value'),
                  [Input('themes-random-abstract', 'n_clicks')])
    def fill_random(n_clicks):
        print("filling random")
        return random_abstract()

    # Fires on 'themes-button' clicks; reads the 'themes-textarea' value as
    # state and writes the children of 'themes-extrated'.
    @app.callback(Output('themes-extrated',
                         'children'), [Input('themes-button', 'n_clicks')],
                  [State('themes-textarea', 'value')])
    def themes(n_clicks, text):
        # NOTE(review): no else branch is visible in this excerpt, so as shown
        # the callback returns None for missing/empty text - confirm against
        # the full file.
        if text is not None and text != '':
            return keyword_app.get_themes(text, es)
Example #5
0
class MatstractSearch:
    """The class running all search queries"""

    VALID_FILTERS = [
        "material", "property", "application", "descriptor",
        "characterization", "synthesis", "phase"
    ]
    # Maps each filter name to the short code handed to SearchFilter.
    FILTER_DICT = {
        "material": "MAT",
        "property": "PRO",
        "application": "APL",
        "descriptor": "DSC",
        "characterization": "CMT",
        "synthesis": "SMT",
        "phase": "SPL",
    }

    def __init__(self, local=False):
        """Create the connection handles and an empty filter list.

        Args:
            local: Forwarded unchanged to ``AtlasConnection`` as ``local``.
        """
        self._ac = AtlasConnection(db="production", local=local)
        self._ec = ElasticConnection()
        self.filters = []

    @staticmethod
    def _abstract_join_stages():
        """Return fresh stages that join rows to ``abstracts`` on ``doi`` and reshape them."""
        return [
            {
                "$lookup": {
                    "from": "abstracts",
                    "localField": "doi",
                    "foreignField": "doi",
                    "as": "abstracts"
                }
            },
            # Keep only rows for which the join found something.
            {"$match": {"abstracts": {"$ne": []}}},
            {"$unwind": "$abstracts"},
            # Re-key each row on the abstract's own _id and copy its fields up.
            {
                "$project": {
                    "_id": "$abstracts._id",
                    "doi": 1,
                    "abstract": "$abstracts.abstract",
                    "year": "$abstracts.year",
                    "authors": "$abstracts.authors",
                    "title": "$abstracts.title",
                    "journal": "$abstracts.journal",
                    "link": "$abstracts.link",
                    "chem_mentions": "$unique_mats"
                }
            },
            {"$project": {"abstracts": 0}},
        ]

    def search(self,
               text=None,
               materials=None,
               max_results=1000,
               filters=None):
        """Search by entity filters or materials, optionally narrowed by text.

        Args:
            text: Query handed to ``self._ec.query``; skipped when empty.
            materials: Iterable of comma-separated strings, each turned into
                a ``MaterialFilter``; ``None`` entries are skipped. Ignored
                when ``filters`` is non-empty.
            max_results: Passed to ``self._ec.query``; only used when ``text``
                is given.
            filters: Iterable of ``(name, values)`` pairs where ``name`` is a
                key of ``FILTER_DICT`` and ``values`` is a comma-separated
                string; ``None`` entries are skipped.

        Returns:
            The result of ``self._ac.get_documents_by_id(ids)``. ``ids`` is
            ``None`` when no filter, material or text was supplied.

        Raises:
            KeyError: If a filter name is not a key of ``FILTER_DICT``.
        """
        print("searching for '{}' and {}".format(text, filters))
        pipeline = []
        if filters:
            for f in filters:
                if f is not None:
                    search_filter = SearchFilter(
                        filter_type=self.FILTER_DICT[f[0]],
                        values=f[1].split(","))
                    pipeline.extend(search_filter.conditions)
            pipeline.extend(self._abstract_join_stages())
        elif materials:  # if filters are supplied don't look at materials
            for material in materials:
                if material is not None:
                    material_filter = MaterialFilter(material.split(","))
                    pipeline.extend(material_filter.conditions)
            pipeline.extend(self._abstract_join_stages())
        if pipeline:
            results = self._ac.db.ne_071018.aggregate(pipeline)
            ids = [str(entry["_id"]) for entry in results]
        else:
            # No structured constraint at all: leave the id set unrestricted.
            ids = None
        # Skip the text query when the filters already matched nothing.
        if text and (ids is None or len(ids) > 0):
            ids = self._ec.query(text, ids=ids, max_results=max_results)
        return self._ac.get_documents_by_id(ids)

    def more_like_this(self, text='', materials=(), max_results=100):
        """Return documents similar to ``text``.

        Args:
            text: Used as the ``like`` value of a ``more_like_this`` query
                over the ``title`` and ``abstract`` fields.
            materials: Accepted but not used by this method.
            max_results: Passed as ``size`` to the search call.

        Returns:
            ``None`` when ``text`` is ``None`` or empty; otherwise the result
            of ``self._ac.get_documents_by_id`` for the ids of the hits.
        """
        if text is None or text == '':
            return None

        query = {
            "query": {
                "more_like_this": {
                    "fields": ['title', 'abstract'],
                    "like": text
                }
            }
        }
        hits = self._ec.search(index="tri_abstracts",
                               body=query,
                               size=max_results,
                               request_timeout=60)["hits"]["hits"]
        ids = [ObjectId(h["_id"]) for h in hits]
        return self._ac.get_documents_by_id(ids)
Example #6
0
 def __init__(self, local=False):
     """Create the AtlasConnection and ElasticConnection handles and an empty filter list.

     Args:
         local: Forwarded unchanged to ``AtlasConnection`` as ``local``.
     """
     # Atlas connection requested with db="production".
     self._ac = AtlasConnection(db="production", local=local)
     # Elastic connection built with its default arguments.
     self._ec = ElasticConnection()
     # Starts out as an empty list.
     self.filters = []