def __init__(self):
    """Set up the two backend connections and an empty filter list.

    An ``AtlasConnection`` is created for the ``"production"`` database and
    an ``ElasticConnection`` is created with its default arguments.
    """
    # Connections are created in this order: Atlas first, then Elastic.
    self._ac = AtlasConnection(db="production")
    self._ec = ElasticConnection()

    # No filters are registered at construction time.
    self.filters = list()
class MatstractSearch:
    """The class running all search queries.

    Holds an ``AtlasConnection`` (``self._ac``) and an ``ElasticConnection``
    (``self._ec``) and combines a free-text query with an optional
    materials filter.
    """

    def __init__(self):
        """Create the backend connections and start with no filters."""
        self._ac = AtlasConnection(db="production")
        self._ec = ElasticConnection()
        self.filters = []

    def search(self, text='', materials=(), max_results=1000):
        """Search by free text and/or a collection of materials.

        Args:
            text: Free-text query passed to ``self._ec.query``. Skipped when
                empty or ``None``.
            materials: Materials handed to ``MaterialFilter``. Skipped when
                empty or ``None``.
            max_results: Upper bound on the number of results. Raised to
                10000 whenever a materials filter is supplied.

        Returns:
            The result of ``self._ac.get_documents_by_id`` for a text-only
            search, otherwise the result of aggregating the built pipeline
            on the ``mats_`` collection.
        """
        # The previous guard was ``materials is not None``, which is always
        # true for the default ``()`` and therefore discarded the caller's
        # ``max_results`` on every call. Only widen the limit when a
        # materials filter is actually present.
        if materials:
            max_results = 10000
        print("searching for {} and {}".format(text, materials))

        pipeline = []
        if materials:
            self.material_filter = MaterialFilter(materials)
            pipeline.extend(self.material_filter.conditions)
            pipeline.extend([
                # Attach the abstracts that share a DOI with each match.
                {
                    "$lookup": {
                        "from": "abstracts",
                        "localField": "doi",
                        "foreignField": "doi",
                        "as": "abstracts"
                    }
                },
                # Drop matches for which the lookup found no abstract.
                {"$match": {"abstracts": {"$ne": []}}},
                # Emit one document per joined abstract.
                {"$unwind": "$abstracts"},
                # Reshape each result around the abstract's own fields.
                {
                    "$project": {
                        "_id": "$abstracts._id",
                        "doi": 1,
                        "abstract": "$abstracts.abstract",
                        "year": "$abstracts.year",
                        "authors": "$abstracts.authors",
                        "title": "$abstracts.title",
                        "journal": "$abstracts.journal",
                        "link": "$abstracts.link",
                        "chem_mentions": "$unique_mats"
                    }
                },
                {"$project": {"abstracts": 0}},
                {"$limit": max_results},
            ])

        if text:
            ids = self._ec.query(text, max_results=max_results)
            self.document_filter = DocumentFilter(ids)
            if not materials:
                # Text-only search: the text hits are the final answer.
                return self._ac.get_documents_by_id(ids)
            pipeline.extend(self.document_filter.conditions)

        return self._ac.db.mats_.aggregate(pipeline)

    def more_like_this(self, text='', materials=(), max_results=100):
        """Find documents whose title or abstract resembles ``text``.

        Args:
            text: Reference text for the ``more_like_this`` query.
            materials: Accepted but not used by this method.
            max_results: Passed as ``size`` to the search call.

        Returns:
            ``None`` when ``text`` is ``None`` or empty, otherwise the result
            of ``self._ac.get_documents_by_id`` for the ids of the hits.
        """
        if text is None or text == '':
            return None

        query = {
            "query": {
                "more_like_this": {
                    "fields": ['title', 'abstract'],
                    "like": text
                }
            }
        }
        response = self._ec.search(
            index="tri_abstracts",
            body=query,
            size=max_results,
            request_timeout=60)
        hits = response["hits"]["hits"]
        ids = [ObjectId(h["_id"]) for h in hits]
        return self._ac.get_documents_by_id(ids)
import dash_html_components as html import dash_core_components as dcc import pandas as pd from matstract.models.database import AtlasConnection, ElasticConnection from matstract.extract import parsing from matstract.models.search import MatstractSearch import re db = AtlasConnection(db="production").db client = ElasticConnection() def highlight_material(body, material): highlighted_phrase = html.Mark(material) if len(material) > 0 and material in body: chopped = body.split(material) newtext = [] for piece in chopped[:-1]: newtext.append(piece) newtext.append(highlighted_phrase) newtext.append(chopped[-1]) return newtext return body def highlight_multiple_materials(body, materials): if len(materials) > 0 and any([material in body for material in materials]): newtext = [] for material in materials: highlighted_phrase = html.Mark(material) if len(newtext) > 0:
from dash.dependencies import Input, Output, State from matstract.web.view import keyword_app from matstract.models.database import AtlasConnection, ElasticConnection from matstract.web.view.similar_app import random_abstract db = AtlasConnection().db es = ElasticConnection() def bind(app): @app.callback(Output('keywords-extrated', 'children'), [Input('keyword-button', 'n_clicks')], [State('keyword-material', 'value')]) def keywords_table(n_clicks, text): if text is not None and text != '': return keyword_app.get_keywords(text) else: return "" @app.callback(Output('themes-textarea', 'value'), [Input('themes-random-abstract', 'n_clicks')]) def fill_random(n_clicks): print("filling random") return random_abstract() @app.callback(Output('themes-extrated', 'children'), [Input('themes-button', 'n_clicks')], [State('themes-textarea', 'value')]) def themes(n_clicks, text): if text is not None and text != '': return keyword_app.get_themes(text, es)
class MatstractSearch:
    """The class running all search queries.

    Holds an ``AtlasConnection`` (``self._ac``) and an ``ElasticConnection``
    (``self._ec``) and combines typed filters (or a materials filter) with
    an optional free-text query.
    """

    VALID_FILTERS = [
        "material",
        "property",
        "application",
        "descriptor",
        "characterization",
        "synthesis",
        "phase"
    ]

    # Maps a filter name to the code passed to SearchFilter as filter_type.
    FILTER_DICT = {
        "material": "MAT",
        "property": "PRO",
        "application": "APL",
        "descriptor": "DSC",
        "characterization": "CMT",
        "synthesis": "SMT",
        "phase": "SPL",
    }

    def __init__(self, local=False):
        """Create the backend connections and start with no filters.

        Args:
            local: Forwarded to ``AtlasConnection`` as its ``local`` argument.
        """
        self._ac = AtlasConnection(db="production", local=local)
        self._ec = ElasticConnection()
        self.filters = []

    @staticmethod
    def _abstract_join_stages():
        """Return fresh pipeline stages joining matches to their abstracts."""
        return [
            # Attach the abstracts that share a DOI with each match.
            {
                "$lookup": {
                    "from": "abstracts",
                    "localField": "doi",
                    "foreignField": "doi",
                    "as": "abstracts"
                }
            },
            # Drop matches for which the lookup found no abstract.
            {"$match": {"abstracts": {"$ne": []}}},
            # Emit one document per joined abstract.
            {"$unwind": "$abstracts"},
            # Reshape each result around the abstract's own fields.
            {
                "$project": {
                    "_id": "$abstracts._id",
                    "doi": 1,
                    "abstract": "$abstracts.abstract",
                    "year": "$abstracts.year",
                    "authors": "$abstracts.authors",
                    "title": "$abstracts.title",
                    "journal": "$abstracts.journal",
                    "link": "$abstracts.link",
                    "chem_mentions": "$unique_mats"
                }
            },
            {"$project": {"abstracts": 0}},
        ]

    def search(self, text=None, materials=None, max_results=1000, filters=None):
        """Search by typed filters (or materials) and/or free text.

        Args:
            text: Free-text query passed to ``self._ec.query``. Skipped when
                empty or ``None``.
            materials: Iterable of comma-separated material strings; ``None``
                entries are skipped. Ignored when ``filters`` is supplied.
            max_results: Forwarded to ``self._ec.query``.
            filters: Iterable of entries whose first item is a key of
                ``FILTER_DICT`` and whose second item is a comma-separated
                string of values; ``None`` entries are skipped.

        Returns:
            The result of ``self._ac.get_documents_by_id`` for the final ids
            (``None`` when neither a filter pipeline nor a text query ran).

        Raises:
            KeyError: If a filter names a type missing from ``FILTER_DICT``.
        """
        print("searching for '{}' and {}".format(text, filters))

        pipeline = []
        if filters:
            for f in filters:
                if f is not None:
                    search_filter = SearchFilter(
                        filter_type=self.FILTER_DICT[f[0]],
                        values=f[1].split(","))
                    pipeline.extend(search_filter.conditions)
            pipeline.extend(self._abstract_join_stages())
        elif materials:  # if filters are supplied don't look at materials
            for material in materials:
                if material is not None:
                    material_filter = MaterialFilter(material.split(","))
                    pipeline.extend(material_filter.conditions)
            pipeline.extend(self._abstract_join_stages())

        if pipeline:
            results = self._ac.db.ne_071018.aggregate(pipeline)
            ids = [str(entry["_id"]) for entry in results]
        else:
            # No filter pipeline was built, so the text query is unrestricted.
            ids = None

        # Skip the text query when the filters already matched nothing.
        if text and (ids is None or len(ids) > 0):
            ids = self._ec.query(text, ids=ids, max_results=max_results)
        return self._ac.get_documents_by_id(ids)

    def more_like_this(self, text='', materials=(), max_results=100):
        """Find documents whose title or abstract resembles ``text``.

        Args:
            text: Reference text for the ``more_like_this`` query.
            materials: Accepted but not used by this method.
            max_results: Passed as ``size`` to the search call.

        Returns:
            ``None`` when ``text`` is ``None`` or empty, otherwise the result
            of ``self._ac.get_documents_by_id`` for the ids of the hits.
        """
        if text is None or text == '':
            return None

        query = {
            "query": {
                "more_like_this": {
                    "fields": ['title', 'abstract'],
                    "like": text
                }
            }
        }
        response = self._ec.search(
            index="tri_abstracts",
            body=query,
            size=max_results,
            request_timeout=60)
        hits = response["hits"]["hits"]
        ids = [ObjectId(h["_id"]) for h in hits]
        return self._ac.get_documents_by_id(ids)
def __init__(self, local=False):
    """Set up the two backend connections and an empty filter list.

    Args:
        local: Forwarded unchanged to ``AtlasConnection`` as its ``local``
            keyword argument.
    """
    # Connections are created in this order: Atlas first, then Elastic.
    self._ac = AtlasConnection(db="production", local=local)
    self._ec = ElasticConnection()

    # No filters are registered at construction time.
    self.filters = list()