Ejemplo n.º 1
0
    def post_save(cls, sender, document, **kwargs):
        """Invalidate derived documents after a contribution is saved.

        Deletes the notebook and card keyed by this document's id so they
        get rebuilt on the next request.
        """
        # deferred imports break the circular dependency with this module
        from mpcontribs.api.cards.document import Cards
        from mpcontribs.api.notebooks.document import Notebooks

        for derived in (Notebooks, Cards):
            derived.objects(pk=document.id).delete()
Ejemplo n.º 2
0
    def post_save(cls, sender, document, **kwargs):
        """Signal handler run after a Projects document is saved.

        On creation: email admins an approval link and create an SNS topic
        for the project, subscribing the owner by email. On update: email
        the project's topic when the project gets approved, and delete
        derived notebooks/cards whenever any field changed.
        """
        admin_email = current_app.config["MAIL_DEFAULT_SENDER"]
        admin_topic = current_app.config["MAIL_TOPIC"]
        if kwargs.get("created"):
            # sign owner+project into an expiring token for the approval link
            ts = current_app.config["USTS"]
            email_project = [document.owner, document.project]
            token = ts.dumps(email_project)
            scheme = "http" if current_app.config["DEBUG"] else "https"
            link = url_for("projects.applications",
                           token=token,
                           _scheme=scheme,
                           _external=True)
            subject = f'New project "{document.project}"'
            # token lifetime in hours, shown in the email template
            hours = int(current_app.config["USTS_MAX_AGE"] / 3600)
            html = render_template("admin_email.html",
                                   doc=document,
                                   link=link,
                                   hours=hours)
            send_email(admin_topic, subject, html)
            # per-project SNS topic; owner is subscribed via email protocol
            resp = sns_client.create_topic(
                Name=f"mpcontribs_{document.project}",
                Attributes={"DisplayName": f"MPContribs {document.title}"},
            )
            sns_client.subscribe(TopicArn=resp["TopicArn"],
                                 Protocol="email",
                                 Endpoint=document.owner)
        else:
            # keys set in this update (first element of mongoengine delta)
            set_keys = document._delta()[0].keys()
            if "is_approved" in set_keys and document.is_approved:
                # project just got approved -> notify via its SNS topic
                subject = f'Your project "{document.project}" has been approved'
                if current_app.config["DEBUG"]:
                    portal = "http://localhost:" + os.environ["PORTAL_PORT"]
                else:
                    portal = "https://" + os.environ["PORTAL_CNAME"]
                html = render_template(
                    "owner_email.html",
                    approved=True,
                    admin_email=admin_email,
                    host=portal,
                )
                # derive the project topic ARN from the admin topic ARN by
                # swapping the last (name) component
                topic_arn = ":".join(
                    admin_topic.split(":")[:-1] +
                    ["mpcontribs_" + document.project])
                send_email(topic_arn, subject, html)
            if set_keys:
                # import here to avoid circular
                from mpcontribs.api.contributions.document import Contributions
                from mpcontribs.api.notebooks.document import Notebooks
                from mpcontribs.api.cards.document import Cards

                # any change invalidates notebooks/cards derived from all of
                # this project's contributions
                contributions = Contributions.objects.only("pk").filter(
                    project=document.project)
                Notebooks.objects(contribution__in=contributions).delete()
                Cards.objects(contribution__in=contributions).delete()
Ejemplo n.º 3
0
    def pre_delete(cls, sender, document, **kwargs):
        """Clean up documents referenced by a contribution before deletion."""
        fields = ["notebook", *COMPONENTS.keys()]
        document.reload(*fields)

        # remove reference documents
        if document.notebook is not None:
            from mpcontribs.api.notebooks.document import Notebooks

            Notebooks.objects(id=document.notebook.id).delete()

        # delete a component only when no other contribution references it
        for field in COMPONENTS:
            for component_doc in getattr(document, field):
                if sender.objects(**{field: component_doc.id}).count() < 2:
                    component_doc.delete()
Ejemplo n.º 4
0
    def get(self, cid):
        """Retrieve (and build) notebook for a single contribution [internal].
        ---
        operationId: get_entry
        parameters:
            - name: cid
              in: path
              type: string
              pattern: '^[a-f0-9]{24}$'
              required: true
              description: contribution ID (ObjectId)
        responses:
            200:
                description: single notebook
                schema:
                    $ref: '#/definitions/NotebooksSchema'
        """
        try:
            # serve the cached notebook when it was built before
            nb = Notebooks.objects.get(id=cid)
            nb.restore()
        except DoesNotExist:
            # build from scratch: client setup + contribution loader cells
            client_cell = nbf.new_code_cell(
                "# provide apikey to `load_client` in order to connect to api.mpcontribs.org\n"
                "# or use bravado (see https://mpcontribs.org/api)\n"
                "from mpcontribs.client import load_client\n"
                "client = load_client()"
            )
            loader_cell = nbf.new_code_cell(
                "from mpcontribs.io.archieml.mpfile import MPFile\n"
                f"result = client.contributions.get_entry(cid='{cid}').response().result\n"
                "mpfile = MPFile.from_contribution(result)"
            )
            cells = [client_cell, loader_cell]
            # one display cell per data section (h/t/g/s)
            cells.extend(nbf.new_code_cell(f"mpfile.{typ}data") for typ in "htgs")
            nb = nbf.new_notebook()
            nb["cells"] = cells
            exprep.preprocess(nb, {})
            nb = Notebooks(**nb)
            nb.id = cid  # to link to the according contribution
            nb.save()  # calls Notebooks.clean()

        del nb.id
        return nb
Ejemplo n.º 5
0
 def post_save(cls, sender, document, **kwargs):
     """Keep the contribution's notebook in sync after a structure save."""
     # root keys of fields changed in this save (delta[0] = set operations)
     set_root_keys = set(k.split(".", 1)[0] for k in document._delta()[0].keys())
     cid = document.contribution.id
     nbs = Notebooks.objects(pk=cid)
     if not set_root_keys or set_root_keys == {"is_public"}:
         # only visibility changed: mirror the flag onto the notebook
         nbs.update(set__is_public=document.is_public)
     else:
         # content changed: drop derived data so it is rebuilt on demand
         nbs.delete()
         document.update(unset__cif=True)
         Contributions.objects(pk=cid).update(unset__structures=True)
Ejemplo n.º 6
0
    def get(self, cid):
        """Retrieve (and build) notebook for a single contribution [internal].
        ---
        operationId: get_entry
        parameters:
            - name: cid
              in: path
              type: string
              pattern: '^[a-f0-9]{24}$'
              required: true
              description: contribution ID (ObjectId)
        responses:
            200:
                description: single notebook
                schema:
                    $ref: '#/definitions/NotebooksSchema'
        """
        try:
            # serve the cached notebook when it was built before
            nb = Notebooks.objects.get(id=cid)
            nb.restore()
        except DoesNotExist:
            # build from scratch: client setup + contribution loader cells
            cells = [
                nbf.new_code_cell(
                    "# provide apikey to `load_client` in order to connect to api.mpcontribs.org\n"
                    "# or use bravado (see https://mpcontribs.org/api)\n"
                    "from mpcontribs.client import load_client\n"
                    "client = load_client()"),
                nbf.new_code_cell(
                    "from mpcontribs.io.archieml.mpfile import MPFile\n"
                    f"result = client.contributions.get_entry(cid='{cid}').response().result\n"
                    "mpfile = MPFile.from_contribution(result)")
            ]
            # one display cell per data section (h/t/g/s)
            for typ in ['h', 't', 'g', 's']:
                cells.append(nbf.new_code_cell(f"mpfile.{typ}data"))
            nb = nbf.new_notebook()
            nb['cells'] = cells
            exprep.preprocess(nb, {})
            nb = Notebooks(**nb)
            nb.id = cid  # to link to the according contribution
            nb.save()  # calls Notebooks.clean()

        del nb.id
        return nb
Ejemplo n.º 7
0
    def post_save(cls, sender, document, **kwargs):
        """Sync derived notebook/card state after a contribution update."""
        # deferred imports break the circular dependency with this module
        from mpcontribs.api.cards.document import Cards
        from mpcontribs.api.notebooks.document import Notebooks

        # TODO unset and rebuild columns key in Project for updated (nested) keys only
        delta_keys = document._delta()[0].keys()
        root_keys = {k.split(".", 1)[0] for k in delta_keys}
        notebooks = Notebooks.objects(pk=document.id)
        cards = Cards.objects(pk=document.id)

        if not root_keys or root_keys == {"is_public"}:
            # only visibility changed -> propagate the flag
            notebooks.update(set__is_public=document.is_public)
            cards.update(set__is_public=document.is_public)
        else:
            # content changed -> drop derived docs so they get rebuilt
            notebooks.delete()
            cards.delete()
            if "data" in root_keys:
                Projects.objects(pk=document.project.id).update(unset__columns=True)
Ejemplo n.º 8
0
    def get(self, **kwargs):
        """Return the notebook for a contribution, executing it on demand.

        kwargs["pk"] holds the contribution ObjectId. If the notebook exists
        but its last cell has no outputs, all code cells are executed in a
        kernel with per-cell progress published via server-sent events. If
        the notebook does not exist, an unexecuted one is built and saved.
        """
        cid = kwargs["pk"]
        try:
            super().get(**kwargs)  # trigger DoesNotExist if necessary
            nb = Notebooks.objects.get(pk=cid)
            try:
                # empty outputs on the last cell => not executed yet
                if not nb.cells[-1]["outputs"]:
                    kernel = client.start_kernel("python3")

                    for idx, cell in enumerate(nb.cells):
                        if cell["cell_type"] == "code":
                            output = kernel.execute(cell["source"])
                            if output:
                                # render HTML output as such, else plain text
                                outtype = ("text/html"
                                           if output.startswith("<div") else
                                           "text/plain")
                                cell["outputs"].append({
                                    "data": {
                                        outtype: output
                                    },
                                    "metadata": {},
                                    "transient": {},
                                    "output_type":
                                    "display_data",
                                })
                            # publish per-cell progress (1-based index)
                            sse.publish({"message": idx + 1},
                                        type="notebook",
                                        channel=cid)

                    # replace privileged client cell with a placeholder
                    nb.cells[1] = nbf.new_code_cell(
                        "client = load_client('<your-api-key-here>')")
                    nb.save()  # calls Notebooks.clean()
                    # message 0 signals completion to the SSE consumer
                    sse.publish({"message": 0}, type="notebook", channel=cid)
                    client.shutdown_kernel(kernel)
            except Exception as ex:
                # best-effort execution: report failure (-1) but still return
                print(ex)
                sse.publish({"message": -1}, type="notebook", channel=cid)
            return super().get(**kwargs)

        except DoesNotExist:
            nb = None
            try:
                nb = Notebooks.objects.only("pk").get(pk=cid)
            except DoesNotExist:
                # create and save unexecuted notebook, also start entry to avoid rebuild on subsequent requests
                contrib = Contributions.objects.get(id=cid)
                cells = [
                    nbf.new_code_cell(
                        "headers = {'X-Consumer-Groups': 'admin', 'X-Consumer-Username': '******'}\n"
                        "client = load_client(headers=headers)"),
                    nbf.new_code_cell(
                        f"contrib = client.contributions.get_entry(pk='{cid}', _fields=['_all']).result()"
                    ),
                    nbf.new_markdown_cell("## Info"),
                    nbf.new_code_cell(
                        "fields = ['title', 'owner', 'authors', 'description', 'urls']\n"
                        "prov = client.projects.get_entry(pk=contrib['project'], _fields=fields).result()\n"
                        "HierarchicalData(prov)"),
                    nbf.new_markdown_cell("## HData"),
                    nbf.new_code_cell("HierarchicalData(contrib['data'])"),
                ]

                # one markdown + plot/code cell group per table
                tables = Tables.objects.only("id",
                                             "name").filter(contribution=cid)
                if tables:
                    cells.append(nbf.new_markdown_cell("## Tables"))
                    for table in tables:
                        cells.append(nbf.new_markdown_cell(table.name))
                        cells.append(
                            nbf.new_code_cell(
                                f"table = client.tables.get_entry(pk='{table.id}', _fields=['_all']).result()\n"
                                "Table.from_dict(table)"))
                        cells.append(
                            nbf.new_code_cell("Plot.from_dict(table)"))

                # one markdown + code cell group per structure
                structures = Structures.objects.only(
                    "id", "name").filter(contribution=cid)
                if structures:
                    cells.append(nbf.new_markdown_cell("## Structures"))
                    for structure in structures:
                        cells.append(nbf.new_markdown_cell(structure.name))
                        cells.append(
                            nbf.new_code_cell(
                                "structure = client.structures.get_entry(\n"
                                f"\tpk='{structure.id}', _fields=['lattice', 'sites', 'charge']\n"
                                ").result()\n"
                                "Structure.from_dict(structure)"))

                nb = Notebooks(pk=cid, is_public=contrib.is_public)
                doc = deepcopy(seed_nb)
                doc["cells"] += cells
                self.Schema().update(nb, doc)
                nb.save()  # calls Notebooks.clean()
                return super().get(**kwargs)

            # the notebook exists but the initial permission-checked get
            # failed => requester is not allowed to see it
            if nb is not None:
                raise DoesNotExist(
                    f"Notebook {nb.id} exists but user not in project group")
Ejemplo n.º 9
0
    def post_save(cls, sender, document, **kwargs):
        """Post-save hook for contributions: maintain the project's columns
        metadata and (re)build the contribution's executed notebook.

        Saving with signal_kwargs={"skip": True} bypasses this handler,
        which is used below to avoid infinite save/signal recursion.
        """
        if kwargs.get("skip"):
            return

        # avoid circular imports
        from mpcontribs.api.projects.document import Column
        from mpcontribs.api.notebooks.document import Notebooks

        # project is LazyReferenceField
        project = document.project.fetch()

        # set columns field for project
        def update_columns(path, key, value):
            # full dotted path of this leaf under "data"
            path = delimiter.join(["data"] + list(path) + [key])
            # a dict with exactly the quantity keys is a value/unit quantity
            is_quantity = isinstance(value, dict) and quantity_keys == set(value.keys())
            is_text = bool(
                not is_quantity and isinstance(value, str) and key not in quantity_keys
            )
            if is_quantity or is_text:
                try:
                    column = project.columns.get(path=path)
                    if is_quantity:
                        # widen the column's [min, max] range if needed
                        v = value["value"]
                        if v > column.max:
                            column.max = v
                            project.save().reload("columns")
                        elif v < column.min:
                            column.min = v
                            project.save().reload("columns")

                except DoesNotExist:
                    # first occurrence of this column -> register it
                    column = Column(path=path)
                    if is_quantity:
                        column.unit = value["unit"]
                        column.min = column.max = value["value"]

                    project.modify(push__columns=column)

                ncolumns = len(project.columns)
                if ncolumns > 50:
                    raise ValueError("Reached maximum number of columns (50)!")

            return True

        # run update_columns over document data
        remap(document.data, visit=update_columns, enter=enter)

        # add/remove columns for other components
        for path in ["structures", "tables"]:
            try:
                project.columns.get(path=path)
            except DoesNotExist:
                if getattr(document, path):
                    project.update(push__columns=Column(path=path))

        # generate notebook for this contribution
        if document.notebook is not None:
            document.notebook.delete()

        cells = [
            nbf.new_code_cell(
                "client = Client(\n"
                '\theaders={"X-Consumer-Groups": "admin"},\n'
                f'\thost="{MPCONTRIBS_API_HOST}"\n'
                ")"
            ),
            nbf.new_code_cell(f'client.get_contribution("{document.id}").pretty()'),
        ]

        if document.tables:
            cells.append(nbf.new_markdown_cell("## Tables"))
            for table in document.tables:
                cells.append(
                    nbf.new_code_cell(f'client.get_table("{table.id}").plot()')
                )

        if document.structures:
            cells.append(nbf.new_markdown_cell("## Structures"))
            for structure in document.structures:
                cells.append(
                    nbf.new_code_cell(f'client.get_structure("{structure.id}")')
                )

        # execute all code cells in a kernel over the websocket connection
        ws = connect_kernel()
        for cell in cells:
            if cell["cell_type"] == "code":
                cell["outputs"] = execute(ws, str(document.id), cell["source"])

        ws.close()
        # replace the privileged client cell with a user-facing placeholder
        cells[0] = nbf.new_code_cell("client = Client('<your-api-key-here>')")
        doc = deepcopy(seed_nb)
        doc["cells"] += cells
        document.notebook = Notebooks(**doc).save()
        document.last_modified = datetime.utcnow()
        document.save(signal_kwargs={"skip": True})  # don't re-trigger hook
Ejemplo n.º 10
0
 def post_save(cls, sender, document, **kwargs):
     """Drop the cached notebook whenever its contribution changes."""
     contribution_id = document.contribution.id
     Notebooks.objects(pk=contribution_id).delete()
Ejemplo n.º 11
0
def make(projects=None, cids=None, force=False):
    """Build the notebook / details page for contributions.

    Args:
        projects: optional list of project names to restrict the build to
        cids: optional list of contribution ids to build
        force: rebuild even when a notebook exists and needs_build is False

    Returns:
        dict echoing the inputs, RQ job info (when run in a worker), and a
        "result" entry with COMPLETED / TIMEOUT / ERROR status and counts.

    Fixes vs previous version: the outer loop no longer uses enumerate() —
    its index was unused and shadowed by the inner outputs loop — and the
    placeholder-free f-string for the seed client cell is a plain string.
    """
    start = time.perf_counter()
    # leave a 5s buffer so we return gracefully before RQ kills the job
    remaining_time = rq.default_timeout - 5
    mask = ["id", "needs_build", "notebook"]
    query = Q()

    if projects:
        query &= Q(project__in=projects)
    if cids:
        query &= Q(id__in=cids)
    if not force:
        # include docs flagged for rebuild or never built (field missing)
        query &= Q(needs_build=True) | Q(needs_build__exists=False)

    job = get_current_job()
    ret = {"input": {"projects": projects, "cids": cids, "force": force}}
    if job:
        ret["job"] = {
            "id": job.id,
            "enqueued_at": job.enqueued_at.isoformat(),
            "started_at": job.started_at.isoformat()
        }

    exclude = list(Contributions._fields.keys())
    documents = Contributions.objects(query).exclude(*exclude).only(*mask)
    total = documents.count()
    count = 0

    for document in documents:
        # track elapsed time per iteration against the job timeout budget
        stop = time.perf_counter()
        remaining_time -= stop - start

        if remaining_time < 0:
            if job:
                restart_kernels()

            ret["result"] = {
                "status": "TIMEOUT",
                "count": count,
                "total": total
            }
            return ret

        start = time.perf_counter()

        if not force and document.notebook and \
                not getattr(document, "needs_build", True):
            continue

        if document.notebook:
            # remove the stale notebook before rebuilding
            try:
                nb = Notebooks.objects.get(id=document.notebook.id)
                nb.delete()
                document.update(unset__notebook="")
                logger.debug(f"Notebook {document.notebook.id} deleted.")
            except DoesNotExist:
                pass

        cid = str(document.id)
        logger.debug(f"prep notebook for {cid} ...")
        document.reload("tables", "structures", "attachments")

        cells = [
            # define client only once in kernel
            # avoids API calls for regex expansion for query parameters
            nbf.new_code_cell("\n".join([
                "if 'client' not in locals():",
                "\tclient = Client(",
                f'\t\theaders={{"X-Authenticated-Groups": "{ADMIN_GROUP}"}},',
                f'\t\thost="{MPCONTRIBS_API_HOST}"',
                "\t)",
                "print(client.get_totals())",
                # return something. See while loop in `run_cells`
            ])),
            nbf.new_code_cell("\n".join([
                f'c = client.get_contribution("{document.id}")', 'c.display()'
            ])),
        ]

        if document.tables:
            cells.append(nbf.new_markdown_cell("## Tables"))
            for table in document.tables:
                cells.append(
                    nbf.new_code_cell("\n".join(
                        [f't = client.get_table("{table.id}")',
                         't.display()'])))

        if document.structures:
            cells.append(nbf.new_markdown_cell("## Structures"))
            for structure in document.structures:
                cells.append(
                    nbf.new_code_cell("\n".join([
                        f's = client.get_structure("{structure.id}")',
                        's.display()'
                    ])))

        if document.attachments:
            cells.append(nbf.new_markdown_cell("## Attachments"))
            for attachment in document.attachments:
                cells.append(
                    nbf.new_code_cell("\n".join([
                        f'a = client.get_attachment("{attachment.id}")',
                        'a.info()'
                    ])))

        try:
            outputs = execute_cells(cid, cells)
        except Exception as e:
            if job:
                restart_kernels()

            ret["result"] = {
                "status": "ERROR",
                "cid": cid,
                "count": count,
                "total": total,
                "exc": str(e)
            }
            return ret

        if not outputs:
            if job:
                restart_kernels()

            ret["result"] = {
                "status": "ERROR: NO OUTPUTS",
                "cid": cid,
                "count": count,
                "total": total
            }
            return ret

        # attach execution outputs to their cells (keys are cell indices)
        for cell_idx, output in outputs.items():
            cells[cell_idx]["outputs"] = output

        doc = nbf.new_notebook()
        doc["cells"] = [
            nbf.new_code_cell("from mpcontribs.client import Client"),
            nbf.new_code_cell("client = Client()"),
        ]
        doc["cells"] += cells[1:]  # skip localhost Client

        try:
            nb = Notebooks(**doc).save()
            document.update(notebook=nb, needs_build=False)
        except Exception as e:
            if job:
                restart_kernels()

            ret["result"] = {
                "status": "ERROR",
                "cid": cid,
                "count": count,
                "total": total,
                "exc": str(e)
            }
            return ret

        count += 1

    if total and job:
        restart_kernels()

    ret["result"] = {"status": "COMPLETED", "count": count, "total": total}
    return ret
Ejemplo n.º 12
0
def build():
    """Remove dangling notebooks and build missing ones for contributions.

    Returns a summary string of built vs. deleted notebook counts. A count
    of -1 means the cleanup branch did not run.
    """
    with no_dereference(Contributions) as Contribs:
        # TODO get a random max_docs slice to avoid collisions in parallel Fargate tasks

        # remove dangling and unset missing notebooks
        nbs_total, nbs_count = -1, -1
        ctrbs_cnt = Contribs.objects._cursor.collection.estimated_document_count(
        )
        nbs_cnt = Notebooks.objects._cursor.collection.estimated_document_count(
        )

        # counts diverging means either dangling notebooks or stale refs
        if ctrbs_cnt != nbs_cnt:
            contribs = Contribs.objects(notebook__exists=True).only("notebook")
            nids = [contrib.notebook.id for contrib in contribs]
            if len(nids) < nbs_cnt:
                # more notebooks than references -> delete dangling ones
                # (capped at max_docs per run)
                nbs = Notebooks.objects(id__nin=nids).only("id")
                nbs_total = nbs.count()
                max_docs = 2500
                nbs[:max_docs].delete()
                nbs_count = nbs_total if nbs_total < max_docs else max_docs
            else:
                # references pointing at missing notebooks -> unset them
                missing_nids = set(nids) - set(
                    Notebooks.objects.distinct("id"))
                if missing_nids:
                    upd_contribs = Contribs.objects(
                        notebook__in=list(missing_nids))
                    nupd_total = upd_contribs.count()
                    nupd = upd_contribs.update(unset__notebook="")
                    print(
                        f"unset notebooks for {nupd}/{nupd_total} contributions"
                    )

        # build missing notebooks
        max_docs = NotebooksResource.max_limit
        cids = request.args.get("cids", "").split(",")[:max_docs]

        if cids[0]:
            documents = Contribs.objects(id__in=cids)
        else:
            documents = Contribs.objects(notebook__exists=False)[:max_docs]

        total = documents.count()
        count = 0

        for document in documents:
            if document.notebook is not None:
                # NOTE document.notebook.delete() doesn't trigger pre_delete signal?
                nb = Notebooks.objects.get(id=document.notebook.id)
                nb.delete()

            cells = [
                # define client only once in kernel
                # avoids API calls for regex expansion for query parameters
                nbf.new_code_cell("\n".join([
                    "if 'client' not in locals():",
                    "\tclient = Client(",
                    '\t\theaders={"X-Authenticated-Groups": "admin"},',
                    f'\t\thost="{MPCONTRIBS_API_HOST}"',
                    "\t)",
                ])),
                nbf.new_code_cell(
                    f'client.get_contribution("{document.id}").pretty()'),
            ]

            if document.tables:
                cells.append(nbf.new_markdown_cell("## Tables"))
                for table in document.tables:
                    cells.append(
                        nbf.new_code_cell("\n".join([
                            f'df = client.get_table("{table.id}")',
                            "df.plot(**df.attrs)",
                        ])))

            if document.structures:
                cells.append(nbf.new_markdown_cell("## Structures"))
                for structure in document.structures:
                    cells.append(
                        nbf.new_code_cell(
                            f'client.get_structure("{structure.id}")'))

            cid = str(document.id)
            outputs = execute_cells(cid, cells)
            if not outputs:
                raise ValueError(f"notebook generation for {cid} failed!")

            # attach execution outputs to their cells (keys = cell indices)
            for idx, output in outputs.items():
                cells[idx]["outputs"] = output

            doc = deepcopy(seed_nb)
            doc["cells"] += cells[1:]  # skip localhost Client

            document.notebook = Notebooks(**doc).save()
            document.save(signal_kwargs={"skip": True})
            count += 1

        return f"{count}/{total} notebooks built & {nbs_count}/{nbs_total} notebooks deleted"
Ejemplo n.º 13
0
    def get(self, **kwargs):
        """Return the notebook for a contribution, executing it on demand.

        kwargs["pk"] holds the contribution id. Permission checks go through
        the resource's read-permission filter; a notebook that exists but is
        not readable raises DoesNotExist.
        """
        cid = kwargs["pk"]
        # restrict any queryset to what the requesting user may read
        qfilter = lambda qs: self.has_read_permission(request, qs.clone())
        try:
            # trigger DoesNotExist if necessary (due to permissions or non-existence)
            nb = self._resource.get_object(cid, qfilter=qfilter)
            try:
                # empty outputs on the last cell => not executed yet
                if not nb.cells[-1]["outputs"]:
                    ws = connect_kernel()
                    for idx, cell in enumerate(nb.cells):
                        if cell["cell_type"] == "code":
                            cell["outputs"] = execute(ws, cid, cell["source"])
                            # publish per-cell progress (1-based index)
                            sse.publish({"message": idx + 1},
                                        type="notebook",
                                        channel=cid)

                    ws.close()
                    # replace privileged client cell with a placeholder
                    nb.cells[1] = nbf.new_code_cell(
                        "client = Client('<your-api-key-here>')")
                    nb.save()  # calls Notebooks.clean()
                    # message 0 signals completion to the SSE consumer
                    sse.publish({"message": 0}, type="notebook", channel=cid)
            except Exception as ex:
                # best-effort execution: report failure (-1) but still return
                print(ex)
                sse.publish({"message": -1}, type="notebook", channel=cid)

            return self._resource.serialize(nb, params=request.args)

        except DoesNotExist:
            nb = None
            try:
                nb = Notebooks.objects.only("pk").get(pk=cid)
            except DoesNotExist:
                # create and save unexecuted notebook, also start entry to avoid rebuild on subsequent requests
                from mpcontribs.api.contributions.views import ContributionsResource

                res = ContributionsResource()
                res._params = {"_fields": "_all"}
                contrib = res.get_object(cid, qfilter=qfilter)
                cells = [
                    nbf.new_code_cell(
                        'client = Client(headers={"X-Consumer-Groups": "admin"})'
                    ),
                    nbf.new_markdown_cell("## Project"),
                    nbf.new_code_cell(
                        f'client.get_project("{contrib.project.pk}").pretty()'
                    ),
                    nbf.new_markdown_cell("## Contribution"),
                    nbf.new_code_cell(
                        f'client.get_contribution("{cid}").pretty()'),
                ]

                # one plot cell per table, grouped under a section header
                if contrib.tables:
                    cells.append(nbf.new_markdown_cell("## Tables"))
                    for _, tables in contrib.tables.items():
                        for table in tables:
                            tid = table["id"]
                            cells.append(
                                nbf.new_code_cell(
                                    f'client.get_table("{tid}").plot()'))

                # one display cell per structure
                if contrib.structures:
                    cells.append(nbf.new_markdown_cell("## Structures"))
                    for _, structures in contrib.structures.items():
                        for structure in structures:
                            sid = structure["id"]
                            cells.append(
                                nbf.new_code_cell(
                                    f'client.get_structure("{sid}")'))

                nb = Notebooks(pk=cid, is_public=contrib.is_public)
                doc = deepcopy(seed_nb)
                doc["cells"] += cells
                self.Schema().update(nb, doc)
                nb.save()  # calls Notebooks.clean()
                return self._resource.serialize(nb, params=request.args)

            # the notebook exists but the permission-checked lookup failed
            if nb is not None:
                raise DoesNotExist(
                    f"Notebook {nb.id} exists but user not in project group")
Ejemplo n.º 14
0
    def get(self, cid):
        """Retrieve (and build) notebook for a single contribution [internal].
        ---
        operationId: get_entry
        parameters:
            - name: cid
              in: path
              type: string
              pattern: '^[a-f0-9]{24}$'
              required: true
              description: contribution ID (ObjectId)
        responses:
            200:
                description: single notebook
                schema:
                    $ref: '#/definitions/NotebooksSchema'
        """
        try:
            # serve the cached notebook when it was built before
            nb = Notebooks.objects.get(id=cid)
            nb.restore()
        except DoesNotExist:
            # build, execute, and persist a new notebook for this contribution
            contrib = Contributions.objects.no_dereference().get(id=cid)
            cells = [
                nbf.new_code_cell(
                    "client = load_client() # provide apikey as argument to use api.mpcontribs.org\n"
                    f"contrib = client.contributions.get_entry(cid='{cid}').response().result"
                ),
                nbf.new_markdown_cell("## Provenance Info"),
                nbf.new_code_cell(
                    "mask = ['title', 'authors', 'description', 'urls', 'other', 'project']\n"
                    "prov = client.projects.get_entry(project=contrib['project'], mask=mask).response().result\n"
                    "RecursiveDict(prov)"),
                nbf.new_markdown_cell(
                    f"## Hierarchical Data for {contrib['identifier']}"),
                nbf.new_code_cell("HierarchicalData(contrib['content'])")
            ]

            # table/plot cell pairs for each table reference
            tables = contrib.content['tables']
            if tables:
                cells.append(
                    nbf.new_markdown_cell(
                        f"## Tabular Data for {contrib['identifier']}"))
                for ref in tables:
                    cells.append(
                        nbf.new_code_cell(
                            f"table = client.tables.get_entry(tid='{ref.id}').response().result # Pandas DataFrame format\n"
                            "Table.from_dict(table)"))
                    cells.append(nbf.new_code_cell("Plot.from_dict(table)"))

            # one display cell per structure reference
            structures = contrib.content['structures']
            if structures:
                cells.append(
                    nbf.new_markdown_cell(
                        f"## Pymatgen Structures for {contrib['identifier']}"))
                for ref in structures:
                    cells.append(
                        nbf.new_code_cell(
                            f"Structure.from_dict(client.structures.get_entry(sid='{ref.id}').response().result)"
                        ))

            # execute all code cells in a fresh kernel, then shut it down
            kernel = client.start_kernel()
            for cell in cells:
                if cell.cell_type == 'code':
                    cell.outputs = kernel.execute(cell.source)
            client.shutdown_kernel(kernel)

            nb = deepcopy(seed_nb)
            nb.cells += cells
            nb = Notebooks(**nb)
            nb.id = cid  # to link to the according contribution
            nb.save()  # calls Notebooks.clean()

        del nb.id
        return nb
Ejemplo n.º 15
0
    def post_save(cls, sender, document, **kwargs):
        """Signal handler run after a contribution is saved.

        Keeps the parent project's ``columns`` field in sync with the
        contribution's data, then (re)builds the contribution's notebook by
        executing its cells on a private asyncio event loop.

        :param sender: document class that emitted the signal
        :param document: the saved contribution document
        :param kwargs: signal kwargs; ``skip=True`` short-circuits the handler
                       (set by the final ``document.save`` below to avoid
                       infinite save recursion)
        """
        if kwargs.get("skip"):
            return

        # project is LazyReferenceField
        project = document.project.fetch()

        # set columns field for project
        def update_columns(path, key, value):
            # remap() visitor: record unit and min/max bounds for every
            # quantity/text leaf under "data" in project.columns
            path = delimiter.join(["data"] + list(path) + [key])
            is_quantity = isinstance(value, dict) and quantity_keys.issubset(
                value.keys())
            is_text = bool(not is_quantity and isinstance(value, str)
                           and key not in quantity_keys)
            if is_quantity or is_text:
                project.reload("columns")
                try:
                    column = project.columns.get(path=path)
                    if is_quantity:
                        v = value["value"]
                        # NaN bound marks an uninitialized min/max
                        if isnan(column.max) or v > column.max:
                            column.max = v
                        if isnan(column.min) or v < column.min:
                            column.min = v

                except DoesNotExist:
                    # first time this path is seen -> create the column
                    column = {"path": path}
                    if is_quantity:
                        column["unit"] = value["unit"]
                        column["min"] = column["max"] = value["value"]

                    project.columns.create(**column)

                project.save().reload("columns")
                ncolumns = len(project.columns)
                if ncolumns > 50:
                    raise ValueError("Reached maximum number of columns (50)!")

            return True

        # run update_columns over document data
        remap(document.data, visit=update_columns, enter=enter)

        # add/remove columns for other components
        for path in ["structures", "tables"]:
            try:
                project.columns.get(path=path)
            except DoesNotExist:
                if getattr(document, path):
                    project.columns.create(path=path)
                    project.save().reload("columns")

        # generate notebook for this contribution
        if document.notebook is not None:
            document.notebook.delete()

        cells = [
            # admin client so cell execution has full access; swapped for the
            # public default client further below before persisting
            nbf.new_code_cell("client = Client(\n"
                              '\theaders={"X-Consumer-Groups": "admin"},\n'
                              f'\thost="{MPCONTRIBS_API_HOST}"\n'
                              ")"),
            nbf.new_code_cell(
                f'client.get_contribution("{document.id}").pretty()'),
        ]

        if document.tables:
            cells.append(nbf.new_markdown_cell("## Tables"))
            for table in document.tables:
                cells.append(
                    nbf.new_code_cell(
                        f'client.get_table("{table.id}").plot()'))

        if document.structures:
            cells.append(nbf.new_markdown_cell("## Structures"))
            for structure in document.structures:
                cells.append(
                    nbf.new_code_cell(
                        f'client.get_structure("{structure.id}")'))

        # execute the cells on a private event loop; always close the loop,
        # even if execution raises
        loop = asyncio.new_event_loop()
        try:
            task = loop.create_task(
                execute_cells(str(document.id), cells, loop=loop))
            outputs = loop.run_until_complete(task)

            # cancel leftover tasks; awaiting a cancelled task raises
            # CancelledError, which must be swallowed and must NOT overwrite
            # `outputs` (the previous code re-assigned `outputs` here, losing
            # the results and propagating CancelledError)
            # NOTE(review): all_tasks(loop=...) was removed in Python 3.10 —
            # this code assumes an older runtime; confirm before upgrading
            for pending in asyncio.all_tasks(loop=loop):
                print(f"Cancelling {pending}")
                pending.cancel()
                try:
                    loop.run_until_complete(pending)
                except asyncio.CancelledError:
                    pass
        finally:
            loop.close()

        # attach captured outputs to their code cells by index
        for idx, output in outputs.items():
            cells[idx]["outputs"] = output

        # replace the admin client setup with the public default for end users
        cells[0] = nbf.new_code_cell("client = Client()")
        doc = deepcopy(seed_nb)
        doc["cells"] += cells

        # avoid circular imports
        from mpcontribs.api.notebooks.document import Notebooks

        document.notebook = Notebooks(**doc).save()
        document.last_modified = datetime.utcnow()
        # skip=True prevents this handler from re-triggering on the save below
        document.save(signal_kwargs={"skip": True})