Exemple #1
0
    def _classify_paper_with_deposit(obj, eng):
        from invenio_deposit.models import Deposition
        deposition = Deposition(obj)
        data = None
        if not fast_mode:
            for f in deposition.files:
                if f.name and ".pdf" in f.name.lower():
                    data = f.get_syspath()
                    break
            callback = get_keywords_from_local_file
        if not data:
            try:
                metadata = deposition.get_latest_sip().metadata
            except AttributeError as err:
                obj.log.error("Error getting data: {0}".format(err))

            data = [metadata.get("titles", {}).get("title", ""),
                    metadata.get("abstracts", {}).get("value", "")]
            callback = get_keywords_from_text

        classify_paper(obj, eng, callback, data,
                       taxonomy, rebuild_cache,
                       no_cache, output_limit,
                       spires, match_mode, with_author_keywords,
                       extract_acronyms, only_core_tags, fast_mode)
Exemple #2
0
def arxiv_fft_get(obj, eng):
    """Get FFT from arXiv, if arXiv ID is provided."""
    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(sealed=False)
    metadata = sip.metadata

    if 'arxiv_id' in metadata and metadata['arxiv_id']:
        arxiv_pdf_url = cfg.get("ARXIV_PDF_URL", "http://arxiv.org/pdf/") + \
            "{0}.{1}"

        from invenio.config import CFG_TMPSHAREDDIR
        arxiv_file, arxiv_file_path = mkstemp(
            prefix="%s_" % (metadata['arxiv_id'].replace("/", "_")),
            suffix='.pdf',
            dir=CFG_TMPSHAREDDIR,
        )
        os.close(arxiv_file)

        download_url(url=arxiv_pdf_url.format(metadata['arxiv_id'], "pdf"),
                     content_type="pdf",
                     download_to_file=arxiv_file_path)

        # To get 1111.2222.pdf as filename.
        filename = "{0}.pdf".format(metadata['arxiv_id'].replace("/", "_"))

        try:
            try:
                save_deposition_file(deposition,
                                     filename,
                                     arxiv_file_path)
            except FilenameAlreadyExists:
                obj.log.error("PDF file not saved: filename already exists.")
        except Exception as e:
            obj.log.error("PDF file not saved: {}.".format(e.message))
Exemple #3
0
    def _classify_paper_with_deposit(obj, eng):
        from invenio_deposit.models import Deposition
        deposition = Deposition(obj)
        data = None
        if not fast_mode:
            for f in deposition.files:
                if f.name and ".pdf" in f.name.lower():
                    data = f.get_syspath()
                    break
            callback = get_keywords_from_local_file
        if not data:
            try:
                metadata = deposition.get_latest_sip().metadata
            except AttributeError as err:
                obj.log.error("Error getting data: {0}".format(err))

            data = [
                metadata.get("title", {}).get("title", ""),
                metadata.get("abstract", {}).get("summary", "")
            ]
            callback = get_keywords_from_text

        classify_paper(obj, eng, callback, data, taxonomy, rebuild_cache,
                       no_cache, output_limit, spires, match_mode,
                       with_author_keywords, extract_acronyms, only_core_tags,
                       fast_mode)
Exemple #4
0
def add_note_entry(obj, eng):
    """Add note entry to sip metadata on approval."""
    entry = {'value': '*Temporary entry*'} if obj.extra_data.get("core") \
        else {'value': '*Brief entry*'}
    deposition = Deposition(obj)
    metadata = deposition.get_latest_sip(sealed=True).metadata
    if metadata.get('public_notes') is None or not isinstance(metadata.get("public_notes"), list):
        metadata['public_notes'] = [entry]
    else:
        metadata['public_notes'].append(entry)
    deposition.update()
Exemple #5
0
    def get_description(bwo):
        """Return description of object."""
        from invenio_access.control import acc_get_user_email

        results = bwo.get_tasks_results()
        try:
            deposit_object = Deposition(bwo)
        except InvalidDepositionType:
            return "This submission is disabled: {0}.".format(bwo.workflow.name)

        id_user = deposit_object.workflow_object.id_user
        user_email = acc_get_user_email(id_user)

        sip = deposit_object.get_latest_sip()
        if sip:
            record = Record(sip.metadata)
            identifiers = []
            report_numbers = record.get("report_numbers", [])
            dois = record.get("dois.value", [])
            if report_numbers:
                for report_number in report_numbers:
                    number = report_number.get("value", "")
                    if number:
                        identifiers.append(number)
            if dois:
                identifiers.extend(["doi:{0}".format(d) for d in dois])

            categories = []
            subjects = record.get("subject_terms", [])
            if subjects:
                for subject in subjects:
                    if isinstance(subject, string_types):
                        categories.append(subject)
                    elif isinstance(subject, dict):
                        if subject.get("term"):
                            categories.append(subject.get("term"))
            categories = [record.get("type_of_doc", "")] + categories

            authors = []
            authors += [record.get("_first_author", {})]
            authors += record.get("_additional_authors", [])
            return render_template(
                "workflows/styles/submission_record.html",
                categories=categories,
                authors=authors,
                identifiers=identifiers,
                results=results,
                user_email=user_email,
                object=bwo,
                record=record,
            )
        else:
            return "Submitter: {0}".format(user_email)
Exemple #6
0
 def get_title(bwo):
     """Return title of object."""
     try:
         deposit_object = Deposition(bwo)
     except InvalidDepositionType:
         return "This submission is disabled: {0}.".format(bwo.workflow.name)
     sip = deposit_object.get_latest_sip()
     if sip:
         # Get the SmartJSON object
         record = Record(sip.metadata)
         return record.get("titles.title", ["No title"])[0]
     else:
         return "User submission in progress"
Exemple #7
0
def user_pdf_get(obj, eng):
    """Upload user PDF file, if requested."""
    if obj.extra_data.get('pdf_upload', False):
        fft = {'url': obj.extra_data.get('submission_data').get('pdf'),
               'docfile_type': 'INSPIRE-PUBLIC'}
        deposition = Deposition(obj)
        metadata = deposition.get_latest_sip(sealed=True).metadata
        if metadata.get('fft'):
            metadata['fft'].append(fft)
        else:
            metadata['fft'] = [fft]
        deposition.update()
        obj.log.info("PDF file added to FFT.")
Exemple #8
0
    def formatter(bwo, **kwargs):
        """Return formatted data of object."""
        try:
            deposit_object = Deposition(bwo)
        except InvalidDepositionType:
            return "This submission is disabled: {0}.".format(bwo.workflow.name)

        submission_data = deposit_object.get_latest_sip(deposit_object.submitted)
        record = submission_data.metadata

        return render_template(
            'format/record/Holding_Pen_HTML_detailed.tpl',
            record=Record(record)
        )
Exemple #9
0
def add_submission_extra_data(obj, eng):
    """ Add extra data to workflow object. """
    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(sealed=True)
    metadata = sip.metadata
    submission_data = {}
    if "references" in metadata:
        submission_data["references"] = metadata["references"]
        del metadata["references"]
    if "extra_comments" in metadata:
        submission_data["extra_comments"] = metadata["extra_comments"]
        del metadata["extra_comments"]
    if "pdf" in metadata:
        submission_data["pdf"] = metadata["pdf"]
        del metadata["pdf"]
    obj.extra_data["submission_data"] = submission_data
    deposition.save()
Exemple #10
0
    def _create_ticket(obj, eng):
        from invenio_access.control import acc_get_user_email

        deposition = Deposition(obj)
        requestors = acc_get_user_email(obj.id_user)

        subject, body = get_ticket_body(template,
                                        deposition,
                                        deposition.get_latest_sip(sealed=True).metadata,
                                        requestors,
                                        obj)
        submit_rt_ticket(obj,
                         queue,
                         subject,
                         body,
                         requestors,
                         ticket_id_key)
Exemple #11
0
    def _create_curation_ticket(obj, eng):
        from invenio_access.control import acc_get_user_email

        deposition = Deposition(obj)
        requestors = acc_get_user_email(obj.id_user)
        metadata = deposition.get_latest_sip(sealed=True).metadata

        if obj.extra_data.get("core"):
            subject, body = get_curation_body(template,
                                              metadata,
                                              requestors,
                                              obj.extra_data)
            submit_rt_ticket(obj,
                             queue,
                             subject,
                             body,
                             requestors,
                             ticket_id_key)
Exemple #12
0
def migrate_workflow_object(obj_id):
    try:
        obj = BibWorkflowObject.query.get(obj_id)
        rename_object_action(obj)
        if obj.workflow.name == "process_record_arxiv":
            metadata = obj.get_data()
            if isinstance(metadata, string_types):
                # Ignore records that have string as data
                return
            if 'drafts' in metadata:
                # New data model detected, just save and exit
                obj.save()
                return
            if hasattr(metadata, 'dumps'):
                metadata = metadata.dumps(clean=True)
            obj.data = bibfield.do(metadata)
            payload = Payload.create(
                type=obj.workflow.name,
                workflow_object=obj
            )
            payload.save()
        elif obj.workflow.name == "literature":
            d = Deposition(obj)
            sip = d.get_latest_sip()
            if sip:
                sip.metadata = bibfield.do(sip.metadata)
                sip.package = legacy_export_as_marc(hep2marc.do(sip.metadata))
                d.save()
        elif obj.workflow.name in ("authornew", "authorupdate"):
            data = obj.get_data()
            obj.set_data(author_bibfield.do(data))
            obj.save()
        else:
            obj.save()  # To update and trigger indexing
        reset_workflow_object_states(obj)
    except Exception as err:
        current_app.logger.error("Problem migrating record {0}".format(obj_id))
        current_app.logger.exception(err)
        msg = "Error: %r\n%s" % \
              (err, traceback.format_exc())
        obj.set_error_message(msg)
        obj.save(version=ObjectVersion.ERROR)
        raise
Exemple #13
0
def halt_to_render(obj, eng):
    """Halt the workflow - waiting to be resumed."""
    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(sealed=False)
    deposition.set_render_context(dict(
        template_name_or_list="deposit/completed.html",
        deposition=deposition,
        deposition_type=(
            None if deposition.type.is_default() else
            deposition.type.get_identifier()
        ),
        uuid=deposition.id,
        sip=sip,
        my_depositions=Deposition.get_depositions(
            current_user, type=deposition.type
        ),
        format_record=format_record,
    ))
    obj.last_task = "halt_to_render"
    eng.halt("User submission complete.")
Exemple #14
0
    def formatter(bwo, **kwargs):
        """Return formatted data of object."""
        try:
            deposit_object = Deposition(bwo)
        except InvalidDepositionType:
            return "This submission is disabled: {0}.".format(bwo.workflow.name)

        sip = deposit_object.get_latest_sip(deposit_object.submitted)
        record = sip.metadata

        if hasattr(sip, "package"):
            marcxml = sip.package
        else:
            return "No data found in submission (no package)."

        of = kwargs.get("of", "hd")
        if of == "xm":
            return marcxml
        else:
            return render_template("format/record/Holding_Pen_HTML_detailed.tpl", record=Record(record))
    def test_agnostic_deposit(self):
        """A deposition still has the same data model."""
        from invenio_deposit.models import Deposition
        from invenio.ext.login.legacy_user import UserInfo

        u = UserInfo(uid=1)
        d = Deposition.create(u, type='DepositModelTest')
        d.save()
        d.run_workflow()

        completed_object = d.engine.completed_objects[0]
        for l in ['files', 'sips', 'type', 'drafts', 'title']:
            self.assertIn(l, completed_object.data)
    def test_create(self):
        """Test."""
        from invenio_ext.login.legacy_user import UserInfo
        from invenio_deposit.models import Deposition

        user = UserInfo(uid=1)
        d = Deposition.create(user)
        assert d.type == self.DefaultType
        assert Deposition.get(d.id).type == self.DefaultType
        d2 = Deposition.create(user, type=self.AnotherType)
        assert d2.type == self.AnotherType
        assert Deposition.get(d2.id).type == self.AnotherType

        # remove the records
        Deposition.delete(d)
        Deposition.delete(d2)
    def test_create(self):
        """Test."""
        from invenio.ext.login.legacy_user import UserInfo
        from invenio_deposit.models import Deposition

        user = UserInfo(uid=1)
        d = Deposition.create(user)
        assert d.type == self.DefaultType
        assert Deposition.get(d.id).type == self.DefaultType
        d2 = Deposition.create(user, type=self.AnotherType)
        assert d2.type == self.AnotherType
        assert Deposition.get(d2.id).type == self.AnotherType

        # remove the records
        Deposition.delete(d)
        Deposition.delete(d2)
Exemple #18
0
    def run_deposition_tasks(self, deposition_id, with_webcoll=True):
        """
        Run all task ids specified in the latest SIP and optionally run
        webcoll.
        """
        # Run submitted tasks
        from invenio_deposit.models import Deposition
        dep = Deposition.get(deposition_id)
        sip = dep.get_latest_sip(sealed=True)

        for task_id in sip.task_ids:
            self.run_task_id(task_id)

        if with_webcoll:
            # Check if record is accessible
            response = self.client.get(
                url_for('record.metadata', recid=sip.metadata['recid']),
                base_url=self.app.config['CFG_SITE_SECURE_URL'],
            )
            self.assertStatus(response, 200)
Exemple #19
0
    def run_deposition_tasks(self, deposition_id, with_webcoll=True):
        """
        Run all task ids specified in the latest SIP and optionally run
        webcoll.
        """
        # Run submitted tasks
        from invenio_deposit.models import Deposition
        dep = Deposition.get(deposition_id)
        sip = dep.get_latest_sip(sealed=True)

        for task_id in sip.task_ids:
            self.run_task_id(task_id)

        if with_webcoll:
            # Check if record is accessible
            response = self.client.get(
                url_for('record.metadata', recid=sip.metadata['recid']),
                base_url=self.app.config['CFG_SITE_SECURE_URL'],
            )
            self.assertStatus(response, 200)
Exemple #20
0
def show_stats(deposition_type):
    """Render the stats for all the depositions."""
    if len(DepositionType.keys()) <= 1 and \
       DepositionType.get_default() is not None:
        abort(404)

    form = FilterDateForm()

    deptype = DepositionType.get(deposition_type)
    submitted_depositions = [d for d in Deposition.get_depositions(type=deptype) if d.has_sip(sealed=True)]

    ctx = process_metadata_for_charts(submitted_depositions,
                                      group_by=request.args.get('group_by', 'type_of_doc'))
    ctx.update(dict(
        deposition_type=deptype,
        depositions=submitted_depositions,
        form=form,
        chart_types=CHART_TYPES
    ))

    return render_template('deposit/stats/all_depositions.html', **ctx)
Exemple #21
0
def stats_api(deposition_type):
    """Get stats JSON."""
    deptype = DepositionType.get(deposition_type)
    submitted_depositions = [d for d in Deposition.get_depositions(type=deptype) if d.has_sip(sealed=True)]

    if request.args.get('since_date') is not None:
        since_date = datetime.strptime(request.args['since_date'],
                                       "%Y-%m-%d").replace(hour=0, minute=0)
        submitted_depositions = [d for d in submitted_depositions if d.created >= since_date]

    if request.args.get('until_date') is not None:
        until_date = datetime.strptime(request.args['until_date'],
                                       "%Y-%m-%d").replace(hour=23, minute=59)
        submitted_depositions = [d for d in submitted_depositions if d.created <= until_date]

    result = process_metadata_for_charts(submitted_depositions,
                                         request.args.get('group_by', 'type_of_doc'),
                                         bool(request.args.get('include_hidden', None)))

    resp = jsonify(result)

    return resp
def do_upgrade():
    """Implement your upgrades here."""
    from invenio_workflows.models import (
        BibWorkflowObject,
        Workflow,
        ObjectVersion
    )
    from invenio_workflows.registry import workflows
    from invenio_deposit.models import Deposition

    from inspire.dojson.utils import legacy_export_as_marc
    from inspire.dojson.hep import hep2marc
    from inspire.modules.workflows.dojson import bibfield
    from inspire.modules.workflows.models import Payload

    def rename_object_action(obj):
        if obj.get_action() == "arxiv_approval":
            obj.set_action("hep_approval", obj.get_action_message())

    def reset_workflow_object_states(obj):
        """Fix workflow positions and states.

        Old states from Prod/QA:
        {(), (0,), (5, 3, 14), (5, 3, 14, 0), (5, 3, 15), (5, 3, 15, 1)}

        {(),
         (0,),
         (5,),
         (5, 3, 1),
         (5, 3, 10),
         (5, 3, 11),
         (5, 3, 12),
         (5, 3, 14),
         (5, 3, 14, 0),
         (6, 3, 4)}

        OLD -> NEW
        5, 3, 14 -> 0 end
        5, 3, 10 -> 14, 0 halted
        """
        pos = obj.get_current_task()
        if obj.version == ObjectVersion.COMPLETED:
            obj.save(task_counter=[len(workflows.get(obj.workflow.name).workflow) - 1])
            return
        elif obj.version == ObjectVersion.RUNNING:
            # Running? Nah that cannot be.
            obj.version = ObjectVersion.ERROR
        try:
            obj.get_current_task_info()
        except IndexError:
            # The current task counter is Invalid
            obj.version = ObjectVersion.ERROR

        if obj.workflow.name == "process_record_arxiv":
            if tuple(pos) in [
                    (5,), (5, 3, 14), (5, 3, 14, 0), (5, 3, 15), (5, 3, 15, 1)]:
                pos = [len(workflows.get(obj.workflow.name).workflow) - 1]  # finished
            elif tuple(pos) in [(5, 3, 10), (5, 3, 11), (5, 3, 12)]:
                pos = [14, 0]  # halted
            elif len(pos) > 1 and pos[0] == 6:
                # We need to update pos from 6 to start of pre_processing part
                pos = [7]
            else:
                pos = [0]  # Nothing here, we go to start
            obj.save(task_counter=pos)

    # Special submission handling
    for deposit in BibWorkflowObject.query.filter(
            BibWorkflowObject.module_name == "webdeposit"):
        reset_workflow_object_states(deposit)
        d = Deposition(deposit)
        sip = d.get_latest_sip()
        if sip:
            sip.metadata = bibfield.do(sip.metadata)
            sip.package = legacy_export_as_marc(hep2marc.do(sip.metadata))
            d.update()
        deposit.save()

    # Special workflow handling
    workflows_to_fix = ["process_record_arxiv"]
    workflow_objects = []
    for workflow_name in workflows_to_fix:
        workflow_objects += BibWorkflowObject.query.join(
            BibWorkflowObject.workflow).filter(Workflow.name == workflow_name).all()

    for obj in workflow_objects:
        metadata = obj.get_data()
        reset_workflow_object_states(obj)
        rename_object_action(obj)
        if isinstance(metadata, six.string_types):
            # Ignore records that have string as data
            continue
        if 'drafts' in metadata:
            # New data model detected
            continue
        if hasattr(metadata, 'dumps'):
            metadata = metadata.dumps(clean=True)
        obj.data = bibfield.do(metadata)
        payload = Payload.create(
            type=obj.workflow.name,
            workflow_object=obj
        )
        payload.save()