Example #1
0
def prefetch_converted_files(inspire_ids, force, targets):
    """
    Goes through all HEPData submissions and creates their YAML, ROOT, CSV, and YODA representations.
    This avoids any wait time for users when trying to retrieve converted files.
    NOTE: Does not pre-fetch all individual files, since this would be too much and probably not
    necessary.
    """
    if inspire_ids:
        submission_ids = inspire_ids.split(',')
    else:
        submissions = HEPSubmission.query.filter_by(overall_status='finished') \
            .with_entities(HEPSubmission.inspire_id).all()
        submission_ids = [i for (i,) in submissions]

    submission_ids = set(submission_ids)

    file_formats = targets.split(',')
    for inspire_id in submission_ids:
        for file_format in file_formats:
            convert_and_store.delay(inspire_id, file_format, force=force)
Example #2
0
def prefetch_converted_files(inspire_ids, force, targets):
    """
    Goes through all HEPData submissions and creates their ROOT, CSV, and YODA representations.
    This avoids any wait time for users when trying to retrieve converted files.
    NOTE: Does not pre-fetch all individual files, since this would be too much and probably not
    necessary
    :return:
    """
    if inspire_ids:
        submission_ids = inspire_ids.split(',')
    else:
        submissions = HEPSubmission.query.filter_by(overall_status='finished') \
            .with_entities(HEPSubmission.inspire_id).all()
        submission_ids = [i for (i,) in submissions]

    submission_ids = set(submission_ids)

    file_formats = targets.split(',')
    for inspire_id in submission_ids:
        for file_format in file_formats:
            convert_and_store.delay(inspire_id, file_format, force=force)
Example #3
0
def do_finalise(recid,
                publication_record=None,
                force_finalise=False,
                commit_message=None,
                send_tweet=False,
                update=False,
                convert=True):
    """
        Creates record SIP for each data record with a link to the associated
        publication
        :param synchronous: if true then workflow execution and creation is
        waited on, then everything is indexed in one go.
        If False, object creation is asynchronous, however reindexing is not
        performed. This is only really useful for the full migration of
        content.
    """
    print('Finalising record {}'.format(recid))

    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    generated_record_ids = []
    if hep_submission \
        and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid, version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:

                if "recid" in record["_source"]:
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    delete_item_from_index(
                        record["_id"],
                        doc_type=CFG_DATA_TYPE,
                        parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids, publication_record,
                                    recid, submission, version)

        try:
            record = get_record_by_id(recid)
            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid, version=version, message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)

            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get(
                    'TESTING', False) and not current_app.config.get(
                        'NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(
                        submission.id, submission.version)
                log.info("Generating DOIs for ins{0}".format(
                    hep_submission.inspire_id))
                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            try:
                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hep_submission)
            except ConnectionTimeout as ct:
                log.error('Unable to add ins{0} to admin index.\n{1}'.format(
                    hep_submission.inspire_id, ct))

            send_finalised_email(hep_submission)

            if convert:
                for file_format in ['yaml', 'csv', 'yoda', 'root']:
                    convert_and_store.delay(hep_submission.inspire_id,
                                            file_format,
                                            force=True)

            if send_tweet:
                tweet(
                    record.get('title'), record.get('collaborations'),
                    "http://www.hepdata.net/record/ins{0}".format(
                        record.get('inspire_id')), version)

            return json.dumps({
                "success": True,
                "recid": recid,
                "data_count": len(submissions),
                "generated_records": generated_record_ids
            })

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps({
            "success":
            False,
            "recid":
            recid,
            "errors": [
                "You do not have permission to finalise this "
                "submission. Only coordinators can do that."
            ]
        })
Example #4
0
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False):
    """
        Creates record SIP for each data record with a link to the associated
        publication
        :param synchronous: if true then workflow execution and creation is
        waited on, then everything is indexed in one go.
        If False, object creation is asynchronous, however reindexing is not
        performed. This is only really useful for the full migration of
        content.
    """
    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    print('Finalising record {}'.format(recid))

    generated_record_ids = []
    if hep_submission \
        and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:

                if "recid" in record["_source"]:
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    delete_item_from_index(record["_id"],
                                           doc_type=CFG_DATA_TYPE, parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)
            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid,
                    version=version,
                    message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)

            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get('TESTING', False) and not current_app.config.get('NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(submission.id, submission.version)

                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            admin_indexer = AdminIndexer()
            admin_indexer.index_submission(hep_submission)

            send_finalised_email(hep_submission)

            for file_format in ['csv', 'yoda', 'root']:
                convert_and_store.delay(hep_submission.inspire_id, file_format, force=True)

            if send_tweet:
                tweet(record.get('title'), record.get('collaborations'),
                      "http://www.hepdata.net/record/ins{0}".format(record.get('inspire_id')))

            return json.dumps({"success": True, "recid": recid,
                               "data_count": len(submissions),
                               "generated_records": generated_record_ids})

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps(
            {"success": False, "recid": recid,
             "errors": ["You do not have permission to finalise this "
                        "submission. Only coordinators can do that."]})