Esempio n. 1
0
def unload_submission(record_id, version=1):

    submission = get_latest_hepsubmission(publication_recid=record_id)

    if not submission:
        print('Record {0} not found'.format(record_id))
        return

    if version == submission.version:
        print('Unloading record {0} version {1}...'.format(record_id, version))
        remove_submission(record_id, version)
    else:
        print('Not unloading record {0} version {1} (latest version {2})...'.format(record_id, version, submission.version))
        return

    if version == 1:

        data_records = get_records_matching_field("related_publication", record_id)
        for record in data_records["hits"]["hits"]:
            print("\t Removed data table {0} from index".format(record["_id"]))
            try:
                delete_item_from_index(doc_type=CFG_DATA_TYPE, id=record["_id"], parent=record_id)
            except Exception as e:
                logging.error("Unable to remove {0} from index. {1}".format(record["_id"], e))

        try:
            delete_item_from_index(doc_type=CFG_PUB_TYPE, id=record_id)
            print("Removed publication {0} from index".format(record_id))
        except NotFoundError as nfe:
            print(nfe)

    print('Finished unloading record {0} version {1}.'.format(record_id, version))
Esempio n. 2
0
def unload_submission(record_id):
    print('unloading {}...'.format(record_id))
    remove_submission(record_id)

    data_records = get_records_matching_field("related_publication", record_id)
    for record in data_records["hits"]["hits"]:
        print("\t Removed data table {0} from index".format(record["_id"]))
        try:
            delete_item_from_index(doc_type=CFG_DATA_TYPE, id=record["_id"], parent=record_id)
        except Exception as e:
            logging.error("Unable to remove {0} from index. {1}".format(record["_id"], e))

    try:
        delete_item_from_index(doc_type=CFG_PUB_TYPE, id=record_id)
        print("Removed publication {0} from index".format(record_id))
    except NotFoundError as nfe:
        print(nfe.message)

    print('Finished unloading {0}.'.format(record_id))
Esempio n. 3
0
def unload_submission(record_id):
    print('unloading {}...'.format(record_id))
    remove_submission(record_id)

    data_records = get_records_matching_field("related_publication", record_id)
    for record in data_records["hits"]["hits"]:
        print("\t Removed data table {0} from index".format(record["_id"]))
        try:
            delete_item_from_index(doc_type=CFG_DATA_TYPE, id=record["_id"], parent=record_id)
        except Exception as e:
            logging.error("Unable to remove {0} from index. {1}".format(record["_id"], e))

    try:
        delete_item_from_index(doc_type=CFG_PUB_TYPE, id=record_id)
        print("Removed publication {0} from index".format(record_id))
    except NotFoundError as nfe:
        print(nfe.message)

    print('Finished unloading {0}.'.format(record_id))
Esempio n. 4
0
def do_finalise(recid,
                publication_record=None,
                force_finalise=False,
                commit_message=None,
                send_tweet=False,
                update=False,
                convert=True):
    """
        Creates record SIP for each data record with a link to the associated
        publication
        :param synchronous: if true then workflow execution and creation is
        waited on, then everything is indexed in one go.
        If False, object creation is asynchronous, however reindexing is not
        performed. This is only really useful for the full migration of
        content.
    """
    print('Finalising record {}'.format(recid))

    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    generated_record_ids = []
    if hep_submission \
        and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid, version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:

                if "recid" in record["_source"]:
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    delete_item_from_index(
                        record["_id"],
                        doc_type=CFG_DATA_TYPE,
                        parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids, publication_record,
                                    recid, submission, version)

        try:
            record = get_record_by_id(recid)
            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid, version=version, message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)

            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get(
                    'TESTING', False) and not current_app.config.get(
                        'NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(
                        submission.id, submission.version)
                log.info("Generating DOIs for ins{0}".format(
                    hep_submission.inspire_id))
                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            try:
                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hep_submission)
            except ConnectionTimeout as ct:
                log.error('Unable to add ins{0} to admin index.\n{1}'.format(
                    hep_submission.inspire_id, ct))

            send_finalised_email(hep_submission)

            if convert:
                for file_format in ['yaml', 'csv', 'yoda', 'root']:
                    convert_and_store.delay(hep_submission.inspire_id,
                                            file_format,
                                            force=True)

            if send_tweet:
                tweet(
                    record.get('title'), record.get('collaborations'),
                    "http://www.hepdata.net/record/ins{0}".format(
                        record.get('inspire_id')), version)

            return json.dumps({
                "success": True,
                "recid": recid,
                "data_count": len(submissions),
                "generated_records": generated_record_ids
            })

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps({
            "success":
            False,
            "recid":
            recid,
            "errors": [
                "You do not have permission to finalise this "
                "submission. Only coordinators can do that."
            ]
        })
Esempio n. 5
0
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False):
    """
        Creates record SIP for each data record with a link to the associated
        publication
        :param synchronous: if true then workflow execution and creation is
        waited on, then everything is indexed in one go.
        If False, object creation is asynchronous, however reindexing is not
        performed. This is only really useful for the full migration of
        content.
    """
    hep_submission = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo").first()

    print('Finalising record {}'.format(recid))

    generated_record_ids = []
    if hep_submission \
        and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:

                if "recid" in record["_source"]:
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    delete_item_from_index(record["_id"],
                                           doc_type=CFG_DATA_TYPE, parent=record["_source"]["related_publication"])

        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())

        for submission in submissions:
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)
            # If we have a commit message, then we have a record update.
            # We will store the commit message and also update the
            # last_updated flag for the record.
            record['hepdata_doi'] = hep_submission.doi

            if commit_message:
                # On a revision, the last updated date will
                # be the current date.
                hep_submission.last_updated = datetime.now()

                commit_record = RecordVersionCommitMessage(
                    recid=recid,
                    version=version,
                    message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)

            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get('TESTING', False) and not current_app.config.get('NO_DOI_MINTING', False):
                for submission in submissions:
                    generate_doi_for_data_submission.delay(submission.id, submission.version)

                generate_doi_for_submission.delay(recid, version)

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            admin_indexer = AdminIndexer()
            admin_indexer.index_submission(hep_submission)

            send_finalised_email(hep_submission)

            for file_format in ['csv', 'yoda', 'root']:
                convert_and_store.delay(hep_submission.inspire_id, file_format, force=True)

            if send_tweet:
                tweet(record.get('title'), record.get('collaborations'),
                      "http://www.hepdata.net/record/ins{0}".format(record.get('inspire_id')))

            return json.dumps({"success": True, "recid": recid,
                               "data_count": len(submissions),
                               "generated_records": generated_record_ids})

        except NoResultFound:
            print('No record found to update. Which is super strange.')

    else:
        return json.dumps(
            {"success": False, "recid": recid,
             "errors": ["You do not have permission to finalise this "
                        "submission. Only coordinators can do that."]})