Ejemplo n.º 1
0
    def update_file(inspire_id, recid, only_record_information=False, send_tweet=False):
        """Refresh an existing record from the files prepared for ``inspire_id``.

        A fresh ``Migrator`` is created, the submission files are prepared with
        ``force_retrieval=True`` and the publication information is pushed into
        the record ``recid`` via ``update_record``.

        :param inspire_id: identifier passed to the migrator helpers.
        :param recid: id of the record to update.
        :param only_record_information: when true, only re-index the record
            instead of reloading and finalising the submission.
        :param send_tweet: forwarded to ``do_finalise``.
        :return: always ``None``; failures are logged.
        """
        migrator = Migrator()

        data_dir = migrator.prepare_files_for_submission(inspire_id, force_retrieval=True)
        if not data_dir:
            log.error('Failed to load {0}'.format(inspire_id))
            return

        fresh_information = migrator.retrieve_publication_information(inspire_id)
        record_information = update_record(recid, fresh_information)

        if only_record_information:
            # Metadata-only refresh: just re-index the updated record.
            index_record_ids([record_information['recid']])
            return

        try:
            submission_yaml = os.path.join(data_dir, "submission.yaml")
            loaded_recid = migrator.load_submission(
                record_information, data_dir, submission_yaml, update=True)

            if loaded_recid is None:
                return

            do_finalise(loaded_recid, publication_record=record_information,
                        force_finalise=True, send_tweet=send_tweet, update=True)
        except FailedSubmission as failure:
            # Report the failure and remove the submission it refers to.
            log.error(failure.message)
            failure.print_errors()
            remove_submission(failure.record_id)
Ejemplo n.º 2
0
Archivo: api.py Proyecto: ruphy/hepdata
    def load_file(inspire_id,
                  send_tweet=False,
                  convert=False,
                  base_url='http://hepdata.cedar.ac.uk/view/{0}/yaml'):
        """Create a record for ``inspire_id`` and load its submission.

        :param inspire_id: identifier passed to the migrator helpers; it is
            concatenated into the error message, so it must be a string.
        :param send_tweet: forwarded to ``do_finalise``.
        :param convert: forwarded to ``do_finalise``.
        :param base_url: passed to the ``Migrator`` constructor.
        :return: ``True`` once the submission has been loaded and finalised,
            ``False`` when the files could not be prepared or a
            ``FailedSubmission`` was raised, and ``None`` when
            ``load_submission`` returned no record id.
        """
        migrator = Migrator(base_url)

        # The second element of the pair is not needed when loading from scratch.
        data_dir, _ = migrator.prepare_files_for_submission(inspire_id)
        if not data_dir:
            log.error("Failed to load " + inspire_id)
            return False

        publication = migrator.retrieve_publication_information(inspire_id)
        record_information = create_record(publication)

        try:
            submission_yaml = os.path.join(data_dir, "submission.yaml")
            recid = migrator.load_submission(record_information, data_dir,
                                             submission_yaml)
            if recid is not None:
                do_finalise(recid,
                            publication_record=record_information,
                            force_finalise=True,
                            send_tweet=send_tweet,
                            convert=convert)
                return True
        except FailedSubmission as failure:
            log.error(failure.message)
            failure.print_errors()
            remove_submission(failure.record_id)
            return False

        # load_submission produced no record id: nothing was finalised.
        return None
Ejemplo n.º 3
0
Archivo: api.py Proyecto: ruphy/hepdata
    def update_file(inspire_id,
                    recid,
                    force=False,
                    only_record_information=False,
                    send_tweet=False,
                    convert=False):
        """Update record ``recid`` from the files prepared for ``inspire_id``.

        The record information is always refreshed through ``update_record``.
        The submission itself is reloaded only when ``force`` is set, or when
        all of the following hold: the stored submission is older than the
        old-site timestamp, its coordinator is ``1`` and exactly one
        ``HEPSubmission`` row exists for the record.

        :param inspire_id: identifier passed to the migrator helpers.
        :param recid: id of the record to update.
        :param force: reload the submission even if the checks above fail.
        :param only_record_information: when true, only re-index the record.
        :param send_tweet: forwarded to ``do_finalise``.
        :param convert: forwarded to ``do_finalise``.
        :return: always ``None``; progress is printed and failures are logged.
        """
        migrator = Migrator()

        data_dir, oldsite_last_updated = migrator.prepare_files_for_submission(
            inspire_id, force_retrieval=True)
        if not data_dir:
            log.error("Failed to load {0}".format(inspire_id))
            return

        fresh_information = migrator.retrieve_publication_information(inspire_id)
        record_information = update_record(recid, fresh_information)

        hep_submission = HEPSubmission.query.filter_by(
            publication_recid=recid).first()
        version_count = HEPSubmission.query.filter_by(
            publication_recid=recid).count()

        print('Old site last updated {}'.format(str(oldsite_last_updated)))
        print('New site last updated {}'.format(str(hep_submission.last_updated)))
        print('Coordinator ID is {}, version count is {}'.format(
            hep_submission.coordinator, version_count))

        # Evaluated unconditionally, before any branch is taken.
        allow_update = (hep_submission.last_updated < oldsite_last_updated
                        and hep_submission.coordinator == 1
                        and version_count == 1)

        if only_record_information:
            index_record_ids([record_information["recid"]])
            return

        if not (allow_update or force):
            print('Not updating record {}'.format(recid))
            return

        try:
            submission_yaml = os.path.join(data_dir, "submission.yaml")
            recid = migrator.load_submission(record_information,
                                             data_dir,
                                             submission_yaml,
                                             update=True)
            print('Loaded record {}'.format(recid))

            if recid is not None:
                do_finalise(recid,
                            publication_record=record_information,
                            force_finalise=True,
                            send_tweet=send_tweet,
                            update=True,
                            convert=convert)
        except FailedSubmission as failure:
            log.error(failure.message)
            failure.print_errors()
            remove_submission(failure.record_id)
Ejemplo n.º 4
0
    def load_file(inspire_id, send_tweet):
        """Load and finalise the submission prepared for ``inspire_id``.

        :param inspire_id: identifier passed to the migrator helpers; it is
            concatenated into the error message, so it must be a string.
        :param send_tweet: forwarded to ``do_finalise``.
        :return: always ``None``; failures are logged.
        """
        migrator = Migrator()

        data_dir = migrator.prepare_files_for_submission(inspire_id)
        if not data_dir:
            log.error('Failed to load ' + inspire_id)
            return

        record_information = migrator.retrieve_publication_information(inspire_id)

        try:
            submission_yaml = os.path.join(data_dir, "submission.yaml")
            recid = migrator.load_submission(record_information, data_dir,
                                             submission_yaml)
            if recid is None:
                return

            do_finalise(recid, publication_record=record_information,
                        force_finalise=True, send_tweet=send_tweet)
        except FailedSubmission as failure:
            # Report the failure and remove the submission it refers to.
            log.error(failure.message)
            failure.print_errors()
            remove_submission(failure.record_id)
Ejemplo n.º 5
0
    def update_file(inspire_id,
                    recid,
                    force=False,
                    only_record_information=False,
                    send_email=False,
                    send_tweet=False,
                    convert=False):
        """Update record ``recid`` from the files prepared for ``inspire_id``.

        The record information is refreshed through ``update_record`` once the
        publication lookup reports ``'success'``. The submission is reloaded
        only when its coordinator is ``1``, exactly one ``HEPSubmission`` row
        exists for the record, and either the stored submission is older than
        the old-site timestamp or ``force`` is set. Note that ``force`` only
        bypasses the timestamp comparison, not the other two checks.

        :param inspire_id: identifier passed to the migrator helpers; every
            ``"ins"`` substring is stripped before it is handed to the DOI task.
        :param recid: id of the record to update.
        :param force: skip the timestamp comparison.
        :param only_record_information: when true, re-index the record, queue
            ``generate_dois_for_submission`` and optionally notify by email
            instead of reloading the submission.
        :param send_email: in the record-information-only path, call
            ``notify_publication_update``.
        :param send_tweet: forwarded to ``do_finalise``.
        :param convert: forwarded to ``do_finalise``.
        :return: always ``None``; progress is printed and failures are logged.
        """
        migrator = Migrator()

        data_dir, oldsite_last_updated = migrator.prepare_files_for_submission(
            inspire_id, force_retrieval=True)
        if not data_dir:
            log.error("Failed to load {0}".format(inspire_id))
            return

        fresh_information, status = migrator.retrieve_publication_information(
            inspire_id)
        if status != 'success':
            log.error("Failed to retrieve publication information for {0}".
                      format(inspire_id))
            return
        record_information = update_record(recid, fresh_information)

        hep_submission = HEPSubmission.query.filter_by(
            publication_recid=recid).first()
        version_count = HEPSubmission.query.filter_by(
            publication_recid=recid).count()

        print('Old site last updated {}'.format(str(oldsite_last_updated)))
        print('New site last updated {}'.format(str(hep_submission.last_updated)))
        print('Coordinator ID is {}, version count is {}'.format(
            hep_submission.coordinator, version_count))

        # Evaluated unconditionally, before any branch is taken.
        allow_update = ((hep_submission.last_updated < oldsite_last_updated or force)
                        and hep_submission.coordinator == 1
                        and version_count == 1)

        if only_record_information:
            index_record_ids([record_information["recid"]])
            numeric_id = inspire_id.replace("ins", "")
            # update metadata stored in DataCite
            generate_dois_for_submission.delay(inspire_id=numeric_id)
            if send_email:
                # send email to all participants
                notify_publication_update(hep_submission, record_information)
            return

        if not allow_update:
            print('Not updating record {}'.format(recid))
            return

        try:
            submission_yaml = os.path.join(data_dir, "submission.yaml")
            recid = migrator.load_submission(record_information,
                                             data_dir,
                                             submission_yaml,
                                             update=True)
            print('Loaded record {}'.format(recid))

            if recid is not None:
                do_finalise(recid,
                            publication_record=record_information,
                            force_finalise=True,
                            send_tweet=send_tweet,
                            update=True,
                            convert=convert)
        except FailedSubmission as failure:
            log.error(failure.message)
            failure.print_errors()
            remove_submission(failure.record_id)
Ejemplo n.º 6
0
def mock_import_old_record(inspire_id=mock_inspire_ids[1], send_email=False):
    """Creates a submission but mimics the old migrated paths. (See hepdata
    master branch at ccd691b for old migrator module.)

    The record is created from the INSPIRE publication information, the
    bundled ``old_hepdata_zips/ins<inspire_id>.zip`` is unpacked into the
    record's data directory, the inner ``ins<inspire_id>.zip`` is unpacked
    into a timestamped sub-directory, and that directory is processed with the
    old submission/data schemas before the record is finalised.

    :param inspire_id: one of ``mock_inspire_ids``.
    :param send_email: forwarded to ``do_finalise``.
    :return: ``False`` when the publication lookup fails, the zip is missing
        or processing reports errors; ``None`` otherwise.
    :raises ValueError: if ``inspire_id`` is not in ``mock_inspire_ids``.
    """
    if inspire_id not in mock_inspire_ids:
        raise ValueError('Invalid inspire id %s. Accepted values are: %s' %
                         (inspire_id, ', '.join(mock_inspire_ids)))

    # Use zipped test data for specific record(s)
    publication_information, status = get_inspire_record_information(
        inspire_id)
    publication_information["inspire_id"] = inspire_id

    # Create record
    if status == "success":
        record_information = create_record(publication_information)
    else:
        log.error("Failed to retrieve publication information for " +
                  inspire_id)
        return False

    # Unzip into correct data dir
    data_path = get_data_path_for_record(record_information['recid'])
    base_dir = os.path.dirname(os.path.realpath(__file__))
    zip_path = os.path.join(base_dir, 'old_hepdata_zips',
                            'ins%s.zip' % inspire_id)
    if os.path.isfile(zip_path):
        log.info('Unzipping %s to %s' % (zip_path, data_path))
        shutil.unpack_archive(zip_path, data_path)
        # The outer archive contains the submission zip; unpack that into a
        # timestamped directory under the record's data path.
        time_stamp = str(int(round(time.time())))
        yaml_path = os.path.join(data_path, time_stamp)
        sub_zip_path = os.path.join(data_path, 'ins%s.zip' % inspire_id)
        shutil.unpack_archive(sub_zip_path, yaml_path)
    else:
        log.error('Invalid path %s' % zip_path)
        return False

    # Create submission
    admin_user_id = 1

    # Consume data payload and store in db.
    get_or_create_hepsubmission(record_information["recid"], admin_user_id)

    errors = process_submission_directory(yaml_path,
                                          os.path.join(yaml_path,
                                                       "submission.yaml"),
                                          record_information["recid"],
                                          old_submission_schema=True,
                                          old_data_schema=True)

    if errors:
        # Each extra positional argument to log.error() needs a matching
        # %-placeholder in the message; otherwise logging fails to format the
        # record and the message and errors are never logged.
        log.error("Submission failed for %s. Errors: %s",
                  record_information["recid"], errors)
        return False

    do_finalise(record_information['recid'],
                publication_record=record_information,
                force_finalise=True,
                convert=False,
                send_email=send_email)
Ejemplo n.º 7
0
def _import_record(inspire_id,
                   update_existing=False,
                   base_url='https://hepdata.net',
                   send_email=False):
    """Import the record ``inspire_id`` from ``base_url`` and finalise it.

    A new record is created when no submission exists for ``inspire_id``; an
    existing one is only touched when ``update_existing`` is true. The
    downloaded archive is copied into a timestamped data directory, processed
    (with a second attempt using the old schemas if the first reports errors)
    and finalised. Any exception raised during those steps unloads the record.

    :param inspire_id: identifier of the record to import; it is concatenated
        into a log message, so it must be a string.
    :param update_existing: allow updating a record that already exists.
    :param base_url: passed to ``_download_file``.
    :param send_email: forwarded to ``do_finalise``.
    :return: ``True`` when the record was finalised successfully, otherwise
        ``False``.
    """
    publication_information, status = get_inspire_record_information(
        inspire_id)
    if status != "success":
        log.error("Failed to retrieve publication information for " +
                  inspire_id)
        return False

    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)

    if current_submission:
        log.info("The record with inspire id {0} already exists.".format(
            inspire_id))
        if not update_existing:
            log.info("Not updating as update_existing is False")
            return False
        log.info("Updating instead")
        recid = current_submission.publication_recid
    else:
        log.info(
            "The record with id {0} does not exist in the database, so we're loading it."
            .format(inspire_id))
        publication_information["inspire_id"] = inspire_id
        new_record = create_record(publication_information)
        recid = new_record['recid']

    try:
        downloaded = _download_file(base_url, inspire_id)
        archive_name = os.path.basename(downloaded)

        # Each import gets its own timestamped directory under the record.
        stamp = str(int(round(time.time())))
        target_dir = get_data_path_for_record(str(recid), stamp)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        archive_path = os.path.join(target_dir, archive_name)
        log.info("Moving file to %s" % archive_path)
        shutil.copy(downloaded, archive_path)

        # Create submission
        admin_user_id = 1
        hepsubmission = get_or_create_hepsubmission(recid, admin_user_id)
        db.session.add(hepsubmission)
        db.session.commit()

        # Then process the payload as for any other record
        errors = process_zip_archive(archive_path, recid)
        if errors:
            log.info("Errors processing archive. Re-trying with old schema.")
            # Clean up the first attempt before retrying: delete the tables,
            # then remove the directory named after the archive.
            cleanup_submission(recid, 1, [])
            unpacked_dir = os.path.join(os.path.dirname(archive_path),
                                        remove_file_extension(archive_name))
            shutil.rmtree(unpacked_dir)

            errors = process_zip_archive(archive_path,
                                         recid,
                                         old_submission_schema=True,
                                         old_data_schema=True)

            if errors:
                log.error("Could not process zip archive: ")
                for bad_file, messages in errors.items():
                    log.error("    %s:" % bad_file)
                    for entry in messages:
                        log.error("        %s" % entry['message'])

                raise ValueError("Could not validate record.")

        # Delete any previous upload folders
        cleanup_old_files(hepsubmission)

        log.info("Finalising record %s" % recid)

        is_update = current_submission is not None
        outcome = json.loads(
            do_finalise(recid,
                        force_finalise=True,
                        update=is_update,
                        convert=False,
                        send_email=send_email))

        if not (outcome and outcome['success']):
            raise ValueError("Failed to finalise record.")

        log.info("Imported record %s with %s submissions" %
                 (recid, outcome['data_count']))
        return True
    except Exception as e:
        # Unload record
        unload_submission(recid)
        log.error(e)
        return False