Exemple #1
0
def upload_keywords(filename, mode='correct', recids=None):
    """Store the extracted keywords in the database.

    :param filename: fullpath to the file with marc record.
    :keyword mode: correct|replace|add|delete
        use correct to add fields if they are different
        replace all fields with fields from the file
        add - add (even duplicate) fields
        delete - delete fields which are inside the file.
    :keyword recids: list of record ids, this arg comes from
        the bibclassify daemon and it is used when the recids
        contains one entry (recid) - ie. one individual document
        was processed. We use it to mark the job title so that
        it is possible to query database if the bibclassify
        was run over that document (in case of collections with
        many recids, we simply construct a general title).
    :raises ValueError: if mode is not one of the four known modes.
    """
    # Map the symbolic mode onto the bibupload command-line switch.
    mode_flags = {'correct': '-c', 'replace': '-r', 'add': '-a', 'delete': '-d'}
    try:
        m = mode_flags[mode]
    except KeyError:
        # ValueError is more precise than a bare Exception and stays
        # backward compatible with callers catching Exception.
        raise ValueError('Unknown mode')

    # let's use the user column to store the information, cause no better alternative in sight...
    user_title = 'bibclassify.upload'
    if recids and len(recids) == 1:
        # Mark the job with the individual recid so the run can be queried later.
        user_title = 'extract:%d' % recids[0]
    bibtask.task_low_level_submission('bibupload', user_title, '-n', m,
                                      filename)
Exemple #2
0
def upload_keywords(filename, mode="correct", recids=None):
    """Store the extracted keywords in the database.

    :param filename: fullpath to the file with marc record.
    :keyword mode: one of correct|replace|add|delete, selecting the
        bibupload mode: correct adds fields if they differ, replace
        overwrites all fields from the file, add appends fields (even
        duplicates), delete removes the fields present in the file.
    :keyword recids: list of record ids supplied by the bibclassify
        daemon; when it holds exactly one recid (one individual document
        was processed) that recid is embedded in the job title so the
        database can later be queried for whether bibclassify ran over
        that document. For multi-record collections a generic title is used.
    """
    # Translate the symbolic mode to the matching bibupload switch.
    flag_by_mode = {"correct": "-c", "replace": "-r",
                    "add": "-a", "delete": "-d"}
    if mode not in flag_by_mode:
        raise Exception("Unknown mode")
    upload_flag = flag_by_mode[mode]

    # The bibsched "user" column doubles as a job marker; nothing better exists.
    job_title = "bibclassify.upload"
    if recids and len(recids) == 1:
        job_title = "extract:%d" % recids[0]
    bibtask.task_low_level_submission("bibupload", job_title, "-n",
                                      upload_flag, filename)
Exemple #3
0
    def _run_tasks(obj, dummy_eng):
        """Queue the upload tasklet and per-community webcoll runs for a sealed SIP."""
        from invenio.legacy.bibsched.bibtask import task_low_level_submission

        deposition = Deposition(obj)
        sip = deposition.get_latest_sip(sealed=True)

        record_id = sip.metadata['recid']
        provisional = sip.metadata.get('provisional_communities', [])

        # Propagate the workflow's sequence id (if any) to every queued task.
        shared_args = []
        seq = getattr(deposition.workflow_object, 'task_sequence_id', None)
        if seq:
            shared_args.extend(['-I', str(seq)])

        # `update` is a free variable from the enclosing scope.
        tasklet = ('bst_openaire_update_upload' if update
                   else 'bst_openaire_new_upload')

        sip.task_ids.append(task_low_level_submission(
            'bibtasklet', 'webdeposit', '-T', tasklet,
            '--argument', 'recid=%s' % record_id, *shared_args
        ))

        # One webcoll run per provisional community the record belongs to.
        for community in provisional:
            sip.task_ids.append(task_low_level_submission(
                'webcoll', 'webdeposit',
                '-c', 'provisional-user-%s' % community, *shared_args
            ))
        deposition.update()
Exemple #4
0
def bibupload_record(record=None, collection=None,
                     file_prefix="bibuploadutils", mode="-c",
                     alias='bibuploadutils', opts=None):
    """Write a MARCXML file and bibupload it.

    :param record: a single record to upload (used when ``collection`` is None).
    :param collection: iterable of records, uploaded in batches of
        CFG_MAX_RECORDS records per bibupload task.
    :param file_prefix: prefix for the temporary MARCXML file(s).
    :param mode: bibupload mode switch (default '-c', correct).
    :param alias: task alias shown in the bibsched queue.
    :param opts: extra command-line options passed to bibupload.
    """
    # None default avoids the shared-mutable-default pitfall; behaviorally
    # identical for callers.
    if opts is None:
        opts = []
    if collection is None and record is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for rec in collection:
            # Bug fix: serialize the record being iterated (was `record`,
            # which wrote the same -- possibly None -- object repeatedly).
            file_out.write(create_marcxml(rec))
            tot += 1
            if tot == CFG_MAX_RECORDS:
                # Batch is full: close this file, submit it, start a new one.
                file_out.write("</collection>")
                close_temp_file(file_out, filename)
                task_low_level_submission(
                    'bibupload', alias, mode, filename, *opts
                )

                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        tot = 1
        file_out.write(create_marcxml(record))

    close_temp_file(file_out, filename)
    # Submit only if the final file actually contains records.
    if tot > 0:
        task_low_level_submission('bibupload', alias, mode, filename, *opts)
Exemple #5
0
def bibupload(record=None, collection=None, file_prefix="", mode="-c"):
    """
    General purpose function that will write a MARCXML file and call bibupload
    on it.
    """
    if record is None and collection is None:
        return

    file_out, filename = open_temp_file(file_prefix)

    if collection is not None:
        # Stream the records out in batches of MAX_RECORDS per bibupload task.
        file_out.write("<collection>")
        batch_count = 0
        for rec in collection:
            file_out.write(record_xml_output(rec))
            batch_count += 1
            if batch_count == MAX_RECORDS:
                # Batch full: close the file, submit it, open a fresh one.
                file_out.write("</collection>")
                file_out.close()
                logger.debug("Submitting bibupload %s -n %s" % (mode, filename))
                task_low_level_submission('bibupload', 'openaire', mode, filename, '-n')

                file_out, filename = open_temp_file(file_prefix)
                file_out.write("<collection>")
                batch_count = 0
        file_out.write("</collection>")
    else:
        # Single-record upload (collection is None, record is not).
        batch_count = 1
        file_out.write(record_xml_output(record))

    file_out.close()
    if batch_count > 0:
        logger.debug("Submitting bibupload %s -n %s" % (mode, filename))
        task_low_level_submission('bibupload', 'openaire', mode, filename, '-n')
Exemple #6
0
def bst_twitter_fetcher(query):
    """
    Fetch the tweets matching a query and upload them into Invenio.

    @param query: the Twitter search query to fetch tweets for
    """
    ## We prepare a temporary MARCXML file to upload.
    fd, name = tempfile.mkstemp(suffix='.xml', prefix='tweets', dir=CFG_TMPDIR)
    tweets = get_tweets(query)
    if tweets:
        os.write(fd, """<collection>\n""")
        for i, tweet in enumerate(tweets):
            ## For every tweet we transform it to MARCXML and we dump it in the file.
            task_update_progress('DONE: tweet %s out %s' % (i, len(tweets)))
            os.write(fd, tweet_to_record(tweet, query))

        ## Bug fix: the closing tag used to be written as "</collection\n>",
        ## which is not well-formed XML and breaks the subsequent bibupload.
        os.write(fd, """</collection>\n""")
        os.close(fd)

        ## Invenio magic: we schedule an upload of the created MARCXML to be inserted
        ## ASAP in the system.
        task_low_level_submission('bibupload', 'admin', '-i', '-r', name,
                                  '-P5')
        write_message("Uploaded file %s with %s new tweets about %s" %
                      (name, len(tweets), query))
    else:
        ## Bug fix: close the descriptor here too; it used to leak when no
        ## tweets were returned.
        os.close(fd)
        write_message("No new tweets about %s" % query)
Exemple #7
0
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    """
    if not overwrite:
        # Refuse to clobber references that are already present.
        existing = get_record(recid)
        if existing and record_has_field(existing, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    # Curated references must never be regenerated automatically.
    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Extract references from the stored record.
    references_xml = extract_references_from_record_xml(recid)

    # Persist the result to a shared temp file for bibupload to pick up.
    out_fd, out_path = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                               dir=CFG_TMPSHAREDDIR)
    out_file = os.fdopen(out_fd, 'w')
    out_file.write(references_xml)
    out_file.close()

    # Queue the record update at priority 4 in correct (-c) mode.
    task_low_level_submission('bibupload', 'refextract', '-P', '4',
                              '-c', out_path)
Exemple #8
0
def bibupload_record(record=None,
                     collection=None,
                     file_prefix="bibuploadutils",
                     mode="-c",
                     alias='bibuploadutils',
                     opts=None):
    """Write a MARCXML file and bibupload it.

    :param record: single record to upload when no collection is given.
    :param collection: iterable of records, uploaded in CFG_MAX_RECORDS batches.
    :param file_prefix: prefix for the temporary MARCXML files.
    :param mode: bibupload mode flag (default '-c', correct).
    :param alias: bibsched task alias.
    :param opts: extra bibupload command-line options.
    """
    # None instead of a mutable [] default; behaviorally identical for callers.
    if opts is None:
        opts = []
    if collection is None and record is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for rec in collection:
            # Bug fix: write the iterated record `rec` (was `record`, which
            # re-serialized the same -- possibly None -- object every time).
            file_out.write(create_marcxml(rec))
            tot += 1
            if tot == CFG_MAX_RECORDS:
                # Batch full: flush, submit, and start a fresh file.
                file_out.write("</collection>")
                close_temp_file(file_out, filename)
                task_low_level_submission('bibupload', alias, mode, filename,
                                          *opts)

                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        tot = 1
        file_out.write(create_marcxml(record))

    close_temp_file(file_out, filename)
    # Submit only when the last file actually holds records.
    if tot > 0:
        task_low_level_submission('bibupload', alias, mode, filename, *opts)
Exemple #9
0
    def _run_tasks(obj, dummy_eng):
        """Queue the upload tasklet plus per-community webcoll for the sealed SIP."""
        depo = Deposition(obj)
        sip = depo.get_latest_sip(sealed=True)

        recid = sip.metadata['recid']
        community_ids = sip.metadata.get('provisional_communities', [])

        # Every queued task runs at priority 5 and shares the sequence id.
        extra_args = ['-P5', ]
        sequence = getattr(depo.workflow_object, 'task_sequence_id', None)
        if sequence:
            extra_args += ['-I', str(sequence)]

        # `update` is a free variable from the enclosing scope.
        if update:
            tasklet = 'bst_openaire_update_upload'
        else:
            tasklet = 'bst_openaire_new_upload'

        tid = task_low_level_submission(
            'bibtasklet', 'webdeposit', '-T', tasklet,
            '--argument', 'recid=%s' % recid, *extra_args)
        sip.task_ids.append(tid)

        for community in community_ids:
            tid = task_low_level_submission(
                'webcoll', 'webdeposit',
                '-c', 'provisional-user-%s' % community, *extra_args)
            sip.task_ids.append(tid)
        depo.update()
Exemple #10
0
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False,
                    task_name="bibedit", sequence_id=None):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string in stead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    @return: True

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): if the cache is missing, `record` is never bound and
        # the code below raises NameError -- presumably callers guarantee a
        # cache entry exists; confirm before relying on this path.
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        fd, file_path = tempfile.mkstemp(dir=cfg['CFG_BIBEDIT_CACHEDIR'],
                                         prefix="%s_" % cfg['CFG_BIBEDIT_FILENAME'],
                                         suffix="_%s_%s.xml" % (recid, uid))
        # Context manager guarantees the descriptor is closed even if the
        # write fails (the original leaked it on error).
        with os.fdopen(fd, 'w') as f:
            f.write(xml_to_write)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   cfg['CFG_BIBEDIT_TO_MERGE_SUFFIX'])
        with open(file_path, 'w') as xml_file:
            xml_file.write(xml_to_write)

    user_name = get_user_info(uid)[1]
    if to_upload:
        # Priority 5, replace (-r) mode, attributed to the editing user.
        args = ['bibupload', user_name, '-P', '5', '-r',
                file_path, '-u', user_name]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True
Exemple #11
0
def openaire_create_icon(docid=None, recid=None, reformat=True):
    """
    Celery task to create an icon for all documents in a given record or for
    just a specific document.

    :param docid: id of a single BibDoc to process (used when recid is None).
    :param recid: id of a record; icons are created for all of its documents.
    :param reformat: when True, schedule a bibreformat task for the records
        linked to a document after its icon has been attached.
    """
    if recid:
        docs = BibRecDocs(recid).list_bibdocs()
    else:
        docs = [BibDoc(docid)]

    # Celery task will fail if BibDoc does not exists (on purpose ;-)
    for d in docs:
        logger.debug("Checking document %s" % d)
        # Skip documents that already carry an icon in the expected subformat.
        if not d.get_icon(subformat_re=re.compile(ICON_SUBFORMAT)):
            logger.debug("Document has no icon")
            for f in d.list_latest_files():
                logger.debug("Checking file %s" % f)
                if not f.is_icon():
                    logger.debug("File not an icon")
                    file_path = f.get_full_path()
                    icon_path = None
                    try:
                        filename = os.path.splitext(os.path.basename(file_path))[0]
                        logger.info("Creating icon from file %s" % file_path)
                        (icon_dir, icon_name) = create_icon(
                            {
                                "input-file": file_path,
                                "icon-name": "icon-%s" % filename,
                                "multipage-icon": False,
                                "multipage-icon-delay": 0,
                                "icon-scale": ICON_SIZE,
                                "icon-file-format": ICON_FILEFORMAT,
                                "verbosity": 0,
                            }
                        )
                        icon_path = os.path.join(icon_dir, icon_name)
                    except InvenioWebSubmitIconCreatorError as e:
                        # Creation failures are logged and reported but do not
                        # abort the task; icon_path stays None so the attach
                        # step below is skipped for this file.
                        logger.warning("Icon for file %s could not be created: %s" % (file_path, str(e)))
                        register_exception(
                            prefix="Icon for file %s could not be created: %s" % (file_path, str(e)), alert_admin=False
                        )

                    try:
                        if icon_path and os.path.exists(icon_path):
                            logger.debug("Adding icon %s to document" % icon_path)
                            d.add_icon(icon_path, subformat=ICON_SUBFORMAT)
                            # Reformat every record linking to this document so
                            # the new icon shows up on the record pages.
                            recid_list = ",".join([str(x["recid"]) for x in d.bibrec_links])
                            if reformat:
                                task_low_level_submission("bibreformat", "openaire", "-i", recid_list)

                    except InvenioBibDocFileError as e:
                        # Attaching failed: warn and report, keep processing
                        # the remaining files/documents.
                        logger.warning(
                            "Icon %s for file %s could not be added to " "document: %s" % (icon_path, f, str(e))
                        )
                        register_exception(
                            prefix="Icon %s for file %s could not be added" " to document: %s" % (icon_path, f, str(e)),
                            alert_admin=False,
                        )
Exemple #12
0
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False,
                    task_name="bibedit", sequence_id=None):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string in stead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    @return: True

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): if the cache is missing, `record` is never bound and
        # the code below raises NameError -- presumably callers guarantee a
        # cache entry exists; confirm before relying on this path.
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        fd, file_path = tempfile.mkstemp(dir=cfg['CFG_BIBEDIT_CACHEDIR'],
                                         prefix="%s_" % cfg['CFG_BIBEDIT_FILENAME'],
                                         suffix="_%s_%s.xml" % (recid, uid))
        # Context manager guarantees the descriptor is closed even if the
        # write fails (the original leaked it on error).
        with os.fdopen(fd, 'w') as f:
            f.write(xml_to_write)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   cfg['CFG_BIBEDIT_TO_MERGE_SUFFIX'])
        with open(file_path, 'w') as xml_file:
            xml_file.write(xml_to_write)

    user_name = get_user_info(uid)[1]
    if to_upload:
        # Priority 5, replace (-r) mode, attributed to the editing user.
        args = ['bibupload', user_name, '-P', '5', '-r',
                file_path, '-u', user_name]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True
Exemple #13
0
def submit_refextract_task(recids):
    """Submit a refextract task if needed"""
    # Drop recids whose references must not be touched (mostly curated records).
    safe_recids = [r for r in recids if check_record_for_refextract(r)]

    if not safe_recids:
        return
    joined = ','.join(str(r) for r in safe_recids)
    task_low_level_submission('refextract', NAME, '-i', joined)
def addmeta(request, sub_id):
    """
    Checks the submitted metadata form for validity.
    Returns a new page with success message if valid, otherwise it returns a
    form with the errors marked.

    :param request: the request object carrying the posted metadata form.
    :param sub_id: id of the submission whose upload directory holds the files.
    """
    if sub_id is None:
        #just return to deposit
        return redirect(url_for('.deposit'))

    CFG_B2SHARE_UPLOAD_FOLDER = current_app.config.get(
        "CFG_B2SHARE_UPLOAD_FOLDER")
    updir = os.path.join(CFG_B2SHARE_UPLOAD_FOLDER, sub_id)
    # Refuse to attach metadata when nothing was uploaded for this submission.
    if (not os.path.isdir(updir)) or (not os.listdir(updir)):
        return render_template('500.html', message="Uploads not found"), 500

    # Pick the domain-specific metadata class; fall back to the generic one.
    domain = request.form['domain'].lower()
    if domain in metadata_classes():
        meta = metadata_classes()[domain]()
    else:
        from b2share_model.model import SubmissionMetadata
        meta = SubmissionMetadata()

    if not is_current_user_allowed_to_deposit(meta):
        return jsonify(
            valid=False,
            html=render_template('b2share-addmeta-table-denied.html'))

    # Build a WTForms form class dynamically from the metadata model.
    MetaForm = model_form(meta.__class__,
                          base_class=FormWithKey,
                          exclude=['submission', 'submission_type'],
                          field_args=meta.field_args,
                          converter=HTML5ModelConverter())

    meta_form = MetaForm(request.form, meta)

    if meta_form.validate_on_submit():
        recid, marc = b2share_marc_handler.create_marc(request.form, sub_id,
                                                       current_user['email'],
                                                       meta)
        tmp_file = write_marc_to_temp_file(marc)
        # all usual tasks have priority 0; we want the bibuploads to run first
        from invenio.legacy.bibsched.bibtask import task_low_level_submission
        task_low_level_submission('bibupload', 'webdeposit', '--priority', '1',
                                  '-r', tmp_file)
        return jsonify(valid=True,
                       newurl=url_for("record.metadata", recid=recid),
                       html=render_template('record_waitforit.html',
                                            recid=recid,
                                            marc=marc))

    # Validation failed: re-render the metadata form with errors marked.
    return jsonify(valid=False,
                   html=render_template('b2share-addmeta-table.html',
                                        sub_id=sub_id,
                                        metadata=meta,
                                        form=meta_form,
                                        getattr=getattr))
Exemple #15
0
def submit_refextract_task(recids):
    """Submit a refextract task if needed"""
    # Keep only records that are safe to re-extract (curated ones are skipped).
    extractable = []
    for recid in recids:
        if check_record_for_refextract(recid):
            extractable.append(recid)

    if extractable:
        task_low_level_submission("refextract", NAME, "-i",
                                  ",".join(str(r) for r in extractable))
Exemple #16
0
 def upload_marcxml_file(marcxml):
     """Create a temporary MARCXML file and send it to bibupload.

     The file name embeds the enclosing batch job's recid and a UUID so
     concurrent jobs never collide; a bibupload task is queued in correct
     (-c) mode.
     """
     xml_filename = "bibencode_" + str(batch_job["recid"]) + "_" + str(uuid.uuid4()) + ".xml"
     xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
     # Bug fix: the file() builtin is Python-2-only and the handle leaked if
     # write() failed; open() in a context manager fixes both.
     with open(xml_filename, "w") as xml_file:
         xml_file.write(marcxml)
     targs = ["-c", xml_filename]
     task_low_level_submission("bibupload", "bibencode", *targs)
Exemple #17
0
 def upload_marcxml_file(marcxml):
     """Create a temporary MARCXML file and send it to bibupload.

     The file name embeds the enclosing batch job's recid and a UUID so
     concurrent jobs never collide; a bibupload task is queued in correct
     (-c) mode.
     """
     xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
     xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
     # Bug fix: the file() builtin is Python-2-only and the handle leaked if
     # write() failed; open() in a context manager fixes both.
     with open(xml_filename, 'w') as xml_file:
         xml_file.write(marcxml)
     targs = ['-c', xml_filename]
     task_low_level_submission('bibupload', 'bibencode', *targs)
 def upload_marcxml_file(marcxml):
     """Create a temporary MARCXML file and send it to bibupload.

     The file name embeds the enclosing batch job's recid and a UUID so
     concurrent jobs never collide; a bibupload task is queued in correct
     (-c) mode.
     """
     xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
     xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
     # Bug fix: the file() builtin is Python-2-only and the handle leaked if
     # write() failed; open() in a context manager fixes both.
     with open(xml_filename, 'w') as xml_file:
         xml_file.write(marcxml)
     targs = ['-c', xml_filename]
     task_low_level_submission('bibupload', 'bibencode', *targs)
Exemple #19
0
def create_ill_record(book_info):
    """
    Create a new ILL record

    @param book_info: book's information
    @type book_info: tuple

    @return MARC record
    """

    (title, author, place, publisher, year, edition, isbn) = book_info

    ill_record = """
        <record>
            <datafield tag="020" ind1=" " ind2=" ">
                <subfield code="a">%(isbn)s</subfield>
            </datafield>
            <datafield tag="100" ind1=" " ind2=" ">
                <subfield code="a">%(author)s</subfield>
            </datafield>
            <datafield tag="245" ind1=" " ind2=" ">
                <subfield code="a">%(title)s</subfield>
            </datafield>
            <datafield tag="250" ind1=" " ind2=" ">
                <subfield code="a">%(edition)s</subfield>
            </datafield>
            <datafield tag="260" ind1=" " ind2=" ">
                <subfield code="a">%(place)s</subfield>
                <subfield code="b">%(publisher)s</subfield>
                <subfield code="c">%(year)s</subfield>
            </datafield>
            <datafield tag="980" ind1=" " ind2=" ">
                <subfield code="a">ILLBOOK</subfield>
            </datafield>
        </record>
  """ % {'isbn':      encode_for_xml(isbn),
         'author':    encode_for_xml(author),
         'title':     encode_for_xml(title),
         'edition':   encode_for_xml(edition),
         'place':     encode_for_xml(place),
         'publisher': encode_for_xml(publisher),
         'year':      encode_for_xml(year)}

    file_path = '%s/%s_%s.xml' % (CFG_TMPDIR, 'bibcirculation_ill_book',
                                  time.strftime("%Y%m%d_%H%M%S"))

    # Context manager closes the handle even if the write fails (the
    # original leaked it on error).
    with open(file_path, 'w') as xml_file:
        xml_file.write(ill_record)

    # Pass XML file to BibUpload.
    task_low_level_submission('bibupload', 'bibcirculation',
                              '-P', '5', '-i', file_path)

    return ill_record
Exemple #20
0
def create_ill_record(book_info):
    """
    Create a new ILL record

    @param book_info: book's information
    @type book_info: tuple

    @return MARC record
    """

    (title, author, place, publisher, year, edition, isbn) = book_info

    ill_record = """
        <record>
            <datafield tag="020" ind1=" " ind2=" ">
                <subfield code="a">%(isbn)s</subfield>
            </datafield>
            <datafield tag="100" ind1=" " ind2=" ">
                <subfield code="a">%(author)s</subfield>
            </datafield>
            <datafield tag="245" ind1=" " ind2=" ">
                <subfield code="a">%(title)s</subfield>
            </datafield>
            <datafield tag="250" ind1=" " ind2=" ">
                <subfield code="a">%(edition)s</subfield>
            </datafield>
            <datafield tag="260" ind1=" " ind2=" ">
                <subfield code="a">%(place)s</subfield>
                <subfield code="b">%(publisher)s</subfield>
                <subfield code="c">%(year)s</subfield>
            </datafield>
            <datafield tag="980" ind1=" " ind2=" ">
                <subfield code="a">ILLBOOK</subfield>
            </datafield>
        </record>
  """ % {'isbn':      encode_for_xml(isbn),
         'author':    encode_for_xml(author),
         'title':     encode_for_xml(title),
         'edition':   encode_for_xml(edition),
         'place':     encode_for_xml(place),
         'publisher': encode_for_xml(publisher),
         'year':      encode_for_xml(year)}

    file_path = '%s/%s_%s.xml' % (CFG_TMPDIR, 'bibcirculation_ill_book',
                                  time.strftime("%Y%m%d_%H%M%S"))

    # Context manager closes the handle even if the write fails (the
    # original leaked it on error).
    with open(file_path, 'w') as xml_file:
        xml_file.write(ill_record)

    # Pass XML file to BibUpload.
    task_low_level_submission('bibupload', 'bibcirculation',
                              '-P', '5', '-i', file_path)

    return ill_record
Exemple #21
0
def cb_submit_bibupload(bibcatalog_system=None, records=None):
    """Serialize extracted records to a temp file and queue a bibupload."""
    if not records:
        return
    marcxml = print_records(records)

    # Persist the serialized references to a shared temporary file.
    fd, path = mkstemp(prefix=CFG_REFEXTRACT_FILENAME, dir=CFG_TMPSHAREDDIR)
    handle = os.fdopen(fd, "w")
    handle.write(marcxml)
    handle.close()

    # Queue the record update in correct (-c) mode.
    task_low_level_submission("bibupload", "refextract", "-c", path)
def addmeta(request, sub_id):
    """
    Checks the submitted metadata form for validity.
    Returns a new page with success message if valid, otherwise it returns a
    form with the errors marked.

    :param request: the request object carrying the posted metadata form.
    :param sub_id: id of the submission whose upload directory holds the files.
    """
    if sub_id is None:
        # just return to deposit
        return redirect(url_for(".deposit"))

    CFG_B2SHARE_UPLOAD_FOLDER = current_app.config.get("CFG_B2SHARE_UPLOAD_FOLDER")
    updir = os.path.join(CFG_B2SHARE_UPLOAD_FOLDER, sub_id)
    # Refuse to attach metadata when nothing was uploaded for this submission.
    if (not os.path.isdir(updir)) or (not os.listdir(updir)):
        return render_template("500.html", message="Uploads not found"), 500

    # Pick the domain-specific metadata class; fall back to the generic one.
    domain = request.form["domain"].lower()
    if domain in metadata_classes():
        meta = metadata_classes()[domain]()
    else:
        from b2share_model.model import SubmissionMetadata

        meta = SubmissionMetadata()

    # Build a WTForms form class dynamically from the metadata model.
    MetaForm = model_form(
        meta.__class__,
        base_class=FormWithKey,
        exclude=["submission", "submission_type"],
        field_args=meta.field_args,
        converter=HTML5ModelConverter(),
    )

    meta_form = MetaForm(request.form, meta)

    if meta_form.validate_on_submit():
        recid, marc = b2share_marc_handler.create_marc(request.form, sub_id, current_user["email"], meta)
        tmp_file = write_marc_to_temp_file(marc)
        # all usual tasks have priority 0; we want the bibuploads to run first
        from invenio.legacy.bibsched.bibtask import task_low_level_submission

        task_low_level_submission("bibupload", "webdeposit", "--priority", "1", "-r", tmp_file)
        return jsonify(
            valid=True,
            newurl=url_for("record.metadata", recid=recid),
            html=render_template("record_waitforit.html", recid=recid, marc=marc),
        )

    # Validation failed: re-render the metadata form with errors marked.
    return jsonify(
        valid=False,
        html=render_template(
            "b2share-addmeta-table.html", sub_id=sub_id, metadata=meta, form=meta_form, getattr=getattr
        ),
    )
Exemple #23
0
def cb_submit_bibupload(bibcatalog_system=None, records=None):
    """Write extracted references to a temp file and schedule a bibupload."""
    if records:
        xml_payload = print_records(records)

        # Save the serialized records to a shared temporary file.
        out_fd, out_path = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                   dir=CFG_TMPSHAREDDIR)
        writer = os.fdopen(out_fd, 'w')
        writer.write(xml_payload)
        writer.close()

        # Schedule the correcting (-c) upload.
        task_low_level_submission('bibupload', 'refextract', '-c', out_path)
Exemple #24
0
    def run_deposition_tasks(self, deposition_id, with_webcoll=True):
        """
        Run all task ids specified in the latest SIP and optionally run
        webcoll.
        """
        from invenio.modules.deposit.models import Deposition
        deposition = Deposition.get(deposition_id)
        sip = deposition.get_latest_sip(sealed=True)

        # Execute every bibsched task the submission queued.
        for queued_id in sip.task_ids:
            self.run_task_id(queued_id)

        if not with_webcoll:
            return

        # Run webcoll (to ensure record is assigned permissions).
        from invenio.legacy.bibsched.bibtask import \
            task_low_level_submission
        webcoll_id = task_low_level_submission('webcoll', 'webdeposit', '-q')
        self.run_task_id(webcoll_id)

        # The record page must now be reachable over the secure site URL.
        response = self.client.get(
            url_for('record.metadata', recid=sip.metadata['recid']),
            base_url=self.app.config['CFG_SITE_SECURE_URL'],
        )
        self.assertStatus(response, 200)
Exemple #25
0
    def create(obj, dummy_eng):
        """Seal the latest SIP, dump it to a temp file and queue its bibupload."""
        #FIXME change share tmp directory
        from invenio.config import CFG_TMPSHAREDDIR
        from invenio.legacy.bibsched.bibtask import task_low_level_submission, \
            bibtask_allocate_sequenceid
        deposition = Deposition(obj)

        sip = deposition.get_latest_sip(sealed=False)
        sip.seal()

        # Dump the sealed SIP package into a shared temp file for bibupload.
        fd, package_path = mkstemp(
            prefix="webdeposit-%s-%s" % (deposition.id, sip.uuid),
            suffix='.xml',
            dir=CFG_TMPSHAREDDIR,
        )
        os.write(fd, sip.package)
        os.close(fd)

        # Trick to have access to task_sequence_id in subsequent tasks.
        deposition.workflow_object.task_sequence_id = bibtask_allocate_sequenceid()

        # Existing records are replaced (-r); new ones are inserted (-i).
        mode_flag = '-r' if 'recid' in sip.metadata else '-i'
        task_id = task_low_level_submission(
            'bibupload', 'webdeposit', mode_flag, package_path,
            '-I', str(deposition.workflow_object.task_sequence_id)
        )

        sip.task_ids.append(task_id)

        deposition.update()
def Insert_Modify_Record(parameters, curdir, form, user_info=None):
    """
    Modify existing record using 'curdir/recmysql' and BibUpload correct
    mode. The file must therefore already have been created prior to this
    execution of this function, for eg. using "Make_Modify_Record".

    This function gets the output of BibConvert and uploads it into
    the MySQL bibliographical database.

    :raises InvenioWebSubmitFunctionError: when neither 'recmysqlfmt' nor
        'recmysql' exists in ``curdir``.
    :return: empty string (WebSubmit function convention)
    """
    global rn
    sequence_id = bibtask_allocate_sequenceid(curdir)
    # Prefer the BibConvert-formatted record file when it exists.
    if os.path.exists(os.path.join(curdir, "recmysqlfmt")):
        recfile = "recmysqlfmt"
    elif os.path.exists(os.path.join(curdir, "recmysql")):
        recfile = "recmysql"
    else:
        raise InvenioWebSubmitFunctionError("Could not find record file")
    initial_file = os.path.join(curdir, recfile)
    # Copy the record into a uniquely named temp file so a later
    # submission cannot overwrite it before bibupload runs.
    tmp_fd, final_file = tempfile.mkstemp(dir=CFG_TMPDIR,
                                          prefix="%s_%s" % \
                                          (rn.replace('/', '_'),
                                           time.strftime("%Y-%m-%d_%H:%M:%S")))
    os.close(tmp_fd)
    shutil.copy(initial_file, final_file)
    # Priority 3 plus an explicit sequence id keep related tasks ordered.
    bibupload_id = task_low_level_submission('bibupload',
                                             'websubmit.Insert_Modify_Record',
                                             '-c', final_file, '-P', '3', '-I',
                                             str(sequence_id))
    # Use a context manager so the handle is closed deterministically
    # (the original `open(...).write(...)` leaked it until GC).
    with open(os.path.join(curdir, 'bibupload_id'), 'w') as task_id_file:
        task_id_file.write(str(bibupload_id))
    return ""
Exemple #27
0
def call_bibupload(marcxmlfile, mode=None, oai_src_id=-1, sequence_id=None):
    """
    Creates a bibupload task for the task scheduler in given mode
    on given file. Returns the generated task id and logs the event
    in oaiHARVESTLOGS, also adding any given oai source identifier.


    :param marcxmlfile: base-marcxmlfilename to upload
    :param mode: list of mode flags to upload with; defaults to ["-r", "-i"].
        The list is copied, so the caller's list is never modified.
    :param oai_src_id: id of current source config
    :param sequence_id: sequence-number, if relevant

    :return: task_id if successful, otherwise None.
    """
    if mode is None:
        mode = ["-r", "-i"]
    if os.path.exists(marcxmlfile):
        try:
            # Copy the flags: the original aliased the caller's list and
            # mutated it with extend() below.
            args = list(mode)
            # Add job with priority 6 (above normal bibedit tasks)
            # and file to upload to arguments
            args.extend(["-P", "6", marcxmlfile])
            if sequence_id:
                args.extend(['-I', str(sequence_id)])
            task_id = task_low_level_submission("bibupload", "oaiharvest", *tuple(args))
            create_oaiharvest_log(task_id, oai_src_id, marcxmlfile)
        except Exception as msg:
            write_message("An exception during submitting oaiharvest task occured : %s " % (str(msg)))
            return None
        return task_id
    else:
        write_message("marcxmlfile %s does not exist" % (marcxmlfile,))
        return None
def Insert_Modify_Record(parameters, curdir, form, user_info=None):
    """
    Modify existing record using 'curdir/recmysql' and BibUpload correct
    mode. The file must therefore already have been created prior to this
    execution of this function, for eg. using "Make_Modify_Record".

    This function gets the output of BibConvert and uploads it into
    the MySQL bibliographical database.

    :raises InvenioWebSubmitFunctionError: when neither 'recmysqlfmt' nor
        'recmysql' exists in ``curdir``.
    :return: empty string (WebSubmit function convention)
    """
    global rn
    sequence_id = bibtask_allocate_sequenceid(curdir)
    # Prefer the BibConvert-formatted record file when it exists.
    if os.path.exists(os.path.join(curdir, "recmysqlfmt")):
        recfile = "recmysqlfmt"
    elif os.path.exists(os.path.join(curdir, "recmysql")):
        recfile = "recmysql"
    else:
        raise InvenioWebSubmitFunctionError("Could not find record file")
    initial_file = os.path.join(curdir, recfile)
    # Copy the record into a uniquely named temp file so a later
    # submission cannot overwrite it before bibupload runs.
    tmp_fd, final_file = tempfile.mkstemp(dir=CFG_TMPDIR,
                                          prefix="%s_%s" % \
                                          (rn.replace('/', '_'),
                                           time.strftime("%Y-%m-%d_%H:%M:%S")))
    os.close(tmp_fd)
    shutil.copy(initial_file, final_file)
    bibupload_id = task_low_level_submission('bibupload', 'websubmit.Insert_Modify_Record', '-c', final_file, '-P', '3', '-I', str(sequence_id))
    # Use a context manager so the handle is closed deterministically
    # (the original `open(...).write(...)` leaked it until GC).
    with open(os.path.join(curdir, 'bibupload_id'), 'w') as task_id_file:
        task_id_file.write(str(bibupload_id))
    return ""
Exemple #29
0
def submit_task(to_submit, mode, sequence_id):
    """call bibupload with all records to be modified.

    :param to_submit: list of xml snippets to be submitted
    :type: list
    :param mode: mode to be used in bibupload
    :type: list
    :param sequence_id: sequence id to be included in the task_id
    :type: str

    :return: id of the submitted task
    :rtype: int
    """
    # Wrap all snippets into one MARCXML collection in a shared temp file.
    collection_fd, collection_path = mkstemp(prefix=PREFIX,
                                             dir=CFG_TMPSHAREDDIR)
    pieces = ['<?xml version="1.0" encoding="UTF-8"?>', '<collection>']
    pieces.extend(to_submit)
    pieces.append('</collection>')
    with os.fdopen(collection_fd, 'w') as collection_file:
        collection_file.write(''.join(pieces))

    # Priority 3; the mode flag is built as '-<mode>'.
    return task_low_level_submission('bibupload', PREFIX, '-P', '3', '-I',
                                     sequence_id, '-%s' % mode,
                                     collection_path)
Exemple #30
0
    def upload_marcxml(self, marcxml, mode):
        """
        Uploads a record to the server

        Parameters:
          marcxml - *str* the XML to upload.
             mode - *str* the mode to use for the upload.
                    "-i" insert new records
                    "-r" replace existing records
                    "-c" correct fields of records
                    "-a" append fields to records
                    "-ir" insert record or replace if it exists

        Raises NameError when mode is not one of the values above.
        """
        if mode not in ["-i", "-r", "-c", "-a", "-ir"]:
            # Call-style raise: the original `raise NameError, msg`
            # statement is a syntax error on Python 3; this form works
            # on both Python 2 and 3.
            raise NameError("Incorrect mode " + str(mode))

        # Are we running locally? If so, submit directly
        if self.local:
            (code, marcxml_filepath) = tempfile.mkstemp(prefix="upload_%s" % \
                                                        time.strftime("%Y%m%d_%H%M%S_",
                                                                      time.localtime()))
            marcxml_file_d = os.fdopen(code, "w")
            marcxml_file_d.write(marcxml)
            marcxml_file_d.close()
            return task_low_level_submission("bibupload", "", mode, marcxml_filepath)
        else:
            params = urllib.urlencode({'file': marcxml,
                                        'mode': mode})
            ## We don't use self.browser as batchuploader is protected by IP
            opener = urllib2.build_opener()
            opener.addheaders = [('User-Agent', CFG_USER_AGENT)]
            return opener.open(self.server_url + "/batchuploader/robotupload", params,)
Exemple #31
0
def upload_amendments(records, holdingpen):
    """Upload modified records via a bibupload task.

    :param records: list of amended records to submit (no-op when empty
        or when the 'no_upload' task option is set)
    :param holdingpen: when true, submit in holding-pen mode ('-o')
        instead of replace mode ('-r')
    """

    if task_get_option("no_upload", False) or len(records) == 0:
        return

    # Wrap all records into one MARCXML collection.
    xml = '<collection xmlns="http://www.loc.gov/MARC21/slim">'
    for record in records:
        xml += record_xml_output(record)
    xml += "</collection>"

    tmp_file_fd, tmp_file = mkstemp(
        suffix='.xml',
        prefix="bibcheckfile_%s" % time.strftime("%Y-%m-%d_%H:%M:%S"),
        dir=CFG_TMPSHAREDDIR
    )
    os.write(tmp_file_fd, xml)
    os.close(tmp_file_fd)
    # World-readable so the bibsched worker can read the file. The 0o644
    # spelling is valid on Python 2.6+ and Python 3 alike (the original
    # bare 0644 literal is a syntax error on Python 3).
    os.chmod(tmp_file, 0o644)
    if holdingpen:
        flag = "-o"
    else:
        flag = "-r"
    task = task_low_level_submission('bibupload', 'bibcheck', flag, tmp_file)
    write_message("Submitted bibupload task %s" % task)
Exemple #32
0
    def _run_tasks(obj, dummy_eng):
        """Queue a bibtasklet upload for the deposition's record.

        NOTE(review): currently short-circuited by the early ``return``
        below (marked XXX), so everything after it is unreachable dead
        code kept for a future re-enable.
        """
        from invenio.legacy.bibsched.bibtask import task_low_level_submission

        d = Deposition(obj)
        sip = d.get_latest_sip(sealed=True)
        # XXX XXX XXX
        return

        recid = sip.metadata['recid']

        common_args = []
        # Propagate the sequence id allocated earlier in the workflow, if any.
        sequenceid = getattr(d.workflow_object, 'task_sequence_id', None)
        if sequenceid:
            common_args += ['-I', str(sequenceid)]

        # `update` is a free variable from the enclosing scope -- presumably
        # a flag choosing between first-upload and update tasklets; confirm
        # at the definition site.
        if update:
            tasklet_name = 'bst_openaire_update_upload'
        else:
            tasklet_name = 'bst_openaire_new_upload'

        task_id = task_low_level_submission(
            'bibtasklet', 'webdeposit', '-T', tasklet_name,
            '--argument', 'recid=%s' % recid, *common_args
        )
        sip.task_ids.append(task_id)

        d.update()
Exemple #33
0
def submit_task(to_submit, mode, sequence_id):
    """call bibupload with all records to be modified.

    :param to_submit: list of xml snippets to be submitted
    :type: list
    :param mode: mode to be used in bibupload
    :type: list
    :param sequence_id: sequence id to be included in the task_id
    :type: str

    :return: id of the submitted task
    :rtype: int
    """
    # Wrap all snippets into one MARCXML collection in a shared temp file.
    collection_fd, collection_path = mkstemp(prefix=PREFIX,
                                             dir=CFG_TMPSHAREDDIR)
    pieces = ['<?xml version="1.0" encoding="UTF-8"?>', '<collection>']
    pieces.extend(to_submit)
    pieces.append('</collection>')
    with os.fdopen(collection_fd, 'w') as collection_file:
        collection_file.write(''.join(pieces))

    # Priority 3; the mode flag is built as '-<mode>'.
    return task_low_level_submission('bibupload', PREFIX, '-P', '3', '-I',
                                     sequence_id, '-%s' % mode,
                                     collection_path)
Exemple #34
0
    def run_deposition_tasks(self, deposition_id, with_webcoll=True):
        """Execute every task recorded in the deposition's latest sealed SIP.

        When ``with_webcoll`` is true, additionally schedule a webcoll run
        and assert that the resulting record is reachable over HTTP.
        """
        from invenio.modules.deposit.models import Deposition

        deposition = Deposition.get(deposition_id)
        latest_sip = deposition.get_latest_sip(sealed=True)

        # Execute each task queued when the SIP was sealed.
        for queued_task_id in latest_sip.task_ids:
            self.run_task_id(queued_task_id)

        if not with_webcoll:
            return

        # Run webcoll (to ensure record is assigned permissions).
        from invenio.legacy.bibsched.bibtask import \
            task_low_level_submission
        webcoll_task_id = task_low_level_submission('webcoll', 'webdeposit', '-q')
        self.run_task_id(webcoll_task_id)

        # The record must now be accessible over the secure site URL.
        response = self.client.get(
            url_for('record.metadata', recid=latest_sip.metadata['recid']),
            base_url=self.app.config['CFG_SITE_SECURE_URL'],
        )
        self.assertStatus(response, 200)
Exemple #35
0
    def _upload_amendments(obj, eng, holdingpen=False):
        """Collect amended records from extra_data and queue a bibupload.

        Writes all modified records as a single MARCXML collection to a
        shared temp file and submits a 'bibupload' task (holding-pen mode
        when ``holdingpen`` is true). No-op when uploading is disabled or
        there are no modified records.
        """
        # Load everything
        extra_data = obj.get_extra_data()
        _ensure_key('modified_records', extra_data)
        modified_records = extra_data['modified_records']
        upload = extra_data['common']['upload']
        tickets = extra_data['common']['tickets']
        queue = extra_data['common']['queue']

        # Bail out before doing any work. The original performed this
        # check on a generator, which is always truthy, so the "no
        # modified records" case was never detected; test the underlying
        # mapping instead.
        if not upload or not modified_records:
            return

        modified_records = (Record(r) for r in modified_records.values())
        records_xml = (
            '<collection xmlns="http://www.loc.gov/MARC21/slim">\n'
            '{}'
            '</collection>'
            .format("".join((record.legacy_export_as_marc()
                             for record in modified_records)))
        )

        # Upload
        tmp_file_fd, tmp_file = tempfile.mkstemp(
            suffix='.xml',
            prefix="bibcheckfile_%s" % time.strftime("%Y-%m-%d_%H:%M:%S"),
            dir=cfg['CFG_TMPSHAREDDIR']
        )
        os.write(tmp_file_fd, records_xml)
        os.close(tmp_file_fd)
        # World-readable for the bibsched worker; 0o644 is the Py2/Py3
        # compatible spelling (bare 0644 is a Py3 syntax error).
        os.chmod(tmp_file, 0o644)
        if holdingpen:
            flag = "-o"
        else:
            flag = "-r"
        task = task_low_level_submission('bibupload', 'bibcheck', flag, tmp_file)
Exemple #36
0
    def upload_marcxml(self, marcxml, mode):
        """
        Uploads a record to the server

        Parameters:
          marcxml - *str* the XML to upload.
             mode - *str* the mode to use for the upload.
                    "-i" insert new records
                    "-r" replace existing records
                    "-c" correct fields of records
                    "-a" append fields to records
                    "-ir" insert record or replace if it exists

        Raises NameError when mode is not one of the values above.
        """
        if mode not in ["-i", "-r", "-c", "-a", "-ir"]:
            # Call-style raise: the original `raise NameError, msg`
            # statement is a syntax error on Python 3; this form works
            # on both Python 2 and 3.
            raise NameError("Incorrect mode " + str(mode))

        # Are we running locally? If so, submit directly
        if self.local:
            (code, marcxml_filepath) = tempfile.mkstemp(prefix="upload_%s" % \
                                                        time.strftime("%Y%m%d_%H%M%S_",
                                                                      time.localtime()))
            marcxml_file_d = os.fdopen(code, "w")
            marcxml_file_d.write(marcxml)
            marcxml_file_d.close()
            return task_low_level_submission("bibupload", "", mode, marcxml_filepath)
        else:
            params = urllib.urlencode({'file': marcxml,
                                        'mode': mode})
            ## We don't use self.browser as batchuploader is protected by IP
            opener = urllib2.build_opener()
            opener.addheaders = [('User-Agent', CFG_USER_AGENT)]
            return opener.open(self.server_url + "/batchuploader/robotupload", params,)
Exemple #37
0
    def create(obj, dummy_eng):
        """Seal the deposition's latest SIP and queue a bibupload task for it."""
        # FIXME change share tmp directory
        from invenio.config import CFG_TMPSHAREDDIR
        from invenio.legacy.bibsched.bibtask import task_low_level_submission, \
            bibtask_allocate_sequenceid

        deposition = Deposition(obj)
        sip = deposition.get_latest_sip(sealed=False)
        sip.seal()

        # Dump the sealed package into a shared temp file bibupload can read.
        package_fd, package_path = mkstemp(
            prefix="webdeposit-%s-%s" % (deposition.id, sip.uuid),
            suffix='.xml',
            dir=CFG_TMPSHAREDDIR,
        )
        os.write(package_fd, sip.package)
        os.close(package_fd)

        # Trick to have access to task_sequence_id in subsequent tasks.
        deposition.workflow_object.task_sequence_id = bibtask_allocate_sequenceid()

        # '-r' replaces an existing record; '-i' inserts a brand-new one.
        upload_mode = '-r' if 'recid' in sip.metadata else '-i'
        task_id = task_low_level_submission(
            'bibupload', 'webdeposit',
            upload_mode, package_path,
            '-I', str(deposition.workflow_object.task_sequence_id))

        sip.task_ids.append(task_id)

        deposition.update()
Exemple #38
0
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string in stead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_file_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache_file(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): if the cache entry is missing, `record` is never
        # bound and the stripping calls below raise -- confirm callers
        # always have a cache or pass xml_record explicitly.
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        file_path = '%s.xml' % _get_file_path(recid, uid)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
    # Context manager guarantees the file is flushed and closed even if
    # the write raises (the original closed it manually).
    with open(file_path, 'w') as xml_file:
        xml_file.write(xml_to_write)

    user_name = get_user_info(uid)[1]
    if to_upload:
        # Pass XML file to BibUpload (priority 5, replace mode, attributed
        # to the editing user).
        task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r',
                                  file_path, '-u', user_name)
    return True
Exemple #39
0
 def _upload_record(obj, eng):
     """Save the workflow object to disk and queue a bibupload task for it."""
     from invenio.legacy.bibsched.bibtask import task_low_level_submission
     eng.log_info("Saving data to temporary file for upload")
     saved_path = obj.save_to_file()
     upload_args = ["-%s" % (mode, ), saved_path]
     submitted_id = task_low_level_submission("bibupload", "bibworkflow",
                                              *upload_args)
     eng.log_info("Submitted task #%s" % (submitted_id, ))
Exemple #40
0
 def _upload_record(obj, eng):
     """Save the workflow object to disk and queue a bibupload task for it."""
     from invenio.legacy.bibsched.bibtask import task_low_level_submission
     eng.log_info("Saving data to temporary file for upload")
     saved_path = obj.save_to_file()
     upload_args = ["-%s" % (mode,), saved_path]
     submitted_id = task_low_level_submission("bibupload", "bibworkflow",
                                              *upload_args)
     eng.log_info("Submitted task #%s" % (submitted_id,))
Exemple #41
0
def upload_to_site(marcxml, yes_i_know):
    """
    makes the appropriate calls to bibupload to get the MARCXML record onto
    the site.

    @param: marcxml (string): the absolute location of the MARCXML that was
        generated by this programme
    @param: yes_i_know (boolean): if true, no confirmation.  if false, prompt.

    @output: a new record on the invenio site

    @return: None
    """
    # Ask for confirmation unless explicitly waived by the caller.
    if not yes_i_know:
        prompt = wrap_text_in_a_box('You are going to upload new ' +
                                    'plots to the server.')
        wait_for_user(prompt)
    # Append mode ('-a') under the 'admin' user.
    task_low_level_submission('bibupload', 'admin', '-a', marcxml)
def update_marcxml_with_info(recid,
                             username,
                             current_date,
                             remote_id,
                             action='append'):
    '''
        This function adds a field in the marc file to indicate that the
        record has been submitted to a remote server
        @param recid: id of the record to update
        @param username: submitting user, stored in the info subfield
        @param current_date: submission date, stored in the info subfield
        @param remote_id: identifier assigned by the remote server
        @param action: 'append' to add the field, 'delete' to remove it
        @return: id of the queued bibupload task, or None for an unknown
            action (the original raised UnboundLocalError in that case)
    '''
    # Default guards against an unknown ``action``: previously ``result``
    # was assigned only inside the two branches below, so any other value
    # made the final return raise UnboundLocalError.
    result = None

    # concatenation of the string to append to the marc file
    node = '''<record>
    <controlfield tag="001">%(recid)s</controlfield>
    <datafield tag="%(tag)s" ind1=" " ind2=" ">
        <subfield code="a">%(submit_info)s</subfield>
    </datafield>
</record>''' % {
        'recid':
        recid,
        'tag':
        CFG_MARC_RECORD_SUBMIT_INFO,
        'submit_info':
        CFG_SUBMIT_ARXIV_INFO_MESSAGE % (username, current_date, remote_id)
    }

    # creation of the tmp file containing the xml node to append
    (tmpfd, filename) = mkstemp(suffix='.xml',
                                prefix='bibsword_append_submit_info_',
                                dir=CFG_TMPDIR)
    tmpfile = os.fdopen(tmpfd, 'w')
    tmpfile.write(node)
    tmpfile.close()

    # insert a task in bibsched to add or remove the node in the marc file
    if action == 'append':
        result = \
            task_low_level_submission('bibupload', 'BibSword', '-a', filename)
    elif action == 'delete':
        result = \
            task_low_level_submission('bibupload', 'BibSword', '-d', filename)

    return result
def update_marcxml_with_remote_id(recid, remote_id, action="append"):
    '''
        Write a new entry in the given marc file. This entry is the remote record
        id given by the server where the submission has been done
        @param remote_id: the string containing the id to add to the marc file
        @param action: 'append' to add the field, 'delete' to remove it
        return: id of the queued bibupload task, or None for an unknown
            action (the original raised UnboundLocalError in that case)
    '''
    # Default guards against an unknown ``action``: previously ``result``
    # was assigned only inside the two branches below, so any other value
    # made the final return raise UnboundLocalError.
    result = None

    field_tag = CFG_MARC_ADDITIONAL_REPORT_NUMBER
    # Positions 0-2 hold the tag digits; position 5 holds the subfield code.
    tag_id = "%s%s%s" % (field_tag[0], field_tag[1], field_tag[2])
    tag_code = field_tag[5]

    # concatenation of the string to append to the marc file
    node = '''<record>
    <controlfield tag="001">%(recid)s</controlfield>
    <datafield tag="%(tagid)s" ind1=" " ind2=" ">
        <subfield code="%(tagcode)s">%(remote_id)s</subfield>
    </datafield>
</record>''' % {
        'recid': recid,
        'tagid': tag_id,
        'tagcode': tag_code,
        'remote_id': remote_id
    }

    # creation of the tmp file containing the xml node to append
    (tmpfd, filename) = mkstemp(suffix='.xml',
                                prefix='bibsword_append_remote_id_',
                                dir=CFG_TMPDIR)
    tmpfile = os.fdopen(tmpfd, 'w')
    tmpfile.write(node)
    tmpfile.close()

    # insert a task in bibsched to add or remove the node in the marc file
    if action == 'append':
        result = \
            task_low_level_submission('bibupload', 'BibSword', '-a', filename)
    elif action == 'delete':
        result = \
            task_low_level_submission('bibupload', 'BibSword', '-d', filename)

    return result
Exemple #44
0
def update_marcxml_with_remote_id(recid, remote_id, action="append"):
    '''
        Write a new entry in the given marc file. This entry is the remote record
        id given by the server where the submission has been done
        @param remote_id: the string containing the id to add to the marc file
        @param action: 'append' to add the field, 'delete' to remove it
        return: id of the queued bibupload task, or None for an unknown
            action (the original raised UnboundLocalError in that case)
    '''
    # Default guards against an unknown ``action``: previously ``result``
    # was assigned only inside the two branches below, so any other value
    # made the final return raise UnboundLocalError.
    result = None

    field_tag = CFG_MARC_ADDITIONAL_REPORT_NUMBER
    # Positions 0-2 hold the tag digits; position 5 holds the subfield code.
    tag_id = "%s%s%s" % (field_tag[0], field_tag[1], field_tag[2])
    tag_code = field_tag[5]

    # concatenation of the string to append to the marc file
    node = '''<record>
    <controlfield tag="001">%(recid)s</controlfield>
    <datafield tag="%(tagid)s" ind1=" " ind2=" ">
        <subfield code="%(tagcode)s">%(remote_id)s</subfield>
    </datafield>
</record>''' % {
                 'recid': recid,
                 'tagid': tag_id,
                 'tagcode': tag_code,
                 'remote_id': remote_id
             }

    # creation of the tmp file containing the xml node to append
    (tmpfd, filename) = mkstemp(suffix='.xml', prefix='bibsword_append_remote_id_',
                                dir=CFG_TMPDIR)
    tmpfile = os.fdopen(tmpfd, 'w')
    tmpfile.write(node)
    tmpfile.close()

    # insert a task in bibsched to add or remove the node in the marc file
    if action == 'append':
        result = \
            task_low_level_submission('bibupload', 'BibSword', '-a', filename)
    elif action == 'delete':
        result = \
            task_low_level_submission('bibupload', 'BibSword', '-d', filename)

    return result
Exemple #45
0
def bst_run_bibtask(taskname, user, **args):
    """
    Initiate a bibsched task.

    @param taskname: name of the task to run
    @type taskname: string

    @param user: the user to run the task under.
    @type user: string
    """
    arglist = []
    # Flatten the kwargs into a CLI-style argument list: single-letter
    # names become short options, longer names long options, and the
    # value follows the option only when truthy, e.g.
    # {'a': 1, 'verbose': ''} -> ['-a', 1, '--verbose'].
    # (Falsy values such as 0 or '' mark value-less flags.)
    for name, value in args.items():
        if len(name) == 1:
            name = '-' + name
        else:
            name = '--' + name
        arglist.append(name)
        if value:
            arglist.append(value)
    task_low_level_submission(taskname, user, *tuple(arglist))
Exemple #46
0
def bst_run_bibtask(taskname, user, **args):
    """
    Initiate a bibsched task.

    @param taskname: name of the task to run
    @type taskname: string

    @param user: the user to run the task under.
    @type user: string
    """
    arglist = []
    # Flatten the kwargs into a CLI-style argument list: single-letter
    # names become short options, longer names long options, and the
    # value follows the option only when truthy, e.g.
    # {'a': 1, 'verbose': ''} -> ['-a', 1, '--verbose'].
    # (Falsy values such as 0 or '' mark value-less flags.)
    for name, value in args.items():
        if len(name) == 1:
            name = '-' + name
        else:
            name = '--' + name
        arglist.append(name)
        if value:
            arglist.append(value)
    task_low_level_submission(taskname, user, *tuple(arglist))
Exemple #47
0
def _upload_file_with_bibupload(file_path, upload_mode, num_records, req):
    """
    Uploads file with bibupload

       @param file_path: path to the file where the XML will be saved.
       @param upload_mode: -c for correct or -r for replace
       @return tuple formed by status of the upload:
           0-changes to be made instantly
           1-changes to be made only in limited hours
           2-user is superadmin. Changes made in limited hours
           3-no rights to upload
           and the upload file path
    """
    # Small batches run immediately at priority 5.
    if num_records < CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING:
        task_low_level_submission('bibupload', 'multiedit', '-P', '5',
                                  upload_mode, '%s' % file_path)
        return (0, file_path)
    # Medium batches are deferred to the configured low-traffic window.
    if num_records < CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING:
        task_low_level_submission('bibupload', 'multiedit', '-P', '5',
                                  upload_mode, '-L',
                                  CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME,
                                  '%s' % file_path)
        return (1, file_path)
    # Larger batches are allowed only for superadmins, still deferred.
    user_info = collect_user_info(req)
    if isUserSuperAdmin(user_info):
        task_low_level_submission('bibupload', 'multiedit', '-P', '5',
                                  upload_mode, '-L',
                                  CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME,
                                  '%s' % file_path)
        return (2, file_path)
    return (3, file_path)
Exemple #48
0
def metadata_upload(req, metafile=None, filetype=None, mode=None, exec_date=None,
                    exec_time=None, metafilename=None, ln=CFG_SITE_LANG,
                    priority="1", email_logs_to=None):
    """
    Metadata web upload service. Get upload parameters and exec bibupload for the given file.
    Finally, write upload history.

    @param req: request object; HTTP headers are sent on it and it is used
        for the client authorization check
    @param metafile: the metadata content to upload, as a string
    @param filetype: unused here (kept for interface compatibility)
    @param mode: bibupload mode flag passed straight through (e.g. '-i')
    @param exec_date: optional scheduled execution date
    @param exec_time: optional scheduled execution time (used with exec_date)
    @param metafilename: original name of the uploaded file (for history)
    @param priority: bibupload task priority, as a string
    @param email_logs_to: optional address to email the task logs to
    @return: tuple (error code, message)
        error code: code that indicates if an error ocurred
        message: message describing the error
    """
    # start output:
    req.content_type = "text/html"
    req.send_http_header()

    error_codes = {'not_authorized': 1}

    user_info = collect_user_info(req)
    # Persist the metadata into a shared temp file named after the user
    # and a timestamp, so the bibsched worker can read it later.
    (fd, filename) = tempfile.mkstemp(prefix="batchupload_" + \
        user_info['nickname'] + "_" + time.strftime("%Y%m%d%H%M%S",
        time.localtime()) + "_", dir=CFG_TMPSHAREDDIR)
    filedesc = os.fdopen(fd, 'w')
    filedesc.write(metafile)
    filedesc.close()

    # check if this client can run this file:
    if req is not None:
        allow = _check_client_can_submit_file(req=req, metafile=metafile, webupload=1, ln=ln)
        if allow[0] != 0:
            return (error_codes['not_authorized'], allow[1])

    # run upload command:
    task_arguments = ('bibupload', user_info['nickname'], mode,
                      "--priority=" + priority, "-N", "batchupload")
    if exec_date:
        # Optional scheduled run time ('-t'), built from date [+ time].
        date = exec_date
        if exec_time:
            date += ' ' + exec_time
        task_arguments += ("-t", date)
    if email_logs_to:
        task_arguments += ('--email-logs-to', email_logs_to)
    task_arguments += (filename, )
    jobid = task_low_level_submission(*task_arguments)

    # write batch upload history
    run_sql("""INSERT INTO hstBATCHUPLOAD (user, submitdate,
            filename, execdate, id_schTASK, batch_mode)
            VALUES (%s, NOW(), %s, %s, %s, "metadata")""",
            (user_info['nickname'], metafilename,
            exec_date != "" and (exec_date + ' ' + exec_time)
            or time.strftime("%Y-%m-%d %H:%M:%S"), str(jobid), ))
    return (0, "Task %s queued" % str(jobid))
Exemple #49
0
def bibupload(record=None, collection=None, file_prefix="", mode="-c"):
    """
    General purpose function that will write a MARCXML file and call bibupload
    on it.

    @param record: a single record to upload (used when collection is None)
    @param collection: an iterable of records, uploaded in chunks of
        MAX_RECORDS per bibupload task
    @param file_prefix: prefix for the temporary MARCXML file name
    @param mode: bibupload mode flag, default '-c' (correct)
    """
    if collection is None and record is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for rec in collection:
            file_out.write(record_xml_output(rec))
            tot += 1
            # Chunking: every MAX_RECORDS records, close and submit the
            # current file, then start a fresh one.
            if tot == MAX_RECORDS:
                file_out.write("</collection>")
                file_out.close()
                logger.debug(
                    "Submitting bibupload %s -n %s" % (mode, filename))
                task_low_level_submission(
                    'bibupload', 'openaire', mode, filename, '-n')

                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        tot = 1
        file_out.write(record_xml_output(record))

    file_out.close()
    # Submit the final (possibly partial) chunk. NOTE(review): when the
    # collection size is an exact multiple of MAX_RECORDS, tot is 0 here
    # and the last file holds an empty collection that is never submitted
    # nor removed.
    if tot > 0:
        logger.debug("Submitting bibupload %s -n %s" % (mode, filename))
        task_low_level_submission(
            'bibupload', 'openaire', mode, filename, '-n')
Exemple #50
0
def submit_bibindex_task(to_update, sequence_id):
    """submit a bibindex task for a set of records.

    :param to_update: list of recids to be updated by bibindex
    :type: list
    :param sequence_id: sequence id to be included in the task_id
    :type: str

    :return: id of bibindex task
    :rtype: int
    """
    # Reindex the 'global' index at priority 2 for the given recids.
    recid_csv = ','.join(str(recid) for recid in to_update)
    return task_low_level_submission('bibindex', PREFIX, '-I',
                                     sequence_id, '-P', '2', '-w', 'global',
                                     '-i', recid_csv)
Exemple #51
0
def submit_bibindex_task(to_update, sequence_id):
    """ submits a bibindex task for a set of records

    @param to_update: list of recids to be updated by bibindex
    @type: list
    @param sequence_id: sequence id to be included in the task_id
    @type: str

    @return: id of bibindex task
    @rtype: int
    """
    # Reindex the 'global' index at priority 2 for the given recids.
    recid_csv = ','.join(str(recid) for recid in to_update)
    return task_low_level_submission('bibindex', PREFIX, '-I', sequence_id,
                                     '-P', '2', '-w', 'global', '-i',
                                     recid_csv)
Exemple #52
0
def call_bibupload(marcxmlfile, mode=None, oai_src_id=-1, sequence_id=None):
    """
    Creates a bibupload task for the task scheduler in given mode
    on given file. Returns the generated task id and logs the event
    in oaiHARVESTLOGS, also adding any given oai source identifier.


    :param marcxmlfile: base-marcxmlfilename to upload
    :param mode: list of mode flags to upload with; defaults to ["-r", "-i"].
        The list is copied, so the caller's list is never modified.
    :param oai_src_id: id of current source config
    :param sequence_id: sequence-number, if relevant

    :return: task_id if successful, otherwise None.
    """
    if mode is None:
        mode = ["-r", "-i"]
    if os.path.exists(marcxmlfile):
        try:
            # Copy the flags: the original aliased the caller's list and
            # mutated it with extend() below.
            args = list(mode)
            # Add job with priority 6 (above normal bibedit tasks)
            # and file to upload to arguments
            args.extend(["-P", "6", marcxmlfile])
            if sequence_id:
                args.extend(['-I', str(sequence_id)])
            task_id = task_low_level_submission("bibupload", "oaiharvest",
                                                *tuple(args))
            create_oaiharvest_log(task_id, oai_src_id, marcxmlfile)
        except Exception as msg:
            write_message(
                "An exception during submitting oaiharvest task occured : %s "
                % (str(msg)))
            return None
        return task_id
    else:
        write_message("marcxmlfile %s does not exist" % (marcxmlfile, ))
        return None
Exemple #53
0
def Notify_URL(parameters, curdir, form, user_info=None):
    """
    Access a given URL, and possibly post some content.

    Could be used to notify that a record has been fully integrated.
    (the URL is only accessed once the BibTask created by this
    function runs in BibSched, not the when the function is run. The
    BibTask uses a task sequence ID to respect ordering of tasks)

    if URL is empty, skip the notification.

    @param parameters: (dictionary) - contains the following parameter
         strings used by this function:

         + url: (string) - the URL to be contacted by this function
                           (must start with http/https)
                           If value starts with "FILE:", will look for
                           the URL in a file on curdir with the given name.
                           for eg: "FILE:my_url"
                           (value retrieved when function is run)

         + data: (string) - (optional) the data to be posted at the
                            given URL.  if no value is given, the URL
                            will be accessed via GET.
                            If value starts with "FILE:", will look for
                            the data in a file on curdir with the given name.
                            for eg: "FILE:my_data"
                            (value retrieved when function is run)

         + content_type: (string) - (optional) the content-type to use
                                    to post data. Default is 'text/plain'.
                                    Ignored if not data is posted.

         + attempt_times: (int) - (optional) up to how many time shall
                                  we try to contact the URL in case we
                                  fail at contacting it?

         + attempt_sleeptime: (int) - (optional) how many seconds to
                                       sleep between each attempt?

         + admin_emails: (string) - (optional) list of emails (comma-separated
                                    values) to contact in case the URL
                                    cannot be accessed after all attempts.
                                    If value starts with "FILE:", will look for
                                    the emails in a file on curdir with the given name.
                                    for eg: "FILE:my_email"
                                    (value retrieved when function is run)

         + user: (string) - the user to be used to launch the task
                            (visible in BibSched).  If value starts
                            with"FILE:", will look for the emails in a file on
                            curdir with the given name.
                            for eg:"FILE:my_user"
                            (value retrieved when function is run)

    """
    sequence_id = bibtask_allocate_sequenceid(curdir)

    def _resolve(value):
        # Values prefixed with "FILE:" are indirections: the real value
        # lives in a file of that name inside curdir.
        if value.startswith('FILE:'):
            return ParamFromFile(os.path.join(curdir, value[5:]))
        return value

    # Read all required parameters up front (missing keys fail fast).
    url               = parameters["url"]
    data              = parameters["data"]
    admin_emails      = parameters["admin_emails"]
    content_type      = parameters["content_type"]
    attempt_times     = parameters["attempt_times"]
    attempt_sleeptime = parameters["attempt_sleeptime"]
    user              = parameters["user"]

    url = _resolve(url)
    if not url:
        # An empty URL means: skip the notification entirely.
        return ""
    data = _resolve(data)
    admin_emails = _resolve(admin_emails)
    user = _resolve(user)

    extra_arguments = []
    if data:
        # content_type is only meaningful when something is posted.
        extra_arguments += ["-a", "data=%s" % data,
                            "-a", "content_type=%s" % content_type]

    return task_low_level_submission(
        "bibtasklet", user, "-T", "bst_notify_url",
        "-I", str(sequence_id),
        "-a", "url=%s" % url,
        "-a", "attempt_times=%s" % attempt_times,
        "-a", "attempt_sleeptime=%s" % attempt_sleeptime,
        "-a", "admin_emails=%s" % admin_emails,
        *extra_arguments)
Exemple #54
0
def task_run_core():
    """ Walks through all directories where metadata files are located
        and uploads them.
        Files are then moved to the corresponding DONE folders.

        :return: 1 on completion (bibsched success convention).
    """
    # Absolute daemon dir is used as-is; a relative one lives under CFG_PREFIX.
    daemon_dir = CFG_BATCHUPLOADER_DAEMON_DIR[0] == '/' and CFG_BATCHUPLOADER_DAEMON_DIR \
                 or CFG_PREFIX + '/' + CFG_BATCHUPLOADER_DAEMON_DIR
    # Check if directory /batchupload exists
    if not task_get_option('documents'):
        # Metadata upload
        parent_dir = daemon_dir + "/metadata/"
        progress = 0
        try:
            os.makedirs(parent_dir)
        except OSError:
            # Directory already exists.
            pass
        list_of_folders = [
            "insert", "append", "correct", "replace", "holdingpen"
        ]
        for folder in list_of_folders:
            files_dir = os.path.join(parent_dir, folder)
            files_done_dir = os.path.join(files_dir, "DONE")
            try:
                files = os.listdir(files_dir)
            except OSError as e:
                # Folder missing: create it and continue with no files.
                os.mkdir(files_dir)
                files = []
                write_message(e, sys.stderr)
                write_message("Created new folder %s" % (files_dir, ))
            # Create directory DONE/ if doesn't exist
            try:
                os.mkdir(files_done_dir)
            except OSError:
                # Directory exists
                pass
            for metafile in files:
                if os.path.isfile(os.path.join(files_dir, metafile)):
                    # Create temporary file to be uploaded
                    (fd, filename) = tempfile.mkstemp(
                        prefix=metafile + "_" +
                        time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_",
                        dir=CFG_TMPSHAREDDIR)
                    shutil.copy(os.path.join(files_dir, metafile), filename)
                    # Send bibsched task; the folder name doubles as the
                    # bibupload mode flag (--insert, --append, ...).
                    mode = "--" + folder
                    jobid = str(
                        task_low_level_submission('bibupload', 'batchupload',
                                                  mode, filename))
                    # Move file to done folder, tagged with timestamp + job id
                    # so the same source file can be re-uploaded later.
                    filename = metafile + "_" + time.strftime(
                        "%Y%m%d%H%M%S", time.localtime()) + "_" + jobid
                    os.rename(os.path.join(files_dir, metafile),
                              os.path.join(files_done_dir, filename))
                    task_sleep_now_if_required(can_stop_too=True)
            progress += 1
            task_update_progress("Done %d out of %d." %
                                 (progress, len(list_of_folders)))
    else:
        # Documents upload
        parent_dir = daemon_dir + "/documents/"
        try:
            os.makedirs(parent_dir)
        except OSError:
            pass
        matching_order = CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY
        for folder in ["append/", "revise/"]:
            try:
                os.mkdir(parent_dir + folder)
            # Was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; only the "already exists" OSError is expected.
            except OSError:
                pass
            # Try each filename-matching policy until one succeeds for all
            # documents in the folder.
            for matching in matching_order:
                errors = document_upload(folder=parent_dir + folder,
                                         matching=matching,
                                         mode=folder[:-1])[0]
                if not errors:
                    break  # All documents succedeed with that matching
                for error in errors:
                    write_message(
                        "File: %s - %s with matching %s" %
                        (error[0], error[1], matching), sys.stderr)
            task_sleep_now_if_required(can_stop_too=True)
    return 1
Exemple #55
0
def _dbdump_run_task_core():
    """
    Run DB dumper core stuff.

    Note: do not use task_can_sleep() stuff here because we don't want
    other tasks to interrupt us while we are dumping the DB content.

    Returns True on success.  Three execution modes are visible below:
    (1) master scheduling a dump-on-slave helper task, (2) running as
    that helper on the (detached) slave, (3) plain local dump.
    """
    # read params:
    host = CFG_DATABASE_HOST
    port = CFG_DATABASE_PORT
    connection = None
    active_queues = []
    try:
        if task_get_option('slave') and not task_get_option('dump_on_slave_helper_mode'):
            # Mode (1): verify the slave, detach it, then delegate the
            # actual dump to a freshly submitted helper dbdump task.
            connection = get_connection_for_dump_on_slave()
            write_message("Dump on slave requested")
            write_message("... checking if slave is well up...")
            check_slave_is_up(connection)
            write_message("... checking if slave is in consistent state...")
            check_slave_is_in_consistent_state(connection)
            write_message("... detaching slave database...")
            detach_slave(connection)
            write_message("... scheduling dump on slave helper...")
            # Forward the relevant CLI options to the helper task.
            helper_arguments = []
            if task_get_option("number"):
                helper_arguments += ["--number", str(task_get_option("number"))]
            if task_get_option("output"):
                helper_arguments += ["--output", str(task_get_option("output"))]
            if task_get_option("params"):
                helper_arguments += ["--params", str(task_get_option("params"))]
            if task_get_option("ignore_tables"):
                helper_arguments += ["--ignore-tables", str(task_get_option("ignore_tables"))]
            if task_get_option("compress"):
                helper_arguments += ["--compress"]
            if task_get_option("slave"):
                helper_arguments += ["--slave", str(task_get_option("slave"))]
            # --dump-on-slave-helper marks the child so it takes branch (2).
            helper_arguments += ['-N', 'slavehelper', '--dump-on-slave-helper']
            task_id = task_low_level_submission('dbdump', task_get_task_param('user'), '-P4', *helper_arguments)
            write_message("Slave scheduled with ID %s" % task_id)
            task_update_progress("DONE")
            return True
        elif task_get_option('dump_on_slave_helper_mode'):
            # Mode (2): we are the helper; the master already detached the
            # slave, so it must now be down and we dump from the slave host.
            write_message("Dumping on slave mode")
            connection = get_connection_for_dump_on_slave()
            write_message("... checking if slave is well down...")
            check_slave_is_down(connection)
            host = CFG_DATABASE_SLAVE

        task_update_progress("Reading parameters")
        write_message("Reading parameters started")
        output_dir = task_get_option('output', CFG_LOGDIR)
        output_num = task_get_option('number', 5)  # how many dumps to keep
        params = task_get_option('params', None)
        compress = task_get_option('compress', False)
        slave = task_get_option('slave', False)
        ignore_tables = task_get_option('ignore_tables', None)
        if ignore_tables:
            # Expand the user-supplied pattern(s) into concrete table names.
            ignore_tables = get_table_names(ignore_tables)
        else:
            ignore_tables = None

        # Dump files are named after the task starting time.
        output_file_suffix = task_get_task_param('task_starting_time')
        output_file_suffix = output_file_suffix.replace(' ', '_') + '.sql'
        if compress:
            output_file_suffix = "%s.gz" % (output_file_suffix,)
        write_message("Reading parameters ended")

        if task_get_option('disable_workers'):
            # Quiesce the task queues so nothing writes while we dump;
            # they are re-enabled in the finally block below.
            active_queues = get_queues()
            if active_queues:
                write_message("Suspend workers and wait for any running tasks to complete")
                suspend_queues(active_queues)
                write_message("Workers suspended")

        # make dump:
        task_update_progress("Dumping database")
        write_message("Database dump started")

        if slave:
            output_file_prefix = 'slave-%s-dbdump-' % (CFG_DATABASE_NAME,)
        else:
            output_file_prefix = '%s-dbdump-' % (CFG_DATABASE_NAME,)
        output_file = output_file_prefix + output_file_suffix
        dump_path = output_dir + os.sep + output_file
        dump_database(dump_path, \
                        host=host,
                        port=port,
                        params=params, \
                        compress=compress, \
                        ignore_tables=ignore_tables)
        write_message("Database dump ended")
    finally:
        # Always restore suspended queues and, in helper mode, reattach
        # the slave -- even if the dump itself failed.
        for queue in active_queues:
            enable_queue(queue)
        if connection and task_get_option('dump_on_slave_helper_mode'):
            write_message("Reattaching slave")
            attach_slave(connection)
    # prune old dump files:
    task_update_progress("Pruning old dump files")
    write_message("Pruning old dump files started")
    _delete_old_dumps(output_dir, output_file_prefix, output_num)
    write_message("Pruning old dump files ended")
    # we are done:
    task_update_progress("Done.")
    return True
Exemple #56
0
def scheduled_send_email(
    fromaddr,
    toaddr,
    subject="",
    content="",
    header=None,
    footer=None,
    copy_to_admin=0,
    attempt_times=1,
    attempt_sleeptime=10,
    user=None,
    other_bibtasklet_arguments=None,
    replytoaddr="",
    bccaddr="",
):
    """
    Like send_email, but send an email via the bibsched
    infrastructure.
    @param fromaddr: sender
    @type fromaddr: string
    @param toaddr: list of receivers
    @type toaddr: string (comma separated) or list of strings
    @param subject: the subject
    @param content: the body of the message
    @param header: optional header, otherwise default is used
    @param footer: optional footer, otherwise default is used
    @param copy_to_admin: set to 1 in order to send email the admins
    @param attempt_times: try at least n times before giving up sending
    @param attempt_sleeptime: number of seconds to sleep between two attempts
    @param user: the user name to user when scheduling the bibtasklet. If
        None, the sender will be used
    @param other_bibtasklet_arguments: other arguments to append to the list
        of arguments to the call of task_low_level_submission
    @param replytoaddr: [string or list-of-strings] to be used for the
                        reply-to header of the email (if string, then
                        receivers are separated by ',')
    @param bccaddr: [string or list-of-strings] to be used for BCC header
                     of the email
                    (if string, then receivers are separated by ',')
    @return: the scheduled bibtasklet
    """
    from invenio.legacy.bibsched.bibtask import task_low_level_submission
    # Normalize list-of-addresses arguments into comma-separated strings.
    if not isinstance(toaddr, (unicode, str)):
        toaddr = ','.join(toaddr)
    if not isinstance(replytoaddr, (unicode, str)):
        replytoaddr = ','.join(replytoaddr)

    toaddr = remove_temporary_emails(toaddr)

    if user is None:
        user = fromaddr
    # Copy so we never mutate the caller's argument list.
    if other_bibtasklet_arguments is None:
        other_bibtasklet_arguments = []
    else:
        other_bibtasklet_arguments = list(other_bibtasklet_arguments)
    # PEP 8 idiom: `is not None` instead of `not ... is None`.
    if header is not None:
        other_bibtasklet_arguments.extend(("-a", "header=%s" % header))
    if footer is not None:
        other_bibtasklet_arguments.extend(("-a", "footer=%s" % footer))
    return task_low_level_submission(
        "bibtasklet", user, "-T", "bst_send_email", "-a",
        "fromaddr=%s" % fromaddr, "-a", "toaddr=%s" % toaddr, "-a",
        "replytoaddr=%s" % replytoaddr, "-a", "subject=%s" % subject, "-a",
        "content=%s" % content, "-a", "copy_to_admin=%s" % copy_to_admin, "-a",
        "attempt_times=%s" % attempt_times, "-a",
        "attempt_sleeptime=%s" % attempt_sleeptime, "-a",
        "bccaddr=%s" % bccaddr, *other_bibtasklet_arguments)
Exemple #57
0
def schedule_extraction(recid, taxonomy):
    """Submit a bibclassify task extracting keywords for one record.

    The task title encodes the recid so the run can later be found in
    the scheduler tables.
    """
    task_title = 'extract:%s' % recid
    bibtask.task_low_level_submission(
        'bibclassify', task_title,
        '-k', taxonomy,
        '-i', '%s' % recid)
Exemple #58
0
    def post(self, deposit_id, **kwargs):
        """
        Creates a new deposition

        Validates the posted JSON metadata against the domain's metadata
        form; on success writes a MARC file and schedules a bibupload
        task, returning 201 with a Location header.  Returns 4xx JSON
        errors for bad deposit ids, missing files, invalid JSON, unknown
        domains, restricted domains or invalid metadata.

        Test this with:
        $ curl -v -X POST -H "Content-Type: application/json"
          -d '{"domain":"generic", "title":"REST Test Title", "description":"REST Test Description"}'
          http://0.0.0.0:4000/api/deposition/DEPOSITION_ID/commit\?access_token\=xxx
        """
        CFG_B2SHARE_UPLOAD_FOLDER = current_app.config.get(
                                "CFG_B2SHARE_UPLOAD_FOLDER")

        # The 'uncommitted' marker file exists only for open depositions.
        deposition_status = os.path.join(CFG_B2SHARE_UPLOAD_FOLDER,
                                        deposit_id, 'uncommitted')
        if not os.path.exists(deposition_status):
            return {'message': 'Bad deposit_id parameter or already closed deposition.',
                    'status': 404}, 404

        if not get_depositing_files_metadata(deposit_id):
            return {'message':'No files: add files to this deposition first', 'status':400}, 400

        try:
            form = request.get_json()
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; any parse/content-type error is an Exception.
        except Exception:
            return {'message':'Invalid POST data', 'status':400}, 400

        # Remove the marker: the deposition is now being committed.
        os.remove(deposition_status)
        domain = form.get('domain', '').lower()
        if domain in metadata_classes():
            metaclass = metadata_classes()[domain]
            meta = metaclass()
        else:
            domains = ", ".join(metadata_classes().keys())
            json_data = {
                'message': 'Invalid domain. The submitted metadata must '+\
                            'contain a valid "domain" field. Valid domains '+\
                            'are: '+ domains,
                'status': 400,
            }
            return json_data, 400

        if not is_current_user_allowed_to_deposit(meta):
            return {'message':'depositions to this domain are restricted', 'status':401}, 401

        if 'open_access' not in form:
            return {'message':'open_access boolean field required', 'status':400}, 400
        if not form['open_access'] or form['open_access'] == 'restricted':
            del form['open_access'] # action required by the b2share_marc_handler

        if not form.get('language'):
            form['language'] = meta.language_default

        form = ImmutableMultiDict(form)

        # Build a WTForms form class from the domain's metadata model.
        MetaForm = model_form(meta.__class__, base_class=FormWithKey,
                              exclude=['submission', 'submission_type'],
                              field_args=meta.field_args,
                              converter=HTML5ModelConverter())

        meta_form = MetaForm(form, meta, csrf_enabled=False)

        if meta_form.validate_on_submit():
            recid, marc = create_marc(form, deposit_id, current_user['email'], meta)
            tmp_file = write_marc_to_temp_file(marc)
            # all usual tasks have priority 0; we want the bibuploads to run first
            from invenio.legacy.bibsched.bibtask import task_low_level_submission
            task_low_level_submission('bibupload', 'webdeposit', '--priority', '1', '-r', tmp_file)

            #TODO: remove the existing deposition folder?; the user can now
            #      repeatedly create records with the same deposition

            location = "/api/record/%d" % (recid,)
            json_data = {
                'message': "New record submitted for processing",
                'location': "/api/record/%d" % (recid,),
                'record_id': recid,
            }
            return json_data, 201, {'Location':location} # return location header
        else:
            # Validation failed: describe the expected (non-hidden) fields
            # so the client can correct its submission.
            fields = {}
            for (fname, field) in meta.field_args.iteritems():
                if not field.get('hidden'):
                    fields[fname] = { 'description' : field.get('description') }
                    if self.is_required_field(metaclass, fname):
                        fields[fname]['required'] = True
                    if field.get('cardinality') == 'n':
                        fields[fname]['multiple'] = True
                    if field.get('data_source'):
                        fields[fname]['options'] = field.get('data_source')

            json_data = {
                'message': 'Invalid metadata, please review the required fields',
                'status': 400,
                'fields': fields,
            }
            return json_data, 400