def upload_keywords(filename, mode='correct', recids=None):
    """Store the extracted keywords in the database.

    :param filename: fullpath to the file with marc record.
    :keyword mode: correct|replace|add|delete
        use correct to add fields if they are different
        replace all fields with fields from the file
        add - add (even duplicate) fields
        delete - delete fields which are inside the file.
    :keyword recids: list of record ids, this arg comes from the bibclassify
        daemon and it is used when the recids contains one entry (recid) -
        ie. one individual document was processed. We use it to mark the job
        title so that it is possible to query database if the bibclassify
        was run over that document (in case of collections with many recids,
        we simply construct a general title).
    """
    # Translate the symbolic mode into the bibupload command-line switch.
    mode_flags = {'correct': '-c', 'replace': '-r', 'add': '-a', 'delete': '-d'}
    if mode not in mode_flags:
        raise Exception('Unknown mode')
    flag = mode_flags[mode]
    # let's use the user column to store the information, cause no better
    # alternative in sight...
    user_title = 'bibclassify.upload'
    if recids and len(recids) == 1:
        user_title = 'extract:%d' % recids[0]
    bibtask.task_low_level_submission('bibupload', user_title, '-n', flag,
                                      filename)
def upload_keywords(filename, mode="correct", recids=None):
    """Store the extracted keywords in the database.

    :param filename: fullpath to the file with marc record.
    :keyword mode: correct|replace|add|delete
        use correct to add fields if they are different
        replace all fields with fields from the file
        add - add (even duplicate) fields
        delete - delete fields which are inside the file.
    :keyword recids: list of record ids, this arg comes from the bibclassify
        daemon and it is used when the recids contains one entry (recid) -
        ie. one individual document was processed. We use it to mark the job
        title so that it is possible to query database if the bibclassify
        was run over that document (in case of collections with many recids,
        we simply construct a general title).
    """
    flag_by_mode = {"correct": "-c", "replace": "-r",
                    "add": "-a", "delete": "-d"}
    try:
        m = flag_by_mode[mode]
    except KeyError:
        raise Exception("Unknown mode")
    # let's use the user column to store the information, cause no better
    # alternative in sight...
    if recids and len(recids) == 1:
        user_title = "extract:%d" % recids[0]
    else:
        user_title = "bibclassify.upload"
    bibtask.task_low_level_submission("bibupload", user_title, "-n", m,
                                      filename)
def _run_tasks(obj, dummy_eng):
    """Submit the upload tasklet and per-community webcoll tasks for a SIP."""
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(sealed=True)
    recid = sip.metadata['recid']
    communities = sip.metadata.get('provisional_communities', [])

    extra_args = []
    sequenceid = getattr(deposition.workflow_object, 'task_sequence_id', None)
    if sequenceid:
        extra_args.extend(['-I', str(sequenceid)])

    # ``update`` is captured from the enclosing scope and selects the tasklet.
    if update:
        tasklet = 'bst_openaire_update_upload'
    else:
        tasklet = 'bst_openaire_new_upload'

    sip.task_ids.append(task_low_level_submission(
        'bibtasklet', 'webdeposit',
        '-T', tasklet,
        '--argument', 'recid=%s' % recid,
        *extra_args))

    for community in communities:
        sip.task_ids.append(task_low_level_submission(
            'webcoll', 'webdeposit',
            '-c', 'provisional-user-%s' % community,
            *extra_args))

    deposition.update()
def bibupload_record(record=None, collection=None,
                     file_prefix="bibuploadutils", mode="-c",
                     alias='bibuploadutils', opts=None):
    """Write a MARCXML file and bibupload it.

    :param record: a single record to upload (used when ``collection`` is None)
    :param collection: iterable of records, uploaded in chunks of
        CFG_MAX_RECORDS records per bibupload task
    :param file_prefix: prefix for the temporary MARCXML file(s)
    :param mode: bibupload mode switch (e.g. ``-c``)
    :param alias: user alias under which the bibupload task is submitted
    :param opts: extra command-line arguments appended to the submission
    """
    if opts is None:
        # Avoid the shared mutable default argument ``opts=[]``.
        opts = []
    if collection is None and record is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for rec in collection:
            # BUGFIX: serialize the current loop record ``rec``; the original
            # wrote ``create_marcxml(record)`` (the usually-None argument).
            file_out.write(create_marcxml(rec))
            tot += 1
            if tot == CFG_MAX_RECORDS:
                # Chunk full: close it, submit, and start a fresh file.
                file_out.write("</collection>")
                close_temp_file(file_out, filename)
                task_low_level_submission(
                    'bibupload', alias, mode, filename, *opts
                )
                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        tot = 1
        file_out.write(create_marcxml(record))

    close_temp_file(file_out, filename)
    if tot > 0:
        task_low_level_submission('bibupload', alias, mode, filename, *opts)
def bibupload(record=None, collection=None, file_prefix="", mode="-c"):
    """
    General purpose function that will write a MARCXML file and call
    bibupload on it.
    """
    if record is None and collection is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for entry in collection:
            file_out.write(record_xml_output(entry))
            tot += 1
            if tot == MAX_RECORDS:
                # Chunk full: flush it to bibupload and start a new file.
                file_out.write("</collection>")
                file_out.close()
                logger.debug("Submitting bibupload %s -n %s" % (mode, filename))
                task_low_level_submission('bibupload', 'openaire', mode,
                                          filename, '-n')
                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        tot = 1
        file_out.write(record_xml_output(record))

    file_out.close()
    if tot > 0:
        logger.debug("Submitting bibupload %s -n %s" % (mode, filename))
        task_low_level_submission('bibupload', 'openaire', mode, filename,
                                  '-n')
def bst_twitter_fetcher(query):
    """
    Fetch the tweets related to the user and upload them into Invenio.
    @param user: the user
    """
    ## We prepare a temporary MARCXML file to upload.
    fd, name = tempfile.mkstemp(suffix='.xml', prefix='tweets', dir=CFG_TMPDIR)
    tweets = get_tweets(query)
    if tweets:
        os.write(fd, """<collection>\n""")
        for i, tweet in enumerate(tweets):
            ## For every tweet we transform it to MARCXML and we dump it in the file.
            task_update_progress('DONE: tweet %s out %s' % (i, len(tweets)))
            os.write(fd, tweet_to_record(tweet, query))
        # BUGFIX: the closing tag used to be written as "</collection\n>",
        # which is not well-formed XML and broke the uploaded file.
        os.write(fd, """</collection>\n""")
        os.close(fd)
        ## Invenio magic: we schedule an upload of the created MARCXML to be
        ## inserted ASAP in the system.
        task_low_level_submission('bibupload', 'admin', '-i', '-r', name, '-P5')
        write_message("Uploaded file %s with %s new tweets about %s"
                      % (name, len(tweets), query))
    else:
        # BUGFIX: close the temp file descriptor on this path too;
        # it used to leak when there were no new tweets.
        os.close(fd)
        write_message("No new tweets about %s" % query)
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    """
    if not overwrite:
        # Refuse to touch records that already carry references.
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Extract the references as a MARCXML snippet.
    references_xml = extract_references_from_record_xml(recid)

    # Dump the snippet into a shared temporary file for bibupload.
    temp_fd, temp_path = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                 dir=CFG_TMPSHAREDDIR)
    with os.fdopen(temp_fd, 'w') as temp_file:
        temp_file.write(references_xml)

    # Schedule a correction upload at priority 4.
    task_low_level_submission('bibupload', 'refextract', '-P', '4',
                              '-c', temp_path)
def bibupload_record(record=None, collection=None,
                     file_prefix="bibuploadutils", mode="-c",
                     alias='bibuploadutils', opts=None):
    """Write a MARCXML file and bibupload it.

    :param record: a single record to upload (used when ``collection`` is None)
    :param collection: iterable of records, uploaded in chunks of
        CFG_MAX_RECORDS records per bibupload task
    :param file_prefix: prefix for the temporary MARCXML file(s)
    :param mode: bibupload mode switch (e.g. ``-c``)
    :param alias: user alias under which the bibupload task is submitted
    :param opts: extra command-line arguments appended to the submission
    """
    if opts is None:
        # Avoid the shared mutable default argument ``opts=[]``.
        opts = []
    if collection is None and record is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for rec in collection:
            # BUGFIX: serialize the current loop record ``rec``; the original
            # wrote ``create_marcxml(record)`` (the usually-None argument).
            file_out.write(create_marcxml(rec))
            tot += 1
            if tot == CFG_MAX_RECORDS:
                # Chunk full: close it, submit, and start a fresh file.
                file_out.write("</collection>")
                close_temp_file(file_out, filename)
                task_low_level_submission('bibupload', alias, mode, filename,
                                          *opts)
                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        tot = 1
        file_out.write(create_marcxml(record))

    close_temp_file(file_out, filename)
    if tot > 0:
        task_low_level_submission('bibupload', alias, mode, filename, *opts)
def _run_tasks(obj, dummy_eng):
    """Submit the upload tasklet (at priority 5) and per-community webcoll
    tasks for the latest sealed SIP of a deposition."""
    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(sealed=True)
    recid = sip.metadata['recid']
    communities = sip.metadata.get('provisional_communities', [])

    shared_args = ['-P5', ]
    sequenceid = getattr(deposition.workflow_object, 'task_sequence_id', None)
    if sequenceid:
        shared_args += ['-I', str(sequenceid)]

    # ``update`` is captured from the enclosing scope and selects the tasklet.
    tasklet_name = ('bst_openaire_update_upload' if update
                    else 'bst_openaire_new_upload')

    task_id = task_low_level_submission(
        'bibtasklet', 'webdeposit',
        '-T', tasklet_name,
        '--argument', 'recid=%s' % recid,
        *shared_args
    )
    sip.task_ids.append(task_id)

    for community in communities:
        task_id = task_low_level_submission(
            'webcoll', 'webdeposit',
            '-c', 'provisional-user-%s' % community,
            *shared_args
        )
        sip.task_ids.append(task_id)

    deposition.update()
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False,
                    task_name="bibedit", sequence_id=None):
    """Write XML record to file. Default behaviour is to read the record
    from a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string in stead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            # The cache has served its purpose: drop it together with the
            # change-set entries that were applied.
            delete_cache(recid, uid)
            delete_disabled_changes(used_changes)
    else:
        # An explicit XML string was supplied; parse it into a record
        # structure instead of going through the cache.
        record = create_record(xml_record)[0]
    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)
    # order subfields alphabetically before saving the record
    record_order_subfields(record)
    xml_to_write = wash_for_xml(record_xml_output(record))
    # Write XML file.
    if not to_merge:
        # mkstemp avoids name clashes in the shared cache directory.
        fd, file_path = tempfile.mkstemp(dir=cfg['CFG_BIBEDIT_CACHEDIR'],
                                         prefix="%s_" % cfg['CFG_BIBEDIT_FILENAME'],
                                         suffix="_%s_%s.xml" % (recid, uid))
        f = os.fdopen(fd, 'w')
        f.write(xml_to_write)
        f.close()
    else:
        # BibMerge expects a predictable file name, so use the fixed path.
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   cfg['CFG_BIBEDIT_TO_MERGE_SUFFIX'])
        xml_file = open(file_path, 'w')
        xml_file.write(xml_to_write)
        xml_file.close()
    user_name = get_user_info(uid)[1]
    if to_upload:
        # Replace-mode upload at priority 5, attributed to the editing user.
        args = ['bibupload', user_name, '-P', '5', '-r', file_path,
                '-u', user_name]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True
def openaire_create_icon(docid=None, recid=None, reformat=True):
    """
    Celery task to create an icon for all documents in a given record or for
    just a specific document.
    """
    if recid:
        docs = BibRecDocs(recid).list_bibdocs()
    else:
        # Celery task will fail if BibDoc does not exists (on purpose ;-)
        docs = [BibDoc(docid)]
    for d in docs:
        logger.debug("Checking document %s" % d)
        if not d.get_icon(subformat_re=re.compile(ICON_SUBFORMAT)):
            logger.debug("Document has no icon")
            for f in d.list_latest_files():
                logger.debug("Checking file %s" % f)
                if not f.is_icon():
                    logger.debug("File not an icon")
                    file_path = f.get_full_path()
                    icon_path = None
                    try:
                        # Icon creation failures are logged and reported but
                        # must not abort the processing of the other files.
                        filename = os.path.splitext(
                            os.path.basename(file_path))[0]
                        logger.info("Creating icon from file %s" % file_path)
                        (icon_dir, icon_name) = create_icon(
                            {
                                "input-file": file_path,
                                "icon-name": "icon-%s" % filename,
                                "multipage-icon": False,
                                "multipage-icon-delay": 0,
                                "icon-scale": ICON_SIZE,
                                "icon-file-format": ICON_FILEFORMAT,
                                "verbosity": 0,
                            }
                        )
                        icon_path = os.path.join(icon_dir, icon_name)
                    except InvenioWebSubmitIconCreatorError as e:
                        logger.warning("Icon for file %s could not be created: %s"
                                       % (file_path, str(e)))
                        register_exception(
                            prefix="Icon for file %s could not be created: %s"
                            % (file_path, str(e)),
                            alert_admin=False
                        )
                    try:
                        # Attach the freshly created icon (if any) to the
                        # document; attach failures are likewise non-fatal.
                        if icon_path and os.path.exists(icon_path):
                            logger.debug("Adding icon %s to document" % icon_path)
                            d.add_icon(icon_path, subformat=ICON_SUBFORMAT)
                            recid_list = ",".join(
                                [str(x["recid"]) for x in d.bibrec_links])
                            if reformat:
                                # Reformat all records linked to this document
                                # so the new icon becomes visible.
                                task_low_level_submission(
                                    "bibreformat", "openaire", "-i", recid_list)
                    except InvenioBibDocFileError as e:
                        logger.warning(
                            "Icon %s for file %s could not be added to "
                            "document: %s" % (icon_path, f, str(e))
                        )
                        register_exception(
                            prefix="Icon %s for file %s could not be added"
                            " to document: %s" % (icon_path, f, str(e)),
                            alert_admin=False,
                        )
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False,
                    task_name="bibedit", sequence_id=None):
    """Write XML record to file. Default behaviour is to read the record
    from a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string in stead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            # Cache consumed: drop it and the applied change-set entries.
            delete_cache(recid, uid)
            delete_disabled_changes(used_changes)
    else:
        # An explicit XML string was supplied; parse it into a record
        # structure instead of going through the cache.
        record = create_record(xml_record)[0]
    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)
    # order subfields alphabetically before saving the record
    record_order_subfields(record)
    xml_to_write = wash_for_xml(record_xml_output(record))
    # Write XML file.
    if not to_merge:
        # mkstemp avoids name clashes in the shared cache directory.
        fd, file_path = tempfile.mkstemp(dir=cfg['CFG_BIBEDIT_CACHEDIR'],
                                         prefix="%s_" % cfg['CFG_BIBEDIT_FILENAME'],
                                         suffix="_%s_%s.xml" % (recid, uid))
        f = os.fdopen(fd, 'w')
        f.write(xml_to_write)
        f.close()
    else:
        # BibMerge expects a predictable file name, so use the fixed path.
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   cfg['CFG_BIBEDIT_TO_MERGE_SUFFIX'])
        xml_file = open(file_path, 'w')
        xml_file.write(xml_to_write)
        xml_file.close()
    user_name = get_user_info(uid)[1]
    if to_upload:
        # Replace-mode upload at priority 5, attributed to the editing user.
        args = ['bibupload', user_name, '-P', '5', '-r', file_path,
                '-u', user_name]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True
def submit_refextract_task(recids):
    """Submit a refextract task if needed"""
    # Keep only records that are safe to re-extract references from
    # (mostly dropping those that have been curated).
    safe_recids = [recid for recid in recids
                   if check_record_for_refextract(recid)]
    if not safe_recids:
        return
    recids_str = ','.join(str(recid) for recid in safe_recids)
    task_low_level_submission('refextract', NAME, '-i', recids_str)
def addmeta(request, sub_id):
    """
    Checks the submitted metadata form for validity.
    Returns a new page with success message if valid,
    otherwise it returns a form with the errors marked.
    """
    if sub_id is None:
        #just return to deposit
        return redirect(url_for('.deposit'))
    CFG_B2SHARE_UPLOAD_FOLDER = current_app.config.get(
        "CFG_B2SHARE_UPLOAD_FOLDER")
    updir = os.path.join(CFG_B2SHARE_UPLOAD_FOLDER, sub_id)
    # The submission must already contain at least one uploaded file.
    if (not os.path.isdir(updir)) or (not os.listdir(updir)):
        return render_template('500.html', message="Uploads not found"), 500
    # Pick the domain-specific metadata class; fall back to the generic one.
    domain = request.form['domain'].lower()
    if domain in metadata_classes():
        meta = metadata_classes()[domain]()
    else:
        from b2share_model.model import SubmissionMetadata
        meta = SubmissionMetadata()
    if not is_current_user_allowed_to_deposit(meta):
        return jsonify(
            valid=False,
            html=render_template('b2share-addmeta-table-denied.html'))
    # Build a WTForms form class from the metadata model.
    MetaForm = model_form(meta.__class__, base_class=FormWithKey,
                          exclude=['submission', 'submission_type'],
                          field_args=meta.field_args,
                          converter=HTML5ModelConverter())
    meta_form = MetaForm(request.form, meta)
    if meta_form.validate_on_submit():
        recid, marc = b2share_marc_handler.create_marc(
            request.form, sub_id, current_user['email'], meta)
        tmp_file = write_marc_to_temp_file(marc)
        # all usual tasks have priority 0; we want the bibuploads to run first
        from invenio.legacy.bibsched.bibtask import task_low_level_submission
        task_low_level_submission('bibupload', 'webdeposit', '--priority', '1',
                                  '-r', tmp_file)
        return jsonify(valid=True,
                       newurl=url_for("record.metadata", recid=recid),
                       html=render_template('record_waitforit.html',
                                            recid=recid, marc=marc))
    # Validation failed: re-render the form with error markers.
    return jsonify(valid=False,
                   html=render_template('b2share-addmeta-table.html',
                                        sub_id=sub_id, metadata=meta,
                                        form=meta_form, getattr=getattr))
def submit_refextract_task(recids):
    """Submit a refextract task if needed"""
    # Drop recids we cannot safely extract references from
    # (mostly because they have been curated).
    eligible = [r for r in recids if check_record_for_refextract(r)]
    if eligible:
        task_low_level_submission("refextract", NAME, "-i",
                                  ",".join(map(str, eligible)))
def upload_marcxml_file(marcxml):
    """Creates a temporary marcxml file and sends it to bibupload.

    :param marcxml: MARCXML content to write and submit (str).
    """
    # ``batch_job`` is read from the enclosing scope.
    xml_filename = ("bibencode_" + str(batch_job["recid"]) + "_"
                    + str(uuid.uuid4()) + ".xml")
    xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
    # BUGFIX: use ``open`` (the legacy ``file()`` constructor was removed in
    # Python 3) and close the handle even if the write fails.
    with open(xml_filename, "w") as xml_file:
        xml_file.write(marcxml)
    targs = ["-c", xml_filename]
    task_low_level_submission("bibupload", "bibencode", *targs)
def upload_marcxml_file(marcxml):
    """Creates a temporary marcxml file and sends it to bibupload.

    :param marcxml: MARCXML content to write and submit (str).
    """
    # ``batch_job`` is read from the enclosing scope.
    xml_filename = ('bibencode_' + str(batch_job['recid']) + '_'
                    + str(uuid.uuid4()) + '.xml')
    xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
    # BUGFIX: use ``open`` (the legacy ``file()`` constructor was removed in
    # Python 3) and close the handle even if the write fails.
    with open(xml_filename, 'w') as xml_file:
        xml_file.write(marcxml)
    targs = ['-c', xml_filename]
    task_low_level_submission('bibupload', 'bibencode', *targs)
def upload_marcxml_file(marcxml):
    """Creates a temporary marcxml file and sends it to bibupload.

    :param marcxml: MARCXML content to write and submit (str).
    """
    # ``batch_job`` is read from the enclosing scope.
    xml_filename = ('bibencode_' + str(batch_job['recid']) + '_'
                    + str(uuid.uuid4()) + '.xml')
    xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
    # BUGFIX: use ``open`` (the legacy ``file()`` constructor was removed in
    # Python 3) and close the handle even if the write fails.
    with open(xml_filename, 'w') as xml_file:
        xml_file.write(marcxml)
    targs = ['-c', xml_filename]
    task_low_level_submission('bibupload', 'bibencode', *targs)
def create_ill_record(book_info):
    """
    Create a new ILL record

    @param book_info: book's information
    @type book_info: tuple

    @return MARC record
    """
    (title, author, place, publisher, year, edition, isbn) = book_info

    # XML-escape every field before interpolating into the template.
    fields = {'isbn': encode_for_xml(isbn),
              'author': encode_for_xml(author),
              'title': encode_for_xml(title),
              'edition': encode_for_xml(edition),
              'place': encode_for_xml(place),
              'publisher': encode_for_xml(publisher),
              'year': encode_for_xml(year)}

    ill_record = """
       <record>
          <datafield tag="020" ind1=" " ind2=" ">
            <subfield code="a">%(isbn)s</subfield>
          </datafield>
          <datafield tag="100" ind1=" " ind2=" ">
            <subfield code="a">%(author)s</subfield>
          </datafield>
          <datafield tag="245" ind1=" " ind2=" ">
            <subfield code="a">%(title)s</subfield>
          </datafield>
          <datafield tag="250" ind1=" " ind2=" ">
            <subfield code="a">%(edition)s</subfield>
          </datafield>
          <datafield tag="260" ind1=" " ind2=" ">
            <subfield code="a">%(place)s</subfield>
            <subfield code="b">%(publisher)s</subfield>
            <subfield code="c">%(year)s</subfield>
          </datafield>
          <datafield tag="980" ind1=" " ind2=" ">
            <subfield code="a">ILLBOOK</subfield>
          </datafield>
       </record>
  """ % fields

    file_path = '%s/%s_%s.xml' % (CFG_TMPDIR, 'bibcirculation_ill_book',
                                  time.strftime("%Y%m%d_%H%M%S"))

    with open(file_path, 'w') as xml_file:
        xml_file.write(ill_record)

    # Pass XML file to BibUpload.
    task_low_level_submission('bibupload', 'bibcirculation',
                              '-P', '5', '-i', file_path)
    return ill_record
def create_ill_record(book_info):
    """
    Create a new ILL record

    @param book_info: book's information
    @type book_info: tuple

    @return MARC record
    """
    title, author, place, publisher, year, edition, isbn = book_info

    # XML-escape every field before interpolating into the template.
    escaped = {'isbn': encode_for_xml(isbn),
               'author': encode_for_xml(author),
               'title': encode_for_xml(title),
               'edition': encode_for_xml(edition),
               'place': encode_for_xml(place),
               'publisher': encode_for_xml(publisher),
               'year': encode_for_xml(year)}

    ill_record = """
       <record>
          <datafield tag="020" ind1=" " ind2=" ">
            <subfield code="a">%(isbn)s</subfield>
          </datafield>
          <datafield tag="100" ind1=" " ind2=" ">
            <subfield code="a">%(author)s</subfield>
          </datafield>
          <datafield tag="245" ind1=" " ind2=" ">
            <subfield code="a">%(title)s</subfield>
          </datafield>
          <datafield tag="250" ind1=" " ind2=" ">
            <subfield code="a">%(edition)s</subfield>
          </datafield>
          <datafield tag="260" ind1=" " ind2=" ">
            <subfield code="a">%(place)s</subfield>
            <subfield code="b">%(publisher)s</subfield>
            <subfield code="c">%(year)s</subfield>
          </datafield>
          <datafield tag="980" ind1=" " ind2=" ">
            <subfield code="a">ILLBOOK</subfield>
          </datafield>
       </record>
  """ % escaped

    file_path = '%s/%s_%s.xml' % (CFG_TMPDIR, 'bibcirculation_ill_book',
                                  time.strftime("%Y%m%d_%H%M%S"))

    with open(file_path, 'w') as marc_file:
        marc_file.write(ill_record)

    # Pass XML file to BibUpload.
    task_low_level_submission('bibupload', 'bibcirculation',
                              '-P', '5', '-i', file_path)
    return ill_record
def cb_submit_bibupload(bibcatalog_system=None, records=None):
    """Write the given records to a temp file and submit a correction upload."""
    if not records:
        return
    references_xml = print_records(records)

    # Save new record to file
    temp_fd, temp_path = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                 dir=CFG_TMPSHAREDDIR)
    with os.fdopen(temp_fd, "w") as temp_file:
        temp_file.write(references_xml)

    # Update record
    task_low_level_submission("bibupload", "refextract", "-c", temp_path)
def addmeta(request, sub_id):
    """
    Checks the submitted metadata form for validity.
    Returns a new page with success message if valid,
    otherwise it returns a form with the errors marked.
    """
    if sub_id is None:
        # just return to deposit
        return redirect(url_for(".deposit"))
    CFG_B2SHARE_UPLOAD_FOLDER = current_app.config.get("CFG_B2SHARE_UPLOAD_FOLDER")
    updir = os.path.join(CFG_B2SHARE_UPLOAD_FOLDER, sub_id)
    # The submission must already contain at least one uploaded file.
    if (not os.path.isdir(updir)) or (not os.listdir(updir)):
        return render_template("500.html", message="Uploads not found"), 500
    # Pick the domain-specific metadata class; fall back to the generic one.
    domain = request.form["domain"].lower()
    if domain in metadata_classes():
        meta = metadata_classes()[domain]()
    else:
        from b2share_model.model import SubmissionMetadata

        meta = SubmissionMetadata()
    # Build a WTForms form class from the metadata model.
    MetaForm = model_form(
        meta.__class__,
        base_class=FormWithKey,
        exclude=["submission", "submission_type"],
        field_args=meta.field_args,
        converter=HTML5ModelConverter(),
    )
    meta_form = MetaForm(request.form, meta)
    if meta_form.validate_on_submit():
        recid, marc = b2share_marc_handler.create_marc(request.form, sub_id, current_user["email"], meta)
        tmp_file = write_marc_to_temp_file(marc)
        # all usual tasks have priority 0; we want the bibuploads to run first
        from invenio.legacy.bibsched.bibtask import task_low_level_submission

        task_low_level_submission("bibupload", "webdeposit", "--priority", "1", "-r", tmp_file)
        return jsonify(
            valid=True,
            newurl=url_for("record.metadata", recid=recid),
            html=render_template("record_waitforit.html", recid=recid, marc=marc),
        )
    # Validation failed: re-render the form with error markers.
    return jsonify(
        valid=False,
        html=render_template(
            "b2share-addmeta-table.html", sub_id=sub_id, metadata=meta, form=meta_form, getattr=getattr
        ),
    )
def cb_submit_bibupload(bibcatalog_system=None, records=None):
    """Persist the given records and schedule a bibupload correction task."""
    if records:
        references_xml = print_records(records)

        # Save new record to file
        temp_fd, temp_path = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                     dir=CFG_TMPSHAREDDIR)
        handle = os.fdopen(temp_fd, 'w')
        try:
            handle.write(references_xml)
        finally:
            handle.close()

        # Update record
        task_low_level_submission('bibupload', 'refextract', '-c', temp_path)
def run_deposition_tasks(self, deposition_id, with_webcoll=True):
    """
    Run all task ids specified in the latest SIP and optionally run
    webcoll.
    """
    # Execute every task the deposition submitted.
    from invenio.modules.deposit.models import Deposition
    deposition = Deposition.get(deposition_id)
    sip = deposition.get_latest_sip(sealed=True)
    for submitted_task_id in sip.task_ids:
        self.run_task_id(submitted_task_id)

    if not with_webcoll:
        return

    # Run webcoll (to ensure record is assigned permissions)
    from invenio.legacy.bibsched.bibtask import task_low_level_submission
    webcoll_task_id = task_low_level_submission('webcoll', 'webdeposit', '-q')
    self.run_task_id(webcoll_task_id)

    # Check if record is accessible
    response = self.client.get(
        url_for('record.metadata', recid=sip.metadata['recid']),
        base_url=self.app.config['CFG_SITE_SECURE_URL'],
    )
    self.assertStatus(response, 200)
def create(obj, dummy_eng):
    """Seal the latest SIP, dump its package to a shared temp file and
    submit a bibupload task for it (replace mode if a recid exists,
    insert mode otherwise)."""
    #FIXME change share tmp directory
    from invenio.config import CFG_TMPSHAREDDIR
    from invenio.legacy.bibsched.bibtask import task_low_level_submission, \
        bibtask_allocate_sequenceid
    d = Deposition(obj)
    sip = d.get_latest_sip(sealed=False)
    sip.seal()
    # The file must live in the shared directory so the bibupload
    # worker (possibly another host) can read it.
    tmp_file_fd, tmp_file_path = mkstemp(
        prefix="webdeposit-%s-%s" % (d.id, sip.uuid),
        suffix='.xml',
        dir=CFG_TMPSHAREDDIR,
    )
    os.write(tmp_file_fd, sip.package)
    os.close(tmp_file_fd)
    # Trick to have access to task_sequence_id in subsequent tasks.
    d.workflow_object.task_sequence_id = bibtask_allocate_sequenceid()
    # '-r' replaces an existing record, '-i' inserts a new one.
    task_id = task_low_level_submission(
        'bibupload', 'webdeposit',
        '-r' if 'recid' in sip.metadata else '-i',
        tmp_file_path,
        '-I', str(d.workflow_object.task_sequence_id)
    )
    sip.task_ids.append(task_id)
    d.update()
def Insert_Modify_Record(parameters, curdir, form, user_info=None):
    """
    Modify existing record using 'curdir/recmysql' and BibUpload correct
    mode. The file must therefore already have been created prior to this
    execution of this function, for eg. using "Make_Modify_Record".

    This function gets the output of BibConvert and uploads it into
    the MySQL bibliographical database.
    """
    global rn
    sequence_id = bibtask_allocate_sequenceid(curdir)

    # Prefer the formatted record if both files exist.
    if os.path.exists(os.path.join(curdir, "recmysqlfmt")):
        recfile = "recmysqlfmt"
    elif os.path.exists(os.path.join(curdir, "recmysql")):
        recfile = "recmysql"
    else:
        raise InvenioWebSubmitFunctionError("Could not find record file")

    # Copy the record to a uniquely-named temp file so later submissions
    # cannot clobber it before bibupload runs.
    initial_file = os.path.join(curdir, recfile)
    tmp_fd, final_file = tempfile.mkstemp(
        dir=CFG_TMPDIR,
        prefix="%s_%s" % (rn.replace('/', '_'),
                          time.strftime("%Y-%m-%d_%H:%M:%S")))
    os.close(tmp_fd)
    shutil.copy(initial_file, final_file)

    bibupload_id = task_low_level_submission(
        'bibupload', 'websubmit.Insert_Modify_Record',
        '-c', final_file, '-P', '3', '-I', str(sequence_id))

    # BUGFIX: close the marker file instead of leaking the handle
    # (the original used ``open(...).write(...)`` without closing).
    with open(os.path.join(curdir, 'bibupload_id'), 'w') as marker:
        marker.write(str(bibupload_id))
    return ""
def call_bibupload(marcxmlfile, mode=None, oai_src_id=-1, sequence_id=None):
    """
    Creates a bibupload task for the task scheduler in given mode
    on given file. Returns the generated task id and logs the event
    in oaiHARVESTLOGS, also adding any given oai source identifier.

    :param marcxmlfile: base-marcxmlfilename to upload
    :param mode: mode to upload in
    :param oai_src_id: id of current source config
    :param sequence_id: sequence-number, if relevant

    :return: task_id if successful, otherwise None.
    """
    if mode is None:
        mode = ["-r", "-i"]
    if os.path.exists(marcxmlfile):
        try:
            # BUGFIX: copy the mode list so a caller-supplied list is not
            # mutated by the extend() calls below (``args = mode`` aliased it).
            args = list(mode)
            # Add job with priority 6 (above normal bibedit tasks)
            # and file to upload to arguments
            args.extend(["-P", "6", marcxmlfile])
            if sequence_id:
                args.extend(['-I', str(sequence_id)])
            task_id = task_low_level_submission("bibupload", "oaiharvest",
                                                *tuple(args))
            create_oaiharvest_log(task_id, oai_src_id, marcxmlfile)
        except Exception as msg:
            write_message("An exception during submitting oaiharvest task occured : %s " % (str(msg)))
            return None
        return task_id
    else:
        write_message("marcxmlfile %s does not exist" % (marcxmlfile,))
        return None
def Insert_Modify_Record(parameters, curdir, form, user_info=None):
    """
    Modify existing record using 'curdir/recmysql' and BibUpload correct
    mode. The file must therefore already have been created prior to this
    execution of this function, for eg. using "Make_Modify_Record".

    This function gets the output of BibConvert and uploads it into
    the MySQL bibliographical database.
    """
    global rn
    sequence_id = bibtask_allocate_sequenceid(curdir)

    # Prefer the formatted record if both files exist.
    if os.path.exists(os.path.join(curdir, "recmysqlfmt")):
        recfile = "recmysqlfmt"
    elif os.path.exists(os.path.join(curdir, "recmysql")):
        recfile = "recmysql"
    else:
        raise InvenioWebSubmitFunctionError("Could not find record file")

    # Copy the record to a uniquely-named temp file so later submissions
    # cannot clobber it before bibupload runs.
    initial_file = os.path.join(curdir, recfile)
    tmp_fd, final_file = tempfile.mkstemp(
        dir=CFG_TMPDIR,
        prefix="%s_%s" % (rn.replace('/', '_'),
                          time.strftime("%Y-%m-%d_%H:%M:%S")))
    os.close(tmp_fd)
    shutil.copy(initial_file, final_file)

    bibupload_id = task_low_level_submission(
        'bibupload', 'websubmit.Insert_Modify_Record',
        '-c', final_file, '-P', '3', '-I', str(sequence_id))

    # BUGFIX: close the marker file instead of leaking the handle
    # (the original used ``open(...).write(...)`` without closing).
    with open(os.path.join(curdir, 'bibupload_id'), 'w') as marker:
        marker.write(str(bibupload_id))
    return ""
def submit_task(to_submit, mode, sequence_id):
    """call bibupload with all records to be modified.

    :param to_submit: list of xml snippets to be submitted
    :type: list
    :param mode: mode to be used in bibupload
    :type: list
    :param sequence_id: sequence id to be included in the task_id
    :type: str

    :return: id of the submitted task
    :rtype: int
    """
    # Wrap every snippet in a single <collection> document.
    (temp_fd, temp_path) = mkstemp(prefix=PREFIX, dir=CFG_TMPSHAREDDIR)
    with os.fdopen(temp_fd, 'w') as marcxml_file:
        marcxml_file.write('<?xml version="1.0" encoding="UTF-8"?>')
        marcxml_file.write('<collection>')
        for snippet in to_submit:
            marcxml_file.write(snippet)
        marcxml_file.write('</collection>')

    return task_low_level_submission('bibupload', PREFIX, '-P', '3',
                                     '-I', sequence_id,
                                     '-%s' % mode, temp_path)
def upload_marcxml(self, marcxml, mode):
    """
    Uploads a record to the server

    Parameters:
      marcxml - *str* the XML to upload.
         mode - *str* the mode to use for the upload.
                "-i" insert new records
                "-r" replace existing records
                "-c" correct fields of records
                "-a" append fields to records
                "-ir" insert record or replace if it exists
    """
    if mode not in ["-i", "-r", "-c", "-a", "-ir"]:
        # BUGFIX: use the call form of raise; the old
        # ``raise NameError, "..."`` statement is a syntax error on Python 3.
        raise NameError("Incorrect mode " + str(mode))

    # Are we running locally? If so, submit directly
    if self.local:
        (code, marcxml_filepath) = tempfile.mkstemp(
            prefix="upload_%s" % time.strftime("%Y%m%d_%H%M%S_",
                                               time.localtime()))
        marcxml_file_d = os.fdopen(code, "w")
        marcxml_file_d.write(marcxml)
        marcxml_file_d.close()
        return task_low_level_submission("bibupload", "", mode,
                                         marcxml_filepath)
    else:
        params = urllib.urlencode({'file': marcxml, 'mode': mode})
        ## We don't use self.browser as batchuploader is protected by IP
        opener = urllib2.build_opener()
        opener.addheaders = [('User-Agent', CFG_USER_AGENT)]
        return opener.open(self.server_url + "/batchuploader/robotupload",
                           params,)
def upload_amendments(records, holdingpen):
    """ Upload a modified record """

    if task_get_option("no_upload", False) or len(records) == 0:
        return

    xml = '<collection xmlns="http://www.loc.gov/MARC21/slim">'
    for record in records:
        xml += record_xml_output(record)
    xml += "</collection>"

    tmp_file_fd, tmp_file = mkstemp(
        suffix='.xml',
        prefix="bibcheckfile_%s" % time.strftime("%Y-%m-%d_%H:%M:%S"),
        dir=CFG_TMPSHAREDDIR
    )
    os.write(tmp_file_fd, xml)
    os.close(tmp_file_fd)
    # BUGFIX: ``0o644`` -- the bare octal literal ``0644`` is a syntax
    # error on Python 3 (0o644 is valid on Python 2.6+ as well).
    os.chmod(tmp_file, 0o644)

    # "-o" queues the change in the holding pen, "-r" replaces directly.
    if holdingpen:
        flag = "-o"
    else:
        flag = "-r"
    task = task_low_level_submission('bibupload', 'bibcheck', flag, tmp_file)
    write_message("Submitted bibupload task %s" % task)
def _run_tasks(obj, dummy_eng):
    """Submit the upload tasklet for the latest sealed SIP of a deposition."""
    from invenio.legacy.bibsched.bibtask import task_low_level_submission
    d = Deposition(obj)
    sip = d.get_latest_sip(sealed=True)
    # XXX XXX XXX return
    recid = sip.metadata['recid']
    common_args = []
    sequenceid = getattr(d.workflow_object, 'task_sequence_id', None)
    if sequenceid:
        common_args += ['-I', str(sequenceid)]
    # NOTE(review): ``update`` is presumably captured from an enclosing
    # factory scope -- confirm; it selects new-vs-update upload tasklet.
    if update:
        tasklet_name = 'bst_openaire_update_upload'
    else:
        tasklet_name = 'bst_openaire_new_upload'
    task_id = task_low_level_submission(
        'bibtasklet', 'webdeposit', '-T', tasklet_name,
        '--argument', 'recid=%s' % recid, *common_args
    )
    sip.task_ids.append(task_id)
    d.update()
def submit_task(to_submit, mode, sequence_id):
    """call bibupload with all records to be modified.

    :param to_submit: list of xml snippets to be submitted
    :type: list
    :param mode: mode to be used in bibupload
    :type: list
    :param sequence_id: sequence id to be included in the task_id
    :type: str

    :return: id of the submitted task
    :rtype: int
    """
    (temp_fd, temp_path) = mkstemp(prefix=PREFIX, dir=CFG_TMPSHAREDDIR)
    out = os.fdopen(temp_fd, 'w')
    # Assemble a single <collection> document from the snippets.
    out.write('<?xml version="1.0" encoding="UTF-8"?>')
    out.write('<collection>')
    for fragment in to_submit:
        out.write(fragment)
    out.write('</collection>')
    out.close()
    return task_low_level_submission('bibupload', PREFIX, '-P', '3',
                                     '-I', sequence_id, '-%s' % mode,
                                     temp_path)
def run_deposition_tasks(self, deposition_id, with_webcoll=True): """ Run all task ids specified in the latest SIP and optionally run webcoll. """ # Run submitted tasks from invenio.modules.deposit.models import Deposition dep = Deposition.get(deposition_id) sip = dep.get_latest_sip(sealed=True) for task_id in sip.task_ids: self.run_task_id(task_id) if with_webcoll: # Run webcoll (to ensure record is assigned permissions) from invenio.legacy.bibsched.bibtask import \ task_low_level_submission task_id = task_low_level_submission('webcoll', 'webdeposit', '-q') self.run_task_id(task_id) # Check if record is accessible response = self.client.get( url_for('record.metadata', recid=sip.metadata['recid']), base_url=self.app.config['CFG_SITE_SECURE_URL'], ) self.assertStatus(response, 200)
def _upload_amendments(obj, eng, holdingpen=False): # Load everything extra_data = obj.get_extra_data() _ensure_key('modified_records', extra_data) modified_records = extra_data['modified_records'] upload = extra_data['common']['upload'] tickets = extra_data['common']['tickets'] queue = extra_data['common']['queue'] modified_records = (Record(r) for r in modified_records.values()) records_xml = ( '<collection xmlns="http://www.loc.gov/MARC21/slim">\n' '{}' '</collection>' .format("".join((record.legacy_export_as_marc() for record in modified_records))) ) # Upload if not upload or not modified_records: return tmp_file_fd, tmp_file = tempfile.mkstemp( suffix='.xml', prefix="bibcheckfile_%s" % time.strftime("%Y-%m-%d_%H:%M:%S"), dir=cfg['CFG_TMPSHAREDDIR'] ) os.write(tmp_file_fd, records_xml) os.close(tmp_file_fd) os.chmod(tmp_file, 0644) if holdingpen: flag = "-o" else: flag = "-r" task = task_low_level_submission('bibupload', 'bibcheck', flag, tmp_file)
def upload_marcxml(self, marcxml, mode):
    """
    Uploads a record to the server.

    Parameters:
      marcxml - *str* the XML to upload.
      mode - *str* the mode to use for the upload.
        "-i" insert new records
        "-r" replace existing records
        "-c" correct fields of records
        "-a" append fields to records
        "-ir" insert record or replace if it exists

    Returns the bibupload task id when running locally, otherwise the
    HTTP response object from the remote batchuploader endpoint.
    """
    if mode not in ["-i", "-r", "-c", "-a", "-ir"]:
        # NOTE(review): Python 2 raise syntax; NameError is an odd pick for
        # input validation (ValueError is conventional) but callers may be
        # catching it, so leave as-is.
        raise NameError, "Incorrect mode " + str(mode)
    # Are we running locally? If so, submit directly
    if self.local:
        (code, marcxml_filepath) = tempfile.mkstemp(prefix="upload_%s" % \
            time.strftime("%Y%m%d_%H%M%S_", time.localtime()))
        marcxml_file_d = os.fdopen(code, "w")
        marcxml_file_d.write(marcxml)
        marcxml_file_d.close()
        return task_low_level_submission("bibupload", "", mode, marcxml_filepath)
    else:
        params = urllib.urlencode({'file': marcxml, 'mode': mode})
        ## We don't use self.browser as batchuploader is protected by IP
        opener = urllib2.build_opener()
        opener.addheaders = [('User-Agent', CFG_USER_AGENT)]
        return opener.open(self.server_url + "/batchuploader/robotupload", params,)
def create(obj, dummy_eng):
    """Seal the deposition's SIP and schedule a bibupload task for it.

    Chooses '-r' (replace) when the SIP metadata already carries a recid,
    otherwise '-i' (insert).  The allocated bibtask sequence id is stored
    on the workflow object so subsequent tasks can join the same sequence.
    """
    #FIXME change share tmp directory
    from invenio.config import CFG_TMPSHAREDDIR
    from invenio.legacy.bibsched.bibtask import task_low_level_submission, \
        bibtask_allocate_sequenceid
    d = Deposition(obj)
    sip = d.get_latest_sip(sealed=False)
    sip.seal()
    tmp_file_fd, tmp_file_path = mkstemp(
        prefix="webdeposit-%s-%s" % (d.id, sip.uuid),
        suffix='.xml',
        dir=CFG_TMPSHAREDDIR,
    )
    os.write(tmp_file_fd, sip.package)
    os.close(tmp_file_fd)
    # Trick to have access to task_sequence_id in subsequent tasks.
    d.workflow_object.task_sequence_id = bibtask_allocate_sequenceid()
    task_id = task_low_level_submission(
        'bibupload', 'webdeposit',
        '-r' if 'recid' in sip.metadata else '-i',
        tmp_file_path,
        '-I', str(d.workflow_object.task_sequence_id))
    sip.task_ids.append(task_id)
    d.update()
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string in stead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_file_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache_file(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): if the cache is missing, ``record`` is never bound
        # and the calls below raise UnboundLocalError -- confirm callers
        # guarantee a cache exists in this path.
    else:
        record = create_record(xml_record)[0]
    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)
    # order subfields alphabetically before saving the record
    record_order_subfields(record)
    xml_to_write = wash_for_xml(record_xml_output(record))
    # Write XML file.
    if not to_merge:
        file_path = '%s.xml' % _get_file_path(recid, uid)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
    xml_file = open(file_path, 'w')
    xml_file.write(xml_to_write)
    xml_file.close()
    user_name = get_user_info(uid)[1]
    if to_upload:
        # Pass XML file to BibUpload.
        task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r',
                                  file_path, '-u', user_name)
    return True
def _upload_record(obj, eng):
    """Persist the workflow object to a file and queue a bibupload task.

    NOTE: the upload mode comes from a free variable ``mode`` bound by the
    enclosing scope.
    """
    from invenio.legacy.bibsched.bibtask import task_low_level_submission
    eng.log_info("Saving data to temporary file for upload")
    filename = obj.save_to_file()
    upload_args = ["-%s" % (mode, ), filename]
    task_id = task_low_level_submission("bibupload", "bibworkflow",
                                        *upload_args)
    eng.log_info("Submitted task #%s" % (task_id, ))
def _upload_record(obj, eng):
    """Save the object's data to disk and submit it to bibupload.

    NOTE: ``mode`` is a free variable supplied by the enclosing scope.
    """
    from invenio.legacy.bibsched.bibtask import task_low_level_submission
    eng.log_info("Saving data to temporary file for upload")
    saved_path = obj.save_to_file()
    flag = "-%s" % (mode,)
    task_id = task_low_level_submission("bibupload", "bibworkflow",
                                        flag, saved_path)
    eng.log_info("Submitted task #%s" % (task_id,))
def upload_to_site(marcxml, yes_i_know):
    """
    makes the appropriate calls to bibupload to get the MARCXML record
    onto the site.

    @param: marcxml (string): the absolute location of the MARCXML that
        was generated by this programme
    @param: yes_i_know (boolean): if true, no confirmation.  if false, prompt.

    @output: a new record on the invenio site

    @return: None
    """
    # Ask the operator to confirm unless explicitly told not to.
    if not yes_i_know:
        warning = 'You are going to upload new plots to the server.'
        wait_for_user(wrap_text_in_a_box(warning))
    task_low_level_submission('bibupload', 'admin', '-a', marcxml)
def update_marcxml_with_info(recid, username, current_date, remote_id,
                             action='append'):
    '''
    This function adds a field in the marc file to inform that the record
    has been submitted to a remote server.

    @param recid: id of the record to update
    @param username: submitter, stored in the info message
    @param current_date: submission date, stored in the info message
    @param remote_id: id of the record on the remote server
    @param action: 'append' to add the info node, 'delete' to remove it
    @return: id of the submitted bibupload task
    @raise ValueError: if action is neither 'append' nor 'delete'
    '''
    # Fail fast on a bad action: previously an unknown action fell through
    # to ``return result`` with ``result`` unbound (UnboundLocalError).
    if action not in ('append', 'delete'):
        raise ValueError("Unknown action: %s" % action)

    # concatenation of the string to append to the marc file
    node = '''<record>
    <controlfield tag="001">%(recid)s</controlfield>
    <datafield tag="%(tag)s" ind1=" " ind2=" ">
        <subfield code="a">%(submit_info)s</subfield>
    </datafield>
</record>''' % {
        'recid': recid,
        'tag': CFG_MARC_RECORD_SUBMIT_INFO,
        'submit_info': CFG_SUBMIT_ARXIV_INFO_MESSAGE % (username,
                                                        current_date,
                                                        remote_id)
    }

    # creation of the tmp file containing the xml node to append
    (tmpfd, filename) = mkstemp(suffix='.xml',
                                prefix='bibsword_append_submit_info_',
                                dir=CFG_TMPDIR)
    tmpfile = os.fdopen(tmpfd, 'w')
    tmpfile.write(node)
    tmpfile.close()

    # insert a task in bibsched to add/remove the node in the marc file
    flag = '-a' if action == 'append' else '-d'
    result = task_low_level_submission('bibupload', 'BibSword', flag,
                                       filename)
    return result
def update_marcxml_with_remote_id(recid, remote_id, action="append"):
    '''
    Write a new entry in the given marc file. This entry is the remote
    record id given by the server where the submission has been done.

    @param recid: id of the local record to update
    @param remote_id: the string containing the id to add to the marc file
    @param action: 'append' to add the id field, 'delete' to remove it
    @return: id of the submitted bibupload task
    @raise ValueError: if action is neither 'append' nor 'delete'
    '''
    # Fail fast on a bad action: previously an unknown action fell through
    # to ``return result`` with ``result`` unbound (UnboundLocalError).
    if action not in ('append', 'delete'):
        raise ValueError("Unknown action: %s" % action)

    # CFG_MARC_ADDITIONAL_REPORT_NUMBER is a 6-char MARC spec: the first
    # three chars are the tag, the sixth is the subfield code.
    field_tag = CFG_MARC_ADDITIONAL_REPORT_NUMBER
    tag_id = "%s%s%s" % (field_tag[0], field_tag[1], field_tag[2])
    tag_code = field_tag[5]

    # concatenation of the string to append to the marc file
    node = '''<record>
    <controlfield tag="001">%(recid)s</controlfield>
    <datafield tag="%(tagid)s" ind1=" " ind2=" ">
        <subfield code="%(tagcode)s">%(remote_id)s</subfield>
    </datafield>
</record>''' % {
        'recid': recid,
        'tagid': tag_id,
        'tagcode': tag_code,
        'remote_id': remote_id
    }

    # creation of the tmp file containing the xml node to append
    (tmpfd, filename) = mkstemp(suffix='.xml',
                                prefix='bibsword_append_remote_id_',
                                dir=CFG_TMPDIR)
    tmpfile = os.fdopen(tmpfd, 'w')
    tmpfile.write(node)
    tmpfile.close()

    # insert a task in bibsched to add/remove the node in the marc file
    flag = '-a' if action == 'append' else '-d'
    result = task_low_level_submission('bibupload', 'BibSword', flag,
                                       filename)
    return result
def update_marcxml_with_remote_id(recid, remote_id, action="append"):
    '''
    Write a new entry in the given marc file. This entry is the remote
    record id given by the server where the submission has been done.

    @param recid: id of the local record to update
    @param remote_id: the string containing the id to add to the marc file
    @param action: 'append' to add the id field, 'delete' to remove it
    @return: id of the submitted bibupload task
    @raise ValueError: if action is neither 'append' nor 'delete'
    '''
    # Validate first: an unknown action used to reach ``return result``
    # with ``result`` never assigned, raising UnboundLocalError.
    if action not in ('append', 'delete'):
        raise ValueError("Unknown action: %s" % action)

    # CFG_MARC_ADDITIONAL_REPORT_NUMBER encodes tag (chars 0-2) and
    # subfield code (char 5).
    field_tag = CFG_MARC_ADDITIONAL_REPORT_NUMBER
    tag_id = "%s%s%s" % (field_tag[0], field_tag[1], field_tag[2])
    tag_code = field_tag[5]

    # concatenation of the string to append to the marc file
    node = '''<record>
    <controlfield tag="001">%(recid)s</controlfield>
    <datafield tag="%(tagid)s" ind1=" " ind2=" ">
        <subfield code="%(tagcode)s">%(remote_id)s</subfield>
    </datafield>
</record>''' % {
        'recid': recid,
        'tagid': tag_id,
        'tagcode': tag_code,
        'remote_id': remote_id
    }

    # creation of the tmp file containing the xml node to append
    (tmpfd, filename) = mkstemp(suffix='.xml',
                                prefix='bibsword_append_remote_id_',
                                dir=CFG_TMPDIR)
    tmpfile = os.fdopen(tmpfd, 'w')
    tmpfile.write(node)
    tmpfile.close()

    # insert a task in bibsched to add/remove the node in the marc file
    flag = '-a' if action == 'append' else '-d'
    result = task_low_level_submission('bibupload', 'BibSword', flag,
                                       filename)
    return result
def bst_run_bibtask(taskname, user, **args):
    """
    Initiate a bibsched task.

    @param taskname: name of the task to run
    @type taskname: string
    @param user: the user to run the task under.
    @type user: string

    Remaining keyword arguments become command-line options: one-letter
    names get a '-' prefix, longer names get '--'; the value (when not
    None) follows the option.
    """
    arglist = []
    # Transform dict to list: {'a': 0, 'b': 1} -> ['-a', 0, '-b', 1]
    for name, value in args.items():
        if len(name) == 1:
            name = '-' + name
        else:
            name = '--' + name
        arglist.append(name)
        # Explicit None check: 'if value:' used to silently drop falsy
        # but legitimate values such as 0 or '' (contradicting the
        # example above); None still means "flag without a value".
        if value is not None:
            arglist.append(value)
    task_low_level_submission(taskname, user, *tuple(arglist))
def bst_run_bibtask(taskname, user, **args):
    """
    Initiate a bibsched task.

    @param taskname: name of the task to run
    @type taskname: string
    @param user: the user to run the task under.
    @type user: string

    Extra keyword arguments are mapped to command-line options
    ('-x' for single letters, '--xyz' otherwise), each followed by its
    value unless the value is None.
    """
    arglist = []
    # Transform dict to list: {'a': 0, 'b': 1} -> ['-a', 0, '-b', 1]
    for name, value in args.items():
        if len(name) == 1:
            name = '-' + name
        else:
            name = '--' + name
        arglist.append(name)
        # Use an explicit None check so falsy-but-real values (0, '')
        # are still forwarded; 'if value:' used to drop them.
        if value is not None:
            arglist.append(value)
    task_low_level_submission(taskname, user, *tuple(arglist))
def _upload_file_with_bibupload(file_path, upload_mode, num_records, req):
    """
    Uploads file with bibupload

       @param file_path: path to the file where the XML will be saved.
       @param upload_mode: -c for correct or -r for replace
       @return tuple formed by status of the upload:
           0-changes to be made instantly
           1-changes to be made only in limited hours
           2-user is superadmin. Changes made in limited hours
           3-no rights to upload
       and the upload file path
    """
    # Small batches go through immediately.
    if num_records < CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING:
        task_low_level_submission('bibupload', 'multiedit', '-P', '5',
                                  upload_mode, '%s' % file_path)
        return (0, file_path)
    # Medium batches are deferred to the configured low-traffic window.
    if num_records < CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING:
        task_low_level_submission('bibupload', 'multiedit', '-P', '5',
                                  upload_mode, '-L',
                                  CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME,
                                  '%s' % file_path)
        return (1, file_path)
    # Very large batches: only superadmins may submit (still deferred).
    user_info = collect_user_info(req)
    if isUserSuperAdmin(user_info):
        task_low_level_submission('bibupload', 'multiedit', '-P', '5',
                                  upload_mode, '-L',
                                  CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME,
                                  '%s' % file_path)
        return (2, file_path)
    return (3, file_path)
def metadata_upload(req, metafile=None, filetype=None, mode=None, exec_date=None, exec_time=None, metafilename=None, ln=CFG_SITE_LANG, priority="1", email_logs_to=None): """ Metadata web upload service. Get upload parameters and exec bibupload for the given file. Finally, write upload history. @return: tuple (error code, message) error code: code that indicates if an error ocurred message: message describing the error """ # start output: req.content_type = "text/html" req.send_http_header() error_codes = {'not_authorized': 1} user_info = collect_user_info(req) (fd, filename) = tempfile.mkstemp(prefix="batchupload_" + \ user_info['nickname'] + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_", dir=CFG_TMPSHAREDDIR) filedesc = os.fdopen(fd, 'w') filedesc.write(metafile) filedesc.close() # check if this client can run this file: if req is not None: allow = _check_client_can_submit_file(req=req, metafile=metafile, webupload=1, ln=ln) if allow[0] != 0: return (error_codes['not_authorized'], allow[1]) # run upload command: task_arguments = ('bibupload', user_info['nickname'], mode, "--priority=" + priority, "-N", "batchupload") if exec_date: date = exec_date if exec_time: date += ' ' + exec_time task_arguments += ("-t", date) if email_logs_to: task_arguments += ('--email-logs-to', email_logs_to) task_arguments += (filename, ) jobid = task_low_level_submission(*task_arguments) # write batch upload history run_sql("""INSERT INTO hstBATCHUPLOAD (user, submitdate, filename, execdate, id_schTASK, batch_mode) VALUES (%s, NOW(), %s, %s, %s, "metadata")""", (user_info['nickname'], metafilename, exec_date != "" and (exec_date + ' ' + exec_time) or time.strftime("%Y-%m-%d %H:%M:%S"), str(jobid), )) return (0, "Task %s queued" % str(jobid))
def bibupload(record=None, collection=None, file_prefix="", mode="-c"):
    """
    General purpose function that will write a MARCXML file and call
    bibupload on it.

    Records are flushed to bibupload in batches of MAX_RECORDS when a
    collection is given; a single ``record`` is written on its own
    (without a <collection> wrapper).

    NOTE(review): when len(collection) is an exact multiple of
    MAX_RECORDS, the final re-opened temp file contains only an empty
    <collection></collection> and is never submitted (tot == 0) -- it is
    left behind on disk.
    """
    if collection is None and record is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for rec in collection:
            file_out.write(record_xml_output(rec))
            tot += 1
            if tot == MAX_RECORDS:
                # Batch full: close, submit, and start a fresh file.
                file_out.write("</collection>")
                file_out.close()
                logger.debug("Submitting bibupload %s -n %s"
                             % (mode, filename))
                task_low_level_submission('bibupload', 'openaire', mode,
                                          filename, '-n')

                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        tot = 1
        file_out.write(record_xml_output(record))

    file_out.close()
    if tot > 0:
        logger.debug("Submitting bibupload %s -n %s" % (mode, filename))
        task_low_level_submission('bibupload', 'openaire', mode, filename,
                                  '-n')
def submit_bibindex_task(to_update, sequence_id):
    """submit a bibindex task for a set of records.

    :param to_update: list of recids to be updated by bibindex
    :type: list
    :param sequence_id: sequence id to be included in the task_id
    :type: str
    :return: id of bibindex task
    :rtype: int
    """
    recid_csv = ','.join(str(recid) for recid in to_update)
    return task_low_level_submission('bibindex', PREFIX,
                                     '-I', sequence_id,
                                     '-P', '2',
                                     '-w', 'global',
                                     '-i', recid_csv)
def submit_bibindex_task(to_update, sequence_id):
    """
    submits a bibindex task for a set of records

    @param to_update: list of recids to be updated by bibindex
    @type: list
    @param sequence_id: sequence id to be included in the task_id
    @type: str
    @return: id of bibindex task
    @rtype: int
    """
    joined_recids = ','.join(map(str, to_update))
    task_args = ('bibindex', PREFIX, '-I', sequence_id, '-P', '2',
                 '-w', 'global', '-i', joined_recids)
    return task_low_level_submission(*task_args)
def call_bibupload(marcxmlfile, mode=None, oai_src_id=-1, sequence_id=None):
    """
    Creates a bibupload task for the task scheduler in given mode
    on given file. Returns the generated task id and logs the event
    in oaiHARVESTLOGS, also adding any given oai source identifier.

    :param marcxmlfile: base-marcxmlfilename to upload
    :param mode: mode to upload in (list of flags)
    :param oai_src_id: id of current source config
    :param sequence_id: sequence-number, if relevant

    :return: task_id if successful, otherwise None.
    """
    if mode is None:
        mode = ["-r", "-i"]
    if not os.path.exists(marcxmlfile):
        write_message("marcxmlfile %s does not exist" % (marcxmlfile, ))
        return None
    try:
        # Copy the mode list: ``args = mode`` aliased the caller's list
        # and the extend() calls below mutated it across calls.
        args = list(mode)
        # Add job with priority 6 (above normal bibedit tasks)
        # and file to upload to arguments
        args.extend(["-P", "6", marcxmlfile])
        if sequence_id:
            args.extend(['-I', str(sequence_id)])
        task_id = task_low_level_submission("bibupload", "oaiharvest",
                                            *tuple(args))
        create_oaiharvest_log(task_id, oai_src_id, marcxmlfile)
    except Exception as msg:
        write_message(
            "An exception during submitting oaiharvest task occured : %s "
            % (str(msg)))
        return None
    return task_id
def Notify_URL(parameters, curdir, form, user_info=None):
    """
    Access a given URL, and possibly post some content.

    Could be used to notify that a record has been fully integrated.
    (the URL is only accessed once the BibTask created by this function
    runs in BibSched, not when the function is run. The BibTask uses a
    task sequence ID to respect ordering of tasks)

    If URL is empty, skip the notification.

    @param parameters: (dictionary) - contains the following parameter
         strings used by this function:

      + url: (string) - the URL to be contacted by this function
                        (must start with http/https)
                        If value starts with "FILE:", will look for the URL
                        in a file on curdir with the given name.
                        for eg: "FILE:my_url"
                        (value retrieved when function is run)

      + data: (string) - (optional) the data to be posted at the given URL.
                         if no value is given, the URL will be accessed
                         via GET. If value starts with "FILE:", will look
                         for the data in a file on curdir with the given
                         name. for eg: "FILE:my_data"
                         (value retrieved when function is run)

      + content_type: (string) - (optional) the content-type to use to post
                                 data. Default is 'text/plain'. Ignored if
                                 no data is posted.

      + attempt_times: (int) - (optional) up to how many time shall we try
                               to contact the URL in case we fail at
                               contacting it?

      + attempt_sleeptime: (int) - (optional) how many seconds to sleep
                                   between each attempt?

      + admin_emails: (string) - (optional) list of emails (comma-separated
                                 values) to contact in case the URL cannot
                                 be accessed after all attempts.
                                 If value starts with "FILE:", will look for
                                 the emails in a file on curdir with the
                                 given name. for eg: "FILE:my_email"
                                 (value retrieved when function is run)

      + user: (string) - the user to be used to launch the task (visible in
                         BibSched). If value starts with "FILE:", will look
                         for it in a file on curdir with the given name.
                         for eg: "FILE:my_user"
                         (value retrieved when function is run)
    """
    other_bibtasklet_arguments = []
    sequence_id = bibtask_allocate_sequenceid(curdir)

    url = parameters["url"]
    data = parameters["data"]
    admin_emails = parameters["admin_emails"]
    content_type = parameters["content_type"]
    attempt_times = parameters["attempt_times"]
    attempt_sleeptime = parameters["attempt_sleeptime"]
    user = parameters["user"]

    # Maybe some params must be read from disk
    if url.startswith('FILE:'):
        url = ParamFromFile(os.path.join(curdir, url[5:]))
    if not url:
        # Empty URL means "no notification requested".
        return ""
    if data.startswith('FILE:'):
        data = ParamFromFile(os.path.join(curdir, data[5:]))
    if admin_emails.startswith('FILE:'):
        admin_emails = ParamFromFile(os.path.join(curdir, admin_emails[5:]))
    if user.startswith('FILE:'):
        user = ParamFromFile(os.path.join(curdir, user[5:]))

    if data:
        # Only pass data/content_type along when there is data to POST.
        other_bibtasklet_arguments.extend(("-a", "data=%s" % data))
        other_bibtasklet_arguments.extend(
            ("-a", "content_type=%s" % content_type))

    return task_low_level_submission(
        "bibtasklet", user, "-T", "bst_notify_url",
        "-I", str(sequence_id),
        "-a", "url=%s" % url,
        "-a", "attempt_times=%s" % attempt_times,
        "-a", "attempt_sleeptime=%s" % attempt_sleeptime,
        "-a", "admin_emails=%s" % admin_emails,
        *other_bibtasklet_arguments)
def task_run_core():
    """ Walks through all directories where metadata files are located
        and uploads them. Files are then moved to the corresponding
        DONE folders.
    """
    # Resolve the daemon directory: absolute as-is, else under CFG_PREFIX.
    daemon_dir = CFG_BATCHUPLOADER_DAEMON_DIR[0] == '/' and CFG_BATCHUPLOADER_DAEMON_DIR \
                 or CFG_PREFIX + '/' + CFG_BATCHUPLOADER_DAEMON_DIR
    # Check if directory /batchupload exists
    if not task_get_option('documents'):
        # Metadata upload
        parent_dir = daemon_dir + "/metadata/"
        progress = 0
        try:
            os.makedirs(parent_dir)
        except OSError:
            pass
        list_of_folders = ["insert", "append", "correct", "replace",
                           "holdingpen"]
        for folder in list_of_folders:
            files_dir = os.path.join(parent_dir, folder)
            files_done_dir = os.path.join(files_dir, "DONE")
            try:
                files = os.listdir(files_dir)
            except OSError as e:
                # First run for this folder: create it and continue empty.
                os.mkdir(files_dir)
                files = []
                write_message(e, sys.stderr)
                write_message("Created new folder %s" % (files_dir, ))
            # Create directory DONE/ if doesn't exist
            try:
                os.mkdir(files_done_dir)
            except OSError:
                # Directory exists
                pass
            for metafile in files:
                if os.path.isfile(os.path.join(files_dir, metafile)):
                    # Create temporary file to be uploaded
                    (fd, filename) = tempfile.mkstemp(
                        prefix=metafile + "_" +
                        time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_",
                        dir=CFG_TMPSHAREDDIR)
                    shutil.copy(os.path.join(files_dir, metafile), filename)
                    # Send bibsched task; the folder name doubles as the
                    # bibupload mode (e.g. --insert, --append, ...).
                    mode = "--" + folder
                    jobid = str(task_low_level_submission('bibupload',
                                                          'batchupload',
                                                          mode, filename))
                    # Move file to done folder
                    filename = metafile + "_" + time.strftime(
                        "%Y%m%d%H%M%S", time.localtime()) + "_" + jobid
                    os.rename(os.path.join(files_dir, metafile),
                              os.path.join(files_done_dir, filename))
                    task_sleep_now_if_required(can_stop_too=True)
            progress += 1
            task_update_progress("Done %d out of %d."
                                 % (progress, len(list_of_folders)))
    else:
        # Documents upload
        parent_dir = daemon_dir + "/documents/"
        try:
            os.makedirs(parent_dir)
        except OSError:
            pass
        matching_order = CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY
        for folder in ["append/", "revise/"]:
            try:
                os.mkdir(parent_dir + folder)
            except:
                pass
            # Try each filename-matching policy until one succeeds.
            for matching in matching_order:
                errors = document_upload(folder=parent_dir + folder,
                                         matching=matching,
                                         mode=folder[:-1])[0]
                if not errors:
                    break # All documents succedeed with that matching
                for error in errors:
                    write_message("File: %s - %s with matching %s"
                                  % (error[0], error[1], matching),
                                  sys.stderr)
            task_sleep_now_if_required(can_stop_too=True)
    return 1
def _dbdump_run_task_core():
    """
    Run DB dumper core stuff.

    Note: do not use task_can_sleep() stuff here because we don't want
    other tasks to interrupt us while we are dumping the DB content.
    """
    # read params:
    host = CFG_DATABASE_HOST
    port = CFG_DATABASE_PORT
    connection = None
    active_queues = []
    try:
        if task_get_option('slave') and not task_get_option('dump_on_slave_helper_mode'):
            # Master side of a dump-on-slave: detach the slave, then
            # schedule a helper dbdump task that does the actual dump.
            connection = get_connection_for_dump_on_slave()
            write_message("Dump on slave requested")
            write_message("... checking if slave is well up...")
            check_slave_is_up(connection)
            write_message("... checking if slave is in consistent state...")
            check_slave_is_in_consistent_state(connection)
            write_message("... detaching slave database...")
            detach_slave(connection)
            write_message("... scheduling dump on slave helper...")
            helper_arguments = []
            if task_get_option("number"):
                helper_arguments += ["--number", str(task_get_option("number"))]
            if task_get_option("output"):
                helper_arguments += ["--output", str(task_get_option("output"))]
            if task_get_option("params"):
                helper_arguments += ["--params", str(task_get_option("params"))]
            if task_get_option("ignore_tables"):
                helper_arguments += ["--ignore-tables", str(task_get_option("ignore_tables"))]
            if task_get_option("compress"):
                helper_arguments += ["--compress"]
            if task_get_option("slave"):
                helper_arguments += ["--slave", str(task_get_option("slave"))]
            helper_arguments += ['-N', 'slavehelper', '--dump-on-slave-helper']
            task_id = task_low_level_submission('dbdump',
                                                task_get_task_param('user'),
                                                '-P4', *helper_arguments)
            write_message("Slave scheduled with ID %s" % task_id)
            task_update_progress("DONE")
            return True
        elif task_get_option('dump_on_slave_helper_mode'):
            # Helper side: the slave must already be detached.
            write_message("Dumping on slave mode")
            connection = get_connection_for_dump_on_slave()
            write_message("... checking if slave is well down...")
            check_slave_is_down(connection)
            host = CFG_DATABASE_SLAVE
        task_update_progress("Reading parameters")
        write_message("Reading parameters started")
        output_dir = task_get_option('output', CFG_LOGDIR)
        output_num = task_get_option('number', 5)
        params = task_get_option('params', None)
        compress = task_get_option('compress', False)
        slave = task_get_option('slave', False)
        ignore_tables = task_get_option('ignore_tables', None)
        if ignore_tables:
            ignore_tables = get_table_names(ignore_tables)
        else:
            ignore_tables = None
        output_file_suffix = task_get_task_param('task_starting_time')
        output_file_suffix = output_file_suffix.replace(' ', '_') + '.sql'
        if compress:
            output_file_suffix = "%s.gz" % (output_file_suffix,)
        write_message("Reading parameters ended")
        if task_get_option('disable_workers'):
            active_queues = get_queues()
            if active_queues:
                write_message("Suspend workers and wait for any running tasks to complete")
                suspend_queues(active_queues)
                write_message("Workers suspended")
        # make dump:
        task_update_progress("Dumping database")
        write_message("Database dump started")
        if slave:
            output_file_prefix = 'slave-%s-dbdump-' % (CFG_DATABASE_NAME,)
        else:
            output_file_prefix = '%s-dbdump-' % (CFG_DATABASE_NAME,)
        output_file = output_file_prefix + output_file_suffix
        dump_path = output_dir + os.sep + output_file
        dump_database(dump_path, \
                      host=host, port=port, params=params, \
                      compress=compress, \
                      ignore_tables=ignore_tables)
        write_message("Database dump ended")
    finally:
        # Always resume workers and reattach the slave, even on failure.
        for queue in active_queues:
            enable_queue(queue)
        if connection and task_get_option('dump_on_slave_helper_mode'):
            write_message("Reattaching slave")
            attach_slave(connection)
    # prune old dump files:
    task_update_progress("Pruning old dump files")
    write_message("Pruning old dump files started")
    _delete_old_dumps(output_dir, output_file_prefix, output_num)
    write_message("Pruning old dump files ended")
    # we are done:
    task_update_progress("Done.")
    return True
def scheduled_send_email(
    fromaddr,
    toaddr,
    subject="",
    content="",
    header=None,
    footer=None,
    copy_to_admin=0,
    attempt_times=1,
    attempt_sleeptime=10,
    user=None,
    other_bibtasklet_arguments=None,
    replytoaddr="",
    bccaddr="",
):
    """
    Like send_email, but send an email via the bibsched
    infrastructure.

    @param fromaddr: sender
    @type fromaddr: string
    @param toaddr: list of receivers
    @type toaddr: string (comma separated) or list of strings
    @param subject: the subject
    @param content: the body of the message
    @param header: optional header, otherwise default is used
    @param footer: optional footer, otherwise default is used
    @param copy_to_admin: set to 1 in order to send email the admins
    @param attempt_times: try at least n times before giving up sending
    @param attempt_sleeptime: number of seconds to sleep between two attempts
    @param user: the user name to user when scheduling the bibtasklet. If
        None, the sender will be used
    @param other_bibtasklet_arguments: other arguments to append to the list
        of arguments to the call of task_low_level_submission
    @param replytoaddr: [string or list-of-strings] to be used for the
        reply-to header of the email (if string, then receivers are
        separated by ',')
    @param bccaddr: [string or list-of-strings] to be used for BCC header
        of the email (if string, then receivers are separated by ',')
    @return: the scheduled bibtasklet
    """
    from invenio.legacy.bibsched.bibtask import task_low_level_submission
    # Normalize receiver lists into comma-separated strings.
    if not isinstance(toaddr, (unicode, str)):
        toaddr = ','.join(toaddr)
    if not isinstance(replytoaddr, (unicode, str)):
        replytoaddr = ','.join(replytoaddr)
    toaddr = remove_temporary_emails(toaddr)
    if user is None:
        user = fromaddr
    if other_bibtasklet_arguments is None:
        other_bibtasklet_arguments = []
    else:
        # Copy so we do not mutate the caller's list below.
        other_bibtasklet_arguments = list(other_bibtasklet_arguments)
    if not header is None:
        other_bibtasklet_arguments.extend(("-a", "header=%s" % header))
    if not footer is None:
        other_bibtasklet_arguments.extend(("-a", "footer=%s" % footer))
    return task_low_level_submission(
        "bibtasklet", user, "-T", "bst_send_email",
        "-a", "fromaddr=%s" % fromaddr,
        "-a", "toaddr=%s" % toaddr,
        "-a", "replytoaddr=%s" % replytoaddr,
        "-a", "subject=%s" % subject,
        "-a", "content=%s" % content,
        "-a", "copy_to_admin=%s" % copy_to_admin,
        "-a", "attempt_times=%s" % attempt_times,
        "-a", "attempt_sleeptime=%s" % attempt_sleeptime,
        "-a", "bccaddr=%s" % bccaddr,
        *other_bibtasklet_arguments)
def schedule_extraction(recid, taxonomy):
    """Queue a bibclassify task extracting keywords for a single record.

    The task title encodes the recid ('extract:<recid>') so the job can
    later be found in the scheduler for that document.
    """
    task_args = ('bibclassify', 'extract:%s' % recid,
                 '-k', taxonomy,
                 '-i', '%s' % recid)
    bibtask.task_low_level_submission(*task_args)
def post(self, deposit_id, **kwargs):
    """
    Creates a new deposition

    Test this with:
    $ curl -v -X POST -H "Content-Type: application/json"
        -d '{"domain":"generic", "title":"REST Test Title",
             "description":"REST Test Description"}'
        http://0.0.0.0:4000/api/deposition/DEPOSITION_ID/commit?access_token=xxx
    """
    CFG_B2SHARE_UPLOAD_FOLDER = current_app.config.get(
        "CFG_B2SHARE_UPLOAD_FOLDER")
    # The 'uncommitted' marker file exists only while a deposition is open.
    deposition_status = os.path.join(CFG_B2SHARE_UPLOAD_FOLDER,
                                     deposit_id, 'uncommitted')
    if not os.path.exists(deposition_status):
        return {'message': 'Bad deposit_id parameter or already closed deposition.',
                'status': 404}, 404
    if not get_depositing_files_metadata(deposit_id):
        return {'message': 'No files: add files to this deposition first',
                'status': 400}, 400
    try:
        form = request.get_json()
    except:
        return {'message': 'Invalid POST data', 'status': 400}, 400
    # Close the deposition: remove the marker before processing.
    os.remove(deposition_status)
    domain = form.get('domain', '').lower()
    if domain in metadata_classes():
        metaclass = metadata_classes()[domain]
        meta = metaclass()
    else:
        domains = ", ".join(metadata_classes().keys())
        json_data = {
            'message': 'Invalid domain. The submitted metadata must ' +
                       'contain a valid "domain" field. Valid domains ' +
                       'are: ' + domains,
            'status': 400,
        }
        return json_data, 400

    if not is_current_user_allowed_to_deposit(meta):
        return {'message': 'depositions to this domain are restricted',
                'status': 401}, 401
    if 'open_access' not in form:
        return {'message': 'open_access boolean field required',
                'status': 400}, 400
    if not form['open_access'] or form['open_access'] == 'restricted':
        del form['open_access']  # action required by the b2share_marc_handler
    if not form.get('language'):
        form['language'] = meta.language_default

    form = ImmutableMultiDict(form)
    MetaForm = model_form(meta.__class__, base_class=FormWithKey,
                          exclude=['submission', 'submission_type'],
                          field_args=meta.field_args,
                          converter=HTML5ModelConverter())
    meta_form = MetaForm(form, meta, csrf_enabled=False)

    if meta_form.validate_on_submit():
        recid, marc = create_marc(form, deposit_id, current_user['email'],
                                  meta)
        tmp_file = write_marc_to_temp_file(marc)
        # all usual tasks have priority 0; we want the bibuploads to run first
        from invenio.legacy.bibsched.bibtask import task_low_level_submission
        task_low_level_submission('bibupload', 'webdeposit',
                                  '--priority', '1', '-r', tmp_file)
        #TODO: remove the existing deposition folder?; the user can now
        # repeatedly create records with the same deposition
        location = "/api/record/%d" % (recid,)
        json_data = {
            'message': "New record submitted for processing",
            'location': "/api/record/%d" % (recid,),
            'record_id': recid,
        }
        return json_data, 201, {'Location': location}  # return location header
    else:
        # Validation failed: describe the expected (visible) fields.
        fields = {}
        for (fname, field) in meta.field_args.iteritems():
            if not field.get('hidden'):
                fields[fname] = {'description': field.get('description')}
                if self.is_required_field(metaclass, fname):
                    fields[fname]['required'] = True
                if field.get('cardinality') == 'n':
                    fields[fname]['multiple'] = True
                if field.get('data_source'):
                    fields[fname]['options'] = field.get('data_source')
        json_data = {
            'message': 'Invalid metadata, please review the required fields',
            'status': 400,
            'fields': fields,
        }
        return json_data, 400