def main():
    """Assemble the InvenioGC task definition and hand it to ``task_init``.

    The usage text, the option letters/names and the callbacks are prepared
    as locals first; ``task_init`` then receives them in a single call.
    """
    # One tuple item per usage line. The pieces are joined unchanged before
    # CFG_DELETED_BIBDOC_MAXLIFE is substituted for the single %s.
    usage_lines = (
        " -l, --logs\t\tClean old logs.\n",
        " -p, --tempfiles\tClean old temporary files.\n",
        " -g, --guests\t\tClean expired guest user related information. [default action]\n",
        " -b, --bibxxx\t\tClean unreferenced bibliographic values in bibXXx tables.\n",
        " -c, --cache\t\tClean cache by removing old files.\n",
        " -d, --documents\tClean deleted documents and revisions older than %s days.\n",
        " -T, --tasks\t\tClean the BibSched queue removing/archiving old DONE tasks.\n",
        " -a, --all\t\tClean all of the above (but do not run check/optimise table options below).\n",
        " -k, --check-tables\tCheck DB tables to discover potential problems.\n",
        " -o, --optimise-tables\tOptimise DB tables to increase performance.\n",
        " -S, --sessions\tClean expired sessions from the DB.\n",
        " --bibedit-cache Clean expired bibedit cache entries from the DB.\n",
    )
    usage_text = "".join(usage_lines) % CFG_DELETED_BIBDOC_MAXLIFE

    option_letters = "lpgbdacTkoS"
    option_names = [
        "logs",
        "tempfiles",
        "guests",
        "bibxxx",
        "documents",
        "all",
        "cache",
        "tasks",
        "check-tables",
        "optimise-tables",
        "sessions",
        "bibedit-cache",
    ]

    task_init(
        authorization_action='runinveniogc',
        authorization_msg="InvenioGC Task Submission",
        help_specific_usage=usage_text,
        version=__revision__,
        specific_params=(option_letters, option_names),
        task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core,
    )
def main():
    """Build the keyword arguments of the InvenioGC task and call ``task_init``.

    Everything ``task_init`` needs (authorization action and message, usage
    text, version, option specification and callbacks) is collected in one
    dictionary and passed on with ``**``.
    """
    # The usage text is a single string; only the document lifetime (%s)
    # is filled in from configuration.
    usage_text = (
        " -l, --logs\t\tClean old logs.\n"
        " -p, --tempfiles\tClean old temporary files.\n"
        " -g, --guests\t\tClean expired guest user related information. [default action]\n"
        " -b, --bibxxx\t\tClean unreferenced bibliographic values in bibXXx tables.\n"
        " -c, --cache\t\tClean cache by removing old files.\n"
        " -d, --documents\tClean deleted documents and revisions older than %s days.\n"
        " -T, --tasks\t\tClean the BibSched queue removing/archiving old DONE tasks.\n"
        " -a, --all\t\tClean all of the above (but do not run check/optimise table options below).\n"
        " -k, --check-tables\tCheck DB tables to discover potential problems.\n"
        " -o, --optimise-tables\tOptimise DB tables to increase performance.\n"
        " -S, --sessions\tClean expired sessions from the DB.\n"
        " --bibedit-cache Clean expired bibedit cache entries from the DB.\n"
    ) % CFG_DELETED_BIBDOC_MAXLIFE

    letters = "lpgbdacTkoS"
    names = ["logs", "tempfiles", "guests", "bibxxx", "documents", "all",
             "cache", "tasks", "check-tables", "optimise-tables",
             "sessions", "bibedit-cache"]

    settings = {
        'authorization_action': 'runinveniogc',
        'authorization_msg': "InvenioGC Task Submission",
        'help_specific_usage': usage_text,
        'version': __revision__,
        'specific_params': (letters, names),
        'task_submit_elaborate_specific_parameter_fnc':
            task_submit_elaborate_specific_parameter,
        'task_submit_check_options_fnc': task_submit_check_options,
        'task_run_fnc': task_run_core,
    }
    task_init(**settings)
def main():
    """Set up the webstat administration bibtask via ``task_init``.

    Passes the authorization action ``runwebstatadmin``, a description that
    embeds the program name (``sys.argv[0]``), the option help text, the
    short/long option specification and the submit/check/run callbacks.
    """
    task_init(
        authorization_action="runwebstatadmin",
        authorization_msg="Webstat Administrator",
        description="Description: %s Creates/deletes custom events. Can be set\n"
                    " to cache key events and previously defined custom events.\n" % sys.argv[0],
        # Help for -r previously read "remote the custom event"; the option
        # is --remove-event, so the verb is corrected to "remove".
        help_specific_usage=
        " -n, --new-event=ID create a new custom event with the human-readable ID\n"
        " -r, --remove-event=ID remove the custom event with id ID and all its data\n"
        " -S, --show-events show all currently available custom events\n"
        " -c, --cache-events=CLASS|[ID] caches the events defined by the class or IDs, e.g.:\n"
        " -c ALL\n"
        " -c KEYEVENTS\n"
        " -c CUSTOMEVENTS\n"
        " -c 'event id1',id2,'testevent'\n"
        " -d,--dump-config dump default config file\n"
        " -e,--load-config create the custom events described in config_file\n"
        "\nWhen creating events (-n) the following parameters are also applicable:\n"
        " -l, --event-label=NAME set a descriptive label to the custom event\n"
        " -a, --args=[NAME] set column headers for additional custom event arguments\n"
        " (e.g. -a country,person,car)\n",
        version=__revision__,
        specific_params=("n:r:Sl:a:c:de", [
            "new-event=", "remove-event=", "show-events", "event-label=",
            "args=", "cache-events=", "dump-config", "load-config"
        ]),
        task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core)
def bibclassify_daemon():
    """Prepare the BibClassify task settings and pass them to ``bibtask.task_init``.

    Description, usage text, version string and option specification are
    built as locals; the three private callbacks of this module are handed
    over unchanged.
    """
    about = ("Extract keywords and create a BibUpload "
             "task.\nExamples:\n"
             " $ bibclassify\n"
             " $ bibclassify -i 79 -k HEP\n"
             " $ bibclassify -c 'Articles' -k HEP\n")
    usage_text = (" -i, --recid\t\tkeywords are extracted from "
                  "this record\n"
                  " -c, --collection\t\tkeywords are extracted from this collection\n"
                  " -k, --taxonomy\t\tkeywords are based on that reference")
    release = "Invenio BibClassify v%s" % bconfig.VERSION
    option_spec = ("i:c:k:f", ["recid=", "collection=", "taxonomy=", "force"])

    bibtask.task_init(
        authorization_action='runbibclassify',
        authorization_msg="BibClassify Task Submission",
        description=about,
        help_specific_usage=usage_text,
        version=release,
        specific_params=option_spec,
        task_submit_elaborate_specific_parameter_fnc=_task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=_task_submit_check_options,
        task_run_fnc=_task_run_core,
    )
def main():
    """Set up the webstat administration bibtask via ``task_init``.

    Passes the authorization action ``runwebstatadmin``, a description that
    embeds the program name (``sys.argv[0]``), the option help text, the
    short/long option specification and the submit/check/run callbacks.
    """
    task_init(authorization_action="runwebstatadmin",
              authorization_msg="Webstat Administrator",
              description="Description: %s Creates/deletes custom events. Can be set\n"
                          " to cache key events and previously defined custom events.\n" % sys.argv[0],
              # Help for -r previously read "remote the custom event"; the
              # option is --remove-event, so the verb is corrected to "remove".
              help_specific_usage=" -n, --new-event=ID create a new custom event with the human-readable ID\n"
                                  " -r, --remove-event=ID remove the custom event with id ID and all its data\n"
                                  " -S, --show-events show all currently available custom events\n"
                                  " -c, --cache-events=CLASS|[ID] caches the events defined by the class or IDs, e.g.:\n"
                                  " -c ALL\n"
                                  " -c KEYEVENTS\n"
                                  " -c CUSTOMEVENTS\n"
                                  " -c 'event id1',id2,'testevent'\n"
                                  " -d,--dump-config dump default config file\n"
                                  " -e,--load-config create the custom events described in config_file\n"
                                  "\nWhen creating events (-n) the following parameters are also applicable:\n"
                                  " -l, --event-label=NAME set a descriptive label to the custom event\n"
                                  " -a, --args=[NAME] set column headers for additional custom event arguments\n"
                                  " (e.g. -a country,person,car)\n",
              version=__revision__,
              specific_params=("n:r:Sl:a:c:de",
                               ["new-event=", "remove-event=", "show-events",
                                "event-label=", "args=", "cache-events=",
                                "dump-config", "load-config"]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_submit_check_options_fnc=task_submit_check_options,
              task_run_fnc=task_run_core)
def main():
    """Entry point of the OAI repository updater.

    Three steps, in order:

    1. Compare the first five characters of CFG_OAI_ID_FIELD with those of
       CFG_OAI_SET_FIELD and CFG_OAI_PREVIOUS_SET_FIELD; on any mismatch an
       error is printed to stderr and the process exits with status 1.
    2. If ``-d``/``--detailed-report`` or ``-r``/``--report`` is present in
       ``sys.argv[1:]``, print the repository status directly (verbose 2 or
       1 respectively) and return without calling ``task_init``.
    3. Otherwise hand the task definition to ``task_init``.
    """
    # The OAI ID and both OAI Set settings must share the same leading five
    # characters; refuse to continue otherwise.
    if (CFG_OAI_ID_FIELD[:5] != CFG_OAI_SET_FIELD[:5]) or \
            (CFG_OAI_ID_FIELD[:5] != CFG_OAI_PREVIOUS_SET_FIELD[:5]):
        print( """\ ERROR: since Invenio 1.0 the OAI ID and the OAI Set must be stored in the same field. Please revise your configuration for the variables CFG_OAI_ID_FIELD (currently set to %s) CFG_OAI_SET_FIELD (currently set to %s) CFG_OAI_PREVIOUS_SET_FIELD (currently set to %s)""" % (CFG_OAI_ID_FIELD, CFG_OAI_SET_FIELD, CFG_OAI_PREVIOUS_SET_FIELD), file=sys.stderr)
        sys.exit(1)

    # if there is any -r or --report option (or other similar options)
    # in the arguments, just print the status and exit (do not run
    # through BibSched...)
    mode = -1  # -1 means "no report requested"
    if '-d' in sys.argv[1:] or '--detailed-report' in sys.argv[1:]:
        mode = 2
    elif '-r' in sys.argv[1:] or '--report' in sys.argv[1:]:
        mode = 1
    if mode != -1:
        def local_write_message(*args):
            """Overload BibTask function so that it does not need to run in BibSched environment"""
            # Only the first positional argument is written; any others are ignored.
            sys.stdout.write(args[0] + '\n')
        print_repository_status(local_write_message=local_write_message, verbose=mode)
        return

    task_init(
        authorization_action='runoairepository',
        authorization_msg="OAI Archive Task Submission",
        description="Examples:\n"
                    " Expose records according to sets defined in OAI Repository admin interface\n"
                    " $ oairepositoryupdater \n"
                    " Expose records according to sets defined in OAI Repository admin interface and update them every day\n"
                    " $ oairepositoryupdater -s24\n"
                    " Print OAI repository status\n"
                    " $ oairepositoryupdater -r\n"
                    " Print OAI repository detailed status\n"
                    " $ oairepositoryupdater -d\n\n",
        help_specific_usage="Options:\n"
                            " -r --report\t\tOAI repository status\n"
                            " -d --detailed-report\t\tOAI repository detailed status\n"
                            " -n --no-process\tDo no upload the modifications\n"
                            " --notimechange\tDo not update record modification_date\n"
                            "NOTE: --notimechange should be used with care, basically only the first time a new set is added.",
        specific_params=("rdn", [
            "report", "detailed-report", "no-process", "notimechange" ]),
        task_submit_elaborate_specific_parameter_fnc=
        task_submit_elaborate_specific_parameter,
        task_run_fnc=oairepositoryupdater_task)
def main():
    """Constructs the refextract bibtask.

    Creates a ``BibCatalogSystemRT`` instance when CFG_BIBCATALOG_SYSTEM is
    ``'RT'`` (otherwise ``None``), exposes it to the run function through
    ``extra_vars`` and passes the task definition to ``task_init``.
    """
    if CFG_BIBCATALOG_SYSTEM == 'RT':
        bibcatalog_system = BibCatalogSystemRT()
    else:
        bibcatalog_system = None
    # Handed to task_run_core_wrapper below as extra_vars.
    extra_vars = {'bibcatalog_system': bibcatalog_system}
    # Build and submit the task
    # NOTE(review): the help text advertises "-m, --modified" but the
    # short-option string below contains no "m" - confirm whether -m is
    # meant to be accepted.
    task_init(authorization_action='runrefextract',
              authorization_msg="Refextract Task Submission",
              description=DESCRIPTION,
              # get the global help_message variable imported from refextract.py
              help_specific_usage=HELP_MESSAGE + """ Scheduled (daemon) options: -a, --new Run on all newly inserted records. -m, --modified Run on all newly modified records. -r, --recids Record id for extraction. -c, --collections Entire Collection for extraction. --arxiv All arxiv modified records within last week Special (daemon) options: --create-ticket Create a RT ticket for record references Examples: (run a daemon job) refextract -a (run on a set of records) refextract --recids 1,2 -r 3 (run on a collection) refextract --collections "Reports" (run as standalone) refextract -o /home/chayward/refs.xml /home/chayward/thesis.pdf """,
              version="Invenio v%s" % CFG_VERSION,
              specific_params=("hVv:x:r:c:nai",
                               ["help", "version", "verbose=", "inspire", "kb-journals=", "kb-journals-re=", "kb-report-numbers=", "kb-authors=", "kb-books=", "recids=", "collections=", "new", "modified", "no-overwrite", "arxiv", "create-ticket"]),
              task_submit_elaborate_specific_parameter_fnc=cb_parse_option,
              task_submit_check_options_fnc=check_options,
              task_run_fnc=task_run_core_wrapper('refextract', task_run_core, extra_vars=extra_vars))
def main():
    """Entry point of the OAI repository updater.

    Three steps, in order:

    1. Compare the first five characters of CFG_OAI_ID_FIELD with those of
       CFG_OAI_SET_FIELD and CFG_OAI_PREVIOUS_SET_FIELD; on any mismatch an
       error is printed to stderr and the process exits with status 1.
    2. If ``-d``/``--detailed-report`` or ``-r``/``--report`` is present in
       ``sys.argv[1:]``, print the repository status directly (verbose 2 or
       1 respectively) and return without calling ``task_init``.
    3. Otherwise hand the task definition to ``task_init``.
    """
    # The OAI ID and both OAI Set settings must share the same leading five
    # characters; refuse to continue otherwise.
    if (CFG_OAI_ID_FIELD[:5] != CFG_OAI_SET_FIELD[:5]) or \
            (CFG_OAI_ID_FIELD[:5] != CFG_OAI_PREVIOUS_SET_FIELD[:5]):
        print("""\ ERROR: since Invenio 1.0 the OAI ID and the OAI Set must be stored in the same field. Please revise your configuration for the variables CFG_OAI_ID_FIELD (currently set to %s) CFG_OAI_SET_FIELD (currently set to %s) CFG_OAI_PREVIOUS_SET_FIELD (currently set to %s)""" % ( CFG_OAI_ID_FIELD, CFG_OAI_SET_FIELD, CFG_OAI_PREVIOUS_SET_FIELD ), file=sys.stderr)
        sys.exit(1)

    # if there is any -r or --report option (or other similar options)
    # in the arguments, just print the status and exit (do not run
    # through BibSched...)
    mode = -1  # -1 means "no report requested"
    if '-d' in sys.argv[1:] or '--detailed-report' in sys.argv[1:]:
        mode = 2
    elif '-r' in sys.argv[1:] or '--report' in sys.argv[1:]:
        mode = 1
    if mode != -1:
        def local_write_message(*args):
            """Overload BibTask function so that it does not need to run in BibSched environment"""
            # Only the first positional argument is written; any others are ignored.
            sys.stdout.write(args[0] + '\n')
        print_repository_status(local_write_message=local_write_message, verbose=mode)
        return

    task_init(authorization_action='runoairepository',
              authorization_msg="OAI Archive Task Submission",
              description="Examples:\n"
                          " Expose records according to sets defined in OAI Repository admin interface\n"
                          " $ oairepositoryupdater \n"
                          " Expose records according to sets defined in OAI Repository admin interface and update them every day\n"
                          " $ oairepositoryupdater -s24\n"
                          " Print OAI repository status\n"
                          " $ oairepositoryupdater -r\n"
                          " Print OAI repository detailed status\n"
                          " $ oairepositoryupdater -d\n\n",
              help_specific_usage="Options:\n"
                                  " -r --report\t\tOAI repository status\n"
                                  " -d --detailed-report\t\tOAI repository detailed status\n"
                                  " -n --no-process\tDo no upload the modifications\n"
                                  " --notimechange\tDo not update record modification_date\n"
                                  "NOTE: --notimechange should be used with care, basically only the first time a new set is added.",
              specific_params=("rdn", [
                  "report", "detailed-report", "no-process", "notimechange"]),
              task_submit_elaborate_specific_parameter_fnc=
              task_submit_elaborate_specific_parameter,
              task_run_fnc=oairepositoryupdater_task)
def main():
    """Main function that constructs full bibtask.

    Passes the BibExport task definition to ``task_init``: authorization
    action ``runbibexport``, the help text for ``-w/--wjob``, the option
    specification and the submit/check/run callbacks.
    """
    task_init(authorization_action='runbibexport',
              authorization_msg="BibExport Task Submission",
              help_specific_usage="""Export options: -w, --wjob=j1[,j2]\tRun specific exporting jobs j1, j2, etc (e.g. 'sitemap'). """,
              version=__revision__,
              # Single option: -w / --wjob, which takes a value.
              specific_params=("w:", ["wjob=",]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_submit_check_options_fnc=task_submit_check_options,
              task_run_fnc=task_run_core)
def main():
    """Construct the BibTaskEx bibtask.

    Passes the task definition to ``task_init``: authorization action
    ``runbibtaskex``, the help text for ``-n/--number`` and ``-e/--error``,
    the option specification and the submit/run callbacks. No option-check
    callback is supplied.
    """
    task_init(authorization_action='runbibtaskex',
              authorization_msg="BibTaskEx Task Submission",
              help_specific_usage="""\ -n, --number Print Fibonacci numbers for up to NUM. [default=30] -e, --error Raise an error from time to time """,
              version=__revision__,
              # -n/--number takes a value, -e/--error is a flag.
              specific_params=("n:e", ["number=", "error"]),
              task_submit_elaborate_specific_parameter_fnc=
              task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def main():
    """Construct the BibCirculation bibtask.

    Passes the task definition to ``task_init``: authorization action
    ``runbibcircd``, help text for the three flags ``-o``, ``-b`` and ``-r``,
    a description showing an example invocation built from ``sys.argv[0]``,
    and the submit/run callbacks. No option-check callback is supplied.
    """
    task_init(authorization_action='runbibcircd',
              authorization_msg="BibCirculation Task Submission",
              help_specific_usage="""-o, --overdue-letters\tCheck overdue loans and send recall emails if necessary.\n -b, --update-borrowers\tUpdate borrowers information from ldap.\n -r, --update-requests\tUpdate pending requests of users\n\n""",
              description="""Example: %s -u admin \n\n""" % (sys.argv[0]),
              # All three options are plain flags (no values).
              specific_params=("obr", ["overdue-letters", "update-borrowers", "update-requests"]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              version=__revision__,
              task_run_fnc = task_run_core
              )
def main():
    """Construct the BibTaskEx bibtask.

    Passes the task definition to ``task_init``: authorization action
    ``runbibtaskex``, the help text for ``-n/--number`` and ``-e/--error``,
    the option specification and the submit/run callbacks. No option-check
    callback is supplied.
    """
    task_init(authorization_action='runbibtaskex',
              authorization_msg="BibTaskEx Task Submission",
              help_specific_usage="""\ -n, --number Print Fibonacci numbers for up to NUM. [default=30] -e, --error Raise an error from time to time """,
              version=__revision__,
              # -n/--number takes a value, -e/--error is a flag.
              specific_params=("n:e", ["number=", "error"]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def main():
    """Collect the texkey generator task settings and submit them.

    The settings are gathered in a dictionary and expanded into the
    ``task_init`` call. The task declares no specific options and passes
    neither an option-parsing nor an option-checking callback.
    """
    release = "Invenio v%s" % CFG_VERSION
    no_options = ("", [])  # empty short-option string, empty long-option list

    settings = {
        'authorization_action': 'runtexkeygeneration',
        'authorization_msg': "Texkey generator task submission",
        'description': DESCRIPTION,
        'help_specific_usage': HELP_MESSAGE,
        'version': release,
        'specific_params': no_options,
        'task_run_fnc': task_run_core,
    }
    task_init(**settings)
def main():
    """Submit the texkey generator task through ``task_init``.

    Only the run callback is supplied; the task has no specific options,
    so the option specification is an empty string with an empty list.
    """
    version_label = "Invenio v%s" % CFG_VERSION
    option_spec = ("", [])

    task_init(
        authorization_action='runtexkeygeneration',
        authorization_msg="Texkey generator task submission",
        description=DESCRIPTION,
        help_specific_usage=HELP_MESSAGE,
        version=version_label,
        specific_params=option_spec,
        task_run_fnc=task_run_core,
    )
def main():
    """Construct the DB dump bibtask.

    Passes the task definition to ``task_init``: authorization action
    ``rundbdump``, help text for ``-o/--output`` and ``-n/--number`` (the
    default output directory shown is CFG_LOGDIR), the option specification
    and the submit/run callbacks. No option-check callback is supplied.
    """
    task_init(authorization_action='rundbdump',
              authorization_msg="DB Dump Task Submission",
              help_specific_usage="""\ -o, --output=DIR Output directory. [default=%s] -n, --number=NUM Keep up to NUM previous dump files. [default=5] """ % CFG_LOGDIR,
              version=__revision__,
              # Both options take a value.
              specific_params=("n:o:", ["number=", "output="]),
              task_submit_elaborate_specific_parameter_fnc=_dbdump_elaborate_submit_param,
              task_run_fnc=_dbdump_run_task_core)
def main():
    """Constructs the refextract bibtask.

    Builds ``extra_vars`` (the module-level BIBCATALOG_SYSTEM plus an empty
    ``records`` list) for the wrapped run function and passes the task
    definition to ``task_init``. The run function is produced by
    ``task_run_core_wrapper`` with ``cb_submit_bibupload`` as post-process
    callback.
    """
    extra_vars = {'bibcatalog_system': BIBCATALOG_SYSTEM, 'records': []}
    # Build and submit the task
    # NOTE(review): the help text advertises "-m, --modified" but the
    # short-option string below contains no "m"; it also declares "i:" and
    # "f:" while the only matching long option visible is "id=" - confirm
    # against cb_parse_option.
    task_init(
        authorization_action='runrefextract',
        authorization_msg="Refextract Task Submission",
        description=DESCRIPTION,
        # get the global help_message variable imported from refextract.py
        help_specific_usage=HELP_MESSAGE + """ Scheduled (daemon) options: -a, --new Run on all newly inserted records. -m, --modified Run on all newly modified records. -r, --recids Record id for extraction. -c, --collections Entire Collection for extraction. --arxiv All arxiv modified records within last week Special (daemon) options: --create-ticket Create a RT ticket for record references Examples: (run a daemon job) refextract -a (run on a set of records) refextract --recids 1,2 -r 3 (run on a collection) refextract --collections "Reports" (run as standalone) refextract -o /home/chayward/refs.xml /home/chayward/thesis.pdf """,
        version="Invenio v%s" % CFG_VERSION,
        specific_params=("hVv:x:r:c:nai:f:", [ "help", "version", "verbose=", "inspire", "kb-journals=", "kb-journals-re=", "kb-report-numbers=", "kb-authors=", "kb-books=", "recids=", "id=", "collections=", "new", "modified", "no-overwrite", "arxiv", "create-ticket" ]),
        task_submit_elaborate_specific_parameter_fnc=cb_parse_option,
        task_submit_check_options_fnc=check_options,
        task_run_fnc=task_run_core_wrapper('refextract', task_run_core, extra_vars=extra_vars, post_process=cb_submit_bibupload))
def main():
    """Main body of bibtasklet.

    Passes the BibTaskLet task definition to ``task_init``: authorization
    action ``runbibtasklet``, help text for ``-T``, ``-a`` and ``-l``, the
    option specification and the submit/run/check callbacks.
    """
    task_init(
        authorization_action='runbibtasklet',
        authorization_msg="BibTaskLet Task Submission",
        help_specific_usage="""\ -T, --tasklet Execute the specific tasklet -a, --argument Specify an argument to be passed to tasklet in the form param=value, e.g. --argument foo=bar -l, --list-tasklets List the existing tasklets """,
        version=__version__,
        # -T and -a take a value, -l is a flag.
        specific_params=("T:a:l", ["tasklet=", "argument=", "list-tasklets"]),
        task_submit_elaborate_specific_parameter_fnc=( task_submit_elaborate_specific_parameter ),
        task_run_fnc=task_run_core,
        task_submit_check_options_fnc=task_submit_check_options)
def main():
    """Construct the batch uploader bibtask.

    Passes the task definition to ``task_init``: authorization action
    ``runbatchuploader``, a description of the two run modes, help text for
    ``-m/--metadata`` and ``-d/--documents``, the option specification and
    the submit/run callbacks.
    """
    task_init(authorization_action='runbatchuploader',
              authorization_msg="Batch Uploader",
              description="""Description: The batch uploader has two different run modes. If --metadata is specified (by default) then all files in folders insert, append, correct and replace are uploaded using the corresponding mode. If mode --documents is selected all documents present in folders named append and revise are uploaded using the corresponding mode. Parent directory for batch uploader must be specified in the invenio configuration file.\n""",
              help_specific_usage=""" -m, --metadata\t Batch Uploader will look for metadata files in the corresponding folders -d, --documents\t Batch Uploader will look for documents in the corresponding folders """,
              version=__revision__,
              # Both options are plain flags. The short spec used to be
              # "md:", which made -d swallow the following argument even
              # though --documents and the help text take no value.
              specific_params=("md", ["metadata", "documents"]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def main():
    """Construct the batch uploader bibtask.

    Passes the task definition to ``task_init``: authorization action
    ``runbatchuploader``, a description of the two run modes, help text for
    ``-m/--metadata`` and ``-d/--documents``, the option specification and
    the submit/run callbacks.
    """
    task_init(authorization_action='runbatchuploader',
              authorization_msg="Batch Uploader",
              description="""Description: The batch uploader has two different run modes. If --metadata is specified (by default) then all files in folders insert, append, correct and replace are uploaded using the corresponding mode. If mode --documents is selected all documents present in folders named append and revise are uploaded using the corresponding mode. Parent directory for batch uploader must be specified in the invenio configuration file.\n""",
              help_specific_usage=""" -m, --metadata\t Batch Uploader will look for metadata files in the corresponding folders -d, --documents\t Batch Uploader will look for documents in the corresponding folders """,
              version=__revision__,
              # Both options are plain flags. The short spec used to be
              # "md:", which made -d swallow the following argument even
              # though --documents and the help text take no value.
              specific_params=("md", ["metadata", "documents"]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def main():
    """Main function that constructs the bibtask

    Passes the BibSort task definition to ``task_init``: authorization
    action ``runbibsort``, an empty description, the option help text, the
    option specification and the submit/run callbacks. No option-check
    callback is supplied.
    """
    # NOTE(review): the help text refers to a '-B' option that is not part
    # of the option specification below - confirm where it is handled.
    task_init(authorization_action='runbibsort',
              authorization_msg="BibSort Task Submission",
              description = "",
              help_specific_usage=""" Specific options: -l, --load-config Loads the configuration from bibsort.conf into the database -d, --dump-config Outputs a database dump in form of a config file -p, --print-sorting-methods Prints the available sorting methods -R, --rebalance Runs the sorting methods given in '--methods'and rebalances all the buckets. If no method is specified, the rebalance will be done for all the methods in the config file. -S, --update-sorting Runs the sorting methods given in '--methods' for the recids given in '--id'. If no method is specified, the update will be done for all the methods in the config file. If no recids are specified, the update will be done for all the records that have been modified/inserted from the last run of the sorting. If you want to run the sorting for all records, you should use the '-B' option -M, --methods=METHODS Specify the sorting methods for which the update_sorting or rebalancing will run (ex: --methods=method1,method2,method3). -i, --id=RECIDS Specify the records for which the update_sorting will run (ex: --id=1,2-56,72) """,
              version=__revision__,
              # -M and -i take a value; the other five options are flags.
              specific_params=("ldpRSM:i:", ["load-config", "dump-config", "print-sorting-methods", "rebalance", "update-sorting", "methods=", "id="]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def bibclassify_daemon():
    """Gather the BibClassify task settings and submit them via ``bibtask.task_init``.

    The settings are collected in a dictionary (description, usage text,
    version string, option specification and the module's three private
    callbacks) and expanded into the call.
    """
    settings = {
        'authorization_action': 'runbibclassify',
        'authorization_msg': "BibClassify Task Submission",
        'description': "Extract keywords and create a BibUpload "
                       "task.\nExamples:\n"
                       " $ bibclassify\n"
                       " $ bibclassify -i 79 -k HEP\n"
                       " $ bibclassify -c 'Articles' -k HEP\n",
        'help_specific_usage': " -i, --recid\t\tkeywords are extracted from "
                               "this record\n"
                               " -c, --collection\t\tkeywords are extracted from this collection\n"
                               " -k, --taxonomy\t\tkeywords are based on that reference",
        'version': "Invenio BibClassify v%s" % bconfig.VERSION,
        'specific_params': ("i:c:k:f",
                            ["recid=", "collection=", "taxonomy=", "force"]),
        'task_submit_elaborate_specific_parameter_fnc':
            _task_submit_elaborate_specific_parameter,
        'task_submit_check_options_fnc': _task_submit_check_options,
        'task_run_fnc': _task_run_core,
    }
    bibtask.task_init(**settings)
def main():
    """Main function that constructs full bibtask.

    If ``--force-recrawling`` appears anywhere in ``sys.argv``, calls
    ``force_recrawling()``, prints a confirmation and exits without going
    through ``task_init``. Otherwise passes the BibExport task definition
    to ``task_init``.
    """
    if '--force-recrawling' in sys.argv:
        force_recrawling()
        # Call form of print: valid on Python 3 and, with a single
        # argument, prints the same text on Python 2.
        print("Recrawling forced")
        # NOTE(review): exit status 1 is kept from the original even though
        # this is the success path - confirm whether 0 was intended.
        sys.exit(1)
    task_init(authorization_action='runbibexport',
              authorization_msg="BibExport Task Submission",
              help_specific_usage="""Export options: -w, --wjob=j1[,j2]\tRun specific exporting jobs j1, j2, etc (e.g. 'sitemap'). --force-recrawling\tWhen using the sitemap export will force all the timestamp \tthere included to refer to correspond at least to now. In \tthis way crawlers are going to crawl all the content again. \tThis is useful in case of a major update in the detailed \tview of records. """,
              version=__revision__,
              # --force-recrawling is handled above and is deliberately not
              # part of the option specification.
              specific_params=("w:", ["wjob="]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_submit_check_options_fnc=task_submit_check_options,
              task_run_fnc=task_run_core)
def main(): """Constructs the refextract bibtask.""" # Build and submit the task task_init( authorization_action="runarxivpdfchecker", authorization_msg="Arxiv Pdf Checker Task Submission", description="""Daemon that checks if we have the latest version of arxiv PDFs""", # get the global help_message variable imported from refextract.py help_specific_usage=""" Scheduled (daemon) options: -i, --id Record id to check. Examples: (run a daemon job) arxiv-pdf-checker """, version="Invenio v%s" % CFG_VERSION, specific_params=("i:", ["id="]), task_submit_elaborate_specific_parameter_fnc=cb_parse_option, task_run_fnc=task_run_core, )
def main(): """Constructs the refextract bibtask.""" # Build and submit the task task_init( authorization_action='runarxivpdfchecker', authorization_msg="Arxiv Pdf Checker Task Submission", description= """Daemon that checks if we have the latest version of arxiv PDFs""", # get the global help_message variable imported from refextract.py help_specific_usage=""" Scheduled (daemon) options: -i, --id Record id to check. Examples: (run a daemon job) arxiv-pdf-checker """, version="Invenio v%s" % CFG_VERSION, specific_params=("i:", ["id="]), task_submit_elaborate_specific_parameter_fnc=cb_parse_option, task_run_fnc=task_run_core)
def main():
    """Construct the WebColl bibtask.

    Imports ``task_init`` and the webcoll callbacks inside the function,
    then passes the task definition to ``task_init``: authorization action
    ``runwebcoll``, a description, the option help text, the option
    specification and the submit/check/run callbacks.
    """
    # Imports are local to the function rather than at module level.
    from invenio.legacy.bibsched.bibtask import task_init
    from invenio.legacy.websearch.webcoll import (
        task_submit_elaborate_specific_parameter,
        task_submit_check_options,
        task_run_core,
        __revision__)
    task_init(authorization_action="runwebcoll",
              authorization_msg="WebColl Task Submission",
              description="""Description: webcoll updates the collection cache (record universe for a given collection plus web page elements) based on invenio.conf and DB configuration parameters. If the collection name is passed as an argument, only this collection's cache will be updated. If the recursive option is set as well, the collection's descendants will also be updated.\n""",
              help_specific_usage=" -c, --collection\t Update cache for the given "
                                  "collection only. [all]\n"
                                  " -r, --recursive\t Update cache for the given collection and all its\n"
                                  "\t\t\t descendants (to be used in combination with -c). [no]\n"
                                  " -q, --quick\t\t Skip webpage cache update for those collections whose\n"
                                  "\t\t\t reclist was not changed. Note: if you use this option, it is advised\n"
                                  "\t\t\t to schedule, e.g. a nightly 'webcoll --force'. [no]\n"
                                  " -f, --force\t\t Force update even if cache is up to date. [no]\n"
                                  " -p, --part\t\t Update only certain cache parts (1=reclist,"
                                  " 2=webpage). [both]\n"
                                  " -l, --language\t Update pages in only certain language"
                                  " (e.g. fr,it,...). [all]\n",
              version=__revision__,
              # -c, -p and -l take a value; -r, -q and -f are flags.
              specific_params=("c:rqfp:l:", [ "collection=", "recursive", "quick", "force", "part=", "language=" ]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_submit_check_options_fnc=task_submit_check_options,
              task_run_fnc=task_run_core)
def main():
    """Main function that constructs full bibtask.

    If ``--force-recrawling`` appears anywhere in ``sys.argv``, calls
    ``force_recrawling()``, prints a confirmation and exits without going
    through ``task_init``. Otherwise passes the BibExport task definition
    to ``task_init``.
    """
    if '--force-recrawling' in sys.argv:
        force_recrawling()
        # Call form of print: valid on Python 3 and, with a single
        # argument, prints the same text on Python 2.
        print("Recrawling forced")
        # NOTE(review): exit status 1 is kept from the original even though
        # this is the success path - confirm whether 0 was intended.
        sys.exit(1)
    task_init(authorization_action='runbibexport',
              authorization_msg="BibExport Task Submission",
              help_specific_usage="""Export options: -w, --wjob=j1[,j2]\tRun specific exporting jobs j1, j2, etc (e.g. 'sitemap'). --force-recrawling\tWhen using the sitemap export will force all the timestamp \tthere included to refer to correspond at least to now. In \tthis way crawlers are going to crawl all the content again. \tThis is useful in case of a major update in the detailed \tview of records. """,
              version=__revision__,
              # --force-recrawling is handled above and is deliberately not
              # part of the option specification.
              specific_params=("w:", ["wjob="]),
              task_submit_elaborate_specific_parameter_fnc=
              task_submit_elaborate_specific_parameter,
              task_submit_check_options_fnc=task_submit_check_options,
              task_run_fnc=task_run_core)
def main():
    """Main function that constructs the bibtask

    Passes the BibSort task definition to ``task_init``: authorization
    action ``runbibsort``, an empty description, the option help text, the
    option specification and the submit/run callbacks. No option-check
    callback is supplied.
    """
    # NOTE(review): the help text refers to a '-B' option that is not part
    # of the option specification below - confirm where it is handled.
    task_init(authorization_action='runbibsort',
              authorization_msg="BibSort Task Submission",
              description="",
              help_specific_usage=""" Specific options: -l, --load-config Loads the configuration from bibsort.conf into the database -d, --dump-config Outputs a database dump in form of a config file -p, --print-sorting-methods Prints the available sorting methods -R, --rebalance Runs the sorting methods given in '--methods'and rebalances all the buckets. If no method is specified, the rebalance will be done for all the methods in the config file. -S, --update-sorting Runs the sorting methods given in '--methods' for the recids given in '--id'. If no method is specified, the update will be done for all the methods in the config file. If no recids are specified, the update will be done for all the records that have been modified/inserted from the last run of the sorting. If you want to run the sorting for all records, you should use the '-B' option -M, --methods=METHODS Specify the sorting methods for which the update_sorting or rebalancing will run (ex: --methods=method1,method2,method3). -i, --id=RECIDS Specify the records for which the update_sorting will run (ex: --id=1,2-56,72) """,
              version=__revision__,
              # -M and -i take a value; the other five options are flags.
              specific_params=("ldpRSM:i:", [ "load-config", "dump-config", "print-sorting-methods", "rebalance", "update-sorting", "methods=", "id=" ]),
              task_submit_elaborate_specific_parameter_fnc=
              task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def main():
    """Initialise the BibArchive bibtask.

    Passes the task definition to ``task_init``: authorization action
    ``bibarchive``, help text for the record/mount/delete/delete-all/PURGE
    options, the option specification and the submit/run callbacks.
    """
    task_init(
        authorization_action='bibarchive',
        authorization_msg="BibArchive Task Submission",
        help_specific_usage="""\ -r, --record=RECID_VERSION Create an archive package of specific record -m, --mount=RECID_VERSION View a tree of a given record. Default VERSION is latest. -d, --delete=RECID_VERSION Delete the latest package for a given record. Default VERSION is latest. -D, --delete-all=RECID Delete all archive packages for a given record. --PURGE Purge all archive packages. """,
        version=__revision__,
        # "record=" was missing from the long options although the help
        # text advertises --record=RECID_VERSION and "r:" is declared, so
        # --record was rejected while -r worked.
        # NOTE(review): confirm task_submit_esp also matches '--record'.
        specific_params=("r:m:d:D:",
                         ["record=", "mount=", "delete=", "delete-all=", "PURGE"]),
        task_submit_elaborate_specific_parameter_fnc=task_submit_esp,
        task_run_fnc=task_run_core
    )
def main():
    """Construct the DB dump bibtask.

    Passes the task definition to ``task_init``: authorization action
    ``rundbdump``, the option help text (the default output directory shown
    is CFG_LOGDIR), the option specification and the submit/run callbacks.
    Neither a version string nor an option-check callback is supplied.
    """
    task_init(authorization_action='rundbdump',
              authorization_msg="DB Dump Task Submission",
              help_specific_usage="""\ -o, --output=DIR Output directory. [default=%s] -n, --number=NUM Keep up to NUM previous dump files. [default=5] --params=PARAMS Specify your own mysqldump parameters. Optional. --compress Compress dump directly into gzip. -S, --slave=HOST Perform the dump from a slave, if no host use CFG_DATABASE_SLAVE. --ignore-tables=regex Ignore tables matching the given regular expression --disable-workers Disable any task queue workers while dumping. Examples: $ dbdump --ignore-tables '^(idx|rnk)' $ dbdump -n3 -o/tmp -s1d -L 02:00-04:00 """ % CFG_LOGDIR,
              # "dump-on-slave-helper" is accepted but not mentioned in the
              # help text above.
              specific_params=("n:o:p:S:",
                               ["number=", "output=", "params=", "slave=", "compress", 'ignore-tables=', "dump-on-slave-helper", "disable-workers"]),
              task_submit_elaborate_specific_parameter_fnc=_dbdump_elaborate_submit_param,
              task_run_fnc=_dbdump_run_task_core)
def main():
    """Construct the WebColl bibtask.

    Imports ``task_init`` and the webcoll callbacks inside the function,
    then passes the task definition to ``task_init``: authorization action
    ``runwebcoll``, a description, the option help text, the option
    specification and the submit/check/run callbacks.
    """
    # Imports are local to the function rather than at module level.
    from invenio.legacy.bibsched.bibtask import task_init
    from invenio.legacy.websearch.webcoll import (
        task_submit_elaborate_specific_parameter,
        task_submit_check_options,
        task_run_core,
        __revision__)
    task_init(
        authorization_action="runwebcoll",
        authorization_msg="WebColl Task Submission",
        description="""Description: webcoll updates the collection cache (record universe for a given collection plus web page elements) based on invenio.conf and DB configuration parameters. If the collection name is passed as an argument, only this collection's cache will be updated. If the recursive option is set as well, the collection's descendants will also be updated.\n""",
        help_specific_usage=" -c, --collection\t Update cache for the given "
                            "collection only. [all]\n"
                            " -r, --recursive\t Update cache for the given collection and all its\n"
                            "\t\t\t descendants (to be used in combination with -c). [no]\n"
                            " -q, --quick\t\t Skip webpage cache update for those collections whose\n"
                            "\t\t\t reclist was not changed. Note: if you use this option, it is advised\n"
                            "\t\t\t to schedule, e.g. a nightly 'webcoll --force'. [no]\n"
                            " -f, --force\t\t Force update even if cache is up to date. [no]\n"
                            " -p, --part\t\t Update only certain cache parts (1=reclist,"
                            " 2=webpage). [both]\n"
                            " -l, --language\t Update pages in only certain language"
                            " (e.g. fr,it,...). [all]\n",
        version=__revision__,
        # -c, -p and -l take a value; -r, -q and -f are flags.
        specific_params=("c:rqfp:l:", [ "collection=", "recursive", "quick", "force", "part=", "language=" ]),
        task_submit_elaborate_specific_parameter_fnc=
        task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core)
def main():
    """Constructs the BibCheck bibtask.

    ``-l/--list-plugins`` and ``-r/--list-rules`` are handled directly: the
    matching listing is printed and the function returns without creating a
    task. Any other command line is passed on to ``task_init``.
    """
    usage = """ Scheduled (daemon) options: -l, --list-plugins List all plugins and exit -r, --list-rules List all rules and exit -e, --enable-rules=rules Enable only some rules (comma separated) -a, --all=rules Run the specified rules in all matching records (not only modified ones) -i, --id=ids Run only in the specified record ids or ranges (comma separated), ignoring all other filters -q, --queue=queue Create tickets in the specified RT Queue (Default Bibcheck) -t, --no-tickets Don't create any ticket in RT. Useful for debugging -b, --no-upload Don't upload changes to the database -n, --dry-run Like --no-tickets and --no-upload -c, --config By default bibcheck reads the file rules.cfg. This allows to specify a different config file If any of the options --id, --no-tickets, --no-upload or --dry-run is enabled, bibcheck won't update the last-run-time of a task in the database. Examples: (run a periodical daemon job that checks the rules from rules.cfg) bibcheck -s1d (Run bibcheck on records 1, 2, 3, 5, 6, 7, 8, 9 and 10) bibcheck -i 1,2,3,5-10 (Run only the rule foobar in all the records) bibcheck -a foobar -e foobar (Run only the rules foo and bar on modified records) bibcheck -e foo,bar """

    # A command line containing anything besides the listing switches makes
    # getopt fail here; that is expected and simply means "no listing".
    try:
        opts = getopt.getopt(sys.argv[1:], "lr",
                             ["list-plugins", "list-rules"])[0]
    except getopt.GetoptError:
        opts = []

    for opt, dummy in opts:
        if opt in ["-l", "--list-plugins"]:
            print_plugins()
            return
        elif opt in ["-r", "--list-rules"]:
            print_rules()
            return

    # Build and submit the task
    task_init(
        authorization_action='runbibcheck',
        authorization_msg="BibCheck Task Submission",
        description="",
        help_specific_usage=usage,
        version="Invenio v%s" % CFG_VERSION,
        specific_params=(
            "hvtbnV:e:a:i:q:c:",
            [
                "help",
                "version",
                "verbose=",
                "enable-rules=",
                "all=",
                "id=",
                "queue=",
                "no-tickets",
                "no-upload",
                "dry-run",
                # Takes a file name, like its short form "c:"; without the
                # "=" getopt rejects --config=FILE.
                "config=",
            ]),
        task_submit_elaborate_specific_parameter_fnc=task_parse_options,
        task_run_fnc=task_run_core)
def main():
    """Set up the BibEncode bibtask.

    Collects the mode-specific help text and the getopt option specification
    and forwards them, with the submit/check/run callbacks, to ``task_init``.
    """
    # Note: the %(...)s placeholders are part of the help text shown to the
    # user; the string is not %-formatted here.
    options_help = (
        """ General options: -m, --mode= Selects the mode for BibEncode Modes: 'meta', 'encode', 'extract', 'daemon', 'batch' -i, --input= Input file -o, --output= Output file Options for mode 'meta': -D, --dump= Dumps metadata from a video to a file Options: "ffprobe", "mediainfo", "pbcore" -W, --write= Write metadata to a copy of the file Either a filename or a serialized JSON object. Options for mode 'encode' -p Profile to use for encoding --acodec= Audiocodec for the transcoded video --vcodec= Videocodec for the transcoded video --abitrate= Bitrate auf the audio stream --vbitrate= Bitrate of the video stream --resolution= Resolution of the transcoded video --passes= Number of passes --special= Pure FFmpeg options that will be appended to the command --specialfirst= Pure FFmpeg options for the first pass --specialsecond= Pure FFmpeg options for the second pass --width= Horizontal resolution --height= Vertical resolution --aspect= Aspect ratio fallback if undetectable Options for mode 'extract': -p Profile to use for frame extraction --resolution= Resolution of the extracted frame(s) --number= Number of frames to extract --positions= Specific positions inside the video to extract from Python list notation Either in seconds like '10' or '10.5' Or as a timecode like '00:00:10.5' Example:'[10, 10.5, 00:00:12.5, 20, 00:08:45:11.26]' -o, --output= Output filename can be substituted by bibencode: %(input)s for the input filename %(timecode)s for the timecode %(size)s for the frame size %(number)d for sequential numbers --width= Horizontal resolution --height= Vertical resolution --aspect= Aspect ratio fallback if undetectable Options for mode 'batch': --collection= Updates the whole collection acc.
to a batch template --search= Updates all records matching the search query Options for mode 'daemon': --newjobdir= Optional folder to look for new job descriptions --oldjobdir= Optional folder to move the job desc. of done jobs """
    )
    short_options = "m:i:o:p:W:D:"
    long_options = [
        "mode=", "input=", "output=", "write=", "dump=",
        "acodec=", "vcodec=", "abitrate=", "vbitrate=",
        "resolution=", "passes=", "special=", "specialfirst=",
        "specialsecond=", "height=", "width=", "number=",
        "positions=", "substitute", "newjobdir=", "oldjobdir=",
        "recid=", "aspect=", "collection=", "search=",
    ]
    task_init(
        authorization_action='runbibencode',
        authorization_msg="Bibencode Task Submission",
        help_specific_usage=options_help,
        version=__revision__,
        specific_params=(short_options, long_options),
        task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core)
def main():
    """Constructs the BibCatalog bibtask.

    ``-l/--list-tickets`` is handled directly: the enabled ticket plugins are
    printed (plugin load errors go to stderr) and the function returns
    without creating a task. Any other command line is passed to
    ``task_init``.
    """
    usage = """ Non-daemon options: -l, --list-tickets List available tickets. Scheduled (daemon) options: Selection of records (Required): -a, --new Run on all newly inserted records. -m, --modified Run on all newly modified records. -i, --recids= Record id for extraction. -c, --collections= Run on all records in a specific collection. -q, --query= Specify a search query to fetch records to run on. -r, --reportnumbers= Run on all records related with specific arXiv ids. Selection of tickets (Required): --tickets= Specify which tickets to run. --all-tickets Run on all tickets Examples: (run a periodical daemon job on a given ticket template) bibcatalog -a --tickets metadata_curation -s1h (run all tickets on a set of records) bibcatalog --recids 1,2 -i 3 --all-tickets (run some tickets on a collection) bibcatalog --collections "Articles" --tickets metadata_curation,reference_curation """

    # Any option other than the listing switch makes getopt fail here; that
    # simply means the command line is meant for the scheduled task.
    try:
        opts, dummy = getopt.getopt(sys.argv[1:], "l", ["list-tickets"])
    except getopt.GetoptError:
        opts = []

    for opt, dummy in opts:
        if opt in ["-l", "--list-tickets"]:
            all_plugins, error_messages = load_ticket_plugins()
            if error_messages:
                # We got broken plugins. We alert only for now.
                # Written with sys.stderr.write / print(...) so the block
                # parses on both Python 2 and Python 3.
                sys.stderr.write("\n".join(error_messages) + "\n")
            print("Enabled tickets:")
            for plugin in all_plugins.get_enabled_plugins():
                print(" " + plugin)
            print("Run `$ bibcatalog --tickets=<ticket-name>` to select a ticket template.")
            return

    # Build and submit the task
    task_init(
        authorization_action='runbibcatalog',
        authorization_msg="BibCatalog Task Submission",
        description="",
        help_specific_usage=usage,
        version="Invenio v%s" % CFG_VERSION,
        specific_params=(
            "hVv:i:c:q:r:am",
            [
                "help",
                "version",
                "verbose=",
                "recids=",
                "collections=",
                "query=",
                "reportnumbers=",
                "new",
                "modified",
                "tickets=",
                "all-tickets",
            ]),
        task_submit_elaborate_specific_parameter_fnc=task_parse_options,
        task_submit_check_options_fnc=task_check_options,
        task_run_fnc=task_run_core)
def main():
    """Set up the BibReformat bibtask.

    Builds the long description, the option help and the getopt option
    specification, then forwards them with the check/elaborate/run callbacks
    to ``task_init``.
    """
    task_description = """ BibReformat formats the records and saves the produced outputs for later retrieval. BibReformat is usually run periodically via BibSched in order to (1) format new records in the database and to (2) reformat records for which the meta data has been modified. BibReformat has to be run manually when (3) format config files have been modified, in order to see the changes in the web interface. Although it is not necessary to run BibReformat to display formatted records in the web interface, BibReformat allows to improve serving speed by precreating the outputs. It is suggested to run BibReformat for 'HB' output. Option -m cannot be used at the same time as option -c. Option -c prevents from finding records in private collections. Examples: bibreformat Format all new or modified records (in HB and RECJSON). bibreformat -o HD Format all new or modified records in HD. bibreformat -o HD,HB Format all new or modified records in HD and HB. bibreformat -a Force reformatting all records (in HB). bibreformat -c 'Photos' Force reformatting all records in 'Photos' collection (in HB). bibreformat -c 'Photos' -o HD Force reformatting all records in 'Photos' collection in HD. bibreformat -i 15 Force reformatting record 15 (in HB). bibreformat -i 15:20 Force reformatting records 15 to 20 (in HB). bibreformat -i 15,16,17 Force reformatting records 15, 16 and 17 (in HB). bibreformat -n Show how many records are to be (re)formatted. bibreformat -n -c 'Articles' Show how many records are to be (re)formatted in 'Articles' collection. bibreformat -oHB -s1h Format all new and modified records every hour, in HB.
"""
    options_help = """ -o, --formats \t Specify output format/s (default HB) -n, --noprocess \t Count records to be formatted (no processing done) Reformatting options: -a, --all \t Force reformatting all records -c, --collection \t Force reformatting records by collection -f, --field \t Force reformatting records by field -p, --pattern \t Force reformatting records by pattern -i, --id \t Force reformatting records by record id(s) --no-missing \t Ignore reformatting records without format Pattern options: -m, --matching \t Specify if pattern is exact (e), regular expression (r), \t partial (p), any of the words (o) or all of the words (a) """
    short_options = "ac:f:p:lo:nm:i:"
    long_options = [
        "all",
        "collection=",
        "matching=",
        "field=",
        "pattern=",
        "format=",
        "noprocess",
        "id=",
        "no-missing",
    ]
    task_init(
        authorization_action='runbibformat',
        authorization_msg="BibReformat Task Submission",
        description=task_description,
        help_specific_usage=options_help,
        version=__revision__,
        specific_params=(short_options, long_options),
        task_submit_check_options_fnc=task_submit_check_options,
        task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
def main():
    """Set up the BibEncode bibtask.

    Collects the mode-specific help text and the getopt option specification
    and forwards them, with the submit/check/run callbacks, to ``task_init``.
    """
    # Note: the %(...)s placeholders are part of the help text shown to the
    # user; the string is not %-formatted here.
    options_help = (
        """ General options: -m, --mode= Selects the mode for BibEncode Modes: 'meta', 'encode', 'extract', 'daemon', 'batch' -i, --input= Input file -o, --output= Output file Options for mode 'meta': -D, --dump= Dumps metadata from a video to a file Options: "ffprobe", "mediainfo", "pbcore" -W, --write= Write metadata to a copy of the file Either a filename or a serialized JSON object. Options for mode 'encode' -p Profile to use for encoding --acodec= Audiocodec for the transcoded video --vcodec= Videocodec for the transcoded video --abitrate= Bitrate auf the audio stream --vbitrate= Bitrate of the video stream --resolution= Resolution of the transcoded video --passes= Number of passes --special= Pure FFmpeg options that will be appended to the command --specialfirst= Pure FFmpeg options for the first pass --specialsecond= Pure FFmpeg options for the second pass --width= Horizontal resolution --height= Vertical resolution --aspect= Aspect ratio fallback if undetectable Options for mode 'extract': -p Profile to use for frame extraction --resolution= Resolution of the extracted frame(s) --number= Number of frames to extract --positions= Specific positions inside the video to extract from Python list notation Either in seconds like '10' or '10.5' Or as a timecode like '00:00:10.5' Example:'[10, 10.5, 00:00:12.5, 20, 00:08:45:11.26]' -o, --output= Output filename can be substituted by bibencode: %(input)s for the input filename %(timecode)s for the timecode %(size)s for the frame size %(number)d for sequential numbers --width= Horizontal resolution --height= Vertical resolution --aspect= Aspect ratio fallback if undetectable Options for mode 'batch': --collection= Updates the whole collection acc.
to a batch template --search= Updates all records matching the search query Options for mode 'daemon': --newjobdir= Optional folder to look for new job descriptions --oldjobdir= Optional folder to move the job desc. of done jobs """
    )
    short_options = "m:i:o:p:W:D:"
    long_options = [
        "mode=", "input=", "output=", "write=", "dump=",
        "acodec=", "vcodec=", "abitrate=", "vbitrate=",
        "resolution=", "passes=", "special=", "specialfirst=",
        "specialsecond=", "height=", "width=", "number=",
        "positions=", "substitute", "newjobdir=", "oldjobdir=",
        "recid=", "aspect=", "collection=", "search=",
    ]
    task_init(
        authorization_action='runbibencode',
        authorization_msg="Bibencode Task Submission",
        help_specific_usage=options_help,
        version=__revision__,
        specific_params=(short_options, long_options),
        task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core)
def main():
    """Start the tool.

    If the command line arguments are those of the 'manual' mode (a
    ``-v/--verb`` option is present), a manual one-time harvest is run via
    ``getter.harvest`` and the function returns. Otherwise the arguments are
    validated for the automated mode (``-r/--repository`` and ``--workflow``
    may each appear at most once and must name known repositories/workflows)
    and a BibSched task is set up through ``task_init``.
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            "o:v:m:p:i:s:f:u:r:c:k:l:w:",
            [
                "output=",
                "verb=",
                "method=",
                "metadataPrefix=",
                "identifier=",
                "set=",
                "from=",
                "until=",
                "resumptionToken=",
                "certificate=",
                "key=",
                "user=",
                "password=",
                "workflow=",
            ])

        # So everything went smoothly: start harvesting in manual mode
        if any(opt in ('-v', '--verb') for opt, dummy in opts):
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []

            # get options and arguments
            for opt, opt_value in opts:
                if opt in ["-v", "--verb"]:
                    http_param_dict['verb'] = opt_value
                elif opt in ["-m", '--method']:
                    # Any other method is silently ignored (POST is kept).
                    if opt_value == "GET" or opt_value == "POST":
                        method = opt_value
                elif opt in ["-p", "--metadataPrefix"]:
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ["-i", "--identifier"]:
                    http_param_dict['identifier'] = opt_value
                elif opt in ["-s", "--set"]:
                    sets = opt_value.split()
                elif opt in ["-f", "--from"]:
                    http_param_dict['from'] = opt_value
                elif opt in ["-u", "--until"]:
                    http_param_dict['until'] = opt_value
                elif opt in ["-r", "--resumptionToken"]:
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ["-o", "--output"]:
                    output = opt_value
                elif opt in ["-c", "--certificate"]:
                    cert_file = opt_value
                elif opt in ["-k", "--key"]:
                    key_file = opt_value
                elif opt in ["-l", "--user"]:
                    user = opt_value
                elif opt in ["-w", "--password"]:
                    password = opt_value
                elif opt in ["-V", "--version"]:
                    print(__revision__)
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)

            if len(args) > 0:
                # The last positional argument is the URL to harvest.
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                (addressing_scheme, network_location, path,
                 dummy1, dummy2, dummy3) = urllib.parse.urlparse(base_url)
                secure = (addressing_scheme == "https")

                if (cert_file and not key_file) or \
                        (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")

                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    if not secure:
                        sys.stderr.write(
                            "*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt as error:
                        sys.stderr.write("\n%s\n" % (error,))
                        sys.exit(0)

                getter.harvest(network_location, path, http_param_dict,
                               method, output, sets, secure, user,
                               password, cert_file, key_file)

                sys.stderr.write(
                    "Harvesting completed at: %s\n\n" %
                    time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now
            has_identifier = any(opt in ('-i', '--identifier')
                                 for opt, dummy in opts)
            if (not has_identifier and len(args) > 1) or \
                    (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")
    except getopt.error:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass

    # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.

    num_of_critical_parameter = 0
    num_of_critical_parameterb = 0
    repositories = []

    for opt in sys.argv[1:]:
        # Compare whole arguments: a substring test (`opt in "-r"`) would
        # also count things like "-w", "-" or "r".
        if opt in ("-r", "--repository"):
            num_of_critical_parameter += 1
        elif opt == "--workflow":
            num_of_critical_parameterb += 1
    if num_of_critical_parameter > 1 or num_of_critical_parameterb > 1:
        usage(1, "You can't specify twice -r or --workflow")

    if num_of_critical_parameter == 1:
        if "-r" in sys.argv:
            position = sys.argv.index("-r")
        else:
            position = sys.argv.index("--repository")
        if position + 1 >= len(sys.argv):
            # Report a dangling option instead of failing with IndexError.
            usage(1, "You must specify a value for -r/--repository")
        repositories = sys.argv[position + 1].split(",")
        if len(repositories) > 1 and \
                ("-i" in sys.argv or "--identifier" in sys.argv):
            usage(1, "It is impossible to harvest an identifier from several "
                     "repositories.")

    if num_of_critical_parameterb == 1:
        position = sys.argv.index("--workflow")
        if position + 1 >= len(sys.argv):
            # Report a dangling option instead of failing with IndexError.
            usage(1, "You must specify a value for --workflow")
        workflows = sys.argv[position + 1].split(",")
        for workflow_candidate in workflows:
            if workflow_candidate not in registry_workflows:
                usage(1, "The workflow %s doesn't exist." % workflow_candidate)

    if num_of_critical_parameter == 1 and num_of_critical_parameterb == 0:
        # Only repositories given: each must exist and carry a known workflow.
        for name_repository in repositories:
            try:
                oaiharvest_instance = OaiHARVEST.get(
                    OaiHARVEST.name == name_repository).one()
                if oaiharvest_instance.workflows not in registry_workflows:
                    usage(1, "The repository %s doesn't have a valid workflow specified." % name_repository)
            except orm.exc.NoResultFound:
                usage(1, "The repository %s doesn't exist in our database." % name_repository)

    elif num_of_critical_parameter == 1 and num_of_critical_parameterb == 1:
        # Repositories plus an explicit workflow: only existence is checked.
        for name_repository in repositories:
            try:
                OaiHARVEST.get(OaiHARVEST.name == name_repository).one()
            except orm.exc.NoResultFound:
                usage(1, "The repository %s doesn't exist in our database." % name_repository)

        print("A workflow has been specified, overriding the repository one.")

    task_set_option("repository", None)
    task_set_option("dates", None)
    task_set_option("workflow", None)
    task_set_option("identifiers", None)
    task_init(
        authorization_action='runoaiharvest',
        authorization_msg="oaiharvest Task Submission",
        description=""" Harvest records from OAI sources. Manual vs automatic harvesting: - Manual harvesting retrieves records from the specified URL, with the specified OAI arguments. Harvested records are displayed on the standard output or saved to a file, but are not integrated into the repository. This mode is useful to 'play' with OAI repositories or to build special harvesting scripts. - Automatic harvesting relies on the settings defined in the OAI Harvest admin interface to periodically retrieve the repositories and sets to harvest. It also take care of harvesting only new or modified records. Records harvested using this mode are converted and integrated into the repository, according to the settings defined in the OAI Harvest admin interface.
Examples: Manual (single-shot) harvesting mode: Save to /tmp/z.xml records from CDS added/modified between 2004-04-01 and 2004-04-02, in MARCXML: $ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml -o/tmp/z.xml http://cds.cern.ch/oai2d Automatic (periodical) harvesting mode: Schedule daily harvesting of all repositories defined in OAIHarvest admin: $ oaiharvest -s 24h Schedule daily harvesting of repository 'arxiv', defined in OAIHarvest admin: $ oaiharvest -r arxiv -s 24h Harvest in 10 minutes from 'pubmed' repository records added/modified between 2005-05-05 and 2005-05-10: $ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m """,
        help_specific_usage=(
            'Manual single-shot harvesting mode:\n'
            ' -o, --output specify output file\n'
            ' -v, --verb OAI verb to be executed\n'
            ' -m, --method http method (default POST)\n'
            ' -p, --metadataPrefix metadata format\n'
            ' -i, --identifier OAI identifier\n'
            ' -s, --set OAI set(s). Whitespace-separated list\n'
            ' -r, --resumptionToken Resume previous harvest\n'
            ' -f, --from from date (datestamp)\n'
            ' -u, --until until date (datestamp)\n'
            ' -c, --certificate path to public certificate (in case of certificate-based harvesting)\n'
            ' -k, --key path to private key (in case of certificate-based harvesting)\n'
            ' -l, --user username (in case of password-protected harvesting)\n'
            ' -w, --password password (in case of password-protected harvesting)\n'
            'Deamon mode (periodical or one-shot harvesting mode):\n'
            ' -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
            ' -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n'
            ' -i, --identifier OAI identifier if wished to run in as a task.\n'
            ' --notify-email-to Receive notifications on given email on successful upload and/or finished harvest.\n'
            ' --workflow specify the workflow to execute.\n'
            ' --create-ticket-in Provide desired ticketing queue to create a ticket in it on upload and/or finished harvest.\n'
            ' Requires a configured ticketing system (BibCatalog).\n'
        ),
        specific_params=(
            "r:i:d:W",
            [
                "repository=",
                "identifier=",
                "dates=",
                "workflow=",
                "notify-email-to=",
                "create-ticket-in=",
            ]),
        task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
def main():
    """Set up the BibReformat bibtask.

    Builds the long description, the option help and the getopt option
    specification, then forwards them with the check/elaborate/run callbacks
    to ``task_init``.
    """
    task_description = """ BibReformat formats the records and saves the produced outputs for later retrieval. BibReformat is usually run periodically via BibSched in order to (1) format new records in the database and to (2) reformat records for which the meta data has been modified. BibReformat has to be run manually when (3) format config files have been modified, in order to see the changes in the web interface. Although it is not necessary to run BibReformat to display formatted records in the web interface, BibReformat allows to improve serving speed by precreating the outputs. It is suggested to run BibReformat for 'HB' output. Option -m cannot be used at the same time as option -c. Option -c prevents from finding records in private collections. Examples: bibreformat Format all new or modified records (in HB and RECJSON). bibreformat -o HD Format all new or modified records in HD. bibreformat -o HD,HB Format all new or modified records in HD and HB. bibreformat -a Force reformatting all records (in HB). bibreformat -c 'Photos' Force reformatting all records in 'Photos' collection (in HB). bibreformat -c 'Photos' -o HD Force reformatting all records in 'Photos' collection in HD. bibreformat -i 15 Force reformatting record 15 (in HB). bibreformat -i 15:20 Force reformatting records 15 to 20 (in HB). bibreformat -i 15,16,17 Force reformatting records 15, 16 and 17 (in HB). bibreformat -n Show how many records are to be (re)formatted. bibreformat -n -c 'Articles' Show how many records are to be (re)formatted in 'Articles' collection. bibreformat -oHB -s1h Format all new and modified records every hour, in HB.
"""
    options_help = """ -o, --formats \t Specify output format/s (default HB) -n, --noprocess \t Count records to be formatted (no processing done) Reformatting options: -a, --all \t Force reformatting all records -c, --collection \t Force reformatting records by collection -f, --field \t Force reformatting records by field -p, --pattern \t Force reformatting records by pattern -i, --id \t Force reformatting records by record id(s) --no-missing \t Ignore reformatting records without format Pattern options: -m, --matching \t Specify if pattern is exact (e), regular expression (r), \t partial (p), any of the words (o) or all of the words (a) """
    short_options = "ac:f:p:lo:nm:i:"
    long_options = [
        "all",
        "collection=",
        "matching=",
        "field=",
        "pattern=",
        "format=",
        "noprocess",
        "id=",
        "no-missing",
    ]
    task_init(
        authorization_action='runbibformat',
        authorization_msg="BibReformat Task Submission",
        description=task_description,
        help_specific_usage=options_help,
        version=__revision__,
        specific_params=(short_options, long_options),
        task_submit_check_options_fnc=task_submit_check_options,
        task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
def main():
    """Constructs the BibCatalog bibtask.

    ``-l/--list-tickets`` is handled directly: the enabled ticket plugins are
    printed (plugin load errors go to stderr) and the function returns
    without creating a task. Any other command line is passed to
    ``task_init``.
    """
    usage = """ Non-daemon options: -l, --list-tickets List available tickets. Scheduled (daemon) options: Selection of records (Required): -a, --new Run on all newly inserted records. -m, --modified Run on all newly modified records. -i, --recids= Record id for extraction. -c, --collections= Run on all records in a specific collection. -q, --query= Specify a search query to fetch records to run on. -r, --reportnumbers= Run on all records related with specific arXiv ids. Selection of tickets (Required): --tickets= Specify which tickets to run. --all-tickets Run on all tickets Examples: (run a periodical daemon job on a given ticket template) bibcatalog -a --tickets metadata_curation -s1h (run all tickets on a set of records) bibcatalog --recids 1,2 -i 3 --all-tickets (run some tickets on a collection) bibcatalog --collections "Articles" --tickets metadata_curation,reference_curation """

    # Any option other than the listing switch makes getopt fail here; that
    # simply means the command line is meant for the scheduled task.
    try:
        opts, dummy = getopt.getopt(sys.argv[1:], "l", ["list-tickets"])
    except getopt.GetoptError:
        opts = []

    for opt, dummy in opts:
        if opt in ["-l", "--list-tickets"]:
            all_plugins, error_messages = load_ticket_plugins()
            if error_messages:
                # We got broken plugins. We alert only for now.
                # Written with sys.stderr.write / print(...) so the block
                # parses on both Python 2 and Python 3.
                sys.stderr.write("\n".join(error_messages) + "\n")
            print("Enabled tickets:")
            for plugin in all_plugins.get_enabled_plugins():
                print(" " + plugin)
            print("Run `$ bibcatalog --tickets=<ticket-name>` to select a ticket template.")
            return

    # Build and submit the task
    task_init(
        authorization_action='runbibcatalog',
        authorization_msg="BibCatalog Task Submission",
        description="",
        help_specific_usage=usage,
        version="Invenio v%s" % CFG_VERSION,
        specific_params=(
            "hVv:i:c:q:r:am",
            [
                "help",
                "version",
                "verbose=",
                "recids=",
                "collections=",
                "query=",
                "reportnumbers=",
                "new",
                "modified",
                "tickets=",
                "all-tickets",
            ]),
        task_submit_elaborate_specific_parameter_fnc=task_parse_options,
        task_submit_check_options_fnc=task_check_options,
        task_run_fnc=task_run_core)
def main():
    """Set up the BibRank bibtask.

    Prepares the example-laden description, the ranking/repairing option
    help and the getopt option specification, then forwards them with the
    elaborate/run callbacks to ``task_init``.
    """
    task_description = """Ranking examples: bibrank -wjif -a --id=0-30000,30001-860000 --verbose=9 bibrank -wjif -d --modified='2002-10-27 13:57:26' bibrank -wjif --rebalance --collection=Articles bibrank -wsbr -a -i 234-250,293,300-500 -u admin bibrank -u admin -w citation -E 10 bibrank -u admin -w citation -A """
    # The "1%" below is literal help text; the string is not %-formatted.
    options_help = """Ranking options: -w, --run=r1[,r2] runs each rank method in the order given -c, --collection=c1[,c2] select according to collection -i, --id=low[-high] select according to doc recID -m, --modified=from[,to] select according to modification date -l, --lastupdate select according to last update -a, --add add or update words for selected records -d, --del delete words for selected records -S, --stat show statistics for a method -R, --recalculate recalculate weight data, used by word frequency and citation methods, should be used if ca 1% of the documents have been changed since last time -R was used. NOTE: This will replace the entire set of weights, regardless of date/id selection. -E, --extcites=NUM print the top entries of the external cites table. These are entries that should be entered in your collection, since they have been cited by NUM or more other records present in the system. Useful for cataloguers to input external papers manually. -A --author-citations Calculate author citations.
Repairing options: -k, --check check consistency for all records in the table(s) check if update of ranking data is necessary -r, --repair try to repair all records in the table(s) """
    short_options = "AE:ladSi:m:c:kUrRM:f:w:"
    long_options = [
        "author-citations",
        "print-extcites=",
        "lastupdate",
        "add",
        "del",
        "repair",
        "maxmem",
        "flush",
        "stat",
        "rebalance",
        "id=",
        "collection=",
        "check",
        "modified=",
        "update",
        "run=",
    ]
    task_init(
        authorization_action='runbibrank',
        authorization_msg="BibRank Task Submission",
        description=task_description,
        help_specific_usage=options_help,
        version=__revision__,
        specific_params=(short_options, long_options),
        task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
def main():
    """Start the tool.

    If the command line arguments are those of the 'manual' mode (an OAI
    verb is given), run a one-time harvest through ``getter.harvest`` and
    return. Otherwise validate the ``-r/--repository`` and ``--workflow``
    arguments found in ``sys.argv`` and hand over to ``task_init`` so that
    a BibSched task is set up for automated harvesting based on the
    OAIHarvest admin settings.

    Invalid command lines are reported through ``usage(1, message)``.
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            "o:v:m:p:i:s:f:u:r:c:k:l:w:",
            [
                "output=",
                "verb=",
                "method=",
                "metadataPrefix=",
                "identifier=",
                "set=",
                "from=",
                "until=",
                "resumptionToken=",
                "certificate=",
                "key=",
                "user=",
                "password=",
                "workflow=",
            ])

        # So everything went smoothly: start harvesting in manual mode
        if any(opt in ('-v', '--verb') for opt, dummy in opts):
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []

            # get options and arguments
            for opt, opt_value in opts:
                if opt in ("-v", "--verb"):
                    http_param_dict['verb'] = opt_value
                elif opt in ("-m", "--method"):
                    # Any value other than GET/POST is silently ignored.
                    if opt_value in ("GET", "POST"):
                        method = opt_value
                elif opt in ("-p", "--metadataPrefix"):
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ("-i", "--identifier"):
                    http_param_dict['identifier'] = opt_value
                elif opt in ("-s", "--set"):
                    sets = opt_value.split()
                elif opt in ("-f", "--from"):
                    http_param_dict['from'] = opt_value
                elif opt in ("-u", "--until"):
                    http_param_dict['until'] = opt_value
                elif opt in ("-r", "--resumptionToken"):
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ("-o", "--output"):
                    output = opt_value
                elif opt in ("-c", "--certificate"):
                    cert_file = opt_value
                elif opt in ("-k", "--key"):
                    key_file = opt_value
                elif opt in ("-l", "--user"):
                    user = opt_value
                elif opt in ("-w", "--password"):
                    password = opt_value
                elif opt in ("-V", "--version"):
                    # NOTE(review): -V/--version is not part of the getopt
                    # specification above, so this branch looks unreachable.
                    print(__revision__)
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)

            if len(args) > 0:
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                parsed_url = urllib.parse.urlparse(base_url)
                network_location = parsed_url.netloc
                path = parsed_url.path
                secure = (parsed_url.scheme == "https")

                if (cert_file and not key_file) or \
                        (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")

                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    if not secure:
                        sys.stderr.write(
                            "*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt as error:
                        sys.stderr.write("\n%s\n" % (error, ))
                        sys.exit(0)

                getter.harvest(network_location, path, http_param_dict,
                               method, output, sets, secure, user, password,
                               cert_file, key_file)

                sys.stderr.write(
                    "Harvesting completed at: %s\n\n" %
                    time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now
            has_identifier = any(opt in ('-i', '--identifier')
                                 for opt, dummy in opts)
            if (not has_identifier and len(args) > 1) or \
                    (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")

    except getopt.error:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass

    # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.

    def value_after(flag):
        """Return the ``sys.argv`` element that follows *flag*.

        A flag given as the very last argument has no value; that is
        reported through ``usage`` instead of failing on the index lookup.
        """
        position = sys.argv.index(flag)
        if position + 1 >= len(sys.argv):
            usage(1, "Option %s requires an argument" % flag)
        return sys.argv[position + 1]

    num_of_critical_parameter = 0
    num_of_critical_parameterb = 0
    repositories = []

    for opt in sys.argv[1:]:
        # Compare whole arguments. A substring test such as
        # ``opt in "--repository"`` also matches option *values* like
        # "r" or "repo" and miscounts them as flags.
        if opt in ("-r", "--repository"):
            num_of_critical_parameter += 1
        elif opt == "--workflow":
            num_of_critical_parameterb += 1

    if num_of_critical_parameter > 1 or num_of_critical_parameterb > 1:
        usage(1, "You can't specify twice -r or --workflow")

    if num_of_critical_parameter == 1:
        if "-r" in sys.argv:
            repositories = value_after("-r").split(",")
        else:
            repositories = value_after("--repository").split(",")
        if len(repositories) > 1 and \
                ("-i" in sys.argv or "--identifier" in sys.argv):
            usage(
                1, "It is impossible to harvest an identifier from several "
                "repositories.")

    if num_of_critical_parameterb == 1:
        workflows = value_after("--workflow").split(",")
        for workflow_candidate in workflows:
            if workflow_candidate not in registry_workflows:
                usage(1, "The workflow %s doesn't exist." % workflow_candidate)

    if num_of_critical_parameter == 1 and num_of_critical_parameterb == 0:
        # Only repositories given: each must exist and carry a known workflow.
        for name_repository in repositories:
            try:
                oaiharvest_instance = OaiHARVEST.get(
                    OaiHARVEST.name == name_repository).one()
                if oaiharvest_instance.workflows not in registry_workflows:
                    usage(
                        1,
                        "The repository %s doesn't have a valid workflow specified."
                        % name_repository)
            except orm.exc.NoResultFound:
                usage(
                    1, "The repository %s doesn't exist in our database."
                    % name_repository)

    elif num_of_critical_parameter == 1 and num_of_critical_parameterb == 1:
        # Repositories and an explicit workflow: only existence is checked.
        for name_repository in repositories:
            try:
                OaiHARVEST.get(OaiHARVEST.name == name_repository).one()
            except orm.exc.NoResultFound:
                usage(
                    1, "The repository %s doesn't exist in our database."
                    % name_repository)
        print("A workflow has been specified, overriding the repository one.")

    task_set_option("repository", None)
    task_set_option("dates", None)
    task_set_option("workflow", None)
    task_set_option("identifiers", None)

    # Adjacent string literals are concatenated by the parser, so the text
    # keeps its exact content while staying readable in the source.
    description = (
        " Harvest records from OAI sources. "
        "Manual vs automatic harvesting: "
        "- Manual harvesting retrieves records from the specified URL, with "
        "the specified OAI arguments. Harvested records are displayed on the "
        "standard output or saved to a file, but are not integrated into the "
        "repository. This mode is useful to 'play' with OAI repositories or "
        "to build special harvesting scripts. "
        "- Automatic harvesting relies on the settings defined in the OAI "
        "Harvest admin interface to periodically retrieve the repositories "
        "and sets to harvest. It also take care of harvesting only new or "
        "modified records. Records harvested using this mode are converted "
        "and integrated into the repository, according to the settings "
        "defined in the OAI Harvest admin interface. \n"
        "Examples: "
        "Manual (single-shot) harvesting mode: "
        "Save to /tmp/z.xml records from CDS added/modified between "
        "2004-04-01 and 2004-04-02, in MARCXML: "
        "$ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml "
        "-o/tmp/z.xml http://cds.cern.ch/oai2d "
        "Automatic (periodical) harvesting mode: "
        "Schedule daily harvesting of all repositories defined in OAIHarvest "
        "admin: $ oaiharvest -s 24h "
        "Schedule daily harvesting of repository 'arxiv', defined in "
        "OAIHarvest admin: $ oaiharvest -r arxiv -s 24h "
        "Harvest in 10 minutes from 'pubmed' repository records "
        "added/modified between 2005-05-05 and 2005-05-10: "
        "$ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m "
    )

    task_init(
        authorization_action='runoaiharvest',
        authorization_msg="oaiharvest Task Submission",
        description=description,
        help_specific_usage='Manual single-shot harvesting mode:\n'
        ' -o, --output specify output file\n'
        ' -v, --verb OAI verb to be executed\n'
        ' -m, --method http method (default POST)\n'
        ' -p, --metadataPrefix metadata format\n'
        ' -i, --identifier OAI identifier\n'
        ' -s, --set OAI set(s). Whitespace-separated list\n'
        ' -r, --resumptionToken Resume previous harvest\n'
        ' -f, --from from date (datestamp)\n'
        ' -u, --until until date (datestamp)\n'
        ' -c, --certificate path to public certificate (in case of certificate-based harvesting)\n'
        ' -k, --key path to private key (in case of certificate-based harvesting)\n'
        ' -l, --user username (in case of password-protected harvesting)\n'
        ' -w, --password password (in case of password-protected harvesting)\n'
        'Deamon mode (periodical or one-shot harvesting mode):\n'
        ' -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
        ' -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n'
        ' -i, --identifier OAI identifier if wished to run in as a task.\n'
        ' --notify-email-to Receive notifications on given email on successful upload and/or finished harvest.\n'
        ' --workflow specify the workflow to execute.\n'
        ' --create-ticket-in Provide desired ticketing queue to create a ticket in it on upload and/or finished harvest.\n'
        ' Requires a configured ticketing system (BibCatalog).\n',
        specific_params=("r:i:d:W", [
            "repository=",
            "identifier=",
            "dates=",
            "workflow=",
            "notify-email-to=",
            "create-ticket-in=",
        ]),
        task_submit_elaborate_specific_parameter_fnc=
        task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
def bibauthorid_daemon():
    """Constructs the Bibauthorid bibtask.

    Collects the Bibauthorid description, option help, version string and
    getopt option specification, then passes them together with the
    module's private submit/check/run callbacks to ``bibtask.task_init``.
    """
    # Adjacent string literals are concatenated by the parser, so the texts
    # below keep their exact content while staying readable in the source.
    task_description = (
        " Purpose: Disambiguate Authors and find their identities. "
        "Examples: "
        "- Process all records that hold an author with last name 'Ellis': "
        "$ bibauthorid -u admin --update-personid --all-records "
        "- Disambiguate all records on a fresh installation "
        "$ bibauthorid -u admin --disambiguate --from-scratch "
    )
    usage_text = (
        " bibauthorid [COMMAND] [OPTIONS] "
        "COMMAND You can choose only one from the following: "
        "--update-personid Updates personid adding not yet assigned papers "
        "to the system, in a fast, best effort basis. "
        "Cleans the table from stale records. "
        "--disambiguate Disambiguates all signatures in the database using "
        "the tortoise/wedge algorithm. This usually takes a LOT of time so "
        "the results are stored in a special table. Use --merge to use the "
        "results. "
        "--merge Updates the personid tables with the results from the "
        "--disambiguate algorithm. "
        "OPTIONS Options for update personid "
        "(default) Will update only the modified records since last run. "
        "-i, --record-ids Force the procedure to work only on the specified "
        "records. This option is exclusive with --all-records. "
        "--all-records Force the procedure to work on all records. This "
        "option is exclusive with --record-ids. "
        "Options for disambiguate "
        "(default) Performs full disambiguation of all records in the "
        "current personid tables with respect to the user decisions. "
        "--from-scratch Ignores the current information in the personid "
        "tables and disambiguates everything from scratch. "
        "There are no options for the merger. \n"
    )
    short_opts = "i:"
    long_opts = [
        "record-ids=",
        "disambiguate",
        "merge",
        "all-records",
        "update-personid",
        "from-scratch",
    ]
    version_text = "Invenio Bibauthorid v%s" % bconfig.VERSION

    # NOTE(review): the authorization action is 'runbibclassify' although
    # this is the Bibauthorid task - confirm that this is intended.
    bibtask.task_init(
        authorization_action='runbibclassify',
        authorization_msg="Bibauthorid Task Submission",
        description=task_description,
        help_specific_usage=usage_text,
        version=version_text,
        specific_params=(short_opts, long_opts),
        task_submit_elaborate_specific_parameter_fnc=_task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=_task_submit_check_options,
        task_run_fnc=_task_run_core)