Beispiel #1
0
def main():
    """Register the InvenioGC clean-up task and hand control to task_init."""
    # Every letter here has a long counterpart in ``long_flags``;
    # --bibedit-cache exists in long form only.
    short_flags = "lpgbdacTkoS"
    long_flags = [
        "logs",
        "tempfiles",
        "guests",
        "bibxxx",
        "documents",
        "all",
        "cache",
        "tasks",
        "check-tables",
        "optimise-tables",
        "sessions",
        "bibedit-cache",
    ]
    # The single %s placeholder (in the --documents line) is filled with
    # CFG_DELETED_BIBDOC_MAXLIFE.
    usage_text = (
        "  -l, --logs\t\tClean old logs.\n"
        "  -p, --tempfiles\tClean old temporary files.\n"
        "  -g, --guests\t\tClean expired guest user related information. [default action]\n"
        "  -b, --bibxxx\t\tClean unreferenced bibliographic values in bibXXx tables.\n"
        "  -c, --cache\t\tClean cache by removing old files.\n"
        "  -d, --documents\tClean deleted documents and revisions older than %s days.\n"
        "  -T, --tasks\t\tClean the BibSched queue removing/archiving old DONE tasks.\n"
        "  -a, --all\t\tClean all of the above (but do not run check/optimise table options below).\n"
        "  -k, --check-tables\tCheck DB tables to discover potential problems.\n"
        "  -o, --optimise-tables\tOptimise DB tables to increase performance.\n"
        "  -S, --sessions\tClean expired sessions from the DB.\n"
        "  --bibedit-cache Clean expired bibedit cache entries from the DB.\n"
    ) % CFG_DELETED_BIBDOC_MAXLIFE
    task_init(
        authorization_action='runinveniogc',
        authorization_msg="InvenioGC Task Submission",
        help_specific_usage=usage_text,
        version=__revision__,
        specific_params=(short_flags, long_flags),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core,
    )
Beispiel #2
0
def main():
    """Declare the InvenioGC command-line switches and start the bibtask."""
    # (short letter, long name) for each switch, in declaration order.
    # An empty short letter means the switch only has a long form.
    switches = (
        ("l", "logs"),
        ("p", "tempfiles"),
        ("g", "guests"),
        ("b", "bibxxx"),
        ("d", "documents"),
        ("a", "all"),
        ("c", "cache"),
        ("T", "tasks"),
        ("k", "check-tables"),
        ("o", "optimise-tables"),
        ("S", "sessions"),
        ("", "bibedit-cache"),
    )
    letters = "".join([letter for letter, _ in switches])
    names = [name for _, name in switches]

    # %s in the --documents line is replaced by CFG_DELETED_BIBDOC_MAXLIFE.
    help_template = (
        "  -l, --logs\t\tClean old logs.\n"
        "  -p, --tempfiles\tClean old temporary files.\n"
        "  -g, --guests\t\tClean expired guest user related information. [default action]\n"
        "  -b, --bibxxx\t\tClean unreferenced bibliographic values in bibXXx tables.\n"
        "  -c, --cache\t\tClean cache by removing old files.\n"
        "  -d, --documents\tClean deleted documents and revisions older than %s days.\n"
        "  -T, --tasks\t\tClean the BibSched queue removing/archiving old DONE tasks.\n"
        "  -a, --all\t\tClean all of the above (but do not run check/optimise table options below).\n"
        "  -k, --check-tables\tCheck DB tables to discover potential problems.\n"
        "  -o, --optimise-tables\tOptimise DB tables to increase performance.\n"
        "  -S, --sessions\tClean expired sessions from the DB.\n"
        "  --bibedit-cache Clean expired bibedit cache entries from the DB.\n"
    )

    task_init(
        authorization_action='runinveniogc',
        authorization_msg="InvenioGC Task Submission",
        help_specific_usage=help_template % CFG_DELETED_BIBDOC_MAXLIFE,
        version=__revision__,
        specific_params=(letters, names),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core,
    )
Beispiel #3
0
def main():
    """Configure the Webstat administration bibtask and start it.

    Declares the command-line options for managing custom events
    (create, remove, list, cache, dump/load configuration) and passes
    them, together with the submit/check/run callbacks, to task_init.
    """
    task_init(
        authorization_action="runwebstatadmin",
        authorization_msg="Webstat Administrator",
        # %s is replaced by the name the program was invoked with.
        description="Description: %s Creates/deletes custom events. Can be set\n"
        "             to cache key events and previously defined custom events.\n"
        % sys.argv[0],
        # The -r line used to read "remote the custom event"; the option
        # removes an event, so the help now says "remove".
        help_specific_usage=
        "  -n, --new-event=ID            create a new custom event with the human-readable ID\n"
        "  -r, --remove-event=ID         remove the custom event with id ID and all its data\n"
        "  -S, --show-events             show all currently available custom events\n"
        "  -c, --cache-events=CLASS|[ID] caches the events defined by the class or IDs, e.g.:\n"
        "                                  -c ALL\n"
        "                                  -c KEYEVENTS\n"
        "                                  -c CUSTOMEVENTS\n"
        "                                  -c 'event id1',id2,'testevent'\n"
        "  -d,--dump-config              dump default config file\n"
        "  -e,--load-config              create the custom events described in config_file\n"
        "\nWhen creating events (-n) the following parameters are also applicable:\n"
        "  -l, --event-label=NAME  set a descriptive label to the custom event\n"
        "  -a, --args=[NAME]       set column headers for additional custom event arguments\n"
        "                          (e.g. -a country,person,car)\n",
        version=__revision__,
        # Short options followed by ":" take a value, matching the long
        # options that end in "=".
        specific_params=("n:r:Sl:a:c:de", [
            "new-event=", "remove-event=", "show-events", "event-label=",
            "args=", "cache-events=", "dump-config", "load-config"
        ]),
        task_submit_elaborate_specific_parameter_fnc=
        task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core)
Beispiel #4
0
def bibclassify_daemon():
    """Hand the BibClassify task definition over to bibtask.task_init."""
    summary = (
        "Extract keywords and create a BibUpload task.\n"
        "Examples:\n"
        "    $ bibclassify\n"
        "    $ bibclassify -i 79 -k HEP\n"
        "    $ bibclassify -c 'Articles' -k HEP\n"
    )
    option_help = (
        "  -i, --recid\t\tkeywords are extracted from this record\n"
        "  -c, --collection\t\tkeywords are extracted from this collection\n"
        "  -k, --taxonomy\t\tkeywords are based on that reference"
    )
    # -i, -c and -k take a value; -f/--force is a plain flag.
    short_spec = "i:c:k:f"
    long_spec = ["recid=", "collection=", "taxonomy=", "force"]

    bibtask.task_init(
        authorization_action='runbibclassify',
        authorization_msg="BibClassify Task Submission",
        description=summary,
        help_specific_usage=option_help,
        version="Invenio BibClassify v%s" % bconfig.VERSION,
        specific_params=(short_spec, long_spec),
        task_submit_elaborate_specific_parameter_fnc=(
            _task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=_task_submit_check_options,
        task_run_fnc=_task_run_core,
    )
Beispiel #5
0
def main():
    """Configure the Webstat administration bibtask and start it.

    Declares the command-line options for managing custom events
    (create, remove, list, cache, dump/load configuration) and passes
    them, together with the submit/check/run callbacks, to task_init.
    """
    task_init(authorization_action="runwebstatadmin",
              authorization_msg="Webstat Administrator",
              # %s is replaced by the name the program was invoked with.
              description="Description: %s Creates/deletes custom events. Can be set\n"
                          "             to cache key events and previously defined custom events.\n" % sys.argv[0],
              # The -r line used to read "remote the custom event"; the
              # option removes an event, so the help now says "remove".
              help_specific_usage="  -n, --new-event=ID            create a new custom event with the human-readable ID\n"
                                  "  -r, --remove-event=ID         remove the custom event with id ID and all its data\n"
                                  "  -S, --show-events             show all currently available custom events\n"
                                  "  -c, --cache-events=CLASS|[ID] caches the events defined by the class or IDs, e.g.:\n"
                                  "                                  -c ALL\n"
                                  "                                  -c KEYEVENTS\n"
                                  "                                  -c CUSTOMEVENTS\n"
                                  "                                  -c 'event id1',id2,'testevent'\n"
                                  "  -d,--dump-config              dump default config file\n"
                                  "  -e,--load-config              create the custom events described in config_file\n"
                                  "\nWhen creating events (-n) the following parameters are also applicable:\n"
                                  "  -l, --event-label=NAME  set a descriptive label to the custom event\n"
                                  "  -a, --args=[NAME]       set column headers for additional custom event arguments\n"
                                  "                          (e.g. -a country,person,car)\n",
              version=__revision__,
              # Short options followed by ":" take a value, matching the
              # long options that end in "=".
              specific_params=("n:r:Sl:a:c:de", ["new-event=", "remove-event=", "show-events",
                                                  "event-label=", "args=", "cache-events=", "dump-config",
                                                  "load-config"]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_submit_check_options_fnc=task_submit_check_options,
              task_run_fnc=task_run_core)
Beispiel #6
0
def main():
    """Report the OAI repository status or start the updater bibtask.

    Exits with status 1 when the OAI ID, set and previous-set fields do
    not share their first five characters.  With -r/--report or
    -d/--detailed-report on the command line the repository status is
    printed directly and the function returns; otherwise the task is
    handed to task_init.
    """
    id_prefix = CFG_OAI_ID_FIELD[:5]
    fields_match = (id_prefix == CFG_OAI_SET_FIELD[:5]
                    and id_prefix == CFG_OAI_PREVIOUS_SET_FIELD[:5])
    if not fields_match:
        config_error = """\
ERROR: since Invenio 1.0 the OAI ID and the OAI Set must be stored in the same
field. Please revise your configuration for the variables
    CFG_OAI_ID_FIELD (currently set to %s)
    CFG_OAI_SET_FIELD (currently set to %s)
    CFG_OAI_PREVIOUS_SET_FIELD (currently set to %s)""" % (
            CFG_OAI_ID_FIELD, CFG_OAI_SET_FIELD, CFG_OAI_PREVIOUS_SET_FIELD)
        print(config_error, file=sys.stderr)
        sys.exit(1)

    # A report switch anywhere on the command line bypasses BibSched:
    # print the status and return.  The detailed report takes precedence.
    cli_args = sys.argv[1:]
    if '-d' in cli_args or '--detailed-report' in cli_args:
        report_level = 2
    elif '-r' in cli_args or '--report' in cli_args:
        report_level = 1
    else:
        report_level = None

    if report_level is not None:

        def stdout_writer(*args):
            """Write the first argument plus a newline to stdout.

            Stands in for the BibTask message writer so that no
            BibSched environment is needed.
            """
            sys.stdout.write(args[0] + '\n')

        print_repository_status(local_write_message=stdout_writer,
                                verbose=report_level)
        return

    examples = (
        "Examples:\n"
        " Expose records according to sets defined in OAI Repository admin interface\n"
        "   $ oairepositoryupdater \n"
        " Expose records according to sets defined in OAI Repository admin interface and update them every day\n"
        "   $ oairepositoryupdater -s24\n"
        " Print OAI repository status\n"
        "   $ oairepositoryupdater -r\n"
        " Print OAI repository detailed status\n"
        "   $ oairepositoryupdater -d\n\n"
    )
    option_help = (
        "Options:\n"
        " -r --report\t\tOAI repository status\n"
        " -d --detailed-report\t\tOAI repository detailed status\n"
        " -n --no-process\tDo no upload the modifications\n"
        " --notimechange\tDo not update record modification_date\n"
        "NOTE: --notimechange should be used with care, basically only the first time a new set is added."
    )
    long_names = ["report", "detailed-report", "no-process", "notimechange"]

    task_init(
        authorization_action='runoairepository',
        authorization_msg="OAI Archive Task Submission",
        description=examples,
        help_specific_usage=option_help,
        specific_params=("rdn", long_names),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_run_fnc=oairepositoryupdater_task,
    )
Beispiel #7
0
def main():
    """Build the refextract bibtask and submit it through task_init."""
    # BibCatalogSystemRT is only instantiated when CFG_BIBCATALOG_SYSTEM
    # is 'RT'; otherwise the task runs without a catalog system.
    catalog = BibCatalogSystemRT() if CFG_BIBCATALOG_SYSTEM == 'RT' else None

    # Daemon-specific help, appended to the HELP_MESSAGE global.
    daemon_usage = HELP_MESSAGE + """

  Scheduled (daemon) options:
  -a, --new          Run on all newly inserted records.
  -m, --modified     Run on all newly modified records.
  -r, --recids       Record id for extraction.
  -c, --collections  Entire Collection for extraction.
  --arxiv            All arxiv modified records within last week

  Special (daemon) options:
  --create-ticket    Create a RT ticket for record references

  Examples:
   (run a daemon job)
      refextract -a
   (run on a set of records)
      refextract --recids 1,2 -r 3
   (run on a collection)
      refextract --collections "Reports"
   (run as standalone)
      refextract -o /home/chayward/refs.xml /home/chayward/thesis.pdf

"""
    # NOTE(review): the help above advertises -m for --modified, but "m"
    # is absent from the short-option string below -- confirm whether
    # that is intended.
    short_opts = "hVv:x:r:c:nai"
    long_opts = [
        "help", "version", "verbose=", "inspire",
        "kb-journals=", "kb-journals-re=", "kb-report-numbers=",
        "kb-authors=", "kb-books=",
        "recids=", "collections=", "new", "modified",
        "no-overwrite", "arxiv", "create-ticket",
    ]

    task_init(
        authorization_action='runrefextract',
        authorization_msg="Refextract Task Submission",
        description=DESCRIPTION,
        help_specific_usage=daemon_usage,
        version="Invenio v%s" % CFG_VERSION,
        specific_params=(short_opts, long_opts),
        task_submit_elaborate_specific_parameter_fnc=cb_parse_option,
        task_submit_check_options_fnc=check_options,
        task_run_fnc=task_run_core_wrapper(
            'refextract',
            task_run_core,
            extra_vars={'bibcatalog_system': catalog}),
    )
Beispiel #8
0
def main():
    """Print the OAI repository status or launch the updater bibtask.

    The process exits with status 1 if the three OAI field settings do
    not start with the same five characters.  When a report switch is
    on the command line the status is printed and nothing is scheduled;
    otherwise task_init takes over.
    """
    prefixes = (CFG_OAI_ID_FIELD[:5],
                CFG_OAI_SET_FIELD[:5],
                CFG_OAI_PREVIOUS_SET_FIELD[:5])
    if prefixes[0] != prefixes[1] or prefixes[0] != prefixes[2]:
        current_values = (CFG_OAI_ID_FIELD,
                          CFG_OAI_SET_FIELD,
                          CFG_OAI_PREVIOUS_SET_FIELD)
        print("""\
ERROR: since Invenio 1.0 the OAI ID and the OAI Set must be stored in the same
field. Please revise your configuration for the variables
    CFG_OAI_ID_FIELD (currently set to %s)
    CFG_OAI_SET_FIELD (currently set to %s)
    CFG_OAI_PREVIOUS_SET_FIELD (currently set to %s)""" % current_values,
              file=sys.stderr)
        sys.exit(1)

    # Report switches are served immediately, without going through
    # BibSched; the detailed report wins when both kinds are given.
    given = sys.argv[1:]
    wants_detailed = '-d' in given or '--detailed-report' in given
    wants_summary = '-r' in given or '--report' in given
    if wants_detailed or wants_summary:
        def write_line(*args):
            """Send the first argument and a newline to stdout, so no
            BibSched environment is required."""
            sys.stdout.write(args[0] + '\n')

        if wants_detailed:
            level = 2
        else:
            level = 1
        print_repository_status(local_write_message=write_line, verbose=level)
        return

    usage_examples = (
        "Examples:\n"
        " Expose records according to sets defined in OAI Repository admin interface\n"
        "   $ oairepositoryupdater \n"
        " Expose records according to sets defined in OAI Repository admin interface and update them every day\n"
        "   $ oairepositoryupdater -s24\n"
        " Print OAI repository status\n"
        "   $ oairepositoryupdater -r\n"
        " Print OAI repository detailed status\n"
        "   $ oairepositoryupdater -d\n\n")
    usage_options = (
        "Options:\n"
        " -r --report\t\tOAI repository status\n"
        " -d --detailed-report\t\tOAI repository detailed status\n"
        " -n --no-process\tDo no upload the modifications\n"
        " --notimechange\tDo not update record modification_date\n"
        "NOTE: --notimechange should be used with care, basically only the first time a new set is added.")
    option_spec = ("rdn",
                   ["report", "detailed-report", "no-process", "notimechange"])

    task_init(
        authorization_action='runoairepository',
        authorization_msg="OAI Archive Task Submission",
        description=usage_examples,
        help_specific_usage=usage_options,
        specific_params=option_spec,
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_run_fnc=oairepositoryupdater_task)
Beispiel #9
0
def main():
    """Declare the BibExport options and launch the task with task_init."""
    export_usage = """Export options:
  -w,  --wjob=j1[,j2]\tRun specific exporting jobs j1, j2, etc (e.g. 'sitemap').
"""
    # -w / --wjob is the only task-specific option and takes a value.
    option_spec = ("w:", ["wjob="])
    task_init(
        authorization_action='runbibexport',
        authorization_msg="BibExport Task Submission",
        help_specific_usage=export_usage,
        version=__revision__,
        specific_params=option_spec,
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core,
    )
Beispiel #10
0
def main():
    """Set up the BibTaskEx example task and pass it to task_init."""
    usage = """\
-n,  --number         Print Fibonacci numbers for up to NUM. [default=30]
-e,  --error          Raise an error from time to time
"""
    # -n takes a value, -e is a plain flag.
    options = ("n:e", ["number=", "error"])
    task_init(
        authorization_action='runbibtaskex',
        authorization_msg="BibTaskEx Task Submission",
        help_specific_usage=usage,
        version=__revision__,
        specific_params=options,
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_run_fnc=task_run_core,
    )
Beispiel #11
0
def main():
    """Define the BibCirculation task options and start it via task_init."""
    usage = """-o,  --overdue-letters\tCheck overdue loans and send recall emails if necessary.\n
-b,  --update-borrowers\tUpdate borrowers information from ldap.\n
-r,  --update-requests\tUpdate pending requests of users\n\n"""
    # %s is replaced by the name the program was invoked with.
    example = """Example: %s -u admin \n\n""" % (sys.argv[0])
    # All three task-specific switches are plain flags.
    long_names = ["overdue-letters", "update-borrowers", "update-requests"]
    task_init(
        authorization_action='runbibcircd',
        authorization_msg="BibCirculation Task Submission",
        help_specific_usage=usage,
        description=example,
        specific_params=("obr", long_names),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        version=__revision__,
        task_run_fnc=task_run_core,
    )
Beispiel #12
0
def main():
    """Describe the BibTaskEx example task to task_init and start it."""
    short_flags = "n:e"  # -n expects a value, -e does not
    long_flags = ["number=", "error"]
    help_text = """\
-n,  --number         Print Fibonacci numbers for up to NUM. [default=30]
-e,  --error          Raise an error from time to time
"""
    task_init(
        authorization_action='runbibtaskex',
        authorization_msg="BibTaskEx Task Submission",
        help_specific_usage=help_text,
        version=__revision__,
        specific_params=(short_flags, long_flags),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_run_fnc=task_run_core,
    )
Beispiel #13
0
def main():
    """Submit the texkey generation bibtask through task_init."""
    # The task declares no options of its own, so both the short-option
    # string and the long-option list are empty.
    no_options = ("", [])
    release = "Invenio v%s" % CFG_VERSION
    # The parse/check callbacks are deliberately not passed:
    #   task_submit_elaborate_specific_parameter_fnc=parse_option
    #   task_submit_check_options_fnc=check_options
    task_init(authorization_action='runtexkeygeneration',
              authorization_msg="Texkey generator task submission",
              description=DESCRIPTION,
              help_specific_usage=HELP_MESSAGE,
              version=release,
              specific_params=no_options,
              task_run_fnc=task_run_core)
Beispiel #14
0
def main():
    """Hand the texkey generator task definition to task_init."""
    version_label = "Invenio v%s" % CFG_VERSION
    task_init(
        authorization_action='runtexkeygeneration',
        authorization_msg="Texkey generator task submission",
        description=DESCRIPTION,
        help_specific_usage=HELP_MESSAGE,
        version=version_label,
        # No task-specific options: empty short string, empty long list.
        specific_params=("", []),
        # Option parsing/checking callbacks are left disabled:
        # task_submit_elaborate_specific_parameter_fnc=parse_option,
        # task_submit_check_options_fnc=check_options,
        task_run_fnc=task_run_core,
    )
Beispiel #15
0
def main():
    """Configure the DB dump task and start it through task_init."""
    # %s in the --output line is filled with CFG_LOGDIR.
    usage_template = """\
  -o, --output=DIR      Output directory. [default=%s]
  -n, --number=NUM      Keep up to NUM previous dump files. [default=5]
"""
    # Both task-specific options take a value.
    option_spec = ("n:o:", ["number=", "output="])
    task_init(
        authorization_action='rundbdump',
        authorization_msg="DB Dump Task Submission",
        help_specific_usage=usage_template % CFG_LOGDIR,
        version=__revision__,
        specific_params=option_spec,
        task_submit_elaborate_specific_parameter_fnc=(
            _dbdump_elaborate_submit_param),
        task_run_fnc=_dbdump_run_task_core,
    )
Beispiel #16
0
def main():
    """Assemble the refextract task definition and pass it to task_init."""
    # Values handed to the run wrapper as ``extra_vars``.
    shared_state = {'bibcatalog_system': BIBCATALOG_SYSTEM, 'records': []}

    # Daemon-specific help, appended to the HELP_MESSAGE global.
    full_usage = HELP_MESSAGE + """

  Scheduled (daemon) options:
  -a, --new          Run on all newly inserted records.
  -m, --modified     Run on all newly modified records.
  -r, --recids       Record id for extraction.
  -c, --collections  Entire Collection for extraction.
  --arxiv            All arxiv modified records within last week

  Special (daemon) options:
  --create-ticket    Create a RT ticket for record references

  Examples:
   (run a daemon job)
      refextract -a
   (run on a set of records)
      refextract --recids 1,2 -r 3
   (run on a collection)
      refextract --collections "Reports"
   (run as standalone)
      refextract -o /home/chayward/refs.xml /home/chayward/thesis.pdf

"""
    # NOTE(review): the help above advertises -m for --modified, but "m"
    # is absent from the short-option string below -- confirm whether
    # that is intended.
    short_opts = "hVv:x:r:c:nai:f:"
    long_opts = [
        "help",
        "version",
        "verbose=",
        "inspire",
        "kb-journals=",
        "kb-journals-re=",
        "kb-report-numbers=",
        "kb-authors=",
        "kb-books=",
        "recids=",
        "id=",
        "collections=",
        "new",
        "modified",
        "no-overwrite",
        "arxiv",
        "create-ticket",
    ]

    task_init(
        authorization_action='runrefextract',
        authorization_msg="Refextract Task Submission",
        description=DESCRIPTION,
        help_specific_usage=full_usage,
        version="Invenio v%s" % CFG_VERSION,
        specific_params=(short_opts, long_opts),
        task_submit_elaborate_specific_parameter_fnc=cb_parse_option,
        task_submit_check_options_fnc=check_options,
        task_run_fnc=task_run_core_wrapper(
            'refextract',
            task_run_core,
            extra_vars=shared_state,
            post_process=cb_submit_bibupload),
    )
Beispiel #17
0
def main():
    """Declare the bibtasklet options and start the task via task_init."""
    tasklet_usage = """\
  -T, --tasklet         Execute the specific tasklet
  -a, --argument        Specify an argument to be passed to tasklet in the form
                            param=value, e.g. --argument foo=bar
  -l, --list-tasklets   List the existing tasklets
"""
    # -T and -a take a value; -l is a plain flag.
    short_opts = "T:a:l"
    long_opts = ["tasklet=", "argument=", "list-tasklets"]
    task_init(authorization_action='runbibtasklet',
              authorization_msg="BibTaskLet Task Submission",
              help_specific_usage=tasklet_usage,
              version=__version__,
              specific_params=(short_opts, long_opts),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core,
              task_submit_check_options_fnc=task_submit_check_options)
Beispiel #18
0
def main():
    """Configure the batch uploader bibtask and start it via task_init.

    Two task-specific flags are declared, -m/--metadata and
    -d/--documents, which select the run mode described in the help
    text below.
    """
    task_init(authorization_action='runbatchuploader',
              authorization_msg="Batch Uploader",
              description="""Description:
    The batch uploader has two different run modes.
    If --metadata is specified (by default) then all files in folders insert,
    append, correct and replace are uploaded using the corresponding mode.
    If mode --documents is selected all documents present in folders named
    append and revise are uploaded using the corresponding mode.
    Parent directory for batch uploader must be specified in the
    invenio configuration file.\n""",
              help_specific_usage=""" -m, --metadata\t Batch Uploader will look for metadata files in the corresponding folders
 -d, --documents\t Batch Uploader will look for documents in the corresponding folders
                                """,
              version=__revision__,
              # Both switches are plain flags.  The short spec used to be
              # "md:", which made -d demand an argument even though the
              # long form "documents" and the help text take none.
              specific_params=("md", ["metadata", "documents"]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
Beispiel #19
0
def main():
    """Configure the batch uploader bibtask and start it via task_init.

    Two task-specific flags are declared, -m/--metadata and
    -d/--documents, which select the run mode described in the help
    text below.
    """
    task_init(authorization_action='runbatchuploader',
            authorization_msg="Batch Uploader",
            description="""Description:
    The batch uploader has two different run modes.
    If --metadata is specified (by default) then all files in folders insert,
    append, correct and replace are uploaded using the corresponding mode.
    If mode --documents is selected all documents present in folders named
    append and revise are uploaded using the corresponding mode.
    Parent directory for batch uploader must be specified in the
    invenio configuration file.\n""",
            help_specific_usage=""" -m, --metadata\t Batch Uploader will look for metadata files in the corresponding folders
 -d, --documents\t Batch Uploader will look for documents in the corresponding folders
                                """,
            version=__revision__,
            # Both switches are plain flags.  The short spec used to be
            # "md:", which made -d demand an argument even though the
            # long form "documents" and the help text take none.
            specific_params=("md", ["metadata", "documents"]),
            task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
            task_run_fnc=task_run_core)
Beispiel #20
0
def main():
    """Declare the BibSort command-line options and start the bibtask."""
    sorting_usage = """
 Specific options:
  -l, --load-config     Loads the configuration from bibsort.conf into the
                        database
  -d, --dump-config     Outputs a database dump in form of a config file
  -p, --print-sorting-methods
                        Prints the available sorting methods
  -R, --rebalance       Runs the sorting methods given in '--methods'and
                        rebalances all the buckets. If no method is
                        specified, the rebalance will be done for all
                        the methods in the config file.
  -S, --update-sorting  Runs the sorting methods given in '--methods' for the
                        recids given in '--id'. If no method is
                        specified, the update will be done for all the
                        methods in the config file. If no recids are
                        specified, the update will be done for all the records
                        that have been modified/inserted from the last
                        run of the sorting. If you want to run the
                        sorting for all records, you should use the '-B'
                        option
  -M, --methods=METHODS Specify the sorting methods for which the
                        update_sorting or rebalancing will run (ex:
                        --methods=method1,method2,method3).
  -i, --id=RECIDS       Specify the records for which the update_sorting will
                        run (ex: --id=1,2-56,72)
"""
    # -M and -i take a value; the remaining switches are plain flags.
    short_opts = "ldpRSM:i:"
    long_opts = [
        "load-config", "dump-config", "print-sorting-methods",
        "rebalance", "update-sorting", "methods=", "id=",
    ]
    task_init(
        authorization_action='runbibsort',
        authorization_msg="BibSort Task Submission",
        description="",
        help_specific_usage=sorting_usage,
        version=__revision__,
        specific_params=(short_opts, long_opts),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_run_fnc=task_run_core,
    )
Beispiel #21
0
def bibclassify_daemon():
    """Describe the BibClassify task to bibtask.task_init and start it."""
    # -i, -c and -k take a value; -f/--force is a plain flag.
    option_spec = ("i:c:k:f", ["recid=", "collection=", "taxonomy=", "force"])
    about = ("Extract keywords and create a BibUpload task.\n"
             "Examples:\n"
             "    $ bibclassify\n"
             "    $ bibclassify -i 79 -k HEP\n"
             "    $ bibclassify -c 'Articles' -k HEP\n")
    usage = ("  -i, --recid\t\tkeywords are extracted from this record\n"
             "  -c, --collection\t\tkeywords are extracted from this collection\n"
             "  -k, --taxonomy\t\tkeywords are based on that reference")
    bibtask.task_init(authorization_action='runbibclassify',
                      authorization_msg="BibClassify Task Submission",
                      description=about,
                      help_specific_usage=usage,
                      version="Invenio BibClassify v%s" % bconfig.VERSION,
                      specific_params=option_spec,
                      task_submit_elaborate_specific_parameter_fnc=_task_submit_elaborate_specific_parameter,
                      task_submit_check_options_fnc=_task_submit_check_options,
                      task_run_fnc=_task_run_core)
Beispiel #22
0
def main():
    """Force a recrawl on request, otherwise start the BibExport bibtask.

    When ``--force-recrawling`` appears anywhere in ``sys.argv`` the
    function calls ``force_recrawling()``, prints a confirmation and
    exits without scheduling a task.  Otherwise the task definition is
    passed to ``task_init``.
    """
    if '--force-recrawling' in sys.argv:
        force_recrawling()
        # Parenthesised single-argument form: the same output on Python 2
        # and valid syntax on Python 3 (the bare print statement is not).
        print("Recrawling forced")
        # NOTE(review): exit status 1 is kept from the original even
        # though this looks like the success path -- confirm with callers
        # before changing it.
        sys.exit(1)
    task_init(authorization_action='runbibexport',
              authorization_msg="BibExport Task Submission",
              help_specific_usage="""Export options:
  -w,  --wjob=j1[,j2]\tRun specific exporting jobs j1, j2, etc (e.g. 'sitemap').
  --force-recrawling\tWhen using the sitemap export will force all the timestamp
                    \tthere included to refer to correspond at least to now. In
                    \tthis way crawlers are going to crawl all the content again.
                    \tThis is useful in case of a major update in the detailed
                    \tview of records.
""",
              version=__revision__,
              # --force-recrawling is handled above and is therefore not
              # part of the option spec given to task_init.
              specific_params=("w:", ["wjob="]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_submit_check_options_fnc=task_submit_check_options,
              task_run_fnc=task_run_core)
Beispiel #23
0
def main():
    """Build the arXiv PDF checker bibtask and submit it via task_init."""
    summary = """Daemon that checks if we have the latest version of arxiv PDFs"""
    # Help text is defined inline for this task.
    usage = """
  Scheduled (daemon) options:
  -i, --id       Record id to check.

  Examples:
   (run a daemon job)
      arxiv-pdf-checker

"""
    # -i / --id is the only task-specific option and takes a value.
    option_spec = ("i:", ["id="])
    task_init(authorization_action="runarxivpdfchecker",
              authorization_msg="Arxiv Pdf Checker Task Submission",
              description=summary,
              help_specific_usage=usage,
              version="Invenio v%s" % CFG_VERSION,
              specific_params=option_spec,
              task_submit_elaborate_specific_parameter_fnc=cb_parse_option,
              task_run_fnc=task_run_core)
Beispiel #24
0
def main():
    """Submit the arXiv PDF checker task through ``task_init``."""
    short_options = "i:"
    long_options = ["id="]

    # Task-specific part of the command line help.
    help_text = """
  Scheduled (daemon) options:
  -i, --id       Record id to check.

  Examples:
   (run a daemon job)
      arxiv-pdf-checker

"""

    task_init(
        authorization_action='runarxivpdfchecker',
        authorization_msg="Arxiv Pdf Checker Task Submission",
        description=(
            "Daemon that checks if we have the latest version of arxiv PDFs"),
        help_specific_usage=help_text,
        version="Invenio v%s" % CFG_VERSION,
        specific_params=(short_options, long_options),
        task_submit_elaborate_specific_parameter_fnc=cb_parse_option,
        task_run_fnc=task_run_core,
    )
Beispiel #25
0
def main():
    """Entry point of the WebColl bibtask.

    The bibtask machinery and the WebColl callbacks are imported locally,
    then everything is registered with ``task_init``.
    """
    from invenio.legacy.bibsched.bibtask import task_init
    from invenio.legacy.websearch.webcoll import (
        task_submit_elaborate_specific_parameter, task_submit_check_options,
        task_run_core, __revision__)

    about = """Description:
    webcoll updates the collection cache (record universe for a
    given collection plus web page elements) based on invenio.conf and DB
    configuration parameters. If the collection name is passed as an argument,
    only this collection's cache will be updated. If the recursive option is
    set as well, the collection's descendants will also be updated.\n"""

    # One literal per line of the rendered help text.
    usage = (
        "  -c, --collection\t Update cache for the given collection only. [all]\n"
        "  -r, --recursive\t Update cache for the given collection and all its\n"
        "\t\t\t descendants (to be used in combination with -c). [no]\n"
        "  -q, --quick\t\t Skip webpage cache update for those collections whose\n"
        "\t\t\t reclist was not changed. Note: if you use this option, it is advised\n"
        "\t\t\t to schedule, e.g. a nightly 'webcoll --force'. [no]\n"
        "  -f, --force\t\t Force update even if cache is up to date. [no]\n"
        "  -p, --part\t\t Update only certain cache parts (1=reclist, 2=webpage). [both]\n"
        "  -l, --language\t Update pages in only certain language (e.g. fr,it,...). [all]\n"
    )

    short_options = "c:rqfp:l:"
    long_options = [
        "collection=", "recursive", "quick", "force", "part=", "language=",
    ]

    task_init(
        authorization_action="runwebcoll",
        authorization_msg="WebColl Task Submission",
        description=about,
        help_specific_usage=usage,
        version=__revision__,
        specific_params=(short_options, long_options),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core,
    )
Beispiel #26
0
def main():
    """Construct and submit the BibExport bibtask.

    A ``--force-recrawling`` argument short-circuits task submission:
    ``force_recrawling()`` runs, a message is printed and the process
    exits.
    """
    recrawl_requested = '--force-recrawling' in sys.argv
    if recrawl_requested:
        force_recrawling()
        print("Recrawling forced")
        # NOTE(review): status 1 is used here although nothing failed --
        # confirm this is intentional.
        sys.exit(1)

    # Keyword arguments for task_init, collected in one place.
    task_settings = {
        'authorization_action': 'runbibexport',
        'authorization_msg': "BibExport Task Submission",
        'help_specific_usage': """Export options:
  -w,  --wjob=j1[,j2]\tRun specific exporting jobs j1, j2, etc (e.g. 'sitemap').
  --force-recrawling\tWhen using the sitemap export will force all the timestamp
                    \tthere included to refer to correspond at least to now. In
                    \tthis way crawlers are going to crawl all the content again.
                    \tThis is useful in case of a major update in the detailed
                    \tview of records.
""",
        'version': __revision__,
        'specific_params': ("w:", ["wjob="]),
        'task_submit_elaborate_specific_parameter_fnc':
            task_submit_elaborate_specific_parameter,
        'task_submit_check_options_fnc': task_submit_check_options,
        'task_run_fnc': task_run_core,
    }
    task_init(**task_settings)
Beispiel #27
0
def main():
    """Main function that constructs the BibSort bibtask.

    Passes the BibSort-specific help text, the getopt specification and
    the option-parsing/run callbacks to ``task_init``.
    """
    # The help text has to agree with the getopt specification below: the
    # switch that processes every record is '-R' (rebalance); no '-B'
    # short option is registered.
    usage = """
 Specific options:
  -l, --load-config     Loads the configuration from bibsort.conf into the
                        database
  -d, --dump-config     Outputs a database dump in form of a config file
  -p, --print-sorting-methods
                        Prints the available sorting methods
  -R, --rebalance       Runs the sorting methods given in '--methods' and
                        rebalances all the buckets. If no method is
                        specified, the rebalance will be done for all
                        the methods in the config file.
  -S, --update-sorting  Runs the sorting methods given in '--methods' for the
                        recids given in '--id'. If no method is
                        specified, the update will be done for all the
                        methods in the config file. If no recids are
                        specified, the update will be done for all the records
                        that have been modified/inserted from the last
                        run of the sorting. If you want to run the
                        sorting for all records, you should use the '-R'
                        option
  -M, --methods=METHODS Specify the sorting methods for which the
                        update_sorting or rebalancing will run (ex:
                        --methods=method1,method2,method3).
  -i, --id=RECIDS       Specify the records for which the update_sorting will
                        run (ex: --id=1,2-56,72)
"""
    # A trailing ':' (short) or '=' (long) marks options that take a value.
    short_options = "ldpRSM:i:"
    long_options = [
        "load-config", "dump-config", "print-sorting-methods",
        "rebalance", "update-sorting", "methods=", "id=",
    ]

    task_init(authorization_action='runbibsort',
              authorization_msg="BibSort Task Submission",
              description="",
              help_specific_usage=usage,
              version=__revision__,
              specific_params=(short_options, long_options),
              task_submit_elaborate_specific_parameter_fnc=
              task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
Beispiel #28
0
def main():
    """
    Initialises the BibArchive task.

    Registers the BibArchive help text, getopt specification and callbacks
    with ``task_init``.
    """
    usage = """\
-r, --record=RECID_VERSION      Create an archive package of specific record
-m, --mount=RECID_VERSION           View a tree of a given record.
                                    Default VERSION is latest.
-d, --delete=RECID_VERSION      Delete the latest package for a given record.
                                    Default VERSION is latest.
-D, --delete-all=RECID          Delete all archive packages for a given record.
--PURGE                 Purge all archive packages.

"""
    # Every long option advertised in the help must be registered here,
    # otherwise getopt rejects it.  "record=" was missing although
    # '--record=RECID_VERSION' is documented above.
    # NOTE(review): presumably task_submit_esp already handles '--record'
    # alongside '-r' -- confirm.
    long_options = ["record=", "mount=", "delete=", "delete-all=", "PURGE"]

    task_init(
        authorization_action='bibarchive',
        authorization_msg="BibArchive Task Submission",
        help_specific_usage=usage,
        version=__revision__,
        specific_params=("r:m:d:D:", long_options),
        task_submit_elaborate_specific_parameter_fnc=task_submit_esp,
        task_run_fnc=task_run_core
    )
Beispiel #29
0
def main():
    """Submit the DB dump bibtask through ``task_init``.

    The help text embeds ``CFG_LOGDIR`` as the default output directory.
    """
    usage_template = """\
  -o, --output=DIR      Output directory. [default=%s]
  -n, --number=NUM      Keep up to NUM previous dump files. [default=5]
  --params=PARAMS       Specify your own mysqldump parameters. Optional.
  --compress            Compress dump directly into gzip.
  -S, --slave=HOST      Perform the dump from a slave, if no host use CFG_DATABASE_SLAVE.
  --ignore-tables=regex Ignore tables matching the given regular expression
  --disable-workers     Disable any task queue workers while dumping.

Examples:
    $ dbdump --ignore-tables '^(idx|rnk)'
    $ dbdump -n3 -o/tmp -s1d -L 02:00-04:00
"""
    short_options = "n:o:p:S:"
    long_options = [
        "number=",
        "output=",
        "params=",
        "slave=",
        "compress",
        'ignore-tables=',
        "dump-on-slave-helper",
        "disable-workers",
    ]

    task_init(
        authorization_action='rundbdump',
        authorization_msg="DB Dump Task Submission",
        help_specific_usage=usage_template % CFG_LOGDIR,
        specific_params=(short_options, long_options),
        task_submit_elaborate_specific_parameter_fnc=(
            _dbdump_elaborate_submit_param),
        task_run_fnc=_dbdump_run_task_core,
    )
Beispiel #30
0
def main():
    """Register the WebColl bibtask with BibSched.

    Imports ``task_init`` and the WebColl callbacks locally and then
    submits the task definition.
    """
    from invenio.legacy.bibsched.bibtask import task_init
    from invenio.legacy.websearch.webcoll import (
        task_submit_elaborate_specific_parameter, task_submit_check_options,
        task_run_core, __revision__)

    # Rendered help text, one entry per output line.
    help_lines = (
        "  -c, --collection\t Update cache for the given collection only. [all]\n",
        "  -r, --recursive\t Update cache for the given collection and all its\n",
        "\t\t\t descendants (to be used in combination with -c). [no]\n",
        "  -q, --quick\t\t Skip webpage cache update for those collections whose\n",
        "\t\t\t reclist was not changed. Note: if you use this option, it is advised\n",
        "\t\t\t to schedule, e.g. a nightly 'webcoll --force'. [no]\n",
        "  -f, --force\t\t Force update even if cache is up to date. [no]\n",
        "  -p, --part\t\t Update only certain cache parts (1=reclist, 2=webpage). [both]\n",
        "  -l, --language\t Update pages in only certain language (e.g. fr,it,...). [all]\n",
    )

    settings = dict(
        authorization_action="runwebcoll",
        authorization_msg="WebColl Task Submission",
        description="""Description:
    webcoll updates the collection cache (record universe for a
    given collection plus web page elements) based on invenio.conf and DB
    configuration parameters. If the collection name is passed as an argument,
    only this collection's cache will be updated. If the recursive option is
    set as well, the collection's descendants will also be updated.\n""",
        help_specific_usage="".join(help_lines),
        version=__revision__,
        specific_params=(
            "c:rqfp:l:",
            ["collection=", "recursive", "quick", "force", "part=",
             "language="],
        ),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core,
    )
    task_init(**settings)
Beispiel #31
0
def main():
    """Constructs the BibCheck bibtask.

    ``-l/--list-plugins`` and ``-r/--list-rules`` are handled directly:
    the matching listing is printed and the function returns without
    submitting a task.  In every other case the task is submitted through
    ``task_init``.
    """
    usage = """

  Scheduled (daemon) options:

  -l, --list-plugins       List all plugins and exit
  -r, --list-rules         List all rules and exit
  -e, --enable-rules=rules Enable only some rules (comma separated)
  -a, --all=rules          Run the specified rules in all matching records (not
                               only modified ones)
  -i, --id=ids             Run only in the specified record ids or ranges (comma
                               separated), ignoring all other filters
  -q, --queue=queue        Create tickets in the specified RT Queue (Default
                               Bibcheck)
  -t, --no-tickets         Don't create any ticket in RT. Useful for debugging
  -b, --no-upload          Don't upload changes to the database
  -n, --dry-run            Like --no-tickets and --no-upload
  -c, --config             By default bibcheck reads the file rules.cfg. This
                           allows to specify a different config file

  If any of the options --id, --no-tickets, --no-upload or --dry-run is enabled,
    bibcheck won't update the last-run-time of a task in the database.

  Examples:
   (run a periodical daemon job that checks the rules from rules.cfg)
      bibcheck -s1d

   (Run bibcheck on records 1, 2, 3, 5, 6, 7, 8, 9 and 10)
      bibcheck -i 1,2,3,5-10

   (Run only the rule foobar in all the records)
      bibcheck -a foobar -e foobar

   (Run only the rules foo and bar on modified records)
      bibcheck -e foo,bar
    """
    # Pre-scan for the two "list and exit" switches.  This getopt call only
    # knows -l/-r, so any other option raises GetoptError; the shortcuts
    # are then skipped and task_init does the real parsing.
    try:
        opts = getopt.getopt(sys.argv[1:], "lr",
                                   ["list-plugins", "list-rules"])[0]
    except getopt.GetoptError:
        opts = []

    for opt, dummy in opts:
        if opt in ["-l", "--list-plugins"]:
            print_plugins()
            return
        elif opt in ["-r", "--list-rules"]:
            print_rules()
            return

    # Build and submit the task.
    # "config=" needs the trailing '=' so that the long form accepts a file
    # name just like the short form "c:" does; without it getopt rejects
    # '--config=FILE'.
    task_init(authorization_action='runbibcheck',
              authorization_msg="BibCheck Task Submission",
              description="",
              help_specific_usage=usage,
              version="Invenio v%s" % CFG_VERSION,
              specific_params=("hvtbnV:e:a:i:q:c:", ["help", "version",
                  "verbose=", "enable-rules=", "all=", "id=", "queue=",
                  "no-tickets", "no-upload", "dry-run", "config="]),
              task_submit_elaborate_specific_parameter_fnc=task_parse_options,
              task_run_fnc=task_run_core)
Beispiel #32
0
def main():
    """Entry point of the BibEncode bibtask.

    Defines the task-specific help text and getopt specification and
    registers them, together with the callbacks, through ``task_init``.
    """
    usage = """
  General options:
  -m, --mode=           Selects the mode for BibEncode
                           Modes: 'meta', 'encode', 'extract', 'daemon', 'batch'
  -i, --input=          Input file
  -o, --output=         Output file


  Options for mode 'meta':
  -D, --dump=           Dumps metadata from a video to a file
                           Options: "ffprobe", "mediainfo", "pbcore"
  -W, --write=          Write metadata to a copy of the file
                            Either a filename or a serialized JSON object.

  Options for mode 'encode'
  -p                    Profile to use for encoding
  --acodec=             Audiocodec for the transcoded video
  --vcodec=             Videocodec for the transcoded video
  --abitrate=           Bitrate auf the audio stream
  --vbitrate=           Bitrate of the video stream
  --resolution=         Resolution of the transcoded video
  --passes=             Number of passes
  --special=            Pure FFmpeg options that will be appended to the command
  --specialfirst=       Pure FFmpeg options for the first pass
  --specialsecond=      Pure FFmpeg options for the second pass
  --width=              Horizontal resolution
  --height=             Vertical resolution
  --aspect=             Aspect ratio fallback if undetectable

  Options for mode 'extract':
  -p                    Profile to use for frame extraction
  --resolution=         Resolution of the extracted frame(s)
  --number=             Number of frames to extract
  --positions=          Specific positions inside the video to extract from
                            Python list notation
                            Either in seconds like '10' or '10.5'
                            Or as a timecode like '00:00:10.5'
                            Example:'[10, 10.5, 00:00:12.5, 20, 00:08:45:11.26]'
  -o, --output=         Output filename can be substituted by bibencode:
                            %(input)s for the input filename
                            %(timecode)s for the timecode
                            %(size)s for the frame size
                            %(number)d for sequential numbers
  --width=              Horizontal resolution
  --height=             Vertical resolution
  --aspect=             Aspect ratio fallback if undetectable

  Options for mode 'batch':
  --collection=         Updates the whole collection acc. to a batch template
  --search=             Updates all records matching the search query

  Options for mode 'daemon':
  --newjobdir=          Optional folder to look for new job descriptions
  --oldjobdir=          Optional folder to move the job desc. of done jobs

"""
    short_options = "m:i:o:p:W:D:"
    # Long options; a trailing '=' means the option takes a value.
    long_options = [
        "mode=", "input=", "output=", "write=", "dump=",
        "acodec=", "vcodec=", "abitrate=", "vbitrate=",
        "resolution=", "passes=",
        "special=", "specialfirst=", "specialsecond=",
        "height=", "width=", "number=", "positions=",
        "substitute",
        "newjobdir=", "oldjobdir=",
        "recid=", "aspect=", "collection=", "search=",
    ]

    task_init(
        authorization_action='runbibencode',
        authorization_msg="Bibencode Task Submission",
        help_specific_usage=usage,
        version=__revision__,
        specific_params=(short_options, long_options),
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core,
    )
Beispiel #33
0
def main():
    """Entry point of the BibCatalog bibtask.

    With ``-l``/``--list-tickets`` the enabled ticket plugins are printed
    (plugin loading errors go to stderr) and the function returns without
    submitting a task.  Otherwise the task is submitted via ``task_init``.
    """
    usage = """

  Non-daemon options:

  -l, --list-tickets      List available tickets.


  Scheduled (daemon) options:

  Selection of records (Required):

  -a, --new               Run on all newly inserted records.
  -m, --modified          Run on all newly modified records.
  -i, --recids=           Record id for extraction.
  -c, --collections=      Run on all records in a specific collection.
  -q, --query=            Specify a search query to fetch records to run on.
  -r, --reportnumbers=    Run on all records related with specific arXiv ids.

  Selection of tickets (Required):

  --tickets=         Specify which tickets to run.
  --all-tickets      Run on all tickets

  Examples:
   (run a periodical daemon job on a given ticket template)
      bibcatalog -a --tickets metadata_curation -s1h
   (run all tickets on a set of records)
      bibcatalog --recids 1,2 -i 3 --all-tickets
   (run some tickets on a collection)
      bibcatalog --collections "Articles" --tickets metadata_curation,reference_curation

    """
    # Pre-scan for the listing switch only; any other option makes getopt
    # raise, in which case the shortcut is skipped.
    try:
        listing_opts = getopt.getopt(sys.argv[1:], "l", ["list-tickets"])[0]
    except getopt.GetoptError:
        listing_opts = []

    wants_listing = any(flag in ("-l", "--list-tickets")
                        for flag, _unused in listing_opts)
    if wants_listing:
        all_plugins, error_messages = load_ticket_plugins()
        if error_messages:
            # Broken plugins are only reported, not treated as fatal.
            print >>sys.stderr, "\n".join(error_messages)
        print("Enabled tickets:")
        for plugin in all_plugins.get_enabled_plugins():
            print(" " + plugin)
        print("Run `$ bibcatalog --tickets=<ticket-name>` to select a ticket template.")
        return

    # Build and submit the task.
    short_options = "hVv:i:c:q:r:am"
    long_options = [
        "help", "version", "verbose=",
        "recids=", "collections=", "query=", "reportnumbers=",
        "new", "modified",
        "tickets=", "all-tickets",
    ]
    task_init(
        authorization_action='runbibcatalog',
        authorization_msg="BibCatalog Task Submission",
        description="",
        help_specific_usage=usage,
        version="Invenio v%s" % CFG_VERSION,
        specific_params=(short_options, long_options),
        task_submit_elaborate_specific_parameter_fnc=task_parse_options,
        task_submit_check_options_fnc=task_check_options,
        task_run_fnc=task_run_core,
    )
Beispiel #34
0
def main():
    """Main that constructs the BibReformat bibtask.

    Registers the description, the option help, the getopt specification
    and the callbacks with ``task_init``.
    """
    description = """
BibReformat formats the records and saves the produced outputs for
later retrieval.

BibReformat is usually run periodically via BibSched in order to (1)
format new records in the database and to (2) reformat records for
which the meta data has been modified.

BibReformat has to be run manually when (3) format config files have
been modified, in order to see the changes in the web interface.

Although it is not necessary to run BibReformat to display formatted
records in the web interface, BibReformat allows to improve serving
speed by precreating the outputs. It is suggested to run
BibReformat for 'HB' output.

Option -m cannot be used at the same time as option -c.
Option -c prevents from finding records in private collections.

Examples:
  bibreformat                    Format all new or modified records (in HB and RECJSON).
  bibreformat -o HD              Format all new or modified records in HD.
  bibreformat -o HD,HB           Format all new or modified records in HD and HB.

  bibreformat -a                 Force reformatting all records (in HB).
  bibreformat -c 'Photos'        Force reformatting all records in 'Photos' collection (in HB).
  bibreformat -c 'Photos' -o HD  Force reformatting all records in 'Photos' collection in HD.

  bibreformat -i 15              Force reformatting record 15 (in HB).
  bibreformat -i 15:20           Force reformatting records 15 to 20 (in HB).
  bibreformat -i 15,16,17        Force reformatting records 15, 16 and 17 (in HB).

  bibreformat -n                 Show how many records are to be (re)formatted.
  bibreformat -n -c 'Articles'   Show how many records are to be (re)formatted in 'Articles' collection.

  bibreformat -oHB -s1h          Format all new and modified records every hour, in HB.
"""
    # The long option registered with getopt below is "format=", so the
    # help must advertise '--format'; the previously documented
    # '--formats' is rejected by getopt.
    usage = """  -o,  --format          \t Specify output format/s (default HB)
  -n,  --noprocess      \t Count records to be formatted (no processing done)
Reformatting options:
  -a,  --all            \t Force reformatting all records
  -c,  --collection     \t Force reformatting records by collection
  -f,  --field          \t Force reformatting records by field
  -p,  --pattern        \t Force reformatting records by pattern
  -i,  --id             \t Force reformatting records by record id(s)
  --no-missing          \t Ignore reformatting records without format
Pattern options:
  -m,  --matching       \t Specify if pattern is exact (e), regular expression (r),
                        \t partial (p), any of the words (o) or all of the words (a)
"""
    task_init(
        authorization_action='runbibformat',
        authorization_msg="BibReformat Task Submission",
        description=description,
        help_specific_usage=usage,
        version=__revision__,
        specific_params=("ac:f:p:lo:nm:i:", [
            "all", "collection=", "matching=", "field=", "pattern=", "format=",
            "noprocess", "id=", "no-missing"
        ]),
        task_submit_check_options_fnc=task_submit_check_options,
        task_submit_elaborate_specific_parameter_fnc=
        task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
Beispiel #35
0
def main():
    """Register the BibEncode bibtask with BibSched via ``task_init``."""
    # Options that take a value get '=' appended below; the single plain
    # flag ("substitute") is listed without it.  Order matches the
    # original specification.
    long_options = [
        "mode=",
        "input=",
        "output=",
        "write=",
        "dump=",
        "acodec=",
        "vcodec=",
        "abitrate=",
        "vbitrate=",
        "resolution=",
        "passes=",
        "special=",
        "specialfirst=",
        "specialsecond=",
        "height=",
        "width=",
        "number=",
        "positions=",
        "substitute",
        "newjobdir=",
        "oldjobdir=",
        "recid=",
        "aspect=",
        "collection=",
        "search=",
    ]
    getopt_spec = ("m:i:o:p:W:D:", long_options)

    help_text = """
  General options:
  -m, --mode=           Selects the mode for BibEncode
                           Modes: 'meta', 'encode', 'extract', 'daemon', 'batch'
  -i, --input=          Input file
  -o, --output=         Output file


  Options for mode 'meta':
  -D, --dump=           Dumps metadata from a video to a file
                           Options: "ffprobe", "mediainfo", "pbcore"
  -W, --write=          Write metadata to a copy of the file
                            Either a filename or a serialized JSON object.

  Options for mode 'encode'
  -p                    Profile to use for encoding
  --acodec=             Audiocodec for the transcoded video
  --vcodec=             Videocodec for the transcoded video
  --abitrate=           Bitrate auf the audio stream
  --vbitrate=           Bitrate of the video stream
  --resolution=         Resolution of the transcoded video
  --passes=             Number of passes
  --special=            Pure FFmpeg options that will be appended to the command
  --specialfirst=       Pure FFmpeg options for the first pass
  --specialsecond=      Pure FFmpeg options for the second pass
  --width=              Horizontal resolution
  --height=             Vertical resolution
  --aspect=             Aspect ratio fallback if undetectable

  Options for mode 'extract':
  -p                    Profile to use for frame extraction
  --resolution=         Resolution of the extracted frame(s)
  --number=             Number of frames to extract
  --positions=          Specific positions inside the video to extract from
                            Python list notation
                            Either in seconds like '10' or '10.5'
                            Or as a timecode like '00:00:10.5'
                            Example:'[10, 10.5, 00:00:12.5, 20, 00:08:45:11.26]'
  -o, --output=         Output filename can be substituted by bibencode:
                            %(input)s for the input filename
                            %(timecode)s for the timecode
                            %(size)s for the frame size
                            %(number)d for sequential numbers
  --width=              Horizontal resolution
  --height=             Vertical resolution
  --aspect=             Aspect ratio fallback if undetectable

  Options for mode 'batch':
  --collection=         Updates the whole collection acc. to a batch template
  --search=             Updates all records matching the search query

  Options for mode 'daemon':
  --newjobdir=          Optional folder to look for new job descriptions
  --oldjobdir=          Optional folder to move the job desc. of done jobs

"""

    settings = dict(
        authorization_action='runbibencode',
        authorization_msg="Bibencode Task Submission",
        help_specific_usage=help_text,
        version=__revision__,
        specific_params=getopt_spec,
        task_submit_elaborate_specific_parameter_fnc=(
            task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=task_submit_check_options,
        task_run_fnc=task_run_core,
    )
    task_init(**settings)
Beispiel #36
0
def main():
    """Start the tool.

    If the command line arguments are those of the 'manual' mode, then
    starts a manual one-time harvesting. Else trigger a BibSched task
    for automated harvesting based on the OAIHarvest admin settings.

    Manual mode is selected when ``-v``/``--verb`` is among the parsed
    options; the URL to harvest is then taken from the last positional
    argument and the function returns once ``getter.harvest`` is done.
    Otherwise the ``-r``/``--repository`` and ``--workflow`` arguments
    found in ``sys.argv`` are validated and the task is handed over to
    ``task_init``.
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:

        opts, args = getopt.getopt(sys.argv[1:], "o:v:m:p:i:s:f:u:r:c:k:l:w:",
                                   ["output=",
                                    "verb=",
                                    "method=",
                                    "metadataPrefix=",
                                    "identifier=",
                                    "set=",
                                    "from=",
                                    "until=",
                                    "resumptionToken=",
                                    "certificate=",
                                    "key=",
                                    "user=",
                                    "password=",
                                    "workflow=",
                                    ])

        # So everything went smoothly: start harvesting in manual mode
        if any(opt in ('-v', '--verb') for opt, dummy in opts):
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []
            # get options and arguments
            for opt, opt_value in opts:
                if opt in ["-v", "--verb"]:
                    http_param_dict['verb'] = opt_value
                elif opt in ["-m", '--method']:
                    # Any value other than GET/POST is silently ignored and
                    # the default method is kept.
                    if opt_value == "GET" or opt_value == "POST":
                        method = opt_value
                elif opt in ["-p", "--metadataPrefix"]:
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ["-i", "--identifier"]:
                    http_param_dict['identifier'] = opt_value
                elif opt in ["-s", "--set"]:
                    sets = opt_value.split()
                elif opt in ["-f", "--from"]:
                    http_param_dict['from'] = opt_value
                elif opt in ["-u", "--until"]:
                    http_param_dict['until'] = opt_value
                elif opt in ["-r", "--resumptionToken"]:
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ["-o", "--output"]:
                    output = opt_value
                elif opt in ["-c", "--certificate"]:
                    cert_file = opt_value
                elif opt in ["-k", "--key"]:
                    key_file = opt_value
                elif opt in ["-l", "--user"]:
                    user = opt_value
                elif opt in ["-w", "--password"]:
                    password = opt_value
                elif opt in ["-V", "--version"]:
                    # NOTE(review): '-V'/'--version' are not declared in the
                    # getopt specification above, so getopt raises before
                    # this branch can be reached.
                    print(__revision__)
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)

            if len(args) > 0:
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                (addressing_scheme, network_location, path, dummy1,
                 dummy2, dummy3) = urllib.parse.urlparse(base_url)
                secure = (addressing_scheme == "https")

                if (cert_file and not key_file) or \
                        (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")

                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    # No password on the command line: ask for it
                    # interactively.
                    if not secure:
                        sys.stderr.write(
                            "*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt as error:
                        sys.stderr.write("\n%s\n" % (error,))
                        sys.exit(0)

                getter.harvest(network_location, path,
                               http_param_dict, method,
                               output, sets, secure, user,
                               password, cert_file,
                               key_file)

                sys.stderr.write("Harvesting completed at: %s\n\n" %
                                 time.strftime("%Y-%m-%d %H:%M:%S --> ",
                                               time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now
            identifier_given = any(opt in ('-i', '--identifier')
                                   for opt, dummy in opts)
            # 'and' binds tighter than 'or'; the parentheses only make the
            # evaluation order of the original condition explicit.
            if (not identifier_given and len(args) > 1) or \
                    (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")
    except getopt.error:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass
        # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.

    num_of_critical_parameter = 0
    num_of_critical_parameterb = 0
    repositories = []

    for opt in sys.argv[1:]:
        # Compare whole arguments: a substring test ('opt in "-r"') would
        # also count values such as '-', 'r' or '-w'.
        if opt in ("-r", "--repository"):
            num_of_critical_parameter += 1
        elif opt == "--workflow":
            num_of_critical_parameterb += 1
        if num_of_critical_parameter > 1 or num_of_critical_parameterb > 1:
            usage(1, "You can't specify twice -r or --workflow")

    if num_of_critical_parameter == 1:
        if "-r" in sys.argv:
            position = sys.argv.index("-r")
        else:
            position = sys.argv.index("--repository")
        # The repository names are the comma-separated value that follows
        # the option on the command line.
        repositories = sys.argv[position + 1].split(",")
        if len(repositories) > 1 and \
                ("-i" in sys.argv or "--identifier" in sys.argv):
            usage(1,
                  "It is impossible to harvest an identifier from several "
                  "repositories.")

    if num_of_critical_parameterb == 1:

        position = sys.argv.index("--workflow")
        workflows = sys.argv[position + 1].split(",")

        for workflow_candidate in workflows:
            if workflow_candidate not in registry_workflows:
                usage(1, "The workflow %s doesn't exist." % workflow_candidate)

    if num_of_critical_parameter == 1 and num_of_critical_parameterb == 0:
        # Only repositories given: each one must exist and carry a workflow
        # known to the registry.
        for name_repository in repositories:
            try:
                oaiharvest_instance = OaiHARVEST.get(
                    OaiHARVEST.name == name_repository).one()
                if oaiharvest_instance.workflows not in registry_workflows:
                    usage(1,
                          "The repository %s doesn't have a valid workflow specified." % name_repository)
            except orm.exc.NoResultFound:
                usage(1,
                      "The repository %s doesn't exist in our database." % name_repository)

    elif num_of_critical_parameter == 1 and num_of_critical_parameterb == 1:
        # Repositories and workflow given: only the existence of each
        # repository is checked here.
        for name_repository in repositories:
            try:
                OaiHARVEST.get(OaiHARVEST.name == name_repository).one()
            except orm.exc.NoResultFound:
                usage(1,
                      "The repository %s doesn't exist in our database." % name_repository)

        print("A workflow has been specified, overriding the repository one.")

    task_set_option("repository", None)
    task_set_option("dates", None)
    task_set_option("workflow", None)
    task_set_option("identifiers", None)
    task_init(authorization_action='runoaiharvest',
              authorization_msg="oaiharvest Task Submission",
              description="""
Harvest records from OAI sources.
Manual vs automatic harvesting:
   - Manual harvesting retrieves records from the specified URL,
     with the specified OAI arguments. Harvested records are displayed
     on the standard output or saved to a file, but are not integrated
     into the repository. This mode is useful to 'play' with OAI
     repositories or to build special harvesting scripts.
   - Automatic harvesting relies on the settings defined in the OAI
     Harvest admin interface to periodically retrieve the repositories
     and sets to harvest. It also take care of harvesting only new or
     modified records. Records harvested using this mode are converted
     and integrated into the repository, according to the settings
     defined in the OAI Harvest admin interface.

Examples:
Manual (single-shot) harvesting mode:
   Save to /tmp/z.xml records from CDS added/modified between 2004-04-01
   and 2004-04-02, in MARCXML:
     $ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml -o/tmp/z.xml http://cds.cern.ch/oai2d
Automatic (periodical) harvesting mode:
   Schedule daily harvesting of all repositories defined in OAIHarvest admin:
     $ oaiharvest -s 24h
   Schedule daily harvesting of repository 'arxiv', defined in OAIHarvest admin:
     $ oaiharvest -r arxiv -s 24h
   Harvest in 10 minutes from 'pubmed' repository records added/modified
   between 2005-05-05 and 2005-05-10:
     $ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m
""",

              help_specific_usage='Manual single-shot harvesting mode:\n'
                                  '  -o, --output         specify output file\n'
                                  '  -v, --verb           OAI verb to be executed\n'
                                  '  -m, --method         http method (default POST)\n'
                                  '  -p, --metadataPrefix metadata format\n'
                                  '  -i, --identifier     OAI identifier\n'
                                  '  -s, --set            OAI set(s). Whitespace-separated list\n'
                                  '  -r, --resumptionToken Resume previous harvest\n'
                                  '  -f, --from           from date (datestamp)\n'
                                  '  -u, --until          until date (datestamp)\n'
                                  '  -c, --certificate    path to public certificate (in case of certificate-based harvesting)\n'
                                  '  -k, --key            path to private key (in case of certificate-based harvesting)\n'
                                  '  -l, --user           username (in case of password-protected harvesting)\n'
                                  '  -w, --password       password (in case of password-protected harvesting)\n'
                                  'Deamon mode (periodical or one-shot harvesting mode):\n'
                                  '  -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
                                  '  -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n'
                                  '  -i, --identifier     OAI identifier if wished to run in as a task.\n'
                                  '  --notify-email-to    Receive notifications on given email on successful upload and/or finished harvest.\n'
                                  '  --workflow       specify the workflow to execute.\n'
                                  '  --create-ticket-in   Provide desired ticketing queue to create a ticket in it on upload and/or finished harvest.\n'
                                  '                       Requires a configured ticketing system (BibCatalog).\n',
              specific_params=(
                  "r:i:d:W",
                  ["repository=", "identifier=", "dates=", "workflow=",
                   "notify-email-to=", "create-ticket-in="]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
Beispiel #37
0
def main():
    """Build and submit the BibReformat bibtask.

    Passes the task description, the option help, the getopt
    specification and this module's submit/check/run callbacks to
    ``task_init``.
    """
    task_init(authorization_action='runbibformat',
              authorization_msg="BibReformat Task Submission",
              description="""
BibReformat formats the records and saves the produced outputs for
later retrieval.

BibReformat is usually run periodically via BibSched in order to (1)
format new records in the database and to (2) reformat records for
which the meta data has been modified.

BibReformat has to be run manually when (3) format config files have
been modified, in order to see the changes in the web interface.

Although it is not necessary to run BibReformat to display formatted
records in the web interface, BibReformat allows to improve serving
speed by precreating the outputs. It is suggested to run
BibReformat for 'HB' output.

Option -m cannot be used at the same time as option -c.
Option -c prevents from finding records in private collections.

Examples:
  bibreformat                    Format all new or modified records (in HB and RECJSON).
  bibreformat -o HD              Format all new or modified records in HD.
  bibreformat -o HD,HB           Format all new or modified records in HD and HB.

  bibreformat -a                 Force reformatting all records (in HB).
  bibreformat -c 'Photos'        Force reformatting all records in 'Photos' collection (in HB).
  bibreformat -c 'Photos' -o HD  Force reformatting all records in 'Photos' collection in HD.

  bibreformat -i 15              Force reformatting record 15 (in HB).
  bibreformat -i 15:20           Force reformatting records 15 to 20 (in HB).
  bibreformat -i 15,16,17        Force reformatting records 15, 16 and 17 (in HB).

  bibreformat -n                 Show how many records are to be (re)formatted.
  bibreformat -n -c 'Articles'   Show how many records are to be (re)formatted in 'Articles' collection.

  bibreformat -oHB -s1h          Format all new and modified records every hour, in HB.
""",
              # The long option names shown here must match the getopt
              # specification in specific_params below ("format=", not
              # "formats=").
              help_specific_usage="""  -o,  --format          \t Specify output format/s (default HB)
  -n,  --noprocess      \t Count records to be formatted (no processing done)
Reformatting options:
  -a,  --all            \t Force reformatting all records
  -c,  --collection     \t Force reformatting records by collection
  -f,  --field          \t Force reformatting records by field
  -p,  --pattern        \t Force reformatting records by pattern
  -i,  --id             \t Force reformatting records by record id(s)
  --no-missing          \t Ignore reformatting records without format
Pattern options:
  -m,  --matching       \t Specify if pattern is exact (e), regular expression (r),
                        \t partial (p), any of the words (o) or all of the words (a)
""",
              version=__revision__,
              # (short options, long options) in getopt syntax.
              specific_params=("ac:f:p:lo:nm:i:",
                               ["all",
                                "collection=",
                                "matching=",
                                "field=",
                                "pattern=",
                                "format=",
                                "noprocess",
                                "id=",
                                "no-missing"]),
              task_submit_check_options_fnc=task_submit_check_options,
              task_submit_elaborate_specific_parameter_fnc=
                 task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
Beispiel #38
0
def main():
    """Constructs the BibCatalog bibtask.

    When ``-l``/``--list-tickets`` is the command line, the enabled ticket
    plugins are listed on standard output and the function returns without
    submitting anything. In every other case the task is built and
    submitted through ``task_init``.
    """
    usage = """

  Non-daemon options:

  -l, --list-tickets      List available tickets.


  Scheduled (daemon) options:

  Selection of records (Required):

  -a, --new               Run on all newly inserted records.
  -m, --modified          Run on all newly modified records.
  -i, --recids=           Record id for extraction.
  -c, --collections=      Run on all records in a specific collection.
  -q, --query=            Specify a search query to fetch records to run on.
  -r, --reportnumbers=    Run on all records related with specific arXiv ids.

  Selection of tickets (Required):

  --tickets=         Specify which tickets to run.
  --all-tickets      Run on all tickets

  Examples:
   (run a periodical daemon job on a given ticket template)
      bibcatalog -a --tickets metadata_curation -s1h
   (run all tickets on a set of records)
      bibcatalog --recids 1,2 -i 3 --all-tickets
   (run some tickets on a collection)
      bibcatalog --collections "Articles" --tickets metadata_curation,reference_curation

    """
    try:
        opts, dummy = getopt.getopt(sys.argv[1:], "l", ["list-tickets"])
    except getopt.GetoptError:
        # The command line holds options other than the listing one: skip
        # the listing and fall through to task_init() below.
        opts = []

    for opt, dummy in opts:
        if opt in ["-l", "--list-tickets"]:
            all_plugins, error_messages = load_ticket_plugins()
            if error_messages:
                # We got broken plugins. We alert only for now.
                sys.stderr.write("\n".join(error_messages) + "\n")
            # Single-argument print(...) calls behave the same on Python 2
            # and Python 3.
            print("Enabled tickets:")
            for plugin in all_plugins.get_enabled_plugins():
                print(" " + plugin)
            print("Run `$ bibcatalog --tickets=<ticket-name>` to select a ticket template.")
            return

    # Build and submit the task
    task_init(authorization_action='runbibcatalog',
              authorization_msg="BibCatalog Task Submission",
              description="",
              help_specific_usage=usage,
              version="Invenio v%s" % CFG_VERSION,
              specific_params=("hVv:i:c:q:r:am",
                                ["help",
                                 "version",
                                 "verbose=",
                                 "recids=",
                                 "collections=",
                                 "query=",
                                 "reportnumbers=",
                                 "new",
                                 "modified",
                                 "tickets=",
                                 "all-tickets"]),
              task_submit_elaborate_specific_parameter_fnc=task_parse_options,
              task_submit_check_options_fnc=task_check_options,
              task_run_fnc=task_run_core)
Beispiel #39
0
def main():
    """Build and submit the BibRank bibtask.

    Passes the usage examples, the option help, the getopt specification
    and this module's submit/run callbacks to ``task_init``.
    """
    task_init(authorization_action='runbibrank',
            authorization_msg="BibRank Task Submission",
            description="""Ranking examples:
       bibrank -wjif -a --id=0-30000,30001-860000 --verbose=9
       bibrank -wjif -d --modified='2002-10-27 13:57:26'
       bibrank -wjif --rebalance --collection=Articles
       bibrank -wsbr -a -i 234-250,293,300-500 -u admin
       bibrank -u admin -w citation -E 10
       bibrank -u admin -w citation -A
""",
            # The long option names shown here must match the getopt
            # specification in specific_params below ("print-extcites=").
            help_specific_usage="""Ranking options:
 -w, --run=r1[,r2]         runs each rank method in the order given

 -c, --collection=c1[,c2]  select according to collection
 -i, --id=low[-high]       select according to doc recID
 -m, --modified=from[,to]  select according to modification date
 -l, --lastupdate          select according to last update

 -a, --add                 add or update words for selected records
 -d, --del                 delete words for selected records
 -S, --stat                show statistics for a method

 -R, --recalculate         recalculate weight data, used by word frequency
                           and citation methods, should be used if ca 1%
                           of the documents have been changed since last
                           time -R was used.  NOTE: This will replace the
                           entire set of weights, regardless of date/id
                           selection.

 -E, --print-extcites=NUM  print the top entries of the external cites table.
                           These are entries that should be entered in
                           your collection, since they have been cited
                           by NUM or more other records present in the
                           system.  Useful for cataloguers to input
                           external papers manually.

 -A --author-citations     Calculate author citations.

 Repairing options:
 -k,  --check              check consistency for all records in the table(s)
                           check if update of ranking data is necessary
 -r, --repair              try to repair all records in the table(s)
""",
            version=__revision__,
            # (short options, long options) in getopt syntax.
            specific_params=("AE:ladSi:m:c:kUrRM:f:w:", [
                "author-citations",
                "print-extcites=",
                "lastupdate",
                "add",
                "del",
                "repair",
                # NOTE(review): "maxmem" and "flush" take no value here while
                # the short options "M:" and "f:" do -- presumably these are
                # meant as pairs; confirm against the option handler.
                "maxmem",
                "flush",
                "stat",
                "rebalance",
                "id=",
                "collection=",
                "check",
                "modified=",
                "update",
                "run="]),
            task_submit_elaborate_specific_parameter_fnc=
                task_submit_elaborate_specific_parameter,
            task_run_fnc=task_run_core)
Beispiel #40
0
def main():
    """Start the tool.

    If the command line arguments are those of the 'manual' mode, then
    starts a manual one-time harvesting. Else trigger a BibSched task
    for automated harvesting based on the OAIHarvest admin settings.

    Manual mode is selected when ``-v``/``--verb`` is among the parsed
    options; the URL to harvest is then taken from the last positional
    argument and the function returns once ``getter.harvest`` is done.
    Otherwise the ``-r``/``--repository`` and ``--workflow`` arguments
    found in ``sys.argv`` are validated and the task is handed over to
    ``task_init``.
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:

        opts, args = getopt.getopt(sys.argv[1:], "o:v:m:p:i:s:f:u:r:c:k:l:w:",
                                   [
                                       "output=",
                                       "verb=",
                                       "method=",
                                       "metadataPrefix=",
                                       "identifier=",
                                       "set=",
                                       "from=",
                                       "until=",
                                       "resumptionToken=",
                                       "certificate=",
                                       "key=",
                                       "user=",
                                       "password=",
                                       "workflow=",
                                   ])

        # So everything went smoothly: start harvesting in manual mode
        if any(opt in ('-v', '--verb') for opt, dummy in opts):
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []
            # get options and arguments
            for opt, opt_value in opts:
                if opt in ["-v", "--verb"]:
                    http_param_dict['verb'] = opt_value
                elif opt in ["-m", '--method']:
                    # Any value other than GET/POST is silently ignored and
                    # the default method is kept.
                    if opt_value == "GET" or opt_value == "POST":
                        method = opt_value
                elif opt in ["-p", "--metadataPrefix"]:
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ["-i", "--identifier"]:
                    http_param_dict['identifier'] = opt_value
                elif opt in ["-s", "--set"]:
                    sets = opt_value.split()
                elif opt in ["-f", "--from"]:
                    http_param_dict['from'] = opt_value
                elif opt in ["-u", "--until"]:
                    http_param_dict['until'] = opt_value
                elif opt in ["-r", "--resumptionToken"]:
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ["-o", "--output"]:
                    output = opt_value
                elif opt in ["-c", "--certificate"]:
                    cert_file = opt_value
                elif opt in ["-k", "--key"]:
                    key_file = opt_value
                elif opt in ["-l", "--user"]:
                    user = opt_value
                elif opt in ["-w", "--password"]:
                    password = opt_value
                elif opt in ["-V", "--version"]:
                    # NOTE(review): '-V'/'--version' are not declared in the
                    # getopt specification above, so getopt raises before
                    # this branch can be reached.
                    print(__revision__)
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)

            if len(args) > 0:
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                (addressing_scheme, network_location, path, dummy1, dummy2,
                 dummy3) = urllib.parse.urlparse(base_url)
                secure = (addressing_scheme == "https")

                if (cert_file and not key_file) or \
                        (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")

                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    # No password on the command line: ask for it
                    # interactively.
                    if not secure:
                        sys.stderr.write(
                            "*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt as error:
                        sys.stderr.write("\n%s\n" % (error, ))
                        sys.exit(0)

                getter.harvest(network_location, path, http_param_dict, method,
                               output, sets, secure, user, password, cert_file,
                               key_file)

                sys.stderr.write(
                    "Harvesting completed at: %s\n\n" %
                    time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now
            identifier_given = any(
                opt in ('-i', '--identifier') for opt, dummy in opts)
            # 'and' binds tighter than 'or'; the parentheses only make the
            # evaluation order of the original condition explicit.
            if (not identifier_given and len(args) > 1) or \
                    (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")
    except getopt.error:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass
        # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.

    num_of_critical_parameter = 0
    num_of_critical_parameterb = 0
    repositories = []

    for opt in sys.argv[1:]:
        # Compare whole arguments: a substring test ('opt in "-r"') would
        # also count values such as '-', 'r' or '-w'.
        if opt in ("-r", "--repository"):
            num_of_critical_parameter += 1
        elif opt == "--workflow":
            num_of_critical_parameterb += 1
        if num_of_critical_parameter > 1 or num_of_critical_parameterb > 1:
            usage(1, "You can't specify twice -r or --workflow")

    if num_of_critical_parameter == 1:
        if "-r" in sys.argv:
            position = sys.argv.index("-r")
        else:
            position = sys.argv.index("--repository")
        # The repository names are the comma-separated value that follows
        # the option on the command line.
        repositories = sys.argv[position + 1].split(",")
        if len(repositories) > 1 and \
                ("-i" in sys.argv or "--identifier" in sys.argv):
            usage(
                1, "It is impossible to harvest an identifier from several "
                "repositories.")

    if num_of_critical_parameterb == 1:

        position = sys.argv.index("--workflow")
        workflows = sys.argv[position + 1].split(",")

        for workflow_candidate in workflows:
            if workflow_candidate not in registry_workflows:
                usage(1, "The workflow %s doesn't exist." % workflow_candidate)

    if num_of_critical_parameter == 1 and num_of_critical_parameterb == 0:
        # Only repositories given: each one must exist and carry a workflow
        # known to the registry.
        for name_repository in repositories:
            try:
                oaiharvest_instance = OaiHARVEST.get(
                    OaiHARVEST.name == name_repository).one()
                if oaiharvest_instance.workflows not in registry_workflows:
                    usage(
                        1,
                        "The repository %s doesn't have a valid workflow specified."
                        % name_repository)
            except orm.exc.NoResultFound:
                usage(
                    1, "The repository %s doesn't exist in our database." %
                    name_repository)

    elif num_of_critical_parameter == 1 and num_of_critical_parameterb == 1:
        # Repositories and workflow given: only the existence of each
        # repository is checked here.
        for name_repository in repositories:
            try:
                OaiHARVEST.get(OaiHARVEST.name == name_repository).one()
            except orm.exc.NoResultFound:
                usage(
                    1, "The repository %s doesn't exist in our database." %
                    name_repository)

        print("A workflow has been specified, overriding the repository one.")

    task_set_option("repository", None)
    task_set_option("dates", None)
    task_set_option("workflow", None)
    task_set_option("identifiers", None)
    task_init(
        authorization_action='runoaiharvest',
        authorization_msg="oaiharvest Task Submission",
        description="""
Harvest records from OAI sources.
Manual vs automatic harvesting:
   - Manual harvesting retrieves records from the specified URL,
     with the specified OAI arguments. Harvested records are displayed
     on the standard output or saved to a file, but are not integrated
     into the repository. This mode is useful to 'play' with OAI
     repositories or to build special harvesting scripts.
   - Automatic harvesting relies on the settings defined in the OAI
     Harvest admin interface to periodically retrieve the repositories
     and sets to harvest. It also take care of harvesting only new or
     modified records. Records harvested using this mode are converted
     and integrated into the repository, according to the settings
     defined in the OAI Harvest admin interface.

Examples:
Manual (single-shot) harvesting mode:
   Save to /tmp/z.xml records from CDS added/modified between 2004-04-01
   and 2004-04-02, in MARCXML:
     $ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml -o/tmp/z.xml http://cds.cern.ch/oai2d
Automatic (periodical) harvesting mode:
   Schedule daily harvesting of all repositories defined in OAIHarvest admin:
     $ oaiharvest -s 24h
   Schedule daily harvesting of repository 'arxiv', defined in OAIHarvest admin:
     $ oaiharvest -r arxiv -s 24h
   Harvest in 10 minutes from 'pubmed' repository records added/modified
   between 2005-05-05 and 2005-05-10:
     $ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m
""",
        help_specific_usage='Manual single-shot harvesting mode:\n'
        '  -o, --output         specify output file\n'
        '  -v, --verb           OAI verb to be executed\n'
        '  -m, --method         http method (default POST)\n'
        '  -p, --metadataPrefix metadata format\n'
        '  -i, --identifier     OAI identifier\n'
        '  -s, --set            OAI set(s). Whitespace-separated list\n'
        '  -r, --resumptionToken Resume previous harvest\n'
        '  -f, --from           from date (datestamp)\n'
        '  -u, --until          until date (datestamp)\n'
        '  -c, --certificate    path to public certificate (in case of certificate-based harvesting)\n'
        '  -k, --key            path to private key (in case of certificate-based harvesting)\n'
        '  -l, --user           username (in case of password-protected harvesting)\n'
        '  -w, --password       password (in case of password-protected harvesting)\n'
        'Deamon mode (periodical or one-shot harvesting mode):\n'
        '  -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
        '  -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n'
        '  -i, --identifier     OAI identifier if wished to run in as a task.\n'
        '  --notify-email-to    Receive notifications on given email on successful upload and/or finished harvest.\n'
        '  --workflow       specify the workflow to execute.\n'
        '  --create-ticket-in   Provide desired ticketing queue to create a ticket in it on upload and/or finished harvest.\n'
        '                       Requires a configured ticketing system (BibCatalog).\n',
        specific_params=("r:i:d:W", [
            "repository=", "identifier=", "dates=", "workflow=",
            "notify-email-to=", "create-ticket-in="
        ]),
        task_submit_elaborate_specific_parameter_fnc=
        task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
Beispiel #41
0
def bibauthorid_daemon():
    """Set up the Bibauthorid bibtask.

    Collects the task description, the command-line help text and the
    option specification, then hands them to ``bibtask.task_init`` together
    with the module's submit/check/run callbacks. The value produced by
    ``task_init`` is not returned.
    """
    # General description shown to the user: purpose plus usage examples.
    task_description = """
Purpose:
  Disambiguate Authors and find their identities.
Examples:
  - Process all records that hold an author with last name 'Ellis':
      $ bibauthorid -u admin --update-personid --all-records
  - Disambiguate all records on a fresh installation
      $ bibauthorid -u admin --disambiguate --from-scratch
"""

    # Help text for the commands and options specific to this task.
    usage_text = """
  bibauthorid [COMMAND] [OPTIONS]

  COMMAND
    You can choose only one from the following:
      --update-personid     Updates personid adding not yet assigned papers
                            to the system, in a fast, best effort basis.
                            Cleans the table from stale records.

      --disambiguate        Disambiguates all signatures in the database
                            using the tortoise/wedge algorithm. This usually
                            takes a LOT of time so the results are stored in
                            a special table. Use --merge to use the results.

      --merge               Updates the personid tables with the results from
                            the --disambiguate algorithm.

  OPTIONS
    Options for update personid
      (default)             Will update only the modified records since last
                            run.

      -i, --record-ids      Force the procedure to work only on the specified
                            records. This option is exclusive with --all-records.

      --all-records         Force the procedure to work on all records. This
                            option is exclusive with --record-ids.

    Options for disambiguate
      (default)             Performs full disambiguation of all records in the
                            current personid tables with respect to the user
                            decisions.

      --from-scratch        Ignores the current information in the personid
                            tables and disambiguates everything from scratch.

    There are no options for the merger.
"""

    # Short and long option names accepted by this task
    # (presumably a getopt-style specification -- confirm against bibtask).
    short_opts = "i:"
    long_opts = [
        "record-ids=",
        "disambiguate",
        "merge",
        "all-records",
        "update-personid",
        "from-scratch",
    ]

    bibtask.task_init(
        authorization_action='runbibclassify',
        authorization_msg="Bibauthorid Task Submission",
        description=task_description,
        help_specific_usage=usage_text,
        version="Invenio Bibauthorid v%s" % bconfig.VERSION,
        specific_params=(short_opts, long_opts),
        task_submit_elaborate_specific_parameter_fnc=(
            _task_submit_elaborate_specific_parameter),
        task_submit_check_options_fnc=_task_submit_check_options,
        task_run_fnc=_task_run_core)