Exemple #1
0
def task_run_core():
    """Run every requested rank method once.

    The method names are read from the "run" task option. When that option
    is empty it is first filled with every ``name`` selected from the
    ``rnkMETHOD`` table. For each method, the file path stored under
    ``<name>.cfg`` in ``configuration`` (defined outside this block) is
    parsed, and the function named by its ``[rank_method] function``
    setting is looked up in this module's globals and called with the
    method name.

    Returns True once all methods have been processed. A method whose
    function cannot be found in the module globals is reported and skipped.

    Raises StandardError (re-raised after logging) when the configuration
    file cannot be opened or parsed.
    """
    if not task_get_option("run"):
        task_set_option("run", [name[0] for name in run_sql("SELECT name from rnkMETHOD")])

    for key in task_get_option("run"):
        task_sleep_now_if_required(can_stop_too=True)
        write_message("")
        filename = configuration.get(key + '.cfg', '')
        write_message("Getting configuration from file: %s" % filename,
            verbose=9)
        config = ConfigParser.ConfigParser()
        try:
            # Use a context manager so the file handle is closed even when
            # parsing fails, instead of leaking it until garbage collection.
            with open(filename) as config_file:
                config.readfp(config_file)
        except StandardError:
            write_message("Cannot find configuration file: %s. "
                "The rankmethod may also not be registered using "
                "the BibRank Admin Interface." % filename, sys.stderr)
            raise

        # The configuration names the function implementing this rank
        # method; resolve it among the module-level names and call it.
        cfg_function = config.get("rank_method", "function")
        func_object = globals().get(cfg_function)
        if func_object:
            func_object(key)
        else:
            write_message("Cannot run method '%s', no function to call"
                % key)

    return True
Exemple #2
0
def task_submit_check_options():
    """Apply the default selection when no explicit one was given.

    If none of the options 'all', 'collection', 'field', 'pattern',
    'matching' or 'recids' is present, the 'last' option is set to 1.

    Returns True unconditionally.
    """
    selection_options = ('all', 'collection', 'field', 'pattern',
                         'matching', 'recids')
    # any() stops at the first option that is present, like the `or` chain.
    if not any(task_has_option(name) for name in selection_options):
        task_set_option('last', 1)
    return True
Exemple #3
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle the task-specific parameters of the bibexport task.

    Only -w/--wjob is recognised; its value is stored in the "wjob" option.

    Returns True when the key was handled, False for any other key.
    """
    if key not in ("-w", "--wjob"):
        return False
    task_set_option("wjob", value)
    return True
Exemple #4
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Process one bibexport-specific command line parameter.

    The -w/--wjob switch stores ``value`` under the "wjob" task option.

    Returns True if ``key`` was -w/--wjob, False otherwise.
    """
    is_wjob = key in ("-w", "--wjob")
    if is_wjob:
        task_set_option("wjob", value)
    return is_wjob
Exemple #5
0
def task_submit_check_options():
    """Final option check: fall back to 'last' when nothing was selected.

    Looks for the options 'all', 'collection', 'field', 'pattern',
    'matching' and 'recids' in that order; when none is present the
    'last' option is set to 1.

    Returns True in every case.
    """
    for option_name in ('all', 'collection', 'field', 'pattern',
                        'matching', 'recids'):
        if task_has_option(option_name):
            # An explicit selection exists; leave the options untouched.
            break
    else:
        task_set_option('last', 1)
    return True
Exemple #6
0
def parse_option(key, value, dummy, args):
    """Parse one command line option and record it as a task option.

    -a/--new, -m/--modified and --rebuild set the matching option to True.
    -c/--collections and -r/--recids add the IDs parsed from ``value`` to
    the set stored under 'collections' / 'recids', creating the set first
    when the option is still empty.

    Returns True for every key, including keys that are not handled here.
    Raises StandardError when standalone arguments are present.
    """
    if args:
        # There should be no standalone arguments for any refextract job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    boolean_switches = (
        (('-a', '--new'), 'new'),
        (('-m', '--modified'), 'modified'),
        (('--rebuild',), 'rebuild'),
    )
    for switches, option_name in boolean_switches:
        if key in switches:
            task_set_option(option_name, True)
            return True

    id_set_switches = (
        (('-c', '--collections'), 'collections'),
        (('-r', '--recids'), 'recids'),
    )
    for switches, option_name in id_set_switches:
        if key in switches:
            id_set = task_get_option(option_name)
            if not id_set:
                # Register the new set before filling it, so the stored
                # option and the local name refer to the same object.
                id_set = set()
                task_set_option(option_name, id_set)
            id_set.update(split_cli_ids_arg(value))
            return True

    return True
Exemple #7
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle one task-specific command line key.

    Possible keys:
      -d, --documents  sets the 'documents' option to "documents"
      -m, --metadata   sets the 'metadata' option to "metadata"

    Returns True if the key was handled, False otherwise.
    """
    known_switches = (
        (('-d', '--documents'), "documents"),
        (('-m', '--metadata'), "metadata"),
    )
    for switches, option_name in known_switches:
        if key in switches:
            # The option is stored under its own name.
            task_set_option(option_name, option_name)
            return True
    return False
Exemple #8
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Interpret one task-specific command line key.

    Possible keys:
      -d, --documents  stores "documents" under the 'documents' option
      -m, --metadata   stores "metadata" under the 'metadata' option

    Returns True when the key is one of the above, False otherwise.
    """
    if key in ('-d', '--documents'):
        selected = "documents"
    elif key in ('-m', '--metadata'):
        selected = "metadata"
    else:
        return False
    # Option name and stored value are the same string.
    task_set_option(selected, selected)
    return True
Exemple #9
0
def task_submit_check_options():
    """Enable 'sessions' when no other listed option is set.

    The options 'logs', 'tempfiles', 'guests', 'bibxxx', 'documents',
    'cache', 'tasks', 'check-tables' and 'optimise-tables' are read in that
    order; if all of them are falsy, 'sessions' is set to True.

    Returns True unconditionally.
    """
    checked_options = ('logs', 'tempfiles', 'guests', 'bibxxx', 'documents',
                       'cache', 'tasks', 'check-tables', 'optimise-tables')
    # any() short-circuits on the first truthy option, like the `and` chain.
    if not any(task_get_option(name) for name in checked_options):
        task_set_option('sessions', True)
    return True
Exemple #10
0
def _task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle one task-specific command line option.

    Recognised options:
      -i, --recid       integer record ID, appended to the 'recids' option
      -c, --collection  collection name, appended to 'collections'
      -k, --taxonomy    taxonomy name, stored as 'taxonomy'
      -f, --force       sets the 'force' option to True

    Returns True if the key was recognised and its value accepted. Returns
    False if the key is unknown or the value failed validation; in the
    latter case an error message is written to stderr first.
    """
    # Recid option
    if key in ("-i", "--recid"):
        try:
            value = int(value)
        except ValueError:
            bibtask.write_message("The value specified for --recid must be a "
                                  "valid integer, not '%s'." % value,
                                  stream=sys.stderr,
                                  verbose=0)
            # Stop here: the raw string must not reach _recid_exists(), and
            # a second, misleading error message must not be printed.
            return False
        if not _recid_exists(value):
            bibtask.write_message("ERROR: '%s' is not a valid record ID." %
                                  value,
                                  stream=sys.stderr,
                                  verbose=0)
            return False
        recids = bibtask.task_get_option('recids')
        if recids is None:
            recids = []
        recids.append(value)
        bibtask.task_set_option('recids', recids)

    # Collection option
    elif key in ("-c", "--collection"):
        if not _collection_exists(value):
            bibtask.write_message("ERROR: '%s' is not a valid collection." %
                                  value,
                                  stream=sys.stderr,
                                  verbose=0)
            return False
        collections = bibtask.task_get_option("collections")
        collections = collections or []
        collections.append(value)
        bibtask.task_set_option("collections", collections)

    # Taxonomy option
    elif key in ("-k", "--taxonomy"):
        if not _ontology_exists(value):
            bibtask.write_message("ERROR: '%s' is not a valid taxonomy name." %
                                  value,
                                  stream=sys.stderr,
                                  verbose=0)
            return False
        bibtask.task_set_option("taxonomy", value)
    elif key in ("-f", "--force"):
        bibtask.task_set_option("force", True)
    else:
        return False

    return True
Exemple #11
0
def cb_parse_option(key, value, opts, args):
    """Parse one command line option.

    For -i/--id the IDs parsed from ``value`` are added to the set stored
    under the "recids" option; the set is created and registered first when
    the option is still empty. Other keys are accepted without any action.

    Returns True for every key.
    Raises StandardError when standalone arguments are present.
    """
    if args:
        # There should be no standalone arguments
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key not in ("-i", "--id"):
        return True

    id_set = task_get_option("recids")
    if not id_set:
        id_set = set()
        task_set_option("recids", id_set)
    id_set.update(split_cli_ids_arg(value))
    return True
Exemple #12
0
def task_submit_check_options():
    """Enable 'sessions' when none of the listed options is set.

    The options 'logs', 'tempfiles', 'guests', 'bibxxx', 'documents',
    'cache', 'tasks', 'check-tables', 'sessions', 'optimise-tables' and
    'bibedit-cache' are read in that order; if every one is falsy,
    'sessions' is set to True.

    Returns True unconditionally.
    """
    for option_name in ('logs', 'tempfiles', 'guests', 'bibxxx',
                        'documents', 'cache', 'tasks', 'check-tables',
                        'sessions', 'optimise-tables', 'bibedit-cache'):
        if task_get_option(option_name):
            # Something was requested explicitly; keep the options as-is.
            break
    else:
        task_set_option('sessions', True)
    return True
Exemple #13
0
def cb_parse_option(key, value, opts, args):
    """Parse one command line option.

    -i/--id extends the set kept in the 'recids' option with the IDs
    parsed from ``value``. A missing or empty option is replaced by a
    fresh set before it is extended. Any other key is ignored.

    Returns True for every key.
    Raises StandardError when standalone arguments are present.
    """
    if args:
        # There should be no standalone arguments
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    is_id_option = key in ('-i', '--id')
    if is_id_option:
        selected_recids = task_get_option('recids')
        if not selected_recids:
            selected_recids = set()
            task_set_option('recids', selected_recids)
        selected_recids.update(split_cli_ids_arg(value))

    return True
Exemple #14
0
def task_check_options():
    """Validate the task options before the task is submitted.

    Checks, in order:
      1. at least one of 'new', 'modified', 'recids', 'collections' or
         'reportnumbers' is set;
      2. every ticket named in the 'tickets' option is among the enabled
         plugins, or 'all-tickets' is set (all enabled plugins are used);
      3. BIBCATALOG_SYSTEM is defined.

    On success the 'tickets' option is replaced by the selected plugins.
    Errors reported by load_ticket_plugins() and a non-empty result from
    BIBCATALOG_SYSTEM.check_system() are printed to stderr but do not make
    the check fail.

    Returns False if one of the numbered checks fails, True otherwise.
    """
    if not task_get_option('new') \
            and not task_get_option('modified') \
            and not task_get_option('recids') \
            and not task_get_option('collections')\
            and not task_get_option('reportnumbers'):
        print >>sys.stderr, 'Error: No records specified, you need' \
            ' to specify which records to run on'
        return False

    ticket_plugins = {}
    all_plugins, error_messages = load_ticket_plugins()

    if error_messages:
        # We got broken plugins. We alert only for now.
        print >>sys.stderr, "\n".join(error_messages)

    if task_get_option('tickets'):
        # Tickets specified
        for ticket in task_get_option('tickets'):
            if ticket not in all_plugins.get_enabled_plugins():
                # Name the offending plugin in the message itself; the
                # format argument used to be missing and the name was
                # printed separately on stdout.
                print >>sys.stderr, \
                    'Error: plugin %s is broken or does not exist' % (ticket,)
                return False
            ticket_plugins[ticket] = all_plugins[ticket]
    elif task_get_option('all-tickets'):
        ticket_plugins = all_plugins.get_enabled_plugins()
    else:
        print >>sys.stderr, 'Error: No tickets specified, you need' \
            ' to specify at least one ticket type to create'
        return False

    task_set_option('tickets', ticket_plugins)

    if not BIBCATALOG_SYSTEM:
        print >>sys.stderr, 'Error: no cataloging system defined'
        return False

    res = BIBCATALOG_SYSTEM.check_system()
    if res:
        # Reported only; the options are still considered valid.
        print >>sys.stderr, 'Error while checking cataloging system: %s' % \
            (res,)
    return True
Exemple #15
0
def _task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle one task-specific command line option.

    Recognised options:
      -i, --recid       integer record ID, appended to the 'recids' option
      -c, --collection  collection name, appended to 'collections'
      -k, --taxonomy    taxonomy name, stored as 'taxonomy'
      -f, --force       sets the 'force' option to True

    Returns True if the key was recognised and its value accepted. Returns
    False if the key is unknown or the value failed validation; in the
    latter case an error message is written to stderr first.
    """
    # Recid option
    if key in ("-i", "--recid"):
        try:
            value = int(value)
        except ValueError:
            bibtask.write_message("The value specified for --recid must be a "
                                  "valid integer, not '%s'." % value,
                                  stream=sys.stderr,
                                  verbose=0)
            # Reject the option right away instead of passing the raw
            # string on to _recid_exists() and reporting a second error.
            return False
        if not _recid_exists(value):
            bibtask.write_message(
                "ERROR: '%s' is not a valid record ID." % value,
                stream=sys.stderr, verbose=0)
            return False
        recids = bibtask.task_get_option('recids')
        if recids is None:
            recids = []
        recids.append(value)
        bibtask.task_set_option('recids', recids)

    # Collection option
    elif key in ("-c", "--collection"):
        if not _collection_exists(value):
            bibtask.write_message(
                "ERROR: '%s' is not a valid collection." % value,
                stream=sys.stderr, verbose=0)
            return False
        collections = bibtask.task_get_option("collections")
        collections = collections or []
        collections.append(value)
        bibtask.task_set_option("collections", collections)

    # Taxonomy option
    elif key in ("-k", "--taxonomy"):
        if not _ontology_exists(value):
            bibtask.write_message(
                "ERROR: '%s' is not a valid taxonomy name." % value,
                stream=sys.stderr, verbose=0)
            return False
        bibtask.task_set_option("taxonomy", value)
    elif key in ("-f", "--force"):
        bibtask.task_set_option("force", True)
    else:
        return False

    return True
Exemple #16
0
def task_check_options():
    """Validate the task options before the task is submitted.

    Checks, in order:
      1. at least one of 'new', 'modified', 'recids', 'collections' or
         'reportnumbers' is set;
      2. every ticket named in the 'tickets' option is among the enabled
         plugins, or 'all-tickets' is set (all enabled plugins are used);
      3. BIBCATALOG_SYSTEM is defined.

    On success the 'tickets' option is replaced by the selected plugins.
    Errors reported by load_ticket_plugins() and a non-empty result from
    BIBCATALOG_SYSTEM.check_system() are printed to stderr but do not make
    the check fail.

    Returns False if one of the numbered checks fails, True otherwise.
    """
    if not task_get_option('new') \
            and not task_get_option('modified') \
            and not task_get_option('recids') \
            and not task_get_option('collections')\
            and not task_get_option('reportnumbers'):
        print >>sys.stderr, 'Error: No records specified, you need' \
            ' to specify which records to run on'
        return False

    ticket_plugins = {}
    all_plugins, error_messages = load_ticket_plugins()

    if error_messages:
        # We got broken plugins. We alert only for now.
        print >>sys.stderr, "\n".join(error_messages)

    if task_get_option('tickets'):
        # Tickets specified
        for ticket in task_get_option('tickets'):
            if ticket not in all_plugins.get_enabled_plugins():
                # The plugin name belongs in the error message; previously
                # the placeholder was never filled in and the name was
                # printed on its own line to stdout.
                print >>sys.stderr, \
                    'Error: plugin %s is broken or does not exist' % (ticket,)
                return False
            ticket_plugins[ticket] = all_plugins[ticket]
    elif task_get_option('all-tickets'):
        ticket_plugins = all_plugins.get_enabled_plugins()
    else:
        print >>sys.stderr, 'Error: No tickets specified, you need' \
            ' to specify at least one ticket type to create'
        return False

    task_set_option('tickets', ticket_plugins)

    if not BIBCATALOG_SYSTEM:
        print >>sys.stderr, 'Error: no cataloging system defined'
        return False

    res = BIBCATALOG_SYSTEM.check_system()
    if res:
        # Reported only; the options are still considered valid.
        print >>sys.stderr, 'Error while checking cataloging system: %s' % \
            (res,)
    return True
Exemple #17
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle one task-specific command line key.

    Recognised keys:
      -n, --number  stores ``value`` under the 'number' option
      -e, --error   sets the 'error' option to True

    Returns True if the key was handled, False if it is not known here.
    """
    if key in ('-n', '--number'):
        task_set_option('number', value)
    elif key in ('-e', '--error'):
        task_set_option('error', True)
    else:
        return False
    return True
Exemple #18
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Interpret one task-specific command line key.

    Recognised keys:
      -n, --number  the 'number' option receives ``value``
      -e, --error   the 'error' option is set to True

    Returns True when the key was one of the above, False otherwise.
    """
    handled = True
    if key in ('-n', '--number'):
        task_set_option('number', value)
    elif key in ('-e', '--error'):
        task_set_option('error', True)
    else:
        handled = False
    return handled
Exemple #19
0
def _dbdump_elaborate_submit_param(key, value, dummyopts, dummyargs):
    """
    Elaborate one task submission parameter.  See bibtask's
    task_submit_elaborate_specific_parameter_fnc for help.

    -n/--number stores ``int(value)`` under 'number'; -o/--output stores
    ``value`` under 'output' when it is an existing directory.

    Returns True if the key was handled, False for an unknown key.
    Raises StandardError when the number is not an integer or the output
    is not a directory.
    """
    if key in ('-n', '--number'):
        try:
            task_set_option('number', int(value))
        except ValueError:
            raise StandardError("ERROR: Number '%s' is not integer." % value)
        return True

    if key in ('-o', '--output'):
        if not os.path.isdir(value):
            raise StandardError("ERROR: Output '%s' is not a directory." % \
                  value)
        task_set_option('output', value)
        return True

    return False
Exemple #20
0
def task_submit_elaborate_specific_parameter(key, _value, _opts, _args):
    """Elaborate specific CLI parameters of oairepositoryupdater.

    Recognised keys:
      -r, --report           sets "report" to 1
      -d, --detailed-report  sets "report" to 2
      -n, --no-process       sets "no_upload" to 1
      --notimechange         sets "notimechange" to 1

    Returns True if the key was handled, False for an unknown key.
    """
    if key in ("-r", "--report"):
        task_set_option("report", 1)
    # This must be `elif`: with a separate `if`, -r/--report fell through
    # to the final `else` and was rejected as unknown after being applied.
    elif key in ("-d", "--detailed-report"):
        task_set_option("report", 2)
    elif key in ("-n", "--no-process"):
        task_set_option("no_upload", 1)
    elif key in ("--notimechange",):
        task_set_option("notimechange", 1)
    else:
        return False
    return True
Exemple #21
0
def task_submit_elaborate_specific_parameter(key, _value, _opts, _args):
    """Elaborate specific CLI parameters of oairepositoryupdater.

    Recognised keys:
      -r, --report           sets "report" to 1
      -d, --detailed-report  sets "report" to 2
      -n, --no-process       sets "no_upload" to 1
      --notimechange         sets "notimechange" to 1

    Returns True if the key was handled, False for an unknown key.
    """
    if key in ("-r", "--report"):
        task_set_option("report", 1)
    # Chained with `elif` so that a handled -r/--report does not reach the
    # trailing `else` and get reported as an unknown key.
    elif key in ("-d", "--detailed-report"):
        task_set_option("report", 2)
    elif key in ("-n", "--no-process"):
        task_set_option("no_upload", 1)
    elif key in ("--notimechange", ):
        task_set_option("notimechange", 1)
    else:
        return False
    return True
Exemple #22
0
def task_submit_esp(key, value, opts, args):
    """
    Store the value of a recognised command line option.

    Recognised keys:
      -r, --record      stores value under 'record'
      -m, --mount       stores value under 'mount'
      -d, --delete      stores value under 'delete'
      -D, --delete-all  stores value under 'delete_all'
      --PURGE           sets 'purge' to True

    @returns: True, whether or not the key was recognised
    """

    if key in ('-r', '--record'):
        task_set_option('record', value)
    elif key in ('-m', '--mount'):
        task_set_option('mount', value)
    elif key in ('-d', '--delete'):
        task_set_option('delete', value)
    elif key in ('-D', '--delete-all'):
        task_set_option('delete_all', value)
    # One-element tuple: without the trailing comma this was a substring
    # test against the string '--PURGE', so keys such as '-' or '--'
    # also enabled purging.
    elif key in ('--PURGE',):
        task_set_option('purge', True)

    return True
Exemple #23
0
def task_submit_elaborate_specific_parameter(key, value,
                                             dummy_opts, dummy_args):
    """Check meaning of given string key.

    Recognised keys:

    * ``-T``, ``--tasklet``: store ``value`` under the 'tasklet' option.
    * ``-a``, ``--argument``: ``value`` must look like ``param=value``; the
      pair is added to the dict stored under the 'arguments' option.
    * ``-l``, ``--list-tasklets``: call ``cli_list_tasklets()``.

    Returns True if the key was handled, False if the key is unknown or
    an ``--argument`` value does not contain ``=`` (an error is printed
    to stderr in that case).
    """
    if key in ('-T', '--tasklet'):
        task_set_option('tasklet', value)
        return True
    elif key in ('-a', '--argument'):
        arguments = task_get_option('arguments', {})
        try:
            # Split on the first '=' only, so the value may contain '='.
            arg_name, arg_value = value.split('=', 1)
        except ValueError:
            # Unpacking a single-element split result raises ValueError
            # (not NameError), so this is the exception to handle.
            print('ERROR: an argument must be in the form '
                  'param=value, not "%s"' % (value, ),
                  file=sys.stderr)
            return False
        arguments[arg_name] = arg_value
        task_set_option('arguments', arguments)
        return True
    elif key in ('-l', '--list-tasklets'):
        cli_list_tasklets()
        return True
    return False
Exemple #24
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """ Given the string key, checks its meaning and returns True if
        it has elaborated the key. The key is always passed to
        write_message() first.
        Possible keys:
          -o, --overdue-letters   sets 'overdue-letters' to True
          -b, --update-borrowers  sets 'update-borrowers' to True
          -r, --update-requests   sets 'update-requests' to True
    """
    write_message(key)
    switch_table = (
        (('-o', '--overdue-letters'), 'overdue-letters'),
        (('-b', '--update-borrowers'), 'update-borrowers'),
        (('-r', '--update-requests'), 'update-requests'),
    )
    for switches, option_name in switch_table:
        if key in switches:
            task_set_option(option_name, True)
            return True
    return False
Exemple #25
0
def main():
    """Start the tool.

    If the command line arguments are those of the 'manual' mode, then
    starts a manual one-time harvesting. Else trigger a BibSched task
    for automated harvesting based on the OAIHarvest admin settings.

    Manual mode is chosen when getopt accepts the arguments and a
    -v/--verb option is present; the function then harvests once and
    returns. In every other case (including a getopt error) the raw
    sys.argv is checked for -r/--repository and --workflow, the named
    repositories and workflows are validated, and task_init() is called.
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:

        opts, args = getopt.getopt(sys.argv[1:], "o:v:m:p:i:s:f:u:r:c:k:l:w:",
                                   ["output=",
                                    "verb=",
                                    "method=",
                                    "metadataPrefix=",
                                    "identifier=",
                                    "set=",
                                    "from=",
                                    "until=",
                                    "resumptionToken=",
                                    "certificate=",
                                    "key=",
                                    "user=",
                                    "password=",
                                    "workflow=",
                                    ])

        # So everything went smoothly: start harvesting in manual mode
        if any(opt in ('-v', '--verb') for opt, dummy_value in opts):
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []
            # get options and arguments
            for opt, opt_value in opts:
                if opt in ["-v", "--verb"]:
                    http_param_dict['verb'] = opt_value
                elif opt in ["-m", '--method']:
                    # Any method other than GET/POST is silently ignored
                    # and the default is kept.
                    if opt_value == "GET" or opt_value == "POST":
                        method = opt_value
                elif opt in ["-p", "--metadataPrefix"]:
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ["-i", "--identifier"]:
                    http_param_dict['identifier'] = opt_value
                elif opt in ["-s", "--set"]:
                    sets = opt_value.split()
                elif opt in ["-f", "--from"]:
                    http_param_dict['from'] = opt_value
                elif opt in ["-u", "--until"]:
                    http_param_dict['until'] = opt_value
                elif opt in ["-r", "--resumptionToken"]:
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ["-o", "--output"]:
                    output = opt_value
                elif opt in ["-c", "--certificate"]:
                    cert_file = opt_value
                elif opt in ["-k", "--key"]:
                    key_file = opt_value
                elif opt in ["-l", "--user"]:
                    user = opt_value
                elif opt in ["-w", "--password"]:
                    password = opt_value
                elif opt in ["-V", "--version"]:
                    # NOTE(review): -V/--version is not declared in the
                    # getopt specification above, so getopt rejects it
                    # before this branch can be reached.
                    print(__revision__)
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)

            if len(args) > 0:
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                (addressing_scheme, network_location, path, dummy1,
                 dummy2, dummy3) = urllib.parse.urlparse(base_url)
                secure = (addressing_scheme == "https")

                if (cert_file and not key_file) or \
                        (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")

                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    if not secure:
                        sys.stderr.write(
                            "*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt as error:
                        sys.stderr.write("\n%s\n" % (error,))
                        sys.exit(0)

                getter.harvest(network_location, path,
                               http_param_dict, method,
                               output, sets, secure, user,
                               password, cert_file,
                               key_file)

                sys.stderr.write("Harvesting completed at: %s\n\n" %
                                 time.strftime("%Y-%m-%d %H:%M:%S --> ",
                                               time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now
            no_identifier = not any(opt in ('-i', '--identifier')
                                    for opt, dummy_value in opts)
            if (no_identifier and len(args) > 1) or \
                    (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")
    except getopt.error:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass

    # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.

    num_of_critical_parameter = 0
    num_of_critical_parameterb = 0
    repositories = []

    for opt in sys.argv[1:]:
        # Compare whole arguments. The previous `opt in "-r"` style tests
        # were substring checks, so arguments such as "-", "r" or "repo"
        # were counted as -r/--repository or --workflow.
        if opt in ("-r", "--repository"):
            num_of_critical_parameter += 1
        elif opt == "--workflow":
            num_of_critical_parameterb += 1
        if num_of_critical_parameter > 1 or num_of_critical_parameterb > 1:
            usage(1, "You can't specify twice -r or --workflow")

    if num_of_critical_parameter == 1:
        if "-r" in sys.argv:
            position = sys.argv.index("-r")
        else:
            position = sys.argv.index("--repository")
        if position + 1 >= len(sys.argv):
            # NOTE(review): assumes usage() terminates the program, as the
            # other calls in this function do - confirm.
            usage(1, "You must specify a repository after -r/--repository")
        repositories = sys.argv[position + 1].split(",")
        if len(repositories) > 1 and \
                ("-i" in sys.argv or "--identifier" in sys.argv):
            usage(1,
                  "It is impossible to harvest an identifier from several "
                  "repositories.")

    if num_of_critical_parameterb == 1:

        position = sys.argv.index("--workflow")
        if position + 1 >= len(sys.argv):
            # NOTE(review): assumes usage() terminates the program - confirm.
            usage(1, "You must specify a workflow after --workflow")
        workflows = sys.argv[position + 1].split(",")

        for workflow_candidate in workflows:
            if workflow_candidate not in registry_workflows:
                usage(1, "The workflow %s doesn't exist." % workflow_candidate)

    if num_of_critical_parameter == 1 and num_of_critical_parameterb == 0:

        # No workflow given on the command line: each repository must
        # exist and carry a registered workflow of its own.
        for name_repository in repositories:
            try:
                oaiharvest_instance = OaiHARVEST.get(
                    OaiHARVEST.name == name_repository).one()
                if oaiharvest_instance.workflows not in registry_workflows:
                    usage(1,
                          "The repository %s doesn't have a valid workflow specified." % name_repository)
            except orm.exc.NoResultFound:
                usage(1,
                      "The repository %s doesn't exist in our database." % name_repository)

    elif num_of_critical_parameter == 1 and num_of_critical_parameterb == 1:

        # A workflow was given explicitly: only check that the
        # repositories exist.
        for name_repository in repositories:
            try:
                OaiHARVEST.get(OaiHARVEST.name == name_repository).one()
            except orm.exc.NoResultFound:
                usage(1,
                      "The repository %s doesn't exist in our database." % name_repository)

        print("A workflow has been specified, overriding the repository one.")

    task_set_option("repository", None)
    task_set_option("dates", None)
    task_set_option("workflow", None)
    task_set_option("identifiers", None)
    task_init(authorization_action='runoaiharvest',
              authorization_msg="oaiharvest Task Submission",
              description="""
Harvest records from OAI sources.
Manual vs automatic harvesting:
   - Manual harvesting retrieves records from the specified URL,
     with the specified OAI arguments. Harvested records are displayed
     on the standard output or saved to a file, but are not integrated
     into the repository. This mode is useful to 'play' with OAI
     repositories or to build special harvesting scripts.
   - Automatic harvesting relies on the settings defined in the OAI
     Harvest admin interface to periodically retrieve the repositories
     and sets to harvest. It also take care of harvesting only new or
     modified records. Records harvested using this mode are converted
     and integrated into the repository, according to the settings
     defined in the OAI Harvest admin interface.

Examples:
Manual (single-shot) harvesting mode:
   Save to /tmp/z.xml records from CDS added/modified between 2004-04-01
   and 2004-04-02, in MARCXML:
     $ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml -o/tmp/z.xml http://cds.cern.ch/oai2d
Automatic (periodical) harvesting mode:
   Schedule daily harvesting of all repositories defined in OAIHarvest admin:
     $ oaiharvest -s 24h
   Schedule daily harvesting of repository 'arxiv', defined in OAIHarvest admin:
     $ oaiharvest -r arxiv -s 24h
   Harvest in 10 minutes from 'pubmed' repository records added/modified
   between 2005-05-05 and 2005-05-10:
     $ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m
""",

              help_specific_usage='Manual single-shot harvesting mode:\n'
                                  '  -o, --output         specify output file\n'
                                  '  -v, --verb           OAI verb to be executed\n'
                                  '  -m, --method         http method (default POST)\n'
                                  '  -p, --metadataPrefix metadata format\n'
                                  '  -i, --identifier     OAI identifier\n'
                                  '  -s, --set            OAI set(s). Whitespace-separated list\n'
                                  '  -r, --resumptionToken Resume previous harvest\n'
                                  '  -f, --from           from date (datestamp)\n'
                                  '  -u, --until          until date (datestamp)\n'
                                  '  -c, --certificate    path to public certificate (in case of certificate-based harvesting)\n'
                                  '  -k, --key            path to private key (in case of certificate-based harvesting)\n'
                                  '  -l, --user           username (in case of password-protected harvesting)\n'
                                  '  -w, --password       password (in case of password-protected harvesting)\n'
                                  'Deamon mode (periodical or one-shot harvesting mode):\n'
                                  '  -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
                                  '  -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n'
                                  '  -i, --identifier     OAI identifier if wished to run in as a task.\n'
                                  '  --notify-email-to    Receive notifications on given email on successful upload and/or finished harvest.\n'
                                  '  --workflow       specify the workflow to execute.\n'
                                  '  --create-ticket-in   Provide desired ticketing queue to create a ticket in it on upload and/or finished harvest.\n'
                                  '                       Requires a configured ticketing system (BibCatalog).\n',
              specific_params=(
                  "r:i:d:W",
                  ["repository=", "identifier=", "dates=", "workflow=",
                   "notify-email-to=", "create-ticket-in="]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
Exemple #26
0
def task_submit_elaborate_specific_parameter(key, value, opts, dummy):
    """Elaborate a specific parameter of CLI bibrank.

    @param key: the option name being processed (e.g. "-a" or "--add")
    @param value: the raw value attached to the option
    @param opts: all parsed (option, value) pairs; only consulted to detect
        the --add / --del conflict
    @param dummy: unused
    @return: True when the key was recognised and stored, False otherwise
    @raise StandardError: when --add is combined with --del, or when the
        requested --maxmem is lower than base_process_size + 1000
    """
    if key in ("-a", "--add"):
        task_set_option("cmd", "add")
        # "-d" is the short form of --del handled further down; "-x" is
        # still checked so nothing that relied on the old test changes.
        del_flags = (("-x", ""), ("-d", ""), ("--del", ""))
        if any(flag in opts for flag in del_flags):
            raise StandardError("--add incompatible with --del")
    elif key in ("--run", "-w"):
        # Comma separated list of method names to run.
        task_set_option("run", value.split(","))
    elif key in ("-r", "--repair"):
        task_set_option("cmd", "repair")
    elif key in ("-E", "--print-extcites"):
        try:
            extcites = int(value)
        except (TypeError, ValueError):
            extcites = 10  # default fallback value
        task_set_option("print-extcites", extcites)
        task_set_option("cmd", "print-missing")
    elif key in ("-A", "--author-citations"):
        task_set_option("author-citations", "1")
    elif key in ("-d", "--del"):
        task_set_option("cmd", "del")
    elif key in ("-k", "--check"):
        task_set_option("cmd", "check")
    elif key in ("-S", "--stat"):
        task_set_option("cmd", "stat")
    elif key in ("-i", "--id"):
        # Several -i options accumulate into the same list.
        task_set_option("id", task_get_option("id") + split_ranges(value))
        task_set_option("last_updated", "")
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-R", "--rebalance"):
        task_set_option("quick", "no")
    elif key in ("-f", "--flush"):
        task_set_option("flush", int(value))
    elif key in ("-M", "--maxmem"):
        task_set_option("maxmem", int(value))
        if task_get_option("maxmem") < base_process_size + 1000:
            raise StandardError("Memory usage should be higher than %d kB" %
                                (base_process_size + 1000))
    elif key in ("-m", "--modified"):
        task_set_option("modified", get_date_range(value))
        task_set_option("last_updated", "")
    elif key in ("-l", "--lastupdate"):
        task_set_option("last_updated", "last_updated")
    else:
        return False
    return True
Exemple #27
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Switch on the cleanup targets selected by one command line flag.

    Every recognised flag sets one or more task options to True; the
    -a/--all flag sets the seven basic targets in one go. The value, opts
    and args parameters are not used.

    Returns True if the key was recognised, False if it is unknown.
    """
    basic_targets = ('logs', 'tempfiles', 'guests', 'bibxxx', 'documents',
                     'cache', 'tasks')
    # (accepted spellings, task options switched on by them)
    flag_table = (
        (('-l', '--logs'), ('logs',)),
        (('-p', '--tempfiles'), ('tempfiles',)),
        (('-g', '--guests'), ('guests',)),
        (('-b', '--bibxxx'), ('bibxxx',)),
        (('-d', '--documents'), ('documents',)),
        (('-c', '--cache'), ('cache',)),
        (('-t', '--tasks'), ('tasks',)),
        (('-k', '--check-tables'), ('check-tables',)),
        (('-o', '--optimise-tables'), ('optimise-tables',)),
        (('-a', '--all'), basic_targets),
    )
    for spellings, option_names in flag_table:
        if key in spellings:
            for option_name in option_names:
                task_set_option(option_name, True)
            return True
    return False
Exemple #28
0
def task_submit_elaborate_specific_parameter(key, value, opts, dummy_args):
    """Store the task option matching one command line parameter.

    @param key: the option name being processed
    @param value: the value attached to the option (used by -M and -i)
    @param opts: all parsed (option, value) pairs; consulted to reject
        conflicting combinations
    @param dummy_args: unused
    @return: True if the key has been elaborated, False if it is unknown
    @raise StandardError: when load is combined with dump, or rebalance
        with update-sorting
    """
    # The conflict check used to look for '--dump-conf' while the handler
    # only accepted '--dump_conf'; both spellings are honoured in both
    # places so the check fires whichever one is registered.
    dump_long_opts = ('--dump-conf', '--dump_conf')

    # Load configuration
    if key in ('-l', '--load-config'):
        task_set_option('cmd', 'load')
        if ('-d', '') in opts or \
                any((name, '') in opts for name in dump_long_opts):
            raise StandardError(".. conflicting options, please add only one")

    # Dump configuration
    elif key == '-d' or key in dump_long_opts:
        task_set_option('cmd', 'dump')

    # Print sorting methods
    elif key in ('-p', '--print-sorting-methods'):
        task_set_option('cmd', 'print')

    # Rebalance
    elif key in ('-R', '--rebalance'):
        task_set_option('cmd', 'rebalance')
        if ('-S', '') in opts or ('--update-sorting', '') in opts:
            raise StandardError(".. conflicting options, please add only one")

    # Update sorting
    elif key in ('-S', '--update-sorting'):
        task_set_option('cmd', 'sort')

    # Define methods
    elif key in ('-M', '--methods'):
        task_set_option('methods', value)

    # Define records
    elif key in ('-i', '--id'):
        task_set_option('recids', value)

    else:
        return False

    return True
Exemple #29
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """
    Translate one command line parameter into the task option it controls.

    Each recognised key selects an option name and the payload to store
    under it; the option is then written with a single task_set_option
    call. Returns True if the key has been elaborated, False if the key
    is not known.
    """
    if key in ("-n", "--new-event"):
        option_name, payload = "create_event_with_id", value
    elif key in ("-r", "--remove-event"):
        option_name, payload = "destroy_event_with_id", value
    elif key in ("-S", "--show-events"):
        option_name, payload = "list_events", True
    elif key in ("-l", "--event-label"):
        option_name, payload = "event_name", value
    elif key in ("-a", "--args"):
        # comma separated list of column headers
        option_name, payload = "column_headers", value.split(',')
    elif key in ("-c", "--cache-events"):
        # comma separated list of events
        option_name, payload = "cache_events", value.split(',')
    elif key in ("-d", "--dump-config"):
        option_name, payload = "dump_config", True
    elif key in ("-e", "--load-config"):
        option_name, payload = "load_config", True
    else:
        return False

    task_set_option(option_name, payload)
    return True
Exemple #30
0
def task_submit_elaborate_specific_parameter(
        key, value, opts, args):  # pylint: disable-msg=W0613
    """
    Elaborate specific CLI parameters of BibReformat.

    @param key: a parameter key to check
    @param value: a value associated to parameter X{Key}
    @param opts: unused
    @param args: unused
    @return: True for known X{Key} else False. False is also returned when
        the output formats given with -o/--format do not pass validation
        (this path used to return None).
    """
    if key in ("-a", "--all"):
        task_set_option("all", 1)
    elif key in ("--no-missing", ):
        task_set_option("ignore_without", 1)
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-n", "--noprocess"):
        task_set_option("noprocess", 1)
    elif key in ("-f", "--field"):
        task_set_option("field", value)
    elif key in ("-p", "--pattern"):
        task_set_option("pattern", value)
    elif key in ("-m", "--matching"):
        task_set_option("matching", value)
    elif key in ("-o", "--format"):
        input_formats = value.split(',')
        # check the validity of the given output formats
        invalid_format = check_validity_input_formats(input_formats)
        if invalid_format:
            # An exception is raised and caught on the spot; presumably
            # register_exception needs an active exception to report.
            try:
                raise Exception('Invalid output format.')
            except Exception:  # pylint: disable-msg=W0703
                from invenio.ext.logging import register_exception
                register_exception(
                    prefix="The given output format '%s' is not available or "
                    "is invalid. Please try again" % (invalid_format, ),
                    alert_admin=True)
                return False
        # every given format is available
        task_set_option("format", value)
    elif key in ("-i", "--id"):
        task_set_option("recids", value)
    else:
        return False
    return True
Exemple #31
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Switch on the cleanup targets selected by one command line flag.

    A recognised flag sets its task option to True. The -a/--all flag sets
    every target except 'check-tables' and 'optimise-tables'. The value,
    opts and args parameters are not used.

    Returns True if the key was recognised, False if it is unknown.
    """
    # (accepted spellings, task option switched on)
    single_flags = (
        (('-l', '--logs'), 'logs'),
        (('-p', '--tempfiles'), 'tempfiles'),
        (('-g', '--guests'), 'guests'),
        (('-b', '--bibxxx'), 'bibxxx'),
        (('-d', '--documents'), 'documents'),
        (('-c', '--cache'), 'cache'),
        (('-t', '--tasks'), 'tasks'),
        (('-k', '--check-tables'), 'check-tables'),
        (('-o', '--optimise-tables'), 'optimise-tables'),
        (('-S', '--sessions'), 'sessions'),
        (('--bibedit-cache',), 'bibedit-cache'),
    )
    for spellings, option_name in single_flags:
        if key in spellings:
            task_set_option(option_name, True)
            return True

    if key in ('-a', '--all'):
        for option_name in ('logs', 'tempfiles', 'guests', 'bibxxx',
                            'documents', 'cache', 'tasks', 'sessions',
                            'bibedit-cache'):
            task_set_option(option_name, True)
        return True

    return False
Exemple #32
0
def task_parse_options(key, value, opts, args):   # pylint: disable-msg=W0613
    """Store one command line option of the job; required by bibtask.

    Set-valued options (collections, recids, tickets, query, reportnumbers)
    accumulate over repeated use. Always returns True, including for keys
    that are not handled here. Raises StandardError if any standalone
    argument was given.
    """
    if args:
        # There should be no standalone arguments for any bibcatalog job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    def _stored_set(option_name):
        """Return the set kept under option_name, creating it if needed."""
        bucket = task_get_option(option_name)
        if not bucket:
            bucket = set()
            task_set_option(option_name, bucket)
        return bucket

    if key in ('-a', '--new'):
        task_set_option('new', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
    elif key in ('-c', '--collections'):
        bucket = _stored_set('collections')
        for collection_name in value.split(","):
            bucket.update(get_collection_reclist(collection_name))
    elif key in ('-i', '--recids'):
        _stored_set('recids').update(split_ids(value))
    elif key in ('--tickets',):
        bucket = _stored_set('tickets')
        for ticket in value.split(','):
            bucket.add(ticket.strip())
    elif key in ('--all-tickets',):
        task_set_option('all-tickets', True)
    elif key in ('-q', '--query'):
        _stored_set('query').add(value)
    elif key in ('-r', '--reportnumbers'):
        _stored_set('reportnumbers').add(value)
    return True
Exemple #33
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """
    Record the task option that corresponds to one command line parameter.

    Options that carry a value store it as given, list options store the
    comma separated value as a list, and plain switches store True.
    Returns True if the key has been elaborated, False if the key is not
    known.
    """
    # Options storing the value unchanged.
    if key in ("-n", "--new-event"):
        task_set_option("create_event_with_id", value)
        return True
    if key in ("-r", "--remove-event"):
        task_set_option("destroy_event_with_id", value)
        return True
    if key in ("-l", "--event-label"):
        task_set_option("event_name", value)
        return True

    # Options storing a comma separated list.
    if key in ("-a", "--args"):
        task_set_option("column_headers", value.split(','))
        return True
    if key in ("-c", "--cache-events"):
        task_set_option("cache_events", value.split(','))
        return True

    # Plain switches.
    if key in ("-S", "--show-events"):
        task_set_option("list_events", True)
        return True
    if key in ("-d", "--dump-config"):
        task_set_option("dump_config", True)
        return True
    if key in ("-e", "--load-config"):
        task_set_option("load_config", True)
        return True

    return False
Exemple #34
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Validate one command line parameter and store it as a task option.

    The value is first converted according to the key (integer, aspect
    ratio, JSON list, ...). If the conversion fails a message is written
    and False is returned. Otherwise the value is stored under the task
    option name listed in the mapping below.

    @param key: the option name being processed
    @param value: the raw value attached to the option
    @param opts: unused
    @param args: unused
    @return: True if the key has been elaborated, False if the key is
        unknown or its value is invalid
    """
    # Maps CLI parameters to the task option name they are stored under.
    parameter_mapping = {
        '-p': 'profile_name',
        '-i': 'input',
        '--input': 'input',
        '-o': 'output',
        '--output': 'output',
        '-m': 'mode',
        '--mode': 'mode',
        '--acodec': 'acodec',
        '--vcodec': 'vcodec',
        '--abitrate': 'abitrate',
        '--vbitrate': 'vbitrate',
        '--resolution': 'size',
        '--passes': 'passes',
        '--special': 'special',
        '--specialfirst': 'specialfirst',
        '--specialsecond': 'specialsecond',
        '--width': 'width',
        '--height': 'height',
        '--aspect': 'aspect',
        '--number': 'numberof',
        '--positions': 'positions',
        '-D': 'meta_dump',
        '-W': 'meta_input',
        '--dump': 'meta_dump',
        '--write': 'meta_input',
        '--newjobfolder': 'new_job_folder',
        '--oldjobfolder': 'old_job_folder',
        '--recid': 'recid',
        '--collection': 'collection',
        '--search': 'search'
    }

    # Options whose value must be an integer, with the message written
    # when it is not. Keys are compared exactly: the former
    # "key in ('--number')" was a substring test against a plain string.
    integer_messages = {
        '--passes': "Value of '--passes' must be an integer",
        '--height': "Value of '--height' or '--width' must be an integer",
        '--width': "Value of '--height' or '--width' must be an integer",
        '--number': "Value of '--number' must be an integer",
        '--recid': "Value of '--recid' must be an integer",
    }

    if key in integer_messages:
        try:
            value = int(value)
        except ValueError:
            write_message(integer_messages[key])
            return False

    elif key in ('-D', '--dump'):
        # Only the known metadata dump formats are accepted.
        if value not in ("ffprobe", "mediainfo", "pbcore"):
            write_message(
                "Unknown dumping format, must be 'ffprobe', 'mediainfo' or 'pbcore'"
            )
            return False

    elif key == '--substitute':
        # NOTE(review): '--substitute' has no entry in parameter_mapping,
        # so this value is never stored and False is returned below --
        # confirm whether a mapping entry is missing.
        value = True

    elif key == '--positions':
        # Transform the 'positions' parameter into a list.
        try:
            parsed = json.loads(value)
        except ValueError:
            write_message('Value of \'--positions\' must be a json list')
            return False
        if not isinstance(parsed, list):
            write_message('Value of \'--positions\' must be a json list')
            return False
        value = parsed

    elif key == '--aspect':
        # 'x:y' is stored as the float ratio x / y.
        try:
            xasp, yasp = str(value).split(':')
            value = float(xasp) / float(yasp)
        except (ValueError, ArithmeticError):
            write_message('Value of \'--aspect\' must be in \'4:3\' format')
            return False

    # For all general or other parameters just use the mapping dictionary.
    if key in parameter_mapping:
        task_set_option(parameter_mapping[key], value)
        return True
    return False
Exemple #35
0
def cb_parse_option(key, value, opts, args):
    """Store one command line option of the job; required by bibtask.

    @param key: the option name being processed
    @param value: the value attached to the option
    @param opts: unused
    @param args: standalone arguments; none are allowed
    @return: always True, including for keys that are not handled here
    @raise StandardError: when a standalone argument is present, or when
        one of the retired options (--inspire, -r/--recids, -f) is used
    """
    if args:
        # There should be no standalone arguments for any refextract job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
        task_set_option('no-overwrite', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
        task_set_option('no-overwrite', True)
    elif key == '--inspire':
        raise StandardError(
            "The --inspire option does not exist anymore.\n"
            "Please set the config variable CFG_INSPIRE_SITE instead.")
    elif key in ('--kb-reports', '--kb-journals', '--kb-journals-re',
                 '--kb-authors', '--kb-books', '--kb-conferences'):
        # The task option name is the flag without its leading dashes.
        task_set_option(key[2:], value)
    elif key in ('--create-ticket', ):
        task_set_option('create-ticket', True)
    elif key in ('--no-overwrite', ):
        task_set_option('no-overwrite', True)
    elif key in ('--arxiv', ):
        # A one-element tuple: "key in ('--arxiv')" was a substring test.
        task_set_option('arxiv', True)
    elif key in ('-c', '--collections'):
        collections = task_get_option('collections')
        if not collections:
            collections = set()
            task_set_option('collections', collections)
        for name in value.split(","):
            collections.update(perform_request_search(c=name))
    elif key in ('-i', '--id'):
        recids = task_get_option('recids')
        if not recids:
            recids = set()
            task_set_option('recids', recids)
        recids.update(split_ids(value))
    elif key in ('-r', '--recids'):
        raise StandardError(
            "The --recids has been renamed.\n"
            "please use --id for specifying recids.")
    elif key == '-f':
        raise StandardError(
            "refextract is now used to run in daemon mode only.\n"
            "If you would like to run reference extraction on a standalone"
            " PDF file,\n"
            "please use \"docextract file.pdf\"")

    return True
Exemple #36
0
def task_run_core():
    """Run the task selected by the 'mode' task option.

    Dispatches to the encoding, frame extraction, metadata, batch or
    daemon code depending on the mode, passing the relevant task options
    through.

    @return: the result of the delegated call; True after a metadata
        dump; False when the mode is not one of the known modes or when
        'meta' mode has neither a dump nor an input requested (these
        paths used to fall through and return None).
    """
    # NOTE(review): the option parser in this file stores '-p' under
    # 'profile_name' and the job folders under 'new_job_folder' /
    # 'old_job_folder', while this function reads 'profile',
    # 'new_job_dir' and 'old_job_dir' -- confirm which names are intended.
    mode = _topt('mode')

    # Encoding mode
    if mode == 'encode':
        return encode.encode_video(input_file=_topt('input'),
                                   output_file=_topt('output'),
                                   acodec=_topt('acodec'),
                                   vcodec=_topt('vcodec'),
                                   abitrate=_topt('abitrate'),
                                   vbitrate=_topt('vbitrate'),
                                   resolution=_topt('size'),
                                   passes=_topt('passes'),
                                   special=_topt('special'),
                                   specialfirst=_topt('specialfirst'),
                                   specialsecond=_topt('specialsecond'),
                                   width=_topt('width'),
                                   height=_topt('height'),
                                   aspect=_topt('aspect'),
                                   profile=_topt('profile'))

    # Extraction mode
    if mode == 'extract':
        return extract.extract_frames(input_file=_topt('input'),
                                      output_file=_topt('output'),
                                      size=_topt('size'),
                                      positions=_topt('positions'),
                                      numberof=_topt('numberof'),
                                      width=_topt('width'),
                                      height=_topt('height'),
                                      aspect=_topt('aspect'),
                                      profile=_topt('profile'))

    # Metadata mode
    if mode == 'meta':
        if _topt('meta_dump') is not None:
            metadata.dump_metadata(input_file=_topt('input'),
                                   output_file=_topt('output'),
                                   meta_type=_topt('meta_dump'))
            return True
        if _topt('meta_input') is not None:
            # Anything that is not already a dictionary is treated as the
            # name of a JSON file and decoded before being written.
            if not isinstance(_topt('meta_input'), dict):
                the_metadata = metadata.json_decode_file(
                    filename=_topt('meta_input'))
                task_set_option('meta_input', the_metadata)
            return metadata.write_metadata(input_file=_topt('input'),
                                           output_file=_topt('output'),
                                           metadata=_topt('meta_input'))
        return False

    # Batch mode
    if mode == 'batch':
        if _topt('collection'):
            return batch_engine.create_update_jobs_by_collection(
                batch_template_file=_topt('input'),
                collection=_topt('collection'),
                job_directory=_topt('new_job_dir',
                                    CFG_BIBENCODE_DAEMON_DIR_NEWJOBS))
        if _topt('search'):
            return batch_engine.create_update_jobs_by_search(
                pattern=_topt('search'),
                batch_template_file=_topt('input'),
                job_directory=_topt('new_job_dir',
                                    CFG_BIBENCODE_DAEMON_DIR_NEWJOBS))
        return batch_engine.process_batch_job(_topt('input'))

    # Daemon mode
    if mode == 'daemon':
        return daemon.watch_directory(
            _topt('new_job_dir', CFG_BIBENCODE_DAEMON_DIR_NEWJOBS),
            _topt('old_job_dir', CFG_BIBENCODE_DAEMON_DIR_OLDJOBS))

    return False
Exemple #37
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Elaborate specific cli parameters for oaiharvest.

    @param key: the option name being processed
    @param value: the value attached to the option
    @param opts: unused
    @param args: unused
    @return: True if the key has been elaborated, False if it is unknown
    @raise StandardError: when the value given to --dates or
        --notify-email-to is not valid
    """
    if key in ("-r", "--repository"):
        task_set_option('repository', get_repository_names(value))
    elif key in ("--workflow",):
        # A one-element tuple: "key in ("--workflow")" was a substring
        # test and also matched '' or any fragment of the option name.
        # NOTE(review): the workflow value goes through
        # get_repository_names(), same as --repository -- confirm this is
        # the intended parser.
        task_set_option('workflow', get_repository_names(value))
    elif key in ("-i", "--identifier"):
        task_set_option('identifiers', get_identifier_names(value))
    elif key in ("-d", "--dates"):
        task_set_option('dates', get_dates(value))
        # A value was given but nothing was stored for it.
        if value is not None and task_get_option("dates") is None:
            raise StandardError("Date format not valid.")
    elif key in ("--notify-email-to",):
        if email_valid_p(value):
            task_set_option('notify-email-to', value)
        else:
            raise StandardError("E-mail format not valid.")
    elif key in ("--create-ticket-in",):
        task_set_option('create-ticket-in', value)
    else:
        return False
    return True
Exemple #38
0
def main():
    """Start the tool.

    If the command line arguments are those of the 'manual' mode, then
    starts a manual one-time harvesting. Else trigger a BibSched task
    for automated harvesting based on the OAIHarvest admin settings.
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:

        opts, args = getopt.getopt(sys.argv[1:], "o:v:m:p:i:s:f:u:r:c:k:l:w:",
                                   [
                                       "output=",
                                       "verb=",
                                       "method=",
                                       "metadataPrefix=",
                                       "identifier=",
                                       "set=",
                                       "from=",
                                       "until=",
                                       "resumptionToken=",
                                       "certificate=",
                                       "key=",
                                       "user="******"password="******"workflow=",
                                   ])

        # So everything went smoothly: start harvesting in manual mode
        if len([opt
                for opt, opt_value in opts if opt in ['-v', '--verb']]) > 0:
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []
            # get options and arguments
            for opt, opt_value in opts:
                if opt in ["-v", "--verb"]:
                    http_param_dict['verb'] = opt_value
                elif opt in ["-m", '--method']:
                    if opt_value == "GET" or opt_value == "POST":
                        method = opt_value
                elif opt in ["-p", "--metadataPrefix"]:
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ["-i", "--identifier"]:
                    http_param_dict['identifier'] = opt_value
                elif opt in ["-s", "--set"]:
                    sets = opt_value.split()
                elif opt in ["-f", "--from"]:
                    http_param_dict['from'] = opt_value
                elif opt in ["-u", "--until"]:
                    http_param_dict['until'] = opt_value
                elif opt in ["-r", "--resumptionToken"]:
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ["-o", "--output"]:
                    output = opt_value
                elif opt in ["-c", "--certificate"]:
                    cert_file = opt_value
                elif opt in ["-k", "--key"]:
                    key_file = opt_value
                elif opt in ["-l", "--user"]:
                    user = opt_value
                elif opt in ["-w", "--password"]:
                    password = opt_value
                elif opt in ["-V", "--version"]:
                    print(__revision__)
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)

            if len(args) > 0:
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                (addressing_scheme, network_location, path, dummy1, dummy2,
                 dummy3) = urllib.parse.urlparse(base_url)
                secure = (addressing_scheme == "https")

                if (cert_file and not key_file) or \
                        (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")

                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    if not secure:
                        sys.stderr.write(
                            "*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt as error:
                        sys.stderr.write("\n%s\n" % (error, ))
                        sys.exit(0)

                getter.harvest(network_location, path, http_param_dict, method,
                               output, sets, secure, user, password, cert_file,
                               key_file)

                sys.stderr.write(
                    "Harvesting completed at: %s\n\n" %
                    time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now

            if len([opt for opt, opt_value in opts if
                    opt in ['-i', '--identifier']]) == 0 \
                and len(args) > 1 or \
                    (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")
    except getopt.error:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass
        # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.

    num_of_critical_parameter = 0
    num_of_critical_parameterb = 0
    repositories = []

    for opt in sys.argv[1:]:
        if opt in "-r" or opt in "--repository":
            num_of_critical_parameter += 1
        elif opt in "--workflow":
            num_of_critical_parameterb += 1
        if num_of_critical_parameter > 1 or num_of_critical_parameterb > 1:
            usage(1, "You can't specify twice -r or --workflow")

    if num_of_critical_parameter == 1:
        if "-r" in sys.argv:
            position = sys.argv.index("-r")
        else:
            position = sys.argv.index("--repository")
        repositories = sys.argv[position + 1].split(",")
        if len(repositories) > 1 and \
                ("-i" in sys.argv or "--identifier" in sys.argv):
            usage(
                1, "It is impossible to harvest an identifier from several "
                "repositories.")

    if num_of_critical_parameterb == 1:

        position = sys.argv.index("--workflow")
        workflows = sys.argv[position + 1].split(",")

        for workflow_candidate in workflows:
            if workflow_candidate not in registry_workflows:
                usage(1, "The workflow %s doesn't exist." % workflow_candidate)

    if num_of_critical_parameter == 1 and num_of_critical_parameterb == 0:

        for name_repository in repositories:
            try:
                oaiharvest_instance = OaiHARVEST.get(
                    OaiHARVEST.name == name_repository).one()
                if oaiharvest_instance.workflows not in registry_workflows:
                    usage(
                        1,
                        "The repository %s doesn't have a valid workflow specified."
                        % name_repository)
            except orm.exc.NoResultFound:
                usage(
                    1, "The repository %s doesn't exist in our database." %
                    name_repository)

    elif num_of_critical_parameter == 1 and num_of_critical_parameterb == 1:

        for name_repository in repositories:
            try:
                OaiHARVEST.get(OaiHARVEST.name == name_repository).one()
            except orm.exc.NoResultFound:
                usage(
                    1, "The repository %s doesn't exist in our database." %
                    name_repository)

        print("A workflow has been specified, overriding the repository one.")

    task_set_option("repository", None)
    task_set_option("dates", None)
    task_set_option("workflow", None)
    task_set_option("identifiers", None)
    task_init(
        authorization_action='runoaiharvest',
        authorization_msg="oaiharvest Task Submission",
        description="""
Harvest records from OAI sources.
Manual vs automatic harvesting:
   - Manual harvesting retrieves records from the specified URL,
     with the specified OAI arguments. Harvested records are displayed
     on the standard output or saved to a file, but are not integrated
     into the repository. This mode is useful to 'play' with OAI
     repositories or to build special harvesting scripts.
   - Automatic harvesting relies on the settings defined in the OAI
     Harvest admin interface to periodically retrieve the repositories
     and sets to harvest. It also take care of harvesting only new or
     modified records. Records harvested using this mode are converted
     and integrated into the repository, according to the settings
     defined in the OAI Harvest admin interface.

Examples:
Manual (single-shot) harvesting mode:
   Save to /tmp/z.xml records from CDS added/modified between 2004-04-01
   and 2004-04-02, in MARCXML:
     $ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml -o/tmp/z.xml http://cds.cern.ch/oai2d
Automatic (periodical) harvesting mode:
   Schedule daily harvesting of all repositories defined in OAIHarvest admin:
     $ oaiharvest -s 24h
   Schedule daily harvesting of repository 'arxiv', defined in OAIHarvest admin:
     $ oaiharvest -r arxiv -s 24h
   Harvest in 10 minutes from 'pubmed' repository records added/modified
   between 2005-05-05 and 2005-05-10:
     $ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m
""",
        help_specific_usage='Manual single-shot harvesting mode:\n'
        '  -o, --output         specify output file\n'
        '  -v, --verb           OAI verb to be executed\n'
        '  -m, --method         http method (default POST)\n'
        '  -p, --metadataPrefix metadata format\n'
        '  -i, --identifier     OAI identifier\n'
        '  -s, --set            OAI set(s). Whitespace-separated list\n'
        '  -r, --resuptionToken Resume previous harvest\n'
        '  -f, --from           from date (datestamp)\n'
        '  -u, --until          until date (datestamp)\n'
        '  -c, --certificate    path to public certificate (in case of certificate-based harvesting)\n'
        '  -k, --key            path to private key (in case of certificate-based harvesting)\n'
        '  -l, --user           username (in case of password-protected harvesting)\n'
        '  -w, --password       password (in case of password-protected harvesting)\n'
        'Deamon mode (periodical or one-shot harvesting mode):\n'
        '  -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
        '  -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n'
        '  -i, --identifier     OAI identifier if wished to run in as a task.\n'
        '  --notify-email-to    Receive notifications on given email on successful upload and/or finished harvest.\n'
        '  --workflow       specify the workflow to execute.\n'
        '  --create-ticket-in   Provide desired ticketing queue to create a ticket in it on upload and/or finished harvest.\n'
        '                       Requires a configured ticketing system (BibCatalog).\n',
        specific_params=("r:i:d:W", [
            "repository=", "identifier=", "dates=", "workflow=",
            "notify-email-to=", "create-ticket-in="
        ]),
        task_submit_elaborate_specific_parameter_fnc=
        task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
Exemple #39
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Elaborate specific cli parameters for oaiharvest.

    Stores the parsed form of ``value`` under the task option that
    corresponds to ``key``.

    @param key: the command line option being processed (e.g. "-r")
    @param value: the raw string value attached to that option
    @param opts: unused here
    @param args: unused here
    @return: True when ``key`` was recognised and handled, False otherwise
    @raise StandardError: when the dates or the e-mail address are malformed
    """
    if key in ("-r", "--repository"):
        task_set_option('repository', get_repository_names(value))
    # One-element tuple on purpose: a bare ("--workflow") is just a string
    # and ``in`` would then perform a substring test.
    elif key in ("--workflow", ):
        task_set_option('workflow', get_repository_names(value))
    elif key in ("-i", "--identifier"):
        task_set_option('identifiers', get_identifier_names(value))
    elif key in ("-d", "--dates"):
        task_set_option('dates', get_dates(value))
        # get_dates() result of None for a non-None input is treated as a
        # parse failure.
        if value is not None and task_get_option("dates") is None:
            raise StandardError("Date format not valid.")
    elif key in ("--notify-email-to", ):
        if email_valid_p(value):
            task_set_option('notify-email-to', value)
        else:
            raise StandardError("E-mail format not valid.")
    elif key in ("--create-ticket-in", ):
        task_set_option('create-ticket-in', value)
    else:
        return False
    return True
Exemple #40
0
def task_submit_check_options():
    """
    Check the submitted WebStat options and run the immediate actions.

    NOTE: Depending on the parameters, either "BibSched mode" or plain
          straightforward execution mode is entered.

    The create/destroy/list/dump_config/load_config options are executed
    right away and terminate the process with ``sys.exit(0)``.  Only the
    ``cache_events`` option lets the function return True, after storing
    the selected event names in the "keyevents" and "customevents" task
    options.

    @return: True when there is at least one event to cache; False when
        no known option is set or no valid event was selected (False
        means that the --help should be displayed).
    """
    if task_has_option("create_event_with_id"):
        print(
            webstat.create_customevent(task_get_option("create_event_with_id"),
                                       task_get_option("event_name", None),
                                       task_get_option("column_headers", [])))
        sys.exit(0)

    elif task_has_option("destroy_event_with_id"):
        print(
            webstat.destroy_customevent(
                task_get_option("destroy_event_with_id")))
        sys.exit(0)

    elif task_has_option("list_events"):
        events = webstat._get_customevents()
        if not events:
            print("There are no custom events available.")
        else:
            print("Available custom events are:\n")
            print('\n'.join([
                x[0] + ": " +
                ("No descriptive name" if x[1] is None else str(x[1]))
                for x in events
            ]))
        sys.exit(0)

    elif task_has_option("cache_events"):
        events = task_get_option("cache_events")

        write_message(str(events), verbose=9)

        # Start from "nothing selected" so that an empty selector falls
        # through to the help path instead of raising NameError.
        keyevents_to_cache = []
        customevents_to_cache = []
        selector = events[0] if events else ''

        if selector == 'ALL':
            keyevents_to_cache = list(webstat.KEYEVENT_REPOSITORY.keys())
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif selector == 'KEYEVENTS':
            keyevents_to_cache = list(webstat.KEYEVENT_REPOSITORY.keys())

        elif selector == 'CUSTOMEVENTS':
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif selector != '':
            keyevents_to_cache = [
                x for x in webstat.KEYEVENT_REPOSITORY.keys() if x in events
            ]
            # Custom events come back as rows; compare the id column (x[0])
            # with the requested names, not the whole row.
            customevents_to_cache = [
                x[0] for x in webstat._get_customevents() if x[0] in events
            ]

        # Control so that we have valid event names
        if not keyevents_to_cache and not customevents_to_cache:
            # Oops, no events. Abort and display help.
            return False

        task_set_option("keyevents", keyevents_to_cache)
        task_set_option("customevents", customevents_to_cache)
        return True

    elif task_has_option("dump_config"):
        print("""\
[general]
visitors_box = True
search_box = True
record_box = True
bibsched_box = True
basket_box = True
apache_box = True
uptime_box = True

[webstat_custom_event_1]
name = baskets
param1 = action
param2 = basket
param3 = user

[apache_log_analyzer]
profile = nil
nb-histogram-items-to-print = 20
exclude-ip-list = ("137.138.249.162")
home-collection = "Atlantis Institute of Fictive Science"
search-interface-url = "/?"
detailed-record-url = "/%s/"
search-engine-url = "/search?"
search-engine-url-old-style = "/search.py?"
basket-url = "/yourbaskets/"
add-to-basket-url = "/yourbaskets/add"
display-basket-url = "/yourbaskets/display"
display-public-basket-url = "/yourbaskets/display_public"
alert-url = "/youralerts/"
display-your-alerts-url = "/youralerts/list"
display-your-searches-url = "/youralerts/display"
""" % CFG_SITE_RECORD)
        sys.exit(0)

    elif task_has_option("load_config"):
        from ConfigParser import ConfigParser
        conf = ConfigParser()
        conf.read(CFG_WEBSTAT_CONFIG_PATH)
        for section in conf.sections():
            if section[:21] == "webstat_custom_event_":
                cols = []
                name = ""
                for option, value in conf.items(section):
                    if option == "name":
                        name = value
                    if option[:5] == "param":
                        # add the column name in its position
                        # NOTE(review): only the last character of the option
                        # is parsed, so "param10" and above are not handled.
                        index = int(option[-1]) - 1
                        while len(cols) <= index:
                            cols.append("")
                        cols[index] = value
                if name:
                    res = run_sql(
                        "SELECT COUNT(id) FROM staEVENT WHERE id = %s",
                        (name, ))
                    if res[0][0] == 0:
                        # name does not exist, create customevent
                        webstat.create_customevent(name, name, cols)
                    else:
                        # name already exists, update customevent
                        webstat.modify_customevent(name, cols=cols)

        sys.exit(0)

    else:
        # False means that the --help should be displayed
        return False
Exemple #41
0
def _dbdump_elaborate_submit_param(key, value, dummyopts, dummyargs):
    """
    Elaborate task submission parameter.  See bibtask's
    task_submit_elaborate_specific_parameter_fnc for help.

    @param key: the command line option being processed
    @param value: the raw string value attached to that option
    @param dummyopts: unused
    @param dummyargs: unused
    @return: True when ``key`` was recognised and stored, False otherwise
    @raise StandardError: when the value attached to a recognised option
        is not acceptable (non-integer number, output that is not a
        directory, missing gzip, no slave configured, invalid regexp)
    """
    if key in ('-n', '--number'):
        try:
            task_set_option('number', int(value))
        except ValueError:
            raise StandardError("ERROR: Number '%s' is not integer." % (value,))
    elif key in ('-o', '--output'):
        if os.path.isdir(value):
            task_set_option('output', value)
        else:
            raise StandardError("ERROR: Output '%s' is not a directory." %
                                (value,))
    elif key in ('--params',):
        task_set_option('params', value)
    elif key in ('--compress',):
        # Compression needs a configured gzip binary that actually exists.
        if not CFG_PATH_GZIP or not os.path.exists(CFG_PATH_GZIP):
            raise StandardError("ERROR: No valid gzip path is defined.")
        task_set_option('compress', True)
    elif key in ('-S', '--slave'):
        if value:
            task_set_option('slave', value)
        else:
            # No explicit slave given: fall back to the configured one.
            if not CFG_DATABASE_SLAVE:
                raise StandardError("ERROR: No slave defined.")
            task_set_option('slave', CFG_DATABASE_SLAVE)
    elif key in ('--dump-on-slave-helper', ):
        task_set_option('dump_on_slave_helper_mode', True)
    elif key in ('--ignore-tables',):
        # Compile only to validate; the raw pattern string is what is stored.
        try:
            re.compile(value)
        except re.error:
            raise StandardError(
                "ERROR: Passed string: '%s' is not a valid regular expression."
                % value)
        else:
            task_set_option("ignore_tables", value)
    elif key in ('--disable-workers', ):
        task_set_option('disable_workers', True)
    else:
        return False
    return True
Exemple #42
0
def cb_parse_option(key, value, opts, args):
    """ Must be defined for bibtask to create a task

    Stores the task option that corresponds to ``key``.  Keys that are
    not recognised are silently accepted.

    @param key: the command line option being processed
    @param value: the raw string value attached to that option
    @param opts: unused here
    @param args: standalone command line arguments; must be empty
    @return: always True
    @raise StandardError: when any standalone argument is present
    """
    if args:
        # There should be no standalone arguments for any refextract job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
        task_set_option('no-overwrite', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
        task_set_option('no-overwrite', True)
    elif key in ('-i', '--inspire', ):
        task_set_option('inspire', True)
    elif key in ('--kb-reports', ):
        task_set_option('kb-reports', value)
    elif key in ('--kb-journals', ):
        task_set_option('kb-journals', value)
    elif key in ('--kb-journals-re', ):
        task_set_option('kb-journals-re', value)
    elif key in ('--kb-authors', ):
        task_set_option('kb-authors', value)
    elif key in ('--kb-books', ):
        task_set_option('kb-books', value)
    elif key in ('--kb-conferences', ):
        task_set_option('kb-conferences', value)
    elif key in ('--create-ticket', ):
        task_set_option('create-ticket', True)
    elif key in ('--no-overwrite', ):
        task_set_option('no-overwrite', True)
    # One-element tuple on purpose: a bare ('--arxiv') is just a string and
    # ``in`` would then perform a substring test.
    elif key in ('--arxiv', ):
        task_set_option('arxiv', True)
    elif key in ('-c', '--collections'):
        # Accumulate across repeated -c options in a single shared set.
        collections = task_get_option('collections')
        if not collections:
            collections = set()
            task_set_option('collections', collections)
        for v in value.split(","):
            collections.update(perform_request_search(c=v))
    elif key in ('-r', '--recids'):
        # Accumulate across repeated -r options in a single shared set.
        recids = task_get_option('recids')
        if not recids:
            recids = set()
            task_set_option('recids', recids)
        recids.update(split_ids(value))

    return True
Exemple #43
0
def task_run_core():
    """
    Main daemon task.

    Loads the plugins and rules, runs every rule against the records it
    applies to (batch rules once per batch, the other rules record by
    record), uploads the amended records and finally records the last
    run of each rule.

    Returns True once everything has been processed; this function has
    no path that returns False.
    """
    plugins = load_plugins()
    rules = load_rules(plugins)
    task_set_option('plugins', plugins)
    recids_for_rules = get_recids_for_rules(rules)

    # Collect the union of all record ids and sort the rules by kind.
    all_recids = intbitset([])
    single_rules = set()
    batch_rules = set()
    for name, recids in recids_for_rules.iteritems():
        all_recids.union_update(recids)
        is_batch = plugins[rules[name]["check"]]["batch"]
        bucket = batch_rules if is_batch else single_rules
        bucket.add(name)

    pending_holdingpen = []
    pending_replace = []
    for chunk in iter_batches(all_recids, CFG_BATCH_SIZE):

        # First the batch rules: each gets all its records of this chunk
        for name in batch_rules:
            rule = rules[name]
            wanted = recids_for_rules[name]
            task_sleep_now_if_required(can_stop_too=True)
            matching = [rec for _pos, rec_id, rec in chunk
                        if rec_id in wanted]
            if matching:
                check_records(rule, matching)

        # Then run the records through the normal rules, one at a time
        for position, recid, record in chunk:
            total = len(all_recids)
            percent = int(float(position) / total * 100)
            task_update_progress("Processing record %s/%s (%i%%)." %
                                 (position, total, percent))
            write_message("Processing record %s" % recid)

            for name in single_rules:
                rule = rules[name]
                wanted = recids_for_rules[name]
                task_sleep_now_if_required(can_stop_too=True)
                if recid in wanted:
                    check_record(rule, record)

            if record.amended:
                queue = (pending_holdingpen if record.holdingpen
                         else pending_replace)
                queue.append(record)

            if not record.valid:
                submit_ticket(record, recid)

        # Flush each upload queue as soon as it holds a full batch
        if len(pending_holdingpen) >= CFG_BATCH_SIZE:
            upload_amendments(pending_holdingpen, True)
            pending_holdingpen = []
        if len(pending_replace) >= CFG_BATCH_SIZE:
            upload_amendments(pending_replace, False)
            pending_replace = []

    ## Upload whatever amended records are still queued
    if pending_holdingpen:
        upload_amendments(pending_holdingpen, True)
    if pending_replace:
        upload_amendments(pending_replace, False)

    # Update the database with the last time the rules were run
    for rule_name in rules.keys():
        update_rule_last_run(rule_name)

    return True
Exemple #44
0
def task_submit_check_options():
    """
    Check the submitted WebStat options and run the immediate actions.

    NOTE: Depending on the parameters, either "BibSched mode" or plain
          straightforward execution mode is entered.

    The create/destroy/list/dump_config/load_config options are executed
    right away and terminate the process with ``sys.exit(0)``.  Only the
    ``cache_events`` option lets the function return True, after storing
    the selected event names in the "keyevents" and "customevents" task
    options.

    @return: True when there is at least one event to cache; False when
        no known option is set or no valid event was selected (False
        means that the --help should be displayed).
    """
    if task_has_option("create_event_with_id"):
        print(webstat.create_customevent(task_get_option("create_event_with_id"),
                                         task_get_option("event_name", None),
                                         task_get_option("column_headers", [])))
        sys.exit(0)

    elif task_has_option("destroy_event_with_id"):
        print(webstat.destroy_customevent(task_get_option("destroy_event_with_id")))
        sys.exit(0)

    elif task_has_option("list_events"):
        events = webstat._get_customevents()
        if not events:
            print("There are no custom events available.")
        else:
            print("Available custom events are:\n")
            print('\n'.join([x[0] + ": " + ("No descriptive name" if x[1] is None else str(x[1]))
                             for x in events]))
        sys.exit(0)

    elif task_has_option("cache_events"):
        events = task_get_option("cache_events")

        write_message(str(events), verbose=9)

        # Start from "nothing selected" so that an empty selector falls
        # through to the help path instead of raising NameError.
        keyevents_to_cache = []
        customevents_to_cache = []
        selector = events[0] if events else ''

        if selector == 'ALL':
            keyevents_to_cache = list(webstat.KEYEVENT_REPOSITORY.keys())
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif selector == 'KEYEVENTS':
            keyevents_to_cache = list(webstat.KEYEVENT_REPOSITORY.keys())

        elif selector == 'CUSTOMEVENTS':
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif selector != '':
            keyevents_to_cache = [x for x in webstat.KEYEVENT_REPOSITORY.keys() if x in events]
            # Custom events come back as rows; compare the id column (x[0])
            # with the requested names, not the whole row.
            customevents_to_cache = [x[0] for x in webstat._get_customevents() if x[0] in events]

        # Control so that we have valid event names
        if not keyevents_to_cache and not customevents_to_cache:
            # Oops, no events. Abort and display help.
            return False

        task_set_option("keyevents", keyevents_to_cache)
        task_set_option("customevents", customevents_to_cache)
        return True

    elif task_has_option("dump_config"):
        print("""\
[general]
visitors_box = True
search_box = True
record_box = True
bibsched_box = True
basket_box = True
apache_box = True
uptime_box = True

[webstat_custom_event_1]
name = baskets
param1 = action
param2 = basket
param3 = user

[apache_log_analyzer]
profile = nil
nb-histogram-items-to-print = 20
exclude-ip-list = ("137.138.249.162")
home-collection = "Atlantis Institute of Fictive Science"
search-interface-url = "/?"
detailed-record-url = "/%s/"
search-engine-url = "/search?"
search-engine-url-old-style = "/search.py?"
basket-url = "/yourbaskets/"
add-to-basket-url = "/yourbaskets/add"
display-basket-url = "/yourbaskets/display"
display-public-basket-url = "/yourbaskets/display_public"
alert-url = "/youralerts/"
display-your-alerts-url = "/youralerts/list"
display-your-searches-url = "/youralerts/display"
""" % CFG_SITE_RECORD)
        sys.exit(0)

    elif task_has_option("load_config"):
        from ConfigParser import ConfigParser
        conf = ConfigParser()
        conf.read(CFG_WEBSTAT_CONFIG_PATH)
        for section in conf.sections():
            if section[:21] == "webstat_custom_event_":
                cols = []
                name = ""
                for option, value in conf.items(section):
                    if option == "name":
                        name = value
                    if option[:5] == "param":
                        # add the column name in its position
                        # NOTE(review): only the last character of the option
                        # is parsed, so "param10" and above are not handled.
                        index = int(option[-1]) - 1
                        while len(cols) <= index:
                            cols.append("")
                        cols[index] = value
                if name:
                    res = run_sql("SELECT COUNT(id) FROM staEVENT WHERE id = %s", (name, ))
                    if res[0][0] == 0:
                        # name does not exist, create customevent
                        webstat.create_customevent(name, name, cols)
                    else:
                        # name already exists, update customevent
                        webstat.modify_customevent(name, cols=cols)

        sys.exit(0)

    else:
        # False means that the --help should be displayed
        return False
Exemple #45
0
def task_parse_options(key, val, *_):
    """ Must be defined for bibtask to create a task

    Handles one command line option: either resets the last run of the
    listed rules (--all) or stores the matching task option.  Returns
    True for every recognised option and raises StandardError for any
    other key.
    """
    if key in ("-a", "--all"):
        # val is a comma separated list of rule names
        for rule_name in val.split(","):
            reset_rule_last_run(rule_name)
        return True

    if key in ("-e", "--enable-rules"):
        task_set_option("enabled_rules", set(val.split(",")))
        return True

    if key in ("-i", "--id"):
        task_set_option("record_ids", intbitset(split_cli_ids_arg(val)))
        return True

    if key in ("-q", "--queue"):
        task_set_option("queue", val)
        return True

    if key in ("-t", "--no-tickets"):
        task_set_option("no_tickets", True)
        return True

    if key in ("-b", "--no-upload"):
        task_set_option("no_upload", True)
        return True

    if key in ("-n", "--dry-run"):
        # A dry run disables both uploads and tickets
        for flag in ("no_upload", "no_tickets"):
            task_set_option(flag, True)
        return True

    if key in ("-c", "--config"):
        task_set_option("config", val)
        return True

    raise StandardError("Error: Unrecognised argument '%s'." % key)
Exemple #46
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """ Given the string key it checks its meaning, eventually using the
    value. Usually it fills some key in the options dict.
    It must return True if it has elaborated the key, False, if it doesn't
    know that key.

    Values of some options are validated and converted first (integers,
    aspect ratio, JSON list of positions); a value that fails validation
    is reported through write_message and False is returned.

    @param key: the command line option being processed
    @param value: the raw string value attached to that option
    @param opts: unused here
    @param args: unused here
    @return: True when the key is in the mapping and its value was stored,
        False for an unknown key or an invalid value
    """
    ## A dictionary used for mapping CLI parameters to task_option keys
    parameter_mapping = {
        '-p': 'profile_name',
        '-i': 'input',
        '--input': 'input',
        '-o': 'output',
        '--output': 'output',
        '-m': 'mode',
        '--mode': 'mode',
        '--acodec': 'acodec',
        '--vcodec': 'vcodec',
        '--abitrate': 'abitrate',
        '--vbitrate': 'vbitrate',
        '--resolution': 'size',
        '--passes': 'passes',
        '--special': 'special',
        '--specialfirst': 'specialfirst',
        '--specialsecond': 'specialsecond',
        '--width': 'width',
        '--height': 'height',
        '--aspect': 'aspect',
        '--number': 'numberof',
        '--positions': 'positions',
        '-D': 'meta_dump',
        '-W': 'meta_input',
        '--dump': 'meta_dump',
        '--write': 'meta_input',
        '--newjobfolder': 'new_job_folder',
        '--oldjobfolder': 'old_job_folder',
        '--recid': 'recid',
        '--collection': 'collection',
        '--search': 'search'
    }

    ## PASSES ##
    ## Transform 'passes' to integer
    if key in ('--passes', ):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--passes\' must be an integer')
            return False

    ## HEIGHT, WIDTH ##
    if key in ('--height', '--width'):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--height\' or \'--width\''
                          ' must be an integer')
            return False

    ## META MODE ##
    ## Only the known dumping formats are accepted
    if key in ('-D', '--dump'):
        if value not in ("ffprobe", "mediainfo", "pbcore"):
            write_message("Unknown dumping format, must be 'ffprobe', 'mediainfo' or 'pbcore'")
            return False
    # NOTE(review): '--substitute' has no entry in parameter_mapping, so
    # this key still ends up returning False below -- confirm the intent.
    if key in ('--substitute', ):
        value = True

    ## POSITIONS ##
    ## Transform the 'positions' parameter into a list
    if key in ('--positions',):
        try:
            parsed = json.loads(value)
        except ValueError:
            write_message(
                'Value of \'--positions\' must be a json list'
            )
            return False
        if not isinstance(parsed, list):
            write_message(
                'Value of \'--positions\' must be a json list'
            )
            return False
        value = parsed

    ## NUMBEROF ##
    ## Transform 'number' to integer
    ## (one-element tuples below: a bare ('--x') is a string and ``in``
    ## would then perform a substring test)
    if key in ('--number', ):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--number\' must be an integer')
            return False

    ## ASPECT ##
    ## Transform 'W:H' into the float ratio W / H
    if key in ('--aspect', ):
        try:
            xasp, yasp = str(value).split(':')
            value = float(xasp) / float(yasp)
        except (ValueError, ZeroDivisionError):
            # ValueError: wrong number of parts or non-numeric part;
            # ZeroDivisionError: height given as 0.
            write_message('Value of \'--aspect\' must be in \'4:3\' format')
            return False

    ## RECID ##
    if key in ('--recid', ):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--recid\' must be an integer')
            return False

    ## GENERAL MAPPING ##
    ## For all general or other parameters just use the mapping dictionary
    if key in parameter_mapping:
        task_set_option(parameter_mapping[key], value)
        return True
    return False
Exemple #47
0
def task_submit_elaborate_specific_parameter(key, value, opts, args):  # pylint: disable-msg=W0613
    """
    Elaborate specific CLI parameters of BibReformat.

    An invalid output format given with -o/--format is registered as an
    exception (with an alert to the admin) and reported as not
    elaborated.

    @param key: a parameter key to check
    @param value: a value associated to parameter X{Key}
    @param opts: unused
    @param args: unused
    @return: True for known X{Key} else False.
    """
    if key in ("-a", "--all"):
        task_set_option("all", 1)
    elif key in ("--no-missing", ):
        task_set_option("ignore_without", 1)
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-n", "--noprocess"):
        task_set_option("noprocess", 1)
    elif key in ("-f", "--field"):
        task_set_option("field", value)
    elif key in ("-p", "--pattern"):
        task_set_option("pattern", value)
    elif key in ("-m", "--matching"):
        task_set_option("matching", value)
    elif key in ("-o", "--format"):
        input_formats = value.split(',')
        # check the validity of the given output formats
        invalid_format = check_validity_input_formats(input_formats)
        if invalid_format:
            # Raise and catch right away so that register_exception is
            # called while an exception is being handled.
            try:
                raise Exception('Invalid output format.')
            except Exception:  # pylint: disable-msg=W0703
                from invenio.ext.logging import register_exception
                register_exception(
                    prefix="The given output format '%s' is not available or "
                           "is invalid. Please try again" %
                           (invalid_format, ), alert_admin=True)
                # Explicit False (was a bare return) to honour the
                # documented True/False contract.
                return False
        else:  # every given format is available
            task_set_option("format", value)
    elif key in ("-i", "--id"):
        task_set_option("recids", value)
    else:
        return False
    return True
Exemple #48
0
def task_submit_elaborate_specific_parameter(key, value, opts, dummy_args):
    """Given the string key it checks its meaning, eventually using the
    value. Usually it fills some key in the options dict.
    It must return True if it has elaborated the key, False, if it doesn't
    know that key.

    The command options store their command name under the 'cmd' task
    option; -M and -i store their value under 'methods' and 'recids'.

    @param key: the command line option being processed
    @param value: the raw string value attached to that option
    @param opts: list of all (option, value) pairs given on the command
        line, used to detect conflicting commands
    @param dummy_args: unused
    @return: True when the key was recognised, False otherwise
    @raise StandardError: when load is combined with dump, or rebalance
        with update-sorting
    """
    # Both spellings of the dump option are accepted: the original code
    # checked for '--dump-conf' in one place and '--dump_conf' in another.
    dump_keys = ('-d', '--dump_conf', '--dump-conf')

    #Load configuration
    if key in ('-l', '--load-config'):
        task_set_option('cmd', 'load')
        for dump_key in dump_keys:
            if (dump_key, '') in opts:
                raise StandardError(".. conflicting options, please add only one")

    #Dump configuration
    elif key in dump_keys:
        task_set_option('cmd', 'dump')

    #Print sorting methods
    elif key in ('-p', '--print-sorting-methods'):
        task_set_option('cmd', 'print')

    #Rebalance
    elif key in ('-R', '--rebalance'):
        task_set_option('cmd', 'rebalance')
        if ('-S', '') in opts or ('--update-sorting', '') in opts:
            raise StandardError(".. conflicting options, please add only one")

    #Update sorting
    elif key in ('-S', '--update-sorting'):
        task_set_option('cmd', 'sort')

    #Define methods
    elif key in ('-M', '--methods'):
        task_set_option('methods', value)

    #Define records
    elif key in ('-i', '--id'):
        task_set_option('recids', value)

    else:
        return False

    return True
Exemple #49
0
def task_parse_options(key, value, opts, args):   # pylint: disable-msg=W0613
    """ Must be defined for bibtask to create a task

    Stores the task option matching ``key``.  Options that may be given
    several times accumulate their values in a set kept in the task
    options.  Keys that are not recognised are accepted silently.

    Always returns True; raises StandardError when standalone arguments
    are present.
    """
    if args:
        # There should be no standalone arguments for any bibcatalog job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    def _accumulator(option_name):
        """Return the set stored under option_name, creating it if needed."""
        current = task_get_option(option_name)
        if not current:
            current = set()
            task_set_option(option_name, current)
        return current

    if key in ('-a', '--new'):
        task_set_option('new', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
    elif key in ('-c', '--collections'):
        bucket = _accumulator('collections')
        for collection_name in value.split(","):
            bucket.update(get_collection_reclist(collection_name))
    elif key in ('-i', '--recids'):
        _accumulator('recids').update(split_ids(value))
    elif key in ('--tickets',):
        bucket = _accumulator('tickets')
        for ticket in value.split(','):
            bucket.add(ticket.strip())
    elif key in ('--all-tickets',):
        task_set_option('all-tickets', True)
    elif key in ('-q', '--query'):
        _accumulator('query').add(value)
    elif key in ('-r', '--reportnumbers'):
        _accumulator('reportnumbers').add(value)
    return True
Exemple #50
0
def task_submit_check_options():
    """Check the task's arguments for validity before it is submitted.

    The checks run in this order and stop at the first failure: FFmpeg
    build configuration, mode, input file, output file, and finally the
    checks specific to the selected mode (encode, extract, meta, batch or
    daemon).  Every failure is reported through write_message.

    Side effects performed by this function:
      * encode mode: 'passes' is set to 1 when it was not given;
      * meta mode: a 'meta_input' value that is not an existing path is
        parsed as JSON and the option is replaced by the parsed object;
      * batch mode: a warning banner and the decoded template are printed;
      * daemon mode: the task parameter 'task_specific_name' is set to
        'daemon'.

    Returns True if every check went fine, False otherwise.
    """

    def _size_option_ok():
        """Return True if 'size' is unset, matches the 'WxH' pattern or is
        one of the listed FFmpeg sizes; report the problem and return
        False otherwise."""
        size = _topt('size')
        if size is None:
            return True
        if (CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(size)
                or size in CFG_BIBENCODE_FFMPEG_VALID_SIZES):
            return True
        write_message(
            '%s is not a valid frame size.\nEither use the'
            ' \'WxH\' notation or one of these values:\n%s' %
            (size, CFG_BIBENCODE_FFMPEG_VALID_SIZES))
        return False

    def _announce_batch_update(rule, headline):
        """Print the batch warning banner followed by the template that is
        decoded from the input file."""
        template = json_decode_file(_topt('input'))
        print('\n')
        print(rule)
        print(headline)
        print(rule)
        print('\n')
        print('The selected template file contains:')
        pprint(template)
        print('\n')

    #----------------#
    # General Checks #
    #----------------#

    ## FFMPEG CONFIGURATION ##
    ## The status of ffmpeg should be checked before a task is submitted
    ## There is a minimum configuration that ffmpeg must be compiled with
    ## See bibencode_utils and bibencode_config
    missing = check_ffmpeg_configuration()
    if missing:
        ## Prints missing configuration, one tab-indented item per line
        listing = ''.join('\t' + item + '\n' for item in missing)
        write_message(
            "FFmpeg options are missing. Please recompile and add:\n" + listing)
        return False

    ## MODE ##
    ## Check if the mode is a valid
    ## NOTE(review): the mode is read once here; this assumes _topt is a
    ## plain getter without side effects -- confirm.
    mode = _topt('mode')
    if mode is None:
        write_message('You have to specify a mode using \'-m MODE\'')
        return False
    if mode not in CFG_BIBENCODE_VALID_MODES:
        write_message('%s is not a valid mode. Use one of %s' %
                      (mode, CFG_BIBENCODE_VALID_MODES))
        return False

    ## INPUT ##
    ## Check if the input file is given and if it exists
    ## You should always use an absolute path to the file
    if mode in ('encode', 'extract', 'meta', 'batch'):
        input_path = _topt('input')
        if input_path is None:
            write_message('You must specify an input file using \'-i FILE\'')
            return False
        if not os.path.exists(input_path):
            ## Reported through write_message like every other failure
            write_message("The file %s does not exist" % input_path)
            return False

    ## OUTPUT ##
    ## Check if the output file is given
    ## You should always use an absolute path to the file
    if mode in ('encode', 'extract', 'meta') and _topt('output') is None:
        write_message('No output file is given. Please specify with'
                      ' \'-o NAME\'')
        return False

    #---------------#
    # Encoding Mode #
    #---------------#
    if mode == 'encode':

        ## PROFILE ## Check for a valid profile if this is given
        profile_name = _topt('profile_name')
        if (profile_name is not None
                and profile_name not in get_encoding_profiles()):
            write_message(
                '%s not found in %s' %
                (profile_name, CFG_BIBENCODE_PROFILES_ENCODING))
            return False

        ## AUDIOCODEC ##
        ## Checks if the audiocodec is one of the predefined
        acodec = _topt('acodec')
        if (acodec is not None
                and acodec not in CFG_BIBENCODE_FFMPEG_VALID_ACODECS):
            write_message(
                '%s is not a valid audiocodec.\nAvailable codecs: %s' %
                (acodec, CFG_BIBENCODE_FFMPEG_VALID_ACODECS))
            return False

        ## VIDEOCODEC ## Checks if the videocodec is one of the predefined
        vcodec = _topt('vcodec')
        if (vcodec is not None
                and vcodec not in CFG_BIBENCODE_FFMPEG_VALID_VCODECS):
            write_message(
                '%s is not a valid videocodec.\nAvailable codecs: %s' %
                (vcodec, CFG_BIBENCODE_FFMPEG_VALID_VCODECS))
            return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG preset
        if not _size_option_ok():
            return False
        ## Check if both a size and vertical or horizontal resolution
        ## NOTE(review): the message names 'resolution' while the option
        ## tested is 'size' -- confirm the intended wording.
        if (_topt('width') or _topt('height')) and _topt('size'):
            write_message('Options \'width\' and \'height\' can not be '
                          'combined with \'resolution\'')
            return False

        ## PASSES ##
        ## If a number of passes is given, it should be either 1 or 2.
        ## You could do an infinite number of passes with ffmpeg,
        ## But it will almost never make a difference above 2 passes.
        ## So, we currently only support 2 passes.
        passes = _topt('passes')
        if passes is None:
            task_set_option('passes', 1)
        elif passes not in (1, 2):
            write_message('The number of passes must be either 1 or 2')
            return False

        ## BITRATE ##
        ## 'abitrate' and 'vbitrate' are currently not validated.

    #-----------------#
    # Extraction Mode #
    #-----------------#
    elif mode == 'extract':

        ## PROFILE ##
        ## If a profile is given, check its validity
        profile_name = _topt('profile_name')
        if (profile_name is not None
                and profile_name not in get_extract_profiles()):
            write_message(
                '%s not found in %s' %
                (profile_name, CFG_BIBENCODE_PROFILES_EXTRACT))
            return False

        ## You cannot give both a number and specific positions
        ## !!! Think about allowing both -> First extract by number,
        ## !!! then additionally the specific positions
        ## Exactly one of the two has to be given: both set or both unset
        ## is an error.
        if (_topt('numberof') is None) == (_topt('positions') is None):
            write_message('Please specify either a number of frames to '
                          'take or specific positions')
            return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG specific value
        if not _size_option_ok():
            return False

    #---------------#
    # Metadata Mode #
    #---------------#
    elif mode == 'meta':

        ## You have to give exactly one meta suboption
        if not _xor(_topt('meta_input'), _topt('meta_dump')):
            write_message("You can either dump or write metadata")
            return False

        ## METADATA INPUT ##
        ## Check if this is either a filename (that should exist)
        ## or if this a jsonic metadata notation
        meta_input = _topt('meta_input')
        if meta_input is not None and not os.path.exists(meta_input):
            try:
                metadict = json.loads(meta_input)
            except ValueError:
                write_message(
                    'The value %s of the \'--meta\' parameter is '
                    'neither a valid filename nor a jsonic dict' %
                    meta_input)
                return False
            ## Keep the decoded object in place of the raw string
            task_set_option('meta_input', metadict)

    #------------#
    # Batch Mode #
    #------------#
    elif mode == 'batch':
        collection = _topt('collection')
        search = _topt('search')
        if collection and search:
            write_message('You can either use \'search\' or \'collection\'')
            return False
        if collection:
            _announce_batch_update(
                "#---------------------------------------------#",
                "# YOU ARE ABOUT TO UPDATE A WHOLE COLLECTION  #")
        elif search:
            message = ("# YOU ARE ABOUT TO UPDATE RECORDS MATCHING '%s'  #"
                       % search)
            ## Frame the message with a rule of the same width
            rule = "#" + "-" * (len(message) - 2) + "#"
            _announce_batch_update(rule, message)

    #-------------#
    # Daemon Mode #
    #-------------#
    elif mode == 'daemon':
        task_set_task_param('task_specific_name', 'daemon')
        ## You can either give none or both folders, but not only one
        if _xor(_topt('new_job_folder'), _topt('old_job_folder')):
            write_message('When specifying folders for the daemon mode, you '
                          'have to specify both the folder for the new jobs '
                          'and the old ones')
            return False

    ## If every check went fine
    return True
Exemple #51
0
def _task_submit_elaborate_specific_parameter(key, value, opts, args):
    """
    Given the string key, check its meaning, using the value where needed,
    and fill the matching entry of the task options.

    The switches --update-personid, --all-records, --disambiguate, --merge
    and --from-scratch are stored as True.  --record-ids / -i stores its
    value as a list obtained by splitting on commas; a single leading '='
    is dropped first (presumably left over from the '-i=1,2' spelling).

    opts and args are not used here.

    Returns True if the key has been elaborated, False if the key is not
    known.
    """
    if key in ("--record-ids", '-i'):
        # Only a leading '=' is stripped.  Testing for '=' anywhere in the
        # value would also cut the first character off e.g. "12=3".
        if value.startswith("="):
            value = value[1:]
        bibtask.task_set_option("record_ids", value.split(","))
        return True

    # Switches that simply turn an option on.
    switches = (
        ("--update-personid", "update_personid"),
        ("--all-records", "all_records"),
        ("--disambiguate", "disambiguate"),
        ("--merge", "merge"),
        ("--from-scratch", "from_scratch"),
    )
    for flag, option in switches:
        if key == flag:
            bibtask.task_set_option(option, True)
            return True

    return False