Beispiel #1
0
def update_references(recid, overwrite=True):
    """Extract the references of a record and schedule their upload.

    The record is not updated directly: the extracted references are
    written to a temporary file and a bibupload task in -c mode is
    submitted with that file, which takes care of updating the record.

    :param recid: the id of the record
    :param overwrite: when False, refuse to process a record that already
        has a ``999`` field
    :raises RecordHasReferences: if ``overwrite`` is False and the record
        already has a ``999`` field, or if the record has any ``999C59``
        value (reported as "curated")
    """
    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file
    (temp_fd, temp_path) = mkstemp(prefix=cfg.get("CFG_REFEXTRACT_FILENAME"),
                                   dir=cfg.get("CFG_TMPSHAREDDIR"))
    # The context manager closes the descriptor even when the write fails,
    # so a failed write no longer leaks the open file.
    with os.fdopen(temp_fd, 'w') as temp_file:
        temp_file.write(references_xml)

    # Update record
    task_low_level_submission('bibupload', 'refextract', '-P', '4', '-c',
                              temp_path)
Beispiel #2
0
    def __init__(self,
                 username=None,
                 password=None,
                 url=None,
                 prefix=None,
                 test_mode=None,
                 api_ver="2"):
        """Initialize the deprecated DataCite compatibility client.

        Emits a ``RemovedInInvenio22Warning`` and forwards the settings to
        the parent class.  Every falsy argument (``None`` only, in the case
        of ``test_mode``) is replaced by the matching ``CFG_DATACITE_*``
        configuration value.
        """
        warnings.warn(
            "Use of invenio.utils.datacite:DataCite is "
            "deprecated in favor of "
            "http://datacite.readthedocs.org/en/latest/.",
            RemovedInInvenio22Warning)

        # ``test_mode`` may legitimately be False, hence the explicit
        # ``None`` check instead of a truthiness test.
        if test_mode is None:
            test_mode = cfg.get('CFG_DATACITE_TESTMODE', False)

        settings = {
            'username': username or cfg.get('CFG_DATACITE_USERNAME', ''),
            'password': password or cfg.get('CFG_DATACITE_PASSWORD', ''),
            'url': url or cfg.get('CFG_DATACITE_URL',
                                  'https://mds.datacite.org/'),
            'prefix': prefix or cfg.get('CFG_DATACITE_DOI_PREFIX', '10.5072'),
            'test_mode': test_mode,
            'api_ver': api_ver or "2",
        }
        super(DataCite, self).__init__(**settings)
Beispiel #3
0
def index(p, so, page):
    """Render the communities index page.

    :param p: search pattern used to filter the communities
    :param so: sorting option; falls back to
        ``COMMUNITIES_DEFAULT_SORTING_OPTION`` when empty
    :param page: requested page number (values below 1 are treated as 1)
    :return: the rendered ``communities/index.html`` template
    """
    ctx = mycommunities_ctx()
    so = so or cfg.get('COMMUNITIES_DEFAULT_SORTING_OPTION')

    communities = Community.filter_communities(p, so)
    featured_community = FeaturedCommunity.get_current()
    form = SearchForm(p=p)
    per_page = cfg.get('COMMUNITIES_DISPLAYED_PER_PAGE', 10)
    page = max(page, 1)
    pagination = Pagination(page, per_page, communities.count())

    # Bounds of the result window shown on the current page.
    first_result = max(pagination.per_page * (pagination.page - 1), 0)
    last_result = min(pagination.per_page * pagination.page,
                      pagination.total_count)
    visible_communities = communities.slice(
        per_page * (page - 1), per_page * page).all()

    ctx.update({
        'r_from': first_result,
        'r_to': last_result,
        'r_total': pagination.total_count,
        'pagination': pagination,
        'form': form,
        'title': _('Community Collections'),
        'communities': visible_communities,
        'featured_community': featured_community,
        'format_record': format_record,
    })

    return render_template("communities/index.html", **ctx)
Beispiel #4
0
def get_canonical_and_alternates_urls(url,
                                      drop_ln=True,
                                      washed_argd=None,
                                      quote_path=False):
    """
    Return the canonical URL and the per-language alternates of a URL.

    The scheme and host of the given URL are replaced by those of
    CFG_SITE_URL.  The canonical URL has its ln= argument stripped unless
    C{drop_ln} is False.  The alternates are a mapping
    language code -> URL carrying that language as its ln= argument, one
    entry per language in CFG_SITE_LANGS.

    @param url: the URL to rewrite
    @param drop_ln: if True, strip the ln= argument from the canonical URL
    @param washed_argd: if truthy, used as the (key, value) query pairs
                        instead of the query string of C{url}
    @param quote_path: if True, the path section of the given C{url}
                       is quoted according to RFC 2396
    @return: tuple (canonical URL, alternate URLs mapping)
    """
    path, params, query, fragment = urlparse(url)[2:6]
    site_scheme, site_netloc = urlparse(cfg.get('CFG_SITE_URL'))[0:2]

    query_pairs = washed_argd or parse_qsl(query)
    pairs_without_ln = [(key, value) for (key, value) in query_pairs
                        if key != 'ln']
    canonical_pairs = pairs_without_ln if drop_ln else query_pairs

    if quote_path:
        path = urllib.quote(path)

    def _rebuild(pairs):
        # Same path/params/fragment every time; only the query differs.
        return urlunparse((site_scheme, site_netloc, path,
                           params, urlencode(pairs), fragment))

    canonical_url = _rebuild(canonical_pairs)
    alternate_urls = {}
    for ln in cfg.get('CFG_SITE_LANGS'):
        alternate_urls[ln] = _rebuild(pairs_without_ln + [('ln', ln)])
    return canonical_url, alternate_urls
Beispiel #5
0
def is_user_owner_of_record(user_info, recid):
    """Check if the user is owner of the record.

    I.e. he is the submitter and/or belongs to a owner-like group authorized
    to 'see' the record.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier, or an already loaded record
        (any ``MutableMapping`` is used as the record itself).
    :type recid: positive integer or MutableMapping
    :return: the result of ``is_user_in_tags`` for the author-rights tags,
        i.e. True if the user is 'owner' of the record; False otherwise
    """
    from invenio.modules.access.local_config import \
        CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS, \
        CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS

    if isinstance(recid, MutableMapping):
        # The caller already handed us the record.
        record = recid
    else:
        record = get_record(int(recid))

    # Each tag list is read from its own configuration key and falls back
    # to its own default.  Previously the user-id list defaulted to the
    # e-mail tags and the e-mail list was read from the user-id key, so the
    # e-mail configuration was never honoured.
    uid_tags = cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS',
                       CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS)

    email_tags = cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS',
                         CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS)

    return is_user_in_tags(record, user_info, uid_tags, email_tags)
Beispiel #6
0
def send_account_activation_email(user):
    """Send the account activation e-mail to a user.

    A confirmation token valid for
    ``CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS`` days is created
    for the user and embedded in an HTTPS activation link.

    :param user: object exposing ``id`` and ``email`` attributes
    """
    expires_in = cfg.get('CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS')

    # The serializer expects its lifetime in seconds.
    lifetime = timedelta(days=expires_in).total_seconds()
    serializer = EmailConfirmationSerializer(expires_in=lifetime)
    address_activation_key = serializer.create_token(
        user.id, {'email': user.email})

    # Render context.
    activation_link = url_for(
        'webaccount.access',
        mailcookie=address_activation_key,
        _external=True,
        _scheme='https',
    )
    ctx = dict(
        ip_address=None,
        user=user,
        email=user.email,
        activation_link=activation_link,
        days=expires_in,
    )

    # Send email
    sender = cfg.get('CFG_SITE_SUPPORT_EMAIL')
    recipient = user.email
    # Localized site name, falling back to the generic one.
    intl_names = cfg["CFG_SITE_NAME_INTL"]
    site_name = intl_names.get(getattr(g, 'ln', cfg['CFG_SITE_LANG']),
                               cfg['CFG_SITE_NAME'])
    subject = _("Account registration at %(sitename)s", sitename=site_name)
    content = render_template("accounts/emails/activation.tpl", **ctx)
    send_email(sender, recipient, subject, content)
Beispiel #7
0
def is_user_viewer_of_record(user_info, recid):
    """Check if the user may view the record based on its MARC tags.

    The user-id and e-mail tag lists are read from the
    ``CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS`` and
    ``CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS`` configuration keys,
    falling back to the values of the same name in ``local_config``.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: the result of ``is_user_in_tags``, i.e. True if the user is
        allowed to view the record; False otherwise
    :rtype: bool
    """
    from invenio.modules.access.local_config import \
        CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS, \
        CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS

    return is_user_in_tags(
        recid,
        user_info,
        cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS',
                CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS),
        cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS',
                CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS),
    )
Beispiel #8
0
def nato_context():
    """Build the VM context that installs and schedules the etcd updater.

    The returned dictionary asks for two base64-encoded files to be
    written (the updater script and its cron entry) and for the script to
    be run once.  When ``CFG_ANALYZE_PUBLIC_KEY`` is set it is added as an
    authorized SSH key.

    NOTE(review): the key names look like cloud-init user-data -- confirm
    against the consumer of this dictionary.

    :return: the context dictionary
    """
    script_path = '/usr/local/bin/etcd-updater.sh'

    rendered_script = render_template_to_string(
        'analyze/etcd-updater.sh',
        etcd_url=cfg.get('CFG_ANALYZE_ETCD_URL'),
        ttl=600,
        root=cfg.get('CFG_ANALYZE_NODES_KEY'))
    rendered_cron = render_template_to_string(
        'analyze/etcd_updater_cron',
        context_script_path=script_path)

    script_entry = {
        'encoding': 'b64',
        'content': b64encode(rendered_script),
        'permissions': '755',
        'path': script_path,
    }
    cron_entry = {
        'encoding': 'b64',
        'content': b64encode(rendered_cron),
        'permissions': '755',
        'path': '/etc/cron.d/etcd_updater',
    }

    context = {
        'write_files': [script_entry, cron_entry],
        # run it as soon as the VM is booted
        'runcmd': [[script_path]],
    }

    public_key = cfg.get('CFG_ANALYZE_PUBLIC_KEY')
    if public_key:
        context['ssh_authorized_keys'] = [public_key]
    return context
Beispiel #9
0
def get_canonical_and_alternates_urls(url, drop_ln=True, washed_argd=None, quote_path=False):
    """
    Compute the canonical URL and the language alternates of an Invenio URL.

    Scheme and host are taken from CFG_SITE_URL; path, params and fragment
    come from C{url}.  The canonical URL drops the ln= argument unless
    C{drop_ln} is False.  The second element of the result maps every
    language code of CFG_SITE_LANGS to the URL with ln= set to that code.

    @param url: the URL to rewrite
    @param drop_ln: if True, strip the ln= argument from the canonical URL
    @param washed_argd: if truthy, used as the (key, value) query pairs
                        instead of the query string of C{url}
    @param quote_path: if True, the path section of the given C{url}
                       is quoted according to RFC 2396
    @return: tuple (canonical URL, alternate URLs mapping)
    """
    split_url = urlparse(url)
    path, params, query, fragment = split_url[2], split_url[3], split_url[4], split_url[5]
    site_scheme, site_netloc = urlparse(cfg.get('CFG_SITE_URL'))[0:2]

    all_args = washed_argd or parse_qsl(query)
    args_without_ln = [(key, value) for (key, value) in all_args if key != 'ln']

    if quote_path:
        path = urllib.quote(path)

    canonical_query = urlencode(args_without_ln if drop_ln else all_args)
    canonical_url = urlunparse((site_scheme, site_netloc, path, params, canonical_query, fragment))

    alternate_urls = {}
    for ln in cfg.get('CFG_SITE_LANGS'):
        localized_query = urlencode(args_without_ln + [('ln', ln)])
        alternate_urls[ln] = urlunparse(
            (site_scheme, site_netloc, path, params, localized_query, fragment))
    return canonical_url, alternate_urls
Beispiel #10
0
 def run(self, *args, **kwargs):
     """Refresh the CRLs and regenerate the robot proxy file.

     Runs ``fetch-crl``, then ``voms-proxy-init`` writing into a temporary
     file (adding ``--voms`` when ``CFG_DELEGATION_VO`` is set) and finally
     copies the new proxy to the path configured in
     ``CFG_LWDAAP_ROBOT_PROXY``, readable and writable by the owner only.

     When ``voms-proxy-init`` exits with a non-zero status the failure is
     logged and the result of ``self.retry`` is raised.
     """
     logger.info("Fetching CRLs")
     proc = subprocess.Popen(['fetch-crl'], shell=False)
     proc.wait()
     logger.info("Renewing proxy")
     # 'r+' instead of the invalid 'rw' mode string, which Python 3 rejects
     # outright.
     with NamedTemporaryFile(mode='r+') as new_proxy:
         cmd = ['voms-proxy-init', '--out', new_proxy.name, '-rfc']
         vo = cfg.get('CFG_DELEGATION_VO')
         if vo:
             cmd.extend(['--voms', vo])
         logger.debug("CMD %s", ' '.join(cmd))
         proc = subprocess.Popen(cmd,
                                 shell=False,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
         # communicate() drains the pipe and waits for the process.
         out = proc.communicate()[0]
         logger.debug("OUTPUT: %s", out)
         if proc.returncode != 0:
             # and not _check_proxy_validity(new_proxy):
             msg = ("Proxy generation failed (%d): %s" %
                    (proc.returncode, out))
             logger.error(msg)
             # NOTE(review): if this is a Celery task, ``retry`` takes the
             # exception as the ``exc`` keyword; passed positionally it
             # would land in ``args`` -- confirm against the base class.
             raise self.retry(Exception(msg))
         # dump new proxy to proper location
         # XXX should this file be locked somehow?
         proxy_path = cfg.get('CFG_LWDAAP_ROBOT_PROXY')
         owner_only = stat.S_IRUSR | stat.S_IWUSR
         # Create the file owner-only and tighten a pre-existing file
         # *before* the credential is written, so it is never readable by
         # others, not even briefly.
         proxy_fd = os.open(proxy_path,
                            os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                            owner_only)
         with os.fdopen(proxy_fd, 'w') as f:
             os.fchmod(f.fileno(), owner_only)
             f.write(new_proxy.read())
Beispiel #11
0
def index(p, so, page):
    """
    Render the communities index page.

    :param p: search pattern used to filter the communities
    :param so: sorting option; falls back to
        ``COMMUNITIES_DEFAULT_SORTING_OPTION`` when empty
    :param page: requested page number (values below 1 are treated as 1)
    :return: the rendered ``communities/index.html`` template
    """
    ctx = mycommunities_ctx()
    so = so or cfg.get('COMMUNITIES_DEFAULT_SORTING_OPTION')

    communities = Community.filter_communities(p, so)
    form = SearchForm()
    per_page = cfg.get('COMMUNITIES_DISPLAYED_PER_PAGE', 10)
    page = max(page, 1)

    pagination = Pagination(page, per_page, communities.count())
    # Only the communities of the requested page are loaded.
    start = per_page * (page - 1)
    stop = per_page * page
    visible_communities = communities.slice(start, stop).all()

    ctx.update({
        'pagination': pagination,
        'form': form,
        'title': _('Community Collections'),
        'communities': visible_communities,
    })

    return render_template("communities/index.html", **ctx)
Beispiel #12
0
def index(p, so, page):
    """Render the paginated list of communities.

    :param p: search pattern used to filter the communities
    :param so: sorting option; falls back to
        ``COMMUNITIES_DEFAULT_SORTING_OPTION`` when empty
    :param page: requested page number (values below 1 are treated as 1)
    :return: the rendered ``communities/index.html`` template
    """
    ctx = mycommunities_ctx()

    sorting = so if so else cfg.get('COMMUNITIES_DEFAULT_SORTING_OPTION')

    communities = Community.filter_communities(p, sorting)
    featured_community = FeaturedCommunity.get_current()
    form = SearchForm(p=p)
    per_page = cfg.get('COMMUNITIES_DISPLAYED_PER_PAGE', 10)
    page = max(page, 1)
    pager = Pagination(page, per_page, communities.count())

    page_values = {
        # 1st/last result index and total, as exposed by the paginator.
        'r_from': max(pager.per_page * (pager.page - 1), 0),
        'r_to': min(pager.per_page * pager.page, pager.total_count),
        'r_total': pager.total_count,
        'pagination': pager,
        'form': form,
        'title': _('Community Collections'),
        'communities': communities.slice(
            per_page * (page - 1), per_page * page).all(),
        'featured_community': featured_community,
        'format_record': format_record,
    }
    ctx.update(page_values)

    return render_template("communities/index.html", **ctx)
Beispiel #13
0
def is_user_owner_of_record(user_info, recid):
    """Check if the user is owner of the record.

    I.e. he is the submitter and/or belongs to a owner-like group authorized
    to 'see' the record.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier, or an already loaded record
        (any ``MutableMapping`` is used as the record itself).
    :type recid: positive integer or MutableMapping
    :return: the result of ``is_user_in_tags`` for the author-rights tags,
        i.e. True if the user is 'owner' of the record; False otherwise
    """
    from invenio.modules.access.local_config import \
        CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS, \
        CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS

    if isinstance(recid, MutableMapping):
        # The caller already handed us the record.
        record = recid
    else:
        record = get_record(int(recid))

    # Read each tag list from its own configuration key with its own
    # default.  The previous code used the e-mail tags as default for the
    # user-id list and read the e-mail list from the user-id key, so the
    # e-mail configuration was never honoured.
    uid_tags = cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS',
                       CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS)

    email_tags = cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS',
                         CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS)

    return is_user_in_tags(record, user_info, uid_tags, email_tags)
Beispiel #14
0
def submit_rt_ticket(obj, queue, subject, body, requestors, ticket_id_key):
    """Create an RT ticket for a workflow object.

    The ticket is only created when ``PRODUCTION_MODE`` is enabled and an
    RT instance is available; otherwise the submission is logged as
    skipped.  On success the ticket id is stored in
    ``obj.extra_data[ticket_id_key]``.

    :param obj: workflow object providing ``extra_data``, ``data`` and
        ``log``
    :param queue: RT queue used when ``CFG_BIBCATALOG_QUEUES`` is not set
    :param subject: ticket subject
    :param body: ticket text
    :param requestors: ticket requestors
    :param ticket_id_key: ``extra_data`` key receiving the new ticket id
    :return: always True
    """
    from inspire.utils.tickets import get_instance

    # Trick to prepare ticket body
    stripped_lines = (line.strip() for line in body.split("\n"))
    body = "\n ".join(stripped_lines)

    rt_instance = None
    if cfg.get("PRODUCTION_MODE"):
        rt_instance = get_instance()
    rt_queue = cfg.get("CFG_BIBCATALOG_QUEUES") or queue
    # Prefer the recid stored in extra_data, then the one in the record.
    recid = obj.extra_data.get("recid", "") or obj.data.get("recid", "")

    if not rt_instance:
        obj.log.error("No RT instance available. Skipping!")
        obj.log.info("Ticket submission ignored.")
        return True

    ticket_id = rt_instance.create_ticket(
        Queue=rt_queue,
        Subject=subject,
        Text=body,
        Requestors=requestors,
        CF_RecordID=recid
    )
    obj.extra_data[ticket_id_key] = ticket_id
    encoded_body = body.encode("utf-8", "ignore")
    obj.log.info("Ticket {0} created:\n{1}".format(ticket_id, encoded_body))
    return True
Beispiel #15
0
def send_account_activation_email(user):
    """Send the account activation e-mail to a user.

    A mail cookie valid for
    ``CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS`` days is created
    for the user's address and embedded in an HTTPS activation link.

    :param user: object exposing an ``email`` attribute
    """
    from invenio.modules.access.mailcookie import \
        mail_cookie_create_mail_activation

    expires_in = cfg.get('CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS')
    cookie_lifetime = timedelta(days=expires_in)
    address_activation_key = mail_cookie_create_mail_activation(
        user.email, cookie_timeout=cookie_lifetime)

    # Render context.
    activation_link = url_for(
        'webaccount.access',
        mailcookie=address_activation_key,
        _external=True,
        _scheme='https',
    )
    ctx = dict(
        ip_address=None,
        user=user,
        email=user.email,
        activation_link=activation_link,
        days=expires_in,
    )

    # Send email
    sender = cfg.get('CFG_SITE_SUPPORT_EMAIL')
    recipient = user.email
    subject = _("Account registration at %(sitename)s",
                sitename=cfg['CFG_SITE_NAME'])
    content = render_template("accounts/emails/activation.tpl", **ctx)
    send_email(sender, recipient, subject, content)
Beispiel #16
0
def index(p, so, page):
    """Render the communities index page, one page of results at a time.

    :param p: search pattern used to filter the communities
    :param so: sorting option; falls back to
        ``COMMUNITIES_DEFAULT_SORTING_OPTION`` when empty
    :param page: requested page number (values below 1 are treated as 1)
    :return: the rendered ``communities/index.html`` template
    """
    ctx = mycommunities_ctx()
    so = so or cfg.get("COMMUNITIES_DEFAULT_SORTING_OPTION")

    communities = Community.filter_communities(p, so)
    featured_community = FeaturedCommunity.get_current()
    form = SearchForm(p=p)
    per_page = cfg.get("COMMUNITIES_DISPLAYED_PER_PAGE", 10)
    page = max(page, 1)
    pagination = Pagination(page, per_page, communities.count())

    # Range of results displayed on this page, clamped to valid bounds.
    r_from = max(pagination.per_page * (pagination.page - 1), 0)
    r_to = min(pagination.per_page * pagination.page, pagination.total_count)
    r_total = pagination.total_count
    page_items = communities.slice(per_page * (page - 1), per_page * page).all()

    ctx.update(
        {
            "r_from": r_from,
            "r_to": r_to,
            "r_total": r_total,
            "pagination": pagination,
            "form": form,
            "title": _("Community Collections"),
            "communities": page_items,
            "featured_community": featured_community,
            "format_record": format_record,
        }
    )

    return render_template("communities/index.html", **ctx)
Beispiel #17
0
 def run(self, *args, **kwargs):
     """Refresh the CRLs and regenerate the robot proxy file.

     Runs ``fetch-crl``, then ``voms-proxy-init`` writing into a temporary
     file (adding ``--voms`` when ``CFG_DELEGATION_VO`` is set) and finally
     copies the new proxy to the path configured in
     ``CFG_LWDAAP_ROBOT_PROXY``, readable and writable by the owner only.

     When ``voms-proxy-init`` exits with a non-zero status the failure is
     logged and the result of ``self.retry`` is raised.
     """
     logger.info("Fetching CRLs")
     proc = subprocess.Popen(['fetch-crl'], shell=False)
     proc.wait()
     logger.info("Renewing proxy")
     # 'r+' instead of the invalid 'rw' mode string, which Python 3 rejects
     # outright.
     with NamedTemporaryFile(mode='r+') as new_proxy:
         cmd = ['voms-proxy-init',
                '--out', new_proxy.name,
                '-rfc'
                ]
         vo = cfg.get('CFG_DELEGATION_VO')
         if vo:
             cmd.extend(['--voms', vo])
         logger.debug("CMD %s", ' '.join(cmd))
         proc = subprocess.Popen(cmd, shell=False,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
         # communicate() drains the pipe and waits for the process.
         out = proc.communicate()[0]
         logger.debug("OUTPUT: %s", out)
         if proc.returncode != 0:
             # and not _check_proxy_validity(new_proxy):
             msg = ("Proxy generation failed (%d): %s"
                    % (proc.returncode, out))
             logger.error(msg)
             # NOTE(review): if this is a Celery task, ``retry`` takes the
             # exception as the ``exc`` keyword; passed positionally it
             # would land in ``args`` -- confirm against the base class.
             raise self.retry(Exception(msg))
         # dump new proxy to proper location
         # XXX should this file be locked somehow?
         proxy_path = cfg.get('CFG_LWDAAP_ROBOT_PROXY')
         owner_only = stat.S_IRUSR | stat.S_IWUSR
         # Create the file owner-only and tighten a pre-existing file
         # *before* the credential is written, so it is never readable by
         # others, not even briefly.
         proxy_fd = os.open(proxy_path,
                            os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                            owner_only)
         with os.fdopen(proxy_fd, 'w') as f:
             os.fchmod(f.fileno(), owner_only)
             f.write(new_proxy.read())
Beispiel #18
0
def update_references(recid, overwrite=True):
    """Extract the references of a record and schedule their upload.

    The record is not updated directly: the extracted references are
    written to a temporary file and a bibupload task in -c mode is
    submitted with that file, which takes care of updating the record.

    :param recid: the id of the record
    :param overwrite: when False, refuse to process a record that already
        has a ``999`` field
    :raises RecordHasReferences: if ``overwrite`` is False and the record
        already has a ``999`` field, or if the record has any ``999C59``
        value (reported as "curated")
    """
    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file
    (temp_fd, temp_path) = mkstemp(prefix=cfg.get("CFG_REFEXTRACT_FILENAME"),
                                   dir=cfg.get("CFG_TMPSHAREDDIR"))
    # The context manager closes the descriptor even when the write fails,
    # so a failed write no longer leaks the open file.
    with os.fdopen(temp_fd, 'w') as temp_file:
        temp_file.write(references_xml)

    # Update record
    task_low_level_submission('bibupload', 'refextract', '-P', '4',
                              '-c', temp_path)
Beispiel #19
0
def filter_step(obj, eng):
    """Run the configured external filter script on the record's MARCXML.

    The script name is read from the ``f_filter-file`` argument of the
    repository stored in ``obj.extra_data``.  The record is exported as
    MARC, written to a file named after ``obj.id`` inside a per-engine
    directory, and the script is run with that file path as its single
    argument.  The script's stdout is logged on success; a non-zero exit
    code or any stderr output is logged as an error.

    :param obj: workflow object to process
    :param eng: workflow engine processing the object (its ``uuid`` names
        the working directory)
    """
    from invenio_records.api import Record
    from invenio.utils.shell import run_shell_command

    repository = obj.extra_data.get("repository", {})
    arguments = repository.get("arguments", {})
    script_name = arguments.get("f_filter-file")
    if script_name:
        marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
        extract_path = os.path.join(
            cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
            str(eng.uuid))
        if not os.path.exists(extract_path):
            os.makedirs(extract_path)

        # Write the MARCXML to the file handed to the filtering script.
        marcxmlfile = os.path.join(extract_path, str(obj.id))
        # ``with`` closes the file even if the write fails.
        with open(marcxmlfile, 'w') as file_fd:
            file_fd.write(marcxml_value)

        exitcode, cmd_stdout, cmd_stderr = run_shell_command(
            cmd="%s '%s'", args=(str(script_name), str(marcxmlfile)))
        if exitcode != 0 or cmd_stderr != "":
            obj.log.error(
                "Error while running filtering script on %s\nError:%s" %
                (marcxmlfile, cmd_stderr))
        else:
            obj.log.info(cmd_stdout)
    else:
        obj.log.error("No script file found!")
Beispiel #20
0
def is_user_viewer_of_record(user_info, recid):
    """Tell whether the record's MARC tags grant the user viewing rights.

    The tags holding user ids and e-mails are taken from the
    ``CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS`` and
    ``CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS`` configuration keys;
    the values of the same name in ``local_config`` are the defaults.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: the result of ``is_user_in_tags``, i.e. True if the user is
        allowed to view the record; False otherwise
    :rtype: bool
    """
    from invenio.modules.access.local_config import \
        CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS, \
        CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS

    tags_with_user_ids = cfg.get(
        'CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS',
        CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS)
    tags_with_emails = cfg.get(
        'CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS',
        CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS)

    return is_user_in_tags(recid, user_info,
                           tags_with_user_ids, tags_with_emails)
Beispiel #21
0
 def __init__(self):
     """Create the DataCite MDS client from the CFG_DATACITE_* settings."""
     client_settings = dict(
         username=cfg.get('CFG_DATACITE_USERNAME'),
         password=cfg.get('CFG_DATACITE_PASSWORD'),
         prefix=cfg.get('CFG_DATACITE_DOI_PREFIX'),
         # Test mode is off unless explicitly configured.
         test_mode=cfg.get('CFG_DATACITE_TESTMODE', False),
         url=cfg.get('CFG_DATACITE_URL'),
     )
     self.api = DataCiteMDSClient(**client_settings)
    def _plot_extract(obj, eng):
        """Extract plots from the record's arXiv tarball and attach them.

        The extraction sources come from the enclosing
        ``plotextractor_types`` value or, when that is empty, from the
        ``p_extraction-source`` repository argument.  Only the ``latex``
        source is handled here: the tarball is fetched (or reused from
        ``obj.extra_data["_result"]["tarball"]``), the plots MARCXML is
        generated from it and the resulting files are attached to ``obj``.

        :param obj: workflow object to process
        :param eng: workflow engine processing the object
        """
        from invenio.utils.plotextractor.api import (
            get_tarball_from_arxiv,
            get_marcxml_plots_from_tarball
        )
        from invenio.modules.workflows.utils import convert_marcxml_to_bibfield
        from invenio.utils.shell import Timeout

        if "_result" not in obj.extra_data:
            obj.extra_data["_result"] = {}

        repository = obj.extra_data.get("repository", {})
        arguments = repository.get("arguments", {})

        chosen_type = plotextractor_types

        if not chosen_type:
            chosen_type = arguments.get('p_extraction-source', [])

        if not isinstance(chosen_type, list):
            chosen_type = [chosen_type]

        if 'latex' in chosen_type:
            # Run LaTeX plotextractor
            if "tarball" not in obj.extra_data["_result"]:
                extract_path = os.path.join(
                    cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
                    str(eng.uuid)
                )
                tarball = get_tarball_from_arxiv(
                    obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
                    extract_path
                )
                if tarball is None:
                    obj.log.error("No tarball found")
                    return
                obj.extra_data["_result"]["tarball"] = tarball
            else:
                tarball = obj.extra_data["_result"]["tarball"]

            # Must be bound before the ``try``: when the extraction times
            # out nothing is assigned, and the check below would otherwise
            # raise UnboundLocalError instead of skipping the attachment.
            marcxml = None
            try:
                marcxml = get_marcxml_plots_from_tarball(tarball)
            except Timeout:
                eng.log.error(
                    'Timeout during tarball extraction on {0}'.format(tarball)
                )
            if marcxml:
                # We store the path to the directory the tarball contents lives
                new_dict = convert_marcxml_to_bibfield(marcxml)
                _attach_files_to_obj(obj, new_dict)
                obj.update_task_results(
                    "Plots",
                    [{
                        "name": "Plots",
                        "result": new_dict["fft"],
                        "template": "workflows/results/plots.html"
                    }]
                )
Beispiel #23
0
def make_user_agent_string(component=None):
    """
    Build the uniform user-agent string Invenio sends as an HTTP client.

    The string contains the values of CFG_VERSION, CFG_SITE_URL and
    CFG_SITE_NAME, followed by the component name when one is given.

    @param component: optional name appended to the user-agent string
    @return: the user-agent string
    """
    version = cfg.get('CFG_VERSION')
    site_url = cfg.get('CFG_SITE_URL')
    site_name = cfg.get('CFG_SITE_NAME')
    user_agent = "Invenio-%s (+%s; \"%s\")" % (version, site_url, site_name)
    if not component:
        return user_agent
    return user_agent + " %s" % component
Beispiel #24
0
 def __init__(self):
     """Set up ``self.api`` with a client built from CFG_DATACITE_* values."""
     username = cfg.get('CFG_DATACITE_USERNAME')
     password = cfg.get('CFG_DATACITE_PASSWORD')
     prefix = cfg.get('CFG_DATACITE_DOI_PREFIX')
     # Test mode is off unless explicitly configured.
     test_mode = cfg.get('CFG_DATACITE_TESTMODE', False)
     url = cfg.get('CFG_DATACITE_URL')
     self.api = DataCiteMDSClient(username=username,
                                  password=password,
                                  prefix=prefix,
                                  test_mode=test_mode,
                                  url=url)
Beispiel #25
0
    def __init__(self,
                 username=None,
                 password=None,
                 url=None,
                 prefix=None,
                 test_mode=None,
                 api_ver="2"):
        """
        Set up the DataCite API client. Any parameter that is not given as
        a keyword argument is read from the Invenio configuration.

        @param username: DataCite username (or CFG_DATACITE_USERNAME)
        @type  username: str

        @param password: DataCite password (or CFG_DATACITE_PASSWORD)
        @type  password: str

        @param url: DataCite API base URL (or CFG_DATACITE_URL). Defaults to
            https://mds.datacite.org/. A trailing slash is appended when
            missing.
        @type  url: str

        @param prefix: DOI prefix (or CFG_DATACITE_DOI_PREFIX). Defaults to
            10.5072 (DataCite test prefix). Forced to 10.5072 in test mode.
        @type  prefix: str

        @param test_mode: Set to True to enable test mode (or
            CFG_DATACITE_TESTMODE). Defaults to False.
        @type  test_mode: boolean

        @param api_ver: DataCite API version. Currently has no effect.
            Default to 2.
        @type  api_ver: str
        """
        if not HAS_SSL:
            warn("Module ssl not installed. Please install with e.g. "
                 "'pip install ssl'. Required for HTTPS connections to "
                 "DataCite.")

        self.username = username or cfg.get('CFG_DATACITE_USERNAME', '')
        self.password = password or cfg.get('CFG_DATACITE_PASSWORD', '')
        self.prefix = prefix or cfg.get('CFG_DATACITE_DOI_PREFIX', '10.5072')
        self.api_ver = api_ver  # Currently not used

        # Normalize the base URL so that it always ends with a slash.
        base_url = url or cfg.get('CFG_DATACITE_URL',
                                  'https://mds.datacite.org/')
        if base_url[-1] != '/':
            base_url += "/"
        self.api_url = base_url

        # An explicit False must win over the configuration, hence the
        # ``None`` check rather than a truthiness test.
        self.test_mode = (cfg.get('CFG_DATACITE_TESTMODE', False)
                          if test_mode is None else test_mode)

        # If in test mode, set prefix to 10.5072, the default DataCite test
        # prefix.
        if self.test_mode:
            self.prefix = "10.5072"
Beispiel #26
0
def get_vm_connection(client, vm_id):
    """Describe how to connect to a running VM.

    The connection details are read from the etcd server configured in
    ``CFG_ANALYZE_ETCD_URL``, under the ``CFG_ANALYZE_MAPPINGS_KEY``
    directory of the VM.

    :param client: client handed to ``get_vm`` to look the VM up
    :param vm_id: identifier of the VM
    :return: dictionary with an ``error`` flag and a ``msg`` holding either
        the (HTML) connection instructions or the failure reason
    """
    def _result(error, msg):
        return dict(error=error, msg=msg)

    vm = get_vm(client, vm_id)
    if not vm:
        return _result(True, 'Instance is not known to the system')
    if vm['status'] != 'ACTIVE':
        return _result(True,
                       'Instance must be ACTIVE to get connected to it.')

    # host[:port] part of the configured etcd URL
    host_and_port = urlsplit(cfg.get('CFG_ANALYZE_ETCD_URL'))[1].split(':')
    client_kwargs = {'host': host_and_port[0]}
    if len(host_and_port) > 1:
        client_kwargs['port'] = int(host_and_port[1])
    etcd_client = etcd.Client(**client_kwargs)

    vm_dir = '/'.join([cfg.get('CFG_ANALYZE_MAPPINGS_KEY', '/'), vm_id])
    try:
        listing = etcd_client.read(vm_dir, recursive=True)
        # Map the last path component of every child key to its value.
        details = {}
        for child in listing.children:
            details[child.key.split('/')[-1]] = child.value

        app_env = vm.get('app_env')
        if app_env == 'ssh':
            details['user'] = '******'
            ssh_message = ('<p>You can connect via SSH to %(ip)s, '
                           'port %(port)s with '
                           'user "%(user)s":</p>'
                           '<p>ssh -i &lt;your ssh key&gt; -p %(port)s '
                           '%(user)s@%(ip)s</p>') % details
            return _result(False, ssh_message)
        if app_env in ['jupyter-python', 'jupyter-r']:
            jupyter_message = ('<p>You can connect to <a href="%(http)s" '
                               'class="btn btn-info">jupyter</a>.') % details
            return _result(False, jupyter_message)
        return _result(True,
                       'Unknown application environment "%s".' % app_env)
    except etcd.EtcdKeyNotFound:
        return _result(True, 'Connection details are still not available.')
    except etcd.EtcdException as e:
        return _result(True, 'Unable to get connection details (%s).' % e)
Beispiel #27
0
def get_tarball_for_model(eng, arxiv_id):
    """Fetch the arXiv tarball into the engine's storage directory.

    The directory is named after ``eng.uuid`` and lives under
    ``OAIHARVESTER_STORAGEDIR`` (``CFG_TMPSHAREDDIR`` when unset).

    :param eng: workflow engine whose ``uuid`` names the directory
    :param arxiv_id: identifier handed to ``get_tarball_from_arxiv``
    :return: whatever ``get_tarball_from_arxiv`` returns
    """
    storage_dir = cfg.get('OAIHARVESTER_STORAGEDIR',
                          cfg.get('CFG_TMPSHAREDDIR'))
    extract_path = os.path.join(storage_dir, str(eng.uuid))
    return get_tarball_from_arxiv(arxiv_id, extract_path)
Beispiel #28
0
def arxiv_fulltext_download(obj, eng):
    """Perform the fulltext download step for arXiv records.

    The PDF is fetched into ``<storage root>/<eng.uuid>`` and, when found,
    recorded under ``obj.extra_data["_result"]["pdf"]``, attached to the
    object as an ``fft`` entry and published as a "PDF" task result.
    Nothing is downloaded when a PDF is already recorded for the object.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.utils.plotextractor.api import get_pdf_from_arxiv

    # Create the results container only when it is really missing. Testing
    # the key "result" (without underscore) would reset the container on
    # every call, dropping earlier results and defeating the duplicate
    # check below.
    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}

    if "pdf" in obj.extra_data["_result"]:
        eng.log.info("There was already a pdf register for this record,"
                     "perhaps a duplicate task in you workflow.")
        return

    extract_path = os.path.join(
        cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
        str(eng.uuid))
    pdf = get_pdf_from_arxiv(
        obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
        extract_path)

    # The document type comes from the repository arguments; an empty or
    # missing ``t_doctype`` falls back to 'arXiv'.
    arguments = obj.extra_data["repository"]["arguments"]
    try:
        if not arguments['t_doctype'] == '':
            doctype = arguments['t_doctype']
        else:
            doctype = 'arXiv'
    except KeyError:
        eng.log.error("WARNING: HASARDOUS BEHAVIOUR EXPECTED, "
                      "You didn't specified t_doctype in argument"
                      " for fulltext_download,"
                      "try to recover by using the default one!")
        doctype = 'arXiv'

    if not pdf:
        obj.log.info("No PDF found.")
        return

    obj.extra_data["_result"]["pdf"] = pdf
    new_dict_representation = {
        "fft": [{
            "url": pdf,
            "docfile_type": doctype
        }]
    }
    _attach_files_to_obj(obj, new_dict_representation)
    fileinfo = {
        "type": "fulltext",
        "filename": os.path.basename(pdf),
        "full_path": pdf,
    }
    obj.update_task_results(
        "PDF", [{
            "name": "PDF",
            "result": fileinfo,
            "template": "workflows/results/fft.html"
        }])
Beispiel #29
0
def make_user_agent_string(component=None):
    """Build the uniform user-agent string for outgoing HTTP requests.

    The string is made of the ``CFG_VERSION``, ``CFG_SITE_URL`` and
    ``CFG_SITE_NAME`` settings; a truthy ``component`` is appended after a
    single space.

    :param component: optional suffix identifying the calling component
    :return: the user-agent string
    """
    identity = (cfg.get('CFG_VERSION'),
                cfg.get('CFG_SITE_URL'),
                cfg.get('CFG_SITE_NAME'))
    agent = 'Invenio-%s (+%s; "%s")' % identity
    if not component:
        return agent
    return agent + " %s" % component
Beispiel #30
0
def register():
    """Render the registration page and process a submitted form.

    Access is refused when ``CFG_ACCESS_CONTROL_LEVEL_SITE`` is above 0.
    On a valid submission the user is registered through
    ``webuser.registerUser``; a return code of 0 is treated as success and
    every other code is turned into an error message. The page is always
    rendered with the form, a title, a list of messages and a state string.
    """
    legacy_req = request.get_legacy_request()

    # FIXME
    if cfg.get('CFG_ACCESS_CONTROL_LEVEL_SITE') > 0:
        return webuser.page_not_authorized(
            legacy_req,
            "../youraccount/register?ln=%s" % g.ln,
            navmenuid='youraccount')

    form = RegisterForm(request.values, csrf_enabled=False)

    page_title = _("Register")
    notices = []
    alert_state = ""

    if form.validate_on_submit():
        ruid = webuser.registerUser(legacy_req,
                                    form.email.data.encode('utf8'),
                                    form.password.data.encode('utf8'),
                                    form.nickname.data.encode('utf8'),
                                    ln=g.ln)
        if ruid != 0:
            page_title = _("Registration failure")
            alert_state = "danger"
            if ruid == 5:
                notices.append(_("Users cannot register themselves, only admin can register them."))
            elif ruid in (6, 1):
                # Code 1 covers both an invalid email and an email sending
                # problem; the form already validated the address, so only
                # the sending problem is left to report.
                notices.extend([
                    _("The site is having troubles in sending you an email for confirming your email address."),
                    _("The error has been logged and will be taken in consideration as soon as possible."),
                ])
            else:
                # Errors [-2, (1), 2, 3, 4] taken care of by form validation
                notices.append(_("Internal error %(ruid)s", ruid=ruid))
        else:
            page_title = _("Account created")
            notices.append(_("Your account has been successfully created."))
            alert_state = "success"
            notify_flag = cfg.get(
                'CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT')
            if notify_flag == 1:
                notices.extend([
                    _("In order to confirm its validity, an email message containing an account activation key has been sent to the given email address."),
                    _("Please follow instructions presented there in order to complete the account registration process."),
                ])
            if cfg.get('CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS') >= 1:
                notices.append(_("A second email will be sent when the account has been activated and can be used."))
            elif notify_flag != 1:
                # No activation step is pending: log the new user in.
                new_user = User.query.filter(
                    User.email == form.email.data.lower()).one()
                login_user(new_user.get_id())
                notices.append(_("You can now access your account."))
    elif request.method == 'POST':
        page_title = _("Registration failure")
        alert_state = "warning"

    return render_template('accounts/register.html',
                           form=form,
                           title=page_title,
                           messages=notices,
                           state=alert_state)
Beispiel #31
0
def register():
    """Handle the account registration view.

    The view refuses access when ``CFG_ACCESS_CONTROL_LEVEL_SITE`` is
    greater than 0. A valid form submission is passed to
    ``webuser.registerUser``; its return code selects the title, the
    messages and the state rendered in ``accounts/register.html``.
    """

    def _failure_messages(code):
        """Return the messages shown for a non-zero registration code."""
        if code == 5:
            return [_("Users cannot register themselves, only admin can register them.")]
        if code == 6 or code == 1:
            # Code 1 is shared by "invalid email" and "email could not be
            # sent"; the address was validated by the form, so only the
            # sending problem needs reporting here.
            return [
                _("The site is having troubles in sending you an email for confirming your email address."),
                _("The error has been logged and will be taken in consideration as soon as possible."),
            ]
        # Errors [-2, (1), 2, 3, 4] taken care of by form validation
        return [_("Internal error %(ruid)s", ruid=code)]

    req = request.get_legacy_request()

    # FIXME
    site_level = cfg.get('CFG_ACCESS_CONTROL_LEVEL_SITE')
    if site_level > 0:
        target = "../youraccount/register?ln=%s" % g.ln
        return webuser.page_not_authorized(req, target,
                                           navmenuid='youraccount')

    form = RegisterForm(request.values, csrf_enabled=False)

    heading = _("Register")
    lines = []
    level = ""

    if form.validate_on_submit():
        email = form.email.data.encode('utf8')
        password = form.password.data.encode('utf8')
        nickname = form.nickname.data.encode('utf8')
        ruid = webuser.registerUser(req, email, password, nickname, ln=g.ln)
        if ruid == 0:
            heading = _("Account created")
            lines.append(_("Your account has been successfully created."))
            level = "success"
            user_is_notified = cfg.get(
                'CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT') == 1
            if user_is_notified:
                lines.append(_("In order to confirm its validity, an email message containing an account activation key has been sent to the given email address."))
                lines.append(_("Please follow instructions presented there in order to complete the account registration process."))
            if cfg.get('CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS') >= 1:
                lines.append(_("A second email will be sent when the account has been activated and can be used."))
            elif not user_is_notified:
                account = User.query.filter(
                    User.email == form.email.data.lower()).one()
                login_user(account.get_id())
                lines.append(_("You can now access your account."))
        else:
            heading = _("Registration failure")
            level = "danger"
            lines.extend(_failure_messages(ruid))
    elif request.method == 'POST':
        heading = _("Registration failure")
        level = "warning"

    return render_template('accounts/register.html', form=form,
                           title=heading, messages=lines, state=level)
Beispiel #32
0
def get_storage_path(suffix=""):
    """Return a path ready to store files.

    The path is ``CFG_PREFIX``/``HARVESTER_STORAGE_PREFIX``/``suffix`` and
    is created (including parents) when it does not exist yet.

    :param suffix: optional sub-path appended to the storage prefix
    :return: the storage path as a string
    :raises OSError: when the directory cannot be created for a reason
        other than it already existing
    """
    import errno

    from invenio.base.globals import cfg

    storage_path = os.path.join(
        cfg.get("CFG_PREFIX"),
        cfg.get("HARVESTER_STORAGE_PREFIX"),
        suffix
    )
    # Try to create first and tolerate "already exists": a separate
    # existence check would race with other processes creating the same
    # directory between the check and the makedirs() call.
    try:
        os.makedirs(storage_path)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    return storage_path
Beispiel #33
0
    def _plot_extract(obj, eng):
        """Extract plots from the arXiv tarball and attach them to ``obj``.

        The extraction sources come from the enclosing ``plotextractor_types``
        or, when that is empty, from the ``p_extraction-source`` repository
        argument. Only the ``latex`` source is handled here: the tarball is
        downloaded (or reused from ``obj.extra_data["_result"]["tarball"]``),
        converted to MARCXML and the resulting files are attached to the
        object and published as a "Plots" task result.

        :param obj: Bibworkflow Object to process
        :param eng: BibWorkflowEngine processing the object
        """
        from invenio.utils.plotextractor.api import (
            get_tarball_from_arxiv, get_marcxml_plots_from_tarball)
        from invenio.modules.workflows.utils import convert_marcxml_to_bibfield
        from invenio.utils.shell import Timeout

        if "_result" not in obj.extra_data:
            obj.extra_data["_result"] = {}

        repository = obj.extra_data.get("repository", {})
        arguments = repository.get("arguments", {})

        chosen_type = plotextractor_types

        if not chosen_type:
            chosen_type = arguments.get('p_extraction-source', [])

        if not isinstance(chosen_type, list):
            chosen_type = [chosen_type]

        if 'latex' in chosen_type:
            # Run LaTeX plotextractor
            if "tarball" not in obj.extra_data["_result"]:
                extract_path = os.path.join(
                    cfg.get('OAIHARVESTER_STORAGEDIR',
                            cfg.get('CFG_TMPSHAREDDIR')), str(eng.uuid))
                tarball = get_tarball_from_arxiv(
                    obj.data.get(
                        cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
                    extract_path)
                if tarball is None:
                    obj.log.error("No tarball found")
                    return
                obj.extra_data["_result"]["tarball"] = tarball
            else:
                tarball = obj.extra_data["_result"]["tarball"]

            try:
                marcxml = get_marcxml_plots_from_tarball(tarball)
            except Timeout:
                eng.log.error(
                    'Timeout during tarball extraction on {0}'.format(tarball))
                # Without a value here the check below would fail with a
                # NameError after a timeout instead of skipping the plots.
                marcxml = None
            if marcxml:
                # We store the path to the directory the tarball contents lives
                new_dict = convert_marcxml_to_bibfield(marcxml)
                _attach_files_to_obj(obj, new_dict)
                obj.update_task_results(
                    "Plots", [{
                        "name": "Plots",
                        "result": new_dict["fft"],
                        "template": "workflows/results/plots.html"
                    }])
Beispiel #34
0
    def __init__(self, username=None, password=None, url=None, prefix=None,
                 test_mode=None, api_ver="2"):
        """
        Initialize DataCite API. In case parameters are not specified via
        keyword arguments, they will be read from the Invenio configuration.

        A warning is emitted when the ``ssl`` module is not available. The
        API URL is always stored with a trailing slash, and in test mode the
        prefix is forced to the DataCite test prefix.

        @param username: DataCite username (or CFG_DATACITE_USERNAME)
        @type  username: str

        @param password: DataCite password (or CFG_DATACITE_PASSWORD)
        @type  password: str

        @param url: DataCite API base URL (or CFG_DATACITE_URL). Defaults to
            https://mds.datacite.org/.
        @type  url: str

        @param prefix: DOI prefix (or CFG_DATACITE_DOI_PREFIX). Defaults to
            10.5072 (DataCite test prefix).
        @type  prefix: str

        @param test_mode: Set to True to enable test mode (or
            CFG_DATACITE_TESTMODE). Defaults to False.
        @type  test_mode: boolean

        @param api_ver: DataCite API version. Currently has no effect.
            Default to 2.
        @type  api_ver: str
        """
        if not HAS_SSL:
            warn("Module ssl not installed. Please install with e.g. "
                 "'pip install ssl'. Required for HTTPS connections to "
                 "DataCite.")

        self.username = username or cfg.get('CFG_DATACITE_USERNAME', '')
        self.password = password or cfg.get('CFG_DATACITE_PASSWORD', '')
        self.prefix = prefix or cfg.get('CFG_DATACITE_DOI_PREFIX', '10.5072')
        self.api_ver = api_ver  # Currently not used

        self.api_url = url or cfg.get('CFG_DATACITE_URL',
                                      'https://mds.datacite.org/')
        # endswith() rather than indexing [-1]: an empty configured URL must
        # not blow up with an IndexError.
        if not self.api_url.endswith('/'):
            self.api_url = self.api_url + "/"

        # An explicit argument (even False) wins over the configuration.
        if test_mode is not None:
            self.test_mode = test_mode
        else:
            self.test_mode = cfg.get('CFG_DATACITE_TESTMODE', False)

        # If in test mode, set prefix to 10.5072, the default DataCite test
        # prefix.
        if self.test_mode:
            self.prefix = "10.5072"
Beispiel #35
0
def arxiv_refextract(obj, eng):
    """Extract the references of an arXiv record from its PDF.

    The PDF path recorded in ``obj.extra_data["_result"]["pdf"]`` is used
    when present; otherwise the PDF is downloaded into
    ``<storage root>/<eng.uuid>`` and its path recorded. Extracted
    references are stored in ``obj.data["reference"]`` and published as a
    "References" task result.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.refextract.api import extract_references_from_file_xml
    from invenio.utils.plotextractor.api import get_pdf_from_arxiv
    from invenio.modules.workflows.utils import convert_marcxml_to_bibfield

    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}

    try:
        pdf_path = obj.extra_data["_result"]["pdf"]
    except KeyError:
        pdf_path = None

    if not pdf_path:
        storage_root = cfg.get('OAIHARVESTER_STORAGEDIR',
                               cfg.get('CFG_TMPSHAREDDIR'))
        download_dir = os.path.join(storage_root, str(eng.uuid))
        arxiv_id = obj.data.get(
            cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP'))
        pdf_path = get_pdf_from_arxiv(arxiv_id, download_dir)
        obj.extra_data["_result"]["pdf"] = pdf_path

    # Guard: nothing can be extracted without a file on disk.
    if not (pdf_path and os.path.isfile(pdf_path)):
        obj.log.error("Not able to download and process the PDF")
        return

    references_xml = extract_references_from_file_xml(
        obj.extra_data["_result"]["pdf"]
    )
    if not references_xml:
        obj.log.info("No references extracted")
        return

    # Wrap the extracted fragment into a full MARCXML collection document.
    marcxml_document = "".join([
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<collection>\n',
        references_xml,
        "\n</collection>",
    ])
    converted = convert_marcxml_to_bibfield(marcxml_document)
    if "reference" not in converted:
        return

    obj.data["reference"] = converted["reference"]
    reference_count = len(obj.data["reference"])
    obj.log.info("Extracted {0} references".format(reference_count))
    obj.update_task_results(
        "References",
        [{
            "name": "References",
            "result": converted['reference'],
            "template": "workflows/results/refextract.html",
        }]
    )
    return
Beispiel #36
0
def arxiv_refextract(obj, eng):
    """Run reference extraction on the PDF of an arXiv record.

    A PDF path already stored under ``obj.extra_data["_result"]["pdf"]`` is
    reused; when there is none the PDF is fetched from arXiv and the path
    is stored there. References found in the PDF end up in
    ``obj.data["reference"]`` and in a "References" task result.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.refextract.api import extract_references_from_file_xml
    from invenio.utils.plotextractor.api import get_pdf_from_arxiv
    from invenio.modules.workflows.utils import convert_marcxml_to_bibfield

    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}

    try:
        fulltext = obj.extra_data["_result"]["pdf"]
    except KeyError:
        fulltext = None

    if not fulltext:
        # No usable path recorded yet: download into the engine's folder.
        fulltext = get_pdf_from_arxiv(
            obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
            os.path.join(
                cfg.get('OAIHARVESTER_STORAGEDIR',
                        cfg.get('CFG_TMPSHAREDDIR')),
                str(eng.uuid)))
        obj.extra_data["_result"]["pdf"] = fulltext

    if not fulltext or not os.path.isfile(fulltext):
        obj.log.error("Not able to download and process the PDF")
    else:
        extracted = extract_references_from_file_xml(
            obj.extra_data["_result"]["pdf"])
        if not extracted:
            obj.log.info("No references extracted")
        else:
            xml_header = '<?xml version="1.0" encoding="UTF-8"?>\n'
            collection_xml = (xml_header + '<collection>\n' + extracted +
                              "\n</collection>")
            bibfield = convert_marcxml_to_bibfield(collection_xml)
            if "reference" in bibfield:
                obj.data["reference"] = bibfield["reference"]
                obj.log.info("Extracted {0} references".format(
                    len(obj.data["reference"])))
                result_entry = {
                    "name": "References",
                    "result": bibfield['reference'],
                    "template": "workflows/results/refextract.html"
                }
                obj.update_task_results("References", [result_entry])
                return
Beispiel #37
0
    def _arxiv_fulltext_download(obj, eng):
        """Perform the fulltext download step for arXiv records.

        The PDF is fetched into ``<storage root>/<eng.uuid>`` and, when
        found, recorded under ``obj.extra_data["_result"]["pdf"]``, attached
        to the object as an ``fft`` entry (typed with the enclosing
        ``doctype``) and published as a task result named after the file.
        Nothing is downloaded when a PDF is already recorded.

        :param obj: Bibworkflow Object to process
        :param eng: BibWorkflowEngine processing the object
        """
        from invenio.utils.plotextractor.api import get_pdf_from_arxiv

        # Create the results container only when it is really missing.
        # Testing the key "result" (without underscore) would reset the
        # container on every call, dropping earlier results and defeating
        # the duplicate check below.
        if "_result" not in obj.extra_data:
            obj.extra_data["_result"] = {}

        if "pdf" not in obj.extra_data["_result"]:
            extract_path = os.path.join(
                cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
                str(eng.uuid)
            )
            pdf = get_pdf_from_arxiv(
                obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
                extract_path
            )

            if pdf:
                obj.extra_data["_result"]["pdf"] = pdf
                new_dict_representation = {
                    "fft": [
                        {
                            "url": pdf,
                            "docfile_type": doctype
                        }
                    ]
                }
                _attach_files_to_obj(obj, new_dict_representation)
                fileinfo = {
                    "type": "fulltext",
                    "filename": os.path.basename(pdf),
                    "full_path": pdf,
                }
                obj.update_task_results(
                    os.path.basename(pdf),
                    [{
                        "name": "PDF",
                        "result": fileinfo,
                        "template": "workflows/results/files.html"
                    }]
                )
            else:
                obj.log.info("No PDF found.")
        else:
            eng.log.info("There was already a pdf register for this record,"
                         "perhaps a duplicate task in you workflow.")
Beispiel #38
0
def get_instance():
    """Build an RT client from the configuration, log in and return it.

    The URL, default user and default password are read from the
    ``CFG_BIBCATALOG_SYSTEM_RT_*`` settings.

    :return: the logged-in ``rt.Rt`` instance, or ``None`` when no RT URL
        is configured
    """
    rt_url = cfg.get("CFG_BIBCATALOG_SYSTEM_RT_URL", "")
    rt_user = cfg.get("CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER", "")
    rt_password = cfg.get("CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD", "")

    if not rt_url:
        return None

    client = rt.Rt(url=rt_url,
                   default_login=rt_user,
                   default_password=rt_password)
    client.login()
    return client
Beispiel #39
0
def get_connection_for_dump_on_slave():
    """Return a slave connection for performing dbdump operation on a slave.

    The super-user password is looked up once through
    ``_get_password_from_database_password_file`` and cached in ``cfg``
    under ``CFG_DATABASE_SLAVE_SU_PASS``. The returned connection has
    autocommit enabled.

    :return: the open connection to ``CFG_DATABASE_SLAVE``
    """
    su_user = cfg.get("CFG_DATABASE_SLAVE_SU_USER", "")
    if "CFG_DATABASE_SLAVE_SU_PASS" not in cfg:
        cfg["CFG_DATABASE_SLAVE_SU_PASS"] = \
            _get_password_from_database_password_file(su_user)

    # 3306 is the fallback *port*: it must be the default of cfg.get(), not
    # the second argument of int(), which is the numeric base and makes
    # int() raise for any value above 36.
    port = int(cfg.get("CFG_DATABASE_PORT", 3306))

    connection = connect(host=cfg.get("CFG_DATABASE_SLAVE", ""),
                         port=port,
                         db=cfg.get("CFG_DATABASE_NAME", ""),
                         user=su_user,
                         passwd=cfg.get("CFG_DATABASE_SLAVE_SU_PASS", ""),
                         use_unicode=False, charset='utf8')
    connection.autocommit(True)
    return connection
Beispiel #40
0
def get_connection_for_dump_on_slave():
    """Return a slave connection for performing dbdump operation on a slave.

    The super-user password is read through
    ``_get_password_from_database_password_file`` the first time and then
    kept in ``cfg`` under ``CFG_DATABASE_SLAVE_SU_PASS``. Autocommit is
    switched on before the connection is returned.

    :return: the open connection to ``CFG_DATABASE_SLAVE``
    """
    su_user = cfg.get("CFG_DATABASE_SLAVE_SU_USER", "")
    if "CFG_DATABASE_SLAVE_SU_PASS" not in cfg:
        cfg["CFG_DATABASE_SLAVE_SU_PASS"] = \
            _get_password_from_database_password_file(su_user)

    connection = connect(host=cfg.get("CFG_DATABASE_SLAVE", ""),
                         # The default port belongs to cfg.get(); passed to
                         # int() it would be taken as the numeric base and
                         # raise ValueError (base must be 0 or 2..36).
                         port=int(cfg.get("CFG_DATABASE_PORT", 3306)),
                         db=cfg.get("CFG_DATABASE_NAME", ""),
                         user=su_user,
                         passwd=cfg.get("CFG_DATABASE_SLAVE_SU_PASS", ""),
                         use_unicode=False, charset='utf8')
    connection.autocommit(True)
    return connection
Beispiel #41
0
def load(module='', prefix=''):
    """Import the template module for ``module`` and return a ``Template``.

    With the "default" skin (``CFG_WEBSTYLE_TEMPLATE_SKIN``) the module
    ``invenio.<module>_<prefix>templates`` is tried first and
    ``invenio.legacy.<module>.<prefix>templates`` second. With any other
    skin the skin-specific ``invenio.<module>_templates_<skin>`` is tried
    first and ``invenio.<module>_templates`` is the fallback.

    When 'inspect-templates' is listed in ``CFG_DEVEL_TOOLS`` every
    ``tmpl_*`` attribute of the template class is passed through
    ``enhance_method`` with ``method_wrapper``.

    :param module: module name such as 'websearch' or 'webbasket'
    :param prefix: name prefix, only used with the default skin
    :return: a new instance of the module's ``Template`` class
    """
    namespace = {}

    def _import(dotted_name, fromlist_entry):
        # Same dict for globals and locals, single-entry fromlist.
        return __import__(dotted_name, namespace, namespace,
                          [fromlist_entry])

    # load the right template based on the CFG_WEBSTYLE_TEMPLATE_SKIN and
    # the specified module
    if CFG_WEBSTYLE_TEMPLATE_SKIN != "default":
        try:
            template_module = _import(
                "invenio.%s_templates_%s" % (module,
                                             CFG_WEBSTYLE_TEMPLATE_SKIN),
                "invenio.templates.%s_%s" % (module,
                                             CFG_WEBSTYLE_TEMPLATE_SKIN))
        except ImportError:
            template_module = _import("invenio.%s_templates" % (module),
                                      "invenio.templates.%s" % (module))
    else:
        try:
            template_module = _import(
                "invenio.%s_%stemplates" % (module, prefix),
                "invenio.templates.%s" % (module))
        except ImportError:
            template_module = _import(
                "invenio.legacy.%s.%stemplates" % (module, prefix),
                "invenio.templates.%s" % (module))

    devel_tools = cfg.get('CFG_DEVEL_TOOLS', [])
    if 'inspect-templates' in devel_tools:
        template_methods = [name for name in dir(template_module.Template)
                            if name.startswith('tmpl_')]
        for name in template_methods:
            enhance_method(module, template_module.Template, name,
                           method_wrapper)

    return template_module.Template()
Beispiel #42
0
def get_current_user_records_that_can_be_displayed(qid):
    """Return the records of a cached query the current user may display.

    The lookup is memoized in ``search_results_cache`` for
    ``CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT``, with the query identifier and
    the current user's id as arguments of the memoized function.

    :param qid: query identifier

    :return: records in intbitset (empty when the query results are not in
        the cache)
    """
    cache_timeout = cfg.get('CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT')

    # NOTE: the name and signature of the inner function are kept as they
    # are, since the memoize decorator may derive its cache key from them.
    @search_results_cache.memoize(timeout=cache_timeout)
    def get_records_for_user(qid, uid):
        from invenio.legacy.search_engine import \
            get_records_that_can_be_displayed
        cache_key = get_search_results_cache_key_from_qid(qid)
        serialized_hits = search_results_cache.get(cache_key)
        if serialized_hits is None:
            return intbitset([])
        current_collection = search_results_cache.get(cache_key + '::cc')
        hits = intbitset().fastload(serialized_hits)
        return get_records_that_can_be_displayed(current_user, hits,
                                                 current_collection)

    # Simplifies API
    user_id = current_user.get_id()
    return get_records_for_user(qid, user_id)
Beispiel #43
0
 def generator(self):
     """Return the callable set as ``RECORD_DOCUMENT_NAME_GENERATOR``.

     The setting defaults to ``default_name_generator``. A string value is
     resolved to the object it names with ``import_string``; any other
     value is returned unchanged.
     """
     name_generator = cfg.get('RECORD_DOCUMENT_NAME_GENERATOR',
                              default_name_generator)
     if not isinstance(name_generator, six.string_types):
         return name_generator
     return import_string(name_generator)
Beispiel #44
0
def record_extraction_from_string(
        xml_string, oai_namespace="http://www.openarchives.org/OAI/2.0/"):
    """Split an OAI-PMH XML response into one document per record.

    Every ``record`` element is wrapped, together with the response's
    ``responseDate`` and ``request`` header elements, in a fresh
    ``OAI-PMH`` element which is then serialized.

    :param xml_string: OAI-PMH XML
    :type xml_string: str

    :param oai_namespace: optionally provide the OAI-PMH namespace; when
        falsy the namespace map is read from
        ``OAIHARVESTER_DEFAULT_NAMESPACE_MAP``
    :type oai_namespace: str

    :return: the serialized wrapped records, one entry per record
    :rtype: list
    """
    nsmap = ({None: oai_namespace} if oai_namespace
             else cfg.get("OAIHARVESTER_DEFAULT_NAMESPACE_MAP"))
    qualifier = "{{{0}}}".format(oai_namespace)
    root = etree.fromstring(xml_string)

    def _descendants(tag):
        # All descendants of the root carrying the namespace-qualified tag.
        return root.findall(".//{0}{1}".format(qualifier, tag), nsmap)

    shared_headers = []
    for header_tag in ("responseDate", "request"):
        shared_headers.extend(_descendants(header_tag))

    def _serialize_with_headers(record):
        envelope = etree.Element("OAI-PMH", nsmap=nsmap)
        for node in shared_headers + [record]:
            envelope.append(node)
        return etree.tostring(envelope)

    return [_serialize_with_headers(record)
            for record in _descendants("record")]
Beispiel #45
0
def filter_out_based_on_date_range(recids, fromdate="", untildate="", set_spec=None):
    """Restrict ``recids`` to records modified inside a date range.

    Missing bounds default to the earliest/latest datestamp; both bounds
    are converted from UTC to local time before querying ``bibrec``. When
    ``set_spec`` is given and the set was last updated after the lower
    bound, the lower bound is reset to the earliest datestamp. Restricted
    records are removed unless ``CFG_OAI_FILTER_RESTRICTED_RECORDS`` is
    switched off.

    :param recids: record ids to filter; the argument is not modified
    :param fromdate: lower bound, or empty for the earliest datestamp
    :param untildate: upper bound, or empty for the latest datestamp
    :param set_spec: set specification, or None to skip the set check
    :return: a new intbitset with the remaining record ids
    """
    lower = utc_to_localtime(
        normalize_date(fromdate, "T00:00:00Z") if fromdate
        else get_earliest_datestamp())
    upper = utc_to_localtime(
        normalize_date(untildate, "T23:59:59Z") if untildate
        else get_latest_datestamp())

    # set_spec either has a value or is empty (meaning all records).
    if set_spec is not None:
        set_updated = get_set_last_update(set_spec)
        if set_updated is not None \
                and utc_to_localtime(set_updated) > lower:
            lower = utc_to_localtime(get_earliest_datestamp())

    kept = intbitset(recids)  # work on a copy

    if lower and upper:
        query = "SELECT id FROM bibrec WHERE modification_date BETWEEN %s AND %s"
        params = (lower, upper)
    elif lower:
        query = "SELECT id FROM bibrec WHERE modification_date >= %s"
        params = (lower, )
    elif upper:
        query = "SELECT id FROM bibrec WHERE modification_date <= %s"
        params = (upper, )
    else:
        query = params = None

    if query is not None:
        kept &= intbitset(run_sql(query, params))

    if cfg.get('CFG_OAI_FILTER_RESTRICTED_RECORDS', True):
        kept = kept - get_all_restricted_recids()

    return kept
Beispiel #46
0
def index(p, so, page):
    """Render one page of the current user's instruments.

    :param p: value forwarded to the instrument lookup and count helpers
    :param so: not used by this view
    :param page: requested page number; values below 1 are treated as 1
    :return: the rendered ``instruments/index.html`` template
    """
    page = max(page, 1)
    per_page = cfg.get('INSTRUMENTS_DISPLAYED_PER_PAGE', 9)

    raw_instruments = getPaginatedInstrumentsByIdUser(
        current_user['id'], p, page, per_page)
    total = getCountInstrumentsByIdUser(current_user['id'], p)
    decoded = json.loads(raw_instruments)

    search_form = SearchForm()

    instruments = [Instrument.from_json(entry) for entry in decoded]

    return render_template(
        "instruments/index.html",
        instruments=instruments,
        form=search_form,
        page=page,
        per_page=per_page,
        pagination=Pagination(page, per_page, total),
    )
Beispiel #47
0
def extract_references_from_url_xml(url):
    """Extract references from the pdf specified in the url.

    The content behind ``url`` is downloaded into a temporary file in
    ``CFG_TMPSHAREDDIR``, references are extracted from that file and the
    file is removed again, whatever the outcome.

    :param url: location of the pdf
    :return: the extracted references in marcxml
    :raises FullTextNotAvailable: if the url gives a 404
    """
    file_request = requests.get(url)
    # requests.get() does not raise on HTTP error statuses, so the
    # documented 404 case has to be detected from the response itself.
    if file_request.status_code == 404:
        raise FullTextNotAvailable()

    # [-1] (not [-1:]) so the prefix is the last path segment itself and
    # not the repr of a one-element list.
    temp_fd, filepath = mkstemp(
        prefix=url.split('/')[-1],
        dir=cfg.get("CFG_TMPSHAREDDIR"),
    )

    try:
        # Always release the descriptor, even when the write fails.
        try:
            os.write(temp_fd, file_request.content)
        finally:
            os.close(temp_fd)

        try:
            marcxml = extract_references_from_file_xml(filepath)
        except IOError as err:
            # A plain IOError carries no ``code`` attribute; only errors
            # that do (HTTP-style ones) can report a 404.
            if getattr(err, 'code', None) == 404:
                raise FullTextNotAvailable()
            raise
    finally:
        os.remove(filepath)
    return marcxml
Beispiel #48
0
def is_user_viewer_of_record(user_info, recid):
    """Tell whether the user is granted viewing rights by the record itself.

    The values of the MARC tags listed in
    ``CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS`` are collected from
    the record. The user is a viewer when one of these values is one of the
    user's groups, or equals the user's email once both are stripped and
    lower-cased.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: True if the user is 'allow to view' the record; False otherwise
    :rtype: bool
    """
    granted_entries = []
    for marc_tag in cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS',
                            []):
        # Imported lazily: only needed when at least one tag is configured.
        from invenio.legacy.bibrecord import get_fieldvalues
        granted_entries += get_fieldvalues(recid, marc_tag)

    def _grants_access(entry):
        """Check one tag value against the user's groups, then email."""
        if entry in user_info['group']:
            return True
        normalized = entry.strip().lower()
        return user_info['email'].strip().lower() == normalized

    return any(_grants_access(entry) for entry in granted_entries)
Beispiel #49
0
def is_user_owner_of_record(user_info, recid):
    """Tell whether the user is granted owner rights by the record itself.

    The values of the MARC tags listed in
    ``CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS`` are collected from
    the record. The user is an owner when one of these values is one of the
    user's groups, or equals the user's email once both are stripped and
    lower-cased. On a CERN site a value is also tried as a group name with
    '@cern.ch' replaced by ' [CERN]'.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: True if the user is 'owner' of the record; False otherwise
    """
    granted_entries = []
    for marc_tag in cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS',
                            []):
        # Imported lazily: only needed when at least one tag is configured.
        from invenio.legacy.bibrecord import get_fieldvalues
        granted_entries += get_fieldvalues(recid, marc_tag)

    def _grants_ownership(entry):
        """Check one tag value against groups, email and CERN e-groups."""
        if entry in user_info['group']:
            return True
        normalized = entry.strip().lower()
        if user_info['email'].strip().lower() == normalized:
            return True
        if not cfg['CFG_CERN_SITE']:
            return False
        # The e-group may be written as an address ending in '@cern.ch'.
        cern_group = entry.replace('@cern.ch', ' [CERN]')
        return cern_group in user_info['group']

    return any(_grants_ownership(entry) for entry in granted_entries)
Beispiel #50
0
def extract_references_from_url_xml(url):
    """Extract references from the pdf specified in the url.

    The content behind ``url`` is saved to a temporary file in
    ``CFG_TMPSHAREDDIR``; references are extracted from it and the file is
    deleted afterwards, also when extraction fails.

    :param url: location of the pdf
    :return: the extracted references in marcxml
    :raises FullTextNotAvailable: if the url gives a 404
    """
    file_request = requests.get(url)
    # HTTP error statuses do not make requests.get() raise; check the
    # status explicitly to honour the documented 404 behaviour.
    if file_request.status_code == 404:
        raise FullTextNotAvailable()

    # Use the last path segment as prefix; slicing with [-1:] would format
    # a list and give a prefix such as "['paper.pdf']".
    temp_fd, filepath = mkstemp(
        prefix=url.split('/')[-1],
        dir=cfg.get("CFG_TMPSHAREDDIR"),
    )

    try:
        # Close the descriptor in all cases so it cannot leak when the
        # write fails.
        try:
            os.write(temp_fd, file_request.content)
        finally:
            os.close(temp_fd)

        try:
            marcxml = extract_references_from_file_xml(filepath)
        except IOError as err:
            # ``code`` is only present on HTTP-style errors, not on every
            # IOError, hence the getattr().
            if getattr(err, 'code', None) == 404:
                raise FullTextNotAvailable()
            raise
    finally:
        os.remove(filepath)
    return marcxml
Beispiel #51
0
def filter_out_based_on_date_range(recids, fromdate="", untildate="", set_spec=None):
    """Restrict ``recids`` to records modified inside a date range.

    :param recids: the record identifiers to filter; the argument itself is
        not modified, a copy is filtered and returned.
    :param fromdate: lower bound; when empty the earliest datestamp is used.
    :param untildate: upper bound; when empty the latest datestamp is used.
    :param set_spec: when not None, the last update of that set is looked
        up; if it is more recent than the lower bound, the lower bound is
        reset to the earliest datestamp.
    :return: the filtered identifiers, with restricted records removed
        unless ``CFG_OAI_FILTER_RESTRICTED_RECORDS`` is disabled.
    """
    lower_utc = normalize_date(fromdate, "T00:00:00Z") if fromdate \
        else get_earliest_datestamp()
    fromdate = utc_to_localtime(lower_utc)

    upper_utc = normalize_date(untildate, "T23:59:59Z") if untildate \
        else get_latest_datestamp()
    untildate = utc_to_localtime(upper_utc)

    # ``set_spec`` is either a set name or empty (meaning all records).
    if set_spec is not None:
        set_updated = get_set_last_update(set_spec)
        if set_updated is not None and \
                utc_to_localtime(set_updated) > fromdate:
            fromdate = utc_to_localtime(get_earliest_datestamp())

    # Work on a copy so the caller's collection stays untouched.
    selected = intbitset(recids)

    # Pick the query matching whichever bounds are available.
    if fromdate and untildate:
        query = "SELECT id FROM bibrec WHERE modification_date BETWEEN %s AND %s"
        params = (fromdate, untildate)
    elif fromdate:
        query = "SELECT id FROM bibrec WHERE modification_date >= %s"
        params = (fromdate, )
    elif untildate:
        query = "SELECT id FROM bibrec WHERE modification_date <= %s"
        params = (untildate, )
    else:
        query = params = None

    if query is not None:
        selected &= intbitset(run_sql(query, params))

    if cfg.get('CFG_OAI_FILTER_RESTRICTED_RECORDS', True):
        return selected - get_all_restricted_recids()

    return selected
Beispiel #52
0
def is_user_owner_of_record(user_info, recid):
    """Tell whether the user counts as an owner of the given record.

    The values stored in the record under the tags listed in
    ``CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS`` are collected. The user
    is an owner as soon as one of those values is one of the user's groups
    or equals the user's email address (surrounding whitespace and letter
    case are ignored for the email comparison).

    :param user_info: the user_info dictionary that describes the user; its
        ``'group'`` and ``'email'`` entries are read.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: True if the user is 'owner' of the record; False otherwise
    """
    tags = cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS', [])
    granted_to = []
    for tag in tags:
        # Imported lazily: only needed once there is a tag to look up.
        from invenio.legacy.bibrecord import get_fieldvalues
        granted_to += get_fieldvalues(recid, tag)

    for candidate in granted_to:
        if candidate in user_info['group']:
            return True
        if candidate.strip().lower() == user_info['email'].strip().lower():
            return True
        # On CERN sites the stored value may be an ``...@cern.ch`` address
        # while the user's group list carries the same e-group with a
        # `` [CERN]`` suffix instead.
        if cfg['CFG_CERN_SITE'] and \
                candidate.replace('@cern.ch', ' [CERN]') in user_info['group']:
            return True
    return False
Beispiel #53
0
def is_user_viewer_of_record(user_info, recid):
    """Tell whether the record itself grants the user viewing rights.

    The values stored in the record under the tags listed in
    ``CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS`` are collected. The
    user may view the record when one of those values is one of the user's
    groups or equals the user's email address (surrounding whitespace and
    letter case are ignored for the email comparison).

    :param user_info: the user_info dictionary that describes the user; its
        ``'group'`` and ``'email'`` entries are read.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: True if the user is allowed to view the record; False otherwise
    :rtype: bool
    """
    tags = cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS', [])
    allowed = []
    for tag in tags:
        # Imported lazily: only needed once there is a tag to look up.
        from invenio.legacy.bibrecord import get_fieldvalues
        allowed += get_fieldvalues(recid, tag)

    # Group membership is tested before the email, entry by entry.
    return any(
        entry in user_info['group'] or
        entry.strip().lower() == user_info['email'].strip().lower()
        for entry in allowed
    )
Beispiel #54
0
def load(module='', prefix=''):
    """Load and return a template instance for the given module.

    The module matching the currently selected template skin (see
    invenio.conf, variable CFG_WEBSTYLE_TEMPLATE_SKIN) is tried first. If
    importing it fails with ImportError, the fallback template module for
    ``module`` is imported instead.

    :param module: module name, like 'websearch', 'webbasket', ...
    :param prefix: optional prefix put in front of ``templates`` in the
        module name; only used with the "default" skin.
    :return: a new instance of the ``Template`` class of the loaded module.
    :raises ImportError: if neither candidate module can be imported.
    """
    local = {}
    # A non-empty ``fromlist`` makes ``__import__`` return the leaf module
    # instead of the top-level package.
    fromlist = ["invenio.legacy.%s.templates" % (module)]

    # Pick the candidate module names based on the configured skin.
    if CFG_WEBSTYLE_TEMPLATE_SKIN == "default":
        preferred = "invenio.%s_%stemplates" % (module, prefix)
        fallback = "invenio.legacy.%s.%stemplates" % (module, prefix)
    else:
        preferred = "invenio.legacy.%s.templates_%s" % (
            module, CFG_WEBSTYLE_TEMPLATE_SKIN)
        fallback = "invenio.legacy.%s.templates" % (module)

    try:
        mymodule = __import__(preferred, local, local, fromlist)
    except ImportError:
        mymodule = __import__(fallback, local, local, fromlist)

    # Development aid: wrap every ``tmpl_*`` method when template inspection
    # is switched on.
    if 'inspect-templates' in cfg.get('CFG_DEVEL_TOOLS', []):
        for method_name in dir(mymodule.Template):
            if method_name.startswith('tmpl_'):
                enhance_method(module, mymodule.Template, method_name,
                               method_wrapper)

    return mymodule.Template()
Beispiel #55
0
def setup_app():
    """Wire the OAuth2 provider into the current application."""
    oauth2.init_app(current_app)

    # Let the provider read and store users, clients and tokens through the
    # SQLAlchemy models.
    bind_sqlalchemy(oauth2, db.session, client=Client)

    # Flask-OAuthlib does not support CACHE_REDIS_URL, so a client built
    # from that url is registered as the redis host unless one is already
    # configured.
    uses_redis = cfg['OAUTH2_CACHE_TYPE'] == 'redis'
    if uses_redis and cfg.get('CACHE_REDIS_URL'):
        from redis import from_url as redis_from_url
        redis_client = redis_from_url(cfg['CACHE_REDIS_URL'])
        cfg.setdefault('OAUTHLIB_CACHE_REDIS_HOST', redis_client)

    # Grant tokens are read from and written to the configured cache.
    bind_cache_grant(current_app, oauth2, OAuthUserProxy.get_current_user)

    # Turn off oauthlib's secure transport check when debugging or testing.
    if current_app.debug or current_app.testing:
        os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
Beispiel #56
0
def default_name_generator(document):
    """Build the default storage path of a record document.

    The ``_id`` of the document is split over two folder levels below
    ``CFG_BIBDOCFILE_FILEDIR``: characters 0-1 name the first folder,
    characters 2-3 the second one, and the remainder is the file name.

    The folders are created when missing. An already existing directory is
    accepted silently; any other failure is propagated.

    :param document: The document to be stored.
    :returns: Path based on the `_id` of the document.
    :raises OSError: if the folders cannot be created, e.g. because the
        target path exists but is not a directory.

    """
    doc_id = document['_id']
    base_dir = cfg.get('CFG_BIBDOCFILE_FILEDIR')
    target_dir = os.path.join(base_dir, doc_id[0:2], doc_id[2:4])
    try:
        os.makedirs(target_dir)
    except OSError as exc:
        # Only "already exists as a directory" is harmless.
        if exc.errno != errno.EEXIST or not os.path.isdir(target_dir):
            raise
    return os.path.join(target_dir, doc_id[4:])
Beispiel #57
0
def train(records, output):
    """Train a set of records from the command line.

    Usage: inveniomanage predicter train -r /path/to/json -o model.pickle

    :param records: path to the file holding the records to train on.
    :param output: path of the model file to produce; a bare file name is
        placed inside ``CLASSIFIER_MODEL_PATH``.
    :return: None. Problems are reported on stderr and nothing is scheduled;
        otherwise the id of the scheduled job is printed.
    """
    if not records:
        print("Missing records!", file=sys.stderr)
        return

    if not os.path.isfile(records):
        print("{0} is not a file!".format(records), file=sys.stderr)
        return

    if os.path.basename(output) == output:
        # Only a relative name, prefix with config
        output = os.path.join(
            cfg.get("CLASSIFIER_MODEL_PATH", ""), output
        )

    # An empty dirname means the model goes into the current directory.
    output_dir = os.path.dirname(output) or os.curdir

    # Make sure the parent directories exist. Only the directory is created,
    # never the model path itself, which must stay available for the file.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Check that location is writable
    if not os.access(output_dir, os.W_OK):
        print("{0} is not writable file!".format(output), file=sys.stderr)
        return

    job = celery_train.delay(records, output)
    print("Scheduled job {0}".format(job.id))
Beispiel #58
0
def getServiceJsonParamenters():
    """
    Return the Lifewatch service parameters after a JSON round trip.

    The value configured as ``CFG_LFW_SERVICE`` is serialised to JSON and
    decoded again, so the caller receives the decoded object rather than a
    JSON string.
    """
    return json.loads(json.dumps(cfg.get('CFG_LFW_SERVICE')))