예제 #1
0
 def __init__(self, *args, **kwargs):
     """Initialise the form and load its select choices from knowledge bases.

     ``subject`` receives one ``(value, value)`` choice per mapping of the
     knowledge base named by ``cfg["DEPOSIT_INSPIRE_SUBJECTS_KB"]``.
     ``degree_type`` receives the same for
     ``cfg["DEPOSIT_INSPIRE_DEGREE_KB"]``, preceded by an empty
     ``('', '')`` choice.
     """
     super(LiteratureForm, self).__init__(*args, **kwargs)
     # Imported at call time, after the parent constructor has run.
     from invenio.modules.knowledge.api import get_kb_mappings

     def _value_pairs(kb_config_key):
         # Both the stored value and the label shown are the mapping value.
         pairs = []
         for mapping in get_kb_mappings(cfg[kb_config_key]):
             pairs.append((mapping['value'], mapping['value']))
         return pairs

     self.subject.choices = _value_pairs("DEPOSIT_INSPIRE_SUBJECTS_KB")
     self.degree_type.choices = (
         [('', '')] + _value_pairs("DEPOSIT_INSPIRE_DEGREE_KB")
     )
예제 #2
0
def format_element(bfo, separator=' | ', link="yes"):
    """Print links to PDG Live for the PDG codes of a record.

    Reads the ``084__`` fields whose subfield ``9`` is ``PDG`` and looks
    subfield ``a`` up (exact match) in the ``PDG`` knowledge base. Every code
    that has a mapping becomes an anchor to ``pdglive.lbl.gov``. The first
    two anchors are printed directly; any further ones are placed in a
    hidden block that a "More"/"Less" link toggles.

    @param link accepted for interface compatibility; its value is not used
    @param separator  separates multiple entries
    """
    shown = []
    collapsed = []

    for field in bfo.fields('084__'):
        if field.get('9') != 'PDG' or 'a' not in field:
            continue
        kb_hits = get_kb_mappings('PDG', key=field['a'], match_type="e")
        # ':' and '=' in the code are replaced by '/' for the URL and label.
        pdg_path = field['a'].replace(':', '/').replace('=', '/')
        if not kb_hits:
            continue

        label = kb_hits[0]['value']
        if label == "THE TEXT IS MISSING FOR THIS NODE.":
            label = pdg_path + ' (Title Unknown)'
        anchor = ('<a href="http://pdglive.lbl.gov/view/' + pdg_path + '">' +
                  label + '</a>')

        # Only the first two links stay visible.
        bucket = shown if len(shown) < 2 else collapsed
        bucket.append(anchor)

    more_section = ""
    if collapsed:
        more_section = """ | <a href="#" style="color:green;background:white;" onclick="toggle2('content', this)"><i>More</i></a>
<div id="content" style="display:none; padding-left:36px;">
%(content)s
</div>
<script type="text/javascript">
function toggle2(id, link) {
var e = document.getElementById(id);
if (e.style.display == '') {
e.style.display = 'none';
link.innerHTML = '<i>More</i>';
}
else
{
e.style.display = '';
link.innerHTML = '<i>Less</i>';
}
}
</script>
""" % {'content': separator.join(collapsed)}

    return separator.join(shown) + more_section
def _kb_requirements_choices():
    """Return ``(key, title)`` choices for application-environment requirements.

    Each mapping of the ``requirements`` knowledge base holds a JSON object
    in its value; a mapping is kept when that object has a truthy
    ``domain_app_env`` entry.
    """
    def _as_choice(mapping):
        spec = json.loads(mapping['value'])
        if not spec.get('domain_app_env', False):
            return None
        return (mapping['key'], spec['title'])

    entries = get_kb_mappings('requirements', '', '')
    # A kept entry is a non-empty tuple, so truthiness separates it from None.
    return filter(None, (_as_choice(entry) for entry in entries))
예제 #4
0
def _kb_requirements_choices(domain_flavor=True, domain_os=True):
    """Return ``(key, title)`` choices for flavor and/or OS requirements.

    Each mapping of the ``requirements`` knowledge base holds a JSON object
    in its value. A mapping is kept when the object flags ``domain_flavor``
    and ``domain_flavor`` is requested, or flags ``domain_os`` and
    ``domain_os`` is requested.
    """
    def _as_choice(mapping):
        spec = json.loads(mapping['value'])
        selected = (
            (spec.get('domain_flavor', False) and domain_flavor) or
            (spec.get('domain_os', False) and domain_os)
        )
        return (mapping['key'], spec['title']) if selected else None

    entries = get_kb_mappings('requirements', '', '')
    # A kept entry is a non-empty tuple, so truthiness separates it from None.
    return filter(None, (_as_choice(entry) for entry in entries))
예제 #5
0
def format_element(bfo, separator=' | ', link="yes"):
    """Print the PACS codes of a record as search links.

    Reads the ``084__`` fields whose subfield ``2`` is ``PACS``. Each
    non-empty subfield ``a`` becomes an anchor that searches for the code;
    the anchor's ``title`` attribute is the exact-match value from the
    ``PACS`` knowledge base, or 'Translation not available' when there is
    none. The first 25 PACS fields are printed directly; later ones are
    placed in a hidden block that a "More"/"Less" link toggles.

    @param link accepted for interface compatibility; its value is not used
    @param separator separates multiple items
    """
    shown = []
    collapsed = []
    pacs_seen = 0

    for item in bfo.fields('084__'):
        if item.get('2') != 'PACS':
            continue
        pacs_code = item.get('a')
        if pacs_code:
            kb_hits = get_kb_mappings('PACS', key=pacs_code, match_type="e")
            title = 'Translation not available'
            if kb_hits:
                title = kb_hits[0]['value']
            anchor = ("<a href='/search?" +
                      urlencode({'p': '084__:"' + pacs_code + '"'}) +
                      "' title='" +
                      title +
                      "'>" +
                      pacs_code +
                      "</a>")
            if pacs_seen < 25:
                shown.append(anchor)
            else:
                collapsed.append(anchor)
        # PACS fields without a code still count towards the limit of 25.
        pacs_seen += 1

    # Built once after the loop; it used to be rebuilt for every PACS field,
    # with only the last result being used.
    more_section = ""
    if collapsed:
        more_section = """ | <a href="#" onclick="toggle2('content', this); return false;" style="color:green;background:white;"><i>More</i></a>
<div id="content" style="display:none; padding-left:42px;">
%(content)s
</div>
<script type="text/javascript">
function toggle2(id, link) {
var e = document.getElementById(id);
if (e.style.display == '') {
    e.style.display = 'none';
    link.innerHTML = '<i>More</i>';
}
else {
    e.style.display = '';
    link.innerHTML = '<i>Less</i>';
}
}
</script>
""" % {'content': separator.join(collapsed)}

    return separator.join(shown) + more_section
예제 #6
0
def _kb_license_choices(domain_data=True, domain_content=True,
                        domain_software=True):
    """Return ``(key, title)`` choices for licenses of the requested domains.

    Each mapping of the ``licenses`` knowledge base holds a JSON object in
    its value. A mapping is kept when, for at least one domain, the object's
    flag is truthy and the matching argument is truthy too. The
    ``domain_*`` keys are read with plain indexing, so a missing key raises
    ``KeyError``.
    """
    def _as_choice(mapping):
        info = json.loads(mapping['value'])
        requested = (
            ('domain_data', domain_data),
            ('domain_content', domain_content),
            ('domain_software', domain_software),
        )
        # Evaluated left to right and stops at the first hit.
        if any(info[name] and wanted for name, wanted in requested):
            return (mapping['key'], info['title'])
        return None

    entries = get_kb_mappings('licenses', '', '')
    # A kept entry is a non-empty tuple, so truthiness separates it from None.
    return filter(None, (_as_choice(entry) for entry in entries))
예제 #7
0
def get_journal_info(recid, tags):
    """Return the formatted journal reference strings of a record.

    The first value of each of the journal, volume, year and pages fields
    (tag names taken from ``tags['publication']``) is collected under the
    letters ``p``, ``v``, ``y`` and ``c``. When every one of those letters
    that occurs in ``tags['publication_format']`` has a value, the result
    holds:

    * the reference formatted with the collected values,
    * the same reference with the alternative volume, if
      ``get_alt_volume`` returns one,
    * one reference per ``CODENS`` knowledge-base mapping whose value is
      the journal title, with the mapping key used as the journal.

    @param recid record identifier passed to ``get_fieldvalues``
    @param tags dict with a 'publication' dict (keys 'journal', 'volume',
                'year', 'pages') and a 'publication_format' value
    @return list of formatted references; empty when required data is missing
    """
    publication_tags = tags['publication']

    # TODO: handle records with multiple journals (only the first value of
    # each field is used).
    # Letter -> value, like c->444 y->1999 p->"journal of foo" v->20
    tagsvalues = {}
    for letter, field_name in (('p', 'journal'), ('v', 'volume'),
                               ('y', 'year'), ('c', 'pages')):
        found = get_fieldvalues(recid, publication_tags[field_name])
        if found:
            tagsvalues[letter] = found[0]

    if 'c' in tagsvalues:
        # if the page numbers have "x-y" take just x
        pages = tagsvalues['c']
        hpos = pages.find("-")
        if hpos > 0:
            tagsvalues['c'] = pages[:hpos]

    # check if we have the required data
    publication_format = tags['publication_format']
    for c in publication_format:
        if c in ('p', 'v', 'y', 'c') and c not in tagsvalues:
            return []

    record_info = [format_journal(publication_format, tagsvalues)]

    # The format may not use the volume or the journal, so either can be
    # missing here; unguarded indexing used to raise KeyError in that case.
    if 'v' in tagsvalues:
        alt_volume = get_alt_volume(tagsvalues['v'])
        if alt_volume:
            with_alt_volume = tagsvalues.copy()
            with_alt_volume['v'] = alt_volume
            record_info.append(
                format_journal(publication_format, with_alt_volume))

    # Add codens
    if 'p' in tagsvalues:
        for coden in get_kb_mappings('CODENS', value=tagsvalues['p']):
            with_coden = tagsvalues.copy()
            with_coden['p'] = coden['key']
            record_info.append(
                format_journal(publication_format, with_coden))

    return record_info
예제 #8
0
파일: services.py 프로젝트: SCOAP3/invenio
 def prepare_data_cache(self):
     """Build and return a word index of the knowledge base.

     Every mapping key of the knowledge base named by ``self.get_kbname()``
     is split by ``clean_and_split_words_and_stem`` (site language,
     ``stem_p=True``). The result maps each resulting word to the list of
     distinct mapping values whose key produced that word, in first-seen
     order.
     """
     index = {}
     for mapping in get_kb_mappings(self.get_kbname()):
         kb_key = mapping['key']
         kb_value = mapping['value']
         for word in clean_and_split_words_and_stem(kb_key, CFG_SITE_LANG,
                                                    stem_p=True):
             values = index.setdefault(word, [])
             if kb_value not in values:
                 values.append(kb_value)
     return index
예제 #9
0
def update_kb(kb_name, data, key_fun, value_fun=lambda x: x, update=False):
    """Synchronise a knowledge base with ``data``.

    For every item, ``key_fun(item)`` is the mapping key and the JSON dump of
    ``value_fun(item)`` is the mapping value. Keys that were not in the
    knowledge base when the function started are added. Keys that were
    already there are rewritten only when ``update`` is true and the stored
    value differs.
    """
    # Memory greedy, but faster than many individual SQL queries.
    existing = {m['key']: m['value'] for m in get_kb_mappings(kb_name)}

    for item in data:
        key = key_fun(item)
        encoded = json.dumps(value_fun(item))
        if key in existing:
            if update and existing[key] != encoded:
                update_kb_mapping(kb_name, key, key, encoded)
            continue
        add_kb_mapping(kb_name, key, encoded)
예제 #10
0
def perform_request_knowledge_base_show(kb_id, ln=CFG_SITE_LANG, sortby="to",
                                        startat=0, search_term=""):
    """
    Render the admin page showing the mappings of one knowledge base.

    @param kb_id a knowledge base id
    @param ln language
    @param sortby the sorting criteria ('from' or 'to')
    @param startat start showing mapping rules at what number
    @param search_term keep only the mappings whose key or value contains
                       this string
    @return the content of the given knowledge base
    """
    kb = bibknowledge.get_kb_by_id(kb_id)
    name = kb.name
    mappings = bibknowledge.get_kb_mappings(name, sortby)
    kb_type = kb.kbtype

    if search_term:
        # The user is searching: keep only the matching rules.
        def _matches(rule):
            key, value = rule['key'], rule['value']
            return key.count(search_term) > 0 or value.count(search_term) > 0

        mappings = [rule for rule in mappings if _matches(rule)]

    # A dynamic kb additionally gets its configuration from the DB and the
    # list of collection names.
    dyn_config = None
    collections = None
    if kb_type == 'd':
        from invenio.legacy.search_engine \
            import get_alphabetically_ordered_collection_list
        dyn_config = kb.kbdefs.to_dict() if kb.kbdefs else {}
        collections = [
            entry[0]
            for entry in get_alphabetically_ordered_collection_list()
        ]

    return bibknowledge_templates.tmpl_admin_kb_show(ln, kb_id, name,
                                                     mappings, sortby, startat,
                                                     kb_type, search_term,
                                                     dyn_config, collections)
예제 #11
0
    def _convert_files(obj, eng):
        """Convert the extracted World Scientific files and store the results.

        Each file listed in ``obj.data['extracted_files']`` whose date (as
        returned by ``WorldScientific.get_date``) lies between the
        ``from_date`` and ``to_date`` task arguments is converted with
        ``WorldScientific.get_record``. Non-empty results are written under
        the storage folder, keeping the file's base name. The written paths
        are stored in ``obj.data['insert']`` and the folder in
        ``obj.data['result_path']``.

        ``to_date`` defaults to today and ``from_date`` to '1900-01-01'.
        """
        from invenio.modules.knowledge.api import get_kb_mappings
        journal_mappings = {
            item['key']: item['value'] for item in get_kb_mappings('JOURNALS')
        }
        ws = WorldScientific(journal_mappings)

        target_folder_full = get_storage_path(suffix=target_folder)

        args = obj.extra_data['args']
        to_date = args.get("to_date") or datetime.now().strftime('%Y-%m-%d')
        from_date = args.get("from_date") or '1900-01-01'

        insert_files = []
        for source_path in obj.data['extracted_files']:
            # Dates are compared as strings.
            if not (from_date <= ws.get_date(source_path) <= to_date):
                continue
            marc = ws.get_record(source_path)
            if not marc:
                continue
            destination = join(target_folder_full, basename(source_path))
            insert_files.append(destination)
            with open(destination, 'w') as outfile:
                outfile.write(marc)

        obj.log.info("Converted {0} articles between {1} to {2}".format(
            len(insert_files), from_date, to_date))

        obj.data['insert'] = insert_files
        obj.data["result_path"] = target_folder_full

        obj.log.debug(
            "Saved converted files to {0}".format(target_folder_full))
        obj.log.debug("{0} files to add".format(len(obj.data["insert"])))
예제 #12
0
def perform_request_knowledge_bases_management(ln=CFG_SITE_LANG, search="",
                                               descriptiontoo=""):
    """
    Return the main page for knowledge bases management.

    When ``search`` is non-empty only the knowledge bases that contain it
    are listed. A knowledge base matches when one of its mapping keys or
    values contains the string, or, if ``descriptiontoo`` is set, when its
    description or name contains it. Each matching knowledge base is listed
    once.

    @param ln language
    @param search search for this string in kb's
    @param descriptiontoo search in descriptions (and names) too
    @return the main page for knowledge bases management
    """
    kbs = bibknowledge.get_kbs_info()
    if search:
        matching = []
        for kb in kbs:
            kbname = kb['name']
            # Description and name are consulted only when descriptiontoo
            # is set. A kb matching both used to be appended twice.
            found = bool(descriptiontoo) and (
                kb['description'].count(search) > 0 or
                kbname.count(search) > 0)
            if not found:
                # Fetch the mappings only when still undecided and stop at
                # the first hit.
                for mapping in bibknowledge.get_kb_mappings(kbname):
                    if (mapping['key'].count(search) > 0 or
                            mapping['value'].count(search) > 0):
                        found = True
                        break
            if found:
                matching.append(kb)
        kbs = matching

    return bibknowledge_templates.tmpl_admin_kbs_management(ln, kbs, search)
예제 #13
0
파일: utils.py 프로젝트: rsalas82/lw-daap
def get_requirements():
    """Group the entries of the ``requirements`` knowledge base by kind.

    Returns a dict with the ordered dictionaries ``flavors``, ``images`` and
    ``app_envs``. Each mapping value is a JSON object; it is stored in a
    group (under the object's ``id``) when the object flags the matching
    ``domain_*`` entry and has a truthy kind-specific id field.
    """
    # For every 'domain_xxx' flag found in a KB entry:
    #   'reqs' - name of the group of the result that receives the entry,
    #   'id'   - field of the entry that must be set for it to be listed.
    domains_to_reqs = {
        'domain_flavor': {'reqs': 'flavors', 'id': 'flavor-id'},
        'domain_os': {'reqs': 'images', 'id': 'image-id'},
        'domain_app_env': {'reqs': 'app_envs', 'id': 'app-id'},
    }
    reqs = {
        'flavors': OrderedDict(),
        'images': OrderedDict(),
        'app_envs': OrderedDict(),
    }
    for mapping in get_kb_mappings('requirements'):
        entry = json.loads(mapping['value'])
        for domain, target in domains_to_reqs.items():
            if entry.get(domain, False) and entry.get(target['id']):
                # NOTE(review): keyed by the generic 'id', not by the
                # kind-specific id that was just checked - confirm intended.
                reqs[target['reqs']][entry['id']] = entry
    return reqs
예제 #14
0
    def process_sip_metadata(cls, deposition, metadata):
        """Map submission form fields to match the jsonalchemy configuration.

        ``metadata`` is modified in place and nothing is returned. Form
        fields are renamed, wrapped or merged into the structures built
        below (authors, thesis, report numbers, arXiv data, license, URLs,
        publication info, acquisition source, ...). Form-only keys are
        collected in ``delete_keys`` and removed at the very end, so later
        sections can still read them.

        The sections are order dependent: several of them read keys that an
        earlier section created or rewrote.

        :param deposition: object providing ``user_id`` and ``id``
        :param metadata: dict of form values; the code reads
            ``metadata['type_of_doc']``, ``metadata['authors']`` and
            ``metadata['language']`` unconditionally
        """
        delete_keys = []
        field_list = ['abstract', 'title']

        # maps from a form field to the corresponding MarcXML field
        field_map = {'abstract': "summary",
                     'title': "title",
                     'subject_term': "term",
                     'institution': "university",
                     'degree_type': 'degree_type',
                     'thesis_date': "date",
                     'journal_title': "journal_title",
                     'page_range_article_id': "page_artid",
                     'volume': "journal_volume",
                     'year': "year",
                     'issue': "journal_issue",
                     'conference_id': "cnum"}

        # exclusive fields for each type of document
        doc_exclusive_fields = {'article': ['journal_title',
                                            'page_range',
                                            'article_id',
                                            'volume',
                                            'year',
                                            'issue',
                                            'conference_id'],
                                'thesis': ['supervisors',
                                           'institution',
                                           'degree_type',
                                           'thesis_date',
                                           'defense_date'],
                                }

        # Keep only the lists of the *other* document types: their fields are
        # removed from the metadata below.
        # NOTE(review): raises KeyError when 'type_of_doc' is neither
        # 'article' nor 'thesis'.
        del doc_exclusive_fields[metadata['type_of_doc']]

        def remove_exclusive_fields(fieldlist):
            for field in fieldlist:
                if field in metadata and metadata[field]:
                    del metadata[field]

        # NOTE(review): map() is used for its side effect; this relies on
        # Python 2's eager map().
        map(remove_exclusive_fields, doc_exclusive_fields.values())

        filter_empty_elements(metadata)

        # ============================
        # Abstract, Title and Subjects
        # ============================
        # Wrap the plain value in a dict keyed by the mapped field name.
        for field in field_list:
            if field in metadata:
                tmp_field = metadata[field]
                metadata[field] = {field_map[field]: tmp_field}

        if "subject_term" in metadata:
            tmp_field = metadata["subject_term"]
            metadata["subject_term"] = [{"term": t,
                                        "scheme": "INSPIRE",
                                        "source": "submitter"}
                                        for t in tmp_field]

        # =======
        # Authors
        # =======
        # Drop empty author entries.
        # NOTE(review): raises KeyError when 'authors' is absent, although
        # the next line tests for its presence.
        metadata['authors'] = filter(None, metadata['authors'])
        if 'authors' in metadata and metadata['authors']:
            # For names of the form "Last, Initials" the spaces inside the
            # initials part are removed.
            first_author = metadata['authors'][0].get('full_name').split(',')
            if len(first_author) > 1 and \
                    literature.match_authors_initials(first_author[1]):
                first_author[1] = first_author[1].replace(' ', '')
                metadata['authors'][0]['full_name'] = ", ".join(first_author)
            metadata['_first_author'] = metadata['authors'][0]
            if metadata['authors'][1:]:
                metadata['_additional_authors'] = metadata['authors'][1:]
                for k in metadata['_additional_authors']:
                    try:
                        additional_author = k.get('full_name').split(',')
                        if len(additional_author) > 1 and \
                                literature.match_authors_initials(additional_author[1]):
                            additional_author[1] = additional_author[1].replace(' ', '')
                            k['full_name'] = ", ".join(additional_author)
                    except AttributeError:
                        # Entries without a usable 'full_name' are left as is.
                        pass
            delete_keys.append('authors')

        # ===========
        # Supervisors
        # ===========
        if 'supervisors' in metadata and metadata['supervisors']:
            metadata['thesis_supervisor'] = metadata['supervisors']
            delete_keys.append('supervisors')

        # ====
        # Note
        # ====
        if metadata.get('note', None):
            metadata['note'] = [{'value': metadata['note']}]

        # ==============
        # Thesis related
        # ==============
        thesis_fields = filter(lambda field: field in metadata, ['institution',
                                                                 'degree_type',
                                                                 'thesis_date'])
        if thesis_fields:
            metadata['thesis'] = {}

            for field in thesis_fields:
                metadata['thesis'][field_map[field]] = metadata[field]

            delete_keys.extend(thesis_fields)

        # The defense date is recorded as an additional note.
        if 'defense_date' in metadata and metadata['defense_date']:
            defense_note = {
                'value': 'Presented on ' + metadata['defense_date']
            }
            if metadata.get('note', None):
                metadata['note'].append(defense_note)
            else:
                metadata['note'] = [defense_note]

        # ========
        # Category
        # ========
        metadata['collections'] = [{'primary': "HEP"}]
        if metadata['type_of_doc'] == 'thesis':
            metadata['collections'].append({'primary': "THESIS"})

        # ============
        # Title source
        # ============
        # NOTE(review): assumes 'title' is present (and already wrapped in a
        # dict above) whenever 'title_source' is set.
        if 'title_source' in metadata and metadata['title_source']:
            metadata['title']['source'] = metadata['title_source']
            delete_keys.append('title_source')

        # =============
        # Report number
        # =============
        if 'report_numbers' in metadata and metadata['report_numbers']:
            user_report_number = metadata['report_numbers']
            metadata['report_number'] = [{'primary': v['report_number']}
                                         for v in user_report_number]
            delete_keys.append('report_numbers')

        # ========
        # arXiv ID
        # ========
        imported_from_arXiv = filter(lambda field: field in metadata,
                                     ['categories', 'title_arXiv'])

        # NOTE(review): this branch reads metadata['arxiv_id'] without
        # checking that it exists.
        if imported_from_arXiv or metadata.get('title_source') == 'arXiv':
            if is_arxiv_post_2007(metadata['arxiv_id']):
                arxiv_rep_number = {'primary': 'arXiv:' + metadata['arxiv_id'],
                                    'source': 'arXiv'}
            else:
                arxiv_rep_number = {'primary': metadata['arxiv_id'],
                                    'source': 'arXiv'}
            # Identifiers of the form "category/number" carry the category.
            if len(metadata['arxiv_id'].split('/')) == 2:
                arxiv_rep_number['arxiv_category'] = metadata['arxiv_id'].split('/')[0]
            # 'report_numbers' is still present here (it is only deleted at
            # the end), so it tells whether 'report_number' was built above.
            if metadata.get('report_numbers'):
                metadata['report_number'].append(arxiv_rep_number)
            else:
                metadata['report_number'] = [arxiv_rep_number]
            if 'abstract' in metadata:
                metadata['abstract']['source'] = 'arXiv'
            if 'title_arXiv' in metadata:
                title_arXiv = metadata['title_arXiv']
                metadata['title_arXiv'] = {}
                metadata['title_arXiv']['value'] = title_arXiv
                metadata['title_arXiv']['source'] = 'arXiv'
            if 'categories' in metadata and metadata['categories']:
                # arXiv subject categories
                subject_list = [{"term": c, "scheme": "arXiv"}
                                for c in metadata['categories'].split()]
                # INSPIRE subject categories
                if 'subject_term' in metadata and metadata['subject_term']:
                    metadata['subject_term'].extend(subject_list)
                else:
                    metadata['subject_term'] = subject_list
            metadata['system_number_external'] = {'value': 'oai:arXiv.org:' + metadata['arxiv_id'],
                                                  'institute': 'arXiv'}
            metadata['collections'].extend([{'primary': "arXiv"}, {'primary': "Citeable"}])


        # ========
        # Language
        # ========
        # 'en' is dropped, 'oth' is replaced by the free-text language (when
        # given), and any other code is replaced by its label from
        # LiteratureForm.languages.
        if metadata['language'] not in ('en', 'oth'):
            metadata['language'] = unicode(dict(LiteratureForm.languages).get(metadata['language']))
        elif metadata['language'] == 'oth':
            if metadata['other_language']:
                metadata['language'] = metadata['other_language']
        else:
            delete_keys.append('language')

        # ==========
        # Experiment
        # ==========
        if 'experiment' in metadata:
            metadata['accelerator_experiment'] = {'experiment': metadata['experiment']}
            delete_keys.append('experiment')

        # ===============
        # Conference Info
        # ===============
        # The conference name is kept as a non-public note, after any
        # existing note.
        if 'conf_name' in metadata:
            if 'nonpublic_note' in metadata:
                field = [metadata['nonpublic_note'], metadata['conf_name']]
                metadata['nonpublic_note'] = field
            else:
                metadata['nonpublic_note'] = [metadata['conf_name']]
            metadata['collections'].extend([{'primary': "ConferencePaper"}])
            delete_keys.append('conf_name')

        # =======
        # License
        # =======
        # Knowledge base mapping a license name (key) to its URL (value).
        licenses_kb = dict([(x['key'], x['value'])
            for x in get_kb_mappings(cfg["DEPOSIT_INSPIRE_LICENSE_KB"])])
        if 'license' in metadata and metadata['license']:
            metadata['license'] = {'license': metadata['license']}
            if 'license_url' in metadata:
                metadata['license']['url'] = metadata['license_url']
            else:
                metadata['license']['url'] = licenses_kb.get(
                    metadata['license']['license'])
            # NOTE(review): 'license_url' is not queued for deletion in this
            # branch, unlike in the branch below - confirm intended.
        elif 'license_url' in metadata:
            metadata['license'] = {'url': metadata['license_url']}
            # Reverse lookup: find the license name from its URL.
            license_key = {v: k for k, v in licenses_kb.items()}.get(
                metadata['license_url'])
            if license_key:
                metadata['license']['license'] = license_key
            delete_keys.append('license_url')

        # ===========
        # Files (FFT)
        # ===========
        if 'fft' in metadata and metadata['fft']:
            def restructure_ffts(fft):
                fft['url'] = fft['path']
                fft['description'] = fft['name']
                fft['docfile_type'] = "INSPIRE-PUBLIC"
                del fft['path'], fft['name']

            # NOTE(review): map() is used for its side effect; this relies on
            # Python 2's eager map().
            map(restructure_ffts, metadata['fft'])

        # ====
        # URLs
        # ====
        if metadata.get('url'):
            metadata['pdf'] = metadata['url']
            if isinstance(metadata['url'], string_types):
                metadata['url'] = [{'url': metadata['url']}]
        if 'additional_url' in metadata and metadata['additional_url']:
            if metadata.get('url'):
                metadata['url'].append({'url': metadata['additional_url']})
            else:
                metadata['url'] = [{'url': metadata['additional_url']}]
            delete_keys.append('additional_url')

        # ================
        # Publication Info
        # ================

        publication_fields = filter(lambda field: field in metadata, ['journal_title',
                                                                      'page_range_article_id',
                                                                      'volume',
                                                                      'year',
                                                                      'issue',
                                                                      'conference_id'])
        if publication_fields:
            metadata['publication_info'] = {}

            for field in publication_fields:
                metadata['publication_info'][field_map[field]] = metadata[field]

            # Derive the number of pages from a "first-last" page range when
            # both ends are integers.
            if 'page_nr' not in metadata and 'page_range_article_id' in publication_fields:
                pages = metadata['page_range_article_id'].split('-')
                if len(pages) == 2:
                    try:
                        metadata['page_nr'] = int(pages[1]) - int(pages[0]) + 1
                    except ValueError:
                        pass

            if {'primary': "ConferencePaper"} not in metadata['collections']:
                metadata['collections'].append({'primary': "Published"})

            delete_keys.extend(publication_fields)

        if 'journal_title' in metadata:
            # Replace the submitted journal title by the knowledge base value
            # (case-insensitive key lookup); keep it unchanged when unknown.
            journals_kb = dict([(x['key'].lower(), x['value'])
                                for x in get_kb_mappings(cfg.get("DEPOSIT_INSPIRE_JOURNALS_KB"))])

            metadata['publication_info']['journal_title'] = journals_kb.get(metadata['journal_title'].lower(),
                                                                            metadata['journal_title'])

            # With a journal title, the first entry of a multi-entry
            # non-public note is dropped; any other non-public note is
            # removed entirely.
            if 'nonpublic_note' in metadata:
                if (isinstance(metadata['nonpublic_note'], list)
                        and len(metadata['nonpublic_note']) > 1):
                    del metadata['nonpublic_note'][0]
                else:
                    delete_keys.append('nonpublic_note')

        # =============
        # Preprint Info
        # =============
        if 'created' in metadata and metadata['created']:
            metadata['preprint_info'] = {'date': metadata['created']}
            delete_keys.append('created')

        # ==========
        # Owner Info
        # ==========
        # Record who submitted: the internal user id plus every external
        # identity linked to that user.
        userid = deposition.user_id
        user = UserInfo(userid)
        email = user.info.get('email', '')
        external_ids = UserEXT.query.filter_by(id_user=userid).all()
        sources = ["{0}{1}".format('inspire:uid:', userid)]
        sources.extend(["{0}:{1}".format(e_id.method,
                                         e_id.id) for e_id in external_ids])
        metadata['acquisition_source'] = dict(
            source=sources,
            email=email,
            method="submission",
            submission_number=deposition.id,
        )

        # ==============
        # Extra comments
        # ==============
        if 'extra_comments' in metadata and metadata['extra_comments']:
            metadata['hidden_note'] = [{'value': metadata['extra_comments'],
                                        'source': 'submitter'}]

        # ===================
        # Delete useless data
        # ===================
        # NOTE(review): raises KeyError if a key was queued twice or is no
        # longer present in metadata.
        for key in delete_keys:
            del metadata[key]
예제 #15
0
파일: admin.py 프로젝트: mhellmic/b2share
def kb_export(req, kbname="", format="kbr", searchkey="", searchvalue="", searchtype="s", limit=None, ln=CFG_SITE_LANG):
    """
    Export the given kb by writing it to the request (the browser).

    @param req the request
    @param kbname knowledge base name
    @param format 'kba' (or 'right') for authority file, 'kbr' for
                  leftside-rightside, 'ejson' for embedded JSON, any other
                  value starting with 'j' for json-formatted dictionaries
    @param searchkey include only lines that match this on the left side
    @param searchvalue include only lines that match this on the right side
    @param searchtype s = substring match, e = exact match
    @param limit how many results to return. None means all. Only forwarded
                 to the JSON exports.
    @param ln language
    @return an error page when the kb name is missing or unknown, the JSON
            payload for the JSON formats, None after writing plain output
    @raise ValueError if the kb has an unsupported type
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = ''' &gt; <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))
    if not kbname:
        return page(title=_("Knowledge base name missing"),
                    body = """Required parameter kbname
                              is missing.""",
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

    # in order to make 'wget' downloads easy we do not require authorization

    # first check the type of the KB
    kbinfos = bibknowledge.get_kbs_info("", kbname)
    if not kbinfos:
        return page(title=_("Unknown knowledge base"),
                    body = _("There is no knowledge base with that name."),
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    kbinfo = kbinfos[0]
    kbtype = kbinfo['kbtype']
    kbid = kbinfo['id']

    if not kbtype or kbtype == 'w':
        if format == "ejson":
            req.content_type = 'application/json'
            return bibknowledge.get_kb_mappings_embedded_json(
                kbname, searchkey, searchvalue, searchtype, limit)
        if format and format[0] == 'j':
            # as JSON formatted string
            req.content_type = 'application/json'
            return bibknowledge.get_kb_mappings_json(
                kbname, searchkey, searchvalue, searchtype, limit)

        # left side / right side KB
        mappings = bibknowledge.get_kb_mappings(kbname, searchkey,
                                                searchvalue, searchtype)
        if format == 'right' or format == 'kba':
            # as authority sequence: sorted, de-duplicated right-hand sides
            seq = uniq(sorted([m['value'] for m in mappings]))
            for s in seq:
                req.write(s + "\n")
            return

        # as regularly formatted left-right mapping
        for m in mappings:
            req.write(m['key'] + '---' + m['value'] + '\n')
        return

    elif kbtype == 'd':
        # dynamic kb, another interface for perform_request_search
        if format and format[0] == 'j':
            req.content_type = "application/json"
            return bibknowledge.get_kbd_values_json(kbname, searchvalue)

        # print it as a list of values
        for hit in bibknowledge.get_kbd_values(kbname, searchvalue):
            req.write(hit + '\n')
        req.write('\n')
        return

    elif kbtype == 't':
        # taxonomy: output the file
        kbfilename = CFG_WEBDIR+"/kbfiles/"+str(kbid)+".rdf"
        try:
            # The context manager closes the file even if writing to the
            # request fails part-way.
            with open(kbfilename, 'r') as f:
                for line in f:
                    req.write(line)
        except (IOError, OSError):
            # Best effort: report the failure in the response body.
            req.write("Reading the file "+kbfilename+" failed.")

    else:
        # This situation should never happen
        raise ValueError("Unsupported KB Type: %s" % kbtype)
예제 #16
0
파일: admin.py 프로젝트: mhellmic/b2share
def kb_add_mapping(req, kb, mapFrom, mapTo, sortby="to", ln=CFG_SITE_LANG,
                   forcetype=None, replacements=None, kb_type=None):
    """
    Adds a new mapping (mapFrom -> mapTo) to a kb.

    When the left or the right side already exists (exact match) and no
    decision was passed in forcetype, a verification page is returned and
    nothing is stored. Otherwise the mapping is stored and the request is
    redirected back to the kb page.

    @param req the request
    @param kb the kb id to show
    @param mapFrom the left side (key) of the new mapping
    @param mapTo the right side (value) of the new mapping
    @param sortby the sorting criteria ('from' or 'to')
    @param ln language
    @param forcetype indicates if this function should ask about replacing left/right sides (None or 'no')
                     replace in current kb ('curr') or in all ('all')
    @param replacements an object containing kbname++++left++++right strings.
                     Can be a string or an array of strings
    @param kb_type None for normal from-to kb's, 't' for taxonomies
    @return a page for the error, verification and not-authorized cases;
            nothing once redirect_to_url has been called
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    navtrail_previous_links = ''' &gt; <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))

    try:
        # Only called to find out whether the uid can be resolved at all.
        getUid(req)
    except Exception:
        return error_page('Error', req)

    (auth_code, auth_msg) = check_user(req, 'cfgbibknowledge')
    if auth_code:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)

    kb_id = wash_url_argument(kb, 'int')
    kb_name = bibknowledge.get_kb_name(kb_id)

    if kb_name is None:
        return page(title=_("Unknown Knowledge Base"),
                    body = "",
                    language=ln,
                    navtrail = navtrail_previous_links,
                    errors = [("ERR_KB_ID_UNKNOWN", kb)],
                    lastupdated=__lastupdated__,
                    req=req)

    key = wash_url_argument(mapFrom, 'str')
    value = wash_url_argument(mapTo, 'str')

    # Check if key or value already exists in some KB (empty kb name), and
    # keep only the mappings whose side matches exactly.
    left_sides = [m for m in bibknowledge.get_kb_mappings("", key, "")
                  if m['key'] == key]
    right_sides = [m for m in bibknowledge.get_kb_mappings("", "", value)
                   if m['value'] == value]

    # No clash at all, or a taxonomy: just add in the current kb.
    if kb_type == 't' or not (left_sides or right_sides):
        forcetype = "curr"

    if not forcetype or forcetype == "no":
        # No decision taken yet: ask the user what to do with the clash.
        if left_sides:
            return page(title=_("Left side exists"),
                    body = bibknowledgeadminlib.perform_request_verify_rule(ln, kb_id, key, value, "left", kb_name, left_sides),
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

        if right_sides:
            return page(title=_("Right side exists"),
                    body = bibknowledgeadminlib.perform_request_verify_rule(ln, kb_id, key, value, "right", kb_name, right_sides),
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

    if forcetype == "curr":
        bibknowledge.add_kb_mapping(kb_name, key, value)
    elif forcetype == "all" and replacements:
        # A bit tricky: remove the rules given in param replacements and add
        # the current rule in the same kb's.
        # "replacements" can be either a string or an array; always use an array.
        if isinstance(replacements, str):
            replacements = [replacements]
        for replacement in replacements:
            if replacement.find("++++") > 0:
                # Only the kb name and the left side are needed. Splitting at
                # most twice and keeping the first two parts tolerates a right
                # side that contains the separator itself or is missing,
                # instead of failing on tuple unpacking.
                rkbname, rleft = replacement.split('++++', 2)[:2]
                bibknowledge.remove_kb_mapping(rkbname, rleft)
                # add only if this is not yet there..
                if not bibknowledge.kb_mapping_exists(rkbname, key):
                    bibknowledge.add_kb_mapping(rkbname, key, value)

    redirect_to_url(req, "kb?ln=%(ln)s&kb=%(kb)s&sortby=%(sortby)s&kb_type=%(kb_type)s" % {'ln':ln,
                                                                                           'kb':kb_id,
                                                                                           'sortby':sortby,
                                                                                           'kb_type':kb_type})
예제 #17
0
def get_journal_info(record, tags):
    """Fetch journal info from given record.

    For every journal field of the record, the first value of the journal
    title, volume, year and pages subfields is collected. When all the
    subfields required by tags['publication_format'] are present, the
    formatted publication is added to the result, followed by a variant with
    the alternative volume (if any) and one variant per CODENS mapping of the
    journal title.

    @param record object whose find_fields() returns the journal fields
    @param tags dictionary holding the 'publication' tag definitions (the
                subfield code is the character at index 5 of each tag) and
                the 'publication_format' string
    @return list of the values returned by format_journal()
    """
    # (code used in the publication format, name of the tag definition)
    subfield_sources = (
        ('p', 'journal'),
        ('v', 'volume'),
        ('y', 'year'),
        ('c', 'pages'),
    )
    record_info = []
    publication_tags = tags['publication']

    for field in record.find_fields(publication_tags['journal'][:5]):
        # we store the tags and their values here
        # like c->444 y->1999 p->"journal of foo",
        # v->20
        tagsvalues = {}
        for code, tag_name in subfield_sources:
            try:
                # IndexError covers both a tag definition without a subfield
                # code and a subfield without values.
                first_value = field.get_subfield_values(
                    publication_tags[tag_name][5])[0]
            except IndexError:
                continue
            tagsvalues[code] = first_value

        if 'c' in tagsvalues:
            # if the page numbers have "x-y" take just x
            tagsvalues['c'] = tagsvalues['c'].split('-', 1)[0]

        # check if we have the required data
        publication_format = tags['publication_format']
        missing = [c for c in publication_format
                   if c in ('p', 'v', 'y', 'c') and c not in tagsvalues]
        if missing:
            continue

        record_info.append(format_journal(publication_format, tagsvalues))

        # The format does not necessarily require a volume or a title, so
        # make sure they were found before deriving variants from them.
        if 'v' in tagsvalues:
            alt_volume = get_alt_volume(tagsvalues['v'])
            if alt_volume:
                alt_values = tagsvalues.copy()
                alt_values['v'] = alt_volume
                record_info.append(
                    format_journal(publication_format, alt_values))

        # Add codens
        if 'p' in tagsvalues:
            for coden in get_kb_mappings('CODENS', value=tagsvalues['p']):
                coden_values = tagsvalues.copy()
                coden_values['p'] = coden['key']
                record_info.append(
                    format_journal(publication_format, coden_values))

    return record_info
예제 #18
0
def get_journal_info(record, tags):
    """Fetch journal info from given record.

    Each journal field of the record contributes the first value of its
    journal title, volume, year and pages subfields. If every subfield
    required by tags["publication_format"] was found, the formatted
    publication is appended to the result, then a variant using the
    alternative volume (when there is one) and one variant per CODENS mapping
    of the journal title.

    @param record object whose find_fields() returns the journal fields
    @param tags dictionary holding the "publication" tag definitions (the
                subfield code is the character at index 5 of each tag) and
                the "publication_format" string
    @return list of the values returned by format_journal()
    """
    # (code used in the publication format, name of the tag definition)
    subfield_sources = (
        ("p", "journal"),
        ("v", "volume"),
        ("y", "year"),
        ("c", "pages"),
    )
    record_info = []
    publication_tags = tags["publication"]

    journals_fields = record.find_fields(publication_tags["journal"][:5])
    for field in journals_fields:
        # we store the tags and their values here
        # like c->444 y->1999 p->"journal of foo",
        # v->20
        tagsvalues = {}
        for code, tag_name in subfield_sources:
            try:
                # IndexError is raised both when the tag definition has no
                # subfield code and when the subfield has no values.
                tmp = field.get_subfield_values(publication_tags[tag_name][5])[0]
            except IndexError:
                continue
            if code == "c":
                # if the page numbers have "x-y" take just x
                tmp = tmp.split("-", 1)[0]
            tagsvalues[code] = tmp

        # check if we have the required data
        publication_format = tags["publication_format"]
        ok = True
        for c in publication_format:
            if c in ("p", "v", "y", "c") and c not in tagsvalues:
                ok = False
        if not ok:
            continue

        record_info.append(format_journal(publication_format, tagsvalues))

        # A format that does not mention the volume or the title does not
        # guarantee that they were found: only derive variants when present.
        if "v" in tagsvalues:
            alt_volume = get_alt_volume(tagsvalues["v"])
            if alt_volume:
                tagsvalues2 = tagsvalues.copy()
                tagsvalues2["v"] = alt_volume
                record_info.append(format_journal(publication_format, tagsvalues2))

        # Add codens
        if "p" in tagsvalues:
            for coden in get_kb_mappings("CODENS", value=tagsvalues["p"]):
                tagsvalues2 = tagsvalues.copy()
                tagsvalues2["p"] = coden["key"]
                record_info.append(format_journal(publication_format, tagsvalues2))

    return record_info
예제 #19
0
def load_kbs(cfg, run_sql, in_task=False):
    for kb, query in cfg.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        if not kb_exists(kb):
            add_kb(kb)
        if in_task:
            write_message("Updating %s KB..." % kb)
        try:
            if not in_task:
                print "kb:", kb
                print "kb beginning:", len(get_kb_mappings(kb))
            if kb.startswith('json_'):
                encoder = ComplexEncoder()
                mapping, description = run_sql(query, with_desc=True)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                column_counter = {}
                new_description = []
                for column in description[1:]:
                    column = column[0]
                    counter = column_counter[column] = column_counter.get(
                        column, 0) + 1
                    if counter > 1:
                        new_description.append('%s%d' % (column, counter))
                    else:
                        new_description.append(column)
                description = new_description
            else:
                mapping = run_sql(query)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                if not in_task:
                    print "mapping:", len(mapping)
                if kb == 'projects':
                    mapping += [('000000', 'NO PROJECT')]
            original_keys = set([key[0] for key in get_kbr_keys(kb)])
            if not in_task:
                print "original_keys before:", len(original_keys)

            updated = 0
            added = 0
            for i, row in enumerate(mapping):
                key, value = row[0], row[1:]
                if kb.startswith('json_'):
                    value = encoder.encode(dict(zip(description, value)))
                else:
                    value = value[0]
                if value:
                    if key in original_keys:
                        original_keys.remove(key)
                    if in_task:
                        task_update_progress("%s - %s%%" %
                                             (kb, i * 100 / len(mapping)))
                    if kb_mapping_exists(kb, key):
                        updated += 1
                        update_kb_mapping(kb, key, key, value)
                    else:
                        added += 1
                        add_kb_mapping(kb, key, value)
            if not in_task:
                print "updated:", updated, "added:", added
                print "kb after update:", len(get_kb_mappings(kb))
                print "original_keys after:", len(original_keys)
            if in_task:
                task_update_progress("Cleaning %s" % kb)
            for key in original_keys:
                remove_kb_mapping(kb, key)
            if not in_task:
                print "kb after remove:", len(get_kb_mappings(kb))
        except:
            register_exception(alert_admin=True,
                               prefix="Error when updating KB %s" % kb)
            continue
예제 #20
0
def load_kbs(cfg, run_sql, in_task=False):
    for kb, query in cfg.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        if not kb_exists(kb):
            add_kb(kb)
        if in_task:
            write_message("Updating %s KB..." % kb)
        try:
            if not in_task:
                print "kb:", kb
                print "kb beginning:", len(get_kb_mappings(kb))
            if kb.startswith('json_'):
                encoder = ComplexEncoder()
                mapping, description = run_sql(query, with_desc=True)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                column_counter = {}
                new_description = []
                for column in description[1:]:
                    column = column[0]
                    counter = column_counter[
                        column] = column_counter.get(column, 0) + 1
                    if counter > 1:
                        new_description.append('%s%d' % (column, counter))
                    else:
                        new_description.append(column)
                description = new_description
            else:
                mapping = run_sql(query)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                if not in_task:
                    print "mapping:", len(mapping)
                if kb == 'projects':
                    mapping += [('000000', 'NO PROJECT')]
            original_keys = set([key[0] for key in get_kbr_keys(kb)])
            if not in_task:
                print "original_keys before:", len(original_keys)

            updated = 0
            added = 0
            for i, row in enumerate(mapping):
                key, value = row[0], row[1:]
                if kb.startswith('json_'):
                    value = encoder.encode(dict(zip(description, value)))
                else:
                    value = value[0]
                if value:
                    if key in original_keys:
                        original_keys.remove(key)
                    if in_task:
                        task_update_progress(
                            "%s - %s%%" % (kb, i * 100 / len(mapping)))
                    if kb_mapping_exists(kb, key):
                        updated += 1
                        update_kb_mapping(kb, key, key, value)
                    else:
                        added += 1
                        add_kb_mapping(kb, key, value)
            if not in_task:
                print "updated:", updated, "added:", added
                print "kb after update:", len(get_kb_mappings(kb))
                print "original_keys after:", len(original_keys)
            if in_task:
                task_update_progress("Cleaning %s" % kb)
            for key in original_keys:
                remove_kb_mapping(kb, key)
            if not in_task:
                print "kb after remove:", len(get_kb_mappings(kb))
        except:
            register_exception(
                alert_admin=True, prefix="Error when updating KB %s" % kb)
            continue
예제 #21
0
 def inner(dummy_form, dummy_field, term, limit=50):
     """Return at most limit mappings of the knowledge base for term.

     The knowledge base name and the optional mapper come from the enclosing
     scope; when a mapper is set it is applied to every mapping.
     """
     from invenio.modules.knowledge.api import get_kb_mappings
     matches = get_kb_mappings(name, '', term, limit=limit)[:limit]
     if mapper is None:
         return matches
     return map(mapper, matches)
예제 #22
0
파일: admin.py 프로젝트: chokribr/invenio-1
def kb_export(req,
              kbname="",
              format="kbr",
              searchkey="",
              searchvalue="",
              searchtype="s",
              limit=None,
              ln=CFG_SITE_LANG):
    """
    Exports the given kb so that it is listed in stdout (the browser).

    No authorization is required, in order to make 'wget' downloads easy.

    @param req the request
    @param kbname knowledge base name
    @param format 'kba' (or 'right') for authority file, 'kbr' for
                  leftside-rightside, 'ejson' for embedded json, any other
                  value starting with 'j' for json-formatted dictionaries
    @param searchkey include only lines that match this on the left side
    @param searchvalue include only lines that match this on the right side
    @param searchtype s = substring match, e = exact match
    @param limit how many results to return. None means all. Only passed on
                 to the json exports of kbs of type 'w' or without a type
    @param ln language
    @return an error page when kbname is missing or unknown, the json
            produced by bibknowledge for the json formats; nothing otherwise
            (the lines are written to req)
    @raise ValueError if the knowledge base has an unsupported type
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = ''' &gt; <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (
        CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))
    if not kbname:
        return page(title=_("Knowledge base name missing"),
                    body="""Required parameter kbname
                              is missing.""",
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)

    #in order to make 'wget' downloads easy we do not require authorization

    #first check the type of the KB
    kbinfos = bibknowledge.get_kbs_info("", kbname)
    if not kbinfos:
        return page(title=_("Unknown knowledge base"),
                    body=_("There is no knowledge base with that name."),
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    kbinfo = kbinfos[0]
    kbtype = kbinfo['kbtype']
    kbid = kbinfo['id']

    if not kbtype or kbtype == 'w':
        if format == "ejson":
            req.content_type = 'application/json'
            return bibknowledge.get_kb_mappings_embedded_json(kbname, searchkey, \
                                                    searchvalue, searchtype, limit)
        elif format and format[0] == 'j':
            # as JSON formatted string
            req.content_type = 'application/json'
            return bibknowledge.get_kb_mappings_json(kbname, searchkey, \
                                                    searchvalue, searchtype, limit)

        # left side / right side KB
        mappings = bibknowledge.get_kb_mappings(kbname, searchkey, \
                                                searchvalue, searchtype)
        if format == 'right' or format == 'kba':
            # as authority sequence: sorted right sides without duplicates
            seq = uniq(sorted([m['value'] for m in mappings]))
            for s in seq:
                req.write(s + "\n")
        else:
            # as regularly formatted left-right mapping
            for m in mappings:
                req.write(m['key'] + '---' + m['value'] + '\n')
        return

    elif kbtype == 'd':
        # dynamic kb, another interface for perform_request_search
        if format and format[0] == 'j':
            req.content_type = "application/json"
            return bibknowledge.get_kbd_values_json(kbname, searchvalue)

        # print it as a list of values
        for hit in bibknowledge.get_kbd_values(kbname, searchvalue):
            req.write(hit + '\n')
        req.write('\n')
        return

    elif kbtype == 't':  #taxonomy: output the file
        kbfilename = CFG_WEBDIR + "/kbfiles/" + str(kbid) + ".rdf"
        try:
            # The with block closes the file even when writing to the
            # request fails half-way through.
            with open(kbfilename, 'r') as rdf_file:
                for line in rdf_file:
                    req.write(line)
        except Exception:
            req.write("Reading the file " + kbfilename + " failed.")

    else:
        # This situation should never happen
        raise ValueError("Unsupported KB Type: %s" % kbtype)
예제 #23
0
파일: admin.py 프로젝트: chokribr/invenio-1
def kb_add_mapping(req,
                   kb,
                   mapFrom,
                   mapTo,
                   sortby="to",
                   ln=CFG_SITE_LANG,
                   forcetype=None,
                   replacements=None,
                   kb_type=None):
    """
    Adds a new mapping (mapFrom -> mapTo) to a kb.

    If the left or the right side already exists (exact match) and forcetype
    carries no decision, a verification page is returned and nothing is
    stored. Otherwise the mapping is stored and the request is redirected
    back to the kb page.

    @param req the request
    @param kb the kb id to show
    @param mapFrom the left side (key) of the new mapping
    @param mapTo the right side (value) of the new mapping
    @param sortby the sorting criteria ('from' or 'to')
    @param ln language
    @param forcetype indicates if this function should ask about replacing left/right sides (None or 'no')
                     replace in current kb ('curr') or in all ('all')
    @param replacements an object containing kbname++++left++++right strings.
                     Can be a string or an array of strings
    @param kb_type None for normal from-to kb's, 't' for taxonomies
    @return a page for the error, verification and not-authorized cases;
            nothing once redirect_to_url has been called
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    navtrail_previous_links = ''' &gt; <a class="navtrail" href="%s/kb?ln=%s">%s</a>''' % (
        CFG_SITE_SECURE_URL, ln, _("Manage Knowledge Bases"))

    try:
        # The uid itself is not needed, only the fact that it resolves.
        getUid(req)
    except Exception:
        return error_page('Error', req)

    (auth_code, auth_msg) = check_user(req, 'cfgbibknowledge')
    if auth_code:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)

    kb_id = wash_url_argument(kb, 'int')
    kb_name = bibknowledge.get_kb_name(kb_id)

    if kb_name is None:
        return page(title=_("Unknown Knowledge Base"),
                    body="",
                    language=ln,
                    navtrail=navtrail_previous_links,
                    errors=[("ERR_KB_ID_UNKNOWN", kb)],
                    lastupdated=__lastupdated__,
                    req=req)

    key = wash_url_argument(mapFrom, 'str')
    value = wash_url_argument(mapTo, 'str')

    #check if key or value already exists in some KB (empty kb name) and
    #keep the exact matches only
    left_sides = [
        m for m in bibknowledge.get_kb_mappings("", key, "")
        if m['key'] == key
    ]
    right_sides = [
        m for m in bibknowledge.get_kb_mappings("", "", value)
        if m['value'] == value
    ]

    #no clash at all, or a taxonomy: just add in current
    if kb_type == 't' or not (left_sides or right_sides):
        forcetype = "curr"

    if not forcetype or forcetype == "no":
        #no decision taken yet: ask the user what to do with the clash
        if left_sides:
            return page(
                title=_("Left side exists"),
                body=bibknowledgeadminlib.perform_request_verify_rule(
                    ln, kb_id, key, value, "left", kb_name, left_sides),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__,
                req=req)

        if right_sides:
            return page(
                title=_("Right side exists"),
                body=bibknowledgeadminlib.perform_request_verify_rule(
                    ln, kb_id, key, value, "right", kb_name, right_sides),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__,
                req=req)

    if forcetype == "curr":
        bibknowledge.add_kb_mapping(kb_name, key, value)
    elif forcetype == "all" and replacements:
        #a bit tricky.. remove the rules given in param replacement and add the current
        #rule in the same kb's
        #"replacements" can be either a string or an array. Let's make it always an array
        if isinstance(replacements, str):
            replacements = [replacements]
        for replacement in replacements:
            if replacement.find("++++") > 0:
                #only the kb name and the left side are used: split at most
                #twice and keep the first two parts, so that a right side
                #containing the separator (or a missing one) does not break
                #the unpacking
                rkbname, rleft = replacement.split('++++', 2)[:2]
                bibknowledge.remove_kb_mapping(rkbname, rleft)
                #add only if this is not yet there..
                if not bibknowledge.kb_mapping_exists(rkbname, key):
                    bibknowledge.add_kb_mapping(rkbname, key, value)

    redirect_to_url(
        req,
        "kb?ln=%(ln)s&kb=%(kb)s&sortby=%(sortby)s&kb_type=%(kb_type)s" % {
            'ln': ln,
            'kb': kb_id,
            'sortby': sortby,
            'kb_type': kb_type
        })
예제 #24
0
def get_value(kb_name, list_of_keys):
    """Get the value registered with at least one of the keys.

    The keys are tried in the given order; for the first key that
    kb_mapping_exists() reports for kb_name, the "value" entry of the first
    mapping returned by get_kb_mappings() is returned.
    """
    for key in list_of_keys:
        if kb_mapping_exists(kb_name, key):
            # Only the first mapping found for the key is used.
            return get_kb_mappings(kb_name=kb_name, key=key)[0].get("value")