Beispiel #1
0
 def test_EJOURNALS_keys(self):
     """bibknowledge - test left/right rules (key lookups)"""
     from invenio.modules.knowledge.api import get_kbr_values, get_kbr_keys
     mykeys = get_kbr_keys("EJOURNALS", "Acta")
     self.assertEqual(2, len(mykeys))
     mykeys = get_kbr_values("EJOURNALS", '', searchtype='e')
     self.assertEqual(0, len(mykeys))
     mykeys = get_kbr_values("EJOURNALS", searchtype='s')
     self.assertEqual(327, len(mykeys))
     mykeys = get_kbr_values("EJOURNALS", searchkey='', searchtype='s')
     self.assertEqual(327, len(mykeys))
def load_kbs(cfg, run_sql, in_task=False):
    for kb, query in cfg.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        if not kb_exists(kb):
            add_kb(kb)
        if in_task:
            write_message("Updating %s KB..." % kb)
        try:
            if not in_task:
                print "kb:", kb
                print "kb beginning:", len(get_kb_mappings(kb))
            if kb.startswith('json_'):
                encoder = ComplexEncoder()
                mapping, description = run_sql(query, with_desc=True)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                column_counter = {}
                new_description = []
                for column in description[1:]:
                    column = column[0]
                    counter = column_counter[column] = column_counter.get(
                        column, 0) + 1
                    if counter > 1:
                        new_description.append('%s%d' % (column, counter))
                    else:
                        new_description.append(column)
                description = new_description
            else:
                mapping = run_sql(query)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                if not in_task:
                    print "mapping:", len(mapping)
                if kb == 'projects':
                    mapping += [('000000', 'NO PROJECT')]
            original_keys = set([key[0] for key in get_kbr_keys(kb)])
            if not in_task:
                print "original_keys before:", len(original_keys)

            updated = 0
            added = 0
            for i, row in enumerate(mapping):
                key, value = row[0], row[1:]
                if kb.startswith('json_'):
                    value = encoder.encode(dict(zip(description, value)))
                else:
                    value = value[0]
                if value:
                    if key in original_keys:
                        original_keys.remove(key)
                    if in_task:
                        task_update_progress("%s - %s%%" %
                                             (kb, i * 100 / len(mapping)))
                    if kb_mapping_exists(kb, key):
                        updated += 1
                        update_kb_mapping(kb, key, key, value)
                    else:
                        added += 1
                        add_kb_mapping(kb, key, value)
            if not in_task:
                print "updated:", updated, "added:", added
                print "kb after update:", len(get_kb_mappings(kb))
                print "original_keys after:", len(original_keys)
            if in_task:
                task_update_progress("Cleaning %s" % kb)
            for key in original_keys:
                remove_kb_mapping(kb, key)
            if not in_task:
                print "kb after remove:", len(get_kb_mappings(kb))
        except:
            register_exception(alert_admin=True,
                               prefix="Error when updating KB %s" % kb)
            continue
def load_kbs(cfg, run_sql, in_task=False):
    for kb, query in cfg.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        if not kb_exists(kb):
            add_kb(kb)
        if in_task:
            write_message("Updating %s KB..." % kb)
        try:
            if not in_task:
                print "kb:", kb
                print "kb beginning:", len(get_kb_mappings(kb))
            if kb.startswith('json_'):
                encoder = ComplexEncoder()
                mapping, description = run_sql(query, with_desc=True)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                column_counter = {}
                new_description = []
                for column in description[1:]:
                    column = column[0]
                    counter = column_counter[
                        column] = column_counter.get(column, 0) + 1
                    if counter > 1:
                        new_description.append('%s%d' % (column, counter))
                    else:
                        new_description.append(column)
                description = new_description
            else:
                mapping = run_sql(query)
                if kb in CFG_ADDITIONAL_ENTRIES:
                    mapping += CFG_ADDITIONAL_ENTRIES[kb]
                    if not in_task:
                        print CFG_ADDITIONAL_ENTRIES[kb]
                if not in_task:
                    print "mapping:", len(mapping)
                if kb == 'projects':
                    mapping += [('000000', 'NO PROJECT')]
            original_keys = set([key[0] for key in get_kbr_keys(kb)])
            if not in_task:
                print "original_keys before:", len(original_keys)

            updated = 0
            added = 0
            for i, row in enumerate(mapping):
                key, value = row[0], row[1:]
                if kb.startswith('json_'):
                    value = encoder.encode(dict(zip(description, value)))
                else:
                    value = value[0]
                if value:
                    if key in original_keys:
                        original_keys.remove(key)
                    if in_task:
                        task_update_progress(
                            "%s - %s%%" % (kb, i * 100 / len(mapping)))
                    if kb_mapping_exists(kb, key):
                        updated += 1
                        update_kb_mapping(kb, key, key, value)
                    else:
                        added += 1
                        add_kb_mapping(kb, key, value)
            if not in_task:
                print "updated:", updated, "added:", added
                print "kb after update:", len(get_kb_mappings(kb))
                print "original_keys after:", len(original_keys)
            if in_task:
                task_update_progress("Cleaning %s" % kb)
            for key in original_keys:
                remove_kb_mapping(kb, key)
            if not in_task:
                print "kb after remove:", len(get_kb_mappings(kb))
        except:
            register_exception(
                alert_admin=True, prefix="Error when updating KB %s" % kb)
            continue