def main():
    """Score pages from pywikibot page generators against a trained model.

    Parses ``--name``, loads the trained model and its cross-validation
    results, reports the three optimum thresholds, then prints the model's
    score for every page produced by the standard pywikibot generator
    arguments.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: leave pywikibot-style arguments for handle_args().
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)[0]
    # Fix: the original overwrote second_thrashhold here, so the message
    # below reported the 0.25 threshold twice and the 0.5 one never.
    third_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)[0]
    pywikibot.output('1st, 2nd, and 3rd thrashholds are: %s, %s, %s' %
                     (first_thrashhold, second_thrashhold, third_thrashhold))
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    for page in generator:
        cats = [cat.title(underscore=True, withNamespace=False)
                for cat in page.categories()]
        features = model.label_case(cats)
        res = Kian.kian(model.theta, features)[0]
        # Fix: pywikibot.output takes a single text argument, not
        # print-style varargs.
        pywikibot.output('%s: %s' % (page.title(), res))
def main():
    """Queue borderline predictions into the MySQL review table.

    Loads the trained model plus its labeled data, derives a lower
    (factor 1) and an upper (factor 0.125) threshold from the
    cross-validation results, then walks the tab-separated per-page dump;
    pages whose score falls strictly between the two thresholds — i.e.
    the model is unsure — and that Wikidata does not already cover are
    inserted into the s52709__kian_p.kian table for human review.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds to human review')
    # Fix: dropped the 2to3 double-parens artifact that printed a tuple
    # instead of two space-separated values.
    print(first_thrashhold, second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    db = MySQLdb.connect(host="tools-db", db="s52709__kian_p",
                         read_default_file="~/replica.my.cnf")
    cursor = db.cursor()
    insert_statement = (
        "INSERT INTO kian "
        "(qid, model_name, wiki_name, property, value, status, prob) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s)")
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    # Already handled on Wikidata; nothing to review.
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if first_thrashhold < res < second_thrashhold:
                    # status 0 = pending review; str(res)[:7] truncates
                    # the probability to fit the column.
                    cursor.execute(insert_statement,
                                   (name, model.name, model.wiki,
                                    model.property_name, model.value, 0,
                                    str(res)[:7]))
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
    db.commit()
    cursor.close()
    db.close()
def main():
    """Queue borderline predictions into the MySQL review table.

    Loads the trained model plus its labeled data, derives a lower
    (factor 1) and an upper (factor 0.125) threshold from the
    cross-validation results, then walks the tab-separated per-page dump;
    pages whose score falls strictly between the two thresholds — i.e.
    the model is unsure — and that Wikidata does not already cover are
    inserted into the s52709__kian_p.kian table for human review.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: ignore any extra (e.g. pywikibot-style) arguments.
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds to human review')
    print(first_thrashhold, second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    db = MySQLdb.connect(host="tools-db", db="s52709__kian_p",
                         read_default_file="~/replica.my.cnf")
    cursor = db.cursor()
    insert_statement = (
        "INSERT INTO kian "
        "(qid, model_name, wiki_name, property, value, status, prob) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s)")
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if u'\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    # Already handled on Wikidata; nothing to review.
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if res > first_thrashhold and res < second_thrashhold:
                    # status 0 = pending review; str(res)[:7] truncates
                    # the probability to fit the column.
                    cursor.execute(insert_statement,
                                   (name, model.name, model.wiki,
                                    model.property_name, model.value, 0,
                                    str(res)[:7]))
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
    db.commit()
    cursor.close()
    db.close()
def __init__(self, name):
    """Load the named trained model and its CV results, then set up the worker.

    Raises ValueError when the model has not been trained yet (no
    res2.dat in the model's data directory).
    """
    trained = TrainedModel.from_file(name)
    trained.load()
    results_path = os.path.join(trained.data_directory, 'res2.dat')
    if not os.path.isfile(results_path):
        raise ValueError('You should train the model first')
    with open(results_path, 'r') as results_file:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(results_file.read())
    # Acceptance threshold at precision/recall trade-off factor 0.25.
    self.thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)[0]
    self.model = trained
    super(KianWorker, self).__init__('KianWorker:' + name, trained.wiki)
def main():
    """Watch recent changes on the model's wiki and add matching statements.

    Parses ``--name``, loads the trained model and its cross-validation
    results, computes a confidence threshold, then scores every
    main-namespace page from the recent-changes generator. Pages scoring
    above the threshold get the model's property:value claim added to
    their Wikidata item, sourced with P143 (imported from).
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: ignore any extra (e.g. pywikibot-style) arguments.
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    # Acceptance threshold at precision/recall trade-off factor 0.5.
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)[0]
    site = pywikibot.site.APISite.fromDBName(model.wiki)
    repo = site.data_repository()
    repo.login()
    gen = recent_changes_gen(repo, model.wiki)
    for case in gen:
        page = pywikibot.Page(site, case)
        if page.namespace().id != 0:
            continue  # articles (namespace 0) only
        print(page)
        cats = [
            cat.title(underscore=True, withNamespace=False)
            for cat in page.categories()
        ]
        features = model.label_case(cats)
        res = Kian.kian(model.theta, features)[0]
        if res > second_thrashhold:
            try:
                item = pywikibot.ItemPage.fromPage(page)
            except pywikibot.NoPage:
                continue  # page has no Wikidata item yet
            if model.property_name in item.claims:
                continue  # statement already present
            claim = pywikibot.Claim(repo, model.property_name)
            claim.setTarget(pywikibot.ItemPage(repo, model.value))
            item.addClaim(claim, summary='Bot: Adding %s:%s from %s '
                          '([[User:Ladsgroup/Kian|Powered by Kian]])' %
                          (model.property_name, model.value, model.wiki))
            # Source the new claim as "imported from" (P143) the home wiki.
            source = pywikibot.Claim(repo, 'P143')
            source.setTarget(pywikibot.ItemPage(repo, sources[model.wiki]))
            claim.addSource(source)
def main():
    """Publish a wikitable report of suspected mistakes on-wiki.

    Builds table rows for items whose model score strongly contradicts
    their Wikidata state and saves the result to
    ``User:Ladsgroup/Kian/Possible mistakes/<model name>``.
    """
    # NOTE(review): the "Wipedia" typo is part of the published table
    # header; left untouched here as it is runtime output.
    res_text = "{| class=\"wikitable sortable\"\n!Qid!!Value<br />" \
               "(Wikidata)!!Value<br />(Wipedia)!!Other boring stuff"
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible mistakes')
    print(second_thrashhold)
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if name in model.wikidata_data_w and \
                        res < (1 - second_thrashhold):
                    # Has the statement on Wikidata, but model says no.
                    res_text += ("\n|-\n|[[%s]]||Yes||No (%s)||%s" %
                                 (name, res, features))
                elif name in model.wikidata_data_wo and \
                        res > second_thrashhold:
                    # Lacks the statement, but model is confident it fits.
                    res_text += ("\n|-\n|[[%s]]||No||Yes (%s)||%s" %
                                 (name, res, features))
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
    page_title = "User:Ladsgroup/Kian/Possible mistakes/%s" % model.name
    # NOTE(review): `site` is not defined in this function — presumably a
    # module-level pywikibot site object; verify before running standalone.
    page = pywikibot.Page(site, page_title)
    page.put(res_text + "\n|}", "Bot: Report")
def main():
    """Watch recent changes on the model's wiki and add matching statements.

    Parses ``--name``, loads the trained model and its cross-validation
    results, computes a confidence threshold, then scores every
    main-namespace page from the recent-changes generator. Pages scoring
    above the threshold get the model's property:value claim added to
    their Wikidata item, sourced with P143 (imported from).
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: ignore any extra (e.g. pywikibot-style) arguments.
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    # Acceptance threshold at precision/recall trade-off factor 0.5.
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)[0]
    site = pywikibot.site.APISite.fromDBName(model.wiki)
    repo = site.data_repository()
    repo.login()
    gen = recent_changes_gen(repo, model.wiki)
    for case in gen:
        page = pywikibot.Page(site, case)
        if page.namespace().id != 0:
            continue  # articles (namespace 0) only
        print(page)
        cats = [cat.title(underscore=True, withNamespace=False)
                for cat in page.categories()]
        features = model.label_case(cats)
        res = Kian.kian(model.theta, features)[0]
        if res > second_thrashhold:
            try:
                item = pywikibot.ItemPage.fromPage(page)
            except pywikibot.NoPage:
                continue  # page has no Wikidata item yet
            if model.property_name in item.claims:
                continue  # statement already present
            claim = pywikibot.Claim(repo, model.property_name)
            claim.setTarget(pywikibot.ItemPage(repo, model.value))
            item.addClaim(claim, summary='Bot: Adding %s:%s from %s '
                          '([[User:Ladsgroup/Kian|Powered by Kian]])' %
                          (model.property_name, model.value, model.wiki))
            # Source the new claim as "imported from" (P143) the home wiki.
            source = pywikibot.Claim(repo, 'P143')
            source.setTarget(pywikibot.ItemPage(repo, sources[model.wiki]))
            claim.addSource(source)
def main():
    """Print borderline predictions so a human can review them.

    Same selection logic as the DB-backed variant: loads the trained
    model plus its labeled data, derives a lower (factor 1) and an upper
    (factor 0.125) threshold, then walks the tab-separated per-page dump
    and prints every page whose score falls strictly between the two
    thresholds and that Wikidata does not already cover.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds to human review')
    # Fix: dropped the 2to3 double-parens artifacts that printed a tuple
    # (and redundantly wrapped single values) below.
    print(first_thrashhold, second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    # Already handled on Wikidata; nothing to review.
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if first_thrashhold < res < second_thrashhold:
                    print('%s: %s' % (name, res))
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
def main():
    """Store likely Wikidata/Wikipedia disagreements via store().

    Walks the per-page label dump and flags two kinds of suspected
    mistakes: items that have the statement on Wikidata but score very
    low, and items without it that score very high.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible mistakes')
    print(second_thrashhold)
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    # Numeric parts of the property/value IDs, e.g. 'P31' -> 31.
    p_number = int(model.property_name.lower().replace('p', ''))
    q_number = int(model.value.lower().replace('q', ''))
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                # store() is defined elsewhere in this module —
                # presumably it persists the suspect entry; verify.
                if name in model.wikidata_data_w and \
                        res < (1 - second_thrashhold):
                    # Has the statement on Wikidata, but model says no.
                    store(name, res, model.wiki, p_number, q_number)
                elif name in model.wikidata_data_wo and \
                        res > second_thrashhold:
                    # Lacks the statement, but model is confident it fits.
                    store(name, res, model.wiki, p_number, q_number)
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
def main():
    """Print borderline predictions so a human can review them.

    Loads the trained model plus its labeled data, derives a lower
    (factor 1) and an upper (factor 0.125) threshold, then walks the
    tab-separated per-page dump and prints every page whose score lands
    strictly between the two thresholds and that Wikidata does not
    already cover.
    """
    cli_args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    known = parser.parse_known_args(cli_args)[0]
    model = TrainedModel.from_file(known.name)
    print('Loading the model')
    model.load_data()
    model.load()
    results_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(results_path):
        raise ValueError('You should train the model first')
    with codecs.open(results_path, 'r', 'utf-8') as results_file:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(results_file.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds to human review')
    print(first_thrashhold, second_thrashhold)
    # Items already decided on Wikidata, with or without the statement.
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    current = None  # QID whose category labels are accumulating
    labels = []
    with model.wiki_data_file as dump:
        for raw in dump:
            raw = raw.replace('\n', '')
            if u'\t' not in raw:
                continue
            if current and current != raw.split('\t')[0]:
                # Previous page complete: evaluate it unless Wikidata
                # has already decided either way.
                if current in pros:
                    labels = []
                    current = raw.split('\t')[0]
                    continue
                features = model.label_case(labels)
                res = Kian.kian(model.theta, features)[0]
                if first_thrashhold < res < second_thrashhold:
                    print('%s: %s' % (current, res))
                labels = []
                current = raw.split('\t')[0]
            labels.append(raw.split('\t')[1])
def main():
    """Store likely Wikidata/Wikipedia disagreements via store().

    Walks the per-page label dump and flags two kinds of suspected
    mistakes: items that have the statement on Wikidata but score very
    low, and items without it that score very high.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible mistakes')
    print(second_thrashhold)
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    # Numeric parts of the property/value IDs, e.g. 'P31' -> 31.
    p_number = int(model.property_name.lower().replace('p', ''))
    q_number = int(model.value.lower().replace('q', ''))
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if u'\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                # store() is defined elsewhere in this module —
                # presumably it persists the suspect entry; verify.
                if name in model.wikidata_data_w and \
                        res < (1 - second_thrashhold):
                    # Has the statement on Wikidata, but model says no.
                    store(name, res, model.wiki, p_number, q_number)
                elif name in model.wikidata_data_wo and \
                        res > second_thrashhold:
                    # Lacks the statement, but model is confident it fits.
                    store(name, res, model.wiki, p_number, q_number)
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
def main():
    """Score pages from pywikibot page generators against a trained model.

    Parses ``--name``, loads the trained model and its cross-validation
    results, reports the three optimum thresholds, then prints the model's
    score for every page produced by the standard pywikibot generator
    arguments.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: leave pywikibot-style arguments for handle_args().
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)[0]
    # Fix: the original overwrote second_thrashhold here, so the message
    # below reported the 0.25 threshold twice and the 0.5 one never.
    third_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)[0]
    pywikibot.output('1st, 2nd, and 3rd thrashholds are: %s, %s, %s' %
                     (first_thrashhold, second_thrashhold, third_thrashhold))
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    for page in generator:
        cats = [
            cat.title(underscore=True, withNamespace=False)
            for cat in page.categories()
        ]
        features = model.label_case(cats)
        res = Kian.kian(model.theta, features)[0]
        # Fix: pywikibot.output takes a single text argument, not
        # print-style varargs.
        pywikibot.output('%s: %s' % (page.title(), res))
"""Evaluate a trained Kian model: print its AUC and optimum thresholds."""
import argparse  # Fix: argparse was used below but never imported
import codecs
import os

if __name__ == '__main__' and __package__ is None:
    # Allow running the script directly from the repository checkout.
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
import kian.fitness
from kian import TrainedModel

parser = argparse.ArgumentParser(description='Evaluate a trained model')
parser.add_argument('--name', '-n', nargs='?', required=True,
                    help='name of the model to train')
args = parser.parse_args()
model = TrainedModel.from_file(args.name)
file_path = os.path.join(model.data_directory, 'res2.dat')
if not os.path.isfile(file_path):
    raise ValueError('You should train the model first')
with codecs.open(file_path, 'r', 'utf-8') as f:
    # NOTE(review): eval() on a locally produced results file; consider
    # ast.literal_eval for safety.
    cv_set = eval(f.read())
AUC = kian.fitness.AUC(cv_set)
first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)
# NOTE(review): the second and third thresholds are computed but not
# printed in this chunk — possibly reported further down; verify.
second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)
third_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)
print('AUC of the classifier: {0:.4}'.format(AUC))
print('First thrashhold (recall and precision): %s (%s, %s)' %
      (first_thrashhold[0], first_thrashhold[1][0], first_thrashhold[1][1]))
def main():
    """Add statements for pages the model is confident about.

    Loads the trained model plus its labeled data, computes a confidence
    threshold (factor 0.125), then walks the per-page category dump;
    every page that Wikidata has not already decided on and that scores
    above the threshold gets the model's property:value claim added to
    its item, sourced with P143 (imported from).
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds')
    print(second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if u'\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if res < second_thrashhold:
                    # NOTE(review): this branch (and the claims check
                    # below) resets `a` but does not update `name`,
                    # unlike the sibling branches — TODO confirm this
                    # is intentional.
                    a = []
                    continue
                print('Found something on %s, %s' % (name, res))
                # NOTE(review): `repo` is not defined in this function —
                # presumably a module-level pywikibot repository; verify.
                item = pywikibot.ItemPage(repo, name)
                try:
                    item.get()
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                except Exception:
                    a = []
                    name = line.split('\t')[0]
                    continue
                if model.property_name in item.claims:
                    a = []
                    continue
                print('Adding statements')
                claim = pywikibot.Claim(repo, model.property_name)
                claim.setTarget(pywikibot.ItemPage(repo, model.value))
                summary = ('Bot: Adding %s:%s from %s ([[User:Ladsgroup'
                           '/Kian|Powered by Kian]])' %
                           (model.property_name, model.value, model.wiki))
                try:
                    item.addClaim(claim, summary=summary)
                    # Source the claim as "imported from" the home wiki.
                    source = pywikibot.Claim(repo, 'P143')
                    source.setTarget(
                        pywikibot.ItemPage(repo, sources[model.wiki]))
                    claim.addSource(source)
                except pywikibot.data.api.APIError:
                    continue
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
def main():
    """Add statements for pages the model is confident about.

    Loads the trained model plus its labeled data, computes a confidence
    threshold (factor 0.125), then walks the per-page category dump;
    every page that Wikidata has not already decided on and that scores
    above the threshold gets the model's property:value claim added to
    its item, sourced with P143 (imported from).
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds')
    print(second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    # Fix: dropped the 2to3 double-parens artifacts around print calls.
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if res < second_thrashhold:
                    # NOTE(review): this branch (and the claims check
                    # below) resets `a` but does not update `name`,
                    # unlike the sibling branches — TODO confirm this
                    # is intentional.
                    a = []
                    continue
                print('Found something on %s, %s' % (name, res))
                # NOTE(review): `repo` is not defined in this function —
                # presumably a module-level pywikibot repository; verify.
                item = pywikibot.ItemPage(repo, name)
                try:
                    item.get()
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                except Exception:
                    a = []
                    name = line.split('\t')[0]
                    continue
                if model.property_name in item.claims:
                    a = []
                    continue
                print('Adding statements')
                claim = pywikibot.Claim(repo, model.property_name)
                claim.setTarget(pywikibot.ItemPage(repo, model.value))
                summary = ('Bot: Adding %s:%s from %s ([[User:Ladsgroup'
                           '/Kian|Powered by Kian]])' %
                           (model.property_name, model.value, model.wiki))
                try:
                    item.addClaim(claim, summary=summary)
                    # Source the claim as "imported from" the home wiki.
                    source = pywikibot.Claim(repo, 'P143')
                    source.setTarget(
                        pywikibot.ItemPage(repo, sources[model.wiki]))
                    claim.addSource(source)
                except pywikibot.data.api.APIError:
                    continue
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
"""Evaluate a trained Kian model: print its AUC and optimum thresholds."""
import argparse  # Fix: argparse was used below but never imported
import codecs
import os

if __name__ == '__main__' and __package__ is None:
    # Allow running the script directly from the repository checkout.
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
import kian.fitness
from kian import TrainedModel

parser = argparse.ArgumentParser(description='Evaluate a trained model')
parser.add_argument('--name', '-n', nargs='?', required=True,
                    help='name of the model to train')
args = parser.parse_args()
model = TrainedModel.from_file(args.name)
file_path = os.path.join(model.data_directory, 'res2.dat')
if not os.path.isfile(file_path):
    raise ValueError('You should train the model first')
with codecs.open(file_path, 'r', 'utf-8') as f:
    # NOTE(review): eval() on a locally produced results file; consider
    # ast.literal_eval for safety.
    cv_set = eval(f.read())
AUC = kian.fitness.AUC(cv_set)
first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)
# NOTE(review): the second and third thresholds are computed but not
# printed in this chunk — possibly reported further down; verify.
second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)
third_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)
# Fix: dropped the 2to3 double-parens artifacts around the print calls.
print('AUC of the classifier: {0:.4}'.format(AUC))
print('First thrashhold (recall and precision): %s (%s, %s)' %
      (first_thrashhold[0], first_thrashhold[1][0], first_thrashhold[1][1]))