def main():
    """Score pages from pywikibot page generators against a trained model.

    Parses ``--name``, loads the trained model and its cross-validation
    results, reports the three optimum thresholds, then prints the model's
    score for every page produced by the standard pywikibot generator
    arguments.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: leave pywikibot-style arguments for handle_args().
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)[0]
    # Fix: the original overwrote second_thrashhold here, so the message
    # below reported the 0.25 threshold twice and the 0.5 one never.
    third_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)[0]
    pywikibot.output('1st, 2nd, and 3rd thrashholds are: %s, %s, %s' %
                     (first_thrashhold, second_thrashhold, third_thrashhold))
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    for page in generator:
        cats = [cat.title(underscore=True, withNamespace=False)
                for cat in page.categories()]
        features = model.label_case(cats)
        res = Kian.kian(model.theta, features)[0]
        # Fix: pywikibot.output takes a single text argument, not
        # print-style varargs.
        pywikibot.output('%s: %s' % (page.title(), res))
def main():
    """Queue borderline predictions into the MySQL review table.

    Loads the trained model plus its labeled data, derives a lower
    (factor 1) and an upper (factor 0.125) threshold from the
    cross-validation results, then walks the tab-separated per-page dump;
    pages whose score falls strictly between the two thresholds — i.e.
    the model is unsure — and that Wikidata does not already cover are
    inserted into the s52709__kian_p.kian table for human review.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds to human review')
    # Fix: dropped the 2to3 double-parens artifact that printed a tuple
    # instead of two space-separated values.
    print(first_thrashhold, second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    db = MySQLdb.connect(host="tools-db", db="s52709__kian_p",
                         read_default_file="~/replica.my.cnf")
    cursor = db.cursor()
    insert_statement = (
        "INSERT INTO kian "
        "(qid, model_name, wiki_name, property, value, status, prob) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s)")
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    # Already handled on Wikidata; nothing to review.
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if first_thrashhold < res < second_thrashhold:
                    # status 0 = pending review; str(res)[:7] truncates
                    # the probability to fit the column.
                    cursor.execute(insert_statement,
                                   (name, model.name, model.wiki,
                                    model.property_name, model.value, 0,
                                    str(res)[:7]))
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
    db.commit()
    cursor.close()
    db.close()
def main():
    """Queue borderline predictions into the MySQL review table.

    Loads the trained model plus its labeled data, derives a lower
    (factor 1) and an upper (factor 0.125) threshold from the
    cross-validation results, then walks the tab-separated per-page dump;
    pages whose score falls strictly between the two thresholds — i.e.
    the model is unsure — and that Wikidata does not already cover are
    inserted into the s52709__kian_p.kian table for human review.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: ignore any extra (e.g. pywikibot-style) arguments.
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds to human review')
    print(first_thrashhold, second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    db = MySQLdb.connect(host="tools-db", db="s52709__kian_p",
                         read_default_file="~/replica.my.cnf")
    cursor = db.cursor()
    insert_statement = (
        "INSERT INTO kian "
        "(qid, model_name, wiki_name, property, value, status, prob) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s)")
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if u'\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    # Already handled on Wikidata; nothing to review.
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if res > first_thrashhold and res < second_thrashhold:
                    # status 0 = pending review; str(res)[:7] truncates
                    # the probability to fit the column.
                    cursor.execute(insert_statement,
                                   (name, model.name, model.wiki,
                                    model.property_name, model.value, 0,
                                    str(res)[:7]))
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
    db.commit()
    cursor.close()
    db.close()
def __init__(self, name):
    """Load the named trained model and its CV results, then set up the worker.

    Raises ValueError when the model has not been trained yet (no
    res2.dat in the model's data directory).
    """
    trained = TrainedModel.from_file(name)
    trained.load()
    results_path = os.path.join(trained.data_directory, 'res2.dat')
    if not os.path.isfile(results_path):
        raise ValueError('You should train the model first')
    with open(results_path, 'r') as results_file:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(results_file.read())
    # Acceptance threshold at precision/recall trade-off factor 0.25.
    self.thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)[0]
    self.model = trained
    super(KianWorker, self).__init__('KianWorker:' + name, trained.wiki)
def main():
    """Watch recent changes on the model's wiki and add matching statements.

    Parses ``--name``, loads the trained model and its cross-validation
    results, computes a confidence threshold, then scores every
    main-namespace page from the recent-changes generator. Pages scoring
    above the threshold get the model's property:value claim added to
    their Wikidata item, sourced with P143 (imported from).
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: ignore any extra (e.g. pywikibot-style) arguments.
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    # Acceptance threshold at precision/recall trade-off factor 0.5.
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)[0]
    site = pywikibot.site.APISite.fromDBName(model.wiki)
    repo = site.data_repository()
    repo.login()
    gen = recent_changes_gen(repo, model.wiki)
    for case in gen:
        page = pywikibot.Page(site, case)
        if page.namespace().id != 0:
            continue  # articles (namespace 0) only
        print(page)
        cats = [
            cat.title(underscore=True, withNamespace=False)
            for cat in page.categories()
        ]
        features = model.label_case(cats)
        res = Kian.kian(model.theta, features)[0]
        if res > second_thrashhold:
            try:
                item = pywikibot.ItemPage.fromPage(page)
            except pywikibot.NoPage:
                continue  # page has no Wikidata item yet
            if model.property_name in item.claims:
                continue  # statement already present
            claim = pywikibot.Claim(repo, model.property_name)
            claim.setTarget(pywikibot.ItemPage(repo, model.value))
            item.addClaim(claim, summary='Bot: Adding %s:%s from %s '
                          '([[User:Ladsgroup/Kian|Powered by Kian]])' %
                          (model.property_name, model.value, model.wiki))
            # Source the new claim as "imported from" (P143) the home wiki.
            source = pywikibot.Claim(repo, 'P143')
            source.setTarget(pywikibot.ItemPage(repo, sources[model.wiki]))
            claim.addSource(source)
def main():
    """Publish a wikitable report of suspected mistakes on-wiki.

    Builds table rows for items whose model score strongly contradicts
    their Wikidata state and saves the result to
    ``User:Ladsgroup/Kian/Possible mistakes/<model name>``.
    """
    # NOTE(review): the "Wipedia" typo is part of the published table
    # header; left untouched here as it is runtime output.
    res_text = "{| class=\"wikitable sortable\"\n!Qid!!Value<br />" \
               "(Wikidata)!!Value<br />(Wipedia)!!Other boring stuff"
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible mistakes')
    print(second_thrashhold)
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if name in model.wikidata_data_w and \
                        res < (1 - second_thrashhold):
                    # Has the statement on Wikidata, but model says no.
                    res_text += ("\n|-\n|[[%s]]||Yes||No (%s)||%s" %
                                 (name, res, features))
                elif name in model.wikidata_data_wo and \
                        res > second_thrashhold:
                    # Lacks the statement, but model is confident it fits.
                    res_text += ("\n|-\n|[[%s]]||No||Yes (%s)||%s" %
                                 (name, res, features))
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
    page_title = "User:Ladsgroup/Kian/Possible mistakes/%s" % model.name
    # NOTE(review): `site` is not defined in this function — presumably a
    # module-level pywikibot site object; verify before running standalone.
    page = pywikibot.Page(site, page_title)
    page.put(res_text + "\n|}", "Bot: Report")
def main():
    """Watch recent changes on the model's wiki and add matching statements.

    Parses ``--name``, loads the trained model and its cross-validation
    results, computes a confidence threshold, then scores every
    main-namespace page from the recent-changes generator. Pages scoring
    above the threshold get the model's property:value claim added to
    their Wikidata item, sourced with P143 (imported from).
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: ignore any extra (e.g. pywikibot-style) arguments.
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    # Acceptance threshold at precision/recall trade-off factor 0.5.
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)[0]
    site = pywikibot.site.APISite.fromDBName(model.wiki)
    repo = site.data_repository()
    repo.login()
    gen = recent_changes_gen(repo, model.wiki)
    for case in gen:
        page = pywikibot.Page(site, case)
        if page.namespace().id != 0:
            continue  # articles (namespace 0) only
        print(page)
        cats = [cat.title(underscore=True, withNamespace=False)
                for cat in page.categories()]
        features = model.label_case(cats)
        res = Kian.kian(model.theta, features)[0]
        if res > second_thrashhold:
            try:
                item = pywikibot.ItemPage.fromPage(page)
            except pywikibot.NoPage:
                continue  # page has no Wikidata item yet
            if model.property_name in item.claims:
                continue  # statement already present
            claim = pywikibot.Claim(repo, model.property_name)
            claim.setTarget(pywikibot.ItemPage(repo, model.value))
            item.addClaim(claim, summary='Bot: Adding %s:%s from %s '
                          '([[User:Ladsgroup/Kian|Powered by Kian]])' %
                          (model.property_name, model.value, model.wiki))
            # Source the new claim as "imported from" (P143) the home wiki.
            source = pywikibot.Claim(repo, 'P143')
            source.setTarget(pywikibot.ItemPage(repo, sources[model.wiki]))
            claim.addSource(source)
def main():
    """Print borderline predictions so a human can review them.

    Same selection logic as the DB-backed variant: loads the trained
    model plus its labeled data, derives a lower (factor 1) and an upper
    (factor 0.125) threshold, then walks the tab-separated per-page dump
    and prints every page whose score falls strictly between the two
    thresholds and that Wikidata does not already cover.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds to human review')
    # Fix: dropped the 2to3 double-parens artifacts that printed a tuple
    # (and redundantly wrapped single values) below.
    print(first_thrashhold, second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    # Already handled on Wikidata; nothing to review.
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if first_thrashhold < res < second_thrashhold:
                    print('%s: %s' % (name, res))
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
def main():
    """Store likely Wikidata/Wikipedia disagreements via store().

    Walks the per-page label dump and flags two kinds of suspected
    mistakes: items that have the statement on Wikidata but score very
    low, and items without it that score very high.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible mistakes')
    print(second_thrashhold)
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    # Numeric parts of the property/value IDs, e.g. 'P31' -> 31.
    p_number = int(model.property_name.lower().replace('p', ''))
    q_number = int(model.value.lower().replace('q', ''))
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                # store() is defined elsewhere in this module —
                # presumably it persists the suspect entry; verify.
                if name in model.wikidata_data_w and \
                        res < (1 - second_thrashhold):
                    # Has the statement on Wikidata, but model says no.
                    store(name, res, model.wiki, p_number, q_number)
                elif name in model.wikidata_data_wo and \
                        res > second_thrashhold:
                    # Lacks the statement, but model is confident it fits.
                    store(name, res, model.wiki, p_number, q_number)
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
def main():
    """Print borderline predictions so a human can review them.

    Loads the trained model plus its labeled data, derives a lower
    (factor 1) and an upper (factor 0.125) threshold, then walks the
    tab-separated per-page dump and prints every page whose score lands
    strictly between the two thresholds and that Wikidata does not
    already cover.
    """
    cli_args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    known = parser.parse_known_args(cli_args)[0]
    model = TrainedModel.from_file(known.name)
    print('Loading the model')
    model.load_data()
    model.load()
    results_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(results_path):
        raise ValueError('You should train the model first')
    with codecs.open(results_path, 'r', 'utf-8') as results_file:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(results_file.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds to human review')
    print(first_thrashhold, second_thrashhold)
    # Items already decided on Wikidata, with or without the statement.
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    current = None  # QID whose category labels are accumulating
    labels = []
    with model.wiki_data_file as dump:
        for raw in dump:
            raw = raw.replace('\n', '')
            if u'\t' not in raw:
                continue
            if current and current != raw.split('\t')[0]:
                # Previous page complete: evaluate it unless Wikidata
                # has already decided either way.
                if current in pros:
                    labels = []
                    current = raw.split('\t')[0]
                    continue
                features = model.label_case(labels)
                res = Kian.kian(model.theta, features)[0]
                if first_thrashhold < res < second_thrashhold:
                    print('%s: %s' % (current, res))
                labels = []
                current = raw.split('\t')[0]
            labels.append(raw.split('\t')[1])
def main():
    """Store likely Wikidata/Wikipedia disagreements via store().

    Walks the per-page label dump and flags two kinds of suspected
    mistakes: items that have the statement on Wikidata but score very
    low, and items without it that score very high.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible mistakes')
    print(second_thrashhold)
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    # Numeric parts of the property/value IDs, e.g. 'P31' -> 31.
    p_number = int(model.property_name.lower().replace('p', ''))
    q_number = int(model.value.lower().replace('q', ''))
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if u'\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                # store() is defined elsewhere in this module —
                # presumably it persists the suspect entry; verify.
                if name in model.wikidata_data_w and \
                        res < (1 - second_thrashhold):
                    # Has the statement on Wikidata, but model says no.
                    store(name, res, model.wiki, p_number, q_number)
                elif name in model.wikidata_data_wo and \
                        res > second_thrashhold:
                    # Lacks the statement, but model is confident it fits.
                    store(name, res, model.wiki, p_number, q_number)
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
    # NOTE(review): the final QID in the dump is never evaluated — TODO
    # confirm whether that is acceptable.
def main():
    """Score pages from pywikibot page generators against a trained model.

    Parses ``--name``, loads the trained model and its cross-validation
    results, reports the three optimum thresholds, then prints the model's
    score for every page produced by the standard pywikibot generator
    arguments.
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    # parse_known_args: leave pywikibot-style arguments for handle_args().
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file; consider
        # ast.literal_eval for safety.
        cv_set = eval(f.read())
    first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)[0]
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)[0]
    # Fix: the original overwrote second_thrashhold here, so the message
    # below reported the 0.25 threshold twice and the 0.5 one never.
    third_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)[0]
    pywikibot.output('1st, 2nd, and 3rd thrashholds are: %s, %s, %s' %
                     (first_thrashhold, second_thrashhold, third_thrashhold))
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    for page in generator:
        cats = [
            cat.title(underscore=True, withNamespace=False)
            for cat in page.categories()
        ]
        features = model.label_case(cats)
        res = Kian.kian(model.theta, features)[0]
        # Fix: pywikibot.output takes a single text argument, not
        # print-style varargs.
        pywikibot.output('%s: %s' % (page.title(), res))
"""Evaluate a trained Kian model: print its AUC and optimum thresholds."""
import argparse  # Fix: argparse was used below but never imported
import codecs
import os

if __name__ == '__main__' and __package__ is None:
    # Allow running the script directly from the repository checkout.
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
import kian.fitness
from kian import TrainedModel

parser = argparse.ArgumentParser(description='Evaluate a trained model')
parser.add_argument('--name', '-n', nargs='?', required=True,
                    help='name of the model to train')
args = parser.parse_args()
model = TrainedModel.from_file(args.name)
file_path = os.path.join(model.data_directory, 'res2.dat')
if not os.path.isfile(file_path):
    raise ValueError('You should train the model first')
with codecs.open(file_path, 'r', 'utf-8') as f:
    # NOTE(review): eval() on a locally produced results file; consider
    # ast.literal_eval for safety.
    cv_set = eval(f.read())
AUC = kian.fitness.AUC(cv_set)
first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)
# NOTE(review): the second and third thresholds are computed but not
# printed in this chunk — possibly reported further down; verify.
second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)
third_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)
print('AUC of the classifier: {0:.4}'.format(AUC))
print('First thrashhold (recall and precision): %s (%s, %s)' %
      (first_thrashhold[0], first_thrashhold[1][0], first_thrashhold[1][1]))
def main():
    """Add statements for pages the model is confident about.

    Loads the trained model plus its labeled data, computes a confidence
    threshold (factor 0.125), then walks the per-page category dump;
    every page that Wikidata has not already decided on and that scores
    above the threshold gets the model's property:value claim added to
    its item, sourced with P143 (imported from).
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds')
    print(second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if u'\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if res < second_thrashhold:
                    # NOTE(review): this branch (and the claims check
                    # below) resets `a` but does not update `name`,
                    # unlike the sibling branches — TODO confirm this
                    # is intentional.
                    a = []
                    continue
                print('Found something on %s, %s' % (name, res))
                # NOTE(review): `repo` is not defined in this function —
                # presumably a module-level pywikibot repository; verify.
                item = pywikibot.ItemPage(repo, name)
                try:
                    item.get()
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                except Exception:
                    a = []
                    name = line.split('\t')[0]
                    continue
                if model.property_name in item.claims:
                    a = []
                    continue
                print('Adding statements')
                claim = pywikibot.Claim(repo, model.property_name)
                claim.setTarget(pywikibot.ItemPage(repo, model.value))
                summary = ('Bot: Adding %s:%s from %s ([[User:Ladsgroup'
                           '/Kian|Powered by Kian]])' %
                           (model.property_name, model.value, model.wiki))
                try:
                    item.addClaim(claim, summary=summary)
                    # Source the claim as "imported from" the home wiki.
                    source = pywikibot.Claim(repo, 'P143')
                    source.setTarget(
                        pywikibot.ItemPage(repo, sources[model.wiki]))
                    claim.addSource(source)
                except pywikibot.data.api.APIError:
                    continue
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
def main():
    """Add statements for pages the model is confident about.

    Loads the trained model plus its labeled data, computes a confidence
    threshold (factor 0.125), then walks the per-page category dump;
    every page that Wikidata has not already decided on and that scores
    above the threshold gets the model's property:value claim added to
    its item, sourced with P143 (imported from).
    """
    args = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Parse and add statements '
                                     'based on trained model')
    parser.add_argument('--name', '-n', nargs='?', required=True,
                        help='name of the model to train')
    parsed_args = parser.parse_known_args(args)[0]
    model = TrainedModel.from_file(parsed_args.name)
    print('Loading the model')
    model.load_data()
    model.load()
    file_path = os.path.join(model.data_directory, 'res2.dat')
    if not os.path.isfile(file_path):
        raise ValueError('You should train the model first')
    with codecs.open(file_path, 'r', 'utf-8') as f:
        # NOTE(review): eval() on a locally produced results file.
        cv_set = eval(f.read())
    second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.125)[0]
    print('Finding possible adds')
    print(second_thrashhold)
    # Items already decided on Wikidata (with or without the statement).
    pros = model.wikidata_data_w | model.wikidata_data_wo
    # Fix: dropped the 2to3 double-parens artifacts around print calls.
    print(len(pros))
    name = None  # QID whose category lines are currently accumulating
    a = []       # accumulated category labels for the current QID
    with model.wiki_data_file as f:
        for line in f:
            line = line.replace('\n', '')
            if '\t' not in line:
                continue
            # A new QID means the previous page's labels are complete.
            if name and name != line.split('\t')[0]:
                if name in pros:
                    a = []
                    name = line.split('\t')[0]
                    continue
                features = model.label_case(a)
                res = Kian.kian(model.theta, features)[0]
                if res < second_thrashhold:
                    # NOTE(review): this branch (and the claims check
                    # below) resets `a` but does not update `name`,
                    # unlike the sibling branches — TODO confirm this
                    # is intentional.
                    a = []
                    continue
                print('Found something on %s, %s' % (name, res))
                # NOTE(review): `repo` is not defined in this function —
                # presumably a module-level pywikibot repository; verify.
                item = pywikibot.ItemPage(repo, name)
                try:
                    item.get()
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                except Exception:
                    a = []
                    name = line.split('\t')[0]
                    continue
                if model.property_name in item.claims:
                    a = []
                    continue
                print('Adding statements')
                claim = pywikibot.Claim(repo, model.property_name)
                claim.setTarget(pywikibot.ItemPage(repo, model.value))
                summary = ('Bot: Adding %s:%s from %s ([[User:Ladsgroup'
                           '/Kian|Powered by Kian]])' %
                           (model.property_name, model.value, model.wiki))
                try:
                    item.addClaim(claim, summary=summary)
                    # Source the claim as "imported from" the home wiki.
                    source = pywikibot.Claim(repo, 'P143')
                    source.setTarget(
                        pywikibot.ItemPage(repo, sources[model.wiki]))
                    claim.addSource(source)
                except pywikibot.data.api.APIError:
                    continue
                a = []
                name = line.split('\t')[0]
            a.append(line.split('\t')[1])
"""Evaluate a trained Kian model: print its AUC and optimum thresholds."""
import argparse  # Fix: argparse was used below but never imported
import codecs
import os

if __name__ == '__main__' and __package__ is None:
    # Allow running the script directly from the repository checkout.
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
import kian.fitness
from kian import TrainedModel

parser = argparse.ArgumentParser(description='Evaluate a trained model')
parser.add_argument('--name', '-n', nargs='?', required=True,
                    help='name of the model to train')
args = parser.parse_args()
model = TrainedModel.from_file(args.name)
file_path = os.path.join(model.data_directory, 'res2.dat')
if not os.path.isfile(file_path):
    raise ValueError('You should train the model first')
with codecs.open(file_path, 'r', 'utf-8') as f:
    # NOTE(review): eval() on a locally produced results file; consider
    # ast.literal_eval for safety.
    cv_set = eval(f.read())
AUC = kian.fitness.AUC(cv_set)
first_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 1)
# NOTE(review): the second and third thresholds are computed but not
# printed in this chunk — possibly reported further down; verify.
second_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.5)
third_thrashhold = kian.fitness.optimum_thrashhold(cv_set, 0.25)
# Fix: dropped the 2to3 double-parens artifacts around the print calls.
print('AUC of the classifier: {0:.4}'.format(AUC))
print('First thrashhold (recall and precision): %s (%s, %s)' %
      (first_thrashhold[0], first_thrashhold[1][0], first_thrashhold[1][1]))