# Top-level driver fragment (Python 2: uses the `print` statement). The
# original line breaks and indentation have been lost, so the whole script
# sits on one physical line and the final core.writeSettings(...) call is cut
# off after its second argument.
# Statements, in the order they appear:
#   1. Set num_training_distinct, numIterations and numTrainingPairs (only
#      numTrainingPairs is used in the visible text).
#   2. Import time and store the start timestamp in t0.
#   3. Call init(inputFile) to obtain data_d, data_model and header, then
#      print "importing data ..." (the message is printed after init returns).
#   4. If 'learned_settings.json' exists, load data_model and predicates from
#      it with core.readSettings.
#   5. Call activeLearning on data_d with data_model, consoleLabel and
#      numTrainingPairs, then pass the resulting training_pairs, a tuple of
#      nine predicate functions, data_model and the arguments 1, 1 to
#      trainBlocking; the result is bound to predicates.
#   6. Begin a core.writeSettings call targeting 'learned_settings.json'.
# NOTE(review): as written, the inline '#lets do some ...' comment makes the
# tokenizer ignore everything after it on this line; restore the original
# line breaks before running.
# NOTE(review): which statements belong to the else branch cannot be
# determined from this text - presumably the activeLearning, trainBlocking and
# writeSettings calls; confirm against the real file.
# NOTE(review): the meaning of the trailing `1, 1` arguments to trainBlocking
# is not visible here - TODO confirm.
num_training_distinct = 16000 numIterations = 100 numTrainingPairs = 30 import time t0 = time.time() data_d, data_model, header = init(inputFile) print "importing data ..." if os.path.exists('learned_settings.json') : data_model, predicates = core.readSettings('learned_settings.json') else: #lets do some active learning here training_data, training_pairs, data_model = activeLearning(data_d, data_model, consoleLabel, numTrainingPairs) predicates = trainBlocking(training_pairs, (wholeFieldPredicate, tokenFieldPredicate, commonIntegerPredicate, sameThreeCharStartPredicate, sameFiveCharStartPredicate, sameSevenCharStartPredicate, nearIntegersPredicate, commonFourGram, commonSixGram), data_model, 1, 1) core.writeSettings('learned_settings.json', data_model,
# Near-duplicate of the preceding line: the same top-level driver fragment
# (Python 2 `print` statement), again collapsed onto one physical line and cut
# off inside the final core.writeSettings(...) call.
# The only difference is the first argument to activeLearning: here it is
# sampleDict(data_d, 700) instead of data_d itself.
# Statements, in the order they appear:
#   1. Set num_training_distinct, numIterations and numTrainingPairs (only
#      numTrainingPairs is used in the visible text).
#   2. Import time and store the start timestamp in t0.
#   3. Call init(inputFile) to obtain data_d, data_model and header, then
#      print "importing data ..." (the message is printed after init returns).
#   4. If 'learned_settings.json' exists, load data_model and predicates from
#      it with core.readSettings.
#   5. Call activeLearning on sampleDict(data_d, 700) with data_model,
#      consoleLabel and numTrainingPairs, then pass the resulting
#      training_pairs, a tuple of nine predicate functions, data_model and the
#      arguments 1, 1 to trainBlocking; the result is bound to predicates.
#   6. Begin a core.writeSettings call targeting 'learned_settings.json'.
# NOTE(review): sampleDict is not defined in the visible text - presumably it
# returns a 700-record sample of data_d; confirm against its definition.
# NOTE(review): as written, the inline '#lets do some ...' comment makes the
# tokenizer ignore everything after it on this line; restore the original
# line breaks before running.
# NOTE(review): which statements belong to the else branch cannot be
# determined from this text - presumably the activeLearning, trainBlocking and
# writeSettings calls; confirm against the real file.
num_training_distinct = 16000 numIterations = 100 numTrainingPairs = 30 import time t0 = time.time() data_d, data_model, header = init(inputFile) print "importing data ..." if os.path.exists('learned_settings.json') : data_model, predicates = core.readSettings('learned_settings.json') else: #lets do some active learning here training_data, training_pairs, data_model = activeLearning(sampleDict(data_d, 700), data_model, consoleLabel, numTrainingPairs) predicates = trainBlocking(training_pairs, (wholeFieldPredicate, tokenFieldPredicate, commonIntegerPredicate, sameThreeCharStartPredicate, sameFiveCharStartPredicate, sameSevenCharStartPredicate, nearIntegersPredicate, commonFourGram, commonSixGram), data_model, 1, 1) core.writeSettings('learned_settings.json', data_model,