def hiksvm_train(labels, features, beta):
    """Train a class-weighted SVM using the kernel registered as "HI".

    The two classes are re-weighted so that positives (+1) receive a share
    ``beta`` and negatives (-1) a share ``1 - beta`` of the total weight,
    independent of how many examples each class has:

        wp = beta       * (n_pos + n_neg) / n_pos
        wn = (1 - beta) * (n_pos + n_neg) / n_neg

    Args:
        labels: iterable of class labels; only values equal to 1 and -1 are
            counted when computing the weights.
        features: training vectors, passed to ``svm_train`` unchanged.
        beta: weight share given to the positive class (0.5 balances the
            two classes).

    Returns:
        Whatever ``svm_train`` returns for the trained model.

    Raises:
        ValueError: if ``labels`` contains no +1 or no -1 entry, because the
            class weights are then undefined (the original code failed with
            a bare ZeroDivisionError here).
    """
    # calculate class prior
    n_pos = sum(1 for lab in labels if lab == 1)
    n_neg = sum(1 for lab in labels if lab == -1)
    if n_pos == 0 or n_neg == 0:
        raise ValueError(
            "hiksvm_train needs at least one positive (+1) and one negative "
            "(-1) label, got %d positive and %d negative" % (n_pos, n_neg))

    total = n_pos + n_neg
    wp = float(beta) / n_pos
    wn = (1.0 - beta) / n_neg
    wp *= total
    wn *= total

    # Option string in libsvm style: -s solver type, -c cost, -t kernel index,
    # -w<label> per-class weight (presumably multiplied with the cost by the
    # trainer -- confirm against the svm_train in use).
    parameters = "-s 0 -c 1 -t %d -w-1 %g -w1 %g" % (KERNEL_TYPE.index("HI"), wn, wp)
    return svm_train(labels, features, parameters)
def hiksvm_train(labels, features, beta):
    """Train a class-weighted SVM using the kernel registered as "HI".

    The two classes are re-weighted so that positives (+1) receive a share
    ``beta`` and negatives (-1) a share ``1 - beta`` of the total weight,
    independent of how many examples each class has:

        wp = beta       * (n_pos + n_neg) / n_pos
        wn = (1 - beta) * (n_pos + n_neg) / n_neg

    Args:
        labels: iterable of class labels; only values equal to 1 and -1 are
            counted when computing the weights.
        features: training vectors, passed to ``svm_train`` unchanged.
        beta: weight share given to the positive class (0.5 balances the
            two classes).

    Returns:
        Whatever ``svm_train`` returns for the trained model.

    Raises:
        ValueError: if ``labels`` contains no +1 or no -1 entry, because the
            class weights are then undefined (the original code failed with
            a bare ZeroDivisionError here).
    """
    # calculate class prior
    n_pos = sum(1 for lab in labels if lab == 1)
    n_neg = sum(1 for lab in labels if lab == -1)
    if n_pos == 0 or n_neg == 0:
        raise ValueError(
            "hiksvm_train needs at least one positive (+1) and one negative "
            "(-1) label, got %d positive and %d negative" % (n_pos, n_neg))

    total = n_pos + n_neg
    wp = float(beta) / n_pos
    wn = (1.0 - beta) / n_neg
    wp *= total
    wn *= total

    # Option string in libsvm style: -s solver type, -c cost, -t kernel index,
    # -w<label> per-class weight (presumably multiplied with the cost by the
    # trainer -- confirm against the svm_train in use).
    parameters = "-s 0 -c 1 -t %d -w-1 %g -w1 %g" % (KERNEL_TYPE.index("HI"), wn, wp)
    return svm_train(labels, features, parameters)
def process(options, trainCollection, trainAnnotationName, feature):
    """Train and save one fiksvm model per concept of an annotation set.

    For every concept whose model file is not skipped by ``checkToSkip`` and
    that falls into this job's share, a class-weighted SVM with the "HI"
    kernel is trained, converted with ``svm_to_fiksvm`` using ``nr_bins``
    bins, given the probability parameters (A, B) and written to
    ``<rootpath>/<trainCollection>/Models/<trainAnnotationName>/<feature>/<modelName>/<concept>.model``
    where ``modelName`` is ``fik<nr_bins>`` plus ``-tuned`` when
    ``options.best_param_dir`` is set.

    Args:
        options: object providing ``rootpath``, ``overwrite``, ``numjobs``,
            ``job`` (1-based job number), ``nr_bins`` and ``best_param_dir``.
        trainCollection: name of the training collection directory.
        trainAnnotationName: name of the annotation set.
        feature: name of the feature directory under ``FeatureData``.

    Returns:
        0 when this job has nothing to do, otherwise the number of concepts
        processed by this job.

    Raises:
        ValueError: if a best-parameter file does not match the expected
            ``best_C=..., a=..., b=...`` format, or if a concept has no
            positive or no negative label (class weights are undefined).
    """
    import re

    best_param_pattern = re.compile(
        r'best_C=(?P<C>[\.\d]+),\sa=(?P<a>[\.\-\d]+),\sb=(?P<b>[\.\-\d]+)')

    rootpath = options.rootpath
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    nr_bins = options.nr_bins
    best_param_dir = options.best_param_dir
    beta = 0.5  # equal weight share for the positive and the negative class

    modelName = 'fik%d' % nr_bins
    if best_param_dir:
        modelName += '-tuned'

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    resultdir = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName)

    # Concepts that still need a model, before splitting the work over jobs.
    pending = [concept for concept in concepts
               if not checkToSkip(os.path.join(resultdir, concept + '.model'), overwrite)]
    # Round-robin split: job k (1-based) takes pending concepts k-1, k-1+numjobs, ...
    todo = [concept for i, concept in enumerate(pending) if i % numjobs == job - 1]
    printStatus(INFO, 'to process %d concepts: %s' % (len(todo), ' '.join(todo)))
    if not todo:
        return 0

    feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData', feature)
    feat_file = BigFile(feat_dir)

    params = {'nr_bins': nr_bins}
    # minmax.txt: first line holds the minimum values, second line the maximum
    # values. Real lists are built because map() is a one-shot iterator on
    # Python 3 and these values are reused for every concept.
    with open(os.path.join(feat_dir, 'minmax.txt'), 'r') as f:
        params['min_vals'] = [float(v) for v in f.readline().split()]
        params['max_vals'] = [float(v) for v in f.readline().split()]

    for concept in todo:
        if best_param_dir:
            param_file = os.path.join(best_param_dir, '%s.txt' % concept)
            # Only the first line is used; close the file right after reading.
            with open(param_file) as param_fh:
                first_line = param_fh.readline().strip()
            m = best_param_pattern.search(first_line)
            if m is None:
                raise ValueError('cannot parse best_C/a/b from the first line of %s: %r'
                                 % (param_file, first_line))
            C = float(m.group('C'))
            A = float(m.group('a'))
            B = float(m.group('b'))
        else:
            C = 1
            A = 0
            B = 0
        printStatus(INFO, '%s, C=%g, A=%g, B=%g' % (concept, C, A, B))

        model_file_name = os.path.join(resultdir, concept + '.model')

        names, labels = readAnnotationsFrom(trainCollection, trainAnnotationName, concept,
                                            skip_0=True, rootpath=rootpath)
        name2label = dict(zip(names, labels))
        # Re-align the labels with the order in which the features come back.
        renamed, vectors = feat_file.read(names)
        y = [name2label[x] for x in renamed]

        # NOTE(review): the class counts come from `labels` (all annotations),
        # not from `y` (examples actually returned by feat_file.read); the two
        # differ if read() returns fewer names than requested -- confirm intent.
        n_pos = sum(1 for lab in labels if lab == 1)
        n_neg = sum(1 for lab in labels if lab == -1)
        if n_pos == 0 or n_neg == 0:
            raise ValueError('concept %s has %d positive and %d negative labels; '
                             'both classes are required to compute class weights'
                             % (concept, n_pos, n_neg))
        wp = float(beta) * (n_pos + n_neg) / n_pos
        wn = (1.0 - beta) * (n_pos + n_neg) / n_neg

        # C is folded into the per-class weights instead of being passed via -c.
        svm_params = '-w1 %g -w-1 %g' % (wp * C, wn * C)
        model = svm_train(y, vectors, svm_params + ' -s 0 -t %d -q' % KERNEL_TYPE.index("HI"))
        newmodel = svm_to_fiksvm([model], [1.0], feat_file.ndims, params)
        newmodel.set_probAB(A, B)

        makedirsforfile(model_file_name)
        printStatus(INFO, '-> %s' % model_file_name)
        fiksvm_save_model(model_file_name, newmodel)

        # reload the model file to do a simple check
        # NOTE(review): the loaded model is discarded, so the asserts below
        # inspect the in-memory model rather than what was written to disk.
        fiksvm_load_model(model_file_name)
        assert abs(newmodel.get_probAB()[0] - A) < 1e-6
        assert abs(newmodel.get_probAB()[1] - B) < 1e-6

    return len(todo)
def process(options, trainCollection, trainAnnotationName, feature):
    """Train and save one fiksvm model per concept of an annotation set.

    For every concept whose model file is not skipped by ``checkToSkip`` and
    that falls into this job's share, a class-weighted SVM with the "HI"
    kernel is trained, converted with ``svm_to_fiksvm`` using ``nr_bins``
    bins, given the probability parameters (A, B) and written to
    ``<rootpath>/<trainCollection>/Models/<trainAnnotationName>/<feature>/<modelName>/<concept>.model``
    where ``modelName`` is ``fik<nr_bins>`` plus ``-tuned`` when
    ``options.best_param_dir`` is set.

    Args:
        options: object providing ``rootpath``, ``overwrite``, ``numjobs``,
            ``job`` (1-based job number), ``nr_bins`` and ``best_param_dir``.
        trainCollection: name of the training collection directory.
        trainAnnotationName: name of the annotation set.
        feature: name of the feature directory under ``FeatureData``.

    Returns:
        0 when this job has nothing to do, otherwise the number of concepts
        processed by this job.

    Raises:
        ValueError: if a best-parameter file does not match the expected
            ``best_C=..., a=..., b=...`` format, or if a concept has no
            positive or no negative label (class weights are undefined).
    """
    import re

    best_param_pattern = re.compile(
        r'best_C=(?P<C>[\.\d]+),\sa=(?P<a>[\.\-\d]+),\sb=(?P<b>[\.\-\d]+)')

    rootpath = options.rootpath
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    nr_bins = options.nr_bins
    best_param_dir = options.best_param_dir
    beta = 0.5  # equal weight share for the positive and the negative class

    modelName = 'fik%d' % nr_bins
    if best_param_dir:
        modelName += '-tuned'

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    resultdir = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName)

    # Concepts that still need a model, before splitting the work over jobs.
    pending = [concept for concept in concepts
               if not checkToSkip(os.path.join(resultdir, concept + '.model'), overwrite)]
    # Round-robin split: job k (1-based) takes pending concepts k-1, k-1+numjobs, ...
    todo = [concept for i, concept in enumerate(pending) if i % numjobs == job - 1]
    printStatus(INFO, 'to process %d concepts: %s' % (len(todo), ' '.join(todo)))
    if not todo:
        return 0

    feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData', feature)
    feat_file = BigFile(feat_dir)

    params = {'nr_bins': nr_bins}
    # minmax.txt: first line holds the minimum values, second line the maximum
    # values. Real lists are built because map() is a one-shot iterator on
    # Python 3 and these values are reused for every concept.
    with open(os.path.join(feat_dir, 'minmax.txt'), 'r') as f:
        params['min_vals'] = [float(v) for v in f.readline().split()]
        params['max_vals'] = [float(v) for v in f.readline().split()]

    for concept in todo:
        if best_param_dir:
            param_file = os.path.join(best_param_dir, '%s.txt' % concept)
            # Only the first line is used; close the file right after reading.
            with open(param_file) as param_fh:
                first_line = param_fh.readline().strip()
            m = best_param_pattern.search(first_line)
            if m is None:
                raise ValueError('cannot parse best_C/a/b from the first line of %s: %r'
                                 % (param_file, first_line))
            C = float(m.group('C'))
            A = float(m.group('a'))
            B = float(m.group('b'))
        else:
            C = 1
            A = 0
            B = 0
        printStatus(INFO, '%s, C=%g, A=%g, B=%g' % (concept, C, A, B))

        model_file_name = os.path.join(resultdir, concept + '.model')

        names, labels = readAnnotationsFrom(trainCollection, trainAnnotationName, concept,
                                            skip_0=True, rootpath=rootpath)
        name2label = dict(zip(names, labels))
        # Re-align the labels with the order in which the features come back.
        renamed, vectors = feat_file.read(names)
        y = [name2label[x] for x in renamed]

        # NOTE(review): the class counts come from `labels` (all annotations),
        # not from `y` (examples actually returned by feat_file.read); the two
        # differ if read() returns fewer names than requested -- confirm intent.
        n_pos = sum(1 for lab in labels if lab == 1)
        n_neg = sum(1 for lab in labels if lab == -1)
        if n_pos == 0 or n_neg == 0:
            raise ValueError('concept %s has %d positive and %d negative labels; '
                             'both classes are required to compute class weights'
                             % (concept, n_pos, n_neg))
        wp = float(beta) * (n_pos + n_neg) / n_pos
        wn = (1.0 - beta) * (n_pos + n_neg) / n_neg

        # C is folded into the per-class weights instead of being passed via -c.
        svm_params = '-w1 %g -w-1 %g' % (wp * C, wn * C)
        model = svm_train(y, vectors, svm_params + ' -s 0 -t %d -q' % KERNEL_TYPE.index("HI"))
        newmodel = svm_to_fiksvm([model], [1.0], feat_file.ndims, params)
        newmodel.set_probAB(A, B)

        makedirsforfile(model_file_name)
        printStatus(INFO, '-> %s' % model_file_name)
        fiksvm_save_model(model_file_name, newmodel)

        # reload the model file to do a simple check
        # NOTE(review): the loaded model is discarded, so the asserts below
        # inspect the in-memory model rather than what was written to disk.
        fiksvm_load_model(model_file_name)
        assert abs(newmodel.get_probAB()[0] - A) < 1e-6
        assert abs(newmodel.get_probAB()[1] - B) < 1e-6

    return len(todo)