예제 #1
0
def process(options, trainCollection, modelAnnotationName, trainAnnotationName, feature):
    """Fit sigmoid parameters (A, B) for stored concept models and save them back.

    For each concept handled by this job, the model file under
    ``<rootpath>/<trainCollection>/Models/<modelAnnotationName>/<feature>/<model>/``
    is loaded, the annotated examples of ``trainAnnotationName`` are scored
    with it, ``sigmoid_train`` is run on the resulting decision values and
    labels, and the model is written back to the same file with the new
    parameters.

    Args:
        options: object providing ``rootpath``, ``model``, ``numjobs``,
            ``job`` and ``overwrite``.
        trainCollection: name of the collection holding features and models.
        modelAnnotationName: annotation name used in the model directory path.
        trainAnnotationName: annotation name whose concepts and labels are read.
        feature: feature name, used for both the feature file and model path.
    """
    rootpath = options.rootpath
    modelName = options.model

    # The model backend is selected by name; anything else falls back to fiksvm.
    if modelName == 'fastlinear':
        from fastlinear.fastlinear import fastlinear_load_model as load_model
        from fastlinear.fastlinear import fastlinear_save_model as save_model
    else:
        from fiksvm.fiksvm import fiksvm_load_model as load_model
        from fiksvm.fiksvm import fiksvm_save_model as save_model

    all_concepts = readConcepts(trainCollection, trainAnnotationName, rootpath)
    # Round-robin split: concept at position idx belongs to job (idx % numjobs) + 1.
    concepts = [
        name for idx, name in enumerate(all_concepts)
        if (idx % options.numjobs + 1) == options.job
    ]

    feature_path = os.path.join(rootpath, trainCollection, "FeatureData", feature)
    feat_file = BigFile(feature_path)

    for concept in concepts:
        modelfile = os.path.join(
            rootpath, trainCollection, 'Models', modelAnnotationName,
            feature, modelName, '%s.model' % concept)
        model = load_model(modelfile)

        old_params = model.get_probAB()
        (A0, B0) = old_params
        # A non-zero A is treated as "parameters already present".
        has_old_params = abs(A0) > 1e-8
        if has_old_params and not options.overwrite:
            printStatus(INFO, "old parameters exist as A=%g, B=%g, skip" % (A0, B0))
            continue

        names, labels = readAnnotationsFrom(
            trainCollection, trainAnnotationName, concept,
            skip_0=True, rootpath=rootpath)
        label_of = dict(zip(names, labels))

        results = classify_large_data(model, names, feat_file, prob_output=False)
        # Rebuild the label list in the order of the returned results.
        labels = [label_of[item[0]] for item in results]
        dec_values = [item[1] for item in results]

        nr_pos = sum(1 for y in labels if y == 1)
        nr_neg = sum(1 for y in labels if y == -1)
        printStatus(INFO, "%s +%d -%d" % (concept, nr_pos, nr_neg))

        A, B = sigmoid_train(dec_values, labels)
        model.set_probAB(A, B)
        save_model(modelfile, model)

        # Read the parameters back from the model for the before/after report.
        (A1, B1) = model.get_probAB()
        printStatus(INFO, "A: %g -> %g, B: %g -> %g" % (A0, A1, B0, B1))
예제 #2
0
def process(options, model_name, concept_file, weight_dir, result_dir):
    """Merge several models of each concept into one weighted model.

    Concepts are read from ``concept_file`` (one per line; blank lines and
    lines starting with ``#`` are ignored). For each concept,
    ``<weight_dir>/<concept>.txt`` lists the models to merge: every non-blank
    line holds a weight and a model directory separated by whitespace.
    Model directories that do not start with ``options.rootpath`` are taken
    relative to it. The merged model is saved as
    ``<result_dir>/<concept>.model``.

    Args:
        options: object providing ``rootpath`` and ``overwrite``.
        model_name: ``'fastlinear'`` selects the fastlinear backend; any
            other value selects fiksvm. Must also occur in each model
            directory path.
        concept_file: path of the concept list.
        weight_dir: directory holding the per-concept weight files.
        result_dir: directory the merged models are written to.

    Raises:
        AssertionError: if a weight file lists fewer than two models, or a
            model directory does not contain ``model_name`` past its first
            character.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite

    if 'fastlinear' == model_name:
        from fastlinear.fastlinear import fastlinear_load_model as load_model
        from fastlinear.fastlinear import fastlinear_save_model as save_model
    else:
        from fiksvm.fiksvm import fiksvm_load_model as load_model
        from fiksvm.fiksvm import fiksvm_save_model as save_model

    # Context manager so the concept file is closed deterministically.
    with open(concept_file) as fin:
        stripped = [line.strip() for line in fin]
    concepts = [x for x in stripped if x and not x.startswith('#')]

    # Unless overwriting, skip concepts whose merged model already exists.
    todo = [
        x for x in concepts if overwrite
        or not os.path.exists(os.path.join(result_dir, '%s.model' % x))
    ]
    printStatus(INFO, '%d concepts to do' % len(todo))

    for concept in todo:
        weight_file = os.path.join(weight_dir, '%s.txt' % concept)
        # Build a real list: on Python 3 ``map`` returns an iterator, which
        # has no len(). Blank lines (e.g. a trailing newline) are skipped
        # because they cannot be split into "<weight> <model_dir>".
        with open(weight_file) as fin:
            weight_data = [line.strip() for line in fin]
        weight_data = [line for line in weight_data if line]

        nr_of_models = len(weight_data)
        assert nr_of_models >= 2, \
            'need at least 2 models in %s, got %d' % (weight_file, nr_of_models)

        weights = []
        models = []
        for line in weight_data:
            w, model_dir = line.split()
            weights.append(float(w))
            if not model_dir.startswith(rootpath):
                model_dir = os.path.join(rootpath, model_dir)
            # Guard against mixing models of a different type.
            assert model_dir.find(model_name) > 0, \
                '%s does not contain %s' % (model_dir, model_name)
            model_file_name = os.path.join(model_dir, '%s.model' % concept)
            models.append(load_model(model_file_name))

        # The first model is updated in place and becomes the merged model.
        # NOTE(review): add_fastsvm(other, w_self, w_other) presumably scales
        # the receiver by w_self and adds w_other * other -- confirm against
        # the model class. Later merges pass 1 so the accumulated model is
        # not rescaled.
        new_model = models[0]
        new_model.add_fastsvm(models[1], weights[0], weights[1])
        for i in range(2, len(models)):
            new_model.add_fastsvm(models[i], 1, weights[i])

        new_model_file = os.path.join(result_dir, '%s.model' % concept)
        makedirsforfile(new_model_file)
        save_model(new_model_file, new_model)
예제 #3
0
def process(options, model_name, concept_file, weight_dir, result_dir):
    """Combine multiple models per concept into a single weighted model.

    Reads the concept names from ``concept_file`` (blank lines and lines
    starting with ``#`` are skipped). For every concept that still has to be
    done, the file ``<weight_dir>/<concept>.txt`` is parsed: each non-blank
    line contains a weight followed by a model directory. Directories not
    starting with ``options.rootpath`` are joined onto it. The models
    ``<model_dir>/<concept>.model`` are loaded, merged via ``add_fastsvm``
    and stored as ``<result_dir>/<concept>.model``.

    Args:
        options: object providing ``rootpath`` and ``overwrite``.
        model_name: backend selector (``'fastlinear'`` or, for any other
            value, fiksvm); also expected inside each model directory path.
        concept_file: text file listing the concepts.
        weight_dir: directory with one ``<concept>.txt`` weight file each.
        result_dir: output directory for the merged models.

    Raises:
        AssertionError: when fewer than two models are listed for a concept
            or a model directory does not contain ``model_name`` after its
            first character.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite

    if 'fastlinear' == model_name:
        from fastlinear.fastlinear import fastlinear_load_model as load_model
        from fastlinear.fastlinear import fastlinear_save_model as save_model
    else:
        from fiksvm.fiksvm import fiksvm_load_model as load_model
        from fiksvm.fiksvm import fiksvm_save_model as save_model

    # Read inside ``with`` so the file handle is not left open.
    concepts = []
    with open(concept_file) as concept_fh:
        for raw in concept_fh:
            name = raw.strip()
            if name and not name.startswith('#'):
                concepts.append(name)

    # Existing results are kept unless overwrite is requested.
    todo = [x for x in concepts
            if overwrite or not os.path.exists(os.path.join(result_dir, '%s.model' % x))]
    printStatus(INFO, '%d concepts to do' % len(todo))

    for concept in todo:
        weight_file = os.path.join(weight_dir, '%s.txt' % concept)
        # A list (not ``map``) is required: Python 3's map object has no
        # len(). Empty lines are dropped since they carry no
        # "<weight> <model_dir>" pair and would break the unpacking below.
        weight_data = []
        with open(weight_file) as weight_fh:
            for raw in weight_fh:
                entry = raw.strip()
                if entry:
                    weight_data.append(entry)

        nr_of_models = len(weight_data)
        assert nr_of_models >= 2, \
            'need at least 2 models in %s, got %d' % (weight_file, nr_of_models)
        weights = [0] * nr_of_models
        models = [None] * nr_of_models

        for i, line in enumerate(weight_data):
            w, model_dir = line.split()
            weights[i] = float(w)
            if not model_dir.startswith(rootpath):
                model_dir = os.path.join(rootpath, model_dir)
            # Sanity check that the directory belongs to the chosen model type.
            assert model_dir.find(model_name) > 0, \
                '%s does not contain %s' % (model_dir, model_name)
            model_file_name = os.path.join(model_dir, '%s.model' % concept)
            models[i] = load_model(model_file_name)

        # models[0] is modified in place and ends up as the merged model.
        # NOTE(review): the two numeric arguments of add_fastsvm look like
        # (weight of receiver, weight of added model) -- verify in the model
        # class. Subsequent calls pass 1 for the receiver.
        new_model = models[0]
        new_model.add_fastsvm(models[1], weights[0], weights[1])
        for i in range(2, nr_of_models):
            new_model.add_fastsvm(models[i], 1, weights[i])

        new_model_file = os.path.join(result_dir, '%s.model' % concept)
        makedirsforfile(new_model_file)
        save_model(new_model_file, new_model)