Beispiel #1
0
def train(data, path):
    """Train a linear C-SVC on per-token features and store it under *path*.

    Every sentence produced by ``io.getsent(data)`` is walked token by
    token.  For each position a feature vector is extracted with
    ``feature.extractFeatures`` and the gold label is read from
    ``sent[index][2]``; it must parse to 0 or 1, and 0 is remapped to -1
    before being handed to libsvm.

    If *path* already exists nothing is trained and the function returns
    immediately.  Otherwise the directory *path* is created after training
    and receives two files: ``scr`` (the libsvm model) and ``int`` (the
    feature integerizer).

    Args:
        data: Passed unchanged to ``io.getsent``.
        path: Directory that will hold the trained model; must not exist.

    Returns:
        None.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if os.path.exists(path):
        print('Model path exists, do nothing.')
        return

    print("Loading features.")
    integerizer = tools.integerization.CIntegerization()
    labels = []
    samples = []
    for sent in io.getsent(data):
        for index in range(len(sent)):
            f = feature.extractFeatures(sent, index, integerizer)
            x = int(sent[index][2])
            assert x == 0 or x == 1, "label must be 0 or 1"
            # The classifier is trained on -1/+1 rather than 0/1.
            if x == 0:
                x = -1
            labels.append(x)
            samples.append(f)

    print("Training SVM.")
    problem = svm.svm_problem(labels, samples)
    param = svm.svm_parameter()
    param.svm_type = svm.C_SVC
    param.kernel_type = svm.LINEAR
    param.C = 1
    # NOTE(review): eps is libsvm's stopping tolerance; 1.0 is far looser
    # than the library default -- confirm this is intentional.
    param.eps = 1.0
    # Probability estimates are enabled in the saved model.
    param.probability = 1
    param.cache_size = 1000
    param.shrinking = 0
    model = svmutil.svm_train(problem, param)

    print("Saving model.")
    # The directory is only created once training has succeeded.
    os.mkdir(path)
    svmutil.svm_save_model(os.path.join(path, "scr"), model)
    integerizer.write(os.path.join(path, "int"))
Beispiel #2
0
def predict(data, path):
    """Label every token of *data* with the SVM model stored under *path*.

    Loads the feature integerizer from ``<path>/int`` and the libsvm model
    from ``<path>/scr``.  For each sentence produced by ``io.getsent(data)``
    one line is printed containing a space-separated "0" or "1" per token.
    A token gets "0" when the first probability estimate returned by libsvm
    exceeds ``cutoff``, otherwise "1".

    ``cutoff`` is not defined in this function; it is looked up as a free
    (module-level) name at call time.

    If *path* does not exist a message is printed and nothing else happens.

    Args:
        data: Passed unchanged to ``io.getsent``.
        path: Directory containing the ``int`` and ``scr`` files.

    Returns:
        None.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if not os.path.exists(path):
        print('Model path does not exist, do nothing.')
        return

    print("Loading features.")
    integerizer = tools.integerization.CIntegerization.read(os.path.join(path, 'int'))
    model = svmutil.svm_load_model(os.path.join(path, "scr"))

    print("Working.")
    for sent in io.getsent(data):
        labels = []
        for index in range(len(sent)):
            f = feature.extractFeatures(sent, index, integerizer)
            # Only the node array is needed; the second return value is unused.
            nodes, _ = svmutil.gen_svm_nodearray(f)
            probabilities = (ctypes.c_double * 2)()
            # Called for its side effect of filling `probabilities`; the
            # predicted label it returns is not used.
            svm.libsvm.svm_predict_probability(model, nodes, probabilities)
            # NOTE(review): this treats probabilities[0] as the probability of
            # the "0" class, which depends on the label order stored in the
            # model -- confirm against the training data.
            labels.append("0" if probabilities[0] > cutoff else "1")
        print(' '.join(labels))
Beispiel #3
0
def predict(data, path):
    """Print 0/1 token labels for *data* using the SVM model saved in *path*.

    The integerizer is read from ``<path>/int`` and the libsvm model from
    ``<path>/scr``.  Each sentence yielded by ``io.getsent(data)`` produces
    one printed line of space-separated labels, one per token: "0" when the
    first libsvm probability estimate is greater than ``cutoff``, else "1".

    ``cutoff`` is a free name resolved from the enclosing module when the
    function runs; it is not a parameter.

    When *path* does not exist the function prints a notice and returns
    without doing any work.

    Args:
        data: Passed unchanged to ``io.getsent``.
        path: Directory holding the ``int`` and ``scr`` files.

    Returns:
        None.
    """
    # A parenthesised single argument prints identically on Python 2 and 3.
    if not os.path.exists(path):
        print('Model path does not exist, do nothing.')
        return

    print("Loading features.")
    integerizer = tools.integerization.CIntegerization.read(
        os.path.join(path, 'int'))
    model = svmutil.svm_load_model(os.path.join(path, "scr"))

    print("Working.")
    for sent in io.getsent(data):
        labels = []
        for index in range(len(sent)):
            f = feature.extractFeatures(sent, index, integerizer)
            # The second value returned alongside the node array is unused.
            nodes, _ = svmutil.gen_svm_nodearray(f)
            probabilities = (ctypes.c_double * 2)()
            # The call fills `probabilities` in place; its return value (the
            # predicted label) is deliberately ignored.
            svm.libsvm.svm_predict_probability(model, nodes, probabilities)
            # NOTE(review): assumes probabilities[0] belongs to the "0" class;
            # that depends on the model's internal label order -- confirm.
            if probabilities[0] > cutoff:
                labels.append("0")
            else:
                labels.append("1")
        print(' '.join(labels))
Beispiel #4
0
    roles = np.argmax(g, axis=1)
    print(g)
    print(roles)
    for i in range(len(roles)):
        role = roles[i]
        color.AddDat(i, roleToColor[role])
    DrawGViz(graph, 0, imgName + '.png', 'Dot', True, color)

if __name__ == '__main__':
    # Script entry point: load a graph, extract node features, factorize the
    # feature matrix for each candidate role count and report the count with
    # the smallest description length.

    # read graph
    fileName = "dataset/facebook_combined_small.txt"
    # Alternative dataset:
    #fileName = "dataset/Email-Enron.txt"
    graph = LoadEdgeList(PUNGraph, fileName, 0, 1)

    # feature extraction
    v = feature.extractFeatures(graph)

    # convert the node feature matrix to numpy matrix
    v = toNumpyMatrix(v)

    # pick number of roles with minimum error L = M + E
    minError = sys.float_info.max
    # NOTE(review): range(2, 3) only ever tries r == 2 -- widen the range to
    # actually compare several role counts.
    for r in range(2, 3):
        g, f = factorization.nonNegativeFactorization(v, r)
        error = computeDescriptionLength(v, g, f)
        # A plot is written for every candidate, not just the best one.
        plotRole(graph, g, str(r) + '_roles')
        if error < minError:
            minError = error
            finalG, finalF = g, f
            numRoles = r
    # Call form of print, matching the print(...) calls used earlier in this
    # snippet; identical output on Python 2 and valid syntax on Python 3.
    print('using ' + str(numRoles) + ' roles')