def train(data, path):
    """Train a linear C-SVC on ``data`` and save it under directory ``path``.

    If ``path`` already exists nothing is trained and the function returns
    immediately. Otherwise one sample is built per position of every
    sentence yielded by ``io.getsent(data)``: the features come from
    ``feature.extractFeatures`` and the label from field ``[2]`` of the
    sentence element, which must parse as 0 or 1 (0 is stored as -1).

    After training, ``path`` is created and two files are written into it:
    ``scr`` (the SVM model) and ``int`` (the feature integerizer).

    Args:
        data: Passed straight to ``io.getsent``.
        path: Directory to create for the model files.

    Returns:
        None.

    Raises:
        AssertionError: If a label is neither 0 nor 1.
    """
    if os.path.exists(path):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Model path exists, do nothing.')
        return

    print("Loading features.")
    # NOTE(review): `io` must provide getsent(); presumably a project module
    # rather than the standard-library io -- confirm against the imports.
    integerizer = tools.integerization.CIntegerization()
    labels = []
    samples = []
    for sent in io.getsent(data):
        for index in range(len(sent)):
            f = feature.extractFeatures(sent, index, integerizer)
            x = int(sent[index][2])
            assert x == 0 or x == 1
            # The SVM is trained on -1/+1 labels instead of 0/1.
            if x == 0:
                x = -1
            labels.append(x)
            samples.append(f)

    print("Training SVM.")
    problem = svm.svm_problem(labels, samples)
    param = svm.svm_parameter()
    param.svm_type = svm.C_SVC
    param.kernel_type = svm.LINEAR
    # Previously tried settings, left disabled:
    # param.C = 1
    # param.degree = 2
    param.eps = 1.0
    # Probability estimates are required by svm_predict_probability at
    # prediction time.
    param.probability = 1
    param.cache_size = 1000
    param.shrinking = 0
    model = svmutil.svm_train(problem, param)

    print("Saving model.")
    # The directory is only created once training has succeeded, so a failed
    # run does not leave a path behind that would block the next attempt.
    os.mkdir(path)
    svmutil.svm_save_model(os.path.join(path, "scr"), model)
    integerizer.write(os.path.join(path, "int"))
def predict(data, path):
    """Label every position of every sentence in ``data`` and print the result.

    Loads the integerizer (``int``) and SVM model (``scr``) from directory
    ``path``. For each sentence yielded by ``io.getsent(data)`` one line is
    printed containing a space-separated ``0``/``1`` decision per position.
    If ``path`` does not exist, a message is printed and nothing else is done.

    NOTE(review): this file defines ``predict`` twice with the same logic;
    the later definition is the one that takes effect at import time.

    Args:
        data: Passed straight to ``io.getsent``.
        path: Directory containing the ``int`` and ``scr`` files.

    Returns:
        None.
    """
    if not os.path.exists(path):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Model path does not exist, do nothing.')
        return

    print("Loading features.")
    integerizer = tools.integerization.CIntegerization.read(
        os.path.join(path, 'int'))
    model = svmutil.svm_load_model(os.path.join(path, "scr"))

    print("Working.")
    for sent in io.getsent(data):
        labels = []
        for index in range(len(sent)):
            f = feature.extractFeatures(sent, index, integerizer)
            # Only the node array is needed; the second return value is unused.
            nodes, _ = svmutil.gen_svm_nodearray(f)
            probabilities = (ctypes.c_double * 2)()
            # Called for its side effect of filling `probabilities`; the
            # returned label is not used because the decision is made with
            # `cutoff` below.
            svm.libsvm.svm_predict_probability(model, nodes, probabilities)
            # `cutoff` is read from module scope.
            # NOTE(review): this assumes probabilities[0] belongs to the
            # class that maps to output 0 -- confirm the model's label order.
            x = 0 if probabilities[0] > cutoff else 1
            labels.append(str(x))
        print(' '.join(labels))
def predict(data, path):
    """Print a 0/1 decision for each position of each sentence in ``data``.

    The feature integerizer is read from ``<path>/int`` and the SVM model
    from ``<path>/scr``. Every sentence yielded by ``io.getsent(data)``
    produces one printed line with the decisions joined by single spaces.
    When ``path`` does not exist, only a message is printed.

    NOTE(review): an earlier definition of ``predict`` with the same logic
    exists in this file; being the later one, this definition is the one
    callers get.

    Args:
        data: Passed straight to ``io.getsent``.
        path: Directory holding the saved ``int`` and ``scr`` files.

    Returns:
        None.
    """
    if not os.path.exists(path):
        # print(...) with one argument is valid on both Python 2 and 3.
        print('Model path does not exist, do nothing.')
        return

    print("Loading features.")
    int_file = os.path.join(path, 'int')
    integerizer = tools.integerization.CIntegerization.read(int_file)
    model = svmutil.svm_load_model(os.path.join(path, "scr"))

    print("Working.")
    for sent in io.getsent(data):
        decisions = []
        for index in range(len(sent)):
            f = feature.extractFeatures(sent, index, integerizer)
            # The second value returned by gen_svm_nodearray is not needed.
            nodes, _ = svmutil.gen_svm_nodearray(f)
            probabilities = (ctypes.c_double * 2)()
            # The call fills `probabilities` in place; its return value (the
            # predicted label) is ignored in favour of the cutoff test.
            svm.libsvm.svm_predict_probability(model, nodes, probabilities)
            # `cutoff` comes from module scope.
            # NOTE(review): relies on probabilities[0] being the class that
            # should be reported as 0 -- confirm the model's label order.
            if probabilities[0] > cutoff:
                decisions.append('0')
            else:
                decisions.append('1')
        print(' '.join(decisions))
# NOTE(review): the statements down to the DrawGViz call use g, graph,
# imgName, color and roleToColor, none of which are defined in the visible
# part of the file. They look like the tail of a function whose header is
# above this chunk (the script below calls plotRole(graph, g, name)); they
# are kept unchanged -- confirm their enclosing definition and indentation.
# Take the column index of the largest entry in each row of g as the role.
roles = np.argmax(g, axis=1)
print(g)
print(roles)
for i in range(len(roles)):
    role = roles[i]
    color.AddDat(i, roleToColor[role])
DrawGViz(graph, 0, imgName + '.png', 'Dot', True, color)

if __name__ == '__main__':
    # read graph
    fileName = "dataset/facebook_combined_small.txt"
    # fileName = "dataset/Email-Enron.txt"
    graph = LoadEdgeList(PUNGraph, fileName, 0, 1)

    # feature extraction
    v = feature.extractFeatures(graph)

    # convert the node feature matrix to numpy matrix
    v = toNumpyMatrix(v)

    # pick number of roles with minimum error L = M + E
    minError = sys.float_info.max
    # range(2, 3) currently tries r = 2 only.
    for r in range(2, 3):
        g, f = factorization.nonNegativeFactorization(v, r)
        error = computeDescriptionLength(v, g, f)
        plotRole(graph, g, str(r) + '_roles')
        if error < minError:
            minError = error
            finalG, finalF = g, f
            numRoles = r

    # Call form of print, matching the print(g) / print(roles) calls above;
    # with a single argument it prints the same text on Python 2 and 3.
    print('using ' + str(numRoles) + ' roles')