Ejemplo n.º 1
0
Archivo: spam.py Proyecto: g-ych/CS229
def compute_best_svm_radius(train_matrix, train_labels, val_matrix, val_labels,
                            radius_to_consider):
    """Pick the SVM radius that scores best on the validation data.

    For every candidate in radius_to_consider an SVM is trained on the
    training data, used to predict the validation data, and scored by the
    fraction of predictions equal to val_labels. Each (radius, accuracy)
    pair is printed as it is computed.

    Args:
        train_matrix: The word counts for the training data
        train_labels: The spam or not spam labels for the training data
        val_matrix: The word counts for the validation data
        val_labels: The spam or not spam labels for the validation data
        radius_to_consider: The radius values to consider

    Returns:
        The radius with the highest validation accuracy. On a tie, the
        radius that appears first in radius_to_consider is returned.

    Raises:
        IndexError: If radius_to_consider is empty.
    """
    # *** START CODE HERE ***
    scored = []
    for radius in radius_to_consider:
        classifier = svm.svm_train(train_matrix, train_labels, radius)
        predictions = svm.svm_predict(classifier, val_matrix, radius)
        score = np.mean(predictions == val_labels)
        print("Radius: {}, Accuracy: {}".format(radius, score))
        scored.append((radius, score))
    # Stable sort on the negated accuracy: best accuracy first, and equal
    # accuracies keep their input order.
    ranked = sorted(scored, key=lambda pair: -pair[1])
    best_radius, _ = ranked[0]
    return best_radius
Ejemplo n.º 2
0
 def test_svm_predict(self):
     """Train one SVM with a ``label == 1`` predicate and check its predictions.

     The weights returned by ``svm.svm`` are wrapped as ``[(1, w)]`` and
     passed to ``svm.svm_predict``; at least 460 predictions must equal the
     corresponding entries of ``self.test[0]``.
     """
     weights, _, _ = svm.svm(lambda ll: ll == 1,
                             self.train,
                             self.valid,
                             params=self.params)
     classifiers = [(1, weights)]
     predicted = svm.svm_predict(self.test[1], classifiers)
     matches = sum(predicted == self.test[0])
     self.assertGreaterEqual(matches, 460)
Ejemplo n.º 3
0
def predict(x_validation, config: EnsembleConfig, model_id=None):
    """Dispatch prediction to the backend named by ``config.classifier_mode``.

    Args:
        x_validation: Samples to predict; forwarded unchanged to the backend.
        config: Ensemble configuration; its ``classifier_mode`` picks the
            backend and the object itself is forwarded to it.
        model_id: Optional model identifier forwarded to the backend.

    Returns:
        The result of ``decision_tree_predict`` for mode ``'DTREE'``, the
        result of ``svm_predict`` for mode ``'SVM'``, and an empty list for
        any other mode.
    """
    # Both backends are imported on every call, whichever mode is selected.
    from decision_tree import decision_tree_predict
    from svm import svm_predict

    mode = config.classifier_mode
    if mode == 'DTREE':
        backend = decision_tree_predict
    elif mode == 'SVM':
        backend = svm_predict
    else:
        return []
    return backend(x_validation, config, model_id)
Ejemplo n.º 4
0
 def test_svm_multiclass(self):
     svms = svm.svm_multiclass(self.train, self.valid, params=self.params)
     preds = svm.svm_predict(self.test[1], svms)
     acc = mean(preds == self.test[0])
     print
     print "Your current accuracy is:", acc
     self.assertGreaterEqual(acc, .94)
     #TODO: uncomment and dump the trained model.
     data.dump_model(svms, "svms.p")
Ejemplo n.º 5
0
 def test_svm_multiclass(self):
     svms = svm.svm_multiclass(self.train, self.valid, params=self.params)
     preds = svm.svm_predict(self.test[1], svms)
     acc = mean(preds == self.test[0])
     print
     print "Your current accuracy is:", acc
     self.assertGreaterEqual(acc, .94)
     #TODO: uncomment and dump the trained model.
     data.dump_model(svms, "svms.p")
Ejemplo n.º 6
0
def get_prediction(test_data, svm):
    """Predict a class for every row of test_data.

    Args:
        test_data: Samples to classify, one per row (indexed as
            ``test_data[i, :]``).
        svm: SVM model handed to ``svm_predict``.

    Returns:
        list: One string per row, the sign of that row's ``svm_predict``
        output taken at position ``[0, 0]``.
    """
    sample_count = np.shape(test_data)[0]
    # The sign of each sample's predicted value is its final class.
    return [
        str(np.sign(svm_predict(svm, test_data[row, :]))[0, 0])
        for row in range(sample_count)
    ]
Ejemplo n.º 7
0
def predict(chartID='chart_ID', chart_type='line', chart_height=350):
    """Render 'predict.html', adding SVM and MLP prediction series on POST.

    For a POST request the ``stockid`` form field is passed (as a string) to
    ``svm.svm_predict`` and ``MLP.mlp_predict``, and the two results are
    handed to the template as chart series plotted against the categories
    1..15. Any other request method renders the template with no context.

    Args:
        chartID: Value used for the chart's "renderTo" and passed to the
            template as ``chartID``.
        chart_type: Value of the chart's "type".
        chart_height: Value of the chart's "height".

    Returns:
        The rendered 'predict.html' template.
    """
    if request.method != 'POST':
        return render_template('predict.html')

    stock_id = str(request.form['stockid'])
    svm_series = svm.svm_predict(15, 0, stock_id)
    mlp_series = MLP.mlp_predict(15, 0, stock_id)
    categories = list(range(1, 16))

    return render_template(
        'predict.html',
        chartID=chartID,
        chart={"renderTo": chartID, "type": chart_type, "height": chart_height},
        series=[{"name": 'SVM', "data": svm_series},
                {"name": 'MLP', "data": mlp_series}],
        title={"text": 'Price in future 15 days'},
        xAxis={"categories": categories},
        yAxis={"title": {"text": 'yAxis Label'}})
Ejemplo n.º 8
0
def get_prediction(test_data, svm):
    '''Predict the class of every sample.

    input:  test_data(mat): test data, one sample per row
            svm: SVM model
    output: prediction(list): predicted class of each sample, as strings
    '''
    row_indices = range(np.shape(test_data)[0])
    # Raw predicted value for each sample, produced lazily row by row.
    raw_outputs = (svm_predict(svm, test_data[idx, :]) for idx in row_indices)
    # The final class is the sign of the predicted value.
    return [str(np.sign(output)[0, 0]) for output in raw_outputs]
def get_prediction(test_data, svm):
    '''Predict the class of every sample.

    input:  test_data(mat): test data, one sample per row
            svm: SVM model
    output: prediction(list): predicted class of each sample, as strings
    '''
    m = np.shape(test_data)[0]
    prediction = []
    # range (not the Python-2-only xrange) so this also runs on Python 3.
    for i in range(m):
        # Get the predicted value for this sample.
        predict = svm_predict(svm, test_data[i, :])
        # The final predicted class is the sign of that value.
        prediction.append(str(np.sign(predict)[0, 0]))
    return prediction
Ejemplo n.º 10
0
def get_results(training_set, validation_set, options, tipo_modelo):
  """Run the model selected in options and collect its predictions.

  One branch runs: the first of options.rl, options.rn, options.a,
  options.knn and options.svm that compares equal to True. The logistic
  regression (rl) and neural network (rn) branches only print a message.

  Args:
    training_set: Training examples. The kNN and SVM branches deep-copy it
      before modifying the examples.
    validation_set: Validation examples. The kNN and SVM branches drop the
      last element of each example before predicting.
    options: Option object. Besides the five flags, the branches read
      options.up (decision tree pruning argument), options.k (kNN) and
      options.kernel (SVM).
    tipo_modelo: Run type. For the decision tree, "1r" drops "VOTO2" (target
      "VOTO1"), "2r" drops "VOTO1" (target "VOTO2"), anything else keeps both
      (target "VOTO2"). For SVM, "1r" passes 'ovo' and anything else 'ovr'.

  Returns:
    A tuple (result_training, result_validation) with one prediction per
    example; both lists are empty when the selected branch makes no
    predictions or no flag is set.
  """
  result_training = []
  result_validation = []

  def drop_last(example):
    # Shallow copy of the example without its last element.
    trimmed = example[:]
    del trimmed[-1]
    return trimmed

  if options.rl == True:
    print("Realizando regresion logistica")

  elif options.rn == True:
    print("Realizando redes neuronales")
    # Not implemented: it needs changes further upstream, and the author did
    # not want to modify that code and risk breaking something.

  elif options.a == True:
    print("Realizando arbol de decision")

    # Attributes and target; these vary with the type of run.
    attributes = [
      "CANTON", "GENERO", "EDAD", "ZONA", "DEPENDIENTE", "CASA_ESTADO",
      "CASA_HACINADA", "ALFABETA", "ESCOLARIDAD", "EDUACION", "TRABAJADO",
      "ASEGURADO", "EXTRANJERO", "DISCAPACITADO", "JEFE_HOGAR", "POBLACION",
      "SUPERFICIE", "DENSIDAD", "V_OCUPADAS", "OCUPANTES", "VOTO1", "VOTO2",
    ]
    if tipo_modelo == "1r":
      attributes.pop()
    elif tipo_modelo == "2r":
      attributes.pop(-2)
    target = attributes[-1]

    # Build the tree, then prune it.
    tree = decisionTree.crearArbol(training_set, attributes, target)
    decisionTree.pruneTree(tree, float(options.up))

    def predict_with_tree(example):
      # Key each value of the example by the attribute name at its position.
      named = {attributes[pos]: example[pos] for pos in range(len(example))}
      return decisionTree.decisionTreePredict(tree, named)

    # Predictions for the training set, then for the validation set.
    result_training = [predict_with_tree(example) for example in training_set]
    result_validation = [predict_with_tree(example) for example in validation_set]

  elif options.knn == True:
    print("Realizando k nearest neighbors")

    # Work on a copy and append a unique identifier to every example.
    indexed_training = copy.deepcopy(training_set)
    for identifier, example in enumerate(indexed_training):
      example.append(identifier)

    # Subtract 2: the target and the identifier must not count as dimensions.
    dimensions = len(indexed_training[0]) - 2
    kd_tree = kd_trees.construir_kd_tree(indexed_training, 0, dimensions)

    def predict_with_kd_tree(example):
      return kd_trees.kd_predict(kd_tree, drop_last(example), 0, len(example), int(options.k))

    # Predictions for the training set, then for the validation set.
    result_training = [predict_with_kd_tree(example) for example in training_set]
    result_validation = [predict_with_kd_tree(example) for example in validation_set]

  elif options.svm == True:
    print("Realizando SVM")

    # Expected answers of the training set.
    respuestas = get_real_results(training_set)
    training_set_x = separarXY(copy.deepcopy(training_set))

    multiclass_mode = 'ovo' if tipo_modelo == "1r" else 'ovr'
    modelo = svm.generate_svm_model(training_set_x, respuestas, multiclass_mode, options.kernel)

    result_training = [svm.svm_predict(example, modelo) for example in training_set_x]

    # Predictions for the validation set, without each example's last element.
    result_validation = [
      svm.svm_predict(drop_last(example), modelo) for example in validation_set
    ]

  return result_training, result_validation
Ejemplo n.º 11
0
from datetime import datetime
import json
import svm
import pandas as pd
# NOTE: this rebinds the name `datetime` to the module, replacing the class
# imported on the first line; the code below uses the module
# (datetime.date, datetime.timedelta).
import datetime
import calendar
from flask import jsonify

# Values returned by svm.svm_predict for 'AAPL', each rounded to 3 decimals.
data = []
predicted_data = svm.svm_predict(30, 0, 'AAPL')
for x in predicted_data:
    data.append(round(x,3))
# One calendar.timegm() timestamp per date in the range built below.
# NOTE(review): timegm returns epoch seconds - confirm the consumer of these
# values does not expect milliseconds.
utc = []

today = datetime.date.today()
# 29 days after today, so the range from today to that date covers 30 dates.
thirtyday = datetime.timedelta(days=29)

daterange = pd.date_range(today, today + thirtyday)
for single_date in daterange:
    utc.append(calendar.timegm(single_date.timetuple()))

# [timestamp, value] pairs for the first 30 entries of utc and data.
json_data=[]

# Indexes 0..29 of both lists; raises IndexError if data has fewer than 30
# entries.
for x in range(1,31):
    json_data.append([utc[x-1],data[x-1]])
# print json.dumps(json_data, separators=(',',','))

b = '''(
[1461110400000,107.13],
[1461196800000,105.97],
[1461283200000,105.68],
Ejemplo n.º 12
0
 def test_svm_predict(self):
     """Train one SVM with a ``label == 1`` predicate and check its predictions.

     At least 460 entries of the ``svm.svm_predict`` output must equal the
     corresponding entries of ``self.test[0]``.
     """
     def is_label_one(ll):
         return ll == 1

     w, _max_p, _max_acc = svm.svm(is_label_one,
                                   self.train,
                                   self.valid,
                                   params=self.params)
     predicted = svm.svm_predict(self.test[1], [(1, w)])
     correct = sum(predicted == self.test[0])
     self.assertGreaterEqual(correct, 460)
	def predict(self,x):
		"""Return the result of ``svmc.svm_predict`` for ``x`` on ``self.model``.

		``x`` is first converted with ``_convert_to_svm_node_array``. The
		converted array is destroyed after the call even when the prediction
		raises, so it is not leaked on the error path.
		"""
		data = _convert_to_svm_node_array(x)
		try:
			return svmc.svm_predict(self.model,data)
		finally:
			# Always release the converted array, on success or on error.
			svmc.svm_node_array_destroy(data)