コード例 #1
0
def trainModel(mtype, dataset):
    """Fit the model selected by ``mtype`` on ``dataset``.

    Args:
        mtype: Integer model selector. ``1`` fits an unsupervised KMeans
            (2 clusters) and stores the cluster labels through
            ``data.updateResult``; ``3`` fits a supervised Gaussian naive
            Bayes model. Values ``>= 3`` are treated as supervised.
        dataset: Object exposing ``.values`` as a 2-D array (presumably a
            pandas DataFrame -- confirm against callers). Column 0 holds
            the ids. For supervised models column 1 holds the expected
            result and the features start at column 2; otherwise the
            features start at column 1.

    NOTE(review): the visible code never returns ``model``; confirm whether
    a trailing ``return model`` was cut off from this excerpt.
    """
    # first column of dataset contains ids
    # the next 10 columns contain the feature vector
    array = dataset.values
    I = array[:, 0]
    # if it is a supervised model the 2nd column is the expected result.
    # The test is ``>= 3`` so that the supervised NB branch (mtype == 3)
    # below actually receives X_train / Y_train.
    if mtype >= 3:
        R = array[:, 1]
        # all rows, feature columns only (ids and expected result skipped)
        X = array[:, 2:]
        validation_size = 0.20
        seed = 7
        X_train, X_validation, Y_train, Y_validation = \
            cross_validation.train_test_split(X, R,
                                              test_size=validation_size,
                                              random_state=seed)
        # Not used in the visible part of the function; kept in case the
        # excerpt continues with cross-validation scoring.
        num_folds = 10
        num_instances = len(X_train)
        scoring = 'accuracy'
    else:
        X = array[:, 1:]

    if mtype == 1:  # do unsupervised Kmeans
        kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
        # persist one (label, id) pair per row
        data.updateResult(list(zip(kmeans.labels_, I)))
        model = kmeans
    elif mtype == 3:  # supervised NB
        nb = GaussianNB()
        nb.fit(X_train, Y_train)
        model = nb
コード例 #2
0
def setTModel(mtype):
    """Fit and return the model selected by ``mtype`` on the training data.

    The training rows come from ``data.getTrainData()``. Column 0 holds the
    ids, column 1 the expected result (or, for an unsupervised model, the
    column where the result will be populated), and the remaining columns
    the feature vector.

    Args:
        mtype: Integer model selector. ``1`` fits an unsupervised KMeans
            (2 clusters), stores the cluster labels through
            ``data.updateResult`` and commits; ``3`` fits a Gaussian naive
            Bayes model on an 80/20 train/validation split.

    Returns:
        The fitted model, or an empty tuple for any other ``mtype``.
    """
    (reviews, names) = data.getTrainData()
    dataset = pandas.DataFrame(data=reviews, columns=names)
    print("num train samples " + str(dataset.shape))

    array = dataset.values
    ids = array[:, 0]
    expected = array[:, 1]
    features = array[:, 2:]

    if mtype == 1:  # do unsupervised Kmeans
        tmodel = KMeans(n_clusters=2, random_state=0).fit(features)
        # persist one (label, id) pair per row
        data.updateResult(list(zip(tmodel.labels_, ids)))
        data.commit()
    elif mtype == 3:  # supervised NB
        # Only the supervised branch needs the split; the validation part
        # is not used here.
        X_train, _X_validation, Y_train, _Y_validation = \
            cross_validation.train_test_split(features, expected,
                                              test_size=0.20,
                                              random_state=7)
        tmodel = GaussianNB()
        tmodel.fit(X_train, Y_train)
    else:
        # unknown selector: callers receive an empty tuple
        tmodel = ()

    return tmodel
コード例 #3
0
def create_product():
    """Create a product from the JSON request body and return it as JSON.

    The body must carry ``asin``, ``cid``, ``title``, ``imurl``, ``price``,
    ``rank`` and ``reviews``. Each review is posted to ``sa_url``. After the
    commit, any unevaluated reviews of the product are posted to ``ml_url``
    and the returned result is stored with ``data.updateResult``.

    Aborts with:
        400 if the body is empty or has no ``asin`` key,
        404 if ``asin`` is empty,
        405 if the product already exists, or a review returned by the
            ``sa_url`` service already exists for this product.

    Returns:
        ``jsonify({'product': ...})`` with the first row returned by
        ``data.getProductInfo``.
    """
    # Read the body once instead of calling get_json() for every field.
    payload = request.get_json(force=True)
    if not payload or 'asin' not in payload:
        abort(400)
    prod_id = payload['asin']
    cat_id = payload['cid']
    pname = payload['title']
    url = payload['imurl']
    price = payload['price']
    rank = payload['rank']
    if not prod_id:
        abort(404)
    # need to check if product is in the product table, if not create it,
    # else throw error
    if data.existProduct(prod_id):
        abort(405)
    product = {'pid': prod_id, 'cid': cat_id, 'pname': pname,
               'imurl': url, 'price': price, 'rank': rank}
    data.insertProductFeatures(product)

    for raw_review in payload['reviews']:
        ret = requests.post(
            sa_url,
            data=json.dumps({'review': raw_review, 'product': product}))
        review = json.loads(ret.text)
        # review with that prod_id and reviewer_id should not exist, but if
        # they do throw error
        if data.existReview(prod_id, review['rid']):
            abort(405)
        print(review)
    data.commit()

    (uneval_reviews, names) = data.getUnevalReviews(prod_id)
    if uneval_reviews is not None:
        # Only report unevaluated reviews when there actually are some.
        print('found some uneval reviews')
        body = json.dumps({'reviews': uneval_reviews, 'names': names})
        print(body)
        response = requests.post(ml_url, data=body)
        result = json.loads(response.text)['result']
        data.updateResult(result)

    product = data.getProductInfo(prod_id)
    return jsonify({'product': product[0]})
コード例 #4
0
def get_product(prod_id):
    """Return the product identified by ``prod_id`` as a JSON response.

    Any unevaluated reviews of the product are first posted to ``ml_url``
    and the returned result is stored with ``data.updateResult``.

    Args:
        prod_id: Identifier passed to ``data.getUnevalReviews`` and
            ``data.getProductInfo``.

    Aborts with 404 if ``data.getProductInfo`` returns no rows.

    Returns:
        ``jsonify({'product': ...})`` with the first row returned by
        ``data.getProductInfo``.
    """
    # Function-call form works on both Python 2 and Python 3.
    print("here?")
    (uneval_reviews, names) = data.getUnevalReviews(prod_id)
    if uneval_reviews is not None:
        print('found some uneval reviews')
        # Serialize once and reuse for both the log line and the request.
        body = json.dumps({'reviews': uneval_reviews, 'names': names})
        print(body)
        response = requests.post(ml_url, data=body)
        result = json.loads(response.text)['result']
        data.updateResult(result)

    product = data.getProductInfo(prod_id)
    if len(product) == 0:
        abort(404)

    return jsonify({'product': product[0]})
コード例 #5
0
# Script fragment: cluster the rows of `array` with KMeans, store the labels,
# then label the rows returned by data.getUnTrainData() with the same model.
# `array` is defined before this excerpt (not visible here).
I = array[:, 0]
# in a supervised model the 2nd column is the expected result, else,
# column where the result will be populated
R = array[:, 1]
X = array[:, 2:]
# Disabled train/validation split, left in place for reference.
#validation_size = 0.20
#seed = 7
#X_train, X_validation, Y_train, Y_validation =  \
#           cross_validation.train_test_split(X, R, test_size=validation_size,
#                                      random_state=seed)
#num_folds = 10
#num_instances = len(X_train)
#scoring = 'accuracy'

# Fit a 2-cluster KMeans on the feature columns and store (id, label) pairs.
# NOTE(review): trainModel/setTModel in this file pass list(zip(labels, ids)),
# i.e. the opposite pair order and a materialized list -- confirm which form
# data.updateResult expects.
kmeans = KMeans(n_clusters=2, random_state=1).fit(X)
data.updateResult(zip(I, kmeans.labels_))

# Load the rows returned by data.getUnTrainData() into a DataFrame.
(reviews, names) = data.getUnTrainData()
dataset = pandas.DataFrame(data=reviews, columns=names)
print(dataset.shape)

# first column of dataset contains ids
# the next 10 columns contain the feature vector
array = dataset.values
I = array[:, 0]
# features start at column 2, as above; column 1 is skipped
X = array[:, 2:]
# Assign each of these rows to a cluster of the already-fitted model.
data.updateResult(zip(I, kmeans.predict(X)))

# Load all review features; `dataset` is rebound and not used again within
# this excerpt.
(reviews, names) = data.getAllReviewFeatures()
dataset = pandas.DataFrame(data=reviews, columns=names)