def trainModel(mtype, dataset):
    """Train the model selected by ``mtype`` on ``dataset``.

    Column 0 of ``dataset`` holds the row ids.  For a supervised model
    (``mtype >= 3``) column 1 holds the expected result and the columns
    from index 2 onwards are the features; otherwise every column after
    the ids is treated as a feature.

    Args:
        mtype: model selector; 1 fits KMeans, 3 fits GaussianNB.
        dataset: pandas DataFrame laid out as described above.

    Returns:
        The fitted model, or None when ``mtype`` selects no known model.
    """
    array = dataset.values
    ids = array[:, 0]
    model = None

    # Supervised models start at mtype 3 (GaussianNB below); the previous
    # ``mtype > 3`` guard left X_train/Y_train undefined for that case.
    if mtype >= 3:
        expected = array[:, 1]
        # Feature columns start at index 2 (``array[:2:]`` sliced rows).
        X = array[:, 2:]
        validation_size = 0.20
        seed = 7
        X_train, X_validation, Y_train, Y_validation = \
            cross_validation.train_test_split(X, expected,
                                              test_size=validation_size,
                                              random_state=seed)
    else:
        X = array[:, 1:]

    if mtype == 1:
        # Unsupervised KMeans: store the cluster label found for each id.
        # NOTE(review): pairs are (label, id); other code in this file passes
        # (id, label) to data.updateResult -- confirm the expected order.
        model = KMeans(n_clusters=2, random_state=0).fit(X)
        data.updateResult(list(zip(model.labels_, ids)))
    elif mtype == 3:
        # Supervised Gaussian naive Bayes, fitted on the training split.
        model = GaussianNB()
        model.fit(X_train, Y_train)

    return model
def setTModel(mtype):
    """Fit the model selected by ``mtype`` on the stored training data.

    The training rows come from ``data.getTrainData()``.  Column 0 holds
    the ids, column 1 the expected result (or, for an unsupervised model,
    the column where the result will be populated) and the columns from
    index 2 onwards the feature vector.

    Args:
        mtype: model selector; 1 fits KMeans, 3 fits GaussianNB.

    Returns:
        The fitted model, or an empty tuple for any other ``mtype``.
    """
    (reviews, names) = data.getTrainData()
    dataset = pandas.DataFrame(data=reviews, columns=names)
    print("num train samples " + str(dataset.shape))

    array = dataset.values
    ids = array[:, 0]
    expected = array[:, 1]
    features = array[:, 2:]

    if mtype == 1:
        # Unsupervised KMeans: cluster all rows, then persist the labels.
        # NOTE(review): pairs are (label, id); other code in this file passes
        # (id, label) to data.updateResult -- confirm the expected order.
        tmodel = KMeans(n_clusters=2, random_state=0).fit(features)
        data.updateResult(list(zip(tmodel.labels_, ids)))
        data.commit()
    elif mtype == 3:
        # Supervised Gaussian naive Bayes.  Only this branch needs the
        # train/validation split, so it is no longer computed for the
        # other model types.
        validation_size = 0.20
        seed = 7
        X_train, _X_validation, Y_train, _Y_validation = \
            cross_validation.train_test_split(features, expected,
                                              test_size=validation_size,
                                              random_state=seed)
        tmodel = GaussianNB()
        tmodel.fit(X_train, Y_train)
    else:
        tmodel = ()
    return tmodel
def create_product():
    """Create a product from the JSON request body and evaluate its reviews.

    The body must be a JSON object with the keys ``asin``, ``cid``,
    ``title``, ``imurl``, ``price``, ``rank`` and ``reviews``.  Each review
    is posted to ``sa_url`` together with the product; afterwards any
    unevaluated reviews of the product are posted to ``ml_url`` and the
    returned result is stored.

    Aborts with:
        400 if the body is empty, not an object, or lacks a required key.
        404 if ``asin`` is empty.
        405 if the product, or one of the processed reviews, already exists.

    Returns:
        A JSON response holding the first entry of the stored product info.
    """
    # Parse the body once instead of re-fetching it for every field.
    payload = request.get_json(force=True)
    if not payload or 'asin' not in payload:
        abort(400)

    # Validate every field before anything is written, so an incomplete
    # payload is rejected as a bad request rather than failing with a
    # KeyError after the product has already been inserted.
    required = ('cid', 'title', 'imurl', 'price', 'rank', 'reviews')
    if not isinstance(payload, dict) or any(key not in payload for key in required):
        abort(400)

    prod_id = payload['asin']
    if len(prod_id) == 0:
        abort(404)

    # The product must not exist yet.
    if data.existProduct(prod_id):
        abort(405)

    product = {
        'pid': prod_id,
        'cid': payload['cid'],
        'pname': payload['title'],
        'imurl': payload['imurl'],
        'price': payload['price'],
        'rank': payload['rank'],
    }
    data.insertProductFeatures(product)

    for raw_review in payload['reviews']:
        sa_response = requests.post(
            sa_url,
            data=json.dumps({'review': raw_review, 'product': product}))
        review = json.loads(sa_response.text)
        # A review with that product id and review id should not exist;
        # if it does, refuse the request.
        if data.existReview(prod_id, review['rid']):
            abort(405)
        print(review)
    data.commit()

    (uneval_reviews, names) = data.getUnevalReviews(prod_id)
    if uneval_reviews is not None:
        print('found some uneval reviews')
        print(json.dumps({'reviews': uneval_reviews, 'names': names}))
        ml_response = requests.post(
            ml_url,
            data=json.dumps({'reviews': uneval_reviews, 'names': names}))
        result = json.loads(ml_response.text)['result']
        data.updateResult(result)

    product = data.getProductInfo(prod_id)
    return jsonify({'product': product[0]})
def get_product(prod_id):
    """Return the stored info for ``prod_id`` as a JSON response.

    Any reviews of the product that have not been evaluated yet are first
    posted to ``ml_url`` and the returned result is stored.

    Args:
        prod_id: id of the product to look up.

    Aborts with:
        404 if no product info is found.

    Returns:
        A JSON response holding the first entry of the product info.
    """
    # Function-call form prints the same text on Python 2 and is also valid
    # syntax on Python 3 (the bare ``print "..."`` statement is not).
    print("here?")

    (uneval_reviews, names) = data.getUnevalReviews(prod_id)
    if uneval_reviews is not None:
        print('found some uneval reviews')
        print(json.dumps({'reviews': uneval_reviews, 'names': names}))
        ml_response = requests.post(
            ml_url,
            data=json.dumps({'reviews': uneval_reviews, 'names': names}))
        result = json.loads(ml_response.text)['result']
        data.updateResult(result)

    product = data.getProductInfo(prod_id)
    if len(product) == 0:
        abort(404)
    return jsonify({'product': product[0]})
I = array[:, 0] # in a supervised model the 2nd column is the expected result, else, # column where the result will be populated R = array[:, 1] X = array[:, 2:] #validation_size = 0.20 #seed = 7 #X_train, X_validation, Y_train, Y_validation = \ # cross_validation.train_test_split(X, R, test_size=validation_size, # random_state=seed) #num_folds = 10 #num_instances = len(X_train) #scoring = 'accuracy' kmeans = KMeans(n_clusters=2, random_state=1).fit(X) data.updateResult(zip(I, kmeans.labels_)) (reviews, names) = data.getUnTrainData() dataset = pandas.DataFrame(data=reviews, columns=names) print(dataset.shape) # first column of dataset contains ids # the next 10 columns contain the feature vector array = dataset.values I = array[:, 0] X = array[:, 2:] data.updateResult(zip(I, kmeans.predict(X))) (reviews, names) = data.getAllReviewFeatures() dataset = pandas.DataFrame(data=reviews, columns=names)