def feedback(pipeline, parameters, ultimateTraindf):
    """Retrain using previously mispredicted feedback rows plus the base training data.

    Parameters
    ----------
    pipeline : estimator/Pipeline handed to the grid-search factory.
    parameters : dict of hyper-parameter grids for the grid search.
    ultimateTraindf : DataFrame expected to carry 'properIngredients' and
        'cuisine' columns (same schema as feedback.csv) — TODO confirm.

    Returns
    -------
    Whatever ``validate(...)`` returns (best parameters / validation
    predictions, per the sibling implementation in this file).
    """
    # Read feedback data written by an earlier prediction run.
    fbkdf = pd.read_csv("../../results/feedback.csv")
    # Keep only the rows the model got wrong. `== False` is kept instead of
    # `~` so a non-bool 'check' column does not raise — TODO confirm dtype.
    fbkdf = fbkdf.loc[fbkdf['check'] == False]
    # Combine with the rest of the training data.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported equivalent (same ignore_index semantics).
    fbkdf = pd.concat([fbkdf, ultimateTraindf], ignore_index=True)
    # Feature text and label vector for learning/prediction.
    # Series.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
    X, y = fbkdf['properIngredients'], fbkdf['cuisine'].to_numpy()
    # Hold out 30% of the combined data for validation.
    Xtrain, Xvalidate, ytrain, yValidate = train_test_split(X, y, train_size=0.7)
    # Initialize the GridSearchCV classifier with the parameter grid.
    gridSearchClassifier = cookut.getGridSearchCv(pipeline, parameters)
    # Fit the grid search on the training split.
    gridSearchClassifier.fit(Xtrain, ytrain)
    # Pick the best parameter set and evaluate on the validation split.
    return validate(parameters, gridSearchClassifier, Xvalidate, yValidate)
def feedback(pipeline, parameters, ultimateTraindf):
    """Retrain using previously mispredicted feedback rows plus the base training data.

    NOTE(review): this is a byte-for-byte duplicate (modulo spacing) of the
    ``feedback`` defined immediately above and silently shadows it — likely
    an accidental paste; one copy should be deleted.

    Parameters
    ----------
    pipeline : estimator/Pipeline handed to the grid-search factory.
    parameters : dict of hyper-parameter grids for the grid search.
    ultimateTraindf : DataFrame expected to carry 'properIngredients' and
        'cuisine' columns (same schema as feedback.csv) — TODO confirm.

    Returns
    -------
    Whatever ``validate(...)`` returns (best parameters / validation
    predictions, per the sibling implementation in this file).
    """
    # Read feedback data written by an earlier prediction run.
    fbkdf = pd.read_csv("../../results/feedback.csv")
    # Keep only the rows the model got wrong. `== False` is kept instead of
    # `~` so a non-bool 'check' column does not raise — TODO confirm dtype.
    fbkdf = fbkdf.loc[fbkdf['check'] == False]
    # Combine with the rest of the training data.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported equivalent (same ignore_index semantics).
    fbkdf = pd.concat([fbkdf, ultimateTraindf], ignore_index=True)
    # Feature text and label vector for learning/prediction.
    # Series.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
    X, y = fbkdf['properIngredients'], fbkdf['cuisine'].to_numpy()
    # Hold out 30% of the combined data for validation.
    Xtrain, Xvalidate, ytrain, yValidate = train_test_split(X, y, train_size=0.7)
    # Initialize the GridSearchCV classifier with the parameter grid.
    gridSearchClassifier = cookut.getGridSearchCv(pipeline, parameters)
    # Fit the grid search on the training split.
    gridSearchClassifier.fit(Xtrain, ytrain)
    # Pick the best parameter set and evaluate on the validation split.
    return validate(parameters, gridSearchClassifier, Xvalidate, yValidate)
def getBestParameters(pipeline,parameters): # Load training data traindf = pd.read_json('../../data/train.json') # Remove everything but alphabets and then Lemmatize. Also remove extra whitespace traindf['properIngredients'] = [' '.join([WordNetLemmatizer().lemmatize(re.sub('[^A-Za-z]', ' ', line)) for line in lists]).strip() for lists in traindf['ingredients']] # Create learning matrix X, y = traindf['properIngredients'], traindf['cuisine'].as_matrix() # Split into Training and Validation Sets Xtrain, Xvalidate, ytrain, yValidate = train_test_split(X, y, train_size=0.7) # Initialize gridSearchClassifierCV Classifier with parameters gridSearchClassifier = cookut.getGridSearchCv(pipeline, parameters) # Fit/train the gridSearchClassifier on Training Set gridSearchClassifier.fit(Xtrain, ytrain) # Make predictions on validation set and calculate best set of parameters bestParameters,predictions=cookVal.validate(parameters, gridSearchClassifier, Xvalidate, yValidate) # Initialize DataFrame for feedback loop valdf = pd.DataFrame(index = Xvalidate.index.values) # Add ingredients column valdf=valdf.join(Xvalidate)