def feedback(pipeline, parameters, ultimateTraindf):
    """Retrain using feedback: merge misclassified feedback rows with the
    existing training data, tune a grid-search classifier, and validate it.

    Args:
        pipeline: sklearn Pipeline to tune.
        parameters: hyper-parameter grid for the grid search.
        ultimateTraindf: DataFrame with 'properIngredients' and 'cuisine'
            columns holding the rest of the training data.

    Returns:
        The result of validate() — best parameters / predictions on the
        validation split.
    """
    # Read feedback data
    fbkdf = pd.read_csv("../../results/feedback.csv")
    # Extract incorrect predictions (rows flagged check == False)
    fbkdf = fbkdf.loc[fbkdf['check'] == False]
    # Combine with rest of the training data.
    # DataFrame.append was removed in pandas 2.0 — use pd.concat instead.
    fbkdf = pd.concat([fbkdf, ultimateTraindf], ignore_index=True)
    # Create matrix for learning and prediction.
    # Series.as_matrix() was removed in pandas 1.0 — use to_numpy().
    X, y = fbkdf['properIngredients'], fbkdf['cuisine'].to_numpy()
    # Split further into training and validation sets
    Xtrain, Xvalidate, ytrain, yValidate = train_test_split(X, y, train_size=0.7)
    # Initialize grid-search classifier with the given parameter grid
    gridSearchClassifier = cookut.getGridSearchCv(pipeline, parameters)
    # Fit the gridSearchClassifier on the training set
    gridSearchClassifier.fit(Xtrain, ytrain)
    # Calculate best set of parameters and make predictions on validation set
    return validate(parameters, gridSearchClassifier, Xvalidate, yValidate)
# Example 2
def feedback(pipeline, parameters, ultimateTraindf):
    """Retrain using feedback: merge misclassified feedback rows with the
    existing training data, tune a grid-search classifier, and validate it.

    Args:
        pipeline: sklearn Pipeline to tune.
        parameters: hyper-parameter grid for the grid search.
        ultimateTraindf: DataFrame with 'properIngredients' and 'cuisine'
            columns holding the rest of the training data.

    Returns:
        The result of validate() — best parameters / predictions on the
        validation split.
    """
    # Read feedback data
    fbkdf = pd.read_csv("../../results/feedback.csv")
    # Extract incorrect predictions (rows flagged check == False)
    fbkdf = fbkdf.loc[fbkdf['check'] == False]
    # Combine with rest of the training data.
    # DataFrame.append was removed in pandas 2.0 — use pd.concat instead.
    fbkdf = pd.concat([fbkdf, ultimateTraindf], ignore_index=True)
    # Create matrix for learning and prediction.
    # Series.as_matrix() was removed in pandas 1.0 — use to_numpy().
    X, y = fbkdf['properIngredients'], fbkdf['cuisine'].to_numpy()
    # Split further into training and validation sets
    Xtrain, Xvalidate, ytrain, yValidate = train_test_split(X,
                                                            y,
                                                            train_size=0.7)
    # Initialize grid-search classifier with the given parameter grid
    gridSearchClassifier = cookut.getGridSearchCv(pipeline, parameters)
    # Fit the gridSearchClassifier on the training set
    gridSearchClassifier.fit(Xtrain, ytrain)
    # Calculate best set of parameters and make predictions on validation set
    return validate(parameters, gridSearchClassifier, Xvalidate, yValidate)
def getBestParameters(pipeline,parameters):

    # Load training data
    traindf = pd.read_json('../../data/train.json')
    # Remove everything but alphabets and then Lemmatize. Also remove extra whitespace
    traindf['properIngredients'] = [' '.join([WordNetLemmatizer().lemmatize(re.sub('[^A-Za-z]', ' ', line)) for line in lists]).strip() for lists in traindf['ingredients']]       
    # Create learning matrix
    X, y = traindf['properIngredients'], traindf['cuisine'].as_matrix()
    # Split into Training and Validation Sets
    Xtrain, Xvalidate, ytrain, yValidate = train_test_split(X, y, train_size=0.7)
    # Initialize gridSearchClassifierCV Classifier with parameters
    gridSearchClassifier = cookut.getGridSearchCv(pipeline, parameters)
    # Fit/train the gridSearchClassifier on Training Set
    gridSearchClassifier.fit(Xtrain, ytrain)
    # Make predictions on validation set and calculate best set of parameters  
    bestParameters,predictions=cookVal.validate(parameters, gridSearchClassifier, Xvalidate, yValidate)
    # Initialize DataFrame for feedback loop
    valdf = pd.DataFrame(index = Xvalidate.index.values)
    # Add ingredients column
	valdf=valdf.join(Xvalidate)