def check_hybrid_performance(file_name, column_review, column_rating, size, start = 0):
    """Evaluate the hybrid (attribute-based) rating predictor on a dataset sample.

    Reads `main/files/<file_name>` as line-delimited JSON, shuffles the rows,
    takes the slice [start, start + size), predicts ratings for the review
    column via list_ratings_attributes, and compares them with the actual
    rating column.

    Returns a tuple (accuracy, precision, recall, fmeasure, confusion_matrix).
    """
    from sklearn.metrics import accuracy_score

    # Load, shuffle, and slice out the requested window of rows.
    frame = read_json('main/files/' + file_name, lines=True)
    frame = frame.sample(frac=1).reset_index(drop=True)
    sample = frame[start:start + size]

    # Actual labels vs. predictions produced by the attribute-based model.
    actual = sample[column_rating].tolist()
    predicted = list_ratings_attributes(sample[column_review].tolist())

    # Performance measures (macro-averaged precision/recall/F1).
    acc = accuracy_score(actual, predicted)
    prec, rec, f1 = accuracy_macro(actual, predicted)
    matrix = confusion_matrix(actual, predicted)
    return acc, prec, rec, f1, matrix
def make_model(file_name, column_review, column_rating, json_balanced=False, have_corpus=False, size=5000, cv_vectors = 3000):
    """Train an ANN rating classifier over a bag-of-words encoding of reviews.

    Parameters:
        file_name       -- JSON dataset under files/ (one record per line).
        column_review   -- name of the review-text column.
        column_rating   -- name of the rating-label column.
        json_balanced   -- when False, first build files/uniform_json.json with
                           an equal number of rows per rating class.
        have_corpus     -- when True, reuse the previously written preprocessed
                           corpus instead of lemmatizing the dataset again.
        size            -- number of rows (and corpus entries) used for training.
        cv_vectors      -- count-vectorizer feature count (ANN input dimension).

    Returns (accuracy, precision, recall, fmeasure, confusion_matrix) computed
    on a held-out 20% test split.  Side effect: the trained classifier is
    pickled to models/model.pkl.
    """
    # Build a rating-balanced JSON file unless the caller says one exists.
    if not json_balanced:
        # size // 5 rows per rating class (5 classes).  Integer division is
        # the fix: `size / 5` passed a float count under Python 3.
        make_balance_json("files/" + file_name, column_review, column_rating,
                          "files/uniform_json.json", size // 5)
    dataset = read_json('files/uniform_json.json', lines=True)

    # Build (and persist) the lemmatized corpus, or reuse the saved one.
    if not have_corpus:
        corpus = basic.preprocess_lemm_dataset(dataset, 'review')
        process_corpus.write_corpus(corpus)
    else:
        corpus = process_corpus.read_corpus()[:size]

    # Count-vectorize the corpus into X; y is the rating column.
    X = bagging_x.get_X_vector(corpus, cv_vectors)
    y = dataset.iloc[:size, 0]

    # One-hot encode the ratings into 5 categorical columns.
    labelencoder_y = LabelEncoder()
    y = labelencoder_y.fit_transform(y)
    y = y.reshape(len(y), 1)
    # NOTE(review): `categorical_features=[0]` was removed from sklearn's
    # OneHotEncoder; with a single input column the default is identical.
    onehotencoder = OneHotEncoder()
    y = onehotencoder.fit_transform(y).toarray()

    # Hold out 20% of the data for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

    # ANN: three hidden ReLU layers of cv_vectors/4 units each, plus a
    # 5-unit sigmoid output layer (one unit per rating class).
    classifier = Sequential()
    classifier.add(Dense(units=int(cv_vectors / 4), kernel_initializer='uniform',
                         activation='relu', input_dim=cv_vectors))
    classifier.add(Dense(units=int(cv_vectors / 4), kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=int(cv_vectors / 4), kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=5, kernel_initializer='uniform', activation='sigmoid'))

    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # `epochs` is the Keras 2 keyword (the layer API above is already Keras 2);
    # `nb_epoch` was the removed Keras 1 spelling.
    classifier.fit(X_train, y_train, batch_size=150, epochs=30)

    # Predictions are per-class probabilities; collapse each row to a
    # one-hot vector by keeping only its (first) maximum.
    y_pred = classifier.predict(X_test)
    for row in y_pred:
        best = row.argmax()
        row[:] = 0
        row[best] = 1

    # Map one-hot vectors back to the original label encoding for scoring.
    y_pred = onehotencoder.inverse_transform(y_pred)
    y_test = onehotencoder.inverse_transform(y_test)

    # Measure performance on the test split.
    accuracy = accuracy_score(y_test, y_pred, normalize=True, sample_weight=None)
    precision, recall, fmeasure = accuracy_macro(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    # Persist the trained model.  A context manager closes the handle (the
    # original `pickle.dump(..., open(...))` leaked it) and a forward-slash
    # path works on every OS ('models\model.pkl' is a literal backslash
    # file name on POSIX).
    with open('models/model.pkl', 'wb') as model_file:
        pickle.dump(classifier, model_file)

    # Returning the performance parameters
    return accuracy, precision, recall, fmeasure, cm
# ---------------------------------------------------------------------------
# Leftover experiment code, previously at module level.
#
# These statements referenced names that only exist inside make_model()
# (`classifier`, `X_train`, `onehotencoder`, ...), so merely importing this
# module raised NameError before any function could be called.  They are
# preserved here as a private function that receives those objects explicitly.
# ---------------------------------------------------------------------------
def _legacy_train_and_score(classifier, X_train, y_train, X_test, y_test, onehotencoder):
    """Finish building `classifier`, train it, and score it on the test split.

    Parameters:
        classifier    -- a partially-built Keras Sequential model.
        X_train/y_train, X_test/y_test -- the train/test split (y one-hot).
        onehotencoder -- the fitted OneHotEncoder used to encode y.

    Returns (cm, precision, recall, fmeasure) for the test predictions.
    """
    from sklearn.metrics import confusion_matrix
    from performance.accuracy_measures import accuracy_macro

    # Third hidden layer and output layer.  Keras 2 keywords (`units`,
    # `kernel_initializer`) replace the removed Keras 1 `output_dim`/`init`,
    # matching the style used by make_model above.
    classifier.add(Dense(units=2000, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=5, kernel_initializer='uniform', activation='sigmoid'))

    # Compiling the ANN
    classifier.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

    # Fitting the ANN to the Training set (`epochs` is the Keras 2 keyword).
    classifier.fit(X_train, y_train, batch_size=50, epochs=5)

    # Predicting the Test set results
    y_pred = classifier.predict(X_test)

    # Binarize every predicted row at 0.5.  The original hard-coded
    # range(200) and silently skipped any rows beyond the first 200.
    for i in range(len(y_pred)):
        for j in range(5):
            y_pred[i][j] = y_pred[i][j] > 0.5

    # Map one-hot rows back to the original rating labels.
    y_pred = onehotencoder.inverse_transform(y_pred)
    y_test = onehotencoder.inverse_transform(y_test)

    # Making the Confusion Matrix and macro-averaged measures.
    cm = confusion_matrix(y_test, y_pred)
    precision, recall, fmeasure = accuracy_macro(y_test, y_pred)
    return cm, precision, recall, fmeasure