/
app.py
155 lines (123 loc) · 5.14 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#Our import list
import gdown
import streamlit as st
import numpy as np
import pandas as pd
import torch
import transformers
import seaborn as sns
import matplotlib.pyplot as plt
import torch.nn as nn
import os
#Libraries to pull
from transformers import BertModel
from torch import nn
from torch.nn import functional as F
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
#for added flavor and color
# Plot styling: global seaborn theme plus a custom bright palette used by any charts.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
#lets set random seed
# Reproducibility: seed both numpy and torch RNGs.
# NOTE(review): torch.cuda seeding is not set here — GPU runs may still vary; confirm if needed.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
#Set device for pytorch GPU / CPU
# Prefer the first CUDA device when available; fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def main():
    """Streamlit entry point: sentiment analysis of Amazon product reviews.

    Downloads a fine-tuned BERT checkpoint, loads it into a
    SentimentClassifier, and serves an interactive page where the user can
    inspect raw training data and run inference on free-text reviews.
    Relies on module-level `device` (CPU or CUDA) set at import time.
    """
    # Configuration shared by the tokenizer, model, and output labels.
    PRE_TRAINED_MODEL_NAME = 'bert-base-cased'
    class_names = ['negative', 'positive']
    tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

    class SentimentClassifier(nn.Module):
        """BERT encoder + dropout + linear head for binary sentiment."""

        def __init__(self, n_classes):
            super(SentimentClassifier, self).__init__()
            self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
            self.drop = nn.Dropout(p=0.3)
            self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

        def forward(self, input_ids, attention_mask):
            # return_dict=False keeps the (sequence_output, pooled_output)
            # tuple unpacking working on transformers >= 4.x, where the
            # default return type is a ModelOutput object, not a tuple.
            _, pooled_output = self.bert(
                input_ids=input_ids,
                attention_mask=attention_mask,
                return_dict=False
            )
            output = self.drop(pooled_output)
            return self.out(output)

    # Page chrome: title plus sidebar header.
    st.title('Sentiment analysis and product reviews.')
    st.sidebar.title("Sentiment Analysis Web App")
    st.sidebar.markdown("😃Is your review positive or negative?😞")

    @st.cache
    def load_data():
        """Fetch the training CSV from the public S3 bucket (cached by Streamlit)."""
        data = pd.read_csv('https://amazonproductdata.s3-us-west-1.amazonaws.com/train.csv')
        return data

    # Ingest the raw review data once; subsequent reruns hit the cache.
    df = load_data()

    @st.cache
    def get_model():
        """Download the trained model weights from Google Drive (cached)."""
        # gdown fails if the destination directory does not exist yet.
        os.makedirs('./assets', exist_ok=True)
        gdown.download(
            "https://drive.google.com/uc?id=1cz41bp4tf37Mky_R31T41qiSN6ucMjGi",
            "./assets/model_state_dict.bin", quiet=False
        )

    get_model()

    def load_model(filepath):
        """Build a SentimentClassifier and load trained weights from `filepath`.

        Returns the model on the module-level `device`, in eval mode so
        Dropout is disabled during inference.
        """
        model = SentimentClassifier(len(class_names))
        # map_location='cpu' lets a GPU-saved checkpoint load anywhere;
        # .to(device) then moves it to match the inference inputs.
        model.load_state_dict(torch.load(filepath, map_location=torch.device('cpu')))
        model = model.to(device)
        model.eval()
        return model

    model = load_model('./assets/model_state_dict.bin')

    def BERT_inference(review_text):
        """Classify a single review string and render the result on the page."""
        encoded_review = tokenizer.encode_plus(
            review_text,
            max_length=300,
            add_special_tokens=True,
            return_token_type_ids=False,
            truncation=True,        # cut reviews longer than max_length instead of erroring
            padding='max_length',   # replaces the deprecated pad_to_max_length=True
            return_attention_mask=True,
            return_tensors='pt',
        )
        input_ids = encoded_review['input_ids'].to(device)
        attention_mask = encoded_review['attention_mask'].to(device)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            output = model(input_ids, attention_mask)
        _, prediction = torch.max(output, dim=1)
        st.write(f'Review text: {review_text}')
        st.write(f'Sentiment : {class_names[prediction.item()]}')

    # Sidebar option: show a sample of the raw training data.
    if st.sidebar.checkbox("Show raw data", False):
        st.subheader("Amazon Review Sentiment Analysis. (Polarity Classification)")
        st.table(df.head(10))

    # Sidebar option: free-text inference demo.
    if st.sidebar.checkbox("Input text for inference", False):
        st.subheader("Amazon Review Dataset for Sentiment Analysis. (Inference Demonstration.)")
        user_input = st.text_area("Please provide a review here.")
        if user_input:
            BERT_inference(user_input)


if __name__ == '__main__':
    main()