def fit(self):
    # gather every pattern sentence from the stored nodes into one corpus
    nodes = retrieve('nodes')
    corpus = []
    for node in nodes:
        corpus.extend(node['patterns'])

    # vectorize the corpus with TF-IDF and cluster the vectors with KMeans
    tfidf = self.tfidf_vectorizer.fit_transform(corpus)
    self.kmeans.fit(tfidf)
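A short usage sketch: once fit() has run, the same vectorizer and KMeans instance can assign unseen text to a learned cluster. Here `clusterer` stands in for an instance of the (unnamed) class these methods belong to, and the query text is made up:

clusterer.fit()
vec = clusterer.tfidf_vectorizer.transform(["where can I find the docs"])
cluster_id = clusterer.kmeans.predict(vec)[0]  # index of the nearest centroid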
Example #2
def load():
    global all_words, ids, model, nodes
    try:
        nodes = retrieve('nodes')

        # restore the checkpoint written by train() in Example #3
        data = torch.load("data.pth")

        all_words = data['all_words']
        ids = data['ids']

        # rebuild the network with the saved dimensions and load its weights
        model = NeuralNet(data["input_size"], data["hidden_size"], data["output_size"]).to(device)
        model.load_state_dict(data["model_state"])
        model.eval()
    except Exception as e:
        # a bare `except:` would also swallow KeyboardInterrupt and SystemExit;
        # catching Exception and reporting the cause makes failures debuggable
        print(f"An exception occurred while loading the model: {e}")
Example #3
def train():
    nodes = retrieve('nodes')

    all_words = []
    ids = []
    xy = []
    # loop through each sentence in our node patterns
    for node in nodes:
        # add to id list
        ids.append(node['id'])
        for pattern in node['patterns']:
            # tokenize each word in the sentence
            w = tokenize(pattern)
            # add to our words list
            all_words.extend(w)
            # add to xy pair
            xy.append((w, node['id']))

    # drop stop words and punctuation tokens, then stem what remains
    ignore_words = ['?', '.', '!', '(', ')']
    stop_words = retrieve('stop_words')
    all_words = [w for w in all_words if w.lower() not in stop_words]
    all_words = [stem(w) for w in all_words if w not in ignore_words]

    # remove duplicates and sort
    all_words = sorted(set(all_words))
    ids = sorted(set(ids))

    # create training data
    x_train = []
    y_train = []
    for (pattern_sentence, node_id) in xy:
        # X: bag of words for each pattern_sentence
        bag = bag_of_words(pattern_sentence, all_words)
        x_train.append(bag)

        # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
        y_train.append(ids.index(node_id))

    x_train = np.array(x_train)
    y_train = np.array(y_train)

    # Hyper-parameters
    num_epochs = 1000
    batch_size = 8
    learning_rate = 0.001
    input_size = len(x_train[0])
    hidden_size = 8
    output_size = len(ids)

    class ChatDataset(Dataset):
        def __init__(self):
            self.n_samples = len(x_train)
            self.x_data = x_train
            self.y_data = y_train

        # support indexing such that dataset[i] can be used to get i-th sample
        def __getitem__(self, index):
            return self.x_data[index], self.y_data[index]

        # we can call len(dataset) to return the size
        def __len__(self):
            return self.n_samples

    dataset = ChatDataset()
    train_loader = DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=0)

    device = torch.device('cpu')

    model = NeuralNet(input_size, hidden_size, output_size).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    for epoch in range(num_epochs):
        for (words, labels) in train_loader:
            words = words.to(device)
            labels = labels.to(dtype=torch.long).to(device)

            # Forward pass
            outputs = model(words)
            # if y were one-hot encoded, we would first have to recover
            # class indices: labels = torch.max(labels, 1)[1]
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    data = {
        "model_state": model.state_dict(),
        "input_size": input_size,
        "hidden_size": hidden_size,
        "output_size": output_size,
        "all_words": all_words,
        "ids": ids
    }

    torch.save(data, "data.pth")
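train() relies on a bag_of_words helper that is not shown in these excerpts. One plausible implementation, consistent with how the function is used above (a fixed-length float32 vector with 1.0 for every vocabulary word present in the stemmed sentence); this is an assumption, not the project's actual code:

import numpy as np

def bag_of_words(tokenized_sentence, all_words):
    # assumed helper: stem the incoming tokens, then mark which
    # vocabulary words occur in the sentence
    sentence_words = [stem(w) for w in tokenized_sentence]
    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in sentence_words:
            bag[idx] = 1.0
    return bag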
Example #4
def __init__(self, n_clusters, n_init):
    # stop words are excluded from the TF-IDF vocabulary
    self.stop_words = retrieve('stop_words')
    self.tfidf_vectorizer = TfidfVectorizer(preprocessor=None,
                                            stop_words=self.stop_words)
    self.kmeans = KMeans(n_clusters=n_clusters, n_init=n_init)
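Example #4 wires a custom stop-word list into scikit-learn. A standalone sketch of the same construction with inline data, so the moving parts are visible (the literal lists here stand in for retrieve('stop_words') and the real pattern corpus):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

stop_words = ["the", "a", "is"]           # stand-in for retrieve('stop_words')
tfidf_vectorizer = TfidfVectorizer(preprocessor=None, stop_words=stop_words)
kmeans = KMeans(n_clusters=2, n_init=10)

tfidf = tfidf_vectorizer.fit_transform(["reset my password",
                                        "the password is wrong",
                                        "open a new account"])
kmeans.fit(tfidf)
print(kmeans.labels_)                     # one cluster label per pattern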
Example #5
def get_state(self):
    return retrieve('state')
Example #6
def get_svg(self, key):
    images = retrieve('images')
    # map a stored image key to its URL
    return {'key': key, 'url': images[key]}
Example #7
def get_conversations(self):
    return retrieve(self.fileName)
Example #8
def get_feedback(self):
    return retrieve('unanswered')
Example #9
def get_nodes(self):
    return retrieve('nodes')
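Every example funnels persistence through retrieve(key), whose implementation is not shown here. A minimal stand-in that would satisfy these call sites, purely as an assumption about the interface (one JSON document holding all keyed data):

import json

def retrieve(key):
    # hypothetical backend: 'nodes', 'stop_words', 'images', 'state',
    # 'unanswered', ... all live in a single JSON file
    with open("store.json") as f:
        return json.load(f)[key]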