Example no. 1
        print(ex)
        print("Could't load Doc2Vec, BUILDING...")
        doc2vec_model = generate_doc2vec(doc2vec_name, datasets)
        
# Step 3 Gather Features
print("FEATURIZING")

# Labels.
# getSentimentArray presumably restructures the labels for the RNN trainer,
# and useSmall=None appears to mean "use the full dataset" — TODO confirm.
yTrain = getSentimentArray(dataset_A["sentiment"], useSmall=None)
yTrain_list = torch.Tensor(list(dataset_A["sentiment"]))
yDev = getSentimentArray(dataset_B["sentiment"], useSmall=None)
yDev_list = torch.Tensor(list(dataset_B["sentiment"]))

# Word2vec features: per-review feature sequences, chunked into num_splits
# pieces for the recurrent model (NUM_FEATURES dims per vector).
xTrain = getFeatureList(getCleanReviews(dataset_A, useSmall=None),
                        word2vec_model, NUM_FEATURES, num_splits=4)
xDev = getFeatureList(getCleanReviews(dataset_B, useSmall=None),
                      word2vec_model, NUM_FEATURES, num_splits=4)

# Step 4 Train the recurrent NN model, or reload a saved checkpoint.
MODEL_PATH = "recurrent_model.pt"
doTraining = False  # flip to True to retrain instead of loading MODEL_PATH
if doTraining:
    print("TRAINING recurrent nn model")
    r_model = nn_recurrent_model.RNN(input_size=NUM_FEATURES)
    r_model.train(xTrain, yTrain, learning_rate=0.01, epochs=4)
    # NOTE(review): this pickles the whole module object, so loading requires
    # the RNN class to be importable (and torch>=2.6 needs weights_only=False).
    torch.save(r_model, MODEL_PATH)
else:
    r_model = torch.load(MODEL_PATH)

# Step 5 Evaluate performance
print()
Example no. 2
        print("Could't load Doc2Vec, BUILDING...")
        doc2vec_model = generate_doc2vec(doc2vec_name, datasets)

# Step 3 Gather Features
print("FEATURIZING")

# Labels: the sentiment column as a float tensor, one value per review.
yTrain = torch.Tensor(list(dataset_A["sentiment"]))
yDev = torch.Tensor(list(dataset_B["sentiment"]))

# Doc2vec features: one NUM_FEATURES-dim document vector per cleaned review.
# (A word2vec-averaging variant via getAvgFeatureVecs was tried previously.)
xTrainDoc = getDocFeatureVec(getCleanReviews(dataset_A), doc2vec_model,
                             NUM_FEATURES)
xTrainDoc = torch.tensor(xTrainDoc)

# Step 4 Train the feed-forward NN model, or reload a saved checkpoint.
MODEL_PATH = "feedforward_model.pt"
doTraining = False  # flip to True to retrain instead of loading MODEL_PATH
if doTraining:
    print("TRAINING nn model")
    l_model = nn_model.NeuralNetwork(input_nodes=NUM_FEATURES)
    l_model.train_model_persample(xTrainDoc, yTrain)
    # NOTE(review): pickles the whole module; loading needs the class importable.
    torch.save(l_model, MODEL_PATH)
else:
    l_model = torch.load(MODEL_PATH)

# Step 5 Evaluate performance
Example no. 3
                        header=0,
                        delimiter="\t",
                        quoting=3)
def _load_tsv(path):
    """Read one processed tab-separated dataset (header row, quoting disabled)."""
    return pd.read_csv(path, header=0, delimiter="\t", quoting=3)


dataset_D = _load_tsv("dataset/processed/D.tsv")
dataset_E = _load_tsv("dataset/processed/E.tsv")
datasets = [dataset_A, dataset_D, dataset_E]

# Labels and cleaned review text for train (A) / dev (B).
yTrain = torch.Tensor(list(dataset_A["sentiment"]))
yDev = torch.Tensor(list(dataset_B["sentiment"]))
xTrainRaw = getCleanReviews(dataset_A)
xDevRaw = getCleanReviews(dataset_B)

# Step 2 Define sweep range
evaluationRunSpecifications = []
# NOTE(review): earlier sweeps appended specs of the form
#   {'model': 'Standard Neural Network', 'num_features': n,
#    'context': 5, 'optimizing': 'num_features'}
# for n in 100..450 step 50; re-add them here to run the sweep again.
        print(ex)
        print("Could't load Doc2Vec, BUILDING...")
        doc2vec_model = generate_doc2vec(doc2vec_name, datasets)

# Step 3 Gather Features
print("FEATURIZING")

# Labels.
# getSentimentArray presumably restructures the labels for the RNN trainer,
# and useSmall=None appears to mean "use the full dataset" — TODO confirm.
yTrain = getSentimentArray(dataset_A["sentiment"], useSmall=None)
yTrain_list = torch.Tensor(list(dataset_A["sentiment"]))
yDev = getSentimentArray(dataset_B["sentiment"], useSmall=None)
yDev_list = torch.Tensor(list(dataset_B["sentiment"]))

# Word2vec features: per-review feature sequences, chunked into num_splits
# pieces for the recurrent model (NUM_FEATURES dims per vector).
xTrain = getFeatureList(getCleanReviews(dataset_A, useSmall=None),
                        word2vec_model,
                        NUM_FEATURES,
                        num_splits=4)
xDev = getFeatureList(getCleanReviews(dataset_B, useSmall=None),
                      word2vec_model,
                      NUM_FEATURES,
                      num_splits=4)
# Step 4 Training NN model
MODEL_PATH = "recurrent_model.pt"
doTraining = True
if doTraining:
    print("TRAINING recurrent nn model")
    r_model = nn_recurrent_model.RNN(input_size=NUM_FEATURES)
    r_model.train(xTrain, yTrain, learning_rate=0.01, epochs=4)