Example #1
def classify(texts, output_format):
    # load model
    model = textClassification.Classifier('toxic', "gru", list_classes=list_classes)
    model.load()
    start_time = time.time()
    result = model.predict(texts, output_format)
    print("runtime: %s seconds " % (round(time.time() - start_time, 3)))
    return result
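A minimal usage sketch for the classify() helper above, assuming list_classes and the trained 'toxic' GRU model are available as in the example; the input texts and the "json" output format are illustrative placeholders, not taken from the original:

texts = [
    "This is a perfectly normal comment.",
    "You are an idiot and everybody hates you."
]
# returns the predictions in the requested output format
result = classify(texts, "json")
print(result)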
Example #2
def test():
    # load model
    model = textClassification.Classifier('toxic', "gru", list_classes=list_classes)
    model.load()

    print('loading test dataset...')
    xte = load_texts_pandas("data/textClassification/toxic/test.csv")
    print('number of texts to classify:', len(xte))
    start_time = time.time()
    result = model.predict(xte, output_format="csv")
    print("runtime: %s seconds " % (round(time.time() - start_time, 3)))
    return result
Example #3
def train(embeddings_name, fold_count): 
    model = textClassification.Classifier('toxic', "gru", list_classes=list_classes, max_epoch=30, 
        fold_number=fold_count, embeddings_name=embeddings_name)

    print('loading train dataset...')
    xtr, y = load_texts_and_classes_pandas("data/textClassification/toxic/train.csv")
    if fold_count == 1:
        model.train(xtr, y)
    else:
        model.train_nfold(xtr, y)
    # saving the model
    model.save()
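A hedged sketch of how train() might be invoked; the embeddings name and fold count below are assumed placeholders rather than values taken from the original example:

if __name__ == "__main__":
    # placeholders: use whichever registered embeddings and fold count apply
    train("glove-840B", fold_count=1)   # fold_count > 1 switches to train_nfold()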
Example #4
def classify(texts, output_format):
    # load model
    model = textClassification.Classifier('citations',
                                          "gru",
                                          list_classes=list_classes)
    model.load()
    start_time = time.time()
    result = model.predict(texts, output_format)
    runtime = round(time.time() - start_time, 3)
    if output_format == 'json':
        result["runtime"] = runtime
    else:
        print("runtime: %s seconds " % (runtime))
    return result
Example #5
def train(embeddings_name, fold_count):
    model = textClassification.Classifier('citations',
                                          "gru",
                                          list_classes=list_classes,
                                          max_epoch=70,
                                          fold_number=fold_count,
                                          use_roc_auc=True,
                                          embeddings_name=embeddings_name)

    print('loading citation sentiment corpus...')
    xtr, y = load_citation_sentiment_corpus(
        "data/textClassification/citations/citation_sentiment_corpus.txt")

    if fold_count == 1:
        model.train(xtr, y)
    else:
        model.train_nfold(xtr, y)
    # saving the model
    model.save()
Example #6
def train_and_eval(embeddings_name, training_data, fold_count):
    model = textClassification.Classifier('suomi24',
                                          "gru",
                                          list_classes=list_classes,
                                          max_epoch=1,
                                          fold_number=fold_count,
                                          use_roc_auc=True,
                                          embeddings_name=embeddings_name)

    print('loading train dataset...')
    #    xtr, y = load_texts_and_classes_pandas("data/textClassification/suomi24/suomi24.csv")
    xtr, y = load_texts_and_classes_pandas(training_data)

    # segment train and eval sets
    x_train, y_train, x_test, y_test = split_data_and_labels(xtr, y, 0.9)

    if fold_count == 1:
        model.train(x_train, y_train)
    else:
        model.train_nfold(x_train, y_train)
    model.eval(x_test, y_test)

    # saving the model
    model.save()
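A hedged sketch of how this entry point might be called; the training-data path matches the one shown in the commented-out line above, while the embeddings name and fold count are assumed placeholders:

train_and_eval("wiki.fi",
               "data/textClassification/suomi24/suomi24.csv",
               fold_count=1)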
Example #7
def train_and_eval(embeddings_name, fold_count):
    model = textClassification.Classifier('citations',
                                          "gru",
                                          list_classes=list_classes,
                                          max_epoch=70,
                                          fold_number=fold_count,
                                          use_roc_auc=True,
                                          embeddings_name=embeddings_name)

    print('loading citation sentiment corpus...')
    xtr, y = load_citation_sentiment_corpus(
        "data/textClassification/citations/citation_sentiment_corpus.txt")

    # segment train and eval sets
    x_train, y_train, x_test, y_test = split_data_and_labels(xtr, y, 0.9)

    if fold_count == 1:
        model.train(x_train, y_train)
    else:
        model.train_nfold(x_train, y_train)
    model.eval(x_test, y_test)

    # saving the model
    model.save()
Example #8
# assumed reconstruction: the original snippet starts mid-call; the keyword arguments
# below match a torchtext BucketIterator.splits(...) building the train/valid iterators
train_iterator, valid_iterator = data.BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    sort_within_batch=True
)

size_of_vocab = len(TEXT.vocab)
embedding_dim = 100
num_hidden_nodes = 128
num_layers = 2
N_EPOCHS = 20
best_valid_loss = float('inf')

model = textModel.Classifier(size_of_vocab, embedding_dim, num_hidden_nodes, num_layers)
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)

optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.BCELoss()

for epoch in range(N_EPOCHS):
     
    #train the model
    train_loss, train_acc = textModel.train(model, train_iterator, optimizer, criterion)
    
    #evaluate the model
    valid_loss, valid_acc = textModel.evaluate(model, valid_iterator, criterion)
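    # (assumed continuation, not part of the original snippet) keep the checkpoint
    # with the lowest validation loss; this is what best_valid_loss is initialised for
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')

    print(f'Epoch {epoch+1:02}: train loss {train_loss:.3f}, acc {train_acc:.3f} | '
          f'valid loss {valid_loss:.3f}, acc {valid_acc:.3f}')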