Example #1
MAX_DOCUMENT_LENGTH = 10
EMBEDDING_SIZE = 2
HIDDEN_SIZE = 4
ATTENTION_SIZE = 3
lr = 1e-3
BATCH_SIZE = 256
KEEP_PROB = 0.5
LAMBDA = 0.0001

MAX_LABEL = 2
epochs = 100

#dbpedia = tf.contrib.learn.datasets.load_dataset('dbpedia')
parameters = Parameters()
parameters.add_parameter("MAX_LABEL", MAX_LABEL)

# load data
x_train, y_train = (
    [], []
)  #load_data("data/classification_data/Training Data/train.csv", names=["Label", "clean_text", "tweet_text"])
x_test, y_test = (
    [], [])  #load_data("data/classification_data/Training Data/test.csv")

datafolder = 'data/classification_data/Training Data/1045'
exports_folder = 'data/exports/'
training_fileName = 'training_large_top50_clusters.csv'
test_fileName = 'test.csv'
parameters.add_parameter("Data Folder", datafolder)
parameters.add_parameter("Training filename", training_fileName)
parameters.add_parameter("Test filename", test_fileName)
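These snippets all depend on a Parameters helper whose definition is not shown. A minimal sketch of a compatible class, assuming add_parameter and add_complex_parameter simply record key/value pairs and write_parameters dumps them to a timestamped file (the method names come from the calls in these examples; the JSON output format is an assumption):

import json
import os


class Parameters:
    """Hypothetical run-configuration recorder matching the calls in these examples."""

    def __init__(self):
        self.params = {}

    def add_parameter(self, name, value):
        # scalar or string setting (e.g. BATCH_SIZE, lr)
        self.params[name] = value

    def add_complex_parameter(self, name, value):
        # structured setting (e.g. seed topic word lists, n-gram counts)
        self.params[name] = value

    def write_parameters(self, exports_folder, timestamp):
        # persist everything next to the run's exports; JSON is an assumption
        path = os.path.join(exports_folder, 'parameters_' + timestamp + '.json')
        with open(path, 'w') as f:
            json.dump(self.params, f, indent=2, default=str)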
Example #2
# (the head of this snippet was truncated; a likely reconstruction of the
#  top-words loop, assuming numpy as np, n_top_words, and id2word from context)
for i, topic_dist in enumerate(model.topic_word_):
    topic_words = np.argsort(topic_dist)[:-(n_top_words + 1):-1]
    print('Topic {}: {}'.format(
        i, ' '.join([id2word[index] for index in topic_words])))

doc_topics = model.transform(X)

exports_folder = 'data/exports/'
timestamp = time.strftime("%Y%m%d-%H%M%S")
exports_filename = 'guided_LDA_' + str(
    seed_confidence) + "_" + fileName + "_" + timestamp + '.csv'
exports_filepath = os.path.join(exports_folder, exports_filename)
with open(exports_filepath, 'w') as out:
    csv_out = csv.writer(out, delimiter='|')
    # header matches the three columns written per row below
    csv_out.writerow(['label', 'tweet_text', 'clean_text'])
    for i in range(len(X)):
        tweet = tweets[i]
        csv_out.writerow(
            [doc_topics[i].argmax(), tweet.tweet_text, tweet.clean_text])

parameters = Parameters()
parameters.add_parameter("num_topics", n_topics)
parameters.add_parameter("num_iterations", n_iter)
parameters.add_parameter("seed_probability", seed_confidence)
parameters.add_complex_parameter("seed_topics", seed_topic_list)
parameters.add_complex_parameter("bigrams_counts", sorted_bigrams[:10])
parameters.add_complex_parameter("unigrams_counts", sorted_unigrams[:100])
parameters.write_parameters(exports_folder, timestamp)
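This snippet picks up after the topic model has already been fitted. For context, a plausible setup with the guidedlda package, whose API matches the transform, seed_confidence, and seed-topic usage above, would look like this (building seed_topics from seed_topic_list and a word2id vocabulary follows the library's README; word2id and the exact preprocessing are assumptions):

import guidedlda

# map each seeded word to its topic id; word2id is the vocabulary
# mapping produced during vectorization (assumed, not shown here)
seed_topics = {}
for topic_id, words in enumerate(seed_topic_list):
    for word in words:
        if word in word2id:
            seed_topics[word2id[word]] = topic_id

model = guidedlda.GuidedLDA(n_topics=n_topics, n_iter=n_iter,
                            random_state=7, refresh=20)
# seed_confidence controls how strongly seeded words are nudged
# toward their assigned topic during initialization
model.fit(X, seed_topics=seed_topics, seed_confidence=seed_confidence)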

Example #3
# Generate similarity for all sub-clusters
MAX_DOCUMENT_LENGTH = 300
EMBEDDING_SIZE = 5
HIDDEN_SIZE1 = 4
HIDDEN_SIZE2 = 4
ATTENTION_SIZE = 2
lr = 1e-4
BATCH_SIZE = 256
KEEP_PROB = 0.5
LAMBDA = 0.0001

MAX_LABEL = 2
epochs = 200

#dbpedia = tf.contrib.learn.datasets.load_dataset('dbpedia')
parameters = Parameters()
parameters.add_parameter("METHOD", "O-SVM")
parameters.add_parameter("MAX_DOCUMENT_LENGTH", MAX_DOCUMENT_LENGTH)
parameters.add_parameter("EMBEDDING_SIZE", EMBEDDING_SIZE)
parameters.add_parameter("HIDDEN_SIZE1", HIDDEN_SIZE1)
parameters.add_parameter("HIDDEN_SIZE2", HIDDEN_SIZE2)
parameters.add_parameter("lr", lr)
parameters.add_parameter("BATCH_SIZE", BATCH_SIZE)
parameters.add_parameter("KEEP_PROB", KEEP_PROB)
parameters.add_parameter("LAMBDA", LAMBDA)
parameters.add_parameter("MAX_LABEL", MAX_LABEL)
parameters.add_parameter("epochs", epochs)

# load data
x_train, y_train = (
    [], []
)  #load_data("data/classification_data/Training Data/train.csv", names=["Label", "clean_text", "tweet_text"])
Example #4
MAX_DOCUMENT_LENGTH = 10
EMBEDDING_SIZE = 2
HIDDEN_SIZE = 4
ATTENTION_SIZE = 3
lr = 1e-3
BATCH_SIZE = 256
KEEP_PROB = 0.5
LAMBDA = 0.0001

MAX_LABEL = 2
epochs = 100

#dbpedia = tf.contrib.learn.datasets.load_dataset('dbpedia')
parameters = Parameters()
parameters.add_parameter("METHOD", "BI-LSTM")
parameters.add_parameter("MAX_DOCUMENT_LENGTH", MAX_DOCUMENT_LENGTH)
parameters.add_parameter("EMBEDDING_SIZE",EMBEDDING_SIZE)
parameters.add_parameter("HIDDEN_SIZE",HIDDEN_SIZE)
parameters.add_parameter("lr",lr)
parameters.add_parameter("BATCH_SIZE",BATCH_SIZE)
parameters.add_parameter("KEEP_PROB",KEEP_PROB)
parameters.add_parameter("LAMBDA",LAMBDA)
parameters.add_parameter("MAX_LABEL",MAX_LABEL)
parameters.add_parameter("epochs",epochs)

# load data
x_train, y_train = ([], [])  #load_data("data/classification_data/Training Data/train.csv", names=["Label", "clean_text", "tweet_text"])
x_test, y_test = ([], [])  #load_data("data/classification_data/Training Data/test.csv")

datafolder = 'data/classification_data/Training Data/41'
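Every example leaves the load_data calls commented out. A minimal sketch of a loader with a compatible signature, assuming pipe-delimited files like the one exported in Example #2, a label in the first column, and a clean_text column to classify (all of which are assumptions beyond the commented-out call itself):

import pandas as pd


def load_data(filepath, names=None):
    # hypothetical loader matching the commented-out calls above;
    # the '|' delimiter mirrors the export written in Example #2
    df = pd.read_csv(filepath, delimiter='|', names=names)
    texts = df['clean_text'].tolist()      # text fed to the classifier
    labels = df[df.columns[0]].tolist()    # first column holds the label
    return texts, labels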