MAX_DOCUMENT_LENGTH = 10
EMBEDDING_SIZE = 2
HIDDEN_SIZE = 4
ATTENTION_SIZE = 3
lr = 1e-3
BATCH_SIZE = 256
KEEP_PROB = 0.5
LAMBDA = 0.0001
MAX_LABEL = 2
epochs = 100

# dbpedia = tf.contrib.learn.datasets.load_dataset('dbpedia')

parameters = Parameters()
parameters.add_parameter("MAX_LABEL", MAX_LABEL)

# load data
x_train, y_train = ([], [])  # load_data("data/classification_data/Training Data/train.csv", names=["Label", "clean_text", "tweet_text"])
x_test, y_test = ([], [])  # load_data("data/classification_data/Training Data/test.csv")

datafolder = 'data/classification_data/Training Data/1045'
exports_folder = 'data/exports/'
training_fileName = 'training_large_top50_clusters.csv'
test_fileName = 'test.csv'

parameters.add_parameter("Data Folder", datafolder)
parameters.add_parameter("Training filename", training_fileName)
parameters.add_parameter("Test filename", test_fileName)
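# The load_data helper referenced in the commented-out calls above is not
# part of this fragment. A minimal sketch of what it might look like,
# assuming pandas and a delimited file with the columns named at the call
# site ("Label", "clean_text", "tweet_text") -- the column handling and
# return shape here are assumptions, not the project's actual implementation:
import pandas as pd


def load_data(filepath, names=None):
    # Read the labelled tweet CSV; `names` supplies column names when the
    # file has no header row.
    df = pd.read_csv(filepath, names=names)
    # Return cleaned text as inputs and the label column as targets.
    return df["clean_text"].tolist(), df["Label"].tolist()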
# Print the top words per topic, then export document-topic assignments.
# The head of this loop was truncated in the source; it is reconstructed
# here from the print statement below, and n_top_words is an assumed value.
n_top_words = 10
for i, topic_dist in enumerate(model.topic_word_):
    topic_words = np.argsort(topic_dist)[:-(n_top_words + 1):-1]
    print('Topic {}: {}'.format(
        i, ' '.join([id2word[index] for index in topic_words])))

doc_topics = model.transform(X)

exports_folder = 'data/exports/'
timestamp = time.strftime("%Y%m%d-%H%M%S")
exports_filename = 'guided_LDA_' + str(
    seed_confidence) + "_" + fileName + "_" + timestamp + '.csv'
exports_filepath = os.path.join(exports_folder, exports_filename)

with open(exports_filepath, 'w') as out:
    csv_out = csv.writer(out, delimiter='|')
    # Header now matches the three columns written per row.
    csv_out.writerow(['label', 'tweet_text', 'clean_text'])
    for i in range(len(X)):
        tweet = tweets[i]
        csv_out.writerow(
            [doc_topics[i].argmax(), tweet.tweet_text, tweet.clean_text])

parameters = Parameters()
parameters.add_parameter("num_topics", n_topics)
parameters.add_parameter("num_iterations", n_iter)
parameters.add_parameter("seed_probability", seed_confidence)
parameters.add_complex_parameter("seed_topics", seed_topic_list)
parameters.add_complex_parameter("bigrams_counts", sorted_bigrams[:10])
parameters.add_complex_parameter("unigrams_counts", sorted_unigrams[:100])
parameters.write_parameters(exports_folder, timestamp)

# Generate similarity for all sub-clusters
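# The Parameters helper used throughout these scripts (add_parameter,
# add_complex_parameter, write_parameters) is defined elsewhere in the
# repository. A minimal sketch of the interface as it is exercised here --
# the storage format and output filename are illustrative assumptions:
import json
import os


class Parameters:
    def __init__(self):
        self._params = {}

    def add_parameter(self, name, value):
        # Scalar run settings: hyperparameters, folder and file names.
        self._params[name] = value

    def add_complex_parameter(self, name, value):
        # Structured values (seed topic lists, n-gram counts); stored the
        # same way and serialized on write.
        self._params[name] = value

    def write_parameters(self, folder, timestamp):
        # Dump one file per run, keyed by timestamp, next to the exports.
        path = os.path.join(folder, 'parameters_' + timestamp + '.json')
        with open(path, 'w') as f:
            json.dump(self._params, f, indent=2, default=str)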
MAX_DOCUMENT_LENGTH = 300
EMBEDDING_SIZE = 5
HIDDEN_SIZE1 = 4
HIDDEN_SIZE2 = 4
ATTENTION_SIZE = 2
lr = 1e-4
BATCH_SIZE = 256
KEEP_PROB = 0.5
LAMBDA = 0.0001
MAX_LABEL = 2
epochs = 200

# dbpedia = tf.contrib.learn.datasets.load_dataset('dbpedia')

parameters = Parameters()
parameters.add_parameter("METHOD", "O-SVM")
parameters.add_parameter("MAX_DOCUMENT_LENGTH", MAX_DOCUMENT_LENGTH)
parameters.add_parameter("EMBEDDING_SIZE", EMBEDDING_SIZE)
parameters.add_parameter("HIDDEN_SIZE1", HIDDEN_SIZE1)
parameters.add_parameter("HIDDEN_SIZE2", HIDDEN_SIZE2)
parameters.add_parameter("lr", lr)
parameters.add_parameter("BATCH_SIZE", BATCH_SIZE)
parameters.add_parameter("KEEP_PROB", KEEP_PROB)
parameters.add_parameter("LAMBDA", LAMBDA)
parameters.add_parameter("MAX_LABEL", MAX_LABEL)
parameters.add_parameter("epochs", epochs)

# load data
x_train, y_train = ([], [])  # load_data("data/classification_data/Training Data/train.csv", names=["Label", "clean_text", "tweet_text"])
MAX_DOCUMENT_LENGTH = 10
EMBEDDING_SIZE = 2
HIDDEN_SIZE = 4
ATTENTION_SIZE = 3
lr = 1e-3
BATCH_SIZE = 256
KEEP_PROB = 0.5
LAMBDA = 0.0001
MAX_LABEL = 2
epochs = 100

# dbpedia = tf.contrib.learn.datasets.load_dataset('dbpedia')

parameters = Parameters()
parameters.add_parameter("METHOD", "BI-LSTM")
parameters.add_parameter("MAX_DOCUMENT_LENGTH", MAX_DOCUMENT_LENGTH)
parameters.add_parameter("EMBEDDING_SIZE", EMBEDDING_SIZE)
parameters.add_parameter("HIDDEN_SIZE", HIDDEN_SIZE)
parameters.add_parameter("lr", lr)
parameters.add_parameter("BATCH_SIZE", BATCH_SIZE)
parameters.add_parameter("KEEP_PROB", KEEP_PROB)
parameters.add_parameter("LAMBDA", LAMBDA)
parameters.add_parameter("MAX_LABEL", MAX_LABEL)
parameters.add_parameter("epochs", epochs)

# load data
x_train, y_train = ([], [])  # load_data("data/classification_data/Training Data/train.csv", names=["Label", "clean_text", "tweet_text"])
x_test, y_test = ([], [])  # load_data("data/classification_data/Training Data/test.csv")

datafolder = 'data/classification_data/Training Data/41'
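# BATCH_SIZE and epochs above are consumed by the training loop, which is
# not part of this fragment. A generic mini-batch iterator of the kind such
# a loop would use -- the function name and shuffling behaviour here are
# illustrative assumptions, not the repository's actual helper:
import numpy as np


def batch_iter(x, y, batch_size, shuffle=True):
    x, y = np.asarray(x), np.asarray(y)
    indices = np.arange(len(x))
    if shuffle:
        np.random.shuffle(indices)
    # Yield successive (inputs, labels) batches; the last batch may be short.
    for start in range(0, len(x), batch_size):
        batch = indices[start:start + batch_size]
        yield x[batch], y[batch]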