def update_model():
    """Fold all pending TODO texts into the stored text model.

    Steps, in order:

    1. If the ``model`` document does not exist, create it from the
       ``json_string`` of the ``initial_model`` document.
    2. Stream every document in ``todos_collection``; collect its ``text``
       (appending a period when it does not already end in punctuation) and
       delete the document.
    3. If any text was collected, build a ``TextModel`` from the
       tab-joined texts, combine it with the stored model via
       ``markovify.combine`` and write the result back to ``model``.

    TODOs with an empty (or missing-valued) text are deleted without
    contributing to the model.
    """
    model_ref = models_collection.document("model")
    if not model_ref.get().exists:
        initial_model = models_collection.document("initial_model").get().to_dict()
        model_ref.set({"json_string": initial_model["json_string"]})

    to_add = []
    for todo in todos_collection.stream():
        text = todo.to_dict()["text"]
        # Guard: indexing text[-1] on an empty string raises IndexError, which
        # would abort the update and leave this TODO undeleted.
        if text:
            # Add some punctuation if not present to help separate different TODOs.
            if text[-1] not in string.punctuation:
                text += "."
            to_add.append(text)
        todos_collection.document(todo.id).delete()

    if to_add:
        to_merge = TextModel("\t".join(to_add))
        serialized_model = model_ref.get().to_dict()["json_string"]
        old_model = TextModel.from_json(serialized_model)
        new_model = markovify.combine([old_model, to_merge])
        model_ref.set({"json_string": new_model.to_json()})
import firebase_admin
import markovify
from firebase_admin import credentials, firestore

from text_model import TextModel

# Initialise the Firebase app from the local key file and get a Firestore client.
creds = credentials.Certificate("./firestore-key.json")
firebase_admin.initialize_app(creds)
db = firestore.client()

# Build the model from the full contents of issue_titles.txt.
with open("./issue_titles.txt") as f:
    text = f.read()
model = TextModel(text)

# Persist the serialized model as the "initial_model" document of the
# "models" collection.
initial_model_ref = db.collection("models").document("initial_model")
initial_model_ref.set({"json_string": model.to_json()})
def update_model(todo_text=None):
    """Merge a model built from *todo_text* into ``session_state.model``.

    Wraps *todo_text* in a ``TextModel`` and replaces
    ``session_state.model`` with the ``markovify.combine`` of the current
    model and the new one.

    Args:
        todo_text: Text of the TODO to learn from. When ``None`` (the
            default) the call is a no-op.

    Returns:
        None. The session model is updated in place.
    """
    if todo_text is None:
        # The default argument would otherwise be handed to TextModel as-is;
        # there is no text to build a model from, so leave the session model
        # untouched.
        return
    to_combine = TextModel(todo_text)
    session_state.model = markovify.combine([session_state.model, to_combine])
def finished_tasks(col):
    """Render the "My Finished Tasks" section into *col*.

    Writes the section heading to *col*, then delegates to
    ``render_tasks_and_buttons`` with ``session_state.my_finished_tasks``,
    a "Remove" button label and ``remove_finished_task`` as the action.
    """
    col.write("### My Finished Tasks")
    render_tasks_and_buttons(
        column=col,
        tasks=session_state.my_finished_tasks,
        button_label="Remove",
        button_action=remove_finished_task,
    )


# NOTE(review): the original layout was collapsed onto one line; the nesting
# below is reconstructed from the statements themselves - confirm.

# Build the session model when it is not set yet, showing a warning in a
# placeholder while the text file is read.
placeholder = st.empty()
if not session_state.model:
    placeholder.warning("Initializing. Please wait...")
    with open("./issue_titles.txt") as f:
        text = f.read()
    session_state.model = TextModel(text)
    placeholder.empty()

st.write("# TODOs and Stuff")

# Submission form: a text input (keyed by session_state.input_key) and an
# opt-in checkbox.
with st.beta_form(submit_label="Submit", key="submit_form"):
    input_placeholder = st.empty()
    todo_text = input_placeholder.text_input(
        "Add a TODO!", key=session_state.input_key
    ).strip()
    share_me = st.checkbox("Help improve our TODO suggestions?", value=True)

if todo_text:
    # Bump the widget key once a non-empty TODO has been entered.
    session_state.input_key += 1
slice_s1 = slice_s1.permute(3, 2, 1, 0) q_out = q.gather(2, slice_s1).squeeze(2) slice_s2 = S2.long().expand(1, config.l_q, q.size(0)) slice_s2 = slice_s2.permute(2, 1, 0) q_out = q_out.gather(2, slice_s2).squeeze(2) logits = self.fc(q_out) return logits, self.sm(logits), v, r, heatmap if __name__ == '__main__': #create object embedding model objects = 10 obj_embed = 7 object_model = LookupModel(objects, embed_dim=obj_embed) #text lstm model attn_kernel = 3 attn_in = obj_embed attn_out = 1 # no. of heatmap channels lstm_out = (attn_kernel**2) * attn_in * attn_out vocab_size = 300 instruction_model = TextModel(vocab_size, ninp=15, nhid=30, nlayers=1, out_dim=lstm_out) heatmap_model = AttentionHeatmap(instruction_model, attn_kernel, attn_in, attn_out)
from text_model import TextModel

# load configuration
# The default config lives in "<parent of this file's directory>/configs".
default_config_folder = os.path.join(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
    'configs',
)
default_config_file = os.path.join(
    default_config_folder, 'text_classification_inference.ini'
)
# A config path given as the first command-line argument overrides the default.
config_path = sys.argv[1] if len(sys.argv) > 1 else default_config_file
cfg = config.load_config(config_path)

logging.info('---------------------------------------------------')
logging.info('-------- Classification module (inference) --------')
logging.info('---------------------------------------------------')

# ---------------------------- Load model --------------------------- #
classification_model = TextModel(cfg)
logging.info('Loading trained classification model...')
# NOTE(review): `trasformer` looks like a typo for `transformer`; the name is
# kept unchanged because code outside this view may reference it.
(
    name,
    model,
    feature,
    trasformer,
    embeddings,
    label_encoder,
) = classification_model.load()

# ---------------------------- Load data ---------------------------- #
data_loader = TextDataLoader(cfg, label_encoder)
logging.info('Loading data to be classified...')
inference_data, invalid_id = data_loader.load_classification_data('inference')

# ----------------------- Extract features -------------------------- #
feature_extractor = TextFeatureExtractor(cfg, embeddings=embeddings)
test_x, trans_tfidf_ngram) test_tfidf_char = feature_extractor.get_tfidf_features( test_x, trans_tfidf_char) # word embeddings if cfg.features.word_embeddings: logging.info('Computing word embeddings...') # compute transformers to embedding space trans_emb = feature_extractor.get_embedding_transformer() train_emb = feature_extractor.get_embedding_features(train_x, trans_emb) valid_emb = feature_extractor.get_embedding_features(valid_x, trans_emb) test_emb = feature_extractor.get_embedding_features(test_x, trans_emb) # -------------------- Train classifiers ---------------------------- # classification_model = TextModel(cfg, data_loader.df) models = dict() if cfg.model.train_nb: logging.info('Training Naive Bayes classifier...') # train Naive Bayes on Bag-of-Words features if cfg.features.bow: models["NB_BOW"] = [ classification_model.train('NB', train_bow, train_y), 'bow' ] # train Naive Bayes on TF-IDF features of different levels (word, ngram, and char) if cfg.features.tfidf: models["NB_TF-IDF_WORD"] = [ classification_model.train('NB', train_tfidf_word, train_y),
assert self.word2code is not None begin = stdtime() out = [] for c in text: out.append(self.word2code[c]) out = ''.join(out) compress_rate = len(out) / len(text) time = stdtime() - begin print('Compress time: {:.2f}s, compression rate: {}'.format( time, compress_rate)) return out def fit_compress(self, text): self.fit_text(text) return self.compress(text) if __name__ == '__main__': from text_model import TextModel model = TextModel(['a', 'b', 'c', 'd'], [1, 5, 3, 7]) print(model.entropy()) c1 = Compressor() c2 = Compressor() c1.fit_textmodel(model) text = model.random_text(1000) out1 = c1.compress(text) print(c1.word2code) out2 = c2.fit_compress(text) print(c2.word2code)
# ----------------------- Reduce dimensionality ----------------------- #
# NOTE(review): nesting reconstructed from a collapsed line - each feature
# family is assumed to be reduced only when LSA is enabled; confirm.
if cfg.features.lsa:
    logging.info('Reducing dimensionality...')

    # Bag-of-Words
    if cfg.features.bow:
        data_bow_lsa = feature_extractor.lsa(data_bow)

    # TF-IDF
    if cfg.features.tfidf:
        data_tfidf_word_lsa = feature_extractor.lsa(data_tfidf_word)
        data_tfidf_ngram_lsa = feature_extractor.lsa(data_tfidf_ngram)
        data_tfidf_char_lsa = feature_extractor.lsa(data_tfidf_char)

# ----------------------- Compute topics (LDA) ------------------------ #
clustering_model = TextModel(cfg, data_loader.df)

if cfg.model.lda:
    logging.info('Latent Dirichlet Allocation...')

    # compute LDA on Bag-of-Words features
    if cfg.features.bow:
        # Only the second element of the returned pair is kept.
        _, data_bow_lda = clustering_model.lda(data_bow)

    # compute LDA on TF-IDF features (words and n-grams only)
    if cfg.features.tfidf:
        _, data_tfidf_word_lda = clustering_model.lda(data_tfidf_word)
        _, data_tfidf_ngram_lda = clustering_model.lda(data_tfidf_ngram)

# ------------------------- Compute clusters ------------------------- #
models = {}