def train_model():
    """Train the attention RNN classifier and save it to disk.

    Steps, in order:

    1. Load the 300-dimensional ``datastories.twitter`` embeddings via
       ``get_embeddings`` (embedding matrix plus word -> index map).
    2. Build the preprocessing/extraction pipeline and obtain the
       train/validation split from ``load_train_test`` (``test_size=0.2``).
    3. Build a 2-layer bidirectional LSTM with simple attention for
       3 classes and fit it for 18 epochs with batch size 128.
    4. Save the fitted model to ``data/model_weights/new_bi_model_1.h5``,
       resolved against the current working directory.

    Returns:
        None. The function is run for its side effects (console output
        and the saved model file).
    """
    # One sequence length for both the extractor and the network input;
    # the two must agree, so keep a single source of truth.
    maxlen = 50

    emb_matrix, word_map = get_embeddings('datastories.twitter', 300)

    pipeline = Pipeline([
        ('preprocessor', tweetsPreprocessor(load=True)),
        ('extractor', EmbExtractor(word_idxs=word_map, maxlen=maxlen)),
    ])

    X_train, X_val, y_train, y_val = load_train_test(pipeline=pipeline,
                                                     test_size=0.2)

    model = build_attention_rnn(emb_matrix,
                                classes=3,
                                maxlen=maxlen,
                                unit=LSTM,
                                layers=2,
                                trainable_emb=False,
                                bidirectional=True,
                                attention='simple',
                                dropout_attention=0.5,
                                layer_dropout_rnn=0.5,
                                dropout_rnn=0.5,
                                rec_dropout_rnn=0.5,
                                clipnorm=1,
                                lr=0.01,
                                loss_l2=0.0001)

    # summary() prints the table itself and returns None, so wrapping it
    # in print() only added a stray "None" line to the output.
    model.summary()

    print('Training model...')
    model.fit(X_train, y_train,
              validation_data=(X_val, y_val),
              epochs=18,
              batch_size=128)
    print('Model trained')

    print('saving model...')
    weights_dir = os.path.abspath('data/model_weights')
    # Make sure the target directory exists so a finished training run is
    # not lost to a missing-folder error at save time.
    os.makedirs(weights_dir, exist_ok=True)
    model.save(os.path.join(weights_dir, 'new_bi_model_1.h5'))
    print('doone')
# NOTE(review): the next line is the tail of a function whose `def` and loop
# headers lie outside this chunk. It is left exactly as found because its
# nesting cannot be recovered from what is visible here.
continue index = word_indices[k] emb_disease = embeddings[index] emb_period = [sum(x) for x in zip(emb_period, emb_disease)] emb_list.append(emb_period) return emb_list

# ---------------------------------------------------------------------------
# Command-line arguments
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
# Shared options are registered by the project helper; `args.mode` and
# `args.timestep` used below are presumably defined there -- TODO confirm.
common_utils.add_common_arguments(parser)
parser.add_argument('--target_repl_coef', type=float, default=0.0)
args = parser.parse_args()
print(args)

# True only when a positive coefficient is given AND the run is in train mode.
target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# ---------------------------------------------------------------------------
# Embeddings and data readers
# ---------------------------------------------------------------------------
embeddings, word_indices = get_embeddings(corpus='claims_codes_hs', dim=300)

# Train and validation readers share the same data directory and differ only
# in the list file they are given.
train_reader = ReadmissionReader(
    dataset_dir='/mnt/MIMIC-III-clean/readmission_cv2/data/',
    listfile='/mnt/MIMIC-III-clean/readmission_cv2/0_train_listfile801010.csv')
val_reader = ReadmissionReader(
    dataset_dir='/mnt/MIMIC-III-clean/readmission_cv2/data/',
    listfile='/mnt/MIMIC-III-clean/readmission_cv2/0_val_listfile801010.csv')

# `args.timestep` is converted to float before being handed to the
# discretizer. `imput_strategy` is the keyword exactly as spelled here.
discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

# Number of examples reported by the training reader.
N = train_reader.get_number_of_examples()
TASK = "CE" # Specify the Subtask. It is needed to correctly load the data ############################################################################ # PERSISTENCE ############################################################################ # if True save model checkpoints, as well as the corresponding word indices # you HAVE tp set PERSIST = True, in order to be able to use the trained model later PERSIST = False best_model = lambda: "cp_model_task4_sub{}.hdf5".format(TASK) best_model_word_indices = lambda: "cp_model_task4_sub{}_word_indices.pickle".format( TASK) ############################################################################ # LOAD DATA ############################################################################ embeddings, word_indices = get_embeddings(corpus=WV_CORPUS, dim=WV_DIM) if TASK == "BD": loader = Task4Loader(word_indices, text_lengths=(target_max_length, text_max_length), subtask=TASK, filter_classes={"positive", "negative"}, y_one_hot=False) classes = ['positive', 'negative'] else: loader = Task4Loader(word_indices, text_lengths=(target_max_length, text_max_length), subtask=TASK) classes = ["-2", "-1", "0", "1", "2"] if PERSIST:
# NOTE(review): the next line is the tail of a definition that starts outside
# this chunk (the end of a try/except that returns a rendered template). It is
# left exactly as found because its nesting cannot be recovered from here.
neu=opinions[1], neg=opinions[2]) except: print('fail') return render_template( 'analyze.html', error=f'Remember You can only load .csv file and it has to \ contain one of the followings columns: {approved_col_names}')


if __name__ == '__main__':
    # Same host and port as the app.run() call below, plus the /api/ path.
    # Not referenced again in this block -- presumably read elsewhere in the
    # file (TODO confirm).
    url = 'http://localhost:5002/api/'

    # Load the saved model from data/model_weights, resolved against the
    # current working directory.
    model_weights = os.path.abspath('data/model_weights/new_bi_model_1.h5')
    # NOTE(review): custom_objects is given an Attention *instance*; confirm
    # that load_model does not expect the class itself here.
    model = load_model(model_weights,
                       custom_objects={'Attention': Attention()})

    # NOTE(review): assuming this block sits at module level, `global` has no
    # effect here -- `graph` is already a module-level name.
    global graph
    graph = tf.get_default_graph()

    # Settings for the embedding lookup and the extractor's sequence length.
    MAXLEN = 50
    CORPUS = 'datastories.twitter'
    DIM = 300

    # Only the word -> index map is needed here; the first return value of
    # get_embeddings is discarded.
    _, word_map = get_embeddings(CORPUS, DIM)
    pipeline = Pipeline([
        ('preprocessor', tweetsPreprocessor(load=False)),
        ('extractor', EmbExtractor(word_idxs=word_map, maxlen=MAXLEN))])

    # NOTE(review): debug=True is presumably meant for local development only;
    # confirm it is switched off for any non-local deployment.
    app.run(debug=True, host='localhost', port=5002)

#TODO: ''' - add docs for functions '''