Code example #1
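A training routine for a tweet sentiment classifier: pretrained datastories.twitter embeddings feed a preprocessing/extraction Pipeline, and a bidirectional LSTM with attention is trained and saved to disk.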
import os

from keras.layers import LSTM
from sklearn.pipeline import Pipeline

# NOTE: the imports above are the usual sources for these names; the project's
# own helpers (get_embeddings, tweetsPreprocessor, EmbExtractor,
# load_train_test, build_attention_rnn) are assumed to be in scope.


def train_model():
    emb_matrix, word_map = get_embeddings('datastories.twitter', 300)

    pipeline = Pipeline([('preprocessor', tweetsPreprocessor(load=True)),
                         ('extractor',
                          EmbExtractor(word_idxs=word_map, maxlen=50))])
    X_train, X_val, y_train, y_val = load_train_test(pipeline=pipeline,
                                                     test_size=0.2)

    model = build_attention_rnn(emb_matrix,
                                classes=3,
                                maxlen=50,
                                unit=LSTM,
                                layers=2,
                                trainable_emb=False,
                                bidirectional=True,
                                attention='simple',
                                dropout_attention=0.5,
                                layer_dropout_rnn=0.5,
                                dropout_rnn=0.5,
                                rec_dropout_rnn=0.5,
                                clipnorm=1,
                                lr=0.01,
                                loss_l2=0.0001)
    model.summary()  # summary() prints the architecture itself; print() would add a stray "None"
    print('Training model...')
    model.fit(X_train,
              y_train,
              validation_data=(X_val, y_val),
              epochs=18,
              batch_size=128)
    print('Model trained')
    print('Saving model...')
    model.save(
        os.path.join(os.path.abspath('data/model_weights'),
                     'new_bi_model_1.h5'))
    print('done')
Code example #2
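The tail of an embedding-summing helper, followed by setup for a MIMIC-III readmission experiment: argument parsing, train/validation readers, and a Discretizer for the clinical time series.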
# (snippet begins mid-function: inner loop of the embedding-summing helper)
                continue
            index = word_indices[k]
            emb_disease = embeddings[index]
            # running element-wise sum of this period's code embeddings
            emb_period = [sum(x) for x in zip(emb_period, emb_disease)]
        emb_list.append(emb_period)
    return emb_list
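# Self-contained sketch (not from the project) of the same bag-of-embeddings
# pattern used above; the function and variable names are illustrative, and
# `embeddings` is assumed to be a 2-D numpy array:
import numpy as np

def sum_embeddings(tokens, embeddings, word_indices):
    """Element-wise sum of the embedding vectors of all in-vocabulary tokens."""
    acc = np.zeros(embeddings.shape[1])
    for tok in tokens:
        if tok not in word_indices:  # skip out-of-vocabulary tokens
            continue
        acc += embeddings[word_indices[tok]]
    return acc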


parser = argparse.ArgumentParser()
common_utils.add_common_arguments(parser)
parser.add_argument('--target_repl_coef', type=float, default=0.0)
args = parser.parse_args()
print(args)

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')
embeddings, word_indices = get_embeddings(corpus='claims_codes_hs', dim=300)

train_reader = ReadmissionReader(
    dataset_dir='/mnt/MIMIC-III-clean/readmission_cv2/data/',
    listfile='/mnt/MIMIC-III-clean/readmission_cv2/0_train_listfile801010.csv')

val_reader = ReadmissionReader(
    dataset_dir='/mnt/MIMIC-III-clean/readmission_cv2/data/',
    listfile='/mnt/MIMIC-III-clean/readmission_cv2/0_val_listfile801010.csv')

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

N = train_reader.get_number_of_examples()
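# A hedged sketch (not in the original file) of how a reader and discretizer
# are usually combined in mimic3-benchmarks-style code. It assumes the upstream
# interfaces -- read_example() returning a dict with an 'X' field, and
# Discretizer.transform() returning a (data, header) pair -- which a fork's
# ReadmissionReader may or may not match:
example = train_reader.read_example(0)
data, header = discretizer.transform(example['X'])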
Code example #3
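Configuration and data loading for SemEval-2017 Task 4: a persistence switch for checkpoints, then a Task4Loader configured for the binary subtask BD or, otherwise, for the five-point classification subtasks.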
TASK = "CE"  # Specify the Subtask. It is needed to correctly load the data

############################################################################
# PERSISTENCE
############################################################################
# If True, save model checkpoints as well as the corresponding word indices.
# You HAVE to set PERSIST = True in order to be able to use the trained model later.
PERSIST = False
best_model = lambda: "cp_model_task4_sub{}.hdf5".format(TASK)
best_model_word_indices = lambda: "cp_model_task4_sub{}_word_indices.pickle".format(
    TASK)
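# A minimal sketch (not in the original file) of how these filename helpers are
# typically wired into checkpointing, using the standard keras.callbacks API:
from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath=best_model(),  # save to the name built above
                               monitor='val_loss',
                               save_best_only=True,
                               mode='min')
# e.g. model.fit(..., callbacks=[checkpointer] if PERSIST else [])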

############################################################################
# LOAD DATA
############################################################################
embeddings, word_indices = get_embeddings(corpus=WV_CORPUS, dim=WV_DIM)

if TASK == "BD":
    loader = Task4Loader(word_indices,
                         text_lengths=(target_max_length, text_max_length),
                         subtask=TASK,
                         filter_classes={"positive", "negative"},
                         y_one_hot=False)
    classes = ['positive', 'negative']
else:
    loader = Task4Loader(word_indices,
                         text_lengths=(target_max_length, text_max_length),
                         subtask=TASK)
    classes = ["-2", "-1", "0", "1", "2"]

if PERSIST:
    # snippet truncated here in the source; judging from the filename helpers
    # above, this branch presumably saves word_indices alongside the checkpoints
    pass
Code example #4
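The tail of a Flask route's error handling, followed by app startup: the model trained in code example #1 is reloaded with its custom Attention layer, the TF1 graph is captured, and a matching preprocessing Pipeline is rebuilt.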
# (snippet begins mid-route: tail of the try-branch call, presumably render_template)
                               neu=opinions[1],
                               neg=opinions[2])
    except Exception:  # avoid a bare except, which would also swallow KeyboardInterrupt/SystemExit
        print('fail')
        return render_template(
            'analyze.html',
            error='Remember: you can only upload a .csv file, and it has to '
                  f'contain one of the following columns: {approved_col_names}')


if __name__ == '__main__':
    url = 'http://localhost:5002/api/'
    model_weights = os.path.abspath('data/model_weights/new_bi_model_1.h5')
    model = load_model(model_weights,
                       custom_objects={'Attention': Attention()})
    global graph
    graph = tf.get_default_graph()  # TF1 pattern: capture the graph so request handlers can reuse it
    MAXLEN = 50
    CORPUS = 'datastories.twitter'
    DIM = 300
    _, word_map = get_embeddings(CORPUS, DIM)
    pipeline = Pipeline([('preprocessor', tweetsPreprocessor(load=False)),
                         ('extractor',
                          EmbExtractor(word_idxs=word_map, maxlen=MAXLEN))])
    app.run(debug=True, host='localhost', port=5002)

# TODO:
#   - add docs for functions
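# For context, a sketch (not in the original file) of how the objects created
# at startup -- pipeline, model, graph -- are typically combined at prediction
# time in TF1-era Flask apps. The route handlers are not shown in the snippet,
# so this helper and its name are purely illustrative:
def predict_sentiment(texts):
    X = pipeline.transform(texts)  # preprocess + map words to padded index sequences
    with graph.as_default():       # TF1 pattern: reuse the graph captured at startup
        probs = model.predict(X)
    return probs.argmax(axis=1)    # one class index per input text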