Code Example #1
    def createModel(self, input_ids, input_mask, segment_ids, labels, is_training):
        if FLAGS.max_seq_length > self.bert_config.max_position_embeddings:  # the model has a maximum input length (512)
            raise ValueError("input exceeds the model's maximum sequence length")
        # create the BERT model
        model = modeling.BertModel(
            config=self.bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=False  # True is faster on TPU; False is faster on CPU/GPU
        )
        # per-token outputs, shape [batch_size, seq_length, embedding_size];
        # use this output for seq2seq or NER tasks
        self.embedding = model.get_sequence_output()
        hidden_size = self.embedding.shape[-1].value  # output embedding dimension
        max_seq_length = self.embedding.shape[1].value  # sequence length

        used = tf.sign(tf.abs(input_ids))
        self.lengths = tf.reduce_sum(used, axis=1)  # [batch_size] vector holding each sequence's length in the current batch

        blstm_crf = BLSTM(embedded_chars=self.embedding, hidden_unit=FLAGS.lstm_size, cell_type=FLAGS.cell,
                          num_layers=FLAGS.num_layers,
                          droupout_rate=FLAGS.droupout_rate, initializers=initializers, num_labels=FLAGS.num_labels,
                          seq_length=max_seq_length, labels=labels, lengths=self.lengths, is_training=is_training)
        blstm_crf.add_blstm_crf_layer()
        self.total_loss = blstm_crf.totle_loss  # `totle_loss` is the attribute name (sic) in the BLSTM class
        self.predict = blstm_crf.predict
        self.trans = blstm_crf.trans
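A note on the length computation above: non-padding token ids are non-zero, so tf.sign(tf.abs(input_ids)) yields a 0/1 mask whose row sums are the true sequence lengths. A minimal NumPy sketch of the same idea (the toy ids are made up):

import numpy as np

# two padded sequences; 0 is the padding id
input_ids = np.array([[101, 2023, 102, 0, 0],
                      [101, 102, 0, 0, 0]])
used = np.sign(np.abs(input_ids))  # 0/1 mask of real tokens
lengths = used.sum(axis=1)         # -> [3 2]
print(lengths)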
Code Example #2
def main():
    if len(sys.argv) != 2:
        print("Usage: python vuldeepecker_train.py [data filename]")
        sys.exit(1)
    filename = sys.argv[1]
    parse_file(filename)
    base = os.path.splitext(os.path.basename(filename))[0]
    vector_filename = base + "_gadget_vectors.pkl"
    vector_length = 50
    df = get_vectors_df(filename, vector_length)
    df.to_pickle(vector_filename)
    blstm = BLSTM(name=base)
    blstm.train(df, epochs=4)
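The pickled DataFrame written above can be reloaded later without re-running vectorization; a minimal sketch, assuming a base name of "cwe119" (hypothetical) so the file is "cwe119_gadget_vectors.pkl":

import pandas

df = pandas.read_pickle("cwe119_gadget_vectors.pkl")
print(df.shape)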
Code Example #3
File: train.py  Project: pikaliov/ner-tf
def create_model(session):
    model = BLSTM(
        FLAGS.num_units,
        FLAGS.num_layers,
        FLAGS.num_steps,
        FLAGS.num_labels,
        FLAGS.emb_size,
        FLAGS.vocab_size,
        FLAGS.learning_rate,
        FLAGS.max_clip_norm,
        FLAGS.use_crf,
        get_embs(),
        FLAGS.trainable_embs,
        tf.float16 if FLAGS.use_fp16 else tf.float32)

    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print('Restoring model from %s.' % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
        # checkpoint paths end in "-<epoch>", so the epoch number can be
        # recovered from the filename
        epoch = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
    else:
        print('Created model with fresh parameters.')
        session.run(tf.global_variables_initializer())
        epoch = 0
    return epoch, model
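The epoch recovery above relies on TensorFlow checkpoint paths ending in "-<number>"; a self-contained sketch of that parse (the example path is an assumption):

# hypothetical checkpoint path of the form tf.train.Saver writes
path = 'train_dir/model.ckpt-12'
epoch = int(path.split('/')[-1].split('-')[-1])
print(epoch)  # -> 12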
Code Example #4
def main():
    if len(sys.argv) != 2:
        print("Usage: python vuldeepecker_train.py [data filename]")
        sys.exit(1)
    filename = sys.argv[1]
    parse_file(filename)
    base = os.path.splitext(os.path.basename(filename))[0]
    vector_filename = base + "_gadget_vectors.pkl"
    vector_length = 50
    df = get_vectors_df(filename, vector_length)
    df.to_pickle(vector_filename)
    blstm = BLSTM(name=base)
    vectors = np.stack(df.iloc[:, 1].values)
    print(df)
    print("vectors: ", vectors)
    #np.savetxt(r'np.txt', df.values)
    with open('df_file.txt', 'w') as outfile:
        df.to_string(outfile)

    df.to_csv('out.csv', encoding='utf-8', index=False)

    blstm.train(df, epochs=4)
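np.stack above turns a DataFrame column holding one vector per row into a single 2-D array; a toy sketch with made-up data:

import numpy as np
import pandas

df = pandas.DataFrame({"name": ["a", "b"],
                       "vector": [np.zeros(3), np.ones(3)]})
vectors = np.stack(df.iloc[:, 1].values)  # second column holds the vectors
print(vectors.shape)  # -> (2, 3)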
Code Example #5
def create_model(session):
    model = BLSTM(FLAGS.num_units, FLAGS.num_layers, FLAGS.num_steps,
                  FLAGS.num_labels, FLAGS.emb_size, FLAGS.vocab_size,
                  FLAGS.learning_rate, FLAGS.max_clip_norm, FLAGS.use_crf,
                  None, False, tf.float16 if FLAGS.use_fp16 else tf.float32)

    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print('Restoring model from %s.' % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
        return model
    else:
        # ckpt may be None when no checkpoint exists, so report the
        # directory instead of dereferencing ckpt.model_checkpoint_path
        raise Exception('Could not find model in %s' % FLAGS.train_dir)
Code Example #6
def main():
    if len(sys.argv) != 3:
        print("Usage: python vuldeepecker.py [data filename] [model file]")
        sys.exit(1)
    filename = sys.argv[1]
    modelpath = sys.argv[2]
    parse_file(filename)
    base = os.path.splitext(os.path.basename(filename))[0]
    vector_length = 50
    df = get_vectors_df(filename, vector_length)
    blstm = BLSTM()
    blstm.load(modelpath)
    blstm.predict(df)
Code Example #7
File: vuldeepecker.py  Project: xuxinda/VDPython
def main():
    if len(sys.argv) != 2:
        print("Usage: python vuldeepecker.py [filename]")
        sys.exit(1)
    filename = sys.argv[1]
    parse_file(filename)
    base = os.path.splitext(os.path.basename(filename))[0]
    vector_filename = base + "_gadget_vectors.pkl"
    vector_length = 50
    if os.path.exists(vector_filename):
        df = pandas.read_pickle(vector_filename)
    else:
        df = get_vectors_df(filename, vector_length)
        df.to_pickle(vector_filename)
    blstm = BLSTM(df, name=base)
    blstm.train()
    blstm.test()
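The exists-check above is the usual cache-the-expensive-step idiom; a small generalization, where the build callable is a hypothetical stand-in for get_vectors_df:

import os
import pandas

def cached_df(pickle_path, build):
    # reuse the pickle if present, otherwise build it and persist it
    if os.path.exists(pickle_path):
        return pandas.read_pickle(pickle_path)
    df = build()
    df.to_pickle(pickle_path)
    return df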
Code Example #8
    else:  # excerpt begins mid-function; the matching `if` branch is not shown
        feed = {input_x: x, input_len: len, input_labels: labels}
    return feed


wordemb = tf.Variable(np.asarray(word_vectors),
                      name="word2vec",
                      trainable=False)
input_emb = tf.nn.embedding_lookup(wordemb, input_x)
blstm_model = BLSTM(embedded_chars=input_emb,
                    hidden_unit=FLAGS.lstm_size,
                    cell_type=FLAGS.cell,
                    num_layers=FLAGS.num_layers,
                    droupout_rate=FLAGS.droupout_rate,
                    initializers=initializers,
                    num_labels=FLAGS.num_labels,
                    seq_length=FLAGS.max_seq_length,
                    labels=input_labels,
                    lengths=input_len,
                    is_training=is_training)

# blstm_model.add_blstm_layer()  # use a plain BLSTM here
blstm_model.add_blstm_crf_layer()  # use BLSTM + CRF here

train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
    blstm_model.totle_loss)

init_op = tf.global_variables_initializer()
idx_to_tag = {idx: tag for tag, idx in FLAGS.label_map[0].items()}
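idx_to_tag above simply inverts a tag-to-index map so predicted label ids can be decoded back to tag strings; a toy illustration (the label_map contents are assumptions):

label_map = {"O": 0, "B-PER": 1, "I-PER": 2}
idx_to_tag = {idx: tag for tag, idx in label_map.items()}
print([idx_to_tag[i] for i in (1, 2, 0)])  # -> ['B-PER', 'I-PER', 'O']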
Code Example #9
            "k_fold",
            "test_size",
        ]
        for parameter in parameters:
            val = getattr(result, parameter)
            if val is not None:
                setattr(config, parameter, val)

        print("[*] loading dataset...")
        dataset = load_dataset(category)
        print("[+] loading dataset complete")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = BLSTM(
            config.input_size,
            config.hidden_size,
            config.num_layers,
            config.num_classes,
            config.dropout,
            device,
        ).to(device)
        print("[*] training model...")
        fitter = Fitter(model, device, config)
        total_result = fitter.cross_validation(dataset)

        if result.output is None:
            f = open("./cross_val.csv", "w")
        else:
            f = result.output
        f.write(
            "fold,epoch,train_f1,val_f1,train_acc,val_acc,train_recall,val_recall,train_loss,val_loss\n"
        )
        for fold, fold_result in enumerate(total_result):