def main():
    # create instance of config; the Config object handles data loading:
    # it holds the vocabulary, the pretrained GloVe embedding matrix, and the str->id mapping functions
    config = Config()
    config.nepochs = 200
    config.dropout = 0.5
    config.batch_size = 40
    config.lr_method = "adam"
    config.lr = 0.0007
    config.lr_decay = 0.97
    config.clip = -5.0  # if negative, no clipping
    config.nepoch_no_imprv = 20

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Code example #2
def predict(weights_path, dataset, abstract_sentences):

    parser = argparse.ArgumentParser()
    config = Config(parser, log_config=False, dataset=dataset)

    # restore model weights
    model = HANNModel(config)
    model.build()
    model.restore_session(weights_path)

    sentences_words = []
    # split abstract to sentences
    for line in abstract_sentences:
        # split line into words and map words to ids
        sentence = [config.processing_word(word) for word in line.split()]
        sentences_words += [sentence]

    # run prediction
    labels_pred, _ = model.predict_batch([sentences_words])

    # map: label id to label string
    tag_id_to_label = dict((v, k) for k, v in config.vocab_tags.items())

    # convert predicted labels to string
    labels_pred_str = []
    for sublist in labels_pred:
        for item in sublist:
            labels_pred_str.append(tag_id_to_label[item])

    return labels_pred_str
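
A minimal usage sketch for predict(); the weights directory, dataset name, and sentences below are illustrative placeholders, not values from the original project:

# Hypothetical call; all arguments here are assumptions.
abstract = [
    "We study sentence classification in structured abstracts .",
    "The proposed model improves over the baseline .",
]
predicted_labels = predict("results/model.weights/", "pubmed", abstract)
for sentence, label in zip(abstract, predicted_labels):
    print(label, sentence)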
Code example #3
def main():
    # create instance of config
    config = Config(parser)

    # build model
    model = HANNModel(config)
    model.build()
    ############################################### comment this block out if the model is trained from scratch
    config.restore = True
    if config.restore:
        model.restore_session(
            "/home/lena/Dokumente/Master/dissertation/Data/output/model.weights"
        )  # optional, restore weights
    model.reinitialize_weights("proj")  #reinitialise for this scope
    #####################################################################

    # create datasets
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)

    # evaluate model
    model.evaluate(test)
Code example #4
def predict():
    config = Config()
    threshold = (config.sequence_length / 2) + 1
    config.batch_size = 1
    model = BertModel(config)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    saver = tf.train.Saver()
    ckpt_dir = config.ckpt_dir
    print("ckpt_dir:", ckpt_dir)
    with tf.Session(config=gpu_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
        for i in range(100):
            # 2.feed data
            input_x = np.random.randn(
                config.batch_size,
                config.sequence_length)  # [None, self.sequence_length]
            input_x[input_x >= 0] = 1
            input_x[input_x < 0] = 0
            target_label = generate_label(input_x, threshold)
            input_sum = np.sum(input_x)
            # 3.run session to train the model, print some logs.
            logit, prediction = sess.run(
                [model.logits, model.predictions],
                feed_dict={
                    model.input_x: input_x,
                    model.dropout_keep_prob: config.dropout_keep_prob
                })
            print("target_label:", target_label, ";input_sum:", input_sum,
                  "threshold:", threshold, ";prediction:", prediction)
            print("input_x:", input_x, ";logit:", logit)
Code example #5
def main():
    # create instance of config
    config = Config()
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # create dataset
    processing_word = get_processing_word(lowercase=True)

    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            test = CoNLLDataset(config.filename_test, processing_word)

        elif sys.argv[1] == 'dev':
            test = CoNLLDataset(config.filename_dev, processing_word)

    else:
        assert len(sys.argv) == 1
        test = CoNLLDataset(config.filename_test, processing_word)

    test4cl = CoNLLdata4classifier(test, processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    # evaluate and interact
    model.evaluate(test4cl)
Code example #6
def main(argv=None):
    # Configurations
    config = Config()
    config.DATA_DIR = [
        './data/SiW_M_Makeup_Ob_Binary_Files',
        './data/SiW_M_Mask_Silicone_Binary_Files',
        './data/SiW_M_Makeup_Co_Binary_Files',
        './data/SiW_M_Mask_Paper_Binary_Files',
        './data/SiW_M_Makeup_Im_Binary_Files',
        './data/SiW_M_Mask_Mann_Binary_Files',
        './data/SiW_M_Replay_Binary_Files',
        './data/SiW_M_Partial_Cut_Binary_Files',
        './data/SiW_M_Mask_Half_Binary_Files',
        './data/SiW_M_Partial_Funnyeye_Binary_Files',
        './data/SiW_M_Partial_Paperglass_Binary_Files',
        './data/SiW_M_Mask_Trans_Binary_Files',
        './data/SiW_M_Paper_Binary_Files', './data/SiW_M_Live_Binary_Files',
        './data/SiW_M_Live_Test_Binary_Files'
    ]
    config.DATA_DIR_LIVE = ['./data/SiW_M_Live_Binary_Files']
    config.DATA_DIR_TEST = ['./data/SiW_M_Live_Test_Binary_Files']
    config.LOG_DIR = './logs/model'
    config.MODE = 'training'
    # config.MODE = 'testing'
    config.STEPS_PER_EPOCH_VAL = 180
    config.display()

    # Get images and labels.
    # dataset_train = Dataset(config, 'train')
    # Build a Graph
    model = Model(config)

    # Train the model
    model.compile()
    model.train()
Code example #7
def main():
    # create instance of config
    config = Config()
    if config.use_elmo: config.processing_word = None

    #build model
    model = NERModel(config)

    learn = NERLearner(config, model)
    learn.load()

    if len(sys.argv) == 1:
        print("No arguments given. Running full test")
        sys.argv.append("eval")
        sys.argv.append("pred")

    if sys.argv[1] == "eval":
        # create datasets
        test = CoNLLDataset(config.filename_test, config.processing_word,
                             config.processing_tag, config.max_iter)
        learn.evaluate(test)

    if sys.argv[1] == "pred" or sys.argv[2] == "pred":
        try:
            sent = (sys.argv[2] if sys.argv[1] == "pred" else sys.argv[3])
        except IndexError:
            sent = ["Peter", "Johnson", "lives", "in", "Los", "Angeles"]

        print("Predicting sentence: ", sent)
        pred = learn.predict(sent)
        print(pred)
Code example #8
File: train.py Project: zhouuuuuu/EBM-NLP
def main(data_prefix=None):
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    if data_prefix:
        cwd = os.getcwd()
        config.filename_dev = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_dev))
        config.filename_test = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_test))
        config.filename_train = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_train))

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    print('training')
    model.train(train, dev)
Code example #9
def main():
    # create instance of config
    config = Config()
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # create dataset
    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            test = CoNLLDataset(config.filename_test, config.processing_word,
                                config.processing_tag, config.max_iter)
        elif sys.argv[1] == 'dev':
            test = CoNLLDataset(config.filename_dev, config.processing_word,
                                config.processing_tag, config.max_iter)
    else:
        assert len(sys.argv) == 1
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
Code example #10
def main():
    # First-stage model predictions #
    config_first = Config(dir_output='./results/train_first/')
    model = NERModel(config_first)
    model.build()
    model.restore_session(config_first.dir_model)
    test = CoNLLDataset(config_first.filename_test,
                        config_first.processing_word,
                        config_first.processing_tag, config_first.max_iter)

    print()
    print('Predicting first stage!')
    model.evaluate(test)
    print()

    test_predictions = model.predict_test(test)
    formatted_predictions = format_predictions(test_predictions, 'test',
                                               config_first)

    # Second-stage model predictions #
    tf.reset_default_graph()
    config_second = Config(dir_output='./results/train_second/')
    model = NERModel2(config_second)
    model.build()
    model.restore_session(config_second.dir_model)

    print()
    print('Predicting second stage!')
    model.evaluate(formatted_predictions)
    print()
Code example #11
def main():
    config = Config('./results/train_folds/')
    train_predictions_file = './data/predictions/formatted_train_predictions.npy'

    kf = KFold(n_splits=5)

    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    train = np.array([el for el in train])
    predictions = [0 for _ in train]

    for train_ids, evaluate_ids in kf.split(train):
        train_dataset = train[train_ids]
        evaluate_dataset = train[evaluate_ids]
        tf.reset_default_graph()
        config = Config('./results/train_folds/')
        model = NERModel(config)
        model.build()
        model.train(train_dataset, evaluate_dataset)
        for id, tags in zip(evaluate_ids,
                            model.predict_test(evaluate_dataset)):
            predictions[id] = tags
        model.close_session()

    predictions = np.array(predictions)
    formatted_predictions = format_predictions(predictions, 'train', config)
    np.save(train_predictions_file, formatted_predictions)
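
Note: if format_predictions returns ragged per-sentence tag lists, np.save stores them as an object array, and reading the file back then requires allow_pickle=True (a NumPy requirement since 1.16.3):

# Assumes the saved array has object dtype (ragged tag lists per sentence).
predictions = np.load(train_predictions_file, allow_pickle=True)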
Code example #12
def compute_confuse_matrix(fname, classes):
    """
    Given a file, compute the confusion matrix of y_true and y_pred.
    """
    print('im in')
    y_true = []
    with codecs.open(fname, 'r', 'utf8') as f:
        for line in f:
            line = line.strip().split('\t')[-1]
            y_true.append(line)

    checkpoint_dir = "output/self_attention/multi_attention_0802/"
    pred_path = "tmp/eval_y_self_attention.txt"
    if os.path.exists(checkpoint_dir + 'config.pkl'):
        config = pickle.load(open(checkpoint_dir + 'config.pkl', 'rb'))
    else:
        config = Config()

    config.mode = 'inference'

    word2id, id2word = read_vocab(config.word_vocab_file)
    tag2id, id2tag = read_vocab(config.tag_vocab_file)

    with tf.Session(config=get_config_proto(
            log_device_placement=False)) as sess:
        model = get_model(config.model, config, sess)
        model.build()
        model.restore_model(checkpoint_dir)
        y_pred = infer_file(model, word2id, id2tag, fname, pred_path)

    cmatrix = confusion_matrix(y_true, y_pred, classes)
    print(cmatrix)
    correct = [x == y for x, y in list(zip(y_true, y_pred))]
    print(correct.count(True) / len(correct))
    return cmatrix
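
A minimal usage sketch for compute_confuse_matrix; the evaluation file and class names are placeholders, not values from the original project:

# Hypothetical call; file path and class labels are illustrative only.
classes = ["positive", "negative", "neutral"]
cmatrix = compute_confuse_matrix("data/eval_set.txt", classes)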
Code example #13
File: settings.py Project: soleren/mynotes
    def apply_changes(self, *args):
        try:
            image_quality = int(args[0])
            if image_quality < 0:
                image_quality = 0
            if image_quality > 100:
                image_quality = 100
            Config.data['quality'] = image_quality
        except:
            PopupMsg().show(Text.data['invalid_quality'])

        if self.isValidIPAddress(args[2]):
            Config.data['server_address'] = args[2]
        else:
            PopupMsg().show(Text.data['invalid_server_address'])

        try:
            port = int(args[3])
            if port < 0:
                port = 0
            if port > 65535:
                port = 65535
            Config.data['server_port'] = port
        except:
            PopupMsg().show(Text.data['invalid_port'])

        Config.data['server_mode'] = args[1]
        Config.set_config()
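
isValidIPAddress is referenced above but not shown; a minimal sketch using the standard ipaddress module, assuming the helper should accept any syntactically valid IPv4 or IPv6 address:

import ipaddress

def isValidIPAddress(self, address):  # sketch of the helper method referenced above
    # Accept anything that parses as an IPv4 or IPv6 address.
    try:
        ipaddress.ip_address(address)
        return True
    except ValueError:
        return False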
Code example #14
 def initialize(self):
     Data.initialize()
     Config.initialize()
     self.Delete = None
     self.Search = None
     self.Plaster = None
     self.Config = None
     self.currentMenu = None
Code example #15
File: optim.py Project: mrvoh/LASERWordEmbedder
    def __init__(self,
                 optimization_folder,
                 nr_iterations,
                 iteration_chunck_size,
                 nr_init_points,
                 embedder='LASEREmbedderI',
                 log_file_name='logs.json',
                 load_log=False,
                 prev_log='log.json',
                 probe=True):

        # Set static variables
        self.INTERMEDIATE_RESULTS_FOLDER = optimization_folder
        self.FINAL_RESULTS_FOLDER = optimization_folder
        self.NR_ITERATIONS = nr_iterations
        self.ITERATION_CHUNCK_SIZE = iteration_chunck_size
        self.NR_INIT_POINTS = nr_init_points
        self.EMBEDDER = embedder
        self.probe = probe
        self.config = Config()
        self.log_file = os.path.join(optimization_folder, log_file_name)
        self.prev_log = os.path.join(optimization_folder, prev_log)
        self.logger = JSONLogger(path=self.log_file)
        self.load_log = load_log
        # Boundaries between which to explore the input space
        self.param_boundaries = {
            'dropout_before_laser': (0., 0.5),
            'dropout_in_laser': (0., 0.5),
            'transformer_drop': (0., 0.5),
            'dropout': (0., 0.5),
            'hidden_size_lstm': (50, 350),
            'weight_decay': (0., 0.1),
            'learning_rate_warmup_steps': (1., 10.0),
            'num_heads': (0.5, 4.49),
            'filter_size': (3.5, 350)
        }
        # Set points on which to evaluate the model for exploration of the solution space
        self.explore_points = {
            'dropout_before_laser': [0.1],
            'dropout_in_laser': [0.25],
            'transformer_drop': [0.0],
            'dropout': [0.0],
            'hidden_size_lstm': [350],
            'weight_decay': [0.01],
            'learning_rate_warmup_steps': [2.],
            'num_heads': [4.],
            'filter_size': [350.]
        }

        self.bo = None  # initialize variable for further error handling

        assert len(
            np.unique([len(n) for n in self.explore_points.values()])
        ) == 1, 'number of explore points should be the same for all parameters'
        self.NUM_EXPLORE_POINTS = np.unique(
            [len(n) for n in self.explore_points.values()])[0]
Code example #16
 def __init__(self):
     config = Config()
     self.__db_path = config.get_db_path() + r'\RMsystem.db'
     self.__conn = None
     self.cursor = None
     self.message_box = Messagebox()
     if self.__db_path:
         self.__open(self.__db_path)
     else:
         self.message_box.window_execution('Database path is missing!',
                                           MessageBoxType.ERROR)
Code example #17
def main():
    """Procedure to build data

    You MUST RUN this procedure. It iterates over the whole dataset (train,
    dev and test) and extract the vocabularies in terms of words, tags, and
    characters. Having built the vocabularies it writes them in a file. The
    writing of vocabulary in a file assigns an id (the line #) to each word.
    It then extract the relevant GloVe vectors and stores them in a np array
    such that the i-th entry corresponds to the i-th word in the vocabulary.


    Args:
        config: (instance of Config) has attributes like hyper-params...

    """
    # get config and processing of words
    config = Config(load=False)
    if config.task == 'pos':
        print("USING POS")
        config.filename_train = "data/train.pos"  # test
        config.filename_dev = "data/dev.pos"
        config.filename_test = "data/test.pos"
    else:
        print("USING NER")
    processing_word = get_processing_word(lowercase=True)

    # Generators
    dev = CoNLLDataset(config.filename_dev, processing_word)
    test = CoNLLDataset(config.filename_test, processing_word)
    train = CoNLLDataset(config.filename_train, processing_word)

    # Build Word and Tag vocab
    vocab_words, vocab_tags = get_vocabs([train, dev, test])
    vocab_glove = get_glove_vocab(config.filename_glove)

    vocab = vocab_words & vocab_glove
    vocab.add(UNK)
    vocab.add(NUM)

    # Save vocab
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim GloVe Vectors
    vocab = load_vocab(config.filename_words)
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save char vocab
    train = CoNLLDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
Code example #18
def setup(argv):
    config = Config()
    payment = Payment()
    payment.clean()
    sections = config.get_sections()
    for section in sections:
        section_config = config.get_section_config(section)
        if section.startswith('Vietcombank') \
                or section.startswith('Msb') \
                or section.startswith('Techcombank') \
                or section.startswith('Klikbca'):
            payment.set_name(section)
            payment.set_username(section_config['username'])
            payment.save()
Code example #19
 def __init__(self, payment, session=None, proxy={}):
     self.session = session
     self.proxy = proxy
     self.payment = payment
     self.config = Config()
     self.log = Log()
     techcombank = self.get_techcombank_config()
     self.email_transport = EmailTransport()
     self.login_url = techcombank['login_url']
     self.username = payment.get_username()
     self.password = payment.get_password()
     self.debug_mode = techcombank['debug_mode']
     self.total_transactions = 0
     self.history = History()
     self.code = GenerateCode()
Code example #20
File: test.py Project: yaojieliu/ECCV20-STDN
def main(argv=None): 
  # Configurations
  config = Config(gpu='1',
                  root_dir='./data/test/',
                  root_dir_val=None,
                  mode='testing')
  config.BATCH_SIZE = 1

  # Get images and labels.
  dataset_test = Dataset(config, 'test')

  # Train
  _M, _s, _b, _C, _T, _imname = _step(config, dataset_test, False)

  # Add ops to save and restore all the variables.
  saver = tf.train.Saver(max_to_keep=50,)
  with tf.Session(config=config.GPU_CONFIG) as sess:
    # Restore the model
    ckpt = tf.train.get_checkpoint_state(config.LOG_DIR)
    if ckpt and ckpt.model_checkpoint_path:
      saver.restore(sess, ckpt.model_checkpoint_path)
      last_epoch = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
      print('**********************************************************')
      print('Restore from Epoch '+str(last_epoch))
      print('**********************************************************')
    else:
      init = tf.initializers.global_variables()
      last_epoch = 0
      sess.run(init)
      print('**********************************************************')
      print('Train from scratch.')
      print('**********************************************************')

    step_per_epoch = int(len(dataset_test.name_list) / config.BATCH_SIZE)
    with open(config.LOG_DIR + '/test/score.txt', 'w') as f:
      for step in range(step_per_epoch):
        M, s, b, C, T, imname = sess.run([_M, _s, _b, _C, _T, _imname])
        # save the score
        for i in range(config.BATCH_SIZE):
            _name = imname[i].decode('UTF-8')
            _line = _name + ',' + str("{0:.3f}".format(M[i])) + ','\
                                + str("{0:.3f}".format(s[i])) + ','\
                                + str("{0:.3f}".format(b[i])) + ','\
                                + str("{0:.3f}".format(C[i])) + ','\
                                + str("{0:.3f}".format(T[i]))
            f.write(_line + '\n')  
            print(str(step+1)+'/'+str(step_per_epoch)+':'+_line, end='\r')  
    print("\n")
Code example #21
File: train.py Project: lixusheng1/bacterial_NER
def main():
    # create instance of config
    config = Config()
    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.processing_pos,
                       config.processing_chunk, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.processing_pos,
                         config.processing_chunk, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.processing_pos,
                        config.processing_chunk, config.max_iter)
    max_sequence_length = max(max([len(seq[0]) for seq in train]),
                              max([len(seq[0]) for seq in dev]),
                              max([len(seq[0]) for seq in test]))

    max_word_length = max(
        max([len(word[0]) for seq in train for word in seq[0]]),
        max([len(word[0]) for seq in test for word in seq[0]]),
        max([len(word[0]) for seq in dev for word in seq[0]]))
    print(max_word_length, max_sequence_length)
    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.train(train, dev)
    model.restore_session(config.dir_model)
    model.evaluate(test)
Code example #22
def get_model_api():
    """Returns lambda function for api"""

    # 1. initialize model once and for all
    config = Config()
    model = NERModel(config)
    model.build()
    model.restore_session("results/crf/model.weights/")

    def model_api(input_data):
        """
        Args:
            input_data: submitted to the API, raw string

        Returns:
            output_data: after some transformation, to be
                returned to the API

        """
        # 2. process input
        punc = [",", "?", ".", ":", ";", "!", "(", ")", "[", "]"]
        s = "".join(c for c in input_data if c not in punc)
        words_raw = s.strip().split(" ")

        # 3. call model predict function
        preds = model.predict(words_raw)

        # 4. process the output
        output_data = align_data({"input": words_raw, "output": preds})

        # 5. return the output for the api
        return output_data

    return model_api
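
A short usage sketch for the closure returned above; the input sentence is illustrative:

# Build the API once, then reuse the closure for many requests.
model_api = get_model_api()
print(model_api("John lives in New York"))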
Code example #23
def main():
    # create instance of config
    config = Config()
    if config.use_elmo: config.processing_word = None

    #build model
    model = NERModel(config)

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                         config.processing_tag, config.max_iter, config.use_crf)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter, config.use_crf)

    learn = NERLearner(config, model)
    learn.fit(train, dev)
Code example #24
 def __init__(self):
     self.config = Config()
     self.config.dim_word = 250
     self.config.dim_char = 50
     self.model = NERModel(self.config)
     self.model.build()
     self.model.restore_session(self.MODEL_DIR)
Code example #25
    def write(md5, data):
        save_path = Config.get("data_save_path")
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        with open(os.path.join(save_path, md5), 'wb') as file:
            file.write(data)
Code example #26
def main():
    # create instance of config; the Config object handles data loading:
    # it holds the vocabulary, the pretrained GloVe embedding matrix, and the str->id mapping functions
    config = Config()

    # build model
    model = NERModel(config)
    model.build("train")

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    # processing_word = get_processing_word(lowercase=True)
    dev = CoNLLDataset(config.filename_dev)
    train = CoNLLDataset(config.filename_train)
    test = CoNLLDataset(config.filename_test)

    train4cl = CoNLLdata4classifier(train,
                                    processing_word=config.processing_word,
                                    processing_tag=config.processing_tag)
    dev4cl = CoNLLdata4classifier(dev,
                                  processing_word=config.processing_word,
                                  processing_tag=config.processing_tag)
    test4cl = CoNLLdata4classifier(test,
                                   processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    # train model
    model.train(train4cl, dev4cl, test4cl)
Code example #27
def main():

    # get config and processing of words
    config = Config(load=False)
    processing_word = get_processing_word(lowercase=True)

    # Generators
    dev = CoNLLDataset(config.filename_dev, processing_word)
    test = CoNLLDataset(config.filename_test, processing_word)
    train = CoNLLDataset(config.filename_train, processing_word)

    # Build Word and Tag vocab
    vocab_words, vocab_tags = get_vocabs([train, dev, test])
    vocab_glove = get_glove_vocab(config.filename_glove)

    vocab = vocab_words & vocab_glove
    vocab.add(UNK)
    vocab.add(NUM)

    # Save vocab
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim GloVe Vectors
    vocab = load_vocab(config.filename_words)
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save char vocab
    train = CoNLLDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
Code example #28
def main():
    # create instance of config
    config_file = sys.argv[1]

    config = Config(config_file)

    print("dir model : ", config.dir_model)

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)
    # model.reinitialize_weights("words")
    # model.reinitialize_weights("chars")
    # model.reinitialize_weights("train_step")

    # Evaluate on another data set
    if len(sys.argv) > 2:
        test_file_name = sys.argv[2]
        test = CoNLLDataset(test_file_name, config.processing_word,
                            config.processing_tag, config.max_iter)
        print("Testing on ", test_file_name, "..")


    # create dataset
    else:
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        print("Testing on ", config.filename_test, "..")

        # evaluate and interact
    # model.predict_test(test, output=sys.stdout)
    model.evaluate(test)
Code example #29
def main():
    # create instance of config
    config = Config(parser)

    # build model
    model = HANNModel(config)
    model.build()
    if config.restore:
        model.restore_session(
            "results/test/model.weights/")  # optional, restore weights

    # create datasets
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)

    # evaluate model
    model.restore_session(config.dir_model)
    metrics = model.evaluate(test)

    with open(os.path.join(config.dir_output, 'test_results.txt'),
              'a') as file:
        file.write('{}\n'.format(metrics['classification-report']))
        file.write('{}\n'.format(metrics['confusion-matrix']))
        file.write('{}\n\n'.format(metrics['weighted-f1']))
Code example #30
def train():
    # 1.init config and model
    config = Config()
    threshold = (config.sequence_length / 2) + 1
    model = BertCNNModel(config)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    saver = tf.train.Saver()
    save_path = config.ckpt_dir + "model.ckpt"
    #if not os.path.exists(config.ckpt_dir):
    #    os.makedirs(config.ckpt_dir)
    batch_size = 8
    with tf.Session(config=gpu_config) as sess:
        sess.run(tf.global_variables_initializer())
        if os.path.exists(config.ckpt_dir):  # restore the latest checkpoint from the directory
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
        for i in range(10000):
            # 2.feed data
            input_x = np.random.randn(config.batch_size, config.sequence_length)  # [None, self.sequence_length]
            input_x[input_x >= 0] = 1
            input_x[input_x < 0] = 0
            input_y = generate_label(input_x, threshold)
            p_mask_lm = [i for i in range(batch_size)]
            # 3.run session to train the model, print some logs.
            loss, _ = sess.run(
                [model.loss_val, model.train_op],
                feed_dict={model.x_mask_lm: input_x, model.y_mask_lm: input_y,
                           model.p_mask_lm: p_mask_lm,
                           model.dropout_keep_prob: config.dropout_keep_prob})
            print(i, "loss:", loss, "-------------------------------------------------------")
            if i == 300:
                print("label[0]:", input_y[0])
                print("input_x:", input_x)
            if i % 500 == 0:
                saver.save(sess, save_path, global_step=i)
Code example #31
File: wxutil.py Project: zy4290/qilaihi.club
def send_custom_msg(msg, reply):
    custom_text = copy.deepcopy(wxconfig.custom_text_template)
    custom_text['touser'] = msg.fromusername
    custom_text['text']['content'] = reply
    logging.debug(custom_text)

    config = yield dbutil.do(Config.select().get)
    url = wxconfig.custom_msg_url.format(config.accesstoken)
    logging.debug(url)

    logging.debug(json.dumps(custom_text, ensure_ascii=False, indent=4))
    http_client = AsyncHTTPClient()
    response = yield http_client.fetch(url, **{'method': 'POST',
                                               'body': json.dumps(custom_text, ensure_ascii=False)})
    logging.debug(response.body.decode())
Code example #32
File: wxutil.py Project: zy4290/qilaihi.club
def refresh_access_token():
    logging.info('Refreshing WeChat access token')
    try:
        config = yield dbutil.do(Config.select().get)
        http_client = AsyncHTTPClient()
        logging.debug(wxconfig.access_token_url.format(config.appid, config.appsecret))
        response = yield http_client.fetch(
            wxconfig.access_token_url.format(config.appid, config.appsecret))
        logging.info(response.body.decode())
        result = json.loads(response.body.decode())
        config.accesstoken = result['access_token']
        config.jsapiticket = yield refresh_jsapi_ticket(config.accesstoken)
        config.expires = result['expires_in']
        logging.debug(config.accesstoken)
        logging.debug(config.jsapiticket)
        logging.debug(config.expires)
        yield dbutil.do(config.save)
    except Exception:
        logging.exception('Failed to refresh WeChat access token')
Code example #33
    def encAES(self, m):
        if self.dek is None:
            raise Exception('DEK is null, cannot encrypt')

        a = AuthenticatedCryptoAbstraction(bytes(self.dek, "utf-8"))
        CT_AES = a.encrypt(m)
        groupObj = PairingGroup('SS512')

        return objectToBytes(CT_AES, groupObj)

    def loop_end(self):
        self._client.loop_stop()
        self._client.disconnect()
            
                                
if __name__ == '__main__':
    import sys, os

    sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
    from model.config import Config

    groupCfg = Config('../conf/gc_config.ini')
    cfg = groupCfg.getConfig("mqtt_client")
    cfg['topic'] = "test/topic"
    pub = Publisher(cfg)
    pub.loop()
    pub.publish("dude")
    pub.loop_end()

Code example #34
File: sub.py Project: SNET-Entrance/gc_for_pubsub
        
        
        a = AuthenticatedCryptoAbstraction(bytes(self.dek, "utf-8"))
        #CT_AES = a.encrypt(message)
        return a.decrypt(CT)

    def on_log(self, client, userdata, level, buf):
        print("log: " + str(level) + ": " + str(buf));

if __name__ == '__main__':
    import sys, os
    
    sys.path.append(os.path.join(os.path.dirname(__file__), '../'))    
    from model.config import Config
    
    groupCfg = Config('../conf/gc_config.ini')
    
    sub = Subscriber(groupCfg.getConfig("mqtt_client"))
    sub.loop()

#    client = mqtt.Client()
#    client.on_connect = on_connect
#    client.on_message = on_message
#    client.on_log = on_log
    
    #ssh -N al@me -L 1883/localhost/1883
#    client.connect(cfg['host'], int(cfg['port']), int(cfg['keepalive']))
    #client.connect("localhost", 1883, 360)
    
    # Blocking call that processes network traffic, dispatches callbacks and
    # handles reconnecting.