Example no. 1
 def test(self, test_file, model_file):
     self.loadModel(model_file)
     reader = Reader()
     testInsts = reader.readInstances(test_file, self.hyperParams.maxInstance)
     testExamples = self.instance2Example(testInsts)
     for idx in range(len(testExamples)):
         self.predict(testExamples[idx])
Example no. 2
    def train(self, train_file, dev_file, test_file):
        self.hyperParams.show()
        torch.set_num_threads(self.hyperParams.thread)
        reader = Reader(self.hyperParams.maxInstance)

        trainInsts = reader.readInstances(train_file)
        devInsts = reader.readInstances(dev_file)
        testInsts = reader.readInstances(test_file)
        print("Training Instance: ", len(trainInsts))
        print("Dev Instance: ", len(devInsts))
        print("Test Instance: ", len(testInsts))

        self.createAlphabet(trainInsts, devInsts, testInsts)

        trainExamples = self.instance2Example(trainInsts)
        devExamples = self.instance2Example(devInsts)
        testExamples = self.instance2Example(testInsts)

        self.model = RNNLabeler(self.hyperParams)
        parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = torch.optim.Adagrad(parameters,
                                        lr=self.hyperParams.learningRate)

        indexes = []
        for idx in range(len(trainExamples)):
            indexes.append(idx)
        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            for idx in range(len(trainExamples)):
                self.model.zero_grad()
                self.model.LSTMHidden = self.model.init_hidden()
                exam = trainExamples[indexes[idx]]
                lstm_feats = self.model(exam.feat)
                loss = self.model.crf.neg_log_likelihood(
                    lstm_feats, exam.labelIndexs)
                loss.backward()
                optimizer.step()
                if (idx + 1) % self.hyperParams.verboseIter == 0:
                    print('current: ', idx + 1, ", cost:", loss.data[0])

            eval_dev = Eval()
            for idx in range(len(devExamples)):
                predictLabels = self.predict(devExamples[idx])
                devInsts[idx].evalPRF(predictLabels, eval_dev)
            print('Dev: ', end="")
            eval_dev.getFscore()

            eval_test = Eval()
            for idx in range(len(testExamples)):
                predictLabels = self.predict(testExamples[idx])
                testInsts[idx].evalPRF(predictLabels, eval_test)
            print('Test: ', end="")
            eval_test.getFscore()
Example no. 3
def main():
    try:
        env = {}
        for lib in ['stdlib.lisp', 'bq.lisp']:
            with open(lib) as f:
                try:
                    env.update(load(f, lib, env))
                except (LoadError, LoadWarning) as e:
                    print_exception(e)
                    exit(2)

        while True:
            try:
                expr = raw_input('* ')
            except (KeyboardInterrupt, EOFError):
                print
                break
            if not expr:
                continue
            try:
                form = Reader(expr, '<string>').read()
                result = eval_form(form, env)
            except ReadError as e:
                print_exception(e)
                continue
            if isinstance(result, Error):
                print_error(result)
            print pprint(result)

            env[Symbol('_')] = result
    except KeyboardInterrupt:
        print
Example no. 4
 def getAEOcount(self, path):
     a = 0
     e = 0
     o = 0
     reader = Reader()
     Inst = reader.readfiles(path)
     for i in Inst:
         for j in range(len(i.labels)):
             if len(i.labels[j]) == 3:
                 if i.labels[j][2] == 'A':
                     a = a + 1
                 elif i.labels[j][2] == 'E':
                     e = e + 1
             elif i.labels[j] == 'O':
                 o += 1
     return a, e, o
Example no. 5
 def __init__(self, hyperParams):
     super(Encoder, self).__init__()
     self.hyperParams = hyperParams
     if hyperParams.wordEmbFile == "":
         self.wordEmb = nn.Embedding(hyperParams.postWordNum,
                                     hyperParams.wordEmbSize)
         self.wordDim = hyperParams.wordEmbSize
     else:
         reader = Reader()
         self.wordEmb, self.wordDim = reader.load_pretrain(
             hyperParams.wordEmbFile, hyperParams.postWordAlpha,
             hyperParams.unk)
     self.wordEmb.weight.requires_grad = hyperParams.wordFineTune
     self.dropOut = nn.Dropout(hyperParams.dropProb)
     self.gru = nn.GRU(input_size=self.wordDim,
                       hidden_size=hyperParams.rnnHiddenSize,
                       batch_first=True,
                       dropout=hyperParams.dropProb)
Example no. 6
    def __init__(self, hyperParams):
        super(Decoder, self).__init__()

        reader = Reader()
        self.wordEmb, self.wordDim = reader.load_pretrain(
            hyperParams.wordEmbFile, hyperParams.wordAlpha, hyperParams.unk)

        self.wordEmb.weight.requires_grad = hyperParams.wordFineTune
        self.dropOut = torch.nn.Dropout(hyperParams.dropProb)
        self.lastWords = []
        self.hyperParams = hyperParams
        #self.linearLayer = nn.Linear(hyperParams.rnnHiddenSize * 2, hyperParams.labelSize)

        self.linearLayer = nn.Linear(
            hyperParams.rnnHiddenSize * 2 + self.wordDim,
            hyperParams.labelSize)

        self.softmax = nn.LogSoftmax()
Example no. 7
    def train(self, train_file, dev_file, test_file):
        self.hyperParams.show()
        torch.set_num_threads(self.hyperParams.thread)
        reader = Reader(self.hyperParams.maxInstance)

        trainInsts = reader.readInstances(train_file)
        devInsts = reader.readInstances(dev_file)

        trainExamples = self.instance2Example(trainInsts)
        devExamples = self.instance2Example(devInsts)

        print("Training Instance: ", len(trainInsts))
        print("Dev Instance: ", len(devInsts))

        self.createAlphabet(trainInsts)

        self.model = RNNLabeler(self.hyperParams)
        optimizer = torch.optim.Adagrad(self.model.parameters(),
                                        lr=self.hyperParams.learningRate)

        indexes = []
        for idx in range(len(trainExamples)):
            indexes.append(idx)

        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            for idx in range(len(trainExamples)):
                self.model.zero_grad()
                self.model.LSTMHidden = self.model.init_hidden()
                exam = trainExamples[indexes[idx]]
                tag_scores = self.model(exam.feat)
                loss = torch.nn.functional.cross_entropy(
                    tag_scores, exam.labelIndexs)
                loss.backward()
                optimizer.step()
                if (idx + 1) % self.hyperParams.verboseIter == 0:
                    print('current: ', idx + 1, ", cost:", loss.data[0])

            eval_dev = Eval()
            for idx in range(len(devExamples)):
                predictLabels = self.predict(devExamples[idx])
                devInsts[idx].evalPRF(predictLabels, eval_dev)
            eval_dev.getFscore()
Example no. 8
def run_tests(f, filename, env):
    passed = 0
    failed = 0
    error = 0

    reader = Reader(f, filename)
    while True:
        form = reader.read()

        if form == None:
            break

        result = eval_form(form, env)
        if result == False:
            failed += 1
            print 'Test failed:'
            display_form(form, False)
        elif result == True:
            passed += 1
            print 'Test passed.'
        elif isinstance(result, Error):
            error += 1

            try:
                desc = assoc(result.attrs, Symbol(':msg'))
                print 'Test error ({}): {}'.format(result.type.name, desc)
            except (KeyError, InvalidAssocList) as e:
                print 'Test error ({}).'.format(result.type.name)

            try:
                form = assoc(result, Symbol(':form'))
                display_form(form, False)
            except (KeyError, InvalidAssocList):
                print 'No location information for the error.'
        else:
            error += 1
            print 'Test cases must return either #t or #f; got {}.'.format(
                result)
            display_form(form, False)

    return passed, failed, error
Example no. 9
 def prediction(self, path_test, path_train, path_bestModel):
     self.hyperpara = Hyperparameter()
     eval_test = Eval()
     ner = NER()
     reader = Reader()
     traininsts = reader.readfiles(path_train)
     testinsts = reader.readfiles(path_test)
     ner.create_alphabet(traininsts)
     self.hyperpara.tag_size = ner.hyperpara.tag_size
     self.hyperpara.embedding_num = ner.hyperpara.embedding_num
     self.model = BiLstm(self.hyperpara)  # BiLstm model
     if self.hyperpara.loadModel == 1 and\
        self.hyperpara.load_pattern ==  1:
         try:
             self.model.load_state_dict(torch.load(path_bestModel))
         except Exception:
             print('Model parameters do not match')
         else:
             pass
     elif self.hyperpara.loadModel == 1 and\
          self.hyperpara.load_pattern == 0 :
         try:
             self.model = torch.load(path_bestModel)
         except Exception:
             print('Model parameters do not match')
         else:
             pass
     testExamples = ner.change(testinsts)
     for idx in range(len(testExamples)):
         test_list = []
         test_list.append(testExamples[idx])
         x, y = ner.variable(test_list)
         lstm_feats = self.model(x)
         predict = ner.getMaxIndex(lstm_feats)
         predictLabels = []
         for idy in range(len(predict)):
             predictLabels.append(ner.label_AlphaBet.list[predict[idy]])
         testinsts[idx].evalPRF(predictLabels, eval_test)
         a, e = testinsts[idx].extractA_and_E()
         self.Attr.append(a)
         self.Eval.append(e)
Example no. 10
    def __init__(self, hyperParams):
        super(Encoder, self).__init__()
        self.hyperParams = hyperParams
        if hyperParams.charEmbFile == "":
            self.charEmb = nn.Embedding(hyperParams.charNUM,
                                        hyperParams.charEmbSize)
            self.charDim = hyperParams.charEmbSize
        else:
            reader = Reader()
            self.charEmb, self.charDim = reader.load_pretrain(
                hyperParams.charEmbFile, hyperParams.charAlpha,
                hyperParams.unk)
        self.charEmb.weight.requires_grad = hyperParams.charFineTune

        if hyperParams.bicharEmbFile == "":
            self.bicharEmb = nn.Embedding(hyperParams.bicharNUM,
                                          hyperParams.bicharEmbSize)
            self.bicharDim = hyperParams.bicharEmbSize
        else:
            reader = Reader()
            self.bicharEmb, self.bicharDim = reader.load_pretrain(
                hyperParams.bicharEmbFile, hyperParams.bicharAlpha,
                hyperParams.unk)
        self.bicharEmb.weight.requires_grad = hyperParams.bicharFineTune

        self.dropOut = nn.Dropout(hyperParams.dropProb)
        self.bilstm = nn.LSTM(input_size=self.charDim + self.bicharDim,
                              hidden_size=hyperParams.rnnHiddenSize,
                              batch_first=True,
                              bidirectional=True,
                              num_layers=2,
                              dropout=hyperParams.dropProb)
Example no. 11
def load(f, filename, env):
    reader = Reader(f, filename)
    while True:
        form = reader.read()

        if form == None:
            break

        expanded, _ = macro_expand(form, env)

        if type(expanded) != List or len(
                expanded) != 3 or expanded[0] != Symbol('define'):
            raise LoadWarning('Unrecognized top-level form.', expanded)

        if type(expanded[1]) != Symbol:
            raise LoadError('Invalid top-level form.', form)

        val = eval_form(expanded[2], env)
        if isinstance(val, Error):
            print_error(val)
            exit(2)
        env[expanded[1]] = val

    return env
Example no. 12
def time_since(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (as_minutes(s), as_minutes(rs))
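# Hypothetical illustration (assuming as_minutes formats seconds as "Xm Ys"):
# with 90 seconds elapsed and the run 25% complete, time_since(start, 0.25)
# returns roughly "1m 30s (- 4m 30s)", i.e. the elapsed time plus the
# estimated time remaining (90 / 0.25 - 90 = 270 seconds).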


if __name__ == '__main__':
    start_time = time.time()
    print('begin time:', as_minutes(time.time() - start_time))
    config_path = './config.cfg'
    config = Configer(config_path)
    print('reading ', config.train_path)
    train_reader = Reader(config.train_path, max_len=config.max_len)
    print('done, using time:', as_minutes(time.time() - start_time))

    print('reading ', config.test_path)
    test_reader = Reader(config.test_path, max_len=config.max_len)
    print('done, using time:', as_minutes(time.time() - start_time))
    train_texts, train_labels = train_reader.getData()
    test_texts, test_labels = test_reader.getData()
    print('train:', len(train_texts))
    print('test:', len(test_texts))
    # print(train_texts[:10])
    # print(train_labels[:10])

    text_alpha = Alphabet()
    label_alpha = Alphabet()
    text_dic = OrderedDict()
Example no. 13
    def train(self, train_file, dev_file, test_file, model_file):
        self.hyperParams.show()
        torch.set_num_threads(self.hyperParams.thread)
        reader = Reader()

        trainInsts = reader.readInstances(train_file,
                                          self.hyperParams.maxInstance)
        devInsts = reader.readInstances(dev_file, self.hyperParams.maxInstance)
        testInsts = reader.readInstances(test_file,
                                         self.hyperParams.maxInstance)

        print("Training Instance: ", len(trainInsts))
        print("Dev Instance: ", len(devInsts))
        print("Test Instance: ", len(testInsts))

        self.createAlphabet(trainInsts)

        trainExamples = self.instance2Example(trainInsts)
        devExamples = self.instance2Example(devInsts)
        testExamples = self.instance2Example(testInsts)

        self.encoder = Encoder(self.hyperParams)
        self.decoder = Decoder(self.hyperParams)

        indexes = []
        for idx in range(len(trainExamples)):
            indexes.append(idx)

        encoder_parameters = filter(lambda p: p.requires_grad,
                                    self.encoder.parameters())
        encoder_optimizer = torch.optim.Adam(encoder_parameters,
                                             lr=self.hyperParams.learningRate)

        decoder_parameters = filter(lambda p: p.requires_grad,
                                    self.decoder.parameters())
        decoder_optimizer = torch.optim.Adam(decoder_parameters,
                                             lr=self.hyperParams.learningRate)

        batchBlock = len(trainExamples) // self.hyperParams.batch
        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            self.encoder.train()
            self.decoder.train()
            for updateIter in range(batchBlock):
                exams = []
                start_pos = updateIter * self.hyperParams.batch
                end_pos = (updateIter + 1) * self.hyperParams.batch
                for idx in range(start_pos, end_pos):
                    exams.append(trainExamples[indexes[idx]])
                postFeats, responseFeats = self.getBatchFeatLabel(exams)
                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()

                encoder_hidden = self.encoder.init_hidden(
                    self.hyperParams.batch)
                encoder_output, encoder_hidden = self.encoder(
                    postFeats, encoder_hidden)
                decoder_hidden = self.decoder.initHidden(
                    self.hyperParams.batch)
                response_len = responseFeats.size()[1]
                last_word = torch.autograd.Variable(
                    torch.LongTensor(1, self.hyperParams.batch))
                for idx in range(self.hyperParams.batch):
                    last_word.data[0][idx] = self.hyperParams.responseStartID
                loss = 0
                for idx in range(response_len):
                    decoder_output, decoder_hidden = self.decoder(
                        encoder_hidden, decoder_hidden, last_word)
                    loss += torch.nn.functional.nll_loss(
                        decoder_output,
                        responseFeats.permute(1, 0)[idx])
                    for idy in range(self.hyperParams.batch):
                        last_word.data[0][idy] = self.getMaxIndex(
                            decoder_output[idy])
                loss.backward()
                print('Current: ', updateIter + 1, ", Cost:", loss.data[0])
                encoder_optimizer.step()
                decoder_optimizer.step()
            if (iter + 1) % 10 == 0:
                self.encoder.eval()
                self.decoder.eval()
                print("Save model .....")
                self.saveModel(model_file + str(iter))
                print("Save model ok")
Example no. 14
from read import Reader
from models import model
from keras.preprocessing.image import ImageDataGenerator
from generator import BSONIterator

r = Reader(full_prep=True)

# need the generator
train_bson_file = open(r.train_bson_path, "rb")

# num_classes = len(train_offsets_df["category_id"].unique())
num_classes = 5270
print("Total number of categories in train {}".format(num_classes))

num_train_images = len(r.train_images_df)
num_val_images = len(r.val_images_df)
batch_size = 16

# Tip: use ImageDataGenerator for data augmentation and preprocessing.
train_datagen = ImageDataGenerator()
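# A hypothetical augmentation setup (the keyword arguments below are standard
# keras ImageDataGenerator options, not settings taken from this project):
# ImageDataGenerator(rescale=1. / 255, horizontal_flip=True, rotation_range=10)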
train_gen = BSONIterator(train_bson_file,
                         r.train_images_df,
                         r.train_offsets_df,
                         num_classes,
                         train_datagen,
                         batch_size=batch_size,
                         shuffle=True)

val_datagen = ImageDataGenerator()
val_gen = BSONIterator(train_bson_file,
                       r.val_images_df,
Example no. 15
    def train(self, train_file, dev_file, test_file, model_file):
        self.hyperParams.show()
        torch.set_num_threads(self.hyperParams.thread)
        reader = Reader()

        trainInsts = reader.readInstances(train_file, self.hyperParams.maxInstance)
        devInsts = reader.readInstances(dev_file, self.hyperParams.maxInstance)
        testInsts = reader.readInstances(test_file, self.hyperParams.maxInstance)

        print("Training Instance: ", len(trainInsts))
        print("Dev Instance: ", len(devInsts))
        print("Test Instance: ", len(testInsts))

        self.createAlphabet(trainInsts, devInsts, testInsts)

        trainExamples = self.instance2Example(trainInsts)
        devExamples = self.instance2Example(devInsts)
        testExamples = self.instance2Example(testInsts)

        self.encoder = Encoder(self.hyperParams)
        self.decoder = Decoder(self.hyperParams)

        indexes = []
        for idx in range(len(trainExamples)):
            indexes.append(idx)

        encoder_parameters = filter(lambda p: p.requires_grad, self.encoder.parameters())
        encoder_optimizer = torch.optim.Adam(encoder_parameters, lr = self.hyperParams.learningRate)

        decoder_parameters = filter(lambda p: p.requires_grad, self.decoder.parameters())
        decoder_optimizer = torch.optim.Adam(decoder_parameters, lr = self.hyperParams.learningRate)
        train_num = len(trainExamples)
        batchBlock = train_num // self.hyperParams.batch
        if train_num % self.hyperParams.batch != 0:
            batchBlock += 1
        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            self.encoder.train()
            self.decoder.train()
            train_eval = Eval()
            for updateIter in range(batchBlock):
                exams = []
                start_pos = updateIter * self.hyperParams.batch
                end_pos = (updateIter + 1) * self.hyperParams.batch
                if end_pos > train_num:
                    end_pos = train_num
                for idx in range(start_pos, end_pos):
                    exams.append(trainExamples[indexes[idx]])
                batchCharFeats, batchBiCharFeats, batchLabel, batch, maxSentSize = self.getBatchFeatLabel(exams)
                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()

                encoderHidden = self.encoder.init_hidden(batch)
                encoderOutput, encoderHidden = self.encoder(batchCharFeats, batchBiCharFeats, encoderHidden)
                loss = 0
                decoderOutput = self.decoder(batch, encoderOutput, exams, bTrain=True)
                for idx in range(batch):
                    exam = exams[idx]
                    for idy in range(exam.size):
                        labelID = getMaxIndex(self.hyperParams, decoderOutput[idx * maxSentSize + idy])
                        if labelID == exam.labelIndexes[idy]:
                            train_eval.correct_num += 1
                        train_eval.gold_num += 1

                loss += torch.nn.functional.nll_loss(decoderOutput, batchLabel)
                loss.backward()
                if (updateIter + 1) % self.hyperParams.verboseIter == 0:
                    print('Current: ', updateIter + 1, ", Cost:", loss.data[0], ", ACC:", train_eval.acc())
                encoder_optimizer.step()
                decoder_optimizer.step()

            self.encoder.eval()
            self.decoder.eval()

            dev_eval = Eval()
            for idx in range(len(devExamples)):
                exam = devExamples[idx]
                predict_labels = self.predict(exam)
                devInsts[idx].evalPRF(predict_labels, dev_eval)
            p, r, f = dev_eval.getFscore()
            print("precision: ", p, ", recall: ", r, ", fscore: ", f)

            test_eval = Eval()
            for idx in range(len(testExamples)):
                exam = testExamples[idx]
                predict_labels = self.predict(exam)
                testInsts[idx].evalPRF(predict_labels, test_eval)
            p, r, f = test_eval.getFscore()
            print("precision: ", p, ", recall: ", r, ", fscore: ", f)
Example no. 16
from __future__ import absolute_import, division, print_function
from builtins import super, range, zip, round, map

import logging
from read import Reader
from ditto.store import Store

logger = logging.getLogger(__name__)

m = Store()
reader = Reader()
reader.parse(m, "test_input.csv")
for i in m.models:
    logger.debug(i)

for obj_name in m.model_names:
    logger.debug(obj_name)

for i in m.model_names["load1"].traits():
    # logger.debug(i,type(m.model_names['load1'].traits()[i]))
    class_name = (str(type(
        m.model_names["load1"].traits()[i])).strip("<>'").split(".")[-1])
    if class_name == "List":
        logger.debug(m.model_names["load1"].traits()[i]._trait.klass)
Example no. 17
 def createReader():
     reader = Reader("reader", fileName, Tidy.queue)
     reader.start()
Example no. 18
    def train(self, train_file, dev_file, test_file):
        self.hyperParams.show()
        torch.set_num_threads(self.hyperParams.thread)
        reader = Reader()

        trainInsts = reader.readInstances(train_file, self.hyperParams.maxInstance)
        devInsts = reader.readInstances(dev_file, self.hyperParams.maxInstance)
        testInsts = reader.readInstances(test_file, self.hyperParams.maxInstance)

        print("Training Instance: ", len(trainInsts))
        print("Dev Instance: ", len(devInsts))
        print("Test Instance: ", len(testInsts))

        self.createAlphabet(trainInsts, devInsts, testInsts)

        trainExamples = self.instance2Example(trainInsts)
        devExamples = self.instance2Example(devInsts)
        testExamples = self.instance2Example(testInsts)

        self.model = RNNLabeler(self.hyperParams)
        parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = torch.optim.Adam(parameters, lr=self.hyperParams.learningRate)

        indexes = []
        for idx in range(len(trainExamples)):
            indexes.append(idx)

        batchBlock = len(trainExamples) // self.hyperParams.batch
        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            self.model.train()
            for updateIter in range(batchBlock):
                #self.model.zero_grad()
                optimizer.zero_grad()
                exams = []
                start_pos = updateIter * self.hyperParams.batch
                end_pos = (updateIter + 1) * self.hyperParams.batch
                for idx in range(start_pos, end_pos):
                    exams.append(trainExamples[indexes[idx]])
                feats, labels = self.getBatchFeatLabel(exams)
                output = self.model(feats, self.hyperParams.batch)
                loss = torch.nn.functional.cross_entropy(output, labels)
                loss.backward()
                optimizer.step()
                if (updateIter + 1) % self.hyperParams.verboseIter == 0:
                    print('current: ', updateIter + 1, ", cost:", loss.data[0])

            self.model.eval()
            eval_dev = Eval()
            for idx in range(len(devExamples)):
                predictLabel = self.predict(devExamples[idx])
                devInsts[idx].evalACC(predictLabel, eval_dev)
            print("dev: ", end='')
            eval_dev.getACC()

            eval_test = Eval()
            for idx in range(len(testExamples)):
                predictLabel = self.predict(testExamples[idx])
                testInsts[idx].evalACC(predictLabel, eval_test)
            print("test: ", end='')
            eval_test.getACC()
Example no. 19
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description='Loads and evaluates parable files and expressions.')
    parser.add_argument('-l',
                        '--load',
                        type=str,
                        dest='load_files',
                        nargs='+',
                        metavar='FILES',
                        help='Load one or more files.')
    parser.add_argument('-t',
                        '--test',
                        type=str,
                        dest='test_files',
                        nargs='+',
                        metavar='FILES',
                        help='run one or more test files.')
    parser.add_argument('-e',
                        '--eval',
                        type=str,
                        dest='eval_expression',
                        metavar='EXPR',
                        help='Evaluate the given expression.')
    parser.add_argument('-m',
                        '--macro-expand',
                        type=str,
                        dest='expand_expression',
                        metavar='EXPR',
                        help='Macro-expand the given expression.')
    args = parser.parse_args()

    count = len(
        list(1 for i in ('eval_expression', 'expand_expression', 'test_files')
             if getattr(args, i) != None))

    if count > 1:
        print 'Only one of -t, -m and -e can be used.'
        exit(1)
    elif count == 0:
        print 'Either -t, -m or -e must be used.'
        exit(1)

    env = {}
    for lib in args.load_files or []:
        with open(lib) as f:
            try:
                env.update(load(f, lib, env))
            except (LoadError, LoadWarning) as e:
                print_exception(e)
                exit(2)

    if args.eval_expression:
        try:
            form = Reader(args.eval_expression, '<string>').read()
            result = eval_form(form, env)
        except ReadError as e:
            print_exception(e)
            exit(2)
        if isinstance(result, Error):
            print_error(result)
            exit(2)
        print 'Evaluation Result:', pprint(result)
    elif args.expand_expression:
        try:
            form = Reader(args.expand_expression, '<string>').read()
            result = macro_expand(form, env)
        except ReadError as e:
            print_exception(e)
            exit(2)
        if isinstance(result, Error):
            print_error(result)
            exit(2)
        print 'Macro Expansion Result:', result[1], pprint(result[0])
    elif args.test_files:
        passed = failed = error = 0
        for test_file in args.test_files:
            try:
                with open(test_file) as f:
                    p, f, e = run_tests(f, test_file, env)
                    passed += p
                    failed += f
                    error += e
            except ReadError as e:
                print_exception(e)
                exit(2)

        print 'Total tests:', passed + failed + error
        print '   Successful:', passed
        print '   Failed:', failed
        print '   Error:', error

        if failed != 0 or error != 0:
            exit(3)
Example no. 20
    def train(self, path_train, path_dev, path_test, path_PRF, path_model,
              path_bestModel):
        # Read the training, dev, and test sets and build the alphabet
        reader = Reader()
        traininsts = reader.readfiles(path_train)
        devinsts = reader.readfiles(path_dev)
        testinsts = reader.readfiles(path_test)
        print('Training Instance:', len(traininsts))
        print('Dev Instance:', len(devinsts))
        print('Test Instance:', len(testinsts))
        self.create_alphabet(traininsts)

        # Convert strings to IDs
        trainExamples = self.change(traininsts)  # e_train
        devExamples = self.change(devinsts)
        testExamples = self.change(testinsts)

        self.model = BiLstm(self.hyperpara)  # BiLstm model
        # Load a previously saved model if configured
        if self.hyperpara.loadModel == 1 and\
           self.hyperpara.load_pattern ==  1:
            try:
                self.model.load_state_dict(torch.load(path_bestModel))
            except Exception:
                print('Model parameters do not match')
            else:
                pass
        elif self.hyperpara.loadModel == 1 and\
             self.hyperpara.load_pattern == 0 :
            try:
                self.model = torch.load(path_bestModel)
            except Exception:
                print('Model parameters do not match')
            else:
                pass
        optimizer = torch.optim.Adam(self.model.parameters(),
                                     lr=self.hyperpara.lr)  # optimizer
        total_num = len(trainExamples)
        for epoch in range(1, self.hyperpara.epochs):
            print("-------- Epoch {} of {} --------".format(
                epoch, self.hyperpara.epochs))
            total = 0
            random.shuffle(trainExamples)  # shuffling the training order each epoch improves accuracy
            try:
                part = total_num // self.hyperpara.batch
                if total_num % self.hyperpara.batch != 0:
                    part += 1
            except ZeroDivisionError:
                print('Batch size is 0: division by zero')
            else:

                # Start training
                self.model.train()
                for idx in range(part):
                    begin = idx * self.hyperpara.batch
                    end = (idx + 1) * self.hyperpara.batch
                    if end > total_num:
                        end = total_num
                    batch_list = []
                    #             batch_list_len = []
                    for idy in range(begin, end):
                        batch_list.append(trainExamples[idy])

    #                  batch_list_len.append(len(trainExamples[idy].wordIndexs))
                    optimizer.zero_grad()
                    x, y = self.variable(batch_list)
                    lstm_feats = self.model(x)
                    loss = F.cross_entropy(lstm_feats, y)
                    total += 1
                    loss.backward()
                    optimizer.step()
                    print('current:', total, ", loss:", loss.data[0])
            # Evaluate on the dev set
            eval_dev = Eval()
            eval_dev_A = Eval()
            eval_dev_E = Eval()
            for idx in range(len(devExamples)):
                dev_list = []
                dev_list.append(devExamples[idx])
                x, y = self.variable(dev_list)
                lstm_feats = self.model(x)
                predict = self.getMaxIndex(lstm_feats)
                predictLabels = []
                for idy in range(len(predict)):
                    predictLabels.append(
                        self.label_AlphaBet.list[predict[idy]])
                gold_ent, predict_ent = devinsts[idx].evalPRF(
                    predictLabels, eval_dev)
                gold_ent_A, gold_ent_E, predict_ent_A, predict_ent_E = devinsts[
                    idx].getAE(gold_ent, predict_ent)
                devinsts[idx].evalAEPRF(gold_ent_A, predict_ent_A, eval_dev_A)
                devinsts[idx].evalAEPRF(gold_ent_E, predict_ent_E, eval_dev_E)
            line = ''
            print('Dev: ', end="")
            d_precision, d_recall, d_fscore = eval_dev.getFscore()
            line = line + str(epoch) + '.dev:\nP:' + (
                '%.2f' % (d_precision * 100)) + ' R:' + (
                    '%.2f' %
                    (d_recall * 100)) + ' F:' + ('%.2f' %
                                                 (d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)
            d_precision, d_recall, d_fscore = eval_dev_A.getFscore()
            line = line + 'A_P:' + ('%.2f' % (d_precision * 100)) + ' A_R:' + (
                '%.2f' %
                (d_recall * 100)) + ' A_F:' + ('%.2f' %
                                               (d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)
            d_precision, d_recall, d_fscore = eval_dev_E.getFscore()
            line = line + 'E_P:' + ('%.2f' % (d_precision * 100)) + ' E_R:' + (
                '%.2f' %
                (d_recall * 100)) + ' E_F:' + ('%.2f' %
                                               (d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)
            # Evaluate on the test set
            eval_test = Eval()
            eval_test_A = Eval()
            eval_test_E = Eval()
            for idx in range(len(testExamples)):
                test_list = []
                test_list.append(testExamples[idx])
                x, y = self.variable(test_list)
                lstm_feats = self.model(x)
                predict = self.getMaxIndex(lstm_feats)
                predictLabels = []
                for idy in range(len(predict)):
                    predictLabels.append(
                        self.label_AlphaBet.list[predict[idy]])
                gold_ent, predict_ent = testinsts[idx].evalPRF(
                    predictLabels, eval_test)
                gold_ent_A, gold_ent_E, predict_ent_A, predict_ent_E = testinsts[
                    idx].getAE(gold_ent, predict_ent)
                testinsts[idx].evalAEPRF(gold_ent_A, predict_ent_A,
                                         eval_test_A)
                testinsts[idx].evalAEPRF(gold_ent_E, predict_ent_E,
                                         eval_test_E)
            print('Test: ', end="")
            t_precision, t_recall, t_fscore = eval_test.getFscore()
            line = line + 'test:\nP:' + (
                '%.2f' % (t_precision * 100)) + ' R:' + (
                    '%.2f' %
                    (t_recall * 100)) + ' F:' + ('%.2f' %
                                                 (t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)
            t_precision, t_recall, t_fscore = eval_test_A.getFscore()
            line = line + 'A_P:' + ('%.2f' % (t_precision * 100)) + ' A_R:' + (
                '%.2f' %
                (t_recall * 100)) + ' A_F:' + ('%.2f' %
                                               (t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)
            t_precision, t_recall, t_fscore = eval_test_E.getFscore()
            line = line + 'E_P:' + ('%.2f' % (t_precision * 100)) + ' E_R:' + (
                '%.2f' %
                (t_recall * 100)) + ' E_F:' + ('%.2f' %
                                               (t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)
            # Save the model
            if self.hyperpara.save_pattern == 0:
                torch.save(self.model.state_dict(),
                           path_model + str(epoch) + '.pkl')
            elif self.hyperpara.save_pattern == 1:
                torch.save(self.model, path_model + str(epoch) + '.pkl')
            try:
                file = open(path_PRF, 'a+', encoding='utf-8')
            except IOError:
                print('File read error')
            else:
                file.write(line)
                file.close()
Example no. 21
 def __init__(self, config):
     super(OnlineDistributor,self).__init__(config)
     self._reader = Reader(config)
Example no. 22
class OnlineDistributor(Distributor):
    '''
    Distributes mails in "real-time".

    There is one public method, update. When it is called, the server is polled. If new mails have
    arrived, they are processed and resent to the members of the list. Afterward, the mails
    are deleted, but only if the resend process finished successfully.

    If the subject is in a special format, instead of resending the
    mail, a DownloadMessage is generated and sent back.
    '''

    def __init__(self, config):
        super(OnlineDistributor,self).__init__(config)
        self._reader = Reader(config)

    def update(self):
        '''
        Update the distribution list. Every new message in the server is processed and resent to the
        members of the list. If the resend is successful the new messages are deleted.
        '''
        logger.debug('update is called')

        try:
            self._reader.connect()
        except Exception as e:
            logger.info('connect failed with the exception: %s', e)
            return False

        ids = self._reader.new_messages()
        for id in ids:
            msg = self._reader.get(id)
            if self._isvalid(msg):
                self._process(msg)
                self._reader.delete(id)
        self._reader.disconnect()
        logger.debug('update is finished')
        return len(ids) != 0

    def _process(self, msg):
        '''
        Redirects to the correct action based on the subject of the
        message.
        '''
        subject = msg['Subject']

        if subject.lower().startswith('get'):
            logger.debug('calling _download_and_send')
            self._download_and_send(subject, msg)
        else:
            logger.debug('calling _resend')
            self._resend(msg)

    def _download_and_send(self, subject, msg):
        '''
        Creates a new DownloadMessage based on the subject and sends
        it back to the sender.

        The format of the subject must be: GET 'url'.
        '''
        id = self._store.archive(msg)
        sender = self._find_sender_email(msg)
        url = self._get_download_url(subject)

        if url is not None:
            logger.info('Downloading message for %s with url %s', sender, url)
            self._sender.send(DownloadMessage(url), sender)
            self._store.mark_as_sent(id)

    def _get_download_url(self, subject):
        '''
        Returns the url to download from the subject of the message,
        or None if no url could be found.
        '''
        subject = subject.lower().strip(' ')
        parts = re.split(r'\s+', subject)
        if len(parts) != 2:
            logger.error('_get_download_url, %s has no valid url', subject)
            return None
        return parts[1]
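        # Hypothetical example: a subject of "GET http://host/file.pdf" is
        # lowercased and split on whitespace into ['get', 'http://host/file.pdf'],
        # so the returned url is 'http://host/file.pdf' (note that the
        # lowercasing is applied to the url part as well).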

    def _resend(self, msg):
        '''
        Sends a message to the appropriate members of the list after processing it.
        '''
        self._edit_msg(msg)
        id = self._store.archive(msg)
        sender = self._find_sender_email(msg)
        self._sender.send(msg, *self._mgr.active_members(sender))
        self._store.digest(id, *self._mgr.digest_members(sender))
        self._store.mark_as_sent(id)

    def _edit_msg(self, msg):
        '''
        Processes a message and returns it. The following steps are taken for each part of the
        message that can be interpreted as text:

        - A header and a footer are added, both using the encoding of the payload.
        - The payload has all the email hosts removed.

        The parts are separated with newlines, which depend on whether the message is plain text or
        other (like HTML).
        '''
        header = self._create_header(msg)
        footer = self._create_footer(msg)
        for editable in self._find_actual_text(msg):
            nl = u'\n' if editable.get_content_subtype() == 'plain' else u'<br>'
            editable.set_payload((nl * 2).join([
                        nl.join(header),
                        EMAIL.sub(anonymize_email, editable.get_clean_payload(self._cfg['forbidden_words'])),
                        nl.join(footer)]))
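        # Hypothetical result for a plain-text part: the new payload is the
        # header lines, a blank line, the anonymized original text (emails
        # rewritten via EMAIL.sub/anonymize_email), a blank line, and the
        # footer lines, all joined with '\n'; non-plain parts use '<br>'.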

    def _choose_intro(self):
        '''Randomly chooses an introduction text from the configuration.'''
        return random.choice(self._cfg['introductions'])

    def _create_footer(self, msg):
        '''
        Creates a footer for the message, returned as a list of strings. The footer contains the
        name of the list, a randomly chosen quote and the program id.
        '''
        return [FOOTER_DIVIDER,
                self._cfg['real_name'],
                random.choice(self._cfg['quotes']),
                self._powered_by()]

    def _powered_by(self):
        '''
        Returns the program id, which consists of the name, version, and description of this sw.
        '''
        name = self._manifest['name']
        version = self._manifest['version']
        description = self._manifest['description']
        return u'Powered by %s %s, %s' % (name, version, description)
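
A minimal usage sketch (hypothetical, not taken from the examples above): assuming a config object of the kind Example no. 22's __init__ expects, and that the Reader, store, and sender collaborators are wired up the same way, the distributor is typically polled in a loop and backs off whenever update() reports that nothing new was processed.

import time

distributor = OnlineDistributor(config)    # config as expected by __init__ above
while True:
    handled = distributor.update()         # poll the server, resend and delete new mail
    if not handled:
        time.sleep(60)                     # nothing new (or connect failed); wait before retrying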