Python PyTorchWrapper Exemples, thinc.extra.wrappers.PyTorchWrapper Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : encoder_decoder.py Projet : giannisdaras/thinc

 def __init__(self, nM=300, nH=6, device='cpu'):
     Model.__init__(self)
     self.y_attn = MultiHeadedAttention(nM=nM, nH=nH)
     self.x_attn = MultiHeadedAttention(nM=nM, nH=nH)
     self.norm = PyTorchWrapper(PytorchLayerNorm(nM, device=device))
     self.ffd = PositionwiseFeedForward(nM, 4 * nM)
     self.layers_ = [self.norm, self.y_attn, self.x_attn, self.ffd]

Exemple #2

0

Afficher le fichier

Fichier : encoder_categorizer.py Projet : giannisdaras/thinc

class Categorizer(Model):
    def __init__(self, nS=12, nM=768, nH=12, nO=2, device='cpu'):
        Model.__init__(self)
        self.nM = nM
        self.nO = nO
        self.nS = nS
        self.enc = clone(EncoderLayer(nM=nM, nH=nH, device=device), nS)
        self.affine = Affine(nI=nM, nO=nM)
        self.softmax = Softmax(nI=nM, nO=nO)
        self.norm = PyTorchWrapper(PytorchLayerNorm(nM=nM, device=device))
        self.slicer = PyTorchWrapper(PytorchSlicer())
        self.device = device
        self.layers_ = [self.enc]

    def begin_update(self, inputs, drop=0.0):
        X0, Xmask = inputs
        (
            X1,
            _,
        ), b_X1 = self.enc.begin_update((X0, Xmask))
        X2, b_X2 = self.norm.begin_update(X1)
        X3, b_X3 = self.slicer.begin_update(X2)
        X4, b_X4 = self.affine.begin_update(X3)
        X5, b_X5 = self.softmax.begin_update(X4)

        def finish_update(dX5, sgd=None):
            dX4 = b_X5(dX5, sgd=sgd)
            dX3 = b_X4(dX4, sgd=sgd)
            dX2 = b_X3(dX3)
            dX1 = b_X2(dX2, sgd=sgd)
            dX0 = b_X1(dX1, sgd=sgd)
            return dX0

        return X5, finish_update

Exemple #3

0

Afficher le fichier

Fichier : encoder_decoder.py Projet : giannisdaras/thinc

class DecoderLayer(Model):
    def __init__(self, nM=300, nH=6, device='cpu'):
        Model.__init__(self)
        self.y_attn = MultiHeadedAttention(nM=nM, nH=nH)
        self.x_attn = MultiHeadedAttention(nM=nM, nH=nH)
        self.norm = PyTorchWrapper(PytorchLayerNorm(nM, device=device))
        self.ffd = PositionwiseFeedForward(nM, 4 * nM)
        self.layers_ = [self.norm, self.y_attn, self.x_attn, self.ffd]

    def begin_update(self, input, drop=0.1):
        Y0, X0, X_mask, Y_mask = input
        Y1, b_Y1 = self.y_attn.begin_update((Y0, Y_mask, None), drop=drop)
        Y2, b_Y2 = self.norm.begin_update(Y1)
        Y3 = Y0 + Y2
        Y4, b_Y4 = self.x_attn.begin_update((Y3, X0, X_mask, None, None),
                                            drop=drop)
        Y5, b_Y5 = self.norm.begin_update(Y4)
        Y6 = Y3 + Y5
        Y7, b_Y7 = self.ffd.begin_update(Y6, drop=drop)

        def finish_update(dI, sgd=None):
            dY7, dX = dI
            dY6 = b_Y7(dY7, sgd=sgd)
            dY5 = dY6
            dY4 = b_Y5(dY5, sgd=sgd)
            dY3, dX0 = b_Y4(dY4, sgd=sgd)
            dY3 += dY6
            dY2 = dY3
            dY1 = b_Y2(dY2, sgd=sgd)
            dY0 = b_Y1(dY1, sgd=sgd)
            dY0 += dY3
            dX0 += dX
            return (dY0, dX0)

        return (Y7, X0, X_mask, Y_mask), finish_update

Exemple #4

0

Afficher le fichier

Fichier : encoder_decoder.py Projet : giannisdaras/thinc

class EncoderLayer(Model):
    def __init__(self, nM=300, nH=6, device='cpu'):
        Model.__init__(self)
        self.attn = MultiHeadedAttention(nM=nM, nH=nH)
        self.ffd = PositionwiseFeedForward(nM, 4 * nM)
        self.norm = PyTorchWrapper(PytorchLayerNorm(nM, device=device))
        self.nM = nM
        self.layers_ = [self.attn, self.ffd, self.norm]

    def begin_update(self, input, drop=0.1):
        X0, mask = input
        X1, b_X1 = self.attn.begin_update((X0, mask, None), drop=drop)
        X2, b_X2 = self.norm.begin_update(X1)
        X3 = X0 + X2

        X4, b_X4 = self.ffd.begin_update(X3, drop=drop)
        X5, b_X5 = self.norm.begin_update(X4)
        X6 = X3 + X5

        def finish_update(dX6, sgd=None):
            dX5 = dX6
            dX4 = b_X5(dX5, sgd=sgd)
            dX3 = b_X4(dX4, sgd=sgd)
            dX3 += dX6

            dX2 = dX3
            dX1 = b_X2(dX2, sgd=sgd)
            dX0 = b_X1(dX1, sgd=sgd)

            dX0 += dX3
            return X0

        return (X6, mask), finish_update

Exemple #5

0

Afficher le fichier

Fichier : encoder_categorizer.py Projet : giannisdaras/thinc

 def __init__(self, nS=12, nM=768, nH=12, nO=2, device='cpu'):
     Model.__init__(self)
     self.nM = nM
     self.nO = nO
     self.nS = nS
     self.enc = clone(EncoderLayer(nM=nM, nH=nH, device=device), nS)
     self.affine = Affine(nI=nM, nO=nM)
     self.softmax = Softmax(nI=nM, nO=nO)
     self.norm = PyTorchWrapper(PytorchLayerNorm(nM=nM, device=device))
     self.slicer = PyTorchWrapper(PytorchSlicer())
     self.device = device
     self.layers_ = [self.enc]

Exemple #6

0

Afficher le fichier

def main(length=1000, nO=32, nI=32):
    pt_model = nn.Linear(nI, nO)
    optimizer = torch.optim.Adam(pt_model.parameters())

    model = PyTorchWrapper(pt_model)

    X = numpy.ones((length, nI), dtype='f')
    y = 1. / X
    for i in range(10):
        yh, get_dX = model.begin_update(X)
        dY = (yh - y) / len(y)
        dX = get_dX(dY)

Exemple #7

0

Afficher le fichier

def test_wrapper(nN=2, nI=3, nO=4):
    if PyTorchWrapper is None:
        return
    model = PyTorchWrapper(torch.nn.Linear(nI, nO))
    sgd = SGD(model.ops, 0.001)
    X = numpy.zeros((nN, nI), dtype="f")
    X += numpy.random.uniform(size=X.size).reshape(X.shape)
    Y = numpy.zeros((nN, nO), dtype="f")
    Yh, get_dX = model.begin_update(X)
    assert Yh.shape == (nN, nO)
    dYh = (Yh - Y) / Yh.shape[0]
    dX = get_dX(dYh, sgd=sgd)
    assert dX.shape == (nN, nI)
    check_learns_zero_output(model, sgd, X, Y)

Exemple #8

0

Afficher le fichier

Fichier : test_pytorch_wrapper.py Projet : gelavizh1/thinc

def test_wrapper(nN=2, nI=3, nO=4):
    if PyTorchWrapper is None:
        return
    model = PyTorchWrapper(torch.nn.Linear(nI, nO))
    sgd = SGD(model.ops, 0.001)
    X = numpy.zeros((nN, nI), dtype='f')
    X += numpy.random.uniform(size=X.size).reshape(X.shape)
    Y = numpy.zeros((nN, nO), dtype='f')
    Yh, get_dX = model.begin_update(X)
    assert Yh.shape == (nN, nO)
    dYh = (Yh-Y) / Yh.shape[0]
    dX = get_dX(dYh, sgd=sgd)
    assert dX.shape == (nN, nI)
    check_learns_zero_output(model, sgd, X, Y)

Exemple #9

0

Afficher le fichier

Fichier : multiheaded_attention.py Projet : giannisdaras/thinc

 def __init__(self, nM=300, nH=6):
     self.nH = nH
     self.nM = nM  # model size: the length of the embeddings
     self.nD = nM // nH
     self.get_queries = with_reshape(Affine(nM, nM))
     self.get_keys = with_reshape(Affine(nM, nM))
     self.get_values = with_reshape(Affine(nM, nM))
     self.get_output = with_reshape(Affine(nM, nM))
     self._layers = [self.get_queries, self.get_keys, self.get_values, self.get_output]
     self._softmax = PyTorchWrapper(nn.Softmax(dim=-1))
     ''' mask conf '''
     i_grad = [1, 0]
     o_xp = None
     b_map = None
     ret_x = [0]
     conf = [i_grad, o_xp, b_map, ret_x]
     self._mask = PyTorchWrapper(PytorchMaskScores(), conf=conf)

Exemple #10

0

Afficher le fichier

Fichier : wrap_pytorch.py Projet : youikim/thinc

def main(length=1000, nO=32, nI=32):
    if CupyOps.xp is not None:
        print("Use GPU")
        Model.ops = CupyOps()
        Model.Ops = CupyOps
        torch.set_default_tensor_type("torch.cuda.FloatTensor")

    pt_model = nn.Linear(nI, nO)
    optimizer = torch.optim.Adam(pt_model.parameters())  # noqa: F841

    model = PyTorchWrapper(pt_model)

    X = Model.ops.xp.ones((length, nI), dtype="f")
    y = 1.0 / X
    for i in range(10):
        yh, get_dX = model.begin_update(X)
        dY = (yh - y) / len(y)
        dX = get_dX(dY)  # noqa: F841

Exemple #11

0

Afficher le fichier

Fichier : wrap_pytorch.py Projet : honnibal/thinc

def main(length=1000, nO=32, nI=32):
    ''' Driver function '''
    if CupyOps.xp != None:
        print("Use GPU")
        Model.ops = CupyOps()
        Model.Ops = CupyOps
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        print("GPU not available. Running on CPU.")
    pt_model = nn.Linear(nI, nO)
    optimizer = torch.optim.Adam(pt_model.parameters())  # noqa: F841

    model = PyTorchWrapper(pt_model)

    X = Model.ops.xp.ones((length, nI), dtype="f")
    y = 1.0 / X
    for i in range(10):
        yh, get_dX = model.begin_update(X)
        dY = (yh - y) / len(y)
        dX = get_dX(dY)  # noqa: F841

Exemple #12

0

Afficher le fichier

Fichier : pytorch_mnist_mlp.py Projet : spacy-io/thinc

def main(depth=2, width=512, nb_epoch=30):
    prefer_gpu()
    torch.set_num_threads(1)

    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = Model.ops.unzip(train_data)
    dev_X, dev_y = Model.ops.unzip(dev_data)

    dev_y = to_categorical(dev_y)
    model = PyTorchWrapper(
        PyTorchFeedForward(
            depth=depth,
            width=width,
            input_size=train_X.shape[1],
            output_size=dev_y.shape[1],
        )
    )
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.0]

        def report_progress():
            # with model.use_params(optimizer.averages):
            print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.3
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        train_X = model.ops.asarray(train_X, dtype="float32")
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y) ** 2.0).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
        with model.use_params(optimizer.averages):
            print("Avg dev.: %.3f" % model.evaluate(dev_X, dev_y))
            with open("out.pickle", "wb") as file_:
                pickle.dump(model, file_, -1)

Exemple #13

0

Afficher le fichier

Fichier : encoder_decoder.py Projet : giannisdaras/thinc

 def __init__(self, nS=1, nH=6, nM=300, nTGT=10000, device='cpu'):
     '''
     EncoderDecoder consists of an encoder stack, a decoder stack and an
     output layer which is a linear + softmax.
     Parameters explanation:
         nS: the number of encoders/decoders in the stack
         nH: the number of heads in the multiheaded attention
         nM: the token's embedding size
         nTGT: the number of unique words in output vocabulary
     '''
     Model.__init__(self)
     self.nS = nS
     self.nH = nH
     self.nM = nM
     self.nTGT = nTGT
     self.device = device
     self.enc = Encoder(nM=nM, nH=nH, device=device, nS=nS)
     self.norm = PyTorchWrapper(PytorchLayerNorm(nM=nM, device=device))
     self.dec = clone(DecoderLayer(nM=nM, nH=nH, device=device), nS)
     self.proj = with_reshape(Softmax(nO=nTGT, nI=nM))
     self._layers = [self.enc, self.dec, self.proj]

Exemple #14

0

Afficher le fichier

Fichier : encoder_decoder.py Projet : giannisdaras/thinc

class EncoderDecoder(Model):
    def __init__(self, nS=1, nH=6, nM=300, nTGT=10000, device='cpu'):
        '''
        EncoderDecoder consists of an encoder stack, a decoder stack and an
        output layer which is a linear + softmax.
        Parameters explanation:
            nS: the number of encoders/decoders in the stack
            nH: the number of heads in the multiheaded attention
            nM: the token's embedding size
            nTGT: the number of unique words in output vocabulary
        '''
        Model.__init__(self)
        self.nS = nS
        self.nH = nH
        self.nM = nM
        self.nTGT = nTGT
        self.device = device
        self.enc = Encoder(nM=nM, nH=nH, device=device, nS=nS)
        self.norm = PyTorchWrapper(PytorchLayerNorm(nM=nM, device=device))
        self.dec = clone(DecoderLayer(nM=nM, nH=nH, device=device), nS)
        self.proj = with_reshape(Softmax(nO=nTGT, nI=nM))
        self._layers = [self.enc, self.dec, self.proj]

    def begin_update(self, inputs, drop=0.1):
        '''
        A batch object flows through the network. It contains input, output and
        corresponding masks. Input changes while the object travels through
        the network. Output is the golden output.
        Input: nB x nL x nM
        '''
        X0, Xmask, Y0, Ymask = inputs
        X1, backprop_encode = self.enc.begin_update((X0, Xmask), drop=drop)
        (Y1, _, _, _), backprop_decode = self.dec.begin_update(
            (Y0, X1, Xmask, Ymask), drop=drop)
        Y2, b_Y2 = self.norm.begin_update(Y1)
        word_probs, backprop_output = self.proj.begin_update(Y2, drop=drop)

        def finish_update(d_word_probs, sgd=None):
            dY2 = backprop_output(d_word_probs, sgd=sgd)
            dY1 = b_Y2(dY2, sgd=sgd)
            zeros = Model.ops.xp.zeros(X0.shape, dtype=Model.ops.xp.float32)
            dY0, dX1 = backprop_decode((dY1, zeros), sgd=sgd)
            dX0 = backprop_encode(dX1, sgd=sgd)
            return (dX0, dY0)

        return (word_probs, Xmask), finish_update

Exemple #15

0

Afficher le fichier

Fichier : encoder_decoder.py Projet : giannisdaras/thinc

class Encoder(Model):
    def __init__(self, nM=300, nH=6, nS=6, device='cpu'):
        Model.__init__(self)
        self.stack = clone(EncoderLayer(nM=nM, nH=nH, device=device), nS)
        self.norm = PyTorchWrapper(PytorchLayerNorm(nM=nM, device=device))

    def begin_update(self, input, drop=0.1):
        X0, mask = input
        (X1, _), b_X1 = self.stack.begin_update((X0, mask), drop=0.1)
        X2, b_X2 = self.norm.begin_update(X1)

        def finish_update(dX2, sgd=None):
            dX1 = b_X2(dX2, sgd=sgd)
            dX0 = b_X1(dX1, sgd=sgd)
            return dX0

        return X2, finish_update

Exemple #16

0

Afficher le fichier

Fichier : pytorch_mnist_mlp.py Projet : youikim/thinc

def main(depth=2, width=512, nb_epoch=30):
    prefer_gpu()
    torch.set_num_threads(1)

    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = Model.ops.unzip(train_data)
    dev_X, dev_y = Model.ops.unzip(dev_data)

    dev_y = to_categorical(dev_y)
    model = PyTorchWrapper(
        PyTorchFeedForward(
            depth=depth,
            width=width,
            input_size=train_X.shape[1],
            output_size=dev_y.shape[1],
        ))
    with model.begin_training(train_X, train_y,
                              L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.0]

        def report_progress():
            # with model.use_params(optimizer.averages):
            print(epoch_loss[-1], model.evaluate(dev_X, dev_y),
                  trainer.dropout)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.3
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        train_X = model.ops.asarray(train_X, dtype="float32")
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2.0).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
        with model.use_params(optimizer.averages):
            print("Avg dev.: %.3f" % model.evaluate(dev_X, dev_y))
            with open("out.pickle", "wb") as file_:
                pickle.dump(model, file_, -1)

Exemple #17

0

Afficher le fichier

Fichier : textcat_recipe.py Projet : ysunlp/data_science_challenge

def batch_train_increment(dataset, input_model=None, output_model=None, lang='en',
                factor=1, dropout=0.2, n_iter=1, batch_size=10,
                eval_id=None, eval_split=None, long_text=False, silent=False,shuffle=False,gpu_id = None):
    """
    Batch train a new text classification model from annotations. Prodigy will
    export the best result to the output directory, and include a JSONL file of
    the training and evaluation examples. You can either supply a dataset ID
    containing the evaluation data, or choose to split off a percentage of
    examples for evaluation.
    """
    #log("RECIPE: Starting recipe textcat.batch-train", locals())
    if(gpu_id):
        spacy.util.use_gpu(gpu_id)
    if(n_iter ==1):
        print("one pass mode")
    print("batch_size",batch_size)
    print(factor,type(factor))
    DB = connect()
    print_ = get_print(silent)
    random.seed(0)
    if input_model is not None:
        nlp = spacy.load(input_model, disable=['ner'])
        print_('\nLoaded model {}'.format(input_model))
    else:
        print("build your customized model")
        nlp = spacy.load('en_core_web_lg')
        pt_model = FastText(vocab_size=684831, emb_dim = 300)
        pt_model.embeds.weight.data.copy_(torch.from_numpy(nlp.vocab.vectors.data))
        model = PyTorchWrapper(pt_model)
        #textcat = TextCategorizer(nlp.vocab,model)
        textcat = Loss_TextCategorizer(nlp.vocab,model)
        nlp.add_pipe(textcat)
    examples = DB.get_dataset(dataset)
    labels = {eg['label'] for eg in examples}
    labels = list(sorted(labels))
    print(labels)
    model = TextClassifier(nlp, labels, long_text=long_text,
                           low_data=len(examples) < 1000)
    if shuffle:    
        print("it's shuffling")
        random.shuffle(examples)
    else:
        print("it's not shuffling")
    if eval_id:
        evals = DB.get_dataset(eval_id)
        print_("Loaded {} evaluation examples from '{}'"
               .format(len(evals), eval_id))
    else:
        examples, evals, eval_split = split_evals(examples, eval_split)
        print_("Using {}% of examples ({}) for evaluation"
               .format(round(eval_split * 100), len(evals)))
    if shuffle:
        random.shuffle(examples)
    examples = examples[:int(len(examples) * factor)]
    print_(printers.trainconf(dropout, n_iter, batch_size, factor,
                              len(examples)))
    if len(evals) > 0:
        print_(printers.tc_update_header())
    # best_acc = {'accuracy': 0}
    # best_model = None
    if long_text:
        examples = list(split_sentences(nlp, examples, min_length=False))
    batch_idx = 0
    start_time = datetime.now()
    for batch in cytoolz.partition_all(batch_size,
                                       tqdm.tqdm(examples, leave=False)):
        batch = list(batch)
        for i in range(n_iter):
            loss = model.update(batch, revise=False, drop=dropout)
            if len(evals) > 0:
                #print("optimizer averages",model.optimizer.averages)
                with nlp.use_params(model.optimizer.averages):
                    acc = model.evaluate(tqdm.tqdm(evals, leave=False))
                #print_(printers.tc_update(i, loss, acc))
                end_time = datetime.now() -start_time
                print('Time:[{0} seconds], Epoch: [{1}/{2}], batch: [{3}/{4}], Loss:{5}, Accuracy:{6}'.format( 
                   end_time.seconds,i+1, n_iter, batch_idx+1, len(examples)//batch_size, loss, acc['accuracy']))
            batch_idx += 1
    return acc

Exemple #18

0

Afficher le fichier

Fichier : textcat_recipe.py Projet : ysunlp/data_science_challenge

def textcat_al(dataset, spacy_model,source=None, label='', api=None, patterns=None,
          loader=None, long_text=False, exclude=None):
    """
    Collect the best possible training data for a text classification model
    with the model in the loop. Based on your annotations, Prodigy will decide
    which questions to ask next.
    """
    # logDB = setup_mongo('activelearning')
    #nlp = spacy.load('/home/ysun/pytorchprodigy/')
    if(spacy_model is not None):
        if(type(spacy_model) == str):
            print("Load model ",spacy_model)
            nlp=spacy.load(spacy_model, disable=['ner', 'parser'])
            model = TextClassifier(nlp, label, long_text=long_text)
        else:
            model = spacy_model
    else:
        print("build your customized model")
        nlp = spacy.load('en_core_web_lg')

    #pt_model = nn.Linear(100,1)
    #pt_model = LSTMSentiment(embedding_dim = 100, hidden_dim =100, vocab_size=259136, label_size=2, batch_size=3, dropout=0.5)
        pt_model = FastText_test(vocab_size=684831, emb_dim = 300)
        pt_model.embeds.weight.data.copy_(torch.from_numpy(nlp.vocab.vectors.data))
        model = PyTorchWrapper(pt_model)

        textcat = Loss_TextCategorizer(nlp.vocab,model)
        nlp.add_pipe(textcat)
        model = TextClassifier(nlp, label, long_text=long_text)
    stream = get_stream(source,input_key = 'text')
    if patterns is None:
        predict = model
        update = model.update
    else:
        matcher = PatternMatcher(model.nlp, prior_correct=5.,
                                 prior_incorrect=5., label_span=False,
                                 label_task=True)
        matcher = matcher.from_disk(patterns)
        #log("RECIPE: Created PatternMatcher and loaded in patterns", patterns)
        # Combine the textcat model with the PatternMatcher to annotate both
        # match results and predictions, and update both models.
        predict, update = combine_models(model, matcher)
    # Rank the stream. Note this is continuous, as model() is a generator.
    # As we call model.update(), the ranking of examples changes.
    stream = test_stream(stream,predict)

    def updateDB(answers):
        model.update(answers)
        #print("update model")
        #for eg in answers:
        #    print(eg)
        #for score,eg in model(answers):
        #    eg["update_score"] = score
        #    print("new",score)
        #print(answers)
        
    def on_exit():
        print("on_exit")
        return model
    
    return {
        'view_id': 'classification',
        'dataset': dataset,
        'stream': stream,
        'exclude': exclude,
        'update': updateDB,
        'on_exit': on_exit,
        'config': {'labels': model.labels,'batch_size':1}
    }

Exemple #19

0

Afficher le fichier

Fichier : textcat_recipe.py Projet : ysunlp/data_science_challenge

def batch_train(dataset, input_model=None, output_model=None, lang='en',
                factor=1, dropout=0.2, n_iter=10, batch_size=10,
                eval_id=None, eval_split=None, long_text=False, silent=False,shuffle=False):
    """
    Batch train a new text classification model from annotations. Prodigy will
    export the best result to the output directory, and include a JSONL file of
    the training and evaluation examples. You can either supply a dataset ID
    containing the evaluation data, or choose to split off a percentage of
    examples for evaluation.
    """
    #log("RECIPE: Starting recipe textcat.batch-train", locals())
    print("batch_size",batch_size)
    print(factor,type(factor))
    DB = connect()
    print_ = get_print(silent)
    random.seed(0)
    if input_model is not None:
        nlp = spacy.load(input_model, disable=['ner'])
        print_('\nLoaded model {}'.format(input_model))
    else:
        print("build your customized model")
        nlp = spacy.load('en_core_web_lg')
        pt_model = FastText(vocab_size=684831, emb_dim = 300)
        pt_model.embeds.weight.data.copy_(torch.from_numpy(nlp.vocab.vectors.data))
        model = PyTorchWrapper(pt_model)
        textcat = TextCategorizer(nlp.vocab,model)
        nlp.add_pipe(textcat)

        #pt_model = LSTMSentiment(embedding_dim = 100, hidden_dim =100, vocab_size=259136, label_size=2, batch_size=3, dropout=0.5)
        #model = PyTorchWrapper(pt_model)
        #nlp = spacy.load('/home/ysun/pytorchprodigy/')
        #textcat = TextCategorizer(nlp.vocab,model)
        #nlp.add_pipe(textcat)
    examples = DB.get_dataset(dataset)
    labels = {eg['label'] for eg in examples}
    labels = list(sorted(labels))
    print(labels)
    model = TextClassifier(nlp, labels, long_text=long_text,
                           low_data=len(examples) < 1000)
    #log('RECIPE: Initialised TextClassifier with model {}'
    #    .format(input_model), model.nlp.meta)
    if shuffle:    
        print("it's shuffling")
        random.shuffle(examples)
    else:
        print("it's not shuffling")
    if eval_id:
        evals = DB.get_dataset(eval_id)
        print_("Loaded {} evaluation examples from '{}'"
               .format(len(evals), eval_id))
    else:
        examples, evals, eval_split = split_evals(examples, eval_split)
        print_("Using {}% of examples ({}) for evaluation"
               .format(round(eval_split * 100), len(evals)))
    if shuffle:
        random.shuffle(examples)
    examples = examples[:int(len(examples) * factor)]
    print_(printers.trainconf(dropout, n_iter, batch_size, factor,
                              len(examples)))
    if len(evals) > 0:
        print_(printers.tc_update_header())
    best_acc = {'accuracy': 0}
    best_model = None
    if long_text:
        examples = list(split_sentences(nlp, examples, min_length=False))
    for i in range(n_iter):
        loss = 0.
        random.shuffle(examples)
        for batch in cytoolz.partition_all(batch_size,
                                           tqdm.tqdm(examples, leave=False)):
            batch = list(batch)
            loss += model.update(batch, revise=False, drop=dropout)
        if len(evals) > 0:
            with nlp.use_params(model.optimizer.averages):
                acc = model.evaluate(tqdm.tqdm(evals, leave=False))
                if acc['accuracy'] > best_acc['accuracy']:
                    best_acc = dict(acc)
                    best_model = nlp.to_bytes()
            print_(printers.tc_update(i, loss, acc))
    if len(evals) > 0:
        print_(printers.tc_result(best_acc))
    if output_model is not None:
        if best_model is not None:
            nlp = nlp.from_bytes(best_model)
        msg = export_model_data(output_model, nlp, examples, evals)
        print_(msg)
    return best_acc['accuracy']

Exemple #20

0

Afficher le fichier

 def __init__(self, name, config, model):
     PyTorchWrapper.__init__(self, model)
     self.cfg = dict(config)

Exemple #21

0

Afficher le fichier

Fichier : wrapper.py Projet : kormilitzin/spacy-pytorch-transformers

 def __init__(self, name):
     model = get_pytt_model(name)
     PyTorchWrapper.__init__(self, model)

Exemple #22

0

Afficher le fichier

Fichier : encoder_decoder.py Projet : giannisdaras/thinc

 def __init__(self, nM=300, nH=6, nS=6, device='cpu'):
     Model.__init__(self)
     self.stack = clone(EncoderLayer(nM=nM, nH=nH, device=device), nS)
     self.norm = PyTorchWrapper(PytorchLayerNorm(nM=nM, device=device))

Exemple #23

0

Afficher le fichier

Fichier : multiheaded_attention.py Projet : giannisdaras/thinc

class SparseAttention(Model):
    ''' This class implements multiheaded attention in steps, factorizing
    the attention matrix. '''
    def __init__(self, nM=300, nH=6):
        self.nH = nH
        self.nM = nM  # model size: the length of the embeddings
        self.nD = nM // nH
        self.get_queries = with_reshape(Affine(nM, nM))
        self.get_keys = with_reshape(Affine(nM, nM))
        self.get_values = with_reshape(Affine(nM, nM))
        self.get_output = with_reshape(Affine(nM, nM))
        self._layers = [self.get_queries, self.get_keys, self.get_values, self.get_output]
        self._softmax = PyTorchWrapper(nn.Softmax(dim=-1))
        ''' mask conf '''
        i_grad = [1, 0]
        o_xp = None
        b_map = None
        ret_x = [0]
        conf = [i_grad, o_xp, b_map, ret_x]
        self._mask = PyTorchWrapper(PytorchMaskScores(), conf=conf)

    def begin_update(self, input, drop=0.1):
        if len(input) == 3:
            x0, mask, sentX = input
            sentY = sentX
            y0 = x0
            self_attention = True
        else:
            self_attention = False
            x0, y0, mask, sentX, sentY = input
        ''' Shapes '''
        # x0: nB, nL, nM
        # q0: nB, nL, nM
        # k0: nB, nL, nM
        # v0: nB, nL, nM
        # q1: nB, nH, nL, nD
        # k1: nB, nH, nL, nD
        # v1: nB, nH, nL, nD
        nB, nL, nD, nH = x0.shape[0], x0.shape[1], self.nD, self.nH
        q0, get_dx0 = self.get_queries.begin_update(x0)
        q1 = q0.reshape(nB, -1, self.nH, self.nD).transpose(0, 2, 1, 3)
        k0, get_dy0_1 = self.get_keys.begin_update(y0)
        k1 = k0.reshape(nB, -1, self.nH, self.nD).transpose(0, 2, 1, 3)
        v0, get_dy0_2 = self.get_values.begin_update(y0)
        v1 = v0.reshape(nB, -1, self.nH, self.nD).transpose(0, 2, 1, 3)
        q2, get_dq1_dk1_dv1 = self.attn(q1, k1, v1, mask=mask & self.mask_floor(nB, nL), sentX=sentX,
                                        sentY=sentY, self_attn=self_attention)
        x1, get_dq2_dk1_dv1 = self.attn(q1, k1, v1, mask=mask & self.mask_repetitive(nB, nL), sentX=sentX,
                                        sentY=sentY, self_attn=self_attention)

        x2 = x1.transpose(0, 2, 1, 3).reshape((nB, nL, nH*nD))
        x3, get_dx2 = self.get_output.begin_update(x2)

        def finish_update(dx3, sgd=None):
            dx2 = get_dx2(dx3, sgd=sgd)
            dx1 = dx2.reshape((nB, nL, nH, nD)).transpose(0, 2, 1, 3)
            dq2, dk11, dv11 = get_dq2_dk1_dv1(dx1)
            dq1, dk12, dv12 = get_dq1_dk1_dv1(dq2)
            dk1 = dk11 + dk12
            dv1 = dv11 + dv12
            nM = nH * nD
            dq0 = dq1.transpose(0, 2, 1, 3).reshape((nB, nL, nM))
            dk0 = dk1.transpose(0, 2, 1, 3).reshape((nB, nL, nM))
            dv0 = dv1.transpose(0, 2, 1, 3).reshape((nB, nL, nM))
            dy0 = get_dy0_2(dv0, sgd=sgd) + get_dy0_1(dk0, sgd=sgd)
            dx0 = get_dx0(dq0, sgd=sgd)
            if self_attention:
                return dx0 + dy0
            else:
                return (dx0, dy0)
        return x3, finish_update

    def attn(self, Q, K, V, mask=None, sentX=None, sentY=None, self_attn=True):
        '''
        Compute attention on (query, key, value) triplets.
        The similarity of the (Q, K) pairs are used to
        compute an attention matrix, which is used to rescale
        V.
        '''
        # TESTED
        S0, bp_scaled_dp = self._scaled_dot_prod(Q, K)
        S1, bp_mask = self._mask.begin_update((S0, mask))
        S2, bp_softmax = self._softmax.begin_update(S1)
        S3, bp_apply_attn = self._apply_attn(S2, V)

        def backprop_attn(dS3):
            ''' Attention three inputs, one output '''
            dS2, dV = bp_apply_attn(dS3)
            dS1 = bp_softmax(dS2)
            dS0 = bp_mask(dS1)
            dQ, dK = bp_scaled_dp(dS0)
            return dQ, dK, dV
        return S3, backprop_attn

    def _scaled_dot_prod(self, Q0, K0):
        # TESTED
        # Q0: nB, nH, nL, nD
        # K0: nB, nH, nL, nD
        nB, nH, nL, nD = Q0.shape
        # Q1: nB*nH, nL, nD
        Q1 = Q0.reshape((nB*nH, nL, nD))
        # K1: (nB*nH, nD, nL)
        K1 = K0.transpose(0, 1, 3, 2).reshape((nB*nH, nD, nL))
        # K2: (nB*nH, nD, nL)
        K2 = (K1 / self.ops.xp.sqrt(self.nM)).astype("float32")
        # S0: (nB*nH, nL, nL)
        S0 = self.ops.xp.matmul(Q1, K2)

        # S1 shape: (nB, nH, nL, nL)
        S1 = S0.reshape((nB, nH, nL, nL))

        def backprop_attn1(dS1):
            dS0 = dS1.reshape((nB*nH, nL, nL))
            dQ1 = self.ops.xp.matmul(dS0, K2.transpose(0, 2, 1))
            dK2 = self.ops.xp.matmul(Q1.transpose(0, 2, 1), dS0)
            dK1 = (dK2 / self.ops.xp.sqrt(self.nM)).astype("float32")
            dK0 = dK1.reshape((nB, nH, nD, nL)).transpose(0, 1, 3, 2)
            dQ0 = dQ1.reshape((nB, nH, nL, nD))
            return dQ0, dK0
        return S1, backprop_attn1

    def _apply_attn(self, S0, V0):
        ''' Multiplication with values '''
        # TESTED
        # S0: (nB, nH, nL, nL)
        # VO: (nB, nH, nL, nD)
        # S1: (nB*nH, nL, nL)
        # V1:  (nB*nH, nL, nD)
        # S2: (nB*nH, nL, nD)
        # S3: (nB, nH, nL, nD)
        nB, nH, nL, nL = S0.shape
        nD = V0.shape[-1]
        V1 = V0.reshape((nB*nH, nL, nD))
        S1 = S0.reshape((nB*nH, nL, nL))
        S2 = self.ops.xp.matmul(S1, V1)

        S3 = S2.reshape((nB, nH, nL, nD))

        def backprop_attn4(dS3):
            dS2 = dS3.reshape((nB*nH, nL, nD))
            # (nB*nH, nL, nD) @ (nB*nH, nL, nD).T --> (nB*nH, nL, nL)
            dS1 = self.ops.xp.matmul(dS2, V1.transpose(0, 2, 1))
            # (nB*nH, nL, nL).T @ (nB*nH, nL, nD) --> (nB*nH, nL, nD)
            dV1 = self.ops.xp.matmul(S1.transpose(0, 2, 1), dS2)
            dS0 = dS1.reshape((nB, nH, nL, nL))
            dV0 = dV1.reshape((nB, nH, nL, nD))
            return dS0, dV0

        return S3, backprop_attn4

    def mask_floor(nB, nL):
        stride = math.ceil(math.sqrt(nL))
        floor_mask = Model.ops.xp.zeros((nB, nL, nL), dtype=Model.ops.xp.uint8)
        for i in range(nL):
            lower = max(0, i - (i % stride))
            higher = i + 1
            floor_mask[:, i, lower:higher] = 1
        return floor_mask

    def mask_repetitive(nB, nL):
        ''' Every stride tokens, mask one (independent of row) '''
        stride = math.ceil(math.sqrt(nL))
        repetitive_mask = Model.ops.xp.zeros((nB, nL, nL), dtype=Model.ops.xp.uint8)
        for j in range(nL):
            if ((j % stride) >= (stride - c)):
                if mode == 'left':
                    repetitive_mask[:, j:, j] = 1
        return repetitive_mask

Exemple #24

0

Afficher le fichier

Fichier : multiheaded_attention.py Projet : giannisdaras/thinc

class MultiHeadedAttention(Model):
    ''' This class implements multiheaded attention. It can be used for self
    attention or outer attention, depending on our needs. There is no left
    and right context width. We attend to the whole sentence and we take
    care of the masks to adjust appropriately. There are no actual different
    weight matrices for each head, but a bigger weight matrix for all heads.
    Going to bigger dimensions is the key to get the multiple heads.
    For the time being; key, query and value matrices are supposed to have the
    same length.
    '''
    def __init__(self, nM=300, nH=6):
        Model.__init__(self)
        self.nH = nH
        self.nM = nM  # model size: the length of the embeddings
        self.nD = nM // nH
        self.get_queries = with_reshape(Affine(nM, nM))
        self.get_keys = with_reshape(Affine(nM, nM))
        self.get_values = with_reshape(Affine(nM, nM))
        self.get_output = with_reshape(Affine(nM, nM))
        self._layers = [self.get_queries, self.get_keys, self.get_values, self.get_output]
        self._softmax = PyTorchWrapper(nn.Softmax(dim=-1))

        ''' mask conf '''
        i_grad = [1, 0]
        o_xp = None
        b_map = None
        ret_x = [0]
        conf = [i_grad, o_xp, b_map, ret_x]
        self._mask = PyTorchWrapper(PytorchMaskScores(), conf=conf)

    def begin_update(self, input, drop=0.1):
        # TESTED
        # Queries come from input[0], keys and values from input[1]
        if len(input) == 3:
            x0, mask, sentX = input
            sentY = sentX
            y0 = x0
            self_attention = True
        else:
            self_attention = False
            x0, y0, mask, sentX, sentY = input
        ''' Shapes '''
        # x0: nB, nL, nM
        # q0: nB, nL, nM
        # k0: nB, nL, nM
        # v0: nB, nL, nM
        # q1: nB, nH, nL, nD
        # k1: nB, nH, nL, nD
        # v1: nB, nH, nL, nD
        nB, nL, nD, nH = x0.shape[0], x0.shape[1], self.nD, self.nH
        q0, get_dx0 = self.get_queries.begin_update(x0)
        q1 = q0.reshape(nB, -1, self.nH, self.nD).transpose(0, 2, 1, 3)
        k0, get_dy0_1 = self.get_keys.begin_update(y0)
        k1 = k0.reshape(nB, -1, self.nH, self.nD).transpose(0, 2, 1, 3)
        v0, get_dy0_2 = self.get_values.begin_update(y0)
        v1 = v0.reshape(nB, -1, self.nH, self.nD).transpose(0, 2, 1, 3)
        x1, get_dq1_dk1_dv1 = self.attn(q1, k1, v1, mask=mask, sentX=sentX,
                                        sentY=sentY, self_attn=self_attention)
        x2 = x1.transpose(0, 2, 1, 3).reshape((nB, nL, nH*nD))
        x3, get_dx2 = self.get_output.begin_update(x2)

        def finish_update(dx3, sgd=None):
            dx2 = get_dx2(dx3, sgd=sgd)
            dx1 = dx2.reshape((nB, nL, nH, nD)).transpose(0, 2, 1, 3)
            dq1, dk1, dv1 = get_dq1_dk1_dv1(dx1)
            nM = nH * nD
            dq0 = dq1.transpose(0, 2, 1, 3).reshape((nB, nL, nM))
            dk0 = dk1.transpose(0, 2, 1, 3).reshape((nB, nL, nM))
            dv0 = dv1.transpose(0, 2, 1, 3).reshape((nB, nL, nM))
            dy0 = get_dy0_2(dv0, sgd=sgd) + get_dy0_1(dk0, sgd=sgd)
            dx0 = get_dx0(dq0, sgd=sgd)
            if self_attention:
                return dx0 + dy0
            else:
                return (dx0, dy0)
        return x3, finish_update

    def attn(self, Q, K, V, mask=None, sentX=None, sentY=None, self_attn=True):
        '''
        Compute attention on (query, key, value) triplets.
        The similarity of the (Q, K) pairs are used to
        compute an attention matrix, which is used to rescale
        V.
        '''
        # TESTED
        S0, bp_scaled_dp = self._scaled_dot_prod(Q, K)
        S1, bp_mask = self._mask.begin_update((S0, mask))
        S2, bp_softmax = self._softmax.begin_update(S1)
        S3, bp_apply_attn = self._apply_attn(S2, V)

        def backprop_attn(dS3):
            ''' Attention three inputs, one output '''
            dS2, dV = bp_apply_attn(dS3)
            dS1 = bp_softmax(dS2)
            dS0 = bp_mask(dS1)
            dQ, dK = bp_scaled_dp(dS0)
            return dQ, dK, dV
        return S3, backprop_attn

    def _scaled_dot_prod(self, Q0, K0):
        # TESTED
        # Q0: nB, nH, nL, nD
        # K0: nB, nH, nL, nD
        nB, nH, nL, nD = Q0.shape
        # Q1: nB*nH, nL, nD
        Q1 = Q0.reshape((nB*nH, nL, nD))
        # K1: (nB*nH, nD, nL)
        K1 = K0.transpose(0, 1, 3, 2).reshape((nB*nH, nD, nL))
        # K2: (nB*nH, nD, nL)
        K2 = (K1 / self.ops.xp.sqrt(self.nM)).astype("float32")
        # S0: (nB*nH, nL, nL)
        S0 = self.ops.xp.matmul(Q1, K2)

        # S1 shape: (nB, nH, nL, nL)
        S1 = S0.reshape((nB, nH, nL, nL))

        def backprop_attn1(dS1):
            dS0 = dS1.reshape((nB*nH, nL, nL))
            dQ1 = self.ops.xp.matmul(dS0, K2.transpose(0, 2, 1))
            dK2 = self.ops.xp.matmul(Q1.transpose(0, 2, 1), dS0)
            dK1 = (dK2 / self.ops.xp.sqrt(self.nM)).astype("float32")
            dK0 = dK1.reshape((nB, nH, nD, nL)).transpose(0, 1, 3, 2)
            dQ0 = dQ1.reshape((nB, nH, nL, nD))
            return dQ0, dK0
        return S1, backprop_attn1

    # def _mask(self, S0, mask):
    #     S1 = S0.transpose(1, 0, 2, 3)
    #     S2 = S1 - (1 - mask) * (1e9)
    #     S3 = S2.transpose(1, 0, 2, 3)
    #
    #     def backprop_attn2(dS3):
    #         dS2 = dS3.transpose(1, 0, 2, 3)
    #         dS1 = dS2
    #         dS0 = dS1.transpose(1, 0, 2, 3)
    #         return dS0
    #
    #     return S3, backprop_attn2

    def _apply_attn(self, S0, V0):
        ''' Multiplication with values '''
        # TESTED
        # S0: (nB, nH, nL, nL)
        # VO: (nB, nH, nL, nD)
        # S1: (nB*nH, nL, nL)
        # V1:  (nB*nH, nL, nD)
        # S2: (nB*nH, nL, nD)
        # S3: (nB, nH, nL, nD)
        nB, nH, nL, nL = S0.shape
        nD = V0.shape[-1]
        V1 = V0.reshape((nB*nH, nL, nD))
        S1 = S0.reshape((nB*nH, nL, nL))
        S2 = self.ops.xp.matmul(S1, V1)

        S3 = S2.reshape((nB, nH, nL, nD))

        def backprop_attn4(dS3):
            dS2 = dS3.reshape((nB*nH, nL, nD))
            # (nB*nH, nL, nD) @ (nB*nH, nL, nD).T --> (nB*nH, nL, nL)
            dS1 = self.ops.xp.matmul(dS2, V1.transpose(0, 2, 1))
            # (nB*nH, nL, nL).T @ (nB*nH, nL, nD) --> (nB*nH, nL, nD)
            dV1 = self.ops.xp.matmul(S1.transpose(0, 2, 1), dS2)
            dS0 = dS1.reshape((nB, nH, nL, nL))
            dV0 = dV1.reshape((nB, nH, nL, nD))
            return dS0, dV0

        return S3, backprop_attn4