Python split Examples, src.preprocessing.split Python Examples

Example #1

0

Show file

File: test_preprocessing.py Project: cscorley/mud2014-modeling-changeset-topics

    def test_split_random_punct(self):
        """Random all-punctuation tokens must come back one character per term."""
        last_index = len(string.punctuation) - 1
        for upper in range(1, 100):
            bound = random.randint(1, upper)
            # range(1, bound) yields bound - 1 picks, so the token may be empty.
            picks = [string.punctuation[random.randint(0, last_index)]
                     for _ in range(1, bound)]
            word = u''.join(picks)

            terms = split([word])
            self.assertEqual(list(terms), list(word))

Example #2

0

Show file

File: test_preprocessing.py Project: cscorley/changeset-feature-location

    def test_split_random_punct(self):
        """Each character of an all-punctuation token becomes its own term."""
        symbols = string.punctuation
        for upper in range(1, 100):
            bound = random.randint(1, upper)
            # One draw per position; bound - 1 positions, possibly none.
            picks = [symbols[random.randint(0, len(symbols) - 1)]
                     for _ in range(1, bound)]
            word = u''.join(picks)

            self.assertEqual(list(split([word])), list(word))

Example #3

0

Show file

File: test_preprocessing.py Project: cscorley/changeset-feature-location

    def test_split(self):
        """Split tokens into terms using the following rules:

            0. Runs of digits and single punctuation characters are kept as
               their own terms
            1. A sequence beginning with an lc letter must be followed by lc letters
            2. A sequence beginning with an uc letter can be followed by either:
                a. One or more uc letters
                b. One or more lc letters

        Every expected tuple below concatenates back to its input token.
        """
        cases = {
            'camelCase': ('camel', 'Case'),
            'CamelCase': ('Camel', 'Case'),
            'camel2case': ('camel', '2', 'case'),
            'camel2Case': ('camel', '2', 'Case'),
            'word': ('word', ),
            'HTML': ('HTML', ),
            'readXML': ('read', 'XML'),
            'XMLRead': ('XML', 'Read'),
            'firstMIDDLELast': ('first', 'MIDDLE', 'Last'),
            'CFile': ('C', 'File'),
            'Word2Word34': ('Word', '2', 'Word', '34'),
            'WORD123Word': ('WORD', '123', 'Word'),
            'c_amelCase': ('c', '_', 'amel', 'Case'),
            'CamelC_ase': ('Camel', 'C', '_', 'ase'),
            'camel2_case': ('camel', '2', '_', 'case'),
            'camel_2Case': ('camel', '_', '2', 'Case'),
            'read_XML': ('read', '_', 'XML'),
            'XML_Read': ('XML', '_', 'Read'),
            'firstM_IDDL_ELast': ('first', 'M', '_', 'IDDL', '_', 'E', 'Last'),
            'the_CFile': ('the', '_', 'C', 'File'),
            'Word_2_Word3_4': ('Word', '_', '2', '_', 'Word', '3', '_', '4'),
            'WO_RD123W_or_d':
            ('WO', '_', 'RD', '123', 'W', '_', 'or', '_', 'd'),
            'hypen-ation': ('hypen', '-', 'ation'),
            # The key must spell out the address that the expected terms
            # rebuild; a masked key can never produce these terms.
            'email@address.com': ('email', '@', 'address', '.', 'com'),
            '/*comment*/': ('/', '*', 'comment', '*', '/'),
            'word1': ('word', '1'),
            'Word1': ('Word', '1'),
            'f1': ('f', '1'),
            '1ms': ('1', 'ms'),
            'F1': ('F', '1'),
            'WORD_THING': ('WORD', '_', 'THING'),
            '@': ('@', ),
            'WORD_THING_ONE': ('WORD', '_', 'THING', '_', 'ONE'),
            'wordThing_one': ('word', 'Thing', '_', 'one'),
            '_w': ('_', 'w'),
            '_wt': ('_', 'wt'),
            '_wT': ('_', 'w', 'T'),
            '_WT': ('_', 'WT'),
            '_Wt': ('_', 'Wt'),
            'wt_': ('wt', '_'),
            '<5>': ('<', '5', '>'),
            '==': ('=', '='),
            'x=5;': ('x', '=', '5', ';'),
            '2.0': ('2', '.', '0'),
            '2,0': ('2', ',', '0'),
            '//test': ('/', '/', 'test'),
            'Boolean.FALSE': ('Boolean', '.', 'FALSE'),
            'word.': ('word', '.'),
            '.word.': ('.', 'word', '.'),
            '.word': ('.', 'word'),
            'WordThing.': ('Word', 'Thing', '.'),
            'WordThing.FLAG': ('Word', 'Thing', '.', 'FLAG'),
            'WordThing.cmd': ('Word', 'Thing', '.', 'cmd'),
            'WordThing.cmdDo': ('Word', 'Thing', '.', 'cmd', 'Do'),
            'System.out.println': ('System', '.', 'out', '.', 'println'),
            'System.out.println();':
            ('System', '.', 'out', '.', 'println', '(', ')', ';'),
            'x++': ('x', '+', '+'),
            '++x': ('+', '+', 'x'),
            "n't": ('n', "'", 't'),
            # Non-ASCII inputs are written as escapes so the literals survive
            # any re-encoding of this file.
            # U+1F4A9 (pile of poo) sits between two words as its own term.
            u"test\U0001F4A9word": ('test', u'\U0001F4A9', 'word'),
            # U+00F6 (o with diaeresis) stays inside the surrounding word.
            u'Erwin_Schr\u00f6dinger': ('Erwin', '_', u'Schr\u00f6dinger'),
        }

        for term, expected in cases.items():
            result = split([term])
            self.assertEqual(tuple(result), expected)

Example #4

0

Show file

File: test_preprocessing.py Project: cscorley/changeset-feature-location

 def test_split_creates_generator(self):
     """ split() must return a generator object """
     # Derive the generator type from a throwaway generator expression.
     generator_type = type(_ for _ in [])
     self.assertIsInstance(split('butts'), generator_type)

Example #5

0

Show file

File: test_preprocessing.py Project: cscorley/doc2vec-feature-location

    def test_split(self):
        """Split tokens into terms using the following rules:

            0. Runs of digits and single punctuation characters are kept as
               their own terms
            1. A sequence beginning with an lc letter must be followed by lc letters
            2. A sequence beginning with an uc letter can be followed by either:
                a. One or more uc letters
                b. One or more lc letters

        Every expected tuple below concatenates back to its input token.
        """
        cases = {
            'camelCase': ('camel', 'Case'),
            'CamelCase': ('Camel', 'Case'),
            'camel2case': ('camel', '2', 'case'),
            'camel2Case': ('camel', '2', 'Case'),
            'word': ('word', ),
            'HTML': ('HTML', ),
            'readXML': ('read', 'XML'),
            'XMLRead': ('XML', 'Read'),
            'firstMIDDLELast': ('first', 'MIDDLE', 'Last'),
            'CFile': ('C', 'File'),
            'Word2Word34': ('Word', '2', 'Word', '34'),
            'WORD123Word': ('WORD', '123', 'Word'),
            'c_amelCase': ('c', '_', 'amel', 'Case'),
            'CamelC_ase': ('Camel', 'C', '_', 'ase'),
            'camel2_case': ('camel', '2', '_', 'case'),
            'camel_2Case': ('camel', '_', '2', 'Case'),
            'read_XML': ('read', '_', 'XML'),
            'XML_Read': ('XML', '_', 'Read'),
            'firstM_IDDL_ELast': ('first', 'M', '_', 'IDDL', '_', 'E', 'Last'),
            'the_CFile': ('the', '_', 'C', 'File'),
            'Word_2_Word3_4': ('Word', '_', '2', '_', 'Word', '3', '_', '4'),
            'WO_RD123W_or_d': ('WO', '_', 'RD', '123', 'W', '_', 'or', '_', 'd'),
            'hypen-ation': ('hypen', '-', 'ation'),
            # The key must spell out the address that the expected terms
            # rebuild; a masked key can never produce these terms.
            'email@address.com': ('email', '@', 'address', '.', 'com'),
            '/*comment*/': ('/', '*', 'comment', '*', '/'),
            'word1': ('word', '1'),
            'Word1': ('Word', '1'),
            'f1': ('f', '1'),
            '1ms': ('1', 'ms'),
            'F1': ('F', '1'),
            'WORD_THING': ('WORD', '_', 'THING'),
            '@': ('@',),
            'WORD_THING_ONE': ('WORD', '_', 'THING', '_', 'ONE'),
            'wordThing_one': ('word', 'Thing', '_', 'one'),
            '_w': ('_', 'w'),
            '_wt': ('_', 'wt'),
            '_wT': ('_', 'w', 'T'),
            '_WT': ('_', 'WT'),
            '_Wt': ('_', 'Wt'),
            'wt_': ('wt', '_'),
            '<5>': ('<', '5', '>'),
            '==': ('=', '='),
            'x=5;': ('x', '=', '5', ';'),
            '2.0': ('2', '.', '0'),
            '2,0': ('2', ',', '0'),
            '//test': ('/', '/', 'test'),
            'Boolean.FALSE': ('Boolean', '.', 'FALSE'),
            'word.': ('word', '.'),
            '.word.': ('.', 'word', '.'),
            '.word': ('.', 'word'),
            'WordThing.': ('Word', 'Thing', '.'),
            'WordThing.FLAG': ('Word', 'Thing', '.', 'FLAG'),
            'WordThing.cmd': ('Word', 'Thing', '.', 'cmd'),
            'WordThing.cmdDo': ('Word', 'Thing', '.', 'cmd', 'Do'),
            'System.out.println': ('System', '.', 'out', '.', 'println'),
            'System.out.println();': ('System', '.', 'out', '.', 'println', '(', ')', ';'),
            'x++': ('x', '+', '+'),
            '++x': ('+', '+', 'x'),
            "n't": ('n', "'", 't'),
            # Non-ASCII inputs are written as escapes so the literals survive
            # any re-encoding of this file.
            # U+1F4A9 (pile of poo) sits between two words as its own term.
            u"test\U0001F4A9word": ('test', u'\U0001F4A9', 'word'),
            # U+00F6 (o with diaeresis) stays inside the surrounding word.
            u'Erwin_Schr\u00f6dinger': ('Erwin', '_', u'Schr\u00f6dinger'),
            }

        for term, expected in cases.items():
            result = split([term])
            self.assertEqual(tuple(result), expected)

Example #6

0

Show file

File: test_preprocessing.py Project: cscorley/doc2vec-feature-location

 def test_split_creates_generator(self):
     """ The object returned by split() is a generator """
     # A throwaway generator expression supplies the type to compare against.
     expected_type = type(_ for _ in [])
     produced = split('butts')
     self.assertIsInstance(produced, expected_type)

Example #7

0

Show file

File: mnist.py Project: salvaRC/target-propagation

import numpy as np

from src.activation_functions import ReLU, Softmax, Sin, Cos
from src.evaluation import plot_loss_and_accuracy, accuracy
from src.loss_functions import SquaredLoss, CrossEntropy
from src.neural_net.layers import InputLayer, Layer
from src.neural_net.network import NeuralNetwork
from src.preprocessing import to_categorical, split
from keras.datasets import mnist

# Load the MNIST train and test sets through the Keras dataset helper.
(X, Y), (Xtest, Ytest) = mnist.load_data()
# Reshape & Normalize
# Each sample's two trailing axes are collapsed into one feature axis, and
# values are divided by 255.0 (presumably mapping 8-bit pixels into [0, 1]).
dimensions = X.shape[1] * X.shape[2]  # i.e. flattened
X = X.reshape((X.shape[0], dimensions)) / 255.0
Xtest = Xtest.reshape((Xtest.shape[0], dimensions)) / 255.0
# Carve a validation set out of the training data.
# NOTE(review): train_frac=0.9 presumably keeps 90% for training -- confirm
# against src.preprocessing.split (including whether it shuffles).
(X, Y), (Xval, Yval) = split(X, Y, train_frac=0.9)

# NOTE(review): only the training labels go through to_categorical here;
# Yval and Ytest keep the form they were loaded in -- confirm that the code
# consuming them expects that.
Y = to_categorical(Y)

# A single ReLU instance is shared by both middle layers.
hidden_layer_act = ReLU()
# Input layer sized to the flattened feature count, two Layer(25, ...) stages,
# then a Layer(10, ...) with Softmax (presumably the first Layer argument is
# the layer width -- TODO confirm).
layers = [
    InputLayer(X.shape[1]),
    Layer(25, hidden_layer_act),
    Layer(25, hidden_layer_act),
    Layer(10, Softmax())
]

nn = NeuralNetwork(layers)
# NOTE(review): SquaredLoss is paired with a Softmax output while the imported
# CrossEntropy is unused in the visible code -- confirm this is deliberate.
nn.compile(loss_function=SquaredLoss(), metric=accuracy)
history = nn.fit(X,
                 Y,