예제 #1
0
 def test_t7(self):
     """Check validation-set and batch access on a Dataset built from TESTFILE3.

     The dataset is split with three validation instances and a fixed random
     seed. The test then compares the second validation instance and the
     second instance of the first training batch, in both original and
     converted form, against known values, and checks the container layout
     returned with and without reshaping.
     """
     dataset = Dataset(TESTFILE3)
     dataset.split(
         convert=True, keep_orig=True, validation_size=3, random_seed=1)

     # --- validation set, original values ---
     val_orig = dataset.validation_set_orig()
     assert len(val_orig) == 3
     expected_val_orig = [
         'you', 'think', 'this', 'place', 'is', 'nice', 'VERB', 'DET', 'a',
         'a', 'a', 'a', 'a', 'a', '', 'nk', 'is', 'ce', '', 'ce', '', 'ink',
         '', 'ace', '', ''
     ]
     assert val_orig[1] == [expected_val_orig, 'NOUN']

     # --- validation set, converted values ---
     val_conv = dataset.validation_set_converted()
     assert len(val_conv) == 3
     expected_val_conv = [
         13, 157, 25, 104, 12, 319, 2, 5, 2, 2, 2, 2, 2, 2, 0, 151, 28, 14,
         0, 14, 0, 215, 0, 101, 0, 0
     ]
     assert val_conv[1] == [expected_val_conv, 0]

     # --- validation set as one batch: a pair (independent, dependent) ---
     val_batch = dataset.validation_set_converted(as_batch=True)
     assert len(val_batch) == 2
     indep, _dep = val_batch
     # one entry per feature, each holding the three validation values
     assert len(indep) == dataset.nFeatures
     assert len(indep[0]) == 3

     # --- original training batch, not reshaped: a plain list of instances ---
     orig_flat_batches = dataset.batches_original(
         train=True, batch_size=4, reshape=False)
     orig_flat = next(iter(orig_flat_batches))
     assert len(orig_flat) == 4
     expected_train_orig = [
         'Bill', 'Bradford', 'in', 'Credit', 'are', 'supposed', 'PROPN',
         'ADP', 'Aa', 'Aa', 'a', 'Aa', 'a', 'a', 'll', 'rd', '', 'it', '',
         'ed', '', 'ord', '', 'dit', '', 'sed'
     ]
     assert orig_flat[1] == [expected_train_orig, 'NOUN']

     # --- original training batch, reshaped: element 0 is the feature list ---
     orig_shaped_batches = dataset.batches_original(
         train=True, batch_size=4, reshape=True)
     orig_shaped = next(iter(orig_shaped_batches))
     assert len(orig_shaped) == 2
     # first feature, second instance of the batch
     assert orig_shaped[0][0][1] == 'Bill'

     # --- converted training batch, not reshaped ---
     conv_flat_batches = dataset.batches_converted(
         train=True, batch_size=4, reshape=False)
     conv_flat = next(iter(conv_flat_batches))
     assert len(conv_flat) == 4
     # TODO: some of these indices differ from an earlier version of the
     # expected values; find out why and confirm they are still correct.
     expected_train_conv = [
         1210, 1495, 9, 796, 23, 3075, 6, 3, 3, 3, 2, 3, 2, 2, 20, 54, 0,
         86, 0, 2, 0, 391, 0, 300, 0, 77
     ]
     assert conv_flat[1] == [expected_train_conv, 0]

     # --- converted training batch, reshaped ---
     conv_shaped_batches = dataset.batches_converted(
         train=True, batch_size=4, reshape=True)
     conv_shaped = next(iter(conv_shaped_batches))
     assert len(conv_shaped) == 2
     # first feature, second instance of the batch
     assert conv_shaped[0][0][1] == 1210
예제 #2
0
 def test_t6(self):
     """Check validation-set and batch access on a Dataset built from TESTFILE2.

     The dataset is split with three validation instances and a fixed random
     seed. The assertions expect a single independent feature whose value is
     a token sequence. The test compares the second validation instance and
     the second instance of the first training batch, in both original and
     converted form, against known values. For reshaped batches the expected
     sequences carry trailing '' / 0 entries after the tokens (presumably
     padding to a common length -- confirm against Dataset).
     """
     dataset = Dataset(TESTFILE2)
     dataset.split(
         convert=True, keep_orig=True, validation_size=3, random_seed=1)

     # --- validation set, original values ---
     val_orig = dataset.validation_set_orig()
     assert len(val_orig) == 3
     expected_val_tokens = [
         'a', 'very', 'well-made', ',', 'funny', 'and', 'entertaining',
         'picture', '.'
     ]
     assert val_orig[1] == [[expected_val_tokens], 'pos']

     # --- validation set, converted values ---
     val_conv = dataset.validation_set_converted()
     assert len(val_conv) == 3
     expected_val_ids = [5, 84, 1530, 4, 75, 6, 190, 175, 2]
     assert val_conv[1] == [[expected_val_ids], 1]

     # --- validation set as one batch: a pair (independent, dependent) ---
     val_batch = dataset.validation_set_converted(as_batch=True)
     assert len(val_batch) == 2
     indep, _dep = val_batch
     # only one feature here, holding the three validation values
     assert len(indep) == 1
     assert len(indep[0]) == 3

     # --- original training batch, not reshaped: a plain list of instances ---
     orig_flat_batches = dataset.batches_original(
         train=True, batch_size=4, reshape=False)
     orig_flat = next(iter(orig_flat_batches))
     assert len(orig_flat) == 4
     expected_train_tokens = [
         'rife', 'with', 'nutty', 'cliches', 'and', 'far', 'too', 'much',
         'dialogue', '.'
     ]
     assert orig_flat[1] == [[expected_train_tokens], 'neg']

     # --- original training batch, reshaped: element 0 is the feature list ---
     orig_shaped_batches = dataset.batches_original(
         train=True, batch_size=4, reshape=True)
     orig_shaped = next(iter(orig_shaped_batches))
     assert len(orig_shaped) == 2
     # first feature, second instance: tokens followed by empty strings
     expected_train_tokens_shaped = [
         'rife', 'with', 'nutty', 'cliches', 'and', 'far', 'too', 'much',
         'dialogue', '.', '', '', '', '', '', '', '', '', '', '', '', '',
         '', '', '', '', '', '', '', '', '', '', '', '', '', ''
     ]
     assert orig_shaped[0][0][1] == expected_train_tokens_shaped

     # --- converted training batch, not reshaped ---
     conv_flat_batches = dataset.batches_converted(
         train=True, batch_size=4, reshape=False)
     conv_flat = next(iter(conv_flat_batches))
     assert len(conv_flat) == 4
     expected_train_ids = [6694, 17, 6469, 544, 6, 168, 51, 59, 237, 2]
     assert conv_flat[1] == [[expected_train_ids], 0]

     # --- converted training batch, reshaped ---
     conv_shaped_batches = dataset.batches_converted(
         train=True, batch_size=4, reshape=True)
     conv_shaped = next(iter(conv_shaped_batches))
     assert len(conv_shaped) == 2
     # first feature, second instance: indices followed by zeros
     expected_train_ids_shaped = [
         6694, 17, 6469, 544, 6, 168, 51, 59, 237, 2, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
     ]
     assert conv_shaped[0][0][1] == expected_train_ids_shaped